예제 #1
0
def kernel_combined_modular(fm_train_real=traindat, fm_test_real=testdat,
                            fm_train_dna=traindna, fm_test_dna=testdna):
    """Assemble a combined kernel from one real-valued and two DNA string parts.

    Three (train features, test features, sub-kernel) triples are registered,
    in this order: RealFeatures with a GaussianKernel, StringCharFeatures over
    the DNA alphabet with a FixedDegreeStringKernel, and a second pair of
    StringCharFeatures with a LocalAlignmentStringKernel.

    Args:
        fm_train_real: training data wrapped in RealFeatures.
        fm_test_real: test data wrapped in RealFeatures.
        fm_train_dna: training sequences wrapped in StringCharFeatures.
        fm_test_dna: test sequences wrapped in StringCharFeatures.

    Returns:
        Tuple (km_train, km_test, kernel): the kernel matrix after
        initialising on (train, train), the kernel matrix after initialising
        on (train, test), and the combined kernel itself, which is left
        initialised on (train, test).
    """
    from modshogun import CombinedKernel, GaussianKernel, FixedDegreeStringKernel, LocalAlignmentStringKernel
    from modshogun import RealFeatures, StringCharFeatures, CombinedFeatures, DNA

    combined = CombinedKernel()
    train_stack = CombinedFeatures()
    test_stack = CombinedFeatures()

    def attach(train_part, test_part, part_kernel):
        # Register one (train features, test features, sub-kernel) triple.
        train_stack.append_feature_obj(train_part)
        test_stack.append_feature_obj(test_part)
        combined.append_kernel(part_kernel)

    # Real-valued part.
    attach(RealFeatures(fm_train_real),
           RealFeatures(fm_test_real),
           GaussianKernel(10, 1.1))

    # First DNA part: fixed-degree string kernel.
    degree = 3
    attach(StringCharFeatures(fm_train_dna, DNA),
           StringCharFeatures(fm_test_dna, DNA),
           FixedDegreeStringKernel(10, degree))

    # Second DNA part: local-alignment string kernel on fresh feature objects.
    attach(StringCharFeatures(fm_train_dna, DNA),
           StringCharFeatures(fm_test_dna, DNA),
           LocalAlignmentStringKernel(10))

    combined.init(train_stack, train_stack)
    train_matrix = combined.get_kernel_matrix()
    combined.init(train_stack, test_stack)
    test_matrix = combined.get_kernel_matrix()
    return train_matrix, test_matrix, combined
def runShogunSVRWDKernel(train_xt, train_lt, test_xt, svm_c=1, svr_param=0.1):
    """Train an epsilon-SVR on a combined string kernel and serialize it.

    The combined kernel holds a WeightedDegreePositionStringKernel, whose
    shift vector has one NUMSHIFTS entry per position of the first training
    sequence, and a WeightedCommWordStringKernel. After prediction the
    trained model is written to the path held in FNEPSILON.

    Args:
        train_xt: training sequences, converted with construct_features();
            len(train_xt[0]) sets the length of the shift vector.
        train_lt: training targets, wrapped in RegressionLabels.
        test_xt: test sequences, converted with construct_features().
        svm_c: forwarded to LibSVR as its first argument.
        svr_param: forwarded to LibSVR as its second argument.

    Returns:
        Tuple (out1_epsilon, out2_epsilon, kernel): labels from apply()
        without arguments, labels from apply(feats_test), and the combined
        kernel.
    """
    feats_train = construct_features(train_xt)
    feats_test = construct_features(test_xt)

    # Weighted-degree kernel with one shift value per sequence position.
    max_len = len(train_xt[0])
    kernel_wdk = WeightedDegreePositionStringKernel(SIZE, 5)
    kernel_wdk.set_shifts(np.ones(max_len, dtype=np.int32) * NUMSHIFTS)

    # Spectrum kernel.
    use_sign = False
    kernel_spec = WeightedCommWordStringKernel(SIZE, use_sign)

    kernel = CombinedKernel()
    kernel.append_kernel(kernel_wdk)
    kernel.append_kernel(kernel_spec)

    labels = RegressionLabels(train_lt)

    svr_epsilon = LibSVR(svm_c, svr_param, kernel, labels, LIBSVR_EPSILON_SVR)
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print("Ready to train!")
    svr_epsilon.train(feats_train)

    print("Making predictions!")
    kernel.init(feats_train, feats_test)
    # NOTE(review): the kernel was just initialised on (train, test), so this
    # argument-less apply() presumably scores the test set as well - confirm
    # whether training-set predictions were intended here.
    out1_epsilon = svr_epsilon.apply().get_labels()
    out2_epsilon = svr_epsilon.apply(feats_test).get_labels()

    # The context manager closes the handle even if save() raises.
    with open(FNEPSILON, 'w+') as model_handle:
        svr_epsilon.save(model_handle)

    return out1_epsilon, out2_epsilon, kernel
def runShogunSVRWDKernel(train_xt, train_lt, test_xt, svm_c=1, svr_param=0.1):
    """Train an epsilon-SVR on a combined string kernel and serialize it.

    The combined kernel holds a WeightedDegreePositionStringKernel, whose
    shift vector has one NUMSHIFTS entry per position of the first training
    sequence, and a WeightedCommWordStringKernel. After prediction the
    trained model is written to the path held in FNEPSILON.

    Args:
        train_xt: training sequences, converted with construct_features();
            len(train_xt[0]) sets the length of the shift vector.
        train_lt: training targets, wrapped in RegressionLabels.
        test_xt: test sequences, converted with construct_features().
        svm_c: forwarded to LibSVR as its first argument.
        svr_param: forwarded to LibSVR as its second argument.

    Returns:
        Tuple (out1_epsilon, out2_epsilon, kernel): labels from apply()
        without arguments, labels from apply(feats_test), and the combined
        kernel.
    """
    feats_train = construct_features(train_xt)
    feats_test = construct_features(test_xt)

    # Weighted-degree kernel with one shift value per sequence position.
    max_len = len(train_xt[0])
    kernel_wdk = WeightedDegreePositionStringKernel(SIZE, 5)
    kernel_wdk.set_shifts(np.ones(max_len, dtype=np.int32) * NUMSHIFTS)

    # Spectrum kernel.
    use_sign = False
    kernel_spec = WeightedCommWordStringKernel(SIZE, use_sign)

    kernel = CombinedKernel()
    kernel.append_kernel(kernel_wdk)
    kernel.append_kernel(kernel_spec)

    labels = RegressionLabels(train_lt)

    svr_epsilon = LibSVR(svm_c, svr_param, kernel, labels, LIBSVR_EPSILON_SVR)
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print("Ready to train!")
    svr_epsilon.train(feats_train)

    print("Making predictions!")
    kernel.init(feats_train, feats_test)
    # NOTE(review): the kernel was just initialised on (train, test), so this
    # argument-less apply() presumably scores the test set as well - confirm
    # whether training-set predictions were intended here.
    out1_epsilon = svr_epsilon.apply().get_labels()
    out2_epsilon = svr_epsilon.apply(feats_test).get_labels()

    # The context manager closes the handle even if save() raises.
    with open(FNEPSILON, 'w+') as model_handle:
        svr_epsilon.save(model_handle)

    return out1_epsilon, out2_epsilon, kernel
예제 #4
0
def runShogunSVMDNACombinedSpectrumKernel(train_xt, train_lt, test_xt):
    """Train a LibSVM on a combined kernel with one sub-kernel per K in KList.

    For every K in KList the raw DNA strings are turned into
    StringWordFeatures via obtain_from_char(chars, K - 1, K, GAP, False), run
    through a SortWordString preprocessor and paired with a
    CommWordStringKernel.

    Args:
        train_xt: training sequences for StringCharFeatures(..., DNA).
        train_lt: training labels, wrapped in BinaryLabels.
        test_xt: test sequences for StringCharFeatures(..., DNA).

    Returns:
        Tuple (out1, out2, out1DecisionValues, out2DecisionValues): the
        get_labels() arrays for the training and test sets, followed by the
        label objects returned by svm.apply() for the training and test sets.
    """
    kernel = CombinedKernel()
    feats_train = CombinedFeatures()
    feats_test = CombinedFeatures()

    for K in KList:
        # Training side: the preprocessor is initialised on these features.
        chars_train = StringCharFeatures(train_xt, DNA)
        words_train = StringWordFeatures(DNA)
        words_train.obtain_from_char(chars_train, K - 1, K, GAP, False)
        preproc = SortWordString()
        preproc.init(words_train)
        words_train.add_preprocessor(preproc)
        words_train.apply_preprocessor()
        feats_train.append_feature_obj(words_train)

        # Test side: reuses the preprocessor initialised on the training data.
        chars_test = StringCharFeatures(test_xt, DNA)
        words_test = StringWordFeatures(DNA)
        words_test.obtain_from_char(chars_test, K - 1, K, GAP, False)
        words_test.add_preprocessor(preproc)
        words_test.apply_preprocessor()
        feats_test.append_feature_obj(words_test)

        kernel.append_kernel(CommWordStringKernel(10, False))

    kernel.io.set_loglevel(MSG_DEBUG)

    labels = BinaryLabels(train_lt)

    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print("Ready to train!")
    kernel.init(feats_train, feats_train)
    svm = LibSVM(SVMC, kernel, labels)
    svm.io.set_loglevel(MSG_DEBUG)
    svm.train()

    print("Making predictions!")
    out1DecisionValues = svm.apply(feats_train)
    out1 = out1DecisionValues.get_labels()
    kernel.init(feats_train, feats_test)
    out2DecisionValues = svm.apply(feats_test)
    out2 = out2DecisionValues.get_labels()

    return out1, out2, out1DecisionValues, out2DecisionValues
예제 #5
0
def mkl(train_features,
        train_labels,
        test_features,
        test_labels,
        width=5,
        C=1.2,
        epsilon=1e-2,
        mkl_epsilon=0.001,
        mkl_norm=2):
    """Train a multiclass MKL machine and print its train and test error.

    The same feature object is appended three times to the combined features,
    once for each sub-kernel: GaussianKernel(10, width), LinearKernel() and
    PolyKernel(10, 2).

    Args:
        train_features: feature object appended to the training features.
        train_labels: labels given to MKLMulticlass and used for scoring.
        test_features: feature object appended to the test features.
        test_labels: labels used to score the test predictions.
        width: second argument of the GaussianKernel.
        C: first argument of MKLMulticlass.
        epsilon: value for set_epsilon().
        mkl_epsilon: value for set_mkl_epsilon().
        mkl_norm: value for set_mkl_norm().

    Returns:
        None; the two error figures, (1 - accuracy) * 100, are printed.
    """
    from modshogun import CombinedKernel, CombinedFeatures
    from modshogun import GaussianKernel, LinearKernel, PolyKernel
    from modshogun import MKLMulticlass, MulticlassAccuracy

    kernel = CombinedKernel()
    feats_train = CombinedFeatures()
    feats_test = CombinedFeatures()

    for subkernel in (GaussianKernel(10, width), LinearKernel(), PolyKernel(10, 2)):
        feats_train.append_feature_obj(train_features)
        feats_test.append_feature_obj(test_features)
        kernel.append_kernel(subkernel)

    kernel.init(feats_train, feats_train)
    # Named `machine` so the local does not shadow this function's own name.
    machine = MKLMulticlass(C, kernel, train_labels)

    machine.set_epsilon(epsilon)
    machine.set_mkl_epsilon(mkl_epsilon)
    machine.set_mkl_norm(mkl_norm)

    machine.train()
    train_output = machine.apply()

    # Re-initialise on (train, test) before predicting again.
    kernel.init(feats_train, feats_test)
    test_output = machine.apply()

    evaluator = MulticlassAccuracy()
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print('MKL training error is %.4f' % (
        (1 - evaluator.evaluate(train_output, train_labels)) * 100))
    print('MKL test error is %.4f' % (
        (1 - evaluator.evaluate(test_output, test_labels)) * 100))
예제 #6
0
def mkl_multiclass_modular(fm_train_real, fm_test_real, label_train_multiclass,
                           width, C, epsilon, num_threads, mkl_epsilon, mkl_norm):
    """Train MKLMulticlass on three sub-kernels and label the test data.

    Each sub-kernel - GaussianKernel(10, width), LinearKernel() and
    PolyKernel(10, 2) - gets its own RealFeatures wrappers around the same
    training and test data.

    Args:
        fm_train_real: training data wrapped in RealFeatures.
        fm_test_real: test data wrapped in RealFeatures.
        label_train_multiclass: training labels for MulticlassLabels.
        width: second argument of the GaussianKernel.
        C: first argument of MKLMulticlass.
        epsilon: value for set_epsilon().
        num_threads: value for parallel.set_num_threads().
        mkl_epsilon: value for set_mkl_epsilon().
        mkl_norm: value for set_mkl_norm().

    Returns:
        The result of apply().get_labels() after the kernel has been
        re-initialised on (train, test).
    """
    from modshogun import CombinedFeatures, RealFeatures, MulticlassLabels
    from modshogun import CombinedKernel, GaussianKernel, LinearKernel,PolyKernel
    from modshogun import MKLMulticlass

    combined = CombinedKernel()
    train_stack = CombinedFeatures()
    test_stack = CombinedFeatures()

    # One fresh pair of RealFeatures per sub-kernel.
    for part_kernel in (GaussianKernel(10, width), LinearKernel(), PolyKernel(10, 2)):
        train_stack.append_feature_obj(RealFeatures(fm_train_real))
        test_stack.append_feature_obj(RealFeatures(fm_test_real))
        combined.append_kernel(part_kernel)

    combined.init(train_stack, train_stack)

    machine = MKLMulticlass(C, combined, MulticlassLabels(label_train_multiclass))
    machine.set_epsilon(epsilon)
    machine.parallel.set_num_threads(num_threads)
    machine.set_mkl_epsilon(mkl_epsilon)
    machine.set_mkl_norm(mkl_norm)
    machine.train()

    # Switch the kernel to (train, test) before predicting.
    combined.init(train_stack, test_stack)
    return machine.apply().get_labels()
def kernel_combined_custom_poly_modular(train_fname=traindat,
                                        test_fname=testdat,
                                        train_label_fname=label_traindat):
    """Train a LibSVM on a CustomKernel + PolyKernel combination, then apply it.

    The CustomKernel part wraps a precomputed PolyKernel(10, 3) matrix; the
    second part is a PolyKernel(10, 2) evaluated on RealFeatures loaded from
    CSV. A second combined kernel is built the same way for (train, test) and
    installed on the trained SVM.

    Args:
        train_fname: CSV file with the training features.
        test_fname: CSV file with the test features.
        train_label_fname: CSV file with the training labels.

    Returns:
        Tuple (km_train, kernel): the matrix of the second combined kernel,
        which was initialised on (train, test) despite the name, and that
        kernel object.
    """
    from modshogun import CombinedFeatures, RealFeatures, BinaryLabels
    from modshogun import CombinedKernel, PolyKernel, CustomKernel
    from modshogun import LibSVM, CSVFile

    def poly3_matrix(lhs, rhs):
        # Precompute the PolyKernel(10, 3) matrix between two feature sets.
        poly = PolyKernel(10, 3)
        poly.init(lhs, rhs)
        return poly.get_kernel_matrix()

    # ---- training kernel ----
    train_kernel = CombinedKernel()
    feats_train = CombinedFeatures()

    tfeats = RealFeatures(CSVFile(train_fname))
    train_kernel.append_kernel(CustomKernel(poly3_matrix(tfeats, tfeats)))

    feats_train.append_feature_obj(RealFeatures(CSVFile(train_fname)))
    train_kernel.append_kernel(PolyKernel(10, 2))

    train_kernel.init(feats_train, feats_train)

    svm = LibSVM(1.0, train_kernel, BinaryLabels(CSVFile(train_label_fname)))
    svm.train()

    # ---- prediction kernel ----
    kernel = CombinedKernel()
    feats_pred = CombinedFeatures()

    pfeats = RealFeatures(CSVFile(test_fname))
    kernel.append_kernel(CustomKernel(poly3_matrix(tfeats, pfeats)))

    feats_pred.append_feature_obj(RealFeatures(CSVFile(test_fname)))
    kernel.append_kernel(PolyKernel(10, 2))
    kernel.init(feats_train, feats_pred)

    svm.set_kernel(kernel)
    # The predictions themselves are discarded; only the kernel is returned.
    svm.apply()
    km_train = kernel.get_kernel_matrix()
    return km_train, kernel
예제 #8
0
def mkl_regression_modular(n=100, n_test=100,
                           x_range=6, x_range_test=10, noise_var=0.5, width=1, seed=1):
    """Fit MKLRegression to a noisy one-dimensional sine wave and predict.

    NOTE(review): n, n_test, x_range_test and noise_var are overwritten with
    fixed values inside the body, and width is never read, so only x_range and
    seed influence the result. Confirm whether that is intended before
    relying on the other parameters.

    Args:
        n: overwritten with 15 in the body.
        n_test: overwritten with 100 in the body.
        x_range: scale applied to the random training inputs.
        x_range_test: overwritten with 10 in the body.
        noise_var: overwritten with 0.5 in the body.
        width: unused.
        seed: passed to random.seed().

    Returns:
        Tuple (out, svr_mkl, kernel): predicted labels on the test grid, the
        trained MKLRegression machine, and the combined kernel.
    """
    from modshogun import RegressionLabels, RealFeatures
    from modshogun import GaussianKernel, PolyKernel, CombinedKernel
    from modshogun import MKLRegression, SVRLight

    # `random`, `array` and `sin` come from module scope (presumably numpy).
    random.seed(seed)

    # Hard-coded problem size; these replace the arguments of the same name.
    n = 15
    n_test = 100
    x_range_test = 10
    noise_var = 0.5

    # Training inputs first, noise second: keeps the random stream in order.
    X = random.rand(1, n) * x_range
    test_grid = [float(i) / n_test * x_range_test for i in range(n_test)]
    X_test = array([test_grid])
    Y_test = sin(X_test)  # computed but not used below
    Y = sin(X) + random.randn(n) * noise_var

    labels = RegressionLabels(Y[0])
    feats_train = RealFeatures(X)
    feats_test = RealFeatures(X_test)

    kernel = CombinedKernel()
    for part_kernel in (GaussianKernel(10, 2), GaussianKernel(10, 3), PolyKernel(10, 2)):
        kernel.append_kernel(part_kernel)
    kernel.init(feats_train, feats_train)

    # SVRLight is handed to MKLRegression as its constraint generator.
    svr_mkl = MKLRegression(SVRLight())
    svr_mkl.set_kernel(kernel)
    svr_mkl.set_labels(labels)
    svr_mkl.train()

    kernel.init(feats_train, feats_test)
    out = svr_mkl.apply().get_labels()

    return out, svr_mkl, kernel
def mkl_regression_modular(n=100, n_test=100,
                           x_range=6, x_range_test=10, noise_var=0.5, width=1, seed=1):
    """Fit MKLRegression to a noisy one-dimensional sine wave and predict.

    NOTE(review): n, n_test, x_range_test and noise_var are overwritten with
    fixed values inside the body, and width is never read, so only x_range and
    seed influence the result. Confirm whether that is intended before
    relying on the other parameters.

    Args:
        n: overwritten with 15 in the body.
        n_test: overwritten with 100 in the body.
        x_range: scale applied to the random training inputs.
        x_range_test: overwritten with 10 in the body.
        noise_var: overwritten with 0.5 in the body.
        width: unused.
        seed: passed to random.seed().

    Returns:
        Tuple (out, svr_mkl, kernel): predicted labels on the test grid, the
        trained MKLRegression machine, and the combined kernel.
    """
    from modshogun import RegressionLabels, RealFeatures
    from modshogun import GaussianKernel, PolyKernel, CombinedKernel
    from modshogun import MKLRegression, SVRLight

    # `random`, `array` and `sin` come from module scope (presumably numpy).
    random.seed(seed)

    # Hard-coded problem size; these replace the arguments of the same name.
    n = 15
    n_test = 100
    x_range_test = 10
    noise_var = 0.5

    # Training inputs first, noise second: keeps the random stream in order.
    X = random.rand(1, n) * x_range
    test_grid = [float(i) / n_test * x_range_test for i in range(n_test)]
    X_test = array([test_grid])
    Y_test = sin(X_test)  # computed but not used below
    Y = sin(X) + random.randn(n) * noise_var

    labels = RegressionLabels(Y[0])
    feats_train = RealFeatures(X)
    feats_test = RealFeatures(X_test)

    kernel = CombinedKernel()
    for part_kernel in (GaussianKernel(10, 2), GaussianKernel(10, 3), PolyKernel(10, 2)):
        kernel.append_kernel(part_kernel)
    kernel.init(feats_train, feats_train)

    # SVRLight is handed to MKLRegression as its constraint generator.
    svr_mkl = MKLRegression(SVRLight())
    svr_mkl.set_kernel(kernel)
    svr_mkl.set_labels(labels)
    svr_mkl.train()

    kernel.init(feats_train, feats_test)
    out = svr_mkl.apply().get_labels()

    return out, svr_mkl, kernel
def kernel_combined_custom_poly_modular(train_fname=traindat, test_fname=testdat,
                                        train_label_fname=label_traindat):
    """Train a LibSVM on a CustomKernel + PolyKernel combination, then apply it.

    The CustomKernel part wraps a precomputed PolyKernel(10, 3) matrix; the
    second part is a PolyKernel(10, 2) evaluated on RealFeatures loaded from
    CSV. A second combined kernel is built the same way for (train, test) and
    installed on the trained SVM.

    Args:
        train_fname: CSV file with the training features.
        test_fname: CSV file with the test features.
        train_label_fname: CSV file with the training labels.

    Returns:
        Tuple (km_train, kernel): the matrix of the second combined kernel,
        which was initialised on (train, test) despite the name, and that
        kernel object.
    """
    from modshogun import CombinedFeatures, RealFeatures, BinaryLabels
    from modshogun import CombinedKernel, PolyKernel, CustomKernel
    from modshogun import LibSVM, CSVFile

    def poly3_matrix(lhs, rhs):
        # Precompute the PolyKernel(10, 3) matrix between two feature sets.
        poly = PolyKernel(10, 3)
        poly.init(lhs, rhs)
        return poly.get_kernel_matrix()

    # ---- training kernel ----
    train_kernel = CombinedKernel()
    feats_train = CombinedFeatures()

    tfeats = RealFeatures(CSVFile(train_fname))
    train_kernel.append_kernel(CustomKernel(poly3_matrix(tfeats, tfeats)))

    feats_train.append_feature_obj(RealFeatures(CSVFile(train_fname)))
    train_kernel.append_kernel(PolyKernel(10, 2))

    train_kernel.init(feats_train, feats_train)

    svm = LibSVM(1.0, train_kernel, BinaryLabels(CSVFile(train_label_fname)))
    svm.train()

    # ---- prediction kernel ----
    kernel = CombinedKernel()
    feats_pred = CombinedFeatures()

    pfeats = RealFeatures(CSVFile(test_fname))
    kernel.append_kernel(CustomKernel(poly3_matrix(tfeats, pfeats)))

    feats_pred.append_feature_obj(RealFeatures(CSVFile(test_fname)))
    kernel.append_kernel(PolyKernel(10, 2))
    kernel.init(feats_train, feats_pred)

    svm.set_kernel(kernel)
    # The predictions themselves are discarded; only the kernel is returned.
    svm.apply()
    km_train = kernel.get_kernel_matrix()
    return km_train, kernel
def make_combined_kernel(feats_train, raw_train, use_sign=True, minseq=3, maxseq=8):
    """Build a CombinedKernel with one CommUlongStringKernel per sequence length.

    One sub-kernel is created for every length in minseq..maxseq (inclusive).
    The i-th sub-kernel is constructed from raw_train[i] on both sides, so
    raw_train must hold at least maxseq - minseq + 1 entries.

    Args:
        feats_train: features the combined kernel is initialised on, used for
            both sides.
        raw_train: indexable collection of per-length feature objects.
        use_sign: forwarded to each CommUlongStringKernel.
        minseq: smallest sequence length (inclusive).
        maxseq: largest sequence length (inclusive).

    Returns:
        The combined kernel, initialised on (feats_train, feats_train).
    """
    from modshogun import CombinedKernel
    from modshogun import CommUlongStringKernel

    kernel = CombinedKernel()

    # Only the number of lengths matters; the length value itself is not used.
    for index in range(maxseq - minseq + 1):
        part_feats = raw_train[index]
        kernel.append_kernel(CommUlongStringKernel(part_feats, part_feats, use_sign))

    # The original also computed the full kernel matrix here and discarded it;
    # that unused computation has been dropped.
    kernel.init(feats_train, feats_train)

    return kernel
예제 #12
0
def mkl(train_features, train_labels, test_features, test_labels, width=5, C=1.2, epsilon=1e-2, mkl_epsilon=0.001, mkl_norm=2):
    """Train a multiclass MKL machine and print its train and test error.

    The same feature object is appended three times to the combined features,
    once for each sub-kernel: GaussianKernel(10, width), LinearKernel() and
    PolyKernel(10, 2).

    Args:
        train_features: feature object appended to the training features.
        train_labels: labels given to MKLMulticlass and used for scoring.
        test_features: feature object appended to the test features.
        test_labels: labels used to score the test predictions.
        width: second argument of the GaussianKernel.
        C: first argument of MKLMulticlass.
        epsilon: value for set_epsilon().
        mkl_epsilon: value for set_mkl_epsilon().
        mkl_norm: value for set_mkl_norm().

    Returns:
        None; the two error figures, (1 - accuracy) * 100, are printed.
    """
    from modshogun import CombinedKernel, CombinedFeatures
    from modshogun import GaussianKernel, LinearKernel, PolyKernel
    from modshogun import MKLMulticlass, MulticlassAccuracy

    kernel = CombinedKernel()
    feats_train = CombinedFeatures()
    feats_test = CombinedFeatures()

    for subkernel in (GaussianKernel(10, width), LinearKernel(), PolyKernel(10, 2)):
        feats_train.append_feature_obj(train_features)
        feats_test.append_feature_obj(test_features)
        kernel.append_kernel(subkernel)

    kernel.init(feats_train, feats_train)
    # Named `machine` so the local does not shadow this function's own name.
    machine = MKLMulticlass(C, kernel, train_labels)

    machine.set_epsilon(epsilon)
    machine.set_mkl_epsilon(mkl_epsilon)
    machine.set_mkl_norm(mkl_norm)

    machine.train()
    train_output = machine.apply()

    # Re-initialise on (train, test) before predicting again.
    kernel.init(feats_train, feats_test)
    test_output = machine.apply()

    evaluator = MulticlassAccuracy()
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print('MKL training error is %.4f' % ((1 - evaluator.evaluate(train_output, train_labels)) * 100))
    print('MKL test error is %.4f' % ((1 - evaluator.evaluate(test_output, test_labels)) * 100))
예제 #13
0
def predict_new_data(graph_file, cons_file, tri_file, other_feature_file):
    """Classify new samples with a previously trained MKL model.

    Four feature files are read with read_feature_data() and normalised with
    get_normalized_given_max_min() against range files under models/. Each one
    becomes the test side of a GaussianKernel(10, 0.5) sub-kernel, whose
    training side is loaded from a serialized RealFeatures file under
    models/. The MKL model is loaded from models/mkl.dat, which is downloaded
    first if it does not exist.

    Args:
        graph_file: feature file paired with models/graph.dat.
        cons_file: feature file paired with models/cons.dat.
        tri_file: feature file paired with models/tri.dat.
        other_feature_file: feature file paired with models/alu.dat.

    Returns:
        The labels from new_mkl.apply().get_labels().
    """
    print('reading extracted features')
    # (input file, normalisation range file, serialized training features).
    # The paths are reproduced exactly, including the 'grtaph' spelling.
    sources = (
        (graph_file, 'models/grtaph_max_size', "models/graph.dat"),
        (cons_file, 'models/cons_max_size', "models/cons.dat"),
        (tri_file, 'models/tri_max_size', "models/tri.dat"),
        (other_feature_file, 'models/alu_max_size', "models/alu.dat"),
    )

    # Read and normalise every input before touching the serialized models,
    # which keeps the original order of file accesses.
    normalized = [
        get_normalized_given_max_min(read_feature_data(feature_file), range_file)
        for feature_file, range_file, _ in sources
    ]

    width = 0.5
    kernel = CombinedKernel()
    feats_train = CombinedFeatures()
    feats_test = CombinedFeatures()

    for feature, (_, _, train_path) in zip(normalized, sources):
        subkfeats_train = RealFeatures()
        subkfeats_test = RealFeatures(np.transpose(np.array(feature)))
        subkernel = GaussianKernel(10, width)
        feats_test.append_feature_obj(subkfeats_test)

        fstream = SerializableAsciiFile(train_path, "r")
        # NOTE(review): the return value of load_serializable() is ignored,
        # as in the original; a failed load is not reported here.
        subkfeats_train.load_serializable(fstream)
        feats_train.append_feature_obj(subkfeats_train)
        kernel.append_kernel(subkernel)

    model_file = "models/mkl.dat"
    if not os.path.exists(model_file):
        print('downloading model file')
        # urlretrieve lives in different modules on Python 2 and Python 3.
        try:
            from urllib.request import urlretrieve
        except ImportError:
            from urllib import urlretrieve
        # NOTE(review): the model is fetched over plain HTTP with no
        # integrity check.
        url_add = 'http://rth.dk/resources/mirnasponge/data/mkl.dat'
        urlretrieve(url_add, model_file)
    print('loading trained model')
    fstream = SerializableAsciiFile(model_file, "r")
    new_mkl = MKLClassification()
    new_mkl.load_serializable(fstream)

    print('model predicting')
    kernel.init(feats_train, feats_test)
    new_mkl.set_kernel(kernel)
    y_out = new_mkl.apply().get_labels()

    return y_out
예제 #14
0
def mkl_binclass_modular(fm_train_real=traindat, fm_test_real=testdat, fm_label_twoclass=label_traindat):
    """Train binary MKL on a CustomKernel + PolyKernel pair and classify test data.

    A PolyKernel(10, 3) is precomputed into train/train and train/test
    matrices, which are wrapped in CustomKernel objects. Each is combined with
    a PolyKernel(10, 2) evaluated on RealFeatures.

    Args:
        fm_train_real: training data wrapped in RealFeatures.
        fm_test_real: test data wrapped in RealFeatures.
        fm_label_twoclass: training labels wrapped in BinaryLabels.

    Returns:
        Tuple (predictions, kernel): the result of mkl.apply() with the test
        kernel installed, and that test-time combined kernel.
    """
    ##################################
    # set up and train

    # Precompute the degree-3 polynomial matrices used as custom kernels.
    tfeats = RealFeatures(fm_train_real)
    tkernel = PolyKernel(10, 3)
    tkernel.init(tfeats, tfeats)
    K_train = tkernel.get_kernel_matrix()

    pfeats = RealFeatures(fm_test_real)
    tkernel.init(tfeats, pfeats)
    K_test = tkernel.get_kernel_matrix()

    # Combined training features and the matching combined kernel.
    feats_train = CombinedFeatures()
    feats_train.append_feature_obj(RealFeatures(fm_train_real))

    kernel = CombinedKernel()
    kernel.append_kernel(CustomKernel(K_train))
    kernel.append_kernel(PolyKernel(10, 2))
    kernel.init(feats_train, feats_train)

    labels = BinaryLabels(fm_label_twoclass)
    mkl = MKLClassification()
    mkl.set_mkl_norm(1)  # 2 and 3 are listed as alternatives in the original
    mkl.set_C(1, 1)  # cost (neg, pos)
    mkl.set_kernel(kernel)
    mkl.set_labels(labels)
    mkl.train()

    ##################################
    # test

    feats_pred = CombinedFeatures()
    feats_pred.append_feature_obj(RealFeatures(fm_test_real))

    kernel = CombinedKernel()
    kernel.append_kernel(CustomKernel(K_test))
    kernel.append_kernel(PolyKernel(10, 2))
    kernel.init(feats_train, feats_pred)

    # Classify once; the original ran apply() twice and discarded the first
    # result.
    mkl.set_kernel(kernel)
    return mkl.apply(), kernel
예제 #15
0
def predict_new_data(graph_file, cons_file, tri_file, other_feature_file):
    """Classify new samples with a previously trained MKL model.

    Four feature files are read with read_feature_data() and normalised with
    get_normalized_given_max_min() against range files under models/. Each one
    becomes the test side of a GaussianKernel(10, 0.5) sub-kernel, whose
    training side is loaded from a serialized RealFeatures file under
    models/. The MKL model is loaded from models/mkl.dat, which is downloaded
    first if it does not exist.

    Args:
        graph_file: feature file paired with models/graph.dat.
        cons_file: feature file paired with models/cons.dat.
        tri_file: feature file paired with models/tri.dat.
        other_feature_file: feature file paired with models/alu.dat.

    Returns:
        The labels from new_mkl.apply().get_labels().
    """
    print("reading extracted features")
    # (input file, normalisation range file, serialized training features).
    # The paths are reproduced exactly, including the "grtaph" spelling.
    sources = (
        (graph_file, "models/grtaph_max_size", "models/graph.dat"),
        (cons_file, "models/cons_max_size", "models/cons.dat"),
        (tri_file, "models/tri_max_size", "models/tri.dat"),
        (other_feature_file, "models/alu_max_size", "models/alu.dat"),
    )

    # Read and normalise every input before touching the serialized models,
    # which keeps the original order of file accesses.
    normalized = [
        get_normalized_given_max_min(read_feature_data(feature_file), range_file)
        for feature_file, range_file, _ in sources
    ]

    width = 0.5
    kernel = CombinedKernel()
    feats_train = CombinedFeatures()
    feats_test = CombinedFeatures()

    for feature, (_, _, train_path) in zip(normalized, sources):
        subkfeats_train = RealFeatures()
        subkfeats_test = RealFeatures(np.transpose(np.array(feature)))
        subkernel = GaussianKernel(10, width)
        feats_test.append_feature_obj(subkfeats_test)

        fstream = SerializableAsciiFile(train_path, "r")
        # NOTE(review): the return value of load_serializable() is ignored,
        # as in the original; a failed load is not reported here.
        subkfeats_train.load_serializable(fstream)
        feats_train.append_feature_obj(subkfeats_train)
        kernel.append_kernel(subkernel)

    model_file = "models/mkl.dat"
    if not os.path.exists(model_file):
        print("downloading model file")
        # urlretrieve lives in different modules on Python 2 and Python 3.
        try:
            from urllib.request import urlretrieve
        except ImportError:
            from urllib import urlretrieve
        # NOTE(review): the model is fetched over plain HTTP with no
        # integrity check.
        url_add = "http://rth.dk/resources/mirnasponge/data/mkl.dat"
        urlretrieve(url_add, model_file)
    print("loading trained model")
    fstream = SerializableAsciiFile(model_file, "r")
    new_mkl = MKLClassification()
    new_mkl.load_serializable(fstream)

    print("model predicting")
    kernel.init(feats_train, feats_test)
    new_mkl.set_kernel(kernel)
    y_out = new_mkl.apply().get_labels()

    return y_out