def evaluation_cross_validation_mkl_weight_storage(traindat=traindat, label_traindat=label_traindat):
    """Run stratified 5-fold cross-validation of an MKL classifier and
    collect the MKL kernel weights learned in every fold/run.

    Parameters
    ----------
    traindat : training feature matrix (module-level default)
    label_traindat : binary labels for the training data (module-level default)

    Returns
    -------
    The MKL weights gathered by CrossValidationMKLStorage (one row of
    sub-kernel weights per evaluated fold).
    """
    from modshogun import CrossValidation
    from modshogun import CrossValidationMKLStorage
    from modshogun import ContingencyTableEvaluation, ACCURACY
    from modshogun import StratifiedCrossValidationSplitting
    from modshogun import BinaryLabels
    from modshogun import RealFeatures, CombinedFeatures
    from modshogun import GaussianKernel, CombinedKernel
    from modshogun import LibSVM, MKLClassification

    # training data: three combined feature objects, all on the same data
    features = RealFeatures(traindat)
    comb_features = CombinedFeatures()
    comb_features.append_feature_obj(features)
    comb_features.append_feature_obj(features)
    comb_features.append_feature_obj(features)
    labels = BinaryLabels(label_traindat)

    # kernel: Gaussians of different widths, combined
    kernel = CombinedKernel()
    kernel.append_kernel(GaussianKernel(10, 0.1))
    kernel.append_kernel(GaussianKernel(10, 1))
    kernel.append_kernel(GaussianKernel(10, 2))

    # create MKL using LibSVM; due to a memory bug, interleaved
    # optimization is not possible here
    svm = MKLClassification(LibSVM())
    svm.set_interleaved_optimization_enabled(False)
    svm.set_kernel(kernel)

    # splitting strategy for 5-fold cross-validation (stratified splitting
    # is preferred for classification; plain splitting is also available)
    splitting_strategy = StratifiedCrossValidationSplitting(labels, 5)

    # evaluation method
    evaluation_criterium = ContingencyTableEvaluation(ACCURACY)

    # cross-validation instance
    cross_validation = CrossValidation(svm, comb_features, labels,
                                       splitting_strategy, evaluation_criterium)
    cross_validation.set_autolock(False)

    # append cross-validation output class that stores the MKL weights
    mkl_storage = CrossValidationMKLStorage()
    cross_validation.add_cross_validation_output(mkl_storage)
    cross_validation.set_num_runs(3)

    # perform cross-validation
    result = cross_validation.evaluate()

    # return the collected MKL weights (previously computed and discarded)
    weights = mkl_storage.get_mkl_weights()
    return weights
def evaluation_cross_validation_mkl_weight_storage(traindat=traindat, label_traindat=label_traindat):
    """Run stratified 5-fold cross-validation of an MKL classifier and
    collect the MKL kernel weights learned in every fold/run.

    Parameters
    ----------
    traindat : training feature matrix (module-level default)
    label_traindat : binary labels for the training data (module-level default)

    Returns
    -------
    The MKL weights gathered by CrossValidationMKLStorage (one row of
    sub-kernel weights per evaluated fold).
    """
    from modshogun import CrossValidation
    from modshogun import CrossValidationMKLStorage
    from modshogun import ContingencyTableEvaluation, ACCURACY
    from modshogun import StratifiedCrossValidationSplitting
    from modshogun import BinaryLabels
    from modshogun import RealFeatures, CombinedFeatures
    from modshogun import GaussianKernel, CombinedKernel
    from modshogun import LibSVM, MKLClassification

    # training data: three combined feature objects, all on the same data
    features = RealFeatures(traindat)
    comb_features = CombinedFeatures()
    comb_features.append_feature_obj(features)
    comb_features.append_feature_obj(features)
    comb_features.append_feature_obj(features)
    labels = BinaryLabels(label_traindat)

    # kernel: Gaussians of different widths, combined
    kernel = CombinedKernel()
    kernel.append_kernel(GaussianKernel(10, 0.1))
    kernel.append_kernel(GaussianKernel(10, 1))
    kernel.append_kernel(GaussianKernel(10, 2))

    # create MKL using LibSVM; due to a memory bug, interleaved
    # optimization is not possible here
    svm = MKLClassification(LibSVM())
    svm.set_interleaved_optimization_enabled(False)
    svm.set_kernel(kernel)

    # splitting strategy for 5-fold cross-validation (stratified splitting
    # is preferred for classification; plain splitting is also available)
    splitting_strategy = StratifiedCrossValidationSplitting(labels, 5)

    # evaluation method
    evaluation_criterium = ContingencyTableEvaluation(ACCURACY)

    # cross-validation instance
    cross_validation = CrossValidation(svm, comb_features, labels,
                                       splitting_strategy, evaluation_criterium)
    cross_validation.set_autolock(False)

    # append cross-validation output class that stores the MKL weights
    mkl_storage = CrossValidationMKLStorage()
    cross_validation.add_cross_validation_output(mkl_storage)
    cross_validation.set_num_runs(3)

    # perform cross-validation
    result = cross_validation.evaluate()

    # return the collected MKL weights (previously computed and discarded)
    weights = mkl_storage.get_mkl_weights()
    return weights
def predict_new_data(graph_file, cons_file, tri_file, other_feature_file):
    """Predict labels for new data with the serialized MKL model.

    Reads four feature channels (graph, conservation, triplet/CC, ATOS),
    normalizes each against its stored max/min, rebuilds the combined
    Gaussian kernel against the serialized training features, downloads
    the trained model if missing, and returns the predicted labels.

    Parameters
    ----------
    graph_file, cons_file, tri_file, other_feature_file :
        paths to the extracted feature files for each channel.

    Returns
    -------
    Predicted label array from the MKL classifier.
    """
    # Gaussian kernel width shared by all four sub-kernels
    width = 0.5

    kernel = CombinedKernel()
    feats_train = CombinedFeatures()
    feats_test = CombinedFeatures()

    def _append_channel(feature_rows, train_feat_path):
        # One feature channel: test features built from the new data,
        # train features deserialized from disk, plus a Gaussian sub-kernel.
        # Append order (test, train, kernel) matches the original code so
        # the combined objects stay aligned channel-by-channel.
        subkfeats_test = RealFeatures(np.transpose(np.array(feature_rows)))
        feats_test.append_feature_obj(subkfeats_test)
        subkfeats_train = RealFeatures()
        fstream = SerializableAsciiFile(train_feat_path, "r")
        subkfeats_train.load_serializable(fstream)
        feats_train.append_feature_obj(subkfeats_train)
        kernel.append_kernel(GaussianKernel(10, width))

    print('reading extracted features')
    graph_feature = read_feature_data(graph_file)
    # NOTE(review): 'grtaph' looks like a typo but is the on-disk file name —
    # kept as-is so the stored model files are still found.
    graph_feature = get_normalized_given_max_min(graph_feature, 'models/grtaph_max_size')
    cons_feature = read_feature_data(cons_file)
    cons_feature = get_normalized_given_max_min(cons_feature, 'models/cons_max_size')
    CC_feature = read_feature_data(tri_file)
    CC_feature = get_normalized_given_max_min(CC_feature, 'models/tri_max_size')
    ATOS_feature = read_feature_data(other_feature_file)
    ATOS_feature = get_normalized_given_max_min(ATOS_feature, 'models/alu_max_size')

    _append_channel(graph_feature, "models/graph.dat")
    _append_channel(cons_feature, "models/cons.dat")
    _append_channel(CC_feature, "models/tri.dat")
    _append_channel(ATOS_feature, "models/alu.dat")

    # fetch the trained model on first use
    model_file = "models/mkl.dat"
    if not os.path.exists(model_file):
        print('downloading model file')
        url_add = 'http://rth.dk/resources/mirnasponge/data/mkl.dat'
        urllib.urlretrieve(url_add, model_file)

    print('loading trained model')
    fstream = SerializableAsciiFile("models/mkl.dat", "r")
    new_mkl = MKLClassification()
    new_mkl.load_serializable(fstream)

    print('model predicting')
    kernel.init(feats_train, feats_test)
    new_mkl.set_kernel(kernel)
    y_out = new_mkl.apply().get_labels()
    return y_out
def mkl_binclass_modular (fm_train_real=traindat,fm_test_real=testdat,fm_label_twoclass = label_traindat):
    """Train an MKL binary classifier that combines a precomputed custom
    kernel with a polynomial kernel, then classify the test data.

    Parameters
    ----------
    fm_train_real, fm_test_real : train/test feature matrices
    fm_label_twoclass : binary training labels

    Returns
    -------
    (predictions, kernel) : the labels object from mkl.apply() and the
    combined kernel initialized on (train, test) features.
    """
    ##################################
    # set up and train

    # create some poly train/test kernel matrices
    tfeats = RealFeatures(fm_train_real)
    tkernel = PolyKernel(10, 3)
    tkernel.init(tfeats, tfeats)
    K_train = tkernel.get_kernel_matrix()

    pfeats = RealFeatures(fm_test_real)
    tkernel.init(tfeats, pfeats)
    K_test = tkernel.get_kernel_matrix()

    # create combined train features
    feats_train = CombinedFeatures()
    feats_train.append_feature_obj(RealFeatures(fm_train_real))

    # and corresponding combined kernel
    kernel = CombinedKernel()
    kernel.append_kernel(CustomKernel(K_train))
    kernel.append_kernel(PolyKernel(10, 2))
    kernel.init(feats_train, feats_train)

    # train mkl
    labels = BinaryLabels(fm_label_twoclass)
    mkl = MKLClassification()

    # which norm to use for MKL
    mkl.set_mkl_norm(1)  # 2, 3

    # set cost (neg, pos)
    mkl.set_C(1, 1)

    # set kernel and labels
    mkl.set_kernel(kernel)
    mkl.set_labels(labels)

    # train
    mkl.train()
    #w=kernel.get_subkernel_weights()
    #kernel.set_subkernel_weights(w)

    ##################################
    # test

    # create combined test features
    feats_pred = CombinedFeatures()
    feats_pred.append_feature_obj(RealFeatures(fm_test_real))

    # and corresponding combined kernel
    kernel = CombinedKernel()
    kernel.append_kernel(CustomKernel(K_test))
    kernel.append_kernel(PolyKernel(10, 2))
    kernel.init(feats_train, feats_pred)

    # classify once and return the result (the original called apply()
    # twice, discarding the first classification of the whole test set)
    mkl.set_kernel(kernel)
    out = mkl.apply()
    return out, kernel
def runShogunSVMMultipleKernels(train_xt, train_lt, test_xt):
    """
    Run SVM with Multiple Kernels.

    Builds a combined spectrum-kernel over several k-mer lengths
    (K1 down to K2+1, module-level constants), trains an L3-norm MKL
    classifier, and returns predictions on train and test sets.

    Returns (train_predictions, test_predictions, sampled_train_labels).
    """
    ##################################################
    # Take all examples
    # NOTE(review): randint(1, 14000, 14000) samples WITH replacement and
    # never selects index 0 — confirm this subsampling is intended.
    idxs = np.random.randint(1, 14000, 14000)
    train_xt = np.array(train_xt)[idxs]
    train_lt = np.array(train_lt)[idxs]

    # Initialize kernel and features
    kernel = CombinedKernel()
    feats_train = CombinedFeatures()
    feats_test = CombinedFeatures()
    labels = BinaryLabels(train_lt)

    ##################### Multiple Spectrum Kernels #########################
    for i in range(K1, K2, -1):
        # append training data to combined feature object
        charfeat_train = StringCharFeatures(list(train_xt), DNA)
        feats_train_k1 = StringWordFeatures(DNA)
        feats_train_k1.obtain_from_char(charfeat_train, i - 1, i, GAP, False)
        preproc = SortWordString()
        preproc.init(feats_train_k1)
        feats_train_k1.add_preprocessor(preproc)
        feats_train_k1.apply_preprocessor()

        # append testing data to combined feature object
        charfeat_test = StringCharFeatures(test_xt, DNA)
        feats_test_k1 = StringWordFeatures(DNA)
        feats_test_k1.obtain_from_char(charfeat_test, i - 1, i, GAP, False)
        feats_test_k1.add_preprocessor(preproc)
        feats_test_k1.apply_preprocessor()

        # append features
        feats_train.append_feature_obj(charfeat_train)
        feats_test.append_feature_obj(charfeat_test)

        # append spectrum kernel
        kernel1 = CommWordStringKernel(10, i)
        kernel1.io.set_loglevel(MSG_DEBUG)
        kernel.append_kernel(kernel1)

    '''
    Uncomment this for Multiple Weighted degree kernels and comment
    the multiple spectrum kernel block above instead

    ##################### Multiple Weighted Degree Kernel #########################
    for i in range(K1,K2,-1):
        # append training data to combined feature object
        charfeat_train = StringCharFeatures(list(train_xt), DNA)
        # append testing data to combined feature object
        charfeat_test = StringCharFeatures(test_xt, DNA)
        # append features
        feats_train.append_feature_obj(charfeat_train);
        feats_test.append_feature_obj(charfeat_test);
        # setup weighted degree kernel
        kernel1=WeightedDegreePositionStringKernel(10,i);
        kernel1.io.set_loglevel(MSG_DEBUG);
        kernel1.set_shifts(SHIFT*np.ones(len(train_xt[0]), dtype=np.int32))
        kernel1.set_position_weights(np.ones(len(train_xt[0]), dtype=np.float64));
        kernel.append_kernel(kernel1);
    '''

    ##################### Training #########################
    print("Starting MKL training..")
    mkl = MKLClassification()
    mkl.set_mkl_norm(3)  # 1,2,3
    mkl.set_C(SVMC, SVMC)
    mkl.set_kernel(kernel)
    mkl.set_labels(labels)
    mkl.train(feats_train)

    print("Making predictions!")
    out1 = mkl.apply(feats_train).get_labels()
    out2 = mkl.apply(feats_test).get_labels()
    return out1, out2, train_lt
def runShogunSVMMultipleKernels(train_xt, train_lt, test_xt):
    """
    Run SVM with Multiple Kernels.

    Builds a combined spectrum-kernel over several k-mer lengths
    (K1 down to K2+1, module-level constants), trains an L3-norm MKL
    classifier, and returns predictions on train and test sets.

    Returns (train_predictions, test_predictions, sampled_train_labels).
    """
    ##################################################
    # Take all examples
    # NOTE(review): randint(1, 14000, 14000) samples WITH replacement and
    # never selects index 0 — confirm this subsampling is intended.
    idxs = np.random.randint(1, 14000, 14000)
    train_xt = np.array(train_xt)[idxs]
    train_lt = np.array(train_lt)[idxs]

    # Initialize kernel and features
    kernel = CombinedKernel()
    feats_train = CombinedFeatures()
    feats_test = CombinedFeatures()
    labels = BinaryLabels(train_lt)

    ##################### Multiple Spectrum Kernels #########################
    for i in range(K1, K2, -1):
        # append training data to combined feature object
        charfeat_train = StringCharFeatures(list(train_xt), DNA)
        feats_train_k1 = StringWordFeatures(DNA)
        feats_train_k1.obtain_from_char(charfeat_train, i - 1, i, GAP, False)
        preproc = SortWordString()
        preproc.init(feats_train_k1)
        feats_train_k1.add_preprocessor(preproc)
        feats_train_k1.apply_preprocessor()

        # append testing data to combined feature object
        charfeat_test = StringCharFeatures(test_xt, DNA)
        feats_test_k1 = StringWordFeatures(DNA)
        feats_test_k1.obtain_from_char(charfeat_test, i - 1, i, GAP, False)
        feats_test_k1.add_preprocessor(preproc)
        feats_test_k1.apply_preprocessor()

        # append features
        feats_train.append_feature_obj(charfeat_train)
        feats_test.append_feature_obj(charfeat_test)

        # append spectrum kernel
        kernel1 = CommWordStringKernel(10, i)
        kernel1.io.set_loglevel(MSG_DEBUG)
        kernel.append_kernel(kernel1)

    '''
    Uncomment this for Multiple Weighted degree kernels and comment
    the multiple spectrum kernel block above instead

    ##################### Multiple Weighted Degree Kernel #########################
    for i in range(K1,K2,-1):
        # append training data to combined feature object
        charfeat_train = StringCharFeatures(list(train_xt), DNA)
        # append testing data to combined feature object
        charfeat_test = StringCharFeatures(test_xt, DNA)
        # append features
        feats_train.append_feature_obj(charfeat_train);
        feats_test.append_feature_obj(charfeat_test);
        # setup weighted degree kernel
        kernel1=WeightedDegreePositionStringKernel(10,i);
        kernel1.io.set_loglevel(MSG_DEBUG);
        kernel1.set_shifts(SHIFT*np.ones(len(train_xt[0]), dtype=np.int32))
        kernel1.set_position_weights(np.ones(len(train_xt[0]), dtype=np.float64));
        kernel.append_kernel(kernel1);
    '''

    ##################### Training #########################
    print("Starting MKL training..")
    mkl = MKLClassification()
    mkl.set_mkl_norm(3)  # 1,2,3
    mkl.set_C(SVMC, SVMC)
    mkl.set_kernel(kernel)
    mkl.set_labels(labels)
    mkl.train(feats_train)

    print("Making predictions!")
    out1 = mkl.apply(feats_train).get_labels()
    out2 = mkl.apply(feats_test).get_labels()
    return out1, out2, train_lt
def predict_new_data(graph_file, cons_file, tri_file, other_feature_file):
    """Predict labels for new data with the serialized MKL model.

    Reads four feature channels (graph, conservation, triplet/CC, ATOS),
    normalizes each against its stored max/min, rebuilds the combined
    Gaussian kernel against the serialized training features, downloads
    the trained model if missing, and returns the predicted labels.

    Parameters
    ----------
    graph_file, cons_file, tri_file, other_feature_file :
        paths to the extracted feature files for each channel.

    Returns
    -------
    Predicted label array from the MKL classifier.
    """
    # Gaussian kernel width shared by all four sub-kernels
    width = 0.5

    kernel = CombinedKernel()
    feats_train = CombinedFeatures()
    feats_test = CombinedFeatures()

    def _append_channel(feature_rows, train_feat_path):
        # One feature channel: test features built from the new data,
        # train features deserialized from disk, plus a Gaussian sub-kernel.
        # Append order (test, train, kernel) matches the original code so
        # the combined objects stay aligned channel-by-channel.
        subkfeats_test = RealFeatures(np.transpose(np.array(feature_rows)))
        feats_test.append_feature_obj(subkfeats_test)
        subkfeats_train = RealFeatures()
        fstream = SerializableAsciiFile(train_feat_path, "r")
        subkfeats_train.load_serializable(fstream)
        feats_train.append_feature_obj(subkfeats_train)
        kernel.append_kernel(GaussianKernel(10, width))

    print("reading extracted features")
    graph_feature = read_feature_data(graph_file)
    # NOTE(review): 'grtaph' looks like a typo but is the on-disk file name —
    # kept as-is so the stored model files are still found.
    graph_feature = get_normalized_given_max_min(graph_feature, "models/grtaph_max_size")
    cons_feature = read_feature_data(cons_file)
    cons_feature = get_normalized_given_max_min(cons_feature, "models/cons_max_size")
    CC_feature = read_feature_data(tri_file)
    CC_feature = get_normalized_given_max_min(CC_feature, "models/tri_max_size")
    ATOS_feature = read_feature_data(other_feature_file)
    ATOS_feature = get_normalized_given_max_min(ATOS_feature, "models/alu_max_size")

    _append_channel(graph_feature, "models/graph.dat")
    _append_channel(cons_feature, "models/cons.dat")
    _append_channel(CC_feature, "models/tri.dat")
    _append_channel(ATOS_feature, "models/alu.dat")

    # fetch the trained model on first use
    model_file = "models/mkl.dat"
    if not os.path.exists(model_file):
        print("downloading model file")
        url_add = "http://rth.dk/resources/mirnasponge/data/mkl.dat"
        urllib.urlretrieve(url_add, model_file)

    print("loading trained model")
    fstream = SerializableAsciiFile("models/mkl.dat", "r")
    new_mkl = MKLClassification()
    new_mkl.load_serializable(fstream)

    print("model predicting")
    kernel.init(feats_train, feats_test)
    new_mkl.set_kernel(kernel)
    y_out = new_mkl.apply().get_labels()
    return y_out