def predict_new_data(graph_file, cons_file, tri_file, other_feature_file):
    """Predict labels for new samples with the stored MKL classifier.

    Each input file is read with ``read_feature_data`` and rescaled with
    ``get_normalized_given_max_min`` using a range file under ``models/``.
    Every feature group contributes one ``GaussianKernel(10, width)`` to a
    ``CombinedKernel``; the matching training features are deserialized from
    ``models/*.dat``.  The classifier itself is loaded from
    ``models/mkl.dat``, which is downloaded first when it does not exist.

    Args:
        graph_file: feature file paired with ``models/grtaph_max_size`` and
            ``models/graph.dat``.
        cons_file: feature file paired with ``models/cons_max_size`` and
            ``models/cons.dat``.
        tri_file: feature file paired with ``models/tri_max_size`` and
            ``models/tri.dat``.
        other_feature_file: feature file paired with ``models/alu_max_size``
            and ``models/alu.dat``.

    Returns:
        The labels obtained from ``MKLClassification.apply().get_labels()``.

    Raises:
        IOError: if a serialized training feature set or the model reports a
            failed load.
    """
    print('reading extracted features')
    # (input file, normalisation range file, serialized training features).
    # The tuple order fixes the sub-kernel order; presumably it has to match
    # the order used when the model was trained -- verify before reordering.
    # NOTE(review): 'grtaph' is kept exactly as in the original path; confirm
    # it matches the file name on disk before "fixing" the spelling.
    sources = (
        (graph_file, 'models/grtaph_max_size', 'models/graph.dat'),
        (cons_file, 'models/cons_max_size', 'models/cons.dat'),
        (tri_file, 'models/tri_max_size', 'models/tri.dat'),
        (other_feature_file, 'models/alu_max_size', 'models/alu.dat'),
    )

    # Read and normalise every group before any kernel object is built.
    groups = []
    for path, range_file, train_dat in sources:
        values = get_normalized_given_max_min(read_feature_data(path), range_file)
        groups.append((values, train_dat))

    width = 0.5
    kernel = CombinedKernel()
    feats_train = CombinedFeatures()
    feats_test = CombinedFeatures()

    for values, train_dat in groups:
        subkfeats_train = RealFeatures()
        subkfeats_test = RealFeatures(np.transpose(np.array(values)))
        subkernel = GaussianKernel(10, width)
        feats_test.append_feature_obj(subkfeats_test)

        fstream = SerializableAsciiFile(train_dat, "r")
        # A failed load would otherwise only surface later as a confusing
        # kernel initialisation error, so stop here with a clear message.
        if not subkfeats_train.load_serializable(fstream):
            raise IOError('could not load training features from %s' % train_dat)
        feats_train.append_feature_obj(subkfeats_train)
        kernel.append_kernel(subkernel)

    model_file = "models/mkl.dat"
    if not os.path.exists(model_file):
        print('downloading model file')
        url_add = 'http://rth.dk/resources/mirnasponge/data/mkl.dat'
        urllib.urlretrieve(url_add, model_file)

    print('loading trained model')
    fstream = SerializableAsciiFile(model_file, "r")
    new_mkl = MKLClassification()
    if not new_mkl.load_serializable(fstream):
        raise IOError('could not load trained model from %s' % model_file)

    print('model predicting')
    kernel.init(feats_train, feats_test)
    new_mkl.set_kernel(kernel)
    y_out = new_mkl.apply().get_labels()
    return y_out
def mkl_binclass_modular(fm_train_real=traindat, fm_test_real=testdat,
                         fm_label_twoclass=label_traindat):
    """Train a two-kernel MKL binary classifier and apply it to test data.

    A ``PolyKernel(10, 3)`` matrix is precomputed for train/train and
    train/test and wrapped in ``CustomKernel``.  It is combined with a
    ``PolyKernel(10, 2)`` that works on the raw features appended to the
    combined feature objects.

    Args:
        fm_train_real: training matrix handed to ``RealFeatures``.
        fm_test_real: test matrix handed to ``RealFeatures``.
        fm_label_twoclass: training labels handed to ``BinaryLabels``.

    Returns:
        A tuple ``(predictions, kernel)``: the object returned by
        ``mkl.apply()`` for the test data and the combined test kernel.
    """
    ##################################
    # set up and train

    # create some poly train/test matrix
    tfeats = RealFeatures(fm_train_real)
    tkernel = PolyKernel(10, 3)
    tkernel.init(tfeats, tfeats)
    K_train = tkernel.get_kernel_matrix()

    pfeats = RealFeatures(fm_test_real)
    tkernel.init(tfeats, pfeats)
    K_test = tkernel.get_kernel_matrix()

    # create combined train features
    feats_train = CombinedFeatures()
    feats_train.append_feature_obj(RealFeatures(fm_train_real))

    # and corresponding combined kernel
    kernel = CombinedKernel()
    kernel.append_kernel(CustomKernel(K_train))
    kernel.append_kernel(PolyKernel(10, 2))
    kernel.init(feats_train, feats_train)

    # train mkl
    labels = BinaryLabels(fm_label_twoclass)
    mkl = MKLClassification()

    # which norm to use for MKL
    mkl.set_mkl_norm(1)  # 2,3

    # set cost (neg, pos)
    mkl.set_C(1, 1)

    # set kernel and labels
    mkl.set_kernel(kernel)
    mkl.set_labels(labels)

    # train
    mkl.train()

    ##################################
    # test

    # create combined test features
    feats_pred = CombinedFeatures()
    feats_pred.append_feature_obj(RealFeatures(fm_test_real))

    # and corresponding combined kernel
    kernel = CombinedKernel()
    kernel.append_kernel(CustomKernel(K_test))
    kernel.append_kernel(PolyKernel(10, 2))
    kernel.init(feats_train, feats_pred)

    # and classify; apply() is evaluated once (the original ran it twice and
    # discarded the first result)
    mkl.set_kernel(kernel)
    return mkl.apply(), kernel
def runShogunSVMMultipleKernels(train_xt, train_lt, test_xt):
    """Run an MKL SVM over several spectrum kernels.

    One ``CommWordStringKernel`` is added for every ``i`` in
    ``range(K1, K2, -1)``.  For each of them the sequences are converted to
    word features with ``obtain_from_char(..., i - 1, i, GAP, False)`` and
    sorted with ``SortWordString`` before being appended to the combined
    train/test feature objects.

    Args:
        train_xt: training sequences; indexed through ``np.array``.
        train_lt: training labels, indexed with the same indices as
            ``train_xt``.
        test_xt: test sequences.

    Returns:
        A tuple ``(out1, out2, train_lt)``: labels predicted for the resampled
        training set, labels predicted for the test set, and the resampled
        training labels.
    """
    ##################################################
    # Resample the training set.
    # NOTE(review): this draws 14000 indices with replacement from
    # [1, 14000), so index 0 is never selected and inputs with fewer than
    # 14000 rows raise IndexError -- confirm that this is intended.
    idxs = np.random.randint(1, 14000, 14000)
    train_xt = np.array(train_xt)[idxs]
    train_lt = np.array(train_lt)[idxs]

    # Initialize kernel and features
    kernel = CombinedKernel()
    feats_train = CombinedFeatures()
    feats_test = CombinedFeatures()
    labels = BinaryLabels(train_lt)

    ##################### Multiple Spectrum Kernels #########################
    for i in range(K1, K2, -1):
        # build sorted word features for the training data
        charfeat_train = StringCharFeatures(list(train_xt), DNA)
        feats_train_k1 = StringWordFeatures(DNA)
        feats_train_k1.obtain_from_char(charfeat_train, i - 1, i, GAP, False)
        preproc = SortWordString()
        preproc.init(feats_train_k1)
        feats_train_k1.add_preprocessor(preproc)
        feats_train_k1.apply_preprocessor()

        # build sorted word features for the testing data, reusing the
        # preprocessor initialised on the training features
        charfeat_test = StringCharFeatures(test_xt, DNA)
        feats_test_k1 = StringWordFeatures(DNA)
        feats_test_k1.obtain_from_char(charfeat_test, i - 1, i, GAP, False)
        feats_test_k1.add_preprocessor(preproc)
        feats_test_k1.apply_preprocessor()

        # Append the word features: the spectrum kernel works on
        # StringWordFeatures.  The original appended the raw char features
        # and left the word features computed above unused.
        feats_train.append_feature_obj(feats_train_k1)
        feats_test.append_feature_obj(feats_test_k1)

        # append spectrum kernel
        # NOTE(review): the second constructor argument of
        # CommWordStringKernel appears to be ``use_sign`` in the Shogun API;
        # confirm that passing ``i`` here is intended.
        kernel1 = CommWordStringKernel(10, i)
        kernel1.io.set_loglevel(MSG_DEBUG)
        kernel.append_kernel(kernel1)

    # Alternative: multiple weighted degree kernels.  Enable the block below
    # and disable the spectrum kernel loop above instead.
    #
    # for i in range(K1, K2, -1):
    #     # append training data to combined feature object
    #     charfeat_train = StringCharFeatures(list(train_xt), DNA)
    #     # append testing data to combined feature object
    #     charfeat_test = StringCharFeatures(test_xt, DNA)
    #     # append features
    #     feats_train.append_feature_obj(charfeat_train)
    #     feats_test.append_feature_obj(charfeat_test)
    #     # setup weighted degree kernel
    #     kernel1 = WeightedDegreePositionStringKernel(10, i)
    #     kernel1.io.set_loglevel(MSG_DEBUG)
    #     kernel1.set_shifts(SHIFT * np.ones(len(train_xt[0]), dtype=np.int32))
    #     kernel1.set_position_weights(np.ones(len(train_xt[0]), dtype=np.float64))
    #     kernel.append_kernel(kernel1)

    ##################### Training #########################
    print("Starting MKL training..")
    mkl = MKLClassification()
    mkl.set_mkl_norm(3)  # 1,2,3
    mkl.set_C(SVMC, SVMC)
    mkl.set_kernel(kernel)
    mkl.set_labels(labels)
    mkl.train(feats_train)

    print("Making predictions!")
    out1 = mkl.apply(feats_train).get_labels()
    out2 = mkl.apply(feats_test).get_labels()

    return out1, out2, train_lt
def runShogunSVMMultipleKernels(train_xt, train_lt, test_xt):
    """Run an MKL SVM built from multiple spectrum kernels.

    For every ``i`` in ``range(K1, K2, -1)`` the train and test sequences are
    turned into ``StringWordFeatures`` via
    ``obtain_from_char(..., i - 1, i, GAP, False)``, sorted with a
    ``SortWordString`` preprocessor, and appended to the combined feature
    objects together with one ``CommWordStringKernel``.

    Args:
        train_xt: training sequences; indexed through ``np.array``.
        train_lt: training labels, indexed with the same indices as
            ``train_xt``.
        test_xt: test sequences.

    Returns:
        A tuple ``(out1, out2, train_lt)``: predicted labels for the
        resampled training set, predicted labels for the test set, and the
        resampled training labels.
    """
    ##################################################
    # Resample the training set.
    # NOTE(review): 14000 indices are drawn with replacement from
    # [1, 14000); index 0 is never chosen and fewer than 14000 input rows
    # lead to IndexError -- confirm that this is intended.
    idxs = np.random.randint(1, 14000, 14000)
    train_xt = np.array(train_xt)[idxs]
    train_lt = np.array(train_lt)[idxs]

    # Initialize kernel and features
    kernel = CombinedKernel()
    feats_train = CombinedFeatures()
    feats_test = CombinedFeatures()
    labels = BinaryLabels(train_lt)

    ##################### Multiple Spectrum Kernels #########################
    for i in range(K1, K2, -1):
        # sorted word features for the training data
        charfeat_train = StringCharFeatures(list(train_xt), DNA)
        feats_train_k1 = StringWordFeatures(DNA)
        feats_train_k1.obtain_from_char(charfeat_train, i - 1, i, GAP, False)
        preproc = SortWordString()
        preproc.init(feats_train_k1)
        feats_train_k1.add_preprocessor(preproc)
        feats_train_k1.apply_preprocessor()

        # sorted word features for the testing data; the preprocessor
        # initialised on the training features is reused
        charfeat_test = StringCharFeatures(test_xt, DNA)
        feats_test_k1 = StringWordFeatures(DNA)
        feats_test_k1.obtain_from_char(charfeat_test, i - 1, i, GAP, False)
        feats_test_k1.add_preprocessor(preproc)
        feats_test_k1.apply_preprocessor()

        # The spectrum kernel consumes StringWordFeatures, so the word
        # features are appended.  The original appended the raw char
        # features and never used the word features built above.
        feats_train.append_feature_obj(feats_train_k1)
        feats_test.append_feature_obj(feats_test_k1)

        # append spectrum kernel
        # NOTE(review): the second constructor argument of
        # CommWordStringKernel appears to be ``use_sign`` in the Shogun API;
        # confirm that passing ``i`` here is intended.
        kernel1 = CommWordStringKernel(10, i)
        kernel1.io.set_loglevel(MSG_DEBUG)
        kernel.append_kernel(kernel1)

    # Alternative: multiple weighted degree kernels.  Enable the block below
    # and disable the spectrum kernel loop above instead.
    #
    # for i in range(K1, K2, -1):
    #     # append training data to combined feature object
    #     charfeat_train = StringCharFeatures(list(train_xt), DNA)
    #     # append testing data to combined feature object
    #     charfeat_test = StringCharFeatures(test_xt, DNA)
    #     # append features
    #     feats_train.append_feature_obj(charfeat_train)
    #     feats_test.append_feature_obj(charfeat_test)
    #     # setup weighted degree kernel
    #     kernel1 = WeightedDegreePositionStringKernel(10, i)
    #     kernel1.io.set_loglevel(MSG_DEBUG)
    #     kernel1.set_shifts(SHIFT * np.ones(len(train_xt[0]), dtype=np.int32))
    #     kernel1.set_position_weights(np.ones(len(train_xt[0]), dtype=np.float64))
    #     kernel.append_kernel(kernel1)

    ##################### Training #########################
    print("Starting MKL training..")
    mkl = MKLClassification()
    mkl.set_mkl_norm(3)  # 1,2,3
    mkl.set_C(SVMC, SVMC)
    mkl.set_kernel(kernel)
    mkl.set_labels(labels)
    mkl.train(feats_train)

    print("Making predictions!")
    out1 = mkl.apply(feats_train).get_labels()
    out2 = mkl.apply(feats_test).get_labels()

    return out1, out2, train_lt
def predict_new_data(graph_file, cons_file, tri_file, other_feature_file):
    """Predict labels for new samples with the stored MKL classifier.

    Each input file is read with ``read_feature_data`` and rescaled with
    ``get_normalized_given_max_min`` using a range file under ``models/``.
    Every feature group contributes one ``GaussianKernel(10, width)`` to a
    ``CombinedKernel``; the matching training features are deserialized from
    ``models/*.dat``.  The classifier itself is loaded from
    ``models/mkl.dat``, which is downloaded first when it does not exist.

    Args:
        graph_file: feature file paired with ``models/grtaph_max_size`` and
            ``models/graph.dat``.
        cons_file: feature file paired with ``models/cons_max_size`` and
            ``models/cons.dat``.
        tri_file: feature file paired with ``models/tri_max_size`` and
            ``models/tri.dat``.
        other_feature_file: feature file paired with ``models/alu_max_size``
            and ``models/alu.dat``.

    Returns:
        The labels obtained from ``MKLClassification.apply().get_labels()``.

    Raises:
        IOError: if a serialized training feature set or the model reports a
            failed load.
    """
    print("reading extracted features")
    # (input file, normalisation range file, serialized training features).
    # The tuple order fixes the sub-kernel order; presumably it has to match
    # the order used when the model was trained -- verify before reordering.
    # NOTE(review): "grtaph" is kept exactly as in the original path; confirm
    # it matches the file name on disk before "fixing" the spelling.
    sources = (
        (graph_file, "models/grtaph_max_size", "models/graph.dat"),
        (cons_file, "models/cons_max_size", "models/cons.dat"),
        (tri_file, "models/tri_max_size", "models/tri.dat"),
        (other_feature_file, "models/alu_max_size", "models/alu.dat"),
    )

    # Read and normalise every group before any kernel object is built.
    groups = []
    for path, range_file, train_dat in sources:
        values = get_normalized_given_max_min(read_feature_data(path), range_file)
        groups.append((values, train_dat))

    width = 0.5
    kernel = CombinedKernel()
    feats_train = CombinedFeatures()
    feats_test = CombinedFeatures()

    for values, train_dat in groups:
        subkfeats_train = RealFeatures()
        subkfeats_test = RealFeatures(np.transpose(np.array(values)))
        subkernel = GaussianKernel(10, width)
        feats_test.append_feature_obj(subkfeats_test)

        fstream = SerializableAsciiFile(train_dat, "r")
        # A failed load would otherwise only surface later as a confusing
        # kernel initialisation error, so stop here with a clear message.
        if not subkfeats_train.load_serializable(fstream):
            raise IOError("could not load training features from %s" % train_dat)
        feats_train.append_feature_obj(subkfeats_train)
        kernel.append_kernel(subkernel)

    model_file = "models/mkl.dat"
    if not os.path.exists(model_file):
        print("downloading model file")
        url_add = "http://rth.dk/resources/mirnasponge/data/mkl.dat"
        urllib.urlretrieve(url_add, model_file)

    print("loading trained model")
    fstream = SerializableAsciiFile(model_file, "r")
    new_mkl = MKLClassification()
    if not new_mkl.load_serializable(fstream):
        raise IOError("could not load trained model from %s" % model_file)

    print("model predicting")
    kernel.init(feats_train, feats_test)
    new_mkl.set_kernel(kernel)
    y_out = new_mkl.apply().get_labels()
    return y_out