def runShogunSVMDNAWDKernel(train_xt, train_lt, test_xt):
    """
    Train a LibSVM classifier on DNA strings using a weighted-degree
    position string kernel, then score the training and the test set.

    train_xt -- training sequences; the length of the first sequence sizes
                the shift and position-weight vectors
    train_lt -- training labels, wrapped in BinaryLabels
    test_xt  -- test sequences

    Returns a 4-tuple: (train labels, test labels, train apply() result,
    test apply() result), where the labels are get_labels() of the
    corresponding apply() result.
    """
    # String features over the DNA alphabet.
    train_features = StringCharFeatures(train_xt, DNA)
    test_features = StringCharFeatures(test_xt, DNA)

    # Kernel is initialised train-vs-train for fitting.
    wd_kernel = WeightedDegreePositionStringKernel(train_features, train_features, DEGREE)
    wd_kernel.io.set_loglevel(MSG_DEBUG)

    # One shift and one weight per sequence position.
    seq_len = len(train_xt[0])
    shift_vector = NUMSHIFTS * ones(seq_len, dtype=int32)
    wd_kernel.set_shifts(shift_vector)
    weight_vector = ones(seq_len, dtype=float64)
    wd_kernel.set_position_weights(weight_vector)

    train_labels = BinaryLabels(train_lt)

    # Single-argument print() emits the same text under Python 2 and 3.
    print("Ready to train!")
    classifier = LibSVM(SVMC, wd_kernel, train_labels)
    classifier.io.set_loglevel(MSG_DEBUG)
    classifier.train()

    print("Making predictions!")
    train_result = classifier.apply(train_features)
    train_predicted = train_result.get_labels()

    # Point the kernel at train-vs-test before scoring the test set.
    wd_kernel.init(train_features, test_features)
    test_result = classifier.apply(test_features)
    test_predicted = test_result.get_labels()

    return train_predicted, test_predicted, train_result, test_result
def runShogunSVRWDKernel(train_xt, train_lt, test_xt, svm_c=1, svr_param=0.1):
    """
    Train an epsilon-SVR on a combined string kernel, predict on the
    training and test sets, and serialize the trained machine to FNEPSILON.

    train_xt  -- training sequences; the length of the first sequence sizes
                 the shift vector
    train_lt  -- training targets, wrapped in RegressionLabels
    test_xt   -- test sequences
    svm_c     -- first argument handed to LibSVR (default 1)
    svr_param -- second argument handed to LibSVR (default 0.1)

    Returns (train predictions, test predictions, combined kernel). On
    return the kernel is initialised on (train, test) features.
    """
    ##################################################
    # set up svr
    feats_train = construct_features(train_xt)
    feats_test = construct_features(test_xt)

    max_len = len(train_xt[0])
    # NOTE(review): (SIZE, 5) is presumably (cache size, degree) -- confirm.
    kernel_wdk = WeightedDegreePositionStringKernel(SIZE, 5)
    shifts_vector = np.ones(max_len, dtype=np.int32) * NUMSHIFTS
    kernel_wdk.set_shifts(shifts_vector)

    ########
    # set up spectrum
    use_sign = False
    kernel_spec_1 = WeightedCommWordStringKernel(SIZE, use_sign)

    ########
    # combined kernel
    kernel = CombinedKernel()
    kernel.append_kernel(kernel_wdk)
    kernel.append_kernel(kernel_spec_1)

    labels = RegressionLabels(train_lt)

    # Only the epsilon-SVR variant is trained.
    svr_epsilon = LibSVR(svm_c, svr_param, kernel, labels, LIBSVR_EPSILON_SVR)
    # Single-argument print() emits the same text under Python 2 and 3.
    print("Ready to train!")
    svr_epsilon.train(feats_train)

    # predictions
    print("Making predictions!")
    # Score the training set explicitly and BEFORE re-initialising the kernel
    # on the test features; an argument-less apply() after that re-init would
    # be evaluated against the test features and merely duplicate the test
    # predictions.
    out1_epsilon = svr_epsilon.apply(feats_train).get_labels()
    kernel.init(feats_train, feats_test)
    out2_epsilon = svr_epsilon.apply(feats_test).get_labels()

    ##################################################
    # serialize to file; the context manager closes the handle even when
    # save() raises
    with open(FNEPSILON, 'w+') as f_epsilon:
        svr_epsilon.save(f_epsilon)

    return out1_epsilon, out2_epsilon, kernel
def runShogunSVRWDKernel(train_xt, train_lt, test_xt, svm_c=1, svr_param=0.1):
    """
    Train an epsilon-SVR on a combined string kernel, predict on the
    training and test sets, and serialize the trained machine to FNEPSILON.

    train_xt  -- training sequences; the length of the first sequence sizes
                 the shift vector
    train_lt  -- training targets, wrapped in RegressionLabels
    test_xt   -- test sequences
    svm_c     -- first argument handed to LibSVR (default 1)
    svr_param -- second argument handed to LibSVR (default 0.1)

    Returns (train predictions, test predictions, combined kernel). On
    return the kernel is initialised on (train, test) features.
    """
    ##################################################
    # set up svr
    feats_train = construct_features(train_xt)
    feats_test = construct_features(test_xt)

    max_len = len(train_xt[0])
    # NOTE(review): (SIZE, 5) is presumably (cache size, degree) -- confirm.
    kernel_wdk = WeightedDegreePositionStringKernel(SIZE, 5)
    shifts_vector = np.ones(max_len, dtype=np.int32) * NUMSHIFTS
    kernel_wdk.set_shifts(shifts_vector)

    ########
    # set up spectrum
    use_sign = False
    kernel_spec_1 = WeightedCommWordStringKernel(SIZE, use_sign)

    ########
    # combined kernel
    kernel = CombinedKernel()
    kernel.append_kernel(kernel_wdk)
    kernel.append_kernel(kernel_spec_1)

    labels = RegressionLabels(train_lt)

    # Only the epsilon-SVR variant is trained.
    svr_epsilon = LibSVR(svm_c, svr_param, kernel, labels, LIBSVR_EPSILON_SVR)
    # Single-argument print() emits the same text under Python 2 and 3.
    print("Ready to train!")
    svr_epsilon.train(feats_train)

    # predictions
    print("Making predictions!")
    # Score the training set explicitly and BEFORE re-initialising the kernel
    # on the test features; an argument-less apply() after that re-init would
    # be evaluated against the test features and merely duplicate the test
    # predictions.
    out1_epsilon = svr_epsilon.apply(feats_train).get_labels()
    kernel.init(feats_train, feats_test)
    out2_epsilon = svr_epsilon.apply(feats_test).get_labels()

    ##################################################
    # serialize to file; the context manager closes the handle even when
    # save() raises
    with open(FNEPSILON, 'w+') as f_epsilon:
        svr_epsilon.save(f_epsilon)

    return out1_epsilon, out2_epsilon, kernel
def kernel_weighted_degree_position_string_modular(fm_train_dna=traindat,
                                                   fm_test_dna=testdat,
                                                   degree=20):
    """
    Build a weighted-degree position string kernel on DNA strings and
    compute its train-vs-train and train-vs-test kernel matrices.

    fm_train_dna -- training sequences; the length of the first sequence
                    sizes the shift and position-weight vectors
    fm_test_dna  -- test sequences
    degree       -- degree argument handed to the kernel constructor

    Returns (train kernel matrix, test kernel matrix, kernel). On return
    the kernel is initialised on (train, test) features.
    """
    # MSG_DEBUG and zeros are imported as in the original but not used by
    # the active code.
    from modshogun import StringCharFeatures, DNA
    from modshogun import WeightedDegreePositionStringKernel, MSG_DEBUG
    from numpy import zeros, ones, float64, int32

    train_features = StringCharFeatures(fm_train_dna, DNA)
    # debugging aid: train_features.io.set_loglevel(MSG_DEBUG)
    test_features = StringCharFeatures(fm_test_dna, DNA)

    wd_kernel = WeightedDegreePositionStringKernel(train_features, train_features, degree)

    # One shift (fixed at 10) and one unit weight per sequence position.
    seq_len = len(fm_train_dna[0])
    shift_vector = 10 * ones(seq_len, dtype=int32)
    wd_kernel.set_shifts(shift_vector)
    weight_vector = ones(seq_len, dtype=float64)
    wd_kernel.set_position_weights(weight_vector)

    train_matrix = wd_kernel.get_kernel_matrix()

    # Re-initialise on train-vs-test before pulling the second matrix.
    wd_kernel.init(train_features, test_features)
    test_matrix = wd_kernel.get_kernel_matrix()

    return train_matrix, test_matrix, wd_kernel
def svm_process(args_tuple):
    """
    Train an SVMLight classifier with a weighted-degree position string
    kernel and evaluate it on a test set.

    args_tuple -- 6-tuple (X_train, Y_train, X_test, Y_test, d, c):
                  X_train / X_test are passed to the kernel and to apply(),
                  Y_train is passed to SVMLight as the labels, Y_test is
                  compared element-wise with the predicted labels, d is the
                  kernel's third constructor argument and c the first
                  SVMLight argument.

    Returns (accuracy, Y_test_proba): the fraction of test predictions equal
    to Y_test, and the logistic function of the decision values.
    """
    X_train, Y_train, X_test, Y_test, d, c = args_tuple

    kernel = WeightedDegreePositionStringKernel(X_train, X_train, d)
    kernel.set_shifts(np.ones(SEQ_LEN, dtype=np.int32))
    kernel.set_position_weights(np.ones(SEQ_LEN, dtype=np.float64))
    kernel.init(X_train, X_train)

    model = SVMLight(c, kernel, Y_train)
    model.train()

    # Run the (expensive) prediction once and read both views off the
    # same result object.
    prediction = model.apply(X_test)
    Y_test_pred = prediction.get_labels()
    Y_test_dist = prediction.get_values()

    # Logistic sigmoid 1 / (1 + exp(-x)) computed via logaddexp so that large
    # |x| cannot overflow exp() and turn the ratio into inf/inf = nan.
    Y_test_proba = np.exp(-np.logaddexp(0.0, -Y_test_dist))

    num_correct = np.count_nonzero((Y_test_pred - Y_test) == 0)
    accuracy = num_correct * 1.0 / Y_test.size
    return (accuracy, Y_test_proba)
def svm_process(args_tuple):
    """
    Train an SVMLight classifier with a weighted-degree position string
    kernel and evaluate it on a test set.

    args_tuple -- 6-tuple (X_train, Y_train, X_test, Y_test, d, c):
                  X_train / X_test are passed to the kernel and to apply(),
                  Y_train is passed to SVMLight as the labels, Y_test is
                  compared element-wise with the predicted labels, d is the
                  kernel's third constructor argument and c the first
                  SVMLight argument.

    Returns (accuracy, Y_test_proba): the fraction of test predictions equal
    to Y_test, and the logistic function of the decision values.
    """
    X_train, Y_train, X_test, Y_test, d, c = args_tuple

    kernel = WeightedDegreePositionStringKernel(X_train, X_train, d)
    kernel.set_shifts(np.ones(SEQ_LEN, dtype=np.int32))
    kernel.set_position_weights(np.ones(SEQ_LEN, dtype=np.float64))
    kernel.init(X_train, X_train)

    model = SVMLight(c, kernel, Y_train)
    model.train()

    # Run the (expensive) prediction once and read both views off the
    # same result object.
    prediction = model.apply(X_test)
    Y_test_pred = prediction.get_labels()
    Y_test_dist = prediction.get_values()

    # Logistic sigmoid 1 / (1 + exp(-x)) computed via logaddexp so that large
    # |x| cannot overflow exp() and turn the ratio into inf/inf = nan.
    Y_test_proba = np.exp(-np.logaddexp(0.0, -Y_test_dist))

    num_correct = np.count_nonzero((Y_test_pred - Y_test) == 0)
    accuracy = num_correct * 1.0 / Y_test.size
    return (accuracy, Y_test_proba)
def kernel_weighted_degree_position_string_modular(fm_train_dna=traindat,
                                                   fm_test_dna=testdat,
                                                   degree=20):
    """
    Build a weighted-degree position string kernel on DNA strings and
    compute its train-vs-train and train-vs-test kernel matrices.

    fm_train_dna -- training sequences; the length of the first sequence
                    sizes the shift and position-weight vectors
    fm_test_dna  -- test sequences
    degree       -- degree argument handed to the kernel constructor

    Returns (train kernel matrix, test kernel matrix, kernel). On return
    the kernel is initialised on (train, test) features.
    """
    # MSG_DEBUG and zeros are imported as in the original but not used by
    # the active code.
    from modshogun import StringCharFeatures, DNA
    from modshogun import WeightedDegreePositionStringKernel, MSG_DEBUG
    from numpy import zeros, ones, float64, int32

    train_features = StringCharFeatures(fm_train_dna, DNA)
    # debugging aid: train_features.io.set_loglevel(MSG_DEBUG)
    test_features = StringCharFeatures(fm_test_dna, DNA)

    wd_kernel = WeightedDegreePositionStringKernel(train_features, train_features, degree)

    # One shift (fixed at 10) and one unit weight per sequence position.
    seq_len = len(fm_train_dna[0])
    shift_vector = 10 * ones(seq_len, dtype=int32)
    wd_kernel.set_shifts(shift_vector)
    weight_vector = ones(seq_len, dtype=float64)
    wd_kernel.set_position_weights(weight_vector)

    train_matrix = wd_kernel.get_kernel_matrix()

    # Re-initialise on train-vs-test before pulling the second matrix.
    wd_kernel.init(train_features, test_features)
    test_matrix = wd_kernel.get_kernel_matrix()

    return train_matrix, test_matrix, wd_kernel
def runShogunSVRWDKernel(train_xt, train_lt, test_xt, svm_c=1):
    """
    Fit an epsilon-SVR on PROTEIN strings with a weighted-degree position
    string kernel and predict on the training and test sets.

    train_xt -- training sequences; the length of the first sequence sizes
                the shift and position-weight vectors
    train_lt -- training targets, wrapped in RegressionLabels
    test_xt  -- test sequences
    svm_c    -- first argument handed to LibSVR (default 1)

    Returns (train predictions, test predictions, kernel). On return the
    kernel is initialised on (train, test) features.
    """
    # String features over the PROTEIN alphabet.
    train_features = StringCharFeatures(train_xt, PROTEIN)
    test_features = StringCharFeatures(test_xt, PROTEIN)

    # Kernel is initialised train-vs-train for fitting.
    wd_kernel = WeightedDegreePositionStringKernel(train_features, train_features, DEGREE)
    wd_kernel.io.set_loglevel(MSG_DEBUG)

    # One shift and one weight per sequence position.
    seq_len = len(train_xt[0])
    shift_vector = NUMSHIFTS * ones(seq_len, dtype=int32)
    wd_kernel.set_shifts(shift_vector)
    weight_vector = ones(seq_len, dtype=float64)
    wd_kernel.set_position_weights(weight_vector)

    targets = RegressionLabels(train_lt)

    # Single-argument print() emits the same text under Python 2 and 3.
    print("Ready to train!")
    regressor = LibSVR(svm_c, SVRPARAM, wd_kernel, targets, LIBSVR_EPSILON_SVR)
    regressor.io.set_loglevel(MSG_DEBUG)
    regressor.train()

    print("Making predictions!")
    train_result = regressor.apply(train_features)
    train_predicted = train_result.get_labels()

    # Point the kernel at train-vs-test before scoring the test set.
    wd_kernel.init(train_features, test_features)
    test_result = regressor.apply(test_features)
    test_predicted = test_result.get_labels()

    return train_predicted, test_predicted, wd_kernel
def serialization_string_kernels_modular(n_data, num_shifts, size):
    """
    Train an SVMLight machine on a combined string kernel, round-trip it
    through save()/load(), and check the reloaded machine predicts the same.

    n_data     -- argument handed to generate_random_data for both the
                  training and the test set
    num_shifts -- value every entry of the shift vector is set to
    size       -- first constructor argument of each sub-kernel

    Returns (out, out2): test-set labels predicted by the original and by
    the reloaded machine.

    Raises AssertionError when any pair of predictions differs by 1e-6 or
    more in absolute value.
    """
    ##################################################
    # set up toy data and svm
    train_xt, train_lt = generate_random_data(n_data)
    test_xt, _ = generate_random_data(n_data)  # test labels are not needed

    feats_train = construct_features(train_xt)
    feats_test = construct_features(test_xt)

    max_len = len(train_xt[0])
    kernel_wdk = WeightedDegreePositionStringKernel(size, 5)
    shifts_vector = numpy.ones(max_len, dtype=numpy.int32) * num_shifts
    kernel_wdk.set_shifts(shifts_vector)

    ########
    # set up spectrum
    use_sign = False
    kernel_spec_1 = WeightedCommWordStringKernel(size, use_sign)
    kernel_spec_2 = WeightedCommWordStringKernel(size, use_sign)

    ########
    # combined kernel
    kernel = CombinedKernel()
    kernel.append_kernel(kernel_wdk)
    kernel.append_kernel(kernel_spec_1)
    kernel.append_kernel(kernel_spec_2)

    labels = BinaryLabels(train_lt)
    svm = SVMLight(1.0, kernel, labels)
    svm.train(feats_train)

    ##################################################
    # serialize to file
    fn = "serialized_svm.bz2"
    save(fn, svm)

    ##################################################
    # unserialize and sanity check
    svm2 = load(fn)

    out = svm.apply(feats_test).get_labels()
    out2 = svm2.apply(feats_test).get_labels()

    # Outputs must agree within tolerance. abs() wraps the difference only;
    # wrapping the whole comparison would take abs() of a boolean and let any
    # negative difference through.
    for idx, (before, after) in enumerate(zip(out, out2)):
        assert abs(before - after) < 0.000001, \
            "prediction %d changed after reload: %r vs %r" % (idx, before, after)

    return out, out2
def serialization_string_kernels_modular(n_data, num_shifts, size):
    """
    Train an SVMLight machine on a combined string kernel, round-trip it
    through save()/load(), and check the reloaded machine predicts the same.

    n_data     -- argument handed to generate_random_data for both the
                  training and the test set
    num_shifts -- value every entry of the shift vector is set to
    size       -- first constructor argument of each sub-kernel

    Returns (out, out2): test-set labels predicted by the original and by
    the reloaded machine.

    Raises AssertionError when any pair of predictions differs by 1e-6 or
    more in absolute value.
    """
    ##################################################
    # set up toy data and svm
    train_xt, train_lt = generate_random_data(n_data)
    test_xt, _ = generate_random_data(n_data)  # test labels are not needed

    feats_train = construct_features(train_xt)
    feats_test = construct_features(test_xt)

    max_len = len(train_xt[0])
    kernel_wdk = WeightedDegreePositionStringKernel(size, 5)
    shifts_vector = numpy.ones(max_len, dtype=numpy.int32) * num_shifts
    kernel_wdk.set_shifts(shifts_vector)

    ########
    # set up spectrum
    use_sign = False
    kernel_spec_1 = WeightedCommWordStringKernel(size, use_sign)
    kernel_spec_2 = WeightedCommWordStringKernel(size, use_sign)

    ########
    # combined kernel
    kernel = CombinedKernel()
    kernel.append_kernel(kernel_wdk)
    kernel.append_kernel(kernel_spec_1)
    kernel.append_kernel(kernel_spec_2)

    labels = BinaryLabels(train_lt)
    svm = SVMLight(1.0, kernel, labels)
    svm.train(feats_train)

    ##################################################
    # serialize to file
    fn = "serialized_svm.bz2"
    save(fn, svm)

    ##################################################
    # unserialize and sanity check
    svm2 = load(fn)

    out = svm.apply(feats_test).get_labels()
    out2 = svm2.apply(feats_test).get_labels()

    # Outputs must agree within tolerance. abs() wraps the difference only;
    # wrapping the whole comparison would take abs() of a boolean and let any
    # negative difference through.
    for idx, (before, after) in enumerate(zip(out, out2)):
        assert abs(before - after) < 0.000001, \
            "prediction %d changed after reload: %r vs %r" % (idx, before, after)

    return out, out2