def classifier_svmlight_linear_term_modular(fm_train_dna=traindna, fm_test_dna=testdna,
                                            label_train_dna=label_traindna, degree=3,
                                            C=10, epsilon=1e-5, num_threads=1):
    """Train SVMLight with an explicit linear term and classify the test strings.

    The SVM uses a WeightedDegreeStringKernel of the given ``degree`` built on
    the DNA training strings.

    Args:
        fm_train_dna: training strings, handed to ``StringCharFeatures``.
        fm_test_dna: test strings, handed to ``StringCharFeatures``.
        label_train_dna: training labels, wrapped in ``BinaryLabels``.
        degree: degree passed to the weighted-degree kernel.
        C: regularisation constant passed to ``SVMLight``.
        epsilon: value forwarded to ``svm.set_epsilon``.
        num_threads: value forwarded to ``svm.parallel.set_num_threads``.

    Returns:
        ``(out, kernel)``: the labels obtained from ``svm.apply()`` after the
        kernel was re-initialised on (train, test), and the kernel object.

    If SVMLight cannot be imported a message is printed and ``exit(0)`` is
    called.
    """
    from modshogun import StringCharFeatures, BinaryLabels, DNA
    from modshogun import WeightedDegreeStringKernel
    try:
        from modshogun import SVMLight
    except ImportError:
        print("SVMLight is not available")
        exit(0)

    train_features = StringCharFeatures(DNA)
    train_features.set_features(fm_train_dna)
    test_features = StringCharFeatures(DNA)
    test_features.set_features(fm_test_dna)

    wd_kernel = WeightedDegreeStringKernel(train_features, train_features, degree)
    train_labels = BinaryLabels(label_train_dna)

    # NOTE(review): the linear term is hard-coded to 10 entries, presumably one
    # per training example -- confirm against the data the callers pass in.
    linear_term = -numpy.array([1, 2, 3, 4, 5, 6, 7, 8, 7, 6], dtype=numpy.double)

    machine = SVMLight(C, wd_kernel, train_labels)
    machine.set_qpsize(3)
    machine.set_linear_term(linear_term)
    machine.set_epsilon(epsilon)
    machine.parallel.set_num_threads(num_threads)
    machine.train()

    # Switch the kernel to (train, test) so that apply() scores the test strings.
    wd_kernel.init(train_features, test_features)
    predictions = machine.apply().get_labels()
    return predictions, wd_kernel
def classifier_svmlight_modular(fm_train_dna=traindat, fm_test_dna=testdat,
                                label_train_dna=label_traindat, C=1.2,
                                epsilon=1e-5, num_threads=1):
    """Train SVMLight on DNA strings with a degree-20 weighted-degree kernel.

    Args:
        fm_train_dna: training strings, handed to ``StringCharFeatures``.
        fm_test_dna: test strings, handed to ``StringCharFeatures``.
        label_train_dna: training labels, wrapped in ``BinaryLabels``.
        C: regularisation constant passed to ``SVMLight``.
        epsilon: value forwarded to ``svm.set_epsilon``.
        num_threads: value forwarded to ``svm.parallel.set_num_threads``.

    Returns:
        The kernel, re-initialised on (train, test). ``None`` is returned when
        SVMLight cannot be imported (a message is printed in that case).
    """
    from modshogun import StringCharFeatures, BinaryLabels, DNA
    from modshogun import WeightedDegreeStringKernel
    try:
        from modshogun import SVMLight
    except ImportError:
        print('No support for SVMLight available.')
        return

    train_features = StringCharFeatures(DNA)
    train_features.set_features(fm_train_dna)
    test_features = StringCharFeatures(DNA)
    test_features.set_features(fm_test_dna)

    wd_degree = 20
    wd_kernel = WeightedDegreeStringKernel(train_features, train_features, wd_degree)
    train_labels = BinaryLabels(label_train_dna)

    machine = SVMLight(C, wd_kernel, train_labels)
    machine.set_epsilon(epsilon)
    machine.parallel.set_num_threads(num_threads)
    machine.train()

    wd_kernel.init(train_features, test_features)
    # The predicted labels are computed but deliberately not returned.
    machine.apply().get_labels()
    return wd_kernel
def classifier_domainadaptationsvm_modular(fm_train_dna=traindna, fm_test_dna=testdna,
                                           label_train_dna=label_traindna,
                                           label_test_dna=label_testdna,
                                           fm_train_dna2=traindna2, fm_test_dna2=testdna2,
                                           label_train_dna2=label_traindna2,
                                           label_test_dna2=label_testdna2, C=1, degree=3):
    """Train an SVMLight on one domain and adapt it to a second domain.

    Stage 1 trains an ``SVMLight`` with a weighted-degree string kernel on the
    first data set.  Stage 2 trains a ``DomainAdaptationSVM`` on the second
    data set, regularised against the stage-1 solution, and applies it to the
    second test set.

    Previously stage 2 silently re-used the first-domain strings and labels,
    so ``fm_train_dna2``, ``fm_test_dna2`` and ``label_train_dna2`` had no
    effect; they are now the data the adapted SVM is trained and evaluated on.

    Args:
        fm_train_dna: training strings of the first domain.
        fm_test_dna: unused here; kept for interface compatibility.
        label_train_dna: training labels of the first domain.
        label_test_dna: unused here; kept for interface compatibility.
        fm_train_dna2: training strings of the second domain.
        fm_test_dna2: test strings of the second domain.
        label_train_dna2: training labels of the second domain.
        label_test_dna2: unused here; kept for interface compatibility.
        C: regularisation constant used for both SVMs.
        degree: degree of both weighted-degree kernels.

    Returns:
        The result of ``dasvm.apply_binary`` on the second-domain test features.
    """
    # Stage 1: plain SVM on the first domain.
    feats_train = StringCharFeatures(fm_train_dna, DNA)
    kernel = WeightedDegreeStringKernel(feats_train, feats_train, degree)
    labels = BinaryLabels(label_train_dna)
    svm = SVMLight(C, kernel, labels)
    svm.train()

    # Stage 2: domain-adaptation SVM on the second domain.
    feats_train2 = StringCharFeatures(fm_train_dna2, DNA)
    feats_test2 = StringCharFeatures(fm_test_dna2, DNA)
    kernel2 = WeightedDegreeStringKernel(feats_train2, feats_train2, degree)
    labels2 = BinaryLabels(label_train_dna2)

    # We regularise against the previously obtained solution.
    # NOTE(review): the trailing 1.0 is presumably the regularisation strength
    # towards the pre-trained SVM -- confirm against the Shogun API.
    dasvm = DomainAdaptationSVM(C, kernel2, labels2, svm, 1.0)
    dasvm.train()

    out = dasvm.apply_binary(feats_test2)
    return out  # TODO: also return dasvm
def classifier_svmlight_linear_term_modular(fm_train_dna=traindna, fm_test_dna=testdna,
                                            label_train_dna=label_traindna, degree=3,
                                            C=10, epsilon=1e-5, num_threads=1):
    """Fit SVMLight with a user-set linear term and predict on the test strings.

    Args:
        fm_train_dna: training strings, handed to ``StringCharFeatures``.
        fm_test_dna: test strings, handed to ``StringCharFeatures``.
        label_train_dna: training labels, wrapped in ``BinaryLabels``.
        degree: degree of the ``WeightedDegreeStringKernel``.
        C: regularisation constant passed to ``SVMLight``.
        epsilon: value forwarded to ``svm.set_epsilon``.
        num_threads: value forwarded to ``svm.parallel.set_num_threads``.

    Returns:
        A pair ``(out, kernel)``: labels from ``svm.apply()`` on the
        (train, test) kernel, and that kernel.

    When SVMLight is missing, a message is printed and ``exit(0)`` is called.
    """
    from modshogun import StringCharFeatures, BinaryLabels, DNA
    from modshogun import WeightedDegreeStringKernel
    try:
        from modshogun import SVMLight
    except ImportError:
        print("SVMLight is not available")
        exit(0)

    def _dna_features(strings):
        # Wrap a collection of strings as DNA string features.
        wrapped = StringCharFeatures(DNA)
        wrapped.set_features(strings)
        return wrapped

    feats_train = _dna_features(fm_train_dna)
    feats_test = _dna_features(fm_test_dna)

    kernel = WeightedDegreeStringKernel(feats_train, feats_train, degree)
    svm = SVMLight(C, kernel, BinaryLabels(label_train_dna))
    svm.set_qpsize(3)

    # NOTE(review): ten hard-coded coefficients, presumably one per training
    # example -- confirm with the callers' data.
    coefficients = numpy.array([1, 2, 3, 4, 5, 6, 7, 8, 7, 6], dtype=numpy.double)
    svm.set_linear_term(-coefficients)

    svm.set_epsilon(epsilon)
    svm.parallel.set_num_threads(num_threads)
    svm.train()

    # Re-initialise on (train, test) before predicting.
    kernel.init(feats_train, feats_test)
    return svm.apply().get_labels(), kernel
def classifier_svmlight_modular(fm_train_dna=traindat, fm_test_dna=testdat,
                                label_train_dna=label_traindat, C=1.2,
                                epsilon=1e-5, num_threads=1):
    """Fit SVMLight with a weighted-degree (degree 20) kernel on DNA strings.

    Args:
        fm_train_dna: training strings, handed to ``StringCharFeatures``.
        fm_test_dna: test strings, handed to ``StringCharFeatures``.
        label_train_dna: training labels, wrapped in ``BinaryLabels``.
        C: regularisation constant passed to ``SVMLight``.
        epsilon: value forwarded to ``svm.set_epsilon``.
        num_threads: value forwarded to ``svm.parallel.set_num_threads``.

    Returns:
        The kernel after it was re-initialised on (train, test), or ``None``
        if SVMLight is unavailable (a message is printed first).
    """
    from modshogun import StringCharFeatures, BinaryLabels, DNA
    from modshogun import WeightedDegreeStringKernel
    try:
        from modshogun import SVMLight
    except ImportError:
        print('No support for SVMLight available.')
        return

    def _dna_features(strings):
        # Wrap a collection of strings as DNA string features.
        wrapped = StringCharFeatures(DNA)
        wrapped.set_features(strings)
        return wrapped

    feats_train = _dna_features(fm_train_dna)
    feats_test = _dna_features(fm_test_dna)

    kernel = WeightedDegreeStringKernel(feats_train, feats_train, 20)
    svm = SVMLight(C, kernel, BinaryLabels(label_train_dna))
    svm.set_epsilon(epsilon)
    svm.parallel.set_num_threads(num_threads)
    svm.train()

    kernel.init(feats_train, feats_test)
    # Predictions are evaluated here, but only the kernel is handed back.
    svm.apply().get_labels()
    return kernel
def kernel_weighted_degree_string_modular(fm_train_dna=traindat, fm_test_dna=testdat,
                                          degree=20):
    """Compute train and test kernel matrices of a weighted-degree string kernel.

    The per-degree weights are the integers ``degree, degree-1, ..., 1``
    divided by their sum.

    Args:
        fm_train_dna: training strings, handed to ``StringCharFeatures``.
        fm_test_dna: test strings, handed to ``StringCharFeatures``.
        degree: degree of the kernel and length of the weight vector.

    Returns:
        ``(km_train, km_test, kernel)``: the kernel matrix on (train, train),
        the kernel matrix on (train, test), and the kernel object.
    """
    from modshogun import StringCharFeatures, DNA
    from modshogun import WeightedDegreeStringKernel, MSG_DEBUG
    from numpy import arange, double

    train_features = StringCharFeatures(fm_train_dna, DNA)
    # For verbose output: train_features.io.set_loglevel(MSG_DEBUG)
    test_features = StringCharFeatures(fm_test_dna, DNA)

    wd_kernel = WeightedDegreeStringKernel(train_features, train_features, degree)

    # Descending weights, scaled so that they add up to one.
    ascending = arange(1, degree + 1, dtype=double)
    wd_kernel.set_wd_weights(ascending[::-1] / sum(ascending))

    # Position weights are not set here; an explicit call would look like
    #   wd_kernel.set_position_weights(ones(len(fm_train_dna[0]), dtype=float64))

    train_matrix = wd_kernel.get_kernel_matrix()
    wd_kernel.init(train_features, test_features)
    test_matrix = wd_kernel.get_kernel_matrix()

    # Serialisation hint kept from the original example:
    #   pickle.dump(kernel, file('kernel_obj.dump', 'w'), protocol=2)
    #   k = pickle.load(file('kernel_obj.dump', 'r'))
    return train_matrix, test_matrix, wd_kernel
def kernel_weighted_degree_string_modular(fm_train_dna=traindat, fm_test_dna=testdat,
                                          degree=20):
    """Build a weighted-degree string kernel and return its two kernel matrices.

    Args:
        fm_train_dna: training strings, handed to ``StringCharFeatures``.
        fm_test_dna: test strings, handed to ``StringCharFeatures``.
        degree: degree of the kernel; also the number of weights that are set.

    Returns:
        A triple ``(km_train, km_test, kernel)`` holding the (train, train)
        matrix, the (train, test) matrix and the kernel itself.
    """
    from modshogun import StringCharFeatures, DNA
    from modshogun import WeightedDegreeStringKernel, MSG_DEBUG
    from numpy import arange, double

    feats_train = StringCharFeatures(fm_train_dna, DNA)
    # Debug logging can be enabled with feats_train.io.set_loglevel(MSG_DEBUG).
    feats_test = StringCharFeatures(fm_test_dna, DNA)

    kernel = WeightedDegreeStringKernel(feats_train, feats_train, degree)

    # Weights degree, degree-1, ..., 1 divided by their total.
    degrees = arange(1, degree + 1, dtype=double)
    total = sum(degrees)
    kernel.set_wd_weights(degrees[::-1] / total)

    # Optional, currently disabled:
    #   kernel.set_position_weights(ones(len(fm_train_dna[0]), dtype=float64))

    km_train = kernel.get_kernel_matrix()

    kernel.init(feats_train, feats_test)
    km_test = kernel.get_kernel_matrix()

    # The kernel can be serialised like this:
    #   pickle.dump(kernel, file('tmp/kernel_obj.dump', 'w'), protocol=2)
    #   k = pickle.load(file('tmp/kernel_obj.dump', 'r'))
    return km_train, km_test, kernel
def embed(file='data/mml.txt'):
    """Embed the strings of a text file into two dimensions.

    Each line of ``file`` (with trailing whitespace stripped) is treated as
    one DNA string.  A ``KernelDistance`` built on a
    ``WeightedDegreeStringKernel(10)`` is handed to
    ``MultidimensionalScaling`` with target dimension 2.

    Args:
        file: path of the text file to read, one string per line.

    Returns:
        ``(matrix, strings)``: the feature matrix of the embedding and the
        list of strings that were embedded.
    """
    with open(file) as handle:
        strings = [line.rstrip() for line in handle]

    features = StringCharFeatures(strings, DNA)
    wd_kernel = WeightedDegreeStringKernel(10)
    distance = KernelDistance(1.0, wd_kernel)
    distance.init(features, features)

    mds = MultidimensionalScaling()
    mds.set_target_dim(2)
    embedding = mds.embed_distance(distance)
    return embedding.get_feature_matrix(), strings
def classifier_svmlight_batch_linadd_modular(
    fm_train_dna, fm_test_dna, label_train_dna, degree, C, epsilon, num_threads
):
    """Train SVMLight on DNA strings and predict with and without batch mode.

    The test strings are classified twice: first with batch computation and
    linadd both disabled (result discarded), then with batch computation
    re-enabled (linadd stays disabled).  Only the second result is returned.

    The ``degree`` argument is now honoured; it used to be overwritten with a
    hard-coded 20 inside the function.

    Args:
        fm_train_dna: training strings, handed to ``StringCharFeatures``.
        fm_test_dna: test strings, handed to ``StringCharFeatures``.
        label_train_dna: training labels, wrapped in ``BinaryLabels``.
        degree: degree of the ``WeightedDegreeStringKernel``.
        C: regularisation constant passed to ``SVMLight``.
        epsilon: value forwarded to ``svm.set_epsilon``.
        num_threads: value forwarded to ``svm.parallel.set_num_threads``.

    Returns:
        ``(labels, svm)``: predicted labels from the batch-enabled run and the
        trained SVM, or ``None`` when SVMLight cannot be imported.
    """
    from modshogun import StringCharFeatures, BinaryLabels, DNA
    from modshogun import WeightedDegreeStringKernel, MSG_DEBUG
    try:
        from modshogun import SVMLight
    except ImportError:
        print("No support for SVMLight available.")
        return

    feats_train = StringCharFeatures(DNA)
    # feats_train.io.set_loglevel(MSG_DEBUG)
    feats_train.set_features(fm_train_dna)
    feats_test = StringCharFeatures(DNA)
    feats_test.set_features(fm_test_dna)

    kernel = WeightedDegreeStringKernel(feats_train, feats_train, degree)
    labels = BinaryLabels(label_train_dna)

    svm = SVMLight(C, kernel, labels)
    svm.set_epsilon(epsilon)
    svm.parallel.set_num_threads(num_threads)
    svm.train()

    # Point the kernel at (train, test) so apply() scores the test strings.
    kernel.init(feats_train, feats_test)

    # First pass: plain evaluation, neither batch computation nor linadd.
    svm.set_batch_computation_enabled(False)
    svm.set_linadd_enabled(False)
    svm.apply().get_labels()

    # Second pass: batch computation on; this is the result that is returned.
    svm.set_batch_computation_enabled(True)
    labels = svm.apply().get_labels()
    return labels, svm
def runShogunSVMDNAWDNoPositionKernel(train_xt, train_lt, test_xt):
    """Run a LibSVM with a weighted-degree kernel without position weights.

    The kernel degree and the SVM regularisation constant come from the
    module-level names ``DEGREE`` and ``SVMC``.  Progress messages are printed
    with ``print(...)`` calls, which emit the same text on Python 2 and
    Python 3 (the former print statements were Python 2 only).

    Args:
        train_xt: training strings, handed to ``StringCharFeatures``.
        train_lt: training labels, wrapped in ``BinaryLabels``.
        test_xt: test strings, handed to ``StringCharFeatures``.

    Returns:
        ``(out1, out2)``: predicted labels on the training strings and on the
        test strings.
    """
    # Set up features and kernel.
    feats_train = StringCharFeatures(train_xt, DNA)
    feats_test = StringCharFeatures(test_xt, DNA)
    kernel = WeightedDegreeStringKernel(feats_train, feats_train, DEGREE)
    kernel.io.set_loglevel(MSG_DEBUG)

    # Weights DEGREE, DEGREE-1, ..., 1 divided by their sum.
    weights = arange(1, DEGREE + 1, dtype=double)[::-1] / \
        sum(arange(1, DEGREE + 1, dtype=double))
    kernel.set_wd_weights(weights)

    labels = BinaryLabels(train_lt)

    # Train the SVM.
    print("Ready to train!")
    svm = LibSVM(SVMC, kernel, labels)
    svm.io.set_loglevel(MSG_DEBUG)
    svm.train()

    # Predict on the training data, then on the test data.
    print("Making predictions!")
    out1 = svm.apply(feats_train).get_labels()
    kernel.init(feats_train, feats_test)
    out2 = svm.apply(feats_test).get_labels()

    return out1, out2
def classifier_svmlight_batch_linadd_modular(fm_train_dna, fm_test_dna,
                                             label_train_dna, degree, C,
                                             epsilon, num_threads):
    """Train SVMLight on DNA strings, then classify the test set twice.

    The first classification runs with batch computation and linadd switched
    off and its result is thrown away; the second runs with batch computation
    switched back on (linadd remains off) and is the one returned.

    ``degree`` is passed through to the kernel.  Earlier the function reset it
    to 20 regardless of what the caller supplied.

    Args:
        fm_train_dna: training strings, handed to ``StringCharFeatures``.
        fm_test_dna: test strings, handed to ``StringCharFeatures``.
        label_train_dna: training labels, wrapped in ``BinaryLabels``.
        degree: degree of the ``WeightedDegreeStringKernel``.
        C: regularisation constant passed to ``SVMLight``.
        epsilon: value forwarded to ``svm.set_epsilon``.
        num_threads: value forwarded to ``svm.parallel.set_num_threads``.

    Returns:
        ``(labels, svm)`` on success; ``None`` if SVMLight is not importable
        (a message is printed in that case).
    """
    from modshogun import StringCharFeatures, BinaryLabels, DNA
    from modshogun import WeightedDegreeStringKernel, MSG_DEBUG
    try:
        from modshogun import SVMLight
    except ImportError:
        print('No support for SVMLight available.')
        return

    feats_train = StringCharFeatures(DNA)
    # feats_train.io.set_loglevel(MSG_DEBUG)
    feats_train.set_features(fm_train_dna)
    feats_test = StringCharFeatures(DNA)
    feats_test.set_features(fm_test_dna)

    kernel = WeightedDegreeStringKernel(feats_train, feats_train, degree)
    labels = BinaryLabels(label_train_dna)

    svm = SVMLight(C, kernel, labels)
    svm.set_epsilon(epsilon)
    svm.parallel.set_num_threads(num_threads)
    svm.train()

    # Re-initialise on (train, test) before any prediction.
    kernel.init(feats_train, feats_test)

    # Pass 1: no batch computation, no linadd; output is discarded.
    svm.set_batch_computation_enabled(False)
    svm.set_linadd_enabled(False)
    svm.apply().get_labels()

    # Pass 2: batch computation enabled; output is returned.
    svm.set_batch_computation_enabled(True)
    labels = svm.apply().get_labels()
    return labels, svm
def embed(file='mml.pickle', N=500):
    """Embed a random sample of positively labelled strings into 2-D.

    The file is read with the module-level ``load`` helper and is expected to
    provide ``'examples'`` and ``'labels'`` entries.  ``N`` indices are drawn
    without replacement from the examples whose label is greater than zero;
    the chosen strings are embedded with ``MultidimensionalScaling`` on a
    ``KernelDistance`` over a ``WeightedDegreeStringKernel(10)``.

    Args:
        file: path handed to ``load``.
        N: number of positive examples to sample.

    Returns:
        ``(matrix, strings)``: the feature matrix of the embedding and the
        sampled strings.

    Raises:
        ValueError: from ``random.sample`` if ``N`` exceeds the number of
            positively labelled examples.
    """
    print('%s reading %s' % (datetime.datetime.now(), file))
    file_contents = load(file)
    examples = file_contents['examples']
    print('%s there are %d strings in %s'
          % (datetime.datetime.now(), len(examples), file))

    positives = numpy.where(numpy.array(file_contents['labels']) > 0)[0]
    # random.sample requires a real sequence; a numpy array is rejected with
    # TypeError on Python 3.11+, so sample from a plain list of indices.
    selected_idxs = random.sample(positives.tolist(), N)
    strings = [examples[i] for i in selected_idxs]

    features = StringCharFeatures(strings, DNA)
    kernel = WeightedDegreeStringKernel(10)
    distance = KernelDistance(1.0, kernel)
    distance.init(features, features)

    converter = MultidimensionalScaling()
    converter.set_target_dim(2)
    return converter.embed_distance(distance).get_feature_matrix(), strings
def embed(file='mml.pickle', max_strings=1000):
    """Embed the first positively labelled strings of a file into 2-D.

    The file is read with the module-level ``load`` helper and is expected to
    provide ``'examples'`` and ``'labels'`` entries.  Examples whose label is
    greater than 0.0 are collected in file order until ``max_strings`` have
    been gathered; scanning stops at that point.  The strings are embedded
    with ``MultidimensionalScaling`` on a ``KernelDistance`` over a
    ``WeightedDegreeStringKernel(10)``.

    Args:
        file: path handed to ``load``.
        max_strings: upper bound on the number of strings that are embedded
            (defaults to the previously hard-coded 1000).

    Returns:
        ``(matrix, strings)``: the feature matrix of the embedding and the
        selected strings.
    """
    print('%s reading %s' % (datetime.datetime.now(), file))
    file_contents = load(file)
    examples = file_contents['examples']
    print('%s there are %d strings in %s'
          % (datetime.datetime.now(), len(examples), file))

    strings = []
    for i, label in enumerate(file_contents['labels']):
        if len(strings) >= max_strings:
            # Cap reached; nothing further can be added.
            break
        if label > 0.0:
            strings.append(examples[i])

    features = StringCharFeatures(strings, DNA)
    kernel = WeightedDegreeStringKernel(10)
    distance = KernelDistance(1.0, kernel)
    distance.init(features, features)

    converter = MultidimensionalScaling()
    converter.set_target_dim(2)
    return converter.embed_distance(distance).get_feature_matrix(), strings
class svm_splice_model(object):
    """Kernel machine over a weighted-degree string kernel plus window settings.

    The instance stores the ``KernelMachine`` (``svm``), the kernel
    (``wd_kernel``), the training features (``traindat``), the window bounds
    (``window_left``, ``window_right``), ``offset`` and ``consensus``.
    """

    def __init__(self, order, traindat, alphas, b, window, consensus):
        """Build the kernel machine from stored training data and coefficients.

        Args:
            order: kernel degree; converted with ``int``.
            traindat: training strings, handed to ``StringCharFeatures``.
            alphas: coefficients passed to ``KernelMachine``; entry ``i`` is
                paired with index ``i`` of the training data.
            b: bias passed to ``KernelMachine``.
            window: a ``(window_left, offset, window_right)`` triple.  This
                used to be a tuple parameter in the signature, which is a
                syntax error on Python 3 (PEP 3113); positional callers are
                unaffected.
            consensus: stored unchanged on the instance.
        """
        window_left, offset, window_right = window

        f = StringCharFeatures(DNA)
        f.set_features(traindat)

        wd_kernel = WeightedDegreeStringKernel(f, f, int(order))
        wd_kernel.io.set_target_to_stderr()

        self.svm = KernelMachine(
            wd_kernel, alphas,
            numpy.arange(len(alphas), dtype=numpy.int32), b)
        self.svm.io.set_target_to_stderr()
        # Use as many threads as the machine reports CPUs.
        self.svm.parallel.set_num_threads(self.svm.parallel.get_num_cpus())
        self.svm.set_linadd_enabled(False)
        self.svm.set_batch_computation_enabled(False)

        self.window_left = int(window_left)
        self.window_right = int(window_right)

        self.consensus = consensus
        self.wd_kernel = wd_kernel
        self.traindat = f
        # NOTE(review): unlike the window bounds, offset is stored without an
        # int() conversion -- confirm whether that is intentional.
        self.offset = offset