def svm_light (): print 'SVMLight' from shogun.Features import StringCharFeatures, Labels, DNA from shogun.Kernel import WeightedDegreeStringKernel try: from shogun.Classifier import SVMLight except ImportError: print 'No support for SVMLight available.' return feats_train=StringCharFeatures(DNA) feats_train.set_features(fm_train_dna) feats_test=StringCharFeatures(DNA) feats_test.set_features(fm_test_dna) degree=20 kernel=WeightedDegreeStringKernel(feats_train, feats_train, degree) C=1.2 epsilon=1e-5 num_threads=1 labels=Labels(label_train_dna) svm=SVMLight(C, kernel, labels) svm.set_epsilon(epsilon) svm.parallel.set_num_threads(num_threads) svm.train() kernel.init(feats_train, feats_test) svm.classify().get_labels()
def classifier_svmlight_modular (fm_train_dna=traindat,fm_test_dna=testdat,label_train_dna=label_traindat,C=1.2,epsilon=1e-5,num_threads=1): from shogun.Features import StringCharFeatures, Labels, DNA from shogun.Kernel import WeightedDegreeStringKernel try: from shogun.Classifier import SVMLight except ImportError: print 'No support for SVMLight available.' return feats_train=StringCharFeatures(DNA) feats_train.set_features(fm_train_dna) feats_test=StringCharFeatures(DNA) feats_test.set_features(fm_test_dna) degree=20 kernel=WeightedDegreeStringKernel(feats_train, feats_train, degree) labels=Labels(label_train_dna) svm=SVMLight(C, kernel, labels) svm.set_epsilon(epsilon) svm.parallel.set_num_threads(num_threads) svm.train() kernel.init(feats_train, feats_test) svm.apply().get_labels() return kernel
def classifier_svmlight_linear_term_modular(fm_train_dna=traindna,fm_test_dna=testdna, \
                                            label_train_dna=label_traindna,degree=3, \
                                            C=10,epsilon=1e-5,num_threads=1):
    """Train SVMLight with a custom linear term in the objective.

    @param fm_train_dna: training DNA strings
    @param fm_test_dna: test DNA strings
    @param label_train_dna: binary training labels
    @param degree: weighted-degree kernel order
    @param C: SVM regularization constant
    @param epsilon: optimizer stopping tolerance
    @param num_threads: number of parallel threads
    @return (test-set predictions, kernel)
    """
    from shogun.Features import StringCharFeatures, BinaryLabels, DNA
    from shogun.Kernel import WeightedDegreeStringKernel
    from shogun.Classifier import SVMLight

    feats_train = StringCharFeatures(DNA)
    feats_train.set_features(fm_train_dna)
    feats_test = StringCharFeatures(DNA)
    feats_test.set_features(fm_test_dna)

    kernel = WeightedDegreeStringKernel(feats_train, feats_train, degree)
    labels = BinaryLabels(label_train_dna)

    svm = SVMLight(C, kernel, labels)
    svm.set_qpsize(3)
    # non-default linear term of the QP (one entry per training example)
    svm.set_linear_term(-numpy.array([1,2,3,4,5,6,7,8,7,6], dtype=numpy.double))
    svm.set_epsilon(epsilon)
    svm.parallel.set_num_threads(num_threads)
    svm.train()

    kernel.init(feats_train, feats_test)
    out = svm.apply().get_labels()
    return out, kernel
def create_kernel(examples, param):
    """
    kernel factory

    @param examples: list/array of examples
    @type examples: list
    @param param: parameter object
    @type param: Parameter

    @return subclass of shogun Kernel object
    @rtype: Kernel
    """

    # first create feature object of correct type
    feat = create_features(examples, param)

    kernel = None

    if param.kernel == "WeightedDegreeStringKernel":
        kernel = WeightedDegreeStringKernel(feat, feat, param.wdk_degree)
        kernel.set_cache_size(200)
    elif param.kernel == "LinearKernel":
        kernel = LinearKernel(feat, feat)
    elif param.kernel == "PolyKernel":
        kernel = PolyKernel(feat, feat, 1, False)
    elif param.kernel == "GaussianKernel":
        kernel = GaussianKernel(feat, feat, param.sigma)
    elif param.kernel == "WeightedDegreeRBFKernel":
        size_cache = 200
        nof_properties = 20
        sigma = param.base_similarity
        kernel = WeightedDegreeRBFKernel(feat, feat, sigma, param.wdk_degree,
                                         nof_properties, size_cache)
    elif param.kernel == "Promoter":
        kernel = create_promoter_kernel(examples, param.flags)
    else:
        # call syntax instead of py2-only "raise E, msg"; message unchanged
        raise Exception("Unknown kernel type.")

    # BUGFIX: guard BOTH flag lookups with hasattr -- previously only the
    # cache_size check was guarded, so a param object without a .flags
    # attribute raised AttributeError on the debug check below
    if hasattr(param, "flags") and param.flags.has_key("cache_size"):
        kernel.set_cache_size(param.flags["cache_size"])
    if hasattr(param, "flags") and param.flags.has_key("debug"):
        kernel.io.set_loglevel(shogun.Kernel.MSG_DEBUG)

    return kernel
def kernel_weighted_degree_string_modular (fm_train_dna=traindat,fm_test_dna=testdat,degree=20):
    """Compute train/test kernel matrices for a WeightedDegreeString kernel
    with explicitly decreasing per-degree weights.

    @param fm_train_dna: training DNA strings
    @param fm_test_dna: test DNA strings
    @param degree: kernel order
    @return (train kernel matrix, test kernel matrix, kernel)
    """
    from shogun.Features import StringCharFeatures, DNA
    from shogun.Kernel import WeightedDegreeStringKernel, MSG_DEBUG

    feats_train = StringCharFeatures(fm_train_dna, DNA)
    #feats_train.io.set_loglevel(MSG_DEBUG)
    feats_test = StringCharFeatures(fm_test_dna, DNA)

    kernel = WeightedDegreeStringKernel(feats_train, feats_train, degree)

    # linearly decreasing weights, normalized to sum to one
    from numpy import arange, double
    weights = arange(1, degree + 1, dtype=double)[::-1] / \
        sum(arange(1, degree + 1, dtype=double))
    kernel.set_wd_weights(weights)
    #from numpy import ones,float64,int32
    #kernel.set_position_weights(ones(len(fm_train_dna[0]), dtype=float64))

    km_train = kernel.get_kernel_matrix()
    kernel.init(feats_train, feats_test)
    km_test = kernel.get_kernel_matrix()

    #this is how to serializate the kernel
    #import pickle
    #pickle.dump(kernel, file('kernel_obj.dump','w'), protocol=2)
    #k=pickle.load(file('kernel_obj.dump','r'))

    return km_train, km_test, kernel
def weighted_degree_string (): print 'WeightedDegreeString' from shogun.Features import StringCharFeatures, DNA from shogun.Kernel import WeightedDegreeStringKernel feats_train=StringCharFeatures(fm_train_dna, DNA) feats_test=StringCharFeatures(fm_test_dna, DNA) degree=20 kernel=WeightedDegreeStringKernel(feats_train, feats_train, degree) #weights=arange(1,degree+1,dtype=double)[::-1]/ \ # sum(arange(1,degree+1,dtype=double)) #kernel.set_wd_weights(weights) km_train=kernel.get_kernel_matrix() kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix()
def create_empty_promoter_kernel(param):
    """
    creates an uninitialized promoter kernel

    @param param: dict with keys "shifts", "degree", "center_offset",
                  "kernel_cache"
    @return an uninitialized CombinedKernel (center WDK + two border
            spectrum kernels)
    """
    # centered WDK/WDK-shift: plain WDK when no shifts are requested,
    # otherwise a positional WDK with a constant shift vector
    if param["shifts"] == 0:
        kernel_center = WeightedDegreeStringKernel(param["degree"])
    else:
        kernel_center = WeightedDegreePositionStringKernel(10, param["degree"])
        shifts_vector = numpy.ones(param["center_offset"] * 2,
                                   dtype=numpy.int32) * param["shifts"]
        kernel_center.set_shifts(shifts_vector)

    # cache budget is split evenly across the three sub-kernels
    kernel_center.set_cache_size(param["kernel_cache"] / 3)

    # border spectrum kernels
    size = param["kernel_cache"] / 3
    use_sign = False
    kernel_left = WeightedCommWordStringKernel(size, use_sign)
    kernel_right = WeightedCommWordStringKernel(size, use_sign)

    # assemble combined kernel
    kernel = CombinedKernel()
    kernel.append_kernel(kernel_center)
    kernel.append_kernel(kernel_left)
    kernel.append_kernel(kernel_right)

    return kernel
def do_batch_linadd (): print 'SVMlight batch' from shogun.Features import StringCharFeatures, Labels, DNA from shogun.Kernel import WeightedDegreeStringKernel try: from shogun.Classifier import SVMLight except ImportError: print 'No support for SVMLight available.' return feats_train=StringCharFeatures(DNA) feats_train.set_features(fm_train_dna) feats_test=StringCharFeatures(DNA) feats_test.set_features(fm_test_dna) degree=20 kernel=WeightedDegreeStringKernel(feats_train, feats_train, degree) C=1 epsilon=1e-5 num_threads=2 labels=Labels(label_train_dna) svm=SVMLight(C, kernel, labels) svm.set_epsilon(epsilon) svm.parallel.set_num_threads(num_threads) svm.train() kernel.init(feats_train, feats_test) #print 'SVMLight Objective: %f num_sv: %d' % \ # (svm.get_objective(), svm.get_num_support_vectors()) svm.set_batch_computation_enabled(False) svm.set_linadd_enabled(False) svm.classify().get_labels() svm.set_batch_computation_enabled(True) svm.classify().get_labels()
def classifier_svmlight_batch_linadd_modular(fm_train_dna, fm_test_dna,
                                             label_train_dna, degree, C,
                                             epsilon, num_threads):
    """Train SVMLight on DNA data and classify the test set with batch
    computation disabled and then enabled.

    @param fm_train_dna: training DNA strings
    @param fm_test_dna: test DNA strings
    @param label_train_dna: binary training labels
    @param degree: weighted-degree kernel order
    @param C: SVM regularization constant
    @param epsilon: optimizer stopping tolerance
    @param num_threads: number of parallel threads
    @return (predicted labels, trained svm)
    """
    from shogun.Features import StringCharFeatures, BinaryLabels, DNA
    from shogun.Kernel import WeightedDegreeStringKernel, MSG_DEBUG
    try:
        from shogun.Classifier import SVMLight
    except ImportError:
        print('No support for SVMLight available.')
        return

    feats_train = StringCharFeatures(DNA)
    #feats_train.io.set_loglevel(MSG_DEBUG)
    feats_train.set_features(fm_train_dna)
    feats_test = StringCharFeatures(DNA)
    feats_test.set_features(fm_test_dna)

    # BUGFIX: the original re-assigned degree=20 here, silently overriding
    # the caller-supplied degree parameter
    kernel = WeightedDegreeStringKernel(feats_train, feats_train, degree)

    labels = BinaryLabels(label_train_dna)

    svm = SVMLight(C, kernel, labels)
    svm.set_epsilon(epsilon)
    svm.parallel.set_num_threads(num_threads)
    svm.train()

    kernel.init(feats_train, feats_test)

    #print('SVMLight Objective: %f num_sv: %d' % \)
    #   (svm.get_objective(), svm.get_num_support_vectors())
    svm.set_batch_computation_enabled(False)
    svm.set_linadd_enabled(False)
    svm.apply().get_labels()

    svm.set_batch_computation_enabled(True)
    labels = svm.apply().get_labels()

    return labels, svm
def create_empty_kernel(param):
    """
    kernel factory

    @param param: parameter object
    @type param: Parameter

    @return subclass of shogun Kernel object
    @rtype: Kernel
    """

    kernel = None

    if param.kernel == "WeightedDegreeStringKernel":
        kernel = WeightedDegreeStringKernel(param.wdk_degree)
    elif param.kernel == "LinearKernel":
        kernel = LinearKernel()
    elif param.kernel == "PolyKernel":
        kernel = PolyKernel(10, 1, False)
    elif param.kernel == "GaussianKernel":
        kernel = GaussianKernel(10, param.sigma)
    elif param.kernel == "WeightedDegreeRBFKernel":
        size_cache = 50
        nof_properties = 5 #20
        sigma = param.transform
        kernel = WeightedDegreeRBFKernel(size_cache, sigma, param.wdk_degree,
                                         nof_properties)
    else:
        # call syntax instead of py2-only "raise E, msg"; message unchanged
        raise Exception("Unknown kernel type:" + param.kernel)

    # BUGFIX: guard BOTH flag lookups with hasattr -- previously only the
    # cache_size check was guarded, so a param object without a .flags
    # attribute raised AttributeError on the debug check below
    if hasattr(param, "flags") and param.flags.has_key("cache_size"):
        kernel.set_cache_size(param.flags["cache_size"])
    if hasattr(param, "flags") and param.flags.has_key("debug"):
        kernel.io.set_loglevel(shogun.Kernel.MSG_DEBUG)

    return kernel
def train(self, data, labels): """ model training """ # centered WDK/WDK-shift if self.param["shifts"] == 0: kernel_center = WeightedDegreeStringKernel(self.param["degree"]) else: kernel_center = WeightedDegreePositionStringKernel(10, self.param["degree"]) shifts_vector = numpy.ones(self.param["center_offset"]*2, dtype=numpy.int32)*self.param["shifts"] kernel_center.set_shifts(shifts_vector) kernel_center.set_cache_size(self.param["kernel_cache"]/3) # border spetrum kernels size = self.param["kernel_cache"]/3 use_sign = False kernel_left = WeightedCommWordStringKernel(size, use_sign) kernel_right = WeightedCommWordStringKernel(size, use_sign) # assemble combined kernel kernel = CombinedKernel() kernel.append_kernel(kernel_center) kernel.append_kernel(kernel_left) kernel.append_kernel(kernel_right) ## building features feat = create_features(data, self.param["center_offset"], self.param["center_pos"]) # init combined kernel kernel.init(feat, feat) print "len(labels) = %i" % (len(labels)) lab = BinaryLabels(numpy.double(labels)) self.svm = SVMLight(self.param["cost"], kernel, lab) # show debugging output self.svm.io.enable_progress() self.svm.io.set_loglevel(MSG_DEBUG) # optimization settings num_threads = 2 self.svm.parallel.set_num_threads(num_threads) self.svm.set_epsilon(10e-8) self.svm.train() return self
class svm_splice_model(object): def __init__(self, order, traindat, alphas, b, (window_left, offset, window_right), consensus): f = StringCharFeatures(traindat, DNA) wd_kernel = WeightedDegreeStringKernel(f, f, int(order)) wd_kernel.io.set_target_to_stderr() self.svm = SVM(wd_kernel, alphas, numpy.arange(len(alphas), dtype=numpy.int32), b) self.svm.io.set_target_to_stderr() self.svm.parallel.set_num_threads(self.svm.parallel.get_num_cpus()) self.svm.set_linadd_enabled(False) self.svm.set_batch_computation_enabled(False) self.window_left = int(window_left) self.window_right = int(window_right) self.consensus = consensus self.wd_kernel = wd_kernel self.traindat = f self.offset = offset
# build string features and labels for the two-task toy setup
feat = StringCharFeatures(DNA)
feat.set_features(examples)
lab = Labels(numpy.array(labels))

N = subset_size

##################################################################
# internal modification
##################################################################

# first half of the examples belongs to task 0, second half to task 1
task_vector = [0] * (N / 2)
task_vector.extend([1] * (N / 2))

base_wdk = WeightedDegreeStringKernel(feat, feat, 1)
normalizer = MultitaskKernelNormalizer(task_vector)
#wdk.set_task_vector(task_vector) #, task_vector)

# within-task similarity 4.0, between-task similarity 1.0
for i in xrange(2):
    for j in xrange(2):
        normalizer.set_task_similarity(i, j, 4.0 if i == j else 1.0)
'CGACGGCCGGGGGGGCGTA'] label_test_dna=numpy.array(5*[-1.0] + 5*[1.0]) print 'SVMLight' from shogun.Features import StringCharFeatures, Labels, DNA from shogun.Kernel import WeightedDegreeStringKernel from shogun.Classifier import SVMLight feats_train=StringCharFeatures(DNA) feats_train.set_features(fm_train_dna) feats_test=StringCharFeatures(DNA) feats_test.set_features(fm_test_dna) kernel=WeightedDegreeStringKernel(feats_train, feats_train, degree) C=10 epsilon=1e-5 num_threads=1 labels=Labels(label_train_dna) svm=SVMLight(C, kernel, labels) svm.set_qpsize(3) svm.set_linear_term(-numpy.array([1,2,3,4,5,6,7,8,7,6], dtype=numpy.double)); svm.set_epsilon(epsilon) svm.parallel.set_num_threads(num_threads) svm.train() kernel.init(feats_train, feats_test) out = svm.classify().get_labels()
labels[19] = 1

# build string features and labels for the two-task toy setup
feat = StringCharFeatures(DNA)
feat.set_features(examples)
lab = Labels(numpy.array(labels))

N = subset_size

##################################################################
# internal modification
##################################################################

# first half of the examples belongs to task 0, second half to task 1
task_vector = [0] * (N / 2)
task_vector.extend([1] * (N / 2))

base_wdk = WeightedDegreeStringKernel(feat, feat, 1)
normalizer = MultitaskKernelNormalizer(task_vector)
#wdk.set_task_vector(task_vector) #, task_vector)

# within-task similarity 4.0, between-task similarity 1.0
for i in xrange(2):
    for j in xrange(2):
        normalizer.set_task_similarity(i, j, 4.0 if i == j else 1.0)

base_wdk.set_normalizer(normalizer)
d = dat["thaliana"] subset_size = 2000 examples = [i.example for i in d[0:subset_size]] labels = [i.label for i in d[0:subset_size]] labels[2] = 1 labels[12] = 1 labels[15] = 1 labels[8] = 1 labels[19] = 1 feat = StringCharFeatures(DNA) feat.set_features(examples) wdk = WeightedDegreeStringKernel(feat, feat, 1) lab = Labels(numpy.array(labels)) svm = SVMLight(1, wdk, lab) svm.train() svm.set_shrinking_enabled(False) print "simple svm", svm.get_objective() print "len(examples)", len(examples) print "##############"
def solver_mtk_shogun(C, all_xt, all_lt, task_indicator, M, L, eps, target_obj): """ implementation using multitask kernel """ xt = numpy.array(all_xt) lt = numpy.array(all_lt) tt = numpy.array(task_indicator, dtype=numpy.int32) tsm = numpy.array(M) print "task_sim:", tsm num_tasks = L.shape[0] # sanity checks assert len(xt) == len(lt) == len(tt) assert M.shape == L.shape assert num_tasks == len(set(tt)) # set up shogun objects if type(xt[0]) == numpy.string_: feat = StringCharFeatures(DNA) xt = [str(a) for a in xt] feat.set_features(xt) base_kernel = WeightedDegreeStringKernel(feat, feat, 8) else: feat = RealFeatures(xt.T) base_kernel = LinearKernel(feat, feat) lab = Labels(lt) # set up normalizer normalizer = MultitaskKernelNormalizer(tt.tolist()) for i in xrange(num_tasks): for j in xrange(num_tasks): normalizer.set_task_similarity(i, j, M[i, j]) print "num of unique tasks: ", normalizer.get_num_unique_tasks( task_indicator) # set up kernel base_kernel.set_cache_size(2000) base_kernel.set_normalizer(normalizer) base_kernel.init_normalizer() # set up svm svm = SVMLight() #LibSVM() svm.set_epsilon(eps) #print "reducing num threads to one" #svm.parallel.set_num_threads(1) #print "using one thread" # how often do we like to compute objective etc svm.set_record_interval(0) svm.set_target_objective(target_obj) svm.set_linadd_enabled(False) svm.set_batch_computation_enabled(False) svm.io.set_loglevel(MSG_DEBUG) #SET THREADS TO 1 svm.set_C(C, C) svm.set_bias_enabled(False) # prepare for training svm.set_labels(lab) svm.set_kernel(base_kernel) # train svm svm.train() train_times = svm.get_training_times() objectives = [-obj for obj in svm.get_dual_objectives()] if False: # get model parameters sv_idx = svm.get_support_vectors() sparse_alphas = svm.get_alphas() assert len(sv_idx) == len(sparse_alphas) # compute dense alpha (remove label) alphas = numpy.zeros(len(xt)) for id_sparse, id_dense in enumerate(sv_idx): alphas[id_dense] = sparse_alphas[id_sparse] * lt[id_dense] # 
print alphas W = alphas_to_w(alphas, xt, lt, task_indicator, M) primal_obj = compute_primal_objective( W.reshape(W.shape[0] * W.shape[1]), C, all_xt, all_lt, task_indicator, L) objectives.append(primal_obj) train_times.append(train_times[-1] + 100) return objectives, train_times
print "len(examples)", len(examples) print "string length", len(examples[0]) labels[2] = 1 labels[12] = 1 labels[15] = 1 labels[8] = 1 labels[19] = 1 feat = StringCharFeatures(DNA) feat.set_features(examples) helper.save("/tmp/feat", feat) feat2 = helper.load("/tmp/feat") wdk = WeightedDegreeStringKernel(feat, feat, 1) print "PY: saving kernel" wdk.io.set_loglevel(MSG_DEBUG) helper.save("/tmp/awesome", wdk) #print wdk.toString() #print "PY: kernel saved, loading kernel" wdk2 = helper.load("/tmp/awesome") print "PY: kernel loaded" #wdk2 = WeightedDegreeStringKernel(feat2, feat2, 1) lab = Labels(numpy.array(labels)) svm = SVMLight(1, wdk2, lab) #print "saving SVM"
############################################# # compute pre-svm ############################################# examples_presvm = [i.example for i in d[0:subset_size]] labels_presvm = [i.label for i in d[0:subset_size]] labels_presvm[2] = 1 labels_presvm[12] = 1 labels_presvm[15] = 1 labels_presvm[8] = 1 labels_presvm[19] = 1 feat_presvm = StringCharFeatures(DNA) feat_presvm.set_features(examples_presvm) wdk_presvm = WeightedDegreeStringKernel(feat_presvm, feat_presvm, 1) lab_presvm = Labels(numpy.array(labels_presvm)) presvm = SVMLight(1, wdk_presvm, lab_presvm) presvm.train() presvm2 = LibSVM(1, wdk_presvm, lab_presvm) presvm2.train() print "svmlight", presvm.get_objective() print "libsvm", presvm2.get_objective() assert (abs(presvm.get_objective() - presvm2.get_objective()) <= 0.001) print "simple svm", presvm.get_objective()