def training_run(options): """Conduct a training run and return a trained SVM kernel""" settings = MotifFinderSettings(kirmes_ini.MOTIF_LENGTH, options.window_width, options.replace) positives = MotifFinder(finder_settings=settings) positives.setFastaFile(options.positives) positives.setMotifs(options.pgff) pmotifs, ppositions = positives.getResults() negatives = MotifFinder(finder_settings=settings) negatives.setFastaFile(options.negatives) negatives.setMotifs(options.ngff) nmotifs, npositions = negatives.getResults() wds_kparams = kirmes_ini.WDS_KERNEL_PARAMETERS wds_svm = EasySVM.EasySVM(wds_kparams) num_positives = len(pmotifs.values()[0]) num_negatives = len(nmotifs.values()[0]) # Creating Kernel Objects kernel = CombinedKernel() features = CombinedFeatures() kernel_array = [] motifs = pmotifs.keys() motifs.sort() # Adding Kmer Kernels for motif in motifs: all_examples = pmotifs[motif] + nmotifs[motif] motif_features = wds_svm.createFeatures(all_examples) wds_kernel = WeightedDegreePositionStringKernel(motif_features, motif_features, wds_kparams["degree"]) wds_kernel.set_shifts(wds_kparams["shift"] * ones(wds_kparams["seqlength"], dtype=int32)) features.append_feature_obj(motif_features) kernel_array.append(wds_kernel) kernel.append_kernel(wds_kernel) rbf_svm = EasySVM.EasySVM(kirmes_ini.RBF_KERNEL_PARAMETERS) positions = array(ppositions + npositions, dtype=float64).T position_features = rbf_svm.createFeatures(positions) features.append_feature_obj(position_features) motif_labels = append(ones(num_positives), -ones(num_negatives)) complete_labels = Labels(motif_labels) rbf_kernel = GaussianKernel(position_features, position_features, kirmes_ini.RBF_KERNEL_PARAMETERS["width"]) kernel_array.append(rbf_kernel) kernel.append_kernel(rbf_kernel) # Kernel init kernel.init(features, features) kernel.set_cache_size(kirmes_ini.K_CACHE_SIZE) svm = LibSVM(kirmes_ini.K_COMBINED_C, kernel, complete_labels) svm.parallel.set_num_threads(kirmes_ini.K_NUM_THREADS) # Training svm.train() if not os.path.exists(options.output_path): os.mkdir(options.output_path) html = {} if options.contrib: html["contrib"] = contrib(svm, kernel, motif_labels, kernel_array, motifs) if options.logos: html["poims"] = poims(svm, kernel, kernel_array, motifs, options.output_path) if options.query: html["query"] = evaluate(options, svm, kernel, features, motifs) htmlize(html, options.output_html)
def create_empty_promoter_kernel(param): """ creates an uninitialized promoter kernel @param param: """ # centered WDK/WDK-shift if param["shifts"] == 0: kernel_center = WeightedDegreeStringKernel(param["degree"]) else: kernel_center = WeightedDegreePositionStringKernel(10, param["degree"]) shifts_vector = numpy.ones(param["center_offset"] * 2, dtype=numpy.int32) * param["shifts"] kernel_center.set_shifts(shifts_vector) kernel_center.set_cache_size(param["kernel_cache"] / 3) # border spetrum kernels size = param["kernel_cache"] / 3 use_sign = False kernel_left = WeightedCommWordStringKernel(size, use_sign) kernel_right = WeightedCommWordStringKernel(size, use_sign) # assemble combined kernel kernel = CombinedKernel() kernel.append_kernel(kernel_center) kernel.append_kernel(kernel_left) kernel.append_kernel(kernel_right) return kernel
def init_sensor(self, kernel, svs): f = StringCharFeatures(svs, DNA) kname = kernel['name'] if kname == 'spectrum': wf = StringWordFeatures(f.get_alphabet()) wf.obtain_from_char(f, kernel['order'] - 1, kernel['order'], 0, False) pre = SortWordString() pre.init(wf) wf.add_preprocessor(pre) wf.apply_preprocessor() f = wf k = CommWordStringKernel(0, False) k.set_use_dict_diagonal_optimization(kernel['order'] < 8) self.preproc = pre elif kname == 'wdshift': k = WeightedDegreePositionStringKernel(0, kernel['order']) k.set_normalizer(IdentityKernelNormalizer()) k.set_shifts( kernel['shift'] * numpy.ones(f.get_max_vector_length(), dtype=numpy.int32)) k.set_position_weights( 1.0 / f.get_max_vector_length() * numpy.ones(f.get_max_vector_length(), dtype=numpy.float64)) else: raise "Currently, only wdshift and spectrum kernels supported" self.kernel = k self.train_features = f return (self.kernel, self.train_features)
def kernel_weighted_degree_position_string_modular(fm_train_dna=traindat,fm_test_dna=testdat,degree=20): from shogun.Features import StringCharFeatures, DNA from shogun.Kernel import WeightedDegreePositionStringKernel, MSG_DEBUG feats_train=StringCharFeatures(fm_train_dna, DNA) #feats_train.io.set_loglevel(MSG_DEBUG) feats_test=StringCharFeatures(fm_test_dna, DNA) kernel=WeightedDegreePositionStringKernel(feats_train, feats_train, degree) from numpy import zeros,ones,float64,int32 kernel.set_shifts(10*ones(len(fm_train_dna[0]), dtype=int32)) kernel.set_position_weights(ones(len(fm_train_dna[0]), dtype=float64)) km_train=kernel.get_kernel_matrix() kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix() return km_train,km_test,kernel
def kernel_weighted_degree_position_string_modular(fm_train_dna=traindat,fm_test_dna=testdat,degree=20): from shogun.Features import StringCharFeatures, DNA from shogun.Kernel import WeightedDegreePositionStringKernel, MSG_DEBUG feats_train=StringCharFeatures(fm_train_dna, DNA) #feats_train.io.set_loglevel(MSG_DEBUG) feats_test=StringCharFeatures(fm_test_dna, DNA) kernel=WeightedDegreePositionStringKernel(feats_train, feats_train, degree) from numpy import zeros,ones,float64,int32 #kernel.set_shifts(zeros(len(fm_train_dna[0]), dtype=int32)) #kernel.set_position_weights(ones(len(fm_train_dna[0]), dtype=float64)) km_train=kernel.get_kernel_matrix() kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix() return km_train,km_test,kernel
def train(self, data, labels): """ model training """ # centered WDK/WDK-shift if self.param["shifts"] == 0: kernel_center = WeightedDegreeStringKernel(self.param["degree"]) else: kernel_center = WeightedDegreePositionStringKernel( 10, self.param["degree"]) shifts_vector = numpy.ones( self.param["center_offset"] * 2, dtype=numpy.int32) * self.param["shifts"] kernel_center.set_shifts(shifts_vector) kernel_center.set_cache_size(self.param["kernel_cache"] / 3) # border spetrum kernels size = self.param["kernel_cache"] / 3 use_sign = False kernel_left = WeightedCommWordStringKernel(size, use_sign) kernel_right = WeightedCommWordStringKernel(size, use_sign) # assemble combined kernel kernel = CombinedKernel() kernel.append_kernel(kernel_center) kernel.append_kernel(kernel_left) kernel.append_kernel(kernel_right) ## building features feat = create_features(data, self.param["center_offset"], self.param["center_pos"]) # init combined kernel kernel.init(feat, feat) print "len(labels) = %i" % (len(labels)) lab = BinaryLabels(numpy.double(labels)) self.svm = SVMLight(self.param["cost"], kernel, lab) # show debugging output self.svm.io.enable_progress() self.svm.io.set_loglevel(MSG_DEBUG) # optimization settings num_threads = 2 self.svm.parallel.set_num_threads(num_threads) self.svm.set_epsilon(10e-8) self.svm.train() return self
def create_kernel(kname, kparam, feats_train): """Call the corresponding constructor for the kernel""" if kname == 'gauss': kernel = GaussianKernel(feats_train, feats_train, kparam['width']) elif kname == 'linear': kernel = LinearKernel(feats_train, feats_train) kernel.set_normalizer(AvgDiagKernelNormalizer(kparam['scale'])) elif kname == 'poly': kernel = PolyKernel(feats_train, feats_train, kparam['degree'], kparam['inhomogene'], kparam['normal']) elif kname == 'wd': kernel = WeightedDegreePositionStringKernel(feats_train, feats_train, kparam['degree']) kernel.set_normalizer( AvgDiagKernelNormalizer(float(kparam['seqlength']))) kernel.set_shifts(kparam['shift'] * numpy.ones(kparam['seqlength'], dtype=numpy.int32)) #kernel=WeightedDegreeStringKernel(feats_train, feats_train, kparam['degree']) elif kname == 'spec': kernel = CommUlongStringKernel(feats_train, feats_train) elif kname == 'cumspec': kernel = WeightedCommWordStringKernel(feats_train, feats_train) kernel.set_weights(numpy.ones(kparam['degree'])) elif kname == 'spec2': kernel = CombinedKernel() k0 = CommWordStringKernel(feats_train['f0'], feats_train['f0']) k0.io.disable_progress() kernel.append_kernel(k0) k1 = CommWordStringKernel(feats_train['f1'], feats_train['f1']) k1.io.disable_progress() kernel.append_kernel(k1) elif kname == 'cumspec2': kernel = CombinedKernel() k0 = WeightedCommWordStringKernel(feats_train['f0'], feats_train['f0']) k0.set_weights(numpy.ones(kparam['degree'])) k0.io.disable_progress() kernel.append_kernel(k0) k1 = WeightedCommWordStringKernel(feats_train['f1'], feats_train['f1']) k1.set_weights(numpy.ones(kparam['degree'])) k1.io.disable_progress() kernel.append_kernel(k1) elif kname == 'localalign': kernel = LocalAlignmentStringKernel(feats_train, feats_train) elif kname == 'localimprove': kernel = LocalityImprovedStringKernel(feats_train, feats_train, kparam['length'],\ kparam['indeg'], kparam['outdeg']) else: print 'Unknown kernel %s' % kname kernel.set_cache_size(32) return kernel
def serialization_string_kernels_modular(n_data, num_shifts, size): """ serialize svm with string kernels """ ################################################## # set up toy data and svm train_xt, train_lt = generate_random_data(n_data) test_xt, test_lt = generate_random_data(n_data) feats_train = construct_features(train_xt) feats_test = construct_features(test_xt) max_len = len(train_xt[0]) kernel_wdk = WeightedDegreePositionStringKernel(size, 5) shifts_vector = numpy.ones(max_len, dtype=numpy.int32) * num_shifts kernel_wdk.set_shifts(shifts_vector) ######## # set up spectrum use_sign = False kernel_spec_1 = WeightedCommWordStringKernel(size, use_sign) kernel_spec_2 = WeightedCommWordStringKernel(size, use_sign) ######## # combined kernel kernel = CombinedKernel() kernel.append_kernel(kernel_wdk) kernel.append_kernel(kernel_spec_1) kernel.append_kernel(kernel_spec_2) # init kernel labels = BinaryLabels(train_lt) svm = SVMLight(1.0, kernel, labels) #svm.io.set_loglevel(MSG_DEBUG) svm.train(feats_train) ################################################## # serialize to file fn = "serialized_svm.bz2" #print("serializing SVM to file", fn) save(fn, svm) ################################################## # unserialize and sanity check #print("unserializing SVM") svm2 = load(fn) #print("comparing predictions") out = svm.apply(feats_test).get_labels() out2 = svm2.apply(feats_test).get_labels() # assert outputs are close for i in xrange(len(out)): assert abs(out[i] - out2[i] < 0.000001) #print("all checks passed.") return out, out2
def serialization_string_kernels_modular(n_data, num_shifts, size): """ serialize svm with string kernels """ ################################################## # set up toy data and svm train_xt, train_lt = generate_random_data(n_data) test_xt, test_lt = generate_random_data(n_data) feats_train = construct_features(train_xt) feats_test = construct_features(test_xt) max_len = len(train_xt[0]) kernel_wdk = WeightedDegreePositionStringKernel(size, 5) shifts_vector = numpy.ones(max_len, dtype=numpy.int32)*num_shifts kernel_wdk.set_shifts(shifts_vector) ######## # set up spectrum use_sign = False kernel_spec_1 = WeightedCommWordStringKernel(size, use_sign) kernel_spec_2 = WeightedCommWordStringKernel(size, use_sign) ######## # combined kernel kernel = CombinedKernel() kernel.append_kernel(kernel_wdk) kernel.append_kernel(kernel_spec_1) kernel.append_kernel(kernel_spec_2) # init kernel labels = BinaryLabels(train_lt); svm = SVMLight(1.0, kernel, labels) #svm.io.set_loglevel(MSG_DEBUG) svm.train(feats_train) ################################################## # serialize to file fn = "serialized_svm.bz2" #print("serializing SVM to file", fn) save(fn, svm) ################################################## # unserialize and sanity check #print("unserializing SVM") svm2 = load(fn) #print("comparing predictions") out = svm.apply(feats_test).get_labels() out2 = svm2.apply(feats_test).get_labels() # assert outputs are close for i in range(len(out)): assert abs(out[i] - out2[i] < 0.000001) #print("all checks passed.") return out,out2
def training_run(options): """Conduct a training run and return a trained SVM kernel""" settings = MotifFinderSettings(kirmes_ini.MOTIF_LENGTH, options.window_width, options.replace) positives = MotifFinder(finder_settings=settings) positives.setFastaFile(options.positives) positives.setMotifs(options.pgff) pmotifs, ppositions = positives.getResults() negatives = MotifFinder(finder_settings=settings) negatives.setFastaFile(options.negatives) negatives.setMotifs(options.ngff) nmotifs, npositions = negatives.getResults() wds_kparams = kirmes_ini.WDS_KERNEL_PARAMETERS wds_svm = EasySVM.EasySVM(wds_kparams) num_positives = len(pmotifs.values()[0]) num_negatives = len(nmotifs.values()[0]) #Creating Kernel Objects kernel = CombinedKernel() features = CombinedFeatures() kernel_array = [] motifs = pmotifs.keys() motifs.sort() #Adding Kmer Kernels for motif in motifs: all_examples = pmotifs[motif] + nmotifs[motif] motif_features = wds_svm.createFeatures(all_examples) wds_kernel = WeightedDegreePositionStringKernel(motif_features, motif_features, \ wds_kparams['degree']) wds_kernel.set_shifts(wds_kparams['shift'] * ones(wds_kparams['seqlength'], dtype=int32)) features.append_feature_obj(motif_features) kernel_array.append(wds_kernel) kernel.append_kernel(wds_kernel) rbf_svm = EasySVM.EasySVM(kirmes_ini.RBF_KERNEL_PARAMETERS) positions = array(ppositions + npositions, dtype=float64).T position_features = rbf_svm.createFeatures(positions) features.append_feature_obj(position_features) motif_labels = append(ones(num_positives), -ones(num_negatives)) complete_labels = Labels(motif_labels) rbf_kernel = GaussianKernel(position_features, position_features, \ kirmes_ini.RBF_KERNEL_PARAMETERS['width']) kernel_array.append(rbf_kernel) kernel.append_kernel(rbf_kernel) #Kernel init kernel.init(features, features) kernel.set_cache_size(kirmes_ini.K_CACHE_SIZE) svm = LibSVM(kirmes_ini.K_COMBINED_C, kernel, complete_labels) svm.parallel.set_num_threads(kirmes_ini.K_NUM_THREADS) #Training svm.train() if not os.path.exists(options.output_path): os.mkdir(options.output_path) html = {} if options.contrib: html["contrib"] = contrib(svm, kernel, motif_labels, kernel_array, motifs) if options.logos: html["poims"] = poims(svm, kernel, kernel_array, motifs, options.output_path) if options.query: html["query"] = evaluate(options, svm, kernel, features, motifs) htmlize(html, options.output_html)