def get_weighted_spectrum_kernel(subfeats_list, options):
    """build weighted spectrum kernel with non-redundant k-mer list
    (removing reverse complement)

    Arguments:
    subfeats_list -- list of sub-feature objects, one per k-mer length
    options -- object containing option data (kmerlen, kmerlen2)

    Return:
    CombinedKernel of CommWord(Ulong)StringKernel, initialized on the
    CombinedFeatures built from subfeats_list
    """
    kmerlen = options.kmerlen
    kmerlen2 = options.kmerlen2

    subkernels = 0
    kernel = CombinedKernel()
    feats = CombinedFeatures()

    for subfeats in subfeats_list:
        feats.append_feature_obj(subfeats)

    # one spectrum kernel per k-mer length; 2-bit DNA words fit into a
    # 16-bit word up to k = 8, longer k-mers need the 64-bit variant
    for k in xrange(kmerlen, kmerlen2 + 1):
        if k <= 8:
            subkernel = CommWordStringKernel(10, False)
        else:
            subkernel = CommUlongStringKernel(10, False)

        kernel.append_kernel(subkernel)
        subkernels += 1

    kernel.init(feats, feats)

    # uniform weight for each k-mer length
    kernel.set_subkernel_weights(
        numpy.array([1 / float(subkernels)] * subkernels, numpy.dtype('float64')))

    return kernel
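# A minimal sketch of how subfeats_list might be assembled for the function
# above, assuming one StringWordFeatures per k-mer length built from DNA
# strings; the Options container and make_subfeats helper are hypothetical,
# the original pipeline's construction is not shown here. Spectrum kernels
# conventionally expect sorted word features, hence the SortWordString step.
from shogun.Features import StringCharFeatures, StringWordFeatures, DNA
from shogun.PreProc import SortWordString

def make_subfeats(seqs, k):
    # hypothetical helper: order-k word features over DNA strings
    char_feats = StringCharFeatures(seqs, DNA)
    word_feats = StringWordFeatures(char_feats.get_alphabet())
    word_feats.obtain_from_char(char_feats, k - 1, k, 0, False)
    preproc = SortWordString()
    preproc.init(word_feats)
    word_feats.add_preproc(preproc)
    word_feats.apply_preproc()
    return word_feats

class Options(object):
    # mirrors the two fields read by get_weighted_spectrum_kernel
    kmerlen = 3
    kmerlen2 = 5

seqs = ["ACGTACGTAC", "TTGCAACGTT"]
subfeats_list = [make_subfeats(seqs, k)
                 for k in range(Options.kmerlen, Options.kmerlen2 + 1)]
kernel = get_weighted_spectrum_kernel(subfeats_list, Options())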
def kernel_combined_modular(fm_train_real=traindat, fm_test_real=testdat,
                            fm_train_dna=traindna, fm_test_dna=testdna):
    from shogun.Kernel import CombinedKernel, GaussianKernel, \
        FixedDegreeStringKernel, LocalAlignmentStringKernel
    from shogun.Features import RealFeatures, StringCharFeatures, \
        CombinedFeatures, DNA

    kernel = CombinedKernel()
    feats_train = CombinedFeatures()
    feats_test = CombinedFeatures()

    subkfeats_train = RealFeatures(fm_train_real)
    subkfeats_test = RealFeatures(fm_test_real)
    subkernel = GaussianKernel(10, 1.1)
    feats_train.append_feature_obj(subkfeats_train)
    feats_test.append_feature_obj(subkfeats_test)
    kernel.append_kernel(subkernel)

    subkfeats_train = StringCharFeatures(fm_train_dna, DNA)
    subkfeats_test = StringCharFeatures(fm_test_dna, DNA)
    degree = 3
    subkernel = FixedDegreeStringKernel(10, degree)
    feats_train.append_feature_obj(subkfeats_train)
    feats_test.append_feature_obj(subkfeats_test)
    kernel.append_kernel(subkernel)

    subkfeats_train = StringCharFeatures(fm_train_dna, DNA)
    subkfeats_test = StringCharFeatures(fm_test_dna, DNA)
    subkernel = LocalAlignmentStringKernel(10)
    feats_train.append_feature_obj(subkfeats_train)
    feats_test.append_feature_obj(subkfeats_test)
    kernel.append_kernel(subkernel)

    kernel.init(feats_train, feats_train)
    km_train = kernel.get_kernel_matrix()

    kernel.init(feats_train, feats_test)
    km_test = kernel.get_kernel_matrix()

    return km_train, km_test, kernel
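# A hypothetical driver for the example above. All sub-features inside a
# CombinedFeatures must contain the same number of vectors, so the real
# matrices hold three training and two test examples to match the DNA
# strings; the data values are placeholders.
from numpy.random import rand

traindat, testdat = rand(2, 3), rand(2, 2)
traindna = ["ACGTACGTAC", "GATTACAGAT", "CCGGTTAACC"]
testdna = ["ACGTAGGTAC", "GATCACAGAT"]
km_train, km_test, kernel = kernel_combined_modular(traindat, testdat,
                                                    traindna, testdna)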
def mkl_multiclass():
    # expects fm_train_real, fm_test_real and label_train_multiclass to be
    # defined at module level
    print 'mkl_multiclass'
    from shogun.Features import CombinedFeatures, RealFeatures, Labels
    from shogun.Kernel import CombinedKernel, GaussianKernel, LinearKernel, PolyKernel
    from shogun.Classifier import MKLMultiClass

    width = 1.2
    C = 1.2
    epsilon = 1e-5
    num_threads = 1

    kernel = CombinedKernel()
    feats_train = CombinedFeatures()
    feats_test = CombinedFeatures()

    subkfeats_train = RealFeatures(fm_train_real)
    subkfeats_test = RealFeatures(fm_test_real)
    subkernel = GaussianKernel(10, width)
    feats_train.append_feature_obj(subkfeats_train)
    feats_test.append_feature_obj(subkfeats_test)
    kernel.append_kernel(subkernel)

    subkfeats_train = RealFeatures(fm_train_real)
    subkfeats_test = RealFeatures(fm_test_real)
    subkernel = LinearKernel()
    feats_train.append_feature_obj(subkfeats_train)
    feats_test.append_feature_obj(subkfeats_test)
    kernel.append_kernel(subkernel)

    subkfeats_train = RealFeatures(fm_train_real)
    subkfeats_test = RealFeatures(fm_test_real)
    subkernel = PolyKernel(10, 2)
    feats_train.append_feature_obj(subkfeats_train)
    feats_test.append_feature_obj(subkfeats_test)
    kernel.append_kernel(subkernel)

    kernel.init(feats_train, feats_train)

    labels = Labels(label_train_multiclass)

    mkl = MKLMultiClass(C, kernel, labels)
    mkl.set_epsilon(epsilon)
    mkl.parallel.set_num_threads(num_threads)
    mkl.set_mkl_epsilon(0.001)

    mkl.train()

    kernel.init(feats_train, feats_test)

    out = mkl.classify().get_labels()
    print out
def mkl_binclass_modular(train_data, test_data, train_labels, test_labels, d1, d2):
    from shogun.Features import RealFeatures, CombinedFeatures, Labels
    from shogun.Kernel import GaussianKernel, CombinedKernel, CustomKernel
    from shogun.Classifier import MKLClassification
    from numpy import where

    # create some Gaussian train/test matrix
    tfeats = RealFeatures(train_data)
    tkernel = GaussianKernel(128, d1)
    tkernel.init(tfeats, tfeats)
    K_train = tkernel.get_kernel_matrix()

    pfeats = RealFeatures(test_data)
    tkernel.init(tfeats, pfeats)
    K_test = tkernel.get_kernel_matrix()

    # create combined train features
    feats_train = CombinedFeatures()
    feats_train.append_feature_obj(RealFeatures(train_data))

    # and corresponding combined kernel
    kernel = CombinedKernel()
    kernel.append_kernel(CustomKernel(K_train))
    kernel.append_kernel(GaussianKernel(128, d2))
    kernel.init(feats_train, feats_train)

    # train mkl
    labels = Labels(train_labels)
    mkl = MKLClassification()

    # not to use svmlight
    mkl.set_interleaved_optimization_enabled(0)

    # which norm to use for MKL
    mkl.set_mkl_norm(2)

    # set cost (neg, pos)
    mkl.set_C(1, 1)

    # set kernel and labels
    mkl.set_kernel(kernel)
    mkl.set_labels(labels)

    # train
    mkl.train()

    # test: create combined test features
    feats_pred = CombinedFeatures()
    feats_pred.append_feature_obj(RealFeatures(test_data))

    # and corresponding combined kernel
    kernel = CombinedKernel()
    kernel.append_kernel(CustomKernel(K_test))
    kernel.append_kernel(GaussianKernel(128, d2))
    kernel.init(feats_train, feats_pred)

    # and classify
    mkl.set_kernel(kernel)
    output = mkl.apply().get_labels()
    output = [1.0 if i > 0 else -1.0 for i in output]
    accu = len(where(output == test_labels)[0]) / float(len(output))
    return accu
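# A hypothetical call for the function above: two Gaussian blobs with labels
# in {-1, +1}; the kernel widths 1.0 and 2.0 for the precomputed and the
# on-the-fly Gaussian kernel are arbitrary choices.
import numpy

numpy.random.seed(17)
pos = numpy.random.randn(2, 20) + 1
neg = numpy.random.randn(2, 20) - 1
train_data = numpy.hstack([pos[:, :10], neg[:, :10]])
test_data = numpy.hstack([pos[:, 10:], neg[:, 10:]])
train_labels = numpy.array([1.0] * 10 + [-1.0] * 10)
test_labels = numpy.array([1.0] * 10 + [-1.0] * 10)
print mkl_binclass_modular(train_data, test_data, train_labels, test_labels, 1.0, 2.0)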
def training_run(options):
    """Conduct a training run and return a trained SVM kernel"""
    settings = MotifFinderSettings(kirmes_ini.MOTIF_LENGTH,
                                   options.window_width,
                                   options.replace)
    positives = MotifFinder(finder_settings=settings)
    positives.setFastaFile(options.positives)
    positives.setMotifs(options.pgff)
    pmotifs, ppositions = positives.getResults()
    negatives = MotifFinder(finder_settings=settings)
    negatives.setFastaFile(options.negatives)
    negatives.setMotifs(options.ngff)
    nmotifs, npositions = negatives.getResults()

    wds_kparams = kirmes_ini.WDS_KERNEL_PARAMETERS
    wds_svm = EasySVM.EasySVM(wds_kparams)
    num_positives = len(pmotifs.values()[0])
    num_negatives = len(nmotifs.values()[0])

    # Creating Kernel Objects
    kernel = CombinedKernel()
    features = CombinedFeatures()
    kernel_array = []
    motifs = pmotifs.keys()
    motifs.sort()

    # Adding Kmer Kernels
    for motif in motifs:
        all_examples = pmotifs[motif] + nmotifs[motif]
        motif_features = wds_svm.createFeatures(all_examples)
        wds_kernel = WeightedDegreePositionStringKernel(motif_features, motif_features,
                                                        wds_kparams["degree"])
        wds_kernel.set_shifts(wds_kparams["shift"] *
                              ones(wds_kparams["seqlength"], dtype=int32))
        features.append_feature_obj(motif_features)
        kernel_array.append(wds_kernel)
        kernel.append_kernel(wds_kernel)

    rbf_svm = EasySVM.EasySVM(kirmes_ini.RBF_KERNEL_PARAMETERS)
    positions = array(ppositions + npositions, dtype=float64).T
    position_features = rbf_svm.createFeatures(positions)
    features.append_feature_obj(position_features)
    motif_labels = append(ones(num_positives), -ones(num_negatives))
    complete_labels = Labels(motif_labels)
    rbf_kernel = GaussianKernel(position_features, position_features,
                                kirmes_ini.RBF_KERNEL_PARAMETERS["width"])
    kernel_array.append(rbf_kernel)
    kernel.append_kernel(rbf_kernel)

    # Kernel init
    kernel.init(features, features)
    kernel.set_cache_size(kirmes_ini.K_CACHE_SIZE)
    svm = LibSVM(kirmes_ini.K_COMBINED_C, kernel, complete_labels)
    svm.parallel.set_num_threads(kirmes_ini.K_NUM_THREADS)

    # Training
    svm.train()
    if not os.path.exists(options.output_path):
        os.mkdir(options.output_path)

    html = {}
    if options.contrib:
        html["contrib"] = contrib(svm, kernel, motif_labels, kernel_array, motifs)
    if options.logos:
        html["poims"] = poims(svm, kernel, kernel_array, motifs, options.output_path)
    if options.query:
        html["query"] = evaluate(options, svm, kernel, features, motifs)
    htmlize(html, options.output_html)
def train(self, data, labels):
    """
    model training
    """
    # centered WDK/WDK-shift
    if self.param["shifts"] == 0:
        kernel_center = WeightedDegreeStringKernel(self.param["degree"])
    else:
        kernel_center = WeightedDegreePositionStringKernel(10, self.param["degree"])
        shifts_vector = numpy.ones(self.param["center_offset"] * 2,
                                   dtype=numpy.int32) * self.param["shifts"]
        kernel_center.set_shifts(shifts_vector)

    kernel_center.set_cache_size(self.param["kernel_cache"] / 3)

    # border spectrum kernels
    size = self.param["kernel_cache"] / 3
    use_sign = False
    kernel_left = WeightedCommWordStringKernel(size, use_sign)
    kernel_right = WeightedCommWordStringKernel(size, use_sign)

    # assemble combined kernel
    kernel = CombinedKernel()
    kernel.append_kernel(kernel_center)
    kernel.append_kernel(kernel_left)
    kernel.append_kernel(kernel_right)

    # building features
    feat = create_features(data, self.param["center_offset"], self.param["center_pos"])

    # init combined kernel
    kernel.init(feat, feat)

    print "len(labels) = %i" % (len(labels))
    lab = BinaryLabels(numpy.double(labels))
    self.svm = SVMLight(self.param["cost"], kernel, lab)

    # show debugging output
    self.svm.io.enable_progress()
    self.svm.io.set_loglevel(MSG_DEBUG)

    # optimization settings
    num_threads = 2
    self.svm.parallel.set_num_threads(num_threads)
    self.svm.set_epsilon(10e-8)

    self.svm.train()

    return self
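# create_features above is a project-specific helper that is not part of this
# snippet. Given the three subkernels assembled there (one centered WD kernel
# plus two word kernels for the flanks), it presumably returns a
# CombinedFeatures with three matching sub-feature sets. A rough,
# assumption-laden reconstruction; all names, the slicing scheme and the
# 3-mer order are chosen purely for illustration.
from shogun.Features import CombinedFeatures, StringCharFeatures, \
    StringWordFeatures, DNA
from shogun.PreProc import SortWordString

def create_features(data, center_offset, center_pos):
    # center slice as char features for the WD kernel (equal-length input
    # strings assumed), flanks as sorted 3-mer word features for the two
    # WeightedCommWordStringKernels
    center = [s[center_pos - center_offset:center_pos + center_offset] for s in data]
    flanks = ([s[:center_pos] for s in data], [s[center_pos:] for s in data])

    feats = CombinedFeatures()
    feats.append_feature_obj(StringCharFeatures(center, DNA))

    for flank in flanks:
        char_feats = StringCharFeatures(flank, DNA)
        word_feats = StringWordFeatures(char_feats.get_alphabet())
        word_feats.obtain_from_char(char_feats, 2, 3, 0, False)
        preproc = SortWordString()
        preproc.init(word_feats)
        word_feats.add_preproc(preproc)
        word_feats.apply_preproc()
        feats.append_feature_obj(word_feats)

    return feats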
def get_weighted_spectrum_kernel(subfeats_list, options):
    """build weighted spectrum kernel with non-redundant k-mer list
    (removing reverse complement)

    Arguments:
    subfeats_list -- list of sub-feature objects, one per k-mer length
    options -- object containing option data (kmerlen, kmerlen2)

    Return:
    CombinedKernel of CommWord(Ulong)StringKernel, initialized on the
    CombinedFeatures built from subfeats_list
    """
    kmerlen = options.kmerlen
    kmerlen2 = options.kmerlen2

    subkernels = 0
    kernel = CombinedKernel()
    feats = CombinedFeatures()
    weights = []

    i = 0
    for subfeats in subfeats_list:
        feats.append_feature_obj(subfeats)

        # NOTE: the k-mer counting below computes `number` and `klen`, but
        # neither is used afterwards (nor is `weights`); it also reuses the
        # loop variable `i`, so `klen` is based on the last vector index
        combine_kcount = Counter()
        for i in xrange(subfeats.get_num_vectors()):
            fv = list(subfeats.get_feature_vector(i))
            combine_kcount += Counter(fv)
        number = len(combine_kcount)
        klen = kmerlen + i

    for k in xrange(kmerlen, kmerlen2 + 1):
        if k <= 8:
            subkernel = CommWordStringKernel(10, False)
        else:
            subkernel = CommUlongStringKernel(10, False)

        kernel.append_kernel(subkernel)
        subkernels += 1

    kernel.init(feats, feats)

    # here the weight for each k-mer is uniform, e.g. for subkernels = 8:
    # numpy.array([1 / float(subkernels)] * subkernels, numpy.dtype('float64'))
    # -> array([0.125, 0.125, 0.125, 0.125, 0.125, 0.125, 0.125, 0.125])
    kernel.set_subkernel_weights(
        numpy.array([1 / float(subkernels)] * subkernels, numpy.dtype('float64')))

    return kernel
def mkl_multiclass_modular(fm_train_real, fm_test_real, label_train_multiclass,
                           width, C, epsilon, num_threads, mkl_epsilon, mkl_norm):
    from shogun.Features import CombinedFeatures, RealFeatures, MulticlassLabels
    from shogun.Kernel import CombinedKernel, GaussianKernel, LinearKernel, PolyKernel
    from shogun.Classifier import MKLMulticlass

    kernel = CombinedKernel()
    feats_train = CombinedFeatures()
    feats_test = CombinedFeatures()

    subkfeats_train = RealFeatures(fm_train_real)
    subkfeats_test = RealFeatures(fm_test_real)
    subkernel = GaussianKernel(10, width)
    feats_train.append_feature_obj(subkfeats_train)
    feats_test.append_feature_obj(subkfeats_test)
    kernel.append_kernel(subkernel)

    subkfeats_train = RealFeatures(fm_train_real)
    subkfeats_test = RealFeatures(fm_test_real)
    subkernel = LinearKernel()
    feats_train.append_feature_obj(subkfeats_train)
    feats_test.append_feature_obj(subkfeats_test)
    kernel.append_kernel(subkernel)

    subkfeats_train = RealFeatures(fm_train_real)
    subkfeats_test = RealFeatures(fm_test_real)
    subkernel = PolyKernel(10, 2)
    feats_train.append_feature_obj(subkfeats_train)
    feats_test.append_feature_obj(subkfeats_test)
    kernel.append_kernel(subkernel)

    kernel.init(feats_train, feats_train)

    labels = MulticlassLabels(label_train_multiclass)

    mkl = MKLMulticlass(C, kernel, labels)
    mkl.set_epsilon(epsilon)
    mkl.parallel.set_num_threads(num_threads)
    mkl.set_mkl_epsilon(mkl_epsilon)
    mkl.set_mkl_norm(mkl_norm)

    mkl.train()

    kernel.init(feats_train, feats_test)

    out = mkl.apply().get_labels()
    return out
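# A hypothetical invocation of the function above on a three-class toy
# problem; the width, C, epsilon and MKL settings are arbitrary but mirror
# the values hardcoded elsewhere in this collection.
import numpy

numpy.random.seed(42)
fm_train = numpy.hstack([numpy.random.randn(2, 10) + 3 * c for c in range(3)])
fm_test = numpy.hstack([numpy.random.randn(2, 5) + 3 * c for c in range(3)])
labels = numpy.array([0.0] * 10 + [1.0] * 10 + [2.0] * 10)
out = mkl_multiclass_modular(fm_train, fm_test, labels, width=1.2, C=1.2,
                             epsilon=1e-5, num_threads=1,
                             mkl_epsilon=0.001, mkl_norm=2)
print out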
# variant of the function above using the older shogun API names
# (Labels instead of MulticlassLabels, MKLMultiClass instead of MKLMulticlass)
def mkl_multiclass_modular(fm_train_real, fm_test_real, label_train_multiclass,
                           width, C, epsilon, num_threads, mkl_epsilon, mkl_norm):
    from shogun.Features import CombinedFeatures, RealFeatures, Labels
    from shogun.Kernel import CombinedKernel, GaussianKernel, LinearKernel, PolyKernel
    from shogun.Classifier import MKLMultiClass

    kernel = CombinedKernel()
    feats_train = CombinedFeatures()
    feats_test = CombinedFeatures()

    subkfeats_train = RealFeatures(fm_train_real)
    subkfeats_test = RealFeatures(fm_test_real)
    subkernel = GaussianKernel(10, width)
    feats_train.append_feature_obj(subkfeats_train)
    feats_test.append_feature_obj(subkfeats_test)
    kernel.append_kernel(subkernel)

    subkfeats_train = RealFeatures(fm_train_real)
    subkfeats_test = RealFeatures(fm_test_real)
    subkernel = LinearKernel()
    feats_train.append_feature_obj(subkfeats_train)
    feats_test.append_feature_obj(subkfeats_test)
    kernel.append_kernel(subkernel)

    subkfeats_train = RealFeatures(fm_train_real)
    subkfeats_test = RealFeatures(fm_test_real)
    subkernel = PolyKernel(10, 2)
    feats_train.append_feature_obj(subkfeats_train)
    feats_test.append_feature_obj(subkfeats_test)
    kernel.append_kernel(subkernel)

    kernel.init(feats_train, feats_train)

    labels = Labels(label_train_multiclass)

    mkl = MKLMultiClass(C, kernel, labels)
    mkl.set_epsilon(epsilon)
    mkl.parallel.set_num_threads(num_threads)
    mkl.set_mkl_epsilon(mkl_epsilon)
    mkl.set_mkl_norm(mkl_norm)

    mkl.train()

    kernel.init(feats_train, feats_test)

    out = mkl.apply().get_labels()
    return out
def kernel_combined_custom_poly_modular(fm_train_real=traindat, fm_test_real=testdat,
                                        fm_label_twoclass=label_traindat):
    from shogun.Features import CombinedFeatures, RealFeatures, Labels
    from shogun.Kernel import CombinedKernel, PolyKernel, CustomKernel
    from shogun.Classifier import LibSVM

    kernel = CombinedKernel()
    feats_train = CombinedFeatures()

    # a custom (precomputed) degree-3 polynomial kernel on the training data
    tfeats = RealFeatures(fm_train_real)
    tkernel = PolyKernel(10, 3)
    tkernel.init(tfeats, tfeats)
    K = tkernel.get_kernel_matrix()
    kernel.append_kernel(CustomKernel(K))

    # plus an ordinary degree-2 polynomial kernel
    subkfeats_train = RealFeatures(fm_train_real)
    feats_train.append_feature_obj(subkfeats_train)
    subkernel = PolyKernel(10, 2)
    kernel.append_kernel(subkernel)
    kernel.init(feats_train, feats_train)

    labels = Labels(fm_label_twoclass)
    svm = LibSVM(1.0, kernel, labels)
    svm.train()

    # rebuild the combined kernel for prediction: precomputed train-vs-test
    # block plus the polynomial kernel initialised train vs. test
    kernel = CombinedKernel()
    feats_pred = CombinedFeatures()

    pfeats = RealFeatures(fm_test_real)
    tkernel = PolyKernel(10, 3)
    tkernel.init(tfeats, pfeats)
    K = tkernel.get_kernel_matrix()
    kernel.append_kernel(CustomKernel(K))

    subkfeats_test = RealFeatures(fm_test_real)
    feats_pred.append_feature_obj(subkfeats_test)
    subkernel = PolyKernel(10, 2)
    kernel.append_kernel(subkernel)
    kernel.init(feats_train, feats_pred)

    svm.set_kernel(kernel)
    svm.classify()

    # note: at this point the combined kernel is initialised train vs. test,
    # so despite the name this is the test kernel matrix
    km_train = kernel.get_kernel_matrix()
    return km_train, kernel
def mkl_binclass_modular(fm_train_real=traindat, fm_test_real=testdat,
                         fm_label_twoclass=label_traindat):
    # NOTE: strings1, strings2 and traindat are expected at module level;
    # the fm_train_real/fm_test_real arguments are not used by this snippet
    sc1 = StringCharFeatures(strings1, RAWBYTE)
    sc2 = StringCharFeatures(strings2, RAWBYTE)

    sfeats1 = StringWordFeatures(RAWBYTE)
    sfeats1.obtain_from_char(sc1, 0, 2, 0, False)
    skernel1 = CommWordStringKernel(10, False)
    skernel1.init(sfeats1, sfeats1)

    sfeats2 = StringWordFeatures(RAWBYTE)
    sfeats2.obtain_from_char(sc2, 0, 2, 0, False)
    skernel2 = CommWordStringKernel(10, False)
    skernel2.init(sfeats2, sfeats2)

    ffeats = RealFeatures(traindat)
    fkernel = LinearKernel(ffeats, ffeats)

    fffeats = RealFeatures(traindat)
    ffkernel = GaussianKernel(fffeats, fffeats, 1.0)

    # COMBINING LINADD FEATURES/KERNELS LEAD TO FAIL
    feats_train = CombinedFeatures()
    feats_train.append_feature_obj(ffeats)
    #feats_train.append_feature_obj(fffeats)
    feats_train.append_feature_obj(sfeats2)

    print feats_train.get_num_vectors()

    kernel = CombinedKernel()
    kernel.append_kernel(fkernel)
    #kernel.append_kernel(ffkernel)
    kernel.append_kernel(skernel2)
    kernel.init(feats_train, feats_train)

    labels = RegressionLabels(fm_label_twoclass)
    mkl = MKLRegression()
    mkl.set_mkl_norm(1)  #2,3
    mkl.set_C(1, 1)
    mkl.set_kernel(kernel)
    mkl.set_labels(labels)
    mkl.io.enable_file_and_line()
    mkl.io.set_loglevel(MSG_DEBUG)
    # the original snippet ends here; presumably mkl.train() follows
def kernel_combined_custom_poly_modular(fm_train_real=traindat, fm_test_real=testdat,
                                        fm_label_twoclass=label_traindat):
    from shogun.Features import CombinedFeatures, RealFeatures, BinaryLabels
    from shogun.Kernel import CombinedKernel, PolyKernel, CustomKernel
    from shogun.Classifier import LibSVM

    kernel = CombinedKernel()
    feats_train = CombinedFeatures()

    tfeats = RealFeatures(fm_train_real)
    tkernel = PolyKernel(10, 3)
    tkernel.init(tfeats, tfeats)
    K = tkernel.get_kernel_matrix()
    kernel.append_kernel(CustomKernel(K))

    subkfeats_train = RealFeatures(fm_train_real)
    feats_train.append_feature_obj(subkfeats_train)
    subkernel = PolyKernel(10, 2)
    kernel.append_kernel(subkernel)
    kernel.init(feats_train, feats_train)

    labels = BinaryLabels(fm_label_twoclass)
    svm = LibSVM(1.0, kernel, labels)
    svm.train()

    kernel = CombinedKernel()
    feats_pred = CombinedFeatures()

    pfeats = RealFeatures(fm_test_real)
    tkernel = PolyKernel(10, 3)
    tkernel.init(tfeats, pfeats)
    K = tkernel.get_kernel_matrix()
    kernel.append_kernel(CustomKernel(K))

    subkfeats_test = RealFeatures(fm_test_real)
    feats_pred.append_feature_obj(subkfeats_test)
    subkernel = PolyKernel(10, 2)
    kernel.append_kernel(subkernel)
    kernel.init(feats_train, feats_pred)

    svm.set_kernel(kernel)
    svm.apply()

    km_train = kernel.get_kernel_matrix()
    return km_train, kernel
def create_combined_wd_kernel(instances, param):
    """
    creates a combined WD kernel object with one
    WeightedDegreeStringKernel per feature column
    """
    num_features = len(instances[0])

    # construct combined kernel
    kernel = CombinedKernel()

    for idx in range(num_features):
        param.kernel = "WeightedDegreeStringKernel"
        tmp_kernel = create_empty_kernel(param)
        kernel.append_kernel(tmp_kernel)

    combined_features = create_combined_wd_features(instances, feat_type="dna")

    kernel.init(combined_features, combined_features)

    return kernel
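# create_empty_kernel and create_combined_wd_features above are factory
# helpers from the surrounding project. A minimal sketch of the one factory
# branch this function relies on; the param.wdk_degree field is an
# assumption, the real factory supports more kernel types.
from shogun.Kernel import WeightedDegreeStringKernel

def create_empty_kernel(param):
    # only the branch exercised by create_combined_wd_kernel
    if param.kernel == "WeightedDegreeStringKernel":
        return WeightedDegreeStringKernel(param.wdk_degree)
    raise NotImplementedError("unknown kernel type: %s" % param.kernel)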
def _train(self, train_data, param):
    """
    training procedure using training examples and labels

    @param train_data: Data relevant to SVM training
    @type train_data: dict<str, list<instances> >
    @param param: Parameters for the training procedure
    @type param: ParameterSvm
    """
    # dict to save additional information for later analysis
    self.additional_information = {}

    # merge data sets
    data = PreparedMultitaskData(train_data, shuffle=True)

    # create shogun label
    lab = shogun_factory.create_labels(data.labels)

    ########################################################
    print "creating a kernel for each node:"
    ########################################################

    # assemble combined kernel
    combined_kernel = CombinedKernel()
    combined_kernel.io.set_loglevel(shogun.Kernel.MSG_INFO)

    base_features = shogun_factory.create_features(data.examples, param)
    combined_features = CombinedFeatures()

    ##################################################
    # intra-domain blocks (dirac kernel)

    intra_block_vec = PairiiVec()
    for task_id in data.get_task_ids():
        intra_block_vec.push_back(Pairii(task_id, task_id))

    # create mask-based normalizer
    normalizer = MultitaskKernelMaskPairNormalizer(data.task_vector_nums, intra_block_vec)
    kernel = shogun_factory.create_empty_kernel(param)
    kernel.set_normalizer(normalizer)

    # append current kernel to CombinedKernel
    combined_kernel.append_kernel(kernel)

    # append features
    combined_features.append_feature_obj(base_features)

    print "------"

    ##################################################
    # all blocks (full kernel matrix)

    all_block_vec = PairiiVec()
    for task_id_1 in data.get_task_ids():
        for task_id_2 in data.get_task_ids():
            all_block_vec.push_back(Pairii(task_id_1, task_id_2))

    # create mask-based normalizer
    normalizer_all = MultitaskKernelMaskPairNormalizer(data.task_vector_nums, all_block_vec)
    kernel_all = shogun_factory.create_empty_kernel(param)
    kernel_all.set_normalizer(normalizer_all)

    # append current kernel to CombinedKernel
    combined_kernel.append_kernel(kernel_all)

    # append features
    combined_features.append_feature_obj(base_features)

    ##################################################
    # hack (disabled experiment: extra masked block for selected task pairs)
    #
    # hack_block_vec = PairiiVec()
    # for task_id_1 in data.get_task_ids():
    #     for task_id_2 in data.get_task_ids():
    #         hack_block_vec.push_back(Pairii(task_id_1, task_id_2))
    # hack_block_vec.push_back(Pairii(data.name_to_id("B_2705"), data.name_to_id("B_4001")))
    # other_group = ["B_0702", "B_1501", "B_5801"]
    # for task_id_1 in other_group:
    #     for task_id_2 in other_group:
    #         hack_block_vec.push_back(Pairii(data.name_to_id(task_id_1), data.name_to_id(task_id_2)))
    #
    # # create mask-based normalizer
    # normalizer_hack = MultitaskKernelMaskPairNormalizer(data.task_vector_nums, hack_block_vec)
    # kernel_hack = shogun_factory.create_empty_kernel(param)
    # kernel_hack.set_normalizer(normalizer_hack)
    #
    # # append current kernel to CombinedKernel
    # combined_kernel.append_kernel(kernel_hack)
    #
    # # append features
    # combined_features.append_feature_obj(base_features)

    ##################################################
    # init combined kernel

    combined_kernel.init(combined_features, combined_features)
    #combined_kernel.precompute_subkernels()

    self.additional_information["mkl weights before"] = combined_kernel.get_subkernel_weights()

    print "subkernel weights:", combined_kernel.get_subkernel_weights()

    svm = None

    print "using MKL:", (param.flags["mkl_q"] >= 1.0)

    if param.flags["mkl_q"] >= 1.0:
        svm = MKLClassification()
        svm.set_mkl_norm(param.flags["mkl_q"])
        svm.set_kernel(combined_kernel)
        svm.set_labels(lab)
    else:
        # create SVM (disable unsupported optimizations)
        combined_kernel.set_cache_size(500)
        svm = SVMLight(param.cost, combined_kernel, lab)

    num_threads = 8
    svm.io.enable_progress()
    svm.io.set_loglevel(shogun.Classifier.MSG_INFO)

    svm.parallel.set_num_threads(num_threads)
    svm.set_linadd_enabled(False)
    svm.set_batch_computation_enabled(False)

    svm.set_epsilon(0.03)

    # set cost
    if param.flags["normalize_cost"]:
        norm_c_pos = param.cost / float(len([l for l in data.labels if l == 1]))
        norm_c_neg = param.cost / float(len([l for l in data.labels if l == -1]))
        svm.set_C(norm_c_neg, norm_c_pos)
    else:
        svm.set_C(param.cost, param.cost)

    svm.train()

    print "subkernel weights (after):", combined_kernel.get_subkernel_weights()

    ########################################################
    print "svm objective:"
    print svm.get_objective()

    self.additional_information["svm_objective"] = svm.get_objective()
    self.additional_information["svm num sv"] = svm.get_num_support_vectors()
    self.additional_information["mkl weights post-training"] = combined_kernel.get_subkernel_weights()
    ########################################################

    # wrap up predictors
    svms = {}

    # use a reference to the same svm several times
    for task_name in train_data.keys():
        svms[task_name] = (data.name_to_id(task_name), combined_kernel, svm, param)

    return svms
def _train(self, train_data, param):
    """
    training procedure using training examples and labels

    @param train_data: Data relevant to SVM training
    @type train_data: dict<str, list<instances> >
    @param param: Parameters for the training procedure
    @type param: ParameterSvm
    """
    # merge data sets
    data = PreparedMultitaskData(train_data, shuffle=False)

    # create shogun data objects
    base_wdk = shogun_factory.create_empty_kernel(param)
    lab = shogun_factory.create_labels(data.labels)

    combined_kernel = CombinedKernel()
    combined_kernel.io.set_loglevel(shogun.Kernel.MSG_INFO)
    base_features = shogun_factory.create_features(data.examples)
    combined_features = CombinedFeatures()

    # set normalizer
    normalizer = MultitaskKernelNormalizer(data.task_vector_nums)

    # load data
    #f = file("/fml/ag-raetsch/home/cwidmer/Documents/phd/projects/multitask/data/mhc/MHC_Distanzen/MHC_Distanzen/ALL_PseudoSeq_BlosumEnc_pearson.txt")
    f = file("/fml/ag-raetsch/home/cwidmer/Documents/phd/projects/multitask/data/mhc/MHC_Distanzen/MHC_Distanzen/All_PseudoSeq_Hamming.txt")
    #f = file("/fml/ag-raetsch/home/cwidmer/Documents/phd/projects/multitask/data/mhc/MHC_Distanzen/MHC_Distanzen/ALL_PseudoSeq_BlosumEnc_euklid.txt")
    #f = file("/fml/ag-raetsch/home/cwidmer/Documents/phd/projects/multitask/data/mhc/MHC_Distanzen/MHC_Distanzen/ALL_RAxML.txt")

    num_lines = int(f.readline().strip())
    task_distances = numpy.zeros((num_lines, num_lines))
    name_to_id = {}
    for (i, line) in enumerate(f):
        tokens = line.strip().split("\t")
        name = str(tokens[0])
        name_to_id[name] = i
        entry = numpy.array([v for (j, v) in enumerate(tokens) if j != 0])
        assert len(entry) == num_lines, "len_entry %i, num_lines %i" % (len(entry), num_lines)
        task_distances[i, :] = entry

    # cut relevant submatrix
    active_ids = [name_to_id[name] for name in data.get_task_names()]
    tmp_distances = task_distances[active_ids, :]
    tmp_distances = tmp_distances[:, active_ids]
    print "distances ", tmp_distances.shape

    # normalize distances
    task_distances = task_distances / numpy.max(tmp_distances)

    similarities = numpy.zeros((data.get_num_tasks(), data.get_num_tasks()))

    # convert distance to similarity
    for task_name_lhs in data.get_task_names():
        for task_name_rhs in data.get_task_names():
            # convert similarity with simple transformation
            similarity = param.base_similarity - task_distances[
                name_to_id[task_name_lhs], name_to_id[task_name_rhs]]
            normalizer.set_task_similarity(data.name_to_id(task_name_lhs),
                                           data.name_to_id(task_name_rhs),
                                           similarity)
            # save for later
            similarities[data.name_to_id(task_name_lhs),
                         data.name_to_id(task_name_rhs)] = similarity

    # set normalizer
    base_wdk.set_normalizer(normalizer)
    #base_wdk.init_normalizer()

    combined_features.append_feature_obj(base_features)
    combined_kernel.append_kernel(base_wdk)

    ##################################################
    # intra-domain blocks

    intra_block_vec = PairiiVec()
    for task_id in data.get_task_ids():
        intra_block_vec.push_back(Pairii(task_id, task_id))

    # create mask-based normalizer
    normalizer = MultitaskKernelMaskPairNormalizer(data.task_vector_nums, intra_block_vec)
    kernel = shogun_factory.create_empty_kernel(param)
    kernel.set_normalizer(normalizer)

    # append current kernel to CombinedKernel
    combined_kernel.append_kernel(kernel)

    # append features
    combined_features.append_feature_obj(base_features)

    # set mixing factor (used if MKL is OFF)
    assert param.base_similarity <= 1
    assert param.base_similarity >= 0
    combined_kernel.set_subkernel_weights([param.base_similarity, 1 - param.base_similarity])

    combined_kernel.init(combined_features, combined_features)

    svm = None

    print "using MKL:", (param.transform >= 1.0)

    if param.transform >= 1.0:
        svm = MKLClassification()
        svm.set_mkl_norm(param.transform)
        #svm.set_solver_type(ST_CPLEX) #ST_GLPK) #DIRECT) #NEWTON)#ST_CPLEX) #auto
        svm.set_C(param.cost, param.cost)
        svm.set_kernel(combined_kernel)
        svm.set_labels(lab)
    else:
        # create SVM (disable unsupported optimizations)
        combined_kernel.set_cache_size(500)
        svm = SVMLight(param.cost, combined_kernel, lab)

    # set up SVM
    num_threads = 8
    svm.io.enable_progress()
    svm.io.set_loglevel(shogun.Classifier.MSG_DEBUG)

    svm.parallel.set_num_threads(num_threads)
    svm.set_linadd_enabled(False)
    svm.set_batch_computation_enabled(False)

    print "WARNING: custom epsilon set"
    svm.set_epsilon(0.05)

    # normalize cost
    norm_c_pos = param.cost / float(len([l for l in data.labels if l == 1]))
    norm_c_neg = param.cost / float(len([l for l in data.labels if l == -1]))
    svm.set_C(norm_c_neg, norm_c_pos)

    # start training
    svm.train()

    # save additional information
    self.additional_information["svm objective"] = svm.get_objective()
    self.additional_information["num sv"] = svm.get_num_support_vectors()
    self.additional_information["similarities"] = similarities
    self.additional_information["post_weights"] = combined_kernel.get_subkernel_weights()

    # wrap up predictors
    svms = {}

    # use a reference to the same svm several times
    for task_name in data.get_task_names():
        task_num = data.name_to_id(task_name)
        # save svm and task_num
        svms[task_name] = (task_num, combined_kernel, svm)

    return svms
def _train(self, train_data, param):
    """
    training procedure using training examples and labels

    @param train_data: Data relevant to SVM training
    @type train_data: dict<str, list<instances> >
    @param param: Parameters for the training procedure
    @type param: ParameterSvm
    """
    #numpy.random.seed(1337)
    numpy.random.seed(666)

    # merge data sets
    data = PreparedMultitaskData(train_data, shuffle=True)

    # create shogun label
    lab = shogun_factory.create_labels(data.labels)

    # assemble combined kernel
    combined_kernel = CombinedKernel()
    combined_kernel.io.set_loglevel(shogun.Kernel.MSG_DEBUG)

    # set kernel cache
    if param.flags.has_key("cache_size"):
        combined_kernel.set_cache_size(param.flags["cache_size"])

    # create features
    base_features = shogun_factory.create_features(data.examples)
    combined_features = CombinedFeatures()

    ########################################################
    print "creating a masked kernel for each node:"
    ########################################################

    # fetch taxonomy from parameter object
    taxonomy = param.taxonomy.data

    # create name to leaf map
    nodes = taxonomy.get_all_nodes()

    for node in nodes:
        print "creating kernel for ", node.name

        # fetch sub-tree
        active_task_ids = [data.name_to_id(leaf.name) for leaf in node.get_leaves()]

        print "masking all entries other than:", active_task_ids

        # create mask-based normalizer
        normalizer = MultitaskKernelMaskNormalizer(data.task_vector_nums,
                                                   data.task_vector_nums,
                                                   active_task_ids)

        # normalize trace
        if param.flags.has_key("normalize_trace") and param.flags["normalize_trace"]:
            norm_factor = len(node.get_leaves()) / len(active_task_ids)
            normalizer.set_normalization_constant(norm_factor)

        # create kernel
        kernel = shogun_factory.create_empty_kernel(param)
        kernel.set_normalizer(normalizer)

        # append current kernel to CombinedKernel
        combined_kernel.append_kernel(kernel)

        # append features
        combined_features.append_feature_obj(base_features)

        print "------"

    combined_kernel.init(combined_features, combined_features)
    #combined_kernel.precompute_subkernels()

    print "subkernel weights:", combined_kernel.get_subkernel_weights()

    svm = None

    print "using MKL:", (param.flags["mkl_q"] >= 1.0)

    if param.flags["mkl_q"] >= 1.0:
        # set up MKL
        svm = MKLClassification()

        # set the "q" in q-norm MKL
        svm.set_mkl_norm(param.flags["mkl_q"])

        # set interleaved optimization
        if param.flags.has_key("interleaved"):
            svm.set_interleaved_optimization_enabled(param.flags["interleaved"])

        # set solver type
        if param.flags.has_key("solver_type") and param.flags["solver_type"]:
            if param.flags["solver_type"] == "ST_CPLEX":
                svm.set_solver_type(ST_CPLEX)
            if param.flags["solver_type"] == "ST_DIRECT":
                svm.set_solver_type(ST_DIRECT)
            if param.flags["solver_type"] == "ST_NEWTON":
                svm.set_solver_type(ST_NEWTON)
            if param.flags["solver_type"] == "ST_GLPK":
                svm.set_solver_type(ST_GLPK)

        svm.set_kernel(combined_kernel)
        svm.set_labels(lab)
    else:
        # create vanilla SVM
        svm = SVMLight(param.cost, combined_kernel, lab)

    # optimization settings
    num_threads = 4
    svm.parallel.set_num_threads(num_threads)

    if param.flags.has_key("epsilon"):
        svm.set_epsilon(param.flags["epsilon"])

    # enable output
    svm.io.enable_progress()
    svm.io.set_loglevel(shogun.Classifier.MSG_DEBUG)

    # disable unsupported optimizations (due to special normalizer)
    svm.set_linadd_enabled(False)
    svm.set_batch_computation_enabled(False)

    # set cost
    if param.flags["normalize_cost"]:
        norm_c_pos = param.cost / float(len([l for l in data.labels if l == 1]))
        norm_c_neg = param.cost / float(len([l for l in data.labels if l == -1]))
        svm.set_C(norm_c_neg, norm_c_pos)
    else:
        svm.set_C(param.cost, param.cost)

    # start training
    svm.train()

    ########################################################
    print "svm objective:"
    print svm.get_objective()
    ########################################################

    # store additional info
    self.additional_information["svm objective"] = svm.get_objective()
    self.additional_information["weights"] = combined_kernel.get_subkernel_weights()

    # wrap up predictors
    svms = {}

    # use a reference to the same svm several times
    for task_name in train_data.keys():
        svms[task_name] = (data.name_to_id(task_name), len(nodes), combined_kernel, svm)

    return svms
def mkl_binclass_modular(fm_train_real=traindat, fm_test_real=testdat,
                         fm_label_twoclass=label_traindat):
    from shogun.Features import RealFeatures, CombinedFeatures, BinaryLabels
    from shogun.Kernel import PolyKernel, CombinedKernel, CustomKernel
    from shogun.Classifier import MKLClassification

    ##################################
    # set up and train

    # create some poly train/test matrix
    tfeats = RealFeatures(fm_train_real)
    tkernel = PolyKernel(10, 3)
    tkernel.init(tfeats, tfeats)
    K_train = tkernel.get_kernel_matrix()

    pfeats = RealFeatures(fm_test_real)
    tkernel.init(tfeats, pfeats)
    K_test = tkernel.get_kernel_matrix()

    # create combined train features
    feats_train = CombinedFeatures()
    feats_train.append_feature_obj(RealFeatures(fm_train_real))

    # and corresponding combined kernel
    kernel = CombinedKernel()
    kernel.append_kernel(CustomKernel(K_train))
    kernel.append_kernel(PolyKernel(10, 2))
    kernel.init(feats_train, feats_train)

    # train mkl
    labels = BinaryLabels(fm_label_twoclass)
    mkl = MKLClassification()

    # which norm to use for MKL
    mkl.set_mkl_norm(1)  # 2, 3

    # set cost (neg, pos)
    mkl.set_C(1, 1)

    # set kernel and labels
    mkl.set_kernel(kernel)
    mkl.set_labels(labels)

    # train
    mkl.train()
    #w = kernel.get_subkernel_weights()
    #kernel.set_subkernel_weights(w)

    ##################################
    # test

    # create combined test features
    feats_pred = CombinedFeatures()
    feats_pred.append_feature_obj(RealFeatures(fm_test_real))

    # and corresponding combined kernel
    kernel = CombinedKernel()
    kernel.append_kernel(CustomKernel(K_test))
    kernel.append_kernel(PolyKernel(10, 2))
    kernel.init(feats_train, feats_pred)

    # and classify (apply once and reuse the result)
    mkl.set_kernel(kernel)
    out = mkl.apply()
    return out, kernel
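# One subtlety in the function above: the MKL weights are learned on the
# train-time CombinedKernel, and the commented-out get_subkernel_weights /
# set_subkernel_weights pair hints at transferring them onto the freshly
# built test kernel. A hedged sketch of that transfer; kernel_train and
# kernel_test stand for the two CombinedKernel objects built inside the
# function.
w = kernel_train.get_subkernel_weights()
kernel_test.set_subkernel_weights(w)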
class SignalSensor(object):
    """
    A collection of sensors
    """
    def __init__(self):
        self.sensors = list()
        self.kernel = CombinedKernel()
        self.svs = CombinedFeatures()
        self.svm = None
        self.window = (+100000, -1000000)

    def from_file(self, file):
        sys.stderr.write('loading model file')
        l = file.readline()
        if l != '%arts version: 1.0\n':
            sys.stderr.write("\nfile not an arts definition file\n")
            return None

        bias = None
        alphas = None
        num_kernels = None

        while l:
            # skip comment or empty line
            if not (l.startswith('%') or l.startswith('\n')):
                if bias is None:
                    bias = parse_float(l, 'b')
                if alphas is None:
                    alphas = parse_vector(l, file, 'alphas')
                if num_kernels is None:
                    num_kernels = parse_int(l, 'num_kernels')

                if num_kernels and bias and alphas is not None:
                    for i in xrange(num_kernels):
                        s = Sensor()
                        (k, f) = s.from_file(file, i + 1)
                        k.io.enable_progress()
                        self.window = (min(self.window[0], s.window[0]),
                                       max(self.window[1], s.window[2]))
                        self.sensors.append(s)
                        self.kernel.append_kernel(k)
                        self.svs.append_feature_obj(f)

                    self.kernel.init(self.svs, self.svs)
                    self.svm = KernelMachine(self.kernel, alphas,
                                             numpy.arange(len(alphas), dtype=numpy.int32),
                                             bias)
                    self.svm.io.set_target_to_stderr()
                    self.svm.io.enable_progress()
                    self.svm.parallel.set_num_threads(self.svm.parallel.get_num_cpus())
                    sys.stderr.write('done\n')
                    return

            l = file.readline()

        sys.stderr.write('error loading model file\n')

    def predict(self, seq, chunk_size=int(10e6)):
        """
        predicts on whole contig, splits up sequence in chunks of size chunk_size
        """
        seq_len = len(seq)
        num_chunks = int(numpy.ceil(float(seq_len) / float(chunk_size)))
        assert num_chunks > 0

        sys.stderr.write("number of chunks for contig: %i\n" % (num_chunks))

        start = 0
        stop = min(chunk_size, seq_len)

        out = []

        # iterate over chunks
        for chunk_idx in range(num_chunks):
            sys.stderr.write("processing chunk #%i\n" % (chunk_idx))

            assert start < stop
            chunk = seq[start:stop]

            assert len(self.sensors) > 0
            tf = CombinedFeatures()
            for i in xrange(len(self.sensors)):
                f = self.sensors[i].get_test_features(chunk, self.window)
                tf.append_feature_obj(f)

            sys.stderr.write("initialising kernel...")
            self.kernel.init(self.svs, tf)
            sys.stderr.write("..done\n")

            self.svm.set_kernel(self.kernel)
            lab_out = self.svm.apply().get_values()
            assert len(lab_out) > 0
            out.extend(lab_out)

            # increment chunk
            start = stop
            stop = min(stop + chunk_size, seq_len)

        # pad the uncovered borders with sentinel values and concatenate
        l = (-self.window[0]) * [-42]
        r = self.window[1] * [-42]
        ret = l + out + r

        assert len(ret) == len(seq)

        return ret
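# A hedged usage sketch for the class above; the model path and contig are
# placeholders, and the file must follow the '%arts version: 1.0' format
# parsed by from_file.
sensor = SignalSensor()
sensor.from_file(open("arts_model.txt"))  # hypothetical model file
scores = sensor.predict("ACGT" * 2500)    # one score per position, -42 padding at the borders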
def _train(self, train_data, param):
    """
    training procedure using training examples and labels

    @param train_data: Data relevant to SVM training
    @type train_data: dict<str, list<instances> >
    @param param: Parameters for the training procedure
    @type param: ParameterSvm
    """
    # merge data sets
    data = PreparedMultitaskData(train_data, shuffle=True)

    # create shogun label
    lab = shogun_factory.create_labels(data.labels)

    ########################################################
    print "creating a kernel for each node:"
    ########################################################

    # assemble combined kernel
    combined_kernel = CombinedKernel()
    combined_kernel.io.set_loglevel(shogun.Kernel.MSG_INFO)

    base_features = shogun_factory.create_features(data.examples)
    combined_features = CombinedFeatures()

    ##################################################
    # intra-domain blocks (disabled)
    #
    # intra_block_vec = PairiiVec()
    # for task_id in data.get_task_ids():
    #     intra_block_vec.push_back(Pairii(task_id, task_id))
    #
    # # create mask-based normalizer
    # normalizer = MultitaskKernelMaskPairNormalizer(data.task_vector_nums, intra_block_vec)
    # kernel = shogun_factory.create_empty_kernel(param)
    # kernel.set_normalizer(normalizer)
    #
    # # append current kernel to CombinedKernel
    # combined_kernel.append_kernel(kernel)
    #
    # # append features
    # combined_features.append_feature_obj(base_features)
    #
    # print "------"

    ##################################################
    # all blocks (disabled)
    #
    # all_block_vec = PairiiVec()
    # for task_id_1 in data.get_task_ids():
    #     for task_id_2 in data.get_task_ids():
    #         all_block_vec.push_back(Pairii(task_id_1, task_id_2))
    #
    # # create mask-based normalizer
    # normalizer_all = MultitaskKernelMaskPairNormalizer(data.task_vector_nums, all_block_vec)
    # kernel_all = shogun_factory.create_empty_kernel(param)
    # kernel_all.set_normalizer(normalizer_all)
    #
    # # append current kernel to CombinedKernel
    # combined_kernel.append_kernel(kernel_all)
    #
    # # append features
    # combined_features.append_feature_obj(base_features)

    ##################################################
    # add one kernel per similarity position

    # init seq handler
    pseudoseqs = SequencesHandler()
    pseudoseq_length = pseudoseqs.seq_length

    for pos in range(pseudoseq_length):
        print "appending kernel for pos %i" % (pos)
        print "nums", data.task_vector_nums

        pos_block_vec = PairiiVec()

        # set similarity
        for task_name_lhs in data.get_task_names():
            for task_name_rhs in data.get_task_names():
                similarity = pseudoseqs.get_similarity(task_name_lhs, task_name_rhs, pos)
                #print "computing similarity for tasks (%s, %s) = %i" % (task_name_lhs, task_name_rhs, similarity)
                if similarity == 1:
                    tmp_pair = Pairii(data.name_to_id(task_name_lhs),
                                      data.name_to_id(task_name_rhs))
                    pos_block_vec.push_back(tmp_pair)

        print "creating normalizer"
        normalizer_pos = MultitaskKernelMaskPairNormalizer(data.task_vector_nums, pos_block_vec)

        print "creating empty kernel"
        kernel_pos = shogun_factory.create_empty_kernel(param)

        print "setting normalizer"
        kernel_pos.set_normalizer(normalizer_pos)

        print "appending kernel"
        # append current kernel to CombinedKernel
        combined_kernel.append_kernel(kernel_pos)

        print "appending features"
        # append features
        combined_features.append_feature_obj(base_features)

    print "done constructing combined kernel"

    ##################################################
    # init combined kernel

    combined_kernel.init(combined_features, combined_features)

    print "subkernel weights:", combined_kernel.get_subkernel_weights()

    svm = None

    print "using MKL:", (param.transform >= 1.0)

    if param.transform >= 1.0:
        svm = MKLClassification()
        svm.set_mkl_norm(param.transform)
        #svm.set_solver_type(ST_CPLEX) #ST_GLPK) #DIRECT) #NEWTON)#ST_CPLEX) #auto
        svm.set_C(param.cost, param.cost)
        svm.set_kernel(combined_kernel)
        svm.set_labels(lab)
    else:
        # create SVM (disable unsupported optimizations)
        combined_kernel.set_cache_size(500)
        svm = SVMLight(param.cost, combined_kernel, lab)

    # set up SVM
    num_threads = 8
    svm.io.enable_progress()
    #svm.io.set_loglevel(shogun.Classifier.MSG_INFO)
    svm.io.set_loglevel(shogun.Classifier.MSG_DEBUG)

    svm.parallel.set_num_threads(num_threads)
    svm.set_linadd_enabled(False)
    svm.set_batch_computation_enabled(False)

    print "WARNING: custom epsilon set"
    svm.set_epsilon(0.05)

    # normalize cost
    norm_c_pos = param.cost / float(len([l for l in data.labels if l == 1]))
    norm_c_neg = param.cost / float(len([l for l in data.labels if l == -1]))
    svm.set_C(norm_c_neg, norm_c_pos)

    # start training
    svm.train()

    # save additional info
    self.additional_information["svm_objective"] = svm.get_objective()
    self.additional_information["svm num sv"] = svm.get_num_support_vectors()
    self.additional_information["mkl weights post-training"] = combined_kernel.get_subkernel_weights()

    print self.additional_information

    # wrap up predictors
    svms = {}

    # use a reference to the same svm several times
    for task_name in train_data.keys():
        svms[task_name] = (data.name_to_id(task_name), combined_kernel, svm)

    return svms
# variant of SignalSensor using the plain SVM wrapper and a get_labels() workaround
class SignalSensor(object):
    """
    A collection of sensors
    """
    def __init__(self):
        self.sensors = list()
        self.kernel = CombinedKernel()
        self.svs = CombinedFeatures()
        self.svm = None
        self.window = (+100000, -1000000)

    def from_file(self, file):
        sys.stderr.write('loading model file')
        l = file.readline()
        if l != '%arts version: 1.0\n':
            sys.stderr.write("\nfile not an arts definition file\n")
            return None

        bias = None
        alphas = None
        num_kernels = None

        while l:
            # skip comment or empty line
            if not (l.startswith('%') or l.startswith('\n')):
                if bias is None:
                    bias = parse_float(l, 'b')
                if alphas is None:
                    alphas = parse_vector(l, file, 'alphas')
                if num_kernels is None:
                    num_kernels = parse_int(l, 'num_kernels')

                if num_kernels and bias and alphas is not None:
                    for i in xrange(num_kernels):
                        s = Sensor()
                        (k, f) = s.from_file(file, i + 1)
                        k.io.enable_progress()
                        self.window = (min(self.window[0], s.window[0]),
                                       max(self.window[1], s.window[2]))
                        self.sensors.append(s)
                        self.kernel.append_kernel(k)
                        self.svs.append_feature_obj(f)

                    self.kernel.init(self.svs, self.svs)
                    self.svm = SVM(self.kernel, alphas,
                                   numpy.arange(len(alphas), dtype=numpy.int32), bias)
                    self.svm.io.set_target_to_stderr()
                    self.svm.io.enable_progress()
                    self.svm.parallel.set_num_threads(self.svm.parallel.get_num_cpus())
                    sys.stderr.write('done\n')
                    return

            l = file.readline()

        sys.stderr.write('error loading model file\n')

    def predict(self, seq, chunk_size=int(10e6)):
        """
        predicts on whole contig, splits up sequence in chunks of size chunk_size
        """
        seq_len = len(seq)
        num_chunks = int(numpy.ceil(float(seq_len) / float(chunk_size)))
        assert num_chunks > 0

        sys.stderr.write("number of chunks for contig: %i\n" % (num_chunks))

        start = 0
        stop = min(chunk_size, seq_len)

        out = []

        # iterate over chunks
        for chunk_idx in range(num_chunks):
            sys.stderr.write("processing chunk #%i\n" % (chunk_idx))

            assert start < stop
            chunk = seq[start:stop]

            assert len(self.sensors) > 0
            tf = CombinedFeatures()
            for i in xrange(len(self.sensors)):
                f = self.sensors[i].get_test_features(chunk, self.window)
                tf.append_feature_obj(f)

            sys.stderr.write("initialising kernel...")
            self.kernel.init(self.svs, tf)
            sys.stderr.write("..done\n")

            lab_out = self.svm.apply()

            # work around problem with get_labels()
            tmp_out = [lab_out.get_label(idx)
                       for idx in range(0, lab_out.get_num_labels())]
            assert len(tmp_out) > 0
            out.extend(tmp_out)
            print "len out", len(out)

            # increment chunk
            start = stop
            stop = min(stop + chunk_size, seq_len)

        # pad the uncovered borders with sentinel values and concatenate
        l = (-self.window[0]) * [-42]
        r = self.window[1] * [-42]
        ret = l + out + r

        assert len(ret) == len(seq)

        return ret
def _train(self, train_data, param):
    """
    training procedure using training examples and labels

    @param train_data: Data relevant to SVM training
    @type train_data: dict<str, list<instances> >
    @param param: Parameters for the training procedure
    @type param: ParameterSvm
    """
    import numpy
    numpy.random.seed(666)

    # merge data sets
    data = PreparedMultitaskData(train_data, shuffle=True)

    # create shogun label
    lab = shogun_factory.create_labels(data.labels)

    # assemble combined kernel
    combined_kernel = CombinedKernel()
    combined_kernel.io.set_loglevel(shogun.Kernel.MSG_DEBUG)

    # set kernel cache
    if param.flags.has_key("cache_size"):
        combined_kernel.set_cache_size(param.flags["cache_size"])

    # create features
    base_features = shogun_factory.create_features(data.examples, param)
    combined_features = CombinedFeatures()

    ########################################################
    print "creating a masked kernel for each possible subset:"
    ########################################################

    power_set_tasks = power_set(data.get_task_ids())

    for active_task_ids in power_set_tasks:
        print "masking all entries other than:", active_task_ids

        # create mask-based normalizer
        normalizer = MultitaskKernelMaskNormalizer(data.task_vector_nums,
                                                   data.task_vector_nums,
                                                   active_task_ids)

        # normalize trace
        if param.flags.has_key("normalize_trace") and param.flags["normalize_trace"]:
            norm_factor = len(data.get_task_ids()) / len(active_task_ids)
            normalizer.set_normalization_constant(norm_factor)

        kernel = shogun_factory.create_empty_kernel(param)
        kernel.set_normalizer(normalizer)

        # append current kernel to CombinedKernel
        combined_kernel.append_kernel(kernel)

        # append features
        combined_features.append_feature_obj(base_features)

        print "------"

    combined_kernel.init(combined_features, combined_features)
    #combined_kernel.precompute_subkernels()

    self.additional_information["weights before training"] = combined_kernel.get_subkernel_weights()

    print "subkernel weights:", combined_kernel.get_subkernel_weights()

    svm = None

    print "using MKL:", (param.flags["mkl_q"] >= 1.0)

    if param.flags["mkl_q"] >= 1.0:
        svm = MKLClassification()
        svm.set_mkl_norm(param.flags["mkl_q"])

        # set interleaved optimization
        if param.flags.has_key("interleaved"):
            svm.set_interleaved_optimization_enabled(param.flags["interleaved"])

        # set solver type
        if param.flags.has_key("solver_type") and param.flags["solver_type"]:
            if param.flags["solver_type"] == "ST_CPLEX":
                svm.set_solver_type(ST_CPLEX)
            if param.flags["solver_type"] == "ST_DIRECT":
                svm.set_solver_type(ST_DIRECT)
            if param.flags["solver_type"] == "ST_NEWTON":
                svm.set_solver_type(ST_NEWTON)
            if param.flags["solver_type"] == "ST_GLPK":
                svm.set_solver_type(ST_GLPK)

        svm.set_kernel(combined_kernel)
        svm.set_labels(lab)
    else:
        svm = SVMLight(param.cost, combined_kernel, lab)

    # optimization settings
    num_threads = 4
    svm.parallel.set_num_threads(num_threads)

    if param.flags.has_key("epsilon"):
        svm.set_epsilon(param.flags["epsilon"])

    # enable output
    svm.io.enable_progress()
    svm.io.set_loglevel(shogun.Classifier.MSG_DEBUG)

    # disable unsupported optimizations (due to special normalizer)
    svm.set_linadd_enabled(False)
    svm.set_batch_computation_enabled(False)

    # set cost
    if param.flags["normalize_cost"]:
        norm_c_pos = param.cost / float(len([l for l in data.labels if l == 1]))
        norm_c_neg = param.cost / float(len([l for l in data.labels if l == -1]))
        svm.set_C(norm_c_neg, norm_c_pos)
    else:
        svm.set_C(param.cost, param.cost)

    svm.train()

    # prepare mapping from task subset to learned subkernel weight
    weight_map = {}
    weights = combined_kernel.get_subkernel_weights()
    for (i, pset) in enumerate(power_set_tasks):
        print pset
        subset_str = str([data.id_to_name(task_idx) for task_idx in pset])
        weight_map[subset_str] = weights[i]

    # store additional info
    self.additional_information["svm objective"] = svm.get_objective()
    self.additional_information["weight_map"] = weight_map

    # wrap up predictors
    svms = {}

    # use a reference to the same svm several times
    for task_name in train_data.keys():
        svms[task_name] = (data.name_to_id(task_name), len(power_set_tasks),
                           combined_kernel, svm, param)

    return svms
# features_train = RealFeatures(train_feats.T)
# features_test = RealFeatures(test_feats.T)
# labels_train = BinaryLabels(train_label.T)
# labels_test = BinaryLabels(test_label.T)

# (epsilon and C are set here but not used in this fragment)
epsilon = 0.001
C = 100000

combined_kernel = CombinedKernel()
#gauss_kernel_1 = GaussianKernel(features_train, features_train, 15)
gauss_kernel_1 = GaussianKernel(features_train, features_train, 10)
gauss_kernel_2 = GaussianKernel(features_train, features_train, 1.0)
combined_kernel.append_kernel(gauss_kernel_1)
combined_kernel.append_kernel(gauss_kernel_2)
combined_kernel.init(features_train, features_train)

# MKL on top of LibSVM
libsvm = LibSVM()
svm = MKLClassification(libsvm)
svm.set_interleaved_optimization_enabled(False)
svm.set_kernel(combined_kernel)
svm.set_labels(labels_train)
svm.train()

beta = combined_kernel.get_subkernel_weights()
alpha = svm.get_alphas()

combined_kernel.init(features_train, features_test)
labels_predict = svm.apply()
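# The fragment above assumes features_train, features_test and labels_train
# already exist; for reference, a toy construction consistent with the
# commented-out lines at its top (shapes and data are placeholders).
import numpy
from shogun.Features import RealFeatures, BinaryLabels

train_feats = numpy.random.randn(60, 2)
test_feats = numpy.random.randn(20, 2)
train_label = numpy.where(train_feats[:, 0] > 0, 1.0, -1.0)
test_label = numpy.where(test_feats[:, 0] > 0, 1.0, -1.0)

features_train = RealFeatures(train_feats.T)
features_test = RealFeatures(test_feats.T)
labels_train = BinaryLabels(train_label.T)
labels_test = BinaryLabels(test_label.T)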
def _train(self, train_data, param):
    """
    training procedure using training examples and labels

    @param train_data: Data relevant to SVM training
    @type train_data: dict<str, list<instances> >
    @param param: Parameters for the training procedure
    @type param: ParameterSvm
    """

    # dict to save additional information for later analysis
    self.additional_information = {}

    # merge data sets
    data = PreparedMultitaskData(train_data, shuffle=True)

    # create shogun label
    lab = shogun_factory.create_labels(data.labels)

    ##################################################
    # define pockets
    ##################################################

    pockets = [0] * 9
    pockets[0] = [1, 5, 6, 7, 8, 31, 32, 33, 34]
    pockets[1] = [1, 2, 3, 4, 6, 7, 8, 9, 11, 21, 31]
    pockets[2] = [11, 20, 21, 22, 29, 31]
    pockets[3] = [8, 30, 31, 32]
    pockets[4] = [10, 11, 30]
    pockets[5] = [10, 11, 12, 13, 20, 29]
    pockets[6] = [10, 12, 20, 22, 26, 27, 28, 29]
    pockets[7] = [12, 14, 15, 26]
    pockets[8] = [13, 15, 16, 17, 18, 19, 20, 23, 24, 25, 26]

    # disabled variant: merge neighboring pockets
    #new_pockets = []
    #for i in range(8):
    #    new_pockets.append(list(set(pockets[i]).union(set(pockets[i+1]))))
    #pockets = new_pockets

    ########################################################
    print "creating a kernel:"
    ########################################################

    # assemble combined kernel
    combined_kernel = CombinedKernel()
    combined_kernel.io.set_loglevel(shogun.Kernel.MSG_INFO)

    base_features = shogun_factory.create_features(data.examples)
    combined_features = CombinedFeatures()

    ##################################################
    # disabled variants: an intra-domain (dirac) kernel and an all-blocks
    # kernel, each built with MultitaskKernelMaskPairNormalizer, used to be
    # appended here; the variant of _train further below keeps them enabled.

    ##################################################
    # add one kernel per similarity position

    # init seq handler
    pseudoseqs = SequencesHandler()

    for pocket in pockets:

        print "creating normalizer"
        normalizer = MultitaskKernelNormalizer(data.task_vector_nums)

        print "processing pocket", pocket

        # set similarity between each pair of tasks, averaged over the
        # pseudo-sequence positions belonging to this pocket
        for task_name_lhs in data.get_task_names():
            for task_name_rhs in data.get_task_names():

                similarity = 0.0
                for pseudo_seq_pos in pocket:
                    similarity += float(pseudoseqs.get_similarity(task_name_lhs, task_name_rhs, pseudo_seq_pos - 1))

                # normalize
                similarity = similarity / float(len(pocket))
                print "pocket %s (%s, %s) = %f" % (str(pocket), task_name_lhs, task_name_rhs, similarity)

                normalizer.set_task_similarity(data.name_to_id(task_name_lhs),
                                               data.name_to_id(task_name_rhs),
                                               similarity)

        print "creating empty kernel"
        kernel_pos = shogun_factory.create_empty_kernel(param)

        print "setting normalizer"
        kernel_pos.set_normalizer(normalizer)

        print "appending kernel"
        # append current kernel to CombinedKernel
        combined_kernel.append_kernel(kernel_pos)

        print "appending features"
        # append features
        combined_features.append_feature_obj(base_features)

    print "done constructing combined kernel"

    ##################################################
    # init combined kernel

    # init weights
    #combined_kernel.set_subkernel_weights([1.0/2.85]*combined_kernel.get_num_subkernels())

    combined_kernel.init(combined_features, combined_features)

    print "subkernel weights:", combined_kernel.get_subkernel_weights()

    svm = None

    print "using MKL:", (param.transform >= 1.0)

    if param.transform >= 1.0:
        svm = MKLClassification()
        svm.set_mkl_norm(param.transform)
        #svm.set_solver_type(ST_CPLEX)  # alternatives: ST_GLPK, DIRECT, NEWTON
        svm.set_C(param.cost, param.cost)
        svm.set_kernel(combined_kernel)
        svm.set_labels(lab)
    else:
        # create SVM (disable unsupported optimizations)
        combined_kernel.set_cache_size(500)
        svm = SVMLight(param.cost, combined_kernel, lab)

    # set up SVM
    num_threads = 8
    svm.io.enable_progress()
    #svm.io.set_loglevel(shogun.Classifier.MSG_INFO)
    svm.io.set_loglevel(shogun.Classifier.MSG_DEBUG)

    svm.parallel.set_num_threads(num_threads)
    svm.set_linadd_enabled(False)
    svm.set_batch_computation_enabled(False)

    # normalize cost by class size
    norm_c_pos = param.cost / float(len([l for l in data.labels if l == 1]))
    norm_c_neg = param.cost / float(len([l for l in data.labels if l == -1]))
    svm.set_C(norm_c_neg, norm_c_pos)

    # start training
    svm.train()

    # save additional info
    self.additional_information["svm_objective"] = svm.get_objective()
    self.additional_information["svm num sv"] = svm.get_num_support_vectors()
    self.additional_information["post_weights"] = combined_kernel.get_subkernel_weights()

    print self.additional_information

    # wrap up predictors: every task shares the same kernel and svm
    svms = {}
    for task_name in train_data.keys():
        svms[task_name] = (data.name_to_id(task_name), combined_kernel, svm)

    return svms
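# A toy sketch of what MultitaskKernelNormalizer does in isolation, under
# the assumption that the modular imports below exist in the installed
# shogun version; all data and similarity values are illustrative.
import numpy
from shogun.Features import RealFeatures
from shogun.Kernel import GaussianKernel, MultitaskKernelNormalizer

feats = RealFeatures(numpy.random.randn(2, 4))
task_vector = [0, 0, 1, 1]  # example i belongs to task task_vector[i]

normalizer = MultitaskKernelNormalizer(task_vector)
# within-task entries keep full weight, between-task entries are damped
for t1 in (0, 1):
    for t2 in (0, 1):
        normalizer.set_task_similarity(t1, t2, 1.0 if t1 == t2 else 0.5)

kernel = GaussianKernel(feats, feats, 1.0)
kernel.set_normalizer(normalizer)
kernel.init(feats, feats)
print kernel.get_kernel_matrix()  # 4x4 matrix, cross-task blocks scaled by 0.5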
# test features would need one append per subkernel (five, matching the
# five Gaussian kernels below):
#feats_test.append_feature_obj(RealFeatures(testdata_real))
#feats_test.append_feature_obj(RealFeatures(testdata_real))
#feats_test.append_feature_obj(RealFeatures(testdata_real))
#feats_test.append_feature_obj(RealFeatures(testdata_real))
#feats_test.append_feature_obj(RealFeatures(testdata_real))

labels = Labels(trainlab)

# and corresponding combined kernel
kernel = CombinedKernel()
kernel.append_kernel(GaussianKernel(10, 2.0))
kernel.append_kernel(GaussianKernel(10, 0.25))
kernel.append_kernel(GaussianKernel(10, 0.062))
kernel.append_kernel(GaussianKernel(10, 8.0))
kernel.append_kernel(GaussianKernel(10, 10.0))
kernel.init(feats_train, feats_train)

# create a classifier
classifier = MKLClassification(LibSVM())
classifier.set_interleaved_optimization_enabled(False)
classifier.set_kernel(kernel)
classifier.set_labels(labels)
classifier.set_C(1, 1)

# append the C1 parameter to the model-selection tree (values 10^-4 .. 10^4)
param_tree_root = ModelSelectionParameters()
c1 = ModelSelectionParameters("C1")
c1.build_values(-4.0, 4.0, R_EXP)
param_tree_root.append_child(c1)
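# A sketch of how the parameter tree above could be used, assuming
# feats_train/labels from this snippet; import paths and the argument
# order of GridSearchModelSelection differ between shogun versions, so
# treat this as illustrative rather than canonical.
from shogun.Evaluation import CrossValidation, StratifiedCrossValidationSplitting, ContingencyTableEvaluation, ACCURACY
from shogun.ModelSelection import GridSearchModelSelection

splitting = StratifiedCrossValidationSplitting(labels, 5)
evaluation = ContingencyTableEvaluation(ACCURACY)
cross_validation = CrossValidation(classifier, feats_train, labels,
                                   splitting, evaluation)

model_selection = GridSearchModelSelection(param_tree_root, cross_validation)
best_parameters = model_selection.select_model()
best_parameters.apply_to_machine(classifier)
classifier.train()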
def _train(self, train_data, param):
    """
    training procedure using training examples and labels

    @param train_data: Data relevant to SVM training
    @type train_data: dict<str, list<instances> >
    @param param: Parameters for the training procedure
    @type param: ParameterSvm
    """

    # dict to save additional information for later analysis
    self.additional_information = {}

    # merge data sets
    data = PreparedMultitaskData(train_data, shuffle=True)

    # create shogun label
    lab = shogun_factory.create_labels(data.labels)

    ########################################################
    print "creating a kernel for each node:"
    ########################################################

    # assemble combined kernel
    combined_kernel = CombinedKernel()
    combined_kernel.io.set_loglevel(shogun.Kernel.MSG_INFO)

    base_features = shogun_factory.create_features(data.examples, param)
    combined_features = CombinedFeatures()

    ##################################################
    # intra-domain blocks (dirac kernel)

    intra_block_vec = PairiiVec()
    for task_id in data.get_task_ids():
        intra_block_vec.push_back(Pairii(task_id, task_id))

    # create mask-based normalizer
    normalizer = MultitaskKernelMaskPairNormalizer(data.task_vector_nums, intra_block_vec)
    kernel = shogun_factory.create_empty_kernel(param)
    kernel.set_normalizer(normalizer)

    # append current kernel to CombinedKernel
    combined_kernel.append_kernel(kernel)

    # append features
    combined_features.append_feature_obj(base_features)

    print "------"

    ##################################################
    # all blocks (full kernel matrix)

    all_block_vec = PairiiVec()
    for task_id_1 in data.get_task_ids():
        for task_id_2 in data.get_task_ids():
            all_block_vec.push_back(Pairii(task_id_1, task_id_2))

    # create mask-based normalizer
    normalizer_all = MultitaskKernelMaskPairNormalizer(data.task_vector_nums, all_block_vec)
    kernel_all = shogun_factory.create_empty_kernel(param)
    kernel_all.set_normalizer(normalizer_all)

    # append current kernel to CombinedKernel
    combined_kernel.append_kernel(kernel_all)

    # append features
    combined_features.append_feature_obj(base_features)

    ##################################################
    # hack: manually grouped blocks (disabled)

    #hack_block_vec = PairiiVec()
    #for task_id_1 in data.get_task_ids():
    #    for task_id_2 in data.get_task_ids():
    #        hack_block_vec.push_back(Pairii(task_id_1, task_id_2))
    #hack_block_vec.push_back(Pairii(data.name_to_id("B_2705"), data.name_to_id("B_4001")))
    #other_group = ["B_0702", "B_1501", "B_5801"]
    #for task_id_1 in other_group:
    #    for task_id_2 in other_group:
    #        hack_block_vec.push_back(Pairii(data.name_to_id(task_id_1), data.name_to_id(task_id_2)))
    #
    #normalizer_hack = MultitaskKernelMaskPairNormalizer(data.task_vector_nums, hack_block_vec)
    #kernel_hack = shogun_factory.create_empty_kernel(param)
    #kernel_hack.set_normalizer(normalizer_hack)
    #combined_kernel.append_kernel(kernel_hack)
    #combined_features.append_feature_obj(base_features)

    ##################################################
    # init combined kernel

    combined_kernel.init(combined_features, combined_features)
    #combined_kernel.precompute_subkernels()

    self.additional_information["mkl weights before"] = combined_kernel.get_subkernel_weights()

    print "subkernel weights:", combined_kernel.get_subkernel_weights()

    svm = None

    print "using MKL:", (param.flags["mkl_q"] >= 1.0)

    if param.flags["mkl_q"] >= 1.0:
        svm = MKLClassification()
        svm.set_mkl_norm(param.flags["mkl_q"])
        svm.set_kernel(combined_kernel)
        svm.set_labels(lab)
    else:
        # create SVM (disable unsupported optimizations)
        combined_kernel.set_cache_size(500)
        svm = SVMLight(param.cost, combined_kernel, lab)

    # set up SVM
    num_threads = 8
    svm.io.enable_progress()
    svm.io.set_loglevel(shogun.Classifier.MSG_INFO)

    svm.parallel.set_num_threads(num_threads)
    svm.set_linadd_enabled(False)
    svm.set_batch_computation_enabled(False)
    svm.set_epsilon(0.03)

    # set cost, optionally normalized by class size
    if param.flags["normalize_cost"]:
        norm_c_pos = param.cost / float(len([l for l in data.labels if l == 1]))
        norm_c_neg = param.cost / float(len([l for l in data.labels if l == -1]))
        svm.set_C(norm_c_neg, norm_c_pos)
    else:
        svm.set_C(param.cost, param.cost)

    svm.train()

    print "subkernel weights (after):", combined_kernel.get_subkernel_weights()

    ########################################################
    print "svm objective:"
    print svm.get_objective()

    self.additional_information["svm_objective"] = svm.get_objective()
    self.additional_information["svm num sv"] = svm.get_num_support_vectors()
    self.additional_information["mkl weights post-training"] = combined_kernel.get_subkernel_weights()

    ########################################################

    # wrap up predictors: every task shares the same kernel and svm
    svms = {}
    for task_name in train_data.keys():
        svms[task_name] = (data.name_to_id(task_name), combined_kernel, svm, param)

    return svms
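# A toy sketch of MultitaskKernelMaskPairNormalizer in isolation: only
# kernel entries whose (task_lhs, task_rhs) pair occurs in the PairiiVec
# survive, everything else is masked out. Imports and data below are
# illustrative assumptions about the installed modular interface.
import numpy
from shogun.Features import RealFeatures
from shogun.Kernel import GaussianKernel, MultitaskKernelMaskPairNormalizer, Pairii, PairiiVec

feats = RealFeatures(numpy.random.randn(2, 4))
task_vector = [0, 0, 1, 1]

block_vec = PairiiVec()
block_vec.push_back(Pairii(0, 0))  # keep task 0 vs task 0
block_vec.push_back(Pairii(1, 1))  # keep task 1 vs task 1

normalizer = MultitaskKernelMaskPairNormalizer(task_vector, block_vec)
kernel = GaussianKernel(feats, feats, 1.0)
kernel.set_normalizer(normalizer)
kernel.init(feats, feats)
print kernel.get_kernel_matrix()  # cross-task blocks zeroed out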
def mkl_binclass_modular(fm_train_real=traindat, fm_test_real=testdat, fm_label_twoclass=label_traindat):

    ##################################
    # set up and train

    # create some poly train/test matrix
    tfeats = RealFeatures(fm_train_real)
    tkernel = PolyKernel(10, 3)
    tkernel.init(tfeats, tfeats)
    K_train = tkernel.get_kernel_matrix()

    pfeats = RealFeatures(fm_test_real)
    tkernel.init(tfeats, pfeats)
    K_test = tkernel.get_kernel_matrix()

    # create combined train features
    feats_train = CombinedFeatures()
    feats_train.append_feature_obj(RealFeatures(fm_train_real))

    # and corresponding combined kernel
    kernel = CombinedKernel()
    kernel.append_kernel(CustomKernel(K_train))
    kernel.append_kernel(PolyKernel(10, 2))
    kernel.init(feats_train, feats_train)

    # train mkl
    labels = BinaryLabels(fm_label_twoclass)
    mkl = MKLClassification()

    # which norm to use for MKL
    mkl.set_mkl_norm(1)  # or 2, 3

    # set cost (neg, pos)
    mkl.set_C(1, 1)

    # set kernel and labels
    mkl.set_kernel(kernel)
    mkl.set_labels(labels)

    # train
    mkl.train()
    #w = kernel.get_subkernel_weights()
    #kernel.set_subkernel_weights(w)

    ##################################
    # test

    # create combined test features
    feats_pred = CombinedFeatures()
    feats_pred.append_feature_obj(RealFeatures(fm_test_real))

    # and corresponding combined kernel
    kernel = CombinedKernel()
    kernel.append_kernel(CustomKernel(K_test))
    kernel.append_kernel(PolyKernel(10, 2))
    kernel.init(feats_train, feats_pred)

    # and classify
    mkl.set_kernel(kernel)
    return mkl.apply(), kernel
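# Hypothetical call of the example above; traindat, testdat and
# label_traindat are assumed to be numpy arrays loaded by the surrounding
# script, as in shogun's other modular examples.
predictions, test_kernel = mkl_binclass_modular(traindat, testdat, label_traindat)
print "predicted labels:", predictions.get_labels()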