def classifier_multiclassmachine_modular(fm_train_real=traindat, fm_test_real=testdat, label_train_multiclass=label_traindat, width=2.1, C=1, epsilon=1e-5):
    from shogun.Features import RealFeatures, MulticlassLabels
    from shogun.Kernel import GaussianKernel
    from shogun.Classifier import LibSVM, KernelMulticlassMachine, MulticlassOneVsRestStrategy

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)
    kernel = GaussianKernel(feats_train, feats_train, width)

    labels = MulticlassLabels(label_train_multiclass)

    classifier = LibSVM()
    classifier.set_epsilon(epsilon)
    print labels.get_labels()

    mc_classifier = KernelMulticlassMachine(MulticlassOneVsRestStrategy(), kernel, classifier, labels)
    mc_classifier.train()

    kernel.init(feats_train, feats_test)
    out = mc_classifier.apply().get_labels()
    return out
class svm_splice_model(object):
    def __init__(self, order, traindat, alphas, b, (window_left, offset, window_right), consensus):
        f = StringCharFeatures(traindat, DNA)
        wd_kernel = WeightedDegreeStringKernel(f, f, int(order))
        wd_kernel.io.set_target_to_stdout()

        self.svm = LibSVM()
        self.svm.set_kernel(wd_kernel)
        self.svm.set_alphas(alphas)
        self.svm.set_support_vectors(numpy.arange(len(alphas), dtype=numpy.int32))
        self.svm.set_bias(b)
        self.svm.io.set_target_to_stdout()
        self.svm.parallel.set_num_threads(self.svm.parallel.get_num_cpus())
        self.svm.set_linadd_enabled(True)
        self.svm.set_batch_computation_enabled(True)

        self.window_left = int(window_left)
        self.window_right = int(window_right)

        self.consensus = consensus
        self.wd_kernel = wd_kernel
        self.traindat = f
        self.offset = offset
def classifier_libsvm_modular(fm_train_real=traindat, fm_test_real=testdat, label_train_twoclass=label_traindat, width=2.1, C=1, epsilon=1e-5):
    from shogun.Features import RealFeatures, Labels
    from shogun.Kernel import GaussianKernel
    from shogun.Classifier import LibSVM

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)
    kernel = GaussianKernel(feats_train, feats_train, width)
    labels = Labels(label_train_twoclass)

    svm = LibSVM(C, kernel, labels)
    svm.set_epsilon(epsilon)
    svm.train()

    kernel.init(feats_train, feats_test)
    labels = svm.classify().get_labels()
    supportvectors = sv_idx = svm.get_support_vectors()
    alphas = svm.get_alphas()
    predictions = svm.classify()
    return predictions, svm, predictions.get_labels()
def svm_learn(kernel, labels, options):
    """Train an SVM using SVMLight or LibSVM.

    Arguments:
    kernel -- kernel object from the Shogun toolbox
    labels -- list of labels
    options -- object containing option data

    Return:
    trained svm object
    """
    try:
        svm = SVMLight(options.svmC, kernel, Labels(numpy.array(labels, dtype=numpy.double)))
    except NameError:
        svm = LibSVM(options.svmC, kernel, Labels(numpy.array(labels, dtype=numpy.double)))

    if options.quiet == False:
        svm.io.set_loglevel(MSG_INFO)
        svm.io.set_target_to_stderr()

    svm.set_epsilon(options.epsilon)
    svm.parallel.set_num_threads(1)
    if options.weight != 1.0:
        svm.set_C(options.svmC, options.svmC * options.weight)
    svm.train()

    if options.quiet == False:
        svm.io.set_loglevel(MSG_ERROR)
    return svm
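# Hedged usage sketch (assumption, not part of the original script): svm_learn only reads
# the attributes svmC, epsilon, weight and quiet from its options object, so a plain
# namespace plus a Shogun kernel and a list of +1/-1 labels is enough to drive it.
# The random data, the _demo_* names and the chosen parameter values are illustrative only.
from argparse import Namespace
import numpy
from shogun.Features import RealFeatures, Labels
from shogun.Kernel import GaussianKernel
from shogun.Classifier import LibSVM

_demo_data = numpy.random.rand(5, 20)                      # 5 features x 20 examples
_demo_labels = list(2.0 * (numpy.arange(20) % 2) - 1.0)    # alternating +1/-1 labels
_demo_feats = RealFeatures(_demo_data)
_demo_kernel = GaussianKernel(_demo_feats, _demo_feats, 2.1)
_demo_opts = Namespace(svmC=1.0, epsilon=1e-5, weight=1.0, quiet=True)
_demo_svm = svm_learn(_demo_kernel, _demo_labels, _demo_opts)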
def libsvm():
    print 'LibSVM'

    from shogun.Features import RealFeatures, Labels
    from shogun.Kernel import GaussianKernel
    from shogun.Evaluation import PerformanceMeasures
    from shogun.Classifier import LibSVM

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)
    width = 2.1
    kernel = GaussianKernel(feats_train, feats_train, width)

    C = 1
    epsilon = 1e-5
    labels = Labels(label_train_twoclass)

    svm = LibSVM(C, kernel, labels)
    svm.set_epsilon(epsilon)
    svm.train()

    #kernel.init(feats_train, feats_test)
    output = svm.classify(feats_test)  #.get_labels()
    #output_vector = output.get_labels()
    out = svm.classify().get_labels()
    testerr = mean(sign(out) != testlab)
    print testerr
def modelselection_grid_search_kernel():
    num_subsets = 3
    num_vectors = 20
    dim_vectors = 3

    # create some (nonsense) data
    matrix = rand(dim_vectors, num_vectors)

    # create num_vectors feature vectors of dimension dim_vectors
    features = RealFeatures()
    features.set_feature_matrix(matrix)

    # create labels, two classes
    labels = Labels(num_vectors)
    for i in range(num_vectors):
        labels.set_label(i, 1 if i % 2 == 0 else -1)

    # create svm
    classifier = LibSVM()

    # splitting strategy
    splitting_strategy = StratifiedCrossValidationSplitting(labels, num_subsets)

    # accuracy evaluation
    evaluation_criterion = ContingencyTableEvaluation(ACCURACY)

    # cross-validation class for evaluation in model selection
    cross = CrossValidation(classifier, features, labels, splitting_strategy, evaluation_criterion)
    cross.set_num_runs(1)

    # print all parameters available for model selection
    # Don't worry if yours is not included; simply write to the mailing list
    classifier.print_modsel_params()

    # model parameter selection
    param_tree = create_param_tree()
    param_tree.print_tree()

    grid_search = GridSearchModelSelection(param_tree, cross)

    print_state = True
    best_combination = grid_search.select_model(print_state)
    print("best parameter(s):")
    best_combination.print_tree()
    best_combination.apply_to_machine(classifier)

    # larger number of runs to have tighter confidence intervals
    cross.set_num_runs(10)
    cross.set_conf_int_alpha(0.01)
    result = cross.evaluate()
    print("result: ")
    result.print_result()

    return 0
def modelselection_grid_search_kernel(num_subsets, num_vectors, dim_vectors):
    # init seed for reproducibility
    Math.init_random(1)
    random.seed(1)

    # create some (nonsense) data
    matrix = random.rand(dim_vectors, num_vectors)

    # create num_vectors feature vectors of dimension dim_vectors
    features = RealFeatures()
    features.set_feature_matrix(matrix)

    # create labels, two classes
    labels = BinaryLabels(num_vectors)
    for i in range(num_vectors):
        labels.set_label(i, 1 if i % 2 == 0 else -1)

    # create svm
    classifier = LibSVM()

    # splitting strategy
    splitting_strategy = StratifiedCrossValidationSplitting(labels, num_subsets)

    # accuracy evaluation
    evaluation_criterion = ContingencyTableEvaluation(ACCURACY)

    # cross-validation class for evaluation in model selection
    cross = CrossValidation(classifier, features, labels, splitting_strategy, evaluation_criterion)
    cross.set_num_runs(1)

    # print all parameters available for model selection
    # Don't worry if yours is not included; simply write to the mailing list
    #classifier.print_modsel_params()

    # model parameter selection
    param_tree = create_param_tree()
    #param_tree.print_tree()

    grid_search = GridSearchModelSelection(param_tree, cross)

    print_state = False
    best_combination = grid_search.select_model(print_state)
    #print("best parameter(s):")
    #best_combination.print_tree()
    best_combination.apply_to_machine(classifier)

    # larger number of runs to have tighter confidence intervals
    cross.set_num_runs(10)
    cross.set_conf_int_alpha(0.01)
    result = cross.evaluate()
    casted = CrossValidationResult.obtain_from_generic(result)
    #print "result mean:", casted.mean

    return classifier, result, casted.mean
def evaluation_cross_validation_mkl_weight_storage(traindat=traindat, label_traindat=label_traindat):
    from shogun.Evaluation import CrossValidation, CrossValidationResult
    from shogun.Evaluation import CrossValidationPrintOutput
    from shogun.Evaluation import CrossValidationMKLStorage
    from shogun.Evaluation import ContingencyTableEvaluation, ACCURACY
    from shogun.Evaluation import StratifiedCrossValidationSplitting
    from shogun.Features import BinaryLabels
    from shogun.Features import RealFeatures, CombinedFeatures
    from shogun.Kernel import GaussianKernel, CombinedKernel
    from shogun.Classifier import LibSVM, MKLClassification
    from shogun.Mathematics import Statistics

    # training data, combined features all on the same data
    features = RealFeatures(traindat)
    comb_features = CombinedFeatures()
    comb_features.append_feature_obj(features)
    comb_features.append_feature_obj(features)
    comb_features.append_feature_obj(features)
    labels = BinaryLabels(label_traindat)

    # kernel, different Gaussians combined
    kernel = CombinedKernel()
    kernel.append_kernel(GaussianKernel(10, 0.1))
    kernel.append_kernel(GaussianKernel(10, 1))
    kernel.append_kernel(GaussianKernel(10, 2))

    # create MKL using LibSVM; due to a memory bug, interleaved optimization is not possible
    svm = MKLClassification(LibSVM())
    svm.set_interleaved_optimization_enabled(False)
    svm.set_kernel(kernel)

    # splitting strategy for 5-fold cross-validation (for classification it is
    # better to use stratified splitting, so StratifiedCrossValidationSplitting
    # is used here)
    splitting_strategy = StratifiedCrossValidationSplitting(labels, 5)

    # evaluation method
    evaluation_criterium = ContingencyTableEvaluation(ACCURACY)

    # cross-validation instance
    cross_validation = CrossValidation(svm, comb_features, labels, splitting_strategy, evaluation_criterium)
    cross_validation.set_autolock(False)

    # append cross-validation output classes
    #cross_validation.add_cross_validation_output(CrossValidationPrintOutput())
    mkl_storage = CrossValidationMKLStorage()
    cross_validation.add_cross_validation_output(mkl_storage)
    cross_validation.set_num_runs(3)

    # perform cross-validation
    result = cross_validation.evaluate()

    # print MKL weights
    weights = mkl_storage.get_mkl_weights()
def svm_train(kernel, labels, C1, C2=None):
    """Trains an SVM with the given kernel."""
    num_threads = 1
    kernel.io.disable_progress()

    svm = LibSVM(C1, kernel, labels)
    if C2:
        svm.set_C(C1, C2)
    svm.parallel.set_num_threads(num_threads)
    svm.io.disable_progress()
    svm.train()

    return svm
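# Hedged usage sketch (assumption, not part of the original file): svm_train expects an
# already-constructed Shogun kernel and Labels; synthetic data is used here only to show
# the call, and passing C2 exercises the optional asymmetric-C path. The _example_* names
# and parameter values are illustrative.
import numpy
from shogun.Features import RealFeatures, Labels
from shogun.Kernel import GaussianKernel
from shogun.Classifier import LibSVM

_example_data = numpy.random.rand(10, 30)                       # 10 features x 30 examples
_example_labels = Labels(2.0 * (numpy.arange(30) % 2) - 1.0)    # alternating +1/-1 labels
_example_feats = RealFeatures(_example_data)
_example_kernel = GaussianKernel(_example_feats, _example_feats, 1.5)
_example_svm = svm_train(_example_kernel, _example_labels, C1=1.0, C2=2.0)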
def init_svm(task_type, kernel, labels):
    """A factory for creating the right SVM type."""
    C = 1
    epsilon = 1e-5
    if task_type == 'Binary Classification':
        svm = LibSVM(C, kernel, labels)
    elif task_type == 'Multi Class Classification':
        svm = LibSVMMultiClass(C, kernel, labels)
    elif task_type == 'Regression':
        tube_epsilon = 1e-2
        svm = LibSVR(C, epsilon, kernel, labels)
        svm.set_tube_epsilon(tube_epsilon)
    else:
        # fail loudly instead of returning an undefined svm
        raise ValueError(task_type + ' unknown!')
    return svm
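# Hedged usage sketch (assumption): the factory is driven purely by the task_type string,
# so selecting 'Binary Classification' with any kernel/labels pair returns a configured
# LibSVM that the caller still has to train. The _toy_* names and random data are
# illustrative only.
import numpy
from shogun.Features import RealFeatures, Labels
from shogun.Kernel import GaussianKernel
from shogun.Classifier import LibSVM

_toy_feats = RealFeatures(numpy.random.rand(4, 16))
_toy_labels = Labels(2.0 * (numpy.arange(16) % 2) - 1.0)
_toy_kernel = GaussianKernel(_toy_feats, _toy_feats, 1.0)
_toy_svm = init_svm('Binary Classification', _toy_kernel, _toy_labels)
_toy_svm.train()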
def kernel_combined_custom_poly_modular(fm_train_real=traindat, fm_test_real=testdat, fm_label_twoclass=label_traindat):
    from shogun.Features import CombinedFeatures, RealFeatures, Labels
    from shogun.Kernel import CombinedKernel, PolyKernel, CustomKernel
    from shogun.Classifier import LibSVM

    kernel = CombinedKernel()
    feats_train = CombinedFeatures()

    tfeats = RealFeatures(fm_train_real)
    tkernel = PolyKernel(10, 3)
    tkernel.init(tfeats, tfeats)
    K = tkernel.get_kernel_matrix()
    kernel.append_kernel(CustomKernel(K))

    subkfeats_train = RealFeatures(fm_train_real)
    feats_train.append_feature_obj(subkfeats_train)
    subkernel = PolyKernel(10, 2)
    kernel.append_kernel(subkernel)

    kernel.init(feats_train, feats_train)

    labels = Labels(fm_label_twoclass)
    svm = LibSVM(1.0, kernel, labels)
    svm.train()

    kernel = CombinedKernel()
    feats_pred = CombinedFeatures()

    pfeats = RealFeatures(fm_test_real)
    tkernel = PolyKernel(10, 3)
    tkernel.init(tfeats, pfeats)
    K = tkernel.get_kernel_matrix()
    kernel.append_kernel(CustomKernel(K))

    subkfeats_test = RealFeatures(fm_test_real)
    feats_pred.append_feature_obj(subkfeats_test)
    subkernel = PolyKernel(10, 2)
    kernel.append_kernel(subkernel)
    kernel.init(feats_train, feats_pred)

    svm.set_kernel(kernel)
    svm.classify()

    km_train = kernel.get_kernel_matrix()
    return km_train, kernel
def bench_shogun(X, y, T, valid):
    #
    # .. Shogun ..
    #
    from shogun.Classifier import LibSVM
    from shogun.Features import RealFeatures, Labels
    from shogun.Kernel import GaussianKernel

    start = datetime.now()
    feat = RealFeatures(X.T)
    feat_test = RealFeatures(T.T)
    labels = Labels(y.astype(np.float64))
    # sigma (kernel width) is expected to be defined by the enclosing benchmark script
    kernel = GaussianKernel(feat, feat, sigma)
    shogun_svm = LibSVM(1., kernel, labels)
    shogun_svm.train()
    dec_func = shogun_svm.classify(feat_test).get_labels()
    score = np.mean(np.sign(dec_func) == valid)
    return score, datetime.now() - start
def classifier_libsvm_minimal_modular(fm_train_real=traindat, fm_test_real=testdat, label_train_twoclass=label_traindat, width=2.1, C=1):
    from shogun.Features import RealFeatures, BinaryLabels
    from shogun.Classifier import LibSVM
    from shogun.Kernel import GaussianKernel

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)
    kernel = GaussianKernel(feats_train, feats_train, width)

    labels = BinaryLabels(label_train_twoclass)
    svm = LibSVM(C, kernel, labels)
    svm.train()

    kernel.init(feats_train, feats_test)
    out = svm.apply().get_labels()
    testerr = mean(sign(out) != label_train_twoclass)
def classifier_custom_kernel_modular(C=1, dim=7):
    from shogun.Features import RealFeatures, Labels
    from shogun.Kernel import CustomKernel
    from shogun.Classifier import LibSVM

    from numpy import diag, ones, sign
    from numpy.random import rand, seed

    seed((C, dim))

    lab = sign(2 * rand(dim) - 1)
    data = rand(dim, dim)
    symdata = data * data.T + diag(ones(dim))

    kernel = CustomKernel()
    # use the symmetric matrix as the custom kernel matrix
    kernel.set_full_kernel_matrix_from_full(symdata)
    labels = Labels(lab)
    svm = LibSVM(C, kernel, labels)
    svm.train()
    predictions = svm.apply()
    out = svm.apply().get_labels()
    return svm, out
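# Hedged usage note (assumption, not part of the original file): unlike the other snippets,
# this example builds its own random data, so a plain call is enough; it returns the trained
# SVM and one prediction per training point. The _custom_* names are illustrative only.
_custom_svm, _custom_out = classifier_custom_kernel_modular(C=1, dim=7)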
labels_presvm[2] = 1
labels_presvm[12] = 1
labels_presvm[15] = 1
labels_presvm[8] = 1
labels_presvm[19] = 1

feat_presvm = StringCharFeatures(DNA)
feat_presvm.set_features(examples_presvm)
wdk_presvm = WeightedDegreeStringKernel(feat_presvm, feat_presvm, 1)
lab_presvm = Labels(numpy.array(labels_presvm))

presvm = SVMLight(1, wdk_presvm, lab_presvm)
presvm.train()

presvm2 = LibSVM(1, wdk_presvm, lab_presvm)
presvm2.train()

print "svmlight", presvm.get_objective()
print "libsvm", presvm2.get_objective()

assert(abs(presvm.get_objective() - presvm2.get_objective()) <= 0.001)

print "simple svm", presvm.get_objective()
print "len(examples_presvm)", len(examples_presvm)
print "##############"

#############################################
#    compute linear term manually
from numpy import random
from shogun.Classifier import LibSVM
from shogun.Features import RealFeatures, Labels
from shogun.Kernel import LinearKernel

num_feats = 23
num_vec = 42

scale = 2.1
size_cache = 10

C = 0.017
epsilon = 1e-5
tube_epsilon = 1e-2

svm = LibSVM()
svm.set_C(C, C)
svm.set_epsilon(epsilon)
svm.set_tube_epsilon(tube_epsilon)

for i in xrange(3):
    data_train = random.rand(num_feats, num_vec)
    data_test = random.rand(num_feats, num_vec)
    feats_train = RealFeatures(data_train)
    feats_test = RealFeatures(data_test)
    labels = Labels(random.rand(num_vec).round() * 2 - 1)

    svm.set_kernel(LinearKernel(size_cache, scale))
    svm.set_labels(labels)

    kernel = svm.get_kernel()
##################################################################
# scale the kernel matrix entries by pairwise task relatedness
km = wdk.get_kernel_matrix()

for i in xrange(N):
    for j in xrange(N):
        km[i, j] = km[i, j] * relate_tasks(i, j)
#km = km*1.0
print km

# precompute kernel matrix using shogun
y = numpy.array(labels)
K = numpy.transpose(y.flatten() * (km * y.flatten()).transpose())
f = -numpy.ones(N)
C = 1.0

# Important!! QP does not accept ndarray as a type, it must be an array
p = QP(K, f, Aeq=y, beq=0, lb=numpy.zeros(N), ub=C * numpy.ones(N))
r = p.solve('cvxopt_qp', iprint=0)
#print "cvxopt objective:", r.ff
print "externally modified kernel. objective:", r.ff

ck = CustomKernel()
ck.set_full_kernel_matrix_from_full(km)

svm = LibSVM(1, ck, lab)
svm.train()
print "externally modified kernel. objective:", svm.get_objective()
#############################################
#    create real-valued features as first step
examples_presvm = numpy.array(examples_presvm, dtype=numpy.float64)
examples_presvm = numpy.transpose(examples_presvm)

feat_presvm = RealFeatures(examples_presvm)
lab_presvm = Labels(numpy.array(labels_presvm))
wdk_presvm = LinearKernel(feat_presvm, feat_presvm)

presvm_liblinear = LibLinear(1, feat_presvm, lab_presvm)
presvm_liblinear.set_max_iterations(10000)
presvm_liblinear.set_bias_enabled(False)
presvm_liblinear.train()

presvm_libsvm = LibSVM(1, wdk_presvm, lab_presvm)
#presvm_libsvm = SVMLight(1, wdk_presvm, lab_presvm)
#presvm_libsvm.io.set_loglevel(MSG_DEBUG)
presvm_libsvm.set_bias_enabled(False)
presvm_libsvm.train()

my_w = presvm_liblinear.get_w()
presvm_liblinear = LibLinear(1, feat_presvm, lab_presvm)
presvm_liblinear.set_w(my_w)

#############################################
#    compute linear term manually
#############################################

examples = numpy.array(examples, dtype=numpy.float64)
from shogun.Classifier import LibSVM, LDA
from shogun.Kernel import GaussianKernel
from shogun.Evaluation import PRCEvaluation
import util

util.set_title('PRC example')
util.DISTANCE = 0.5
subplots_adjust(hspace=0.3)

pos = util.get_realdata(True)
neg = util.get_realdata(False)
features = util.get_realfeatures(pos, neg)
labels = util.get_labels()

# classifiers
gk = GaussianKernel(features, features, 1.0)
svm = LibSVM(1000.0, gk, labels)
svm.train()
lda = LDA(1, features, labels)
lda.train()

## plot points
subplot(211)
plot(pos[0,:], pos[1,:], "r.")
plot(neg[0,:], neg[1,:], "b.")
grid(True)
title('Data', size=10)

# plot PRC for SVM
subplot(223)
PRC_evaluation = PRCEvaluation()
PRC_evaluation.evaluate(svm.classify(), labels)
def training_run(options):
    """Conduct a training run and return a trained SVM kernel"""
    settings = MotifFinderSettings(kirmes_ini.MOTIF_LENGTH, options.window_width, options.replace)
    positives = MotifFinder(finder_settings=settings)
    positives.setFastaFile(options.positives)
    positives.setMotifs(options.pgff)
    pmotifs, ppositions = positives.getResults()
    negatives = MotifFinder(finder_settings=settings)
    negatives.setFastaFile(options.negatives)
    negatives.setMotifs(options.ngff)
    nmotifs, npositions = negatives.getResults()

    wds_kparams = kirmes_ini.WDS_KERNEL_PARAMETERS
    wds_svm = EasySVM.EasySVM(wds_kparams)
    num_positives = len(pmotifs.values()[0])
    num_negatives = len(nmotifs.values()[0])

    # creating kernel objects
    kernel = CombinedKernel()
    features = CombinedFeatures()
    kernel_array = []
    motifs = pmotifs.keys()
    motifs.sort()

    # adding k-mer kernels
    for motif in motifs:
        all_examples = pmotifs[motif] + nmotifs[motif]
        motif_features = wds_svm.createFeatures(all_examples)
        wds_kernel = WeightedDegreePositionStringKernel(motif_features, motif_features,
                                                        wds_kparams['degree'])
        wds_kernel.set_shifts(wds_kparams['shift'] * ones(wds_kparams['seqlength'], dtype=int32))
        features.append_feature_obj(motif_features)
        kernel_array.append(wds_kernel)
        kernel.append_kernel(wds_kernel)

    rbf_svm = EasySVM.EasySVM(kirmes_ini.RBF_KERNEL_PARAMETERS)
    positions = array(ppositions + npositions, dtype=float64).T
    position_features = rbf_svm.createFeatures(positions)
    features.append_feature_obj(position_features)
    motif_labels = append(ones(num_positives), -ones(num_negatives))
    complete_labels = Labels(motif_labels)

    rbf_kernel = GaussianKernel(position_features, position_features,
                                kirmes_ini.RBF_KERNEL_PARAMETERS['width'])
    kernel_array.append(rbf_kernel)
    kernel.append_kernel(rbf_kernel)

    # kernel init
    kernel.init(features, features)
    kernel.set_cache_size(kirmes_ini.K_CACHE_SIZE)

    svm = LibSVM(kirmes_ini.K_COMBINED_C, kernel, complete_labels)
    svm.parallel.set_num_threads(kirmes_ini.K_NUM_THREADS)

    # training
    svm.train()

    if not os.path.exists(options.output_path):
        os.mkdir(options.output_path)
    html = {}
    if options.contrib:
        html["contrib"] = contrib(svm, kernel, motif_labels, kernel_array, motifs)
    if options.logos:
        html["poims"] = poims(svm, kernel, kernel_array, motifs, options.output_path)
    if options.query:
        html["query"] = evaluate(options, svm, kernel, features, motifs)
    htmlize(html, options.output_html)