def evaluation_thresholds_modular(index):
    """Compare ROC rates reported by ROCEvaluation with hand-computed rates.

    Synthetic scores are built from a ramp over [-1, 1) mixed with seeded
    uniform noise; the first half of the samples is labelled -1 and the
    second half +1.

    Args:
        index: Position into the ROC arrays / threshold array to inspect.

    Returns:
        A 4-tuple ``(tp[index], fp[index], manual_tpr, manual_fpr)`` where
        the manual rates are the fractions of positive / negative samples
        whose score exceeds the threshold at ``index``.
    """
    from modshogun import BinaryLabels, ROCEvaluation
    import numpy

    # Fixed seed so the noise term is reproducible.
    numpy.random.seed(17)
    ramp = numpy.arange(-1, 1, 0.001)
    noise = numpy.random.rand(len(ramp)) - 0.5
    output = 0.3 * ramp + 0.7 * noise

    half = len(output) // 2
    label = numpy.array([-1.0] * half + [1.0] * half)

    pred = BinaryLabels(output)
    truth = BinaryLabels(label)

    evaluator = ROCEvaluation()
    evaluator.evaluate(pred, truth)

    # The ROC result is unpacked in the same order as the original code.
    fp, tp = evaluator.get_ROC()
    threshold = evaluator.get_thresholds()[index]

    positive_scores = output[label > 0]
    negative_scores = output[label < 0]
    manual_tpr = numpy.mean(positive_scores > threshold)
    manual_fpr = numpy.mean(negative_scores > threshold)
    return tp[index], fp[index], manual_tpr, manual_fpr
def classifier_domainadaptationsvm_modular(
        fm_train_dna=traindna, fm_test_dna=testdna,
        label_train_dna=label_traindna, label_test_dna=label_testdna,
        fm_train_dna2=traindna2, fm_test_dna2=testdna2,
        label_train_dna2=label_traindna2, label_test_dna2=label_testdna2,
        C=1, degree=3):
    """Train an SVMLight on one data set, then a DomainAdaptationSVM on a second.

    The first SVM is trained on ``fm_train_dna`` / ``label_train_dna``. The
    domain-adaptation SVM is then trained on ``fm_train_dna2`` /
    ``label_train_dna2`` and regularised against the first SVM.

    Previously the ``*2`` arguments were ignored and the first data set was
    reused for the second stage; they are now actually used.

    Args:
        fm_train_dna: DNA strings used to train the initial SVM.
        fm_test_dna: Unused; kept for interface compatibility.
        label_train_dna: Labels for ``fm_train_dna``.
        label_test_dna: Unused; kept for interface compatibility.
        fm_train_dna2: DNA strings used to train the domain-adaptation SVM.
        fm_test_dna2: DNA strings the domain-adaptation SVM is applied to.
        label_train_dna2: Labels for ``fm_train_dna2``.
        label_test_dna2: Unused; kept for interface compatibility.
        C: Regularisation constant passed to both SVMs.
        degree: Degree of the weighted-degree string kernels.

    Returns:
        The result of ``dasvm.apply_binary`` on the second test set.
    """
    # Stage 1: plain SVM on the first data set.
    feats_train = StringCharFeatures(fm_train_dna, DNA)
    kernel = WeightedDegreeStringKernel(feats_train, feats_train, degree)
    labels = BinaryLabels(label_train_dna)
    svm = SVMLight(C, kernel, labels)
    svm.train()

    # Stage 2: domain-adaptation SVM on the second data set.
    feats_train2 = StringCharFeatures(fm_train_dna2, DNA)
    feats_test2 = StringCharFeatures(fm_test_dna2, DNA)
    kernel2 = WeightedDegreeStringKernel(feats_train2, feats_train2, degree)
    labels2 = BinaryLabels(label_train_dna2)

    # We regularize against the previously obtained solution.
    dasvm = DomainAdaptationSVM(C, kernel2, labels2, svm, 1.0)
    dasvm.train()

    out = dasvm.apply_binary(feats_test2)
    return out  # TODO: also return dasvm
def evaluation_director_contingencytableevaluation_modular(ground_truth, predicted):
    """Evaluate predictions with a custom director-based contingency evaluator.

    The custom score is ``get_WRACC() + get_BAL()`` and is declared as a
    quantity to maximise.

    The labels are passed to ``evaluate`` as (predicted, ground truth), the
    order used by every other evaluation call in this file. The previous
    code passed them the other way round, which swaps false positives and
    false negatives in the contingency table.

    Args:
        ground_truth: Raw ground-truth label values.
        predicted: Raw predicted label values.

    Returns:
        ``(r, r2)`` where ``r`` is the value returned by ``evaluate`` and
        ``r2`` the value of ``get_custom_score()`` afterwards, or ``None``
        when the director classes cannot be imported.
    """
    try:
        from modshogun import DirectorContingencyTableEvaluation, ED_MAXIMIZE
    except ImportError:
        print("recompile shogun with --enable-swig-directors")
        return

    class SimpleWeightedBinaryEvaluator(DirectorContingencyTableEvaluation):
        """Director subclass scoring with WRACC + BAL."""

        def __init__(self):
            DirectorContingencyTableEvaluation.__init__(self)

        def get_custom_direction(self):
            return ED_MAXIMIZE

        def get_custom_score(self):
            return self.get_WRACC() + self.get_BAL()

    from modshogun import BinaryLabels

    evaluator = SimpleWeightedBinaryEvaluator()
    ground_truth_labels = BinaryLabels(ground_truth)
    predicted_labels = BinaryLabels(predicted)
    # Predicted labels first, ground truth second.
    r = evaluator.evaluate(predicted_labels, ground_truth_labels)
    r2 = evaluator.get_custom_score()
    print(r, r2)

    return r, r2
def outputResultsClassificationWithMajorityClass(out1, out2, out1DecisionValues, out2DecisionValues, train_lt, test_lt, test_majorityClass):
    """Write predictions to file and print accuracy / AUC summaries.

    Args:
        out1: Predicted labels for the training examples.
        out2: Predicted labels for the validation examples.
        out1DecisionValues: Label object with training decision values,
            passed to ROCEvaluation.
        out2DecisionValues: Label object with validation decision values,
            passed to ROCEvaluation.
        train_lt: True training labels.
        test_lt: True validation labels (compared element-wise with ``> 0``,
            so presumably a numpy array -- confirm against callers).
        test_majorityClass: Per-example majority-class indicator; ``1``
            counts as majority, anything else as minority.
    """
    # Output the results to the appropriate output files
    writeFloatList(out1, TRAINPREDICTIONSEPSILONFILENAME)
    writeFloatList(out2, VALIDATIONPREDICTIONSEPSILONFILENAME)

    # Count training predictions that agree with the true labels.
    numTrainCorrect = sum(
        1 for pos in range(len(train_lt)) if out1[pos] == train_lt[pos])
    fracTrainCorrect = float(numTrainCorrect) / float(len(train_lt))
    print("Training accuracy:")
    print(fracTrainCorrect)

    trainLabels = BinaryLabels(train_lt)
    evaluatorTrain = ROCEvaluation()
    evaluatorTrain.evaluate(out1DecisionValues, trainLabels)
    print("Training AUC:")
    print(evaluatorTrain.get_auROC())

    numValidCorrect = 0
    numPosCorrect = 0
    numNegCorrect = 0
    numMajorityClassCorrect = 0
    numMinorityClassCorrect = 0
    for pos in range(len(test_lt)):
        # Only correct validation predictions contribute to any counter.
        if not (out2[pos] == test_lt[pos]):
            continue
        numValidCorrect += 1
        if (out2[pos] == 1) and (test_lt[pos] == 1):
            # Correctly predicted positive example
            numPosCorrect += 1
        else:
            numNegCorrect += 1
        if test_majorityClass[pos] == 1:
            numMajorityClassCorrect += 1
        else:
            numMinorityClassCorrect += 1

    fracValidCorrect = float(numValidCorrect) / float(len(test_lt))
    print("Validation accuracy:")
    print(fracValidCorrect)
    print("Fraction of correct positive examples:")
    print(float(numPosCorrect) / float(len(np.where(test_lt > 0)[0])))
    print("Fraction of correct negative examples:")
    print(float(numNegCorrect) / float(len(np.where(test_lt <= 0)[0])))
    print("Fraction of correct majority class examples:")
    print(float(numMajorityClassCorrect) / float(len(np.where(test_majorityClass > 0)[0])))
    print("Fraction of correct minority class examples:")
    print(float(numMinorityClassCorrect) / float(len(np.where(test_majorityClass <= 0)[0])))

    validLabels = BinaryLabels(test_lt)
    evaluatorValid = ROCEvaluation()
    evaluatorValid.evaluate(out2DecisionValues, validLabels)
    print("Validation AUC:")
    print(evaluatorValid.get_auROC())
def modelselection_grid_search_kernel(num_subsets, num_vectors, dim_vectors):
    """Grid-search model selection for a LibSVM on random data.

    Args:
        num_subsets: Number of subsets for the stratified splitting.
        num_vectors: Number of random feature vectors (and labels).
        dim_vectors: Dimension of each feature vector.

    Returns:
        ``(classifier, result, mean)``: the LibSVM with the selected
        parameters applied, the final cross-validation result, and the
        ``mean`` attribute of that result after casting.
    """
    # Seed both random sources for reproducibility.
    Math.init_random(1)
    random.seed(1)

    # Random (meaningless) data: dim_vectors x num_vectors matrix.
    data = random.rand(dim_vectors, num_vectors)
    features = RealFeatures()
    features.set_feature_matrix(data)

    # Two classes: even indices get +1, odd indices get -1.
    labels = BinaryLabels(num_vectors)
    for position in range(num_vectors):
        sign = 1 if position % 2 == 0 else -1
        labels.set_label(position, sign)

    classifier = LibSVM()

    # Cross-validation setup: stratified splits scored by accuracy.
    splitter = StratifiedCrossValidationSplitting(labels, num_subsets)
    criterion = ContingencyTableEvaluation(ACCURACY)
    cross = CrossValidation(classifier, features, labels,
                            splitter, criterion)
    cross.set_num_runs(1)

    # Parameter tree is built by a helper defined elsewhere in this file.
    param_tree = create_param_tree()
    grid_search = GridSearchModelSelection(cross, param_tree)

    print_state = False
    best_combination = grid_search.select_model(print_state)
    best_combination.apply_to_machine(classifier)

    # Larger number of runs to have tighter confidence intervals.
    cross.set_num_runs(10)
    cross.set_conf_int_alpha(0.01)
    result = cross.evaluate()
    casted = CrossValidationResult.obtain_from_generic(result)

    return classifier, result, casted.mean
def modelselection_grid_search_kernel(num_subsets, num_vectors, dim_vectors):
    """Select LibSVM parameters by grid search and cross-validate the result.

    Random features and alternating +1/-1 labels are generated, a grid
    search over the tree from ``create_param_tree()`` picks the best
    parameter combination, and a final 10-run cross-validation is done.

    Args:
        num_subsets: Number of cross-validation subsets.
        num_vectors: Number of examples to generate.
        dim_vectors: Feature dimension of each example.

    Returns:
        Tuple ``(classifier, result, casted.mean)``.
    """
    # Init seeds for reproducibility.
    Math.init_random(1)
    random.seed(1)

    # Create some (nonsense) data.
    features = RealFeatures()
    features.set_feature_matrix(random.rand(dim_vectors, num_vectors))

    # Create labels, two classes (alternating).
    labels = BinaryLabels(num_vectors)
    for idx in range(num_vectors):
        if idx % 2 == 0:
            labels.set_label(idx, 1)
        else:
            labels.set_label(idx, -1)

    svm = LibSVM()

    # Cross-validation object used during model selection.
    cross = CrossValidation(
        svm,
        features,
        labels,
        StratifiedCrossValidationSplitting(labels, num_subsets),
        ContingencyTableEvaluation(ACCURACY),
    )
    cross.set_num_runs(1)

    # Model parameter selection.
    selector = GridSearchModelSelection(cross, create_param_tree())
    winner = selector.select_model(False)
    winner.apply_to_machine(svm)

    # More runs give tighter confidence intervals.
    cross.set_num_runs(10)
    cross.set_conf_int_alpha(0.01)
    result = cross.evaluate()
    casted = CrossValidationResult.obtain_from_generic(result)

    return svm, result, casted.mean
def evaluation_rocevaluation_modular(ground_truth, predicted):
    """Run a ROC evaluation of predictions against ground truth.

    Args:
        ground_truth: Raw ground-truth label values.
        predicted: Raw predicted values.

    Returns:
        ``(roc, auroc)`` as returned by ``get_ROC()`` and ``get_auROC()``.
    """
    from modshogun import BinaryLabels
    from modshogun import ROCEvaluation

    truth = BinaryLabels(ground_truth)
    guesses = BinaryLabels(predicted)

    roc_evaluator = ROCEvaluation()
    roc_evaluator.evaluate(guesses, truth)

    roc = roc_evaluator.get_ROC()
    auroc = roc_evaluator.get_auROC()
    return roc, auroc
def label_function():
    """Demonstrate two ways of constructing BinaryLabels.

    Creates a label object of size 5 and prints its size and values, then
    builds a second label object from the CSV file ``csv/label.csv``.
    """
    from modshogun import BinaryLabels
    from modshogun import CSVFile

    # Label container with 5 entries.
    label = BinaryLabels(5)
    count = label.get_num_labels()
    print(count)  # expected: 5
    values = label.get_values()
    print(values)  # expected: array with 5 label values

    # Labels loaded from a CSV file.
    csv_source = CSVFile("csv/label.csv")
    label_from_csv = BinaryLabels(csv_source)
def classifier_featureblock_logistic_regression(fm_train=traindat, fm_test=testdat, label_train=label_traindat):
    """Train a FeatureBlockLogisticRegression on duplicated training data.

    The training matrix and the labels are each horizontally stacked with
    themselves, and the feature index range is split into two blocks.

    The features are built from the ``fm_train`` argument; previously the
    module-level ``traindat`` was used and the argument was ignored.

    Args:
        fm_train: Training feature matrix.
        fm_test: Unused; kept for interface compatibility.
        label_train: Training labels.

    Returns:
        The labels from ``mtlr.apply().get_labels()``. If
        FeatureBlockLogisticRegression is unavailable the process exits
        with status 0.
    """
    from modshogun import BinaryLabels, RealFeatures, IndexBlock, IndexBlockGroup
    try:
        from modshogun import FeatureBlockLogisticRegression
    except ImportError:
        print("FeatureBlockLogisticRegression not available")
        exit(0)

    features = RealFeatures(hstack((fm_train, fm_train)))
    labels = BinaryLabels(hstack((label_train, label_train)))

    # Two contiguous blocks covering the lower and upper half of the features.
    n_features = features.get_num_features()
    middle = n_features // 2
    block_group = IndexBlockGroup()
    block_group.add_block(IndexBlock(0, middle))
    block_group.add_block(IndexBlock(middle, n_features))

    mtlr = FeatureBlockLogisticRegression(0.1, features, labels, block_group)
    mtlr.set_regularization(1)  # use regularization ratio
    mtlr.set_tolerance(1e-2)  # use 1e-2 tolerance
    mtlr.train()
    out = mtlr.apply().get_labels()

    return out
def cross_validation(X, Y, d, c, K):
    """Estimate mean accuracy with K-fold cross-validation.

    The data is cut into ``K`` consecutive, non-overlapping folds of
    ``len(Y) // K`` examples. Fold ``k`` covers indices
    ``[k * n, (k + 1) * n)``; the previous code used ``[k, k + n)``, which
    made the test windows overlap and left most of the data untested. Any
    remainder examples (when ``len(Y)`` is not a multiple of ``K``) are
    never used for testing but stay in every training set.

    Args:
        X: Sequence of DNA strings.
        Y: Sequence of labels, same length as ``X``.
        d: Degree value forwarded to ``svm_process``.
        c: C value forwarded to ``svm_process``.
        K: Number of folds.

    Returns:
        Mean of the per-fold accuracies returned by ``svm_process``.
    """
    N = len(Y)
    # Floor division keeps the fold size an int on Python 2 and 3.
    n = N // K
    accuracy_list = []
    for k in range(K):
        print('degree = %s\tC = %s\tcross_validation_iter = %s/%s' % (d, c, k + 1, K))
        sys.stdout.flush()

        start = k * n
        stop = start + n

        X_test = list(X[start:stop])
        Y_test = list(Y[start:stop])
        X_train = list(X[:start]) + list(X[stop:])
        Y_train = list(Y[:start]) + list(Y[stop:])

        X_train = StringCharFeatures(X_train, DNA)
        X_test = StringCharFeatures(X_test, DNA)
        Y_train = BinaryLabels(np.array(Y_train, dtype=np.float64))
        Y_test = np.array(Y_test)

        args_tuple = (X_train, Y_train, X_test, Y_test, d, c)
        # The second return value is not needed here.
        accuracy, _ = svm_process(args_tuple)
        accuracy_list.append(accuracy)
    return np.array(accuracy_list).mean()
def classifier_gpbtsvm_modular(train_fname=traindat, test_fname=testdat, label_fname=label_traindat, width=2.1, C=1, epsilon=1e-5):
    """Train a GPBTSVM with a Gaussian kernel on CSV data and predict.

    Args:
        train_fname: Path of the CSV file with training features.
        test_fname: Path of the CSV file with test features.
        label_fname: Path of the CSV file with training labels.
        width: Width passed to the Gaussian kernel.
        C: SVM regularisation constant.
        epsilon: Value passed to ``set_epsilon``.

    Returns:
        ``(predictions, svm, predictions.get_labels())``. If GPBTSVM is
        unavailable the process exits with status 0.
    """
    from modshogun import RealFeatures, BinaryLabels
    from modshogun import GaussianKernel
    from modshogun import CSVFile
    try:
        from modshogun import GPBTSVM
    except ImportError:
        print("GPBTSVM not available")
        exit(0)

    # Load features and labels from their CSV files.
    feats_train = RealFeatures(CSVFile(train_fname))
    feats_test = RealFeatures(CSVFile(test_fname))
    labels = BinaryLabels(CSVFile(label_fname))

    gaussian = GaussianKernel(feats_train, feats_train, width)

    machine = GPBTSVM(C, gaussian, labels)
    machine.set_epsilon(epsilon)
    machine.train()

    predictions = machine.apply(feats_test)
    predicted_labels = predictions.get_labels()
    return predictions, machine, predicted_labels
def transfer_multitask_l12_logistic_regression(fm_train=traindat, fm_test=testdat, label_train=label_traindat):
    """Train a two-task MultitaskL12LogisticRegression on duplicated data.

    The training matrix and labels are each horizontally stacked with
    themselves; the first half of the vectors forms task one and the second
    half task two.

    The features are built from the ``fm_train`` argument; previously the
    module-level ``traindat`` was used and the argument was ignored.

    Args:
        fm_train: Training feature matrix.
        fm_test: Unused; kept for interface compatibility.
        label_train: Training labels.

    Returns:
        Labels from ``apply_regression().get_labels()`` with the current
        task set to 0. If MultitaskL12LogisticRegression is unavailable the
        process exits with status 0.
    """
    from modshogun import BinaryLabels, RealFeatures, Task, TaskGroup
    try:
        from modshogun import MultitaskL12LogisticRegression
    except ImportError:
        print("MultitaskL12LogisticRegression not available")
        exit(0)

    features = RealFeatures(hstack((fm_train, fm_train)))
    labels = BinaryLabels(hstack((label_train, label_train)))

    # Split the vector index range into two equally sized tasks.
    n_vectors = features.get_num_vectors()
    middle = n_vectors // 2
    task_group = TaskGroup()
    task_group.append_task(Task(0, middle))
    task_group.append_task(Task(middle, n_vectors))

    mtlr = MultitaskL12LogisticRegression(0.1, 0.1, features, labels, task_group)
    mtlr.set_tolerance(1e-2)  # use 1e-2 tolerance
    mtlr.set_max_iter(10)
    mtlr.train()
    mtlr.set_current_task(0)
    out = mtlr.apply_regression().get_labels()

    return out
def classifier_svmlight_linear_term_modular(fm_train_dna=traindna, fm_test_dna=testdna,
                                            label_train_dna=label_traindna, degree=3,
                                            C=10, epsilon=1e-5, num_threads=1):
    """Train an SVMLight with a custom linear term on DNA strings.

    Args:
        fm_train_dna: Training DNA strings.
        fm_test_dna: Test DNA strings.
        label_train_dna: Training labels.
        degree: Degree of the weighted-degree string kernel.
        C: SVM regularisation constant.
        epsilon: Value passed to ``set_epsilon``.
        num_threads: Number of threads set on ``svm.parallel``.

    Returns:
        ``(out, kernel)`` where ``out`` are the labels from ``svm.apply()``
        after the kernel was re-initialised with the test features. If
        SVMLight is unavailable the process exits with status 0.
    """
    from modshogun import StringCharFeatures, BinaryLabels, DNA
    from modshogun import WeightedDegreeStringKernel
    try:
        from modshogun import SVMLight
    except ImportError:
        print("SVMLight is not available")
        exit(0)

    feats_train = StringCharFeatures(DNA)
    feats_train.set_features(fm_train_dna)
    feats_test = StringCharFeatures(DNA)
    feats_test.set_features(fm_test_dna)

    kernel = WeightedDegreeStringKernel(feats_train, feats_train, degree)
    labels = BinaryLabels(label_train_dna)

    # Hard-coded linear term with 10 entries, negated before use.
    linear_term = -numpy.array([1, 2, 3, 4, 5, 6, 7, 8, 7, 6],
                               dtype=numpy.double)

    svm = SVMLight(C, kernel, labels)
    svm.set_qpsize(3)
    svm.set_linear_term(linear_term)
    svm.set_epsilon(epsilon)
    svm.parallel.set_num_threads(num_threads)
    svm.train()

    # Switch the kernel's right-hand side to the test data before applying.
    kernel.init(feats_train, feats_test)
    out = svm.apply().get_labels()
    return out, kernel
def runShogunSVMDNAWDKernel(train_xt, train_lt, test_xt):
    """Train a LibSVM with a weighted-degree position string kernel.

    Uses the module-level settings ``DEGREE``, ``NUMSHIFTS`` and ``SVMC``.

    Args:
        train_xt: Training DNA strings; the length of the first string
            determines the length of the shift and position-weight vectors.
        train_lt: Training labels.
        test_xt: Test DNA strings.

    Returns:
        ``(out1, out2, out1DecisionValues, out2DecisionValues)``: predicted
        labels for train / test and the label objects they came from.
    """
    # Set up features and kernel.
    feats_train = StringCharFeatures(train_xt, DNA)
    feats_test = StringCharFeatures(test_xt, DNA)

    kernel = WeightedDegreePositionStringKernel(feats_train, feats_train, DEGREE)
    kernel.io.set_loglevel(MSG_DEBUG)
    seq_len = len(train_xt[0])
    kernel.set_shifts(NUMSHIFTS * ones(seq_len, dtype=int32))
    kernel.set_position_weights(ones(seq_len, dtype=float64))

    labels = BinaryLabels(train_lt)

    # Train the SVM.
    print("Ready to train!")
    svm = LibSVM(SVMC, kernel, labels)
    svm.io.set_loglevel(MSG_DEBUG)
    svm.train()

    # Predict on training data, then on test data.
    print("Making predictions!")
    out1DecisionValues = svm.apply(feats_train)
    out1 = out1DecisionValues.get_labels()
    kernel.init(feats_train, feats_test)
    out2DecisionValues = svm.apply(feats_test)
    out2 = out2DecisionValues.get_labels()

    return out1, out2, out1DecisionValues, out2DecisionValues
def runShogunSVMDNASubsequenceStringKernel(train_xt, train_lt, test_xt):
    """Train a LibSVM with a SubsequenceStringKernel on DNA strings.

    Uses the module-level settings ``MAXLEN``, ``DECAY`` and ``SVMC``.

    Args:
        train_xt: Training DNA strings.
        train_lt: Training labels.
        test_xt: Test DNA strings.

    Returns:
        ``(out1, out2, out1DecisionValues, out2DecisionValues)``: predicted
        labels for train / test and the label objects they came from.
    """
    # Set up features and kernel.
    feats_train = StringCharFeatures(train_xt, DNA)
    feats_test = StringCharFeatures(test_xt, DNA)

    kernel = SubsequenceStringKernel(feats_train, feats_train, MAXLEN, DECAY)
    kernel.io.set_loglevel(MSG_DEBUG)
    kernel.init(feats_train, feats_train)

    labels = BinaryLabels(train_lt)

    # Train the SVM.
    print("Ready to train!")
    svm = LibSVM(SVMC, kernel, labels)
    svm.io.set_loglevel(MSG_DEBUG)
    svm.train()

    # Predict on training data, then on test data.
    print("Making predictions!")
    out1DecisionValues = svm.apply(feats_train)
    out1 = out1DecisionValues.get_labels()
    kernel.init(feats_train, feats_test)
    out2DecisionValues = svm.apply(feats_test)
    out2 = out2DecisionValues.get_labels()

    return out1, out2, out1DecisionValues, out2DecisionValues
def transfer_multitask_clustered_logistic_regression(fm_train=traindat, fm_test=testdat, label_train=label_traindat):
    """Train a three-task MultitaskClusteredLogisticRegression.

    The feature matrix is the horizontal stack of ``fm_train``,
    ``sin(fm_train)`` and ``cos(fm_train)``; the labels are repeated three
    times, and each third of the vectors forms one task.

    The features are built from the ``fm_train`` argument; previously the
    module-level ``traindat`` was used and the argument was ignored.

    Args:
        fm_train: Training feature matrix.
        fm_test: Unused; kept for interface compatibility.
        label_train: Training labels.

    Returns:
        Labels from ``apply_regression().get_labels()`` with the current
        task set to 0.
    """
    from modshogun import BinaryLabels, RealFeatures, Task, TaskGroup, MultitaskClusteredLogisticRegression, MSG_DEBUG

    features = RealFeatures(hstack((fm_train, sin(fm_train), cos(fm_train))))
    labels = BinaryLabels(hstack((label_train, label_train, label_train)))

    # Three consecutive tasks, one per stacked copy of the data.
    n_vectors = features.get_num_vectors()
    first_cut = n_vectors // 3
    second_cut = 2 * n_vectors // 3
    task_group = TaskGroup()
    task_group.append_task(Task(0, first_cut))
    task_group.append_task(Task(first_cut, second_cut))
    task_group.append_task(Task(second_cut, n_vectors))

    mtlr = MultitaskClusteredLogisticRegression(1.0, 100.0, features, labels, task_group, 2)
    # mtlr.io.set_loglevel(MSG_DEBUG)
    mtlr.set_tolerance(1e-3)  # use 1e-3 tolerance
    mtlr.set_max_iter(100)
    mtlr.train()
    mtlr.set_current_task(0)
    out = mtlr.apply_regression().get_labels()

    return out
def classifier_svmlight_modular(fm_train_dna=traindat, fm_test_dna=testdat, label_train_dna=label_traindat, C=1.2, epsilon=1e-5, num_threads=1):
    """Train an SVMLight with a degree-20 weighted-degree string kernel.

    Args:
        fm_train_dna: Training DNA strings.
        fm_test_dna: Test DNA strings.
        label_train_dna: Training labels.
        C: SVM regularisation constant.
        epsilon: Value passed to ``set_epsilon``.
        num_threads: Number of threads set on ``svm.parallel``.

    Returns:
        The kernel, re-initialised with (train, test) features, or ``None``
        when SVMLight cannot be imported. The predicted labels are computed
        but not returned.
    """
    from modshogun import StringCharFeatures, BinaryLabels, DNA
    from modshogun import WeightedDegreeStringKernel
    try:
        from modshogun import SVMLight
    except ImportError:
        print('No support for SVMLight available.')
        return

    feats_train = StringCharFeatures(DNA)
    feats_train.set_features(fm_train_dna)
    feats_test = StringCharFeatures(DNA)
    feats_test.set_features(fm_test_dna)

    degree = 20
    kernel = WeightedDegreeStringKernel(feats_train, feats_train, degree)
    labels = BinaryLabels(label_train_dna)

    machine = SVMLight(C, kernel, labels)
    machine.set_epsilon(epsilon)
    machine.parallel.set_num_threads(num_threads)
    machine.train()

    # Apply on the test data; the resulting labels are discarded.
    kernel.init(feats_train, feats_test)
    machine.apply().get_labels()
    return kernel
def kernel_salzberg_word_string_modular(fm_train_dna=traindat, fm_test_dna=testdat, label_train_dna=label_traindat, order=3, gap=0, reverse=False):
    """Compute train and test SalzbergWordStringKernel matrices.

    Args:
        fm_train_dna: Training DNA strings.
        fm_test_dna: Test DNA strings.
        label_train_dna: Training labels.
        order: Word order used when converting chars to words.
        gap: Gap argument forwarded to ``obtain_from_char``.
        reverse: Reverse flag forwarded to ``obtain_from_char``.

    Returns:
        ``(km_train, km_test, kernel)``.
    """
    from modshogun import StringCharFeatures, StringWordFeatures, DNA, BinaryLabels
    from modshogun import SalzbergWordStringKernel
    from modshogun import PluginEstimate

    # Word features for the training strings.
    train_chars = StringCharFeatures(fm_train_dna, DNA)
    feats_train = StringWordFeatures(train_chars.get_alphabet())
    feats_train.obtain_from_char(train_chars, order - 1, order, gap, reverse)

    # Word features for the test strings.
    test_chars = StringCharFeatures(fm_test_dna, DNA)
    feats_test = StringWordFeatures(test_chars.get_alphabet())
    feats_test.obtain_from_char(test_chars, order - 1, order, gap, reverse)

    # Plugin estimate trained on the training words.
    pie = PluginEstimate()
    labels = BinaryLabels(label_train_dna)
    pie.set_labels(labels)
    pie.set_features(feats_train)
    pie.train()

    kernel = SalzbergWordStringKernel(feats_train, feats_train, pie, labels)
    km_train = kernel.get_kernel_matrix()

    kernel.init(feats_train, feats_test)
    pie.set_features(feats_test)
    # Result of applying the estimate is discarded.
    pie.apply().get_labels()
    km_test = kernel.get_kernel_matrix()

    return km_train, km_test, kernel
def runShogunSVMDNAWDNoPositionKernel(train_xt, train_lt, test_xt):
    """Train a LibSVM with a (non-position) weighted-degree string kernel.

    Uses the module-level settings ``DEGREE`` and ``SVMC``. The kernel
    weights are ``DEGREE, DEGREE-1, ..., 1`` divided by their sum.

    Args:
        train_xt: Training DNA strings.
        train_lt: Training labels.
        test_xt: Test DNA strings.

    Returns:
        ``(out1, out2)``: predicted labels for the training and test data.
    """
    # Set up features and kernel.
    feats_train = StringCharFeatures(train_xt, DNA)
    feats_test = StringCharFeatures(test_xt, DNA)

    kernel = WeightedDegreeStringKernel(feats_train, feats_train, DEGREE)
    kernel.io.set_loglevel(MSG_DEBUG)
    descending = arange(1, DEGREE + 1, dtype=double)[::-1]
    normaliser = sum(arange(1, DEGREE + 1, dtype=double))
    kernel.set_wd_weights(descending / normaliser)

    labels = BinaryLabels(train_lt)

    # Train the SVM.
    print("Ready to train!")
    svm = LibSVM(SVMC, kernel, labels)
    svm.io.set_loglevel(MSG_DEBUG)
    svm.train()

    # Predict on training data, then on test data.
    print("Making predictions!")
    train_result = svm.apply(feats_train)
    out1 = train_result.get_labels()
    kernel.init(feats_train, feats_test)
    test_result = svm.apply(feats_test)
    out2 = test_result.get_labels()

    return out1, out2
def runShogunSVMDNALinearStringKernel(train_xt, train_lt, test_xt):
    """Train a LibSVM with a LinearStringKernel on DNA strings.

    Uses the module-level setting ``SVMC``.

    Args:
        train_xt: Training DNA strings.
        train_lt: Training labels.
        test_xt: Test DNA strings.

    Returns:
        ``(out1, out2)``: predicted labels for the training and test data.
    """
    # Set up features and kernel.
    feats_train = StringCharFeatures(train_xt, DNA)
    feats_test = StringCharFeatures(test_xt, DNA)

    kernel = LinearStringKernel(feats_train, feats_train)
    kernel.io.set_loglevel(MSG_DEBUG)

    labels = BinaryLabels(train_lt)

    # Train the SVM.
    print("Ready to train!")
    svm = LibSVM(SVMC, kernel, labels)
    svm.io.set_loglevel(MSG_DEBUG)
    svm.train()

    # Predict on training data, then on test data.
    print("Making predictions!")
    train_result = svm.apply(feats_train)
    out1 = train_result.get_labels()
    kernel.init(feats_train, feats_test)
    test_result = svm.apply(feats_test)
    out2 = test_result.get_labels()

    return out1, out2
def evaluation_cross_validation_classification(traindat=traindat, label_traindat=label_traindat):
    """Run a 10x repeated, 5-fold stratified cross-validation of LibLinear.

    The evaluation result is computed but neither printed nor returned.

    Args:
        traindat: Training feature matrix.
        label_traindat: Training labels.
    """
    from modshogun import CrossValidation, CrossValidationResult
    from modshogun import ContingencyTableEvaluation, ACCURACY
    from modshogun import StratifiedCrossValidationSplitting
    from modshogun import BinaryLabels
    from modshogun import RealFeatures
    from modshogun import LibLinear, L2R_L2LOSS_SVC

    # Training data and classifier.
    features = RealFeatures(traindat)
    labels = BinaryLabels(label_traindat)
    classifier = LibLinear(L2R_L2LOSS_SVC)

    # 5-fold stratified splitting, scored by accuracy.
    splitting_strategy = StratifiedCrossValidationSplitting(labels, 5)
    evaluation_criterium = ContingencyTableEvaluation(ACCURACY)

    cv = CrossValidation(classifier, features, labels,
                         splitting_strategy, evaluation_criterium)
    cv.set_autolock(False)

    # (optional) repeat x-val 10 times
    cv.set_num_runs(10)

    # Perform cross-validation; the result is currently unused.
    result = cv.evaluate()
def classifier_svmocas_modular(train_fname=traindat, test_fname=testdat, label_fname=label_traindat, C=0.9, epsilon=1e-5, num_threads=1):
    """Train an SVMOcas (bias disabled) on CSV data and predict.

    Args:
        train_fname: Path of the CSV file with training features.
        test_fname: Path of the CSV file with test features.
        label_fname: Path of the CSV file with training labels.
        C: SVM regularisation constant.
        epsilon: Value passed to ``set_epsilon``.
        num_threads: Number of threads set on ``svm.parallel``.

    Returns:
        ``(predictions, svm, predictions.get_labels())``, or ``None`` when
        SVMOcas cannot be imported.
    """
    from modshogun import RealFeatures, BinaryLabels
    from modshogun import CSVFile
    try:
        from modshogun import SVMOcas
    except ImportError:
        print("SVMOcas not available")
        return

    # Load features and labels from their CSV files.
    feats_train = RealFeatures(CSVFile(train_fname))
    feats_test = RealFeatures(CSVFile(test_fname))
    labels = BinaryLabels(CSVFile(label_fname))

    machine = SVMOcas(C, feats_train, labels)
    machine.set_epsilon(epsilon)
    machine.parallel.set_num_threads(num_threads)
    machine.set_bias_enabled(False)
    machine.train()

    # Bias and weight vector are queried but not used further.
    bias = machine.get_bias()
    w = machine.get_w()

    predictions = machine.apply(feats_test)
    predicted_labels = predictions.get_labels()
    return predictions, machine, predicted_labels
def kernel_histogram_word_string_modular(fm_train_dna=traindat, fm_test_dna=testdat, label_train_dna=label_traindat, order=3, ppseudo_count=1, npseudo_count=1):
    """Compute train and test HistogramWordStringKernel matrices.

    Args:
        fm_train_dna: Training DNA strings.
        fm_test_dna: Test DNA strings.
        label_train_dna: Training labels.
        order: Word order used when converting chars to words.
        ppseudo_count: First argument of ``PluginEstimate``.
        npseudo_count: Second argument of ``PluginEstimate``.

    Returns:
        ``(km_train, km_test, kernel)``.
    """
    from modshogun import StringCharFeatures, StringWordFeatures, DNA, BinaryLabels
    from modshogun import HistogramWordStringKernel, AvgDiagKernelNormalizer
    from modshogun import PluginEstimate  # , MSG_DEBUG

    # Word features for the training strings.
    train_chars = StringCharFeatures(DNA)
    # train_chars.io.set_loglevel(MSG_DEBUG)
    train_chars.set_features(fm_train_dna)
    feats_train = StringWordFeatures(train_chars.get_alphabet())
    feats_train.obtain_from_char(train_chars, order - 1, order, 0, False)

    # Word features for the test strings.
    test_chars = StringCharFeatures(DNA)
    test_chars.set_features(fm_test_dna)
    feats_test = StringWordFeatures(test_chars.get_alphabet())
    feats_test.obtain_from_char(test_chars, order - 1, order, 0, False)

    # Plugin estimate trained on the training words.
    pie = PluginEstimate(ppseudo_count, npseudo_count)
    labels = BinaryLabels(label_train_dna)
    pie.set_labels(labels)
    pie.set_features(feats_train)
    pie.train()

    kernel = HistogramWordStringKernel(feats_train, feats_train, pie)
    km_train = kernel.get_kernel_matrix()

    kernel.init(feats_train, feats_test)
    pie.set_features(feats_test)
    # Result of applying the estimate is discarded.
    pie.apply().get_labels()
    km_test = kernel.get_kernel_matrix()

    return km_train, km_test, kernel
def classifier_ssk_modular(fm_train_dna=traindat, fm_test_dna=testdat, label_train_dna=label_traindat, C=1, maxlen=1, decay=1):
    """Train a LibSVM with a StringSubsequenceKernel and predict test labels.

    Args:
        fm_train_dna: Training DNA strings.
        fm_test_dna: Test DNA strings.
        label_train_dna: Training labels.
        C: SVM regularisation constant.
        maxlen: ``maxlen`` argument of the subsequence kernel.
        decay: ``decay`` argument of the subsequence kernel.

    Returns:
        Predicted labels for the test strings.
    """
    from modshogun import StringCharFeatures, BinaryLabels
    from modshogun import LibSVM, StringSubsequenceKernel, DNA
    from modshogun import ErrorRateMeasure

    feats_train = StringCharFeatures(fm_train_dna, DNA)
    feats_test = StringCharFeatures(fm_test_dna, DNA)
    labels = BinaryLabels(label_train_dna)

    kernel = StringSubsequenceKernel(feats_train, feats_train, maxlen, decay)

    machine = LibSVM(C, kernel, labels)
    machine.train()

    # Training error is computed but not reported.
    train_output = machine.apply(feats_train)
    error_measure = ErrorRateMeasure()
    trainerr = error_measure.evaluate(train_output, labels)

    kernel.init(feats_train, feats_test)
    test_output = machine.apply(feats_test)
    predicted_labels = test_output.get_labels()

    return predicted_labels
def modelselection_random_search_liblinear_modular(traindat=traindat, label_traindat=label_traindat):
    """Random-search model selection of C1 and C2 for LibLinear.

    The best parameters are applied to the classifier and a final
    cross-validation is run; its result is neither printed nor returned.

    Args:
        traindat: Training feature matrix.
        label_traindat: Training labels.
    """
    from modshogun import CrossValidation, CrossValidationResult
    from modshogun import ContingencyTableEvaluation, ACCURACY
    from modshogun import StratifiedCrossValidationSplitting
    from modshogun import RandomSearchModelSelection
    from modshogun import ModelSelectionParameters, R_EXP
    from modshogun import ParameterCombination
    from modshogun import BinaryLabels
    from modshogun import RealFeatures
    from modshogun import LibLinear, L2R_L2LOSS_SVC

    # Build parameter tree to select C1 and C2.
    param_tree_root = ModelSelectionParameters()

    c1 = ModelSelectionParameters("C1")
    param_tree_root.append_child(c1)
    c1.build_values(-2.0, 2.0, R_EXP)

    c2 = ModelSelectionParameters("C2")
    param_tree_root.append_child(c2)
    c2.build_values(-2.0, 2.0, R_EXP)

    # Training data and classifier.
    features = RealFeatures(traindat)
    labels = BinaryLabels(label_traindat)
    classifier = LibLinear(L2R_L2LOSS_SVC)

    # 10-fold stratified splitting, scored by accuracy.
    splitting_strategy = StratifiedCrossValidationSplitting(labels, 10)
    evaluation_criterium = ContingencyTableEvaluation(ACCURACY)

    cv = CrossValidation(classifier, features, labels,
                         splitting_strategy, evaluation_criterium)
    cv.set_autolock(False)

    # Random search; the third argument (0.5) is passed through unchanged.
    model_selection = RandomSearchModelSelection(cv, param_tree_root, 0.5)
    best_parameters = model_selection.select_model()

    # Apply the best parameters and evaluate once more.
    best_parameters.apply_to_machine(classifier)
    result = cv.evaluate()
def evaluation_cross_validation_mkl_weight_storage(traindat=traindat, label_traindat=label_traindat):
    """Cross-validate an MKL classifier and collect its kernel weights.

    Three Gaussian kernels with different widths are combined over three
    copies of the same features. The MKL weights are fetched from the
    storage object but neither printed nor returned.

    Args:
        traindat: Training feature matrix.
        label_traindat: Training labels.
    """
    from modshogun import CrossValidation, CrossValidationResult
    from modshogun import CrossValidationPrintOutput
    from modshogun import CrossValidationMKLStorage
    from modshogun import ContingencyTableEvaluation, ACCURACY
    from modshogun import StratifiedCrossValidationSplitting
    from modshogun import BinaryLabels
    from modshogun import RealFeatures, CombinedFeatures
    from modshogun import GaussianKernel, CombinedKernel
    from modshogun import LibSVM, MKLClassification

    # Training data: the same features appended three times.
    features = RealFeatures(traindat)
    comb_features = CombinedFeatures()
    for _ in range(3):
        comb_features.append_feature_obj(features)
    labels = BinaryLabels(label_traindat)

    # Combined kernel of Gaussians with widths 0.1, 1 and 2.
    kernel = CombinedKernel()
    for gaussian_width in (0.1, 1, 2):
        kernel.append_kernel(GaussianKernel(10, gaussian_width))

    # Create MKL using LibSVM; interleaved optimisation is switched off
    # (the original comment attributes this to a memory bug).
    svm = MKLClassification(LibSVM())
    svm.set_interleaved_optimization_enabled(False)
    svm.set_kernel(kernel)

    # 5-fold stratified splitting, scored by accuracy.
    splitting_strategy = StratifiedCrossValidationSplitting(labels, 5)
    evaluation_criterium = ContingencyTableEvaluation(ACCURACY)

    cv = CrossValidation(svm, comb_features, labels,
                         splitting_strategy, evaluation_criterium)
    cv.set_autolock(False)

    # Attach the cross-validation output class that stores MKL weights.
    # cv.add_cross_validation_output(CrossValidationPrintOutput())
    mkl_storage = CrossValidationMKLStorage()
    cv.add_cross_validation_output(mkl_storage)
    cv.set_num_runs(3)

    # Perform cross-validation.
    result = cv.evaluate()

    # Fetch the stored MKL weights.
    weights = mkl_storage.get_mkl_weights()
def load_sparse_data(filename, dimension=None):
    """Load sparse features and binary labels from a LibSVM-format file.

    The file given by ``filename`` is read; previously the argument was
    ignored and the global ``args.dataset`` was used instead.

    Args:
        filename: Path of the LibSVM-format file to load.
        dimension: If not ``None``, the number of features is set to this
            value after loading.

    Returns:
        A dict with keys ``'data'`` (the sparse features) and ``'labels'``
        (the BinaryLabels built from the loaded label array).
    """
    input_file = LibSVMFile(filename)
    sparse_feats = SparseRealFeatures()
    label_array = sparse_feats.load_with_labels(input_file)
    labels = BinaryLabels(label_array)

    if dimension is not None:
        sparse_feats.set_num_features(dimension)

    return {'data': sparse_feats, 'labels': labels}
def evaluation_contingencytableevaluation_modular(ground_truth, predicted):
    """Evaluate predictions with nine contingency-table measures.

    Args:
        ground_truth: Raw ground-truth label values.
        predicted: Raw predicted label values.

    Returns:
        Tuple ``(accuracy, errorrate, bal, wracc, f1, crosscorrelation,
        recall, precision, specificity)``.
    """
    from modshogun import BinaryLabels
    from modshogun import ContingencyTableEvaluation
    from modshogun import AccuracyMeasure, ErrorRateMeasure, BALMeasure
    from modshogun import WRACCMeasure, F1Measure, CrossCorrelationMeasure
    from modshogun import RecallMeasure, PrecisionMeasure, SpecificityMeasure

    ground_truth_labels = BinaryLabels(ground_truth)
    predicted_labels = BinaryLabels(predicted)

    # Generic evaluator is run once; its return value is not used.
    base_evaluator = ContingencyTableEvaluation()
    base_evaluator.evaluate(predicted_labels, ground_truth_labels)

    # Order here defines the order of the returned tuple.
    measure_classes = (
        AccuracyMeasure,
        ErrorRateMeasure,
        BALMeasure,
        WRACCMeasure,
        F1Measure,
        CrossCorrelationMeasure,
        RecallMeasure,
        PrecisionMeasure,
        SpecificityMeasure,
    )
    scores = []
    for measure_class in measure_classes:
        evaluator = measure_class()
        scores.append(evaluator.evaluate(predicted_labels, ground_truth_labels))

    return tuple(scores)
def runShogunSVMDNACombinedSpectrumKernel(train_xt, train_lt, test_xt):
    """Train a LibSVM with a combined kernel, one sub-kernel per K in KList.

    For every ``K`` in the module-level ``KList`` the DNA strings are
    converted to sorted word features and a CommWordStringKernel is
    appended to the combined kernel. Uses the module-level settings
    ``GAP`` and ``SVMC``.

    Args:
        train_xt: Training DNA strings.
        train_lt: Training labels.
        test_xt: Test DNA strings.

    Returns:
        ``(out1, out2, out1DecisionValues, out2DecisionValues)``: predicted
        labels for train / test and the label objects they came from.
    """
    kernel = CombinedKernel()
    feats_train = CombinedFeatures()
    feats_test = CombinedFeatures()

    for word_order in KList:
        # Training word features, sorted by a freshly initialised preprocessor.
        train_chars = StringCharFeatures(train_xt, DNA)
        train_words = StringWordFeatures(DNA)
        train_words.obtain_from_char(train_chars, word_order - 1, word_order, GAP, False)
        sorter = SortWordString()
        sorter.init(train_words)
        train_words.add_preprocessor(sorter)
        train_words.apply_preprocessor()
        feats_train.append_feature_obj(train_words)

        # Test word features reuse the preprocessor initialised on the training words.
        test_chars = StringCharFeatures(test_xt, DNA)
        test_words = StringWordFeatures(DNA)
        test_words.obtain_from_char(test_chars, word_order - 1, word_order, GAP, False)
        test_words.add_preprocessor(sorter)
        test_words.apply_preprocessor()
        feats_test.append_feature_obj(test_words)

        kernel.append_kernel(CommWordStringKernel(10, False))

    kernel.io.set_loglevel(MSG_DEBUG)

    labels = BinaryLabels(train_lt)

    # Train the SVM.
    print("Ready to train!")
    kernel.init(feats_train, feats_train)
    svm = LibSVM(SVMC, kernel, labels)
    svm.io.set_loglevel(MSG_DEBUG)
    svm.train()

    # Predict on training data, then on test data.
    print("Making predictions!")
    out1DecisionValues = svm.apply(feats_train)
    out1 = out1DecisionValues.get_labels()
    kernel.init(feats_train, feats_test)
    out2DecisionValues = svm.apply(feats_test)
    out2 = out2DecisionValues.get_labels()

    return out1, out2, out1DecisionValues, out2DecisionValues
def get_labels(raw=False, type='binary'):
    """Build a label vector of NUM_EXAMPLES -1s followed by NUM_EXAMPLES +1s.

    Args:
        raw: If true, return the plain array instead of a label object.
        type: ``'binary'`` for BinaryLabels, ``'regression'`` for
            RegressionLabels.

    Returns:
        The raw array, a label object of the requested type, or ``None``
        for an unrecognised ``type``.
    """
    negatives = -ones(NUM_EXAMPLES, dtype=double)
    positives = ones(NUM_EXAMPLES, dtype=double)
    data = concatenate(array((negatives, positives)))

    if raw:
        return data
    if type == 'binary':
        return BinaryLabels(data)
    if type == 'regression':
        return RegressionLabels(data)
    return None
def kernel_auc_modular(train_fname=traindat, label_fname=label_traindat, width=1.7):
    """Build an AUCKernel on top of a Gaussian kernel from CSV data.

    Args:
        train_fname: Path of the CSV file with training features.
        label_fname: Path of the CSV file with training labels.
        width: Width passed to the Gaussian sub-kernel.

    Returns:
        The AUC kernel after ``setup_auc_maximization`` was called and its
        kernel matrix was computed once (the matrix itself is discarded).
    """
    from modshogun import GaussianKernel, AUCKernel, RealFeatures
    from modshogun import BinaryLabels, CSVFile

    feats_train = RealFeatures(CSVFile(train_fname))
    subkernel = GaussianKernel(feats_train, feats_train, width)

    kernel = AUCKernel(0, subkernel)
    auc_labels = BinaryLabels(CSVFile(label_fname))
    kernel.setup_auc_maximization(auc_labels)

    # Computed for its effect only; the matrix is not returned.
    km_train = kernel.get_kernel_matrix()
    return kernel
def runShogunOneClassSVMDNASpectrumKernel(train_xt, train_lt, test_xt):
    """Train a LibSVMOneClass with a CommWordStringKernel on DNA strings.

    Uses the module-level settings ``K``, ``GAP``, ``SVMC`` and
    ``EPSILON``.

    Args:
        train_xt: Training DNA strings.
        train_lt: Training labels; a BinaryLabels object is built from them
            but it is not passed to the one-class SVM.
        test_xt: Test DNA strings.

    Returns:
        ``(out1, out2, out1DecisionValues, out2DecisionValues)``: predicted
        labels for train / test and the label objects they came from.
    """
    # Training word features, sorted by a freshly initialised preprocessor.
    train_chars = StringCharFeatures(train_xt, DNA)
    feats_train = StringWordFeatures(DNA)
    feats_train.obtain_from_char(train_chars, K - 1, K, GAP, False)
    sorter = SortWordString()
    sorter.init(feats_train)
    feats_train.add_preprocessor(sorter)
    feats_train.apply_preprocessor()

    # Test word features reuse the same preprocessor.
    test_chars = StringCharFeatures(test_xt, DNA)
    feats_test = StringWordFeatures(DNA)
    feats_test.obtain_from_char(test_chars, K - 1, K, GAP, False)
    feats_test.add_preprocessor(sorter)
    feats_test.apply_preprocessor()

    kernel = CommWordStringKernel(feats_train, feats_train, False)
    kernel.io.set_loglevel(MSG_DEBUG)

    # Built as in the original code; not used by the one-class SVM below.
    labels = BinaryLabels(train_lt)

    # Train the one-class SVM.
    print("Ready to train!")
    svm = LibSVMOneClass(SVMC, kernel)
    svm.set_epsilon(EPSILON)
    svm.train()

    # Predict on training data, then on test data.
    print("Making predictions!")
    out1DecisionValues = svm.apply(feats_train)
    out1 = out1DecisionValues.get_labels()
    kernel.init(feats_train, feats_test)
    out2DecisionValues = svm.apply(feats_test)
    out2 = out2DecisionValues.get_labels()

    return out1, out2, out1DecisionValues, out2DecisionValues
#!/usr/bin/env python2.7
#
# This software is distributed under BSD 3-clause license (see LICENSE file).
#
# Copyright (C) 2014 Thoralf Klein
#

from modshogun import RealFeatures, BinaryLabels, LibLinear
from numpy import random, mean

# Random training data: a 30 x 100 normal matrix and one random
# real-valued label per feature vector.
X_train = RealFeatures(random.randn(30, 100))
Y_train = BinaryLabels(random.randn(X_train.get_num_vectors()))

svm = LibLinear(1.0, X_train, Y_train)
svm.train()

# Training accuracy: fraction of training labels reproduced by the model.
# (A stray comparison whose result was discarded has been removed, and the
# message is built as one string so the output is the same on Python 2 and 3.)
Y_pred = svm.apply_binary(X_train)
matches = Y_train.get_labels() == Y_pred.get_labels()
print("accuracy: %s" % mean(matches))
#!/usr/bin/env python2.7
#
# This software is distributed under BSD 3-clause license (see LICENSE file).
#
# Copyright (C) 2014 Thoralf Klein
#

from modshogun import RealFeatures, BinaryLabels
from modshogun import LibLinear, L2R_L2LOSS_SVC_DUAL
from numpy import random, mean

# Random training data: a 30 x 100 normal matrix and one random
# real-valued label per feature vector.
X_train = RealFeatures(random.randn(30, 100))
Y_train = BinaryLabels(random.randn(X_train.get_num_vectors()))

# One (C1, C2, training accuracy) entry per grid point. Previously this
# list was created but never filled and each accuracy was thrown away.
results = []

# Grid over C1, C2 in {10**-3, ..., 10**0}.
for C1_pow in range(-3, 1):
    for C2_pow in range(-3, 1):
        C1 = 10**C1_pow
        C2 = 10**C2_pow

        svm = LibLinear()
        svm.set_bias_enabled(False)
        svm.set_liblinear_solver_type(L2R_L2LOSS_SVC_DUAL)
        svm.set_C(C1, C2)
        svm.set_features(X_train)
        svm.set_labels(Y_train)
        svm.train()

        Y_pred = svm.apply_binary(X_train)
        accuracy = mean(Y_train.get_labels() == Y_pred.get_labels())
        results.append((C1, C2, accuracy))