def classifier_ssk(fm_train_dna=traindat, fm_test_dna=testdat, label_train_dna=label_traindat, C=1, maxlen=1, decay=1): from shogun import StringCharFeatures, BinaryLabels from shogun import LibSVM, SubsequenceStringKernel, DNA from shogun import ErrorRateMeasure feats_train = StringCharFeatures(fm_train_dna, DNA) feats_test = StringCharFeatures(fm_test_dna, DNA) labels = BinaryLabels(label_train_dna) kernel = SubsequenceStringKernel(feats_train, feats_train, maxlen, decay) svm = LibSVM(C, kernel, labels) svm.train() out = svm.apply(feats_train) evaluator = ErrorRateMeasure() trainerr = evaluator.evaluate(out, labels) # print(trainerr) kernel.init(feats_train, feats_test) predicted_labels = svm.apply(feats_test).get_labels() # print predicted_labels return predicted_labels
def svm_train(kernel, labels, C1, C2=None): """Trains a SVM with the given kernel""" num_threads = 1 kernel.io.disable_progress() svm = LibSVM(C1, kernel, labels) if C2: svm.set_C(C1, C2) svm.parallel.set_num_threads(num_threads) svm.io.disable_progress() svm.train() return svm
def kernel_combined_custom_poly(train_fname=traindat, test_fname=testdat, train_label_fname=label_traindat): from shogun import CombinedFeatures, RealFeatures, BinaryLabels from shogun import CombinedKernel, PolyKernel, CustomKernel from shogun import LibSVM, CSVFile kernel = CombinedKernel() feats_train = CombinedFeatures() tfeats = RealFeatures(CSVFile(train_fname)) tkernel = PolyKernel(10, 3) tkernel.init(tfeats, tfeats) K = tkernel.get_kernel_matrix() kernel.append_kernel(CustomKernel(K)) subkfeats_train = RealFeatures(CSVFile(train_fname)) feats_train.append_feature_obj(subkfeats_train) subkernel = PolyKernel(10, 2) kernel.append_kernel(subkernel) kernel.init(feats_train, feats_train) labels = BinaryLabels(CSVFile(train_label_fname)) svm = LibSVM(1.0, kernel, labels) svm.train() kernel = CombinedKernel() feats_pred = CombinedFeatures() pfeats = RealFeatures(CSVFile(test_fname)) tkernel = PolyKernel(10, 3) tkernel.init(tfeats, pfeats) K = tkernel.get_kernel_matrix() kernel.append_kernel(CustomKernel(K)) subkfeats_test = RealFeatures(CSVFile(test_fname)) feats_pred.append_feature_obj(subkfeats_test) subkernel = PolyKernel(10, 2) kernel.append_kernel(subkernel) kernel.init(feats_train, feats_pred) svm.set_kernel(kernel) svm.apply() km_train = kernel.get_kernel_matrix() return km_train, kernel
def kernel_combined_custom_poly (train_fname = traindat,test_fname = testdat,train_label_fname=label_traindat): from shogun import CombinedFeatures, RealFeatures, BinaryLabels from shogun import CombinedKernel, PolyKernel, CustomKernel from shogun import LibSVM, CSVFile kernel = CombinedKernel() feats_train = CombinedFeatures() tfeats = RealFeatures(CSVFile(train_fname)) tkernel = PolyKernel(10,3) tkernel.init(tfeats, tfeats) K = tkernel.get_kernel_matrix() kernel.append_kernel(CustomKernel(K)) subkfeats_train = RealFeatures(CSVFile(train_fname)) feats_train.append_feature_obj(subkfeats_train) subkernel = PolyKernel(10,2) kernel.append_kernel(subkernel) kernel.init(feats_train, feats_train) labels = BinaryLabels(CSVFile(train_label_fname)) svm = LibSVM(1.0, kernel, labels) svm.train() kernel = CombinedKernel() feats_pred = CombinedFeatures() pfeats = RealFeatures(CSVFile(test_fname)) tkernel = PolyKernel(10,3) tkernel.init(tfeats, pfeats) K = tkernel.get_kernel_matrix() kernel.append_kernel(CustomKernel(K)) subkfeats_test = RealFeatures(CSVFile(test_fname)) feats_pred.append_feature_obj(subkfeats_test) subkernel = PolyKernel(10, 2) kernel.append_kernel(subkernel) kernel.init(feats_train, feats_pred) svm.set_kernel(kernel) svm.apply() km_train=kernel.get_kernel_matrix() return km_train,kernel
def classifier_custom_kernel (C=1,dim=7): from shogun import RealFeatures, BinaryLabels, CustomKernel, LibSVM from numpy import diag,ones,sign from numpy.random import rand,seed seed((C,dim)) lab=sign(2*rand(dim) - 1) data=rand(dim, dim) symdata=data*data.T + diag(ones(dim)) kernel=CustomKernel() kernel.set_full_kernel_matrix_from_full(data) labels=BinaryLabels(lab) svm=LibSVM(C, kernel, labels) svm.train() predictions =svm.apply() out=svm.apply().get_labels() return svm,out
def classifier_custom_kernel(C=1, dim=7): from shogun import RealFeatures, BinaryLabels, CustomKernel, LibSVM from numpy import diag, ones, sign from numpy.random import rand, seed seed((C, dim)) lab = sign(2 * rand(dim) - 1) data = rand(dim, dim) symdata = data * data.T + diag(ones(dim)) kernel = CustomKernel() kernel.set_full_kernel_matrix_from_full(data) labels = BinaryLabels(lab) svm = LibSVM(C, kernel, labels) svm.train() predictions = svm.apply() out = svm.apply().get_labels() return svm, out
def runShogunSVMDNASpectrumKernel(train_xt, train_lt, test_xt): """ run svm with spectrum kernel """ ################################################## # set up SVM charfeat_train = StringCharFeatures(train_xt, DNA) feats_train = StringWordFeatures(DNA) feats_train.obtain_from_char(charfeat_train, K-1, K, GAP, False) preproc=SortWordString() preproc.init(feats_train) feats_train.add_preprocessor(preproc) feats_train.apply_preprocessor() charfeat_test = StringCharFeatures(test_xt, DNA) feats_test=StringWordFeatures(DNA) feats_test.obtain_from_char(charfeat_test, K-1, K, GAP, False) feats_test.add_preprocessor(preproc) feats_test.apply_preprocessor() kernel=CommWordStringKernel(feats_train, feats_train, False) kernel.io.set_loglevel(MSG_DEBUG) # init kernel labels = BinaryLabels(train_lt) # run svm model print "Ready to train!" svm=LibSVM(SVMC, kernel, labels) svm.io.set_loglevel(MSG_DEBUG) svm.train() # predictions print "Making predictions!" out1DecisionValues = svm.apply(feats_train) out1=out1DecisionValues.get_labels() kernel.init(feats_train, feats_test) out2DecisionValues = svm.apply(feats_test) out2=out2DecisionValues.get_labels() return out1,out2,out1DecisionValues,out2DecisionValues
def classifier_ssk (fm_train_dna=traindat,fm_test_dna=testdat, label_train_dna=label_traindat,C=1,maxlen=1,decay=1): from shogun import StringCharFeatures, BinaryLabels from shogun import LibSVM, SubsequenceStringKernel, DNA from shogun import ErrorRateMeasure feats_train=StringCharFeatures(fm_train_dna, DNA) feats_test=StringCharFeatures(fm_test_dna, DNA) labels=BinaryLabels(label_train_dna) kernel=SubsequenceStringKernel(feats_train, feats_train, maxlen, decay); svm=LibSVM(C, kernel, labels); svm.train(); out=svm.apply(feats_train); evaluator = ErrorRateMeasure() trainerr = evaluator.evaluate(out,labels) # print(trainerr) kernel.init(feats_train, feats_test) predicted_labels=svm.apply(feats_test).get_labels() # print predicted_labels return predicted_labels
print_to_console_log(data_info) ## Codify Labels (1,-1) y_train[np.where(y_train==0)] = -1 y_test [np.where(y_test==0)] = -1 ## Set up Subsequence string Kernel features = StringCharFeatures(X_train, RAWBYTE) test_features = StringCharFeatures(X_test, RAWBYTE) labels = BinaryLabels(y_train) sk = SubsequenceStringKernel(features, features, SUBSEQUENCE_SIZE, LAMBDA) ## Fitting Kernel SVM to the Training set startTime1 = time.time() svm = LibSVM(C, sk, labels) svm.train() endTime1 = time.time() ## Print Training Time formatedTime = time.strftime('%H:%M:%S', time.gmtime(endTime1-startTime1)) print_to_console_log("Training Time\nTime:{:010.4f}s\nTime:{:s}\n".format(endTime1-startTime1,formatedTime)) # save the model to disk pickle.dump(svm, open(FILE_NAME, 'wb')) # Model Evaluation print_to_console_log("Evaluate Model...\n") startTime2 = time.time() predicted_labels = svm.apply(test_features).get_labels() y_predicted = np.array(predicted_labels) endTime2 = time.time()
from shogun import ROCEvaluation import util util.set_title('ROC example') util.DISTANCE=0.5 subplots_adjust(hspace=0.3) pos=util.get_realdata(True) neg=util.get_realdata(False) features=util.get_realfeatures(pos, neg) labels=util.get_labels() # classifiers gk=GaussianKernel(features, features, 1.0) svm = LibSVM(1000.0, gk, labels) svm.train() lda=LDA(1,features,labels) lda.train() ## plot points subplot(211) plot(pos[0,:], pos[1,:], "r.") plot(neg[0,:], neg[1,:], "b.") grid(True) title('Data',size=10) # plot ROC for SVM subplot(223) ROC_evaluation=ROCEvaluation() ROC_evaluation.evaluate(svm.apply(),labels) roc = ROC_evaluation.get_ROC()