def kernel_comm_ulong_string_modular(fm_train_dna=traindat, fm_test_dna=testdat, order=3, gap=0, reverse=False):
    """Compute CommUlongStringKernel matrices for DNA training and test strings.

    Both string collections are loaded as DNA char features and converted to
    ulong string features with ``obtain_from_char(charfeat, order - 1, order,
    gap, reverse)``. A single SortUlongString preprocessor is initialised on
    the training features and then applied to both feature sets.

    Returns a tuple ``(km_train, km_test, kernel)``: the kernel matrix of
    train vs. train, the kernel matrix of train vs. test, and the kernel
    object itself (left initialised on train vs. test).
    """
    from modshogun import CommUlongStringKernel
    from modshogun import StringUlongFeatures, StringCharFeatures, DNA
    from modshogun import SortUlongString

    def _to_ulong_features(dna_strings):
        # Char features -> ulong features over the same alphabet.
        chars = StringCharFeatures(DNA)
        chars.set_features(dna_strings)
        ulongs = StringUlongFeatures(chars.get_alphabet())
        ulongs.obtain_from_char(chars, order - 1, order, gap, reverse)
        return ulongs

    # Training side: the preprocessor is initialised here and reused below.
    train_feats = _to_ulong_features(fm_train_dna)
    sorter = SortUlongString()
    sorter.init(train_feats)
    train_feats.add_preprocessor(sorter)
    train_feats.apply_preprocessor()

    # Test side: same preprocessor instance, no re-initialisation.
    test_feats = _to_ulong_features(fm_test_dna)
    test_feats.add_preprocessor(sorter)
    test_feats.apply_preprocessor()

    kernel = CommUlongStringKernel(train_feats, train_feats, False)
    km_train = kernel.get_kernel_matrix()

    # Re-initialise the same kernel object for train vs. test.
    kernel.init(train_feats, test_feats)
    km_test = kernel.get_kernel_matrix()

    return km_train, km_test, kernel
def runShogunSVMDNAUlongSpectrumKernel(train_xt, train_lt, test_xt):
    """
    run svm with ulong spectrum kernel

    Builds ulong string features from the DNA strings in ``train_xt`` and
    ``test_xt`` (using the module-level ``K`` and ``GAP`` settings), sorts
    them with a SortUlongString preprocessor initialised on the training
    features, trains a LibSVM classifier on a CommUlongStringKernel with
    ``BinaryLabels(train_lt)``, and applies it to both feature sets.

    Returns ``(out1, out2, out1DecisionValues, out2DecisionValues)`` where
    the ``*DecisionValues`` items are the objects returned by ``svm.apply``
    for the training and test features, and ``out1`` / ``out2`` are the
    results of calling ``get_labels()`` on those objects.
    """

    ##################################################
    # set up svm features and kernel
    charfeat_train = StringCharFeatures(train_xt, DNA)
    feats_train = StringUlongFeatures(DNA)
    feats_train.obtain_from_char(charfeat_train, K - 1, K, GAP, False)
    preproc = SortUlongString()
    preproc.init(feats_train)
    feats_train.add_preprocessor(preproc)
    feats_train.apply_preprocessor()

    # The test features reuse the preprocessor initialised on the training set.
    charfeat_test = StringCharFeatures(test_xt, DNA)
    feats_test = StringUlongFeatures(DNA)
    feats_test.obtain_from_char(charfeat_test, K - 1, K, GAP, False)
    feats_test.add_preprocessor(preproc)
    feats_test.apply_preprocessor()

    kernel = CommUlongStringKernel(feats_train, feats_train, False)
    kernel.io.set_loglevel(MSG_DEBUG)

    # init kernel
    labels = BinaryLabels(train_lt)

    # run svm model
    # Single-argument print(...) emits the same text on Python 2 and Python 3.
    print("Ready to train!")
    svm = LibSVM(SVMC, kernel, labels)
    svm.io.set_loglevel(MSG_DEBUG)
    svm.train()

    # predictions
    print("Making predictions!")
    out1DecisionValues = svm.apply(feats_train)
    out1 = out1DecisionValues.get_labels()

    kernel.init(feats_train, feats_test)
    out2DecisionValues = svm.apply(feats_test)
    # Reuse the result above instead of running svm.apply on the test set a
    # second time; this mirrors how out1 is derived from out1DecisionValues.
    out2 = out2DecisionValues.get_labels()

    return out1, out2, out1DecisionValues, out2DecisionValues
def features_string_ulong_modular(start=0, order=2, gap=0, rev=False):
    """Build ulong string features from three sample strings and query them.

    Char features over the RAWBYTE alphabet are created from
    ``['hey', 'guys', 'string']`` and converted with
    ``obtain_from_char(cf, start, order, gap, rev)``. The vector at index 0 is
    then overwritten with the uint64 array ``[1, 2, 3, 4, 5]``.

    Returns a tuple of ``get_features()``, ``get_feature_vector(2)`` and
    ``get_num_vectors()`` from the resulting ulong features.
    """
    from modshogun import StringCharFeatures, StringUlongFeatures, RAWBYTE
    from numpy import array, uint64

    # create string features
    sample_strings = ['hey', 'guys', 'string']
    char_feats = StringCharFeatures(sample_strings, RAWBYTE)
    ulong_feats = StringUlongFeatures(RAWBYTE)
    ulong_feats.obtain_from_char(char_feats, start, order, gap, rev)

    # replace string 0
    replacement = array([1, 2, 3, 4, 5], dtype=uint64)
    ulong_feats.set_feature_vector(replacement, 0)

    all_vectors = ulong_feats.get_features()
    vector_two = ulong_feats.get_feature_vector(2)
    vector_count = ulong_feats.get_num_vectors()
    return all_vectors, vector_two, vector_count