# Example: initializing the CommWordString kernel (also known as the Spectrum
# or n-gram kernel; its name is derived from the Unix command `comm`). The
# kernel sums over k-mer matches (k = 'order'). For efficient computation a
# preprocessor is used that extracts and sorts all k-mers. If 'use_sign' is
# set to one, each k-mer is counted only once.

from tools.load import LoadMatrix
from sg import sg

lm = LoadMatrix()

traindna = lm.load_dna('../data/fm_train_dna.dat')
testdna = lm.load_dna('../data/fm_test_dna.dat')

# Each entry lines up positionally with the parameters of
# kernel_commwordstring: train data, test data, size_cache, order, gap,
# reverse, use_sign, normalization.
parameter_list = [
    [traindna, testdna, 10, 3, 0, 'n', False, 'FULL'],
    [traindna, testdna, 11, 4, 0, 'n', False, 'FULL'],
]


def kernel_commwordstring(fm_train_dna=traindna, fm_test_dna=testdna,
                          size_cache=10, order=3, gap=0, reverse='n',
                          use_sign=False, normalization='FULL'):
    """Configure the COMMSTRING kernel via the static ``sg`` interface.

    Registers the SORTWORDSTRING preprocessor, loads the train and test DNA
    strings, converts both from STRING/CHAR to STRING/WORD features, attaches
    the preprocessor to each, sets the kernel and finally requests the kernel
    matrix for 'TRAIN'.

    Parameters:
        fm_train_dna, fm_test_dna: data passed to 'set_features' for the
            'TRAIN' and 'TEST' targets respectively.
        size_cache, use_sign, normalization: forwarded to 'set_kernel'.
        order, gap, reverse: forwarded to 'convert' (together with
            ``order - 1``).
    """
    sg('add_preproc', 'SORTWORDSTRING')

    # TRAIN and TEST receive the same three commands; TRAIN is fully set up
    # before TEST, matching the order in which the commands must be issued.
    for target, dna in (('TRAIN', fm_train_dna), ('TEST', fm_test_dna)):
        sg('set_features', target, dna, 'DNA')
        sg('convert', target, 'STRING', 'CHAR', 'STRING', 'WORD',
           order, order - 1, gap, reverse)
        sg('attach_preproc', target)

    sg('set_kernel', 'COMMSTRING', 'WORD',
       size_cache, use_sign, normalization)

    # NOTE(review): the matrix is bound to `km` but not returned in the code
    # visible here -- confirm whether the definition was meant to continue.
    km = sg('get_kernel_matrix', 'TRAIN')
from tools.load import LoadMatrix

lm = LoadMatrix()

traindat = lm.load_dna('../data/fm_train_dna.dat')
testdat = lm.load_dna('../data/fm_test_dna.dat')

# Each entry lines up positionally with the parameters of
# kernel_distantsegments_modular: train data, test data, delta, theta.
parameter_list = [
    [traindat, testdat, 5, 5],
    [traindat, testdat, 6, 6],
]


def kernel_distantsegments_modular(fm_train_dna=traindat,
                                   fm_test_dna=testdat,
                                   delta=5, theta=5):
    """Compute DistantSegments kernel matrices on DNA string features.

    Builds StringCharFeatures over the DNA alphabet for both inputs, creates
    a DistantSegmentsKernel on (train, train) to obtain the first matrix,
    then re-initializes the same kernel object on (train, test) to obtain
    the second.

    Parameters:
        fm_train_dna: data wrapped into the training features.
        fm_test_dna: data wrapped into the test features.
        delta, theta: passed through to the DistantSegmentsKernel
            constructor.

    Returns:
        Tuple ``(km_train, km_test, kernel)``.
    """
    from shogun.Features import StringCharFeatures, DNA
    from shogun.Kernel import DistantSegmentsKernel

    train_features = StringCharFeatures(fm_train_dna, DNA)
    test_features = StringCharFeatures(fm_test_dna, DNA)

    # First pass: train against train.
    kernel = DistantSegmentsKernel(
        train_features, train_features, 10, delta, theta)
    km_train = kernel.get_kernel_matrix()

    # Second pass: the same kernel object, re-initialized on train vs. test.
    kernel.init(train_features, test_features)
    km_test = kernel.get_kernel_matrix()

    return km_train, km_test, kernel


if __name__ == '__main__':
    print('DistantSegments')
    first_params = parameter_list[0]
    kernel_distantsegments_modular(*first_params)
reverse=False charfeat=StringCharFeatures(DNA) charfeat.set_features(fm_dna) feats=StringWordFeatures(charfeat.get_alphabet()) feats.obtain_from_char(charfeat, order-1, order, gap, reverse) histo=Histogram(feats) histo.train() histo.get_histogram() num_examples=feats.get_num_vectors() num_param=histo.get_num_model_parameters() #for i in xrange(num_examples): # for j in xrange(num_param): # histo.get_log_derivative(j, i) histo.get_log_likelihood() histo.get_log_likelihood_sample() ########################################################################### # call functions ########################################################################### if __name__=='__main__': from tools.load import LoadMatrix lm=LoadMatrix() fm_dna=lm.load_dna('../data/fm_train_dna.dat') histogram()
from tools.load import LoadMatrix

lm = LoadMatrix()

traindna = lm.load_dna('../data/fm_train_dna.dat')

# Each entry lines up positionally with the parameters of
# distribution_histogram_modular: DNA data, order, gap, reverse.
parameter_list = [
    [traindna, 3, 0, False],
    [traindna, 4, 0, False],
]


def distribution_histogram_modular(fm_dna=traindna, order=3, gap=0,
                                   reverse=False):
    """Train a Histogram distribution on word features derived from DNA.

    The DNA strings are loaded into StringCharFeatures, converted into
    StringWordFeatures via ``obtain_from_char`` and used to train a
    Histogram. Several accessors are called afterwards; only the two
    log-likelihood values are kept.

    Parameters:
        fm_dna: data handed to ``StringCharFeatures.set_features``.
        order, gap, reverse: forwarded to ``obtain_from_char`` (together
            with ``order - 1``).

    Returns:
        Tuple ``(histo, out_sample, out_likelihood)`` -- note that the
        per-sample value comes before the overall log-likelihood.
    """
    from shogun.Features import StringWordFeatures, StringCharFeatures, DNA
    from shogun.Distribution import Histogram

    char_features = StringCharFeatures(DNA)
    char_features.set_features(fm_dna)

    word_features = StringWordFeatures(char_features.get_alphabet())
    word_features.obtain_from_char(
        char_features, order - 1, order, gap, reverse)

    histo = Histogram(word_features)
    histo.train()

    # Called for its own sake; the returned histogram is not kept.
    histo.get_histogram()

    # Only needed by the disabled log-derivative loop below.
    num_examples = word_features.get_num_vectors()
    num_param = histo.get_num_model_parameters()
    # for i in xrange(num_examples):
    #     for j in xrange(num_param):
    #         histo.get_log_derivative(j, i)

    out_likelihood = histo.get_log_likelihood()
    out_sample = histo.get_log_likelihood_sample()

    return histo, out_sample, out_likelihood


###########################################################################
#!/usr/bin/env python from tools.load import LoadMatrix lm = LoadMatrix() train_dna = lm.load_dna("../data/fm_train_dna.dat") test_dna = lm.load_dna("../data/fm_test_dna.dat") label = lm.load_labels("../data/label_train_dna.dat") parameter_list = [[train_dna, test_dna, label, 20, 0.9, 1e-3, 1], [train_dna, test_dna, label, 20, 2.3, 1e-5, 4]] def classifier_svmlight_batch_linadd_modular( fm_train_dna, fm_test_dna, label_train_dna, degree, C, epsilon, num_threads ): from modshogun import StringCharFeatures, BinaryLabels, DNA from modshogun import WeightedDegreeStringKernel, MSG_DEBUG try: from modshogun import SVMLight except ImportError: print("No support for SVMLight available.") return feats_train = StringCharFeatures(DNA) # feats_train.io.set_loglevel(MSG_DEBUG) feats_train.set_features(fm_train_dna) feats_test = StringCharFeatures(DNA) feats_test.set_features(fm_test_dna) degree = 20