예제 #1
0
# This is an example of initializing the CommWordString kernel (also known as the
# Spectrum or n-gram kernel; its name is derived from the Unix command `comm`). This
# kernel sums over k-mer matches (k='order'). For efficient computation, a preprocessor
# is used that extracts and sorts all k-mers. If 'use_sign' is set to one, each k-mer is
# counted only once.

from tools.load import LoadMatrix
from sg import sg
# Loader helper used to read the example data files below.
lm=LoadMatrix()

# DNA training and test data, loaded when the module is executed (relative paths).
traindna=lm.load_dna('../data/fm_train_dna.dat')
testdna=lm.load_dna('../data/fm_test_dna.dat')
# Two argument sets; each entry lines up positionally with the parameters of
# kernel_commwordstring:
# [fm_train_dna, fm_test_dna, size_cache, order, gap, reverse, use_sign, normalization].
parameter_list=[[traindna,testdna,10,3,0,'n',False,'FULL'],
		[traindna,testdna,11,4,0,'n',False,'FULL']]

def kernel_commwordstring (fm_train_dna=traindna,fm_test_dna=testdna,
		size_cache=10,
		order=3,gap=0,reverse='n',
		use_sign=False,normalization='FULL'):
	"""Set up the COMMSTRING kernel through the static `sg` interface.

	The same preparation is applied to the TRAIN and then the TEST feature
	set: load the strings as 'DNA', convert them from STRING/CHAR to
	STRING/WORD and attach the previously added SORTWORDSTRING preprocessor.

	Args:
		fm_train_dna: training data handed to sg('set_features', 'TRAIN', ...).
		fm_test_dna: test data handed to sg('set_features', 'TEST', ...).
		size_cache: forwarded to sg('set_kernel', ...).
		order: forwarded to sg('convert', ...); `order-1` is forwarded as
			the following argument.
		gap: forwarded to sg('convert', ...).
		reverse: forwarded to sg('convert', ...).
		use_sign: forwarded to sg('set_kernel', ...).
		normalization: forwarded to sg('set_kernel', ...).

	Returns:
		Whatever sg('get_kernel_matrix', 'TRAIN') returns. Previously this
		value was bound to an unused local and dropped, so callers got None.
	"""
	# The preprocessor is registered first so that it can be attached to both
	# feature sets below.
	sg('add_preproc', 'SORTWORDSTRING')

	# TRAIN is prepared before TEST, matching the original call order on the
	# shared `sg` state.
	for target, dna in (('TRAIN', fm_train_dna), ('TEST', fm_test_dna)):
		sg('set_features', target, dna, 'DNA')
		sg('convert', target, 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1, gap, reverse)
		sg('attach_preproc', target)

	sg('set_kernel', 'COMMSTRING', 'WORD', size_cache, use_sign, normalization)
	return sg('get_kernel_matrix', 'TRAIN')
from tools.load import LoadMatrix
# Loader helper used to read the example data files below.
lm = LoadMatrix()
# DNA training and test data, loaded when the module is executed (relative paths).
traindat = lm.load_dna('../data/fm_train_dna.dat')
testdat = lm.load_dna('../data/fm_test_dna.dat')

# Two argument sets; each entry lines up positionally with the parameters of
# kernel_distantsegments_modular: [fm_train_dna, fm_test_dna, delta, theta].
parameter_list = [[traindat, testdat, 5, 5], [traindat, testdat, 6, 6]]


def kernel_distantsegments_modular(fm_train_dna=traindat, fm_test_dna=testdat,
                                   delta=5, theta=5):
    """Build a DistantSegmentsKernel and fetch its train and test matrices.

    Args:
        fm_train_dna: training strings wrapped as StringCharFeatures over DNA.
        fm_test_dna: test strings wrapped as StringCharFeatures over DNA.
        delta: forwarded to the DistantSegmentsKernel constructor.
        theta: forwarded to the DistantSegmentsKernel constructor.

    Returns:
        A tuple ``(km_train, km_test, kernel)``: the matrix obtained with the
        kernel set up on (train, train), the matrix obtained after
        re-initialising it on (train, test), and the kernel object itself.
    """
    from shogun.Features import StringCharFeatures, DNA
    from shogun.Kernel import DistantSegmentsKernel

    train_features = StringCharFeatures(fm_train_dna, DNA)
    test_features = StringCharFeatures(fm_test_dna, DNA)

    # The literal 10 is the third constructor argument, unchanged from the
    # original example.
    ds_kernel = DistantSegmentsKernel(
        train_features, train_features, 10, delta, theta
    )
    train_matrix = ds_kernel.get_kernel_matrix()

    # Reuse the same kernel object for the train-vs-test matrix.
    ds_kernel.init(train_features, test_features)
    test_matrix = ds_kernel.get_kernel_matrix()

    return train_matrix, test_matrix, ds_kernel


# Script entry point: print a banner, then run the example with the first
# argument set from parameter_list.
if __name__ == '__main__':
    print('DistantSegments')
    kernel_distantsegments_modular(*parameter_list[0])
	reverse=False

	charfeat=StringCharFeatures(DNA)
	charfeat.set_features(fm_dna)
	feats=StringWordFeatures(charfeat.get_alphabet())
	feats.obtain_from_char(charfeat, order-1, order, gap, reverse)

	histo=Histogram(feats)
	histo.train()

	histo.get_histogram()

	num_examples=feats.get_num_vectors()
	num_param=histo.get_num_model_parameters()
	#for i in xrange(num_examples):
	#	for j in xrange(num_param):
	#		histo.get_log_derivative(j, i)

	histo.get_log_likelihood()
	histo.get_log_likelihood_sample()

###########################################################################
# call functions
###########################################################################

# Script entry point: load the training DNA data, then run the histogram example.
if __name__=='__main__':
	from tools.load import LoadMatrix
	lm=LoadMatrix()
	# fm_dna is bound at module level here and histogram() is called without
	# arguments; histogram() is not defined in the visible part of this file,
	# so it presumably reads fm_dna as a global -- TODO confirm.
	fm_dna=lm.load_dna('../data/fm_train_dna.dat')
	histogram()
from tools.load import LoadMatrix
# Loader helper used to read the example data file below.
lm=LoadMatrix()

# DNA training data, loaded when the module is executed (relative path).
traindna = lm.load_dna('../data/fm_train_dna.dat')

# Two argument sets; each entry lines up positionally with the parameters of
# distribution_histogram_modular: [fm_dna, order, gap, reverse].
parameter_list = [[traindna,3,0,False],[traindna,4,0,False]]

def distribution_histogram_modular (fm_dna=traindna,order=3,gap=0,reverse=False):
	"""Train a Histogram distribution on word features derived from DNA strings.

	Args:
		fm_dna: strings loaded into a StringCharFeatures object over DNA.
		order: forwarded to obtain_from_char(); `order-1` is forwarded as
			the preceding argument.
		gap: forwarded to obtain_from_char().
		reverse: forwarded to obtain_from_char().

	Returns:
		A tuple ``(histo, out_sample, out_likelihood)``: the trained Histogram
		object, the result of get_log_likelihood_sample() and the result of
		get_log_likelihood(), in that order.
	"""
	from shogun.Features import StringWordFeatures, StringCharFeatures, DNA
	from shogun.Distribution import Histogram

	# Character-level features over the DNA alphabet.
	dna_chars=StringCharFeatures(DNA)
	dna_chars.set_features(fm_dna)

	# Word-level features obtained from the character features.
	words=StringWordFeatures(dna_chars.get_alphabet())
	words.obtain_from_char(dna_chars, order-1, order, gap, reverse)

	model=Histogram(words)
	model.train()

	# These accessors are still invoked, in the original order, but their
	# results are not used by this function.
	model.get_histogram()
	words.get_num_vectors()
	model.get_num_model_parameters()

	# get_log_likelihood() is called before get_log_likelihood_sample(), as in
	# the original; note the swapped order in the returned tuple.
	log_likelihood=model.get_log_likelihood()
	sample_log_likelihood=model.get_log_likelihood_sample()
	return model,sample_log_likelihood,log_likelihood
###########################################################################
#!/usr/bin/env python
from tools.load import LoadMatrix

# Loader helper used to read the example data files below.
lm = LoadMatrix()

# DNA training/test data and training labels, loaded when the module is
# executed (relative paths).
train_dna = lm.load_dna("../data/fm_train_dna.dat")
test_dna = lm.load_dna("../data/fm_test_dna.dat")
label = lm.load_labels("../data/label_train_dna.dat")

# Two argument sets; each entry lines up positionally with the parameters of
# classifier_svmlight_batch_linadd_modular:
# [fm_train_dna, fm_test_dna, label_train_dna, degree, C, epsilon, num_threads].
parameter_list = [[train_dna, test_dna, label, 20, 0.9, 1e-3, 1], [train_dna, test_dna, label, 20, 2.3, 1e-5, 4]]


def classifier_svmlight_batch_linadd_modular(
    fm_train_dna, fm_test_dna, label_train_dna, degree, C, epsilon, num_threads
):

    from modshogun import StringCharFeatures, BinaryLabels, DNA
    from modshogun import WeightedDegreeStringKernel, MSG_DEBUG

    try:
        from modshogun import SVMLight
    except ImportError:
        print("No support for SVMLight available.")
        return

    feats_train = StringCharFeatures(DNA)
    # feats_train.io.set_loglevel(MSG_DEBUG)
    feats_train.set_features(fm_train_dna)
    feats_test = StringCharFeatures(DNA)
    feats_test.set_features(fm_test_dna)
    degree = 20