예제 #1
0
    def init_sensor(self, kernel, svs):
        f = StringCharFeatures(svs, DNA)

        kname = kernel['name']
        if  kname == 'spectrum':
            wf = StringWordFeatures(f.get_alphabet())
            wf.obtain_from_char(f, kernel['order'] - 1, kernel['order'], 0, False)

            pre = SortWordString()
            pre.init(wf)
            wf.add_preprocessor(pre)
            wf.apply_preprocessor()
            f = wf

            k = CommWordStringKernel(0, False)
            k.set_use_dict_diagonal_optimization(kernel['order'] < 8)
            self.preproc = pre

        elif kname == 'wdshift':
                k = WeightedDegreePositionStringKernel(0, kernel['order'])
                k.set_normalizer(IdentityKernelNormalizer())
                k.set_shifts(kernel['shift'] *
                        numpy.ones(f.get_max_vector_length(), dtype=numpy.int32))
                k.set_position_weights(1.0 / f.get_max_vector_length() *
                        numpy.ones(f.get_max_vector_length(), dtype=numpy.float64))
        else:
            raise "Currently, only wdshift and spectrum kernels supported"

        self.kernel = k
        self.train_features = f

        return (self.kernel, self.train_features)
def preproc_sortwordstring_modular (fm_train_dna=traindna,fm_test_dna=testdna,order=3,gap=0,reverse=False,use_sign=False):

	from shogun.Kernel import CommWordStringKernel
	from shogun.Features import StringCharFeatures, StringWordFeatures, DNA
	from shogun.Preprocessor import SortWordString

	charfeat=StringCharFeatures(fm_train_dna, DNA)
	feats_train=StringWordFeatures(charfeat.get_alphabet())
	feats_train.obtain_from_char(charfeat, order-1, order, gap, reverse)
	preproc=SortWordString()
	preproc.init(feats_train)
	feats_train.add_preproc(preproc)
	feats_train.apply_preproc()

	charfeat=StringCharFeatures(fm_test_dna, DNA)
	feats_test=StringWordFeatures(charfeat.get_alphabet())
	feats_test.obtain_from_char(charfeat, order-1, order, gap, reverse)
	feats_test.add_preproc(preproc)
	feats_test.apply_preproc()

	kernel=CommWordStringKernel(feats_train, feats_train, use_sign)

	km_train=kernel.get_kernel_matrix()
	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()

	return km_train,km_test,kernel
def sort_word_string ():
	print 'CommWordString'

	from shogun.Kernel import CommWordStringKernel
	from shogun.Features import StringCharFeatures, StringWordFeatures, DNA
	from shogun.PreProc import SortWordString

	order=3
	gap=0
	reverse=False

	charfeat=StringCharFeatures(fm_train_dna, DNA)
	feats_train=StringWordFeatures(charfeat.get_alphabet())
	feats_train.obtain_from_char(charfeat, order-1, order, gap, reverse)
	preproc=SortWordString()
	preproc.init(feats_train)
	feats_train.add_preproc(preproc)
	feats_train.apply_preproc()

	charfeat=StringCharFeatures(fm_test_dna, DNA)
	feats_test=StringWordFeatures(charfeat.get_alphabet())
	feats_test.obtain_from_char(charfeat, order-1, order, gap, reverse)
	feats_test.add_preproc(preproc)
	feats_test.apply_preproc()

	use_sign=False

	kernel=CommWordStringKernel(feats_train, feats_train, use_sign)

	km_train=kernel.get_kernel_matrix()
	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()
예제 #4
0
def create_kernel(kname, kparam, feats_train):
    """Call the corresponding constructor for the kernel"""

    if kname == 'gauss':
        kernel = GaussianKernel(feats_train, feats_train, kparam['width'])
    elif kname == 'linear':
        kernel = LinearKernel(feats_train, feats_train)
        kernel.set_normalizer(AvgDiagKernelNormalizer(kparam['scale']))
    elif kname == 'poly':
        kernel = PolyKernel(feats_train, feats_train, kparam['degree'],
                            kparam['inhomogene'], kparam['normal'])
    elif kname == 'wd':
        kernel = WeightedDegreePositionStringKernel(feats_train, feats_train,
                                                    kparam['degree'])
        kernel.set_normalizer(
            AvgDiagKernelNormalizer(float(kparam['seqlength'])))
        kernel.set_shifts(kparam['shift'] *
                          numpy.ones(kparam['seqlength'], dtype=numpy.int32))
        #kernel=WeightedDegreeStringKernel(feats_train, feats_train, kparam['degree'])
    elif kname == 'spec':
        kernel = CommUlongStringKernel(feats_train, feats_train)
    elif kname == 'cumspec':
        kernel = WeightedCommWordStringKernel(feats_train, feats_train)
        kernel.set_weights(numpy.ones(kparam['degree']))
    elif kname == 'spec2':
        kernel = CombinedKernel()
        k0 = CommWordStringKernel(feats_train['f0'], feats_train['f0'])
        k0.io.disable_progress()
        kernel.append_kernel(k0)
        k1 = CommWordStringKernel(feats_train['f1'], feats_train['f1'])
        k1.io.disable_progress()
        kernel.append_kernel(k1)
    elif kname == 'cumspec2':
        kernel = CombinedKernel()
        k0 = WeightedCommWordStringKernel(feats_train['f0'], feats_train['f0'])
        k0.set_weights(numpy.ones(kparam['degree']))
        k0.io.disable_progress()
        kernel.append_kernel(k0)
        k1 = WeightedCommWordStringKernel(feats_train['f1'], feats_train['f1'])
        k1.set_weights(numpy.ones(kparam['degree']))
        k1.io.disable_progress()
        kernel.append_kernel(k1)
    elif kname == 'localalign':
        kernel = LocalAlignmentStringKernel(feats_train, feats_train)
    elif kname == 'localimprove':
        kernel = LocalityImprovedStringKernel(feats_train, feats_train, kparam['length'],\
                                              kparam['indeg'], kparam['outdeg'])
    else:
        print 'Unknown kernel %s' % kname

    kernel.set_cache_size(32)
    return kernel
예제 #5
0
def kernel_comm_word_string_modular(fm_train_dna=traindat,
                                    fm_test_dna=testdat,
                                    order=3,
                                    gap=0,
                                    reverse=False,
                                    use_sign=False):

    from shogun.Kernel import CommWordStringKernel
    from shogun.Features import StringWordFeatures, StringCharFeatures, DNA
    from shogun.Preprocessor import SortWordString

    charfeat = StringCharFeatures(DNA)
    charfeat.set_features(fm_train_dna)
    feats_train = StringWordFeatures(charfeat.get_alphabet())
    feats_train.obtain_from_char(charfeat, order - 1, order, gap, reverse)
    preproc = SortWordString()
    preproc.init(feats_train)
    feats_train.add_preproc(preproc)
    feats_train.apply_preproc()

    charfeat = StringCharFeatures(DNA)
    charfeat.set_features(fm_test_dna)
    feats_test = StringWordFeatures(charfeat.get_alphabet())
    feats_test.obtain_from_char(charfeat, order - 1, order, gap, reverse)
    feats_test.add_preproc(preproc)
    feats_test.apply_preproc()

    kernel = CommWordStringKernel(feats_train, feats_train, use_sign)

    km_train = kernel.get_kernel_matrix()
    kernel.init(feats_train, feats_test)
    km_test = kernel.get_kernel_matrix()
    return km_train, km_test, kernel
예제 #6
0
def get_weighted_spectrum_kernel(subfeats_list, options):
    """build weighted spectrum kernel with non-redundant k-mer list (removing reverse complement)

	Arguments:
	subfeats_list -- list of sub-feature objects
	options -- object containing option data 

	Return:
	CombinedFeatures of StringWord(Ulong)Features, CombinedKernel of CommWord(Ulong)StringKernel 
	"""
    kmerlen = options.kmerlen
    kmerlen2 = options.kmerlen2

    subkernels = 0
    kernel = CombinedKernel()
    feats = CombinedFeatures()

    weights = []

    i = 0
    for subfeats in subfeats_list:
        feats.append_feature_obj(subfeats)

        combine_kcount = Counter()
        for i in xrange(subfeats.get_num_vectors()):
            fv = list(subfeats.get_feature_vector(i))
            combine_kcount += Counter(fv)
            number = len(combine_kcount)
            klen = kmerlen + i

    for k in xrange(kmerlen, kmerlen2 + 1):
        if k <= 8:
            subkernel = CommWordStringKernel(10, False)
        else:
            subkernel = CommUlongStringKernel(10, False)

        kernel.append_kernel(subkernel)
        subkernels += 1

    kernel.init(feats, feats)
    # here the weight for each k-mer is uniform
    '''
	subkernels = 8
	numpy.array([1 / float(subkernels)] * subkernels, numpy.dtype('float64'))
	array([ 0.125,  0.125,  0.125,  0.125,  0.125,  0.125,  0.125,  0.125])
	'''
    kernel.set_subkernel_weights(
        numpy.array([1 / float(subkernels)] * subkernels,
                    numpy.dtype('float64')))

    return kernel
예제 #7
0
def create_kernel(kname,kparam,feats_train):
    """Call the corresponding constructor for the kernel"""

    if kname == 'gauss':
        kernel = GaussianKernel(feats_train, feats_train, kparam['width'])
    elif kname == 'linear':
        kernel = LinearKernel(feats_train, feats_train)
        kernel.set_normalizer(AvgDiagKernelNormalizer(kparam['scale']))
    elif kname == 'poly':
        kernel = PolyKernel(feats_train, feats_train, kparam['degree'], kparam['inhomogene'], kparam['normal'])
    elif kname == 'wd':
        kernel=WeightedDegreePositionStringKernel(feats_train, feats_train, kparam['degree'])
        kernel.set_normalizer(AvgDiagKernelNormalizer(float(kparam['seqlength'])))
        kernel.set_shifts(kparam['shift']*numpy.ones(kparam['seqlength'],dtype=numpy.int32))
        #kernel=WeightedDegreeStringKernel(feats_train, feats_train, kparam['degree'])
    elif kname == 'spec':
        kernel = CommUlongStringKernel(feats_train, feats_train)
    elif kname == 'cumspec':
        kernel = WeightedCommWordStringKernel(feats_train, feats_train)
        kernel.set_weights(numpy.ones(kparam['degree']))
    elif kname == 'spec2':
        kernel = CombinedKernel()
        k0 = CommWordStringKernel(feats_train['f0'], feats_train['f0'])
        k0.io.disable_progress()
        kernel.append_kernel(k0)
        k1 = CommWordStringKernel(feats_train['f1'], feats_train['f1'])
        k1.io.disable_progress()
        kernel.append_kernel(k1)
    elif kname == 'cumspec2':
        kernel = CombinedKernel()
        k0 = WeightedCommWordStringKernel(feats_train['f0'], feats_train['f0'])
        k0.set_weights(numpy.ones(kparam['degree']))
        k0.io.disable_progress()
        kernel.append_kernel(k0)
        k1 = WeightedCommWordStringKernel(feats_train['f1'], feats_train['f1'])
        k1.set_weights(numpy.ones(kparam['degree']))
        k1.io.disable_progress()
        kernel.append_kernel(k1)
    elif kname == 'localalign':
        kernel = LocalAlignmentStringKernel(feats_train, feats_train)
    elif kname == 'localimprove':
        kernel = LocalityImprovedStringKernel(feats_train, feats_train, kparam['length'],\
                                              kparam['indeg'], kparam['outdeg'])
    else:
        print 'Unknown kernel %s' % kname

    kernel.set_cache_size(32) 
    return kernel
예제 #8
0
def get_spectrum_kernel(feats, options):
    """build spectrum kernel with non-redundant k-mer list (removing reverse complement)

	Arguments:
	feats -- feature object
	options -- object containing option data 

	Return:
	StringWord(Ulong)Features, CommWord(Ulong)StringKernel
	"""
    if options.kmerlen <= 8:
        return CommWordStringKernel(feats, feats)
    else:
        return CommUlongStringKernel(feats, feats)
예제 #9
0
def get_weighted_spectrum_kernel(subfeats_list, options):
    """build weighted spectrum kernel with non-redundant k-mer list (removing reverse complement)

	Arguments:
	subfeats_list -- list of sub-feature objects
	options -- object containing option data 

	Return:
	CombinedFeatures of StringWord(Ulong)Features, CombinedKernel of CommWord(Ulong)StringKernel 
	"""
    kmerlen = options.kmerlen
    kmerlen2 = options.kmerlen2

    subkernels = 0
    kernel = CombinedKernel()
    feats = CombinedFeatures()

    for subfeats in subfeats_list:
        feats.append_feature_obj(subfeats)

    for k in xrange(kmerlen, kmerlen2 + 1):
        if k <= 8:
            subkernel = CommWordStringKernel(10, False)
        else:
            subkernel = CommUlongStringKernel(10, False)

        kernel.append_kernel(subkernel)
        subkernels += 1

    kernel.init(feats, feats)

    kernel.set_subkernel_weights(
        numpy.array([1 / float(subkernels)] * subkernels,
                    numpy.dtype('float64')))

    return kernel
def tests_check_commwordkernel_memleak_modular(num, order, gap, reverse):
	import gc
	from shogun.Features import Alphabet,StringCharFeatures,StringWordFeatures,DNA
	from shogun.Preprocessor import SortWordString, MSG_DEBUG
	from shogun.Kernel import CommWordStringKernel, IdentityKernelNormalizer
	from numpy import mat

	POS=[num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'TTGT', num*'TTGT', 
	num*'TTGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'TTGT', num*'TTGT', 
	num*'TTGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'TTGT', num*'TTGT', 
	num*'TTGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT']
	NEG=[num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'TTGT', num*'TTGT', 
	num*'TTGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'TTGT', num*'TTGT', 
	num*'TTGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'TTGT', num*'TTGT', 
	num*'TTGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT']

	for i in xrange(10):
		alpha=Alphabet(DNA)
		traindat=StringCharFeatures(alpha)
		traindat.set_features(POS+NEG)
		trainudat=StringWordFeatures(traindat.get_alphabet());
		trainudat.obtain_from_char(traindat, order-1, order, gap, reverse)
		#trainudat.io.set_loglevel(MSG_DEBUG)
		pre = SortWordString()
		#pre.io.set_loglevel(MSG_DEBUG)
		pre.init(trainudat)
		trainudat.add_preproc(pre)
		trainudat.apply_preproc()
		spec = CommWordStringKernel(10, False)
		spec.set_normalizer(IdentityKernelNormalizer())
		spec.init(trainudat, trainudat)
		K=spec.get_kernel_matrix()

	del POS
	del NEG
	del order
	del gap
	del reverse
	return K
def tests_check_commwordkernel_memleak_modular (num, order, gap, reverse):
	import gc
	from shogun.Features import Alphabet,StringCharFeatures,StringWordFeatures,DNA
	from shogun.Preprocessor import SortWordString, MSG_DEBUG
	from shogun.Kernel import CommWordStringKernel, IdentityKernelNormalizer
	from numpy import mat

	POS=[num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'TTGT', num*'TTGT', 
	num*'TTGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'TTGT', num*'TTGT', 
	num*'TTGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'TTGT', num*'TTGT', 
	num*'TTGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT']
	NEG=[num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'TTGT', num*'TTGT', 
	num*'TTGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'TTGT', num*'TTGT', 
	num*'TTGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'TTGT', num*'TTGT', 
	num*'TTGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT']

	for i in range(10):
		alpha=Alphabet(DNA)
		traindat=StringCharFeatures(alpha)
		traindat.set_features(POS+NEG)
		trainudat=StringWordFeatures(traindat.get_alphabet());
		trainudat.obtain_from_char(traindat, order-1, order, gap, reverse)
		#trainudat.io.set_loglevel(MSG_DEBUG)
		pre = SortWordString()
		#pre.io.set_loglevel(MSG_DEBUG)
		pre.init(trainudat)
		trainudat.add_preprocessor(pre)
		trainudat.apply_preprocessor()
		spec = CommWordStringKernel(10, False)
		spec.set_normalizer(IdentityKernelNormalizer())
		spec.init(trainudat, trainudat)
		K=spec.get_kernel_matrix()

	del POS
	del NEG
	del order
	del gap
	del reverse
	return K
예제 #12
0
    def init_sensor(self, kernel, svs):
        f = StringCharFeatures(svs, DNA)

        kname = kernel['name']
        if kname == 'spectrum':
            wf = StringWordFeatures(f.get_alphabet())
            wf.obtain_from_char(f, kernel['order'] - 1, kernel['order'], 0,
                                False)

            pre = SortWordString()
            pre.init(wf)
            wf.add_preprocessor(pre)
            wf.apply_preprocessor()
            f = wf

            k = CommWordStringKernel(0, False)
            k.set_use_dict_diagonal_optimization(kernel['order'] < 8)
            self.preproc = pre

        elif kname == 'wdshift':
            k = WeightedDegreePositionStringKernel(0, kernel['order'])
            k.set_normalizer(IdentityKernelNormalizer())
            k.set_shifts(
                kernel['shift'] *
                numpy.ones(f.get_max_vector_length(), dtype=numpy.int32))
            k.set_position_weights(
                1.0 / f.get_max_vector_length() *
                numpy.ones(f.get_max_vector_length(), dtype=numpy.float64))
        else:
            raise "Currently, only wdshift and spectrum kernels supported"

        self.kernel = k
        self.train_features = f

        return (self.kernel, self.train_features)
100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 
100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 
100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 
100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT']
order=7
gap=0
reverse=False

for i in xrange(10):
    alpha=Alphabet(DNA)
    traindat=StringCharFeatures(alpha)
    traindat.set_features(POS+NEG)
    trainudat=StringWordFeatures(traindat.get_alphabet());
    trainudat.obtain_from_char(traindat, order-1, order, gap, reverse)
    #trainudat.io.set_loglevel(MSG_DEBUG)
    pre = SortWordString()
    #pre.io.set_loglevel(MSG_DEBUG)
    pre.init(trainudat)
    trainudat.add_preproc(pre)
    trainudat.apply_preproc()
    spec = CommWordStringKernel(10, False)
    spec.set_normalizer(IdentityKernelNormalizer())
    spec.init(trainudat, trainudat)
    K=mat(spec.get_kernel_matrix())

del POS
del NEG
del order
del gap
del reverse