def comm_ulong_string():
    """Run the CommUlongString kernel example on the module-level DNA data.

    Reads the globals ``fm_train_dna`` and ``fm_test_dna``, converts each
    to ulong string features, sorts them with ``SortUlongString`` and
    computes the train/train and train/test kernel matrices.

    Return:
    nothing -- the matrices are computed and then discarded
    """
    print('CommUlongString')
    from shogun.Kernel import CommUlongStringKernel
    from shogun.Features import StringUlongFeatures, StringCharFeatures, DNA
    from shogun.PreProc import SortUlongString

    # Fixed settings of this example.
    kmer_order = 3
    kmer_gap = 0
    use_reverse = False

    # Training data: char features -> ulong features, then sort.
    train_chars = StringCharFeatures(DNA)
    train_chars.set_features(fm_train_dna)
    feats_train = StringUlongFeatures(train_chars.get_alphabet())
    feats_train.obtain_from_char(
        train_chars, kmer_order - 1, kmer_order, kmer_gap, use_reverse)
    sorter = SortUlongString()
    sorter.init(feats_train)
    feats_train.add_preproc(sorter)
    feats_train.apply_preproc()

    # Test data goes through the same conversion and reuses the sorter
    # that was initialised on the training features.
    test_chars = StringCharFeatures(DNA)
    test_chars.set_features(fm_test_dna)
    feats_test = StringUlongFeatures(test_chars.get_alphabet())
    feats_test.obtain_from_char(
        test_chars, kmer_order - 1, kmer_order, kmer_gap, use_reverse)
    feats_test.add_preproc(sorter)
    feats_test.apply_preproc()

    signed = False
    kernel = CommUlongStringKernel(feats_train, feats_train, signed)

    # Train/train matrix first, then re-initialise for train/test.
    km_train = kernel.get_kernel_matrix()
    kernel.init(feats_train, feats_test)
    km_test = kernel.get_kernel_matrix()
def preprocessor_sortulongstring_modular(fm_train_dna=traindna,
                                         fm_test_dna=testdna,
                                         order=3,
                                         gap=0,
                                         reverse=False,
                                         use_sign=False):
    """Sort ulong string features and evaluate a CommUlongStringKernel.

    Arguments:
    fm_train_dna -- training strings, loaded as DNA char features
    fm_test_dna -- test strings, loaded as DNA char features
    order -- order passed to obtain_from_char; order - 1 is passed as the
             argument preceding it
    gap -- gap passed to obtain_from_char
    reverse -- reverse flag passed to obtain_from_char
    use_sign -- flag passed to the CommUlongStringKernel constructor

    Return:
    tuple (km_train, km_test, kernel): the train/train kernel matrix, the
    train/test kernel matrix and the kernel object (left initialised on
    train/test)
    """
    from shogun.Kernel import CommUlongStringKernel
    from shogun.Features import StringCharFeatures, StringUlongFeatures, DNA
    from shogun.Preprocessor import SortUlongString

    # Convert the training strings.
    train_chars = StringCharFeatures(DNA)
    train_chars.set_features(fm_train_dna)
    feats_train = StringUlongFeatures(train_chars.get_alphabet())
    feats_train.obtain_from_char(train_chars, order - 1, order, gap, reverse)

    # Convert the test strings the same way.
    test_chars = StringCharFeatures(DNA)
    test_chars.set_features(fm_test_dna)
    feats_test = StringUlongFeatures(test_chars.get_alphabet())
    feats_test.obtain_from_char(test_chars, order - 1, order, gap, reverse)

    # One sorter, initialised on the training features, is attached to and
    # applied on both feature sets.
    sorter = SortUlongString()
    sorter.init(feats_train)
    feats_train.add_preprocessor(sorter)
    feats_train.apply_preprocessor()
    feats_test.add_preprocessor(sorter)
    feats_test.apply_preprocessor()

    kernel = CommUlongStringKernel(feats_train, feats_train, use_sign)
    km_train = kernel.get_kernel_matrix()

    # Re-initialise the same kernel object for the train/test matrix.
    kernel.init(feats_train, feats_test)
    km_test = kernel.get_kernel_matrix()

    return km_train, km_test, kernel
def get_kernel_matrix(li, order=6, gap=2, reverse=False, use_sign=False):
    """
    Get kernel matrix from a list of strings.

    Arguments:
    li -- list of strings, loaded as RAWBYTE string features
    order -- order passed to obtain_from_char (default 6); order - 1 is
             passed as the argument preceding it
    gap -- gap passed to obtain_from_char (default 2)
    reverse -- reverse flag passed to obtain_from_char (default False)
    use_sign -- flag passed to the CommUlongStringKernel constructor
                (default False)

    Return:
    the matrix from kernel.get_kernel_matrix(), computed between the
    features of li and themselves

    The defaults reproduce the values that were previously hard-coded, so
    get_kernel_matrix(li) behaves as before.
    """
    charfeat = StringCharFeatures(RAWBYTE)
    charfeat.set_features(li)
    #Get alphabet.
    feats_train = StringUlongFeatures(charfeat.get_alphabet())
    feats_train.obtain_from_char(charfeat, order-1, order, gap, reverse)
    #CommUlongStringKernel needs sorted features.
    preproc = SortUlongString()
    preproc.init(feats_train)
    feats_train.add_preproc(preproc)
    feats_train.apply_preproc()

    #Compute kernel matrix between train features.
    kernel = CommUlongStringKernel(feats_train, feats_train, use_sign)
    return kernel.get_kernel_matrix()
Esempio n. 4
0
def get_kernel_matrix(li, order=6, gap=2, reverse=False, use_sign=False):
    """
    Get kernel matrix from a list of strings.

    Arguments:
    li -- list of strings; wrapped in StringCharFeatures(RAWBYTE)
    order -- forwarded to obtain_from_char as its order argument, and as
             order - 1 for the argument before it (default 6)
    gap -- forwarded to obtain_from_char (default 2)
    reverse -- forwarded to obtain_from_char (default False)
    use_sign -- forwarded to CommUlongStringKernel (default False)

    Return:
    kernel matrix of the strings in li against themselves, as returned by
    kernel.get_kernel_matrix()

    Calling get_kernel_matrix(li) with no further arguments uses the
    values this function previously had fixed in its body.
    """
    char_features = StringCharFeatures(RAWBYTE)
    char_features.set_features(li)

    # The ulong features share the alphabet of the char features.
    feats_train = StringUlongFeatures(char_features.get_alphabet())
    feats_train.obtain_from_char(char_features, order - 1, order, gap,
                                 reverse)

    # CommUlongStringKernel needs sorted features.
    sorter = SortUlongString()
    sorter.init(feats_train)
    feats_train.add_preproc(sorter)
    feats_train.apply_preproc()

    # Compute kernel matrix between train features.
    kernel = CommUlongStringKernel(feats_train, feats_train, use_sign)
    km_train = kernel.get_kernel_matrix()
    return km_train
Esempio n. 5
0
def kernel_comm_ulong_string_modular(fm_train_dna=traindat,
                                     fm_test_dna=testdat,
                                     order=3,
                                     gap=0,
                                     reverse=False,
                                     use_sign=False):
    """Compute CommUlongStringKernel matrices for DNA train/test strings.

    Arguments:
    fm_train_dna -- training strings, loaded as DNA char features
    fm_test_dna -- test strings, loaded as DNA char features
    order -- order passed to obtain_from_char; order - 1 is passed as the
             argument preceding it
    gap -- gap passed to obtain_from_char
    reverse -- reverse flag passed to obtain_from_char
    use_sign -- flag passed to the CommUlongStringKernel constructor;
                defaults to False, the value that used to be fixed here

    Return:
    tuple (km_train, km_test, kernel): the train/train kernel matrix, the
    train/test kernel matrix and the kernel object (left initialised on
    train/test)
    """
    from shogun.Kernel import CommUlongStringKernel
    from shogun.Features import StringUlongFeatures, StringCharFeatures, DNA
    from shogun.PreProc import SortUlongString

    charfeat = StringCharFeatures(DNA)
    charfeat.set_features(fm_train_dna)
    feats_train = StringUlongFeatures(charfeat.get_alphabet())
    feats_train.obtain_from_char(charfeat, order - 1, order, gap, reverse)
    # The sorter is initialised on the training features and reused below
    # for the test features.
    preproc = SortUlongString()
    preproc.init(feats_train)
    feats_train.add_preproc(preproc)
    feats_train.apply_preproc()

    charfeat = StringCharFeatures(DNA)
    charfeat.set_features(fm_test_dna)
    feats_test = StringUlongFeatures(charfeat.get_alphabet())
    feats_test.obtain_from_char(charfeat, order - 1, order, gap, reverse)
    feats_test.add_preproc(preproc)
    feats_test.apply_preproc()

    kernel = CommUlongStringKernel(feats_train, feats_train, use_sign)

    km_train = kernel.get_kernel_matrix()
    # Re-initialise the same kernel object for the train/test matrix.
    kernel.init(feats_train, feats_test)
    km_test = kernel.get_kernel_matrix()
    return km_train, km_test, kernel
Esempio n. 6
0
def create_kernel(kname, kparam, feats_train):
    """Call the corresponding constructor for the kernel

    Arguments:
    kname -- kernel identifier; one of 'gauss', 'linear', 'poly', 'wd',
             'spec', 'cumspec', 'spec2', 'cumspec2', 'localalign',
             'localimprove'
    kparam -- dict of kernel parameters; which keys are read depends on
              kname ('width', 'scale', 'degree', 'inhomogene', 'normal',
              'seqlength', 'shift', 'length', 'indeg', 'outdeg')
    feats_train -- training features, used for both sides of the kernel;
                   for 'spec2' and 'cumspec2' it is indexed with the keys
                   'f0' and 'f1'

    Return:
    the constructed kernel, with its cache size set to 32

    Raises:
    ValueError -- if kname is not one of the identifiers listed above
    """

    if kname == 'gauss':
        kernel = GaussianKernel(feats_train, feats_train, kparam['width'])
    elif kname == 'linear':
        kernel = LinearKernel(feats_train, feats_train)
        kernel.set_normalizer(AvgDiagKernelNormalizer(kparam['scale']))
    elif kname == 'poly':
        kernel = PolyKernel(feats_train, feats_train, kparam['degree'],
                            kparam['inhomogene'], kparam['normal'])
    elif kname == 'wd':
        kernel = WeightedDegreePositionStringKernel(feats_train, feats_train,
                                                    kparam['degree'])
        kernel.set_normalizer(
            AvgDiagKernelNormalizer(float(kparam['seqlength'])))
        # One shift value per sequence position, all equal to
        # kparam['shift'].
        kernel.set_shifts(kparam['shift'] *
                          numpy.ones(kparam['seqlength'], dtype=numpy.int32))
    elif kname == 'spec':
        kernel = CommUlongStringKernel(feats_train, feats_train)
    elif kname == 'cumspec':
        kernel = WeightedCommWordStringKernel(feats_train, feats_train)
        kernel.set_weights(numpy.ones(kparam['degree']))
    elif kname == 'spec2':
        # One sub-kernel per feature set, appended in the order f0, f1.
        kernel = CombinedKernel()
        for part in ('f0', 'f1'):
            subkernel = CommWordStringKernel(feats_train[part],
                                             feats_train[part])
            subkernel.io.disable_progress()
            kernel.append_kernel(subkernel)
    elif kname == 'cumspec2':
        # Same layout as 'spec2', with unit weights on every sub-kernel.
        kernel = CombinedKernel()
        for part in ('f0', 'f1'):
            subkernel = WeightedCommWordStringKernel(feats_train[part],
                                                     feats_train[part])
            subkernel.set_weights(numpy.ones(kparam['degree']))
            subkernel.io.disable_progress()
            kernel.append_kernel(subkernel)
    elif kname == 'localalign':
        kernel = LocalAlignmentStringKernel(feats_train, feats_train)
    elif kname == 'localimprove':
        kernel = LocalityImprovedStringKernel(feats_train, feats_train,
                                              kparam['length'],
                                              kparam['indeg'],
                                              kparam['outdeg'])
    else:
        # Fail explicitly: falling through would reach set_cache_size()
        # below with `kernel` unbound.
        raise ValueError('Unknown kernel %s' % kname)

    kernel.set_cache_size(32)
    return kernel
Esempio n. 7
0
def get_weighted_spectrum_kernel(subfeats_list, options):
    """build a combined spectrum kernel with uniform sub-kernel weights

    Arguments:
    subfeats_list -- list of sub-feature objects; each is appended to a
                     CombinedFeatures object
    options -- object containing option data; kmerlen and kmerlen2 are
               read as the inclusive range of k-mer lengths

    Return:
    CombinedKernel holding one CommWord(Ulong)StringKernel per k-mer
    length, initialised on the combined features against themselves
    """
    kmerlen = options.kmerlen
    kmerlen2 = options.kmerlen2

    kernel = CombinedKernel()
    feats = CombinedFeatures()

    for subfeats in subfeats_list:
        feats.append_feature_obj(subfeats)

    # One sub-kernel per k-mer length: the word kernel up to k = 8, the
    # ulong kernel above that.
    # NOTE(review): presumably subfeats_list holds one feature object per
    # k-mer length, in the same order -- confirm against the caller.
    subkernels = 0
    for k in xrange(kmerlen, kmerlen2 + 1):
        if k <= 8:
            subkernel = CommWordStringKernel(10, False)
        else:
            subkernel = CommUlongStringKernel(10, False)

        kernel.append_kernel(subkernel)
        subkernels += 1

    kernel.init(feats, feats)

    # Every sub-kernel gets the same weight 1 / subkernels; e.g. with
    # subkernels = 8 the weight vector is eight entries of 0.125.
    kernel.set_subkernel_weights(
        numpy.array([1 / float(subkernels)] * subkernels,
                    numpy.dtype('float64')))

    return kernel
Esempio n. 8
0
def get_spectrum_kernel(feats, options):
    """build a spectrum kernel on feats, choosing the kernel class by k-mer length

    Arguments:
    feats -- feature object, used for both sides of the kernel
    options -- object containing option data; only kmerlen is read

    Return:
    CommWordStringKernel when options.kmerlen <= 8, otherwise
    CommUlongStringKernel
    """
    # NOTE(review): the threshold of 8 presumably matches what fits in word
    # features -- confirm against the feature construction code.
    kernel_class = (CommWordStringKernel if options.kmerlen <= 8
                    else CommUlongStringKernel)
    return kernel_class(feats, feats)
Esempio n. 9
0
def get_weighted_spectrum_kernel(subfeats_list, options):
    """build a combined spectrum kernel whose sub-kernels are weighted equally

    Arguments:
    subfeats_list -- list of sub-feature objects, gathered into one
                     CombinedFeatures object
    options -- object containing option data; kmerlen and kmerlen2 give
               the inclusive range of k-mer lengths

    Return:
    CombinedKernel of CommWord(Ulong)StringKernel sub-kernels, initialised
    on the combined features against themselves
    """
    first_len = options.kmerlen
    last_len = options.kmerlen2

    combined_kernel = CombinedKernel()
    combined_feats = CombinedFeatures()

    for sub in subfeats_list:
        combined_feats.append_feature_obj(sub)

    # Add one sub-kernel per k-mer length and count them as we go.
    count = 0
    for kmer_size in xrange(first_len, last_len + 1):
        kernel_class = (CommWordStringKernel if kmer_size <= 8
                        else CommUlongStringKernel)
        combined_kernel.append_kernel(kernel_class(10, False))
        count += 1

    combined_kernel.init(combined_feats, combined_feats)

    # Uniform weights: each sub-kernel contributes 1 / count.
    uniform = [1 / float(count)] * count
    combined_kernel.set_subkernel_weights(
        numpy.array(uniform, numpy.dtype('float64')))

    return combined_kernel