Exemple #1
0
def kernel_fisher_modular(fm_train_dna=traindat,
                          fm_test_dna=testdat,
                          label_train_dna=label_traindat,
                          N=1,
                          M=4,
                          pseudo=1e-1,
                          order=1,
                          gap=0,
                          reverse=False,
                          kargs=[1, False, True]):

    from modshogun import StringCharFeatures, StringWordFeatures, FKFeatures, DNA
    from modshogun import PolyKernel
    from modshogun import HMM, BW_NORMAL  #, MSG_DEBUG

    # train HMM for positive class
    charfeat = StringCharFeatures(fm_hmm_pos, DNA)
    #charfeat.io.set_loglevel(MSG_DEBUG)
    hmm_pos_train = StringWordFeatures(charfeat.get_alphabet())
    hmm_pos_train.obtain_from_char(charfeat, order - 1, order, gap, reverse)
    pos = HMM(hmm_pos_train, N, M, pseudo)
    pos.baum_welch_viterbi_train(BW_NORMAL)

    # train HMM for negative class
    charfeat = StringCharFeatures(fm_hmm_neg, DNA)
    hmm_neg_train = StringWordFeatures(charfeat.get_alphabet())
    hmm_neg_train.obtain_from_char(charfeat, order - 1, order, gap, reverse)
    neg = HMM(hmm_neg_train, N, M, pseudo)
    neg.baum_welch_viterbi_train(BW_NORMAL)

    # Kernel training data
    charfeat = StringCharFeatures(fm_train_dna, DNA)
    wordfeats_train = StringWordFeatures(charfeat.get_alphabet())
    wordfeats_train.obtain_from_char(charfeat, order - 1, order, gap, reverse)

    # Kernel testing data
    charfeat = StringCharFeatures(fm_test_dna, DNA)
    wordfeats_test = StringWordFeatures(charfeat.get_alphabet())
    wordfeats_test.obtain_from_char(charfeat, order - 1, order, gap, reverse)

    # get kernel on training data
    pos.set_observations(wordfeats_train)
    neg.set_observations(wordfeats_train)
    feats_train = FKFeatures(10, pos, neg)
    feats_train.set_opt_a(-1)  #estimate prior
    kernel = PolyKernel(feats_train, feats_train, *kargs)
    km_train = kernel.get_kernel_matrix()

    # get kernel on testing data
    pos_clone = HMM(pos)
    neg_clone = HMM(neg)
    pos_clone.set_observations(wordfeats_test)
    neg_clone.set_observations(wordfeats_test)
    feats_test = FKFeatures(10, pos_clone, neg_clone)
    feats_test.set_a(feats_train.get_a())  #use prior from training data
    kernel.init(feats_train, feats_test)
    km_test = kernel.get_kernel_matrix()
    return km_train, km_test, kernel
def kernel_top_modular(fm_train_dna=traindat,
                       fm_test_dna=testdat,
                       label_train_dna=label_traindat,
                       pseudo=1e-1,
                       order=1,
                       gap=0,
                       reverse=False,
                       kargs=[1, False, True]):
    from modshogun import StringCharFeatures, StringWordFeatures, TOPFeatures, DNA
    from modshogun import PolyKernel
    from modshogun import HMM, BW_NORMAL

    N = 1  # toy HMM with 1 state
    M = 4  # 4 observations -> DNA

    # train HMM for positive class
    charfeat = StringCharFeatures(fm_hmm_pos, DNA)
    hmm_pos_train = StringWordFeatures(charfeat.get_alphabet())
    hmm_pos_train.obtain_from_char(charfeat, order - 1, order, gap, reverse)
    pos = HMM(hmm_pos_train, N, M, pseudo)
    pos.baum_welch_viterbi_train(BW_NORMAL)

    # train HMM for negative class
    charfeat = StringCharFeatures(fm_hmm_neg, DNA)
    hmm_neg_train = StringWordFeatures(charfeat.get_alphabet())
    hmm_neg_train.obtain_from_char(charfeat, order - 1, order, gap, reverse)
    neg = HMM(hmm_neg_train, N, M, pseudo)
    neg.baum_welch_viterbi_train(BW_NORMAL)

    # Kernel training data
    charfeat = StringCharFeatures(fm_train_dna, DNA)
    wordfeats_train = StringWordFeatures(charfeat.get_alphabet())
    wordfeats_train.obtain_from_char(charfeat, order - 1, order, gap, reverse)

    # Kernel testing data
    charfeat = StringCharFeatures(fm_test_dna, DNA)
    wordfeats_test = StringWordFeatures(charfeat.get_alphabet())
    wordfeats_test.obtain_from_char(charfeat, order - 1, order, gap, reverse)

    # get kernel on training data
    pos.set_observations(wordfeats_train)
    neg.set_observations(wordfeats_train)
    feats_train = TOPFeatures(10, pos, neg, False, False)
    kernel = PolyKernel(feats_train, feats_train, *kargs)
    km_train = kernel.get_kernel_matrix()

    # get kernel on testing data
    pos_clone = HMM(pos)
    neg_clone = HMM(neg)
    pos_clone.set_observations(wordfeats_test)
    neg_clone.set_observations(wordfeats_test)
    feats_test = TOPFeatures(10, pos_clone, neg_clone, False, False)
    kernel.init(feats_train, feats_test)
    km_test = kernel.get_kernel_matrix()
    return km_train, km_test, kernel
Exemple #3
0
def get_kernel_mat(fm_train_dna,
                   fm_test_dna,
                   N,
                   M,
                   pseudo=1e-1,
                   order=1,
                   gap=0,
                   reverse=False):

    # train HMM for positive class
    print "hmm training"
    charfeat = StringCharFeatures(fm_train_dna, DNA)
    #charfeat.io.set_loglevel(MSG_DEBUG)
    hmm_train = StringWordFeatures(charfeat.get_alphabet())
    hmm_train.obtain_from_char(charfeat, order - 1, order, gap, reverse)
    pos = HMM(hmm_train, N, M, pseudo)
    pos.baum_welch_viterbi_train(BW_NORMAL)
    neg = HMM(pos)

    print "Kernel training data"
    charfeat = StringCharFeatures(fm_train_dna, DNA)
    wordfeats_train = StringWordFeatures(charfeat.get_alphabet())
    wordfeats_train.obtain_from_char(charfeat, order - 1, order, gap, reverse)

    print "Kernel testing data"
    charfeat = StringCharFeatures(fm_test_dna, DNA)
    wordfeats_test = StringWordFeatures(charfeat.get_alphabet())
    wordfeats_test.obtain_from_char(charfeat, order - 1, order, gap, reverse)

    print "get kernel on training data"
    pos.set_observations(wordfeats_train)
    neg.set_observations(wordfeats_train)
    feats_train = FKFeatures(10, pos, neg)
    feats_train.set_opt_a(-1)  #estimate prior

    print 'getting feature matrix'
    v0 = feats_train.get_feature_vector(0)
    v1 = feats_train.get_feature_vector(1)
    print np.dot(v0, v1)
    kernel = LinearKernel(feats_train, feats_train)
    #kernel=PolyKernel(feats_train, feats_train, *kargs)
    km_train = kernel.get_kernel_matrix()
    print km_train.shape, km_train[0, 1]

    print "get kernel on testing data"
    pos_clone = HMM(pos)
    neg_clone = HMM(neg)
    pos_clone.set_observations(wordfeats_test)
    neg_clone.set_observations(wordfeats_test)
    feats_test = FKFeatures(10, pos_clone, neg_clone)
    feats_test.set_a(feats_train.get_a())  #use prior from training data
    kernel.init(feats_train, feats_test)
    km_test = kernel.get_kernel_matrix()
    return km_train, km_test, kernel
def kernel_fisher_modular (fm_train_dna=traindat, fm_test_dna=testdat,
		label_train_dna=label_traindat,
		N=1,M=4,pseudo=1e-1,order=1,gap=0,reverse=False,
		kargs=[1,False,True]):

	from modshogun import StringCharFeatures, StringWordFeatures, FKFeatures, DNA
	from modshogun import PolyKernel
	from modshogun import HMM, BW_NORMAL#, MSG_DEBUG

	# train HMM for positive class
	charfeat=StringCharFeatures(fm_hmm_pos, DNA)
	#charfeat.io.set_loglevel(MSG_DEBUG)
	hmm_pos_train=StringWordFeatures(charfeat.get_alphabet())
	hmm_pos_train.obtain_from_char(charfeat, order-1, order, gap, reverse)
	pos=HMM(hmm_pos_train, N, M, pseudo)
	pos.baum_welch_viterbi_train(BW_NORMAL)

	# train HMM for negative class
	charfeat=StringCharFeatures(fm_hmm_neg, DNA)
	hmm_neg_train=StringWordFeatures(charfeat.get_alphabet())
	hmm_neg_train.obtain_from_char(charfeat, order-1, order, gap, reverse)
	neg=HMM(hmm_neg_train, N, M, pseudo)
	neg.baum_welch_viterbi_train(BW_NORMAL)

	# Kernel training data
	charfeat=StringCharFeatures(fm_train_dna, DNA)
	wordfeats_train=StringWordFeatures(charfeat.get_alphabet())
	wordfeats_train.obtain_from_char(charfeat, order-1, order, gap, reverse)

	# Kernel testing data
	charfeat=StringCharFeatures(fm_test_dna, DNA)
	wordfeats_test=StringWordFeatures(charfeat.get_alphabet())
	wordfeats_test.obtain_from_char(charfeat, order-1, order, gap, reverse)

	# get kernel on training data
	pos.set_observations(wordfeats_train)
	neg.set_observations(wordfeats_train)
	feats_train=FKFeatures(10, pos, neg)
	feats_train.set_opt_a(-1) #estimate prior
	kernel=PolyKernel(feats_train, feats_train, *kargs)
	km_train=kernel.get_kernel_matrix()

	# get kernel on testing data
	pos_clone=HMM(pos)
	neg_clone=HMM(neg)
	pos_clone.set_observations(wordfeats_test)
	neg_clone.set_observations(wordfeats_test)
	feats_test=FKFeatures(10, pos_clone, neg_clone)
	feats_test.set_a(feats_train.get_a()) #use prior from training data
	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()
	return km_train,km_test,kernel
Exemple #5
0
def _evaluate_top_fisher(indata, prefix):
    feats = {}
    wordfeats = util.get_features(indata, prefix)

    pos_train = HMM(wordfeats['train'], indata[prefix + 'N'],
                    indata[prefix + 'M'], indata[prefix + 'pseudo'])
    pos_train.train()
    pos_train.baum_welch_viterbi_train(BW_NORMAL)
    neg_train = HMM(wordfeats['train'], indata[prefix + 'N'],
                    indata[prefix + 'M'], indata[prefix + 'pseudo'])
    neg_train.train()
    neg_train.baum_welch_viterbi_train(BW_NORMAL)
    pos_test = HMM(pos_train)
    pos_test.set_observations(wordfeats['test'])
    neg_test = HMM(neg_train)
    neg_test.set_observations(wordfeats['test'])

    if indata[prefix + 'name'] == 'TOP':
        feats['train'] = TOPFeatures(10, pos_train, neg_train, False, False)
        feats['test'] = TOPFeatures(10, pos_test, neg_test, False, False)
    else:
        feats['train'] = FKFeatures(10, pos_train, neg_train)
        feats['train'].set_opt_a(-1)  #estimate prior
        feats['test'] = FKFeatures(10, pos_test, neg_test)
        feats['test'].set_a(
            feats['train'].get_a())  #use prior from training data

    prefix = 'kernel_'
    args = util.get_args(indata, prefix)
    kernel = PolyKernel(feats['train'], feats['train'], *args)
    #	kernel=PolyKernel(*args)
    #	kernel.init(feats['train'], feats['train'])
    km_train = max(
        abs(indata[prefix + 'matrix_train'] - kernel.get_kernel_matrix()).flat)
    kernel.init(feats['train'], feats['test'])
    km_test = max(
        abs(indata[prefix + 'matrix_test'] - kernel.get_kernel_matrix()).flat)

    return util.check_accuracy(indata[prefix + 'accuracy'],
                               km_train=km_train,
                               km_test=km_test)
def kernel_top_modular (fm_train_dna=traindat,fm_test_dna=testdat,label_train_dna=label_traindat,pseudo=1e-1,
	order=1,gap=0,reverse=False,kargs=[1, False, True]):
	from modshogun import StringCharFeatures, StringWordFeatures, TOPFeatures, DNA
	from modshogun import PolyKernel
	from modshogun import HMM, BW_NORMAL

	N=1 # toy HMM with 1 state
	M=4 # 4 observations -> DNA


	# train HMM for positive class
	charfeat=StringCharFeatures(fm_hmm_pos, DNA)
	hmm_pos_train=StringWordFeatures(charfeat.get_alphabet())
	hmm_pos_train.obtain_from_char(charfeat, order-1, order, gap, reverse)
	pos=HMM(hmm_pos_train, N, M, pseudo)
	pos.baum_welch_viterbi_train(BW_NORMAL)

	# train HMM for negative class
	charfeat=StringCharFeatures(fm_hmm_neg, DNA)
	hmm_neg_train=StringWordFeatures(charfeat.get_alphabet())
	hmm_neg_train.obtain_from_char(charfeat, order-1, order, gap, reverse)
	neg=HMM(hmm_neg_train, N, M, pseudo)
	neg.baum_welch_viterbi_train(BW_NORMAL)

	# Kernel training data
	charfeat=StringCharFeatures(fm_train_dna, DNA)
	wordfeats_train=StringWordFeatures(charfeat.get_alphabet())
	wordfeats_train.obtain_from_char(charfeat, order-1, order, gap, reverse)

	# Kernel testing data
	charfeat=StringCharFeatures(fm_test_dna, DNA)
	wordfeats_test=StringWordFeatures(charfeat.get_alphabet())
	wordfeats_test.obtain_from_char(charfeat, order-1, order, gap, reverse)

	# get kernel on training data
	pos.set_observations(wordfeats_train)
	neg.set_observations(wordfeats_train)
	feats_train=TOPFeatures(10, pos, neg, False, False)
	kernel=PolyKernel(feats_train, feats_train, *kargs)
	km_train=kernel.get_kernel_matrix()

	# get kernel on testing data
	pos_clone=HMM(pos)
	neg_clone=HMM(neg)
	pos_clone.set_observations(wordfeats_test)
	neg_clone.set_observations(wordfeats_test)
	feats_test=TOPFeatures(10, pos_clone, neg_clone, False, False)
	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()
	return km_train,km_test,kernel
Exemple #7
0
def _evaluate_top_fisher (indata, prefix):
	feats={}
	wordfeats=util.get_features(indata, prefix)

	pos_train=HMM(wordfeats['train'], indata[prefix+'N'], indata[prefix+'M'],
		indata[prefix+'pseudo'])
	pos_train.train()
	pos_train.baum_welch_viterbi_train(BW_NORMAL)
	neg_train=HMM(wordfeats['train'], indata[prefix+'N'], indata[prefix+'M'],
		indata[prefix+'pseudo'])
	neg_train.train()
	neg_train.baum_welch_viterbi_train(BW_NORMAL)
	pos_test=HMM(pos_train)
	pos_test.set_observations(wordfeats['test'])
	neg_test=HMM(neg_train)
	neg_test.set_observations(wordfeats['test'])

	if indata[prefix+'name']=='TOP':
		feats['train']=TOPFeatures(10, pos_train, neg_train, False, False)
		feats['test']=TOPFeatures(10, pos_test, neg_test, False, False)
	else:
		feats['train']=FKFeatures(10, pos_train, neg_train)
		feats['train'].set_opt_a(-1) #estimate prior
		feats['test']=FKFeatures(10, pos_test, neg_test)
		feats['test'].set_a(feats['train'].get_a()) #use prior from training data

	prefix='kernel_'
	args=util.get_args(indata, prefix)
	kernel=PolyKernel(feats['train'], feats['train'], *args)
#	kernel=PolyKernel(*args)
#	kernel.init(feats['train'], feats['train'])
	km_train=max(abs(
		indata[prefix+'matrix_train']-kernel.get_kernel_matrix()).flat)
	kernel.init(feats['train'], feats['test'])
	km_test=max(abs(
		indata[prefix+'matrix_test']-kernel.get_kernel_matrix()).flat)

	return util.check_accuracy(indata[prefix+'accuracy'],
		km_train=km_train, km_test=km_test)
def get_kernel_mat(fm_train_dna, fm_test_dna, N, M,
		pseudo=1e-1,order=1,gap=0,reverse=False):

	# train HMM for positive class
	print "hmm training"
	charfeat=StringCharFeatures(fm_train_dna, DNA)
	#charfeat.io.set_loglevel(MSG_DEBUG)
	hmm_train=StringWordFeatures(charfeat.get_alphabet())
	hmm_train.obtain_from_char(charfeat, order-1, order, gap, reverse)
	pos=HMM(hmm_train, N, M, pseudo)
	pos.baum_welch_viterbi_train(BW_NORMAL)
	neg = HMM(pos)

	print "Kernel training data"
	charfeat=StringCharFeatures(fm_train_dna, DNA)
	wordfeats_train=StringWordFeatures(charfeat.get_alphabet())
	wordfeats_train.obtain_from_char(charfeat, order-1, order, gap, reverse)

	print "Kernel testing data"
	charfeat=StringCharFeatures(fm_test_dna, DNA)
	wordfeats_test=StringWordFeatures(charfeat.get_alphabet())
	wordfeats_test.obtain_from_char(charfeat, order-1, order, gap, reverse)

	print "get kernel on training data"
	pos.set_observations(wordfeats_train)
	neg.set_observations(wordfeats_train)
	feats_train=FKFeatures(10, pos, neg)
	feats_train.set_opt_a(-1) #estimate prior
	
	print 'getting feature matrix'
	v0 = feats_train.get_feature_vector(0)
	v1 = feats_train.get_feature_vector(1)
	print np.dot(v0, v1)
	kernel=LinearKernel(feats_train, feats_train)
	#kernel=PolyKernel(feats_train, feats_train, *kargs)
	km_train=kernel.get_kernel_matrix()
	print km_train.shape, km_train[0, 1]

	print "get kernel on testing data"
	pos_clone=HMM(pos)
	neg_clone=HMM(neg)
	pos_clone.set_observations(wordfeats_test)
	neg_clone.set_observations(wordfeats_test)
	feats_test=FKFeatures(10, pos_clone, neg_clone)
	feats_test.set_a(feats_train.get_a()) #use prior from training data
	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()
	return km_train,km_test,kernel
def get_feature_mat(fm_train_dna, fm_test_dna, N, M,
		pseudo=1e-1,order=1,gap=0,reverse=False):

	# train HMM for positive class
	print "hmm training"
	charfeat=StringCharFeatures(fm_train_dna, DNA)
	#charfeat.io.set_loglevel(MSG_DEBUG)
	hmm_train=StringWordFeatures(charfeat.get_alphabet())
	hmm_train.obtain_from_char(charfeat, order-1, order, gap, reverse)
	pos=HMM(hmm_train, N, M, pseudo)
	pos.baum_welch_viterbi_train(BW_NORMAL)
	neg = HMM(pos)

	print "Kernel training data"
	charfeat=StringCharFeatures(fm_train_dna, DNA)
	wordfeats_train=StringWordFeatures(charfeat.get_alphabet())
	wordfeats_train.obtain_from_char(charfeat, order-1, order, gap, reverse)

	print "Kernel testing data"
	charfeat=StringCharFeatures(fm_test_dna, DNA)
	wordfeats_test=StringWordFeatures(charfeat.get_alphabet())
	wordfeats_test.obtain_from_char(charfeat, order-1, order, gap, reverse)

	print "get kernel on training data"
	pos.set_observations(wordfeats_train)
	neg.set_observations(wordfeats_train)
	feats_train=FKFeatures(10, pos, neg)
	feats_train.set_opt_a(-1) #estimate prior

	print 'getting feature train'
	train_featmat = []
	for i in range(len(fm_train_dna)):
		train_featmat.append(feats_train.get_computed_dot_feature_vector(i))
	train_featmat = np.array(train_featmat)

	print "get feature on testing"
	pos_clone=HMM(pos)
	neg_clone=HMM(neg)
	pos_clone.set_observations(wordfeats_test)
	neg_clone.set_observations(wordfeats_test)
	feats_test=FKFeatures(10, pos_clone, neg_clone)
	feats_test.set_a(feats_train.get_a()) #use prior from training data

	test_featmat = []
	for i in range(len(fm_test_dna)):
		test_featmat.append(feats_test.get_feature_vector(i))
	test_featmat = np.array(test_featmat)
	return train_featmat, test_featmat