def kernel_fisher_modular(fm_train_dna=traindat, fm_test_dna=testdat,
                          label_train_dna=label_traindat,
                          N=1, M=4, pseudo=1e-1, order=1, gap=0, reverse=False,
                          kargs=[1, False, True]):
    """Build polynomial kernel matrices on Fisher-kernel (FK) features.

    One HMM is trained per class on the training strings, FKFeatures are
    derived from the two HMMs for the training and the test strings, and a
    PolyKernel is evaluated on train/train and on train/test.

    The per-class training strings are selected from ``fm_train_dna`` with
    ``label_train_dna`` so that caller-supplied data is honoured.
    NOTE(review): assumes labels are encoded as +1 (positive) and -1
    (negative) -- confirm against the label loader.

    Parameters:
        fm_train_dna: sequence of training DNA strings.
        fm_test_dna: sequence of test DNA strings.
        label_train_dna: one label per training string.
        N, M, pseudo: passed unchanged to the HMM constructor.
        order, gap, reverse: passed to ``obtain_from_char`` (with
            ``order - 1`` as its second argument).
        kargs: extra positional arguments for PolyKernel; only unpacked,
            never mutated.

    Returns:
        (km_train, km_test, kernel) -- the train/train kernel matrix, the
        train/test kernel matrix, and the kernel object (left initialised
        on train/test).

    Raises:
        ValueError: if the number of labels differs from the number of
            training strings.
    """
    from modshogun import StringCharFeatures, StringWordFeatures, FKFeatures, DNA
    from modshogun import PolyKernel
    from modshogun import HMM, BW_NORMAL

    if len(fm_train_dna) != len(label_train_dna):
        raise ValueError("fm_train_dna and label_train_dna must have the same length")

    def to_word_features(strings):
        # Char features -> word features, identical for every data set used here.
        charfeat = StringCharFeatures(strings, DNA)
        wordfeat = StringWordFeatures(charfeat.get_alphabet())
        wordfeat.obtain_from_char(charfeat, order - 1, order, gap, reverse)
        return wordfeat

    pos_strings = [seq for seq, lab in zip(fm_train_dna, label_train_dna) if lab == 1]
    neg_strings = [seq for seq, lab in zip(fm_train_dna, label_train_dna) if lab == -1]

    # train HMM for positive class
    hmm_pos_train = to_word_features(pos_strings)
    pos = HMM(hmm_pos_train, N, M, pseudo)
    pos.baum_welch_viterbi_train(BW_NORMAL)

    # train HMM for negative class
    hmm_neg_train = to_word_features(neg_strings)
    neg = HMM(hmm_neg_train, N, M, pseudo)
    neg.baum_welch_viterbi_train(BW_NORMAL)

    # word features of the full training and test sets
    wordfeats_train = to_word_features(fm_train_dna)
    wordfeats_test = to_word_features(fm_test_dna)

    # kernel on training data
    pos.set_observations(wordfeats_train)
    neg.set_observations(wordfeats_train)
    feats_train = FKFeatures(10, pos, neg)
    feats_train.set_opt_a(-1)  # estimate prior
    kernel = PolyKernel(feats_train, feats_train, *kargs)
    km_train = kernel.get_kernel_matrix()

    # kernel on testing data: copy the HMMs so the training ones keep
    # their training observations
    pos_clone = HMM(pos)
    neg_clone = HMM(neg)
    pos_clone.set_observations(wordfeats_test)
    neg_clone.set_observations(wordfeats_test)
    feats_test = FKFeatures(10, pos_clone, neg_clone)
    feats_test.set_a(feats_train.get_a())  # use prior from training data
    kernel.init(feats_train, feats_test)
    km_test = kernel.get_kernel_matrix()

    return km_train, km_test, kernel
def distribution_hmm_modular(fm_cube, N, M, pseudo, order, gap, reverse, num_examples):
    """Train an HMM on CUBE string data and exercise its query methods.

    The strings in ``fm_cube`` are converted to word features, an HMM is
    built from them and trained (``train()`` followed by
    ``baum_welch_viterbi_train(BW_NORMAL)``).  Afterwards the log
    derivative is requested for every (parameter, example) pair and the
    best path / best path state for every example; those results are not
    returned.

    Note: the ``num_examples`` argument is never read -- the number of
    vectors reported by the features is used instead.

    Returns:
        (lik_example, lik_sample, hmm) -- the values of
        ``hmm.get_log_likelihood()`` and
        ``hmm.get_log_likelihood_sample()``, and the trained HMM.
    """
    from modshogun import StringWordFeatures, StringCharFeatures, CUBE
    from modshogun import HMM, BW_NORMAL

    char_features = StringCharFeatures(CUBE)
    char_features.set_features(fm_cube)
    word_features = StringWordFeatures(char_features.get_alphabet())
    word_features.obtain_from_char(char_features, order - 1, order, gap, reverse)

    hmm = HMM(word_features, N, M, pseudo)
    hmm.train()
    hmm.baum_welch_viterbi_train(BW_NORMAL)

    vector_count = word_features.get_num_vectors()
    param_count = hmm.get_num_model_parameters()

    # Query every derivative; the values themselves are discarded.
    for example in range(vector_count):
        for param in range(param_count):
            hmm.get_log_derivative(param, example)

    # Accumulated only to exercise the calls; neither total is returned.
    path_total = 0
    state_total = 0
    for example in range(vector_count):
        path_total += hmm.best_path(example)
        for state in range(N):
            state_total += hmm.get_best_path_state(example, state)

    lik_example = hmm.get_log_likelihood()
    lik_sample = hmm.get_log_likelihood_sample()
    return lik_example, lik_sample, hmm
def kernel_top_modular(fm_train_dna=traindat, fm_test_dna=testdat,
                       label_train_dna=label_traindat, pseudo=1e-1,
                       order=1, gap=0, reverse=False, kargs=[1, False, True]):
    """Build polynomial kernel matrices on TOP features.

    One HMM is trained per class on the training strings, TOPFeatures are
    derived from the two HMMs for the training and the test strings, and a
    PolyKernel is evaluated on train/train and on train/test.

    The per-class training strings are selected from ``fm_train_dna`` with
    ``label_train_dna`` so that caller-supplied data is honoured.
    NOTE(review): assumes labels are encoded as +1 (positive) and -1
    (negative) -- confirm against the label loader.

    Parameters:
        fm_train_dna: sequence of training DNA strings.
        fm_test_dna: sequence of test DNA strings.
        label_train_dna: one label per training string.
        pseudo: passed unchanged to the HMM constructor.
        order, gap, reverse: passed to ``obtain_from_char`` (with
            ``order - 1`` as its second argument).
        kargs: extra positional arguments for PolyKernel; only unpacked,
            never mutated.

    Returns:
        (km_train, km_test, kernel) -- the train/train kernel matrix, the
        train/test kernel matrix, and the kernel object (left initialised
        on train/test).

    Raises:
        ValueError: if the number of labels differs from the number of
            training strings.
    """
    from modshogun import StringCharFeatures, StringWordFeatures, TOPFeatures, DNA
    from modshogun import PolyKernel
    from modshogun import HMM, BW_NORMAL

    if len(fm_train_dna) != len(label_train_dna):
        raise ValueError("fm_train_dna and label_train_dna must have the same length")

    N = 1  # toy HMM with 1 state
    M = 4  # 4 observations -> DNA

    def to_word_features(strings):
        # Char features -> word features, identical for every data set used here.
        charfeat = StringCharFeatures(strings, DNA)
        wordfeat = StringWordFeatures(charfeat.get_alphabet())
        wordfeat.obtain_from_char(charfeat, order - 1, order, gap, reverse)
        return wordfeat

    pos_strings = [seq for seq, lab in zip(fm_train_dna, label_train_dna) if lab == 1]
    neg_strings = [seq for seq, lab in zip(fm_train_dna, label_train_dna) if lab == -1]

    # train HMM for positive class
    hmm_pos_train = to_word_features(pos_strings)
    pos = HMM(hmm_pos_train, N, M, pseudo)
    pos.baum_welch_viterbi_train(BW_NORMAL)

    # train HMM for negative class
    hmm_neg_train = to_word_features(neg_strings)
    neg = HMM(hmm_neg_train, N, M, pseudo)
    neg.baum_welch_viterbi_train(BW_NORMAL)

    # word features of the full training and test sets
    wordfeats_train = to_word_features(fm_train_dna)
    wordfeats_test = to_word_features(fm_test_dna)

    # kernel on training data
    pos.set_observations(wordfeats_train)
    neg.set_observations(wordfeats_train)
    feats_train = TOPFeatures(10, pos, neg, False, False)
    kernel = PolyKernel(feats_train, feats_train, *kargs)
    km_train = kernel.get_kernel_matrix()

    # kernel on testing data: copy the HMMs so the training ones keep
    # their training observations
    pos_clone = HMM(pos)
    neg_clone = HMM(neg)
    pos_clone.set_observations(wordfeats_test)
    neg_clone.set_observations(wordfeats_test)
    feats_test = TOPFeatures(10, pos_clone, neg_clone, False, False)
    kernel.init(feats_train, feats_test)
    km_test = kernel.get_kernel_matrix()

    return km_train, km_test, kernel
def kernel_fisher_modular(fm_train_dna=traindat,
                          fm_test_dna=testdat,
                          label_train_dna=label_traindat,
                          N=1,
                          M=4,
                          pseudo=1e-1,
                          order=1,
                          gap=0,
                          reverse=False,
                          kargs=[1, False, True]):
    """Compute PolyKernel matrices over Fisher-kernel (FK) features.

    Steps: train one HMM on the positive and one on the negative training
    strings, build FKFeatures from both HMMs for the training set and for
    the test set, then evaluate a PolyKernel on train/train and train/test.

    The positive/negative training strings are picked from
    ``fm_train_dna`` by ``label_train_dna`` so that the function works on
    the data it is actually given.
    NOTE(review): assumes labels are +1 for positive and -1 for negative
    examples -- confirm against the label loader.

    Parameters:
        fm_train_dna: sequence of training DNA strings.
        fm_test_dna: sequence of test DNA strings.
        label_train_dna: one label per training string.
        N, M, pseudo: forwarded to the HMM constructor.
        order, gap, reverse: forwarded to ``obtain_from_char`` (its second
            argument is ``order - 1``).
        kargs: positional arguments appended to the PolyKernel
            constructor; unpacked only, never modified.

    Returns:
        Tuple ``(km_train, km_test, kernel)``; ``kernel`` is left
        initialised on (train, test).

    Raises:
        ValueError: when labels and training strings differ in length.
    """
    from modshogun import StringCharFeatures, StringWordFeatures, FKFeatures, DNA
    from modshogun import PolyKernel
    from modshogun import HMM, BW_NORMAL

    if len(fm_train_dna) != len(label_train_dna):
        raise ValueError("fm_train_dna and label_train_dna must have the same length")

    def to_word_features(strings):
        # Shared char -> word feature conversion.
        charfeat = StringCharFeatures(strings, DNA)
        wordfeat = StringWordFeatures(charfeat.get_alphabet())
        wordfeat.obtain_from_char(charfeat, order - 1, order, gap, reverse)
        return wordfeat

    labelled = list(zip(fm_train_dna, label_train_dna))
    pos_strings = [seq for seq, lab in labelled if lab == 1]
    neg_strings = [seq for seq, lab in labelled if lab == -1]

    # train HMM for positive class
    hmm_pos_train = to_word_features(pos_strings)
    pos = HMM(hmm_pos_train, N, M, pseudo)
    pos.baum_welch_viterbi_train(BW_NORMAL)

    # train HMM for negative class
    hmm_neg_train = to_word_features(neg_strings)
    neg = HMM(hmm_neg_train, N, M, pseudo)
    neg.baum_welch_viterbi_train(BW_NORMAL)

    # kernel training / testing data
    wordfeats_train = to_word_features(fm_train_dna)
    wordfeats_test = to_word_features(fm_test_dna)

    # get kernel on training data
    pos.set_observations(wordfeats_train)
    neg.set_observations(wordfeats_train)
    feats_train = FKFeatures(10, pos, neg)
    feats_train.set_opt_a(-1)  # estimate prior
    kernel = PolyKernel(feats_train, feats_train, *kargs)
    km_train = kernel.get_kernel_matrix()

    # get kernel on testing data (HMM copies carry the test observations)
    pos_clone = HMM(pos)
    neg_clone = HMM(neg)
    pos_clone.set_observations(wordfeats_test)
    neg_clone.set_observations(wordfeats_test)
    feats_test = FKFeatures(10, pos_clone, neg_clone)
    feats_test.set_a(feats_train.get_a())  # use prior from training data
    kernel.init(feats_train, feats_test)
    km_test = kernel.get_kernel_matrix()

    return km_train, km_test, kernel
def kernel_top_modular(fm_train_dna=traindat,
                       fm_test_dna=testdat,
                       label_train_dna=label_traindat,
                       pseudo=1e-1,
                       order=1,
                       gap=0,
                       reverse=False,
                       kargs=[1, False, True]):
    """Compute PolyKernel matrices over TOP features.

    Steps: train one HMM on the positive and one on the negative training
    strings, build TOPFeatures from both HMMs for the training set and for
    the test set, then evaluate a PolyKernel on train/train and train/test.

    The positive/negative training strings are picked from
    ``fm_train_dna`` by ``label_train_dna`` so that the function works on
    the data it is actually given.
    NOTE(review): assumes labels are +1 for positive and -1 for negative
    examples -- confirm against the label loader.

    Parameters:
        fm_train_dna: sequence of training DNA strings.
        fm_test_dna: sequence of test DNA strings.
        label_train_dna: one label per training string.
        pseudo: forwarded to the HMM constructor.
        order, gap, reverse: forwarded to ``obtain_from_char`` (its second
            argument is ``order - 1``).
        kargs: positional arguments appended to the PolyKernel
            constructor; unpacked only, never modified.

    Returns:
        Tuple ``(km_train, km_test, kernel)``; ``kernel`` is left
        initialised on (train, test).

    Raises:
        ValueError: when labels and training strings differ in length.
    """
    from modshogun import StringCharFeatures, StringWordFeatures, TOPFeatures, DNA
    from modshogun import PolyKernel
    from modshogun import HMM, BW_NORMAL

    if len(fm_train_dna) != len(label_train_dna):
        raise ValueError("fm_train_dna and label_train_dna must have the same length")

    N = 1  # toy HMM with 1 state
    M = 4  # 4 observations -> DNA

    def to_word_features(strings):
        # Shared char -> word feature conversion.
        charfeat = StringCharFeatures(strings, DNA)
        wordfeat = StringWordFeatures(charfeat.get_alphabet())
        wordfeat.obtain_from_char(charfeat, order - 1, order, gap, reverse)
        return wordfeat

    labelled = list(zip(fm_train_dna, label_train_dna))
    pos_strings = [seq for seq, lab in labelled if lab == 1]
    neg_strings = [seq for seq, lab in labelled if lab == -1]

    # train HMM for positive class
    hmm_pos_train = to_word_features(pos_strings)
    pos = HMM(hmm_pos_train, N, M, pseudo)
    pos.baum_welch_viterbi_train(BW_NORMAL)

    # train HMM for negative class
    hmm_neg_train = to_word_features(neg_strings)
    neg = HMM(hmm_neg_train, N, M, pseudo)
    neg.baum_welch_viterbi_train(BW_NORMAL)

    # kernel training / testing data
    wordfeats_train = to_word_features(fm_train_dna)
    wordfeats_test = to_word_features(fm_test_dna)

    # get kernel on training data
    pos.set_observations(wordfeats_train)
    neg.set_observations(wordfeats_train)
    feats_train = TOPFeatures(10, pos, neg, False, False)
    kernel = PolyKernel(feats_train, feats_train, *kargs)
    km_train = kernel.get_kernel_matrix()

    # get kernel on testing data (HMM copies carry the test observations)
    pos_clone = HMM(pos)
    neg_clone = HMM(neg)
    pos_clone.set_observations(wordfeats_test)
    neg_clone.set_observations(wordfeats_test)
    feats_test = TOPFeatures(10, pos_clone, neg_clone, False, False)
    kernel.init(feats_train, feats_test)
    km_test = kernel.get_kernel_matrix()

    return km_train, km_test, kernel
def get_kernel_mat(fm_train_dna, fm_test_dna, N, M, pseudo=1e-1, order=1, gap=0, reverse=False):
    """Compute linear kernel matrices on Fisher-kernel (FK) features.

    A single HMM is trained on all of ``fm_train_dna``; the second HMM
    handed to FKFeatures is a copy of it (``HMM(pos)``), so no class
    labels are involved.  Progress messages and two diagnostic values are
    printed along the way.

    The diagnostic step reads feature vectors 0 and 1, so at least two
    training strings are required.

    Parameters:
        fm_train_dna: sequence of training DNA strings.
        fm_test_dna: sequence of test DNA strings.
        N, M, pseudo: forwarded to the HMM constructor.
        order, gap, reverse: forwarded to ``obtain_from_char`` (its second
            argument is ``order - 1``).

    Returns:
        Tuple ``(km_train, km_test, kernel)``; ``kernel`` is left
        initialised on (train, test).
    """
    def to_word_features(strings):
        # Shared char -> word feature conversion.
        charfeat = StringCharFeatures(strings, DNA)
        wordfeat = StringWordFeatures(charfeat.get_alphabet())
        wordfeat.obtain_from_char(charfeat, order - 1, order, gap, reverse)
        return wordfeat

    # Single-argument print(...) behaves the same as a statement (Python 2)
    # and as a function (Python 3).
    print("hmm training")
    hmm_train = to_word_features(fm_train_dna)
    pos = HMM(hmm_train, N, M, pseudo)
    pos.baum_welch_viterbi_train(BW_NORMAL)
    neg = HMM(pos)  # copy of the trained model, not a separately trained one

    print("Kernel training data")
    wordfeats_train = to_word_features(fm_train_dna)

    print("Kernel testing data")
    wordfeats_test = to_word_features(fm_test_dna)

    print("get kernel on training data")
    pos.set_observations(wordfeats_train)
    neg.set_observations(wordfeats_train)
    feats_train = FKFeatures(10, pos, neg)
    feats_train.set_opt_a(-1)  # estimate prior

    print('getting feature matrix')
    v0 = feats_train.get_feature_vector(0)
    v1 = feats_train.get_feature_vector(1)
    # Printed next to km_train[0, 1] below so the two can be compared by eye.
    print(np.dot(v0, v1))

    kernel = LinearKernel(feats_train, feats_train)
    km_train = kernel.get_kernel_matrix()
    # Formatted explicitly so the output is "shape value" on Python 2 and 3.
    print("%s %s" % (km_train.shape, km_train[0, 1]))

    print("get kernel on testing data")
    pos_clone = HMM(pos)
    neg_clone = HMM(neg)
    pos_clone.set_observations(wordfeats_test)
    neg_clone.set_observations(wordfeats_test)
    feats_test = FKFeatures(10, pos_clone, neg_clone)
    feats_test.set_a(feats_train.get_a())  # use prior from training data
    kernel.init(feats_train, feats_test)
    km_test = kernel.get_kernel_matrix()

    return km_train, km_test, kernel
def _evaluate_top_fisher(indata, prefix):
    """Check TOP / Fisher kernel matrices against reference data.

    Two HMMs are built and trained on ``wordfeats['train']`` with the
    ``N`` / ``M`` / ``pseudo`` entries of ``indata`` (looked up under
    ``prefix``); copies of them receive the test observations.  Depending
    on ``indata[prefix + 'name']`` the HMM pairs are wrapped in
    TOPFeatures (name ``'TOP'``) or FKFeatures (anything else).  A
    PolyKernel is then evaluated on train/train and train/test and the
    largest absolute deviation from the reference matrices stored under
    the ``'kernel_'`` prefix is computed for each.

    Returns:
        The result of ``util.check_accuracy`` called with
        ``indata['kernel_accuracy']`` and the two maximum deviations.
    """
    wordfeats = util.get_features(indata, prefix)

    def trained_hmm():
        # Build one HMM on the training words and run both training steps.
        model = HMM(wordfeats['train'], indata[prefix + 'N'],
                    indata[prefix + 'M'], indata[prefix + 'pseudo'])
        model.train()
        model.baum_welch_viterbi_train(BW_NORMAL)
        return model

    pos_train = trained_hmm()
    neg_train = trained_hmm()

    # Copies of the trained models that observe the test data instead.
    pos_test = HMM(pos_train)
    pos_test.set_observations(wordfeats['test'])
    neg_test = HMM(neg_train)
    neg_test.set_observations(wordfeats['test'])

    if indata[prefix + 'name'] == 'TOP':
        train_feats = TOPFeatures(10, pos_train, neg_train, False, False)
        test_feats = TOPFeatures(10, pos_test, neg_test, False, False)
    else:
        train_feats = FKFeatures(10, pos_train, neg_train)
        train_feats.set_opt_a(-1)  # estimate prior
        test_feats = FKFeatures(10, pos_test, neg_test)
        test_feats.set_a(train_feats.get_a())  # use prior from training data

    kernel_prefix = 'kernel_'
    args = util.get_args(indata, kernel_prefix)
    kernel = PolyKernel(train_feats, train_feats, *args)

    def max_deviation(reference_key):
        # Largest absolute element-wise difference to the stored matrix.
        difference = indata[kernel_prefix + reference_key] - kernel.get_kernel_matrix()
        return max(abs(difference).flat)

    km_train = max_deviation('matrix_train')
    kernel.init(train_feats, test_feats)
    km_test = max_deviation('matrix_test')

    return util.check_accuracy(indata[kernel_prefix + 'accuracy'],
                               km_train=km_train, km_test=km_test)
def _evaluate_top_fisher(indata, prefix):
    """Compare TOP / Fisher kernel matrices with stored reference matrices.

    Trains a "positive" and a "negative" HMM on ``wordfeats['train']``
    using the ``N`` / ``M`` / ``pseudo`` values found in ``indata`` under
    ``prefix``, copies both for the test observations, and wraps each pair
    in TOPFeatures when ``indata[prefix + 'name']`` equals ``'TOP'`` and
    in FKFeatures otherwise.  A PolyKernel on these features is evaluated
    on train/train and train/test; for each, the maximum absolute
    difference to the matrix stored under the ``'kernel_'`` prefix is
    taken.

    Returns:
        Whatever ``util.check_accuracy`` returns for
        ``indata['kernel_accuracy']`` and the two maximum differences.
    """
    wordfeats = util.get_features(indata, prefix)

    def trained_hmm():
        # One HMM on the training words, trained with both steps.
        model = HMM(wordfeats['train'], indata[prefix + 'N'],
                    indata[prefix + 'M'], indata[prefix + 'pseudo'])
        model.train()
        model.baum_welch_viterbi_train(BW_NORMAL)
        return model

    pos_train = trained_hmm()
    neg_train = trained_hmm()

    # Test-side models are copies that observe the test words.
    pos_test = HMM(pos_train)
    pos_test.set_observations(wordfeats['test'])
    neg_test = HMM(neg_train)
    neg_test.set_observations(wordfeats['test'])

    use_top = indata[prefix + 'name'] == 'TOP'
    if use_top:
        train_feats = TOPFeatures(10, pos_train, neg_train, False, False)
        test_feats = TOPFeatures(10, pos_test, neg_test, False, False)
    else:
        train_feats = FKFeatures(10, pos_train, neg_train)
        train_feats.set_opt_a(-1)  # estimate prior
        test_feats = FKFeatures(10, pos_test, neg_test)
        test_feats.set_a(train_feats.get_a())  # use prior from training data

    kernel_prefix = 'kernel_'
    args = util.get_args(indata, kernel_prefix)
    kernel = PolyKernel(train_feats, train_feats, *args)

    train_diff = indata[kernel_prefix + 'matrix_train'] - kernel.get_kernel_matrix()
    km_train = max(abs(train_diff).flat)

    kernel.init(train_feats, test_feats)
    test_diff = indata[kernel_prefix + 'matrix_test'] - kernel.get_kernel_matrix()
    km_test = max(abs(test_diff).flat)

    return util.check_accuracy(indata[kernel_prefix + 'accuracy'],
                               km_train=km_train,
                               km_test=km_test)
def get_feature_mat(fm_train_dna, fm_test_dna, N, M, pseudo=1e-1, order=1, gap=0, reverse=False):
    """Extract Fisher-kernel (FK) feature matrices for train and test data.

    A single HMM is trained on all of ``fm_train_dna``; the second HMM
    handed to FKFeatures is a copy of it (``HMM(pos)``), so no class
    labels are involved.  One feature vector is read per input string and
    the vectors are stacked with ``np.array``.  Progress messages are
    printed along the way.

    Parameters:
        fm_train_dna: sequence of training DNA strings.
        fm_test_dna: sequence of test DNA strings.
        N, M, pseudo: forwarded to the HMM constructor.
        order, gap, reverse: forwarded to ``obtain_from_char`` (its second
            argument is ``order - 1``).

    Returns:
        Tuple ``(train_featmat, test_featmat)`` of numpy arrays with one
        row per training / test string.
    """
    def to_word_features(strings):
        # Shared char -> word feature conversion.
        charfeat = StringCharFeatures(strings, DNA)
        wordfeat = StringWordFeatures(charfeat.get_alphabet())
        wordfeat.obtain_from_char(charfeat, order - 1, order, gap, reverse)
        return wordfeat

    # Single-argument print(...) behaves the same as a statement (Python 2)
    # and as a function (Python 3).
    print("hmm training")
    hmm_train = to_word_features(fm_train_dna)
    pos = HMM(hmm_train, N, M, pseudo)
    pos.baum_welch_viterbi_train(BW_NORMAL)
    neg = HMM(pos)  # copy of the trained model, not a separately trained one

    print("Kernel training data")
    wordfeats_train = to_word_features(fm_train_dna)

    print("Kernel testing data")
    wordfeats_test = to_word_features(fm_test_dna)

    print("get kernel on training data")
    pos.set_observations(wordfeats_train)
    neg.set_observations(wordfeats_train)
    feats_train = FKFeatures(10, pos, neg)
    feats_train.set_opt_a(-1)  # estimate prior

    print('getting feature train')
    # NOTE(review): training rows are read with
    # get_computed_dot_feature_vector while test rows use
    # get_feature_vector -- kept as found; confirm the difference is intended.
    train_featmat = np.array([
        feats_train.get_computed_dot_feature_vector(i)
        for i in range(len(fm_train_dna))
    ])

    print("get feature on testing")
    pos_clone = HMM(pos)
    neg_clone = HMM(neg)
    pos_clone.set_observations(wordfeats_test)
    neg_clone.set_observations(wordfeats_test)
    feats_test = FKFeatures(10, pos_clone, neg_clone)
    feats_test.set_a(feats_train.get_a())  # use prior from training data
    test_featmat = np.array([
        feats_test.get_feature_vector(i)
        for i in range(len(fm_test_dna))
    ])

    return train_featmat, test_featmat
def get_kernel_mat(fm_train_dna, fm_test_dna, N, M, pseudo=1e-1, order=1, gap=0, reverse=False):
    """Build linear kernel matrices over Fisher-kernel (FK) features.

    Only one HMM is trained, on the complete training set; the "negative"
    model passed to FKFeatures is a copy of that HMM (``HMM(pos)``).
    Progress messages are printed, plus the dot product of feature
    vectors 0 and 1 and the shape and ``[0, 1]`` entry of the training
    kernel matrix.

    Because feature vectors 0 and 1 are read for the diagnostic output,
    at least two training strings are required.

    Parameters:
        fm_train_dna: sequence of training DNA strings.
        fm_test_dna: sequence of test DNA strings.
        N, M, pseudo: forwarded to the HMM constructor.
        order, gap, reverse: forwarded to ``obtain_from_char`` (its second
            argument is ``order - 1``).

    Returns:
        Tuple ``(km_train, km_test, kernel)``; ``kernel`` is left
        initialised on (train, test).
    """
    def to_word_features(strings):
        # Shared char -> word feature conversion.
        charfeat = StringCharFeatures(strings, DNA)
        wordfeat = StringWordFeatures(charfeat.get_alphabet())
        wordfeat.obtain_from_char(charfeat, order - 1, order, gap, reverse)
        return wordfeat

    # print(...) with one argument gives identical output on Python 2 and 3.
    print("hmm training")
    hmm_train = to_word_features(fm_train_dna)
    pos = HMM(hmm_train, N, M, pseudo)
    pos.baum_welch_viterbi_train(BW_NORMAL)
    neg = HMM(pos)  # copy of the trained model, not a separately trained one

    print("Kernel training data")
    wordfeats_train = to_word_features(fm_train_dna)

    print("Kernel testing data")
    wordfeats_test = to_word_features(fm_test_dna)

    print("get kernel on training data")
    pos.set_observations(wordfeats_train)
    neg.set_observations(wordfeats_train)
    feats_train = FKFeatures(10, pos, neg)
    feats_train.set_opt_a(-1)  # estimate prior

    print('getting feature matrix')
    v0 = feats_train.get_feature_vector(0)
    v1 = feats_train.get_feature_vector(1)
    # Printed next to km_train[0, 1] below so the two can be compared by eye.
    print(np.dot(v0, v1))

    kernel = LinearKernel(feats_train, feats_train)
    km_train = kernel.get_kernel_matrix()
    # Explicit formatting keeps the "shape value" output on Python 2 and 3.
    print("%s %s" % (km_train.shape, km_train[0, 1]))

    print("get kernel on testing data")
    pos_clone = HMM(pos)
    neg_clone = HMM(neg)
    pos_clone.set_observations(wordfeats_test)
    neg_clone.set_observations(wordfeats_test)
    feats_test = FKFeatures(10, pos_clone, neg_clone)
    feats_test.set_a(feats_train.get_a())  # use prior from training data
    kernel.init(feats_train, feats_test)
    km_test = kernel.get_kernel_matrix()

    return km_train, km_test, kernel