def distribution_hmm_modular(fm_cube, N, M, pseudo, order, gap, reverse, num_examples): from shogun.Features import StringWordFeatures, StringCharFeatures, CUBE from shogun.Distribution import HMM, BW_NORMAL charfeat=StringCharFeatures(CUBE) charfeat.set_features(fm_cube) feats=StringWordFeatures(charfeat.get_alphabet()) feats.obtain_from_char(charfeat, order-1, order, gap, reverse) hmm=HMM(feats, N, M, pseudo) hmm.train() hmm.baum_welch_viterbi_train(BW_NORMAL) num_examples=feats.get_num_vectors() num_param=hmm.get_num_model_parameters() for i in xrange(num_examples): for j in xrange(num_param): hmm.get_log_derivative(j, i) best_path=0 best_path_state=0 for i in xrange(num_examples): best_path+=hmm.best_path(i) for j in xrange(N): best_path_state+=hmm.get_best_path_state(i, j) lik_example = hmm.get_log_likelihood() lik_sample = hmm.get_log_likelihood_sample() return lik_example, lik_sample, hmm
def kernel_top_modular(fm_train_dna=traindat, fm_test_dna=testdat, label_train_dna=label_traindat, pseudo=1e-1, order=1, gap=0, reverse=False, kargs=[1, False, True]): from shogun.Features import StringCharFeatures, StringWordFeatures, TOPFeatures, DNA from shogun.Kernel import PolyKernel from shogun.Distribution import HMM, BW_NORMAL N = 1 # toy HMM with 1 state M = 4 # 4 observations -> DNA # train HMM for positive class charfeat = StringCharFeatures(fm_hmm_pos, DNA) hmm_pos_train = StringWordFeatures(charfeat.get_alphabet()) hmm_pos_train.obtain_from_char(charfeat, order - 1, order, gap, reverse) pos = HMM(hmm_pos_train, N, M, pseudo) pos.baum_welch_viterbi_train(BW_NORMAL) # train HMM for negative class charfeat = StringCharFeatures(fm_hmm_neg, DNA) hmm_neg_train = StringWordFeatures(charfeat.get_alphabet()) hmm_neg_train.obtain_from_char(charfeat, order - 1, order, gap, reverse) neg = HMM(hmm_neg_train, N, M, pseudo) neg.baum_welch_viterbi_train(BW_NORMAL) # Kernel training data charfeat = StringCharFeatures(fm_train_dna, DNA) wordfeats_train = StringWordFeatures(charfeat.get_alphabet()) wordfeats_train.obtain_from_char(charfeat, order - 1, order, gap, reverse) # Kernel testing data charfeat = StringCharFeatures(fm_test_dna, DNA) wordfeats_test = StringWordFeatures(charfeat.get_alphabet()) wordfeats_test.obtain_from_char(charfeat, order - 1, order, gap, reverse) # get kernel on training data pos.set_observations(wordfeats_train) neg.set_observations(wordfeats_train) feats_train = TOPFeatures(10, pos, neg, False, False) kernel = PolyKernel(feats_train, feats_train, *kargs) km_train = kernel.get_kernel_matrix() # get kernel on testing data pos_clone = HMM(pos) neg_clone = HMM(neg) pos_clone.set_observations(wordfeats_test) neg_clone.set_observations(wordfeats_test) feats_test = TOPFeatures(10, pos_clone, neg_clone, False, False) kernel.init(feats_train, feats_test) km_test = kernel.get_kernel_matrix() return km_train, km_test, kernel
def kernel_fisher_modular(fm_train_dna=traindat, fm_test_dna=testdat, label_train_dna=label_traindat, N=1,M=4,pseudo=1e-1,order=1,gap=0,reverse=False, kargs=[1,False,True]): from shogun.Features import StringCharFeatures, StringWordFeatures, FKFeatures, DNA from shogun.Kernel import PolyKernel from shogun.Distribution import HMM, BW_NORMAL#, MSG_DEBUG # train HMM for positive class charfeat=StringCharFeatures(fm_hmm_pos, DNA) #charfeat.io.set_loglevel(MSG_DEBUG) hmm_pos_train=StringWordFeatures(charfeat.get_alphabet()) hmm_pos_train.obtain_from_char(charfeat, order-1, order, gap, reverse) pos=HMM(hmm_pos_train, N, M, pseudo) pos.baum_welch_viterbi_train(BW_NORMAL) # train HMM for negative class charfeat=StringCharFeatures(fm_hmm_neg, DNA) hmm_neg_train=StringWordFeatures(charfeat.get_alphabet()) hmm_neg_train.obtain_from_char(charfeat, order-1, order, gap, reverse) neg=HMM(hmm_neg_train, N, M, pseudo) neg.baum_welch_viterbi_train(BW_NORMAL) # Kernel training data charfeat=StringCharFeatures(fm_train_dna, DNA) wordfeats_train=StringWordFeatures(charfeat.get_alphabet()) wordfeats_train.obtain_from_char(charfeat, order-1, order, gap, reverse) # Kernel testing data charfeat=StringCharFeatures(fm_test_dna, DNA) wordfeats_test=StringWordFeatures(charfeat.get_alphabet()) wordfeats_test.obtain_from_char(charfeat, order-1, order, gap, reverse) # get kernel on training data pos.set_observations(wordfeats_train) neg.set_observations(wordfeats_train) feats_train=FKFeatures(10, pos, neg) feats_train.set_opt_a(-1) #estimate prior kernel=PolyKernel(feats_train, feats_train, *kargs) km_train=kernel.get_kernel_matrix() # get kernel on testing data pos_clone=HMM(pos) neg_clone=HMM(neg) pos_clone.set_observations(wordfeats_test) neg_clone.set_observations(wordfeats_test) feats_test=FKFeatures(10, pos_clone, neg_clone) feats_test.set_a(feats_train.get_a()) #use prior from training data kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix() return km_train,km_test,kernel
def _evaluate_top_fisher (indata, prefix): feats={} wordfeats=util.get_features(indata, prefix) pos_train=HMM(wordfeats['train'], indata[prefix+'N'], indata[prefix+'M'], indata[prefix+'pseudo']) pos_train.train() pos_train.baum_welch_viterbi_train(BW_NORMAL) neg_train=HMM(wordfeats['train'], indata[prefix+'N'], indata[prefix+'M'], indata[prefix+'pseudo']) neg_train.train() neg_train.baum_welch_viterbi_train(BW_NORMAL) pos_test=HMM(pos_train) pos_test.set_observations(wordfeats['test']) neg_test=HMM(neg_train) neg_test.set_observations(wordfeats['test']) if indata[prefix+'name']=='TOP': feats['train']=TOPFeatures(10, pos_train, neg_train, False, False) feats['test']=TOPFeatures(10, pos_test, neg_test, False, False) else: feats['train']=FKFeatures(10, pos_train, neg_train) feats['train'].set_opt_a(-1) #estimate prior feats['test']=FKFeatures(10, pos_test, neg_test) feats['test'].set_a(feats['train'].get_a()) #use prior from training data prefix='kernel_' args=util.get_args(indata, prefix) kernel=PolyKernel(feats['train'], feats['train'], *args) # kernel=PolyKernel(*args) # kernel.init(feats['train'], feats['train']) km_train=max(abs( indata[prefix+'matrix_train']-kernel.get_kernel_matrix()).flat) kernel.init(feats['train'], feats['test']) km_test=max(abs( indata[prefix+'matrix_test']-kernel.get_kernel_matrix()).flat) return util.check_accuracy(indata[prefix+'accuracy'], km_train=km_train, km_test=km_test)
def _run_top_fisher(): """Run Linear Kernel with {Top,Fisher}Features.""" # put some constantness into randomness Math_init_random(dataop.INIT_RANDOM) data = dataop.get_cubes(4, 8) prefix = 'topfk_' params = { prefix + 'N': 3, prefix + 'M': 6, prefix + 'pseudo': 1e-1, prefix + 'order': 1, prefix + 'gap': 0, prefix + 'reverse': False, prefix + 'alphabet': 'CUBE', prefix + 'feature_class': 'string_complex', prefix + 'feature_type': 'Word', prefix + 'data_train': numpy.matrix(data['train']), prefix + 'data_test': numpy.matrix(data['test']) } wordfeats = featop.get_features(params[prefix + 'feature_class'], params[prefix + 'feature_type'], data, eval(params[prefix + 'alphabet']), params[prefix + 'order'], params[prefix + 'gap'], params[prefix + 'reverse']) pos_train = HMM(wordfeats['train'], params[prefix + 'N'], params[prefix + 'M'], params[prefix + 'pseudo']) pos_train.train() pos_train.baum_welch_viterbi_train(BW_NORMAL) neg_train = HMM(wordfeats['train'], params[prefix + 'N'], params[prefix + 'M'], params[prefix + 'pseudo']) neg_train.train() neg_train.baum_welch_viterbi_train(BW_NORMAL) pos_test = HMM(pos_train) pos_test.set_observations(wordfeats['test']) neg_test = HMM(neg_train) neg_test.set_observations(wordfeats['test']) feats = {} feats['train'] = TOPFeatures(10, pos_train, neg_train, False, False) feats['test'] = TOPFeatures(10, pos_test, neg_test, False, False) params[prefix + 'name'] = 'TOP' _compute_top_fisher(feats, params) feats['train'] = FKFeatures(10, pos_train, neg_train) feats['train'].set_opt_a(-1) #estimate prior feats['test'] = FKFeatures(10, pos_test, neg_test) feats['test'].set_a(feats['train'].get_a()) #use prior from training data params[prefix + 'name'] = 'FK' _compute_top_fisher(feats, params)
def kernel_fisher_modular(fm_train_dna=traindat, fm_test_dna=testdat, label_train_dna=label_traindat, N=1, M=4, pseudo=1e-1, order=1, gap=0, reverse=False, kargs=[1, False, True]): from shogun.Features import StringCharFeatures, StringWordFeatures, FKFeatures, DNA from shogun.Kernel import PolyKernel from shogun.Distribution import HMM, BW_NORMAL #, MSG_DEBUG # train HMM for positive class charfeat = StringCharFeatures(fm_hmm_pos, DNA) #charfeat.io.set_loglevel(MSG_DEBUG) hmm_pos_train = StringWordFeatures(charfeat.get_alphabet()) hmm_pos_train.obtain_from_char(charfeat, order - 1, order, gap, reverse) pos = HMM(hmm_pos_train, N, M, pseudo) pos.baum_welch_viterbi_train(BW_NORMAL) # train HMM for negative class charfeat = StringCharFeatures(fm_hmm_neg, DNA) hmm_neg_train = StringWordFeatures(charfeat.get_alphabet()) hmm_neg_train.obtain_from_char(charfeat, order - 1, order, gap, reverse) neg = HMM(hmm_neg_train, N, M, pseudo) neg.baum_welch_viterbi_train(BW_NORMAL) # Kernel training data charfeat = StringCharFeatures(fm_train_dna, DNA) wordfeats_train = StringWordFeatures(charfeat.get_alphabet()) wordfeats_train.obtain_from_char(charfeat, order - 1, order, gap, reverse) # Kernel testing data charfeat = StringCharFeatures(fm_test_dna, DNA) wordfeats_test = StringWordFeatures(charfeat.get_alphabet()) wordfeats_test.obtain_from_char(charfeat, order - 1, order, gap, reverse) # get kernel on training data pos.set_observations(wordfeats_train) neg.set_observations(wordfeats_train) feats_train = FKFeatures(10, pos, neg) feats_train.set_opt_a(-1) #estimate prior kernel = PolyKernel(feats_train, feats_train, *kargs) km_train = kernel.get_kernel_matrix() # get kernel on testing data pos_clone = HMM(pos) neg_clone = HMM(neg) pos_clone.set_observations(wordfeats_test) neg_clone.set_observations(wordfeats_test) feats_test = FKFeatures(10, pos_clone, neg_clone) feats_test.set_a(feats_train.get_a()) #use prior from training data kernel.init(feats_train, feats_test) km_test = kernel.get_kernel_matrix() return km_train, km_test, kernel
def _run_top_fisher(): """Run Linear Kernel with {Top,Fisher}Features.""" # put some constantness into randomness Math_init_random(dataop.INIT_RANDOM) data = dataop.get_cubes(4, 8) prefix = "topfk_" params = { prefix + "N": 3, prefix + "M": 6, prefix + "pseudo": 1e-1, prefix + "order": 1, prefix + "gap": 0, prefix + "reverse": False, prefix + "alphabet": "CUBE", prefix + "feature_class": "string_complex", prefix + "feature_type": "Word", prefix + "data_train": numpy.matrix(data["train"]), prefix + "data_test": numpy.matrix(data["test"]), } wordfeats = featop.get_features( params[prefix + "feature_class"], params[prefix + "feature_type"], data, eval(params[prefix + "alphabet"]), params[prefix + "order"], params[prefix + "gap"], params[prefix + "reverse"], ) pos_train = HMM(wordfeats["train"], params[prefix + "N"], params[prefix + "M"], params[prefix + "pseudo"]) pos_train.train() pos_train.baum_welch_viterbi_train(BW_NORMAL) neg_train = HMM(wordfeats["train"], params[prefix + "N"], params[prefix + "M"], params[prefix + "pseudo"]) neg_train.train() neg_train.baum_welch_viterbi_train(BW_NORMAL) pos_test = HMM(pos_train) pos_test.set_observations(wordfeats["test"]) neg_test = HMM(neg_train) neg_test.set_observations(wordfeats["test"]) feats = {} feats["train"] = TOPFeatures(10, pos_train, neg_train, False, False) feats["test"] = TOPFeatures(10, pos_test, neg_test, False, False) params[prefix + "name"] = "TOP" _compute_top_fisher(feats, params) feats["train"] = FKFeatures(10, pos_train, neg_train) feats["train"].set_opt_a(-1) # estimate prior feats["test"] = FKFeatures(10, pos_test, neg_test) feats["test"].set_a(feats["train"].get_a()) # use prior from training data params[prefix + "name"] = "FK" _compute_top_fisher(feats, params)
def distribution_hmm_modular(fm_cube, N, M, pseudo, order, gap, reverse, num_examples): from shogun.Features import StringWordFeatures, StringCharFeatures, CUBE from shogun.Distribution import HMM, BW_NORMAL charfeat=StringCharFeatures(CUBE) charfeat.set_features(fm_cube) feats=StringWordFeatures(charfeat.get_alphabet()) feats.obtain_from_char(charfeat, order-1, order, gap, reverse) hmm=HMM(feats, N, M, pseudo) hmm.train() hmm.baum_welch_viterbi_train(BW_NORMAL) num_examples=feats.get_num_vectors() num_param=hmm.get_num_model_parameters() for i in range(num_examples): for j in range(num_param): hmm.get_log_derivative(j, i) best_path=0 best_path_state=0 for i in range(num_examples): best_path+=hmm.best_path(i) for j in range(N): best_path_state+=hmm.get_best_path_state(i, j) lik_example = hmm.get_log_likelihood() lik_sample = hmm.get_log_likelihood_sample() return lik_example, lik_sample, hmm
def kernel_top_modular(fm_train_dna=traindat,fm_test_dna=testdat,label_train_dna=label_traindat,pseudo=1e-1, order=1,gap=0,reverse=False,kargs=[1, False, True]): from shogun.Features import StringCharFeatures, StringWordFeatures, TOPFeatures, DNA from shogun.Kernel import PolyKernel from shogun.Distribution import HMM, BW_NORMAL N=1 # toy HMM with 1 state M=4 # 4 observations -> DNA # train HMM for positive class charfeat=StringCharFeatures(fm_hmm_pos, DNA) hmm_pos_train=StringWordFeatures(charfeat.get_alphabet()) hmm_pos_train.obtain_from_char(charfeat, order-1, order, gap, reverse) pos=HMM(hmm_pos_train, N, M, pseudo) pos.baum_welch_viterbi_train(BW_NORMAL) # train HMM for negative class charfeat=StringCharFeatures(fm_hmm_neg, DNA) hmm_neg_train=StringWordFeatures(charfeat.get_alphabet()) hmm_neg_train.obtain_from_char(charfeat, order-1, order, gap, reverse) neg=HMM(hmm_neg_train, N, M, pseudo) neg.baum_welch_viterbi_train(BW_NORMAL) # Kernel training data charfeat=StringCharFeatures(fm_train_dna, DNA) wordfeats_train=StringWordFeatures(charfeat.get_alphabet()) wordfeats_train.obtain_from_char(charfeat, order-1, order, gap, reverse) # Kernel testing data charfeat=StringCharFeatures(fm_test_dna, DNA) wordfeats_test=StringWordFeatures(charfeat.get_alphabet()) wordfeats_test.obtain_from_char(charfeat, order-1, order, gap, reverse) # get kernel on training data pos.set_observations(wordfeats_train) neg.set_observations(wordfeats_train) feats_train=TOPFeatures(10, pos, neg, False, False) kernel=PolyKernel(feats_train, feats_train, *kargs) km_train=kernel.get_kernel_matrix() # get kernel on testing data pos_clone=HMM(pos) neg_clone=HMM(neg) pos_clone.set_observations(wordfeats_test) neg_clone.set_observations(wordfeats_test) feats_test=TOPFeatures(10, pos_clone, neg_clone, False, False) kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix() return km_train,km_test,kernel
def fisher (): print "Fisher Kernel" from shogun.Features import StringCharFeatures, StringWordFeatures, FKFeatures, DNA from shogun.Kernel import PolyKernel from shogun.Distribution import HMM, BW_NORMAL N=1 # toy HMM with 1 state M=4 # 4 observations -> DNA pseudo=1e-1 order=1 gap=0 reverse=False kargs=[1, False, True] # train HMM for positive class charfeat=StringCharFeatures(fm_hmm_pos, DNA) hmm_pos_train=StringWordFeatures(charfeat.get_alphabet()) hmm_pos_train.obtain_from_char(charfeat, order-1, order, gap, reverse) pos=HMM(hmm_pos_train, N, M, pseudo) pos.baum_welch_viterbi_train(BW_NORMAL) # train HMM for negative class charfeat=StringCharFeatures(fm_hmm_neg, DNA) hmm_neg_train=StringWordFeatures(charfeat.get_alphabet()) hmm_neg_train.obtain_from_char(charfeat, order-1, order, gap, reverse) neg=HMM(hmm_neg_train, N, M, pseudo) neg.baum_welch_viterbi_train(BW_NORMAL) # Kernel training data charfeat=StringCharFeatures(fm_train_dna, DNA) wordfeats_train=StringWordFeatures(charfeat.get_alphabet()) wordfeats_train.obtain_from_char(charfeat, order-1, order, gap, reverse) # Kernel testing data charfeat=StringCharFeatures(fm_test_dna, DNA) wordfeats_test=StringWordFeatures(charfeat.get_alphabet()) wordfeats_test.obtain_from_char(charfeat, order-1, order, gap, reverse) # get kernel on training data pos.set_observations(wordfeats_train) neg.set_observations(wordfeats_train) feats_train=FKFeatures(10, pos, neg) feats_train.set_opt_a(-1) #estimate prior kernel=PolyKernel(feats_train, feats_train, *kargs) km_train=kernel.get_kernel_matrix() # get kernel on testing data pos_clone=HMM(pos) neg_clone=HMM(neg) pos_clone.set_observations(wordfeats_test) neg_clone.set_observations(wordfeats_test) feats_test=FKFeatures(10, pos_clone, neg_clone) feats_test.set_a(feats_train.get_a()) #use prior from training data kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix()
def _evaluate_top_fisher(indata, prefix): feats = {} wordfeats = util.get_features(indata, prefix) pos_train = HMM(wordfeats['train'], indata[prefix + 'N'], indata[prefix + 'M'], indata[prefix + 'pseudo']) pos_train.train() pos_train.baum_welch_viterbi_train(BW_NORMAL) neg_train = HMM(wordfeats['train'], indata[prefix + 'N'], indata[prefix + 'M'], indata[prefix + 'pseudo']) neg_train.train() neg_train.baum_welch_viterbi_train(BW_NORMAL) pos_test = HMM(pos_train) pos_test.set_observations(wordfeats['test']) neg_test = HMM(neg_train) neg_test.set_observations(wordfeats['test']) if indata[prefix + 'name'] == 'TOP': feats['train'] = TOPFeatures(10, pos_train, neg_train, False, False) feats['test'] = TOPFeatures(10, pos_test, neg_test, False, False) else: feats['train'] = FKFeatures(10, pos_train, neg_train) feats['train'].set_opt_a(-1) #estimate prior feats['test'] = FKFeatures(10, pos_test, neg_test) feats['test'].set_a( feats['train'].get_a()) #use prior from training data prefix = 'kernel_' args = util.get_args(indata, prefix) kernel = PolyKernel(feats['train'], feats['train'], *args) # kernel=PolyKernel(*args) # kernel.init(feats['train'], feats['train']) km_train = max( abs(indata[prefix + 'matrix_train'] - kernel.get_kernel_matrix()).flat) kernel.init(feats['train'], feats['test']) km_test = max( abs(indata[prefix + 'matrix_test'] - kernel.get_kernel_matrix()).flat) return util.check_accuracy(indata[prefix + 'accuracy'], km_train=km_train, km_test=km_test)
def _run_top_fisher (): """Run Linear Kernel with {Top,Fisher}Features.""" # put some constantness into randomness Math_init_random(dataop.INIT_RANDOM) data=dataop.get_cubes(4, 8) prefix='topfk_' params={ prefix+'N': 3, prefix+'M': 6, prefix+'pseudo': 1e-1, prefix+'order': 1, prefix+'gap': 0, prefix+'reverse': False, prefix+'alphabet': 'CUBE', prefix+'feature_class': 'string_complex', prefix+'feature_type': 'Word', prefix+'data_train': numpy.matrix(data['train']), prefix+'data_test': numpy.matrix(data['test']) } wordfeats=featop.get_features( params[prefix+'feature_class'], params[prefix+'feature_type'], data, eval(params[prefix+'alphabet']), params[prefix+'order'], params[prefix+'gap'], params[prefix+'reverse']) pos_train=HMM(wordfeats['train'], params[prefix+'N'], params[prefix+'M'], params[prefix+'pseudo']) pos_train.train() pos_train.baum_welch_viterbi_train(BW_NORMAL) neg_train=HMM(wordfeats['train'], params[prefix+'N'], params[prefix+'M'], params[prefix+'pseudo']) neg_train.train() neg_train.baum_welch_viterbi_train(BW_NORMAL) pos_test=HMM(pos_train) pos_test.set_observations(wordfeats['test']) neg_test=HMM(neg_train) neg_test.set_observations(wordfeats['test']) feats={} feats['train']=TOPFeatures(10, pos_train, neg_train, False, False) feats['test']=TOPFeatures(10, pos_test, neg_test, False, False) params[prefix+'name']='TOP' _compute_top_fisher(feats, params) feats['train']=FKFeatures(10, pos_train, neg_train) feats['train'].set_opt_a(-1) #estimate prior feats['test']=FKFeatures(10, pos_test, neg_test) feats['test'].set_a(feats['train'].get_a()) #use prior from training data params[prefix+'name']='FK' _compute_top_fisher(feats, params)