def distribution_hmm_modular(fm_cube, N, M, pseudo, order, gap, reverse, num_examples):
    """Train an HMM on cube-alphabet string data and exercise its query API.

    Builds order-``order`` word features from the raw cube strings, trains an
    ``N``-state / ``M``-emission HMM with the given pseudo-count, touches the
    per-example log-derivatives and best-path accessors, and returns the model
    log-likelihoods together with the trained HMM.

    NOTE(review): the ``num_examples`` argument is never used — it is
    unconditionally overwritten with ``feats.get_num_vectors()`` below.
    """
    from modshogun import StringWordFeatures, StringCharFeatures, CUBE
    from modshogun import HMM, BW_NORMAL

    # Convert the raw character strings into word features of the given order.
    charfeat = StringCharFeatures(CUBE)
    charfeat.set_features(fm_cube)
    feats = StringWordFeatures(charfeat.get_alphabet())
    feats.obtain_from_char(charfeat, order - 1, order, gap, reverse)

    hmm = HMM(feats, N, M, pseudo)
    hmm.train()
    hmm.baum_welch_viterbi_train(BW_NORMAL)

    num_examples = feats.get_num_vectors()
    num_param = hmm.get_num_model_parameters()

    # Query every (parameter, example) log-derivative; the results are
    # discarded — this only exercises the accessor.
    for example_idx in range(num_examples):
        for param_idx in range(num_param):
            hmm.get_log_derivative(param_idx, example_idx)

    # Accumulate best-path scores and states; likewise exercised but not
    # part of the return value.
    best_path = 0
    best_path_state = 0
    for example_idx in range(num_examples):
        best_path += hmm.best_path(example_idx)
        for state_idx in range(N):
            best_path_state += hmm.get_best_path_state(example_idx, state_idx)

    lik_example = hmm.get_log_likelihood()
    lik_sample = hmm.get_log_likelihood_sample()

    return lik_example, lik_sample, hmm
def _evaluate_top_fisher(indata, prefix):
    """Recompute TOP/Fisher kernel matrices and compare against reference data.

    Trains two HMMs (both on the same training features), derives TOP or FK
    features from them depending on ``indata[prefix + 'name']``, evaluates a
    polynomial kernel on train/train and train/test feature pairs, and checks
    the maximum absolute deviation from the stored reference matrices via
    ``util.check_accuracy``.
    """
    wordfeats = util.get_features(indata, prefix)

    def _trained_hmm():
        # Both the "positive" and "negative" models are trained on the same
        # training features here; they differ only by random initialization.
        model = HMM(wordfeats['train'], indata[prefix + 'N'],
                    indata[prefix + 'M'], indata[prefix + 'pseudo'])
        model.train()
        model.baum_welch_viterbi_train(BW_NORMAL)
        return model

    pos_train = _trained_hmm()
    neg_train = _trained_hmm()

    # Clone the trained models and point them at the test observations.
    pos_test = HMM(pos_train)
    pos_test.set_observations(wordfeats['test'])
    neg_test = HMM(neg_train)
    neg_test.set_observations(wordfeats['test'])

    feats = {}
    if indata[prefix + 'name'] == 'TOP':
        feats['train'] = TOPFeatures(10, pos_train, neg_train, False, False)
        feats['test'] = TOPFeatures(10, pos_test, neg_test, False, False)
    else:
        feats['train'] = FKFeatures(10, pos_train, neg_train)
        feats['train'].set_opt_a(-1)  # estimate prior
        feats['test'] = FKFeatures(10, pos_test, neg_test)
        feats['test'].set_a(feats['train'].get_a())  # use prior from training data

    # From here on, parameters are looked up under the kernel prefix.
    prefix = 'kernel_'
    args = util.get_args(indata, prefix)
    kernel = PolyKernel(feats['train'], feats['train'], *args)

    km_train = max(
        abs(indata[prefix + 'matrix_train'] - kernel.get_kernel_matrix()).flat)
    kernel.init(feats['train'], feats['test'])
    km_test = max(
        abs(indata[prefix + 'matrix_test'] - kernel.get_kernel_matrix()).flat)

    return util.check_accuracy(indata[prefix + 'accuracy'],
                               km_train=km_train, km_test=km_test)
def _evaluate_top_fisher (indata, prefix):
    """Recompute TOP/Fisher kernel matrices and compare against reference data.

    NOTE(review): this is a token-for-token duplicate of the earlier
    ``_evaluate_top_fisher`` definition in this file; being defined second,
    it silently shadows the first. Consider removing one copy.

    Trains two HMMs on the same training features, builds TOP or FK features
    from them (chosen by ``indata[prefix + 'name']``), evaluates a polynomial
    kernel, and checks deviations from the stored reference matrices.
    """
    feats={}
    wordfeats=util.get_features(indata, prefix)
    # Train the "positive" model on the training features.
    pos_train=HMM(wordfeats['train'], indata[prefix+'N'], indata[prefix+'M'], indata[prefix+'pseudo'])
    pos_train.train()
    pos_train.baum_welch_viterbi_train(BW_NORMAL)
    # The "negative" model is trained on the same data; it differs only by
    # random initialization.
    neg_train=HMM(wordfeats['train'], indata[prefix+'N'], indata[prefix+'M'], indata[prefix+'pseudo'])
    neg_train.train()
    neg_train.baum_welch_viterbi_train(BW_NORMAL)
    # Clone the trained models and attach the test observations.
    pos_test=HMM(pos_train)
    pos_test.set_observations(wordfeats['test'])
    neg_test=HMM(neg_train)
    neg_test.set_observations(wordfeats['test'])
    if indata[prefix+'name']=='TOP':
        feats['train']=TOPFeatures(10, pos_train, neg_train, False, False)
        feats['test']=TOPFeatures(10, pos_test, neg_test, False, False)
    else:
        feats['train']=FKFeatures(10, pos_train, neg_train)
        feats['train'].set_opt_a(-1) #estimate prior
        feats['test']=FKFeatures(10, pos_test, neg_test)
        feats['test'].set_a(feats['train'].get_a()) #use prior from training data
    # prefix is deliberately rebound: kernel parameters (and the accuracy
    # threshold below) live under the 'kernel_' keys of indata.
    prefix='kernel_'
    args=util.get_args(indata, prefix)
    kernel=PolyKernel(feats['train'], feats['train'], *args)
    # kernel=PolyKernel(*args)
    # kernel.init(feats['train'], feats['train'])
    km_train=max(abs(
        indata[prefix+'matrix_train']-kernel.get_kernel_matrix()).flat)
    kernel.init(feats['train'], feats['test'])
    km_test=max(abs(
        indata[prefix+'matrix_test']-kernel.get_kernel_matrix()).flat)

    return util.check_accuracy(indata[prefix+'accuracy'],
        km_train=km_train, km_test=km_test)