def kernel_top_modular(fm_train_dna=traindat, fm_test_dna=testdat,
                       label_train_dna=label_traindat, pseudo=1e-1,
                       order=1, gap=0, reverse=False,
                       kargs=[1, False, True]):
    """Build polynomial-kernel matrices over TOP features from two HMMs.

    Two HMMs (a "positive" and a "negative" one) are trained with
    ``baum_welch_viterbi_train(BW_NORMAL)``, then used to construct
    ``TOPFeatures`` for the training and the testing strings. A
    ``PolyKernel`` is evaluated on train/train and then re-initialised on
    train/test.

    NOTE(review): the HMM training strings are read from ``fm_hmm_pos`` and
    ``fm_hmm_neg``. Neither is a parameter or a local of this function, so
    both must be defined at module level. ``label_train_dna`` is accepted
    but not used anywhere in this body.

    Parameters:
        fm_train_dna: strings used as kernel training data.
        fm_test_dna: strings used as kernel testing data.
        label_train_dna: unused here (see note above).
        pseudo: value passed as the fourth argument of ``HMM(...)``.
        order, gap, reverse: forwarded to ``obtain_from_char`` (as
            ``order - 1, order, gap, reverse``).
        kargs: extra positional arguments unpacked into ``PolyKernel``
            after the two feature objects.

    Returns:
        Tuple ``(km_train, km_test, kernel)``: the train/train kernel
        matrix, the train/test kernel matrix, and the kernel object.
    """
    from shogun.Features import StringCharFeatures, StringWordFeatures, TOPFeatures, DNA
    from shogun.Kernel import PolyKernel
    from shogun.Distribution import HMM, BW_NORMAL

    num_states = 1   # toy HMM with 1 state
    num_symbols = 4  # 4 observations -> DNA

    def to_word_features(strings):
        # Wrap the raw strings as DNA char features, then convert to words.
        chars = StringCharFeatures(strings, DNA)
        words = StringWordFeatures(chars.get_alphabet())
        words.obtain_from_char(chars, order - 1, order, gap, reverse)
        return words

    # HMM for the positive class
    hmm_pos_words = to_word_features(fm_hmm_pos)
    hmm_pos = HMM(hmm_pos_words, num_states, num_symbols, pseudo)
    hmm_pos.baum_welch_viterbi_train(BW_NORMAL)

    # HMM for the negative class
    hmm_neg_words = to_word_features(fm_hmm_neg)
    hmm_neg = HMM(hmm_neg_words, num_states, num_symbols, pseudo)
    hmm_neg.baum_welch_viterbi_train(BW_NORMAL)

    # Word features for the kernel's training and testing data
    train_words = to_word_features(fm_train_dna)
    test_words = to_word_features(fm_test_dna)

    # Kernel on training data: point both HMMs at the training words.
    hmm_pos.set_observations(train_words)
    hmm_neg.set_observations(train_words)
    top_train = TOPFeatures(10, hmm_pos, hmm_neg, False, False)
    poly = PolyKernel(top_train, top_train, *kargs)
    train_matrix = poly.get_kernel_matrix()

    # Kernel on testing data: copies of the HMMs observe the test words.
    hmm_pos_copy = HMM(hmm_pos)
    hmm_neg_copy = HMM(hmm_neg)
    hmm_pos_copy.set_observations(test_words)
    hmm_neg_copy.set_observations(test_words)
    top_test = TOPFeatures(10, hmm_pos_copy, hmm_neg_copy, False, False)
    poly.init(top_train, top_test)
    test_matrix = poly.get_kernel_matrix()

    return train_matrix, test_matrix, poly
def _run_top_fisher():
    """Run Linear Kernel with {Top,Fisher}Features.

    Builds a ``topfk_``-prefixed parameter dict, trains two HMMs on the
    training word features, creates copies observing the test word
    features, and hands TOP features and then FK features (together with
    the parameter dict) to ``_compute_top_fisher``.
    """

    # put some constantness into randomness: seed before generating data
    Math_init_random(dataop.INIT_RANDOM)
    data = dataop.get_cubes(4, 8)

    prefix = 'topfk_'
    settings = (
        ('N', 3),
        ('M', 6),
        ('pseudo', 1e-1),
        ('order', 1),
        ('gap', 0),
        ('reverse', False),
        ('alphabet', 'CUBE'),
        ('feature_class', 'string_complex'),
        ('feature_type', 'Word'),
        ('data_train', numpy.matrix(data['train'])),
        ('data_test', numpy.matrix(data['test'])),
    )
    params = dict((prefix + key, value) for key, value in settings)

    def opt(key):
        # Look up a setting by its unprefixed name.
        return params[prefix + key]

    # The alphabet name is resolved to the object of that name via eval.
    wordfeats = featop.get_features(
        opt('feature_class'),
        opt('feature_type'),
        data,
        eval(opt('alphabet')),
        opt('order'),
        opt('gap'),
        opt('reverse'),
    )

    def trained_hmm():
        # Fresh HMM on the training word features, trained twice in order.
        model = HMM(wordfeats['train'], opt('N'), opt('M'), opt('pseudo'))
        model.train()
        model.baum_welch_viterbi_train(BW_NORMAL)
        return model

    def on_test_data(model):
        # Copy of a trained HMM that observes the test word features.
        copy = HMM(model)
        copy.set_observations(wordfeats['test'])
        return copy

    pos_train = trained_hmm()
    neg_train = trained_hmm()
    pos_test = on_test_data(pos_train)
    neg_test = on_test_data(neg_train)

    # First pass: TOP features
    feats = {}
    feats['train'] = TOPFeatures(10, pos_train, neg_train, False, False)
    feats['test'] = TOPFeatures(10, pos_test, neg_test, False, False)
    params[prefix + 'name'] = 'TOP'
    _compute_top_fisher(feats, params)

    # Second pass: FK features, reusing the same feats/params dicts
    fk_train = FKFeatures(10, pos_train, neg_train)
    feats['train'] = fk_train
    fk_train.set_opt_a(-1)  # estimate prior
    fk_test = FKFeatures(10, pos_test, neg_test)
    feats['test'] = fk_test
    fk_test.set_a(fk_train.get_a())  # use prior from training data
    params[prefix + 'name'] = 'FK'
    _compute_top_fisher(feats, params)