Ejemplo n.º 1
0
def kernel_fisher_modular(fm_train_dna=traindat,
                          fm_test_dna=testdat,
                          label_train_dna=label_traindat,
                          N=1,
                          M=4,
                          pseudo=1e-1,
                          order=1,
                          gap=0,
                          reverse=False,
                          kargs=[1, False, True]):

    from shogun.Features import StringCharFeatures, StringWordFeatures, FKFeatures, DNA
    from shogun.Kernel import PolyKernel
    from shogun.Distribution import HMM, BW_NORMAL  #, MSG_DEBUG

    # train HMM for positive class
    charfeat = StringCharFeatures(fm_hmm_pos, DNA)
    #charfeat.io.set_loglevel(MSG_DEBUG)
    hmm_pos_train = StringWordFeatures(charfeat.get_alphabet())
    hmm_pos_train.obtain_from_char(charfeat, order - 1, order, gap, reverse)
    pos = HMM(hmm_pos_train, N, M, pseudo)
    pos.baum_welch_viterbi_train(BW_NORMAL)

    # train HMM for negative class
    charfeat = StringCharFeatures(fm_hmm_neg, DNA)
    hmm_neg_train = StringWordFeatures(charfeat.get_alphabet())
    hmm_neg_train.obtain_from_char(charfeat, order - 1, order, gap, reverse)
    neg = HMM(hmm_neg_train, N, M, pseudo)
    neg.baum_welch_viterbi_train(BW_NORMAL)

    # Kernel training data
    charfeat = StringCharFeatures(fm_train_dna, DNA)
    wordfeats_train = StringWordFeatures(charfeat.get_alphabet())
    wordfeats_train.obtain_from_char(charfeat, order - 1, order, gap, reverse)

    # Kernel testing data
    charfeat = StringCharFeatures(fm_test_dna, DNA)
    wordfeats_test = StringWordFeatures(charfeat.get_alphabet())
    wordfeats_test.obtain_from_char(charfeat, order - 1, order, gap, reverse)

    # get kernel on training data
    pos.set_observations(wordfeats_train)
    neg.set_observations(wordfeats_train)
    feats_train = FKFeatures(10, pos, neg)
    feats_train.set_opt_a(-1)  #estimate prior
    kernel = PolyKernel(feats_train, feats_train, *kargs)
    km_train = kernel.get_kernel_matrix()

    # get kernel on testing data
    pos_clone = HMM(pos)
    neg_clone = HMM(neg)
    pos_clone.set_observations(wordfeats_test)
    neg_clone.set_observations(wordfeats_test)
    feats_test = FKFeatures(10, pos_clone, neg_clone)
    feats_test.set_a(feats_train.get_a())  #use prior from training data
    kernel.init(feats_train, feats_test)
    km_test = kernel.get_kernel_matrix()
    return km_train, km_test, kernel
Ejemplo n.º 2
0
def kernel_top_modular(fm_train_dna=traindat,
                       fm_test_dna=testdat,
                       label_train_dna=label_traindat,
                       pseudo=1e-1,
                       order=1,
                       gap=0,
                       reverse=False,
                       kargs=[1, False, True]):
    from shogun.Features import StringCharFeatures, StringWordFeatures, TOPFeatures, DNA
    from shogun.Kernel import PolyKernel
    from shogun.Distribution import HMM, BW_NORMAL

    N = 1  # toy HMM with 1 state
    M = 4  # 4 observations -> DNA

    # train HMM for positive class
    charfeat = StringCharFeatures(fm_hmm_pos, DNA)
    hmm_pos_train = StringWordFeatures(charfeat.get_alphabet())
    hmm_pos_train.obtain_from_char(charfeat, order - 1, order, gap, reverse)
    pos = HMM(hmm_pos_train, N, M, pseudo)
    pos.baum_welch_viterbi_train(BW_NORMAL)

    # train HMM for negative class
    charfeat = StringCharFeatures(fm_hmm_neg, DNA)
    hmm_neg_train = StringWordFeatures(charfeat.get_alphabet())
    hmm_neg_train.obtain_from_char(charfeat, order - 1, order, gap, reverse)
    neg = HMM(hmm_neg_train, N, M, pseudo)
    neg.baum_welch_viterbi_train(BW_NORMAL)

    # Kernel training data
    charfeat = StringCharFeatures(fm_train_dna, DNA)
    wordfeats_train = StringWordFeatures(charfeat.get_alphabet())
    wordfeats_train.obtain_from_char(charfeat, order - 1, order, gap, reverse)

    # Kernel testing data
    charfeat = StringCharFeatures(fm_test_dna, DNA)
    wordfeats_test = StringWordFeatures(charfeat.get_alphabet())
    wordfeats_test.obtain_from_char(charfeat, order - 1, order, gap, reverse)

    # get kernel on training data
    pos.set_observations(wordfeats_train)
    neg.set_observations(wordfeats_train)
    feats_train = TOPFeatures(10, pos, neg, False, False)
    kernel = PolyKernel(feats_train, feats_train, *kargs)
    km_train = kernel.get_kernel_matrix()

    # get kernel on testing data
    pos_clone = HMM(pos)
    neg_clone = HMM(neg)
    pos_clone.set_observations(wordfeats_test)
    neg_clone.set_observations(wordfeats_test)
    feats_test = TOPFeatures(10, pos_clone, neg_clone, False, False)
    kernel.init(feats_train, feats_test)
    km_test = kernel.get_kernel_matrix()
    return km_train, km_test, kernel
Ejemplo n.º 3
0
def _run_top_fisher():
    """Run Linear Kernel with {Top,Fisher}Features."""

    # put some constantness into randomness
    Math_init_random(dataop.INIT_RANDOM)

    data = dataop.get_cubes(4, 8)
    prefix = 'topfk_'
    params = {
        prefix + 'N': 3,
        prefix + 'M': 6,
        prefix + 'pseudo': 1e-1,
        prefix + 'order': 1,
        prefix + 'gap': 0,
        prefix + 'reverse': False,
        prefix + 'alphabet': 'CUBE',
        prefix + 'feature_class': 'string_complex',
        prefix + 'feature_type': 'Word',
        prefix + 'data_train': numpy.matrix(data['train']),
        prefix + 'data_test': numpy.matrix(data['test'])
    }

    wordfeats = featop.get_features(params[prefix + 'feature_class'],
                                    params[prefix + 'feature_type'], data,
                                    eval(params[prefix + 'alphabet']),
                                    params[prefix + 'order'],
                                    params[prefix + 'gap'],
                                    params[prefix + 'reverse'])
    pos_train = HMM(wordfeats['train'], params[prefix + 'N'],
                    params[prefix + 'M'], params[prefix + 'pseudo'])
    pos_train.train()
    pos_train.baum_welch_viterbi_train(BW_NORMAL)
    neg_train = HMM(wordfeats['train'], params[prefix + 'N'],
                    params[prefix + 'M'], params[prefix + 'pseudo'])
    neg_train.train()
    neg_train.baum_welch_viterbi_train(BW_NORMAL)
    pos_test = HMM(pos_train)
    pos_test.set_observations(wordfeats['test'])
    neg_test = HMM(neg_train)
    neg_test.set_observations(wordfeats['test'])
    feats = {}

    feats['train'] = TOPFeatures(10, pos_train, neg_train, False, False)
    feats['test'] = TOPFeatures(10, pos_test, neg_test, False, False)
    params[prefix + 'name'] = 'TOP'
    _compute_top_fisher(feats, params)

    feats['train'] = FKFeatures(10, pos_train, neg_train)
    feats['train'].set_opt_a(-1)  #estimate prior
    feats['test'] = FKFeatures(10, pos_test, neg_test)
    feats['test'].set_a(feats['train'].get_a())  #use prior from training data
    params[prefix + 'name'] = 'FK'
    _compute_top_fisher(feats, params)
def distribution_hmm_modular(fm_cube, N, M, pseudo, order, gap, reverse, num_examples):
	from shogun.Features import StringWordFeatures, StringCharFeatures, CUBE
	from shogun.Distribution import HMM, BW_NORMAL

	charfeat=StringCharFeatures(CUBE)
	charfeat.set_features(fm_cube)
	feats=StringWordFeatures(charfeat.get_alphabet())
	feats.obtain_from_char(charfeat, order-1, order, gap, reverse)

	hmm=HMM(feats, N, M, pseudo)
	hmm.train()
	hmm.baum_welch_viterbi_train(BW_NORMAL)

	num_examples=feats.get_num_vectors()
	num_param=hmm.get_num_model_parameters()
	for i in range(num_examples):
		for j in range(num_param):
			hmm.get_log_derivative(j, i)

	best_path=0
	best_path_state=0
	for i in range(num_examples):
		best_path+=hmm.best_path(i)
		for j in range(N):
			best_path_state+=hmm.get_best_path_state(i, j)

	lik_example = hmm.get_log_likelihood()
	lik_sample = hmm.get_log_likelihood_sample()

	return lik_example, lik_sample, hmm
Ejemplo n.º 5
0
def _evaluate_top_fisher(indata, prefix):
    feats = {}
    wordfeats = util.get_features(indata, prefix)

    pos_train = HMM(wordfeats['train'], indata[prefix + 'N'],
                    indata[prefix + 'M'], indata[prefix + 'pseudo'])
    pos_train.train()
    pos_train.baum_welch_viterbi_train(BW_NORMAL)
    neg_train = HMM(wordfeats['train'], indata[prefix + 'N'],
                    indata[prefix + 'M'], indata[prefix + 'pseudo'])
    neg_train.train()
    neg_train.baum_welch_viterbi_train(BW_NORMAL)
    pos_test = HMM(pos_train)
    pos_test.set_observations(wordfeats['test'])
    neg_test = HMM(neg_train)
    neg_test.set_observations(wordfeats['test'])

    if indata[prefix + 'name'] == 'TOP':
        feats['train'] = TOPFeatures(10, pos_train, neg_train, False, False)
        feats['test'] = TOPFeatures(10, pos_test, neg_test, False, False)
    else:
        feats['train'] = FKFeatures(10, pos_train, neg_train)
        feats['train'].set_opt_a(-1)  #estimate prior
        feats['test'] = FKFeatures(10, pos_test, neg_test)
        feats['test'].set_a(
            feats['train'].get_a())  #use prior from training data

    prefix = 'kernel_'
    args = util.get_args(indata, prefix)
    kernel = PolyKernel(feats['train'], feats['train'], *args)
    #	kernel=PolyKernel(*args)
    #	kernel.init(feats['train'], feats['train'])
    km_train = max(
        abs(indata[prefix + 'matrix_train'] - kernel.get_kernel_matrix()).flat)
    kernel.init(feats['train'], feats['test'])
    km_test = max(
        abs(indata[prefix + 'matrix_test'] - kernel.get_kernel_matrix()).flat)

    return util.check_accuracy(indata[prefix + 'accuracy'],
                               km_train=km_train,
                               km_test=km_test)