Code example #1
def _run(name):
    """Run generator for a specific distribution method.

	@param name Name of the distribtuion method
	"""

    # put some constantness into randomness
    Math_init_random(INIT_RANDOM)

    params = {
        'name': name,
        'accuracy': 1e-7,
        'data': dataop.get_dna(),
        'alphabet': 'DNA',
        'feature_class': 'string_complex',
        'feature_type': 'Word'
    }
    output = fileop.get_output(category.DISTRIBUTION, params)
    feats = featop.get_features(params['feature_class'],
                                params['feature_type'], params['data'])

    dfun = eval('distribution.' + name)
    dist = dfun(feats['train'])
    dist.train()

    output[PREFIX + 'likelihood'] = dist.get_log_likelihood_sample()
    output[PREFIX + 'derivatives'] = _get_derivatives(
        dist, feats['train'].get_num_vectors())

    fileop.write(category.DISTRIBUTION, output)
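In the upstream test-data generator this helper is presumably invoked once per distribution class; a minimal sketch, assuming the Histogram and LinearHMM classes of shogun.Distribution are the methods being exercised:

def run():
    """Run generator for all distribution methods."""
    _run('Histogram')
    _run('LinearHMM')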
Code example #2
def classifier_larank_modular(fm_train_real=traindat,
                              fm_test_real=testdat,
                              label_train_multiclass=label_traindat,
                              C=0.9,
                              num_threads=1,
                              num_iter=5):

    from shogun.Features import RealFeatures, Labels
    from shogun.Kernel import GaussianKernel
    from shogun.Classifier import LaRank
    from shogun.Library import Math_init_random
    Math_init_random(17)

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)
    width = 2.1
    kernel = GaussianKernel(feats_train, feats_train, width)

    epsilon = 1e-5
    labels = Labels(label_train_multiclass)

    svm = LaRank(C, kernel, labels)
    #svm.set_tau(1e-3)
    svm.set_batch_mode(False)
    #svm.io.enable_progress()
    svm.set_epsilon(epsilon)
    svm.train()
    # classify the training data; classify() with no arguments reuses the
    # features most recently attached to the underlying kernel
    out = svm.classify(feats_train).get_labels()
    predictions = svm.classify()
    return predictions, svm, predictions.get_labels()
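A minimal driver in the style of the other modular examples; the data file paths are assumptions modeled on the LoadMatrix calls in code example #12 below:

if __name__ == '__main__':
    from tools.load import LoadMatrix
    lm = LoadMatrix()
    traindat = lm.load_numbers('../data/fm_train_real.dat')
    testdat = lm.load_numbers('../data/fm_test_real.dat')
    label_traindat = lm.load_labels('../data/label_train_multiclass.dat')
    print 'LaRank'
    predictions, svm, labels = classifier_larank_modular(traindat, testdat, label_traindat)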
Code example #3
def _run_top_fisher():
    """Run Linear Kernel with {Top,Fisher}Features."""

    # put some constantness into randomness
    Math_init_random(dataop.INIT_RANDOM)

    data = dataop.get_cubes(4, 8)
    prefix = 'topfk_'
    params = {
        prefix + 'N': 3,
        prefix + 'M': 6,
        prefix + 'pseudo': 1e-1,
        prefix + 'order': 1,
        prefix + 'gap': 0,
        prefix + 'reverse': False,
        prefix + 'alphabet': 'CUBE',
        prefix + 'feature_class': 'string_complex',
        prefix + 'feature_type': 'Word',
        prefix + 'data_train': numpy.matrix(data['train']),
        prefix + 'data_test': numpy.matrix(data['test'])
    }

    wordfeats = featop.get_features(params[prefix + 'feature_class'],
                                    params[prefix + 'feature_type'], data,
                                    eval(params[prefix + 'alphabet']),
                                    params[prefix + 'order'],
                                    params[prefix + 'gap'],
                                    params[prefix + 'reverse'])
    pos_train = HMM(wordfeats['train'], params[prefix + 'N'],
                    params[prefix + 'M'], params[prefix + 'pseudo'])
    pos_train.train()
    pos_train.baum_welch_viterbi_train(BW_NORMAL)
    neg_train = HMM(wordfeats['train'], params[prefix + 'N'],
                    params[prefix + 'M'], params[prefix + 'pseudo'])
    neg_train.train()
    neg_train.baum_welch_viterbi_train(BW_NORMAL)
    pos_test = HMM(pos_train)
    pos_test.set_observations(wordfeats['test'])
    neg_test = HMM(neg_train)
    neg_test.set_observations(wordfeats['test'])
    feats = {}

    feats['train'] = TOPFeatures(10, pos_train, neg_train, False, False)
    feats['test'] = TOPFeatures(10, pos_test, neg_test, False, False)
    params[prefix + 'name'] = 'TOP'
    _compute_top_fisher(feats, params)

    feats['train'] = FKFeatures(10, pos_train, neg_train)
    feats['train'].set_opt_a(-1)  # estimate prior
    feats['test'] = FKFeatures(10, pos_test, neg_test)
    feats['test'].set_a(feats['train'].get_a())  # use prior from training data
    params[prefix + 'name'] = 'FK'
    _compute_top_fisher(feats, params)
Code example #4
def clustering_gmm_modular (fm_train=generated,n=2,min_cov=1e-9,max_iter=1000,min_change=1e-9,cov_type=0):

	from shogun.Distribution import GMM
	from shogun.Features import RealFeatures
	from shogun.Library import Math_init_random

	Math_init_random(5)

	feat_train=RealFeatures(fm_train)  # use the fm_train parameter, not the module-level default

	est_gmm=GMM(n, cov_type)
	est_gmm.train(feat_train)
	est_gmm.train_em(min_cov, max_iter, min_change)

	return est_gmm
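A minimal sketch of driving this example; `generated` is the synthetic sample matrix built in code example #13 below, and the parameter values mirror that example's parameter_list:

if __name__ == '__main__':
	print 'GMM'
	est_gmm = clustering_gmm_modular(generated, 2, 1e-9, 1000, 1e-9, 0)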
Code example #5
def run(argv):
    """
	Run all individual generators or only one if present in
	argument list.
	"""

    # put some constantness into randomness
    Math_init_random(INIT_RANDOM)
    random.seed(INIT_RANDOM)

    arglen = len(argv)
    if arglen == 2:  # run given category
        if argv[1] == 'clear':
            clean_dir_outdata()
        else:
            try:
                __import__(argv[1], globals(), locals())
            except ImportError:
                raise ImportError, 'Unknown category ' + argv[1]

            category = eval(argv[1])
            category.run()
    else:
        # run given category by calling self again, one by one
        # this is due to an issue somewhere with classifiers (atm) and
        # 'static randomness'

        if arglen == 1:
            command = argv[0]
            cats = CATEGORIES
        else:
            command = argv.pop(0)
            cats = argv

        for cat in cats:
            if cat not in CATEGORIES:
                known = ', '.join(CATEGORIES)
                msg = "Unknown category: %s\nTry one of these: %s\n" % (cat,
                                                                        known)
                sys.stderr.write(msg)
                sys.exit(1)

            ret = os.system('%s %s' % (command, cat))
            if ret != 0:
                sys.exit(ret)
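run() expects the raw argument vector so that it can re-invoke the script once per category; a minimal entry-point sketch:

if __name__ == '__main__':
    import sys
    run(sys.argv)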
Code example #6
def clustering_kmeans_modular (fm_train=traindat,k=3):

	from shogun.Distance import EuclidianDistance
	from shogun.Features import RealFeatures
	from shogun.Clustering import KMeans
	from shogun.Library import Math_init_random
	Math_init_random(17)

	feats_train=RealFeatures(fm_train)
	distance=EuclidianDistance(feats_train, feats_train)

	kmeans=KMeans(k, distance)
	kmeans.train()

	out_centers = kmeans.get_cluster_centers()
	kmeans.get_radiuses()

	return out_centers, kmeans
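A minimal sketch of calling the example on synthetic data; the two-blob construction is an assumption, not part of the original example (RealFeatures expects one feature vector per column):

if __name__ == '__main__':
	from numpy import concatenate
	from numpy.random import randn, seed
	seed(17)
	# two well-separated Gaussian blobs, one column per example
	traindat = concatenate((randn(2, 50) - 3, randn(2, 50) + 3), axis=1)
	out_centers, kmeans = clustering_kmeans_modular(traindat, k=2)
	print out_centers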
Code example #7
def _run_hmm():
    """Run generator for Hidden-Markov-Model."""

    # put some constantness into randomness
    Math_init_random(INIT_RANDOM)

    num_examples = 4
    params = {
        'name': 'HMM',
        'accuracy': 1e-6,
        'N': 3,
        'M': 6,
        'num_examples': num_examples,
        'pseudo': 1e-10,
        'order': 1,
        'alphabet': 'CUBE',
        'feature_class': 'string_complex',
        'feature_type': 'Word',
        'data': dataop.get_cubes(num_examples, 1)
    }
    output = fileop.get_output(category.DISTRIBUTION, params)

    feats = featop.get_features(params['feature_class'],
                                params['feature_type'], params['data'],
                                eval('features.' + params['alphabet']),
                                params['order'])

    hmm = distribution.HMM(feats['train'], params['N'], params['M'],
                           params['pseudo'])
    hmm.train()
    hmm.baum_welch_viterbi_train(distribution.BW_NORMAL)

    output[PREFIX + 'likelihood'] = hmm.get_log_likelihood_sample()
    output[PREFIX + 'derivatives'] = _get_derivatives(
        hmm, feats['train'].get_num_vectors())

    output[PREFIX + 'best_path'] = 0
    output[PREFIX + 'best_path_state'] = 0
    for i in xrange(num_examples):
        output[PREFIX + 'best_path'] += hmm.best_path(i)
        for j in xrange(params['N']):
            output[PREFIX + 'best_path_state'] += hmm.get_best_path_state(i, j)

    fileop.write(category.DISTRIBUTION, output)
Code example #8
File: clustering.py Project: vinodrajendran001/ASP
def _run(name, first_arg):
    """
	Run generator for a specific clustering method.

	@param name Name of the clustering method to run.
	@param first_arg First argument to the clustering's constructor; so far, only this distinguishes the instantion of the different methods.
	"""

    # put some constantness into randomness
    Math_init_random(dataop.INIT_RANDOM)

    num_clouds = 3
    params = {
        'name': 'EuclidianDistance',
        'data': dataop.get_clouds(num_clouds, 5),
        'feature_class': 'simple',
        'feature_type': 'Real'
    }
    feats = featop.get_features(params['feature_class'],
                                params['feature_type'], params['data'])
    dfun = eval(params['name'])
    distance = dfun(feats['train'], feats['train'])
    output = fileop.get_output(category.DISTANCE, params)

    params = {'name': name, 'accuracy': 1e-8, first_arg: num_clouds}
    fun = eval('clustering.' + name)
    clustering = fun(params[first_arg], distance)
    clustering.train()

    distance.init(feats['train'], feats['test'])
    if name == 'KMeans':
        params['radi'] = clustering.get_radiuses()
        params['centers'] = clustering.get_cluster_centers()
    elif name == 'Hierarchical':
        params['merge_distance'] = clustering.get_merge_distances()
        params['pairs'] = clustering.get_cluster_pairs()

    output.update(fileop.get_output(category.CLUSTERING, params))
    fileop.write(category.CLUSTERING, output)
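The generator presumably calls this helper once per clustering method; a sketch, assuming 'k' and 'merges' are the distinguishing constructor arguments of KMeans and Hierarchical:

def run():
    """Run generator for all clustering methods."""
    _run('KMeans', 'k')
    _run('Hierarchical', 'merges')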
Code example #9
def classifier_liblinear_modular(fm_train_real, fm_test_real,
                                 label_train_twoclass, C, epsilon):

    from shogun.Features import RealFeatures, Labels
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC_DUAL
    from shogun.Library import Math_init_random
    Math_init_random(17)

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)
    labels = Labels(label_train_twoclass)

    svm = LibLinear(C, feats_train, labels)
    svm.set_liblinear_solver_type(L2R_L2LOSS_SVC_DUAL)
    svm.set_epsilon(epsilon)
    svm.set_bias_enabled(True)
    svm.train()

    svm.set_features(feats_test)
    predictions = svm.apply()
    return predictions, svm, predictions.get_labels()
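A minimal sketch of calling the example on synthetic two-class data; the blob construction and the C/epsilon values are assumptions:

if __name__ == '__main__':
    from numpy import concatenate, ones
    from numpy.random import randn, seed
    seed(17)
    traindat = concatenate((randn(2, 50) - 1, randn(2, 50) + 1), axis=1)
    testdat = concatenate((randn(2, 20) - 1, randn(2, 20) + 1), axis=1)
    labels = concatenate((-ones(50), ones(50)))
    predictions, svm, out = classifier_liblinear_modular(traindat, testdat, labels, 0.9, 1e-5)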
コード例 #10
0
				indata[param]=eval(line.split('=')[1])
			except SyntaxError: # might be MultiClass SVM and hence matrix
				indata[param]=_read_matrix(line)
		elif param=='clustering_centers' or param=='clustering_pairs':
			indata[param]=_read_matrix(line)
		else:
			if (line.find("'")==-1):
				indata[param]=eval(line.split('=')[1])
			else:
				indata[param]=line.split('=')[1].strip().split("'")[1]

	mfile.close()
	fun=eval(name_fun)

	# seed random to constant value used at data file's creation
	Math_init_random(indata['init_random'])
	random.seed(indata['init_random'])

	return fun(indata)

def _read_matrix (line):
	try:
		str_line=(line.split('[')[1]).split(']')[0]
	except IndexError:
		str_line=(line.split('{')[1]).split('}')[0]

	lines=str_line.split(';')
	lis2d=list()

	for x in lines:
		lis=list()
		for y in x.split(','):
			y=y.replace("'","").strip()
			if y.isalpha():
				lis.append(y)
			else:
				lis.append(float(y))
		lis2d.append(lis)

	# numpy is imported at the top of the (truncated) source file
	return numpy.array(lis2d)
Code example #11
for i in xrange(N):

    try:
        assert (abs(inner[i] - tmp_out[i]) <= 0.001)
        assert (abs(inner[i] - tmp_out2[i]) <= 0.001)
    except AssertionError:
        print "difference in outputs: (%.4f, %.4f, %.4f)" % (inner[i],
                                                             tmp_out[i],
                                                             tmp_out2[i])

###############
# compare to LibSVM

dasvm_manual_libsvm = LibSVM(1.0, wdk, lab)
dasvm_manual_libsvm.set_linear_term(linterm_manual)
dasvm_manual_libsvm.set_bias_enabled(False)
Math_init_random(1)
dasvm_manual_libsvm.train()

###############
# compare to LibLinear

dasvm_manual_liblinear = LibLinear(1.0, feat, lab)
dasvm_manual_liblinear.set_linear_term(linterm_manual)
dasvm_manual_liblinear.set_bias_enabled(False)
dasvm_manual_liblinear.train()

#############################################
#    compute DA-SVMs in shogun (kernelized AND linear)
#############################################

dasvm_libsvm = DomainAdaptationSVM(1.0, wdk, lab, presvm_libsvm, B)
Code example #12
#!/usr/bin/env python
from tools.load import LoadMatrix
lm = LoadMatrix()

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

parameter_list = [[traindat, testdat, 1.5, 10], [traindat, testdat, 1.5, 10]]

from shogun.Library import Math_init_random
Math_init_random(12345)


def preprocessor_randomfouriergausspreproc_modular(fm_train_real=traindat,
                                                   fm_test_real=testdat,
                                                   width=1.4,
                                                   size_cache=10):
    from shogun.Kernel import Chi2Kernel
    from shogun.Features import RealFeatures
    from shogun.Preprocessor import RandomFourierGaussPreproc

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)

    preproc = RandomFourierGaussPreproc()
    preproc.init(feats_train)
    feats_train.add_preprocessor(preproc)
    feats_train.apply_preprocessor()
    feats_test.add_preprocessor(preproc)
    feats_test.apply_preprocessor()

    # the snippet is truncated here; a sketch of the presumable remainder,
    # using the Chi2Kernel imported above on the preprocessed features
    kernel = Chi2Kernel(feats_train, feats_train, width, size_cache)
    km_train = kernel.get_kernel_matrix()
    kernel.init(feats_train, feats_test)
    km_test = kernel.get_kernel_matrix()
    return km_train, km_test, kernel
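A usage sketch driven by the parameter_list defined above; Math_init_random(12345) at module level pins the otherwise random Fourier projection:

if __name__ == '__main__':
    print 'RandomFourierGaussPreproc'
    preprocessor_randomfouriergausspreproc_modular(*parameter_list[0])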
Code example #13
from numpy import array, append
from shogun.Distribution import GMM
from shogun.Library import Math_init_random

Math_init_random(5)

real_gmm = GMM(2, 0)

real_gmm.set_nth_mean(array([1.0, 1.0]), 0)
real_gmm.set_nth_mean(array([-1.0, -1.0]), 1)

real_gmm.set_nth_cov(array([[1.0, 0.2], [0.2, 0.1]]), 0)
real_gmm.set_nth_cov(array([[0.3, 0.1], [0.1, 1.0]]), 1)

real_gmm.set_coef(array([0.3, 0.7]))

generated = array([real_gmm.sample()])
for i in range(199):
    generated = append(generated, array([real_gmm.sample()]), axis=0)

generated = generated.transpose()

parameter_list = [[generated, 2, 1e-9, 1000, 1e-9, 0]]


def clustering_gmm_modular(fm_train=generated,
                           n=2,
                           min_cov=1e-9,
                           max_iter=1000,
                           min_change=1e-9,
                           cov_type=0):