def _run(name):
    """Run generator for a specific distribution method.

    @param name Name of the distribution method
    """
    # put some constantness into randomness
    Math_init_random(INIT_RANDOM)

    params = {
        'name': name,
        'accuracy': 1e-7,
        'data': dataop.get_dna(),
        'alphabet': 'DNA',
        'feature_class': 'string_complex',
        'feature_type': 'Word'
    }
    output = fileop.get_output(category.DISTRIBUTION, params)

    feats = featop.get_features(params['feature_class'],
        params['feature_type'], params['data'])
    dfun = eval('distribution.' + name)
    dist = dfun(feats['train'])
    dist.train()

    output[PREFIX + 'likelihood'] = dist.get_log_likelihood_sample()
    output[PREFIX + 'derivatives'] = _get_derivatives(
        dist, feats['train'].get_num_vectors())

    fileop.write(category.DISTRIBUTION, output)
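# Usage sketch (the method names below are assumptions based on the classes
# available in shogun.Distribution, not taken from this file):
#
#   _run('Histogram')
#   _run('LinearHMM')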
def classifier_larank_modular(fm_train_real=traindat, fm_test_real=testdat,
        label_train_multiclass=label_traindat, C=0.9, num_threads=1, num_iter=5):
    from shogun.Features import RealFeatures, Labels
    from shogun.Kernel import GaussianKernel
    from shogun.Classifier import LaRank
    from shogun.Library import Math_init_random
    Math_init_random(17)

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)
    width = 2.1
    kernel = GaussianKernel(feats_train, feats_train, width)

    epsilon = 1e-5
    labels = Labels(label_train_multiclass)

    svm = LaRank(C, kernel, labels)
    #svm.set_tau(1e-3)
    svm.set_batch_mode(False)
    #svm.io.enable_progress()
    svm.set_epsilon(epsilon)
    svm.train()
    out = svm.classify(feats_train).get_labels()
    predictions = svm.classify()
    return predictions, svm, predictions.get_labels()
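# Hypothetical driver; `traindat`, `testdat` and `label_traindat` are assumed
# to be loaded via tools.load.LoadMatrix before the definition above, since
# the defaults already reference them:
if __name__ == '__main__':
    print 'LaRank'
    classifier_larank_modular(traindat, testdat, label_traindat)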
def _run_top_fisher():
    """Run Linear Kernel with {Top,Fisher}Features."""
    # put some constantness into randomness
    Math_init_random(dataop.INIT_RANDOM)

    data = dataop.get_cubes(4, 8)
    prefix = 'topfk_'
    params = {
        prefix + 'N': 3,
        prefix + 'M': 6,
        prefix + 'pseudo': 1e-1,
        prefix + 'order': 1,
        prefix + 'gap': 0,
        prefix + 'reverse': False,
        prefix + 'alphabet': 'CUBE',
        prefix + 'feature_class': 'string_complex',
        prefix + 'feature_type': 'Word',
        prefix + 'data_train': numpy.matrix(data['train']),
        prefix + 'data_test': numpy.matrix(data['test'])
    }

    wordfeats = featop.get_features(params[prefix + 'feature_class'],
        params[prefix + 'feature_type'], data,
        eval(params[prefix + 'alphabet']), params[prefix + 'order'],
        params[prefix + 'gap'], params[prefix + 'reverse'])

    pos_train = HMM(wordfeats['train'], params[prefix + 'N'],
        params[prefix + 'M'], params[prefix + 'pseudo'])
    pos_train.train()
    pos_train.baum_welch_viterbi_train(BW_NORMAL)
    neg_train = HMM(wordfeats['train'], params[prefix + 'N'],
        params[prefix + 'M'], params[prefix + 'pseudo'])
    neg_train.train()
    neg_train.baum_welch_viterbi_train(BW_NORMAL)
    pos_test = HMM(pos_train)
    pos_test.set_observations(wordfeats['test'])
    neg_test = HMM(neg_train)
    neg_test.set_observations(wordfeats['test'])

    feats = {}
    feats['train'] = TOPFeatures(10, pos_train, neg_train, False, False)
    feats['test'] = TOPFeatures(10, pos_test, neg_test, False, False)
    params[prefix + 'name'] = 'TOP'
    _compute_top_fisher(feats, params)

    feats['train'] = FKFeatures(10, pos_train, neg_train)
    feats['train'].set_opt_a(-1)  # estimate prior
    feats['test'] = FKFeatures(10, pos_test, neg_test)
    feats['test'].set_a(feats['train'].get_a())  # use prior from training data
    params[prefix + 'name'] = 'FK'
    _compute_top_fisher(feats, params)
def clustering_gmm_modular(fm_train=generated, n=2, min_cov=1e-9,
        max_iter=1000, min_change=1e-9, cov_type=0):
    from shogun.Distribution import GMM
    from shogun.Features import RealFeatures
    from shogun.Library import Math_init_random

    Math_init_random(5)

    # use the passed-in data, not the module-level 'generated'
    feat_train = RealFeatures(fm_train)
    est_gmm = GMM(n, cov_type)
    est_gmm.train(feat_train)
    est_gmm.train_em(min_cov, max_iter, min_change)

    return est_gmm
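# Hypothetical driver; `generated` and `parameter_list` come from the GMM
# sampling preamble shown at the end of this collection:
if __name__ == '__main__':
    print 'GMM'
    clustering_gmm_modular(*parameter_list[0])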
def run(argv):
    """
    Run all individual generators or only one if present in argument list.
    """
    # put some constantness into randomness
    Math_init_random(INIT_RANDOM)
    random.seed(INIT_RANDOM)

    arglen = len(argv)
    if arglen == 2:  # run given category
        if argv[1] == 'clear':
            clean_dir_outdata()
        else:
            try:
                __import__(argv[1], globals(), locals())
            except ImportError:
                raise ImportError, 'Unknown category ' + argv[1]
            category = eval(argv[1])
            category.run()
    else:
        # run each category by calling self again, one by one
        # this is due to an issue somewhere with classifiers (atm) and
        # 'static randomness'
        if arglen == 1:
            command = argv[0]
            cats = CATEGORIES
        else:
            command = argv.pop(0)
            cats = argv

        for cat in cats:
            if not cat in CATEGORIES:
                known = ', '.join(CATEGORIES)
                msg = "Unknown category: %s\nTry one of these: %s\n" % (cat, known)
                sys.stderr.write(msg)
                sys.exit(1)
            ret = os.system('%s %s' % (command, cat))
            if ret != 0:
                sys.exit(ret)
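# Entry point sketch, assuming the usual script idiom (`sys` is already used
# inside run() and so must be imported at the top of this module):
if __name__ == '__main__':
    run(sys.argv)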
def clustering_kmeans_modular(fm_train=traindat, k=3):
    from shogun.Distance import EuclidianDistance
    from shogun.Features import RealFeatures
    from shogun.Clustering import KMeans
    from shogun.Library import Math_init_random
    Math_init_random(17)

    feats_train = RealFeatures(fm_train)
    distance = EuclidianDistance(feats_train, feats_train)

    kmeans = KMeans(k, distance)
    kmeans.train()

    out_centers = kmeans.get_cluster_centers()
    kmeans.get_radiuses()

    return out_centers, kmeans
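# Hypothetical driver; `traindat` is assumed loaded via tools.load.LoadMatrix
# as in the other modular examples:
if __name__ == '__main__':
    print 'KMeans'
    clustering_kmeans_modular(traindat)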
def _run_hmm():
    """Run generator for Hidden-Markov-Model."""
    # put some constantness into randomness
    Math_init_random(INIT_RANDOM)

    num_examples = 4
    params = {
        'name': 'HMM',
        'accuracy': 1e-6,
        'N': 3,
        'M': 6,
        'num_examples': num_examples,
        'pseudo': 1e-10,
        'order': 1,
        'alphabet': 'CUBE',
        'feature_class': 'string_complex',
        'feature_type': 'Word',
        'data': dataop.get_cubes(num_examples, 1)
    }
    output = fileop.get_output(category.DISTRIBUTION, params)

    feats = featop.get_features(params['feature_class'],
        params['feature_type'], params['data'],
        eval('features.' + params['alphabet']), params['order'])

    hmm = distribution.HMM(feats['train'], params['N'], params['M'],
        params['pseudo'])
    hmm.train()
    hmm.baum_welch_viterbi_train(distribution.BW_NORMAL)

    output[PREFIX + 'likelihood'] = hmm.get_log_likelihood_sample()
    output[PREFIX + 'derivatives'] = _get_derivatives(
        hmm, feats['train'].get_num_vectors())
    output[PREFIX + 'best_path'] = 0
    output[PREFIX + 'best_path_state'] = 0
    for i in xrange(num_examples):
        output[PREFIX + 'best_path'] += hmm.best_path(i)
        for j in xrange(params['N']):
            output[PREFIX + 'best_path_state'] += hmm.get_best_path_state(i, j)

    fileop.write(category.DISTRIBUTION, output)
def _run(name, first_arg):
    """
    Run generator for a specific clustering method.

    @param name Name of the clustering method to run.
    @param first_arg First argument to the clustering's constructor; so far,
                     only this distinguishes the instantiation of the
                     different methods.
    """
    # put some constantness into randomness
    Math_init_random(dataop.INIT_RANDOM)

    num_clouds = 3
    params = {
        'name': 'EuclidianDistance',
        'data': dataop.get_clouds(num_clouds, 5),
        'feature_class': 'simple',
        'feature_type': 'Real'
    }
    feats = featop.get_features(params['feature_class'],
        params['feature_type'], params['data'])
    dfun = eval(params['name'])
    distance = dfun(feats['train'], feats['train'])
    output = fileop.get_output(category.DISTANCE, params)

    params = {
        'name': name,
        'accuracy': 1e-8,
        first_arg: num_clouds
    }

    fun = eval('clustering.' + name)
    clustering = fun(params[first_arg], distance)
    clustering.train()

    distance.init(feats['train'], feats['test'])

    if name == 'KMeans':
        params['radi'] = clustering.get_radiuses()
        params['centers'] = clustering.get_cluster_centers()
    elif name == 'Hierarchical':
        params['merge_distance'] = clustering.get_merge_distances()
        params['pairs'] = clustering.get_cluster_pairs()

    output.update(fileop.get_output(category.CLUSTERING, params))
    fileop.write(category.CLUSTERING, output)
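# Usage sketch (assumption: these name/constructor-argument pairs match the
# clustering methods handled in the branches above):
#
#   _run('KMeans', 'k')
#   _run('Hierarchical', 'merges')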
def classifier_liblinear_modular(fm_train_real, fm_test_real,
        label_train_twoclass, C, epsilon):
    from shogun.Features import RealFeatures, SparseRealFeatures, Labels
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC_DUAL
    from shogun.Library import Math_init_random
    Math_init_random(17)

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)

    labels = Labels(label_train_twoclass)

    svm = LibLinear(C, feats_train, labels)
    svm.set_liblinear_solver_type(L2R_L2LOSS_SVC_DUAL)
    svm.set_epsilon(epsilon)
    svm.set_bias_enabled(True)
    svm.train()

    svm.set_features(feats_test)
    svm.apply().get_labels()
    predictions = svm.apply()
    return predictions, svm, predictions.get_labels()
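# Hypothetical driver with illustrative values for C and epsilon; the data
# arrays are assumed loaded via LoadMatrix as in the sibling examples:
if __name__ == '__main__':
    print 'LibLinear'
    classifier_liblinear_modular(traindat, testdat, label_traindat, 0.9, 1e-5)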
            try:
                indata[param] = eval(line.split('=')[1])
            except SyntaxError:
                # might be MultiClass SVM and hence matrix
                indata[param] = _read_matrix(line)
        elif param == 'clustering_centers' or param == 'clustering_pairs':
            indata[param] = _read_matrix(line)
        else:
            if line.find("'") == -1:
                indata[param] = eval(line.split('=')[1])
            else:
                indata[param] = line.split('=')[1].strip().split("'")[1]

    mfile.close()
    fun = eval(name_fun)

    # seed random to constant value used at data file's creation
    Math_init_random(indata['init_random'])
    random.seed(indata['init_random'])

    return fun(indata)


def _read_matrix(line):
    try:
        str_line = (line.split('[')[1]).split(']')[0]
    except IndexError:
        str_line = (line.split('{')[1]).split('}')[0]

    lines = str_line.split(';')
    lis2d = list()

    for x in lines:
        lis = list()
for i in xrange(N):
    try:
        assert (abs(inner[i] - tmp_out[i]) <= 0.001)
        assert (abs(inner[i] - tmp_out2[i]) <= 0.001)
    except Exception, message:
        # three values to match the three format specifiers
        print "difference in outputs: (%.4f, %.4f, %.4f)" % (
            inner[i], tmp_out[i], tmp_out2[i])

###############
# compare to LibSVM

dasvm_manual_libsvm = LibSVM(1.0, wdk, lab)
dasvm_manual_libsvm.set_linear_term(linterm_manual)
dasvm_manual_libsvm.set_bias_enabled(False)

Math_init_random(1)
dasvm_manual_libsvm.train()

###############
# compare to LibLinear

dasvm_manual_liblinear = LibLinear(1.0, feat, lab)
dasvm_manual_liblinear.set_linear_term(linterm_manual)
dasvm_manual_liblinear.set_bias_enabled(False)
dasvm_manual_liblinear.train()

#############################################
# compute DA-SVMs in shogun (kernelized AND linear)
#############################################

dasvm_libsvm = DomainAdaptationSVM(1.0, wdk, lab, presvm_libsvm, B)
#!/usr/bin/env python
from tools.load import LoadMatrix
lm = LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

parameter_list = [[traindat, testdat, 1.5, 10], [traindat, testdat, 1.5, 10]]

from shogun.Library import Math_init_random
Math_init_random(12345)

def preprocessor_randomfouriergausspreproc_modular(fm_train_real=traindat,
        fm_test_real=testdat, width=1.4, size_cache=10):
    from shogun.Kernel import Chi2Kernel
    from shogun.Features import RealFeatures
    from shogun.Preprocessor import RandomFourierGaussPreproc

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)

    preproc = RandomFourierGaussPreproc()
    preproc.init(feats_train)
    feats_train.add_preprocessor(preproc)
    feats_train.apply_preprocessor()
    feats_test.add_preprocessor(preproc)
    feats_test.apply_preprocessor()
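    # The fragment ends here; a plausible continuation, mirroring the other
    # preprocessor examples, would evaluate the Chi2Kernel imported above on
    # the preprocessed features. A sketch, not the verbatim original:
    #
    #   kernel = Chi2Kernel(feats_train, feats_train, width, size_cache)
    #   km_train = kernel.get_kernel_matrix()
    #   kernel.init(feats_train, feats_test)
    #   km_test = kernel.get_kernel_matrix()
    #   return km_train, km_test, kernel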
from numpy import array, append
from shogun.Distribution import GMM
from shogun.Library import Math_init_random

Math_init_random(5)

real_gmm = GMM(2, 0)

real_gmm.set_nth_mean(array([1.0, 1.0]), 0)
real_gmm.set_nth_mean(array([-1.0, -1.0]), 1)

real_gmm.set_nth_cov(array([[1.0, 0.2], [0.2, 0.1]]), 0)
real_gmm.set_nth_cov(array([[0.3, 0.1], [0.1, 1.0]]), 1)

real_gmm.set_coef(array([0.3, 0.7]))

generated = array([real_gmm.sample()])
for i in range(199):
    generated = append(generated, array([real_gmm.sample()]), axis=0)

generated = generated.transpose()

parameter_list = [[generated, 2, 1e-9, 1000, 1e-9, 0]]

def clustering_gmm_modular(fm_train=generated, n=2, min_cov=1e-9,
        max_iter=1000, min_change=1e-9, cov_type=0):