def _run_perceptron():
    """Run the Perceptron classifier and write gathered data to file."""
    params = {
        'name': 'Perceptron',
        'type': 'perceptron',
        'num_threads': 1,
        'learn_rate': .1,
        'max_iter': 1000,
        'data': dataop.get_clouds(2),
        'feature_class': 'simple',
        'feature_type': 'Real',
        'label_type': 'twoclass',
        'accuracy': 1e-7
    }
    feats = featop.get_features(
        params['feature_class'], params['feature_type'], params['data'])
    num_vectors = feats['train'].get_num_vectors()
    params['labels'], labels = dataop.get_labels(
        num_vectors, params['label_type'])

    machine = classifier.Perceptron(feats['train'], labels)
    machine.parallel.set_num_threads(params['num_threads'])
    machine.set_learn_rate(params['learn_rate'])
    machine.set_max_iter(params['max_iter'])
    machine.train()

    params['bias'] = machine.get_bias()
    machine.set_features(feats['test'])
    params['classified'] = machine.classify().get_labels()

    output = fileop.get_output(category.CLASSIFIER, params)
    fileop.write(category.CLASSIFIER, output)
def _compute_pie(feats, params):
    """Compute a kernel with PluginEstimate.

    @param feats kernel features
    @param params dict containing various kernel parameters
    """
    output = fileop.get_output(category.KERNEL, params)
    lab, labels = dataop.get_labels(feats['train'].get_num_vectors())
    output['classifier_labels'] = lab

    estimator = PluginEstimate()
    estimator.set_labels(labels)
    estimator.set_features(feats['train'])
    estimator.train()

    # resolve the kernel class by name, then build it on the train features
    kclass = eval('kernel.' + params['name'] + 'Kernel')
    kern = kclass(feats['train'], feats['train'], estimator)
    output['kernel_matrix_train'] = kern.get_kernel_matrix()
    kern.init(feats['train'], feats['test'])
    estimator.set_features(feats['test'])
    output['kernel_matrix_test'] = kern.get_kernel_matrix()

    output['classifier_classified'] = estimator.apply().get_labels()
    fileop.write(category.KERNEL, output)
def _run_custom():
    """Run Custom kernel."""
    params = {
        'name': 'Custom',
        'accuracy': 1e-7,
        'feature_class': 'simple',
        'feature_type': 'Real'
    }
    dim_square = 7
    data = dataop.get_rand(dim_square=dim_square)
    feats = featop.get_features(
        params['feature_class'], params['feature_type'], data)
    data = data['train']
    symdata = data + data.T

    # flatten the lower triangle of the symmetric matrix, row by row
    lowertriangle = numpy.array(
        [symdata[(x, y)]
         for x in xrange(symdata.shape[1])
         for y in xrange(symdata.shape[0])
         if y <= x])

    kern = kernel.CustomKernel()
    kern.set_triangle_kernel_matrix_from_triangle(lowertriangle)
    km_triangletriangle = kern.get_kernel_matrix()
    kern.set_triangle_kernel_matrix_from_full(symdata)
    km_fulltriangle = kern.get_kernel_matrix()
    kern.set_full_kernel_matrix_from_full(data)
    km_fullfull = kern.get_kernel_matrix()

    output = {
        'kernel_matrix_triangletriangle': km_triangletriangle,
        'kernel_matrix_fulltriangle': km_fulltriangle,
        'kernel_matrix_fullfull': km_fullfull,
        'kernel_symdata': numpy.matrix(symdata),
        'kernel_data': numpy.matrix(data),
        'kernel_dim_square': dim_square
    }
    output.update(fileop.get_output(category.KERNEL, params))
    fileop.write(category.KERNEL, output)
def _compute_pie(feats, params):
    """Compute a kernel with PluginEstimate.

    @param feats kernel features
    @param params dict containing various kernel parameters
    """
    output = fileop.get_output(category.KERNEL, params)
    lab, labels = dataop.get_labels(feats['train'].get_num_vectors())
    output['classifier_labels'] = lab

    pie = PluginEstimate()
    pie.set_labels(labels)
    pie.set_features(feats['train'])
    pie.train()

    # look up the kernel class by name and evaluate on train/test splits
    make_kernel = eval('kernel.' + params['name'] + 'Kernel')
    kern = make_kernel(feats['train'], feats['train'], pie)
    output['kernel_matrix_train'] = kern.get_kernel_matrix()
    kern.init(feats['train'], feats['test'])
    pie.set_features(feats['test'])
    output['kernel_matrix_test'] = kern.get_kernel_matrix()

    output['classifier_classified'] = pie.classify().get_labels()
    fileop.write(category.KERNEL, output)
def _compute(feats, params, pout=None):
    """Compute a kernel and write gathered data to file.

    @param feats features of the kernel
    @param params dict with parameters to kernel
    @param pout previously gathered data ready to be written to file
    """
    output = fileop.get_output(category.KERNEL, params)
    if pout:
        output.update(pout)

    kclass = eval('kernel.' + params['name'] + 'Kernel')
    if 'args' in params:
        kern = kclass(*params['args']['val'])
    else:
        kern = kclass()

    if 'normalizer' in params:
        kern.set_normalizer(params['normalizer'])

    kern.init(feats['train'], feats['train'])
    output['kernel_matrix_train'] = kern.get_kernel_matrix()
    kern.init(feats['train'], feats['test'])
    output['kernel_matrix_test'] = kern.get_kernel_matrix()

    fileop.write(category.KERNEL, output)
def _run_real(name, args=None):
    """Run preprocessor applied on RealFeatures.

    @param name name of the preprocessor
    @param args argument list (in a dict) for the preprocessor
    """
    # the Gaussian kernel below is only the vehicle to exercise the preproc
    params = {
        'name': 'Gaussian',
        'accuracy': 1e-8,
        'data': dataop.get_rand(),
        'feature_class': 'simple',
        'feature_type': 'Real',
        'args': {'key': ('width',), 'val': (1.2,)}
    }
    feats = featop.get_features(
        params['feature_class'], params['feature_type'], params['data'])
    if args:
        feats = featop.add_preproc(name, feats, *args['val'])
    else:
        feats = featop.add_preproc(name, feats)

    output = _compute(feats, params)

    params = {'name': name}
    if args:
        params['args'] = args
    output.update(fileop.get_output(category.PREPROC, params))
    fileop.write(category.PREPROC, output)
def _run(name):
    """Run generator for a specific distribution method.

    @param name Name of the distribution method
    """
    # put some constantness into randomness
    Math_init_random(INIT_RANDOM)

    params = {
        'name': name,
        'accuracy': 1e-7,
        'data': dataop.get_dna(),
        'alphabet': 'DNA',
        'feature_class': 'string_complex',
        'feature_type': 'Word'
    }
    output = fileop.get_output(category.DISTRIBUTION, params)

    feats = featop.get_features(
        params['feature_class'], params['feature_type'], params['data'])
    dist = eval('distribution.' + name)(feats['train'])
    dist.train()

    output[PREFIX + 'likelihood'] = dist.get_log_likelihood_sample()
    output[PREFIX + 'derivatives'] = _get_derivatives(
        dist, feats['train'].get_num_vectors())

    fileop.write(category.DISTRIBUTION, output)
def _compute(feats, params, pout=None):
    """Compute a kernel and write gathered data to file.

    @param feats features of the kernel
    @param params dict with parameters to kernel
    @param pout previously gathered data ready to be written to file
    """
    output = fileop.get_output(category.KERNEL, params)
    if pout:
        output.update(pout)

    # instantiate the kernel class named in params, with optional ctor args
    kfun = eval('kernel.' + params['name'] + 'Kernel')
    kern = kfun(*params['args']['val']) if 'args' in params else kfun()

    if 'normalizer' in params:
        kern.set_normalizer(params['normalizer'])

    kern.init(feats['train'], feats['train'])
    output['kernel_matrix_train'] = kern.get_kernel_matrix()
    kern.init(feats['train'], feats['test'])
    output['kernel_matrix_test'] = kern.get_kernel_matrix()

    fileop.write(category.KERNEL, output)
def _compute_top_fisher(feats, pout):
    """Compute PolyKernel with TOP or FKFeatures.

    @param feats features of the kernel
    @param pout previously gathered data ready to be written to file
    """
    params = {
        'name': 'Poly',
        'accuracy': 1e-6,
        'args': {
            'key': ('size', 'degree', 'inhomogene'),
            'val': (10, 1, False)
        }
    }
    output = fileop.get_output(category.KERNEL, params)
    output.update(pout)

    kclass = eval('kernel.' + params['name'] + 'Kernel')
    kern = kclass(feats['train'], feats['train'], *params['args']['val'])
    output['kernel_matrix_train'] = kern.get_kernel_matrix()
    kern.init(feats['train'], feats['test'])
    output['kernel_matrix_test'] = kern.get_kernel_matrix()

    fileop.write(category.KERNEL, output)
def _compute_top_fisher(feats, pout):
    """Compute PolyKernel with TOP or FKFeatures.

    @param feats features of the kernel
    @param pout previously gathered data ready to be written to file
    """
    params = {
        'name': 'Poly',
        'accuracy': 1e-6,
        'args': {
            'key': ('size', 'degree', 'inhomogene'),
            'val': (10, 1, False)
        }
    }
    output = fileop.get_output(category.KERNEL, params)
    output.update(pout)

    poly = eval('kernel.' + params['name'] + 'Kernel')(
        feats['train'], feats['train'], *params['args']['val'])
    output['kernel_matrix_train'] = poly.get_kernel_matrix()
    poly.init(feats['train'], feats['test'])
    output['kernel_matrix_test'] = poly.get_kernel_matrix()

    fileop.write(category.KERNEL, output)
def _compute(feats, params, pout=None):
    """Compute a kernel and write gathered data to file.

    @param feats features of the kernel
    @param params dict with parameters to kernel
    @param pout previously gathered data ready to be written to file
    """
    output = fileop.get_output(category.KERNEL, params)
    if pout:
        output.update(pout)

    make = eval('kernel.' + params['name'] + 'Kernel')
    if 'args' in params:
        kern = make(*params['args']['val'])
    else:
        kern = make()

    if 'normalizer' in params:
        kern.set_normalizer(params['normalizer'])

    # train x train, then train x test
    kern.init(feats['train'], feats['train'])
    output['kernel_matrix_train'] = kern.get_kernel_matrix()
    kern.init(feats['train'], feats['test'])
    output['kernel_matrix_test'] = kern.get_kernel_matrix()

    fileop.write(category.KERNEL, output)
def _compute_pie(feats, params):
    """Compute a kernel with PluginEstimate.

    @param feats kernel features
    @param params dict containing various kernel parameters
    """
    output = fileop.get_output(category.KERNEL, params)
    lab, labels = dataop.get_labels(feats['train'].get_num_vectors())
    output['classifier_labels'] = lab

    pie = PluginEstimate()
    pie.set_labels(labels)
    pie.set_features(feats['train'])
    pie.train()

    kern = eval('kernel.' + params['name'] + 'Kernel')(
        feats['train'], feats['train'], pie)
    output['kernel_matrix_train'] = kern.get_kernel_matrix()
    kern.init(feats['train'], feats['test'])
    pie.set_features(feats['test'])
    output['kernel_matrix_test'] = kern.get_kernel_matrix()

    output['classifier_classified'] = pie.classify().get_labels()
    fileop.write(category.KERNEL, output)
def _run(name):
    """Run generator for a specific distribution method.

    @param name Name of the distribution method
    """
    # put some constantness into randomness
    Math_init_random(INIT_RANDOM)

    params = {
        'name': name,
        'accuracy': 1e-7,
        'data': dataop.get_dna(),
        'alphabet': 'DNA',
        'feature_class': 'string_complex',
        'feature_type': 'Word'
    }
    output = fileop.get_output(category.DISTRIBUTION, params)

    feats = featop.get_features(
        params['feature_class'], params['feature_type'], params['data'])
    dist_class = eval('distribution.' + name)
    dist = dist_class(feats['train'])
    dist.train()

    output[PREFIX + 'likelihood'] = dist.get_log_likelihood_sample()
    output[PREFIX + 'derivatives'] = _get_derivatives(
        dist, feats['train'].get_num_vectors())

    fileop.write(category.DISTRIBUTION, output)
def _run_real(name, args=None):
    """Run preprocessor applied on RealFeatures.

    @param name name of the preprocessor
    @param args argument list (in a dict) for the preprocessor
    """
    params = {
        'name': 'Gaussian',
        'accuracy': 1e-8,
        'data': dataop.get_rand(),
        'feature_class': 'simple',
        'feature_type': 'Real',
        'args': {'key': ('width',), 'val': (1.2,)}
    }
    feats = featop.get_features(
        params['feature_class'], params['feature_type'], params['data'])

    # attach the preprocessor, with constructor args when given
    if args:
        feats = featop.add_preproc(name, feats, *args['val'])
    else:
        feats = featop.add_preproc(name, feats)

    output = _compute(feats, params)

    params = {'name': name}
    if args:
        params['args'] = args
    output.update(fileop.get_output(category.PREPROC, params))
    fileop.write(category.PREPROC, output)
def _run_lda():
    """Run Linear Discriminant Analysis classifier."""
    params = {
        'name': 'LDA',
        'type': 'lda',
        'gamma': 0.1,
        'num_threads': 1,
        'data': dataop.get_clouds(2),
        'feature_class': 'simple',
        'feature_type': 'Real',
        'label_type': 'twoclass',
        'accuracy': 1e-7
    }
    feats = featop.get_features(
        params['feature_class'], params['feature_type'], params['data'])
    params['labels'], labels = dataop.get_labels(
        feats['train'].get_num_vectors(), params['label_type'])

    lda = classifier.LDA(params['gamma'], feats['train'], labels)
    lda.parallel.set_num_threads(params['num_threads'])
    lda.train()

    lda.set_features(feats['test'])
    params['classified'] = lda.classify().get_labels()

    output = fileop.get_output(category.CLASSIFIER, params)
    fileop.write(category.CLASSIFIER, output)
def _run_combined():
    """Run Combined kernel."""
    kern = kernel.CombinedKernel()
    feats = {'train': CombinedFeatures(), 'test': CombinedFeatures()}
    output = {}
    params = {'name': 'Combined', 'accuracy': 1e-7}
    subkdata = [
        {
            'name': 'FixedDegreeString',
            'feature_class': 'string',
            'feature_type': 'Char',
            'args': {'key': ('size', 'degree'), 'val': (10, 3)}
        },
        {
            'name': 'PolyMatchString',
            'feature_class': 'string',
            'feature_type': 'Char',
            'args': {
                'key': ('size', 'degree', 'inhomogene'),
                'val': (10, 3, True)
            }
        },
        {
            'name': 'LocalAlignmentString',
            'feature_class': 'string',
            'feature_type': 'Char',
            'args': {'key': ('size',), 'val': (10,)}
        }
    ]

    # build each subkernel, attach it and its features to the combined ones
    for i, sd in enumerate(subkdata):
        subk = eval('kernel.' + sd['name'] + 'Kernel')(*sd['args']['val'])
        sd['data'] = dataop.get_dna()
        subkfeats = featop.get_features(
            sd['feature_class'], sd['feature_type'], sd['data'])
        output.update(
            fileop.get_output(category.KERNEL, sd, 'subkernel' + str(i) + '_'))
        kern.append_kernel(subk)
        feats['train'].append_feature_obj(subkfeats['train'])
        feats['test'].append_feature_obj(subkfeats['test'])

    output.update(fileop.get_output(category.KERNEL, params))
    kern.init(feats['train'], feats['train'])
    output['kernel_matrix_train'] = kern.get_kernel_matrix()
    kern.init(feats['train'], feats['test'])
    output['kernel_matrix_test'] = kern.get_kernel_matrix()

    fileop.write(category.KERNEL, output)
def _compute_svm(params, labels, feats, kernel, pout):
    """Perform computations on SVM.

    Perform all necessary computations on SVM and gather the output.

    @param params misc parameters for the SVM's constructor
    @param labels labels to be used for the SVM (if at all)
    @param feats features to the SVM
    @param kernel kernel for kernel-SVMs
    @param pout previously gathered output data ready to be written to file
    """
    svm = _get_svm(params, labels, feats, kernel)
    if not svm:
        return

    svm.parallel.set_num_threads(params['num_threads'])
    try:
        svm.set_epsilon(params['epsilon'])
    except AttributeError:
        # some machines (e.g. SGD) do not expose an epsilon setter
        pass

    if 'bias_enabled' in params:
        svm.set_bias_enabled(params['bias_enabled'])
    if 'max_train_time' in params:
        # fixed: dropped a redundant self-assignment of
        # params['max_train_time'] that followed this call
        svm.set_max_train_time(params['max_train_time'])
    if 'linadd_enabled' in params:
        svm.set_linadd_enabled(params['linadd_enabled'])
    if 'batch_enabled' in params:
        svm.set_batch_computation_enabled(params['batch_enabled'])

    svm.train()

    # a bias is gathered when explicitly enabled, or always for kernel SVMs
    if (('bias_enabled' in params and params['bias_enabled']) or
            params['type'] == 'kernel'):
        params['bias'] = svm.get_bias()

    if params['type'] == 'kernel':
        params['alpha_sum'], params['sv_sum'] = \
            _get_svm_sum_alpha_and_sv(svm, params['label_type'])
        kernel.init(feats['train'], feats['test'])
    elif params['type'] == 'linear' or params['type'] == 'wdsvmocas':
        svm.set_features(feats['test'])

    params['classified'] = svm.classify().get_labels()

    output = fileop.get_output(category.CLASSIFIER, params)
    if pout:
        output.update(pout)

    fileop.write(category.CLASSIFIER, output)
def _run_combined():
    """Run Combined kernel."""
    kern = kernel.CombinedKernel()
    feats = {'train': CombinedFeatures(), 'test': CombinedFeatures()}
    output = {}
    params = {'name': 'Combined', 'accuracy': 1e-7}
    subkdata = [
        {
            'name': 'FixedDegreeString',
            'feature_class': 'string',
            'feature_type': 'Char',
            'args': {'key': ('size', 'degree'), 'val': (10, 3)}
        },
        {
            'name': 'PolyMatchString',
            'feature_class': 'string',
            'feature_type': 'Char',
            'args': {
                'key': ('size', 'degree', 'inhomogene'),
                'val': (10, 3, True)
            }
        },
        {
            'name': 'LocalAlignmentString',
            'feature_class': 'string',
            'feature_type': 'Char',
            'args': {'key': ('size',), 'val': (10,)}
        }
    ]

    for idx, sd in enumerate(subkdata):
        kclass = eval('kernel.' + sd['name'] + 'Kernel')
        subk = kclass(*sd['args']['val'])
        sd['data'] = dataop.get_dna()
        subkfeats = featop.get_features(
            sd['feature_class'], sd['feature_type'], sd['data'])
        # each subkernel's output entries are prefixed with its index
        output.update(fileop.get_output(
            category.KERNEL, sd, 'subkernel' + str(idx) + '_'))
        kern.append_kernel(subk)
        feats['train'].append_feature_obj(subkfeats['train'])
        feats['test'].append_feature_obj(subkfeats['test'])

    output.update(fileop.get_output(category.KERNEL, params))
    kern.init(feats['train'], feats['train'])
    output['kernel_matrix_train'] = kern.get_kernel_matrix()
    kern.init(feats['train'], feats['test'])
    output['kernel_matrix_test'] = kern.get_kernel_matrix()

    fileop.write(category.KERNEL, output)
def _compute(params, feats, kernel, pout):
    """Compute a regression and gather result data.

    @param params misc parameters for the regression method
    @param feats features of the kernel/regression
    @param kernel kernel
    @param pout previously gathered data from kernel ready to be written to file
    """
    kernel.parallel.set_num_threads(params['num_threads'])
    kernel.init(feats['train'], feats['train'])
    params['labels'], labels = dataop.get_labels(
        feats['train'].get_num_vectors())

    try:
        fun = eval('regression.' + params['name'])
    except AttributeError:
        # method not available in this build; nothing to do
        return

    if params['type'] == 'svm':
        regression = fun(params['C'], params['epsilon'], kernel, labels)
        regression.set_tube_epsilon(params['tube_epsilon'])
    else:
        regression = fun(params['tau'], kernel, labels)

    regression.parallel.set_num_threads(params['num_threads'])
    regression.train()

    if params['type'] == 'svm':
        params['bias'] = regression.get_bias()
        params['alpha_sum'] = sum(regression.get_alphas().tolist())
        params['sv_sum'] = sum(regression.get_support_vectors())

    kernel.init(feats['train'], feats['test'])
    params['classified'] = regression.classify().get_labels()

    output = pout.copy()
    output.update(fileop.get_output(category.REGRESSION, params))
    fileop.write(category.REGRESSION, output)
def _run_hmm():
    """Run generator for Hidden-Markov-Model."""
    # put some constantness into randomness
    Math_init_random(INIT_RANDOM)

    num_examples = 4
    params = {
        'name': 'HMM',
        'accuracy': 1e-6,
        'N': 3,
        'M': 6,
        'num_examples': num_examples,
        'pseudo': 1e-10,
        'order': 1,
        'alphabet': 'CUBE',
        'feature_class': 'string_complex',
        'feature_type': 'Word',
        'data': dataop.get_cubes(num_examples, 1)
    }
    output = fileop.get_output(category.DISTRIBUTION, params)

    feats = featop.get_features(
        params['feature_class'], params['feature_type'], params['data'],
        eval('features.' + params['alphabet']), params['order'])

    hmm = distribution.HMM(
        feats['train'], params['N'], params['M'], params['pseudo'])
    hmm.train()
    hmm.baum_welch_viterbi_train(distribution.BW_NORMAL)

    output[PREFIX + 'likelihood'] = hmm.get_log_likelihood_sample()
    output[PREFIX + 'derivatives'] = _get_derivatives(
        hmm, feats['train'].get_num_vectors())

    # accumulate best-path scores over all examples and states
    output[PREFIX + 'best_path'] = 0
    output[PREFIX + 'best_path_state'] = 0
    for example in xrange(num_examples):
        output[PREFIX + 'best_path'] += hmm.best_path(example)
        for state in xrange(params['N']):
            output[PREFIX + 'best_path_state'] += \
                hmm.get_best_path_state(example, state)

    fileop.write(category.DISTRIBUTION, output)
def _compute(params, feats, kernel, pout):
    """Compute a regression and gather result data.

    @param params misc parameters for the regression method
    @param feats features of the kernel/regression
    @param kernel kernel
    @param pout previously gathered data from kernel ready to be written to file
    """
    kernel.parallel.set_num_threads(params['num_threads'])
    kernel.init(feats['train'], feats['train'])
    params['labels'], labels = dataop.get_labels(
        feats['train'].get_num_vectors())

    try:
        fun = eval('regression.' + params['name'])
    except AttributeError:
        return

    # SVM-type regressors take C/epsilon, the others take tau
    if params['type'] == 'svm':
        regression = fun(params['C'], params['epsilon'], kernel, labels)
        regression.set_tube_epsilon(params['tube_epsilon'])
    else:
        regression = fun(params['tau'], kernel, labels)

    regression.parallel.set_num_threads(params['num_threads'])
    regression.train()

    if params['type'] == 'svm':
        params['bias'] = regression.get_bias()
        params['alpha_sum'] = sum(regression.get_alphas().tolist())
        params['sv_sum'] = sum(regression.get_support_vectors())

    kernel.init(feats['train'], feats['test'])
    params['classified'] = regression.apply().get_labels()

    output = pout.copy()
    output.update(fileop.get_output(category.REGRESSION, params))
    fileop.write(category.REGRESSION, output)
def _run(name, first_arg):
    """Run generator for a specific clustering method.

    @param name Name of the clustering method to run.
    @param first_arg First argument to the clustering's constructor;
        so far, only this distinguishes the instantiation of the
        different methods.
    """
    # put some constantness into randomness
    Math_init_random(dataop.INIT_RANDOM)

    num_clouds = 3
    params = {
        'name': 'EuclidianDistance',
        'data': dataop.get_clouds(num_clouds, 5),
        'feature_class': 'simple',
        'feature_type': 'Real'
    }
    feats = featop.get_features(
        params['feature_class'], params['feature_type'], params['data'])
    distance = eval(params['name'])(feats['train'], feats['train'])
    output = fileop.get_output(category.DISTANCE, params)

    params = {'name': name, 'accuracy': 1e-8, first_arg: num_clouds}
    clustering = eval('clustering.' + name)(params[first_arg], distance)
    clustering.train()

    distance.init(feats['train'], feats['test'])

    if name == 'KMeans':
        params['radi'] = clustering.get_radiuses()
        params['centers'] = clustering.get_cluster_centers()
    elif name == 'Hierarchical':
        params['merge_distance'] = clustering.get_merge_distances()
        params['pairs'] = clustering.get_cluster_pairs()

    output.update(fileop.get_output(category.CLUSTERING, params))
    fileop.write(category.CLUSTERING, output)
def _run_hmm():
    """Run generator for Hidden-Markov-Model."""
    # put some constantness into randomness
    Math_init_random(INIT_RANDOM)

    num_examples = 4
    params = {
        'name': 'HMM',
        'accuracy': 1e-6,
        'N': 3,
        'M': 6,
        'num_examples': num_examples,
        'pseudo': 1e-10,
        'order': 1,
        'alphabet': 'CUBE',
        'feature_class': 'string_complex',
        'feature_type': 'Word',
        'data': dataop.get_cubes(num_examples, 1)
    }
    output = fileop.get_output(category.DISTRIBUTION, params)

    feats = featop.get_features(
        params['feature_class'], params['feature_type'], params['data'],
        eval('features.' + params['alphabet']), params['order'])
    hmm = distribution.HMM(
        feats['train'], params['N'], params['M'], params['pseudo'])
    hmm.train()
    hmm.baum_welch_viterbi_train(distribution.BW_NORMAL)

    output[PREFIX + 'likelihood'] = hmm.get_log_likelihood_sample()
    output[PREFIX + 'derivatives'] = _get_derivatives(
        hmm, feats['train'].get_num_vectors())

    output[PREFIX + 'best_path'] = 0
    output[PREFIX + 'best_path_state'] = 0
    for i in xrange(num_examples):
        output[PREFIX + 'best_path'] += hmm.best_path(i)
        for j in xrange(params['N']):
            output[PREFIX + 'best_path_state'] += hmm.get_best_path_state(i, j)

    fileop.write(category.DISTRIBUTION, output)
def _run_string_complex(ftype):
    """Run preprocessor applied on complex StringFeatures.

    @param ftype Feature type, like Word
    """
    params = {
        'name': 'Comm' + ftype + 'String',
        'accuracy': 1e-9,
        'feature_class': 'string_complex',
        'feature_type': ftype,
        'data': dataop.get_dna()
    }
    feats = featop.get_features(
        params['feature_class'], params['feature_type'], params['data'])

    # string_complex gets preproc added implicitely on Word/Ulong feats
    output = _compute(feats, params)

    params = {'name': 'Sort' + ftype + 'String'}
    output.update(fileop.get_output(category.PREPROC, params))
    fileop.write(category.PREPROC, output)
def _run_custom():
    """Run Custom kernel."""
    params = {
        'name': 'Custom',
        'accuracy': 1e-7,
        'feature_class': 'simple',
        'feature_type': 'Real'
    }
    dim_square = 7
    data = dataop.get_rand(dim_square=dim_square)
    feats = featop.get_features(
        params['feature_class'], params['feature_type'], data)
    data = data['train']
    symdata = data + data.T

    # gather the lower-triangular entries of the symmetrized matrix
    lowertriangle = numpy.array(
        [symdata[(row, col)]
         for row in xrange(symdata.shape[1])
         for col in xrange(symdata.shape[0])
         if col <= row])

    kern = kernel.CustomKernel()
    kern.set_triangle_kernel_matrix_from_triangle(lowertriangle)
    km_triangletriangle = kern.get_kernel_matrix()
    kern.set_triangle_kernel_matrix_from_full(symdata)
    km_fulltriangle = kern.get_kernel_matrix()
    kern.set_full_kernel_matrix_from_full(data)
    km_fullfull = kern.get_kernel_matrix()

    output = {
        'kernel_matrix_triangletriangle': km_triangletriangle,
        'kernel_matrix_fulltriangle': km_fulltriangle,
        'kernel_matrix_fullfull': km_fullfull,
        'kernel_symdata': numpy.matrix(symdata),
        'kernel_data': numpy.matrix(data),
        'kernel_dim_square': dim_square
    }
    output.update(fileop.get_output(category.KERNEL, params))
    fileop.write(category.KERNEL, output)
def _run_string_complex(ftype):
    """Run preprocessor applied on complex StringFeatures.

    @param ftype Feature type, like Word
    """
    kernel_name = 'Comm' + ftype + 'String'
    params = {
        'name': kernel_name,
        'accuracy': 1e-9,
        'feature_class': 'string_complex',
        'feature_type': ftype,
        'data': dataop.get_dna()
    }
    feats = featop.get_features(
        params['feature_class'], params['feature_type'], params['data'])

    # string_complex gets preproc added implicitely on Word/Ulong feats
    output = _compute(feats, params)

    output.update(
        fileop.get_output(category.PREPROC, {'name': 'Sort' + ftype + 'String'}))
    fileop.write(category.PREPROC, output)
def _compute_top_fisher(feats, pout):
    """Compute PolyKernel with TOP or FKFeatures.

    @param feats features of the kernel
    @param pout previously gathered data ready to be written to file
    """
    params = {
        'name': 'Poly',
        'accuracy': 1e-6,
        'args': {
            'key': ('size', 'degree', 'inhomogene'),
            'val': (10, 1, False)
        }
    }
    output = fileop.get_output(category.KERNEL, params)
    output.update(pout)

    ctor = eval('kernel.' + params['name'] + 'Kernel')
    kern = ctor(feats['train'], feats['train'], *params['args']['val'])
    output['kernel_matrix_train'] = kern.get_kernel_matrix()
    kern.init(feats['train'], feats['test'])
    output['kernel_matrix_test'] = kern.get_kernel_matrix()

    fileop.write(category.KERNEL, output)
def _run(name, first_arg):
    """Run generator for a specific clustering method.

    @param name Name of the clustering method to run.
    @param first_arg First argument to the clustering's constructor;
        so far, only this distinguishes the instantiation of the
        different methods.
    """
    # put some constantness into randomness
    Math_init_random(dataop.INIT_RANDOM)

    num_clouds = 3
    params = {
        'name': 'EuclidianDistance',
        'data': dataop.get_clouds(num_clouds, 5),
        'feature_class': 'simple',
        'feature_type': 'Real'
    }
    feats = featop.get_features(
        params['feature_class'], params['feature_type'], params['data'])
    dfun = eval(params['name'])
    distance = dfun(feats['train'], feats['train'])
    output = fileop.get_output(category.DISTANCE, params)

    params = {'name': name, 'accuracy': 1e-8, first_arg: num_clouds}
    fun = eval('clustering.' + name)
    clustering = fun(params[first_arg], distance)
    clustering.train()

    distance.init(feats['train'], feats['test'])

    # method-specific result data
    if name == 'KMeans':
        params['radi'] = clustering.get_radiuses()
        params['centers'] = clustering.get_cluster_centers()
    elif name == 'Hierarchical':
        params['merge_distance'] = clustering.get_merge_distances()
        params['pairs'] = clustering.get_cluster_pairs()

    output.update(fileop.get_output(category.CLUSTERING, params))
    fileop.write(category.CLUSTERING, output)
def _run_custom():
    """Run Custom kernel."""
    params = {
        'name': 'Custom',
        'accuracy': 1e-7,
        'feature_class': 'simple',
        'feature_type': 'Real'
    }
    dim_square = 7
    data = dataop.get_rand(dim_square=dim_square)
    feats = featop.get_features(
        params['feature_class'], params['feature_type'], data)
    data = data['train']
    symdata = data + data.T

    lowertriangle = numpy.array([
        symdata[(x, y)]
        for x in xrange(symdata.shape[1])
        for y in xrange(symdata.shape[0])
        if y <= x
    ])

    kern = kernel.CustomKernel()
    kern.set_triangle_kernel_matrix_from_triangle(lowertriangle)
    km_triangletriangle = kern.get_kernel_matrix()
    kern.set_triangle_kernel_matrix_from_full(symdata)
    km_fulltriangle = kern.get_kernel_matrix()
    kern.set_full_kernel_matrix_from_full(data)
    km_fullfull = kern.get_kernel_matrix()

    output = {
        'kernel_matrix_triangletriangle': km_triangletriangle,
        'kernel_matrix_fulltriangle': km_fulltriangle,
        'kernel_matrix_fullfull': km_fullfull,
        'kernel_symdata': numpy.matrix(symdata),
        'kernel_data': numpy.matrix(data),
        'kernel_dim_square': dim_square
    }
    output.update(fileop.get_output(category.KERNEL, params))
    fileop.write(category.KERNEL, output)
def _compute(feats, params):
    """Compute a distance and gather result data.

    @param feats Train and test features
    @param params dict with parameters to distance
    """
    dclass = eval('distance.' + params['name'])
    if 'args' in params:
        dist = dclass(feats['train'], feats['train'], *params['args']['val'])
    else:
        dist = dclass(feats['train'], feats['train'])

    dm_train = dist.get_distance_matrix()
    dist.init(feats['train'], feats['test'])
    dm_test = dist.get_distance_matrix()

    output = {
        'distance_matrix_train': dm_train,
        'distance_matrix_test': dm_test,
    }
    output.update(fileop.get_output(category.DISTANCE, params))
    fileop.write(category.DISTANCE, output)
def _compute(feats, params):
    """Compute a distance and gather result data.

    @param feats Train and test features
    @param params dict with parameters to distance
    """
    fun = eval('distance.' + params['name'])
    # build on train x train, with optional constructor args
    if 'args' in params:
        dist = fun(feats['train'], feats['train'], *params['args']['val'])
    else:
        dist = fun(feats['train'], feats['train'])

    output = {'distance_matrix_train': dist.get_distance_matrix()}
    dist.init(feats['train'], feats['test'])
    output['distance_matrix_test'] = dist.get_distance_matrix()

    output.update(fileop.get_output(category.DISTANCE, params))
    fileop.write(category.DISTANCE, output)
def _run_string_complex(ftype):
    """Run preprocessor applied on complex StringFeatures.

    @param ftype Feature type, like Word
    """
    params = {
        'name': 'Comm' + ftype + 'String',
        'accuracy': 1e-9,
        'feature_class': 'string_complex',
        'feature_type': ftype,
        'data': dataop.get_dna()
    }
    feats = featop.get_features(
        params['feature_class'], params['feature_type'], params['data'])

    # string_complex gets preproc added implicitely on Word/Ulong feats
    output = _compute(feats, params)

    params = {'name': 'Sort' + ftype + 'String'}
    output.update(fileop.get_output(category.PREPROC, params))
    fileop.write(category.PREPROC, output)
def _run_knn():
    """Run K-Nearest-Neighbour classifier."""
    params = {
        'name': 'EuclidianDistance',
        'data': dataop.get_clouds(2),
        'feature_class': 'simple',
        'feature_type': 'Real'
    }
    feats = featop.get_features(
        params['feature_class'], params['feature_type'], params['data'])
    distance = eval(params['name'])(feats['train'], feats['train'])
    output = fileop.get_output(category.DISTANCE, params)

    params = {
        'name': 'KNN',
        'type': 'knn',
        'num_threads': 1,
        'k': 3,
        'label_type': 'twoclass',
        'accuracy': 1e-8
    }
    params['labels'], labels = dataop.get_labels(
        feats['train'].get_num_vectors(), params['label_type'])

    knn = classifier.KNN(params['k'], distance, labels)
    knn.parallel.set_num_threads(params['num_threads'])
    knn.train()

    distance.init(feats['train'], feats['test'])
    params['classified'] = knn.classify().get_labels()

    output.update(fileop.get_output(category.CLASSIFIER, params))
    fileop.write(category.CLASSIFIER, output)