def _run_auc ():
	"""Run AUC kernel.

	First builds a Gaussian subkernel on random RealFeatures, then wraps
	it in an AUC kernel over Word features and computes the output.
	"""
	# handle subkernel
	params = {
		'name': 'Gaussian',
		'data': dataop.get_rand(),
		'feature_class': 'simple',
		'feature_type': 'Real',
		'args': {'key': ('size', 'width'), 'val': (10, 1.7)},
	}
	feats = featop.get_features(
		params['feature_class'], params['feature_type'], params['data'])
	subk = kernel.GaussianKernel(*params['args']['val'])
	subk.init(feats['train'], feats['test'])
	output = fileop.get_output(category.KERNEL, params, 'subkernel0_')

	# handle AUC
	params = {
		'name': 'AUC',
		'data': dataop.get_rand(numpy.ushort, num_feats=2,
			max_train=dataop.NUM_VEC_TRAIN, max_test=dataop.NUM_VEC_TEST),
		'feature_class': 'simple',
		'feature_type': 'Word',
		'accuracy': 1e-8,
		'args': {'key': ('size', 'subkernel'), 'val': (10, subk)},
	}
	feats = featop.get_features(
		params['feature_class'], params['feature_type'], params['data'])
	_compute(feats, params, output)
def _run_feats_string_complex():
	"""Run kernel with complex StringFeatures.

	Exercises the Word-based string kernels first, then the Ulong variant.
	The same params dict is mutated between _compute calls on purpose.
	"""
	params = {'data': dataop.get_dna(), 'feature_class': 'string_complex'}
	params['feature_type'] = 'Word'
	wordfeats = featop.get_features(
		params['feature_class'], params['feature_type'], params['data'])

	params['name'] = 'CommWordString'
	params['accuracy'] = 1e-9
	params['args'] = {'key': ('size', 'use_sign'), 'val': (10, False)}
	_compute(wordfeats, params)
	params['name'] = 'WeightedCommWordString'
	_compute(wordfeats, params)

	params['name'] = 'PolyMatchWordString'
	params['accuracy'] = 1e-10
	params['args'] = {
		'key': ('size', 'degree', 'inhomogene'), 'val': (10, 3, True)}
	_compute(wordfeats, params)
	params['args']['val'] = (10, 3, False)
	_compute(wordfeats, params)

	params['name'] = 'MatchWordString'
	params['args'] = {'key': ('size', 'degree'), 'val': (10, 3)}
	_compute(wordfeats, params)

	# same data, Ulong feature type
	params['feature_type'] = 'Ulong'
	params['accuracy'] = 1e-9
	ulongfeats = featop.get_features(
		params['feature_class'], params['feature_type'], params['data'])
	params['name'] = 'CommUlongString'
	params['args'] = {'key': ('size', 'use_sign'), 'val': (10, False)}
	_compute(ulongfeats, params)
def _run_auc():
    """Run AUC kernel.

    Stage 1 builds a Gaussian subkernel on random RealFeatures; stage 2
    feeds that subkernel into an AUC kernel over random Word features.
    """
    # handle subkernel
    sub_params = {
        "name": "Gaussian",
        "data": dataop.get_rand(),
        "feature_class": "simple",
        "feature_type": "Real",
        "args": {"key": ("size", "width"), "val": (10, 1.7)},
    }
    sub_feats = featop.get_features(
        sub_params["feature_class"], sub_params["feature_type"], sub_params["data"]
    )
    subk = kernel.GaussianKernel(*sub_params["args"]["val"])
    subk.init(sub_feats["train"], sub_feats["test"])
    output = fileop.get_output(category.KERNEL, sub_params, "subkernel0_")

    # handle AUC
    auc_params = {
        "name": "AUC",
        "data": dataop.get_rand(
            numpy.ushort,
            num_feats=2,
            max_train=dataop.NUM_VEC_TRAIN,
            max_test=dataop.NUM_VEC_TEST,
        ),
        "feature_class": "simple",
        "feature_type": "Word",
        "accuracy": 1e-8,
        "args": {"key": ("size", "subkernel"), "val": (10, subk)},
    }
    auc_feats = featop.get_features(
        auc_params["feature_class"], auc_params["feature_type"], auc_params["data"]
    )
    _compute(auc_feats, auc_params, output)
def _run_feats_real():
    """Run kernel with RealFeatures.

    Runs a battery of kernels over the same random data, both dense and
    sparse; the params dict is mutated in sequence between _compute calls.
    """
    params = {
        "data": dataop.get_rand(),
        "accuracy": 1e-8,
        "feature_class": "simple",
        "feature_type": "Real",
    }
    feats = featop.get_features(
        params["feature_class"], params["feature_type"], params["data"])
    sparsefeats = featop.get_features(
        params["feature_class"], params["feature_type"], params["data"],
        sparse=True)

    params["name"] = "Gaussian"
    params["args"] = {"key": ("size", "width"), "val": (10, 1.3)}
    _compute(feats, params)

    params["name"] = "GaussianShift"
    params["args"] = {
        "key": ("size", "width", "max_shift", "shift_step"),
        "val": (10, 1.3, 2, 1),
    }
    _compute(feats, params)

    params["name"] = "Gaussian"
    params["args"] = {"key": ("size", "width"), "val": (10, 1.7)}
    _compute(sparsefeats, params)

    params["accuracy"] = 0
    params["name"] = "Const"
    params["args"] = {"key": ("c",), "val": (23.0,)}
    _compute(feats, params)

    params["name"] = "Diag"
    params["args"] = {"key": ("size", "diag"), "val": (10, 23.0)}
    _compute(feats, params)

    params["accuracy"] = 1e-9
    params["name"] = "Sigmoid"
    params["args"] = {"key": ("size", "gamma", "coef0"), "val": (10, 1.1, 1.3)}
    _compute(feats, params)
    params["args"]["val"] = (10, 0.5, 0.7)
    _compute(feats, params)

    params["name"] = "Chi2"
    params["args"] = {"key": ("size", "width"), "val": (10, 1.2)}
    _compute(feats, params)

    params["accuracy"] = 1e-8
    params["name"] = "Poly"
    params["args"] = {"key": ("size", "degree", "inhomogene"), "val": (10, 3, True)}
    _compute(sparsefeats, params)
    params["args"]["val"] = (10, 3, False)
    _compute(sparsefeats, params)

    # Poly again, dense, with a sqrt-diag normalizer
    params["name"] = "Poly"
    params["normalizer"] = kernel.SqrtDiagKernelNormalizer()
    params["args"] = {"key": ("size", "degree", "inhomogene"), "val": (10, 3, True)}
    _compute(feats, params)
    params["args"]["val"] = (10, 3, False)
    _compute(feats, params)

    # Linear takes no args; switch normalizer
    params["normalizer"] = kernel.AvgDiagKernelNormalizer()
    del params["args"]
    params["name"] = "Linear"
    _compute(feats, params)
    params["name"] = "Linear"
    _compute(sparsefeats, params)
def _run_real (name, args=None):
	"""Run preprocessor applied on RealFeatures.

	@param name name of the preprocessor
	@param args argument list (in a dict) for the preprocessor
	"""
	params = {
		'name': 'Gaussian',
		'accuracy': 1e-8,
		'data': dataop.get_rand(),
		'feature_class': 'simple',
		'feature_type': 'Real',
		'args': {'key': ('width',), 'val': (1.2,)},
	}
	feats = featop.get_features(
		params['feature_class'], params['feature_type'], params['data'])

	# attach the preprocessor, with its args if any were given
	if args:
		feats = featop.add_preproc(name, feats, *args['val'])
	else:
		feats = featop.add_preproc(name, feats)
	output = _compute(feats, params)

	params = {'name': name}
	if args:
		params['args'] = args
	output.update(fileop.get_output(category.PREPROC, params))
	fileop.write(category.PREPROC, output)
def _run (name):
	"""Run generator for a specific distribution method.

	@param name Name of the distribtuion method
	"""
	# put some constantness into randomness
	Math_init_random(INIT_RANDOM)

	params = {
		'name': name,
		'accuracy': 1e-7,
		'data': dataop.get_dna(),
		'alphabet': 'DNA',
		'feature_class': 'string_complex',
		'feature_type': 'Word',
	}
	output = fileop.get_output(category.DISTRIBUTION, params)

	feats = featop.get_features(
		params['feature_class'], params['feature_type'], params['data'])
	# look the distribution class up by name (name comes from the
	# generator's own tables, not from untrusted input)
	dfun = eval('distribution.' + name)
	dist = dfun(feats['train'])
	dist.train()

	output[PREFIX + 'likelihood'] = dist.get_log_likelihood_sample()
	output[PREFIX + 'derivatives'] = _get_derivatives(
		dist, feats['train'].get_num_vectors())
	fileop.write(category.DISTRIBUTION, output)
def _run_feats_string_complex ():
	"""Run distances with complex StringFeatures, like WordString."""
	params = {
		'accuracy': 1e-7,
		'feature_class': 'string_complex',
		'feature_type': 'Word',
		'data': dataop.get_dna(num_vec_test=dataop.NUM_VEC_TRAIN + 42),
	}
	feats = featop.get_features(
		params['feature_class'], params['feature_type'], params['data'])

	params['name'] = 'CanberraWordDistance'
	_compute(feats, params)

	params['accuracy'] = 1e-8
	params['name'] = 'ManhattanWordDistance'
	_compute(feats, params)

	# Hamming, once without and once with sign
	params['name'] = 'HammingWordDistance'
	params['args'] = {'key': ('use_sign',), 'val': (False,)}
	_compute(feats, params)
	params['name'] = 'HammingWordDistance'
	params['args'] = {'key': ('use_sign',), 'val': (True,)}
	_compute(feats, params)
def _run_perceptron ():
	"""Run Perceptron classifier."""
	params = {
		'name': 'Perceptron',
		'type': 'perceptron',
		'num_threads': 1,
		'learn_rate': .1,
		'max_iter': 1000,
		'data': dataop.get_clouds(2),
		'feature_class': 'simple',
		'feature_type': 'Real',
		'label_type': 'twoclass',
		'accuracy': 1e-7,
	}
	feats = featop.get_features(
		params['feature_class'], params['feature_type'], params['data'])
	num_vec = feats['train'].get_num_vectors()
	params['labels'], labels = dataop.get_labels(num_vec, params['label_type'])

	# train on the train split, classify the test split
	perceptron = classifier.Perceptron(feats['train'], labels)
	perceptron.parallel.set_num_threads(params['num_threads'])
	perceptron.set_learn_rate(params['learn_rate'])
	perceptron.set_max_iter(params['max_iter'])
	perceptron.train()

	params['bias'] = perceptron.get_bias()
	perceptron.set_features(feats['test'])
	params['classified'] = perceptron.classify().get_labels()

	output = fileop.get_output(category.CLASSIFIER, params)
	fileop.write(category.CLASSIFIER, output)
def _run_lda ():
	"""Run Linear Discriminant Analysis classifier."""
	params = {
		'name': 'LDA',
		'type': 'lda',
		'gamma': 0.1,
		'num_threads': 1,
		'data': dataop.get_clouds(2),
		'feature_class': 'simple',
		'feature_type': 'Real',
		'label_type': 'twoclass',
		'accuracy': 1e-7,
	}
	feats = featop.get_features(
		params['feature_class'], params['feature_type'], params['data'])
	params['labels'], labels = dataop.get_labels(
		feats['train'].get_num_vectors(), params['label_type'])

	# train on the train split, classify the test split
	lda = classifier.LDA(params['gamma'], feats['train'], labels)
	lda.parallel.set_num_threads(params['num_threads'])
	lda.train()

	lda.set_features(feats['test'])
	params['classified'] = lda.classify().get_labels()

	output = fileop.get_output(category.CLASSIFIER, params)
	fileop.write(category.CLASSIFIER, output)
def _run_real(name, args=None):
    """Run preprocessor applied on RealFeatures.

    @param name name of the preprocessor
    @param args argument list (in a dict) for the preprocessor
    """
    params = {
        "name": "Gaussian",
        "accuracy": 1e-8,
        "data": dataop.get_rand(),
        "feature_class": "simple",
        "feature_type": "Real",
        "args": {"key": ("width",), "val": (1.2,)},
    }
    feats = featop.get_features(
        params["feature_class"], params["feature_type"], params["data"])

    # attach preprocessor (forwarding its args when given), then compute
    if args:
        feats = featop.add_preproc(name, feats, *args["val"])
    else:
        feats = featop.add_preproc(name, feats)
    output = _compute(feats, params)

    preproc_params = {"name": name}
    if args:
        preproc_params["args"] = args
    output.update(fileop.get_output(category.PREPROC, preproc_params))
    fileop.write(category.PREPROC, output)
def _run_feats_string_complex():
    """Run distances with complex StringFeatures, like WordString."""
    params = {
        'accuracy': 1e-7,
        'feature_class': 'string_complex',
        'feature_type': 'Word',
        'data': dataop.get_dna(num_vec_test=dataop.NUM_VEC_TRAIN + 42),
    }
    feats = featop.get_features(
        params['feature_class'], params['feature_type'], params['data'])

    params['name'] = 'CanberraWordDistance'
    _compute(feats, params)

    params['accuracy'] = 1e-8
    params['name'] = 'ManhattanWordDistance'
    _compute(feats, params)

    # Hamming distance with and without sign
    for use_sign in (False, True):
        params['name'] = 'HammingWordDistance'
        params['args'] = {'key': ('use_sign',), 'val': (use_sign,)}
        _compute(feats, params)
def _run_custom():
    """Run Custom kernel.

    Feeds the same square random matrix into the CustomKernel three ways
    (triangle-from-triangle, triangle-from-full, full-from-full) and
    records all three resulting kernel matrices.
    """
    params = {
        "name": "Custom",
        "accuracy": 1e-7,
        "feature_class": "simple",
        "feature_type": "Real",
    }
    dim_square = 7
    data = dataop.get_rand(dim_square=dim_square)
    feats = featop.get_features(
        params["feature_class"], params["feature_type"], data)
    data = data["train"]
    symdata = data + data.T
    # flatten the lower triangle (row-major, y <= x) of the symmetric matrix
    lowertriangle = numpy.array(
        [symdata[(x, y)]
         for x in xrange(symdata.shape[1])
         for y in xrange(symdata.shape[0])
         if y <= x]
    )

    kern = kernel.CustomKernel()
    # kern.init(feats['train'], feats['train']
    kern.set_triangle_kernel_matrix_from_triangle(lowertriangle)
    km_triangletriangle = kern.get_kernel_matrix()
    kern.set_triangle_kernel_matrix_from_full(symdata)
    km_fulltriangle = kern.get_kernel_matrix()
    kern.set_full_kernel_matrix_from_full(data)
    km_fullfull = kern.get_kernel_matrix()

    output = {
        "kernel_matrix_triangletriangle": km_triangletriangle,
        "kernel_matrix_fulltriangle": km_fulltriangle,
        "kernel_matrix_fullfull": km_fullfull,
        "kernel_symdata": numpy.matrix(symdata),
        "kernel_data": numpy.matrix(data),
        "kernel_dim_square": dim_square,
    }
    output.update(fileop.get_output(category.KERNEL, params))
    fileop.write(category.KERNEL, output)
def _run(name):
    """Run generator for a specific distribution method.

    @param name Name of the distribtuion method
    """
    # put some constantness into randomness
    Math_init_random(INIT_RANDOM)

    params = {
        'name': name,
        'accuracy': 1e-7,
        'data': dataop.get_dna(),
        'alphabet': 'DNA',
        'feature_class': 'string_complex',
        'feature_type': 'Word',
    }
    output = fileop.get_output(category.DISTRIBUTION, params)

    feats = featop.get_features(
        params['feature_class'], params['feature_type'], params['data'])
    # resolve the distribution class from its name (trusted, internal names)
    dist_class = eval('distribution.' + name)
    dist = dist_class(feats['train'])
    dist.train()

    output[PREFIX + 'likelihood'] = dist.get_log_likelihood_sample()
    output[PREFIX + 'derivatives'] = _get_derivatives(
        dist, feats['train'].get_num_vectors())
    fileop.write(category.DISTRIBUTION, output)
def _run_feats_real():
    """Run distances with RealFeatures.

    Iterates over the parameterless distance classes first, then the ones
    needing extra arguments, and finally the sparse Euclidian variant.
    """
    params = {
        'accuracy': 1e-8,
        'feature_class': 'simple',
        'feature_type': 'Real',
        'data': dataop.get_rand(),
    }
    feats = featop.get_features(
        params['feature_class'], params['feature_type'], params['data'])

    # NOTE(review): 'ManhattanMetric' appears twice below, matching the
    # original sequence — presumably intentional (re-writes the same output).
    simple_names = (
        'EuclidianDistance', 'CanberraMetric', 'ChebyshewMetric',
        'GeodesicMetric', 'JensenMetric', 'ManhattanMetric',
        'BrayCurtisDistance', 'ChiSquareDistance', 'CosineDistance',
        'TanimotoDistance', 'ManhattanMetric',
    )
    for dist_name in simple_names:
        params['name'] = dist_name
        _compute(feats, params)

    params['name'] = 'MinkowskiMetric'
    params['args'] = {'key': ('k',), 'val': (1.3,)}
    _compute(feats, params)

    # sparse variant, with looser accuracy and no args
    params['name'] = 'SparseEuclidianDistance'
    params['accuracy'] = 1e-7
    del params['args']
    feats = featop.get_features(
        params['feature_class'], params['feature_type'], params['data'],
        sparse=True)
    _compute(feats, params)
def _run_feats_real ():
	"""Run distances with RealFeatures."""
	params = {
		'accuracy': 1e-8,
		'feature_class': 'simple',
		'feature_type': 'Real',
		'data': dataop.get_rand(),
	}
	feats = featop.get_features(
		params['feature_class'], params['feature_type'], params['data'])

	params['name'] = 'EuclidianDistance'
	_compute(feats, params)
	params['name'] = 'CanberraMetric'
	_compute(feats, params)
	params['name'] = 'ChebyshewMetric'
	_compute(feats, params)
	params['name'] = 'GeodesicMetric'
	_compute(feats, params)
	params['name'] = 'JensenMetric'
	_compute(feats, params)
	params['name'] = 'ManhattanMetric'
	_compute(feats, params)
	params['name'] = 'BrayCurtisDistance'
	_compute(feats, params)
	params['name'] = 'ChiSquareDistance'
	_compute(feats, params)
	params['name'] = 'CosineDistance'
	_compute(feats, params)
	params['name'] = 'TanimotoDistance'
	_compute(feats, params)
	# NOTE(review): ManhattanMetric is run a second time here, as in the
	# original sequence — presumably intentional.
	params['name'] = 'ManhattanMetric'
	_compute(feats, params)

	params['name'] = 'MinkowskiMetric'
	params['args'] = {'key': ('k',), 'val': (1.3,)}
	_compute(feats, params)

	# sparse variant, looser accuracy, no args
	params['name'] = 'SparseEuclidianDistance'
	params['accuracy'] = 1e-7
	del params['args']
	feats = featop.get_features(
		params['feature_class'], params['feature_type'], params['data'],
		sparse=True)
	_compute(feats, params)
def _run_combined ():
	"""Run Combined kernel.

	Appends three string subkernels (each with its own DNA data and
	features) to a CombinedKernel, then computes train and test kernel
	matrices of the combination.
	"""
	kern = kernel.CombinedKernel()
	feats = {'train': CombinedFeatures(), 'test': CombinedFeatures()}
	output = {}
	params = {
		'name': 'Combined',
		'accuracy': 1e-7,
	}
	subkdata = [
		{
			'name': 'FixedDegreeString',
			'feature_class': 'string',
			'feature_type': 'Char',
			'args': {'key': ('size', 'degree'), 'val': (10, 3)},
		},
		{
			'name': 'PolyMatchString',
			'feature_class': 'string',
			'feature_type': 'Char',
			'args': {
				'key': ('size', 'degree', 'inhomogene'),
				'val': (10, 3, True),
			},
		},
		{
			'name': 'LocalAlignmentString',
			'feature_class': 'string',
			'feature_type': 'Char',
			'args': {'key': ('size',), 'val': (10,)},
		},
	]

	for idx, sd in enumerate(subkdata):
		# resolve the kernel class by name (trusted, internal names)
		kfun = eval('kernel.' + sd['name'] + 'Kernel')
		subk = kfun(*sd['args']['val'])
		sd['data'] = dataop.get_dna()
		subkfeats = featop.get_features(
			sd['feature_class'], sd['feature_type'], sd['data'])
		output.update(
			fileop.get_output(category.KERNEL, sd, 'subkernel' + str(idx) + '_'))
		kern.append_kernel(subk)
		feats['train'].append_feature_obj(subkfeats['train'])
		feats['test'].append_feature_obj(subkfeats['test'])

	output.update(fileop.get_output(category.KERNEL, params))
	kern.init(feats['train'], feats['train'])
	output['kernel_matrix_train'] = kern.get_kernel_matrix()
	kern.init(feats['train'], feats['test'])
	output['kernel_matrix_test'] = kern.get_kernel_matrix()
	fileop.write(category.KERNEL, output)
def _run_pie():
    """Run kernel with PluginEstimate."""
    params = {
        "data": dataop.get_dna(),
        "accuracy": 1e-6,
        "feature_class": "string_complex",
        "feature_type": "Word",
    }
    feats = featop.get_features(
        params["feature_class"], params["feature_type"], params["data"])

    # both plugin-estimate kernels over the same Word features
    for kernel_name in ("HistogramWordString", "SalzbergWordString"):
        params["name"] = kernel_name
        _compute_pie(feats, params)
def _run_svm_linear ():
	"""Run all SVMs based on (Sparse) Linear Classifiers.

	First the sparse real-valued classifiers (with/without bias and with a
	train-time cap for SubGradientSVM), then SVMOcas again on WD byte
	features over raw DNA.
	"""
	params = {
		'type': 'linear',
		'bias_enabled': False,
		'data': dataop.get_clouds(2),
		'feature_class': 'simple',
		'feature_type': 'Real',
		'label_type': 'twoclass',
	}
	feats = featop.get_features(
		params['feature_class'], params['feature_type'], params['data'],
		sparse=True)

	params['bias_enabled'] = True
	_loop_svm(('LibLinear', 'SVMLin', 'SVMSGD'), params, feats)

	# SubGradientSVM needs max_train_time to terminate
	params['bias_enabled'] = False
	params['max_train_time'] = .5  # up to 2. does not improve test results :(
	_loop_svm(('SubGradientSVM',), params, feats)

	svms = ('SVMOcas',)
	_loop_svm(svms, params, feats)

	# same SVMOcas, but on weighted-degree byte features from raw DNA
	params = {
		'type': 'linear',
		'bias_enabled': False,
		'label_type': 'twoclass',
		'feature_class': 'wd',
		'feature_type': 'Byte',
		'data': dataop.get_dna(),
		'alphabet': 'RAWDNA',
		'order': 1,
	}
	feats = featop.get_features(
		params['feature_class'], params['feature_type'], params['data'],
		params['order'])
	_loop_svm(svms, params, feats)
def _run_top_fisher():
    """Run Linear Kernel with {Top,Fisher}Features.

    Trains a positive and a negative HMM on cube data, then derives TOP
    and FK feature sets from them and computes both.
    """
    # put some constantness into randomness
    Math_init_random(dataop.INIT_RANDOM)

    data = dataop.get_cubes(4, 8)
    prefix = "topfk_"
    params = {
        prefix + "N": 3,
        prefix + "M": 6,
        prefix + "pseudo": 1e-1,
        prefix + "order": 1,
        prefix + "gap": 0,
        prefix + "reverse": False,
        prefix + "alphabet": "CUBE",
        prefix + "feature_class": "string_complex",
        prefix + "feature_type": "Word",
        prefix + "data_train": numpy.matrix(data["train"]),
        prefix + "data_test": numpy.matrix(data["test"]),
    }
    wordfeats = featop.get_features(
        params[prefix + "feature_class"],
        params[prefix + "feature_type"],
        data,
        eval(params[prefix + "alphabet"]),
        params[prefix + "order"],
        params[prefix + "gap"],
        params[prefix + "reverse"],
    )

    # one HMM per class, both trained on the train split
    pos_train = HMM(wordfeats["train"], params[prefix + "N"],
                    params[prefix + "M"], params[prefix + "pseudo"])
    pos_train.train()
    pos_train.baum_welch_viterbi_train(BW_NORMAL)
    neg_train = HMM(wordfeats["train"], params[prefix + "N"],
                    params[prefix + "M"], params[prefix + "pseudo"])
    neg_train.train()
    neg_train.baum_welch_viterbi_train(BW_NORMAL)

    # clones of the trained HMMs observe the test split
    pos_test = HMM(pos_train)
    pos_test.set_observations(wordfeats["test"])
    neg_test = HMM(neg_train)
    neg_test.set_observations(wordfeats["test"])

    feats = {}
    feats["train"] = TOPFeatures(10, pos_train, neg_train, False, False)
    feats["test"] = TOPFeatures(10, pos_test, neg_test, False, False)
    params[prefix + "name"] = "TOP"
    _compute_top_fisher(feats, params)

    feats["train"] = FKFeatures(10, pos_train, neg_train)
    feats["train"].set_opt_a(-1)  # estimate prior
    feats["test"] = FKFeatures(10, pos_test, neg_test)
    feats["test"].set_a(feats["train"].get_a())  # use prior from training data
    params[prefix + "name"] = "FK"
    _compute_top_fisher(feats, params)
def _run_feats_string():
    """Run kernel with StringFeatures.

    Sequentially runs the char-string kernels over the same DNA features,
    tightening/loosening accuracy per kernel as in the original battery.
    """
    params = {
        "accuracy": 1e-9,
        "data": dataop.get_dna(),
        "feature_class": "string",
        "feature_type": "Char",
    }
    feats = featop.get_features(
        params["feature_class"], params["feature_type"], params["data"])

    params["name"] = "FixedDegreeString"
    params["args"] = {"key": ("size", "degree"), "val": (10, 3)}
    _compute(feats, params)

    params["accuracy"] = 0
    params["name"] = "LocalAlignmentString"
    params["args"] = {"key": ("size",), "val": (10,)}
    _compute(feats, params)

    params["accuracy"] = 1e-10
    params["name"] = "PolyMatchString"
    params["args"] = {"key": ("size", "degree", "inhomogene"), "val": (10, 3, True)}
    _compute(feats, params)
    params["args"]["val"] = (10, 3, False)
    _compute(feats, params)

    params["accuracy"] = 1e-15
    params["name"] = "SimpleLocalityImprovedString"
    params["args"] = {
        "key": ("size", "length", "inner_degree", "outer_degree"),
        "val": (10, 5, 7, 5),
    }
    _compute(feats, params)
    # buggy:
    # params['name']='LocalityImprovedString'
    # _compute(feats, params)

    params["name"] = "WeightedDegreeString"
    params["accuracy"] = 1e-9
    params["args"] = {"key": ("degree",), "val": (20,)}
    _compute(feats, params)
    params["args"] = {"key": ("degree",), "val": (1,)}
    _compute(feats, params)

    params["name"] = "WeightedDegreePositionString"
    params["args"] = {"key": ("size", "degree"), "val": (10, 20)}
    _compute(feats, params)
    params["args"] = {"key": ("size", "degree"), "val": (10, 1)}
    _compute(feats, params)

    params["name"] = "OligoString"
    params["args"] = {"key": ("size", "k", "width"), "val": (10, 3, 1.2)}
    _compute(feats, params)
    params["args"] = {"key": ("size", "k", "width"), "val": (10, 4, 1.7)}
    _compute(feats, params)

    # Linear takes no args but needs a normalizer
    params["name"] = "LinearString"
    params["accuracy"] = 1e-8
    params["normalizer"] = kernel.AvgDiagKernelNormalizer()
    del params["args"]
    _compute(feats, params)
def _run_top_fisher():
    """Run Linear Kernel with {Top,Fisher}Features.

    Two HMMs are trained on the cube train split; TOP and FK feature sets
    are built from them and each is computed in turn.
    """
    # put some constantness into randomness
    Math_init_random(dataop.INIT_RANDOM)

    data = dataop.get_cubes(4, 8)
    prefix = 'topfk_'
    params = {
        prefix + 'N': 3,
        prefix + 'M': 6,
        prefix + 'pseudo': 1e-1,
        prefix + 'order': 1,
        prefix + 'gap': 0,
        prefix + 'reverse': False,
        prefix + 'alphabet': 'CUBE',
        prefix + 'feature_class': 'string_complex',
        prefix + 'feature_type': 'Word',
        prefix + 'data_train': numpy.matrix(data['train']),
        prefix + 'data_test': numpy.matrix(data['test']),
    }
    wordfeats = featop.get_features(
        params[prefix + 'feature_class'],
        params[prefix + 'feature_type'],
        data,
        eval(params[prefix + 'alphabet']),
        params[prefix + 'order'],
        params[prefix + 'gap'],
        params[prefix + 'reverse'])

    # positive and negative HMMs, both on the train observations
    hmm_args = (params[prefix + 'N'], params[prefix + 'M'],
                params[prefix + 'pseudo'])
    pos_train = HMM(wordfeats['train'], *hmm_args)
    pos_train.train()
    pos_train.baum_welch_viterbi_train(BW_NORMAL)
    neg_train = HMM(wordfeats['train'], *hmm_args)
    neg_train.train()
    neg_train.baum_welch_viterbi_train(BW_NORMAL)

    # copies of the trained models observe the test split
    pos_test = HMM(pos_train)
    pos_test.set_observations(wordfeats['test'])
    neg_test = HMM(neg_train)
    neg_test.set_observations(wordfeats['test'])

    feats = {}
    feats['train'] = TOPFeatures(10, pos_train, neg_train, False, False)
    feats['test'] = TOPFeatures(10, pos_test, neg_test, False, False)
    params[prefix + 'name'] = 'TOP'
    _compute_top_fisher(feats, params)

    feats['train'] = FKFeatures(10, pos_train, neg_train)
    feats['train'].set_opt_a(-1)  # estimate prior
    feats['test'] = FKFeatures(10, pos_test, neg_test)
    feats['test'].set_a(feats['train'].get_a())  # use prior from training data
    params[prefix + 'name'] = 'FK'
    _compute_top_fisher(feats, params)
def _run_feats_string_complex ():
	"""Run kernel with complex StringFeatures."""
	params = {
		'data': dataop.get_dna(),
		'feature_class': 'string_complex',
	}
	params['feature_type'] = 'Word'
	wordfeats = featop.get_features(
		params['feature_class'], params['feature_type'], params['data'])

	params['name'] = 'CommWordString'
	params['accuracy'] = 1e-9
	params['args'] = {'key': ('size', 'use_sign'), 'val': (10, False)}
	_compute(wordfeats, params)
	params['name'] = 'WeightedCommWordString'
	_compute(wordfeats, params)

	params['name'] = 'PolyMatchWordString'
	params['accuracy'] = 1e-10
	params['args'] = {
		'key': ('size', 'degree', 'inhomogene'),
		'val': (10, 3, True),
	}
	_compute(wordfeats, params)
	params['args']['val'] = (10, 3, False)
	_compute(wordfeats, params)

	params['name'] = 'MatchWordString'
	params['args'] = {'key': ('size', 'degree'), 'val': (10, 3)}
	_compute(wordfeats, params)

	# Ulong variant of the same data
	params['feature_type'] = 'Ulong'
	params['accuracy'] = 1e-9
	ulongfeats = featop.get_features(
		params['feature_class'], params['feature_type'], params['data'])
	params['name'] = 'CommUlongString'
	params['args'] = {'key': ('size', 'use_sign'), 'val': (10, False)}
	_compute(ulongfeats, params)
def _run_top_fisher ():
	"""Run Linear Kernel with {Top,Fisher}Features."""
	# put some constantness into randomness
	Math_init_random(dataop.INIT_RANDOM)

	data = dataop.get_cubes(4, 8)
	prefix = 'topfk_'
	params = {
		prefix + 'N': 3,
		prefix + 'M': 6,
		prefix + 'pseudo': 1e-1,
		prefix + 'order': 1,
		prefix + 'gap': 0,
		prefix + 'reverse': False,
		prefix + 'alphabet': 'CUBE',
		prefix + 'feature_class': 'string_complex',
		prefix + 'feature_type': 'Word',
		prefix + 'data_train': numpy.matrix(data['train']),
		prefix + 'data_test': numpy.matrix(data['test']),
	}
	wordfeats = featop.get_features(
		params[prefix + 'feature_class'], params[prefix + 'feature_type'],
		data,
		eval(params[prefix + 'alphabet']),
		params[prefix + 'order'], params[prefix + 'gap'],
		params[prefix + 'reverse'])

	# train a positive and a negative HMM on the train observations
	pos_train = HMM(wordfeats['train'],
		params[prefix + 'N'], params[prefix + 'M'], params[prefix + 'pseudo'])
	pos_train.train()
	pos_train.baum_welch_viterbi_train(BW_NORMAL)
	neg_train = HMM(wordfeats['train'],
		params[prefix + 'N'], params[prefix + 'M'], params[prefix + 'pseudo'])
	neg_train.train()
	neg_train.baum_welch_viterbi_train(BW_NORMAL)

	# copies of the trained models observe the test split
	pos_test = HMM(pos_train)
	pos_test.set_observations(wordfeats['test'])
	neg_test = HMM(neg_train)
	neg_test.set_observations(wordfeats['test'])

	feats = {}
	feats['train'] = TOPFeatures(10, pos_train, neg_train, False, False)
	feats['test'] = TOPFeatures(10, pos_test, neg_test, False, False)
	params[prefix + 'name'] = 'TOP'
	_compute_top_fisher(feats, params)

	feats['train'] = FKFeatures(10, pos_train, neg_train)
	feats['train'].set_opt_a(-1)  # estimate prior
	feats['test'] = FKFeatures(10, pos_test, neg_test)
	feats['test'].set_a(feats['train'].get_a())  # use prior from training data
	params[prefix + 'name'] = 'FK'
	_compute_top_fisher(feats, params)
def _run_feats_byte():
    """Run kernel with ByteFeatures."""
    params = {
        "name": "Linear",
        "accuracy": 1e-8,
        "feature_class": "simple",
        "feature_type": "Byte",
        "data": dataop.get_rand(dattype=numpy.ubyte),
        "normalizer": kernel.AvgDiagKernelNormalizer(),
    }
    byte_feats = featop.get_features(
        params["feature_class"], params["feature_type"], params["data"],
        RAWBYTE)
    _compute(byte_feats, params)
def _run_distance():
    """Run distance kernel."""
    params = {
        "name": "Distance",
        "accuracy": 1e-9,
        "feature_class": "simple",
        "feature_type": "Real",
        "data": dataop.get_rand(),
        # the distance instance itself is one of the kernel args
        "args": {
            "key": ("size", "width", "distance"),
            "val": (10, 1.7, CanberraMetric()),
        },
    }
    real_feats = featop.get_features(
        params["feature_class"], params["feature_type"], params["data"])
    _compute(real_feats, params)
def _run_combined():
    """Run Combined kernel.

    Three string subkernels (each with its own DNA data/features) are
    appended to a CombinedKernel; the combined train and test kernel
    matrices are then recorded.
    """
    kern = kernel.CombinedKernel()
    feats = {"train": CombinedFeatures(), "test": CombinedFeatures()}
    output = {}
    params = {"name": "Combined", "accuracy": 1e-7}
    subkdata = [
        {
            "name": "FixedDegreeString",
            "feature_class": "string",
            "feature_type": "Char",
            "args": {"key": ("size", "degree"), "val": (10, 3)},
        },
        {
            "name": "PolyMatchString",
            "feature_class": "string",
            "feature_type": "Char",
            "args": {"key": ("size", "degree", "inhomogene"), "val": (10, 3, True)},
        },
        {
            "name": "LocalAlignmentString",
            "feature_class": "string",
            "feature_type": "Char",
            "args": {"key": ("size",), "val": (10,)},
        },
    ]

    for idx, sd in enumerate(subkdata):
        # resolve the kernel class from its name (trusted, internal names)
        kernel_class = eval("kernel." + sd["name"] + "Kernel")
        subk = kernel_class(*sd["args"]["val"])
        sd["data"] = dataop.get_dna()
        subkfeats = featop.get_features(
            sd["feature_class"], sd["feature_type"], sd["data"])
        output.update(
            fileop.get_output(category.KERNEL, sd, "subkernel" + str(idx) + "_"))
        kern.append_kernel(subk)
        feats["train"].append_feature_obj(subkfeats["train"])
        feats["test"].append_feature_obj(subkfeats["test"])

    output.update(fileop.get_output(category.KERNEL, params))
    kern.init(feats["train"], feats["train"])
    output["kernel_matrix_train"] = kern.get_kernel_matrix()
    kern.init(feats["train"], feats["test"])
    output["kernel_matrix_test"] = kern.get_kernel_matrix()
    fileop.write(category.KERNEL, output)
def _run_feats_byte ():
	"""Run kernel with ByteFeatures."""
	params = {
		'name': 'Linear',
		'accuracy': 1e-8,
		'feature_class': 'simple',
		'feature_type': 'Byte',
		'data': dataop.get_rand(dattype=numpy.ubyte),
		'normalizer': kernel.AvgDiagKernelNormalizer(),
	}
	feats = featop.get_features(
		params['feature_class'], params['feature_type'], params['data'],
		RAWBYTE)
	_compute(feats, params)
def _run_feats_word():
    """Run kernel with WordFeatures."""
    # cap both train and test data at the same maximum value
    maxval = 42
    params = {
        "name": "Linear",
        "accuracy": 1e-8,
        "feature_class": "simple",
        "feature_type": "Word",
        "data": dataop.get_rand(
            dattype=numpy.ushort, max_train=maxval, max_test=maxval),
        "normalizer": kernel.AvgDiagKernelNormalizer(),
    }
    word_feats = featop.get_features(
        params["feature_class"], params["feature_type"], params["data"])
    _compute(word_feats, params)
def _run_feats_byte ():
	"""Run kernel with ByteFeatures."""
	# NOTE(review): this variant uses the 'LinearByte' kernel name, unlike
	# the plain 'Linear' used elsewhere — kept as-is.
	params = {
		'name': 'LinearByte',
		'accuracy': 1e-8,
		'feature_class': 'simple',
		'feature_type': 'Byte',
		'data': dataop.get_rand(dattype=numpy.ubyte),
		'normalizer': kernel.AvgDiagKernelNormalizer(),
	}
	feats = featop.get_features(
		params['feature_class'], params['feature_type'], params['data'],
		RAWBYTE)
	_compute(feats, params)
def _run_pie ():
	"""Run kernel with PluginEstimate."""
	params = {
		'data': dataop.get_dna(),
		'accuracy': 1e-6,
		'feature_class': 'string_complex',
		'feature_type': 'Word',
	}
	feats = featop.get_features(
		params['feature_class'], params['feature_type'], params['data'])

	params['name'] = 'HistogramWordString'
	_compute_pie(feats, params)
	params['name'] = 'SalzbergWordString'
	_compute_pie(feats, params)
def _run_pie():
    """Run kernel with PluginEstimate."""
    params = {
        'data': dataop.get_dna(),
        'accuracy': 1e-6,
        'feature_class': 'string_complex',
        'feature_type': 'Word',
    }
    word_feats = featop.get_features(
        params['feature_class'], params['feature_type'], params['data'])

    # run both plugin-estimate kernels on the same features
    for name in ('HistogramWordString', 'SalzbergWordString'):
        params['name'] = name
        _compute_pie(word_feats, params)
def _run_feats_word ():
	"""Run kernel with WordFeatures."""
	maxval = 42  # cap for both train and test random values
	params = {
		'name': 'Linear',
		'accuracy': 1e-8,
		'feature_class': 'simple',
		'feature_type': 'Word',
		'data': dataop.get_rand(
			dattype=numpy.ushort, max_train=maxval, max_test=maxval),
		'normalizer': kernel.AvgDiagKernelNormalizer(),
	}
	feats = featop.get_features(
		params['feature_class'], params['feature_type'], params['data'])
	_compute(feats, params)
def _run_feats_word ():
	"""Run kernel with WordFeatures."""
	# NOTE(review): this variant uses the 'LinearWord' kernel name, unlike
	# the plain 'Linear' used elsewhere — kept as-is.
	maxval = 42  # cap for both train and test random values
	params = {
		'name': 'LinearWord',
		'accuracy': 1e-8,
		'feature_class': 'simple',
		'feature_type': 'Word',
		'data': dataop.get_rand(
			dattype=numpy.ushort, max_train=maxval, max_test=maxval),
		'normalizer': kernel.AvgDiagKernelNormalizer(),
	}
	feats = featop.get_features(
		params['feature_class'], params['feature_type'], params['data'])
	_compute(feats, params)
def _run_distance ():
	"""Run distance kernel."""
	params = {
		'name': 'Distance',
		'accuracy': 1e-9,
		'feature_class': 'simple',
		'feature_type': 'Real',
		'data': dataop.get_rand(),
		# the CanberraMetric instance is passed as a kernel argument
		'args': {
			'key': ('size', 'width', 'distance'),
			'val': (10, 1.7, CanberraMetric()),
		},
	}
	feats = featop.get_features(
		params['feature_class'], params['feature_type'], params['data'])
	_compute(feats, params)
def _run_distance():
    """Run distance kernel."""
    kernel_args = {
        'key': ('size', 'width', 'distance'),
        'val': (10, 1.7, CanberraMetric()),
    }
    params = {
        'name': 'Distance',
        'accuracy': 1e-9,
        'feature_class': 'simple',
        'feature_type': 'Real',
        'data': dataop.get_rand(),
        'args': kernel_args,
    }
    feats = featop.get_features(
        params['feature_class'], params['feature_type'], params['data'])
    _compute(feats, params)
def _run_hmm():
    """Run generator for Hidden-Markov-Model.

    Trains an HMM on cube data and records likelihood, derivatives and
    accumulated best-path values for each example.
    """
    # put some constantness into randomness
    Math_init_random(INIT_RANDOM)

    num_examples = 4
    params = {
        'name': 'HMM',
        'accuracy': 1e-6,
        'N': 3,
        'M': 6,
        'num_examples': num_examples,
        'pseudo': 1e-10,
        'order': 1,
        'alphabet': 'CUBE',
        'feature_class': 'string_complex',
        'feature_type': 'Word',
        'data': dataop.get_cubes(num_examples, 1),
    }
    output = fileop.get_output(category.DISTRIBUTION, params)

    feats = featop.get_features(
        params['feature_class'], params['feature_type'], params['data'],
        eval('features.' + params['alphabet']), params['order'])
    hmm = distribution.HMM(
        feats['train'], params['N'], params['M'], params['pseudo'])
    hmm.train()
    hmm.baum_welch_viterbi_train(distribution.BW_NORMAL)

    output[PREFIX + 'likelihood'] = hmm.get_log_likelihood_sample()
    output[PREFIX + 'derivatives'] = _get_derivatives(
        hmm, feats['train'].get_num_vectors())

    # accumulate best-path scores and per-state values over all examples
    output[PREFIX + 'best_path'] = 0
    output[PREFIX + 'best_path_state'] = 0
    for example_idx in xrange(num_examples):
        output[PREFIX + 'best_path'] += hmm.best_path(example_idx)
        for state_idx in xrange(params['N']):
            output[PREFIX + 'best_path_state'] += hmm.get_best_path_state(
                example_idx, state_idx)

    fileop.write(category.DISTRIBUTION, output)
def _run (name, first_arg): """ Run generator for a specific clustering method. @param name Name of the clustering method to run. @param first_arg First argument to the clustering's constructor; so far, only this distinguishes the instantion of the different methods. """ # put some constantness into randomness Math_init_random(dataop.INIT_RANDOM) num_clouds=3 params={ 'name': 'EuclidianDistance', 'data': dataop.get_clouds(num_clouds, 5), 'feature_class': 'simple', 'feature_type': 'Real' } feats=featop.get_features( params['feature_class'], params['feature_type'], params['data']) dfun=eval(params['name']) distance=dfun(feats['train'], feats['train']) output=fileop.get_output(category.DISTANCE, params) params={ 'name': name, 'accuracy': 1e-8, first_arg: num_clouds } fun=eval('clustering.'+name) clustering=fun(params[first_arg], distance) clustering.train() distance.init(feats['train'], feats['test']) if name=='KMeans': params['radi']=clustering.get_radiuses() params['centers']=clustering.get_cluster_centers() elif name=='Hierarchical': params['merge_distance']=clustering.get_merge_distances() params['pairs']=clustering.get_cluster_pairs() output.update(fileop.get_output(category.CLUSTERING, params)) fileop.write(category.CLUSTERING, output)
def _run_hmm ():
    """Run generator for Hidden-Markov-Model.

    Trains an HMM on CUBE string data and records likelihood,
    derivatives and summed best-path results for regression testing.
    """
    # seed the RNG so the generated data is reproducible
    Math_init_random(INIT_RANDOM)

    n_examples=4
    params={
        'name': 'HMM',
        'accuracy': 1e-6,
        'N': 3,
        'M': 6,
        'num_examples': n_examples,
        'pseudo': 1e-10,
        'order': 1,
        'alphabet': 'CUBE',
        'feature_class': 'string_complex',
        'feature_type': 'Word',
        'data': dataop.get_cubes(n_examples, 1)
    }
    output=fileop.get_output(category.DISTRIBUTION, params)

    # alphabet constant is looked up from the features module by name
    str_feats=featop.get_features(
        params['feature_class'], params['feature_type'], params['data'],
        eval('features.'+params['alphabet']), params['order'])

    model=distribution.HMM(
        str_feats['train'], params['N'], params['M'], params['pseudo'])
    model.train()
    model.baum_welch_viterbi_train(distribution.BW_NORMAL)

    output[PREFIX+'likelihood']=model.get_log_likelihood_sample()
    output[PREFIX+'derivatives']=_get_derivatives(
        model, str_feats['train'].get_num_vectors())

    # fold all best-path results into single scalars
    best_path=0
    best_path_state=0
    for example in xrange(n_examples):
        best_path+=model.best_path(example)
        for state in xrange(params['N']):
            best_path_state+=model.get_best_path_state(example, state)
    output[PREFIX+'best_path']=best_path
    output[PREFIX+'best_path_state']=best_path_state

    fileop.write(category.DISTRIBUTION, output)
def _run_string_complex(ftype):
    """Run preprocessor applied on complex StringFeatures.

    @param ftype Feature type, like Word
    """
    # NOTE(review): an identical `_run_string_complex` is defined again
    # later in this file; at import time the later definition shadows this
    # one.
    params = {
        "name": "Comm" + ftype + "String",
        "accuracy": 1e-9,
        "feature_class": "string_complex",
        "feature_type": ftype,
        "data": dataop.get_dna(),
    }
    feats = featop.get_features(params["feature_class"],
                                params["feature_type"], params["data"])

    # string_complex gets preproc added implicitely on Word/Ulong feats
    output = _compute(feats, params)

    # record the sorting preprocessor (Sort<ftype>String) in the output
    params = {"name": "Sort" + ftype + "String"}
    output.update(fileop.get_output(category.PREPROC, params))
    fileop.write(category.PREPROC, output)
def _run_custom():
    """Run Custom kernel.

    Feeds the same symmetric random matrix to a CustomKernel through its
    three setters (triangle-from-triangle, triangle-from-full,
    full-from-full) and writes the resulting kernel matrices, plus the
    source data, for regression testing.
    """
    params = {
        'name': 'Custom',
        'accuracy': 1e-7,
        'feature_class': 'simple',
        'feature_type': 'Real'
    }
    dim_square = 7
    data = dataop.get_rand(dim_square=dim_square)
    # NOTE(review): `feats` is never used below (a broken commented-out
    # kern.init call that consumed it was removed); kept for any side
    # effects of feature construction -- confirm before deleting.
    feats = featop.get_features(params['feature_class'],
                                params['feature_type'], data)
    data = data['train']
    # symmetrize the random square matrix
    symdata = data + data.T

    # row-major lower triangle (diagonal included) of the symmetric matrix
    lowertriangle = numpy.array([
        symdata[(x, y)] for x in xrange(symdata.shape[1])
        for y in xrange(symdata.shape[0]) if y <= x
    ])

    kern = kernel.CustomKernel()

    kern.set_triangle_kernel_matrix_from_triangle(lowertriangle)
    km_triangletriangle = kern.get_kernel_matrix()
    kern.set_triangle_kernel_matrix_from_full(symdata)
    km_fulltriangle = kern.get_kernel_matrix()
    kern.set_full_kernel_matrix_from_full(data)
    km_fullfull = kern.get_kernel_matrix()

    output = {
        'kernel_matrix_triangletriangle': km_triangletriangle,
        'kernel_matrix_fulltriangle': km_fulltriangle,
        'kernel_matrix_fullfull': km_fullfull,
        'kernel_symdata': numpy.matrix(symdata),
        'kernel_data': numpy.matrix(data),
        'kernel_dim_square': dim_square
    }
    output.update(fileop.get_output(category.KERNEL, params))
    fileop.write(category.KERNEL, output)
def _run_string_complex(ftype):
    """Run preprocessor applied on complex StringFeatures.

    @param ftype Feature type, like Word
    """
    kern_params = {
        'name': 'Comm' + ftype + 'String',
        'accuracy': 1e-9,
        'feature_class': 'string_complex',
        'feature_type': ftype,
        'data': dataop.get_dna()
    }
    feats = featop.get_features(
        kern_params['feature_class'], kern_params['feature_type'],
        kern_params['data'])

    # string_complex gets preproc added implicitely on Word/Ulong feats
    output = _compute(feats, kern_params)

    # record the sorting preprocessor alongside the kernel output
    preproc_params = {'name': 'Sort' + ftype + 'String'}
    output.update(fileop.get_output(category.PREPROC, preproc_params))
    fileop.write(category.PREPROC, output)
def _run(name, first_arg):
    """
    Run generator for a specific clustering method.

    Builds a EuclidianDistance over random clouds, trains the named
    clustering method on it and writes both distance and clustering
    outputs for regression testing.

    @param name Name of the clustering method to run.
    @param first_arg First argument to the clustering's constructor; so
                     far, only this distinguishes the instantiation of the
                     different methods.
    """
    # seed the RNG so the generated clouds are reproducible
    Math_init_random(dataop.INIT_RANDOM)

    num_clouds = 3
    dist_params = {
        'name': 'EuclidianDistance',
        'data': dataop.get_clouds(num_clouds, 5),
        'feature_class': 'simple',
        'feature_type': 'Real'
    }
    feats = featop.get_features(
        dist_params['feature_class'], dist_params['feature_type'],
        dist_params['data'])
    # resolve the distance class by name; train-vs-train for training
    dfun = eval(dist_params['name'])
    distance = dfun(feats['train'], feats['train'])
    output = fileop.get_output(category.DISTANCE, dist_params)

    # `first_arg` doubles as output key and first constructor argument
    cluster_params = {'name': name, 'accuracy': 1e-8, first_arg: num_clouds}
    ctor = eval('clustering.' + name)
    clusterer = ctor(cluster_params[first_arg], distance)
    clusterer.train()

    # re-init distance on train-vs-test before collecting results
    distance.init(feats['train'], feats['test'])
    if name == 'KMeans':
        cluster_params['radi'] = clusterer.get_radiuses()
        cluster_params['centers'] = clusterer.get_cluster_centers()
    elif name == 'Hierarchical':
        cluster_params['merge_distance'] = clusterer.get_merge_distances()
        cluster_params['pairs'] = clusterer.get_cluster_pairs()

    output.update(fileop.get_output(category.CLUSTERING, cluster_params))
    fileop.write(category.CLUSTERING, output)
def _run_knn ():
    """Run K-Nearest-Neighbour classifier.

    Trains a KNN classifier (k=3) on a EuclidianDistance over two random
    clouds and records the classification of the test set.
    """
    dist_params={
        'name': 'EuclidianDistance',
        'data': dataop.get_clouds(2),
        'feature_class': 'simple',
        'feature_type': 'Real'
    }
    feats=featop.get_features(
        dist_params['feature_class'], dist_params['feature_type'],
        dist_params['data'])
    # resolve the distance class by name; train-vs-train for training
    dfun=eval(dist_params['name'])
    distance=dfun(feats['train'], feats['train'])
    output=fileop.get_output(category.DISTANCE, dist_params)

    knn_params={
        'name': 'KNN',
        'type': 'knn',
        'num_threads': 1,
        'k': 3,
        'label_type': 'twoclass',
        'accuracy': 1e-8
    }
    # raw label values go into the output; the Labels object trains KNN
    knn_params['labels'], labels=dataop.get_labels(
        feats['train'].get_num_vectors(), knn_params['label_type'])

    knn=classifier.KNN(knn_params['k'], distance, labels)
    knn.parallel.set_num_threads(knn_params['num_threads'])
    knn.train()

    # switch the distance to train-vs-test before classifying
    distance.init(feats['train'], feats['test'])
    knn_params['classified']=knn.classify().get_labels()

    output.update(fileop.get_output(category.CLASSIFIER, knn_params))
    fileop.write(category.CLASSIFIER, output)
def _run_wdsvmocas ():
    """Run Weighted Degree SVM Ocas classifier.

    Builds RAWDNA string_complex/Byte features over DNA data and runs the
    WDSVMOcas classifier through _loop_svm.
    """
    params={
        'type': 'wdsvmocas',
        'degree': 1,
        'bias_enabled': False,
        # NOTE(review): NUM_VEC_TRAIN is passed three times here; if the
        # later arguments are test-set sizes, NUM_VEC_TEST may have been
        # intended -- confirm against dataop.get_dna's signature.
        'data': dataop.get_dna(
            dataop.NUM_VEC_TRAIN, dataop.NUM_VEC_TRAIN,
            dataop.NUM_VEC_TRAIN),
        'feature_class': 'string_complex',
        'feature_type': 'Byte',
        'alphabet': 'RAWDNA',
        'label_type': 'twoclass',
        'order': 1,
        'gap': 0,
        'reverse': False
    }
    # alphabet constant resolved by name from the current namespace
    feats=featop.get_features(
        params['feature_class'], params['feature_type'], params['data'],
        eval(params['alphabet']), params['order'], params['gap'],
        params['reverse'])

    _loop_svm(('WDSVMOcas',), params, feats)
def _run_feats_string():
    """Run kernel with StringFeatures.

    Exercises each string (Char) kernel against the same DNA data. A
    single `params` dict is mutated in place between `_compute` calls,
    so statement order is load-bearing.
    """
    params = {
        'accuracy': 1e-9,
        'data': dataop.get_dna(),
        'feature_class': 'string',
        'feature_type': 'Char',
    }
    feats = featop.get_features(params['feature_class'],
                                params['feature_type'], params['data'])

    params['name'] = 'FixedDegreeString'
    params['args'] = {'key': ('size', 'degree'), 'val': (10, 3)}
    _compute(feats, params)

    # accuracy 0: exact match expected
    params['accuracy'] = 0
    params['name'] = 'LocalAlignmentString'
    params['args'] = {'key': ('size', ), 'val': (10, )}
    _compute(feats, params)

    params['accuracy'] = 1e-10
    params['name'] = 'PolyMatchString'
    # run both the inhomogeneous and homogeneous variants
    params['args'] = {
        'key': ('size', 'degree', 'inhomogene'),
        'val': (10, 3, True)
    }
    _compute(feats, params)
    params['args']['val'] = (10, 3, False)
    _compute(feats, params)

    params['accuracy'] = 1e-15
    params['name'] = 'SimpleLocalityImprovedString'
    params['args'] = {
        'key': ('size', 'length', 'inner_degree', 'outer_degree'),
        'val': (10, 5, 7, 5)
    }
    _compute(feats, params)
    # buggy:
    #params['name']='LocalityImprovedString'
    #_compute(feats, params)

    params['name'] = 'WeightedDegreeString'
    params['accuracy'] = 1e-9
    params['args'] = {'key': ('degree', ), 'val': (20, )}
    _compute(feats, params)
    params['args'] = {'key': ('degree', ), 'val': (1, )}
    _compute(feats, params)

    params['name'] = 'WeightedDegreePositionString'
    params['args'] = {'key': ('size', 'degree'), 'val': (10, 20)}
    _compute(feats, params)
    params['args'] = {'key': ('size', 'degree'), 'val': (10, 1)}
    _compute(feats, params)

    params['name'] = 'OligoString'
    params['args'] = {'key': ('size', 'k', 'width'), 'val': (10, 3, 1.2)}
    _compute(feats, params)
    params['args'] = {'key': ('size', 'k', 'width'), 'val': (10, 4, 1.7)}
    _compute(feats, params)

    params['name'] = 'LinearString'
    params['accuracy'] = 1e-8
    params['normalizer'] = kernel.AvgDiagKernelNormalizer()
    # LinearString takes no constructor args; drop the stale ones
    del params['args']
    _compute(feats, params)
def _run_feats_real():
    """Run kernel with RealFeatures.

    Runs every real-valued kernel over the same random data, using dense
    `feats` or sparse `sparsefeats` as appropriate. A single `params`
    dict is mutated in place between `_compute` calls, so statement
    order is load-bearing.
    """
    params = {
        'data': dataop.get_rand(),
        'accuracy': 1e-8,
        'feature_class': 'simple',
        'feature_type': 'Real'
    }
    feats = featop.get_features(params['feature_class'],
                                params['feature_type'], params['data'])
    sparsefeats = featop.get_features(params['feature_class'],
                                      params['feature_type'],
                                      params['data'], sparse=True)

    params['name'] = 'Gaussian'
    params['args'] = {
        'key': (
            'size',
            'width',
        ),
        'val': (10, 1.3)
    }
    _compute(feats, params)

    params['name'] = 'GaussianShift'
    params['args'] = {
        'key': ('size', 'width', 'max_shift', 'shift_step'),
        'val': (10, 1.3, 2, 1)
    }
    _compute(feats, params)

    params['name'] = 'SparseGaussian'
    params['args'] = {'key': ('size', 'width'), 'val': (10, 1.7)}
    _compute(sparsefeats, params)

    # accuracy 0: exact match expected for these trivial kernels
    params['accuracy'] = 0
    params['name'] = 'Const'
    params['args'] = {'key': ('c', ), 'val': (23., )}
    _compute(feats, params)

    params['name'] = 'Diag'
    params['args'] = {'key': ('size', 'diag'), 'val': (10, 23.)}
    _compute(feats, params)

    params['accuracy'] = 1e-9
    params['name'] = 'Sigmoid'
    params['args'] = {'key': ('size', 'gamma', 'coef0'), 'val': (10, 1.1, 1.3)}
    _compute(feats, params)
    params['args']['val'] = (10, 0.5, 0.7)
    _compute(feats, params)

    params['name'] = 'Chi2'
    params['args'] = {'key': ('size', 'width'), 'val': (10, 1.2)}
    _compute(feats, params)

    params['accuracy'] = 1e-8
    params['name'] = 'SparsePoly'
    # run both the inhomogeneous and homogeneous variants
    params['args'] = {
        'key': ('size', 'degree', 'inhomogene'),
        'val': (10, 3, True)
    }
    _compute(sparsefeats, params)
    params['args']['val'] = (10, 3, False)
    _compute(sparsefeats, params)

    params['name'] = 'Poly'
    params['normalizer'] = kernel.SqrtDiagKernelNormalizer()
    params['args'] = {
        'key': ('size', 'degree', 'inhomogene'),
        'val': (10, 3, True)
    }
    _compute(feats, params)
    params['args']['val'] = (10, 3, False)
    _compute(feats, params)

    # Linear kernels take no constructor args: switch normalizer, drop args
    params['normalizer'] = kernel.AvgDiagKernelNormalizer()
    del params['args']
    params['name'] = 'Linear'
    _compute(feats, params)
    params['name'] = 'SparseLinear'
    _compute(sparsefeats, params)
def _run_svm_kernel ():
    """Run all kernel-based SVMs.

    Pairs each kernel (Gaussian, Linear, CommWordString,
    CommUlongString, WeightedDegreeString, WeightedDegreePositionString)
    with the SVM variants that support it. The shared `params` dict is
    mutated in place between `_loop_svm` calls -- including keys added
    and deleted mid-way -- so statement order is load-bearing.
    """
    # NOTE(review): the local variable `kernel` shadows the `kernel`
    # module used elsewhere in this file -- harmless inside this function,
    # but confusing to read.
    kparams={
        'name': 'Gaussian',
        'args': {'key': ('width',), 'val': (1.5,)},
        'feature_class': 'simple',
        'feature_type': 'Real',
        'data': dataop.get_clouds(2)
    }
    feats=featop.get_features(
        kparams['feature_class'], kparams['feature_type'], kparams['data'])
    kernel=GaussianKernel(10, *kparams['args']['val'])
    output=fileop.get_output(category.KERNEL, kparams)

    # two-class kernel SVMs on the Gaussian kernel
    svms=('SVMLight', 'LibSVM', 'GPBTSVM', 'MPDSVM')
    params={
        'type': 'kernel',
        'label_type': 'twoclass'
    }
    _loop_svm(svms, params, feats, kernel, output)

    # one-class SVM: no labels
    svms=('LibSVMOneClass',)
    params['label_type']=None
    _loop_svm(svms, params, feats, kernel, output)

    # multi-class SVMs need three clouds and series labels
    svms=('LibSVMMultiClass', 'GMNPSVM')
    params['label_type']='series'
    kparams['data']=dataop.get_clouds(3)
    feats=featop.get_features(
        kparams['feature_class'], kparams['feature_type'], kparams['data'])
    output=fileop.get_output(category.KERNEL, kparams)
    _loop_svm(svms, params, feats, kernel, output)

    # back to two-class for the remaining kernels
    svms=('SVMLight', 'GPBTSVM')
    params['label_type']='twoclass'
    kparams={
        'name': 'Linear',
        'feature_class': 'simple',
        'feature_type': 'Real',
        'data': dataop.get_clouds(2),
        'normalizer': AvgDiagKernelNormalizer()
    }
    feats=featop.get_features(
        kparams['feature_class'], kparams['feature_type'], kparams['data'])
    kernel=LinearKernel()
    kernel.set_normalizer(kparams['normalizer'])
    output=fileop.get_output(category.KERNEL, kparams)
    _loop_svm(svms, params, feats, kernel, output)

    kparams={
        'name': 'CommWordString',
        'args': {'key': ('use_sign',), 'val': (False,)},
        'data': dataop.get_dna(),
        'feature_class': 'string_complex',
        'feature_type': 'Word'
    }
    feats=featop.get_features(
        kparams['feature_class'], kparams['feature_type'], kparams['data'])
    kernel=CommWordStringKernel(10, *kparams['args']['val'])
    output=fileop.get_output(category.KERNEL, kparams)
    _loop_svm(svms, params, feats, kernel, output)

    kparams={
        'name': 'CommUlongString',
        'args': {'key': ('use_sign',), 'val': (False,)},
        'data': dataop.get_dna(),
        'feature_class': 'string_complex',
        'feature_type': 'Ulong'
    }
    feats=featop.get_features(
        kparams['feature_class'], kparams['feature_type'], kparams['data'])
    kernel=CommUlongStringKernel(10, *kparams['args']['val'])
    output=fileop.get_output(category.KERNEL, kparams)
    _loop_svm(svms, params, feats, kernel, output)

    kparams={
        'name': 'WeightedDegreeString',
        'args': {'key': ('degree',), 'val': (3,)},
        'data': dataop.get_dna(),
        'feature_class': 'string',
        'feature_type': 'Char'
    }
    feats=featop.get_features(
        kparams['feature_class'], kparams['feature_type'], kparams['data'])
    kernel=WeightedDegreeStringKernel(*kparams['args']['val'])
    output=fileop.get_output(category.KERNEL, kparams)
    # plain run, then with linadd, then with linadd+batch optimization
    _loop_svm(svms, params, feats, kernel, output)
    params['linadd_enabled']=True
    _loop_svm(svms, params, feats, kernel, output)
    params['batch_enabled']=True
    _loop_svm(svms, params, feats, kernel, output)

    kparams={
        'name': 'WeightedDegreePositionString',
        'args': {'key': ('degree',), 'val': (20,)},
        'data': dataop.get_dna(),
        'feature_class': 'string',
        'feature_type': 'Char'
    }
    feats=featop.get_features(
        kparams['feature_class'], kparams['feature_type'], kparams['data'])
    kernel=WeightedDegreePositionStringKernel(10, *kparams['args']['val'])
    output=fileop.get_output(category.KERNEL, kparams)
    # reset the optimization flags left over from the previous kernel,
    # then repeat the plain / linadd / linadd+batch sequence
    del params['linadd_enabled']
    del params['batch_enabled']
    _loop_svm(svms, params, feats, kernel, output)
    params['linadd_enabled']=True
    _loop_svm(svms, params, feats, kernel, output)
    params['batch_enabled']=True
    _loop_svm(svms, params, feats, kernel, output)