def _run_lda():
    """Generate classifier output for Linear Discriminant Analysis.

    Builds features from ``dataop.get_clouds(2)``, trains an LDA on the
    training features, classifies the test features and writes the
    collected parameters and results through ``fileop``.
    """
    params = {
        'name': 'LDA',
        'type': 'lda',
        'gamma': 0.1,
        'num_threads': 1,
        'data': dataop.get_clouds(2),
        'feature_class': 'simple',
        'feature_type': 'Real',
        'label_type': 'twoclass',
        'accuracy': 1e-7
    }

    feats = featop.get_features(
        params['feature_class'], params['feature_type'], params['data'])
    train_feats = feats['train']

    # get_labels yields a pair: the first item is recorded in params, the
    # second is the label object handed to the classifier.
    recorded_labels, labels = dataop.get_labels(
        train_feats.get_num_vectors(), params['label_type'])
    params['labels'] = recorded_labels

    lda = classifier.LDA(params['gamma'], train_feats, labels)
    lda.parallel.set_num_threads(params['num_threads'])
    lda.train()

    # Switch to the test features before classifying.
    lda.set_features(feats['test'])
    predictions = lda.classify()
    params['classified'] = predictions.get_labels()

    fileop.write(
        category.CLASSIFIER,
        fileop.get_output(category.CLASSIFIER, params))
def _run_perceptron():
    """Generate classifier output for the Perceptron.

    Builds features from ``dataop.get_clouds(2)``, trains a Perceptron with
    the configured learn rate and iteration limit, records its bias,
    classifies the test features and writes everything through ``fileop``.
    """
    params = {
        'name': 'Perceptron',
        'type': 'perceptron',
        'num_threads': 1,
        'learn_rate': 0.1,
        'max_iter': 1000,
        'data': dataop.get_clouds(2),
        'feature_class': 'simple',
        'feature_type': 'Real',
        'label_type': 'twoclass',
        'accuracy': 1e-7
    }

    feats = featop.get_features(
        params['feature_class'], params['feature_type'], params['data'])
    train_feats = feats['train']

    # get_labels yields a pair: the first item is recorded in params, the
    # second is the label object handed to the classifier.
    recorded_labels, labels = dataop.get_labels(
        train_feats.get_num_vectors(), params['label_type'])
    params['labels'] = recorded_labels

    perceptron = classifier.Perceptron(train_feats, labels)
    perceptron.parallel.set_num_threads(params['num_threads'])
    perceptron.set_learn_rate(params['learn_rate'])
    perceptron.set_max_iter(params['max_iter'])
    perceptron.train()

    # The bias is read right after training, before the features are swapped.
    params['bias'] = perceptron.get_bias()

    perceptron.set_features(feats['test'])
    predictions = perceptron.classify()
    params['classified'] = predictions.get_labels()

    fileop.write(
        category.CLASSIFIER,
        fileop.get_output(category.CLASSIFIER, params))
def _run(name, first_arg):
    """
    Run generator for a specific clustering method.

    @param name Name of the clustering method to run; it is looked up as an
                attribute of the global ``clustering`` namespace.
    @param first_arg Key under which the first argument to the clustering's
                     constructor is recorded; so far, only this distinguishes
                     the instantiation of the different methods.
    """
    # put some constantness into randomness
    Math_init_random(dataop.INIT_RANDOM)

    num_clouds = 3
    params = {
        'name': 'EuclidianDistance',
        'data': dataop.get_clouds(num_clouds, 5),
        'feature_class': 'simple',
        'feature_type': 'Real'
    }
    feats = featop.get_features(
        params['feature_class'], params['feature_type'], params['data'])

    # Referenced directly rather than through eval(params['name']); the class
    # used here must stay in sync with the 'name' entry recorded above.
    distance = EuclidianDistance(feats['train'], feats['train'])
    output = fileop.get_output(category.DISTANCE, params)

    params = {
        'name': name,
        'accuracy': 1e-8,
        first_arg: num_clouds
    }

    # Plain attribute lookup replaces eval('clustering.' + name).  The
    # instance is deliberately NOT bound to a local called ``clustering``:
    # that would make the name local to the whole function and hide the
    # global namespace the lookup relies on.
    method_class = getattr(clustering, name)
    clusterer = method_class(params[first_arg], distance)
    clusterer.train()

    # Re-initialise the distance with train vs. test features after training.
    distance.init(feats['train'], feats['test'])

    if name == 'KMeans':
        params['radi'] = clusterer.get_radiuses()
        params['centers'] = clusterer.get_cluster_centers()
    elif name == 'Hierarchical':
        params['merge_distance'] = clusterer.get_merge_distances()
        params['pairs'] = clusterer.get_cluster_pairs()

    output.update(fileop.get_output(category.CLUSTERING, params))
    fileop.write(category.CLUSTERING, output)
def _run_svm_linear():
    """Run all SVMs based on (Sparse) Linear Classifiers.

    Two rounds are generated: first on sparse real-valued features built
    from ``dataop.get_clouds(2)``, then on 'wd' byte features built from
    ``dataop.get_dna()``.  Each round is delegated to ``_loop_svm``.
    """
    params = {
        'type': 'linear',
        'bias_enabled': False,
        'data': dataop.get_clouds(2),
        'feature_class': 'simple',
        'feature_type': 'Real',
        'label_type': 'twoclass'
    }
    feats = featop.get_features(
        params['feature_class'],
        params['feature_type'],
        params['data'],
        sparse=True)

    # First batch runs with the bias switched on.
    params['bias_enabled'] = True
    _loop_svm(('LibLinear', 'SVMLin', 'SVMSGD'), params, feats)

    # SubGradientSVM needs max_train_time to terminate
    params['bias_enabled'] = False
    # up to 2. does not improve test results :(
    params['max_train_time'] = 0.5
    _loop_svm(('SubGradientSVM',), params, feats)

    # SVMOcas is run on the same parameters, and again below on DNA data.
    ocas_only = ('SVMOcas',)
    _loop_svm(ocas_only, params, feats)

    params = {
        'type': 'linear',
        'bias_enabled': False,
        'label_type': 'twoclass',
        'feature_class': 'wd',
        'feature_type': 'Byte',
        'data': dataop.get_dna(),
        'alphabet': 'RAWDNA',
        'order': 1
    }
    feats = featop.get_features(
        params['feature_class'],
        params['feature_type'],
        params['data'],
        params['order'])
    _loop_svm(ocas_only, params, feats)
def _run(name, first_arg):
    """
    Run generator for a specific clustering method.

    @param name Name of the clustering method to run; it is looked up as an
                attribute of the global ``clustering`` namespace.
    @param first_arg Key under which the first argument to the clustering's
                     constructor is recorded; so far, only this distinguishes
                     the instantiation of the different methods.
    """
    # put some constantness into randomness
    Math_init_random(dataop.INIT_RANDOM)

    num_clouds = 3
    params = {
        'name': 'EuclidianDistance',
        'data': dataop.get_clouds(num_clouds, 5),
        'feature_class': 'simple',
        'feature_type': 'Real'
    }
    feats = featop.get_features(params['feature_class'],
                                params['feature_type'], params['data'])

    # Use the class directly instead of eval(params['name']); it has to match
    # the 'name' entry stored in params above.
    distance = EuclidianDistance(feats['train'], feats['train'])
    output = fileop.get_output(category.DISTANCE, params)

    params = {'name': name, 'accuracy': 1e-8, first_arg: num_clouds}

    # getattr replaces eval('clustering.' + name).  The trained object is
    # kept in ``method`` so that no local variable named ``clustering``
    # shadows the global namespace the class is fetched from.
    method = getattr(clustering, name)(params[first_arg], distance)
    method.train()

    # Re-initialise the distance with train vs. test features after training.
    distance.init(feats['train'], feats['test'])

    if name == 'KMeans':
        params['radi'] = method.get_radiuses()
        params['centers'] = method.get_cluster_centers()
    elif name == 'Hierarchical':
        params['merge_distance'] = method.get_merge_distances()
        params['pairs'] = method.get_cluster_pairs()

    output.update(fileop.get_output(category.CLUSTERING, params))
    fileop.write(category.CLUSTERING, output)
def _run_knn():
    """Run K-Nearest-Neighbour classifier.

    Builds a Euclidian distance over the training features, trains a KNN on
    it, re-initialises the distance with train vs. test features, classifies
    and writes the distance and classifier parameters through ``fileop``.
    """
    params = {
        'name': 'EuclidianDistance',
        'data': dataop.get_clouds(2),
        'feature_class': 'simple',
        'feature_type': 'Real'
    }
    feats = featop.get_features(
        params['feature_class'], params['feature_type'], params['data'])

    # Referenced directly rather than through eval(params['name']); the class
    # used here must stay in sync with the 'name' entry recorded above.
    distance = EuclidianDistance(feats['train'], feats['train'])
    output = fileop.get_output(category.DISTANCE, params)

    # From here on params describes the classifier, not the distance.
    params = {
        'name': 'KNN',
        'type': 'knn',
        'num_threads': 1,
        'k': 3,
        'label_type': 'twoclass',
        'accuracy': 1e-8
    }
    # get_labels yields a pair: the first item is recorded in params, the
    # second is the label object handed to the classifier.
    params['labels'], labels = dataop.get_labels(
        feats['train'].get_num_vectors(), params['label_type'])

    knn = classifier.KNN(params['k'], distance, labels)
    knn.parallel.set_num_threads(params['num_threads'])
    knn.train()

    # Classification runs on the distance between train and test features.
    distance.init(feats['train'], feats['test'])
    params['classified'] = knn.classify().get_labels()

    output.update(fileop.get_output(category.CLASSIFIER, params))
    fileop.write(category.CLASSIFIER, output)
def _run_svm_kernel():
    """Run all kernel-based SVMs.

    Walks through a fixed sequence of kernels (Gaussian, Linear,
    CommWordString, CommUlongString, WeightedDegreeString,
    WeightedDegreePositionString).  For each one it builds the features and
    the kernel output description, then hands the selected SVM names to
    ``_loop_svm``.  The same SVM parameter dict is reused and mutated between
    rounds, so the order of the statements matters.
    """

    def features_for(kernel_params):
        # Build the feature objects described by a kernel parameter dict.
        return featop.get_features(
            kernel_params['feature_class'],
            kernel_params['feature_type'],
            kernel_params['data'])

    # --- Gaussian kernel, two-class SVMs --------------------------------
    kparams = {
        'name': 'Gaussian',
        'args': {'key': ('width',), 'val': (1.5,)},
        'feature_class': 'simple',
        'feature_type': 'Real',
        'data': dataop.get_clouds(2)
    }
    feats = features_for(kparams)
    kernel = GaussianKernel(10, *kparams['args']['val'])
    output = fileop.get_output(category.KERNEL, kparams)

    svm_params = {
        'type': 'kernel',
        'label_type': 'twoclass'
    }
    _loop_svm(('SVMLight', 'LibSVM', 'GPBTSVM', 'MPDSVM'),
              svm_params, feats, kernel, output)

    # --- Gaussian kernel, one-class SVM (no label type) -----------------
    svm_params['label_type'] = None
    _loop_svm(('LibSVMOneClass',), svm_params, feats, kernel, output)

    # --- Gaussian kernel, multi-class SVMs on fresh get_clouds(3) data --
    # The Gaussian kernel object from above is reused; only the data,
    # features and output description are rebuilt.
    svm_params['label_type'] = 'series'
    kparams['data'] = dataop.get_clouds(3)
    feats = features_for(kparams)
    output = fileop.get_output(category.KERNEL, kparams)
    _loop_svm(('LibSVMMultiClass', 'GMNPSVM'),
              svm_params, feats, kernel, output)

    # All remaining rounds use the same two SVMs with two-class labels.
    svms = ('SVMLight', 'GPBTSVM')
    svm_params['label_type'] = 'twoclass'

    # --- Linear kernel with an average-diagonal normalizer --------------
    kparams = {
        'name': 'Linear',
        'feature_class': 'simple',
        'feature_type': 'Real',
        'data': dataop.get_clouds(2),
        'normalizer': AvgDiagKernelNormalizer()
    }
    feats = features_for(kparams)
    kernel = LinearKernel()
    kernel.set_normalizer(kparams['normalizer'])
    output = fileop.get_output(category.KERNEL, kparams)
    _loop_svm(svms, svm_params, feats, kernel, output)

    # --- CommWordString kernel ------------------------------------------
    kparams = {
        'name': 'CommWordString',
        'args': {'key': ('use_sign',), 'val': (False,)},
        'data': dataop.get_dna(),
        'feature_class': 'string_complex',
        'feature_type': 'Word'
    }
    feats = features_for(kparams)
    kernel = CommWordStringKernel(10, *kparams['args']['val'])
    output = fileop.get_output(category.KERNEL, kparams)
    _loop_svm(svms, svm_params, feats, kernel, output)

    # --- CommUlongString kernel -----------------------------------------
    kparams = {
        'name': 'CommUlongString',
        'args': {'key': ('use_sign',), 'val': (False,)},
        'data': dataop.get_dna(),
        'feature_class': 'string_complex',
        'feature_type': 'Ulong'
    }
    feats = features_for(kparams)
    kernel = CommUlongStringKernel(10, *kparams['args']['val'])
    output = fileop.get_output(category.KERNEL, kparams)
    _loop_svm(svms, svm_params, feats, kernel, output)

    # --- WeightedDegreeString kernel ------------------------------------
    kparams = {
        'name': 'WeightedDegreeString',
        'args': {'key': ('degree',), 'val': (3,)},
        'data': dataop.get_dna(),
        'feature_class': 'string',
        'feature_type': 'Char'
    }
    feats = features_for(kparams)
    kernel = WeightedDegreeStringKernel(*kparams['args']['val'])
    output = fileop.get_output(category.KERNEL, kparams)
    # Plain run first, then with linadd enabled, then with batch enabled too.
    _loop_svm(svms, svm_params, feats, kernel, output)
    for flag in ('linadd_enabled', 'batch_enabled'):
        svm_params[flag] = True
        _loop_svm(svms, svm_params, feats, kernel, output)

    # --- WeightedDegreePositionString kernel ----------------------------
    kparams = {
        'name': 'WeightedDegreePositionString',
        'args': {'key': ('degree',), 'val': (20,)},
        'data': dataop.get_dna(),
        'feature_class': 'string',
        'feature_type': 'Char'
    }
    feats = features_for(kparams)
    kernel = WeightedDegreePositionStringKernel(10, *kparams['args']['val'])
    output = fileop.get_output(category.KERNEL, kparams)

    # Drop the flags left over from the previous kernel so the first run of
    # this round is again the plain one.
    del svm_params['linadd_enabled']
    del svm_params['batch_enabled']
    _loop_svm(svms, svm_params, feats, kernel, output)
    for flag in ('linadd_enabled', 'batch_enabled'):
        svm_params[flag] = True
        _loop_svm(svms, svm_params, feats, kernel, output)