예제 #1
0
def _run_lda ():
	"""Generate reference output for the Linear Discriminant Analysis classifier.

	Builds features from dataop.get_clouds(2), trains classifier.LDA on the
	'train' features, classifies the 'test' features and writes the
	parameters together with the resulting labels via fileop.
	"""

	params = {
		'name': 'LDA',
		'type': 'lda',
		'gamma': 0.1,
		'num_threads': 1,
		'data': dataop.get_clouds(2),
		'feature_class': 'simple',
		'feature_type': 'Real',
		'label_type': 'twoclass',
		'accuracy': 1e-7
	}
	features = featop.get_features(
		params['feature_class'], params['feature_type'], params['data'])

	# get_labels returns a pair: the first element is stored in the
	# parameters, the second is handed to the classifier.
	num_train = features['train'].get_num_vectors()
	label_data, train_labels = dataop.get_labels(num_train, params['label_type'])
	params['labels'] = label_data

	machine = classifier.LDA(params['gamma'], features['train'], train_labels)
	machine.parallel.set_num_threads(params['num_threads'])
	machine.train()

	# switch to the test features before classifying
	machine.set_features(features['test'])
	prediction = machine.classify()
	params['classified'] = prediction.get_labels()

	record = fileop.get_output(category.CLASSIFIER, params)
	fileop.write(category.CLASSIFIER, record)
예제 #2
0
def _run_perceptron ():
	"""Generate reference output for the Perceptron classifier.

	Trains classifier.Perceptron on the 'train' features with the configured
	learn rate and iteration limit, records the bias read after training,
	classifies the 'test' features and writes everything via fileop.
	"""

	params = {
		'name': 'Perceptron',
		'type': 'perceptron',
		'num_threads': 1,
		'learn_rate': .1,
		'max_iter': 1000,
		'data': dataop.get_clouds(2),
		'feature_class': 'simple',
		'feature_type': 'Real',
		'label_type': 'twoclass',
		'accuracy': 1e-7
	}
	features = featop.get_features(
		params['feature_class'], params['feature_type'], params['data'])

	# get_labels returns a pair: the first element is stored in the
	# parameters, the second is handed to the classifier.
	label_data, train_labels = dataop.get_labels(
		features['train'].get_num_vectors(), params['label_type'])
	params['labels'] = label_data

	machine = classifier.Perceptron(features['train'], train_labels)
	machine.parallel.set_num_threads(params['num_threads'])
	machine.set_learn_rate(params['learn_rate'])
	machine.set_max_iter(params['max_iter'])
	machine.train()

	# the bias is read right after training, before the features are swapped
	params['bias'] = machine.get_bias()
	machine.set_features(features['test'])
	prediction = machine.classify()
	params['classified'] = prediction.get_labels()

	record = fileop.get_output(category.CLASSIFIER, params)
	fileop.write(category.CLASSIFIER, record)
예제 #3
0
def _run (name, first_arg):
	"""
	Run generator for a specific clustering method.

	A EuclidianDistance is set up on the training features, the requested
	clustering method is trained on it, and the distance parameters plus the
	clustering parameters/results are written as one record via fileop.

	@param name Name of the clustering method to run; it is looked up as an attribute of `clustering`. Method-specific results are only recorded for 'KMeans' and 'Hierarchical'.
	@param first_arg Parameter key under which the first argument to the clustering's constructor (the number of clouds) is stored; so far, only this distinguishes the instantiation of the different methods.
	"""

	# put some constantness into randomness
	Math_init_random(dataop.INIT_RANDOM)

	num_clouds=3
	params={
		'name': 'EuclidianDistance',
		'data': dataop.get_clouds(num_clouds, 5),
		'feature_class': 'simple',
		'feature_type': 'Real'
	}
	feats=featop.get_features(
		params['feature_class'], params['feature_type'], params['data'])
	# NOTE: eval() only ever sees the constant literal defined above, never
	# external input; the class is resolved by the same name that is written
	# to the output.
	dfun=eval(params['name'])
	distance=dfun(feats['train'], feats['train'])
	output=fileop.get_output(category.DISTANCE, params)

	params={
		'name': name,
		'accuracy': 1e-8,
		first_arg: num_clouds
	}
	# Plain attribute lookup instead of eval(). The trained instance gets its
	# own local name so that it does not shadow the global `clustering`
	# (binding a local called `clustering` would make this lookup fail).
	method=getattr(clustering, name)
	clusterer=method(params[first_arg], distance)
	clusterer.train()

	# re-initialise the distance with train vs. test features before the
	# results are collected
	distance.init(feats['train'], feats['test'])
	if name=='KMeans':
		params['radi']=clusterer.get_radiuses()
		params['centers']=clusterer.get_cluster_centers()
	elif name=='Hierarchical':
		params['merge_distance']=clusterer.get_merge_distances()
		params['pairs']=clusterer.get_cluster_pairs()

	output.update(fileop.get_output(category.CLUSTERING, params))
	fileop.write(category.CLUSTERING, output)
예제 #4
0
def _run_svm_linear ():
	"""Run all SVMs based on (Sparse) Linear Classifiers.

	The first three _loop_svm passes share one parameter dict (mutated
	between passes) and features requested with sparse=True from
	dataop.get_clouds(2); the final pass re-runs the last SVM group on 'wd'
	Byte features built from dataop.get_dna().
	"""

	svm_params = {
		'type': 'linear',
		'bias_enabled': False,
		'data': dataop.get_clouds(2),
		'feature_class': 'simple',
		'feature_type': 'Real',
		'label_type': 'twoclass'
	}
	features = featop.get_features(
		svm_params['feature_class'], svm_params['feature_type'],
		svm_params['data'], sparse=True)

	# first group runs with bias enabled
	svm_params['bias_enabled'] = True
	_loop_svm(('LibLinear', 'SVMLin', 'SVMSGD'), svm_params, features)

	# SubGradientSVM needs max_train_time to terminate
	svm_params['bias_enabled'] = False
	# up to 2. does not improve test results :(
	svm_params['max_train_time'] = .5
	_loop_svm(('SubGradientSVM',), svm_params, features)

	# same parameter dict (including max_train_time) is reused here
	ocas_only = ('SVMOcas',)
	_loop_svm(ocas_only, svm_params, features)

	# fresh parameters for the DNA based run
	svm_params = {
		'type': 'linear',
		'bias_enabled': False,
		'label_type': 'twoclass',
		'feature_class': 'wd',
		'feature_type': 'Byte',
		'data': dataop.get_dna(),
		'alphabet': 'RAWDNA',
		'order': 1
	}
	features = featop.get_features(
		svm_params['feature_class'], svm_params['feature_type'],
		svm_params['data'], svm_params['order'])
	_loop_svm(ocas_only, svm_params, features)
예제 #5
0
def _run(name, first_arg):
    """
    Run generator for a specific clustering method.

    A EuclidianDistance is set up on the training features, the requested
    clustering method is trained on it, and the distance parameters plus the
    clustering parameters/results are written as one record via fileop.

    @param name Name of the clustering method to run; it is looked up as an attribute of `clustering`. Method-specific results are only recorded for 'KMeans' and 'Hierarchical'.
    @param first_arg Parameter key under which the first argument to the clustering's constructor (the number of clouds) is stored; so far, only this distinguishes the instantiation of the different methods.
    """

    # put some constantness into randomness
    Math_init_random(dataop.INIT_RANDOM)

    num_clouds = 3
    params = {
        'name': 'EuclidianDistance',
        'data': dataop.get_clouds(num_clouds, 5),
        'feature_class': 'simple',
        'feature_type': 'Real'
    }
    feats = featop.get_features(params['feature_class'],
                                params['feature_type'], params['data'])
    # NOTE: eval() only ever sees the constant literal defined above, never
    # external input; the class is resolved by the same name that is written
    # to the output.
    dfun = eval(params['name'])
    distance = dfun(feats['train'], feats['train'])
    output = fileop.get_output(category.DISTANCE, params)

    params = {'name': name, 'accuracy': 1e-8, first_arg: num_clouds}
    # Plain attribute lookup instead of eval(). The trained instance gets its
    # own local name so that it does not shadow the global `clustering`
    # (binding a local called `clustering` would make this lookup fail).
    method = getattr(clustering, name)
    clusterer = method(params[first_arg], distance)
    clusterer.train()

    # re-initialise the distance with train vs. test features before the
    # results are collected
    distance.init(feats['train'], feats['test'])
    if name == 'KMeans':
        params['radi'] = clusterer.get_radiuses()
        params['centers'] = clusterer.get_cluster_centers()
    elif name == 'Hierarchical':
        params['merge_distance'] = clusterer.get_merge_distances()
        params['pairs'] = clusterer.get_cluster_pairs()

    output.update(fileop.get_output(category.CLUSTERING, params))
    fileop.write(category.CLUSTERING, output)
예제 #6
0
def _run_knn ():
	"""Generate reference output for the K-Nearest-Neighbour classifier.

	A EuclidianDistance over the training features is handed to
	classifier.KNN; after training, the distance is re-initialised with
	train vs. test features and the classification result is written
	together with the distance parameters via fileop.
	"""

	distance_params = {
		'name': 'EuclidianDistance',
		'data': dataop.get_clouds(2),
		'feature_class': 'simple',
		'feature_type': 'Real'
	}
	features = featop.get_features(
		distance_params['feature_class'], distance_params['feature_type'],
		distance_params['data'])
	# the distance class is resolved from its (constant) name
	distance_class = eval(distance_params['name'])
	distance = distance_class(features['train'], features['train'])
	record = fileop.get_output(category.DISTANCE, distance_params)

	knn_params = {
		'name': 'KNN',
		'type': 'knn',
		'num_threads': 1,
		'k': 3,
		'label_type': 'twoclass',
		'accuracy': 1e-8
	}
	# get_labels returns a pair: the first element is stored in the
	# parameters, the second is handed to the classifier.
	num_train = features['train'].get_num_vectors()
	label_data, train_labels = dataop.get_labels(
		num_train, knn_params['label_type'])
	knn_params['labels'] = label_data

	machine = classifier.KNN(knn_params['k'], distance, train_labels)
	machine.parallel.set_num_threads(knn_params['num_threads'])
	machine.train()

	# classify against the test features
	distance.init(features['train'], features['test'])
	prediction = machine.classify()
	knn_params['classified'] = prediction.get_labels()

	record.update(fileop.get_output(category.CLASSIFIER, knn_params))
	fileop.write(category.CLASSIFIER, record)
예제 #7
0
def _run_svm_kernel ():
	"""Run all kernel-based SVMs.

	For each kernel setup the features, the kernel object and the kernel
	output record are built and then passed to _loop_svm together with a
	tuple of SVM names. The SVM parameter dict is shared and mutated between
	the individual passes, so the order of the sections matters.
	"""

	def build_features (spec):
		"""Build the features described by a kernel parameter dict."""
		return featop.get_features(
			spec['feature_class'], spec['feature_type'], spec['data'])

	# the three passes used for the weighted degree kernels:
	# as is, then with linadd enabled, then additionally with batch enabled
	passes = (None, 'linadd_enabled', 'batch_enabled')

	# --- Gaussian kernel on two clouds ---
	kernel_params = {
		'name': 'Gaussian',
		'args': {'key': ('width',), 'val': (1.5,)},
		'feature_class': 'simple',
		'feature_type': 'Real',
		'data': dataop.get_clouds(2)
	}
	features = build_features(kernel_params)
	kernel_args = kernel_params['args']['val']
	kernel = GaussianKernel(10, *kernel_args)
	kernel_output = fileop.get_output(category.KERNEL, kernel_params)

	svm_params = {
		'type': 'kernel',
		'label_type': 'twoclass'
	}
	_loop_svm(
		('SVMLight', 'LibSVM', 'GPBTSVM', 'MPDSVM'),
		svm_params, features, kernel, kernel_output)

	# one-class SVM runs without a label type
	svm_params['label_type'] = None
	_loop_svm(
		('LibSVMOneClass',), svm_params, features, kernel, kernel_output)

	# multiclass SVMs: same kernel object, features from three clouds
	svm_params['label_type'] = 'series'
	kernel_params['data'] = dataop.get_clouds(3)
	features = build_features(kernel_params)
	kernel_output = fileop.get_output(category.KERNEL, kernel_params)
	_loop_svm(
		('LibSVMMultiClass', 'GMNPSVM'),
		svm_params, features, kernel, kernel_output)

	# every remaining section uses these two SVMs with two-class labels
	two_class_svms = ('SVMLight', 'GPBTSVM')
	svm_params['label_type'] = 'twoclass'

	# --- Linear kernel with an AvgDiagKernelNormalizer ---
	kernel_params = {
		'name': 'Linear',
		'feature_class': 'simple',
		'feature_type': 'Real',
		'data': dataop.get_clouds(2),
		'normalizer': AvgDiagKernelNormalizer()
	}
	features = build_features(kernel_params)
	kernel = LinearKernel()
	kernel.set_normalizer(kernel_params['normalizer'])
	kernel_output = fileop.get_output(category.KERNEL, kernel_params)
	_loop_svm(two_class_svms, svm_params, features, kernel, kernel_output)

	# --- CommWordString kernel on DNA ---
	kernel_params = {
		'name': 'CommWordString',
		'args': {'key': ('use_sign',), 'val': (False,)},
		'data': dataop.get_dna(),
		'feature_class': 'string_complex',
		'feature_type': 'Word'
	}
	features = build_features(kernel_params)
	kernel_args = kernel_params['args']['val']
	kernel = CommWordStringKernel(10, *kernel_args)
	kernel_output = fileop.get_output(category.KERNEL, kernel_params)
	_loop_svm(two_class_svms, svm_params, features, kernel, kernel_output)

	# --- CommUlongString kernel on DNA ---
	kernel_params = {
		'name': 'CommUlongString',
		'args': {'key': ('use_sign',), 'val': (False,)},
		'data': dataop.get_dna(),
		'feature_class': 'string_complex',
		'feature_type': 'Ulong'
	}
	features = build_features(kernel_params)
	kernel_args = kernel_params['args']['val']
	kernel = CommUlongStringKernel(10, *kernel_args)
	kernel_output = fileop.get_output(category.KERNEL, kernel_params)
	_loop_svm(two_class_svms, svm_params, features, kernel, kernel_output)

	# --- WeightedDegreeString kernel on DNA ---
	kernel_params = {
		'name': 'WeightedDegreeString',
		'args': {'key': ('degree',), 'val': (3,)},
		'data': dataop.get_dna(),
		'feature_class': 'string',
		'feature_type': 'Char'
	}
	features = build_features(kernel_params)
	kernel_args = kernel_params['args']['val']
	kernel = WeightedDegreeStringKernel(*kernel_args)
	kernel_output = fileop.get_output(category.KERNEL, kernel_params)
	for flag in passes:
		if flag is not None:
			svm_params[flag] = True
		_loop_svm(two_class_svms, svm_params, features, kernel, kernel_output)

	# --- WeightedDegreePositionString kernel on DNA ---
	kernel_params = {
		'name': 'WeightedDegreePositionString',
		'args': {'key': ('degree',), 'val': (20,)},
		'data': dataop.get_dna(),
		'feature_class': 'string',
		'feature_type': 'Char'
	}
	features = build_features(kernel_params)
	kernel_args = kernel_params['args']['val']
	kernel = WeightedDegreePositionStringKernel(10, *kernel_args)
	kernel_output = fileop.get_output(category.KERNEL, kernel_params)
	# drop the flags left over from the previous section so that the first
	# pass runs without them again
	del svm_params['linadd_enabled']
	del svm_params['batch_enabled']
	for flag in passes:
		if flag is not None:
			svm_params[flag] = True
		_loop_svm(two_class_svms, svm_params, features, kernel, kernel_output)