Example #1
File: kernel.py Project: AsherBond/shogun
def _run_auc ():
	"""Run AUC kernel."""

	# handle subkernel
	params={
		'name': 'Gaussian',
		'data': dataop.get_rand(),
		'feature_class': 'simple',
		'feature_type': 'Real',
		'args': {'key': ('size', 'width'), 'val': (10, 1.7)}
	}
	feats=featop.get_features(
		params['feature_class'], params['feature_type'], params['data'])
	subk=kernel.GaussianKernel(*params['args']['val'])
	subk.init(feats['train'], feats['test'])
	output=fileop.get_output(category.KERNEL, params, 'subkernel0_')

	# handle AUC
	params={
		'name': 'AUC',
		'data': dataop.get_rand(numpy.ushort, num_feats=2,
			max_train=dataop.NUM_VEC_TRAIN, max_test=dataop.NUM_VEC_TEST),
		'feature_class': 'simple',
		'feature_type': 'Word',
		'accuracy': 1e-8,
		'args': {'key': ('size', 'subkernel'), 'val': (10, subk)}
	}
	feats=featop.get_features(
		params['feature_class'], params['feature_type'], params['data'])
	_compute(feats, params, output)
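Note: the _compute helper called by these kernel examples is defined elsewhere in kernel.py and is not part of this listing. The following is only a rough sketch of what it presumably does, inferred from the kernel-matrix pattern used in _run_combined and _run_custom below; the real helper's signature, key names and write-out may differ.

def _compute (feats, params, output=None):
	"""Sketch only: build the named kernel, compute train/test kernel matrices, write output."""
	if output is None:
		output={}
	kfun=eval('kernel.'+params['name']+'Kernel')  # e.g. kernel.GaussianKernel
	if 'args' in params:
		kern=kfun(*params['args']['val'])
	else:
		kern=kfun()
	if 'normalizer' in params:
		kern.set_normalizer(params['normalizer'])

	kern.init(feats['train'], feats['train'])
	output['kernel_matrix_train']=kern.get_kernel_matrix()
	kern.init(feats['train'], feats['test'])
	output['kernel_matrix_test']=kern.get_kernel_matrix()

	output.update(fileop.get_output(category.KERNEL, params))
	fileop.write(category.KERNEL, output)
	return output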
Example #2
def _run_feats_string_complex():
    """Run kernel with complex StringFeatures."""

    params = {"data": dataop.get_dna(), "feature_class": "string_complex"}

    params["feature_type"] = "Word"
    wordfeats = featop.get_features(params["feature_class"], params["feature_type"], params["data"])

    params["name"] = "CommWordString"
    params["accuracy"] = 1e-9
    params["args"] = {"key": ("size", "use_sign"), "val": (10, False)}
    _compute(wordfeats, params)
    params["name"] = "WeightedCommWordString"
    _compute(wordfeats, params)

    params["name"] = "PolyMatchWordString"
    params["accuracy"] = 1e-10
    params["args"] = {"key": ("size", "degree", "inhomogene"), "val": (10, 3, True)}
    _compute(wordfeats, params)
    params["args"]["val"] = (10, 3, False)
    _compute(wordfeats, params)

    params["name"] = "MatchWordString"
    params["args"] = {"key": ("size", "degree"), "val": (10, 3)}
    _compute(wordfeats, params)

    params["feature_type"] = "Ulong"
    params["accuracy"] = 1e-9
    ulongfeats = featop.get_features(params["feature_class"], params["feature_type"], params["data"])
    params["name"] = "CommUlongString"
    params["args"] = {"key": ("size", "use_sign"), "val": (10, False)}
    _compute(ulongfeats, params)
Example #3
def _run_auc():
    """Run AUC kernel."""

    # handle subkernel
    params = {
        "name": "Gaussian",
        "data": dataop.get_rand(),
        "feature_class": "simple",
        "feature_type": "Real",
        "args": {"key": ("size", "width"), "val": (10, 1.7)},
    }
    feats = featop.get_features(params["feature_class"], params["feature_type"], params["data"])
    subk = kernel.GaussianKernel(*params["args"]["val"])
    subk.init(feats["train"], feats["test"])
    output = fileop.get_output(category.KERNEL, params, "subkernel0_")

    # handle AUC
    params = {
        "name": "AUC",
        "data": dataop.get_rand(
            numpy.ushort, num_feats=2, max_train=dataop.NUM_VEC_TRAIN, max_test=dataop.NUM_VEC_TEST
        ),
        "feature_class": "simple",
        "feature_type": "Word",
        "accuracy": 1e-8,
        "args": {"key": ("size", "subkernel"), "val": (10, subk)},
    }
    feats = featop.get_features(params["feature_class"], params["feature_type"], params["data"])
    _compute(feats, params, output)
Example #4
File: kernel.py Project: manantomar/test
def _run_auc ():
	"""Run AUC kernel."""

	# handle subkernel
	params={
		'name': 'Gaussian',
		'data': dataop.get_rand(),
		'feature_class': 'simple',
		'feature_type': 'Real',
		'args': {'key': ('size', 'width'), 'val': (10, 1.7)}
	}
	feats=featop.get_features(
		params['feature_class'], params['feature_type'], params['data'])
	subk=kernel.GaussianKernel(*params['args']['val'])
	subk.init(feats['train'], feats['test'])
	output=fileop.get_output(category.KERNEL, params, 'subkernel0_')

	# handle AUC
	params={
		'name': 'AUC',
		'data': dataop.get_rand(numpy.ushort, num_feats=2,
			max_train=dataop.NUM_VEC_TRAIN, max_test=dataop.NUM_VEC_TEST),
		'feature_class': 'simple',
		'feature_type': 'Word',
		'accuracy': 1e-8,
		'args': {'key': ('size', 'subkernel'), 'val': (10, subk)}
	}
	feats=featop.get_features(
		params['feature_class'], params['feature_type'], params['data'])
	_compute(feats, params, output)
Example #5
def _run_feats_real():
    """Run kernel with RealFeatures."""

    params = {"data": dataop.get_rand(), "accuracy": 1e-8, "feature_class": "simple", "feature_type": "Real"}
    feats = featop.get_features(params["feature_class"], params["feature_type"], params["data"])
    sparsefeats = featop.get_features(params["feature_class"], params["feature_type"], params["data"], sparse=True)

    params["name"] = "Gaussian"
    params["args"] = {"key": ("size", "width"), "val": (10, 1.3)}
    _compute(feats, params)

    params["name"] = "GaussianShift"
    params["args"] = {"key": ("size", "width", "max_shift", "shift_step"), "val": (10, 1.3, 2, 1)}
    _compute(feats, params)

    params["name"] = "Gaussian"
    params["args"] = {"key": ("size", "width"), "val": (10, 1.7)}
    _compute(sparsefeats, params)

    params["accuracy"] = 0
    params["name"] = "Const"
    params["args"] = {"key": ("c",), "val": (23.0,)}
    _compute(feats, params)

    params["name"] = "Diag"
    params["args"] = {"key": ("size", "diag"), "val": (10, 23.0)}
    _compute(feats, params)

    params["accuracy"] = 1e-9
    params["name"] = "Sigmoid"
    params["args"] = {"key": ("size", "gamma", "coef0"), "val": (10, 1.1, 1.3)}
    _compute(feats, params)
    params["args"]["val"] = (10, 0.5, 0.7)
    _compute(feats, params)

    params["name"] = "Chi2"
    params["args"] = {"key": ("size", "width"), "val": (10, 1.2)}
    _compute(feats, params)

    params["accuracy"] = 1e-8
    params["name"] = "Poly"
    params["args"] = {"key": ("size", "degree", "inhomogene"), "val": (10, 3, True)}
    _compute(sparsefeats, params)
    params["args"]["val"] = (10, 3, False)
    _compute(sparsefeats, params)

    params["name"] = "Poly"
    params["normalizer"] = kernel.SqrtDiagKernelNormalizer()
    params["args"] = {"key": ("size", "degree", "inhomogene"), "val": (10, 3, True)}
    _compute(feats, params)
    params["args"]["val"] = (10, 3, False)
    _compute(feats, params)

    params["normalizer"] = kernel.AvgDiagKernelNormalizer()
    del params["args"]
    params["name"] = "Linear"
    _compute(feats, params)
    params["name"] = "Linear"
    _compute(sparsefeats, params)
Example #6
def _run_real (name, args=None):
	"""Run preprocessor applied on RealFeatures.

	@param name name of the preprocessor
	@param args argument list (in a dict) for the preprocessor
	"""

	params={
		'name': 'Gaussian',
		'accuracy': 1e-8,
		'data': dataop.get_rand(),
		'feature_class': 'simple',
		'feature_type': 'Real',
		'args': {'key': ('width',), 'val': (1.2,)}
	}
	feats=featop.get_features(
		params['feature_class'], params['feature_type'], params['data'])
	if args:
		feats=featop.add_preproc(name, feats, *args['val'])
	else:
		feats=featop.add_preproc(name, feats)

	output=_compute(feats, params)

	params={ 'name': name }
	if args:
		params['args']=args

	output.update(fileop.get_output(category.PREPROC, params))

	fileop.write(category.PREPROC, output)
Example #7
def _run (name):
	"""Run generator for a specific distribution method.

	@param name Name of the distribution method
	"""

	# put some constantness into randomness
	Math_init_random(INIT_RANDOM)

	params={
		'name': name,
		'accuracy': 1e-7,
		'data':dataop.get_dna(),
		'alphabet': 'DNA',
		'feature_class': 'string_complex',
		'feature_type': 'Word'
	}
	output=fileop.get_output(category.DISTRIBUTION, params)
	feats=featop.get_features(
		params['feature_class'], params['feature_type'], params['data'])

	dfun=eval('distribution.'+name)
	dist=dfun(feats['train'])
	dist.train()

	output[PREFIX+'likelihood']=dist.get_log_likelihood_sample()
	output[PREFIX+'derivatives']=_get_derivatives(
		dist, feats['train'].get_num_vectors())

	fileop.write(category.DISTRIBUTION, output)
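Note: _get_derivatives is likewise defined elsewhere in the module. A minimal sketch, assuming the distribution object exposes get_num_model_parameters() and get_log_derivative(param, vector) as in Shogun's distribution interface; the real helper may filter or aggregate the values differently.

def _get_derivatives (dist, num_vec):
	"""Sketch only: accumulate log derivatives over all model parameters and vectors."""
	derivatives=0
	for i in xrange(dist.get_num_model_parameters()):
		for j in xrange(num_vec):
			val=dist.get_log_derivative(i, j)
			if numpy.isfinite(val):  # skip -inf/NaN entries
				derivatives+=val
	return derivatives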
Example #8
def _run_feats_string_complex ():
	"""Run distances with complex StringFeatures, like WordString."""

	params={
		'accuracy': 1e-7,
		'feature_class': 'string_complex',
		'feature_type': 'Word',
		'data': dataop.get_dna(num_vec_test=dataop.NUM_VEC_TRAIN+42)
	}
	feats=featop.get_features(
		params['feature_class'], params['feature_type'], params['data'])

	params['name']='CanberraWordDistance'
	_compute(feats, params)

	params['accuracy']=1e-8
	params['name']='ManhattanWordDistance'
	_compute(feats, params)

	params['name']='HammingWordDistance'
	params['args']={'key': ('use_sign',), 'val': (False,)}
	_compute(feats, params)
	params['name']='HammingWordDistance'
	params['args']={'key': ('use_sign',), 'val': (True,)}
	_compute(feats, params)
Example #9
def _run_perceptron ():
	"""Run Perceptron classifier."""

	params={
		'name': 'Perceptron',
		'type': 'perceptron',
		'num_threads': 1,
		'learn_rate': .1,
		'max_iter': 1000,
		'data': dataop.get_clouds(2),
		'feature_class': 'simple',
		'feature_type': 'Real',
		'label_type': 'twoclass',
		'accuracy': 1e-7
	}
	feats=featop.get_features(
		params['feature_class'], params['feature_type'], params['data'])
	num_vec=feats['train'].get_num_vectors()
	params['labels'], labels=dataop.get_labels(num_vec, params['label_type'])

	perceptron=classifier.Perceptron(feats['train'], labels)
	perceptron.parallel.set_num_threads(params['num_threads'])
	perceptron.set_learn_rate(params['learn_rate'])
	perceptron.set_max_iter(params['max_iter'])
	perceptron.train()

	params['bias']=perceptron.get_bias()
	perceptron.set_features(feats['test'])
	params['classified']=perceptron.classify().get_labels()

	output=fileop.get_output(category.CLASSIFIER, params)
	fileop.write(category.CLASSIFIER, output)
Example #10
def _run_lda ():
	"""Run Linear Discriminant Analysis classifier."""

	params={
		'name': 'LDA',
		'type': 'lda',
		'gamma': 0.1,
		'num_threads': 1,
		'data': dataop.get_clouds(2),
		'feature_class': 'simple',
		'feature_type': 'Real',
		'label_type': 'twoclass',
		'accuracy': 1e-7
	}
	feats=featop.get_features(
		params['feature_class'], params['feature_type'], params['data'])
	params['labels'], labels=dataop.get_labels(
		feats['train'].get_num_vectors(), params['label_type'])

	lda=classifier.LDA(params['gamma'], feats['train'], labels)
	lda.parallel.set_num_threads(params['num_threads'])
	lda.train()

	lda.set_features(feats['test'])
	params['classified']=lda.classify().get_labels()

	output=fileop.get_output(category.CLASSIFIER, params)
	fileop.write(category.CLASSIFIER, output)
Example #11
def _run_real(name, args=None):
    """Run preprocessor applied on RealFeatures.

	@param name name of the preprocessor
	@param args argument list (in a dict) for the preprocessor
	"""

    params = {
        "name": "Gaussian",
        "accuracy": 1e-8,
        "data": dataop.get_rand(),
        "feature_class": "simple",
        "feature_type": "Real",
        "args": {"key": ("width",), "val": (1.2,)},
    }
    feats = featop.get_features(params["feature_class"], params["feature_type"], params["data"])
    if args:
        feats = featop.add_preproc(name, feats, *args["val"])
    else:
        feats = featop.add_preproc(name, feats)

    output = _compute(feats, params)

    params = {"name": name}
    if args:
        params["args"] = args

    output.update(fileop.get_output(category.PREPROC, params))

    fileop.write(category.PREPROC, output)
Example #12
def _run_feats_string_complex():
    """Run distances with complex StringFeatures, like WordString."""

    params = {
        'accuracy': 1e-7,
        'feature_class': 'string_complex',
        'feature_type': 'Word',
        'data': dataop.get_dna(num_vec_test=dataop.NUM_VEC_TRAIN + 42)
    }
    feats = featop.get_features(params['feature_class'],
                                params['feature_type'], params['data'])

    params['name'] = 'CanberraWordDistance'
    _compute(feats, params)

    params['accuracy'] = 1e-8
    params['name'] = 'ManhattanWordDistance'
    _compute(feats, params)

    params['name'] = 'HammingWordDistance'
    params['args'] = {'key': ('use_sign', ), 'val': (False, )}
    _compute(feats, params)
    params['name'] = 'HammingWordDistance'
    params['args'] = {'key': ('use_sign', ), 'val': (True, )}
    _compute(feats, params)
Example #13
def _run_custom():
    """Run Custom kernel."""

    params = {"name": "Custom", "accuracy": 1e-7, "feature_class": "simple", "feature_type": "Real"}
    dim_square = 7
    data = dataop.get_rand(dim_square=dim_square)
    feats = featop.get_features(params["feature_class"], params["feature_type"], data)
    data = data["train"]
    symdata = data + data.T

    lowertriangle = numpy.array(
        [symdata[(x, y)] for x in xrange(symdata.shape[1]) for y in xrange(symdata.shape[0]) if y <= x]
    )
    kern = kernel.CustomKernel()
    # kern.init(feats['train'], feats['train']
    kern.set_triangle_kernel_matrix_from_triangle(lowertriangle)
    km_triangletriangle = kern.get_kernel_matrix()
    kern.set_triangle_kernel_matrix_from_full(symdata)
    km_fulltriangle = kern.get_kernel_matrix()
    kern.set_full_kernel_matrix_from_full(data)
    km_fullfull = kern.get_kernel_matrix()

    output = {
        "kernel_matrix_triangletriangle": km_triangletriangle,
        "kernel_matrix_fulltriangle": km_fulltriangle,
        "kernel_matrix_fullfull": km_fullfull,
        "kernel_symdata": numpy.matrix(symdata),
        "kernel_data": numpy.matrix(data),
        "kernel_dim_square": dim_square,
    }
    output.update(fileop.get_output(category.KERNEL, params))

    fileop.write(category.KERNEL, output)
Example #14
def _run(name):
    """Run generator for a specific distribution method.

	@param name Name of the distribution method
	"""

    # put some constantness into randomness
    Math_init_random(INIT_RANDOM)

    params = {
        'name': name,
        'accuracy': 1e-7,
        'data': dataop.get_dna(),
        'alphabet': 'DNA',
        'feature_class': 'string_complex',
        'feature_type': 'Word'
    }
    output = fileop.get_output(category.DISTRIBUTION, params)
    feats = featop.get_features(params['feature_class'],
                                params['feature_type'], params['data'])

    dfun = eval('distribution.' + name)
    dist = dfun(feats['train'])
    dist.train()

    output[PREFIX + 'likelihood'] = dist.get_log_likelihood_sample()
    output[PREFIX + 'derivatives'] = _get_derivatives(
        dist, feats['train'].get_num_vectors())

    fileop.write(category.DISTRIBUTION, output)
Example #15
def _run_feats_real():
    """Run distances with RealFeatures."""

    params = {
        'accuracy': 1e-8,
        'feature_class': 'simple',
        'feature_type': 'Real',
        'data': dataop.get_rand()
    }
    feats = featop.get_features(params['feature_class'],
                                params['feature_type'], params['data'])

    params['name'] = 'EuclidianDistance'
    _compute(feats, params)
    params['name'] = 'CanberraMetric'
    _compute(feats, params)
    params['name'] = 'ChebyshewMetric'
    _compute(feats, params)
    params['name'] = 'GeodesicMetric'
    _compute(feats, params)
    params['name'] = 'JensenMetric'
    _compute(feats, params)
    params['name'] = 'ManhattanMetric'
    _compute(feats, params)
    params['name'] = 'BrayCurtisDistance'
    _compute(feats, params)
    params['name'] = 'ChiSquareDistance'
    _compute(feats, params)
    params['name'] = 'CosineDistance'
    _compute(feats, params)
    params['name'] = 'TanimotoDistance'
    _compute(feats, params)
    params['name'] = 'ManhattanMetric'
    _compute(feats, params)
    params['name'] = 'MinkowskiMetric'
    params['args'] = {'key': ('k', ), 'val': (1.3, )}
    _compute(feats, params)

    params['name'] = 'SparseEuclidianDistance'
    params['accuracy'] = 1e-7
    del params['args']
    feats = featop.get_features(params['feature_class'],
                                params['feature_type'],
                                params['data'],
                                sparse=True)
    _compute(feats, params)
Example #16
def _run_feats_real ():
	"""Run distances with RealFeatures."""

	params={
		'accuracy': 1e-8,
		'feature_class': 'simple',
		'feature_type': 'Real',
		'data': dataop.get_rand()
	}
	feats=featop.get_features(
		params['feature_class'], params['feature_type'], params['data'])

	params['name']='EuclidianDistance'
	_compute(feats, params)
	params['name']='CanberraMetric'
	_compute(feats, params)
	params['name']='ChebyshewMetric'
	_compute(feats, params)
	params['name']='GeodesicMetric'
	_compute(feats, params)
	params['name']='JensenMetric'
	_compute(feats, params)
	params['name']='ManhattanMetric'
	_compute(feats, params)
	params['name']='BrayCurtisDistance'
	_compute(feats, params)
	params['name']='ChiSquareDistance'
	_compute(feats, params)
	params['name']='CosineDistance'
	_compute(feats, params)
	params['name']='TanimotoDistance'
	_compute(feats, params)
	params['name']='ManhattanMetric'
	_compute(feats, params)
	params['name']='MinkowskiMetric'
	params['args']={'key': ('k',), 'val': (1.3,)}
	_compute(feats, params)

	params['name']='SparseEuclidianDistance'
	params['accuracy']=1e-7
	del params['args']
	feats=featop.get_features(
		params['feature_class'], params['feature_type'],
		params['data'], sparse=True)
	_compute(feats, params)
Example #17
File: kernel.py Project: manantomar/test
def _run_combined ():
	"""Run Combined kernel."""

	kern=kernel.CombinedKernel()
	feats={'train': CombinedFeatures(), 'test': CombinedFeatures()}
	output={}
	params={
		'name': 'Combined',
		'accuracy': 1e-7
	}
	subkdata=[
		{
			'name': 'FixedDegreeString',
			'feature_class': 'string',
			'feature_type': 'Char',
			'args': {'key': ('size', 'degree'), 'val': (10, 3)}
		},
		{
			'name': 'PolyMatchString',
			'feature_class': 'string',
			'feature_type': 'Char',
			'args': {
				'key': ('size', 'degree', 'inhomogene'),
				'val': (10, 3, True)
			}
		},
		{
			'name': 'LocalAlignmentString',
			'feature_class': 'string',
			'feature_type': 'Char',
			'args': {'key': ('size',), 'val': (10,)}
		}
	]

	i=0
	for sd in subkdata:
		kfun=eval('kernel.'+sd['name']+'Kernel')
		subk=kfun(*sd['args']['val'])
		sd['data']=dataop.get_dna()
		subkfeats=featop.get_features(
			sd['feature_class'], sd['feature_type'], sd['data'])
		output.update(
			fileop.get_output(category.KERNEL, sd, 'subkernel'+str(i)+'_'))

		kern.append_kernel(subk)
		feats['train'].append_feature_obj(subkfeats['train'])
		feats['test'].append_feature_obj(subkfeats['test'])

		i+=1

	output.update(fileop.get_output(category.KERNEL, params))
	kern.init(feats['train'], feats['train'])
	output['kernel_matrix_train']=kern.get_kernel_matrix()
	kern.init(feats['train'], feats['test'])
	output['kernel_matrix_test']=kern.get_kernel_matrix()

	fileop.write(category.KERNEL, output)
Example #18
File: kernel.py Project: AsherBond/shogun
def _run_combined ():
	"""Run Combined kernel."""

	kern=kernel.CombinedKernel()
	feats={'train': CombinedFeatures(), 'test': CombinedFeatures()}
	output={}
	params={
		'name': 'Combined',
		'accuracy': 1e-7
	}
	subkdata=[
		{
			'name': 'FixedDegreeString',
			'feature_class': 'string',
			'feature_type': 'Char',
			'args': {'key': ('size', 'degree'), 'val': (10, 3)}
		},
		{
			'name': 'PolyMatchString',
			'feature_class': 'string',
			'feature_type': 'Char',
			'args': {
				'key': ('size', 'degree', 'inhomogene'),
				'val': (10, 3, True)
			}
		},
		{
			'name': 'LocalAlignmentString',
			'feature_class': 'string',
			'feature_type': 'Char',
			'args': {'key': ('size',), 'val': (10,)}
		}
	]

	i=0
	for sd in subkdata:
		kfun=eval('kernel.'+sd['name']+'Kernel')
		subk=kfun(*sd['args']['val'])
		sd['data']=dataop.get_dna()
		subkfeats=featop.get_features(
			sd['feature_class'], sd['feature_type'], sd['data'])
		output.update(
			fileop.get_output(category.KERNEL, sd, 'subkernel'+str(i)+'_'))

		kern.append_kernel(subk)
		feats['train'].append_feature_obj(subkfeats['train'])
		feats['test'].append_feature_obj(subkfeats['test'])

		i+=1

	output.update(fileop.get_output(category.KERNEL, params))
	kern.init(feats['train'], feats['train'])
	output['kernel_matrix_train']=kern.get_kernel_matrix()
	kern.init(feats['train'], feats['test'])
	output['kernel_matrix_test']=kern.get_kernel_matrix()

	fileop.write(category.KERNEL, output)
Example #19
def _run_pie():
    """Run kernel with PluginEstimate."""

    params = {"data": dataop.get_dna(), "accuracy": 1e-6, "feature_class": "string_complex", "feature_type": "Word"}
    feats = featop.get_features(params["feature_class"], params["feature_type"], params["data"])

    params["name"] = "HistogramWordString"
    _compute_pie(feats, params)
    params["name"] = "SalzbergWordString"
    _compute_pie(feats, params)
Example #20
def _run_svm_linear ():
	"""Run all SVMs based on (Sparse) Linear Classifiers."""

	params={
		'type': 'linear',
		'bias_enabled': False,
		'data': dataop.get_clouds(2),
		'feature_class': 'simple',
		'feature_type': 'Real',
		'label_type': 'twoclass'
	}
	feats=featop.get_features(
		params['feature_class'], params['feature_type'],
		params['data'], sparse=True)

	svms=('LibLinear', 'SVMLin', 'SVMSGD')
	params['bias_enabled']=True
	_loop_svm(svms, params, feats)

	# SubGradientSVM needs max_train_time to terminate
	svms=('SubGradientSVM',)
	params['bias_enabled']=False
	params['max_train_time']=.5 # up to 2. does not improve test results :(
	_loop_svm(svms, params, feats)

	svms=('SVMOcas',)
	_loop_svm(svms, params, feats)

	params={
		'type': 'linear',
		'bias_enabled': False,
		'label_type': 'twoclass',
		'feature_class': 'wd',
		'feature_type': 'Byte',
		'data': dataop.get_dna(),
		'alphabet': 'RAWDNA',
		'order': 1
	}
	feats=featop.get_features(
		params['feature_class'], params['feature_type'],
		params['data'], params['order'])
	_loop_svm(svms, params, feats)
Example #21
def _run_top_fisher():
    """Run Linear Kernel with {Top,Fisher}Features."""

    # put some constantness into randomness
    Math_init_random(dataop.INIT_RANDOM)

    data = dataop.get_cubes(4, 8)
    prefix = "topfk_"
    params = {
        prefix + "N": 3,
        prefix + "M": 6,
        prefix + "pseudo": 1e-1,
        prefix + "order": 1,
        prefix + "gap": 0,
        prefix + "reverse": False,
        prefix + "alphabet": "CUBE",
        prefix + "feature_class": "string_complex",
        prefix + "feature_type": "Word",
        prefix + "data_train": numpy.matrix(data["train"]),
        prefix + "data_test": numpy.matrix(data["test"]),
    }

    wordfeats = featop.get_features(
        params[prefix + "feature_class"],
        params[prefix + "feature_type"],
        data,
        eval(params[prefix + "alphabet"]),
        params[prefix + "order"],
        params[prefix + "gap"],
        params[prefix + "reverse"],
    )
    pos_train = HMM(wordfeats["train"], params[prefix + "N"], params[prefix + "M"], params[prefix + "pseudo"])
    pos_train.train()
    pos_train.baum_welch_viterbi_train(BW_NORMAL)
    neg_train = HMM(wordfeats["train"], params[prefix + "N"], params[prefix + "M"], params[prefix + "pseudo"])
    neg_train.train()
    neg_train.baum_welch_viterbi_train(BW_NORMAL)
    pos_test = HMM(pos_train)
    pos_test.set_observations(wordfeats["test"])
    neg_test = HMM(neg_train)
    neg_test.set_observations(wordfeats["test"])
    feats = {}

    feats["train"] = TOPFeatures(10, pos_train, neg_train, False, False)
    feats["test"] = TOPFeatures(10, pos_test, neg_test, False, False)
    params[prefix + "name"] = "TOP"
    _compute_top_fisher(feats, params)

    feats["train"] = FKFeatures(10, pos_train, neg_train)
    feats["train"].set_opt_a(-1)  # estimate prior
    feats["test"] = FKFeatures(10, pos_test, neg_test)
    feats["test"].set_a(feats["train"].get_a())  # use prior from training data
    params[prefix + "name"] = "FK"
    _compute_top_fisher(feats, params)
Example #22
def _run_feats_string():
    """Run kernel with StringFeatures."""

    params = {"accuracy": 1e-9, "data": dataop.get_dna(), "feature_class": "string", "feature_type": "Char"}
    feats = featop.get_features(params["feature_class"], params["feature_type"], params["data"])

    params["name"] = "FixedDegreeString"
    params["args"] = {"key": ("size", "degree"), "val": (10, 3)}
    _compute(feats, params)

    params["accuracy"] = 0
    params["name"] = "LocalAlignmentString"
    params["args"] = {"key": ("size",), "val": (10,)}
    _compute(feats, params)

    params["accuracy"] = 1e-10
    params["name"] = "PolyMatchString"
    params["args"] = {"key": ("size", "degree", "inhomogene"), "val": (10, 3, True)}
    _compute(feats, params)
    params["args"]["val"] = (10, 3, False)
    _compute(feats, params)

    params["accuracy"] = 1e-15
    params["name"] = "SimpleLocalityImprovedString"
    params["args"] = {"key": ("size", "length", "inner_degree", "outer_degree"), "val": (10, 5, 7, 5)}
    _compute(feats, params)
    # buggy:
    # params['name']='LocalityImprovedString'
    # _compute(feats, params)

    params["name"] = "WeightedDegreeString"
    params["accuracy"] = 1e-9
    params["args"] = {"key": ("degree",), "val": (20,)}
    _compute(feats, params)
    params["args"] = {"key": ("degree",), "val": (1,)}
    _compute(feats, params)

    params["name"] = "WeightedDegreePositionString"
    params["args"] = {"key": ("size", "degree"), "val": (10, 20)}
    _compute(feats, params)
    params["args"] = {"key": ("size", "degree"), "val": (10, 1)}
    _compute(feats, params)

    params["name"] = "OligoString"
    params["args"] = {"key": ("size", "k", "width"), "val": (10, 3, 1.2)}
    _compute(feats, params)
    params["args"] = {"key": ("size", "k", "width"), "val": (10, 4, 1.7)}
    _compute(feats, params)

    params["name"] = "LinearString"
    params["accuracy"] = 1e-8
    params["normalizer"] = kernel.AvgDiagKernelNormalizer()
    del params["args"]
    _compute(feats, params)
Example #23
def _run_top_fisher():
    """Run Linear Kernel with {Top,Fisher}Features."""

    # put some constantness into randomness
    Math_init_random(dataop.INIT_RANDOM)

    data = dataop.get_cubes(4, 8)
    prefix = 'topfk_'
    params = {
        prefix + 'N': 3,
        prefix + 'M': 6,
        prefix + 'pseudo': 1e-1,
        prefix + 'order': 1,
        prefix + 'gap': 0,
        prefix + 'reverse': False,
        prefix + 'alphabet': 'CUBE',
        prefix + 'feature_class': 'string_complex',
        prefix + 'feature_type': 'Word',
        prefix + 'data_train': numpy.matrix(data['train']),
        prefix + 'data_test': numpy.matrix(data['test'])
    }

    wordfeats = featop.get_features(params[prefix + 'feature_class'],
                                    params[prefix + 'feature_type'], data,
                                    eval(params[prefix + 'alphabet']),
                                    params[prefix + 'order'],
                                    params[prefix + 'gap'],
                                    params[prefix + 'reverse'])
    pos_train = HMM(wordfeats['train'], params[prefix + 'N'],
                    params[prefix + 'M'], params[prefix + 'pseudo'])
    pos_train.train()
    pos_train.baum_welch_viterbi_train(BW_NORMAL)
    neg_train = HMM(wordfeats['train'], params[prefix + 'N'],
                    params[prefix + 'M'], params[prefix + 'pseudo'])
    neg_train.train()
    neg_train.baum_welch_viterbi_train(BW_NORMAL)
    pos_test = HMM(pos_train)
    pos_test.set_observations(wordfeats['test'])
    neg_test = HMM(neg_train)
    neg_test.set_observations(wordfeats['test'])
    feats = {}

    feats['train'] = TOPFeatures(10, pos_train, neg_train, False, False)
    feats['test'] = TOPFeatures(10, pos_test, neg_test, False, False)
    params[prefix + 'name'] = 'TOP'
    _compute_top_fisher(feats, params)

    feats['train'] = FKFeatures(10, pos_train, neg_train)
    feats['train'].set_opt_a(-1)  #estimate prior
    feats['test'] = FKFeatures(10, pos_test, neg_test)
    feats['test'].set_a(feats['train'].get_a())  #use prior from training data
    params[prefix + 'name'] = 'FK'
    _compute_top_fisher(feats, params)
Example #24
File: kernel.py Project: manantomar/test
def _run_feats_string_complex ():
	"""Run kernel with complex StringFeatures."""

	params={
		'data': dataop.get_dna(),
		'feature_class': 'string_complex'
	}

	params['feature_type']='Word'
	wordfeats=featop.get_features(
		params['feature_class'], params['feature_type'], params['data'])

	params['name']='CommWordString'
	params['accuracy']=1e-9
	params['args']={'key': ('size', 'use_sign'), 'val': (10, False)}
	_compute(wordfeats, params)
	params['name']='WeightedCommWordString'
	_compute(wordfeats, params)

	params['name']='PolyMatchWordString'
	params['accuracy']=1e-10
	params['args']={
		'key': ('size', 'degree', 'inhomogene'),
		'val': (10, 3, True)
	}
	_compute(wordfeats, params)
	params['args']['val']=(10, 3, False)
	_compute(wordfeats, params)

	params['name']='MatchWordString'
	params['args']={'key': ('size', 'degree'), 'val': (10, 3)}
	_compute(wordfeats, params)

	params['feature_type']='Ulong'
	params['accuracy']=1e-9
	ulongfeats=featop.get_features(
		params['feature_class'], params['feature_type'], params['data'])
	params['name']='CommUlongString'
	params['args']={'key': ('size', 'use_sign'), 'val': (10, False)}
	_compute(ulongfeats, params)
Example #25
File: kernel.py Project: AsherBond/shogun
def _run_feats_string_complex ():
	"""Run kernel with complex StringFeatures."""

	params={
		'data': dataop.get_dna(),
		'feature_class': 'string_complex'
	}

	params['feature_type']='Word'
	wordfeats=featop.get_features(
		params['feature_class'], params['feature_type'], params['data'])

	params['name']='CommWordString'
	params['accuracy']=1e-9
	params['args']={'key': ('size', 'use_sign'), 'val': (10, False)}
	_compute(wordfeats, params)
	params['name']='WeightedCommWordString'
	_compute(wordfeats, params)

	params['name']='PolyMatchWordString'
	params['accuracy']=1e-10
	params['args']={
		'key': ('size', 'degree', 'inhomogene'),
		'val': (10, 3, True)
	}
	_compute(wordfeats, params)
	params['args']['val']=(10, 3, False)
	_compute(wordfeats, params)

	params['name']='MatchWordString'
	params['args']={'key': ('size', 'degree'), 'val': (10, 3)}
	_compute(wordfeats, params)

	params['feature_type']='Ulong'
	params['accuracy']=1e-9
	ulongfeats=featop.get_features(
		params['feature_class'], params['feature_type'], params['data'])
	params['name']='CommUlongString'
	params['args']={'key': ('size', 'use_sign'), 'val': (10, False)}
	_compute(ulongfeats, params)
Example #26
File: kernel.py Project: AsherBond/shogun
def _run_top_fisher ():
	"""Run Linear Kernel with {Top,Fisher}Features."""

	# put some constantness into randomness
	Math_init_random(dataop.INIT_RANDOM)

	data=dataop.get_cubes(4, 8)
	prefix='topfk_'
	params={
		prefix+'N': 3,
		prefix+'M': 6,
		prefix+'pseudo': 1e-1,
		prefix+'order': 1,
		prefix+'gap': 0,
		prefix+'reverse': False,
		prefix+'alphabet': 'CUBE',
		prefix+'feature_class': 'string_complex',
		prefix+'feature_type': 'Word',
		prefix+'data_train': numpy.matrix(data['train']),
		prefix+'data_test': numpy.matrix(data['test'])
	}

	wordfeats=featop.get_features(
		params[prefix+'feature_class'], params[prefix+'feature_type'],
		data, eval(params[prefix+'alphabet']),
		params[prefix+'order'], params[prefix+'gap'], params[prefix+'reverse'])
	pos_train=HMM(wordfeats['train'],
		params[prefix+'N'], params[prefix+'M'], params[prefix+'pseudo'])
	pos_train.train()
	pos_train.baum_welch_viterbi_train(BW_NORMAL)
	neg_train=HMM(wordfeats['train'],
		params[prefix+'N'], params[prefix+'M'], params[prefix+'pseudo'])
	neg_train.train()
	neg_train.baum_welch_viterbi_train(BW_NORMAL)
	pos_test=HMM(pos_train)
	pos_test.set_observations(wordfeats['test'])
	neg_test=HMM(neg_train)
	neg_test.set_observations(wordfeats['test'])
	feats={}

	feats['train']=TOPFeatures(10, pos_train, neg_train, False, False)
	feats['test']=TOPFeatures(10, pos_test, neg_test, False, False)
	params[prefix+'name']='TOP'
	_compute_top_fisher(feats, params)

	feats['train']=FKFeatures(10, pos_train, neg_train)
	feats['train'].set_opt_a(-1) #estimate prior
	feats['test']=FKFeatures(10, pos_test, neg_test)
	feats['test'].set_a(feats['train'].get_a()) #use prior from training data
	params[prefix+'name']='FK'
	_compute_top_fisher(feats, params)
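Note: _compute_top_fisher is not shown in this listing. A minimal sketch, under the assumption that it simply runs a LinearKernel over the TOP/FK features (as the docstring suggests) and writes the resulting matrices; prefixes and key names in the real helper may differ.

def _compute_top_fisher (feats, params):
	"""Sketch only: linear kernel on TOP/FK features, then write the kernel matrices."""
	output={}
	kern=kernel.LinearKernel()
	kern.init(feats['train'], feats['train'])
	output['kernel_matrix_train']=kern.get_kernel_matrix()
	kern.init(feats['train'], feats['test'])
	output['kernel_matrix_test']=kern.get_kernel_matrix()

	output.update(fileop.get_output(category.KERNEL, params))
	fileop.write(category.KERNEL, output)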
Example #27
def _run_feats_byte():
    """Run kernel with ByteFeatures."""

    params = {
        "name": "Linear",
        "accuracy": 1e-8,
        "feature_class": "simple",
        "feature_type": "Byte",
        "data": dataop.get_rand(dattype=numpy.ubyte),
        "normalizer": kernel.AvgDiagKernelNormalizer(),
    }
    feats = featop.get_features(params["feature_class"], params["feature_type"], params["data"], RAWBYTE)

    _compute(feats, params)
Example #28
def _run_distance():
    """Run distance kernel."""

    params = {
        "name": "Distance",
        "accuracy": 1e-9,
        "feature_class": "simple",
        "feature_type": "Real",
        "data": dataop.get_rand(),
        "args": {"key": ("size", "width", "distance"), "val": (10, 1.7, CanberraMetric())},
    }
    feats = featop.get_features(params["feature_class"], params["feature_type"], params["data"])

    _compute(feats, params)
Example #29
def _run_combined():
    """Run Combined kernel."""

    kern = kernel.CombinedKernel()
    feats = {"train": CombinedFeatures(), "test": CombinedFeatures()}
    output = {}
    params = {"name": "Combined", "accuracy": 1e-7}
    subkdata = [
        {
            "name": "FixedDegreeString",
            "feature_class": "string",
            "feature_type": "Char",
            "args": {"key": ("size", "degree"), "val": (10, 3)},
        },
        {
            "name": "PolyMatchString",
            "feature_class": "string",
            "feature_type": "Char",
            "args": {"key": ("size", "degree", "inhomogene"), "val": (10, 3, True)},
        },
        {
            "name": "LocalAlignmentString",
            "feature_class": "string",
            "feature_type": "Char",
            "args": {"key": ("size",), "val": (10,)},
        },
    ]

    i = 0
    for sd in subkdata:
        kfun = eval("kernel." + sd["name"] + "Kernel")
        subk = kfun(*sd["args"]["val"])
        sd["data"] = dataop.get_dna()
        subkfeats = featop.get_features(sd["feature_class"], sd["feature_type"], sd["data"])
        output.update(fileop.get_output(category.KERNEL, sd, "subkernel" + str(i) + "_"))

        kern.append_kernel(subk)
        feats["train"].append_feature_obj(subkfeats["train"])
        feats["test"].append_feature_obj(subkfeats["test"])

        i += 1

    output.update(fileop.get_output(category.KERNEL, params))
    kern.init(feats["train"], feats["train"])
    output["kernel_matrix_train"] = kern.get_kernel_matrix()
    kern.init(feats["train"], feats["test"])
    output["kernel_matrix_test"] = kern.get_kernel_matrix()

    fileop.write(category.KERNEL, output)
Example #30
File: kernel.py Project: manantomar/test
def _run_feats_byte ():
	"""Run kernel with ByteFeatures."""

	params={
		'name': 'Linear',
		'accuracy': 1e-8,
		'feature_class': 'simple',
		'feature_type': 'Byte',
		'data': dataop.get_rand(dattype=numpy.ubyte),
		'normalizer': kernel.AvgDiagKernelNormalizer()
	}
	feats=featop.get_features(params['feature_class'], params['feature_type'],
		params['data'], RAWBYTE)

	_compute(feats, params)
Example #31
def _run_feats_word():
    """Run kernel with WordFeatures."""

    maxval = 42
    params = {
        "name": "Linear",
        "accuracy": 1e-8,
        "feature_class": "simple",
        "feature_type": "Word",
        "data": dataop.get_rand(dattype=numpy.ushort, max_train=maxval, max_test=maxval),
        "normalizer": kernel.AvgDiagKernelNormalizer(),
    }
    feats = featop.get_features(params["feature_class"], params["feature_type"], params["data"])

    _compute(feats, params)
Example #32
File: kernel.py Project: AsherBond/shogun
def _run_feats_byte ():
	"""Run kernel with ByteFeatures."""

	params={
		'name': 'LinearByte',
		'accuracy': 1e-8,
		'feature_class': 'simple',
		'feature_type': 'Byte',
		'data': dataop.get_rand(dattype=numpy.ubyte),
		'normalizer': kernel.AvgDiagKernelNormalizer()
	}
	feats=featop.get_features(params['feature_class'], params['feature_type'],
		params['data'], RAWBYTE)

	_compute(feats, params)
Example #33
File: kernel.py Project: AsherBond/shogun
def _run_pie ():
	"""Run kernel with PluginEstimate."""

	params={
		'data': dataop.get_dna(),
		'accuracy': 1e-6,
		'feature_class': 'string_complex',
		'feature_type': 'Word'
	}
	feats=featop.get_features(
		params['feature_class'], params['feature_type'], params['data'])

	params['name']='HistogramWordString'
	_compute_pie(feats, params)
	params['name']='SalzbergWordString'
	_compute_pie(feats, params)
Example #34
def _run_pie():
    """Run kernel with PluginEstimate."""

    params = {
        'data': dataop.get_dna(),
        'accuracy': 1e-6,
        'feature_class': 'string_complex',
        'feature_type': 'Word'
    }
    feats = featop.get_features(params['feature_class'],
                                params['feature_type'], params['data'])

    params['name'] = 'HistogramWordString'
    _compute_pie(feats, params)
    params['name'] = 'SalzbergWordString'
    _compute_pie(feats, params)
Example #35
File: kernel.py Project: manantomar/test
def _run_feats_word ():
	"""Run kernel with WordFeatures."""

	maxval=42
	params={
		'name': 'Linear',
		'accuracy': 1e-8,
		'feature_class': 'simple',
		'feature_type': 'Word',
		'data': dataop.get_rand(
			dattype=numpy.ushort, max_train=maxval, max_test=maxval),
		'normalizer': kernel.AvgDiagKernelNormalizer()
	}
	feats=featop.get_features(
		params['feature_class'], params['feature_type'], params['data'])

	_compute(feats, params)
Example #36
File: kernel.py Project: AsherBond/shogun
def _run_feats_word ():
	"""Run kernel with WordFeatures."""

	maxval=42
	params={
		'name': 'LinearWord',
		'accuracy': 1e-8,
		'feature_class': 'simple',
		'feature_type': 'Word',
		'data': dataop.get_rand(
			dattype=numpy.ushort, max_train=maxval, max_test=maxval),
		'normalizer': kernel.AvgDiagKernelNormalizer()
	}
	feats=featop.get_features(
		params['feature_class'], params['feature_type'], params['data'])

	_compute(feats, params)
Example #37
File: kernel.py Project: AsherBond/shogun
def _run_distance ():
	"""Run distance kernel."""

	params={
		'name': 'Distance',
		'accuracy': 1e-9,
		'feature_class': 'simple',
		'feature_type': 'Real',
		'data': dataop.get_rand(),
		'args': {
			'key': ('size', 'width', 'distance'),
			'val': (10, 1.7, CanberraMetric())
		}
	}
	feats=featop.get_features(
		params['feature_class'], params['feature_type'], params['data'])

	_compute(feats, params)
Example #38
def _run_distance():
    """Run distance kernel."""

    params = {
        'name': 'Distance',
        'accuracy': 1e-9,
        'feature_class': 'simple',
        'feature_type': 'Real',
        'data': dataop.get_rand(),
        'args': {
            'key': ('size', 'width', 'distance'),
            'val': (10, 1.7, CanberraMetric())
        }
    }
    feats = featop.get_features(params['feature_class'],
                                params['feature_type'], params['data'])

    _compute(feats, params)
Example #39
def _run_hmm():
    """Run generator for Hidden-Markov-Model."""

    # put some constantness into randomness
    Math_init_random(INIT_RANDOM)

    num_examples = 4
    params = {
        'name': 'HMM',
        'accuracy': 1e-6,
        'N': 3,
        'M': 6,
        'num_examples': num_examples,
        'pseudo': 1e-10,
        'order': 1,
        'alphabet': 'CUBE',
        'feature_class': 'string_complex',
        'feature_type': 'Word',
        'data': dataop.get_cubes(num_examples, 1)
    }
    output = fileop.get_output(category.DISTRIBUTION, params)

    feats = featop.get_features(params['feature_class'],
                                params['feature_type'], params['data'],
                                eval('features.' + params['alphabet']),
                                params['order'])

    hmm = distribution.HMM(feats['train'], params['N'], params['M'],
                           params['pseudo'])
    hmm.train()
    hmm.baum_welch_viterbi_train(distribution.BW_NORMAL)

    output[PREFIX + 'likelihood'] = hmm.get_log_likelihood_sample()
    output[PREFIX + 'derivatives'] = _get_derivatives(
        hmm, feats['train'].get_num_vectors())

    output[PREFIX + 'best_path'] = 0
    output[PREFIX + 'best_path_state'] = 0
    for i in xrange(num_examples):
        output[PREFIX + 'best_path'] += hmm.best_path(i)
        for j in xrange(params['N']):
            output[PREFIX + 'best_path_state'] += hmm.get_best_path_state(i, j)

    fileop.write(category.DISTRIBUTION, output)
Example #40
def _run (name, first_arg):
	"""
	Run generator for a specific clustering method.

	@param name Name of the clustering method to run.
	@param first_arg First argument to the clustering's constructor; so far, only this distinguishes the instantiation of the different methods.
	"""

	# put some constantness into randomness
	Math_init_random(dataop.INIT_RANDOM)

	num_clouds=3
	params={
		'name': 'EuclidianDistance',
		'data': dataop.get_clouds(num_clouds, 5),
		'feature_class': 'simple',
		'feature_type': 'Real'
	}
	feats=featop.get_features(
		params['feature_class'], params['feature_type'], params['data'])
	dfun=eval(params['name'])
	distance=dfun(feats['train'], feats['train'])
	output=fileop.get_output(category.DISTANCE, params)

	params={
		'name': name,
		'accuracy': 1e-8,
		first_arg: num_clouds
	}
	fun=eval('clustering.'+name)
	clustering=fun(params[first_arg], distance)
	clustering.train()

	distance.init(feats['train'], feats['test'])
	if name=='KMeans':
		params['radi']=clustering.get_radiuses()
		params['centers']=clustering.get_cluster_centers()
	elif name=='Hierarchical':
		params['merge_distance']=clustering.get_merge_distances()
		params['pairs']=clustering.get_cluster_pairs()

	output.update(fileop.get_output(category.CLUSTERING, params))
	fileop.write(category.CLUSTERING, output)
Example #41
def _run_hmm ():
	"""Run generator for Hidden-Markov-Model."""

	# put some constantness into randomness
	Math_init_random(INIT_RANDOM)

	num_examples=4
	params={
		'name': 'HMM',
		'accuracy': 1e-6,
		'N': 3,
		'M': 6,
		'num_examples': num_examples,
		'pseudo': 1e-10,
		'order': 1,
		'alphabet': 'CUBE',
		'feature_class': 'string_complex',
		'feature_type': 'Word',
		'data': dataop.get_cubes(num_examples, 1)
	}
	output=fileop.get_output(category.DISTRIBUTION, params)

	feats=featop.get_features(
		params['feature_class'], params['feature_type'], params['data'],
		eval('features.'+params['alphabet']), params['order'])

	hmm=distribution.HMM(
		feats['train'], params['N'], params['M'], params['pseudo'])
	hmm.train()
	hmm.baum_welch_viterbi_train(distribution.BW_NORMAL)

	output[PREFIX+'likelihood']=hmm.get_log_likelihood_sample()
	output[PREFIX+'derivatives']=_get_derivatives(
		hmm, feats['train'].get_num_vectors())

	output[PREFIX+'best_path']=0
	output[PREFIX+'best_path_state']=0
	for i in xrange(num_examples):
		output[PREFIX+'best_path']+=hmm.best_path(i)
		for j in xrange(params['N']):
			output[PREFIX+'best_path_state']+=hmm.get_best_path_state(i, j)

	fileop.write(category.DISTRIBUTION, output)
Example #42
def _run_string_complex(ftype):
    """Run preprocessor applied on complex StringFeatures.

	@param ftype Feature type, like Word
	"""

    params = {
        "name": "Comm" + ftype + "String",
        "accuracy": 1e-9,
        "feature_class": "string_complex",
        "feature_type": ftype,
        "data": dataop.get_dna(),
    }
    feats = featop.get_features(params["feature_class"], params["feature_type"], params["data"])
    # string_complex gets preproc added implicitly on Word/Ulong feats
    output = _compute(feats, params)

    params = {"name": "Sort" + ftype + "String"}
    output.update(fileop.get_output(category.PREPROC, params))

    fileop.write(category.PREPROC, output)
Example #43
def _run_custom():
    """Run Custom kernel."""

    params = {
        'name': 'Custom',
        'accuracy': 1e-7,
        'feature_class': 'simple',
        'feature_type': 'Real'
    }
    dim_square = 7
    data = dataop.get_rand(dim_square=dim_square)
    feats = featop.get_features(params['feature_class'],
                                params['feature_type'], data)
    data = data['train']
    symdata = data + data.T

    lowertriangle = numpy.array([
        symdata[(x, y)] for x in xrange(symdata.shape[1])
        for y in xrange(symdata.shape[0]) if y <= x
    ])
    kern = kernel.CustomKernel()
    #kern.init(feats['train'], feats['train']
    kern.set_triangle_kernel_matrix_from_triangle(lowertriangle)
    km_triangletriangle = kern.get_kernel_matrix()
    kern.set_triangle_kernel_matrix_from_full(symdata)
    km_fulltriangle = kern.get_kernel_matrix()
    kern.set_full_kernel_matrix_from_full(data)
    km_fullfull = kern.get_kernel_matrix()

    output = {
        'kernel_matrix_triangletriangle': km_triangletriangle,
        'kernel_matrix_fulltriangle': km_fulltriangle,
        'kernel_matrix_fullfull': km_fullfull,
        'kernel_symdata': numpy.matrix(symdata),
        'kernel_data': numpy.matrix(data),
        'kernel_dim_square': dim_square
    }
    output.update(fileop.get_output(category.KERNEL, params))

    fileop.write(category.KERNEL, output)
Example #44
def _run_string_complex(ftype):
    """Run preprocessor applied on complex StringFeatures.

	@param ftype Feature type, like Word
	"""

    params = {
        'name': 'Comm' + ftype + 'String',
        'accuracy': 1e-9,
        'feature_class': 'string_complex',
        'feature_type': ftype,
        'data': dataop.get_dna()
    }
    feats = featop.get_features(params['feature_class'],
                                params['feature_type'], params['data'])
    # string_complex gets preproc added implicitly on Word/Ulong feats
    output = _compute(feats, params)

    params = {'name': 'Sort' + ftype + 'String'}
    output.update(fileop.get_output(category.PREPROC, params))

    fileop.write(category.PREPROC, output)
Example #45
def _run(name, first_arg):
    """
	Run generator for a specific clustering method.

	@param name Name of the clustering method to run.
	@param first_arg First argument to the clustering's constructor; so far, only this distinguishes the instantiation of the different methods.
	"""

    # put some constantness into randomness
    Math_init_random(dataop.INIT_RANDOM)

    num_clouds = 3
    params = {
        'name': 'EuclidianDistance',
        'data': dataop.get_clouds(num_clouds, 5),
        'feature_class': 'simple',
        'feature_type': 'Real'
    }
    feats = featop.get_features(params['feature_class'],
                                params['feature_type'], params['data'])
    dfun = eval(params['name'])
    distance = dfun(feats['train'], feats['train'])
    output = fileop.get_output(category.DISTANCE, params)

    params = {'name': name, 'accuracy': 1e-8, first_arg: num_clouds}
    fun = eval('clustering.' + name)
    clustering = fun(params[first_arg], distance)
    clustering.train()

    distance.init(feats['train'], feats['test'])
    if name == 'KMeans':
        params['radi'] = clustering.get_radiuses()
        params['centers'] = clustering.get_cluster_centers()
    elif name == 'Hierarchical':
        params['merge_distance'] = clustering.get_merge_distances()
        params['pairs'] = clustering.get_cluster_pairs()

    output.update(fileop.get_output(category.CLUSTERING, params))
    fileop.write(category.CLUSTERING, output)
Example #46
def _run_knn ():
	"""Run K-Nearest-Neighbour classifier.
	"""

	params={
		'name': 'EuclidianDistance',
		'data': dataop.get_clouds(2),
		'feature_class': 'simple',
		'feature_type': 'Real'
	}
	feats=featop.get_features(
		params['feature_class'], params['feature_type'], params['data'])
	dfun=eval(params['name'])
	distance=dfun(feats['train'], feats['train'])
	output=fileop.get_output(category.DISTANCE, params)

	params={
		'name': 'KNN',
		'type': 'knn',
		'num_threads': 1,
		'k': 3,
		'label_type': 'twoclass',
		'accuracy': 1e-8
	}
	params['labels'], labels=dataop.get_labels(
		feats['train'].get_num_vectors(), params['label_type'])

	knn=classifier.KNN(params['k'], distance, labels)
	knn.parallel.set_num_threads(params['num_threads'])
	knn.train()

	distance.init(feats['train'], feats['test'])
	params['classified']=knn.classify().get_labels()

	output.update(fileop.get_output(category.CLASSIFIER, params))
	fileop.write(category.CLASSIFIER, output)
Example #47
def _run_wdsvmocas ():
	"""Run Weighted Degree SVM Ocas classifier."""

	svms=('WDSVMOcas',)
	params={
		'type': 'wdsvmocas',
		'degree': 1,
		'bias_enabled': False,
		#'data': dataop.get_rawdna(),
		'data': dataop.get_dna(
			dataop.NUM_VEC_TRAIN, dataop.NUM_VEC_TRAIN, dataop.NUM_VEC_TRAIN),
		'feature_class': 'string_complex',
		'feature_type': 'Byte',
		'alphabet': 'RAWDNA',
		'label_type': 'twoclass',
		'order': 1,
		'gap': 0,
		'reverse': False
	}
	feats=featop.get_features(
		params['feature_class'], params['feature_type'],
		params['data'], eval(params['alphabet']),
		params['order'], params['gap'], params['reverse'])
	_loop_svm(svms, params, feats)
Example #48
def _run_feats_string():
    """Run kernel with StringFeatures."""

    params = {
        'accuracy': 1e-9,
        'data': dataop.get_dna(),
        'feature_class': 'string',
        'feature_type': 'Char',
    }
    feats = featop.get_features(params['feature_class'],
                                params['feature_type'], params['data'])

    params['name'] = 'FixedDegreeString'
    params['args'] = {'key': ('size', 'degree'), 'val': (10, 3)}
    _compute(feats, params)

    params['accuracy'] = 0
    params['name'] = 'LocalAlignmentString'
    params['args'] = {'key': ('size', ), 'val': (10, )}
    _compute(feats, params)

    params['accuracy'] = 1e-10
    params['name'] = 'PolyMatchString'
    params['args'] = {
        'key': ('size', 'degree', 'inhomogene'),
        'val': (10, 3, True)
    }
    _compute(feats, params)
    params['args']['val'] = (10, 3, False)
    _compute(feats, params)

    params['accuracy'] = 1e-15
    params['name'] = 'SimpleLocalityImprovedString'
    params['args'] = {
        'key': ('size', 'length', 'inner_degree', 'outer_degree'),
        'val': (10, 5, 7, 5)
    }
    _compute(feats, params)
    # buggy:
    #params['name']='LocalityImprovedString'
    #_compute(feats, params)

    params['name'] = 'WeightedDegreeString'
    params['accuracy'] = 1e-9
    params['args'] = {'key': ('degree', ), 'val': (20, )}
    _compute(feats, params)
    params['args'] = {'key': ('degree', ), 'val': (1, )}
    _compute(feats, params)

    params['name'] = 'WeightedDegreePositionString'
    params['args'] = {'key': ('size', 'degree'), 'val': (10, 20)}
    _compute(feats, params)
    params['args'] = {'key': ('size', 'degree'), 'val': (10, 1)}
    _compute(feats, params)

    params['name'] = 'OligoString'
    params['args'] = {'key': ('size', 'k', 'width'), 'val': (10, 3, 1.2)}
    _compute(feats, params)
    params['args'] = {'key': ('size', 'k', 'width'), 'val': (10, 4, 1.7)}
    _compute(feats, params)

    params['name'] = 'LinearString'
    params['accuracy'] = 1e-8
    params['normalizer'] = kernel.AvgDiagKernelNormalizer()
    del params['args']
    _compute(feats, params)
Example #49
def _run_feats_real():
    """Run kernel with RealFeatures."""

    params = {
        'data': dataop.get_rand(),
        'accuracy': 1e-8,
        'feature_class': 'simple',
        'feature_type': 'Real'
    }
    feats = featop.get_features(params['feature_class'],
                                params['feature_type'], params['data'])
    sparsefeats = featop.get_features(params['feature_class'],
                                      params['feature_type'],
                                      params['data'],
                                      sparse=True)

    params['name'] = 'Gaussian'
    params['args'] = {
        'key': (
            'size',
            'width',
        ),
        'val': (10, 1.3)
    }
    _compute(feats, params)

    params['name'] = 'GaussianShift'
    params['args'] = {
        'key': ('size', 'width', 'max_shift', 'shift_step'),
        'val': (10, 1.3, 2, 1)
    }
    _compute(feats, params)

    params['name'] = 'SparseGaussian'
    params['args'] = {'key': ('size', 'width'), 'val': (10, 1.7)}
    _compute(sparsefeats, params)

    params['accuracy'] = 0
    params['name'] = 'Const'
    params['args'] = {'key': ('c', ), 'val': (23., )}
    _compute(feats, params)

    params['name'] = 'Diag'
    params['args'] = {'key': ('size', 'diag'), 'val': (10, 23.)}
    _compute(feats, params)

    params['accuracy'] = 1e-9
    params['name'] = 'Sigmoid'
    params['args'] = {'key': ('size', 'gamma', 'coef0'), 'val': (10, 1.1, 1.3)}
    _compute(feats, params)
    params['args']['val'] = (10, 0.5, 0.7)
    _compute(feats, params)

    params['name'] = 'Chi2'
    params['args'] = {'key': ('size', 'width'), 'val': (10, 1.2)}
    _compute(feats, params)

    params['accuracy'] = 1e-8
    params['name'] = 'SparsePoly'
    params['args'] = {
        'key': ('size', 'degree', 'inhomogene'),
        'val': (10, 3, True)
    }
    _compute(sparsefeats, params)
    params['args']['val'] = (10, 3, False)
    _compute(sparsefeats, params)

    params['name'] = 'Poly'
    params['normalizer'] = kernel.SqrtDiagKernelNormalizer()
    params['args'] = {
        'key': ('size', 'degree', 'inhomogene'),
        'val': (10, 3, True)
    }
    _compute(feats, params)
    params['args']['val'] = (10, 3, False)
    _compute(feats, params)

    params['normalizer'] = kernel.AvgDiagKernelNormalizer()
    del params['args']
    params['name'] = 'Linear'
    _compute(feats, params)
    params['name'] = 'SparseLinear'
    _compute(sparsefeats, params)
Example #50
def _run_svm_kernel ():
	"""Run all kernel-based SVMs."""

	kparams={
		'name': 'Gaussian',
		'args': {'key': ('width',), 'val': (1.5,)},
		'feature_class': 'simple',
		'feature_type': 'Real',
		'data': dataop.get_clouds(2)
	}
	feats=featop.get_features(
		kparams['feature_class'], kparams['feature_type'], kparams['data'])
	kernel=GaussianKernel(10, *kparams['args']['val'])
	output=fileop.get_output(category.KERNEL, kparams)

	svms=('SVMLight', 'LibSVM', 'GPBTSVM', 'MPDSVM')
	params={
		'type': 'kernel',
		'label_type': 'twoclass'
	}
	_loop_svm(svms, params, feats, kernel, output)

	svms=('LibSVMOneClass',)
	params['label_type']=None
	_loop_svm(svms, params, feats, kernel, output)

	svms=('LibSVMMultiClass', 'GMNPSVM')
	params['label_type']='series'
	kparams['data']=dataop.get_clouds(3)
	feats=featop.get_features(
		kparams['feature_class'], kparams['feature_type'], kparams['data'])
	output=fileop.get_output(category.KERNEL, kparams)
	_loop_svm(svms, params, feats, kernel, output)

	svms=('SVMLight', 'GPBTSVM')
	params['label_type']='twoclass'
	kparams={
		'name': 'Linear',
		'feature_class': 'simple',
		'feature_type': 'Real',
		'data': dataop.get_clouds(2),
		'normalizer': AvgDiagKernelNormalizer()
	}
	feats=featop.get_features(
		kparams['feature_class'], kparams['feature_type'], kparams['data'])
	kernel=LinearKernel()
	kernel.set_normalizer(kparams['normalizer'])
	output=fileop.get_output(category.KERNEL, kparams)
	_loop_svm(svms, params, feats, kernel, output)

	kparams={
		'name': 'CommWordString',
		'args': {'key': ('use_sign',), 'val': (False,)},
		'data': dataop.get_dna(),
		'feature_class': 'string_complex',
		'feature_type': 'Word'
	}
	feats=featop.get_features(
		kparams['feature_class'], kparams['feature_type'], kparams['data'])
	kernel=CommWordStringKernel(10, *kparams['args']['val'])
	output=fileop.get_output(category.KERNEL, kparams)
	_loop_svm(svms, params, feats, kernel, output)

	kparams={
		'name': 'CommUlongString',
		'args': {'key': ('use_sign',), 'val': (False,)},
		'data': dataop.get_dna(),
		'feature_class': 'string_complex',
		'feature_type': 'Ulong'
	}
	feats=featop.get_features(
		kparams['feature_class'], kparams['feature_type'], kparams['data'])
	kernel=CommUlongStringKernel(10, *kparams['args']['val'])
	output=fileop.get_output(category.KERNEL, kparams)
	_loop_svm(svms, params, feats, kernel, output)

	kparams={
		'name': 'WeightedDegreeString',
		'args': {'key': ('degree',), 'val': (3,)},
		'data': dataop.get_dna(),
		'feature_class': 'string',
		'feature_type': 'Char'
	}
	feats=featop.get_features(
		kparams['feature_class'], kparams['feature_type'], kparams['data'])
	kernel=WeightedDegreeStringKernel(*kparams['args']['val'])
	output=fileop.get_output(category.KERNEL, kparams)
	_loop_svm(svms, params, feats, kernel, output)
	params['linadd_enabled']=True
	_loop_svm(svms, params, feats, kernel, output)
	params['batch_enabled']=True
	_loop_svm(svms, params, feats, kernel, output)

	kparams={
		'name': 'WeightedDegreePositionString',
		'args': {'key': ('degree',), 'val': (20,)},
		'data': dataop.get_dna(),
		'feature_class': 'string',
		'feature_type': 'Char'
	}
	feats=featop.get_features(
		kparams['feature_class'], kparams['feature_type'], kparams['data'])
	kernel=WeightedDegreePositionStringKernel(10, *kparams['args']['val'])
	output=fileop.get_output(category.KERNEL, kparams)
	del params['linadd_enabled']
	del params['batch_enabled']
	_loop_svm(svms, params, feats, kernel, output)
	params['linadd_enabled']=True
	_loop_svm(svms, params, feats, kernel, output)
	params['batch_enabled']=True
	_loop_svm(svms, params, feats, kernel, output)
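Note: _loop_svm is used by the SVM examples above but is not part of this listing. A rough sketch of the loop it presumably implements, following the train/classify/write pattern of _run_perceptron and _run_lda; the real helper also honours bias_enabled, max_train_time, linadd/batch flags, one-class label handling and per-classifier constructor differences, which are glossed over here. The C value of 1. is an arbitrary placeholder.

def _loop_svm (svms, params, feats, kern=None, output=None):
	"""Sketch only: train each named SVM, classify the test data and collect output."""
	if output is None:
		output={}
	params['labels'], labels=dataop.get_labels(
		feats['train'].get_num_vectors(), params['label_type'])

	for name in svms:
		params['name']=name
		sfun=eval('classifier.'+name)
		if kern is not None:
			kern.init(feats['train'], feats['train'])
			svm=sfun(1., kern, labels)  # placeholder C=1.
		else:
			svm=sfun(1., feats['train'], labels)
		svm.parallel.set_num_threads(params.get('num_threads', 1))
		svm.train()

		if kern is not None:
			kern.init(feats['train'], feats['test'])
		else:
			svm.set_features(feats['test'])
		params['classified']=svm.classify().get_labels()

		output.update(fileop.get_output(category.CLASSIFIER, params))

	fileop.write(category.CLASSIFIER, output)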