예제 #1
0
def _run_perceptron():
    """Generate reference data for the Perceptron classifier.

    Builds features from dataop.get_clouds(2), trains a Perceptron on the
    training part, classifies the test part and hands the parameters
    together with the results to fileop for writing.
    """

    params = {
        'name': 'Perceptron',
        'type': 'perceptron',
        'num_threads': 1,
        'learn_rate': .1,
        'max_iter': 1000,
        'data': dataop.get_clouds(2),
        'feature_class': 'simple',
        'feature_type': 'Real',
        'label_type': 'twoclass',
        'accuracy': 1e-7,
    }
    features = featop.get_features(
        params['feature_class'], params['feature_type'], params['data'])
    train_feats = features['train']
    raw_labels, labels = dataop.get_labels(
        train_feats.get_num_vectors(), params['label_type'])
    params['labels'] = raw_labels

    # Configure and train the classifier on the training features.
    machine = classifier.Perceptron(train_feats, labels)
    machine.parallel.set_num_threads(params['num_threads'])
    machine.set_learn_rate(params['learn_rate'])
    machine.set_max_iter(params['max_iter'])
    machine.train()

    # Record the learned bias, then classify the test features.
    params['bias'] = machine.get_bias()
    machine.set_features(features['test'])
    predictions = machine.classify()
    params['classified'] = predictions.get_labels()

    fileop.write(
        category.CLASSIFIER,
        fileop.get_output(category.CLASSIFIER, params))
예제 #2
0
def _compute_pie(feats, params):
    """Compute a kernel that is constructed from a trained PluginEstimate.

    A PluginEstimate is trained on the training features and passed to the
    kernel constructor. The train/train and train/test kernel matrices, the
    labels and the estimate's output on the test features are written via
    fileop.

    @param feats dict with 'train' and 'test' kernel features
    @param params dict containing various kernel parameters; params["name"]
           selects the class '<name>Kernel' from the kernel module
    """

    output = fileop.get_output(category.KERNEL, params)

    lab, labels = dataop.get_labels(feats["train"].get_num_vectors())
    output["classifier_labels"] = lab
    pie = PluginEstimate()
    pie.set_labels(labels)
    pie.set_features(feats["train"])
    pie.train()

    # Plain attribute lookup: getattr resolves the same class as the former
    # eval() call without evaluating an arbitrary expression string.
    kfun = getattr(kernel, params["name"] + "Kernel")
    kern = kfun(feats["train"], feats["train"], pie)
    output["kernel_matrix_train"] = kern.get_kernel_matrix()

    # Re-initialise kernel and estimate for the test features before the
    # second matrix is read.
    kern.init(feats["train"], feats["test"])
    pie.set_features(feats["test"])
    output["kernel_matrix_test"] = kern.get_kernel_matrix()

    classified = pie.apply().get_labels()
    output["classifier_classified"] = classified

    fileop.write(category.KERNEL, output)
예제 #3
0
def _run_custom():
    """Generate reference data for the Custom kernel.

    The training matrix from dataop.get_rand is added to its transpose, and
    CustomKernel is then fed three times: from the lower triangle, from the
    summed matrix, and from the original matrix. The kernel matrix is read
    back after each step and everything is written via fileop.
    """

    params = {
        "name": "Custom",
        "accuracy": 1e-7,
        "feature_class": "simple",
        "feature_type": "Real",
    }
    dim_square = 7
    rand_data = dataop.get_rand(dim_square=dim_square)
    # NOTE(review): these features are built but never handed to the kernel.
    feats = featop.get_features(
        params["feature_class"], params["feature_type"], rand_data)
    train = rand_data["train"]
    symmetric = train + train.T

    # Collect the entries with y <= x, walking x in the outer loop.
    num_rows = symmetric.shape[0]
    num_cols = symmetric.shape[1]
    triangle_values = []
    for x in xrange(num_cols):
        for y in xrange(num_rows):
            if y <= x:
                triangle_values.append(symmetric[(x, y)])
    lowertriangle = numpy.array(triangle_values)

    kern = kernel.CustomKernel()
    kern.set_triangle_kernel_matrix_from_triangle(lowertriangle)
    from_triangle = kern.get_kernel_matrix()
    kern.set_triangle_kernel_matrix_from_full(symmetric)
    from_full_triangle = kern.get_kernel_matrix()
    kern.set_full_kernel_matrix_from_full(train)
    from_full = kern.get_kernel_matrix()

    output = {
        "kernel_matrix_triangletriangle": from_triangle,
        "kernel_matrix_fulltriangle": from_full_triangle,
        "kernel_matrix_fullfull": from_full,
        "kernel_symdata": numpy.matrix(symmetric),
        "kernel_data": numpy.matrix(train),
        "kernel_dim_square": dim_square,
    }
    output.update(fileop.get_output(category.KERNEL, params))

    fileop.write(category.KERNEL, output)
예제 #4
0
def _compute_pie(feats, params):
    """Compute a kernel that is constructed from a trained PluginEstimate.

    Trains a PluginEstimate on the training features, builds the kernel
    with it, and writes both kernel matrices, the labels and the estimate's
    classification of the test features via fileop.

    @param feats dict with 'train' and 'test' kernel features
    @param params dict containing various kernel parameters; params['name']
           selects the class '<name>Kernel' from the kernel module
    """

    output = fileop.get_output(category.KERNEL, params)

    lab, labels = dataop.get_labels(feats['train'].get_num_vectors())
    output['classifier_labels'] = lab
    pie = PluginEstimate()
    pie.set_labels(labels)
    pie.set_features(feats['train'])
    pie.train()

    # getattr performs the same class lookup as the former eval() call, but
    # cannot execute anything other than an attribute access.
    kfun = getattr(kernel, params['name'] + 'Kernel')
    kern = kfun(feats['train'], feats['train'], pie)
    output['kernel_matrix_train'] = kern.get_kernel_matrix()

    # Switch kernel and estimate over to the test features.
    kern.init(feats['train'], feats['test'])
    pie.set_features(feats['test'])
    output['kernel_matrix_test'] = kern.get_kernel_matrix()

    classified = pie.classify().get_labels()
    output['classifier_classified'] = classified

    fileop.write(category.KERNEL, output)
예제 #5
0
def _compute(feats, params, pout=None):
    """
    Compute a kernel and write gathered data to file.

    @param feats dict with 'train' and 'test' features of the kernel
    @param params dict with parameters to kernel: 'name' selects the class
           '<name>Kernel' from the kernel module, the optional 'args' entry
           carries constructor arguments under 'val', and the optional
           'normalizer' entry is set on the kernel before initialisation
    @param pout previously gathered data ready to be written to file
    """

    output = fileop.get_output(category.KERNEL, params)
    if pout:
        output.update(pout)

    # Attribute lookup by name; getattr replaces eval() on a built string.
    kfun = getattr(kernel, params["name"] + "Kernel")
    # "in" replaces dict.has_key(), which no longer exists in Python 3.
    if "args" in params:
        kern = kfun(*params["args"]["val"])
    else:
        kern = kfun()

    if "normalizer" in params:
        kern.set_normalizer(params["normalizer"])
    kern.init(feats["train"], feats["train"])

    output["kernel_matrix_train"] = kern.get_kernel_matrix()
    kern.init(feats["train"], feats["test"])
    output["kernel_matrix_test"] = kern.get_kernel_matrix()

    fileop.write(category.KERNEL, output)
예제 #6
0
def _run_real(name, args=None):
    """Run a preprocessor applied on RealFeatures.

    The preprocessor is attached to Real features built from
    dataop.get_rand(), a Gaussian kernel is computed on them through
    _compute, and the result is written under the PREPROC category.

    @param name name of the preprocessor
    @param args argument list (in a dict) for the preprocessor; its 'val'
           entry is unpacked into featop.add_preproc
    """

    kernel_params = {
        'name': 'Gaussian',
        'accuracy': 1e-8,
        'data': dataop.get_rand(),
        'feature_class': 'simple',
        'feature_type': 'Real',
        'args': {'key': ('width',), 'val': (1.2,)},
    }
    feats = featop.get_features(
        kernel_params['feature_class'],
        kernel_params['feature_type'],
        kernel_params['data'])

    # Without args the preprocessor is added with no extra arguments.
    extra = args['val'] if args else ()
    feats = featop.add_preproc(name, feats, *extra)

    output = _compute(feats, kernel_params)

    preproc_params = {'name': name}
    if args:
        preproc_params['args'] = args
    output.update(fileop.get_output(category.PREPROC, preproc_params))

    fileop.write(category.PREPROC, output)
예제 #7
0
def _run(name):
    """Run generator for a specific distribution method.

    Trains the distribution on Word features built from DNA data and writes
    its log-likelihood sample and derivatives via fileop.

    @param name Name of the distribution method; it is looked up as an
           attribute of the distribution module
    """

    # put some constantness into randomness
    Math_init_random(INIT_RANDOM)

    params = {
        'name': name,
        'accuracy': 1e-7,
        'data': dataop.get_dna(),
        'alphabet': 'DNA',
        'feature_class': 'string_complex',
        'feature_type': 'Word'
    }
    output = fileop.get_output(category.DISTRIBUTION, params)
    feats = featop.get_features(
        params['feature_class'], params['feature_type'], params['data'])

    # getattr is the direct way to fetch a class by name; the former eval()
    # would have executed any expression placed in name.
    dfun = getattr(distribution, name)
    dist = dfun(feats['train'])
    dist.train()

    output[PREFIX + 'likelihood'] = dist.get_log_likelihood_sample()
    output[PREFIX + 'derivatives'] = _get_derivatives(
        dist, feats['train'].get_num_vectors())

    fileop.write(category.DISTRIBUTION, output)
예제 #8
0
def _compute(feats, params, pout=None):
    """
    Compute a kernel and write gathered data to file.

    @param feats dict with 'train' and 'test' features of the kernel
    @param params dict with parameters to kernel: 'name' picks the class
           '<name>Kernel' from the kernel module, optional 'args' holds the
           constructor arguments under 'val', optional 'normalizer' is set
           on the kernel before it is initialised
    @param pout previously gathered data ready to be written to file
    """

    output = fileop.get_output(category.KERNEL, params)
    if pout:
        output.update(pout)

    # Resolve the kernel class with getattr instead of eval().
    kfun = getattr(kernel, params['name'] + 'Kernel')
    # Membership test with "in": dict.has_key() is gone in Python 3.
    if 'args' in params:
        kern = kfun(*params['args']['val'])
    else:
        kern = kfun()

    if 'normalizer' in params:
        kern.set_normalizer(params['normalizer'])
    kern.init(feats['train'], feats['train'])

    output['kernel_matrix_train'] = kern.get_kernel_matrix()
    kern.init(feats['train'], feats['test'])
    output['kernel_matrix_test'] = kern.get_kernel_matrix()

    fileop.write(category.KERNEL, output)
예제 #9
0
def _compute_top_fisher(feats, pout):
    """Compute PolyKernel with TOP or FKFeatures

    The kernel parameters are fixed inside this function; the train/train
    and train/test kernel matrices are written together with pout.

    @param feats dict with 'train' and 'test' features of the kernel
    @param pout previously gathered data ready to be written to file
    """

    params = {
        'name': 'Poly',
        'accuracy': 1e-6,
        'args': {
            'key': ('size', 'degree', 'inhomogene'),
            'val': (10, 1, False)
        }
    }
    output = fileop.get_output(category.KERNEL, params)
    output.update(pout)

    # Look the class up with getattr rather than eval() on a built string.
    kfun = getattr(kernel, params['name'] + 'Kernel')
    kern = kfun(feats['train'], feats['train'], *params['args']['val'])
    output['kernel_matrix_train'] = kern.get_kernel_matrix()
    kern.init(feats['train'], feats['test'])
    output['kernel_matrix_test'] = kern.get_kernel_matrix()

    fileop.write(category.KERNEL, output)
예제 #10
0
파일: kernel.py 프로젝트: AsherBond/shogun
def _compute_top_fisher(feats, pout):
    """Compute PolyKernel with TOP or FKFeatures

    Uses a fixed parameter set for the kernel and writes the train/train
    and train/test kernel matrices merged with pout.

    @param feats dict with 'train' and 'test' features of the kernel
    @param pout previously gathered data ready to be written to file
    """

    params = {
        'name': 'Poly',
        'accuracy': 1e-6,
        'args': {
            'key': ('size', 'degree', 'inhomogene'),
            'val': (10, 1, False)
        }
    }
    output = fileop.get_output(category.KERNEL, params)
    output.update(pout)

    # getattr does the attribute lookup directly; eval() is not needed.
    kfun = getattr(kernel, params['name'] + 'Kernel')
    kern = kfun(feats['train'], feats['train'], *params['args']['val'])
    output['kernel_matrix_train'] = kern.get_kernel_matrix()
    kern.init(feats['train'], feats['test'])
    output['kernel_matrix_test'] = kern.get_kernel_matrix()

    fileop.write(category.KERNEL, output)
예제 #11
0
파일: kernel.py 프로젝트: AsherBond/shogun
def _compute(feats, params, pout=None):
    """
    Compute a kernel and write gathered data to file.

    @param feats dict with 'train' and 'test' features of the kernel
    @param params dict with parameters to kernel: 'name' chooses the class
           '<name>Kernel' in the kernel module; if present, 'args' supplies
           constructor arguments under 'val' and 'normalizer' is set on the
           kernel before initialisation
    @param pout previously gathered data ready to be written to file
    """

    output = fileop.get_output(category.KERNEL, params)
    if pout:
        output.update(pout)

    # Fetch the class by attribute name; no expression string is evaluated.
    kfun = getattr(kernel, params['name'] + 'Kernel')
    # The "in" operator works on Python 2 and 3; dict.has_key() does not.
    if 'args' in params:
        kern = kfun(*params['args']['val'])
    else:
        kern = kfun()

    if 'normalizer' in params:
        kern.set_normalizer(params['normalizer'])
    kern.init(feats['train'], feats['train'])

    output['kernel_matrix_train'] = kern.get_kernel_matrix()
    kern.init(feats['train'], feats['test'])
    output['kernel_matrix_test'] = kern.get_kernel_matrix()

    fileop.write(category.KERNEL, output)
예제 #12
0
파일: kernel.py 프로젝트: AsherBond/shogun
def _compute_pie(feats, params):
    """Compute a kernel with PluginEstimate.

    The PluginEstimate is trained on the training features first, because
    the kernel constructor receives it as an argument. Kernel matrices,
    labels and the classification of the test features are written via
    fileop.

    @param feats dict with 'train' and 'test' kernel features
    @param params dict containing various kernel parameters; params['name']
           selects the class '<name>Kernel' from the kernel module
    """

    output = fileop.get_output(category.KERNEL, params)

    lab, labels = dataop.get_labels(feats['train'].get_num_vectors())
    output['classifier_labels'] = lab
    pie = PluginEstimate()
    pie.set_labels(labels)
    pie.set_features(feats['train'])
    pie.train()

    # Attribute lookup via getattr instead of eval() on a built string.
    kfun = getattr(kernel, params['name'] + 'Kernel')
    kern = kfun(feats['train'], feats['train'], pie)
    output['kernel_matrix_train'] = kern.get_kernel_matrix()

    # Move both the kernel and the estimate to the test features.
    kern.init(feats['train'], feats['test'])
    pie.set_features(feats['test'])
    output['kernel_matrix_test'] = kern.get_kernel_matrix()

    classified = pie.classify().get_labels()
    output['classifier_classified'] = classified

    fileop.write(category.KERNEL, output)
예제 #13
0
def _run(name):
    """Run generator for a specific distribution method.

    Builds Word features from DNA data, trains the named distribution on
    the training part and writes its log-likelihood sample and derivatives
    via fileop.

    @param name Name of the distribution method; it is looked up as an
           attribute of the distribution module
    """

    # put some constantness into randomness
    Math_init_random(INIT_RANDOM)

    params = {
        'name': name,
        'accuracy': 1e-7,
        'data': dataop.get_dna(),
        'alphabet': 'DNA',
        'feature_class': 'string_complex',
        'feature_type': 'Word'
    }
    output = fileop.get_output(category.DISTRIBUTION, params)
    feats = featop.get_features(params['feature_class'],
                                params['feature_type'], params['data'])

    # Direct attribute lookup; eval() would run whatever name contains.
    dfun = getattr(distribution, name)
    dist = dfun(feats['train'])
    dist.train()

    output[PREFIX + 'likelihood'] = dist.get_log_likelihood_sample()
    output[PREFIX + 'derivatives'] = _get_derivatives(
        dist, feats['train'].get_num_vectors())

    fileop.write(category.DISTRIBUTION, output)
예제 #14
0
def _run_real(name, args=None):
    """Run a preprocessor applied on RealFeatures.

    Attaches the preprocessor to Real features built from dataop.get_rand(),
    computes a Gaussian kernel on them through _compute and writes the
    result under the PREPROC category.

    @param name name of the preprocessor
    @param args argument list (in a dict) for the preprocessor; its "val"
           entry is unpacked into featop.add_preproc
    """

    kernel_params = {
        "name": "Gaussian",
        "accuracy": 1e-8,
        "data": dataop.get_rand(),
        "feature_class": "simple",
        "feature_type": "Real",
        "args": {"key": ("width",), "val": (1.2,)},
    }
    feats = featop.get_features(
        kernel_params["feature_class"],
        kernel_params["feature_type"],
        kernel_params["data"],
    )

    # An empty tuple unpacks to no extra arguments at all.
    preproc_args = args["val"] if args else ()
    feats = featop.add_preproc(name, feats, *preproc_args)

    output = _compute(feats, kernel_params)

    preproc_params = {"name": name}
    if args:
        preproc_params["args"] = args
    preproc_output = fileop.get_output(category.PREPROC, preproc_params)
    output.update(preproc_output)

    fileop.write(category.PREPROC, output)
예제 #15
0
def _run_lda():
    """Generate reference data for the Linear Discriminant Analysis classifier.

    Builds features from dataop.get_clouds(2), trains an LDA on the
    training part, classifies the test part and writes the parameters and
    results via fileop.
    """

    params = {
        'name': 'LDA',
        'type': 'lda',
        'gamma': 0.1,
        'num_threads': 1,
        'data': dataop.get_clouds(2),
        'feature_class': 'simple',
        'feature_type': 'Real',
        'label_type': 'twoclass',
        'accuracy': 1e-7,
    }
    features = featop.get_features(
        params['feature_class'], params['feature_type'], params['data'])
    train_feats = features['train']
    raw_labels, labels = dataop.get_labels(
        train_feats.get_num_vectors(), params['label_type'])
    params['labels'] = raw_labels

    machine = classifier.LDA(params['gamma'], train_feats, labels)
    machine.parallel.set_num_threads(params['num_threads'])
    machine.train()

    # Classification is done on the test features.
    machine.set_features(features['test'])
    predictions = machine.classify()
    params['classified'] = predictions.get_labels()

    fileop.write(
        category.CLASSIFIER,
        fileop.get_output(category.CLASSIFIER, params))
예제 #16
0
파일: kernel.py 프로젝트: AsherBond/shogun
def _run_combined():
    """Run Combined kernel.

    Three string sub-kernels are created, each with its own DNA data and
    Char string features, and appended to a CombinedKernel together with
    their features. The parameters of every sub-kernel are stored under the
    prefix 'subkernel<i>_'; finally the combined train/train and train/test
    kernel matrices are written via fileop.
    """

    kern = kernel.CombinedKernel()
    feats = {'train': CombinedFeatures(), 'test': CombinedFeatures()}
    output = {}
    params = {
        'name': 'Combined',
        'accuracy': 1e-7
    }
    subkdata = [
        {
            'name': 'FixedDegreeString',
            'feature_class': 'string',
            'feature_type': 'Char',
            'args': {'key': ('size', 'degree'), 'val': (10, 3)}
        },
        {
            'name': 'PolyMatchString',
            'feature_class': 'string',
            'feature_type': 'Char',
            'args': {
                'key': ('size', 'degree', 'inhomogene'),
                'val': (10, 3, True)
            }
        },
        {
            'name': 'LocalAlignmentString',
            'feature_class': 'string',
            'feature_type': 'Char',
            'args': {'key': ('size',), 'val': (10,)}
        }
    ]

    # enumerate supplies the sub-kernel index used in the output prefix.
    for i, sd in enumerate(subkdata):
        # Class lookup by name through getattr instead of eval().
        kfun = getattr(kernel, sd['name'] + 'Kernel')
        subk = kfun(*sd['args']['val'])
        sd['data'] = dataop.get_dna()
        subkfeats = featop.get_features(
            sd['feature_class'], sd['feature_type'], sd['data'])
        output.update(
            fileop.get_output(category.KERNEL, sd, 'subkernel' + str(i) + '_'))

        kern.append_kernel(subk)
        feats['train'].append_feature_obj(subkfeats['train'])
        feats['test'].append_feature_obj(subkfeats['test'])

    output.update(fileop.get_output(category.KERNEL, params))
    kern.init(feats['train'], feats['train'])
    output['kernel_matrix_train'] = kern.get_kernel_matrix()
    kern.init(feats['train'], feats['test'])
    output['kernel_matrix_test'] = kern.get_kernel_matrix()

    fileop.write(category.KERNEL, output)
예제 #17
0
파일: kernel.py 프로젝트: manantomar/test
def _run_combined():
    """Run Combined kernel.

    Builds a CombinedKernel from three string sub-kernels. Every sub-kernel
    gets its own DNA data and Char string features, and its parameters are
    recorded under the prefix 'subkernel<i>_'. The combined train/train and
    train/test kernel matrices are then written via fileop.
    """

    kern = kernel.CombinedKernel()
    feats = {'train': CombinedFeatures(), 'test': CombinedFeatures()}
    output = {}
    params = {
        'name': 'Combined',
        'accuracy': 1e-7
    }
    subkdata = [
        {
            'name': 'FixedDegreeString',
            'feature_class': 'string',
            'feature_type': 'Char',
            'args': {'key': ('size', 'degree'), 'val': (10, 3)}
        },
        {
            'name': 'PolyMatchString',
            'feature_class': 'string',
            'feature_type': 'Char',
            'args': {
                'key': ('size', 'degree', 'inhomogene'),
                'val': (10, 3, True)
            }
        },
        {
            'name': 'LocalAlignmentString',
            'feature_class': 'string',
            'feature_type': 'Char',
            'args': {'key': ('size',), 'val': (10,)}
        }
    ]

    # The index from enumerate replaces a hand-maintained counter.
    for i, sd in enumerate(subkdata):
        # getattr fetches the class; no expression string is evaluated.
        kfun = getattr(kernel, sd['name'] + 'Kernel')
        subk = kfun(*sd['args']['val'])
        sd['data'] = dataop.get_dna()
        subkfeats = featop.get_features(
            sd['feature_class'], sd['feature_type'], sd['data'])
        prefix = 'subkernel' + str(i) + '_'
        output.update(fileop.get_output(category.KERNEL, sd, prefix))

        kern.append_kernel(subk)
        feats['train'].append_feature_obj(subkfeats['train'])
        feats['test'].append_feature_obj(subkfeats['test'])

    output.update(fileop.get_output(category.KERNEL, params))
    kern.init(feats['train'], feats['train'])
    output['kernel_matrix_train'] = kern.get_kernel_matrix()
    kern.init(feats['train'], feats['test'])
    output['kernel_matrix_test'] = kern.get_kernel_matrix()

    fileop.write(category.KERNEL, output)
예제 #18
0
def _compute_svm(params, labels, feats, kernel, pout):
    """Perform computations on SVM.

    Perform all necessary computations on SVM and gather the output. The
    params dict is updated in place with 'bias', 'alpha_sum', 'sv_sum' (as
    applicable) and 'classified'. Nothing is written when _get_svm returns
    a false value.

    @param params misc parameters for the SVM's constructor
    @param labels labels to be used for the SVM (if at all)
    @param feats features to the SVM
    @param kernel kernel for kernel-SVMs
    @param pout previously gathered output data ready to be written to file
    """

    svm = _get_svm(params, labels, feats, kernel)
    if not svm:
        return

    svm.parallel.set_num_threads(params['num_threads'])
    try:
        svm.set_epsilon(params['epsilon'])
    except AttributeError:  # SGD does not have an accuracy parameter
        pass

    # Optional settings; "in" replaces dict.has_key(), which was removed in
    # Python 3.
    if 'bias_enabled' in params:
        svm.set_bias_enabled(params['bias_enabled'])
    if 'max_train_time' in params:
        svm.set_max_train_time(params['max_train_time'])
    if 'linadd_enabled' in params:
        svm.set_linadd_enabled(params['linadd_enabled'])
    if 'batch_enabled' in params:
        svm.set_batch_computation_enabled(params['batch_enabled'])

    svm.train()

    # The bias is recorded when it was enabled, and always for kernel SVMs.
    if params.get('bias_enabled') or params['type'] == 'kernel':
        params['bias'] = svm.get_bias()

    if params['type'] == 'kernel':
        params['alpha_sum'], params['sv_sum'] = \
            _get_svm_sum_alpha_and_sv(svm, params['label_type'])
        kernel.init(feats['train'], feats['test'])
    elif params['type'] in ('linear', 'wdsvmocas'):
        svm.set_features(feats['test'])

    params['classified'] = svm.classify().get_labels()

    output = fileop.get_output(category.CLASSIFIER, params)
    if pout:
        output.update(pout)
    fileop.write(category.CLASSIFIER, output)
예제 #19
0
def _run_combined():
    """Run Combined kernel.

    Appends three string sub-kernels, each with its own DNA data and Char
    string features, to a CombinedKernel. Sub-kernel parameters are stored
    under the prefix "subkernel<i>_"; the combined train/train and
    train/test kernel matrices are written via fileop.
    """

    kern = kernel.CombinedKernel()
    feats = {"train": CombinedFeatures(), "test": CombinedFeatures()}
    output = {}
    params = {"name": "Combined", "accuracy": 1e-7}
    subkdata = [
        {
            "name": "FixedDegreeString",
            "feature_class": "string",
            "feature_type": "Char",
            "args": {"key": ("size", "degree"), "val": (10, 3)},
        },
        {
            "name": "PolyMatchString",
            "feature_class": "string",
            "feature_type": "Char",
            "args": {"key": ("size", "degree", "inhomogene"), "val": (10, 3, True)},
        },
        {
            "name": "LocalAlignmentString",
            "feature_class": "string",
            "feature_type": "Char",
            "args": {"key": ("size",), "val": (10,)},
        },
    ]

    # enumerate yields the running index that names each sub-kernel.
    for i, sd in enumerate(subkdata):
        # Resolve the class with getattr rather than eval().
        kfun = getattr(kernel, sd["name"] + "Kernel")
        subk = kfun(*sd["args"]["val"])
        sd["data"] = dataop.get_dna()
        subkfeats = featop.get_features(sd["feature_class"], sd["feature_type"], sd["data"])
        output.update(fileop.get_output(category.KERNEL, sd, "subkernel" + str(i) + "_"))

        kern.append_kernel(subk)
        feats["train"].append_feature_obj(subkfeats["train"])
        feats["test"].append_feature_obj(subkfeats["test"])

    output.update(fileop.get_output(category.KERNEL, params))
    kern.init(feats["train"], feats["train"])
    output["kernel_matrix_train"] = kern.get_kernel_matrix()
    kern.init(feats["train"], feats["test"])
    output["kernel_matrix_test"] = kern.get_kernel_matrix()

    fileop.write(category.KERNEL, output)
예제 #20
0
def _compute(params, feats, kernel, pout):
    """
    Compute a regression and gather result data.

    params is updated in place with 'labels', 'classified' and, for type
    'svm', 'bias', 'alpha_sum' and 'sv_sum'. The function returns without
    writing anything when the regression module has no class named
    params['name'].

    @param params misc parameters for the regression method
    @param feats features of the kernel/regression
    @param kernel kernel
    @param pout previously gathered data from kernel ready to be written to file
    """

    kernel.parallel.set_num_threads(params['num_threads'])
    kernel.init(feats['train'], feats['train'])
    params['labels'], labels = dataop.get_labels(
        feats['train'].get_num_vectors())

    # The trained object is kept in "machine" so that the name "regression"
    # keeps referring to the module and getattr can be used for the lookup.
    try:
        fun = getattr(regression, params['name'])
    except AttributeError:
        return

    if params['type'] == 'svm':
        machine = fun(params['C'], params['epsilon'], kernel, labels)
        machine.set_tube_epsilon(params['tube_epsilon'])
    else:
        machine = fun(params['tau'], kernel, labels)
    machine.parallel.set_num_threads(params['num_threads'])

    machine.train()

    if params['type'] == 'svm':
        params['bias'] = machine.get_bias()
        params['alpha_sum'] = 0
        for item in machine.get_alphas().tolist():
            params['alpha_sum'] += item
        params['sv_sum'] = 0
        for item in machine.get_support_vectors():
            params['sv_sum'] += item

    # Re-initialise the kernel on the test features before classifying.
    kernel.init(feats['train'], feats['test'])
    params['classified'] = machine.classify().get_labels()

    output = pout.copy()
    output.update(fileop.get_output(category.REGRESSION, params))
    fileop.write(category.REGRESSION, output)
예제 #21
0
def _run_hmm():
    """Run generator for Hidden-Markov-Model.

    Trains an HMM on Word features built from dataop.get_cubes and writes
    its log-likelihood sample, derivatives and the summed best-path values
    via fileop.
    """

    # put some constantness into randomness
    Math_init_random(INIT_RANDOM)

    num_examples = 4
    params = {
        'name': 'HMM',
        'accuracy': 1e-6,
        'N': 3,
        'M': 6,
        'num_examples': num_examples,
        'pseudo': 1e-10,
        'order': 1,
        'alphabet': 'CUBE',
        'feature_class': 'string_complex',
        'feature_type': 'Word',
        'data': dataop.get_cubes(num_examples, 1)
    }
    output = fileop.get_output(category.DISTRIBUTION, params)

    # The alphabet constant is fetched by name with getattr, not eval().
    alphabet = getattr(features, params['alphabet'])
    feats = featop.get_features(params['feature_class'],
                                params['feature_type'], params['data'],
                                alphabet, params['order'])

    hmm = distribution.HMM(feats['train'], params['N'], params['M'],
                           params['pseudo'])
    hmm.train()
    hmm.baum_welch_viterbi_train(distribution.BW_NORMAL)

    output[PREFIX + 'likelihood'] = hmm.get_log_likelihood_sample()
    output[PREFIX + 'derivatives'] = _get_derivatives(
        hmm, feats['train'].get_num_vectors())

    # The two accumulations stay interleaved so that best_path(i) is always
    # called before the states of example i are queried.
    output[PREFIX + 'best_path'] = 0
    output[PREFIX + 'best_path_state'] = 0
    for i in range(num_examples):
        output[PREFIX + 'best_path'] += hmm.best_path(i)
        for j in range(params['N']):
            output[PREFIX + 'best_path_state'] += hmm.get_best_path_state(i, j)

    fileop.write(category.DISTRIBUTION, output)
예제 #22
0
def _compute(params, feats, kernel, pout):
    """
    Compute a regression and gather result data.

    params is updated in place with 'labels', 'classified' and, for type
    'svm', 'bias', 'alpha_sum' and 'sv_sum'. If the regression module has
    no class named params['name'] the function returns without writing.

    @param params misc parameters for the regression method
    @param feats features of the kernel/regression
    @param kernel kernel
    @param pout previously gathered data from kernel ready to be written to file
    """

    kernel.parallel.set_num_threads(params['num_threads'])
    kernel.init(feats['train'], feats['train'])
    params['labels'], labels = dataop.get_labels(feats['train'].get_num_vectors())

    # "machine" holds the trained object; reusing the name "regression" for
    # it would turn the module name into a local and break the getattr call.
    try:
        fun = getattr(regression, params['name'])
    except AttributeError:
        return

    if params['type'] == 'svm':
        machine = fun(params['C'], params['epsilon'], kernel, labels)
        machine.set_tube_epsilon(params['tube_epsilon'])
    else:
        machine = fun(params['tau'], kernel, labels)
    machine.parallel.set_num_threads(params['num_threads'])

    machine.train()

    if params['type'] == 'svm':
        params['bias'] = machine.get_bias()
        params['alpha_sum'] = 0
        for item in machine.get_alphas().tolist():
            params['alpha_sum'] += item
        params['sv_sum'] = 0
        for item in machine.get_support_vectors():
            params['sv_sum'] += item

    # The kernel must point at the test features before apply() is called.
    kernel.init(feats['train'], feats['test'])
    params['classified'] = machine.apply().get_labels()

    output = pout.copy()
    output.update(fileop.get_output(category.REGRESSION, params))
    fileop.write(category.REGRESSION, output)
예제 #23
0
def _run(name, first_arg):
    """
    Run generator for a specific clustering method.

    An EuclidianDistance is built on cloud features, the clustering is
    trained on it, and distance plus clustering parameters are written
    via fileop.

    @param name Name of the clustering method to run.
    @param first_arg Key under which the first constructor argument (the number of clouds) is stored in the clustering parameters.
    """

    # put some constantness into randomness
    Math_init_random(dataop.INIT_RANDOM)

    num_clouds = 3
    distance_params = {
        'name': 'EuclidianDistance',
        'data': dataop.get_clouds(num_clouds, 5),
        'feature_class': 'simple',
        'feature_type': 'Real',
    }
    feats = featop.get_features(
        distance_params['feature_class'],
        distance_params['feature_type'],
        distance_params['data'])
    train_feats = feats['train']
    distance_class = eval(distance_params['name'])
    dist = distance_class(train_feats, train_feats)
    output = fileop.get_output(category.DISTANCE, distance_params)

    cluster_params = {'name': name, 'accuracy': 1e-8, first_arg: num_clouds}
    cluster_class = eval('clustering.' + name)
    machine = cluster_class(cluster_params[first_arg], dist)
    machine.train()

    dist.init(train_feats, feats['test'])
    # Each method exposes its own set of results.
    if name == 'KMeans':
        cluster_params['radi'] = machine.get_radiuses()
        cluster_params['centers'] = machine.get_cluster_centers()
    elif name == 'Hierarchical':
        cluster_params['merge_distance'] = machine.get_merge_distances()
        cluster_params['pairs'] = machine.get_cluster_pairs()

    output.update(fileop.get_output(category.CLUSTERING, cluster_params))
    fileop.write(category.CLUSTERING, output)
예제 #24
0
def _run_hmm():
    """Run generator for Hidden-Markov-Model.

    Builds Word features from dataop.get_cubes, trains an HMM on the
    training part and writes its log-likelihood sample, derivatives and the
    summed best-path values via fileop.
    """

    # put some constantness into randomness
    Math_init_random(INIT_RANDOM)

    num_examples = 4
    params = {
        'name': 'HMM',
        'accuracy': 1e-6,
        'N': 3,
        'M': 6,
        'num_examples': num_examples,
        'pseudo': 1e-10,
        'order': 1,
        'alphabet': 'CUBE',
        'feature_class': 'string_complex',
        'feature_type': 'Word',
        'data': dataop.get_cubes(num_examples, 1)
    }
    output = fileop.get_output(category.DISTRIBUTION, params)

    # getattr fetches the alphabet constant by name; eval() is not needed.
    feats = featop.get_features(
        params['feature_class'], params['feature_type'], params['data'],
        getattr(features, params['alphabet']), params['order'])

    hmm = distribution.HMM(
        feats['train'], params['N'], params['M'], params['pseudo'])
    hmm.train()
    hmm.baum_welch_viterbi_train(distribution.BW_NORMAL)

    output[PREFIX + 'likelihood'] = hmm.get_log_likelihood_sample()
    output[PREFIX + 'derivatives'] = _get_derivatives(
        hmm, feats['train'].get_num_vectors())

    # best_path(i) is deliberately called before the states of example i
    # are read, so the two sums are not split into separate loops.
    output[PREFIX + 'best_path'] = 0
    output[PREFIX + 'best_path_state'] = 0
    for i in range(num_examples):
        output[PREFIX + 'best_path'] += hmm.best_path(i)
        for j in range(params['N']):
            output[PREFIX + 'best_path_state'] += hmm.get_best_path_state(i, j)

    fileop.write(category.DISTRIBUTION, output)
예제 #25
0
def _run_string_complex(ftype):
    """Run a preprocessor applied on complex StringFeatures.

    Computes the kernel "Comm<ftype>String" on string_complex features
    built from DNA data and writes the result under the PREPROC category
    with the preprocessor name "Sort<ftype>String".

    @param ftype Feature type, like Word
    """

    kernel_name = "Comm" + ftype + "String"
    kernel_params = {
        "name": kernel_name,
        "accuracy": 1e-9,
        "feature_class": "string_complex",
        "feature_type": ftype,
        "data": dataop.get_dna(),
    }
    feats = featop.get_features(
        kernel_params["feature_class"],
        kernel_params["feature_type"],
        kernel_params["data"],
    )
    # string_complex gets the preproc added implicitly on Word/Ulong feats
    output = _compute(feats, kernel_params)

    preproc_params = {"name": "Sort" + ftype + "String"}
    preproc_output = fileop.get_output(category.PREPROC, preproc_params)
    output.update(preproc_output)

    fileop.write(category.PREPROC, output)
예제 #26
0
def _run_custom():
    """Generate reference data for the Custom kernel.

    Adds the training matrix from dataop.get_rand to its transpose and
    feeds CustomKernel from the lower triangle, from the summed matrix and
    from the original matrix in turn, reading the kernel matrix back after
    each step. All matrices are written via fileop.
    """

    params = {
        'name': 'Custom',
        'accuracy': 1e-7,
        'feature_class': 'simple',
        'feature_type': 'Real',
    }
    dim_square = 7
    rand_data = dataop.get_rand(dim_square=dim_square)
    # NOTE(review): these features are built but never handed to the kernel.
    feats = featop.get_features(
        params['feature_class'], params['feature_type'], rand_data)
    train = rand_data['train']
    symmetric = train + train.T

    # Gather every entry with y <= x, x being the outer loop variable.
    num_rows = symmetric.shape[0]
    num_cols = symmetric.shape[1]
    collected = []
    for x in xrange(num_cols):
        for y in xrange(num_rows):
            if y <= x:
                collected.append(symmetric[(x, y)])
    lowertriangle = numpy.array(collected)

    kern = kernel.CustomKernel()
    kern.set_triangle_kernel_matrix_from_triangle(lowertriangle)
    matrix_from_triangle = kern.get_kernel_matrix()
    kern.set_triangle_kernel_matrix_from_full(symmetric)
    matrix_from_full_triangle = kern.get_kernel_matrix()
    kern.set_full_kernel_matrix_from_full(train)
    matrix_from_full = kern.get_kernel_matrix()

    output = {
        'kernel_matrix_triangletriangle': matrix_from_triangle,
        'kernel_matrix_fulltriangle': matrix_from_full_triangle,
        'kernel_matrix_fullfull': matrix_from_full,
        'kernel_symdata': numpy.matrix(symmetric),
        'kernel_data': numpy.matrix(train),
        'kernel_dim_square': dim_square,
    }
    output.update(fileop.get_output(category.KERNEL, params))

    fileop.write(category.KERNEL, output)
예제 #27
0
def _run_string_complex(ftype):
    """Run a preprocessor applied on complex StringFeatures.

    The kernel 'Comm<ftype>String' is computed on string_complex features
    built from DNA data; the result is written under the PREPROC category
    with the preprocessor name 'Sort<ftype>String'.

    @param ftype Feature type, like Word
    """

    kernel_name = 'Comm' + ftype + 'String'
    kernel_params = {
        'name': kernel_name,
        'accuracy': 1e-9,
        'feature_class': 'string_complex',
        'feature_type': ftype,
        'data': dataop.get_dna(),
    }
    feats = featop.get_features(
        kernel_params['feature_class'],
        kernel_params['feature_type'],
        kernel_params['data'])
    # string_complex gets the preproc added implicitly on Word/Ulong feats
    output = _compute(feats, kernel_params)

    preproc_name = 'Sort' + ftype + 'String'
    output.update(
        fileop.get_output(category.PREPROC, {'name': preproc_name}))

    fileop.write(category.PREPROC, output)
예제 #28
0
def _compute_top_fisher(feats, pout):
    """Compute PolyKernel with TOP or FKFeatures

    The kernel parameters are fixed inside this function. The train/train
    and train/test kernel matrices are written together with pout.

    @param feats dict with "train" and "test" features of the kernel
    @param pout previously gathered data ready to be written to file
    """

    params = {
        "name": "Poly",
        "accuracy": 1e-6,
        "args": {"key": ("size", "degree", "inhomogene"), "val": (10, 1, False)},
    }
    output = fileop.get_output(category.KERNEL, params)
    output.update(pout)

    # Class lookup through getattr; eval() on a built string is avoided.
    kfun = getattr(kernel, params["name"] + "Kernel")
    kern = kfun(feats["train"], feats["train"], *params["args"]["val"])
    output["kernel_matrix_train"] = kern.get_kernel_matrix()
    kern.init(feats["train"], feats["test"])
    output["kernel_matrix_test"] = kern.get_kernel_matrix()

    fileop.write(category.KERNEL, output)
예제 #29
0
def _run(name, first_arg):
    """
    Run generator for a specific clustering method.

    Builds an EuclidianDistance on cloud features, trains the clustering
    on it and writes distance plus clustering parameters via fileop.

    @param name Name of the clustering method to run.
    @param first_arg Key under which the first constructor argument (the number of clouds) is stored in the clustering parameters.
    """

    # put some constantness into randomness
    Math_init_random(dataop.INIT_RANDOM)

    num_clouds = 3
    distance_params = {
        'name': 'EuclidianDistance',
        'data': dataop.get_clouds(num_clouds, 5),
        'feature_class': 'simple',
        'feature_type': 'Real',
    }
    feats = featop.get_features(
        distance_params['feature_class'],
        distance_params['feature_type'],
        distance_params['data'])
    train_feats = feats['train']
    make_distance = eval(distance_params['name'])
    dist = make_distance(train_feats, train_feats)
    output = fileop.get_output(category.DISTANCE, distance_params)

    method_params = {'name': name, 'accuracy': 1e-8, first_arg: num_clouds}
    make_clustering = eval('clustering.' + name)
    method = make_clustering(method_params[first_arg], dist)
    method.train()

    dist.init(train_feats, feats['test'])
    # The results gathered depend on the clustering method.
    if name == 'KMeans':
        method_params['radi'] = method.get_radiuses()
        method_params['centers'] = method.get_cluster_centers()
    elif name == 'Hierarchical':
        method_params['merge_distance'] = method.get_merge_distances()
        method_params['pairs'] = method.get_cluster_pairs()

    clustering_output = fileop.get_output(category.CLUSTERING, method_params)
    output.update(clustering_output)
    fileop.write(category.CLUSTERING, output)
예제 #30
0
파일: kernel.py 프로젝트: AsherBond/shogun
def _run_custom ():
	"""Run Custom kernel.

	Fills a CustomKernel three ways (from a lower-triangle vector, from the
	full symmetric matrix, and from the full raw matrix) and writes the
	three resulting kernel matrices together with the input data.
	"""

	params={
		'name': 'Custom',
		'accuracy': 1e-7,
		'feature_class': 'simple',
		'feature_type': 'Real'
	}
	dim_square=7
	data=dataop.get_rand(dim_square=dim_square)
	# NOTE: feats is not used below; the call is kept in case
	# featop.get_features has side effects the generated data relies on.
	feats=featop.get_features(
		params['feature_class'], params['feature_type'], data)
	data=data['train']
	symdata=data+data.T

	# Lower triangle including the diagonal, row by row. symdata is square
	# (it is data+data.T), so one dimension bounds both indices. range()
	# replaces the Python-2-only xrange() so this also runs on Python 3.
	dim=symdata.shape[0]
	lowertriangle=numpy.array([symdata[(row, col)]
		for row in range(dim) for col in range(row+1)])
	kern=kernel.CustomKernel()
	kern.set_triangle_kernel_matrix_from_triangle(lowertriangle)
	km_triangletriangle=kern.get_kernel_matrix()
	kern.set_triangle_kernel_matrix_from_full(symdata)
	km_fulltriangle=kern.get_kernel_matrix()
	kern.set_full_kernel_matrix_from_full(data)
	km_fullfull=kern.get_kernel_matrix()

	output={
		'kernel_matrix_triangletriangle': km_triangletriangle,
		'kernel_matrix_fulltriangle': km_fulltriangle,
		'kernel_matrix_fullfull': km_fullfull,
		'kernel_symdata': numpy.matrix(symdata),
		'kernel_data': numpy.matrix(data),
		'kernel_dim_square': dim_square
	}
	output.update(fileop.get_output(category.KERNEL, params))

	fileop.write(category.KERNEL, output)
예제 #31
0
def _compute(feats, params):
    """Compute a distance and gather result data.

    The distance class named by params['name'] is built on the training
    features and its matrix recorded; it is then re-initialised on
    train/test features and that matrix recorded as well. Both matrices
    are written together with the distance parameters.

    @param feats Train and test features, indexed by 'train' and 'test'
    @param params dict with parameters to distance; an optional 'args' entry supplies extra constructor arguments through its 'val' sequence
    """

    # NOTE: eval() builds the lookup from params['name']; only pass names
    # that come from trusted generator code.
    fun = eval('distance.' + params['name'])
    # "in" instead of dict.has_key(), which does not exist in Python 3
    if 'args' in params:
        dist = fun(feats['train'], feats['train'], *params['args']['val'])
    else:
        dist = fun(feats['train'], feats['train'])
    dm_train = dist.get_distance_matrix()
    dist.init(feats['train'], feats['test'])
    dm_test = dist.get_distance_matrix()

    output = {
        'distance_matrix_train': dm_train,
        'distance_matrix_test': dm_test,
    }
    output.update(fileop.get_output(category.DISTANCE, params))

    fileop.write(category.DISTANCE, output)
예제 #32
0
def _compute (feats, params):
	"""Compute a distance and gather result data.

	The distance class named by params['name'] is built on the training
	features and its matrix recorded; it is then re-initialised on
	train/test features and that matrix recorded as well. Both matrices
	are written together with the distance parameters.

	@param feats Train and test features, indexed by 'train' and 'test'
	@param params dict with parameters to distance; an optional 'args' entry supplies extra constructor arguments through its 'val' sequence
	"""

	# NOTE: eval() builds the lookup from params['name']; only pass names
	# that come from trusted generator code.
	fun=eval('distance.'+params['name'])
	# "in" instead of dict.has_key(), which does not exist in Python 3
	if 'args' in params:
		dist=fun(feats['train'], feats['train'], *params['args']['val'])
	else:
		dist=fun(feats['train'], feats['train'])
	dm_train=dist.get_distance_matrix()
	dist.init(feats['train'], feats['test'])
	dm_test=dist.get_distance_matrix()

	output={
		'distance_matrix_train':dm_train,
		'distance_matrix_test':dm_test,
	}
	output.update(fileop.get_output(category.DISTANCE, params))

	fileop.write(category.DISTANCE, output)
예제 #33
0
파일: preproc.py 프로젝트: AsherBond/shogun
def _run_string_complex (ftype):
	"""Generate preprocessor data for complex StringFeatures.

	Features are built under the name 'Comm<ftype>String' and handed to
	_compute; the result is then extended with the output parameters for
	'Sort<ftype>String' and written in the PREPROC category.

	@param ftype Feature type, like Word
	"""

	comm_name='Comm'+ftype+'String'
	dna=dataop.get_dna()
	comm_params={
		'name': comm_name,
		'accuracy': 1e-9,
		'feature_class': 'string_complex',
		'feature_type': ftype,
		'data': dna
	}
	feats=featop.get_features(
		comm_params['feature_class'], comm_params['feature_type'],
		comm_params['data'])
	# string_complex gets the preproc added implicitly on Word/Ulong feats
	output=_compute(feats, comm_params)

	sort_params={'name': 'Sort'+ftype+'String'}
	sort_output=fileop.get_output(category.PREPROC, sort_params)
	output.update(sort_output)

	fileop.write(category.PREPROC, output)
예제 #34
0
def _run_knn ():
	"""Run K-Nearest-Neighbour classifier.

	A KNN with k=3 is trained on a EuclidianDistance over the training
	features; the distance is then re-initialised on train/test features,
	the classification result is recorded, and the distance and classifier
	parameters are merged and written in the CLASSIFIER category.
	"""

	dist_params={
		'name': 'EuclidianDistance',
		'data': dataop.get_clouds(2),
		'feature_class': 'simple',
		'feature_type': 'Real'
	}
	feats=featop.get_features(
		dist_params['feature_class'], dist_params['feature_type'],
		dist_params['data'])
	train_feats=feats['train']
	dist=eval(dist_params['name'])(train_feats, train_feats)
	output=fileop.get_output(category.DISTANCE, dist_params)

	knn_params={
		'name': 'KNN',
		'type': 'knn',
		'num_threads': 1,
		'k': 3,
		'label_type': 'twoclass',
		'accuracy': 1e-8
	}
	label_data, labels=dataop.get_labels(
		train_feats.get_num_vectors(), knn_params['label_type'])
	knn_params['labels']=label_data

	knn=classifier.KNN(knn_params['k'], dist, labels)
	knn.parallel.set_num_threads(knn_params['num_threads'])
	knn.train()

	# switch the distance to train/test features before classifying
	dist.init(train_feats, feats['test'])
	predictions=knn.classify()
	knn_params['classified']=predictions.get_labels()

	knn_output=fileop.get_output(category.CLASSIFIER, knn_params)
	output.update(knn_output)
	fileop.write(category.CLASSIFIER, output)