Beispiel #1
0
def modelselection_grid_search_kernel(num_subsets, num_vectors, dim_vectors):
    """Grid-search LibSVM parameters via cross-validation on random data.

    A random ``dim_vectors`` x ``num_vectors`` feature matrix is generated
    together with alternating +1/-1 labels.  The parameter tree returned by
    ``create_param_tree()`` is searched with ``GridSearchModelSelection``
    using a single cross-validation run; the best combination is applied to
    the classifier, which is then evaluated again with ten runs.

    Args:
        num_subsets: Number of subsets handed to the stratified
            cross-validation splitting strategy.
        num_vectors: Number of feature vectors (columns of the matrix) and
            number of labels.
        dim_vectors: Dimension of each feature vector (rows of the matrix).

    Returns:
        Tuple ``(classifier, result, mean)`` where ``result`` is what
        ``cross.evaluate()`` returned and ``mean`` is ``get_mean()`` of that
        result after casting to ``CrossValidationResult``.
    """
    # Fixed seeds for both Shogun and the ``random`` module in scope, so
    # repeated calls see the same data.
    Math.init_random(1)
    random.seed(1)

    # Random (meaningless) data: one column per vector.
    data_matrix = random.rand(dim_vectors, num_vectors)
    feats = RealFeatures()
    feats.set_feature_matrix(data_matrix)

    # Two classes, alternating: even index -> +1, odd index -> -1.
    binary_labels = BinaryLabels(num_vectors)
    for idx in range(num_vectors):
        binary_labels.set_label(idx, -1 if idx % 2 else 1)

    classifier = LibSVM()

    # Cross-validation used as the evaluation backend of the model selection:
    # stratified splitting, accuracy as the criterion.
    splitter = StratifiedCrossValidationSplitting(binary_labels, num_subsets)
    criterion = ContingencyTableEvaluation(ACCURACY)
    cross = CrossValidation(classifier, feats, binary_labels, splitter,
                            criterion)
    cross.set_num_runs(1)

    # Hint: classifier.print_modsel_params() lists every parameter that is
    # available for model selection; tree.print_tree() dumps a parameter tree.
    search = GridSearchModelSelection(cross, create_param_tree())

    # ``False`` = do not print the search state while selecting.
    best = search.select_model(False)
    best.apply_to_machine(classifier)

    # More runs for the final evaluation to reduce the variance of the mean.
    cross.set_num_runs(10)
    result = cross.evaluate()
    mean = CrossValidationResult.obtain_from_generic(result).get_mean()

    return classifier, result, mean
def classifier_custom_kernel(C=1, dim=7):
    """Train a LibSVM on a random ``dim`` x ``dim`` custom kernel matrix.

    NumPy's global random generator is seeded with ``(C, dim)``, so the
    random labels and the random kernel matrix are reproducible for a given
    pair of arguments.

    Args:
        C: First constructor argument of ``LibSVM``; also part of the seed.
        dim: Number of labels and side length of the square kernel matrix;
            also part of the seed.

    Returns:
        Tuple ``(svm, out)``: the trained machine and the result of
        ``svm.apply().get_labels()``.
    """
    from shogun import BinaryLabels, CustomKernel, LibSVM
    from numpy import sign
    from numpy.random import rand, seed

    seed((C, dim))

    # Random labels in {-1, +1} and a random square matrix used as kernel.
    lab = sign(2 * rand(dim) - 1)
    data = rand(dim, dim)
    # NOTE(review): this function used to also compute
    # ``data * data.T + identity`` (a symmetrised matrix) without ever using
    # it; the kernel has always been built from the raw, non-symmetric
    # ``data``.  That behaviour is kept here -- confirm whether the
    # symmetrised matrix was the intended kernel input.

    kernel = CustomKernel()
    kernel.set_full_kernel_matrix_from_full(data)
    labels = BinaryLabels(lab)

    svm = LibSVM(C, kernel, labels)
    svm.train()

    # Apply once; previously the machine was applied twice and the first
    # result was thrown away.
    out = svm.apply().get_labels()
    return svm, out
Beispiel #3
0
def classifier_multiclassmachine(fm_train_real=traindat,
                                 fm_test_real=testdat,
                                 label_train_multiclass=label_traindat,
                                 width=2.1,
                                 C=1,
                                 epsilon=1e-5):
    """Train a one-vs-rest kernel multiclass machine and label the test data.

    A ``KernelMulticlassMachine`` is built from a ``LibSVM`` base machine, a
    ``GaussianKernel`` on the training features and a
    ``MulticlassOneVsRestStrategy``.  After training, the kernel is
    re-initialised on (train, test) and the machine is applied.

    Args:
        fm_train_real: Data wrapped in ``RealFeatures`` for training.
        fm_test_real: Data wrapped in ``RealFeatures`` for testing.
        label_train_multiclass: Data wrapped in ``MulticlassLabels``.
        width: Third argument of the ``GaussianKernel`` constructor.
        C: Accepted for interface compatibility; not used by this function.
        epsilon: Value passed to ``LibSVM.set_epsilon``.

    Returns:
        The result of ``get_labels()`` on the applied machine's output.
    """
    from shogun import GaussianKernel
    from shogun import KernelMulticlassMachine, LibSVM, MulticlassOneVsRestStrategy
    from shogun import MulticlassLabels, RealFeatures

    train_features = RealFeatures(fm_train_real)
    test_features = RealFeatures(fm_test_real)
    gaussian = GaussianKernel(train_features, train_features, width)

    train_labels = MulticlassLabels(label_train_multiclass)

    # Binary base machine wrapped by the multiclass machine.
    base_svm = LibSVM()
    base_svm.set_epsilon(epsilon)

    machine = KernelMulticlassMachine(
        MulticlassOneVsRestStrategy(), gaussian, base_svm, train_labels)
    machine.train()

    # Switch the kernel's right-hand side to the test features before
    # applying the trained machine.
    gaussian.init(train_features, test_features)
    return machine.apply().get_labels()
Beispiel #4
0
def classifier_ssk(fm_train_dna=traindat,
                   fm_test_dna=testdat,
                   label_train_dna=label_traindat,
                   C=1,
                   maxlen=1,
                   decay=1):
    """Train a LibSVM with a subsequence string kernel and label test data.

    Args:
        fm_train_dna: Training strings, wrapped in ``StringCharFeatures``
            with the ``DNA`` alphabet.
        fm_test_dna: Test strings, wrapped the same way.
        label_train_dna: Training labels, wrapped in ``BinaryLabels``.
        C: First constructor argument of ``LibSVM``.
        maxlen: Third argument of ``SubsequenceStringKernel``.
        decay: Fourth argument of ``SubsequenceStringKernel``.

    Returns:
        ``get_labels()`` of the machine applied to the test features.
    """
    from shogun import BinaryLabels, StringCharFeatures
    from shogun import DNA, LibSVM, SubsequenceStringKernel
    from shogun import ErrorRateMeasure

    train_strings = StringCharFeatures(fm_train_dna, DNA)
    test_strings = StringCharFeatures(fm_test_dna, DNA)
    train_labels = BinaryLabels(label_train_dna)
    ssk = SubsequenceStringKernel(train_strings, train_strings, maxlen, decay)

    machine = LibSVM(C, ssk, train_labels)
    machine.train()

    # Training error is evaluated but its value is not used or returned.
    train_output = machine.apply(train_strings)
    ErrorRateMeasure().evaluate(train_output, train_labels)

    # Point the kernel at (train, test) before predicting on the test set.
    ssk.init(train_strings, test_strings)
    return machine.apply(test_strings).get_labels()
Beispiel #5
0
def kernel_combined_custom_poly(train_fname=traindat,
                                test_fname=testdat,
                                train_label_fname=label_traindat):
    """Train and apply a LibSVM on a combined (custom + polynomial) kernel.

    For training, a ``CombinedKernel`` is assembled from a ``CustomKernel``
    holding the precomputed ``PolyKernel(10, 3)`` matrix of the training
    data and a ``PolyKernel(10, 2)`` sub-kernel.  A second combined kernel
    with the same structure is then built for (train, test), installed on
    the machine, and the machine is applied.

    Args:
        train_fname: CSV file with the training features.
        test_fname: CSV file with the test features.
        train_label_fname: CSV file with the training labels.

    Returns:
        Tuple ``(km, kernel)``: the kernel matrix of the second (prediction)
        combined kernel and that kernel object itself.
    """
    from shogun import BinaryLabels, CombinedFeatures, RealFeatures
    from shogun import CombinedKernel, CustomKernel, PolyKernel
    from shogun import CSVFile, LibSVM

    # --- training kernel -------------------------------------------------
    train_kernel = CombinedKernel()
    train_combined = CombinedFeatures()

    # Precompute the first polynomial kernel and wrap its matrix.
    raw_train = RealFeatures(CSVFile(train_fname))
    precomputed = PolyKernel(10, 3)
    precomputed.init(raw_train, raw_train)
    train_matrix = precomputed.get_kernel_matrix()
    train_kernel.append_kernel(CustomKernel(train_matrix))

    # The second polynomial sub-kernel works on its own copy of the features.
    poly_train_feats = RealFeatures(CSVFile(train_fname))
    train_combined.append_feature_obj(poly_train_feats)
    poly_train = PolyKernel(10, 2)
    train_kernel.append_kernel(poly_train)

    train_kernel.init(train_combined, train_combined)

    train_labels = BinaryLabels(CSVFile(train_label_fname))
    machine = LibSVM(1.0, train_kernel, train_labels)
    machine.train()

    # --- prediction kernel -----------------------------------------------
    pred_kernel = CombinedKernel()
    pred_combined = CombinedFeatures()

    raw_test = RealFeatures(CSVFile(test_fname))
    precomputed = PolyKernel(10, 3)
    precomputed.init(raw_train, raw_test)
    test_matrix = precomputed.get_kernel_matrix()
    pred_kernel.append_kernel(CustomKernel(test_matrix))

    poly_test_feats = RealFeatures(CSVFile(test_fname))
    pred_combined.append_feature_obj(poly_test_feats)
    poly_test = PolyKernel(10, 2)
    pred_kernel.append_kernel(poly_test)
    pred_kernel.init(train_combined, pred_combined)

    machine.set_kernel(pred_kernel)
    machine.apply()
    return pred_kernel.get_kernel_matrix(), pred_kernel
Beispiel #6
0
def classifier_ssk(fm_train_dna=traindat, fm_test_dna=testdat,
                   label_train_dna=label_traindat, C=1, maxlen=1, decay=1):
    """Fit a LibSVM on a subsequence string kernel and predict test labels.

    The training and test strings are wrapped in ``StringCharFeatures`` over
    the ``DNA`` alphabet.  The training error is evaluated with
    ``ErrorRateMeasure`` but is neither printed nor returned.

    Args:
        fm_train_dna: Training strings.
        fm_test_dna: Test strings.
        label_train_dna: Training labels (wrapped in ``BinaryLabels``).
        C: First constructor argument of ``LibSVM``.
        maxlen: Third argument of ``SubsequenceStringKernel``.
        decay: Fourth argument of ``SubsequenceStringKernel``.

    Returns:
        ``get_labels()`` of the machine's output on the test features.
    """
    from shogun import StringCharFeatures, BinaryLabels
    from shogun import LibSVM, SubsequenceStringKernel, DNA
    from shogun import ErrorRateMeasure

    dna_train = StringCharFeatures(fm_train_dna, DNA)
    dna_test = StringCharFeatures(fm_test_dna, DNA)
    targets = BinaryLabels(label_train_dna)
    string_kernel = SubsequenceStringKernel(dna_train, dna_train, maxlen, decay)

    classifier = LibSVM(C, string_kernel, targets)
    classifier.train()

    # Evaluate on the training set; the error value itself is discarded.
    fitted = classifier.apply(dna_train)
    error_measure = ErrorRateMeasure()
    error_measure.evaluate(fitted, targets)

    # Re-initialise the kernel on (train, test) for prediction.
    string_kernel.init(dna_train, dna_test)
    test_output = classifier.apply(dna_test)
    return test_output.get_labels()
Beispiel #7
0
def svm_train(kernel, labels, C1, C2=None):
    """Train a single-threaded LibSVM on ``kernel`` and ``labels``.

    Progress output is disabled on both the kernel and the machine.

    Args:
        kernel: Kernel object handed to ``LibSVM``.
        labels: Labels object handed to ``LibSVM``.
        C1: First constructor argument of ``LibSVM``.
        C2: Optional second value; when truthy, ``set_C(C1, C2)`` is called
            on the machine.  ``None`` (the default) or any other falsy value
            skips that call.

    Returns:
        The trained ``LibSVM`` instance.
    """
    kernel.io.disable_progress()

    machine = LibSVM(C1, kernel, labels)
    if C2:
        machine.set_C(C1, C2)

    # Training is pinned to one thread.
    thread_count = 1
    machine.parallel.set_num_threads(thread_count)
    machine.io.disable_progress()

    machine.train()
    return machine
def kernel_combined_custom_poly(train_fname=traindat, test_fname=testdat,
                                train_label_fname=label_traindat):
    """Combine a precomputed and a live polynomial kernel in one LibSVM run.

    Two ``CombinedKernel`` objects with identical structure are built: one
    on (train, train) used for training, one on (train, test) that is set
    on the trained machine before ``apply()`` is called.  Each consists of a
    ``CustomKernel`` wrapping a precomputed ``PolyKernel(10, 3)`` matrix and
    a ``PolyKernel(10, 2)`` sub-kernel fed through ``CombinedFeatures``.

    Args:
        train_fname: CSV file with the training features.
        test_fname: CSV file with the test features.
        train_label_fname: CSV file with the training labels.

    Returns:
        Tuple of the (train, test) combined kernel's matrix and that
        combined kernel object.
    """
    from shogun import CombinedFeatures, RealFeatures, BinaryLabels
    from shogun import CombinedKernel, PolyKernel, CustomKernel
    from shogun import LibSVM, CSVFile

    def load_features(fname):
        # Read a fresh feature object from a CSV file.
        return RealFeatures(CSVFile(fname))

    def precomputed_kernel(lhs, rhs):
        # Evaluate PolyKernel(10, 3) on (lhs, rhs) and freeze its matrix.
        poly = PolyKernel(10, 3)
        poly.init(lhs, rhs)
        return CustomKernel(poly.get_kernel_matrix())

    # Training-side combined kernel.
    combined_train = CombinedKernel()
    features_train = CombinedFeatures()

    base_train = load_features(train_fname)
    combined_train.append_kernel(precomputed_kernel(base_train, base_train))

    features_train.append_feature_obj(load_features(train_fname))
    combined_train.append_kernel(PolyKernel(10, 2))

    combined_train.init(features_train, features_train)

    svm = LibSVM(1.0, combined_train, BinaryLabels(CSVFile(train_label_fname)))
    svm.train()

    # Prediction-side combined kernel, same structure on (train, test).
    combined_pred = CombinedKernel()
    features_pred = CombinedFeatures()

    base_test = load_features(test_fname)
    combined_pred.append_kernel(precomputed_kernel(base_train, base_test))

    features_pred.append_feature_obj(load_features(test_fname))
    combined_pred.append_kernel(PolyKernel(10, 2))
    combined_pred.init(features_train, features_pred)

    svm.set_kernel(combined_pred)
    svm.apply()
    kernel_matrix = combined_pred.get_kernel_matrix()
    return kernel_matrix, combined_pred
Beispiel #9
0
class svm_splice_model(object):
    """LibSVM with a weighted-degree string kernel restored from parameters.

    The machine is not trained here; its alphas, support-vector indices and
    bias are set directly from the constructor arguments.
    """

    def __init__(self, order, traindat, alphas, b, window, consensus):
        """Rebuild the SVM from stored parameters.

        Args:
            order: Kernel order; converted with ``int`` and passed to
                ``WeightedDegreeStringKernel``.
            traindat: String data, wrapped in ``StringCharFeatures`` with
                the ``DNA`` alphabet and used on both sides of the kernel.
            alphas: Values passed to ``set_alphas``; the support-vector
                indices are set to ``0 .. len(alphas) - 1``.
            b: Bias passed to ``set_bias``.
            window: Sequence of exactly three items
                ``(window_left, offset, window_right)``.  This used to be a
                tuple parameter in the signature, which Python 3 no longer
                accepts; callers pass the same 3-tuple positionally.
            consensus: Stored unchanged as ``self.consensus``.

        Raises:
            ValueError: If ``window`` does not unpack into three items.
        """
        window_left, offset, window_right = window

        f = StringCharFeatures(traindat, DNA)
        wd_kernel = WeightedDegreeStringKernel(f, f, int(order))
        wd_kernel.io.set_target_to_stdout()

        self.svm = LibSVM()
        self.svm.set_kernel(wd_kernel)
        self.svm.set_alphas(alphas)
        # One support-vector index per alpha, in order.
        self.svm.set_support_vectors(
            numpy.arange(len(alphas), dtype=numpy.int32))
        self.svm.set_bias(b)
        self.svm.io.set_target_to_stdout()
        # Use as many threads as the machine reports CPUs.
        self.svm.parallel.set_num_threads(self.svm.parallel.get_num_cpus())
        self.svm.set_linadd_enabled(True)
        self.svm.set_batch_computation_enabled(True)

        self.window_left = int(window_left)
        self.window_right = int(window_right)

        self.consensus = consensus
        self.wd_kernel = wd_kernel
        self.traindat = f
        self.offset = offset
Beispiel #10
0
def classifier_custom_kernel(C=1, dim=7):
    """Fit a LibSVM to random labels using a random custom kernel matrix.

    The global NumPy random generator is seeded with ``(C, dim)`` first, so
    the generated labels and matrix depend only on the two arguments.

    Args:
        C: Passed to ``LibSVM`` as its first constructor argument and used
            as part of the random seed.
        dim: Number of labels and side length of the square kernel matrix;
            also used as part of the random seed.

    Returns:
        Tuple ``(svm, out)`` with the trained machine and the value of
        ``svm.apply().get_labels()``.
    """
    from shogun import BinaryLabels, CustomKernel, LibSVM
    from numpy import sign
    from numpy.random import rand, seed

    seed((C, dim))

    # Labels drawn from {-1, +1}; kernel entries drawn uniformly at random.
    lab = sign(2 * rand(dim) - 1)
    data = rand(dim, dim)
    # NOTE(review): a symmetrised matrix (``data * data.T`` plus the
    # identity) used to be computed here but was never passed to the kernel.
    # The raw ``data`` matrix is still what the kernel receives, so results
    # are unchanged -- confirm which matrix was actually intended.

    kernel = CustomKernel()
    kernel.set_full_kernel_matrix_from_full(data)
    labels = BinaryLabels(lab)

    svm = LibSVM(C, kernel, labels)
    svm.train()

    # A single apply() is enough; the former duplicate call only produced
    # an unused result.
    out = svm.apply().get_labels()
    return svm, out
Beispiel #11
0
def runShogunSVMDNASpectrumKernel(train_xt, train_lt, test_xt):
    """Train a LibSVM with a spectrum kernel and predict on train and test.

    Character features over the ``DNA`` alphabet are converted to word
    features with ``obtain_from_char`` (using the module-level ``K`` and
    ``GAP``) and passed through a ``SortWordString`` preprocessor that is
    initialised on the training features.  The machine is built with the
    module-level ``SVMC`` and a ``CommWordStringKernel``.

    Args:
        train_xt: Training strings.
        train_lt: Training labels, wrapped in ``BinaryLabels``.
        test_xt: Test strings.

    Returns:
        Tuple ``(out1, out2, out1DecisionValues, out2DecisionValues)``:
        ``get_labels()`` for the training and test predictions, followed by
        the objects ``svm.apply`` returned for training and test features.
    """
    # Training features: chars -> words, then sorted by the preprocessor.
    charfeat_train = StringCharFeatures(train_xt, DNA)
    feats_train = StringWordFeatures(DNA)
    feats_train.obtain_from_char(charfeat_train, K - 1, K, GAP, False)
    preproc = SortWordString()
    preproc.init(feats_train)
    feats_train.add_preprocessor(preproc)
    feats_train.apply_preprocessor()

    # Test features go through the same conversion and the same preprocessor.
    charfeat_test = StringCharFeatures(test_xt, DNA)
    feats_test = StringWordFeatures(DNA)
    feats_test.obtain_from_char(charfeat_test, K - 1, K, GAP, False)
    feats_test.add_preprocessor(preproc)
    feats_test.apply_preprocessor()

    kernel = CommWordStringKernel(feats_train, feats_train, False)
    kernel.io.set_loglevel(MSG_DEBUG)

    labels = BinaryLabels(train_lt)

    # Train the model.  print() with a single argument produces the same
    # output on Python 2 and Python 3.
    print("Ready to train!")
    svm = LibSVM(SVMC, kernel, labels)
    svm.io.set_loglevel(MSG_DEBUG)
    svm.train()

    # Predictions on the training data first, then on the test data after
    # re-initialising the kernel on (train, test).
    print("Making predictions!")
    out1DecisionValues = svm.apply(feats_train)
    out1 = out1DecisionValues.get_labels()
    kernel.init(feats_train, feats_test)
    out2DecisionValues = svm.apply(feats_test)
    out2 = out2DecisionValues.get_labels()

    return out1, out2, out1DecisionValues, out2DecisionValues
Beispiel #12
0
                                                    (datasetSize,trainingSize,testSize,TEST_SIZE)
    print_to_console_log(data_info)

    ## Codify Labels (1,-1)
    y_train[np.where(y_train==0)] = -1
    y_test [np.where(y_test==0)]  = -1
    
    ## Set up Subsequence string Kernel
    features = StringCharFeatures(X_train, RAWBYTE)
    test_features = StringCharFeatures(X_test, RAWBYTE)
    labels = BinaryLabels(y_train)
    sk = SubsequenceStringKernel(features, features, SUBSEQUENCE_SIZE, LAMBDA)

    ## Fitting Kernel SVM to the Training set
    startTime1 = time.time()
    svm = LibSVM(C, sk, labels)
    svm.train()
    endTime1 = time.time()

    ## Print Training Time
    formatedTime = time.strftime('%H:%M:%S', time.gmtime(endTime1-startTime1))
    print_to_console_log("Training Time\nTime:{:010.4f}s\nTime:{:s}\n".format(endTime1-startTime1,formatedTime))

    # save the model to disk
    pickle.dump(svm, open(FILE_NAME, 'wb'))

    # Model Evaluation
    print_to_console_log("Evaluate Model...\n")
    startTime2 = time.time()
    predicted_labels = svm.apply(test_features).get_labels()
    y_predicted = np.array(predicted_labels)
Beispiel #13
0
from shogun import LibSVM, LDA
from shogun import ROCEvaluation
import util

# ROC example script: trains a LibSVM and an LDA on data from the local
# ``util`` module, plots the raw points and evaluates the SVM output with
# ROCEvaluation.
# NOTE(review): subplots_adjust/subplot/plot/grid/title and GaussianKernel
# are not imported in the visible lines -- presumably they come from a
# pylab star import and from shogun elsewhere in the file; confirm.
util.set_title('ROC example')
util.DISTANCE=0.5
subplots_adjust(hspace=0.3)

# Two data sets from util (requested with True and False), combined into
# one feature object; labels come from util as well.
pos=util.get_realdata(True)
neg=util.get_realdata(False)
features=util.get_realfeatures(pos, neg)
labels=util.get_labels()

# Classifiers: a Gaussian-kernel LibSVM and an LDA, both trained on the
# same features and labels.
gk=GaussianKernel(features, features, 1.0)
svm = LibSVM(1000.0, gk, labels)
svm.train()
lda=LDA(1,features,labels)
lda.train()

# Plot the points: rows 0 and 1 of each data set are used as x and y,
# with format "r." for pos and "b." for neg.
subplot(211)
plot(pos[0,:], pos[1,:], "r.")
plot(neg[0,:], neg[1,:], "b.")
grid(True)
title('Data',size=10)

# ROC for the SVM: evaluate the SVM's output against the labels in the
# subplot selected by 223.
subplot(223)
ROC_evaluation=ROCEvaluation()
ROC_evaluation.evaluate(svm.apply(),labels)
Beispiel #14
0
from shogun import LibSVM, LDA
from shogun import ROCEvaluation
import util

# ROC example script: fits a LibSVM and an LDA to data provided by the
# local ``util`` module, draws the data points and runs ROCEvaluation on
# the SVM output.
# NOTE(review): subplots_adjust/subplot/plot/grid/title and GaussianKernel
# are not imported in the visible lines -- presumably a pylab star import
# and a shogun import exist elsewhere in the file; confirm.
util.set_title('ROC example')
util.DISTANCE = 0.5
subplots_adjust(hspace=0.3)

# Fetch the two data sets (requested with True and False), build one
# feature object from both, and fetch the labels.
pos = util.get_realdata(True)
neg = util.get_realdata(False)
features = util.get_realfeatures(pos, neg)
labels = util.get_labels()

# Classifiers: LibSVM with a Gaussian kernel, and LDA; both are trained on
# the same features and labels.
gk = GaussianKernel(features, features, 1.0)
svm = LibSVM(1000.0, gk, labels)
svm.train()
lda = LDA(1, features, labels)
lda.train()

# Plot the points: row 0 against row 1 of each data set, using format
# "r." for pos and "b." for neg.
subplot(211)
plot(pos[0, :], pos[1, :], "r.")
plot(neg[0, :], neg[1, :], "b.")
grid(True)
title('Data', size=10)

# ROC for the SVM: the SVM output is evaluated against the labels in the
# subplot selected by 223.
subplot(223)
ROC_evaluation = ROCEvaluation()
ROC_evaluation.evaluate(svm.apply(), labels)