def evaluation_thresholds_modular(index):
    """Compare one ROC point from ``ROCEvaluation`` with hand-computed rates.

    Synthetic scores are a linear ramp mixed with uniformly distributed
    noise drawn from a seeded generator; the first half of the samples is
    labelled -1 and the second half +1.

    Args:
        index: Position into the evaluator's ROC and threshold arrays.

    Returns:
        A 4-tuple ``(tp[index], fp[index], tpr, fpr)`` where ``tpr`` and
        ``fpr`` are the fractions of positive / negative samples whose
        score exceeds ``thresholds[index]``.
    """
    from modshogun import BinaryLabels, ROCEvaluation
    import numpy

    # Seed first so that the noise term below is reproducible.
    numpy.random.seed(17)
    ramp = numpy.arange(-1, 1, 0.001)
    noise = numpy.random.rand(len(ramp)) - 0.5
    scores = 0.3 * ramp + 0.7 * noise

    half = len(scores) // 2
    truth_values = numpy.array([-1.0] * half + [1.0] * half)

    roc = ROCEvaluation()
    roc.evaluate(BinaryLabels(scores), BinaryLabels(truth_values))

    false_pos, true_pos = roc.get_ROC()
    cutoff = roc.get_thresholds()[index]

    # Rates recomputed directly from the raw scores at the same cutoff.
    tpr = numpy.mean(scores[truth_values > 0] > cutoff)
    fpr = numpy.mean(scores[truth_values < 0] > cutoff)
    return true_pos[index], false_pos[index], tpr, fpr
Ejemplo n.º 2
0
def classifier_domainadaptationsvm_modular(fm_train_dna=traindna, fm_test_dna=testdna,
                                           label_train_dna=label_traindna,
                                           label_test_dna=label_testdna,
                                           fm_train_dna2=traindna2, fm_test_dna2=testdna2,
                                           label_train_dna2=label_traindna2,
                                           label_test_dna2=label_testdna2, C=1, degree=3):
    """Train an SVM on one DNA data set, then adapt it to a second one.

    A first ``SVMLight`` is trained on ``fm_train_dna`` / ``label_train_dna``.
    A ``DomainAdaptationSVM`` is then trained on ``fm_train_dna2`` /
    ``label_train_dna2``, regularised against the first machine, and applied
    to ``fm_test_dna2``.

    The previous implementation built the second-domain features, kernel and
    labels from the *first* domain's arguments, so the ``*2`` parameters had
    no effect; they are now used as their names indicate.

    Args:
        fm_train_dna: Training strings of the first domain.
        fm_test_dna: Unused; kept for interface compatibility.
        label_train_dna: Training labels of the first domain.
        label_test_dna: Unused; kept for interface compatibility.
        fm_train_dna2: Training strings of the second domain.
        fm_test_dna2: Test strings of the second domain.
        label_train_dna2: Training labels of the second domain.
        label_test_dna2: Unused; kept for interface compatibility.
        C: Regularisation constant passed to both machines.
        degree: Degree of the weighted-degree string kernel.

    Returns:
        The object returned by ``dasvm.apply_binary`` for the second-domain
        test features.
    """
    # First domain: plain SVM that serves as the prior solution.
    feats_train = StringCharFeatures(fm_train_dna, DNA)
    kernel = WeightedDegreeStringKernel(feats_train, feats_train, degree)
    labels = BinaryLabels(label_train_dna)
    svm = SVMLight(C, kernel, labels)
    svm.train()

    # Second domain: its own features, kernel and labels.
    feats_train2 = StringCharFeatures(fm_train_dna2, DNA)
    feats_test2 = StringCharFeatures(fm_test_dna2, DNA)
    kernel2 = WeightedDegreeStringKernel(feats_train2, feats_train2, degree)
    labels2 = BinaryLabels(label_train_dna2)

    # We regularize against the previously obtained solution.
    dasvm = DomainAdaptationSVM(C, kernel2, labels2, svm, 1.0)
    dasvm.train()

    return dasvm.apply_binary(feats_test2)
Ejemplo n.º 3
0
def evaluation_director_contingencytableevaluation_modular(
        ground_truth, predicted):
    """Evaluate binary predictions with a Python-defined contingency score.

    A subclass of ``DirectorContingencyTableEvaluation`` is defined locally
    whose custom score is ``get_WRACC() + get_BAL()`` and whose direction is
    ``ED_MAXIMIZE``.

    Args:
        ground_truth: Values wrapped in the first ``BinaryLabels`` passed to
            ``evaluate``.
        predicted: Values wrapped in the second ``BinaryLabels`` passed to
            ``evaluate``.

    Returns:
        ``(r, r2)``: the value returned by ``evaluate`` and the value of
        ``get_custom_score()`` afterwards. ``None`` when the director class
        cannot be imported (a hint is printed instead).
    """
    try:
        from modshogun import DirectorContingencyTableEvaluation, ED_MAXIMIZE
    except ImportError:
        print("recompile shogun with --enable-swig-directors")
        return

    from modshogun import BinaryLabels

    class SimpleWeightedBinaryEvaluator(DirectorContingencyTableEvaluation):
        """Scores a contingency table as WRACC plus BAL."""

        def __init__(self):
            DirectorContingencyTableEvaluation.__init__(self)

        def get_custom_direction(self):
            return ED_MAXIMIZE

        def get_custom_score(self):
            wracc = self.get_WRACC()
            bal = self.get_BAL()
            return wracc + bal

    scorer = SimpleWeightedBinaryEvaluator()
    truth_labels = BinaryLabels(ground_truth)
    predicted_labels = BinaryLabels(predicted)
    evaluated = scorer.evaluate(truth_labels, predicted_labels)
    custom = scorer.get_custom_score()
    print(evaluated, custom)

    return evaluated, custom
def outputResultsClassificationWithMajorityClass(out1, out2, out1DecisionValues, out2DecisionValues, train_lt, test_lt, test_majorityClass):
    """Write predictions to disk and print accuracy and AUC summaries.

    Uses ``print(...)`` calls with a single argument, which behave the same
    under Python 2 and Python 3 (the previous print statements were a
    SyntaxError on Python 3).

    Args:
        out1: Predicted labels for the training examples.
        out2: Predicted labels for the validation examples.
        out1DecisionValues: Training predictions handed to
            ``ROCEvaluation.evaluate``.
        out2DecisionValues: Validation predictions handed to
            ``ROCEvaluation.evaluate``.
        train_lt: True training labels.
        test_lt: True validation labels; compared element-wise with ``> 0``,
            so it must support that (e.g. a numpy array).
        test_majorityClass: Per-example indicator; a value of 1 marks a
            majority-class example. Also compared element-wise with ``> 0``.

    Raises:
        ZeroDivisionError: If the label arrays are empty or one of the
            positive / negative / majority / minority groups has no members.
    """
    # Output the results to the appropriate output files
    writeFloatList(out1, TRAINPREDICTIONSEPSILONFILENAME)
    writeFloatList(out2, VALIDATIONPREDICTIONSEPSILONFILENAME)

    # Count training predictions that equal the true label.
    numTrainCorrect = 0
    for i, trueLabel in enumerate(train_lt):
        if out1[i] == trueLabel:
            numTrainCorrect += 1
    fracTrainCorrect = float(numTrainCorrect) / float(len(train_lt))
    print("Training accuracy:")
    print(fracTrainCorrect)

    trainLabels = BinaryLabels(train_lt)
    evaluatorTrain = ROCEvaluation()
    evaluatorTrain.evaluate(out1DecisionValues, trainLabels)
    print("Training AUC:")
    print(evaluatorTrain.get_auROC())

    numValidCorrect = 0
    numPosCorrect = 0
    numNegCorrect = 0
    numMajorityClassCorrect = 0
    numMinorityClassCorrect = 0
    for i, trueLabel in enumerate(test_lt):
        if out2[i] != trueLabel:
            # Only correct predictions contribute to any of the counters.
            continue
        numValidCorrect += 1
        if (out2[i] == 1) and (trueLabel == 1):
            # Correctly predicted positive example
            numPosCorrect += 1
        else:
            numNegCorrect += 1
        if test_majorityClass[i] == 1:
            numMajorityClassCorrect += 1
        else:
            numMinorityClassCorrect += 1
    fracValidCorrect = float(numValidCorrect) / float(len(test_lt))
    print("Validation accuracy:")
    print(fracValidCorrect)
    print("Fraction of correct positive examples:")
    print(float(numPosCorrect) / float(len(np.where(test_lt > 0)[0])))
    print("Fraction of correct negative examples:")
    print(float(numNegCorrect) / float(len(np.where(test_lt <= 0)[0])))
    print("Fraction of correct majority class examples:")
    print(float(numMajorityClassCorrect) / float(len(np.where(test_majorityClass > 0)[0])))
    print("Fraction of correct minority class examples:")
    print(float(numMinorityClassCorrect) / float(len(np.where(test_majorityClass <= 0)[0])))

    validLabels = BinaryLabels(test_lt)
    evaluatorValid = ROCEvaluation()
    evaluatorValid.evaluate(out2DecisionValues, validLabels)
    print("Validation AUC:")
    print(evaluatorValid.get_auROC())
Ejemplo n.º 5
0
def modelselection_grid_search_kernel(num_subsets, num_vectors, dim_vectors):
    """Grid-search parameters for a LibSVM with stratified cross-validation.

    Args:
        num_subsets: Number of subsets for the stratified splitting.
        num_vectors: Number of random feature vectors (and labels) to create.
        dim_vectors: Dimensionality of each feature vector.

    Returns:
        ``(classifier, result, mean)``: the LibSVM with the selected
        parameters applied, the raw result of the final cross-validation and
        the ``mean`` attribute of that result after casting.
    """
    # Seed Shogun's generator and the `random` module in scope so that the
    # run is reproducible.
    Math.init_random(1)
    random.seed(1)

    # Arbitrary data: one column per vector.
    data = random.rand(dim_vectors, num_vectors)
    feats = RealFeatures()
    feats.set_feature_matrix(data)

    # Two classes, alternating: even indices +1, odd indices -1.
    labs = BinaryLabels(num_vectors)
    for idx in range(num_vectors):
        labs.set_label(idx, -1 if idx % 2 else 1)

    machine = LibSVM()

    # Accuracy measured on stratified splits drives the model selection.
    splitter = StratifiedCrossValidationSplitting(labs, num_subsets)
    criterion = ContingencyTableEvaluation(ACCURACY)
    xval = CrossValidation(machine, feats, labs, splitter, criterion)
    xval.set_num_runs(1)

    # Parameter tree comes from a helper defined elsewhere in the file.
    tree = create_param_tree()
    searcher = GridSearchModelSelection(xval, tree)

    verbose = False
    winner = searcher.select_model(verbose)
    winner.apply_to_machine(machine)

    # Re-evaluate with more runs to tighten the confidence interval.
    xval.set_num_runs(10)
    xval.set_conf_int_alpha(0.01)
    outcome = xval.evaluate()
    typed_outcome = CrossValidationResult.obtain_from_generic(outcome)

    return machine, outcome, typed_outcome.mean
def modelselection_grid_search_kernel(num_subsets, num_vectors, dim_vectors):
    """Select LibSVM parameters by grid search and report the final score.

    Args:
        num_subsets: Number of subsets used by the stratified splitting.
        num_vectors: How many random vectors / labels are generated.
        dim_vectors: Number of rows of the random feature matrix.

    Returns:
        A 3-tuple of the tuned classifier, the generic cross-validation
        result, and the ``mean`` of the casted result.
    """
    # Reproducibility: fix both random sources before generating data.
    Math.init_random(1)
    random.seed(1)

    # Random (meaningless) feature matrix wrapped in RealFeatures.
    feature_matrix = random.rand(dim_vectors, num_vectors)
    real_feats = RealFeatures()
    real_feats.set_feature_matrix(feature_matrix)

    # Alternate the two classes: +1 on even positions, -1 on odd ones.
    binary_labels = BinaryLabels(num_vectors)
    for position in range(num_vectors):
        sign = 1 if position % 2 == 0 else -1
        binary_labels.set_label(position, sign)

    svm = LibSVM()

    # Cross-validation setup used by the model selection.
    cv = CrossValidation(
        svm,
        real_feats,
        binary_labels,
        StratifiedCrossValidationSplitting(binary_labels, num_subsets),
        ContingencyTableEvaluation(ACCURACY),
    )
    cv.set_num_runs(1)

    # Candidate parameters are described by a tree built elsewhere.
    candidates = create_param_tree()
    selection = GridSearchModelSelection(cv, candidates)

    show_state = False
    best = selection.select_model(show_state)
    best.apply_to_machine(svm)

    # More runs give a tighter confidence interval for the reported mean.
    cv.set_num_runs(10)
    cv.set_conf_int_alpha(0.01)
    generic_result = cv.evaluate()
    cv_result = CrossValidationResult.obtain_from_generic(generic_result)

    return svm, generic_result, cv_result.mean
Ejemplo n.º 7
0
def evaluation_rocevaluation_modular(ground_truth, predicted):
    """Run Shogun's ``ROCEvaluation`` on a pair of label vectors.

    Args:
        ground_truth: Values wrapped as the reference ``BinaryLabels``.
        predicted: Values wrapped as the predicted ``BinaryLabels``.

    Returns:
        ``(roc, auroc)`` as returned by ``get_ROC()`` and ``get_auROC()``.
    """
    from modshogun import BinaryLabels, ROCEvaluation

    reference = BinaryLabels(ground_truth)
    scores = BinaryLabels(predicted)

    roc_eval = ROCEvaluation()
    roc_eval.evaluate(scores, reference)

    curve = roc_eval.get_ROC()
    area = roc_eval.get_auROC()
    return curve, area
Ejemplo n.º 8
0
def label_function():
    """Demonstrate two ways of constructing Shogun ``BinaryLabels``.

    Prints the number of labels and the label values of a container created
    with size 5, then builds a second container from ``csv/label.csv``.
    Output uses ``print(...)`` so the function runs on Python 2 and 3.

    Returns:
        None.
    """
    from modshogun import BinaryLabels
    from modshogun import CSVFile

    # Label container created with room for five labels.
    label = BinaryLabels(5)

    print(label.get_num_labels())
    # -> 5

    print(label.get_values())
    # -> array of 5 label values

    # Labels read from a CSV file (path is relative to the working directory).
    label_from_csv = BinaryLabels(CSVFile("csv/label.csv"))
Ejemplo n.º 9
0
def classifier_featureblock_logistic_regression(fm_train=traindat,
                                                fm_test=testdat,
                                                label_train=label_traindat):
    """Train a feature-block logistic regression on duplicated training data.

    The training matrix and the labels are each stacked with themselves
    using ``hstack``, and the feature index range is split into two equal
    blocks. The features are now built from the ``fm_train`` argument; the
    previous code read the module-level ``traindat`` directly, so a caller's
    own training matrix was silently ignored.

    Args:
        fm_train: Training feature matrix.
        fm_test: Unused; kept for interface compatibility.
        label_train: Training labels.

    Returns:
        The labels obtained from ``mtlr.apply().get_labels()``.
    """
    from modshogun import BinaryLabels, RealFeatures, IndexBlock, IndexBlockGroup
    try:
        from modshogun import FeatureBlockLogisticRegression
    except ImportError:
        print("FeatureBlockLogisticRegression not available")
        exit(0)

    features = RealFeatures(hstack((fm_train, fm_train)))
    labels = BinaryLabels(hstack((label_train, label_train)))

    # Two contiguous blocks covering the first and second half of the features.
    n_features = features.get_num_features()
    block_one = IndexBlock(0, n_features // 2)
    block_two = IndexBlock(n_features // 2, n_features)
    block_group = IndexBlockGroup()
    block_group.add_block(block_one)
    block_group.add_block(block_two)

    mtlr = FeatureBlockLogisticRegression(0.1, features, labels, block_group)
    mtlr.set_regularization(1)  # use regularization ratio
    mtlr.set_tolerance(1e-2)  # use 1e-2 tolerance
    mtlr.train()
    out = mtlr.apply().get_labels()

    return out
Ejemplo n.º 10
0
def cross_validation(X, Y, d, c, K):
    """Estimate accuracy by K-fold cross-validation over DNA strings.

    The data is cut into ``K`` consecutive folds of ``len(Y) // K`` samples.
    Each fold is used once as the test set while all remaining samples
    (including any leftover tail beyond ``K`` full folds) form the training
    set.

    Fixes relative to the previous version:
      * fold ``k`` now covers ``[k * n, (k + 1) * n)``; it used to cover
        ``[k, k + n)``, so consecutive "folds" overlapped almost entirely;
      * the fold size uses integer division, so slicing also works on
        Python 3;
      * progress is printed with ``print(...)``, valid on Python 2 and 3.

    Args:
        X: Sequence of DNA strings.
        Y: Sequence of labels, same length as ``X``.
        d: Degree forwarded to ``svm_process``.
        c: C value forwarded to ``svm_process``.
        K: Number of folds.

    Returns:
        Mean of the per-fold accuracies returned by ``svm_process``.
    """
    fold_size = len(Y) // K

    accuracy_list = []

    for k in range(K):
        print('degree = %s\tC = %s\tcross_validation_iter = %s/%s' % (d, c,
                                                                      k + 1, K))
        sys.stdout.flush()

        start = k * fold_size
        stop = start + fold_size

        X_test = list(X[start:stop])
        Y_test = list(Y[start:stop])
        X_train = list(X[:start]) + list(X[stop:])
        Y_train = list(Y[:start]) + list(Y[stop:])

        X_train = StringCharFeatures(X_train, DNA)
        X_test = StringCharFeatures(X_test, DNA)
        Y_train = BinaryLabels(np.array(Y_train, dtype=np.float64))
        Y_test = np.array(Y_test)

        args_tuple = (X_train, Y_train, X_test, Y_test, d, c)
        # Only the accuracy is needed; the second returned value is ignored.
        accuracy, _ = svm_process(args_tuple)
        accuracy_list.append(accuracy)

    return np.array(accuracy_list).mean()
Ejemplo n.º 11
0
def classifier_gpbtsvm_modular(train_fname=traindat,
                               test_fname=testdat,
                               label_fname=label_traindat,
                               width=2.1,
                               C=1,
                               epsilon=1e-5):
    """Train a GPBTSVM with a Gaussian kernel on data read from CSV files.

    Args:
        train_fname: CSV file with the training features.
        test_fname: CSV file with the test features.
        label_fname: CSV file with the training labels.
        width: Width passed to ``GaussianKernel``.
        C: Regularisation constant passed to ``GPBTSVM``.
        epsilon: Value passed to ``set_epsilon``.

    Returns:
        ``(predictions, svm, labels)`` where ``labels`` is
        ``predictions.get_labels()``. The process exits with status 0 if
        ``GPBTSVM`` cannot be imported.
    """
    from modshogun import BinaryLabels, CSVFile, GaussianKernel, RealFeatures
    try:
        from modshogun import GPBTSVM
    except ImportError:
        print("GPBTSVM not available")
        exit(0)

    # Load everything from disk before building the kernel.
    train_feats = RealFeatures(CSVFile(train_fname))
    test_feats = RealFeatures(CSVFile(test_fname))
    train_labels = BinaryLabels(CSVFile(label_fname))
    gauss = GaussianKernel(train_feats, train_feats, width)

    machine = GPBTSVM(C, gauss, train_labels)
    machine.set_epsilon(epsilon)
    machine.train()

    result = machine.apply(test_feats)
    return result, machine, result.get_labels()
def transfer_multitask_l12_logistic_regression(fm_train=traindat,
                                               fm_test=testdat,
                                               label_train=label_traindat):
    """Train a two-task L1/2 multitask logistic regression.

    The training matrix and labels are each stacked with themselves using
    ``hstack``; the first half of the resulting vectors forms task one and
    the second half task two. The features are now built from the
    ``fm_train`` argument; the previous code read the module-level
    ``traindat`` directly and ignored the caller's matrix.

    Args:
        fm_train: Training feature matrix.
        fm_test: Unused; kept for interface compatibility.
        label_train: Training labels.

    Returns:
        ``mtlr.apply_regression().get_labels()`` with task 0 selected.
    """
    from modshogun import BinaryLabels, RealFeatures, Task, TaskGroup
    try:
        from modshogun import MultitaskL12LogisticRegression
    except ImportError:
        print("MultitaskL12LogisticRegression not available")
        exit(0)

    features = RealFeatures(hstack((fm_train, fm_train)))
    labels = BinaryLabels(hstack((label_train, label_train)))

    # One task per copy of the data.
    n_vectors = features.get_num_vectors()
    task_one = Task(0, n_vectors // 2)
    task_two = Task(n_vectors // 2, n_vectors)
    task_group = TaskGroup()
    task_group.append_task(task_one)
    task_group.append_task(task_two)

    mtlr = MultitaskL12LogisticRegression(0.1, 0.1, features, labels,
                                          task_group)
    mtlr.set_tolerance(1e-2)  # use 1e-2 tolerance
    mtlr.set_max_iter(10)
    mtlr.train()
    mtlr.set_current_task(0)
    out = mtlr.apply_regression().get_labels()

    return out
def classifier_svmlight_linear_term_modular(fm_train_dna=traindna, fm_test_dna=testdna,
                                            label_train_dna=label_traindna, degree=3,
                                            C=10, epsilon=1e-5, num_threads=1):
    """Train SVMLight with a custom linear term on DNA strings.

    Args:
        fm_train_dna: Training strings.
        fm_test_dna: Test strings.
        label_train_dna: Training labels.
        degree: Degree of the weighted-degree string kernel.
        C: Regularisation constant.
        epsilon: Value passed to ``set_epsilon``.
        num_threads: Number of threads set on ``svm.parallel``.

    Returns:
        ``(out, kernel)``: the labels from ``svm.apply().get_labels()`` after
        the kernel has been re-initialised on train/test features, and the
        kernel itself. The process exits with status 0 if ``SVMLight`` is
        unavailable.
    """
    from modshogun import BinaryLabels, DNA, StringCharFeatures
    from modshogun import WeightedDegreeStringKernel
    try:
        from modshogun import SVMLight
    except ImportError:
        print("SVMLight is not available")
        exit(0)

    train_strings = StringCharFeatures(DNA)
    train_strings.set_features(fm_train_dna)
    test_strings = StringCharFeatures(DNA)
    test_strings.set_features(fm_test_dna)

    wd_kernel = WeightedDegreeStringKernel(train_strings, train_strings, degree)
    train_labels = BinaryLabels(label_train_dna)

    machine = SVMLight(C, wd_kernel, train_labels)
    machine.set_qpsize(3)
    # Fixed 10-entry linear term -- presumably one entry per training
    # example; TODO confirm against the data used by callers.
    linear_term = -numpy.array([1, 2, 3, 4, 5, 6, 7, 8, 7, 6],
                               dtype=numpy.double)
    machine.set_linear_term(linear_term)
    machine.set_epsilon(epsilon)
    machine.parallel.set_num_threads(num_threads)
    machine.train()

    # Point the kernel at the test strings before predicting.
    wd_kernel.init(train_strings, test_strings)
    predicted = machine.apply().get_labels()
    return predicted, wd_kernel
def runShogunSVMDNAWDKernel(train_xt, train_lt, test_xt):
    """Run a LibSVM with a weighted-degree position string kernel on DNA.

    Relies on the module-level settings ``DEGREE``, ``NUMSHIFTS`` and
    ``SVMC``. Progress messages use ``print(...)`` so the function is valid
    on both Python 2 and Python 3.

    Args:
        train_xt: Training DNA strings; the length of the first string sizes
            the shift and position-weight vectors (assumes all strings share
            that length -- TODO confirm).
        train_lt: Training labels.
        test_xt: Test DNA strings.

    Returns:
        ``(out1, out2, out1DecisionValues, out2DecisionValues)``: predicted
        labels for the training and test sets, followed by the objects
        returned by ``svm.apply`` for each of them.
    """

    ##################################################
    # set up svm
    feats_train = StringCharFeatures(train_xt, DNA)
    feats_test = StringCharFeatures(test_xt, DNA)

    kernel = WeightedDegreePositionStringKernel(feats_train, feats_train,
                                                DEGREE)
    kernel.io.set_loglevel(MSG_DEBUG)
    # Same shift and unit position weight at every sequence position.
    kernel.set_shifts(NUMSHIFTS * ones(len(train_xt[0]), dtype=int32))
    kernel.set_position_weights(ones(len(train_xt[0]), dtype=float64))

    # training labels
    labels = BinaryLabels(train_lt)

    # run svm model
    print("Ready to train!")
    svm = LibSVM(SVMC, kernel, labels)
    svm.io.set_loglevel(MSG_DEBUG)
    svm.train()

    # predictions
    print("Making predictions!")
    out1DecisionValues = svm.apply(feats_train)
    out1 = out1DecisionValues.get_labels()
    kernel.init(feats_train, feats_test)
    out2DecisionValues = svm.apply(feats_test)
    out2 = out2DecisionValues.get_labels()

    return out1, out2, out1DecisionValues, out2DecisionValues
def runShogunSVMDNASubsequenceStringKernel(train_xt, train_lt, test_xt):
    """Run a LibSVM with a subsequence string kernel on DNA strings.

    Relies on the module-level settings ``MAXLEN``, ``DECAY`` and ``SVMC``.
    Progress messages use ``print(...)`` so the function is valid on both
    Python 2 and Python 3. (The previous docstring described a spectrum
    kernel, which is not what the code constructs.)

    Args:
        train_xt: Training DNA strings.
        train_lt: Training labels.
        test_xt: Test DNA strings.

    Returns:
        ``(out1, out2, out1DecisionValues, out2DecisionValues)``: predicted
        labels for the training and test sets, followed by the objects
        returned by ``svm.apply`` for each of them.
    """

    ##################################################
    # set up svm
    feats_train = StringCharFeatures(train_xt, DNA)
    feats_test = StringCharFeatures(test_xt, DNA)

    kernel = SubsequenceStringKernel(feats_train, feats_train, MAXLEN, DECAY)
    kernel.io.set_loglevel(MSG_DEBUG)
    kernel.init(feats_train, feats_train)

    # training labels
    labels = BinaryLabels(train_lt)

    # run svm model
    print("Ready to train!")
    svm = LibSVM(SVMC, kernel, labels)
    svm.io.set_loglevel(MSG_DEBUG)
    svm.train()

    # predictions
    print("Making predictions!")
    out1DecisionValues = svm.apply(feats_train)
    out1 = out1DecisionValues.get_labels()
    kernel.init(feats_train, feats_test)
    out2DecisionValues = svm.apply(feats_test)
    out2 = out2DecisionValues.get_labels()

    return out1, out2, out1DecisionValues, out2DecisionValues
Ejemplo n.º 16
0
def transfer_multitask_clustered_logistic_regression(fm_train=traindat,
                                                     fm_test=testdat,
                                                     label_train=label_traindat
                                                     ):
    """Train a three-task clustered multitask logistic regression.

    The training matrix is stacked with its element-wise sine and cosine
    using ``hstack``, and the labels are repeated three times; each third of
    the resulting vectors forms one task. The features are now derived from
    the ``fm_train`` argument; the previous code read the module-level
    ``traindat`` directly and ignored the caller's matrix.

    Args:
        fm_train: Training feature matrix.
        fm_test: Unused; kept for interface compatibility.
        label_train: Training labels.

    Returns:
        ``mtlr.apply_regression().get_labels()`` with task 0 selected.
    """

    from modshogun import BinaryLabels, RealFeatures, Task, TaskGroup, MultitaskClusteredLogisticRegression, MSG_DEBUG

    features = RealFeatures(hstack((fm_train, sin(fm_train), cos(fm_train))))
    labels = BinaryLabels(hstack((label_train, label_train, label_train)))

    # One task per third of the stacked vectors.
    n_vectors = features.get_num_vectors()
    task_one = Task(0, n_vectors // 3)
    task_two = Task(n_vectors // 3, 2 * n_vectors // 3)
    task_three = Task(2 * n_vectors // 3, n_vectors)
    task_group = TaskGroup()
    task_group.append_task(task_one)
    task_group.append_task(task_two)
    task_group.append_task(task_three)

    mtlr = MultitaskClusteredLogisticRegression(1.0, 100.0, features, labels,
                                                task_group, 2)
    #mtlr.io.set_loglevel(MSG_DEBUG)
    mtlr.set_tolerance(1e-3)  # use 1e-3 tolerance
    mtlr.set_max_iter(100)
    mtlr.train()
    mtlr.set_current_task(0)
    out = mtlr.apply_regression().get_labels()

    return out
Ejemplo n.º 17
0
def classifier_svmlight_modular(fm_train_dna=traindat,
                                fm_test_dna=testdat,
                                label_train_dna=label_traindat,
                                C=1.2,
                                epsilon=1e-5,
                                num_threads=1):
    """Train SVMLight with a degree-20 weighted-degree kernel on DNA strings.

    Args:
        fm_train_dna: Training strings.
        fm_test_dna: Test strings.
        label_train_dna: Training labels.
        C: Regularisation constant.
        epsilon: Value passed to ``set_epsilon``.
        num_threads: Number of threads set on ``svm.parallel``.

    Returns:
        The kernel, re-initialised on train/test features. The predicted
        labels are computed but discarded. Returns ``None`` (after printing
        a notice) when ``SVMLight`` cannot be imported.
    """
    from modshogun import BinaryLabels, DNA, StringCharFeatures
    from modshogun import WeightedDegreeStringKernel
    try:
        from modshogun import SVMLight
    except ImportError:
        print('No support for SVMLight available.')
        return

    train_strings = StringCharFeatures(DNA)
    train_strings.set_features(fm_train_dna)
    test_strings = StringCharFeatures(DNA)
    test_strings.set_features(fm_test_dna)

    # The kernel degree is fixed for this example.
    wd_degree = 20
    wd_kernel = WeightedDegreeStringKernel(train_strings, train_strings,
                                           wd_degree)

    train_labels = BinaryLabels(label_train_dna)

    machine = SVMLight(C, wd_kernel, train_labels)
    machine.set_epsilon(epsilon)
    machine.parallel.set_num_threads(num_threads)
    machine.train()

    # Switch the kernel to train-vs-test and run the prediction once.
    wd_kernel.init(train_strings, test_strings)
    machine.apply().get_labels()
    return wd_kernel
def kernel_salzberg_word_string_modular(fm_train_dna=traindat,
                                        fm_test_dna=testdat,
                                        label_train_dna=label_traindat,
                                        order=3,
                                        gap=0,
                                        reverse=False):
    """Compute Salzberg word-string kernel matrices for DNA strings.

    Args:
        fm_train_dna: Training strings.
        fm_test_dna: Test strings.
        label_train_dna: Training labels.
        order: Word order used when converting characters to words.
        gap: Gap argument forwarded to ``obtain_from_char``.
        reverse: Reverse flag forwarded to ``obtain_from_char``.

    Returns:
        ``(km_train, km_test, kernel)``: the train/train kernel matrix, the
        train/test kernel matrix, and the kernel object.
    """
    from modshogun import BinaryLabels, DNA, StringCharFeatures, StringWordFeatures
    from modshogun import SalzbergWordStringKernel
    from modshogun import PluginEstimate

    def _to_word_features(strings):
        # Character features first, then converted to word features.
        chars = StringCharFeatures(strings, DNA)
        words = StringWordFeatures(chars.get_alphabet())
        words.obtain_from_char(chars, order - 1, order, gap, reverse)
        return words

    train_words = _to_word_features(fm_train_dna)
    test_words = _to_word_features(fm_test_dna)

    # The plug-in estimate is trained on the training words and labels.
    estimate = PluginEstimate()
    train_labels = BinaryLabels(label_train_dna)
    estimate.set_labels(train_labels)
    estimate.set_features(train_words)
    estimate.train()

    salzberg = SalzbergWordStringKernel(train_words, train_words, estimate,
                                        train_labels)
    train_matrix = salzberg.get_kernel_matrix()

    salzberg.init(train_words, test_words)
    estimate.set_features(test_words)
    estimate.apply().get_labels()
    test_matrix = salzberg.get_kernel_matrix()
    return train_matrix, test_matrix, salzberg
Ejemplo n.º 19
0
def runShogunSVMDNAWDNoPositionKernel(train_xt, train_lt, test_xt):
    """Run a LibSVM with a (non-position) weighted-degree kernel on DNA.

    Relies on the module-level settings ``DEGREE`` and ``SVMC``. Progress
    messages use ``print(...)`` so the function is valid on both Python 2
    and Python 3.

    Args:
        train_xt: Training DNA strings.
        train_lt: Training labels.
        test_xt: Test DNA strings.

    Returns:
        ``(out1, out2)``: predicted labels for the training and test sets.
    """

    ##################################################
    # set up svm
    feats_train = StringCharFeatures(train_xt, DNA)
    feats_test = StringCharFeatures(test_xt, DNA)

    kernel = WeightedDegreeStringKernel(feats_train, feats_train, DEGREE)
    kernel.io.set_loglevel(MSG_DEBUG)

    # Weights DEGREE, DEGREE-1, ..., 1 divided by their sum.
    weights = arange(1, DEGREE + 1, dtype=double)[::-1] / \
        sum(arange(1, DEGREE + 1, dtype=double))
    kernel.set_wd_weights(weights)

    # training labels
    labels = BinaryLabels(train_lt)

    # run svm model
    print("Ready to train!")
    svm = LibSVM(SVMC, kernel, labels)
    svm.io.set_loglevel(MSG_DEBUG)
    svm.train()

    # predictions
    print("Making predictions!")
    out1 = svm.apply(feats_train).get_labels()
    kernel.init(feats_train, feats_test)
    out2 = svm.apply(feats_test).get_labels()

    return out1, out2
def runShogunSVMDNALinearStringKernel(train_xt, train_lt, test_xt):
    """Run a LibSVM with a linear string kernel on DNA strings.

    Relies on the module-level setting ``SVMC``. Progress messages use
    ``print(...)`` so the function is valid on both Python 2 and Python 3.
    (The previous docstring described a spectrum kernel, which is not what
    the code constructs.)

    Args:
        train_xt: Training DNA strings.
        train_lt: Training labels.
        test_xt: Test DNA strings.

    Returns:
        ``(out1, out2)``: predicted labels for the training and test sets.
    """

    ##################################################
    # set up svm
    feats_train = StringCharFeatures(train_xt, DNA)
    feats_test = StringCharFeatures(test_xt, DNA)

    kernel = LinearStringKernel(feats_train, feats_train)
    kernel.io.set_loglevel(MSG_DEBUG)

    # training labels
    labels = BinaryLabels(train_lt)

    # run svm model
    print("Ready to train!")
    svm = LibSVM(SVMC, kernel, labels)
    svm.io.set_loglevel(MSG_DEBUG)
    svm.train()

    # predictions
    print("Making predictions!")
    out1 = svm.apply(feats_train).get_labels()
    kernel.init(feats_train, feats_test)
    out2 = svm.apply(feats_test).get_labels()

    return out1, out2
def evaluation_cross_validation_classification(traindat=traindat,
                                               label_traindat=label_traindat):
    """Cross-validate a LibLinear classifier with 5 stratified folds.

    The evaluation is repeated 10 times with accuracy as the criterion. The
    result of ``evaluate()`` is computed but not returned.

    Args:
        traindat: Training feature matrix.
        label_traindat: Training labels.

    Returns:
        None.
    """
    from modshogun import CrossValidation, CrossValidationResult
    from modshogun import ContingencyTableEvaluation, ACCURACY
    from modshogun import StratifiedCrossValidationSplitting
    from modshogun import BinaryLabels, RealFeatures
    from modshogun import LibLinear, L2R_L2LOSS_SVC

    # Wrap the raw training data.
    feats = RealFeatures(traindat)
    labs = BinaryLabels(label_traindat)

    machine = LibLinear(L2R_L2LOSS_SVC)

    # Stratified 5-fold splitting with accuracy as the evaluation measure.
    splitter = StratifiedCrossValidationSplitting(labs, 5)
    criterion = ContingencyTableEvaluation(ACCURACY)

    xval = CrossValidation(machine, feats, labs, splitter, criterion)
    xval.set_autolock(False)

    # Repeat the whole cross-validation 10 times.
    xval.set_num_runs(10)

    result = xval.evaluate()
Ejemplo n.º 22
0
def classifier_svmocas_modular(train_fname=traindat,
                               test_fname=testdat,
                               label_fname=label_traindat,
                               C=0.9,
                               epsilon=1e-5,
                               num_threads=1):
    """Train an SVMOcas classifier on features read from CSV files.

    Args:
        train_fname: CSV file with the training features.
        test_fname: CSV file with the test features.
        label_fname: CSV file with the training labels.
        C: Regularisation constant.
        epsilon: Value passed to ``set_epsilon``.
        num_threads: Number of threads set on ``svm.parallel``.

    Returns:
        ``(predictions, svm, labels)`` where ``labels`` is
        ``predictions.get_labels()``; ``None`` (after printing a notice)
        when ``SVMOcas`` cannot be imported.
    """
    from modshogun import BinaryLabels, CSVFile, RealFeatures
    try:
        from modshogun import SVMOcas
    except ImportError:
        print("SVMOcas not available")
        return

    train_feats = RealFeatures(CSVFile(train_fname))
    test_feats = RealFeatures(CSVFile(test_fname))
    train_labels = BinaryLabels(CSVFile(label_fname))

    machine = SVMOcas(C, train_feats, train_labels)
    machine.set_epsilon(epsilon)
    machine.parallel.set_num_threads(num_threads)
    machine.set_bias_enabled(False)
    machine.train()

    # Model parameters are queried but not used further in this example.
    offset = machine.get_bias()
    weight_vector = machine.get_w()

    result = machine.apply(test_feats)
    return result, machine, result.get_labels()
def kernel_histogram_word_string_modular(fm_train_dna=traindat, fm_test_dna=testdat, label_train_dna=label_traindat, order=3, ppseudo_count=1, npseudo_count=1):
    """Compute histogram word-string kernel matrices for DNA strings.

    Args:
        fm_train_dna: Training strings.
        fm_test_dna: Test strings.
        label_train_dna: Training labels.
        order: Word order used when converting characters to words.
        ppseudo_count: First argument of ``PluginEstimate``.
        npseudo_count: Second argument of ``PluginEstimate``.

    Returns:
        ``(km_train, km_test, kernel)``: the train/train kernel matrix, the
        train/test kernel matrix, and the kernel object.
    """
    from modshogun import BinaryLabels, DNA, StringCharFeatures, StringWordFeatures
    from modshogun import HistogramWordStringKernel, AvgDiagKernelNormalizer
    from modshogun import PluginEstimate

    def _to_word_features(strings):
        # Character features are filled first, then converted to words
        # (no gap, not reversed).
        chars = StringCharFeatures(DNA)
        chars.set_features(strings)
        words = StringWordFeatures(chars.get_alphabet())
        words.obtain_from_char(chars, order - 1, order, 0, False)
        return words

    train_words = _to_word_features(fm_train_dna)
    test_words = _to_word_features(fm_test_dna)

    # Plug-in estimate trained on the training words.
    estimate = PluginEstimate(ppseudo_count, npseudo_count)
    train_labels = BinaryLabels(label_train_dna)
    estimate.set_labels(train_labels)
    estimate.set_features(train_words)
    estimate.train()

    histogram = HistogramWordStringKernel(train_words, train_words, estimate)
    train_matrix = histogram.get_kernel_matrix()

    histogram.init(train_words, test_words)
    estimate.set_features(test_words)
    estimate.apply().get_labels()
    test_matrix = histogram.get_kernel_matrix()
    return train_matrix, test_matrix, histogram
Ejemplo n.º 24
0
def classifier_ssk_modular(fm_train_dna=traindat,
                           fm_test_dna=testdat,
                           label_train_dna=label_traindat,
                           C=1,
                           maxlen=1,
                           decay=1):
    """Train a LibSVM with a string subsequence kernel on DNA strings.

    Args:
        fm_train_dna: Training strings.
        fm_test_dna: Test strings.
        label_train_dna: Training labels.
        C: Regularisation constant.
        maxlen: Third argument of ``StringSubsequenceKernel``.
        decay: Fourth argument of ``StringSubsequenceKernel``.

    Returns:
        Predicted labels for the test strings.
    """
    from modshogun import BinaryLabels, DNA, StringCharFeatures
    from modshogun import LibSVM, StringSubsequenceKernel
    from modshogun import ErrorRateMeasure

    train_strings = StringCharFeatures(fm_train_dna, DNA)
    test_strings = StringCharFeatures(fm_test_dna, DNA)
    train_labels = BinaryLabels(label_train_dna)
    ssk = StringSubsequenceKernel(train_strings, train_strings, maxlen, decay)

    machine = LibSVM(C, ssk, train_labels)
    machine.train()

    # Training error is evaluated but not reported.
    train_predictions = machine.apply(train_strings)
    trainerr = ErrorRateMeasure().evaluate(train_predictions, train_labels)

    # Re-initialise the kernel on train/test before predicting.
    ssk.init(train_strings, test_strings)
    test_predictions = machine.apply(test_strings)
    return test_predictions.get_labels()
def modelselection_random_search_liblinear_modular(traindat=traindat, label_traindat=label_traindat):
    """Select LibLinear's C1 and C2 by random search with cross-validation.

    Both parameters get values built with ``build_values(-2.0, 2.0, R_EXP)``.
    The selected combination is applied to the classifier and a final
    cross-validation is run; its result is computed but not returned.

    Args:
        traindat: Training feature matrix.
        label_traindat: Training labels.

    Returns:
        None.
    """
    from modshogun import CrossValidation, CrossValidationResult
    from modshogun import ContingencyTableEvaluation, ACCURACY
    from modshogun import StratifiedCrossValidationSplitting
    from modshogun import RandomSearchModelSelection
    from modshogun import ModelSelectionParameters, R_EXP
    from modshogun import ParameterCombination
    from modshogun import BinaryLabels, RealFeatures
    from modshogun import LibLinear, L2R_L2LOSS_SVC

    # Parameter tree: one child per parameter, same value range for both.
    tree_root = ModelSelectionParameters()
    for param_name in ("C1", "C2"):
        node = ModelSelectionParameters(param_name)
        tree_root.append_child(node)
        node.build_values(-2.0, 2.0, R_EXP)

    feats = RealFeatures(traindat)
    labs = BinaryLabels(label_traindat)

    machine = LibLinear(L2R_L2LOSS_SVC)

    # 10 stratified subsets, scored by accuracy.
    splitter = StratifiedCrossValidationSplitting(labs, 10)
    criterion = ContingencyTableEvaluation(ACCURACY)

    xval = CrossValidation(machine, feats, labs, splitter, criterion)
    xval.set_autolock(False)

    # Random search; 0.5 is the third constructor argument.
    searcher = RandomSearchModelSelection(xval, tree_root, 0.5)
    winner = searcher.select_model()

    # Apply the selected parameters and evaluate once more.
    winner.apply_to_machine(machine)
    result = xval.evaluate()
def evaluation_cross_validation_mkl_weight_storage(
        traindat=traindat, label_traindat=label_traindat):
    """Cross-validate an MKL classifier while recording its kernel weights.

    Three Gaussian kernels are combined, each paired with the same feature
    object. A ``CrossValidationMKLStorage`` is attached as cross-validation
    output. The evaluation result and the stored weights are computed but
    not returned.

    Args:
        traindat: Training feature matrix.
        label_traindat: Training labels.

    Returns:
        None.
    """
    from modshogun import CrossValidation, CrossValidationResult
    from modshogun import CrossValidationPrintOutput
    from modshogun import CrossValidationMKLStorage
    from modshogun import ContingencyTableEvaluation, ACCURACY
    from modshogun import StratifiedCrossValidationSplitting
    from modshogun import BinaryLabels
    from modshogun import RealFeatures, CombinedFeatures
    from modshogun import GaussianKernel, CombinedKernel
    from modshogun import LibSVM, MKLClassification

    # The same feature object is appended once per sub-kernel.
    base_feats = RealFeatures(traindat)
    combined_feats = CombinedFeatures()
    for _ in range(3):
        combined_feats.append_feature_obj(base_feats)
    labs = BinaryLabels(label_traindat)

    # Gaussian kernels that differ only in their second argument.
    combined_kernel = CombinedKernel()
    for second_arg in (0.1, 1, 2):
        combined_kernel.append_kernel(GaussianKernel(10, second_arg))

    # MKL on top of LibSVM; interleaved optimisation is switched off
    # (the original note attributes this to a memory bug).
    mkl = MKLClassification(LibSVM())
    mkl.set_interleaved_optimization_enabled(False)
    mkl.set_kernel(combined_kernel)

    # Stratified 5-fold splitting, scored by accuracy.
    splitter = StratifiedCrossValidationSplitting(labs, 5)
    criterion = ContingencyTableEvaluation(ACCURACY)

    xval = CrossValidation(mkl, combined_feats, labs, splitter, criterion)
    xval.set_autolock(False)

    # Attach the weight storage as an output of the cross-validation.
    storage = CrossValidationMKLStorage()
    xval.add_cross_validation_output(storage)
    xval.set_num_runs(3)

    result = xval.evaluate()

    # Retrieve the MKL weights gathered during the runs.
    weights = storage.get_mkl_weights()
def load_sparse_data(filename, dimension=None):
    """Load sparse features and binary labels from a LibSVM-format file.

    The file named by ``filename`` is now the one that is opened; the
    previous code ignored this parameter and always read the global
    ``args.dataset``.

    Args:
        filename: Path handed to ``LibSVMFile``.
        dimension: If not ``None``, the number of features is set to this
            value after loading.

    Returns:
        A dict with the sparse features under ``'data'`` and the
        ``BinaryLabels`` under ``'labels'``.
    """
    input_file = LibSVMFile(filename)
    sparse_feats = SparseRealFeatures()
    label_array = sparse_feats.load_with_labels(input_file)
    labels = BinaryLabels(label_array)

    if dimension is not None:
        sparse_feats.set_num_features(dimension)

    return {'data': sparse_feats, 'labels': labels}
Ejemplo n.º 28
0
def evaluation_contingencytableevaluation_modular(ground_truth, predicted):
    """Score binary predictions against the ground truth with nine measures.

    Both arguments are wrapped in ``BinaryLabels`` before evaluation.

    Returns:
        A 9-tuple, in this order: accuracy, error rate, BAL, WRACC, F1,
        cross-correlation, recall, precision, specificity.
    """
    from modshogun import BinaryLabels
    from modshogun import ContingencyTableEvaluation
    from modshogun import AccuracyMeasure, ErrorRateMeasure, BALMeasure
    from modshogun import WRACCMeasure, F1Measure, CrossCorrelationMeasure
    from modshogun import RecallMeasure, PrecisionMeasure, SpecificityMeasure

    truth = BinaryLabels(ground_truth)
    guess = BinaryLabels(predicted)

    # The generic evaluator is exercised once too; its result is not returned.
    ContingencyTableEvaluation().evaluate(guess, truth)

    # Order here fixes the order of the returned tuple.
    measures = (
        AccuracyMeasure,
        ErrorRateMeasure,
        BALMeasure,
        WRACCMeasure,
        F1Measure,
        CrossCorrelationMeasure,
        RecallMeasure,
        PrecisionMeasure,
        SpecificityMeasure,
    )
    return tuple(measure().evaluate(guess, truth) for measure in measures)
Ejemplo n.º 29
0
def runShogunSVMDNACombinedSpectrumKernel(train_xt, train_lt, test_xt):
    """
    Train a LibSVM on a combined spectrum kernel and predict train and test.

    For every K in the module-level KList, one sorted word-feature set is
    built for the training and the test sequences and one
    CommWordStringKernel is created; these are appended to combined
    train/test features and to a CombinedKernel respectively.

    Relies on the module-level names KList, GAP, SVMC, DNA and MSG_DEBUG.

    Args:
        train_xt: training sequences, passed to StringCharFeatures with DNA.
        train_lt: training labels, wrapped in BinaryLabels.
        test_xt: test sequences, passed to StringCharFeatures with DNA.

    Returns:
        (out1, out2, out1DecisionValues, out2DecisionValues), where the last
        two are the objects returned by svm.apply() for the training and the
        test features and the first two are the results of calling
        get_labels() on them.
    """

    ##################################################
    # set up svm
    kernel = CombinedKernel()
    feats_train = CombinedFeatures()
    feats_test = CombinedFeatures()

    for K in KList:
        # Iterate through the K's and make a spectrum kernel for each
        charfeat_train = StringCharFeatures(train_xt, DNA)
        current_feats_train = StringWordFeatures(DNA)
        current_feats_train.obtain_from_char(charfeat_train, K - 1, K, GAP, False)
        preproc = SortWordString()
        preproc.init(current_feats_train)
        current_feats_train.add_preprocessor(preproc)
        current_feats_train.apply_preprocessor()
        feats_train.append_feature_obj(current_feats_train)

        # The test features reuse the preprocessor initialised on the
        # training features of the same K.
        charfeat_test = StringCharFeatures(test_xt, DNA)
        current_feats_test = StringWordFeatures(DNA)
        current_feats_test.obtain_from_char(charfeat_test, K - 1, K, GAP, False)
        current_feats_test.add_preprocessor(preproc)
        current_feats_test.apply_preprocessor()
        feats_test.append_feature_obj(current_feats_test)

        current_kernel = CommWordStringKernel(10, False)
        kernel.append_kernel(current_kernel)

    kernel.io.set_loglevel(MSG_DEBUG)

    # init kernel
    labels = BinaryLabels(train_lt)

    # run svm model
    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print("Ready to train!")
    kernel.init(feats_train, feats_train)
    svm = LibSVM(SVMC, kernel, labels)
    svm.io.set_loglevel(MSG_DEBUG)
    svm.train()

    # predictions
    print("Making predictions!")
    out1DecisionValues = svm.apply(feats_train)
    out1 = out1DecisionValues.get_labels()
    # Re-initialise the kernel on (train, test) before scoring the test set.
    kernel.init(feats_train, feats_test)
    out2DecisionValues = svm.apply(feats_test)
    out2 = out2DecisionValues.get_labels()

    return out1, out2, out1DecisionValues, out2DecisionValues
Ejemplo n.º 30
0
def get_labels(raw=False, type='binary'):
    """Build a label vector of NUM_EXAMPLES times -1 followed by as many +1.

    With ``raw`` set, the plain array is returned. Otherwise the array is
    wrapped according to ``type``: ``'binary'`` gives ``BinaryLabels``,
    ``'regression'`` gives ``RegressionLabels``, anything else gives ``None``.
    """
    negatives = -ones(NUM_EXAMPLES, dtype=double)
    positives = ones(NUM_EXAMPLES, dtype=double)
    data = concatenate(array((negatives, positives)))

    if raw:
        return data
    if type == 'binary':
        return BinaryLabels(data)
    if type == 'regression':
        return RegressionLabels(data)
    return None
Ejemplo n.º 31
0
def kernel_auc_modular(train_fname=traindat,
                       label_fname=label_traindat,
                       width=1.7):
    """Build an AUC kernel on top of a Gaussian sub-kernel and return it.

    Features are read from the CSV file ``train_fname`` and labels from the
    CSV file ``label_fname``; ``width`` is handed to the Gaussian kernel.
    """
    from modshogun import GaussianKernel, AUCKernel, RealFeatures
    from modshogun import BinaryLabels, CSVFile

    train_features = RealFeatures(CSVFile(train_fname))
    gaussian = GaussianKernel(train_features, train_features, width)

    auc_kernel = AUCKernel(0, gaussian)
    auc_labels = BinaryLabels(CSVFile(label_fname))
    auc_kernel.setup_auc_maximization(auc_labels)

    # The kernel matrix is requested but not returned to the caller.
    auc_kernel.get_kernel_matrix()
    return auc_kernel
def runShogunOneClassSVMDNASpectrumKernel(train_xt, train_lt, test_xt):
    """
    Train a one-class LibSVM on a spectrum kernel and predict train and test.

    Sorted word features are obtained from the training and the test
    sequences and compared with a CommWordStringKernel.

    Relies on the module-level names K, GAP, SVMC, EPSILON, DNA and MSG_DEBUG.

    Args:
        train_xt: training sequences, passed to StringCharFeatures with DNA.
        train_lt: training labels; wrapped in BinaryLabels but not handed to
            the one-class SVM.
        test_xt: test sequences, passed to StringCharFeatures with DNA.

    Returns:
        (out1, out2, out1DecisionValues, out2DecisionValues), where the last
        two are the objects returned by svm.apply() for the training and the
        test features and the first two are the results of calling
        get_labels() on them.
    """

    ##################################################
    # set up one-class svm
    charfeat_train = StringCharFeatures(train_xt, DNA)
    feats_train = StringWordFeatures(DNA)
    feats_train.obtain_from_char(charfeat_train, K - 1, K, GAP, False)
    preproc = SortWordString()
    preproc.init(feats_train)
    feats_train.add_preprocessor(preproc)
    feats_train.apply_preprocessor()

    # The test features reuse the preprocessor initialised on the training
    # features.
    charfeat_test = StringCharFeatures(test_xt, DNA)
    feats_test = StringWordFeatures(DNA)
    feats_test.obtain_from_char(charfeat_test, K - 1, K, GAP, False)
    feats_test.add_preprocessor(preproc)
    feats_test.apply_preprocessor()

    kernel = CommWordStringKernel(feats_train, feats_train, False)
    kernel.io.set_loglevel(MSG_DEBUG)

    # Kept from the original flow although LibSVMOneClass below is built
    # without labels.
    labels = BinaryLabels(train_lt)

    # run svm model
    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print("Ready to train!")
    svm = LibSVMOneClass(SVMC, kernel)
    svm.set_epsilon(EPSILON)
    svm.train()

    # predictions
    print("Making predictions!")
    out1DecisionValues = svm.apply(feats_train)
    out1 = out1DecisionValues.get_labels()
    # Re-initialise the kernel on (train, test) before scoring the test set.
    kernel.init(feats_train, feats_test)
    out2DecisionValues = svm.apply(feats_test)
    out2 = out2DecisionValues.get_labels()

    return out1, out2, out1DecisionValues, out2DecisionValues
#!/usr/bin/env python2.7
#
# This software is distributed under BSD 3-clause license (see LICENSE file).
#
# Copyright (C) 2014 Thoralf Klein
#

from modshogun import RealFeatures, BinaryLabels, LibLinear
from numpy import random, mean

# Toy problem: a 30 x 100 matrix of Gaussian noise as features and one random
# real value per feature vector as the source of the binary labels.
X_train = RealFeatures(random.randn(30, 100))
Y_train = BinaryLabels(random.randn(X_train.get_num_vectors()))

# Train a LibLinear machine (first constructor argument 1.0) on the toy data.
svm = LibLinear(1.0, X_train, Y_train)
svm.train()

# Predict on the training set itself.
Y_pred = svm.apply_binary(X_train)

# Report the fraction of matching labels. A single %-formatted argument keeps
# the printed text identical under Python 2 and Python 3.
print("accuracy: %s" % mean(Y_train.get_labels() == Y_pred.get_labels()))
#!/usr/bin/env python2.7
#
# This software is distributed under BSD 3-clause license (see LICENSE file).
#
# Copyright (C) 2014 Thoralf Klein
#

from modshogun import RealFeatures, BinaryLabels
from modshogun import LibLinear, L2R_L2LOSS_SVC_DUAL

from numpy import random, mean

X_train = RealFeatures(random.randn(30, 100))
Y_train = BinaryLabels(random.randn(X_train.get_num_vectors()))

results = []

for C1_pow in range(-3, 1):
    for C2_pow in range(-3, 1):

        svm = LibLinear()
        svm.set_bias_enabled(False)
        svm.set_liblinear_solver_type(L2R_L2LOSS_SVC_DUAL)
        svm.set_C(10**C1_pow, 10**C2_pow)

        svm.set_features(X_train)
        svm.set_labels(Y_train)
        svm.train()

        Y_pred = svm.apply_binary(X_train)
        accuracy = mean(Y_train.get_labels() == Y_pred.get_labels())