# Example 1
def calcprc(output, LTE):
    """Return the area under the precision-recall curve (auPRC)."""
    measures = PerformanceMeasures(Labels(numpy.array(LTE)),
                                   Labels(numpy.array(output)))
    return measures.get_auPRC()
# Example 2
def calcprc(output, LTE):
    """The area under the precision recall curve"""
    evaluator = PRCEvaluation()
    evaluator.evaluate(Labels(numpy.array(output)), Labels(numpy.array(LTE)))
    return evaluator.get_auPRC()
# Example 3
def calcroc(output, LTE):
    """The area under the receiver operating characteristic curve"""
    measures = PerformanceMeasures(Labels(numpy.array(LTE)),
                                   Labels(numpy.array(output)))
    return measures.get_auROC()
# Example 4
def calcroc(output, LTE):
    """The area under the receiver operating characteristic curve"""
    evaluator = ROCEvaluation()
    evaluator.evaluate(Labels(numpy.array(output)), Labels(numpy.array(LTE)))
    return evaluator.get_auROC()
# Example 5
def calcprc(output, LTE):
    """The area under the precision recall curve"""
    measures = PerformanceMeasures(Labels(numpy.array(LTE)),
                                   Labels(numpy.array(output)))
    return measures.get_auPRC()
def classifier_domainadaptationsvm_modular(fm_train_dna=traindna,fm_test_dna=testdna, \
                                                label_train_dna=label_traindna, \
                                               label_test_dna=label_testdna,fm_train_dna2=traindna2,fm_test_dna2=testdna2, \
                                               label_train_dna2=label_traindna2,label_test_dna2=label_testdna2,C=1,degree=3):
    """Domain-adaptation SVM demo on DNA strings.

    Trains an auxiliary SVMLight on the first (source) data set, then a
    DomainAdaptationSVM on the second (target) data set that is regularized
    against the auxiliary solution.  Returns the target-domain test-set
    predictions.
    """
    # --- auxiliary task: plain SVMLight on the source-domain data ---------
    feats_train = StringCharFeatures(fm_train_dna, DNA)
    feats_test = StringCharFeatures(fm_test_dna, DNA)
    kernel = WeightedDegreeStringKernel(feats_train, feats_train, degree)
    labels = Labels(label_train_dna)
    svm = SVMLight(C, kernel, labels)
    svm.train()
    #svm.io.set_loglevel(MSG_DEBUG)

    # --- target task: regularize against the previously trained SVM ------
    # BUG FIX: build the second machine from the *2 (target-domain)
    # arguments, which the original accepted but never used.
    feats_train2 = StringCharFeatures(fm_train_dna2, DNA)
    feats_test2 = StringCharFeatures(fm_test_dna2, DNA)
    kernel2 = WeightedDegreeStringKernel(feats_train2, feats_train2, degree)
    labels2 = Labels(label_train_dna2)

    # we regularize against the previously obtained solution
    dasvm = DomainAdaptationSVM(C, kernel2, labels2, svm, 1.0)
    dasvm.train()

    out = dasvm.classify(feats_test2).get_labels()

    return out #,dasvm TODO
# Example 7
def svm_learn(kernel, labels, options):
    """Train an SVM using SVMLight, falling back to LibSVM.

    Arguments:
    kernel  -- kernel object from the Shogun toolbox
    labels  -- list of labels
    options -- object with fields svmC, quiet, epsilon, weight

    Return:
    trained svm object
    """
    # SVMLight is an optional part of Shogun; fall back to LibSVM when the
    # name is not present in this build.
    try:
        svm = SVMLight(options.svmC, kernel,
                       Labels(numpy.array(labels, dtype=numpy.double)))
    except NameError:
        svm = LibSVM(options.svmC, kernel,
                     Labels(numpy.array(labels, dtype=numpy.double)))

    if not options.quiet:
        svm.io.set_loglevel(MSG_INFO)
        svm.io.set_target_to_stderr()

    svm.set_epsilon(options.epsilon)
    svm.parallel.set_num_threads(1)
    if options.weight != 1.0:
        # asymmetric regularization: scale C for one class by the weight
        svm.set_C(options.svmC, options.svmC * options.weight)
    svm.train()

    if not options.quiet:
        svm.io.set_loglevel(MSG_ERROR)

    return svm
# Example 8
def plotprc(output, LTE, figure_fname="", prc_label='PRC'):
    """Plot the precision recall curve and return the area under it.

    output       -- classifier outputs
    LTE          -- true labels
    figure_fname -- path to save the figure to; empty/None disables saving
    prc_label    -- legend label for the curve
    """
    import pylab
    import matplotlib

    pylab.figure(2, dpi=150, figsize=(4, 4))

    pm = PerformanceMeasures(Labels(numpy.array(LTE)),
                             Labels(numpy.array(output)))

    points = numpy.array(pm.get_PRC()).T  # transpose for pylab.plot
    pylab.plot(points[0], points[1], 'b-', label=prc_label)
    pylab.axis([0, 1, 0, 1])
    ticks = numpy.arange(0., 1., .1, dtype=numpy.float64)
    pylab.xticks(ticks, size=10)
    pylab.yticks(ticks, size=10)
    pylab.xlabel('sensitivity (true positive rate)', size=10)
    pylab.ylabel('precision (1 - false discovery rate)', size=10)
    pylab.legend(loc='lower right')

    # BUG FIX: the old test `figure_fname != None` was true for the default
    # empty string, so savefig/move ran with a bogus "" target.
    if figure_fname:
        warnings.filterwarnings('ignore', 'Could not match*')
        tempfname = figure_fname + '.png'
        pylab.savefig(tempfname)
        shutil.move(tempfname, figure_fname)

    return pm.get_auPRC()
# Example 9
def plotroc(output, LTE, draw_random=False, figure_fname="", roc_label='ROC'):
    """Plot the receiver operating characteristic curve and return auROC.

    output       -- classifier outputs
    LTE          -- true labels
    draw_random  -- also draw the diagonal "random guessing" line
    figure_fname -- path to save the figure to; empty/None disables saving
    roc_label    -- legend label for the curve
    """
    import pylab
    import matplotlib

    pylab.figure(1, dpi=150, figsize=(4, 4))
    fontdict = dict(family="cursive", weight="bold", size=7, y=1.05)

    pm = PerformanceMeasures(Labels(numpy.array(LTE)),
                             Labels(numpy.array(output)))

    points = numpy.array(pm.get_ROC()).T  # transpose for pylab.plot
    pylab.plot(points[0], points[1], 'b-', label=roc_label)
    if draw_random:
        pylab.plot([0, 1], [0, 1], 'r-', label='random guessing')
    pylab.axis([0, 1, 0, 1])
    ticks = numpy.arange(0., 1., .1, dtype=numpy.float64)
    pylab.xticks(ticks, size=10)
    pylab.yticks(ticks, size=10)
    pylab.xlabel('1 - specificity (false positive rate)', size=10)
    pylab.ylabel('sensitivity (true positive rate)', size=10)
    pylab.legend(loc='lower right',
                 prop=matplotlib.font_manager.FontProperties('tiny'))

    # BUG FIX: the old test `figure_fname != None` was true for the default
    # empty string, so savefig/move ran with a bogus "" target.
    if figure_fname:
        warnings.filterwarnings('ignore', 'Could not match*')
        tempfname = figure_fname + '.png'
        pylab.savefig(tempfname)
        shutil.move(tempfname, figure_fname)

    return pm.get_auROC()
# Example 10
def modelselection_grid_search_kernel():
    """Grid-search model selection demo on random two-class data.

    Builds random features, alternating +/-1 labels, runs grid search over
    a parameter tree with cross-validation and prints the chosen parameters
    and the final evaluation.  Returns 0.
    """
    n_subsets = 3
    n_vectors = 20
    n_dims = 3

    # random (non-sense) feature matrix
    features = RealFeatures()
    features.set_feature_matrix(rand(n_dims, n_vectors))

    # two classes: alternating +1 / -1
    labels = Labels(n_vectors)
    for idx in range(n_vectors):
        labels.set_label(idx, 1 if idx % 2 == 0 else -1)

    classifier = LibSVM()

    splitting = StratifiedCrossValidationSplitting(labels, n_subsets)
    criterion = ContingencyTableEvaluation(ACCURACY)

    # cross-validation used for evaluation inside model selection
    cross = CrossValidation(classifier, features, labels, splitting,
                            criterion)
    cross.set_num_runs(1)

    # list all parameters available for model selection
    # Dont worry if yours is not included, simply write to the mailing list
    classifier.print_modsel_params()

    tree = create_param_tree()
    tree.print_tree()

    grid_search = GridSearchModelSelection(tree, cross)

    best = grid_search.select_model(True)
    print("best parameter(s):")
    best.print_tree()

    best.apply_to_machine(classifier)

    # more runs -> tighter confidence intervals
    cross.set_num_runs(10)
    cross.set_conf_int_alpha(0.01)
    result = cross.evaluate()
    print("result: ")
    result.print_result()

    return 0
def modelselection_grid_search_kernel():
	"""Grid-search model selection demo on random two-class data.

	NOTE(review): an identically named definition appears earlier in this
	file; this later definition shadows it when the module is loaded.
	Returns 0 on completion.
	"""
	num_subsets=3
	num_vectors=20
	dim_vectors=3

	# create some (non-sense) data
	matrix=rand(dim_vectors, num_vectors)

	# create num_feautres 2-dimensional vectors
	features=RealFeatures()
	features.set_feature_matrix(matrix)

	# create labels, two classes: alternating +1 / -1
	labels=Labels(num_vectors)
	for i in range(num_vectors):
		labels.set_label(i, 1 if i%2==0 else -1)

	# create svm
	classifier=LibSVM()

	# splitting strategy
	splitting_strategy=StratifiedCrossValidationSplitting(labels, num_subsets)

	# accuracy evaluation
	evaluation_criterion=ContingencyTableEvaluation(ACCURACY)

	# cross validation class for evaluation in model selection
	cross=CrossValidation(classifier, features, labels, splitting_strategy, evaluation_criterion)
	cross.set_num_runs(1)

	# print all parameter available for modelselection
	# Dont worry if yours is not included, simply write to the mailing list
	classifier.print_modsel_params()

	# model parameter selection
	param_tree=create_param_tree()
	param_tree.print_tree()

	grid_search=GridSearchModelSelection(param_tree, cross)

	print_state=True
	best_combination=grid_search.select_model(print_state)
	print("best parameter(s):")
	best_combination.print_tree()

	best_combination.apply_to_machine(classifier)

	# larger number of runs to have tighter confidence intervals
	cross.set_num_runs(10)
	cross.set_conf_int_alpha(0.01)
	result=cross.evaluate()
	print("result: ")
	result.print_result()

	return 0
def evaluation_multiclassaccuracy_modular(ground_truth, predicted):
    """Return the multiclass accuracy of `predicted` vs `ground_truth`."""
    from shogun.Features import Labels
    from shogun.Evaluation import MulticlassAccuracy

    truth_labels = Labels(ground_truth)
    pred_labels = Labels(predicted)

    return MulticlassAccuracy().evaluate(pred_labels, truth_labels)
# Example 13
def knn_train(train_data=None, train_label=None, k=1):
    """Train a k-nearest-neighbour model.

    train_data  -- feature matrix accepted by RealFeatures
    train_label -- array-like supporting .tolist()
    k           -- number of neighbours

    Returns the trained KNN model.
    """
    train_data = RealFeatures(train_data)
    distance = EuclidianDistance(train_data, train_data)
    try:
        train_label = Labels(array(train_label.tolist(), dtype=float64))
    except Exception as e:
        # Report the conversion problem, then re-raise the ORIGINAL
        # exception instead of masking it with a fresh, empty Exception
        # (the old `print e` was also Python-2-only syntax).
        print(e)
        raise
    knn_model = KNN(k, distance, train_label)
    knn_model.train()
    return knn_model
def evaluation_meansquarederror_modular(ground_truth, predicted):
    """Return the mean squared error of `predicted` vs `ground_truth`."""
    from shogun.Features import Labels
    from shogun.Evaluation import MeanSquaredError

    truth_labels = Labels(ground_truth)
    pred_labels = Labels(predicted)

    return MeanSquaredError().evaluate(pred_labels, truth_labels)
# Example 15
def evaluation_rocevaluation_modular(ground_truth, predicted):
    """Return (ROC points, auROC) for `predicted` vs `ground_truth`."""
    from shogun.Features import Labels
    from shogun.Evaluation import ROCEvaluation

    truth_labels = Labels(ground_truth)
    pred_labels = Labels(predicted)

    roc = ROCEvaluation()
    roc.evaluate(pred_labels, truth_labels)
    return roc.get_ROC(), roc.get_auROC()
# Example 16
def classifier_mpdsvm_modular(fm_train_real=traindat,
                              fm_test_real=testdat,
                              label_train_twoclass=label_traindat,
                              C=1,
                              epsilon=1e-5):
    """Train an MPD-SVM with a Gaussian kernel and classify the test set.

    Returns (predictions, svm, predicted_labels).
    """
    from shogun.Features import RealFeatures, Labels
    from shogun.Kernel import GaussianKernel
    from shogun.Classifier import MPDSVM

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)
    width = 2.1
    kernel = GaussianKernel(feats_train, feats_train, width)

    labels = Labels(label_train_twoclass)

    svm = MPDSVM(C, kernel, labels)
    svm.set_epsilon(epsilon)
    svm.train()

    kernel.init(feats_train, feats_test)
    # apply once and reuse the result (the original applied twice and
    # discarded the first prediction object)
    predictions = svm.apply()
    return predictions, svm, predictions.get_labels()
# Example 17
def regression_svrlight_modular(fm_train=traindat,fm_test=testdat,label_train=label_traindat, \
        width=1.2,C=1,epsilon=1e-5,tube_epsilon=1e-2,num_threads=3):
    """Support vector regression with SVRLight; returns (out, kernel).

    Returns None when this Shogun build lacks SVRLight support.
    """
    from shogun.Features import Labels, RealFeatures
    from shogun.Kernel import GaussianKernel
    try:
        from shogun.Regression import SVRLight
    except ImportError:
        # print() form works under both Python 2 and 3 (was py2-only syntax)
        print('No support for SVRLight available.')
        return

    feats_train = RealFeatures(fm_train)
    feats_test = RealFeatures(fm_test)

    kernel = GaussianKernel(feats_train, feats_train, width)

    labels = Labels(label_train)

    svr = SVRLight(C, epsilon, kernel, labels)
    svr.set_tube_epsilon(tube_epsilon)
    svr.parallel.set_num_threads(num_threads)
    svr.train()

    kernel.init(feats_train, feats_test)
    out = svr.classify().get_labels()

    return out, kernel
# Example 18
def classifier_svmsgd_modular(fm_train_real=traindat,
                              fm_test_real=testdat,
                              label_train_twoclass=label_traindat,
                              C=0.9,
                              num_threads=1,
                              num_iter=5):
    """Train an SGD linear SVM on sparse features.

    NOTE(review): num_threads is accepted for interface compatibility but
    is not used by this example.
    Returns (predictions, svm, predicted_labels).
    """
    from shogun.Features import RealFeatures, SparseRealFeatures, Labels
    from shogun.Classifier import SVMSGD

    realfeat = RealFeatures(fm_train_real)
    feats_train = SparseRealFeatures()
    feats_train.obtain_from_simple(realfeat)
    realfeat = RealFeatures(fm_test_real)
    feats_test = SparseRealFeatures()
    feats_test.obtain_from_simple(realfeat)

    labels = Labels(label_train_twoclass)

    svm = SVMSGD(C, feats_train, labels)
    svm.set_epochs(num_iter)
    #svm.io.set_loglevel(0)
    svm.train()

    svm.set_features(feats_test)
    # apply once and reuse the result (the original applied twice and
    # discarded the first prediction object)
    predictions = svm.apply()
    return predictions, svm, predictions.get_labels()
def classifier_multiclasslinearmachine_modular (fm_train_real=traindat,fm_test_real=testdat,label_train_multiclass=label_traindat,width=2.1,C=1,epsilon=1e-5):
    """Compare one-vs-rest and ECOC-OvR linear multiclass machines.

    Trains both machines on the same LibLinear base learner and prints
    whether their test-set predictions agree.  Returns (out_ecoc, out_mc).
    """
    from shogun.Features import RealFeatures, Labels
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine
    from shogun.Classifier import ECOCStrategy, ECOCOVREncoder, ECOCHDDecoder, MulticlassOneVsRestStrategy

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)
    labels = Labels(label_train_multiclass)

    base = LibLinear(L2R_L2LOSS_SVC)
    base.set_epsilon(epsilon)
    base.set_bias_enabled(True)

    # plain one-vs-rest machine
    ovr = LinearMulticlassMachine(MulticlassOneVsRestStrategy(), train_feats, base, labels)
    ovr.train()
    out_mc = ovr.apply(test_feats).get_labels()

    # ECOC machine: one-vs-rest encoding, Hamming-distance decoding
    ecoc = LinearMulticlassMachine(ECOCStrategy(ECOCOVREncoder(), ECOCHDDecoder()), train_feats, base, labels)
    ecoc.train()
    out_ecoc = ecoc.apply(test_feats).get_labels()

    n_diff = (out_mc != out_ecoc).sum()
    if n_diff == 0:
        print("Same results for OvR and ECOCOvR")
    else:
        print("Different results for OvR and ECOCOvR (%d out of %d are different)" % (n_diff, len(out_mc)))

    return out_ecoc, out_mc
def classifier_svmlight_modular (fm_train_dna=traindat,fm_test_dna=testdat,label_train_dna=label_traindat,C=1.2,epsilon=1e-5,num_threads=1):
    """Train SVMLight on DNA strings with a weighted-degree kernel.

    Returns the kernel (initialized on train/test features); returns None
    when this Shogun build lacks SVMLight support.
    """
    from shogun.Features import StringCharFeatures, Labels, DNA
    from shogun.Kernel import WeightedDegreeStringKernel
    try:
        from shogun.Classifier import SVMLight
    except ImportError:
        # print() form works under both Python 2 and 3 (was py2-only syntax)
        print('No support for SVMLight available.')
        return

    feats_train = StringCharFeatures(DNA)
    feats_train.set_features(fm_train_dna)
    feats_test = StringCharFeatures(DNA)
    feats_test.set_features(fm_test_dna)
    degree = 20

    kernel = WeightedDegreeStringKernel(feats_train, feats_train, degree)

    labels = Labels(label_train_dna)

    svm = SVMLight(C, kernel, labels)
    svm.set_epsilon(epsilon)
    svm.parallel.set_num_threads(num_threads)
    svm.train()

    kernel.init(feats_train, feats_test)
    svm.apply().get_labels()  # exercise classification; result unused here
    return kernel
# Example 21
def classifier_svmlight_linear_term_modular(fm_train_dna=traindna,fm_test_dna=testdna, \
                                                label_train_dna=label_traindna,degree=3, \
                                                C=10,epsilon=1e-5,num_threads=1):
    """Train SVMLight with a custom linear term in its objective.

    Returns (test-set predictions, kernel).
    """
    from shogun.Features import StringCharFeatures, Labels, DNA
    from shogun.Kernel import WeightedDegreeStringKernel
    from shogun.Classifier import SVMLight

    train_feats = StringCharFeatures(DNA)
    train_feats.set_features(fm_train_dna)
    test_feats = StringCharFeatures(DNA)
    test_feats.set_features(fm_test_dna)

    kernel = WeightedDegreeStringKernel(train_feats, train_feats, degree)
    labels = Labels(label_train_dna)

    svm = SVMLight(C, kernel, labels)
    svm.set_qpsize(3)
    # custom (negated) linear term of the SVM optimization problem
    svm.set_linear_term(
        -numpy.array([1, 2, 3, 4, 5, 6, 7, 8, 7, 6], dtype=numpy.double))
    svm.set_epsilon(epsilon)
    svm.parallel.set_num_threads(num_threads)
    svm.train()

    kernel.init(train_feats, test_feats)
    predicted = svm.apply().get_labels()
    return predicted, kernel
# Example 22
def mlprocess(task_filename, data_filename, pred_filename, verbose=True):
    """Demo of creating machine learning process.

    Parses the task and data files, trains a Gaussian-kernel SVM and writes
    one predicted label per line to pred_filename.
    """
    task_type, fidx, lidx, train_idx, test_idx = parse_task(task_filename)
    outputs = init_output(task_type)
    all_data = parse_data(data_filename)
    train_ex, train_lab, test_ex, test_lab = split_data(all_data, fidx, lidx, train_idx, test_idx)
    label_train = outputs.str2label(train_lab)

    if verbose:
        print('Number of features: %d' % train_ex.shape[0])
        print('%d training examples, %d test examples' % (len(train_lab), len(test_lab)))

    feats_train = RealFeatures(train_ex)
    feats_test = RealFeatures(test_ex)
    width = 1.0
    kernel = GaussianKernel(feats_train, feats_train, width)
    labels = Labels(label_train)
    svm = init_svm(task_type, kernel, labels)
    svm.train()

    kernel.init(feats_train, feats_test)
    preds = svm.classify().get_labels()
    pred_label = outputs.label2str(preds)

    # context manager guarantees the file is closed even if a write fails
    with open(pred_filename, 'w') as pf:
        for pred in pred_label:
            pf.write(pred + '\n')
# Example 23
def libsvm():
    """Train LibSVM with a Gaussian kernel and print the test error rate."""
    # print() form works under both Python 2 and 3 (was py2-only syntax)
    print('LibSVM')

    from shogun.Features import RealFeatures, Labels
    from shogun.Kernel import GaussianKernel
    from shogun.Evaluation import PerformanceMeasures
    from shogun.Classifier import LibSVM

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)

    width = 2.1
    kernel = GaussianKernel(feats_train, feats_train, width)

    C = 1
    epsilon = 1e-5
    labels = Labels(label_train_twoclass)

    svm = LibSVM(C, kernel, labels)
    svm.set_epsilon(epsilon)
    svm.train()

    #kernel.init(feats_train, feats_test)
    output = svm.classify(feats_test)  #.get_labels()
    #output_vector = output.get_labels()
    out = svm.classify().get_labels()
    # fraction of misclassified test examples
    testerr = mean(sign(out) != testlab)
    print(testerr)
# Example 24
def classifier_subgradientsvm_modular(fm_train_real, fm_test_real,
                                      label_train_twoclass, C, epsilon,
                                      max_train_time):
    """Train a sub-gradient SVM on sparse features.

    Returns (predicted test labels, svm).
    """
    from shogun.Features import RealFeatures, SparseRealFeatures, Labels
    from shogun.Classifier import SubGradientSVM

    def _sparsify(dense):
        # wrap a dense matrix as Shogun sparse features
        sparse = SparseRealFeatures()
        sparse.obtain_from_simple(RealFeatures(dense))
        return sparse

    feats_train = _sparsify(fm_train_real)
    feats_test = _sparsify(fm_test_real)

    labels = Labels(label_train_twoclass)

    svm = SubGradientSVM(C, feats_train, labels)
    svm.set_epsilon(epsilon)
    svm.set_max_train_time(max_train_time)
    svm.train()

    svm.set_features(feats_test)
    predicted = svm.apply().get_labels()

    return predicted, svm
# Example 25
def classifier_libsvm_modular(fm_train_real=traindat,
                              fm_test_real=testdat,
                              label_train_twoclass=label_traindat,
                              width=2.1,
                              C=1,
                              epsilon=1e-5):
    """Train LibSVM with a Gaussian kernel and classify the test set.

    Returns (predictions, svm, predicted_labels).
    """
    from shogun.Features import RealFeatures, Labels
    from shogun.Kernel import GaussianKernel
    from shogun.Classifier import LibSVM

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)

    kernel = GaussianKernel(feats_train, feats_train, width)
    labels = Labels(label_train_twoclass)

    svm = LibSVM(C, kernel, labels)
    svm.set_epsilon(epsilon)
    svm.train()

    kernel.init(feats_train, feats_test)
    # classify once and reuse the result (the original ran classification
    # twice, discarding the first result)
    predictions = svm.classify()
    labels = predictions.get_labels()
    # accessors kept for demonstration purposes
    supportvectors = sv_idx = svm.get_support_vectors()
    alphas = svm.get_alphas()
    return predictions, svm, labels
# Example 26
def classifier_svmlin_modular(fm_train_real=traindat,
                              fm_test_real=testdat,
                              label_train_twoclass=label_traindat,
                              C=0.9,
                              epsilon=1e-5,
                              num_threads=1):
    """Train SVMLin on sparse features and classify the test set.

    Returns (predictions, svm, predicted_labels).
    """
    from shogun.Features import RealFeatures, SparseRealFeatures, Labels
    from shogun.Classifier import SVMLin

    realfeat = RealFeatures(fm_train_real)
    feats_train = SparseRealFeatures()
    feats_train.obtain_from_simple(realfeat)
    realfeat = RealFeatures(fm_test_real)
    feats_test = SparseRealFeatures()
    feats_test.obtain_from_simple(realfeat)

    labels = Labels(label_train_twoclass)

    svm = SVMLin(C, feats_train, labels)
    svm.set_epsilon(epsilon)
    svm.parallel.set_num_threads(num_threads)
    svm.set_bias_enabled(True)
    svm.train()

    svm.set_features(feats_test)
    svm.get_bias()  # demonstration accessors; results intentionally unused
    svm.get_w()
    # classify once and reuse the result (the original classified twice,
    # discarding the first result)
    predictions = svm.classify()
    return predictions, svm, predictions.get_labels()
def modelselection_grid_search_linear_modular(traindat=traindat,
                                              label_traindat=label_traindat):
    """Grid-search C1/C2 for LibLinear via stratified cross-validation.

    Returns the cross-validation result obtained with the best parameters
    (the original computed it but returned nothing).
    """
    from shogun.Evaluation import CrossValidation, CrossValidationResult
    from shogun.Evaluation import ContingencyTableEvaluation, ACCURACY
    from shogun.Evaluation import StratifiedCrossValidationSplitting
    from shogun.ModelSelection import GridSearchModelSelection
    from shogun.ModelSelection import ModelSelectionParameters, R_EXP
    from shogun.ModelSelection import ParameterCombination
    from shogun.Features import Labels
    from shogun.Features import RealFeatures
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC

    # build parameter tree to select C1 and C2
    param_tree_root = ModelSelectionParameters()
    c1 = ModelSelectionParameters("C1")
    param_tree_root.append_child(c1)
    c1.build_values(-2.0, 2.0, R_EXP)

    c2 = ModelSelectionParameters("C2")
    param_tree_root.append_child(c2)
    c2.build_values(-2.0, 2.0, R_EXP)

    # training data
    features = RealFeatures(traindat)
    labels = Labels(label_traindat)

    # classifier
    classifier = LibLinear(L2R_L2LOSS_SVC)

    # print all parameter available for modelselection
    # Dont worry if yours is not included but, write to the mailing list
    classifier.print_modsel_params()

    # splitting strategy for cross-validation
    splitting_strategy = StratifiedCrossValidationSplitting(labels, 10)

    # evaluation method
    evaluation_criterium = ContingencyTableEvaluation(ACCURACY)

    # cross-validation instance
    cross_validation = CrossValidation(classifier, features, labels,
                                       splitting_strategy,
                                       evaluation_criterium)

    # model selection instance
    model_selection = GridSearchModelSelection(param_tree_root,
                                               cross_validation)

    # perform model selection with selected methods
    #print "performing model selection of"
    #param_tree_root.print_tree()
    best_parameters = model_selection.select_model()

    # print best parameters
    #print "best parameters:"
    #best_parameters.print_tree()

    # apply them and return the evaluation result
    best_parameters.apply_to_machine(classifier)
    result = cross_validation.evaluate()
    # BUG FIX: the evaluation result was computed but never returned
    return result
def classifier_larank_modular(fm_train_real=traindat,
                              fm_test_real=testdat,
                              label_train_multiclass=label_traindat,
                              C=0.9,
                              num_threads=1,
                              num_iter=5):
    """Train a LaRank multiclass SVM with a Gaussian kernel.

    Returns (predictions, svm, predicted_labels).
    """
    from shogun.Features import RealFeatures, Labels
    from shogun.Kernel import GaussianKernel
    from shogun.Classifier import LaRank
    from shogun.Mathematics import Math_init_random

    Math_init_random(17)  # fixed seed keeps the example reproducible

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)
    kernel = GaussianKernel(train_feats, train_feats, 2.1)

    labels = Labels(label_train_multiclass)

    svm = LaRank(C, kernel, labels)
    #svm.set_tau(1e-3)
    svm.set_batch_mode(False)
    #svm.io.enable_progress()
    svm.set_epsilon(1e-5)
    svm.train()
    svm.apply(train_feats).get_labels()  # training-set pass; result unused
    predictions = svm.apply()
    return predictions, svm, predictions.get_labels()
def kernel_salzberg_word_string_modular(fm_train_dna=traindat,
                                        fm_test_dna=testdat,
                                        label_train_dna=label_traindat,
                                        order=3,
                                        gap=0,
                                        reverse=False):
    """Salzberg word-string kernel demo with a PluginEstimate.

    Returns (train kernel matrix, test kernel matrix, kernel).
    """
    from shogun.Features import StringCharFeatures, StringWordFeatures, DNA, Labels
    from shogun.Kernel import SalzbergWordStringKernel
    from shogun.Classifier import PluginEstimate

    def _word_features(dna_strings):
        # char features -> word features of the requested order
        chars = StringCharFeatures(dna_strings, DNA)
        words = StringWordFeatures(chars.get_alphabet())
        words.obtain_from_char(chars, order - 1, order, gap, reverse)
        return words

    feats_train = _word_features(fm_train_dna)
    feats_test = _word_features(fm_test_dna)

    pie = PluginEstimate()
    labels = Labels(label_train_dna)
    pie.set_labels(labels)
    pie.set_features(feats_train)
    pie.train()

    kernel = SalzbergWordStringKernel(feats_train, feats_train, pie, labels)
    km_train = kernel.get_kernel_matrix()

    kernel.init(feats_train, feats_test)
    pie.set_features(feats_test)
    pie.apply().get_labels()  # predictions computed but intentionally unused
    km_test = kernel.get_kernel_matrix()
    return km_train, km_test, kernel
# Example 30
def get_labels(raw=False):
    """Return 2*NUM_EXAMPLES two-class labels: a -1 block then a +1 block.

    When `raw` is True the plain numeric vector is returned; otherwise it
    is wrapped in a Shogun Labels object.
    """
    data = concatenate(
        array(
            (-ones(NUM_EXAMPLES, dtype=double), ones(NUM_EXAMPLES,
                                                     dtype=double))))
    return data if raw else Labels(data)
def regression_least_squares_modular (fm_train=traindat,fm_test=testdat,label_train=label_traindat,tau=1e-6):
    """Least-squares regression demo; returns (predicted labels, model).

    NOTE(review): `tau` is accepted for interface compatibility but is not
    used by LeastSquaresRegression in this example.
    """
    from shogun.Features import Labels, RealFeatures
    from shogun.Kernel import GaussianKernel
    from shogun.Regression import LeastSquaresRegression

    # BUG FIX: train on the fm_train argument, not the module-level traindat
    ls = LeastSquaresRegression(RealFeatures(fm_train), Labels(label_train))
    ls.train()
    out = ls.apply(RealFeatures(fm_test)).get_labels()
    return out, ls
def evaluation_contingencytableevaluation_modular(ground_truth, predicted):
    """Evaluate `predicted` against `ground_truth` with all contingency-table
    measures.

    Returns (accuracy, errorrate, bal, wracc, f1, crosscorrelation, recall,
    precision, specificity).
    """
    from shogun.Features import Labels
    from shogun.Evaluation import ContingencyTableEvaluation
    from shogun.Evaluation import AccuracyMeasure, ErrorRateMeasure, BALMeasure
    from shogun.Evaluation import WRACCMeasure, F1Measure, CrossCorrelationMeasure
    from shogun.Evaluation import RecallMeasure, PrecisionMeasure, SpecificityMeasure

    truth = Labels(ground_truth)
    pred = Labels(predicted)

    # base contingency-table evaluation; its result is intentionally unused
    ContingencyTableEvaluation().evaluate(pred, truth)

    # the return order matches the original tuple exactly
    measure_classes = (AccuracyMeasure, ErrorRateMeasure, BALMeasure,
                       WRACCMeasure, F1Measure, CrossCorrelationMeasure,
                       RecallMeasure, PrecisionMeasure, SpecificityMeasure)
    return tuple(cls().evaluate(pred, truth) for cls in measure_classes)
def classifier_multiclassmachine_modular (fm_train_real=traindat,fm_test_real=testdat,label_train_multiclass=label_traindat,width=2.1,C=1,epsilon=1e-5):
    """Kernel multiclass machine (one-vs-rest over LibSVM) demo.

    Returns the predicted test-set labels.
    """
    from shogun.Features import RealFeatures, Labels
    from shogun.Kernel import GaussianKernel
    from shogun.Classifier import LibSVM, KernelMulticlassMachine, MulticlassOneVsRestStrategy

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)
    kernel = GaussianKernel(feats_train, feats_train, width)

    labels = Labels(label_train_multiclass)

    classifier = LibSVM(C, kernel, labels)
    classifier.set_epsilon(epsilon)
    # print() form works under both Python 2 and 3 (was py2-only syntax)
    print(labels.get_labels())
    mc_classifier = KernelMulticlassMachine(MulticlassOneVsRestStrategy(), kernel, classifier, labels)
    mc_classifier.train()

    kernel.init(feats_train, feats_test)
    out = mc_classifier.apply().get_labels()
    return out
# Toy SVMLight demo: two 2-D Gaussian clouds separated by +/- dist,
# trained with a Gaussian kernel and dumped to stdout.
num = 10
dist = 1
width = 2.1

traindata_real = concatenate((randn(2, num) - dist, randn(2, num) + dist), axis=1)
testdata_real = concatenate((randn(2, num) - dist, randn(2, num) + dist), axis=1)

trainlab = concatenate((-ones(num), ones(num)))
testlab = concatenate((-ones(num), ones(num)))

feats_train = RealFeatures(traindata_real)
feats_test = RealFeatures(testdata_real)
kernel = GaussianKernel(feats_train, feats_train, width)
kernel.io.set_loglevel(MSG_DEBUG)

labels = Labels(trainlab)

svm = SVMLight(2, kernel, labels)
svm.train()
svm.io.set_loglevel(MSG_DEBUG)

##################################################

# print() form works under both Python 2 and 3 (was py2-only syntax)
print("labels:")
print(labels.to_string())

print("features")
print(feats_train.to_string())

print("kernel")
print(kernel.to_string())
# Example 35
def features_io_modular(fm_train_real, label_train_twoclass):
	"""Round-trip Shogun features and labels through Ascii/Binary/HDF5 files.

	Saves sparse and dense feature matrices plus a small label vector to
	temporary files, reloads them into fresh objects, deletes the files and
	returns (feats, feats2, lab, lab2) for comparison by the caller.

	NOTE(review): file handles are rebound without an explicit close;
	flushing appears to rely on the Shogun file objects' destructors —
	keep the save/load ordering as-is.
	"""
	import numpy
	from shogun.Features import SparseRealFeatures, RealFeatures, Labels
	from shogun.Kernel import GaussianKernel
	from shogun.IO import AsciiFile, BinaryFile, HDF5File

	# sparse features: save as binary and ascii, then reload both
	feats=SparseRealFeatures(fm_train_real)
	feats2=SparseRealFeatures()

	f=BinaryFile("fm_train_sparsereal.bin","w")
	feats.save(f)

	f=AsciiFile("fm_train_sparsereal.ascii","w")
	feats.save(f)

	f=BinaryFile("fm_train_sparsereal.bin")
	feats2.load(f)

	f=AsciiFile("fm_train_sparsereal.ascii")
	feats2.load(f)

	# dense features: save as binary, HDF5 and ascii, then reload
	feats=RealFeatures(fm_train_real)
	feats2=RealFeatures()

	f=BinaryFile("fm_train_real.bin","w")
	feats.save(f)

	f=HDF5File("fm_train_real.h5","w", "/data/doubles")
	feats.save(f)

	f=AsciiFile("fm_train_real.ascii","w")
	feats.save(f)

	f=BinaryFile("fm_train_real.bin")
	feats2.load(f)
	#print "diff binary", numpy.max(numpy.abs(feats2.get_feature_matrix().flatten()-fm_train_real.flatten()))

	f=AsciiFile("fm_train_real.ascii")
	feats2.load(f)
	#print "diff ascii", numpy.max(numpy.abs(feats2.get_feature_matrix().flatten()-fm_train_real.flatten()))

	# labels: fixed 3-element vector saved to ascii/binary/HDF5 and reloaded
	lab=Labels(numpy.array([1.0,2.0,3.0]))
	lab2=Labels()
	f=AsciiFile("label_train_twoclass.ascii","w")
	lab.save(f)

	f=BinaryFile("label_train_twoclass.bin","w")
	lab.save(f)

	f=HDF5File("label_train_real.h5","w", "/data/labels")
	lab.save(f)

	f=AsciiFile("label_train_twoclass.ascii")
	lab2.load(f)

	f=BinaryFile("label_train_twoclass.bin")
	lab2.load(f)

	f=HDF5File("fm_train_real.h5","r", "/data/doubles")
	feats2.load(f)
	#print feats2.get_feature_matrix()
	f=HDF5File("label_train_real.h5","r", "/data/labels")
	lab2.load(f)
	#print lab2.get_labels()

	#clean up
	import os
	for f in ['fm_train_sparsereal.bin','fm_train_sparsereal.ascii',
			'fm_train_real.bin','fm_train_real.h5','fm_train_real.ascii',
			'label_train_real.h5', 'label_train_twoclass.ascii','label_train_twoclass.bin']:
		os.unlink(f)
	return feats, feats2, lab, lab2