예제 #1
0
def classifier_multiclasslogisticregression_modular(
        fm_train_real=traindat,
        fm_test_real=testdat,
        label_train_multiclass=label_traindat,
        label_test_multiclass=label_testdat,
        z=1,
        epsilon=1e-5):
    from shogun.Features import RealFeatures, MulticlassLabels

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)

    labels = MulticlassLabels(label_train_multiclass)

    classifier = MulticlassLogisticRegression(z, feats_train, labels)
    classifier.train()

    label_pred = classifier.apply(feats_test)
    out = label_pred.get_labels()

    if label_test_multiclass is not None:
        from shogun.Evaluation import MulticlassAccuracy
        labels_test = MulticlassLabels(label_test_multiclass)
        evaluator = MulticlassAccuracy()
        acc = evaluator.evaluate(label_pred, labels_test)
        print('Accuracy = %.4f' % acc)

    return out
예제 #2
0
def evaluation_multiclassovrevaluation_modular(traindat, label_traindat,
                                               testdat, label_testdat):
    from shogun.Features import MulticlassLabels
    from shogun.Evaluation import MulticlassOVREvaluation, ROCEvaluation
    from modshogun import MulticlassLibLinear, RealFeatures, ContingencyTableEvaluation, ACCURACY
    from shogun.Mathematics import Math

    Math.init_random(1)

    ground_truth_labels = MulticlassLabels(label_traindat)
    svm = MulticlassLibLinear(1.0, RealFeatures(traindat),
                              MulticlassLabels(label_traindat))
    svm.train()
    predicted_labels = svm.apply()

    binary_evaluator = ROCEvaluation()
    evaluator = MulticlassOVREvaluation(binary_evaluator)
    mean_roc = evaluator.evaluate(predicted_labels, ground_truth_labels)
    #print mean_roc

    binary_evaluator = ContingencyTableEvaluation(ACCURACY)
    evaluator = MulticlassOVREvaluation(binary_evaluator)
    mean_accuracy = evaluator.evaluate(predicted_labels, ground_truth_labels)
    #print mean_accuracy

    return mean_roc, mean_accuracy
예제 #3
0
def classifier_multiclasslinearmachine_modular(
        fm_train_real=traindat,
        fm_test_real=testdat,
        label_train_multiclass=label_traindat,
        label_test_multiclass=label_testdat,
        width=2.1,
        C=1,
        epsilon=1e-5):
    from shogun.Features import RealFeatures, MulticlassLabels
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine, MulticlassOneVsOneStrategy, MulticlassOneVsRestStrategy

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)

    labels = MulticlassLabels(label_train_multiclass)

    classifier = LibLinear(L2R_L2LOSS_SVC)
    classifier.set_epsilon(epsilon)
    classifier.set_bias_enabled(True)
    mc_classifier = LinearMulticlassMachine(MulticlassOneVsOneStrategy(),
                                            feats_train, classifier, labels)

    mc_classifier.train()
    label_pred = mc_classifier.apply()
    out = label_pred.get_labels()

    if label_test_multiclass is not None:
        from shogun.Evaluation import MulticlassAccuracy
        labels_test = MulticlassLabels(label_test_multiclass)
        evaluator = MulticlassAccuracy()
        acc = evaluator.evaluate(label_pred, labels_test)
        print('Accuracy = %.4f' % acc)

    return out
예제 #4
0
def classifier_multiclassmachine_modular(fm_train_real=traindat,
                                         fm_test_real=testdat,
                                         label_train_multiclass=label_traindat,
                                         width=2.1,
                                         C=1,
                                         epsilon=1e-5):
    from shogun.Features import RealFeatures, MulticlassLabels
    from shogun.Kernel import GaussianKernel
    from shogun.Classifier import LibSVM, KernelMulticlassMachine, MulticlassOneVsRestStrategy

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)
    kernel = GaussianKernel(feats_train, feats_train, width)

    labels = MulticlassLabels(label_train_multiclass)

    classifier = LibSVM()
    classifier.set_epsilon(epsilon)
    print labels.get_labels()
    mc_classifier = KernelMulticlassMachine(MulticlassOneVsRestStrategy(),
                                            kernel, classifier, labels)
    mc_classifier.train()

    kernel.init(feats_train, feats_test)
    out = mc_classifier.apply().get_labels()
    return out
def classifier_multiclass_shareboost(fm_train_real=traindat,
                                     fm_test_real=testdat,
                                     label_train_multiclass=label_traindat,
                                     label_test_multiclass=label_testdat,
                                     lawidth=2.1,
                                     C=1,
                                     epsilon=1e-5):
    from shogun.Features import RealFeatures, RealSubsetFeatures, MulticlassLabels
    from shogun.Classifier import ShareBoost

    #print('Working on a problem of %d features and %d samples' % fm_train_real.shape)

    feats_train = RealFeatures(fm_train_real)

    labels = MulticlassLabels(label_train_multiclass)

    shareboost = ShareBoost(feats_train, labels,
                            min(fm_train_real.shape[0] - 1, 30))
    shareboost.train()
    #print(shareboost.get_activeset())

    feats_test = RealSubsetFeatures(RealFeatures(fm_test_real),
                                    shareboost.get_activeset())
    label_pred = shareboost.apply(feats_test)

    out = label_pred.get_labels()

    if label_test_multiclass is not None:
        from shogun.Evaluation import MulticlassAccuracy
        labels_test = MulticlassLabels(label_test_multiclass)
        evaluator = MulticlassAccuracy()
        acc = evaluator.evaluate(label_pred, labels_test)
        #print('Accuracy = %.4f' % acc)

    return out
예제 #6
0
def classifier_multiclass_relaxedtree(fm_train_real=traindat,
                                      fm_test_real=testdat,
                                      label_train_multiclass=label_traindat,
                                      label_test_multiclass=label_testdat,
                                      lawidth=2.1,
                                      C=1,
                                      epsilon=1e-5):
    from shogun.Features import RealFeatures, MulticlassLabels
    from shogun.Classifier import RelaxedTree, MulticlassLibLinear
    from shogun.Kernel import GaussianKernel

    #print('Working on a problem of %d features and %d samples' % fm_train_real.shape)

    feats_train = RealFeatures(fm_train_real)

    labels = MulticlassLabels(label_train_multiclass)

    machine = RelaxedTree()
    machine.set_machine_for_confusion_matrix(MulticlassLibLinear())
    machine.set_kernel(GaussianKernel())
    machine.set_labels(labels)
    machine.train(feats_train)

    label_pred = machine.apply_multiclass(RealFeatures(fm_test_real))
    out = label_pred.get_labels()

    if label_test_multiclass is not None:
        from shogun.Evaluation import MulticlassAccuracy
        labels_test = MulticlassLabels(label_test_multiclass)
        evaluator = MulticlassAccuracy()
        acc = evaluator.evaluate(label_pred, labels_test)
        print('Accuracy = %.4f' % acc)

    return out
def evaluation_multiclassaccuracy_modular (ground_truth, predicted):
	from shogun.Features import MulticlassLabels
	from shogun.Evaluation import MulticlassAccuracy

	ground_truth_labels = MulticlassLabels(ground_truth)
	predicted_labels = MulticlassLabels(predicted)
	
	evaluator = MulticlassAccuracy()
	accuracy = evaluator.evaluate(predicted_labels,ground_truth_labels)
	
	return accuracy
예제 #8
0
def classifier_multiclass_ecoc_ovr(fm_train_real=traindat,
                                   fm_test_real=testdat,
                                   label_train_multiclass=label_traindat,
                                   label_test_multiclass=label_testdat,
                                   lawidth=2.1,
                                   C=1,
                                   epsilon=1e-5):
    from shogun.Features import RealFeatures, MulticlassLabels
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine
    from shogun.Classifier import ECOCStrategy, ECOCOVREncoder, ECOCLLBDecoder, MulticlassOneVsRestStrategy

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)

    labels = MulticlassLabels(label_train_multiclass)

    classifier = LibLinear(L2R_L2LOSS_SVC)
    classifier.set_epsilon(epsilon)
    classifier.set_bias_enabled(True)

    mc_classifier = LinearMulticlassMachine(MulticlassOneVsRestStrategy(),
                                            feats_train, classifier, labels)
    mc_classifier.train()
    label_mc = mc_classifier.apply(feats_test)
    out_mc = label_mc.get_labels()

    ecoc_strategy = ECOCStrategy(ECOCOVREncoder(), ECOCLLBDecoder())
    ecoc_classifier = LinearMulticlassMachine(ecoc_strategy, feats_train,
                                              classifier, labels)
    ecoc_classifier.train()
    label_ecoc = ecoc_classifier.apply(feats_test)
    out_ecoc = label_ecoc.get_labels()

    n_diff = (out_mc != out_ecoc).sum()
    if n_diff == 0:
        print("Same results for OvR and ECOCOvR")
    else:
        print(
            "Different results for OvR and ECOCOvR (%d out of %d are different)"
            % (n_diff, len(out_mc)))

    if label_test_multiclass is not None:
        from shogun.Evaluation import MulticlassAccuracy
        labels_test = MulticlassLabels(label_test_multiclass)
        evaluator = MulticlassAccuracy()
        acc_mc = evaluator.evaluate(label_mc, labels_test)
        acc_ecoc = evaluator.evaluate(label_ecoc, labels_test)
        print('Normal OVR Accuracy = %.4f' % acc_mc)
        print('ECOC OVR Accuracy   = %.4f' % acc_ecoc)

    return out_ecoc, out_mc
예제 #9
0
def classifier_multiclass_ecoc_random(fm_train_real=traindat,
                                      fm_test_real=testdat,
                                      label_train_multiclass=label_traindat,
                                      label_test_multiclass=label_testdat,
                                      lawidth=2.1,
                                      C=1,
                                      epsilon=1e-5):
    from shogun.Features import RealFeatures, MulticlassLabels
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine
    from shogun.Classifier import ECOCStrategy, ECOCRandomSparseEncoder, ECOCRandomDenseEncoder, ECOCHDDecoder

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)

    labels = MulticlassLabels(label_train_multiclass)

    classifier = LibLinear(L2R_L2LOSS_SVC)
    classifier.set_epsilon(epsilon)
    classifier.set_bias_enabled(True)

    rnd_dense_strategy = ECOCStrategy(ECOCRandomDenseEncoder(),
                                      ECOCHDDecoder())
    rnd_sparse_strategy = ECOCStrategy(ECOCRandomSparseEncoder(),
                                       ECOCHDDecoder())

    dense_classifier = LinearMulticlassMachine(rnd_dense_strategy, feats_train,
                                               classifier, labels)
    dense_classifier.train()
    label_dense = dense_classifier.apply(feats_test)
    out_dense = label_dense.get_labels()

    sparse_classifier = LinearMulticlassMachine(rnd_sparse_strategy,
                                                feats_train, classifier,
                                                labels)
    sparse_classifier.train()
    label_sparse = sparse_classifier.apply(feats_test)
    out_sparse = label_sparse.get_labels()

    if label_test_multiclass is not None:
        from shogun.Evaluation import MulticlassAccuracy
        labels_test = MulticlassLabels(label_test_multiclass)
        evaluator = MulticlassAccuracy()
        acc_dense = evaluator.evaluate(label_dense, labels_test)
        acc_sparse = evaluator.evaluate(label_sparse, labels_test)
        print('Random Dense Accuracy  = %.4f' % acc_dense)
        print('Random Sparse Accuracy = %.4f' % acc_sparse)

    return out_sparse, out_dense
def classifier_larank_modular(fm_train_real=traindat,
                              fm_test_real=testdat,
                              label_train_multiclass=label_traindat,
                              C=0.9,
                              num_threads=1,
                              num_iter=5):
    from shogun.Features import RealFeatures, MulticlassLabels
    from shogun.Kernel import GaussianKernel
    from shogun.Classifier import LaRank
    from shogun.Mathematics import Math_init_random
    Math_init_random(17)

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)
    width = 2.1
    kernel = GaussianKernel(feats_train, feats_train, width)

    epsilon = 1e-5
    labels = MulticlassLabels(label_train_multiclass)

    svm = LaRank(C, kernel, labels)
    #svm.set_tau(1e-3)
    svm.set_batch_mode(False)
    #svm.io.enable_progress()
    svm.set_epsilon(epsilon)
    svm.train()
    out = svm.apply(feats_train).get_labels()
    predictions = svm.apply()
    return predictions, svm, predictions.get_labels()
예제 #11
0
        def RunNBCShogun(q):
            totalTimer = Timer()

            Log.Info("Loading dataset", self.verbose)
            try:
                # Load train and test dataset.
                trainData = np.genfromtxt(self.dataset[0], delimiter=',')
                testData = np.genfromtxt(self.dataset[1], delimiter=',')

                # Labels are the last row of the training set.
                labels = MulticlassLabels(trainData[:,
                                                    (trainData.shape[1] - 1)])

                with totalTimer:
                    # Transform into features.
                    trainFeat = RealFeatures(trainData[:, :-1].T)
                    testFeat = RealFeatures(testData.T)

                    # Create and train the classifier.
                    nbc = GaussianNaiveBayes(trainFeat, labels)
                    nbc.train()
                    # Run Naive Bayes Classifier on the test dataset.
                    nbc.apply(testFeat).get_labels()
            except Exception as e:
                q.put(-1)
                return -1

            time = totalTimer.ElapsedTime()
            q.put(time)
            return time
예제 #12
0
def evaluation_clustering(features=fea, ground_truth=gnd_raw, ncenters=10):
    from shogun.Evaluation import ClusteringAccuracy, ClusteringMutualInformation
    from shogun.Features import MulticlassLabels
    from shogun.Mathematics import Math

    # reproducable results
    Math.init_random(1)

    centroids = run_clustering(features, ncenters)
    gnd_hat = assign_labels(features, centroids, ncenters)
    gnd = MulticlassLabels(ground_truth)

    AccuracyEval = ClusteringAccuracy()
    AccuracyEval.best_map(gnd_hat, gnd)

    accuracy = AccuracyEval.evaluate(gnd_hat, gnd)
    #print(('Clustering accuracy = %.4f' % accuracy))

    MIEval = ClusteringMutualInformation()
    mutual_info = MIEval.evaluate(gnd_hat, gnd)
    #print(('Clustering mutual information = %.4f' % mutual_info))

    # TODO mutual information does not work with serialization
    #return gnd, gnd_hat, accuracy, MIEval, mutual_info
    return gnd, gnd_hat, accuracy
예제 #13
0
        def RunAllKnnShogun(q):
            totalTimer = Timer()

            # Load input dataset.
            # If the dataset contains two files then the second file is the query
            # file.
            try:
                Log.Info("Loading dataset", self.verbose)
                if len(self.dataset) == 2:
                    referenceData = np.genfromtxt(self.dataset[0],
                                                  delimiter=',')
                    queryData = np.genfromtxt(self.dataset[1], delimiter=',')
                    queryFeat = RealFeatures(queryFeat.T)
                else:
                    referenceData = np.genfromtxt(self.dataset, delimiter=',')

                # Labels are the last row of the dataset.
                labels = MulticlassLabels(
                    referenceData[:, (referenceData.shape[1] - 1)])
                referenceData = referenceData[:, :-1]

                with totalTimer:
                    # Get all the parameters.
                    k = re.search("-k (\d+)", options)
                    if not k:
                        Log.Fatal(
                            "Required option: Number of furthest neighbors to find."
                        )
                        q.put(-1)
                        return -1
                    else:
                        k = int(k.group(1))
                        if (k < 1 or k > referenceData.shape[0]):
                            Log.Fatal("Invalid k: " + k.group(1) +
                                      "; must be greater than 0" +
                                      " and less or equal than " +
                                      str(referenceData.shape[0]))
                            q.put(-1)
                            return -1

                    referenceFeat = RealFeatures(referenceData.T)
                    distance = EuclideanDistance(referenceFeat, referenceFeat)

                    # Perform All K-Nearest-Neighbors.
                    model = SKNN(k, distance, labels)
                    model.train()

                    if len(self.dataset) == 2:
                        out = model.apply(queryFeat).get_labels()
                    else:
                        out = model.apply(referenceFeat).get_labels()
            except Exception as e:
                q.put(-1)
                return -1

            time = totalTimer.ElapsedTime()
            q.put(time)
            return time
예제 #14
0
def classifier_multiclasslinearmachine_modular(
        fm_train_real=traindat,
        fm_test_real=testdat,
        label_train_multiclass=label_traindat,
        label_test_multiclass=label_testdat,
        lawidth=2.1,
        C=1,
        epsilon=1e-5):
    from shogun.Features import RealFeatures, MulticlassLabels
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine
    from shogun.Classifier import ECOCStrategy, ECOCDiscriminantEncoder, ECOCHDDecoder

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)

    labels = MulticlassLabels(label_train_multiclass)

    classifier = LibLinear(L2R_L2LOSS_SVC)
    classifier.set_epsilon(epsilon)
    classifier.set_bias_enabled(True)

    encoder = ECOCDiscriminantEncoder()
    encoder.set_features(feats_train)
    encoder.set_labels(labels)
    encoder.set_sffs_iterations(50)

    strategy = ECOCStrategy(encoder, ECOCHDDecoder())

    classifier = LinearMulticlassMachine(strategy, feats_train, classifier,
                                         labels)
    classifier.train()
    label_pred = classifier.apply(feats_test)
    out = label_pred.get_labels()

    if label_test_multiclass is not None:
        from shogun.Evaluation import MulticlassAccuracy
        labels_test = MulticlassLabels(label_test_multiclass)
        evaluator = MulticlassAccuracy()
        acc = evaluator.evaluate(label_pred, labels_test)
        print('Accuracy = %.4f' % acc)

    return out
def classifier_multiclassmachine_modular (fm_train_real=traindat,fm_test_real=testdat,label_train_multiclass=label_traindat,width=2.1,C=1,epsilon=1e-5):
	from shogun.Features import RealFeatures, MulticlassLabels
	from shogun.Kernel import GaussianKernel
	from shogun.Classifier import LibSVM, KernelMulticlassMachine, MulticlassOneVsRestStrategy

	feats_train=RealFeatures(fm_train_real)
	feats_test=RealFeatures(fm_test_real)
	kernel=GaussianKernel(feats_train, feats_train, width)

	labels=MulticlassLabels(label_train_multiclass)

	classifier = LibSVM()
	classifier.set_epsilon(epsilon)
	print labels.get_labels()
	mc_classifier = KernelMulticlassMachine(MulticlassOneVsRestStrategy(),kernel,classifier,labels)
	mc_classifier.train()

	kernel.init(feats_train, feats_test)
	out = mc_classifier.apply().get_labels()
	return out
예제 #16
0
def assign_labels(data, centroids):
    from shogun.Classifier import KNN
    from numpy import arange

    labels = MulticlassLabels(arange(0., 10.))
    fea = RealFeatures(data)
    fea_centroids = RealFeatures(centroids)
    distance = EuclidianDistance(fea_centroids, fea_centroids)
    knn = KNN(1, distance, labels)
    knn.train()
    return knn.apply(fea)
예제 #17
0
def mkl_multiclass_modular(fm_train_real, fm_test_real, label_train_multiclass,
                           width, C, epsilon, num_threads, mkl_epsilon,
                           mkl_norm):

    from shogun.Features import CombinedFeatures, RealFeatures, MulticlassLabels
    from shogun.Kernel import CombinedKernel, GaussianKernel, LinearKernel, PolyKernel
    from shogun.Classifier import MKLMulticlass

    kernel = CombinedKernel()
    feats_train = CombinedFeatures()
    feats_test = CombinedFeatures()

    subkfeats_train = RealFeatures(fm_train_real)
    subkfeats_test = RealFeatures(fm_test_real)
    subkernel = GaussianKernel(10, width)
    feats_train.append_feature_obj(subkfeats_train)
    feats_test.append_feature_obj(subkfeats_test)
    kernel.append_kernel(subkernel)

    subkfeats_train = RealFeatures(fm_train_real)
    subkfeats_test = RealFeatures(fm_test_real)
    subkernel = LinearKernel()
    feats_train.append_feature_obj(subkfeats_train)
    feats_test.append_feature_obj(subkfeats_test)
    kernel.append_kernel(subkernel)

    subkfeats_train = RealFeatures(fm_train_real)
    subkfeats_test = RealFeatures(fm_test_real)
    subkernel = PolyKernel(10, 2)
    feats_train.append_feature_obj(subkfeats_train)
    feats_test.append_feature_obj(subkfeats_test)
    kernel.append_kernel(subkernel)

    kernel.init(feats_train, feats_train)

    labels = MulticlassLabels(label_train_multiclass)

    mkl = MKLMulticlass(C, kernel, labels)

    mkl.set_epsilon(epsilon)
    mkl.parallel.set_num_threads(num_threads)
    mkl.set_mkl_epsilon(mkl_epsilon)
    mkl.set_mkl_norm(mkl_norm)

    mkl.train()

    kernel.init(feats_train, feats_test)

    out = mkl.apply().get_labels()
    return out
예제 #18
0
def classifier_gaussiannaivebayes_modular(
        fm_train_real=traindat,
        fm_test_real=testdat,
        label_train_multiclass=label_traindat):
    from shogun.Features import RealFeatures, MulticlassLabels
    from shogun.Classifier import GaussianNaiveBayes

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)
    labels = MulticlassLabels(label_train_multiclass)

    gnb = GaussianNaiveBayes(feats_train, labels)
    gnb_train = gnb.train()
    output = gnb.apply(feats_test).get_labels()
    return gnb, gnb_train, output
def evaluation_clustering_simple(n_data=100, sqrt_num_blobs=4, distance=5):
    from shogun.Evaluation import ClusteringAccuracy, ClusteringMutualInformation
    from shogun.Features import MulticlassLabels, GaussianBlobsDataGenerator
    from shogun.Mathematics import Math

    # reproducable results
    Math.init_random(1)

    # produce sone Gaussian blobs to cluster
    ncenters = sqrt_num_blobs**2
    stretch = 1
    angle = 1
    gen = GaussianBlobsDataGenerator(sqrt_num_blobs, distance, stretch, angle)
    features = gen.get_streamed_features(n_data)
    X = features.get_feature_matrix()

    # compute approximate "ground truth" labels via taking the closest blob mean
    coords = array(range(0, sqrt_num_blobs * distance, distance))
    idx_0 = [abs(coords - x).argmin() for x in X[0]]
    idx_1 = [abs(coords - x).argmin() for x in X[1]]
    ground_truth = array(
        [idx_0[i] * sqrt_num_blobs + idx_1[i] for i in range(n_data)],
        dtype="float64")

    #for label in unique(ground_truth):
    #	indices=ground_truth==label
    #	plot(X[0][indices], X[1][indices], 'o')
    #show()

    centroids = run_clustering(features, ncenters)
    gnd_hat = assign_labels(features, centroids, ncenters)
    gnd = MulticlassLabels(ground_truth)

    AccuracyEval = ClusteringAccuracy()
    AccuracyEval.best_map(gnd_hat, gnd)

    accuracy = AccuracyEval.evaluate(gnd_hat, gnd)
    # in this case we know that the clustering has to be very good
    #print(('Clustering accuracy = %.4f' % accuracy))
    assert (accuracy > 0.8)

    MIEval = ClusteringMutualInformation()
    mutual_info = MIEval.evaluate(gnd_hat, gnd)
    #print(('Clustering mutual information = %.4f' % mutual_info))

    # TODO add multiclass labels and MI once the serialization works
    #return gnd, accuracy, mutual_info
    return accuracy
def classifier_multiclassmultipleoutputliblinear_modular (fm_train_real=traindat,fm_test_real=testdat,label_train_multiclass=label_traindat,label_test_multiclass=label_testdat,width=2.1,C=1,epsilon=1e-5):
	from shogun.Features import RealFeatures, MulticlassLabels, MulticlassMultipleOutputLabels
	from shogun.Classifier import MulticlassLibLinear

	feats_train=RealFeatures(fm_train_real)
	feats_test=RealFeatures(fm_test_real)

	labels=MulticlassLabels(label_train_multiclass)

	classifier = MulticlassLibLinear(C,feats_train,labels)
	classifier.train()

	label_pred = classifier.apply_multiclass_multiple_output(feats_test,2)
	out = label_pred.get_labels()
	#print out
	return out
def classifier_conjugateindex_modular(fm_train_real=traindat,
                                      fm_test_real=testdat,
                                      label_train_multiclass=label_traindat):
    from shogun.Features import RealFeatures, MulticlassLabels
    from shogun.Classifier import ConjugateIndex

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)

    labels = MulticlassLabels(label_train_multiclass)

    ci = ConjugateIndex(feats_train, labels)
    ci.train()

    res = ci.apply(feats_test).get_labels()
    return ci, res
예제 #22
0
def evaluation_clustering(features=fea, ground_truth=gnd_raw, ncenters=10):
    from shogun.Evaluation import ClusteringAccuracy, ClusteringMutualInformation
    from shogun.Features import MulticlassLabels
    centroids = run_clustering(features, ncenters)
    gnd_hat = assign_labels(features, centroids, ncenters)
    gnd = MulticlassLabels(ground_truth)

    AccuracyEval = ClusteringAccuracy()
    AccuracyEval.best_map(gnd_hat, gnd)

    accuracy = AccuracyEval.evaluate(gnd_hat, gnd)
    #print(('Clustering accuracy = %.4f' % accuracy))

    MIEval = ClusteringMutualInformation()
    mutual_info = MIEval.evaluate(gnd_hat, gnd)
    #print(('Clustering mutual information = %.4f' % mutual_info))

    return gnd, accuracy, mutual_info
예제 #23
0
def classifier_qda_modular(fm_train_real=traindat,
                           fm_test_real=testdat,
                           label_train_twoclass=label_traindat,
                           tolerance=1e-4,
                           store_covs=False):
    from shogun.Features import RealFeatures, MulticlassLabels
    from shogun.Classifier import QDA

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)

    labels = MulticlassLabels(label_train_twoclass)

    qda = QDA(feats_train, labels, tolerance, store_covs)
    qda.train()

    qda.apply(feats_test).get_labels()
    qda.set_features(feats_test)
    return qda, qda.apply().get_labels()
def classifier_knn_modular(fm_train_real=traindat,
                           fm_test_real=testdat,
                           label_train_multiclass=label_traindat,
                           k=3):
    from shogun.Features import RealFeatures, MulticlassLabels
    from shogun.Classifier import KNN
    from shogun.Distance import EuclidianDistance

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)
    distance = EuclidianDistance(feats_train, feats_train)

    labels = MulticlassLabels(label_train_multiclass)

    knn = KNN(k, distance, labels)
    knn_train = knn.train()
    output = knn.apply(feats_test).get_labels()
    multiple_k = knn.classify_for_multiple_k()
    return knn, knn_train, output, multiple_k
예제 #25
0
def classifier_multiclassocas_modular(fm_train_real=traindat,
                                      fm_test_real=testdat,
                                      label_train_multiclass=label_traindat,
                                      width=2.1,
                                      C=1,
                                      epsilon=1e-5):
    from shogun.Features import RealFeatures, MulticlassLabels
    from shogun.Classifier import MulticlassOCAS

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)

    labels = MulticlassLabels(label_train_multiclass)

    classifier = MulticlassOCAS(C, feats_train, labels)
    classifier.train()

    out = classifier.apply(feats_test).get_labels()
    return out
def classifier_multiclassocas_modular(num_vec=10,
                                      num_class=3,
                                      distance=15,
                                      width=2.1,
                                      C=1,
                                      epsilon=1e-5,
                                      seed=1):
    from shogun.Features import RealFeatures, MulticlassLabels
    from shogun.Classifier import MulticlassOCAS
    from shogun.Mathematics import Math_init_random

    # reproducible results
    random.seed(seed)
    Math_init_random(seed)

    # generate some training data where each class pair is linearly separable
    label_train = array([mod(x, num_class) for x in range(num_vec)],
                        dtype="float64")
    label_test = array([mod(x, num_class) for x in range(num_vec)],
                       dtype="float64")
    fm_train = array(random.randn(num_class, num_vec))
    fm_test = array(random.randn(num_class, num_vec))
    for i in range(len(label_train)):
        fm_train[label_train[i], i] += distance
        fm_test[label_test[i], i] += distance

    feats_train = RealFeatures(fm_train)
    feats_test = RealFeatures(fm_test)

    labels = MulticlassLabels(label_train)

    classifier = MulticlassOCAS(C, feats_train, labels)
    classifier.train()

    out = classifier.apply(feats_test).get_labels()
    #print label_test
    #print out
    return out, classifier
예제 #27
0
def classifier_larank_modular (num_vec,num_class,distance,C=0.9,num_threads=1,num_iter=5,seed=1):
	from shogun.Features import RealFeatures, MulticlassLabels
	from shogun.Kernel import GaussianKernel
	from shogun.Classifier import LaRank
	from shogun.Mathematics import Math_init_random
	
	# reproducible results
	Math_init_random(seed)
	random.seed(seed)

	# generate some training data where each class pair is linearly separable
	label_train=array([mod(x,num_class) for x in range(num_vec)],dtype="float64")
	label_test=array([mod(x,num_class) for x in range(num_vec)],dtype="float64")
	fm_train=array(random.randn(num_class,num_vec))
	fm_test=array(random.randn(num_class,num_vec))
	for i in range(len(label_train)):
		fm_train[label_train[i],i]+=distance
		fm_test[label_test[i],i]+=distance

	feats_train=RealFeatures(fm_train)
	feats_test=RealFeatures(fm_test)
	
	width=2.1
	kernel=GaussianKernel(feats_train, feats_train, width)

	epsilon=1e-5
	labels=MulticlassLabels(label_train)

	svm=LaRank(C, kernel, labels)
	#svm.set_tau(1e-3)
	svm.set_batch_mode(False)
	#svm.io.enable_progress()
	svm.set_epsilon(epsilon)
	svm.train()
	out=svm.apply(feats_test).get_labels()
	predictions = svm.apply()
	return predictions, svm, predictions.get_labels()
예제 #28
0
def classifier_gmnpsvm_modular(fm_train_real=traindat,
                               fm_test_real=testdat,
                               label_train_multiclass=label_traindat,
                               width=2.1,
                               C=1,
                               epsilon=1e-5):

    from shogun.Features import RealFeatures, MulticlassLabels
    from shogun.Kernel import GaussianKernel
    from shogun.Classifier import GMNPSVM

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)

    kernel = GaussianKernel(feats_train, feats_train, width)

    labels = MulticlassLabels(label_train_multiclass)

    svm = GMNPSVM(C, kernel, labels)
    svm.set_epsilon(epsilon)
    svm.train(feats_train)
    kernel.init(feats_train, feats_test)
    out = svm.apply(feats_test).get_labels()
    return out, kernel
def classifier_multiclasslinearmachine_modular(
        fm_train_real=traindat,
        fm_test_real=testdat,
        label_train_multiclass=label_traindat,
        width=2.1,
        C=1,
        epsilon=1e-5):
    from shogun.Features import RealFeatures, MulticlassLabels
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine, MulticlassOneVsOneStrategy, MulticlassOneVsRestStrategy

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)

    labels = MulticlassLabels(label_train_multiclass)

    classifier = LibLinear(L2R_L2LOSS_SVC)
    classifier.set_epsilon(epsilon)
    classifier.set_bias_enabled(True)
    mc_classifier = LinearMulticlassMachine(MulticlassOneVsOneStrategy(),
                                            feats_train, classifier, labels)

    mc_classifier.train()
    out = mc_classifier.apply().get_labels()
    return out
예제 #30
0
def serialization_complex_example(num=5, dist=1, dim=10, C=2.0, width=10):
    import os
    from numpy import concatenate, zeros, ones
    from numpy.random import randn, seed
    from shogun.Features import RealFeatures, MulticlassLabels
    from shogun.Classifier import GMNPSVM
    from shogun.Kernel import GaussianKernel
    from shogun.IO import SerializableHdf5File,SerializableAsciiFile, \
      SerializableJsonFile,SerializableXmlFile,MSG_DEBUG
    from shogun.Preprocessor import NormOne, LogPlusOne

    seed(17)

    data = concatenate(
        (randn(dim, num), randn(dim, num) + dist, randn(dim, num) + 2 * dist,
         randn(dim, num) + 3 * dist),
        axis=1)
    lab = concatenate((zeros(num), ones(num), 2 * ones(num), 3 * ones(num)))

    feats = RealFeatures(data)
    #feats.io.set_loglevel(MSG_DEBUG)
    kernel = GaussianKernel(feats, feats, width)

    labels = MulticlassLabels(lab)

    svm = GMNPSVM(C, kernel, labels)

    feats.add_preprocessor(NormOne())
    feats.add_preprocessor(LogPlusOne())
    feats.set_preprocessed(1)
    svm.train(feats)

    #svm.print_serializable()

    fstream = SerializableHdf5File("blaah.h5", "w")
    status = svm.save_serializable(fstream)
    check_status(status, 'h5')

    fstream = SerializableAsciiFile("blaah.asc", "w")
    status = svm.save_serializable(fstream)
    check_status(status, 'asc')

    fstream = SerializableJsonFile("blaah.json", "w")
    status = svm.save_serializable(fstream)
    check_status(status, 'json')

    fstream = SerializableXmlFile("blaah.xml", "w")
    status = svm.save_serializable(fstream)
    check_status(status, 'xml')

    fstream = SerializableHdf5File("blaah.h5", "r")
    new_svm = GMNPSVM()
    status = new_svm.load_serializable(fstream)
    check_status(status, 'h5')
    new_svm.train()

    fstream = SerializableAsciiFile("blaah.asc", "r")
    new_svm = GMNPSVM()
    status = new_svm.load_serializable(fstream)
    check_status(status, 'asc')
    new_svm.train()

    fstream = SerializableJsonFile("blaah.json", "r")
    new_svm = GMNPSVM()
    status = new_svm.load_serializable(fstream)
    check_status(status, 'json')
    new_svm.train()

    fstream = SerializableXmlFile("blaah.xml", "r")
    new_svm = GMNPSVM()
    status = new_svm.load_serializable(fstream)
    check_status(status, 'xml')
    new_svm.train()

    os.unlink("blaah.h5")
    os.unlink("blaah.asc")
    os.unlink("blaah.json")
    os.unlink("blaah.xml")
    return svm, new_svm
예제 #31
0
def features_io_modular (fm_train_real, label_train_twoclass):
	import numpy
	from shogun.Features import SparseRealFeatures, RealFeatures, MulticlassLabels
	from shogun.Kernel import GaussianKernel
	from shogun.IO import AsciiFile, BinaryFile, HDF5File

	feats=SparseRealFeatures(fm_train_real)
	feats2=SparseRealFeatures()

	f=BinaryFile("fm_train_sparsereal.bin","w")
	feats.save(f)

	f=AsciiFile("fm_train_sparsereal.ascii","w")
	feats.save(f)

	f=BinaryFile("fm_train_sparsereal.bin")
	feats2.load(f)

	f=AsciiFile("fm_train_sparsereal.ascii")
	feats2.load(f)

	feats=RealFeatures(fm_train_real)
	feats2=RealFeatures()

	f=BinaryFile("fm_train_real.bin","w")
	feats.save(f)

	f=HDF5File("fm_train_real.h5","w", "/data/doubles")
	feats.save(f)

	f=AsciiFile("fm_train_real.ascii","w")
	feats.save(f)

	f=BinaryFile("fm_train_real.bin")
	feats2.load(f)
	#print("diff binary", numpy.max(numpy.abs(feats2.get_feature_matrix().flatten()-fm_train_real.flatten())))

	f=AsciiFile("fm_train_real.ascii")
	feats2.load(f)
	#print("diff ascii", numpy.max(numpy.abs(feats2.get_feature_matrix().flatten()-fm_train_real.flatten())))

	lab=MulticlassLabels(numpy.array([0.0,1.0,2.0,3.0]))
	lab2=MulticlassLabels()
	f=AsciiFile("label_train_twoclass.ascii","w")
	lab.save(f)

	f=BinaryFile("label_train_twoclass.bin","w")
	lab.save(f)

	f=HDF5File("label_train_real.h5","w", "/data/labels")
	lab.save(f)

	f=AsciiFile("label_train_twoclass.ascii")
	lab2.load(f)

	f=BinaryFile("label_train_twoclass.bin")
	lab2.load(f)

	f=HDF5File("fm_train_real.h5","r", "/data/doubles")
	feats2.load(f)
	#print(feats2.get_feature_matrix())
	f=HDF5File("label_train_real.h5","r", "/data/labels")
	lab2.load(f)
	#print(lab2.get_labels())

	#clean up
	import os
	for f in ['fm_train_sparsereal.bin','fm_train_sparsereal.ascii',
			'fm_train_real.bin','fm_train_real.h5','fm_train_real.ascii',
			'label_train_real.h5', 'label_train_twoclass.ascii','label_train_twoclass.bin']:
		os.unlink(f)
	return feats, feats2, lab, lab2
예제 #32
0
def evaluation_cross_validation_multiclass_storage(
        traindat=traindat, label_traindat=label_traindat):
    from shogun.Evaluation import CrossValidation, CrossValidationResult
    from shogun.Evaluation import CrossValidationPrintOutput
    from shogun.Evaluation import CrossValidationMKLStorage, CrossValidationMulticlassStorage
    from shogun.Evaluation import MulticlassAccuracy, F1Measure
    from shogun.Evaluation import StratifiedCrossValidationSplitting
    from shogun.Features import MulticlassLabels
    from shogun.Features import RealFeatures, CombinedFeatures
    from shogun.Kernel import GaussianKernel, CombinedKernel
    from shogun.Classifier import MKLMulticlass
    from shogun.Mathematics import Statistics, MSG_DEBUG

    # training data, combined features all on same data
    features = RealFeatures(traindat)
    comb_features = CombinedFeatures()
    comb_features.append_feature_obj(features)
    comb_features.append_feature_obj(features)
    comb_features.append_feature_obj(features)
    labels = MulticlassLabels(label_traindat)

    # kernel, different Gaussians combined
    kernel = CombinedKernel()
    kernel.append_kernel(GaussianKernel(10, 0.1))
    kernel.append_kernel(GaussianKernel(10, 1))
    kernel.append_kernel(GaussianKernel(10, 2))

    # create mkl using libsvm, due to a mem-bug, interleaved is not possible
    svm = MKLMulticlass(1.0, kernel, labels)
    svm.set_kernel(kernel)

    # splitting strategy for 5 fold cross-validation (for classification its better
    # to use "StratifiedCrossValidation", but the standard
    # "StratifiedCrossValidationSplitting" is also available
    splitting_strategy = StratifiedCrossValidationSplitting(labels, 5)

    # evaluation method
    evaluation_criterium = MulticlassAccuracy()

    # cross-validation instance
    cross_validation = CrossValidation(svm, comb_features, labels,
                                       splitting_strategy,
                                       evaluation_criterium)
    cross_validation.set_autolock(False)

    # append cross vlaidation output classes
    #cross_validation.add_cross_validation_output(CrossValidationPrintOutput())
    #mkl_storage=CrossValidationMKLStorage()
    #cross_validation.add_cross_validation_output(mkl_storage)
    multiclass_storage = CrossValidationMulticlassStorage()
    multiclass_storage.append_binary_evaluation(F1Measure())
    cross_validation.add_cross_validation_output(multiclass_storage)
    cross_validation.set_num_runs(3)

    # perform cross-validation
    result = cross_validation.evaluate()

    roc_0_0_0 = multiclass_storage.get_fold_ROC(0, 0, 0)
    #print roc_0_0_0
    auc_0_0_0 = multiclass_storage.get_fold_evaluation_result(0, 0, 0, 0)
    #print auc_0_0_0
    return roc_0_0_0, auc_0_0_0