def classifier_multiclasslogisticregression_modular(
        fm_train_real=traindat, fm_test_real=testdat,
        label_train_multiclass=label_traindat,
        label_test_multiclass=label_testdat, z=1, epsilon=1e-5):
    """Train MulticlassLogisticRegression and predict labels for the test data.

    The train/test inputs are wrapped in ``RealFeatures`` and the training
    labels in ``MulticlassLabels``.  ``z`` is passed as the first constructor
    argument of ``MulticlassLogisticRegression``; ``epsilon`` is accepted but
    not used.  When ``label_test_multiclass`` is not ``None`` the accuracy of
    the predictions against it is printed.

    Returns the array from ``get_labels()`` of the predictions, or ``None``
    when ``MulticlassLogisticRegression`` cannot be imported.
    """
    from modshogun import RealFeatures, MulticlassLabels
    try:
        from modshogun import MulticlassLogisticRegression
    except ImportError:
        print("recompile shogun with Eigen3 support")
        return None

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)
    train_labels = MulticlassLabels(label_train_multiclass)

    model = MulticlassLogisticRegression(z, train_feats, train_labels)
    model.train()

    predictions = model.apply(test_feats)
    predicted = predictions.get_labels()
    if label_test_multiclass is None:
        return predicted

    from modshogun import MulticlassAccuracy
    truth = MulticlassLabels(label_test_multiclass)
    accuracy = MulticlassAccuracy().evaluate(predictions, truth)
    print('Accuracy = %.4f' % accuracy)
    return predicted
def classifier_multiclass_relaxedtree(
        fm_train_real=traindat, fm_test_real=testdat,
        label_train_multiclass=label_traindat,
        label_test_multiclass=label_testdat,
        lawidth=2.1, C=1, epsilon=1e-5):
    """Train a RelaxedTree and predict labels for the test data.

    The tree is configured with a default-constructed ``MulticlassLibLinear``
    as its confusion-matrix machine and a default-constructed
    ``GaussianKernel``.  ``lawidth``, ``C`` and ``epsilon`` are accepted but
    not used.  When ``label_test_multiclass`` is not ``None`` the accuracy of
    the predictions against it is printed.

    Returns the array from ``get_labels()`` of the predictions.
    """
    from modshogun import RealFeatures, MulticlassLabels
    from modshogun import RelaxedTree, MulticlassLibLinear
    from modshogun import GaussianKernel

    train_feats = RealFeatures(fm_train_real)
    train_labels = MulticlassLabels(label_train_multiclass)

    tree = RelaxedTree()
    tree.set_machine_for_confusion_matrix(MulticlassLibLinear())
    tree.set_kernel(GaussianKernel())
    tree.set_labels(train_labels)
    tree.train(train_feats)

    predictions = tree.apply_multiclass(RealFeatures(fm_test_real))
    predicted = predictions.get_labels()
    if label_test_multiclass is None:
        return predicted

    from modshogun import MulticlassAccuracy
    truth = MulticlassLabels(label_test_multiclass)
    accuracy = MulticlassAccuracy().evaluate(predictions, truth)
    print('Accuracy = %.4f' % accuracy)
    return predicted
def classifier_multiclass_relaxedtree(
        fm_train_real=traindat, fm_test_real=testdat,
        label_train_multiclass=label_traindat,
        label_test_multiclass=label_testdat,
        lawidth=2.1, C=1, epsilon=1e-5):
    """Fit a RelaxedTree on the training data and label the test data.

    A default ``MulticlassLibLinear`` is installed as the confusion-matrix
    machine and a default ``GaussianKernel`` as the kernel.  ``lawidth``,
    ``C`` and ``epsilon`` are part of the signature but are not read.  If
    ``label_test_multiclass`` is given, the prediction accuracy is printed.

    Returns the predicted labels obtained via ``get_labels()``.
    """
    from modshogun import RealFeatures, MulticlassLabels
    from modshogun import RelaxedTree, MulticlassLibLinear
    from modshogun import GaussianKernel

    features = RealFeatures(fm_train_real)
    targets = MulticlassLabels(label_train_multiclass)

    relaxed_tree = RelaxedTree()
    relaxed_tree.set_machine_for_confusion_matrix(MulticlassLibLinear())
    relaxed_tree.set_kernel(GaussianKernel())
    relaxed_tree.set_labels(targets)
    relaxed_tree.train(features)

    prediction = relaxed_tree.apply_multiclass(RealFeatures(fm_test_real))
    result = prediction.get_labels()

    if label_test_multiclass is not None:
        from modshogun import MulticlassAccuracy
        expected = MulticlassLabels(label_test_multiclass)
        score = MulticlassAccuracy().evaluate(prediction, expected)
        print('Accuracy = %.4f' % score)

    return result
def classifier_multiclasslogisticregression_modular(
        fm_train_real=traindat, fm_test_real=testdat,
        label_train_multiclass=label_traindat,
        label_test_multiclass=label_testdat, z=1, epsilon=1e-5):
    """Train MulticlassLogisticRegression and predict labels for the test data.

    The train/test inputs are wrapped in ``RealFeatures`` and the training
    labels in ``MulticlassLabels``.  ``z`` is passed as the first constructor
    argument of ``MulticlassLogisticRegression``; ``epsilon`` is accepted but
    not used.  When ``label_test_multiclass`` is not ``None`` the accuracy of
    the predictions against it is printed.

    Returns the array from ``get_labels()`` of the predictions, or ``None``
    when ``MulticlassLogisticRegression`` cannot be imported.
    """
    from modshogun import RealFeatures, MulticlassLabels
    # The class was previously referenced without being imported in this
    # function.  Import it locally and bail out with a hint when the build
    # does not provide it.
    try:
        from modshogun import MulticlassLogisticRegression
    except ImportError:
        print("recompile shogun with Eigen3 support")
        return None

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)
    labels = MulticlassLabels(label_train_multiclass)

    classifier = MulticlassLogisticRegression(z, feats_train, labels)
    classifier.train()

    label_pred = classifier.apply(feats_test)
    out = label_pred.get_labels()

    if label_test_multiclass is not None:
        from modshogun import MulticlassAccuracy
        labels_test = MulticlassLabels(label_test_multiclass)
        evaluator = MulticlassAccuracy()
        acc = evaluator.evaluate(label_pred, labels_test)
        print("Accuracy = %.4f" % acc)

    return out
def classifier_multiclass_ecoc_discriminant(
        fm_train_real=traindat, fm_test_real=testdat,
        label_train_multiclass=label_traindat,
        label_test_multiclass=label_testdat,
        lawidth=2.1, C=1, epsilon=1e-5):
    """Train an ECOC multiclass machine using the discriminant encoder.

    A ``LibLinear(L2R_L2LOSS_SVC)`` base learner (epsilon set from
    ``epsilon``, bias enabled) is combined with an ``ECOCStrategy`` built
    from an ``ECOCDiscriminantEncoder`` (given the training features and
    labels, 50 SFFS iterations) and an ``ECOCHDDecoder``.  ``lawidth`` and
    ``C`` are accepted but not used.  When ``label_test_multiclass`` is not
    ``None`` the accuracy of the predictions is printed.

    Returns the array from ``get_labels()`` of the test predictions.
    """
    from modshogun import RealFeatures, MulticlassLabels
    from modshogun import LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine
    from modshogun import ECOCStrategy, ECOCDiscriminantEncoder, ECOCHDDecoder

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)
    train_labels = MulticlassLabels(label_train_multiclass)

    base_learner = LibLinear(L2R_L2LOSS_SVC)
    base_learner.set_epsilon(epsilon)
    base_learner.set_bias_enabled(True)

    discriminant = ECOCDiscriminantEncoder()
    discriminant.set_features(train_feats)
    discriminant.set_labels(train_labels)
    discriminant.set_sffs_iterations(50)

    ecoc = ECOCStrategy(discriminant, ECOCHDDecoder())
    machine = LinearMulticlassMachine(ecoc, train_feats, base_learner,
                                      train_labels)
    machine.train()

    predictions = machine.apply(test_feats)
    predicted = predictions.get_labels()
    if label_test_multiclass is None:
        return predicted

    from modshogun import MulticlassAccuracy
    truth = MulticlassLabels(label_test_multiclass)
    accuracy = MulticlassAccuracy().evaluate(predictions, truth)
    print('Accuracy = %.4f' % accuracy)
    return predicted
def classifier_multiclass_shareboost(
        fm_train_real=traindat, fm_test_real=testdat,
        label_train_multiclass=label_traindat,
        label_test_multiclass=label_testdat,
        lawidth=2.1, C=1, epsilon=1e-5):
    """Train ShareBoost and predict labels for the test data.

    ``ShareBoost`` receives ``min(fm_train_real.shape[0] - 1, 30)`` as its
    third constructor argument.  The test features are restricted to the
    trained model's active set (``get_activeset()``) through
    ``RealSubsetFeatures`` before prediction.  ``lawidth``, ``C`` and
    ``epsilon`` are accepted but not used.  When ``label_test_multiclass``
    is not ``None`` the accuracy is evaluated but not printed.

    Returns the array from ``get_labels()`` of the test predictions.
    """
    from modshogun import RealFeatures, RealSubsetFeatures, MulticlassLabels
    from modshogun import ShareBoost

    train_feats = RealFeatures(fm_train_real)
    train_labels = MulticlassLabels(label_train_multiclass)

    booster = ShareBoost(train_feats, train_labels,
                         min(fm_train_real.shape[0] - 1, 30))
    booster.train()

    test_subset = RealSubsetFeatures(RealFeatures(fm_test_real),
                                     booster.get_activeset())
    predictions = booster.apply(test_subset)
    predicted = predictions.get_labels()
    if label_test_multiclass is None:
        return predicted

    from modshogun import MulticlassAccuracy
    truth = MulticlassLabels(label_test_multiclass)
    # The accuracy is still computed, although nothing reports it.
    accuracy = MulticlassAccuracy().evaluate(predictions, truth)
    return predicted
def classifier_multiclassliblinear_modular(
        fm_train_real=traindat, fm_test_real=testdat,
        label_train_multiclass=label_traindat,
        label_test_multiclass=label_testdat,
        width=2.1, C=1, epsilon=1e-5):
    """Train MulticlassLibLinear and predict labels for the test data.

    ``C`` is passed as the first constructor argument of
    ``MulticlassLibLinear``; ``width`` and ``epsilon`` are accepted but not
    used.  When ``label_test_multiclass`` is not ``None`` the accuracy of the
    predictions against it is printed.

    Returns the array from ``get_labels()`` of the test predictions.
    """
    from modshogun import RealFeatures, MulticlassLabels
    from modshogun import MulticlassLibLinear

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)
    train_labels = MulticlassLabels(label_train_multiclass)

    model = MulticlassLibLinear(C, train_feats, train_labels)
    model.train()

    predictions = model.apply(test_feats)
    predicted = predictions.get_labels()
    if label_test_multiclass is None:
        return predicted

    from modshogun import MulticlassAccuracy
    truth = MulticlassLabels(label_test_multiclass)
    accuracy = MulticlassAccuracy().evaluate(predictions, truth)
    print('Accuracy = %.4f' % accuracy)
    return predicted
def classifier_multiclasslinearmachine_modular(
        fm_train_real=traindat, fm_test_real=testdat,
        label_train_multiclass=label_traindat,
        label_test_multiclass=label_testdat,
        width=2.1, C=1, epsilon=1e-5):
    """Train a one-vs-one LinearMulticlassMachine and predict the test data.

    The base learner is ``LibLinear(L2R_L2LOSS_SVC)`` with its epsilon set
    from ``epsilon`` and bias enabled; it is combined with
    ``MulticlassOneVsOneStrategy``.  ``width`` and ``C`` are accepted but
    not used.  When ``label_test_multiclass`` is not ``None`` the accuracy of
    the predictions against it is printed.

    Returns the array from ``get_labels()`` of the test predictions.
    """
    from modshogun import RealFeatures, MulticlassLabels
    from modshogun import (LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine,
                           MulticlassOneVsOneStrategy)

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)
    labels = MulticlassLabels(label_train_multiclass)

    classifier = LibLinear(L2R_L2LOSS_SVC)
    classifier.set_epsilon(epsilon)
    classifier.set_bias_enabled(True)

    mc_classifier = LinearMulticlassMachine(
        MulticlassOneVsOneStrategy(), feats_train, classifier, labels)
    mc_classifier.train()

    # Predict on the test features.  The previous argument-less apply()
    # left feats_test unused, while the result was still scored against the
    # test labels below.
    label_pred = mc_classifier.apply(feats_test)
    out = label_pred.get_labels()

    if label_test_multiclass is not None:
        from modshogun import MulticlassAccuracy
        labels_test = MulticlassLabels(label_test_multiclass)
        evaluator = MulticlassAccuracy()
        acc = evaluator.evaluate(label_pred, labels_test)
        print('Accuracy = %.4f' % acc)

    return out
def classifier_multiclasslinearmachine_modular(
        fm_train_real=traindat, fm_test_real=testdat,
        label_train_multiclass=label_traindat,
        label_test_multiclass=label_testdat,
        width=2.1, C=1, epsilon=1e-5):
    """Fit a one-vs-one linear multiclass machine and label the test data.

    ``LibLinear(L2R_L2LOSS_SVC)`` (epsilon from ``epsilon``, bias enabled)
    is the base learner of a ``LinearMulticlassMachine`` using
    ``MulticlassOneVsOneStrategy``.  ``width`` and ``C`` are part of the
    signature but are not read.  If ``label_test_multiclass`` is given, the
    prediction accuracy is printed.

    Returns the predicted test labels obtained via ``get_labels()``.
    """
    from modshogun import RealFeatures, MulticlassLabels
    from modshogun import (LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine,
                           MulticlassOneVsOneStrategy)

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)
    labels = MulticlassLabels(label_train_multiclass)

    classifier = LibLinear(L2R_L2LOSS_SVC)
    classifier.set_epsilon(epsilon)
    classifier.set_bias_enabled(True)

    mc_classifier = LinearMulticlassMachine(
        MulticlassOneVsOneStrategy(), feats_train, classifier, labels)
    mc_classifier.train()

    # apply() without arguments ignored feats_test, yet the predictions are
    # compared with the test labels below; predict on the test features.
    label_pred = mc_classifier.apply(feats_test)
    out = label_pred.get_labels()

    if label_test_multiclass is not None:
        from modshogun import MulticlassAccuracy
        labels_test = MulticlassLabels(label_test_multiclass)
        evaluator = MulticlassAccuracy()
        acc = evaluator.evaluate(label_pred, labels_test)
        print('Accuracy = %.4f' % acc)

    return out
def shareboost(train_features, train_labels, test_features, test_labels):
    """Train ShareBoost and print its test error as a percentage.

    The model receives ``min(train_features.get_num_features() - 1, 30)`` as
    its third constructor argument.  Test features are restricted to the
    trained model's active set (``get_activeset()``) via
    ``RealSubsetFeatures`` before prediction.  Nothing is returned.
    """
    from modshogun import ShareBoost, MulticlassAccuracy, RealSubsetFeatures

    model = ShareBoost(train_features, train_labels,
                       min(train_features.get_num_features() - 1, 30))
    model.train()

    feats_test = RealSubsetFeatures(test_features, model.get_activeset())
    test_output = model.apply(feats_test)

    evaluator = MulticlassAccuracy()
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('ShareBoost test error is %.4f'
          % ((1 - evaluator.evaluate(test_output, test_labels)) * 100))
def qda(train_features, train_labels, test_featues, test_labels):
    """Train QDA and print its training and test error as percentages.

    Note: the third parameter is spelled ``test_featues``.  The spelling is
    kept so existing keyword callers continue to work; it holds the test
    features.  Nothing is returned.
    """
    from modshogun import QDA, MulticlassAccuracy

    model = QDA(train_features, train_labels)
    model.train()

    train_output = model.apply()
    # The body used to read the undefined name ``test_features`` here.
    test_output = model.apply(test_featues)

    evaluator = MulticlassAccuracy()
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('QDA training error is %.4f'
          % ((1 - evaluator.evaluate(train_output, train_labels)) * 100))
    print('QDA test error is %.4f'
          % ((1 - evaluator.evaluate(test_output, test_labels)) * 100))
def evaluation_multiclassaccuracy_modular(ground_truth, predicted):
    """Return the MulticlassAccuracy of ``predicted`` against ``ground_truth``.

    Both inputs are wrapped in ``MulticlassLabels`` before being handed to
    ``MulticlassAccuracy.evaluate`` (predicted labels first).
    """
    from modshogun import MulticlassLabels
    from modshogun import MulticlassAccuracy

    truth = MulticlassLabels(ground_truth)
    guess = MulticlassLabels(predicted)
    return MulticlassAccuracy().evaluate(guess, truth)
def evaluation_multiclassaccuracy_modular(ground_truth, predicted):
    """Compute multiclass accuracy for a pair of label vectors.

    ``ground_truth`` and ``predicted`` are each converted to
    ``MulticlassLabels``; the value of
    ``MulticlassAccuracy().evaluate(predicted, ground_truth)`` on the
    converted objects is returned.
    """
    from modshogun import MulticlassLabels
    from modshogun import MulticlassAccuracy

    reference_labels = MulticlassLabels(ground_truth)
    candidate_labels = MulticlassLabels(predicted)
    scorer = MulticlassAccuracy()
    return scorer.evaluate(candidate_labels, reference_labels)
def knn(train_features, train_labels, test_features, test_labels, k=1):
    """Train KNN with a Euclidean distance and print train/test error (%).

    ``k`` is passed as the first constructor argument of ``KNN``.  The
    distance is built over the training features on both sides.  Nothing is
    returned.
    """
    from modshogun import KNN, MulticlassAccuracy, EuclideanDistance

    distance = EuclideanDistance(train_features, train_features)
    model = KNN(k, distance, train_labels)
    model.train()

    train_output = model.apply()
    test_output = model.apply(test_features)

    evaluator = MulticlassAccuracy()
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('KNN training error is %.4f'
          % ((1 - evaluator.evaluate(train_output, train_labels)) * 100))
    print('KNN test error is %.4f'
          % ((1 - evaluator.evaluate(test_output, test_labels)) * 100))
def evaluate4svm(labels, feats, params={'c': 1, 'kernal': 'gauss'}, Nsplit=2):
    """
    Run cross-validation to evaluate the SVM (GMNPSVM).

    Parameters
    ----------
    labels:
        Data set labels.  Must support ``add_subset``, ``remove_subset`` and
        ``get_num_labels``; passed to ``CrossValidationSplitting`` and
        ``GMNPSVM``.
    feats:
        Data set features.  Must support ``add_subset`` / ``remove_subset``.
    params: dictionary
        Reads ``'c'`` (first ``GMNPSVM`` constructor argument) and
        ``'kernal'`` (key spelling kept for compatibility): ``'gauss'``
        selects a ``GaussianKernel`` with width 80, ``'sigmoid'`` a
        ``SigmoidKernel``, anything else a ``LinearKernel``.  The dictionary
        is only read, never modified.
    Nsplit: int, default = 2
        The n for n-fold cross validation.

    Returns
    -------
    numpy array of length ``Nsplit`` with the test accuracy of each fold.
    """
    c = params.get('c')

    # Compare the looked-up value.  The previous form
    # ``params.get('kernal' == 'gauss')`` looked up the key ``False`` and so
    # always fell through to the linear kernel.
    kernel_name = params.get('kernal')
    if kernel_name == 'gauss':
        kernal = GaussianKernel()
        kernal.set_width(80)
    elif kernel_name == 'sigmoid':
        kernal = SigmoidKernel()
    else:
        kernal = LinearKernel()

    split = CrossValidationSplitting(labels, Nsplit)
    split.build_subsets()

    # time.clock was removed in Python 3.8; prefer perf_counter if present.
    timer = getattr(time, 'perf_counter', None) or time.clock

    accuracy = np.zeros(Nsplit)
    time_test = np.zeros(accuracy.shape)
    evaluator = MulticlassAccuracy()
    for i in range(Nsplit):
        idx_train = split.generate_subset_inverse(i)
        idx_test = split.generate_subset_indices(i)

        # Train on the training part of this fold.
        feats.add_subset(idx_train)
        labels.add_subset(idx_train)
        print('%s %s %s' % (c, kernal, labels))
        svm = GMNPSVM(c, kernal, labels)
        # The same feature object is re-subset for testing below, so ask the
        # machine to keep its own copy of the model features.
        # NOTE(review): confirm GMNPSVM supports model-feature storage.
        svm.set_store_model_features(True)
        svm.train(feats)
        feats.remove_subset()
        labels.remove_subset()

        # Evaluate on the held-out part.  The previous code referenced the
        # undefined names feats_test / labels_test before this point.
        feats.add_subset(idx_test)
        labels.add_subset(idx_test)
        t_start = timer()
        out = svm.apply(feats)
        time_test[i] = (timer() - t_start) / labels.get_num_labels()
        accuracy[i] = evaluator.evaluate(out, labels)
        feats.remove_subset()
        labels.remove_subset()

    return accuracy
def shareboost(train_features, train_labels, test_features, test_labels):
    """Fit ShareBoost and print the percentage test error.

    ``ShareBoost`` is constructed with
    ``min(train_features.get_num_features() - 1, 30)`` as its third
    argument.  Before prediction the test features are wrapped in
    ``RealSubsetFeatures`` using the trained model's ``get_activeset()``.
    Nothing is returned.
    """
    from modshogun import ShareBoost, MulticlassAccuracy, RealSubsetFeatures

    booster = ShareBoost(train_features, train_labels,
                         min(train_features.get_num_features() - 1, 30))
    booster.train()

    feats_test = RealSubsetFeatures(test_features, booster.get_activeset())
    test_output = booster.apply(feats_test)

    evaluator = MulticlassAccuracy()
    # print(...) with one argument is valid on both Python 2 and Python 3.
    print('ShareBoost test error is %.4f' % (
        (1 - evaluator.evaluate(test_output, test_labels)) * 100))
def knn(train_features, train_labels, test_features, test_labels, k=1):
    """Fit KNN on a Euclidean distance and print train/test error (%).

    ``k`` is forwarded as the first ``KNN`` constructor argument; the
    ``EuclideanDistance`` is initialised with the training features on both
    sides.  Nothing is returned.
    """
    from modshogun import KNN, MulticlassAccuracy, EuclideanDistance

    distance = EuclideanDistance(train_features, train_features)
    classifier = KNN(k, distance, train_labels)
    classifier.train()

    train_output = classifier.apply()
    test_output = classifier.apply(test_features)

    evaluator = MulticlassAccuracy()
    # print(...) with one argument is valid on both Python 2 and Python 3.
    print('KNN training error is %.4f' % (
        (1 - evaluator.evaluate(train_output, train_labels)) * 100))
    print('KNN test error is %.4f' % (
        (1 - evaluator.evaluate(test_output, test_labels)) * 100))
def qda(train_features, train_labels, test_featues, test_labels):
    """Fit QDA and print the percentage training and test error.

    Note: the third parameter keeps its existing spelling ``test_featues``
    so keyword callers are unaffected; it carries the test features.
    Nothing is returned.
    """
    from modshogun import QDA, MulticlassAccuracy

    classifier = QDA(train_features, train_labels)
    classifier.train()

    train_output = classifier.apply()
    # Use the actual parameter; ``test_features`` was an undefined name here.
    test_output = classifier.apply(test_featues)

    evaluator = MulticlassAccuracy()
    # print(...) with one argument is valid on both Python 2 and Python 3.
    print('QDA training error is %.4f' % (
        (1 - evaluator.evaluate(train_output, train_labels)) * 100))
    print('QDA test error is %.4f' % (
        (1 - evaluator.evaluate(test_output, test_labels)) * 100))
def mkl(train_features, train_labels, test_features, test_labels, width=5,
        C=1.2, epsilon=1e-2, mkl_epsilon=0.001, mkl_norm=2):
    """Train MKLMulticlass on three sub-kernels and print train/test error (%).

    The combined kernel holds ``GaussianKernel(10, width)``,
    ``LinearKernel()`` and ``PolyKernel(10, 2)``; the same train/test
    feature objects are appended once per sub-kernel to the combined
    features.  ``C``, ``epsilon``, ``mkl_epsilon`` and ``mkl_norm`` are
    forwarded to ``MKLMulticlass``.  Nothing is returned.
    """
    from modshogun import CombinedKernel, CombinedFeatures
    from modshogun import GaussianKernel, LinearKernel, PolyKernel
    from modshogun import MKLMulticlass, MulticlassAccuracy

    kernel = CombinedKernel()
    feats_train = CombinedFeatures()
    feats_test = CombinedFeatures()

    # One feature entry per sub-kernel, all over the same data.
    subkernels = (GaussianKernel(10, width), LinearKernel(), PolyKernel(10, 2))
    for subkernel in subkernels:
        feats_train.append_feature_obj(train_features)
        feats_test.append_feature_obj(test_features)
        kernel.append_kernel(subkernel)

    kernel.init(feats_train, feats_train)

    machine = MKLMulticlass(C, kernel, train_labels)
    machine.set_epsilon(epsilon)
    machine.set_mkl_epsilon(mkl_epsilon)
    machine.set_mkl_norm(mkl_norm)
    machine.train()
    train_output = machine.apply()

    # Re-initialise the kernel with the test features on the right-hand side
    # before predicting again.
    kernel.init(feats_train, feats_test)
    test_output = machine.apply()

    evaluator = MulticlassAccuracy()
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('MKL training error is %.4f' % (
        (1 - evaluator.evaluate(train_output, train_labels)) * 100))
    print('MKL test error is %.4f' % (
        (1 - evaluator.evaluate(test_output, test_labels)) * 100))
def classifier_multiclass_ecoc_random(
        fm_train_real=traindat, fm_test_real=testdat,
        label_train_multiclass=label_traindat,
        label_test_multiclass=label_testdat,
        lawidth=2.1, C=1, epsilon=1e-5):
    """Train ECOC machines with random dense and random sparse encoders.

    ``Math_init_random(12345)`` is called first.  One
    ``LibLinear(L2R_L2LOSS_SVC)`` base learner (epsilon from ``epsilon``,
    bias enabled) is shared by two ``LinearMulticlassMachine`` instances,
    one per encoder, both decoded with ``ECOCHDDecoder``.  ``lawidth`` and
    ``C`` are accepted but not used.  When ``label_test_multiclass`` is not
    ``None`` both accuracies are printed.

    Returns ``(sparse_predictions, dense_predictions)`` as produced by
    ``get_labels()``.
    """
    from modshogun import RealFeatures, MulticlassLabels
    from modshogun import LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine
    from modshogun import (ECOCStrategy, ECOCRandomSparseEncoder,
                           ECOCRandomDenseEncoder, ECOCHDDecoder)
    from modshogun import Math_init_random

    Math_init_random(12345)

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)
    train_labels = MulticlassLabels(label_train_multiclass)

    base_learner = LibLinear(L2R_L2LOSS_SVC)
    base_learner.set_epsilon(epsilon)
    base_learner.set_bias_enabled(True)

    dense_strategy = ECOCStrategy(ECOCRandomDenseEncoder(), ECOCHDDecoder())
    sparse_strategy = ECOCStrategy(ECOCRandomSparseEncoder(), ECOCHDDecoder())

    dense_machine = LinearMulticlassMachine(
        dense_strategy, train_feats, base_learner, train_labels)
    dense_machine.train()
    dense_pred = dense_machine.apply(test_feats)
    dense_out = dense_pred.get_labels()

    sparse_machine = LinearMulticlassMachine(
        sparse_strategy, train_feats, base_learner, train_labels)
    sparse_machine.train()
    sparse_pred = sparse_machine.apply(test_feats)
    sparse_out = sparse_pred.get_labels()

    if label_test_multiclass is None:
        return sparse_out, dense_out

    from modshogun import MulticlassAccuracy
    truth = MulticlassLabels(label_test_multiclass)
    scorer = MulticlassAccuracy()
    dense_accuracy = scorer.evaluate(dense_pred, truth)
    sparse_accuracy = scorer.evaluate(sparse_pred, truth)
    print('Random Dense Accuracy  = %.4f' % dense_accuracy)
    print('Random Sparse Accuracy = %.4f' % sparse_accuracy)
    return sparse_out, dense_out
def classifier_multiclass_ecoc_ovr(
        fm_train_real=traindat, fm_test_real=testdat,
        label_train_multiclass=label_traindat,
        label_test_multiclass=label_testdat,
        lawidth=2.1, C=1, epsilon=1e-5):
    """Train a plain one-vs-rest machine and its ECOC one-vs-rest counterpart.

    Both ``LinearMulticlassMachine`` instances share one
    ``LibLinear(L2R_L2LOSS_SVC)`` base learner (epsilon from ``epsilon``,
    bias enabled).  The first uses ``MulticlassOneVsRestStrategy``; the
    second uses ``ECOCStrategy(ECOCOVREncoder(), ECOCLLBDecoder())``.  The
    number of differing predictions and, when ``label_test_multiclass`` is
    not ``None``, both accuracies are computed but not printed.  ``lawidth``
    and ``C`` are accepted but not used.

    Returns ``(ecoc_predictions, ovr_predictions)`` as produced by
    ``get_labels()``.
    """
    from modshogun import RealFeatures, MulticlassLabels
    from modshogun import LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine
    from modshogun import (ECOCStrategy, ECOCOVREncoder, ECOCLLBDecoder,
                           MulticlassOneVsRestStrategy)

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)
    train_labels = MulticlassLabels(label_train_multiclass)

    base_learner = LibLinear(L2R_L2LOSS_SVC)
    base_learner.set_epsilon(epsilon)
    base_learner.set_bias_enabled(True)

    ovr_machine = LinearMulticlassMachine(
        MulticlassOneVsRestStrategy(), train_feats, base_learner, train_labels)
    ovr_machine.train()
    ovr_pred = ovr_machine.apply(test_feats)
    ovr_out = ovr_pred.get_labels()

    ecoc_machine = LinearMulticlassMachine(
        ECOCStrategy(ECOCOVREncoder(), ECOCLLBDecoder()),
        train_feats, base_learner, train_labels)
    ecoc_machine.train()
    ecoc_pred = ecoc_machine.apply(test_feats)
    ecoc_out = ecoc_pred.get_labels()

    # Count of positions where the two machines disagree (not reported).
    n_diff = (ovr_out != ecoc_out).sum()

    if label_test_multiclass is None:
        return ecoc_out, ovr_out

    from modshogun import MulticlassAccuracy
    truth = MulticlassLabels(label_test_multiclass)
    scorer = MulticlassAccuracy()
    # Accuracies are evaluated but not reported.
    ovr_accuracy = scorer.evaluate(ovr_pred, truth)
    ecoc_accuracy = scorer.evaluate(ecoc_pred, truth)
    return ecoc_out, ovr_out
def knn_classify(traindat, testdat, k=3):
    """Return the KNN test error (1 - accuracy).

    ``traindat`` and ``testdat`` must expose ``features`` and ``labels``
    attributes.  KNN is trained with ``k`` and a ``EuclideanDistance`` over
    the training features, then applied to the test features.
    """
    from modshogun import KNN, MulticlassAccuracy, EuclideanDistance

    train_x = traindat.features
    train_y = traindat.labels
    model = KNN(k, EuclideanDistance(train_x, train_x), train_y)
    model.train()

    test_x = testdat.features
    test_y = testdat.labels
    predictions = model.apply(test_x)

    accuracy = MulticlassAccuracy().evaluate(predictions, test_y)
    return 1 - accuracy
def knn_classify(traindat, testdat, k=3):
    """Compute the KNN misclassification rate on ``testdat``.

    Reads ``features`` and ``labels`` from both arguments, trains ``KNN``
    (``k`` neighbours, ``EuclideanDistance`` on the training features) and
    returns ``1 - accuracy`` of its predictions on the test features.
    """
    from modshogun import KNN, MulticlassAccuracy, EuclideanDistance

    fit_features, fit_labels = traindat.features, traindat.labels
    metric = EuclideanDistance(fit_features, fit_features)
    classifier = KNN(k, metric, fit_labels)
    classifier.train()

    eval_features, eval_labels = testdat.features, testdat.labels
    guessed = classifier.apply(eval_features)

    scorer = MulticlassAccuracy()
    return 1 - scorer.evaluate(guessed, eval_labels)
def lmnn_diagonal(train_features, train_labels, test_features, test_labels,
                  k=1):
    """Train diagonal LMNN + KNN and print train/test error (%).

    ``LMNN`` is built with ``k`` and ``set_diagonal(True)``; the distance
    returned by ``get_distance()`` after training is then used by a ``KNN``
    with the same ``k``.  Nothing is returned.
    """
    from modshogun import LMNN, KNN, MulticlassAccuracy

    metric_learner = LMNN(train_features, train_labels, k)
    metric_learner.set_diagonal(True)
    metric_learner.train()
    distance = metric_learner.get_distance()

    classifier = KNN(k, distance, train_labels)
    classifier.train()

    train_output = classifier.apply()
    test_output = classifier.apply(test_features)

    evaluator = MulticlassAccuracy()
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('LMNN-diagonal training error is %.4f'
          % ((1 - evaluator.evaluate(train_output, train_labels)) * 100))
    print('LMNN-diagonal test error is %.4f'
          % ((1 - evaluator.evaluate(test_output, test_labels)) * 100))
def main(actual, predicted):
    """Log the accuracy and print the confusion matrix of predicted labels.

    Parameters
    ----------
    actual:
        Path handed to ``LibSVMFile``; the labels come from
        ``get_features_and_labels``.
    predicted:
        Path to a text file with one numeric label per line.  Blank lines
        are ignored.
    """
    LOGGER.info("SVM Multiclass evaluator")

    # Load SVMLight dataset
    feats, labels = get_features_and_labels(LibSVMFile(actual))

    # Load predicted labels, skipping blank lines (for example a trailing
    # empty line) that would make float() fail.
    with open(predicted, 'r') as f:
        predicted_labels_arr = np.array(
            [float(line) for line in f if line.strip()])
        predicted_labels = MulticlassLabels(predicted_labels_arr)

    # Evaluate accuracy
    # NOTE(review): the ground-truth labels are passed first here; confirm
    # this is the intended argument order for the confusion matrix.
    multiclass_measures = MulticlassAccuracy()
    LOGGER.info("Accuracy = %s" % multiclass_measures.evaluate(
        labels, predicted_labels))

    LOGGER.info("Confusion matrix:")
    res = multiclass_measures.get_confusion_matrix(labels, predicted_labels)
    # print(...) with one argument is valid on both Python 2 and Python 3.
    print(res)
def lmnn(train_features, train_labels, test_features, test_labels, k=1):
    """Train LMNN + KNN and print train/test error (%).

    ``LMNN`` is trained with ``k``; the distance returned by its
    ``get_distance()`` is then used by a ``KNN`` with the same ``k``.
    Nothing is returned.
    """
    from modshogun import LMNN, KNN, MulticlassAccuracy

    metric_learner = LMNN(train_features, train_labels, k)
    metric_learner.train()
    distance = metric_learner.get_distance()

    classifier = KNN(k, distance, train_labels)
    classifier.train()

    train_output = classifier.apply()
    test_output = classifier.apply(test_features)

    evaluator = MulticlassAccuracy()
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('LMNN training error is %.4f'
          % ((1 - evaluator.evaluate(train_output, train_labels)) * 100))
    print('LMNN test error is %.4f'
          % ((1 - evaluator.evaluate(test_output, test_labels)) * 100))
def classifier_multiclass_ecoc_discriminant(
        fm_train_real=traindat, fm_test_real=testdat,
        label_train_multiclass=label_traindat,
        label_test_multiclass=label_testdat,
        lawidth=2.1, C=1, epsilon=1e-5):
    """Fit an ECOC machine with a discriminant encoder and label the test set.

    The encoder (``ECOCDiscriminantEncoder``) is given the training features
    and labels and 50 SFFS iterations; decoding uses ``ECOCHDDecoder``.  The
    binary base learner is ``LibLinear(L2R_L2LOSS_SVC)`` with epsilon from
    ``epsilon`` and bias enabled.  ``lawidth`` and ``C`` are part of the
    signature but are not read.  If ``label_test_multiclass`` is given, the
    prediction accuracy is printed.

    Returns the predicted test labels obtained via ``get_labels()``.
    """
    from modshogun import RealFeatures, MulticlassLabels
    from modshogun import LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine
    from modshogun import ECOCStrategy, ECOCDiscriminantEncoder, ECOCHDDecoder

    features = RealFeatures(fm_train_real)
    held_out = RealFeatures(fm_test_real)
    targets = MulticlassLabels(label_train_multiclass)

    binary_svm = LibLinear(L2R_L2LOSS_SVC)
    binary_svm.set_epsilon(epsilon)
    binary_svm.set_bias_enabled(True)

    encoder = ECOCDiscriminantEncoder()
    encoder.set_features(features)
    encoder.set_labels(targets)
    encoder.set_sffs_iterations(50)

    strategy = ECOCStrategy(encoder, ECOCHDDecoder())
    ecoc_machine = LinearMulticlassMachine(strategy, features, binary_svm,
                                           targets)
    ecoc_machine.train()

    prediction = ecoc_machine.apply(held_out)
    result = prediction.get_labels()

    if label_test_multiclass is not None:
        from modshogun import MulticlassAccuracy
        expected = MulticlassLabels(label_test_multiclass)
        score = MulticlassAccuracy().evaluate(prediction, expected)
        print('Accuracy = %.4f' % score)

    return result
def run_ecoc(ier, idr):
    """Train one encoder/decoder ECOC combination and score it.

    ``ier`` and ``idr`` index the module-level ``encoders`` and ``decoders``
    name lists; the named classes are looked up on ``modshogun`` and
    instantiated.  Encoders exposing ``set_labels`` are given the training
    labels and features.  Training and test data come from the module-level
    ``fea_train``, ``gnd_train``, ``fea_test`` and ``gnd_test``.

    Returns ``(number_of_machines, accuracy)``; the accuracy is ``None``
    when ``gnd_test`` is ``None``.
    """
    encoder = getattr(modshogun, encoders[ier])()
    decoder = getattr(modshogun, decoders[idr])()

    # Data-dependent encoders need the training labels and features.
    if hasattr(encoder, 'set_labels'):
        encoder.set_labels(gnd_train)
        encoder.set_features(fea_train)

    machine = LinearMulticlassMachine(
        ECOCStrategy(encoder, decoder), fea_train, base_classifier, gnd_train)
    machine.train()
    predictions = machine.apply(fea_test)

    accuracy = None
    if gnd_test is not None:
        accuracy = MulticlassAccuracy().evaluate(predictions, gnd_test)

    return machine.get_num_machines(), accuracy
def classifier_multiclass_ecoc_ovr(
        fm_train_real=traindat, fm_test_real=testdat,
        label_train_multiclass=label_traindat,
        label_test_multiclass=label_testdat,
        lawidth=2.1, C=1, epsilon=1e-5):
    """Fit one-vs-rest directly and through ECOC, and label the test set.

    A single ``LibLinear(L2R_L2LOSS_SVC)`` learner (epsilon from
    ``epsilon``, bias enabled) backs two ``LinearMulticlassMachine``
    objects: one with ``MulticlassOneVsRestStrategy`` and one with
    ``ECOCStrategy(ECOCOVREncoder(), ECOCLLBDecoder())``.  The disagreement
    count and, if ``label_test_multiclass`` is given, the two accuracies are
    calculated without being printed.  ``lawidth`` and ``C`` are part of the
    signature but are not read.

    Returns a tuple ``(ecoc_labels, ovr_labels)`` from ``get_labels()``.
    """
    from modshogun import RealFeatures, MulticlassLabels
    from modshogun import LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine
    from modshogun import (ECOCStrategy, ECOCOVREncoder, ECOCLLBDecoder,
                           MulticlassOneVsRestStrategy)

    features = RealFeatures(fm_train_real)
    held_out = RealFeatures(fm_test_real)
    targets = MulticlassLabels(label_train_multiclass)

    binary_svm = LibLinear(L2R_L2LOSS_SVC)
    binary_svm.set_epsilon(epsilon)
    binary_svm.set_bias_enabled(True)

    plain_ovr = LinearMulticlassMachine(
        MulticlassOneVsRestStrategy(), features, binary_svm, targets)
    plain_ovr.train()
    plain_prediction = plain_ovr.apply(held_out)
    plain_result = plain_prediction.get_labels()

    strategy = ECOCStrategy(ECOCOVREncoder(), ECOCLLBDecoder())
    ecoc_ovr = LinearMulticlassMachine(strategy, features, binary_svm, targets)
    ecoc_ovr.train()
    ecoc_prediction = ecoc_ovr.apply(held_out)
    ecoc_result = ecoc_prediction.get_labels()

    # Number of test points labelled differently by the two machines.
    n_diff = (plain_result != ecoc_result).sum()

    if label_test_multiclass is not None:
        from modshogun import MulticlassAccuracy
        expected = MulticlassLabels(label_test_multiclass)
        scorer = MulticlassAccuracy()
        # Evaluated for both machines; the values are not reported.
        plain_score = scorer.evaluate(plain_prediction, expected)
        ecoc_score = scorer.evaluate(ecoc_prediction, expected)

    return ecoc_result, plain_result
def run_ecoc(ier, idr):
    """Evaluate a single ECOC encoder/decoder pair.

    The encoder and decoder classes are resolved by name on ``Classifier``
    using ``encoders[ier]`` and ``decoders[idr]``.  If the encoder has a
    ``set_labels`` attribute it receives ``gnd_train`` and ``fea_train``.
    A ``LinearMulticlassMachine`` over ``base_classifier`` is trained on the
    module-level training data and applied to ``fea_test``.

    Returns ``(classifier.get_num_machines(), acc)`` where ``acc`` is
    ``None`` if ``gnd_test`` is ``None``.
    """
    encoder = getattr(Classifier, encoders[ier])()
    decoder = getattr(Classifier, decoders[idr])()

    data_dependent = hasattr(encoder, 'set_labels')
    if data_dependent:
        encoder.set_labels(gnd_train)
        encoder.set_features(fea_train)

    ecoc = ECOCStrategy(encoder, decoder)
    machine = LinearMulticlassMachine(ecoc, fea_train, base_classifier,
                                      gnd_train)
    machine.train()
    predicted = machine.apply(fea_test)

    if gnd_test is None:
        score = None
    else:
        score = MulticlassAccuracy().evaluate(predicted, gnd_test)

    return (machine.get_num_machines(), score)
def lmnn(train_features, train_labels, test_features, test_labels, k=1):
    """Fit LMNN, classify with KNN on its distance, print train/test error (%).

    ``k`` is passed to both ``LMNN`` and ``KNN``; the KNN distance is the
    object returned by ``get_distance()`` on the trained LMNN.  Nothing is
    returned.
    """
    from modshogun import LMNN, KNN, MulticlassAccuracy

    learner = LMNN(train_features, train_labels, k)
    learner.train()
    distance = learner.get_distance()

    neighbours = KNN(k, distance, train_labels)
    neighbours.train()

    train_output = neighbours.apply()
    test_output = neighbours.apply(test_features)

    evaluator = MulticlassAccuracy()
    # print(...) with one argument is valid on both Python 2 and Python 3.
    print('LMNN training error is %.4f' % (
        (1 - evaluator.evaluate(train_output, train_labels)) * 100))
    print('LMNN test error is %.4f' % (
        (1 - evaluator.evaluate(test_output, test_labels)) * 100))
def mkl(train_features, train_labels, test_features, test_labels, width=5,
        C=1.2, epsilon=1e-2, mkl_epsilon=0.001, mkl_norm=2):
    """Fit MKLMulticlass over three sub-kernels and print train/test error (%).

    Sub-kernels, in order: ``GaussianKernel(10, width)``, ``LinearKernel()``
    and ``PolyKernel(10, 2)``.  For each one the same train/test feature
    object is appended to the matching ``CombinedFeatures``.  ``C``,
    ``epsilon``, ``mkl_epsilon`` and ``mkl_norm`` configure
    ``MKLMulticlass``.  Nothing is returned.
    """
    from modshogun import CombinedKernel, CombinedFeatures
    from modshogun import GaussianKernel, LinearKernel, PolyKernel
    from modshogun import MKLMulticlass, MulticlassAccuracy

    kernel = CombinedKernel()
    feats_train = CombinedFeatures()
    feats_test = CombinedFeatures()

    # Each sub-kernel gets its own feature entry, all over the same data.
    for subkernel in (GaussianKernel(10, width),
                      LinearKernel(),
                      PolyKernel(10, 2)):
        feats_train.append_feature_obj(train_features)
        feats_test.append_feature_obj(test_features)
        kernel.append_kernel(subkernel)

    kernel.init(feats_train, feats_train)

    learner = MKLMulticlass(C, kernel, train_labels)
    learner.set_epsilon(epsilon)
    learner.set_mkl_epsilon(mkl_epsilon)
    learner.set_mkl_norm(mkl_norm)
    learner.train()
    train_output = learner.apply()

    # Switch the kernel's right-hand side to the test features, then
    # predict again.
    kernel.init(feats_train, feats_test)
    test_output = learner.apply()

    evaluator = MulticlassAccuracy()
    # print(...) with one argument is valid on both Python 2 and Python 3.
    print('MKL training error is %.4f'
          % ((1 - evaluator.evaluate(train_output, train_labels)) * 100))
    print('MKL test error is %.4f'
          % ((1 - evaluator.evaluate(test_output, test_labels)) * 100))
def lmnn_classify(traindat, testdat, k=3):
    """Return the test error (1 - accuracy) of KNN on an LMNN distance.

    ``traindat`` and ``testdat`` must expose ``features`` and ``labels``
    attributes.  ``LMNN`` is configured with ``set_maxiter(1200)`` and its
    log level set to ``MSG_DEBUG`` before training; ``KNN`` then uses the
    distance from ``get_distance()``.  ``k`` is used for both.
    """
    from modshogun import LMNN, KNN, MulticlassAccuracy, MSG_DEBUG

    train_x = traindat.features
    train_y = traindat.labels

    metric_learner = LMNN(train_x, train_y, k)
    metric_learner.set_maxiter(1200)
    metric_learner.io.set_loglevel(MSG_DEBUG)
    metric_learner.train()

    model = KNN(k, metric_learner.get_distance(), train_y)
    model.train()

    test_x = testdat.features
    test_y = testdat.labels
    predictions = model.apply(test_x)

    accuracy = MulticlassAccuracy().evaluate(predictions, test_y)
    return 1 - accuracy
def lmnn_classify(traindat, testdat, k=3):
    """Compute the misclassification rate of LMNN + KNN on ``testdat``.

    Reads ``features`` and ``labels`` from both arguments.  The ``LMNN``
    instance runs with ``set_maxiter(1200)`` and ``MSG_DEBUG`` logging; the
    distance it yields via ``get_distance()`` drives a ``KNN`` with the same
    ``k``.  Returns ``1 - accuracy`` on the test features.
    """
    from modshogun import LMNN, KNN, MulticlassAccuracy, MSG_DEBUG

    fit_features, fit_labels = traindat.features, traindat.labels

    learner = LMNN(fit_features, fit_labels, k)
    learner.set_maxiter(1200)
    learner.io.set_loglevel(MSG_DEBUG)
    learner.train()
    learned_distance = learner.get_distance()

    classifier = KNN(k, learned_distance, fit_labels)
    classifier.train()

    eval_features, eval_labels = testdat.features, testdat.labels
    guessed = classifier.apply(eval_features)

    scorer = MulticlassAccuracy()
    return 1 - scorer.evaluate(guessed, eval_labels)
def classifier_multiclass_ecoc_random(
        fm_train_real=traindat, fm_test_real=testdat,
        label_train_multiclass=label_traindat,
        label_test_multiclass=label_testdat,
        lawidth=2.1, C=1, epsilon=1e-5):
    """Fit ECOC machines with random dense/sparse codes and label the test set.

    After ``Math_init_random(12345)``, a dense-encoder machine and then a
    sparse-encoder machine are trained; both use ``ECOCHDDecoder`` and the
    same ``LibLinear(L2R_L2LOSS_SVC)`` learner (epsilon from ``epsilon``,
    bias enabled).  ``lawidth`` and ``C`` are part of the signature but are
    not read.  If ``label_test_multiclass`` is given, both accuracies are
    printed.

    Returns a tuple ``(sparse_labels, dense_labels)`` from ``get_labels()``.
    """
    from modshogun import RealFeatures, MulticlassLabels
    from modshogun import LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine
    from modshogun import (ECOCStrategy, ECOCRandomSparseEncoder,
                           ECOCRandomDenseEncoder, ECOCHDDecoder)
    from modshogun import Math_init_random

    Math_init_random(12345)

    features = RealFeatures(fm_train_real)
    held_out = RealFeatures(fm_test_real)
    targets = MulticlassLabels(label_train_multiclass)

    binary_svm = LibLinear(L2R_L2LOSS_SVC)
    binary_svm.set_epsilon(epsilon)
    binary_svm.set_bias_enabled(True)

    strategy_dense = ECOCStrategy(ECOCRandomDenseEncoder(), ECOCHDDecoder())
    strategy_sparse = ECOCStrategy(ECOCRandomSparseEncoder(), ECOCHDDecoder())

    machine_dense = LinearMulticlassMachine(strategy_dense, features,
                                            binary_svm, targets)
    machine_dense.train()
    prediction_dense = machine_dense.apply(held_out)
    result_dense = prediction_dense.get_labels()

    machine_sparse = LinearMulticlassMachine(strategy_sparse, features,
                                             binary_svm, targets)
    machine_sparse.train()
    prediction_sparse = machine_sparse.apply(held_out)
    result_sparse = prediction_sparse.get_labels()

    if label_test_multiclass is not None:
        from modshogun import MulticlassAccuracy
        expected = MulticlassLabels(label_test_multiclass)
        scorer = MulticlassAccuracy()
        score_dense = scorer.evaluate(prediction_dense, expected)
        score_sparse = scorer.evaluate(prediction_sparse, expected)
        print('Random Dense Accuracy  = %.4f' % score_dense)
        print('Random Sparse Accuracy = %.4f' % score_sparse)

    return result_sparse, result_dense
def classifier_multiclassliblinear_modular(
        fm_train_real=traindat, fm_test_real=testdat,
        label_train_multiclass=label_traindat,
        label_test_multiclass=label_testdat,
        width=2.1, C=1, epsilon=1e-5):
    """Fit MulticlassLibLinear on the training data and label the test data.

    ``C`` is the first ``MulticlassLibLinear`` constructor argument;
    ``width`` and ``epsilon`` are part of the signature but are not read.
    If ``label_test_multiclass`` is given, the prediction accuracy is
    printed.

    Returns the predicted test labels obtained via ``get_labels()``.
    """
    from modshogun import RealFeatures, MulticlassLabels
    from modshogun import MulticlassLibLinear

    features = RealFeatures(fm_train_real)
    held_out = RealFeatures(fm_test_real)
    targets = MulticlassLabels(label_train_multiclass)

    learner = MulticlassLibLinear(C, features, targets)
    learner.train()

    prediction = learner.apply(held_out)
    result = prediction.get_labels()

    if label_test_multiclass is not None:
        from modshogun import MulticlassAccuracy
        expected = MulticlassLabels(label_test_multiclass)
        score = MulticlassAccuracy().evaluate(prediction, expected)
        print('Accuracy = %.4f' % score)

    return result
def lmnn_diagonal(train_features, train_labels, test_features, test_labels,
                  k=1):
    """Fit diagonal LMNN, classify with KNN, print train/test error (%).

    ``set_diagonal(True)`` is applied to the ``LMNN`` instance before
    training.  ``k`` is passed to both ``LMNN`` and ``KNN``; the KNN
    distance is the object returned by ``get_distance()``.  Nothing is
    returned.
    """
    from modshogun import LMNN, KNN, MulticlassAccuracy

    learner = LMNN(train_features, train_labels, k)
    learner.set_diagonal(True)
    learner.train()
    distance = learner.get_distance()

    neighbours = KNN(k, distance, train_labels)
    neighbours.train()

    train_output = neighbours.apply()
    test_output = neighbours.apply(test_features)

    evaluator = MulticlassAccuracy()
    # print(...) with one argument is valid on both Python 2 and Python 3.
    print('LMNN-diagonal training error is %.4f' % (
        (1 - evaluator.evaluate(train_output, train_labels)) * 100))
    print('LMNN-diagonal test error is %.4f' % (
        (1 - evaluator.evaluate(test_output, test_labels)) * 100))
def evaluate(labels, feats, params={'n_neighbors': 2,
                                    'use_cover_tree': 'True',
                                    'dist': 'Manhattan'}, Nsplit=2):
    """
    Run cross-validation to evaluate the KNN.

    Parameters
    ----------
    labels: label object
        Data set labels. Must support add_subset(), remove_subset() and
        get_num_labels() (presumably Shogun MulticlassLabels -- confirm
        against callers).
    feats: feature object
        Data set features. Must support add_subset() and remove_subset().
    params: dictionary
        Settings for the classifier (only read, never modified here):
        'n_neighbors' is the k passed to KNN; 'use_cover_tree' selects
        KNN_COVER_TREE when it equals the string 'True' and KNN_BRUTE
        otherwise; 'dist' selects EuclideanDistance when it equals
        'Euclidean' and ManhattanMetric for any other value.
    Nsplit: int, default = 2
        The n for n-fold cross validation.

    Returns
    -------
    accuracy: numpy array of length Nsplit
        Test accuracy of each fold. The mean is also printed.
    """
    k = params.get('n_neighbors')
    use_cover_tree = params.get('use_cover_tree') == 'True'

    # The comparison must be applied to the looked-up value. The previous
    # form, params.get('dist' == 'Euclidean'), looked up the key False and
    # therefore always fell through to ManhattanMetric.
    if params.get('dist') == 'Euclidean':
        func_dist = EuclideanDistance
    else:
        func_dist = ManhattanMetric

    # time.clock no longer exists on Python >= 3.8; prefer perf_counter and
    # only touch time.clock on interpreters that lack it.
    timer = getattr(time, 'perf_counter', None) or time.clock

    split = CrossValidationSplitting(labels, Nsplit)
    split.build_subsets()

    accuracy = np.zeros(Nsplit)
    # Training accuracy and per-sample prediction time are recorded per fold
    # for inspection only; they are not part of the return value.
    acc_train = np.zeros(accuracy.shape)
    time_test = np.zeros(accuracy.shape)

    for i in range(Nsplit):
        idx_train = split.generate_subset_inverse(i)
        idx_test = split.generate_subset_indices(i)

        # Restrict data to the training part of this fold.
        feats.add_subset(idx_train)
        labels.add_subset(idx_train)

        dist = func_dist(feats, feats)
        knn = KNN(k, dist, labels)
        knn.set_store_model_features(True)
        if use_cover_tree:
            knn.set_knn_solver_type(KNN_COVER_TREE)
        else:
            knn.set_knn_solver_type(KNN_BRUTE)
        knn.train()

        evaluator = MulticlassAccuracy()
        pred = knn.apply_multiclass()
        acc_train[i] = evaluator.evaluate(pred, labels)

        feats.remove_subset()
        labels.remove_subset()

        # Switch to the held-out part of this fold.
        feats.add_subset(idx_test)
        labels.add_subset(idx_test)

        t_start = timer()
        pred = knn.apply_multiclass(feats)
        time_test[i] = (timer() - t_start) / labels.get_num_labels()

        accuracy[i] = evaluator.evaluate(pred, labels)

        feats.remove_subset()
        labels.remove_subset()

    print(accuracy.mean())
    return accuracy
def evaluation_cross_validation_multiclass_storage(traindat=traindat,
                                                   label_traindat=label_traindat):
    """Cross-validate an MKLMulticlass machine and read back stored fold data.

    The same RealFeatures object is appended three times to a
    CombinedFeatures container and paired with a CombinedKernel of three
    GaussianKernel instances. Cross-validation uses a stratified splitting
    into 3 subsets, MulticlassAccuracy as criterion and 3 runs. A
    CrossValidationMulticlassStorage with an F1Measure binary evaluation is
    attached as output.

    Parameters:
        traindat: training data handed to RealFeatures.
        label_traindat: training labels handed to MulticlassLabels.

    Returns:
        Tuple of
        - storage.get_fold_ROC(0, 0, 0)
        - storage.get_fold_evaluation_result(0, 0, 0, 0); the only binary
          evaluation registered is F1Measure, so this is presumably an F1
          value rather than an AUC -- confirm against the Shogun API.
    """
    from modshogun import CrossValidation, CrossValidationResult
    from modshogun import CrossValidationPrintOutput
    from modshogun import CrossValidationMKLStorage, CrossValidationMulticlassStorage
    from modshogun import MulticlassAccuracy, F1Measure
    from modshogun import StratifiedCrossValidationSplitting
    from modshogun import MulticlassLabels
    from modshogun import RealFeatures, CombinedFeatures
    from modshogun import GaussianKernel, CombinedKernel
    from modshogun import MKLMulticlass
    from modshogun import Statistics, MSG_DEBUG, Math

    Math.init_random(1)

    # Training data: one feature object, appended once per sub-kernel.
    base_features = RealFeatures(traindat)
    stacked_features = CombinedFeatures()
    for _ in range(3):
        stacked_features.append_feature_obj(base_features)
    targets = MulticlassLabels(label_traindat)

    # Three Gaussian kernels that differ only in their second constructor
    # argument (presumably the kernel width -- confirm).
    multi_kernel = CombinedKernel()
    for second_arg in (0.1, 1, 2):
        multi_kernel.append_kernel(GaussianKernel(10, second_arg))

    # Note carried over from the original author: interleaved optimisation
    # is not possible here because of a memory bug.
    mkl = MKLMulticlass(1.0, multi_kernel, targets)
    mkl.set_kernel(multi_kernel)

    # Stratified splitting into 3 subsets.
    splitter = StratifiedCrossValidationSplitting(targets, 3)

    criterion = MulticlassAccuracy()

    cv = CrossValidation(mkl, stacked_features, targets, splitter, criterion)
    cv.set_autolock(False)

    # Only the multiclass storage is attached as output;
    # CrossValidationPrintOutput and CrossValidationMKLStorage are imported
    # above but intentionally not attached.
    storage = CrossValidationMulticlassStorage()
    storage.append_binary_evaluation(F1Measure())
    cv.add_cross_validation_output(storage)
    cv.set_num_runs(3)

    # The aggregate result is not used; the values returned below are read
    # from the storage object.
    outcome = cv.evaluate()

    fold_roc = storage.get_fold_ROC(0, 0, 0)
    fold_eval = storage.get_fold_evaluation_result(0, 0, 0, 0)
    return fold_roc, fold_eval