def classifier_multiclasslinearmachine_modular (fm_train_real=traindat,fm_test_real=testdat,label_train_multiclass=label_traindat,label_test_multiclass=label_testdat,width=2.1,C=1,epsilon=1e-5):
    """Train a one-vs-one linear multiclass machine with LibLinear base
    learners and classify the test set.

    Returns the predicted test labels as a numpy array; when test labels are
    supplied, also prints the multiclass accuracy.
    """
    from modshogun import RealFeatures, MulticlassLabels
    from modshogun import LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine, MulticlassOneVsOneStrategy, MulticlassOneVsRestStrategy

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)
    labels = MulticlassLabels(label_train_multiclass)

    classifier = LibLinear(L2R_L2LOSS_SVC)
    classifier.set_epsilon(epsilon)
    classifier.set_bias_enabled(True)

    mc_classifier = LinearMulticlassMachine(MulticlassOneVsOneStrategy(), feats_train, classifier, labels)
    mc_classifier.train()

    # BUG FIX: the original called apply() with no argument, which predicts on
    # the training features, yet the accuracy below is measured against the
    # *test* labels (and feats_test was built but never used). Predict on the
    # test set instead.
    label_pred = mc_classifier.apply(feats_test)
    out = label_pred.get_labels()

    if label_test_multiclass is not None:
        from modshogun import MulticlassAccuracy
        labels_test = MulticlassLabels(label_test_multiclass)
        evaluator = MulticlassAccuracy()
        acc = evaluator.evaluate(label_pred, labels_test)
        print('Accuracy = %.4f' % acc)

    return out
def run_classification(train, test, labels):
    """Train a one-vs-rest linear multiclass machine on *train*/*labels*
    and classify *test*.

    Returns the multiclass labels predicted for *test*.
    """
    # C1 = C2 = 5 for the LibLinear base learner
    lin = LibLinear(L2R_L2LOSS_SVC)
    lin.set_bias_enabled(True)
    lin.set_C(5., 5.)

    machine = LinearMulticlassMachine(MulticlassOneVsRestStrategy(), train, lin, labels)
    machine.train()

    pred = machine.apply_multiclass(test)
    # BUG FIX: the original computed the predictions and dropped them on the
    # floor; return them so callers can use the result.
    return pred
def classifier_multiclass_ecoc_discriminant (fm_train_real=traindat,fm_test_real=testdat,label_train_multiclass=label_traindat,label_test_multiclass=label_testdat,lawidth=2.1,C=1,epsilon=1e-5):
    """ECOC multiclass classification using a data-dependent discriminant
    encoder and Hamming-distance decoding over LibLinear base learners.

    Returns the predicted test labels; prints accuracy when test labels
    are available.
    """
    from modshogun import RealFeatures, MulticlassLabels
    from modshogun import LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine
    from modshogun import ECOCStrategy, ECOCDiscriminantEncoder, ECOCHDDecoder

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)
    labels = MulticlassLabels(label_train_multiclass)

    # base binary learner shared by all ECOC sub-problems
    base_svm = LibLinear(L2R_L2LOSS_SVC)
    base_svm.set_epsilon(epsilon)
    base_svm.set_bias_enabled(True)

    # the discriminant encoder is data-dependent: it needs the training data
    encoder = ECOCDiscriminantEncoder()
    encoder.set_features(feats_train)
    encoder.set_labels(labels)
    encoder.set_sffs_iterations(50)

    strategy = ECOCStrategy(encoder, ECOCHDDecoder())

    mc_machine = LinearMulticlassMachine(strategy, feats_train, base_svm, labels)
    mc_machine.train()

    label_pred = mc_machine.apply(feats_test)
    out = label_pred.get_labels()

    if label_test_multiclass is not None:
        from modshogun import MulticlassAccuracy
        labels_test = MulticlassLabels(label_test_multiclass)
        evaluator = MulticlassAccuracy()
        acc = evaluator.evaluate(label_pred, labels_test)
        print('Accuracy = %.4f' % acc)

    return out
def evaluation_cross_validation_classification(traindat=traindat, label_traindat=label_traindat):
    """Run 10x repeated, stratified 5-fold cross-validation of a LibLinear
    SVC on binary-labelled data.

    Returns the CrossValidationResult produced by the evaluation.
    """
    from modshogun import CrossValidation, CrossValidationResult
    from modshogun import ContingencyTableEvaluation, ACCURACY
    from modshogun import StratifiedCrossValidationSplitting
    from modshogun import BinaryLabels
    from modshogun import RealFeatures
    from modshogun import LibLinear, L2R_L2LOSS_SVC

    # training data
    features = RealFeatures(traindat)
    labels = BinaryLabels(label_traindat)

    # classifier
    classifier = LibLinear(L2R_L2LOSS_SVC)

    # splitting strategy for 5-fold cross-validation (for classification it is
    # better to use stratified splitting; the plain CrossValidationSplitting
    # is also available)
    splitting_strategy = StratifiedCrossValidationSplitting(labels, 5)

    # evaluation method: accuracy from the contingency table
    evaluation_criterium = ContingencyTableEvaluation(ACCURACY)

    # cross-validation instance
    cross_validation = CrossValidation(classifier, features, labels, splitting_strategy, evaluation_criterium)
    cross_validation.set_autolock(False)

    # (optional) repeat x-val 10 times
    cross_validation.set_num_runs(10)

    # perform cross-validation
    result = cross_validation.evaluate()

    # BUG FIX: the original computed the result and discarded it; return it so
    # the caller can inspect the cross-validation outcome.
    return result
def modelselection_random_search_liblinear_modular (traindat=traindat, label_traindat=label_traindat):
    """Random-search model selection of LibLinear's C1/C2 parameters using
    stratified 10-fold cross-validation.

    Returns (best_parameters, result): the selected parameter combination and
    the cross-validation result obtained after applying it.
    """
    from modshogun import CrossValidation, CrossValidationResult
    from modshogun import ContingencyTableEvaluation, ACCURACY
    from modshogun import StratifiedCrossValidationSplitting
    from modshogun import RandomSearchModelSelection
    from modshogun import ModelSelectionParameters, R_EXP
    from modshogun import ParameterCombination
    from modshogun import BinaryLabels
    from modshogun import RealFeatures
    from modshogun import LibLinear, L2R_L2LOSS_SVC

    # build parameter tree to select C1 and C2, each over an exponential
    # grid 10^-2 .. 10^2
    param_tree_root = ModelSelectionParameters()
    c1 = ModelSelectionParameters("C1")
    param_tree_root.append_child(c1)
    c1.build_values(-2.0, 2.0, R_EXP)
    c2 = ModelSelectionParameters("C2")
    param_tree_root.append_child(c2)
    c2.build_values(-2.0, 2.0, R_EXP)

    # training data
    features = RealFeatures(traindat)
    labels = BinaryLabels(label_traindat)

    # classifier
    classifier = LibLinear(L2R_L2LOSS_SVC)

    # splitting strategy for cross-validation
    splitting_strategy = StratifiedCrossValidationSplitting(labels, 10)

    # evaluation method
    evaluation_criterium = ContingencyTableEvaluation(ACCURACY)

    # cross-validation instance
    cross_validation = CrossValidation(classifier, features, labels, splitting_strategy, evaluation_criterium)
    cross_validation.set_autolock(False)

    # model selection instance; 0.5 is the sampling ratio of parameter
    # combinations to try (see RandomSearchModelSelection docs)
    model_selection = RandomSearchModelSelection(cross_validation, param_tree_root, 0.5)

    best_parameters = model_selection.select_model()

    # apply the selected parameters and evaluate once more
    best_parameters.apply_to_machine(classifier)
    result = cross_validation.evaluate()

    # BUG FIX: the original discarded both the selected parameters and the
    # final evaluation; return them so the caller can use the outcome.
    return best_parameters, result
def classifier_multiclass_ecoc_discriminant( fm_train_real=traindat, fm_test_real=testdat, label_train_multiclass=label_traindat, label_test_multiclass=label_testdat, lawidth=2.1, C=1, epsilon=1e-5):
    """Multiclass classification via an ECOC discriminant encoding.

    A LibLinear binary learner is plugged into a LinearMulticlassMachine
    driven by an ECOCDiscriminantEncoder / ECOCHDDecoder strategy. Returns
    the predicted test labels and prints the accuracy when ground truth for
    the test set is given.
    """
    from modshogun import RealFeatures, MulticlassLabels
    from modshogun import LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine
    from modshogun import ECOCStrategy, ECOCDiscriminantEncoder, ECOCHDDecoder

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)
    train_labels = MulticlassLabels(label_train_multiclass)

    # binary sub-problem solver
    liblinear = LibLinear(L2R_L2LOSS_SVC)
    liblinear.set_epsilon(epsilon)
    liblinear.set_bias_enabled(True)

    # discriminant encoder learns its code book from the training data
    disc_encoder = ECOCDiscriminantEncoder()
    disc_encoder.set_features(train_feats)
    disc_encoder.set_labels(train_labels)
    disc_encoder.set_sffs_iterations(50)

    ecoc = ECOCStrategy(disc_encoder, ECOCHDDecoder())
    machine = LinearMulticlassMachine(ecoc, train_feats, liblinear, train_labels)
    machine.train()

    predicted = machine.apply(test_feats)
    out = predicted.get_labels()

    if label_test_multiclass is not None:
        from modshogun import MulticlassAccuracy
        ground_truth = MulticlassLabels(label_test_multiclass)
        acc = MulticlassAccuracy().evaluate(predicted, ground_truth)
        print('Accuracy = %.4f' % acc)

    return out
def classifier_multiclass_ecoc_ovr (fm_train_real=traindat,fm_test_real=testdat,label_train_multiclass=label_traindat,label_test_multiclass=label_testdat,lawidth=2.1,C=1,epsilon=1e-5):
    """Compare plain one-vs-rest multiclass classification with its ECOC
    one-vs-rest formulation on the same LibLinear base learner.

    Returns (ecoc_predictions, ovr_predictions) as numpy arrays.
    """
    from modshogun import RealFeatures, MulticlassLabels
    from modshogun import LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine
    from modshogun import ECOCStrategy, ECOCOVREncoder, ECOCLLBDecoder, MulticlassOneVsRestStrategy

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)
    labels = MulticlassLabels(label_train_multiclass)

    base_learner = LibLinear(L2R_L2LOSS_SVC)
    base_learner.set_epsilon(epsilon)
    base_learner.set_bias_enabled(True)

    # plain one-vs-rest machine
    ovr_machine = LinearMulticlassMachine(MulticlassOneVsRestStrategy(), feats_train, base_learner, labels)
    ovr_machine.train()
    label_mc = ovr_machine.apply(feats_test)
    out_mc = label_mc.get_labels()

    # same problem expressed as an ECOC one-vs-rest encoding
    ecoc_machine = LinearMulticlassMachine(ECOCStrategy(ECOCOVREncoder(), ECOCLLBDecoder()), feats_train, base_learner, labels)
    ecoc_machine.train()
    label_ecoc = ecoc_machine.apply(feats_test)
    out_ecoc = label_ecoc.get_labels()

    # number of disagreements between the two formulations (diagnostic only)
    n_diff = (out_mc != out_ecoc).sum()

    if label_test_multiclass is not None:
        from modshogun import MulticlassAccuracy
        labels_test = MulticlassLabels(label_test_multiclass)
        evaluator = MulticlassAccuracy()
        acc_mc = evaluator.evaluate(label_mc, labels_test)
        acc_ecoc = evaluator.evaluate(label_ecoc, labels_test)

    return out_ecoc, out_mc
def classifier_multiclasslinearmachine_modular( fm_train_real=traindat, fm_test_real=testdat, label_train_multiclass=label_traindat, label_test_multiclass=label_testdat, width=2.1, C=1, epsilon=1e-5):
    """One-vs-one linear multiclass classification with LibLinear base
    learners; returns the predicted test labels.

    Prints multiclass accuracy when test labels are provided.
    """
    from modshogun import RealFeatures, MulticlassLabels
    from modshogun import LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine, MulticlassOneVsOneStrategy, MulticlassOneVsRestStrategy

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)
    labels = MulticlassLabels(label_train_multiclass)

    classifier = LibLinear(L2R_L2LOSS_SVC)
    classifier.set_epsilon(epsilon)
    classifier.set_bias_enabled(True)

    mc_classifier = LinearMulticlassMachine(MulticlassOneVsOneStrategy(), feats_train, classifier, labels)
    mc_classifier.train()

    # BUG FIX: apply() with no argument predicts on the stored training
    # features, but the evaluation below uses the *test* labels (and
    # feats_test was otherwise unused). Classify the test features instead.
    label_pred = mc_classifier.apply(feats_test)
    out = label_pred.get_labels()

    if label_test_multiclass is not None:
        from modshogun import MulticlassAccuracy
        labels_test = MulticlassLabels(label_test_multiclass)
        evaluator = MulticlassAccuracy()
        acc = evaluator.evaluate(label_pred, labels_test)
        print('Accuracy = %.4f' % acc)

    return out
def classifier_multiclass_ecoc_random (fm_train_real=traindat,fm_test_real=testdat,label_train_multiclass=label_traindat,label_test_multiclass=label_testdat,lawidth=2.1,C=1,epsilon=1e-5):
    """ECOC multiclass classification with random dense and random sparse
    code books, both decoded by Hamming distance.

    Returns (sparse_predictions, dense_predictions); prints both accuracies
    when test labels are supplied.
    """
    from modshogun import RealFeatures, MulticlassLabels
    from modshogun import LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine
    from modshogun import ECOCStrategy, ECOCRandomSparseEncoder, ECOCRandomDenseEncoder, ECOCHDDecoder
    from modshogun import Math_init_random

    # fixed seed keeps the random code books reproducible
    Math_init_random(12345)

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)
    labels = MulticlassLabels(label_train_multiclass)

    base_svm = LibLinear(L2R_L2LOSS_SVC)
    base_svm.set_epsilon(epsilon)
    base_svm.set_bias_enabled(True)

    dense_strategy = ECOCStrategy(ECOCRandomDenseEncoder(), ECOCHDDecoder())
    sparse_strategy = ECOCStrategy(ECOCRandomSparseEncoder(), ECOCHDDecoder())

    # dense code book first, then sparse (order matters for the shared RNG)
    dense_machine = LinearMulticlassMachine(dense_strategy, feats_train, base_svm, labels)
    dense_machine.train()
    label_dense = dense_machine.apply(feats_test)
    out_dense = label_dense.get_labels()

    sparse_machine = LinearMulticlassMachine(sparse_strategy, feats_train, base_svm, labels)
    sparse_machine.train()
    label_sparse = sparse_machine.apply(feats_test)
    out_sparse = label_sparse.get_labels()

    if label_test_multiclass is not None:
        from modshogun import MulticlassAccuracy
        labels_test = MulticlassLabels(label_test_multiclass)
        evaluator = MulticlassAccuracy()
        acc_dense = evaluator.evaluate(label_dense, labels_test)
        acc_sparse = evaluator.evaluate(label_sparse, labels_test)
        print('Random Dense Accuracy = %.4f' % acc_dense)
        print('Random Sparse Accuracy = %.4f' % acc_sparse)

    return out_sparse, out_dense
def classifier_multiclass_ecoc_random(fm_train_real=traindat, fm_test_real=testdat, label_train_multiclass=label_traindat, label_test_multiclass=label_testdat, lawidth=2.1, C=1, epsilon=1e-5):
    """Compare random-dense and random-sparse ECOC encodings (HD decoding)
    on the same multiclass problem.

    Returns (sparse_predictions, dense_predictions) and prints accuracies
    when test labels are available.
    """
    from modshogun import RealFeatures, MulticlassLabels
    from modshogun import LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine
    from modshogun import ECOCStrategy, ECOCRandomSparseEncoder, ECOCRandomDenseEncoder, ECOCHDDecoder
    from modshogun import Math_init_random
    Math_init_random(12345)  # deterministic random code books

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)
    labels = MulticlassLabels(label_train_multiclass)

    classifier = LibLinear(L2R_L2LOSS_SVC)
    classifier.set_epsilon(epsilon)
    classifier.set_bias_enabled(True)

    def _train_and_predict(strategy):
        # train one ECOC machine with the given strategy and classify the
        # test set; returns (label object, numpy label array)
        machine = LinearMulticlassMachine(strategy, feats_train, classifier, labels)
        machine.train()
        pred = machine.apply(feats_test)
        return pred, pred.get_labels()

    # dense first, sparse second -- same order as the shared RNG expects
    label_dense, out_dense = _train_and_predict(ECOCStrategy(ECOCRandomDenseEncoder(), ECOCHDDecoder()))
    label_sparse, out_sparse = _train_and_predict(ECOCStrategy(ECOCRandomSparseEncoder(), ECOCHDDecoder()))

    if label_test_multiclass is not None:
        from modshogun import MulticlassAccuracy
        labels_test = MulticlassLabels(label_test_multiclass)
        evaluator = MulticlassAccuracy()
        acc_dense = evaluator.evaluate(label_dense, labels_test)
        acc_sparse = evaluator.evaluate(label_sparse, labels_test)
        print('Random Dense Accuracy = %.4f' % acc_dense)
        print('Random Sparse Accuracy = %.4f' % acc_sparse)

    return out_sparse, out_dense
def classifier_multiclass_ecoc_ovr(fm_train_real=traindat, fm_test_real=testdat, label_train_multiclass=label_traindat, label_test_multiclass=label_testdat, lawidth=2.1, C=1, epsilon=1e-5):
    """One-vs-rest multiclass classification, once through the plain OvR
    strategy and once through its ECOC equivalent, on shared LibLinear
    base learners.

    Returns (ecoc_predictions, ovr_predictions).
    """
    from modshogun import RealFeatures, MulticlassLabels
    from modshogun import LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine
    from modshogun import ECOCStrategy, ECOCOVREncoder, ECOCLLBDecoder, MulticlassOneVsRestStrategy

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)
    train_labels = MulticlassLabels(label_train_multiclass)

    liblinear = LibLinear(L2R_L2LOSS_SVC)
    liblinear.set_epsilon(epsilon)
    liblinear.set_bias_enabled(True)

    # plain one-vs-rest
    plain_machine = LinearMulticlassMachine(MulticlassOneVsRestStrategy(), train_feats, liblinear, train_labels)
    plain_machine.train()
    label_mc = plain_machine.apply(test_feats)
    out_mc = label_mc.get_labels()

    # ECOC formulation of one-vs-rest
    ovr_strategy = ECOCStrategy(ECOCOVREncoder(), ECOCLLBDecoder())
    ecoc_machine = LinearMulticlassMachine(ovr_strategy, train_feats, liblinear, train_labels)
    ecoc_machine.train()
    label_ecoc = ecoc_machine.apply(test_feats)
    out_ecoc = label_ecoc.get_labels()

    # how often the two formulations disagree (kept for debugging)
    n_diff = (out_mc != out_ecoc).sum()

    if label_test_multiclass is not None:
        from modshogun import MulticlassAccuracy
        ground_truth = MulticlassLabels(label_test_multiclass)
        evaluator = MulticlassAccuracy()
        acc_mc = evaluator.evaluate(label_mc, ground_truth)
        acc_ecoc = evaluator.evaluate(label_ecoc, ground_truth)

    return out_ecoc, out_mc
def classifier_liblinear_modular (train_fname, test_fname, label_fname, C, epsilon):
    """Binary classification with LibLinear (dual L2-regularized L2-loss
    solver) on CSV data files.

    Returns (predictions, svm, prediction_labels).
    """
    from modshogun import RealFeatures, SparseRealFeatures, BinaryLabels
    from modshogun import LibLinear, L2R_L2LOSS_SVC_DUAL
    from modshogun import Math_init_random, CSVFile

    # fixed seed keeps the example deterministic
    Math_init_random(17)

    train_feats = RealFeatures(CSVFile(train_fname))
    test_feats = RealFeatures(CSVFile(test_fname))
    train_labels = BinaryLabels(CSVFile(label_fname))

    model = LibLinear(C, train_feats, train_labels)
    model.set_liblinear_solver_type(L2R_L2LOSS_SVC_DUAL)
    model.set_epsilon(epsilon)
    model.set_bias_enabled(True)
    model.train()

    preds = model.apply(test_feats)
    return preds, model, preds.get_labels()
def main():
    """Train a LibLinear SVM separating counterclockwise-circle gestures
    (label +1) from letter-Z gestures (label -1) on 10-dimensional features.

    The data is z-score normalized (parameters fitted on the training split
    and saved to disk), plotted for inspection, evaluated on a held-out 10%
    split, and the trained model is serialized to HDF5.
    """
    # NOTE(review): np.load on text-mode handles assumes the files hold
    # np.save payloads -- confirm against the writer of these files.
    fright = open(r'circle counterclockwise/train_cc_10_dim.txt', 'r')
    fleft = open(r'letters/Z/train_Z_10_dim.txt', 'r')
    data_right = np.load(fright)
    data_left = np.load(fleft)
    # FIX: close the input handles once the arrays are in memory
    fright.close()
    fleft.close()

    lenr = len(data_right[0])
    lenl = len(data_left[0])
    dim = 10

    # 90/10 train/test split per class (columns are samples)
    endr = int(0.9 * lenr)
    endl = int(0.9 * lenl)
    data_right_train = data_right[:, 0:endr]
    data_left_train = data_left[:, 0:endl]
    data_right_test = data_right[:, endr:]
    data_left_test = data_left[:, endl:]

    len_right_train = len(data_right_train[0])
    len_left_train = len(data_left_train[0])
    len_right_test = len(data_right_test[0])
    len_left_test = len(data_left_test[0])

    # +1 = circle ("right"), -1 = letter Z ("left")
    label_right_train = np.ones(len_right_train)
    label_right_test = np.ones(len_right_test)
    label_left_train = -1 * np.ones(len_left_train)
    label_left_test = -1 * np.ones(len_left_test)

    C = 10

    # per-dimension z-score normalization, fitted on the training set only
    train_dataset = np.hstack((data_right_train, data_left_train))
    normalization_mean = []
    normalization_std = []
    for i in range(0, dim):
        mean1 = np.mean(train_dataset[i])
        std1 = np.std(train_dataset[i])
        train_dataset[i] = (train_dataset[i] - mean1) / std1
        normalization_mean.append(mean1)
        normalization_std.append(std1)
    feats_train = RealFeatures(train_dataset)

    # --- diagnostic plots of the raw training data ---
    f, axes = subplots(2, 2)
    fig = figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(data_right_train[0], data_right_train[1], data_right_train[2], c='r')
    ax.scatter(data_left_train[0], data_left_train[1], data_left_train[2], c='b')
    ax.set_xlabel('STD X')
    ax.set_ylabel('STD Y')
    ax.set_zlabel('STD Z')
    ax.set_title('3D PLOT OF STD')
    axes[0, 0].plot(data_right_train[0], data_right_train[1], '*')
    axes[0, 0].plot(data_left_train[0], data_left_train[1], 'x')
    axes[0, 0].set_xlabel('STDX')
    axes[0, 0].set_ylabel('std y')
    axes[0, 0].set_title('STDX VS stdy')
    axes[0, 1].plot(data_right_train[1], data_right_train[2], '*')
    axes[0, 1].plot(data_left_train[1], data_left_train[2], 'x')
    axes[0, 1].set_xlabel('std Y')
    axes[0, 1].set_ylabel('std Z')
    axes[0, 1].set_title('stdY VS stdZ')
    axes[1, 0].plot(data_right_train[0], data_right_train[2], '*')
    axes[1, 0].plot(data_left_train[0], data_left_train[2], 'x')
    axes[1, 0].set_xlabel('std X')
    axes[1, 0].set_ylabel('std z')
    axes[1, 0].set_title('std X VS stdz')
    show()

    train_labels = np.hstack((label_right_train, label_left_train))
    test_dataset = np.hstack((data_right_test, data_left_test))

    # persist the normalization parameters for later classification runs
    norm_params = np.asarray([normalization_mean, normalization_std])
    normalization_file = open('normalization_parameters_cc_vs_letters.txt', 'w')
    np.save(normalization_file, norm_params)
    # BUG FIX: the original never closed this handle, so the saved
    # parameters might not be flushed to disk
    normalization_file.close()

    # apply the training-set normalization to the test set
    for i in range(0, dim):
        test_dataset[i] = (test_dataset[i] - normalization_mean[i]) / normalization_std[i]

    labels_train = BinaryLabels(train_labels)
    # print() form keeps the script runnable under Python 2 and 3
    print('the length of test_dataset is %s' % len(test_dataset[0]))
    feats_test = RealFeatures(test_dataset)
    test_labels = np.hstack((label_right_test, label_left_test))
    print('the length is test_labels is %s' % len(test_labels))
    labels_test = BinaryLabels(test_labels)

    # train the linear SVM
    svm = LibLinear(C, feats_train, labels_train)
    epsilon = 1e-3
    svm.set_epsilon(epsilon)
    svm.train()

    predictions = svm.apply(feats_test)
    predictions_on_train = svm.apply(feats_train)

    evaluator1 = ROCEvaluation()
    evaluator2 = AccuracyMeasure()
    evaluator1.evaluate(predictions_on_train, labels_train)
    evaluator2.evaluate(predictions, labels_test)

    # serialize the trained model, tagging the filename with the accuracy
    file_name = "z_vs_cc/accuracy=" + str(evaluator2.get_accuracy()) + " liblinear_cc_vs_Z_svm_classifier_with_C_10_and_normalized.h5"
    f3 = SerializableHdf5File(file_name, "w")
    svm.save_serializable(f3)

    p_test = predictions.get_labels()
    for i in range(0, len(test_labels)):
        print('predicted :  %s  and actual  %s' % (p_test[i], test_labels[i]))
    print('the Area under the curve is  %s' % evaluator1.get_auROC())
    print('the accuracy is  %s' % (evaluator2.get_accuracy() * 100))
    # re-evaluate ROC on the test set (overwrites the train-set ROC above)
    evaluator1.evaluate(predictions, labels_test)
    print('the auc for test set is  %s' % evaluator1.get_auROC())
test_set[4].append(np.std(datax)) #std of each axis test_set[5].append(np.mean(datay)) test_set[6].append(np.mean(dataz)) array_xy=np.asarray([datax,datay]) array_yz=np.asarray([datay,dataz]) array_zx=np.asarray([dataz,datax]) test_set[7].append(np.cov(array_xy)[0][1]) test_set[8].append(np.cov(array_xy)[0][1]) test_set[9].append(np.cov(array_xy)[0][1])""" test_array = np.asarray(test_set) print 'the test array is ', test_array f3 = open('test_for_cc.txt', 'w') np.save(f3, test_array) test_feature = RealFeatures(test_array) svm = LibLinear() file_classifier = SerializableHdf5File( r'letters_vs_cc/ZLabel1accuracy=0.964285714286 liblinear_cc_vs_Z_svm_classifier_with_C_10_and_normalized.h5', 'r') status = svm.load_serializable(file_classifier) output = svm.apply(test_feature).get_labels() print 'output is ', output if int(output[0]) == 1: print 'You just made the mirrored letter Z ' elif int(output[0]) == -1: print 'You just made a counterclockwise circle '
def classifier_multiclass_ecoc(fm_train_real=traindat, fm_test_real=testdat, label_train_multiclass=label_traindat, label_test_multiclass=label_testdat, lawidth=2.1, C=1, epsilon=1e-5):
    """Exercise every concrete ECOC encoder/decoder pair exposed by
    shogun.Classifier on the given multiclass data, timing each run.
    """
    import shogun.Classifier as Classifier
    from modshogun import ECOCStrategy, LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine
    from modshogun import MulticlassAccuracy
    from modshogun import RealFeatures, MulticlassLabels

    def nonabstract_class(name):
        # abstract SWIG wrappers raise TypeError when instantiated
        try:
            getattr(Classifier, name)()
        except TypeError:
            return False
        return True

    encoders = [cls for cls in dir(Classifier)
                if re.match(r'ECOC.+Encoder', cls) and nonabstract_class(cls)]
    decoders = [cls for cls in dir(Classifier)
                if re.match(r'ECOC.+Decoder', cls) and nonabstract_class(cls)]

    fea_train = RealFeatures(fm_train_real)
    fea_test = RealFeatures(fm_test_real)
    gnd_train = MulticlassLabels(label_train_multiclass)
    gnd_test = None if label_test_multiclass is None else MulticlassLabels(label_test_multiclass)

    base_classifier = LibLinear(L2R_L2LOSS_SVC)
    base_classifier.set_bias_enabled(True)

    def run_ecoc(ier, idr):
        # instantiate the selected encoder/decoder pair by name
        encoder = getattr(Classifier, encoders[ier])()
        decoder = getattr(Classifier, decoders[idr])()

        # data-dependent encoders need the training set before encoding
        if hasattr(encoder, 'set_labels'):
            encoder.set_labels(gnd_train)
            encoder.set_features(fea_train)

        machine = LinearMulticlassMachine(ECOCStrategy(encoder, decoder), fea_train, base_classifier, gnd_train)
        machine.train()
        label_pred = machine.apply(fea_test)

        if gnd_test is not None:
            acc = MulticlassAccuracy().evaluate(label_pred, gnd_test)
        else:
            acc = None
        return (machine.get_num_machines(), acc)

    for ier in range(len(encoders)):
        for idr in range(len(decoders)):
            t_begin = time.clock()
            (codelen, acc) = run_ecoc(ier, idr)
            if acc is None:
                acc_fmt = 's'
                acc = 'N/A'
            else:
                acc_fmt = '.4f'
            t_elapse = time.clock() - t_begin
#!/usr/bin/env python2.7
#
# This software is distributed under BSD 3-clause license (see LICENSE file).
#
# Copyright (C) 2014 Thoralf Klein
#
# Train a LibLinear SVM on random data and report training accuracy.

from modshogun import RealFeatures, BinaryLabels, LibLinear
from numpy import random, mean

# 30-dimensional features, 100 random vectors
X_train = RealFeatures(random.randn(30, 100))
Y_train = BinaryLabels(random.randn(X_train.get_num_vectors()))

svm = LibLinear(1.0, X_train, Y_train)
svm.train()

Y_pred = svm.apply_binary(X_train)
# BUG FIX: the original evaluated the label comparison once, discarded the
# result, then recomputed it inside a Python-2-only print statement; compute
# the accuracy once and use the print() form so the script also runs on
# Python 3.
accuracy = mean(Y_train.get_labels() == Y_pred.get_labels())
print("accuracy: %s" % accuracy)
#
# Grid-search C1/C2 for a dual L2-loss LibLinear SVM on random data and
# rank the models by training accuracy.

from modshogun import RealFeatures, BinaryLabels
from modshogun import LibLinear, L2R_L2LOSS_SVC_DUAL
from numpy import random, mean

X_train = RealFeatures(random.randn(30, 100))
Y_train = BinaryLabels(random.randn(X_train.get_num_vectors()))

results = []

# exponential grid 10^-3 .. 10^0 for both regularization constants
for C1_pow in range(-3, 1):
    for C2_pow in range(-3, 1):
        svm = LibLinear()
        svm.set_bias_enabled(False)
        svm.set_liblinear_solver_type(L2R_L2LOSS_SVC_DUAL)
        svm.set_C(10**C1_pow, 10**C2_pow)
        svm.set_features(X_train)
        svm.set_labels(Y_train)
        svm.train()

        Y_pred = svm.apply_binary(X_train)
        accuracy = mean(Y_train.get_labels() == Y_pred.get_labels())
        # print() form keeps the script runnable on Python 2 and 3
        print("%s %s %s" % (10**C1_pow, 10**C2_pow, accuracy))
        results.append({"accuracy": accuracy, "svm": svm})

# best model first
results.sort(key=lambda x: x["accuracy"], reverse=True)
def classifier_multiclass_ecoc (fm_train_real=traindat,fm_test_real=testdat,label_train_multiclass=label_traindat,label_test_multiclass=label_testdat,lawidth=2.1,C=1,epsilon=1e-5):
    """Run every concrete ECOC encoder/decoder combination available in
    modshogun over the given multiclass problem, timing each combination.
    """
    import modshogun
    from modshogun import ECOCStrategy, LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine
    from modshogun import MulticlassAccuracy
    from modshogun import RealFeatures, MulticlassLabels

    def nonabstract_class(name):
        # only concrete classes can be constructed; abstract SWIG wrappers
        # raise TypeError
        try:
            getattr(modshogun, name)()
        except TypeError:
            return False
        return True

    encoders = [n for n in dir(modshogun) if re.match(r'ECOC.+Encoder', n) and nonabstract_class(n)]
    decoders = [n for n in dir(modshogun) if re.match(r'ECOC.+Decoder', n) and nonabstract_class(n)]

    fea_train = RealFeatures(fm_train_real)
    fea_test = RealFeatures(fm_test_real)
    gnd_train = MulticlassLabels(label_train_multiclass)
    if label_test_multiclass is None:
        gnd_test = None
    else:
        gnd_test = MulticlassLabels(label_test_multiclass)

    base_classifier = LibLinear(L2R_L2LOSS_SVC)
    base_classifier.set_bias_enabled(True)

    def run_ecoc(enc_idx, dec_idx):
        # build the pair by class name
        enc = getattr(modshogun, encoders[enc_idx])()
        dec = getattr(modshogun, decoders[dec_idx])()

        # data-dependent encoders must see the training data first
        if hasattr(enc, 'set_labels'):
            enc.set_labels(gnd_train)
            enc.set_features(fea_train)

        machine = LinearMulticlassMachine(ECOCStrategy(enc, dec), fea_train, base_classifier, gnd_train)
        machine.train()
        predicted = machine.apply(fea_test)

        acc = MulticlassAccuracy().evaluate(predicted, gnd_test) if gnd_test is not None else None
        return (machine.get_num_machines(), acc)

    for enc_idx in range(len(encoders)):
        for dec_idx in range(len(decoders)):
            t_begin = time.clock()
            (codelen, acc) = run_ecoc(enc_idx, dec_idx)
            if acc is None:
                acc_fmt = 's'
                acc = 'N/A'
            else:
                acc_fmt = '.4f'
            t_elapse = time.clock() - t_begin