class LanguageClassifier:
    """Hold a multiclass SVM in ``self.svm`` and use it to label documents.

    The model is populated either from a gzip-compressed pickle
    (``load_classifier``) or from a Shogun ASCII serialization
    (``load_svm``).
    """

    def __init__(self):
        # No model is available until one of the load_* methods is called.
        self.svm = None

    def load_classifier(self, fname=DEFAULT_FILEPATH):
        """Load a pickled classifier from the gzip file ``fname``.

        The unpickled object is stored in ``self.svm``.
        """
        # NOTE(security): unpickling can execute arbitrary code; only load
        # model files that come from a trusted source.
        # The ``with`` block closes the stream even when unpickling fails.
        with gz.open(fname, "rb") as gz_stream:
            self.svm = pkl.load(gz_stream)

    def load_svm(self, filepath):
        """Load a serialized MulticlassLibLinear machine from ``filepath``.

        A fresh ``MulticlassLibLinear`` is assigned to ``self.svm`` and then
        filled in through ``load_serializable`` from a
        ``SerializableAsciiFile`` opened in "r" mode.
        """
        # Imported locally, like the other Shogun names in this class, so the
        # method does not rely on a module-level import being present.
        from modshogun import MulticlassLibLinear
        from modshogun import SerializableAsciiFile

        print('Attempting to load a multiclass liblinear svm from "' +
              filepath + '"')
        self.svm = MulticlassLibLinear()
        loader = SerializableAsciiFile(filepath, "r")
        self.svm.load_serializable(loader)
        print("Svm succesfully loaded")

    def classify_doc(self, doc):
        """Return the language that ``self.svm`` predicts for ``doc``.

        ``doc`` is wrapped in a one-element list, turned into
        ``StringCharFeatures`` with the ``RAWBYTE`` alphabet and hashed with
        ``HashedDocDotFeatures``. The first predicted label is looked up in
        the module-level ``id_to_lang`` mapping.

        A model must have been loaded beforehand; ``self.svm`` is used
        without any check.
        """
        from modshogun import StringCharFeatures, RAWBYTE
        from modshogun import HashedDocDotFeatures
        from modshogun import NGramTokenizer

        docs = [doc]
        string_feats = StringCharFeatures(docs, RAWBYTE)
        tokenizer = NGramTokenizer(4)
        normalize = True
        num_bits = 18

        # NOTE(review): the trailing 3 and 2 are presumably the n-gram count
        # and the number of skips -- confirm against the Shogun API. These
        # settings presumably have to match the ones used at training time.
        hashed_doc_feats = HashedDocDotFeatures(
            num_bits, string_feats, tokenizer, normalize, 3, 2)

        labels = self.svm.apply(hashed_doc_feats).get_labels()
        return id_to_lang[labels[0]]
def evaluation_multiclassovrevaluation_modular(traindat, label_traindat, testdat, label_testdat):
    """Train a MulticlassLibLinear and score it with one-vs-rest evaluators.

    The machine is built from ``RealFeatures(traindat)`` and
    ``MulticlassLabels(label_traindat)`` with 1.0 as its first constructor
    argument, trained, and applied without arguments. The predictions are
    compared with labels built from ``label_traindat``.

    ``testdat`` and ``label_testdat`` are accepted but not used.

    Returns:
        A ``(mean_roc, mean_accuracy)`` tuple: the results of
        ``MulticlassOVREvaluation`` wrapping ``ROCEvaluation`` and
        ``ContingencyTableEvaluation(ACCURACY)`` respectively.
    """
    from shogun.Features import MulticlassLabels
    from shogun.Evaluation import MulticlassOVREvaluation, ROCEvaluation
    from modshogun import (
        MulticlassLibLinear,
        RealFeatures,
        ContingencyTableEvaluation,
        ACCURACY,
    )
    from shogun.Mathematics import Math

    # Initialise Shogun's random state with a fixed value.
    Math.init_random(1)

    truth = MulticlassLabels(label_traindat)
    machine = MulticlassLibLinear(
        1.0, RealFeatures(traindat), MulticlassLabels(label_traindat))
    machine.train()
    predictions = machine.apply()

    def ovr_score(binary_evaluator):
        # Wrap a binary evaluator in the one-vs-rest scheme and run it.
        return MulticlassOVREvaluation(binary_evaluator).evaluate(
            predictions, truth)

    mean_roc = ovr_score(ROCEvaluation())
    mean_accuracy = ovr_score(ContingencyTableEvaluation(ACCURACY))
    return mean_roc, mean_accuracy
def classifier_multiclassliblinear_modular(
        fm_train_real=traindat,
        fm_test_real=testdat,
        label_train_multiclass=label_traindat,
        label_test_multiclass=label_testdat,
        width=2.1,
        C=1,
        epsilon=1e-5):
    """Train a MulticlassLibLinear and return its predictions on test data.

    Args:
        fm_train_real: training data, wrapped in ``RealFeatures``.
        fm_test_real: test data, wrapped in ``RealFeatures``.
        label_train_multiclass: training labels, wrapped in
            ``MulticlassLabels``.
        label_test_multiclass: test labels; when not ``None`` the accuracy
            of the predictions is computed and printed.
        width: not used by this function.
        C: first constructor argument of ``MulticlassLibLinear``.
        epsilon: not used by this function.

    Returns:
        The value of ``get_labels()`` on the predicted labels.
    """
    from modshogun import RealFeatures, MulticlassLabels
    from modshogun import MulticlassLibLinear

    train_features = RealFeatures(fm_train_real)
    test_features = RealFeatures(fm_test_real)
    train_labels = MulticlassLabels(label_train_multiclass)

    machine = MulticlassLibLinear(C, train_features, train_labels)
    machine.train()

    predicted = machine.apply(test_features)
    out = predicted.get_labels()

    # Without reference labels there is nothing to score.
    if label_test_multiclass is None:
        return out

    from modshogun import MulticlassAccuracy
    reference = MulticlassLabels(label_test_multiclass)
    accuracy = MulticlassAccuracy().evaluate(predicted, reference)
    print('Accuracy = %.4f' % accuracy)

    return out
def evaluation_multiclassovrevaluation_modular(traindat, label_traindat):
    """Train a single-threaded MulticlassLibLinear and score it one-vs-rest.

    The machine is built from ``RealFeatures(traindat)`` and
    ``MulticlassLabels(label_traindat)`` with 1.0 as its first constructor
    argument, limited to one thread, trained, and applied without arguments.
    The predictions are compared with labels built from ``label_traindat``.

    Returns:
        A ``(mean_roc, mean_accuracy, predicted_labels, svm)`` tuple, where
        the two scores come from ``MulticlassOVREvaluation`` wrapping
        ``ROCEvaluation`` and ``ContingencyTableEvaluation(ACCURACY)``.
    """
    from shogun.Features import MulticlassLabels
    from shogun.Evaluation import MulticlassOVREvaluation, ROCEvaluation
    from modshogun import MulticlassLibLinear, RealFeatures
    from modshogun import ContingencyTableEvaluation, ACCURACY
    from shogun.Mathematics import Math

    # Initialise Shogun's random state with a fixed value.
    Math.init_random(1)

    reference_labels = MulticlassLabels(label_traindat)

    classifier = MulticlassLibLinear(
        1.0,
        RealFeatures(traindat),
        MulticlassLabels(label_traindat),
    )
    classifier.parallel.set_num_threads(1)
    classifier.train()
    predicted = classifier.apply()

    # One-vs-rest ROC score.
    roc_evaluator = MulticlassOVREvaluation(ROCEvaluation())
    mean_roc = roc_evaluator.evaluate(predicted, reference_labels)

    # One-vs-rest accuracy score.
    accuracy_evaluator = MulticlassOVREvaluation(
        ContingencyTableEvaluation(ACCURACY))
    mean_accuracy = accuracy_evaluator.evaluate(predicted, reference_labels)

    return mean_roc, mean_accuracy, predicted, classifier
def load_svm(self, filepath):
    """Populate ``self.svm`` from a serialized machine stored at ``filepath``.

    A new ``MulticlassLibLinear`` is assigned to ``self.svm`` and filled in
    by ``load_serializable`` from a ``SerializableAsciiFile`` opened in "r"
    mode. Progress messages are printed before and after loading.
    """
    from modshogun import SerializableAsciiFile

    start_message = "Attempting to load a multiclass liblinear svm from \"" + filepath + "\""
    print(start_message)

    # The empty machine is attached first and deserialized into afterwards.
    self.svm = MulticlassLibLinear()
    ascii_source = SerializableAsciiFile(filepath, "r")
    self.svm.load_serializable(ascii_source)

    print("Svm succesfully loaded")
def classifier_multilabeloutputliblinear_modular(
        fm_train_real=traindat,
        fm_test_real=testdat,
        label_train_multiclass=label_traindat,
        label_test_multiclass=label_testdat,
        width=2.1,
        C=1,
        epsilon=1e-5):
    """Train a MulticlassLibLinear and return its multilabel output.

    Args:
        fm_train_real: training data, wrapped in ``RealFeatures``.
        fm_test_real: test data, wrapped in ``RealFeatures``.
        label_train_multiclass: training labels, wrapped in
            ``MulticlassLabels``.
        label_test_multiclass: not used by this function.
        width: not used by this function.
        C: first constructor argument of ``MulticlassLibLinear``.
        epsilon: not used by this function.

    Returns:
        The value of ``get_labels()`` on the result of
        ``apply_multilabel_output(test_features, 2)``.
    """
    from modshogun import RealFeatures, MulticlassLabels, MultilabelLabels
    from modshogun import MulticlassLibLinear

    train_features = RealFeatures(fm_train_real)
    test_features = RealFeatures(fm_test_real)
    train_labels = MulticlassLabels(label_train_multiclass)

    machine = MulticlassLibLinear(C, train_features, train_labels)
    machine.train()

    # NOTE(review): the 2 is presumably the number of labels emitted per
    # example -- confirm against the Shogun API.
    multilabel_prediction = machine.apply_multilabel_output(test_features, 2)
    return multilabel_prediction.get_labels()
def classifier_multiclass_relaxedtree(
        fm_train_real=traindat,
        fm_test_real=testdat,
        label_train_multiclass=label_traindat,
        label_test_multiclass=label_testdat,
        lawidth=2.1,
        C=1,
        epsilon=1e-5):
    """Train a RelaxedTree and return its predictions on the test data.

    The tree is configured with a default-constructed ``MulticlassLibLinear``
    as the machine for the confusion matrix and a default-constructed
    ``GaussianKernel``.

    Args:
        fm_train_real: training data, wrapped in ``RealFeatures``.
        fm_test_real: test data, wrapped in ``RealFeatures``.
        label_train_multiclass: training labels, wrapped in
            ``MulticlassLabels``.
        label_test_multiclass: test labels; when not ``None`` the accuracy
            of the predictions is computed and printed.
        lawidth: not used by this function.
        C: not used by this function.
        epsilon: not used by this function.

    Returns:
        The value of ``get_labels()`` on the predicted labels.
    """
    from modshogun import RealFeatures, MulticlassLabels
    from modshogun import RelaxedTree, MulticlassLibLinear
    from modshogun import GaussianKernel

    train_features = RealFeatures(fm_train_real)
    train_labels = MulticlassLabels(label_train_multiclass)

    tree = RelaxedTree()
    tree.set_machine_for_confusion_matrix(MulticlassLibLinear())
    tree.set_kernel(GaussianKernel())
    tree.set_labels(train_labels)
    tree.train(train_features)

    test_features = RealFeatures(fm_test_real)
    predicted = tree.apply_multiclass(test_features)
    out = predicted.get_labels()

    # Without reference labels there is nothing to score.
    if label_test_multiclass is None:
        return out

    from modshogun import MulticlassAccuracy
    reference = MulticlassLabels(label_test_multiclass)
    accuracy = MulticlassAccuracy().evaluate(predicted, reference)
    print('Accuracy = %.4f' % accuracy)

    return out
def load_svm(self, filepath):
    """Read a serialized MulticlassLibLinear from ``filepath`` into ``self.svm``.

    ``self.svm`` is first replaced by a new ``MulticlassLibLinear``; its
    contents are then loaded through ``load_serializable`` from a
    ``SerializableAsciiFile`` opened in "r" mode. A message is printed
    before the attempt and another after it completes.
    """
    from modshogun import SerializableAsciiFile

    quoted_path = '"' + filepath + '"'
    print('Attempting to load a multiclass liblinear svm from ' + quoted_path)

    self.svm = MulticlassLibLinear()
    serialized_file = SerializableAsciiFile(filepath, "r")
    self.svm.load_serializable(serialized_file)

    print("Svm succesfully loaded")
def evaluation_multiclassovrevaluation_modular(traindat, label_traindat, testdat, label_testdat):
    """Train a MulticlassLibLinear, then print and return one-vs-rest scores.

    The machine is built from ``RealFeatures(traindat)`` and
    ``MulticlassLabels(label_traindat)`` with 1.0 as its first constructor
    argument, trained, and applied without arguments. The predictions are
    compared with labels built from ``label_traindat``.

    ``testdat`` and ``label_testdat`` are accepted but not used.

    Returns:
        A ``(mean_roc, mean_accuracy)`` tuple: the results of
        ``MulticlassOVREvaluation`` wrapping ``ROCEvaluation`` and
        ``ContingencyTableEvaluation(ACCURACY)`` respectively. Both values
        are also printed.
    """
    from shogun.Features import MulticlassLabels
    from shogun.Evaluation import MulticlassOVREvaluation, ROCEvaluation
    from modshogun import MulticlassLibLinear, RealFeatures, ContingencyTableEvaluation, ACCURACY

    ground_truth_labels = MulticlassLabels(label_traindat)
    svm = MulticlassLibLinear(1.0, RealFeatures(traindat), MulticlassLabels(label_traindat))
    svm.train()
    predicted_labels = svm.apply()

    binary_evaluator = ROCEvaluation()
    evaluator = MulticlassOVREvaluation(binary_evaluator)
    mean_roc = evaluator.evaluate(predicted_labels, ground_truth_labels)
    # Single-argument print(...) produces the same output under Python 2 and
    # Python 3, unlike the print statement, which Python 3 rejects.
    print(mean_roc)

    binary_evaluator = ContingencyTableEvaluation(ACCURACY)
    evaluator = MulticlassOVREvaluation(binary_evaluator)
    mean_accuracy = evaluator.evaluate(predicted_labels, ground_truth_labels)
    print(mean_accuracy)

    return mean_roc, mean_accuracy
class LanguageClassifier:
    """Language identifier backed by a multiclass SVM kept in ``self.svm``.

    Use ``load_classifier`` (gzip-compressed pickle) or ``load_svm`` (Shogun
    ASCII serialization) to obtain a model, then ``classify_doc`` to label a
    document.
    """

    def __init__(self):
        # Stays None until a model has been loaded.
        self.svm = None

    def load_classifier(self, fname=DEFAULT_FILEPATH):
        """Unpickle a classifier from the gzip file ``fname`` into ``self.svm``."""
        # NOTE(security): pickle data can run arbitrary code when loaded, so
        # ``fname`` must point to a trusted file.
        # Using the stream as a context manager guarantees it is closed even
        # if ``pkl.load`` raises.
        with gz.open(fname, 'rb') as gz_stream:
            self.svm = pkl.load(gz_stream)

    def load_svm(self, filepath):
        """Deserialize a MulticlassLibLinear from ``filepath`` into ``self.svm``.

        ``self.svm`` is replaced by a new ``MulticlassLibLinear`` which is
        then loaded via ``load_serializable`` from a
        ``SerializableAsciiFile`` opened in "r" mode.
        """
        # MulticlassLibLinear is imported here as well, so the method works
        # regardless of what the enclosing module imports.
        from modshogun import MulticlassLibLinear
        from modshogun import SerializableAsciiFile

        print("Attempting to load a multiclass liblinear svm from \"" +
              filepath + "\"")
        self.svm = MulticlassLibLinear()
        loader = SerializableAsciiFile(filepath, "r")
        self.svm.load_serializable(loader)
        print("Svm succesfully loaded")

    def classify_doc(self, doc):
        """Predict the language of ``doc`` with the loaded model.

        The document is placed in a one-element list, converted to
        ``StringCharFeatures`` using ``RAWBYTE``, hashed through
        ``HashedDocDotFeatures`` and passed to ``self.svm.apply``.

        Returns:
            ``id_to_lang[labels[0]]``, i.e. the entry of the module-level
            ``id_to_lang`` mapping for the first predicted label.

        ``self.svm`` is used without checking that a model was loaded.
        """
        from modshogun import StringCharFeatures, RAWBYTE
        from modshogun import HashedDocDotFeatures
        from modshogun import NGramTokenizer

        docs = [doc]
        string_feats = StringCharFeatures(docs, RAWBYTE)
        tokenizer = NGramTokenizer(4)
        normalize = True
        num_bits = 18

        # NOTE(review): the last two arguments (3, 2) are presumably the
        # n-gram count and skip count -- confirm against the Shogun API.
        hashed_doc_feats = HashedDocDotFeatures(
            num_bits, string_feats, tokenizer, normalize, 3, 2)

        labels = self.svm.apply(hashed_doc_feats).get_labels()
        return id_to_lang[labels[0]]
def classifier_multilabeloutputliblinear_modular(
        fm_train_real=traindat, fm_test_real=testdat,
        label_train_multiclass=label_traindat,
        label_test_multiclass=label_testdat,
        width=2.1, C=1, epsilon=1e-5):
    """Fit a MulticlassLibLinear and produce multilabel output for test data.

    Training uses ``RealFeatures(fm_train_real)`` with
    ``MulticlassLabels(label_train_multiclass)`` and ``C`` as the first
    constructor argument. The trained machine is asked for
    ``apply_multilabel_output(..., 2)`` on ``RealFeatures(fm_test_real)``.

    ``label_test_multiclass``, ``width`` and ``epsilon`` are accepted but
    not used.

    Returns:
        The value of ``get_labels()`` on the multilabel prediction.
    """
    from modshogun import RealFeatures, MulticlassLabels, MultilabelLabels
    from modshogun import MulticlassLibLinear

    # Wrap the raw inputs in Shogun containers.
    x_train = RealFeatures(fm_train_real)
    x_test = RealFeatures(fm_test_real)
    y_train = MulticlassLabels(label_train_multiclass)

    model = MulticlassLibLinear(C, x_train, y_train)
    model.train()

    prediction = model.apply_multilabel_output(x_test, 2)
    result = prediction.get_labels()
    return result
def classifier_multiclassliblinear_modular(
        fm_train_real=traindat, fm_test_real=testdat,
        label_train_multiclass=label_traindat,
        label_test_multiclass=label_testdat,
        width=2.1, C=1, epsilon=1e-5):
    """Fit a MulticlassLibLinear and predict labels for the test data.

    Training uses ``RealFeatures(fm_train_real)`` with
    ``MulticlassLabels(label_train_multiclass)`` and ``C`` as the first
    constructor argument. When ``label_test_multiclass`` is not ``None``,
    the ``MulticlassAccuracy`` of the predictions is printed.

    ``width`` and ``epsilon`` are accepted but not used.

    Returns:
        The value of ``get_labels()`` on the predicted labels.
    """
    from modshogun import RealFeatures, MulticlassLabels
    from modshogun import MulticlassLibLinear

    # Wrap the raw inputs in Shogun containers.
    x_train = RealFeatures(fm_train_real)
    x_test = RealFeatures(fm_test_real)
    y_train = MulticlassLabels(label_train_multiclass)

    model = MulticlassLibLinear(C, x_train, y_train)
    model.train()

    prediction = model.apply(x_test)
    result = prediction.get_labels()

    have_reference = label_test_multiclass is not None
    if have_reference:
        from modshogun import MulticlassAccuracy
        y_test = MulticlassLabels(label_test_multiclass)
        scorer = MulticlassAccuracy()
        score = scorer.evaluate(prediction, y_test)
        print('Accuracy = %.4f' % score)

    return result