예제 #1
0
class LanguageClassifier:
    """Hold a multiclass SVM and use it to label a single document.

    The model lives in ``self.svm`` and is ``None`` until one of the loaders
    (``load_classifier`` or ``load_svm``) has been called.
    """

    def __init__(self):
        # No model yet; classify_doc() requires a loader to have run first.
        self.svm = None

    def load_classifier(self, fname=DEFAULT_FILEPATH):
        """Unpickle a classifier from ``fname`` and store it in ``self.svm``.

        The file is opened in binary read mode through ``gz.open`` and is
        closed again even when unpickling raises.
        """
        # NOTE(review): unpickling can execute arbitrary code embedded in the
        # file; only load model files that come from a trusted source.
        gz_stream = gz.open(fname, "rb")
        try:
            self.svm = pkl.load(gz_stream)
        finally:
            gz_stream.close()

    def load_svm(self, filepath):
        """Load a serialized MulticlassLibLinear from ``filepath`` into ``self.svm``.

        Progress messages are printed before and after loading.
        """
        # MulticlassLibLinear is imported here as well so this method does not
        # rely on a module-level import being present.
        from modshogun import MulticlassLibLinear, SerializableAsciiFile

        print('Attempting to load a multiclass liblinear svm from "' + filepath + '"')
        self.svm = MulticlassLibLinear()
        loader = SerializableAsciiFile(filepath, "r")
        self.svm.load_serializable(loader)
        print("Svm succesfully loaded")

    def classify_doc(self, doc):
        """Return the ``id_to_lang`` entry for the label predicted for ``doc``.

        ``doc`` is wrapped in a one-element list, turned into RAWBYTE string
        features, hashed with an ``NGramTokenizer(4)`` and passed to
        ``self.svm.apply``; only the first predicted label is used.
        """
        from modshogun import StringCharFeatures, RAWBYTE
        from modshogun import HashedDocDotFeatures
        from modshogun import NGramTokenizer

        string_feats = StringCharFeatures([doc], RAWBYTE)
        tokenizer = NGramTokenizer(4)
        normalize = True
        num_bits = 18

        # NOTE(review): the trailing 3 and 2 are presumably the n-gram count
        # and skip count -- confirm against the HashedDocDotFeatures signature.
        hashed_doc_feats = HashedDocDotFeatures(num_bits, string_feats, tokenizer, normalize, 3, 2)

        labels = self.svm.apply(hashed_doc_feats).get_labels()

        return id_to_lang[labels[0]]
예제 #2
0
def evaluation_multiclassovrevaluation_modular(traindat, label_traindat,
                                               testdat, label_testdat):
    """Train MulticlassLibLinear on the training data and score it one-vs-rest.

    After training, the machine is applied without arguments and the
    predictions are evaluated against labels built from ``label_traindat``.
    ``testdat`` and ``label_testdat`` are accepted but not used.

    Returns:
        Tuple ``(mean_roc, mean_accuracy)`` as produced by
        MulticlassOVREvaluation wrapping ROCEvaluation and
        ContingencyTableEvaluation(ACCURACY) respectively.
    """
    from shogun.Features import MulticlassLabels
    from shogun.Evaluation import MulticlassOVREvaluation, ROCEvaluation
    from modshogun import MulticlassLibLinear, RealFeatures, ContingencyTableEvaluation, ACCURACY
    from shogun.Mathematics import Math

    # Fixed seed value handed to the library's random initialiser.
    Math.init_random(1)

    truth = MulticlassLabels(label_traindat)
    train_feats = RealFeatures(traindat)
    train_labels = MulticlassLabels(label_traindat)
    machine = MulticlassLibLinear(1.0, train_feats, train_labels)
    machine.train()
    predictions = machine.apply()

    def one_vs_rest_mean(binary_evaluator):
        # Wrap the binary evaluator and score predictions against the truth.
        ovr = MulticlassOVREvaluation(binary_evaluator)
        return ovr.evaluate(predictions, truth)

    mean_roc = one_vs_rest_mean(ROCEvaluation())
    mean_accuracy = one_vs_rest_mean(ContingencyTableEvaluation(ACCURACY))

    return mean_roc, mean_accuracy
def classifier_multiclassliblinear_modular(
        fm_train_real=traindat,
        fm_test_real=testdat,
        label_train_multiclass=label_traindat,
        label_test_multiclass=label_testdat,
        width=2.1,
        C=1,
        epsilon=1e-5):
    """Train MulticlassLibLinear and return its predicted labels on the test data.

    ``C`` is passed to the MulticlassLibLinear constructor. ``width`` and
    ``epsilon`` are accepted but not used. When ``label_test_multiclass`` is
    not ``None`` the accuracy against those labels is printed as well.

    Returns:
        The result of ``get_labels()`` on the predictions for ``fm_test_real``.
    """
    from modshogun import RealFeatures, MulticlassLabels
    from modshogun import MulticlassLibLinear

    train_features = RealFeatures(fm_train_real)
    test_features = RealFeatures(fm_test_real)
    train_labels = MulticlassLabels(label_train_multiclass)

    machine = MulticlassLibLinear(C, train_features, train_labels)
    machine.train()

    predictions = machine.apply(test_features)
    out = predictions.get_labels()

    # Without reference labels there is nothing to score.
    if label_test_multiclass is None:
        return out

    from modshogun import MulticlassAccuracy
    expected = MulticlassLabels(label_test_multiclass)
    scorer = MulticlassAccuracy()
    acc = scorer.evaluate(predictions, expected)
    print('Accuracy = %.4f' % acc)
    return out
def evaluation_multiclassovrevaluation_modular(traindat, label_traindat):
	"""Train MulticlassLibLinear on the given data and score it one-vs-rest.

	The trained machine is applied without arguments and the predictions are
	evaluated against labels built from ``label_traindat``.

	Returns:
		Tuple ``(mean_roc, mean_accuracy, predicted_labels, svm)``: the two
		MulticlassOVREvaluation results, the predictions and the machine.
	"""
	from shogun.Features import MulticlassLabels
	from shogun.Evaluation import MulticlassOVREvaluation, ROCEvaluation
	from modshogun import MulticlassLibLinear, RealFeatures, ContingencyTableEvaluation, ACCURACY
	from shogun.Mathematics import Math

	# Fixed seed value handed to the library's random initialiser.
	Math.init_random(1)

	truth = MulticlassLabels(label_traindat)
	train_feats = RealFeatures(traindat)
	train_labels = MulticlassLabels(label_traindat)
	svm = MulticlassLibLinear(1.0, train_feats, train_labels)
	# Set the machine's thread count to 1 before training.
	svm.parallel.set_num_threads(1)
	svm.train()
	predicted_labels = svm.apply()

	roc_ovr = MulticlassOVREvaluation(ROCEvaluation())
	mean_roc = roc_ovr.evaluate(predicted_labels, truth)

	accuracy_ovr = MulticlassOVREvaluation(ContingencyTableEvaluation(ACCURACY))
	mean_accuracy = accuracy_ovr.evaluate(predicted_labels, truth)

	return mean_roc, mean_accuracy, predicted_labels, svm
예제 #5
0
	def load_svm(self, filepath):
		"""Replace ``self.svm`` with a MulticlassLibLinear read from ``filepath``.

		The file is opened for reading through SerializableAsciiFile and
		handed to the machine's ``load_serializable``; a message is printed
		before and after.
		"""
		from modshogun import SerializableAsciiFile

		announcement = 'Attempting to load a multiclass liblinear svm from "' + filepath + '"'
		print(announcement)

		# Assigned before loading: a failed load still leaves self.svm
		# pointing at the newly created machine.
		machine = MulticlassLibLinear()
		self.svm = machine
		ascii_file = SerializableAsciiFile(filepath, "r")
		machine.load_serializable(ascii_file)
		print("Svm succesfully loaded")
def classifier_multilabeloutputliblinear_modular(fm_train_real=traindat, fm_test_real=testdat, label_train_multiclass=label_traindat, label_test_multiclass=label_testdat, width=2.1, C=1, epsilon=1e-5):
	"""Train MulticlassLibLinear and return its multilabel output on the test data.

	``C`` is passed to the MulticlassLibLinear constructor.
	``label_test_multiclass``, ``width`` and ``epsilon`` are accepted but not
	used.

	Returns:
		``get_labels()`` of ``apply_multilabel_output(test features, 2)``.
	"""
	from modshogun import RealFeatures, MulticlassLabels, MultilabelLabels
	from modshogun import MulticlassLibLinear

	train_features = RealFeatures(fm_train_real)
	test_features = RealFeatures(fm_test_real)
	train_labels = MulticlassLabels(label_train_multiclass)

	machine = MulticlassLibLinear(C, train_features, train_labels)
	machine.train()

	# NOTE(review): the literal 2 is presumably the number of labels emitted
	# per sample -- confirm against apply_multilabel_output's documentation.
	multilabel_pred = machine.apply_multilabel_output(test_features, 2)
	out = multilabel_pred.get_labels()
	return out
def classifier_multiclass_relaxedtree(fm_train_real=traindat,
                                      fm_test_real=testdat,
                                      label_train_multiclass=label_traindat,
                                      label_test_multiclass=label_testdat,
                                      lawidth=2.1,
                                      C=1,
                                      epsilon=1e-5):
    """Train a RelaxedTree and return its predicted labels on the test data.

    The tree is configured with a default-constructed MulticlassLibLinear as
    its confusion-matrix machine and a default-constructed GaussianKernel.
    ``lawidth``, ``C`` and ``epsilon`` are accepted but not used. When
    ``label_test_multiclass`` is not ``None`` the accuracy against those
    labels is printed as well.

    Returns:
        The result of ``get_labels()`` on the predictions for ``fm_test_real``.
    """
    from modshogun import RealFeatures, MulticlassLabels
    from modshogun import RelaxedTree, MulticlassLibLinear
    from modshogun import GaussianKernel

    train_features = RealFeatures(fm_train_real)
    train_labels = MulticlassLabels(label_train_multiclass)

    tree = RelaxedTree()
    confusion_machine = MulticlassLibLinear()
    tree.set_machine_for_confusion_matrix(confusion_machine)
    kernel = GaussianKernel()
    tree.set_kernel(kernel)
    tree.set_labels(train_labels)
    tree.train(train_features)

    test_features = RealFeatures(fm_test_real)
    predictions = tree.apply_multiclass(test_features)
    out = predictions.get_labels()

    # Without reference labels there is nothing to score.
    if label_test_multiclass is None:
        return out

    from modshogun import MulticlassAccuracy
    expected = MulticlassLabels(label_test_multiclass)
    scorer = MulticlassAccuracy()
    acc = scorer.evaluate(predictions, expected)
    print('Accuracy = %.4f' % acc)
    return out
예제 #8
0
    def load_svm(self, filepath):
        """Replace ``self.svm`` with a MulticlassLibLinear read from ``filepath``.

        The file is opened for reading through SerializableAsciiFile and
        handed to the machine's ``load_serializable``; a message is printed
        before and after.
        """
        from modshogun import SerializableAsciiFile

        announcement = 'Attempting to load a multiclass liblinear svm from "' + filepath + '"'
        print(announcement)

        # Assigned before loading: a failed load still leaves self.svm
        # pointing at the newly created machine.
        machine = MulticlassLibLinear()
        self.svm = machine
        ascii_file = SerializableAsciiFile(filepath, "r")
        machine.load_serializable(ascii_file)
        print("Svm succesfully loaded")
def evaluation_multiclassovrevaluation_modular(traindat, label_traindat, testdat, label_testdat):
	"""Train MulticlassLibLinear on the training data and score it one-vs-rest.

	The trained machine is applied without arguments and the predictions are
	evaluated against labels built from ``label_traindat``. Both scores are
	printed. ``testdat`` and ``label_testdat`` are accepted but not used.

	Returns:
		Tuple ``(mean_roc, mean_accuracy)`` as produced by
		MulticlassOVREvaluation wrapping ROCEvaluation and
		ContingencyTableEvaluation(ACCURACY) respectively.
	"""
	from shogun.Features import MulticlassLabels
	from shogun.Evaluation import MulticlassOVREvaluation, ROCEvaluation
	from modshogun import MulticlassLibLinear, RealFeatures, ContingencyTableEvaluation, ACCURACY

	ground_truth_labels = MulticlassLabels(label_traindat)
	svm = MulticlassLibLinear(1.0, RealFeatures(traindat), MulticlassLabels(label_traindat))
	svm.train()
	predicted_labels = svm.apply()

	binary_evaluator = ROCEvaluation()
	evaluator = MulticlassOVREvaluation(binary_evaluator)
	mean_roc = evaluator.evaluate(predicted_labels, ground_truth_labels)
	# Single-argument call form prints identically on Python 2 and Python 3,
	# whereas the bare print statement is a SyntaxError on Python 3.
	print(mean_roc)

	binary_evaluator = ContingencyTableEvaluation(ACCURACY)
	evaluator = MulticlassOVREvaluation(binary_evaluator)
	mean_accuracy = evaluator.evaluate(predicted_labels, ground_truth_labels)
	print(mean_accuracy)

	return mean_roc, mean_accuracy
예제 #10
0
class LanguageClassifier:
	"""Hold a multiclass SVM and use it to label a single document.

	The model lives in ``self.svm`` and is ``None`` until one of the loaders
	(``load_classifier`` or ``load_svm``) has been called.
	"""

	def __init__(self):
		# No model yet; classify_doc() requires a loader to have run first.
		self.svm = None

	def load_classifier(self, fname=DEFAULT_FILEPATH):
		"""Unpickle a classifier from ``fname`` and store it in ``self.svm``.

		The file is opened in binary read mode through ``gz.open`` and is
		closed again even when unpickling raises.
		"""
		# NOTE(review): unpickling can execute arbitrary code embedded in the
		# file; only load model files that come from a trusted source.
		gz_stream = gz.open(fname, 'rb')
		try:
			self.svm = pkl.load(gz_stream)
		finally:
			gz_stream.close()

	def load_svm(self, filepath):
		"""Load a serialized MulticlassLibLinear from ``filepath`` into ``self.svm``.

		Progress messages are printed before and after loading.
		"""
		# MulticlassLibLinear is imported here as well so this method does not
		# rely on a module-level import being present.
		from modshogun import MulticlassLibLinear, SerializableAsciiFile

		print("Attempting to load a multiclass liblinear svm from \"" +
					filepath + "\"")
		self.svm = MulticlassLibLinear()
		loader = SerializableAsciiFile(filepath, "r")
		self.svm.load_serializable(loader)
		print("Svm succesfully loaded")

	def classify_doc(self, doc):
		"""Return the ``id_to_lang`` entry for the label predicted for ``doc``.

		``doc`` is wrapped in a one-element list, turned into RAWBYTE string
		features, hashed with an ``NGramTokenizer(4)`` and passed to
		``self.svm.apply``; only the first predicted label is used.
		"""
		from modshogun import StringCharFeatures, RAWBYTE
		from modshogun import HashedDocDotFeatures
		from modshogun import NGramTokenizer

		string_feats = StringCharFeatures([doc], RAWBYTE)
		tokenizer = NGramTokenizer(4)
		normalize = True
		num_bits = 18

		# NOTE(review): the trailing 3 and 2 are presumably the n-gram count
		# and skip count -- confirm against the HashedDocDotFeatures signature.
		hashed_doc_feats = HashedDocDotFeatures(num_bits, string_feats,
					tokenizer, normalize, 3, 2)

		labels = self.svm.apply(hashed_doc_feats).get_labels()

		return id_to_lang[labels[0]]
def classifier_multilabeloutputliblinear_modular(
        fm_train_real=traindat,
        fm_test_real=testdat,
        label_train_multiclass=label_traindat,
        label_test_multiclass=label_testdat,
        width=2.1,
        C=1,
        epsilon=1e-5):
    """Train MulticlassLibLinear and return its multilabel output on the test data.

    ``C`` is passed to the MulticlassLibLinear constructor.
    ``label_test_multiclass``, ``width`` and ``epsilon`` are accepted but not
    used.

    Returns:
        ``get_labels()`` of ``apply_multilabel_output(test features, 2)``.
    """
    from modshogun import RealFeatures, MulticlassLabels, MultilabelLabels
    from modshogun import MulticlassLibLinear

    train_features = RealFeatures(fm_train_real)
    test_features = RealFeatures(fm_test_real)
    train_labels = MulticlassLabels(label_train_multiclass)

    machine = MulticlassLibLinear(C, train_features, train_labels)
    machine.train()

    # NOTE(review): the literal 2 is presumably the number of labels emitted
    # per sample -- confirm against apply_multilabel_output's documentation.
    multilabel_pred = machine.apply_multilabel_output(test_features, 2)
    out = multilabel_pred.get_labels()
    return out
def classifier_multiclassliblinear_modular(fm_train_real=traindat, fm_test_real=testdat, label_train_multiclass=label_traindat, label_test_multiclass=label_testdat, width=2.1, C=1, epsilon=1e-5):
	"""Train MulticlassLibLinear and return its predicted labels on the test data.

	``C`` is passed to the MulticlassLibLinear constructor. ``width`` and
	``epsilon`` are accepted but not used. When ``label_test_multiclass`` is
	not ``None`` the accuracy against those labels is printed as well.

	Returns:
		The result of ``get_labels()`` on the predictions for ``fm_test_real``.
	"""
	from modshogun import RealFeatures, MulticlassLabels
	from modshogun import MulticlassLibLinear

	train_features = RealFeatures(fm_train_real)
	test_features = RealFeatures(fm_test_real)
	train_labels = MulticlassLabels(label_train_multiclass)

	machine = MulticlassLibLinear(C, train_features, train_labels)
	machine.train()

	predictions = machine.apply(test_features)
	out = predictions.get_labels()

	# Without reference labels there is nothing to score.
	if label_test_multiclass is None:
		return out

	from modshogun import MulticlassAccuracy
	expected = MulticlassLabels(label_test_multiclass)
	scorer = MulticlassAccuracy()
	acc = scorer.evaluate(predictions, expected)
	print('Accuracy = %.4f' % acc)
	return out