def writeResult(writeFile, trainMatrix, trainLables, testMatrix, testLables, type):
    """Run every classification method and append its metrics to a file.

    A header line is appended to ``writeFile`` first. When ``type`` is 0 the
    train/test matrices are replaced by the output of ``featureSelection``
    before the classifier is built. Each method name is then passed to
    ``classifierInstance.classification`` and the mapping returned by
    ``classifierInstance.evaluate()`` is written as one tab-separated
    ``method, metric, value`` line per entry.

    Args:
        writeFile: Path of the report file; opened in append mode ("a+").
        trainMatrix: Training features handed to ``Classifier``.
        trainLables: Training labels handed to ``Classifier``.
        testMatrix: Test features handed to ``Classifier``.
        testLables: Test labels handed to ``Classifier``.
        type: When equal to 0, feature selection is applied first.
            (The name shadows the builtin but is kept because it is part of
            the caller-visible signature.)

    Returns:
        None.
    """
    methods = ["tree", "knn", "svm", "essemble", "gbdt"]

    # The context manager guarantees the file is closed even when feature
    # selection, classification or evaluation raises.
    with open(writeFile, "a+") as result:
        result.write("-------Method\tPrecision-Recall-F1(1100 original text and gbdt)-------\n")

        if type == 0:
            trainMatrix, testMatrix = featureSelection(trainMatrix, trainLables, testMatrix)

        classifierInstance = Classifier(trainMatrix, trainLables, testMatrix, testLables)

        for key in methods:
            classifierInstance.classification(key)
            # Single-argument parenthesised form prints identically on
            # Python 2 and is also valid on Python 3.
            print(key + "classification() done!")

            metrics = classifierInstance.evaluate()
            for metric in metrics:
                # Plain concatenation: metric names and values are assumed to
                # be strings already -- TODO confirm against Classifier.evaluate.
                result.write(key + "\t" + metric + "\t" + metrics[metric] + "\n")
def eval_classifier(classifierToUse, featuresToUse, testOrTrain="train"):
    """Build a dataset, split it, classify the held-out part and evaluate it.

    The files returned by ``getTestData(testOrTrain)`` are added to a
    ``DataSet`` backed by a ``FeatureExtractor`` for ``featuresToUse``. The
    resulting feature vectors and classes are split with
    ``train_test_split``, predictions are obtained from
    ``Classifier.classification`` and handed to ``Evaluation.evaluate``.
    Progress messages are printed along the way.

    Args:
        classifierToUse: Classifier selector, forwarded to
            ``Classifier.classification`` and ``Evaluation.evaluate``.
        featuresToUse: Feature selector, forwarded to ``FeatureExtractor``
            and ``Evaluation.evaluate``.
        testOrTrain: Key passed to ``getTestData``; defaults to "train".

    Returns:
        None.
    """
    print("Chosen feature: {0}".format(featuresToUse))
    print("Chosen classifier: {0}".format(classifierToUse))

    extractor = FeatureExtractor(featuresToUse)
    dataset = DataSet(extractor)
    clf = Classifier()
    evaluator = Evaluation()

    print("test or Train %s" % testOrTrain)
    files_by_class = getTestData(testOrTrain)
    for feature_class, files in files_by_class.items():
        print("%s" % testOrTrain)
        for path in files:
            dataset.addFile(feature_class, path)

    print("Dataset initialized")
    print_class_stats(dataset.classes)
    print("Test set created.")

    # NOTE(review): test_size=0.9 holds out 90% of the samples for testing
    # and trains on only 10% -- confirm this split is intentional.
    split = train_test_split(dataset.featureVector, dataset.classes, test_size=0.9)
    features_train, features_test, classes_train, classes_test = split

    predicted = clf.classification(
        features_train, features_test, classes_train, classes_test, classifierToUse
    )
    evaluator.evaluate(predicted, classes_test, featuresToUse, classifierToUse)