Exemple #1
0
fname = data_dir + os.sep + "ReutersGrain-test.arff"
print("\nLoading dataset: " + fname + "\n")
loader = Loader(classname="weka.core.converters.ArffLoader")
test = loader.load_file(fname)
test.set_class_index(test.num_attributes() - 1)

setups = (
    ("weka.classifiers.trees.J48", []),
    ("weka.classifiers.bayes.NaiveBayes", []),
    ("weka.classifiers.bayes.NaiveBayesMultinomial", []),
    ("weka.classifiers.bayes.NaiveBayesMultinomial", ["-C"]),
    ("weka.classifiers.bayes.NaiveBayesMultinomial", ["-C", "-L", "-S"])
)

# cross-validate classifiers
for setup in setups:
    classifier, opt = setup
    print("\n--> %s (filter options: %s)\n" % (classifier, " ".join(opt)))
    cls = FilteredClassifier()
    cls.set_classifier(Classifier(classname=classifier))
    cls.set_filter(Filter(classname="weka.filters.unsupervised.attribute.StringToWordVector", options=opt))
    cls.build_classifier(data)
    evl = Evaluation(test)
    evl.test_model(cls, test)
    print("Accuracy: %0.0f%%" % evl.percent_correct())
    tcdata = plc.generate_thresholdcurve_data(evl, 0)
    print("AUC: %0.3f" % plc.get_auc(tcdata))
    print(evl.to_matrix("Matrix:"))

jvm.stop()
Exemple #2
0
jvm.start()

# load glass
fname = data_dir + os.sep + "glass.arff"
print("\nLoading dataset: " + fname + "\n")
loader = Loader(classname="weka.core.converters.ArffLoader")
data = loader.load_file(fname)
data.set_class_index(data.num_attributes() - 1)

# cross-validate default J48
print("\nDefault J48")
cls = Classifier(classname="weka.classifiers.trees.J48")
evl = Evaluation(data)
evl.crossvalidate_model(cls, data, 10, Random(1))
print(evl.to_summary())
print(evl.to_matrix())

# build and plot model
cls.build_classifier(data)
plg.plot_dot_graph(cls.graph())

# cross-validate unpruned J48 with larger leaf size
print("\nUnpruned J48 (minNumObj=15)")
cls = Classifier(classname="weka.classifiers.trees.J48", options=["-U", "-M", "15"])
evl = Evaluation(data)
evl.crossvalidate_model(cls, data, 10, Random(1))
print(evl.to_summary())
print(evl.to_matrix())

# build and plot model
cls.build_classifier(data)