# Load the ReutersGrain test file; the last attribute is used as the class.
fname = data_dir + os.sep + "ReutersGrain-test.arff"
print("\nLoading dataset: " + fname + "\n")
loader = Loader(classname="weka.core.converters.ArffLoader")
test = loader.load_file(fname)
test.set_class_index(test.num_attributes() - 1)

# Each entry is (base classifier class name, options for the
# StringToWordVector filter that is wrapped around it).
setups = (
    ("weka.classifiers.trees.J48", []),
    ("weka.classifiers.bayes.NaiveBayes", []),
    ("weka.classifiers.bayes.NaiveBayesMultinomial", []),
    ("weka.classifiers.bayes.NaiveBayesMultinomial", ["-C"]),
    ("weka.classifiers.bayes.NaiveBayesMultinomial", ["-C", "-L", "-S"]),
)

# Train every setup on `data` and evaluate it on the separate `test` set.
# `data` is bound earlier in the script, outside this excerpt (presumably the
# matching training file -- confirm against the preceding code).
for setup in setups:
    classifier, opt = setup
    print("\n--> %s (filter options: %s)\n" % (classifier, " ".join(opt)))

    # Wrap the base classifier so the filter is applied as part of the model.
    cls = FilteredClassifier()
    base = Classifier(classname=classifier)
    cls.set_classifier(base)
    word_vector = Filter(
        classname="weka.filters.unsupervised.attribute.StringToWordVector",
        options=opt,
    )
    cls.set_filter(word_vector)
    cls.build_classifier(data)

    # NOTE(review): the Evaluation object is initialised with the test set
    # rather than the training set -- confirm this is intended.
    evl = Evaluation(test)
    evl.test_model(cls, test)
    print("Accuracy: %0.0f%%" % evl.percent_correct())

    # Threshold-curve data for class label index 0, used to compute the AUC.
    tcdata = plc.generate_thresholdcurve_data(evl, 0)
    print("AUC: %0.3f" % plc.get_auc(tcdata))
    print(evl.to_matrix("Matrix:"))

jvm.stop()
def _cross_validate_and_report(classifier, dataset):
    """Cross-validate *classifier* on *dataset* and print the results.

    Runs a 10-fold cross-validation seeded with ``Random(1)``, prints the
    evaluation summary followed by the matrix, and returns the evaluation
    object.
    """
    evaluation = Evaluation(dataset)
    evaluation.crossvalidate_model(classifier, dataset, 10, Random(1))
    print(evaluation.to_summary())
    print(evaluation.to_matrix())
    return evaluation


jvm.start()

# Load the glass dataset; the last attribute is used as the class.
fname = data_dir + os.sep + "glass.arff"
print("\nLoading dataset: " + fname + "\n")
loader = Loader(classname="weka.core.converters.ArffLoader")
data = loader.load_file(fname)
data.set_class_index(data.num_attributes() - 1)

# Cross-validate J48 with its default options.
print("\nDefault J48")
cls = Classifier(classname="weka.classifiers.trees.J48")
evl = _cross_validate_and_report(cls, data)

# Cross-validation does not leave `cls` trained, so build it on the full
# dataset before plotting its tree.
cls.build_classifier(data)
plg.plot_dot_graph(cls.graph())

# Cross-validate an unpruned J48 (-U) with a larger minimum leaf size (-M 15).
print("\nUnpruned J48 (minNumObj=15)")
cls = Classifier(classname="weka.classifiers.trees.J48", options=["-U", "-M", "15"])
evl = _cross_validate_and_report(cls, data)

# Build the model on the full dataset.
cls.build_classifier(data)