# Finish the preceding experiment: evaluate the classifier on the filtered
# data, print the evaluation summary and the model, and plot the model graph.
# (`filtered`, `cls` and `plg` are set up before this excerpt.)
evl = Evaluation(filtered)
evl.test_model(cls, filtered)
summary = evl.to_summary()
print(summary)
print(str(cls))
dot_graph = cls.graph()
plg.plot_dot_graph(dot_graph)

# 2. filtered classifier
# Load the test set; its last attribute is used as the class attribute.
fname = os.sep.join([data_dir, "simpletext-test.arff"])
print("\nLoading dataset: " + fname + "\n")
loader = Loader(classname="weka.core.converters.ArffLoader")
test = loader.load_file(fname)
class_index = test.num_attributes() - 1
test.set_class_index(class_index)

# Combine J48 with the StringToWordVector filter in a FilteredClassifier and
# train it on `data` (loaded earlier, outside this excerpt).
print("Building/evaluating filtered classifier...")
cls = FilteredClassifier()
j48 = Classifier(classname="weka.classifiers.trees.J48")
cls.set_classifier(j48)
word_vector = Filter(classname="weka.filters.unsupervised.attribute.StringToWordVector")
cls.set_filter(word_vector)
cls.build_classifier(data)

# Evaluate on the test set, passing a PlainText prediction output whose
# header is the test set; the collected output and the model are printed.
pout = PredictionOutput(classname="weka.classifiers.evaluation.output.prediction.PlainText")
pout.set_header(test)
evl = Evaluation(data)
evl.test_model(cls, test, pout)
print(str(pout))
print(str(cls))

# load ReutersCorn-train
# As above, the last attribute is used as the class attribute.
fname = os.sep.join([data_dir, "ReutersCorn-train.arff"])
print("\nLoading dataset: " + fname + "\n")
loader = Loader(classname="weka.core.converters.ArffLoader")
data = loader.load_file(fname)
class_index = data.num_attributes() - 1
data.set_class_index(class_index)
# Use the last attribute of `data` as the class attribute.
data.set_class_index(data.num_attributes() - 1)

# 1. cheating with default filter
# The Discretize filter is applied to the complete dataset first, so the
# 10-fold cross-validation below runs on data the filter has already seen
# in full.
fltr = Filter(classname="weka.filters.supervised.attribute.Discretize", options=[])
fltr.set_inputformat(data)
filtered = fltr.filter(data)
cls = Classifier(classname="weka.classifiers.trees.J48")
evl = Evaluation(filtered)
evl.crossvalidate_model(cls, filtered, 10, Random(1))
# Build on the full filtered data so the model text can be passed to
# get_nodes (a helper defined outside this excerpt).
cls.build_classifier(filtered)
accuracy = evl.percent_correct()
nodes = get_nodes(str(cls))
print("cheating (default): accuracy=%0.1f nodes=%s" % (accuracy, nodes))

# 2. using FilteredClassifier with default filter
# 3. using FilteredClassifier (make binary)
# Both runs follow the same steps and differ only in the filter options and
# the report line: the filter is wrapped together with J48 in a
# FilteredClassifier and cross-validation is run on the unfiltered `data`.
for options, report in (
    ([], "FilteredClassifier (default): accuracy=%0.1f nodes=%s"),
    (["-D"], "FilteredClassifier (make binary): accuracy=%0.1f nodes=%s"),
):
    cls = FilteredClassifier()
    j48 = Classifier(classname="weka.classifiers.trees.J48")
    cls.set_classifier(j48)
    discretize = Filter(classname="weka.filters.supervised.attribute.Discretize", options=options)
    cls.set_filter(discretize)
    evl = Evaluation(data)
    evl.crossvalidate_model(cls, data, 10, Random(1))
    cls.build_classifier(data)
    accuracy = evl.percent_correct()
    nodes = get_nodes(str(cls))
    print(report % (accuracy, nodes))

# 1. cheating with make binary