def test_batch_filtering(self):
    """
    Exercises Filter.filter in batch mode, first with a single dataset
    and then with a list of datasets.
    """
    arff_loader = converters.Loader(classname="weka.core.converters.ArffLoader")

    # single dataset: Remove configured with "-R 1,3", so two attributes
    # fewer are expected while the number of rows stays the same
    original = arff_loader.load_file(self.datafile("anneal.arff"))
    self.assertIsNotNone(original)
    remove = filters.Filter(
        classname="weka.filters.unsupervised.attribute.Remove",
        options=["-R", "1,3"])
    remove.inputformat(original)
    reduced = remove.filter(original)
    self.assertEqual(
        original.num_attributes - 2, reduced.num_attributes,
        msg="Number of attributes differ")
    self.assertEqual(
        original.num_instances, reduced.num_instances,
        msg="Number of instances differ")

    # multiple files: the filter is initialized on the train set and then
    # applied to train and test set in one call
    train = arff_loader.load_file(
        self.datafile("reutersTop10Randomized_1perc_shortened-train.arff"))
    self.assertIsNotNone(train)
    test = arff_loader.load_file(
        self.datafile("reutersTop10Randomized_1perc_shortened-test.arff"))
    self.assertIsNotNone(test)
    word_vector = filters.Filter(
        classname="weka.filters.unsupervised.attribute.StringToWordVector")
    word_vector.inputformat(train)
    outputs = word_vector.filter([train, test])
    # equal_headers is expected to yield None for the two filtered datasets
    header_diff = outputs[0].equal_headers(outputs[1])
    self.assertIsNone(header_diff, msg="Headers should be compatible")
def test_incremental_filtering(self):
    """
    Exercises the instance-by-instance Filter API (input/output methods).
    """
    arff_loader = converters.Loader(classname="weka.core.converters.ArffLoader")
    source = arff_loader.load_file(self.datafile("anneal.arff"))
    self.assertIsNotNone(source)

    remove = filters.Filter(
        classname="weka.filters.unsupervised.attribute.Remove",
        options=["-R", "1,3"])
    remove.inputformat(source)

    # the output format serves as the container collecting the filtered rows
    target = remove.outputformat()
    self.assertIsNotNone(target)
    is_instances = isinstance(target, dataset.Instances)
    self.assertTrue(is_instances, msg="Should be Instances object")

    # push each row through the filter and collect what comes out
    for row in source:
        remove.input(row)
        target.add_instance(remove.output())

    self.assertEqual(
        source.num_attributes - 2, target.num_attributes,
        msg="Number of attributes differ")
    self.assertEqual(
        source.num_instances, target.num_instances,
        msg="Number of instances differ")
def test_instantiate_clusterer(self):
    """
    Tests the instantiation of Clusterer objects.
    """
    # plain clusterer, instantiated by classname
    kmeans_name = "weka.clusterers.SimpleKMeans"
    kmeans = clusterers.Clusterer(classname=kmeans_name)
    self.assertIsNotNone(kmeans, msg="Failed to instantiate clusterer!")
    self.assertEqual(kmeans_name, kmeans.classname, msg="Classnames differ!")

    # filtered clusterer via its dedicated wrapper class
    filtered = clusterers.FilteredClusterer()
    self.assertIsNotNone(
        filtered, msg="Failed to instantiate filtered clusterer!")
    self.assertEqual(
        "weka.clusterers.FilteredClusterer", filtered.classname,
        msg="Classnames differ!")

    # the filter assigned to the wrapper must be readable again
    remove_name = "weka.filters.unsupervised.attribute.Remove"
    remove = filters.Filter(classname=remove_name)
    self.assertEqual(
        remove_name, remove.classname, msg="Filter classnames differ!")
    filtered.filter = remove
    self.assertEqual(
        remove_name, filtered.filter.classname,
        msg="Filter classnames differ!")

    # same for the base clusterer
    em_name = "weka.clusterers.EM"
    filtered.clusterer = clusterers.Clusterer(classname=em_name)
    self.assertEqual(
        em_name, filtered.clusterer.classname,
        msg="Base clusterer classnames differ!")
def main():
    """
    Just runs some example code: plots a dataset.

    The flow loads the iris dataset and forwards it to two branches, one
    showing a matrix plot and one showing a line plot of the data with the
    last attribute removed.
    """

    # setup the flow
    helper.print_title("Plot dataset")
    data_dir = helper.get_data_dir()
    iris_path = data_dir + os.sep + "iris.arff"

    flow = Flow(name="plot dataset")

    supplier = FileSupplier()
    supplier.config["files"] = [iris_path]
    flow.actors.append(supplier)

    loader = LoadDataset()
    flow.actors.append(loader)

    branch = Branch()
    flow.actors.append(branch)

    # first branch: matrix plot
    matrix_seq = Sequence(name="matrix plot")
    branch.actors.append(matrix_seq)

    matrix_plot = MatrixPlot()
    matrix_plot.config["percent"] = 50.0
    matrix_plot.config["wait"] = False
    matrix_seq.actors.append(matrix_plot)

    # second branch: line plot of a filtered copy
    line_seq = Sequence(name="line plot")
    branch.actors.append(line_seq)

    duplicate = Copy()
    line_seq.actors.append(duplicate)

    remove_last = Filter()
    remove_last.config["setup"] = filters.Filter(
        classname="weka.filters.unsupervised.attribute.Remove",
        options=["-R", "last"])
    remove_last.config["keep_relationname"] = True
    line_seq.actors.append(remove_last)

    line_plot = LinePlot()
    line_plot.config["percent"] = 50.0
    line_plot.config["wait"] = True
    line_seq.actors.append(line_plot)

    # run the flow
    error = flow.setup()
    if error is not None:
        print("Error setting up flow:\n" + error)
    else:
        print("\n" + flow.tree + "\n")
        error = flow.execute()
        if error is not None:
            print("Error executing flow:\n" + error)
    flow.wrapup()
    flow.cleanup()
def test_generate_thresholdcurve_data(self):
    """
    Checks the dataset produced by plot.generate_thresholdcurve_data.
    """
    arff_loader = converters.Loader(classname="weka.core.converters.ArffLoader")
    diabetes = arff_loader.load_file(self.datafile("diabetes.arff"))
    diabetes.class_is_last()

    remove = filters.Filter(
        classname="weka.filters.unsupervised.attribute.Remove",
        options=["-R", "1-3"])
    bayes = classifiers.Classifier(
        classname="weka.classifiers.bayes.NaiveBayes")
    # NOTE(review): the filtered classifier is assembled here, but it is the
    # plain classifier that gets cross-validated below; the expected values
    # in the assertions correspond to that setup - confirm this is intended
    filtered = classifiers.FilteredClassifier()
    filtered.filter = remove
    filtered.classifier = bayes

    evaluation = classifiers.Evaluation(diabetes)
    evaluation.crossvalidate_model(bayes, diabetes, 10, Random(1))

    curve = plot.generate_thresholdcurve_data(evaluation, 0)
    self.assertEqual(
        13, curve.num_attributes, msg="number of attributes differs")
    self.assertEqual(
        769, curve.num_instances, msg="number of rows differs")

    # a few attributes that must be present in the generated data
    for attname in ("True Positives", "False Positive Rate", "Lift"):
        self.assertIsNotNone(
            curve.attribute_by_name(attname),
            msg="Failed to locate attribute: " + attname)
def test_instantiate_classifier(self):
    """
    Tests the instantiation of a filter from classname and options.
    """
    cname = "weka.filters.unsupervised.attribute.Remove"
    # keep the options in a variable, so that the failure message reports
    # the options the filter was actually instantiated with
    options = ["-R", "1,3"]
    flter = filters.Filter(classname=cname, options=options)
    self.assertIsNotNone(
        flter, msg="Failed to instantiate: " + cname + "/" + str(options))
    self.assertEqual(cname, flter.classname, msg="Classnames differ!")
def test_capabilities(self):
    """
    Tests that a filter exposes its capabilities.
    """
    # classname/options describe the filter that actually gets instantiated,
    # so that the failure message points at the right setup
    cname = "weka.filters.unsupervised.attribute.Remove"
    options = ["-R", "1,3"]
    flter = filters.Filter(classname=cname, options=options)
    self.assertIsNotNone(
        flter, msg="Failed to instantiate: " + cname + "/" + str(options))
    caps = flter.capabilities
    self.assertIsNotNone(caps, msg="Capabilities are None!")
def test_make_copy(self):
    """
    Tests the make_copy class method.
    """
    cname = "weka.filters.unsupervised.attribute.Remove"
    # keep the options in a variable, so that the failure messages report
    # the options the filter was actually instantiated with
    options = ["-R", "1,3"]
    flter = filters.Filter(classname=cname, options=options)
    self.assertIsNotNone(
        flter, msg="Failed to instantiate: " + cname + "/" + str(options))
    self.assertEqual(cname, flter.classname, msg="Classnames differ!")

    # the copy must exist and wrap the same class as the original
    flter2 = filters.Filter.make_copy(flter)
    self.assertIsNotNone(
        flter2, msg="Failed to instantiate: " + cname + "/" + str(options))
    self.assertEqual(cname, flter2.classname, msg="Classnames differ!")
def main():
    """
    Just runs some example code: loads/filters a dataset incrementally and
    saves it to a new file.
    """

    # setup the flow
    helper.print_title("Load/filter/save dataset (incrementally)")
    data_dir = helper.get_data_dir()
    iris_path = data_dir + os.sep + "iris.arff"

    flow = Flow(name="Load/filter/save dataset (incrementally)")

    supplier = FileSupplier()
    supplier.config["files"] = [iris_path]
    flow.actors.append(supplier)

    loader = LoadDataset()
    loader.config["incremental"] = True
    flow.actors.append(loader)

    remove_last = Filter()
    remove_last.config["setup"] = filters.Filter(
        classname="weka.filters.unsupervised.attribute.Remove",
        options=["-R", "last"])
    flow.actors.append(remove_last)

    renamer = RenameRelation()
    renamer.config["name"] = "iris-reduced"
    flow.actors.append(renamer)

    # the result ends up in the system's temp directory
    writer = InstanceDumper()
    out_path = tempfile.gettempdir() + os.sep + "out.arff"
    writer.config["output"] = out_path
    flow.actors.append(writer)

    # run the flow
    error = flow.setup()
    if error is not None:
        print("Error setting up flow:\n" + error)
    else:
        print("\n" + flow.tree + "\n")
        error = flow.execute()
        if error is not None:
            print("Error executing flow:\n" + error)
    flow.wrapup()
    flow.cleanup()
def test_plot_prc(self):
    """
    Tests the plot_prc method.
    """
    arff_loader = converters.Loader(classname="weka.core.converters.ArffLoader")
    diabetes = arff_loader.load_file(self.datafile("diabetes.arff"))
    diabetes.class_is_last()

    remove = filters.Filter(
        classname="weka.filters.unsupervised.attribute.Remove",
        options=["-R", "1-3"])
    bayes = classifiers.Classifier(
        classname="weka.classifiers.bayes.NaiveBayes")
    # NOTE(review): the filtered classifier is assembled here, but it is the
    # plain classifier that gets cross-validated below - confirm this is
    # intended
    filtered = classifiers.FilteredClassifier()
    filtered.filter = remove
    filtered.classifier = bayes

    evaluation = classifiers.Evaluation(diabetes)
    evaluation.crossvalidate_model(bayes, diabetes, 10, Random(1))

    # plot both class labels, without blocking on the plot window
    plot.plot_prc(evaluation, class_index=[0, 1], wait=False)
def test_instantiate_classifier(self):
    """
    Tests the instantiation of several classifier classes.
    """
    # plain classifier, first without and then with options
    j48_name = "weka.classifiers.trees.J48"
    for j48_options in (None, ["-C", "0.3"]):
        j48 = classifiers.Classifier(classname=j48_name, options=j48_options)
        self.assertIsNotNone(
            j48,
            msg="Failed to instantiate: " + j48_name + "/" + str(j48_options))
        self.assertEqual(j48_name, j48.classname, msg="Classnames differ!")

    # single classifier enhancer, base classifier supplied via options
    enhancer_name = "weka.classifiers.meta.FilteredClassifier"
    enhancer_options = ["-W", "weka.classifiers.trees.J48", "--", "-C", "0.3"]
    enhancer = classifiers.SingleClassifierEnhancer(
        classname=enhancer_name, options=enhancer_options)
    self.assertIsNotNone(
        enhancer,
        msg="Failed to instantiate: " + enhancer_name + "/" + str(enhancer_options))
    self.assertEqual(
        enhancer_name, enhancer.classname, msg="Classnames differ!")

    # assign a filter and read it back
    remove_name = "weka.filters.unsupervised.attribute.Remove"
    enhancer.filter = filters.Filter(
        classname=remove_name, options=["-R", "last"])
    self.assertEqual(
        remove_name, enhancer.filter.classname, msg="Classnames differ!")

    # dedicated wrapper class for the filtered classifier
    filtered = classifiers.FilteredClassifier()
    self.assertIsNotNone(
        filtered, msg="Failed to instantiate FilteredClassifier!")
    self.assertEqual(
        "weka.classifiers.meta.FilteredClassifier", filtered.classname,
        msg="Classnames differ!")

    # kernel classifier and the kernel assigned to it
    smo_name = "weka.classifiers.functions.SMO"
    smo = classifiers.KernelClassifier(classname=smo_name)
    self.assertIsNotNone(
        smo, msg="Failed to instantiate KernelClassifier: " + smo_name)
    self.assertEqual(smo_name, smo.classname, msg="Classnames differ!")

    rbf_name = "weka.classifiers.functions.supportVector.RBFKernel"
    rbf = classifiers.Kernel(classname=rbf_name)
    self.assertIsNotNone(
        rbf, msg="Failed to instantiate Kernel: " + rbf_name)
    smo.kernel = rbf
    self.assertEqual(
        rbf_name, smo.kernel.classname, msg="Kernel classnames differ!")

    # combiner of multiple classifiers
    vote_name = "weka.classifiers.meta.Vote"
    vote = classifiers.MultipleClassifiersCombiner(classname=vote_name)
    self.assertIsNotNone(
        vote,
        msg="Failed to instantiate MultipleClassifiersCombiner: " + vote_name)
    self.assertEqual(vote_name, vote.classname, msg="Classnames differ!")
def main():
    """
    Just runs some example code: cross-validates a clusterer.

    The flow loads the iris dataset, passes it through a Remove filter set
    up with "-R last", cross-validates an EM clusterer on the result and
    prints the outcome on the console.
    """

    # setup the flow
    helper.print_title("Cross-validate clusterer")
    iris = helper.get_data_dir() + os.sep + "iris.arff"

    flow = Flow(name="cross-validate clusterer")

    filesupplier = FileSupplier()
    filesupplier.config["files"] = [iris]
    flow.actors.append(filesupplier)

    loaddataset = LoadDataset()
    flow.actors.append(loaddataset)

    flter = Filter()
    flter.name = "Remove class"
    # the Filter actor takes the filter to apply via its "setup" option;
    # storing it under any other key leaves the actor's filter unconfigured
    flter.config["setup"] = filters.Filter(
        classname="weka.filters.unsupervised.attribute.Remove",
        options=["-R", "last"])
    flow.actors.append(flter)

    cv = CrossValidate()
    cv.config["setup"] = Clusterer(classname="weka.clusterers.EM")
    flow.actors.append(cv)

    console = Console()
    console.config["prefix"] = "Loglikelihood: "
    flow.actors.append(console)

    # run the flow
    msg = flow.setup()
    if msg is None:
        print("\n" + flow.tree + "\n")
        msg = flow.execute()
        if msg is not None:
            print("Error executing flow:\n" + msg)
    else:
        print("Error setting up flow:\n" + msg)
    # release resources regardless of whether setup/execution succeeded
    flow.wrapup()
    flow.cleanup()
def incremental():
    """
    Just runs some example code: loads/filters datasets incrementally.
    """

    # setup the flow
    helper.print_title("Filter datasets (incrementally)")
    iris_path = helper.get_data_dir() + os.sep + "iris.arff"
    anneal_path = helper.get_data_dir() + os.sep + "anneal.arff"

    flow = Flow(name="filter datasets (incrementally)")

    supplier = FileSupplier()
    supplier.config["files"] = [iris_path, anneal_path]
    flow.actors.append(supplier)

    loader = LoadDataset()
    loader.config["incremental"] = True
    flow.actors.append(loader)

    remove_first = Filter()
    remove_first.config["setup"] = filters.Filter(
        classname="weka.filters.unsupervised.attribute.Remove",
        options=["-R", "1"])
    remove_first.config["keep_relationname"] = True
    flow.actors.append(remove_first)

    printer = Console()
    flow.actors.append(printer)

    # run the flow
    error = flow.setup()
    if error is not None:
        print("Error setting up flow:\n" + error)
    else:
        print("\n" + flow.tree + "\n")
        error = flow.execute()
        if error is not None:
            print("Error executing flow:\n" + error)
    flow.wrapup()
    flow.cleanup()
def test_get_prc(self):
    """
    Tests the get_prc method.
    """
    arff_loader = converters.Loader(classname="weka.core.converters.ArffLoader")
    diabetes = arff_loader.load_file(self.datafile("diabetes.arff"))
    diabetes.class_is_last()

    remove = filters.Filter(
        classname="weka.filters.unsupervised.attribute.Remove",
        options=["-R", "1-3"])
    bayes = classifiers.Classifier(
        classname="weka.classifiers.bayes.NaiveBayes")
    # NOTE(review): the filtered classifier is assembled here, but it is the
    # plain classifier that gets cross-validated below; the expected area
    # corresponds to that setup - confirm this is intended
    filtered = classifiers.FilteredClassifier()
    filtered.filter = remove
    filtered.classifier = bayes

    evaluation = classifiers.Evaluation(diabetes)
    evaluation.crossvalidate_model(bayes, diabetes, 10, Random(1))

    # the area is computed from the threshold curve data of class index 0
    curve = plot.generate_thresholdcurve_data(evaluation, 0)
    prc_area = plot.get_prc(curve)
    self.assertAlmostEqual(0.892, prc_area, places=3, msg="PRC differs")
def main():
    """
    Just runs some example code: builds a clusterer incrementally and
    outputs the model at regular intervals.
    """

    # setup the flow
    count = 50
    count_str = str(count)
    helper.print_title("build clusterer incrementally")
    data_dir = helper.get_data_dir()
    iris_path = data_dir + os.sep + "iris.arff"

    flow = Flow(name="build clusterer incrementally")

    supplier = FileSupplier()
    supplier.config["files"] = [iris_path]
    flow.actors.append(supplier)

    # counter in storage, starting at zero
    counter_init = InitStorageValue()
    counter_init.config["storage_name"] = "counter"
    counter_init.config["value"] = 0
    flow.actors.append(counter_init)

    loader = LoadDataset()
    loader.config["incremental"] = True
    flow.actors.append(loader)

    remove_class = Filter(name="remove class attribute")
    remove_class.config["setup"] = filters.Filter(
        classname="weka.filters.unsupervised.attribute.Remove",
        options=["-R", "last"])
    flow.actors.append(remove_class)

    # increment the stored counter
    counter_update = UpdateStorageValue()
    counter_update.config["storage_name"] = "counter"
    counter_update.config["expression"] = "{X} + 1"
    flow.actors.append(counter_update)

    trainer = Train()
    trainer.config["setup"] = Clusterer(classname="weka.clusterers.Cobweb")
    flow.actors.append(trainer)

    model_picker = ContainerValuePicker()
    model_picker.config["value"] = "Model"
    model_picker.config["switch"] = True
    flow.actors.append(model_picker)

    # the tee's condition depends on the counter and the chosen interval
    model_tee = Tee(name="output model every " + count_str + " instances")
    model_tee.config["condition"] = "@{counter} % " + count_str + " == 0"
    flow.actors.append(model_tee)

    counter_trigger = Trigger(name="output # of instances")
    model_tee.actors.append(counter_trigger)

    counter_getter = GetStorageValue()
    counter_getter.config["storage_name"] = "counter"
    counter_trigger.actors.append(counter_getter)

    counter_console = Console()
    counter_console.config["prefix"] = "# of instances: "
    counter_trigger.actors.append(counter_console)

    model_console = Console(name="output model")
    model_tee.actors.append(model_console)

    # run the flow
    error = flow.setup()
    if error is not None:
        print("Error setting up flow:\n" + error)
    else:
        print("\n" + flow.tree + "\n")
        error = flow.execute()
        if error is not None:
            print("Error executing flow:\n" + error)
    flow.wrapup()
    flow.cleanup()