def test_build_classifier(self):
    """
    Tests the build_classifier method, once on a dataset with a nominal
    class and once on a dataset with a numeric class.
    """
    # (dataset file, classifier class, classifier options)
    setups = (
        # 1. nominal
        ("anneal.arff", "weka.classifiers.trees.J48", ["-C", "0.3"]),
        # 2. numeric
        ("bolts.arff", "weka.classifiers.functions.LinearRegression", ["-R", "0.1"]),
    )
    for filename, classname, opts in setups:
        arff_loader = converters.Loader(classname="weka.core.converters.ArffLoader")
        dataset_ = arff_loader.load_file(self.datafile(filename))
        self.assertIsNotNone(dataset_)
        dataset_.class_is_last()

        learner = classifiers.Classifier(classname=classname, options=opts)
        self.assertIsNotNone(
            learner,
            msg="Failed to instantiate: " + classname + "/" + str(opts))
        learner.build_classifier(dataset_)
def test_gridsearch(self):
    """
    Tests the GridSearch class: the x/y grid properties, the classifier
    property and the evaluation property.
    """

    def axis(prop, lower, upper):
        # expected grid-axis setup; only property name and bounds vary
        return {
            "property": prop,
            "expression": "pow(BASE,I)",
            "min": lower,
            "max": upper,
            "step": 1.0,
            "base": 10.0,
        }

    # make sure the package is installed
    install_missing_package("gridSearch", stop_jvm_and_exit=True)

    search = classifiers.GridSearch()

    # X axis: check defaults, then narrow the range
    self.assertEqual(axis("C", -3.0, 3.0), search.x, msg="x of grid differs")
    x_axis = search.x
    x_axis["min"] = -1.0
    x_axis["max"] = 2.0
    search.x = x_axis
    self.assertEqual(axis("C", -1.0, 2.0), search.x, msg="x of grid differs")

    # Y axis: check defaults, then narrow the range
    self.assertEqual(axis("kernel.gamma", -3.0, 3.0), search.y, msg="y of grid differs")
    y_axis = search.y
    y_axis["min"] = -1.0
    y_axis["max"] = 2.0
    search.y = y_axis
    self.assertEqual(axis("kernel.gamma", -1.0, 2.0), search.y, msg="y of grid differs")

    # evaluation can be set from a tag as well as from a string
    search.classifier = classifiers.Classifier(
        classname="weka.classifiers.functions.LinearRegression")
    search.evaluation = search.tags_evaluation.find("RMSE")
    self.assertEqual(
        "RMSE", str(search.evaluation),
        "evaluation differs: " + str(search.evaluation))
    search.evaluation = "ACC"
    self.assertEqual(
        "ACC", str(search.evaluation),
        "evaluation differs: " + str(search.evaluation))
    search.classifier = classifiers.Classifier(
        classname="weka.classifiers.trees.J48")
def test_classify_instance(self):
    """
    Tests the classify_instance method against known predictions for a
    nominal and a numeric class attribute.
    """
    # 1. nominal
    arff_loader = converters.Loader(classname="weka.core.converters.ArffLoader")
    anneal = arff_loader.load_file(self.datafile("anneal.arff"))
    self.assertIsNotNone(anneal)
    anneal.class_is_last()

    classname = "weka.classifiers.trees.J48"
    opts = ["-C", "0.3"]
    learner = classifiers.Classifier(classname=classname, options=opts)
    self.assertIsNotNone(
        learner, msg="Failed to instantiate: " + classname + "/" + str(opts))
    learner.build_classifier(anneal)

    labels = []
    for idx in range(10, 20):
        label = learner.classify_instance(anneal.get_instance(idx))
        self.assertIsNotNone(label)
        labels.append(label)
    self.assertEqual(
        [2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 5.0, 5.0, 2.0, 2.0],
        labels,
        msg="Classifications differ")

    # 2. numeric
    arff_loader = converters.Loader(classname="weka.core.converters.ArffLoader")
    bolts = arff_loader.load_file(self.datafile("bolts.arff"))
    self.assertIsNotNone(bolts)
    bolts.class_is_last()

    classname = "weka.classifiers.functions.LinearRegression"
    opts = ["-R", "0.1"]
    learner = classifiers.Classifier(classname=classname, options=opts)
    self.assertIsNotNone(
        learner, msg="Failed to instantiate: " + classname + "/" + str(opts))
    learner.build_classifier(bolts)

    estimates = []
    for idx in range(10):
        estimate = learner.classify_instance(bolts.get_instance(idx))
        self.assertIsNotNone(estimate)
        estimates.append(estimate)

    expected = [
        24.313, 33.359, 28.569, 26.365, 32.680,
        29.149, 26.998, 22.971, 13.160, 7.394,
    ]
    for idx, estimate in enumerate(estimates):
        self.assertAlmostEqual(
            expected[idx], estimate, places=3, msg="Classifications differ")
def test_learning_curve(self):
    """
    Tests the plot_learning_curve method, with the default increments and
    with explicit increments of 0.1.
    """
    diabetes = converters.Loader(
        classname="weka.core.converters.ArffLoader"
    ).load_file(self.datafile("diabetes.arff"))
    diabetes.class_is_last()

    learners = [
        classifiers.Classifier(classname=name)
        for name in (
            "weka.classifiers.trees.J48",
            "weka.classifiers.bayes.NaiveBayesUpdateable",
        )
    ]

    plot.plot_learning_curve(learners, diabetes, wait=False)
    plot.plot_learning_curve(learners, diabetes, increments=0.1, wait=False)
def test_plot_experiment(self):
    """
    Tests the plot_experiment method on the result matrix of a random split
    regression experiment, with and without standard deviations.
    """
    regression_files = [
        self.datafile("bolts.arff"),
        self.datafile("bodyfat.arff"),
        self.datafile("autoPrice.arff"),
    ]
    learners = [
        classifiers.Classifier("weka.classifiers.trees.REPTree"),
        classifiers.Classifier("weka.classifiers.functions.LinearRegression"),
        classifiers.Classifier("weka.classifiers.functions.SMOreg"),
    ]
    result_file = self.tempfile("results-rs.arff")

    experiment = experiments.SimpleRandomSplitExperiment(
        classification=False,
        runs=10,
        percentage=66.6,
        preserve_order=False,
        datasets=regression_files,
        classifiers=learners,
        result=result_file)
    experiment.setup()
    experiment.run()

    # evaluate
    results = converters.loader_for_file(result_file).load_file(result_file)
    matrix = experiments.ResultMatrix("weka.experiment.ResultMatrixPlainText")
    tester = experiments.Tester("weka.experiment.PairedCorrectedTTester")
    tester.resultmatrix = matrix
    column = results.attribute_by_name("Correlation_coefficient").index
    tester.instances = results
    tester.header(column)
    tester.multi_resultset_full(0, column)

    # plot
    plot.plot_experiment(
        matrix,
        title="Random split (w/ StdDev)",
        measure="Correlation coefficient",
        show_stdev=True,
        wait=False)
    plot.plot_experiment(
        matrix,
        title="Random split",
        measure="Correlation coefficient",
        wait=False)
def test_distribution_for_instance(self):
    """
    Tests the distribution_for_instance method: the length of the returned
    distribution is checked for a nominal and for a numeric class.
    """
    # (dataset, classifier, options, expected distribution size, failure message)
    setups = (
        # 1. nominal
        ("anneal.arff", "weka.classifiers.trees.J48", ["-C", "0.3"], 6,
         "Number of classes in prediction differ!"),
        # 2. numeric
        ("bolts.arff", "weka.classifiers.functions.LinearRegression", ["-R", "0.1"], 1,
         "Number of classes in prediction should be one for numeric classifier!"),
    )
    for filename, classname, opts, size, failure in setups:
        arff_loader = converters.Loader(classname="weka.core.converters.ArffLoader")
        instances = arff_loader.load_file(self.datafile(filename))
        self.assertIsNotNone(instances)
        instances.class_is_last()

        learner = classifiers.Classifier(classname=classname, options=opts)
        self.assertIsNotNone(
            learner,
            msg="Failed to instantiate: " + classname + "/" + str(opts))
        learner.build_classifier(instances)

        for idx in range(10):
            distribution = learner.distribution_for_instance(
                instances.get_instance(idx))
            self.assertIsNotNone(distribution)
            self.assertEqual(size, len(distribution), msg=failure)
def test_build_and_use_forecaster(self):
    """
    Tests building a forecaster on a training split, priming it and
    forecasting as many steps as the test split has instances.
    """
    self._ensure_package_is_installed()

    arff_loader = converters.Loader(classname="weka.core.converters.ArffLoader")
    airline = arff_loader.load_file(self.datafile("airline.arff"))
    self.assertIsNotNone(airline, msg="Data should not be none")
    airline.class_is_last()
    train, test = airline.train_test_split(90.0)

    fc = timeseries.WekaForecaster()
    self.assertIsNotNone(fc)
    # the field list is assigned twice: first as a list, then as a plain string
    fc.fields_to_forecast = ["passenger_numbers"]
    fc.base_forecaster = classifiers.Classifier(
        classname="weka.classifiers.functions.LinearRegression")
    fc.fields_to_forecast = "passenger_numbers"
    fc.build_forecaster(train)

    # prime with the last 12 training instances
    prime_count = 12
    prime_data = dataset.Instances.copy_instances(
        train, train.num_instances - prime_count, prime_count)
    fc.prime_forecaster(prime_data)

    steps = test.num_instances
    forecasts = fc.forecast(steps)
    self.assertIsNotNone(forecasts, msg="Predictions should not be none")
    self.assertEqual(
        len(forecasts), test.num_instances,
        msg="# of predictions should equal prime window size")
def test_generate_thresholdcurve_data(self):
    """
    Tests the generate_thresholdcurve_data method: checks the dimensions of
    the generated data and the presence of some of its attributes.
    """
    diabetes = converters.Loader(
        classname="weka.core.converters.ArffLoader"
    ).load_file(self.datafile("diabetes.arff"))
    diabetes.class_is_last()

    attribute_remover = filters.Filter(
        classname="weka.filters.unsupervised.attribute.Remove",
        options=["-R", "1-3"])
    bayes = classifiers.Classifier(classname="weka.classifiers.bayes.NaiveBayes")
    filtered = classifiers.FilteredClassifier()
    filtered.filter = attribute_remover
    filtered.classifier = bayes

    # NOTE: the plain NaiveBayes classifier is cross-validated here, not the
    # FilteredClassifier assembled above
    evaluation = classifiers.Evaluation(diabetes)
    evaluation.crossvalidate_model(bayes, diabetes, 10, Random(1))

    curve = plot.generate_thresholdcurve_data(evaluation, 0)
    self.assertEqual(13, curve.num_attributes, msg="number of attributes differs")
    self.assertEqual(769, curve.num_instances, msg="number of rows differs")
    for attribute_name in ("True Positives", "False Positive Rate", "Lift"):
        self.assertIsNotNone(
            curve.attribute_by_name(attribute_name),
            msg="Failed to locate attribute: " + attribute_name)
def test_batchpredictor(self):
    """
    Tests the batch predictor methods of a J48 classifier.
    """
    arff_loader = converters.Loader(classname="weka.core.converters.ArffLoader")
    anneal = arff_loader.load_file(self.datafile("anneal.arff"))
    self.assertIsNotNone(anneal)
    anneal.class_is_last()

    classname = "weka.classifiers.trees.J48"
    opts = []
    setup = classname + "/" + str(opts)
    learner = classifiers.Classifier(classname=classname, options=opts)
    self.assertIsNotNone(learner, msg="Failed to instantiate: " + setup)

    # batch predictor?
    self.assertTrue(learner.is_batchpredictor, msg="not a batch predictor: " + setup)

    # more efficient implementation? (only invoked, result is not checked)
    learner.has_efficient_batch_prediction()

    # batch size
    self.assertIsNotNone(learner.batch_size, msg="batch size is not initialized")

    # distributions_for_instances
    learner.build_classifier(anneal)
    distributions = learner.distributions_for_instances(anneal)
    self.assertIsNotNone(distributions, msg="no distributions generated")
    self.assertEqual(
        len(distributions), len(anneal),
        msg="number of predictions differ")
    self.assertEqual(
        len(distributions[0]), anneal.class_attribute.num_values,
        msg="size of distribution array does not match number of classes")
def test_json(self):
    """
    Tests the json methods: a flow is serialized to JSON, restored from it
    and serialized again; both JSON representations must match.
    """
    flow = control.Flow()
    flow.name = "blah"
    flow.config["annotation"] = "Some annotation text"

    flow.actors.append(source.Start())

    tee = control.Tee()
    flow.actors.append(tee)
    tee.actors.append(sink.Console())

    trigger = control.Trigger()
    flow.actors.append(trigger)

    supplier = source.FileSupplier()
    supplier.config["files"] = ["file1.arff", "file2.arff"]
    trigger.actors.append(supplier)

    trigger.actors.append(transformer.LoadDataset())
    trigger.actors.append(transformer.ClassSelector())

    trainer = transformer.Train()
    trainer.config["setup"] = classifiers.Classifier(
        classname="weka.classifiers.trees.J48", options=["-C", "0.3"])
    trigger.actors.append(trainer)

    # round trip: flow -> JSON -> flow -> JSON
    first_dump = flow.to_json()
    restored = control.Flow.from_json(first_dump)
    second_dump = restored.to_json()
    self.assertEqual(first_dump, second_dump, msg="JSON representations differ")
def test_randomsplit_regression(self):
    """
    Tests random split on regression: runs the experiment and checks that
    the tester generates a header and a result.
    """
    regression_files = [self.datafile("bolts.arff"), self.datafile("bodyfat.arff")]
    learners = [
        classifiers.Classifier(classname="weka.classifiers.rules.ZeroR"),
        classifiers.Classifier(classname="weka.classifiers.functions.LinearRegression"),
    ]
    result_file = self.tempfile("results-rs.arff")

    experiment = experiments.SimpleRandomSplitExperiment(
        classification=False,
        runs=10,
        percentage=66.6,
        preserve_order=False,
        datasets=regression_files,
        classifiers=learners,
        result=result_file)
    self.assertIsNotNone(experiment, msg="Failed to instantiate!")
    experiment.setup()
    experiment.run()

    # evaluate
    results = converters.loader_for_file(result_file).load_file(result_file)
    self.assertIsNotNone(results, msg="Failed to load data: " + result_file)

    matrix = experiments.ResultMatrix(classname="weka.experiment.ResultMatrixPlainText")
    self.assertIsNotNone(matrix, msg="Failed to instantiate!")

    tester = experiments.Tester(classname="weka.experiment.PairedCorrectedTTester")
    self.assertIsNotNone(tester, msg="Failed to instantiate!")
    tester.resultmatrix = matrix
    column = results.attribute_by_name("Correlation_coefficient").index
    tester.instances = results

    header = tester.header(column)
    self.assertGreater(len(header), 0, msg="Generated no header")
    comparison = tester.multi_resultset_full(0, column)
    self.assertGreater(len(comparison), 0, msg="Generated no result")
def test_multisearch(self):
    """
    Tests the MultiSearch class: the parameters, classifier and evaluation
    properties.
    NB: multisearch-weka-package must be installed
    (https://github.com/fracpete/multisearch-weka-package).
    """
    search = classifiers.MultiSearch()

    # search parameters
    gamma = classes.MathParameter()
    gamma.prop = "classifier.kernel.gamma"
    gamma.minimum = -3.0
    gamma.maximum = 3.0
    gamma.step = 1.0
    gamma.base = 10.0
    gamma.expression = "pow(BASE,I)"

    complexity = classes.ListParameter()
    complexity.prop = "classifier.C"
    complexity.values = ["-2.0", "-1.0", "0.0", "1.0", "2.0"]

    search.parameters = [gamma, complexity]
    self.assertEqual(
        str([gamma, complexity]), str(search.parameters),
        msg="parameters differ")

    # base classifier
    smoreg = classifiers.Classifier(
        classname="weka.classifiers.functions.SMOreg",
        options=["-K", "weka.classifiers.functions.supportVector.RBFKernel"])
    search.classifier = smoreg
    self.assertEqual(
        smoreg.to_commandline(), search.classifier.to_commandline(),
        msg="classifiers differ")

    # evaluation can be set from a tag as well as from a string
    search.classifier = classifiers.Classifier(
        classname="weka.classifiers.functions.LinearRegression")
    search.evaluation = search.tags_evaluation.find("RMSE")
    self.assertEqual(
        "RMSE", str(search.evaluation),
        "evaluation differs: " + str(search.evaluation))
    search.evaluation = "ACC"
    self.assertEqual(
        "ACC", str(search.evaluation),
        "evaluation differs: " + str(search.evaluation))
    search.classifier = classifiers.Classifier(
        classname="weka.classifiers.trees.J48")
def test_instantiate_classifier(self):
    """
    Tests the instantiation of several classifier classes and checks the
    classnames they report.
    """
    # plain classifier: first without, then with options
    j48 = "weka.classifiers.trees.J48"
    for opts in (None, ["-C", "0.3"]):
        plain = classifiers.Classifier(classname=j48, options=opts)
        self.assertIsNotNone(
            plain, msg="Failed to instantiate: " + j48 + "/" + str(opts))
        self.assertEqual(j48, plain.classname, msg="Classnames differ!")

    # single classifier enhancer, with a filter assigned afterwards
    enhancer_name = "weka.classifiers.meta.FilteredClassifier"
    enhancer_opts = ["-W", "weka.classifiers.trees.J48", "--", "-C", "0.3"]
    enhancer = classifiers.SingleClassifierEnhancer(
        classname=enhancer_name, options=enhancer_opts)
    self.assertIsNotNone(
        enhancer,
        msg="Failed to instantiate: " + enhancer_name + "/" + str(enhancer_opts))
    self.assertEqual(enhancer_name, enhancer.classname, msg="Classnames differ!")

    filter_name = "weka.filters.unsupervised.attribute.Remove"
    enhancer.filter = filters.Filter(classname=filter_name, options=["-R", "last"])
    self.assertEqual(filter_name, enhancer.filter.classname, msg="Classnames differ!")

    # filtered classifier
    filtered = classifiers.FilteredClassifier()
    self.assertIsNotNone(filtered, msg="Failed to instantiate FilteredClassifier!")
    self.assertEqual(
        "weka.classifiers.meta.FilteredClassifier", filtered.classname,
        msg="Classnames differ!")

    # kernel classifier and kernel
    smo_name = "weka.classifiers.functions.SMO"
    smo = classifiers.KernelClassifier(classname=smo_name)
    self.assertIsNotNone(
        smo, msg="Failed to instantiate KernelClassifier: " + smo_name)
    self.assertEqual(smo_name, smo.classname, msg="Classnames differ!")

    kernel_name = "weka.classifiers.functions.supportVector.RBFKernel"
    rbf = classifiers.Kernel(classname=kernel_name)
    self.assertIsNotNone(rbf, msg="Failed to instantiate Kernel: " + kernel_name)
    smo.kernel = rbf
    self.assertEqual(kernel_name, smo.kernel.classname, msg="Kernel classnames differ!")

    # multiple classifiers combiner
    vote_name = "weka.classifiers.meta.Vote"
    vote = classifiers.MultipleClassifiersCombiner(classname=vote_name)
    self.assertIsNotNone(
        vote,
        msg="Failed to instantiate MultipleClassifiersCombiner: " + vote_name)
    self.assertEqual(vote_name, vote.classname, msg="Classnames differ!")
def test_crossvalidation_classification(self):
    """
    Tests cross-validated classification: runs the experiment and checks
    that the tester generates a header and a result.
    """
    classification_files = [self.datafile("iris.arff"), self.datafile("anneal.arff")]
    learners = [
        classifiers.Classifier(classname="weka.classifiers.rules.ZeroR"),
        classifiers.Classifier(classname="weka.classifiers.trees.J48"),
    ]
    result_file = self.tempfile("results-cv.arff")

    experiment = experiments.SimpleCrossValidationExperiment(
        classification=True,
        runs=10,
        folds=10,
        datasets=classification_files,
        classifiers=learners,
        result=result_file)
    self.assertIsNotNone(experiment, msg="Failed to instantiate!")
    experiment.setup()
    experiment.run()

    # evaluate
    results = converters.loader_for_file(result_file).load_file(result_file)
    self.assertIsNotNone(results, msg="Failed to load data: " + result_file)

    matrix = experiments.ResultMatrix(classname="weka.experiment.ResultMatrixPlainText")
    self.assertIsNotNone(matrix, msg="Failed to instantiate!")

    tester = experiments.Tester(classname="weka.experiment.PairedCorrectedTTester")
    self.assertIsNotNone(tester, msg="Failed to instantiate!")
    tester.resultmatrix = matrix
    column = results.attribute_by_name("Percent_correct").index
    tester.instances = results

    header = tester.header(column)
    self.assertGreater(len(header), 0, msg="Generated no header")
    comparison = tester.multi_resultset_full(0, column)
    self.assertGreater(len(comparison), 0, msg="Generated no result")
def test_capabilities(self):
    """
    Tests that a classifier exposes its capabilities.
    """
    classname = "weka.classifiers.trees.J48"
    opts = None
    learner = classifiers.Classifier(classname=classname, options=opts)
    self.assertIsNotNone(
        learner, msg="Failed to instantiate: " + classname + "/" + str(opts))
    self.assertEqual(classname, learner.classname, msg="Classnames differ!")

    self.assertIsNotNone(learner.capabilities, msg="Capabilities are None!")
def test_plot_classifier_errors(self):
    """
    Tests the plot_classifier_errors method with the predictions of a
    cross-validated LinearRegression model.
    """
    arff_loader = converters.Loader(classname="weka.core.converters.ArffLoader")
    bolts = arff_loader.load_file(self.datafile("bolts.arff"))
    self.assertIsNotNone(bolts)
    bolts.class_is_last()

    regression = classifiers.Classifier(
        classname="weka.classifiers.functions.LinearRegression",
        options=["-S", "1", "-C"])
    evl = classifiers.Evaluation(bolts)
    evl.crossvalidate_model(regression, bolts, 10, Random(42))

    plot.plot_classifier_errors(evl.predictions, wait=False)
def test_plot_dot_graph(self):
    """
    Tests the plot_dot_graph method with the graph of a built J48 tree.
    """
    diabetes = converters.Loader(
        classname="weka.core.converters.ArffLoader"
    ).load_file(self.datafile("diabetes.arff"))
    diabetes.class_is_last()

    tree = classifiers.Classifier(
        classname="weka.classifiers.trees.J48",
        options=["-C", "0.3"])
    tree.build_classifier(diabetes)

    graph.plot_dot_graph(tree.graph)
def test_plot_prc(self):
    """
    Tests the plot_prc method for the class indices 0 and 1.
    """
    diabetes = converters.Loader(
        classname="weka.core.converters.ArffLoader"
    ).load_file(self.datafile("diabetes.arff"))
    diabetes.class_is_last()

    attribute_remover = filters.Filter(
        classname="weka.filters.unsupervised.attribute.Remove",
        options=["-R", "1-3"])
    bayes = classifiers.Classifier(classname="weka.classifiers.bayes.NaiveBayes")
    filtered = classifiers.FilteredClassifier()
    filtered.filter = attribute_remover
    filtered.classifier = bayes

    # NOTE: the plain NaiveBayes classifier is cross-validated here, not the
    # FilteredClassifier assembled above
    evaluation = classifiers.Evaluation(diabetes)
    evaluation.crossvalidate_model(bayes, diabetes, 10, Random(1))

    plot.plot_prc(evaluation, class_index=[0, 1], wait=False)
def test_get_prc(self):
    """
    Tests the get_prc method against a known area for class index 0.
    """
    diabetes = converters.Loader(
        classname="weka.core.converters.ArffLoader"
    ).load_file(self.datafile("diabetes.arff"))
    diabetes.class_is_last()

    attribute_remover = filters.Filter(
        classname="weka.filters.unsupervised.attribute.Remove",
        options=["-R", "1-3"])
    bayes = classifiers.Classifier(classname="weka.classifiers.bayes.NaiveBayes")
    filtered = classifiers.FilteredClassifier()
    filtered.filter = attribute_remover
    filtered.classifier = bayes

    # NOTE: the plain NaiveBayes classifier is cross-validated here, not the
    # FilteredClassifier assembled above
    evaluation = classifiers.Evaluation(diabetes)
    evaluation.crossvalidate_model(bayes, diabetes, 10, Random(1))

    curve = plot.generate_thresholdcurve_data(evaluation, 0)
    self.assertAlmostEqual(0.892, plot.get_prc(curve), places=3, msg="PRC differs")
def test_evaluate_forecaster(self):
    """
    Tests evaluating a forecaster: configures the forecaster and its lag
    maker, checks the resulting setup strings and runs a TSEvaluation.
    """
    self._ensure_package_is_installed()

    arff_loader = converters.Loader(classname="weka.core.converters.ArffLoader")
    airline = arff_loader.load_file(self.datafile("airline.arff"))
    self.assertIsNotNone(airline, msg="Data should not be none")
    airline.class_is_last()

    # forecaster setup
    fc = timeseries.WekaForecaster()
    fc.fields_to_forecast = ["passenger_numbers"]
    fc.base_forecaster = classifiers.Classifier(
        classname="weka.classifiers.functions.LinearRegression")

    # lag maker setup
    fc.tslag_maker.timestamp_field = "Date"
    fc.tslag_maker.adjust_for_variance = False
    fc.tslag_maker.include_powers_of_time = True
    fc.tslag_maker.include_timelag_products = True
    fc.tslag_maker.remove_leading_instances_with_unknown_lag_values = False
    fc.tslag_maker.add_month_of_year = True
    fc.tslag_maker.add_quarter_of_year = True

    self.assertEqual(
        "LinearRegression -S 0 -R 1.0E-8 -num-decimal-places 4",
        str(fc.algorithm_name),
        msg="algorithm name")
    self.assertEqual(
        "weka.filters.supervised.attribute.TSLagMaker -F passenger_numbers -L 1 -M 12 -G Date -month -quarter",
        fc.tslag_maker.to_commandline(),
        msg="lag maker commandline")

    # evaluation setup
    ts_eval = timeseries.TSEvaluation(airline, 0.0)
    ts_eval.evaluate_on_training_data = False
    ts_eval.evaluate_on_test_data = False
    ts_eval.prime_window_size = fc.tslag_maker.max_lag
    ts_eval.prime_for_test_data_with_test_data = True
    ts_eval.rebuild_model_after_each_test_forecast_step = False
    ts_eval.forecast_future = True
    ts_eval.horizon = 20
    ts_eval.evaluation_modules = "MAE,RMSE"
    ts_eval.evaluate(fc)
def getInstance(self, gameState):
    """
    Classifies the current game state with the stored Weka model.

    The game state is written as a single data row to "data/instances.arff",
    the file is loaded back through Weka's ArffLoader and the row is
    classified with the classifier deserialized from "data/out.model".

    :param gameState: the game state to describe; read through
        ``livingGhosts``, ``data.ghostDistances``, ``data.agentStates``,
        ``data.score``, ``hasWall`` and ``getPacmanPosition``
    :return: the result of ``classify_instance`` for the last instance in
        the generated file, or None if the file yielded no instance
    """
    # ARFF header; the class attribute ("move") comes last and is written as
    # "?" in the data row.
    # NOTE(review): attribute names and order presumably have to match the
    # data the model was trained on - confirm before changing any of them.
    headers = "".join([
        "@relation prueba\n\n",
        "@attribute score5 NUMERIC\n",
        "@attribute score2 NUMERIC\n",
        "@attribute score NUMERIC\n",
        "@attribute ghost1-living {True, False}\n",
        "@attribute ghost2-living {True, False}\n",
        "@attribute ghost3-living {True, False}\n",
        "@attribute ghost4-living {True, False}\n",
        "@attribute distance-ghost1 NUMERIC \n",
        "@attribute distance-ghost2 NUMERIC \n",
        "@attribute distance-ghost3 NUMERIC \n",
        "@attribute distance-ghost4 NUMERIC \n",
        "@attribute posX NUMERIC\n",
        "@attribute posY NUMERIC\n",
        "@attribute direction {North, South, East, West, Stop}\n",
        "@attribute wall-east {True, False}\n",
        "@attribute wall-south {True, False}\n",
        "@attribute wall-west {True, False}\n",
        "@attribute wall-north {True, False}\n",
        "@attribute move {North, South, East, West, Stop}\n\n",
        "@data\n\n\n",
    ])

    # Wrap the deserialized model; previously the loaded objects were ignored
    # and a freshly constructed (untrained) Classifier was used for prediction.
    objects = serialization.read_all("data/out.model")
    cls = Classifier(jobject=objects[0])

    # discard the first value, as it is PacMan
    fields = [str(alive) for alive in gameState.livingGhosts[1:]]
    # a missing ghost distance is written as 0
    fields.extend(
        "0" if distance is None else str(distance)
        for distance in gameState.data.ghostDistances)

    pacman_state = gameState.data.agentStates[0]
    position = pacman_state.getPosition()
    fields.append(str(position[0]))
    fields.append(str(position[1]))
    fields.append(str(pacman_state.getDirection()))

    # walls around PacMan, in the order of the wall-* attributes above
    pac_x, pac_y = gameState.getPacmanPosition()[0], gameState.getPacmanPosition()[1]
    neighbours = ((pac_x - 1, pac_y), (pac_x, pac_y - 1),
                  (pac_x + 1, pac_y), (pac_x, pac_y + 1))
    fields.extend(str(gameState.hasWall(x, y)) for x, y in neighbours)

    # unknown class value
    fields.append("?")

    # the three score attributes lead the row
    scores = [
        str(int(BustersAgent.getScore5(self, gameState))),
        str(int(BustersAgent.getScore2(self, gameState))),
        str(gameState.data.score),
    ]
    line = ",".join(scores + fields)

    # the context manager closes the file even if a write fails
    with open('data/instances.arff', 'w+') as arff_file:
        arff_file.write(headers)
        arff_file.write(line)

    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file("data/instances.arff")
    data.class_is_last()  # set class attribute

    pred = None
    for inst in data:
        pred = cls.classify_instance(inst)
    return pred
def test_result_matrix(self):
    """
    Tests the ResultMatrix class.

    Runs a cross-validation experiment (ZeroR and J48 on iris and anneal),
    fills a result matrix through the tester and then exercises the
    matrix's dimension, column, row, mean and stdev accessors.
    """
    datasets = [self.datafile("iris.arff"), self.datafile("anneal.arff")]
    cls = [
        classifiers.Classifier(classname="weka.classifiers.rules.ZeroR"),
        classifiers.Classifier(classname="weka.classifiers.trees.J48"),
    ]
    outfile = self.tempfile("results-cv.arff")
    exp = experiments.SimpleCrossValidationExperiment(
        classification=True,
        runs=10,
        folds=10,
        datasets=datasets,
        classifiers=cls,
        result=outfile)
    self.assertIsNotNone(exp, msg="Failed to instantiate!")
    exp.setup()
    exp.run()

    # evaluate
    loader = converters.loader_for_file(outfile)
    data = loader.load_file(outfile)
    self.assertIsNotNone(data, msg="Failed to load data: " + outfile)
    matrix = experiments.ResultMatrix(classname="weka.experiment.ResultMatrixPlainText")
    self.assertIsNotNone(matrix, msg="Failed to instantiate!")
    tester = experiments.Tester(classname="weka.experiment.PairedCorrectedTTester")
    self.assertIsNotNone(tester, msg="Failed to instantiate!")
    tester.resultmatrix = matrix
    comparison_col = data.attribute_by_name("Percent_correct").index
    tester.instances = data
    self.assertGreater(len(tester.header(comparison_col)), 0, msg="Generated no header")
    self.assertGreater(len(tester.multi_resultset_full(0, comparison_col)), 0, msg="Generated no result")

    # dimensions
    self.assertEqual(2, matrix.rows, msg="# of rows differ")
    self.assertEqual(2, matrix.columns, msg="# of columns differ")

    # cols
    self.assertTrue(matrix.get_col_name(0).find("ZeroR") > -1, msg="ZeroR should be part of col name")
    self.assertTrue(matrix.get_col_name(1).find("J48") > -1, msg="J48 should be part of col name")
    self.assertIsNone(matrix.get_col_name(2), msg="Column name should not exist")
    matrix.set_col_name(0, "zeror")
    self.assertTrue(matrix.get_col_name(0).find("zeror") > -1, msg="zeror should be part of col name")
    self.assertFalse(matrix.is_col_hidden(1), msg="Column should be visible")
    matrix.hide_col(1)
    self.assertTrue(matrix.is_col_hidden(1), msg="Column should be hidden")
    matrix.show_col(1)
    self.assertFalse(matrix.is_col_hidden(1), msg="Column should be visible again")

    # rows
    self.assertEqual("iris", matrix.get_row_name(0), msg="Row names differ")
    self.assertEqual("anneal", matrix.get_row_name(1), msg="Row names differ")
    # out-of-range row index; this previously queried the column name again
    self.assertIsNone(matrix.get_row_name(2), msg="Row name should not exist")
    matrix.set_row_name(0, "IRIS")
    self.assertEqual("IRIS", matrix.get_row_name(0), msg="Row names differ")
    self.assertFalse(matrix.is_row_hidden(1), msg="Row should be visible")
    matrix.hide_row(1)
    self.assertTrue(matrix.is_row_hidden(1), msg="Row should be hidden")
    matrix.show_row(1)
    self.assertFalse(matrix.is_row_hidden(1), msg="Row should be visible again")

    # mean
    self.assertAlmostEqual(33.3, matrix.get_mean(0, 0), places=1, msg="Means differ")
    self.assertAlmostEqual(54.75, matrix.average(0), places=2, msg="Averages differ")
    matrix.set_mean(0, 0, 10)
    self.assertAlmostEqual(10.0, matrix.get_mean(0, 0), places=1, msg="Means differ")

    # stdev
    self.assertAlmostEqual(0.0, matrix.get_stdev(0, 0), places=1, msg="Stdevs differ")
    matrix.set_stdev(0, 0, 0.3)
    self.assertAlmostEqual(0.3, matrix.get_stdev(0, 0), places=1, msg="Stdevs differ")
def test_capabilities(self):
    """
    Tests the Capabilities class: instantiation, the owner property and
    enabling/disabling of capabilities and dependencies.
    """
    # without owner
    abilities = capabilities.Capabilities()
    self.assertIsNotNone(abilities, msg="Failed to instantiate empty capabilities")
    self.assertEqual(0, len(abilities.capabilities()), msg="Should have no capabilities")

    # with owner
    owner = classifiers.Classifier(classname="weka.classifiers.trees.J48")
    abilities = capabilities.Capabilities(owner=owner)
    self.assertIsNotNone(abilities, msg="Failed to instantiate empty capabilities")
    self.assertEqual(0, len(abilities.capabilities()), msg="Should have no capabilities")

    # owner can be removed and set again
    self.assertIsNotNone(abilities.owner, msg="Should have an owner")
    abilities.owner = None
    self.assertIsNone(abilities.owner, msg="Should have no owner")
    abilities.owner = owner
    self.assertIsNotNone(abilities.owner, msg="Should have an owner")

    # single capability, first enabled and then turned into a dependency
    numeric_class = capabilities.Capability(member="NUMERIC_CLASS")
    abilities.enable(numeric_class)
    self.assertTrue(
        abilities.handles(numeric_class),
        "Should have capability: " + str(numeric_class))
    self.assertFalse(
        abilities.has_dependency(numeric_class),
        "Should have no dependency: " + str(numeric_class))
    self.assertEqual(0, len(abilities.dependencies()), msg="Should have no dependencies")
    abilities.enable_dependency(numeric_class)
    self.assertTrue(
        abilities.has_dependency(numeric_class),
        "Should have dependency: " + str(numeric_class))
    self.assertEqual(1, len(abilities.dependencies()), msg="Should have one dependency")

    # everything off
    abilities.disable_all()
    self.assertEqual(0, len(abilities.capabilities()), msg="Should have no capabilities")
    self.assertEqual(0, len(abilities.dependencies()), msg="Should have no dependencies")

    # everything on
    abilities.enable_all()
    self.assertGreater(len(abilities.capabilities()), 0, msg="Should have capabilities")
    self.assertGreater(len(abilities.dependencies()), 0, msg="Should have dependencies")

    # attribute dependencies only
    abilities.disable_all()
    abilities.enable_all_attribute_dependencies()
    self.assertEqual(0, len(abilities.capabilities()), msg="Should have no capabilities")
    self.assertGreater(len(abilities.dependencies()), 0, msg="Should have dependencies")

    # attributes only
    abilities.disable_all()
    abilities.enable_all_attributes()
    self.assertGreater(len(abilities.capabilities()), 0, msg="Should have capabilities")
    self.assertEqual(0, len(abilities.dependencies()), msg="Should have no dependencies")

    # class dependencies only
    abilities.disable_all()
    abilities.enable_all_class_dependencies()
    self.assertEqual(0, len(abilities.capabilities()), msg="Should have no capabilities")
    self.assertGreater(len(abilities.dependencies()), 0, msg="Should have dependencies")

    # classes only
    abilities.disable_all()
    abilities.enable_all_classes()
    self.assertGreater(len(abilities.capabilities()), 0, msg="Should have capabilities")
    self.assertEqual(0, len(abilities.dependencies()), msg="Should have no dependencies")
def test_evaluation(self):
    """
    Tests the Evaluation class.

    Part 1 uses the nominal-class dataset "anneal.arff" with J48:
    class priors, 10-fold cross-validation statistics, train/test
    evaluation, discarding of predictions and prediction output.
    Part 2 uses the numeric-class dataset "bolts.arff" with
    LinearRegression and checks cross-validation statistics.
    """
    # 1. nominal
    loader = converters.Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(self.datafile("anneal.arff"))
    self.assertIsNotNone(data)
    data.class_is_last()

    cname = "weka.classifiers.trees.J48"
    cls = classifiers.Classifier(classname=cname)
    self.assertIsNotNone(cls, msg="Failed to instantiate: " + cname)

    evl = classifiers.Evaluation(data)
    self.assertIsNotNone(evl, msg="Failed to instantiate Evaluation")
    self.assertIsNone(evl.header.equal_headers(data))
    self.assertEqual([9, 100, 685, 1, 68, 41], evl.class_priors.tolist())

    # cross-validate
    evl.crossvalidate_model(cls, data, 10, classes.Random(1))
    self.assertEqual(
        [[5.0, 0.0, 3.0, 0.0, 0.0, 0.0],
         [0.0, 99.0, 0.0, 0.0, 0.0, 0.0],
         [0.0, 2.0, 680.0, 0.0, 0.0, 2.0],
         [0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
         [0.0, 0.0, 0.0, 0.0, 67.0, 0.0],
         [0.0, 0.0, 7.0, 0.0, 0.0, 33.0]],
        evl.confusion_matrix.tolist(),
        msg="confusion matrix differs")

    # Expected statistics, checked in this order. Each entry is:
    #   (name, class index, expected value, places)
    # - class index: None for a plain attribute, otherwise the argument
    #   passed to the per-class method of that name
    # - places: None for an exact comparison (assertEqual), otherwise the
    #   number of decimal places for assertAlmostEqual
    expected_statistics = [
        ("area_under_prc", 0, 0.749, 3),
        ("area_under_roc", 0, 0.931, 3),
        ("avg_cost", None, 0, 3),
        ("correct", None, 884, None),
        ("coverage_of_test_cases_by_predicted_regions", None, 98.775, 3),
        ("error_rate", None, 0.016, 3),
        ("f_measure", 0, 0.769, 3),
        ("false_negative_rate", 0, 0.375, 3),
        ("false_positive_rate", 0, 0.0, 3),
        ("incorrect", None, 14, None),
        ("kappa", None, 0.961, 3),
        ("kb_information", None, 1017.196, 3),
        ("kb_mean_information", None, 1.133, 3),
        ("kb_relative_information", None, 95.017, 3),
        ("matthews_correlation_coefficient", 0, 0.789, 3),
        ("mean_absolute_error", None, 0.006, 3),
        ("mean_prior_absolute_error", None, 0.134, 3),
        ("num_false_negatives", 0, 3.0, 3),
        ("num_false_positives", 0, 0.0, 3),
        ("num_instances", None, 898.0, 3),
        ("num_true_negatives", 0, 890.0, 3),
        ("num_true_positives", 0, 5.0, 3),
        ("percent_correct", None, 98.44, 2),
        ("percent_incorrect", None, 1.56, 2),
        ("percent_unclassified", None, 0, 2),
        ("precision", 0, 1.0, 3),
        ("recall", 0, 0.625, 3),
        ("relative_absolute_error", None, 4.187, 3),
        ("root_mean_prior_squared_error", None, 0.258, 3),
        ("root_mean_squared_error", None, 0.067, 3),
        ("root_relative_squared_error", None, 25.912, 3),
        ("sf_entropy_gain", None, -75.044, 3),
        ("sf_mean_entropy_gain", None, -0.084, 3),
        ("sf_mean_prior_entropy", None, 1.192, 3),
        ("sf_mean_scheme_entropy", None, 1.276, 3),
        ("size_of_predicted_regions", None, 16.722, 3),
        ("unclassified", None, 0, None),
        ("total_cost", None, 0.0, 3),
        ("unweighted_macro_f_measure", None, 0.926, 3),
        ("unweighted_micro_f_measure", None, 0.984, 3),
        ("weighted_area_under_prc", None, 0.992, 3),
        ("weighted_area_under_roc", None, 0.995, 3),
        ("weighted_f_measure", None, 0.984, 3),
        ("weighted_false_negative_rate", None, 0.016, 3),
        ("weighted_false_positive_rate", None, 0.036, 3),
        ("weighted_matthews_correlation", None, 0.959, 3),
        ("weighted_precision", None, 0.984, 3),
        ("weighted_recall", None, 0.984, 3),
        ("weighted_true_negative_rate", None, 0.964, 3),
        ("weighted_true_positive_rate", None, 0.984, 3),
    ]
    for name, class_index, expected, places in expected_statistics:
        if class_index is None:
            label = name
            actual = getattr(evl, name)
        else:
            label = "%s(%d)" % (name, class_index)
            actual = getattr(evl, name)(class_index)
        if places is None:
            self.assertEqual(expected, actual, msg=label + " differs")
        else:
            self.assertAlmostEqual(expected, actual, places=places, msg=label + " differs")

    self.assertIsNotNone(evl.predictions, msg="Should have had predictions")
    self.assertEqual(
        data.num_instances, len(evl.predictions),
        msg="Number of predictions differ from training set size")

    # train/test
    evl = classifiers.Evaluation(data)
    self.assertIsNotNone(evl, msg="Failed to instantiate Evaluation")
    cname = "weka.classifiers.trees.J48"
    cls = classifiers.Classifier(classname=cname)
    self.assertIsNotNone(cls, msg="Failed to instantiate: " + cname)
    cls.build_classifier(data)
    evl.test_model(cls, data)
    self.assertAlmostEqual(99.777, evl.percent_correct, places=3, msg="percent_correct differs")

    # predictions: no
    cname = "weka.classifiers.trees.J48"
    cls = classifiers.Classifier(classname=cname)
    self.assertIsNotNone(cls, msg="Failed to instantiate: " + cname)
    evl = classifiers.Evaluation(data)
    # check the instance before it is configured, so the guard can actually fire
    self.assertIsNotNone(evl, msg="Failed to instantiate Evaluation")
    evl.discard_predictions = True
    evl.crossvalidate_model(cls, data, 10, classes.Random(1))
    self.assertIsNone(evl.predictions, msg="Should have had no predictions")

    # output
    cname = "weka.classifiers.trees.J48"
    cls = classifiers.Classifier(classname=cname)
    self.assertIsNotNone(cls, msg="Failed to instantiate: " + cname)
    evl = classifiers.Evaluation(data)
    self.assertIsNotNone(evl, msg="Failed to instantiate Evaluation")
    # separate name so the classifier's class name is not shadowed
    output_cname = "weka.classifiers.evaluation.output.prediction.PlainText"
    pout = classifiers.PredictionOutput(classname=output_cname)
    self.assertEqual(output_cname, pout.classname, msg="Output classnames differ!")
    evl.crossvalidate_model(cls, data, 10, classes.Random(1), output=pout)
    self.assertGreater(len(str(pout)), 0, msg="Should have generated output")

    # 2. numeric
    loader = converters.Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(self.datafile("bolts.arff"))
    self.assertIsNotNone(data)
    data.class_is_last()

    cname = "weka.classifiers.functions.LinearRegression"
    cls = classifiers.Classifier(classname=cname)
    self.assertIsNotNone(cls, msg="Failed to instantiate: " + cname)

    evl = classifiers.Evaluation(data)
    self.assertIsNotNone(evl, msg="Failed to instantiate Evaluation")
    self.assertIsNone(evl.header.equal_headers(data))

    # cross-validate
    evl.crossvalidate_model(cls, data, 10, classes.Random(1))
    self.assertAlmostEqual(0.919, evl.correlation_coefficient, places=3, msg="correlation_coefficient differs")
    self.assertAlmostEqual(10.697, evl.error_rate, places=3, msg="error_rate differs")