Beispiel #1
0
    def test_build_classifier(self):
        """
        Checks that build_classifier runs on a nominal and a numeric dataset.

        J48 is trained on anneal.arff and LinearRegression on bolts.arff; in
        both cases the last attribute is used as the class.
        """
        scenarios = [
            # 1. nominal
            ("anneal.arff", "weka.classifiers.trees.J48", ["-C", "0.3"]),
            # 2. numeric
            ("bolts.arff", "weka.classifiers.functions.LinearRegression", ["-R", "0.1"]),
        ]
        for arff_name, cname, options in scenarios:
            arff_loader = converters.Loader(classname="weka.core.converters.ArffLoader")
            dataset = arff_loader.load_file(self.datafile(arff_name))
            self.assertIsNotNone(dataset)
            dataset.class_is_last()

            model = classifiers.Classifier(classname=cname, options=options)
            self.assertIsNotNone(model, msg="Failed to instantiate: " + cname + "/" + str(options))

            model.build_classifier(dataset)
Beispiel #2
0
    def test_gridsearch(self):
        """
        Exercises the GridSearch wrapper.

        Verifies the initial X and Y grid settings, narrows both ranges and
        verifies them again, then sets the evaluation metric (once via a tag
        lookup, once via a plain string) together with the base classifier.
        """
        # make sure the package is installed
        install_missing_package("gridSearch", stop_jvm_and_exit=True)

        search = classifiers.GridSearch()

        # X axis: initial state, then narrowed range
        initial_x = {"property": "C", "expression": "pow(BASE,I)", "min": -3.0, "max": 3.0, "step": 1.0, "base": 10.0}
        self.assertEqual(initial_x, search.x, msg="x of grid differs")
        grid_x = search.x
        grid_x["min"] = -1.0
        grid_x["max"] = 2.0
        search.x = grid_x
        narrowed_x = {"property": "C", "expression": "pow(BASE,I)", "min": -1.0, "max": 2.0, "step": 1.0, "base": 10.0}
        self.assertEqual(narrowed_x, search.x, msg="x of grid differs")

        # Y axis: initial state, then narrowed range
        initial_y = {"property": "kernel.gamma", "expression": "pow(BASE,I)", "min": -3.0, "max": 3.0, "step": 1.0, "base": 10.0}
        self.assertEqual(initial_y, search.y, msg="y of grid differs")
        grid_y = search.y
        grid_y["min"] = -1.0
        grid_y["max"] = 2.0
        search.y = grid_y
        narrowed_y = {"property": "kernel.gamma", "expression": "pow(BASE,I)", "min": -1.0, "max": 2.0, "step": 1.0, "base": 10.0}
        self.assertEqual(narrowed_y, search.y, msg="y of grid differs")

        # evaluation metric assigned from a tag object
        search.classifier = classifiers.Classifier(classname="weka.classifiers.functions.LinearRegression")
        search.evaluation = search.tags_evaluation.find("RMSE")
        self.assertEqual("RMSE", str(search.evaluation), msg="evaluation differs: " + str(search.evaluation))

        # evaluation metric assigned from a plain string
        search.evaluation = "ACC"
        self.assertEqual("ACC", str(search.evaluation), msg="evaluation differs: " + str(search.evaluation))
        search.classifier = classifiers.Classifier(classname="weka.classifiers.trees.J48")
Beispiel #3
0
    def test_classify_instance(self):
        """
        Checks classify_instance against pinned predictions.

        J48 (anneal.arff, instances 10-19) must reproduce the expected class
        values exactly; LinearRegression (bolts.arff, instances 0-9) must match
        the expected values to three decimal places.
        """
        # 1. nominal
        arff_loader = converters.Loader(classname="weka.core.converters.ArffLoader")
        anneal = arff_loader.load_file(self.datafile("anneal.arff"))
        self.assertIsNotNone(anneal)
        anneal.class_is_last()

        cname = "weka.classifiers.trees.J48"
        options = ["-C", "0.3"]
        tree = classifiers.Classifier(classname=cname, options=options)
        self.assertIsNotNone(tree, msg="Failed to instantiate: " + cname + "/" + str(options))

        tree.build_classifier(anneal)
        nominal_preds = []
        for row in range(10, 20):
            label = tree.classify_instance(anneal.get_instance(row))
            self.assertIsNotNone(label)
            nominal_preds.append(label)
        expected_labels = [2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 5.0, 5.0, 2.0, 2.0]
        self.assertEqual(expected_labels, nominal_preds, msg="Classifications differ")

        # 2. numeric
        arff_loader = converters.Loader(classname="weka.core.converters.ArffLoader")
        bolts = arff_loader.load_file(self.datafile("bolts.arff"))
        self.assertIsNotNone(bolts)
        bolts.class_is_last()

        cname = "weka.classifiers.functions.LinearRegression"
        options = ["-R", "0.1"]
        regressor = classifiers.Classifier(classname=cname, options=options)
        self.assertIsNotNone(regressor, msg="Failed to instantiate: " + cname + "/" + str(options))

        regressor.build_classifier(bolts)
        numeric_preds = []
        for row in range(10):
            value = regressor.classify_instance(bolts.get_instance(row))
            self.assertIsNotNone(value)
            numeric_preds.append(value)
        expected_values = [
            24.313, 33.359, 28.569, 26.365, 32.680,
            29.149, 26.998, 22.971, 13.160, 7.394,
        ]
        # both lists hold ten entries, so pairing them covers every prediction
        for wanted, actual in zip(expected_values, numeric_preds):
            self.assertAlmostEqual(wanted, actual, places=3, msg="Classifications differ")
Beispiel #4
0
    def test_learning_curve(self):
        """
        Smoke-tests plot_learning_curve on diabetes.arff.

        The curve is plotted for J48 and NaiveBayesUpdateable, once with the
        plot function's own increments and once with increments=0.1. Both
        calls pass wait=False.
        """
        arff_loader = converters.Loader(classname="weka.core.converters.ArffLoader")
        diabetes = arff_loader.load_file(self.datafile("diabetes.arff"))
        diabetes.class_is_last()

        learners = [
            classifiers.Classifier(classname=name)
            for name in (
                "weka.classifiers.trees.J48",
                "weka.classifiers.bayes.NaiveBayesUpdateable",
            )
        ]
        plot.plot_learning_curve(learners, diabetes, wait=False)
        plot.plot_learning_curve(learners, diabetes, increments=0.1, wait=False)
    def test_plot_experiment(self):
        """
        Runs a random-split regression experiment and plots its result matrix.

        Three regressors are compared on three datasets. The stored results
        are loaded back, passed through a PairedCorrectedTTester into a
        plain-text result matrix, and that matrix is plotted twice: with
        show_stdev=True and without the flag.
        """
        dataset_files = [
            self.datafile(name)
            for name in ("bolts.arff", "bodyfat.arff", "autoPrice.arff")
        ]
        regressors = [
            classifiers.Classifier(name)
            for name in (
                "weka.classifiers.trees.REPTree",
                "weka.classifiers.functions.LinearRegression",
                "weka.classifiers.functions.SMOreg",
            )
        ]
        result_file = self.tempfile("results-rs.arff")
        experiment = experiments.SimpleRandomSplitExperiment(
            classification=False,
            runs=10,
            percentage=66.6,
            preserve_order=False,
            datasets=dataset_files,
            classifiers=regressors,
            result=result_file)
        experiment.setup()
        experiment.run()

        # evaluate
        result_loader = converters.loader_for_file(result_file)
        results = result_loader.load_file(result_file)
        matrix = experiments.ResultMatrix("weka.experiment.ResultMatrixPlainText")
        tester = experiments.Tester("weka.experiment.PairedCorrectedTTester")
        tester.resultmatrix = matrix
        column = results.attribute_by_name("Correlation_coefficient").index
        tester.instances = results
        # return values are not needed here; the calls are made for their
        # effect on the tester/matrix before plotting
        tester.header(column)
        tester.multi_resultset_full(0, column)

        # plot
        plot.plot_experiment(
            matrix,
            title="Random split (w/ StdDev)",
            measure="Correlation coefficient",
            show_stdev=True,
            wait=False)
        plot.plot_experiment(
            matrix,
            title="Random split",
            measure="Correlation coefficient",
            wait=False)
Beispiel #6
0
    def test_distribution_for_instance(self):
        """
        Checks the length of the arrays returned by distribution_for_instance.

        For J48 on anneal.arff each of the first ten instances must yield six
        entries; for LinearRegression on bolts.arff each must yield exactly
        one entry.
        """
        scenarios = [
            # 1. nominal
            ("anneal.arff",
             "weka.classifiers.trees.J48",
             ["-C", "0.3"],
             6,
             "Number of classes in prediction differ!"),
            # 2. numeric
            ("bolts.arff",
             "weka.classifiers.functions.LinearRegression",
             ["-R", "0.1"],
             1,
             "Number of classes in prediction should be one for numeric classifier!"),
        ]
        for arff_name, cname, options, expected_size, size_msg in scenarios:
            arff_loader = converters.Loader(classname="weka.core.converters.ArffLoader")
            dataset = arff_loader.load_file(self.datafile(arff_name))
            self.assertIsNotNone(dataset)
            dataset.class_is_last()

            model = classifiers.Classifier(classname=cname, options=options)
            self.assertIsNotNone(model, msg="Failed to instantiate: " + cname + "/" + str(options))

            model.build_classifier(dataset)
            for row in range(10):
                distribution = model.distribution_for_instance(dataset.get_instance(row))
                self.assertIsNotNone(distribution)
                self.assertEqual(expected_size, len(distribution), msg=size_msg)
Beispiel #7
0
    def test_build_and_use_forecaster(self):
        """
        Tests building and using of a forecaster.

        airline.arff is split 90/10 into train and test data. A WekaForecaster
        backed by LinearRegression is built on the training part, primed with
        the last 12 training instances and asked to forecast as many steps as
        there are test instances.
        """
        self._ensure_package_is_installed()

        loader = converters.Loader(classname="weka.core.converters.ArffLoader")
        data = loader.load_file(self.datafile("airline.arff"))
        self.assertIsNotNone(data, msg="Data should not be none")
        data.class_is_last()

        airline_train, airline_test = data.train_test_split(90.0)
        forecaster = timeseries.WekaForecaster()
        self.assertIsNotNone(forecaster)
        # the field is assigned twice on purpose-looking grounds: first as a
        # list, then as a plain string (presumably to exercise both forms of
        # the setter - TODO confirm)
        forecaster.fields_to_forecast = ["passenger_numbers"]
        forecaster.base_forecaster = classifiers.Classifier(
            classname="weka.classifiers.functions.LinearRegression")
        forecaster.fields_to_forecast = "passenger_numbers"
        forecaster.build_forecaster(airline_train)

        # prime with the tail of the training data
        num_prime_instances = 12
        airline_prime = dataset.Instances.copy_instances(
            airline_train, airline_train.num_instances - num_prime_instances,
            num_prime_instances)
        forecaster.prime_forecaster(airline_prime)

        num_future_forecasts = airline_test.num_instances
        preds = forecaster.forecast(num_future_forecasts)
        self.assertIsNotNone(preds, msg="Predictions should not be none")
        # the number of forecasts is compared with the test set size, not with
        # the prime window, so the failure message has to say so
        self.assertEqual(len(preds),
                         airline_test.num_instances,
                         msg="# of predictions should equal number of test instances")
    def test_generate_thresholdcurve_data(self):
        """
        Checks the dataset produced by generate_thresholdcurve_data.

        NaiveBayes is cross-validated (10 folds, seed 1) on diabetes.arff and
        the threshold curve data for class index 0 must have 13 attributes,
        769 rows and contain a few well-known attribute names.
        """
        arff_loader = converters.Loader(classname="weka.core.converters.ArffLoader")
        diabetes = arff_loader.load_file(self.datafile("diabetes.arff"))
        diabetes.class_is_last()

        remove_filter = filters.Filter(
            classname="weka.filters.unsupervised.attribute.Remove",
            options=["-R", "1-3"])
        bayes = classifiers.Classifier(classname="weka.classifiers.bayes.NaiveBayes")
        # NOTE(review): the filtered classifier is configured but the plain
        # NaiveBayes instance is what gets cross-validated below - confirm
        # whether that is intended before changing it (the pinned numbers
        # depend on it)
        filtered = classifiers.FilteredClassifier()
        filtered.filter = remove_filter
        filtered.classifier = bayes

        evaluation = classifiers.Evaluation(diabetes)
        evaluation.crossvalidate_model(bayes, diabetes, 10, Random(1))
        curve = plot.generate_thresholdcurve_data(evaluation, 0)
        self.assertEqual(13, curve.num_attributes, msg="number of attributes differs")
        self.assertEqual(769, curve.num_instances, msg="number of rows differs")
        for attname in ("True Positives", "False Positive Rate", "Lift"):
            self.assertIsNotNone(curve.attribute_by_name(attname),
                                 msg="Failed to locate attribute: " + attname)
Beispiel #9
0
    def test_batchpredictor(self):
        """
        Exercises the batch-prediction API of a J48 classifier on anneal.arff.

        Checks the batch-predictor flag and the batch size, then verifies that
        distributions_for_instances returns one entry per instance and that
        the first entry has as many values as the class attribute.
        """
        arff_loader = converters.Loader(classname="weka.core.converters.ArffLoader")
        anneal = arff_loader.load_file(self.datafile("anneal.arff"))
        self.assertIsNotNone(anneal)
        anneal.class_is_last()

        cname = "weka.classifiers.trees.J48"
        options = []
        tree = classifiers.Classifier(classname=cname, options=options)
        self.assertIsNotNone(tree, msg="Failed to instantiate: " + cname + "/" + str(options))

        # batch predictor?
        self.assertTrue(tree.is_batchpredictor, msg="not a batch predictor: " + cname + "/" + str(options))

        # more efficient implementation? (only called, the result is not checked)
        tree.has_efficient_batch_prediction()

        # batch size
        self.assertIsNotNone(tree.batch_size, msg="batch size is not initialized")

        # distributions_for_instances
        tree.build_classifier(anneal)
        distributions = tree.distributions_for_instances(anneal)
        self.assertIsNotNone(distributions, msg="no distributions generated")
        self.assertEqual(len(distributions), len(anneal), msg="number of predictions differ")
        first_size = len(distributions[0])
        self.assertEqual(first_size, anneal.class_attribute.num_values, msg="size of distribution array does not match number of classes")
 def test_json(self):
     """
     Checks that a flow survives a JSON round trip.

     A flow (Start, a Tee with a Console sink, and a Trigger holding a file
     supplier, dataset loader, class selector and J48 trainer) is turned into
     JSON, restored from it and serialized again; both JSON strings must
     be equal.
     """
     flow = control.Flow()
     flow.name = "blah"
     flow.config["annotation"] = "Some annotation text"
     flow.actors.append(source.Start())

     # tee branch: console output
     tee = control.Tee()
     flow.actors.append(tee)
     tee.actors.append(sink.Console())

     # trigger branch: supply files -> load -> select class -> train
     trigger = control.Trigger()
     flow.actors.append(trigger)

     supplier = source.FileSupplier()
     supplier.config["files"] = ["file1.arff", "file2.arff"]
     trigger.actors.append(supplier)

     load_dataset = transformer.LoadDataset()
     trigger.actors.append(load_dataset)

     class_selector = transformer.ClassSelector()
     trigger.actors.append(class_selector)

     trainer = transformer.Train()
     trainer.config["setup"] = classifiers.Classifier(
         classname="weka.classifiers.trees.J48", options=["-C", "0.3"])
     trigger.actors.append(trainer)

     serialized = flow.to_json()
     restored = control.Flow.from_json(serialized)
     reserialized = restored.to_json()
     self.assertEqual(serialized, reserialized, msg="JSON representations differ")
Beispiel #11
0
    def test_randomsplit_regression(self):
        """
        Runs a random-split regression experiment and evaluates its output.

        ZeroR and LinearRegression are compared on bolts.arff and
        bodyfat.arff (10 runs, 66.6% split). The stored results are loaded
        back and a PairedCorrectedTTester must produce a non-empty header and
        a non-empty full result for the Correlation_coefficient column.
        """
        arff_files = [
            self.datafile(name) for name in ("bolts.arff", "bodyfat.arff")
        ]
        learners = [
            classifiers.Classifier(classname=name)
            for name in (
                "weka.classifiers.rules.ZeroR",
                "weka.classifiers.functions.LinearRegression",
            )
        ]
        result_path = self.tempfile("results-rs.arff")
        experiment = experiments.SimpleRandomSplitExperiment(
            classification=False,
            runs=10,
            percentage=66.6,
            preserve_order=False,
            datasets=arff_files,
            classifiers=learners,
            result=result_path)
        self.assertIsNotNone(experiment, msg="Failed to instantiate!")
        experiment.setup()
        experiment.run()

        # evaluate
        result_loader = converters.loader_for_file(result_path)
        results = result_loader.load_file(result_path)
        self.assertIsNotNone(results, msg="Failed to load data: " + result_path)

        matrix = experiments.ResultMatrix(classname="weka.experiment.ResultMatrixPlainText")
        self.assertIsNotNone(matrix, msg="Failed to instantiate!")

        tester = experiments.Tester(classname="weka.experiment.PairedCorrectedTTester")
        self.assertIsNotNone(tester, msg="Failed to instantiate!")

        tester.resultmatrix = matrix
        column = results.attribute_by_name("Correlation_coefficient").index
        tester.instances = results

        header_text = tester.header(column)
        self.assertGreater(len(header_text), 0, msg="Generated no header")
        full_text = tester.multi_resultset_full(0, column)
        self.assertGreater(len(full_text), 0, msg="Generated no result")
Beispiel #12
0
    def test_multisearch(self):
        """
        Exercises the MultiSearch wrapper: parameters, classifier, evaluation.

        NB: multisearch-weka-package must be installed (https://github.com/fracpete/multisearch-weka-package).
        """
        search = classifiers.MultiSearch()

        # one math-based and one list-based search parameter
        gamma_param = classes.MathParameter()
        gamma_param.prop = "classifier.kernel.gamma"
        gamma_param.minimum = -3.0
        gamma_param.maximum = 3.0
        gamma_param.step = 1.0
        gamma_param.base = 10.0
        gamma_param.expression = "pow(BASE,I)"

        c_param = classes.ListParameter()
        c_param.prop = "classifier.C"
        c_param.values = ["-2.0", "-1.0", "0.0", "1.0", "2.0"]

        search.parameters = [gamma_param, c_param]
        # compared via their string representations
        self.assertEqual(str([gamma_param, c_param]), str(search.parameters), msg="parameters differ")

        # classifier round trip, compared via command lines
        svm = classifiers.Classifier(
            classname="weka.classifiers.functions.SMOreg",
            options=["-K", "weka.classifiers.functions.supportVector.RBFKernel"])
        search.classifier = svm
        self.assertEqual(svm.to_commandline(), search.classifier.to_commandline(), msg="classifiers differ")

        # evaluation metric assigned from a tag object
        search.classifier = classifiers.Classifier(
            classname="weka.classifiers.functions.LinearRegression")
        search.evaluation = search.tags_evaluation.find("RMSE")
        self.assertEqual("RMSE", str(search.evaluation), msg="evaluation differs: " + str(search.evaluation))

        # evaluation metric assigned from a plain string
        search.evaluation = "ACC"
        self.assertEqual("ACC", str(search.evaluation), msg="evaluation differs: " + str(search.evaluation))
        search.classifier = classifiers.Classifier(classname="weka.classifiers.trees.J48")
Beispiel #13
0
    def test_instantiate_classifier(self):
        """
        Instantiates the classifier wrapper classes and checks their classnames.

        Covered: Classifier (J48 without and with options),
        SingleClassifierEnhancer and FilteredClassifier (including the filter
        property), KernelClassifier with a Kernel, and
        MultipleClassifiersCombiner.
        """
        # plain classifier, without and with options
        cname = "weka.classifiers.trees.J48"
        for options in (None, ["-C", "0.3"]):
            plain = classifiers.Classifier(classname=cname, options=options)
            self.assertIsNotNone(plain, msg="Failed to instantiate: " + cname + "/" + str(options))
            self.assertEqual(cname, plain.classname, msg="Classnames differ!")

        # enhancer wrapping J48, plus a filter assigned afterwards
        cname = "weka.classifiers.meta.FilteredClassifier"
        options = ["-W", "weka.classifiers.trees.J48", "--", "-C", "0.3"]
        enhancer = classifiers.SingleClassifierEnhancer(classname=cname, options=options)
        self.assertIsNotNone(enhancer, msg="Failed to instantiate: " + cname + "/" + str(options))
        self.assertEqual(cname, enhancer.classname, msg="Classnames differ!")
        fname = "weka.filters.unsupervised.attribute.Remove"
        enhancer.filter = filters.Filter(classname=fname, options=["-R", "last"])
        self.assertEqual(fname, enhancer.filter.classname, msg="Classnames differ!")

        # dedicated FilteredClassifier wrapper
        filtered = classifiers.FilteredClassifier()
        self.assertIsNotNone(filtered, msg="Failed to instantiate FilteredClassifier!")
        self.assertEqual("weka.classifiers.meta.FilteredClassifier", filtered.classname, msg="Classnames differ!")

        # kernel classifier with an explicit kernel
        cname = "weka.classifiers.functions.SMO"
        smo = classifiers.KernelClassifier(classname=cname)
        self.assertIsNotNone(smo, msg="Failed to instantiate KernelClassifier: " + cname)
        self.assertEqual(cname, smo.classname, msg="Classnames differ!")
        kname = "weka.classifiers.functions.supportVector.RBFKernel"
        rbf = classifiers.Kernel(classname=kname)
        self.assertIsNotNone(rbf, msg="Failed to instantiate Kernel: " + kname)
        smo.kernel = rbf
        self.assertEqual(kname, smo.kernel.classname, msg="Kernel classnames differ!")

        # combiner of multiple classifiers
        cname = "weka.classifiers.meta.Vote"
        vote = classifiers.MultipleClassifiersCombiner(classname=cname)
        self.assertIsNotNone(vote, msg="Failed to instantiate MultipleClassifiersCombiner: " + cname)
        self.assertEqual(cname, vote.classname, msg="Classnames differ!")
Beispiel #14
0
    def test_crossvalidation_classification(self):
        """
        Runs a cross-validation classification experiment and evaluates it.

        ZeroR and J48 are compared on iris.arff and anneal.arff (10 runs,
        10 folds). The stored results are loaded back and a
        PairedCorrectedTTester must produce a non-empty header and a non-empty
        full result for the Percent_correct column.
        """
        arff_files = [
            self.datafile(name) for name in ("iris.arff", "anneal.arff")
        ]
        learners = [
            classifiers.Classifier(classname=name)
            for name in (
                "weka.classifiers.rules.ZeroR",
                "weka.classifiers.trees.J48",
            )
        ]
        result_path = self.tempfile("results-cv.arff")
        experiment = experiments.SimpleCrossValidationExperiment(
            classification=True,
            runs=10,
            folds=10,
            datasets=arff_files,
            classifiers=learners,
            result=result_path)
        self.assertIsNotNone(experiment, msg="Failed to instantiate!")
        experiment.setup()
        experiment.run()

        # evaluate
        result_loader = converters.loader_for_file(result_path)
        results = result_loader.load_file(result_path)
        self.assertIsNotNone(results, msg="Failed to load data: " + result_path)

        matrix = experiments.ResultMatrix(classname="weka.experiment.ResultMatrixPlainText")
        self.assertIsNotNone(matrix, msg="Failed to instantiate!")

        tester = experiments.Tester(classname="weka.experiment.PairedCorrectedTTester")
        self.assertIsNotNone(tester, msg="Failed to instantiate!")

        tester.resultmatrix = matrix
        column = results.attribute_by_name("Percent_correct").index
        tester.instances = results

        header_text = tester.header(column)
        self.assertGreater(len(header_text), 0, msg="Generated no header")
        full_text = tester.multi_resultset_full(0, column)
        self.assertGreater(len(full_text), 0, msg="Generated no result")
Beispiel #15
0
    def test_capabilities(self):
        """
        Checks that a J48 classifier exposes a capabilities object.
        """
        cname = "weka.classifiers.trees.J48"
        options = None
        tree = classifiers.Classifier(classname=cname, options=options)
        self.assertIsNotNone(
            tree, msg="Failed to instantiate: " + cname + "/" + str(options))
        self.assertEqual(cname, tree.classname, msg="Classnames differ!")

        self.assertIsNotNone(tree.capabilities, msg="Capabilities are None!")
Beispiel #16
0
    def test_plot_classifier_errors(self):
        """
        Smoke-tests plot_classifier_errors.

        LinearRegression is cross-validated (10 folds, seed 42) on bolts.arff
        and the resulting predictions are handed to the plot function with
        wait=False.
        """
        arff_loader = converters.Loader(classname="weka.core.converters.ArffLoader")
        bolts = arff_loader.load_file(self.datafile("bolts.arff"))
        self.assertIsNotNone(bolts)
        bolts.class_is_last()

        regressor = classifiers.Classifier(
            classname="weka.classifiers.functions.LinearRegression",
            options=["-S", "1", "-C"])
        evl = classifiers.Evaluation(bolts)
        evl.crossvalidate_model(regressor, bolts, 10, Random(42))
        plot.plot_classifier_errors(evl.predictions, wait=False)
Beispiel #17
0
    def test_plot_dot_graph(self):
        """
        Smoke-tests plot_dot_graph with the graph of a J48 tree built on
        diabetes.arff.
        """
        arff_loader = converters.Loader(classname="weka.core.converters.ArffLoader")
        diabetes = arff_loader.load_file(self.datafile("diabetes.arff"))
        diabetes.class_is_last()

        tree = classifiers.Classifier(
            classname="weka.classifiers.trees.J48",
            options=["-C", "0.3"])
        tree.build_classifier(diabetes)

        graph.plot_dot_graph(tree.graph)
Beispiel #18
0
    def test_plot_prc(self):
        """
        Smoke-tests plot_prc.

        NaiveBayes is cross-validated (10 folds, seed 1) on diabetes.arff and
        the evaluation is plotted for class indices 0 and 1 with wait=False.
        """
        arff_loader = converters.Loader(classname="weka.core.converters.ArffLoader")
        diabetes = arff_loader.load_file(self.datafile("diabetes.arff"))
        diabetes.class_is_last()

        remove_filter = filters.Filter(
            classname="weka.filters.unsupervised.attribute.Remove",
            options=["-R", "1-3"])
        bayes = classifiers.Classifier(classname="weka.classifiers.bayes.NaiveBayes")
        # NOTE(review): the filtered classifier is configured but the plain
        # NaiveBayes instance is what gets cross-validated below - confirm
        # whether that is intended
        filtered = classifiers.FilteredClassifier()
        filtered.filter = remove_filter
        filtered.classifier = bayes

        evaluation = classifiers.Evaluation(diabetes)
        evaluation.crossvalidate_model(bayes, diabetes, 10, Random(1))
        plot.plot_prc(evaluation, class_index=[0, 1], wait=False)
Beispiel #19
0
    def test_get_prc(self):
        """
        Checks the area computed by get_prc.

        NaiveBayes is cross-validated (10 folds, seed 1) on diabetes.arff; the
        area obtained from the threshold curve data of class index 0 must be
        0.892 to three decimal places.
        """
        arff_loader = converters.Loader(classname="weka.core.converters.ArffLoader")
        diabetes = arff_loader.load_file(self.datafile("diabetes.arff"))
        diabetes.class_is_last()

        remove_filter = filters.Filter(
            classname="weka.filters.unsupervised.attribute.Remove",
            options=["-R", "1-3"])
        bayes = classifiers.Classifier(classname="weka.classifiers.bayes.NaiveBayes")
        # NOTE(review): the filtered classifier is configured but the plain
        # NaiveBayes instance is what gets cross-validated below - confirm
        # whether that is intended before changing it (the pinned area
        # depends on it)
        filtered = classifiers.FilteredClassifier()
        filtered.filter = remove_filter
        filtered.classifier = bayes

        evaluation = classifiers.Evaluation(diabetes)
        evaluation.crossvalidate_model(bayes, diabetes, 10, Random(1))
        curve = plot.generate_thresholdcurve_data(evaluation, 0)
        self.assertAlmostEqual(0.892, plot.get_prc(curve), places=3, msg="PRC differs")
Beispiel #20
0
    def test_evaluate_forecaster(self):
        """
        Configures a forecaster and runs a TSEvaluation on airline.arff.

        The forecaster (LinearRegression base, customised lag maker) is
        checked via its algorithm name and the lag maker command line; the
        evaluation is then configured and executed without inspecting its
        results.
        """
        self._ensure_package_is_installed()

        arff_loader = converters.Loader(classname="weka.core.converters.ArffLoader")
        airline = arff_loader.load_file(self.datafile("airline.arff"))
        self.assertIsNotNone(airline, msg="Data should not be none")
        airline.class_is_last()

        forecaster = timeseries.WekaForecaster()
        forecaster.fields_to_forecast = ["passenger_numbers"]
        forecaster.base_forecaster = classifiers.Classifier(
            classname="weka.classifiers.functions.LinearRegression")

        # the lag maker is fetched from the forecaster for every assignment,
        # exactly as individual attribute assignments would do
        lag_settings = [
            ("timestamp_field", "Date"),
            ("adjust_for_variance", False),
            ("include_powers_of_time", True),
            ("include_timelag_products", True),
            ("remove_leading_instances_with_unknown_lag_values", False),
            ("add_month_of_year", True),
            ("add_quarter_of_year", True),
        ]
        for option, value in lag_settings:
            setattr(forecaster.tslag_maker, option, value)

        expected_algorithm = "LinearRegression -S 0 -R 1.0E-8 -num-decimal-places 4"
        self.assertEqual(expected_algorithm, str(forecaster.algorithm_name), msg="algorithm name")
        expected_lag_cmd = "weka.filters.supervised.attribute.TSLagMaker -F passenger_numbers -L 1 -M 12 -G Date -month -quarter"
        self.assertEqual(expected_lag_cmd, forecaster.tslag_maker.to_commandline(), msg="lag maker commandline")

        ts_eval = timeseries.TSEvaluation(airline, 0.0)
        eval_settings = [
            ("evaluate_on_training_data", False),
            ("evaluate_on_test_data", False),
            ("prime_window_size", forecaster.tslag_maker.max_lag),
            ("prime_for_test_data_with_test_data", True),
            ("rebuild_model_after_each_test_forecast_step", False),
            ("forecast_future", True),
            ("horizon", 20),
            ("evaluation_modules", "MAE,RMSE"),
        ]
        for option, value in eval_settings:
            setattr(ts_eval, option, value)
        ts_eval.evaluate(forecaster)
Beispiel #21
0
    def getInstance(self, gameState):
        """
        Classify the current game state with the stored Weka model.

        The state is written as a single-row ARFF file
        (``data/instances.arff``) whose last attribute, ``move``, is left
        unknown (``?``). The file is loaded back through the ARFF loader and
        its rows are classified with the first object deserialized from
        ``data/out.model``.

        :param gameState: game state to describe; its ``livingGhosts``,
                          ``data.ghostDistances``, ``data.agentStates``,
                          ``data.score``, ``hasWall`` and
                          ``getPacmanPosition`` members are read
        :return: the value returned by ``classify_instance`` for the last row
                 of the file, or None if the loaded data has no rows
        """
        header = "".join([
            "@relation prueba\n\n",
            "@attribute score5 NUMERIC\n",
            "@attribute score2 NUMERIC\n",
            "@attribute score NUMERIC\n",
            "@attribute ghost1-living {True, False}\n",
            "@attribute ghost2-living {True, False}\n",
            "@attribute ghost3-living {True, False}\n",
            "@attribute ghost4-living {True, False}\n",
            "@attribute distance-ghost1 NUMERIC \n",
            "@attribute distance-ghost2 NUMERIC \n",
            "@attribute distance-ghost3 NUMERIC \n",
            "@attribute distance-ghost4 NUMERIC \n",
            "@attribute posX NUMERIC\n",
            "@attribute posY NUMERIC\n",
            "@attribute direction {North, South, East, West, Stop}\n",
            "@attribute wall-east {True, False}\n",
            "@attribute wall-south {True, False}\n",
            "@attribute wall-west {True, False}\n",
            "@attribute wall-north {True, False}\n",
            "@attribute move {North, South, East, West, Stop}\n\n",
            "@data\n\n\n",
        ])

        # Use the deserialized model for the prediction. Previously the file
        # was read but a freshly constructed Classifier() was used instead, so
        # the stored model never took part in the classification.
        objects = serialization.read_all("data/out.model")
        cls = Classifier(jobject=objects[0])

        # discard the first value, as it is PacMan
        state_values = [str(alive) for alive in gameState.livingGhosts[1:]]
        # a missing ghost distance is written as 0
        state_values.extend(
            "0" if distance is None else str(distance)
            for distance in gameState.data.ghostDistances)

        pacman_state = gameState.data.agentStates[0]
        position = pacman_state.getPosition()
        pacman_pos = gameState.getPacmanPosition()
        px, py = pacman_pos[0], pacman_pos[1]
        # NOTE(review): the header names the wall columns east, south, west,
        # north while the probes are x-1, y-1, x+1, y+1. The order is kept
        # unchanged because the stored model presumably was trained on rows
        # produced the same way - confirm before touching it.
        state_values.extend([
            str(position[0]),
            str(position[1]),
            str(pacman_state.getDirection()),
            str(gameState.hasWall(px - 1, py)),
            str(gameState.hasWall(px, py - 1)),
            str(gameState.hasWall(px + 1, py)),
            str(gameState.hasWall(px, py + 1)),
            "?",  # class value (move) is what we want predicted
        ])

        score_values = [
            str(int(BustersAgent.getScore5(self, gameState))),
            str(int(BustersAgent.getScore2(self, gameState))),
            str(gameState.data.score),
        ]
        row = ",".join(score_values + state_values)

        # the context manager closes the file even if a write fails
        with open('data/instances.arff', 'w') as arff_file:
            arff_file.write(header)
            arff_file.write(row)

        loader = Loader(classname="weka.core.converters.ArffLoader")
        data = loader.load_file("data/instances.arff")
        data.class_is_last()  # set class attribute

        # initialised so that empty data yields None instead of an
        # UnboundLocalError at the return statement
        pred = None
        for inst in data:
            pred = cls.classify_instance(inst)

        return pred
Beispiel #22
0
    def test_result_matrix(self):
        """
        Tests the ResultMatrix class.

        Runs a 10 runs x 10 folds cross-validation experiment (ZeroR and J48
        on the iris and anneal datasets), loads the generated results file and
        lets a PairedCorrectedTTester populate a plain-text result matrix.
        The matrix's dimensions, column/row names, visibility toggles and
        mean/stdev accessors are then verified.
        """
        datasets = [self.datafile("iris.arff"), self.datafile("anneal.arff")]
        cls = [
            classifiers.Classifier(classname="weka.classifiers.rules.ZeroR"),
            classifiers.Classifier(classname="weka.classifiers.trees.J48")
        ]
        outfile = self.tempfile("results-cv.arff")
        exp = experiments.SimpleCrossValidationExperiment(classification=True,
                                                          runs=10,
                                                          folds=10,
                                                          datasets=datasets,
                                                          classifiers=cls,
                                                          result=outfile)
        self.assertIsNotNone(exp, msg="Failed to instantiate!")
        exp.setup()
        exp.run()

        # evaluate
        loader = converters.loader_for_file(outfile)
        data = loader.load_file(outfile)
        self.assertIsNotNone(data, msg="Failed to load data: " + outfile)

        matrix = experiments.ResultMatrix(
            classname="weka.experiment.ResultMatrixPlainText")
        self.assertIsNotNone(matrix, msg="Failed to instantiate!")

        tester = experiments.Tester(
            classname="weka.experiment.PairedCorrectedTTester")
        self.assertIsNotNone(tester, msg="Failed to instantiate!")

        # the tester fills the matrix as a side effect of generating output
        tester.resultmatrix = matrix
        comparison_col = data.attribute_by_name("Percent_correct").index
        tester.instances = data
        self.assertGreater(len(tester.header(comparison_col)),
                           0,
                           msg="Generated no header")
        self.assertGreater(len(tester.multi_resultset_full(0, comparison_col)),
                           0,
                           msg="Generated no result")

        # dimensions: one row per dataset, one column per classifier
        self.assertEqual(2, matrix.rows, msg="# of rows differ")
        self.assertEqual(2, matrix.columns, msg="# of columns differ")

        # cols
        self.assertIn("ZeroR",
                      matrix.get_col_name(0),
                      msg="ZeroR should be part of col name")
        self.assertIn("J48",
                      matrix.get_col_name(1),
                      msg="J48 should be part of col name")
        # index 2 is out of range for a two-column matrix
        self.assertIsNone(matrix.get_col_name(2),
                          msg="Column name should not exist")
        matrix.set_col_name(0, "zeror")
        self.assertIn("zeror",
                      matrix.get_col_name(0),
                      msg="zeror should be part of col name")

        self.assertFalse(matrix.is_col_hidden(1),
                         msg="Column should be visible")
        matrix.hide_col(1)
        self.assertTrue(matrix.is_col_hidden(1), msg="Column should be hidden")
        matrix.show_col(1)
        self.assertFalse(matrix.is_col_hidden(1),
                         msg="Column should be visible again")

        # rows
        self.assertEqual("iris",
                         matrix.get_row_name(0),
                         msg="Row names differ")
        self.assertEqual("anneal",
                         matrix.get_row_name(1),
                         msg="Row names differ")
        # index 2 is out of range for a two-row matrix; this must query the
        # row accessor (not the column accessor) to cover the row lookup
        self.assertIsNone(matrix.get_row_name(2),
                          msg="Row name should not exist")
        matrix.set_row_name(0, "IRIS")
        self.assertEqual("IRIS",
                         matrix.get_row_name(0),
                         msg="Row names differ")

        self.assertFalse(matrix.is_row_hidden(1), msg="Row should be visible")
        matrix.hide_row(1)
        self.assertTrue(matrix.is_row_hidden(1), msg="Row should be hidden")
        matrix.show_row(1)
        self.assertFalse(matrix.is_row_hidden(1),
                         msg="Row should be visible again")

        # mean
        self.assertAlmostEqual(33.3,
                               matrix.get_mean(0, 0),
                               places=1,
                               msg="Means differ")
        self.assertAlmostEqual(54.75,
                               matrix.average(0),
                               places=2,
                               msg="Averages differ")
        matrix.set_mean(0, 0, 10)
        self.assertAlmostEqual(10.0,
                               matrix.get_mean(0, 0),
                               places=1,
                               msg="Means differ")

        # stdev
        self.assertAlmostEqual(0.0,
                               matrix.get_stdev(0, 0),
                               places=1,
                               msg="Stdevs differ")
        matrix.set_stdev(0, 0, 0.3)
        self.assertAlmostEqual(0.3,
                               matrix.get_stdev(0, 0),
                               places=1,
                               msg="Stdevs differ")
Beispiel #23
0
    def test_capabilities(self):
        """
        Tests the Capabilities class.

        Covers instantiation (with and without an owner), getting/setting the
        owner, enabling a single capability and dependency, and the bulk
        enable/disable methods.
        """

        def check_counts(want_caps, want_deps):
            # Verifies whether the capability and dependency collections are
            # populated (True) or empty (False), capabilities first.
            if want_caps:
                self.assertGreater(len(cap_set.capabilities()), 0,
                                   msg="Should have capabilities")
            else:
                self.assertEqual(0, len(cap_set.capabilities()),
                                 msg="Should have no capabilities")
            if want_deps:
                self.assertGreater(len(cap_set.dependencies()), 0,
                                   msg="Should have dependencies")
            else:
                self.assertEqual(0, len(cap_set.dependencies()),
                                 msg="Should have no dependencies")

        # without owner
        cap_set = capabilities.Capabilities()
        self.assertIsNotNone(
            cap_set, msg="Failed to instantiate empty capabilities")
        self.assertEqual(
            0, len(cap_set.capabilities()), msg="Should have no capabilities")

        # with a classifier as owner
        j48 = classifiers.Classifier(classname="weka.classifiers.trees.J48")
        cap_set = capabilities.Capabilities(owner=j48)
        self.assertIsNotNone(
            cap_set, msg="Failed to instantiate empty capabilities")
        self.assertEqual(
            0, len(cap_set.capabilities()), msg="Should have no capabilities")

        # owner can be read, cleared and restored
        self.assertIsNotNone(cap_set.owner, msg="Should have an owner")
        cap_set.owner = None
        self.assertIsNone(cap_set.owner, msg="Should have no owner")
        cap_set.owner = j48
        self.assertIsNotNone(cap_set.owner, msg="Should have an owner")

        # single capability, then the same one as dependency
        numeric_class = capabilities.Capability(member="NUMERIC_CLASS")
        cap_set.enable(numeric_class)
        self.assertTrue(
            cap_set.handles(numeric_class),
            "Should have capability: " + str(numeric_class))
        self.assertFalse(
            cap_set.has_dependency(numeric_class),
            "Should have no dependency: " + str(numeric_class))
        self.assertEqual(
            0, len(cap_set.dependencies()), msg="Should have no dependencies")
        cap_set.enable_dependency(numeric_class)
        self.assertTrue(
            cap_set.has_dependency(numeric_class),
            "Should have dependency: " + str(numeric_class))
        self.assertEqual(
            1, len(cap_set.dependencies()), msg="Should have one dependency")

        # everything off, then everything on
        cap_set.disable_all()
        check_counts(False, False)
        cap_set.enable_all()
        check_counts(True, True)

        # each bulk enabler, starting from a clean slate:
        # (method name, capabilities populated?, dependencies populated?)
        scenarios = (
            ("enable_all_attribute_dependencies", False, True),
            ("enable_all_attributes", True, False),
            ("enable_all_class_dependencies", False, True),
            ("enable_all_classes", True, False),
        )
        for method_name, want_caps, want_deps in scenarios:
            cap_set.disable_all()
            getattr(cap_set, method_name)()
            check_counts(want_caps, want_deps)
Beispiel #24
0
    def test_evaluation(self):
        """
        Tests the Evaluation class.
        """
        # 1. nominal
        loader = converters.Loader(classname="weka.core.converters.ArffLoader")
        data = loader.load_file(self.datafile("anneal.arff"))
        self.assertIsNotNone(data)
        data.class_is_last()

        cname = "weka.classifiers.trees.J48"
        cls = classifiers.Classifier(classname=cname)
        self.assertIsNotNone(cls, msg="Failed to instantiate: " + cname)

        evl = classifiers.Evaluation(data)
        self.assertIsNotNone(evl, msg="Failed to instantiate Evaluation")
        self.assertIsNone(evl.header.equal_headers(data))
        self.assertEqual([9, 100, 685, 1, 68, 41], evl.class_priors.tolist())

        # cross-validate
        evl.crossvalidate_model(cls, data, 10, classes.Random(1))
        self.assertEqual([[5.0, 0.0, 3.0, 0.0, 0.0, 0.0], [0.0, 99.0, 0.0, 0.0, 0.0, 0.0], [0.0, 2.0, 680.0, 0.0, 0.0, 2.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 67.0, 0.0], [0.0, 0.0, 7.0, 0.0, 0.0, 33.0]], evl.confusion_matrix.tolist(), msg="confusion matrix differs")
        self.assertAlmostEqual(0.749, evl.area_under_prc(0), places=3, msg="area_under_prc(0) differs")
        self.assertAlmostEqual(0.931, evl.area_under_roc(0), places=3, msg="area_under_roc(0) differs")
        self.assertAlmostEqual(0, evl.avg_cost, places=3, msg="avg_cost differs")
        self.assertEqual(884, evl.correct, msg="correct differs")
        self.assertAlmostEqual(98.775, evl.coverage_of_test_cases_by_predicted_regions, places=3, msg="coverage_of_test_cases_by_predicted_regions differs")
        self.assertAlmostEqual(0.016, evl.error_rate, places=3, msg="error_rate differs")
        self.assertAlmostEqual(0.769, evl.f_measure(0), places=3, msg="f_measure(0) differs")
        self.assertAlmostEqual(0.375, evl.false_negative_rate(0), places=3, msg="false_negative_rate(0) differs")
        self.assertAlmostEqual(0.0, evl.false_positive_rate(0), places=3, msg="false_positive_rate(0) differs")
        self.assertEqual(14, evl.incorrect, msg="incorrect differs")
        self.assertAlmostEqual(0.961, evl.kappa, places=3, msg="kappa differs")
        self.assertAlmostEqual(1017.196, evl.kb_information, places=3, msg="kb_information differs")
        self.assertAlmostEqual(1.133, evl.kb_mean_information, places=3, msg="kb_mean_information differs")
        self.assertAlmostEqual(95.017, evl.kb_relative_information, places=3, msg="kb_relative_information differs")
        self.assertAlmostEqual(0.789, evl.matthews_correlation_coefficient(0), places=3, msg="matthews_correlation_coefficient(0) differs")
        self.assertAlmostEqual(0.006, evl.mean_absolute_error, places=3, msg="mean_absolute_error differs")
        self.assertAlmostEqual(0.134, evl.mean_prior_absolute_error, places=3, msg="mean_prior_absolute_error differs")
        self.assertAlmostEqual(3.0, evl.num_false_negatives(0), places=3, msg="num_false_negatives(0) differs")
        self.assertAlmostEqual(0.0, evl.num_false_positives(0), places=3, msg="num_false_positives(0) differs")
        self.assertAlmostEqual(898.0, evl.num_instances, places=3, msg="num_instances differs")
        self.assertAlmostEqual(890.0, evl.num_true_negatives(0), places=3, msg="num_true_negatives(0) differs")
        self.assertAlmostEqual(5.0, evl.num_true_positives(0), places=3, msg="num_true_positives(0) differs")
        self.assertAlmostEqual(98.44, evl.percent_correct, places=2, msg="percent_correct differs")
        self.assertAlmostEqual(1.56, evl.percent_incorrect, places=2, msg="percent_incorrect differs")
        self.assertAlmostEqual(0, evl.percent_unclassified, places=2, msg="percent_unclassified differs")
        self.assertAlmostEqual(1.0, evl.precision(0), places=3, msg="precision(0) differs")
        self.assertAlmostEqual(0.625, evl.recall(0), places=3, msg="recall(0) differs")
        self.assertAlmostEqual(4.187, evl.relative_absolute_error, places=3, msg="relative_absolute_error differs")
        self.assertAlmostEqual(0.258, evl.root_mean_prior_squared_error, places=3, msg="root_mean_prior_squared_error differs")
        self.assertAlmostEqual(0.067, evl.root_mean_squared_error, places=3, msg="root_mean_squared_error differs")
        self.assertAlmostEqual(25.912, evl.root_relative_squared_error, places=3, msg="root_relative_squared_error differs")
        self.assertAlmostEqual(-75.044, evl.sf_entropy_gain, places=3, msg="sf_entropy_gain differs")
        self.assertAlmostEqual(-0.084, evl.sf_mean_entropy_gain, places=3, msg="sf_mean_entropy_gain differs")
        self.assertAlmostEqual(1.192, evl.sf_mean_prior_entropy, places=3, msg="sf_mean_prior_entropy differs")
        self.assertAlmostEqual(1.276, evl.sf_mean_scheme_entropy, places=3, msg="sf_mean_scheme_entropy differs")
        self.assertAlmostEqual(16.722, evl.size_of_predicted_regions, places=3, msg="size_of_predicted_regions differs")
        self.assertEqual(0, evl.unclassified, msg="unclassified differs")
        self.assertAlmostEqual(0.0, evl.total_cost, places=3, msg="total_cost differs")
        self.assertAlmostEqual(0.926, evl.unweighted_macro_f_measure, places=3, msg="unweighted_macro_f_measure differs")
        self.assertAlmostEqual(0.984, evl.unweighted_micro_f_measure, places=3, msg="unweighted_micro_f_measure differs")
        self.assertAlmostEqual(0.992, evl.weighted_area_under_prc, places=3, msg="weighted_area_under_prc differs")
        self.assertAlmostEqual(0.995, evl.weighted_area_under_roc, places=3, msg="weighted_area_under_roc differs")
        self.assertAlmostEqual(0.984, evl.weighted_f_measure, places=3, msg="weighted_f_measure differs")
        self.assertAlmostEqual(0.016, evl.weighted_false_negative_rate, places=3, msg="weighted_false_negative_rate differs")
        self.assertAlmostEqual(0.036, evl.weighted_false_positive_rate, places=3, msg="weighted_false_positive_rate differs")
        self.assertAlmostEqual(0.959, evl.weighted_matthews_correlation, places=3, msg="weighted_matthews_correlation differs")
        self.assertAlmostEqual(0.984, evl.weighted_precision, places=3, msg="weighted_precision differs")
        self.assertAlmostEqual(0.984, evl.weighted_recall, places=3, msg="weighted_recall differs")
        self.assertAlmostEqual(0.964, evl.weighted_true_negative_rate, places=3, msg="weighted_true_negative_rate differs")
        self.assertAlmostEqual(0.984, evl.weighted_true_positive_rate, places=3, msg="weighted_true_positive_rate differs")
        self.assertIsNotNone(evl.predictions, msg="Should have had predictions")
        self.assertEqual(data.num_instances, len(evl.predictions), msg="Number of predictions differ from training set size")

        # train/test
        evl = classifiers.Evaluation(data)
        self.assertIsNotNone(evl, msg="Failed to instantiate Evaluation")
        cname = "weka.classifiers.trees.J48"
        cls = classifiers.Classifier(classname=cname)
        self.assertIsNotNone(cls, msg="Failed to instantiate: " + cname)
        cls.build_classifier(data)
        evl.test_model(cls, data)
        self.assertAlmostEqual(99.777, evl.percent_correct, places=3, msg="percent_correct differs")

        # predictions: no
        cname = "weka.classifiers.trees.J48"
        cls = classifiers.Classifier(classname=cname)
        self.assertIsNotNone(cls, msg="Failed to instantiate: " + cname)
        evl = classifiers.Evaluation(data)
        evl.discard_predictions = True
        self.assertIsNotNone(evl, msg="Failed to instantiate Evaluation")
        evl.crossvalidate_model(cls, data, 10, classes.Random(1))
        self.assertIsNone(evl.predictions, msg="Should have had no predictions")

        # output
        cname = "weka.classifiers.trees.J48"
        cls = classifiers.Classifier(classname=cname)
        self.assertIsNotNone(cls, msg="Failed to instantiate: " + cname)
        evl = classifiers.Evaluation(data)
        cname = "weka.classifiers.evaluation.output.prediction.PlainText"
        pout = classifiers.PredictionOutput(classname=cname)
        self.assertEqual(cname, pout.classname, msg="Output classnames differ!")
        self.assertIsNotNone(evl, msg="Failed to instantiate Evaluation")
        evl.crossvalidate_model(cls, data, 10, classes.Random(1), output=pout)
        self.assertGreater(len(str(pout)), 0, msg="Should have generated output")

        # 2. numeric
        loader = converters.Loader(classname="weka.core.converters.ArffLoader")
        data = loader.load_file(self.datafile("bolts.arff"))
        self.assertIsNotNone(data)
        data.class_is_last()

        cname = "weka.classifiers.functions.LinearRegression"
        cls = classifiers.Classifier(classname=cname)
        self.assertIsNotNone(cls, msg="Failed to instantiate: " + cname )

        evl = classifiers.Evaluation(data)
        self.assertIsNotNone(evl, msg="Failed to instantiate Evaluation")
        self.assertIsNone(evl.header.equal_headers(data))

        # cross-validate
        evl.crossvalidate_model(cls, data, 10, classes.Random(1))
        self.assertAlmostEqual(0.919, evl.correlation_coefficient, places=3, msg="correlation_coefficient differs")
        self.assertAlmostEqual(10.697, evl.error_rate, places=3, msg="error_rate differs")