def test_plot_experiment(self):
        """
        Tests the plot_experiment method.

        Runs a random split regression experiment with three classifiers on
        three datasets, fills a plain-text result matrix through a paired
        corrected t-tester and plots that matrix twice (with and without
        the show_stdev flag). No assertions are made; the test passes as
        long as nothing raises.
        """
        data_files = [
            self.datafile(name)
            for name in ("bolts.arff", "bodyfat.arff", "autoPrice.arff")
        ]
        learners = [
            classifiers.Classifier(name)
            for name in ("weka.classifiers.trees.REPTree",
                         "weka.classifiers.functions.LinearRegression",
                         "weka.classifiers.functions.SMOreg")
        ]
        result_file = self.tempfile("results-rs.arff")
        experiment = experiments.SimpleRandomSplitExperiment(
            classification=False,
            runs=10,
            percentage=66.6,
            preserve_order=False,
            datasets=data_files,
            classifiers=learners,
            result=result_file)
        experiment.setup()
        experiment.run()

        # evaluate: load the experiment output and let the tester fill the
        # result matrix
        results = converters.loader_for_file(result_file).load_file(
            result_file)
        result_matrix = experiments.ResultMatrix(
            "weka.experiment.ResultMatrixPlainText")
        ttester = experiments.Tester("weka.experiment.PairedCorrectedTTester")
        ttester.resultmatrix = result_matrix
        measure_index = results.attribute_by_name(
            "Correlation_coefficient").index
        ttester.instances = results
        # return values are ignored here; the calls are kept for their
        # effect on the tester/matrix
        ttester.header(measure_index)
        ttester.multi_resultset_full(0, measure_index)

        # plot: once with show_stdev enabled, once with the defaults
        measure_label = "Correlation coefficient"
        plot.plot_experiment(
            result_matrix,
            title="Random split (w/ StdDev)",
            measure=measure_label,
            show_stdev=True,
            wait=False)
        plot.plot_experiment(
            result_matrix,
            title="Random split",
            measure=measure_label,
            wait=False)
Example #2
0
    def test_randomsplit_regression(self):
        """
        Tests random split on regression.

        Runs the experiment with ZeroR and LinearRegression on two datasets,
        loads the generated results file and checks that the paired
        corrected t-tester yields a non-empty header and a non-empty result
        for the "Correlation_coefficient" column.
        """
        data_files = [
            self.datafile(name) for name in ("bolts.arff", "bodyfat.arff")
        ]
        learners = [
            classifiers.Classifier(classname=name)
            for name in ("weka.classifiers.rules.ZeroR",
                         "weka.classifiers.functions.LinearRegression")
        ]
        result_file = self.tempfile("results-rs.arff")
        experiment = experiments.SimpleRandomSplitExperiment(
            classification=False,
            runs=10,
            percentage=66.6,
            preserve_order=False,
            datasets=data_files,
            classifiers=learners,
            result=result_file)
        self.assertIsNotNone(experiment, msg="Failed to instantiate!")
        experiment.setup()
        experiment.run()

        # evaluate: read back what the experiment wrote
        results = converters.loader_for_file(result_file).load_file(
            result_file)
        self.assertIsNotNone(
            results, msg="Failed to load data: " + result_file)

        result_matrix = experiments.ResultMatrix(
            classname="weka.experiment.ResultMatrixPlainText")
        self.assertIsNotNone(result_matrix, msg="Failed to instantiate!")

        ttester = experiments.Tester(
            classname="weka.experiment.PairedCorrectedTTester")
        self.assertIsNotNone(ttester, msg="Failed to instantiate!")

        ttester.resultmatrix = result_matrix
        measure_index = results.attribute_by_name(
            "Correlation_coefficient").index
        ttester.instances = results

        header = ttester.header(measure_index)
        self.assertGreater(len(header), 0, msg="Generated no header")
        comparison = ttester.multi_resultset_full(0, measure_index)
        self.assertGreater(len(comparison), 0, msg="Generated no result")
Example #3
0
    def test_crossvalidation_classification(self):
        """
        Tests cross-validated classification.

        Runs 10 runs of 10-fold cross-validation with ZeroR and J48 on two
        datasets, loads the generated results file and checks that the
        paired corrected t-tester yields a non-empty header and a non-empty
        result for the "Percent_correct" column.
        """
        data_files = [
            self.datafile(name) for name in ("iris.arff", "anneal.arff")
        ]
        learners = [
            classifiers.Classifier(classname=name)
            for name in ("weka.classifiers.rules.ZeroR",
                         "weka.classifiers.trees.J48")
        ]
        result_file = self.tempfile("results-cv.arff")
        experiment = experiments.SimpleCrossValidationExperiment(
            classification=True,
            runs=10,
            folds=10,
            datasets=data_files,
            classifiers=learners,
            result=result_file)
        self.assertIsNotNone(experiment, msg="Failed to instantiate!")
        experiment.setup()
        experiment.run()

        # evaluate: read back what the experiment wrote
        results = converters.loader_for_file(result_file).load_file(
            result_file)
        self.assertIsNotNone(
            results, msg="Failed to load data: " + result_file)

        result_matrix = experiments.ResultMatrix(
            classname="weka.experiment.ResultMatrixPlainText")
        self.assertIsNotNone(result_matrix, msg="Failed to instantiate!")

        ttester = experiments.Tester(
            classname="weka.experiment.PairedCorrectedTTester")
        self.assertIsNotNone(ttester, msg="Failed to instantiate!")

        ttester.resultmatrix = result_matrix
        measure_index = results.attribute_by_name("Percent_correct").index
        ttester.instances = results

        header = ttester.header(measure_index)
        self.assertGreater(len(header), 0, msg="Generated no header")
        comparison = ttester.multi_resultset_full(0, measure_index)
        self.assertGreater(len(comparison), 0, msg="Generated no result")
Example #4
0
    def test_result_matrix(self):
        """
        Tests the ResultMatrix class.

        Runs 10 runs of 10-fold cross-validation with ZeroR and J48 on the
        iris and anneal datasets, fills a plain-text result matrix through a
        paired corrected t-tester on the "Percent_correct" column and then
        exercises the matrix accessors: dimensions, column/row names,
        column/row visibility, means and standard deviations.
        """
        datasets = [self.datafile("iris.arff"), self.datafile("anneal.arff")]
        cls = [
            classifiers.Classifier(classname="weka.classifiers.rules.ZeroR"),
            classifiers.Classifier(classname="weka.classifiers.trees.J48")
        ]
        outfile = self.tempfile("results-cv.arff")
        exp = experiments.SimpleCrossValidationExperiment(classification=True,
                                                          runs=10,
                                                          folds=10,
                                                          datasets=datasets,
                                                          classifiers=cls,
                                                          result=outfile)
        self.assertIsNotNone(exp, msg="Failed to instantiate!")
        exp.setup()
        exp.run()

        # evaluate
        loader = converters.loader_for_file(outfile)
        data = loader.load_file(outfile)
        self.assertIsNotNone(data, msg="Failed to load data: " + outfile)

        matrix = experiments.ResultMatrix(
            classname="weka.experiment.ResultMatrixPlainText")
        self.assertIsNotNone(matrix, msg="Failed to instantiate!")

        tester = experiments.Tester(
            classname="weka.experiment.PairedCorrectedTTester")
        self.assertIsNotNone(tester, msg="Failed to instantiate!")

        tester.resultmatrix = matrix
        comparison_col = data.attribute_by_name("Percent_correct").index
        tester.instances = data
        self.assertGreater(len(tester.header(comparison_col)),
                           0,
                           msg="Generated no header")
        self.assertGreater(len(tester.multi_resultset_full(0, comparison_col)),
                           0,
                           msg="Generated no result")

        # dimensions: two datasets (rows) x two classifiers (columns)
        self.assertEqual(2, matrix.rows, msg="# of rows differ")
        self.assertEqual(2, matrix.columns, msg="# of columns differ")

        # cols
        self.assertIn("ZeroR",
                      matrix.get_col_name(0),
                      msg="ZeroR should be part of col name")
        self.assertIn("J48",
                      matrix.get_col_name(1),
                      msg="J48 should be part of col name")
        # index 2 is out of range for a two-column matrix
        self.assertIsNone(matrix.get_col_name(2),
                          msg="Column name should not exist")
        matrix.set_col_name(0, "zeror")
        self.assertIn("zeror",
                      matrix.get_col_name(0),
                      msg="zeror should be part of col name")

        self.assertFalse(matrix.is_col_hidden(1),
                         msg="Column should be visible")
        matrix.hide_col(1)
        self.assertTrue(matrix.is_col_hidden(1), msg="Column should be hidden")
        matrix.show_col(1)
        self.assertFalse(matrix.is_col_hidden(1),
                         msg="Column should be visible again")

        # rows
        self.assertEqual("iris",
                         matrix.get_row_name(0),
                         msg="Row names differ")
        self.assertEqual("anneal",
                         matrix.get_row_name(1),
                         msg="Row names differ")
        # index 2 is out of range for a two-row matrix; this must query the
        # row accessor (the column accessor is already covered above)
        self.assertIsNone(matrix.get_row_name(2),
                          msg="Row name should not exist")
        matrix.set_row_name(0, "IRIS")
        self.assertEqual("IRIS",
                         matrix.get_row_name(0),
                         msg="Row names differ")

        self.assertFalse(matrix.is_row_hidden(1), msg="Row should be visible")
        matrix.hide_row(1)
        self.assertTrue(matrix.is_row_hidden(1), msg="Row should be hidden")
        matrix.show_row(1)
        self.assertFalse(matrix.is_row_hidden(1),
                         msg="Row should be visible again")

        # mean
        self.assertAlmostEqual(33.3,
                               matrix.get_mean(0, 0),
                               places=1,
                               msg="Means differ")
        self.assertAlmostEqual(54.75,
                               matrix.average(0),
                               places=2,
                               msg="Averages differ")
        matrix.set_mean(0, 0, 10)
        self.assertAlmostEqual(10.0,
                               matrix.get_mean(0, 0),
                               places=1,
                               msg="Means differ")

        # stdev
        self.assertAlmostEqual(0.0,
                               matrix.get_stdev(0, 0),
                               places=1,
                               msg="Stdevs differ")
        matrix.set_stdev(0, 0, 0.3)
        self.assertAlmostEqual(0.3,
                               matrix.get_stdev(0, 0),
                               places=1,
                               msg="Stdevs differ")