def test_loader_for_file(self):
    """
    Checks that loader_for_file returns the expected loader class for
    .arff, .arff.gz and .csv file names.
    """
    # (file name, expected loader class name, prefix of the failure message)
    cases = (
        ("/some/where/anneal.arff", "weka.core.converters.ArffLoader", "Class differs:  "),
        ("/some/where/anneal.arff.gz", "weka.core.converters.ArffLoader", "Class differs: "),
        ("/some/where/anneal.csv", "weka.core.converters.CSVLoader", "Class differs: "),
    )
    for path, expected, prefix in cases:
        actual = converters.loader_for_file(path).classname
        self.assertEqual(expected, actual, msg=prefix + path)
Esempio n. 2
0
def main():
    """
    Just runs some example code.

    Runs 10 runs of 10-fold cross-validation with J48 on ``train.arff`` and
    ``test.arff`` (relative paths), stores the results as ``results-cv.arff``
    in the system temp directory and prints the paired corrected t-test
    output for every comparison measure that is present in the results.
    """
    # cross-validation + classification
    datasets = ["train.arff", "test.arff"]
    classifiers = [Classifier("weka.classifiers.trees.J48")]
    outfile = tempfile.gettempdir() + os.sep + "results-cv.arff"
    exp = SimpleCrossValidationExperiment(
        classification=True,
        runs=10,
        folds=10,
        datasets=datasets,
        classifiers=classifiers,
        result=outfile
    )
    exp.setup()
    exp.run()

    # evaluate
    # "Percent_correct" is the measure of the classification experiment above.
    # "Correlation_coefficient" is left over from a "random split + regression"
    # section whose experiment is no longer run here, so that column may be
    # absent from the result file.
    for measure in ("Percent_correct", "Correlation_coefficient"):
        loader = converters.loader_for_file(outfile)
        data = loader.load_file(outfile)
        # NOTE(review): assumes get_attribute_by_name returns None for an
        # unknown attribute name - confirm against the wrapper in use.
        attribute = data.get_attribute_by_name(measure)
        if attribute is None:
            # Without this guard the chained get_index() call fails on None.
            print("Skipping measure not present in results: " + measure)
            continue
        matrix = ResultMatrix("weka.experiment.ResultMatrixPlainText")
        tester = Tester("weka.experiment.PairedCorrectedTTester")
        tester.set_resultmatrix(matrix)
        comparison_col = attribute.get_index()
        tester.set_instances(data)
        print(tester.header(comparison_col))
        print(tester.multi_resultset_full(0, comparison_col))
def main():
    """
    Command-line entry point for running an associator.

    Parses the arguments (use -h to list them), starts the JVM, builds the
    associator on the training set and prints it. The JVM is stopped
    afterwards, whether or not an error occurred.
    """
    arg_parser = argparse.ArgumentParser(
        description='Executes an associator from the command-line. Calls JVM start/stop automatically.')
    arg_parser.add_argument(
        "-j", metavar="classpath", dest="classpath", help="additional classpath, jars/directories")
    arg_parser.add_argument(
        "-X", metavar="heap", dest="heap", help="max heap size for jvm, e.g., 512m")
    arg_parser.add_argument(
        "-t", metavar="train", dest="train", required=True, help="training set file")
    arg_parser.add_argument(
        "associator", help="associator classname, e.g., weka.associations.Apriori")
    arg_parser.add_argument(
        "option", nargs=argparse.REMAINDER, help="additional associator options")
    args = arg_parser.parse_args()

    # the classpath entries are separated by the platform's path separator
    classpath = [] if args.classpath is None else args.classpath.split(os.pathsep)
    jvm.start(classpath, max_heap_size=args.heap, packages=True)

    logger.debug("Commandline: " + join_options(sys.argv[1:]))

    try:
        assoc = Associator(classname=args.associator)
        if len(args.option) > 0:
            assoc.options = args.option
        train_data = converters.loader_for_file(args.train).load_file(args.train)
        assoc.build_associations(train_data)
        print(str(assoc))
    except Exception as err:
        # top-level boundary: report the problem instead of a traceback
        print(err)
    finally:
        jvm.stop()
def main():
    """
    Runs an associator from the command-line. Calls JVM start/stop automatically.
    Use -h to see all options.

    The training set given via -t is loaded with a loader chosen from its file
    name, the associator is built on it and its string representation is
    printed. Any error raised while doing so is printed rather than propagated.
    """

    parser = argparse.ArgumentParser(
        description='Executes an associator from the command-line. Calls JVM start/stop automatically.')
    parser.add_argument("-j", metavar="classpath", dest="classpath", help="additional classpath, jars/directories")
    parser.add_argument("-X", metavar="heap", dest="heap", help="max heap size for jvm, e.g., 512m")
    parser.add_argument("-t", metavar="train", dest="train", required=True, help="training set file")
    parser.add_argument("associator", help="associator classname, e.g., weka.associations.Apriori")
    parser.add_argument("option", nargs=argparse.REMAINDER, help="additional associator options")
    parsed = parser.parse_args()
    jars = []
    if parsed.classpath is not None:
        jars = parsed.classpath.split(os.pathsep)

    jvm.start(jars, max_heap_size=parsed.heap, packages=True)

    logger.debug("Commandline: " + join_options(sys.argv[1:]))

    try:
        associator = Associator(classname=parsed.associator)
        if len(parsed.option) > 0:
            associator.options = parsed.option
        loader = converters.loader_for_file(parsed.train)
        data = loader.load_file(parsed.train)
        associator.build_associations(data)
        print(str(associator))
    except Exception as e:
        # "except Exception, e" is Python 2 only syntax; "as" works on 2.6+ and 3.x
        print(e)
    finally:
        # always shut the JVM down, as promised by the docstring
        jvm.stop()
Esempio n. 5
0
 def test_loader_for_file(self):
     """
     Verifies which loader class loader_for_file selects for a few file names.
     """
     arff_loader = "weka.core.converters.ArffLoader"
     csv_loader = "weka.core.converters.CSVLoader"
     # each entry: file name, expected loader class, failure-message prefix
     expectations = [
         ("/some/where/anneal.arff", arff_loader, "Class differs:  "),
         ("/some/where/anneal.arff.gz", arff_loader, "Class differs: "),
         ("/some/where/anneal.csv", csv_loader, "Class differs: "),
     ]
     for path, wanted, prefix in expectations:
         found = converters.loader_for_file(path)
         self.assertEqual(wanted, found.classname, msg=prefix + path)
    def test_plot_experiment(self):
        """
        Runs a random split regression experiment and plots its result
        matrix twice: once with and once without standard deviations.
        """
        data_files = [
            self.datafile(name)
            for name in ("bolts.arff", "bodyfat.arff", "autoPrice.arff")
        ]
        schemes = [
            classifiers.Classifier(name)
            for name in (
                "weka.classifiers.trees.REPTree",
                "weka.classifiers.functions.LinearRegression",
                "weka.classifiers.functions.SMOreg",
            )
        ]
        result_file = self.tempfile("results-rs.arff")
        experiment = experiments.SimpleRandomSplitExperiment(
            classification=False,
            runs=10,
            percentage=66.6,
            preserve_order=False,
            datasets=data_files,
            classifiers=schemes,
            result=result_file)
        experiment.setup()
        experiment.run()

        # evaluate the stored results
        results = converters.loader_for_file(result_file).load_file(result_file)
        result_matrix = experiments.ResultMatrix("weka.experiment.ResultMatrixPlainText")
        ttester = experiments.Tester("weka.experiment.PairedCorrectedTTester")
        ttester.resultmatrix = result_matrix
        col = results.attribute_by_name("Correlation_coefficient").index
        ttester.instances = results
        # return values are not needed here; presumably these calls populate
        # the result matrix that gets plotted below - TODO confirm
        ttester.header(col)
        ttester.multi_resultset_full(0, col)

        # plot (non-blocking)
        measure = "Correlation coefficient"
        plot.plot_experiment(
            result_matrix, title="Random split (w/ StdDev)", measure=measure,
            show_stdev=True, wait=False)
        plot.plot_experiment(
            result_matrix, title="Random split", measure=measure, wait=False)
Esempio n. 7
0
    def test_randomsplit_regression(self):
        """
        Runs a random split experiment on two regression datasets and checks
        that the t-tester yields a non-empty header and result.
        """
        data_files = [self.datafile(name) for name in ("bolts.arff", "bodyfat.arff")]
        schemes = [
            classifiers.Classifier(classname=name)
            for name in (
                "weka.classifiers.rules.ZeroR",
                "weka.classifiers.functions.LinearRegression",
            )
        ]
        result_file = self.tempfile("results-rs.arff")
        experiment = experiments.SimpleRandomSplitExperiment(
            classification=False, runs=10, percentage=66.6, preserve_order=False,
            datasets=data_files, classifiers=schemes, result=result_file)
        self.assertIsNotNone(experiment, msg="Failed to instantiate!")
        experiment.setup()
        experiment.run()

        # evaluate the stored results
        results = converters.loader_for_file(result_file).load_file(result_file)
        self.assertIsNotNone(results, msg="Failed to load data: " + result_file)

        result_matrix = experiments.ResultMatrix(classname="weka.experiment.ResultMatrixPlainText")
        self.assertIsNotNone(result_matrix, msg="Failed to instantiate!")

        ttester = experiments.Tester(classname="weka.experiment.PairedCorrectedTTester")
        self.assertIsNotNone(ttester, msg="Failed to instantiate!")

        ttester.resultmatrix = result_matrix
        col = results.attribute_by_name("Correlation_coefficient").index
        ttester.instances = results
        header = ttester.header(col)
        self.assertGreater(len(header), 0, msg="Generated no header")
        comparison = ttester.multi_resultset_full(0, col)
        self.assertGreater(len(comparison), 0, msg="Generated no result")
Esempio n. 8
0
    def test_crossvalidation_classification(self):
        """
        Runs a cross-validated classification experiment and checks that the
        t-tester yields a non-empty header and result.
        """
        data_files = [self.datafile(name) for name in ("iris.arff", "anneal.arff")]
        schemes = [
            classifiers.Classifier(classname=name)
            for name in ("weka.classifiers.rules.ZeroR", "weka.classifiers.trees.J48")
        ]
        result_file = self.tempfile("results-cv.arff")
        experiment = experiments.SimpleCrossValidationExperiment(
            classification=True, runs=10, folds=10,
            datasets=data_files, classifiers=schemes, result=result_file)
        self.assertIsNotNone(experiment, msg="Failed to instantiate!")
        experiment.setup()
        experiment.run()

        # evaluate the stored results
        results = converters.loader_for_file(result_file).load_file(result_file)
        self.assertIsNotNone(results, msg="Failed to load data: " + result_file)

        result_matrix = experiments.ResultMatrix(classname="weka.experiment.ResultMatrixPlainText")
        self.assertIsNotNone(result_matrix, msg="Failed to instantiate!")

        ttester = experiments.Tester(classname="weka.experiment.PairedCorrectedTTester")
        self.assertIsNotNone(ttester, msg="Failed to instantiate!")

        ttester.resultmatrix = result_matrix
        col = results.attribute_by_name("Percent_correct").index
        ttester.instances = results
        header = ttester.header(col)
        self.assertGreater(len(header), 0, msg="Generated no header")
        comparison = ttester.multi_resultset_full(0, col)
        self.assertGreater(len(comparison), 0, msg="Generated no result")
    def test_randomsplit_regression(self):
        """
        Exercises SimpleRandomSplitExperiment on regression data and verifies
        that evaluating the result file produces a header and a result.
        """
        input_files = [
            self.datafile("bolts.arff"),
            self.datafile("bodyfat.arff"),
        ]
        learners = [
            classifiers.Classifier(classname="weka.classifiers.rules.ZeroR"),
            classifiers.Classifier(classname="weka.classifiers.functions.LinearRegression"),
        ]
        output = self.tempfile("results-rs.arff")
        setup = dict(
            classification=False,
            runs=10,
            percentage=66.6,
            preserve_order=False,
            datasets=input_files,
            classifiers=learners,
            result=output,
        )
        experiment = experiments.SimpleRandomSplitExperiment(**setup)
        self.assertIsNotNone(experiment, msg="Failed to instantiate!")
        experiment.setup()
        experiment.run()

        # read the results back and set up the significance test
        result_loader = converters.loader_for_file(output)
        result_data = result_loader.load_file(output)
        self.assertIsNotNone(result_data, msg="Failed to load data: " + output)

        plain_matrix = experiments.ResultMatrix(classname="weka.experiment.ResultMatrixPlainText")
        self.assertIsNotNone(plain_matrix, msg="Failed to instantiate!")

        t_test = experiments.Tester(classname="weka.experiment.PairedCorrectedTTester")
        self.assertIsNotNone(t_test, msg="Failed to instantiate!")

        t_test.resultmatrix = plain_matrix
        measure_col = result_data.attribute_by_name("Correlation_coefficient").index
        t_test.instances = result_data
        self.assertGreater(
            len(t_test.header(measure_col)), 0, msg="Generated no header")
        self.assertGreater(
            len(t_test.multi_resultset_full(0, measure_col)), 0, msg="Generated no result")
Esempio n. 10
0
    def test_crossvalidation_classification(self):
        """
        Exercises SimpleCrossValidationExperiment on classification data and
        verifies that evaluating the result file produces a header and a result.
        """
        input_files = [
            self.datafile("iris.arff"),
            self.datafile("anneal.arff"),
        ]
        learners = [
            classifiers.Classifier(classname="weka.classifiers.rules.ZeroR"),
            classifiers.Classifier(classname="weka.classifiers.trees.J48"),
        ]
        output = self.tempfile("results-cv.arff")
        setup = dict(
            classification=True,
            runs=10,
            folds=10,
            datasets=input_files,
            classifiers=learners,
            result=output,
        )
        experiment = experiments.SimpleCrossValidationExperiment(**setup)
        self.assertIsNotNone(experiment, msg="Failed to instantiate!")
        experiment.setup()
        experiment.run()

        # read the results back and set up the significance test
        result_loader = converters.loader_for_file(output)
        result_data = result_loader.load_file(output)
        self.assertIsNotNone(result_data, msg="Failed to load data: " + output)

        plain_matrix = experiments.ResultMatrix(classname="weka.experiment.ResultMatrixPlainText")
        self.assertIsNotNone(plain_matrix, msg="Failed to instantiate!")

        t_test = experiments.Tester(classname="weka.experiment.PairedCorrectedTTester")
        self.assertIsNotNone(t_test, msg="Failed to instantiate!")

        t_test.resultmatrix = plain_matrix
        measure_col = result_data.attribute_by_name("Percent_correct").index
        t_test.instances = result_data
        self.assertGreater(
            len(t_test.header(measure_col)), 0, msg="Generated no header")
        self.assertGreater(
            len(t_test.multi_resultset_full(0, measure_col)), 0, msg="Generated no result")
Esempio n. 11
0
    def test_plot_experiment(self):
        """
        Plots the result matrix of a random split regression experiment,
        first with standard deviations shown and then without.
        """
        input_files = [
            self.datafile("bolts.arff"),
            self.datafile("bodyfat.arff"),
            self.datafile("autoPrice.arff"),
        ]
        learners = [
            classifiers.Classifier("weka.classifiers.trees.REPTree"),
            classifiers.Classifier("weka.classifiers.functions.LinearRegression"),
            classifiers.Classifier("weka.classifiers.functions.SMOreg"),
        ]
        output = self.tempfile("results-rs.arff")
        setup = dict(
            classification=False,
            runs=10,
            percentage=66.6,
            preserve_order=False,
            datasets=input_files,
            classifiers=learners,
            result=output,
        )
        experiment = experiments.SimpleRandomSplitExperiment(**setup)
        experiment.setup()
        experiment.run()

        # read the results back and run the significance test
        result_loader = converters.loader_for_file(output)
        result_data = result_loader.load_file(output)
        plain_matrix = experiments.ResultMatrix("weka.experiment.ResultMatrixPlainText")
        t_test = experiments.Tester("weka.experiment.PairedCorrectedTTester")
        t_test.resultmatrix = plain_matrix
        measure_col = result_data.attribute_by_name("Correlation_coefficient").index
        t_test.instances = result_data
        # outputs are discarded; presumably the calls fill the matrix that is
        # plotted afterwards - TODO confirm
        t_test.header(measure_col)
        t_test.multi_resultset_full(0, measure_col)

        # plot without blocking on the windows
        plot.plot_experiment(
            plain_matrix,
            title="Random split (w/ StdDev)",
            measure="Correlation coefficient",
            show_stdev=True,
            wait=False)
        plot.plot_experiment(
            plain_matrix,
            title="Random split",
            measure="Correlation coefficient",
            wait=False)
Esempio n. 12
0
def load_dataset(fname, loader=None, class_index=None, internal=False):
    """
    Loads a dataset through Weka's converters and converts it to numpy.

    When no loader is supplied, one is chosen via ``loader_for_file`` from the
    file name. The ``internal`` flag is handed on to the dataset's ``to_numpy``
    conversion (Weka's internal, numeric-only representation vs. mixed types).

    :param fname: the path of the dataset to load
    :type fname: str
    :param loader: the customized Loader instance to use for loading the dataset, can be None
    :type loader: Loader
    :param class_index: the class index string to use ('first', 'second', 'third', 'last-2', 'last-1', 'last' or 1-based index)
    :type class_index: str
    :param internal: whether to return Weka's internal format or mixed data types
    :type internal: bool
    :return: the dataset tuple: (X) if no class index; (X,y) if class index
    """
    reader = loader_for_file(fname) if loader is None else loader
    matrix = reader.load_file(fname, class_index=class_index).to_numpy(internal=internal)
    if class_index is None:
        # nothing to split off, return the converted data as is
        return matrix
    return split_off_class(matrix, class_index)
Esempio n. 13
0
    Classifier(classname="weka.classifiers.rules.OneR"),
    Classifier(classname="weka.classifiers.trees.J48"),
    Classifier(classname="weka.classifiers.bayes.NaiveBayes"),
    Classifier(classname="weka.classifiers.lazy.IBk"),
    Classifier(classname="weka.classifiers.functions.SMO"),
    Classifier(classname="weka.classifiers.meta.AdaBoostM1"),
    # handles only 2-class problems: Classifier(classname="weka.classifiers.functions.VotedPerceptron")
]
# Cross-validation experiment: 10 runs of 10-fold cross-validation over the
# `datasets` and `classifiers` defined earlier in the script; the results are
# written to a file in the system temp directory.
outfile = tempfile.gettempdir() + os.sep + "results-cv.arff"   # store results for later analysis
exp = SimpleCrossValidationExperiment(
    classification=True,
    runs=10,
    folds=10,
    datasets=datasets,
    classifiers=classifiers,
    result=outfile)
exp.setup()
exp.run()
# evaluate previous run: load the stored results back, using a loader chosen
# from the file name
loader = converters.loader_for_file(outfile)
data = loader.load_file(outfile)
matrix = ResultMatrix(classname="weka.experiment.ResultMatrixPlainText")
tester = Tester(classname="weka.experiment.PairedCorrectedTTester")
tester.set_resultmatrix(matrix)
# the comparison is done on the "Percent_correct" column of the results
comparison_col = data.get_attribute_by_name("Percent_correct").get_index()
tester.set_instances(data)
print(tester.header(comparison_col))
print(tester.multi_resultset_full(0, comparison_col))

# shut the JVM down; presumably it was started earlier in the script (not visible here)
jvm.stop()
Esempio n. 14
0
# the JVM has to be running before any of the Weka classes below are used
jvm.start()

# setup cross-validation experiment and run it
# (10 runs of 10-fold cross-validation with J48 on a single dataset;
# `data_dir` is defined outside this fragment)
datasets = [data_dir + os.sep + "segment-challenge.arff"]
classifiers = [Classifier(classname="weka.classifiers.trees.J48")]
outfile = tempfile.gettempdir() + os.sep + "results-cv.arff"
exp = SimpleCrossValidationExperiment(datasets=datasets,
                                      classifiers=classifiers,
                                      folds=10,
                                      runs=10,
                                      result=outfile)
exp.setup()
exp.run()

# evaluate experiment: load the stored results back, using a loader chosen
# from the file name
loader = converters.loader_for_file(outfile)
data = loader.load_file(outfile)
# the option strings are passed through to the Weka classes
matrix = ResultMatrix(classname="weka.experiment.ResultMatrixPlainText",
                      options=[
                          "-print-row-names", "-print-col-names",
                          "-enum-col-names", "-show-stddev"
                      ])
# NOTE(review): "-S 0.05" is presumably the significance level - confirm
# against the Weka tester documentation
tester = Tester(classname="weka.experiment.PairedCorrectedTTester",
                options=["-V", "-S", "0.05"])
# the comparison is done on the "Percent_correct" column of the results
comparison_col = data.attribute_by_name("Percent_correct").index
tester.instances = data
tester.resultmatrix = matrix
print(tester.header(comparison_col))
print(tester.multi_resultset_full(0, comparison_col))

# setup random-split experiment and run it
Esempio n. 15
0
def main():
    """
    Run sample code.

    Runs a cross-validated classification experiment and a random split
    regression experiment, prints the paired corrected t-test output for
    each and finally plots the regression results.
    """

    def evaluate(result_file, measure):
        """Prints the t-test comparison on ``measure`` and returns the result matrix used."""
        results = converters.loader_for_file(result_file).load_file(result_file)
        result_matrix = ResultMatrix("weka.experiment.ResultMatrixPlainText")
        ttester = Tester("weka.experiment.PairedCorrectedTTester")
        ttester.resultmatrix = result_matrix
        col = results.attribute_by_name(measure).index
        ttester.instances = results
        print(ttester.header(col))
        print(ttester.multi_resultset_full(0, col))
        return result_matrix

    print(helper.getDataDir())

    # cross-validation + classification
    helper.printTitle("Experiment: Cross-validation + classification")
    cv_files = [helper.getDataDir() + os.sep + name for name in ("iris.arff", "anneal.arff")]
    cv_schemes = [
        Classifier(name)
        for name in ("weka.classifiers.rules.ZeroR", "weka.classifiers.trees.J48")
    ]
    cv_results = tempfile.gettempdir() + os.sep + "results-cv.arff"
    cv_experiment = SimpleCrossValidationExperiment(
        classification=True, runs=10, folds=10,
        datasets=cv_files, classifiers=cv_schemes, result=cv_results)
    cv_experiment.setup()
    cv_experiment.run()

    # evaluate
    evaluate(cv_results, "Percent_correct")

    # random split + regression
    helper.printTitle("Experiment: Random split + regression")
    rs_files = [helper.getDataDir() + os.sep + name for name in ("bolts.arff", "bodyfat.arff")]
    rs_schemes = [
        Classifier(name)
        for name in ("weka.classifiers.rules.ZeroR", "weka.classifiers.functions.LinearRegression")
    ]
    rs_results = tempfile.gettempdir() + os.sep + "results-rs.arff"
    rs_experiment = SimpleRandomSplitExperiment(
        classification=False, runs=10, percentage=66.6, preserve_order=False,
        datasets=rs_files, classifiers=rs_schemes, result=rs_results)
    rs_experiment.setup()
    rs_experiment.run()

    # evaluate
    rs_matrix = evaluate(rs_results, "Correlation_coefficient")

    # plot the regression comparison
    plot_exp.plot_experiment(rs_matrix, title="Random split", measure="Correlation coefficient", wait=True)
Esempio n. 16
0
    def test_result_matrix(self):
        """
        Tests the ResultMatrix class.

        Runs 10 runs of 10-fold cross-validation with ZeroR and J48 on the
        iris and anneal datasets, evaluates the stored results with the paired
        corrected t-tester and then checks the matrix dimensions, the hiding
        and showing of columns and rows, and the mean/stdev accessors.
        """
        datasets = [self.datafile("iris.arff"), self.datafile("anneal.arff")]
        cls = [
            classifiers.Classifier(classname="weka.classifiers.rules.ZeroR"),
            classifiers.Classifier(classname="weka.classifiers.trees.J48")]
        outfile = self.tempfile("results-cv.arff")
        exp = experiments.SimpleCrossValidationExperiment(
            classification=True,
            runs=10,
            folds=10,
            datasets=datasets,
            classifiers=cls,
            result=outfile)
        self.assertIsNotNone(exp, msg="Failed to instantiate!")
        exp.setup()
        exp.run()

        # evaluate
        loader = converters.loader_for_file(outfile)
        data = loader.load_file(outfile)
        self.assertIsNotNone(data, msg="Failed to load data: " + outfile)

        matrix = experiments.ResultMatrix(classname="weka.experiment.ResultMatrixPlainText")
        self.assertIsNotNone(matrix, msg="Failed to instantiate!")

        tester = experiments.Tester(classname="weka.experiment.PairedCorrectedTTester")
        self.assertIsNotNone(tester, msg="Failed to instantiate!")

        tester.resultmatrix = matrix
        comparison_col = data.attribute_by_name("Percent_correct").index
        tester.instances = data
        self.assertGreater(len(tester.header(comparison_col)), 0, msg="Generated no header")
        self.assertGreater(len(tester.multi_resultset_full(0, comparison_col)), 0, msg="Generated no result")

        # dimensions: two datasets and two classifiers
        self.assertEqual(2, matrix.rows, msg="# of rows differ")
        self.assertEqual(2, matrix.columns, msg="# of columns differ")

        # cols
        # NOTE(review): the name assertions below are disabled; the reason is
        # not visible from this file.
        #self.assertTrue(matrix.get_col_name(0).find("ZeroR") > -1, msg="ZeroR should be part of col name")
        #self.assertTrue(matrix.get_col_name(1).find("J48") > -1, msg="J48 should be part of col name")
        #self.assertIsNone(matrix.get_col_name(2), msg="Column name should not exist")
        #matrix.set_col_name(0, "zeror")
        #self.assertTrue(matrix.get_col_name(0).find("zeror") > -1, msg="zeror should be part of col name")

        self.assertFalse(matrix.is_col_hidden(1), msg="Column should be visible")
        matrix.hide_col(1)
        self.assertTrue(matrix.is_col_hidden(1), msg="Column should be hidden")
        matrix.show_col(1)
        self.assertFalse(matrix.is_col_hidden(1), msg="Column should be visible again")

        # rows
        # NOTE(review): disabled like the column name assertions above.
        #self.assertEqual("iris", matrix.get_row_name(0), msg="Row names differ")
        #self.assertEqual("anneal", matrix.get_row_name(1), msg="Row names differ")
        #self.assertIsNone(matrix.get_col_name(2), msg="Row name should not exist")
        #matrix.set_row_name(0, "IRIS")
        #self.assertEqual("IRIS", matrix.get_row_name(0), msg="Row names differ")

        self.assertFalse(matrix.is_row_hidden(1), msg="Row should be visible")
        matrix.hide_row(1)
        self.assertTrue(matrix.is_row_hidden(1), msg="Row should be hidden")
        matrix.show_row(1)
        self.assertFalse(matrix.is_row_hidden(1), msg="Row should be visible again")

        # mean: read the computed value, then overwrite it and read it back
        self.assertAlmostEqual(33.3, matrix.get_mean(0, 0), places=1, msg="Means differ")
        self.assertAlmostEqual(54.75, matrix.average(0), places=2, msg="Averages differ")
        matrix.set_mean(0, 0, 10)
        self.assertAlmostEqual(10.0, matrix.get_mean(0, 0), places=1, msg="Means differ")

        # stdev: read the computed value, then overwrite it and read it back
        self.assertAlmostEqual(0.0, matrix.get_stdev(0, 0), places=1, msg="Stdevs differ")
        matrix.set_stdev(0, 0, 0.3)
        self.assertAlmostEqual(0.3, matrix.get_stdev(0, 0), places=1, msg="Stdevs differ")
Esempio n. 17
0
def main():
    """
    Just runs some example code.

    Performs a cross-validated classification experiment followed by a random
    split regression experiment, prints the paired corrected t-test output of
    both and plots the regression comparison.
    """

    def report(result_path, measure_name):
        """Prints the t-test output for ``measure_name`` and hands back the result matrix used."""
        stored = converters.loader_for_file(result_path).load_file(result_path)
        plain_matrix = ResultMatrix("weka.experiment.ResultMatrixPlainText")
        t_test = Tester("weka.experiment.PairedCorrectedTTester")
        t_test.resultmatrix = plain_matrix
        measure_col = stored.attribute_by_name(measure_name).index
        t_test.instances = stored
        print(t_test.header(measure_col))
        print(t_test.multi_resultset_full(0, measure_col))
        return plain_matrix

    print(helper.get_data_dir())

    # cross-validation + classification
    helper.print_title("Experiment: Cross-validation + classification")
    classification_files = [
        helper.get_data_dir() + os.sep + "iris.arff",
        helper.get_data_dir() + os.sep + "anneal.arff",
    ]
    classification_schemes = [
        Classifier("weka.classifiers.rules.ZeroR"),
        Classifier("weka.classifiers.trees.J48"),
    ]
    classification_out = tempfile.gettempdir() + os.sep + "results-cv.arff"
    crossval = SimpleCrossValidationExperiment(
        classification=True,
        runs=10,
        folds=10,
        datasets=classification_files,
        classifiers=classification_schemes,
        result=classification_out)
    crossval.setup()
    crossval.run()

    # evaluate
    report(classification_out, "Percent_correct")

    # random split + regression
    helper.print_title("Experiment: Random split + regression")
    regression_files = [
        helper.get_data_dir() + os.sep + "bolts.arff",
        helper.get_data_dir() + os.sep + "bodyfat.arff",
    ]
    regression_schemes = [
        Classifier("weka.classifiers.rules.ZeroR"),
        Classifier("weka.classifiers.functions.LinearRegression"),
    ]
    regression_out = tempfile.gettempdir() + os.sep + "results-rs.arff"
    randomsplit = SimpleRandomSplitExperiment(
        classification=False,
        runs=10,
        percentage=66.6,
        preserve_order=False,
        datasets=regression_files,
        classifiers=regression_schemes,
        result=regression_out)
    randomsplit.setup()
    randomsplit.run()

    # evaluate
    regression_matrix = report(regression_out, "Correlation_coefficient")

    # plot the regression comparison
    plot_exp.plot_experiment(
        regression_matrix,
        title="Random split",
        measure="Correlation coefficient",
        wait=True)
Esempio n. 18
0
    def test_result_matrix(self):
        """
        Tests the ResultMatrix class.

        Runs 10 runs of 10-fold cross-validation with ZeroR and J48 on the
        iris and anneal datasets, evaluates the stored results with the paired
        corrected t-tester and then checks the matrix dimensions, column and
        row names, the hiding and showing of columns and rows, and the
        mean/stdev accessors.
        """
        datasets = [self.datafile("iris.arff"), self.datafile("anneal.arff")]
        cls = [
            classifiers.Classifier(classname="weka.classifiers.rules.ZeroR"),
            classifiers.Classifier(classname="weka.classifiers.trees.J48")
        ]
        outfile = self.tempfile("results-cv.arff")
        exp = experiments.SimpleCrossValidationExperiment(classification=True,
                                                          runs=10,
                                                          folds=10,
                                                          datasets=datasets,
                                                          classifiers=cls,
                                                          result=outfile)
        self.assertIsNotNone(exp, msg="Failed to instantiate!")
        exp.setup()
        exp.run()

        # evaluate
        loader = converters.loader_for_file(outfile)
        data = loader.load_file(outfile)
        self.assertIsNotNone(data, msg="Failed to load data: " + outfile)

        matrix = experiments.ResultMatrix(
            classname="weka.experiment.ResultMatrixPlainText")
        self.assertIsNotNone(matrix, msg="Failed to instantiate!")

        tester = experiments.Tester(
            classname="weka.experiment.PairedCorrectedTTester")
        self.assertIsNotNone(tester, msg="Failed to instantiate!")

        tester.resultmatrix = matrix
        comparison_col = data.attribute_by_name("Percent_correct").index
        tester.instances = data
        self.assertGreater(len(tester.header(comparison_col)),
                           0,
                           msg="Generated no header")
        self.assertGreater(len(tester.multi_resultset_full(0, comparison_col)),
                           0,
                           msg="Generated no result")

        # dimensions: two datasets and two classifiers
        self.assertEqual(2, matrix.rows, msg="# of rows differ")
        self.assertEqual(2, matrix.columns, msg="# of columns differ")

        # cols
        self.assertTrue(matrix.get_col_name(0).find("ZeroR") > -1,
                        msg="ZeroR should be part of col name")
        self.assertTrue(matrix.get_col_name(1).find("J48") > -1,
                        msg="J48 should be part of col name")
        # index 2 is one past the last column
        self.assertIsNone(matrix.get_col_name(2),
                          msg="Column name should not exist")
        matrix.set_col_name(0, "zeror")
        self.assertTrue(matrix.get_col_name(0).find("zeror") > -1,
                        msg="zeror should be part of col name")

        self.assertFalse(matrix.is_col_hidden(1),
                         msg="Column should be visible")
        matrix.hide_col(1)
        self.assertTrue(matrix.is_col_hidden(1), msg="Column should be hidden")
        matrix.show_col(1)
        self.assertFalse(matrix.is_col_hidden(1),
                         msg="Column should be visible again")

        # rows
        self.assertEqual("iris",
                         matrix.get_row_name(0),
                         msg="Row names differ")
        self.assertEqual("anneal",
                         matrix.get_row_name(1),
                         msg="Row names differ")
        # index 2 is one past the last row; this has to query the row name,
        # not the column name, to match what the message states
        self.assertIsNone(matrix.get_row_name(2),
                          msg="Row name should not exist")
        matrix.set_row_name(0, "IRIS")
        self.assertEqual("IRIS",
                         matrix.get_row_name(0),
                         msg="Row names differ")

        self.assertFalse(matrix.is_row_hidden(1), msg="Row should be visible")
        matrix.hide_row(1)
        self.assertTrue(matrix.is_row_hidden(1), msg="Row should be hidden")
        matrix.show_row(1)
        self.assertFalse(matrix.is_row_hidden(1),
                         msg="Row should be visible again")

        # mean: read the computed value, then overwrite it and read it back
        self.assertAlmostEqual(33.3,
                               matrix.get_mean(0, 0),
                               places=1,
                               msg="Means differ")
        self.assertAlmostEqual(54.75,
                               matrix.average(0),
                               places=2,
                               msg="Averages differ")
        matrix.set_mean(0, 0, 10)
        self.assertAlmostEqual(10.0,
                               matrix.get_mean(0, 0),
                               places=1,
                               msg="Means differ")

        # stdev: read the computed value, then overwrite it and read it back
        self.assertAlmostEqual(0.0,
                               matrix.get_stdev(0, 0),
                               places=1,
                               msg="Stdevs differ")
        matrix.set_stdev(0, 0, 0.3)
        self.assertAlmostEqual(0.3,
                               matrix.get_stdev(0, 0),
                               places=1,
                               msg="Stdevs differ")