コード例 #1
0
def use_classifier(data, cli, args):
    cli = cli.format(cli, **args)
    cls = from_commandline(cli, classname="weka.classifiers.Classifier")
    cls.build_classifier(data)
    evaluation = Evaluation(data)
    evaluation.crossvalidate_model(cls, data, 10, Random(1))
    return cls, evaluation
コード例 #2
0
 def test_from_and_to_commandline(self):
     """
     Tests the from_commandline and to_commandline methods.
     """
     cmdline = "weka.classifiers.trees.J48 -C 0.3 -M 4"
     cls = classes.from_commandline(
         cmdline=cmdline, classname="weka.classifiers.Classifier")
     self.assertIsNotNone(cls)
     self.assertEqual(cmdline, cls.to_commandline())
コード例 #3
0
def use_classifier(data_filename, cli):
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(data_filename)
    data.class_is_last()
    cls = from_commandline(cli, classname="weka.classifiers.Classifier")
    cls.build_classifier(data)
    evaluation = Evaluation(data)
    evaluation.crossvalidate_model(cls, data, 10, Random(1))
    return cls, evaluation
コード例 #4
0
 def test_from_and_to_commandline(self):
     """
     Tests the from_commandline and to_commandline methods.
     """
     cmdline = "weka.classifiers.trees.J48 -C 0.3 -M 4"
     cls = classes.from_commandline(
         cmdline=cmdline, classname="weka.classifiers.Classifier")
     self.assertIsNotNone(cls)
     self.assertEqual(cmdline, cls.to_commandline())
コード例 #5
0
 def convert(self):
     """
     Performs the actual conversion.
     :return: None if successful, otherwise errors message
     :rtype: str
     """
     cname = str(self.config["wrapper"])
     self._output = classes.from_commandline(self._input, classname=cname)
     return None
コード例 #6
0
 def convert(self):
     """
     Performs the actual conversion.
     :return: None if successful, otherwise errors message
     :rtype: str
     """
     cname = str(self.config["wrapper"])
     self._output = classes.from_commandline(self._input, classname=cname)
     return None
コード例 #7
0
    def setup(self):
        """
        Initializes the experiment.
        """
        # basic options
        javabridge.call(
            self.jobject, "setPropertyArray", "(Ljava/lang/Object;)V",
            javabridge.get_env().make_object_array(0, javabridge.get_env().find_class("weka/classifiers/Classifier")))
        javabridge.call(
            self.jobject, "setUsePropertyIterator", "(Z)V", True)
        javabridge.call(
            self.jobject, "setRunLower", "(I)V", 1)
        javabridge.call(
            self.jobject, "setRunUpper", "(I)V", self.runs)

        # setup result producer
        rproducer, prop_path = self.configure_resultproducer()
        javabridge.call(
            self.jobject, "setResultProducer", "(Lweka/experiment/ResultProducer;)V", rproducer)
        javabridge.call(
            self.jobject, "setPropertyPath", "([Lweka/experiment/PropertyNode;)V", prop_path)

        # classifiers
        classifiers = javabridge.get_env().make_object_array(
            len(self.classifiers), javabridge.get_env().find_class("weka/classifiers/Classifier"))
        for i, classifier in enumerate(self.classifiers):
            if type(classifier) is Classifier:
                javabridge.get_env().set_object_array_element(
                    classifiers, i, classifier.jobject)
            else:
                javabridge.get_env().set_object_array_element(
                    classifiers, i, from_commandline(classifier).jobject)
        javabridge.call(
            self.jobject, "setPropertyArray", "(Ljava/lang/Object;)V",
            classifiers)

        # datasets
        datasets = javabridge.make_instance("javax/swing/DefaultListModel", "()V")
        for dataset in self.datasets:
            f = javabridge.make_instance("java/io/File", "(Ljava/lang/String;)V", dataset)
            javabridge.call(datasets, "addElement", "(Ljava/lang/Object;)V", f)
        javabridge.call(
            self.jobject, "setDatasets", "(Ljavax/swing/DefaultListModel;)V", datasets)

        # output file
        if str(self.result).lower().endswith(".arff"):
            rlistener = javabridge.make_instance("weka/experiment/InstancesResultListener", "()V")
        elif str(self.result).lower().endswith(".csv"):
            rlistener = javabridge.make_instance("weka/experiment/CSVResultListener", "()V")
        else:
            raise Exception("Unhandled output format for results: " + self.result)
        rfile = javabridge.make_instance("java/io/File", "(Ljava/lang/String;)V", self.result)
        javabridge.call(
            rlistener, "setOutputFile", "(Ljava/io/File;)V", rfile)
        javabridge.call(
            self.jobject, "setResultListener", "(Lweka/experiment/ResultListener;)V", rlistener)
コード例 #8
0
    def setup(self):
        """
        Initializes the experiment.
        """
        # basic options
        javabridge.call(
            self.jobject, "setPropertyArray", "(Ljava/lang/Object;)V",
            javabridge.get_env().make_object_array(0, javabridge.get_env().find_class("weka/classifiers/Classifier")))
        javabridge.call(
            self.jobject, "setUsePropertyIterator", "(Z)V", True)
        javabridge.call(
            self.jobject, "setRunLower", "(I)V", 1)
        javabridge.call(
            self.jobject, "setRunUpper", "(I)V", self.runs)

        # setup result producer
        rproducer, prop_path = self.configure_resultproducer()
        javabridge.call(
            self.jobject, "setResultProducer", "(Lweka/experiment/ResultProducer;)V", rproducer)
        javabridge.call(
            self.jobject, "setPropertyPath", "([Lweka/experiment/PropertyNode;)V", prop_path)

        # classifiers
        classifiers = javabridge.get_env().make_object_array(
            len(self.classifiers), javabridge.get_env().find_class("weka/classifiers/Classifier"))
        for i, classifier in enumerate(self.classifiers):
            if type(classifier) is Classifier:
                javabridge.get_env().set_object_array_element(
                    classifiers, i, classifier.jobject)
            else:
                javabridge.get_env().set_object_array_element(
                    classifiers, i, from_commandline(classifier).jobject)
        javabridge.call(
            self.jobject, "setPropertyArray", "(Ljava/lang/Object;)V",
            classifiers)

        # datasets
        datasets = javabridge.make_instance("javax/swing/DefaultListModel", "()V")
        for dataset in self.datasets:
            f = javabridge.make_instance("java/io/File", "(Ljava/lang/String;)V", dataset)
            javabridge.call(datasets, "addElement", "(Ljava/lang/Object;)V", f)
        javabridge.call(
            self.jobject, "setDatasets", "(Ljavax/swing/DefaultListModel;)V", datasets)

        # output file
        if str(self.result).lower().endswith(".arff"):
            rlistener = javabridge.make_instance("weka/experiment/InstancesResultListener", "()V")
        elif str(self.result).lower().endswith(".csv"):
            rlistener = javabridge.make_instance("weka/experiment/CSVResultListener", "()V")
        else:
            raise Exception("Unhandled output format for results: " + self.result)
        rfile = javabridge.make_instance("java/io/File", "(Ljava/lang/String;)V", self.result)
        javabridge.call(
            rlistener, "setOutputFile", "(Ljava/io/File;)V", rfile)
        javabridge.call(
            self.jobject, "setResultListener", "(Lweka/experiment/ResultListener;)V", rlistener)
コード例 #9
0
 def make_copy(cls, generator):
     """
     Creates a copy of the generator.
     :param generator: the generator to copy
     :type generator: DataGenerator
     :return: the copy of the generator
     :rtype: DataGenerator
     """
     return from_commandline(
         to_commandline(generator), classname=classes.get_classname(DataGenerator()))
コード例 #10
0
 def from_config(self, k, v):
     """
     Hook method that allows converting values from the dictionary
     :param k: the key in the dictionary
     :type k: str
     :param v: the value
     :type v: object
     :return: the potentially parsed value
     :rtype: object
     """
     if k == "setup":
         return from_commandline(v, classname=to_commandline(datagen.DataGenerator()))
     return super(DataGenerator, self).from_config(k, v)
コード例 #11
0
ファイル: source.py プロジェクト: akshat0109/kisan_backend
    def from_config(self, k, v):
        """
        Hook method that allows converting values from the dictionary.

        :param k: the key in the dictionary
        :type k: str
        :param v: the value
        :type v: object
        :return: the potentially parsed value
        :rtype: object
        """
        if k == "setup":
            return from_commandline(v, classname=to_commandline(datagen.DataGenerator()))
        return super(DataGenerator, self).from_config(k, v)
コード例 #12
0
def train(option, sym, num):
    # load dataset given the symbol
    path = os.path.join('HistSet', 'histSet_%s.arff' % sym)
    loader = Loader("weka.core.converters.ArffLoader")
    dataset = loader.load_file(path)
    dataset.class_is_last()  # set the last attribute as class attribute

    # load testset
    # testset = loader.load_file(os.path.join('HistSet', 'testSet_LTC.arff'))
    # testset.class_is_last()

    # define classifier
    cmd = {
        'DecisionTable':
        'weka.classifiers.rules.DecisionTable -X 1 -S "weka.attributeSelection.BestFirst -D 1 -N 5"',
        'SMOreg':
        'weka.classifiers.functions.SMOreg -C 1.0 -N 0 -I "weka.classifiers.functions.supportVector.RegSMOImproved -L 0.001 -W 1 -P 1.0E-12 -T 0.001 -V" -K "weka.classifiers.functions.supportVector.PolyKernel -C 250007 -E 1.0"',
        'LinearRegression':
        'weka.classifiers.functions.LinearRegression -S 0 -R 1.0E-8',
        'GaussianProcesses':
        'weka.classifiers.functions.GaussianProcesses -L 1.0 -N 0 -K "weka.classifiers.functions.supportVector.RBFKernel -C 250007 -G 1.0"',
    }

    cls = from_commandline(cmd[option],
                           classname='weka.classifiers.Classifier')
    cls.build_classifier(dataset)

    # begin evaluating
    evaluation = Evaluation(dataset)

    # evaluation.evaluate_train_test_split(cls, dataset, 90, Random(1))   # evaluate by splitting train/test set
    evl = evaluation.test_model(cls, dataset)  # evaluate on test set
    print('predictions (' + str(len(evl)) + '): ')
    for i in range(num):
        print(evl[i - num], end=' ')
    # print(evaluation.summary())
    return evl[-num:]
コード例 #13
0
def main():
    """
    Just runs some example code.
    """

    # load a dataset
    iris_file = helper.get_data_dir() + os.sep + "iris.arff"
    helper.print_info("Loading dataset: " + iris_file)
    loader = Loader("weka.core.converters.ArffLoader")
    iris_data = loader.load_file(iris_file)
    iris_data.class_is_last()

    # classifier help
    helper.print_title("Creating help string")
    classifier = Classifier(classname="weka.classifiers.trees.J48")
    print(classifier.to_help())

    # partial classname
    helper.print_title("Creating classifier from partial classname")
    clsname = ".J48"
    classifier = Classifier(classname=clsname)
    print(clsname + " --> " + classifier.classname)

    # classifier from commandline
    helper.print_title("Creating SMO from command-line string")
    cmdline = 'weka.classifiers.functions.SMO -K "weka.classifiers.functions.supportVector.NormalizedPolyKernel -E 3.0"'
    classifier = from_commandline(cmdline,
                                  classname="weka.classifiers.Classifier")
    classifier.build_classifier(iris_data)
    print("input: " + cmdline)
    print("output: " + classifier.to_commandline())
    print("model:\n" + str(classifier))

    # kernel classifier
    helper.print_title("Creating SMO as KernelClassifier")
    kernel = Kernel(
        classname="weka.classifiers.functions.supportVector.RBFKernel",
        options=["-G", "0.001"])
    classifier = KernelClassifier(classname="weka.classifiers.functions.SMO",
                                  options=["-M"])
    classifier.kernel = kernel
    classifier.build_classifier(iris_data)
    print("classifier: " + classifier.to_commandline())
    print("model:\n" + str(classifier))

    # build a classifier and output model
    helper.print_title("Training J48 classifier on iris")
    classifier = Classifier(classname="weka.classifiers.trees.J48")
    # Instead of using 'options=["-C", "0.3"]' in the constructor, we can also set the "confidenceFactor"
    # property of the J48 classifier itself. However, being of type float rather than double, we need
    # to convert it to the correct type first using the double_to_float function:
    classifier.set_property("confidenceFactor", types.double_to_float(0.3))
    classifier.build_classifier(iris_data)
    print(classifier)
    print(classifier.graph)
    print(classifier.to_source("MyJ48"))
    plot_graph.plot_dot_graph(classifier.graph)

    # evaluate model on test set
    helper.print_title("Evaluating J48 classifier on iris")
    evaluation = Evaluation(iris_data)
    evl = evaluation.test_model(classifier, iris_data)
    print(evl)
    print(evaluation.summary())

    # evaluate model on train/test split
    helper.print_title("Evaluating J48 classifier on iris (random split 66%)")
    classifier = Classifier(classname="weka.classifiers.trees.J48",
                            options=["-C", "0.3"])
    evaluation = Evaluation(iris_data)
    evaluation.evaluate_train_test_split(classifier, iris_data, 66.0,
                                         Random(1))
    print(evaluation.summary())

    # load a dataset incrementally and build classifier incrementally
    helper.print_title("Build classifier incrementally on iris")
    helper.print_info("Loading dataset: " + iris_file)
    loader = Loader("weka.core.converters.ArffLoader")
    iris_inc = loader.load_file(iris_file, incremental=True)
    iris_inc.class_is_last()
    classifier = Classifier(
        classname="weka.classifiers.bayes.NaiveBayesUpdateable")
    classifier.build_classifier(iris_inc)
    for inst in loader:
        classifier.update_classifier(inst)
    print(classifier)

    # construct meta-classifiers
    helper.print_title("Meta classifiers")
    # generic FilteredClassifier instantiation
    print("generic FilteredClassifier instantiation")
    meta = SingleClassifierEnhancer(
        classname="weka.classifiers.meta.FilteredClassifier")
    meta.classifier = Classifier(
        classname="weka.classifiers.functions.LinearRegression")
    flter = Filter("weka.filters.unsupervised.attribute.Remove")
    flter.options = ["-R", "first"]
    meta.set_property("filter", flter.jobject)
    print(meta.to_commandline())
    # direct FilteredClassifier instantiation
    print("direct FilteredClassifier instantiation")
    meta = FilteredClassifier()
    meta.classifier = Classifier(
        classname="weka.classifiers.functions.LinearRegression")
    flter = Filter("weka.filters.unsupervised.attribute.Remove")
    flter.options = ["-R", "first"]
    meta.filter = flter
    print(meta.to_commandline())
    # generic Vote
    print("generic Vote instantiation")
    meta = MultipleClassifiersCombiner(classname="weka.classifiers.meta.Vote")
    classifiers = [
        Classifier(classname="weka.classifiers.functions.SMO"),
        Classifier(classname="weka.classifiers.trees.J48")
    ]
    meta.classifiers = classifiers
    print(meta.to_commandline())

    # cross-validate nominal classifier
    helper.print_title("Cross-validating NaiveBayes on diabetes")
    diabetes_file = helper.get_data_dir() + os.sep + "diabetes.arff"
    helper.print_info("Loading dataset: " + diabetes_file)
    loader = Loader("weka.core.converters.ArffLoader")
    diabetes_data = loader.load_file(diabetes_file)
    diabetes_data.class_is_last()
    classifier = Classifier(classname="weka.classifiers.bayes.NaiveBayes")
    pred_output = PredictionOutput(
        classname="weka.classifiers.evaluation.output.prediction.PlainText",
        options=["-distribution"])
    evaluation = Evaluation(diabetes_data)
    evaluation.crossvalidate_model(classifier,
                                   diabetes_data,
                                   10,
                                   Random(42),
                                   output=pred_output)
    print(evaluation.summary())
    print(evaluation.class_details())
    print(evaluation.matrix())
    print("areaUnderPRC/0: " + str(evaluation.area_under_prc(0)))
    print("weightedAreaUnderPRC: " + str(evaluation.weighted_area_under_prc))
    print("areaUnderROC/1: " + str(evaluation.area_under_roc(1)))
    print("weightedAreaUnderROC: " + str(evaluation.weighted_area_under_roc))
    print("avgCost: " + str(evaluation.avg_cost))
    print("totalCost: " + str(evaluation.total_cost))
    print("confusionMatrix: " + str(evaluation.confusion_matrix))
    print("correct: " + str(evaluation.correct))
    print("pctCorrect: " + str(evaluation.percent_correct))
    print("incorrect: " + str(evaluation.incorrect))
    print("pctIncorrect: " + str(evaluation.percent_incorrect))
    print("unclassified: " + str(evaluation.unclassified))
    print("pctUnclassified: " + str(evaluation.percent_unclassified))
    print("coverageOfTestCasesByPredictedRegions: " +
          str(evaluation.coverage_of_test_cases_by_predicted_regions))
    print("sizeOfPredictedRegions: " +
          str(evaluation.size_of_predicted_regions))
    print("falseNegativeRate: " + str(evaluation.false_negative_rate(1)))
    print("weightedFalseNegativeRate: " +
          str(evaluation.weighted_false_negative_rate))
    print("numFalseNegatives: " + str(evaluation.num_false_negatives(1)))
    print("trueNegativeRate: " + str(evaluation.true_negative_rate(1)))
    print("weightedTrueNegativeRate: " +
          str(evaluation.weighted_true_negative_rate))
    print("numTrueNegatives: " + str(evaluation.num_true_negatives(1)))
    print("falsePositiveRate: " + str(evaluation.false_positive_rate(1)))
    print("weightedFalsePositiveRate: " +
          str(evaluation.weighted_false_positive_rate))
    print("numFalsePositives: " + str(evaluation.num_false_positives(1)))
    print("truePositiveRate: " + str(evaluation.true_positive_rate(1)))
    print("weightedTruePositiveRate: " +
          str(evaluation.weighted_true_positive_rate))
    print("numTruePositives: " + str(evaluation.num_true_positives(1)))
    print("fMeasure: " + str(evaluation.f_measure(1)))
    print("weightedFMeasure: " + str(evaluation.weighted_f_measure))
    print("unweightedMacroFmeasure: " +
          str(evaluation.unweighted_macro_f_measure))
    print("unweightedMicroFmeasure: " +
          str(evaluation.unweighted_micro_f_measure))
    print("precision: " + str(evaluation.precision(1)))
    print("weightedPrecision: " + str(evaluation.weighted_precision))
    print("recall: " + str(evaluation.recall(1)))
    print("weightedRecall: " + str(evaluation.weighted_recall))
    print("kappa: " + str(evaluation.kappa))
    print("KBInformation: " + str(evaluation.kb_information))
    print("KBMeanInformation: " + str(evaluation.kb_mean_information))
    print("KBRelativeInformation: " + str(evaluation.kb_relative_information))
    print("SFEntropyGain: " + str(evaluation.sf_entropy_gain))
    print("SFMeanEntropyGain: " + str(evaluation.sf_mean_entropy_gain))
    print("SFMeanPriorEntropy: " + str(evaluation.sf_mean_prior_entropy))
    print("SFMeanSchemeEntropy: " + str(evaluation.sf_mean_scheme_entropy))
    print("matthewsCorrelationCoefficient: " +
          str(evaluation.matthews_correlation_coefficient(1)))
    print("weightedMatthewsCorrelation: " +
          str(evaluation.weighted_matthews_correlation))
    print("class priors: " + str(evaluation.class_priors))
    print("numInstances: " + str(evaluation.num_instances))
    print("meanAbsoluteError: " + str(evaluation.mean_absolute_error))
    print("meanPriorAbsoluteError: " +
          str(evaluation.mean_prior_absolute_error))
    print("relativeAbsoluteError: " + str(evaluation.relative_absolute_error))
    print("rootMeanSquaredError: " + str(evaluation.root_mean_squared_error))
    print("rootMeanPriorSquaredError: " +
          str(evaluation.root_mean_prior_squared_error))
    print("rootRelativeSquaredError: " +
          str(evaluation.root_relative_squared_error))
    print("prediction output:\n" + str(pred_output))
    plot_cls.plot_roc(evaluation,
                      title="ROC diabetes",
                      class_index=range(
                          0, diabetes_data.class_attribute.num_values),
                      wait=False)
    plot_cls.plot_prc(evaluation,
                      title="PRC diabetes",
                      class_index=range(
                          0, diabetes_data.class_attribute.num_values),
                      wait=False)

    # train 2nd classifier on diabetes dataset
    classifier2 = Classifier(classname="weka.classifiers.trees.RandomForest")
    evaluation2 = Evaluation(diabetes_data)
    evaluation2.crossvalidate_model(classifier2, diabetes_data, 10, Random(42))
    plot_cls.plot_rocs({
        "NB": evaluation,
        "RF": evaluation2
    },
                       title="ROC diabetes",
                       class_index=0,
                       wait=False)
    plot_cls.plot_prcs({
        "NB": evaluation,
        "RF": evaluation2
    },
                       title="PRC diabetes",
                       class_index=0,
                       wait=False)

    # load a numeric dataset
    bolts_file = helper.get_data_dir() + os.sep + "bolts.arff"
    helper.print_info("Loading dataset: " + bolts_file)
    loader = Loader("weka.core.converters.ArffLoader")
    bolts_data = loader.load_file(bolts_file)
    bolts_data.class_is_last()

    # build a classifier and output model
    helper.print_title("Training LinearRegression on bolts")
    classifier = Classifier(
        classname="weka.classifiers.functions.LinearRegression",
        options=["-S", "1", "-C"])
    classifier.build_classifier(bolts_data)
    print(classifier)

    # cross-validate numeric classifier
    helper.print_title("Cross-validating LinearRegression on bolts")
    classifier = Classifier(
        classname="weka.classifiers.functions.LinearRegression",
        options=["-S", "1", "-C"])
    evaluation = Evaluation(bolts_data)
    evaluation.crossvalidate_model(classifier, bolts_data, 10, Random(42))
    print(evaluation.summary())
    print("correlationCoefficient: " + str(evaluation.correlation_coefficient))
    print("errorRate: " + str(evaluation.error_rate))
    helper.print_title("Header - bolts")
    print(str(evaluation.header))
    helper.print_title("Predictions on bolts")
    for index, pred in enumerate(evaluation.predictions):
        print(
            str(index + 1) + ": " + str(pred) + " -> error=" + str(pred.error))
    plot_cls.plot_classifier_errors(evaluation.predictions, wait=False)

    # train 2nd classifier and show errors in same plot
    classifier2 = Classifier(classname="weka.classifiers.functions.SMOreg")
    evaluation2 = Evaluation(bolts_data)
    evaluation2.crossvalidate_model(classifier2, bolts_data, 10, Random(42))
    plot_cls.plot_classifier_errors(
        {
            "LR": evaluation.predictions,
            "SMOreg": evaluation2.predictions
        },
        wait=False)

    # learning curve
    cls = [
        Classifier(classname="weka.classifiers.trees.J48"),
        Classifier(classname="weka.classifiers.bayes.NaiveBayesUpdateable")
    ]
    plot_cls.plot_learning_curve(cls,
                                 diabetes_data,
                                 increments=0.05,
                                 label_template="[#] !",
                                 metric="percent_correct",
                                 wait=True)

    # access classifier's Java API
    labor_file = helper.get_data_dir() + os.sep + "labor.arff"
    helper.print_info("Loading dataset: " + labor_file)
    loader = Loader("weka.core.converters.ArffLoader")
    labor_data = loader.load_file(labor_file)
    labor_data.class_is_last()

    helper.print_title("Using JRip's Java API to access rules")
    jrip = Classifier(classname="weka.classifiers.rules.JRip")
    jrip.build_classifier(labor_data)
    rset = jrip.jwrapper.getRuleset()
    for i in xrange(rset.size()):
        r = rset.get(i)
        print(str(r.toString(labor_data.class_attribute.jobject)))
コード例 #14
0
def main():
    """
    Just runs some example code.
    """

    # load a dataset
    iris_file = helper.get_data_dir() + os.sep + "iris.arff"
    helper.print_info("Loading dataset: " + iris_file)
    loader = Loader("weka.core.converters.ArffLoader")
    iris_data = loader.load_file(iris_file)
    iris_data.class_is_last()

    # classifier help
    helper.print_title("Creating help string")
    classifier = Classifier(classname="weka.classifiers.trees.J48")
    print(classifier.to_help())

    # partial classname
    helper.print_title("Creating classifier from partial classname")
    clsname = ".J48"
    classifier = Classifier(classname=clsname)
    print(clsname + " --> " + classifier.classname)

    # classifier from commandline
    helper.print_title("Creating SMO from command-line string")
    cmdline = 'weka.classifiers.functions.SMO -K "weka.classifiers.functions.supportVector.NormalizedPolyKernel -E 3.0"'
    classifier = from_commandline(cmdline, classname="weka.classifiers.Classifier")
    classifier.build_classifier(iris_data)
    print("input: " + cmdline)
    print("output: " + classifier.to_commandline())
    print("model:\n" + str(classifier))

    # kernel classifier
    helper.print_title("Creating SMO as KernelClassifier")
    kernel = Kernel(classname="weka.classifiers.functions.supportVector.RBFKernel", options=["-G", "0.001"])
    classifier = KernelClassifier(classname="weka.classifiers.functions.SMO", options=["-M"])
    classifier.kernel = kernel
    classifier.build_classifier(iris_data)
    print("classifier: " + classifier.to_commandline())
    print("model:\n" + str(classifier))

    # build a classifier and output model
    helper.print_title("Training J48 classifier on iris")
    classifier = Classifier(classname="weka.classifiers.trees.J48")
    # Instead of using 'options=["-C", "0.3"]' in the constructor, we can also set the "confidenceFactor"
    # property of the J48 classifier itself. However, being of type float rather than double, we need
    # to convert it to the correct type first using the double_to_float function:
    classifier.set_property("confidenceFactor", typeconv.double_to_float(0.3))
    classifier.build_classifier(iris_data)
    print(classifier)
    print(classifier.graph)
    print(classifier.to_source("MyJ48"))
    plot_graph.plot_dot_graph(classifier.graph)

    # evaluate model on test set
    helper.print_title("Evaluating J48 classifier on iris")
    evaluation = Evaluation(iris_data)
    evl = evaluation.test_model(classifier, iris_data)
    print(evl)
    print(evaluation.summary())

    # evaluate model on train/test split
    helper.print_title("Evaluating J48 classifier on iris (random split 66%)")
    classifier = Classifier(classname="weka.classifiers.trees.J48", options=["-C", "0.3"])
    evaluation = Evaluation(iris_data)
    evaluation.evaluate_train_test_split(classifier, iris_data, 66.0, Random(1))
    print(evaluation.summary())

    # load a dataset incrementally and build classifier incrementally
    helper.print_title("Build classifier incrementally on iris")
    helper.print_info("Loading dataset: " + iris_file)
    loader = Loader("weka.core.converters.ArffLoader")
    iris_inc = loader.load_file(iris_file, incremental=True)
    iris_inc.class_is_last()
    classifier = Classifier(classname="weka.classifiers.bayes.NaiveBayesUpdateable")
    classifier.build_classifier(iris_inc)
    for inst in loader:
        classifier.update_classifier(inst)
    print(classifier)

    # construct meta-classifiers
    helper.print_title("Meta classifiers")
    # generic FilteredClassifier instantiation
    print("generic FilteredClassifier instantiation")
    meta = SingleClassifierEnhancer(classname="weka.classifiers.meta.FilteredClassifier")
    meta.classifier = Classifier(classname="weka.classifiers.functions.LinearRegression")
    flter = Filter("weka.filters.unsupervised.attribute.Remove")
    flter.options = ["-R", "first"]
    meta.set_property("filter", flter.jobject)
    print(meta.to_commandline())
    # direct FilteredClassifier instantiation
    print("direct FilteredClassifier instantiation")
    meta = FilteredClassifier()
    meta.classifier = Classifier(classname="weka.classifiers.functions.LinearRegression")
    flter = Filter("weka.filters.unsupervised.attribute.Remove")
    flter.options = ["-R", "first"]
    meta.filter = flter
    print(meta.to_commandline())
    # generic Vote
    print("generic Vote instantiation")
    meta = MultipleClassifiersCombiner(classname="weka.classifiers.meta.Vote")
    classifiers = [
        Classifier(classname="weka.classifiers.functions.SMO"),
        Classifier(classname="weka.classifiers.trees.J48")
    ]
    meta.classifiers = classifiers
    print(meta.to_commandline())

    # cross-validate nominal classifier
    helper.print_title("Cross-validating NaiveBayes on diabetes")
    diabetes_file = helper.get_data_dir() + os.sep + "diabetes.arff"
    helper.print_info("Loading dataset: " + diabetes_file)
    loader = Loader("weka.core.converters.ArffLoader")
    diabetes_data = loader.load_file(diabetes_file)
    diabetes_data.class_is_last()
    classifier = Classifier(classname="weka.classifiers.bayes.NaiveBayes")
    pred_output = PredictionOutput(
        classname="weka.classifiers.evaluation.output.prediction.PlainText", options=["-distribution"])
    evaluation = Evaluation(diabetes_data)
    evaluation.crossvalidate_model(classifier, diabetes_data, 10, Random(42), output=pred_output)
    print(evaluation.summary())
    print(evaluation.class_details())
    print(evaluation.matrix())
    print("areaUnderPRC/0: " + str(evaluation.area_under_prc(0)))
    print("weightedAreaUnderPRC: " + str(evaluation.weighted_area_under_prc))
    print("areaUnderROC/1: " + str(evaluation.area_under_roc(1)))
    print("weightedAreaUnderROC: " + str(evaluation.weighted_area_under_roc))
    print("avgCost: " + str(evaluation.avg_cost))
    print("totalCost: " + str(evaluation.total_cost))
    print("confusionMatrix: " + str(evaluation.confusion_matrix))
    print("correct: " + str(evaluation.correct))
    print("pctCorrect: " + str(evaluation.percent_correct))
    print("incorrect: " + str(evaluation.incorrect))
    print("pctIncorrect: " + str(evaluation.percent_incorrect))
    print("unclassified: " + str(evaluation.unclassified))
    print("pctUnclassified: " + str(evaluation.percent_unclassified))
    print("coverageOfTestCasesByPredictedRegions: " + str(evaluation.coverage_of_test_cases_by_predicted_regions))
    print("sizeOfPredictedRegions: " + str(evaluation.size_of_predicted_regions))
    print("falseNegativeRate: " + str(evaluation.false_negative_rate(1)))
    print("weightedFalseNegativeRate: " + str(evaluation.weighted_false_negative_rate))
    print("numFalseNegatives: " + str(evaluation.num_false_negatives(1)))
    print("trueNegativeRate: " + str(evaluation.true_negative_rate(1)))
    print("weightedTrueNegativeRate: " + str(evaluation.weighted_true_negative_rate))
    print("numTrueNegatives: " + str(evaluation.num_true_negatives(1)))
    print("falsePositiveRate: " + str(evaluation.false_positive_rate(1)))
    print("weightedFalsePositiveRate: " + str(evaluation.weighted_false_positive_rate))
    print("numFalsePositives: " + str(evaluation.num_false_positives(1)))
    print("truePositiveRate: " + str(evaluation.true_positive_rate(1)))
    print("weightedTruePositiveRate: " + str(evaluation.weighted_true_positive_rate))
    print("numTruePositives: " + str(evaluation.num_true_positives(1)))
    print("fMeasure: " + str(evaluation.f_measure(1)))
    print("weightedFMeasure: " + str(evaluation.weighted_f_measure))
    print("unweightedMacroFmeasure: " + str(evaluation.unweighted_macro_f_measure))
    print("unweightedMicroFmeasure: " + str(evaluation.unweighted_micro_f_measure))
    print("precision: " + str(evaluation.precision(1)))
    print("weightedPrecision: " + str(evaluation.weighted_precision))
    print("recall: " + str(evaluation.recall(1)))
    print("weightedRecall: " + str(evaluation.weighted_recall))
    print("kappa: " + str(evaluation.kappa))
    print("KBInformation: " + str(evaluation.kb_information))
    print("KBMeanInformation: " + str(evaluation.kb_mean_information))
    print("KBRelativeInformation: " + str(evaluation.kb_relative_information))
    print("SFEntropyGain: " + str(evaluation.sf_entropy_gain))
    print("SFMeanEntropyGain: " + str(evaluation.sf_mean_entropy_gain))
    print("SFMeanPriorEntropy: " + str(evaluation.sf_mean_prior_entropy))
    print("SFMeanSchemeEntropy: " + str(evaluation.sf_mean_scheme_entropy))
    print("matthewsCorrelationCoefficient: " + str(evaluation.matthews_correlation_coefficient(1)))
    print("weightedMatthewsCorrelation: " + str(evaluation.weighted_matthews_correlation))
    print("class priors: " + str(evaluation.class_priors))
    print("numInstances: " + str(evaluation.num_instances))
    print("meanAbsoluteError: " + str(evaluation.mean_absolute_error))
    print("meanPriorAbsoluteError: " + str(evaluation.mean_prior_absolute_error))
    print("relativeAbsoluteError: " + str(evaluation.relative_absolute_error))
    print("rootMeanSquaredError: " + str(evaluation.root_mean_squared_error))
    print("rootMeanPriorSquaredError: " + str(evaluation.root_mean_prior_squared_error))
    print("rootRelativeSquaredError: " + str(evaluation.root_relative_squared_error))
    print("prediction output:\n" + str(pred_output))
    plot_cls.plot_roc(
        evaluation, title="ROC diabetes",
        class_index=range(0, diabetes_data.class_attribute.num_values), wait=False)
    plot_cls.plot_prc(
        evaluation, title="PRC diabetes",
        class_index=range(0, diabetes_data.class_attribute.num_values), wait=False)

    # load a numeric dataset
    bolts_file = helper.get_data_dir() + os.sep + "bolts.arff"
    helper.print_info("Loading dataset: " + bolts_file)
    loader = Loader("weka.core.converters.ArffLoader")
    bolts_data = loader.load_file(bolts_file)
    bolts_data.class_is_last()

    # build a classifier and output model
    helper.print_title("Training LinearRegression on bolts")
    classifier = Classifier(classname="weka.classifiers.functions.LinearRegression", options=["-S", "1", "-C"])
    classifier.build_classifier(bolts_data)
    print(classifier)

    # cross-validate numeric classifier
    helper.print_title("Cross-validating LinearRegression on bolts")
    classifier = Classifier(classname="weka.classifiers.functions.LinearRegression", options=["-S", "1", "-C"])
    evaluation = Evaluation(bolts_data)
    evaluation.crossvalidate_model(classifier, bolts_data, 10, Random(42))
    print(evaluation.summary())
    print("correlationCoefficient: " + str(evaluation.correlation_coefficient))
    print("errorRate: " + str(evaluation.error_rate))
    helper.print_title("Header - bolts")
    print(str(evaluation.header))
    helper.print_title("Predictions on bolts")
    for index, pred in enumerate(evaluation.predictions):
        print(str(index+1) + ": " + str(pred) + " -> error=" + str(pred.error))
    plot_cls.plot_classifier_errors(evaluation.predictions, wait=False)

    # learning curve
    cls = [
        Classifier(classname="weka.classifiers.trees.J48"),
        Classifier(classname="weka.classifiers.bayes.NaiveBayesUpdateable")]
    plot_cls.plot_learning_curve(
        cls, diabetes_data, increments=0.05, label_template="[#] !", metric="percent_correct", wait=True)

    # access classifier's Java API
    labor_file = helper.get_data_dir() + os.sep + "labor.arff"
    helper.print_info("Loading dataset: " + labor_file)
    loader = Loader("weka.core.converters.ArffLoader")
    labor_data = loader.load_file(labor_file)
    labor_data.class_is_last()

    helper.print_title("Using JRip's Java API to access rules")
    jrip = Classifier(classname="weka.classifiers.rules.JRip")
    jrip.build_classifier(labor_data)
    rset = jrip.jwrapper.getRuleset()
    for i in range(rset.size()):
        r = rset.get(i)
        print(str(r.toString(labor_data.class_attribute.jobject)))
コード例 #15
0
    
    # remove class attribute
    print(f"Numeric to nominals")
    remove = Filter(classname="weka.filters.unsupervised.attribute.NumericToNominal", options=["-R", "8,11,12,13,14,25"])
    remove.inputformat(data)
    filtered = remove.filter(data)
    filtered.class_is_last()
    print(f"[Done] Numeric to nominals")

    attributes = (filtered.attribute_stats(filtered.num_attributes - 1))
    num_class1 = (attributes.nominal_counts[0])
    num_class2 = (attributes.nominal_counts[1])
    
    input_config = f'weka.classifiers.trees.PBC4cip -S 1 -miner \"PRFramework.Core.SupervisedClassifiers.EmergingPatterns.Miners.RandomForestMinerWithoutFiltering -bagSizePercent 100 -numFeatures -1 -numTrees {numTrees} -builder \\\"PRFramework.Core.SupervisedClassifiers.DecisionTrees.Builder.DecisionTreeBuilder -distributionEvaluator \\\\\\\"PRFramework.Core.SupervisedClassifiers.DecisionTrees.DistributionEvaluators.Hellinger \\\\\\\" -maxDepth {maxDepth} \\\\\\\"-minimalObjByLeaf \\\\\\\" {minObjects} -minimalSplitGain 1.0E-30\\\"\"'

    classifier = from_commandline(input_config, classname="weka.classifiers.Classifier")

    print(">>> Building classifier...")
    start_time_1 = time.time()
    classifier.build_classifier(filtered)
    print(f">>> [Done] Bulding classifier. Time: {(time.time() - start_time_1)} seconds ---")

    print(">>> Serializing model...")
    start_time_1 = time.time()
    classifier.serialize(f"{path}/{file_name}.model")
    print(f">>> [Done] Serializing model. Time: {(time.time() - start_time_1)} seconds ---")

    print(">>> Generating big string...")
    start_time_1 = time.time()
    big_string = str(classifier).split("]")
    print(f">>> [Done] Generating big string. Time: {(time.time() - start_time_1)} seconds ---")
コード例 #16
0
 def train_bayes(self, train_set):
     self.model = from_commandline(self.cmdline,
                                   classname="weka.classifiers.Classifier")
     self.model.build_classifier(train_set)