def main():
    """
    Just runs some example code.

    Trains a J48 classifier on the iris dataset, then shows how to write a
    single object and several objects to one file with the serialization
    module and how to read them back.
    """

    # load a dataset
    iris_file = os.path.join(helper.get_data_dir(), "iris.arff")
    helper.print_info("Loading dataset: " + iris_file)
    loader = Loader("weka.core.converters.ArffLoader")
    iris_data = loader.load_file(iris_file)
    iris_data.class_is_last()

    # train classifier
    classifier = Classifier("weka.classifiers.trees.J48")
    classifier.build_classifier(iris_data)

    # save and read object
    helper.print_title("I/O: single object")
    outfile = os.path.join(tempfile.gettempdir(), "j48.model")
    serialization.write(outfile, classifier)
    model = Classifier(jobject=serialization.read(outfile))
    print(model)

    # save classifier and dataset header (multiple objects)
    # (the title used to repeat "single object", a copy-paste slip)
    helper.print_title("I/O: multiple objects")
    serialization.write_all(outfile, [classifier, Instances.template_instances(iris_data)])
    objects = serialization.read_all(outfile)
    for i, obj in enumerate(objects):
        helper.print_info("Object #" + str(i + 1) + ":")
        # read_all hands back raw Java objects; wrap the ones we recognise
        # so that printing them gives a readable representation
        env = javabridge.get_env()
        if env.is_instance_of(obj, env.find_class("weka/core/Instances")):
            obj = Instances(jobject=obj)
        elif env.is_instance_of(obj, env.find_class("weka/classifiers/Classifier")):
            obj = Classifier(jobject=obj)
        print(obj)
Esempio n. 2
0
    def test_read_write_all(self):
        """
        Round-trips a list of java.lang.Integer objects through write_all
        and read_all and compares the values element by element.
        """
        path = self.tempfile("readwrite.ser")
        self.delfile(path)

        written = [
            javabridge.make_instance("java/lang/Integer", "(I)V", value)
            for value in range(4)
        ]
        serialization.write_all(path, written)
        self.assertTrue(os.path.exists(path),
                        msg="Failed to write to " + path + "?")

        restored = serialization.read_all(path)
        self.assertIsNotNone(restored,
                             msg="Failed to read from " + path + "?")
        self.delfile(path)
        self.assertEqual(len(written), len(restored),
                         msg="Number of elements differ")
        # lengths were just asserted equal, so pairing up covers every index
        for pos, (before, after) in enumerate(zip(written, restored)):
            expected = javabridge.call(before, "intValue", "()I")
            actual = javabridge.call(after, "intValue", "()I")
            self.assertEqual(expected, actual,
                             msg="Input/output differ at #" + str(pos))
Esempio n. 3
0
def save_model(model, data, filename):
    """Write a model and its dataset header to the model cache folder.

    The objects are serialized together into
    ``caches/model/<filename>.cache``; the folder is passed to
    ``build_if_not_exist`` before writing.

    Args:
        model(obj): The model to be saved. Should be a
            weka.classifier.Classifier object.
        data(obj): The training set; only its template (as produced by
            ``Instances.template_instances``) is stored next to the model.
        filename(str): Base name of the cache file, without the '.cache'
            suffix.

    Returns:
        True. This function has no failure return value; errors raised by
        the calls it makes propagate to the caller.
    """

    cache_dir = os.path.join('caches', 'model')
    cache_path = os.path.join(cache_dir, filename + '.cache')
    build_if_not_exist(cache_dir)

    header = Instances.template_instances(data)
    serialization.write_all(cache_path, [model, header])

    message = "Saved cache of {target_name}.".format(target_name='model')
    localizer_log.msg(message)
    return True
Esempio n. 4
0
def create_model(input_file, output_file):
    """Discretize a dataset, train a RandomForest on it and save both.

    The trained classifier and the discretize filter are serialized
    together into ``output_file``; afterwards a 10-fold cross-validation
    is run on the filtered data and its results are printed.

    :param input_file: dataset file to load (last attribute is the class)
    :param output_file: file receiving the classifier and the filter
    """
    # Load data and mark the last attribute as the class
    dataset = converters.load_any_file(input_file)
    dataset.class_is_last()

    # filter data
    print_title("Filtering Data")
    binning = Filter(
        classname="weka.filters.unsupervised.attribute.Discretize",
        options=["-B", "10", "-M", "-1.0", "-R", "first-last"])
    # the filter has to see the input format before it can be applied
    binning.inputformat(dataset)
    binned = binning.filter(dataset)
    print("Done! (believe it or not)")

    print_title("Build Classifier")
    forest = Classifier(
        classname="weka.classifiers.trees.RandomForest",
        options=["-I", "100", "-K", "0", "-S", "1"])
    forest.build_classifier(binned)
    print("Done! (believe it or not)")

    serialization.write_all(output_file, [forest, binning])
    print("Model and filter saved to ", output_file)

    # NOTE(review): the evaluation is seeded with the unfiltered data while
    # the cross-validation runs on the filtered data - confirm this is
    # intended.
    evaluation = Evaluation(dataset)
    folds = 10
    evaluation.crossvalidate_model(forest, binned, folds, Random(42))
    print(evaluation.summary())
    print("pctCorrect: " + str(evaluation.percent_correct))
    print("incorrect: " + str(evaluation.incorrect))
Esempio n. 5
0
def main():
    """
    Example run: trains J48 on iris and stores/restores the model, first
    through the serialization module and then through the Classifier class.
    """

    # load the iris dataset; its last attribute is the class
    iris_file = os.sep.join((helper.get_data_dir(), "iris.arff"))
    helper.print_info("Loading dataset: " + iris_file)
    iris_data = Loader("weka.core.converters.ArffLoader").load_file(iris_file)
    iris_data.class_is_last()

    # train the classifier
    j48 = Classifier("weka.classifiers.trees.J48")
    j48.build_classifier(iris_data)

    # serialization module: one object
    helper.print_title("I/O: model (using serialization module)")
    model_path = os.sep.join((tempfile.gettempdir(), "j48.model"))
    serialization.write(model_path, j48)
    restored = Classifier(jobject=serialization.read(model_path))
    print(restored)

    # serialization module: classifier plus dataset header
    helper.print_title("I/O: model and header (using serialization module)")
    template = Instances.template_instances(iris_data)
    serialization.write_all(model_path, [j48, template])
    for number, jobj in enumerate(serialization.read_all(model_path), 1):
        helper.print_info("Object #" + str(number) + ":")
        env = javabridge.get_env()
        # wrap recognised Java objects before printing; anything else is
        # printed as it came back from read_all
        if env.is_instance_of(jobj, env.find_class("weka/core/Instances")):
            jobj = Instances(jobject=jobj)
        elif env.is_instance_of(
                jobj, env.find_class("weka/classifiers/Classifier")):
            jobj = Classifier(jobject=jobj)
        print(jobj)

    # Classifier class: model only (same temp file as above)
    helper.print_title("I/O: just model (using Classifier class)")
    j48.serialize(model_path)
    restored, _ = Classifier.deserialize(model_path)
    print(restored)

    # Classifier class: model plus dataset header
    helper.print_title("I/O: model and header (using Classifier class)")
    j48.serialize(model_path, header=iris_data)
    restored, restored_header = Classifier.deserialize(model_path)
    print(restored)
    if restored_header is not None:
        print(restored_header)
Esempio n. 6
0
 def do_execute(self):
     """
     Writes the "Model" and "Header" entries of the input payload to the
     file named by the "output" option.

     :return: None if successful, otherwise error message (this
              implementation always returns None; failures surface as
              exceptions from the calls it makes)
     :rtype: str
     """
     container = self.input.payload
     target = str(self.resolve_option("output"))
     model_obj = container.get("Model").jobject
     header_obj = container.get("Header").jobject
     serialization.write_all(target, [model_obj, header_obj])
     return None
Esempio n. 7
0
 def do_execute(self):
     """
     Serializes the payload's "Model" and "Header" objects into the file
     given by the resolved "output" option.

     :return: None if successful, otherwise error message (no error
              message is ever produced here; exceptions are not caught)
     :rtype: str
     """
     payload = self.input.payload
     out_path = str(self.resolve_option("output"))
     to_store = [payload.get(key).jobject for key in ("Model", "Header")]
     serialization.write_all(out_path, to_store)
     return None
Esempio n. 8
0
def SimpleLogistic():
    """
    Cross-validates a SimpleLogistic classifier on
    "First_trial_classification.arff", prints the evaluation summary and
    the collected predictions, then trains the classifier on the full
    dataset and saves it to "SimpleLogistic2.model".
    """
    # load a dataset
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file("First_trial_classification.arff")
    data.class_is_last()  # set class attribute

    cls = Classifier(classname="weka.classifiers.functions.SimpleLogistic")
    pout = PredictionOutput(
        classname="weka.classifiers.evaluation.output.prediction.PlainText")
    evl = Evaluation(data)
    evl.crossvalidate_model(cls, data, 10, Random(486), pout)

    print(evl.summary())
    print(pout.buffer_content())

    # Weka's cross-validation trains per-fold copies of the classifier, so
    # `cls` itself is still untrained here; build it on the full dataset so
    # that the saved model can actually be used for predictions.
    cls.build_classifier(data)

    # save model; write_all expects a list of objects, not a bare object
    serialization.write_all("SimpleLogistic2.model", [cls])
Esempio n. 9
0
    def serialize(self, ser_file, header=None):
        """
        Writes this object to the specified file, optionally followed by a
        dataset header.

        A header that still contains instances is replaced by its template
        (see Instances.template_instances) before it is written.

        :param ser_file: the file to save the model to
        :type ser_file: str
        :param header: the (optional) dataset header to store alongside; recommended
        :type header: Instances
        """

        template = header
        if template is not None and template.num_instances > 0:
            template = Instances.template_instances(template)

        if template is None:
            # no header: single-object file
            serialization.write(ser_file, self)
        else:
            serialization.write_all(ser_file, [self, template])
    def test_read_write_all(self):
        """
        Checks that objects stored with write_all come back unchanged from
        read_all.
        """
        ser_path = self.tempfile("readwrite.ser")
        self.delfile(ser_path)

        originals = [javabridge.make_instance("java/lang/Integer", "(I)V", n)
                     for n in range(4)]
        serialization.write_all(ser_path, originals)
        self.assertTrue(os.path.exists(ser_path), msg="Failed to write to " + ser_path + "?")

        loaded = serialization.read_all(ser_path)
        self.assertIsNotNone(loaded, msg="Failed to read from " + ser_path + "?")
        self.delfile(ser_path)
        self.assertEqual(len(originals), len(loaded), msg="Number of elements differ")
        # equal lengths were asserted above, so zip visits every element
        for idx, (src, dst) in enumerate(zip(originals, loaded)):
            value_in = javabridge.call(src, "intValue", "()I")
            value_out = javabridge.call(dst, "intValue", "()I")
            self.assertEqual(value_in, value_out, msg="Input/output differ at #" + str(idx))
Esempio n. 11
0
def SMOreg():
    """
    Cross-validates an SMOreg regressor with an RBF kernel on
    "First_trial_regression.arff", prints the evaluation summary and the
    collected predictions, then trains the model on the full dataset and
    saves it to "SMOreg.model2".
    """
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file("First_trial_regression.arff")
    data.class_is_last()  # set class attribute

    cls = KernelClassifier(classname="weka.classifiers.functions.SMOreg",
                           options=["-N", "0"])
    kernel = Kernel(
        classname="weka.classifiers.functions.supportVector.RBFKernel",
        options=["-G", "0.2"])
    cls.kernel = kernel
    pout = PredictionOutput(
        classname="weka.classifiers.evaluation.output.prediction.PlainText")
    evl = Evaluation(data)
    evl.crossvalidate_model(cls, data, 10, Random(486), pout)

    print(evl.summary())
    print(pout.buffer_content())

    # Weka's cross-validation trains per-fold copies of the classifier, so
    # `cls` itself is still untrained here; build it on the full dataset so
    # that the saved model can actually be used for predictions.
    cls.build_classifier(data)

    # save model; write_all expects a list of objects, not a bare object
    serialization.write_all("SMOreg.model2", [cls])
def createTrainedModel():
    """
    Trains, saves and evaluates the gender and the age model.

    For each of "genderTrain.arff" and "ageTrain.arff" inside the
    module-level ``outputModel`` folder, a NaiveBayes classifier is built,
    written together with the dataset header to "GenderModel.model" /
    "AgeModel.model" in the same folder, and evaluated with a 10-fold
    cross-validation whose summary and per-class details are printed.

    The function does not return: it ends the process with ``os._exit(0)``.
    """
    import sys

    import weka.core.serialization as serialization
    from weka.classifiers import Classifier, Evaluation
    from weka.core.classes import Random
    from weka.core.converters import Loader
    from weka.core.dataset import Instances

    # (training file, model base name, heading printed before the report)
    tasks = (
        ("genderTrain.arff", "GenderModel", "Gender model predictions"),
        ("ageTrain.arff", "AgeModel", "Age model predictions"),
    )

    loader = Loader(classname="weka.core.converters.ArffLoader")
    for arff_name, model_name, heading in tasks:
        data = loader.load_file(os.path.join(outputModel, arff_name))
        data.class_is_last()

        cls = Classifier(classname="weka.classifiers.bayes.NaiveBayes")
        cls.build_classifier(data)
        serialization.write_all(
            os.path.join(outputModel, model_name + ".model"),
            [cls, Instances.template_instances(data)])

        # single-argument print(...) behaves the same as the old print
        # statement, so the output is unchanged on Python 2 as well
        print(heading)
        print(cls)

        evl = Evaluation(data)
        evl.crossvalidate_model(cls, data, 10, Random(1))
        print(evl.summary())
        print(evl.class_details())

    # os._exit skips the normal interpreter shutdown and therefore does not
    # flush stdio buffers; flush explicitly so the reports are not lost
    # when stdout is redirected.
    sys.stdout.flush()
    os._exit(0)
Esempio n. 13
0
	def saveClassifier(self, filename, path='/home/sbiastoch/Schreibtisch/classifiers/'):
		"""
		Writes the classifier together with the header of the training data
		to ``filename`` inside the folder ``path``.

		:param filename: name of the file to create inside ``path``
		:param path: target folder; a trailing separator is optional
		"""
		# local import so this method does not rely on a module-level one
		import os
		# join instead of plain concatenation, so that a folder given
		# without a trailing separator still yields a valid file path
		target = os.path.join(path, filename)
		serialization.write_all(target, [self.classifier, Instances.template_instances(self.data)])