Example #1
0
    def test_model(self, test_data, empty_solution, evaluate=False):
        """Test the trained model on *test_data* and write a prediction file.

        Skips testing when the prediction file already exists, and refuses
        to run when the model has not been trained yet.  When *evaluate* is
        true, an evaluation summary file is also written (unless it already
        exists).

        :param test_data: weka Instances to run the classifier against
        :param empty_solution: sheet file providing userid/tweetid rows
        :param evaluate: also produce an evaluation summary file
        """
        model_weka = None
        evaluation = None
        if os.path.isfile(self.prediction_file):
            print('Model ' + self.name + ' already tested.')
        elif not os.path.isfile(self.model_file):
            print('Impossible testing this model. It should be trained first.')
            return
        else:
            print('Starting to test_model model ' + self.name + '.')
            model_weka = Classifier(jobject=serialization.read(self.model_file))
            evaluation = Evaluation(data=test_data)
            evaluation.test_model(classifier=model_weka, data=test_data)

            predictions = evaluation.predictions()
            rows = read_sheet(file_name=empty_solution)
            # Pair each sheet row with its prediction directly; the original
            # predictions.pop(0) pattern was O(n^2) and destroyed the list.
            solutions = [
                [row['userid'], row['tweetid'], pred.predicted()]
                for row, pred in zip(rows, predictions)
            ]
            write_the_solution_file(solutions, self.prediction_file)
            print('Model ' + self.name + ' tested.')

        if evaluate:
            if os.path.isfile(self.evaluation_file):
                print('Model ' + self.name + ' already evaluated.')
                return
            if evaluation is None:
                # The prediction file already existed, so no evaluation was
                # run above; rebuild the classifier and evaluate it now.
                model_weka = Classifier(jobject=serialization.read(self.model_file))
                evaluation = Evaluation(data=test_data)
                evaluation.test_model(classifier=model_weka, data=test_data)
            save_file(file_name=self.evaluation_file, content=evaluation.to_summary())
            print('Model ' + self.name + ' evaluated.')
Example #2
0
# Load the ARFF dataset and treat the last attribute as the class.
print("\nLoading dataset: " + fname + "\n")
arff_loader = Loader(classname="weka.core.converters.ArffLoader")
dataset = arff_loader.load_file(fname)
dataset.set_class_index(dataset.num_attributes() - 1)

# Scatter plot of petalwidth against petallength.
petalwidth_idx = dataset.get_attribute_by_name("petalwidth").get_index()
petallength_idx = dataset.get_attribute_by_name("petallength").get_index()
pld.scatter_plot(dataset, petalwidth_idx, petallength_idx, wait=False)

# Append J48 classifications and classification errors as extra attributes.
add_classification = Filter(
    classname="weka.filters.supervised.attribute.AddClassification",
    options=["-W", "weka.classifiers.trees.J48", "-classification", "-error"])
add_classification.set_inputformat(dataset)
with_classification = add_classification.filter(dataset)
print(with_classification)

# Train J48 on the data and evaluate it on that same data.
j48 = Classifier(classname="weka.classifiers.trees.J48")
j48.build_classifier(dataset)
evaluation = Evaluation(dataset)
evaluation.test_model(j48, dataset)

# Visualise where the classifier went wrong.
plc.plot_classifier_errors(evaluation.predictions(), wait=True)

jvm.stop()

Example #3
0
jvm.start()

# Load the German credit dataset (credit-g) and use its last
# attribute as the class attribute.
fname = data_dir + os.sep + "credit-g.arff"
print("\nLoading dataset: " + fname + "\n")
loader = Loader(classname="weka.core.converters.ArffLoader")
data = loader.load_file(fname)
data.set_class_index(data.num_attributes() - 1)

# 10-fold cross-validate NaiveBayes; Random(1) fixes the fold split so
# the run is reproducible.  The predictions are turned into an Instances
# object and sorted by the "distribution-good" attribute
# (presumably the predicted probability of the "good" class — confirm).
classifier = "weka.classifiers.bayes.NaiveBayes"
print("\n--> " + classifier + "\n")
cls = Classifier(classname=classifier)
evl = Evaluation(data)
evl.crossvalidate_model(cls, data, 10, Random(1))
preds = classifiers.predictions_to_instances(data, evl.predictions())
preds.sort(preds.get_attribute_by_name("distribution-good").get_index())
print(evl.to_summary())
print(evl.to_matrix())
print(preds)

# Same 10-fold cross-validation for J48.
# NOTE(review): "-M 100" presumably sets J48's minimum number of
# instances per leaf — confirm against the weka J48 option docs.
classifier = "weka.classifiers.trees.J48"
print("\n--> " + classifier + "\n")
cls = Classifier(classname=classifier, options=["-M", "100"])
evl = Evaluation(data)
evl.crossvalidate_model(cls, data, 10, Random(1))
preds = classifiers.predictions_to_instances(data, evl.predictions())
preds.sort(preds.get_attribute_by_name("distribution-good").get_index())
print(evl.to_summary())
print(evl.to_matrix())