Beispiel #1
0
 def naive_bayes_classify(self):
     """Fit a Naive Bayes model, score it on the test split, and show the result.

     Trains a fresh ``NaiveBayesClassifier`` on ``self.X_train`` /
     ``self.y_train``, predicts ``self.X_test``, and passes the true labels
     and predictions to ``self.main.accuracy``. The string form of that
     result is cut to its first four characters and written to
     ``self.naive_bayes_acc_label``.
     """
     classifier = NaiveBayesClassifier()
     classifier.train(self.X_train, self.y_train)
     predicted = classifier.predict(self.X_test)

     score_text = str(self.main.accuracy(self.y_test, predicted))
     # Plain string truncation (not rounding): only the first four
     # characters of the textual score are displayed.
     self.naive_bayes_acc_label.setText(score_text[:4])
Beispiel #2
0
def started():
    """Train a Naive Bayes classifier on the discussion data and report scores.

    Steps, in order:

    1. Load the three JSON discussion files listed in ``datasource_info``.
    2. Build a dataset from them using ``featurize_text``.
    3. Split the dataset with ``fraction_train=0.8``.
    4. Train a ``NaiveBayesClassifier`` on the training part.
    5. For every class in ``nb_classifier.class_counter`` (sorted), print the
       f1, precision and recall returned by ``evaluate_classifier`` on the
       test part.

    The pipeline runs unconditionally: it does not depend on whether the
    module is executed as a script or imported.
    """
    print("Ok let's go!")

    # Where to find data: (source label, path to JSON file)
    datasource_info = [
        ('newyorktimes', 'data/nyt_discussions.json'),
        ('motherjones', 'data/motherjones_discussions.json'),
        ('breitbart', 'data/breitbart_discussions.json'),
    ]

    # Load the dataset into memory
    json_text = load_json_files(datasource_info, verbose=True)
    dataset = build_dataset(json_text, featurize_text, verbose=True)

    # Split our data into train and test
    train_dataset, test_dataset = split_dataset(dataset, fraction_train=0.8)

    # Train our classifier
    nb_classifier = NaiveBayesClassifier()
    nb_classifier.train(train_dataset)

    # Evaluate our classifier, for each class.
    # ``{digits}`` is a nested replacement field, so ``digits=3`` yields the
    # format spec ``.3`` for each metric.
    performance_string = 'Class {klass} performance: f1={f1:.{digits}}, precision={precision:.{digits}}, recall={recall:.{digits}}'
    for klass in sorted(nb_classifier.class_counter):  # sort just for nicer output
        f1, precision, recall = evaluate_classifier(nb_classifier, klass,
                                                    test_dataset)

        print(performance_string.format(klass=klass, f1=f1, precision=precision, recall=recall, digits=3))
Beispiel #3
0
    def classify_custom_input(self, custom_input_vector):
        """Classify one custom input with every model and display each result.

        ``custom_input_vector`` is wrapped in a single-element list and passed
        to four classifiers, each using ``self.X_train`` / ``self.y_train``.
        The first element of each returned prediction is converted to ``str``
        and written to the label named after that model:

        * Naive Bayes    -> ``self.custom_text_nb_label``
        * KNN            -> ``self.custom_text_knn_label``
        * Random forest  -> ``self.custom_text_rf_label``
        * Decision tree  -> ``self.custom_text_dt_label``
        """
        sample = [custom_input_vector]

        nb = NaiveBayesClassifier()
        nb.train(self.X_train, self.y_train)
        prediction = nb.predict(sample)
        self.custom_text_nb_label.setText(str(prediction[0]))

        knn = KNNClassifier()
        prediction = knn.predict_classification(self.X_train, self.y_train,
                                                sample)
        self.custom_text_knn_label.setText(str(prediction[0]))

        # The random-forest result belongs on the "rf" label and the
        # decision-tree result on the "dt" label; writing them the other way
        # round shows each model's answer under the wrong heading.
        rf = SklearnRandomForest()
        prediction = rf.random_forest(self.X_train, self.y_train, sample)
        self.custom_text_rf_label.setText(str(prediction[0]))

        dt = SklearnDecisionTree()
        prediction = dt.decision_tree(self.X_train, self.y_train, sample)
        self.custom_text_dt_label.setText(str(prediction[0]))
Beispiel #4
0
    # Where to find data
    datasource_info = [('newyorktimes', 'data/nyt_discussions.json'),
                       ('motherjones', 'data/motherjones_discussions.json'),
                       ('breitbart', 'data/breitbart_discussions.json')]

    # Load the dataset into memory
    json_text = load_json_files(datasource_info, verbose=True)
    dataset = build_dataset(json_text, featurize_text, verbose=True)

    # Split our data into train and test
    train_dataset, test_dataset = split_dataset(dataset, fraction_train=0.8)

    # Train our classifier
    nb_classifier = NaiveBayesClassifier()
    nb_classifier.train(train_dataset)

    #pdb.set_trace()

    # Evaluate our classifier, for each class
    performance_string = 'Class {klass} performance: f1={f1:.{digits}}, precision={precision:.{digits}}, recall={recall:.{digits}}'
    for klass in sorted(
            nb_classifier.class_counter):  # sort just for nicer output
        f1, precision, recall = evaluate_classifier(nb_classifier, klass,
                                                    test_dataset)

        print(
            performance_string.format(klass=klass,
                                      f1=f1,
                                      precision=precision,
                                      recall=recall,