Esempio n. 1
0
 def naive_bayes_classify(self):
     """Fit a Naive Bayes model on the training split and show its test accuracy.

     A fresh ``NaiveBayesClassifier`` is trained on ``self.X_train`` /
     ``self.y_train`` and asked to predict ``self.X_test``.  The predictions
     are scored against ``self.y_test`` with ``self.main.accuracy`` and the
     result is written to ``self.naive_bayes_acc_label``.
     """
     model = NaiveBayesClassifier()
     model.train(self.X_train, self.y_train)
     predicted = model.predict(self.X_test)
     score_text = str(self.main.accuracy(self.y_test, predicted))
     # Only the first four characters of the textual score are displayed.
     self.naive_bayes_acc_label.setText(score_text[:4])
def classify(data, classifier, num_classes, train_labels, train_features,
             test_labels, test_features):
    """
    Dispatch one fold to the backend selected by ``classifier`` (called by FoldRunner).

    :param data: the configuration dictionary (forwarded to the "NN_keras" and "KNN" backends only)
    :param classifier: name of the classifier to run, as listed in the configuration file
    :param num_classes: number of distinct labels (forwarded to the "NN_keras" backend only)
    :param train_labels: the labels of all train instances
    :param train_features: the features of all train instances
    :param test_labels: the labels of all test instances
    :param test_features: the features of all test instances
    :return: whatever the selected backend's ``classify`` returns (the confusion
             matrix of the classification), or ``None`` when ``classifier``
             matches none of the known names
    """
    # Every backend takes these four arguments last, in this order.
    fold = (train_labels, train_features, test_labels, test_features)

    if classifier == "NN_keras":
        return nnk.classify(data, num_classes, *fold)
    if classifier == "NN_scikit-learn":
        return nns.classify(*fold)
    if classifier == "KNN":
        return knn.classify(data, *fold)
    if classifier == "NaiveBayes":
        return nb.classify(*fold)
    if classifier == "RandomForest":
        return rf.classify(*fold)
    if classifier == "LogisticRegression":
        return lr.classify(*fold)
    # Unknown classifier name: fall through without running anything.
    return None
Esempio n. 3
0
 def classifyPoems(filename):
     """Build the module-level classifier ``cl`` from a CSV file under ``CSVs/``.

     Opens ``CSVs/<filename>`` for reading, constructs a
     ``NaiveBayesClassifier`` from the open file with ``format="csv"``,
     stores it in the global ``cl`` and prints it.  If an ``IOError`` is
     raised, a message is printed instead and ``cl`` is left untouched.
     """
     global cl
     csv_path = 'CSVs/' + filename
     try:
         with open(csv_path, 'r') as handle:
             print('opened ' + filename)
             cl = NaiveBayesClassifier(handle, format="csv")
             print(cl)
     except IOError:
         print('\nFile not found for Naive-Bayes Classifier')
Esempio n. 4
0
def started():
    """Run the Naive Bayes experiment end to end and print per-class scores.

    Steps, in order:

    1. load the three discussion JSON files listed in ``datasource_info``;
    2. build a dataset from them with ``featurize_text``;
    3. split it into train and test parts with ``fraction_train=0.8``;
    4. train a ``NaiveBayesClassifier`` on the train part;
    5. for every key of ``nb_classifier.class_counter`` (in sorted order),
       evaluate on the test part and print F1, precision and recall.

    The pipeline runs identically whether the module is executed as a script
    or imported, so there is no ``__name__`` check here.
    """
    print("Ok let's go!")

    # Where to find data
    datasource_info = [('newyorktimes', 'data/nyt_discussions.json'),
                       ('motherjones', 'data/motherjones_discussions.json'),
                       ('breitbart', 'data/breitbart_discussions.json')]

    # Load the dataset into memory
    json_text = load_json_files(datasource_info, verbose=True)
    dataset = build_dataset(json_text, featurize_text, verbose=True)

    # Split our data into train and test
    train_dataset, test_dataset = split_dataset(dataset, fraction_train=0.8)

    # Train our classifier
    nb_classifier = NaiveBayesClassifier()
    nb_classifier.train(train_dataset)

    # Evaluate our classifier, for each class
    performance_string = (
        'Class {klass} performance: f1={f1:.{digits}}, '
        'precision={precision:.{digits}}, recall={recall:.{digits}}'
    )
    for klass in sorted(nb_classifier.class_counter):  # sort just for nicer output
        f1, precision, recall = evaluate_classifier(nb_classifier, klass,
                                                    test_dataset)

        print(performance_string.format(klass=klass, f1=f1,
                                        precision=precision, recall=recall,
                                        digits=3))
Esempio n. 5
0
    def classify_custom_input(self, custom_input_vector):
        """Classify one custom feature vector with each model and display the results.

        The vector is wrapped in a one-element list for every predictor, and
        the first element of each returned prediction is shown (as a string)
        in the label belonging to that model:

        * Naive Bayes   -> ``self.custom_text_nb_label``
        * KNN           -> ``self.custom_text_knn_label``
        * Random forest -> ``self.custom_text_rf_label``
        * Decision tree -> ``self.custom_text_dt_label``

        Every model is fitted on ``self.X_train`` / ``self.y_train``.

        :param custom_input_vector: feature vector for the single sample to classify
        """
        nb = NaiveBayesClassifier()
        nb.train(self.X_train, self.y_train)
        prediction = nb.predict([custom_input_vector])
        self.custom_text_nb_label.setText(str(prediction[0]))

        knn = KNNClassifier()
        prediction = knn.predict_classification(self.X_train, self.y_train,
                                                [custom_input_vector])
        self.custom_text_knn_label.setText(str(prediction[0]))

        # The random-forest result goes to the rf label (it was previously
        # written to the decision-tree label, and vice versa).
        rf = SklearnRandomForest()
        prediction = rf.random_forest(self.X_train, self.y_train,
                                      [custom_input_vector])
        self.custom_text_rf_label.setText(str(prediction[0]))

        dt = SklearnDecisionTree()
        prediction = dt.decision_tree(self.X_train, self.y_train,
                                      [custom_input_vector])
        self.custom_text_dt_label.setText(str(prediction[0]))
Esempio n. 6
0
    print("Ok let's go!")

    # Where to find data
    datasource_info = [('newyorktimes', 'data/nyt_discussions.json'),
                       ('motherjones', 'data/motherjones_discussions.json'),
                       ('breitbart', 'data/breitbart_discussions.json')]

    # Load the dataset into memory
    json_text = load_json_files(datasource_info, verbose=True)
    dataset = build_dataset(json_text, featurize_text, verbose=True)

    # Split our data into train and test
    train_dataset, test_dataset = split_dataset(dataset, fraction_train=0.8)

    # Train our classifier
    nb_classifier = NaiveBayesClassifier()
    nb_classifier.train(train_dataset)

    #pdb.set_trace()

    # Evaluate our classifier, for each class
    performance_string = 'Class {klass} performance: f1={f1:.{digits}}, precision={precision:.{digits}}, recall={recall:.{digits}}'
    for klass in sorted(
            nb_classifier.class_counter):  # sort just for nicer output
        f1, precision, recall = evaluate_classifier(nb_classifier, klass,
                                                    test_dataset)

        print(
            performance_string.format(klass=klass,
                                      f1=f1,
                                      precision=precision,