def naive_bayes_classify(self):
    """Train a Naive Bayes classifier on the held training split, evaluate it
    on the test split, and show the accuracy on the GUI label.

    Reads ``self.X_train``, ``self.y_train``, ``self.X_test``, ``self.y_test``
    and writes the formatted accuracy to ``self.naive_bayes_acc_label``.
    """
    nb = NaiveBayesClassifier()
    nb.train(self.X_train, self.y_train)
    predictions = nb.predict(self.X_test)
    accuracy = self.main.accuracy(self.y_test, predictions)
    # BUG FIX: the original converted the accuracy to a string and sliced the
    # first four characters (str(acc)[0:4]), which truncates instead of
    # rounding (0.857 displayed as "0.85") and produces inconsistent widths.
    # Format to two decimal places instead.
    self.naive_bayes_acc_label.setText(f"{accuracy:.2f}")
def started():
    """Load the discussion datasets, train a Naive Bayes classifier, and
    print per-class f1 / precision / recall on the test split.

    Side effects: reads the JSON files listed in ``datasource_info`` and
    prints progress and evaluation lines to stdout.
    """
    # NOTE(review): the original wrapped this body in an
    # `if __name__ == '__main__':` check whose `else:` branch repeated the
    # entire body verbatim — the branches were identical, so the check was a
    # no-op. The duplication has been removed; behavior is unchanged.
    print("Ok let's go!")
    # Where to find data
    datasource_info = [
        ('newyorktimes', 'data/nyt_discussions.json'),
        ('motherjones', 'data/motherjones_discussions.json'),
        ('breitbart', 'data/breitbart_discussions.json'),
    ]
    # Load the dataset into memory
    json_text = load_json_files(datasource_info, verbose=True)
    dataset = build_dataset(json_text, featurize_text, verbose=True)
    # Split our data into train and test
    train_dataset, test_dataset = split_dataset(dataset, fraction_train=0.8)
    # Train our classifier
    nb_classifier = NaiveBayesClassifier()
    nb_classifier.train(train_dataset)
    # Evaluate our classifier, for each class
    performance_string = ('Class {klass} performance: f1={f1:.{digits}}, '
                          'precision={precision:.{digits}}, recall={recall:.{digits}}')
    for klass in sorted(nb_classifier.class_counter):  # sort just for nicer output
        f1, precision, recall = evaluate_classifier(nb_classifier, klass,
                                                    test_dataset)
        print(performance_string.format(klass=klass, f1=f1, precision=precision,
                                        recall=recall, digits=3))
def classify_custom_input(self, custom_input_vector):
    """Classify one user-supplied feature vector with each of the four
    classifiers and show each prediction on its matching GUI label.

    Args:
        custom_input_vector: a single feature vector; it is wrapped in a
            list because every classifier's predict API takes a batch.

    Reads ``self.X_train`` / ``self.y_train``; writes to the four
    ``custom_text_*_label`` widgets.
    """
    nb = NaiveBayesClassifier()
    nb.train(self.X_train, self.y_train)
    prediction = nb.predict([custom_input_vector])
    self.custom_text_nb_label.setText(str(prediction[0]))

    knn = KNNClassifier()
    prediction = knn.predict_classification(self.X_train, self.y_train,
                                            [custom_input_vector])
    self.custom_text_knn_label.setText(str(prediction[0]))

    # BUG FIX: the random-forest result was written to the decision-tree
    # label and the decision-tree result to the random-forest label; each
    # prediction now goes to its matching widget.
    rf = SklearnRandomForest()
    prediction = rf.random_forest(self.X_train, self.y_train,
                                  [custom_input_vector])
    self.custom_text_rf_label.setText(str(prediction[0]))

    dt = SklearnDecisionTree()
    prediction = dt.decision_tree(self.X_train, self.y_train,
                                  [custom_input_vector])
    self.custom_text_dt_label.setText(str(prediction[0]))
# Where to find data datasource_info = [('newyorktimes', 'data/nyt_discussions.json'), ('motherjones', 'data/motherjones_discussions.json'), ('breitbart', 'data/breitbart_discussions.json')] # Load the dataset into memory json_text = load_json_files(datasource_info, verbose=True) dataset = build_dataset(json_text, featurize_text, verbose=True) # Split our data into train and test train_dataset, test_dataset = split_dataset(dataset, fraction_train=0.8) # Train our classifier nb_classifier = NaiveBayesClassifier() nb_classifier.train(train_dataset) #pdb.set_trace() # Evaluate our classifier, for each class performance_string = 'Class {klass} performance: f1={f1:.{digits}}, precision={precision:.{digits}}, recall={recall:.{digits}}' for klass in sorted( nb_classifier.class_counter): # sort just for nicer output f1, precision, recall = evaluate_classifier(nb_classifier, klass, test_dataset) print( performance_string.format(klass=klass, f1=f1, precision=precision, recall=recall,