# Train the TF-IDF baseline classifier.
classifier_tfidf.fit(training_set_tfidf, labels_array)

###########
# testing #
###########

# Run every fitted classifier on its own representation of the test set.
predictions_degree = classifier_degree.predict(testing_set_degree)
predictions_w_degree = classifier_w_degree.predict(testing_set_w_degree)
predictions_closeness = classifier_closeness.predict(testing_set_closeness)
predictions_w_closeness = classifier_w_closeness.predict(
    testing_set_w_closeness)
predictions_twicw = classifier_twicw.predict(testing_set_twicw)
predictions_tfidf = classifier_tfidf.predict(testing_set_tfidf)

# Accuracy report: one line per weighting scheme, expressed as a percentage
# rounded to three decimals.
print('========== accuracy for', clf, 'classifier ==========')
for _metric_name, _predicted in (
    ("accuracy TW-IDF degree:", predictions_degree),
    ("accuracy TW-IDF weighted degree:", predictions_w_degree),
    ("accuracy TW-IDF closeness:", predictions_closeness),
    ("accuracy TW-IDF weighted closeness:", predictions_w_closeness),
    ("accuracy TW-ICW degree:", predictions_twicw),
    ("accuracy TF-IDF:", predictions_tfidf),
):
    _accuracy = metrics.accuracy_score(truth_array, _predicted)
    print(_metric_name, round(_accuracy * 100, 3))

# Show the most and least important features for each class, using the
# weighted-closeness classifier.
# NOTE(review): these headers hard-code "logreg" whereas the accuracy header
# above prints `clf` -- confirm the two stay in sync if `clf` changes.
print('========== top 10 for logreg classifier_w_closeness classifier ==========')
print_top10(all_unique_terms, classifier_w_closeness, unique_labels)
print('========== bot 10 for logreg classifier_w_closeness classifier ==========')
print_bot10(all_unique_terms, classifier_w_closeness, unique_labels)
# NOTE(review): this chunk starts in the middle of a statement. The fragment
# on the next line is the tail of a call whose opening is not visible here;
# it is kept verbatim.
3))
# Remaining accuracy lines: each score is converted to a percentage and
# rounded to three decimals before printing.
print(
    "accuracy TW-IDF closeness:",
    round(
        metrics.accuracy_score(truth_array, predictions_closeness) * 100,
        3))
print(
    "accuracy TW-IDF weighted closeness:",
    round(
        metrics.accuracy_score(truth_array, predictions_w_closeness) * 100,
        3))
print(
    "accuracy TW-ICW degree:",
    round(metrics.accuracy_score(truth_array, predictions_twicw) * 100, 3))
print(
    "accuracy TF-IDF:",
    round(metrics.accuracy_score(truth_array, predictions_tfidf) * 100, 3))

# show the most and less important features for each class
# The six classifiers are inspected in the order listed below; the printed
# "classifier <n>" number is the 1-based position in this list.
classifiers = [
    classifier_degree, classifier_w_degree, classifier_closeness,
    classifier_w_closeness, classifier_twicw, classifier_tfidf
]
i = 1  # 1-based counter used only for the printed label
for classifier in classifiers:
    print("classifier ", i)
    i += 1
    # print_top10 / print_bot10 are defined elsewhere in the file; presumably
    # they print the highest- and lowest-weighted terms per class -- confirm.
    print("TOP 10:")
    print_top10(all_unique_terms, classifier, unique_labels)
    print("BOT 10:")
    print_bot10(all_unique_terms, classifier, unique_labels)
# testing #
###########

# Predict the test set once per classifier, each on its matching features.
predictions_degree = classifier_degree.predict(testing_set_degree)
predictions_w_degree = classifier_w_degree.predict(testing_set_w_degree)
predictions_closeness = classifier_closeness.predict(testing_set_closeness)
predictions_w_closeness = classifier_w_closeness.predict(
    testing_set_w_closeness)
predictions_twicw = classifier_twicw.predict(testing_set_twicw)
predictions_tfidf = classifier_tfidf.predict(testing_set_tfidf)


def _accuracy_pct(predicted):
    """Return the accuracy of `predicted` against `truth_array`.

    The score from `metrics.accuracy_score` is scaled to a percentage and
    rounded to three decimals.
    """
    return round(metrics.accuracy_score(truth_array, predicted) * 100, 3)


print('========== accuracy for', clf, 'classifier ==========')
print("accuracy TW-IDF degree:", _accuracy_pct(predictions_degree))
print("accuracy TW-IDF weighted degree:", _accuracy_pct(predictions_w_degree))
print("accuracy TW-IDF closeness:", _accuracy_pct(predictions_closeness))
print("accuracy TW-IDF weighted closeness:",
      _accuracy_pct(predictions_w_closeness))
print("accuracy TW-ICW degree:", _accuracy_pct(predictions_twicw))
print("accuracy TF-IDF:", _accuracy_pct(predictions_tfidf))

# In[6]:

# Feature inspection: report the most and least important features per class
# for the classifier bound to `my_clf` (here the TF-IDF baseline).
my_clf = classifier_tfidf

print("\nMost important features for each class:")
print_top10(all_unique_terms, my_clf, unique_labels)

print("\nLess important features for each class:")
print_bot10(all_unique_terms, my_clf, unique_labels)