# --- Report for the test data ---------------------------------------------
# Confusion matrix comparing the true labels (test_y) with the pipeline's
# predictions (pred_y).
print('Confusion Matrix')
print('================')
cm = confusion_matrix(test_y, pred_y)
print(cm)

# Plotting is currently disabled. To visualise the matrix, re-enable:
#   plt.figure()
#   plot_confusion_matrix(cm)
#   plt.show()

# --- Report for the Crowdflower data --------------------------------------
print('\nCalculating predictions on Crowdflower')
print('--------------------------------------')
cf_x, cf_y = prep.get_crowdflower(1000)
# The other arrays loaded through `prep` in this script are cast to float32
# before they reach the pipeline; apply the same cast here so this batch is
# fed with a matching dtype.
# NOTE(review): presumably the Overfeat transformer needs float32 input --
# confirm against the transformer's documentation.
cf_x = cf_x.astype("float32")
pred_cf = pipe.predict(cf_x)

print()
print(classification_report(cf_y, pred_cf))
print()
print("Accuracy score")
print("==============")
print(accuracy_score(cf_y, pred_cf))
print()
print('Confusion Matrix')
print('================')
# `cm` is rebound to the Crowdflower matrix; it is presumably printed by the
# code that follows this section (not visible here).
cm = confusion_matrix(cf_y, pred_cf)
# NOTE(review): the next six statements look like the tail of a plotting
# helper whose `def` line is above this view -- if so, re-indent them under
# that definition. They label both axes of the current matplotlib figure with
# CM_LABELS and name the axes.
tick_marks = np.arange(len(CM_LABELS))
plt.xticks(tick_marks, CM_LABELS, rotation=45)
plt.yticks(tick_marks, CM_LABELS)
plt.tight_layout()
plt.ylabel("True label")
plt.xlabel("Predicted label")

# Number of examples requested per category from each data loader.
NUM_PER_CATEGORY = 1000

# --- Load the training and test data, timing the whole load ----------------
print("loading data....")
print(str(NUM_PER_CATEGORY) + " examples per category")
start_time = time.time()
train_x, train_y = prep.get_data(NUM_PER_CATEGORY)
train_x = train_x.astype("float32")
test_x, test_y = prep.get_crowdflower(NUM_PER_CATEGORY)
test_x = test_x.astype("float32")
print("Total data load time:")
print("---------------------")
print(time.time() - start_time)
# Audible "done" notification through the `say` command. os.system does not
# raise when the command is missing, so this is best-effort only.
os.system('say "data is loaded"')

# --- Build the model: Overfeat features -> classifier -----------------------
# Consider trying different values for output_layers
print("\nstarting nn trained on twitter with logit @ -2....")
tf = OverfeatTransformer(output_layers=[-2])
clf = LogisticRegression()
# Alternative classifiers to experiment with:
# clf = SVC()
# clf = RandomForestClassifier()
pipe = make_pipeline(tf, clf)