import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

from emotion_predictor import EmotionPredictor
# The helpers below are referenced by the original snippet without imports;
# they are assumed to live in util alongside get_predictors/get_emotion_values.
from util import (get_predictors, get_emotion_values, get_train_test_split,
                  get_precision, get_recall, get_f1_score,
                  plot_confusion_matrix)


def cross_validation(k, X, y, random_forest=False, use_confidence=False,
                     num_of_trees=1):
    """Run k-fold cross validation and report per-fold and per-class metrics."""
    accuracies = []
    y_pred = []
    y_true = []
    predictors = get_predictors()
    emotion_values = get_emotion_values()

    # Split the data into k folds.
    X_splits = np.array_split(X, k)
    y_splits = np.array_split(y, k)

    for i in range(k):
        # Fold i is held out for testing; the remaining folds form the training set.
        X_train, X_test, y_train, y_test = get_train_test_split(
            X_splits, y_splits, i)
        emotion_predictor = EmotionPredictor(predictors, random_forest,
                                             use_confidence, num_of_trees)
        emotion_predictor.fit(emotion_values, X_train, y_train)
        predictions = emotion_predictor.predict(X_test)

        # Accumulate predictions and ground truth across folds for the
        # aggregate precision/recall/F1 and confusion matrix below.
        y_pred.extend(predictions)
        y_true.extend(y_test)

        correct = sum(1 for j, prediction in enumerate(predictions)
                      if prediction == y_test[j])
        accuracy = float(correct * 100) / len(y_test)
        accuracies.append(accuracy)
        print("Accuracy for round {0} is {1:.2f}".format(i + 1, accuracy))

    print("Cross Validation accuracy has a mean of {0:.2f} and a std of "
          "{1:.2f}".format(np.mean(accuracies), np.std(accuracies)))

    print(" prec, rec, f1")
    for emotion_number in emotion_values:
        print("Emotion {0}: {1:.2f}, {2:.2f}, {3:.2f}".format(
            emotion_number,
            get_precision(y_true, y_pred, emotion_number),
            get_recall(y_true, y_pred, emotion_number),
            get_f1_score(y_true, y_pred, emotion_number)))

    # Average the confusion matrix counts over the k folds and plot it.
    plt.figure()
    cfm = confusion_matrix(y_true, y_pred) / k
    plot_confusion_matrix(cfm, classes=["1", "2", "3", "4", "5", "6"])
    plt.show()
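
# A minimal usage sketch (not part of the original module): run 10-fold cross
# validation with a random forest. It assumes get_clean_data() from util
# returns the feature matrix and labels, as in the training script below;
# the hyperparameters here mirror that script and are illustrative only.
if __name__ == "__main__":
    from util import get_clean_data

    X, y = get_clean_data()
    cross_validation(10, X, y, random_forest=True, use_confidence=True,
                     num_of_trees=200)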
import pickle

from emotion_predictor import EmotionPredictor
from util import get_clean_data, get_predictors, get_emotion_values

# Load the preprocessed data and the feature/label metadata.
X, y = get_clean_data()
predictors = get_predictors()
emotion_values = get_emotion_values()

# Train a random-forest based predictor on the full data set.
emotion_predictor = EmotionPredictor(predictors,
                                     random_forest=True,
                                     use_confidence=True,
                                     num_of_trees=200)
emotion_predictor.fit(emotion_values, X, y)

# Serialise the trained model for later reuse.
with open('emotion_predictor.pickle', 'wb') as f:
    pickle.dump(emotion_predictor, f, pickle.HIGHEST_PROTOCOL)
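
# A sketch of the round trip (not part of the original script): reload the
# pickled model and predict on the same feature matrix as a sanity check.
# It assumes EmotionPredictor.predict accepts the same X format used for
# fitting, as in the cross-validation code above.
with open('emotion_predictor.pickle', 'rb') as f:
    loaded_predictor = pickle.load(f)

predictions = loaded_predictor.predict(X)
print("First few predictions:", predictions[:5])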