def cross_validation(k,
                     X,
                     y,
                     random_forest=False,
                     use_confidence=False,
                     num_of_trees=1):
    """Run k rounds of cross validation for an EmotionPredictor and report.

    X and y are each cut into k chunks with ``np.array_split``. For every
    round, ``get_train_test_split`` supplies the train/test partition for
    that round index (presumably holding out chunk ``i`` -- confirm against
    the helper). A fresh EmotionPredictor is fitted on the training part and
    evaluated on the test part.

    Everything is reported via ``print``: the accuracy of each round, the
    mean and standard deviation of those accuracies, and precision / recall /
    F1 per emotion computed over the predictions pooled from all rounds.
    Finally a confusion matrix of the pooled predictions, divided by k, is
    plotted and shown with matplotlib.

    Args:
        k: Number of chunks / rounds.
        X: Samples; anything ``np.array_split`` accepts.
        y: Labels aligned with X.
        random_forest: Forwarded to EmotionPredictor.
        use_confidence: Forwarded to EmotionPredictor.
        num_of_trees: Forwarded to EmotionPredictor.

    Returns:
        None.
    """
    accuracies = []
    y_pred = []
    y_true = []
    predictors = get_predictors()
    emotion_values = get_emotion_values()

    X_splits = np.array_split(X, k)
    y_splits = np.array_split(y, k)

    for fold in range(k):
        X_train, X_test, y_train, y_test = get_train_test_split(
            X_splits, y_splits, fold)

        emotion_predictor = EmotionPredictor(predictors, random_forest,
                                             use_confidence, num_of_trees)
        emotion_predictor.fit(emotion_values, X_train, y_train)

        predictions = emotion_predictor.predict(X_test)

        # extend() appends in place (no list rebuild per round) and accepts
        # any iterable; `list + ndarray` would broadcast, not concatenate.
        y_pred.extend(predictions)
        y_true.extend(y_test)

        # Pair predictions with true labels positionally. The pair variables
        # deliberately do not reuse the fold counter's name: under Python 2 a
        # list-comprehension variable leaks and would corrupt the round
        # number printed below.
        correct = sum(1 for predicted, actual in zip(predictions, y_test)
                      if predicted == actual)

        accuracy = float(correct * 100) / len(y_test)
        accuracies.append(accuracy)
        print("Accuracy for round {0} is {1:.2f}".format(fold + 1, accuracy))

    print(
        "Cross Validation accuracy has a mean of {0:.2f} and a std of {1:.2f}".
        format(np.mean(accuracies), np.std(accuracies)))

    print("          prec, rec, f1")
    for emotion_number in emotion_values:
        print("Emotion {0}: {1:.2f}, {2:.2f}, {3:.2f}".format(
            emotion_number, get_precision(y_true, y_pred, emotion_number),
            get_recall(y_true, y_pred, emotion_number),
            get_f1_score(y_true, y_pred, emotion_number)))

    plt.figure()
    # The matrix is built from the predictions pooled over all rounds, so
    # dividing by k scales it to a per-round average.
    cfm = confusion_matrix(y_true, y_pred) / k
    plot_confusion_matrix(cfm, classes=["1", "2", "3", "4", "5", "6"])
    plt.show()
import pickle
from emotion_predictor import EmotionPredictor

from util import get_clean_data, get_predictors, get_emotion_values

# Load the data and the configuration needed to build the final model.
X, y = get_clean_data()
predictors = get_predictors()
emotion_values = get_emotion_values()

# Random-forest variant: 200 trees, confidence enabled; fitted on all of the
# data returned by get_clean_data().
emotion_predictor = EmotionPredictor(
    predictors,
    random_forest=True,
    use_confidence=True,
    num_of_trees=200,
)
emotion_predictor.fit(emotion_values, X, y)

# Persist the fitted predictor using the highest pickle protocol available.
with open('emotion_predictor.pickle', 'wb') as f:
    pickle.dump(emotion_predictor, f, protocol=pickle.HIGHEST_PROTOCOL)