def exp11():
    """
    Show correlation for each feature

    For every comment-level score (affect ratio, log average comment length,
    poem rating, comment type-token ratio, log number of comments) and every
    poem feature, call getCorrelation and keep the (correlation, p) pair it
    returns.  Then print one row per feature, ordered by the second element
    (p) of the "affect" pair, smallest first.

    Each row has the form
        \\\\ <feature> & cor & p & cor & p ...
    with the score columns in the order affect, typeToken, cLength, rating,
    numC (this looks like LaTeX table markup).  Nothing is printed when
    there are no poems.
    """
    poems = getPoemModel().poems
    scores = {}
    scores["affect"] = getAffectRatios()
    scores["cLength"] = getLogAverageCommentLength()
    scores["rating"] = getPoemScores()
    scores["typeToken"] = getCommentTypeTokenRatio(100)
    scores["numC"] = getNumberOfComments(True)  # use log

    # The feature names are read off the first poem.  Do this once, outside
    # the loops, and without indexing the values() result so that an empty
    # collection yields no features instead of raising IndexError.
    firstPoem = next(iter(poems.values()), {})
    features = list(firstPoem.keys())

    # result[feature][scoreName] -> (cor, p)
    result = {}
    for scoreName, scoreValues in scores.items():
        for feature in features:
            cor, p = getCorrelation(poems, scoreValues, feature)
            result.setdefault(feature, {})[scoreName] = (cor, p)

    columns = ("affect", "typeToken", "cLength", "rating", "numC")
    # sort by affect
    for feature in sorted(result, key=lambda name: result[name]["affect"][1]):
        cells = ["& %0.2f & %0.4f" % result[feature][column] for column in columns]
        # A single pre-joined string keeps the output identical to the old
        # space-separated print statement while parsing on Python 2 and 3.
        print(" ".join(["\\\\", "%s" % (feature,)] + cells))
def exp03():
    """
    Plot and predict the log of the average comment length per poem.

    Notes from the experiment: the log of average comment length can be
    predicted with a 10% reduction in error over baseline (better than the
    raw average comment length), but that is still worse than predicting
    the affect ratio.  Why?  Is there another descriptive feature of the
    comments that we can better describe?

    Open question: are the comments with different affect ratios saying the
    same things differently, or saying different things?
    """
    poems = getPoemModel().poems
    logLengths = getLogAverageCommentLength()

    plotLabel = "log of average comment length"
    plotPath = "../experiments/exp03.pdf"
    makePlots(poems, logLengths, plotLabel, plotPath)

    # Cross-validated prediction using the default feature list.
    runPredictCV(poems, logLengths, DEFAULT_FEATURE_LIST)
def exp12():
    """
    Compare the three comment measures against each other.

    Lays out a single row of three subplots and calls checkCorrelation once
    per pair: affect ratio vs. log average comment length, affect ratio vs.
    type-token ratio, and log average comment length vs. type-token ratio.
    The figure is saved to ../experiments/exp12.pdf.
    """
    affectLabel = "affect ratio"
    lengthLabel = "log average comment length"
    typeTokenLabel = "type-token ratio"

    affect = getAffectRatios()
    cLength = getLogAverageCommentLength()
    typeToken = getCommentTypeTokenRatio(100)

    # One entry per subplot, left to right: (x values, y values, x label, y label)
    panels = [
        (affect, cLength, affectLabel, lengthLabel),
        (affect, typeToken, affectLabel, typeTokenLabel),
        (cLength, typeToken, lengthLabel, typeTokenLabel),
    ]

    plt.figure(num=None, figsize=(18, 4.5), dpi=80, facecolor="w", edgecolor="k")
    for position, (xs, ys, xLabel, yLabel) in enumerate(panels, 1):
        plt.subplot(1, 3, position)
        checkCorrelation(xs, ys, xLabel, yLabel)

    plt.savefig("../experiments/exp12.pdf", format="pdf")