def exp11():
    """
    Show correlation for each feature
    """
    poems = getPoemModel().poems
    scores = {}
    scores["affect"] = getAffectRatios()
    scores["cLength"] = getLogAverageCommentLength()
    scores["rating"] = getPoemScores()
    scores["typeToken"] = getCommentTypeTokenRatio(100)
    scores["numC"] = getNumberOfComments(True)  # use log

    result = {}
    for k1, v1 in scores.items():
        for feature in poems.values()[0].keys():
            cor, p = getCorrelation(poems, v1, feature)
            if result.get(feature, None) is None:
                result[feature] = {k1: (cor, p)}
            else:
                result[feature][k1] = (cor, p)

    for k1 in sorted(result.keys(), key=lambda x: result[x]["affect"][1]):  # sort by affect
        print "\\\\", k1, "& %0.2f & %0.4f" % result[k1]["affect"], "& %0.2f & %0.4f" % result[k1][
            "typeToken"
        ], "& %0.2f & %0.4f" % result[k1]["cLength"], "& %0.2f & %0.4f" % result[k1][
            "rating"
        ], "& %0.2f & %0.4f" % result[
            k1
        ][
            "numC"
        ]
def exp00():
    """
    Relate poem features to affect ratios.

    Loads the poem model and the affect ratios, hands both to ``makePlots``
    (output: ../experiments/exp00.pdf) and then to ``runPredictCV`` together
    with ``DEFAULT_FEATURE_LIST``.

    Original author's note: the affect ratio can be predicted with about a
    30% reduction in error over the baseline.
    """
    plot_label = "affect ratio"
    plot_path = "../experiments/exp00.pdf"

    poem_features = getPoemModel().poems
    affect_by_poem = getAffectRatios()

    makePlots(poem_features, affect_by_poem, plot_label, plot_path)
    runPredictCV(poem_features, affect_by_poem, DEFAULT_FEATURE_LIST)
def exp13():
    m = getPoemModel()
    poems = m.poems
    scores = getAffectRatios()  # plot average comment length
    for name, poem in poems.items():
        if scores.get(name, None) is None:
            continue
        if poem["proportionOfStops"] > 0.25 and scores[name] < 15:
            print "bottom right", name
        if poem["proportionOfStops"] < 0.12 and scores[name] > 25:
            print "left, name", name, scores[name]
def exp12():
    """
    Draw the pairwise comparisons between three comment-derived scores.

    The scores are the affect ratio, the log average comment length and the
    type-token ratio (computed with argument 100).  Each of the three pairs
    is passed to ``checkCorrelation`` inside its own subplot of a single
    1x3 figure, which is saved to ../experiments/exp12.pdf.
    """
    affect_ratio = getAffectRatios()
    log_length = getLogAverageCommentLength()
    type_token = getCommentTypeTokenRatio(100)

    # (first scores, second scores, first label, second label), left to right
    panels = [
        (affect_ratio, log_length, "affect ratio", "log average comment length"),
        (affect_ratio, type_token, "affect ratio", "type-token ratio"),
        (log_length, type_token, "log average comment length", "type-token ratio"),
    ]

    plt.figure(num=None, figsize=(18, 4.5), dpi=80, facecolor="w", edgecolor="k")

    for position, (first, second, first_label, second_label) in enumerate(panels, 1):
        plt.subplot(1, 3, position)
        checkCorrelation(first, second, first_label, second_label)

    plt.savefig("../experiments/exp12.pdf", format="pdf")
            print "  no score for", filename
            continue 

        featureSet = poems.get(filename, None)
        if featureSet is None:
            # only include poem if features are extracted for it
            print "  no features for", filename
            continue

        scoreArr.append(score)
        featureArr.append(filterFeatures(featureSet, useFeatureList))

        # # print featureArr for specific poem
        # print "poem", f
        # for key, value in poems[f].items():
        #     print "  ", key, ":", value

    vec = DictVectorizer()
    featureArr = vec.fit_transform(featureArr).toarray().tolist()
    featureNames = vec.get_feature_names()

    print "Performing regression using %d data points..." % len(scoreArr)
    tenFold(featureArr, scoreArr, featureNames)


# Script entry point: run runPredictCV on the poem model's features against
# the affect ratios.
if __name__ == "__main__":
    poems = getPoemModel().poems
    # Alternative score sources, currently disabled; swap one in to use a
    # different target:
    # scores = getPoemScores()
    # scores = getAverageCommentLength()
    scores = getAffectRatios()
    runPredictCV(poems, scores)
    print "Plotting %d feature plots..." % len(useFeatures)
    for index, feature in enumerate(next(iter(xDict.values())).keys()):
        if feature not in useFeatures:
            continue
        plt.figure(num=None, figsize=(16, 12), dpi=80, facecolor='w', edgecolor='k')
        plotFeatureVsScore(xDict, yDict, feature)
        plt.savefig("zoom_%s.jpg" % feature, format="jpg")

def makeHistogram(affectHist, filename):
    """
    Overlay every histogram in ``affectHist`` as a translucent blue line and
    save the resulting figure as a PDF.

    affectHist -- dict whose values are themselves dicts mapping a category
                  to a value; the outer keys are not used.
    filename   -- output path handed to ``plt.savefig``.

    The x axis lists the sorted union of the categories of all histograms.
    A histogram that lacks a category is drawn with 0 at that position.  An
    empty ``affectHist`` produces an empty figure instead of raising.
    """
    # Gather categories from every histogram rather than from whichever one
    # dict iteration happens to return first; otherwise a category missing
    # from that one histogram would be dropped from the whole plot.
    cats = sorted(set().union(*affectHist.values()))
    positions = range(len(cats))

    plt.figure(num=None, figsize=(18, 4.5), dpi=80)
    for hist in affectHist.values():
        plt.plot(positions,
            [hist.get(cat, 0) for cat in cats],
            color="blue", alpha=0.2)

    plt.ylabel("prevalence")
    plt.xlabel("emotional category")
    plt.xticks(positions, cats)
    plt.savefig(filename, format="pdf")


# Script entry point: plot poem features against the affect ratios and save
# the result to ../experiments/affect-ratio.pdf.
if __name__ == "__main__":
    # Imported here so they are only needed when the file is run as a script.
    from extract_poem_features import getPoemModel
    from extract_comment_features import getAffectRatios
    m = getPoemModel()
    poems = m.poems
    scores = getAffectRatios()  # scores to plot, labelled "affect ratio" below
    makePlots2(poems, scores, "affect ratio", "../experiments/affect-ratio.pdf")