def exp06():
    """Predict the log of the number of responses per poem.

    Result: not much better than predicting the raw count (see exp05).
    """
    model = getPoemModel()
    poems = model.poems
    # True -> log-transform the comment counts
    scores = getNumberOfComments(True)
    makePlots(poems, scores, "log of number of comments",
              "../experiments/exp06.pdf")
    runPredictCV(poems, scores, DEFAULT_FEATURE_LIST)
def exp05():
    """Predict the number of responses per poem.

    Result: not predicted well.
    """
    model = getPoemModel()
    poems = model.poems
    scores = getNumberOfComments()
    makePlots(poems, scores, "number of comments",
              "../experiments/exp05.pdf")
    runPredictCV(poems, scores, DEFAULT_FEATURE_LIST)
def exp04():
    """Predict the poem's rating from its features.

    Result: it seems we cannot predict the rating well.
    """
    model = getPoemModel()
    poems = model.poems
    scores = getPoemScores()
    makePlots(poems, scores, "poem score", "../experiments/exp04.pdf")
    runPredictCV(poems, scores, DEFAULT_FEATURE_LIST)
def exp02():
    """Test whether comment length drives the affect ratio.

    If comment length were the driver, it should correlate better with
    the poem features than affect ratio does. Result: average comment
    length is not predicted very well.
    """
    model = getPoemModel()
    poems = model.poems
    # average comment length per poem
    scores = getAverageCommentLength()
    makePlots(poems, scores, "average comment length",
              "../experiments/exp02.pdf")
    runPredictCV(poems, scores, DEFAULT_FEATURE_LIST)
def exp00():
    """Correlate poem features with affect ratios of the responses.

    Result: predictable with roughly a 30% reduction in error over the
    baseline.
    """
    model = getPoemModel()
    poems = model.poems
    # affect ratio of the comments on each poem
    scores = getAffectRatios()
    makePlots(poems, scores, "affect ratio", "../experiments/exp00.pdf")
    runPredictCV(poems, scores, DEFAULT_FEATURE_LIST)
def exp08():
    """Test whether high-affect-ratio comments have lower type-token ratio.

    Motivation: if high affect ratio comments are less rich in their
    analysis/observation, they might show a lower type-token ratio.
    Result: cannot be well predicted (~0% reduction in error).
    """
    model = getPoemModel()
    poems = model.poems
    scores = getCommentTypeTokenRatio()
    makePlots(poems, scores, "comment type token ratio",
              "../experiments/exp08.pdf")
    runPredictCV(poems, scores, DEFAULT_FEATURE_LIST)
def exp09():
    """Compare predicting the NRC ratio with predicting the affect ratio.

    The NRC ratio should also capture emotion words, so how different is
    it? Result: not predicted as well (only ~3% over baseline).
    """
    model = getPoemModel()
    poems = model.poems
    scores = getNRCRatios()
    makePlots(poems, scores, "NRC ratio", "../experiments/exp09.pdf")
    runPredictCV(poems, scores, DEFAULT_FEATURE_LIST)
def exp03():
    """Predict the log of average comment length.

    Result: ~10% reduction in error over baseline — better than raw
    average comment length, but still worse than predicting the affect
    ratio. Open question: is there another descriptive feature of the
    comments that captures more? Are comments with different affect
    ratios saying the same things differently, or saying different
    things?
    """
    model = getPoemModel()
    poems = model.poems
    scores = getLogAverageCommentLength()
    makePlots(poems, scores, "log of average comment length",
              "../experiments/exp03.pdf")
    runPredictCV(poems, scores, DEFAULT_FEATURE_LIST)
def exp081():
    """Repeat exp08 with word sampling to control for document length.

    Longer documents tend to have lower type-token ratios, so sample a
    fixed number of words before computing the ratio. Result: ~15%
    reduction in error. This suggests "richness" of response can be
    characterized by the type-token ratio, though it is not as easily
    predicted as the affect ratio. Combined with exp03 (log comment
    length predictable at ~10%), and noting the sign of each variable's
    correlation, "richness" includes:

    - longer comments
    - higher type-token ratio
    - lower affect ratio
    """
    model = getPoemModel()
    poems = model.poems
    # sample 100 words per document before computing the ratio
    scores = getCommentTypeTokenRatio(100)
    makePlots(poems, scores, "sampled type-token ratio",
              "../experiments/exp08.1.pdf")
    runPredictCV(poems, scores, DEFAULT_FEATURE_LIST)