if len(set(X)) >= 1 and len(set(Y)) >= 1: spearmans.append(correlation.spearman_rho_tr(X, Y)) write('\t%s, %s: bootstrapped spearman: %.3f (dev=%.3f)' % (condition, condition, utils.mean(spearmans), utils.dev(spearmans))) for ktype in pair_means.keys(): write('question type: %s' % ktype) for (condition1, consensus1) in pair_means[ktype].items(): for (condition2, consensus2) in pair_means[ktype].items(): if not consensus1 or not consensus2 or condition1 == condition2 or condition1 == 'all' or condition2 == 'all': continue write('\tcondition %s, %s' % (condition1, condition2)) common_ids = list(set(consensus1.keys()).intersection(consensus2.keys())) X = [consensus1[pid] for pid in common_ids] Y = [consensus2[pid] for pid in common_ids] diffs = [(abs(x - y), x, y, pid) for (pid, x, y) in zip(common_ids, X, Y)] diffs.sort() diffs.reverse() write('\t\tpearson: %.3f' % (correlation.pearson_rho(X, Y))) write('\t\tspearman: %.3f' % (correlation.spearman_rho_tr(X, Y))) for (d, x, y, pid) in diffs[:10]: pair = s.id_to_phrases(pid) ratings1 = pair_ratings[ktype][condition1][pid] ratings2 = pair_ratings[ktype][condition2][pid] (t, p) = stats.ttest_ind(ratings1, ratings2) write('\t\t\t%s, %s: %.2f to %.2f (ttest t=%.5f, p=%.5f)' % (pair[0], pair[1], x, y, t, p))
#!/usr/bin/python -O
#
# Correlates survey responses for the 'general' question type with the
# reference scores stored in dat/general.txt.
#
# For each of the conditions 'all', 'mturk' and 'scholar' the script prints
# one line with the condition name followed by the Pearson and Spearman
# correlation between the mean survey response of each usable pair and the
# reference score of that pair. Pairs that are skipped are reported on
# their own lines before the summary.

import sys

import correlation
import utils

# A pair is only used when it has strictly more than this many ratings.
MIN_RATINGS = 10

survey = utils.Survey()

# Maps a sorted (phrase, phrase) tuple to the score read from the data file.
wordsim = {}
# `with` closes the file even if a malformed line raises while parsing.
with open('dat/general.txt') as general_file:
    for line in general_file:
        if not line.strip():
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            continue
        # Expected layout: phrase <TAB> phrase <TAB> score
        tokens = line.split('\t')
        pair = tuple(sorted([t.strip() for t in tokens[:2]]))
        wordsim[pair] = float(tokens[2])

for condition in ('all', 'mturk', 'scholar'):
    ratings = survey.get_ratings_by_condition('general', condition)
    X = []  # mean survey response per usable pair
    Y = []  # reference score of the same pair, index-aligned with X
    for (pair_id, pair_ratings) in ratings.items():
        # Sort the phrases so the lookup key matches the keys of `wordsim`.
        pair = tuple(sorted(survey.id_to_phrases(pair_id)))
        if pair not in wordsim:
            print('unknown pair: %s' % (pair,))
            continue
        if len(pair_ratings) <= MIN_RATINGS:
            # Previously reported as "unknown pair", which was misleading:
            # the pair is known, it just has too few ratings to be used.
            print('too few ratings: %s' % (pair,))
            continue
        # NOTE(review): the threshold above counts all ratings, while the
        # mean only uses ratings that have a response -- confirm that
        # utils.mean copes with the (unlikely) empty list.
        responses = [r.response for r in pair_ratings if r.has_response()]
        X.append(utils.mean(responses))
        Y.append(wordsim[pair])
    # Single-argument print: same output as the Python 2 print statement
    # (space-separated str() of each value) and also valid on Python 3.
    print('%s %s %s' % (condition,
                        correlation.pearson_rho(X, Y),
                        correlation.spearman_rho_tr(X, Y)))
utils.dev(spearmans))) for ktype in pair_means.keys(): write('question type: %s' % ktype) for (condition1, consensus1) in pair_means[ktype].items(): for (condition2, consensus2) in pair_means[ktype].items(): if not consensus1 or not consensus2 or condition1 == condition2 or condition1 == 'all' or condition2 == 'all': continue write('\tcondition %s, %s' % (condition1, condition2)) common_ids = list( set(consensus1.keys()).intersection(consensus2.keys())) X = [consensus1[pid] for pid in common_ids] Y = [consensus2[pid] for pid in common_ids] diffs = [(abs(x - y), x, y, pid) for (pid, x, y) in zip(common_ids, X, Y)] diffs.sort() diffs.reverse() write('\t\tpearson: %.3f' % (correlation.pearson_rho(X, Y))) write('\t\tspearman: %.3f' % (correlation.spearman_rho_tr(X, Y))) for (d, x, y, pid) in diffs[:10]: pair = s.id_to_phrases(pid) ratings1 = pair_ratings[ktype][condition1][pid] ratings2 = pair_ratings[ktype][condition2][pid] (t, p) = stats.ttest_ind(ratings1, ratings2) write('\t\t\t%s, %s: %.2f to %.2f (ttest t=%.5f, p=%.5f)' % (pair[0], pair[1], x, y, t, p))
# Inter-user agreement between the users of condition1 and condition2.
#
# Reads from the enclosing scope (defined outside this chunk): condition1,
# condition2, users1, users2, write, random, correlation, utils.
# Each user object must offer keys() and item lookup by pair id.
write('\tcondition %s, %s' % (condition1, condition2))
pearsons = []   # one mean Pearson value per qualifying user pair
spearmans = []  # one mean Spearman value per qualifying user pair
samples_per_user = 10
for u1 in users1:
    for u2 in users2:
        # Identity check: never correlate a user object with itself.
        # If users1 and users2 are the same collection, every other pair
        # is still visited in both orders.
        if u1 is u2:
            continue
        common_pair_ids = list(
            set(u1.keys()).intersection(u2.keys()))
        if len(common_pair_ids) < 5:
            continue
        up = []  # user pearson samples
        us = []  # user spearman samples
        for _ in range(samples_per_user):
            # Correlate on a random subset of 5 shared pair ids.
            sample_pair_ids = random.sample(common_pair_ids, 5)
            X = [u1[pid] for pid in sample_pair_ids]
            Y = [u2[pid] for pid in sample_pair_ids]
            # Skip samples where either user gave one constant value
            # (presumably because the correlation is undefined there).
            if len(set(X)) != 1 and len(set(Y)) != 1:
                # Append directly instead of binding temporaries named
                # `p` and `s`: `s` is used elsewhere in this file as the
                # object providing id_to_phrases(), so rebinding it to a
                # float could break that code if it shares this scope.
                up.append(correlation.pearson_rho(X, Y))
                us.append(correlation.spearman_rho_tr(X, Y))
        if up and us:
            pearsons.append(utils.mean(up))
            spearmans.append(utils.mean(us))
# NOTE(review): the literal 'd' after the tabs looks like a typo; it is
# program output, so it is left unchanged here.
write('\t\td unique pairs of users: %d' % len(spearmans))
# NOTE(review): if no user pair qualified both lists are empty -- confirm
# that utils.mean / utils.dev handle an empty list.
write('\t\tspearman: mean=%.3f, dev=%s' %
      (utils.mean(spearmans), utils.dev(spearmans)))
write('\t\tpearson: mean=%.3f, dev=%s' %
      (utils.mean(pearsons), utils.dev(pearsons)))
# For every condition of the current question type, measure how well the
# users of condition1 agree with the users of that condition, and write the
# mean and deviation of the per-user-pair Pearson and Spearman values.
#
# Reads from the enclosing scope (defined outside this chunk): users, ktype,
# condition1, users1, write, random, correlation, utils.
# Each user object must offer keys() and item lookup by pair id.
for (condition2, users2) in users[ktype].items():
    # Nothing to compare when either side has no users.
    if not users1 or not users2:
        continue
    write('\tcondition %s, %s' % (condition1, condition2))

    pearsons = []   # one mean Pearson value per qualifying user pair
    spearmans = []  # one mean Spearman value per qualifying user pair
    samples_per_user = 10

    for u1 in users1:
        for u2 in users2:
            # Identity, not equality: only skip the very same user object.
            if u1 is u2:
                continue
            common_pair_ids = list(set(u1.keys()).intersection(u2.keys()))
            # At least 5 shared pairs are needed to draw a sample of 5.
            if len(common_pair_ids) < 5:
                continue

            up = []  # user pearson samples
            us = []  # user spearman samples
            for _ in range(samples_per_user):
                sample_pair_ids = random.sample(common_pair_ids, 5)
                X = [u1[pid] for pid in sample_pair_ids]
                Y = [u2[pid] for pid in sample_pair_ids]
                # Drop samples in which either user gave a single constant
                # value (presumably the correlation is undefined there).
                if len(set(X)) == 1 or len(set(Y)) == 1:
                    continue
                # The results are appended directly rather than stored in
                # temporaries called `p` and `s`: elsewhere in this file
                # `s` names the object that provides id_to_phrases(), and
                # rebinding it to a float here could break that code if
                # both run in the same scope.
                up.append(correlation.pearson_rho(X, Y))
                us.append(correlation.spearman_rho_tr(X, Y))

            # Only user pairs with at least one usable sample contribute.
            if up and us:
                pearsons.append(utils.mean(up))
                spearmans.append(utils.mean(us))

    # NOTE(review): the literal 'd' after the tabs looks like a typo; it is
    # program output, so it is left unchanged here.
    write('\t\td unique pairs of users: %d' % len(spearmans))
    # NOTE(review): both lists are empty when no user pair qualified --
    # confirm that utils.mean / utils.dev handle an empty list.
    write('\t\tspearman: mean=%.3f, dev=%s' %
          (utils.mean(spearmans), utils.dev(spearmans)))
    write('\t\tpearson: mean=%.3f, dev=%s' %
          (utils.mean(pearsons), utils.dev(pearsons)))