Ejemplo n.º 1
0
        X = []
        Y = []
        for r in u.rated:
            if r.has_response():
                if r.pair_id in ratings:
                    X.append(ratings[r.pair_id])
                    Y.append(r.response)
                    d = abs(ratings[r.pair_id] - r.response)
                    deltas['all'].append(d)
                    deltas[r.condition].append(d)
                    deltas[(r.condition, (r.field == 'general'))].append(d)
                else:
                    ratings[r.pair_id] = r.response

        if len(X) == len(Y) == 5 and len(set(X)) > 1 and len(set(Y)) > 1:
            rho = correlation.spearman_rho_tr(X, Y)
            spearmans['all'].append(rho)
            if u.mturk and not u.scholar:
                spearmans['mturk'].append(rho)
            if u.scholar and not u.mturk:
                spearmans['scholar'].append(rho)

# Summarise the intra-rater Spearman correlations: one output line per key
# of `spearmans`, giving the mean, the number of samples and the deviation.
write('intra-rater spearman correlations:')
for group in spearmans:
    rhos = spearmans[group]
    summary = (group, utils.mean(rhos), len(rhos), utils.dev(rhos))
    write('\t%s: %.3f n=%d, dev=%.3f' % summary)
        n = 10
        means = [[] for i in range(n)]
        for (pair_id, ratings) in pair_ratings[ktype][condition].items():
            total = 0
            for i in range(n):
                pm = pair_min_counts[pair_id]
                means[i].append(utils.mean(utils.sample_with_replacement(ratings, pm)))
                total += utils.mean(random.sample(ratings, pm))
            pair_means[ktype][condition][pair_id] = 1.0 * total / n

        if means:
            spearmans = []
            for X in means:
                for Y in means:
                    if len(set(X)) >= 1 and len(set(Y)) >= 1:
                        spearmans.append(correlation.spearman_rho_tr(X, Y))
            write('\t%s, %s: bootstrapped spearman: %.3f (dev=%.3f)' %
                (condition, condition, utils.mean(spearmans), utils.dev(spearmans)))

# For every question type, walk all ordered pairs of conditions and report
# pairwise user-vs-user Pearson and Spearman correlations.
for ktype in pair_means.keys():
    write('question type: %s' % ktype)
    for (condition1, consensus1) in pair_means[ktype].items():
        for (condition2, consensus2) in pair_means[ktype].items():
            # Skip empty consensus tables, a condition paired with itself,
            # and the aggregate 'all' condition on either side.
            if not consensus1 or not consensus2 or condition1 == condition2 or condition1 == 'all' or condition2 == 'all':
                continue
            write('\tcondition %s, %s' % (condition1, condition2))

            # Consensus values for the pair ids present under both conditions.
            # NOTE(review): in the code visible here X and Y are reassigned
            # inside the sampling loop below before they are ever read.
            common_ids = list(set(consensus1.keys()).intersection(consensus2.keys()))
            X = [consensus1[pid] for pid in common_ids]
            Y = [consensus2[pid] for pid in common_ids]
            # NOTE(review): this writes the same header line a second time.
            write('\tcondition %s, %s' % (condition1, condition2))
            pearsons = []
            spearmans = []
            samples_per_user = 10
            # NOTE(review): users1 / users2 are not defined in the visible
            # code; they are indexed by pair id below, so presumably each
            # element maps pair id -> rating for one user -- TODO confirm.
            for u1 in users1:
                for u2 in users2:
                    # Never compare a user object with itself.
                    if id(u1) == id(u2):
                        continue
                    common_pair_ids = list(
                        set(u1.keys()).intersection(u2.keys()))
                    # Each sample below draws 5 pair ids, so at least 5 shared
                    # pairs are required.
                    if len(common_pair_ids) >= 5:
                        up = []  # user pearson samples
                        us = []  # user spearman samples
                        for i in xrange(samples_per_user):
                            sample_pair_ids = random.sample(common_pair_ids, 5)
                            X = [u1[pid] for pid in sample_pair_ids]
                            Y = [u2[pid] for pid in sample_pair_ids]
                            # Only score samples in which neither user gave
                            # the same value for all five pairs.
                            if len(set(X)) != 1 and len(set(Y)) != 1:
                                p = correlation.pearson_rho(X, Y)
                                s = correlation.spearman_rho_tr(X, Y)
                                up.append(p)
                                us.append(s)
                        # Record one averaged value per user pair, and only
                        # when at least one sample was scored.
                        if up and us:
                            pearsons.append(utils.mean(up))
                            spearmans.append(utils.mean(us))
            write('\t\td unique pairs of users: %d' % len(spearmans))
            write('\t\tspearman: mean=%.3f, dev=%s' %
                  (utils.mean(spearmans), utils.dev(spearmans)))
            write('\t\tpearson: mean=%.3f, dev=%s' %
                  (utils.mean(pearsons), utils.dev(pearsons)))
Ejemplo n.º 4
0
#!/usr/bin/python -O

import sys

import correlation
import utils

# Compare the survey's mean responses for the 'general' pairs against the
# reference scores read from dat/general.txt, once per rater condition.
survey = utils.Survey()

# Reference scores keyed by the sorted (phrase, phrase) tuple.  Each data
# line is tab-separated: two phrases followed by a numeric score.
wordsim = {}
with open('dat/general.txt') as f:  # closed deterministically on exit
    for line in f:
        if not line.strip():
            # A blank (e.g. trailing) line has no score column.
            continue
        tokens = line.split('\t')
        pair = tuple(sorted(t.strip() for t in tokens[:2]))
        wordsim[pair] = float(tokens[2])


for condition in ('all', 'mturk', 'scholar'):
    ratings = survey.get_ratings_by_condition('general', condition)
    X = []  # mean survey response per pair
    Y = []  # reference score for the same pair
    for (pair_id, pair_ratings) in ratings.items():
        # Sort the phrases so the key matches the ordering used in wordsim.
        pair = tuple(sorted(survey.id_to_phrases(pair_id)))
        if pair in wordsim and len(pair_ratings) > 10:
            X.append(utils.mean(
                [r.response for r in pair_ratings if r.has_response()]))
            Y.append(wordsim[pair])
        else:
            # Also reached for known pairs with 10 or fewer ratings.
            # Single-argument print() produces the same text on Python 2
            # and Python 3.
            print('unknown pair: %s' % (pair,))
    print('%s %s %s' % (condition,
                        correlation.pearson_rho(X, Y),
                        correlation.spearman_rho_tr(X, Y)))
Ejemplo n.º 5
0
        means = [[] for i in range(n)]
        for (pair_id, ratings) in pair_ratings[ktype][condition].items():
            total = 0
            for i in range(n):
                pm = pair_min_counts[pair_id]
                means[i].append(
                    utils.mean(utils.sample_with_replacement(ratings, pm)))
                total += utils.mean(random.sample(ratings, pm))
            pair_means[ktype][condition][pair_id] = 1.0 * total / n

        if means:
            spearmans = []
            for X in means:
                for Y in means:
                    if len(set(X)) >= 1 and len(set(Y)) >= 1:
                        spearmans.append(correlation.spearman_rho_tr(X, Y))
            write('\t%s, %s: bootstrapped spearman: %.3f (dev=%.3f)' %
                  (condition, condition, utils.mean(spearmans),
                   utils.dev(spearmans)))

for ktype in pair_means.keys():
    write('question type: %s' % ktype)
    for (condition1, consensus1) in pair_means[ktype].items():
        for (condition2, consensus2) in pair_means[ktype].items():
            if not consensus1 or not consensus2 or condition1 == condition2 or condition1 == 'all' or condition2 == 'all':
                continue
            write('\tcondition %s, %s' % (condition1, condition2))

            common_ids = list(
                set(consensus1.keys()).intersection(consensus2.keys()))
            X = [consensus1[pid] for pid in common_ids]