Esempio n. 1
0
        # Correlate the two rating vectors only when both hold exactly five
        # values and neither is constant (more than one distinct value each).
        if len(X) == len(Y) == 5 and len(set(X)) > 1 and len(set(Y)) > 1:
            rho = correlation.spearman_rho_tr(X, Y)
            spearmans['all'].append(rho)
            # The two sub-buckets are mutually exclusive: a user flagged as
            # both mturk and scholar (or as neither) is counted in 'all' only.
            if u.mturk and not u.scholar:
                spearmans['mturk'].append(rho)
            if u.scholar and not u.mturk:
                spearmans['scholar'].append(rho)

# Report, for every bucket in `spearmans`, the utils.mean of its
# correlations, how many there are, and their utils.dev.
write('intra-rater spearman correlations:')
for bucket, rhos in spearmans.items():
    summary = (bucket, utils.mean(rhos), len(rhos), utils.dev(rhos))
    write('\t%s: %.3f n=%d, dev=%.3f' % summary)

# For each specificity and condition, measure how far a user's first two
# ratings of the same pair differ, then report mean / count / deviation.
for spec in utils.SPECIFICITIES:
    write('mean absolute errors for %s questions:' % spec)
    for c in utils.CONDITIONS:
        diffs = []
        for ratings in s.get_ratings_by_condition(spec, c).values():
            by_user = utils.group_by(ratings, lambda r: r.user)
            # Only users who rated this pair at least twice contribute.
            diffs.extend(
                abs(repeat[0].response - repeat[1].response)
                for repeat in by_user.values()
                if len(repeat) >= 2)
        if not diffs:
            continue
        write('\tcondition %s: mae=%.3f, n=%d, dev=%.3f' %
              (c, utils.mean(diffs), len(diffs), utils.dev(diffs)))
# All reporting is done; release the output file.
f.close()
            # Inter-rater agreement between the raters of condition1 and
            # condition2: for each pair of distinct raters sharing at least
            # five rated pairs, average Pearson/Spearman over repeated random
            # five-item subsamples, then report the mean across rater pairs.
            write('\tcondition %s, %s' % (condition1, condition2))
            pearsons = []
            spearmans = []
            samples_per_user = 10
            for u1 in users1:
                for u2 in users2:
                    # Identity test: never correlate a rater with itself.
                    if u1 is u2:
                        continue
                    common_pair_ids = list(
                        set(u1.keys()).intersection(u2.keys()))
                    if len(common_pair_ids) < 5:
                        continue
                    up = []  # user pearson samples
                    us = []  # user spearman samples
                    # range() works on Python 2 and 3; the count is tiny.
                    for _ in range(samples_per_user):
                        sample_pair_ids = random.sample(common_pair_ids, 5)
                        X = [u1[pid] for pid in sample_pair_ids]
                        Y = [u2[pid] for pid in sample_pair_ids]
                        # Skip samples where either side is constant.
                        if len(set(X)) != 1 and len(set(Y)) != 1:
                            # Named rho_p / rho_s so the module-level name
                            # `s` (used elsewhere in this file for the
                            # survey object) is not rebound to a float.
                            rho_p = correlation.pearson_rho(X, Y)
                            rho_s = correlation.spearman_rho_tr(X, Y)
                            up.append(rho_p)
                            us.append(rho_s)
                    if up and us:
                        pearsons.append(utils.mean(up))
                        spearmans.append(utils.mean(us))
            # NOTE(review): the leading 'd' in the next message looks like a
            # leftover from a '%d' placeholder -- confirm before changing it.
            write('\t\td unique pairs of users: %d' % len(spearmans))
            # NOTE(review): both lists can be empty here; how utils.mean and
            # utils.dev treat an empty list is not visible -- verify.
            write('\t\tspearman: mean=%.3f, dev=%s' %
                  (utils.mean(spearmans), utils.dev(spearmans)))
            write('\t\tpearson: mean=%.3f, dev=%s' %
                  (utils.mean(pearsons), utils.dev(pearsons)))
        # For every pair, draw n subsamples of its ratings (each of size
        # pair_min_counts[pair_id]) and record their means.
        for (pair_id, ratings) in pair_ratings[ktype][condition].items():
            total = 0
            for i in range(n):
                pm = pair_min_counts[pair_id]
                # means[i] gathers one value per pair for iteration i; the
                # draw presumably allows repeats (helper not visible here).
                means[i].append(utils.mean(utils.sample_with_replacement(ratings, pm)))
                # NOTE(review): the running total uses a separate draw via
                # random.sample (no replacement), which raises ValueError if
                # pm > len(ratings) -- confirm the two draws should differ.
                total += utils.mean(random.sample(ratings, pm))
            # Consensus value for the pair: average of the n subsample means.
            pair_means[ktype][condition][pair_id] = 1.0 * total / n

        if means:
            spearmans = []
            # Every ordered pair of iteration vectors is correlated,
            # including each vector with itself.
            for X in means:
                for Y in means:
                    # NOTE(review): `>= 1` is true for any non-empty list, so
                    # constant vectors are not filtered out; other guards in
                    # this file use `> 1` -- confirm the intended threshold.
                    if len(set(X)) >= 1 and len(set(Y)) >= 1:
                        spearmans.append(correlation.spearman_rho_tr(X, Y))
            write('\t%s, %s: bootstrapped spearman: %.3f (dev=%.3f)' %
                (condition, condition, utils.mean(spearmans), utils.dev(spearmans)))

# Compare the per-pair consensus values of every two distinct conditions
# within each question type; the aggregate 'all' condition is left out.
for ktype in pair_means:
    write('question type: %s' % ktype)
    by_condition = pair_means[ktype]
    for (condition1, consensus1) in by_condition.items():
        for (condition2, consensus2) in by_condition.items():
            # Guard clauses: empty tables, self-comparison, and 'all'.
            if not (consensus1 and consensus2):
                continue
            if condition1 == condition2 or 'all' in (condition1, condition2):
                continue
            write('\tcondition %s, %s' % (condition1, condition2))

            # Restrict both consensus tables to the pair ids they share.
            ids1 = set(consensus1.keys())
            common_ids = list(ids1.intersection(consensus2.keys()))
            X = [consensus1[pid] for pid in common_ids]
            Y = [consensus2[pid] for pid in common_ids]

            # (gap, x, y, pid) tuples in ascending order of absolute gap.
            diffs = sorted(
                [(abs(x - y), x, y, pid)
                 for (pid, x, y) in zip(common_ids, X, Y)])
Esempio n. 4
0
def write(message):
    """Echo *message* to stdout and append it, newline-terminated, to ``f``.

    ``f`` is not defined in this function; it must be a writable file-like
    object available at module level when this is called.
    """
    # Parenthesised single-argument form: identical output as a Python 2
    # print statement, and valid as a Python 3 function call.
    print(message)
    f.write(message + '\n')

# For every (specificity, condition) with at least one rating, report the
# number of responses, their mean and deviation, and the absolute and
# relative frequency of each response value 1..5.
for ktype in utils.SPECIFICITIES:
    for condition in utils.CONDITIONS:
        pair_ratings = s.get_ratings_by_condition(ktype, condition)
        values = []
        for ratings in pair_ratings.values():
            values.extend(r.response for r in ratings)
        if not values:
            continue
        # Six slots so a response can index directly; slot 0 is never
        # reported. NOTE(review): assumes responses are ints in 0..5 -- a
        # larger value raises IndexError and a negative one would silently
        # count from the end; verify against the survey data.
        hist = [0] * 6
        for r in values:
            hist[r] += 1
        write('condition %s, %s:'% (ktype, condition))
        write('\tn: %d' % len(values))
        write('\tmean: %.3f' % utils.mean(values))
        write('\tdev: %.3f' % utils.dev(values))
        tokens = [
            '%d=%d' % (i, hist[i])
            for i in range(1,6)
        ]
        # str.join with a single space is what string.join(tokens) did by
        # default; the string-module function no longer exists on Python 3.
        write('\tcounts: %s' % ' '.join(tokens))
        tokens = [
            '%d=%.3f' % (i, 1.0 * hist[i] / len(values))
            for i in range(1,6)
        ]
        write('\thist: %s' % ' '.join(tokens))

Esempio n. 5
0
# Survey handle; the report loop below queries it through
# get_ratings_by_condition().
s = utils.Survey()


def write(message):
    """Print *message* and also log it, followed by a newline, to ``f``.

    ``f`` is looked up at call time; this function does not define it, so a
    writable file-like object must exist under that name at module level.
    """
    # A single parenthesised argument prints the same on Python 2 (print
    # statement) and Python 3 (print function).
    print(message)
    f.write(message + '\n')


# Per (specificity, condition): response count, mean, deviation, and the
# raw and normalised counts of each response value 1..5. Combinations with
# no ratings are skipped.
for ktype in utils.SPECIFICITIES:
    for condition in utils.CONDITIONS:
        pair_ratings = s.get_ratings_by_condition(ktype, condition)
        values = []
        for ratings in pair_ratings.values():
            values.extend(r.response for r in ratings)
        if not values:
            continue
        # Responses index the histogram directly; slot 0 exists only so
        # that values 1..5 need no offset. NOTE(review): presumes integer
        # responses within 0..5 -- confirm against the survey data.
        hist = [0] * 6
        for r in values:
            hist[r] += 1
        write('condition %s, %s:' % (ktype, condition))
        write('\tn: %d' % len(values))
        write('\tmean: %.3f' % utils.mean(values))
        write('\tdev: %.3f' % utils.dev(values))
        tokens = ['%d=%d' % (i, hist[i]) for i in range(1, 6)]
        # ' '.join reproduces string.join's default single-space separator
        # and, unlike that deprecated function, also exists on Python 3.
        write('\tcounts: %s' % ' '.join(tokens))
        tokens = [
            '%d=%.3f' % (i, 1.0 * hist[i] / len(values)) for i in range(1, 6)
        ]
        write('\thist: %s' % ' '.join(tokens))
Esempio n. 6
0
            # Draw n subsamples of this pair's ratings, each of size
            # pair_min_counts[pair_id].
            for i in range(n):
                pm = pair_min_counts[pair_id]
                # means[i] receives one value per pair for iteration i.
                means[i].append(
                    utils.mean(utils.sample_with_replacement(ratings, pm)))
                # NOTE(review): `total` is fed by a separate random.sample
                # draw (no replacement; ValueError if pm > len(ratings)) --
                # confirm it should differ from the draw above.
                total += utils.mean(random.sample(ratings, pm))
            # Consensus for the pair: average of the n subsample means.
            pair_means[ktype][condition][pair_id] = 1.0 * total / n

        if means:
            spearmans = []
            # Correlates every ordered pair of vectors in `means`, which
            # includes each vector paired with itself.
            for X in means:
                for Y in means:
                    # NOTE(review): `>= 1` holds for any non-empty list, so
                    # this guard does not exclude constant vectors -- confirm
                    # whether `> 1` was intended.
                    if len(set(X)) >= 1 and len(set(Y)) >= 1:
                        spearmans.append(correlation.spearman_rho_tr(X, Y))
            # The condition name is printed twice: both sides of the
            # comparison come from the same condition.
            write('\t%s, %s: bootstrapped spearman: %.3f (dev=%.3f)' %
                  (condition, condition, utils.mean(spearmans),
                   utils.dev(spearmans)))

# Walk every ordered pair of conditions within each question type and line
# up their per-pair consensus values.
for ktype in pair_means.keys():
    write('question type: %s' % ktype)
    for (condition1, consensus1) in pair_means[ktype].items():
        for (condition2, consensus2) in pair_means[ktype].items():
            # Skip empty consensus tables, a condition compared with itself,
            # and anything involving the 'all' condition.
            if not consensus1 or not consensus2 or condition1 == condition2 or condition1 == 'all' or condition2 == 'all':
                continue
            write('\tcondition %s, %s' % (condition1, condition2))

            # Only pair ids present in both tables can be compared.
            common_ids = list(
                set(consensus1.keys()).intersection(consensus2.keys()))
            # X and Y are aligned: position k in both refers to common_ids[k].
            X = [consensus1[pid] for pid in common_ids]
            Y = [consensus2[pid] for pid in common_ids]

            diffs = [(abs(x - y), x, y, pid)
Esempio n. 7
0
                    deltas[(r.condition, (r.field == 'general'))].append(d)
                else:
                    ratings[r.pair_id] = r.response

        # A correlation is computed only for two five-element vectors that
        # each contain more than one distinct value.
        if len(X) == len(Y) == 5 and len(set(X)) > 1 and len(set(Y)) > 1:
            rho = correlation.spearman_rho_tr(X, Y)
            spearmans['all'].append(rho)
            # Sub-buckets require exactly one of the two flags; users with
            # both or neither appear under 'all' only.
            if u.mturk and not u.scholar:
                spearmans['mturk'].append(rho)
            if u.scholar and not u.mturk:
                spearmans['scholar'].append(rho)

# One summary line per bucket of `spearmans`: utils.mean of the stored
# correlations, their count, and their utils.dev.
write('intra-rater spearman correlations:')
row_format = '\t%s: %.3f n=%d, dev=%.3f'
for label, rhos in spearmans.items():
    average = utils.mean(rhos)
    count = len(rhos)
    write(row_format % (label, average, count, utils.dev(rhos)))

# Test-retest gap: for each specificity and condition, collect the absolute
# difference between the first two responses a user gave to the same pair,
# then report mean / count / deviation of those gaps.
for spec in utils.SPECIFICITIES:
    write('mean absolute errors for %s questions:' % spec)
    for c in utils.CONDITIONS:
        diffs = []
        for ratings in s.get_ratings_by_condition(spec, c).values():
            per_user = utils.group_by(ratings, lambda r: r.user)
            for repeats in per_user.values():
                # A single rating gives nothing to compare against.
                if len(repeats) < 2:
                    continue
                first, second = repeats[0], repeats[1]
                diffs.append(abs(first.response - second.response))
        if not diffs:
            continue
        stats = (c, utils.mean(diffs), len(diffs), utils.dev(diffs))
        write('\tcondition %s: mae=%.3f, n=%d, dev=%.3f' % stats)
# Reporting is finished; close the output file.
f.close()
        # Inter-rater agreement between the raters of condition1 (bound by
        # an enclosing loop) and those of every condition2. Each pair of
        # distinct raters sharing at least five rated pairs contributes the
        # mean Pearson and Spearman correlation over repeated random
        # five-item subsamples.
        for (condition2, users2) in users[ktype].items():
            # Nothing to compare when either side has no raters.
            if not users1 or not users2:
                continue
            write('\tcondition %s, %s' % (condition1, condition2))
            pearsons = []
            spearmans = []
            samples_per_user = 10
            for u1 in users1:
                for u2 in users2:
                    # Identity test: never correlate a rater with itself.
                    if u1 is u2:
                        continue
                    common_pair_ids = list(set(u1.keys()).intersection(u2.keys()))
                    if len(common_pair_ids) < 5:
                        continue
                    up = []  # user pearson samples
                    us = []  # user spearman samples
                    # range() works on Python 2 and 3; the count is tiny.
                    for _ in range(samples_per_user):
                        sample_pair_ids = random.sample(common_pair_ids, 5)
                        X = [u1[pid] for pid in sample_pair_ids]
                        Y = [u2[pid] for pid in sample_pair_ids]
                        # A constant vector has no defined correlation.
                        if len(set(X)) != 1 and len(set(Y)) != 1:
                            # Named rho_p / rho_s so the module-level name
                            # `s` (used elsewhere in this file for the
                            # survey object) is not rebound to a float.
                            rho_p = correlation.pearson_rho(X, Y)
                            rho_s = correlation.spearman_rho_tr(X, Y)
                            up.append(rho_p)
                            us.append(rho_s)
                    if up and us:
                        pearsons.append(utils.mean(up))
                        spearmans.append(utils.mean(us))
            # NOTE(review): the leading 'd' in the next message looks like a
            # leftover from a '%d' placeholder -- confirm before changing it.
            write('\t\td unique pairs of users: %d' % len(spearmans))
            # NOTE(review): both lists can be empty here; how utils.mean and
            # utils.dev treat an empty list is not visible -- verify.
            write('\t\tspearman: mean=%.3f, dev=%s' % (utils.mean(spearmans), utils.dev(spearmans)))
            write('\t\tpearson: mean=%.3f, dev=%s' % (utils.mean(pearsons), utils.dev(pearsons)))