Example #1
def get_mean_exposures_per_pivot():
    "Returns the number of exposures each pivot received."
    cursor = connection.cursor()
    cursor.execute("""
        SELECT pivot, pivot_type, COUNT(*) as n_exposures
        FROM drill_question
        GROUP BY CONCAT(pivot, "|", pivot_type)
    """)
    data = cursor.fetchall()
    word_c = []
    kanji_c = []
    combined_c = []
    kanji_inc_dist = FreqDist()
    for pivot, pivot_type, n_exposures in data:
        combined_c.append(n_exposures)

        if pivot_type == 'k':
            kanji_c.append(n_exposures)
            kanji_inc_dist.inc(pivot, n_exposures)

        elif pivot_type == 'w':
            word_c.append(n_exposures)
            for kanji in scripts.unique_kanji(pivot):
                kanji_inc_dist.inc(kanji, n_exposures)

        else:
            raise ValueError('unknown pivot type: %s' % pivot_type)

    return [
        ('Words', mean(word_c)),
        ('Kanji', mean(kanji_c)),
        ('Combined', mean(combined_c)),
        ('Kanji combined', mean(kanji_inc_dist.values())),
    ]
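
The helpers these examples lean on are not shown in the listing: mean and an NLTK-style FreqDist whose inc(sample, count) method matches the pre-NLTK-3 API. A minimal stand-in, assuming only that behaviour, might look like this:

# Hypothetical stand-ins for the helpers assumed above (not the project's code):
# mean() averages any iterable, and FreqDist mimics the old NLTK 2.x API in
# which inc(sample, count) records `count` observations of `sample`.
def mean(values):
    values = list(values)
    return sum(values) / float(len(values))

class FreqDist(dict):
    def inc(self, sample, count=1):
        self[sample] = self.get(sample, 0) + count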
Example #2
def get_mean_exposures_per_pivot():
    "Returns the number of exposures each pivot received."
    cursor = connection.cursor()
    cursor.execute("""
        SELECT pivot, pivot_type, COUNT(*) as n_exposures
        FROM drill_question
        GROUP BY CONCAT(pivot, "|", pivot_type)
    """)
    data = cursor.fetchall()
    word_c = []
    kanji_c = []
    combined_c = []
    kanji_inc_dist = FreqDist()
    for pivot, pivot_type, n_exposures in data:
        combined_c.append(n_exposures)

        if pivot_type == 'k':
            kanji_c.append(n_exposures)
            kanji_inc_dist.inc(pivot, n_exposures)

        elif pivot_type == 'w':
            word_c.append(n_exposures)
            for kanji in scripts.unique_kanji(pivot):
                kanji_inc_dist.inc(kanji, n_exposures)

        else:
            raise ValueError('unknown pivot type: %s' % pivot_type)

    return [
        ('Words', mean(word_c)),
        ('Kanji', mean(kanji_c)),
        ('Combined', mean(combined_c)),
        ('Kanji combined', mean(kanji_inc_dist.values())),
    ]
Example #3
def _calculate_pre_post_ratios(response_data):
    """
    Returns the number of data which are correctly responded to on their first
    presentation.
    """
    response_data = [(pid, pt, i, ic)
                     for (i, (pid, pt, ic)) in enumerate(response_data)]
    response_data.sort()

    first_responses = []
    last_responses = []
    for (pivot_id, pivot_type), responses in groupby(response_data, lambda r:
                                                     (r[0], r[1])):
        responses = list(responses)
        if len(responses) < 2:
            continue
        first_responses.append(responses[0][3])
        last_responses.append(responses[-1][3])

    if not first_responses:
        return None, None

    return (
        mean(first_responses),
        mean(last_responses),
    )
Example #4
def _calculate_pre_post_ratios(response_data):
    """
    Returns the number of data which are correctly responded to on their first
    presentation.
    """
    response_data = [(pid, pt, i, ic) for (i, (pid, pt, ic)) in 
            enumerate(response_data)]
    response_data.sort()
    
    first_responses = []
    last_responses = []
    for (pivot_id, pivot_type), responses in groupby(response_data,
            lambda r: (r[0], r[1])):
        responses = list(responses)
        if len(responses) < 2:
            continue
        first_responses.append(responses[0][3])
        last_responses.append(responses[-1][3])
        
    if not first_responses:
        return None, None
    
    return (
        mean(first_responses),
        mean(last_responses),
    )
Example #5
def get_global_rater_stats():
    cursor = connection.cursor()
    
    cursor.execute("""
        SELECT id, username
        FROM auth_user
    """)
    id_to_username = dict(cursor.fetchall())
    
    cursor.execute("""
        SELECT q_time.user_id, q.pivot, q.pivot_type, q_time.is_correct
        FROM (
            SELECT mco.question_id, mco.is_correct, ot.user_id, ot.timestamp
            FROM (
                SELECT mcr.option_id, dr.user_id, dr.timestamp
                FROM drill_response AS dr
                INNER JOIN drill_multiplechoiceresponse AS mcr
                ON mcr.response_ptr_id = dr.id
            ) AS ot
            INNER JOIN drill_multiplechoiceoption AS mco
            ON mco.id = ot.option_id
        ) AS q_time
        INNER JOIN drill_question AS q
        ON q.id = q_time.question_id
        ORDER BY user_id ASC, q_time.timestamp ASC
    """)
    results = []
    ignore_users = _get_user_ignore_set()
    
    for user_id, rows in groupby(cursor.fetchall(), lambda r: r[0]):
        if user_id in ignore_users:
            continue
        rows = [(p, pt, c) for (_u, p, pt, c) in rows] # discard user_id
        
        user_data = {
            'user_id':      user_id,
            'username':     id_to_username[user_id]
        }
        user_data['n_responses'] = len(rows)
        user_data['n_tests'] = drill_models.TestSet.objects.filter(
                user__id=user_id).exclude(end_time=None).count()
        user_data['mean_accuracy'] = mean(r[2] for r in rows)
        user_data['n_errors'] = _seq_len(r for r in rows if not r[2])

        pre_ratio, post_ratio = _calculate_pre_post_ratios(rows)
        user_data['pre_ratio'] = pre_ratio
        user_data['post_ratio'] = post_ratio

        user_data['state_machine'] = _calculated_item_change(rows)

        if pre_ratio is not None and post_ratio is not None:
            user_data['pre_post_diff'] = post_ratio - pre_ratio
        else:
            user_data['pre_post_diff'] = None
        results.append(user_data)
    return results
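
The _seq_len helper used above is not reproduced in this listing; judging from its use it simply counts the items of a generator. A minimal sketch under that assumption:

# Hypothetical helper (assumed, not the project's own definition): counts the
# items yielded by an iterable without building an intermediate list.
def _seq_len(seq):
    return sum(1 for _ in seq)

The _get_user_ignore_set and _calculated_item_change helpers are likewise project-specific and not shown here.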
Example #6
def get_rater_stats(rater):
    responses = drill_models.MultipleChoiceOption.objects.filter(
        multiplechoiceresponse__user=rater).values('is_correct')
    mean_accuracy = mean((r['is_correct'] and 1 or 0) for r in responses)

    return {
        'n_responses': drill_models.Response.objects.filter(
            user=rater).count(),
        'n_tests': drill_models.TestSet.objects.filter(user=rater).count(),
        'mean_accuracy': mean_accuracy,
    }
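
The expression (r['is_correct'] and 1 or 0) is the old and/or conditional idiom; since 1 is truthy it is safe here, but on any modern interpreter the same coercion reads more clearly as a conditional expression:

# Equivalent, more readable spelling of the and/or idiom used above.
mean_accuracy = mean(1 if r['is_correct'] else 0 for r in responses)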
Example #7
def get_global_rater_stats():
    cursor = connection.cursor()

    cursor.execute("""
        SELECT id, username
        FROM auth_user
    """)
    id_to_username = dict(cursor.fetchall())

    cursor.execute("""
        SELECT q_time.user_id, q.pivot, q.pivot_type, q_time.is_correct
        FROM (
            SELECT mco.question_id, mco.is_correct, ot.user_id, ot.timestamp
            FROM (
                SELECT mcr.option_id, dr.user_id, dr.timestamp
                FROM drill_response AS dr
                INNER JOIN drill_multiplechoiceresponse AS mcr
                ON mcr.response_ptr_id = dr.id
            ) AS ot
            INNER JOIN drill_multiplechoiceoption AS mco
            ON mco.id = ot.option_id
        ) AS q_time
        INNER JOIN drill_question AS q
        ON q.id = q_time.question_id
        ORDER BY user_id ASC, q_time.timestamp ASC
    """)
    results = []
    ignore_users = _get_user_ignore_set()

    for user_id, rows in groupby(cursor.fetchall(), lambda r: r[0]):
        if user_id in ignore_users:
            continue
        rows = [(p, pt, c) for (_u, p, pt, c) in rows]  # discard user_id

        user_data = {'user_id': user_id, 'username': id_to_username[user_id]}
        user_data['n_responses'] = len(rows)
        user_data['n_tests'] = drill_models.TestSet.objects.filter(
            user__id=user_id).exclude(end_time=None).count()
        user_data['mean_accuracy'] = mean(r[2] for r in rows)
        user_data['n_errors'] = _seq_len(r for r in rows if not r[2])

        pre_ratio, post_ratio = _calculate_pre_post_ratios(rows)
        user_data['pre_ratio'] = pre_ratio
        user_data['post_ratio'] = post_ratio

        user_data['state_machine'] = _calculated_item_change(rows)

        if pre_ratio is not None and post_ratio is not None:
            user_data['pre_post_diff'] = post_ratio - pre_ratio
        else:
            user_data['pre_post_diff'] = None
        results.append(user_data)
    return results
Example #8
def get_rater_stats(rater):
    responses = drill_models.MultipleChoiceOption.objects.filter(
            multiplechoiceresponse__user=rater).values('is_correct')
    mean_accuracy = mean((r['is_correct'] and 1 or 0) for r in responses)
    
    return {
        'n_responses': drill_models.Response.objects.filter(
                user=rater).count(),
        'n_tests': drill_models.TestSet.objects.filter(
                user=rater).count(),
        'mean_accuracy': mean_accuracy,
    }
Example #9
def sample_by_centroid(cls, locations, potential_locations,
        use_max=False):
    mean_x = mean(x for (x, y) in locations)
    mean_y = mean(y for (x, y) in locations)
    parts = []
    n_locations = len(potential_locations)
    for loc in potential_locations:
        x, y = loc
        dist = sqrt((x - mean_x)**2 + (y - mean_y)**2)
        if dist > 0:
            score = 1 / dist**2
        else:
            score = 1
        parts.append((loc, score))

    dist = prob.ProbDist(parts)

    if use_max:
        p, l = max((p, l) for (l, p) in dist.iteritems())
        return l

    return dist.sample()
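
prob.ProbDist is not defined anywhere in these snippets. Judging by how it is used, it normalises the (location, score) pairs into a probability distribution and exposes iteritems() and sample(); a hypothetical stand-in under that assumption:

# Hypothetical stand-in for prob.ProbDist (an assumption; the real class is
# not shown): normalises (value, score) pairs and supports weighted sampling.
import random

class ProbDist(object):
    def __init__(self, parts):
        total = float(sum(score for _, score in parts))
        self._dist = [(value, score / total) for value, score in parts]

    def iteritems(self):
        return iter(self._dist)

    def sample(self):
        values, weights = zip(*self._dist)
        return random.choices(values, weights=weights, k=1)[0]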
Example #10
    if 'prim' in data[width][nodes][ch]:
        data[width][nodes][ch]['prim'].append(prim)
    else:
        data[width][nodes][ch]['prim'] = [prim]

    if 'rcs' in data[width][nodes][ch]:
        data[width][nodes][ch]['rcs'].append(rcs)
    else:
        data[width][nodes][ch]['rcs'] = [rcs]

for width in sorted(data.keys()):
    for nodes in sorted(data[width].keys()):
        for channels in sorted(data[width][nodes].keys()):
            rd = data[width][nodes][channels]
            rd['dijkstra'] = simplestats.mean(rd['dijkstra'])
            rd['prim'] = simplestats.mean(rd['prim'])
            rd['rcs'] = simplestats.mean(rd['rcs'])

# Scenario 1: vary # of channels in {1, 2, 3, 4, 5}
d = data[50000.0][25]
dij = []
pri = []
rcs = []
for channels in sorted(d.keys()):
    dij.append(str(d[channels]['dijkstra'] / 1000000))
    pri.append(str(d[channels]['prim'] / 1000000))
    rcs.append(str(d[channels]['rcs'] / 1000000))

print(
    """
Example #11
def _accumulate_plugin_errors(raw_data):
    data = []
    for label, scores in itertools.groupby(raw_data, lambda x: x[0]):
        data.append((label, mean(v for (l, v) in scores)))
    return data
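
Note that itertools.groupby only merges consecutive items with equal keys, so _accumulate_plugin_errors silently assumes raw_data is already sorted (or at least grouped) by label. When that is not guaranteed, sorting first is the safe pattern:

# groupby only groups adjacent keys, so unsorted input would yield a label twice.
import itertools
from statistics import mean  # stand-in for the mean helper used above

raw_data = [('a', 1.0), ('b', 4.0), ('a', 3.0)]
raw_data.sort(key=lambda x: x[0])  # make equal labels adjacent
data = [(label, mean(v for (_l, v) in scores))
        for label, scores in itertools.groupby(raw_data, lambda x: x[0])]
# data == [('a', 2.0), ('b', 4.0)]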
Example #12
#     for j in data[i].keys():
#         for k in data[i][j].keys():
#             for l in data[i][j][k].keys():
#                 for m in data[i][j][k][l].keys():
#                     print "W: %d N: %d C: %d AP: %d V: %s #: %d" % (i, j, k, l, m, len(data[i][j][k][l][m]))



# Scenario 1: vary size of the network
mst = []
mst2 = []
msto = []
mst2o = []
idxs = []
for width in sorted(data.keys()):
    mst.append(str(simplestats.mean(data[width][MUSERS][MCHAN][0]['mst'])))
    mst2.append(str(simplestats.mean(data[width][MUSERS][MCHAN][0]['mst2'])))
    msto.append(str(simplestats.mean(data[width][MUSERS][MCHAN][0]['mst-apx'])))
    mst2o.append(str(simplestats.mean(data[width][MUSERS][MCHAN][0]['mst2-apx'])))
    idxs.append(int(width/1000))

print("""
figure(1);
set(1, \"defaulttextfontname\", \"Times-Roman\");
set(1, \"defaultaxesfontname\", \"Times-Roman\");
set(1, \"defaulttextfontsize\", 19);
set(1, \"defaultaxesfontsize\", 19);

X = %(idx)s;
MST = [ %(mst)s ];
MST2 = [ %(mst2)s ];
Example #13
    if 'r2' in data[width][theta][relays][subs][mq][ch]:
        data[width][theta][relays][subs][mq][ch]['r2'].append(r2)
    else:
        data[width][theta][relays][subs][mq][ch]['r2'] = [r2]

for width in sorted(data.keys()):
    for theta in sorted(data[width].keys()):
        for relay in sorted(data[width][theta].keys()):
            for subs in sorted(data[width][theta][relay].keys()):
                for mq in sorted(data[width][theta][relay][subs].keys()):
                    for ch in sorted(data[width][theta][relay][subs][mq].keys()):
                        rd = data[width][theta][relay][subs][mq][ch]
                        lilp = len(rd['ilp'])
                        lr1 = len(rd['r1'])
                        lr2 = len(rd['r2'])
                        rd['ilp'] = simplestats.mean(rd['ilp'])
                        rd['r1'] = simplestats.mean(rd['r1'])
                        rd['r2'] = simplestats.mean(rd['r2'])

# Scenario 1: vary # of subscribers in {20, 30, 40, 50, 60}
d = data[40000.0][40][4]
ilp = []
r1 = []
r2 = []
for subs in sorted(d.keys()):
    ilp.append(str(d[subs][40000.0][4]['ilp'] / 1000000))
    r1.append(str(d[subs][40000.0][4]['r1'] / 1000000))
    r2.append(str(d[subs][40000.0][4]['r2'] / 1000000))
    
print("""
figure(1);
Example #14
    if 'rcsdpcs' in data[width][nodes][ch]:
        data[width][nodes][ch]['rcsdpcs'].append(rcsdpcs)
    else:
        data[width][nodes][ch]['rcsdpcs'] = [rcsdpcs]

    if 'rcs' in data[width][nodes][ch]:
        data[width][nodes][ch]['rcs'].append(rcs)
    else:
        data[width][nodes][ch]['rcs'] = [rcs]

for width in sorted(data.keys()):
    for nodes in sorted(data[width].keys()):
        for channels in sorted(data[width][nodes].keys()):
            rd = data[width][nodes][channels]
            rd['spdpcs'] = simplestats.mean(rd['spdpcs'])
            rd['spgdy'] = simplestats.mean(rd['spgdy'])
            rd['btldpcs'] = simplestats.mean(rd['btldpcs'])
            rd['btlgdy'] = simplestats.mean(rd['btlgdy'])
            rd['rcsdpcs'] = simplestats.mean(rd['rcsdpcs'])
            rd['rcs'] = simplestats.mean(rd['rcs'])
            
# Scenario 1: vary # of channels in {1, 2, 3, 4, 5}
d = data[50000.0][25]
spdpcs = []
spgdy = []
btldpcs = []
btlgdy = []
rcsdpcs = []
rcs = []
idx = [9, 16, 25, 36, 49];
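
The repeated "create the list if missing, then append" blocks in these plotting scripts can be collapsed with dict.setdefault, which behaves identically:

# dict.setdefault returns the existing list or installs (and returns) the
# default, so the if/else append pattern above becomes a single call.
rd = {}
rd.setdefault('prim', []).append(1.25)
rd.setdefault('prim', []).append(2.50)
# rd == {'prim': [1.25, 2.5]}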
Example #15
def _accumulate_plugin_errors(raw_data):
    data = []
    for label, scores in itertools.groupby(raw_data, lambda x: x[0]):
        data.append((label, mean(v for (l, v) in scores)))
    return data
Example #16
    if 'prim' in data[width][nodes][ch]:
        data[width][nodes][ch]['prim'].append(prim)
    else:
        data[width][nodes][ch]['prim'] = [prim]

    if 'rcs' in data[width][nodes][ch]:
        data[width][nodes][ch]['rcs'].append(rcs)
    else:
        data[width][nodes][ch]['rcs'] = [rcs]

for width in sorted(data.keys()):
    for nodes in sorted(data[width].keys()):
        for channels in sorted(data[width][nodes].keys()):
            rd = data[width][nodes][channels]
            rd['dijkstra'] = simplestats.mean(rd['dijkstra'])
            rd['prim'] = simplestats.mean(rd['prim'])
            rd['rcs'] = simplestats.mean(rd['rcs'])
            
# Scenario 1: vary # of channels in {1, 2, 3, 4, 5}
d = data[50000.0][25]
dij = []
pri = []
rcs = []
for channels in sorted(d.keys()):
    dij.append(str(d[channels]['dijkstra'] / 1000000))
    pri.append(str(d[channels]['prim'] / 1000000))
    rcs.append(str(d[channels]['rcs'] / 1000000))
    
print("""
figure(1);