def get_mean_exposures_per_pivot():
    """Mean number of exposures per pivot: words, kanji, combined, and
    per-kanji counts that also credit kanji occurring inside word pivots."""
    cursor = connection.cursor()
    cursor.execute("""
        SELECT pivot, pivot_type, COUNT(*) as n_exposures
        FROM drill_question
        GROUP BY CONCAT(pivot, "|", pivot_type)
    """)
    word_counts = []
    kanji_counts = []
    all_counts = []
    per_kanji_dist = FreqDist()
    for pivot, pivot_type, n_exposures in cursor.fetchall():
        all_counts.append(n_exposures)
        if pivot_type == 'k':
            kanji_counts.append(n_exposures)
            per_kanji_dist.inc(pivot, n_exposures)
        elif pivot_type == 'w':
            word_counts.append(n_exposures)
            # An exposure of a word also counts as an exposure of each
            # distinct kanji the word contains.
            for kanji in scripts.unique_kanji(pivot):
                per_kanji_dist.inc(kanji, n_exposures)
        else:
            raise ValueError('unknown pivot type: %s' % pivot_type)
    return [
        ('Words', mean(word_counts)),
        ('Kanji', mean(kanji_counts)),
        ('Combined', mean(all_counts)),
        ('Kanji combined', mean(per_kanji_dist.values())),
    ]
def _calculate_pre_post_ratios(response_data): """ Returns the number of data which are correctly responded to on their first presentation. """ response_data = [(pid, pt, i, ic) for (i, (pid, pt, ic)) in enumerate(response_data)] response_data.sort() first_responses = [] last_responses = [] for (pivot_id, pivot_type), responses in groupby(response_data, lambda r: (r[0], r[1])): responses = list(responses) if len(responses) < 2: continue first_responses.append(responses[0][3]) last_responses.append(responses[-1][3]) if not first_responses: return None, None return ( mean(first_responses), mean(last_responses), )
def get_global_rater_stats():
    """
    Returns a list of per-user statistics dicts (user_id, username,
    n_responses, n_tests, mean_accuracy, n_errors, pre/post first-vs-last
    presentation ratios and their difference), skipping ignored users.
    """
    cursor = connection.cursor()
    cursor.execute("""
        SELECT id, username FROM auth_user
    """)
    id_to_username = dict(cursor.fetchall())
    # One row per multiple-choice response: (user_id, pivot, pivot_type,
    # is_correct), ordered by user then timestamp so groupby() below sees
    # each user's rows contiguously and chronologically.
    cursor.execute("""
        SELECT q_time.user_id, q.pivot, q.pivot_type, q_time.is_correct
        FROM (
            SELECT mco.question_id, mco.is_correct, ot.user_id, ot.timestamp
            FROM (
                SELECT mcr.option_id, dr.user_id, dr.timestamp
                FROM drill_response AS dr
                INNER JOIN drill_multiplechoiceresponse AS mcr
                ON mcr.response_ptr_id = dr.id
            ) AS ot
            INNER JOIN drill_multiplechoiceoption AS mco
            ON mco.id = ot.option_id
        ) AS q_time
        INNER JOIN drill_question AS q
        ON q.id = q_time.question_id
        ORDER BY user_id ASC, q_time.timestamp ASC
    """)
    results = []
    ignore_users = _get_user_ignore_set()
    for user_id, rows in groupby(cursor.fetchall(), lambda r: r[0]):
        if user_id in ignore_users:
            continue
        rows = [(p, pt, c) for (_u, p, pt, c) in rows]  # discard user_id
        user_data = {'user_id': user_id, 'username': id_to_username[user_id]}
        user_data['n_responses'] = len(rows)
        user_data['n_tests'] = drill_models.TestSet.objects.filter(
                user__id=user_id).exclude(end_time=None).count()
        user_data['mean_accuracy'] = mean(r[2] for r in rows)
        # BUG FIX: errors are the *incorrect* responses; the original counted
        # rows where is_correct (r[2], the same field mean_accuracy averages)
        # was truthy, i.e. it counted the correct ones.
        user_data['n_errors'] = _seq_len(r for r in rows if not r[2])
        pre_ratio, post_ratio = _calculate_pre_post_ratios(rows)
        user_data['pre_ratio'] = pre_ratio
        user_data['post_ratio'] = post_ratio
        user_data['state_machine'] = _calculated_item_change(rows)
        # Compare against None explicitly: a ratio of 0.0 is a legitimate
        # value and should still yield a pre/post difference.
        if pre_ratio is not None and post_ratio is not None:
            user_data['pre_post_diff'] = post_ratio - pre_ratio
        else:
            user_data['pre_post_diff'] = None
        results.append(user_data)
    return results
def get_rater_stats(rater):
    """Summary statistics for a single rater: response count, test count,
    and mean accuracy over their multiple-choice selections."""
    option_rows = drill_models.MultipleChoiceOption.objects.filter(
            multiplechoiceresponse__user=rater).values('is_correct')
    accuracy = mean(1 if row['is_correct'] else 0 for row in option_rows)
    return {
        'n_responses': drill_models.Response.objects.filter(user=rater).count(),
        'n_tests': drill_models.TestSet.objects.filter(user=rater).count(),
        'mean_accuracy': accuracy,
    }
def get_global_rater_stats():
    """
    Returns a list of per-user statistics dicts (user_id, username,
    n_responses, n_tests, mean_accuracy, n_errors, pre/post first-vs-last
    presentation ratios and their difference), skipping ignored users.
    """
    cursor = connection.cursor()
    cursor.execute("""
        SELECT id, username FROM auth_user
    """)
    id_to_username = dict(cursor.fetchall())
    # One row per multiple-choice response: (user_id, pivot, pivot_type,
    # is_correct), ordered by user then timestamp so groupby() below sees
    # each user's rows contiguously and chronologically.
    cursor.execute("""
        SELECT q_time.user_id, q.pivot, q.pivot_type, q_time.is_correct
        FROM (
            SELECT mco.question_id, mco.is_correct, ot.user_id, ot.timestamp
            FROM (
                SELECT mcr.option_id, dr.user_id, dr.timestamp
                FROM drill_response AS dr
                INNER JOIN drill_multiplechoiceresponse AS mcr
                ON mcr.response_ptr_id = dr.id
            ) AS ot
            INNER JOIN drill_multiplechoiceoption AS mco
            ON mco.id = ot.option_id
        ) AS q_time
        INNER JOIN drill_question AS q
        ON q.id = q_time.question_id
        ORDER BY user_id ASC, q_time.timestamp ASC
    """)
    results = []
    ignore_users = _get_user_ignore_set()
    for user_id, rows in groupby(cursor.fetchall(), lambda r: r[0]):
        if user_id in ignore_users:
            continue
        rows = [(p, pt, c) for (_u, p, pt, c) in rows]  # discard user_id
        user_data = {'user_id': user_id, 'username': id_to_username[user_id]}
        user_data['n_responses'] = len(rows)
        user_data['n_tests'] = drill_models.TestSet.objects.filter(
                user__id=user_id).exclude(end_time=None).count()
        user_data['mean_accuracy'] = mean(r[2] for r in rows)
        # BUG FIX: errors are the *incorrect* responses; the original counted
        # rows where is_correct (r[2], the same field mean_accuracy averages)
        # was truthy, i.e. it counted the correct ones.
        user_data['n_errors'] = _seq_len(r for r in rows if not r[2])
        pre_ratio, post_ratio = _calculate_pre_post_ratios(rows)
        user_data['pre_ratio'] = pre_ratio
        user_data['post_ratio'] = post_ratio
        user_data['state_machine'] = _calculated_item_change(rows)
        # Compare against None explicitly: a ratio of 0.0 is a legitimate
        # value and should still yield a pre/post difference.
        if pre_ratio is not None and post_ratio is not None:
            user_data['pre_post_diff'] = post_ratio - pre_ratio
        else:
            user_data['pre_post_diff'] = None
        results.append(user_data)
    return results
def get_rater_stats(rater):
    """Summary statistics for a single rater: response count, test count,
    and mean accuracy over their multiple-choice selections."""
    option_rows = drill_models.MultipleChoiceOption.objects.filter(
            multiplechoiceresponse__user=rater).values('is_correct')
    accuracy = mean(1 if row['is_correct'] else 0 for row in option_rows)
    return {
        'n_responses': drill_models.Response.objects.filter(user=rater).count(),
        'n_tests': drill_models.TestSet.objects.filter(user=rater).count(),
        'mean_accuracy': accuracy,
    }
def sample_by_centroid(cls, locations, potential_locations, use_max=False):
    """
    Choose one of potential_locations, weighted by inverse-square distance
    to the centroid of locations. With use_max=True, deterministically
    return the highest-scoring candidate instead of sampling.
    """
    centroid_x = mean(x for (x, y) in locations)
    centroid_y = mean(y for (x, y) in locations)
    weighted = []
    # Removed unused local n_locations = len(potential_locations).
    for location in potential_locations:
        x, y = location
        distance = sqrt((x - centroid_x)**2 + (y - centroid_y)**2)
        # Inverse-square weighting; a candidate exactly on the centroid
        # would divide by zero, so its score is capped at 1.
        if distance > 0:
            score = 1 / distance**2
        else:
            score = 1
        weighted.append((location, score))
    sampler = prob.ProbDist(weighted)
    if use_max:
        # Highest probability wins; ties broken by comparing locations.
        _score, best = max((p, l) for (l, p) in sampler.iteritems())
        return best
    return sampler.sample()
# NOTE(review): this chunk arrived whitespace-mangled (one physical line); the
# leading if/else accumulators presumably sat inside an upstream results-parsing
# loop (width/nodes/ch/prim/rcs bound per record) -- the original nesting cannot
# be recovered here, so the text is left untouched. What it appears to do:
# accumulate per-(width, nodes, channels) metric lists, collapse each list to
# its mean via simplestats.mean, then emit Octave plot code for the fixed
# width=50000.0, nodes=25 scenario, varying the channel count.
if data[width][nodes][ch].has_key('prim'): data[width][nodes][ch]['prim'].append(prim) else: data[width][nodes][ch]['prim'] = [prim] if data[width][nodes][ch].has_key('rcs'): data[width][nodes][ch]['rcs'].append(rcs) else: data[width][nodes][ch]['rcs'] = [rcs] for width in sorted(data.keys()): for nodes in sorted(data[width].keys()): for channels in sorted(data[width][nodes].keys()): rd = data[width][nodes][channels] rd['dijkstra'] = simplestats.mean(rd['dijkstra']) rd['prim'] = simplestats.mean(rd['prim']) rd['rcs'] = simplestats.mean(rd['rcs']) # Scenario 1: vary # of channels in {1, 2, 3, 4, 5} d = data[50000.0][25] dij = [] pri = [] rcs = [] for channels in sorted(d.keys()): dij.append(str(d[channels]['dijkstra'] / 1000000)) pri.append(str(d[channels]['prim'] / 1000000)) rcs.append(str(d[channels]['rcs'] / 1000000)) print( """
def _accumulate_plugin_errors(raw_data): data = [] for label, scores in itertools.groupby(raw_data, lambda x: x[0]): data.append((label, mean(v for (l, v) in scores))) return data
# NOTE(review): whitespace-mangled chunk (one physical line) -- left untouched
# because the original line breaks and nesting cannot be recovered. It appears
# to: keep a commented-out nested debug dump of the data dict, then collect
# per-width means of the 'mst'/'mst2'/'mst-apx'/'mst2-apx' series (at fixed
# MUSERS/MCHAN/0 keys), build x-axis indices in km (width/1000), and start
# printing an Octave figure template with %(...)s placeholders.
# for j in data[i].keys(): # for k in data[i][j].keys(): # for l in data[i][j][k].keys(): # for m in data[i][j][k][l].keys(): # print "W: %d N: %d C: %d AP: %d V: %s #: %d" % (i, j, k, l, m, len(data[i][j][k][l][m])) # Scenario 1: vary size of the network mst = [] mst2 = [] msto = [] mst2o = [] idxs = [] for width in sorted(data.keys()): mst.append(str(simplestats.mean(data[width][MUSERS][MCHAN][0]['mst']))) mst2.append(str(simplestats.mean(data[width][MUSERS][MCHAN][0]['mst2']))) msto.append(str(simplestats.mean(data[width][MUSERS][MCHAN][0]['mst-apx']))) mst2o.append(str(simplestats.mean(data[width][MUSERS][MCHAN][0]['mst2-apx']))) idxs.append(int(width/1000)) print(""" figure(1); set(1, \"defaulttextfontname\", \"Times-Roman\"); set(1, \"defaultaxesfontname\", \"Times-Roman\"); set(1, \"defaulttextfontsize\", 19); set(1, \"defaultaxesfontsize\", 19); X = %(idx)s; MST = [ %(mst)s ]; MST2 = [ %(mst2)s ];
# NOTE(review): whitespace-mangled chunk (one physical line) -- left untouched
# because the original nesting cannot be recovered; the leading 'r2'
# accumulator presumably sat inside an upstream parsing loop. It appears to:
# collapse each (width, theta, relay, subs, mq, ch) bucket's 'ilp'/'r1'/'r2'
# lists to their means (lilp/lr1/lr2 lengths computed but apparently unused --
# TODO confirm upstream), then emit Octave plot data for the fixed
# width=40000.0, theta=40, relays=4 scenario, varying the subscriber count.
if data[width][theta][relays][subs][mq][ch].has_key('r2'): data[width][theta][relays][subs][mq][ch]['r2'].append(r2) else: data[width][theta][relays][subs][mq][ch]['r2'] = [r2] for width in sorted(data.keys()): for theta in sorted(data[width].keys()): for relay in sorted(data[width][theta].keys()): for subs in sorted(data[width][theta][relay].keys()): for mq in sorted(data[width][theta][relay][subs].keys()): for ch in sorted(data[width][theta][relay][subs][mq].keys()): rd = data[width][theta][relay][subs][mq][ch] lilp = len(rd['ilp']) lr1 = len(rd['r1']) lr2 = len(rd['r2']) rd['ilp'] = simplestats.mean(rd['ilp']) rd['r1'] = simplestats.mean(rd['r1']) rd['r2'] = simplestats.mean(rd['r2']) # Scenario 1: vary # of subscribers in {20, 30, 40, 50, 60} d = data[40000.0][40][4] ilp = [] r1 = [] r2 = [] for subs in sorted(d.keys()): ilp.append(str(d[subs][40000.0][4]['ilp'] / 1000000)) r1.append(str(d[subs][40000.0][4]['r1'] / 1000000)) r2.append(str(d[subs][40000.0][4]['r2'] / 1000000)) print(""" figure(1);
# NOTE(review): whitespace-mangled chunk (one physical line) -- left untouched
# because the original nesting cannot be recovered; the leading accumulators
# presumably sat inside an upstream parsing loop. It appears to: accumulate
# 'rcsdpcs'/'rcs' samples per (width, nodes, channels) bucket, collapse all six
# metric lists (spdpcs, spgdy, btldpcs, btlgdy, rcsdpcs, rcs) to their means
# via simplestats.mean, then set up series lists and a hard-coded x-axis
# (idx = node counts 9..49) for the width=50000.0, nodes=25 scenario.
if data[width][nodes][ch].has_key('rcsdpcs'): data[width][nodes][ch]['rcsdpcs'].append(rcsdpcs) else: data[width][nodes][ch]['rcsdpcs'] = [rcsdpcs] if data[width][nodes][ch].has_key('rcs'): data[width][nodes][ch]['rcs'].append(rcs) else: data[width][nodes][ch]['rcs'] = [rcs] for width in sorted(data.keys()): for nodes in sorted(data[width].keys()): for channels in sorted(data[width][nodes].keys()): rd = data[width][nodes][channels] rd['spdpcs'] = simplestats.mean(rd['spdpcs']) rd['spgdy'] = simplestats.mean(rd['spgdy']) rd['btldpcs'] = simplestats.mean(rd['btldpcs']) rd['btlgdy'] = simplestats.mean(rd['btlgdy']) rd['rcsdpcs'] = simplestats.mean(rd['rcsdpcs']) rd['rcs'] = simplestats.mean(rd['rcs']) # Scenario 1: vary # of channels in {1, 2, 3, 4, 5} d = data[50000.0][25] spdpcs = [] spgdy = [] btldpcs = [] btlgdy = [] rcsdpcs = [] rcs = [] idx = [9, 16, 25, 36, 49];
# NOTE(review): whitespace-mangled chunk (one physical line), near-duplicate of
# an earlier chunk in this file -- left untouched because the original nesting
# cannot be recovered; the leading 'prim'/'rcs' accumulators presumably sat
# inside an upstream parsing loop. It appears to: collapse per-(width, nodes,
# channels) 'dijkstra'/'prim'/'rcs' lists to their means via simplestats.mean,
# then emit Octave plot data (values scaled by 1e6) for the width=50000.0,
# nodes=25 scenario, varying the channel count.
if data[width][nodes][ch].has_key('prim'): data[width][nodes][ch]['prim'].append(prim) else: data[width][nodes][ch]['prim'] = [prim] if data[width][nodes][ch].has_key('rcs'): data[width][nodes][ch]['rcs'].append(rcs) else: data[width][nodes][ch]['rcs'] = [rcs] for width in sorted(data.keys()): for nodes in sorted(data[width].keys()): for channels in sorted(data[width][nodes].keys()): rd = data[width][nodes][channels] rd['dijkstra'] = simplestats.mean(rd['dijkstra']) rd['prim'] = simplestats.mean(rd['prim']) rd['rcs'] = simplestats.mean(rd['rcs']) # Scenario 1: vary # of channels in {1, 2, 3, 4, 5} d = data[50000.0][25] dij = [] pri = [] rcs = [] for channels in sorted(d.keys()): dij.append(str(d[channels]['dijkstra'] / 1000000)) pri.append(str(d[channels]['prim'] / 1000000)) rcs.append(str(d[channels]['rcs'] / 1000000)) print(""" figure(1);