def get_test_size_stats():
    """
    Fetches the distribution of test sizes chosen by users, and the
    completion statistics for each reported size.
    """
    cursor = connection.cursor()
    cursor.execute("""
        SELECT n_items, COUNT(*), AVG(is_finished)
        FROM (
            SELECT ts.id AS test_set_id, COUNT(*) AS n_items,
                (ts.end_time IS NOT NULL) AS is_finished
            FROM drill_testset_questions AS tsq
            INNER JOIN drill_testset AS ts
            ON tsq.testset_id = ts.id
            GROUP BY ts.id
        ) AS tmp
        GROUP BY n_items
        ORDER BY n_items ASC
    """)
    data = cursor.fetchall()

    # Aggregate the per-test-set rows into a frequency distribution of test
    # sizes, remembering the completion rate observed for each size.
    dist = FreqDist()
    completion_rates = {}
    for n_items, n_tests, completion_rate in data:
        dist.inc(n_items, n_tests)
        completion_rates[n_items] = completion_rate

    results = []
    for sample in sorted(dist.samples()):
        results.append((sample, dist.freq(sample), completion_rates[sample]))

    return results
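# These reporting helpers assume module-level imports elsewhere in this file:
# Django's database connection, the site's User/UserProfile and drill models,
# the cjktools scripts module, and a FreqDist class (most likely NLTK 2.x's
# FreqDist or an equivalent). Only a small slice of the FreqDist API is used
# here (inc, freq, N, samples and dict-style indexing); a minimal standalone
# sketch of that slice, for illustration only, is given below.
class _FreqDistSketch(dict):
    "Illustrative stand-in for the FreqDist interface used in this module."

    def __missing__(self, key):
        # Unseen samples count as zero, as NLTK's FreqDist does.
        return 0

    def inc(self, sample, count=1):
        "Record `count` observations of `sample`."
        self[sample] = self[sample] + count

    def N(self):
        "Total number of observations recorded."
        return sum(self.values())

    def freq(self, sample):
        "Relative frequency of `sample`, or 0.0 if nothing has been counted."
        total = self.N()
        return self[sample] / float(total) if total else 0.0

    def samples(self):
        "All samples observed so far."
        return self.keys()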
def get_accuracy_by_pivot_type():
    "Fetches response accuracy broken down by the script type of each question's pivot word."
    cursor = connection.cursor()
    cursor.execute("""
        SELECT question.pivot, SUM(chosen_option.is_correct) AS n_correct,
            COUNT(*) AS n_responses
        FROM (
            SELECT mco.question_id, mco.is_correct
            FROM drill_multiplechoiceresponse AS mcr
            INNER JOIN drill_multiplechoiceoption AS mco
            ON mcr.option_id = mco.id
        ) AS chosen_option
        INNER JOIN drill_question AS question
        ON chosen_option.question_id = question.id
        WHERE question.pivot_type = "w"
        GROUP BY question.pivot
    """)
    raw_data = cursor.fetchall()

    counts = {
        'Hiragana': FreqDist(),
        'Katakana': FreqDist(),
        'Kanji': FreqDist(),
    }
    complex_scripts = set([scripts.Script.Kanji, scripts.Script.Unknown])
    only_katakana = set([scripts.Script.Katakana])
    for word, n_correct, n_responses in raw_data:
        # Bucket each word by the most complex script it contains.
        scripts_found = scripts.script_types(word)
        if scripts_found.intersection(complex_scripts):
            dist = counts['Kanji']
        elif scripts_found.intersection(only_katakana):
            dist = counts['Katakana']
        else:
            dist = counts['Hiragana']
        dist.inc(True, int(n_correct))
        dist.inc(False, int(n_responses - n_correct))

    keys = ('Hiragana', 'Katakana', 'Kanji')
    data = [(key, counts[key].freq(True)) for key in keys]

    # Pool the three distributions to get an overall accuracy figure.
    average = FreqDist()
    for key in keys:
        average.inc(True, counts[key][True])
        average.inc(False, counts[key][False])
    data.append(('Average', average.freq(True)))

    return data
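# The Kanji/Katakana/Hiragana bucketing above relies on cjktools' scripts
# module. As a rough standalone illustration of the same rule (not the
# library's implementation), words can be bucketed by Unicode ranges: any CJK
# ideograph puts the word in the 'Kanji' bucket, otherwise any katakana makes
# it 'Katakana', and everything else is treated as 'Hiragana'. This sketch
# ignores edge cases (such as unknown scripts) that cjktools handles.
def _bucket_by_script(word):
    "Rough standalone version of the script bucketing used above."
    if any(u'\u4e00' <= c <= u'\u9fff' for c in word):
        return 'Kanji'     # contains a CJK ideograph
    if any(u'\u30a0' <= c <= u'\u30ff' for c in word):
        return 'Katakana'  # contains katakana but no kanji
    return 'Hiragana'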
def get_language_data(name):
    "Fetches information about user first and second languages."
    assert name in ('first', 'second', 'combined'), 'unknown dataset: %s' % name
    fields_needed = ['user_id']
    if name in ('first', 'combined'):
        fields_needed.append('first_language')
    if name in ('second', 'combined'):
        fields_needed.append('second_languages')

    # Only consider users who have completed at least one test set.
    valid_users = set(u.id for u in User.objects.all()
            if u.testset_set.exclude(end_time=None).count() > 0)
    profiles = UserProfile.objects.filter(user__id__in=valid_users).values(
            *fields_needed)

    dist = FreqDist()
    for profile in profiles:
        if 'first_language' in profile:
            lang = profile['first_language'].title()
            dist.inc(lang)

        if 'second_languages' in profile:
            if profile['second_languages']:
                for lang in profile['second_languages'].split(','):
                    lang = lang.strip().title()
                    if lang == 'Japanese':
                        continue
                    dist.inc(lang)
            elif name != 'combined':
                # Users with no second language count as 'None', except in
                # the combined distribution.
                dist.inc('None')

    return dist
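# Hypothetical call site for get_language_data(), shown for illustration only;
# it assumes a configured Django environment with these models installed.
# 'first' is one of the dataset names the function accepts.
#
#   dist = get_language_data('first')
#   for lang in sorted(dist.samples(), key=lambda l: dist[l], reverse=True):
#       print '%s: %d users (%.1f%%)' % (lang, dist[lang], 100 * dist.freq(lang))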
def get_pivot_response_stats(pivot_id, pivot_type):
    """
    Given a particular pivot, generate a distribution of observed erroneous
    responses for each type of plugin.
    """
    cursor = connection.cursor()
    cursor.execute("""
        SELECT plugin_option.plugin_id, plugin_option.value
        FROM drill_multiplechoiceresponse AS mcr
        INNER JOIN (
            SELECT pivot_qn.plugin_id, mco.id AS option_id, mco.value
            FROM (
                SELECT id, question_plugin_id AS plugin_id
                FROM drill_question
                WHERE pivot_type = "%(pivot_type)s"
                AND pivot_id = %(pivot_id)d
            ) AS pivot_qn
            INNER JOIN drill_multiplechoiceoption AS mco
            ON mco.question_id = pivot_qn.id
        ) AS plugin_option
        ON plugin_option.option_id = mcr.option_id
    """ % {'pivot_type': pivot_type, 'pivot_id': pivot_id})
    rows = cursor.fetchall()

    # Build one distribution of chosen option values per plugin.
    dist_map = {}
    plugin_ids_used = set(plugin_id for (plugin_id, error_value) in rows)
    for plugin_id in plugin_ids_used:
        dist_map[plugin_id] = FreqDist()

    for plugin_id, error_value in rows:
        dist_map[plugin_id].inc(error_value)

    plugin_map = drill_models.QuestionPlugin.objects.in_bulk(dist_map.keys())
    results = [(plugin_map[plugin_id].name, dist)
            for (plugin_id, dist) in dist_map.iteritems()]

    # Prepend a summary distribution of how many responses each plugin saw.
    combined_dist = FreqDist()
    for name, dist in results:
        combined_dist.inc(name, dist.N())
    results[0:0] = [('By plugin type', combined_dist)]

    return results
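# Note: get_pivot_response_stats() interpolates pivot_type and pivot_id
# directly into the SQL string. Django's cursor.execute() also accepts bound
# parameters, which sidesteps quoting issues; a sketch of the same call with
# placeholders (query body abbreviated, not the full statement above):
#
#   cursor.execute("""
#       ... WHERE pivot_type = %s AND pivot_id = %s ...
#   """, [pivot_type, pivot_id])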
def get_mean_exposures_per_pivot():
    "Returns the mean number of exposures per pivot, overall and by pivot type."
    cursor = connection.cursor()
    cursor.execute("""
        SELECT pivot, pivot_type, COUNT(*) AS n_exposures
        FROM drill_question
        GROUP BY CONCAT(pivot, "|", pivot_type)
    """)
    data = cursor.fetchall()

    word_c = []
    kanji_c = []
    combined_c = []
    kanji_inc_dist = FreqDist()
    for pivot, pivot_type, n_exposures in data:
        combined_c.append(n_exposures)
        if pivot_type == 'k':
            kanji_c.append(n_exposures)
            kanji_inc_dist.inc(pivot, n_exposures)
        elif pivot_type == 'w':
            word_c.append(n_exposures)
            # Word exposures also count towards each kanji the word contains.
            for kanji in scripts.unique_kanji(pivot):
                kanji_inc_dist.inc(kanji, n_exposures)
        else:
            raise ValueError('unknown pivot type: %s' % pivot_type)

    return [
        ('Words', mean(word_c)),
        ('Kanji', mean(kanji_c)),
        ('Combined', mean(combined_c)),
        ('Kanji combined', mean(kanji_inc_dist.values())),
    ]
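# mean() is assumed to come from a shared statistics utility (e.g. numpy.mean
# or a local helper); it is not defined in this module. If it is the plain
# arithmetic mean, a minimal illustrative stand-in looks like this:
def _mean_sketch(values):
    "Arithmetic mean of a non-empty sequence (illustrative stand-in for mean())."
    values = list(values)
    return sum(values) / float(len(values))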