def basic(request):
    """Calculates and displays some basic statistics.

    Renders analysis/basic.html with aggregate usage statistics: user,
    response and test counts, per-user ratios, the logging period, the
    mean score, the test size distribution and mean time used.
    """
    context = {}

    # Number of users
    num_users = stats.count_active_users()
    context["num_users"] = num_users

    # Number of questions answered
    num_responses = models.MultipleChoiceResponse.objects.count()
    context["num_responses"] = num_responses
    # Bug fix: guard ratios against an empty database, which used to
    # raise ZeroDivisionError on a fresh install.
    context["responses_per_user"] = (
            num_responses / float(num_users) if num_users else 0.0)

    # Only completed tests (those with an end time) are counted.
    num_tests = models.TestSet.objects.exclude(end_time=None).count()
    context["num_tests"] = num_tests

    (context["mean_tbt"], context["std_tbt"]) = basic_stats(
            stats.get_time_between_tests())

    # Earliest and latest test start times delimit the logging period.
    # Bug fix: an empty TestSet table used to raise IndexError here.
    try:
        context["log_start_time"] = models.TestSet.objects.order_by(
                "start_time")[0].start_time
        context["log_end_time"] = models.TestSet.objects.order_by(
                "-start_time")[0].start_time
    except IndexError:
        context["log_start_time"] = context["log_end_time"] = None

    context["tests_per_user"] = (
            num_tests / float(num_users) if num_users else 0.0)
    context["responses_per_test"] = (
            num_responses / float(num_tests) if num_tests else 0.0)

    all_responses = models.MultipleChoiceResponse.objects
    total_responses = all_responses.count()
    # Mean score: the fraction of responses that chose a correct option.
    context["mean_score"] = (
            all_responses.filter(option__is_correct=True).count() /
            float(total_responses) if total_responses else 0.0)

    test_stats = stats.get_test_size_stats()
    # Scale the proportions to percentages for display.
    pretty_results = [(k, 100 * t, 100 * c) for (k, t, c) in test_stats]
    context["test_dist"] = pretty_results

    context["time_used_mean"], context["time_used_std"] = \
            stats.get_mean_time_used()

    return render_to_response("analysis/basic.html", context,
            RequestContext(request))
def approximate(data, n_points=10, x_min=None, x_max=None):
    """
    Approximates a data series by grouping points into a number of bins.

    data: a sequence of (x, y) pairs; y values are averaged per bin.
    n_points: the number of equal-width bins covering [x_min, x_max].
    Returns a list of (midpoint, mean, lower, upper) tuples, where
    lower/upper are the mean -/+ two standard deviations clamped to
    [0.0, 1.0]. Bins containing fewer than 3 points are skipped.
    """
    if x_min is None:
        x_min = min(x for (x, y) in data)
    if x_max is None:
        x_max = max(x for (x, y) in data)
    # Bug fix: divide in float space; float((x_max - x_min) / n_points)
    # truncated to an integer width first when the x values were ints.
    interval = (x_max - x_min) / float(n_points)
    eps = 1e-8

    results = []
    for bin_no in range(n_points):
        # Bug fix: offset every bin by x_min; previously bins always
        # started at 0, so any series with x_min > 0 fell outside them.
        start_interval = x_min + bin_no * interval
        end_interval = x_min + (bin_no + 1) * interval
        if bin_no == n_points - 1:
            # Widen the last bin slightly so x_max itself is included.
            end_interval += eps
        midpoint = x_min + (bin_no + 0.5) * interval

        sub_data = [float(y) for (x, y) in data
                if start_interval <= x < end_interval]
        if len(sub_data) < 3:
            continue

        avg, stddev = basic_stats(sub_data)
        results.append((
            midpoint,
            avg,
            max(avg - 2 * stddev, 0.0),
            min(avg + 2 * stddev, 1.0),
        ))
    return results
def approximate(data, n_points=10, x_min=None, x_max=None):
    """
    Approximates a data series by grouping points into a number of bins.

    Groups the (x, y) pairs in data into n_points equal-width bins over
    [x_min, x_max] (defaulting to the observed x range) and returns one
    (midpoint, mean, lower, upper) tuple per bin, where lower/upper are
    mean -/+ two standard deviations clamped to [0.0, 1.0]. Bins with
    fewer than 3 samples are dropped.
    """
    if x_min is None:
        x_min = min(x for (x, y) in data)
    if x_max is None:
        x_max = max(x for (x, y) in data)
    # Bug fix: compute the bin width with float division -- the old
    # float((x_max - x_min) / n_points) truncated for integer inputs.
    interval = (x_max - x_min) / float(n_points)
    eps = 1e-8
    results = []
    for bin_no in range(n_points):
        # Bug fix: bins must start at x_min, not at zero; the old code
        # missed every point whenever x_min was non-zero.
        lo = x_min + bin_no * interval
        hi = x_min + (bin_no + 1) * interval
        if bin_no == n_points - 1:
            # Nudge the final bin's edge so x_max itself falls inside.
            hi += eps
        midpoint = x_min + (bin_no + 0.5) * interval
        sub_data = [float(y) for (x, y) in data if lo <= x < hi]
        if len(sub_data) < 3:
            continue
        avg, stddev = basic_stats(sub_data)
        results.append((
            midpoint,
            avg,
            max(avg - 2 * stddev, 0.0),
            min(avg + 2 * stddev, 1.0),
        ))
    return results
def get_mean_time_used():
    "Gets the mean time in days for which the system was used."
    # Completed test sets only; ordering by user id is required so that
    # groupby() below sees each user's tests as one contiguous run.
    completed = drill_models.TestSet.objects.exclude(
            end_time=None).order_by('user__id')
    one_day = timedelta(days=1)
    days_per_user = []
    for _, tests in groupby(completed, lambda t: t.user_id):
        by_start = sorted(tests, key=lambda t: t.start_time)
        # Span from the user's first test start to their last-starting
        # test's end time, expressed in days.
        span = by_start[-1].end_time - by_start[0].start_time
        days_per_user.append(_scale_time_delta(span, one_day))
    return basic_stats(days_per_user)
def get_mean_time_used():
    "Gets the mean time in days for which the system was used."
    day_unit = timedelta(days=1)
    usage_days = []
    # groupby() requires its input sorted on the grouping key, hence the
    # user__id ordering on the queryset of completed tests.
    finished_tests = drill_models.TestSet.objects.exclude(
            end_time=None).order_by('user__id')
    for uid, group in groupby(finished_tests, lambda t: t.user_id):
        chronological = sorted(group, key=lambda t: t.start_time)
        first, last = chronological[0], chronological[-1]
        # How long this user was active, scaled into days.
        usage_days.append(
                _scale_time_delta(last.end_time - first.start_time,
                        day_unit))
    return basic_stats(usage_days)
def evaluate_paths(input_file, limit=5): print 'Evaluating paths from "%s"' % os.path.basename(input_file) traces = TraceFile.load(input_file) path_lengths = [] successes = [] for (query, target, path) in traces: if path and path[-1] == target: successes.append(path) path_lengths.append(len(path) - 1) else: path_lengths.append(limit) print u'Success rate: %d/%d (%.02f%%)' % ( len(successes), len(traces), 100.0 * len(successes) / len(traces)) print u'Mean path length: %.02f (σ = %.02f)' % basic_stats(path_lengths)
def group_by_points(data, y_max=sys.maxsize, y_min=None):
    """
    Similar to approximate(), but more useful when the x axis is
    discrete. Instead of quantizing the x axis, we just group points by
    x values, and average across them.

    Returns (x, mean, upper, lower) tuples; groups with fewer than 3
    points are skipped. The band is mean +/- two standard deviations,
    clamped to [y_min, y_max].
    """
    # Bug fix: sort a copy instead of mutating the caller's list in
    # place with data.sort().  (sys.maxsize also replaces the removed
    # Python 2-only sys.maxint; it is an equally permissive upper clamp.)
    ordered = sorted(data)
    new_data = []
    for x, rows in groupby(ordered, lambda r: r[0]):
        rows = list(rows)
        # Too few samples for a meaningful spread estimate.
        if len(rows) < 3:
            continue
        avg, std = basic_stats(y for (_, y) in rows)
        # NOTE(review): with the default y_min=None the max() below
        # relies on Python 2's "None compares smallest" rule.
        new_data.append((x, avg, min(avg + 2 * std, y_max),
                max(avg - 2 * std, y_min)))
    return new_data
def basic(request):
    """Calculates and displays some basic statistics.

    Builds a context of aggregate usage figures (counts, per-user and
    per-test ratios, logging period, mean score, test size distribution,
    mean time used) and renders analysis/basic.html with it.
    """
    context = {}

    # Number of users
    num_users = stats.count_active_users()
    context['num_users'] = num_users

    # Number of questions answered
    num_responses = models.MultipleChoiceResponse.objects.count()
    context['num_responses'] = num_responses
    # Bug fix: an empty database (no active users / tests / responses)
    # used to crash these ratios with ZeroDivisionError.
    if num_users:
        context['responses_per_user'] = num_responses / float(num_users)
    else:
        context['responses_per_user'] = 0.0

    # Count only tests which were actually completed.
    num_tests = models.TestSet.objects.exclude(end_time=None).count()
    context['num_tests'] = num_tests

    (
        context['mean_tbt'],
        context['std_tbt'],
    ) = basic_stats(stats.get_time_between_tests())

    # The logging period runs from the first to the last test start.
    # Bug fix: previously raised IndexError when no tests were logged.
    try:
        context['log_start_time'] = models.TestSet.objects.order_by(
                'start_time')[0].start_time
        context['log_end_time'] = models.TestSet.objects.order_by(
                '-start_time')[0].start_time
    except IndexError:
        context['log_start_time'] = None
        context['log_end_time'] = None

    if num_users:
        context['tests_per_user'] = num_tests / float(num_users)
    else:
        context['tests_per_user'] = 0.0
    if num_tests:
        context['responses_per_test'] = num_responses / float(num_tests)
    else:
        context['responses_per_test'] = 0.0

    all_responses = models.MultipleChoiceResponse.objects
    n_total = all_responses.count()
    # Fraction of responses whose selected option was correct.
    if n_total:
        context['mean_score'] = (
                all_responses.filter(option__is_correct=True).count() /
                float(n_total))
    else:
        context['mean_score'] = 0.0

    test_stats = stats.get_test_size_stats()
    # Express the distribution as percentages for the template.
    pretty_results = [(k, 100 * t, 100 * c) for (k, t, c) in test_stats]
    context['test_dist'] = pretty_results

    context['time_used_mean'], context['time_used_std'] = \
        stats.get_mean_time_used()

    return render_to_response("analysis/basic.html", context,
            RequestContext(request))
def group_by_points(data, y_max=sys.maxsize, y_min=None):
    """
    Similar to approximate(), but more useful when the x axis is
    discrete. Instead of quantizing the x axis, we just group points by
    x values, and average across them.

    Yields one (x, mean, upper, lower) tuple per x value with at least
    3 samples, where the band is mean +/- two standard deviations
    clamped to [y_min, y_max].
    """
    # Bug fix: the old data.sort() mutated the caller's list as a side
    # effect; work on a sorted copy instead.  sys.maxsize replaces the
    # Python 2-only sys.maxint as the default upper clamp.
    points = sorted(data)
    grouped = []
    for x_val, members in groupby(points, lambda r: r[0]):
        members = list(members)
        if len(members) < 3:
            # Not enough samples to estimate a spread.
            continue
        avg, std = basic_stats(y for (_, y) in members)
        upper = min(avg + 2 * std, y_max)
        # NOTE(review): y_min defaults to None, so this max() depends on
        # Python 2 ordering None below all numbers.
        lower = max(avg - 2 * std, y_min)
        grouped.append((x_val, avg, upper, lower))
    return grouped
def simulate_accessibility(output_file, threshold=DEFAULT_THRESHOLD):
    # Simulates a learner acquiring kanji one at a time, tracking how the
    # set of "accessible" kanji (known kanji plus their graph neighbours)
    # grows, and dumps (n_known, n_accessible) rows as CSV to output_file
    # every 50 kanji.
    # NOTE(review): `threshold` is unused in this body -- possibly meant
    # for RestrictedGraph; confirm before removing.
    print 'Loading frequency distribution'
    dist = FreqDist.from_file(settings.FREQ_SOURCE)
    print 'Loading kanji'
    kanji_set = list(models._get_kanji())
    # Fixed seed makes the shuffle -- and hence tie-breaking between
    # equal-probability kanji in the sort below -- reproducible.
    random.seed(123456789)
    random.shuffle(kanji_set)
    # Ascending by probability, i.e. rarest kanji first.
    # NOTE(review): confirm this ordering is intended -- acquisition
    # studies usually proceed most-frequent-first.
    kanji_in_order = sorted(kanji_set, key=lambda k: dist.prob(k))
    print 'Loading graph'
    graph = RestrictedGraph()
    print 'Dumping frequencies to %s' % os.path.basename(output_file)
    n_neighbours = []  # neighbourhood size per kanji, for the summary
    with codecs.open(output_file, 'w', 'utf8') as ostream:
        # CSV header plus the empty starting state.
        print >> ostream, u'#n_known,n_accessible'
        print >> ostream, u'%d,%d' % (0, 0)
        known_set = set()
        accessible_set = set()
        for i, kanji in enumerate(kanji_in_order):
            known_set.add(kanji)
            accessible_set.add(kanji)
            # Everything adjacent to a known kanji becomes accessible.
            neighbours = graph[kanji]
            accessible_set.update(neighbours)
            n_neighbours.append(len(neighbours))
            # Sample the curve every 50 acquired kanji.
            if (i + 1) % 50 == 0:
                print >> ostream, u'%d,%d' % (len(known_set),
                        len(accessible_set))
        # Always record the final state, even off the 50-kanji grid.
        print >> ostream, u'%d,%d' % (len(known_set), len(accessible_set))
    print 'Average neighbourhood size: %.02f (σ = %.02f)' % \
            basic_stats(n_neighbours)