Example #1
def basic(request):
    "Calculates and displays some basic statistics."
    context = {}

    # Number of users
    num_users = stats.count_active_users()
    context["num_users"] = num_users

    # Number of questions answered
    num_responses = models.MultipleChoiceResponse.objects.count()
    context["num_responses"] = num_responses
    context["responses_per_user"] = num_responses / float(num_users)

    # Number of completed test sets
    num_tests = models.TestSet.objects.exclude(end_time=None).count()
    context["num_tests"] = num_tests

    (context["mean_tbt"], context["std_tbt"]) = basic_stats(stats.get_time_between_tests())

    context["log_start_time"] = models.TestSet.objects.order_by("start_time")[0].start_time
    context["log_end_time"] = models.TestSet.objects.order_by("-start_time")[0].start_time

    context["tests_per_user"] = num_tests / float(num_users)
    context["responses_per_test"] = num_responses / float(num_tests)

    all_responses = models.MultipleChoiceResponse.objects
    context["mean_score"] = all_responses.filter(
            option__is_correct=True).count() / float(all_responses.count())

    test_stats = stats.get_test_size_stats()
    pretty_results = [(k, 100 * t, 100 * c) for (k, t, c) in test_stats]
    context["test_dist"] = pretty_results

    context["time_used_mean"], context["time_used_std"] = stats.get_mean_time_used()

    return render_to_response("analysis/basic.html", context, RequestContext(request))
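These listings all unpack basic_stats() into a (mean, standard deviation) pair, but the helper itself isn't shown. A minimal sketch under that assumed contract (the real implementation may differ, e.g. sample versus population standard deviation):

import math

def basic_stats(values):
    "Returns (mean, standard deviation) for an iterable of numbers."
    # Materialise generators first, since we iterate twice.
    values = [float(v) for v in values]
    n = len(values)
    mean = sum(values) / n
    variance = sum((v - mean) ** 2 for v in values) / n
    return mean, math.sqrt(variance)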
Example #2
def approximate(data, n_points=10, x_min=None, x_max=None):
    """
    Approximates a data series by grouping points into a number of bins.
    """
    if x_min is None:
        x_min = min(x for (x, y) in data)
    if x_max is None:
        x_max = max(x for (x, y) in data)

    # Use float division so that integer inputs don't truncate the bin width.
    interval = (x_max - x_min) / float(n_points)
    eps = 1e-8
    results = []
    for bin_no in xrange(n_points):
        # Bins are measured from x_min, not from zero.
        start_interval = x_min + bin_no * interval
        end_interval = x_min + (bin_no + 1) * interval
        if bin_no == n_points - 1:
            # Widen the last bin slightly so x_max itself is included.
            end_interval += eps
        midpoint = x_min + (bin_no + 0.5) * interval
        sub_data = [float(y) for (x, y) in data
                    if start_interval <= x < end_interval]
        if len(sub_data) < 3:
            continue
        avg, stddev = basic_stats(sub_data)
        results.append((
            midpoint,
            avg,
            max(avg - 2 * stddev, 0.0),
            min(avg + 2 * stddev, 1.0),
        ))
    return results
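A quick way to see what approximate() produces, using synthetic data; each result row is (bin midpoint, mean, lower band, upper band), with the ±2σ band clamped to [0, 1] since the y values are evidently treated as proportions:

import random

random.seed(0)
# Synthetic (x, y) pairs: y is a noisy proportion in [0, 1].
data = [(x, min(1.0, max(0.0, x / 100.0 + random.gauss(0, 0.05))))
        for x in range(100)]

for midpoint, avg, lower, upper in approximate(data, n_points=5):
    print('%5.1f  mean=%.3f  band=[%.3f, %.3f]' % (midpoint, avg, lower, upper))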
Example #3
def get_mean_time_used():
    "Gets the mean time in days for which the system was used."
    test_sets = drill_models.TestSet.objects.exclude(
        end_time=None).order_by('user__id')

    rows = []
    one_day = timedelta(days=1)
    # groupby() relies on the queryset being ordered by user id above.
    for user_id, user_tests in groupby(test_sets, lambda t: t.user_id):
        user_tests = sorted(user_tests, key=lambda t: t.start_time)
        time_used = user_tests[-1].end_time - user_tests[0].start_time
        days = _scale_time_delta(time_used, one_day)
        rows.append(days)

    return basic_stats(rows)
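The helper _scale_time_delta() isn't shown in the listing; from the call site it evidently expresses one timedelta as a float multiple of another. A plausible sketch (hypothetical reconstruction):

def _scale_time_delta(delta, unit):
    "Expresses `delta` as a float multiple of `unit` (both timedeltas)."
    # timedelta.total_seconds() is available from Python 2.7 onwards.
    return delta.total_seconds() / unit.total_seconds()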
Example #4
def evaluate_paths(input_file, limit=5):
    print 'Evaluating paths from "%s"' % os.path.basename(input_file)
    traces = TraceFile.load(input_file)

    path_lengths = []
    successes = []
    for (query, target, path) in traces:
        if path and path[-1] == target:
            successes.append(path)
            path_lengths.append(len(path) - 1)
        else:
            # Failed searches are charged the maximum path length.
            path_lengths.append(limit)

    print u'Success rate: %d/%d (%.02f%%)' % (
        len(successes), len(traces), 100.0 * len(successes) / len(traces))

    print u'Mean path length: %.02f (σ = %.02f)' % basic_stats(path_lengths)
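Each trace is a (query, target, path) triple; a search succeeds when the path's final node is the target, and failures are charged the maximum length `limit`. A hypothetical illustration of the expected data shape:

# Hypothetical traces, and the statistics they would produce with limit=5:
traces = [
    (u'a', u'z', [u'a', u'm', u'z']),  # success, path length 2
    (u'b', u'y', [u'b', u'c']),        # failure, charged length 5
    (u'c', u'x', [u'c', u'x']),        # success, path length 1
]
# Success rate: 2/3 (66.67%); path lengths: [2, 5, 1].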
Example #5
def group_by_points(data, y_max=sys.maxint, y_min=None):
    """
    Similar to approximate(), but more useful when the x axis is discrete.
    Instead of quantizing the x axis, we just group points by x values, and
    average across them.
    """
    data.sort()
    new_data = []
    for x, rows in groupby(data, lambda r: r[0]):
        rows = list(rows)

        if len(rows) < 3:
            continue

        avg, std = basic_stats(y for (x, y) in rows)
        # In Python 2, None compares less than any number, so the default
        # y_min=None leaves the lower band effectively unclamped.
        new_data.append((
            x,
            avg,
            min(avg + 2 * std, y_max),
            max(avg - 2 * std, y_min),
        ))

    return new_data
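Because groups with fewer than three points are skipped, group_by_points() needs repeated x values to produce any output. A small usage sketch; note the returned tuples are (x, mean, upper, lower), upper band before lower:

data = [(1, 0.2), (1, 0.3), (1, 0.4),
        (2, 0.5), (2, 0.6),            # only two points: group dropped
        (3, 0.7), (3, 0.8), (3, 0.9)]

for x, avg, upper, lower in group_by_points(data, y_max=1.0, y_min=0.0):
    print('x=%d  mean=%.2f  band=[%.2f, %.2f]' % (x, avg, lower, upper))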
Example #6
def simulate_accessibility(output_file, threshold=DEFAULT_THRESHOLD):
    print 'Loading frequency distribution'
    dist = FreqDist.from_file(settings.FREQ_SOURCE)

    print 'Loading kanji'
    kanji_set = list(models._get_kanji())
    # Shuffle with a fixed seed so that kanji of equal frequency end up in
    # a reproducible order after the stable sort below.
    random.seed(123456789)
    random.shuffle(kanji_set)

    kanji_in_order = sorted(kanji_set, key=lambda k: dist.prob(k))

    print 'Loading graph'
    graph = RestrictedGraph()

    print 'Dumping frequencies to %s' % os.path.basename(output_file)
    n_neighbours = []
    with codecs.open(output_file, 'w', 'utf8') as ostream:
        print >> ostream, u'#n_known,n_accessible'
        print >> ostream, u'%d,%d' % (0, 0)
        known_set = set()
        accessible_set = set()
        for i, kanji in enumerate(kanji_in_order):
            known_set.add(kanji)
            accessible_set.add(kanji)

            neighbours = graph[kanji]
            accessible_set.update(neighbours)
            n_neighbours.append(len(neighbours))

            if (i + 1) % 50 == 0:
                print >> ostream, u'%d,%d' % (len(known_set),
                                              len(accessible_set))
        print >> ostream, u'%d,%d' % (len(known_set), len(accessible_set))

    print 'Average neighbourhood size: %.02f (σ = %.02f)' % \
            basic_stats(n_neighbours)
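The dump is a small CSV of (n_known, n_accessible) pairs, written every 50 kanji plus a final row, so it can be read straight back for plotting (a sketch; the file name here stands in for whatever was passed as output_file):

import csv

with open('accessibility.csv') as istream:  # hypothetical output file name
    reader = csv.reader(istream)
    next(reader)  # skip the '#n_known,n_accessible' header row
    points = [(int(n_known), int(n_accessible))
              for (n_known, n_accessible) in reader]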