예제 #1
0
    def _add_alternation_model(model_obj, code, root_node, first=False):
        """
        Grows the alternation tree by one layer using the given model. For
        every kanji node below the root, each current leaf is asked for its
        candidate readings, and every candidate becomes a new child of that
        leaf.

        @param model_obj: An alternation model.
        @type model_obj: AlternationModelI
        @param code: A character code for the given alternation.
        @type code: char
        @param root_node: The root node of the entire tree.
        @type root_node: TreeNode
        @param first: When false, a leaf whose only candidate is its own
            reading with log probability 0.0 is left untouched; when true,
            such a leaf still receives a child.
        @type first: bool
        """
        for kanji_node in consoleLog.withProgress(root_node.children.values()):
            kanji = kanji_node.label
            # Snapshot the leaves up front, since children get attached to
            # them inside the loop.
            for leaf in list(kanji_node.walk_leaves()):
                base_reading = leaf.label
                alternatives = model_obj.candidates(kanji, base_reading)
                is_noop = (not first
                        and alternatives == [(base_reading, 0.0)])
                if is_noop:
                    continue

                for alt_reading, log_prob in alternatives:
                    # An unchanged reading carries no alternation code.
                    tag = '' if alt_reading == base_reading else code
                    assert alt_reading not in leaf.children
                    leaf.add_child(AltTreeNode(alt_reading, tag, log_prob))
예제 #2
0
def _dump_responses(filename):
    """
    Dumps every multiple-choice response to the given file, printing one
    serialised record per response. Users whose email is listed in
    EXCLUDE_EMAILS are left out.
    """
    _log.log(filename + ' ', newLine=False)
    with open(filename, 'w') as out:
        included_users = User.objects.exclude(email__in=EXCLUDE_EMAILS)
        for user in withProgress(included_users):
            user_responses = \
                    drill_models.MultipleChoiceResponse.objects.filter(
                        user=user)
            for response in user_responses:
                question = response.question
                test_set = drill_models.TestSet.objects.get(questions=question)
                mc_question = question.multiplechoicequestion
                # Kept as a single literal so the serialised key order is
                # the same as it has always been.
                record = {
                    'user_id': user.id,
                    'test_id': test_set.id,
                    'timestamp': response.timestamp.ctime(),
                    'pivot': question.pivot,
                    'pivot_type': question.pivot_type,
                    'question_type': question.question_type,
                    'is_adaptive': question.question_plugin.is_adaptive,
                    'distractors': [
                        option.value for option in mc_question.options.all()
                    ],
                    'correct_response':
                        mc_question.options.get(is_correct=True).value,
                    'user_response': response.option.value,
                }
                print >> out, simplejson.dumps(record)
예제 #3
0
    def _add_alternation_model(model_obj, code, root_node, first=False):
        """
        Applies one alternation model to the tree built so far. Under each
        kanji node, every existing leaf is expanded with one child per
        candidate reading the model proposes for it.

        @param model_obj: An alternation model.
        @type model_obj: AlternationModelI
        @param code: A character code for the given alternation.
        @type code: char
        @param root_node: The root node of the entire tree.
        @type root_node: TreeNode
        @param first: Unless this is true, a leaf whose candidates consist
            solely of its own reading at log probability 0.0 is skipped.
        @type first: bool
        """
        kanji_nodes = root_node.children.values()
        for kanji_node in consoleLog.withProgress(kanji_nodes):
            kanji = kanji_node.label
            # Materialise the leaves first: new children are added to them
            # while we iterate.
            current_leaves = list(kanji_node.walk_leaves())
            for leaf_node in current_leaves:
                old_reading = leaf_node.label
                proposals = model_obj.candidates(kanji, old_reading)
                if not first:
                    if proposals == [(old_reading, 0.0)]:
                        # The model leaves this reading as it is.
                        continue

                for new_reading, log_prob in proposals:
                    # Only genuine changes are tagged with the code.
                    node_code = code if new_reading != old_reading else ''
                    assert new_reading not in leaf_node.children
                    child = AltTreeNode(new_reading, node_code, log_prob)
                    leaf_node.add_child(child)
예제 #4
0
def test_sample(m, n, method, iterations=1000):
    dist = FreqDist()
    for i in withProgress(xrange(iterations)):
        for s in method(m, n):
            dist.inc(s)
    
    min_prob, min_v = min((dist.prob(k), k) for k in dist.iterkeys())
    max_prob, max_v = max((dist.prob(k), k) for k in dist.iterkeys())

    print "Min:", min_prob, min_v
    print "Max:", max_prob, max_v
    print "Diff:", abs(max_prob - min_prob)
예제 #5
0
    def _build_graph(self, kanji_set):
        metric = metrics.metric_library[_default_metric_name]
        graph = threshold_graph.ThresholdGraph(settings.MAX_GRAPH_DEGREE)
        ignore_set = set()
        for kanji_a, kanji_b in consoleLog.withProgress(
                    iunique_pairs(kanji_set), 100):
            if kanji_a in ignore_set or kanji_b in ignore_set:
                continue

            try:
                weight = metric(kanji_a, kanji_b)
            except DomainError, e:
                kanji = e.message
                ignore_set.add(kanji)
                continue

            graph.connect(kanji_a, kanji_b, weight)
예제 #6
0
def _dump_responses(filename):
    """
    Writes out all multiple-choice responses, one serialised record per
    printed line, to the named file. Responses from users whose email is
    in EXCLUDE_EMAILS are not included.
    """
    _log.log(filename + " ", newLine=False)
    with open(filename, "w") as dump_file:
        eligible_users = User.objects.exclude(email__in=EXCLUDE_EMAILS)
        for user in withProgress(eligible_users):
            responses = drill_models.MultipleChoiceResponse.objects.filter(user=user)
            for response in responses:
                question = response.question
                test_set = drill_models.TestSet.objects.get(questions=question)
                mc_question = question.multiplechoicequestion
                # The record stays a single literal so that its serialised
                # key order does not shift.
                record = {
                    "user_id": user.id,
                    "test_id": test_set.id,
                    "timestamp": response.timestamp.ctime(),
                    "pivot": question.pivot,
                    "pivot_type": question.pivot_type,
                    "question_type": question.question_type,
                    "is_adaptive": question.question_plugin.is_adaptive,
                    "distractors": [opt.value for opt in mc_question.options.all()],
                    "correct_response": mc_question.options.get(is_correct=True).value,
                    "user_response": response.option.value,
                }
                print >> dump_file, simplejson.dumps(record)
예제 #7
0
    def _pad_readings(self, prior_dist):
        """
        Once the reading distribution has been copied over, we still have the
        problem that there may not be enough erroneous readings to meet the
        minimum number of distractors we wish to generate.

        To circumvent this problem, we pad with random distractors. For each
        condition in the distribution, randomly sampled symbols are added
        at half the smallest existing probability until
        settings.MIN_TOTAL_DISTRACTORS is reached; the padded distribution
        is then normalised and saved back.

        @param prior_dist: The distribution whose density entries are
            padded in place.
        """
        _log.log('Padding results ', newLine=False)
        conditions = set(o['condition'] for o in \
                prior_dist.density.all().values('condition'))
        # Iterate over the condition values directly. Unpacking each one as
        # a 1-tuple only works for values of length one and raises
        # ValueError for anything else.
        for condition in consoleLog.withProgress(conditions):
            # Start from the readings stored for this kanji, so they are
            # neither counted as distractors nor sampled as padding.
            exclude_set = set(
                    o.reading for o in \
                    lexicon_models.KanjiReading.objects.filter(
                        kanji__kanji=condition)
                )
            n_stored = prior_dist.density.filter(condition=condition).exclude(
                    symbol__in=exclude_set).count()

            sub_dist = ProbDist.from_query_set(prior_dist.density.filter(
                    condition=condition))
            # Symbols already in the distribution must not be added twice.
            exclude_set.update(sub_dist.keys())
            n_needed = settings.MIN_TOTAL_DISTRACTORS - n_stored
            # Padding gets half the probability of the least likely
            # existing entry.
            min_prob = min(sub_dist.itervalues()) / 2
            # NOTE(review): this keeps resampling until enough unseen
            # symbols turn up -- confirm sample_n can always supply them.
            while n_needed > 0:
                for row in lexicon_models.KanjiReadingProb.sample_n(n_needed):
                    if row.symbol not in exclude_set:
                        sub_dist[row.symbol] = min_prob
                        exclude_set.add(row.symbol)
                        n_needed -= 1

                    if n_needed == 0:
                        break

            sub_dist.normalise()
            sub_dist.save_to(prior_dist.density, condition=condition)
예제 #8
0
def simulate_search(output_file, strategy='greedy',
        k=settings.N_NEIGHBOURS_RECALLED, error_rate=0.0):
    """
    Simulate user searches on every query/target pair from the flashcard
    dataset, using one of the available strategies. The resulting query paths
    are dumped to the specified file.
    """
    if strategy == 'greedy':
        search_fn = _greedy_search
    elif strategy == 'shortest':
        search_fn = _breadth_first_search
    elif strategy == 'random':
        random.seed(123456789)
        search_fn = _random_stumble
    else:
        raise ValueError(strategy)

    traces = []
    for query, target in withProgress(_load_search_examples()):
        path = search_fn(query, target, k=k, error_rate=error_rate)
        traces.append((query, target, path))

    TraceFile.save(traces, output_file)
    print 'Paths dumped to %s' % output_file