def _add_alternation_model(model_obj, code, root_node, first=False):
    """
    Adds this alternation model to our current alternation tree. This
    involves walking to each leaf node, then getting all candidates of the
    model, and appending them as new nodes.

    @param model_obj: An alternation model.
    @type model_obj: AlternationModelI
    @param code: A character code for the given alternation.
    @type code: char
    @param root_node: The root node of the entire tree.
    @type root_node: TreeNode
    @param first: If True, add candidates even when the model proposes no
        change; used when applying the first model to the tree.
    @type first: bool
    """
    for kanji_node in consoleLog.withProgress(root_node.children.values()):
        kanji = kanji_node.label
        leaves = list(kanji_node.walk_leaves())
        for reading_node in leaves:
            reading = reading_node.label
            candidates = model_obj.candidates(kanji, reading)
            if not first and candidates == [(reading, 0.0)]:
                # The model proposes no change for this reading; skip it.
                continue

            for alt_reading, log_prob in candidates:
                # Only tag changes with their alternation code.
                if alt_reading == reading:
                    node_code = ''
                else:
                    node_code = code

                assert alt_reading not in reading_node.children
                reading_node.add_child(
                        AltTreeNode(alt_reading, node_code, log_prob))

    return
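# A minimal, self-contained sketch of the leaf-walk-and-append pattern used by
# _add_alternation_model() above. The _SketchNode class and helper below are
# hypothetical stand-ins for the project's TreeNode/AltTreeNode classes; they
# only illustrate the traversal, not the real interfaces.

class _SketchNode(object):
    def __init__(self, label):
        self.label = label
        self.children = {}

    def add_child(self, node):
        self.children[node.label] = node

    def walk_leaves(self):
        if not self.children:
            yield self
        else:
            for child in self.children.values():
                for leaf in child.walk_leaves():
                    yield leaf

def _append_candidates_sketch(root, candidates):
    # Snapshot the current leaves first, so freshly appended nodes are not
    # themselves revisited during the same pass.
    for leaf in list(root.walk_leaves()):
        for alt_reading, log_prob in candidates:
            if alt_reading != leaf.label:
                leaf.add_child(_SketchNode(alt_reading))

# Example (hypothetical readings):
#   root = _SketchNode(u'root'); root.add_child(_SketchNode(u'かく'))
#   _append_candidates_sketch(root, [(u'かく', 0.0), (u'がく', -1.6)])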
def _dump_responses(filename):
    """
    Dumps every multiple-choice response (excluding users whose email is in
    EXCLUDE_EMAILS) to the given file, one JSON record per line.
    """
    _log.log(filename + ' ', newLine=False)
    with open(filename, 'w') as ostream:
        users = User.objects.exclude(email__in=EXCLUDE_EMAILS)
        for user in withProgress(users):
            for response in drill_models.MultipleChoiceResponse.objects.filter(
                    user=user):
                question = response.question
                test_set = drill_models.TestSet.objects.get(questions=question)
                record = {
                    'user_id': user.id,
                    'test_id': test_set.id,
                    'timestamp': response.timestamp.ctime(),
                    'pivot': question.pivot,
                    'pivot_type': question.pivot_type,
                    'question_type': question.question_type,
                    'is_adaptive': question.question_plugin.is_adaptive,
                    'distractors': [
                            o.value for o in
                            question.multiplechoicequestion.options.all()
                        ],
                    'correct_response':
                            question.multiplechoicequestion.options.get(
                                is_correct=True).value,
                    'user_response': response.option.value,
                }
                print >> ostream, simplejson.dumps(record)
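# Hedged sketch: reading back the one-record-per-line dump written by
# _dump_responses(). Each line is an independent JSON object, so the standard
# json module (or simplejson) can parse the file line by line. The loader name
# and example filename are illustrative only.
import json

def _load_responses(filename):
    records = []
    with open(filename) as istream:
        for line in istream:
            line = line.strip()
            if line:
                records.append(json.loads(line))
    return records

# Example: records = _load_responses('responses.log')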
def test_sample(m, n, method, iterations=1000):
    """
    Empirically checks how uniform the samples drawn by method(m, n) are, by
    accumulating them over many iterations and reporting the spread between
    the most and least frequently drawn values.
    """
    dist = FreqDist()
    for i in withProgress(xrange(iterations)):
        for s in method(m, n):
            dist.inc(s)

    min_prob, min_v = min((dist.prob(k), k) for k in dist.iterkeys())
    max_prob, max_v = max((dist.prob(k), k) for k in dist.iterkeys())
    print "Min:", min_prob, min_v
    print "Max:", max_prob, max_v
    print "Diff:", abs(max_prob - min_prob)
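# Hedged usage sketch for test_sample(): 'method' is assumed to be any callable
# taking (m, n) and yielding sampled values. The uniform_sample() stub below is
# hypothetical, included only to show the expected call shape.
import random

def uniform_sample(m, n):
    """Yields n values drawn uniformly at random from range(m)."""
    for _ in xrange(n):
        yield random.randrange(m)

# Example: test_sample(10, 5, uniform_sample, iterations=1000)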
def _build_graph(self, kanji_set):
    """
    Builds a degree-bounded similarity graph over kanji_set, connecting each
    unique pair of kanji with its metric weight and ignoring any kanji
    outside the metric's domain.
    """
    metric = metrics.metric_library[_default_metric_name]
    graph = threshold_graph.ThresholdGraph(settings.MAX_GRAPH_DEGREE)
    ignore_set = set()
    for kanji_a, kanji_b in consoleLog.withProgress(
            iunique_pairs(kanji_set), 100):
        if kanji_a in ignore_set or kanji_b in ignore_set:
            continue

        try:
            weight = metric(kanji_a, kanji_b)
        except DomainError, e:
            # The metric cannot handle this kanji; drop it from all
            # further pairs.
            kanji = e.message
            ignore_set.add(kanji)
            continue

        graph.connect(kanji_a, kanji_b, weight)

    return graph
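# Hedged sketch of the pair generation _build_graph() relies on: iunique_pairs()
# is assumed to yield each unordered pair of distinct items exactly once, in the
# way itertools.combinations() does. This is an assumption about the helper's
# contract, not its actual implementation.
from itertools import combinations

def _iunique_pairs_sketch(items):
    return combinations(sorted(items), 2)

# Example: list(_iunique_pairs_sketch([u'日', u'月', u'明'])) yields the three
# unordered pairs, each exactly once.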
def _pad_readings(self, prior_dist):
    """
    Once the reading distribution has been copied over, we still have the
    problem that there may not be enough erroneous readings to meet the
    minimum number of distractors we wish to generate. To circumvent this
    problem, we pad with random distractors.
    """
    _log.log('Padding results ', newLine=False)
    conditions = set(o['condition'] for o in
            prior_dist.density.all().values('condition'))
    for condition in consoleLog.withProgress(conditions):
        # Never use a kanji's genuine readings as distractors.
        exclude_set = set(
                o.reading for o in
                lexicon_models.KanjiReading.objects.filter(
                    kanji__kanji=condition)
            )
        n_stored = prior_dist.density.filter(condition=condition).exclude(
                symbol__in=exclude_set).count()
        sub_dist = ProbDist.from_query_set(prior_dist.density.filter(
                condition=condition))
        exclude_set.update(sub_dist.keys())
        n_needed = settings.MIN_TOTAL_DISTRACTORS - n_stored

        # Pad with randomly sampled readings at half the smallest existing
        # probability, then renormalise.
        min_prob = min(sub_dist.itervalues()) / 2
        while n_needed > 0:
            for row in lexicon_models.KanjiReadingProb.sample_n(n_needed):
                if row.symbol not in exclude_set:
                    sub_dist[row.symbol] = min_prob
                    exclude_set.add(row.symbol)
                    n_needed -= 1
                    if n_needed == 0:
                        break

        sub_dist.normalise()
        sub_dist.save_to(prior_dist.density, condition=condition)

    return
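# Hedged sketch of the padding step above, acted out on a plain dictionary
# instead of the project's ProbDist class: unseen symbols are added at half the
# smallest existing probability and the result is renormalised. The helper name
# and example inputs are hypothetical.

def _pad_and_normalise_sketch(dist, candidate_symbols, n_needed):
    min_prob = min(dist.values()) / 2.0
    for symbol in candidate_symbols:
        if n_needed <= 0:
            break
        if symbol not in dist:
            dist[symbol] = min_prob
            n_needed -= 1
    total = sum(dist.values())
    return dict((k, v / total) for k, v in dist.items())

# Example: _pad_and_normalise_sketch({u'か': 0.7, u'けい': 0.3},
#         [u'こう', u'きょう'], 2)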
def simulate_search(output_file, strategy='greedy',
        k=settings.N_NEIGHBOURS_RECALLED, error_rate=0.0):
    """
    Simulate user searches on every query/target pair from the flashcard
    dataset, using one of the available strategies. The resulting query
    paths are dumped to the specified file.
    """
    if strategy == 'greedy':
        search_fn = _greedy_search
    elif strategy == 'shortest':
        search_fn = _breadth_first_search
    elif strategy == 'random':
        random.seed(123456789)
        search_fn = _random_stumble
    else:
        raise ValueError(strategy)

    traces = []
    for query, target in withProgress(_load_search_examples()):
        path = search_fn(query, target, k=k, error_rate=error_rate)
        traces.append((query, target, path))

    TraceFile.save(traces, output_file)
    print 'Paths dumped to %s' % output_file
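# Hedged sketch of the interface each search strategy above is assumed to
# expose: a callable taking (query, target, k=..., error_rate=...) and
# returning the list of queries visited on the way to the target. The
# _null_search() stub below is hypothetical and only documents that shape.

def _null_search(query, target, k=settings.N_NEIGHBOURS_RECALLED,
        error_rate=0.0):
    """Trivial strategy: gives up immediately, returning just the start query."""
    return [query]

# Example (illustrative filename): simulate_search('greedy.trace',
#         strategy='greedy') dumps (query, target, path) triples.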