コード例 #1
0
ファイル: models.py プロジェクト: larsyencken/kanjitester
    def update(self, condition, symbol, symbol_set):
        """
        Update the stored density for *condition* so that *symbol* becomes
        (marginally) the most likely member of *symbol_set*, leaving the
        probability mass held by symbols outside *symbol_set* untouched.
        Persists the adjusted distribution back to ``self.density``.
        """
        query = self.density.filter(condition=condition)
        whole_dist = ProbDist.from_query_set(query)

        # Restrict attention to the competing symbols in symbol_set.
        sub_dist = ProbDist.from_query_set(query.filter(
                symbol__in=symbol_set))
        assert sub_dist
        # Target probability: just above the best competing symbol.
        # NOTE(review): max() raises ValueError if sub_dist contains only
        # *symbol* itself — confirm callers always supply a competitor.
        m = max(v for (s, v) in sub_dist.iteritems() if s != symbol) + \
                settings.UPDATE_EPSILON

        if sub_dist[symbol] >= m:
            # Nothing to say here.
            return

        # Increase the likelihood of seeing the symbol
        sub_dist[symbol] = m
        sub_dist.normalise()

        # Rescale the adjusted sub-distribution back into the mass it
        # originally occupied within the whole distribution, so the total
        # still sums to one (checked by the assertion below).
        sub_dist_mass = sum(map(whole_dist.__getitem__, sub_dist.keys()))
        for s in sub_dist:
            whole_dist[s] = sub_dist[s] * sub_dist_mass

        assert abs(sum(whole_dist.values()) - 1.0) < 1e-6
        whole_dist.save_to(self.density, condition=condition)
        return
コード例 #2
0
ファイル: models.py プロジェクト: tryforceful/kanjitester
    def sample_seq_n(self, condition_segments, n, exclude_set=None):
        """
        Sample n results from the sequence distribution formed by the
        given condition segments. Kanji segments are looked up as
        conditional distributions; any other segment passes through as a
        fixed literal.
        """
        def _segment_dist(segment):
            # Only kanji segments have a stored conditional density.
            if scripts.script_type(segment) == scripts.Script.Kanji:
                return ProbDist.from_query_set(
                        self.density.filter(condition=segment))
            return segment

        parts = [_segment_dist(seg) for seg in condition_segments]
        return SeqDist(*parts).sample_n(n, exclude_set)
コード例 #3
0
ファイル: models.py プロジェクト: larsyencken/kanjitester
 def sample_seq_n(self, condition_segments, n, exclude_set=None):
     """
     Sample n results from the joint sequence distribution over the
     given segments: kanji segments contribute their stored conditional
     distributions, other segments are kept verbatim.
     """
     is_kanji = lambda seg: (
             scripts.script_type(seg) == scripts.Script.Kanji)
     parts = []
     for seg in condition_segments:
         if not is_kanji(seg):
             # Non-kanji segments pass straight through.
             parts.append(seg)
             continue
         parts.append(ProbDist.from_query_set(
                 self.density.filter(condition=seg)))

     return SeqDist(*parts).sample_n(n, exclude_set)
コード例 #4
0
ファイル: models.py プロジェクト: tryforceful/kanjitester
    def update(self, condition, symbol, symbol_set):
        """
        Update the stored density for *condition* so that *symbol* becomes
        (marginally) the most likely member of *symbol_set*, leaving the
        probability mass held by symbols outside *symbol_set* untouched.
        Persists the adjusted distribution back to ``self.density``.
        """
        query = self.density.filter(condition=condition)
        whole_dist = ProbDist.from_query_set(query)

        # Restrict attention to the competing symbols in symbol_set.
        sub_dist = ProbDist.from_query_set(query.filter(symbol__in=symbol_set))
        assert sub_dist
        # Target probability: just above the best competing symbol.
        # NOTE(review): max() raises ValueError if sub_dist contains only
        # *symbol* itself — confirm callers always supply a competitor.
        m = max(v for (s, v) in sub_dist.iteritems() if s != symbol) + \
                settings.UPDATE_EPSILON

        if sub_dist[symbol] >= m:
            # Nothing to say here.
            return

        # Increase the likelihood of seeing the symbol
        sub_dist[symbol] = m
        sub_dist.normalise()

        # Rescale the adjusted sub-distribution back into the mass it
        # originally occupied within the whole distribution, so the total
        # still sums to one (checked by the assertion below).
        sub_dist_mass = sum(map(whole_dist.__getitem__, sub_dist.keys()))
        for s in sub_dist:
            whole_dist[s] = sub_dist[s] * sub_dist_mass

        assert abs(sum(whole_dist.values()) - 1.0) < 1e-6
        whole_dist.save_to(self.density, condition=condition)
        return
コード例 #5
0
ファイル: __init__.py プロジェクト: larsyencken/kanjitester
    def _pad_readings(self, prior_dist):
        """
        Once the reading distribution has been copied over, we still have the
        problem that there may not be enough erroneous readings to meet the
        minimum number of distractors we wish to generate.

        To circumvent this problem, we pad with random distractors drawn
        from the global kanji-reading distribution, each added with a
        probability below every genuine entry, then renormalise and save.
        """
        _log.log('Padding results ', newLine=False)
        # All conditions (kanji) present in the prior distribution.
        conditions = set(o['condition'] for o in \
                prior_dist.density.all().values('condition'))
        # NOTE(review): the (condition,) unpacking implies each condition
        # is a length-1 sequence (presumably a single kanji character) —
        # confirm against how conditions are stored.
        for (condition,) in consoleLog.withProgress(conditions):
            # Genuine readings of this kanji must never appear as
            # distractors.
            exclude_set = set(
                    o.reading for o in \
                    lexicon_models.KanjiReading.objects.filter(
                        kanji__kanji=condition)
                )
            # Count distractors already stored (erroneous readings only).
            n_stored = prior_dist.density.filter(condition=condition).exclude(
                    symbol__in=exclude_set).count()

            sub_dist = ProbDist.from_query_set(prior_dist.density.filter(
                    condition=condition))
            # Also avoid duplicating anything already in the distribution.
            exclude_set.update(sub_dist.keys())
            n_needed = settings.MIN_TOTAL_DISTRACTORS - n_stored
            # Padded entries get half the smallest existing probability so
            # they remain the least likely choices after normalisation.
            min_prob = min(sub_dist.itervalues()) / 2
            # NOTE(review): loops until enough novel symbols are sampled;
            # presumably the sample space is large enough that this
            # terminates — confirm.
            while n_needed > 0:
                for row in lexicon_models.KanjiReadingProb.sample_n(n_needed):
                    if row.symbol not in exclude_set:
                        sub_dist[row.symbol] = min_prob
                        exclude_set.add(row.symbol)
                        n_needed -= 1

                    if n_needed == 0:
                        break

            sub_dist.normalise()
            sub_dist.save_to(prior_dist.density, condition=condition)

        return
コード例 #6
0
ファイル: models.py プロジェクト: tryforceful/kanjitester
 def sample_n(self, condition, n, exclude_set=None):
     """Samples n symbols without replacement from the conditional
     distribution stored for the given condition."""
     conditional_rows = self.density.filter(condition=condition)
     conditional_dist = ProbDist.from_query_set(conditional_rows)
     return conditional_dist.sample_n(n, exclude_set=exclude_set)
コード例 #7
0
ファイル: models.py プロジェクト: larsyencken/kanjitester
 def sample_n(self, condition, n, exclude_set=None):
     """Draw n distinct symbols (without replacement) from the stored
     distribution conditioned on the given condition."""
     rows = self.density.filter(condition=condition)
     return ProbDist.from_query_set(rows).sample_n(
             n, exclude_set=exclude_set)