Exemple #1
0
    def update_kanji_pos(self) -> None:
        """Recompute kanji positions and frequencies and push them to the db.

        Steps, in order:

        1. Load the per-character counts of the merged drama into
           ``self.total_count``.
        2. Number the characters returned by ``compute_pos_dict('jlpt')``
           (levels walked from the highest index down to 1) and by
           ``compute_pos_dict('jouyou')`` (levels walked from 1 upwards),
           storing the running number in ``jlpt_pos`` / ``jouyou_pos``.
        3. Rank every kanji of ``self.total_count`` by descending count and
           store ``jdpt_pos``, ``freq``, ``cumul_freq``, the count itself
           and, where the thresholds allow it, the ``jdpt`` level.
        4. Push the characters with ``self.db.push_kanji_pos``.
        5. Compute, per kanji, the share of dramas and of episodes it
           appears in and push both mappings with
           ``self.db.push_drama_and_episode_count``.

        Frequencies are normalised by the sum of the counts held in
        ``self.total_count`` (the same values that are divided), so the
        cumulative frequency ends at 1 up to float rounding. A zero
        denominator yields a frequency of 0.0 instead of raising.
        """
        # get all char count
        self.total_count = self.db.get_count_for_drama(
            JdsDatabase.get_merged_drama())

        # set jlpt position: highest level index first, index 0 is skipped
        jlpt_dict = self.compute_pos_dict('jlpt')
        position = 1
        for level in range(len(jlpt_dict) - 1, 0, -1):
            for char in jlpt_dict[level]:
                char.jlpt_pos = position
                position += 1

        # set jouyou position: level 1 first, index 0 is skipped
        jouyou_dict = self.compute_pos_dict('jouyou')
        position = 1
        for level in range(1, len(jouyou_dict)):
            for char in jouyou_dict[level]:
                char.jouyou_pos = position
                position += 1

        # Kanji uids ordered by descending count. Sorted and filtered once,
        # then reused for both the total and the ranking below.
        ranked_kanji = [
            char_uid
            for char_uid in sorted(self.total_count,
                                   key=self.total_count.get,
                                   reverse=True)
            if is_kanji(self.chars[char_uid].value)
        ]

        # Sum of all kanji counts. Taken from total_count so that numerator
        # and denominator of ``freq`` come from the same data; the counts
        # stored on the characters are only refreshed by set_count() below.
        sum_all_count = sum(
            self.total_count[char_uid] for char_uid in ranked_kanji)

        # set JDPT position, frequencies and level
        jdpt_limits = [1, 0.99, 0.98, 0.95, 0.9, 0.75, 0.5]
        cur_level = len(jdpt_limits) - 1
        cumul_freq = 0
        for position, char_uid in enumerate(ranked_kanji, start=1):
            char = self.chars[char_uid]
            count = self.total_count[char_uid]
            freq = count / sum_all_count if sum_all_count else 0.0
            cumul_freq += freq

            char.jdpt_pos = position
            char.freq = freq
            char.cumul_freq = cumul_freq
            char.set_count(count)

            # Move to the next threshold once the current one is exceeded.
            # The level is clamped at 0: float rounding can push cumul_freq
            # slightly above 1, and a negative level would wrap around the
            # list and eventually raise IndexError.
            if cur_level > 0 and cumul_freq > jdpt_limits[cur_level]:
                cur_level -= 1
            # jdpt is only assigned while still below the current threshold;
            # otherwise the character keeps whatever jdpt it already had.
            if cumul_freq < jdpt_limits[cur_level]:
                char.jdpt = cur_level - 1

        self.db.push_kanji_pos(self.chars)

        # set episode and drama frequency
        char_drama_count = {}
        char_episode_count = {}
        for result in self.db.get_kanji_count_raw():
            kanji_uid = result['kanji_uid']
            # every kanji seen gets an entry, even if only in skipped rows
            char_drama_count.setdefault(kanji_uid, 0)
            char_episode_count.setdefault(kanji_uid, 0)
            drama_uid = result['drama_uid']
            episode_count = result['episode_count']
            # Rows with drama uid 0 are not counted (presumably the merged
            # drama -- TODO confirm). Compared by value, not identity.
            if drama_uid == 0:
                continue
            char_drama_count[kanji_uid] += 1
            char_episode_count[kanji_uid] += episode_count

        num_of_drama = len(
            self.db.get_all_dramas()) - 1  # -1 due to merge drama
        num_of_episodes = len(self.db.get_all_episodes_raw())

        # With no dramas/episodes the frequency is reported as 0.0 rather
        # than dividing by zero.
        char_drama_freq = {
            kanji_uid: count / num_of_drama if num_of_drama > 0 else 0.0
            for kanji_uid, count in char_drama_count.items()
        }
        char_episode_freq = {
            kanji_uid: count / num_of_episodes if num_of_episodes > 0 else 0.0
            for kanji_uid, count in char_episode_count.items()
        }

        self.db.push_drama_and_episode_count(char_drama_freq,
                                             char_episode_freq)
Exemple #2
0
            if result['flag'] is not 1:
                continue
            row = dict()
            rows[result['kanji_uid']] = row
            row['kanji'] = chr(result['kanji_uid'])
            row['jouyou'] = result['jouyou']
            row['jouyou_pos'] = result['jouyou_pos']
            row['jdpt'] = result['jdpt']
            row['jdpt_pos'] = result['jdpt_pos']
            row['freq'] = result['freq']
            row['cumul_freq'] = result['cumul_freq']
            row['drama_freq'] = result['drama_freq']
            row['episode_freq'] = result['episode_freq']

        for result in kanji_count_results:
            if result['drama_uid'] is not JdsDatabase.get_merged_drama().uid:
                continue
            if result['kanji_uid'] not in rows:
                continue
            row = rows[result['kanji_uid']]
            row['count'] = result['count']

        writer.writerows(rows.values())

    print("{} ended in {:2.2f}".format(__file__,
                                       (time.perf_counter() - start_time)))

    if settings.enable_profiler:
        pr.disable()
        pr.print_stats(sort="cumulative")