#!/usr/bin/env python # -*- coding: utf8 - *- from __future__ import unicode_literals, print_function from cihai.core import Cihai from cihai.bootstrap import bootstrap_unihan c = Cihai() if not c.is_bootstrapped: # download and install Unihan to db bootstrap_unihan(c.metadata) c.reflect_db() # automap new table created during bootstrap query = c.lookup_char('好') glyph = query.first() print("lookup for 好: %s" % glyph.kDefinition) query = c.reverse_char('good') print('matches for "good": %s ' % ', '.join([glph.char for glph in query]))
class ExplainKanji(BaseFilter): def __init__(self): super().__init__() self.c = Cihai() if not self.c.is_bootstrapped: # download and install Unihan to db bootstrap_unihan(self.c.metadata) self.c.reflect_db() def __call__(self, chunk): from src.Sequencer import TextChunk, JingleChunk chunk = self._duplicate_chunk(chunk) result = [chunk] if not isinstance(chunk, TextChunk) or chunk.language != 'japanese': return result explanations = self._get_explanations(chunk.text) result.append( TextChunk(text='[', audible=False, printable=True, final=True)) for k, ons, kuns, explanation in explanations: result.append( TextChunk(text=k, language='japanese', audible=False, printable=True, final=True)) result.append( TextChunk(text='on', language='english', audible=True, printable=False, final=True)) result.append(JingleChunk(jingle='silence')) for on in ons: result.append( TextChunk(text=on, language='japanese', audible=True, printable=True, final=True)) result.append(JingleChunk(jingle='silence')) result.append( TextChunk(text='、', audible=False, printable=True, final=True)) result.append( TextChunk(text='koon', language='english', audible=True, printable=False, final=True)) result.append(JingleChunk(jingle='silence')) for kun in kuns: result.append( TextChunk(text=kun, language='japanese', audible=True, printable=True, final=True)) result.append(JingleChunk(jingle='silence')) result.append( TextChunk(text='、', audible=False, printable=True, final=True)) result.append(JingleChunk(jingle='definition')) result.append( TextChunk(text=explanation, language='english', audible=True, printable=True, final=True)) result.append( TextChunk(text=']', audible=False, printable=True, final=True)) result.append( JingleChunk(jingle='silence_long', audible=False, printable=True, final=True)) return result def _kanji_to_kana(self, char): glyph = self.c.lookup_char(char).first() if glyph is None: return None romaji_on = glyph.kJapaneseKun.lower() romaji_kun = glyph.kJapaneseOn.lower() jp_on = jaconv.alphabet2kana(romaji_on).split(' ') jp_kun = jaconv.hira2kata(jaconv.alphabet2kana(romaji_kun)).split(' ') return jp_on, jp_kun, glyph.kDefinition @staticmethod def is_kana(char): return ('\u30A0' <= char <= '\u30FF') or ( '\u3040' <= char <= '\u309F') # Katakana and Hiragana blocks @classmethod def is_kanji(cls, char): return not cls.is_kana(char) def _get_explanations(self, text): kanji = set(filter(self.is_kanji, text)) detail_list = [] for k in kanji: triplet = self._kanji_to_kana(k) if triplet is None: continue on, kun, definition = triplet # detail_list.append(f'{k}: {", ".join(on)}; {", ".join(kun)}; {definition}') detail_list.append((k, on, kun, definition)) return detail_list