Ejemplo n.º 1
0
    def __init__(self, kanjidic_files=None):
        """
        Populate this dict by parsing the given kanjidic files.

        @param kanjidic_files: Iterable of file names, each of which is
            opened with sopen(). When None, the 'kanjidic' and 'kanjd212'
            resources located through cjkdata.get_resource() are used.
        """
        dict.__init__(self)

        if kanjidic_files is None:
            kanjidic_files = [
                cjkdata.get_resource('kanjidic'),
                cjkdata.get_resource('kanjd212'),
            ]

        # Open every file up front so an unreadable file fails before any
        # parsing starts, then hand the parser one flat stream of lines.
        # chain.from_iterable() copes with an empty list (reduce(chain, ...)
        # raised TypeError there) and does not nest chains for long lists.
        # NOTE(review): the opened streams are not closed here, same as
        # before -- confirm whether sopen() results should be closed.
        streams = [sopen(f) for f in kanjidic_files]
        self._parse_kanjidic(chain.from_iterable(streams))
Ejemplo n.º 2
0
    def __init__(self, istream=None):
        """
        Parse a radkfile and populate this object from it.

        @param istream: An open byte stream of UTF-8 encoded radkfile data.
            When None, the file located by cjkdata.get_resource('radkfile')
            is opened, parsed and closed again.
        """
        opened_here = istream is None
        if opened_here:
            # Binary mode: the utf8 stream reader below does the decoding,
            # so it must be fed raw bytes.
            istream = open(cjkdata.get_resource('radkfile'), 'rb')

        try:
            # NOTE(review): assumes _parse_radkfile() consumes the stream
            # before returning -- confirm against its definition.
            self._parse_radkfile(codecs.getreader('utf8')(istream))
        finally:
            # Only close what was opened here; a caller-supplied stream
            # stays under the caller's control.
            if opened_here:
                istream.close()
Ejemplo n.º 3
0
    def __init__(self):
        """
        Load the hanzi-to-pinyin table into this mapping.

        Each non-comment line of the 'tables/gbk_pinyin_table' resource is
        split on whitespace: the first field becomes the key and the
        remaining fields are stored as a tuple of readings. Lines starting
        with '#' and blank lines are skipped.
        """
        self._segmenter = get_pinyin_segmenter()

        # Load the table mapping hanzi to pinyin.
        table_path = cjkdata.get_resource('tables/gbk_pinyin_table')
        with codecs.open(table_path, 'r', 'utf8') as istream:
            for line in istream:
                if line.startswith('#'):
                    continue
                # split() with no separator already ignores leading and
                # trailing whitespace, including the newline.
                entries = line.split()
                if not entries:
                    # Blank line: nothing to index. Previously this raised
                    # IndexError on entries[0].
                    continue
                self[entries[0]] = tuple(entries[1:])
Ejemplo n.º 4
0
def get_list(list_name):
    """
    Return the kanji contained in the named character list.

    @param list_name: Name of a list file inside the 'lists/char' resource
        directory.
    @return: The result of scripts.unique_kanji() applied to the full text
        of that file.
    """
    list_path = cjkdata.get_resource('lists/char/%s' % list_name)
    with codecs.open(list_path, 'r', 'utf8') as list_file:
        contents = list_file.read()
    return scripts.unique_kanji(contents)
Ejemplo n.º 5
0
def get_lists():
    """
    Return the names of all existing kanji lists.

    @return: The directory listing of the 'lists/char' resource directory,
        in whatever order os.listdir() reports it.
    """
    lists_dir = cjkdata.get_resource('lists/char')
    return os.listdir(lists_dir)
Ejemplo n.º 6
0
def _default_stream():
    """
    Open the 'tables/zhuyin_pinyin_conv_table' resource for reading.

    @return: The open file object; the caller is responsible for closing it.
    """
    table_path = cjkdata.get_resource('tables/zhuyin_pinyin_conv_table')
    return open(table_path)