Exemple #1
0
    def __init__(self, kanjidic_files=None):
        """
        Populates this dictionary by parsing one or more kanjidic files.

        @param kanjidic_files: Iterable of values accepted by sopen()
            (presumably file paths -- confirm against sopen). When None,
            the 'kanjidic' and 'kanjd212' resources from cjkdata are used.
        """
        dict.__init__(self)

        if kanjidic_files is None:
            kanjidic_files = [
                cjkdata.get_resource('kanjidic'),
                cjkdata.get_resource('kanjd212'),
            ]

        streams = []
        try:
            for filename in kanjidic_files:
                streams.append(sopen(filename))

            # chain.from_iterable (assuming `chain` is itertools.chain) also
            # copes with an empty file list, where reduce() would raise
            # TypeError for lack of an initial value.
            self._parse_kanjidic(chain.from_iterable(streams))
        finally:
            # The parser is expected to have consumed every line by the time
            # it returns, so the underlying files can be released here.
            for stream in streams:
                stream.close()
Exemple #2
0
    def __init__(self, istream=None):
        """
        Parses a radkfile from a byte stream, decoding it as UTF-8.

        @param istream: Byte stream containing the radkfile to parse. When
            None, the 'radkfile' resource from cjkdata is opened, parsed and
            closed again. A stream supplied by the caller is left open.
        """
        owns_stream = istream is None
        if owns_stream:
            # Binary mode: the codecs reader below does the decoding itself
            # and needs raw bytes from the underlying stream.
            istream = open(cjkdata.get_resource('radkfile'), 'rb')

        try:
            self._parse_radkfile(codecs.getreader('utf8')(istream))
        finally:
            # Only release what was opened here; callers keep ownership of
            # the streams they pass in.
            if owns_stream:
                istream.close()
Exemple #3
0
    def __init__(self):
        """
        Loads the 'tables/gbk_pinyin_table' resource into this mapping.

        Each non-comment, non-blank line is split on whitespace: the first
        field becomes the key and the remaining fields, as a tuple, become
        the value stored under it.
        """
        self._segmenter = get_pinyin_segmenter()

        # Load the table mapping hanzi to pinyin.
        table_path = cjkdata.get_resource('tables/gbk_pinyin_table')
        with codecs.open(table_path, 'r', 'utf8') as istream:
            for line in istream:
                if line.startswith('#'):
                    continue

                # split() with no arguments already ignores leading and
                # trailing whitespace, so no separate rstrip() is needed.
                entries = line.split()
                if not entries:
                    # Blank line: nothing to index, and entries[0] would
                    # otherwise raise IndexError.
                    continue

                hanzi = entries[0]
                self[hanzi] = tuple(entries[1:])
Exemple #4
0
def get_list(list_name):
    """
    Returns the kanji in the given list.

    The list is read as UTF-8 text from the 'lists/char/<list_name>'
    resource and its contents are handed to scripts.unique_kanji().
    """
    list_path = cjkdata.get_resource('lists/char/%s' % list_name)
    with codecs.open(list_path, 'r', 'utf8') as list_stream:
        contents = list_stream.read()
        kanji = scripts.unique_kanji(contents)
    return kanji
Exemple #5
0
def get_lists():
    """
    Returns a list containing the names of all existing kanji lists.

    The names are the directory entries of the 'lists/char' resource.
    """
    lists_dir = cjkdata.get_resource('lists/char')
    list_names = os.listdir(lists_dir)
    return list_names
Exemple #6
0
def _default_stream():
    """
    Opens the 'tables/zhuyin_pinyin_conv_table' resource and returns the
    file object. The caller is responsible for closing it.
    """
    table_path = cjkdata.get_resource('tables/zhuyin_pinyin_conv_table')
    stream = open(table_path)
    return stream