def __init__(self, kanjidic_files=None):
    """
    Populate this dictionary from one or more kanjidic-format files.

    @param kanjidic_files: Iterable of file locations, each passed to
        `sopen`. When None, the 'kanjidic' and 'kanjd212' resources
        reported by `cjkdata.get_resource` are used.
    """
    dict.__init__(self)
    if kanjidic_files is None:
        kanjidic_files = [
            cjkdata.get_resource('kanjidic'),
            cjkdata.get_resource('kanjd212'),
        ]

    streams = []
    try:
        # Open one at a time so that a failure part-way through still
        # closes the streams that were already opened.
        for filename in kanjidic_files:
            streams.append(sopen(filename))

        # chain.from_iterable presents all files as one flat line stream.
        # Unlike reduce(chain, ...), it also copes with an empty list
        # (reduce raises TypeError) and does not nest one chain object
        # per extra file.
        self._parse_kanjidic(chain.from_iterable(streams))
    finally:
        # NOTE(review): assumes _parse_kanjidic consumes the stream fully
        # before returning, so closing here is safe -- confirm.
        for stream in streams:
            stream.close()
def __init__(self, istream=None):
    """
    Parse a radkfile from a byte stream.

    @param istream: A binary stream of UTF-8 encoded radkfile data. It is
        wrapped in a UTF-8 `codecs` reader before being handed to
        `_parse_radkfile`. When None, the 'radkfile' resource reported by
        `cjkdata.get_resource` is opened (and closed again) here. A
        caller-supplied stream is left open for the caller to manage.
    """
    owns_stream = istream is None
    if owns_stream:
        # Binary mode: the codecs reader below does the decoding. A
        # text-mode file would already yield str on Python 3 and the
        # reader would then fail trying to decode it a second time.
        istream = open(cjkdata.get_resource('radkfile'), 'rb')

    try:
        self._parse_radkfile(codecs.getreader('utf8')(istream))
    finally:
        # NOTE(review): assumes _parse_radkfile consumes the stream fully
        # before returning -- confirm.
        if owns_stream:
            istream.close()
def __init__(self):
    """
    Set up the pinyin segmenter and load the hanzi-to-pinyin table.

    The table is read as UTF-8 from the 'tables/gbk_pinyin_table'
    resource. Lines starting with '#' are skipped, as are blank lines.
    On every other line the first whitespace-separated field is used as
    the key and the remaining fields are stored as a tuple under it.
    """
    self._segmenter = get_pinyin_segmenter()

    # Load the table mapping hanzi to pinyin.
    table_path = cjkdata.get_resource('tables/gbk_pinyin_table')
    with codecs.open(table_path, 'r', 'utf8') as istream:
        for line in istream:
            if line.startswith('#'):
                continue

            # split() with no argument already ignores leading/trailing
            # whitespace, including the newline.
            entries = line.split()
            if not entries:
                # Blank or whitespace-only line: there is no key to
                # index, and entries[0] would raise IndexError.
                continue

            self[entries[0]] = tuple(entries[1:])
def get_list(list_name):
    """
    Read the named character list and return its kanji.

    The list is looked up as the resource 'lists/char/<list_name>', read
    in full as UTF-8 text, and passed through `scripts.unique_kanji`,
    whose result is returned unchanged.
    """
    resource_path = cjkdata.get_resource('lists/char/%s' % list_name)
    with codecs.open(resource_path, 'r', 'utf8') as list_stream:
        contents = list_stream.read()
        return scripts.unique_kanji(contents)
def get_lists():
    """
    Return the names of all available kanji lists.

    These are the directory entries of the 'lists/char' resource, in
    whatever order `os.listdir` reports them.
    """
    lists_dir = cjkdata.get_resource('lists/char')
    list_names = os.listdir(lists_dir)
    return list_names
def _default_stream():
    """
    Open the 'tables/zhuyin_pinyin_conv_table' resource for reading.

    The file is opened with the defaults of the built-in `open` and the
    open file object is returned; it is not closed here.
    """
    table_path = cjkdata.get_resource('tables/zhuyin_pinyin_conv_table')
    stream = open(table_path)
    return stream