def __init__(self):
    """Set up the database handles and the dictionary lookup objects.

    Connects to the SQLite file ``.resources/j_edict.db`` (a relative path,
    so it is resolved against the current working directory), keeps both the
    connection and a cursor on the instance, and instantiates the Kanjidic
    and RadkDict objects with their default arguments.
    """
    connection = sqlite3.connect(".resources/j_edict.db")
    self.conn = connection
    self.cur = connection.cursor()

    self.kanji_dictionary = kanjidic.Kanjidic()
    self.radical_dictionary = RadkDict()
def build(cls):
    """Rebuild the collection from the default Kanjidic data.

    Drops whatever ``cls`` currently stores, then creates and saves one
    ``cls`` instance per Kanjidic entry, copying the entry's kanji, on
    readings, kun readings and glosses.
    """
    cls.drop_collection()
    kjd = kanjidic.Kanjidic()
    # values() instead of itervalues(): the latter exists only on Python 2
    # dicts and raises AttributeError on Python 3; values() works on both.
    for entry in kjd.values():
        translation = cls(
            kanji=entry.kanji,
            on_readings=entry.on_readings,
            kun_readings=entry.kun_readings,
            glosses=entry.gloss,
        )
        translation.save()
def build(cls):
    """Repopulate the collection from the configured Kanjidic files.

    The existing collection is dropped first. Kanjidic is then loaded from
    ``settings.KANJI_DIC`` and ``settings.KANJI_D212``, and every entry is
    stored as a ``cls`` instance holding its kanji, on/kun readings and
    glosses.
    """
    cls.drop_collection()

    source_files = [settings.KANJI_DIC, settings.KANJI_D212]
    dictionary = kanjidic.Kanjidic(kanjidic_files=source_files)

    for item in dictionary.values():
        cls(
            kanji=item.kanji,
            on_readings=item.on_readings,
            kun_readings=item.kun_readings,
            glosses=item.gloss,
        ).save()
def glossKanji(k, t):
    """Return a formatted summary of the glosses and readings of kanji ``k``.

    Args:
        k: Key to look up in Kanjidic.
        t: Output style selector, compared against the module-level values
            ``KDSG``, ``GLOSS``, ``KSG`` and tested for membership in
            ``WRAPL``.

    Returns:
        ``''`` when ``k`` has no Kanjidic entry. Otherwise a unicode string:
        glosses and readings only for ``KDSG``/``GLOSS``, a bulleted line
        prefixed with the kanji for ``KSG``, or an unbulleted line prefixed
        with the kanji for styles in ``WRAPL``. ``None`` when ``t`` matches
        none of those styles.
    """
    from cjktools.resources import kanjidic

    # NOTE(review): the dictionary is constructed on every call; if loading
    # turns out to be expensive, consider sharing one instance.
    kjd = kanjidic.Kanjidic()
    try:
        entry = kjd[k]
    except KeyError:
        # Only a missing entry means "no gloss"; a bare except here used to
        # hide KeyboardInterrupt, SystemExit and genuine bugs as well.
        return ''

    kanji = u' '.join(entry.kanji)
    gloss = '[' + u', '.join(entry.gloss) + ']'
    on_r = '[' + u', '.join(entry.on_readings) + ']'
    # Only the first four kun readings are shown.
    kun_r = '[' + u', '.join(entry.kun_readings[:4]) + ']'

    if t == KDSG or t == GLOSS:
        return u'%s %s %s' % (gloss, on_r, kun_r)
    elif t == KSG:
        return u'• %s: %s %s %s' % (kanji, gloss, on_r, kun_r)
    elif t in WRAPL:
        return u'%s: %s %s %s' % (kanji, gloss, on_r, kun_r)
    # Unrecognised style: keep the historical result (None), now explicit.
    return None
def _jpn(token):
    """Convert jpn token to phonemes.

    Loads the tab-separated lookup tables ``lib/data/phon/ja-Hira`` and
    ``lib/data/phon/ja-Kata`` into a mapping from key to list of values,
    rewrites ``token`` segment by segment (characters of segments whose
    script types contain ``3`` are replaced by their first Kanjidic on
    reading; all other segments are kept unchanged), and passes the result
    to ``_maxphon`` together with the mapping.

    Args:
        token: The text to convert.

    Returns:
        The value returned by ``_maxphon``, or ``"?"`` when that value is
        the empty string.
    """
    from cjktools import scripts
    from cjktools.resources import kanjidic

    lkp = {}
    for fn in ["lib/data/phon/ja-Hira", "lib/data/phon/ja-Kata"]:
        # The context manager closes the handle; the previous
        # open(fn).readlines() left it to the garbage collector.
        with open(fn) as table:
            for line in table:
                line = line.strip()
                if line == "":
                    continue
                kv = line.split("\t")
                if len(kv) != 2:
                    # Malformed row: report it on stderr and carry on.
                    print("!", kv, file=sys.stderr)
                    continue
                lkp.setdefault(kv[0].strip(), []).append(kv[1].strip())

    kjd = kanjidic.Kanjidic(kanjidic_files=["lib/data/dict/ja"])

    parts = []
    for seg in scripts.script_boundaries(token):
        # NOTE(review): 3 is presumably the cjktools script code for kanji;
        # confirm against cjktools.scripts before relying on it.
        if 3 in scripts.script_types(seg):
            for ch in seg:
                # Characters without an entry or without on readings
                # contribute nothing to the output.
                if ch in kjd:
                    readings = kjd[ch].on_readings
                    if len(readings) > 0:
                        parts.append(readings[0])
        else:
            parts.append(seg)

    res = _maxphon(lkp, "".join(parts))
    if res == "":
        return "?"
    return res
def _build_alternation_tree(cls, kanji_set):
    """
    Construct the tree holding every reading and alternation.

    The root receives one child node per kanji in ``kanji_set``. Each kanji
    node then receives one child per reading that Kanjidic lists for it
    (kanji missing from Kanjidic get none). Finally every model in
    ``_alternation_models`` is instantiated and applied to the tree through
    ``cls._add_alternation_model``, so that each fixed depth below a kanji
    corresponds to one alternation model. Returns the root node.
    """
    log.start('Building alternation tree', nSteps=3)

    log.log('Adding base kanji set')
    tree = AltTreeNode('root', '/')
    for character in kanji_set:
        tree.add_child(AltTreeNode(character, 'k'))

    log.log('Adding good readings')
    dictionary = kanjidic.Kanjidic()
    for node in tree.children.values():
        character = node.label
        if character not in dictionary:
            continue
        for reading in dictionary[character].all_readings:
            node.add_child(AltTreeNode(reading, 'b'))

    log.start('Adding alternation models', nSteps=len(_alternation_models))
    # Pad every model name to the longest one so the progress output aligns.
    widest = max(len(name) for (name, _code, _class) in _alternation_models)
    row_format = '%%-%ds ' % widest
    for index, (model_name, model_code, model_class) in enumerate(
            _alternation_models):
        log.log(row_format % model_name, newLine=False)
        sys.stdout.flush()
        model = model_class()
        # Only the very first model is flagged as such.
        cls._add_alternation_model(model, model_code, tree,
                                   first=(index == 0))
    log.finish()

    log.finish()
    return tree