Example #1
0
    def __init__(self):
        """Open the dictionary database and create the lookup helpers.

        Connects to the SQLite file ``.resources/j_edict.db``, keeps both the
        connection and a cursor on it, then instantiates the Kanjidic and
        RadkDict lookups.
        """
        connection = sqlite3.connect(".resources/j_edict.db")
        self.conn = connection
        self.cur = connection.cursor()

        self.kanji_dictionary = kanjidic.Kanjidic()
        self.radical_dictionary = RadkDict()
Example #2
0
 def build(cls):
     """Rebuild the collection from the default Kanjidic data.

     Drops the existing collection, then creates and saves one ``cls``
     instance per Kanjidic entry, copying the entry's kanji, on-readings,
     kun-readings and glosses.
     """
     cls.drop_collection()
     kjd = kanjidic.Kanjidic()
     # ``values()`` works on both Python 2 and Python 3; ``itervalues()`` is
     # Python 2 only and raises AttributeError on Python 3.
     for entry in kjd.values():
         translation = cls(
             kanji=entry.kanji,
             on_readings=entry.on_readings,
             kun_readings=entry.kun_readings,
             glosses=entry.gloss,
         )
         translation.save()
Example #3
0
 def build(cls):
     """Rebuild the collection from the configured Kanjidic files.

     Drops the existing collection, loads Kanjidic from
     ``settings.KANJI_DIC`` and ``settings.KANJI_D212``, and saves one
     ``cls`` instance per entry.
     """
     cls.drop_collection()
     dictionary = kanjidic.Kanjidic(
         kanjidic_files=[settings.KANJI_DIC, settings.KANJI_D212])
     for item in dictionary.values():
         # Collect the copied fields first, then construct and persist.
         fields = {
             'kanji': item.kanji,
             'on_readings': item.on_readings,
             'kun_readings': item.kun_readings,
             'glosses': item.gloss,
         }
         cls(**fields).save()
Example #4
0
def glossKanji(k, t):
    """Return a formatted gloss/readings line for kanji *k* in layout *t*.

    Looks *k* up in Kanjidic and formats its glosses, its on-readings and
    its first four kun-readings, each as a bracketed, comma-separated list.

    Returns:
        ``''`` when *k* has no Kanjidic entry (or cannot be used as a key);
        ``'<gloss> <on> <kun>'`` when *t* equals ``KDSG`` or ``GLOSS``;
        that text prefixed with ``'• <kanji>: '`` when *t* equals ``KSG``;
        that text prefixed with ``'<kanji>: '`` when *t* is in ``WRAPL``;
        ``None`` for any other *t*.
    """
    from cjktools.resources import kanjidic
    kjd = kanjidic.Kanjidic()
    try:
        entry = kjd[k]
    except (KeyError, TypeError):
        # KeyError: unknown kanji; TypeError: unhashable key. A bare
        # ``except`` would also swallow KeyboardInterrupt/SystemExit.
        return ''
    kanji = u' '.join(entry.kanji)
    gloss = '[' + u', '.join(entry.gloss) + ']'
    on_r = '[' + u', '.join(entry.on_readings) + ']'
    # Only the first four kun-readings are shown.
    kun_r = '[' + u', '.join(entry.kun_readings[:4]) + ']'
    if t == KDSG or t == GLOSS:
        return u'%s %s %s' % (gloss, on_r, kun_r)
    elif t == KSG:
        return u'• %s: %s %s %s' % (kanji, gloss, on_r, kun_r)
    elif t in WRAPL:
        return u'%s: %s %s %s' % (kanji, gloss, on_r, kun_r)
    # Unrecognised layout: keep the historical ``None`` result, but explicitly.
    return None
Example #5
0
def _jpn(token):
    """Convert jpn token to phonemes.

    Builds a lookup table from the tab-separated files
    ``lib/data/phon/ja-Hira`` and ``lib/data/phon/ja-Kata`` (key in the first
    column, value in the second; a key may map to several values), replaces
    each kanji in *token* that has at least one on-reading with its first
    on-reading, and passes the table and the resulting string to
    ``_maxphon``.

    Returns:
        The result of ``_maxphon``, or ``"?"`` when that result is the empty
        string.
    """
    from cjktools import scripts
    from cjktools.resources import kanjidic

    lkp = {}
    for fn in ["lib/data/phon/ja-Hira", "lib/data/phon/ja-Kata"]:
        # Context manager so the file handle is closed even if parsing fails.
        with open(fn) as fh:
            for line in fh:
                stripped = line.strip()
                if stripped == "":
                    continue
                kv = stripped.split("\t")
                if len(kv) != 2:
                    # Malformed row: report it on stderr and skip it.
                    print("!", kv, file=sys.stderr)
                    continue
                k = kv[0].strip()
                v = kv[1].strip()
                lkp.setdefault(k, []).append(v)

    kjd = kanjidic.Kanjidic(kanjidic_files=["lib/data/dict/ja"])
    parts = []
    for seg in scripts.script_boundaries(token):
        tipus = scripts.script_types(seg)
        # NOTE(review): 3 is presumably the kanji script code in
        # cjktools.scripts -- confirm against the installed version.
        if 3 in tipus:
            for ch in seg:
                # Kanji that are unknown or have no on-reading are dropped.
                if ch in kjd:
                    readings = kjd[ch].on_readings
                    if len(readings) > 0:
                        parts.append(readings[0])
        else:
            parts.append(seg)
    op = "".join(parts)

    res = _maxphon(lkp, op)
    if res == "":
        return "?"
    return res
    def _build_alternation_tree(cls, kanji_set):
        """
        Construct and return the tree of readings and alternations.

        The root gets one child node per kanji in kanji_set. Each kanji node
        found in Kanjidic then gets one child per known reading. Finally every
        model listed in _alternation_models is instantiated and applied to the
        tree via _add_alternation_model, so that each fixed depth below a
        kanji corresponds to an alternation model of some sort.
        """
        log.start('Building alternation tree', nSteps=3)

        log.log('Adding base kanji set')
        root_node = AltTreeNode('root', '/')
        for base_kanji in kanji_set:
            root_node.add_child(AltTreeNode(base_kanji, 'k'))

        log.log('Adding good readings')
        kjdic = kanjidic.Kanjidic()
        for kanji_node in root_node.children.values():
            label = kanji_node.label
            if label not in kjdic:
                # No dictionary entry: this kanji gets no reading children.
                continue
            for reading in kjdic[label].all_readings:
                kanji_node.add_child(AltTreeNode(reading, 'b'))

        log.start('Adding alternation models', nSteps=len(_alternation_models))
        # Left-justify model names to the widest one so the log lines align.
        widest = max(len(name) for (name, _code, _klass) in _alternation_models)
        pattern = '%%-%ds ' % widest
        for index, model in enumerate(_alternation_models):
            model_name, model_code, model_class = model
            log.log(pattern % model_name, newLine=False)
            sys.stdout.flush()
            model_obj = model_class()
            cls._add_alternation_model(
                model_obj,
                model_code,
                root_node,
                first=(index == 0),
            )
        # Close the inner "alternation models" section, then the outer one.
        log.finish()

        log.finish()

        return root_node