Ejemplo n.º 1
0
def gen_dict(terms, lang1, lang2, version):
    """Build a ``lang1`` -> ``lang2`` Dictionary from a mapping of terms.

    Only the values of ``terms`` are used. A term contributes entries when
    it contains both ``lang1`` and ``lang2``: every item found under
    ``term[lang1]`` is added as a key mapped to ``term[lang2]``.

    The module-level ``banner`` is passed to the new Dictionary.
    """
    result = Dictionary(lang1=lang1,
                        lang2=lang2,
                        version=version,
                        banner=banner)
    for entry in terms.values():
        # Terms lacking either language cannot produce a translation pair.
        if lang1 not in entry or lang2 not in entry:
            continue
        translation = entry[lang2]
        for headword in entry[lang1]:
            result.add(headword, translation)
    return result
Ejemplo n.º 2
0
def make_dict(dst, srcs, minkeys=1):
    """Merge the dictionaries read from ``srcs`` and save the result to ``dst``.

    The dictionary with the most entries (by ``len``) is used as the merge
    target; all others are merged into it. The result's banner is built
    from the distinct non-empty banners of the inputs, and its version is
    the highest input version.

    Args:
        dst: Path the merged dictionary is saved to.
        srcs: Iterable of source paths, each loadable by ``Dictionary``.
            Must yield at least one path, otherwise ``IndexError`` is
            raised when the merge target is selected.
        minkeys: Minimum number of entries the merged dictionary needs in
            order to be written; smaller results are reported and skipped.
    """
    dicts = []
    for src in srcs:
        print('merge_dicts: Reading {}'.format(src))
        dicts.append(Dictionary(src))

    # Largest first, so the biggest dictionary becomes the merge target.
    dicts.sort(key=len, reverse=True)

    # Distinct non-empty banners, in order of first appearance.
    banners = []
    for d in dicts:
        if d.banner and d.banner not in banners:
            banners.append(d.banner)

    res = dicts[0]
    for d in dicts[1:]:
        # The banner is deliberately not concatenated here: it is derived
        # from ``banners`` below. Appending per-dict banners would only add
        # stray newlines (or fail on a None banner) when none is set.
        res.merge(d)

    if len(banners) == 1:
        res.banner = banners[0]
    elif len(banners) > 1:
        # Several sources: prefix the combined banner with the shared header.
        banners.insert(0, multi_banner)
        res.banner = '\n\n'.join(banners)

    res.version = version_to_str(max(parse_version(d.version) for d in dicts))
    res.validate()

    if len(res) >= minkeys:
        print('merge_dicts: Dict {} writing {}'.format(res.get_name(), dst))
        res.save(dst)
    else:
        print('merge_dicts: Dict {} got only {} keys, SKIPPING'.format(
            res.get_name(), len(res)))
Ejemplo n.º 3
0
def get_dict(lang1, lang2, version):
    """Return the cached Dictionary for ``(lang1, lang2)``, creating it if needed.

    Dictionaries are cached in the module-level ``dicts`` mapping, keyed by
    the ``(lang1, lang2)`` tuple as given. A new Dictionary is created with
    both codes translated through ``codes2to3`` and with the module-level
    ``banner``.

    Returns:
        The Dictionary, or ``None`` when it could not be created (for
        example when a code is missing from ``codes2to3``).
    """
    key = (lang1, lang2)
    if key in dicts:
        return dicts[key]

    try:
        d = Dictionary(lang1=codes2to3[lang1],
                       lang2=codes2to3[lang2],
                       version=version,
                       banner=banner)
    except Exception:
        # Best effort: report failure to the caller as None, but let
        # KeyboardInterrupt / SystemExit propagate.
        return None

    dicts[key] = d
    return d
Ejemplo n.º 4
0
def load_dicts(dirpath):
    """Count the items of every ``???-???.dict`` file in ``dirpath``.

    A percentage progress line is written to stderr while the files are
    processed and blanked out afterwards.

    Args:
        dirpath: Directory searched (non-recursively) for dictionary files.

    Returns:
        A dict mapping each file's base name to the value returned by
        ``Dictionary(path).items_count()``. Empty when nothing matches.
    """
    counts = {}
    paths = glob.glob(os.path.join(dirpath, '???-???.dict'))
    total = len(paths)
    for i, path in enumerate(paths, 1):
        pr = 100.0 * i / total
        # The progress line has no newline, so stderr must be flushed
        # explicitly for it to show up when the stream is buffered.
        print('\rprocessing {}: {:.0f}%'.format(dirpath, pr),
              end='',
              file=sys.stderr,
              flush=True)

        name = os.path.basename(path)
        counts[name] = Dictionary(path).items_count()

    # Overwrite the progress line with spaces and return the cursor to
    # the start of the line.
    print('\r{}\r'.format(' ' * (len(dirpath) + 18)),
          end='',
          file=sys.stderr,
          flush=True)
    return counts
Ejemplo n.º 5
0
'''.strip()

# Script entry point: read the command line and create one Dictionary per
# language pair handled by this converter.
if __name__ == "__main__":
    # Exactly three arguments are required: SRC_FILE, DST_DIR and VERSION.
    if len(sys.argv) != 4:
        print('mdbg_convert: Use: {} SRC_FILE DST_DIR VERSION'.format(
            sys.argv[0]))
        exit(1)

    srcpath = sys.argv[1]
    dstdir = sys.argv[2]
    version = sys.argv[3]

    print('mdbg_convert: Reading {}'.format(srcpath))

    # All four dictionaries share the same version and banner.
    # NOTE(review): 'chi' / 'cht' / 'chs' presumably stand for Chinese,
    # traditional Chinese and simplified Chinese — confirm.
    chi_eng = Dictionary(lang1='chi',
                         lang2='eng',
                         version=version,
                         banner=banner)
    cht_eng = Dictionary(lang1='cht',
                         lang2='eng',
                         version=version,
                         banner=banner)
    chs_eng = Dictionary(lang1='chs',
                         lang2='eng',
                         version=version,
                         banner=banner)
    chs_cht = Dictionary(lang1='chs',
                         lang2='cht',
                         version=version,
                         banner=banner)
    dicts = [chi_eng, cht_eng, chs_eng, chs_cht]
        d.add(key, val)


# Script entry point: convert a tab-separated source file into a cze/eng
# Dictionary.
if __name__ == "__main__":
    # At least SRC_FILE, DST_DIR and VERSION must be given.
    if len(sys.argv) <= 3:
        print('svob_convert: Use: {} SRC_FILE DST_DIR VERSION'.format(sys.argv[0]))
        exit(1)

    srcpath = sys.argv[1]
    dstdir  = sys.argv[2]
    version = sys.argv[3]
    # An optional fourth argument sets minkeys (default 1); the usage text
    # above does not mention it.
    minkeys = int(sys.argv[4]) if len(sys.argv) > 4 else 1

    print('svob_convert: Reading {}'.format(srcpath))

    d = Dictionary(lang1='cze', lang2='eng', version=version, banner=banner)

    with open(srcpath, 'rt', encoding='utf8') as fp:
        for line in fp:
            # Lines starting with '#' are skipped.
            if line and line[0] != '#':
                ent = line.split('\t')
                if len(ent) >= 2:
                    # The first field is used as the value, the second as
                    # the key.
                    # NOTE(review): the line is not stripped, so with exactly
                    # two fields the key keeps its trailing newline — confirm
                    # dict_add copes with that.
                    val = ent[0]
                    key = ent[1]

                    if key and val:
                        dict_add(d, key, val)

    d.validate()

    if len(d) >= minkeys:
Ejemplo n.º 7
0
# Script entry point: read the command line and list the source dictionaries.
if __name__ == "__main__":
    # At least SRC_DIR, DST_DIR and VERSION must be given; MIN_KEYS is
    # optional.
    if len(sys.argv) <= 3:
        print('Use: {} SRC_DIR DST_DIR VERSION [MIN_KEYS]'.format(sys.argv[0]))
        exit(1)

    srcdir = sys.argv[1]
    dstdir = sys.argv[2]
    version = sys.argv[3]
    # MIN_KEYS defaults to 1 when omitted.
    minkeys = int(sys.argv[4]) if len(sys.argv) > 4 else 1

    # The code below uses srcs as a dict keyed by (lang1, lang2) with the
    # source path as value.
    srcs = list_dicts(srcdir)
    while srcs:
        (lang1, lang2), path = srcs.popitem()
        d = Dictionary(lang1=lang1,
                       lang2=lang2,
                       version=version,
                       banner=banner)

        try:
            print('Reading dict {}/{} from {}'.format(lang1, lang2, path))
            read_sql_dict(d, path)
            if lang1 > lang2:
                d = d.transpose()
        except Exception as e:
            print('[!] error: ' + str(e))

        path = srcs.get((lang2, lang1), None)
        if path:
            try:
                d2 = Dictionary(lang1=lang2,
                                lang2=lang1,
Ejemplo n.º 8
0
def readDictFromSqliteDB(d, fname):
    """Fill ``d`` with single-word translations from an SQLite database.

    Reads ``written_rep`` and ``trans_list`` from the ``translation`` table.
    ``trans_list`` is split on ``'|'``; each stripped translation is added
    under the stripped ``written_rep`` via ``d.add(key, val)``. Keys and
    translations containing a space are skipped.

    Args:
        d: Object with an ``add(key, val)`` method that receives the pairs.
        fname: Path of the SQLite database file.
    """
    db = sqlite3.connect(fname)
    try:
        cursor = db.cursor()
        # All rows are read; a filter on part_of_speech was sketched here
        # at some point but is not applied.
        cursor.execute('select written_rep, trans_list from translation')
        for row in cursor:
            key = row[0].strip()
            if ' ' in key:
                continue
            for val in row[1].split('|'):
                val = val.strip()
                if ' ' not in val:
                    d.add(key, val)
    finally:
        # Always release the database handle, even if reading fails.
        db.close()


# Script entry point: convert an SQLite database into a dictionary file.
if __name__ == "__main__":
    if len(sys.argv) != 3:
        # Wrong number of arguments: show how to call the script.
        print('Usage:')
        print('\t' + sys.argv[0] + ' infile.sqlite3 outfile.dict')
    else:
        d = Dictionary()
        readDictFromSqliteDB(d, sys.argv[1])
        d.validate()
        d.save(sys.argv[2])
# Licence: GNU/FDL 1.1
'''.strip()

# Script entry point: convert a tab-separated source file into a cze/eng
# Dictionary, keeping only pairs without spaces.
if __name__ == "__main__":
    # At least SRC_FILE, DST_DIR and VERSION must be given.
    if len(sys.argv) <= 3:
        print('Use: {} SRC_FILE DST_DIR VERSION'.format(sys.argv[0]))
        exit(1)

    srcpath = sys.argv[1]
    dstdir = sys.argv[2]
    version = sys.argv[3]
    # An optional fourth argument sets minkeys (default 1); the usage text
    # above does not mention it.
    minkeys = int(sys.argv[4]) if len(sys.argv) > 4 else 1

    print('Reading dict {}'.format(srcpath))

    d = Dictionary(lang1='cze', lang2='eng', version=version, banner=banner)

    with open(srcpath, 'rt', encoding='utf8') as fp:
        for line in fp:
            # Lines starting with '#' are skipped.
            if line and line[0] != '#':
                ent = line.split('\t')
                if len(ent) >= 2:
                    # The first field is used as the value, the second as
                    # the key.
                    val = ent[0]
                    key = ent[1]

                    # Only pairs where neither side contains a space are
                    # added.
                    # NOTE(review): the line is not stripped, so with exactly
                    # two fields the key keeps its trailing newline, which
                    # this check does not catch — confirm that is intended.
                    if key and val and ' ' not in key and ' ' not in val:
                        d.add(key, val)

    d.validate()

    if len(d) >= minkeys: