def gen_dict(terms, lang1, lang2, version):
    """Build a ``lang1`` -> ``lang2`` dictionary from a table of terms.

    Args:
        terms: Mapping whose values are per-term containers indexed by
            language code.
        lang1: Source language code; each item of a term's ``lang1`` entry
            becomes a key.
        lang2: Target language code; the term's whole ``lang2`` entry is
            passed as the value for each key.
        version: Version passed through to the ``Dictionary`` constructor.

    Returns:
        The populated ``Dictionary``.
    """
    result = Dictionary(lang1=lang1, lang2=lang2, version=version,
                        banner=banner)
    for entry in terms.values():
        # Terms lacking either language contribute nothing.
        if lang1 not in entry or lang2 not in entry:
            continue
        translation = entry[lang2]
        for source_word in entry[lang1]:
            result.add(source_word, translation)
    return result
def make_dict(dst, srcs, minkeys=1):
    """Merge several dictionary files into one and save it to ``dst``.

    The sources are loaded, ordered from largest to smallest, and every
    smaller dictionary is merged into the largest one. The result gets the
    highest version found among the sources and a banner built from the
    distinct non-empty source banners.

    Args:
        dst: Path the merged dictionary is saved to.
        srcs: Iterable of source paths, each loadable by ``Dictionary``.
        minkeys: Minimum ``len()`` the merged dictionary must have to be
            written; below that it is reported and skipped.

    Raises:
        IndexError: If ``srcs`` is empty (there is no base to merge into).
    """
    dicts = []
    for src in srcs:
        print('merge_dicts: Reading {}'.format(src))
        dicts.append(Dictionary(src))

    # Largest first, so the biggest dictionary serves as the merge base.
    dicts.sort(key=len, reverse=True)

    # Distinct non-empty banners, in size order. Collected before merging so
    # the list reflects the sources as loaded.
    banners = []
    for d in dicts:
        if d.banner and d.banner not in banners:
            banners.append(d.banner)

    res = dicts[0]
    for d in dicts[1:]:
        # The banner is assembled once below. Concatenating it here was
        # either overwritten afterwards or, when no source had a banner,
        # left stray newlines (or raised TypeError on a None banner).
        res.merge(d)

    if len(banners) == 1:
        res.banner = banners[0]
    elif len(banners) > 1:
        res.banner = '\n\n'.join([multi_banner] + banners)

    res.version = version_to_str(max(parse_version(d.version) for d in dicts))
    res.validate()

    if len(res) >= minkeys:
        print('merge_dicts: Dict {} writing {}'.format(res.get_name(), dst))
        res.save(dst)
    else:
        print('merge_dicts: Dict {} got only {} keys, SKIPPING'.format(
            res.get_name(), len(res)))
def get_dict(lang1, lang2, version):
    """Return the cached ``Dictionary`` for a language pair, creating it once.

    Dictionaries are cached in the module-level ``dicts`` mapping under the
    ``(lang1, lang2)`` key. A new one is created with both codes translated
    through ``codes2to3``.

    Args:
        lang1: Source language code, used as a key into ``codes2to3``.
        lang2: Target language code, used as a key into ``codes2to3``.
        version: Version passed to the ``Dictionary`` constructor.

    Returns:
        The cached or newly created ``Dictionary``, or ``None`` when it
        cannot be created (for example a code missing from ``codes2to3``).
    """
    key = (lang1, lang2)
    if key in dicts:
        return dicts[key]
    try:
        d = Dictionary(lang1=codes2to3[lang1], lang2=codes2to3[lang2],
                       version=version, banner=banner)
    except Exception:
        # Best-effort: an unsupported pair yields None. Unlike a bare
        # ``except:``, this lets KeyboardInterrupt and SystemExit propagate.
        return None
    else:
        dicts[key] = d
        return d
def load_dicts(dirpath):
    """Count the items of every ``???-???.dict`` file in a directory.

    A percentage progress line is written to stderr while loading and is
    blanked out again once all files are processed.

    Args:
        dirpath: Directory searched (non-recursively) for files matching
            ``???-???.dict``.

    Returns:
        A dict mapping each file's base name to
        ``Dictionary(path).items_count()``; empty when nothing matches.
    """
    dicts = {}
    paths = glob.glob(os.path.join(dirpath, '???-???.dict'))
    total = len(paths)
    for i, path in enumerate(paths, 1):
        pr = 100.0 * i / total
        # Flush the stream actually written to. The progress text has no
        # newline, so without this it may stay in the stderr buffer.
        print('\rprocessing {}: {:.0f}%'.format(dirpath, pr), end='',
              file=sys.stderr, flush=True)
        name = os.path.basename(path)
        dicts[name] = Dictionary(path).items_count()
    # Overwrite the progress line with spaces and return the cursor to
    # column 0.
    print('\r{}\r'.format(' ' * (len(dirpath) + 18)), end='',
          file=sys.stderr, flush=True)
    return dicts
'''.strip() if __name__ == "__main__": if len(sys.argv) != 4: print('mdbg_convert: Use: {} SRC_FILE DST_DIR VERSION'.format( sys.argv[0])) exit(1) srcpath = sys.argv[1] dstdir = sys.argv[2] version = sys.argv[3] print('mdbg_convert: Reading {}'.format(srcpath)) chi_eng = Dictionary(lang1='chi', lang2='eng', version=version, banner=banner) cht_eng = Dictionary(lang1='cht', lang2='eng', version=version, banner=banner) chs_eng = Dictionary(lang1='chs', lang2='eng', version=version, banner=banner) chs_cht = Dictionary(lang1='chs', lang2='cht', version=version, banner=banner) dicts = [chi_eng, cht_eng, chs_eng, chs_cht]
d.add(key, val) if __name__ == "__main__": if len(sys.argv) <= 3: print('svob_convert: Use: {} SRC_FILE DST_DIR VERSION'.format(sys.argv[0])) exit(1) srcpath = sys.argv[1] dstdir = sys.argv[2] version = sys.argv[3] minkeys = int(sys.argv[4]) if len(sys.argv) > 4 else 1 print('svob_convert: Reading {}'.format(srcpath)) d = Dictionary(lang1='cze', lang2='eng', version=version, banner=banner) with open(srcpath, 'rt', encoding='utf8') as fp: for line in fp: if line and line[0] != '#': ent = line.split('\t') if len(ent) >= 2: val = ent[0] key = ent[1] if key and val: dict_add(d, key, val) d.validate() if len(d) >= minkeys:
if __name__ == "__main__": if len(sys.argv) <= 3: print('Use: {} SRC_DIR DST_DIR VERSION [MIN_KEYS]'.format(sys.argv[0])) exit(1) srcdir = sys.argv[1] dstdir = sys.argv[2] version = sys.argv[3] minkeys = int(sys.argv[4]) if len(sys.argv) > 4 else 1 srcs = list_dicts(srcdir) while srcs: (lang1, lang2), path = srcs.popitem() d = Dictionary(lang1=lang1, lang2=lang2, version=version, banner=banner) try: print('Reading dict {}/{} from {}'.format(lang1, lang2, path)) read_sql_dict(d, path) if lang1 > lang2: d = d.transpose() except Exception as e: print('[!] error: ' + str(e)) path = srcs.get((lang2, lang1), None) if path: try: d2 = Dictionary(lang1=lang2, lang2=lang1,
def readDictFromSqliteDB(d, fname):
    """Fill ``d`` with single-word translation pairs from an SQLite file.

    Reads ``written_rep`` and ``trans_list`` from the ``translation`` table.
    ``trans_list`` is split on ``'|'``. A pair is added only when both the
    stripped key and the stripped translation contain no space.

    Args:
        d: Object with an ``add(key, val)`` method that receives the pairs.
        fname: Path of the SQLite database file.
    """
    db = sqlite3.connect(fname)
    try:
        cursor = db.cursor()
        cursor.execute('select written_rep, trans_list from translation')
        # Disabled filter, kept for reference:
        #where part_of_speech in (
        #'noun', 'properNoun', 'adjective', 'adverb', 'interjection',
        #'possessiveAdjective', 'conjunction', 'verb', 'particle', 'preposition',
        #'modal', 'pronoun', 'indefinitePronoun', 'numeral',
        #'interrogativePronoun', 'indefiniteCardinalNumeral',
        #'multiplicativeNumeral', 'personalPronoun', 'cardinalNumeral',
        #'collective', 'participleAdjective', 'numeralFraction'); ''')
        for written_rep, trans_list in cursor:
            # NULL columns come back as None and cannot be stripped/split.
            if written_rep is None or trans_list is None:
                continue
            key = written_rep.strip()
            if ' ' in key:
                # Multi-word headwords are skipped.
                continue
            for val in trans_list.split('|'):
                val = val.strip()
                if ' ' not in val:
                    d.add(key, val)
    finally:
        # Release the database handle even if reading fails part-way.
        db.close()


if __name__ == "__main__":
    if len(sys.argv) == 3:
        d = Dictionary()
        readDictFromSqliteDB(d, sys.argv[1])
        d.validate()
        d.save(sys.argv[2])
    else:
        print('Usage:')
        print('\t' + sys.argv[0] + ' infile.sqlite3 outfile.dict')
# Licence: GNU/FDL 1.1 '''.strip() if __name__ == "__main__": if len(sys.argv) <= 3: print('Use: {} SRC_FILE DST_DIR VERSION'.format(sys.argv[0])) exit(1) srcpath = sys.argv[1] dstdir = sys.argv[2] version = sys.argv[3] minkeys = int(sys.argv[4]) if len(sys.argv) > 4 else 1 print('Reading dict {}'.format(srcpath)) d = Dictionary(lang1='cze', lang2='eng', version=version, banner=banner) with open(srcpath, 'rt', encoding='utf8') as fp: for line in fp: if line and line[0] != '#': ent = line.split('\t') if len(ent) >= 2: val = ent[0] key = ent[1] if key and val and ' ' not in key and ' ' not in val: d.add(key, val) d.validate() if len(d) >= minkeys: