def get_lexibase(path, name, columns=None, preprocessing=None, namespace=None, lexibase=False):
    """Load a CLDF dataset and export it as a TSV wordlist or a LexiBase database.

    Parameters
    ----------
    path
        Path to the CLDF dataset (passed to ``Wordlist.from_cldf``).
    name
        Output base name: the TSV filename, or the sqlite database /
        table name when ``lexibase`` is true.
    columns
        Optional override for the CLDF columns to load; defaults to the
        standard language/concept/value/form/segments/comment set.
    preprocessing
        Optional callable taking the loaded ``Wordlist`` and returning the
        data dictionary ``D``; when absent, ``D`` is built directly from
        the wordlist rows.
    namespace
        Optional override for the CLDF-column -> wordlist-column mapping.
    lexibase
        When false (default), write ``name`` as a plain TSV wordlist;
        when true, create a LexiBase sqlite3 database instead.
    """
    wordlist = Wordlist.from_cldf(
        path,
        columns=columns or (
            "language_id", "concept_name", "value", "form", "segments",
            "comment"),
        # dict literal instead of dict([(k, v), ...]) — same mapping,
        # idiomatic form (flake8-comprehensions C406)
        namespace=namespace or {
            "language_id": "doculect",
            "concept_name": "concept",
            "value": "value",
            "form": "form",
            "segments": "tokens",
            "comment": "note",
        })
    if preprocessing:
        D = preprocessing(wordlist)
    else:
        D = {idx: wordlist[idx] for idx in wordlist}
    # Header row: key 0 holds the column names. NOTE(review): the flattened
    # source is ambiguous on whether this also ran after preprocessing; it is
    # applied in both branches here, which refreshes the header from the
    # (possibly preprocessing-mutated) wordlist — confirm against callers.
    D[0] = wordlist.columns
    if not lexibase:
        # ignore="all" / prettify=False: plain machine-readable TSV dump.
        Wordlist(D).output("tsv", filename=name, ignore="all", prettify=False)
    else:
        lex = LexiBase(D, dbase=name + ".sqlite3")
        lex.create(name)
def main():
    """Command-line entry point.

    Understood argv tokens:
      debug / --debug       enable debug output for profile writing
      pinyin WORD           print the pinyin reading of WORD
      profile PATH          build an orthography profile from a wordlist at
                            PATH (``--cldf`` loads it as a CLDF dataset);
                            tuned by --column, --language/-l,
                            -o/--filename.
    """
    def value_after(flag):
        # Token immediately following *flag* on the command line.
        return argv[argv.index(flag) + 1]

    debug = 'debug' in argv or '--debug' in argv

    if 'pinyin' in argv:
        print(sinopy.pinyin(value_after('pinyin')))

    if 'profile' in argv:
        if '--cldf' in argv:
            wl = Wordlist.from_cldf(
                value_after('profile'),
                col='language_id', row='parameter_id')
            wl.add_entries('doculect', 'language_name', lambda x: x)
        else:
            wl = Wordlist(value_after('profile'))

        # Defaults, then flag overrides; later flags in each tuple win,
        # matching the original left-to-right assignment order.
        column, language, filename = 'ipa', None, 'orthography.tsv'
        if '--column' in argv:
            column = value_after('--column')
        for flag in ('--language', '-l'):
            if flag in argv:
                language = value_after(flag)
        for flag in ('-o', '--filename'):
            if flag in argv:
                filename = value_after(flag)

        segments.write_structure_profile(
            wl, column=column, filename=filename, debug=debug,
            language=language)