import sys
import os
import argparse

# Directory containing this script. The corpus directories that are checked
# below, and the 'Common' directory, are looked up relative to it.
base_dir = os.path.dirname(os.path.abspath(__file__))
script_dir = os.path.join(base_dir, 'Common')

# 'Common' must be on sys.path before `common` can be imported.
sys.path.insert(0, script_dir)

import common

from polyglotdb.client.client import PGDBClient

token = common.load_token()

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('corpus_name', help='Name of the corpus')
    args = parser.parse_args()
    corpus_name = args.corpus_name

    # os.listdir() returns bare entry names, so each one has to be joined
    # onto base_dir before the isdir() test. Testing the bare name would
    # resolve it against the current working directory and wrongly report
    # no corpora whenever the script is launched from somewhere else.
    directories = [
        x for x in os.listdir(base_dir)
        if os.path.isdir(os.path.join(base_dir, x)) and x != 'Common'
    ]

    # Refuse to continue when the requested corpus has no directory next to
    # this script; list the ones that do exist to help the user.
    if args.corpus_name not in directories:
        print(
            'The corpus {0} does not have a directory (available: {1}). Please make it with a {0}.yaml file inside.'
            .format(args.corpus_name, ', '.join(directories)))
        sys.exit(1)
    corpus_conf = common.load_config(corpus_name)
'The corpus {0} does not have a directory (available: {1}). Please make it with a {0}.yaml file inside.' .format(args.corpus_name, ', '.join(directories))) sys.exit(1) corpus_conf = common.load_config(corpus_name) print('Processing...') ignored_speakers = corpus_conf.get('ignore_speakers', []) if reset: common.reset(corpus_name) ip = common.server_ip if docker: ip = common.docker_ip with ensure_local_database_running(corpus_name, ip=ip, port=common.server_port, token=common.load_token()) as params: print(params) config = CorpusConfig(corpus_name, **params) config.formant_source = 'praat' # Common set up common.loading(config, corpus_conf['corpus_directory'], corpus_conf['input_format']) common.lexicon_enrichment(config, corpus_conf['unisyn_spade_directory'], corpus_conf['dialect_code']) common.speaker_enrichment(config, corpus_conf['speaker_enrichment_file']) common.basic_enrichment(
# Main body of the polysyllabic export run: validate the requested corpus,
# optionally reset it, then load, enrich and export inside a running database.
corpus_name = args.corpus_name
reset = args.reset
docker = args.docker

# os.listdir() returns bare entry names, so each one has to be joined onto
# base_dir before the isdir() test. Testing the bare name would resolve it
# against the current working directory and wrongly report no corpora
# whenever the script is launched from somewhere else.
directories = [
    x for x in os.listdir(base_dir)
    if os.path.isdir(os.path.join(base_dir, x)) and x != 'Common'
]

# Refuse to continue when the requested corpus has no directory; list the
# ones that do exist to help the user.
if args.corpus_name not in directories:
    print(
        'The corpus {0} does not have a directory (available: {1}). Please make it with a {0}.yaml file inside.'.format(
            args.corpus_name, ', '.join(directories)))
    sys.exit(1)

corpus_conf = common.load_config(corpus_name)
print('Processing...')

if reset:
    common.reset(corpus_name)

# Use the Docker address when the --docker style flag was given, otherwise
# the regular server address.
ip = common.docker_ip if docker else common.server_ip

with ensure_local_database_running(corpus_name, port=common.server_port, ip=ip,
                                   token=common.load_token()) as params:
    config = CorpusConfig(corpus_name, **params)
    config.formant_source = 'praat'

    # Common set up
    common.loading(config, corpus_conf['corpus_directory'], corpus_conf['input_format'])

    common.lexicon_enrichment(config, corpus_conf['unisyn_spade_directory'], corpus_conf['dialect_code'])

    common.speaker_enrichment(config, corpus_conf['speaker_enrichment_file'])

    # The syllabic segment set passed here is the vowel inventory plus any
    # extra syllabic segments listed in the corpus configuration.
    common.basic_enrichment(config,
                            corpus_conf['vowel_inventory'] + corpus_conf['extra_syllabic_segments'],
                            corpus_conf['pauses'])

    common.polysyllabic_export(config, corpus_name, corpus_conf['dialect_code'], corpus_conf['speakers'])

    # NOTE(review): the original indentation was lost; this print is assumed
    # to sit inside the `with` block (before the context exits) -- confirm.
    print('Finishing up!')