def acoustic_config(graph_db, textgrid_test_dir):
    """Build the 'acoustic' corpus from its TextGrid and return its config.

    The corpus is reset, then ``acoustic_corpus.TextGrid`` (looked up inside
    ``textgrid_test_dir``) is inspected and loaded.  After loading, the
    returned config has both ``pitch_algorithm`` and ``formant_source`` set
    to ``'acousticsim'``.

    ``graph_db`` is expanded as keyword arguments to ``CorpusConfig``.
    """
    corpus_config = CorpusConfig('acoustic', **graph_db)
    textgrid_path = os.path.join(textgrid_test_dir, 'acoustic_corpus.TextGrid')

    with CorpusContext(corpus_config) as corpus:
        # Start from an empty corpus before importing the TextGrid.
        corpus.reset()
        corpus.load(inspect_textgrid(textgrid_path), textgrid_path)

    # Acoustic settings are applied only after the import has finished.
    for setting in ('pitch_algorithm', 'formant_source'):
        setattr(corpus_config, setting, 'acousticsim')
    return corpus_config
def overlapped_config(graph_db, textgrid_test_dir, acoustic_syllabics):
    """Build the 'overlapped' corpus and return its config.

    The corpus is reset, the ``overlapped_speech`` directory under
    ``textgrid_test_dir`` is inspected with ``inspect_mfa`` and loaded, and
    then pauses (``'sil'``), utterances (``min_pause_length=0``), syllabic
    segments (``acoustic_syllabics``) and syllables are encoded, in that
    order.  After that, ``pitch_algorithm`` and ``formant_source`` on the
    returned config are both set to ``'acousticsim'``.

    ``graph_db`` is expanded as keyword arguments to ``CorpusConfig``.
    """
    corpus_config = CorpusConfig('overlapped', **graph_db)
    corpus_dir = os.path.join(textgrid_test_dir, 'overlapped_speech')

    with CorpusContext(corpus_config) as corpus:
        # Start from an empty corpus before importing the aligned files.
        corpus.reset()
        corpus.load(inspect_mfa(corpus_dir), corpus_dir)

        # Enrichment steps; each one runs after the previous has completed.
        corpus.encode_pauses(['sil'])
        corpus.encode_utterances(min_pause_length=0)
        corpus.encode_syllabic_segments(acoustic_syllabics)
        corpus.encode_syllables()

    # Acoustic settings are applied only after import and enrichment.
    for setting in ('pitch_algorithm', 'formant_source'):
        setattr(corpus_config, setting, 'acousticsim')
    return corpus_config
# NOTE(review): indentation was lost in this chunk; the nesting below is
# reconstructed (in particular, the common.* steps are assumed to run
# regardless of `reset`) -- confirm against the original script.

# Bail out when the requested corpus has no directory to read from.
if args.corpus_name not in directories:
    available = ', '.join(directories)
    print(
        'The corpus {0} does not have a directory (available: {1}). Please make it with a {0}.yaml file inside.'
        .format(args.corpus_name, available))
    sys.exit(1)

corpus_conf = load_config(corpus_name)
print('Processing...')

# Talk to the local database on port 8080, authenticating with the stored token.
token = common.load_token()
with ensure_local_database_running(corpus_name, port=8080, token=token) as params:
    # Corpus configuration built from the connection parameters.
    config = CorpusConfig(corpus_name, **params)
    config.formant_source = 'praat'

    # Optionally wipe any previously imported data first.
    if reset:
        with CorpusContext(config) as c:
            print("Resetting the corpus.")
            c.reset()

    # Import the corpus, then run the shared enrichment steps in order.
    common.loading(config, corpus_conf['corpus_directory'], corpus_conf['input_format'])
    common.lexicon_enrichment(config, corpus_conf['unisyn_spade_directory'], corpus_conf['dialect_code'])
    common.speaker_enrichment(config, corpus_conf['speaker_enrichment_file'])

    syllabic_segments = corpus_conf['vowel_inventory'] + corpus_conf['extra_syllabic_segments']
    pause_labels = corpus_conf['pauses']
    common.basic_enrichment(config, syllabic_segments, pause_labels)
# NOTE(review): the statements down to the "Results for sibilants" print are
# the tail of a definition whose header lies outside this chunk.  They are
# reproduced unchanged; their original indentation is unknown -- re-indent to
# match the enclosing body when splicing.

# Choose the exported columns: speaker and discourse names, the phone's id,
# label and time span, its neighbouring phone labels, the containing word's
# label, and the phone's cog / peak / slope / spread properties.
q = q.columns(c.phone.speaker.name.column_name('speaker'),
              c.phone.discourse.name.column_name('discourse'),
              c.phone.id.column_name('phone_id'),
              c.phone.label.column_name('phone_label'),
              c.phone.begin.column_name('begin'),
              c.phone.end.column_name('end'),
              c.phone.following.label.column_name('following_phone'),
              c.phone.previous.label.column_name('previous_phone'),
              c.phone.word.label.column_name('word'),
              c.phone.cog.column_name('cog'),
              c.phone.peak.column_name('peak'),
              c.phone.slope.column_name('slope'),
              c.phone.spread.column_name('spread'))
# Write the query result to the word-initial output file.
q.to_csv(output_path_word_initial)
print("Results for sibilants written to " + output_path + " and " + output_path_word_initial)

# Script entry point: connect to the database named 'database', configure the
# corpus to use Praat, then run the pipeline steps.
if __name__ == '__main__':
    with ensure_local_database_running('database') as config:
        # `config` holds the values bound by the context manager; they are
        # expanded as keyword arguments into the corpus configuration.
        conf = CorpusConfig(corpus_name, **config)
        conf.pitch_source = 'praat'
        # NOTE(review): the disabled line below refers to `config`, not `conf`.
        # config.pitch_algorithm = 'base'
        conf.formant_source = 'praat'
        conf.intensity_source = 'praat'
        conf.praat_path = praat_path
        # NOTE(review): only loading() is assumed to be guarded by `reset`;
        # the collapsed source does not show where the `if` body ends -- confirm.
        if reset:
            loading(conf)
        acoustics(conf)
        analysis(conf)