def from_textgrid(voice):
    """ Create aligned Utterances by synthesising to Segment level from
        the orthography and simply copying label end times into segment
        items as "end" feature.

        Alignments are read from TEXTGRID_DIR, transcriptions from
        ETC_DIR/TRANSCR_FILE and waveforms from UTTWAV_DIR (all relative
        to the current working directory). The three collections are
        paired positionally (transcription names and wave file paths in
        sorted order) and every resulting utterance is saved in
        ALIGNED_UTT_DIR as "<uttname>.<UTT_EXT>".

        voice: used to synthesise each transcription with the
               'text-to-segments' process and handed on to
               transplant_segtime_info.

        Raises AssertionError when the three collections differ in size,
        or when a paired alignment or wave file does not belong to the
        utterance name. os.makedirs raises OSError when ALIGNED_UTT_DIR
        already exists.

        NOTE(review): a second from_textgrid is defined further down in
        the visible source; if both live in the same module the later
        definition rebinds the name and this one cannot be reached by
        name -- confirm which variant is intended.
    """
    #Setup and create necessary dirs...
    CWD = os.getcwd()
    uttwav_dir = os.path.join(CWD, UTTWAV_DIR)
    transcr_location = os.path.join(CWD, ETC_DIR, TRANSCR_FILE)
    textgrid_dir = os.path.join(CWD, TEXTGRID_DIR)
    aligned_utts_dir = os.path.join(CWD, ALIGNED_UTT_DIR)

    os.makedirs(aligned_utts_dir)

    #update utts from textgrids...
    transcriptions = load_transcriptions_schemefile(transcr_location)
    alignments = sl.Corpus(textgrid_dir)

    sc_utts = list(alignments.utterances)
    uttnames = sorted(transcriptions)
    wavfilenames = sorted(glob(os.path.join(uttwav_dir, "*")))

    #zip() stops at the shortest input, so a missing trailing TextGrid
    #or wave file would otherwise be skipped without any notice...
    if not len(sc_utts) == len(uttnames) == len(wavfilenames):
        raise AssertionError(
            "Utterance missmatch... (%d alignments, %d transcriptions, "
            "%d wave files)" % (len(sc_utts), len(uttnames),
                                len(wavfilenames)))

    #################
    for sc_utt, uttname, wavfilename in zip(sc_utts, uttnames, wavfilenames):
        #raised explicitly (not via "assert") so that the checks survive
        #"python -O"; the exception type is kept for existing callers...
        if not sc_utt.name == uttname:
            raise AssertionError("Utterance missmatch...")
        if not os.path.basename(wavfilename).startswith(uttname):
            raise AssertionError("Utterance missmatch...")

        print("Synthesizing:", uttname)
        utt = voice.synthesize(transcriptions[uttname], 'text-to-segments')
        utt["file_id"] = uttname

        utt = transplant_segtime_info(voice, sc_utt, utt)

        #add waveform to utt:
        utt["waveform"] = Waveform(wavfilename)

        #save utt...
        ttslab.tofile(
            utt,
            os.path.join(aligned_utts_dir, ".".join([uttname, UTT_EXT])))
def from_textgrid(voice):
    """ Build aligned Utterances from TextGrid input: utterances are
        synthesised to Word level from the orthography and the
        remaining SylStructure is filled in from the TextGrids...

        The normalised orthography has to match the TextGrid contents
        for this to succeed...

        All locations are resolved relative to the current working
        directory; os.makedirs raises OSError when ALIGNED_UTT_DIR
        already exists.
    """
    #resolve input and output locations...
    base_dir = os.getcwd()
    audio_dir = os.path.join(base_dir, WAV_DIR)
    schemefile = os.path.join(base_dir, ETC_DIR, TRANSCR_FILE)
    grid_dir = os.path.join(base_dir, TEXTGRID_DIR)
    out_dir = os.path.join(base_dir, ALIGNED_UTT_DIR)

    #the output dir is created before any input is loaded...
    os.makedirs(out_dir)

    #correct utterances from textgrids...
    make_aligned_utts(voice,
                      load_transcriptions_schemefile(schemefile),
                      sl.Corpus(grid_dir),
                      audio_dir,
                      out_dir)
def to_textgrid(voice):
    """ Produce a set of TextGrid files (including Syllables) by
        aligning from the orthography with HAlign2.

        Word and Segment info is pulled from base utterances made from
        the transcriptions and converted into HAlign2 compatible input.
        Two kinds of utterance sets are catered for:

          - a plain set of utterances, or
          - a special set in which alternative utts are embedded; here
            the pronunciation dictionary contains variants and the
            alignment process includes a re-alignment stage.

        Both are currently run through the re-alignment process
        (GenHAlignRealign); the plain GenHAlign path is disabled.

        All locations are resolved relative to the current working
        directory; os.makedirs raises OSError when an output dir
        already exists.
    """
    #resolve locations...
    base_dir = os.getcwd()
    audio_dir = os.path.join(base_dir, WAV_DIR)
    schemefile = os.path.join(base_dir, ETC_DIR, TRANSCR_FILE)
    config_file = os.path.join(base_dir, ETC_DIR, HALIGNCONF_FILE)
    work_dir = os.path.join(base_dir, HALIGNWORK_DIR)
    grid_out_dir = os.path.join(base_dir, TEXTGRID_DIR)

    #create necessary output dirs (TextGrid dir first, then the HAlign
    #input transcription dir)...
    for new_dir in (grid_out_dir,
                    os.path.join(work_dir, HALIGNINPUT_SUBDIR,
                                 HALIGNINPUTTRANSCR_DIR)):
        os.makedirs(new_dir)

    #get silence phone..
    sil = voice.phoneset.features["silence_phone"]

    #prepare alignment input..
    base_utts = make_base_utts(voice,
                               load_transcriptions_schemefile(schemefile))
    transcr_dir, dict_file = make_halign_input(voice, base_utts, work_dir)

    #NOTE: the plain GenHAlign run (taking these same overrides), once
    #selected when no utterance carried "alt_utts", is disabled; the
    #re-alignment variant is always used...
    overrides = {
        "SOURCE:ORTHOGRAPHIC_TRANSCRIPTIONS": transcr_dir,
        "SOURCE:PRONUNCIATION_DICTIONARY": dict_file,
        "SOURCE:AUDIO": audio_dir,
        "PARMS:WORKING_DIR": work_dir,
        "PARMS:SILENCE_PHONE": sil,
    }
    GenHAlignRealign(config_file, overrides=overrides)

    #add syllables to the aligner output and write the final TextGrids..
    add_sylls_to_textgrids(voice,
                           sl.Corpus(os.path.join(work_dir, "textgrids")),
                           grid_out_dir)