def proscript_segments_to_textgrid(proscript, output_dir, file_prefix="", speaker_segmented=False, no_write=False):
    """Render a proscript's segments as one or more Praat TextGrid files.

    Args:
        proscript: proscript object with segments (and optionally speakers)
        output_dir: directory in which TextGrid files are created
        file_prefix: prefix for generated file names
        speaker_segmented: write one TextGrid per speaker instead of one overall
        no_write: compute file paths but do not write any TextGrid to disk
    Returns:
        List of TextGrid file paths that were written.
    """
    output_files = []
    # BUG FIX: the original asserted a (condition, message) TUPLE, which is
    # always truthy and therefore never fired.
    assert proscript.duration > 0.0, "Proscript duration is 0"
    fix_segment_overlaps(proscript)
    if speaker_segmented:
        proscript.populate_speaker_ids()
        assert len(proscript.speaker_ids) > 0, "No speaker info set on proscript"
        for speaker_index, speaker_id in enumerate(proscript.speaker_ids):
            try:
                # Reuse a previously registered file path for this speaker
                textgrid_file = proscript.speaker_textgrid_files[speaker_index]
            except IndexError:  # narrowed from a bare except
                textgrid_file = os.path.join(
                    output_dir, "%s-%s.TextGrid" % (file_prefix, speaker_id))
            if not no_write:
                tg = tgio.Textgrid()
                segment_entry_list = [
                    (segment.start_time, segment.end_time, segment.transcript)
                    for segment in proscript.get_speaker_segments(speaker_id)
                ]
                segment_tier = tgio.IntervalTier('%s' % speaker_id,
                                                 segment_entry_list, 0,
                                                 proscript.duration)
                tg.addTier(segment_tier)
                saveTextGridWithTags(tg, textgrid_file)
                output_files.append(textgrid_file)
                proscript.speaker_textgrid_files.append(textgrid_file)
    else:
        if proscript.textgrid_file:
            textgrid_file = proscript.textgrid_file
        else:
            textgrid_file = os.path.join(output_dir,
                                         "%s.TextGrid" % (file_prefix))
            proscript.textgrid_file = textgrid_file
        if not no_write:
            tg = tgio.Textgrid()
            segment_entry_list = [(segment.start_time, segment.end_time,
                                   segment.transcript)
                                  for segment in proscript.segment_list]
            segment_tier = tgio.IntervalTier('segments', segment_entry_list,
                                             0, proscript.duration)
            tg.addTier(segment_tier)
            saveTextGridWithTags(tg, textgrid_file)
            output_files.append(textgrid_file)
    return output_files
def write_intervals(wav_dir, align_dir, intervals):
    """For each audio file: write a sentence TextGrid, split the wav on it,
    and dump one transcript text file per interval.

    Args:
        wav_dir: Path-like directory containing `<audio_name>.wav` files
        align_dir: Path-like root under which per-audio output dirs are made
        intervals: mapping of audio name -> list of interval entries
    """
    for audio_name, entries in intervals.items():
        audio_align_dir = align_dir / audio_name
        audio_align_dir.mkdir(parents=True, exist_ok=True)
        grid = tgio.Textgrid()
        tier = tgio.IntervalTier("sentences", entries)
        grid.addTier(tier)
        grid_path = audio_align_dir / f"{audio_name}.TextGrid"
        grid.save(str(grid_path))
        logger.debug(f"Wrote {grid_path}")
        # Split audio
        wav_path = wav_dir / f"{audio_name}.wav"
        audio_wav_dir = audio_align_dir / "wav"
        logger.debug(f"Splitting {wav_path}")
        splitAudioOnTier(str(wav_path), str(grid_path), "sentences",
                         str(audio_wav_dir))
        # Write transcriptions
        text_align_dir = audio_align_dir / "text"
        text_align_dir.mkdir(parents=True, exist_ok=True)
        # BUG FIX: math.log10(0) raises ValueError when an audio file has no
        # entries; clamp to at least one digit (a width of 1 also formats a
        # single entry identically to the old width-0 spec).
        if entries:
            num_zeros = max(1, int(math.ceil(math.log10(len(entries)))))
        else:
            num_zeros = 1
        n_format = "{0:0" + str(num_zeros) + "d}"
        for i, interval in enumerate(entries):
            n = n_format.format(i)
            text_path = text_align_dir / f"{audio_name}_{n}.txt"
            text_path.write_text(interval.label.strip())
            logger.debug(f"Wrote {text_path}")
def alignment_to_textgrid(alignment, path):
    """
    Take a filename and its associated transcription and fill in all the gaps

    Builds a 'word' interval tier from the alignment JSON, inserting empty
    intervals for the leading silence, inter-word gaps, and trailing silence
    (up to the wav file's duration).  Falls back to one empty interval
    spanning the whole file if the alignment cannot be parsed.
    """
    with contextlib.closing(wave.open(path, 'r')) as f:
        frames = f.getnframes()
        rate = f.getframerate()
        duration = frames / float(rate)
    rearranged_words = []
    file_ons = 0
    try:
        content = json.loads(alignment.to_json())
        all_ons = content['words'][0]['start']
        word_off = file_ons
        for ix, word in enumerate(content['words']):
            word_ons = word['start']
            word_off = word['end']
            target = word['alignedWord']
            # FIX: the second clause `ix < len(content['words'])` was always
            # true and has been dropped.
            if ix >= 1:
                prev_off = content['words'][ix - 1]['end']
                if word_ons > prev_off:
                    # Fill the gap between consecutive words
                    rearranged_words.append((prev_off, word_ons, ''))
            else:
                # Empty interval before the first word
                rearranged_words.append((file_ons, all_ons, ''))
            rearranged_words.append((word_ons, word_off, target))
        # Trailing silence after the last word, only once
        if word_off < duration:
            rearranged_words.append((word_off, duration, ''))
    except (KeyError, IndexError, TypeError, ValueError):
        # Narrowed from a bare except: unparsable/empty alignment -> one
        # empty interval spanning the file.
        rearranged_words = [(0, duration, '')]
    tg = tgio.Textgrid()
    tg.addTier(tgio.IntervalTier('word', rearranged_words))
    return tg
def create_textgrid_obj(textgrid_list):
    """Assemble a Textgrid with ORT/KAN/MAU interval tiers from a list of
    aligned elements; elements lacking phonetic info fall back to their
    orthographic interval."""
    tier_names = ("ORT", "KAN", "MAU")
    max_time = textgrid_list[-1].interval.end
    tiers = {}
    for name in tier_names:
        tier = tgio.TextgridTier(name, [], 0.0, max_time)
        tier.tierType = tgio.INTERVAL_TIER
        tiers[name] = tier
    for item in textgrid_list:
        tiers["ORT"].entryList.append(item.interval)
        try:
            kan_entry = item.phonetic
            mau_entries = item.phones_list
        except AttributeError:
            # No phone-level detail: reuse the orthographic interval
            kan_entry = item.interval
            mau_entries = [item.interval]
        tiers["KAN"].entryList.append(kan_entry)
        tiers["MAU"].entryList.extend(mau_entries)
    textgrid_obj = tgio.Textgrid()
    for name in tier_names:
        textgrid_obj.addTier(tiers[name])
    return textgrid_obj
def syllabifyTextgrids(tgPath, islePath):
    """Syllabify every word/phone TextGrid under tgPath with the ISLE
    dictionary, writing results to a 'syllabifiedTGs' subfolder; files
    already present in the output folder are skipped."""
    isleDict = isletool.LexicalTool(islePath)
    outputPath = join(tgPath, "syllabifiedTGs")
    utils.makeDir(outputPath)
    skipLabels = ["<VOCNOISE>", "xx", "<SIL>", "{B_TRANS}", '{E_TRANS}']
    for name in utils.findFiles(tgPath, filterExt=".TextGrid"):
        outputFN = join(outputPath, name)
        if os.path.exists(outputFN):
            continue  # already processed
        sourceTG = tgio.openTextgrid(join(tgPath, name))
        syllableTG = praattools.syllabifyTextgrid(isleDict, sourceTG, "words",
                                                  "phones",
                                                  skipLabelList=skipLabels)
        resultTG = tgio.Textgrid()
        resultTG.addTier(sourceTG.tierDict["words"])
        resultTG.addTier(sourceTG.tierDict["phones"])
        # resultTG.addTier(syllableTG.tierDict["syllable"])
        resultTG.addTier(syllableTG.tierDict["tonic"])
        resultTG.save(outputFN)
def ctm2tg(wavdir, outdir):
    '''Convert CTM alignment files to Praat's TextGrid format.

    Args:
        wavdir -- path to the directory containing speech wav files
                  (NOTE(review): accepted but unused in this body — confirm)
        outdir -- path to output the textgrid files in
    '''
    # FIX: the original used Python-2-only `print` statements, which are a
    # syntax error under Python 3 (the rest of this file uses print()).
    # end=" " mimics the old trailing-comma behavior (no newline).
    print("Converting ctm files to Praat Textgrids...", end=" ")
    words = readCSV(os.path.join(outdir, 'wordlvl.ctm'))
    phones = readCSV(os.path.join(outdir, 'phonelvl.ctm'))
    word_dict = csv2tgdict(words)
    phone_dict = csv2tgdict(phones)
    wavscp = wavscp2dict(readCSV(os.path.join(outdir, 'wav.scp')))
    tgdir = os.path.join(outdir, 'tg')
    if not os.path.exists(tgdir):
        os.makedirs(tgdir)
    for utt in wavscp.keys():
        tg = tgio.Textgrid()
        wordTier = tgio.IntervalTier('words', word_dict[utt], 0,
                                     pairedWav=wavscp[utt])
        phoneTier = tgio.IntervalTier('phones', phone_dict[utt], 0,
                                      pairedWav=wavscp[utt])
        tg.addTier(wordTier)
        tg.addTier(phoneTier)
        tg.save(os.path.join(tgdir, utt + '.TextGrid'))
    print("stored in " + os.path.join(outdir, 'tg'))
def Create_textgrid(phones, out_path, raw=False):
    '''
    Create a textgrid based on the alignment

    phones: a pd DataFrame of an alignment file (the original docstring
        called this `sample`)
    out_path: the output directory
    raw: if True, read start/duration from columns 3 and 4; otherwise
        from columns 1 and 2
    '''
    # BUG FIX: the body referenced an undefined name `sample` (NameError);
    # the parameter is actually called `phones`.
    sample = phones
    tg = tgio.Textgrid()
    # Tier end time = last row's start + duration (columns 1 and 2).
    # NOTE(review): this uses the non-raw columns even when raw=True — confirm.
    syl_tier = tgio.IntervalTier('phones', [], 0,
                                 sample.iloc[-1, 1] + sample.iloc[-1, 2])
    entries = []
    if raw:
        for i in range(len(sample)):
            ph = (sample.iloc[i, 3],
                  sample.iloc[i, 3] + sample.iloc[i, 4],
                  sample.iloc[i, -1])
            entries.append(ph)
    else:
        for i in range(len(sample)):
            ph = (sample.iloc[i, 1],
                  sample.iloc[i, 1] + sample.iloc[i, 2],
                  sample.iloc[i, -1])
            entries.append(ph)
    syl_tier = syl_tier.new(entryList=entries)
    tg.addTier(syl_tier)
    out_path = os.path.join(out_path, sample.iloc[0, 0] + '.TextGrid')
    tg.save(out_path)
def main():
    '''Convert CTM alignment files to Praat's TextGrid format.

    Usage: script <wavscp> <outdir> <ctm-1> ... <ctm-n>
        wavscp -- Kaldi-style wav.scp mapping utterance ids to wav paths
        outdir -- directory to write TextGrid files (and copied wavs) into
        ctm-*  -- one or more CTM files; each becomes a tier named after
                  its argv path
    '''
    if (len(sys.argv) < 3):
        print("Usage:%s <wavscp> <outdir> <cmt-1>...<cmt-n>\n" % (sys.argv[0]))
        exit(1)
    print("Converting ctm files to Praat Textgrids...\n")
    wavscp = sys.argv[1]
    outdir = sys.argv[2]
    #absOutDir = os.path.abspath(outdir)
    wavdict = wavscp2dict(readCSV(wavscp))
    if not os.path.exists(os.path.join(outdir)):
        os.makedirs(os.path.join(outdir))
    for utt in wavdict.keys():
        tg = tgio.Textgrid()
        # One tier per CTM file given on the command line
        for num in range(3, len(sys.argv)):
            ctmcsv = readCSV(sys.argv[num])
            tgdict = csv2tgdict(ctmcsv)
            if not os.path.isfile(wavdict[utt]):
                print("%s not exist!" % (wavdict[utt]))
                # NOTE(review): this only breaks out of the tier loop; the
                # (possibly tierless) textgrid is still saved below — confirm
                # whether skipping the save was intended.
                break
            else:
                # Copy the wav next to the textgrid output
                fpath, fname = os.path.split(wavdict[utt])
                shutil.copyfile(wavdict[utt], os.path.join(outdir, fname))
                intervalTier = tgio.IntervalTier(sys.argv[num], tgdict[utt], 0, pairedWav=wavdict[utt])
                tg.addTier(intervalTier)
        tg.save(os.path.join(outdir, utt + '.TextGrid'))
    print("stored in %s" % (outdir))
def outputStereoTextgrid(outputFN, duration, leftEntryList, rightEntryList,
                         leftChannelName, rightChannelName):
    """Save a two-tier textgrid (one tier per stereo channel); each entry is
    relabeled with its order of occurrence and clipped to valid bounds."""

    def _relabelAndClip(entries):
        # Sorts in place, as before (caller's list ends up reordered)
        entries.sort()
        relabeled = [(entry[0], entry[1], str(i))
                     for i, entry in enumerate(entries)]
        # This shouldn't be necessary: drop out-of-range/degenerate intervals
        return [entry for entry in relabeled
                if entry[1] <= duration and entry[0] < entry[1]]

    outputTG = tgio.Textgrid()
    outputTG.addTier(tgio.IntervalTier(leftChannelName,
                                       _relabelAndClip(leftEntryList),
                                       0, duration))
    outputTG.addTier(tgio.IntervalTier(rightChannelName,
                                       _relabelAndClip(rightEntryList),
                                       0, duration))
    outputTG.save(outputFN)
def convert_json_to_textgrid(wav_file_path, transcript_file_path):
    """Convert a .tlog JSON transcript (start/end in ms) into a long-form
    TextGrid with one 'utterance' tier, saved next to the transcript."""
    textgrid_file_path = transcript_file_path.replace(".tlog", ".TextGrid")
    with open(transcript_file_path) as json_file:
        records = json.load(json_file)
        # Millisecond timestamps -> seconds
        entries = [(float(record["start"] / 1000),
                    float(record["end"] / 1000),
                    record["transcript"])
                   for record in records]
        tier = tgio.IntervalTier('utterance', entries, 0,
                                 pairedWav=wav_file_path)
        grid = tgio.Textgrid()
        grid.addTier(tier)
        grid.save(textgrid_file_path, useShortForm=False,
                  outputFormat='textgrid')
        print("Textgrid of transcription saved to %s" % textgrid_file_path)
def markTranscriptForAnnotations(tgFN, tierName, outputTGFN,
                                 proportion=1 / 5.0):
    '''
    Prep a noisy silence annotation for an annotation task

    Voice activity detectors are liable to segment speech into very small
    chunks (fragments of speech and silence).  This re-chunks a recording
    into larger units usable in a transcription task by cutting at the
    midpoints of the longest interior silences.  Assumes the speaker is
    speaking for most of the recording.
    '''
    tg = tgio.openTextgrid(tgFN)
    duration = tg.maxTimestamp
    numCuts = int(math.ceil(duration * proportion))

    # Interior silences only; tuples lead with length so the longest sort first
    silences = [(stop - start, start, stop)
                for start, stop, label in tg.tierDict[tierName].entryList
                if label == "silent" and start != 0 and stop != duration]
    silences.sort(reverse=True)

    # Cut at the midpoint of each of the n longest silences
    cutPoints = sorted(start + (stop - start) / 2.0
                       for _, start, stop in silences[:numCuts])
    boundaries = [0.0] + cutPoints + [duration]

    toTranscribe = [(left, right, "%d" % i)
                    for i, (left, right)
                    in enumerate(zip(boundaries, boundaries[1:]))]

    outputTG = tgio.Textgrid()
    outputTG.addTier(tgio.IntervalTier("toTranscribe", toTranscribe, 0,
                                       duration))
    outputTG.save(outputTGFN)
def create_textgrid(wav_dictionary: Dict[str, str],
                    ctm_dictionary: dict,
                    output_directory: str) -> None:
    """Write one TextGrid per utterance, pairing each utterance's CTM
    entries with its wav file; outputs are named utterance-<index>."""
    for index, utterance_id in enumerate(wav_dictionary):
        wav_path = str(Path(wav_dictionary[utterance_id]))
        tier = tgio.IntervalTier(name='default',
                                 entryList=ctm_dictionary[utterance_id],
                                 minT=0,
                                 pairedWav=wav_path)
        grid = tgio.Textgrid()
        grid.addTier(tier)
        grid.save(str(Path(output_directory, f"utterance-{index}.TextGrid")))
def test_save_with_force_larger_value_as_maximum_time(self):
    # Forcing a maxTimestamp past the tier's end pads the tail with an
    # empty interval up to the forced value.
    entries = [[0.4, 0.6, 'A'], [0.8, 1.0, 'E'], [1.2, 1.3, 'I']]
    expected = [
        [0.3, 0.4, ''], [0.4, 0.6, 'A'], [0.6, 0.8, ''],
        [0.8, 1.0, 'E'], [1.0, 1.2, ''], [1.2, 1.3, 'I'], [1.3, 3.0, ''],
    ]
    tg = tgio.Textgrid()
    tg.addTier(tgio.IntervalTier('test', entries, 0.3, 2.0))
    self.assertEqual(expected, run_save(tg, maxTimestamp=3.0))
def test_save_with_minimum_time_stamp(self):
    # Saving fills all gaps with empty intervals, starting from the tier's
    # minTimestamp (0.3) and ending at its maxTimestamp (2.0).
    entries = [[0.4, 0.6, 'A'], [0.8, 1.0, 'E'], [1.2, 1.3, 'I']]
    expected = [
        [0.3, 0.4, ''], [0.4, 0.6, 'A'], [0.6, 0.8, ''],
        [0.8, 1.0, 'E'], [1.0, 1.2, ''], [1.2, 1.3, 'I'], [1.3, 2.0, ''],
    ]
    tg = tgio.Textgrid()
    tg.addTier(tgio.IntervalTier('test', entries, 0.3, 2.0))
    self.assertEqual(expected, run_save(tg))
def outputTextgrid(outputFN, duration, entryList, tierName):
    """Save the entries as a single interval tier whose labels record each
    entry's order of occurrence."""
    entryList.sort()  # in place, as before (caller's list is reordered)
    numbered = [(entry[0], entry[1], str(i))
                for i, entry in enumerate(entryList)]
    grid = tgio.Textgrid()
    grid.addTier(tgio.IntervalTier(tierName, numbered, 0, duration))
    grid.save(outputFN)
def test_save_with_force_too_large_minimum_time(self):
    # If you choose to force save to use a maxTimestamp, all of your
    # entries must fall below it — otherwise saving must be rejected.
    entries = [[0.4, 0.6, 'A'], [0.8, 1.0, 'E'], [1.2, 1.3, 'I']]
    tg = tgio.Textgrid()
    tg.addTier(tgio.IntervalTier('test', entries, 0.3, 2.0))
    self.assertRaises(AssertionError, run_save, tg, maxTimestamp=1.0)
def convert_to_textgrid_file(text, wav_file, tg_file):
    """Wrap the whole transcript in a single-interval 'utterance' tier
    spanning the wav file's duration and save it as a TextGrid."""
    duration = get_wav_duration(wav_file)
    tier = tgio.IntervalTier('utterance', [(0.0, duration, text)], 0,
                             pairedWav=wav_file)
    grid = tgio.Textgrid()
    grid.addTier(tier)
    grid.save(tg_file)
def test_save_with_minimum_interval_length(self):
    # The unlabeled region in front of the first entry ([0.30, 0.35, ''])
    # is shorter than minimumIntervalLength, so 'A' is stretched to absorb
    # it.  (The region starts at 0.3 because that is the tg's minTimestamp.)
    entries = [[0.35, 0.6, 'A'], [0.8, 1.0, 'E'], [1.2, 1.3, 'I']]
    expected = [
        [0.3, 0.6, 'A'], [0.6, 0.8, ''], [0.8, 1.0, 'E'],
        [1.0, 1.2, ''], [1.2, 1.3, 'I'], [1.3, 2.0, ''],
    ]
    tg = tgio.Textgrid()
    tg.addTier(tgio.IntervalTier('test', entries, 0.3, 2.0))
    self.assertEqual(expected, run_save(tg, minimumIntervalLength=0.06))
def audiosplitOnTone(inputPath, fn, pitchPath, tgPath, subwavPath, minPitch,
                     maxPitch, toneFrequency, minEventDuration, praatEXE,
                     praatScriptPath, forceRegen, generateWavs=False):
    """Segment one audio file on pure-tone 'beep' events.

    Extracts pitch with Praat, locates tone events at toneFrequency, writes
    a TextGrid with 'beep'/'speech'/'silence' tiers, and optionally saves the
    audio between tones as separate wav files.

    NOTE(review): praatScriptPath is accepted but never used in this body —
    confirm whether a helper needs it.
    """
    utils.makeDir(pitchPath)
    utils.makeDir(tgPath)
    utils.makeDir(subwavPath)
    name = os.path.splitext(fn)[0]
    piSamplingRate = 100  # Samples per second
    # Extract pitch and find patterns in the file
    outputFN = os.path.splitext(fn)[0] + ".txt"
    sampleStep = 1 / float(piSamplingRate)
    motherPIList = pitch_and_intensity.extractPI(join(inputPath, fn),
                                                 join(pitchPath, outputFN),
                                                 praatEXE, minPitch,
                                                 maxPitch,
                                                 sampleStep=sampleStep,
                                                 forceRegenerate=forceRegen)
    # entry = (time, pitchVal, intVal)
    pitchList = [float(entry[1]) for entry in motherPIList]
    timeDict = split_on_tone.splitFileOnTone(pitchList, piSamplingRate,
                                             toneFrequency, minEventDuration)
    # Output result as textgrid
    duration = audio_scripts.getSoundFileDuration(join(inputPath, fn))
    tg = tgio.Textgrid()
    for key in ['beep', 'speech', 'silence']:
        entryList = timeDict[key]
        tier = tgio.IntervalTier(key, entryList, 0, duration)
        tg.addTier(tier)
    tg.save(join(tgPath, name + ".TextGrid"))
    # Output audio portions between tones
    if generateWavs:
        split_on_tone.extractSubwavs(timeDict, inputPath, fn, subwavPath)
def generateSingleIPUTextgrids(wavPath, txtPath, outputPath, nameMod=None,
                               addPause=True):
    '''
    Generates a textgrid with a single IPU for each wave file

    This constitutes the first step of SPPAS, chunking a recording into
    utterances.  When there is only a single utterance, SPPAS sometimes
    makes errors (or is not configured properly by the user); this script
    is strictly for those situations.

    If there are multiple audio files for each transcript, derive the
    transcript name with /nameMod/.  If there is a chance of even a slight
    segment of silence on the edges of the audio file, /addPause/ should
    be True.
    '''
    if nameMod is None:
        nameMod = lambda x: x
    if not os.path.exists(outputPath):
        os.mkdir(outputPath)

    for wavName in utils.findFiles(wavPath, filterExt=".wav", stripExt=True):
        transcriptName = nameMod(wavName)
        with io.open(join(txtPath, transcriptName + ".txt"), "r") as fd:
            txt = fd.read()
        if addPause is True:
            # Add initial and final small pauses to each transcript
            txt = "+ %s +" % txt.lower()

        wavFN = join(wavPath, wavName + ".wav")
        dur = praatio_scripts.audioio.WavQueryObj(wavFN).getDuration()
        tg = tgio.Textgrid()
        tg.addTier(tgio.IntervalTier("ipu", [(0, dur, txt), ], 0, dur))
        tg.save(join(outputPath, wavName + ".TextGrid"))
def wavFileToGrid(wavFile, outputFile):
    """Extract pitch from a wav file and store it as an interval tier in a
    TextGrid; appends to an existing grid, timestamping the tier name if a
    'Pitch' tier is already present."""
    sound = parselmouth.Sound(wavFile)
    pitch = sound.to_pitch()
    print("Get entryList for TextGrid From {file} by pitch".format(file=wavFile))
    entries = pitchToEntryList(pitch)
    print("Save TextGrid to {output} ".format(output=outputFile))
    tierName = "Pitch"
    if os.path.isfile(outputFile):
        tg = tgio.openTextgrid(outputFile)
        if tierName in tg.tierDict:
            # Avoid clobbering an existing tier: make the name unique
            tierName += datetime.now().strftime("%m%d%Y%H%M%S")
    else:
        tg = tgio.Textgrid()
    tg.addTier(tgio.IntervalTier(tierName, entries, 0, pairedWav=wavFile))
    tg.save(outputFile)
def make_textgrid(df, out_name, orig_name=None, word2phone=None):
    """Build phone/syllable (and optionally word/break) tiers from an
    alignment DataFrame and save as a long-form TextGrid.

    df: DataFrame with 'start', 'end', 'phone' columns; phones ending in
        '_X' are treated as syllable finals, 'spn'/'sil' as standalone.
    orig_name: existing TextGrid to extend (required for word/break tiers)
    word2phone: mapping used by make_word_list to align words to syllables
    Returns (matched word count, unmatched word count).

    NOTE(review): `initial_start` is unbound if the first phone is a final;
    `word_list`/`unmatched_words` are unbound when orig_name/word2phone are
    falsy, so the final print/return would raise — confirm callers always
    pass both.  The tier name 'syllable\\_phones' contains a literal
    backslash (looks like a LaTeX-escape typo) — confirm intended.
    """
    if orig_name:
        tg = tgio.openTextgrid(orig_name)
    else:
        tg = tgio.Textgrid()
    phones_list = []
    syllables_list = []
    curr_syllable = []
    for tup in df[['start', 'end', 'phone']].itertuples():
        phones_list.append((tup.start, tup.end, tup.phone))
        if tup.phone in set(['spn', 'sil']):
            # pass
            # Non-speech: its own syllable entry
            syllables_list.append((tup.start, tup.end, tup.phone))
            curr_syllable = []
        elif len(tup.phone) > 2 and tup.phone[-2] == '_':
            # final: close the syllable started at initial_start
            curr_syllable.append(tup.phone)
            syllables_list.append(
                (initial_start, tup.end, ' '.join(curr_syllable)))
            curr_syllable = []
        else:
            # initial: open a new syllable
            curr_syllable.append(tup.phone)
            initial_start = tup.start
    phone_tier = tgio.IntervalTier('phone', phones_list)
    syllable_tier = tgio.IntervalTier('syllable\_phones', syllables_list)
    if orig_name and word2phone:
        ipus, xmins, xmaxs = get_ipus(tg)
        word_list, unmatched_words, break_list = make_word_list(
            syllable_tier, ipus, word2phone, out_name, xmaxs)
        word_tier = tgio.IntervalTier('word', word_list)
        tg.addTier(word_tier)
        tg.addTier(phone_tier)
        tg.addTier(syllable_tier)
        if not tg.tierDict['breaks'].entryList:
            # Replace the (empty) existing break tier with computed breaks
            tg.removeTier('breaks')
            break_tier = tgio.PointTier('break', break_list)
            tg.addTier(break_tier)
        else:
            print(out_name, 'has break tier, did not write new one')
    os.makedirs(os.path.dirname(out_name), exist_ok=True)
    tg.save(out_name, useShortForm=False)
    print('wrote to {}, # matched: {}, # unmatched: {}'.format(
        out_name, len(word_list), len(unmatched_words)))
    return len(word_list), len(unmatched_words)
def textgridMorphDuration(fromTGFN, toTGFN):
    '''
    A convenience function.  Morphs interval durations of one tg to another.

    This assumes the two textgrids have the same number of segments.
    '''
    sourceTG = tgio.openTextgrid(fromTGFN)
    targetTG = tgio.openTextgrid(toTGFN)
    morphedTG = tgio.Textgrid()
    for name in sourceTG.tierNameList:
        morphedTier = sourceTG.tierDict[name].morph(targetTG.tierDict[name])
        morphedTG.addTier(morphedTier)
    return morphedTG
def write_normalized_transcript(transcript: str, audio: NormalizedAudio,
                                output_path: str):
    """Save the transcript as a single-interval 'utt' tier spanning the
    audio's duration, in long-form TextGrid format."""
    assert ' ' not in output_path, \
        'Please remove spaces from output path for {}'.format(output_path)
    assert '\n' not in transcript and '\t' not in transcript, \
        'Please remove the newlines and tabs in transcript for [{}]'.format(
            transcript)
    if datapipes.__verbose__:
        print('writing normalized transcript to {}'.format(output_path))

    total = audio.duration
    tier = tgio.IntervalTier('utt', [], 0, total)
    tier.insertEntry(tgio.Interval(start=0, end=total, label=transcript))
    grid = tgio.Textgrid()
    grid.addTier(tier)
    grid.save(output_path, useShortForm=False)
def textgridManipulateDuration(tgFN, ratioList):
    """Morph every tier of the textgrid at tgFN by ratioList, dispatching on
    tier type; returns the new Textgrid."""
    sourceTG = tgio.openTextgrid(tgFN)
    resultTG = tgio.Textgrid()
    for name in sourceTG.tierNameList:
        tier = sourceTG.tierDict[name]
        if isinstance(tier, tgio.IntervalTier):
            morphed = _morphIntervalTier(tier, ratioList)
        elif isinstance(tier, tgio.PointTier):
            morphed = _morphPointTier(tier, ratioList)
        else:
            morphed = None
        assert morphed is not None
        resultTG.addTier(morphed)
    return resultTG
def lab2praat(file_name, praat_align_file, state_number=5):
    """Convert a state-level alignment lab file to a Praat TextGrid.

    file_name: state alignment file ending in *.lab; each line is
        `start end full_label[state]` with times in 100 ns units and
        50000-unit (5 ms) frames.
    praat_align_file: output Praat TextGrid file for visualization.
    state_number: number of states per senone; a phone's duration is summed
        over its states starting from state index 1.
    """
    fid = open(file_name)
    utt_labels = fid.readlines()
    fid.close()
    current_index = 0
    label_number = len(utt_labels)
    duration_phone_list = []
    for line in utt_labels:
        line = line.strip()
        if len(line) < 1:
            continue
        temp_list = re.split('\s+', line)
        start_time = int(temp_list[0])
        end_time = int(temp_list[1])
        # 50000 units of 100 ns = one 5 ms frame
        frame_number = int((end_time - start_time) / 50000)
        # all frame number of this phone
        full_label = temp_list[2]
        # remove state information [k]
        full_label_length = len(full_label) - 3
        state_index = full_label[full_label_length + 1]
        state_index = int(state_index) - 1
        full_label = full_label[0:full_label_length]
        # Extract the center phone from the triphone label a-b+c
        match = re.match(r"^.*?\-(.*?)\+.*?$", full_label, re.M | re.I)
        phone_identity = match.group(1)
        if state_index == 1:
            # First state of a phone: sum frames over the remaining states
            phone_duration = frame_number
            for i in range(state_number - 1):
                line = utt_labels[current_index + i + 1].strip()
                temp_list = re.split('\s+', line)
                phone_duration += (int(temp_list[1]) - int(temp_list[0])) / 50000
            # 100 ns units -> seconds; 0.005 s per frame
            start_time = start_time / 10000000.0
            end_time = start_time + phone_duration * 0.005
            duration_phone_list.append((str(start_time), str(end_time), phone_identity))
        current_index += 1
    setTG = tgio.Textgrid()
    # pdb.set_trace()
    phoneTier = tgio.IntervalTier('phone', duration_phone_list)
    setTG.addTier(phoneTier)
    setTG.save(praat_align_file)
def do_all(ELAN_name, individual_notes, note_phrases, frequencies,
           relative_notes, relative_phrases):
    """Build the combined TextGrid: five filled music-annotation tiers
    followed by six blank speaker tiers for further annotation."""
    file_textgrid = tgio.Textgrid()

    # Tiers of import, in display order
    filled_tiers = [
        ('Individual Notes', individual_notes),
        ('Note Phrases', note_phrases),
        ('Frequencies', frequencies),
        ('Relative Notes', relative_notes),
        ('Relative Phrases', relative_phrases),
    ]
    for tier_name, entries in filled_tiers:
        file_textgrid.addTier(tgio.IntervalTier(tier_name, entries))

    # Blank tiers: a single empty interval so nothing is left blank
    nothing = [[0, 1, '']]
    blank_tier_names = ('Karim', 'Karim Translation',
                        'Anthony', 'Anthony Translation',
                        'Emile', 'Emile Translation')
    for tier_name in blank_tier_names:
        file_textgrid.addTier(tgio.IntervalTier(tier_name, nothing))

    file_textgrid.save('textgrid_data/' + ELAN_name + '_combined.TextGrid')
def merge_adjacent(path, fn, outputPath):
    '''
    Goes through every tier of a textgrid; combines adjacent filled intervals
    '''
    assert(path != outputPath)
    if not os.path.exists(outputPath):
        os.mkdir(outputPath)

    sourceTG = tgio.openTextgrid(join(path, fn))
    mergedTG = tgio.Textgrid()
    for tierName in sourceTG.tierNameList:
        tier = sourceTG.tierDict[tierName]
        mergedEntries = []
        pending = list(tier.entryList[0])
        for entry in tier.entryList[1:]:
            if pending[1] == entry[0]:
                # Boundary shared: extend the pending interval and join labels
                pending[1] = entry[1]
                pending[2] += " - " + entry[2]
            else:
                mergedEntries.append(pending)
                pending = list(entry)
        mergedEntries.append(pending)
        mergedTG.addTier(tgio.IntervalTier(tierName, mergedEntries,
                                           tier.minTimestamp,
                                           tier.maxTimestamp))
    mergedTG.save(join(outputPath, fn))
def mlf2praat(mlf, praat_align_file):
    """Transform a cuprosody MLF alignment file to a Praat Textgrid file.

    mlf format (times in 100 ns units)::

        0 4400000 sil -2160.365723 sil
        4400000 5200000 I_g -504.555634 gaa
        5200000 6600000 F_aa -960.479187
        6600000 7300000 I_j -543.072876 jau
        7300000 8500000 F_au -856.253418
        8500000 8900000 I_d -320.236786 daai
        8900000 10400000 F_aai -789.435547
        10400000 11300000 I_s -523.623901 si

    Five-field lines whose 3rd and 5th fields match (e.g. sil) become a
    syllable on their own; otherwise a five-field line opens a syllable
    and the following four-field line closes it.
    """
    # FIX: removed a stray pdb.set_trace() left in before saving (it halted
    # every run), and the file handle is now closed via a context manager.
    with open(mlf, 'r') as fid:
        lines = fid.readlines()
    duration_phone_list = []
    for line in lines:
        tmp_split = re.split(r'\s+', line.strip())
        if len(tmp_split) == 5 and tmp_split[2] == tmp_split[4]:
            # Standalone syllable (e.g. silence)
            start_time = int(tmp_split[0])
            end_time = int(tmp_split[1])
            phone_identity = tmp_split[4]
            start_time = start_time / 10000000.0
            end_time = end_time / 10000000.0
            duration_phone_list.append((str(start_time), str(end_time),
                                        phone_identity))
        elif len(tmp_split) == 4:
            # Final phone: close the syllable opened on the previous line
            end_time = int(tmp_split[1])
            start_time = start_time / 10000000.0
            end_time = end_time / 10000000.0
            duration_phone_list.append((str(start_time), str(end_time),
                                        phone_identity))
        else:
            # Initial phone: remember syllable start and identity
            start_time = int(tmp_split[0])
            phone_identity = tmp_split[4]
    setTG = tgio.Textgrid()
    phoneTier = tgio.IntervalTier('syllable', duration_phone_list)
    setTG.addTier(phoneTier)
    setTG.save(praat_align_file)
def lab2tg(input_filename, output_filename, wav_duration, tiername=None):
    """Convert an HTK-style lab file (start/end in 100 ns units) to a
    single-tier TextGrid spanning wav_duration."""
    entries = []
    with open(input_filename, 'r') as fid:
        for line in fid:
            start, end, label = line.rstrip().split()
            entries.append((float(start) / 10000000.,
                            float(end) / 10000000.,
                            label))

    if not entries:
        print('Unable to convert empty lab for {0}'.format(input_filename))
        return

    tier = tgio.IntervalTier(tiername or 'tier_1', entries, 0, wav_duration)
    grid = tgio.Textgrid()
    grid.addTier(tier)
    grid.save(output_filename)