def _addSyllableNucleiToTextgrids(wavPath, tgPath, tierName,
                                  syllableNucleiPath, outputPath):
    '''
    Adds a "Syllable Nuclei" point tier to textgrids and saves the result

    For each .wav file found in /wavPath/, the textgrid with the same name
    in /tgPath/ is opened and the start times of the entries in /tierName/
    are passed, along with /syllableNucleiPath/, to uwe_sr.toAbsoluteTime().
    The timestamps that come back are flattened into one list and stored as
    sequentially numbered points.  The textgrid, with the new tier added, is
    saved under the same name in /outputPath/.
    '''
    for name in utils.findFiles(wavPath, filterExt=".wav", stripExt=True):

        # The textgrid is parsed once; the same object provides the
        # reference tier and later receives the new tier.
        tgFN = join(tgPath, name + ".TextGrid")
        tg = tgio.openTextgrid(tgFN)
        startTimeList = [entry[0]
                         for entry in tg.tierDict[tierName].entryList]

        nucleusSyllableList = uwe_sr.toAbsoluteTime(name,
                                                    syllableNucleiPath,
                                                    startTimeList)
        flattenedSyllableList = [nuclei
                                 for sublist in nucleusSyllableList
                                 for nuclei in sublist]

        wavFN = join(wavPath, name + ".wav")
        duration = audio_scripts.getSoundFileDuration(wavFN)

        # Labels are the running index, zero-padded to (oom + 1) digits
        oom = my_math.orderOfMagnitude(len(flattenedSyllableList))
        labelTemplate = "%%0%dd" % (oom + 1)
        entryList = [(timestamp, labelTemplate % i)
                     for i, timestamp in enumerate(flattenedSyllableList)]

        print(flattenedSyllableList)

        tier = tgio.PointTier("Syllable Nuclei", entryList, 0, duration)
        tg.addTier(tier)
        tg.save(join(outputPath, name + ".TextGrid"))
def test_saving_and_loading_json(self):
    '''Tests that a textgrid survives a round trip through json'''
    sourceFN = join(self.dataRoot, "mary.TextGrid")
    jsonFN = join(self.outputRoot, "saved_textgrid_as_json.json")
    roundTripFN = join(self.outputRoot,
                       "saved_textgrid_as_json_then_textgrid.TextGrid")

    # textgrid file -> json file
    tgio.openTextgrid(sourceFN).save(jsonFN, outputFormat=tgio.JSON)

    # json file -> textgrid file
    tgio.openTextgrid(jsonFN, readAsJson=True).save(roundTripFN)

    self.assertTrue(areTheSame(sourceFN, roundTripFN, readFile))
def extract_data_matrix(fn2):
    """
    Extract onsets of half verses and the corresponding lyric of it from the
    textgrid file.

    Reads the 'structure' and 'lyrics' tiers of the textgrid at ``fn2``.

    Returns a list of ``[label, lyrics, start, end]`` lists, one per run of
    consecutive structure intervals that share the same label.
    """
    tg = tgio.openTextgrid(fn2)

    # Keep only the structure intervals whose label matches the pattern
    structure_start = []
    structure_end = []
    structure_filt = []
    for entry in tg.tierDict['structure'].entryList:
        if re.search('[mi]+[uan]+.[0-9]+.[0-9]+.[0-9]', entry[2]):
            structure_start.append(entry[0])
            structure_end.append(entry[1])
            structure_filt.append(entry[2])

    # Put together all the lyrics of a whole verse: start at the lyric
    # interval whose onset equals the verse onset and keep appending lyric
    # intervals until one ends exactly where the verse ends.
    lyrics = tg.tierDict['lyrics'].entryList
    lyrics_filt = []
    j = 0  # deliberately not reset between verses: the search resumes here
    for i, verse_start in enumerate(structure_start):
        while verse_start != lyrics[j][0]:
            j += 1
        verse = lyrics[j][2]
        k = 0
        while lyrics[j + k][1] != structure_end[i]:
            k += 1
            verse += ' ' + lyrics[j + k][2]
        lyrics_filt.append(verse)

    # If a section of a verse is repeated, the end is taken as the end of
    # the last repeated section.
    data_matrix = []
    total = len(structure_filt)
    i = 0
    while i < total:
        last = i
        while last + 1 < total and structure_filt[last + 1] == structure_filt[i]:
            last += 1
        data_matrix.append([
            structure_filt[i], lyrics_filt[i], structure_start[i],
            structure_end[last]
        ])
        i = last + 1

    # Particular case if a section is repeated after one previous that is
    # different. It will be named as mu.1.1.1(1)
    for i, row in enumerate(data_matrix):
        for later_row in data_matrix[i + 1:]:
            if row[0] == later_row[0]:
                later_row[0] = later_row[0] + '(1)'
    return data_matrix
def check_phone_consistent(self):
    """
    Check that the generated phone sequence matches the alignment files.

    For every line of ../exp/casia/train.txt that is selected by the speaker
    and sentence-index filters below, the phone list produced by
    self.get_word_phone_list() is compared with the labels of the 'phones'
    tier of the matching TextGrid in self.alignment_file_dir ("sp" and "sil"
    labels are ignored).

    Raises:
        AssertionError: if the two phone lists differ for a sentence.
    """
    with open("../exp/casia/train.txt") as fid:
        textlines = fid.readlines()

    for text_line in textlines:
        # Raw strings: "\|", "\." and "\-" are not valid str escapes
        wav_path, sent_content = re.split(r"\|", text_line.strip())
        sent_index = re.split(r"\.", os.path.basename(wav_path))[0]
        spk = re.split(r"\-", sent_index)[0]

        # NOTE(review): "liuchanhg" may be a typo for another speaker id;
        # left unchanged -- confirm against the data set.
        if spk != "liuchanhg":
            continue
        if "483" in sent_index:
            continue

        # extract word and pos tagging
        word_list, _pos_list = self.get_word_pos_list(
            sent_content.strip(), "jieba")
        phone_list, _tone_list, _syl_map, _word_map, _ = \
            self.get_word_phone_list(word_list, True)

        # extract alignment file
        alignment_file_path = os.path.join(self.alignment_file_dir,
                                           sent_index + ".TextGrid")
        tg = tgio.openTextgrid(alignment_file_path)
        phone_list_2 = [
            label for _start, _stop, label in tg.tierDict['phones'].entryList
            if label != "sp" and label != "sil"
        ]

        assert phone_list_2 == phone_list
def extractTranscriptContents(fn, outputFn, tiersToNotInclude=None):
    '''
    Writes the labels of a textgrid, ordered by time, to a text file

    The entries of every tier (except those named in /tiersToNotInclude/)
    are pooled, sorted by their first value (the start time or point time)
    and their labels are written to /outputFn/, one per line, as utf-8.

    /fn/ and /outputFn/ must be different paths.
    '''
    assert (fn != outputFn)

    tg = tgio.openTextgrid(fn)

    tierNameList = tg.tierNameList
    if tiersToNotInclude is not None:
        tierNameList = [tierName for tierName in tierNameList
                        if tierName not in tiersToNotInclude]

    # Each row is [fn, tierName, *entry]; the entry's first value therefore
    # sits in the third column and its label in the last.
    outputList = []
    for tierName in tierNameList:
        for entry in tg.tierDict[tierName].entryList:
            outputList.append([fn, tierName] + list(entry))

    outputList.sort(key=lambda row: row[2])  # Sort by the third column

    outputTxt = "\n".join(row[-1] for row in outputList)
    with io.open(outputFn, mode="w", encoding="utf-8") as fd:
        fd.write(outputTxt)
def alignBoundariesAcrossTiers(tgFN, maxDifference=0.01):
    '''
    Aligns boundaries or points in a textgrid that suffer from 'jitter'

    Boundaries in different tiers are often meant to line up (e.g. the
    start of a word and the start of its first phone) but, when annotated
    by hand, may end up with slightly different values.

    Every boundary within /maxDifference/ of a boundary in another tier is
    forced to a single value: the majority value found within
    /maxDifference/ or, if there is no majority, the value that was used in
    the search query.

    Returns the textgrid that was opened from /tgFN/.
    '''
    tg = tgio.openTextgrid(tgFN)

    for tierName in tg.tierNameList:
        otherTierNames = [name for name in tg.tierNameList
                          if name != tierName]
        tier = tg.tierDict[tierName]

        # Index 0 is the left boundary of an interval or the time of a
        # point; index 1 (the right boundary) only applies to intervals.
        boundaryIndexList = [0]
        if tier.tierType == tgio.INTERVAL_TIER:
            boundaryIndexList.append(1)

        for boundaryI in boundaryIndexList:
            for entry in tier.entryList:
                _findMisalignments(tg, entry[boundaryI], maxDifference,
                                   otherTierNames, tierName, entry,
                                   boundaryI)

    return tg
def syllabifyTextgrids(tgPath, islePath):
    '''
    Syllabifies every textgrid in /tgPath/ that hasn't been processed yet

    Output goes to the "syllabifiedTGs" subfolder of /tgPath/; files that
    already exist there are skipped.  Each output textgrid holds the
    original "words" and "phones" tiers plus the "tonic" tier produced by
    praattools.syllabifyTextgrid().
    '''
    isleDict = isletool.LexicalTool(islePath)

    outputPath = join(tgPath, "syllabifiedTGs")
    utils.makeDir(outputPath)

    skipLabelList = ["<VOCNOISE>", "xx", "<SIL>", "{B_TRANS}", '{E_TRANS}']

    for fn in utils.findFiles(tgPath, filterExt=".TextGrid"):
        outputFN = join(outputPath, fn)
        if os.path.exists(outputFN):
            continue

        tg = tgio.openTextgrid(join(tgPath, fn))
        syllableTG = praattools.syllabifyTextgrid(
            isleDict, tg, "words", "phones", skipLabelList=skipLabelList)

        # The "syllable" tier of syllableTG is intentionally left out
        outputTG = tgio.Textgrid()
        for tier in (tg.tierDict["words"],
                     tg.tierDict["phones"],
                     syllableTG.tierDict["tonic"]):
            outputTG.addTier(tier)

        outputTG.save(outputFN)
def get_all_sfs(path):
    '''
    Batch processing
    Generate .sfs files from textgrids

    Every file in <path>/PhoneLabeling is opened as a textgrid, its entries
    are collected with get_entries() and written out with
    generate_individual_sfs() into <path>/sfs, which is created if needed.

    ./PhoneLabeling/007017.interval has the wrong tier name
    '''
    path_to_textgrid = os.path.join(path, "PhoneLabeling")
    tg_files = os.listdir(path_to_textgrid)

    path_to_sfs = os.path.join(path, "sfs")
    # exist_ok avoids the check-then-create race of isdir() + mkdir()
    os.makedirs(path_to_sfs, exist_ok=True)

    print("Generating sfs alignment files!")
    for tg_file in tqdm(tg_files):
        tg = tgio.openTextgrid(os.path.join(path_to_textgrid, tg_file))
        entries = get_entries(path_to_textgrid, tg)
        generate_individual_sfs(path_to_sfs, tg_file, entries)
    print("\n" + "Done!" + "\n")
def get_phrases(tg, WAVLIST, wav_stems):
    """
    Build a table with one row per entry of the 'syllables' tier.

    ``tg`` is the path of the textgrid; its ``stem`` must consist of seven
    underscore-separated fields: individual, rendition, year, month, day,
    hour and minute.  The wav location is the first element of ``WAVLIST``
    selected by ``wav_stems == tg.stem``.

    Returns a DataFrame with the columns indv, rendition, datetime, wavloc,
    tgloc, phrase_num, phrase_start, phrase_end and phrase_label.
    """
    columns = [
        "indv",
        "rendition",
        "datetime",
        "wavloc",
        "tgloc",
        "phrase_num",
        "phrase_start",
        "phrase_end",
        "phrase_label",
    ]
    phrase_df = pd.DataFrame(columns=columns)

    indv, rendition, year, month, day, hour, minute = tg.stem.split("_")
    timestamp = "-".join([year, month, day, hour, minute])
    dt = datetime.strptime(timestamp, "%Y-%m-%d-%H-%M")

    wf = WAVLIST[wav_stems == tg.stem][0]

    syllables = tgio.openTextgrid(fnFullPath=tg).tierDict["syllables"].entryList
    for phrase_num, interval in enumerate(syllables):
        row = [
            indv,
            rendition,
            dt,
            wf,
            tg,
            phrase_num,
            interval.start,
            interval.end,
            interval.label,
        ]
        # Append by assigning to the next free integer label
        phrase_df.loc[len(phrase_df)] = row
    return phrase_df
def process_textgrid(input_directory: str) -> List[Dict[str, Union[str, int]]]:
    """
    Walks the given directory tree and, for every file whose name ends in
    ".TextGrid", collects the intervals of its "Speech" tier as dictionaries
    of the following form:

    {'audio_file_name': <file_name>,
     'transcript': <transcription_label>,
     'start_ms': <start_time_in_milliseconds>,
     'stop_ms': <stop_time_in_milliseconds>}

    The audio file name is "./<textgrid basename>.wav" and double quotes are
    removed from the transcription label.

    :param input_directory: directory path that is searched for input files
    :return: list of interval data in dictionary form
    """
    intervals: List[Dict[str, Union[str, int]]] = []

    for root, _directories, files in os.walk(input_directory):
        for filename in files:
            if not filename.endswith(".TextGrid"):
                continue

            basename = os.path.splitext(filename)[0]
            audio_file_name = os.path.join(".", basename + ".wav")

            text_grid = tgio.openTextgrid(os.path.join(root, filename))
            speech_tier = text_grid.tierDict["Speech"]

            for start, stop, label in speech_tier.entryList:
                intervals.append({
                    "audio_file_name": audio_file_name,
                    "transcript": label.replace('"', ''),
                    "start_ms": seconds_to_milliseconds(float(start)),
                    "stop_ms": seconds_to_milliseconds(float(stop))
                })

    return intervals
def main():
    """
    Sum, per label, the durations of the entries in the first tier of every
    .TextGrid file in the "flusense_data" folder and pass the totals to
    print_stats().
    """
    original_folder = "flusense_data"
    duration_mp = {}

    for file in os.listdir(original_folder):
        if not file.endswith('.TextGrid'):
            continue

        tg = tgio.openTextgrid(os.path.join(original_folder, file))
        first_tier = tg.tierDict[tg.tierNameList[0]]

        for entry in first_tier.entryList:
            lab = entry.label
            duration_mp[lab] = duration_mp.get(lab, 0.0) + (entry.end - entry.start)

    print_stats(duration_mp)
def textgrid2rttm(textgrid):
    '''
        Take in input the path to a text grid,
        and output a dictionary of lists *{spkr: [ (onset, duration) ]}*
        that can easily be written in rttm format.

        Every tier of the text grid is treated as one speaker; the label of
        each annotation is not used.
    '''
    tg = tgio.openTextgrid(textgrid)

    rttm_out = dict()
    for spkr in tg.tierNameList:
        # interval[0] is the onset and interval[1] the offset
        rttm_out[spkr] = [(interval[0], interval[1] - interval[0])
                          for interval in tg.tierDict[spkr].entryList]
    return rttm_out
def get_frame_labels(
        wavfile,
        annofile,
        start=0,
        end=None,
        frame_size=1024,
        frame_shift=256,
        n_speakers=None):
    """ Get frame-aligned labels of given recording
    Args:
        wavfile (str): path of the audio file; only its first 10 minutes
            are used
        annofile (str): path of the TextGrid holding the annotations; the
            tiers named 'CHN', 'FAN' and 'MAN' are read, missing tiers are
            skipped
        start (int): start frame index
        end (int): end frame index
            None means the last frame of recording
        frame_size (int): number of samples in a frame
        frame_shift (int): number of shift samples
        n_speakers (int): number of columns of the label matrix
            if None, the number of known speaker tiers (3) is used
    Returns:
        T: label
            (n_frames, n_speakers)-shaped np.int32 array
    """
    spkr2idx = {'CHN': 0, 'FAN': 1, 'MAN': 2}  # CXN?
    if n_speakers is None:
        # np.zeros() cannot take None as a dimension
        n_speakers = len(spkr2idx)

    tg = tgio.openTextgrid(annofile)
    rate, data = read(wavfile)
    data = np.array(data[:10 * 60 * rate],
                    dtype=np.float32)  # cut files to 10 minutes

    es = end * frame_shift if end is not None else None
    data = data[start * frame_shift:es]

    n_frames = _count_frames(len(data), frame_size, frame_shift)
    T = np.zeros((n_frames, n_speakers), dtype=np.int32)
    if end is None:
        end = n_frames

    for spkr, speaker_index in spkr2idx.items():  # for each tier
        try:
            entries = tg.tierDict[spkr].entryList
        except KeyError:
            # this recording has no tier for the speaker
            continue

        for entry in entries:  # create label
            st = entry.start
            # NOTE(review): entry.end is multiplied by `rate` below, so it
            # looks like a time in seconds, while len(data) is a sample
            # count -- confirm that this clipping is intended.
            et = min(entry.end, len(data))
            start_frame = np.rint(st * rate / frame_shift).astype(int)
            end_frame = np.rint(et * rate / frame_shift).astype(int)

            # Only boundaries that fall inside [start, end] are kept; a
            # missing boundary leaves that side of the slice open.
            rel_start = rel_end = None
            if start <= start_frame and start_frame < end:
                rel_start = start_frame - start
            if start < end_frame and end_frame <= end:
                rel_end = end_frame - start

            if rel_start is not None or rel_end is not None:
                T[rel_start:rel_end, speaker_index] = 1
            else:
                print('get_frame_labels error: rel_start or rel_end is None')
    return T
def textgridMorphDuration(fromTGFN, toTGFN):
    '''
    A convenience function.  Morphs interval durations of one tg to another.

    Each tier of the textgrid at /fromTGFN/ is morphed to the tier with the
    same name in the textgrid at /toTGFN/; the morphed tiers are returned
    in a new textgrid.

    This assumes the two textgrids have the same number of segments.
    '''
    sourceTG = tgio.openTextgrid(fromTGFN)
    targetTG = tgio.openTextgrid(toTGFN)

    adjustedTG = tgio.Textgrid()
    for tierName in sourceTG.tierNameList:
        morphedTier = sourceTG.tierDict[tierName].morph(
            targetTG.tierDict[tierName])
        adjustedTG.addTier(morphedTier)

    return adjustedTG
def test_shift(self):
    '''Shifting timestamps forward and then back restores the textgrid'''
    originalTG = tgio.openTextgrid(join(self.dataRoot, "mary.TextGrid"))

    roundTripTG = originalTG.editTimestamps(0.1, True)
    roundTripTG = roundTripTG.editTimestamps(-0.1, True)

    self.assertTrue(originalTG == roundTripTG)
def getPitchForIntervals(data, tgFN, tierName):
    '''
    Preps data for use in f0Morph

    Groups /data/ by the intervals of /tierName/ in the textgrid /tgFN/
    and returns only the grouped values, one list per interval.
    '''
    tier = tgio.openTextgrid(tgFN).tierDict[tierName]

    # getValuesInIntervals() yields (interval, values) pairs
    return [valueList for _, valueList in tier.getValuesInIntervals(data)]
def test_insert_delete_space(self):
    '''Inserting space and then erasing it restores the textgrid'''
    originalTG = tgio.openTextgrid(join(self.dataRoot, "mary.TextGrid"))

    # Insert one second at t=1, then erase the region 1-2 again
    roundTripTG = originalTG.insertSpace(1, 1, 'stretch')
    roundTripTG = roundTripTG.eraseRegion(1, 2, doShrink=True)

    self.assertTrue(originalTG == roundTripTG)
def is_blankStart(filePath):
    """
    Return True when the first entry of the first tier starts at the
    textgrid's minimum timestamp.

    Both times are compared after being truncated to hundredths.
    """
    tg = tgio.openTextgrid(filePath)
    firstTier = tg.tierDict[tg.tierNameList[0]]

    minTime = int(float(tg.minTimestamp) * 100)
    firstLabelStart = int(float(firstTier.entryList[0][0]) * 100)

    return minTime == firstLabelStart
def process_text_grid_file(text_grid_path):
    """
    Write the 'phones' tier of a TextGrid as a space-separated .phn file.

    Each output row holds the start and end of an entry, converted with
    secs_to_samps(), followed by its label.  The output path is the input
    path with '.TextGrid' replaced by '.phn'.
    """
    tg = tgio.openTextgrid(text_grid_path)

    rows = []
    for start, end, label in tg.tierDict['phones'].entryList:
        rows.append([secs_to_samps(start), secs_to_samps(end), label])

    out_path = text_grid_path.replace('.TextGrid', '.phn')
    table = pd.DataFrame(np.array(rows))
    table.to_csv(out_path, header=False, index=False, sep=' ')
def load_file(filename):
    """
    Open a textgrid and convert the entries of every tier to Entry objects.

    Raises:
        Exception: if an entry's label is not one of 'voc', 's' or 'ns'.
    """
    valid_labels = ['voc', 's', 'ns']

    tg = tgio.openTextgrid(filename)
    for tier_name, tier in tg.tierDict.items():
        converted = []
        for entry in tier.entryList:
            label = entry[2]
            # check if correct inside tiers names
            if label not in valid_labels:
                raise Exception("Bad entry " + tier_name + " : " + str(entry))
            converted.append(Entry(entry[0], entry[1], label))
        tier.entryList = converted
    return tg
def test_saving_short_textgrid(self):
    '''Tests that a short-form textgrid is unchanged by load + save'''
    sourceFN = join(self.dataRoot, "textgrid_to_merge.TextGrid")
    savedFN = join(self.outputRoot, "saved_short_file.textgrid")

    tgio.openTextgrid(sourceFN).save(savedFN)

    self.assertTrue(areTheSame(sourceFN, savedFN, readFile))
def test_tg_io(self):
    '''Tests that a saved textgrid loads back equal to its source'''
    name = "textgrid_to_merge.TextGrid"
    sourceFN = join(self.dataRoot, name)
    savedFN = join(self.outputRoot, name)

    tgio.openTextgrid(sourceFN).save(savedFN)

    # Both files are compared after being loaded with tgio.openTextgrid
    self.assertTrue(areTheSame(sourceFN, savedFN, tgio.openTextgrid))
def test_saving_long_textgrid(self):
    '''Tests that a long-form textgrid is unchanged by load + save'''
    sourceFN = join(self.dataRoot, "textgrid_to_merge_longfile.TextGrid")
    savedFN = join(self.outputRoot, "saved_long_file.textgrid")

    tgio.openTextgrid(sourceFN).save(savedFN, useShortForm=False)

    self.assertTrue(areTheSame(sourceFN, savedFN, readFile))
def extract_from_tg(input_folder, output_folder):
    """
    Cut the annotated minute out of every '*_m1.TextGrid' recording.

    For each file in ``input_folder`` whose name ends in '_m1.TextGrid' and
    does not start with 'vanuatu' (names must look like
    <child>_<date>_<time>_m1.TextGrid):

    * the timestamps of the textgrid are shifted by -180 s, its
      maxTimestamp is set to 60 and it is saved in ``output_folder`` with
      180 added to the <time> field of its name;
    * sox is called to trim the matching <child>_<date>_<time>.wav to the
      60 s that start at 180 s, written to ``output_folder`` under the
      equally shifted name.

    ``output_folder`` is created if it does not exist.  The exit status of
    sox is not checked.
    """
    offset = 180  # seconds skipped at the start of the recording
    length = 60   # seconds kept

    # create output directory
    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)

    for fin in os.listdir(input_folder):
        if fin.startswith('vanuatu'):
            continue
        if not fin.endswith('_m1.TextGrid'):
            continue
        print('treating file {}'.format(fin))

        # output name
        stem = fin.split('.')[0]
        child, date, start_time, m1 = stem.split('_')
        shifted_time = str(int(start_time) + offset)
        fout = '_'.join([child, date, shifted_time, m1]) + ".TextGrid"

        # read text grid, shift all timestamps and cut at 60 seconds
        tg = tgio.openTextgrid(os.path.join(input_folder, fin))
        copy_of_tg = tg.editTimestamps(-offset)
        copy_of_tg.maxTimestamp = length
        copy_of_tg.save(os.path.join(output_folder, fout))

        # extract minute from wav: the wav has the same name as the
        # textgrid without the "_m1" suffix
        wav_in = os.path.join(input_folder, stem[:-3] + '.wav')
        wav_out = os.path.join(
            output_folder, '_'.join([child, date, shifted_time])) + '.wav'

        # The arguments are passed as a list so that paths containing
        # whitespace reach sox intact.
        command = ['sox', wav_in, wav_out, 'trim', str(offset), str(length)]
        print(' '.join(command))
        process = subprocess.Popen(command,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE)
        process.communicate()  # wait for sox to finish
def markTranscriptForAnnotations(tgFN, tierName, outputTGFN,
                                 proportion=1 / 5.0):
    '''
    Prep a noisy silence annotation for an annotation task

    Voice activity detectors are liable to segment speech into very small
    chunks (fragments of speech and silence).  This function segments a
    recording into larger units that could be used in a speech
    transcription task: the file is cut at the midpoints of its longest
    "silent" intervals and the resulting, sequentially numbered intervals
    are saved in the "toTranscribe" tier of a new textgrid at /outputTGFN/.

    The number of cuts is at most ceil(duration * /proportion/).

    Assumes the speaker is speaking for most of the recording.
    '''
    tg = tgio.openTextgrid(tgFN)
    duration = tg.maxTimestamp
    numEntries = int(math.ceil(duration * proportion))

    # Collect the silent intervals, skipping those that touch the start or
    # the end of the file.  The length comes first so that sorting the
    # tuples orders them by length.
    silenceList = []
    for start, stop, label in tg.tierDict[tierName].entryList:
        if label != "silent":
            continue
        if start == 0 or stop == duration:
            continue
        silenceList.append((stop - start, start, stop, label))

    # Put longest intervals first and keep the longest n
    silenceList.sort(reverse=True)
    silenceList = silenceList[:numEntries]

    # The midpoints of the kept silences become the cut points
    cutList = sorted(start + ((stop - start) / 2.0)
                     for _, start, stop, _ in silenceList)
    boundaryList = [0.0] + cutList + [duration]

    newEntryList = [(boundaryList[i], boundaryList[i + 1], "%d" % i)
                    for i in range(len(boundaryList) - 1)]

    outputTG = tgio.Textgrid()
    outputTG.addTier(
        tgio.IntervalTier("toTranscribe", newEntryList, 0, duration))
    outputTG.save(outputTGFN)
def test_rename_tier(self):
    '''Renaming a tier removes the old name and adds the new one'''
    tg = tgio.openTextgrid(join(self.dataRoot, "mary.TextGrid"))

    tg.renameTier("phone", "candy")

    tierNames = tg.tierNameList
    self.assertTrue("phone" not in tierNames)
    self.assertTrue("candy" in tierNames)
def is_blankEnd(filePath):
    """
    Return True when the second-to-last value of the last entry of the
    first tier equals the textgrid's maximum timestamp.

    Both times are compared after being truncated to hundredths.
    """
    tg = tgio.openTextgrid(filePath)
    firstTier = tg.tierDict[tg.tierNameList[0]]

    maxTime = int(float(tg.maxTimestamp) * 100)
    # presumably entries are (start, end, label), making [-2] the end
    # time -- verify for tiers of another type
    lastLabelEnd = int(float(firstTier.entryList[-1][-2]) * 100)

    return maxTime == lastLabelEnd
def find_first_word_location(script_file_path, textgrid_file_path):
    """
    Return the start times of the "words" tier entries at the indices given
    by get_first_words_idx(script_file_path), leaving out the last index.
    """
    tg = tgio.openTextgrid(textgrid_file_path)

    # The final index returned by get_first_words_idx() is not used
    first_words_idx = get_first_words_idx(script_file_path)[:-1]

    return [tg.tierDict["words"].entryList[idx].start
            for idx in first_words_idx]
def deleteVowels(inputTGFN, inputWavFN, outputPath, doShrink,
                 atZeroCrossing=True):
    '''
    Removes the vowels from a recording and from its textgrid

    Entries of the "phone" tier whose label satisfies isVowel() are
    deleted.  The edited wav file and textgrid are saved in /outputPath/
    under their original file names.  /doShrink/ is passed on to both the
    audio and the textgrid deletion.

    If /atZeroCrossing/ is True, praatio_scripts.tgBoundariesToZeroCrossings
    is additionally run on the textgrid, with its output directed to the
    "zero_crossing_tgs" subfolder of /outputPath/.
    '''
    utils.makeDir(outputPath)

    wavName = os.path.basename(inputWavFN)
    tgName = os.path.basename(inputTGFN)

    if atZeroCrossing is True:
        zeroCrossingTGPath = join(outputPath, "zero_crossing_tgs")
        utils.makeDir(zeroCrossingTGPath)

        tg = tgio.openTextgrid(inputTGFN)
        praatio_scripts.tgBoundariesToZeroCrossings(
            tg, audioio.WavQueryObj(inputWavFN),
            join(zeroCrossingTGPath, tgName))
    else:
        tg = tgio.openTextgrid(inputTGFN)

    # Everything that isn't a vowel is kept; the deleted regions are the
    # complement of the kept ones.
    keepList = [entry for entry in tg.tierDict["phone"].entryList
                if not isVowel(entry[2])]
    deleteList = utils.invertIntervalList(keepList, tg.maxTimestamp)

    audioio.openAudioFile(inputWavFN,
                          keepList=keepList,
                          doShrink=doShrink).save(join(outputPath, wavName))

    # Erase from the end towards the start so that the remaining regions
    # are not displaced by earlier deletions
    shrunkTG = copy.deepcopy(tg)
    for start, stop in sorted(deleteList, reverse=True):
        shrunkTG = shrunkTG.eraseRegion(start, stop, doShrink=doShrink)

    shrunkTG.save(join(outputPath, tgName))
def load_textgrids(TextGridDir, long2iso, lfn):
    """
    Load the TextGrid of every key of ``long2iso`` that has one on disk.

    A key ``name`` is looked up as ``<TextGridDir>/<name>.TextGrid``; keys
    without such a file are skipped.

    Returns:
        dict mapping each loaded key to its textgrid.

    Raises:
        KeyError: if a loaded textgrid has no "seg" tier.
        FileNotFoundError: if no textgrid was loaded; ``lfn`` is only used
            in this message.
    """
    tgs = {}
    for filename in long2iso.keys():
        textgridpath = os.path.join(TextGridDir, filename + '.TextGrid')
        if not os.path.isfile(textgridpath):
            continue

        tg = tgio.openTextgrid(textgridpath)
        if 'seg' not in tg.tierDict:
            raise KeyError('Required key "seg" not found in %s' % (textgridpath))
        tgs[filename] = tg

    if not tgs:
        raise FileNotFoundError(
            "None of filenames in %s found in %s" % (lfn, TextGridDir))
    return tgs
def textgridToCSV(inputPath, outputPath, outputExt='.csv'):
    '''
    Writes the "utterances" tier of each textgrid in /inputPath/ to a file

    One "start,stop,label" line is written per entry.  The output file is
    saved in /outputPath/ as utf-8, under the textgrid's name with its
    extension replaced by /outputExt/.
    '''
    utils.makeDir(outputPath)

    for fn in utils.findFiles(inputPath, filterExt=".TextGrid"):
        tier = tgio.openTextgrid(join(inputPath, fn)).tierDict["utterances"]

        outputTxt = "\n".join("%s,%s,%s" % (start, stop, label)
                              for start, stop, label in tier.entryList)

        name = os.path.splitext(fn)[0]
        outputFN = join(outputPath, "%s%s" % (name, outputExt))
        with io.open(outputFN, "w", encoding="utf-8") as fd:
            fd.write(outputTxt)
def markTranscriptForAnnotations(tgFN, tierName, outputTGFN,
                                 proportion=1 / 5.0):
    '''
    Prep a noisy silence annotation for an annotation task

    Voice activity detectors tend to chop speech into very small chunks
    (fragments of speech and silence).  Here a recording is instead divided
    into larger units, suitable for a speech transcription task, by cutting
    it in the middle of its longest "silent" intervals.  The units are
    numbered from 0 and saved in a "toTranscribe" tier at /outputTGFN/.

    At most ceil(duration * /proportion/) cuts are made.

    Assumes the speaker is speaking for most of the recording.
    '''
    tg = tgio.openTextgrid(tgFN)
    duration = tg.maxTimestamp
    numEntries = int(math.ceil(duration * proportion))

    def isInnerSilence(start, stop, label):
        # Silent, and touching neither the start nor the end of the file
        return label == "silent" and start != 0 and stop != duration

    # The length is the first value so that the longest sort first
    candidateList = [(stop - start, start, stop, label)
                     for start, stop, label in tg.tierDict[tierName].entryList
                     if isInnerSilence(start, stop, label)]
    candidateList.sort(reverse=True)

    # Cut at the midpoints of the longest n silences
    pointList = [start + ((stop - start) / 2.0)
                 for _, start, stop, _ in candidateList[:numEntries]]
    pointList.sort()
    pointList = [0.0] + pointList + [duration]

    newEntryList = []
    for i, (left, right) in enumerate(zip(pointList, pointList[1:])):
        newEntryList.append((left, right, "%d" % i))

    tier = tgio.IntervalTier("toTranscribe", newEntryList, 0, duration)
    outputTG = tgio.Textgrid()
    outputTG.addTier(tier)
    outputTG.save(outputTGFN)
def merge_adjacent(path, fn, outputPath):
    '''
    Goes through every tier of a textgrid; combines adjacent filled intervals

    Two consecutive entries are merged when the first ends exactly where
    the second starts; the merged label is the two labels joined with
    " - ".  Tiers without any entries are carried over as empty tiers.

    The result is saved as /fn/ in /outputPath/, which must differ from
    /path/ and is created if it does not exist.
    '''
    assert(path != outputPath)

    if not os.path.exists(outputPath):
        os.mkdir(outputPath)

    outputTG = tgio.Textgrid()

    tg = tgio.openTextgrid(join(path, fn))
    for tierName in tg.tierNameList:
        tier = tg.tierDict[tierName]

        # Entries are copied into lists so that the end time and the label
        # of the most recent one can be extended in place.
        newEntryList = []
        for entry in tier.entryList:
            # Is a boundary shared?
            if newEntryList and newEntryList[-1][1] == entry[0]:
                newEntryList[-1][1] = entry[1]  # Old end = new end
                newEntryList[-1][2] += " - " + entry[2]
            else:
                newEntryList.append(list(entry))

        replacementTier = tgio.IntervalTier(tierName,
                                            newEntryList,
                                            tier.minTimestamp,
                                            tier.maxTimestamp)
        outputTG.addTier(replacementTier)

    outputTG.save(join(outputPath, fn))
def correctTextgridTimes(tgPath, threshold):
    '''
    Snaps phone boundaries onto nearby word boundaries

    For every textgrid in /tgPath/, each entry of the "phones" tier that
    overlaps an entry of the "words" tier is examined.  If one of its
    boundaries differs from one of the word's boundaries by less than
    /threshold/ (without being equal to it), that phone boundary is
    replaced by the word boundary.  At most one boundary is changed per
    word/phone pair.

    The corrected textgrids are saved in the "correctsTGs" subfolder.
    '''
    def withinThreshold(x, y):
        # Are x and y unique but very very similar
        return (abs(x - y) < threshold) and (x != y)

    outputPath = join(tgPath, "correctsTGs")
    utils.makeDir(outputPath)

    for fn in utils.findFiles(tgPath, filterExt=".TextGrid"):
        print(fn)
        tg = tgio.openTextgrid(join(tgPath, fn))
        wordTier = tg.tierDict["words"]
        phoneTier = tg.tierDict["phones"]

        for wordEntry in wordTier.entryList:
            wordStart, wordEnd = wordEntry[0], wordEntry[1]

            for i, phoneEntry in enumerate(phoneTier.entryList):
                if not tgio.intervalOverlapCheck(wordEntry, phoneEntry):
                    continue

                start, end, phone = phoneEntry[0], phoneEntry[1], phoneEntry[2]

                if withinThreshold(wordStart, start):
                    start = wordStart
                elif withinThreshold(wordEnd, start):
                    start = wordEnd
                elif withinThreshold(wordStart, end):
                    end = wordStart
                elif withinThreshold(wordEnd, end):
                    end = wordEnd

                phoneTier.entryList[i] = (start, end, phone)

        tg.save(join(outputPath, fn))
def generatePIMeasures(dataList, tgFN, tierName, doPitch,
                       medianFilterWindowSize=None,
                       globalZNormalization=False,
                       localZNormalizationWindowSize=0):
    '''
    Generates processed values for the labeled intervals in a textgrid

    /dataList/ is grouped by the intervals of /tierName/ in the textgrid
    /tgFN/ and one row of measures is returned per interval.

    doPitch: if True, the row holds the values returned by
             getPitchMeasures(); if False, it holds the rms intensity
             (0 when the interval has no values)
    medianFilterWindowSize: passed on to getPitchMeasures(); if None, no
                            filtering is done
    globalZNormalization: if True, values are normalized with the mean
                          and stdDev of the data in dataList
    localZNormalizationWindowSize: if greater than 0, the measures are
                          normalized with the mean and stdDev of the local
                          context (for a window of 5, it would consider
                          the current value, 2 values before and 2 values
                          after)

    Raises a NormalizationException if both normalizations are requested.
    '''
    doLocalNormalization = localZNormalizationWindowSize > 0

    # Normalizing a second time would nullify the first normalization
    if globalZNormalization is True and doLocalNormalization:
        raise NormalizationException()

    if globalZNormalization is True:
        # The value to normalize sits in column 1 for pitch and in
        # column 2 for intensity
        columnIndex = 1 if doPitch else 2
        dataList = myMath.znormalizeSpeakerData(dataList, columnIndex, True)

    # Raw values should have 0 filtered; normalized values are centered
    # around 0, so don't filter
    filterZeroFlag = not globalZNormalization

    tg = tgio.openTextgrid(tgFN)
    piData = tg.tierDict[tierName].getValuesInIntervals(dataList)

    outputList = []
    for interval, entryList in piData:
        label = interval[0]
        if doPitch:
            f0List = [f0Val for _, f0Val, _ in entryList]
            f0Measures = getPitchMeasures(f0List, tgFN, label,
                                          medianFilterWindowSize,
                                          filterZeroFlag)
            outputList.append(list(f0Measures))
            continue

        intensityList = [intensityVal for _, _, intensityVal in entryList]
        if filterZeroFlag:
            intensityList = [intensityVal for intensityVal in intensityList
                             if intensityVal != 0.0]

        if len(intensityList) != 0:
            rmsIntensity = myMath.rms(intensityList)
        else:
            rmsIntensity = 0
        outputList.append([rmsIntensity, ])

    # Locally normalize the output, one column at a time
    if doLocalNormalization and len(outputList) > 0:
        for colI in range(len(outputList[0])):
            columnValList = [row[colI] for row in outputList]
            columnValList = myMath.znormWindowFilter(
                columnValList, localZNormalizationWindowSize, True, True)

            assert(len(columnValList) == len(outputList))

            for row, val in zip(outputList, columnValList):
                row[colI] = val

    return outputList
root = r"C:\Users\Tim\Dropbox\workspace\praatIO\examples\files" audioFN = join(root, "mary.wav") tgFN = join(root, "mary.TextGrid") outputPath = join(root, "splice_example") outputAudioFN = join(outputPath, "barry_spliced.wav") outputTGFN = join(outputPath, "barry_spliced.TextGrid") tierName = "phone" if not os.path.exists(outputPath): os.mkdir(outputPath) # Find the region to replace and the region that we'll replace it with tg = tgio.openTextgrid(tgFN) tier = tg.tierDict[tierName] mEntry = tier.entryList[tier.find('m')[0]] bEntry = tier.entryList[tier.find('b')[0]] sourceAudioObj = audioio.openAudioFile(audioFN) mAudioObj = sourceAudioObj.getSubsegment(mEntry[0], mEntry[1]) bAudioObj = sourceAudioObj.getSubsegment(bEntry[0], bEntry[1]) # Replace 'm' with 'b' audioObj, tg = praatio_scripts.audioSplice(sourceAudioObj, bAudioObj, tg, tierName, "b",
'''
Praatio example for extracting points in a PointProcess for the vowels
specified in a textgrid
'''

import os
from os.path import join

from praatio import tgio
from praatio import dataio

path = join(".", "files")
outputPath = join(path, "point_process_output")
if not os.path.exists(outputPath):
    os.mkdir(outputPath)

tg = tgio.openTextgrid(join(path, "bobby_phones.TextGrid"))
pp = dataio.open1DPointObject(join(path, "bobby.PointProcess"))

# Keep the points that fall inside intervals whose label starts with a vowel
newPoints = []
tier = tg.tierDict["phone"]
for start, stop, label in tier.entryList:
    # Slicing (rather than indexing) keeps an empty label from raising an
    # IndexError; an empty label is simply not a vowel.
    if label.lower()[:1] not in ["a", "e", "i", "o", "u"]:
        continue
    # The points of each interval are added as a single item
    newPoints.append(pp.getPointsInInterval(start, stop))

outputPP = dataio.PointObject1D(newPoints, dataio.POINT,
                                pp.minTime, pp.maxTime)
outputPP.save(join(outputPath, "bobby_vowels.PointProcess"))
from os.path import join

from praatio import tgio
from pysle import isletool
from pysle import praattools

root = join('.', 'files')
isleDict = isletool.LexicalTool(join(root, "ISLEdict_sample.txt"))

inputFN = join(root, "pumpkins_with_syllables.TextGrid")
outputFN = join(root, "pumpkins_with_naive_alignment.TextGrid")

utteranceTierName = "utterance"
wordTierName = "word"
phoneListTierName = "phoneList"
phoneTierName = "phone"

# Start from a textgrid that holds nothing but the utterance tier.  The
# name list is copied because tiers are removed while looping over it.
tg = tgio.openTextgrid(inputFN)
for tierName in list(tg.tierNameList):
    if tierName != utteranceTierName:
        tg.removeTier(tierName)

# Utterances -> words (and phone lists) -> phones
tg = praattools.naiveWordAlignment(tg, utteranceTierName, wordTierName,
                                   isleDict, phoneListTierName)
tg = praattools.naivePhoneAlignment(tg, wordTierName, phoneTierName,
                                    isleDict)
tg.save(outputFN)
Example of using praatio for merging tiers together. ''' import os from os.path import join from praatio import tgio path = join('.', 'files') outputPath = join(path, "merged_textgrids") if not os.path.exists(outputPath): os.mkdir(outputPath) # Let's use praatio to construct some hypothetical textgrids tg = tgio.openTextgrid(join(path, "bobby_words.TextGrid")) wordTier = tg.tierDict["word"] entryList = wordTier.entryList bobbyPhoneTG = tgio.openTextgrid(join(path, "bobby_phones.TextGrid")) bobbyTG = tgio.Textgrid() bobbyTG.addTier(bobbyPhoneTG.tierDict["phone"]) bobbyTG.addTier(tgio.IntervalTier("nouns", [entryList[1], ])) bobbyTG.addTier(tgio.IntervalTier("verbs", [entryList[2], ])) bobbyTG.addTier(tgio.IntervalTier("subjects", entryList[3:5])) # Let's save it, in case you want to see it bobbyTG.save(join(outputPath, "mergeExample_bobby_words_split.TextGrid"))
'''
Prints the duration of every interval of every interval tier in the
textgrids listed below

One csv-friendly line is printed per interval: name, tier, label, duration
'''

import os
from os.path import join

from praatio import tgio

path = join(".", "files")
for fn in ["bobby_phones.TextGrid", "bobby_words.TextGrid",
           "mary.TextGrid"]:

    tg = tgio.openTextgrid(join(path, fn))
    name = os.path.splitext(fn)[0]

    # Get the durations for each tier; only interval tiers have durations
    for tierName in tg.tierNameList:
        tier = tg.tierDict[tierName]
        if isinstance(tier, tgio.IntervalTier):
            for start, stop, label in tier.entryList:
                duration = float(stop) - float(start)
                txt = u"%s,%s,%s,%0.2f" % (name, tierName, label, duration)
                print(txt.encode('utf-8'))
'''
Praatio example of combining the tiers of two textgrids into one textgrid
'''

import os
from os.path import join

from praatio import tgio

path = join('.', 'files')
outputPath = join(path, "merged_textgrids")
if not os.path.exists(outputPath):
    os.mkdir(outputPath)

tgPhones = tgio.openTextgrid(join(path, "bobby_phones.TextGrid"))
tgWords = tgio.openTextgrid(join(path, "bobby_words.TextGrid"))

# The word tier is added to the textgrid that already holds the phones
wordTier = tgWords.tierDict["word"]
tgPhones.addTier(wordTier)

outputFN = join(outputPath, "bobby.TextGrid")
tgPhones.save(outputFN)
from praatio import tgio
from praatio import audioio

path = join(".", "files")
outputPath = join(path, "anonymized_data")

if not os.path.exists(outputPath):
    os.mkdir(outputPath)

for wavFN, tgFN in (("mary.wav", "mary.TextGrid"),
                    ("bobby.wav", "bobby_words.TextGrid")):

    outputWavFN = join(outputPath, wavFN)

    # Find the word(s) to anonymize -- here simply the first word.
    # (A search for common names, or for some sort of code such as
    # 'section-to-anonymize', could be used instead.)
    tg = tgio.openTextgrid(join(path, tgFN))
    wordList = [tg.tierDict['word'].entryList[0]]

    # Only the time information is needed, not the label
    deleteList = [(start, stop) for start, stop, _ in wordList]

    # Replace segments with a sine wave
    wavQObj = audioio.WavQueryObj(join(path, wavFN))
    wavQObj.deleteWavSections(outputWavFN,
                              deleteList=deleteList,
                              operation="sine wave")