def _addSyllableNucleiToTextgrids(wavPath, tgPath, tierName,
                                  syllableNucleiPath, outputPath):
    '''
    Add a "Syllable Nuclei" point tier to the textgrid of every wav file

    For each ".wav" file found in /wavPath/, the textgrid with the same
    name is loaded from /tgPath/, a point tier holding one numbered point
    per syllable nucleus is added to it, and the result is saved under
    the same file name in /outputPath/.

    wavPath - folder searched for ".wav" files
    tgPath - folder holding one "<name>.TextGrid" per wav file
    tierName - tier whose entry start times are handed to
               uwe_sr.toAbsoluteTime()
    syllableNucleiPath - passed through to uwe_sr.toAbsoluteTime()
                         (presumably the folder holding the nucleus
                         detection output -- confirm against uwe_sr)
    outputPath - folder the augmented textgrids are written to
    '''
    for name in utils.findFiles(wavPath, filterExt=".wav", stripExt=True):
        tgFN = join(tgPath, name + ".TextGrid")

        # Load the textgrid once; the same object supplies the reference
        # tier and later receives the new tier (it used to be re-read from
        # disk a second time just before adding the tier).
        tg = tgio.openTextgrid(tgFN)
        startTimeList = [entry[0]
                         for entry in tg.tierDict[tierName].entryList]

        # toAbsoluteTime() returns one sublist per start time; the tier
        # only needs a single flat sequence of timestamps.
        nucleusSyllableList = uwe_sr.toAbsoluteTime(name,
                                                    syllableNucleiPath,
                                                    startTimeList)
        flattenedSyllableList = [nuclei
                                 for sublist in nucleusSyllableList
                                 for nuclei in sublist]

        duration = audio_scripts.getSoundFileDuration(
            join(wavPath, name + ".wav"))

        # Labels are the running index of each nucleus, zero-padded to a
        # common width (e.g. "%03d") derived from the number of nuclei.
        oom = my_math.orderOfMagnitude(len(flattenedSyllableList))
        labelTemplate = "%%0%dd" % (oom + 1)
        entryList = [(timestamp, labelTemplate % i)
                     for i, timestamp in enumerate(flattenedSyllableList)]

        # NOTE(review): looks like leftover debug output; kept so that the
        # function's console output is unchanged.
        print(flattenedSyllableList)

        # The point tier spans the whole recording: 0 .. wav duration
        tier = tgio.PointTier("Syllable Nuclei", entryList, 0, duration)
        tg.addTier(tier)
        tg.save(join(outputPath, name + ".TextGrid"))
def detectPitchErrors(pitchList, maxJumpThreshold=0.70, tgToMark=None):
    '''
    Detect pitch halving and doubling errors.

    Every pitch sample is compared with the sample right before it.  The
    pair is reported when the previous pitch lies outside the open range
    (current * maxJumpThreshold, current / maxJumpThreshold).

    If a textgrid is passed in, it adds the markings to the textgrid

    pitchList - sequence of entries whose item 0 is the timestamp and
                whose item 1 is the pitch value
    maxJumpThreshold - value between 0.0 and 1.0 (inclusive); the closer
                to 1.0, the smaller the jump that gets reported.  With
                0.0 there is no upper cutoff at all.
    tgToMark - optional textgrid; if given, a point tier named
                "pitch errors" holding the detected errors is added to
                it.  The textgrid must not already contain such a tier.

    Returns the tuple (errorList, tgToMark).  errorList holds one
    [time, currentPitch / previousPitch] entry per detected jump.  When
    the previous pitch is 0 the ratio is undefined: a jump away from 0 is
    reported with a ratio of infinity, while 0 followed by 0 is not a
    jump and is not reported.
    '''
    assert 0.0 <= maxJumpThreshold <= 1.0

    infinity = float("inf")

    errorList = []
    for previous, current in zip(pitchList, pitchList[1:]):
        lastPitch = previous[1]
        currentPitch = current[1]

        floorCutoff = currentPitch * maxJumpThreshold
        # A threshold of 0.0 passes the validation above; dividing by it
        # would raise, so it is treated as "no upper cutoff".
        if maxJumpThreshold > 0:
            ceilingCutoff = currentPitch / maxJumpThreshold
        else:
            ceilingCutoff = infinity

        if not (lastPitch <= floorCutoff or lastPitch >= ceilingCutoff):
            continue

        if lastPitch == 0:
            # No finite ratio exists when the previous sample is 0
            if currentPitch == 0:
                continue
            ratio = infinity
        else:
            ratio = currentPitch / lastPitch

        errorList.append([current[0], ratio])

    if tgToMark is not None:
        tierName = "pitch errors"
        assert tierName not in tgToMark.tierNameList
        pointTier = tgio.PointTier(tierName, errorList,
                                   tgToMark.minTimestamp,
                                   tgToMark.maxTimestamp)
        tgToMark.addTier(pointTier)

    return errorList, tgToMark
def make_textgrid(df, out_name, orig_name=None, word2phone=None):
    '''
    Build phone / syllable (and optionally word / break) tiers and save them

    df - table with 'start', 'end' and 'phone' columns, one row per phone
    out_name - path the textgrid is written to; missing parent folders
               are created
    orig_name - optional path of an existing textgrid to extend; when
                omitted, a new empty textgrid is used
    word2phone - optional lookup passed through to make_word_list(); word
                 matching only runs when orig_name is given as well

    Returns the tuple (number of matched words, number of unmatched
    words); both are 0 when no word matching was done.
    '''
    if orig_name:
        tg = tgio.openTextgrid(orig_name)
    else:
        tg = tgio.Textgrid()

    # Labels that are copied to the syllable tier as they are
    non_speech = {'spn', 'sil'}

    phones_list = []
    syllables_list = []
    curr_syllable = []
    # Start time of the syllable being collected; None while no initial
    # is pending.  It is cleared whenever a syllable is closed so that a
    # final without a preceding initial starts at its own start time
    # rather than at a stale (or not yet defined) one.
    syllable_start = None
    for row in df[['start', 'end', 'phone']].itertuples():
        phones_list.append((row.start, row.end, row.phone))
        if row.phone in non_speech:
            syllables_list.append((row.start, row.end, row.phone))
            curr_syllable = []
            syllable_start = None
        elif len(row.phone) > 2 and row.phone[-2] == '_':
            # A final ('_' in the second to last position) closes the
            # current syllable
            curr_syllable.append(row.phone)
            if syllable_start is None:
                syllable_start = row.start
            syllables_list.append(
                (syllable_start, row.end, ' '.join(curr_syllable)))
            curr_syllable = []
            syllable_start = None
        else:
            # Anything else is treated as an initial
            curr_syllable.append(row.phone)
            syllable_start = row.start

    phone_tier = tgio.IntervalTier('phone', phones_list)
    # NOTE(review): the tier name really contains a backslash
    # (syllable\_phones).  It is spelled with an escaped backslash so the
    # runtime value is unchanged while the invalid "\_" escape sequence is
    # gone -- confirm whether the backslash is wanted at all.
    syllable_tier = tgio.IntervalTier('syllable\\_phones', syllables_list)

    # Defaults for the case in which no word matching takes place
    word_list = []
    unmatched_words = []
    break_list = None
    if orig_name and word2phone:
        ipus, _, xmaxs = get_ipus(tg)
        word_list, unmatched_words, break_list = make_word_list(
            syllable_tier, ipus, word2phone, out_name, xmaxs)
        tg.addTier(tgio.IntervalTier('word', word_list))

    tg.addTier(phone_tier)
    tg.addTier(syllable_tier)

    # An existing, non-empty 'breaks' tier is kept.  Otherwise the breaks
    # found during word matching (if any) are written to a 'break' tier,
    # replacing an empty 'breaks' tier when there is one.
    breaks_tier = tg.tierDict.get('breaks')
    if breaks_tier is not None and breaks_tier.entryList:
        print(out_name, 'has break tier, did not write new one')
    elif break_list is not None:
        if breaks_tier is not None:
            tg.removeTier('breaks')
        tg.addTier(tgio.PointTier('break', break_list))

    # os.makedirs('') raises, so skip it for a bare file name
    out_dir = os.path.dirname(out_name)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    tg.save(out_name, useShortForm=False)

    print('wrote to {}, # matched: {}, # unmatched: {}'.format(
        out_name, len(word_list), len(unmatched_words)))
    return len(word_list), len(unmatched_words)