def markupTextgridWithSyllableNuclei(wavPath, tgPath, tierName, wavTmpPath, syllableNucleiPath, matlabEXE, matlabScriptsPath, outputPath, printCmd=True, outputTGFlag=False): utils.makeDir(outputPath) # This can be commented out and instead, you can run the code directly # from matlab, then you can start directly from the next line _runSpeechRateEstimateOnIntervals(wavPath, tgPath, tierName, wavTmpPath, syllableNucleiPath, matlabEXE, matlabScriptsPath, printCmd, outputTGFlag) _addSyllableNucleiToTextgrids(wavPath, tgPath, tierName, syllableNucleiPath, outputPath) _calculateSyllablesPerSecondForIntervals(wavPath, tgPath, tierName, syllableNucleiPath)
def renameTiers(inputPath, outputPath, includeMothersPhones=False):
    """Normalize tier names in every textgrid in *inputPath*.

    Maps the many variant spellings of each tier name (e.g. "Mother's
    speech", "mother speech") onto one canonical name and saves the
    updated textgrids to *outputPath*.

    Fix: the except-branch used a Python 2 print statement (``print fn``),
    which is a SyntaxError under Python 3; converted to ``print(fn)``.
    """
    renameList = [
        (["Mother", "Mother's Speech", "Mother's speech", "mother's speech",
          "Mother Speech", "mother speech"], "Mother"),
        (["Mother's Backchannel", "Mother's backchannel",
          "mother's backchannel", "child's backchannel"],
         "Mother's Backchannel"),
        (["Child", "Child's speech", "Child's Speech", "child's speech",
          "Child Speech", "child speech"], "Child"),
        (["Room", "Extraneous room noise", "Extraneous Room Noise",
          "Extraneous Noise", "Room Noise", "room noise", "Room noise",
          "extraneous room noise"], "Room"),
        (["Timer", "Time"], "Timer"),
        (["Epochs", "epochs", ], "Epochs"),
    ]

    if includeMothersPhones:
        renameList.insert(1, (["Mother's phones", ], "Mother's Phones"))

    utils.makeDir(outputPath)
    for fn in utils.findFiles(inputPath, filterExt=".TextGrid"):
        print(fn)
        tg = tgio.openTextGrid(join(inputPath, fn))
        for oldNameList, newName in renameList:
            try:
                tg = replaceTierName(tg, oldNameList, newName)
            except ValueError:
                # Report the offending file before propagating the error
                print(fn)
                raise
        tg.save(join(outputPath, fn))
def syllabifyTextgrids(tgPath, islePath):
    """Syllabify the word/phone tiers of every textgrid in *tgPath*.

    Output goes to a "syllabifiedTGs" subfolder; files already present
    there are skipped so the batch can be resumed.
    """
    isleDict = isletool.LexicalTool(islePath)

    outputPath = join(tgPath, "syllabifiedTGs")
    utils.makeDir(outputPath)

    # Labels that should not be syllabified (noise, silence, markers)
    skipLabelList = ["<VOCNOISE>", "xx", "<SIL>", "{B_TRANS}", '{E_TRANS}']

    for tgFN in utils.findFiles(tgPath, filterExt=".TextGrid"):
        outputFN = join(outputPath, tgFN)
        if os.path.exists(outputFN):
            continue  # Already processed

        tg = tgio.openTextgrid(join(tgPath, tgFN))
        syllableTG = praattools.syllabifyTextgrid(isleDict, tg, "words",
                                                  "phones",
                                                  skipLabelList=skipLabelList)

        # Keep the source word/phone tiers and add the derived tonic tier
        outputTG = tgio.Textgrid()
        for tierName in ("words", "phones"):
            outputTG.addTier(tg.tierDict[tierName])
        # outputTG.addTier(syllableTG.tierDict["syllable"])
        outputTG.addTier(syllableTG.tierDict["tonic"])

        outputTG.save(outputFN)
def filterTextgrids(tgPath, speechTierName, laughterTierName, minDuration,
                    outputPath):
    '''
    Removes invalid entries from the mother's speech tier

    - removes pauses (FP, SP)
    - removes speech (MS) that occurs with insitu laughter (LA)
    - removes ultrashort utterances (uwe's script crashed on an utterance
      of length 0.013 seconds)

    Fix: removed a stray debug ``print(entry)`` that spammed stdout with
    every entry of every file.
    '''
    utils.makeDir(outputPath)

    for fn in utils.findFiles(tgPath, filterExt=".TextGrid"):
        tg = tgio.openTextGrid(join(tgPath, fn))

        # Keep only speech entries that do not co-occur with laughter
        newTierEntryList = []
        speechTier = tg.tierDict[speechTierName]
        for entry in speechTier.entryList:
            start, stop, label = entry
            if insituLaughterCheck(start, stop, tg, laughterTierName):
                newTierEntryList.append(entry)

        # Removes all speech events shorter than some threshold
        newTierEntryList = [(start, stop, label)
                            for start, stop, label in newTierEntryList
                            if float(stop) - float(start) > minDuration]

        tg.replaceTier(speechTierName, newTierEntryList)
        tg.save(join(outputPath, fn))
def markupTextgridWithSyllableNuclei(wavPath, tgPath, tierName, wavTmpPath, syllableNucleiPath, matlabEXE, matlabScriptsPath, outputPath, printCmd=True, outputTGFlag=False): utils.makeDir(outputPath) # This can be commented out and instead, you can run the code directly # from matlab, then you can start directly from the next line print("RUN_SPEECH_RATE_ESTIMATE_ON_INTERVALS") #TODO DEBUG _runSpeechRateEstimateOnIntervals(wavPath, tgPath, tierName, wavTmpPath, syllableNucleiPath, matlabEXE, matlabScriptsPath, printCmd, outputTGFlag) print("ADD_SYLLABLE_NUCLEI_TO_TEXTGRIDS") #TODO DEBUG _addSyllableNucleiToTextgrids(wavPath, tgPath, tierName, syllableNucleiPath, outputPath) print("CALCULATE_SYLLABLES_PER_SECOND_FOR_INTERVALS") #TODO DEBUG _calculateSyllablesPerSecondForIntervals(wavPath, tgPath, tierName, syllableNucleiPath)
def extractTGInfo(inputPath, outputPath, tierName, searchForMothersSpeech):
    '''
    Same as textgrids.extractTGInfo?

    Dumps each (start, stop, label) interval of *tierName* as one CSV row
    per line; when *searchForMothersSpeech* is True, only "MS" intervals
    are kept.

    Fixes: Python 2 ``print name`` statement; output handle was never
    closed (now a context manager); removed unused local ``minDuration``.
    '''
    utils.makeDir(outputPath)

    for name in utils.findFiles(inputPath, filterExt=".TextGrid",
                                stripExt=True):
        print(name)
        tg = tgio.openTextGrid(join(inputPath, name + ".TextGrid"))
        tier = tg.tierDict[tierName]
        entryList = tier.getEntries()

        if searchForMothersSpeech:
            # Keep only the mother-speech intervals
            entryList = [(start, stop, label)
                         for start, stop, label in entryList
                         if label == "MS"]

        outputList = ["%f,%f,%s" % (start, stop, label)
                      for start, stop, label in entryList]

        outputTxt = "\n".join(outputList) + "\n"
        with codecs.open(join(outputPath, name + ".txt"), "w",
                         encoding="utf-8") as fd:
            fd.write(outputTxt)
def extractPraatPitchForEpochs(pitchPath, epochPath, tgInfoPath, outputPath):
    """Compute per-epoch pitch measures over the mother's speech intervals.

    For each pitch file: collect the F0 samples falling inside the speech
    intervals (from *tgInfoPath*), then summarize those samples per epoch
    (from *epochPath*) and write one row per epoch.

    Fixes: Python 2 ``print name`` statement; output handle was never
    closed (now a context manager); removed unused local ``duration``.
    """
    utils.makeDir(outputPath)

    for fn in utils.findFiles(pitchPath, filterExt=".txt"):
        name = os.path.splitext(fn)[0]
        print(name)

        epochList = utils.openCSV(epochPath, fn)
        epochList = [(epochNum, float(start), float(stop))
                     for epochNum, start, stop in epochList]

        entryList = utils.openCSV(tgInfoPath, fn)
        entryList = [(float(start), float(stop), label)
                     for start, stop, label in entryList]

        dataList = praat_pi.loadPitchAndTime(pitchPath, fn)

        # Get F0 values for the intervals when the mother was speaking
        speechDataList = []
        for start, stop, label in entryList:
            speechDataList.extend(praat_pi.getAllValuesInTime(start, stop,
                                                              dataList))

        # Get F0 values for the times the mother is speaking for each epoch
        pitchData = []
        for epochNum, start, stop in epochList:
            start, stop = float(start), float(stop)
            epochValueList = praat_pi.getAllValuesInTime(start, stop,
                                                         speechDataList)
            f0List = [f0Val for time, f0Val, intVal in epochValueList]
            pitchData.append(praat_pi.extractPitchMeasuresForSegment(
                f0List, name, epochNum, medianFilterWindowSize=None,
                filterZeroFlag=True))

        with open(join(outputPath, "%s.txt" % name), "w") as fd:
            fd.write("\n".join(pitchData) + "\n")
def aggregateSpeechRate(tgInfoPath, speechRatePath, outputPath, samplingRate):
    """Merge per-subset speech-rate sample times into whole-file times.

    Each subset's sample numbers are offset by the subset's start time
    (from *tgInfoPath*) and converted from samples to seconds using
    *samplingRate*.  Already-finished outputs are skipped.

    Fix: output handle was never closed (now a context manager).
    """
    utils.makeDir(outputPath)

    finishedList = utils.findFiles(outputPath, filterExt=".txt")
    for fn in utils.findFiles(tgInfoPath, filterExt=".txt",
                              skipIfNameInList=finishedList):

        # Load subset speech rate
        name = os.path.splitext(fn)[0]
        speechRateFNList = utils.findFiles(speechRatePath, filterExt=".txt",
                                           filterPattern=name)

        subSplitList = utils.openCSV(tgInfoPath, fn)

        # Convert the sample numbers to seconds
        # They are in terms of the beginning of the subset they are in but
        # need to be in terms of the start of the file the larger file the
        # subset originated from
        outputList = []
        for splitInfo, speechRateFN in utils.safeZip([subSplitList,
                                                      speechRateFNList],
                                                     enforceLength=True):
            start, stop, label = splitInfo
            speechRateList = utils.openCSV(speechRatePath, speechRateFN,
                                           valueIndex=0)
            speechRateList = [value for value in speechRateList
                              if value != '']
            speechRateList = [str(float(start) +
                                  float(sampleNum) / float(samplingRate))
                              for sampleNum in speechRateList]
            outputList.append(",".join(speechRateList))

        with open(join(outputPath, fn), "w") as fd:
            fd.write("\n".join(outputList) + "\n")
def manualPhoneCount(tgInfoPath, isleFN, outputPath, skipList=None):
    """Count syllables and phones for each labeled interval via the ISLE dict.

    Writes one "syllableCount,phoneCount" row per interval.  Labels in
    *skipList* are recorded as "0,0".

    Fix: the finished-file lookup used ``filterPaths=".txt"`` instead of
    ``filterExt=".txt"`` (the keyword every other call in this file uses),
    so previously-finished files were never added to the skip list.
    """
    if skipList is None:
        skipList = []

    utils.makeDir(outputPath)

    isleDict = isletool.LexicalTool(isleFN)

    existFNList = utils.findFiles(outputPath, filterExt=".txt")
    for fn in utils.findFiles(tgInfoPath, filterExt=".txt",
                              skipIfNameInList=existFNList):
        if os.path.exists(join(outputPath, fn)):
            continue

        print(fn)

        dataList = utils.openCSV(tgInfoPath, fn)
        dataList = [row[2] for row in dataList]  # start, stop, tmpLabel

        outputList = []
        for tmpLabel in dataList:
            if tmpLabel not in skipList:
                syllableCount, phoneCount = isletool.getNumPhones(
                    isleDict, tmpLabel, maxFlag=True)
            else:
                syllableCount, phoneCount = 0, 0
            outputList.append("%d,%d" % (syllableCount, phoneCount))

        outputTxt = "\n".join(outputList)
        with open(join(outputPath, fn), "w") as fd:
            fd.write(outputTxt)
def extractPraatPitch(intensityAndPitchPath, textgridPath, tierName,
                      outputPath, nullLabel=""):
    """Summarize F0 over each labeled interval of *tierName*.

    Intervals whose stripped label is empty or equals *nullLabel* are
    skipped; files without a matching textgrid are skipped entirely.

    Fix: output handle was never closed (now a context manager).
    """
    utils.makeDir(outputPath)

    for fn in utils.findFiles(intensityAndPitchPath, filterExt=".txt"):
        dataList = loadPitchAndTime(intensityAndPitchPath, fn)

        name = os.path.splitext(fn)[0]
        tgFN = join(textgridPath, name + ".TextGrid")
        if not os.path.exists(tgFN):
            continue

        tg = praatio.openTextGrid(tgFN)
        tier = tg.tierDict[tierName]

        pitchData = []
        for valueList, label, _, _ in getValuesForIntervals(dataList,
                                                            tier.entryList):
            f0Values = [f0Val for _, f0Val, _ in valueList]
            label = label.strip()
            if label == "" or label == nullLabel:
                continue
            pitchData.append(getPitchMeasures(f0Values, name, label,
                                              True, True))

        with open(join(outputPath, "%s.txt" % name), "w") as fd:
            fd.write("\n".join(pitchData))
def removeFilledPauses(inputPath, outputPath):
    """Keep only the "MS" (mother speech) rows of each interval CSV.

    Fix: output handle was never closed (now a context manager).
    """
    utils.makeDir(outputPath)

    for fn in utils.findFiles(inputPath, filterExt=".txt"):
        dataList = utils.openCSV(inputPath, fn)
        dataList = [[start, stop, label]
                    for start, stop, label in dataList
                    if label == "MS"]
        dataList = [",".join(row) for row in dataList]
        with open(join(outputPath, fn), "w") as fd:
            fd.write("\n".join(dataList) + "\n")
def adjustEpochNumbers(inputPath, outputPath):
    """Renumber epochs from 1 upward, zero-padded to two digits.

    Fixes: output handle was never closed (now a context manager); loop
    variable renamed from ``id`` (shadowed the builtin).
    """
    utils.makeDir(outputPath)

    for fn in utils.findFiles(inputPath, filterExt=".txt"):
        dataList = utils.openCSV(inputPath, fn)
        dataList = ["%02d,%s,%s" % (int(epochId) + 1, start, stop)
                    for epochId, start, stop in dataList]
        with open(join(outputPath, fn), "w") as fd:
            fd.write("\n".join(dataList) + "\n")
def resampleAudio(newSampleRate, inputPath, soxEXE="/opt/local/bin/sox"):
    '''
    Batch-resample every wav in *inputPath* into a "resampled_wavs" subfolder.

    soxEXE - path to the sox executable.  Generalized from the previously
             hard-coded mac path; the default preserves the old behavior,
             so existing callers are unaffected.
    '''
    outputPath = join(inputPath, "resampled_wavs")
    utils.makeDir(outputPath)

    for fn in utils.findFiles(inputPath, filterExt=".wav"):
        soxCmd = "%s %s -r %f %s rate -v 96k" % (soxEXE,
                                                 join(inputPath, fn),
                                                 newSampleRate,
                                                 join(outputPath, fn))
        os.system(soxCmd)
def findSyllableNuclei(inputPath, outputPath, matlabEXE, matlabScriptsPath,
                       printCmd=False):
    '''
    Makes a file listing the syllable nuclei for each file in inputPath

    Runs the matlab-based nucleus detector over the whole input folder.
    '''
    utils.makeDir(outputPath)

    nucleusDetectPath = join(matlabScriptsPath, "nucleus_detection_matlab")
    searchPathList = [matlabScriptsPath, nucleusDetectPath]

    matlabCmd = "detect_syllable_nuclei('%s', '%s');" % (inputPath,
                                                         outputPath)
    matlab.runMatlabFunction(matlabCmd, matlabEXE, searchPathList, printCmd)
def replaceAllLabelsInMotherTierWithMS(inputPath, outputPath):
    """Overwrite every label on the "Mother" tier with the code "MS"."""
    utils.makeDir(outputPath)

    speechTierName = "Mother"
    for fn in utils.findFiles(inputPath, filterExt=".TextGrid"):
        tg = tgio.openTextGrid(join(inputPath, fn))

        # Same intervals, uniform label
        relabeledEntryList = []
        for start, stop, label in tg.tierDict[speechTierName].entryList:
            relabeledEntryList.append([start, stop, "MS"])

        tg.replaceTier(speechTierName, relabeledEntryList)
        tg.save(join(outputPath, fn))
def findSyllableNuclei(inputPath, outputPath, matlabEXE, matlabScriptsPath,
                       printCmd=False):
    '''
    Makes a file listing the syllable nuclei for each file in inputPath

    Detection runs through the python port (mfd).  *matlabEXE*,
    *matlabScriptsPath*, and *printCmd* are retained only for interface
    compatibility with callers of the old matlab-based version.

    Fix: removed dead locals (``pathList``, ``cmd``) and the superseded
    commented-out matlab invocation they fed.
    '''
    utils.makeDir(outputPath)
    mfd.detect_syllable_nuclei(inputPath, outputPath)
def generateEpochRowHeader(epochPath, outputPath, sessionCode):
    """Prefix each epoch row with subject id, session code, and duration.

    The subject id is taken from the third underscore-delimited field of
    the filename.

    Fixes: output handle was never closed (now a context manager); local
    renamed from ``id`` (shadowed the builtin).
    """
    utils.makeDir(outputPath)

    for fn in utils.findFiles(epochPath, filterExt=".txt"):
        epochList = utils.openCSV(epochPath, fn)
        subjectId = fn.split("_")[2]

        outputList = [",".join([subjectId, sessionCode, epoch, epochStart,
                                epochEnd,
                                str(float(epochEnd) - float(epochStart))])
                      for epoch, epochStart, epochEnd in epochList]

        with open(join(outputPath, fn), "w") as fd:
            fd.write("\n".join(outputList) + "\n")
def aggregateFeatures(featurePath, featureList, headerStr=None):
    # Column-join the per-file CSVs of each feature in *featureList* into
    # one CSV per source file (under <featurePath>/aggr), then concatenate
    # all of those into a single "all.csv" (optionally with *headerStr* as
    # the first line).  Only files present for EVERY feature are used.
    outputDir = join(featurePath, "aggr")
    utils.makeDir(outputDir)

    fnList = []
    dataList = []

    # Find the files that exist in all features
    for feature in featureList:
        fnSubList = utils.findFiles(join(featurePath, feature),
                                    filterExt=".txt")
        fnList.append(fnSubList)

    actualFNList = []
    for featureFN in fnList[0]:
        if all([featureFN in subList for subList in fnList]):
            actualFNList.append(featureFN)

    for featureFN in actualFNList:
        dataList = []
        for feature in featureList:
            featureDataList = utils.openCSV(join(featurePath, feature),
                                            featureFN, encoding="utf-8")
            dataList.append([",".join(row) for row in featureDataList])

        name = os.path.splitext(featureFN)[0]

        # First column: the source file's name, repeated on every row
        dataList.insert(0, [name for _ in range(len(dataList[0]))])
        # safeZip transposes feature-major rows into line-major rows,
        # enforcing that every feature has the same number of rows
        tDataList = utils.safeZip(dataList, enforceLength=True)
        outputList = [",".join(row) for row in tDataList]
        outputTxt = "\n".join(outputList)

        outputFN = join(outputDir, name + ".csv")
        with io.open(outputFN, "w", encoding="utf-8") as fd:
            fd.write(outputTxt)

    # Cat all files together
    aggrOutput = []

    if headerStr is not None:
        aggrOutput.append(headerStr)

    for fn in utils.findFiles(outputDir, filterExt=".csv"):
        if fn == "all.csv":
            continue  # Don't fold the aggregate into itself
        with io.open(join(outputDir, fn), "r", encoding='utf-8') as fd:
            aggrOutput.append(fd.read())

    with io.open(join(outputDir, "all.csv"), "w", encoding='utf-8') as fd:
        fd.write("\n".join(aggrOutput))
def resampleAudio(soxEXE, newSampleRate, inputPath, fn, outputPath=None):
    r'''
    Resample a single wav file with sox.

    Mac: "/opt/local/bin/sox"
    Windows: "C:\Program Files (x86)\sox-14-4-2\sox.exe"

    Fix: made the docstring raw — the Windows path's backslashes (\P, \s)
    are invalid escape sequences and raise a SyntaxWarning on modern
    Python.
    '''
    if outputPath is None:
        outputPath = join(inputPath, "resampled_wavs")
    utils.makeDir(outputPath)

    soxCmd = "%s %s -r %f %s rate -v 96k" % (soxEXE,
                                             join(inputPath, fn),
                                             newSampleRate,
                                             join(outputPath, fn))
    os.system(soxCmd)
def _runSpeechRateEstimateOnIntervals(wavPath, tgPath, tierName, wavTmpPath, syllableNucleiPath, matlabEXE, matlabScriptsPath, printCmd=True, outputTGFlag=False): utils.makeDir(wavTmpPath) # Split audio files into subsections based on textgrid intervals for name in utils.findFiles(wavPath, filterExt=".wav", stripExt=True): praatio_scripts.splitAudioOnTier(join(wavPath, name + ".wav"), join(tgPath, name + ".TextGrid"), tierName, wavTmpPath, outputTGFlag) uwe_sr.findSyllableNuclei(wavTmpPath, syllableNucleiPath, matlabEXE, matlabScriptsPath, printCmd)
def toWords(featurePath, outputPath):
    """Tokenize each transcript in <featurePath>/txt, one word per line."""
    utils.makeDir(outputPath)

    transcriptPath = join(featurePath, "txt")
    for fn in utils.findFiles(transcriptPath, filterExt=".txt"):
        # Whitespace-split the whole transcript
        with io.open(join(transcriptPath, fn), "r", encoding="utf-8") as fd:
            wordList = fd.read().split()

        with io.open(join(outputPath, fn), "w", encoding="utf-8") as fd:
            fd.write("\n".join(wordList))
def extractWords(tgPath, tierName, outputPath):
    """Write every word on *tierName* of each textgrid, one per line."""
    utils.makeDir(outputPath)

    for name in utils.findFiles(tgPath, filterExt=".TextGrid", stripExt=True):
        # Each interval label may hold several whitespace-separated words
        wordList = []
        for entry in _navigateTGs(tgPath, name, tierName):
            wordList.extend(entry[2].split())

        outputFN = join(outputPath, name + ".txt")
        with io.open(outputFN, "w", encoding="utf-8") as fd:
            fd.write("\n".join(wordList))
def addEpochsToTextgrids(tgPath, epochPath, outputPath):
    """Add an "epochs" interval tier (from epoch CSVs) to each textgrid.

    Fix: converted a Python 2 ``print name`` statement (SyntaxError under
    Python 3) to ``print(name)``.
    """
    utils.makeDir(outputPath)

    for name in utils.findFiles(tgPath, filterExt=".TextGrid", stripExt=True):
        print(name)
        tg = tgio.openTextGrid(join(tgPath, name + ".TextGrid"))

        # Epoch CSV rows are (label, start, end)
        entryList = utils.openCSV(epochPath, name + ".txt")
        entryList = [(float(start), float(end), label)
                     for label, start, end in entryList]

        tier = tgio.IntervalTier("epochs", entryList, minT=0,
                                 maxT=tg.maxTimestamp)
        tg.addTier(tier)
        tg.save(join(outputPath, name + ".TextGrid"))
def resampleAudio(soxEXE, newSampleRate, inputPath, fn, outputPath=None):
    r'''
    Resample a single wav file with sox.

    Mac: "/opt/local/bin/sox"
    Windows: "C:\Program Files (x86)\sox-14-4-2\sox.exe"

    Fix: made the docstring raw — the Windows path's backslashes (\P, \s)
    are invalid escape sequences and raise a SyntaxWarning on modern
    Python.
    '''
    if outputPath is None:
        outputPath = join(inputPath, "resampled_wavs")
    utils.makeDir(outputPath)

    soxCmd = "%s %s -r %f %s rate -v 96k" % (soxEXE,
                                             join(inputPath, fn),
                                             newSampleRate,
                                             join(outputPath, fn))
    os.system(soxCmd)
def extractRMSIntensity(intensityAndPitchPath, textgridPath, tierName,
                        outputPath, nullLabel=""):
    """Compute RMS intensity over each labeled interval of *tierName*.

    Intervals whose stripped label is empty or equals *nullLabel* are
    skipped; files without a matching textgrid are skipped entirely.

    Fixes: output handle was never closed (now a context manager); reuse
    the already-computed ``tgFN`` instead of re-joining the same path.
    """
    utils.makeDir(outputPath)

    for fn in utils.findFiles(intensityAndPitchPath, filterExt=".txt"):
        dataList = loadPitchAndTime(intensityAndPitchPath, fn)

        name = os.path.splitext(fn)[0]
        tgFN = join(textgridPath, name + ".TextGrid")
        if not os.path.exists(tgFN):
            continue

        tg = praatio.openTextGrid(tgFN)
        tier = tg.tierDict[tierName]
        print(fn)

        rmsIntensityList = []
        for valueList, label, _, _ in getValuesForIntervals(dataList,
                                                            tier.entryList):
            intensityVals = [intensityVal
                             for _, _, intensityVal in valueList]
            # Zero intensity samples correspond to undefined frames
            intensityVals = [intensityVal for intensityVal in intensityVals
                             if intensityVal != 0.0]

            label = label.strip()
            if label == "" or label == nullLabel:
                continue

            rmsIntensity = 0
            if len(intensityVals) != 0:
                rmsIntensity = my_math.rms(intensityVals)
            rmsIntensityList.append(str(rmsIntensity))

        with open(join(outputPath, "%s.txt" % name), "w") as fd:
            fd.write("\n".join(rmsIntensityList))
def findFrequenciesForWordLists(featurePath, countObj, frequencyNormFunc):
    """Look up corpus frequency stats for each word list under *featurePath*.

    Writes one "count,frequency,logFrequency" row per word into the
    "frequency" subfolder.
    """
    frequencyPath = join(featurePath, "frequency")
    utils.makeDir(frequencyPath)

    wordsPath = join(featurePath, "words")
    for fn in utils.findFiles(wordsPath):
        wordList = utils.openCSV(wordsPath, fn, valueIndex=0,
                                 encoding="utf-8")

        rowList = []
        for word in wordList:
            # Unknown words fall back to a count of 1
            count, freq, logFreq = countObj.getFrequency(
                word, frequencyNormFunc, outOfDictionaryValue=1)
            rowList.append("%f,%f,%f" % (count, freq, logFreq))

        with open(join(frequencyPath, fn), "w") as fd:
            fd.write("\n".join(rowList))
def extractTGInfo(inputPath, outputPath, tierName):
    """Dump (start, stop, label) CSV rows for *tierName* of each textgrid.

    Files whose output already exists are skipped so the batch can be
    resumed.
    """
    utils.makeDir(outputPath)

    for name in utils.findFiles(inputPath, filterExt=".TextGrid",
                                stripExt=True):
        outputFN = join(outputPath, name + ".txt")
        if os.path.exists(outputFN):
            continue
        print(name)

        rowList = ["%f,%f,%s" % (start, stop, label)
                   for start, stop, label in _navigateTGs(inputPath, name,
                                                          tierName)]

        with io.open(outputFN, "w", encoding="utf-8") as fd:
            fd.write("\n".join(rowList))
def manualPhoneCountForEpochs(manualCountsPath, tgInfoPath, epochPath,
                              outputPath):
    # Aggregate per-interval syllable/phone counts into per-epoch totals,
    # prorating any interval that straddles an epoch boundary by the
    # fraction of the interval inside the epoch.
    utils.makeDir(outputPath)

    skipList = utils.findFiles(outputPath, filterExt=".txt")
    for fn in utils.findFiles(tgInfoPath, filterExt=".txt",
                              skipIfNameInList=skipList):
        epochList = utils.openCSV(epochPath, fn)
        tgInfo = utils.openCSV(tgInfoPath, fn)
        manualCounts = utils.openCSV(manualCountsPath, fn)

        epochOutputList = []
        for epochTuple in epochList:  # Epoch num, start, stop
            epochStart, epochStop = float(epochTuple[1]), float(epochTuple[2])

            # Find all of the intervals that are at least partially
            # contained within the current epoch
            epochSyllableCount = 0
            epochPhoneCount = 0
            speechDuration = 0
            # tgInfo rows and manualCounts rows are assumed parallel
            # (same interval order) — safeZip enforces equal length
            for info, counts in utils.safeZip([tgInfo, manualCounts],
                                              enforceLength=True):
                start, stop = float(info[0]), float(info[1])
                syllableCount, phoneCount = (float(counts[0]),
                                             float(counts[1]))

                # Accounts for intervals that straddle an epoch boundary
                multiplicationFactor = percentInside(start, stop,
                                                     epochStart, epochStop)

                speechDuration += (stop - start) * multiplicationFactor
                epochSyllableCount += syllableCount * multiplicationFactor
                epochPhoneCount += phoneCount * multiplicationFactor

            epochOutputList.append("%f,%f,%f" % (epochSyllableCount,
                                                 epochPhoneCount,
                                                 speechDuration))

        with open(join(outputPath, fn), "w") as fd:
            fd.write("\n".join(epochOutputList))
def isolateMotherSpeech(path, filterGrid, outputPath):
    '''
    Removes mother speech when the child is also speaking
    '''
    utils.makeDir(outputPath)

    for fn in utils.findFiles(path, filterExt=".TextGrid"):
        tg = tgio.openTextGrid(join(path, fn))
        motherTier = tg.tierDict["Mother"]

        newEntryList = []
        for start, stop, label in motherTier.entryList:
            # Look only at the overlapping portion of the filter tier
            croppedTG = tg.crop(False, False, start, stop)
            entryList = croppedTG.tierDict[filterGrid].entryList

            # Start with the whole mother interval, then carve out every
            # overlapping filter interval; each subtraction can split one
            # piece into several, so the list is rescanned in place
            resultList = [(start, stop, label), ]
            for subStart, subStop, subLabel in entryList:
                i = 0
                while i < len(resultList):
                    tmpStart = resultList[i][0]
                    tmpEnd = resultList[i][1]
                    tmpResultList = subtractOverlap(tmpStart, tmpEnd, label,
                                                    subStart, subStop)
                    # Replace if there has been a change
                    if tmpResultList != [[tmpStart, tmpEnd, label], ]:
                        resultList = resultList[:i] + tmpResultList
                        # Skip over the pieces just inserted
                        i += len(tmpResultList) - 1
                    i += 1
            newEntryList.extend(resultList)

        newMotherTier = tgio.IntervalTier("Mother", newEntryList)
        tg.replaceTier("Mother", newMotherTier.entryList)
        tg.save(join(outputPath, fn))
def extractMotherSpeech(wavPath, textgridPath, mothersSpeechName,
                        outputWavPath, outputTextgridPath):
    """Extract one wav + cropped textgrid per mother-speech interval.

    Each interval on *mothersSpeechName* is written out as
    "<name>_<NNN>.wav" with a matching "<name>_<NNN>.TextGrid".

    Fix: converted a Python 2 ``print name`` statement (SyntaxError under
    Python 3) to ``print(name)``.
    """
    utils.makeDir(outputWavPath)
    utils.makeDir(outputTextgridPath)

    for name in utils.findFiles(wavPath, filterExt=".wav", stripExt=True):
        print(name)
        tg = tgio.openTextGrid(join(textgridPath, name + ".TextGrid"))
        speechTier = tg.tierDict[mothersSpeechName]

        for i, entry in enumerate(speechTier.entryList):
            subName = "%s_%03d" % (name, i)
            start, stop, label = entry
            start, stop = float(start), float(stop)

            audio_scripts.extractSubwav(join(wavPath, name + ".wav"),
                                        join(outputWavPath,
                                             subName + ".wav"),
                                        start, stop,
                                        singleChannelFlag=True)

            subTG = tg.crop(strictFlag=False, softFlag=False,
                            startTime=start, endTime=stop)
            subTG.save(join(outputTextgridPath, subName + ".TextGrid"))
def extractTranscript(featurePath, tierName):
    '''
    Outputs each label of a textgrid on a separate line in a plain text file
    '''
    tgPath = join(featurePath, "textgrids")
    outputPath = join(featurePath, "transcript")
    utils.makeDir(outputPath)

    for name in utils.findFiles(tgPath, filterExt=".TextGrid", stripExt=True):
        labelList = [entry[2]
                     for entry in _navigateTGs(tgPath, name, tierName)]

        outputFN = join(outputPath, name + ".txt")
        with io.open(outputFN, "w", encoding="utf-8") as fd:
            fd.write("\n".join(labelList))
def analyzeLaughter(textgridPath, outputPath):
    """Tally speech, laughter, and pause events across all textgrids.

    Writes one CSV per event type listing every occurrence, plus
    "event_cumulative_lengths.csv" with per-file cumulative durations.

    Fix: all three output files were written via bare ``open().write()``
    and never closed; now use context managers.
    """
    utils.makeDir(outputPath)

    speechTierName = "Mother"
    laughterTierName = "Mother's Backchannel"
    speechCode = "MS"
    laughterCode = "LA"
    pauseCode = "FP"

    # How much did each event occur?
    allCodeSummaryList = []
    for tierName, code, outputName in [
            [speechTierName, speechCode, "speech_occurances"],
            [laughterTierName, laughterCode, "laughter_occurances"],
            [speechTierName, pauseCode, "pause_code"], ]:
        entryList = []
        summaryList = []
        for fn in utils.findFiles(textgridPath, filterExt=".TextGrid"):
            tg = tgio.openTextGrid(join(textgridPath, fn))
            tier = tg.tierDict[tierName]
            matchEntryList = tier.find(code)
            durationList = [float(stop) - float(start)
                            for start, stop, label in matchEntryList]
            matchEntryList = [[fn, str(start), str(stop), label]
                              for start, stop, label in matchEntryList]
            entryList.extend(matchEntryList)
            summaryList.append((fn, str(sum(durationList))))

        entryList = [",".join(row) for row in entryList]
        with open(join(outputPath, outputName + ".csv"), "w") as fd:
            fd.write("\n".join(entryList))
        allCodeSummaryList.append(summaryList)

    # One summary row per file: cumulative duration of each event type
    outputList = ["Filename,Speech,Laughter,Pause", ]
    for speech, laugh, pause in utils.safeZip(allCodeSummaryList,
                                              enforceLength=True):
        outputList.append(",".join([speech[0], speech[1], laugh[1],
                                    pause[1]]))

    with open(join(outputPath, "event_cumulative_lengths.csv"), "w") as fd:
        fd.write("\n".join(outputList) + "\n")
def medianFilter(f0Path, outputPath, windowSize):
    """Median-filter the F0 column of each pitch CSV in *f0Path*.

    Rows are (time, f0Val, intensityVal); "--undefined--" F0 values are
    treated as 0 before filtering.  The time and intensity columns pass
    through unchanged.

    Fixes: output handle was never closed (now a context manager);
    ``zip(*[a, b])`` simplified to ``zip(a, b)``.
    """
    # windowSize must be odd so the window has a well-defined center
    assert (windowSize % 2 != 0)

    utils.makeDir(outputPath)

    for fn in utils.findFiles(f0Path, filterExt=".txt"):
        valueList = utils.openCSV(f0Path, fn)
        f0List = [float(row[1]) if row[1] != "--undefined--" else 0
                  for row in valueList]  # time, f0Val, intensityVal
        f0Filtered = filters.medianFilter(f0List, windowSize,
                                          useEdgePadding=True)

        outputList = ["%s,%0.3f,%s" % (row[0], f0Val, row[2])
                      for row, f0Val in zip(valueList, f0Filtered)]

        with open(join(outputPath, fn), "w") as fd:
            fd.write("\n".join(outputList) + "\n")
def correctTextgridTimes(tgPath, threshold):
    # Snap phone-tier boundaries onto word-tier boundaries when the two
    # differ by less than /threshold/ — i.e. times that should be equal
    # but drifted apart.  Corrected grids go to a "correctsTGs" subfolder.
    # Are x and y unique but very very similar
    withinThreshold = lambda x, y: (abs(x - y) < threshold) and (x != y)

    outputPath = join(tgPath, "correctsTGs")
    utils.makeDir(outputPath)

    for fn in utils.findFiles(tgPath, filterExt=".TextGrid"):
        print(fn)

        tg = tgio.openTextgrid(join(tgPath, fn))
        wordTier = tg.tierDict["words"]
        phoneTier = tg.tierDict["phones"]

        for wordEntry in wordTier.entryList:
            for i, phoneEntry in enumerate(phoneTier.entryList):
                if tgio.intervalOverlapCheck(wordEntry, phoneEntry):
                    start = phoneEntry[0]
                    end = phoneEntry[1]
                    phone = phoneEntry[2]

                    # At most one boundary is adjusted per phone entry
                    # (elif chain), preferring start-time corrections
                    if withinThreshold(wordEntry[0], start):
                        start = wordEntry[0]
                    elif withinThreshold(wordEntry[1], start):
                        start = wordEntry[1]
                    elif withinThreshold(wordEntry[0], end):
                        end = wordEntry[0]
                    elif withinThreshold(wordEntry[1], end):
                        end = wordEntry[1]

                    # In-place update of the tier's entry list
                    phoneTier.entryList[i] = (start, end, phone)

        tg.save(join(outputPath, fn))
def audiosplitOnTone(inputPath, fn, pitchPath, tgPath, subwavPath,
                     minPitch, maxPitch, toneFrequency, minEventDuration,
                     praatEXE, praatScriptPath, forceRegen,
                     generateWavs=False):
    # Segment a recording into beep/speech/silence regions by tracking
    # pitch and locating runs of the given tone frequency, saving the
    # result as a three-tier textgrid (and optionally the audio between
    # tones).
    utils.makeDir(pitchPath)
    utils.makeDir(tgPath)
    utils.makeDir(subwavPath)

    name = os.path.splitext(fn)[0]
    piSamplingRate = 100  # Samples per second

    # Extract pitch and find patterns in the file
    outputFN = os.path.splitext(fn)[0] + ".txt"
    sampleStep = 1 / float(piSamplingRate)
    motherPIList = pitch_and_intensity.extractPI(join(inputPath, fn),
                                                 join(pitchPath, outputFN),
                                                 praatEXE, minPitch,
                                                 maxPitch,
                                                 sampleStep=sampleStep,
                                                 forceRegenerate=forceRegen)
    # entry = (time, pitchVal, intVal)
    pitchList = [float(entry[1]) for entry in motherPIList]
    timeDict = split_on_tone.splitFileOnTone(pitchList,
                                             piSamplingRate,
                                             toneFrequency,
                                             minEventDuration)

    # Output result as textgrid
    duration = audio_scripts.getSoundFileDuration(join(inputPath, fn))
    tg = tgio.Textgrid()
    for key in ['beep', 'speech', 'silence']:
        entryList = timeDict[key]
        tier = tgio.IntervalTier(key, entryList, 0, duration)
        tg.addTier(tier)
    tg.save(join(tgPath, name + ".TextGrid"))

    # Output audio portions between tones
    if generateWavs:
        split_on_tone.extractSubwavs(timeDict, inputPath, fn, subwavPath)
# Example driver: split "introduction.wav" on silence, twice, with two
# different silence-detection parameter settings.
# NOTE(review): paths are hard-coded to the original author's machine —
# adjust _dataPath/_praatEXE/_praatScriptPath before running.
_minPitch = 50
_maxPitch = 450
_intensityPercentile = 0.3
_stepSize = 0.1
_numSteps = 5

_fn = "introduction.wav"
_dataPath = join('/Users/tmahrt/Dropbox/workspace/pyAcoustics/test/files')
_outputPath = join(_dataPath, "output_stepSize_0.1")
_tgPath = join(_dataPath, "splitAudio_silence_stepSize_0.1")
_pitchPath = join(_dataPath, "pitch")
_wavOutputPath = join(_dataPath, "output_wavs")
_praatEXE = "/Applications/praat.App/Contents/MacOS/Praat"
_praatScriptPath = ("/Users/tmahrt/Dropbox/workspace/pyAcoustics/"
                    "praatScripts")

utils.makeDir(_wavOutputPath)

# Subwavs are placed in a folder named after the source file
_rootFolderName = os.path.splitext(os.path.split(_fn)[1])[0]
_subwavOutputPath = join(_wavOutputPath, _rootFolderName)

audiosplitSilence(_dataPath, _fn, _tgPath, _pitchPath, _subwavOutputPath,
                  _minPitch, _maxPitch, _stepSize, _numSteps,
                  _praatEXE, _praatScriptPath)

# Changing the parameters used in silence detection can lead to
# very different results
_stepSize = 0.025
_numSteps = 10
_tgPath = join(_dataPath, "splitAudio_silence_stepSize_0.025")
audiosplitSilence(_dataPath, _fn, _tgPath, _pitchPath, _subwavOutputPath,
                  _minPitch, _maxPitch, _stepSize, _numSteps,
                  _praatEXE, _praatScriptPath)
def audiosplitSilence(inputPath, fn, tgPath, pitchPath, subwavPath,
                      minPitch, maxPitch, stepSize, numSteps,
                      praatEXE, praatScriptPath, generateWavs=False,
                      numSegmentsToExtract=None,):
    '''
    Extract the non-silence portions of a file

    minPitch - the speaker's minimum pitch
    maxPitch - the speaker's maximum pitch
    intensityPercentile - Given the distribution of intensity values in a
        file, the intensity threshold to use is the one that falls at
        /intensityPercentile/  Any intensity values less than the intensity
        threshold will be considered silence.  I typically use a value
        between 0.2 or 0.3.
    stepSize - non-overlapping step size (in seconds)
    numSteps - number of consecutive blocks needed for a segment to be
        considered silence.  stepSize * numSteps is the smallest possible
        interval that can be considered silence/not-silence.
    praatEXE - fullpath to a praat executable.  On Windows use
        praatcon.exe.  Other systems use praat
    praatScriptPath - location of the folder containing praat scripts that
        is distributed with pyAcoustics
    numSegmentsToExtract - if not None remove all but the X loudest
        segments as specified by /numSegmentsToExtract/.  Otherwise, all
        non-silent segments are kept.
    generateWavs - if False, no wavefiles are extracted, but you can look
        at the generated textgrids to see which wavefiles would have been
        extracted
    '''
    utils.makeDir(tgPath)
    utils.makeDir(pitchPath)
    utils.makeDir(subwavPath)

    name = os.path.splitext(fn)[0]

    piSamplingRate = 100  # Samples per second
    sampleStep = 1 / float(piSamplingRate)
    outputFN = os.path.splitext(fn)[0] + ".txt"
    motherPIList = pitch_and_intensity.extractPI(join(inputPath, fn),
                                                 join(pitchPath, outputFN),
                                                 praatEXE, minPitch,
                                                 maxPitch,
                                                 sampleStep=sampleStep,
                                                 forceRegenerate=False)
    # entry = (time, pitchVal, intVal) -- keep only the intensity track
    motherPIList = [float(entry[2]) for entry in motherPIList]

    # We need the intensity threshold to distinguish silence from
    # speech/noise.  Naively, we can extract this by getting the nth
    # percent most intense sound in the file
    # naive_vad.getIntensityPercentile()
    # (but then, how do we determine the percent?)
    # Alternatively, we could consider the set of intensity values to be
    # bimodal -- silent values vs non-silent.  The best threshold is the
    # one that minimizes the overlap between the two distributions,
    # obtained via data_fitting.getBimodalValley()
    # silenceThreshold = naive_vad.getIntensityPercentile(motherPIList,
    #                                                     intensityPercentile)
    silenceThreshold = data_fitting.getBimodalValley(motherPIList,
                                                     doplot=True)
    print(silenceThreshold)

    entryList = naive_vad.naiveVAD(motherPIList, silenceThreshold,
                                   piSamplingRate, stepSize, numSteps)
    # Label each non-silent region with its sequence number
    entryList = [(time[0], time[1], str(i))
                 for i, time in enumerate(entryList)]

    # Filter out quieter sounds if necessary
    if numSegmentsToExtract is not None:
        # Get the rms energy of each non-silent region
        rmsEntryList = []
        for i, entry in enumerate(entryList):
            intList = motherPIList[int(entry[0] * piSamplingRate):
                                   int(entry[1] * piSamplingRate)]
            rmsVal = my_math.rms(intList)
            rmsEntryList.append((rmsVal, entry))

        rmsEntryList.sort()  # Sort by energy
        entryList = [rmsTuple[1]
                     for rmsTuple in rmsEntryList[:numSegmentsToExtract]]
        entryList.sort()  # Sort by time

    # Create the textgrid
    tg = tgio.Textgrid()
    duration = audio_scripts.getSoundFileDuration(join(inputPath, fn))
    tier = tgio.IntervalTier("speech_tier", entryList, 0, duration)
    tg.addTier(tier)
    tg.save(join(tgPath, name + '.TextGrid'))

    if generateWavs is True:
        for i, entry in enumerate(entryList):
            subwavOutputFN = join(subwavPath, name + "_" + str(i) + ".wav")
            audio_scripts.extractSubwav(join(inputPath, fn), subwavOutputFN,
                                        entry[0], entry[1],
                                        singleChannelFlag=True)