def extractSubwavs(timeDict, path, fn, outputPath):
    '''
    Extracts segments between tones marked in the output of splitFileOnTone()
    '''
    name = os.path.splitext(fn)[0]
    duration = audio_scripts.getSoundFileDuration(join(path, fn))

    beepEntryList = timeDict[BEEP]
    segmentEntryList = sequences.invertIntervalList(beepEntryList, 0, duration)

    # Pad the segment index with enough zeroes for the output names to sort
    if len(segmentEntryList) > 0:
        numZeroes = int(math.floor(math.log10(len(segmentEntryList)))) + 1
    else:
        numZeroes = 1
    strFmt = "%%s_%%0%dd.wav" % numZeroes  # e.g. '%s_%02d.wav'

    for i, entry in enumerate(segmentEntryList):
        start, stop = entry[:2]
        audio_scripts.extractSubwav(join(path, fn),
                                    join(outputPath, strFmt % (name, i)),
                                    startT=float(start),
                                    endT=float(stop),
                                    singleChannelFlag=True)
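
# Worked example of the zero-padded filename template above.  With ten
# segments, one extra digit of padding is needed:
#
#     >>> numZeroes = int(math.floor(math.log10(10))) + 1  # -> 2
#     >>> "%%s_%%0%dd.wav" % numZeroes
#     '%s_%02d.wav'
#     >>> '%s_%02d.wav' % ("myfile", 3)
#     'myfile_03.wav'
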
def _addSyllableNucleiToTextgrids(wavPath, tgPath, tierName,
                                  syllableNucleiPath, outputPath):
    # Add syllable nuclei to textgrids
    for name in utils.findFiles(wavPath, filterExt=".wav", stripExt=True):

        tg = tgio.openTextgrid(join(tgPath, name + ".TextGrid"))
        entryList = tg.tierDict[tierName].entryList
        startTimeList = [entry[0] for entry in entryList]
        nucleusSyllableList = uwe_sr.toAbsoluteTime(name, syllableNucleiPath,
                                                    startTimeList)
        flattenedSyllableList = [nuclei for sublist in nucleusSyllableList
                                 for nuclei in sublist]
        wavFN = join(wavPath, name + ".wav")
        duration = audio_scripts.getSoundFileDuration(wavFN)

        # Build zero-padded labels so the point labels sort correctly
        oom = my_math.orderOfMagnitude(len(flattenedSyllableList))
        labelTemplate = "%%0%dd" % (oom + 1)

        entryList = [(timestamp, labelTemplate % i)
                     for i, timestamp in enumerate(flattenedSyllableList)]

        tier = tgio.PointTier("Syllable Nuclei", entryList, 0, duration)
        tg.addTier(tier)
        tg.save(join(outputPath, name + ".TextGrid"))
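
# Sketch of the labelling scheme above, assuming my_math.orderOfMagnitude()
# returns the base-10 order of its argument (e.g. 2 for 150).  With 150
# detected nuclei, labelTemplate would be "%03d" and the points would be
# labelled '000', '001', ..., '149', which sort correctly as strings.
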
def getMinMaxAmplitude(wavFN, stepSize, entryList=None):
    audiofile = openAudioFile(wavFN)[0]

    # By default, find the min and max amplitude for the whole file
    if entryList is None:
        stop = audio_scripts.getSoundFileDuration(wavFN)
        entryList = [(0, stop), ]

    # Accumulate relevant energy values
    rmsList = []
    for entry in entryList:
        start, stop = entry[0], entry[1]

        currentTime = start
        while currentTime < stop:
            rmsList.append(rmsNextFrames(audiofile, stepSize))
            currentTime += stepSize

    # Return the min and max values
    minValue = min(rmsList)
    maxValue = max(rmsList)

    return minValue, maxValue
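
# `openAudioFile` and `rmsNextFrames` are assumed to be defined elsewhere in
# this module.  Below is a minimal sketch of what an RMS-per-step helper
# might look like, assuming `audiofile` is a wave.Wave_read object over
# 16-bit mono PCM; an illustration, not the shipped implementation:
import math
import struct


def _rmsNextFramesSketch(audiofile, stepSize):
    # Read the next `stepSize` seconds of samples from the open file
    numFrames = int(audiofile.getframerate() * stepSize)
    raw = audiofile.readframes(numFrames)
    samples = struct.unpack("<%dh" % (len(raw) // 2), raw)
    if not samples:
        return 0.0
    # Root-mean-square energy of the window
    return math.sqrt(sum(s * s for s in samples) / float(len(samples)))
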
def _calculateSyllablesPerSecond(wavPath, syllableNucleiPath):
    for name in utils.findFiles(wavPath, filterExt=".wav", stripExt=True):
        nucleusSyllableList = uwe_sr.toAbsoluteTime(name, syllableNucleiPath,
                                                    [0, ])
        nucleusSyllableList = [nucleus for subList in nucleusSyllableList
                               for nucleus in subList]
        numSyllables = len(nucleusSyllableList)

        wavFN = join(wavPath, name + ".wav")
        duration = audio_scripts.getSoundFileDuration(wavFN)

        print("%s - %.02f syllables/second"
              % (name, numSyllables / float(duration)))
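
# Example of the output above: a file "sample1.wav" that is 12.5 seconds
# long and yields 42 detected nuclei would print:
#
#     sample1 - 3.36 syllables/second
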
def audiosplitOnTone(inputPath, fn, pitchPath, tgPath, subwavPath,
                     minPitch, maxPitch, toneFrequency, minEventDuration,
                     praatEXE, praatScriptPath, forceRegen,
                     generateWavs=False):
    utils.makeDir(pitchPath)
    utils.makeDir(tgPath)
    utils.makeDir(subwavPath)

    name = os.path.splitext(fn)[0]
    piSamplingRate = 100  # Samples per second

    # Extract pitch and find patterns in the file
    outputFN = os.path.splitext(fn)[0] + ".txt"
    sampleStep = 1 / float(piSamplingRate)
    motherPIList = pitch_and_intensity.extractPI(join(inputPath, fn),
                                                 join(pitchPath, outputFN),
                                                 praatEXE, minPitch, maxPitch,
                                                 sampleStep=sampleStep,
                                                 forceRegenerate=forceRegen)

    # entry = (time, pitchVal, intVal)
    pitchList = [float(entry[1]) for entry in motherPIList]
    timeDict = split_on_tone.splitFileOnTone(pitchList, piSamplingRate,
                                             toneFrequency, minEventDuration)

    # Output result as textgrid
    duration = audio_scripts.getSoundFileDuration(join(inputPath, fn))
    tg = tgio.Textgrid()
    for key in ['beep', 'speech', 'silence']:
        entryList = timeDict[key]
        tier = tgio.IntervalTier(key, entryList, 0, duration)
        tg.addTier(tier)
    tg.save(join(tgPath, name + ".TextGrid"))

    # Output audio portions between tones
    if generateWavs:
        split_on_tone.extractSubwavs(timeDict, inputPath, fn, subwavPath)
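
# Minimal usage sketch for audiosplitOnTone().  All paths, the pitch bounds,
# and the 440 Hz tone frequency are hypothetical placeholders; adjust them
# to your data and Praat installation:
def _exampleAudiosplitOnTone():
    root = "/home/user/tone_data"  # hypothetical data folder
    for fn in utils.findFiles(join(root, "wavs"), filterExt=".wav"):
        audiosplitOnTone(join(root, "wavs"), fn,
                         join(root, "pitch"), join(root, "textgrids"),
                         join(root, "subwavs"),
                         minPitch=75, maxPitch=450, toneFrequency=440,
                         minEventDuration=0.2,
                         praatEXE="/usr/bin/praat",  # hypothetical location
                         praatScriptPath="praatScripts",
                         forceRegen=False, generateWavs=True)
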
def generateEpochFiles(tgPath, wavPath, epPath):
    utils.makeDir(epPath)
    try:
        for filename in utils.findFiles(tgPath, filterExt=".TextGrid",
                                        stripExt=True):
            tgrid = tgio.openTextgrid(join(tgPath, filename + ".TextGrid"))
            with open(join(epPath, filename + ".txt"), "w") as epochFile:
                for start, stop, label in tgrid.tierDict["Epochs"].entryList:
                    epochFile.write("%s,%s,%s\n" % (label, start, stop))
    except KeyError:
        # The textgrids have no 'Epochs' tier -- fall back to generating
        # fixed-length epochs from the wav durations
        epDuration = int(input("\nOk, the textgrids don't have an 'Epochs' "
                               "tier. How long are the epochs in this "
                               "dataset?\nEnter the epoch duration in "
                               "seconds: "))
        print("\nOk. Epochs are each %dsecs max.\n" % epDuration)

        durationList = []
        for fn in utils.findFiles(wavPath, filterExt=".wav"):
            duration = audio_scripts.getSoundFileDuration(join(wavPath, fn))
            durationList.append((fn, int(duration)))
        durationList.sort()

        for fn, duration in durationList:
            outputFN = os.path.splitext(fn)[0] + ".txt"
            numEpoches = int(duration / epDuration)
            epochList = [(i, i * epDuration, (i + 1) * epDuration)
                         for i in range(numEpoches)]
            # Cover any remainder with one final, shorter epoch
            if duration % epDuration != 0:
                startTime = numEpoches * epDuration
                epochList.append((numEpoches, startTime,
                                  startTime + (duration % epDuration)))

            epochList = ["%02d, %02d, %02d" % row for row in epochList]
            with open(join(epPath, outputFN), "w") as epochFN:
                epochFN.write("\n".join(epochList) + "\n")
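
# The fallback branch above writes plain "index, start, stop" rows.  For a
# 65-second wav with 30-second epochs, the generated .txt file would contain:
#
#     00, 00, 30
#     01, 30, 60
#     02, 60, 65
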
def audiosplitSilence(inputPath, fn, tgPath, pitchPath, subwavPath,
                      minPitch, maxPitch, stepSize, numSteps,
                      praatEXE, praatScriptPath, generateWavs=False,
                      numSegmentsToExtract=None):
    '''
    Extract the non-silence portions of a file

    minPitch - the speaker's minimum pitch
    maxPitch - the speaker's maximum pitch
    stepSize - non-overlapping step size (in seconds)
    numSteps - number of consecutive blocks needed for a segment to be
               considered silence.
               stepSize * numSteps is the smallest possible interval that
               can be considered silence/not-silence.
    praatEXE - fullpath to a praat executable.  On Windows use praatcon.exe.
               Other systems use praat
    praatScriptPath - location of the folder containing praat scripts that
                      is distributed with pyAcoustics
    numSegmentsToExtract - if not None, remove all but the X loudest segments
                           as specified by /numSegmentsToExtract/.  Otherwise,
                           all non-silent segments are kept.
    generateWavs - if False, no wavefiles are extracted, but you can look at
                   the generated textgrids to see which wavefiles would have
                   been extracted
    '''
    utils.makeDir(tgPath)
    utils.makeDir(pitchPath)
    utils.makeDir(subwavPath)

    name = os.path.splitext(fn)[0]
    piSamplingRate = 100  # Samples per second
    sampleStep = 1 / float(piSamplingRate)
    outputFN = os.path.splitext(fn)[0] + ".txt"
    motherPIList = pitch_and_intensity.extractPI(join(inputPath, fn),
                                                 join(pitchPath, outputFN),
                                                 praatEXE, minPitch, maxPitch,
                                                 sampleStep=sampleStep,
                                                 forceRegenerate=False)
    # entry = (time, pitchVal, intVal)
    motherPIList = [float(entry[2]) for entry in motherPIList]

    # We need an intensity threshold to distinguish silence from speech/noise;
    # any intensity value below the threshold is treated as silence.
    # Naively, we could take the value at the nth intensity percentile via
    # naive_vad.getIntensityPercentile() (a percentile between 0.2 and 0.3
    # typically works, but how do we choose it?).  Alternatively, we can
    # treat the set of intensity values as bimodal -- silent vs non-silent --
    # and pick the threshold that minimizes the overlap between the two
    # distributions, obtained via data_fitting.getBimodalValley().
    # silenceThreshold = naive_vad.getIntensityPercentile(motherPIList,
    #                                                     intensityPercentile)
    silenceThreshold = data_fitting.getBimodalValley(motherPIList, doplot=True)
    print(silenceThreshold)
    entryList = naive_vad.naiveVAD(motherPIList, silenceThreshold,
                                   piSamplingRate, stepSize, numSteps)
    entryList = [(time[0], time[1], str(i))
                 for i, time in enumerate(entryList)]

    # Filter out quieter sounds if necessary
    if numSegmentsToExtract is not None:

        # Get the rms energy of each non-silent region
        rmsEntryList = []
        for i, entry in enumerate(entryList):
            intList = motherPIList[int(entry[0] * piSamplingRate):
                                   int(entry[1] * piSamplingRate)]

            rmsVal = my_math.rms(intList)
            rmsEntryList.append((rmsVal, entry))

        rmsEntryList.sort(reverse=True)  # Sort by energy, loudest first
        entryList = [rmsTuple[1]
                     for rmsTuple in rmsEntryList[:numSegmentsToExtract]]
        entryList.sort()  # Sort by time

    # Create the textgrid
    tg = tgio.Textgrid()
    duration = audio_scripts.getSoundFileDuration(join(inputPath, fn))
    tier = tgio.IntervalTier("speech_tier", entryList, 0, duration)
    tg.addTier(tier)
    tg.save(join(tgPath, name + '.TextGrid'))

    if generateWavs is True:
        for i, entry in enumerate(entryList):
            subwavOutputFN = join(subwavPath, name + "_" + str(i) + ".wav")
            audio_scripts.extractSubwav(join(inputPath, fn), subwavOutputFN,
                                        entry[0], entry[1],
                                        singleChannelFlag=True)
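
# Minimal usage sketch for audiosplitSilence().  All paths, pitch bounds, and
# step settings are hypothetical placeholders; adjust them to your data and
# Praat installation:
def _exampleAudiosplitSilence():
    root = "/home/user/speech_data"  # hypothetical data folder
    for fn in utils.findFiles(join(root, "wavs"), filterExt=".wav"):
        audiosplitSilence(join(root, "wavs"), fn,
                          join(root, "textgrids"), join(root, "pitch"),
                          join(root, "subwavs"),
                          minPitch=75, maxPitch=450,
                          stepSize=0.05, numSteps=4,
                          praatEXE="/usr/bin/praat",  # hypothetical location
                          praatScriptPath="praatScripts",
                          generateWavs=True)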