Example #1
def extractSubwavs(timeDict, path, fn, outputPath):
    '''
    Extracts segments between tones marked in the output of splitFileOnTone()
    '''
    name = os.path.splitext(fn)[0]

    duration = audio_scripts.getSoundFileDuration(join(path, fn))
    beepEntryList = timeDict[BEEP]
    segmentEntryList = sequences.invertIntervalList(beepEntryList, 0, duration)

    if len(segmentEntryList) > 0:
        numZeroes = int(math.floor(math.log10(len(segmentEntryList)))) + 1
    else:
        numZeroes = 1

    strFmt = "%%s_%%0%dd.wav" % numZeroes  # e.g. '%s_%02d.wav'

    for i, entry in enumerate(segmentEntryList):
        start, stop = entry[:2]

        audio_scripts.extractSubwav(join(path, fn),
                                    join(outputPath, strFmt % (name, i)),
                                    startT=float(start),
                                    endT=float(stop),
                                    singleChannelFlag=True)
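
A minimal usage sketch for the function above. Here pitchList stands in for the output of pitch_and_intensity.extractPI() (as in Example #5 below), and the 440 Hz tone frequency and 0.2 s minimum event duration are placeholder values:

# Hedged sketch -- timeDict must come from split_on_tone.splitFileOnTone(),
# which is what populates the BEEP key that extractSubwavs() reads
timeDict = split_on_tone.splitFileOnTone(pitchList, 100, 440, 0.2)
extractSubwavs(timeDict, "/data/wavs", "session1.wav", "/data/subwavs")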
Example #2
def _addSyllableNucleiToTextgrids(wavPath, tgPath, tierName,
                                 syllableNucleiPath, outputPath):
    # Add syllable nuclei to textgrids
    for name in utils.findFiles(wavPath, filterExt=".wav", stripExt=True):
        
        tg = tgio.openTextgrid(join(tgPath, name + ".TextGrid"))
        entryList = tg.tierDict[tierName].entryList
        startTimeList = [entry[0] for entry in entryList]
        nucleusSyllableList = uwe_sr.toAbsoluteTime(name, syllableNucleiPath,
                                                    startTimeList)
        flattenedSyllableList = [nuclei for sublist in nucleusSyllableList
                                 for nuclei in sublist]
        wavFN = join(wavPath, name + ".wav")
        duration = audio_scripts.getSoundFileDuration(wavFN)
        
        oom = my_math.orderOfMagnitude(len(flattenedSyllableList))
        labelTemplate = "%%0%dd" % (oom + 1)

        entryList = [(timestamp, labelTemplate % i)
                     for i, timestamp in enumerate(flattenedSyllableList)]
        tier = tgio.PointTier("Syllable Nuclei", entryList, 0, duration)

        # The textgrid was already opened above; add the new tier and save
        tg.addTier(tier)
        tg.save(join(outputPath, name + ".TextGrid"))
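
A call sketch with hypothetical paths; "utterances" is an assumed tier name and must match a tier actually present in your textgrids:

# Hypothetical directories; relative nucleus times are anchored to the
# start times of the intervals in the "utterances" tier
_addSyllableNucleiToTextgrids("/data/wavs", "/data/textgrids", "utterances",
                              "/data/nuclei", "/data/output")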
Example #3
def getMinMaxAmplitude(wavFN, stepSize, entryList=None):
    '''
    Finds the minimum and maximum frame-level RMS amplitude in a wav file

    If entryList is given, only its (start, stop) intervals are scanned;
    otherwise the whole file is scanned in steps of /stepSize/ seconds.
    '''
    audiofile = openAudioFile(wavFN)[0]

    # By default, find the min and max amplitude for the whole file
    if entryList is None:
        stop = audio_scripts.getSoundFileDuration(wavFN)
        entryList = [
            (0, stop),
        ]

    # Accumulate relevant energy values
    rmsList = []
    for entry in entryList:
        start, stop = entry[0], entry[1]
        currentTime = start
        while currentTime < stop:
            rmsList.append(rmsNextFrames(audiofile, stepSize))
            currentTime += stepSize

    # Return the min and max values
    minValue = min(rmsList)
    maxValue = max(rmsList)

    return minValue, maxValue
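
Two hedged invocations; the wav path is a placeholder and the 0.01 s step is an assumed frame size:

# Scan the whole file in 10 ms frames (entryList defaults to the full file)
minRms, maxRms = getMinMaxAmplitude("/data/speech.wav", 0.01)

# Or scan only selected (start, stop) intervals, given in seconds
minRms, maxRms = getMinMaxAmplitude("/data/speech.wav", 0.01,
                                    entryList=[(0.5, 2.0), (3.1, 4.6)])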
Example #4
def _calculateSyllablesPerSecond(wavPath, syllableNucleiPath):
        
    for name in utils.findFiles(wavPath, filterExt=".wav", stripExt=True):
        nucleusSyllableList = uwe_sr.toAbsoluteTime(name, syllableNucleiPath,
                                                    [0, ])
        nucleusSyllableList = [nucleus for subList in nucleusSyllableList
                               for nucleus in subList]
        numSyllables = len(nucleusSyllableList)
        wavFN = join(wavPath, name + ".wav")
        duration = audio_scripts.getSoundFileDuration(wavFN)
        
        print("%s - %.02f syllables/second" %
              (name, numSyllables / float(duration)))
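
A one-line sketch with placeholder directories; the function prints one syllables-per-second figure for each wav file it finds:

# Each wav in /data/wavs needs matching nucleus data in /data/nuclei
_calculateSyllablesPerSecond("/data/wavs", "/data/nuclei")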
Example #5
def audiosplitOnTone(inputPath,
                     fn,
                     pitchPath,
                     tgPath,
                     subwavPath,
                     minPitch,
                     maxPitch,
                     toneFrequency,
                     minEventDuration,
                     praatEXE,
                     praatScriptPath,
                     forceRegen,
                     generateWavs=False):

    utils.makeDir(pitchPath)
    utils.makeDir(tgPath)
    utils.makeDir(subwavPath)

    name = os.path.splitext(fn)[0]
    piSamplingRate = 100  # Samples per second

    # Extract pitch and find patterns in the file
    outputFN = name + ".txt"
    sampleStep = 1 / float(piSamplingRate)
    motherPIList = pitch_and_intensity.extractPI(join(inputPath, fn),
                                                 join(pitchPath, outputFN),
                                                 praatEXE,
                                                 minPitch,
                                                 maxPitch,
                                                 sampleStep=sampleStep,
                                                 forceRegenerate=forceRegen)
    # entry = (time, pitchVal, intVal)
    pitchList = [float(entry[1]) for entry in motherPIList]
    timeDict = split_on_tone.splitFileOnTone(pitchList, piSamplingRate,
                                             toneFrequency, minEventDuration)

    # Output result as textgrid
    duration = audio_scripts.getSoundFileDuration(join(inputPath, fn))
    tg = tgio.Textgrid()
    for key in ['beep', 'speech', 'silence']:
        entryList = timeDict[key]
        tier = tgio.IntervalTier(key, entryList, 0, duration)
        tg.addTier(tier)
    tg.save(join(tgPath, name + ".TextGrid"))

    # Output audio portions between tones
    if generateWavs:
        split_on_tone.extractSubwavs(timeDict, inputPath, fn, subwavPath)
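
A hedged invocation of the function above; the pitch bounds, tone frequency, and paths are placeholder values, not settings taken from the source:

# Hypothetical settings -- tune minPitch/maxPitch to the speaker and
# toneFrequency to the marker tone actually present in the recordings
audiosplitOnTone("/data/wavs", "session1.wav", "/data/pitch",
                 "/data/textgrids", "/data/subwavs",
                 minPitch=75, maxPitch=450, toneFrequency=440,
                 minEventDuration=0.2, praatEXE="/usr/bin/praat",
                 praatScriptPath="/data/praatScripts", forceRegen=False,
                 generateWavs=True)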
Example #6
def generateEpochFiles(tgPath, wavPath, epPath):
    utils.makeDir(epPath)
    try:
        for filename in utils.findFiles(tgPath, filterExt=".TextGrid", stripExt=True):
            tgrid = tgio.openTextgrid(os.path.join(tgPath, filename + ".TextGrid"))
            with open(os.path.join(epPath, filename + ".txt"), "w") as epochFile:
                for (start, stop, label) in tgrid.tierDict["Epochs"].entryList:
                    epochFile.write("%s,%s,%s\n" % (label, start, stop))

    except KeyError:
        # No 'Epochs' tier -- ask the user for a fixed epoch duration instead
        epDuration = int(input("\nOk, the textgrids don't have an 'Epochs' "
                               "tier.  How long are the epochs in this "
                               "dataset?\nEnter the epoch duration in "
                               "seconds: "))
        print("\nOk. Epochs are each %d secs max.\n" % epDuration)

        durationList = []
        for fn in utils.findFiles(wavPath, filterExt=".wav"):
            duration = audio_scripts.getSoundFileDuration(join(wavPath, fn))
            durationList.append( (fn, int(duration)) )
        
        durationList.sort()
        
        for fn, duration in durationList:
            outputFN = os.path.splitext(fn)[0] + ".txt"

            numEpoches = duration // epDuration
            epochList = [(i, i * epDuration, (i + 1) * epDuration)
                         for i in range(numEpoches)]
            if duration % epDuration != 0:
                # The final, partial epoch covers whatever time is left over
                startTime = numEpoches * epDuration
                epochList.append((numEpoches, startTime,
                                  startTime + (duration % epDuration)))

            epochList = ["%02d, %02d, %02d" % row for row in epochList]
            
            with open(join(epPath, outputFN), "w") as epochFN:
                epochFN.write("\n".join(epochList) + "\n")
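
A sketch with placeholder directories. It writes one "label,start,stop" line per epoch for each textgrid, or prompts for a fixed epoch duration when no "Epochs" tier exists:

generateEpochFiles("/data/textgrids", "/data/wavs", "/data/epochs")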
Example #7
def audiosplitSilence(
    inputPath,
    fn,
    tgPath,
    pitchPath,
    subwavPath,
    minPitch,
    maxPitch,
    stepSize,
    numSteps,
    praatEXE,
    praatScriptPath,
    generateWavs=False,
    numSegmentsToExtract=None,
):
    '''
    Extract the non-silence portions of a file
    
    minPitch - the speaker's minimum pitch
    maxPitch - the speaker's maximum pitch
    (Note: the silence/speech intensity threshold is not a parameter; it is
     estimated from the file's intensity distribution -- see the comments
     in the function body.)
    stepSize - non-overlapping step size (in seconds)
    numSteps - number of consecutive blocks needed for a segment to be
                considered silence
                stepSize * numSteps is the smallest possible interval that
                can be considered silence/not-silence.
    praatEXE - full path to a praat executable.  On Windows use
                praatcon.exe; other systems use praat
    praatScriptPath - location of the folder containing praat scripts that
                        is distributed with pyAcoustics
    numSegmentsToExtract - if not None, keep only the /numSegmentsToExtract/
                            loudest segments and discard the rest.  Otherwise,
                            all non-silent segments are kept.
    generateWavs - if False, no wavefiles are extracted, but you can look at
                    the generated textgrids to see which wavefiles would have
                    been extracted
    '''
    utils.makeDir(tgPath)
    utils.makeDir(pitchPath)
    utils.makeDir(subwavPath)

    name = os.path.splitext(fn)[0]

    piSamplingRate = 100  # Samples per second
    sampleStep = 1 / float(piSamplingRate)
    outputFN = name + ".txt"
    motherPIList = pitch_and_intensity.extractPI(join(inputPath, fn),
                                                 join(pitchPath, outputFN),
                                                 praatEXE,
                                                 minPitch,
                                                 maxPitch,
                                                 sampleStep=sampleStep,
                                                 forceRegenerate=False)

    # entry = (time, pitchVal, intVal)
    motherPIList = [float(entry[2]) for entry in motherPIList]

    # We need the intensity threshold to distinguish silence from speech/noise
    # Naively, we can extract this by getting the nth percent most intense
    # sound in the file naive_vad.getIntensityPercentile()
    # (but then, how do we determine the percent?)
    # Alternatively, we could consider the set of intensity values to be
    # bimodal -- silent values vs non-silent.  The best threshold is the one
    # that minimizes the overlap between the two distributions, obtained via
    # data_fitting.getBimodalValley()
    #     silenceThreshold = naive_vad.getIntensityPercentile(motherPIList,
    #                                                         intensityPercentile)
    silenceThreshold = data_fitting.getBimodalValley(motherPIList, doplot=True)
    print("Estimated silence threshold: %s" % silenceThreshold)
    entryList = naive_vad.naiveVAD(motherPIList, silenceThreshold,
                                   piSamplingRate, stepSize, numSteps)
    entryList = [(time[0], time[1], str(i))
                 for i, time in enumerate(entryList)]

    # Filter out quieter sounds if necessary
    if numSegmentsToExtract is not None:

        # Get the rms energy of each non-silent region
        rmsEntryList = []
        for i, entry in enumerate(entryList):
            intList = motherPIList[int(entry[0] *
                                       piSamplingRate):int(entry[1] *
                                                           piSamplingRate)]

            rmsVal = my_math.rms(intList)
            rmsEntryList.append((rmsVal, entry))

        rmsEntryList.sort(reverse=True)  # Sort by energy, loudest first
        entryList = [
            rmsTuple[1] for rmsTuple in rmsEntryList[:numSegmentsToExtract]
        ]
        entryList.sort()  # Sort by time

    # Create the textgrid
    tg = tgio.Textgrid()
    duration = audio_scripts.getSoundFileDuration(join(inputPath, fn))
    tier = tgio.IntervalTier("speech_tier", entryList, 0, duration)
    tg.addTier(tier)
    tg.save(join(tgPath, name + '.TextGrid'))

    if generateWavs:
        for i, entry in enumerate(entryList):
            subwavOutputFN = join(subwavPath, name + "_" + str(i) + ".wav")
            audio_scripts.extractSubwav(join(inputPath, fn),
                                        subwavOutputFN,
                                        entry[0],
                                        entry[1],
                                        singleChannelFlag=True)
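
A hedged invocation to close; every value below is a placeholder. Note that stepSize * numSteps (here 0.1 * 5 = 0.5 s) is the smallest interval that can be classified as silence or non-silence:

# Hypothetical settings; keeps only the 10 loudest non-silent segments
audiosplitSilence("/data/wavs", "session1.wav", "/data/textgrids",
                  "/data/pitch", "/data/subwavs",
                  minPitch=75, maxPitch=450, stepSize=0.1, numSteps=5,
                  praatEXE="/usr/bin/praat",
                  praatScriptPath="/data/praatScripts",
                  generateWavs=True, numSegmentsToExtract=10)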