Python openTextGrid 예제들, praatio.tgio.openTextGrid Python 예제들

예제 #1

0

파일 보기

def tgBoundariesToZeroCrossings(tgFN, wavFN, outputTGFN, adjustPoints=True):
    '''
    Makes all textgrid interval boundaries fall on pressure wave zero crossings

    Every boundary of every interval tier in tgFN is replaced by the value
    that findNearestZeroCrossing() returns for it in wavFN.  The modified
    textgrid is saved to outputTGFN.

    if adjustPoints is True, the timestamps of point tiers are moved as
        well; otherwise point tiers are left as they are
    '''

    audiofile = wave.open(wavFN, "rb")

    # Close the wav file even if reading or adjusting the textgrid fails
    try:
        tg = tgio.openTextGrid(tgFN)

        # Iterate over a copy of the name list because tiers are replaced
        # while looping
        for tierName in tg.tierNameList[:]:
            tier = tg.tierDict[tierName]

            if isinstance(tier, tgio.PointTier):
                # Point tiers that should not be adjusted must be kept
                # as-is rather than replaced by an empty entry list
                if adjustPoints is not True:
                    continue
                newEntryList = [
                    (findNearestZeroCrossing(audiofile, start), label)
                    for start, label in tier.entryList
                ]

            elif isinstance(tier, tgio.IntervalTier):
                newEntryList = [
                    (findNearestZeroCrossing(audiofile, start),
                     findNearestZeroCrossing(audiofile, stop),
                     label)
                    for start, stop, label in tier.entryList
                ]

            else:
                # Unknown tier type: leave it untouched
                continue

            tg.replaceTier(tierName, newEntryList, True)
    finally:
        audiofile.close()

    tg.save(outputTGFN)

예제 #2

0

파일 보기

def deleteUnlabeledIntervals(tgFN, wavFN, tierName, outputFN):
    '''
    Removes all audio from sections of wav file not inside labeled intervals

    The blanks in tier tierName of textgrid tgFN are filled in and every
    interval with an empty label is passed to
    praatio_scripts.deleteWavSections(), which writes the result to outputFN.
    A wave.Error raised while doing so is reported on screen, not propagated.
    '''

    tg = tgio.openTextGrid(tgFN)

    # Get the unlabeled intervals
    tier = tg.tierDict[tierName].fillInBlanks()
    entryList = [entry for entry in tier.entryList if entry[2] == ""]

    # Sometimes the textgrid and wav file differ by some small amount
    # If the textgrid is longer, the script crashes
    wavDur = _getSoundFileDuration(wavFN)

    # Only clamp when there is a last unlabeled interval to clamp; a tier
    # without any unlabeled interval used to raise an IndexError here
    if entryList:
        lastStart, lastStop = entryList[-1][0], entryList[-1][1]
        if lastStart < wavDur < lastStop:
            entryList[-1] = (lastStart, wavDur, "")

    try:
        praatio_scripts.deleteWavSections(wavFN,
                                          outputFN,
                                          entryList,
                                          doShrink=False)
    except wave.Error:
        print("There was a problem processing {}".format(
            os.path.basename(tgFN)))

예제 #3

0

파일 보기

파일: pitchGeneral.py 프로젝트: authorofnaught/MCRP

def renameTiers(inputPath, outputPath, includeMothersPhones=False):
    '''
    Normalizes the tier names of every textgrid in inputPath

    Each row of renameList pairs the accepted spellings of a tier name
    with the name they are replaced by (via replaceTierName()).  The
    renamed textgrids are saved under the same filename in outputPath.

    if includeMothersPhones is True, "Mother's phones" is also renamed
        to "Mother's Phones"

    A ValueError raised by replaceTierName() is re-raised after the
    offending filename has been printed.
    '''

    # NOTE(review): "child's backchannel" is listed under
    # "Mother's Backchannel" -- confirm this is intended
    renameList = [(["Mother", "Mother's Speech", "Mother's speech", "mother's speech", "Mother Speech", "mother speech"], "Mother"),
                  (["Mother's Backchannel", "Mother's backchannel", "mother's backchannel", "child's backchannel"], "Mother's Backchannel"),
                  (["Child", "Child's speech", "Child's Speech", "child's speech", "Child Speech", "child speech"], "Child"),
                  (["Room", "Extraneous room noise", "Extraneous Room Noise", "Extraneous Noise", "Room Noise", "room noise", "Room noise", "extraneous room noise"], "Room"),
                  (["Timer", "Time"], "Timer"),
                  (["Epochs", "epochs",], "Epochs"),
                  ]

    if includeMothersPhones:
        renameList.insert(1, (["Mother's phones",], "Mother's Phones"))

    utils.makeDir(outputPath)

    for fn in utils.findFiles(inputPath, filterExt=".TextGrid"):

        print(fn)
        tg = tgio.openTextGrid(join(inputPath, fn))

        for oldNameList, newName in renameList:
            try:
                tg = replaceTierName(tg, oldNameList, newName)
            except ValueError:
                # Function-call form of print works on Python 2 and 3
                print(fn)
                raise

        tg.save(join(outputPath, fn))

예제 #4

0

파일 보기

파일: removeWavSegments.py 프로젝트: authorofnaught/MCRP

def deleteUnlabeledIntervals(tgPath, wavPath, tierName, outputPath):
    """
    Removes the unlabeled sections of tier tierName from a set of wav files

    For every textgrid found in tgPath, the wav file with the same name in
    wavPath is processed with praatio_scripts.deleteWavSections() and
    written to outputPath.

    Does not assume TextGrid and wav files are inside same directory
    """

    utils.makeDir(outputPath)

    for name in utils.findFiles(tgPath, filterExt=".TextGrid", stripExt=True):

        tg = tgio.openTextGrid(join(tgPath, name + ".TextGrid"))

        # Get the unlabeled intervals
        tier = tg.tierDict[tierName].fillInBlanks()
        entryList = [entry for entry in tier.entryList if entry[2] == ""]

        wavFN = join(wavPath, name + ".wav")
        outputWavFN = join(outputPath, name + ".wav")

        # Sometimes the textgrid and wav file differ by some small amount
        # If the textgrid is longer, the script crashes
        wavDur = _getSoundFileDuration(wavFN)

        # Only clamp when there is a last unlabeled interval to clamp; a
        # tier without any unlabeled interval used to raise an IndexError
        if entryList:
            lastStart, lastStop = entryList[-1][0], entryList[-1][1]
            if lastStart < wavDur < lastStop:
                entryList[-1] = (lastStart, wavDur, "")

        praatio_scripts.deleteWavSections(wavFN,
                                          outputWavFN,
                                          entryList,
                                          doShrink=False)

예제 #5

0

파일 보기

파일: pitchGeneral.py 프로젝트: authorofnaught/MCRP

def filterTextgrids(tgPath, speechTierName, laughterTierName, minDuration, outputPath):
    '''
    Filters the entries of the speech tier of every textgrid in tgPath

    - keeps only the entries for which insituLaughterCheck() returns a
      true value (each entry is printed as it is examined)
    - of those, keeps only the entries that last longer than minDuration
      (uwe's script crashed on an utterance of length 0.013 seconds)

    The filtered textgrids are saved under the same name in outputPath.
    '''

    utils.makeDir(outputPath)

    for tgName in utils.findFiles(tgPath, filterExt=".TextGrid"):

        textgrid = tgio.openTextGrid(join(tgPath, tgName))

        # First pass: the laughter check
        passedCheckList = []
        for candidate in textgrid.tierDict[speechTierName].entryList:
            begin, end, _ = candidate
            print(candidate)
            if not insituLaughterCheck(begin, end, textgrid, laughterTierName):
                continue
            passedCheckList.append(candidate)

        # Second pass: the duration threshold
        longEnoughList = []
        for begin, end, text in passedCheckList:
            if float(end) - float(begin) > minDuration:
                longEnoughList.append((begin, end, text))

        textgrid.replaceTier(speechTierName, longEnoughList)
        textgrid.save(join(outputPath, tgName))

예제 #6

0

파일 보기

파일: pitchGeneral.py 프로젝트: authorofnaught/MCRP

def extractTGInfo(inputPath, outputPath, tierName, searchForMothersSpeech):
    '''
    Dumps the entries of one tier of each textgrid to a csv-like text file

    For every textgrid in inputPath, the entries of tier tierName are
    written to <name>.txt in outputPath, one "start,stop,label" row per
    line, encoded as utf-8.

    if searchForMothersSpeech is true, only entries labeled "MS" are output

    Same as textgrids.extractTGInfo?
    '''

    utils.makeDir(outputPath)

    for name in utils.findFiles(inputPath, filterExt=".TextGrid", stripExt=True):
        print(name)

        tg = tgio.openTextGrid(join(inputPath, name + ".TextGrid"))
        entryList = tg.tierDict[tierName].getEntries()

        if searchForMothersSpeech:
            entryList = [(start, stop, label) for start, stop, label in entryList
                         if label == "MS"]

        outputList = ["%f,%f,%s" % (start, stop, label)
                      for start, stop, label in entryList]
        outputTxt = "\n".join(outputList) + "\n"

        # The with-block guarantees the file is flushed and closed
        outputFN = join(outputPath, name + ".txt")
        with codecs.open(outputFN, "w", encoding="utf-8") as fd:
            fd.write(outputTxt)

예제 #7

0

파일 보기

파일: duration_morph.py 프로젝트: timmahrt/ProMo

def textgridMorphDuration(fromTGFN, toTGFN):
    '''
    Builds a textgrid from the tiers of fromTGFN morphed to those of toTGFN

    Each tier of the source textgrid is morphed (tier.morph()) against the
    tier with the same name in the target textgrid and added to a new,
    initially empty textgrid, which is returned.

    The target textgrid must contain every tier name of the source one.
    '''
    sourceTG = tgio.openTextGrid(fromTGFN)
    targetTG = tgio.openTextGrid(toTGFN)
    morphedTG = tgio.Textgrid()

    for name in sourceTG.tierNameList:
        sourceTier = sourceTG.tierDict[name]
        targetTier = targetTG.tierDict[name]
        morphedTG.addTier(sourceTier.morph(targetTier))

    return morphedTG

예제 #8

0

파일 보기

파일: f0_morph.py 프로젝트: timmahrt/ProMo

def getPitchForIntervals(data, tgFN, tierName):
    '''
    Preps data for use in f0Morph

    Splits data by the intervals of tier tierName in textgrid tgFN (using
    tier.getValuesInIntervals()) and returns only the per-interval data
    lists, discarding the interval each list was paired with.
    '''
    tier = tgio.openTextGrid(tgFN).tierDict[tierName]
    return [valueList for _, valueList in tier.getValuesInIntervals(data)]

예제 #9

0

파일 보기

파일: pitchGeneral.py 프로젝트: authorofnaught/MCRP

def replaceAllLabelsInMotherTierWithMS(inputPath, outputPath):
    '''
    Relabels every entry of the "Mother" tier as "MS"

    Applied to each textgrid in inputPath; the results are saved under the
    same filename in outputPath.
    '''

    utils.makeDir(outputPath)

    motherTierName = "Mother"

    for tgName in utils.findFiles(inputPath, filterExt=".TextGrid"):

        textgrid = tgio.openTextGrid(join(inputPath, tgName))

        # Keep the timing of each entry, overwrite its label
        relabeledList = []
        for begin, end, _ in textgrid.tierDict[motherTierName].entryList:
            relabeledList.append([begin, end, "MS"])

        textgrid.replaceTier(motherTierName, relabeledList)
        textgrid.save(join(outputPath, tgName))

예제 #10

0

파일 보기

파일: pitchGeneral.py 프로젝트: authorofnaught/MCRP

def analyzeInsituLaughter(inputPath, outputPath):
    '''
    Lists the "Mother" tier entries flagged by insituLaughterCheck()

    Every entry of the "Mother" tier of each textgrid in inputPath is
    checked against the "Mother's Backchannel" tier.  One
    "filename,start,stop,label" row is written to
    insitu_laughter_events.csv in outputPath for each entry for which the
    check returns a true value.
    '''

    outputList = []
    for fn in utils.findFiles(inputPath, filterExt=".TextGrid"):

        tg = tgio.openTextGrid(join(inputPath, fn))
        tier = tg.tierDict["Mother"]
        for start, stop, label in tier.getEntries():
            isInsitu = insituLaughterCheck(start, stop, tg, "Mother's Backchannel")
            if isInsitu:
                outputList.append("%s,%02.02f,%02.02f,%s" % (fn, start, stop, label))

    # The with-block guarantees the file is flushed and closed
    with open(join(outputPath, "insitu_laughter_events.csv"), "w") as fd:
        fd.write("\n".join(outputList) + "\n")

예제 #11

0

파일 보기

파일: estimate_speech_rate.py 프로젝트: authorofnaught/MCRP

def _addSyllableNucleiToTextgrids(wavPath, tgPath, tierName,
                                  syllableNucleiPath, outputPath):
    '''
    Adds a "Syllable Nuclei" point tier to the textgrid of each wav file

    For every wav file in wavPath, the start times of tier tierName in the
    matching textgrid are handed to uwe_sr.toAbsoluteTime().  The lists it
    returns are flattened into one list of timestamps, which become the
    points (labeled with their zero-padded index) of the new tier.  The
    extended textgrid is saved to outputPath.
    '''
    for name in utils.findFiles(wavPath, filterExt=".wav", stripExt=True):

        sourceTG = tgio.openTextGrid(join(tgPath, name + ".TextGrid"))
        startTimeList = [
            entry[0] for entry in sourceTG.tierDict[tierName].entryList
        ]
        nucleiPerInterval = uwe_sr.toAbsoluteTime(name, syllableNucleiPath,
                                                  startTimeList)

        # Debug output: number of nuclei found for each start time
        for idx, startTime in enumerate(startTimeList):
            print("{}: {}".format(startTime,
                                  len(nucleiPerInterval[idx])))

        allNuclei = []
        for sublist in nucleiPerInterval:
            allNuclei.extend(sublist)

        duration = audio_scripts.getSoundFileDuration(
            join(wavPath, name + ".wav"))

        # Zero-pad the point labels to a common width
        numDigits = my_math.orderOfMagnitude(len(allNuclei)) + 1
        labelTemplate = "%%0%dd" % numDigits

        pointList = []
        for idx, timestamp in enumerate(allNuclei):
            pointList.append((timestamp, labelTemplate % idx))
        nucleiTier = tgio.PointTier("Syllable Nuclei", pointList, 0, duration)

        # The textgrid is read again before the new tier is attached
        outputTG = tgio.openTextGrid(join(tgPath, name + ".TextGrid"))
        outputTG.addTier(nucleiTier)
        outputTG.save(join(outputPath, name + ".TextGrid"))

예제 #12

0

파일 보기

파일: pitchGeneral.py 프로젝트: authorofnaught/MCRP

def addEpochsToTextgrids(tgPath, epochPath, outputPath):
    '''
    Adds an "epochs" interval tier to every textgrid in tgPath

    The epochs of <name>.TextGrid are read from <name>.txt in epochPath
    with utils.openCSV(); each row is unpacked as (label, start, end).
    The new tier spans from 0 to the textgrid's maxTimestamp.  The
    extended textgrids are saved to outputPath.
    '''

    utils.makeDir(outputPath)

    for name in utils.findFiles(tgPath, filterExt=".TextGrid", stripExt=True):
        # Function-call form of print works on Python 2 and 3
        print(name)
        tg = tgio.openTextGrid(join(tgPath, name + ".TextGrid"))

        entryList = utils.openCSV(epochPath, name + ".txt")
        entryList = [(float(start), float(end), label) for label, start, end in entryList]

        tier = tgio.IntervalTier("epochs", entryList, minT=0, maxT=tg.maxTimestamp)

        tg.addTier(tier)
        tg.save(join(outputPath, name + ".TextGrid"))

예제 #13

0

파일 보기

파일: utils.py 프로젝트: timmahrt/ProMo

def getIntervals(fn, tierName, filterFunc=None,
                 includeUnlabeledRegions=False):
    '''
    Returns the entries of tier tierName in textgrid fn

    if includeUnlabeledRegions is True, the blanks in the tier are filled
        in (tier.fillInBlanks()) before the entries are read
    if filterFunc is given, only the entries for which it returns a true
        value are returned
    '''
    tier = tgio.openTextGrid(fn).tierDict[tierName]

    if includeUnlabeledRegions is True:
        tier = tier.fillInBlanks()

    if filterFunc is None:
        return tier.entryList

    return [entry for entry in tier.entryList if filterFunc(entry)]

예제 #14

0

파일 보기

파일: duration_morph.py 프로젝트: timmahrt/ProMo

def textgridManipulateDuration(tgFN, ratioList):
    '''
    Returns a new textgrid holding the morphed tiers of textgrid tgFN

    Interval tiers are processed by _morphIntervalTier() and point tiers
    by _morphPointTier(), both with ratioList.  An AssertionError is
    raised for a tier of any other type (or if the helper returns None).
    '''

    sourceTG = tgio.openTextGrid(tgFN)
    morphedTG = tgio.Textgrid()

    for name in sourceTG.tierNameList:
        tier = sourceTG.tierDict[name]

        if isinstance(tier, tgio.IntervalTier):
            morphedTier = _morphIntervalTier(tier, ratioList)
        elif isinstance(tier, tgio.PointTier):
            morphedTier = _morphPointTier(tier, ratioList)
        else:
            morphedTier = None

        assert morphedTier is not None
        morphedTG.addTier(morphedTier)

    return morphedTG

예제 #15

0

파일 보기

def generatePIMeasures(dataList,
                       tgPath,
                       tgFN,
                       tierName,
                       doPitch,
                       medianFilterWindowSize=None):
    '''
    Generates processed values for the intervals of a textgrid tier

    dataList is split by the intervals of tier tierName (using
    tier.getValuesInIntervals()); each row of the split data is unpacked
    as a 3-tuple whose second item is the pitch value and whose third
    item is the intensity value.

    if 'doPitch' is true, one list of getPitchMeasures() results is output
        per interval
    otherwise, a one-item list with the rms of the non-zero intensity
        values (0 if there are none) is output per interval
    '''

    fullTGFN = join(tgPath, tgFN)
    tier = tgio.openTextGrid(fullTGFN).tierDict[tierName]

    measureList = []
    for interval, sampleList in tier.getValuesInIntervals(dataList):
        # NOTE(review): the first item of the interval is used as the
        # label; if intervals are (start, stop, label) this is the start
        # time -- confirm
        label = interval[0]

        if doPitch:
            pitchList = [pitchVal for _, pitchVal, _ in sampleList]
            measures = getPitchMeasures(pitchList, fullTGFN, label,
                                        medianFilterWindowSize, True)
            measureList.append(list(measures))
            continue

        # Zero intensity values are left out of the rms
        nonZeroList = [val for _, _, val in sampleList if val != 0.0]
        rmsIntensity = myMath.rms(nonZeroList) if nonZeroList else 0
        measureList.append([rmsIntensity])

    return measureList

예제 #16

0

파일 보기

파일: estimate_speech_rate.py 프로젝트: authorofnaught/MCRP

def _calculateSyllablesPerSecondForIntervals(wavPath, tgPath, tierName,
                                             syllableNucleiPath):
    '''
    Prints the syllables per second of each interval of tier tierName

    For every wav file in wavPath, the start times of the tier in the
    matching textgrid are handed to uwe_sr.toAbsoluteTime().  The number of
    items it returns for an interval, divided by the interval's duration,
    is that interval's rate.
    '''
    for name in utils.findFiles(wavPath, filterExt=".wav", stripExt=True):

        textgrid = tgio.openTextGrid(join(tgPath, name + ".TextGrid"))
        intervalList = textgrid.tierDict[tierName].entryList
        startTimeList = [interval[0] for interval in intervalList]
        nucleiPerInterval = uwe_sr.toAbsoluteTime(name, syllableNucleiPath,
                                                  startTimeList)

        pairedList = utils.safeZip([nucleiPerInterval, intervalList],
                                   enforceLength=True)
        rateList = [
            str(len(nucleiList) / (interval[1] - interval[0]))
            for nucleiList, interval in pairedList
        ]

        print("%s - %s (syllables/second for each interval)" %
              (name, ",".join(rateList)))

예제 #17

0

파일 보기

파일: pitchGeneral.py 프로젝트: authorofnaught/MCRP

def isolateMotherSpeech(path, filterGrid, outputPath):
    '''
    Removes mother speech when the child is also speaking

    For every textgrid in path, each entry of the "Mother" tier has the
    parts that overlap with an entry of tier filterGrid cut out (using
    subtractOverlap()).  The remaining pieces replace the "Mother" tier and
    the textgrid is saved under the same name in outputPath.
    '''

    utils.makeDir(outputPath)

    for fn in utils.findFiles(path, filterExt=".TextGrid"):

        tg = tgio.openTextGrid(join(path, fn))
        motherTier = tg.tierDict["Mother"]

        newEntryList = []
        for start, stop, label in motherTier.entryList:
            croppedTG = tg.crop(False, False, start, stop)
            entryList = croppedTG.tierDict[filterGrid].entryList

            # The pieces of the current entry that have survived so far
            resultList = [(start, stop, label),]

            for subStart, subStop, _ in entryList:

                i = 0
                while i < len(resultList):
                    tmpStart = resultList[i][0]
                    tmpEnd = resultList[i][1]
                    tmpResultList = subtractOverlap(tmpStart,
                                                    tmpEnd,
                                                    label,
                                                    subStart,
                                                    subStop)
                    # Replace if there has been a change
                    if tmpResultList != [[tmpStart, tmpEnd, label],]:
                        # Swap piece i for its remains and keep the pieces
                        # that follow it; previously they were dropped
                        resultList = (resultList[:i] + tmpResultList +
                                      resultList[i + 1:])
                        # Skip over the pieces that were just inserted
                        i += len(tmpResultList) - 1
                    i += 1

            newEntryList.extend(resultList)

        newMotherTier = tgio.IntervalTier("Mother", newEntryList)
        tg.replaceTier("Mother", newMotherTier.entryList)
        tg.save(join(outputPath, fn))

예제 #18

0

파일 보기

파일: pitchGeneral.py 프로젝트: authorofnaught/MCRP

def extractMotherSpeech(wavPath, textgridPath, mothersSpeechName,
                        outputWavPath, outputTextgridPath):
    '''
    Outputs one subwav and one cropped textgrid per entry of a tier

    For every wav file in wavPath, each entry of tier mothersSpeechName in
    the matching textgrid is extracted with audio_scripts.extractSubwav()
    to <name>_<index>.wav in outputWavPath, and the textgrid cropped to
    the entry is saved as <name>_<index>.TextGrid in outputTextgridPath.
    '''

    utils.makeDir(outputWavPath)
    utils.makeDir(outputTextgridPath)

    for name in utils.findFiles(wavPath, filterExt=".wav", stripExt=True,):
        # Function-call form of print works on Python 2 and 3
        print(name)
        tg = tgio.openTextGrid(join(textgridPath, name + ".TextGrid"))
        speechTier = tg.tierDict[mothersSpeechName]
        for i, entry in enumerate(speechTier.entryList):
            subName = "%s_%03d" % (name, i)
            start, stop, label = entry
            start, stop = float(start), float(stop)
            audio_scripts.extractSubwav(join(wavPath, name + ".wav"),
                                        join(outputWavPath, subName + ".wav"),
                                        start, stop,
                                        singleChannelFlag=True)
            subTG = tg.crop(strictFlag=False, softFlag=False,
                            startTime=start, endTime=stop)
            subTG.save(join(outputTextgridPath, subName + ".TextGrid"))

예제 #19

0

파일 보기

파일: pitchGeneral.py 프로젝트: authorofnaught/MCRP

def analyzeLaughter(textgridPath, outputPath):
    '''
    Reports where and for how long speech, laughter and pauses occur

    For each of the codes "MS" and "FP" (searched for in the "Mother" tier)
    and "LA" (searched for in the "Mother's Backchannel" tier), the
    matching entries of all textgrids in textgridPath are written to one
    csv file in outputPath.  The summed duration of each code per textgrid
    is written to event_cumulative_lengths.csv.
    '''

    utils.makeDir(outputPath)

    speechTierName = "Mother"
    laughterTierName = "Mother's Backchannel"

    speechCode = "MS"
    laughterCode = "LA"
    pauseCode = "FP"

    # How much did each event occur?
    allCodeSummaryList = []
    for tierName, code, outputName in [[speechTierName, speechCode, "speech_occurances"],
                                       [laughterTierName, laughterCode, "laughter_occurances"],
                                       [speechTierName, pauseCode, "pause_code"],
                                       ]:
        entryList = []
        summaryList = []
        for fn in utils.findFiles(textgridPath, filterExt=".TextGrid"):
            tg = tgio.openTextGrid(join(textgridPath, fn))
            tier = tg.tierDict[tierName]

            matchEntryList = tier.find(code)
            durationList = [float(stop) - float(start)
                            for start, stop, label in matchEntryList]
            matchEntryList = [[fn, str(start), str(stop), label]
                              for start, stop, label in matchEntryList]

            entryList.extend(matchEntryList)
            summaryList.append((fn, str(sum(durationList))))

        entryList = [",".join(row) for row in entryList]
        # The with-block guarantees the file is flushed and closed
        with open(join(outputPath, outputName + ".csv"), "w") as fd:
            fd.write("\n".join(entryList))

        allCodeSummaryList.append(summaryList)

    # One row per textgrid: the three summaries are walked in parallel
    outputList = ["Filename,Speech,Laughter,Pause",]
    for speech, laugh, pause in utils.safeZip(allCodeSummaryList, enforceLength=True):
        outputList.append(",".join([speech[0], speech[1], laugh[1], pause[1]]))

    with open(join(outputPath, "event_cumulative_lengths.csv"), "w") as fd:
        fd.write("\n".join(outputList) + "\n")

예제 #20

0

파일 보기

파일: pitchGeneral.py 프로젝트: authorofnaught/MCRP

def filterShortIntervalsFromTier(tgPath, speechTierName, minDuration, outputPath):
    '''
    Drops the entries shorter than minDuration from tier speechTierName

    Applied to every textgrid in tgPath; the results are saved under the
    same name in outputPath.  (uwe's script crashed on an utterance of
    length 0.013 seconds)
    '''

    def _isLongEnough(entry):
        begin, end, _ = entry
        return float(end) - float(begin) >= minDuration

    utils.makeDir(outputPath)

    for tgName in utils.findFiles(tgPath, filterExt=".TextGrid"):

        textgrid = tgio.openTextGrid(join(tgPath, tgName))
        keptList = [entry
                    for entry in textgrid.tierDict[speechTierName].entryList
                    if _isLongEnough(entry)]

        textgrid.replaceTier(speechTierName, keptList)
        textgrid.save(join(outputPath, tgName))

예제 #21

0

파일 보기

파일: pitchGeneral.py 프로젝트: authorofnaught/MCRP

def generateEpochFiles(tgPath, wavPath, epPath):
    '''
    Writes one epoch file per recording to epPath

    If the textgrids in tgPath have an "Epochs" tier, its entries are
    written as "label,start,stop" rows to <name>.txt.

    If a textgrid lacks that tier, the user is asked for an epoch duration
    and the epoch files are generated from the durations of the wav files
    in wavPath instead: consecutive epochs of that length plus one shorter
    epoch for any remainder.
    '''
    utils.makeDir(epPath)
    try:
        for filename in utils.findFiles(tgPath, filterExt=".TextGrid", stripExt=True):
            tgrid = tgio.openTextGrid(os.path.join(tgPath, filename + ".TextGrid"))
            with open(os.path.join(epPath, filename + ".txt"), "w") as epochFile:
                for (start, stop, label) in tgrid.tierDict["Epochs"].entryList:
                    epochFile.write(str(label) + ',' + str(start) + ',' + str(stop) + '\n')

    # Only a missing "Epochs" tier triggers the fallback; any other error
    # (unreadable file, Ctrl-C, ...) now propagates instead of being hidden
    except KeyError:
        # raw_input only exists on Python 2; input is its Python 3 name
        try:
            readLine = raw_input
        except NameError:
            readLine = input

        epDuration = int(readLine("\nOk, the textgrids don't have an 'Epochs' tier.  How long are the epochs in this dataset?\nEnter the epoch duration in seconds: "))
        print("\nOk. Epochs are each %dsecs max.\n" % epDuration)

        durationList = []
        for fn in utils.findFiles(wavPath, filterExt=".wav"):
            duration = audio_scripts.getSoundFileDuration(join(wavPath, fn))
            durationList.append((fn, int(duration)))

        durationList.sort()

        for fn, duration in durationList:
            outputFN = os.path.splitext(fn)[0] + ".txt"

            numEpoches = int(duration / epDuration)
            epochList = [(i, i * epDuration, (i + 1) * epDuration)
                         for i in range(numEpoches)]
            if duration % epDuration != 0:
                # NOTE(review): the trailing partial epoch is numbered
                # numEpoches + 1 although the full epochs end at
                # numEpoches - 1 -- confirm the gap is intended
                startTime = numEpoches * epDuration
                epochList.append((numEpoches + 1, startTime,
                                  startTime + (duration % epDuration)))

            epochList = ["%02d, %02d, %02d" % row for row in epochList]

            with open(join(epPath, outputFN), "w") as epochFN:
                epochFN.write("\n".join(epochList) + "\n")

예제 #22

0

파일 보기

파일: pitchGeneral.py 프로젝트: authorofnaught/MCRP

def removeIntervalsFromTierByLabel(inputPath, tierName, targetLabel, outputPath, removeAllBut=False):
    '''
    Filters the entries of tier tierName by their label

    if removeAllBut is false, the entries labeled targetLabel are removed
    if removeAllBut is true, only the entries labeled targetLabel are kept

    Applied to every textgrid in inputPath; the results are saved under the
    same name in outputPath.
    '''

    utils.makeDir(outputPath)

    for tgName in utils.findFiles(inputPath, filterExt=".TextGrid"):

        textgrid = tgio.openTextGrid(join(inputPath, tgName))

        keptList = []
        for entry in textgrid.tierDict[tierName].entryList:
            _, _, label = entry
            if removeAllBut:
                keep = label == targetLabel
            else:
                keep = label != targetLabel
            if keep:
                keptList.append(entry)

        textgrid.replaceTier(tierName, keptList)
        textgrid.save(join(outputPath, tgName))

예제 #23

0

파일 보기

to modify a textgrid.  It still shows that, but all that code is now in
the main library (pysle.praattools.syllabifyTextgrid)

This snippet shows you how to use this function.
'''

from os.path import join

from praatio import tgio
from pysle import isletool
from pysle import praattools

# Directory holding the example textgrid (the second assignment wins)
path = join('.', 'files')
path = "/Users/tmahrt/Dropbox/workspace/pysle/test/files"

# Needs the full path to the file
islevPath = '/Users/tmahrt/Dropbox/workspace/pysle/test/islev2.txt'

tg = tgio.openTextGrid(join(path, "pumpkins.TextGrid"))
isleDict = isletool.LexicalTool(islevPath)

# Get the syllabification tiers and add them to the textgrid
syllableTG = praattools.syllabifyTextgrid(isleDict, tg, "word", "phone",
                                          skipLabelList=["",])
for syllableTierName in ("syllable", "tonicSyllable", "tonicVowel"):
    tg.addTier(syllableTG.tierDict[syllableTierName])

tg.save(join(path, "pumpkins_with_syllables.TextGrid"))

예제 #24

0

파일 보기

def markMaxPitch(tgFNFullPath, wavFNFullPath, outputPath, tierName, minPitch,
                 maxPitch, numTopPitchIntervals, numIntervalsPerTier,
                 praatEXE):
    '''
    Saves a textgrid whose tiers mark the intervals of highest pitch

    The maximum pitch of each interval of tier tierName is measured and
    the intervals are ranked by it, highest first.  The ranking is cut
    into groups of numIntervalsPerTier intervals, starting a new group
    while its first rank is below numTopPitchIntervals; each group becomes
    one tier (labeled with the pitch value) of the output textgrid.

    The textgrid is saved in the "textgrid" subfolder of outputPath under
    the name of the input textgrid.  Nothing is returned.
    '''

    tgFN = os.path.basename(tgFNFullPath)
    wavFN = os.path.basename(wavFNFullPath)

    print("Processing max pitch from {}".format(wavFN))

    io.make_dir(outputPath)

    cleanedWavPath = join(outputPath, "cleanedWavs")
    io.make_dir(cleanedWavPath)
    cleanedWavFN = join(cleanedWavPath, wavFN)

    pitchPath = join(outputPath, "pitch")
    io.make_dir(pitchPath)
    pitchFN = io.get_filename_w_new_ext(wavFN, "pitch")

    textgridPath = join(outputPath, "textgrid")
    io.make_dir(textgridPath)
    textgridFN = join(textgridPath, tgFN)

    # 1 Delete unlabeled segments
    if not os.path.exists(cleanedWavFN):
        deleteUnlabeledIntervals(tgFNFullPath, wavFNFullPath, tierName,
                                 cleanedWavFN)

    # 2 Measure pitch from 'pruned' recording file
    # NOTE(review): the full path of the original wav is passed along with
    # the cleaned-wav folder -- confirm audioToPI reads the pruned file
    piList = pitch_and_intensity.audioToPI(cleanedWavPath,
                                           wavFNFullPath,
                                           pitchPath,
                                           pitchFN,
                                           praatEXE,
                                           minPitch,
                                           maxPitch,
                                           forceRegenerate=False)

    # 3 Get pitch from each interval
    tg = tgio.openTextGrid(tgFNFullPath)
    tier = tg.tierDict[tierName]
    piListSegmented = tier.getValuesInIntervals(piList)

    # 4 Get max pitch from each interval
    entryList = []
    for interval, dataList in piListSegmented:
        pitchList = [f0Val for _, f0Val, _ in dataList]
        if len(pitchList) == 0:
            continue
        maxF0Val = max(pitchList)
        entryList.append((interval[0], interval[1], maxF0Val))

    entryList.sort(key=lambda x: x[2], reverse=True)
    entryList = [(start, stop, str(label)) for start, stop, label in entryList]

    # 5 Report the top intervals
    minT = tg.minTimestamp
    maxT = tg.maxTimestamp

    outputTG = tgio.Textgrid()
    for i in range(0, numTopPitchIntervals, numIntervalsPerTier):
        # The group size follows numIntervalsPerTier (it used to be
        # hard-coded to 10, which overlapped or skipped intervals for any
        # other step size)
        name = "top %d" % (i + numIntervalsPerTier)
        subEntryList = entryList[i:i + numIntervalsPerTier]

        tier = tgio.IntervalTier(name, subEntryList, minT, maxT)
        outputTG.addTier(tier)

    outputTG.save(textgridFN)

예제 #25

0

파일 보기

파일: pitchGeneral.py 프로젝트: authorofnaught/MCRP

def eventStructurePerEpoch(epochPath, fullyFilteredTGPath, 
                           childFilteredTGPath, noiseFilteredTGPath,
                           unfilteredTGPath, outputPath, 
                           speechTierName, laughterTierName):
    '''
    How frequent and with what duration did laughter, pauses, and speech occur

    For every epoch file (<name>.txt, rows of epochNum, start, stop) in
    epochPath, the textgrids named <name>.TextGrid in the four textgrid
    folders are cropped to each epoch.  One row per epoch is written to
    <name>.txt in outputPath with, in order:

    speech duration, speech count (fully filtered textgrid),
    speech duration in the child-filtered textgrid,
    speech duration in the noise-filtered textgrid,
    speech duration in the unfiltered textgrid,
    unfiltered minus fully filtered speech duration,
    pause duration, pause count, laughter duration, laughter count
    '''

    def _getCountsAndDurations(tier, searchLabel):
        # Total duration and number of the entries labeled searchLabel
        entryList = tier.find(searchLabel)
        durationList = [float(stop) - float(start)
                        for start, stop, label in entryList]
        count = len(entryList)

        return sum(durationList), count

    def _cropToEpoch(sourceTG, start, stop):
        # The part of sourceTG that lies within one epoch
        return sourceTG.crop(strictFlag=False, softFlag=False,
                             startTime=start, endTime=stop)

    def _getSpeechDuration(sourceTG, start, stop):
        # Total duration of the "MS" entries of sourceTG within one epoch
        croppedTier = _cropToEpoch(sourceTG, start, stop).tierDict[speechTierName]
        return _getCountsAndDurations(croppedTier, "MS")[0]

    utils.makeDir(outputPath)

    for name in utils.findFiles(epochPath, filterExt=".txt", stripExt=True):

        epochList = utils.openCSV(epochPath, name + ".txt")
        epochList = [(epochNum, float(start), float(stop))
                     for epochNum, start, stop in epochList]
        tg = tgio.openTextGrid(join(fullyFilteredTGPath,
                                    name + ".TextGrid"))
        childFilteredTG = tgio.openTextGrid(join(childFilteredTGPath,
                                                 name + ".TextGrid"))
        noiseFilteredTG = tgio.openTextGrid(join(noiseFilteredTGPath,
                                                 name + ".TextGrid"))
        origTG = tgio.openTextGrid(join(unfilteredTGPath,
                                        name + ".TextGrid"))

        outputList = []
        for epochNum, start, stop in epochList:
            subTG = _cropToEpoch(tg, start, stop)

            speechTier = subTG.tierDict[speechTierName]
            laughterTier = subTG.tierDict[laughterTierName]

            pauseDur, numPauses = _getCountsAndDurations(speechTier, "FP")
            speechDur, numSpeech = _getCountsAndDurations(speechTier, "MS")
            laughDur, numLaughter = _getCountsAndDurations(laughterTier, "LA")

            csFiltSpeech = _getSpeechDuration(childFilteredTG, start, stop)
            nsFiltSpeech = _getSpeechDuration(noiseFilteredTG, start, stop)
            fullSpeechDur = _getSpeechDuration(origTG, start, stop)

            epochTuple = (speechDur, numSpeech, csFiltSpeech, nsFiltSpeech,
                          fullSpeechDur, fullSpeechDur - speechDur,
                          pauseDur, numPauses, laughDur, numLaughter)
            outputList.append("%.02f, %d, %.02f, %.02f, %.02f, %.02f, %.02f, %d, %.02f, %d" % epochTuple)

        # The with-block guarantees the file is flushed and closed
        with open(join(outputPath, name + ".txt"), "w") as fd:
            fd.write("\n".join(outputList) + "\n")

예제 #26

0

파일 보기

def splitAudioOnTier(wavFN,
                     tgFN,
                     tierName,
                     outputPath,
                     outputTGFlag=False,
                     nameStyle=None,
                     noPartialIntervals=False):
    '''
    Outputs one subwav for each entry in the tier of a textgrid

    outputTGFlag: If True, outputs paired, cropped textgrids
                  If is type str (a tier name), outputs a paired, cropped
                  textgrid with only the specified tier
    nameStyle: if 'append': append interval label to output name
               if 'append_no_i': append label but not interval to output name
               if 'label': output name is the same as label
               if None: output name plus the interval number
    noPartialIntervals: if True: intervals in non-target tiers that are
                                  not wholly contained by an interval in
                                  the target tier will not be included in
                                  the output textgrids

    Returns a list of (start, stop, output wav filename) tuples, one per
    entry of the tier; the list is empty if the tier has no entries.
    '''
    tg = tgio.openTextGrid(tgFN)
    entryList = tg.tierDict[tierName].entryList

    # Nothing to split (and log10(0) below would raise a ValueError)
    if len(entryList) == 0:
        return []

    # Build the output name template
    name = os.path.splitext(os.path.split(wavFN)[1])[0]
    orderOfMagnitude = int(math.floor(math.log10(len(entryList))))

    # We want one more zero in the output than the order of magnitude
    outputTemplate = "%s_%%0%dd" % (name, orderOfMagnitude + 1)

    firstWarning = True

    # If we're using the 'label' namestyle for outputs, all of the
    # interval labels have to be unique, or wave files with those
    # labels as names, will be overwritten
    if nameStyle == 'label':
        wordList = [word for _, _, word in entryList]
        multipleInstList = []
        for word in set(wordList):
            if wordList.count(word) > 1:
                multipleInstList.append(word)

        if len(multipleInstList) > 0:
            instListTxt = "\n".join(multipleInstList)
            print(("Overwriting wave files in: %s\n" +
                   "Intervals exist with the same name:\n%s") %
                  (outputPath, instListTxt))
            firstWarning = False

    # Output wave files
    outputFNList = []
    for i, entry in enumerate(entryList):
        start, stop, label = entry

        # Resolve output name
        outputName = outputTemplate % i
        if nameStyle == "append":
            outputName += "_" + label
        elif nameStyle == "append_no_i":
            outputName = name + "_" + label
        elif nameStyle == "label":
            outputName = label

        outputFNFullPath = join(outputPath, outputName + ".wav")

        if os.path.exists(outputFNFullPath) and firstWarning:
            print(("Overwriting wave files in: %s\n" +
                   "Files existed before or intervals exist with " +
                   "the same name:\n%s") % (outputPath, outputName))
        _extractSubwav(wavFN, outputFNFullPath, start, stop)
        outputFNList.append((start, stop, outputName + ".wav"))

        # Output the textgrid if requested
        if outputTGFlag is not False:
            subTG = tg.crop(noPartialIntervals, False, start, stop)

            if isinstance(outputTGFlag, str):
                # Iterate over a copy: removing tiers from the list that
                # is being looped over would skip every other tier.  A
                # separate loop name keeps the tierName parameter intact.
                for otherTierName in subTG.tierNameList[:]:
                    if otherTierName != outputTGFlag:
                        subTG.removeTier(otherTierName)

            # Shift the cropped textgrid so that it starts at 0
            offset = -1 * start
            subTG = subTG.editTimestamps(offset, offset, offset)
            subTG.minTimestamp = 0
            subTG.maxTimestamp = stop - start

            subTG.save(join(outputPath, outputName + ".TextGrid"))

    return outputFNList