Esempio n. 1
0
def aggregateSpeechRate(tgInfoPath, speechRatePath, outputPath, samplingRate):
    '''
    Merges the per-interval speech rate files of each recording into one file

    For every .txt file in tgInfoPath that has no output yet in outputPath,
    the .txt files in speechRatePath matching the file's base name are paired,
    in order, with the rows (start, stop, label) of the tgInfo file.

    The values read from each speech rate file are sample numbers counted
    from the beginning of the subset they belong to.  They are converted to
    seconds counted from the beginning of the larger file the subset
    originated from:  start + sampleNum / samplingRate

    One output line is written per interval, holding the comma-separated
    converted values.

    NOTE(review): utils.safeZip(enforceLength=True) presumably raises when
    the number of tgInfo rows and speech rate files differ -- confirm.
    '''

    utils.makeDir(outputPath)

    finishedList = utils.findFiles(outputPath, filterExt=".txt")

    for fn in utils.findFiles(tgInfoPath, filterExt=".txt",
                              skipIfNameInList=finishedList):

        # Load subset speech rate
        name = os.path.splitext(fn)[0]
        speechRateFNList = utils.findFiles(speechRatePath, filterExt=".txt",
                                           filterPattern=name)

        subSplitList = utils.openCSV(tgInfoPath, fn)

        outputList = []
        for splitInfo, speechRateFN in utils.safeZip([subSplitList,
                                                      speechRateFNList],
                                                     enforceLength=True):
            start, _, _ = splitInfo  # start, stop, label

            speechRateList = utils.openCSV(speechRatePath, speechRateFN,
                                           valueIndex=0)

            # Empty cells are dropped; everything else is shifted by the
            # start time of the interval
            timeList = [str(float(start) +
                            float(sampleNum) / float(samplingRate))
                        for sampleNum in speechRateList if sampleNum != '']

            outputList.append(",".join(timeList))

        # The context manager guarantees the file is flushed and closed
        with open(join(outputPath, fn), "w") as fd:
            fd.write("\n".join(outputList) + "\n")
def manualPhoneCount(tgInfoPath, isleFN, outputPath, skipList=None):
    '''
    Writes a "syllableCount,phoneCount" line for every row of each tgInfo file

    The label is taken from the third column of each row.  Counts come from
    isletool.getNumPhones() (called with maxFlag=True); labels found in
    skipList are written as "0,0" without being looked up.

    Files that already exist in outputPath are not regenerated.
    '''

    labelsToSkip = [] if skipList is None else skipList

    utils.makeDir(outputPath)

    isleDict = isletool.LexicalTool(isleFN)

    # NOTE(review): the other functions in this file build their skip list
    # with filterExt=".txt"; filterPaths=".txt" may be a typo -- confirm
    # against utils.findFiles.  The os.path.exists() test below covers the
    # skipping either way.
    alreadyDoneList = utils.findFiles(outputPath, filterPaths=".txt")
    for fn in utils.findFiles(tgInfoPath, filterExt=".txt",
                              skipIfNameInList=alreadyDoneList):

        outputFN = join(outputPath, fn)
        if os.path.exists(outputFN):
            continue
        print(fn)

        # Rows are: start, stop, label
        labelList = [row[2] for row in utils.openCSV(tgInfoPath, fn)]

        countRowList = []
        for label in labelList:
            if label in labelsToSkip:
                counts = (0, 0)
            else:
                counts = isletool.getNumPhones(isleDict, label, maxFlag=True)
            syllableCount, phoneCount = counts

            countRowList.append("%d,%d" % (syllableCount, phoneCount))

        with open(outputFN, "w") as fd:
            fd.write("\n".join(countRowList))
def aggregateFeatures(featurePath, featureList, headerStr=None):
    '''
    Joins the per-feature files column-wise, then concatenates the results

    featurePath - folder holding one subfolder per feature
    featureList - names of the feature subfolders to combine, in column order
    headerStr - if not None, written as the first line of all.csv

    Only .txt files that are present in every feature folder are processed.
    For each of them, <featurePath>/aggr/<name>.csv is written; each line is
    the file's base name followed by the matching row of every feature.

    Finally every .csv found in the aggr folder (other than all.csv itself,
    and including files left there by earlier runs) is concatenated into
    <featurePath>/aggr/all.csv.

    An empty featureList is treated as "no files in common".
    '''

    outputDir = join(featurePath, "aggr")
    utils.makeDir(outputDir)

    # Find the files that exist in all features
    fnList = [utils.findFiles(join(featurePath, feature), filterExt=".txt")
              for feature in featureList]

    actualFNList = []
    if fnList:
        # The order of the first feature's listing is kept; sets make each
        # membership test constant-time
        otherFNSetList = [set(subList) for subList in fnList[1:]]
        actualFNList = [featureFN for featureFN in fnList[0]
                        if all(featureFN in fnSet
                               for fnSet in otherFNSetList)]

    for featureFN in actualFNList:
        dataList = []
        for feature in featureList:
            featureDataList = utils.openCSV(join(featurePath, feature),
                                            featureFN,
                                            encoding="utf-8")
            dataList.append([",".join(row) for row in featureDataList])

        name = os.path.splitext(featureFN)[0]

        # First column: the file's base name, repeated once per row
        dataList.insert(0, [name] * len(dataList[0]))
        tDataList = utils.safeZip(dataList, enforceLength=True)
        outputTxt = "\n".join([",".join(row) for row in tDataList])

        outputFN = join(outputDir, name + ".csv")
        with io.open(outputFN, "w", encoding="utf-8") as fd:
            fd.write(outputTxt)

    # Cat all files together
    aggrOutput = []

    if headerStr is not None:
        aggrOutput.append(headerStr)

    for fn in utils.findFiles(outputDir, filterExt=".csv"):
        if fn == "all.csv":
            continue
        with io.open(join(outputDir, fn), "r", encoding='utf-8') as fd:
            aggrOutput.append(fd.read())

    with io.open(join(outputDir, "all.csv"), "w", encoding='utf-8') as fd:
        fd.write("\n".join(aggrOutput))
Esempio n. 4
0
def aggregateFeatures(featurePath, featureList, headerStr=None):
    '''
    Combines feature files side by side and builds a single all.csv

    featurePath - folder holding one subfolder per feature
    featureList - names of the feature subfolders to combine, in column order
    headerStr - if not None, written as the first line of all.csv

    A .txt file is processed only if it exists in every feature folder.  Its
    rows from each feature are joined line by line, prefixed with the file's
    base name, and written to <featurePath>/aggr/<name>.csv.

    Afterwards all .csv files in the aggr folder except all.csv (this
    includes files from previous runs) are concatenated into
    <featurePath>/aggr/all.csv.

    If featureList is empty, no per-file output is produced.
    '''

    outputDir = join(featurePath, "aggr")
    utils.makeDir(outputDir)

    # Find the files that exist in all features
    fnList = []
    for feature in featureList:
        fnSubList = utils.findFiles(join(featurePath, feature),
                                    filterExt=".txt")
        fnList.append(fnSubList)

    # Guard against an empty featureList (fnList[0] would not exist) and use
    # sets so that the "present everywhere" test is not quadratic
    actualFNList = []
    if len(fnList) > 0:
        remainingSets = [set(subList) for subList in fnList[1:]]
        for featureFN in fnList[0]:
            if all(featureFN in fnSet for fnSet in remainingSets):
                actualFNList.append(featureFN)

    for featureFN in actualFNList:
        name = os.path.splitext(featureFN)[0]

        dataList = []
        for feature in featureList:
            featureDataList = utils.openCSV(join(featurePath, feature),
                                            featureFN, encoding="utf-8")
            dataList.append([",".join(row) for row in featureDataList])

        # The base name becomes the first column of every output row
        dataList.insert(0, [name] * len(dataList[0]))
        tDataList = utils.safeZip(dataList, enforceLength=True)
        outputList = [",".join(row) for row in tDataList]
        outputTxt = "\n".join(outputList)

        outputFN = join(outputDir, name + ".csv")
        with io.open(outputFN, "w", encoding="utf-8") as fd:
            fd.write(outputTxt)

    # Cat all files together
    aggrOutput = []

    if headerStr is not None:
        aggrOutput.append(headerStr)

    for fn in utils.findFiles(outputDir, filterExt=".csv"):
        if fn == "all.csv":
            continue
        with io.open(join(outputDir, fn), "r", encoding='utf-8') as fd:
            aggrOutput.append(fd.read())

    with io.open(join(outputDir, "all.csv"), "w", encoding='utf-8') as fd:
        fd.write("\n".join(aggrOutput))
Esempio n. 5
0
def extractTGInfo(inputPath, outputPath, tierName, searchForMothersSpeech):
    '''
    Writes the entries of one tier of every textgrid to a .txt file

    Same as textgrids.extractTGInfo?

    inputPath - folder containing the .TextGrid files
    outputPath - folder that receives one <name>.txt per textgrid
    tierName - the tier whose entries are exported
    searchForMothersSpeech - if true, only entries labeled "MS" are exported

    Each output line has the form "start,stop,label"; the file is utf-8
    encoded and ends with a newline.
    '''

    utils.makeDir(outputPath)

    for name in utils.findFiles(inputPath, filterExt=".TextGrid",
                                stripExt=True):
        print(name)

        tg = tgio.openTextGrid(join(inputPath, name + ".TextGrid"))
        entryList = tg.tierDict[tierName].getEntries()

        if searchForMothersSpeech:
            entryList = [(start, stop, label)
                         for start, stop, label in entryList
                         if label == "MS"]

        outputList = ["%f,%f,%s" % (start, stop, label)
                      for start, stop, label in entryList]
        outputTxt = "\n".join(outputList) + "\n"

        # Close the file deterministically instead of relying on garbage
        # collection of the anonymous file object
        outputFN = join(outputPath, name + ".txt")
        with codecs.open(outputFN, "w", encoding="utf-8") as fd:
            fd.write(outputTxt)
Esempio n. 6
0
def toAbsoluteTime(namePrefix, matlabOutputPath, startTimeList):
    '''
    Shifts sampled times from interval-relative to absolute time

    The input may be split across a number of files.  Every .txt file in
    matlabOutputPath that matches namePrefix is assumed to hold one part of
    the same source file.

    namePrefix - name of the original wav file with no suffix
    matlabOutputPath - the path where the output of the matlab script is
                       placed
    startTimeList - one start time for each file in matlabOutputPath that
                    matches namePrefix, in the same order as the files

    Returns a list of lists of strings; each sublist holds the shifted values
    of one matching file (empty cells are dropped)
    '''
    matchingFNList = utils.findFiles(matlabOutputPath, filterExt=".txt",
                                     filterPattern=namePrefix)

    pairedList = utils.safeZip([startTimeList, matchingFNList],
                               enforceLength=True)

    absoluteTimeList = []
    for offset, fn in pairedList:
        relativeTimeList = utils.openCSV(matlabOutputPath, fn, valueIndex=0)

        shiftedList = []
        for relativeTime in relativeTimeList:
            if relativeTime == '':
                continue
            shiftedList.append(str(float(offset) + float(relativeTime)))

        absoluteTimeList.append(shiftedList)

    return absoluteTimeList
Esempio n. 7
0
def extractPraatPitch(intensityAndPitchPath,
                      textgridPath,
                      tierName,
                      outputPath,
                      nullLabel=""):
    '''
    Writes pitch measures for the labeled intervals of each recording

    intensityAndPitchPath - folder of .txt files read by loadPitchAndTime()
    textgridPath - folder of .TextGrid files with matching base names; data
                   files without a matching textgrid are skipped
    tierName - the tier whose intervals are measured
    outputPath - folder that receives one <name>.txt per processed file
    nullLabel - intervals with this label (after stripping whitespace) or
                with an empty label are ignored

    Each output line is the string returned by getPitchMeasures() for one
    interval.
    '''

    utils.makeDir(outputPath)

    for fn in utils.findFiles(intensityAndPitchPath, filterExt=".txt"):

        name = os.path.splitext(fn)[0]

        # Check for the textgrid first so that no data is loaded for files
        # that are going to be skipped anyway
        tgFN = join(textgridPath, name + ".TextGrid")
        if not os.path.exists(tgFN):
            continue

        dataList = loadPitchAndTime(intensityAndPitchPath, fn)

        tg = praatio.openTextGrid(tgFN)
        tier = tg.tierDict[tierName]

        pitchData = []
        for valueList, label, _, _ in getValuesForIntervals(
                dataList, tier.entryList):
            label = label.strip()
            if label == "" or label == nullLabel:
                continue
            f0Values = [f0Val for _, f0Val, _ in valueList]
            pitchData.append(
                getPitchMeasures(f0Values, name, label, True, True))

        # The context manager makes sure the output is flushed and closed
        with open(join(outputPath, "%s.txt" % name), "w") as fd:
            fd.write("\n".join(pitchData))
Esempio n. 8
0
def toAbsoluteTime(namePrefix, matlabOutputPath, startTimeList):
    '''
    Converts the sampled times from relative to absolute time

    The values may be spread over several files.  All .txt files in
    matlabOutputPath that match namePrefix are taken to be consecutive parts
    of one source file.

    namePrefix - name of the original wav file with no suffix
    matlabOutputPath - the path where the output of the matlab script is
                       placed
    startTimeList - the start time of each part; there must be one value for
                    each file in matlabOutputPath matching namePrefix

    Returns a list with one sublist per matching file; a sublist contains
    the non-empty values of the file, each increased by the part's start
    time and converted to a string
    '''

    def _shiftFile(startTime, fn):
        # Read one file and move its values by startTime
        valueList = utils.openCSV(matlabOutputPath, fn, valueIndex=0)
        return [str(float(startTime) + float(value))
                for value in valueList if value != '']

    # Load subset speech rate
    speechRateFNList = utils.findFiles(matlabOutputPath,
                                       filterExt=".txt",
                                       filterPattern=namePrefix)

    return [_shiftFile(startTime, fn)
            for startTime, fn in utils.safeZip(
                [startTimeList, speechRateFNList], enforceLength=True)]
def syllabifyTextgrids(tgPath, islePath):
    '''
    Creates syllabified copies of the textgrids in tgPath

    For each .TextGrid in tgPath that has no counterpart yet in
    <tgPath>/syllabifiedTGs, praattools.syllabifyTextgrid() is run on the
    "words" and "phones" tiers.  The saved textgrid holds the original
    "words" and "phones" tiers plus the generated "tonic" tier; the generated
    "syllable" tier is not added.

    islePath - passed to isletool.LexicalTool() to load the dictionary
    '''

    isleDict = isletool.LexicalTool(islePath)

    outputPath = join(tgPath, "syllabifiedTGs")
    utils.makeDir(outputPath)
    skipLabelList = ["<VOCNOISE>", "xx", "<SIL>", "{B_TRANS}", '{E_TRANS}']

    for fn in utils.findFiles(tgPath, filterExt=".TextGrid"):
        destinationFN = join(outputPath, fn)

        if not os.path.exists(destinationFN):
            sourceTG = tgio.openTextgrid(join(tgPath, fn))

            syllableTG = praattools.syllabifyTextgrid(
                isleDict, sourceTG, "words", "phones",
                skipLabelList=skipLabelList)

            keptTierList = (sourceTG.tierDict["words"],
                            sourceTG.tierDict["phones"],
                            syllableTG.tierDict["tonic"])

            outputTG = tgio.Textgrid()
            for tier in keptTierList:
                outputTG.addTier(tier)

            outputTG.save(destinationFN)
Esempio n. 10
0
def extractPraatPitchForEpochs(pitchPath, epochPath, tgInfoPath, outputPath):
    '''
    Writes pitch measures for the speech that falls inside each epoch

    pitchPath - folder of .txt files read by praat_pi.loadPitchAndTime()
    epochPath - folder of files with rows (epochNum, start, stop)
    tgInfoPath - folder of files with rows (start, stop, label) marking when
                 the mother was speaking
    outputPath - folder that receives one <name>.txt per pitch file

    The epoch and tgInfo files must have the same file name as the pitch
    file.  Each output line is the string returned by
    praat_pi.extractPitchMeasuresForSegment() for one epoch.
    '''

    utils.makeDir(outputPath)

    for fn in utils.findFiles(pitchPath, filterExt=".txt"):
        name = os.path.splitext(fn)[0]

        print(name)

        epochList = [(epochNum, float(start), float(stop))
                     for epochNum, start, stop
                     in utils.openCSV(epochPath, fn)]

        entryList = [(float(start), float(stop))
                     for start, stop, _ in utils.openCSV(tgInfoPath, fn)]

        dataList = praat_pi.loadPitchAndTime(pitchPath, fn)

        # Get F0 values for the intervals when the mother was speaking
        speechDataList = []
        for start, stop in entryList:
            speechDataList.extend(
                praat_pi.getAllValuesInTime(start, stop, dataList))

        # Get F0 values for the times the mother is speaking for each epoch
        pitchData = []
        for epochNum, start, stop in epochList:
            epochValueList = praat_pi.getAllValuesInTime(start, stop,
                                                         speechDataList)
            f0List = [f0Val for _, f0Val, _ in epochValueList]

            pitchData.append(
                praat_pi.extractPitchMeasuresForSegment(
                    f0List, name, epochNum,
                    medianFilterWindowSize=None,
                    filterZeroFlag=True))

        # The context manager makes sure the output is flushed and closed
        with open(join(outputPath, "%s.txt" % name), "w") as fd:
            fd.write("\n".join(pitchData) + "\n")
Esempio n. 11
0
def renameTiers(inputPath, outputPath, includeMothersPhones=False):
    '''
    Normalizes the tier names of every textgrid in inputPath

    Each item of renameList pairs the accepted spellings of a tier name with
    the name it is replaced by; replaceTierName() is called once per item,
    in list order.  The renamed textgrids are saved under the same file name
    in outputPath.

    includeMothersPhones - if true, "Mother's phones" -> "Mother's Phones"
                           is inserted as the second rule

    If replaceTierName() raises a ValueError, the file name is printed and
    the error is re-raised.
    '''

    # NOTE(review): "child's backchannel" is mapped to "Mother's Backchannel"
    # -- confirm that this is intended
    renameList = [(["Mother", "Mother's Speech", "Mother's speech", "mother's speech", "Mother Speech", "mother speech"], "Mother"),
                  (["Mother's Backchannel", "Mother's backchannel", "mother's backchannel", "child's backchannel"], "Mother's Backchannel"),
                  (["Child", "Child's speech", "Child's Speech", "child's speech", "Child Speech", "child speech"], "Child"),
                  (["Room", "Extraneous room noise", "Extraneous Room Noise", "Extraneous Noise", "Room Noise", "room noise", "Room noise", "extraneous room noise"], "Room"),
                  (["Timer", "Time"], "Timer"),
                  (["Epochs", "epochs",], "Epochs"),
                  ]

    if includeMothersPhones:
        renameList.insert(1, (["Mother's phones",], "Mother's Phones"))

    utils.makeDir(outputPath)

    for fn in utils.findFiles(inputPath, filterExt=".TextGrid"):

        print(fn)
        tg = tgio.openTextGrid(join(inputPath, fn))

        for oldNameList, newName in renameList:
            try:
                tg = replaceTierName(tg, oldNameList, newName)
            except ValueError:
                # Identify the offending file before propagating the error
                print(fn)
                raise

        tg.save(join(outputPath, fn))
Esempio n. 12
0
def _addSyllableNucleiToTextgrids(wavPath, tgPath, tierName,
                                 syllableNucleiPath, outputPath):
    '''
    Adds a "Syllable Nuclei" point tier to the textgrid of every wav file

    For each .wav in wavPath, the start times of the entries of tierName (in
    the textgrid of the same name in tgPath) are passed to
    uwe_sr.toAbsoluteTime() together with syllableNucleiPath.  The returned
    timestamps are flattened into a single list and become the points of the
    new tier.  Points are labeled with their zero-padded index.

    The resulting textgrid is saved in outputPath.
    '''
    # Add syllable nuclei to textgrids
    for name in utils.findFiles(wavPath, filterExt=".wav", stripExt=True):

        # The textgrid is parsed once and reused for both reading the
        # interval start times and attaching the new tier
        tg = tgio.openTextgrid(join(tgPath, name + ".TextGrid"))
        startTimeList = [entry[0]
                         for entry in tg.tierDict[tierName].entryList]
        nucleusSyllableList = uwe_sr.toAbsoluteTime(name, syllableNucleiPath,
                                                    startTimeList)
        flattenedSyllableList = [nuclei for sublist in nucleusSyllableList
                                 for nuclei in sublist]
        wavFN = join(wavPath, name + ".wav")
        duration = audio_scripts.getSoundFileDuration(wavFN)

        # Pad the labels so that they all have the same number of digits
        oom = my_math.orderOfMagnitude(len(flattenedSyllableList))
        labelTemplate = "%%0%dd" % (oom + 1)

        pointList = [(timestamp, labelTemplate % i)
                     for i, timestamp in enumerate(flattenedSyllableList)]
        print(flattenedSyllableList)
        tier = tgio.PointTier("Syllable Nuclei", pointList, 0, duration)

        tg.addTier(tier)
        tg.save(join(outputPath, name + ".TextGrid"))
Esempio n. 13
0
def filterTextgrids(tgPath, speechTierName, laughterTierName, minDuration, outputPath):
    '''
    Filters the entries of the speech tier of every textgrid in tgPath

    An entry of speechTierName is kept only if
    - insituLaughterCheck() returns a true value for its interval, and
    - it is longer than minDuration (uwe's script crashed on an utterance of
                                     length 0.013 seconds)

    The filtered textgrids are saved under the same name in outputPath.
    Every entry of the speech tier is printed while it is examined.

    NOTE(review): this function was described as removing pauses (FP, SP)
    and speech (MS) that occurs with insitu laughter (LA).  No label is
    inspected here, so whether that holds depends on what
    insituLaughterCheck() returns -- confirm.
    '''

    utils.makeDir(outputPath)

    for fn in utils.findFiles(tgPath, filterExt=".TextGrid"):

        tg = tgio.openTextGrid(join(tgPath, fn))

        # First pass: keep the entries accepted by insituLaughterCheck()
        acceptedList = []
        for entry in tg.tierDict[speechTierName].entryList:
            start, stop, _ = entry
            print(entry)
            accepted = insituLaughterCheck(start, stop, tg, laughterTierName)
            if accepted:
                acceptedList.append(entry)

        # Second pass: drop everything that is not longer than the threshold
        filteredList = []
        for start, stop, label in acceptedList:
            if float(stop) - float(start) > minDuration:
                filteredList.append((start, stop, label))

        tg.replaceTier(speechTierName, filteredList)
        tg.save(join(outputPath, fn))
def syllabifyTextgrids(tgPath, islePath):
    '''
    Syllabifies every textgrid in tgPath that has not been processed yet

    The output goes to <tgPath>/syllabifiedTGs, under the same file name; a
    textgrid is skipped if its output file already exists.

    praattools.syllabifyTextgrid() is run with the "words" and "phones"
    tiers and the dictionary loaded from islePath.  The output textgrid
    contains, in this order, the input's "words" tier, the input's "phones"
    tier and the generated "tonic" tier (the generated "syllable" tier is
    left out).
    '''

    isleDict = isletool.LexicalTool(islePath)

    outputPath = join(tgPath, "syllabifiedTGs")
    utils.makeDir(outputPath)

    # Labels handed to syllabifyTextgrid() as skipLabelList
    labelsToSkip = ["<VOCNOISE>", "xx", "<SIL>", "{B_TRANS}", '{E_TRANS}']

    for tgName in utils.findFiles(tgPath, filterExt=".TextGrid"):

        outputFN = join(outputPath, tgName)
        if os.path.exists(outputFN):
            continue

        inputTG = tgio.openTextgrid(join(tgPath, tgName))

        generatedTG = praattools.syllabifyTextgrid(
            isleDict,
            inputTG,
            "words",
            "phones",
            skipLabelList=labelsToSkip)

        resultTG = tgio.Textgrid()
        for copiedTierName in ("words", "phones"):
            resultTG.addTier(inputTG.tierDict[copiedTierName])
        resultTG.addTier(generatedTG.tierDict["tonic"])

        resultTG.save(outputFN)
Esempio n. 15
0
def removeFilledPauses(inputPath, outputPath):
    '''
    Copies the .txt files of inputPath, keeping only the rows labeled "MS"

    Rows are expected to have three columns: start, stop, label.  The kept
    rows are written comma-separated, one per line, to a file of the same
    name in outputPath.
    '''

    utils.makeDir(outputPath)

    for fn in utils.findFiles(inputPath, filterExt=".txt"):
        rowList = [",".join([start, stop, label])
                   for start, stop, label in utils.openCSV(inputPath, fn)
                   if label == "MS"]

        # The context manager makes sure the output is flushed and closed
        with open(join(outputPath, fn), "w") as fd:
            fd.write("\n".join(rowList) + "\n")
Esempio n. 16
0
def adjustEpochNumbers(inputPath, outputPath):
    '''
    Rewrites the epoch files of inputPath with every epoch number increased
    by one

    Rows are expected to have three columns: epoch number, start, stop.  The
    new epoch number is zero-padded to two digits; start and stop are copied
    unchanged.
    '''

    utils.makeDir(outputPath)

    for fn in utils.findFiles(inputPath, filterExt=".txt"):
        # 'epochNum' rather than 'id', which would shadow the builtin
        rowList = ["%02d,%s,%s" % (int(epochNum) + 1, start, stop)
                   for epochNum, start, stop
                   in utils.openCSV(inputPath, fn)]

        # The context manager makes sure the output is flushed and closed
        with open(join(outputPath, fn), "w") as fd:
            fd.write("\n".join(rowList) + "\n")
def resampleAudio(newSampleRate, inputPath, soxPath="/opt/local/bin/sox"):
    '''
    Resamples every .wav file in inputPath with sox

    newSampleRate - the value passed to sox's -r option (formatted with %f)
    inputPath - folder containing the .wav files; the resampled files are
                written to <inputPath>/resampled_wavs under the same names
    soxPath - location of the sox executable

    The exit status of sox is not checked.
    '''
    # Local import: only this function needs it
    import subprocess

    outputPath = join(inputPath, "resampled_wavs")
    utils.makeDir(outputPath)

    for fn in utils.findFiles(inputPath, filterExt=".wav"):
        # An argument list (no shell) keeps file names containing spaces or
        # shell metacharacters intact
        soxCmd = [soxPath,
                  join(inputPath, fn),
                  "-r", "%f" % newSampleRate,
                  join(outputPath, fn),
                  "rate", "-v", "96k"]
        subprocess.call(soxCmd)
def manualPhoneCountForEpochs(manualCountsPath, tgInfoPath, epochPath,
                              outputPath):
    '''
    Sums the manual syllable and phone counts that fall inside each epoch

    For every tgInfo file without an output file yet, the files of the same
    name in epochPath (rows: epoch num, start, stop), tgInfoPath (rows:
    start, stop, ...) and manualCountsPath (rows: syllable count, phone
    count) are loaded.  The tgInfo and manual count rows are paired in order.

    Each interval contributes to an epoch in proportion to the value that
    percentInside() returns for it, which accounts for intervals that
    straddle an epoch boundary.

    One line is written per epoch:  syllable count, phone count and speech
    duration, each formatted with %f.
    '''

    utils.makeDir(outputPath)

    completedList = utils.findFiles(outputPath, filterExt=".txt")
    pendingList = utils.findFiles(tgInfoPath, filterExt=".txt",
                                  skipIfNameInList=completedList)

    for fn in pendingList:

        epochRowList = utils.openCSV(epochPath, fn)
        intervalRowList = utils.openCSV(tgInfoPath, fn)
        countRowList = utils.openCSV(manualCountsPath, fn)

        lineList = []
        for epochRow in epochRowList:  # Epoch num, start, stop
            epochStart = float(epochRow[1])
            epochStop = float(epochRow[2])

            syllableTotal = 0
            phoneTotal = 0
            spokenTime = 0

            # Every interval is examined; those outside of the epoch are
            # expected to receive a weight of zero from percentInside()
            pairedList = utils.safeZip([intervalRowList, countRowList],
                                       enforceLength=True)
            for intervalRow, countRow in pairedList:
                start = float(intervalRow[0])
                stop = float(intervalRow[1])
                syllableCount = float(countRow[0])
                phoneCount = float(countRow[1])

                weight = percentInside(start, stop, epochStart, epochStop)

                spokenTime += (stop - start) * weight
                syllableTotal += syllableCount * weight
                phoneTotal += phoneCount * weight

            lineList.append("%f,%f,%f" % (syllableTotal, phoneTotal,
                                          spokenTime))

        with open(join(outputPath, fn), "w") as fd:
            fd.write("\n".join(lineList))
Esempio n. 19
0
def replaceAllLabelsInMotherTierWithMS(inputPath, outputPath):
    '''
    Relabels every entry of the "Mother" tier as "MS"

    The start and stop times of the entries are kept.  Each textgrid in
    inputPath is saved under the same name in outputPath.
    '''

    utils.makeDir(outputPath)

    speechTierName = "Mother"

    for fn in utils.findFiles(inputPath, filterExt=".TextGrid"):

        tg = tgio.openTextGrid(join(inputPath, fn))

        relabeledList = []
        for start, stop, _ in tg.tierDict[speechTierName].entryList:
            relabeledList.append([start, stop, "MS"])

        tg.replaceTier(speechTierName, relabeledList)
        tg.save(join(outputPath, fn))
Esempio n. 20
0
def generateEpochRowHeader(epochPath, outputPath, sessionCode):
    '''
    Writes the identifying columns for every epoch of each epoch file

    Rows of the input files are expected to be (epoch, start, end).  Each
    output line contains: the third "_"-separated field of the file name,
    sessionCode, epoch, start, end and the epoch's duration (end - start).
    '''

    utils.makeDir(outputPath)

    for fn in utils.findFiles(epochPath, filterExt=".txt"):
        epochList = utils.openCSV(epochPath, fn)

        # 'fileID' rather than 'id', which would shadow the builtin
        fileID = fn.split("_")[2]

        outputList = []
        for epoch, epochStart, epochEnd in epochList:
            epochDuration = str(float(epochEnd) - float(epochStart))
            outputList.append(",".join([fileID, sessionCode, epoch,
                                        epochStart, epochEnd,
                                        epochDuration]))

        # The context manager makes sure the output is flushed and closed
        with open(join(outputPath, fn), "w") as fd:
            fd.write("\n".join(outputList) + "\n")
Esempio n. 21
0
def analyzeInsituLaughter(inputPath, outputPath):
    '''
    Lists the "Mother" tier entries flagged by insituLaughterCheck()

    Every entry of the "Mother" tier of each textgrid in inputPath is checked
    against the "Mother's Backchannel" tier.  The entries for which
    insituLaughterCheck() returns a true value are written to
    <outputPath>/insitu_laughter_events.csv as "file name,start,stop,label".

    outputPath must already exist.
    '''

    outputList = []
    for fn in utils.findFiles(inputPath, filterExt=".TextGrid"):

        tg = tgio.openTextGrid(join(inputPath, fn))
        tier = tg.tierDict["Mother"]
        for start, stop, label in tier.getEntries():
            if insituLaughterCheck(start, stop, tg, "Mother's Backchannel"):
                outputList.append("%s,%02.02f,%02.02f,%s" %
                                  (fn, start, stop, label))

    # The context manager makes sure the output is flushed and closed
    outputFN = join(outputPath, "insitu_laughter_events.csv")
    with open(outputFN, "w") as fd:
        fd.write("\n".join(outputList) + "\n")
Esempio n. 22
0
def _calculateSyllablesPerSecond(wavPath, syllableNucleiPath):
    '''
    Prints the overall syllables/second of every wav file in wavPath

    The number of syllables is the number of values returned by
    uwe_sr.toAbsoluteTime() for the file (called with a single start time of
    0); it is divided by the duration of the wav file.
    '''

    for name in utils.findFiles(wavPath, filterExt=".wav", stripExt=True):
        nestedNucleusList = uwe_sr.toAbsoluteTime(name, syllableNucleiPath,
                                                  [0, ])

        # Count the nuclei across all of the sublists
        numSyllables = 0
        for subList in nestedNucleusList:
            for _ in subList:
                numSyllables += 1

        duration = audio_scripts.getSoundFileDuration(
            join(wavPath, name + ".wav"))
        syllablesPerSecond = numSyllables / float(duration)

        print("%s - %.02f syllables/second" % (name, syllablesPerSecond))
Esempio n. 23
0
def _runSpeechRateEstimateOnIntervals(wavPath, tgPath, tierName, wavTmpPath,
                                      syllableNucleiPath, matlabEXE,
                                      matlabScriptsPath, printCmd=True,
                                      outputTGFlag=False):
    '''
    Splits each wav file on a textgrid tier, then finds the syllable nuclei

    Every .wav in wavPath is split with praatio_scripts.splitAudioOnTier(),
    using tierName of the textgrid with the same name in tgPath; the pieces
    go to wavTmpPath.  uwe_sr.findSyllableNuclei() is then run once on
    wavTmpPath, with its output directed to syllableNucleiPath.

    matlabEXE, matlabScriptsPath and printCmd are forwarded to
    uwe_sr.findSyllableNuclei(); outputTGFlag is forwarded to
    praatio_scripts.splitAudioOnTier().
    '''

    utils.makeDir(wavTmpPath)

    # Split audio files into subsections based on textgrid intervals
    nameList = utils.findFiles(wavPath, filterExt=".wav", stripExt=True)
    for name in nameList:
        wavFN = join(wavPath, name + ".wav")
        tgFN = join(tgPath, name + ".TextGrid")
        praatio_scripts.splitAudioOnTier(wavFN, tgFN, tierName, wavTmpPath,
                                         outputTGFlag)

    uwe_sr.findSyllableNuclei(wavTmpPath, syllableNucleiPath, matlabEXE,
                              matlabScriptsPath, printCmd)
Esempio n. 24
0
def toWords(featurePath, outputPath):
    '''
    Rewrites each transcript as one whitespace-separated token per line

    The transcripts are the .txt files in <featurePath>/txt.  Input and
    output are utf-8 encoded; the output files keep the input file names.
    '''

    utils.makeDir(outputPath)

    transcriptPath = join(featurePath, "txt")

    for fn in utils.findFiles(transcriptPath, filterExt=".txt"):
        with io.open(join(transcriptPath, fn), "r", encoding="utf-8") as fd:
            wordList = fd.read().split()

        oneWordPerLine = "\n".join(wordList)

        with io.open(join(outputPath, fn), "w", encoding="utf-8") as fd:
            fd.write(oneWordPerLine)
Esempio n. 25
0
def generateEpochFiles(tgPath, wavPath, epPath):
    '''
    Writes one epoch file ("label,start,stop" per line) for every recording

    The epochs are taken from the "Epochs" tier of each textgrid in tgPath.

    If a textgrid has no "Epochs" tier (KeyError), the user is asked for an
    epoch duration in seconds and the epoch files are instead generated for
    every .wav in wavPath:  consecutive epochs of that duration, plus a
    shorter final epoch if the (truncated to whole seconds) duration of the
    recording is not a multiple of it.  In this case lines have the form
    "NN, start, stop".
    '''
    utils.makeDir(epPath)
    try:
        for filename in utils.findFiles(tgPath, filterExt=".TextGrid",
                                        stripExt=True):
            tgrid = tgio.openTextGrid(join(tgPath, filename + ".TextGrid"))

            # Look the tier up before opening the output so that a missing
            # tier does not leave an empty epoch file behind
            entryList = tgrid.tierDict["Epochs"].entryList
            with open(join(epPath, filename + ".txt"), "w") as epochFile:
                for start, stop, label in entryList:
                    epochFile.write(str(label) + ',' + str(start) + ',' +
                                    str(stop) + '\n')

    except KeyError:
        # Only a missing tier triggers the interactive fallback; any other
        # error (including Ctrl-C) propagates to the caller

        # raw_input() only exists in Python 2; input() replaces it in 3
        try:
            readLine = raw_input
        except NameError:
            readLine = input

        epDuration = int(readLine("\nOk, the textgrids don't have an 'Epochs' tier.  How long are the epochs in this dataset?\nEnter the epoch duration in seconds: "))
        print("\nOk. Epochs are each %dsecs max.\n" % epDuration)

        durationList = []
        for fn in utils.findFiles(wavPath, filterExt=".wav"):
            duration = audio_scripts.getSoundFileDuration(join(wavPath, fn))
            durationList.append((fn, int(duration)))

        durationList.sort()

        for fn, duration in durationList:
            outputFN = os.path.splitext(fn)[0] + ".txt"

            numEpoches = int(duration / epDuration)
            epochList = [(i, i * epDuration, (i + 1) * epDuration)
                         for i in range(numEpoches)]

            remainder = duration % epDuration
            if remainder != 0:
                startTime = numEpoches * epDuration
                # NOTE(review): the full epochs are numbered 0..numEpoches-1
                # but the final partial epoch is numbered numEpoches+1, so
                # the number numEpoches is never used -- confirm that this
                # is intended
                epochList.append((numEpoches + 1, startTime,
                                  startTime + remainder))

            epochList = ["%02d, %02d, %02d" % row for row in epochList]

            with open(join(epPath, outputFN), "w") as epochFN:
                epochFN.write("\n".join(epochList) + "\n")
Esempio n. 26
0
def addEpochsToTextgrids(tgPath, epochPath, outputPath):
    '''
    Adds an "epochs" interval tier to every textgrid in tgPath

    The intervals are read from <epochPath>/<name>.txt, whose rows are
    expected to be (label, start, end).  The tier spans from 0 to the
    textgrid's maxTimestamp.  The result is saved under the same name in
    outputPath.
    '''

    utils.makeDir(outputPath)

    for name in utils.findFiles(tgPath, filterExt=".TextGrid", stripExt=True):
        print(name)
        tg = tgio.openTextGrid(join(tgPath, name + ".TextGrid"))

        entryList = [(float(start), float(end), label)
                     for label, start, end
                     in utils.openCSV(epochPath, name + ".txt")]

        tier = tgio.IntervalTier("epochs", entryList, minT=0,
                                 maxT=tg.maxTimestamp)

        tg.addTier(tier)
        tg.save(join(outputPath, name + ".TextGrid"))
Esempio n. 27
0
def extractWords(tgPath, tierName, outputPath):
    '''
    Writes the words of one tier of each textgrid, one word per line

    The entries are provided by _navigateTGs(); the label (third item) of
    each entry is split on whitespace.  The output is a utf-8 encoded
    <name>.txt in outputPath.
    '''

    utils.makeDir(outputPath)

    for name in utils.findFiles(tgPath, filterExt=".TextGrid", stripExt=True):
        wordList = []
        for entry in _navigateTGs(tgPath, name, tierName):
            wordList.extend(["%s" % (word) for word in entry[2].split()])

        oneWordPerLine = "\n".join(wordList)

        with io.open(join(outputPath, name + ".txt"), "w",
                     encoding="utf-8") as fd:
            fd.write(oneWordPerLine)
Esempio n. 28
0
def extractRMSIntensity(intensityAndPitchPath,
                        textgridPath,
                        tierName,
                        outputPath,
                        nullLabel=""):
    '''
    Writes the RMS intensity of the labeled intervals of each recording

    intensityAndPitchPath - folder of .txt files read by loadPitchAndTime()
    textgridPath - folder of .TextGrid files with matching base names; data
                   files without a matching textgrid are skipped
    tierName - the tier whose intervals are measured
    outputPath - folder that receives one <name>.txt per processed file
    nullLabel - intervals with this label (after stripping whitespace) or
                with an empty label are ignored

    Intensity values equal to 0.0 are excluded from the calculation.  An
    interval that is left without values gets an RMS intensity of 0.  One
    value is written per line.
    '''

    utils.makeDir(outputPath)

    for fn in utils.findFiles(intensityAndPitchPath, filterExt=".txt"):

        name = os.path.splitext(fn)[0]

        # Check for the textgrid first so that no data is loaded for files
        # that are going to be skipped anyway
        tgFN = join(textgridPath, name + ".TextGrid")
        if not os.path.exists(tgFN):
            continue

        dataList = loadPitchAndTime(intensityAndPitchPath, fn)

        tg = praatio.openTextGrid(tgFN)
        tier = tg.tierDict[tierName]

        print(fn)

        rmsIntensityList = []
        for valueList, label, _, _ in getValuesForIntervals(
                dataList, tier.entryList):
            label = label.strip()
            if label == "" or label == nullLabel:
                continue

            intensityVals = [intensityVal for _, _, intensityVal in valueList
                             if intensityVal != 0.0]

            rmsIntensity = 0
            if len(intensityVals) != 0:
                rmsIntensity = my_math.rms(intensityVals)

            rmsIntensityList.append(str(rmsIntensity))

        # The context manager makes sure the output is flushed and closed
        with open(join(outputPath, "%s.txt" % name), "w") as fd:
            fd.write("\n".join(rmsIntensityList))
Esempio n. 29
0
def findFrequenciesForWordLists(featurePath, countObj, frequencyNormFunc):
    '''
    Writes "count,freq,logFreq" for every word of each word list

    The word lists are the files in <featurePath>/words; the output files
    have the same names and are placed in <featurePath>/frequency.

    countObj - object whose getFrequency(word, frequencyNormFunc,
               outOfDictionaryValue=1) returns (count, freq, logFreq)
    frequencyNormFunc - forwarded to countObj.getFrequency()
    '''

    def _formatWord(word):
        # Look a single word up and render its three values
        count, freq, logFreq = countObj.getFrequency(word,
                                                     frequencyNormFunc,
                                                     outOfDictionaryValue=1)
        return "%f,%f,%f" % (count, freq, logFreq)

    frequencyPath = join(featurePath, "frequency")
    utils.makeDir(frequencyPath)

    wordsPath = join(featurePath, "words")

    for fn in utils.findFiles(wordsPath):
        wordList = utils.openCSV(wordsPath, fn, valueIndex=0, encoding="utf-8")
        outputTxt = "\n".join([_formatWord(word) for word in wordList])

        with open(join(frequencyPath, fn), "w") as fd:
            fd.write(outputTxt)
Esempio n. 30
0
def findFrequenciesForWordLists(featurePath, countObj, frequencyNormFunc):
    '''
    Looks up the frequency of every word in each word list

    Input: the files in <featurePath>/words (read as utf-8).
    Output: files of the same name in <featurePath>/frequency, with one
    "count,freq,logFreq" line per word.

    The three values are whatever countObj.getFrequency() returns when it is
    called with the word, frequencyNormFunc and outOfDictionaryValue=1.
    '''

    wordsPath = join(featurePath, "words")
    frequencyPath = join(featurePath, "frequency")
    utils.makeDir(frequencyPath)

    for wordListFN in utils.findFiles(wordsPath):
        rowList = []
        for word in utils.openCSV(wordsPath, wordListFN, valueIndex=0,
                                  encoding="utf-8"):
            count, freq, logFreq = countObj.getFrequency(
                word, frequencyNormFunc, outOfDictionaryValue=1)
            rowList.append("%f,%f,%f" % (count, freq, logFreq))

        outputTxt = "\n".join(rowList)
        with open(join(frequencyPath, wordListFN), "w") as fd:
            fd.write(outputTxt)
Esempio n. 31
0
def extractTGInfo(inputPath, outputPath, tierName):
    '''
    Dumps the entries of one tier of each textgrid to a text file

    For every .TextGrid file in inputPath, writes <name>.txt (utf-8) to
    outputPath with one "start,stop,label" line per entry yielded by
    _navigateTGs().  Textgrids whose output file already exists are
    skipped; the name of each textgrid that is processed is printed.
    '''
    utils.makeDir(outputPath)

    tgNameList = utils.findFiles(inputPath, filterExt=".TextGrid",
                                 stripExt=True)
    for name in tgNameList:
        outputFN = join(outputPath, name + ".txt")

        # Do not redo work from an earlier run
        if os.path.exists(outputFN):
            continue
        print(name)

        rowList = ["%f,%f,%s" % (start, stop, label)
                   for start, stop, label
                   in _navigateTGs(inputPath, name, tierName)]
        outputTxt = "\n".join(rowList)

        with io.open(outputFN, "w", encoding="utf-8") as fd:
            fd.write(outputTxt)
Esempio n. 32
0
def _calculateSyllablesPerSecondForIntervals(wavPath, tgPath, tierName,
                                             syllableNucleiPath):
    '''
    Prints the syllables-per-second rate of each interval in a tier

    For every .wav file in wavPath, the textgrid of the same name is
    opened from tgPath.  The start times of the entries on tierName are
    passed to uwe_sr.toAbsoluteTime(), which is expected to return one
    list of syllable nuclei per entry.  The rate for an entry is the
    number of items in its list divided by the entry's duration.

    Nothing is returned or written to disk; one line is printed per file.
    '''
    for name in utils.findFiles(wavPath, filterExt=".wav", stripExt=True):

        tgFN = join(tgPath, name + ".TextGrid")
        entryList = tgio.openTextGrid(tgFN).tierDict[tierName].entryList

        startTimeList = [entry[0] for entry in entryList]
        nucleiPerInterval = uwe_sr.toAbsoluteTime(name, syllableNucleiPath,
                                                  startTimeList)

        # safeZip with enforceLength=True pairs each nuclei list with its
        # entry -- presumably rejecting lists of differing length
        pairedList = utils.safeZip([nucleiPerInterval, entryList],
                                   enforceLength=True)

        rateList = []
        for nucleiList, entry in pairedList:
            start, stop = entry[0], entry[1]
            syllablesPerSecond = len(nucleiList) / (stop - start)
            rateList.append(str(syllablesPerSecond))

        rateTxt = ",".join(rateList)
        print("%s - %s (syllables/second for each interval)" %
              (name, rateTxt))
Esempio n. 33
0
def isolateMotherSpeech(path, filterGrid, outputPath):
    '''
    Removes mother speech when the child is also speaking

    For every .TextGrid file in path, each interval on the "Mother" tier
    is compared against the entries of the tier named filterGrid that
    fall inside it (obtained by cropping the textgrid to the interval).
    Each such entry is handed to subtractOverlap() together with the
    remaining pieces of the mother interval, and the pieces it returns
    replace the original piece.  The resulting entries replace the
    "Mother" tier and the textgrid is saved, under the same filename, in
    outputPath.

    path - folder containing the input textgrids
    filterGrid - name of the tier whose entries are subtracted
    outputPath - folder the modified textgrids are written to (created
                 if needed)
    '''

    utils.makeDir(outputPath)

    for fn in utils.findFiles(path, filterExt=".TextGrid"):

        tg = tgio.openTextGrid(join(path, fn))
        motherTier = tg.tierDict["Mother"]

        newEntryList = []
        for start, stop, label in motherTier.entryList:
            croppedTG = tg.crop(False, False, start, stop)
            entryList = croppedTG.tierDict[filterGrid].entryList

            # The pieces of the current mother interval that still remain
            resultList = [(start, stop, label), ]

            for subStart, subStop, _subLabel in entryList:

                i = 0
                while i < len(resultList):
                    tmpStart = resultList[i][0]
                    tmpEnd = resultList[i][1]
                    tmpResultList = subtractOverlap(tmpStart,
                                                    tmpEnd,
                                                    label,
                                                    subStart,
                                                    subStop)
                    # Replace if there has been a change
                    if tmpResultList != [[tmpStart, tmpEnd, label], ]:
                        # Splice the new pieces in place of piece i.  The
                        # pieces after i must be kept -- dropping them
                        # would silently delete mother speech that has
                        # nothing to do with this filter entry.
                        resultList = (resultList[:i] +
                                      tmpResultList +
                                      resultList[i + 1:])
                        # Jump past the pieces that were just inserted;
                        # they already account for this filter entry
                        i += len(tmpResultList) - 1
                    i += 1

            newEntryList.extend(resultList)

        newMotherTier = tgio.IntervalTier("Mother", newEntryList)
        tg.replaceTier("Mother", newMotherTier.entryList)
        tg.save(join(outputPath, fn))
Esempio n. 34
0
def extractTranscript(featurePath, tierName):
    '''
    Writes the labels of a textgrid tier to a text file, one per line

    Textgrids are read from <featurePath>/textgrids.  For each one, the
    label (third item) of every entry yielded by _navigateTGs() for
    tierName is written, utf-8 encoded, to
    <featurePath>/transcript/<name>.txt
    '''
    tgPath = join(featurePath, "textgrids")
    outputPath = join(featurePath, "transcript")
    utils.makeDir(outputPath)

    tgNameList = utils.findFiles(tgPath, filterExt=".TextGrid",
                                 stripExt=True)
    for name in tgNameList:
        labelList = ["%s" % entry[2]
                     for entry in _navigateTGs(tgPath, name, tierName)]
        outputTxt = "\n".join(labelList)

        with io.open(join(outputPath, name + ".txt"), "w",
                     encoding="utf-8") as fd:
            fd.write(outputTxt)
Esempio n. 35
0
def extractMotherSpeech(wavPath, textgridPath, mothersSpeechName,
                        outputWavPath, outputTextgridPath):
    '''
    Cuts each recording into one wav/textgrid pair per speech-tier entry

    For every .wav file in wavPath, the textgrid of the same name is
    opened from textgridPath.  For the i-th entry on the tier named
    mothersSpeechName, the audio between the entry's start and stop time
    is extracted to <outputWavPath>/<name>_<iii>.wav and the textgrid,
    cropped to the same span, is saved to
    <outputTextgridPath>/<name>_<iii>.TextGrid (iii is the zero-padded
    entry index).

    The name of each processed recording is printed.
    '''

    utils.makeDir(outputWavPath)
    utils.makeDir(outputTextgridPath)

    for name in utils.findFiles(wavPath, filterExt=".wav", stripExt=True):
        # Function-call form: valid in both Python 2 and Python 3
        print(name)

        # The source wav is the same for every entry of this recording
        wavFN = join(wavPath, name + ".wav")

        tg = tgio.openTextGrid(join(textgridPath, name + ".TextGrid"))
        speechTier = tg.tierDict[mothersSpeechName]
        for i, entry in enumerate(speechTier.entryList):
            subName = "%s_%03d" % (name, i)
            start, stop, _label = entry
            start, stop = float(start), float(stop)

            audio_scripts.extractSubwav(wavFN,
                                        join(outputWavPath,
                                             subName + ".wav"),
                                        start, stop,
                                        singleChannelFlag=True)

            subTG = tg.crop(strictFlag=False, softFlag=False,
                            startTime=start, endTime=stop)
            subTG.save(join(outputTextgridPath, subName + ".TextGrid"))
Esempio n. 36
0
def correctTextgridTimes(tgPath, threshold):
    '''
    Snaps phone boundaries onto nearly-identical word boundaries

    For every .TextGrid file in tgPath, each entry on the "phones" tier
    that overlaps an entry on the "words" tier is examined.  If the
    phone's start or end differs from one of the word's boundaries by
    less than threshold (but is not exactly equal to it), that phone
    boundary is set to the word boundary.  The modified textgrid is
    saved, under the same filename, in <tgPath>/correctsTGs.

    NOTE(review): the four checks form a single if/elif chain, so at
    most one boundary of a phone is adjusted per overlapping word --
    confirm this is intended.
    '''

    def _nearlyEqual(x, y):
        # Are x and y unique but very very similar
        return (abs(x - y) < threshold) and (x != y)

    outputPath = join(tgPath, "correctsTGs")
    utils.makeDir(outputPath)

    for fn in utils.findFiles(tgPath, filterExt=".TextGrid"):
        print(fn)
        tg = tgio.openTextgrid(join(tgPath, fn))
        wordTier = tg.tierDict["words"]
        phoneTier = tg.tierDict["phones"]

        for wordEntry in wordTier.entryList:
            for i, phoneEntry in enumerate(phoneTier.entryList):

                # Only phones that overlap the current word are candidates
                if not tgio.intervalOverlapCheck(wordEntry, phoneEntry):
                    continue

                phoneStart = phoneEntry[0]
                phoneEnd = phoneEntry[1]
                phoneLabel = phoneEntry[2]
                wordStart, wordEnd = wordEntry[0], wordEntry[1]

                # First matching check wins; the rest are not evaluated
                if _nearlyEqual(wordStart, phoneStart):
                    phoneStart = wordStart
                elif _nearlyEqual(wordEnd, phoneStart):
                    phoneStart = wordEnd
                elif _nearlyEqual(wordStart, phoneEnd):
                    phoneEnd = wordStart
                elif _nearlyEqual(wordEnd, phoneEnd):
                    phoneEnd = wordEnd

                # Written back even when nothing changed
                phoneTier.entryList[i] = (phoneStart, phoneEnd, phoneLabel)

        tg.save(join(outputPath, fn))