Esempio n. 1
0
def toAbsoluteTime(namePrefix, matlabOutputPath, startTimeList):
    '''
    Shifts per-file relative sample times so they become absolute times

    The input may be split across several files.  Every file returned by
    utils.findFiles for matlabOutputPath with filterExt=".txt" and
    filterPattern=namePrefix is treated as one part of the same source
    recording.  Each part is paired with one entry of startTimeList and
    that start time is added to every non-empty value of the part.

    namePrefix - pattern used to select the files of one recording
                 (presumably the name of the original wav file, no suffix)
    matlabOutputPath - the directory holding the .txt files to read
    startTimeList - one start time for each matching file; the pairing is
                    done with utils.safeZip(..., enforceLength=True)

    Returns a list of lists where each sublist holds the shifted times of
    one matching file, as strings
    '''
    matchingFNList = utils.findFiles(matlabOutputPath,
                                     filterExt=".txt",
                                     filterPattern=namePrefix)

    pairedList = utils.safeZip([startTimeList, matchingFNList],
                               enforceLength=True)

    absoluteTimeList = []
    for offset, fn in pairedList:
        columnList = utils.openCSV(matlabOutputPath, fn, valueIndex=0)

        # Blank cells are dropped; everything else is shifted by the
        # start time of the part it came from
        shiftedList = []
        for cell in columnList:
            if cell != '':
                shiftedList.append(str(float(offset) + float(cell)))

        absoluteTimeList.append(shiftedList)

    return absoluteTimeList
Esempio n. 2
0
def toAbsoluteTime(namePrefix, matlabOutputPath, startTimeList):
    '''
    Converts sampled times from part-relative to absolute time

    A source recording may have been split into several parts.  The parts
    are the files that utils.findFiles returns for matlabOutputPath with
    filterExt=".txt" and filterPattern=namePrefix.

    namePrefix - pattern selecting the parts of one recording (presumably
                 the name of the original wav file with no suffix)
    matlabOutputPath - the directory the parts are read from
    startTimeList - the start time of each part; it is paired with the
                    parts via utils.safeZip(..., enforceLength=True)

    Returns a list of lists; each sublist contains, as strings, the values
    of one part after its start time has been added to them
    '''
    fnList = utils.findFiles(matlabOutputPath, filterExt=".txt",
                             filterPattern=namePrefix)

    def _shiftPart(startTime, fn):
        # Empty strings are skipped; the remaining values are offset
        valueList = utils.openCSV(matlabOutputPath, fn, valueIndex=0)
        return [str(float(startTime) + float(value))
                for value in valueList if value != '']

    return [_shiftPart(startTime, fn)
            for startTime, fn in utils.safeZip([startTimeList, fnList],
                                               enforceLength=True)]
Esempio n. 3
0
def aggregateSpeechRate(tgInfoPath, speechRatePath, outputPath, samplingRate):
    '''
    Merges per-subset speech rate output into one file per tgInfo file

    For each .txt file found in tgInfoPath, the rows of that file are
    paired with the speech rate files found in speechRatePath using the
    file's name (extension removed) as filterPattern.  The sample numbers
    of each speech rate file are relative to the start of their subset;
    they are divided by samplingRate and offset by the subset's start time
    so that they are relative to the file the subset originated from.

    tgInfoPath - directory of .txt files whose rows each unpack into
                 (start, stop, label)
    speechRatePath - directory of the per-subset speech rate .txt files
    outputPath - directory the merged files are written to; created with
                 utils.makeDir
    samplingRate - divisor applied to every sample number

    Writes one file per processed input file, under the same filename, to
    outputPath.  Each line holds the comma-separated times of one subset.
    Returns nothing.
    '''
    utils.makeDir(outputPath)

    # Existing outputs are passed to findFiles as a skip list, so files
    # that were already processed are presumably not recomputed
    finishedList = utils.findFiles(outputPath, filterExt=".txt")

    for fn in utils.findFiles(tgInfoPath, filterExt=".txt",
                              skipIfNameInList=finishedList):

        # Load subset speech rate
        name = os.path.splitext(fn)[0]
        speechRateFNList = utils.findFiles(speechRatePath, filterExt=".txt",
                                           filterPattern=name)

        subSplitList = utils.openCSV(tgInfoPath, fn)

        outputList = []
        for splitInfo, speechRateFN in utils.safeZip(
                [subSplitList, speechRateFNList], enforceLength=True):
            # Only the start time is used; unpacking all three fields
            # still enforces the expected row shape
            start, _stop, _label = splitInfo

            speechRateList = utils.openCSV(speechRatePath, speechRateFN,
                                           valueIndex=0)
            absoluteTimeList = [
                str(float(start) + float(sampleNum) / float(samplingRate))
                for sampleNum in speechRateList if sampleNum != ''
            ]

            outputList.append(",".join(absoluteTimeList))

        # The context manager closes the handle even if the write fails
        with open(join(outputPath, fn), "w") as fd:
            fd.write("\n".join(outputList) + "\n")
def aggregateFeatures(featurePath, featureList, headerStr=None):
    '''
    Joins per-feature files row by row, then concatenates the results

    Each feature is a subdirectory of featurePath containing .txt files.
    For every filename that exists in all feature subdirectories, the rows
    of the feature files are placed side by side (comma-separated, in the
    order of featureList) and prefixed with the filename without its
    extension.  The result is written to <featurePath>/aggr/<name>.csv.

    Afterwards every .csv file found in the aggr directory, except
    all.csv, is read back and joined with newlines into aggr/all.csv.
    Because the directory is re-scanned, .csv files that were already
    there before this call are presumably included too.

    featurePath - directory containing one subdirectory per feature
    featureList - names of the feature subdirectories to combine
    headerStr - optional first line of all.csv

    Raises IndexError if featureList is empty.  Returns nothing.
    '''
    outputDir = join(featurePath, "aggr")
    utils.makeDir(outputDir)

    # One list of .txt filenames per feature
    fnList = [utils.findFiles(join(featurePath, feature), filterExt=".txt")
              for feature in featureList]

    # Find the files that exist in all features, keeping the order of the
    # first feature.  Sets make each membership test constant time.
    otherFNSetList = [set(subList) for subList in fnList[1:]]
    actualFNList = [featureFN for featureFN in fnList[0]
                    if all(featureFN in fnSet for fnSet in otherFNSetList)]

    for featureFN in actualFNList:
        dataList = []
        for feature in featureList:
            featureDataList = utils.openCSV(join(featurePath, feature),
                                            featureFN,
                                            encoding="utf-8")
            dataList.append([",".join(row) for row in featureDataList])

        name = os.path.splitext(featureFN)[0]

        # First output column: the source name repeated once per row
        dataList.insert(0, [name] * len(dataList[0]))
        tDataList = utils.safeZip(dataList, enforceLength=True)
        outputList = [",".join(row) for row in tDataList]
        outputTxt = "\n".join(outputList)

        outputFN = join(outputDir, name + ".csv")
        with io.open(outputFN, "w", encoding="utf-8") as fd:
            fd.write(outputTxt)

    # Cat all files together
    aggrOutput = []

    if headerStr is not None:
        aggrOutput.append(headerStr)

    for fn in utils.findFiles(outputDir, filterExt=".csv"):
        # Never fold a previous aggregate back into the new one
        if fn == "all.csv":
            continue
        with io.open(join(outputDir, fn), "r", encoding='utf-8') as fd:
            aggrOutput.append(fd.read())

    with io.open(join(outputDir, "all.csv"), "w", encoding='utf-8') as fd:
        fd.write("\n".join(aggrOutput))
Esempio n. 5
0
def aggregateFeatures(featurePath, featureList, headerStr=None):
    '''
    Combines the files of several features into per-file and global csvs

    Every entry of featureList names a subdirectory of featurePath that
    holds .txt files.  Only filenames present in every one of those
    subdirectories are processed.  For such a file, the rows of all the
    features are joined side by side with commas (features in the order
    of featureList), each row is prefixed with the filename minus its
    extension, and the result is saved as <featurePath>/aggr/<name>.csv.

    Finally all .csv files found in the aggr directory other than all.csv
    are concatenated, newline-separated, into aggr/all.csv.  The directory
    is re-scanned for this step, so .csv files left there by earlier runs
    are presumably picked up as well.

    featurePath - directory that contains the feature subdirectories
    featureList - the feature subdirectory names to combine
    headerStr - if not None, written as the first line of all.csv

    Raises IndexError if featureList is empty.  Returns nothing.
    '''
    outputDir = join(featurePath, "aggr")
    utils.makeDir(outputDir)

    # Collect the .txt filenames of each feature
    fnList = []
    for feature in featureList:
        fnList.append(utils.findFiles(join(featurePath, feature),
                                      filterExt=".txt"))

    # Find the files that exist in all features.  The first feature sets
    # the order; the others are only probed, so sets are used for them.
    remainingSetList = [set(fnSubList) for fnSubList in fnList[1:]]
    actualFNList = []
    for featureFN in fnList[0]:
        if all(featureFN in fnSet for fnSet in remainingSetList):
            actualFNList.append(featureFN)

    for featureFN in actualFNList:
        dataList = []
        for feature in featureList:
            featureDataList = utils.openCSV(join(featurePath, feature),
                                            featureFN, encoding="utf-8")
            dataList.append([",".join(row) for row in featureDataList])

        name = os.path.splitext(featureFN)[0]

        # Leading column: the source name, once for every data row
        dataList.insert(0, [name] * len(dataList[0]))
        tDataList = utils.safeZip(dataList, enforceLength=True)
        outputTxt = "\n".join([",".join(row) for row in tDataList])

        outputFN = join(outputDir, name + ".csv")
        with io.open(outputFN, "w", encoding="utf-8") as fd:
            fd.write(outputTxt)

    # Cat all files together
    aggrOutput = []

    if headerStr is not None:
        aggrOutput.append(headerStr)

    for fn in utils.findFiles(outputDir, filterExt=".csv"):
        # Skip the aggregate itself so it is not nested into the new one
        if fn == "all.csv":
            continue
        with io.open(join(outputDir, fn), "r", encoding='utf-8') as fd:
            aggrOutput.append(fd.read())

    with io.open(join(outputDir, "all.csv"), "w", encoding='utf-8') as fd:
        fd.write("\n".join(aggrOutput))
def manualPhoneCountForEpochs(manualCountsPath, tgInfoPath, epochPath,
                              outputPath):
    '''
    Sums manual syllable and phone counts over each epoch

    For every .txt file found in tgInfoPath (outputs already present in
    outputPath are passed to findFiles as a skip list), the files of the
    same name in epochPath, tgInfoPath and manualCountsPath are read.
    Every interval is weighted by the value percentInside returns for it
    and the current epoch, and the weighted syllable count, phone count
    and interval duration are accumulated per epoch.

    manualCountsPath - directory of files whose rows start with
                       (syllable count, phone count)
    tgInfoPath - directory of files whose rows start with (start, stop)
    epochPath - directory of files whose rows are (epoch num, start, stop)
    outputPath - directory the results are written to; created with
                 utils.makeDir

    Writes one file per input file with one line per epoch, formatted as
    "syllables,phones,speechDuration".  Returns nothing.
    '''
    utils.makeDir(outputPath)

    alreadyDoneList = utils.findFiles(outputPath, filterExt=".txt")
    todoList = utils.findFiles(tgInfoPath, filterExt=".txt",
                               skipIfNameInList=alreadyDoneList)

    for fn in todoList:

        epochRowList = utils.openCSV(epochPath, fn)
        intervalRowList = utils.openCSV(tgInfoPath, fn)
        countRowList = utils.openCSV(manualCountsPath, fn)

        lineList = []
        for epochRow in epochRowList:
            # Column 0 is the epoch number; columns 1 and 2 are its bounds
            epochStart = float(epochRow[1])
            epochStop = float(epochRow[2])

            syllableTotal = 0
            phoneTotal = 0
            durationTotal = 0

            pairedRowList = utils.safeZip([intervalRowList, countRowList],
                                          enforceLength=True)
            for intervalRow, countRow in pairedRowList:
                start = float(intervalRow[0])
                stop = float(intervalRow[1])
                syllableCount = float(countRow[0])
                phoneCount = float(countRow[1])

                # Weight used for intervals that straddle an epoch
                # boundary
                weight = percentInside(start, stop, epochStart, epochStop)

                durationTotal += (stop - start) * weight
                syllableTotal += syllableCount * weight
                phoneTotal += phoneCount * weight

            lineList.append("%f,%f,%f" % (syllableTotal,
                                          phoneTotal,
                                          durationTotal))

        with open(join(outputPath, fn), "w") as fd:
            fd.write("\n".join(lineList))
def _calculateSyllablesPerSecondForIntervals(wavPath, tgPath, tierName,
                                             syllableNucleiPath):
    '''
    Prints the syllables-per-second rate of each interval of a tier

    For every .wav file found in wavPath, the TextGrid of the same name
    is opened from tgPath and the entries of tierName are read.  The
    start time of each entry is handed to uwe_sr.toAbsoluteTime together
    with syllableNucleiPath; the number of values returned for an entry
    divided by the entry's length (stop - start) is reported as its rate.

    wavPath - directory searched for .wav files
    tgPath - directory containing <name>.TextGrid for each wav file
    tierName - the tier whose entries are measured
    syllableNucleiPath - directory passed on to uwe_sr.toAbsoluteTime

    Returns nothing; one line is printed per wav file.
    '''
    for name in utils.findFiles(wavPath, filterExt=".wav", stripExt=True):

        tgFN = join(tgPath, name + ".TextGrid")
        entryList = tgio.openTextGrid(tgFN).tierDict[tierName].entryList

        startTimeList = []
        for entry in entryList:
            startTimeList.append(entry[0])

        nucleiPerEntryList = uwe_sr.toAbsoluteTime(name,
                                                   syllableNucleiPath,
                                                   startTimeList)

        rateList = []
        pairedList = utils.safeZip([nucleiPerEntryList, entryList],
                                   enforceLength=True)
        for nucleusList, entry in pairedList:
            entryStart = entry[0]
            entryStop = entry[1]
            rate = len(nucleusList) / (entryStop - entryStart)
            rateList.append(str(rate))

        print("%s - %s (syllables/second for each interval)" %
              (name, ",".join(rateList)))
Esempio n. 8
0
def analyzeLaughter(textgridPath, outputPath):
    '''
    Extracts speech, laughter and pause events from a set of TextGrids

    Three (tier, code) pairs are searched for in every .TextGrid file of
    textgridPath: code "MS" and code "FP" in tier "Mother", and code "LA"
    in tier "Mother's Backchannel".  For each pair a csv is written to
    outputPath with one "filename,start,stop,label" line per entry that
    tier.find(code) returned.  A final csv, event_cumulative_lengths.csv,
    lists for every TextGrid the summed duration of the entries found for
    each of the three pairs.

    textgridPath - directory searched for .TextGrid files
    outputPath - directory the csv files are written to; created with
                 utils.makeDir

    Returns nothing.
    '''
    utils.makeDir(outputPath)

    speechTierName = "Mother"
    laughterTierName = "Mother's Backchannel"

    speechCode = "MS"
    laughterCode = "LA"
    pauseCode = "FP"

    # (tier to search, code to search for, output file stem)
    eventSpecList = [
        [speechTierName, speechCode, "speech_occurances"],
        [laughterTierName, laughterCode, "laughter_occurances"],
        [speechTierName, pauseCode, "pause_code"],
    ]

    # How much did each event occur?
    allCodeSummaryList = []
    for tierName, code, outputName in eventSpecList:
        entryList = []
        summaryList = []
        for fn in utils.findFiles(textgridPath, filterExt=".TextGrid"):
            tg = tgio.openTextGrid(join(textgridPath, fn))
            tier = tg.tierDict[tierName]

            matchEntryList = tier.find(code)
            durationList = [float(stop) - float(start)
                            for start, stop, _label in matchEntryList]
            matchEntryList = [[fn, str(start), str(stop), label]
                              for start, stop, label in matchEntryList]

            entryList.extend(matchEntryList)
            summaryList.append((fn, str(sum(durationList))))

        entryList = [",".join(row) for row in entryList]

        # The context manager closes the handle even if the write fails
        with open(join(outputPath, outputName + ".csv"), "w") as fd:
            fd.write("\n".join(entryList))

        allCodeSummaryList.append(summaryList)

    # One row per TextGrid; the filename is taken from the speech summary
    outputList = ["Filename,Speech,Laughter,Pause", ]
    for speech, laugh, pause in utils.safeZip(allCodeSummaryList,
                                              enforceLength=True):
        outputList.append(",".join([speech[0], speech[1],
                                    laugh[1], pause[1]]))

    summaryFN = join(outputPath, "event_cumulative_lengths.csv")
    with open(summaryFN, "w") as fd:
        fd.write("\n".join(outputList) + "\n")
Esempio n. 9
0
def uwePhoneCountForEpochs(epochPath, tgInfoPath, manualCountsPath, outputPath):
    '''
    Sums weighted syllable counts over each epoch

    For every .txt file found in tgInfoPath, the files of the same name
    in epochPath, tgInfoPath and manualCountsPath are read.  The number of
    fields in each row of the manualCountsPath file is used as the
    syllable count of the matching interval.  That count is multiplied by
    the value _percentInside returns for the interval and the current
    epoch (presumably the fraction of the interval inside the epoch) and
    summed per epoch.

    epochPath - directory of files whose rows are
                (epoch number, start, stop)
    tgInfoPath - directory of files whose rows are (start, stop, wordList)
    manualCountsPath - directory of files with one row per interval
    outputPath - directory the results are written to; created with
                 utils.makeDir

    Writes one file per input file, holding one "%f"-formatted syllable
    count per epoch on each line.  Each filename is printed as it is
    processed.  Returns nothing.
    '''
    utils.makeDir(outputPath)

    for fn in utils.findFiles(tgInfoPath, filterExt=".txt"):
        # Call form of print: valid on Python 3, same output on Python 2
        print(fn)
        epochList = utils.openCSV(epochPath, fn)
        tgInfo = utils.openCSV(tgInfoPath, fn)
        manualCounts = utils.openCSV(manualCountsPath, fn)

        epochOutputList = []
        for _epochNumber, epochStart, epochStop in epochList:
            epochStart, epochStop = float(epochStart), float(epochStop)

            # Find all of the intervals that are at least partially
            # contained within the current epoch
            epochSyllableCount = 0
            for info, nucleusList in utils.safeZip([tgInfo, manualCounts],
                                                   enforceLength=True):
                # The word list is not needed; unpacking all three fields
                # still enforces the expected row shape
                start, stop, _wordList = info
                start, stop = float(start), float(stop)

                syllableCount = len(nucleusList)

                # Accounts for intervals that straddle an epoch boundary
                multiplicationFactor = _percentInside(start, stop, epochStart,
                                                      epochStop)

                epochSyllableCount += syllableCount * multiplicationFactor

            epochOutputList.append("%f" % (epochSyllableCount))

        # The context manager closes the handle even if the write fails
        with open(join(outputPath, fn), "w") as fd:
            fd.write("\n".join(epochOutputList) + "\n")