Example 1
def testUnBiasing():
    realTimeDirectory = "../media/Jamison_Evaluations/Real_Time_Evaluation/Audio/"

    for filePath in sorted(glob.iglob(realTimeDirectory + "*.wav")):
        name = os.path.basename(filePath)

        print(name)

        # Read in the file
        audio = audioModule.Audio(filePath)

        plotAudio(audio=audio, name=name, samples=80000)

        audio.unBias()

        plotAudio(audio=audio, name=name, samples=80000)

        audio = audioModule.Audio(filePath)
        if audio.numberOfChannels > 1:
            audio.makeMono()

        plotAudio(audio=audio, name=name, samples=80000)

        audio.unBias()

        plotAudio(audio=audio, name=name, samples=80000)
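The plotAudio helper used by testUnBiasing is not shown in these examples. A minimal sketch of what it might look like, assuming it simply plots the first `samples` values of audio.data so the effect of unBias() and makeMono() on the waveform is visible (the signature is inferred from the calls above, not taken from the project):

import matplotlib.pyplot as plt

def plotAudio(audio, name, samples):
    # Plot the first `samples` values of the raw waveform.
    plt.figure(figsize=[16, 8])
    plt.plot(audio.data[:samples])
    plt.title(name)
    plt.show()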
Example 2
def showVoiceActivityForParticipantAudio():

    audioDirectory = "../media/Participant_Audio/*.wav"

    speechAnalyzer = speechAnalysis.SpeechAnalyzer()

    for filePath in sorted(glob.iglob(audioDirectory)):
        name = os.path.basename(filePath)[:-4]

        audio = audioModule.Audio(filePath=filePath)
        if audio.numberOfChannels != 1:
            audio.makeMono()

        voiceActivity = speechAnalyzer.getVoiceActivityFromAudio(audio)
        voiceActivity[voiceActivity == 0] = np.nan

        times = np.arange(0,
                          len(audio.data) / audio.sampleRate,
                          speechAnalyzer.featureStepSize / 1000)
        signalTimes = np.arange(0,
                                len(audio.data) / audio.sampleRate,
                                1 / audio.sampleRate)

        plt.figure(figsize=[16, 8])
        plt.plot(signalTimes, audio.data, times, voiceActivity)
        plt.title(name)
        plt.show()
Example 3
def graphSantaBarbara():
    audioDirectory = "../media/SBCSAE/audio/*.wav"
    speechAnalyzer = speechAnalysis.SpeechAnalyzer()

    for filePath in sorted(glob.iglob(audioDirectory)):
        fileName = os.path.basename(filePath)[:-4]

        print(fileName)

        audio = audioModule.Audio(filePath=filePath)
        if audio.numberOfChannels != 1:
            audio.makeMono()

        intensity = speechAnalyzer.getEnergyFromAudio(audio)

        filledPauses, timeStamps = speechAnalyzer.getFilledPausesFromAudio(
            audio)

        print(convertArrayToTimeStamps(timeStamps))

        filledPausesMarkers = np.full(len(timeStamps), 0)
        energyTimes = np.array(range(
            0, len(intensity))) / (1000 / speechAnalyzer.featureStepSize)

        plt.figure(figsize=(20, 10))
        plt.plot(timeStamps, filledPausesMarkers, '^')
        plt.plot(energyTimes, intensity)
        plt.savefig("../media/SBCSAE/graphs/" + fileName + ".png")
        plt.close()
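graphSantaBarbara prints the detected filled-pause times through a convertArrayToTimeStamps helper whose body is not included here. A hedged sketch, assuming it only formats the second offsets as minute:second strings for easier reading (the project's actual helper may do something different):

def convertArrayToTimeStamps(times):
    # Format each offset in seconds as an m:ss.s string.
    return ["%d:%04.1f" % (int(time) // 60, time % 60) for time in times]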
Example 4
def validateWithSVCCorpus():
    speechAnalyzer = speechAnalysis.SpeechAnalyzer()

    corpusPath = "../media/vocalizationcorpus"
    labelsPath = corpusPath + "/labels.txt"

    transcript = []

    totalNumberOfFilledPauses = 0
    totalNumberOfCorrectlyDetectedPauses = 0
    totalNumberOfFalseAlarms = 0

    with open(labelsPath) as transcriptFile:
        lines = transcriptFile.readlines()
        for row in lines:
            transcript.append(row.strip().split(','))

    # Remove header line
    transcript.pop(0)

    for row in transcript:
        fileName = row[0]

        utterances = row[4:]

        # print(fileName, utterances)

        utterances = np.array(utterances)
        utterances = utterances.reshape(int(utterances.shape[0] / 3), 3)

        if 'filler' in utterances:
            filePath = corpusPath + "/data/" + fileName + ".wav"

            audio = audioModule.Audio(filePath=filePath)
            if audio.numberOfChannels != 1:
                audio.makeMono()

            filledPauses, timeStamps = speechAnalyzer.getFilledPausesFromAudio(
                audio)

            for utterance in utterances:
                if utterance[0] == "filler":
                    totalNumberOfFilledPauses += 1

            for filledPauseDetectedTime in timeStamps:
                correctDetection = False
                for utterance in utterances:
                    if utterance[0] == "filler" and abs(
                            float(utterance[1]) -
                            filledPauseDetectedTime) < 0.5:
                        correctDetection = True

                if correctDetection:
                    totalNumberOfCorrectlyDetectedPauses += 1
                else:
                    totalNumberOfFalseAlarms += 1

            print(fileName, totalNumberOfFilledPauses,
                  totalNumberOfCorrectlyDetectedPauses,
                  totalNumberOfFalseAlarms)
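validateWithSVCCorpus only prints the running counters. The same summary statistics computed in the other validation functions can be derived from them; a short sketch, placed after the loop and reusing the formulas from compareAlgorithmToParticipants (it assumes at least one detection and one labelled filled pause, so the denominators are non-zero):

    precision = totalNumberOfCorrectlyDetectedPauses / (
        totalNumberOfCorrectlyDetectedPauses + totalNumberOfFalseAlarms)
    recall = totalNumberOfCorrectlyDetectedPauses / totalNumberOfFilledPauses
    f1 = 2 * precision * recall / (precision + recall)

    print("Precision:", precision, "Recall:", recall, "F1:", f1)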
Example 5
def compareLibrosaAndRMS():
    filePath = "../media/Participant_Audio/p10_ol.wav"
    name = os.path.basename(filePath)[:-4]

    stepSize = 10  # In milliseconds
    windowSize = 10

    audio = audioModule.Audio(filePath=filePath)
    if audio.numberOfChannels != 1:
        audio.makeMono()

    librosaRMS = featureModule.getEnergy(data=audio.data,
                                         sampleRate=audio.sampleRate,
                                         windowSize=windowSize,
                                         stepSize=stepSize)

    rms = featureModule.getRMSIntensity(data=audio.data,
                                        sampleRate=audio.sampleRate,
                                        windowSize=windowSize,
                                        stepSize=stepSize)

    times = np.arange(0, len(audio.data) / audio.sampleRate, stepSize / 1000)

    plt.figure(figsize=[16, 8])
    plt.plot(times, librosaRMS)
    plt.plot(times, rms)
    plt.title(name)
    plt.show()
Example 6
def compareAlgorithmToParticipants():
    audioDirectory = "../media/Participant_Audio_30_Sec_Chunks/*.wav"
    speechAnalyzer = speechAnalysis.SpeechAnalyzer()
    printParameters(speechAnalyzer)

    transcript = []

    totalNumberOfFilledPauses = 0
    totalNumberOfCorrectlyDetectedPauses = 0
    totalNumberOfFalseAlarms = 0

    with open(
            "../media/Participant_Audio_30_Sec_Chunks_Transcripts/filled_pauses.txt"
    ) as transcriptFile:
        lines = transcriptFile.readlines()
        for row in lines:
            transcript.append(row.strip().split(', '))

    for line in transcript:
        name = line[0]
        actualFilledPausesCount = int(line[1])

        for filePath in sorted(glob.iglob(audioDirectory)):
            fileName = os.path.basename(filePath)[:-4]

            if fileName == name:
                audio = audioModule.Audio(filePath=filePath)
                if audio.numberOfChannels != 1:
                    audio.makeMono()

                filledPauses, timeStamps = speechAnalyzer.getFilledPausesFromAudio(
                    audio)

                filledPausesMarkers = np.full(len(timeStamps), 0)
                filledPausesCount = len(timeStamps)

                print(name, actualFilledPausesCount, filledPausesCount)

                totalNumberOfFilledPauses += actualFilledPausesCount

                if filledPausesCount > actualFilledPausesCount:
                    totalNumberOfFalseAlarms += filledPausesCount - actualFilledPausesCount
                    totalNumberOfCorrectlyDetectedPauses += actualFilledPausesCount
                else:
                    totalNumberOfCorrectlyDetectedPauses += filledPausesCount

    precision = totalNumberOfCorrectlyDetectedPauses / (
        totalNumberOfCorrectlyDetectedPauses + totalNumberOfFalseAlarms)
    recall = totalNumberOfCorrectlyDetectedPauses / totalNumberOfFilledPauses

    f1 = 2 * precision * recall / (precision + recall)

    print("    Total     | Filled pauses:", totalNumberOfFilledPauses)
    print("     New      | Correct filled pauses:",
          totalNumberOfCorrectlyDetectedPauses, "False alarms:",
          totalNumberOfFalseAlarms, "Precision:", precision, "Recall:", recall,
          "F1", f1)
Example 7
def validateWithCCHP():
    corpusTopLevelPath = "../media/cchp_english/"
    speechAnalyzer = speechAnalysis.SpeechAnalyzer()

    # Iterate through sub directories with participants.
    for participantPath in sorted(glob.iglob(corpusTopLevelPath + '*/')):

        totalNumberOfFilledPauses = 0
        totalNumberOfCorrectlyDetectedPauses = 0
        totalNumberOfFalseAlarms = 0

        # Find the audio files for each condition.
        for filePath in sorted(glob.iglob(participantPath + "*.wav")):
            fileName = os.path.basename(filePath)[:-4]

            # Find the matching transcript
            for transcriptPath in sorted(glob.iglob(participantPath + "*.xml")):
                transcriptName = os.path.basename(transcriptPath)[:-4]

                if fileName == transcriptName:
                    # Grab the number of filled pauses
                    with open(transcriptPath, 'r') as transcriptFile:
                        transcriptText = transcriptFile.read()
                    actualFilledPausesCount = transcriptText.count(
                        "uh</T>") + transcriptText.count(
                            "um</T>") + transcriptText.count("mm</T>")

                    audio = audioModule.Audio(filePath=filePath)
                    if audio.numberOfChannels == 2:
                        audio.makeMono()

                    _, timeStamps = speechAnalyzer.getFilledPausesFromAudio(
                        audio)

                    algorithmFilledPauseCount = len(timeStamps)

                    totalNumberOfFilledPauses += actualFilledPausesCount

                    if algorithmFilledPauseCount > actualFilledPausesCount:
                        totalNumberOfFalseAlarms += algorithmFilledPauseCount - actualFilledPausesCount
                        totalNumberOfCorrectlyDetectedPauses += actualFilledPausesCount
                    else:
                        totalNumberOfCorrectlyDetectedPauses += algorithmFilledPauseCount

                    print(fileName, actualFilledPausesCount,
                          algorithmFilledPauseCount)

        # precision = totalNumberOfCorrectlyDetectedPauses / (totalNumberOfCorrectlyDetectedPauses + totalNumberOfFalseAlarms)
        # recall = totalNumberOfCorrectlyDetectedPauses / totalNumberOfFilledPauses
        #
        # f1 = 2 * precision * recall / (precision + recall)

        print("    Total     | Filled pauses:", totalNumberOfFilledPauses)
        print("     New      | Correct filled pauses:",
              totalNumberOfCorrectlyDetectedPauses, "False alarms:",
              totalNumberOfFalseAlarms)
Example 8
def testingCCHP():
    speechAnalyzer = speechAnalysis.SpeechAnalyzer()

    # audio = audioModule.Audio(filePath="../media/SBC001.wav")
    audio = audioModule.Audio(
        filePath="../media/cchp_english/p102/p102_en_pd.wav")
    if audio.numberOfChannels != 1:
        audio.makeMono()

    filledPauses, timeStamps = speechAnalyzer.getFilledPausesFromAudio(audio)
    print(timeStamps)
Example 9
def getFeaturesFromFile():
    filePath = "../media/cchp_english/p102/p102_en_pd.wav"

    audio = audioModule.Audio(filePath=filePath)
    audio.makeMono()

    print(filePath)

    analyzer = speechAnalysis.SpeechAnalyzer()
    filledPauses, timeStamps = analyzer.getFilledPausesFromAudio(audio)

    print(len(timeStamps))
Example 10
def createSlicesFromPausesWithParticipants():
    audioDirectory = "../media/Participant_Audio_First_five/*.wav"
    outputDir = "./filledPauses/"

    for filePath in sorted(glob.iglob(audioDirectory)):

        # Audio file i/o
        name = os.path.basename(filePath)[:-4]

        participant = name.split("_")[0]
        condition = name.split("_")[1]

        # # Make fresh directories
        # os.mkdir(outputDir + name)

        print(participant, condition)

        audio = audioModule.Audio(filePath=filePath)
        audio.makeMono()

        filledPauses = featureModule.getFilledPauses(
            audio.data, audio.sampleRate, utteranceWindowSize,
            utteranceStepSize, utteranceMinimumLength,
            utteranceF1MaximumVariance, utteranceF2MaximumVariance,
            utteranceEnergyThreshold)

        audio = AudioSegment.from_wav(filePath)

        for time in filledPauses:
            ### Output files - pydub is in ms
            outputPath = outputDir + name + "/" + str(round(time, 2))

            # move back 100 ms
            start = (time - 0.1) * 1000
            # grab a second
            end = (time + 1) * 1000
            segment = audio[start:end]

            # write to disk
            segment.export(outputPath + ".wav", format="wav")

            # move back a whole second for more context
            start = (time - 1) * 1000
            segment = audio[start:end]

            # write to disk
            segment.export(outputPath + "[extra].wav", format="wav")

        # --

        print("Done with ", name)
        print(len(filledPauses))
Example 11
def getFeaturesFromSlices():
    filePaths = sorted(glob.iglob("./filledPauses/p3_ol/*extra].wav"))

    analyzer = speechAnalysis.SpeechAnalyzer()

    for filePath in filePaths:
        print(filePath)

        audio = audioModule.Audio(filePath=filePath)
        audio.makeMono()

        filledPauses, timeStamps = analyzer.getFilledPausesFromAudio(audio)

        print(timeStamps)
Example 12
def createSlicesFromPausesOnCCHP():
    audioDirectory = "../media/cchp_english/*/*.wav"
    outputDir = "../validation/results/filledPausesSlices/"

    speechAnalyzer = speechAnalysis.SpeechAnalyzer()

    # # Make fresh directories
    # for filePath in sorted(glob.iglob("../media/cchp_english/*/")):
    #     participantDir = filePath.split("/")[-2]
    #     # os.mkdir(outputDir + participantDir + "/")
    #     for condition in ["pd", "ra", "tn"]:
    #         # os.mkdir(outputDir + participantDir + "/" + condition + "/")

    for filePath in sorted(glob.iglob(audioDirectory)):
        participantDir = filePath.split("/")[-2]

        # Audio file i/o
        name = os.path.basename(filePath)[:-4]
        condition = name.split("_")[2]

        audio = audioModule.Audio(filePath=filePath)
        if audio.numberOfChannels != 1:
            audio.makeMono()

        filledPauses, timeStamps = speechAnalyzer.getFilledPausesFromAudio(
            audio)

        print(name)

        audioSegment = AudioSegment.from_wav(filePath)

        for time in timeStamps:
            ### Output files - pydub is in ms
            outputPath = outputDir + participantDir + "/" + condition + "/" + name + "-" + str(
                round(time, 2))

            print(outputPath)

            # move back 100 ms
            start = (time - 0.1) * 1000
            # grab a second
            end = (time + 1) * 1000
            segment = audioSegment[start:end]

            # write to disk
            segment.export(outputPath + ".wav", format="wav")

        # --

        print("Done with ", name)
Example 13
def parameterSweepP103CCHP():
    audioDirectory = "../media/cchp_english/p103/*.wav"
    outputDir = "../validation/results/p103sweep/"

    speechAnalyzer = speechAnalysis.SpeechAnalyzer()

    for minimumLength in list(range(50, 260, 10)):
        print("threshold:", minimumLength)

        speechAnalyzer.filledPauseMinimumLength = minimumLength

        # # Make fresh directories
        # os.mkdir(outputDir + str(minimumLength) + "/")
        # for condition in ["pd", "ra", "tn"]:
        #     os.mkdir(outputDir + str(minimumLength) + "/" + condition + "/")

        for filePath in sorted(glob.iglob(audioDirectory)):
            participantDir = filePath.split("/")[-2]

            # Audio file i/o
            name = os.path.basename(filePath)[:-4]
            condition = name.split("_")[2]

            audio = audioModule.Audio(filePath=filePath)
            if audio.numberOfChannels != 1:
                audio.makeMono()

            filledPauses, timeStamps = speechAnalyzer.getFilledPausesFromAudio(
                audio)

            print(name, len(timeStamps))

            audioSegment = AudioSegment.from_wav(filePath)

            for time in timeStamps:
                ### Output files - pydub is in ms
                outputPath = outputDir + str(
                    minimumLength) + "/" + condition + "/" + name + "-" + str(
                        round(time, 2))

                # move back 100 ms
                start = (time - 0.1) * 1000
                # grab a second
                end = (time + 1) * 1000
                segment = audioSegment[start:end]

                # write to disk
                segment.export(outputPath + ".wav", format="wav")
Example 14
def compareAlgorithmToDataset():
    print("Running on Dr. Smart's Dataset")

    speechAnalyzer = speechAnalysis.SpeechAnalyzer()
    printParameters(speechAnalyzer)

    directory = '../media/drSmartAudio'
    dataset = []

    numberOfAccurateDetections = 0
    numberOfDetections = 0
    trueNumberOfFilledPauses = 0

    # Load the dataset info for training
    with open(directory + '/metadata.csv', 'r') as csvfile:
        reader = csv.reader(csvfile)
        dataset.extend(reader)

    # Remove header
    dataset.pop(0)

    startTime = time.time()

    for audioFile in dataset:
        audio = audioModule.Audio(filePath=directory + audioFile[0])

        filledPauses, timeStamps = speechAnalyzer.getFilledPausesFromAudio(
            audio)

        if int(audioFile[1]) <= len(timeStamps):
            numberOfAccurateDetections += int(audioFile[1])

        trueNumberOfFilledPauses += int(audioFile[1])
        numberOfDetections += len(timeStamps)

    print()
    print("  Time to run:", time.time() - startTime)
    print("  Detections:", numberOfDetections, "Accurate detections:",
          numberOfAccurateDetections, "Total filled pauses:",
          trueNumberOfFilledPauses)
    print("  Precision:", numberOfAccurateDetections / numberOfDetections)
    print("  Recall:", numberOfAccurateDetections / trueNumberOfFilledPauses)
    print("  Score: ", (numberOfAccurateDetections / numberOfDetections) *
          (numberOfAccurateDetections / trueNumberOfFilledPauses))
    print()
Example 15
def runAlgorithmOnSlices():
    analyzer = speechAnalysis.SpeechAnalyzer()

    for subdir, dirs, files in os.walk(outputDir):
        for file in files:
            filePath = os.path.join(subdir, file)

            if "[extra].wav" in filePath:
                print(filePath)
                name = os.path.basename(filePath)[:-4]

                audio = audioModule.Audio(filePath=filePath)
                audio.makeMono()

                filledPauses, timeStamps = analyzer.getFilledPausesFromAudio(
                    audio)

                print(timeStamps)
Example 16
def runAlgorithmOnParticipants():

    underLoadFilledPauses = 0
    normalLoadFilledPauses = 0
    overLoadFilledPauses = 0

    participantCount = 30
    directory = "../media/Participant_Audio/"

    filledPausesForParticipant = [["participant", "ul", "nl", "ol"]]

    for participantNumber in range(1, participantCount + 1):
        participantData = [participantNumber]

        for condition in ["ul", "nl", "ol"]:
            filePath = directory + "p" + str(
                participantNumber) + "_" + condition + ".wav"

            if filePath != "../media/Participant_Audio/p8_nl.wav":
                print(filePath)

                audio = audioModule.Audio(filePath=filePath)
                audio.makeMono()

                filledPauses = featureModule.getFilledPauses(
                    audio.data, audio.sampleRate, utteranceWindowSize,
                    utteranceStepSize, utteranceMinimumLength,
                    utteranceF1MaximumVariance, utteranceF2MaximumVariance,
                    utteranceEnergyThreshold)

                participantData.append(len(filledPauses))

                print("   ", len(filledPauses))

        print(participantData)
        filledPausesForParticipant.append(participantData)
        print(filledPausesForParticipant)

    with open('./filledPauses/filledPausesForParticipant.csv',
              'w') as outputFile:
        writer = csv.writer(outputFile)
        for row in filledPausesForParticipant:
            writer.writerow(row)
Example 17
def runAlgorithmOnDataset():
    directory = '../media/drSmartAudio'
    dataset = []

    analyzer = speechAnalysis.SpeechAnalyzer()

    # Load the dataset info for training
    with open(directory + '/metadata.csv', 'r') as csvfile:
        reader = csv.reader(csvfile)
        dataset.extend(reader)

    # Remove header
    dataset.pop(0)

    for audioFile in dataset:
        filePath = audioFile[0]

        print(filePath)

        audio = audioModule.Audio(filePath=directory + audioFile[0])

        filledPauses, timeStamps = analyzer.getFilledPausesFromAudio(audio)
        print(len(timeStamps))
Example 18
def showVoiceActivityAndSyllablesForParticipantAudio():
    audioDirectory = "../media/Participant_Audio/*.wav"

    speechAnalyzer = speechAnalysis.SpeechAnalyzer()

    for filePath in sorted(glob.iglob(audioDirectory)):
        name = os.path.basename(filePath)[:-4]

        audio = audioModule.Audio(filePath=filePath)
        if audio.numberOfChannels != 1:
            audio.makeMono()

        print("Getting voice activity...")

        voiceActivity = speechAnalyzer.getVoiceActivityFromAudio(audio)
        voiceActivity[voiceActivity == 0] = np.nan

        voiceActivityBufferSize = int(100 / speechAnalyzer.featureStepSize)
        voiceActivityBuffered = featureModule.createBufferedBinaryArrayFromArray(
            voiceActivity == 1,
            voiceActivityBufferSize).astype(int).astype(float)
        voiceActivityBuffered[voiceActivityBuffered == 0] = np.nan

        print("Getting syllables...")

        syllables, _ = speechAnalyzer.getSyllablesFromAudio(audio)
        syllableMarkers = np.full(len(syllables), 0)

        print("Getting other features...")

        energy = featureModule.getEnergy(audio.data, audio.sampleRate,
                                         speechAnalyzer.syllableWindowSize,
                                         speechAnalyzer.featureStepSize)
        energyMinThreshold = featureModule.getEnergyMinimumThreshold(energy)
        fractionEnergyMinThreshold = energyMinThreshold / max(energy)

        zcr = librosa.feature.zero_crossing_rate(
            audio.data,
            frame_length=int(audio.sampleRate / 1000 *
                             speechAnalyzer.featureStepSize),
            hop_length=int(audio.sampleRate / 1000 *
                           speechAnalyzer.featureStepSize))[0]
        zcrTimes = np.arange(0,
                             len(audio.data) / audio.sampleRate + 1,
                             speechAnalyzer.featureStepSize / 1000)[:len(zcr)]

        pitch = speechAnalyzer.getPitchFromAudio(audio)
        pitch[pitch == 0] = np.nan
        pitchTimes = np.arange(0,
                               len(audio.data) / audio.sampleRate,
                               speechAnalyzer.featureStepSize /
                               1000)[:len(pitch)]

        times = np.arange(0,
                          len(audio.data) / audio.sampleRate,
                          speechAnalyzer.featureStepSize / 1000)
        energyTimes = np.arange(0,
                                len(audio.data) / audio.sampleRate,
                                speechAnalyzer.featureStepSize /
                                1000)[:len(energy)]

        print("Graphing!")

        plt.figure(figsize=[16, 8])
        plt.plot(zcrTimes, zcr * 1000, 'gold')
        plt.plot(energyTimes, energy / 5)
        plt.plot(pitchTimes, pitch, 'red')
        plt.plot(syllables, syllableMarkers, 'go')
        plt.plot(times, voiceActivityBuffered * -5, 'darkorange')
        plt.plot(times, voiceActivity, 'purple')
        plt.title(name)
        plt.show()
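Several examples mask pitch, syllables and filled pauses with featureModule.createBufferedBinaryArrayFromArray, which is not shown. Judging from the call sites (a boolean array plus a frame count, returning a boolean array of the same length), it appears to dilate a binary mask by a number of frames on each side; the sketch below is that assumption, not the module's actual implementation:

import numpy as np

def createBufferedBinaryArrayFromArray(binaryArray, bufferFrames):
    # Dilate a binary array: every True frame also marks the bufferFrames
    # frames on either side of it as True.
    buffered = np.zeros(len(binaryArray), dtype=bool)
    for index in np.nonzero(binaryArray)[0]:
        start = max(0, index - bufferFrames)
        buffered[start:index + bufferFrames + 1] = True
    return buffered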
Example 19
def getFeaturesFromFileUsingWindowing():
    filePath = "../media/Participant_Audio/p3_ol.wav"
    name = os.path.basename(filePath)[:-4]

    speechAnalyzer = speechAnalysis.SpeechAnalyzer()
    speechAnalyzer.lookBackSize = 5

    # Read in the file, extract data and metadata
    audio = audioModule.Audio(filePath)
    if audio.numberOfChannels > 1:
        audio.makeMono()

    # Set up time tracker
    seconds = np.zeros(shape=0)

    step = 0
    sampleStepSize = int(speechAnalyzer.stepSize * audio.sampleRate)
    sampleLookBackSize = int(speechAnalyzer.lookBackSize * audio.sampleRate)

    while step < audio.length:
        # Keep track of what second we're in
        print("Second:", step / audio.sampleRate)

        # Look backward to calculate features over long term
        if step + sampleStepSize - sampleLookBackSize > 0:

            currentWindow = audioModule.Audio(
                data=audio.data[step + sampleStepSize -
                                sampleLookBackSize:step + sampleStepSize])
            currentWindow.sampleRate = audio.sampleRate

            ### WORDS PER MINUTE
            syllables = speechAnalyzer.getSyllablesFromAudio(currentWindow)[0]
            syllableMarkers = np.full(len(syllables), 0)

            ### VAD
            voiceActivity = speechAnalyzer.getVoiceActivityFromAudio(
                currentWindow)

            ### INTENSITY
            energy = featureModule.getEnergy(currentWindow.data,
                                             currentWindow.sampleRate,
                                             speechAnalyzer.syllableWindowSize,
                                             speechAnalyzer.featureStepSize)

            energyMinThreshold = featureModule.getEnergyMinimumThreshold(
                energy)
            fractionEnergyMinThreshold = energyMinThreshold / max(energy)

            ### PITCH
            pitch = featureModule.getPitch(currentWindow.data,
                                           currentWindow.sampleRate,
                                           speechAnalyzer.featureStepSize,
                                           fractionEnergyMinThreshold)

            syllableBinaryArray = np.full(len(voiceActivity), 0)

            for timeStamp in syllables:
                syllableBinaryArray[int(timeStamp /
                                        (currentWindow.sampleRate / 1000 *
                                         speechAnalyzer.featureStepSize) *
                                        currentWindow.sampleRate)] = 1

            # Mask out syllables that coincide with voice activity
            syllableBinaryArray[voiceActivity.astype(bool)] = 0

            if max(syllableBinaryArray) >= 1:

                # Clean up va for graphing
                voiceActivity[voiceActivity == 0] = np.nan
                pitch[pitch == 0] = np.nan

                pitchTimes = np.arange(
                    0,
                    len(currentWindow.data) / currentWindow.sampleRate,
                    speechAnalyzer.featureStepSize / 1000)[:len(pitch)]
                energyTimes = np.arange(
                    0,
                    len(currentWindow.data) / currentWindow.sampleRate,
                    speechAnalyzer.featureStepSize / 1000)[:len(energy)]
                times = np.arange(
                    0,
                    len(currentWindow.data) / currentWindow.sampleRate,
                    speechAnalyzer.featureStepSize / 1000)

                plt.figure(figsize=[16, 8])
                plt.plot(energyTimes, energy / 10, pitchTimes, pitch)
                plt.plot(times, voiceActivity)
                plt.plot(syllables, syllableMarkers, 'r^')
                plt.title(name + " from " + str(step / audio.sampleRate -
                                                speechAnalyzer.lookBackSize) +
                          " to " + str(step / audio.sampleRate) + " seconds")
                # plt.savefig("./syllablesVersusVAD/" + name + "_" + str(step/audio.sampleRate - speechAnalyzer.lookBackSize) + "-" + str(step/audio.sampleRate) + "_seconds.png")
                plt.show()

        # Increment to next step
        step += sampleStepSize
Example 20
def showFeatures():
    filePath = "../media/Participant_Audio/p3_ol.wav"
    name = os.path.basename(filePath)[:-4]

    speechAnalyzer = speechAnalysis.SpeechAnalyzer()

    # Read in the file, extract data and metadata
    audio = audioModule.Audio(filePath)
    if audio.numberOfChannels > 1:
        audio.makeMono()

    ### AMPLITUDE
    energy = speechAnalyzer.getEnergyFromAudio(audio)

    ### PITCH
    pitches = speechAnalyzer.getPitchFromAudio(audio, energy)

    ### VAD
    voiceActivity = speechAnalyzer.getVoiceActivityFromAudio(audio, pitches)

    ### SYLLABLES
    syllables = speechAnalyzer.getSyllablesFromAudio(audio,
                                                     pitches)[0].astype(float)

    # ### FILLED PAUSES
    filledPauses = speechAnalyzer.getFilledPausesFromAudio(audio)[0].astype(
        float)

    # Mask features with voice activity
    bufferFrames = int(speechAnalyzer.voiceActivityMaskBufferSize /
                       speechAnalyzer.featureStepSize)
    mask = np.invert(
        featureModule.createBufferedBinaryArrayFromArray(
            voiceActivity.astype(bool), bufferFrames))

    # energy[mask[:len(energy)]] = 0
    pitches[mask[:len(pitches)]] = 0
    syllables[mask[:len(syllables)]] = 0
    filledPauses[mask[:len(filledPauses)]] = 0

    # Graphing
    pitches[pitches == 0] = np.nan
    voiceActivity[voiceActivity == 0] = np.nan
    syllables[syllables == 0] = np.nan
    filledPauses[filledPauses == 0] = np.nan

    pitchTimes = np.arange(0,
                           len(audio.data) / audio.sampleRate,
                           speechAnalyzer.featureStepSize /
                           1000)[:len(pitches)]
    times = np.arange(0,
                      len(audio.data) / audio.sampleRate,
                      speechAnalyzer.featureStepSize / 1000)

    plt.figure(figsize=[16, 8])
    plt.plot(times, energy / 10, pitchTimes, pitches)
    plt.plot(times, voiceActivity, 'orchid')
    plt.plot(times[:len(syllables)], syllables, color='c', marker='^')
    plt.plot(times[:len(filledPauses)], filledPauses, 'ro')
    plt.title(name)
    # plt.savefig("./syllablesVersusVAD/" + name + "_" + str(step/audio.sampleRate - speechAnalyzer.lookBackSize) + "-" + str(step/audio.sampleRate) + "_seconds.png")
    plt.show()
Example 21
def compareAlgorithmToSlices():
    print("Running on slices")

    speechAnalyzer = speechAnalysis.SpeechAnalyzer()
    printParameters(speechAnalyzer)

    controlYeses = 0
    controlNos = 0

    yeses = 0
    nos = 0

    startTime = time.time()

    # Compare with file of all existing
    with open('./filledPauses/filledPausesAllParticipantsRatings.csv'
              ) as csvfile:
        reader = csv.DictReader(csvfile)

        # Go through each existing filled pause
        for row in reader:
            participant = row['participant']
            condition = row['condition']
            timeStamp = row['time']
            judgement = row['judgement']

            if timeStamp == "862":
                timeStamp = "862.0"

            # Keep track of manual classification
            if judgement == "1":
                controlYeses += 1
            elif judgement == "-1":
                controlNos += 1

            filePath = "./filledPauses/" + participant + "_" + condition[
                1:] + "/" + timeStamp + "[extra].wav"
            # print(filePath)

            audio = audioModule.Audio(filePath=filePath)
            audio.makeMono()

            # Run algorithm
            filledPauses, timeStamps = speechAnalyzer.getFilledPausesFromAudio(
                audio)

            found = False

            for timeDetected in timeStamps:
                if abs(timeDetected - 1.0) < 0.2 and not found:
                    found = True
                    if judgement == "1":
                        yeses += 1
                    elif judgement == "-1":
                        nos += 1

    print()
    print("  Time to run:", time.time() - startTime)
    print("  Detections:", (yeses + nos), "Accurate detections:", yeses,
          "Total filled pauses:", controlYeses)
    print("  Precision:", yeses / (yeses + nos))
    print("  Recall:", yeses / controlYeses)
    print("  Score: ", (yeses / controlYeses) * (yeses / (yeses + nos)))
    print()
Example 22
def showSyllables():

    # filePath = "../media/cchp_english/p102/p102_en_pd.wav"
    filePath = "../media/Participant_Audio_30_Sec_Chunks/p14_ol_chunk18.wav"
    name = os.path.basename(filePath)[:-4]

    speechAnalyzer = speechAnalysis.SpeechAnalyzer()

    audio = audioModule.Audio(filePath=filePath)
    if audio.numberOfChannels != 1:
        audio.makeMono()

    audio.description()

    syllables, candidates = speechAnalyzer.getSyllablesFromAudio(audio)
    print(len(syllables))
    syllableMarkers = np.full(len(syllables), 0)
    candidateMarkers = np.full(len(candidates), 0)

    ### Energy
    energy = librosa.feature.rmse(
        audio.data,
        frame_length=int(audio.sampleRate / 1000 *
                         speechAnalyzer.featureStepSize),
        hop_length=int(audio.sampleRate / 1000 *
                       speechAnalyzer.featureStepSize))[0]
    energyTimes = np.arange(0,
                            len(audio.data) / audio.sampleRate,
                            speechAnalyzer.featureStepSize /
                            1000)[:len(energy)]

    energyMinThreshold = featureModule.getEnergyMinimumThreshold(energy)
    fractionEnergyMinThreshold = energyMinThreshold / max(energy)

    pitch = featureModule.getPitch(audio.data, audio.sampleRate,
                                   speechAnalyzer.featureStepSize,
                                   fractionEnergyMinThreshold)
    pitchTimes = np.arange(0,
                           len(audio.data) / audio.sampleRate,
                           speechAnalyzer.featureStepSize / 1000)[:len(pitch)]

    zcr = librosa.feature.zero_crossing_rate(
        audio.data,
        frame_length=int(audio.sampleRate / 1000 *
                         speechAnalyzer.featureStepSize * 4),
        hop_length=int(audio.sampleRate / 1000 *
                       speechAnalyzer.featureStepSize))[0]
    zcrTimes = np.arange(0,
                         len(audio.data) / audio.sampleRate + 1,
                         speechAnalyzer.featureStepSize / 1000)[:len(zcr)]

    voiceActivity = speechAnalyzer.getVoiceActivityFromAudio(audio)
    voiceActivity[voiceActivity == 0] = np.nan
    voiceActivityTimes = np.arange(0,
                                   len(audio.data) / audio.sampleRate,
                                   speechAnalyzer.featureStepSize /
                                   1000)[:len(voiceActivity)]
    print(len(voiceActivity), len(voiceActivityTimes))

    times = np.arange(0,
                      len(audio.data) / audio.sampleRate,
                      speechAnalyzer.featureStepSize / 1000)
    signalTimes = np.arange(0,
                            len(audio.data) / audio.sampleRate,
                            1 / audio.sampleRate)

    plt.figure(figsize=[16, 8])
    plt.plot(energyTimes, energy / 10, pitchTimes, pitch, zcrTimes, zcr * 100,
             candidates, candidateMarkers, 'ro')
    plt.plot(syllables, syllableMarkers, 'go')
    plt.plot(voiceActivityTimes, voiceActivity)
    plt.title(name)
    plt.show()
Example 23
def validateWithTranscript():
    speechAnalyzer = speechAnalysis.SpeechAnalyzer()

    transcript = []

    totalNumberOfFilledPauses = 0
    totalNumberOfCorrectlyDetectedPauses = 0
    totalNumberOfFalseAlarms = 0

    with open("../media/filled_pauses_validation_participant_audio" +
              "/filled_pauses.txt") as transcriptFile:
        lines = transcriptFile.readlines()
        for row in lines:
            transcript.append(row.strip().split(', '))

    for line in transcript:
        name = line[0]

        if name[0] != "#":
            actualFilledPausesCount = int(line[1])

            path = None

            # for filePath in sorted(glob.iglob("../media/filled_pauses_validation_participant_audio/" + "*.wav")):
            #     fileName = os.path.basename(filePath)[:-4]
            #
            #     if fileName == name:
            #         path = filePath

            for filePath in sorted(glob.iglob(audioDirectory + "*.wav")):
                fileName = os.path.basename(filePath)[:-4]

                if fileName == name:
                    path = filePath

            if path:
                audio = audioModule.Audio(filePath=path)
                if audio.numberOfChannels != 1:
                    audio.makeMono()

                filledPauses, timeStamps = speechAnalyzer.getFilledPausesFromAudio(
                    audio)

                if True:
                    voiceActivity = speechAnalyzer.getVoiceActivityFromAudio(
                        audio)
                    bufferFrames = int(
                        speechAnalyzer.voiceActivityMaskBufferSize /
                        speechAnalyzer.featureStepSize)
                    mask = np.invert(
                        featureModule.createBufferedBinaryArrayFromArray(
                            voiceActivity.astype(bool), bufferFrames))
                    filledPauses[mask] = 0

                filledPausesMarkers = np.full(int(sum(filledPauses)), 0)
                filledPausesCount = int(sum(filledPauses))

                print(name, "\t", actualFilledPausesCount, filledPausesCount,
                      timeStamps)

                totalNumberOfFilledPauses += actualFilledPausesCount

                if filledPausesCount > actualFilledPausesCount:
                    totalNumberOfFalseAlarms += filledPausesCount - actualFilledPausesCount
                    totalNumberOfCorrectlyDetectedPauses += actualFilledPausesCount
                else:
                    totalNumberOfCorrectlyDetectedPauses += filledPausesCount

    precision = totalNumberOfCorrectlyDetectedPauses / (
        totalNumberOfCorrectlyDetectedPauses + totalNumberOfFalseAlarms)
    recall = totalNumberOfCorrectlyDetectedPauses / totalNumberOfFilledPauses

    fMeasure = 2 * precision * recall / (precision + recall)

    print("    Total     | Filled pauses:", totalNumberOfFilledPauses)
    print("     New      | Correct filled pauses:",
          totalNumberOfCorrectlyDetectedPauses, "False alarms:",
          totalNumberOfFalseAlarms, "Precision:", precision, "Recall:", recall,
          "F1", fMeasure)
Example 24
def compareEnergyAndIntensity():
    filePath = "../media/Participant_Audio/p10_ol.wav"
    name = os.path.basename(filePath)[:-4]

    stepSize = 10  # In milliseconds
    windowSize = 10

    audio = audioModule.Audio(filePath=filePath)
    if audio.numberOfChannels != 1:
        audio.makeMono()

    stepSizeInSamples = int(audio.sampleRate / 1000 * stepSize)
    windowSizeInSamples = int(audio.sampleRate / 1000 * windowSize)

    # Parselmouth intensity
    parselSound = parselmouth.Sound(values=audio.data,
                                    sampling_frequency=audio.sampleRate)
    intensityObject = parselSound.to_intensity(minimum_pitch=50.0,
                                               time_step=stepSize / 1000)
    intensity = intensityObject.values.T

    shortTermEnergy = np.array([
        math.sqrt(
            sum(audio.data[step:step + windowSizeInSamples]**2) /
            windowSizeInSamples)
        for step in range(0, len(audio.data), stepSizeInSamples)
    ])

    rms = np.array([
        sum(audio.data[step:step + windowSizeInSamples]**2)
        for step in range(0, len(audio.data), stepSizeInSamples)
    ])

    # Librosa rms
    rms = librosa.feature.rms(audio.data,
                              frame_length=windowSizeInSamples,
                              hop_length=stepSizeInSamples)[0]

    # Current intensity measure
    amplitude = np.absolute(audio.data)

    intensityTimes = np.arange(0,
                               len(audio.data) / audio.sampleRate,
                               stepSize / 1000)[:len(intensity)]
    shortTermEnergyTimes = np.arange(0,
                                     len(audio.data) / audio.sampleRate,
                                     stepSize / 1000)[:len(shortTermEnergy)]
    rmsTimes = np.arange(0,
                         len(audio.data) / audio.sampleRate,
                         stepSize / 1000)[:len(rms)]
    signalTimes = np.arange(0,
                            len(audio.data) / audio.sampleRate,
                            1 / audio.sampleRate)

    plt.figure(figsize=[16, 8])
    # plt.plot(signalTimes, amplitude / 2)
    plt.plot(shortTermEnergyTimes, shortTermEnergy)
    plt.plot(rmsTimes, rms)
    plt.plot(intensityTimes, intensity * 100)
    plt.title(name)
    plt.show()
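The hand-rolled shortTermEnergy above is itself a root mean square (the square root of the mean squared sample in each window), so it should track librosa's RMS closely once librosa's frame centering is turned off. A quick consistency check that could follow the shortTermEnergy computation inside compareEnergyAndIntensity, as a sketch that assumes a librosa version accepting these keyword arguments and float audio data:

    rmsNoCenter = librosa.feature.rms(y=audio.data,
                                      frame_length=windowSizeInSamples,
                                      hop_length=stepSizeInSamples,
                                      center=False)[0]
    # librosa drops the trailing partial window that the manual loop keeps.
    print(np.allclose(shortTermEnergy[:len(rmsNoCenter)], rmsNoCenter, rtol=1e-3))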