Example no. 1
def fileSpectrogramWrapper(wav_file):
    if not os.path.isfile(wav_file):
        raise Exception("Input audio file not found!")
    [fs, x] = audioBasicIO.readAudioFile(wav_file)
    x = audioBasicIO.stereo2mono(x)
    specgram, TimeAxis, FreqAxis = aF.stSpectogram(x, fs, round(fs * 0.040),
                                                   round(fs * 0.040), True)
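A minimal invocation sketch (the file name is hypothetical): the wrapper plots the spectrogram of a WAV file using 40 ms windows with no overlap, since both the window and the step are round(fs * 0.040).

# Hypothetical usage, assuming the imports of the example above:
fileSpectrogramWrapper("sample.wav")  # raises if the file is not found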
Example no. 3
def mtCNN_classification(signal, Fs, mtWin, mtStep, SingleFrame_net,
                         channel_mean, input_transformer, classNamesCNN,
                         input_size):
    mtWin2 = int(mtWin * Fs)
    mtStep2 = int(mtStep * Fs)
    stWin = 0.040
    stStep = 0.005
    N = len(signal)
    curPos = 0
    count = 0
    fileNames = []
    flagsInd = []
    Ps = []
    randomString = (''.join(
        random.SystemRandom().choice(string.ascii_uppercase + string.digits)
        for _ in range(5)))
    while curPos < N:  # for each mid-term segment
        N1 = curPos
        N2 = curPos + mtWin2 + stStep * Fs
        if N2 > N:
            N2 = N
        xtemp = signal[int(N1):int(N2)]  # get mid-term segment
        if xtemp.shape[0] < 8000:  # skip segments shorter than 8000 samples
            curPos += mtStep2
            count += 1
            continue
        specgram, TimeAxis, FreqAxis = aF.stSpectogram(
            xtemp, Fs, round(Fs * stWin), round(Fs * stStep),
            False)  # compute spectrogram
        specgram = cv2.resize(specgram, (input_size, input_size),
                              interpolation=cv2.INTER_LINEAR)  # resize to input_size x input_size
        if specgram.shape[0] != specgram.shape[1]:  # sanity check: must be square
            break
        curFileName = randomString + "temp_{0:d}.png".format(count)
        fileNames.append(curFileName)
        imSpec = Image.fromarray(np.uint8(matplotlib.cm.jet(specgram) * 255))  # create image
        imSpec.save(curFileName)  # scipy.misc.imsave was removed in modern SciPy; save via PIL
        T1 = time.time()
        output_classes, outputP = singleFrame_classify_video(
            curFileName, SingleFrame_net, input_transformer, False,
            classNamesCNN, input_size)
        T2 = time.time()
        os.remove(curFileName)
        flagsInd.append(classNamesCNN.index(output_classes[0]))  # record predicted class index
        Ps.append(np.copy(outputP[0]))
        curPos += mtStep2
        count += 1
    return np.array(flagsInd), classNamesCNN, np.array(Ps)
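The seconds-to-samples conversion at the top of the function is worth making concrete; a small sketch with assumed values (not from the source):

Fs, mtWin, mtStep = 16000, 1.0, 0.5  # assumed example values
print(int(mtWin * Fs), int(mtStep * Fs))  # -> 16000 8000: 1 s segments advancing by 0.5 s (50% overlap)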
Example no. 4
def createSpectrogramFile(x, Fs, fileName, stWin, stStep, label):
    specgramOr, TimeAxis, FreqAxis = aF.stSpectogram(x, Fs, round(Fs * stWin),
                                                     round(Fs * stStep), False)
    specgramOr = filter.medfilt2d(specgramOr, 5)
    save_path = "medfilt5_label_" + label + '/'
    if not os.path.exists(save_path):
        os.mkdir(save_path)
    specgram = cv2.resize(specgramOr, (227, 227),
                          interpolation=cv2.INTER_LINEAR)
    im1 = Image.fromarray(np.uint8(matplotlib.cm.jet(specgram) * 255))
    im1.save(save_path + fileName)  # scipy.misc.imsave was removed in modern SciPy; save via PIL
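A hedged usage sketch (the file name, window parameters, and label are assumptions; x and Fs as returned by audioBasicIO.readAudioFile):

# Hypothetical call: 40 ms windows, 5 ms step; output lands in medfilt5_label_music/
createSpectrogramFile(x, Fs, "clip_001.png", 0.040, 0.005, "music")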
Example no. 5
def createSpectrogramFile(x, Fs, fileName, stWin, stStep):
    specgramOr, TimeAxis, FreqAxis = aF.stSpectogram(x, Fs, round(Fs * stWin),
                                                     round(Fs * stStep), False)
    print(specgramOr.shape)
    if inputs[2] == 'full':  # 'inputs' is a global defined by the enclosing script
        print(specgramOr)
        numpy.save(fileName.replace('.png', '') + '_spectrogram', specgramOr)
    else:
        specgram = cv2.resize(specgramOr, (227, 227),
                              interpolation=cv2.INTER_LINEAR)
        im1 = Image.fromarray(numpy.uint8(matplotlib.cm.jet(specgram) * 255))
        im1.save(fileName)  # scipy.misc.imsave was removed in modern SciPy; save via PIL
Example no. 6
def mtCNN_classification(signal, Fs, mtWin, mtStep, RGB_singleFrame_net,
                         SOUND_mean_RGB, transformer_RGB, classNamesCNN):
    mtWin2 = int(mtWin * Fs)
    mtStep2 = int(mtStep * Fs)
    stWin = 0.020
    stStep = 0.015
    N = len(signal)
    curPos = 0
    count = 0
    fileNames = []
    flagsInd = []
    Ps = []
    randomString = (''.join(
        random.SystemRandom().choice(string.ascii_uppercase + string.digits)
        for _ in range(5)))
    while curPos < N:  # for each mid-term segment
        N1 = curPos
        N2 = curPos + mtWin2 + stStep * Fs
        if N2 > N:
            N2 = N
        xtemp = signal[int(N1):int(N2)]  # get mid-term segment

        specgram, TimeAxis, FreqAxis = aF.stSpectogram(
            xtemp, Fs, round(Fs * stWin), round(Fs * stStep),
            False)  # compute spectrogram
        if specgram.shape[0] != specgram.shape[1]:  # TODO (this must be dynamic!)
            break
        specgram = cv2.resize(specgram, (227, 227),
                              interpolation=cv2.INTER_LINEAR)  # resize to 227 x 227 (cv2.resize replaces the removed scipy.misc.imresize; specgram is square here)

        imSpec = Image.fromarray(np.uint8(matplotlib.cm.jet(specgram) *
                                          255))  # create image
        curFileName = randomString + "temp_{0:d}.png".format(count)
        fileNames.append(curFileName)
        imSpec.save(curFileName)  # scipy.misc.imsave was removed in modern SciPy; save via PIL

        T1 = time.time()
        output_classes, outputP = singleFrame_classify_video(
            curFileName, RGB_singleFrame_net, transformer_RGB, False,
            classNamesCNN)
        T2 = time.time()
        flagsInd.append(classNamesCNN.index(output_classes[0]))
        Ps.append(outputP[0])
        curPos += mtStep2
        count += 1
    return np.array(flagsInd), classNamesCNN, np.array(Ps)
Example no. 7
def wave_to_spectrogram(wav: WaveData, window_size, window_step) -> np.ndarray:
    """
    Converts the given WAV data into a spectrogram.

    :param wav:                 The WAV data to convert.
    :param window_size:         The width of the sampling window in samples.
    :param window_step:         The step the sampling window takes between FFTs.
    :return:                    A spectrogram.
    """
    # Convert the WAV data into a NumPy array
    wav_array: np.ndarray = np.array(wav.data)

    # Get the spectrogram using pyAudioAnalysis
    return aFEx.stSpectogram(wav_array, 1, window_size, window_step, False)[0]
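A hedged usage sketch (the WaveData instance and the window values are assumptions; window sizes are expressed in samples because the function passes a sampling rate of 1 to stSpectogram):

# Hypothetical call: 320-sample windows advancing 160 samples per FFT
spec = wave_to_spectrogram(wav, window_size=320, window_step=160)
print(spec.shape)  # (number of windows, number of frequency bins)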
Example no. 8
def _generate_spectrogram(self, filename):
    [Fs, x] = audioBasicIO.readAudioFile(filename)
    x = audioBasicIO.stereo2mono(x)
    specgram, TimeAxis, FreqAxis = aF.stSpectogram(x, Fs, round(Fs * 0.040),
                                                   round(Fs * 0.040), False)
    return (specgram, TimeAxis, FreqAxis)
Example no. 9
print("\n\n\n * * * TEST 1 * * * \n\n\n")
[Fs, x] = audioBasicIO.read_audio_file(root_data_path +
                                       "pyAudioAnalysis/data/count.wav")
F, f_names = audioFeatureExtraction.stFeatureExtraction(
    x, Fs, 0.050 * Fs, 0.025 * Fs)
plt.subplot(2, 1, 1)
plt.plot(F[0, :])
plt.xlabel('Frame no')
plt.ylabel(f_names[0])
plt.subplot(2, 1, 2)
plt.plot(F[1, :])
plt.xlabel('Frame no')
plt.ylabel(f_names[1])
plt.show()

print("\n\n\n * * * TEST 2 * * * \n\n\n")
[Fs, x] = audioBasicIO.read_audio_file(root_data_path +
                                       "pyAudioAnalysis/data/doremi.wav")
x = audioBasicIO.stereo_to_mono(x)
specgram, TimeAxis, FreqAxis = audioFeatureExtraction.stSpectogram(
    x, Fs, round(Fs * 0.040), round(Fs * 0.040), True)

print("\n\n\n * * * TEST 3 * * * \n\n\n")
[Fs, x] = audioBasicIO.read_audio_file(root_data_path +
                                       "pyAudioAnalysis/data/doremi.wav")
x = audioBasicIO.stereo_to_mono(x)
specgram, TimeAxis, FreqAxis = audioFeatureExtraction.stChromagram(
    x, Fs, round(Fs * 0.040), round(Fs * 0.040), True)

print("\n\n\n * * * TEST 4 * * * \n\n\n")
aT.featureAndTrain([root_data_path + "SM/speech", root_data_path + "SM/music"],
                   1.0, 1.0, 0.2, 0.2, "svm", "temp", True)

print("\n\n\n * * * TEST 5 * * * \n\n\n")
[flagsInd, classesAll, acc, CM] = aS.mtFileClassification(
    root_data_path + "pyAudioAnalysis/data/scottish.wav",
    root_data_path + "pyAudioAnalysis/data/svmSM", "svm", True,
    root_data_path + 'pyAudioAnalysis/data/scottish.segments')
Example no. 10
def recordAudioSegments(BLOCKSIZE,
                        Fs=16000,
                        showSpectrogram=False,
                        showChromagram=False,
                        recordActivity=False):

    midTermBufferSize = int(Fs * BLOCKSIZE)

    print "Press Ctr+C to stop recording"

    startDateTimeStr = datetime.datetime.now().strftime("%Y_%m_%d_%I:%M%p")

    MEAN, STD = loadMEANS("svmMovies8classesMEANS")  # load MEAN feature values

    pa = pyaudio.PyAudio()

    stream = pa.open(format=FORMAT,
                     channels=1,
                     rate=Fs,
                     input=True,
                     frames_per_buffer=midTermBufferSize)

    midTermBuffer = []
    curWindow = []
    count = 0
    global allData
    allData = []
    energy100_buffer_zero = []
    curActiveWindow = numpy.array([])
    timeStart = time.time()

    while 1:
        try:
            block = stream.read(midTermBufferSize)
            countB = len(block) // 2
            format = "%dh" % (countB)
            shorts = struct.unpack(format, block)
            curWindow = list(shorts)
            midTermBuffer = midTermBuffer + curWindow  # copy to midTermBuffer
            del curWindow
            # NOTE: the buffer-full check below is disabled, so every block is processed
            # if len(midTermBuffer) == midTermBufferSize:
            if 1:
                elapsedTime = (time.time() - timeStart
                               )  # time since recording started
                dataTime = (count + 1) * BLOCKSIZE  # data-driven time

                # TODO
                # mtF, _ = aF.mtFeatureExtraction(midTermBuffer, Fs, BLOCKSIZE * Fs, BLOCKSIZE * Fs, 0.050 * Fs, 0.050 * Fs)
                # curFV = (mtF - MEAN) / STD
                # TODO
                allData += midTermBuffer
                midTermBuffer = numpy.double(
                    midTermBuffer)  # convert current buffer to numpy array

                # Compute spectrogram
                if showSpectrogram:
                    (spectrogram, TimeAxisS, FreqAxisS) = aF.stSpectogram(
                        midTermBuffer, Fs, 0.020 * Fs,
                        0.02 * Fs)  # extract spectrogram
                    FreqAxisS = numpy.array(FreqAxisS)  # frequency axis
                    DominantFreqs = FreqAxisS[numpy.argmax(
                        spectrogram, axis=1
                    )]  # most dominant frequencies (for each short-term window)
                    maxFreq = numpy.mean(
                        DominantFreqs)  # get average most dominant freq
                    maxFreqStd = numpy.std(DominantFreqs)

                # Compute chromagram
                if showChromagram:
                    (chromagram, TimeAxisC,
                     FreqAxisC) = aF.stChromagram(midTermBuffer, Fs,
                                                  0.020 * Fs,
                                                  0.02 * Fs)  # get chromagram
                    FreqAxisC = numpy.array(
                        FreqAxisC)  # frequency axis (12 chroma classes)
                    DominantFreqsC = FreqAxisC[numpy.argmax(
                        chromagram, axis=1)]  # most dominant chroma classes
                    maxFreqC = most_common(DominantFreqsC)[
                        0]  # get most common among all short-term windows

                # Plot signal window
                signalPlotCV = plotCV(
                    scipy.signal.resample(midTermBuffer + 16000, WidthPlot),
                    WidthPlot, HeightPlot, 32000)
                cv2.imshow('Signal', signalPlotCV)
                cv2.moveWindow('Signal', 50, statusHeight + 50)
                # Show spectrogram
                if showSpectrogram:
                    iSpec = numpy.array(spectrogram.T * 255, dtype=numpy.uint8)
                    iSpec2 = cv2.resize(iSpec, (WidthPlot, HeightPlot),
                                        interpolation=cv2.INTER_CUBIC)
                    iSpec2 = cv2.applyColorMap(iSpec2, cv2.COLORMAP_JET)
                    cv2.putText(iSpec2, "maxFreq: %.0f Hz" % maxFreq, (0, 11),
                                cv2.FONT_HERSHEY_PLAIN, 1, (200, 200, 200))
                    cv2.imshow('Spectrogram', iSpec2)
                    cv2.moveWindow('Spectrogram', 50,
                                   HeightPlot + statusHeight + 60)
                # Show chromagram
                if showChromagram:
                    iChroma = numpy.array(
                        (chromagram.T / chromagram.max()) * 255,
                        dtype=numpy.uint8)
                    iChroma2 = cv2.resize(iChroma, (WidthPlot, HeightPlot),
                                          interpolation=cv2.INTER_CUBIC)
                    iChroma2 = cv2.applyColorMap(iChroma2, cv2.COLORMAP_JET)
                    cv2.putText(iChroma2, "maxFreqC: %s" % maxFreqC, (0, 11),
                                cv2.FONT_HERSHEY_PLAIN, 1, (200, 200, 200))
                    cv2.imshow('Chroma', iChroma2)
                    cv2.moveWindow('Chroma', 50,
                                   2 * HeightPlot + statusHeight + 60)
                # Activity Detection:
                energy100 = (100 * numpy.sum(midTermBuffer * midTermBuffer) /
                             (midTermBuffer.shape[0] * 32000 * 32000))
                if count < 10:  # TODO make this param
                    energy100_buffer_zero.append(energy100)
                    mean_energy100_zero = numpy.mean(
                        numpy.array(energy100_buffer_zero))
                else:
                    mean_energy100_zero = numpy.mean(
                        numpy.array(energy100_buffer_zero))
                    if (energy100 < 1.2 * mean_energy100_zero):
                        if curActiveWindow.shape[
                                0] > 0:  # if a sound has been detected in the previous segment:
                            activeT2 = elapsedTime - BLOCKSIZE  # set time of current active window
                            if activeT2 - activeT1 > minActivityDuration:
                                wavFileName = startDateTimeStr + "_activity_{0:.2f}_{1:.2f}.wav".format(
                                    activeT1, activeT2)
                                if recordActivity:
                                    wavfile.write(
                                        wavFileName, Fs,
                                        numpy.int16(curActiveWindow)
                                    )  # write current active window to file
                            curActiveWindow = numpy.array(
                                [])  # delete current active window
                    else:
                        if curActiveWindow.shape[
                                0] == 0:  # this is a new active window!
                            activeT1 = elapsedTime - BLOCKSIZE  # set timestamp start of new active window
                        curActiveWindow = numpy.concatenate(
                            (curActiveWindow, midTermBuffer))

                # Show status messages on the Status cv window:
                textIm = numpy.zeros((statusHeight, WidthPlot, 3))
                statusStrTime = "time: %.1f sec" % elapsedTime + " - data time: %.1f sec" % dataTime + " - loss : %.1f sec" % (
                    elapsedTime - dataTime)
                statusStrFeature = "ene1:%.1f" % energy100 + " eneZero:%.1f" % mean_energy100_zero
                cv2.putText(textIm, statusStrTime, (0, 11),
                            cv2.FONT_HERSHEY_PLAIN, 1, (200, 200, 200))
                cv2.putText(textIm, statusStrFeature, (0, 22),
                            cv2.FONT_HERSHEY_PLAIN, 1, (200, 200, 200))
                if curActiveWindow.shape[0] > 0:
                    cv2.putText(textIm, "sound", (0, 33),
                                cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 255))
                else:
                    cv2.putText(textIm, "silence", (0, 33),
                                cv2.FONT_HERSHEY_PLAIN, 1, (200, 200, 220))
                cv2.imshow("Status", textIm)
                cv2.moveWindow("Status", 50, 0)
                midTermBuffer = []
                ch = cv2.waitKey(10)
                count += 1

        except IOError as e:
            print("(%d) Error recording: %s" % (errorcount, e))
Example no. 11
def recordAndCalcHR(BLOCKSIZE,
                    Fs,
                    showSpectrogram=False,
                    showChromagram=False,
                    recordActivity=False):
    print("Press Ctr+C to stop process")

    startDateTimeStr = datetime.datetime.now().strftime("%Y_%m_%d_%I:%M%p")

    MEAN, STD = loadMEANS("svmMovies8classesMEANS")  # load MEAN feature values

    inp = alsaaudio.PCM(alsaaudio.PCM_CAPTURE,
                        alsaaudio.PCM_NONBLOCK)  # open alsaaudio capture
    inp.setchannels(1)  # 1 channel
    inp.setrate(Fs)  # set sampling freq
    inp.setformat(alsaaudio.PCM_FORMAT_S16_LE)  # set 2-byte sample
    inp.setperiodsize(512)
    midTermBufferSize = int(Fs * BLOCKSIZE)
    midTermBuffer = []
    curWindow = []
    count = 0
    global allData
    allData = []
    energy100_buffer_zero = []
    curActiveWindow = numpy.array([])
    timeStart = time.time()

    while 1:
        l, data = inp.read()  # read data from buffer
        if l:
            for i in range(len(data) // 2):
                curWindow.append(audioop.getsample(data, 2,
                                                   i))  # get audio samples

            if (len(curWindow) + len(midTermBuffer) > midTermBufferSize):
                samplesToCopyToMidBuffer = midTermBufferSize - len(
                    midTermBuffer)
            else:
                samplesToCopyToMidBuffer = len(curWindow)

            midTermBuffer = midTermBuffer + curWindow[
                0:samplesToCopyToMidBuffer]
            # copy to midTermBuffer
            del (curWindow[0:samplesToCopyToMidBuffer])

            if len(midTermBuffer
                   ) == midTermBufferSize:  # if midTermBuffer is full:
                elapsedTime = (time.time() - timeStart
                               )  # time since recording started
                dataTime = (count + 1) * BLOCKSIZE  # data-driven time

                # TODO
                # mtF, _ = aF.mtFeatureExtraction(midTermBuffer, Fs, BLOCKSIZE * Fs, BLOCKSIZE * Fs, 0.050 * Fs, 0.050 * Fs)
                # curFV = (mtF - MEAN) / STD
                # TODO
                allData += midTermBuffer
                midTermBuffer = numpy.double(
                    midTermBuffer)  # convert current buffer to numpy array

                # Compute spectrogram
                if showSpectrogram:
                    (spectrogram, TimeAxisS, FreqAxisS) = aF.stSpectogram(
                        midTermBuffer, Fs, 0.020 * Fs,
                        0.02 * Fs)  # extract spectrogram
                    FreqAxisS = numpy.array(FreqAxisS)  # frequency axis
                    DominantFreqs = FreqAxisS[numpy.argmax(
                        spectrogram, axis=1
                    )]  # most dominant frequencies (for each short-term window)
                    maxFreq = numpy.mean(
                        DominantFreqs)  # get average most dominant freq
                    maxFreqStd = numpy.std(DominantFreqs)

                # Compute chromagram
                if showChromagram:
                    (chromagram, TimeAxisC,
                     FreqAxisC) = aF.stChromagram(midTermBuffer, Fs,
                                                  0.020 * Fs,
                                                  0.02 * Fs)  # get chromagram
                    FreqAxisC = numpy.array(
                        FreqAxisC)  # frequency axis (12 chroma classes)
                    DominantFreqsC = FreqAxisC[numpy.argmax(
                        chromagram, axis=1)]  # most dominant chroma classes
                    maxFreqC = most_common(DominantFreqsC)[
                        0]  # get most common among all short-term windows

                # Plot signal window
                signalPlotCV = plotCV(
                    scipy.signal.resample(midTermBuffer + 16000, WidthPlot),
                    WidthPlot, HeightPlot, 32000)
                cv2.imshow('Signal', signalPlotCV)
                cv2.moveWindow('Signal', 50, statusHeight + 50)

                # Show spectrogram
                if showSpectrogram:
                    iSpec = numpy.array(spectrogram.T * 255, dtype=numpy.uint8)
                    iSpec2 = cv2.resize(iSpec, (WidthPlot, HeightPlot),
                                        interpolation=cv2.INTER_CUBIC)
                    iSpec2 = cv2.applyColorMap(iSpec2, cv2.COLORMAP_JET)
                    cv2.putText(iSpec2, "maxFreq: %.0f Hz" % maxFreq, (0, 11),
                                cv2.FONT_HERSHEY_PLAIN, 1, (200, 200, 200))
                    cv2.imshow('Spectrogram', iSpec2)
                    cv2.moveWindow('Spectrogram', 50,
                                   HeightPlot + statusHeight + 60)

                # Show chromagram
                if showChromagram:
                    iChroma = numpy.array(
                        (chromagram.T / chromagram.max()) * 255,
                        dtype=numpy.uint8)
                    iChroma2 = cv2.resize(iChroma, (WidthPlot, HeightPlot),
                                          interpolation=cv2.INTER_CUBIC)
                    iChroma2 = cv2.applyColorMap(iChroma2, cv2.COLORMAP_JET)
                    cv2.putText(iChroma2, "maxFreqC: %s" % maxFreqC, (0, 11),
                                cv2.FONT_HERSHEY_PLAIN, 1, (200, 200, 200))
                    cv2.imshow('Chroma', iChroma2)
                    cv2.moveWindow('Chroma', 50,
                                   2 * HeightPlot + statusHeight + 60)

                # Activity Detection:
                energy100 = (100 * numpy.sum(midTermBuffer * midTermBuffer) /
                             (midTermBuffer.shape[0] * 32000 * 32000))
                if count < 10:  # TODO make this param
                    energy100_buffer_zero.append(energy100)
                    mean_energy100_zero = numpy.mean(
                        numpy.array(energy100_buffer_zero))
                else:
                    mean_energy100_zero = numpy.mean(
                        numpy.array(energy100_buffer_zero))
                    if (energy100 < 1.2 * mean_energy100_zero):
                        if curActiveWindow.shape[
                                0] > 0:  # if a sound has been detected in the previous segment:
                            activeT2 = elapsedTime - BLOCKSIZE  # set time of current active window
                            if activeT2 - activeT1 > minActivityDuration:
                                wavFileName = startDateTimeStr + "_activity_{0:.2f}_{1:.2f}.wav".format(
                                    activeT1, activeT2)
                                if recordActivity:
                                    wavfile.write(
                                        wavFileName, Fs,
                                        numpy.int16(curActiveWindow)
                                    )  # write current active window to file
                            curActiveWindow = numpy.array(
                                [])  # delete current active window
                    else:
                        if curActiveWindow.shape[
                                0] == 0:  # this is a new active window!
                            activeT1 = elapsedTime - BLOCKSIZE  # set timestamp start of new active window
                        curActiveWindow = numpy.concatenate(
                            (curActiveWindow, midTermBuffer))

                # Show status messages on the Status cv window:
                textIm = numpy.zeros((statusHeight, WidthPlot, 3))
                statusStrTime = "time: %.1f sec" % elapsedTime + " - data time: %.1f sec" % dataTime + " - loss : %.1f sec" % (
                    elapsedTime - dataTime)
                statusStrFeature = "ene1:%.1f" % energy100 + " eneZero:%.1f" % mean_energy100_zero
                cv2.putText(textIm, statusStrTime, (0, 11),
                            cv2.FONT_HERSHEY_PLAIN, 1, (200, 200, 200))
                cv2.putText(textIm, statusStrFeature, (0, 22),
                            cv2.FONT_HERSHEY_PLAIN, 1, (200, 200, 200))
                if curActiveWindow.shape[0] > 0:
                    cv2.putText(textIm, "sound", (0, 33),
                                cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 255))
                else:
                    cv2.putText(textIm, "silence", (0, 33),
                                cv2.FONT_HERSHEY_PLAIN, 1, (200, 200, 220))
                cv2.imshow("Status", textIm)
                cv2.moveWindow("Status", 50, 0)
                midTermBuffer = []
                ch = cv2.waitKey(10)
                count += 1
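The partial-copy logic earlier in the loop fills midTermBuffer up to, but never past, midTermBufferSize; a quick trace with assumed sizes:

midTermBufferSize = 1000                           # assumed
midTermBuffer, curWindow = [0] * 900, [0] * 300    # assumed current fill levels
# 900 + 300 > 1000, so only 1000 - 900 = 100 samples are copied this round
print(min(len(curWindow), midTermBufferSize - len(midTermBuffer)))  # 100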
Example no. 12
"""! 
@brief Example 04
@details pyAudioAnalysis spectrogram calculation and visualization example
@author Theodoros Giannakopoulos {[email protected]}
"""
import numpy as np
import scipy.io.wavfile as wavfile
import plotly
import plotly.graph_objs as go
from pyAudioAnalysis import audioFeatureExtraction as aF
layout = go.Layout(
    title='Spectrogram Extraction Example using pyAudioAnalysis',
    xaxis=dict(title='time (sec)', ),
    yaxis=dict(title='Freqs (Hz)', ))


def normalize_signal(signal):
    signal = np.double(signal)
    signal = signal / (2.0**15)
    return (signal - signal.mean()) / ((np.abs(signal)).max() + 0.0000000001)


if __name__ == '__main__':
    [Fs, s] = wavfile.read("../data/sample_music.wav")
    s = normalize_signal(s)
    [S, t, f] = aF.stSpectogram(s, Fs, int(Fs * 0.020), int(Fs * 0.020))
    heatmap = go.Heatmap(z=S.T, y=f, x=t)
    plotly.offline.plot(go.Figure(data=[heatmap], layout=layout),
                        filename="temp.html",
                        auto_open=True)
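normalize_signal first scales the 16-bit samples into [-1, 1] (dividing by 2**15), then subtracts the mean and divides by the peak magnitude of the uncentered signal, with a small epsilon guarding against division by zero. A quick check with assumed toy samples:

sig = np.array([0, 16384, -32768], dtype=np.int16)  # assumed toy samples
print(normalize_signal(sig))  # zero-mean output, scaled by the pre-centering peak (1.0 here)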
Example no. 13
def main(argv):
    dirName = argv[1]
    types = ('*.wav', )
    filesList = []
    for files in types:
        filesList.extend(glob.glob(os.path.join(dirName, files)))
    filesList = sorted(filesList)
    WIDTH_SEC = 2.4
    stWin = 0.020
    stStep = 0.015
    WIDTH = int(round(WIDTH_SEC / stStep))  # crop width in short-term frames; must be an int for slicing

    for f in filesList:
        [Fs, x] = audioBasicIO.readAudioFile(f)
        print(Fs)
        x = audioBasicIO.stereo2mono(x)
        specgramOr, TimeAxis, FreqAxis = aF.stSpectogram(
            x, Fs, round(Fs * stWin), round(Fs * stStep), False)
        if specgramOr.shape[0] > WIDTH:
            specgram = specgramOr[int(specgramOr.shape[0] / 2) -
                                  WIDTH // 2:int(specgramOr.shape[0] / 2) +
                                  WIDTH // 2, :]
            specgram = scipy.misc.imresize(specgram,
                                           float(227.0) /
                                           float(specgram.shape[0]),
                                           interp='bilinear')
            print(specgram.shape)
            im = Image.fromarray(numpy.uint8(
                matplotlib.cm.jet(specgram) * 255))
            #plt.imshow(im)
            scipy.misc.imsave(f.replace(".wav", ".jpg"), im)

            if int(specgramOr.shape[0] / 2) - WIDTH // 2 - int(
                (0.2) / stStep) > 0:
                specgram = specgramOr[
                    int(specgramOr.shape[0] / 2) - WIDTH // 2 -
                    int((0.2) / stStep):int(specgramOr.shape[0] / 2) +
                    WIDTH // 2 - int((0.2) / stStep), :]
                specgram = scipy.misc.imresize(specgram,
                                               float(227.0) /
                                               float(specgram.shape[0]),
                                               interp='bilinear')
                im = Image.fromarray(
                    numpy.uint8(matplotlib.cm.jet(specgram) * 255))
                print(specgram.shape)
                scipy.misc.imsave(f.replace(".wav", "_02A.jpg"), im)

                specgram = specgramOr[
                    int(specgramOr.shape[0] / 2) - WIDTH // 2 +
                    int((0.2) / stStep):int(specgramOr.shape[0] / 2) +
                    WIDTH // 2 + int((0.2) / stStep), :]
                specgram = scipy.misc.imresize(specgram,
                                               float(227.0) /
                                               float(specgram.shape[0]),
                                               interp='bilinear')
                print(specgram.shape)
                im = Image.fromarray(
                    numpy.uint8(matplotlib.cm.jet(specgram) * 255))
                scipy.misc.imsave(f.replace(".wav", "_02B.jpg"), im)

                # ONLY FOR SPEECH (fewer samples). Must comment for music
                """specgram = specgramOr[int(specgramOr.shape[0]/2) - WIDTH/2 - int((0.1) / stStep):int(specgramOr.shape[0]/2) + WIDTH/2 - int((0.1) / stStep), :]                
Example no. 14
from pyAudioAnalysis import audioBasicIO
from pyAudioAnalysis import audioFeatureExtraction
from pyAudioAnalysis import audioTrainTest as aT
from pyAudioAnalysis import audioSegmentation as aS
import matplotlib.pyplot as plt

root_data_path = "/Users/tyiannak/ResearchData/Audio Dataset/pyAudioAnalysisData/"

print("\n\n\n * * * TEST 1 * * * \n\n\n")
[Fs, x] = audioBasicIO.readAudioFile(root_data_path + "pyAudioAnalysis/data/count.wav")
F, f_names = audioFeatureExtraction.stFeatureExtraction(x, Fs, 0.050 * Fs, 0.025 * Fs)
plt.subplot(2, 1, 1); plt.plot(F[0, :]); plt.xlabel('Frame no'); plt.ylabel(f_names[0])
plt.subplot(2, 1, 2); plt.plot(F[1, :]); plt.xlabel('Frame no'); plt.ylabel(f_names[1]); plt.show()

print("\n\n\n * * * TEST 2 * * * \n\n\n")
[Fs, x] = audioBasicIO.readAudioFile(root_data_path + "pyAudioAnalysis/data/doremi.wav")
x = audioBasicIO.stereo2mono(x)
specgram, TimeAxis, FreqAxis = audioFeatureExtraction.stSpectogram(x, Fs, round(Fs * 0.040), round(Fs * 0.040), True)

print("\n\n\n * * * TEST 3 * * * \n\n\n")
[Fs, x] = audioBasicIO.readAudioFile(root_data_path + "pyAudioAnalysis/data/doremi.wav")
x = audioBasicIO.stereo2mono(x)
specgram, TimeAxis, FreqAxis = audioFeatureExtraction.stChromagram(x, Fs, round(Fs * 0.040), round(Fs * 0.040), True)

print("\n\n\n * * * TEST 4 * * * \n\n\n")
aT.featureAndTrain([root_data_path +"SM/speech",root_data_path + "SM/music"], 1.0, 1.0, 0.2, 0.2, "svm", "temp", True)

print("\n\n\n * * * TEST 5 * * * \n\n\n")
[flagsInd, classesAll, acc, CM] = aS.mtFileClassification(root_data_path + "pyAudioAnalysis/data/scottish.wav", root_data_path + "pyAudioAnalysis/data/svmSM", "svm", True, root_data_path + 'pyAudioAnalysis/data/scottish.segments')

print("\n\n\n * * * TEST 6 * * * \n\n\n")
aS.trainHMM_fromFile(root_data_path + 'radioFinal/train/bbc4A.wav', root_data_path + 'radioFinal/train/bbc4A.segments', 'hmmTemp1', 1.0, 1.0)
aS.trainHMM_fromDir(root_data_path + 'radioFinal/small', 'hmmTemp2', 1.0, 1.0)
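Note that Example no. 9 runs the same test script against a newer pyAudioAnalysis release in which the I/O helpers were renamed (readAudioFile -> read_audio_file, stereo2mono -> stereo_to_mono); the equivalent calls under that naming:

[Fs, x] = audioBasicIO.read_audio_file(root_data_path + "pyAudioAnalysis/data/doremi.wav")
x = audioBasicIO.stereo_to_mono(x)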