from zipfile import ZipFile
from pyAudioAnalysis import audioBasicIO, ShortTermFeatures


def ExtractSpec(id):
    # extract the participant's WAV file from the downloaded zip archive
    with ZipFile("/Users/aravind/Downloads/{}_P.zip".format(id), 'r') as zf:
        zf.extract("{}_AUDIO.wav".format(id), 'audio')
    [Fs, x] = audioBasicIO.read_audio_file("audio/{}_AUDIO.wav".format(id))
    # spectrogram matrix plus time/frequency axes (50 ms window, 25 ms step)
    specgram, time_axis, freq_axis = ShortTermFeatures.spectrogram(
        x, Fs, int(0.050 * Fs), int(0.025 * Fs))
    return specgram
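
A minimal usage sketch (the participant id 312 mirrors the S3 snippet further down; the Downloads path above is specific to the original author's machine):

spec = ExtractSpec(312)
print(spec.shape)  # (num_frames, num_freq_bins)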
Example #2
import os
from pyAudioAnalysis import audioBasicIO
from pyAudioAnalysis import ShortTermFeatures as sF


def fileSpectrogramWrapper(wav_file):
    if not os.path.isfile(wav_file):
        raise FileNotFoundError("Input audio file not found!")
    [fs, x] = audioBasicIO.read_audio_file(wav_file)
    x = audioBasicIO.stereo_to_mono(x)
    # 40 ms window, no overlap; the final True tells spectrogram() to plot
    specgram, time_axis, freq_axis = sF.spectrogram(x, fs, round(fs * 0.040),
                                                    round(fs * 0.040), True)
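
Usage sketch (the WAV path is hypothetical):

fileSpectrogramWrapper("data/doremi.wav")  # raises FileNotFoundError if missing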
Example #3
import os
import numpy as np
from scipy import ndimage
from pyAudioAnalysis import audioBasicIO as io
from pyAudioAnalysis import ShortTermFeatures as sF


def get_spectrogram(path, win, step, disable_caching=True, smooth=True):
    """
    get_spectrogram() is a wrapper to
    pyAudioAnalysis.ShortTermFeatures.spectrogram() with caching functionality

    :param path: path of the WAV file to analyze
    :param win: short-term window (in seconds) for the spectrogram calculation
    :param step: short-term step (in seconds) for the spectrogram calculation
    :param disable_caching: if False, save the spectrogram to an .npz file
        next to the WAV file and reuse it on subsequent calls
    :param smooth: if True, median-filter the spectrogram
    :return: spectrogram matrix, time array, freq array and sampling freq
    """
    fs, s = io.read_audio_file(path)
    cache_name = path + "_{0:.6f}_{1:.6f}.npz".format(win, step)
    if not disable_caching and os.path.isfile(cache_name):
        print("Loading cached spectrogram")
        npzfile = np.load(cache_name)
        spec_val = npzfile["arr_0"]
        spec_time = npzfile["arr_1"]
        spec_freq = npzfile["arr_2"]
    else:
        print("Computing spectrogram")
        spec_val, spec_time, spec_freq = sF.spectrogram(
            s, fs, round(fs * win), round(fs * step), False, True)
        if not disable_caching:
            np.savez(cache_name, spec_val, spec_time, spec_freq)
    if smooth:
        # light median filtering to suppress isolated spectrogram peaks
        spec_val = ndimage.median_filter(spec_val, (2, 3))

    return spec_val, np.array(spec_time), np.array(spec_freq), fs
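
Usage sketch (the WAV path is hypothetical); with caching enabled, the second call loads the stored .npz instead of recomputing:

spec, t, f, fs = get_spectrogram("sample.wav", 0.040, 0.010,
                                 disable_caching=False)  # computes and caches
spec2, _, _, _ = get_spectrogram("sample.wav", 0.040, 0.010,
                                 disable_caching=False)  # loads the cache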
Example #4
def get_spectrogram_buffer(s, fs, win, step, smooth=True):
    """
    get_spectrogram_buffer() same as get_spectrogram() but input is an audio
    buffer, instead of an audio file
    """
    spec_val, spec_time, spec_freq = sF.spectrogram(s, fs, round(fs * win),
                                                    round(fs * step), False,
                                                    True)
    if smooth:
        spec_val = ndimage.median_filter(spec_val, (2, 3))

    return spec_val, np.array(spec_time), np.array(spec_freq), fs
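
A self-contained check on a synthetic one-second 440 Hz tone (all values are arbitrary test inputs):

import numpy as np
fs = 16000
t = np.arange(fs) / fs
tone = np.sin(2 * np.pi * 440 * t)
spec, spec_t, spec_f, _ = get_spectrogram_buffer(tone, fs, 0.040, 0.010)
print(spec.shape)  # (num_frames, num_freq_bins)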
Example #5

import numpy as np
import scipy.io.wavfile as wavfile
import plotly
import plotly.graph_objs as go
import matplotlib.pyplot as plt
from skimage.transform import resize  # assumed source of resize()
from pyAudioAnalysis import audioBasicIO
from pyAudioAnalysis import ShortTermFeatures as sF


def plots(file, fig_name):
    # plot both channels of a stereo WAV file with plotly
    fs, signal = wavfile.read(file)
    time_wav = np.arange(0, len(signal)) / fs

    plotly.offline.iplot({"data": [go.Scatter(x=time_wav,
                                              y=signal[:, 0],
                                              name='left channel'),
                                   go.Scatter(x=time_wav,
                                              y=signal[:, 1],
                                              name='right channel')]})
    # 10 ms window and step; spectrogram() does not plot here
    x = audioBasicIO.stereo_to_mono(signal)
    specgram, time_axis, freq_axis = sF.spectrogram(x, fs, round(fs * 0.01),
                                                    round(fs * 0.01), False)
    image = resize(specgram, (250, 250))
    print(image.shape)
    plt.imshow(image)
    print_figure(fig_name)  # external helper, not defined in this snippet
    plt.show()
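
Usage sketch (stereo WAV path and figure name are hypothetical; a print_figure() helper must be supplied by the surrounding code):

plots("data/stereo_sample.wav", "spectrogram_figure")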
Example #6
"""! 
@brief Example 04
@details pyAudioAnalysis spectrogram calculation and visualization example
@author Theodoros Giannakopoulos {[email protected]}
"""
import numpy as np
import scipy.io.wavfile as wavfile
import plotly
import plotly.graph_objs as go
from pyAudioAnalysis import ShortTermFeatures as aF
layout = go.Layout(
    title='Spectrogram Extraction Example using pyAudioAnalysis',
    xaxis=dict(title='time (sec)', ),
    yaxis=dict(title='Freqs (Hz)', ))


def normalize_signal(signal):
    # scale from the int16 range, then normalize to zero mean / unit max
    signal = np.double(signal)
    signal = signal / (2.0 ** 15)
    return (signal - signal.mean()) / ((np.abs(signal)).max() + 1e-10)


if __name__ == '__main__':
    [Fs, s] = wavfile.read("../data/sample_music.wav")
    s = normalize_signal(s)
    [S, t, f] = aF.spectrogram(s, Fs, int(Fs * 0.020), int(Fs * 0.020))
    heatmap = go.Heatmap(z=S.T, y=f, x=t)
    plotly.offline.plot(go.Figure(data=[heatmap], layout=layout),
                        filename="temp.html",
                        auto_open=True)
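
A quick check of normalize_signal() on a synthetic int16 array (values arbitrary); the result is zero-mean with a maximum absolute value of roughly 1:

demo = np.array([-32768, 0, 32767], dtype=np.int16)
out = normalize_signal(demo)
print(out.mean(), np.abs(out).max())  # ~0.0, ~1.0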
Example #7

from pyAudioAnalysis import audioBasicIO
from pyAudioAnalysis import ShortTermFeatures
import boto  # the calls below use the boto2-style S3 API
import os
import matplotlib.pyplot as plt

# set these to the names of your AWS credential environment variables
access_key = os.environ['']
access_secret_key = os.environ['']

# download the WAV file from S3, then compute its spectrogram
conn = boto.connect_s3(access_key, access_secret_key)
bucket = conn.get_bucket('aravindsamala')
file_key = bucket.get_key('312_AUDIO.wav')
file_key.get_contents_to_filename('312_AUDIO.wav')
[Fs, x] = audioBasicIO.read_audio_file('312_AUDIO.wav')
specgram, time_axis, freq_axis = ShortTermFeatures.spectrogram(
    x, Fs, int(0.050 * Fs), int(0.025 * Fs))
print(specgram.shape)
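
For newer setups, a minimal boto3 equivalent of the download step (bucket and key names taken from the snippet above; credentials assumed to come from the standard AWS credential chain):

import boto3

s3 = boto3.client('s3')
s3.download_file('aravindsamala', '312_AUDIO.wav', '312_AUDIO.wav')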




"""
fstep = int(num_fft / 5.0)
frequency_ticks = range(0, int(num_fft) + fstep, fstep)
frequency_tick_labels = \
    [str(sampling_rate / 2 -
         int((f * sampling_rate) / (2 * num_fft)))
     for f in frequency_ticks]
ax.set_yticks(frequency_ticks)
ax.set_yticklabels(frequency_tick_labels)
t_step = int(count_fr / 3)

Example #8
from pyAudioAnalysis import audioBasicIO
from pyAudioAnalysis import ShortTermFeatures
from pyAudioAnalysis import audioTrainTest as aT
from pyAudioAnalysis import audioSegmentation as aS
import matplotlib.pyplot as plt

root_data_path = "..."  # base path of the test data (elided in the source)

print("\n\n\n * * * TEST 1 * * * \n\n\n")
# the start of this example was truncated; the feature_extraction call is
# reconstructed from the plotting code below (input file assumed)
[Fs, x] = audioBasicIO.read_audio_file(root_data_path +
                                       "pyAudioAnalysis/data/doremi.wav")
x = audioBasicIO.stereo_to_mono(x)
F, f_names = ShortTermFeatures.feature_extraction(x, Fs, 0.050 * Fs,
                                                  0.025 * Fs)
plt.subplot(2, 1, 1)
plt.plot(F[0, :])
plt.xlabel('Frame no')
plt.ylabel(f_names[0])
plt.subplot(2, 1, 2)
plt.plot(F[1, :])
plt.xlabel('Frame no')
plt.ylabel(f_names[1])
plt.show()

print("\n\n\n * * * TEST 2 * * * \n\n\n")
[Fs, x] = audioBasicIO.read_audio_file(root_data_path +
                                       "pyAudioAnalysis/data/doremi.wav")
x = audioBasicIO.stereo_to_mono(x)
specgram, TimeAxis, FreqAxis = ShortTermFeatures.spectrogram(
    x, Fs, round(Fs * 0.040), round(Fs * 0.040), True)

print("\n\n\n * * * TEST 3 * * * \n\n\n")
[Fs, x] = audioBasicIO.read_audio_file(root_data_path +
                                       "pyAudioAnalysis/data/doremi.wav")
x = audioBasicIO.stereo_to_mono(x)
specgram, TimeAxis, FreqAxis = ShortTermFeatures.chromagram(
    x, Fs, round(Fs * 0.040), round(Fs * 0.040), True)

print("\n\n\n * * * TEST 4 * * * \n\n\n")
aT.extract_features_and_train([root_data_path + "1/", root_data_path + "2/"],
                              1.0, 1.0, 0.2, 0.2, "svm", "temp", True)

print("\n\n\n * * * TEST 5 * * * \n\n\n")
[flagsInd, classesAll, acc, CM] = aS.mid_term_file_classification(
    root_data_path + "scottish.wav", root_data_path + "models/svm_rbf_sm",
    "svm_rbf", True)  # trailing arguments were truncated in the source;
                      # model type and plot flag reconstructed here
Example #9
def record_audio(block_size,
                 fs=8000,
                 show_spec=False,
                 show_chroma=False,
                 log_sounds=False,
                 logs_all=False):

    # initialize the recording stream
    mid_buf_size = int(fs * block_size)
    pa = pyaudio.PyAudio()
    stream = pa.open(format=FORMAT,
                     channels=1,
                     rate=fs,
                     input=True,
                     frames_per_buffer=mid_buf_size)
    mid_buf = []
    count = 0
    global all_data
    global outstr
    all_data = []
    # initialize counters and timestamps
    time_start = time.time()
    outstr = datetime.datetime.now().strftime("%Y_%m_%d_%I:%M%p")
    out_folder = outstr + "_segments"
    if log_sounds:
        if not os.path.exists(out_folder):
            os.makedirs(out_folder)
    # load segment model
    [classifier, MEAN, STD, class_names, mt_win, mt_step, st_win, st_step,
     _] = aT.load_model("model")

    while True:
        try:
            block = stream.read(mid_buf_size)
            count_b = len(block) // 2   # number of 16-bit samples in the block
            fmt = "%dh" % count_b       # avoid shadowing the builtin format()
            shorts = struct.unpack(fmt, block)
            cur_win = list(shorts)
            mid_buf = mid_buf + cur_win
            del cur_win

            # time since recording started:
            e_time = (time.time() - time_start)
            # data-driven time
            data_time = (count + 1) * block_size
            x = numpy.int16(mid_buf)
            seg_len = len(x)

            # extract features
            # We are using the signal length as mid term window and step,
            # in order to guarantee a mid-term feature sequence of len 1
            [mt_feats, _,
             _] = mF.mid_feature_extraction(x, fs, seg_len, seg_len,
                                            round(fs * st_win),
                                            round(fs * st_step))
            cur_fv = (mt_feats[:, 0] - MEAN) / STD
            # classify vector:
            [res, prob] = aT.classifier_wrapper(classifier, "svm_rbf", cur_fv)
            win_class = class_names[int(res)]
            win_prob = prob[int(res)]

            if logs_all:
                all_data += mid_buf
            mid_buf = numpy.double(mid_buf)

            # Compute spectrogram
            if show_spec:
                (spec, t_axis,
                 freq_axis_s) = sF.spectrogram(mid_buf, fs, round(0.050 * fs),
                                               round(0.050 * fs))
                freq_axis_s = numpy.array(freq_axis_s)  # frequency axis
                # most dominant frequencies (for each short-term window):
                dominant_freqs = freq_axis_s[numpy.argmax(spec, axis=1)]
                # get average most dominant freq
                max_freq = numpy.mean(dominant_freqs)
                max_freq_std = numpy.std(dominant_freqs)

            # Compute chromagram
            if show_chroma:
                (chrom, TimeAxisC,
                 freq_axis_c) = sF.chromagram(mid_buf, fs, round(0.050 * fs),
                                              round(0.050 * fs))
                freq_axis_c = numpy.array(freq_axis_c)
                # most dominant chroma classes:
                dominant_freqs_c = freq_axis_c[numpy.argmax(chrom, axis=1)]
                # get most common among all short-term windows
                max_freqC = most_common(dominant_freqs_c)[0]

            # Plot signal window
            signalPlotCV = plotCV(
                scipy.signal.resample(mid_buf + 16000, plot_w), plot_w, plot_h,
                32000)
            cv2.imshow('Signal', signalPlotCV)
            cv2.moveWindow('Signal', 50, status_h + 50)

            # Show spectrogram
            if show_spec:
                i_spec = numpy.array(spec.T * 255, dtype=numpy.uint8)
                i_spec2 = cv2.resize(i_spec, (plot_w, plot_h),
                                     interpolation=cv2.INTER_CUBIC)
                i_spec2 = cv2.applyColorMap(i_spec2, cv2.COLORMAP_JET)
                cv2.putText(i_spec2, "max_freq: %.0f Hz" % max_freq, (0, 11),
                            cv2.FONT_HERSHEY_PLAIN, 1, (200, 200, 200))
                cv2.imshow('Spectrogram', i_spec2)
                cv2.moveWindow('Spectrogram', 50, plot_h + status_h + 60)
            # Show chromagram
            if show_chroma:
                i_chroma = numpy.array((chrom.T / chrom.max()) * 255,
                                       dtype=numpy.uint8)
                i_chroma2 = cv2.resize(i_chroma, (plot_w, plot_h),
                                       interpolation=cv2.INTER_CUBIC)
                i_chroma2 = cv2.applyColorMap(i_chroma2, cv2.COLORMAP_JET)
                cv2.putText(i_chroma2, "max_freqC: %s" % max_freqC, (0, 11),
                            cv2.FONT_HERSHEY_PLAIN, 1, (200, 200, 200))
                cv2.imshow('Chroma', i_chroma2)
                cv2.moveWindow('Chroma', 50, 2 * plot_h + status_h + 60)

            # Activity Detection:
            print("{0:.2f}\t{1:s}\t{2:.2f}".format(e_time, win_class,
                                                   win_prob))

            if log_sounds:
                # TODO: log audio files
                out_file = os.path.join(
                    out_folder,
                    "{0:.2f}_".format(e_time).zfill(8) + win_class + ".wav")
                #shutil.copyfile("temp.wav", out_file)
                wavfile.write(out_file, fs, x)

            textIm = numpy.zeros((status_h, plot_w, 3))
            statusStrTime = "time: %.1f sec" % e_time + \
                            " - data time: %.1f sec" % data_time + \
                            " - loss : %.1f sec" % (e_time - data_time)
            cv2.putText(textIm, statusStrTime, (0, 11), cv2.FONT_HERSHEY_PLAIN,
                        1, (200, 200, 200))
            cv2.putText(textIm, win_class, (0, 33), cv2.FONT_HERSHEY_PLAIN, 1,
                        (0, 0, 255))
            cv2.imshow("Status", textIm)
            cv2.moveWindow("Status", 50, 0)
            mid_buf = []
            ch = cv2.waitKey(10)
            count += 1
        except IOError:
            print("Error recording")