def ExtractSpec(id, zip_dir="/Users/aravind/Downloads", out_dir="audio"):
    """Extract a participant's WAV file from its zip archive and return its spectrogram.

    The archive ``<zip_dir>/<id>_P.zip`` is opened, its member
    ``<id>_AUDIO.wav`` is extracted below ``out_dir``, and the extracted file
    is passed to ``ShortTermFeatures.spectrogram``.

    :param id: identifier used to build both the archive name and the member
        name (the name shadows the builtin but is kept for backward
        compatibility with existing keyword callers)
    :param zip_dir: directory that contains ``<id>_P.zip``; defaults to the
        location that was previously hard-coded
    :param out_dir: directory the WAV member is extracted into; defaults to
        the previously hard-coded ``"audio"``
    :return: the first value returned by ``ShortTermFeatures.spectrogram``
        (presumably the spectrogram matrix -- confirm against the installed
        pyAudioAnalysis version)
    """
    archive_path = "{}/{}_P.zip".format(zip_dir, id)
    member_name = "{}_AUDIO.wav".format(id)

    # ZipFile.extract returns the path it actually created, so use it instead
    # of re-deriving the location by hand.
    with ZipFile(archive_path, 'r') as archive:
        wav_path = archive.extract(member_name, out_dir)

    [Fs, x] = audioBasicIO.read_audio_file(wav_path)

    # Window and step are expressed as fractions of the sampling rate
    # (0.050 * Fs and 0.025 * Fs samples respectively).
    F, _second, _third = ShortTermFeatures.spectrogram(
        x, Fs, 0.050 * Fs, 0.025 * Fs)
    return F
def fileSpectrogramWrapper(wav_file):
    """Compute the spectrogram of a WAV file using 40 ms non-overlapping frames.

    The file is read, down-mixed with ``audioBasicIO.stereo_to_mono`` and
    handed to ``sF.spectrogram`` with the final positional flag set to
    ``True`` (presumably the plot switch -- confirm against the installed
    pyAudioAnalysis version). Nothing is returned.

    :param wav_file: path of the audio file to analyze
    :raises Exception: if ``wav_file`` does not exist
    """
    file_exists = os.path.isfile(wav_file)
    if not file_exists:
        raise Exception("Input audio file not found!")

    sampling_rate, samples = audioBasicIO.read_audio_file(wav_file)
    mono_samples = audioBasicIO.stereo_to_mono(samples)

    # Window and step are both 40 ms expressed in samples.
    sF.spectrogram(
        mono_samples,
        sampling_rate,
        round(sampling_rate * 0.040),
        round(sampling_rate * 0.040),
        True,
    )
def get_spectrogram(path, win, step, disable_caching=True, smooth=True):
    """
    get_spectrogram() is a wrapper to
    pyAudioAnalysis.ShortTermFeatures.spectrogram() with an optional
    on-disk cache

    The cache file lives next to the audio file and is named
    ``<path>_<win>_<step>.npz`` (win and step formatted with 6 decimals).
    Note that caching is OFF by default (``disable_caching=True``).

    :param path: path of the WAV file to analyze
    :param win: short-term window to be used in spectrogram calculation
        (multiplied by the sampling rate to obtain a sample count)
    :param step: short-term step to be used in spectrogram calculation
        (multiplied by the sampling rate to obtain a sample count)
    :param disable_caching: when True (default) the cache is neither read
        nor written
    :param smooth: when True (default) a (2, 3) median filter is applied to
        the spectrogram values before returning (cached values are stored
        unsmoothed)
    :return: spectrogram matrix, time array, freq array and sampling freq
    """
    # The audio file is always read because the sampling rate is part of the
    # return value even when the spectrogram itself comes from the cache.
    fs, s = io.read_audio_file(path)

    cache_name = path + "_{0:.6f}_{1:.6f}.npz".format(win, step)
    if not disable_caching and os.path.isfile(cache_name):
        print("Loading cached spectrogram")
        # NpzFile keeps the archive open; the context manager closes it once
        # the three arrays have been read into memory.
        with np.load(cache_name) as npzfile:
            spec_val = npzfile["arr_0"]
            spec_time = npzfile["arr_1"]
            spec_freq = npzfile["arr_2"]
    else:
        print("Computing spectrogram")
        spec_val, spec_time, spec_freq = sF.spectrogram(
            s, fs, round(fs * win), round(fs * step), False, True)
        if not disable_caching:
            np.savez(cache_name, spec_val, spec_time, spec_freq)

    if smooth:
        spec_val = ndimage.median_filter(spec_val, (2, 3))

    return spec_val, np.array(spec_time), np.array(spec_freq), fs
def get_spectrogram_buffer(s, fs, win, step, smooth=True):
    """
    get_spectrogram_buffer() computes the same result as get_spectrogram()
    but starts from an in-memory audio buffer rather than a file on disk

    :param s: audio samples
    :param fs: sampling frequency of ``s``
    :param win: short-term window (multiplied by ``fs`` to get samples)
    :param step: short-term step (multiplied by ``fs`` to get samples)
    :param smooth: when True (default) apply a (2, 3) median filter to the
        spectrogram values
    :return: spectrogram matrix, time array, freq array and sampling freq
    """
    window_samples = round(fs * win)
    step_samples = round(fs * step)

    result = sF.spectrogram(s, fs, window_samples, step_samples, False, True)
    spec_val, spec_time, spec_freq = result

    if smooth:
        spec_val = ndimage.median_filter(spec_val, (2, 3))

    time_axis = np.array(spec_time)
    freq_axis = np.array(spec_freq)
    return spec_val, time_axis, freq_axis, fs
def plots(file, fig_name):
    """Plot the waveform of a WAV file and show a 250x250 spectrogram image.

    The waveform is drawn with plotly (one trace per channel) and the
    spectrogram, computed on the down-mixed signal with a window and step of
    0.01 * fs samples, is resized to 250x250 and shown with matplotlib.

    :param file: path of the WAV file to read
    :param fig_name: value forwarded to ``print_figure`` (defined elsewhere
        in the file)
    """
    fs, signal = wavfile.read(file)
    time_wav = np.arange(0, len(signal)) / fs

    # wavfile.read yields a 1-D array for single-channel files; indexing a
    # second axis would raise IndexError, so mono input gets a single trace.
    if signal.ndim == 1:
        traces = [go.Scatter(x=time_wav, y=signal, name='mono')]
    else:
        traces = [
            go.Scatter(x=time_wav, y=signal[:, 0], name='left channel'),
            go.Scatter(x=time_wav, y=signal[:, 1], name='right channel'),
        ]
    plotly.offline.iplot({"data": traces})

    x = audioBasicIO.stereo_to_mono(signal)
    frame_len = round(fs * 0.01)
    specgram, _time_axis, _freq_axis = sF.spectrogram(
        x, fs, frame_len, frame_len, False)

    image = resize(specgram, (250, 250))
    print(image.shape)
    plt.imshow(image)
    print_figure(fig_name)
    plt.show()
"""! 
@brief Example 04
@details pyAudioAnalysis spectrogram calculation and visualization example
@author Theodoros Giannakopoulos {[email protected]}
"""
import numpy as np
import scipy.io.wavfile as wavfile
import plotly
import plotly.graph_objs as go
from pyAudioAnalysis import ShortTermFeatures as aF

# Axis titles and figure title shared by the heatmap produced below.
layout = go.Layout(
    title='Spectrogram Extraction Example using pyAudioAnalysis',
    xaxis=dict(title='time (sec)'),
    yaxis=dict(title='Freqs (Hz)'),
)


def normalize_signal(signal):
    """Scale, mean-center and peak-normalize an audio signal.

    The input is converted to double precision and divided by 2**15; the
    mean of the scaled signal is then subtracted and the result is divided
    by the peak absolute value of the scaled (not yet centered) signal. A
    tiny constant is added to that peak so silent input does not divide by
    zero.

    :param signal: audio samples (anything ``np.double`` accepts)
    :return: normalized samples as a float array
    """
    scaled = np.double(signal) / (2.0 ** 15)
    peak = np.abs(scaled).max() + 0.0000000001
    centered = scaled - scaled.mean()
    return centered / peak


if __name__ == '__main__':
    sampling_rate, samples = wavfile.read("../data/sample_music.wav")
    samples = normalize_signal(samples)

    # 20 ms window and step, expressed in samples.
    frame_len = int(sampling_rate * 0.020)
    spec, time_axis, freq_axis = aF.spectrogram(
        samples, sampling_rate, frame_len, int(sampling_rate * 0.020))

    heatmap = go.Heatmap(z=spec.T, y=freq_axis, x=time_axis)
    figure = go.Figure(data=[heatmap], layout=layout)
    plotly.offline.plot(figure, filename="temp.html", auto_open=True)
from pyAudioAnalysis import audioBasicIO from pyAudioAnalysis import ShortTermFeatures import boto3 as boto import os import matplotlib.pyplot as plt access_key = os.environ[''] access_secret_key = os.environ[''] conn = boto.connect_s3(access_key, access_secret_key) bucket = conn.get_bucket('aravindsamala') file_key = bucket.get_key('312_AUDIO.wav') file_key.get_contents_to_filename('312_AUDIO.wav') [Fs, x] = audioBasicIO.read_audio_file('312_AUDIO.wav') F, f_names, time = ShortTermFeatures.spectrogram(x, Fs, 0.050 * Fs, 0.025 * Fs) print(F.shape) """ fstep = int(num_fft / 5.0) frequency_ticks = range(0, int(num_fft) + fstep, fstep) frequency_tick_labels = \ [str(sampling_rate / 2 - int((f * sampling_rate) / (2 * num_fft))) for f in frequency_ticks] ax.set_yticks(frequency_ticks) ax.set_yticklabels(frequency_tick_labels) t_step = int(count_fr / 3)
0.025 * Fs) plt.subplot(2, 1, 1) plt.plot(F[0, :]) plt.xlabel('Frame no') plt.ylabel(f_names[0]) plt.subplot(2, 1, 2) plt.plot(F[1, :]) plt.xlabel('Frame no') plt.ylabel(f_names[1]) plt.show() print("\n\n\n * * * TEST 2 * * * \n\n\n") [Fs, x] = audioBasicIO.read_audio_file(root_data_path + "pyAudioAnalysis/data/doremi.wav") x = audioBasicIO.stereo_to_mono(x) specgram, TimeAxis, FreqAxis = ShortTermFeatures.spectrogram( x, Fs, round(Fs * 0.040), round(Fs * 0.040), True) print("\n\n\n * * * TEST 3 * * * \n\n\n") [Fs, x] = audioBasicIO.read_audio_file(root_data_path + "pyAudioAnalysis/data/doremi.wav") x = audioBasicIO.stereo_to_mono(x) specgram, TimeAxis, FreqAxis = ShortTermFeatures.chromagram( x, Fs, round(Fs * 0.040), round(Fs * 0.040), True) print("\n\n\n * * * TEST 4 * * * \n\n\n") aT.extract_features_and_train([root_data_path + "1/", root_data_path + "2/"], 1.0, 1.0, 0.2, 0.2, "svm", "temp", True) print("\n\n\n * * * TEST 5 * * * \n\n\n") [flagsInd, classesAll, acc, CM] = aS.mid_term_file_classification( root_data_path + "scottish.wav", root_data_path + "models/svm_rbf_sm",
def record_audio(block_size, fs=8000, show_spec=False, show_chroma=False,
                 log_sounds=False, logs_all=False):
    """
    record_audio() reads fixed-size blocks from a mono PyAudio input stream
    in an endless loop, classifies every block with the segment model loaded
    from "model", prints the result and displays signal / status windows
    with OpenCV.

    The loop has no exit condition in this function: an IOError raised while
    processing a block is reported with a message and the loop continues.

    Module-level names used but not defined here: FORMAT, plot_w, plot_h,
    status_h, plotCV, most_common. The globals ``all_data`` and ``outstr``
    are (re)assigned by this function.

    :param block_size: duration of one recorded block in seconds (multiplied
        by ``fs`` to obtain the number of frames read per iteration)
    :param fs: sampling rate used to open the input stream
    :param show_spec: if True, compute the block's spectrogram and show it in
        a 'Spectrogram' window together with the mean dominant frequency
    :param show_chroma: if True, compute the block's chromagram and show it
        in a 'Chroma' window together with the most common dominant class
    :param log_sounds: if True, write every block to a WAV file inside the
        "<timestamp>_segments" folder (created if missing)
    :param logs_all: if True, append every block's samples to the global
        ``all_data`` list
    """

    # initialize recording process
    mid_buf_size = int(fs * block_size)
    pa = pyaudio.PyAudio()
    stream = pa.open(format=FORMAT, channels=1, rate=fs,
                     input=True, frames_per_buffer=mid_buf_size)
    mid_buf = []
    count = 0
    global all_data
    global outstr
    all_data = []
    # initialize counters etc
    time_start = time.time()
    # timestamp of the recording start; also used to name the output folder
    outstr = datetime.datetime.now().strftime("%Y_%m_%d_%I:%M%p")
    out_folder = outstr + "_segments"
    if log_sounds:
        if not os.path.exists(out_folder):
            os.makedirs(out_folder)
    # load segment model
    # (only classifier, MEAN, STD, class_names, st_win and st_step are used
    # below; mt_win and mt_step are unpacked but not referenced)
    [classifier, MEAN, STD, class_names,
     mt_win, mt_step, st_win, st_step, _] = aT.load_model("model")

    while 1:
        try:
            block = stream.read(mid_buf_size)
            # two bytes per sample: the block is unpacked as count_b signed
            # shorts ("h") -- assumes FORMAT is a 16-bit format, TODO confirm
            count_b = len(block) / 2
            format = "%dh" % (count_b)
            shorts = struct.unpack(format, block)
            cur_win = list(shorts)
            mid_buf = mid_buf + cur_win
            del cur_win

            # time since recording started:
            e_time = (time.time() - time_start)
            # data-driven time (number of blocks read so far * block length)
            data_time = (count + 1) * block_size
            x = numpy.int16(mid_buf)
            seg_len = len(x)

            # extract features
            # We are using the signal length as mid term window and step,
            # in order to guarantee a mid-term feature sequence of len 1
            [mt_feats, _, _] = mF.mid_feature_extraction(x, fs,
                                                         seg_len,
                                                         seg_len,
                                                         round(fs * st_win),
                                                         round(fs * st_step))
            # normalize the single feature vector with the model statistics
            cur_fv = (mt_feats[:, 0] - MEAN) / STD
            # classify vector:
            [res, prob] = aT.classifier_wrapper(classifier, "svm_rbf",
                                                cur_fv)
            win_class = class_names[int(res)]
            win_prob = prob[int(res)]

            if logs_all:
                all_data += mid_buf
            # from here on mid_buf is a float array (needed for the
            # spectrogram / chromagram / resample calls below)
            mid_buf = numpy.double(mid_buf)

            # Compute spectrogram
            if show_spec:
                (spec, t_axis, freq_axis_s) = sF.spectrogram(mid_buf,
                                                             fs,
                                                             0.050 * fs,
                                                             0.050 * fs)
                freq_axis_s = numpy.array(freq_axis_s)  # frequency axis
                # most dominant frequencies (for each short-term window):
                dominant_freqs = freq_axis_s[numpy.argmax(spec, axis=1)]
                # get average most dominant freq
                max_freq = numpy.mean(dominant_freqs)
                # NOTE(review): max_freq_std is computed but not used in
                # this function
                max_freq_std = numpy.std(dominant_freqs)

            # Compute chromagram
            if show_chroma:
                (chrom, TimeAxisC, freq_axis_c) = sF.chromagram(mid_buf,
                                                                fs,
                                                                0.050 * fs,
                                                                0.050 * fs)
                freq_axis_c = numpy.array(freq_axis_c)
                # most dominant chroma classes:
                dominant_freqs_c = freq_axis_c[numpy.argmax(chrom, axis=1)]
                # get most common among all short-term windows
                max_freqC = most_common(dominant_freqs_c)[0]

            # Plot signal window
            # (the block is resampled to plot_w points after adding 16000;
            # presumably the offset/32000 pair maps 16-bit samples to a
            # positive plotting range -- plotCV is defined elsewhere)
            signalPlotCV = plotCV(
                scipy.signal.resample(mid_buf + 16000, plot_w),
                plot_w, plot_h, 32000)
            cv2.imshow('Signal', signalPlotCV)
            cv2.moveWindow('Signal', 50, status_h + 50)

            # Show spectrogram
            if show_spec:
                i_spec = numpy.array(spec.T * 255, dtype=numpy.uint8)
                i_spec2 = cv2.resize(i_spec, (plot_w, plot_h),
                                     interpolation=cv2.INTER_CUBIC)
                i_spec2 = cv2.applyColorMap(i_spec2, cv2.COLORMAP_JET)
                cv2.putText(i_spec2, "max_freq: %.0f Hz" % max_freq, (0, 11),
                            cv2.FONT_HERSHEY_PLAIN, 1, (200, 200, 200))
                cv2.imshow('Spectrogram', i_spec2)
                cv2.moveWindow('Spectrogram', 50, plot_h + status_h + 60)
            # Show chromagram
            if show_chroma:
                # scale to 0..255 relative to the chromagram's maximum
                i_chroma = numpy.array((chrom.T / chrom.max()) * 255,
                                       dtype=numpy.uint8)
                i_chroma2 = cv2.resize(i_chroma, (plot_w, plot_h),
                                       interpolation=cv2.INTER_CUBIC)
                i_chroma2 = cv2.applyColorMap(i_chroma2, cv2.COLORMAP_JET)
                cv2.putText(i_chroma2, "max_freqC: %s" % max_freqC, (0, 11),
                            cv2.FONT_HERSHEY_PLAIN, 1, (200, 200, 200))
                cv2.imshow('Chroma', i_chroma2)
                cv2.moveWindow('Chroma', 50, 2 * plot_h + status_h + 60)

            # Activity Detection:
            # one tab-separated line per block: elapsed time, class, probability
            print("{0:.2f}\t{1:s}\t{2:.2f}".format(e_time, win_class, win_prob))

            if log_sounds:
                # TODO: log audio files
                # file name: zero-padded elapsed time + predicted class
                out_file = os.path.join(
                    out_folder,
                    "{0:.2f}_".format(e_time).zfill(8) + win_class + ".wav")
                #shutil.copyfile("temp.wav", out_file)
                wavfile.write(out_file, fs, x)

            # status window: wall-clock time, data time and their difference
            textIm = numpy.zeros((status_h, plot_w, 3))
            statusStrTime = "time: %.1f sec" % e_time + \
                            " - data time: %.1f sec" % data_time + \
                            " - loss : %.1f sec" % (e_time - data_time)
            cv2.putText(textIm, statusStrTime, (0, 11),
                        cv2.FONT_HERSHEY_PLAIN, 1, (200, 200, 200))
            cv2.putText(textIm, win_class, (0, 33),
                        cv2.FONT_HERSHEY_PLAIN, 1,
                        (0, 0, 255))
            cv2.imshow("Status", textIm)
            cv2.moveWindow("Status", 50, 0)
            # start the next block with an empty buffer
            mid_buf = []
            # waitKey call between blocks; the returned key code is stored
            # in ch but not used in this function
            ch = cv2.waitKey(10)
            count += 1
        except IOError:
            print("Error recording")