def fileChromagramWrapper(wav_file):
    """Compute the chromagram of an audio file.

    The file is read with ``audioBasicIO.read_audio_file``, passed through
    ``audioBasicIO.stereo_to_mono`` and handed to ``sF.chromagram`` using a
    window and a step of ``round(fs * 0.040)`` samples each.

    Args:
        wav_file: path of the audio file to analyse.

    Raises:
        FileNotFoundError: if ``wav_file`` is not an existing file. This is
            a subclass of ``Exception``, so callers that catch the generic
            ``Exception`` keep working.
    """
    if not os.path.isfile(wav_file):
        raise FileNotFoundError("Input audio file not found!")

    fs, x = audioBasicIO.read_audio_file(wav_file)
    x = audioBasicIO.stereo_to_mono(x)

    # Window and step are identical, so consecutive frames do not overlap.
    frame_len = round(fs * 0.040)
    # NOTE(review): the trailing True is presumably the plot flag of
    # sF.chromagram - confirm against its signature. The returned values
    # are not used by this wrapper.
    sF.chromagram(x, fs, frame_len, frame_len, True)
def record_audio(block_size, fs=8000, show_spec=False, show_chroma=False,
                 log_sounds=False, logs_all=False):
    """Record audio block by block, classify each block and display it.

    Blocks of ``int(fs * block_size)`` frames are read from a single-channel
    PyAudio input stream. Every block is turned into one mid-term feature
    vector, normalised with the MEAN/STD values returned by
    ``aT.load_model("model")`` and classified through
    ``aT.classifier_wrapper(..., "svm_rbf", ...)``. The elapsed time, the
    winning class and its probability are printed, and the signal and a
    status line are shown in OpenCV windows.

    The loop has no exit condition of its own: the function only ends when
    an exception other than ``IOError`` propagates out of it (for example a
    ``KeyboardInterrupt``). The audio stream and the PyAudio instance are
    released on the way out.

    Module-level names used here and defined elsewhere in the file:
    ``FORMAT``, ``plot_w``, ``plot_h``, ``status_h``, ``plotCV`` and
    ``most_common``.

    Args:
        block_size: block duration; multiplied by ``fs`` to obtain the
            number of frames read per iteration.
        fs: sampling rate passed to the input stream.
        show_spec: also compute and display the spectrogram of each block.
        show_chroma: also compute and display the chromagram of each block.
        log_sounds: write every block to a WAV file inside the folder
            ``<timestamp>_segments``.
        logs_all: append the samples of every block to the global
            ``all_data`` list.

    Side effects:
        Resets the global ``all_data`` to an empty list and sets the global
        ``outstr`` to the timestamp string used for the output folder.
    """
    global all_data
    global outstr

    # initialize recording process
    mid_buf_size = int(fs * block_size)
    pa = pyaudio.PyAudio()
    stream = None
    try:
        stream = pa.open(format=FORMAT, channels=1, rate=fs,
                         input=True, frames_per_buffer=mid_buf_size)
        count = 0
        all_data = []

        # initialize counters etc
        time_start = time.time()
        outstr = datetime.datetime.now().strftime("%Y_%m_%d_%I:%M%p")
        out_folder = outstr + "_segments"
        if log_sounds and not os.path.exists(out_folder):
            os.makedirs(out_folder)

        # load segment model
        [classifier, MEAN, STD, class_names,
         mt_win, mt_step, st_win, st_step, _] = aT.load_model("model")

        while True:
            try:
                block = stream.read(mid_buf_size)
                # The "h" struct code is a 2-byte signed short, so the block
                # holds len(block) // 2 samples.
                n_samples = len(block) // 2
                # Fresh per-iteration buffers: a failure further down can
                # never leak a half-processed buffer into the next block.
                samples = list(struct.unpack("%dh" % n_samples, block))

                # time since recording started:
                e_time = time.time() - time_start
                # data-driven time
                data_time = (count + 1) * block_size
                x = numpy.int16(samples)
                seg_len = len(x)

                # extract features
                # We are using the signal length as mid term window and
                # step, in order to guarantee a mid-term feature sequence
                # of len 1
                [mt_feats, _, _] = mF.mid_feature_extraction(
                    x, fs, seg_len, seg_len,
                    round(fs * st_win), round(fs * st_step))
                cur_fv = (mt_feats[:, 0] - MEAN) / STD

                # classify vector:
                [res, prob] = aT.classifier_wrapper(classifier, "svm_rbf",
                                                    cur_fv)
                win_class = class_names[int(res)]
                win_prob = prob[int(res)]

                if logs_all:
                    all_data += samples
                samples_f = numpy.double(samples)

                # Compute spectrogram
                if show_spec:
                    (spec, _, freq_axis_s) = sF.spectrogram(samples_f, fs,
                                                            0.050 * fs,
                                                            0.050 * fs)
                    freq_axis_s = numpy.array(freq_axis_s)  # frequency axis
                    # most dominant frequencies (for each short-term window):
                    dominant_freqs = freq_axis_s[numpy.argmax(spec, axis=1)]
                    # get average most dominant freq
                    max_freq = numpy.mean(dominant_freqs)

                # Compute chromagram
                if show_chroma:
                    (chrom, _, freq_axis_c) = sF.chromagram(samples_f, fs,
                                                            0.050 * fs,
                                                            0.050 * fs)
                    freq_axis_c = numpy.array(freq_axis_c)
                    # most dominant chroma classes:
                    dominant_freqs_c = freq_axis_c[numpy.argmax(chrom,
                                                                axis=1)]
                    # get most common among all short-term windows
                    max_freqC = most_common(dominant_freqs_c)[0]

                # Plot signal window
                signalPlotCV = plotCV(
                    scipy.signal.resample(samples_f + 16000, plot_w),
                    plot_w, plot_h, 32000)
                cv2.imshow('Signal', signalPlotCV)
                cv2.moveWindow('Signal', 50, status_h + 50)

                # Show spectrogram
                if show_spec:
                    i_spec = numpy.array(spec.T * 255, dtype=numpy.uint8)
                    i_spec2 = cv2.resize(i_spec, (plot_w, plot_h),
                                         interpolation=cv2.INTER_CUBIC)
                    i_spec2 = cv2.applyColorMap(i_spec2, cv2.COLORMAP_JET)
                    cv2.putText(i_spec2, "max_freq: %.0f Hz" % max_freq,
                                (0, 11), cv2.FONT_HERSHEY_PLAIN, 1,
                                (200, 200, 200))
                    cv2.imshow('Spectrogram', i_spec2)
                    cv2.moveWindow('Spectrogram', 50,
                                   plot_h + status_h + 60)

                # Show chromagram
                if show_chroma:
                    i_chroma = numpy.array((chrom.T / chrom.max()) * 255,
                                           dtype=numpy.uint8)
                    i_chroma2 = cv2.resize(i_chroma, (plot_w, plot_h),
                                           interpolation=cv2.INTER_CUBIC)
                    i_chroma2 = cv2.applyColorMap(i_chroma2,
                                                  cv2.COLORMAP_JET)
                    cv2.putText(i_chroma2, "max_freqC: %s" % max_freqC,
                                (0, 11), cv2.FONT_HERSHEY_PLAIN, 1,
                                (200, 200, 200))
                    cv2.imshow('Chroma', i_chroma2)
                    cv2.moveWindow('Chroma', 50,
                                   2 * plot_h + status_h + 60)

                # Activity Detection:
                print("{0:.2f}\t{1:s}\t{2:.2f}".format(e_time, win_class,
                                                       win_prob))

                if log_sounds:
                    out_file = os.path.join(
                        out_folder,
                        "{0:.2f}_".format(e_time).zfill(8) + win_class
                        + ".wav")
                    wavfile.write(out_file, fs, x)

                textIm = numpy.zeros((status_h, plot_w, 3))
                statusStrTime = "time: %.1f sec" % e_time + \
                                " - data time: %.1f sec" % data_time + \
                                " - loss : %.1f sec" % (e_time - data_time)
                cv2.putText(textIm, statusStrTime, (0, 11),
                            cv2.FONT_HERSHEY_PLAIN, 1, (200, 200, 200))
                cv2.putText(textIm, win_class, (0, 33),
                            cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 255))
                cv2.imshow("Status", textIm)
                cv2.moveWindow("Status", 50, 0)

                # waitKey lets OpenCV process its window events; the key
                # code itself is not needed.
                cv2.waitKey(10)
                count += 1
            except IOError:
                # Best effort: report the failure and try the next block.
                print("Error recording")
    finally:
        # Release the audio device on every exit path.
        if stream is not None:
            stream.close()
        pa.terminate()
plt.xlabel('Frame no') plt.ylabel(f_names[1]) plt.show() print("\n\n\n * * * TEST 2 * * * \n\n\n") [Fs, x] = audioBasicIO.read_audio_file(root_data_path + "pyAudioAnalysis/data/doremi.wav") x = audioBasicIO.stereo_to_mono(x) specgram, TimeAxis, FreqAxis = ShortTermFeatures.spectrogram( x, Fs, round(Fs * 0.040), round(Fs * 0.040), True) print("\n\n\n * * * TEST 3 * * * \n\n\n") [Fs, x] = audioBasicIO.read_audio_file(root_data_path + "pyAudioAnalysis/data/doremi.wav") x = audioBasicIO.stereo_to_mono(x) specgram, TimeAxis, FreqAxis = ShortTermFeatures.chromagram( x, Fs, round(Fs * 0.040), round(Fs * 0.040), True) print("\n\n\n * * * TEST 4 * * * \n\n\n") aT.extract_features_and_train([root_data_path + "1/", root_data_path + "2/"], 1.0, 1.0, 0.2, 0.2, "svm", "temp", True) print("\n\n\n * * * TEST 5 * * * \n\n\n") [flagsInd, classesAll, acc, CM] = aS.mid_term_file_classification( root_data_path + "scottish.wav", root_data_path + "models/svm_rbf_sm", "svm_rbf", True, root_data_path + 'pyAudioAnalysis/data/scottish.segments') print("\n\n\n * * * TEST 6 * * * \n\n\n") aS.train_hmm_from_file(root_data_path + 'radioFinal/train/bbc4A.wav', root_data_path + 'radioFinal/train/bbc4A.segments', 'hmmTemp1', 1.0, 1.0) aS.train_hmm_from_directory(root_data_path + 'radioFinal/small', 'hmmTemp2',
"""! @brief Example 11 @details pyAudioAnalysis chromagram example @author Theodoros Giannakopoulos {[email protected]} """ import plotly import plotly.graph_objs as go from pyAudioAnalysis import ShortTermFeatures as aF from pyAudioAnalysis import audioBasicIO as aIO layout = go.Layout(title='Chromagram example for doremi.wav signal', xaxis=dict(title='time (sec)', ), yaxis=dict(title='Chroma Name', )) if __name__ == '__main__': win = 0.04 fp = "../data/doremi.wav" # music sample # read machine sound fs, s = aIO.read_audio_file(fp) fs = float(fs) dur1 = len(s) / float(fs) spec, time, freq = aF.chromagram(s, fs, int(fs * win), int(fs * win), False) heatmap = go.Heatmap(z=spec.T, y=freq, x=time) plotly.offline.plot(go.Figure(data=[heatmap], layout=layout), filename="temp.html", auto_open=True)