Example #1
import os

from pyAudioAnalysis import audioBasicIO
from pyAudioAnalysis import ShortTermFeatures as sF


def fileChromagramWrapper(wav_file):
    if not os.path.isfile(wav_file):
        raise Exception("Input audio file not found!")
    [fs, x] = audioBasicIO.read_audio_file(wav_file)
    x = audioBasicIO.stereo_to_mono(x)
    specgram, TimeAxis, FreqAxis = sF.chromagram(x, fs, round(fs * 0.040),
                                                 round(fs * 0.040), True)
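A minimal usage sketch for the wrapper above; "sample.wav" is a placeholder path, not a file shipped with pyAudioAnalysis, and the trailing True asks chromagram to plot the result:

if __name__ == '__main__':
    # placeholder path; any wav file readable by audioBasicIO should work
    fileChromagramWrapper("sample.wav")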
Example #2
def record_audio(block_size,
                 fs=8000,
                 show_spec=False,
                 show_chroma=False,
                 log_sounds=False,
                 logs_all=False):

    # initialize the recording process
    mid_buf_size = int(fs * block_size)
    pa = pyaudio.PyAudio()
    stream = pa.open(format=FORMAT,
                     channels=1,
                     rate=fs,
                     input=True,
                     frames_per_buffer=mid_buf_size)
    mid_buf = []
    count = 0
    global all_data
    global outstr
    all_data = []
    # initialize counters, timestamps etc.
    time_start = time.time()
    outstr = datetime.datetime.now().strftime("%Y_%m_%d_%I:%M%p")
    out_folder = outstr + "_segments"
    if log_sounds:
        if not os.path.exists(out_folder):
            os.makedirs(out_folder)
    # load segment model
    [classifier, MEAN, STD, class_names, mt_win, mt_step, st_win, st_step,
     _] = aT.load_model("model")
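    # MEAN/STD are the feature normalization statistics saved at training
    # time; mt_win/mt_step and st_win/st_step are the mid- and short-term
    # window/step sizes in seconds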

    while True:
        try:
            block = stream.read(mid_buf_size)
            # each sample is a signed 16-bit short (2 bytes)
            count_b = len(block) // 2
            fmt = "%dh" % count_b
            shorts = struct.unpack(fmt, block)
            cur_win = list(shorts)
            mid_buf = mid_buf + cur_win
            del cur_win

            # time since recording started:
            e_time = (time.time() - time_start)
            # data-driven time
            data_time = (count + 1) * block_size
            x = numpy.int16(mid_buf)
            seg_len = len(x)

            # extract features
            # We are using the signal length as mid term window and step,
            # in order to guarantee a mid-term feature sequence of len 1
            [mt_feats, _,
             _] = mF.mid_feature_extraction(x, fs, seg_len, seg_len,
                                            round(fs * st_win),
                                            round(fs * st_step))
            # z-normalize the feature vector with the training statistics
            cur_fv = (mt_feats[:, 0] - MEAN) / STD
            # classify vector:
            [res, prob] = aT.classifier_wrapper(classifier, "svm_rbf", cur_fv)
            win_class = class_names[int(res)]
            win_prob = prob[int(res)]

            if logs_all:
                all_data += mid_buf
            mid_buf = numpy.double(mid_buf)

            # Compute spectrogram
            if show_spec:
                (spec, t_axis,
                 freq_axis_s) = sF.spectrogram(mid_buf, fs,
                                               round(0.050 * fs),
                                               round(0.050 * fs))
                freq_axis_s = numpy.array(freq_axis_s)  # frequency axis
                # most dominant frequencies (for each short-term window):
                dominant_freqs = freq_axis_s[numpy.argmax(spec, axis=1)]
                # get average most dominant freq
                max_freq = numpy.mean(dominant_freqs)
                max_freq_std = numpy.std(dominant_freqs)

            # Compute chromagram
            if show_chroma:
                (chrom, TimeAxisC,
                 freq_axis_c) = sF.chromagram(mid_buf, fs,
                                              round(0.050 * fs),
                                              round(0.050 * fs))
                freq_axis_c = numpy.array(freq_axis_c)
                # most dominant chroma classes:
                dominant_freqs_c = freq_axis_c[numpy.argmax(chrom, axis=1)]
                # get most common among all short-term windows
                max_freqC = most_common(dominant_freqs_c)[0]

            # Plot signal window
            signalPlotCV = plotCV(
                scipy.signal.resample(mid_buf + 16000, plot_w), plot_w, plot_h,
                32000)
            cv2.imshow('Signal', signalPlotCV)
            cv2.moveWindow('Signal', 50, status_h + 50)

            # Show spectrogram
            if show_spec:
                i_spec = numpy.array(spec.T * 255, dtype=numpy.uint8)
                i_spec2 = cv2.resize(i_spec, (plot_w, plot_h),
                                     interpolation=cv2.INTER_CUBIC)
                i_spec2 = cv2.applyColorMap(i_spec2, cv2.COLORMAP_JET)
                cv2.putText(i_spec2, "max_freq: %.0f Hz" % max_freq, (0, 11),
                            cv2.FONT_HERSHEY_PLAIN, 1, (200, 200, 200))
                cv2.imshow('Spectrogram', i_spec2)
                cv2.moveWindow('Spectrogram', 50, plot_h + status_h + 60)
            # Show chromagram
            if show_chroma:
                i_chroma = numpy.array((chrom.T / chrom.max()) * 255,
                                       dtype=numpy.uint8)
                i_chroma2 = cv2.resize(i_chroma, (plot_w, plot_h),
                                       interpolation=cv2.INTER_CUBIC)
                i_chroma2 = cv2.applyColorMap(i_chroma2, cv2.COLORMAP_JET)
                cv2.putText(i_chroma2, "max_freqC: %s" % max_freqC, (0, 11),
                            cv2.FONT_HERSHEY_PLAIN, 1, (200, 200, 200))
                cv2.imshow('Chroma', i_chroma2)
                cv2.moveWindow('Chroma', 50, 2 * plot_h + status_h + 60)

            # Activity Detection:
            print("{0:.2f}\t{1:s}\t{2:.2f}".format(e_time, win_class,
                                                   win_prob))

            if log_sounds:
                # TODO: log audio files
                out_file = os.path.join(
                    out_folder,
                    "{0:.2f}_".format(e_time).zfill(8) + win_class + ".wav")
                #shutil.copyfile("temp.wav", out_file)
                wavfile.write(out_file, fs, x)

            textIm = numpy.zeros((status_h, plot_w, 3))
            statusStrTime = "time: %.1f sec" % e_time + \
                            " - data time: %.1f sec" % data_time + \
                            " - loss : %.1f sec" % (e_time - data_time)
            cv2.putText(textIm, statusStrTime, (0, 11), cv2.FONT_HERSHEY_PLAIN,
                        1, (200, 200, 200))
            cv2.putText(textIm, win_class, (0, 33), cv2.FONT_HERSHEY_PLAIN, 1,
                        (0, 0, 255))
            cv2.imshow("Status", textIm)
            cv2.moveWindow("Status", 50, 0)
            mid_buf = []
            ch = cv2.waitKey(10)
            count += 1
        except IOError:
            print("Error recording")
Example #3
# (tail of TEST 1: plotting a short-term feature sequence)
plt.xlabel('Frame no')
plt.ylabel(f_names[1])
plt.show()

print("\n\n\n * * * TEST 2 * * * \n\n\n")
[Fs, x] = audioBasicIO.read_audio_file(root_data_path +
                                       "pyAudioAnalysis/data/doremi.wav")
x = audioBasicIO.stereo_to_mono(x)
specgram, TimeAxis, FreqAxis = ShortTermFeatures.spectrogram(
    x, Fs, round(Fs * 0.040), round(Fs * 0.040), True)

print("\n\n\n * * * TEST 3 * * * \n\n\n")
[Fs, x] = audioBasicIO.read_audio_file(root_data_path +
                                       "pyAudioAnalysis/data/doremi.wav")
x = audioBasicIO.stereo_to_mono(x)
specgram, TimeAxis, FreqAxis = ShortTermFeatures.chromagram(
    x, Fs, round(Fs * 0.040), round(Fs * 0.040), True)

print("\n\n\n * * * TEST 4 * * * \n\n\n")
# train an SVM classifier: 1.0 s mid-term window/step, 0.2 s short-term
# window/step, with beat features
aT.extract_features_and_train([root_data_path + "1/", root_data_path + "2/"],
                              1.0, 1.0, 0.2, 0.2, "svm", "temp", True)

print("\n\n\n * * * TEST 5 * * * \n\n\n")
# mid-term segment classification of scottish.wav, evaluated against the
# ground-truth .segments file
[flagsInd, classesAll, acc, CM] = aS.mid_term_file_classification(
    root_data_path + "scottish.wav", root_data_path + "models/svm_rbf_sm",
    "svm_rbf", True, root_data_path + 'pyAudioAnalysis/data/scottish.segments')

print("\n\n\n * * * TEST 6 * * * \n\n\n")
aS.train_hmm_from_file(root_data_path + 'radioFinal/train/bbc4A.wav',
                       root_data_path + 'radioFinal/train/bbc4A.segments',
                       'hmmTemp1', 1.0, 1.0)
aS.train_hmm_from_directory(root_data_path + 'radioFinal/small', 'hmmTemp2',
                            1.0, 1.0)
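A hedged follow-up sketch applying one of the HMMs trained above; the wav and .segments paths are assumptions that mirror TEST 5, while aS.hmm_segmentation is pyAudioAnalysis's entry point for segmenting a file with a trained HMM:

# segment a file with the trained HMM (paths follow the patterns above)
flags_ind, class_names, acc, cm = aS.hmm_segmentation(
    root_data_path + 'scottish.wav', 'hmmTemp1', True,
    root_data_path + 'pyAudioAnalysis/data/scottish.segments')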
Example #4
"""! 
@brief Example 11
@details pyAudioAnalysis chromagram example
@author Theodoros Giannakopoulos {[email protected]}
"""
import plotly
import plotly.graph_objs as go
from pyAudioAnalysis import ShortTermFeatures as aF
from pyAudioAnalysis import audioBasicIO as aIO
layout = go.Layout(title='Chromagram example for doremi.wav signal',
                   xaxis=dict(title='time (sec)', ),
                   yaxis=dict(title='Chroma Name', ))

if __name__ == '__main__':
    win = 0.04
    fp = "../data/doremi.wav"  # music sample
    # read the audio signal
    fs, s = aIO.read_audio_file(fp)
    fs = float(fs)
    dur1 = len(s) / float(fs)
    spec, time, freq = aF.chromagram(s, fs, int(fs * win), int(fs * win),
                                     False)
    # transpose: chromagram returns one chroma vector per frame, while the
    # heatmap expects the chroma classes along the y axis
    heatmap = go.Heatmap(z=spec.T, y=freq, x=time)
    plotly.offline.plot(go.Figure(data=[heatmap], layout=layout),
                        filename="temp.html",
                        auto_open=True)