Ejemplo n.º 1
0
def classify_audioframes(audioQueue, audio_frames, classifier, high_speed):
    """Pull one frame off audioQueue and classify the sliding two-frame window.

    Args:
        audioQueue: queue of raw audio frame byte strings produced elsewhere.
        audio_frames: running list of recent frames; truncated to the last two.
        classifier: classifier exposing get_setting() and used for prediction.
        high_speed: when True, silence is short-circuited without feature
            engineering (assumes SILENCE_INTENSITY_THRESHOLD is meaningful).

    Returns:
        (probabilityDict, predicted, audio_frames, highestintensity,
        frequency, wavData); all-False placeholders (plus the frame list)
        when there is not yet enough audio to classify.
    """
    if (not audioQueue.empty()):
        audio_frames.append(audioQueue.get())

        # In case we are dealing with frames not being met and a buffer being
        # built up, start skipping every other audio frame to stay up to date,
        # trading being up to date over being 100% correct in sequence.
        if (audioQueue.qsize() > 1):
            print("SKIP FRAME", audioQueue.qsize())
            audioQueue.get()

        if (len(audio_frames) >= 2):
            # Keep only the most recent two frames (sliding window).
            audio_frames = audio_frames[-2:]

            # Peak amplitude of the newest frame, normalised to 0..1
            # (16-bit signed PCM assumed - 32767 is the max sample value).
            highestintensity = np.amax(
                audioop.maxpp(audio_frames[1], 4) / 32767)
            wavData = b''.join(audio_frames)

            # SKIP FEATURE ENGINEERING COMPLETELY WHEN DEALING WITH SILENCE
            if (high_speed
                    and highestintensity < SILENCE_INTENSITY_THRESHOLD):
                probabilityDict, predicted, frequency = create_empty_probability_dict(
                    classifier, {}, 0, highestintensity, 0)
            else:
                # BUGFIX: the original did `power = fftData = np.frombuffer(...)`
                # and then immediately overwrote power - the first assignment
                # was dead code and has been removed.
                fftData = np.frombuffer(wavData, dtype=np.int16)
                power = get_recording_power(
                    fftData,
                    classifier.get_setting('RECORD_SECONDS', RECORD_SECONDS))
                probabilityDict, predicted, frequency = predict_raw_data(
                    wavData, classifier, highestintensity, power)

            return probabilityDict, predicted, audio_frames, highestintensity, frequency, wavData

    return False, False, audio_frames, False, False, False
Ejemplo n.º 2
0
def segment_input_file(threshold, power_threshold, frequency_threshold, begin_threshold, WAVE_INPUT_FILE, WAVE_OUTPUT_FILENAME, WAVE_OUTPUT_FILE_EXTENSION):
    """Split a wav file into segment files wherever the audio exceeds the
    given intensity/power/frequency thresholds.

    A negative begin_threshold acts as a delay: the first abs(begin_threshold)
    consecutive qualifying windows are skipped before segments are written,
    and the upper bound is effectively removed (set to 1000).
    Output files are named WAVE_OUTPUT_FILENAME + index + extension.
    """
    audioFrames = []

    wf = wave.open(WAVE_INPUT_FILE, 'rb')
    number_channels = wf.getnchannels()
    total_frames = wf.getnframes()
    frame_rate = wf.getframerate()
    frames_to_read = round( frame_rate * RECORD_SECONDS / SLIDING_WINDOW_AMOUNT )

    files_recorded = 0
    delay_threshold = 0
    if( begin_threshold < 0 ):
        delay_threshold = begin_threshold * -1
        begin_threshold = 1000

    audio = pyaudio.PyAudio()
    record_wave_file_count = 0
    index = 0
    # Expected byte length of one full window: 16-bit (2 byte) samples.
    expected_bytes = 2 * frames_to_read * number_channels
    while( wf.tell() < total_frames ):
        index = index + 1
        # BUGFIX: wave.readframes() already counts whole frames (one frame =
        # one sample per channel), so the original readframes(frames_to_read *
        # number_channels) over-read on multi-channel files and made the
        # length check below fail on the very first iteration. Mono behavior
        # is unchanged.
        raw_wav = wf.readframes(frames_to_read)

        # A short read means we reached the end of the file - discard the tail.
        if (len(raw_wav) != expected_bytes ):
            break
        else:
            audioFrames.append(raw_wav)
            if( len( audioFrames ) >= SLIDING_WINDOW_AMOUNT ):
                audioFrames = audioFrames[-SLIDING_WINDOW_AMOUNT:]
                # Peak amplitudes of both window halves, normalised to 0..1.
                intensity = [
                    audioop.maxpp( audioFrames[0], 4 ) / 32767,
                    audioop.maxpp( audioFrames[1], 4 ) / 32767
                ]
                highestintensity = np.amax( intensity )

                byteString = b''.join(audioFrames)
                fftData = np.frombuffer( byteString, dtype=np.int16 )
                frequency = get_loudest_freq( fftData, RECORD_SECONDS )
                power = get_recording_power( fftData, RECORD_SECONDS )

                print( "Segmenting file " + WAVE_INPUT_FILE + ": " + str( math.ceil(wf.tell() / total_frames * 100) ) + "%" , end="\r" )
                if( frequency > frequency_threshold and highestintensity > threshold and power > power_threshold ):
                    record_wave_file_count += 1
                    # Only write once past the delay and below the upper bound.
                    if( record_wave_file_count <= begin_threshold and record_wave_file_count > delay_threshold ):
                        files_recorded += 1
                        waveFile = wave.open(WAVE_OUTPUT_FILENAME + str(index) + WAVE_OUTPUT_FILE_EXTENSION, 'wb')
                        waveFile.setnchannels(number_channels)
                        waveFile.setsampwidth(audio.get_sample_size(FORMAT))
                        waveFile.setframerate(frame_rate)
                        waveFile.writeframes(byteString)
                        waveFile.close()
                else:
                    # Streak broken - reset the consecutive-window counter.
                    record_wave_file_count = 0

    print( "Extracted " + str(files_recorded) + " segmented files from " + WAVE_INPUT_FILE )
    wf.close()
    # BUGFIX: release the PortAudio handle; the original leaked it.
    audio.terminate()
Ejemplo n.º 3
0
def listen_loop(audio,
                stream,
                classifier,
                dataDicts,
                audio_frames,
                high_speed=False):
    """Grab a fresh wav segment from the stream and classify it.

    Returns (probabilityDict, predicted, audio_frames, intensity,
    frequency, wavData). Note: the audio_frames argument is discarded and
    replaced by a fresh segment from get_stream_wav_segment.
    """
    audio_frames, intensity = get_stream_wav_segment(stream, [])
    wavData = b''.join(audio_frames)

    # SKIP FEATURE ENGINEERING COMPLETELY WHEN DEALING WITH SILENCE
    if (high_speed and intensity < SILENCE_INTENSITY_THRESHOLD):
        probabilityDict, predicted, frequency = create_probability_dict(
            classifier, {}, 0, intensity, 0)
    else:
        # BUGFIX: the original referenced the undefined name `byteString`
        # here (a NameError at runtime on every non-silent frame); the
        # joined segment bytes are held in wavData, matching the sibling
        # classify_audioframes implementation.
        fftData = np.frombuffer(wavData, dtype=np.int16)
        power = get_recording_power(fftData, RECORD_SECONDS)
        probabilityDict, predicted, frequency = predict_raw_data(
            wavData, classifier, intensity, power)

    return probabilityDict, predicted, audio_frames, intensity, frequency, wavData
Ejemplo n.º 4
0
def record_consumer(threshold, power_threshold, frequency_threshold,
                    begin_threshold, WAVE_OUTPUT_FILENAME,
                    WAVE_OUTPUT_FILE_EXTENSION, FULL_WAVE_OUTPUT_FILENAME,
                    audio, stream):
    """Consume audio frames from the global recordQueue and persist segments
    whose frequency/intensity/power all exceed the given thresholds.

    A negative begin_threshold acts as a delay: the first abs(begin_threshold)
    consecutive qualifying windows are skipped before writing starts.
    The full session is also periodically flushed to FULL_WAVE_OUTPUT_FILENAME.
    Recording stops when record_controls() signals False or on any exception.
    """
    global recordQueue

    files_recorded = 0
    j = 0
    record_wave_file_count = 0
    audioFrames = []

    # Set the proper thresholds for starting recordings
    delay_threshold = 0
    if (begin_threshold < 0):
        delay_threshold = begin_threshold * -1
        begin_threshold = 1000

    totalAudioFrames = []
    try:
        with KeyPoller() as key_poller:
            while (True):
                if (not recordQueue.empty()):
                    audioFrames.append(recordQueue.get())
                    totalAudioFrames.append(audioFrames[-1])
                    if (len(audioFrames) >= SLIDING_WINDOW_AMOUNT):
                        j += 1
                        audioFrames = audioFrames[-SLIDING_WINDOW_AMOUNT:]

                        # Peak amplitudes of both window halves, 0..1 range
                        # (16-bit signed PCM - 32767 is the max sample value).
                        intensity = [
                            audioop.maxpp(audioFrames[0], 4) / 32767,
                            audioop.maxpp(audioFrames[1], 4) / 32767
                        ]
                        highestintensity = np.amax(intensity)

                        byteString = b''.join(audioFrames)
                        fftData = np.frombuffer(byteString, dtype=np.int16)
                        frequency = get_loudest_freq(fftData, RECORD_SECONDS)
                        power = get_recording_power(fftData, RECORD_SECONDS)

                        # File id is the elapsed recording time in seconds.
                        fileid = "%0.2f" % ((j) * RECORD_SECONDS)

                        if (record_controls(key_poller, recordQueue) == False):
                            stream.stop_stream()
                            break

                        if (frequency > frequency_threshold
                                and highestintensity > threshold
                                and power > power_threshold):
                            record_wave_file_count += 1
                            if (record_wave_file_count <= begin_threshold and
                                    record_wave_file_count > delay_threshold):
                                files_recorded += 1
                                print(
                                    "Files recorded: %0d - Power: %0d - Freq: %0d - Saving %s"
                                    %
                                    (files_recorded, power, frequency, fileid))
                                waveFile = wave.open(
                                    WAVE_OUTPUT_FILENAME + fileid +
                                    WAVE_OUTPUT_FILE_EXTENSION, 'wb')
                                waveFile.setnchannels(CHANNELS)
                                waveFile.setsampwidth(
                                    audio.get_sample_size(FORMAT))
                                waveFile.setframerate(RATE)
                                waveFile.writeframes(byteString)
                                waveFile.close()
                            else:
                                print(
                                    "Files recorded: %0d - Power: %0d - Freq: %0d"
                                    % (files_recorded, power, frequency))
                        else:
                            # Streak broken - reset the consecutive counter.
                            record_wave_file_count = 0
                            print(
                                "Files recorded: %0d - Power: %0d - Freq: %0d"
                                % (files_recorded, power, frequency))

                        # Persist the total wave only once every six frames
                        # BUGFIX: the original tested `% 6` truthiness, which
                        # fires on five out of every six frames - the opposite
                        # of the stated intent. `% 6 == 0` writes once per six.
                        if (len(totalAudioFrames) % 6 == 0):
                            byteString = b''.join(totalAudioFrames)
                            waveFile = wave.open(FULL_WAVE_OUTPUT_FILENAME,
                                                 'wb')
                            waveFile.setnchannels(CHANNELS)
                            waveFile.setsampwidth(
                                audio.get_sample_size(FORMAT))
                            waveFile.setframerate(RATE)
                            waveFile.writeframes(byteString)
                            waveFile.close()

    except Exception as e:
        # Best-effort teardown: report the traceback and stop the stream so
        # the audio device is released even on failure.
        print("----------- ERROR DURING RECORDING -------------- ")
        exc_type, exc_value, exc_tb = sys.exc_info()
        traceback.print_exception(exc_type, exc_value, exc_tb)
        stream.stop_stream()