def audio_features_generator_with_buffer(input_filename,
                                         speech_features_params, target_sr,
                                         int_values, chunk_duration):
    """Yield speech features computed over a rolling 3-chunk audio buffer.

    Reads ``input_filename`` one chunk at a time, keeps the three most
    recent chunks in a sliding buffer, and yields features computed over
    the whole buffer on every iteration.

    Args:
        input_filename: Path of the audio file to read.
        speech_features_params: Parameters forwarded to
            ``get_speech_features``.
        target_sr: Sample rate the audio is resampled to.
        int_values: Forwarded to ``get_audio_chunk_from_soundfile``
            (presumably selects integer vs. float decoding — TODO confirm).
        chunk_duration: Duration in seconds of each chunk read.

    Yields:
        Tuple ``(audio_features, start, end)`` where ``start`` is True
        only on the first yield and ``end`` is True on the last.
    """
    sf = soundfile.SoundFile(input_filename, 'rb')
    try:
        chunk_size = int(chunk_duration * sf.samplerate)

        start = True
        end = False

        # Sliding buffer holding the three most recent chunks.
        audio_signal = np.zeros(shape=3 * chunk_size, dtype=np.float32)

        while not end:
            # Newest chunk lands in the last third of the buffer.
            audio_signal[-chunk_size:], end = get_audio_chunk_from_soundfile(
                sf, chunk_size, int_values)

            audio_segment = AudioSegment(audio_signal, sf.samplerate,
                                         target_sr)
            audio_features, features_length = get_speech_features(
                audio_segment.samples, target_sr, speech_features_params)

            yield audio_features, start, end

            start = False
            # Shift left by one chunk to make room for the next read.
            audio_signal[:-chunk_size] = audio_signal[chunk_size:]
    finally:
        # Runs even when the consumer abandons the generator early
        # (GeneratorExit), so the file handle never leaks.
        sf.close()
    def generate_audio_signal(self):
        """Record audio from the microphone until the user presses Enter twice.

        A background thread waits on stdin: the first Enter switches
        ``self.recording_state`` to ``"capture"`` (recording starts), the
        second switches it to ``"release"`` (recording stops).  Audio is
        read from PyAudio in fixed-size chunks and accumulated.

        Returns:
            The concatenated recorded samples (``AudioSegment.samples``
            concatenated with NumPy), or the empty list if nothing was
            ever captured.
        """
        # 0.2 s of audio per read at the target sample rate.
        # chunk_size = int(self.chunk_duration*self.target_sr)
        chunk_size = int(0.2 * self.target_sr)
        self.recording_state = "init"

        def keyboard_listener():
            # First Enter starts the capture, second Enter ends it.
            input("Press Enter to start and end recording...")
            self.recording_state = "capture"
            print("Recording...")

            input("")
            self.recording_state = "release"

        listener = threading.Thread(target=keyboard_listener)
        listener.start()

        audio_samples = []
        stream = None
        stream_initialized = False
        step = 0
        while self.recording_state != "release":
            try:
                if self.recording_state == "capture":

                    # Open the input stream lazily, on the first captured
                    # chunk only.
                    if not stream_initialized:
                        stream = self.p.open(
                            format=pa.paInt16,
                            channels=1,
                            rate=self.target_sr,
                            input=True,
                            input_device_index=self.input_device_id,
                            frames_per_buffer=chunk_size)
                        stream_initialized = True

                    # Read audio chunk from microphone
                    audio_signal = stream.read(chunk_size)
                    audio_signal = np.frombuffer(audio_signal, dtype=np.int16)
                    audio_segment = AudioSegment(audio_signal, self.target_sr,
                                                 self.target_sr)

                    if step == 0:
                        audio_samples = audio_segment.samples
                    else:
                        audio_samples = np.concatenate(
                            (audio_samples, audio_segment.samples))

                    step += 1
            except Exception as e:
                print(e)
                break

        # Guard the cleanup: if the user pressed Enter twice without any
        # chunk being captured, no stream was ever opened and the
        # original unconditional stream.close() would raise NameError.
        if stream_initialized:
            stream.close()
        self.p.terminate()

        return audio_samples
def audio_generator_from_file(input_filename, target_sr, int_values,
                              chunk_duration):
    """Yield resampled audio chunks read sequentially from a file.

    Args:
        input_filename: Path of the audio file to read.
        target_sr: Sample rate the audio is resampled to.
        int_values: Forwarded to ``get_audio_chunk_from_soundfile``
            (presumably selects integer vs. float decoding — TODO confirm).
        chunk_duration: Duration in seconds of each chunk read.

    Yields:
        Tuple ``(samples, target_sr, start, end)`` where ``start`` is
        True only on the first yield and ``end`` is True on the last.
    """
    sf = soundfile.SoundFile(input_filename, 'rb')
    try:
        chunk_size = int(chunk_duration * sf.samplerate)
        start = True
        end = False

        while not end:
            audio_signal, end = get_audio_chunk_from_soundfile(
                sf, chunk_size, int_values)

            audio_segment = AudioSegment(audio_signal, sf.samplerate,
                                         target_sr)

            yield audio_segment.samples, target_sr, start, end

            start = False
    finally:
        # Runs even when the consumer abandons the generator early
        # (GeneratorExit), so the file handle never leaks.
        sf.close()
                    )  #parse_transcript(transcript_text, labels_map, blank_index)) # convert to vocab indices

    # Read the audio files
    # Group requests in batches
    audio_idx = 0
    last_request = False
    predictions = []
    while not last_request:
        batch_audio_samples = []
        batch_filenames = []

        for idx in range(FLAGS.batch_size):
            filename = filenames[audio_idx]
            print("Reading audio file: ", filename)
            audio = AudioSegment.from_file(filename,
                                           offset=0,
                                           duration=FLAGS.fixed_size).samples
            if FLAGS.fixed_size:
                audio = np.resize(audio, FLAGS.fixed_size)

            audio_idx = (audio_idx + 1) % len(filenames)
            if audio_idx == 0:
                last_request = True

            batch_audio_samples.append(audio)
            batch_filenames.append(filename)

        predictions += speech_client.recognize(batch_audio_samples,
                                               batch_filenames)

    if transcripts: