Ejemplo n.º 1
0
def request_stream(stop_audio, channels=CHANNELS, rate=RATE, chunk=CHUNK):
    """Yields `StreamingRecognizeRequest`s constructed from a recording audio
    stream.

    Args:
        stop_audio: A threading.Event object stops the recording when set.
        channels: How many audio channels to record.
        rate: The sampling rate.
        chunk: Buffer audio into chunks of this size before sending to the api.
    """
    # The initial request must contain metadata about the stream, so the
    # server knows how to interpret it.
    recognition_config = cloud_speech.RecognitionConfig(encoding='LINEAR16',
                                                        sample_rate=rate)
    streaming_config = cloud_speech.StreamingRecognitionConfig(
        config=recognition_config,
        # Note that setting interim_results to True means that you'll likely
        # get multiple results for the same bit of audio, as the system
        # re-interprets audio in the context of subsequent audio. However, this
        # will give us quick results without having to tell the server when to
        # finalize a piece of audio.
        interim_results=True,
        single_utterance=False)

    yield cloud_speech.StreamingRecognizeRequest(
        streaming_config=streaming_config)

    with record_audio(channels, rate, chunk) as audio_stream:
        while not stop_audio.is_set():
            data = audio_stream.read(chunk)
            if not data:
                # PEP 479: raising StopIteration inside a generator is turned
                # into a RuntimeError on Python 3.7+. Simply stop yielding.
                break

            # Subsequent requests can all just have the content
            yield cloud_speech.StreamingRecognizeRequest(audio_content=data)
Ejemplo n.º 2
0
def request_stream(data_stream, rate, interim_results=True):
    """Generate `StreamingRecognizeRequest` messages for a streaming session.

    The first request carries only the stream configuration; every request
    after that carries one chunk of raw audio taken from ``data_stream``.

    Args:
        data_stream: A generator that yields raw audio data to send.
        rate: The sampling rate in hertz.
        interim_results: Whether to return intermediate results, before the
            transcription is finalized.
    """
    # Full list of recognition options: https://goo.gl/KPZn97
    config = cloud_speech.RecognitionConfig(
        encoding='LINEAR16',   # raw 16-bit signed LE samples
        sample_rate=rate,      # the rate in hertz
        # See http://g.co/cloud/speech/docs/languages
        # for a list of supported languages.
        language_code='en-US',  # a BCP-47 language tag
    )

    # The server must receive the stream metadata before any audio, so the
    # configuration request goes out first.
    yield cloud_speech.StreamingRecognizeRequest(
        streaming_config=cloud_speech.StreamingRecognitionConfig(
            interim_results=interim_results,
            config=config,
        ))

    # Every later request carries nothing but audio payload.
    for chunk in data_stream:
        yield cloud_speech.StreamingRecognizeRequest(audio_content=chunk)
Ejemplo n.º 3
0
def request_stream(channels=CHANNELS, rate=RATE, chunk=CHUNK):
    """Yields `StreamingRecognizeRequest`s fed from the global `frames` buffer.

    Sends the stream configuration first, then polls the module-level
    `frames` list, writing each drained chunk to the wave file `wf` and
    forwarding it to the recognizer until `flag_RecogEnd` is set.
    """
    global flag_RecogEnd
    global LANG_CODE
    recognition_config = cloud_speech.RecognitionConfig(
        encoding='LINEAR16',  # raw 16-bit signed LE samples
        sample_rate=rate,  # the rate in hertz
        language_code=LANG_CODE,  # a BCP-47 language tag
    )
    streaming_config = cloud_speech.StreamingRecognitionConfig(
        config=recognition_config, interim_results=True, single_utterance=True)

    yield cloud_speech.StreamingRecognizeRequest(
        streaming_config=streaming_config)

    while True:
        time.sleep(SLEEP_SEC)

        # Another thread/callback is expected to set this flag to end the
        # stream cleanly.
        if flag_RecogEnd:
            return

        # Once data has accumulated in the buffer, send it.
        if len(frames) > 0:
            data_1frame = frames.pop(0)
            # NOTE(review): b''.join(map(str, ...)) only works on Python 2;
            # on Python 3 joining str items into bytes raises TypeError.
            # Verify what `frames` actually holds before porting.
            data_l2s = b''.join(map(str, data_1frame))
            wf.writeframes(data_l2s)  # write to the wave file
            yield cloud_speech.StreamingRecognizeRequest(
                audio_content=data_l2s)  # google ASR
Ejemplo n.º 4
0
    def request_stream(self):
        """Yields `StreamingRecognizeRequest`s fed from `self.frames`.

        Sends the stream configuration first, then polls `self.frames`,
        forwarding one buffered chunk per iteration. Stops when
        `self.should_finish_stream` is set or when too many consecutive
        near-silent frames are observed.
        """
        recognition_config = cloud_speech_pb2.RecognitionConfig(
        encoding=self.audio_encoding,
        sample_rate=self.sampling_rate,
        language_code=self.lang_code,
        max_alternatives=1,
        )
        streaming_config = cloud_speech_pb2.StreamingRecognitionConfig(
            config=recognition_config,
            interim_results=True,
            single_utterance=True)

        yield cloud_speech_pb2.StreamingRecognizeRequest(streaming_config=streaming_config)

        silent_cnt=0
        while True:
            #print(sys._getframe().f_code.co_name,"1")
            time.sleep(self.frame_seconds / 4)
            #print("self.should_finish_stream", self.should_finish_stream, "len", len(frames))

            # External signal that recognition is done.
            if self.should_finish_stream:
                return

            if len(self.frames) > 0:
                # Volume check: bail out once silence persists over several
                # consecutive frames.

                # Peek (do not pop) at the oldest buffered frame to measure
                # its loudness; it is popped and sent further below.
                data = self.frames[0]
                rms = audioop.rms(data, 2)  # 2 = 16-bit sample width
                decibel = 20 * math.log10(rms) if rms > 0 else 0

                if decibel < self.silent_decibel:
                    silent_cnt = silent_cnt+1
                else :
                    silent_cnt = 0

                # Enough consecutive quiet frames: treat the utterance as over.
                if silent_cnt > self.max_silent_cnt :
                    print(sys._getframe().f_code.co_name, "find silent frames return")
                    return

            #print("request_stream2 3 framen len=", len(self.frames))
            if len(self.frames) > 0:
                #print(sys._getframe().f_code.co_name,"2", "framelen=",len(self.frames))
                #self.frames.pop(0)
                yield cloud_speech_pb2.StreamingRecognizeRequest(audio_content=self.frames.pop(0))
Ejemplo n.º 5
0
def request_stream(stop_audio, channels=CHANNELS, rate=RATE, chunk=CHUNK):
    """Yields `StreamingRecognizeRequest`s constructed from a recording audio
    stream.

    Args:
        stop_audio: A threading.Event object stops the recording when set.
        channels: How many audio channels to record.
        rate: The sampling rate in hertz.
        chunk: Buffer audio into chunks of this size before sending to the api.
    """
    # The initial request must contain metadata about the stream, so the
    # server knows how to interpret it.
    recognition_config = cloud_speech.RecognitionConfig(
        # There are a bunch of config options you can specify. See
        # https://goo.gl/A6xv5G for the full list.
        encoding='LINEAR16',  # raw 16-bit signed LE samples
        sample_rate=rate,  # the rate in hertz
        # See
        # https://g.co/cloud/speech/docs/best-practices#language_support
        # for a list of supported languages.
        language_code='en-US',  # a BCP-47 language tag
    )
    streaming_config = cloud_speech.StreamingRecognitionConfig(
        config=recognition_config,
        # Note that setting interim_results to True means that you'll likely
        # get multiple results for the same bit of audio, as the system
        # re-interprets audio in the context of subsequent audio. However, this
        # will give us quick results without having to tell the server when to
        # finalize a piece of audio.
        interim_results=True,
        single_utterance=True)

    yield cloud_speech.StreamingRecognizeRequest(
        streaming_config=streaming_config)

    with record_audio(channels, rate, chunk) as audio_stream:
        while not stop_audio.is_set():
            data = audio_stream.read(chunk)
            if not data:
                # PEP 479: raising StopIteration inside a generator is turned
                # into a RuntimeError on Python 3.7+. Simply stop yielding.
                break

            # Subsequent requests can all just have the content
            yield cloud_speech.StreamingRecognizeRequest(audio_content=data)
Ejemplo n.º 6
0
    def request_stream(self, stop_audio, channels=CHANNELS, rate=RATE, chunk=CHUNK):
        """Yields `StreamingRecognizeRequest`s built from recorded audio.

        Sends the stream configuration first, then records audio and forwards
        one chunk per request until rospy shuts down or the stream ends.

        Args:
            stop_audio: Stop-signal object (unused here; recording ends on
                rospy shutdown instead).
            channels: How many audio channels to record.
            rate: The sampling rate in hertz.
            chunk: Buffer audio into chunks of this size before sending.
        """
        recognition_config = cloud_speech.RecognitionConfig(
            encoding='LINEAR16',
            sample_rate=rate,
            language_code='ko-KR',
        )
        streaming_config = cloud_speech.StreamingRecognitionConfig(
            config=recognition_config,
            interim_results=True,
            single_utterance=False
        )

        # The server needs the metadata before any audio content.
        yield cloud_speech.StreamingRecognizeRequest(streaming_config=streaming_config)
        with self.record_audio(channels, rate, chunk) as audio_stream:
            while not rospy.is_shutdown():
                data = audio_stream.read(chunk)
                if not data:
                    # PEP 479: raising StopIteration inside a generator is a
                    # RuntimeError on Python 3.7+. Simply stop yielding.
                    break
                yield cloud_speech.StreamingRecognizeRequest(audio_content=data)
Ejemplo n.º 7
0
def request_stream():
	"""Yields `StreamingRecognizeRequest`s drained from the global `queue`.

	Sends the stream configuration first, then accumulates raw audio from
	`queue` into `frame`. Whenever the queue runs dry and enough audio has
	accumulated, the frame is either discarded as silence (marking
	`recognition_result.success = False` and ending the stream) or sent to
	the recognizer. Ends when `should_finish_stream` is set.
	"""
	global queue
	global recognition_result
	global should_finish_stream

	# Narrow exception for "queue is empty"; the module is shadowed by the
	# global `queue` instance, so import the class locally.
	from queue import Empty

	recognition_config = cloud_speech_pb2.RecognitionConfig(
		encoding=args.audio_encoding,
		sample_rate=args.sampling_rate,
		language_code=args.lang_code,
		max_alternatives=1,
	)
	streaming_config = cloud_speech_pb2.StreamingRecognitionConfig(
		config=recognition_config,
		interim_results=True, 
		single_utterance=True
	)

	yield cloud_speech_pb2.StreamingRecognizeRequest(streaming_config=streaming_config)

	# Number of bytes to accumulate before considering sending a frame.
	frame_length = int(args.sampling_rate * args.frame_seconds)
	frame = b""

	while True:
		if should_finish_stream:
			return

		try:
			data = queue.get(False)
			frame += data
		except Empty:
			# Was `except Exception`, which silently swallowed every error;
			# only an empty queue is expected here.
			if len(frame) > frame_length:
				rms = audioop.rms(frame, 2)  # 2 = 16-bit sample width
				decibel = 20 * math.log10(rms) if rms > 0 else 0
				if decibel < args.silent_decibel:
					# The whole frame is below the silence threshold:
					# treat the attempt as failed and end the stream.
					recognition_result.success = False
					return
				yield cloud_speech_pb2.StreamingRecognizeRequest(audio_content=frame)
				frame = b""
			time.sleep(args.frame_seconds / 4)
Ejemplo n.º 8
0
def request_stream():
    """Yield requests for a streaming-recognition session.

    The first request carries the stream configuration; afterwards the
    global `frames` buffer is polled and one buffered chunk is forwarded
    per iteration until the global `should_finish_stream` flag is set.
    """
    config = cloud_speech_pb2.RecognitionConfig(
        encoding=args.audio_encoding,
        sample_rate=args.sampling_rate,
        language_code=args.lang_code,
        max_alternatives=1,
    )

    # Configuration must reach the server before any audio content.
    yield cloud_speech_pb2.StreamingRecognizeRequest(
        streaming_config=cloud_speech_pb2.StreamingRecognitionConfig(
            config=config, interim_results=True, single_utterance=True))

    while True:
        time.sleep(args.frame_seconds / 4)

        if should_finish_stream:
            return

        if frames:
            yield cloud_speech_pb2.StreamingRecognizeRequest(
                audio_content=frames.pop(0))
Ejemplo n.º 9
0
    def g_request_steam(self, data_stream, rate, init_buff=None):
        """Yield streaming-recognition requests: configuration first, audio after.

        Args:
            data_stream: Iterable of raw audio chunks to forward.
            rate: Sample rate of the audio in hertz.
            init_buff: Optional audio chunk to send ahead of the stream.
        """
        # Bias recognition toward the command vocabulary of this app.
        hint_phrases = ["mirror", "add", "item", "help", "close", "clothes",
                        "tag", "tags", "find", "number 1", "wear", "start",
                        "stop", "stylist", "wardrobe", "exit",
                        "1", "2", "3", "4", "5", "6", "7", "8"]

        stream_config = cloud_speech.StreamingRecognitionConfig(
            config=cloud_speech.RecognitionConfig(
                encoding='LINEAR16',
                sample_rate=rate,
                language_code='en-US',
                speech_context=cloud_speech.SpeechContext(phrases=hint_phrases),
            ),
            single_utterance=False,
            interim_results=False)

        yield cloud_speech.StreamingRecognizeRequest(
            streaming_config=stream_config)

        # Any pre-buffered audio goes out before the live stream.
        if init_buff:
            yield cloud_speech.StreamingRecognizeRequest(audio_content=init_buff)

        for chunk in data_stream:
            yield cloud_speech.StreamingRecognizeRequest(audio_content=chunk)
def request_stream(stop_audio, channels=CHANNELS, rate=RATE, chunk=CHUNK):
    """Yields `StreamingRecognizeRequest`s constructed from a recording audio
    stream.

    Args:
        stop_audio: A threading.Event object stops the recording when set.
        channels: How many audio channels to record.
        rate: The sampling rate in hertz.
        chunk: Buffer audio into chunks of this size before sending to the api.
    """
    # The initial request must contain metadata about the stream, so the
    # server knows how to interpret it.
    recognition_config = cloud_speech.RecognitionConfig(
        # There are a bunch of config options you can specify. See
        # https://goo.gl/KPZn97 for the full list.
        encoding='LINEAR16',  # raw 16-bit signed LE samples
        sample_rate=rate,  # the rate in hertz
        # See
        # https://g.co/cloud/speech/docs/best-practices#language_support
        # for a list of supported languages.
        language_code='en-US',  # a BCP-47 language tag
    )
    streaming_config = cloud_speech.StreamingRecognitionConfig(
        config=recognition_config)

    yield cloud_speech.StreamingRecognizeRequest(
        streaming_config=streaming_config)

    with record_audio(channels, rate, chunk) as audio_stream:
        while not stop_audio.is_set():
            data = audio_stream.read(chunk)
            if not data:
                # PEP 479: raising StopIteration inside a generator is turned
                # into a RuntimeError on Python 3.7+. Simply stop yielding.
                break

            # Subsequent requests can all just have the content
            yield cloud_speech.StreamingRecognizeRequest(audio_content=data)