Exemple #1
0
def recognize_stream(bytestream: Generator[ByteString, None,
                                           None], client: speech.SpeechClient,
                     recognition_config: types.RecognitionConfig, q: Queue):
    """Stream audio chunks to the speech client and queue final transcripts.

    Every chunk produced by ``bytestream`` is wrapped in a
    ``StreamingRecognizeRequest`` and sent through
    ``client.streaming_recognize``. For each response that carries at least
    one result flagged ``is_final``, the first alternative of the first such
    result is put on ``q``. The function returns ``None`` once the response
    stream is exhausted.
    """
    audio_requests = (types.StreamingRecognizeRequest(audio_content=piece)
                      for piece in bytestream)
    streaming_config = get_streaming_recognition_config(recognition_config)
    response_stream = client.streaming_recognize(streaming_config,
                                                 audio_requests)

    for response in response_stream:
        logger.debug("reading next response; resp.results is {}".format(
            response.results))
        if not response.results:
            continue
        # Only the first final result of a response is forwarded.
        for candidate in response.results:
            if candidate.is_final:
                q.put(candidate.alternatives[0])
                break

    logger.info("no more responses!")
    logger.info("exit from recognize_stream!")
class GoogleAsr(Asr):
    """Streaming ASR backend using the Google Cloud Speech client.

    Audio read from a ``Recorder`` is streamed through
    ``SpeechClient.streaming_recognize``. Interim transcripts are forwarded
    with ``partialTextCaptured`` and the first final transcript is returned
    to the caller as an ``ASRResult``.
    """

    NAME = 'Google Asr'
    DEPENDENCIES = {'system': [], 'pip': {'google-cloud-speech==1.3.1'}}

    def __init__(self):
        super().__init__()
        self._capableOfArbitraryCapture = True
        self._isOnlineASR = True

        # Both are created in onStart()
        self._client: Optional[SpeechClient] = None
        self._streamingConfig: Optional[
            types.StreamingRecognitionConfig] = None

        # Last interim transcript forwarded through partialTextCaptured()
        self._previousCapture = ''

    def onStart(self):
        """Export the credentials path, then build the client and streaming config."""
        super().onStart()
        os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = str(
            Path(self.Commons.rootDir(), 'credentials/googlecredentials.json'))

        self._client = SpeechClient()
        # noinspection PyUnresolvedReferences
        config = types.RecognitionConfig(
            encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
            sample_rate_hertz=self.AudioServer.SAMPLERATE,
            language_code=self.LanguageManager.getLanguageAndCountryCode())

        self._streamingConfig = types.StreamingRecognitionConfig(
            config=config, interim_results=True)

    def decodeStream(self, session: DialogSession) -> Optional[ASRResult]:
        """Record the session's audio and stream it for recognition.

        :param session: the dialog session whose audio is decoded
        :return: an ``ASRResult`` holding the final transcript, its confidence
            and the processing time, or ``None`` when nothing final was
            recognized or the request failed
        """
        super().decodeStream(session)

        recorder = Recorder(self._timeout, session.user, session.siteId)
        self.ASRManager.addRecorder(session.siteId, recorder)
        self._recorder = recorder

        # A transcript left over from an earlier session must not suppress the
        # first partial of this one.
        self._previousCapture = ''

        # Bound before the try block: if the request raises, the return
        # statement below would otherwise hit an unbound local.
        result = None
        with Stopwatch() as processingTime:
            with recorder as stream:
                audioStream = stream.audioStream()
                # noinspection PyUnresolvedReferences
                try:
                    requests = (types.StreamingRecognizeRequest(
                        audio_content=content) for content in audioStream)
                    responses = self._client.streaming_recognize(
                        self._streamingConfig, requests)
                    result = self._checkResponses(session, responses)
                except Exception as e:
                    # Exception rather than a bare except, so that
                    # KeyboardInterrupt / SystemExit still propagate.
                    self.logWarning(f'Failed ASR request: {e}')

            self.end()

        if not result:
            return None

        return ASRResult(
            text=result[0],
            session=session,
            likelihood=result[1],
            processingTime=processingTime.time)

    def _checkResponses(self, session: DialogSession,
                        responses: Generator) -> Optional[tuple]:
        """Consume streaming responses until a final transcript shows up.

        Interim transcripts that differ from the previously forwarded one are
        reported through ``partialTextCaptured``.

        :param session: the dialog session being decoded
        :param responses: iterable of streaming responses, or ``None``
        :return: ``(transcript, confidence)`` of the first final result, or
            ``None`` if the stream ends without one
        """
        if responses is None:
            return None

        for response in responses:
            if not response.results:
                continue

            result = response.results[0]
            if not result.alternatives:
                continue

            best = result.alternatives[0]
            if result.is_final:
                return best.transcript, best.confidence

            if best.transcript != self._previousCapture:
                self.partialTextCaptured(
                    session=session,
                    text=best.transcript,
                    likelihood=best.confidence,
                    seconds=0)
                self._previousCapture = best.transcript

        return None
        sock.connect(address)
    else:
        sock = None

    client = SpeechClient()
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=args.lang_code)
    streaming_config = types.StreamingRecognitionConfig(config=config,
                                                        interim_results=True)

    print("%s recognition started!" % args.lang_code)
    while True:
        with MicrophoneStream(RATE, CHUNK) as stream:
            audio_generator = stream.generator()
            requests = (types.StreamingRecognizeRequest(audio_content=content)
                        for content in audio_generator)
            try:
                responses = client.streaming_recognize(streaming_config,
                                                       requests)
                listen_print_loop(responses,
                                  print_locally=args.debug,
                                  sock=sock)
            except KeyboardInterrupt:
                break
            except:  # ignore "400 Exceeded maximum allowed stream duration of 305 seconds."
                continue

    if sock is not None:
        sock.close()
Exemple #4
0
class GoogleAsr(Asr):
	"""Streaming ASR backend using the Google Cloud Speech client.

	Audio read from a ``Recorder`` is streamed through
	``SpeechClient.streaming_recognize``. Interim transcripts are forwarded
	with ``partialTextCaptured``; decoding stops on the first final result,
	when internet loss is signalled, or when interim results stall.
	"""

	NAME = 'Google Asr'
	DEPENDENCIES = {
		'system': [],
		'pip'   : {
			'google-cloud-speech==1.3.1'
		}
	}


	def __init__(self):
		super().__init__()
		self._credentialsFile = Path(self.Commons.rootDir(), 'credentials/googlecredentials.json')
		self._capableOfArbitraryCapture = True
		self._isOnlineASR = True

		# Both are created in onStart()
		self._client: Optional[SpeechClient] = None
		self._streamingConfig: Optional[types.StreamingRecognitionConfig] = None

		# Mirror an existing credentials file into the configuration if that entry is still empty
		if self._credentialsFile.exists() and not self.ConfigManager.getAliceConfigByName('googleASRCredentials'):
			self.ConfigManager.updateAliceConfiguration(key='googleASRCredentials', value=self._credentialsFile.read_text(), doPreAndPostProcessing=False)

		self._internetLostFlag = Event()  # Set if internet goes down, cut the decoding
		self._lastResultCheck = 0  # Time an unchanged interim result was last seen, 0 when no stall is being timed. More than 3 seconds between two such results stops the decoding

		self._previousCapture = ''  # The text that was last captured in the iteration


	def onStart(self):
		"""Export the credentials path, then build the client and streaming config."""
		super().onStart()
		os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = str(self._credentialsFile)

		self._client = SpeechClient()
		# noinspection PyUnresolvedReferences
		config = types.RecognitionConfig(
			encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
			sample_rate_hertz=self.AudioServer.SAMPLERATE,
			language_code=self.LanguageManager.getLanguageAndCountryCode()
		)

		self._streamingConfig = types.StreamingRecognitionConfig(config=config, interim_results=True)


	def decodeStream(self, session: DialogSession) -> Optional[ASRResult]:
		"""Record the session's audio and stream it for recognition.

		:param session: the dialog session whose audio is decoded
		:return: an ``ASRResult`` holding the transcript, its confidence and
			the processing time, or ``None`` when nothing was recognized or
			the request failed
		"""
		super().decodeStream(session)

		recorder = Recorder(self._timeout, session.user, session.deviceUid)
		self.ASRManager.addRecorder(session.deviceUid, recorder)
		self._recorder = recorder

		# Stall timer and last capture belong to one session only. Stale values
		# would end this session early or hide its first partial result.
		self._lastResultCheck = 0
		self._previousCapture = ''

		result = None
		with Stopwatch() as processingTime:
			with recorder as stream:
				audioStream = stream.audioStream()
				# noinspection PyUnresolvedReferences
				try:
					requests = (types.StreamingRecognizeRequest(audio_content=content) for content in audioStream)
					responses = self._client.streaming_recognize(self._streamingConfig, requests)
					result = self._checkResponses(session, responses)
				except Exception as e:
					self._internetLostFlag.clear()
					self.logWarning(f'Failed ASR request: {e}')

			self.end()

		if not result:
			return None

		return ASRResult(
			text=result[0],
			session=session,
			likelihood=result[1],
			processingTime=processingTime.time
		)


	def onInternetLost(self):
		"""Flag the connectivity loss so a running decoding gets cut short."""
		self._internetLostFlag.set()


	def _checkResponses(self, session: DialogSession, responses: Generator) -> Optional[tuple]:
		"""Consume streaming responses until a transcript can be returned.

		:param session: the dialog session being decoded
		:param responses: iterable of streaming responses, or ``None``
		:return: ``(transcript, confidence)`` of the first final result, of the
			current interim result when internet loss was flagged, or of a
			repeated interim result arriving more than 3 seconds after the
			previous one; ``None`` if the stream ends first
		:raises Exception: when internet loss was flagged and the current
			response carries no transcript
		"""
		if responses is None:
			return None

		for response in responses:
			if self._internetLostFlag.is_set():
				self.logDebug('Internet connectivity lost during ASR decoding')

				if not response.results or not response.results[0].alternatives:
					raise Exception('Internet connectivity lost during decoding')

				# The flag has been acted upon. Left set, it would cut every
				# following session at its first response.
				self._internetLostFlag.clear()
				best = response.results[0].alternatives[0]
				return best.transcript, best.confidence

			if not response.results:
				continue

			result = response.results[0]
			if not result.alternatives:
				continue

			best = result.alternatives[0]
			if result.is_final:
				return best.transcript, best.confidence

			if best.transcript != self._previousCapture:
				self.partialTextCaptured(session=session, text=best.transcript, likelihood=best.confidence, seconds=0)
				self._previousCapture = best.transcript
				# New text means progress: whatever stall was being timed is over
				self._lastResultCheck = 0
				continue

			# Same interim transcript as before
			now = int(time())

			if self._lastResultCheck == 0:
				# First repetition: start timing the stall
				self._lastResultCheck = now
				continue

			if now > self._lastResultCheck + 3:
				self.logDebug('Stopping process as there seems to be connectivity issues')
				return best.transcript, best.confidence

			self._lastResultCheck = now

		return None
Exemple #5
0
class GoogleAsr(Asr):
    """Streaming ASR backend using the Google Cloud Speech client.

    Audio read from a ``Recorder`` is streamed through
    ``SpeechClient.streaming_recognize``. Interim transcripts are forwarded
    with ``partialTextCaptured``. When an interim transcript repeats without
    being confirmed as final, one grace period is granted before that
    transcript is accepted as the result.
    """

    NAME = 'Google Asr'
    DEPENDENCIES = {'system': [], 'pip': {'google-cloud-speech==1.3.1'}}

    # Seconds granted for a final result once an interim transcript repeats
    _CONFIRMATION_DELAY = 3

    def __init__(self):
        super().__init__()
        self._capableOfArbitraryCapture = True
        self._isOnlineASR = True

        # Both are created in onStart()
        self._client: Optional[SpeechClient] = None
        self._streamingConfig: Optional[
            types.StreamingRecognitionConfig] = None

        self._internetLostFlag = Event(
        )  # Set if internet goes down, cut the decoding
        self._lastResultCheck = 0  # Epoch second at which the current grace period ends, 0 when none is running

        self._previousCapture = ''  # The text that was last captured in the iteration
        self._delayedGoogleConfirmation = False  # set whether slow internet is detected or not

    def onStart(self):
        """Export the credentials path, then build the client and streaming config."""
        super().onStart()
        os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = str(
            Path(self.Commons.rootDir(), 'credentials/googlecredentials.json'))

        self._client = SpeechClient()
        # noinspection PyUnresolvedReferences
        config = types.RecognitionConfig(
            encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
            sample_rate_hertz=self.AudioServer.SAMPLERATE,
            language_code=self.LanguageManager.getLanguageAndCountryCode())

        self._streamingConfig = types.StreamingRecognitionConfig(
            config=config, interim_results=True)

    def decodeStream(self, session: DialogSession) -> Optional[ASRResult]:
        """Record the session's audio and stream it for recognition.

        :param session: the dialog session whose audio is decoded
        :return: an ``ASRResult`` holding the transcript, its confidence and
            the processing time, or ``None`` when nothing was recognized or
            the request failed
        """
        super().decodeStream(session)

        recorder = Recorder(self._timeout, session.user, session.siteId)
        self.ASRManager.addRecorder(session.siteId, recorder)
        self._recorder = recorder

        # All of this is per-session state. A long stale capture would defeat
        # the length guard in _checkResponses, and a stale delay flag would
        # accept the first repeated transcript without any grace period.
        self._previousCapture = ''
        self._lastResultCheck = 0
        self._delayedGoogleConfirmation = False

        result = None
        with Stopwatch() as processingTime:
            with recorder as stream:
                audioStream = stream.audioStream()
                # noinspection PyUnresolvedReferences
                try:
                    requests = (types.StreamingRecognizeRequest(
                        audio_content=content) for content in audioStream)
                    responses = self._client.streaming_recognize(
                        self._streamingConfig, requests)
                    result = self._checkResponses(session, responses)
                except Exception as e:
                    # Exception rather than a bare except, so that
                    # KeyboardInterrupt / SystemExit still propagate.
                    self._internetLostFlag.clear()
                    self.logWarning(f'Failed ASR request: {e}')

            self.end()

        if not result:
            return None

        return ASRResult(
            text=result[0],
            session=session,
            likelihood=result[1],
            processingTime=processingTime.time)

    def onInternetLost(self):
        """Flag the connectivity loss so a running decoding gets cut short."""
        self._internetLostFlag.set()

    def _checkResponses(self, session: DialogSession,
                        responses: Generator) -> Optional[tuple]:
        """Consume streaming responses until a transcript can be returned.

        :param session: the dialog session being decoded
        :param responses: iterable of streaming responses, or ``None``
        :return: ``(transcript, confidence)`` of the first final result, of the
            current interim result when internet loss was flagged, or of an
            interim result that repeats again after the grace period;
            ``None`` if the stream ends first
        :raises Exception: when internet loss was flagged and the current
            response carries no transcript
        """
        if responses is None:
            return None

        for response in responses:
            if self._internetLostFlag.is_set():
                self.logDebug('Internet connectivity lost during ASR decoding')

                if not response.results or not response.results[
                        0].alternatives:
                    raise Exception(
                        'Internet connectivity lost during decoding')

                # The flag has been acted upon. Left set, it would cut every
                # following session at its first response.
                self._internetLostFlag.clear()
                best = response.results[0].alternatives[0]
                return best.transcript, best.confidence

            if not response.results:
                continue

            result = response.results[0]
            if not result.alternatives:
                continue

            best = result.alternatives[0]
            if result.is_final:
                self._lastResultCheck = 0
                self._delayedGoogleConfirmation = False
                return best.transcript, best.confidence

            if best.transcript != self._previousCapture:
                self.partialTextCaptured(
                    session=session,
                    text=best.transcript,
                    likelihood=best.confidence,
                    seconds=0)
                # Keep the longest capture so that it reflects the potential
                # full utterance rather than a single word of it
                if len(self._previousCapture) <= len(best.transcript):
                    self._previousCapture = best.transcript
                continue

            # Same transcript as the stored capture: no final result has
            # arrived for it yet.
            if self._delayedGoogleConfirmation:
                # The grace period was already granted, accept the text as is
                self.logDebug(
                    'Stopping process as there seems to be connectivity issues'
                )
                self._lastResultCheck = 0
                self._delayedGoogleConfirmation = False
                return best.transcript, best.confidence

            # Grant one grace period. Blocking on the event replaces a spin
            # loop on time() and wakes up early if internet loss is flagged.
            self._lastResultCheck = int(time()) + self._CONFIRMATION_DELAY
            self._delayedGoogleConfirmation = True
            self._internetLostFlag.wait(timeout=self._CONFIRMATION_DELAY)

        return None