def StartStream(self, streamNum, pos):
	"""SAPI event callback: a speech stream has started producing audio.

	Enables audio ducking (if configured on the driver) now that audio
	output is actually beginning.
	"""
	synth = self.synthRef()
	if synth is None:
		log.debugWarning("Called StartStream method on SapiSink while driver is dead")
		return
	ducker = synth._audioDucker
	if ducker:
		if audioDucking._isDebug():
			log.debug("Enabling audio ducking due to starting speech stream")
		ducker.enable()
def EndStream(self, streamNum, pos):
	"""SAPI event callback: a speech stream has finished producing audio.

	Notifies synthDoneSpeaking and disables audio ducking (if configured
	on the driver) now that audio output has ended.
	"""
	synth = self.synthRef()
	if synth is None:
		# Fixed copy-pasted log message: this is EndStream on SapiSink,
		# it previously (and confusingly) claimed to be the Bookmark method.
		log.debugWarning("Called EndStream method on SapiSink while driver is dead")
		return
	synthDoneSpeaking.notify(synth=synth)
	if synth._audioDucker:
		if audioDucking._isDebug():
			log.debug("Disabling audio ducking due to speech stream end")
		synth._audioDucker.disable()
def cancel(self):
	"""Stop any speech in progress.

	SAPI5's default means of stopping speech can sometimes lag at end of
	speech, especially with Win8 / Win 10 Microsoft Voices.  Therefore
	instruct the underlying audio interface to stop first, before
	interupting and purging any remaining speech.
	"""
	stream = self.ttsAudioStream
	if stream:
		stream.setState(SPAudioState.STOP, 0)
	self.tts.Speak(None, SpeechVoiceSpeakFlags.Async | SpeechVoiceSpeakFlags.PurgeBeforeSpeak)
	ducker = self._audioDucker
	if ducker:
		if audioDucking._isDebug():
			log.debug("Disabling audio ducking due to setting output audio state to stop")
		ducker.disable()
def pause(self, switch: bool):
	"""Pause (switch=True) or resume (switch=False) speech output.

	SAPI5's default means of pausing in most cases is either extremely
	slow (e.g. takes more than half a second) or does not work at all,
	so the underlying audio interface is paused/resumed instead.
	"""
	stream = self.ttsAudioStream
	if not stream:
		return
	currentState = stream.GetStatus().State
	if switch and currentState == SPAudioState.RUN:
		# pausing
		if self._audioDucker:
			if audioDucking._isDebug():
				log.debug("Disabling audio ducking due to setting output audio state to pause")
			self._audioDucker.disable()
		stream.setState(SPAudioState.PAUSE, 0)
	elif not switch and currentState == SPAudioState.PAUSE:
		# unpausing
		if self._audioDucker:
			if audioDucking._isDebug():
				log.debug("Enabling audio ducking due to setting output audio state to run")
			self._audioDucker.enable()
		stream.setState(SPAudioState.RUN, 0)
def waveOutClose(waveOutHandle):
	"""Wrapper around winmm waveOutClose that also ends audio ducking.

	On a successful close of a valid handle, the AudioDucker tracked for
	that handle is dropped from _duckersByHandle.  Returns the winmm
	result code (0 on success, or the Windows error number).
	"""
	if audioDucking._isDebug():
		log.debugWarning("End ducking audio requested for SAPI5 synthdriver")
	try:
		res = windll.winmm.waveOutClose(waveOutHandle) or 0
	except WindowsError as err:
		res = err.winerror
	if res != 0 or not waveOutHandle:
		log.warning("Closing wave out failed for SAPI5 synthdriver")
		log.debugWarning(f"Res: {res}\n waveOutHandle: {waveOutHandle}")
		return res
	_duckersByHandle.pop(waveOutHandle, None)
	return res
def waveOutOpen(pWaveOutHandle, deviceID, wfx, callback, callbackInstance, flags):
	"""Wrapper around winmm waveOutOpen that also starts audio ducking.

	On a successful open of a valid handle pointer, an AudioDucker is
	enabled and remembered in _duckersByHandle keyed by the new wave out
	handle.  Returns the winmm result code (0 on success, or the Windows
	error number).
	"""
	if audioDucking._isDebug():
		log.debugWarning("Ducking audio requested for SAPI5 synthdriver")
	try:
		res = windll.winmm.waveOutOpen(pWaveOutHandle, deviceID, wfx, callback, callbackInstance, flags) or 0
	except WindowsError as err:
		res = err.winerror
	if res != 0 or not pWaveOutHandle:
		log.warning("Opening wave out failed for SAPI5 synthdriver")
		log.debugWarning(f"Win Error: {res}\n WaveOutHandle: {pWaveOutHandle}")
		return res
	handle = pWaveOutHandle.contents.value
	ducker = audioDucking.AudioDucker()
	if not ducker.enable():
		log.warning("Ducking audio failed for SAPI5 synthdriver")
	_duckersByHandle[handle] = ducker
	return res
def speak(self, speechSequence):
	"""Render an NVDA speech sequence as SAPI5 XML and speak it asynchronously.

	speechSequence is a list of strings and SpeechCommand instances
	(IndexCommand, CharacterModeCommand, BreakCommand, PitchCommand,
	VolumeCommand, RateCommand, PhonemeCommand).  Unsupported commands
	are logged and skipped.
	"""
	textList = []
	# NVDA SpeechCommands are linear, but XML is hierarchical.
	# Therefore, we track values for non-empty tags.
	# When a tag changes, we close all previously opened tags and open new ones.
	tags = {}
	tagsChanged = True
	openedTags = []

	def outputTags():
		# Re-emit the tag stack only if a prosody tag changed since last output.
		nonlocal tagsChanged
		if not tagsChanged:
			return
		# Close previously opened tags in reverse order, then open the current set.
		for tag in reversed(openedTags):
			textList.append("</%s>" % tag)
		del openedTags[:]
		for tag, attrs in tags.items():
			textList.append("<%s" % tag)
			for attr, val in attrs.items():
				textList.append(' %s="%s"' % (attr, val))
			textList.append(">")
			openedTags.append(tag)
		tagsChanged = False

	pitch = self._pitch
	# Pitch must always be specified in the markup.
	tags["pitch"] = {"absmiddle": self._percentToPitch(pitch)}
	rate = self.rate
	volume = self.volume
	for item in speechSequence:
		if isinstance(item, str):
			outputTags()
			# Escape '<' so literal text cannot open a tag and corrupt the XML
			# markup.  (Was a no-op replace("<", "<"), which left the text
			# unescaped and the markup injectable/breakable.)
			textList.append(item.replace("<", "&lt;"))
		elif isinstance(item, IndexCommand):
			textList.append('<Bookmark Mark="%d" />' % item.index)
		elif isinstance(item, CharacterModeCommand):
			if item.state:
				tags["spell"] = {}
			else:
				tags.pop("spell", None)
			tagsChanged = True
		elif isinstance(item, BreakCommand):
			textList.append('<silence msec="%d" />' % item.time)
		elif isinstance(item, PitchCommand):
			tags["pitch"] = {"absmiddle": self._percentToPitch(int(pitch * item.multiplier))}
			tagsChanged = True
		elif isinstance(item, VolumeCommand):
			if item.multiplier == 1:
				# Back to base volume: drop the tag rather than emit a redundant one.
				tags.pop("volume", None)
			else:
				tags["volume"] = {"level": int(volume * item.multiplier)}
			tagsChanged = True
		elif isinstance(item, RateCommand):
			if item.multiplier == 1:
				tags.pop("rate", None)
			else:
				tags["rate"] = {"absspeed": self._percentToRate(int(rate * item.multiplier))}
			tagsChanged = True
		elif isinstance(item, PhonemeCommand):
			try:
				textList.append(
					'<pron sym="%s">%s</pron>' % (self._convertPhoneme(item.ipa), item.text or ""),
				)
			except LookupError:
				log.debugWarning("Couldn't convert character in IPA string: %s" % item.ipa)
				if item.text:
					textList.append(item.text)
		elif isinstance(item, SpeechCommand):
			log.debugWarning("Unsupported speech command: %s" % item)
		else:
			log.error("Unknown speech: %s" % item)
	# Close any tags that are still open.
	tags.clear()
	tagsChanged = True
	outputTags()
	text = "".join(textList)
	flags = SpeechVoiceSpeakFlags.IsXML | SpeechVoiceSpeakFlags.Async
	# Ducking should be complete before the synth starts producing audio.
	# For this to happen, the speech method must block until ducking is complete.
	# Ducking should be disabled when the synth is finished producing audio.
	# Note that there may be calls to speak with a string that results in no audio,
	# it is important that in this case the audio does not get stuck ducked.
	# When there is no audio produced the startStream and endStream handlers are not called.
	# To prevent audio getting stuck ducked, it is unducked at the end of speech.
	# There are some known issues:
	# - When there is no audio produced by the synth, a user may notice volume lowering (ducking) temporarily.
	# - If the call to startStream handler is delayed significantly, users may notice a variation in volume
	# (as ducking is disabled at the end of speak, and re-enabled when the startStream handler is called)
	# A note on the synchronicity of components of this approach:
	# SAPISink.StartStream event handler (callback):
	# the synth speech is not blocked by this event callback.
	# SAPISink.EndStream event handler (callback):
	# assumed also to be async but not confirmed. Synchronicity is irrelevant to the current approach.
	# AudioDucker.disable returns before the audio is completely unducked.
	# AudioDucker.enable() ducking will complete before the function returns.
	# It is not possible to "double duck the audio", calling twice yields the same result as calling once.
	# AudioDucker class instances count the number of enables/disables,
	# in order to unduck there must be no remaining enabled audio ducker instances.
	# Due to this a temporary audio ducker is used around the call to speak.
	# SAPISink.StartStream: Ducking here may allow the early speech to start before ducking is completed.
	if audioDucking.isAudioDuckingSupported():
		tempAudioDucker = audioDucking.AudioDucker()
	else:
		tempAudioDucker = None
	if tempAudioDucker:
		if audioDucking._isDebug():
			log.debug("Enabling audio ducking due to speak call")
		tempAudioDucker.enable()
	try:
		self.tts.Speak(text, flags)
	finally:
		# Always unduck, even if Speak raises, so audio cannot get stuck ducked.
		if tempAudioDucker:
			if audioDucking._isDebug():
				log.debug("Disabling audio ducking after speak call")
			tempAudioDucker.disable()