def _get_supportedSettings(self):
    """Assemble the list of settings this synth exposes, caching it on the instance.

    The rate-boost setting is only included when the driver reports
    ``supportsProsodyOptions``.  The list stored in ``self.supportedSettings``
    and the returned list are the same object.
    """
    settings = [
        SynthDriver.VoiceSetting(),
        SynthDriver.RateSetting(),
    ]
    # Rate boost is an optional prosody feature; expose it only when available.
    if self.supportsProsodyOptions:
        settings.append(SynthDriver.RateBoostSetting())
    settings.append(SynthDriver.PitchSetting())
    settings.append(SynthDriver.VolumeSetting())
    self.supportedSettings = settings
    return settings
class SynthDriver(SynthDriver):
    """NVDA synth driver for the RHVoice TTS engine.

    Wraps the RHVoice C library via ctypes: audio is produced through C
    callbacks that feed an ``audio_player``, and speech requests are queued to
    a background ``TTSThread``.  Speech sequences are converted to SSML before
    being handed to the engine.

    NOTE(review): this class uses Python-2-only names (``xrange``,
    ``basestring``, ``unicode``, ``Queue``) — presumably the file targets an
    old NVDA/Python 2 runtime; confirm before running under Python 3.
    """
    name = "RHVoice"
    description = "RHVoice"
    supportedSettings = (SynthDriver.VoiceSetting(),
                         SynthDriver.RateSetting(),
                         SynthDriver.PitchSetting(),
                         SynthDriver.VolumeSetting())

    @classmethod
    def check(cls):
        # The driver is usable only when the RHVoice DLL exists on disk.
        return os.path.isfile(lib_path)

    def __init__(self):
        """Load the RHVoice library, register callbacks and start the TTS thread.

        Raises RuntimeError when the engine cannot be created.
        """
        self.__lib = load_tts_library()
        # Event used to abort in-progress speech from cancel().
        self.__cancel_flag = threading.Event()
        self.__player = audio_player(self.__cancel_flag)
        # The Python callback wrappers AND their ctypes function-pointer
        # wrappers are kept as attributes so they are not garbage collected
        # while the C engine still holds pointers to them.
        self.__sample_rate_callback = sample_rate_callback(
            self.__lib, self.__player)
        self.__c_sample_rate_callback = RHVoice_callback_types.set_sample_rate(
            self.__sample_rate_callback)
        self.__speech_callback = speech_callback(self.__lib, self.__player,
                                                 self.__cancel_flag)
        self.__c_speech_callback = RHVoice_callback_types.play_speech(
            self.__speech_callback)
        self.__mark_callback = mark_callback(self.__lib)
        self.__c_mark_callback = RHVoice_callback_types.process_mark(
            self.__mark_callback)
        # Collect the "data" folders of every running RHVoice language/voice
        # add-on; the engine expects UTF-8 encoded paths.
        resource_paths = [
            os.path.join(addon.path, "data").encode("UTF-8")
            for addon in addonHandler.getRunningAddons()
            if (addon.name.startswith("RHVoice-language")
                or addon.name.startswith("RHVoice-voice"))
        ]
        # NULL-terminated C array of char* as required by the RHVoice API.
        c_resource_paths = (c_char_p * (len(resource_paths) + 1))(
            *(resource_paths + [None]))
        init_params = RHVoice_init_params(
            None,
            config_path.encode("utf-8"),
            c_resource_paths,
            RHVoice_callbacks(
                self.__c_sample_rate_callback,
                self.__c_speech_callback,
                self.__c_mark_callback,
                # Unused callbacks are passed as NULL function pointers.
                cast(None, RHVoice_callback_types.word_starts),
                cast(None, RHVoice_callback_types.word_ends),
                cast(None, RHVoice_callback_types.sentence_starts),
                cast(None, RHVoice_callback_types.sentence_ends),
                cast(None, RHVoice_callback_types.play_audio)),
            0)
        self.__tts_engine = self.__lib.RHVoice_new_tts_engine(
            byref(init_params))
        if not self.__tts_engine:
            raise RuntimeError("RHVoice: initialization error")
        nvda_language = languageHandler.getLanguage().split("_")[0]
        # Build voice-name -> language map and the set of known languages.
        number_of_voices = self.__lib.RHVoice_get_number_of_voices(
            self.__tts_engine)
        native_voices = self.__lib.RHVoice_get_voices(self.__tts_engine)
        self.__voice_languages = dict()
        self.__languages = set()
        for i in xrange(number_of_voices):
            native_voice = native_voices[i]
            self.__voice_languages[native_voice.name] = native_voice.language
            self.__languages.add(native_voice.language)
        # Pick the first voice profile whose primary voice matches NVDA's
        # language; fall back to the first profile.
        self.__profile = None
        self.__profiles = list()
        number_of_profiles = self.__lib.RHVoice_get_number_of_voice_profiles(
            self.__tts_engine)
        native_profile_names = self.__lib.RHVoice_get_voice_profiles(
            self.__tts_engine)
        for i in xrange(number_of_profiles):
            name = native_profile_names[i]
            self.__profiles.append(name)
            # A profile name is "voice1+voice2+..."; the first voice decides
            # the profile's primary language.
            if (self.__profile is None) and (
                    nvda_language == self.__voice_languages[name.split("+")[0]]):
                self.__profile = name
        if self.__profile is None:
            self.__profile = self.__profiles[0]
        # Rate/pitch/volume are tracked as 0..100 percentages.
        self.__rate = 50
        self.__pitch = 50
        self.__volume = 50
        self.__tts_queue = Queue.Queue()
        self.__tts_thread = TTSThread(self.__tts_queue)
        self.__tts_thread.start()
        log.info("Using RHVoice version {}".format(
            self.__lib.RHVoice_get_version()))

    def terminate(self):
        """Stop speech, shut down the worker thread and free the C engine."""
        self.cancel()
        # None is the sentinel that makes TTSThread exit its loop.
        self.__tts_queue.put(None)
        self.__tts_thread.join()
        self.__player.close()
        self.__lib.RHVoice_delete_tts_engine(self.__tts_engine)
        self.__tts_engine = None

    def speak(self, speech_sequence):
        """Convert an NVDA speech sequence to SSML and queue it for synthesis."""
        spell_mode = False
        language_changed = False
        text_list = [u"<speak>"]
        for item in speech_sequence:
            if isinstance(item, basestring):
                s = escape_text(unicode(item))
                # In spell mode each chunk is spoken character by character.
                text_list.append((
                    u'<say-as interpret-as="characters">{}</say-as>'.format(s)
                ) if spell_mode else s)
            elif isinstance(item, speech.IndexCommand):
                # SSML marks are reported back through the mark callback and
                # drive NVDA's lastIndex.
                text_list.append('<mark name="%d"/>' % item.index)
            elif isinstance(item, speech.CharacterModeCommand):
                if item.state:
                    spell_mode = True
                else:
                    spell_mode = False
            elif isinstance(item, speech.LangChangeCommand):
                # Close any open <voice> element before switching again.
                if language_changed:
                    text_list.append(u"</voice>")
                    language_changed = False
                if not item.lang:
                    continue
                new_language = item.lang.split("_")[0]
                # Skip languages RHVoice has no voice for, and the current
                # profile's own language (no switch needed).
                if new_language not in self.__languages:
                    continue
                elif new_language == self.__voice_languages[
                        self.__profile.split("+")[0]]:
                    continue
                text_list.append(
                    u'<voice xml:lang="{}">'.format(new_language))
                language_changed = True
            elif isinstance(item, speech.SpeechCommand):
                log.debugWarning("Unsupported speech command: %s" % item)
            else:
                log.error("Unknown speech: %s" % item)
        if language_changed:
            text_list.append(u"</voice>")
        text_list.append(u"</speak>")
        text = u"".join(text_list)
        # Package the SSML plus current prosody settings into a queued task.
        task = speak_text(self.__lib, self.__tts_engine, text,
                          self.__cancel_flag, self.__player)
        task.set_voice_profile(self.__profile)
        task.set_rate(self.__rate)
        task.set_pitch(self.__pitch)
        task.set_volume(self.__volume)
        self.__tts_queue.put(task)

    def pause(self, switch):
        # Pausing is delegated entirely to the audio player.
        self.__player.pause(switch)

    def cancel(self):
        """Drain all pending tasks, signal cancellation and stop audio output."""
        try:
            # Empty the queue; get_nowait raises Queue.Empty when drained.
            while True:
                self.__tts_queue.get_nowait()
        except Queue.Empty:
            self.__cancel_flag.set()
            # The worker thread will call clear() once it dequeues this,
            # re-arming the flag after the current utterance is aborted.
            self.__tts_queue.put(self.__cancel_flag.clear)
            self.__player.stop()

    def _get_lastIndex(self):
        # Index of the last SSML mark reported by the engine.
        return self.__mark_callback.index

    def _get_availableVoices(self):
        """Return an OrderedDict of VoiceInfo keyed by voice-profile name."""
        return OrderedDict(
            (profile,
             VoiceInfo(profile, profile,
                       self.__voice_languages[profile.split("+")[0]]))
            for profile in self.__profiles)

    def _get_language(self):
        # Language of the current profile's primary voice.
        return self.__voice_languages[self.__profile.split("+")[0]]

    def _get_rate(self):
        return self.__rate

    def _set_rate(self, rate):
        # Clamp to 0..100.
        self.__rate = max(0, min(100, rate))

    def _get_pitch(self):
        return self.__pitch

    def _set_pitch(self, pitch):
        # Clamp to 0..100.
        self.__pitch = max(0, min(100, pitch))

    def _get_volume(self):
        return self.__volume

    def _set_volume(self, volume):
        # Clamp to 0..100.
        self.__volume = max(0, min(100, volume))

    def _get_voice(self):
        return self.__profile

    def _set_voice(self, voice):
        # NOTE(review): bare except silently ignores unknown voice names (and
        # any other error); consider catching KeyError explicitly and logging.
        try:
            self.__profile = self.availableVoices[voice].ID
        except:
            pass
class SynthDriver(synthDriverHandler.SynthDriver):
    """NVDA synth driver for IBMTTS (ECI), driven through the ``_ibmeci`` module.

    Speech sequences are translated into a list of (function, args) pairs that
    is pushed onto ``_ibmeci.eciQueue`` and executed by ``_ibmeci.process()``.
    Inline backquote commands (``\x60p``, ``\x60vv`` ...) control prosody.
    """
    supportedSettings = (SynthDriver.VoiceSetting(),
                         SynthDriver.VariantSetting(),
                         SynthDriver.RateSetting(),
                         BooleanDriverSetting("rateBoost", _("Rate boos&t"),
                                              True),
                         SynthDriver.PitchSetting(),
                         SynthDriver.InflectionSetting(),
                         SynthDriver.VolumeSetting(),
                         NumericDriverSetting("hsz", _("Head Size"), False),
                         NumericDriverSetting("rgh", _("Roughness"), False),
                         NumericDriverSetting("bth", _("Breathiness"), False),
                         BooleanDriverSetting(
                             "backquoteVoiceTags",
                             _("Enable backquote voice &tags"), False))
    supportedCommands = {
        speech.IndexCommand, speech.CharacterModeCommand,
        speech.LangChangeCommand, speech.BreakCommand, speech.PitchCommand,
        speech.RateCommand, speech.VolumeCommand
    }
    supportedNotifications = {synthIndexReached, synthDoneSpeaking}
    description = 'IBMTTS'
    name = 'ibmeci'
    # Last language actually announced to the engine, to avoid redundant
    # language annotations.
    speakingLanguage = ""

    @classmethod
    def check(cls):
        return _ibmeci.eciCheck()

    def __init__(self):
        """Initialize the ECI engine and set starting rate/variant."""
        _ibmeci.initialize(self._onIndexReached, self._onDoneSpeaking)
        # This information doesn't really need to be displayed, and makes
        # IBMTTS unusable if the addon is not in the same drive as NVDA
        # executable.  But display it only on debug mode in case of it can be
        # useful.
        log.debug("Using IBMTTS version %s" % _ibmeci.eciVersion())
        lang = languageHandler.getLanguage()
        self.rate = 50
        self.speakingLanguage = lang
        self.variant = "1"

    # Maps NVDA prosody commands to the matching ECI voice parameter.
    PROSODY_ATTRS = {
        speech.PitchCommand: ECIVoiceParam.eciPitchBaseline,
        speech.VolumeCommand: ECIVoiceParam.eciVolume,
        speech.RateCommand: ECIVoiceParam.eciSpeed,
    }

    def speak(self, speechSequence):
        """Translate a speech sequence into queued ECI calls and kick processing."""
        last = None
        defaultLanguage = self.language
        outlist = []
        # `ts0 resets character (spell) mode at the start of every utterance.
        outlist.append((_ibmeci.speak, (b"`ts0", )))
        for item in speechSequence:
            if isinstance(item, string_types):
                s = self.processText(unicode(item))
                outlist.append((_ibmeci.speak, (s, )))
                last = s
            elif isinstance(item, speech.IndexCommand):
                outlist.append((_ibmeci.index, (item.index, )))
            elif isinstance(item, speech.LangChangeCommand):
                # Look up an engine language annotation by full locale first,
                # then by bare 2-letter language code.
                l = None
                if item.lang in langsAnnotations:
                    l = langsAnnotations[item.lang]
                elif item.lang and item.lang[0:2] in langsAnnotations:
                    l = langsAnnotations[item.lang[0:2]]
                if l:
                    # Only emit the annotation when the language actually
                    # changes.
                    if item.lang != self.speakingLanguage and item.lang != self.speakingLanguage[
                            0:2]:
                        outlist.append((_ibmeci.speak, (l, )))
                        self.speakingLanguage = item.lang
                else:
                    # Unknown language: fall back to the default language.
                    outlist.append(
                        (_ibmeci.speak, (langsAnnotations[defaultLanguage], )))
                    self.speakingLanguage = defaultLanguage
            elif isinstance(item, speech.CharacterModeCommand):
                outlist.append(
                    (_ibmeci.speak, (b"`ts1" if item.state else b"`ts0", )))
            elif isinstance(item, speech.BreakCommand):
                outlist.append((_ibmeci.speak, (b' `p%d ' % item.time, )))
            elif type(item) in self.PROSODY_ATTRS:
                val = max(0, min(item.newValue, 100))
                if type(item) == speech.RateCommand:
                    val = self.percentToRate(val)
                outlist.append((_ibmeci.setProsodyParam,
                                (self.PROSODY_ATTRS[type(item)], val)))
            else:
                log.error("Unknown speech: %s" % item)
        # Ensure the utterance ends with punctuation so the engine pauses.
        if last is not None and last[-1] not in punctuation:
            # check if a pitch command is at the end of the list, because p1
            # need to be send before this.
            # index -2 is because -1 always seem to be an index command.
            if outlist[-2][0] == _ibmeci.setProsodyParam:
                outlist.insert(-2, (_ibmeci.speak, (b'`p1. ', )))
            else:
                outlist.append((_ibmeci.speak, (b'`p1. ', )))
        outlist.append((_ibmeci.setEndStringMark, ()))
        outlist.append((_ibmeci.synth, ()))
        #print(outlist)
        _ibmeci.eciQueue.put(outlist)
        _ibmeci.process()

    def processText(self, text):
        """Apply per-language regex fixes, encode to ANSI and prefix prosody reset.

        ``_ibmeci.params[9]`` holds the current language/dialect id; the magic
        numbers select English, Spanish and French fix tables respectively.
        """
        text = text.rstrip()
        if _ibmeci.params[9] in (65536, 65537):
            text = resub(english_fixes, text)
        if _ibmeci.params[9] in (131072, 131073):
            text = resub(spanish_fixes, text)
        if _ibmeci.params[9] in (196609, 196608):
            text = resub(french_fixes, text)
            text = text.replace(
                'quil', 'qil'
            )  #Sometimes this string make everything buggy with IBMTTS in French
        if self._backquoteVoiceTags:
            # NOTE(review): this branch also strips backquotes from the text,
            # which looks like it defeats the "backquote voice tags" setting —
            # confirm against the upstream IBMTTS driver.
            #this converts to ansi for anticrash. If this breaks with foreign
            #langs, we can remove it.
            text = text.replace('`', ' ').encode('mbcs',
                                                 'replace')  #no embedded commands
            # `pp0 resets pitch fluctuation; `vv restores the configured volume.
            text = b"`pp0 `vv%d %s" % (_ibmeci.getVParam(
                ECIVoiceParam.eciVolume), text)
            text = resub(anticrash_res, text)
        else:
            #this converts to ansi for anticrash. If this breaks with foreign
            #langs, we can remove it.
            text = text.encode('mbcs', 'replace')
            text = resub(anticrash_res, text)
            text = b"`pp0 `vv%d %s" % (_ibmeci.getVParam(
                ECIVoiceParam.eciVolume), text.replace(b'`', b' ')
                                       )  #no embedded commands
        # Insert a short pause around sentence-ending punctuation and keep
        # times like "12:34" readable.
        text = pause_re.sub(br'\1 `p1\2\3', text)
        text = time_re.sub(br'\1:\2 \3', text)
        return text

    def pause(self, switch):
        _ibmeci.pause(switch)

    def terminate(self):
        _ibmeci.terminate()

    # Whether raw backquote commands typed by the user are passed through.
    _backquoteVoiceTags = False

    def _get_backquoteVoiceTags(self):
        return self._backquoteVoiceTags

    def _set_backquoteVoiceTags(self, enable):
        if enable == self._backquoteVoiceTags:
            return
        self._backquoteVoiceTags = enable

    _rateBoost = False
    # Scale factor applied to the engine speed when rate boost is on.
    RATE_BOOST_MULTIPLIER = 1.6

    def _get_rateBoost(self):
        return self._rateBoost

    def _set_rateBoost(self, enable):
        if enable != self._rateBoost:
            # Re-apply the current percentage so the engine speed is rescaled.
            rate = self.rate
            self._rateBoost = enable
            self.rate = rate

    def _get_rate(self):
        val = _ibmeci.getVParam(ECIVoiceParam.eciSpeed)
        if self._rateBoost:
            val = int(round(val / self.RATE_BOOST_MULTIPLIER))
        return self._paramToPercent(val, minRate, maxRate)

    def percentToRate(self, val):
        """Convert a 0..100 percentage to an engine speed value (boost-aware)."""
        val = self._percentToParam(val, minRate, maxRate)
        if self._rateBoost:
            val = int(round(val * self.RATE_BOOST_MULTIPLIER))
        return val

    def _set_rate(self, val):
        val = self.percentToRate(val)
        # Cached so _set_variant can restore the speed after a variant change.
        self._rate = val
        _ibmeci.setVParam(ECIVoiceParam.eciSpeed, val)

    def _get_pitch(self):
        return _ibmeci.getVParam(ECIVoiceParam.eciPitchBaseline)

    def _set_pitch(self, vl):
        _ibmeci.setVParam(ECIVoiceParam.eciPitchBaseline, vl)

    def _get_volume(self):
        return _ibmeci.getVParam(ECIVoiceParam.eciVolume)

    def _set_volume(self, vl):
        _ibmeci.setVParam(ECIVoiceParam.eciVolume, int(vl))

    def _set_inflection(self, vl):
        vl = int(vl)
        _ibmeci.setVParam(ECIVoiceParam.eciPitchFluctuation, vl)

    def _get_inflection(self):
        return _ibmeci.getVParam(ECIVoiceParam.eciPitchFluctuation)

    def _set_hsz(self, vl):
        vl = int(vl)
        _ibmeci.setVParam(ECIVoiceParam.eciHeadSize, vl)

    def _get_hsz(self):
        return _ibmeci.getVParam(ECIVoiceParam.eciHeadSize)

    def _set_rgh(self, vl):
        vl = int(vl)
        _ibmeci.setVParam(ECIVoiceParam.eciRoughness, vl)

    def _get_rgh(self):
        return _ibmeci.getVParam(ECIVoiceParam.eciRoughness)

    def _set_bth(self, vl):
        vl = int(vl)
        _ibmeci.setVParam(ECIVoiceParam.eciBreathiness, vl)

    def _get_bth(self):
        return _ibmeci.getVParam(ECIVoiceParam.eciBreathiness)

    def _getAvailableVoices(self):
        """Enumerate installed *.syn voice files and map them to VoiceInfo."""
        o = OrderedDict()
        for name in os.listdir(_ibmeci.ttsPath):
            if name.lower().endswith('.syn'):
                info = _ibmeci.langs[name.lower()[:3]]
                o[str(info[0])] = VoiceInfo(str(info[0]), info[1], info[2])
        return o

    def _get_voice(self):
        return str(_ibmeci.params[_ibmeci.ECIParam.eciLanguageDialect])

    def _set_voice(self, vl):
        _ibmeci.set_voice(vl)

    def _get_lastIndex(self):
        #fix?
        return _ibmeci.lastindex

    def cancel(self):
        _ibmeci.stop()

    def _getAvailableVariants(self):
        global variants
        return OrderedDict(
            (str(id), synthDriverHandler.VoiceInfo(str(id), name))
            for id, name in variants.items())

    def _set_variant(self, v):
        global variants
        # Fall back to variant "1" when an unknown id is requested.
        self._variant = v if int(v) in variants else "1"
        _ibmeci.setVariant(int(v))
        # Changing the variant resets speed; restore the cached engine rate.
        _ibmeci.setVParam(ECIVoiceParam.eciSpeed, self._rate)
        #if 'ibmtts' in config.conf['speech']:
        #config.conf['speech']['ibmtts']['pitch'] = self.pitch

    def _get_variant(self):
        return self._variant

    def _onIndexReached(self, index):
        synthIndexReached.notify(synth=self, index=index)

    def _onDoneSpeaking(self):
        synthDoneSpeaking.notify(synth=self)
class SynthDriver(SynthDriver):
    """NVDA synth driver for WorldVoice (Vocalizer Expressive).

    Besides driving the Vocalizer engine it monkey-patches the global
    ``speech.speak``/``speech.speakSpelling`` functions to inject language
    detection and number/space preprocessing, and manages one voice instance
    per language via ``VoiceManager``.
    """
    name = "WorldVoiceXVED2"
    description = "WorldVoice(VE)"
    supportedSettings = [
        SynthDriver.VoiceSetting(),
        # SynthDriver.VariantSetting(),
        SynthDriver.RateSetting(),
        SynthDriver.PitchSetting(),
        SynthDriver.VolumeSetting(),
        driverHandler.DriverSetting(
            "numlan",
            # Translators: Label for a setting in voice settings dialog.
            _("Number &Language"),
            availableInSettingsRing=True,
            defaultVal="default",
            # Translators: Label for a setting in synth settings ring.
            displayName=_("Number Language"),
        ),
        driverHandler.DriverSetting(
            "nummod",
            # Translators: Label for a setting in voice settings dialog.
            _("Number &Mode"),
            availableInSettingsRing=True,
            defaultVal="value",
            # Translators: Label for a setting in synth settings ring.
            displayName=_("Number Mode"),
        ),
        driverHandler.NumericDriverSetting(
            "chinesespace",
            # Translators: Label for a setting in voice settings dialog.
            _("Pause time when encountering spaces between Chinese"),
            defaultVal=0,
            minStep=1,
        ),
        driverHandler.BooleanDriverSetting(
            "cni",
            _("Ignore comma between number"),
            defaultVal=False,
        ),
        driverHandler.BooleanDriverSetting(
            "dli",
            _("Ignore language information of document"),
            defaultVal=False,
        ),
        driverHandler.BooleanDriverSetting(
            "uwv",
            _("Enable WorldVoice setting rules to detect text language"),
            availableInSettingsRing=True,
            defaultVal=True,
            displayName=_("Enable WorldVoice rules"),
        ),
    ]
    supportedCommands = {
        speech.IndexCommand,
        speech.CharacterModeCommand,
        speech.LangChangeCommand,
        speech.BreakCommand,
        speech.PitchCommand,
        speech.RateCommand,
        speech.VolumeCommand,
    }
    supportedNotifications = {synthIndexReached, synthDoneSpeaking}

    @classmethod
    def check(cls):
        with _vocalizer.preOpenVocalizer() as check:
            return check

    def __init__(self):
        """Initialize Vocalizer, patch global speech functions, build voice maps.

        Raises the original ``_vocalizer.VeError`` after logging known license
        failure codes.
        """
        _config.load()
        # Initialize the driver
        try:
            _vocalizer.initialize(self._onIndexReached)
            log.debug("Vocalizer info: %s" % self._info())
        except _vocalizer.VeError as e:
            if e.code == _vocalizer.VAUTONVDA_ERROR_INVALID:
                log.info("Vocalizer license for NVDA is Invalid")
            elif e.code == _vocalizer.VAUTONVDA_ERROR_DEMO_EXPIRED:
                log.info("Vocalizer demo license for NVDA as expired.")
            raise
        self._voiceManager = VoiceManager()
        # Keep the real speech functions so terminate() can restore them.
        self._realSpeakFunc = speech.speak
        self._realSpellingFunc = speech.speakSpelling
        speech.speak = self.patchedSpeak
        speech.speakSpelling = self.patchedSpeakSpelling
        speechSymbols = SpeechSymbols()
        speechSymbols.load('unicode.dic')
        self._languageDetector = languageDetection.LanguageDetector(
            list(self._voiceManager.languages), speechSymbols)
        speech._speakWithoutPauses = speech.SpeechWithoutPauses(
            speakFunc=self.patchedSpeak)
        speech.speakWithoutPauses = speech._speakWithoutPauses.speakWithoutPauses
        self._localeToVoices = self._voiceManager.localeToVoicesMap
        # Only locales that actually have at least one voice.
        self._locales = sorted([
            l for l in self._localeToVoices if len(self._localeToVoices[l]) > 0
        ])
        self._localeNames = list(
            map(self._getLocaleReadableName, self._locales))
        self._voice = None

    def _onIndexReached(self, index):
        # Vocalizer reports None when an utterance has finished.
        if index is not None:
            synthIndexReached.notify(synth=self, index=index)
        else:
            synthDoneSpeaking.notify(synth=self)

    def terminate(self):
        """Restore the patched speech functions and shut the engine down."""
        speech.speak = self._realSpeakFunc
        speech.speakSpelling = self._realSpellingFunc
        speech._speakWithoutPauses = speech.SpeechWithoutPauses(
            speakFunc=speech.speak)
        speech.speakWithoutPauses = speech._speakWithoutPauses.speakWithoutPauses
        try:
            self.cancel()
            self._voiceManager.close()
            _vocalizer.terminate()
        except RuntimeError:
            log.error("Vocalizer terminate", exc_info=True)

    def speak(self, speechSequence):
        """Send a speech sequence to Vocalizer, switching voices per language."""
        # Optional unicode-based language detection, applied after symbol
        # processing when so configured.
        if self.uwv \
            and _config.vocalizerConfig['autoLanguageSwitching']['useUnicodeLanguageDetection'] \
            and _config.vocalizerConfig['autoLanguageSwitching']['afterSymbolDetection']:
            speechSequence = self._languageDetector.add_detected_language_commands(
                speechSequence)
            speechSequence = list(speechSequence)
        speechSequence = self.patchedNumSpeechSequence(speechSequence)
        speechSequence = self.patchedSpaceSpeechSequence(speechSequence)
        currentInstance = defaultInstance = self._voiceManager.defaultVoiceInstance.token
        currentLanguage = defaultLanguage = self.language
        chunks = []
        hasText = False
        charMode = False
        for command in speechSequence:
            if isinstance(command, str):
                command = command.strip()
                if not command:
                    continue
                # If character mode is on use lower case characters
                # Because the synth does not allow to turn off the caps
                # reporting
                if charMode or len(command) == 1:
                    command = command.lower()
                # replace the excape character since it is used for parameter
                # changing
                chunks.append(command.replace("\x1b", ""))
                hasText = True
            elif isinstance(command, speech.IndexCommand):
                chunks.append("\x1b\\mrk=%d\\" % command.index)
            elif isinstance(command, speech.BreakCommand):
                # The bet2 variant only accepts breaks up to 6553 ms.
                maxTime = 6553 if self.variant == "bet2" else 65535
                breakTime = max(1, min(command.time, maxTime))
                # Flush buffered text before inserting the break.
                self._speak(currentInstance, chunks)
                chunks = []
                hasText = False
                _vocalizer.processBreak(currentInstance, breakTime)
            elif isinstance(command, speech.CharacterModeCommand):
                charMode = command.state
                s = "\x1b\\tn=spell\\" if command.state else "\x1b\\tn=normal\\"
                chunks.append(s)
            elif isinstance(command, speech.LangChangeCommand) or isinstance(
                    command, speechcommand.WVLangChangeCommand):
                if command.lang == currentLanguage:
                    # Keep on the same voice.
                    continue
                if command.lang is None:
                    # No language, use default.
                    currentInstance = defaultInstance
                    currentLanguage = defaultLanguage
                    continue
                # Changed language, lets see what we have.
                currentLanguage = command.lang
                newVoiceName = self._voiceManager.getVoiceNameForLanguage(
                    currentLanguage)
                if newVoiceName is None:
                    # No voice for this language, use default.
                    newInstance = defaultInstance
                else:
                    newInstance = self._voiceManager.getVoiceInstance(
                        newVoiceName).token
                if newInstance == currentInstance:
                    # Same voice, next command.
                    continue
                if hasText:
                    # We changed voice, send text we already have to
                    # vocalizer.
                    self._speak(currentInstance, chunks)
                    chunks = []
                    hasText = False
                currentInstance = newInstance
            elif isinstance(command, speech.PitchCommand):
                pitch = self._voiceManager.getVoiceParameter(
                    currentInstance, _vocalizer.VE_PARAM_PITCH, type_=int)
                pitchOffset = self._percentToParam(
                    command.offset, _vocalizer.PITCH_MIN,
                    _vocalizer.PITCH_MAX) - _vocalizer.PITCH_MIN
                chunks.append("\x1b\\pitch=%d\\" % (pitch + pitchOffset))
            elif isinstance(command, speechcommand.SplitCommand):
                # Explicit chunk boundary inserted by lengthsplit().
                self._speak(currentInstance, chunks)
                chunks = []
                hasText = False
        if chunks:
            self._speak(currentInstance, chunks)

    def _speak(self, voiceInstance, chunks):
        # Collapse " \x1b" so inline commands stay attached to the text.
        text = speech.CHUNK_SEPARATOR.join(chunks).replace(" \x1b", "\x1b")
        _vocalizer.processText2Speech(voiceInstance, text)

    def patchedSpeak(self, speechSequence, symbolLevel=None, priority=None):
        """Replacement for ``speech.speak`` adding the driver's preprocessing."""
        if self._cni:
            # Strip commas inside numbers, e.g. "1,234" -> "1234".
            temp = []
            for command in speechSequence:
                if isinstance(command, str):
                    temp.append(comma_number_pattern.sub(
                        lambda m: '', command))
                else:
                    temp.append(command)
            speechSequence = temp
        if self._dli:
            # Drop language information coming from the document.
            speechSequence = self.patchedRemoveLangChangeCommandSpeechSequence(
                speechSequence)
        if self.uwv \
            and _config.vocalizerConfig['autoLanguageSwitching']['useUnicodeLanguageDetection'] \
            and not _config.vocalizerConfig['autoLanguageSwitching']['afterSymbolDetection']:
            speechSequence = self._languageDetector.add_detected_language_commands(
                speechSequence)
            speechSequence = list(speechSequence)
        self._realSpeakFunc(speechSequence, symbolLevel, priority=priority)

    def patchedSpeakSpelling(self,
                             text,
                             locale=None,
                             useCharacterDescriptions=False,
                             priority=None):
        """Replacement for ``speech.speakSpelling`` with per-character language detection."""
        if config.conf["speech"]["autoLanguageSwitching"] \
            and _config.vocalizerConfig['autoLanguageSwitching']['useUnicodeLanguageDetection'] \
            and config.conf["speech"]["trustVoiceLanguage"]:
            for text, loc in self._languageDetector.process_for_spelling(
                    text, locale):
                self._realSpellingFunc(text,
                                       loc,
                                       useCharacterDescriptions,
                                       priority=priority)
        else:
            self._realSpellingFunc(text,
                                   locale,
                                   useCharacterDescriptions,
                                   priority=priority)

    def cancel(self):
        _vocalizer.stop()

    def pause(self, switch):
        if switch:
            _vocalizer.pause()
        else:
            _vocalizer.resume()

    def _get_volume(self):
        return self._voiceManager.defaultVoiceInstance.volume

    def _set_volume(self, value):
        self._voiceManager.defaultVoiceInstance.volume = value
        self._voiceManager.defaultVoiceInstance.commit()

    def _get_rate(self):
        return self._voiceManager.defaultVoiceInstance.rate

    def _set_rate(self, value):
        self._voiceManager.defaultVoiceInstance.rate = value
        self._voiceManager.defaultVoiceInstance.commit()

    def _get_pitch(self):
        return self._voiceManager.defaultVoiceInstance.pitch

    def _set_pitch(self, value):
        self._voiceManager.defaultVoiceInstance.pitch = value
        self._voiceManager.defaultVoiceInstance.commit()

    def _getAvailableVoices(self):
        return self._voiceManager.voiceInfos

    def _get_voice(self):
        # Before a voice is explicitly chosen, report a voice that matches
        # NVDA's language, falling back to the first available voice.
        if self._voice is None:
            voice = self._voiceManager.getVoiceNameForLanguage(
                languageHandler.getLanguage())
            if voice is None:
                voice = list(self.availableVoices.keys())[0]
            return voice
        return self._voiceManager.defaultVoiceName

    def _set_voice(self, voiceName):
        self._voice = voiceName
        if voiceName == self._voiceManager.defaultVoiceName:
            return
        # Stop speech before setting a new voice to avoid voice instances
        # continuing speaking when changing voices for, e.g., say-all
        # See NVDA ticket #3540
        _vocalizer.stop()
        self._voiceManager.setDefaultVoice(voiceName)
        # Available variants are cached by default. As variants maybe
        # different for each voice remove the cached value
        # if hasattr(self, '_availableVariants'):
        # 	del self._availableVariants
        # Synchronize with the synth so the parameters
        # we report are not from the previous voice.
        # _vocalizer.sync()

    def _get_variant(self):
        return self._voiceManager.defaultVoiceInstance.variant

    def _set_variant(self, name):
        self.cancel()
        self._voiceManager.defaultVoiceInstance.variant = name

    def _getAvailableVariants(self):
        dbs = self._voiceManager.defaultVoiceInstance.variants
        return OrderedDict([(d, VoiceInfo(d, d)) for d in dbs])

    def _get_availableLanguages(self):
        return self._voiceManager.languages

    def _get_language(self):
        return self._voiceManager.getVoiceLanguage()

    def _info(self):
        s = [self.description]
        return ", ".join(s)

    def _get_availableNumlans(self):
        """Selectable languages for reading numbers: "default" plus all locales."""
        return dict(
            {
                "default":
                driverHandler.StringParameterInfo("default", _("default")),
            }, **{
                locale: driverHandler.StringParameterInfo(locale, name)
                for locale, name in zip(self._locales, self._localeNames)
            })

    def _get_numlan(self):
        return self._numlan

    def _set_numlan(self, value):
        self._numlan = value

    def _get_availableNummods(self):
        # "value" reads 123 as a number, "number" digit by digit.
        return dict({
            "value":
            driverHandler.StringParameterInfo("value", _("value")),
            "number":
            driverHandler.StringParameterInfo("number", _("number")),
        })

    def _get_nummod(self):
        return self._nummod

    def _set_nummod(self, value):
        self._nummod = value

    def _get_chinesespace(self):
        return self._chinesespace

    def _set_chinesespace(self, value):
        self._chinesespace = value

    def _get_cni(self):
        return self._cni

    def _set_cni(self, value):
        self._cni = value

    def _get_dli(self):
        return self._dli

    def _set_dli(self, value):
        self._dli = value

    def patchedNumSpeechSequence(self, speechSequence):
        # Wrap numbers in language-change commands per the numlan/nummod
        # settings.
        return self.coercionNumberLangChange(speechSequence, self._numlan,
                                             self._nummod)

    def patchedSpaceSpeechSequence(self, speechSequence):
        """Insert break commands at spaces between Chinese text when configured."""
        if not int(self._chinesespace) == 0:
            # First pass: merge adjacent strings so the space pattern can
            # match across original chunk boundaries.
            joinString = ""
            tempSpeechSequence = []
            for command in speechSequence:
                if not isinstance(command, str):
                    tempSpeechSequence.append(joinString)
                    tempSpeechSequence.append(command)
                    joinString = ""
                else:
                    joinString += command
            tempSpeechSequence.append(joinString)
            speechSequence = tempSpeechSequence
            # Second pass: split at Chinese-space boundaries and interleave
            # break commands (chinesespace unit = 5 ms).
            tempSpeechSequence = []
            for command in speechSequence:
                if isinstance(command, str):
                    result = re.split(chinese_space_pattern, command)
                    if len(result) == 1:
                        tempSpeechSequence.append(command)
                    else:
                        temp = []
                        for i in result:
                            temp.append(i)
                            temp.append(
                                speech.BreakCommand(
                                    int(self._chinesespace) * 5))
                        temp = temp[:-1]
                        tempSpeechSequence += temp
                else:
                    tempSpeechSequence.append(command)
            speechSequence = tempSpeechSequence
        return speechSequence

    def patchedRemoveLangChangeCommandSpeechSequence(self, speechSequence):
        # Drop every LangChangeCommand, keeping everything else in order.
        result = []
        for command in speechSequence:
            if not isinstance(command, speech.LangChangeCommand):
                result.append(command)
        return result

    def patchedLengthSpeechSequence(self, speechSequence):
        # Break long strings into <=100-character fragments separated by
        # SplitCommand.
        result = []
        for command in speechSequence:
            if isinstance(command, str):
                result.extend(self.lengthsplit(command, 100))
            else:
                result.append(command)
        return result

    def lengthsplit(self, string, length):
        """Split *string* at whitespace into fragments of roughly *length* chars.

        A ``speechcommand.SplitCommand`` is appended after each full fragment.
        """
        result = []
        pattern = re.compile(r"[\s]")
        spaces = pattern.findall(string)
        others = pattern.split(string)
        fragment = ""
        for other, space in zip(others, spaces):
            fragment += other + space
            if len(fragment) > length:
                result.append(fragment)
                result.append(speechcommand.SplitCommand())
                fragment = ""
        fragment += others[-1]
        result.append(fragment)
        return result

    def resplit(self, pattern, string, mode):
        """Split *string* on *pattern* (numbers) and bracket each match with
        StartNumber/EndNumber pseudo language-change commands.

        mode "value" keeps the number intact; mode "number" spaces out the
        digits so they are read one by one.
        """
        result = []
        numbers = pattern.findall(string)
        others = pattern.split(string)
        for other, number in zip(others, numbers):
            if mode == 'value':
                result.extend([
                    other,
                    speech.LangChangeCommand('StartNumber'), number,
                    speech.LangChangeCommand('EndNumber')
                ])
            elif mode == 'number':
                result.extend([
                    other,
                    speech.LangChangeCommand('StartNumber'),
                    ' '.join(number).replace(" . ", "."),
                    speech.LangChangeCommand('EndNumber')
                ])
        result.append(others[-1])
        return result

    def coercionNumberLangChange(self, speechSequence, numberLanguage, mode):
        """Rewrite StartNumber/EndNumber markers into real language switches."""
        result = []
        for command in speechSequence:
            if isinstance(command, str):
                result.extend(self.resplit(number_pattern, command, mode))
            else:
                result.append(command)
        # Track the surrounding language so EndNumber restores it.
        currentLang = self.language
        for command in result:
            if isinstance(command, speech.LangChangeCommand):
                if command.lang == 'StartNumber':
                    command.lang = numberLanguage
                elif command.lang == 'EndNumber':
                    command.lang = currentLang
                else:
                    currentLang = command.lang
        return result

    def _getLocaleReadableName(self, locale):
        description = languageHandler.getLanguageDescription(locale)
        return "%s" % (description) if description else locale
class SynthDriver(synthDriverHandler.SynthDriver):
    """NVDA synth driver for ETI-Eloquence, driven through the ``_eloquence`` module.

    Speech sequences are translated into (function, args) pairs queued on
    ``_eloquence.synth_queue`` and executed by ``_eloquence.process()``.
    Inline backquote commands control prosody.
    """
    supportedSettings = (SynthDriver.VoiceSetting(),
                         SynthDriver.VariantSetting(),
                         SynthDriver.RateSetting(),
                         SynthDriver.PitchSetting(),
                         SynthDriver.InflectionSetting(),
                         SynthDriver.VolumeSetting(),
                         driverHandler.NumericDriverSetting(
                             "hsz", "Head Size"),
                         driverHandler.NumericDriverSetting(
                             "rgh", "Roughness"),
                         driverHandler.NumericDriverSetting(
                             "bth", "Breathiness"),
                         driverHandler.BooleanDriverSetting(
                             "backquoteVoiceTags",
                             "Enable backquote voice &tags", True))
    supportedCommands = {
        speech.IndexCommand,
        speech.CharacterModeCommand,
        speech.LangChangeCommand,
        speech.BreakCommand,
        speech.PitchCommand,
        speech.RateCommand,
        speech.VolumeCommand,
        speech.PhonemeCommand,
    }
    supportedNotifications = {synthIndexReached, synthDoneSpeaking}
    # Maps NVDA prosody commands to the matching Eloquence voice parameter.
    PROSODY_ATTRS = {
        speech.PitchCommand: _eloquence.pitch,
        speech.VolumeCommand: _eloquence.vlm,
        speech.RateCommand: _eloquence.rate,
    }
    description = 'ETI-Eloquence'
    name = 'eloquence'

    @classmethod
    def check(cls):
        return _eloquence.eciCheck()

    def __init__(self):
        """Initialize the engine with the US-English voice and defaults."""
        _eloquence.initialize(self._onIndexReached)
        self.curvoice = "enu"
        self.rate = 50
        self.variant = "1"

    def speak(self, speechSequence):
        """Translate a speech sequence into queued engine calls and kick processing."""
        last = None
        outlist = []
        for item in speechSequence:
            if isinstance(item, str):
                s = str(item)
                s = self.xspeakText(s)
                outlist.append((_eloquence.speak, (s, )))
                last = s
            elif isinstance(item, speech.IndexCommand):
                outlist.append((_eloquence.index, (item.index, )))
            elif isinstance(item, speech.BreakCommand):
                # Eloquence doesn't respect delay time in milliseconds.
                # Therefor we need to adjust waiting time depending on curernt
                # speech rate
                # The following table of adjustments has been measured
                # empirically
                # Then we do linear approximation
                coefficients = {
                    10: 1,
                    43: 2,
                    60: 3,
                    75: 4,
                    85: 5,
                }
                ck = sorted(coefficients.keys())
                if self.rate <= ck[0]:
                    factor = coefficients[ck[0]]
                elif self.rate >= ck[-1]:
                    factor = coefficients[ck[-1]]
                elif self.rate in ck:
                    factor = coefficients[self.rate]
                else:
                    # Interpolate between the two nearest measured rates.
                    li = [
                        index for index, r in enumerate(ck) if r < self.rate
                    ][-1]
                    ri = li + 1
                    ra = ck[li]
                    rb = ck[ri]
                    factor = 1.0 * coefficients[ra] + (
                        coefficients[rb] - coefficients[ra]) * (
                            self.rate - ra) / (rb - ra)
                pFactor = factor * item.time
                pFactor = int(pFactor)
                outlist.append((_eloquence.speak, (f'`p{pFactor}.', )))
            elif type(item) in self.PROSODY_ATTRS:
                pr = self.PROSODY_ATTRS[type(item)]
                if item.multiplier == 1:
                    # Revert back to defaults
                    outlist.append((_eloquence.cmdProsody, (
                        pr,
                        None,
                    )))
                else:
                    outlist.append((_eloquence.cmdProsody, (
                        pr,
                        item.multiplier,
                    )))
        # Ensure the utterance ends with punctuation so the engine pauses.
        if last is not None and not last.rstrip()[-1] in punctuation:
            outlist.append((_eloquence.speak, ('`p1.', )))
        # 0xffff is the sentinel index signalling "utterance done".
        outlist.append((_eloquence.index, (0xffff, )))
        outlist.append((_eloquence.synth, ()))
        _eloquence.synth_queue.put(outlist)
        _eloquence.process()

    def xspeakText(self, text, should_pause=False):
        """Apply per-language regex fixes and prepend a prosody-reset prefix.

        ``_eloquence.params[9]`` holds the current language id; the magic
        numbers select English, Spanish and French fix tables respectively.
        """
        if _eloquence.params[9] == 65536 or _eloquence.params[9] == 65537:
            text = resub(english_fixes, text)
        if _eloquence.params[9] == 131072 or _eloquence.params[9] == 131073:
            text = resub(spanish_fixes, text)
        if _eloquence.params[9] in (196609, 196608):
            text = resub(french_fixes, text)
        #this converts to ansi for anticrash. If this breaks with foreign
        #langs, we can remove it.
        #text = text.encode('mbcs')
        text = normalizeText(text)
        text = resub(anticrash_res, text)
        # Strip user backquotes unless backquote voice tags are enabled.
        if not self._backquoteVoiceTags:
            text = text.replace('`', ' ')
        # `pp0 resets pitch fluctuation; `vv restores the configured volume.
        text = "`pp0 `vv%d %s" % (self.getVParam(_eloquence.vlm), text
                                  )  #no embedded commands
        text = pause_re.sub(r'\1 `p1\2\3', text)
        text = time_re.sub(r'\1:\2 \3', text)
        #if two strings are sent separately, pause between them. This might
        #fix some of the audio issues we're having.
        if should_pause:
            text = text + ' `p1.'
        return text

    # _eloquence.speak(text, index)

    # def cancel(self):
    #	self.dll.eciStop(self.handle)

    def pause(self, switch):
        _eloquence.pause(switch)
        # self.dll.eciPause(self.handle,switch)

    def terminate(self):
        _eloquence.terminate()

    # Whether raw backquote commands typed by the user are passed through.
    _backquoteVoiceTags = False

    def _get_backquoteVoiceTags(self):
        return self._backquoteVoiceTags

    def _set_backquoteVoiceTags(self, enable):
        if enable == self._backquoteVoiceTags:
            return
        self._backquoteVoiceTags = enable

    def _get_rate(self):
        return self._paramToPercent(self.getVParam(_eloquence.rate), minRate,
                                    maxRate)

    def _set_rate(self, vl):
        # Cached so _set_variant can restore the speed after a variant change.
        self._rate = self._percentToParam(vl, minRate, maxRate)
        self.setVParam(_eloquence.rate,
                       self._percentToParam(vl, minRate, maxRate))

    def _get_pitch(self):
        return self.getVParam(_eloquence.pitch)

    def _set_pitch(self, vl):
        self.setVParam(_eloquence.pitch, vl)

    def _get_volume(self):
        return self.getVParam(_eloquence.vlm)

    def _set_volume(self, vl):
        self.setVParam(_eloquence.vlm, int(vl))

    def _set_inflection(self, vl):
        vl = int(vl)
        self.setVParam(_eloquence.fluctuation, vl)

    def _get_inflection(self):
        return self.getVParam(_eloquence.fluctuation)

    def _set_hsz(self, vl):
        vl = int(vl)
        self.setVParam(_eloquence.hsz, vl)

    def _get_hsz(self):
        return self.getVParam(_eloquence.hsz)

    def _set_rgh(self, vl):
        vl = int(vl)
        self.setVParam(_eloquence.rgh, vl)

    def _get_rgh(self):
        return self.getVParam(_eloquence.rgh)

    def _set_bth(self, vl):
        vl = int(vl)
        self.setVParam(_eloquence.bth, vl)

    def _get_bth(self):
        return self.getVParam(_eloquence.bth)

    def _getAvailableVoices(self):
        """Enumerate installed *.syn voice files next to the engine DLL."""
        o = OrderedDict()
        # eciPath[:-8] strips the DLL file name to get its directory —
        # presumably the trailing component is 8 characters; verify against
        # the actual eciPath value.
        for name in os.listdir(_eloquence.eciPath[:-8]):
            if not name.lower().endswith('.syn'):
                continue
            info = _eloquence.langs[name.lower()[:-4]]
            o[str(info[0])] = synthDriverHandler.VoiceInfo(
                str(info[0]), info[1], None)
        return o

    def _get_voice(self):
        # params[9] is the current language/voice id.
        return str(_eloquence.params[9])

    def _set_voice(self, vl):
        _eloquence.set_voice(vl)
        self.curvoice = vl

    def getVParam(self, pr):
        return _eloquence.getVParam(pr)

    def setVParam(self, pr, vl):
        _eloquence.setVParam(pr, vl)

    def _get_lastIndex(self):
        #fix?
        return _eloquence.lastindex

    def cancel(self):
        _eloquence.stop()

    def _getAvailableVariants(self):
        global variants
        return OrderedDict(
            (str(id), synthDriverHandler.VoiceInfo(str(id), name))
            for id, name in variants.items())

    def _set_variant(self, v):
        global variants
        # Fall back to variant "1" when an unknown id is requested.
        self._variant = v if int(v) in variants else "1"
        _eloquence.setVariant(int(v))
        # Changing the variant resets speed; restore the cached engine rate.
        self.setVParam(_eloquence.rate, self._rate)
        # if 'eloquence' in config.conf['speech']:
        # 	config.conf['speech']['eloquence']['pitch'] = self.pitch

    def _get_variant(self):
        return self._variant

    def _onIndexReached(self, index):
        # None signals end of utterance; otherwise report the reached index.
        if index is not None:
            synthIndexReached.notify(synth=self, index=index)
        else:
            synthDoneSpeaking.notify(synth=self)
class SynthDriver(SynthDriver):
    """NVDA synth driver for the eSpeak engine (via the ``_espeak`` module).

    Speech sequences are converted to eSpeak's XML/SSML-like markup and
    handed to the engine in one string.
    """
    name = "espeak"
    description = "eSpeak"
    supportedSettings = (
        SynthDriver.VoiceSetting(),
        SynthDriver.VariantSetting(),
        SynthDriver.RateSetting(),
        # Translators: This is the name of the rate boost voice toggle
        # which further increases the speaking rate when enabled.
        BooleanSynthSetting("rateBoost", _("Rate boos&t")),
        SynthDriver.PitchSetting(),
        SynthDriver.InflectionSetting(),
        SynthDriver.VolumeSetting(),
    )

    @classmethod
    def check(cls):
        # eSpeak is bundled, so it is always available.
        return True

    def __init__(self):
        _espeak.initialize()
        log.info("Using eSpeak version %s" % _espeak.info())
        lang = languageHandler.getLanguage()
        _espeak.setVoiceByLanguage(lang)
        self._language = lang
        self._variantDict = _espeak.getVariantDict()
        self.variant = "max"
        self.rate = 30
        self.pitch = 40
        self.inflection = 75

    def _get_language(self):
        return self._language

    def speak(self, speechSequence):
        """Build one XML string for the whole sequence and speak it."""
        defaultLanguage = self._language
        textList = []
        langChanged = False
        for item in speechSequence:
            if isinstance(item, basestring):
                s = unicode(item)
                # Drop \01, as this is used for embedded commands, and escape
                # < and > since eSpeak parses the text as XML.
                # Fix: the translate() result was previously discarded and the
                # escape targets had degenerated into identity mappings,
                # leaving markup characters unescaped.
                s = s.translate({
                    ord(u'\01'): None,
                    ord(u'<'): u'&lt;',
                    ord(u'>'): u'&gt;'
                })
                textList.append(s)
            elif isinstance(item, speech.IndexCommand):
                textList.append("<mark name=\"%d\" />" % item.index)
            elif isinstance(item, speech.CharacterModeCommand):
                textList.append("<say-as interpret-as=\"characters\">"
                                if item.state else "</say-as>")
            elif isinstance(item, speech.LangChangeCommand):
                if langChanged:
                    # Close the previous language scope before opening a new one.
                    textList.append("</voice>")
                textList.append(
                    "<voice xml:lang=\"%s\">" %
                    (item.lang if item.lang else defaultLanguage).replace(
                        '_', '-'))
                langChanged = True
            elif isinstance(item, speech.SpeechCommand):
                log.debugWarning("Unsupported speech command: %s" % item)
            else:
                log.error("Unknown speech: %s" % item)
        if langChanged:
            textList.append("</voice>")
        text = u"".join(textList)
        _espeak.speak(text)

    def cancel(self):
        _espeak.stop()

    def pause(self, switch):
        _espeak.pause(switch)

    _rateBoost = False
    # When rate boost is on, the percent scale maps to 3x the engine rate.
    RATE_BOOST_MULTIPLIER = 3

    def _get_rateBoost(self):
        return self._rateBoost

    def _set_rateBoost(self, enable):
        if enable == self._rateBoost:
            return
        # Re-apply the current percent rate under the new multiplier.
        rate = self.rate
        self._rateBoost = enable
        self.rate = rate

    def _get_rate(self):
        val = _espeak.getParameter(_espeak.espeakRATE, 1)
        if self._rateBoost:
            val = int(val / self.RATE_BOOST_MULTIPLIER)
        return self._paramToPercent(val, _espeak.minRate, _espeak.maxRate)

    def _set_rate(self, rate):
        val = self._percentToParam(rate, _espeak.minRate, _espeak.maxRate)
        if self._rateBoost:
            val = int(val * self.RATE_BOOST_MULTIPLIER)
        _espeak.setParameter(_espeak.espeakRATE, val, 0)

    def _get_pitch(self):
        val = _espeak.getParameter(_espeak.espeakPITCH, 1)
        return self._paramToPercent(val, _espeak.minPitch, _espeak.maxPitch)

    def _set_pitch(self, pitch):
        val = self._percentToParam(pitch, _espeak.minPitch, _espeak.maxPitch)
        _espeak.setParameter(_espeak.espeakPITCH, val, 0)

    def _get_inflection(self):
        # Inflection (espeakRANGE) shares the pitch min/max scale.
        val = _espeak.getParameter(_espeak.espeakRANGE, 1)
        return self._paramToPercent(val, _espeak.minPitch, _espeak.maxPitch)

    def _set_inflection(self, val):
        val = self._percentToParam(val, _espeak.minPitch, _espeak.maxPitch)
        _espeak.setParameter(_espeak.espeakRANGE, val, 0)

    def _get_volume(self):
        return _espeak.getParameter(_espeak.espeakVOLUME, 1)

    def _set_volume(self, volume):
        _espeak.setParameter(_espeak.espeakVOLUME, volume, 0)

    def _getAvailableVoices(self):
        voices = OrderedDict()
        for v in _espeak.getVoiceList():
            # Skip the leading priority byte of the languages field.
            l = v.languages[1:]
            identifier = os.path.basename(v.identifier)
            voices[identifier] = VoiceInfo(identifier, v.name, l)
        return voices

    def _get_voice(self):
        curVoice = getattr(self, '_voice', None)
        if curVoice:
            return curVoice
        curVoice = _espeak.getCurrentVoice()
        if not curVoice:
            return ""
        # Strip any "+variant" suffix from the identifier.
        return curVoice.identifier.split('+')[0]

    def _set_voice(self, identifier):
        if not identifier:
            return
        if "\\" in identifier:
            identifier = os.path.basename(identifier)
        self._voice = identifier
        try:
            _espeak.setVoiceAndVariant(voice=identifier, variant=self._variant)
        except:
            self._voice = None
            raise
        self._language = super(SynthDriver, self).language

    def _get_lastIndex(self):
        return _espeak.lastIndex

    def terminate(self):
        _espeak.terminate()

    def _get_variant(self):
        return self._variant

    def _set_variant(self, val):
        # Unknown variants fall back to "max".
        self._variant = val if val in self._variantDict else "max"
        _espeak.setVoiceAndVariant(variant=self._variant)

    def _getAvailableVariants(self):
        return OrderedDict(
            (ID, VoiceInfo(ID, name))
            for ID, name in self._variantDict.iteritems())
def _set_voice(self, val):
    """Select a SAPI4 TTS mode (voice) by GUID and rebuild the engine objects.

    After selecting the mode, probe which of rate/pitch/volume the engine
    supports and its min/max limits, adding or removing the corresponding
    driver settings accordingly.

    @raise ValueError: if no engine mode matches the given GUID.
    """
    try:
        val = GUID(val)
    except:
        # Not a parseable GUID string: fall back to the first engine.
        val = self._enginesList[0].gModeID
    for mode in self._enginesList:
        if mode.gModeID == val:
            break
    else:
        # Fix: previously a sentinel ``mode=None`` was checked after the loop,
        # but a non-matching loop leaves ``mode`` bound to the *last* engine,
        # so an unknown GUID silently selected the wrong voice.  for/else
        # raises as intended.
        raise ValueError("no such mode: %s" % val)
    self._currentMode = mode
    # Recreate the audio destination and TTS central for the new mode.
    self._ttsAudio = CoCreateInstance(CLSID_MMAudioDest, IAudioMultiMediaDevice)
    self._ttsAudio.DeviceNumSet(
        nvwave.outputDeviceNameToID(config.conf["speech"]["outputDevice"], True))
    self._ttsCentral = POINTER(ITTSCentralW)()
    self._ttsEngines.Select(self._currentMode.gModeID,
                            byref(self._ttsCentral), self._ttsAudio)
    self._ttsAttrs = self._ttsCentral.QueryInterface(ITTSAttributes)
    # Find out rate limits by setting min/max and reading back the real values.
    hasRate = bool(mode.dwFeatures & TTSFEATURE_SPEED)
    if hasRate:
        try:
            oldVal = DWORD()
            self._ttsAttrs.SpeedGet(byref(oldVal))
            self._ttsAttrs.SpeedSet(TTSATTR_MINSPEED)
            newVal = DWORD()
            self._ttsAttrs.SpeedGet(byref(newVal))
            self._minRate = newVal.value
            self._ttsAttrs.SpeedSet(TTSATTR_MAXSPEED)
            self._ttsAttrs.SpeedGet(byref(newVal))
            # ViaVoice (and perhaps other synths) doesn't seem to like the
            # speed being set to maximum.
            self._maxRate = newVal.value - 1
            self._ttsAttrs.SpeedSet(oldVal.value)
            if self._maxRate <= self._minRate:
                hasRate = False
        except COMError:
            hasRate = False
    if hasRate:
        if not self.isSupported('rate'):
            self.supportedSettings.insert(1, SynthDriver.RateSetting())
    else:
        if self.isSupported("rate"):
            self.removeSetting("rate")
    # Find out pitch limits the same way.
    hasPitch = bool(mode.dwFeatures & TTSFEATURE_PITCH)
    if hasPitch:
        try:
            oldVal = WORD()
            self._ttsAttrs.PitchGet(byref(oldVal))
            self._ttsAttrs.PitchSet(TTSATTR_MINPITCH)
            newVal = WORD()
            self._ttsAttrs.PitchGet(byref(newVal))
            self._minPitch = newVal.value
            self._ttsAttrs.PitchSet(TTSATTR_MAXPITCH)
            self._ttsAttrs.PitchGet(byref(newVal))
            self._maxPitch = newVal.value
            self._ttsAttrs.PitchSet(oldVal.value)
            if self._maxPitch <= self._minPitch:
                hasPitch = False
        except COMError:
            hasPitch = False
    if hasPitch:
        if not self.isSupported('pitch'):
            self.supportedSettings.insert(2, SynthDriver.PitchSetting())
    else:
        if self.isSupported('pitch'):
            self.removeSetting('pitch')
    # Find volume limits the same way.
    hasVolume = bool(mode.dwFeatures & TTSFEATURE_VOLUME)
    if hasVolume:
        try:
            oldVal = DWORD()
            self._ttsAttrs.VolumeGet(byref(oldVal))
            self._ttsAttrs.VolumeSet(TTSATTR_MINVOLUME)
            newVal = DWORD()
            self._ttsAttrs.VolumeGet(byref(newVal))
            self._minVolume = newVal.value
            self._ttsAttrs.VolumeSet(TTSATTR_MAXVOLUME)
            self._ttsAttrs.VolumeGet(byref(newVal))
            self._maxVolume = newVal.value
            self._ttsAttrs.VolumeSet(oldVal.value)
            if self._maxVolume <= self._minVolume:
                hasVolume = False
        except COMError:
            hasVolume = False
    if hasVolume:
        if not self.isSupported('volume'):
            self.supportedSettings.insert(3, SynthDriver.VolumeSetting())
    else:
        if self.isSupported('volume'):
            self.removeSetting('volume')
class SynthDriver(SynthDriver):
    """NVDA synth driver for Microsoft Speech API version 5.

    Speech sequences are converted to SAPI XML markup and spoken
    asynchronously.  Pitch has no direct SAPI property, so it is tracked
    locally and always expressed via a ``<pitch>`` tag in the markup.
    """
    supportedSettings = (SynthDriver.VoiceSetting(),
                         SynthDriver.RateSetting(),
                         SynthDriver.PitchSetting(),
                         SynthDriver.VolumeSetting())
    COM_CLASS = "SAPI.SPVoice"
    name = "sapi5"
    description = "Microsoft Speech API version 5"

    @classmethod
    def check(cls):
        # Available when the SAPI5 COM class is registered.
        try:
            r = winreg.OpenKey(winreg.HKEY_CLASSES_ROOT, cls.COM_CLASS)
            r.Close()
            return True
        except:
            return False

    ttsAudioStream = None  #: Holds the ISPAudio interface for the current voice, to aid in stopping and pausing audio

    def __init__(self, _defaultVoiceToken=None):
        """
        @param _defaultVoiceToken: an optional sapi voice token which should be used as the default voice (only useful for subclasses)
        @type _defaultVoiceToken: ISpeechObjectToken
        """
        ensureWaveOutHooks()
        self._pitch = 50
        self._initTts(_defaultVoiceToken)

    def terminate(self):
        del self.tts

    def _getAvailableVoices(self):
        voices = OrderedDict()
        v = self._getVoiceTokens()
        # #2629: Iterating uses IEnumVARIANT and GetBestInterface doesn't work
        # on tokens returned by some token enumerators.  Therefore, fetch the
        # items by index, as that method explicitly returns the correct
        # interface.
        for i in xrange(len(v)):
            try:
                ID = v[i].Id
                name = v[i].GetDescription()
                try:
                    language = locale.windows_locale[int(
                        v[i].getattribute('language').split(';')[0], 16)]
                except KeyError:
                    language = None
            except COMError:
                log.warning("Could not get the voice info. Skipping...")
                # Fix: without this continue, the code below referenced
                # variables that may never have been assigned (NameError) or
                # re-registered the previous voice under stale values.
                continue
            voices[ID] = VoiceInfo(ID, name, language)
        return voices

    def _getVoiceTokens(self):
        """Provides a collection of sapi5 voice tokens.
        Can be overridden by subclasses if tokens should be looked for in some other registry location."""
        return self.tts.getVoices()

    def _get_rate(self):
        # SAPI rate is -10..10; map to 0..100 percent.
        return (self.tts.rate * 5) + 50

    def _get_pitch(self):
        return self._pitch

    def _get_volume(self):
        return self.tts.volume

    def _get_voice(self):
        return self.tts.voice.Id

    def _get_lastIndex(self):
        bookmark = self.tts.status.LastBookmark
        if bookmark != "" and bookmark is not None:
            return int(bookmark)
        else:
            return None

    def _percentToRate(self, percent):
        return (percent - 50) / 5

    def _set_rate(self, rate):
        self.tts.Rate = self._percentToRate(rate)

    def _set_pitch(self, value):
        # Pitch is really controlled with xml around speak commands.
        self._pitch = value

    def _set_volume(self, value):
        self.tts.Volume = value

    def _initTts(self, voice=None):
        self.tts = comtypes.client.CreateObject(self.COM_CLASS)
        if voice:
            # #749: It seems that SAPI 5 doesn't reset the audio parameters
            # when the voice is changed, but only when the audio output is
            # changed.  Therefore, set the voice before setting the audio
            # output.  Otherwise, we will get poor speech quality in some
            # cases.
            self.tts.voice = voice
        outputDeviceID = nvwave.outputDeviceNameToID(
            config.conf["speech"]["outputDevice"], True)
        if outputDeviceID >= 0:
            self.tts.audioOutput = self.tts.getAudioOutputs()[outputDeviceID]
        from comInterfaces.SpeechLib import ISpAudio
        try:
            self.ttsAudioStream = self.tts.audioOutputStream.QueryInterface(
                ISpAudio)
        except COMError:
            log.debugWarning("SAPI5 voice does not support ISPAudio")
            self.ttsAudioStream = None

    def _set_voice(self, value):
        tokens = self._getVoiceTokens()
        # #2629: Iterating uses IEnumVARIANT and GetBestInterface doesn't work
        # on tokens returned by some token enumerators.  Therefore, fetch the
        # items by index, as that method explicitly returns the correct
        # interface.
        for i in xrange(len(tokens)):
            voice = tokens[i]
            if value == voice.Id:
                break
        else:
            # Voice not found.
            return
        self._initTts(voice=voice)

    def _percentToPitch(self, percent):
        # SAPI absmiddle pitch is -25..25.
        return percent / 2 - 25

    IPA_TO_SAPI = {
        u"θ": u"th",
        u"s": u"s",
    }

    def _convertPhoneme(self, ipa):
        # We only know about US English phonemes.
        # Rather than just ignoring unknown phonemes, SAPI throws an exception.
        # Therefore, don't bother with any other language.
        if self.tts.voice.GetAttribute("language") != "409":
            raise LookupError("No data for this language")
        out = []
        outAfter = None
        for ipaChar in ipa:
            if ipaChar == u"ˈ":
                # Primary stress applies to the following phoneme.
                outAfter = u"1"
                continue
            out.append(self.IPA_TO_SAPI[ipaChar])
            if outAfter:
                out.append(outAfter)
                outAfter = None
        if outAfter:
            out.append(outAfter)
        return u" ".join(out)

    def speak(self, speechSequence):
        textList = []
        # NVDA SpeechCommands are linear, but XML is hierarchical.
        # Therefore, we track values for non-empty tags.
        # When a tag changes, we close all previously opened tags and open new
        # ones.
        tags = {}
        # We have to use something mutable here because it needs to be changed
        # by the inner function.
        tagsChanged = [True]
        openedTags = []

        def outputTags():
            if not tagsChanged[0]:
                return
            for tag in reversed(openedTags):
                textList.append("</%s>" % tag)
            del openedTags[:]
            for tag, attrs in tags.iteritems():
                textList.append("<%s" % tag)
                for attr, val in attrs.iteritems():
                    textList.append(' %s="%s"' % (attr, val))
                textList.append(">")
                openedTags.append(tag)
            tagsChanged[0] = False

        pitch = self._pitch
        # Pitch must always be specified in the markup.
        tags["pitch"] = {"absmiddle": self._percentToPitch(pitch)}
        rate = self.rate
        volume = self.volume
        for item in speechSequence:
            if isinstance(item, basestring):
                outputTags()
                # Fix: escape markup-significant "<"; this had degenerated into
                # an identity replace, letting raw text break the SAPI XML.
                textList.append(item.replace("<", "&lt;"))
            elif isinstance(item, speech.IndexCommand):
                textList.append('<Bookmark Mark="%d" />' % item.index)
            elif isinstance(item, speech.CharacterModeCommand):
                if item.state:
                    tags["spell"] = {}
                else:
                    try:
                        del tags["spell"]
                    except KeyError:
                        pass
                tagsChanged[0] = True
            elif isinstance(item, speech.BreakCommand):
                textList.append('<silence msec="%d" />' % item.time)
            elif isinstance(item, speech.PitchCommand):
                tags["pitch"] = {
                    "absmiddle": self._percentToPitch(int(pitch * item.multiplier))
                }
                tagsChanged[0] = True
            elif isinstance(item, speech.VolumeCommand):
                if item.multiplier == 1:
                    # Multiplier of 1 reverts to the default volume.
                    try:
                        del tags["volume"]
                    except KeyError:
                        pass
                else:
                    tags["volume"] = {"level": int(volume * item.multiplier)}
                tagsChanged[0] = True
            elif isinstance(item, speech.RateCommand):
                if item.multiplier == 1:
                    try:
                        del tags["rate"]
                    except KeyError:
                        pass
                else:
                    tags["rate"] = {
                        "absspeed": self._percentToRate(int(rate * item.multiplier))
                    }
                tagsChanged[0] = True
            elif isinstance(item, speech.PhonemeCommand):
                try:
                    textList.append(
                        u'<pron sym="%s">%s</pron>' %
                        (self._convertPhoneme(item.ipa), item.text or u""))
                except LookupError:
                    log.debugWarning(
                        "Couldn't convert character in IPA string: %s" %
                        item.ipa)
                    if item.text:
                        textList.append(item.text)
            elif isinstance(item, speech.SpeechCommand):
                log.debugWarning("Unsupported speech command: %s" % item)
            else:
                log.error("Unknown speech: %s" % item)
        # Close any tags that are still open.
        tags.clear()
        tagsChanged[0] = True
        outputTags()
        text = "".join(textList)
        flags = constants.SVSFIsXML | constants.SVSFlagsAsync
        self.tts.Speak(text, flags)

    def cancel(self):
        # SAPI5's default means of stopping speech can sometimes lag at end of
        # speech, especially with Win8 / Win 10 Microsoft Voices.  Therefore
        # instruct the underlying audio interface to stop first, before
        # interrupting and purging any remaining speech.
        if self.ttsAudioStream:
            self.ttsAudioStream.setState(SPAS_STOP, 0)
        self.tts.Speak(None, 1 | constants.SVSFPurgeBeforeSpeak)

    def pause(self, switch):
        # SAPI5's default means of pausing in most cases is either extremely
        # slow (e.g. takes more than half a second) or does not work at all.
        # Therefore instruct the underlying audio interface to pause instead.
        if self.ttsAudioStream:
            self.ttsAudioStream.setState(SPAS_PAUSE if switch else SPAS_RUN, 0)
class SynthDriver(SynthDriver):
    """NVDA synth driver for RHVoice via its legacy C API.

    The library is loaded with ctypes; synthesis requests are formatted as
    SSML and pushed to a worker thread (``TTSThread``) through a queue.
    Audio arrives via ``AudioCallback`` and is played with nvwave.
    """
    name = "RHVoice"
    description = "RHVoice"
    supportedSettings = (SynthDriver.RateSetting(),
                         SynthDriver.PitchSetting(),
                         SynthDriver.VolumeSetting(),
                         SynthDriver.VoiceSetting(),
                         SynthDriver.VariantSetting())

    @classmethod
    def check(cls):
        return os.path.isfile(lib_path)

    def __init__(self):
        # Load the RHVoice library and declare the C prototypes we use.
        self.__lib = ctypes.CDLL(lib_path.encode(sys.getfilesystemencoding()))
        self.__lib.RHVoice_initialize.argtypes = (c_char_p, RHVoice_callback,
                                                  c_char_p, c_uint)
        self.__lib.RHVoice_initialize.restype = c_int
        self.__lib.RHVoice_new_message_utf16.argtypes = (c_wchar_p, c_int, c_int)
        self.__lib.RHVoice_new_message_utf16.restype = RHVoice_message
        self.__lib.RHVoice_delete_message.argtypes = (RHVoice_message, )
        self.__lib.RHVoice_speak.argtypes = (RHVoice_message, )
        self.__lib.RHVoice_get_min_rate.restype = c_float
        self.__lib.RHVoice_get_rate.restype = c_float
        self.__lib.RHVoice_get_max_rate.restype = c_float
        self.__lib.RHVoice_get_min_pitch.restype = c_float
        self.__lib.RHVoice_get_pitch.restype = c_float
        self.__lib.RHVoice_get_max_pitch.restype = c_float
        self.__lib.RHVoice_get_volume.restype = c_float
        self.__lib.RHVoice_get_max_volume.restype = c_float
        self.__lib.RHVoice_get_voice_count.restype = c_int
        self.__lib.RHVoice_get_variant_count.restype = c_int
        self.__lib.RHVoice_get_voice_name.argtypes = (c_int, )
        self.__lib.RHVoice_get_voice_name.restype = c_char_p
        self.__lib.RHVoice_get_variant_name.argtypes = (c_int, )
        self.__lib.RHVoice_get_variant_name.restype = c_char_p
        self.__lib.RHVoice_find_voice.argtypes = (c_char_p, )
        self.__lib.RHVoice_find_voice.restype = c_int
        self.__lib.RHVoice_find_variant.argtypes = (c_char_p, )
        self.__lib.RHVoice_find_variant.restype = c_int
        self.__lib.RHVoice_get_voice.restype = c_int
        self.__lib.RHVoice_get_variant.restype = c_int
        self.__lib.RHVoice_set_voice.argtypes = (c_int, )
        self.__lib.RHVoice_set_variant.argtypes = (c_int, )
        self.__lib.RHVoice_get_version.restype = c_char_p
        self.__silence_flag = threading.Event()
        self.__audio_callback = AudioCallback(self.__lib, self.__silence_flag)
        self.__audio_callback_wrapper = RHVoice_callback(self.__audio_callback)
        sample_rate = self.__lib.RHVoice_initialize(
            data_path.encode("UTF-8"), self.__audio_callback_wrapper,
            cfg_path.encode("UTF-8"), 0)
        if sample_rate == 0:
            raise RuntimeError("RHVoice: initialization error")
        voice_count = self.__lib.RHVoice_get_voice_count()
        if voice_count == 0:
            raise RuntimeError("RHVoice: initialization error")
        self.__player = nvwave.WavePlayer(
            channels=1,
            samplesPerSec=sample_rate,
            bitsPerSample=16,
            outputDevice=config.conf["speech"]["outputDevice"])
        self.__audio_callback.set_player(self.__player)
        self.__tts_queue = Queue.Queue()
        self.__tts_thread = TTSThread(self.__lib, self.__tts_queue,
                                      self.__player, self.__silence_flag)
        # Voice/variant ids in the C API are 1-based.
        self._availableVoices = OrderedDict()
        for id in range(1, voice_count + 1):
            name = self.__lib.RHVoice_get_voice_name(id)
            self._availableVoices[name] = VoiceInfo(name, name, "ru")
        self.__lib.RHVoice_set_voice(1)
        self.__voice = self.__lib.RHVoice_get_voice_name(1)
        variant_count = self.__lib.RHVoice_get_variant_count()
        self._availableVariants = OrderedDict()
        for id in range(1, variant_count + 1):
            name = self.__lib.RHVoice_get_variant_name(id)
            self._availableVariants[name] = VoiceInfo(name, name, "ru")
        self.__lib.RHVoice_set_variant(1)
        self.__variant = self.__lib.RHVoice_get_variant_name(1)
        self.__rate = 50
        self.__pitch = 50
        self.__volume = 50
        # (min, max, default) triples used to map 0..100 percent values to
        # native prosody percentages.
        self.__native_rate_range = (self.__lib.RHVoice_get_min_rate(),
                                    self.__lib.RHVoice_get_max_rate(),
                                    self.__lib.RHVoice_get_rate())
        self.__native_pitch_range = (self.__lib.RHVoice_get_min_pitch(),
                                     self.__lib.RHVoice_get_max_pitch(),
                                     self.__lib.RHVoice_get_pitch())
        self.__native_volume_range = (0, self.__lib.RHVoice_get_max_volume(),
                                      self.__lib.RHVoice_get_volume())
        # Map control characters to spaces and escape SSML-significant
        # characters for str.translate.
        self.__char_mapping = {}
        for c in range(9):
            self.__char_mapping[c] = 32
        self.__char_mapping[11] = 32
        self.__char_mapping[12] = 32
        for c in range(14, 32):
            self.__char_mapping[c] = 32
        # Fix: these two entries had degenerated into identity mappings,
        # leaving "<" and "&" unescaped inside the generated SSML document.
        self.__char_mapping[ord("<")] = u"&lt;"
        self.__char_mapping[ord("&")] = u"&amp;"
        self.__tts_thread.start()
        log.info("Using RHVoice version %s" % self.__lib.RHVoice_get_version())

    def terminate(self):
        self.cancel()
        # A None sentinel tells the worker thread to exit.
        self.__tts_queue.put(None)
        self.__tts_thread.join()
        self.__player.close()
        self.__lib.RHVoice_terminate()

    def speak(self, speech_sequence):
        """Format the sequence as SSML and enqueue it for the worker."""
        spell_mode = False
        text_list = []
        for item in speech_sequence:
            if isinstance(item, basestring):
                s = unicode(item).translate(self.__char_mapping)
                text_list.append(
                    (u'<say-as interpret-as="characters">%s</say-as>' % s)
                    if spell_mode else s)
            elif isinstance(item, speech.IndexCommand):
                text_list.append('<mark name="%d"/>' % item.index)
            elif isinstance(item, speech.CharacterModeCommand):
                spell_mode = bool(item.state)
            elif isinstance(item, speech.SpeechCommand):
                log.debugWarning("Unsupported speech command: %s" % item)
            else:
                log.error("Unknown speech: %s" % item)
        text = u"".join(text_list)
        fmt_str = u'<speak><voice name="%s" variant="%d"><prosody rate="%f%%" pitch="%f%%" volume="%f%%">%s</prosody></voice></speak>'
        variant = self.__lib.RHVoice_find_variant(self.__variant)
        if variant == 0:
            # Unknown variant: fall back to the first one.
            variant = 1
        rate = convert_to_native_percent(self.__rate, *self.__native_rate_range)
        pitch = convert_to_native_percent(self.__pitch, *self.__native_pitch_range)
        volume = convert_to_native_percent(self.__volume, *self.__native_volume_range)
        ssml = fmt_str % (self.__voice, variant, rate, pitch, volume, text)
        self.__tts_queue.put(ssml)

    def pause(self, switch):
        self.__player.pause(switch)

    def cancel(self):
        # Drain anything queued, then signal the worker to go silent and stop
        # playback.  The empty tuple wakes the worker without speaking.
        try:
            while True:
                self.__tts_queue.get_nowait()
        except Queue.Empty:
            self.__silence_flag.set()
            self.__tts_queue.put(())
            self.__player.stop()

    def _get_lastIndex(self):
        return self.__audio_callback.index

    def _get_language(self):
        return "ru"

    def _get_rate(self):
        return self.__rate

    def _set_rate(self, rate):
        self.__rate = max(0, min(100, rate))

    def _get_pitch(self):
        return self.__pitch

    def _set_pitch(self, pitch):
        self.__pitch = max(0, min(100, pitch))

    def _get_volume(self):
        return self.__volume

    def _set_volume(self, volume):
        self.__volume = max(0, min(100, volume))

    def _get_voice(self):
        return self.__voice

    def _set_voice(self, voice):
        try:
            self.__voice = self._availableVoices[voice].ID
        except:
            pass

    def _get_variant(self):
        return self.__variant

    def _set_variant(self, variant):
        try:
            self.__variant = self._availableVariants[variant].ID
        except:
            pass
class SynthDriver(SynthDriver):
    """NVDA synth driver for the SVOX Pico engine (svox-pico.dll via ctypes).

    Lingware resources (text-analysis + speaker data) are loaded per voice,
    an engine is created for the selected voice, and a daemon thread pumps
    text through pico_putTextUtf8/pico_getData, feeding PCM to nvwave.
    """
    name = "pico"
    description = "Svox pico synthesizer"
    supportedSettings=(SynthDriver.VoiceSetting(),SynthDriver.RateSetting(),SynthDriver.PitchSetting(),SynthDriver.VolumeSetting())
    supportedCommands = {
        IndexCommand,
    }
    supportedNotifications = {synthIndexReached, synthDoneSpeaking}
    availableVoices=OrderedDict()
    availableVoices["en-us"] = VoiceInfo('en-us', _('American English'), "en-us")
    availableVoices["en-gb"] = VoiceInfo('en-gb', _('British English'), "en-gb")
    availableVoices["es"] = VoiceInfo('es', _('Spanish'), "es")
    availableVoices["fr"] = VoiceInfo('fr', _('French'), "fr")
    availableVoices["it"] = VoiceInfo('it', _('Italian'), "it")
    availableVoices["de"] = VoiceInfo('de', _('Deutch'), "de")
    _voice = 'en-us'
    pitch = 50
    rate = 50
    volume = 100
    #:tuples of (langName,langData,speakerData)
    voice_resources={
        'en-us': (b'American English', b'en-US_ta.bin', b'en-US_lh0_sg.bin'),
        'en-gb': (b'British English', b'en-GB_ta.bin', b'en-GB_kh0_sg.bin'),
        'es': (b'Spanish', b'es-ES_ta.bin', b'es-ES_zl0_sg.bin'),
        'fr': (b'French', b'fr-FR_ta.bin', b'fr-FR_nk0_sg.bin'),
        'it': (b'Italian', b'it-IT_ta.bin', b'it-IT_cm0_sg.bin'),
        'de': (b'Deutch', b'de-DE_ta.bin', b'de-DE_gl0_sg.bin'),
    }

    @classmethod
    def check(cls):
        return os.path.isfile(os.path.join(BASE_PATH, "svox-pico.dll"))

    def pico_system_errcheck(self,result,func,args):
        """ctypes errcheck for pico system-level calls: non-zero status is an
        error; fetch the engine's status message and raise."""
        if result!=0:
            message=ctypes.create_string_buffer(200)
            self.dll.pico_getSystemStatusMessage(self.pico_system,result,message)
            raise RuntimeError("error while calling '%s' with arguments %s. underlying API reports: '%s'"%(func.__name__,args,message.value))
        return result

    def pico_engine_errcheck(self,result,func,args):
        """ctypes errcheck for pico engine-level calls: negative status is an
        error (positive values such as PICO_STEP_BUSY are valid results)."""
        if result<0:
            message=ctypes.create_string_buffer(200)
            self.dll.pico_getEngineStatusMessage(self.pico_engine, result, message)
            raise RuntimeError("error while calling '%s' with arguments %s. underlying API reports: '%s'"%(func.__name__,args,message.value))
        return result

    def __init__(self):
        self.dll=ctypes.cdll.LoadLibrary(os.path.join(BASE_PATH, 'svox-pico.dll'))
        # Prepare dll object: attach the appropriate errcheck to every API we
        # call, split by system-level vs engine-level status conventions.
        system_functs = ('pico_initialize', 'pico_terminate', 'pico_getSystemStatusMessage', 'pico_getNrSystemWarnings', 'pico_getSystemWarning', 'pico_loadResource', 'pico_unloadResource', 'pico_getResourceName', 'pico_createVoiceDefinition', 'pico_addResourceToVoiceDefinition', 'pico_releaseVoiceDefinition', 'pico_newEngine', 'pico_disposeEngine')
        for func in system_functs:
            getattr(self.dll,func).errcheck=self.pico_system_errcheck
        engine_funcs = ('pico_putTextUtf8', 'pico_getData', 'pico_resetEngine', 'pico_getEngineStatusMessage', 'pico_getNrEngineWarnings', 'pico_getEngineWarning')
        for func in engine_funcs:
            getattr(self.dll, func).errcheck = self.pico_engine_errcheck
        # Init pico system: pico manages all allocations inside this buffer.
        self._svox_memory = ctypes.create_string_buffer(SVOX_MEMORY_SIZE)
        self.pico_system = pico_system()
        self.dll.pico_initialize(self._svox_memory, SVOX_MEMORY_SIZE, ctypes.byref(self.pico_system))
        self.pico_engine = None
        # Pico always outputs 16 kHz 16-bit mono PCM.
        self.player = nvwave.WavePlayer(channels=1, samplesPerSec=16000, bitsPerSample=16, outputDevice=config.conf["speech"]["outputDevice"])
        self.queue = queue.Queue()
        self.isSpeaking = False
        self.background_thread = threading.Thread(target=self.background_thread_func)
        self.background_thread.daemon = True
        self.background_thread.start()
        # NOTE(review): startup voice is hard-coded to "es" even though the
        # class default _voice is 'en-us' — presumably this should follow the
        # NVDA language; confirm intent.
        self._set_voice("es")
        #log the version
        #version_string=ctypes.create_string_buffer(200)
        #self.dll.picoext_getVersionInfo(version_string,200)
        #log.info("Using pico version '%s'"%version_string.value)

    def load_resources(self, name, langData, speakerData):
        """Loads lingware data, defines voice"""
        langRes = pico_resource()
        self.dll.pico_loadResource(self.pico_system, os.path.join(BASE_PATH.encode('utf-8'), b'svox-pico-data', langData), ctypes.byref(langRes))
        langResName=ctypes.create_string_buffer(200)
        self.dll.pico_getResourceName(self.pico_system, langRes, langResName)
        speakerRes = pico_resource()
        self.dll.pico_loadResource(self.pico_system, os.path.join(BASE_PATH.encode('utf-8'), b'svox-pico-data', speakerData), ctypes.byref(speakerRes))
        speakerResName=ctypes.create_string_buffer(200)
        self.dll.pico_getResourceName(self.pico_system, speakerRes, speakerResName)
        self.dll.pico_createVoiceDefinition(self.pico_system, name)
        self.dll.pico_addResourceToVoiceDefinition(self.pico_system, name, langResName)
        self.dll.pico_addResourceToVoiceDefinition(self.pico_system, name, speakerResName)
        # Keep handles so free_resources can release them later.
        self._resources = (name, langRes, speakerRes)

    def free_resources(self):
        """Release the voice definition and unload both lingware resources."""
        if not self._resources:
            return
        self.dll.pico_releaseVoiceDefinition(self.pico_system,self._resources[0])
        self.dll.pico_unloadResource(self.pico_system,ctypes.byref(self._resources[1]))
        self.dll.pico_unloadResource(self.pico_system,ctypes.byref(self._resources[2]))
        self._resources=None

    def terminate(self):
        # Shut down in dependency order: worker thread, player, engine,
        # resources, then the pico system itself.
        self.cancel()
        self.queue.put((None,None))
        self.background_thread.join()
        self.player.close()
        self.player=None
        if self.pico_engine:
            self.dll.pico_disposeEngine(self.pico_system,ctypes.byref(self.pico_engine))
        self.free_resources()
        self.dll.pico_terminate(ctypes.byref(self.pico_system))
        self.pico_system=None
        del self.dll

    def _get_voice(self):
        return self._voice

    def _set_voice(self,value):
        # Switching voices requires disposing the old engine and rebuilding it
        # from the new voice's lingware resources.
        name = self.voice_resources[value][0]
        if self.pico_engine:
            self.cancel()
            self.queue.join()
            self.dll.pico_disposeEngine(self.pico_system,ctypes.byref(self.pico_engine))
            self.free_resources()
        self.load_resources(*self.voice_resources[value])
        self.pico_engine = pico_engine()
        self.dll.pico_newEngine(self.pico_system, name, ctypes.byref(self.pico_engine))
        self._voice = value

    def build_string(self,s):
        """applies voice parameters"""
        # Map the NVDA 0..100 scales onto pico's markup levels; the piecewise
        # formulas were presumably tuned by ear — confirm before changing.
        pitch=self.pitch+50 if self.pitch<=50 else self.pitch*2
        speed = int(20 +(self.rate/50.0) *80) if self.rate<=50 else 100 +(self.rate-50)*8
        volume = self.volume*0.7
        return ('<pitch level="%d"><speed level="%d"><volume level="%d">%s</volume></speed></pitch>' %(pitch, speed, volume, s)).encode('utf-8')

    def background_thread_func(self):
        """Worker loop: pull (data, index) items off the queue, push the text
        into the engine and stream the produced PCM to the player."""
        bytes_sent=ctypes.c_int16()
        out_buffer=ctypes.create_string_buffer(OUT_BUFFER_SIZE)
        bytes_received=ctypes.c_int16()
        data_type=ctypes.c_int16()
        while True:
            data, index = self.queue.get()
            if data is None:
                # Sentinel from terminate(): exit the thread.
                break
            synthIndexReached.notify(synth=self, index=index)
            # +1 so the engine receives the terminating NUL of the utterance.
            remaining=len(data)+1
            while remaining and self.isSpeaking:
                self.dll.pico_putTextUtf8(self.pico_engine, data, remaining, ctypes.byref(bytes_sent))
                remaining-=bytes_sent.value
                data=data[bytes_sent.value:]
                status=PICO_STEP_BUSY
                buf=BytesIO()
                # Drain audio while the engine reports it is still producing;
                # accumulate into buf and feed the player in >=4096-byte chunks.
                while self.isSpeaking and status==PICO_STEP_BUSY:
                    status=self.dll.pico_getData(self.pico_engine, out_buffer, OUT_BUFFER_SIZE, ctypes.byref(bytes_received), ctypes.byref(data_type))
                    if status==PICO_STEP_BUSY:
                        buf.write(ctypes.string_at(out_buffer, bytes_received.value))
                        if buf.tell() >= 4096:
                            self.player.feed(buf.getvalue())
                            buf.seek(0)
                            buf.truncate(0)
                    else:
                        # Engine finished: flush whatever is left in the buffer.
                        if buf.tell():
                            self.player.feed(buf.getvalue())
            synthDoneSpeaking.notify(synth=self)
            self.player.idle()
            if not self.isSpeaking:
                #stop requested during playback
                self.dll.pico_resetEngine(self.pico_engine,0)
            self.lastIndex=None
            self.queue.task_done()

    def cancel(self):
        #clear queue
        try:
            while True:
                self.queue.get_nowait()
                self.queue.task_done()
        except queue.Empty:
            pass
        # Dropping isSpeaking makes the worker abandon the current utterance.
        self.isSpeaking=False
        self.player.stop()
        self.lastIndex=None

    def speak(self,speechSequence):
        # Only the last IndexCommand in the sequence is tracked.
        self.isSpeaking=True
        textList=[]
        index=None
        for item in speechSequence:
            if isinstance(item, str):
                textList.append(item)
            elif isinstance(item,speech.IndexCommand):
                index=item.index
        text = " ".join(textList)
        if text:
            self.queue.put((self.build_string(text), index))

    def pause(self,switch):
        self.player.pause(switch)
class SynthDriver(synthDriverHandler.SynthDriver):
    """NVDA synthesizer driver for IBMTTS (ECI engine, via the _ibmeci module).

    speak() does no engine work itself: it builds a list of
    (engine function, args) pairs and posts it to _ibmeci.synthQueue,
    which a background thread in _ibmeci drains.
    """
    supportedSettings = (
        SynthDriver.VoiceSetting(),
        SynthDriver.VariantSetting(),
        SynthDriver.RateSetting(),
        BooleanSynthSetting("rateBoost", _("Rate boos&t")),
        SynthDriver.PitchSetting(),
        SynthDriver.InflectionSetting(),
        SynthDriver.VolumeSetting(),
        NumericSynthSetting("hsz", _("Head Size"), False),
        NumericSynthSetting("rgh", _("Roughness"), False),
        NumericSynthSetting("bth", _("Breathiness"), False),
        BooleanSynthSetting("backquoteVoiceTags", _("Enable backquote voice &tags"), False),
    )
    description = 'IBMTTS'
    name = 'ibmeci'
    # Language most recently selected in the engine through inline annotations.
    speakingLanguage = ""

    @classmethod
    def check(cls):
        # Available only when the ECI library can be located.
        return _ibmeci.eciCheck()

    def __init__(self):
        _ibmeci.initialize()
        # This information doesn't really need to be displayed, and makes IBMTTS
        # unusable if the addon is not on the same drive as the NVDA executable,
        # so log it only in debug mode, where it can be useful.
        log.debug("Using IBMTTS version %s" % _ibmeci.eciVersion())
        lang = languageHandler.getLanguage()
        self.rate = 50
        self.speakingLanguage = lang
        self.variant = "1"

    def speak(self, speechSequence):
        """Translate an NVDA speech sequence into queued ECI engine calls."""
        last = None
        defaultLanguage = self.language
        outlist = []
        for item in speechSequence:
            if isinstance(item, string_types):
                s = self.processText(unicode(item))
                outlist.append((_ibmeci.speak, (s,)))
                last = s
            elif isinstance(item, speech.IndexCommand):
                outlist.append((_ibmeci.index, (item.index,)))
            elif isinstance(item, speech.LangChangeCommand):
                l = None
                if item.lang in langsAnnotations:
                    l = langsAnnotations[item.lang]
                elif item.lang and item.lang[0:2] in langsAnnotations:
                    l = langsAnnotations[item.lang[0:2]]
                if l:
                    # Only emit the annotation when actually switching language.
                    if item.lang != self.speakingLanguage and item.lang != self.speakingLanguage[0:2]:
                        outlist.append((_ibmeci.speak, (l,)))
                        self.speakingLanguage = item.lang
                else:
                    # Unknown language: fall back to the synth default.
                    outlist.append((_ibmeci.speak, (langsAnnotations[defaultLanguage],)))
                    self.speakingLanguage = defaultLanguage
            elif isinstance(item, speech.CharacterModeCommand):
                # Fixed: the "off" annotation was the str "b`ts0" (stray b inside
                # the quotes); it must be the bytes literal b"`ts0" so character
                # mode is actually switched off.
                outlist.append((_ibmeci.speak, (b"`ts1" if item.state else b"`ts0",)))
            elif isinstance(item, speech.SpeechCommand):
                log.debugWarning("Unsupported speech command: %s" % item)
            else:
                log.error("Unknown speech: %s" % item)
        if last is not None and not last[-1] in punctuation:
            # Ensure a trailing pause so the final word is not clipped.
            outlist.append((_ibmeci.speak, (b'`p1',)))
        outlist.append((_ibmeci.setEndStringMark, ()))
        outlist.append((_ibmeci.speak, (b"`ts0",)))
        outlist.append((_ibmeci.synth, ()))
        _ibmeci.synthQueue.put(outlist)
        _ibmeci.process()

    def processText(self, text):
        """Apply language-specific and anti-crash substitutions.

        Returns mbcs-encoded bytes ready to be sent to the engine.
        """
        text = text.rstrip()
        # _ibmeci.params[9] holds the current engine voice/language id.
        if _ibmeci.params[9] in (65536, 65537):
            text = resub(english_fixes, text)
        if _ibmeci.params[9] in (131072, 131073):
            text = resub(spanish_fixes, text)
        if _ibmeci.params[9] in (196609, 196608):
            text = resub(french_fixes, text)
            # Sometimes this string makes everything buggy with IBMTTS in French.
            text = text.replace('quil', 'qil')
        text = text.replace(u'‵', ' ')
        if self._backquoteVoiceTags:
            text = "`pp0 `vv%d %s" % (self.getVParam(_ibmeci.vlm), text.replace('`', ' '))  # no embedded commands
            text = resub(anticrash_res, text)
            # This converts to ansi for anticrash. If this breaks with foreign langs, we can remove it.
            text = text.encode('mbcs', 'replace')
        else:
            # This converts to ansi for anticrash. If this breaks with foreign langs, we can remove it.
            text = text.encode('mbcs', 'replace')
            text = resub(anticrash_res, text)
            text = b"`pp0 `vv%d %s" % (self.getVParam(_ibmeci.vlm), text.replace(b'`', b' '))  # no embedded commands
        text = pause_re.sub(br'\1 `p1\2\3', text)
        text = time_re.sub(br'\1:\2 \3', text)
        # Temporary fix: IBMTTS seems to ignore a trailing comma, so replace it
        # with " -". Fixed: on Python 3, text[-1] is an int, so the previous
        # comparison text[-1] == b"," was always False; endswith works on both
        # Python 2 and 3 and also tolerates empty input.
        if text.endswith(b","):
            text = text[:-1] + b" -"
        return text

    def pause(self, switch):
        _ibmeci.pause(switch)

    def terminate(self):
        _ibmeci.terminate()

    _backquoteVoiceTags = False

    def _get_backquoteVoiceTags(self):
        return self._backquoteVoiceTags

    def _set_backquoteVoiceTags(self, enable):
        if enable == self._backquoteVoiceTags:
            return
        self._backquoteVoiceTags = enable

    _rateBoost = False
    RATE_BOOST_MULTIPLIER = 1.6

    def _get_rateBoost(self):
        return self._rateBoost

    def _set_rateBoost(self, enable):
        if enable != self._rateBoost:
            # Re-apply the current percentage so the raw engine rate is rescaled.
            rate = self.rate
            self._rateBoost = enable
            self.rate = rate

    def _get_rate(self):
        val = self.getVParam(_ibmeci.rate)
        if self._rateBoost:
            val = int(round(val / self.RATE_BOOST_MULTIPLIER))
        return self._paramToPercent(val, minRate, maxRate)

    def _set_rate(self, vl):
        val = self._percentToParam(vl, minRate, maxRate)
        if self._rateBoost:
            val = int(round(val * self.RATE_BOOST_MULTIPLIER))
        # Cache the raw value: _set_variant must re-apply it after a variant change.
        self._rate = val
        self.setVParam(_ibmeci.rate, val)

    def _get_pitch(self):
        return self.getVParam(_ibmeci.pitch)

    def _set_pitch(self, vl):
        self.setVParam(_ibmeci.pitch, vl)

    def _get_volume(self):
        return self.getVParam(_ibmeci.vlm)

    def _set_volume(self, vl):
        self.setVParam(_ibmeci.vlm, int(vl))

    def _set_inflection(self, vl):
        self.setVParam(_ibmeci.fluctuation, int(vl))

    def _get_inflection(self):
        return self.getVParam(_ibmeci.fluctuation)

    def _set_hsz(self, vl):
        self.setVParam(_ibmeci.hsz, int(vl))

    def _get_hsz(self):
        return self.getVParam(_ibmeci.hsz)

    def _set_rgh(self, vl):
        self.setVParam(_ibmeci.rgh, int(vl))

    def _get_rgh(self):
        return self.getVParam(_ibmeci.rgh)

    def _set_bth(self, vl):
        self.setVParam(_ibmeci.bth, int(vl))

    def _get_bth(self):
        return self.getVParam(_ibmeci.bth)

    def _getAvailableVoices(self):
        # One voice per *.syn file shipped with the engine; the first three
        # characters of the file name key into _ibmeci.langs.
        o = OrderedDict()
        for name in os.listdir(_ibmeci.ttsPath):
            if name.lower().endswith('.syn'):
                info = _ibmeci.langs[name.lower()[:3]]
                o[str(info[0])] = VoiceInfo(str(info[0]), info[1], info[2])
        return o

    def _get_voice(self):
        return str(_ibmeci.params[9])

    def _set_voice(self, vl):
        _ibmeci.set_voice(vl)

    def getVParam(self, pr):
        return _ibmeci.getVParam(pr)

    def setVParam(self, pr, vl):
        _ibmeci.setVParam(pr, vl)

    def _get_lastIndex(self):
        return _ibmeci.lastindex

    def cancel(self):
        _ibmeci.stop()

    def _getAvailableVariants(self):
        global variants
        return OrderedDict(
            (str(id), synthDriverHandler.VoiceInfo(str(id), name))
            for id, name in variants.items())

    def _set_variant(self, v):
        global variants
        self._variant = v if int(v) in variants else "1"
        _ibmeci.setVariant(int(v))
        # Changing variant resets the engine rate; re-apply the cached raw rate.
        self.setVParam(_ibmeci.rate, self._rate)

    def _get_variant(self):
        return self._variant
class SynthDriver(SynthDriver):
    """NVDA synthesizer driver for the Audiologic Tts3 Italian synthesizer."""
    supportedSettings = (
        SynthDriver.RateSetting(),
        SynthDriver.PitchSetting(minStep=5),
        SynthDriver.InflectionSetting(minStep=10),
        SynthDriver.VolumeSetting(minStep=2),
    )
    description = "Audiologic Tts3"
    name = "audiologic"

    @classmethod
    def check(cls):
        # The synth is considered installed when its registry key exists.
        # Fixed: the path must be a raw string (the plain literal contained the
        # invalid escape sequences \A and \S), and the bare except is narrowed
        # to OSError, which is what winreg raises for a missing/unreadable key.
        try:
            r = _winreg.OpenKey(
                _winreg.HKEY_LOCAL_MACHINE,
                r"SOFTWARE\Audiologic\Sintesi Audiologic")
        except OSError:
            return False
        r.Close()
        return True

    def __init__(self):
        _audiologic.TtsOpen()

    def terminate(self):
        _audiologic.TtsClose()

    def speakText(self, text, index=None):
        if isinstance(index, int) and index >= 0:
            # Embed a bookmark so _get_lastIndex can report progress.
            text = "[:BMK=%d]%s" % (index, text)
        _audiologic.TtsSpeak(text)

    def _get_lastIndex(self):
        return _audiologic.LastIndex

    def cancel(self):
        _audiologic.TtsStop()

    def _getAvailableVoices(self):
        # The engine exposes a single fixed Italian voice.
        return OrderedDict((("", VoiceInfo("", "Tts3", language="it")),))

    def _get_voice(self):
        return ""

    def _set_voice(self, voice):
        # Only one voice exists; nothing to select.
        pass

    def _get_rate(self):
        return self._paramToPercent(
            _audiologic.TtsGetProsody('Speed'),
            _audiologic.minRate, _audiologic.maxRate)

    def _set_rate(self, value):
        _audiologic.TtsSetParam(
            _audiologic.ttsRate,
            self._percentToParam(value, _audiologic.minRate, _audiologic.maxRate),
            0)

    def _get_pitch(self):
        return self._paramToPercent(
            _audiologic.TtsGetProsody('Pitch'),
            _audiologic.minPitch, _audiologic.maxPitch)

    def _set_pitch(self, value):
        _audiologic.TtsSetParam(
            _audiologic.ttsPitch,
            self._percentToParam(value, _audiologic.minPitch, _audiologic.maxPitch),
            0)

    def _get_volume(self):
        return self._paramToPercent(
            _audiologic.TtsGetProsody('Vol'),
            _audiologic.minVol, _audiologic.maxVol)

    def _set_volume(self, value):
        _audiologic.TtsSetParam(
            _audiologic.ttsVol,
            self._percentToParam(value, _audiologic.minVol, _audiologic.maxVol),
            0)

    def _get_inflection(self):
        # Engine expression range is 0..10; NVDA works in 0..100.
        return _audiologic.TtsGetProsody('Expression') * 10

    def _set_inflection(self, value):
        _audiologic.TtsSetParam(_audiologic.ttsExpression, int(value / 10), 0)

    def pause(self, switch):
        if switch:
            _audiologic.TtsPause()
        else:
            _audiologic.TtsRestart()
class SynthDriver(SynthDriver):
    """NVDA synthesizer driver for Newfon (Russian/Ukrainian).

    Audio is produced by synthDrivers/newfon_nvda.dll; samples come back
    through the module-level processAudio callback and are played by the
    module-global `player` WavePlayer created in __init__.
    """
    name = "newfon"
    description = "Newfon"
    supportedSettings = (
        SynthDriver.VoiceSetting(),
        SynthDriver.LanguageSetting(),
        SynthDriver.RateSetting(),
        SynthSetting("accel", _("&Acceleration")),
        SynthDriver.PitchSetting(),
        SynthDriver.InflectionSetting(10),
        SynthDriver.VolumeSetting(),
    )
    # Cached setting values (the DLL has no getters for these).
    _volume = 100
    _language = "ru"
    _pitch = 50
    _accel = 0
    _inflection = 50
    _rate = 70
    availableVoices = OrderedDict((
        str(index), VoiceInfo(str(index), name)
    ) for index, name in enumerate(
        [_("male 1"), _("female 1"), _("male 2"), _("female 2")]))
    availableAccels = OrderedDict(
        (str(x), StringParameterInfo(str(x), str(x))) for x in xrange(8))
    # Base (min Hz, max Hz) pitch range per voice index.
    pitchTable = [(90, 130), (190, 330), (60, 120), (220, 340)]
    availableLanguages = OrderedDict(
        (("ru", LanguageInfo("ru")), ("uk", LanguageInfo("uk"))))
    newfon_lib = None
    sdrvxpdbDll = None
    dictDll = None

    @classmethod
    def check(cls):
        # Available only when the engine DLL ships with this copy of NVDA.
        return os.path.isfile('synthDrivers/newfon_nvda.dll')

    def calculateMinMaxPitch(self, pitch, inflection):
        """Map the 0..100 pitch/inflection settings onto a Hz range for the engine.

        Starts from the current voice's base range, widens/narrows it by the
        inflection offset, then shifts the whole range by the pitch offset.
        NOTE: `min`/`max` here shadow the builtins; they are plain ints.
        """
        min, max = self.pitchTable[int(self.voice)]
        i = max - min
        # Inflection widens (>50) or narrows (<50) the range around its base.
        i = int((i / 50.0) * ((inflection - 50) / 2))
        min -= i
        max += i
        # Pitch shifts both bounds together.
        i = int((pitch - 50) / 1.3)
        min += i
        max += i
        return min, max

    def __init__(self):
        global player
        # 8-bit mono at 10 kHz is the engine's native output format.
        player = nvwave.WavePlayer(
            channels=1, samplesPerSec=10000, bitsPerSample=8,
            outputDevice=config.conf["speech"]["outputDevice"])
        self.hasDictLib = os.path.isfile('synthDrivers/dict.dll')
        if self.hasDictLib:
            # Optional pronunciation-dictionary libraries.
            self.sdrvxpdb_lib = windll.LoadLibrary(
                r"synthDrivers\sdrvxpdb.dll")
            self.dict_lib = windll.LoadLibrary(r"synthDrivers\dict.dll")
        self.newfon_lib = windll.LoadLibrary(r"synthDrivers\newfon_nvda.dll")
        self.newfon_lib.speakText.argtypes = [c_char_p, c_int]
        if not self.newfon_lib.initialize():
            raise Exception
        self.newfon_lib.set_callback(processAudio)
        self.newfon_lib.set_dictionary(1)

    def terminate(self):
        self.cancel()
        global player
        player.close()
        player = None
        self.newfon_lib.terminate()
        del self.newfon_lib
        if self.hasDictLib:
            del self.dict_lib
            del self.sdrvxpdb_lib

    def speakText(self, text, index=None):
        global isSpeaking
        isSpeaking = True
        text = processText(text, self._language)
        # -1 tells the DLL "no bookmark index for this utterance".
        if index is not None:
            self.newfon_lib.speakText(text, index)
        else:
            self.newfon_lib.speakText(text, -1)

    def _get_lastIndex(self):
        return self.newfon_lib.get_lastIndex()

    def cancel(self):
        self.newfon_lib.cancel()
        global isSpeaking, player
        isSpeaking = False
        player.stop()

    def _get_voice(self):
        return str(self.newfon_lib.get_voice())

    def _set_voice(self, value):
        self.newfon_lib.set_voice(int(value))
        # Each voice has its own base pitch range, so re-apply pitch.
        self._set_pitch(self._pitch)

    def _get_volume(self):
        return self._volume

    def _set_volume(self, value):
        self.newfon_lib.set_volume(value)
        self._volume = value

    def _get_rate(self):
        return self._rate

    def _set_rate(self, value):
        self.newfon_lib.set_rate(value)
        self._rate = value

    def _set_pitch(self, value):
        #if value <= 50: value = 50
        #self.newfon_lib.set_accel(value/5 -10 )
        self._pitch = value
        # Pitch and inflection jointly determine the engine's Hz range.
        min, max = self.calculateMinMaxPitch(self._pitch, self._inflection)
        self.newfon_lib.set_pitch_min(min)
        self.newfon_lib.set_pitch_max(max)

    def _get_pitch(self):
        return self._pitch

    def pause(self, switch):
        global player
        player.pause(switch)

    def _get_language(self):
        return self._language

    def _set_language(self, language):
        self._language = language
        if not self.hasDictLib:
            return
        # Dictionary 1 is Russian; 0 is the non-Russian (Ukrainian) dictionary.
        if language == "ru":
            self.newfon_lib.set_dictionary(1)
        else:
            self.newfon_lib.set_dictionary(0)

    def _set_inflection(self, inflection):
        self._inflection = inflection
        # Inflection feeds into the pitch range; re-apply.
        self._set_pitch(self._pitch)

    def _get_inflection(self):
        return self._inflection

    def _set_accel(self, a):
        self._accel = a
        self.newfon_lib.set_accel(int(a))

    def _get_accel(self):
        return self._accel
class SynthDriver(SynthDriver):
    """NVDA synthesizer driver for eSpeak NG.

    Speech sequences are converted to (deliberately loose) SSML and handed to
    the _espeak module, which runs the engine on a background thread and calls
    back into _onIndexReached for marks and end-of-speech.
    """
    name = "espeak"
    description = "eSpeak NG"
    supportedSettings = (
        SynthDriver.VoiceSetting(),
        SynthDriver.VariantSetting(),
        SynthDriver.RateSetting(),
        SynthDriver.RateBoostSetting(),
        SynthDriver.PitchSetting(),
        SynthDriver.InflectionSetting(),
        SynthDriver.VolumeSetting(),
    )
    supportedCommands = {
        IndexCommand,
        CharacterModeCommand,
        LangChangeCommand,
        BreakCommand,
        PitchCommand,
        RateCommand,
        VolumeCommand,
        PhonemeCommand,
    }
    supportedNotifications = {synthIndexReached, synthDoneSpeaking}

    # A mapping of commonly used language codes to eSpeak languages.
    # Introduced due to eSpeak issue: https://github.com/espeak-ng/espeak-ng/issues/1200
    # These are used when eSpeak doesn't support a given language code
    # but a default alias is appropriate.
    _defaultLangToLocale = {
        # Languages without locale that aren't supported in eSpeak 7e5457f91e10,
        # with a language with locale that is supported.
        # Found via:
        # set(stripLocaleFromLangCode(lang) for lang in self.availableLanguages).difference(self.availableLanguages)
        "en": "en-gb",
        "chr": "chr-US-Qaaa-x-west",
        "fr": "fr-fr",
    }

    availableLanguages: Set[Optional[str]]
    """
    For eSpeak commit 7e5457f91e10, this is equivalent to:
    {
    'ia', 'ru', 'cy', 'ms', 'af', 'fi', 'fr-fr', 'nog', 'gu', 'hu', 'eu', 'om',
    'en-029', 'de', 'es', 'kk', 'an', 'nci', 'uk', 'vi-vn-x-south', 'grc', 'it',
    'vi-vn-x-central', 'bg', 'piqd', 'ug', 'ar', 'da', 'mi', 'mr', 'pt-br',
    'fr-ch', 'py', 'uz', 'en-gb', 'sw', 'as', 'shn', 'vi', 'nl', 'bs', 'ga',
    'pap', 'sv', 'kn', 'gn', 'th', 'tr', 'pa', 'mt', 'chr-US-Qaaa-x-west', 'eo',
    'kok', 'ky', 'lfn', 'is', 'pt', 'en-gb-x-gbcwmd', 'en-gb-x-rp', 'ht', 'bpy',
    'fr-be', 'nb', 'lt', 'ja', 'te', 'tn', 'es-419', 'gd', 'sjn', 'he', 'hyw',
    'et', 'ro', 'ru-lv', 'sq', 'quc', 'am', 'hr', 'qya', 'ka', 'el', 'tt', 'or',
    'pl', 'qu', 'ba', 'ta', 'cmn', 'io', 'en-us', 'ur', 'hi', 'en-gb-scotland',
    'fa', 'kl', 'tk', 'ku', 'si', 'cv', 'ca', 'qdb', 'hak', 'fa-latn', 'lv',
    'en-gb-x-gbclan', 'ltg', 'ne', 'sl', 'az', 'yue', 'sk', 'hy', 'my', 'ko',
    'mk', 'smj', 'ml', 'cmn-latn-pinyin', 'id', 'la', 'sr', 'bn', 'sd', 'cs',
    'jbo', 'haw'
    }
    """

    @classmethod
    def check(cls):
        # eSpeak NG is bundled with NVDA, so it is always available.
        return True

    def __init__(self):
        _espeak.initialize(self._onIndexReached)
        log.info("Using eSpeak NG version %s" % _espeak.info())
        lang = getLanguage()
        _espeak.setVoiceByLanguage(lang)
        self._language = lang
        self._variantDict = _espeak.getVariantDict()
        self.variant = "max"
        self.rate = 30
        self.pitch = 40
        self.inflection = 75

    def _get_language(self):
        return self._language

    PROSODY_ATTRS = {
        PitchCommand: "pitch",
        VolumeCommand: "volume",
        RateCommand: "rate",
    }

    IPA_TO_ESPEAK = {
        u"θ": u"T",
        u"s": u"s",
        u"ˈ": u"'",
    }

    def _processText(self, text):
        # We need to make several replacements.
        # Fixed: 0x3C and 0x3E previously mapped "<" and ">" to themselves
        # (an HTML-unescaping corruption), so markup characters in spoken text
        # were NOT escaped and could break the SSML sent to eSpeak. They must
        # map to the XML entities.
        return text.translate({
            0x1: None,  # used for embedded commands
            0x3C: u"&lt;",  # <: because of XML
            0x3E: u"&gt;",  # >: because of XML
            0x5B: u" [",  # [: [[ indicates phonemes
        })

    def _normalizeLangCommand(self, command: LangChangeCommand) -> LangChangeCommand:
        """
        Checks if a LangChangeCommand language is compatible with eSpeak.
        If not, find a default mapping occurs in L{_defaultLangToLocale}.
        Otherwise, finds a language of a different dialect exists (e.g. ru-ru to ru).
        Returns an eSpeak compatible LangChangeCommand.
        """
        lowerCaseAvailableLangs = set(lang.lower() for lang in self.availableLanguages)
        # Use default language if no command.lang is supplied
        langWithLocale = command.lang if command.lang else self._language
        langWithLocale = langWithLocale.lower().replace('_', '-')
        langWithoutLocale: Optional[str] = stripLocaleFromLangCode(langWithLocale)
        # Check for any language where the language code matches, regardless of dialect: e.g. ru-ru to ru
        matchingLangs = filter(
            lambda lang: stripLocaleFromLangCode(lang) == langWithoutLocale,
            lowerCaseAvailableLangs
        )
        anyLocaleMatchingLang = next(matchingLangs, None)
        # Check from a list of known default mapping locales: e.g. en to en-gb
        # Created due to eSpeak issue: https://github.com/espeak-ng/espeak-ng/issues/1200
        knownDefaultLang = self._defaultLangToLocale.get(langWithoutLocale, None)
        if knownDefaultLang is not None and knownDefaultLang not in self.availableLanguages:
            # This means eSpeak has changed and we need to update the mapping
            log.error(
                f"Default mapping unknown to eSpeak {knownDefaultLang} not in {self.availableLanguages}"
            )
            knownDefaultLang = None
        if langWithLocale in lowerCaseAvailableLangs:
            eSpeakLang = langWithLocale
        elif knownDefaultLang is not None:
            eSpeakLang = knownDefaultLang
        elif langWithoutLocale in lowerCaseAvailableLangs:
            eSpeakLang = langWithoutLocale
        elif anyLocaleMatchingLang is not None:
            eSpeakLang = anyLocaleMatchingLang
        else:
            log.debugWarning(
                f"Unable to find an eSpeak language for '{langWithLocale}'")
            eSpeakLang = None
        return LangChangeCommand(eSpeakLang)

    def _handleLangChangeCommand(
            self,
            langChangeCommand: LangChangeCommand,
            langChanged: bool,
    ) -> str:
        """Get language xml tags needed to handle a lang change command.
        - if a language change has already been handled for this speech,
          close the open voice tag.
        - if the language is supported by eSpeak, switch to that language.
        - otherwise, switch to the default synthesizer language.
        """
        langChangeCommand = self._normalizeLangCommand(langChangeCommand)
        voiceChangeXML = ""
        if langChanged:
            # Close existing voice tag
            voiceChangeXML += "</voice>"
        if langChangeCommand.lang is not None:
            # Open new voice tag using eSpeak compatible language
            voiceChangeXML += f'<voice xml:lang="{langChangeCommand.lang}">'
        else:
            # Open new voice tag using default voice
            voiceChangeXML += "<voice>"
        return voiceChangeXML

    # C901 'speak' is too complex
    # Note: when working on speak, look for opportunities to simplify
    # and move logic out into smaller helper functions.
    def speak(self, speechSequence: SpeechSequence):  # noqa: C901
        textList: List[str] = []
        langChanged = False
        prosody: Dict[str, int] = {}
        # We output malformed XML, as we might close an outer tag after opening an inner one; e.g.
        # <voice><prosody></voice></prosody>.
        # However, eSpeak doesn't seem to mind.
        for item in speechSequence:
            if isinstance(item, str):
                textList.append(self._processText(item))
            elif isinstance(item, IndexCommand):
                textList.append("<mark name=\"%d\" />" % item.index)
            elif isinstance(item, CharacterModeCommand):
                textList.append(
                    "<say-as interpret-as=\"characters\">" if item.state else "</say-as>")
            elif isinstance(item, LangChangeCommand):
                langChangeXML = self._handleLangChangeCommand(item, langChanged)
                textList.append(langChangeXML)
                langChanged = True
            elif isinstance(item, BreakCommand):
                # Break commands are ignored at the start of speech unless strength is specified.
                # Refer to eSpeak issue: https://github.com/espeak-ng/espeak-ng/issues/1232
                textList.append(f'<break time="{item.time}ms" strength="1" />')
            elif type(item) in self.PROSODY_ATTRS:
                if prosody:
                    # Close previous prosody tag.
                    textList.append("</prosody>")
                attr = self.PROSODY_ATTRS[type(item)]
                if item.multiplier == 1:
                    # Returning to normal.
                    try:
                        del prosody[attr]
                    except KeyError:
                        pass
                else:
                    prosody[attr] = int(item.multiplier * 100)
                if not prosody:
                    continue
                textList.append("<prosody")
                for attr, val in prosody.items():
                    textList.append(' %s="%d%%"' % (attr, val))
                textList.append(">")
            elif isinstance(item, PhonemeCommand):
                # We can't use str.translate because we want to reject unknown characters.
                try:
                    phonemes = "".join(
                        [self.IPA_TO_ESPEAK[char] for char in item.ipa])
                    # There needs to be a space after the phoneme command.
                    # Otherwise, eSpeak will announce a subsequent SSML tag instead of processing it.
                    textList.append(u"[[%s]] " % phonemes)
                except KeyError:
                    log.debugWarning(
                        "Unknown character in IPA string: %s" % item.ipa)
                    if item.text:
                        textList.append(self._processText(item.text))
            else:
                log.error("Unknown speech: %s" % item)
        # Close any open tags.
        if langChanged:
            textList.append("</voice>")
        if prosody:
            textList.append("</prosody>")
        text = u"".join(textList)
        _espeak.speak(text)

    def cancel(self):
        _espeak.stop()

    def pause(self, switch):
        _espeak.pause(switch)

    _rateBoost = False
    RATE_BOOST_MULTIPLIER = 3

    def _get_rateBoost(self):
        return self._rateBoost

    def _set_rateBoost(self, enable):
        if enable == self._rateBoost:
            return
        # Re-apply the current percentage so the engine rate is rescaled.
        rate = self.rate
        self._rateBoost = enable
        self.rate = rate

    def _get_rate(self):
        val = _espeak.getParameter(_espeak.espeakRATE, 1)
        if self._rateBoost:
            val = int(val / self.RATE_BOOST_MULTIPLIER)
        return self._paramToPercent(val, _espeak.minRate, _espeak.maxRate)

    def _set_rate(self, rate):
        val = self._percentToParam(rate, _espeak.minRate, _espeak.maxRate)
        if self._rateBoost:
            val = int(val * self.RATE_BOOST_MULTIPLIER)
        _espeak.setParameter(_espeak.espeakRATE, val, 0)

    def _get_pitch(self):
        val = _espeak.getParameter(_espeak.espeakPITCH, 1)
        return self._paramToPercent(val, _espeak.minPitch, _espeak.maxPitch)

    def _set_pitch(self, pitch):
        val = self._percentToParam(pitch, _espeak.minPitch, _espeak.maxPitch)
        _espeak.setParameter(_espeak.espeakPITCH, val, 0)

    def _get_inflection(self):
        # espeakRANGE shares the pitch parameter's value range.
        val = _espeak.getParameter(_espeak.espeakRANGE, 1)
        return self._paramToPercent(val, _espeak.minPitch, _espeak.maxPitch)

    def _set_inflection(self, val):
        val = self._percentToParam(val, _espeak.minPitch, _espeak.maxPitch)
        _espeak.setParameter(_espeak.espeakRANGE, val, 0)

    def _get_volume(self):
        return _espeak.getParameter(_espeak.espeakVOLUME, 1)

    def _set_volume(self, volume):
        _espeak.setParameter(_espeak.espeakVOLUME, volume, 0)

    def _getAvailableVoices(self):
        voices = OrderedDict()
        for v in _espeak.getVoiceList():
            # v.languages[0] is a priority byte; the language name follows it.
            language = _espeak.decodeEspeakString(v.languages[1:])
            # #7167: Some languages names contain unicode characters EG: Norwegian Bokmål
            name = _espeak.decodeEspeakString(v.name)
            # #5783: For backwards compatibility, voice identifies should always be lowercase
            identifier = os.path.basename(
                _espeak.decodeEspeakString(v.identifier)).lower()
            voices[identifier] = VoiceInfo(identifier, name, language)
        return voices

    def _get_voice(self):
        curVoice = getattr(self, '_voice', None)
        if curVoice:
            return curVoice
        curVoice = _espeak.getCurrentVoice()
        if not curVoice:
            return ""
        # #5783: For backwards compatibility, voice identifies should always be lowercase
        return _espeak.decodeEspeakString(
            curVoice.identifier).split('+')[0].lower()

    def _set_voice(self, identifier):
        if not identifier:
            return
        # #5783: For backwards compatibility, voice identifies should always be lowercase
        identifier = identifier.lower()
        if "\\" in identifier:
            identifier = os.path.basename(identifier)
        self._voice = identifier
        try:
            _espeak.setVoiceAndVariant(voice=identifier, variant=self._variant)
        except:
            self._voice = None
            raise
        self._language = super(SynthDriver, self).language

    def _onIndexReached(self, index):
        # index None signals end of the utterance.
        if index is not None:
            synthIndexReached.notify(synth=self, index=index)
        else:
            synthDoneSpeaking.notify(synth=self)

    def terminate(self):
        _espeak.terminate()

    def _get_variant(self):
        return self._variant

    def _set_variant(self, val):
        self._variant = val if val in self._variantDict else "max"
        _espeak.setVoiceAndVariant(variant=self._variant)

    def _getAvailableVariants(self):
        return OrderedDict(
            (ID, VoiceInfo(ID, name))
            for ID, name in self._variantDict.items())
class SynthDriver(SynthDriver):
    """Legacy (Python 2 era) NVDA synthesizer driver for eSpeak.

    Converts NVDA speech sequences to loose SSML and forwards it to _espeak.
    Kept in Python 2 idioms (unicode/basestring/iteritems) to match the rest
    of this driver.
    """
    name = "espeak"
    description = "eSpeak NG"
    supportedSettings = (
        SynthDriver.VoiceSetting(),
        SynthDriver.VariantSetting(),
        SynthDriver.RateSetting(),
        # Translators: This is the name of the rate boost voice toggle
        # which further increases the speaking rate when enabled.
        BooleanSynthSetting("rateBoost", _("Rate boos&t")),
        SynthDriver.PitchSetting(),
        SynthDriver.InflectionSetting(),
        SynthDriver.VolumeSetting(),
    )

    @classmethod
    def check(cls):
        # eSpeak is bundled with NVDA, so it is always available.
        return True

    def __init__(self):
        _espeak.initialize()
        log.info("Using eSpeak version %s" % _espeak.info())
        lang = languageHandler.getLanguage()
        _espeak.setVoiceByLanguage(lang)
        self._language = lang
        self._variantDict = _espeak.getVariantDict()
        self.variant = "max"
        self.rate = 30
        self.pitch = 40
        self.inflection = 75

    def _get_language(self):
        return self._language

    PROSODY_ATTRS = {
        speech.PitchCommand: "pitch",
        speech.VolumeCommand: "volume",
        speech.RateCommand: "rate",
    }

    IPA_TO_ESPEAK = {
        u"θ": u"T",
        u"s": u"s",
        u"ˈ": u"'",
    }

    def _processText(self, text):
        text = unicode(text)
        # We need to make several replacements.
        # Fixed: 0x3C and 0x3E previously mapped "<" and ">" to themselves
        # (an HTML-unescaping corruption), so markup characters in spoken text
        # were NOT escaped and could break the SSML sent to eSpeak. They must
        # map to the XML entities.
        return text.translate({
            0x1: None,  # used for embedded commands
            0x3C: u"&lt;",  # <: because of XML
            0x3E: u"&gt;",  # >: because of XML
            0x5B: u" [",  # [: [[ indicates phonemes
        })

    def speak(self, speechSequence):
        defaultLanguage = self._language
        textList = []
        langChanged = False
        prosody = {}
        # We output malformed XML, as we might close an outer tag after opening an inner one; e.g.
        # <voice><prosody></voice></prosody>.
        # However, eSpeak doesn't seem to mind.
        for item in speechSequence:
            if isinstance(item, basestring):
                textList.append(self._processText(item))
            elif isinstance(item, speech.IndexCommand):
                textList.append("<mark name=\"%d\" />" % item.index)
            elif isinstance(item, speech.CharacterModeCommand):
                textList.append(
                    "<say-as interpret-as=\"characters\">" if item.state else "</say-as>")
            elif isinstance(item, speech.LangChangeCommand):
                if langChanged:
                    textList.append("</voice>")
                textList.append(
                    "<voice xml:lang=\"%s\">"
                    % (item.lang if item.lang else defaultLanguage).replace('_', '-'))
                langChanged = True
            elif isinstance(item, speech.BreakCommand):
                textList.append('<break time="%dms" />' % item.time)
            elif type(item) in self.PROSODY_ATTRS:
                if prosody:
                    # Close previous prosody tag.
                    textList.append("</prosody>")
                attr = self.PROSODY_ATTRS[type(item)]
                if item.multiplier == 1:
                    # Returning to normal.
                    try:
                        del prosody[attr]
                    except KeyError:
                        pass
                else:
                    prosody[attr] = int(item.multiplier * 100)
                if not prosody:
                    continue
                textList.append("<prosody")
                for attr, val in prosody.iteritems():
                    textList.append(' %s="%d%%"' % (attr, val))
                textList.append(">")
            elif isinstance(item, speech.PhonemeCommand):
                # We can't use unicode.translate because we want to reject unknown characters.
                try:
                    phonemes = "".join(
                        [self.IPA_TO_ESPEAK[char] for char in item.ipa])
                    # There needs to be a space after the phoneme command.
                    # Otherwise, eSpeak will announce a subsequent SSML tag instead of processing it.
                    textList.append(u"[[%s]] " % phonemes)
                except KeyError:
                    log.debugWarning(
                        "Unknown character in IPA string: %s" % item.ipa)
                    if item.text:
                        textList.append(self._processText(item.text))
            elif isinstance(item, speech.SpeechCommand):
                log.debugWarning("Unsupported speech command: %s" % item)
            else:
                log.error("Unknown speech: %s" % item)
        # Close any open tags.
        if langChanged:
            textList.append("</voice>")
        if prosody:
            textList.append("</prosody>")
        text = u"".join(textList)
        _espeak.speak(text)

    def cancel(self):
        _espeak.stop()

    def pause(self, switch):
        _espeak.pause(switch)

    _rateBoost = False
    RATE_BOOST_MULTIPLIER = 3

    def _get_rateBoost(self):
        return self._rateBoost

    def _set_rateBoost(self, enable):
        if enable == self._rateBoost:
            return
        # Re-apply the current percentage so the engine rate is rescaled.
        rate = self.rate
        self._rateBoost = enable
        self.rate = rate

    def _get_rate(self):
        val = _espeak.getParameter(_espeak.espeakRATE, 1)
        if self._rateBoost:
            val = int(val / self.RATE_BOOST_MULTIPLIER)
        return self._paramToPercent(val, _espeak.minRate, _espeak.maxRate)

    def _set_rate(self, rate):
        val = self._percentToParam(rate, _espeak.minRate, _espeak.maxRate)
        if self._rateBoost:
            val = int(val * self.RATE_BOOST_MULTIPLIER)
        _espeak.setParameter(_espeak.espeakRATE, val, 0)

    def _get_pitch(self):
        val = _espeak.getParameter(_espeak.espeakPITCH, 1)
        return self._paramToPercent(val, _espeak.minPitch, _espeak.maxPitch)

    def _set_pitch(self, pitch):
        val = self._percentToParam(pitch, _espeak.minPitch, _espeak.maxPitch)
        _espeak.setParameter(_espeak.espeakPITCH, val, 0)

    def _get_inflection(self):
        # espeakRANGE shares the pitch parameter's value range.
        val = _espeak.getParameter(_espeak.espeakRANGE, 1)
        return self._paramToPercent(val, _espeak.minPitch, _espeak.maxPitch)

    def _set_inflection(self, val):
        val = self._percentToParam(val, _espeak.minPitch, _espeak.maxPitch)
        _espeak.setParameter(_espeak.espeakRANGE, val, 0)

    def _get_volume(self):
        return _espeak.getParameter(_espeak.espeakVOLUME, 1)

    def _set_volume(self, volume):
        _espeak.setParameter(_espeak.espeakVOLUME, volume, 0)

    def _getAvailableVoices(self):
        voices = OrderedDict()
        for v in _espeak.getVoiceList():
            # v.languages[0] is a priority byte; the language name follows it.
            l = v.languages[1:]
            # #5783: For backwards compatibility, voice identifies should always be lowercase
            identifier = os.path.basename(v.identifier).lower()
            voices[identifier] = VoiceInfo(identifier, v.name, l)
        return voices

    def _get_voice(self):
        curVoice = getattr(self, '_voice', None)
        if curVoice:
            return curVoice
        curVoice = _espeak.getCurrentVoice()
        if not curVoice:
            return ""
        # #5783: For backwards compatibility, voice identifies should always be lowercase
        return curVoice.identifier.split('+')[0].lower()

    def _set_voice(self, identifier):
        if not identifier:
            return
        # #5783: For backwards compatibility, voice identifies should always be lowercase
        identifier = identifier.lower()
        if "\\" in identifier:
            identifier = os.path.basename(identifier)
        self._voice = identifier
        try:
            _espeak.setVoiceAndVariant(voice=identifier, variant=self._variant)
        except:
            self._voice = None
            raise
        self._language = super(SynthDriver, self).language

    def _get_lastIndex(self):
        return _espeak.lastIndex

    def terminate(self):
        _espeak.terminate()

    def _get_variant(self):
        return self._variant

    def _set_variant(self, val):
        self._variant = val if val in self._variantDict else "max"
        _espeak.setVoiceAndVariant(variant=self._variant)

    def _getAvailableVariants(self):
        return OrderedDict(
            (ID, VoiceInfo(ID, name))
            for ID, name in self._variantDict.iteritems())
class SynthDriver(SynthDriver):
	"""Speech synth driver for Windows OneCore voices.

	Synthesis is performed by the native ocSpeech library (loaded via ctypes),
	which delivers wave data and marker information back through L{_callback}
	on a background thread.
	"""
	name = "oneCore"
	# Translators: Description for a speech synthesizer.
	description = _("Windows OneCore voices")
	supportedSettings = (
		SynthDriver.VoiceSetting(),
		SynthDriver.RateSetting(),
		SynthDriver.PitchSetting(),
		SynthDriver.VolumeSetting(),
	)
	# These are all controlled via SSML, so we only need attributes, not properties.
	rate = None
	pitch = None
	volume = None

	@classmethod
	def check(cls):
		if not hasattr(sys, "frozen"):
			# #3793: Source copies don't report the correct version on Windows 10
			# because Python isn't manifested for higher versions.
			# We want this driver to work for source copies on Windows 10, so just return True here.
			# If this isn't in fact Windows 10, it will fail when constructed, which is okay.
			return True
		# For binary copies, only present this as an available synth if this is Windows 10.
		return winVersion.winVersion.major >= 10

	def __init__(self):
		super(SynthDriver, self).__init__()
		self._dll = ctypes.windll[DLL_FILE]
		self._dll.ocSpeech_getCurrentVoiceLanguage.restype = ctypes.c_wchar_p
		self._handle = self._dll.ocSpeech_initialize()
		# Keep a reference to the ctypes callback instance so it isn't garbage
		# collected while native code may still invoke it.
		self._callbackInst = ocSpeech_Callback(self._callback)
		self._dll.ocSpeech_setCallback(self._handle, self._callbackInst)
		self._dll.ocSpeech_getVoices.restype = bstrReturn
		self._dll.ocSpeech_getCurrentVoiceId.restype = ctypes.c_wchar_p
		self._player = nvwave.WavePlayer(
			1, SAMPLES_PER_SEC, BITS_PER_SAMPLE,
			outputDevice=config.conf["speech"]["outputDevice"])
		# Initialize state.
		self._queuedSpeech = []
		self._wasCancelled = False
		self._isProcessing = False
		# Set initial values for parameters that can't be queried.
		# This initialises our cache for the value.
		self.rate = 50
		self.pitch = 50
		self.volume = 100

	def terminate(self):
		super(SynthDriver, self).terminate()
		self._dll.ocSpeech_terminate(self._handle)
		# Drop the ctypes function instance for the callback,
		# as it is holding a reference to an instance method, which causes a reference cycle.
		self._callbackInst = None

	def cancel(self):
		# Set a flag to tell the callback not to push more audio.
		self._wasCancelled = True
		log.debug("Cancelling")
		# There might be more text pending. Throw it away.
		self._queuedSpeech = []
		self._player.stop()

	def speak(self, speechSequence):
		conv = _OcSsmlConverter(self.language, self.rate, self.pitch, self.volume)
		text = conv.convertToXml(speechSequence)
		self._queueSpeech(text)

	def _queueSpeech(self, item):
		self._queuedSpeech.append(item)
		# We only process the queue here if it isn't already being processed.
		if not self._isProcessing:
			self._processQueue()

	def _processQueue(self):
		if self._queuedSpeech:
			item = self._queuedSpeech.pop(0)
			self._wasCancelled = False
			log.debug("Begin processing speech")
			self._isProcessing = True
			# ocSpeech_speak is async.
			# It will call _callback in a background thread once done,
			# which will eventually process the queue again.
			self._dll.ocSpeech_speak(self._handle, item)
			return
		log.debug("Queue empty, done processing")
		self._isProcessing = False

	def _callback(self, bytesPtr, numBytes, markers):
		"""Called by native code on a background thread with synthesized audio.

		@param bytesPtr: Pointer to the wave data, including a wav header.
		@param numBytes: Length of the data in bytes; 0 indicates synthesis failure.
		@param markers: "|" separated "name:pos" bookmarks, pos in 100 ns units,
			or None/empty if there are none.
		"""
		# Renamed from (bytes, len) which shadowed builtins; ctypes invokes
		# this positionally, so callers are unaffected.
		if numBytes == 0:
			# The C++ code will log an error with details.
			log.debugWarning("ocSpeech_speak failed!")
			self._processQueue()
			return
		# This gets called in a background thread.
		# Strip the wav header.
		assert numBytes > WAV_HEADER_LEN
		bytesPtr += WAV_HEADER_LEN
		numBytes -= WAV_HEADER_LEN
		data = ctypes.string_at(bytesPtr, numBytes)
		if markers:
			markers = markers.split('|')
		else:
			markers = []
		prevMarker = None
		prevPos = 0
		# Push audio up to each marker so we can sync the audio with the markers.
		for marker in markers:
			if self._wasCancelled:
				break
			name, pos = marker.split(':')
			pos = int(pos)
			# pos is a time offset in 100-nanosecond units.
			# Convert this to a byte offset.
			# Use floor division so the offset stays an integer
			# (true division would produce a float slice index on Python 3).
			pos = pos * BYTES_PER_SEC // HUNDRED_NS_PER_SEC
			# Push audio up to this marker.
			self._player.feed(data[prevPos:pos])
			# _player.feed blocks until the previous chunk of audio is complete,
			# not the chunk we just pushed.
			# Therefore, indicate that we've reached the previous marker.
			if prevMarker:
				self.lastIndex = prevMarker
			prevMarker = int(name)
			prevPos = pos
		if self._wasCancelled:
			log.debug("Cancelled, stopped pushing audio")
		else:
			self._player.feed(data[prevPos:])
			if prevMarker:
				self.lastIndex = prevMarker
			log.debug("Done pushing audio")
		self._processQueue()

	def _getAvailableVoices(self, onlyValid=True):
		"""Fetch the voices known to OneCore speech.

		@param onlyValid: When True, filter out voices whose registry entries
			or data files are missing or broken.
		"""
		voices = OrderedDict()
		voicesStr = self._dll.ocSpeech_getVoices(self._handle).split('|')
		for voiceStr in voicesStr:
			voiceId, name = voiceStr.split(":")
			if onlyValid and not self._isVoiceValid(voiceId):
				continue
			voices[voiceId] = VoiceInfo(voiceId, name)
		return voices

	def _isVoiceValid(self, id):
		"""Return True if the voice's registry entry and data files all exist."""
		idParts = id.split('\\')
		rootKey = getattr(_winreg, idParts[0])
		subkey = "\\".join(idParts[1:])
		try:
			hkey = _winreg.OpenKey(rootKey, subkey)
		except WindowsError as e:
			log.debugWarning("Could not open registry key %s, %s" % (id, e))
			return False
		try:
			langDataPath = _winreg.QueryValueEx(hkey, 'langDataPath')
		except WindowsError as e:
			log.debugWarning(
				"Could not open registry value 'langDataPath', %s" % e)
			return False
		if not langDataPath or not isinstance(langDataPath[0], basestring):
			log.debugWarning("Invalid langDataPath value")
			return False
		if not os.path.isfile(os.path.expandvars(langDataPath[0])):
			log.debugWarning("Missing language data file: %s" % langDataPath[0])
			return False
		try:
			voicePath = _winreg.QueryValueEx(hkey, 'voicePath')
		except WindowsError as e:
			# Fixed copy-paste bug: this previously reported 'langDataPath'.
			log.debugWarning(
				"Could not open registry value 'voicePath', %s" % e)
			return False
		if not voicePath or not isinstance(voicePath[0], basestring):
			log.debugWarning("Invalid voicePath value")
			return False
		if not os.path.isfile(os.path.expandvars(voicePath[0] + '.apm')):
			log.debugWarning("Missing voice file: %s" % (voicePath[0] + ".apm"))
			return False
		return True

	def _get_voice(self):
		return self._dll.ocSpeech_getCurrentVoiceId(self._handle)

	def _set_voice(self, id):
		# ocSpeech_setVoice takes the index of the voice in the full
		# (unfiltered) voice list, so look it up by position.
		voices = self._getAvailableVoices(onlyValid=False)
		for index, candidate in enumerate(voices):
			if candidate == id:
				break
		else:
			raise LookupError("No such voice: %s" % id)
		self._dll.ocSpeech_setVoice(self._handle, index)

	def _get_language(self):
		return self._dll.ocSpeech_getCurrentVoiceLanguage(self._handle)

	def pause(self, switch):
		self._player.pause(switch)
class SynthDriver(SynthDriver):
	"""nvSpeechPlayer synth driver.

	Text is converted to IPA phonemes via espeak's espeak_TextToPhonemes,
	massaged, and fed to the speechPlayer frame generator; audio output is
	handled by a dedicated L{AudioThread}.
	"""
	# When True, every field of speechPlayer.Frame is exposed as an extra
	# numeric driver setting named "speechPlayer_<field>" (default 50 = 1.0x).
	exposeExtraParams = True

	def __init__(self):
		if self.exposeExtraParams:
			self._extraParamNames = [x[0] for x in speechPlayer.Frame._fields_]
			self.supportedSettings = SynthDriver.supportedSettings + tuple(
				NumericDriverSetting("speechPlayer_%s" % x, "frame.%s" % x, normalStep=1)
				for x in self._extraParamNames)
			for x in self._extraParamNames:
				setattr(self, "speechPlayer_%s" % x, 50)
		self.player = speechPlayer.SpeechPlayer(16000)
		_espeak.initialize()
		_espeak.setVoiceByLanguage('en')
		self.pitch = 50
		self.rate = 50
		self.volume = 90
		self.inflection = 60
		self.audioThread = AudioThread(self, self.player, 16000)

	@classmethod
	def check(cls):
		return True

	name = "nvSpeechPlayer"
	description = "nvSpeechPlayer"
	supportedSettings = (SynthDriver.VoiceSetting(), SynthDriver.RateSetting(), SynthDriver.PitchSetting(), SynthDriver.VolumeSetting(), SynthDriver.InflectionSetting())
	supportedCommands = {
		speech.IndexCommand,
		speech.PitchCommand,
	}
	supportedNotifications = {synthIndexReached, synthDoneSpeaking}
	# Cached setting values; rate/volume/inflection are stored as multipliers.
	_curPitch = 50
	_curVoice = 'Adam'
	_curInflection = 0.5
	_curVolume = 1.0
	_curRate = 1.0

	def speak(self, speakList):
		userIndex = None
		pitchOffset = 0
		# Merge adjacent strings
		index = 0
		while index < len(speakList):
			item = speakList[index]
			if index > 0:
				lastItem = speakList[index - 1]
				if isinstance(item, str) and isinstance(lastItem, str):
					speakList[index - 1] = " ".join([lastItem, item])
					del speakList[index]
					continue
			index += 1
		endPause = 20
		for item in speakList:
			if isinstance(item, speech.PitchCommand):
				pitchOffset = item.offset
			elif isinstance(item, speech.IndexCommand):
				userIndex = item.index
			elif isinstance(item, str):
				# Split into clause-sized chunks on pause punctuation.
				textList = re_textPause.split(item)
				lastIndex = len(textList) - 1
				for index, chunk in enumerate(textList):
					if not chunk:
						continue
					chunk = chunk.strip()
					if not chunk:
						continue
					# Final punctuation selects the clause type and the pause
					# (in ms, scaled by rate) appended after the clause.
					clauseType = chunk[-1]
					if clauseType in ('.', '!'):
						endPause = 150
					elif clauseType == '?':
						endPause = 150
					elif clauseType == ',':
						endPause = 120
					else:
						endPause = 100
						clauseType = None
					endPause /= self._curRate
					# Ask espeak to convert the chunk to IPA phonemes.
					# espeak_TextToPhonemes advances textPtr past what it
					# consumed, so loop until the whole buffer is processed.
					textBuf = ctypes.create_unicode_buffer(chunk)
					textPtr = ctypes.c_void_p(ctypes.addressof(textBuf))
					chunks = []
					while textPtr:
						# NOTE(review): if espeak returns a NULL phoneme buffer
						# while textPtr is still set, this `continue` retries the
						# same call — presumably espeak still advances textPtr;
						# confirm there is no busy-loop risk.
						phonemeBuf = _espeak.espeakDLL.espeak_TextToPhonemes(
							ctypes.byref(textPtr),
							_espeak.espeakCHARS_WCHAR,
							0x36100 + 0x82)
						if not phonemeBuf:
							continue
						chunks.append(ctypes.string_at(phonemeBuf))
					chunk = b"".join(chunks).decode('utf8')
					# Substitute phonemes that speechPlayer renders poorly.
					chunk = chunk.replace('ə͡l', 'ʊ͡l')
					chunk = chunk.replace('a͡ɪ', 'ɑ͡ɪ')
					chunk = chunk.replace('e͡ɪ', 'e͡i')
					chunk = chunk.replace('ə͡ʊ', 'o͡u')
					chunk = chunk.strip()
					if not chunk:
						continue
					pitch = self._curPitch + pitchOffset
					basePitch = 25 + (21.25 * (pitch / 12.5))
					for args in ipa.generateFramesAndTiming(
							chunk, speed=self._curRate, basePitch=basePitch,
							inflection=self._curInflection, clauseType=clauseType):
						frame = args[0]
						if frame:
							applyVoiceToFrame(frame, self._curVoice)
							if self.exposeExtraParams:
								# Scale each frame field by its extra-param
								# setting (50 == neutral 1.0).
								for x in self._extraParamNames:
									ratio = getattr(
										self, "speechPlayer_%s" % x) / 50.0
									setattr(frame, x, getattr(frame, x) * ratio)
							frame.preFormantGain *= self._curVolume
						self.player.queueFrame(*args, userIndex=userIndex)
						userIndex = None
		# Queue a trailing silent frame carrying the final pause (and any
		# index command not yet attached to audio), then wake the audio thread.
		self.player.queueFrame(None, endPause, max(10.0, 10.0 / self._curRate), userIndex=userIndex)
		self.audioThread.isSpeaking = True
		self.audioThread.synthEvent.set()

	def cancel(self):
		# Purge queued frames, leaving a short fade-out frame.
		self.player.queueFrame(None, 20, 5, purgeQueue=True)
		self.audioThread.isSpeaking = False
		self.audioThread.synthEvent.set()
		self.audioThread.wavePlayer.stop()

	def pause(self, switch):
		self.audioThread.wavePlayer.pause(switch)

	def _get_rate(self):
		# Inverse of _set_rate: multiplier 0.25*2**(val/25) -> percentage.
		return int(math.log(self._curRate / 0.25, 2) * 25.0)

	def _set_rate(self, val):
		self._curRate = 0.25 * (2**(val / 25.0))

	def _get_pitch(self):
		return self._curPitch

	def _set_pitch(self, val):
		self._curPitch = val

	def _get_volume(self):
		return int(self._curVolume * 75)

	def _set_volume(self, val):
		self._curVolume = val / 75.0

	def _get_inflection(self):
		return int(self._curInflection / 0.01)

	def _set_inflection(self, val):
		self._curInflection = val * 0.01

	def _get_voice(self):
		return self._curVoice

	def _set_voice(self, voice):
		if voice not in self.availableVoices:
			voice = 'Adam'
		self._curVoice = voice
		if self.exposeExtraParams:
			# Reset all extra params to neutral when the voice changes.
			for paramName in self._extraParamNames:
				setattr(self, "speechPlayer_%s" % paramName, 50)

	def _getAvailableVoices(self):
		d = OrderedDict()
		for name in sorted(voices):
			d[name] = VoiceInfo(name, name)
		return d

	def terminate(self):
		self.audioThread.terminate()
		del self.player
		_espeak.terminate()
class SynthDriver(SynthDriver):
	"""Speech synth driver for Windows OneCore voices.

	Synthesis is performed by ocSpeech in nvdaHelperLocalWin10, which
	delivers complete wav blobs (header included) back through L{_callback}
	on a background thread. The audio player is created lazily from the
	first wav header received, and re-created if the sample rate changes.
	"""
	name = "oneCore"
	# Translators: Description for a speech synthesizer.
	description = _("Windows OneCore voices")
	supportedSettings = (
		SynthDriver.VoiceSetting(),
		SynthDriver.RateSetting(),
		SynthDriver.PitchSetting(),
		SynthDriver.VolumeSetting(),
	)
	# These are all controlled via SSML, so we only need attributes, not properties.
	rate = None
	pitch = None
	volume = None

	@classmethod
	def check(cls):
		if not hasattr(sys, "frozen"):
			# #3793: Source copies don't report the correct version on Windows 10
			# because Python isn't manifested for higher versions.
			# We want this driver to work for source copies on Windows 10, so just return True here.
			# If this isn't in fact Windows 10, it will fail when constructed, which is okay.
			return True
		# For binary copies, only present this as an available synth if this is Windows 10.
		return winVersion.winVersion.major >= 10

	def __init__(self):
		super(SynthDriver, self).__init__()
		self._dll = NVDAHelper.getHelperLocalWin10Dll()
		self._dll.ocSpeech_getCurrentVoiceLanguage.restype = ctypes.c_wchar_p
		self._handle = self._dll.ocSpeech_initialize()
		# Keep a reference to the ctypes callback instance so it isn't garbage
		# collected while native code may still invoke it.
		self._callbackInst = ocSpeech_Callback(self._callback)
		self._dll.ocSpeech_setCallback(self._handle, self._callbackInst)
		self._dll.ocSpeech_getVoices.restype = NVDAHelper.bstrReturn
		self._dll.ocSpeech_getCurrentVoiceId.restype = ctypes.c_wchar_p
		# Created lazily in _maybeInitPlayer once we know the wave format.
		self._player = None
		# Initialize state.
		self._queuedSpeech = []
		self._wasCancelled = False
		self._isProcessing = False
		# Initialize the voice to a sane default
		self.voice = self._getDefaultVoice()
		# Set initial values for parameters that can't be queried.
		# This initialises our cache for the value.
		self.rate = 50
		self.pitch = 50
		self.volume = 100

	def _maybeInitPlayer(self, wav):
		"""Initialize audio playback based on the wave header provided by the synthesizer.
		If the sampling rate has not changed, the existing player is used.
		Otherwise, a new one is created with the appropriate parameters.
		"""
		samplesPerSec = wav.getframerate()
		if self._player and self._player.samplesPerSec == samplesPerSec:
			return
		if self._player:
			# Finalise any pending audio.
			self._player.idle()
		bytesPerSample = wav.getsampwidth()
		# NOTE(review): assumes mono output — channel count is not included
		# in this bytes-per-second figure; confirm against the synth's format.
		self._bytesPerSec = samplesPerSec * bytesPerSample
		self._player = nvwave.WavePlayer(
			channels=wav.getnchannels(),
			samplesPerSec=samplesPerSec,
			bitsPerSample=bytesPerSample * 8,
			outputDevice=config.conf["speech"]["outputDevice"])

	def terminate(self):
		super(SynthDriver, self).terminate()
		self._dll.ocSpeech_terminate(self._handle)
		# Drop the ctypes function instance for the callback,
		# as it is holding a reference to an instance method, which causes a reference cycle.
		self._callbackInst = None

	def cancel(self):
		# Set a flag to tell the callback not to push more audio.
		self._wasCancelled = True
		log.debug("Cancelling")
		# There might be more text pending. Throw it away.
		self._queuedSpeech = []
		if self._player:
			self._player.stop()

	def speak(self, speechSequence):
		conv = _OcSsmlConverter(self.language, self.rate, self.pitch, self.volume)
		text = conv.convertToXml(speechSequence)
		# #7495: Calling WaveOutOpen blocks for ~100 ms if called from the callback
		# when the SSML includes marks.
		# We're not quite sure why.
		# To work around this, open the device before queuing.
		if self._player:
			self._player.open()
		self._queueSpeech(text)

	def _queueSpeech(self, item):
		self._queuedSpeech.append(item)
		# We only process the queue here if it isn't already being processed.
		if not self._isProcessing:
			self._processQueue()

	def _processQueue(self):
		if not self._queuedSpeech:
			# There are no more queued utterances at this point, so call idle.
			# This blocks while waiting for the final chunk to play,
			# so by the time this is done, there might be something queued.
			log.debug("Calling idle on audio player")
			# The player is created lazily, so it is still None if the very
			# first utterance failed before producing audio; guard against that.
			if self._player:
				self._player.idle()
		if self._queuedSpeech:
			item = self._queuedSpeech.pop(0)
			self._wasCancelled = False
			log.debug("Begin processing speech")
			self._isProcessing = True
			# ocSpeech_speak is async.
			# It will call _callback in a background thread once done,
			# which will eventually process the queue again.
			self._dll.ocSpeech_speak(self._handle, item)
			return
		log.debug("Queue empty, done processing")
		self._isProcessing = False

	def _callback(self, bytesPtr, numBytes, markers):
		"""Called by native code on a background thread with synthesized audio.

		@param bytesPtr: Pointer to a complete wav blob (header included).
		@param numBytes: Length of the blob in bytes; 0 indicates synthesis failure.
		@param markers: "|" separated "name:pos" bookmarks, pos in 100 ns units,
			or None/empty if there are none.
		"""
		# Renamed from (bytes, len) which shadowed builtins; ctypes invokes
		# this positionally, so callers are unaffected.
		if numBytes == 0:
			# The C++ code will log an error with details.
			log.debugWarning("ocSpeech_speak failed!")
			self._processQueue()
			return
		# This gets called in a background thread.
		stream = cStringIO.StringIO(ctypes.string_at(bytesPtr, numBytes))
		wav = wave.open(stream, "r")
		self._maybeInitPlayer(wav)
		data = wav.readframes(wav.getnframes())
		if markers:
			markers = markers.split('|')
		else:
			markers = []
		prevMarker = None
		prevPos = 0
		# Push audio up to each marker so we can sync the audio with the markers.
		for marker in markers:
			if self._wasCancelled:
				break
			name, pos = marker.split(':')
			pos = int(pos)
			# pos is a time offset in 100-nanosecond units.
			# Convert this to a byte offset.
			# Use floor division so the offset stays an integer
			# (true division would produce a float slice index on Python 3).
			pos = pos * self._bytesPerSec // HUNDRED_NS_PER_SEC
			# Push audio up to this marker.
			self._player.feed(data[prevPos:pos])
			# _player.feed blocks until the previous chunk of audio is complete,
			# not the chunk we just pushed.
			# Therefore, indicate that we've reached the previous marker.
			if prevMarker:
				self.lastIndex = prevMarker
			prevMarker = int(name)
			prevPos = pos
		if self._wasCancelled:
			log.debug("Cancelled, stopped pushing audio")
		else:
			self._player.feed(data[prevPos:])
			if prevMarker:
				self.lastIndex = prevMarker
			log.debug("Done pushing audio")
		self._processQueue()

	def _getVoiceInfoFromOnecoreVoiceString(self, voiceStr):
		"""
		Produces an NVDA VoiceInfo object representing the given voice string from Onecore speech.
		"""
		# The voice string is made up of the ID, the language, and the display name.
		ID, language, name = voiceStr.split(':')
		language = language.replace('-', '_')
		return VoiceInfo(ID, name, language=language)

	def _getAvailableVoices(self):
		voices = OrderedDict()
		# Fetch the full list of voices that Onecore speech knows about.
		# Note that it may give back voices that are uninstalled or broken.
		voicesStr = self._dll.ocSpeech_getVoices(self._handle).split('|')
		for index, voiceStr in enumerate(voicesStr):
			voiceInfo = self._getVoiceInfoFromOnecoreVoiceString(voiceStr)
			# Filter out any invalid voices.
			if not self._isVoiceValid(voiceInfo.ID):
				continue
			# Remember the position in the unfiltered list; ocSpeech_setVoice
			# addresses voices by that index.
			voiceInfo.onecoreIndex = index
			voices[voiceInfo.ID] = voiceInfo
		return voices

	def _isVoiceValid(self, ID):
		"""
		Checks that the given voice actually exists and is valid.
		It checks the Registry, and also ensures that its data files actually exist on this machine.
		@param ID: the ID of the requested voice.
		@type ID: string
		@returns: True if the voice is valid, false otherwise.
		@rtype: boolean
		"""
		IDParts = ID.split('\\')
		rootKey = getattr(winreg, IDParts[0])
		subkey = "\\".join(IDParts[1:])
		try:
			hkey = winreg.OpenKey(rootKey, subkey)
		except WindowsError as e:
			log.debugWarning("Could not open registry key %s, %r" % (ID, e))
			return False
		try:
			langDataPath = winreg.QueryValueEx(hkey, 'langDataPath')
		except WindowsError as e:
			log.debugWarning("Could not open registry value 'langDataPath', %r" % e)
			return False
		if not langDataPath or not isinstance(langDataPath[0], basestring):
			log.debugWarning("Invalid langDataPath value")
			return False
		if not os.path.isfile(os.path.expandvars(langDataPath[0])):
			log.debugWarning("Missing language data file: %s" % langDataPath[0])
			return False
		try:
			voicePath = winreg.QueryValueEx(hkey, 'voicePath')
		except WindowsError as e:
			# Fixed copy-paste bug: this previously reported 'langDataPath'.
			log.debugWarning("Could not open registry value 'voicePath', %r" % e)
			return False
		if not voicePath or not isinstance(voicePath[0], basestring):
			log.debugWarning("Invalid voicePath value")
			return False
		if not os.path.isfile(os.path.expandvars(voicePath[0] + '.apm')):
			log.debugWarning("Missing voice file: %s" % (voicePath[0] + ".apm"))
			return False
		return True

	def _get_voice(self):
		return self._dll.ocSpeech_getCurrentVoiceId(self._handle)

	def _set_voice(self, ID):
		voices = self.availableVoices
		# Try setting the requested voice
		for voice in voices.itervalues():
			if voice.ID == ID:
				self._dll.ocSpeech_setVoice(self._handle, voice.onecoreIndex)
				return
		raise LookupError("No such voice: %s" % ID)

	def _getDefaultVoice(self):
		"""
		Finds the best available voice that can be used as a default.
		It first tries finding a voice with the same language and country as the user's
		configured Windows language (E.g. en_AU),
		else one that matches just the language (E.g. en),
		else simply the first available.
		@returns: the ID of the voice, suitable for passing to self.voice for setting.
		@rtype: string
		"""
		voices = self.availableVoices
		# Try matching to NVDA language
		fullLanguage = languageHandler.getWindowsLanguage()
		for voice in voices.itervalues():
			if voice.language == fullLanguage:
				return voice.ID
		baseLanguage = fullLanguage.split('_')[0]
		if baseLanguage != fullLanguage:
			for voice in voices.itervalues():
				if voice.language.startswith(baseLanguage):
					return voice.ID
		# Just use the first available
		for voice in voices.itervalues():
			return voice.ID
		raise RuntimeError("No voices available")

	def _get_language(self):
		return self._dll.ocSpeech_getCurrentVoiceLanguage(self._handle)

	def pause(self, switch):
		if self._player:
			self._player.pause(switch)
class SynthDriver(synthDriverHandler.SynthDriver):
	"""IBMTTS / Eloquence (ECI) synth driver.

	Speech commands are translated into a list of (function, args) tuples
	that is handed to the _ibmeci worker queue; prosody and voice settings
	are applied through ECI voice parameters.
	"""
	supportedSettings = (
		SynthDriver.VoiceSetting(),
		SynthDriver.VariantSetting(),
		SynthDriver.RateSetting(),
		BooleanDriverSetting("rateBoost", _("Rate boos&t"), True),
		SynthDriver.PitchSetting(),
		SynthDriver.InflectionSetting(),
		SynthDriver.VolumeSetting(),
		NumericDriverSetting("hsz", _("Head size"), False),
		NumericDriverSetting("rgh", _("Roughness"), False),
		NumericDriverSetting("bth", _("Breathiness"), False),
		BooleanDriverSetting("backquoteVoiceTags", _("Enable backquote voice &tags"), False),
		BooleanDriverSetting("ABRDICT", _("Enable &abbreviation dictionary"), False),
		BooleanDriverSetting("phrasePrediction", _("Enable phrase prediction"), False),
		BooleanDriverSetting("shortpause", _("&Shorten pauses"), False),
		BooleanDriverSetting("sendParams", _("Always Send Current Speech Settings (enable to prevent some tags from sticking, disable for viavoice binary compatibility)"), False))
	supportedCommands = {
		IndexCommand,
		CharacterModeCommand,
		LangChangeCommand,
		BreakCommand,
		PitchCommand,
		RateCommand,
		VolumeCommand
	}
	supportedNotifications = {synthIndexReached, synthDoneSpeaking}
	description='IBMTTS'
	name='ibmeci'
	# Language currently being spoken (updated by LangChangeCommand handling).
	speakingLanguage=""

	@classmethod
	def check(cls):
		return _ibmeci.eciCheck()

	def __init__(self):
		_ibmeci.initialize(self._onIndexReached, self._onDoneSpeaking)
		# This information doesn't really need to be displayed, and makes IBMTTS unusable if the addon is not in the same drive as NVDA executable.
		# But display it only on debug mode in case of it can be useful
		log.debug("Using IBMTTS version %s" % _ibmeci.eciVersion())
		lang = languageHandler.getLanguage()
		self.rate=50
		self.speakingLanguage=lang
		self.variant="1"
		self.currentEncoding = "mbcs"

	# Maps prosody command types to the ECI voice parameter they control.
	PROSODY_ATTRS = {
		PitchCommand: ECIVoiceParam.eciPitchBaseline,
		VolumeCommand: ECIVoiceParam.eciVolume,
		RateCommand: ECIVoiceParam.eciSpeed,
	}

	def speak(self,speechSequence):
		"""Translate a speech sequence into a list of deferred _ibmeci calls
		and enqueue it for the background processor."""
		last = None
		defaultLanguage=self.language
		outlist = []
		charmode=False
		for item in speechSequence:
			if isinstance(item, string_types):
				s = self.processText(item)
				outlist.append((_ibmeci.speak, (s,)))
				last = s
			elif isinstance(item,IndexCommand):
				outlist.append((_ibmeci.index, (item.index,)))
			elif isinstance(item,LangChangeCommand):
				# Look up the backquote language annotation for the requested
				# language, trying the full tag then just the base language.
				l=None
				if item.lang in langsAnnotations:
					l = langsAnnotations[item.lang]
				elif item.lang and item.lang[0:2] in langsAnnotations:
					l = langsAnnotations[item.lang[0:2]]
				if l:
					if item.lang != self.speakingLanguage and item.lang != self.speakingLanguage[0:2]:
						outlist.append((_ibmeci.speak, (l,)))
						self.speakingLanguage=item.lang
						self.updateEncoding(l)
				else:
					# Unknown language: fall back to the synth's default.
					outlist.append((_ibmeci.speak, (langsAnnotations[defaultLanguage],)))
					self.speakingLanguage = defaultLanguage
			elif isinstance(item,CharacterModeCommand):
				outlist.append((_ibmeci.speak, (b"`ts1" if item.state else b"`ts0",)))
				if item.state:
					charmode=True
			elif isinstance(item,BreakCommand):
				# taken from eloquence_threshold (https://github.com/pumper42nickel/eloquence_threshold)
				# Eloquence doesn't respect delay time in milliseconds.
				# Therefore we need to adjust waiting time depending on current speech rate
				# The following table of adjustments has been measured empirically
				# Then we do linear approximation
				coefficients = {
					10:1,
					43:2,
					60:3,
					75:4,
					85:5,
				}
				ck = sorted(coefficients.keys())
				if self.rate <= ck[0]:
					factor = coefficients[ck[0]]
				elif self.rate >= ck[-1]:
					factor = coefficients[ck[-1]]
				elif self.rate in ck:
					factor = coefficients[self.rate]
				else:
					# Linear interpolation between the two surrounding rates.
					li = [index for index, r in enumerate(ck) if r<self.rate][-1]
					ri = li + 1
					ra = ck[li]
					rb = ck[ri]
					factor = 1.0 * coefficients[ra] + (coefficients[rb] - coefficients[ra]) * (self.rate - ra) / (rb-ra)
				pFactor = factor*item.time
				pFactor = int(pFactor)
				outlist.append((_ibmeci.speak, (b' `p%d '%(pFactor),)))
			elif type(item) in self.PROSODY_ATTRS:
				val = max(0, min(item.newValue, 100))
				if type(item) == RateCommand:
					val = self.percentToRate(val)
				outlist.append((_ibmeci.setProsodyParam, (self.PROSODY_ATTRS[type(item)], val)))
			else:
				log.error("Unknown speech: %s"%item)
		if last is not None and last[-1] not in punctuation:
			# check if a pitch command is at the end of the list, because p1 need to be send before this.
			# index -2 is because -1 always seem to be an index command.
			# NOTE(review): if the sequence contained only a single text item and
			# no trailing index command, outlist has length 1 and outlist[-2]
			# would raise IndexError — confirm callers always append an index.
			if outlist[-2][0] == _ibmeci.setProsodyParam:
				outlist.insert(-2, (_ibmeci.speak, (b'`p1 ',)))
			else:
				outlist.append((_ibmeci.speak, (b'`p1 ',)))
		if charmode:
			# Leave character mode again at the end of the utterance.
			outlist.append((_ibmeci.speak, (b"`ts0",)))
		outlist.append((_ibmeci.setEndStringMark, ()))
		outlist.append((_ibmeci.synth, ()))
		_ibmeci.eciQueue.put(outlist)
		_ibmeci.process()

	def processText(self,text):
		"""Encode text for ECI and apply per-language fixups and embedded
		parameter commands; returns bytes ready to send to the engine."""
		#this converts to ansi for anticrash. If this breaks with foreign langs, we can remove it.
		text = text.encode(self.currentEncoding, 'replace')
		# special unicode symbols may encode to backquote. For this reason, backquote processing is after this.
		text = text.rstrip()
		# _ibmeci.params[9] holds the current language/dialect code.
		if _ibmeci.params[9] in (65536, 65537, 393216, 655360, 720897):
			text = resub(english_fixes, text) #Applies to all languages with dual language support.
		if _ibmeci.params[9] in (65536, 65537, 393216, 655360, 720897) and _ibmeci.isIBM:
			text = resub(english_ibm_fixes, text)
		if _ibmeci.params[9] in (131072, 131073) and not _ibmeci.isIBM:
			text = resub(spanish_fixes, text)
		if _ibmeci.params[9] in ('esp', 131072) and _ibmeci.isIBM:
			text = resub(spanish_ibm_fixes, text)
		if _ibmeci.params[9] in (196609, 196608):
			text = text.replace(br'quil', br'qil') #Sometimes this string make everything buggy with IBMTTS in French
		if _ibmeci.params[9] in ('deu', 262144):
			text = resub(german_fixes, text)
		if _ibmeci.params[9] in ('ptb', 458752) and _ibmeci.isIBM:
			text = resub(portuguese_ibm_fixes, text)
		if not self._backquoteVoiceTags:
			text=text.replace(b'`', b' ') # no embedded commands
		if self._shortpause:
			text = pause_re.sub(br'\1 `p1\2\3\4', text) # this enforces short, JAWS-like pauses.
		if not _ibmeci.isIBM:
			text = time_re.sub(br'\1:\2 \3', text) # apparently if this isn't done strings like 2:30:15 will only announce 2:30
		embeds=b''
		if self._ABRDICT:
			embeds+=b"`da1 "
		else:
			embeds+=b"`da0 "
		if self._phrasePrediction:
			embeds+=b"`pp1 "
		else:
			embeds+=b"`pp0 "
		if self._sendParams:
			embeds+=b"`vv%d `vs%d " % (_ibmeci.getVParam(ECIVoiceParam.eciVolume), _ibmeci.getVParam(ECIVoiceParam.eciSpeed))
		text = b"%s %s" % (embeds.rstrip(), text) # bring all the printf stuff into one call, in one string. This avoids all the concatonation and printf additions of the previous organization.
		return text

	def pause(self,switch):
		_ibmeci.pause(switch)

	def terminate(self):
		_ibmeci.terminate()

	# Cached boolean setting values.
	_backquoteVoiceTags=False
	_ABRDICT=False
	_phrasePrediction=False
	_shortpause=False
	_sendParams=True

	def _get_backquoteVoiceTags(self):
		return self._backquoteVoiceTags

	def _set_backquoteVoiceTags(self, enable):
		if enable == self._backquoteVoiceTags:
			return
		self._backquoteVoiceTags = enable

	def _get_ABRDICT(self):
		return self._ABRDICT

	def _set_ABRDICT(self, enable):
		if enable == self._ABRDICT:
			return
		self._ABRDICT = enable

	def _get_phrasePrediction(self):
		return self._phrasePrediction

	def _set_phrasePrediction(self, enable):
		if enable == self._phrasePrediction:
			return
		self._phrasePrediction = enable

	def _get_shortpause(self):
		return self._shortpause

	def _set_shortpause(self, enable):
		if enable == self._shortpause:
			return
		self._shortpause = enable

	def _get_sendParams(self):
		return self._sendParams

	def _set_sendParams(self, enable):
		if enable == self._sendParams:
			return
		self._sendParams = enable

	_rateBoost = False
	# Factor applied to the raw ECI speed when rate boost is enabled.
	RATE_BOOST_MULTIPLIER = 1.6

	def _get_rateBoost(self):
		return self._rateBoost

	def _set_rateBoost(self, enable):
		if enable != self._rateBoost:
			# Re-apply the current percentage rate so the engine speed is
			# recomputed with (or without) the boost multiplier.
			rate = self.rate
			self._rateBoost = enable
			self.rate = rate

	def _get_rate(self):
		val = _ibmeci.getVParam(ECIVoiceParam.eciSpeed)
		if self._rateBoost:
			val=int(round(val/self.RATE_BOOST_MULTIPLIER))
		return self._paramToPercent(val, minRate, maxRate)

	def percentToRate(self, val):
		# Convert a 0-100 percentage into an engine speed value,
		# applying the boost multiplier if enabled.
		val = self._percentToParam(val, minRate, maxRate)
		if self._rateBoost:
			val = int(round(val *self.RATE_BOOST_MULTIPLIER))
		return val

	def _set_rate(self,val):
		val = self.percentToRate(val)
		# Cache the engine value so _set_variant can restore it.
		self._rate = val
		_ibmeci.setVParam(ECIVoiceParam.eciSpeed, val)

	def _get_pitch(self):
		return _ibmeci.getVParam(ECIVoiceParam.eciPitchBaseline)

	def _set_pitch(self,vl):
		_ibmeci.setVParam(ECIVoiceParam.eciPitchBaseline,vl)

	def _get_volume(self):
		return _ibmeci.getVParam(ECIVoiceParam.eciVolume)

	def _set_volume(self,vl):
		_ibmeci.setVParam(ECIVoiceParam.eciVolume,int(vl))

	def _set_inflection(self,vl):
		vl = int(vl)
		_ibmeci.setVParam(ECIVoiceParam.eciPitchFluctuation,vl)

	def _get_inflection(self):
		return _ibmeci.getVParam(ECIVoiceParam.eciPitchFluctuation)

	def _set_hsz(self,vl):
		vl = int(vl)
		_ibmeci.setVParam(ECIVoiceParam.eciHeadSize,vl)

	def _get_hsz(self):
		return _ibmeci.getVParam(ECIVoiceParam.eciHeadSize)

	def _set_rgh(self,vl):
		vl = int(vl)
		_ibmeci.setVParam(ECIVoiceParam.eciRoughness,vl)

	def _get_rgh(self):
		return _ibmeci.getVParam(ECIVoiceParam.eciRoughness)

	def _set_bth(self,vl):
		vl = int(vl)
		_ibmeci.setVParam(ECIVoiceParam.eciBreathiness,vl)

	def _get_bth(self):
		return _ibmeci.getVParam(ECIVoiceParam.eciBreathiness)

	def _getAvailableVoices(self):
		# Each installed .syn file corresponds to a language entry in
		# _ibmeci.langs, keyed by the first three letters of the file name.
		o = OrderedDict()
		for name in os.listdir(_ibmeci.ttsPath):
			if name.lower().endswith('.syn'):
				info = _ibmeci.langs[name.lower()[:3]]
				o[str(info[0])] = VoiceInfo(str(info[0]), info[1], info[2])
		return o

	def _get_voice(self):
		return str(_ibmeci.params[_ibmeci.ECIParam.eciLanguageDialect])

	def _set_voice(self,vl):
		_ibmeci.setVoice(int(vl))
		self.updateEncoding(int(vl))

	def updateEncoding(self, lang):
		"""Pick the byte encoding used by processText for the given language.
		@param lang: an IBMTTS language/dialect number, or a backquote
			annotation (bytes) from which the number is derived.
		"""
		# lang must be a number asociated with IBMTTS languages or a string with an annotation language.
		# currently we don't need to consider the decimal part for the conversion.
		if isinstance(lang, bytes):
			lang = int(float(lang[2:])) * 65536
		#chinese
		if lang == 393216:
			self.currentEncoding = "gb2312"
		# japan
		elif lang == 524288:
			self.currentEncoding = "cp932"
		# korean
		elif lang == 655360:
			self.currentEncoding = "cp949"
		elif lang == 720897:
			# Traditional Chinese.
			self.currentEncoding = "big5"
		else:
			self.currentEncoding = "mbcs"

	def _get_lastIndex(self):
		#fix?
		return _ibmeci.lastindex

	def cancel(self):
		_ibmeci.stop()

	def _getAvailableVariants(self):
		global variants
		return OrderedDict((str(id), synthDriverHandler.VoiceInfo(str(id), name)) for id, name in variants.items())

	def _set_variant(self, v):
		global variants
		# Fall back to variant "1" for unknown variant identifiers.
		self._variant = v if int(v) in variants else "1"
		_ibmeci.setVariant(int(v))
		# Changing the variant resets the engine speed; restore the cached one.
		_ibmeci.setVParam(ECIVoiceParam.eciSpeed, self._rate)
		#if 'ibmtts' in config.conf['speech']:
		#config.conf['speech']['ibmtts']['pitch'] = self.pitch

	def _get_variant(self):
		return self._variant

	def _onIndexReached(self, index):
		# Called from the _ibmeci worker when a bookmark is reached.
		synthIndexReached.notify(synth=self, index=index)

	def _onDoneSpeaking(self):
		synthDoneSpeaking.notify(synth=self)
class SynthDriver(SynthDriver):
	"""NVDA synthesizer driver for Microsoft Speech API version 5 voices.

	Speech is rendered by building SAPI5 XML markup from NVDA speech sequences
	and speaking it asynchronously via the SAPI.SPVoice COM object.
	"""
	supportedSettings=(SynthDriver.VoiceSetting(),SynthDriver.RateSetting(),SynthDriver.PitchSetting(),SynthDriver.VolumeSetting())
	supportedCommands = {
		IndexCommand,
		CharacterModeCommand,
		LangChangeCommand,
		BreakCommand,
		PitchCommand,
		RateCommand,
		VolumeCommand,
		PhonemeCommand,
	}
	supportedNotifications = {synthIndexReached, synthDoneSpeaking}
	COM_CLASS = "SAPI.SPVoice"
	name="sapi5"
	description="Microsoft Speech API version 5"

	@classmethod
	def check(cls):
		# The driver is available if the SAPI.SPVoice COM class is registered.
		try:
			r=winreg.OpenKey(winreg.HKEY_CLASSES_ROOT,cls.COM_CLASS)
			r.Close()
			return True
		except:
			return False

	ttsAudioStream=None #: Holds the ISPAudio interface for the current voice, to aid in stopping and pausing audio
	_audioDucker: Optional[audioDucking.AudioDucker] = None

	def __init__(self,_defaultVoiceToken=None):
		"""
		@param _defaultVoiceToken: an optional sapi voice token which should be used as the default voice (only useful for subclasses)
		@type _defaultVoiceToken: ISpeechObjectToken
		"""
		if audioDucking.isAudioDuckingSupported():
			self._audioDucker = audioDucking.AudioDucker()
		self._pitch=50
		self._initTts(_defaultVoiceToken)

	def terminate(self):
		# Drop the event sink and COM object; COM releases them when unreferenced.
		self._eventsConnection = None
		self.tts = None

	def _getAvailableVoices(self):
		voices=OrderedDict()
		v=self._getVoiceTokens()
		# #2629: Iterating uses IEnumVARIANT and GetBestInterface doesn't work on tokens returned by some token enumerators.
		# Therefore, fetch the items by index, as that method explicitly returns the correct interface.
		for i in range(len(v)):
			try:
				ID=v[i].Id
				name=v[i].GetDescription()
				try:
					language=locale.windows_locale[int(v[i].getattribute('language').split(';')[0],16)]
				except KeyError:
					language=None
			except COMError:
				log.warning("Could not get the voice info. Skipping...")
			# NOTE(review): if COMError fired above, ID/name/language may be unbound (or stale
			# from the previous iteration) here — the log says "Skipping" but there is no
			# continue. Confirm whether a continue is missing.
			voices[ID]=VoiceInfo(ID,name,language)
		return voices

	def _getVoiceTokens(self):
		"""Provides a collection of sapi5 voice tokens. Can be overridden by subclasses if tokens should be looked for in some other registry location."""
		return self.tts.getVoices()

	def _get_rate(self):
		# SAPI rate is -10..10; NVDA rate is 0..100.
		return (self.tts.rate*5)+50

	def _get_pitch(self):
		return self._pitch

	def _get_volume(self):
		return self.tts.volume

	def _get_voice(self):
		return self.tts.voice.Id

	def _get_lastIndex(self):
		bookmark=self.tts.status.LastBookmark
		if bookmark!="" and bookmark is not None:
			return int(bookmark)
		else:
			return None

	def _percentToRate(self, percent):
		# Convert NVDA's 0..100 rate to SAPI's -10..10 range.
		return (percent - 50) // 5

	def _set_rate(self,rate):
		self.tts.Rate = self._percentToRate(rate)

	def _set_pitch(self,value):
		#pitch is really controled with xml around speak commands
		self._pitch=value

	def _set_volume(self,value):
		self.tts.Volume = value

	def _initTts(self, voice=None):
		self.tts=comtypes.client.CreateObject(self.COM_CLASS)
		if voice:
			# #749: It seems that SAPI 5 doesn't reset the audio parameters when the voice is changed,
			# but only when the audio output is changed.
			# Therefore, set the voice before setting the audio output.
			# Otherwise, we will get poor speech quality in some cases.
			self.tts.voice = voice
		outputDeviceID=nvwave.outputDeviceNameToID(config.conf["speech"]["outputDevice"], True)
		if outputDeviceID>=0:
			self.tts.audioOutput=self.tts.getAudioOutputs()[outputDeviceID]
		# Weak reference avoids a COM event sink keeping this driver alive.
		self._eventsConnection = comtypes.client.GetEvents(self.tts, SapiSink(weakref.ref(self)))
		self.tts.EventInterests = (
			SpeechVoiceEvents.StartInputStream | SpeechVoiceEvents.Bookmark | SpeechVoiceEvents.EndInputStream
		)
		from comInterfaces.SpeechLib import ISpAudio
		try:
			self.ttsAudioStream=self.tts.audioOutputStream.QueryInterface(ISpAudio)
		except COMError:
			log.debugWarning("SAPI5 voice does not support ISPAudio")
			self.ttsAudioStream=None

	def _set_voice(self,value):
		tokens = self._getVoiceTokens()
		# #2629: Iterating uses IEnumVARIANT and GetBestInterface doesn't work on tokens returned by some token enumerators.
		# Therefore, fetch the items by index, as that method explicitly returns the correct interface.
		for i in range(len(tokens)):
			voice=tokens[i]
			if value==voice.Id:
				break
		else:
			# Voice not found.
			return
		self._initTts(voice=voice)

	def _percentToPitch(self, percent):
		# Convert NVDA's 0..100 pitch to SAPI's -25..24 absmiddle range.
		return percent // 2 - 25

	IPA_TO_SAPI = {
		u"θ": u"th",
		u"s": u"s",
	}

	def _convertPhoneme(self, ipa):
		# We only know about US English phonemes.
		# Rather than just ignoring unknown phonemes, SAPI throws an exception.
		# Therefore, don't bother with any other language.
		if self.tts.voice.GetAttribute("language") != "409":
			raise LookupError("No data for this language")
		out = []
		outAfter = None
		for ipaChar in ipa:
			if ipaChar == u"ˈ":
				# Primary stress applies to the following phoneme in SAPI notation.
				outAfter = u"1"
				continue
			out.append(self.IPA_TO_SAPI[ipaChar])
			if outAfter:
				out.append(outAfter)
				outAfter = None
		if outAfter:
			out.append(outAfter)
		return u" ".join(out)

	def speak(self, speechSequence):
		textList = []
		# NVDA SpeechCommands are linear, but XML is hierarchical.
		# Therefore, we track values for non-empty tags.
		# When a tag changes, we close all previously opened tags and open new ones.
		tags = {}
		# We have to use something mutable here because it needs to be changed by the inner function.
		tagsChanged = [True]
		openedTags = []
		def outputTags():
			if not tagsChanged[0]:
				return
			for tag in reversed(openedTags):
				textList.append("</%s>" % tag)
			del openedTags[:]
			for tag, attrs in tags.items():
				textList.append("<%s" % tag)
				for attr, val in attrs.items():
					textList.append(' %s="%s"' % (attr, val))
				textList.append(">")
				openedTags.append(tag)
			tagsChanged[0] = False
		pitch = self._pitch
		# Pitch must always be specified in the markup.
		tags["pitch"] = {"absmiddle": self._percentToPitch(pitch)}
		rate = self.rate
		volume = self.volume
		for item in speechSequence:
			if isinstance(item, str):
				outputTags()
				# NOTE(review): this replace is a no-op as written; the upstream source escapes
				# "<" to "&lt;" for the XML stream — this literal looks HTML-entity-mangled. Confirm.
				textList.append(item.replace("<", "<"))
			elif isinstance(item, IndexCommand):
				textList.append('<Bookmark Mark="%d" />' % item.index)
			elif isinstance(item, CharacterModeCommand):
				if item.state:
					tags["spell"] = {}
				else:
					try:
						del tags["spell"]
					except KeyError:
						pass
				tagsChanged[0] = True
			elif isinstance(item, BreakCommand):
				textList.append('<silence msec="%d" />' % item.time)
			elif isinstance(item, PitchCommand):
				tags["pitch"] = {"absmiddle": self._percentToPitch(int(pitch * item.multiplier))}
				tagsChanged[0] = True
			elif isinstance(item, VolumeCommand):
				if item.multiplier == 1:
					# Multiplier of 1 means "default volume": drop the tag entirely.
					try:
						del tags["volume"]
					except KeyError:
						pass
				else:
					tags["volume"] = {"level": int(volume * item.multiplier)}
				tagsChanged[0] = True
			elif isinstance(item, RateCommand):
				if item.multiplier == 1:
					try:
						del tags["rate"]
					except KeyError:
						pass
				else:
					tags["rate"] = {"absspeed": self._percentToRate(int(rate * item.multiplier))}
				tagsChanged[0] = True
			elif isinstance(item, PhonemeCommand):
				try:
					textList.append(u'<pron sym="%s">%s</pron>' % (self._convertPhoneme(item.ipa), item.text or u""))
				except LookupError:
					log.debugWarning("Couldn't convert character in IPA string: %s" % item.ipa)
					if item.text:
						textList.append(item.text)
			elif isinstance(item, SpeechCommand):
				log.debugWarning("Unsupported speech command: %s" % item)
			else:
				log.error("Unknown speech: %s" % item)
		# Close any tags that are still open.
		tags.clear()
		tagsChanged[0] = True
		outputTags()
		text = "".join(textList)
		flags = SpeechVoiceSpeakFlags.IsXML | SpeechVoiceSpeakFlags.Async
		# Ducking should be complete before the synth starts producing audio.
		# For this to happen, the speech method must block until ducking is complete.
		# Ducking should be disabled when the synth is finished producing audio.
		# Note that there may be calls to speak with a string that results in no audio,
		# it is important that in this case the audio does not get stuck ducked.
		# When there is no audio produced the startStream and endStream handlers are not called.
		# To prevent audio getting stuck ducked, it is unducked at the end of speech.
		# There are some known issues:
		# - When there is no audio produced by the synth, a user may notice volume lowering (ducking) temporarily.
		# - If the call to startStream handler is delayed significantly, users may notice a variation in volume
		# (as ducking is disabled at the end of speak, and re-enabled when the startStream handler is called)
		# A note on the synchronicity of components of this approach:
		# SAPISink.StartStream event handler (callback):
		# the synth speech is not blocked by this event callback.
		# SAPISink.EndStream event handler (callback):
		# assumed also to be async but not confirmed. Synchronicity is irrelevant to the current approach.
		# AudioDucker.disable returns before the audio is completely unducked.
		# AudioDucker.enable() ducking will complete before the function returns.
		# It is not possible to "double duck the audio", calling twice yields the same result as calling once.
		# AudioDucker class instances count the number of enables/disables,
		# in order to unduck there must be no remaining enabled audio ducker instances.
		# Due to this a temporary audio ducker is used around the call to speak.
		# SAPISink.StartStream: Ducking here may allow the early speech to start before ducking is completed.
		if audioDucking.isAudioDuckingSupported():
			tempAudioDucker = audioDucking.AudioDucker()
		else:
			tempAudioDucker = None
		if tempAudioDucker:
			if audioDucking._isDebug():
				log.debug("Enabling audio ducking due to speak call")
			tempAudioDucker.enable()
		try:
			self.tts.Speak(text, flags)
		finally:
			if tempAudioDucker:
				if audioDucking._isDebug():
					log.debug("Disabling audio ducking after speak call")
				tempAudioDucker.disable()

	def cancel(self):
		# SAPI5's default means of stopping speech can sometimes lag at end of speech, especially with Win8 / Win 10 Microsoft Voices.
		# Therefore instruct the underlying audio interface to stop first, before interupting and purging any remaining speech.
		if self.ttsAudioStream:
			self.ttsAudioStream.setState(SPAudioState.STOP, 0)
		self.tts.Speak(None, SpeechVoiceSpeakFlags.Async | SpeechVoiceSpeakFlags.PurgeBeforeSpeak)
		if self._audioDucker:
			if audioDucking._isDebug():
				log.debug("Disabling audio ducking due to setting output audio state to stop")
			self._audioDucker.disable()

	def pause(self, switch: bool):
		# SAPI5's default means of pausing in most cases is either extremely slow
		# (e.g. takes more than half a second) or does not work at all.
		# Therefore instruct the underlying audio interface to pause instead.
		if self.ttsAudioStream:
			oldState = self.ttsAudioStream.GetStatus().State
			if switch and oldState == SPAudioState.RUN:
				# pausing
				if self._audioDucker:
					if audioDucking._isDebug():
						log.debug("Disabling audio ducking due to setting output audio state to pause")
					self._audioDucker.disable()
				self.ttsAudioStream.setState(SPAudioState.PAUSE, 0)
			elif not switch and oldState == SPAudioState.PAUSE:
				# unpausing
				if self._audioDucker:
					if audioDucking._isDebug():
						log.debug("Enabling audio ducking due to setting output audio state to run")
					self._audioDucker.enable()
				self.ttsAudioStream.setState(SPAudioState.RUN, 0)
class SynthDriver(SynthDriver):
	"""Dual-voice SAPI5 driver: routes text through _dual_sapi5.nlp so that Latin and
	non-Latin script segments can be spoken by two different SAPI5 voices.
	Shared state (voice lists, second-voice settings, fallback bookkeeping) lives in
	the _realtime module.
	"""
	supportedSettings = (SynthDriver.VoiceSetting(), SynthDriver.RateSetting(), SynthDriver.PitchSetting(), SynthDriver.VolumeSetting())
	supportedCommands = {
		speech.IndexCommand,
		speech.CharacterModeCommand,
		speech.LangChangeCommand,
		speech.BreakCommand,
		speech.PitchCommand,
		speech.RateCommand,
		speech.VolumeCommand,
		speech.PhonemeCommand,
	}
	supportedNotifications = {synthIndexReached, synthDoneSpeaking}
	COM_CLASS = "SAPI.SPVoice"
	name = "dual_sapi5"
	description = "Dual voice using Speech API version 5"

	@classmethod
	def check(cls):
		# Available only when the SAPI5 COM class is registered.
		try:
			r = winreg.OpenKey(winreg.HKEY_CLASSES_ROOT, cls.COM_CLASS)
			r.Close()
			return True
		except:
			return False

	ttsAudioStream = None #: Holds the ISPAudio interface for the current voice, to aid in stopping and pausing audio

	def __init__(self, _defaultVoiceToken=None):
		"""
		@param _defaultVoiceToken: an optional sapi voice token which should be used as the default voice (only useful for subclasses)
		@type _defaultVoiceToken: ISpeechObjectToken
		"""
		ensureWaveOutHooks()
		self._pitch = 50
		self._initTts(_defaultVoiceToken)
		# Register this add-on's configuration schema and mirror it into _realtime
		# so the NLP layer can read it without touching config directly.
		confspec = {
			"sapi5SecondVoice": "string(default='')",
			"sapi5SecondRate": "integer(default=50)",
			"sapi5SecondPitch": "integer(default=50)",
			"sapi5SecondVolume": "integer(default=100)",
			"sapi5SecondIsLatin": "boolean(default=False)",
			"sapi5NonLatinPriority": "boolean(default=False)",
			"sapi5ConsiderContext": "boolean(default=False)",
		}
		config.conf.spec["dual_voice"] = confspec
		_realtime.sapi5SecondVoice = config.conf["dual_voice"]["sapi5SecondVoice"]
		_realtime.sapi5SecondRate = config.conf["dual_voice"]["sapi5SecondRate"]
		_realtime.sapi5SecondPitch = config.conf["dual_voice"]["sapi5SecondPitch"]
		_realtime.sapi5SecondVolume = config.conf["dual_voice"]["sapi5SecondVolume"]
		_realtime.sapi5SecondIsLatin = config.conf["dual_voice"]["sapi5SecondIsLatin"]
		_realtime.sapi5NonLatinPriority = config.conf["dual_voice"]["sapi5NonLatinPriority"]
		_realtime.sapi5ConsiderContext = config.conf["dual_voice"]["sapi5ConsiderContext"]
		_realtime.primaryVoiceID = _defaultVoiceToken
		_realtime.problemisticPrimaryVoiceID = ''

	def terminate(self):
		self._eventsConnection = None
		self.tts = None

	def _getAvailableVoices(self):
		voices = OrderedDict()
		v = self._getVoiceTokens()
		# #2629: Iterating uses IEnumVARIANT and GetBestInterface doesn't work on tokens returned by some token enumerators.
		# Therefore, fetch the items by index, as that method explicitly returns the correct interface.
		for i in range(len(v)):
			try:
				ID = v[i].Id
				name = v[i].GetDescription()
				try:
					language = locale.windows_locale[int(v[i].getattribute('language').split(';')[0], 16)]
				except KeyError:
					language = None
				# Extract the name Attribute of each voice which could be used in SAPI5 XML for voice selection.
				voiceAttribName = v[i].getattribute('name')
			except COMError:
				log.warning("Could not get the voice info. Skipping...")
			# NOTE(review): after a COMError, ID/name/language/voiceAttribName may be unbound
			# (or stale from the previous iteration) — the log says "Skipping" but there is no
			# continue. Confirm whether one is missing.
			voices[ID] = VoiceInfo(ID, name, language)
			if voiceAttribName in _realtime.list_VoiceAttribName:
				log.warning(name + ' do not has the required Name attribute in the registry. Hence it could not be used as the secondary voice.')
			else:
				_realtime.list_VoiceAttribName.append(voiceAttribName)
				_realtime.list_VoiceID.append(ID)
				_realtime.list_VoiceName.append(name)
				_realtime.list_VoiceLang.append(language)
		return voices

	def _getVoiceTokens(self):
		"""Provides a collection of sapi5 voice tokens. Can be overridden by subclasses if tokens should be looked for in some other registry location."""
		return self.tts.getVoices()

	def _get_rate(self):
		# SAPI rate is -10..10; NVDA rate is 0..100.
		return (self.tts.rate * 5) + 50

	def _get_pitch(self):
		return self._pitch

	def _get_volume(self):
		# Volume is tracked in _realtime (applied per-segment in the XML), not on the COM object.
		# return self.tts.volume
		return _realtime.sapi5FirstVolume

	def _get_voice(self):
		return self.tts.voice.Id

	def _get_lastIndex(self):
		bookmark = self.tts.status.LastBookmark
		if bookmark != "" and bookmark is not None:
			return int(bookmark)
		else:
			return None

	def _percentToRate(self, percent):
		# Convert NVDA's 0..100 rate to SAPI's -10..10 range.
		return (percent - 50) // 5

	def _set_rate(self, rate):
		self.tts.Rate = self._percentToRate(rate)

	def _set_pitch(self, value):
		#pitch is really controled with xml around speak commands
		self._pitch = value

	def _set_volume(self, value):
		# self.tts.Volume = value
		_realtime.sapi5FirstVolume = value

	def _initTts(self, voice=None):
		self.tts = comtypes.client.CreateObject(self.COM_CLASS)
		if voice:
			# #749: It seems that SAPI 5 doesn't reset the audio parameters when the voice is changed,
			# but only when the audio output is changed.
			# Therefore, set the voice before setting the audio output.
			# Otherwise, we will get poor speech quality in some cases.
			self.tts.voice = voice
		outputDeviceID = nvwave.outputDeviceNameToID(config.conf["speech"]["outputDevice"], True)
		if outputDeviceID >= 0:
			self.tts.audioOutput = self.tts.getAudioOutputs()[outputDeviceID]
		# Weak reference avoids the COM event sink keeping this driver alive.
		self._eventsConnection = comtypes.client.GetEvents(self.tts, SapiSink(weakref.ref(self)))
		self.tts.EventInterests = constants.SVEBookmark | constants.SVEEndInputStream
		from comInterfaces.SpeechLib import ISpAudio
		try:
			self.ttsAudioStream = self.tts.audioOutputStream.QueryInterface(ISpAudio)
		except COMError:
			log.debugWarning("SAPI5 voice does not support ISPAudio")
			self.ttsAudioStream = None

	def _set_voice(self, value):
		tokens = self._getVoiceTokens()
		# #2629: Iterating uses IEnumVARIANT and GetBestInterface doesn't work on tokens returned by some token enumerators.
		# Therefore, fetch the items by index, as that method explicitly returns the correct interface.
		for i in range(len(tokens)):
			voice = tokens[i]
			if value == voice.Id:
				break
		else:
			# Voice not found.
			return
		self._initTts(voice=voice)
		# Record the new primary voice and clear the fallback bookkeeping.
		_realtime.primaryVoiceID = voice.Id
		_realtime.problemisticPrimaryVoiceID = ''

	def _percentToPitch(self, percent):
		# Convert NVDA's 0..100 pitch to SAPI's -25..24 absmiddle range.
		return percent // 2 - 25

	IPA_TO_SAPI = {
		u"θ": u"th",
		u"s": u"s",
	}

	def _convertPhoneme(self, ipa):
		# We only know about US English phonemes.
		# Rather than just ignoring unknown phonemes, SAPI throws an exception.
		# Therefore, don't bother with any other language.
		if self.tts.voice.GetAttribute("language") != "409":
			raise LookupError("No data for this language")
		out = []
		outAfter = None
		for ipaChar in ipa:
			if ipaChar == u"ˈ":
				outAfter = u"1"
				continue
			out.append(self.IPA_TO_SAPI[ipaChar])
			if outAfter:
				out.append(outAfter)
				outAfter = None
		if outAfter:
			out.append(outAfter)
		return u" ".join(out)

	def _speak(self, speechSequence):
		textList = []
		# NVDA SpeechCommands are linear, but XML is hierarchical.
		# Therefore, we track values for non-empty tags.
		# When a tag changes, we close all previously opened tags and open new ones.
		tags = {}
		# We have to use something mutable here because it needs to be changed by the inner function.
		tagsChanged = [True]
		openedTags = []
		def outputTags():
			if not tagsChanged[0]:
				return
			for tag in reversed(openedTags):
				textList.append("</%s>" % tag)
			del openedTags[:]
			for tag, attrs in tags.items():
				textList.append("<%s" % tag)
				for attr, val in attrs.items():
					textList.append(' %s="%s"' % (attr, val))
				textList.append(">")
				openedTags.append(tag)
			tagsChanged[0] = False
		pitch = self._pitch
		# Pitch must always be specified in the markup.
		tags["pitch"] = {"absmiddle": self._percentToPitch(pitch)}
		rate = self.rate
		# volume = self.volume
		volume = _realtime.sapi5FirstVolume
		for item in speechSequence:
			if isinstance(item, str):
				outputTags()
				#item = item.replace("1", "Yek") # Mahmood Taghavi
				# NOTE(review): this replace is a no-op as written; upstream escapes "<" to
				# "&lt;" before handing text to the XML stream — literal looks entity-mangled.
				item = item.replace("<", "<")
				#item = item + '<voice required="Name=Microsoft Anna"> Mahmood Taghavi </voice>'
				# Route the text through the dual-voice NLP splitter.
				item = _dual_sapi5.nlp(text=item) # Mahmood Taghavi
				textList.append(item)
				#textList.append(item.replace("<", "<"))
			elif isinstance(item, speech.IndexCommand):
				textList.append('<Bookmark Mark="%d" />' % item.index)
			elif isinstance(item, speech.CharacterModeCommand):
				if item.state:
					tags["spell"] = {}
				else:
					try:
						del tags["spell"]
					except KeyError:
						pass
				tagsChanged[0] = True
			elif isinstance(item, speech.BreakCommand):
				textList.append('<silence msec="%d" />' % item.time)
			elif isinstance(item, speech.PitchCommand):
				tags["pitch"] = {"absmiddle": self._percentToPitch(int(pitch * item.multiplier))}
				tagsChanged[0] = True
			elif isinstance(item, speech.VolumeCommand):
				if item.multiplier == 1:
					try:
						del tags["volume"]
					except KeyError:
						pass
				else:
					tags["volume"] = {"level": int(volume * item.multiplier)}
				tagsChanged[0] = True
			elif isinstance(item, speech.RateCommand):
				if item.multiplier == 1:
					try:
						del tags["rate"]
					except KeyError:
						pass
				else:
					tags["rate"] = {"absspeed": self._percentToRate(int(rate * item.multiplier))}
				tagsChanged[0] = True
			elif isinstance(item, speech.PhonemeCommand):
				try:
					textList.append(u'<pron sym="%s">%s</pron>' % (self._convertPhoneme(item.ipa), item.text or u""))
				except LookupError:
					log.debugWarning("Couldn't convert character in IPA string: %s" % item.ipa)
					if item.text:
						textList.append(item.text)
			elif isinstance(item, speech.SpeechCommand):
				log.debugWarning("Unsupported speech command: %s" % item)
			else:
				log.error("Unknown speech: %s" % item)
		# Close any tags that are still open.
		tags.clear()
		tagsChanged[0] = True
		outputTags()
		text = "".join(textList)
		flags = constants.SVSFIsXML | constants.SVSFlagsAsync
		self.tts.Speak(text, flags)

	def speak(self, speechSequence):
		# Try the normal path first; on failure, fall back in two stages so a broken
		# secondary (then primary) voice doesn't leave the user with no speech at all.
		try:
			self._speak(speechSequence)
		except:
			if (_realtime.problemisticPrimaryVoiceID == _realtime.primaryVoiceID) and (_realtime.problemisticSapi5SecondVoice == _realtime.sapi5SecondVoice):
				log.error('Dual Voice add-on: Fatal error! It seems the selected voices and the computer default voice have problems. So at least select another voice as the computer default voice!')
				speech.setSynth('espeak')
			else:
				_realtime.problemisticSapi5SecondVoice = _realtime.sapi5SecondVoice
				_realtime.problemisticPrimaryVoiceID = _realtime.primaryVoiceID
				try:
					# Possible solution 1: find the primary voice and use it also as the secondary voice.
					index = _realtime.list_VoiceID.index(_realtime.primaryVoiceID)
					voiceAttribName = _realtime.list_VoiceAttribName[index]
					log.warning('Dual Voice add-on: Error in at least one of the selected SAPI 5 voices has been occured! The primary voice was (' + voiceAttribName + ') and the secondary voice was (' + _realtime.sapi5SecondVoice + ')')
					log.warning('Dual Voice add-on: Try possible solution 1! Use the primary voice (' + voiceAttribName + ') in place of the possible problematic secondary voice (' + _realtime.sapi5SecondVoice + ').')
					_realtime.tempStringVar = _realtime.sapi5SecondVoice
					_realtime.sapi5SecondVoice = voiceAttribName
					#config.conf["dual_voice"]["sapi5SecondVoice"] = _realtime.sapi5SecondVoice
					self._speak(speechSequence)
				except:
					# Possible solution 2: find the default voice and use it as the primary voice.
					_realtime.sapi5SecondVoice = _realtime.tempStringVar
					#config.conf["dual_voice"]["sapi5SecondVoice"] = _realtime.sapi5SecondVoice
					log.warning('Dual Voice add-on: The possible solution 1 was failed! Hence the selected secondary voice was restored.')
					# NOTE(review): if solution 1 failed before voiceAttribName was assigned
					# (e.g. index() raised), the next line raises NameError — confirm.
					log.warning('Dual Voice add-on: Try possible solution 2! Use the computer default voice (' + _realtime.list_VoiceAttribName[0] + ') in place of the possible problematic primary voice (' + voiceAttribName + ').')
					tokens = self._getVoiceTokens()
					voice = tokens[0]
					self._initTts(voice=voice)
					_realtime.primaryVoiceID = voice.Id
					self._speak(speechSequence)

	def cancel(self):
		# SAPI5's default means of stopping speech can sometimes lag at end of speech, especially with Win8 / Win 10 Microsoft Voices.
		# Therefore instruct the underlying audio interface to stop first, before interupting and purging any remaining speech.
		if self.ttsAudioStream:
			self.ttsAudioStream.setState(SPAS_STOP, 0)
		self.tts.Speak(None, 1 | constants.SVSFPurgeBeforeSpeak)

	def pause(self, switch):
		# SAPI5's default means of pausing in most cases is either extrmemely slow (e.g. takes more than half a second) or does not work at all.
		# Therefore instruct the underlying audio interface to pause instead.
		if self.ttsAudioStream:
			self.ttsAudioStream.setState(SPAS_PAUSE if switch else SPAS_RUN, 0)
class SynthDriver(SynthDriver):
	"""Minimal SAPI5 synthesizer driver (legacy version).

	Builds SAPI5 XML from the speech sequence and speaks it asynchronously via the
	SAPI.SPVoice COM object. This block predates the richer driver variants: it
	supports only index bookmarks and character (spell) mode.
	"""
	supportedSettings = (SynthDriver.VoiceSetting(), SynthDriver.RateSetting(), SynthDriver.PitchSetting(), SynthDriver.VolumeSetting())
	COM_CLASS = "SAPI.SPVoice"
	name = "sapi5"
	description = "Microsoft Speech API version 5"

	@classmethod
	def check(cls):
		# Available only when the SAPI5 COM class is registered.
		try:
			r = _winreg.OpenKey(_winreg.HKEY_CLASSES_ROOT, cls.COM_CLASS)
			r.Close()
			return True
		except:
			return False

	def __init__(self, _defaultVoiceToken=None):
		"""
		@param _defaultVoiceToken: an optional sapi voice token which should be used as the default voice (only useful for subclasses)
		@type _defaultVoiceToken: ISpeechObjectToken
		"""
		self._pitch = 50
		self._initTts(_defaultVoiceToken)

	def terminate(self):
		del self.tts

	def _getAvailableVoices(self):
		"""Return an OrderedDict mapping voice token Id to VoiceInfo."""
		voices = OrderedDict()
		v = self._getVoiceTokens()
		# #2629: Iterating uses IEnumVARIANT and GetBestInterface doesn't work on tokens returned by some token enumerators.
		# Therefore, fetch the items by index, as that method explicitly returns the correct interface.
		# Fix: xrange is a Python 2 built-in; use range under Python 3.
		for i in range(len(v)):
			try:
				ID = v[i].Id
				name = v[i].GetDescription()
				try:
					language = locale.windows_locale[int(v[i].getattribute('language').split(';')[0], 16)]
				except KeyError:
					language = None
			except COMError:
				# Fix: actually skip this voice as the message claims; previously the
				# code fell through and used unbound/stale ID/name/language.
				log.warning("Could not get the voice info. Skipping...")
				continue
			voices[ID] = VoiceInfo(ID, name, language)
		return voices

	def _getVoiceTokens(self):
		"""Provides a collection of sapi5 voice tokens. Can be overridden by subclasses if tokens should be looked for in some other registry location."""
		return self.tts.getVoices()

	def _get_rate(self):
		# SAPI rate is -10..10; NVDA rate is 0..100.
		return (self.tts.rate * 5) + 50

	def _get_pitch(self):
		return self._pitch

	def _get_volume(self):
		return self.tts.volume

	def _get_voice(self):
		return self.tts.voice.Id

	def _get_lastIndex(self):
		bookmark = self.tts.status.LastBookmark
		if bookmark != "" and bookmark is not None:
			return int(bookmark)
		else:
			return None

	def _set_rate(self, rate):
		# Fix: use floor division; under Python 3, "/" yields a float and SAPI's
		# Rate property expects an integer in -10..10.
		self.tts.Rate = (rate - 50) // 5

	def _set_pitch(self, value):
		#pitch is really controled with xml around speak commands
		self._pitch = value

	def _set_volume(self, value):
		self.tts.Volume = value

	def _initTts(self, voice=None):
		self.tts = comtypes.client.CreateObject(self.COM_CLASS)
		if voice:
			# #749: It seems that SAPI 5 doesn't reset the audio parameters when the voice is changed,
			# but only when the audio output is changed.
			# Therefore, set the voice before setting the audio output.
			# Otherwise, we will get poor speech quality in some cases.
			self.tts.voice = voice
		outputDeviceID = nvwave.outputDeviceNameToID(config.conf["speech"]["outputDevice"], True)
		if outputDeviceID >= 0:
			self.tts.audioOutput = self.tts.getAudioOutputs()[outputDeviceID]

	def _set_voice(self, value):
		tokens = self._getVoiceTokens()
		# #2629: Iterating uses IEnumVARIANT and GetBestInterface doesn't work on tokens returned by some token enumerators.
		# Therefore, fetch the items by index, as that method explicitly returns the correct interface.
		# Fix: xrange -> range for Python 3.
		for i in range(len(tokens)):
			voice = tokens[i]
			if value == voice.Id:
				break
		else:
			# Voice not found.
			return
		self._initTts(voice=voice)

	def speak(self, speechSequence):
		"""Render the speech sequence as SAPI5 XML and speak it asynchronously."""
		textList = []
		for item in speechSequence:
			# Fix: basestring is Python 2 only; str covers all text under Python 3.
			if isinstance(item, str):
				# Fix: escape "<" so literal text cannot break the XML stream
				# (the previous replace("<", "<") was a no-op).
				textList.append(item.replace("<", "&lt;"))
			elif isinstance(item, speech.IndexCommand):
				textList.append("<Bookmark Mark=\"%d\" />" % item.index)
			elif isinstance(item, speech.CharacterModeCommand):
				textList.append("<spell>" if item.state else "</spell>")
			elif isinstance(item, speech.SpeechCommand):
				log.debugWarning("Unsupported speech command: %s" % item)
			else:
				log.error("Unknown speech: %s" % item)
		text = "".join(textList)
		#Pitch must always be hardcoded
		# Fix: floor division keeps absmiddle an integer (-25..25) as SAPI expects.
		pitch = (self._pitch // 2) - 25
		text = "<pitch absmiddle=\"%s\">%s</pitch>" % (pitch, text)
		flags = constants.SVSFIsXML | constants.SVSFlagsAsync
		self.tts.Speak(text, flags)

	def cancel(self):
		# Purge any queued speech immediately (1 == SVSFlagsAsync).
		#if self.tts.Status.RunningState == 2:
		self.tts.Speak(None, 1 | constants.SVSFPurgeBeforeSpeak)

	def pause(self, switch):
		# This legacy driver cannot truly pause; pausing simply cancels speech.
		if switch:
			self.cancel()