class SynthDriver(synthDriverHandler.SynthDriver):
    """Synthesizer driver for the IBMTTS engine, driven through ``_ibmeci``.

    A speech sequence is translated into a list of ``(callable, args)``
    pairs which is put on ``_ibmeci.eciQueue``. Text is handed to the engine
    as bytes encoded with ``self.currentEncoding``; engine options are sent
    as backquote annotations embedded in that text.
    """

    supportedSettings = (
        SynthDriver.VoiceSetting(),
        SynthDriver.VariantSetting(),
        SynthDriver.RateSetting(),
        BooleanDriverSetting("rateBoost", _("Rate boos&t"), True),
        SynthDriver.PitchSetting(),
        SynthDriver.InflectionSetting(),
        SynthDriver.VolumeSetting(),
        NumericDriverSetting("hsz", _("Head size"), False),
        NumericDriverSetting("rgh", _("Roughness"), False),
        NumericDriverSetting("bth", _("Breathiness"), False),
        BooleanDriverSetting("backquoteVoiceTags", _("Enable backquote voice &tags"), False),
        BooleanDriverSetting("ABRDICT", _("Enable &abbreviation dictionary"), False),
        BooleanDriverSetting("phrasePrediction", _("Enable phrase prediction"), False),
        BooleanDriverSetting("shortpause", _("&Shorten pauses"), False),
        BooleanDriverSetting("sendParams", _("Always Send Current Speech Settings (enable to prevent some tags from sticking, disable for viavoice binary compatibility)"), False),
    )
    supportedCommands = {
        IndexCommand,
        CharacterModeCommand,
        LangChangeCommand,
        BreakCommand,
        PitchCommand,
        RateCommand,
        VolumeCommand,
    }
    supportedNotifications = {synthIndexReached, synthDoneSpeaking}

    description = 'IBMTTS'
    name = 'ibmeci'
    speakingLanguage = ""

    # Maps each supported prosody command type to the engine voice parameter it drives.
    PROSODY_ATTRS = {
        PitchCommand: ECIVoiceParam.eciPitchBaseline,
        VolumeCommand: ECIVoiceParam.eciVolume,
        RateCommand: ECIVoiceParam.eciSpeed,
    }

    # Taken from eloquence_threshold (https://github.com/pumper42nickel/eloquence_threshold).
    # The engine doesn't respect delay time in milliseconds, so the waiting
    # time is adjusted depending on the current speech rate. These
    # (rate percent, multiplier) pairs were measured empirically; values in
    # between are linearly interpolated. Must stay sorted by rate.
    _BREAK_RATE_COEFFICIENTS = ((10, 1), (43, 2), (60, 3), (75, 4), (85, 5))

    # Text encodings for the engine language ids that do not use "mbcs".
    _LANGUAGE_ENCODINGS = {
        393216: "gb2312",  # chinese
        524288: "cp932",  # japan
        655360: "cp949",  # korean
        720897: "big5",
    }

    # Backing fields (and defaults) for the boolean settings below.
    _backquoteVoiceTags = False
    _ABRDICT = False
    _phrasePrediction = False
    _shortpause = False
    _sendParams = True
    _rateBoost = False
    RATE_BOOST_MULTIPLIER = 1.6

    @classmethod
    def check(cls):
        """Return the result of ``_ibmeci.eciCheck()``."""
        return _ibmeci.eciCheck()

    def __init__(self):
        """Initialize the engine and apply the initial rate, language, variant and encoding."""
        _ibmeci.initialize(self._onIndexReached, self._onDoneSpeaking)
        # This information doesn't really need to be displayed, and makes IBMTTS
        # unusable if the addon is not in the same drive as the NVDA executable.
        # It is logged at debug level only, in case it is useful.
        log.debug("Using IBMTTS version %s" % _ibmeci.eciVersion())
        lang = languageHandler.getLanguage()
        self.rate = 50
        self.speakingLanguage = lang
        self.variant = "1"
        self.currentEncoding = "mbcs"

    def _breakRateFactor(self):
        """Return the pause multiplier matching the current speech rate.

        The rate is clamped to the first/last entry of
        ``_BREAK_RATE_COEFFICIENTS``; an exact table hit returns the table
        value and anything in between is linearly interpolated.
        """
        rate = self.rate
        table = self._BREAK_RATE_COEFFICIENTS
        if rate <= table[0][0]:
            return table[0][1]
        if rate >= table[-1][0]:
            return table[-1][1]
        for (lowRate, lowFactor), (highRate, highFactor) in zip(table, table[1:]):
            if rate == lowRate:
                return lowFactor
            if lowRate < rate < highRate:
                slope = (highFactor - lowFactor) / float(highRate - lowRate)
                return lowFactor + slope * (rate - lowRate)
        # Unreachable while the table is sorted; kept as a defensive default.
        return table[-1][1]

    def speak(self, speechSequence):
        """Translate ``speechSequence`` into engine calls and queue them.

        Strings are passed through ``processText``; index, language change,
        character mode, break and prosody commands become their own queue
        entries. Anything else is logged as an error.
        """
        last = None
        defaultLanguage = self.language
        outlist = []
        charmode = False
        for item in speechSequence:
            if isinstance(item, string_types):
                s = self.processText(item)
                outlist.append((_ibmeci.speak, (s,)))
                last = s
            elif isinstance(item, IndexCommand):
                outlist.append((_ibmeci.index, (item.index,)))
            elif isinstance(item, LangChangeCommand):
                annotation = None
                if item.lang in langsAnnotations:
                    annotation = langsAnnotations[item.lang]
                elif item.lang and item.lang[0:2] in langsAnnotations:
                    annotation = langsAnnotations[item.lang[0:2]]
                if annotation:
                    # Only switch when the requested language differs from the one being spoken.
                    if item.lang != self.speakingLanguage and item.lang != self.speakingLanguage[0:2]:
                        outlist.append((_ibmeci.speak, (annotation,)))
                        self.speakingLanguage = item.lang
                        self.updateEncoding(annotation)
                else:
                    outlist.append((_ibmeci.speak, (langsAnnotations[defaultLanguage],)))
                    self.speakingLanguage = defaultLanguage
            elif isinstance(item, CharacterModeCommand):
                outlist.append((_ibmeci.speak, (b"`ts1" if item.state else b"`ts0",)))
                if item.state:
                    charmode = True
            elif isinstance(item, BreakCommand):
                pFactor = int(self._breakRateFactor() * item.time)
                outlist.append((_ibmeci.speak, (b' `p%d ' % (pFactor),)))
            elif type(item) in self.PROSODY_ATTRS:
                val = max(0, min(item.newValue, 100))
                if isinstance(item, RateCommand):
                    val = self.percentToRate(val)
                outlist.append((_ibmeci.setProsodyParam, (self.PROSODY_ATTRS[type(item)], val)))
            else:
                log.error("Unknown speech: %s" % item)
        if last is not None and last[-1] not in punctuation:
            # Check if a prosody command is at the end of the list, because p1 needs to be sent before it.
            # Index -2 is used because -1 always seems to be an index command.
            # The length guard avoids an IndexError when the sequence held a single string only.
            if len(outlist) >= 2 and outlist[-2][0] == _ibmeci.setProsodyParam:
                outlist.insert(-2, (_ibmeci.speak, (b'`p1 ',)))
            else:
                outlist.append((_ibmeci.speak, (b'`p1 ',)))
        if charmode:
            # Leave character mode again once the utterance is done.
            outlist.append((_ibmeci.speak, (b"`ts0",)))
        outlist.append((_ibmeci.setEndStringMark, ()))
        outlist.append((_ibmeci.synth, ()))
        _ibmeci.eciQueue.put(outlist)
        _ibmeci.process()

    def processText(self, text):
        """Encode ``text`` and return the bytes that are sent to the engine.

        Applies the language specific substitutions selected by
        ``_ibmeci.params[9]``, the backquote / pause / time rewrites, and
        prefixes the annotations derived from the current settings.
        """
        # This converts to ansi for anticrash. If this breaks with foreign langs, we can remove it.
        text = text.encode(self.currentEncoding, 'replace')
        # Special unicode symbols may encode to backquote. For this reason, backquote processing is after this.
        text = text.rstrip()
        lang = _ibmeci.params[9]
        isIBM = _ibmeci.isIBM
        if lang in (65536, 65537, 393216, 655360, 720897):
            # Applies to all languages with dual language support.
            text = resub(english_fixes, text)
        if lang in (65536, 65537, 393216, 655360, 720897) and isIBM:
            text = resub(english_ibm_fixes, text)
        if lang in (131072, 131073) and not isIBM:
            text = resub(spanish_fixes, text)
        if lang in ('esp', 131072) and isIBM:
            text = resub(spanish_ibm_fixes, text)
        if lang in (196609, 196608):
            # Sometimes this string makes everything buggy with IBMTTS in French.
            text = text.replace(br'quil', br'qil')
        if lang in ('deu', 262144):
            text = resub(german_fixes, text)
        if lang in ('ptb', 458752) and isIBM:
            text = resub(portuguese_ibm_fixes, text)
        if not self._backquoteVoiceTags:
            # No embedded commands.
            text = text.replace(b'`', b' ')
        if self._shortpause:
            # This enforces short, JAWS-like pauses.
            text = pause_re.sub(br'\1 `p1\2\3\4', text)
        if not isIBM:
            # Apparently if this isn't done strings like 2:30:15 will only announce 2:30.
            text = time_re.sub(br'\1:\2 \3', text)
        embeds = b"`da1 " if self._ABRDICT else b"`da0 "
        embeds += b"`pp1 " if self._phrasePrediction else b"`pp0 "
        if self._sendParams:
            embeds += b"`vv%d `vs%d " % (
                _ibmeci.getVParam(ECIVoiceParam.eciVolume),
                _ibmeci.getVParam(ECIVoiceParam.eciSpeed),
            )
        # All annotations are joined with the text in a single formatting call.
        return b"%s %s" % (embeds.rstrip(), text)

    def pause(self, switch):
        """Forward the pause request to the engine."""
        _ibmeci.pause(switch)

    def terminate(self):
        """Shut the engine down."""
        _ibmeci.terminate()

    def cancel(self):
        """Stop the current speech."""
        _ibmeci.stop()

    def _get_backquoteVoiceTags(self):
        return self._backquoteVoiceTags

    def _set_backquoteVoiceTags(self, enable):
        self._backquoteVoiceTags = enable

    def _get_ABRDICT(self):
        return self._ABRDICT

    def _set_ABRDICT(self, enable):
        self._ABRDICT = enable

    def _get_phrasePrediction(self):
        return self._phrasePrediction

    def _set_phrasePrediction(self, enable):
        self._phrasePrediction = enable

    def _get_shortpause(self):
        return self._shortpause

    def _set_shortpause(self, enable):
        self._shortpause = enable

    def _get_sendParams(self):
        return self._sendParams

    def _set_sendParams(self, enable):
        self._sendParams = enable

    def _get_rateBoost(self):
        return self._rateBoost

    def _set_rateBoost(self, enable):
        if enable != self._rateBoost:
            # Read the percentage with the old flag and re-apply it with the new one,
            # so the engine speed is rescaled while the percentage stays the same.
            rate = self.rate
            self._rateBoost = enable
            self.rate = rate

    def _get_rate(self):
        val = _ibmeci.getVParam(ECIVoiceParam.eciSpeed)
        if self._rateBoost:
            val = int(round(val / self.RATE_BOOST_MULTIPLIER))
        return self._paramToPercent(val, minRate, maxRate)

    def percentToRate(self, val):
        """Convert a rate percentage to the engine speed value, applying rate boost if enabled."""
        val = self._percentToParam(val, minRate, maxRate)
        if self._rateBoost:
            val = int(round(val * self.RATE_BOOST_MULTIPLIER))
        return val

    def _set_rate(self, val):
        val = self.percentToRate(val)
        # Remembered so that _set_variant can re-apply it.
        self._rate = val
        _ibmeci.setVParam(ECIVoiceParam.eciSpeed, val)

    def _get_pitch(self):
        return _ibmeci.getVParam(ECIVoiceParam.eciPitchBaseline)

    def _set_pitch(self, vl):
        _ibmeci.setVParam(ECIVoiceParam.eciPitchBaseline, vl)

    def _get_volume(self):
        return _ibmeci.getVParam(ECIVoiceParam.eciVolume)

    def _set_volume(self, vl):
        _ibmeci.setVParam(ECIVoiceParam.eciVolume, int(vl))

    def _get_inflection(self):
        return _ibmeci.getVParam(ECIVoiceParam.eciPitchFluctuation)

    def _set_inflection(self, vl):
        _ibmeci.setVParam(ECIVoiceParam.eciPitchFluctuation, int(vl))

    def _get_hsz(self):
        return _ibmeci.getVParam(ECIVoiceParam.eciHeadSize)

    def _set_hsz(self, vl):
        _ibmeci.setVParam(ECIVoiceParam.eciHeadSize, int(vl))

    def _get_rgh(self):
        return _ibmeci.getVParam(ECIVoiceParam.eciRoughness)

    def _set_rgh(self, vl):
        _ibmeci.setVParam(ECIVoiceParam.eciRoughness, int(vl))

    def _get_bth(self):
        return _ibmeci.getVParam(ECIVoiceParam.eciBreathiness)

    def _set_bth(self, vl):
        _ibmeci.setVParam(ECIVoiceParam.eciBreathiness, int(vl))

    def _getAvailableVoices(self):
        """Build the voice table from the ``.syn`` files found in ``_ibmeci.ttsPath``."""
        o = OrderedDict()
        for name in os.listdir(_ibmeci.ttsPath):
            lowered = name.lower()
            if lowered.endswith('.syn'):
                # The first three characters of the file name are the key into _ibmeci.langs.
                info = _ibmeci.langs[lowered[:3]]
                o[str(info[0])] = VoiceInfo(str(info[0]), info[1], info[2])
        return o

    def _get_voice(self):
        return str(_ibmeci.params[_ibmeci.ECIParam.eciLanguageDialect])

    def _set_voice(self, vl):
        _ibmeci.setVoice(int(vl))
        self.updateEncoding(int(vl))

    def updateEncoding(self, lang):
        """Select ``self.currentEncoding`` for ``lang``.

        ``lang`` must be a number associated with an IBMTTS language, or a
        bytes language annotation. For an annotation, the number that
        follows its first two bytes is converted; currently the decimal
        part does not need to be considered for the conversion.
        """
        if isinstance(lang, bytes):
            lang = int(float(lang[2:])) * 65536
        self.currentEncoding = self._LANGUAGE_ENCODINGS.get(lang, "mbcs")

    def _get_lastIndex(self):
        # TODO: the original author marked this accessor with "fix?".
        return _ibmeci.lastindex

    def _getAvailableVariants(self):
        return OrderedDict(
            (str(id), synthDriverHandler.VoiceInfo(str(id), name))
            for id, name in variants.items()
        )

    def _get_variant(self):
        return self._variant

    def _set_variant(self, v):
        self._variant = v if int(v) in variants else "1"
        # Send the validated variant, so an unknown id falls back to "1" on the engine too
        # instead of leaving the stored value and the engine out of step.
        _ibmeci.setVariant(int(self._variant))
        # Re-apply the stored speed (presumably the variant switch resets it; verify against _ibmeci).
        _ibmeci.setVParam(ECIVoiceParam.eciSpeed, self._rate)

    def _onIndexReached(self, index):
        synthIndexReached.notify(synth=self, index=index)

    def _onDoneSpeaking(self):
        synthDoneSpeaking.notify(synth=self)
class SynthDriver(SynthDriver):
    """Synthesizer driver for eSpeak NG, driven through ``_espeak``.

    A speech sequence is flattened into one SSML-like unicode string which
    is passed to ``_espeak.speak``. This block targets Python 2
    (``unicode``, ``basestring``, ``dict.iteritems``).
    """

    name = "espeak"
    description = "eSpeak NG"

    supportedSettings = (
        SynthDriver.VoiceSetting(),
        SynthDriver.VariantSetting(),
        SynthDriver.RateSetting(),
        # Translators: This is the name of the rate boost voice toggle
        # which further increases the speaking rate when enabled.
        BooleanSynthSetting("rateBoost", _("Rate boos&t")),
        SynthDriver.PitchSetting(),
        SynthDriver.InflectionSetting(),
        SynthDriver.VolumeSetting(),
    )

    # Maps each prosody command type to the attribute name used in the <prosody> tag.
    PROSODY_ATTRS = {
        speech.PitchCommand: "pitch",
        speech.VolumeCommand: "volume",
        speech.RateCommand: "rate",
    }

    # IPA characters that can be translated to eSpeak phoneme mnemonics.
    IPA_TO_ESPEAK = {
        u"θ": u"T",
        u"s": u"s",
        u"ˈ": u"'",
    }

    _rateBoost = False
    RATE_BOOST_MULTIPLIER = 3

    @classmethod
    def check(cls):
        """This driver always reports itself as available."""
        return True

    def __init__(self):
        """Initialize eSpeak, select the voice for the current language and apply the defaults."""
        _espeak.initialize()
        log.info("Using eSpeak version %s" % _espeak.info())
        lang = languageHandler.getLanguage()
        _espeak.setVoiceByLanguage(lang)
        self._language = lang
        self._variantDict = _espeak.getVariantDict()
        self.variant = "max"
        self.rate = 30
        self.pitch = 40
        self.inflection = 75

    def _get_language(self):
        return self._language

    def _processText(self, text):
        """Return ``text`` as unicode, made safe for embedding in the markup sent to eSpeak."""
        text = unicode(text)
        # We need to make several replacements.
        return text.translate({
            0x1: None,  # used for embedded commands
            # The angle brackets must become entities: mapping them to themselves
            # would leave them in the markup and let plain text be read as tags.
            0x3C: u"&lt;",  # <: because of XML
            0x3E: u"&gt;",  # >: because of XML
            0x5B: u" [",  # [: [[ indicates phonemes
        })

    def speak(self, speechSequence):
        """Convert ``speechSequence`` to markup and hand it to ``_espeak.speak``."""
        defaultLanguage = self._language
        textList = []
        langChanged = False
        prosody = {}
        # We output malformed XML, as we might close an outer tag after opening an inner one; e.g.
        # <voice><prosody></voice></prosody>.
        # However, eSpeak doesn't seem to mind.
        for item in speechSequence:
            if isinstance(item, basestring):
                textList.append(self._processText(item))
            elif isinstance(item, speech.IndexCommand):
                textList.append("<mark name=\"%d\" />" % item.index)
            elif isinstance(item, speech.CharacterModeCommand):
                textList.append("<say-as interpret-as=\"characters\">" if item.state else "</say-as>")
            elif isinstance(item, speech.LangChangeCommand):
                if langChanged:
                    textList.append("</voice>")
                lang = item.lang if item.lang else defaultLanguage
                textList.append("<voice xml:lang=\"%s\">" % lang.replace('_', '-'))
                langChanged = True
            elif isinstance(item, speech.BreakCommand):
                textList.append('<break time="%dms" />' % item.time)
            elif type(item) in self.PROSODY_ATTRS:
                if prosody:
                    # Close previous prosody tag.
                    textList.append("</prosody>")
                attr = self.PROSODY_ATTRS[type(item)]
                if item.multiplier == 1:
                    # Returning to normal.
                    prosody.pop(attr, None)
                else:
                    prosody[attr] = int(item.multiplier * 100)
                if not prosody:
                    continue
                textList.append("<prosody")
                for attr, val in prosody.iteritems():
                    textList.append(' %s="%d%%"' % (attr, val))
                textList.append(">")
            elif isinstance(item, speech.PhonemeCommand):
                # We can't use unicode.translate because we want to reject unknown characters.
                try:
                    phonemes = "".join([self.IPA_TO_ESPEAK[char] for char in item.ipa])
                    # There needs to be a space after the phoneme command.
                    # Otherwise, eSpeak will announce a subsequent SSML tag instead of processing it.
                    textList.append(u"[[%s]] " % phonemes)
                except KeyError:
                    log.debugWarning("Unknown character in IPA string: %s" % item.ipa)
                    if item.text:
                        # Fall back to the plain text supplied with the command.
                        textList.append(self._processText(item.text))
            elif isinstance(item, speech.SpeechCommand):
                log.debugWarning("Unsupported speech command: %s" % item)
            else:
                log.error("Unknown speech: %s" % item)
        # Close any open tags.
        if langChanged:
            textList.append("</voice>")
        if prosody:
            textList.append("</prosody>")
        text = u"".join(textList)
        _espeak.speak(text)

    def cancel(self):
        """Stop the current speech."""
        _espeak.stop()

    def pause(self, switch):
        """Forward the pause request to eSpeak."""
        _espeak.pause(switch)

    def terminate(self):
        """Shut eSpeak down."""
        _espeak.terminate()

    def _get_rateBoost(self):
        return self._rateBoost

    def _set_rateBoost(self, enable):
        if enable == self._rateBoost:
            return
        # Read the percentage with the old flag and re-apply it with the new one.
        rate = self.rate
        self._rateBoost = enable
        self.rate = rate

    def _get_rate(self):
        val = _espeak.getParameter(_espeak.espeakRATE, 1)
        if self._rateBoost:
            val = int(val / self.RATE_BOOST_MULTIPLIER)
        return self._paramToPercent(val, _espeak.minRate, _espeak.maxRate)

    def _set_rate(self, rate):
        val = self._percentToParam(rate, _espeak.minRate, _espeak.maxRate)
        if self._rateBoost:
            val = int(val * self.RATE_BOOST_MULTIPLIER)
        _espeak.setParameter(_espeak.espeakRATE, val, 0)

    def _get_pitch(self):
        val = _espeak.getParameter(_espeak.espeakPITCH, 1)
        return self._paramToPercent(val, _espeak.minPitch, _espeak.maxPitch)

    def _set_pitch(self, pitch):
        val = self._percentToParam(pitch, _espeak.minPitch, _espeak.maxPitch)
        _espeak.setParameter(_espeak.espeakPITCH, val, 0)

    def _get_inflection(self):
        val = _espeak.getParameter(_espeak.espeakRANGE, 1)
        return self._paramToPercent(val, _espeak.minPitch, _espeak.maxPitch)

    def _set_inflection(self, val):
        val = self._percentToParam(val, _espeak.minPitch, _espeak.maxPitch)
        _espeak.setParameter(_espeak.espeakRANGE, val, 0)

    def _get_volume(self):
        return _espeak.getParameter(_espeak.espeakVOLUME, 1)

    def _set_volume(self, volume):
        _espeak.setParameter(_espeak.espeakVOLUME, volume, 0)

    def _getAvailableVoices(self):
        """Build the voice table from ``_espeak.getVoiceList()``."""
        voices = OrderedDict()
        for v in _espeak.getVoiceList():
            l = v.languages[1:]
            # #5783: For backwards compatibility, voice identifiers should always be lowercase
            identifier = os.path.basename(v.identifier).lower()
            voices[identifier] = VoiceInfo(identifier, v.name, l)
        return voices

    def _get_voice(self):
        curVoice = getattr(self, '_voice', None)
        if curVoice:
            return curVoice
        curVoice = _espeak.getCurrentVoice()
        if not curVoice:
            return ""
        # #5783: For backwards compatibility, voice identifiers should always be lowercase
        return curVoice.identifier.split('+')[0].lower()

    def _set_voice(self, identifier):
        if not identifier:
            return
        # #5783: For backwards compatibility, voice identifiers should always be lowercase
        identifier = identifier.lower()
        if "\\" in identifier:
            identifier = os.path.basename(identifier)
        self._voice = identifier
        try:
            _espeak.setVoiceAndVariant(voice=identifier, variant=self._variant)
        except:
            # Forget the voice that could not be set, then let the error propagate.
            self._voice = None
            raise
        self._language = super(SynthDriver, self).language

    def _get_lastIndex(self):
        return _espeak.lastIndex

    def _get_variant(self):
        return self._variant

    def _set_variant(self, val):
        # Unknown variants fall back to "max".
        self._variant = val if val in self._variantDict else "max"
        _espeak.setVoiceAndVariant(variant=self._variant)

    def _getAvailableVariants(self):
        return OrderedDict(
            (ID, VoiceInfo(ID, name)) for ID, name in self._variantDict.iteritems()
        )
class SynthDriver(SynthDriver):
    """Synthesizer driver for nvSpeechPlayer.

    Text is converted to phonemes with eSpeak (``espeak_TextToPhonemes``),
    turned into frames by ``ipa.generateFramesAndTiming`` and queued on a
    ``speechPlayer.SpeechPlayer``; an ``AudioThread`` is signalled once
    frames have been queued.
    """

    # When true, every field of speechPlayer.Frame is exposed as an extra numeric setting.
    exposeExtraParams = True

    name = "nvSpeechPlayer"
    description = "nvSpeechPlayer"

    supportedSettings = (
        SynthDriver.VoiceSetting(),
        SynthDriver.RateSetting(),
        SynthDriver.PitchSetting(),
        SynthDriver.VolumeSetting(),
        SynthDriver.InflectionSetting(),
    )
    supportedCommands = {
        speech.IndexCommand,
        speech.PitchCommand,
    }
    supportedNotifications = {synthIndexReached, synthDoneSpeaking}

    # Backing fields (and defaults) for the settings below.
    _curPitch = 50
    _curVoice = 'Adam'
    _curInflection = 0.5
    _curVolume = 1.0
    _curRate = 1.0

    def __init__(self):
        """Create the player, initialize eSpeak, apply the defaults and create the audio thread."""
        if self.exposeExtraParams:
            self._extraParamNames = [x[0] for x in speechPlayer.Frame._fields_]
            self.supportedSettings = SynthDriver.supportedSettings + tuple(
                NumericDriverSetting(
                    "speechPlayer_%s" % x, "frame.%s" % x, normalStep=1)
                for x in self._extraParamNames)
            for x in self._extraParamNames:
                # 50 is the neutral value: speak() scales each frame field by value / 50.0.
                setattr(self, "speechPlayer_%s" % x, 50)
        self.player = speechPlayer.SpeechPlayer(16000)
        _espeak.initialize()
        _espeak.setVoiceByLanguage('en')
        self.pitch = 50
        self.rate = 50
        self.volume = 90
        self.inflection = 60
        self.audioThread = AudioThread(self, self.player, 16000)

    @classmethod
    def check(cls):
        """This driver always reports itself as available."""
        return True

    @staticmethod
    def _mergeAdjacentStrings(speakList):
        """Return a new list in which runs of adjacent strings are joined with a space."""
        merged = []
        for item in speakList:
            if merged and isinstance(item, str) and isinstance(merged[-1], str):
                merged[-1] = " ".join([merged[-1], item])
            else:
                merged.append(item)
        return merged

    def _textToPhonemes(self, chunk):
        """Return the phoneme string eSpeak produces for ``chunk``, with local substitutions applied."""
        textBuf = ctypes.create_unicode_buffer(chunk)
        textPtr = ctypes.c_void_p(ctypes.addressof(textBuf))
        pieces = []
        while textPtr:
            phonemeBuf = _espeak.espeakDLL.espeak_TextToPhonemes(
                ctypes.byref(textPtr), _espeak.espeakCHARS_WCHAR,
                0x36100 + 0x82)
            # NOTE(review): this loop only ends once the call has cleared textPtr;
            # confirm that it always does so, including when it returns no buffer.
            if not phonemeBuf:
                continue
            pieces.append(ctypes.string_at(phonemeBuf))
        phonemes = b"".join(pieces).decode('utf8')
        phonemes = phonemes.replace('ə͡l', 'ʊ͡l')
        phonemes = phonemes.replace('a͡ɪ', 'ɑ͡ɪ')
        phonemes = phonemes.replace('e͡ɪ', 'e͡i')
        phonemes = phonemes.replace('ə͡ʊ', 'o͡u')
        return phonemes.strip()

    def speak(self, speakList):
        """Queue frames for every text chunk in ``speakList`` and wake the audio thread.

        A pitch command sets the pitch offset for the text that follows it.
        An index command is attached to the next queued frame. The list
        passed in is not modified.
        """
        userIndex = None
        pitchOffset = 0
        # Merge adjacent strings into a local copy, so the caller's list stays untouched.
        items = self._mergeAdjacentStrings(speakList)
        endPause = 20
        for item in items:
            if isinstance(item, speech.PitchCommand):
                pitchOffset = item.offset
            elif isinstance(item, speech.IndexCommand):
                userIndex = item.index
            elif isinstance(item, str):
                for chunk in re_textPause.split(item):
                    if not chunk:
                        continue
                    chunk = chunk.strip()
                    if not chunk:
                        continue
                    # The last character decides the clause type and the pause that follows.
                    clauseType = chunk[-1]
                    if clauseType in ('.', '!'):
                        endPause = 150
                    elif clauseType == '?':
                        endPause = 150
                    elif clauseType == ',':
                        endPause = 120
                    else:
                        endPause = 100
                        clauseType = None
                    endPause /= self._curRate
                    chunk = self._textToPhonemes(chunk)
                    if not chunk:
                        continue
                    pitch = self._curPitch + pitchOffset
                    basePitch = 25 + (21.25 * (pitch / 12.5))
                    for args in ipa.generateFramesAndTiming(
                            chunk,
                            speed=self._curRate,
                            basePitch=basePitch,
                            inflection=self._curInflection,
                            clauseType=clauseType):
                        frame = args[0]
                        if frame:
                            applyVoiceToFrame(frame, self._curVoice)
                            if self.exposeExtraParams:
                                for x in self._extraParamNames:
                                    ratio = getattr(
                                        self, "speechPlayer_%s" % x) / 50.0
                                    setattr(frame, x, getattr(frame, x) * ratio)
                            frame.preFormantGain *= self._curVolume
                        self.player.queueFrame(*args, userIndex=userIndex)
                        # The index is attached to one frame only.
                        userIndex = None
        # Closing silence; it also carries an index that has not been attached to a frame yet.
        self.player.queueFrame(None,
                               endPause,
                               max(10.0, 10.0 / self._curRate),
                               userIndex=userIndex)
        self.audioThread.isSpeaking = True
        self.audioThread.synthEvent.set()

    def cancel(self):
        """Purge the frame queue and stop playback."""
        self.player.queueFrame(None, 20, 5, purgeQueue=True)
        self.audioThread.isSpeaking = False
        self.audioThread.synthEvent.set()
        self.audioThread.wavePlayer.stop()

    def pause(self, switch):
        """Forward the pause request to the wave player."""
        self.audioThread.wavePlayer.pause(switch)

    def _get_rate(self):
        # Inverse of _set_rate.
        return int(math.log(self._curRate / 0.25, 2) * 25.0)

    def _set_rate(self, val):
        # The speed multiplier doubles every 25 percent, starting at 0.25 for 0.
        self._curRate = 0.25 * (2**(val / 25.0))

    def _get_pitch(self):
        return self._curPitch

    def _set_pitch(self, val):
        self._curPitch = val

    def _get_volume(self):
        return int(self._curVolume * 75)

    def _set_volume(self, val):
        self._curVolume = val / 75.0

    def _get_inflection(self):
        return int(self._curInflection / 0.01)

    def _set_inflection(self, val):
        self._curInflection = val * 0.01

    def _get_voice(self):
        return self._curVoice

    def _set_voice(self, voice):
        if voice not in self.availableVoices:
            voice = 'Adam'
        self._curVoice = voice
        if self.exposeExtraParams:
            # Changing the voice resets every extra frame parameter to its neutral value.
            for paramName in self._extraParamNames:
                setattr(self, "speechPlayer_%s" % paramName, 50)

    def _getAvailableVoices(self):
        d = OrderedDict()
        for name in sorted(voices):
            d[name] = VoiceInfo(name, name)
        return d

    def terminate(self):
        """Stop the audio thread, drop the player and shut eSpeak down."""
        self.audioThread.terminate()
        del self.player
        _espeak.terminate()
class SynthDriver(synthDriverHandler.SynthDriver):
    """Synthesizer driver for ETI-Eloquence, driven through ``_eloquence``.

    A speech sequence is translated into a list of ``(callable, args)``
    pairs which is put on ``_eloquence.synth_queue``.
    """

    supportedSettings = (
        SynthDriver.VoiceSetting(),
        SynthDriver.VariantSetting(),
        SynthDriver.RateSetting(),
        SynthDriver.PitchSetting(),
        SynthDriver.InflectionSetting(),
        SynthDriver.VolumeSetting(),
        driverHandler.NumericDriverSetting("hsz", "Head Size"),
        driverHandler.NumericDriverSetting("rgh", "Roughness"),
        driverHandler.NumericDriverSetting("bth", "Breathiness"),
        driverHandler.BooleanDriverSetting(
            "backquoteVoiceTags", "Enable backquote voice &tags", True),
    )
    supportedCommands = {
        speech.IndexCommand,
        speech.CharacterModeCommand,
        speech.LangChangeCommand,
        speech.BreakCommand,
        speech.PitchCommand,
        speech.RateCommand,
        speech.VolumeCommand,
        speech.PhonemeCommand,
    }
    supportedNotifications = {synthIndexReached, synthDoneSpeaking}

    # Maps each prosody command type to the engine voice parameter it drives.
    PROSODY_ATTRS = {
        speech.PitchCommand: _eloquence.pitch,
        speech.VolumeCommand: _eloquence.vlm,
        speech.RateCommand: _eloquence.rate,
    }

    description = 'ETI-Eloquence'
    name = 'eloquence'

    # Eloquence doesn't respect delay time in milliseconds. Therefore the
    # waiting time is adjusted depending on the current speech rate. These
    # (rate percent, multiplier) pairs were measured empirically; values in
    # between are linearly interpolated. Must stay sorted by rate.
    _BREAK_RATE_COEFFICIENTS = ((10, 1), (43, 2), (60, 3), (75, 4), (85, 5))

    _backquoteVoiceTags = False

    @classmethod
    def check(cls):
        """Return the result of ``_eloquence.eciCheck()``."""
        return _eloquence.eciCheck()

    def __init__(self):
        """Initialize the engine and apply the initial voice, rate and variant."""
        _eloquence.initialize(self._onIndexReached)
        self.curvoice = "enu"
        self.rate = 50
        self.variant = "1"

    def _breakRateFactor(self):
        """Return the pause multiplier matching the current speech rate.

        The rate is clamped to the first/last entry of
        ``_BREAK_RATE_COEFFICIENTS``; an exact table hit returns the table
        value and anything in between is linearly interpolated.
        """
        rate = self.rate
        table = self._BREAK_RATE_COEFFICIENTS
        if rate <= table[0][0]:
            return table[0][1]
        if rate >= table[-1][0]:
            return table[-1][1]
        for (lowRate, lowFactor), (highRate, highFactor) in zip(table, table[1:]):
            if rate == lowRate:
                return lowFactor
            if lowRate < rate < highRate:
                slope = (highFactor - lowFactor) / float(highRate - lowRate)
                return lowFactor + slope * (rate - lowRate)
        # Unreachable while the table is sorted; kept as a defensive default.
        return table[-1][1]

    def speak(self, speechSequence):
        """Translate ``speechSequence`` into engine calls and queue them.

        Strings, index, break and prosody commands are handled. A phoneme
        command is spoken through its fallback text. Any other item is
        ignored.
        """
        last = None
        outlist = []
        for item in speechSequence:
            if isinstance(item, str):
                s = self.xspeakText(item)
                outlist.append((_eloquence.speak, (s,)))
                last = s
            elif isinstance(item, speech.IndexCommand):
                outlist.append((_eloquence.index, (item.index,)))
            elif isinstance(item, speech.BreakCommand):
                pFactor = int(self._breakRateFactor() * item.time)
                outlist.append((_eloquence.speak, (f'`p{pFactor}.',)))
            elif type(item) in self.PROSODY_ATTRS:
                pr = self.PROSODY_ATTRS[type(item)]
                # A multiplier of 1 reverts back to defaults, which is signalled with None.
                value = None if item.multiplier == 1 else item.multiplier
                outlist.append((_eloquence.cmdProsody, (pr, value)))
            elif isinstance(item, speech.PhonemeCommand):
                # PhonemeCommand is listed in supportedCommands, but nothing here converts IPA.
                # Speak the fallback text so the content is not silently lost
                # (assumes item.text is the plain-text alternative, or empty/None when absent).
                if item.text:
                    s = self.xspeakText(item.text)
                    outlist.append((_eloquence.speak, (s,)))
                    last = s
        if last is not None and not last.rstrip()[-1] in punctuation:
            outlist.append((_eloquence.speak, ('`p1.',)))
        # 0xffff is queued as the last index of every utterance.
        outlist.append((_eloquence.index, (0xffff,)))
        outlist.append((_eloquence.synth, ()))
        _eloquence.synth_queue.put(outlist)
        _eloquence.process()

    def xspeakText(self, text, should_pause=False):
        """Return ``text`` prepared for the engine.

        Applies the language specific substitutions selected by
        ``_eloquence.params[9]``, normalization, the anticrash / pause / time
        rewrites, and prefixes the phrase prediction and volume annotations.
        When ``should_pause`` is true a short pause is appended.
        """
        lang = _eloquence.params[9]
        if lang == 65536 or lang == 65537:
            text = resub(english_fixes, text)
        if lang == 131072 or lang == 131073:
            text = resub(spanish_fixes, text)
        if lang in (196609, 196608):
            text = resub(french_fixes, text)
        text = normalizeText(text)
        text = resub(anticrash_res, text)
        if not self._backquoteVoiceTags:
            # No embedded commands.
            text = text.replace('`', ' ')
        text = "`pp0 `vv%d %s" % (self.getVParam(_eloquence.vlm), text)
        text = pause_re.sub(r'\1 `p1\2\3', text)
        text = time_re.sub(r'\1:\2 \3', text)
        # If two strings are sent separately, pause between them.
        # This might fix some of the audio issues we're having.
        if should_pause:
            text = text + ' `p1.'
        return text

    def pause(self, switch):
        """Forward the pause request to the engine."""
        _eloquence.pause(switch)

    def terminate(self):
        """Shut the engine down."""
        _eloquence.terminate()

    def cancel(self):
        """Stop the current speech."""
        _eloquence.stop()

    def getVParam(self, pr):
        """Read voice parameter ``pr`` from the engine."""
        return _eloquence.getVParam(pr)

    def setVParam(self, pr, vl):
        """Write voice parameter ``pr`` to the engine."""
        _eloquence.setVParam(pr, vl)

    def _get_backquoteVoiceTags(self):
        return self._backquoteVoiceTags

    def _set_backquoteVoiceTags(self, enable):
        self._backquoteVoiceTags = enable

    def _get_rate(self):
        return self._paramToPercent(self.getVParam(_eloquence.rate), minRate, maxRate)

    def _set_rate(self, vl):
        # Convert once; the value is remembered so that _set_variant can re-apply it.
        self._rate = self._percentToParam(vl, minRate, maxRate)
        self.setVParam(_eloquence.rate, self._rate)

    def _get_pitch(self):
        return self.getVParam(_eloquence.pitch)

    def _set_pitch(self, vl):
        self.setVParam(_eloquence.pitch, vl)

    def _get_volume(self):
        return self.getVParam(_eloquence.vlm)

    def _set_volume(self, vl):
        self.setVParam(_eloquence.vlm, int(vl))

    def _get_inflection(self):
        return self.getVParam(_eloquence.fluctuation)

    def _set_inflection(self, vl):
        self.setVParam(_eloquence.fluctuation, int(vl))

    def _get_hsz(self):
        return self.getVParam(_eloquence.hsz)

    def _set_hsz(self, vl):
        self.setVParam(_eloquence.hsz, int(vl))

    def _get_rgh(self):
        return self.getVParam(_eloquence.rgh)

    def _set_rgh(self, vl):
        self.setVParam(_eloquence.rgh, int(vl))

    def _get_bth(self):
        return self.getVParam(_eloquence.bth)

    def _set_bth(self, vl):
        self.setVParam(_eloquence.bth, int(vl))

    def _getAvailableVoices(self):
        """Build the voice table from the ``.syn`` files found next to the engine library."""
        o = OrderedDict()
        # The last eight characters of eciPath are dropped to get the directory
        # (presumably the library file name; verify against _eloquence).
        for name in os.listdir(_eloquence.eciPath[:-8]):
            lowered = name.lower()
            if not lowered.endswith('.syn'):
                continue
            # The file name without its extension is the key into _eloquence.langs.
            info = _eloquence.langs[lowered[:-4]]
            o[str(info[0])] = synthDriverHandler.VoiceInfo(
                str(info[0]), info[1], None)
        return o

    def _get_voice(self):
        return str(_eloquence.params[9])

    def _set_voice(self, vl):
        _eloquence.set_voice(vl)
        self.curvoice = vl

    def _get_lastIndex(self):
        # TODO: the original author marked this accessor with "fix?".
        return _eloquence.lastindex

    def _getAvailableVariants(self):
        return OrderedDict(
            (str(id), synthDriverHandler.VoiceInfo(str(id), name))
            for id, name in variants.items())

    def _get_variant(self):
        return self._variant

    def _set_variant(self, v):
        self._variant = v if int(v) in variants else "1"
        # Send the validated variant, so an unknown id falls back to "1" on the engine too
        # instead of leaving the stored value and the engine out of step.
        _eloquence.setVariant(int(self._variant))
        # Re-apply the stored rate (presumably the variant switch resets it; verify against _eloquence).
        self.setVParam(_eloquence.rate, self._rate)

    def _onIndexReached(self, index):
        # The engine callback passes None instead of an index once speech has finished.
        if index is not None:
            synthIndexReached.notify(synth=self, index=index)
        else:
            synthDoneSpeaking.notify(synth=self)
class SynthDriver(synthDriverHandler.SynthDriver):
    """Synthesizer driver for the IBMTTS engine, driven through ``_ibmeci``.

    A speech sequence is translated into a list of ``(callable, args)``
    pairs which is put on ``_ibmeci.eciQueue``. Text is handed to the engine
    as "mbcs" encoded bytes.
    """

    supportedSettings = (
        SynthDriver.VoiceSetting(),
        SynthDriver.VariantSetting(),
        SynthDriver.RateSetting(),
        BooleanDriverSetting("rateBoost", _("Rate boos&t"), True),
        SynthDriver.PitchSetting(),
        SynthDriver.InflectionSetting(),
        SynthDriver.VolumeSetting(),
        NumericDriverSetting("hsz", _("Head Size"), False),
        NumericDriverSetting("rgh", _("Roughness"), False),
        NumericDriverSetting("bth", _("Breathiness"), False),
        BooleanDriverSetting(
            "backquoteVoiceTags", _("Enable backquote voice &tags"), False),
    )
    supportedCommands = {
        speech.IndexCommand,
        speech.CharacterModeCommand,
        speech.LangChangeCommand,
        speech.BreakCommand,
        speech.PitchCommand,
        speech.RateCommand,
        speech.VolumeCommand,
    }
    supportedNotifications = {synthIndexReached, synthDoneSpeaking}

    description = 'IBMTTS'
    name = 'ibmeci'
    speakingLanguage = ""

    # Maps each prosody command type to the engine voice parameter it drives.
    PROSODY_ATTRS = {
        speech.PitchCommand: ECIVoiceParam.eciPitchBaseline,
        speech.VolumeCommand: ECIVoiceParam.eciVolume,
        speech.RateCommand: ECIVoiceParam.eciSpeed,
    }

    _backquoteVoiceTags = False
    _rateBoost = False
    RATE_BOOST_MULTIPLIER = 1.6

    @classmethod
    def check(cls):
        """Return the result of ``_ibmeci.eciCheck()``."""
        return _ibmeci.eciCheck()

    def __init__(self):
        """Initialize the engine and apply the initial rate, language and variant."""
        _ibmeci.initialize(self._onIndexReached, self._onDoneSpeaking)
        # This information doesn't really need to be displayed, and makes IBMTTS
        # unusable if the addon is not in the same drive as the NVDA executable.
        # It is logged at debug level only, in case it is useful.
        log.debug("Using IBMTTS version %s" % _ibmeci.eciVersion())
        lang = languageHandler.getLanguage()
        self.rate = 50
        self.speakingLanguage = lang
        self.variant = "1"

    def speak(self, speechSequence):
        """Translate ``speechSequence`` into engine calls and queue them.

        Strings are passed through ``processText``; index, language change,
        character mode, break and prosody commands become their own queue
        entries. Anything else is logged as an error.
        """
        last = None
        defaultLanguage = self.language
        # Every utterance starts by switching character mode off.
        outlist = [(_ibmeci.speak, (b"`ts0",))]
        for item in speechSequence:
            if isinstance(item, string_types):
                s = self.processText(unicode(item))
                outlist.append((_ibmeci.speak, (s,)))
                last = s
            elif isinstance(item, speech.IndexCommand):
                outlist.append((_ibmeci.index, (item.index,)))
            elif isinstance(item, speech.LangChangeCommand):
                annotation = None
                if item.lang in langsAnnotations:
                    annotation = langsAnnotations[item.lang]
                elif item.lang and item.lang[0:2] in langsAnnotations:
                    annotation = langsAnnotations[item.lang[0:2]]
                if annotation:
                    # Only switch when the requested language differs from the one being spoken.
                    if item.lang != self.speakingLanguage and item.lang != self.speakingLanguage[0:2]:
                        outlist.append((_ibmeci.speak, (annotation,)))
                        self.speakingLanguage = item.lang
                else:
                    outlist.append(
                        (_ibmeci.speak, (langsAnnotations[defaultLanguage],)))
                    self.speakingLanguage = defaultLanguage
            elif isinstance(item, speech.CharacterModeCommand):
                outlist.append(
                    (_ibmeci.speak, (b"`ts1" if item.state else b"`ts0",)))
            elif isinstance(item, speech.BreakCommand):
                outlist.append((_ibmeci.speak, (b' `p%d ' % item.time,)))
            elif type(item) in self.PROSODY_ATTRS:
                val = max(0, min(item.newValue, 100))
                if isinstance(item, speech.RateCommand):
                    val = self.percentToRate(val)
                outlist.append((_ibmeci.setProsodyParam,
                                (self.PROSODY_ATTRS[type(item)], val)))
            else:
                log.error("Unknown speech: %s" % item)
        if last is not None and last[-1] not in punctuation:
            # Check if a prosody command is at the end of the list, because p1 needs to be sent before it.
            # Index -2 is used because -1 always seems to be an index command.
            # The list always holds the leading `ts0 entry plus the text here, so [-2] exists.
            if outlist[-2][0] == _ibmeci.setProsodyParam:
                outlist.insert(-2, (_ibmeci.speak, (b'`p1. ',)))
            else:
                outlist.append((_ibmeci.speak, (b'`p1. ',)))
        outlist.append((_ibmeci.setEndStringMark, ()))
        outlist.append((_ibmeci.synth, ()))
        _ibmeci.eciQueue.put(outlist)
        _ibmeci.process()

    def processText(self, text):
        """Return ``text`` as the "mbcs" encoded bytes that are sent to the engine.

        Applies the language specific substitutions selected by
        ``_ibmeci.params[9]``, then the anticrash / pause / time rewrites,
        and prefixes the phrase prediction and volume annotations.
        Backquotes are blanked out unless backquote voice tags are enabled.
        """
        text = text.rstrip()
        lang = _ibmeci.params[9]
        if lang in (65536, 65537):
            text = resub(english_fixes, text)
        if lang in (131072, 131073):
            text = resub(spanish_fixes, text)
        if lang in (196609, 196608):
            text = resub(french_fixes, text)
            # Sometimes this string makes everything buggy with IBMTTS in French.
            text = text.replace('quil', 'qil')
        # This converts to ansi for anticrash. If this breaks with foreign langs, we can remove it.
        text = text.encode('mbcs', 'replace')
        text = resub(anticrash_res, text)
        if not self._backquoteVoiceTags:
            # No embedded commands. Stripping only when the setting is off is what lets
            # the setting take effect; doing it after encoding also catches characters
            # that were encoded to a backquote.
            text = text.replace(b'`', b' ')
        text = b"`pp0 `vv%d %s" % (_ibmeci.getVParam(ECIVoiceParam.eciVolume), text)
        text = pause_re.sub(br'\1 `p1\2\3', text)
        text = time_re.sub(br'\1:\2 \3', text)
        return text

    def pause(self, switch):
        """Forward the pause request to the engine."""
        _ibmeci.pause(switch)

    def terminate(self):
        """Shut the engine down."""
        _ibmeci.terminate()

    def cancel(self):
        """Stop the current speech."""
        _ibmeci.stop()

    def _get_backquoteVoiceTags(self):
        return self._backquoteVoiceTags

    def _set_backquoteVoiceTags(self, enable):
        self._backquoteVoiceTags = enable

    def _get_rateBoost(self):
        return self._rateBoost

    def _set_rateBoost(self, enable):
        if enable != self._rateBoost:
            # Read the percentage with the old flag and re-apply it with the new one,
            # so the engine speed is rescaled while the percentage stays the same.
            rate = self.rate
            self._rateBoost = enable
            self.rate = rate

    def _get_rate(self):
        val = _ibmeci.getVParam(ECIVoiceParam.eciSpeed)
        if self._rateBoost:
            val = int(round(val / self.RATE_BOOST_MULTIPLIER))
        return self._paramToPercent(val, minRate, maxRate)

    def percentToRate(self, val):
        """Convert a rate percentage to the engine speed value, applying rate boost if enabled."""
        val = self._percentToParam(val, minRate, maxRate)
        if self._rateBoost:
            val = int(round(val * self.RATE_BOOST_MULTIPLIER))
        return val

    def _set_rate(self, val):
        val = self.percentToRate(val)
        # Remembered so that _set_variant can re-apply it.
        self._rate = val
        _ibmeci.setVParam(ECIVoiceParam.eciSpeed, val)

    def _get_pitch(self):
        return _ibmeci.getVParam(ECIVoiceParam.eciPitchBaseline)

    def _set_pitch(self, vl):
        _ibmeci.setVParam(ECIVoiceParam.eciPitchBaseline, vl)

    def _get_volume(self):
        return _ibmeci.getVParam(ECIVoiceParam.eciVolume)

    def _set_volume(self, vl):
        _ibmeci.setVParam(ECIVoiceParam.eciVolume, int(vl))

    def _get_inflection(self):
        return _ibmeci.getVParam(ECIVoiceParam.eciPitchFluctuation)

    def _set_inflection(self, vl):
        _ibmeci.setVParam(ECIVoiceParam.eciPitchFluctuation, int(vl))

    def _get_hsz(self):
        return _ibmeci.getVParam(ECIVoiceParam.eciHeadSize)

    def _set_hsz(self, vl):
        _ibmeci.setVParam(ECIVoiceParam.eciHeadSize, int(vl))

    def _get_rgh(self):
        return _ibmeci.getVParam(ECIVoiceParam.eciRoughness)

    def _set_rgh(self, vl):
        _ibmeci.setVParam(ECIVoiceParam.eciRoughness, int(vl))

    def _get_bth(self):
        return _ibmeci.getVParam(ECIVoiceParam.eciBreathiness)

    def _set_bth(self, vl):
        _ibmeci.setVParam(ECIVoiceParam.eciBreathiness, int(vl))

    def _getAvailableVoices(self):
        """Build the voice table from the ``.syn`` files found in ``_ibmeci.ttsPath``."""
        o = OrderedDict()
        for name in os.listdir(_ibmeci.ttsPath):
            lowered = name.lower()
            if lowered.endswith('.syn'):
                # The first three characters of the file name are the key into _ibmeci.langs.
                info = _ibmeci.langs[lowered[:3]]
                o[str(info[0])] = VoiceInfo(str(info[0]), info[1], info[2])
        return o

    def _get_voice(self):
        return str(_ibmeci.params[_ibmeci.ECIParam.eciLanguageDialect])

    def _set_voice(self, vl):
        _ibmeci.set_voice(vl)

    def _get_lastIndex(self):
        # TODO: the original author marked this accessor with "fix?".
        return _ibmeci.lastindex

    def _getAvailableVariants(self):
        return OrderedDict(
            (str(id), synthDriverHandler.VoiceInfo(str(id), name))
            for id, name in variants.items())

    def _get_variant(self):
        return self._variant

    def _set_variant(self, v):
        self._variant = v if int(v) in variants else "1"
        # Send the validated variant, so an unknown id falls back to "1" on the engine too
        # instead of leaving the stored value and the engine out of step.
        _ibmeci.setVariant(int(self._variant))
        # Re-apply the stored speed (presumably the variant switch resets it; verify against _ibmeci).
        _ibmeci.setVParam(ECIVoiceParam.eciSpeed, self._rate)

    def _onIndexReached(self, index):
        synthIndexReached.notify(synth=self, index=index)

    def _onDoneSpeaking(self):
        synthDoneSpeaking.notify(synth=self)
class SynthDriver(synthDriverHandler.SynthDriver):
    """NVDA synthesizer driver for IBMTTS, implemented on top of ``_ibmeci``.

    Speech sequences are converted into a list of ``(callable, args)`` pairs
    which is placed on ``_ibmeci.synthQueue``; voice parameters are read and
    written through ``_ibmeci.getVParam`` / ``_ibmeci.setVParam``.
    """

    supportedSettings = (
        SynthDriver.VoiceSetting(),
        SynthDriver.VariantSetting(),
        SynthDriver.RateSetting(),
        BooleanSynthSetting("rateBoost", _("Rate boos&t")),
        SynthDriver.PitchSetting(),
        SynthDriver.InflectionSetting(),
        SynthDriver.VolumeSetting(),
        NumericSynthSetting("hsz", _("Head Size"), False),
        NumericSynthSetting("rgh", _("Roughness"), False),
        NumericSynthSetting("bth", _("Breathiness"), False),
        BooleanSynthSetting("backquoteVoiceTags", _("Enable backquote voice &tags"), False),
    )
    description = 'IBMTTS'
    name = 'ibmeci'
    # Language most recently requested through a LangChangeCommand.
    speakingLanguage = ""

    @classmethod
    def check(cls):
        """Return the result of ``_ibmeci.eciCheck()``."""
        return _ibmeci.eciCheck()

    def __init__(self):
        _ibmeci.initialize()
        # This information doesn't really need to be displayed, and makes IBMTTS
        # unusable if the addon is not in the same drive as NVDA executable.
        # But display it only on debug mode in case of it can be useful
        log.debug("Using IBMTTS version %s" % _ibmeci.eciVersion())
        lang = languageHandler.getLanguage()
        # Keep this order: the variant setter re-applies self._rate,
        # which is first stored by the rate setter.
        self.rate = 50
        self.speakingLanguage = lang
        self.variant = "1"

    def speak(self, speechSequence):
        """Queue ``speechSequence`` for synthesis.

        Each item is turned into a ``(callable, args)`` pair; the resulting
        list is put on ``_ibmeci.synthQueue`` and ``_ibmeci.process()`` is
        called.
        """
        last = None
        defaultLanguage = self.language
        outlist = []
        for item in speechSequence:
            if isinstance(item, string_types):
                s = self.processText(unicode(item))
                outlist.append((_ibmeci.speak, (s,)))
                last = s
            elif isinstance(item, speech.IndexCommand):
                outlist.append((_ibmeci.index, (item.index,)))
            elif isinstance(item, speech.LangChangeCommand):
                # Look for an annotation for the full code first,
                # then for its two-letter prefix.
                l = None
                if item.lang in langsAnnotations:
                    l = langsAnnotations[item.lang]
                elif item.lang and item.lang[0:2] in langsAnnotations:
                    l = langsAnnotations[item.lang[0:2]]
                if l:
                    if item.lang != self.speakingLanguage and item.lang != self.speakingLanguage[0:2]:
                        outlist.append((_ibmeci.speak, (l,)))
                        self.speakingLanguage = item.lang
                else:
                    # No annotation available: fall back to the default language.
                    outlist.append((_ibmeci.speak, (langsAnnotations[defaultLanguage],)))
                    self.speakingLanguage = defaultLanguage
            elif isinstance(item, speech.CharacterModeCommand):
                # Both alternatives must be bytes annotations; the "off" one used
                # to be the str "b`ts0", which would have been sent as literal text.
                outlist.append((_ibmeci.speak, (b"`ts1" if item.state else b"`ts0",)))
            elif isinstance(item, speech.SpeechCommand):
                log.debugWarning("Unsupported speech command: %s" % item)
            else:
                log.error("Unknown speech: %s" % item)
        if last is not None and not last[-1] in punctuation:
            outlist.append((_ibmeci.speak, (b'`p1',)))
        outlist.append((_ibmeci.setEndStringMark, ()))
        outlist.append((_ibmeci.speak, (b"`ts0",)))
        outlist.append((_ibmeci.synth, ()))
        _ibmeci.synthQueue.put(outlist)
        _ibmeci.process()

    def processText(self, text):
        """Prepare ``text`` for the engine and return it as mbcs-encoded bytes.

        Applies the per-language fix tables selected by ``_ibmeci.params[9]``,
        replaces backquotes with spaces, prefixes the ```pp0`` and ```vv``
        (current volume) annotations, and applies the anticrash, pause and
        time substitutions.
        """
        text = text.rstrip()
        if _ibmeci.params[9] in (65536, 65537):
            text = resub(english_fixes, text)
        if _ibmeci.params[9] in (131072, 131073):
            text = resub(spanish_fixes, text)
        if _ibmeci.params[9] in (196609, 196608):
            text = resub(french_fixes, text)
            # Sometimes this string make everything buggy with IBMTTS in French
            text = text.replace('quil', 'qil')
        #if not self._backquoteVoiceTags: text = text.replace(u'‵', ' ')
        if self._backquoteVoiceTags:
            text = "`pp0 `vv%d %s" % (self.getVParam(_ibmeci.vlm), text.replace('`', ' '))  # no embedded commands
            text = resub(anticrash_res, text)
            # this converts to ansi for anticrash. If this breaks with foreign langs, we can remove it.
            text = text.encode('mbcs', 'replace')
        else:
            # this converts to ansi for anticrash. If this breaks with foreign langs, we can remove it.
            text = text.encode('mbcs', 'replace')
            text = resub(anticrash_res, text)
            text = b"`pp0 `vv%d %s" % (self.getVParam(_ibmeci.vlm), text.replace(b'`', b' '))  # no embedded commands
        text = pause_re.sub(br'\1 `p1\2\3', text)
        text = time_re.sub(br'\1:\2 \3', text)
        # temporal fix: replace , with `" -" because IBMTTS seems ignore commas at the end.
        # if you know a better solution please let me know to update it.
        # endswith() is used because indexing bytes yields an int on Python 3,
        # which never compares equal to b",".
        if text.endswith(b","):
            text = text[:-1] + b" -"
        return text

    def pause(self, switch):
        _ibmeci.pause(switch)

    def terminate(self):
        _ibmeci.terminate()

    _backquoteVoiceTags = False

    def _get_backquoteVoiceTags(self):
        return self._backquoteVoiceTags

    def _set_backquoteVoiceTags(self, enable):
        if enable == self._backquoteVoiceTags:
            return
        self._backquoteVoiceTags = enable

    _rateBoost = False
    RATE_BOOST_MULTIPLIER = 1.6

    def _get_rateBoost(self):
        return self._rateBoost

    def _set_rateBoost(self, enable):
        if enable != self._rateBoost:
            # Read the percentage with the old flag, then re-apply it with the
            # new one so the engine value is rescaled.
            rate = self.rate
            self._rateBoost = enable
            self.rate = rate

    def _get_rate(self):
        val = self.getVParam(_ibmeci.rate)
        if self._rateBoost:
            val = int(round(val / self.RATE_BOOST_MULTIPLIER))
        return self._paramToPercent(val, minRate, maxRate)

    def _set_rate(self, vl):
        val = self._percentToParam(vl, minRate, maxRate)
        if self._rateBoost:
            val = int(round(val * self.RATE_BOOST_MULTIPLIER))
        # Remembered so that _set_variant can re-apply it.
        self._rate = val
        self.setVParam(_ibmeci.rate, val)

    def _get_pitch(self):
        return self.getVParam(_ibmeci.pitch)

    def _set_pitch(self, vl):
        self.setVParam(_ibmeci.pitch, vl)

    def _get_volume(self):
        return self.getVParam(_ibmeci.vlm)

    def _set_volume(self, vl):
        self.setVParam(_ibmeci.vlm, int(vl))

    def _set_inflection(self, vl):
        vl = int(vl)
        self.setVParam(_ibmeci.fluctuation, vl)

    def _get_inflection(self):
        return self.getVParam(_ibmeci.fluctuation)

    def _set_hsz(self, vl):
        vl = int(vl)
        self.setVParam(_ibmeci.hsz, vl)

    def _get_hsz(self):
        return self.getVParam(_ibmeci.hsz)

    def _set_rgh(self, vl):
        vl = int(vl)
        self.setVParam(_ibmeci.rgh, vl)

    def _get_rgh(self):
        return self.getVParam(_ibmeci.rgh)

    def _set_bth(self, vl):
        vl = int(vl)
        self.setVParam(_ibmeci.bth, vl)

    def _get_bth(self):
        return self.getVParam(_ibmeci.bth)

    def _getAvailableVoices(self):
        """Build the voice table from the ``.syn`` files in ``_ibmeci.ttsPath``.

        The first three characters of each lower-cased file name are the key
        into ``_ibmeci.langs``.
        """
        o = OrderedDict()
        for name in os.listdir(_ibmeci.ttsPath):
            if name.lower().endswith('.syn'):
                info = _ibmeci.langs[name.lower()[:3]]
                o[str(info[0])] = VoiceInfo(str(info[0]), info[1], info[2])
        return o

    def _get_voice(self):
        return str(_ibmeci.params[9])

    def _set_voice(self, vl):
        _ibmeci.set_voice(vl)

    def getVParam(self, pr):
        return _ibmeci.getVParam(pr)

    def setVParam(self, pr, vl):
        _ibmeci.setVParam(pr, vl)

    def _get_lastIndex(self):
        # fix?
        return _ibmeci.lastindex

    def cancel(self):
        _ibmeci.stop()

    def _getAvailableVariants(self):
        return OrderedDict(
            (str(id), synthDriverHandler.VoiceInfo(str(id), name))
            for id, name in variants.items())

    def _set_variant(self, v):
        self._variant = v if int(v) in variants else "1"
        _ibmeci.setVariant(int(v))
        # Re-apply the stored rate after the variant has been switched.
        self.setVParam(_ibmeci.rate, self._rate)
        # if 'ibmtts' in config.conf['speech']:
        #     config.conf['speech']['ibmtts']['pitch'] = self.pitch

    def _get_variant(self):
        return self._variant
class SynthDriver(SynthDriver):
    """NVDA synthesizer driver for Audiologic Tts3, backed by ``_audiologic``."""

    supportedSettings = (
        SynthDriver.RateSetting(),
        SynthDriver.PitchSetting(minStep=5),
        SynthDriver.InflectionSetting(minStep=10),
        SynthDriver.VolumeSetting(minStep=2),
    )
    description = "Audiologic Tts3"
    name = "audiologic"

    @classmethod
    def check(cls):
        """Return True when the Audiologic registry key can be opened."""
        try:
            # Raw string: "\A" and "\S" are not valid escape sequences.
            r = _winreg.OpenKey(
                _winreg.HKEY_LOCAL_MACHINE,
                r"SOFTWARE\Audiologic\Sintesi Audiologic")
        except OSError:
            # OpenKey signals a missing or unreadable key with an OSError
            # (WindowsError); anything else is a real error and propagates.
            return False
        r.Close()
        return True

    def __init__(self):
        _audiologic.TtsOpen()

    def terminate(self):
        _audiologic.TtsClose()

    def speakText(self, text, index=None):
        """Speak ``text``, prefixing a ``[:BMK=n]`` tag when ``index`` is a non-negative int."""
        if isinstance(index, int) and index >= 0:
            text = "[:BMK=%d]%s" % (index, text)
        _audiologic.TtsSpeak(text)

    def _get_lastIndex(self):
        return _audiologic.LastIndex

    def cancel(self):
        _audiologic.TtsStop()

    def _getAvailableVoices(self):
        return OrderedDict((("", VoiceInfo("", "Tts3", language="it")), ))

    def _get_voice(self):
        return ""

    def _set_voice(self, voice):
        # Only one voice is exposed, so there is nothing to change.
        pass

    def _get_rate(self):
        return self._paramToPercent(
            _audiologic.TtsGetProsody('Speed'),
            _audiologic.minRate, _audiologic.maxRate)

    def _set_rate(self, value):
        _audiologic.TtsSetParam(
            _audiologic.ttsRate,
            self._percentToParam(value, _audiologic.minRate, _audiologic.maxRate),
            0)

    def _get_pitch(self):
        return self._paramToPercent(
            _audiologic.TtsGetProsody('Pitch'),
            _audiologic.minPitch, _audiologic.maxPitch)

    def _set_pitch(self, value):
        _audiologic.TtsSetParam(
            _audiologic.ttsPitch,
            self._percentToParam(value, _audiologic.minPitch, _audiologic.maxPitch),
            0)

    def _get_volume(self):
        return self._paramToPercent(
            _audiologic.TtsGetProsody('Vol'),
            _audiologic.minVol, _audiologic.maxVol)

    def _set_volume(self, value):
        _audiologic.TtsSetParam(
            _audiologic.ttsVol,
            self._percentToParam(value, _audiologic.minVol, _audiologic.maxVol),
            0)

    def _get_inflection(self):
        # The engine value is scaled by 10 to produce the setting value.
        return _audiologic.TtsGetProsody('Expression') * 10

    def _set_inflection(self, value):
        _audiologic.TtsSetParam(_audiologic.ttsExpression, int(value / 10), 0)

    def pause(self, switch):
        """Pause speech when ``switch`` is true, otherwise restart it."""
        if switch:
            _audiologic.TtsPause()
        else:
            _audiologic.TtsRestart()
class SynthDriver(SynthDriver):
    """NVDA synthesizer driver for Newfon, loaded from ``newfon_nvda.dll``."""

    name = "newfon"
    description = "Newfon"
    supportedSettings = (
        SynthDriver.VoiceSetting(),
        SynthDriver.LanguageSetting(),
        SynthDriver.RateSetting(),
        SynthSetting("accel", _("&Acceleration")),
        SynthDriver.PitchSetting(),
        SynthDriver.InflectionSetting(10),
        SynthDriver.VolumeSetting(),
    )

    # Cached setting values returned by the getters below.
    _volume = 100
    _language = "ru"
    _pitch = 50
    _accel = 0
    _inflection = 50
    _rate = 70

    availableVoices = OrderedDict(
        (str(number), VoiceInfo(str(number), label))
        for number, label in enumerate(
            [_("male 1"), _("female 1"), _("male 2"), _("female 2")]))
    availableAccels = OrderedDict(
        (str(level), StringParameterInfo(str(level), str(level)))
        for level in xrange(8))
    # (minimum, maximum) pitch pair for each voice, indexed by voice number.
    pitchTable = [(90, 130), (190, 330), (60, 120), (220, 340)]
    availableLanguages = OrderedDict((
        ("ru", LanguageInfo("ru")),
        ("uk", LanguageInfo("uk")),
    ))
    newfon_lib = None
    sdrvxpdbDll = None
    dictDll = None

    @classmethod
    def check(cls):
        """Return True when the Newfon DLL file is present."""
        return os.path.isfile('synthDrivers/newfon_nvda.dll')

    def calculateMinMaxPitch(self, pitch, inflection):
        """Return the ``(min, max)`` pitch pair for the current voice.

        The pair from ``pitchTable`` is widened according to ``inflection``
        and then shifted according to ``pitch``.
        """
        low, high = self.pitchTable[int(self.voice)]
        spread = int(((high - low) / 50.0) * ((inflection - 50) / 2))
        shift = int((pitch - 50) / 1.3)
        return low - spread + shift, high + spread + shift

    def __init__(self):
        global player
        player = nvwave.WavePlayer(
            channels=1,
            samplesPerSec=10000,
            bitsPerSample=8,
            outputDevice=config.conf["speech"]["outputDevice"])
        # The dictionary libraries are optional.
        self.hasDictLib = os.path.isfile('synthDrivers/dict.dll')
        if self.hasDictLib:
            self.sdrvxpdb_lib = windll.LoadLibrary(r"synthDrivers\sdrvxpdb.dll")
            self.dict_lib = windll.LoadLibrary(r"synthDrivers\dict.dll")
        self.newfon_lib = windll.LoadLibrary(r"synthDrivers\newfon_nvda.dll")
        self.newfon_lib.speakText.argtypes = [c_char_p, c_int]
        if not self.newfon_lib.initialize():
            raise Exception
        self.newfon_lib.set_callback(processAudio)
        self.newfon_lib.set_dictionary(1)

    def terminate(self):
        """Stop speech, close the wave player and drop the library handles."""
        global player
        self.cancel()
        player.close()
        player = None
        self.newfon_lib.terminate()
        del self.newfon_lib
        if self.hasDictLib:
            del self.dict_lib
            del self.sdrvxpdb_lib

    def speakText(self, text, index=None):
        """Speak ``text``; an ``index`` of None is passed to the library as -1."""
        global isSpeaking
        isSpeaking = True
        prepared = processText(text, self._language)
        self.newfon_lib.speakText(prepared, index if index is not None else -1)

    def cancel(self):
        global isSpeaking, player
        self.newfon_lib.cancel()
        isSpeaking = False
        player.stop()

    def pause(self, switch):
        global player
        player.pause(switch)

    def _get_lastIndex(self):
        return self.newfon_lib.get_lastIndex()

    def _get_voice(self):
        return str(self.newfon_lib.get_voice())

    def _set_voice(self, value):
        self.newfon_lib.set_voice(int(value))
        # The pitch range depends on the voice, so apply it again.
        self._set_pitch(self._pitch)

    def _get_volume(self):
        return self._volume

    def _set_volume(self, value):
        self.newfon_lib.set_volume(value)
        self._volume = value

    def _get_rate(self):
        return self._rate

    def _set_rate(self, value):
        self.newfon_lib.set_rate(value)
        self._rate = value

    def _get_pitch(self):
        return self._pitch

    def _set_pitch(self, value):
        #if value <= 50: value = 50
        #self.newfon_lib.set_accel(value/5 -10 )
        self._pitch = value
        lowest, highest = self.calculateMinMaxPitch(self._pitch, self._inflection)
        self.newfon_lib.set_pitch_min(lowest)
        self.newfon_lib.set_pitch_max(highest)

    def _get_language(self):
        return self._language

    def _set_language(self, language):
        self._language = language
        if self.hasDictLib:
            self.newfon_lib.set_dictionary(1 if language == "ru" else 0)

    def _get_inflection(self):
        return self._inflection

    def _set_inflection(self, inflection):
        self._inflection = inflection
        # Inflection feeds into the pitch range, so apply the pitch again.
        self._set_pitch(self._pitch)

    def _get_accel(self):
        return self._accel

    def _set_accel(self, a):
        self._accel = a
        self.newfon_lib.set_accel(int(a))
class SynthDriver(SynthDriver):
    """NVDA synthesizer driver for eSpeak NG, backed by the ``_espeak`` module.

    Speech sequences are rendered into a single SSML-like string which is
    passed to ``_espeak.speak``.
    """

    name = "espeak"
    description = "eSpeak NG"

    supportedSettings = (
        SynthDriver.VoiceSetting(),
        SynthDriver.VariantSetting(),
        SynthDriver.RateSetting(),
        SynthDriver.RateBoostSetting(),
        SynthDriver.PitchSetting(),
        SynthDriver.InflectionSetting(),
        SynthDriver.VolumeSetting(),
    )
    supportedCommands = {
        IndexCommand,
        CharacterModeCommand,
        LangChangeCommand,
        BreakCommand,
        PitchCommand,
        RateCommand,
        VolumeCommand,
        PhonemeCommand,
    }
    supportedNotifications = {synthIndexReached, synthDoneSpeaking}

    # A mapping of commonly used language codes to eSpeak languages.
    # Introduced due to eSpeak issue: https://github.com/espeak-ng/espeak-ng/issues/1200
    # These are used when eSpeak doesn't support a given language code
    # but a default alias is appropriate.
    _defaultLangToLocale = {
        # Languages without locale that aren't supported in eSpeak 7e5457f91e10,
        # with a language with locale that is supported.
        # Found via:
        # set(stripLocaleFromLangCode(lang) for lang in self.availableLanguages).difference(self.availableLanguages)
        "en": "en-gb",
        "chr": "chr-US-Qaaa-x-west",
        "fr": "fr-fr",
    }

    availableLanguages: Set[Optional[str]]
    """
    For eSpeak commit 7e5457f91e10, this is equivalent to:
    {
        'ia', 'ru', 'cy', 'ms', 'af', 'fi', 'fr-fr', 'nog', 'gu', 'hu', 'eu', 'om', 'en-029', 'de', 'es', 'kk',
        'an', 'nci', 'uk', 'vi-vn-x-south', 'grc', 'it', 'vi-vn-x-central', 'bg', 'piqd', 'ug', 'ar', 'da', 'mi',
        'mr', 'pt-br', 'fr-ch', 'py', 'uz', 'en-gb', 'sw', 'as', 'shn', 'vi', 'nl', 'bs', 'ga', 'pap', 'sv', 'kn',
        'gn', 'th', 'tr', 'pa', 'mt', 'chr-US-Qaaa-x-west', 'eo', 'kok', 'ky', 'lfn', 'is', 'pt',
        'en-gb-x-gbcwmd', 'en-gb-x-rp', 'ht', 'bpy', 'fr-be', 'nb', 'lt', 'ja', 'te', 'tn', 'es-419', 'gd', 'sjn',
        'he', 'hyw', 'et', 'ro', 'ru-lv', 'sq', 'quc', 'am', 'hr', 'qya', 'ka', 'el', 'tt', 'or', 'pl', 'qu', 'ba',
        'ta', 'cmn', 'io', 'en-us', 'ur', 'hi', 'en-gb-scotland', 'fa', 'kl', 'tk', 'ku', 'si', 'cv', 'ca', 'qdb',
        'hak', 'fa-latn', 'lv', 'en-gb-x-gbclan', 'ltg', 'ne', 'sl', 'az', 'yue', 'sk', 'hy', 'my', 'ko', 'mk',
        'smj', 'ml', 'cmn-latn-pinyin', 'id', 'la', 'sr', 'bn', 'sd', 'cs', 'jbo', 'haw'
    }
    """

    @classmethod
    def check(cls):
        return True

    def __init__(self):
        _espeak.initialize(self._onIndexReached)
        log.info("Using eSpeak NG version %s" % _espeak.info())
        lang = getLanguage()
        _espeak.setVoiceByLanguage(lang)
        self._language = lang
        self._variantDict = _espeak.getVariantDict()
        self.variant = "max"
        self.rate = 30
        self.pitch = 40
        self.inflection = 75

    def _get_language(self):
        return self._language

    # Maps prosody command types to the attribute name used in the <prosody> tag.
    PROSODY_ATTRS = {
        PitchCommand: "pitch",
        VolumeCommand: "volume",
        RateCommand: "rate",
    }

    IPA_TO_ESPEAK = {
        u"θ": u"T",
        u"s": u"s",
        u"ˈ": u"'",
    }

    def _processText(self, text):
        """Return ``text`` made safe for embedding in the markup sent to eSpeak.

        Angle brackets are replaced with XML entities so that literal text is
        not parsed as a tag; mapping them to themselves would be a no-op.
        """
        # We need to make several replacements.
        return text.translate({
            0x1: None,  # used for embedded commands
            0x3C: u"&lt;",  # <: because of XML
            0x3E: u"&gt;",  # >: because of XML
            0x5B: u" [",  # [: [[ indicates phonemes
        })

    def _normalizeLangCommand(self, command: LangChangeCommand) -> LangChangeCommand:
        """Return an eSpeak compatible LangChangeCommand for ``command``.

        The language is resolved in this order: an exact match in the
        available languages, a default mapping from L{_defaultLangToLocale},
        the language without its locale, then any available language sharing
        the same base code (e.g. ru-ru to ru). When nothing matches, the
        returned command carries ``None`` as its language.
        """
        lowerCaseAvailableLangs = set(lang.lower() for lang in self.availableLanguages)
        # Use default language if no command.lang is supplied
        langWithLocale = command.lang if command.lang else self._language
        langWithLocale = langWithLocale.lower().replace('_', '-')

        langWithoutLocale: Optional[str] = stripLocaleFromLangCode(langWithLocale)

        # Check for any language where the language code matches, regardless of dialect: e.g. ru-ru to ru
        matchingLangs = filter(
            lambda lang: stripLocaleFromLangCode(lang) == langWithoutLocale,
            lowerCaseAvailableLangs)
        anyLocaleMatchingLang = next(matchingLangs, None)

        # Check from a list of known default mapping locales: e.g. en to en-gb
        # Created due to eSpeak issue: https://github.com/espeak-ng/espeak-ng/issues/1200
        knownDefaultLang = self._defaultLangToLocale.get(langWithoutLocale, None)
        if knownDefaultLang is not None and knownDefaultLang not in self.availableLanguages:
            # This means eSpeak has changed and we need to update the mapping
            log.error(
                f"Default mapping unknown to eSpeak {knownDefaultLang} not in {self.availableLanguages}"
            )
            knownDefaultLang = None

        if langWithLocale in lowerCaseAvailableLangs:
            eSpeakLang = langWithLocale
        elif knownDefaultLang is not None:
            eSpeakLang = knownDefaultLang
        elif langWithoutLocale in lowerCaseAvailableLangs:
            eSpeakLang = langWithoutLocale
        elif anyLocaleMatchingLang is not None:
            eSpeakLang = anyLocaleMatchingLang
        else:
            log.debugWarning(f"Unable to find an eSpeak language for '{langWithLocale}'")
            eSpeakLang = None
        return LangChangeCommand(eSpeakLang)

    def _handleLangChangeCommand(
            self,
            langChangeCommand: LangChangeCommand,
            langChanged: bool,
    ) -> str:
        """Get language xml tags needed to handle a lang change command.
        - if a language change has already been handled for this speech,
        close the open voice tag.
        - if the language is supported by eSpeak, switch to that language.
        - otherwise, switch to the default synthesizer language.
        """
        langChangeCommand = self._normalizeLangCommand(langChangeCommand)
        voiceChangeXML = ""
        if langChanged:
            # Close existing voice tag
            voiceChangeXML += "</voice>"
        if langChangeCommand.lang is not None:
            # Open new voice tag using eSpeak compatible language
            voiceChangeXML += f'<voice xml:lang="{langChangeCommand.lang}">'
        else:
            # Open new voice tag using default voice
            voiceChangeXML += "<voice>"
        return voiceChangeXML

    # C901 'speak' is too complex
    # Note: when working on speak, look for opportunities to simplify
    # and move logic out into smaller helper functions.
    def speak(self, speechSequence: SpeechSequence):  # noqa: C901
        """Render ``speechSequence`` to markup and pass it to ``_espeak.speak``."""
        textList: List[str] = []
        langChanged = False
        prosody: Dict[str, int] = {}
        # We output malformed XML, as we might close an outer tag after opening an inner one; e.g.
        # <voice><prosody></voice></prosody>.
        # However, eSpeak doesn't seem to mind.
        for item in speechSequence:
            if isinstance(item, str):
                textList.append(self._processText(item))
            elif isinstance(item, IndexCommand):
                textList.append("<mark name=\"%d\" />" % item.index)
            elif isinstance(item, CharacterModeCommand):
                textList.append("<say-as interpret-as=\"characters\">" if item.state else "</say-as>")
            elif isinstance(item, LangChangeCommand):
                langChangeXML = self._handleLangChangeCommand(item, langChanged)
                textList.append(langChangeXML)
                langChanged = True
            elif isinstance(item, BreakCommand):
                # Break commands are ignored at the start of speech unless strength is specified.
                # Refer to eSpeak issue: https://github.com/espeak-ng/espeak-ng/issues/1232
                textList.append(f'<break time="{item.time}ms" strength="1" />')
            elif type(item) in self.PROSODY_ATTRS:
                if prosody:
                    # Close previous prosody tag.
                    textList.append("</prosody>")
                attr = self.PROSODY_ATTRS[type(item)]
                if item.multiplier == 1:
                    # Returning to normal.
                    try:
                        del prosody[attr]
                    except KeyError:
                        pass
                else:
                    prosody[attr] = int(item.multiplier * 100)
                if not prosody:
                    continue
                textList.append("<prosody")
                for attr, val in prosody.items():
                    textList.append(' %s="%d%%"' % (attr, val))
                textList.append(">")
            elif isinstance(item, PhonemeCommand):
                # We can't use str.translate because we want to reject unknown characters.
                try:
                    phonemes = "".join([self.IPA_TO_ESPEAK[char] for char in item.ipa])
                    # There needs to be a space after the phoneme command.
                    # Otherwise, eSpeak will announce a subsequent SSML tag instead of processing it.
                    textList.append(u"[[%s]] " % phonemes)
                except KeyError:
                    log.debugWarning("Unknown character in IPA string: %s" % item.ipa)
                    if item.text:
                        textList.append(self._processText(item.text))
            else:
                log.error("Unknown speech: %s" % item)
        # Close any open tags.
        if langChanged:
            textList.append("</voice>")
        if prosody:
            textList.append("</prosody>")
        text = u"".join(textList)
        _espeak.speak(text)

    def cancel(self):
        _espeak.stop()

    def pause(self, switch):
        _espeak.pause(switch)

    _rateBoost = False
    RATE_BOOST_MULTIPLIER = 3

    def _get_rateBoost(self):
        return self._rateBoost

    def _set_rateBoost(self, enable):
        if enable == self._rateBoost:
            return
        # Read the percentage with the old flag, then re-apply it with the new
        # one so the engine value is rescaled.
        rate = self.rate
        self._rateBoost = enable
        self.rate = rate

    def _get_rate(self):
        val = _espeak.getParameter(_espeak.espeakRATE, 1)
        if self._rateBoost:
            val = int(val / self.RATE_BOOST_MULTIPLIER)
        return self._paramToPercent(val, _espeak.minRate, _espeak.maxRate)

    def _set_rate(self, rate):
        val = self._percentToParam(rate, _espeak.minRate, _espeak.maxRate)
        if self._rateBoost:
            val = int(val * self.RATE_BOOST_MULTIPLIER)
        _espeak.setParameter(_espeak.espeakRATE, val, 0)

    def _get_pitch(self):
        val = _espeak.getParameter(_espeak.espeakPITCH, 1)
        return self._paramToPercent(val, _espeak.minPitch, _espeak.maxPitch)

    def _set_pitch(self, pitch):
        val = self._percentToParam(pitch, _espeak.minPitch, _espeak.maxPitch)
        _espeak.setParameter(_espeak.espeakPITCH, val, 0)

    def _get_inflection(self):
        val = _espeak.getParameter(_espeak.espeakRANGE, 1)
        return self._paramToPercent(val, _espeak.minPitch, _espeak.maxPitch)

    def _set_inflection(self, val):
        val = self._percentToParam(val, _espeak.minPitch, _espeak.maxPitch)
        _espeak.setParameter(_espeak.espeakRANGE, val, 0)

    def _get_volume(self):
        return _espeak.getParameter(_espeak.espeakVOLUME, 1)

    def _set_volume(self, volume):
        _espeak.setParameter(_espeak.espeakVOLUME, volume, 0)

    def _getAvailableVoices(self):
        voices = OrderedDict()
        for v in _espeak.getVoiceList():
            l = _espeak.decodeEspeakString(v.languages[1:])
            # #7167: Some languages names contain unicode characters EG: Norwegian Bokmål
            name = _espeak.decodeEspeakString(v.name)
            # #5783: For backwards compatibility, voice identifies should always be lowercase
            identifier = os.path.basename(_espeak.decodeEspeakString(v.identifier)).lower()
            voices[identifier] = VoiceInfo(identifier, name, l)
        return voices

    def _get_voice(self):
        curVoice = getattr(self, '_voice', None)
        if curVoice:
            return curVoice
        curVoice = _espeak.getCurrentVoice()
        if not curVoice:
            return ""
        # #5783: For backwards compatibility, voice identifies should always be lowercase
        return _espeak.decodeEspeakString(curVoice.identifier).split('+')[0].lower()

    def _set_voice(self, identifier):
        if not identifier:
            return
        # #5783: For backwards compatibility, voice identifies should always be lowercase
        identifier = identifier.lower()
        if "\\" in identifier:
            identifier = os.path.basename(identifier)
        self._voice = identifier
        try:
            _espeak.setVoiceAndVariant(voice=identifier, variant=self._variant)
        except:
            # Forget the voice that could not be set, then let the error propagate.
            self._voice = None
            raise
        self._language = super(SynthDriver, self).language

    def _onIndexReached(self, index):
        """Notify index reached, or done speaking when ``index`` is None."""
        if index is not None:
            synthIndexReached.notify(synth=self, index=index)
        else:
            synthDoneSpeaking.notify(synth=self)

    def terminate(self):
        _espeak.terminate()

    def _get_variant(self):
        return self._variant

    def _set_variant(self, val):
        self._variant = val if val in self._variantDict else "max"
        _espeak.setVoiceAndVariant(variant=self._variant)

    def _getAvailableVariants(self):
        return OrderedDict(
            (ID, VoiceInfo(ID, name)) for ID, name in self._variantDict.items())
class SynthDriver(SynthDriver):
    """NVDA synthesizer driver for eSpeak, backed by the ``_espeak`` module.

    Speech sequences are rendered into a single SSML-like string which is
    passed to ``_espeak.speak``.
    """

    name = "espeak"
    description = "eSpeak"

    supportedSettings = (
        SynthDriver.VoiceSetting(),
        SynthDriver.VariantSetting(),
        SynthDriver.RateSetting(),
        # Translators: This is the name of the rate boost voice toggle
        # which further increases the speaking rate when enabled.
        BooleanSynthSetting("rateBoost", _("Rate boos&t")),
        SynthDriver.PitchSetting(),
        SynthDriver.InflectionSetting(),
        SynthDriver.VolumeSetting(),
    )

    @classmethod
    def check(cls):
        return True

    def __init__(self):
        _espeak.initialize()
        log.info("Using eSpeak version %s" % _espeak.info())
        lang = languageHandler.getLanguage()
        _espeak.setVoiceByLanguage(lang)
        self._language = lang
        self._variantDict = _espeak.getVariantDict()
        self.variant = "max"
        self.rate = 30
        self.pitch = 40
        self.inflection = 75

    def _get_language(self):
        return self._language

    def speak(self, speechSequence):
        """Render ``speechSequence`` to markup and pass it to ``_espeak.speak``."""
        defaultLanguage = self._language
        textList = []
        langChanged = False
        for item in speechSequence:
            if isinstance(item, basestring):
                s = unicode(item)
                # Replace \01, as this is used for embedded commands.
                # Also replace < and > as espeak handles xml
                # translate() returns a new string, so the result must be kept;
                # the brackets become entities so literal text is not read as a tag.
                s = s.translate({
                    ord(u'\01'): None,
                    ord(u'<'): u'&lt;',
                    ord(u'>'): u'&gt;',
                })
                textList.append(s)
            elif isinstance(item, speech.IndexCommand):
                textList.append("<mark name=\"%d\" />" % item.index)
            elif isinstance(item, speech.CharacterModeCommand):
                textList.append("<say-as interpret-as=\"characters\">" if item.state else "</say-as>")
            elif isinstance(item, speech.LangChangeCommand):
                if langChanged:
                    # Close the voice tag opened by the previous language change.
                    textList.append("</voice>")
                textList.append(
                    "<voice xml:lang=\"%s\">"
                    % (item.lang if item.lang else defaultLanguage).replace('_', '-'))
                langChanged = True
            elif isinstance(item, speech.SpeechCommand):
                log.debugWarning("Unsupported speech command: %s" % item)
            else:
                log.error("Unknown speech: %s" % item)
        if langChanged:
            textList.append("</voice>")
        text = u"".join(textList)
        _espeak.speak(text)

    def cancel(self):
        _espeak.stop()

    def pause(self, switch):
        _espeak.pause(switch)

    _rateBoost = False
    RATE_BOOST_MULTIPLIER = 3

    def _get_rateBoost(self):
        return self._rateBoost

    def _set_rateBoost(self, enable):
        if enable == self._rateBoost:
            return
        # Read the percentage with the old flag, then re-apply it with the new
        # one so the engine value is rescaled.
        rate = self.rate
        self._rateBoost = enable
        self.rate = rate

    def _get_rate(self):
        val = _espeak.getParameter(_espeak.espeakRATE, 1)
        if self._rateBoost:
            val = int(val / self.RATE_BOOST_MULTIPLIER)
        return self._paramToPercent(val, _espeak.minRate, _espeak.maxRate)

    def _set_rate(self, rate):
        val = self._percentToParam(rate, _espeak.minRate, _espeak.maxRate)
        if self._rateBoost:
            val = int(val * self.RATE_BOOST_MULTIPLIER)
        _espeak.setParameter(_espeak.espeakRATE, val, 0)

    def _get_pitch(self):
        val = _espeak.getParameter(_espeak.espeakPITCH, 1)
        return self._paramToPercent(val, _espeak.minPitch, _espeak.maxPitch)

    def _set_pitch(self, pitch):
        val = self._percentToParam(pitch, _espeak.minPitch, _espeak.maxPitch)
        _espeak.setParameter(_espeak.espeakPITCH, val, 0)

    def _get_inflection(self):
        val = _espeak.getParameter(_espeak.espeakRANGE, 1)
        return self._paramToPercent(val, _espeak.minPitch, _espeak.maxPitch)

    def _set_inflection(self, val):
        val = self._percentToParam(val, _espeak.minPitch, _espeak.maxPitch)
        _espeak.setParameter(_espeak.espeakRANGE, val, 0)

    def _get_volume(self):
        return _espeak.getParameter(_espeak.espeakVOLUME, 1)

    def _set_volume(self, volume):
        _espeak.setParameter(_espeak.espeakVOLUME, volume, 0)

    def _getAvailableVoices(self):
        voices = OrderedDict()
        for v in _espeak.getVoiceList():
            l = v.languages[1:]
            identifier = os.path.basename(v.identifier)
            voices[identifier] = VoiceInfo(identifier, v.name, l)
        return voices

    def _get_voice(self):
        curVoice = getattr(self, '_voice', None)
        if curVoice:
            return curVoice
        curVoice = _espeak.getCurrentVoice()
        if not curVoice:
            return ""
        return curVoice.identifier.split('+')[0]

    def _set_voice(self, identifier):
        if not identifier:
            return
        if "\\" in identifier:
            identifier = os.path.basename(identifier)
        self._voice = identifier
        try:
            _espeak.setVoiceAndVariant(voice=identifier, variant=self._variant)
        except:
            # Forget the voice that could not be set, then let the error propagate.
            self._voice = None
            raise
        self._language = super(SynthDriver, self).language

    def _get_lastIndex(self):
        return _espeak.lastIndex

    def terminate(self):
        _espeak.terminate()

    def _get_variant(self):
        return self._variant

    def _set_variant(self, val):
        self._variant = val if val in self._variantDict else "max"
        _espeak.setVoiceAndVariant(variant=self._variant)

    def _getAvailableVariants(self):
        return OrderedDict(
            (ID, VoiceInfo(ID, name)) for ID, name in self._variantDict.iteritems())