class SynthDriver(synthDriverHandler.SynthDriver):
    """NVDA synthesizer driver for IBMTTS, implemented on top of the ``_ibmeci`` module.

    Speech sequences are translated into a list of ``(callable, args)`` pairs
    which is pushed onto ``_ibmeci.eciQueue``. Text is sent to the engine as
    bytes, encoded with ``self.currentEncoding``.
    """

    supportedSettings = (
        SynthDriver.VoiceSetting(),
        SynthDriver.VariantSetting(),
        SynthDriver.RateSetting(),
        BooleanDriverSetting("rateBoost", _("Rate boos&t"), True),
        SynthDriver.PitchSetting(),
        SynthDriver.InflectionSetting(),
        SynthDriver.VolumeSetting(),
        NumericDriverSetting("hsz", _("Head size"), False),
        NumericDriverSetting("rgh", _("Roughness"), False),
        NumericDriverSetting("bth", _("Breathiness"), False),
        BooleanDriverSetting("backquoteVoiceTags", _("Enable backquote voice &tags"), False),
        BooleanDriverSetting("ABRDICT", _("Enable &abbreviation dictionary"), False),
        BooleanDriverSetting("phrasePrediction", _("Enable phrase prediction"), False),
        BooleanDriverSetting("shortpause", _("&Shorten pauses"), False),
        BooleanDriverSetting("sendParams", _("Always Send Current Speech Settings (enable to prevent some tags from sticking, disable for viavoice binary compatibility)"), False),
    )
    supportedCommands = {
        IndexCommand,
        CharacterModeCommand,
        LangChangeCommand,
        BreakCommand,
        PitchCommand,
        RateCommand,
        VolumeCommand,
    }
    supportedNotifications = {synthIndexReached, synthDoneSpeaking}

    description = 'IBMTTS'
    name = 'ibmeci'
    speakingLanguage = ""

    # Maps prosody speech commands to the engine voice parameter they change.
    PROSODY_ATTRS = {
        PitchCommand: ECIVoiceParam.eciPitchBaseline,
        VolumeCommand: ECIVoiceParam.eciVolume,
        RateCommand: ECIVoiceParam.eciSpeed,
    }

    # taken from eloquence_threshold (https://github.com/pumper42nickel/eloquence_threshold)
    # Eloquence doesn't respect delay time in milliseconds.
    # Therefore we need to adjust waiting time depending on current speech rate.
    # The following table of adjustments (rate percent -> factor) has been
    # measured empirically; values in between are linearly interpolated.
    BREAK_RATE_COEFFICIENTS = {
        10: 1,
        43: 2,
        60: 3,
        75: 4,
        85: 5,
    }

    @classmethod
    def check(cls):
        """Return the result of ``_ibmeci.eciCheck()``."""
        return _ibmeci.eciCheck()

    def __init__(self):
        _ibmeci.initialize(self._onIndexReached, self._onDoneSpeaking)
        # This information doesn't really need to be displayed, and makes IBMTTS unusable
        # if the addon is not in the same drive as NVDA executable.
        # But display it only on debug mode in case of it can be useful
        log.debug("Using IBMTTS version %s" % _ibmeci.eciVersion())
        lang = languageHandler.getLanguage()
        # rate must be assigned before variant: _set_variant re-applies self._rate.
        self.rate = 50
        self.speakingLanguage = lang
        self.variant = "1"
        self.currentEncoding = "mbcs"

    def _breakTimeFactor(self):
        """Return the multiplier applied to a break time at the current rate."""
        table = self.BREAK_RATE_COEFFICIENTS
        knots = sorted(table)
        rate = self.rate
        if rate <= knots[0]:
            return table[knots[0]]
        if rate >= knots[-1]:
            return table[knots[-1]]
        if rate in table:
            return table[rate]
        # rate lies strictly between two measured points: interpolate linearly.
        ra = max(k for k in knots if k < rate)
        rb = min(k for k in knots if k > rate)
        return 1.0 * table[ra] + (table[rb] - table[ra]) * (rate - ra) / (rb - ra)

    def speak(self, speechSequence):
        """Translate ``speechSequence`` into engine calls and queue them.

        Strings are passed through ``processText``; the supported commands are
        turned into index marks, language annotations, embedded backquote
        commands or prosody parameter changes. Anything else is logged as an error.
        """
        last = None
        defaultLanguage = self.language
        outlist = []
        charmode = False
        for item in speechSequence:
            if isinstance(item, string_types):
                s = self.processText(item)
                outlist.append((_ibmeci.speak, (s,)))
                last = s
            elif isinstance(item, IndexCommand):
                outlist.append((_ibmeci.index, (item.index,)))
            elif isinstance(item, LangChangeCommand):
                l = None
                if item.lang in langsAnnotations:
                    l = langsAnnotations[item.lang]
                elif item.lang and item.lang[0:2] in langsAnnotations:
                    l = langsAnnotations[item.lang[0:2]]
                if l:
                    # Only emit the annotation when the language actually changes.
                    if item.lang != self.speakingLanguage and item.lang != self.speakingLanguage[0:2]:
                        outlist.append((_ibmeci.speak, (l,)))
                        self.speakingLanguage = item.lang
                        self.updateEncoding(l)
                else:
                    outlist.append((_ibmeci.speak, (langsAnnotations[defaultLanguage],)))
                    self.speakingLanguage = defaultLanguage
            elif isinstance(item, CharacterModeCommand):
                outlist.append((_ibmeci.speak, (b"`ts1" if item.state else b"`ts0",)))
                if item.state:
                    charmode = True
            elif isinstance(item, BreakCommand):
                pFactor = int(self._breakTimeFactor() * item.time)
                outlist.append((_ibmeci.speak, (b' `p%d ' % (pFactor),)))
            elif type(item) in self.PROSODY_ATTRS:
                val = max(0, min(item.newValue, 100))
                if type(item) == RateCommand:
                    val = self.percentToRate(val)
                outlist.append((_ibmeci.setProsodyParam, (self.PROSODY_ATTRS[type(item)], val)))
            else:
                log.error("Unknown speech: %s" % item)
        if last is not None and last[-1] not in punctuation:
            # check if a pitch command is at the end of the list, because p1 need to be send before this.
            # index -2 is because -1 always seem to be an index command.
            # The length check avoids an IndexError when the sequence produced a single entry.
            if len(outlist) >= 2 and outlist[-2][0] == _ibmeci.setProsodyParam:
                outlist.insert(-2, (_ibmeci.speak, (b'`p1 ',)))
            else:
                outlist.append((_ibmeci.speak, (b'`p1 ',)))
        if charmode:
            # Leave character mode again once the utterance is finished.
            outlist.append((_ibmeci.speak, (b"`ts0",)))
        outlist.append((_ibmeci.setEndStringMark, ()))
        outlist.append((_ibmeci.synth, ()))
        _ibmeci.eciQueue.put(outlist)
        _ibmeci.process()

    def processText(self, text):
        """Encode ``text`` to bytes and apply language fixes and embedded settings.

        Returns the bytes to send to the engine, prefixed with the embedded
        commands derived from the driver's boolean settings.
        """
        # this converts to ansi for anticrash. If this breaks with foreign langs, we can remove it.
        text = text.encode(self.currentEncoding, 'replace')
        # special unicode symbols may encode to backquote. For this reason, backquote processing is after this.
        text = text.rstrip()
        lang = _ibmeci.params[9]
        isIBM = _ibmeci.isIBM
        # Applies to all languages with dual language support.
        if lang in (65536, 65537, 393216, 655360, 720897):
            text = resub(english_fixes, text)
        if lang in (65536, 65537, 393216, 655360, 720897) and isIBM:
            text = resub(english_ibm_fixes, text)
        if lang in (131072, 131073) and not isIBM:
            text = resub(spanish_fixes, text)
        if lang in ('esp', 131072) and isIBM:
            text = resub(spanish_ibm_fixes, text)
        if lang in (196609, 196608):
            # Sometimes this string make everything buggy with IBMTTS in French
            text = text.replace(br'quil', br'qil')
        if lang in ('deu', 262144):
            text = resub(german_fixes, text)
        if lang in ('ptb', 458752) and isIBM:
            text = resub(portuguese_ibm_fixes, text)
        if not self._backquoteVoiceTags:
            text = text.replace(b'`', b' ')  # no embedded commands
        if self._shortpause:
            text = pause_re.sub(br'\1 `p1\2\3\4', text)  # this enforces short, JAWS-like pauses.
        if not isIBM:
            # apparently if this isn't done strings like 2:30:15 will only announce 2:30
            text = time_re.sub(br'\1:\2 \3', text)
        embeds = b''
        embeds += b"`da1 " if self._ABRDICT else b"`da0 "
        embeds += b"`pp1 " if self._phrasePrediction else b"`pp0 "
        if self._sendParams:
            embeds += b"`vv%d `vs%d " % (
                _ibmeci.getVParam(ECIVoiceParam.eciVolume),
                _ibmeci.getVParam(ECIVoiceParam.eciSpeed),
            )
        # bring all the printf stuff into one call, in one string.
        text = b"%s %s" % (embeds.rstrip(), text)
        return text

    def pause(self, switch):
        _ibmeci.pause(switch)

    def terminate(self):
        _ibmeci.terminate()

    # Class-level defaults for the boolean settings handled below.
    _backquoteVoiceTags = False
    _ABRDICT = False
    _phrasePrediction = False
    _shortpause = False
    _sendParams = True

    def _get_backquoteVoiceTags(self):
        return self._backquoteVoiceTags

    def _set_backquoteVoiceTags(self, enable):
        if enable == self._backquoteVoiceTags:
            return
        self._backquoteVoiceTags = enable

    def _get_ABRDICT(self):
        return self._ABRDICT

    def _set_ABRDICT(self, enable):
        if enable == self._ABRDICT:
            return
        self._ABRDICT = enable

    def _get_phrasePrediction(self):
        return self._phrasePrediction

    def _set_phrasePrediction(self, enable):
        if enable == self._phrasePrediction:
            return
        self._phrasePrediction = enable

    def _get_shortpause(self):
        return self._shortpause

    def _set_shortpause(self, enable):
        if enable == self._shortpause:
            return
        self._shortpause = enable

    def _get_sendParams(self):
        return self._sendParams

    def _set_sendParams(self, enable):
        if enable == self._sendParams:
            return
        self._sendParams = enable

    _rateBoost = False
    RATE_BOOST_MULTIPLIER = 1.6

    def _get_rateBoost(self):
        return self._rateBoost

    def _set_rateBoost(self, enable):
        if enable != self._rateBoost:
            # Re-apply the current percentage so the engine speed reflects the new boost state.
            rate = self.rate
            self._rateBoost = enable
            self.rate = rate

    def _get_rate(self):
        val = _ibmeci.getVParam(ECIVoiceParam.eciSpeed)
        if self._rateBoost:
            val = int(round(val / self.RATE_BOOST_MULTIPLIER))
        return self._paramToPercent(val, minRate, maxRate)

    def percentToRate(self, val):
        """Convert a rate percentage to an engine speed value, applying rate boost if enabled."""
        val = self._percentToParam(val, minRate, maxRate)
        if self._rateBoost:
            val = int(round(val * self.RATE_BOOST_MULTIPLIER))
        return val

    def _set_rate(self, val):
        val = self.percentToRate(val)
        # Remembered so _set_variant can restore the speed after switching variant.
        self._rate = val
        _ibmeci.setVParam(ECIVoiceParam.eciSpeed, val)

    def _get_pitch(self):
        return _ibmeci.getVParam(ECIVoiceParam.eciPitchBaseline)

    def _set_pitch(self, vl):
        _ibmeci.setVParam(ECIVoiceParam.eciPitchBaseline, vl)

    def _get_volume(self):
        return _ibmeci.getVParam(ECIVoiceParam.eciVolume)

    def _set_volume(self, vl):
        _ibmeci.setVParam(ECIVoiceParam.eciVolume, int(vl))

    def _set_inflection(self, vl):
        vl = int(vl)
        _ibmeci.setVParam(ECIVoiceParam.eciPitchFluctuation, vl)

    def _get_inflection(self):
        return _ibmeci.getVParam(ECIVoiceParam.eciPitchFluctuation)

    def _set_hsz(self, vl):
        vl = int(vl)
        _ibmeci.setVParam(ECIVoiceParam.eciHeadSize, vl)

    def _get_hsz(self):
        return _ibmeci.getVParam(ECIVoiceParam.eciHeadSize)

    def _set_rgh(self, vl):
        vl = int(vl)
        _ibmeci.setVParam(ECIVoiceParam.eciRoughness, vl)

    def _get_rgh(self):
        return _ibmeci.getVParam(ECIVoiceParam.eciRoughness)

    def _set_bth(self, vl):
        vl = int(vl)
        _ibmeci.setVParam(ECIVoiceParam.eciBreathiness, vl)

    def _get_bth(self):
        return _ibmeci.getVParam(ECIVoiceParam.eciBreathiness)

    def _getAvailableVoices(self):
        """Build the voice table from the ``.syn`` files found in ``_ibmeci.ttsPath``."""
        o = OrderedDict()
        for name in os.listdir(_ibmeci.ttsPath):
            if name.lower().endswith('.syn'):
                # The first three characters of the file name key into _ibmeci.langs.
                info = _ibmeci.langs[name.lower()[:3]]
                o[str(info[0])] = VoiceInfo(str(info[0]), info[1], info[2])
        return o

    def _get_voice(self):
        return str(_ibmeci.params[_ibmeci.ECIParam.eciLanguageDialect])

    def _set_voice(self, vl):
        _ibmeci.setVoice(int(vl))
        self.updateEncoding(int(vl))

    def updateEncoding(self, lang):
        """Select ``self.currentEncoding`` for the given language.

        lang must be a number asociated with IBMTTS languages or a bytes
        string with an annotation language.
        """
        # currently we don't need to consider the decimal part for the conversion.
        if isinstance(lang, bytes):
            lang = int(float(lang[2:])) * 65536
        if lang == 393216:
            # chinese
            self.currentEncoding = "gb2312"
        elif lang == 524288:
            # japan
            self.currentEncoding = "cp932"
        elif lang == 655360:
            # korean
            self.currentEncoding = "cp949"
        elif lang == 720897:
            self.currentEncoding = "big5"
        else:
            self.currentEncoding = "mbcs"

    def _get_lastIndex(self):
        # fix?
        return _ibmeci.lastindex

    def cancel(self):
        _ibmeci.stop()

    def _getAvailableVariants(self):
        return OrderedDict(
            (str(id), synthDriverHandler.VoiceInfo(str(id), name))
            for id, name in variants.items()
        )

    def _set_variant(self, v):
        # Fall back to variant "1" for unknown ids, and send the validated
        # value (not the raw input) to the engine so both stay in sync.
        self._variant = v if int(v) in variants else "1"
        _ibmeci.setVariant(int(self._variant))
        # Re-apply the stored speed after changing the variant.
        _ibmeci.setVParam(ECIVoiceParam.eciSpeed, self._rate)

    def _get_variant(self):
        return self._variant

    def _onIndexReached(self, index):
        synthIndexReached.notify(synth=self, index=index)

    def _onDoneSpeaking(self):
        synthDoneSpeaking.notify(synth=self)
class SynthDriver(synthDriverHandler.SynthDriver):
    """NVDA synthesizer driver for IBMTTS, implemented on top of the ``_ibmeci`` module.

    Speech sequences are translated into a list of ``(callable, args)`` pairs
    which is pushed onto ``_ibmeci.eciQueue``. Text is encoded to ``mbcs``
    before being handed to the engine.
    """

    supportedSettings = (
        SynthDriver.VoiceSetting(),
        SynthDriver.VariantSetting(),
        SynthDriver.RateSetting(),
        BooleanDriverSetting("rateBoost", _("Rate boos&t"), True),
        SynthDriver.PitchSetting(),
        SynthDriver.InflectionSetting(),
        SynthDriver.VolumeSetting(),
        NumericDriverSetting("hsz", _("Head Size"), False),
        NumericDriverSetting("rgh", _("Roughness"), False),
        NumericDriverSetting("bth", _("Breathiness"), False),
        BooleanDriverSetting("backquoteVoiceTags", _("Enable backquote voice &tags"), False),
    )
    supportedCommands = {
        speech.IndexCommand,
        speech.CharacterModeCommand,
        speech.LangChangeCommand,
        speech.BreakCommand,
        speech.PitchCommand,
        speech.RateCommand,
        speech.VolumeCommand,
    }
    supportedNotifications = {synthIndexReached, synthDoneSpeaking}

    description = 'IBMTTS'
    name = 'ibmeci'
    speakingLanguage = ""

    # Maps prosody speech commands to the engine voice parameter they change.
    PROSODY_ATTRS = {
        speech.PitchCommand: ECIVoiceParam.eciPitchBaseline,
        speech.VolumeCommand: ECIVoiceParam.eciVolume,
        speech.RateCommand: ECIVoiceParam.eciSpeed,
    }

    @classmethod
    def check(cls):
        """Return the result of ``_ibmeci.eciCheck()``."""
        return _ibmeci.eciCheck()

    def __init__(self):
        _ibmeci.initialize(self._onIndexReached, self._onDoneSpeaking)
        # This information doesn't really need to be displayed, and makes IBMTTS unusable
        # if the addon is not in the same drive as NVDA executable.
        # But display it only on debug mode in case of it can be useful
        log.debug("Using IBMTTS version %s" % _ibmeci.eciVersion())
        lang = languageHandler.getLanguage()
        # rate must be assigned before variant: _set_variant re-applies self._rate.
        self.rate = 50
        self.speakingLanguage = lang
        self.variant = "1"

    def speak(self, speechSequence):
        """Translate ``speechSequence`` into engine calls and queue them.

        Strings are passed through ``processText``; the supported commands are
        turned into index marks, language annotations, embedded backquote
        commands or prosody parameter changes. Anything else is logged as an error.
        """
        last = None
        defaultLanguage = self.language
        outlist = []
        # Every utterance starts with character mode switched off.
        outlist.append((_ibmeci.speak, (b"`ts0",)))
        for item in speechSequence:
            if isinstance(item, string_types):
                s = self.processText(unicode(item))
                outlist.append((_ibmeci.speak, (s,)))
                last = s
            elif isinstance(item, speech.IndexCommand):
                outlist.append((_ibmeci.index, (item.index,)))
            elif isinstance(item, speech.LangChangeCommand):
                l = None
                if item.lang in langsAnnotations:
                    l = langsAnnotations[item.lang]
                elif item.lang and item.lang[0:2] in langsAnnotations:
                    l = langsAnnotations[item.lang[0:2]]
                if l:
                    # Only emit the annotation when the language actually changes.
                    if item.lang != self.speakingLanguage and item.lang != self.speakingLanguage[0:2]:
                        outlist.append((_ibmeci.speak, (l,)))
                        self.speakingLanguage = item.lang
                else:
                    outlist.append((_ibmeci.speak, (langsAnnotations[defaultLanguage],)))
                    self.speakingLanguage = defaultLanguage
            elif isinstance(item, speech.CharacterModeCommand):
                outlist.append((_ibmeci.speak, (b"`ts1" if item.state else b"`ts0",)))
            elif isinstance(item, speech.BreakCommand):
                outlist.append((_ibmeci.speak, (b' `p%d ' % item.time,)))
            elif type(item) in self.PROSODY_ATTRS:
                val = max(0, min(item.newValue, 100))
                if type(item) == speech.RateCommand:
                    val = self.percentToRate(val)
                outlist.append((_ibmeci.setProsodyParam, (self.PROSODY_ATTRS[type(item)], val)))
            else:
                log.error("Unknown speech: %s" % item)
        if last is not None and last[-1] not in punctuation:
            # check if a pitch command is at the end of the list, because p1 need to be send before this.
            # index -2 is because -1 always seem to be an index command.
            # (outlist always has at least two entries here: the leading `ts0 plus the text.)
            if outlist[-2][0] == _ibmeci.setProsodyParam:
                outlist.insert(-2, (_ibmeci.speak, (b'`p1. ',)))
            else:
                outlist.append((_ibmeci.speak, (b'`p1. ',)))
        outlist.append((_ibmeci.setEndStringMark, ()))
        outlist.append((_ibmeci.synth, ()))
        _ibmeci.eciQueue.put(outlist)
        _ibmeci.process()

    def processText(self, text):
        """Apply language fixes to ``text`` and return the bytes to send to the engine.

        Backquotes (the engine's embedded-command prefix) are replaced by
        spaces unless the ``backquoteVoiceTags`` setting is enabled.
        """
        text = text.rstrip()
        lang = _ibmeci.params[9]
        if lang in (65536, 65537):
            text = resub(english_fixes, text)
        if lang in (131072, 131073):
            text = resub(spanish_fixes, text)
        if lang in (196609, 196608):
            text = resub(french_fixes, text)
            # Sometimes this string make everything buggy with IBMTTS in French
            text = text.replace('quil', 'qil')
        # this converts to ansi for anticrash. If this breaks with foreign langs, we can remove it.
        text = text.encode('mbcs', 'replace')
        text = resub(anticrash_res, text)
        if not self._backquoteVoiceTags:
            # no embedded commands. Done after encoding, so characters that
            # encode to a backquote are neutralised as well.
            text = text.replace(b'`', b' ')
        text = b"`pp0 `vv%d %s" % (_ibmeci.getVParam(ECIVoiceParam.eciVolume), text)
        text = pause_re.sub(br'\1 `p1\2\3', text)
        text = time_re.sub(br'\1:\2 \3', text)
        return text

    def pause(self, switch):
        _ibmeci.pause(switch)

    def terminate(self):
        _ibmeci.terminate()

    _backquoteVoiceTags = False

    def _get_backquoteVoiceTags(self):
        return self._backquoteVoiceTags

    def _set_backquoteVoiceTags(self, enable):
        if enable == self._backquoteVoiceTags:
            return
        self._backquoteVoiceTags = enable

    _rateBoost = False
    RATE_BOOST_MULTIPLIER = 1.6

    def _get_rateBoost(self):
        return self._rateBoost

    def _set_rateBoost(self, enable):
        if enable != self._rateBoost:
            # Re-apply the current percentage so the engine speed reflects the new boost state.
            rate = self.rate
            self._rateBoost = enable
            self.rate = rate

    def _get_rate(self):
        val = _ibmeci.getVParam(ECIVoiceParam.eciSpeed)
        if self._rateBoost:
            val = int(round(val / self.RATE_BOOST_MULTIPLIER))
        return self._paramToPercent(val, minRate, maxRate)

    def percentToRate(self, val):
        """Convert a rate percentage to an engine speed value, applying rate boost if enabled."""
        val = self._percentToParam(val, minRate, maxRate)
        if self._rateBoost:
            val = int(round(val * self.RATE_BOOST_MULTIPLIER))
        return val

    def _set_rate(self, val):
        val = self.percentToRate(val)
        # Remembered so _set_variant can restore the speed after switching variant.
        self._rate = val
        _ibmeci.setVParam(ECIVoiceParam.eciSpeed, val)

    def _get_pitch(self):
        return _ibmeci.getVParam(ECIVoiceParam.eciPitchBaseline)

    def _set_pitch(self, vl):
        _ibmeci.setVParam(ECIVoiceParam.eciPitchBaseline, vl)

    def _get_volume(self):
        return _ibmeci.getVParam(ECIVoiceParam.eciVolume)

    def _set_volume(self, vl):
        _ibmeci.setVParam(ECIVoiceParam.eciVolume, int(vl))

    def _set_inflection(self, vl):
        vl = int(vl)
        _ibmeci.setVParam(ECIVoiceParam.eciPitchFluctuation, vl)

    def _get_inflection(self):
        return _ibmeci.getVParam(ECIVoiceParam.eciPitchFluctuation)

    def _set_hsz(self, vl):
        vl = int(vl)
        _ibmeci.setVParam(ECIVoiceParam.eciHeadSize, vl)

    def _get_hsz(self):
        return _ibmeci.getVParam(ECIVoiceParam.eciHeadSize)

    def _set_rgh(self, vl):
        vl = int(vl)
        _ibmeci.setVParam(ECIVoiceParam.eciRoughness, vl)

    def _get_rgh(self):
        return _ibmeci.getVParam(ECIVoiceParam.eciRoughness)

    def _set_bth(self, vl):
        vl = int(vl)
        _ibmeci.setVParam(ECIVoiceParam.eciBreathiness, vl)

    def _get_bth(self):
        return _ibmeci.getVParam(ECIVoiceParam.eciBreathiness)

    def _getAvailableVoices(self):
        """Build the voice table from the ``.syn`` files found in ``_ibmeci.ttsPath``."""
        o = OrderedDict()
        for name in os.listdir(_ibmeci.ttsPath):
            if name.lower().endswith('.syn'):
                # The first three characters of the file name key into _ibmeci.langs.
                info = _ibmeci.langs[name.lower()[:3]]
                o[str(info[0])] = VoiceInfo(str(info[0]), info[1], info[2])
        return o

    def _get_voice(self):
        return str(_ibmeci.params[_ibmeci.ECIParam.eciLanguageDialect])

    def _set_voice(self, vl):
        _ibmeci.set_voice(vl)

    def _get_lastIndex(self):
        # fix?
        return _ibmeci.lastindex

    def cancel(self):
        _ibmeci.stop()

    def _getAvailableVariants(self):
        return OrderedDict(
            (str(id), synthDriverHandler.VoiceInfo(str(id), name))
            for id, name in variants.items()
        )

    def _set_variant(self, v):
        # Fall back to variant "1" for unknown ids, and send the validated
        # value (not the raw input) to the engine so both stay in sync.
        self._variant = v if int(v) in variants else "1"
        _ibmeci.setVariant(int(self._variant))
        # Re-apply the stored speed after changing the variant.
        _ibmeci.setVParam(ECIVoiceParam.eciSpeed, self._rate)

    def _get_variant(self):
        return self._variant

    def _onIndexReached(self, index):
        synthIndexReached.notify(synth=self, index=index)

    def _onDoneSpeaking(self):
        synthDoneSpeaking.notify(synth=self)
class SynthDriver(SynthDriver):
    """NVDA synthesizer driver for eSpeak NG, implemented on top of the ``_espeak`` module.

    A speech sequence is rendered into a single SSML-like string (marks,
    ``<voice>``, ``<prosody>``, ``<break>`` tags) and passed to ``_espeak.speak``.
    """

    name = "espeak"
    description = "eSpeak NG"

    supportedSettings = (
        SynthDriver.VoiceSetting(),
        SynthDriver.VariantSetting(),
        SynthDriver.RateSetting(),
        # Translators: This is the name of the rate boost voice toggle
        # which further increases the speaking rate when enabled.
        BooleanSynthSetting("rateBoost", _("Rate boos&t")),
        SynthDriver.PitchSetting(),
        SynthDriver.InflectionSetting(),
        SynthDriver.VolumeSetting(),
    )

    @classmethod
    def check(cls):
        return True

    def __init__(self):
        _espeak.initialize()
        log.info("Using eSpeak version %s" % _espeak.info())
        lang = languageHandler.getLanguage()
        _espeak.setVoiceByLanguage(lang)
        self._language = lang
        self._variantDict = _espeak.getVariantDict()
        self.variant = "max"
        self.rate = 30
        self.pitch = 40
        self.inflection = 75

    def _get_language(self):
        return self._language

    # Maps prosody speech commands to the <prosody> attribute they control.
    PROSODY_ATTRS = {
        speech.PitchCommand: "pitch",
        speech.VolumeCommand: "volume",
        speech.RateCommand: "rate",
    }

    # IPA characters that can be converted to eSpeak phoneme mnemonics.
    IPA_TO_ESPEAK = {
        u"θ": u"T",
        u"s": u"s",
        u"ˈ": u"'",
    }

    def _processText(self, text):
        """Return ``text`` as unicode, made safe for embedding in the markup sent to eSpeak."""
        text = unicode(text)
        # We need to make several replacements.
        return text.translate({
            0x1: None,  # used for embedded commands
            0x3C: u"&lt;",  # <: because of XML
            0x3E: u"&gt;",  # >: because of XML
            0x5B: u" [",  # [: [[ indicates phonemes
        })

    def speak(self, speechSequence):
        """Render ``speechSequence`` to markup and send it to ``_espeak.speak``."""
        defaultLanguage = self._language
        textList = []
        langChanged = False
        # Currently active prosody attributes: attribute name -> percentage.
        prosody = {}
        # We output malformed XML, as we might close an outer tag after opening an inner one; e.g.
        # <voice><prosody></voice></prosody>.
        # However, eSpeak doesn't seem to mind.
        for item in speechSequence:
            if isinstance(item, basestring):
                textList.append(self._processText(item))
            elif isinstance(item, speech.IndexCommand):
                textList.append("<mark name=\"%d\" />" % item.index)
            elif isinstance(item, speech.CharacterModeCommand):
                textList.append("<say-as interpret-as=\"characters\">" if item.state else "</say-as>")
            elif isinstance(item, speech.LangChangeCommand):
                if langChanged:
                    textList.append("</voice>")
                textList.append("<voice xml:lang=\"%s\">" % (item.lang if item.lang else defaultLanguage).replace('_', '-'))
                langChanged = True
            elif isinstance(item, speech.BreakCommand):
                textList.append('<break time="%dms" />' % item.time)
            elif type(item) in self.PROSODY_ATTRS:
                if prosody:
                    # Close previous prosody tag.
                    textList.append("</prosody>")
                attr = self.PROSODY_ATTRS[type(item)]
                if item.multiplier == 1:
                    # Returning to normal.
                    prosody.pop(attr, None)
                else:
                    prosody[attr] = int(item.multiplier * 100)
                if not prosody:
                    continue
                # Re-open a single tag carrying every attribute still in effect.
                textList.append("<prosody")
                for prosodyAttr, val in prosody.iteritems():
                    textList.append(' %s="%d%%"' % (prosodyAttr, val))
                textList.append(">")
            elif isinstance(item, speech.PhonemeCommand):
                # We can't use unicode.translate because we want to reject unknown characters.
                try:
                    phonemes = "".join([self.IPA_TO_ESPEAK[char] for char in item.ipa])
                    # There needs to be a space after the phoneme command.
                    # Otherwise, eSpeak will announce a subsequent SSML tag instead of processing it.
                    textList.append(u"[[%s]] " % phonemes)
                except KeyError:
                    log.debugWarning("Unknown character in IPA string: %s" % item.ipa)
                    # Fall back to the command's plain text, if it has any.
                    if item.text:
                        textList.append(self._processText(item.text))
            elif isinstance(item, speech.SpeechCommand):
                log.debugWarning("Unsupported speech command: %s" % item)
            else:
                log.error("Unknown speech: %s" % item)
        # Close any open tags.
        if langChanged:
            textList.append("</voice>")
        if prosody:
            textList.append("</prosody>")
        text = u"".join(textList)
        _espeak.speak(text)

    def cancel(self):
        _espeak.stop()

    def pause(self, switch):
        _espeak.pause(switch)

    _rateBoost = False
    RATE_BOOST_MULTIPLIER = 3

    def _get_rateBoost(self):
        return self._rateBoost

    def _set_rateBoost(self, enable):
        if enable == self._rateBoost:
            return
        # Re-apply the current percentage so the engine rate reflects the new boost state.
        rate = self.rate
        self._rateBoost = enable
        self.rate = rate

    def _get_rate(self):
        val = _espeak.getParameter(_espeak.espeakRATE, 1)
        if self._rateBoost:
            val = int(val / self.RATE_BOOST_MULTIPLIER)
        return self._paramToPercent(val, _espeak.minRate, _espeak.maxRate)

    def _set_rate(self, rate):
        val = self._percentToParam(rate, _espeak.minRate, _espeak.maxRate)
        if self._rateBoost:
            val = int(val * self.RATE_BOOST_MULTIPLIER)
        _espeak.setParameter(_espeak.espeakRATE, val, 0)

    def _get_pitch(self):
        val = _espeak.getParameter(_espeak.espeakPITCH, 1)
        return self._paramToPercent(val, _espeak.minPitch, _espeak.maxPitch)

    def _set_pitch(self, pitch):
        val = self._percentToParam(pitch, _espeak.minPitch, _espeak.maxPitch)
        _espeak.setParameter(_espeak.espeakPITCH, val, 0)

    def _get_inflection(self):
        val = _espeak.getParameter(_espeak.espeakRANGE, 1)
        return self._paramToPercent(val, _espeak.minPitch, _espeak.maxPitch)

    def _set_inflection(self, val):
        val = self._percentToParam(val, _espeak.minPitch, _espeak.maxPitch)
        _espeak.setParameter(_espeak.espeakRANGE, val, 0)

    def _get_volume(self):
        return _espeak.getParameter(_espeak.espeakVOLUME, 1)

    def _set_volume(self, volume):
        _espeak.setParameter(_espeak.espeakVOLUME, volume, 0)

    def _getAvailableVoices(self):
        voices = OrderedDict()
        for v in _espeak.getVoiceList():
            l = v.languages[1:]
            # #5783: For backwards compatibility, voice identifies should always be lowercase
            identifier = os.path.basename(v.identifier).lower()
            voices[identifier] = VoiceInfo(identifier, v.name, l)
        return voices

    def _get_voice(self):
        curVoice = getattr(self, '_voice', None)
        if curVoice:
            return curVoice
        curVoice = _espeak.getCurrentVoice()
        if not curVoice:
            return ""
        # #5783: For backwards compatibility, voice identifies should always be lowercase
        return curVoice.identifier.split('+')[0].lower()

    def _set_voice(self, identifier):
        if not identifier:
            return
        # #5783: For backwards compatibility, voice identifies should always be lowercase
        identifier = identifier.lower()
        if "\\" in identifier:
            identifier = os.path.basename(identifier)
        self._voice = identifier
        try:
            _espeak.setVoiceAndVariant(voice=identifier, variant=self._variant)
        except:
            # Forget the cached voice on any failure, then let the error propagate.
            self._voice = None
            raise
        self._language = super(SynthDriver, self).language

    def _get_lastIndex(self):
        return _espeak.lastIndex

    def terminate(self):
        _espeak.terminate()

    def _get_variant(self):
        return self._variant

    def _set_variant(self, val):
        # Unknown variants fall back to "max".
        self._variant = val if val in self._variantDict else "max"
        _espeak.setVoiceAndVariant(variant=self._variant)

    def _getAvailableVariants(self):
        return OrderedDict((ID, VoiceInfo(ID, name)) for ID, name in self._variantDict.iteritems())
class SynthDriver(synthDriverHandler.SynthDriver):
    """NVDA synthesizer driver for ETI-Eloquence, implemented on top of the ``_eloquence`` module.

    Speech sequences are translated into a list of ``(callable, args)`` pairs
    which is pushed onto ``_eloquence.synth_queue``.
    """

    supportedSettings = (
        SynthDriver.VoiceSetting(),
        SynthDriver.VariantSetting(),
        SynthDriver.RateSetting(),
        SynthDriver.PitchSetting(),
        SynthDriver.InflectionSetting(),
        SynthDriver.VolumeSetting(),
        driverHandler.NumericDriverSetting("hsz", "Head Size"),
        driverHandler.NumericDriverSetting("rgh", "Roughness"),
        driverHandler.NumericDriverSetting("bth", "Breathiness"),
        driverHandler.BooleanDriverSetting("backquoteVoiceTags", "Enable backquote voice &tags", True),
    )
    supportedCommands = {
        speech.IndexCommand,
        speech.CharacterModeCommand,
        speech.LangChangeCommand,
        speech.BreakCommand,
        speech.PitchCommand,
        speech.RateCommand,
        speech.VolumeCommand,
        speech.PhonemeCommand,
    }
    supportedNotifications = {synthIndexReached, synthDoneSpeaking}

    # Maps prosody speech commands to the engine parameter they change.
    PROSODY_ATTRS = {
        speech.PitchCommand: _eloquence.pitch,
        speech.VolumeCommand: _eloquence.vlm,
        speech.RateCommand: _eloquence.rate,
    }

    # Eloquence doesn't respect delay time in milliseconds.
    # Therefore we need to adjust waiting time depending on current speech rate.
    # The following table of adjustments (rate percent -> factor) has been
    # measured empirically; values in between are linearly interpolated.
    BREAK_RATE_COEFFICIENTS = {
        10: 1,
        43: 2,
        60: 3,
        75: 4,
        85: 5,
    }

    description = 'ETI-Eloquence'
    name = 'eloquence'

    @classmethod
    def check(cls):
        """Return the result of ``_eloquence.eciCheck()``."""
        return _eloquence.eciCheck()

    def __init__(self):
        _eloquence.initialize(self._onIndexReached)
        self.curvoice = "enu"
        # rate must be assigned before variant: _set_variant re-applies self._rate.
        self.rate = 50
        self.variant = "1"

    def _breakTimeFactor(self):
        """Return the multiplier applied to a break time at the current rate."""
        table = self.BREAK_RATE_COEFFICIENTS
        knots = sorted(table)
        rate = self.rate
        if rate <= knots[0]:
            return table[knots[0]]
        if rate >= knots[-1]:
            return table[knots[-1]]
        if rate in table:
            return table[rate]
        # rate lies strictly between two measured points: interpolate linearly.
        ra = max(k for k in knots if k < rate)
        rb = min(k for k in knots if k > rate)
        return 1.0 * table[ra] + (table[rb] - table[ra]) * (rate - ra) / (rb - ra)

    def speak(self, speechSequence):
        """Translate ``speechSequence`` into engine calls and queue them.

        Only text, index, break and prosody items are acted upon; any other
        item in the sequence is ignored.
        """
        last = None
        outlist = []
        for item in speechSequence:
            if isinstance(item, str):
                s = self.xspeakText(str(item))
                outlist.append((_eloquence.speak, (s,)))
                last = s
            elif isinstance(item, speech.IndexCommand):
                outlist.append((_eloquence.index, (item.index,)))
            elif isinstance(item, speech.BreakCommand):
                pFactor = int(self._breakTimeFactor() * item.time)
                outlist.append((_eloquence.speak, (f'`p{pFactor}.',)))
            elif type(item) in self.PROSODY_ATTRS:
                pr = self.PROSODY_ATTRS[type(item)]
                if item.multiplier == 1:
                    # Revert back to defaults
                    outlist.append((_eloquence.cmdProsody, (pr, None)))
                else:
                    outlist.append((_eloquence.cmdProsody, (pr, item.multiplier)))
        if last is not None and not last.rstrip()[-1] in punctuation:
            outlist.append((_eloquence.speak, ('`p1.',)))
        # 0xffff is queued as the final index of every utterance.
        outlist.append((_eloquence.index, (0xffff,)))
        outlist.append((_eloquence.synth, ()))
        _eloquence.synth_queue.put(outlist)
        _eloquence.process()

    def xspeakText(self, text, should_pause=False):
        """Apply language fixes to ``text`` and prefix the embedded engine commands.

        When ``should_pause`` is true a short pause command is appended.
        """
        lang = _eloquence.params[9]
        if lang == 65536 or lang == 65537:
            text = resub(english_fixes, text)
        if lang == 131072 or lang == 131073:
            text = resub(spanish_fixes, text)
        if lang in (196609, 196608):
            text = resub(french_fixes, text)
        text = normalizeText(text)
        text = resub(anticrash_res, text)
        if not self._backquoteVoiceTags:
            # no embedded commands
            text = text.replace('`', ' ')
        text = "`pp0 `vv%d %s" % (self.getVParam(_eloquence.vlm), text)
        text = pause_re.sub(r'\1 `p1\2\3', text)
        text = time_re.sub(r'\1:\2 \3', text)
        # if two strings are sent separately, pause between them.
        # This might fix some of the audio issues we're having.
        if should_pause:
            text = text + ' `p1.'
        return text

    def pause(self, switch):
        _eloquence.pause(switch)

    def terminate(self):
        _eloquence.terminate()

    _backquoteVoiceTags = False

    def _get_backquoteVoiceTags(self):
        return self._backquoteVoiceTags

    def _set_backquoteVoiceTags(self, enable):
        if enable == self._backquoteVoiceTags:
            return
        self._backquoteVoiceTags = enable

    def _get_rate(self):
        return self._paramToPercent(self.getVParam(_eloquence.rate), minRate, maxRate)

    def _set_rate(self, vl):
        # Remembered so _set_variant can restore the rate after switching variant.
        self._rate = self._percentToParam(vl, minRate, maxRate)
        self.setVParam(_eloquence.rate, self._rate)

    def _get_pitch(self):
        return self.getVParam(_eloquence.pitch)

    def _set_pitch(self, vl):
        self.setVParam(_eloquence.pitch, vl)

    def _get_volume(self):
        return self.getVParam(_eloquence.vlm)

    def _set_volume(self, vl):
        self.setVParam(_eloquence.vlm, int(vl))

    def _set_inflection(self, vl):
        vl = int(vl)
        self.setVParam(_eloquence.fluctuation, vl)

    def _get_inflection(self):
        return self.getVParam(_eloquence.fluctuation)

    def _set_hsz(self, vl):
        vl = int(vl)
        self.setVParam(_eloquence.hsz, vl)

    def _get_hsz(self):
        return self.getVParam(_eloquence.hsz)

    def _set_rgh(self, vl):
        vl = int(vl)
        self.setVParam(_eloquence.rgh, vl)

    def _get_rgh(self):
        return self.getVParam(_eloquence.rgh)

    def _set_bth(self, vl):
        vl = int(vl)
        self.setVParam(_eloquence.bth, vl)

    def _get_bth(self):
        return self.getVParam(_eloquence.bth)

    def _getAvailableVoices(self):
        """Build the voice table from the ``.syn`` files next to the engine library."""
        o = OrderedDict()
        for name in os.listdir(_eloquence.eciPath[:-8]):
            if not name.lower().endswith('.syn'):
                continue
            # The file name without its extension keys into _eloquence.langs.
            info = _eloquence.langs[name.lower()[:-4]]
            o[str(info[0])] = synthDriverHandler.VoiceInfo(str(info[0]), info[1], None)
        return o

    def _get_voice(self):
        return str(_eloquence.params[9])

    def _set_voice(self, vl):
        _eloquence.set_voice(vl)
        self.curvoice = vl

    def getVParam(self, pr):
        return _eloquence.getVParam(pr)

    def setVParam(self, pr, vl):
        _eloquence.setVParam(pr, vl)

    def _get_lastIndex(self):
        # fix?
        return _eloquence.lastindex

    def cancel(self):
        _eloquence.stop()

    def _getAvailableVariants(self):
        return OrderedDict(
            (str(id), synthDriverHandler.VoiceInfo(str(id), name))
            for id, name in variants.items()
        )

    def _set_variant(self, v):
        # Fall back to variant "1" for unknown ids, and send the validated
        # value (not the raw input) to the engine so both stay in sync.
        self._variant = v if int(v) in variants else "1"
        _eloquence.setVariant(int(self._variant))
        # Re-apply the stored rate after changing the variant.
        self.setVParam(_eloquence.rate, self._rate)

    def _get_variant(self):
        return self._variant

    def _onIndexReached(self, index):
        # A None index is reported as the end of speech.
        if index is not None:
            synthIndexReached.notify(synth=self, index=index)
        else:
            synthDoneSpeaking.notify(synth=self)
class SynthDriver(SynthDriver):
    """NVDA synthesizer driver backed by the native RHVoice library.

    Speech sequences are converted to an SSML document and handed to a
    background TTS thread through a queue; audio is played with an
    ``nvwave.WavePlayer``.
    """

    name = "RHVoice"
    description = "RHVoice"
    supportedSettings = (
        SynthDriver.RateSetting(),
        SynthDriver.PitchSetting(),
        SynthDriver.VolumeSetting(),
        SynthDriver.VoiceSetting(),
        SynthDriver.VariantSetting(),
    )

    @classmethod
    def check(cls):
        """Return True when the RHVoice library file exists at ``lib_path``."""
        return os.path.isfile(lib_path)

    def __init__(self):
        """Load the library, declare its prototypes and start the TTS thread.

        Raises:
            RuntimeError: if the library reports a zero sample rate or
                exposes no voices.
        """
        self.__lib = ctypes.CDLL(lib_path.encode(sys.getfilesystemencoding()))
        # Declare argument/return types for every native entry point used.
        self.__lib.RHVoice_initialize.argtypes = (
            c_char_p, RHVoice_callback, c_char_p, c_uint)
        self.__lib.RHVoice_initialize.restype = c_int
        self.__lib.RHVoice_new_message_utf16.argtypes = (
            c_wchar_p, c_int, c_int)
        self.__lib.RHVoice_new_message_utf16.restype = RHVoice_message
        self.__lib.RHVoice_delete_message.argtypes = (RHVoice_message, )
        self.__lib.RHVoice_speak.argtypes = (RHVoice_message, )
        self.__lib.RHVoice_get_min_rate.restype = c_float
        self.__lib.RHVoice_get_rate.restype = c_float
        self.__lib.RHVoice_get_max_rate.restype = c_float
        self.__lib.RHVoice_get_min_pitch.restype = c_float
        self.__lib.RHVoice_get_pitch.restype = c_float
        self.__lib.RHVoice_get_max_pitch.restype = c_float
        self.__lib.RHVoice_get_volume.restype = c_float
        self.__lib.RHVoice_get_max_volume.restype = c_float
        self.__lib.RHVoice_get_voice_count.restype = c_int
        self.__lib.RHVoice_get_variant_count.restype = c_int
        self.__lib.RHVoice_get_voice_name.argtypes = (c_int, )
        self.__lib.RHVoice_get_voice_name.restype = c_char_p
        self.__lib.RHVoice_get_variant_name.argtypes = (c_int, )
        self.__lib.RHVoice_get_variant_name.restype = c_char_p
        self.__lib.RHVoice_find_voice.argtypes = (c_char_p, )
        self.__lib.RHVoice_find_voice.restype = c_int
        self.__lib.RHVoice_find_variant.argtypes = (c_char_p, )
        self.__lib.RHVoice_find_variant.restype = c_int
        self.__lib.RHVoice_get_voice.restype = c_int
        self.__lib.RHVoice_get_variant.restype = c_int
        self.__lib.RHVoice_set_voice.argtypes = (c_int, )
        self.__lib.RHVoice_set_variant.argtypes = (c_int, )
        self.__lib.RHVoice_get_version.restype = c_char_p

        self.__silence_flag = threading.Event()
        self.__audio_callback = AudioCallback(self.__lib, self.__silence_flag)
        # Keep a reference to the ctypes wrapper on the instance so it is
        # not garbage-collected while the native library still holds it.
        self.__audio_callback_wrapper = RHVoice_callback(self.__audio_callback)
        sample_rate = self.__lib.RHVoice_initialize(
            data_path.encode("UTF-8"),
            self.__audio_callback_wrapper,
            cfg_path.encode("UTF-8"),
            0)
        if sample_rate == 0:
            raise RuntimeError("RHVoice: initialization error")
        voice_count = self.__lib.RHVoice_get_voice_count()
        if voice_count == 0:
            raise RuntimeError("RHVoice: initialization error")

        self.__player = nvwave.WavePlayer(
            channels=1,
            samplesPerSec=sample_rate,
            bitsPerSample=16,
            outputDevice=config.conf["speech"]["outputDevice"])
        self.__audio_callback.set_player(self.__player)
        self.__tts_queue = Queue.Queue()
        self.__tts_thread = TTSThread(
            self.__lib, self.__tts_queue, self.__player, self.__silence_flag)

        # Native voice and variant identifiers are 1-based.
        self._availableVoices = OrderedDict()
        for voice_id in range(1, voice_count + 1):
            voice_name = self.__lib.RHVoice_get_voice_name(voice_id)
            self._availableVoices[voice_name] = VoiceInfo(
                voice_name, voice_name, "ru")
        self.__lib.RHVoice_set_voice(1)
        self.__voice = self.__lib.RHVoice_get_voice_name(1)

        variant_count = self.__lib.RHVoice_get_variant_count()
        self._availableVariants = OrderedDict()
        for variant_id in range(1, variant_count + 1):
            variant_name = self.__lib.RHVoice_get_variant_name(variant_id)
            self._availableVariants[variant_name] = VoiceInfo(
                variant_name, variant_name, "ru")
        self.__lib.RHVoice_set_variant(1)
        self.__variant = self.__lib.RHVoice_get_variant_name(1)

        self.__rate = 50
        self.__pitch = 50
        self.__volume = 50
        # Each tuple is (minimum, maximum, library default).
        self.__native_rate_range = (
            self.__lib.RHVoice_get_min_rate(),
            self.__lib.RHVoice_get_max_rate(),
            self.__lib.RHVoice_get_rate())
        self.__native_pitch_range = (
            self.__lib.RHVoice_get_min_pitch(),
            self.__lib.RHVoice_get_max_pitch(),
            self.__lib.RHVoice_get_pitch())
        self.__native_volume_range = (
            0,
            self.__lib.RHVoice_get_max_volume(),
            self.__lib.RHVoice_get_volume())

        # Translation table applied to every text chunk before it is
        # embedded in SSML: control characters (except tab-adjacent
        # whitespace 9, 10 and 13) become spaces.
        self.__char_mapping = {}
        for code in range(9):
            self.__char_mapping[code] = 32
        self.__char_mapping[11] = 32
        self.__char_mapping[12] = 32
        for code in range(14, 32):
            self.__char_mapping[code] = 32
        # Markup characters must be escaped, otherwise spoken text that
        # contains "<" or "&" corrupts the generated SSML document.
        self.__char_mapping[ord("<")] = u"&lt;"
        self.__char_mapping[ord("&")] = u"&amp;"

        self.__tts_thread.start()
        log.info("Using RHVoice version %s" % self.__lib.RHVoice_get_version())

    def terminate(self):
        """Stop speech, shut down the TTS thread and release the library."""
        self.cancel()
        # None asks the worker thread to exit
        # (NOTE(review): sentinel meaning is defined by TTSThread - confirm).
        self.__tts_queue.put(None)
        self.__tts_thread.join()
        self.__player.close()
        self.__lib.RHVoice_terminate()

    def speak(self, speech_sequence):
        """Convert a speech sequence to SSML and queue it for synthesis.

        Text items are sanitised with the character mapping, index commands
        become ``<mark>`` elements and character-mode commands toggle
        ``<say-as interpret-as="characters">`` wrapping of following text.
        """
        spell_mode = False
        text_list = []
        for item in speech_sequence:
            if isinstance(item, basestring):
                s = unicode(item).translate(self.__char_mapping)
                if spell_mode:
                    s = u'<say-as interpret-as="characters">%s</say-as>' % s
                text_list.append(s)
            elif isinstance(item, speech.IndexCommand):
                text_list.append('<mark name="%d"/>' % item.index)
            elif isinstance(item, speech.CharacterModeCommand):
                spell_mode = bool(item.state)
            elif isinstance(item, speech.SpeechCommand):
                log.debugWarning("Unsupported speech command: %s" % item)
            else:
                log.error("Unknown speech: %s" % item)
        text = u"".join(text_list)
        fmt_str = u'<speak><voice name="%s" variant="%d"><prosody rate="%f%%" pitch="%f%%" volume="%f%%">%s</prosody></voice></speak>'
        variant = self.__lib.RHVoice_find_variant(self.__variant)
        if variant == 0:
            # Variant name not found: fall back to the first variant.
            variant = 1
        rate = convert_to_native_percent(
            self.__rate, *self.__native_rate_range)
        pitch = convert_to_native_percent(
            self.__pitch, *self.__native_pitch_range)
        volume = convert_to_native_percent(
            self.__volume, *self.__native_volume_range)
        ssml = fmt_str % (self.__voice, variant, rate, pitch, volume, text)
        self.__tts_queue.put(ssml)

    def pause(self, switch):
        """Pause or resume audio playback."""
        self.__player.pause(switch)

    def cancel(self):
        """Discard all queued utterances and silence current playback."""
        try:
            # Drain the queue; get_nowait() raises Queue.Empty when done,
            # so the handler below always runs.
            while True:
                self.__tts_queue.get_nowait()
        except Queue.Empty:
            self.__silence_flag.set()
            # NOTE(review): the empty tuple is presumably a "reset" marker
            # understood by TTSThread - confirm against its run loop.
            self.__tts_queue.put(())
            self.__player.stop()

    def _get_lastIndex(self):
        return self.__audio_callback.index

    def _get_language(self):
        return "ru"

    def _get_rate(self):
        return self.__rate

    def _set_rate(self, rate):
        # Clamp to the 0-100 percentage range.
        self.__rate = max(0, min(100, rate))

    def _get_pitch(self):
        return self.__pitch

    def _set_pitch(self, pitch):
        self.__pitch = max(0, min(100, pitch))

    def _get_volume(self):
        return self.__volume

    def _set_volume(self, volume):
        self.__volume = max(0, min(100, volume))

    def _get_voice(self):
        return self.__voice

    def _set_voice(self, voice):
        """Select *voice* if it is known; otherwise keep the current one."""
        try:
            self.__voice = self._availableVoices[voice].ID
        except (KeyError, TypeError):
            # Unknown or unhashable identifier: deliberately ignored.
            pass

    def _get_variant(self):
        return self.__variant

    def _set_variant(self, variant):
        """Select *variant* if it is known; otherwise keep the current one."""
        try:
            self.__variant = self._availableVariants[variant].ID
        except (KeyError, TypeError):
            # Unknown or unhashable identifier: deliberately ignored.
            pass
class SynthDriver(synthDriverHandler.SynthDriver):
    """NVDA synthesizer driver for IBMTTS (ECI), using the ``_ibmeci`` module.

    Speech sequences are translated into a list of ``(callable, args)``
    pairs that is pushed to ``_ibmeci.synthQueue``.
    """

    supportedSettings = (
        SynthDriver.VoiceSetting(),
        SynthDriver.VariantSetting(),
        SynthDriver.RateSetting(),
        BooleanSynthSetting("rateBoost", _("Rate boos&t")),
        SynthDriver.PitchSetting(),
        SynthDriver.InflectionSetting(),
        SynthDriver.VolumeSetting(),
        NumericSynthSetting("hsz", _("Head Size"), False),
        NumericSynthSetting("rgh", _("Roughness"), False),
        NumericSynthSetting("bth", _("Breathiness"), False),
        BooleanSynthSetting(
            "backquoteVoiceTags", _("Enable backquote voice &tags"), False),
    )
    description = 'IBMTTS'
    name = 'ibmeci'
    speakingLanguage = ""

    @classmethod
    def check(cls):
        """Return whatever ``_ibmeci.eciCheck()`` reports about availability."""
        return _ibmeci.eciCheck()

    def __init__(self):
        _ibmeci.initialize()
        # This information doesn't really need to be displayed, and makes
        # IBMTTS unusable if the addon is not in the same drive as NVDA
        # executable. But display it only on debug mode in case it can be
        # useful.
        log.debug("Using IBMTTS version %s" % _ibmeci.eciVersion())
        lang = languageHandler.getLanguage()
        self.rate = 50
        self.speakingLanguage = lang
        self.variant = "1"

    def speak(self, speechSequence):
        """Queue *speechSequence* for synthesis.

        Text is passed through :meth:`processText`; index, language-change
        and character-mode commands are turned into the matching
        ``_ibmeci`` calls or backquote annotations.
        """
        last = None
        defaultLanguage = self.language
        outlist = []
        for item in speechSequence:
            if isinstance(item, string_types):
                s = self.processText(unicode(item))
                outlist.append((_ibmeci.speak, (s,)))
                last = s
            elif isinstance(item, speech.IndexCommand):
                outlist.append((_ibmeci.index, (item.index,)))
            elif isinstance(item, speech.LangChangeCommand):
                # Look up the annotation for the full code first, then for
                # its two-letter prefix.
                l = None
                if item.lang in langsAnnotations:
                    l = langsAnnotations[item.lang]
                elif item.lang and item.lang[0:2] in langsAnnotations:
                    l = langsAnnotations[item.lang[0:2]]
                if l:
                    if (item.lang != self.speakingLanguage
                            and item.lang != self.speakingLanguage[0:2]):
                        outlist.append((_ibmeci.speak, (l,)))
                        self.speakingLanguage = item.lang
                else:
                    # Unsupported language: fall back to the default one.
                    outlist.append(
                        (_ibmeci.speak, (langsAnnotations[defaultLanguage],)))
                    self.speakingLanguage = defaultLanguage
            elif isinstance(item, speech.CharacterModeCommand):
                # Both alternatives must be bytes annotations; the "off"
                # command was previously the text string "b`ts0", which the
                # engine would receive as literal text.
                outlist.append(
                    (_ibmeci.speak, (b"`ts1" if item.state else b"`ts0",)))
            elif isinstance(item, speech.SpeechCommand):
                log.debugWarning("Unsupported speech command: %s" % item)
            else:
                log.error("Unknown speech: %s" % item)
        # Add a short pause when the last text chunk has no final punctuation.
        if last is not None and not last[-1] in punctuation:
            outlist.append((_ibmeci.speak, (b'`p1',)))
        outlist.append((_ibmeci.setEndStringMark, ()))
        # Always leave character mode switched off at the end of the utterance.
        outlist.append((_ibmeci.speak, (b"`ts0",)))
        outlist.append((_ibmeci.synth, ()))
        _ibmeci.synthQueue.put(outlist)
        _ibmeci.process()

    def processText(self, text):
        """Return *text* as an ``mbcs``-encoded byte string ready for the engine.

        Applies the per-language regex fixes selected by ``_ibmeci.params[9]``,
        the anti-crash substitutions, replaces backquotes with spaces,
        prefixes the volume annotation and inserts pause annotations.
        """
        text = text.rstrip()
        if _ibmeci.params[9] in (65536, 65537):
            text = resub(english_fixes, text)
        if _ibmeci.params[9] in (131072, 131073):
            text = resub(spanish_fixes, text)
        if _ibmeci.params[9] in (196609, 196608):
            text = resub(french_fixes, text)
            # Sometimes this string make everything buggy with IBMTTS in French
            text = text.replace('quil', 'qil')
        #if not self._backquoteVoiceTags: text = text.replace(u'‵', ' ')
        if self._backquoteVoiceTags:
            # no embedded commands
            text = "`pp0 `vv%d %s" % (
                self.getVParam(_ibmeci.vlm), text.replace('`', ' '))
            text = resub(anticrash_res, text)
            # this converts to ansi for anticrash. If this breaks with
            # foreign langs, we can remove it.
            text = text.encode('mbcs', 'replace')
        else:
            # this converts to ansi for anticrash. If this breaks with
            # foreign langs, we can remove it.
            text = text.encode('mbcs', 'replace')
            text = resub(anticrash_res, text)
            # no embedded commands
            text = b"`pp0 `vv%d %s" % (
                self.getVParam(_ibmeci.vlm), text.replace(b'`', b' '))
        text = pause_re.sub(br'\1 `p1\2\3', text)
        text = time_re.sub(br'\1:\2 \3', text)
        # Temporary fix: replace a trailing "," with " -" because IBMTTS
        # seems to ignore commas at the end.
        # endswith() is used instead of comparing text[-1] with b",":
        # indexing bytes yields an int on Python 3, so that comparison
        # could never match there.
        if text.endswith(b","):
            text = text[0:-1] + b" -"
        return text

    def pause(self, switch):
        _ibmeci.pause(switch)

    def terminate(self):
        _ibmeci.terminate()

    _backquoteVoiceTags = False

    def _get_backquoteVoiceTags(self):
        return self._backquoteVoiceTags

    def _set_backquoteVoiceTags(self, enable):
        if enable == self._backquoteVoiceTags:
            return
        self._backquoteVoiceTags = enable

    _rateBoost = False
    RATE_BOOST_MULTIPLIER = 1.6

    def _get_rateBoost(self):
        return self._rateBoost

    def _set_rateBoost(self, enable):
        if enable != self._rateBoost:
            # Read the percentage under the old setting and re-apply it
            # under the new one so the engine value is rescaled.
            rate = self.rate
            self._rateBoost = enable
            self.rate = rate

    def _get_rate(self):
        val = self.getVParam(_ibmeci.rate)
        if self._rateBoost:
            val = int(round(val / self.RATE_BOOST_MULTIPLIER))
        return self._paramToPercent(val, minRate, maxRate)

    def _set_rate(self, vl):
        val = self._percentToParam(vl, minRate, maxRate)
        if self._rateBoost:
            val = int(round(val * self.RATE_BOOST_MULTIPLIER))
        # Remembered so _set_variant can restore the rate afterwards.
        self._rate = val
        self.setVParam(_ibmeci.rate, val)

    def _get_pitch(self):
        return self.getVParam(_ibmeci.pitch)

    def _set_pitch(self, vl):
        self.setVParam(_ibmeci.pitch, vl)

    def _get_volume(self):
        return self.getVParam(_ibmeci.vlm)

    def _set_volume(self, vl):
        self.setVParam(_ibmeci.vlm, int(vl))

    def _set_inflection(self, vl):
        vl = int(vl)
        self.setVParam(_ibmeci.fluctuation, vl)

    def _get_inflection(self):
        return self.getVParam(_ibmeci.fluctuation)

    def _set_hsz(self, vl):
        vl = int(vl)
        self.setVParam(_ibmeci.hsz, vl)

    def _get_hsz(self):
        return self.getVParam(_ibmeci.hsz)

    def _set_rgh(self, vl):
        vl = int(vl)
        self.setVParam(_ibmeci.rgh, vl)

    def _get_rgh(self):
        return self.getVParam(_ibmeci.rgh)

    def _set_bth(self, vl):
        vl = int(vl)
        self.setVParam(_ibmeci.bth, vl)

    def _get_bth(self):
        return self.getVParam(_ibmeci.bth)

    def _getAvailableVoices(self):
        """Build the voice table from the ``.syn`` files in ``_ibmeci.ttsPath``."""
        o = OrderedDict()
        for name in os.listdir(_ibmeci.ttsPath):
            if name.lower().endswith('.syn'):
                # The first three characters of the file name key the
                # language table.
                info = _ibmeci.langs[name.lower()[:3]]
                o[str(info[0])] = VoiceInfo(str(info[0]), info[1], info[2])
        return o

    def _get_voice(self):
        return str(_ibmeci.params[9])

    def _set_voice(self, vl):
        _ibmeci.set_voice(vl)

    def getVParam(self, pr):
        return _ibmeci.getVParam(pr)

    def setVParam(self, pr, vl):
        _ibmeci.setVParam(pr, vl)

    def _get_lastIndex(self):
        # fix?
        return _ibmeci.lastindex

    def cancel(self):
        _ibmeci.stop()

    def _getAvailableVariants(self):
        global variants
        return OrderedDict(
            (str(id), synthDriverHandler.VoiceInfo(str(id), name))
            for id, name in variants.items())

    def _set_variant(self, v):
        global variants
        self._variant = v if int(v) in variants else "1"
        _ibmeci.setVariant(int(v))
        # Re-apply the stored rate after switching variant.
        self.setVParam(_ibmeci.rate, self._rate)
        # if 'ibmtts' in config.conf['speech']:
        #     config.conf['speech']['ibmtts']['pitch'] = self.pitch

    def _get_variant(self):
        return self._variant
class SynthDriver(SynthDriver):
    """NVDA synthesizer driver for eSpeak NG, using the ``_espeak`` module.

    Speech sequences are rendered to an SSML-like string that is passed to
    ``_espeak.speak``.
    """

    name = "espeak"
    description = "eSpeak NG"

    supportedSettings = (
        SynthDriver.VoiceSetting(),
        SynthDriver.VariantSetting(),
        SynthDriver.RateSetting(),
        SynthDriver.RateBoostSetting(),
        SynthDriver.PitchSetting(),
        SynthDriver.InflectionSetting(),
        SynthDriver.VolumeSetting(),
    )
    supportedCommands = {
        IndexCommand,
        CharacterModeCommand,
        LangChangeCommand,
        BreakCommand,
        PitchCommand,
        RateCommand,
        VolumeCommand,
        PhonemeCommand,
    }
    supportedNotifications = {synthIndexReached, synthDoneSpeaking}

    # A mapping of commonly used language codes to eSpeak languages.
    # Introduced due to eSpeak issue: https://github.com/espeak-ng/espeak-ng/issues/1200
    # These are used when eSpeak doesn't support a given language code
    # but a default alias is appropriate.
    _defaultLangToLocale = {
        # Languages without locale that aren't supported in eSpeak 7e5457f91e10,
        # with a language with locale that is supported.
        # Found via:
        # set(stripLocaleFromLangCode(lang) for lang in self.availableLanguages).difference(self.availableLanguages)
        "en": "en-gb",
        "chr": "chr-US-Qaaa-x-west",
        "fr": "fr-fr",
    }

    availableLanguages: Set[Optional[str]]
    """
    For eSpeak commit 7e5457f91e10, this is equivalent to:
    {
        'ia', 'ru', 'cy', 'ms', 'af', 'fi', 'fr-fr', 'nog', 'gu', 'hu', 'eu', 'om', 'en-029', 'de', 'es', 'kk',
        'an', 'nci', 'uk', 'vi-vn-x-south', 'grc', 'it', 'vi-vn-x-central', 'bg', 'piqd', 'ug', 'ar', 'da', 'mi',
        'mr', 'pt-br', 'fr-ch', 'py', 'uz', 'en-gb', 'sw', 'as', 'shn', 'vi', 'nl', 'bs', 'ga', 'pap', 'sv', 'kn',
        'gn', 'th', 'tr', 'pa', 'mt', 'chr-US-Qaaa-x-west', 'eo', 'kok', 'ky', 'lfn', 'is', 'pt', 'en-gb-x-gbcwmd',
        'en-gb-x-rp', 'ht', 'bpy', 'fr-be', 'nb', 'lt', 'ja', 'te', 'tn', 'es-419', 'gd', 'sjn', 'he', 'hyw', 'et',
        'ro', 'ru-lv', 'sq', 'quc', 'am', 'hr', 'qya', 'ka', 'el', 'tt', 'or', 'pl', 'qu', 'ba', 'ta', 'cmn', 'io',
        'en-us', 'ur', 'hi', 'en-gb-scotland', 'fa', 'kl', 'tk', 'ku', 'si', 'cv', 'ca', 'qdb', 'hak', 'fa-latn',
        'lv', 'en-gb-x-gbclan', 'ltg', 'ne', 'sl', 'az', 'yue', 'sk', 'hy', 'my', 'ko', 'mk', 'smj', 'ml',
        'cmn-latn-pinyin', 'id', 'la', 'sr', 'bn', 'sd', 'cs', 'jbo', 'haw'
    }
    """

    @classmethod
    def check(cls):
        return True

    def __init__(self):
        _espeak.initialize(self._onIndexReached)
        log.info("Using eSpeak NG version %s" % _espeak.info())
        lang = getLanguage()
        _espeak.setVoiceByLanguage(lang)
        self._language = lang
        self._variantDict = _espeak.getVariantDict()
        self.variant = "max"
        self.rate = 30
        self.pitch = 40
        self.inflection = 75

    def _get_language(self):
        return self._language

    # Maps prosody command classes to the <prosody> attribute they control.
    PROSODY_ATTRS = {
        PitchCommand: "pitch",
        VolumeCommand: "volume",
        RateCommand: "rate",
    }

    # IPA characters that can be translated to eSpeak phoneme mnemonics.
    IPA_TO_ESPEAK = {
        u"θ": u"T",
        u"s": u"s",
        u"ˈ": u"'",
    }

    def _processText(self, text):
        """Return *text* made safe for embedding in the markup sent to eSpeak.

        The previous table mapped ``<`` and ``>`` to themselves, so markup
        characters in spoken text reached eSpeak unescaped; they are now
        replaced by their XML entities.
        """
        # We need to make several replacements.
        return text.translate({
            0x1: None,  # used for embedded commands
            0x3C: u"&lt;",  # <: because of XML
            0x3E: u"&gt;",  # >: because of XML
            0x5B: u" [",  # [: [[ indicates phonemes
        })

    def _normalizeLangCommand(self, command: LangChangeCommand) -> LangChangeCommand:
        """Return a LangChangeCommand whose language eSpeak can handle.

        Resolution order: an exact (case-insensitive) match, then a known
        default from L{_defaultLangToLocale}, then the language without its
        locale, then any available dialect of the same language
        (e.g. ru-ru to ru). If nothing matches, the returned command carries
        ``None`` as its language.
        """
        lowerCaseAvailableLangs = {
            lang.lower() for lang in self.availableLanguages}
        # Use default language if no command.lang is supplied
        langWithLocale = command.lang if command.lang else self._language
        langWithLocale = langWithLocale.lower().replace('_', '-')

        langWithoutLocale: Optional[str] = stripLocaleFromLangCode(
            langWithLocale)

        # Check for any language where the language code matches,
        # regardless of dialect: e.g. ru-ru to ru
        matchingLangs = filter(
            lambda lang: stripLocaleFromLangCode(lang) == langWithoutLocale,
            lowerCaseAvailableLangs)
        anyLocaleMatchingLang = next(matchingLangs, None)

        # Check from a list of known default mapping locales: e.g. en to en-gb
        # Created due to eSpeak issue: https://github.com/espeak-ng/espeak-ng/issues/1200
        knownDefaultLang = self._defaultLangToLocale.get(
            langWithoutLocale, None)
        if (knownDefaultLang is not None
                and knownDefaultLang not in self.availableLanguages):
            # This means eSpeak has changed and we need to update the mapping
            log.error(
                f"Default mapping unknown to eSpeak {knownDefaultLang} not in {self.availableLanguages}"
            )
            knownDefaultLang = None

        if langWithLocale in lowerCaseAvailableLangs:
            eSpeakLang = langWithLocale
        elif knownDefaultLang is not None:
            eSpeakLang = knownDefaultLang
        elif langWithoutLocale in lowerCaseAvailableLangs:
            eSpeakLang = langWithoutLocale
        elif anyLocaleMatchingLang is not None:
            eSpeakLang = anyLocaleMatchingLang
        else:
            log.debugWarning(
                f"Unable to find an eSpeak language for '{langWithLocale}'")
            eSpeakLang = None
        return LangChangeCommand(eSpeakLang)

    def _handleLangChangeCommand(
            self,
            langChangeCommand: LangChangeCommand,
            langChanged: bool,
    ) -> str:
        """Get language xml tags needed to handle a lang change command.
            - if a language change has already been handled for this speech,
            close the open voice tag.
            - if the language is supported by eSpeak, switch to that language.
            - otherwise, switch to the default synthesizer language.
        """
        langChangeCommand = self._normalizeLangCommand(langChangeCommand)
        voiceChangeXML = ""
        if langChanged:
            # Close existing voice tag
            voiceChangeXML += "</voice>"
        if langChangeCommand.lang is not None:
            # Open new voice tag using eSpeak compatible language
            voiceChangeXML += f'<voice xml:lang="{langChangeCommand.lang}">'
        else:
            # Open new voice tag using default voice
            voiceChangeXML += "<voice>"
        return voiceChangeXML

    # C901 'speak' is too complex
    # Note: when working on speak, look for opportunities to simplify
    # and move logic out into smaller helper functions.
    def speak(self, speechSequence: SpeechSequence):  # noqa: C901
        """Render *speechSequence* to markup and pass it to ``_espeak.speak``."""
        textList: List[str] = []
        langChanged = False
        prosody: Dict[str, int] = {}
        # We output malformed XML, as we might close an outer tag after
        # opening an inner one; e.g.
        # <voice><prosody></voice></prosody>.
        # However, eSpeak doesn't seem to mind.
        for item in speechSequence:
            if isinstance(item, str):
                textList.append(self._processText(item))
            elif isinstance(item, IndexCommand):
                textList.append("<mark name=\"%d\" />" % item.index)
            elif isinstance(item, CharacterModeCommand):
                textList.append(
                    "<say-as interpret-as=\"characters\">" if item.state
                    else "</say-as>")
            elif isinstance(item, LangChangeCommand):
                langChangeXML = self._handleLangChangeCommand(
                    item, langChanged)
                textList.append(langChangeXML)
                langChanged = True
            elif isinstance(item, BreakCommand):
                # Break commands are ignored at the start of speech unless
                # strength is specified.
                # Refer to eSpeak issue: https://github.com/espeak-ng/espeak-ng/issues/1232
                textList.append(f'<break time="{item.time}ms" strength="1" />')
            elif type(item) in self.PROSODY_ATTRS:
                if prosody:
                    # Close previous prosody tag.
                    textList.append("</prosody>")
                attr = self.PROSODY_ATTRS[type(item)]
                if item.multiplier == 1:
                    # Returning to normal.
                    prosody.pop(attr, None)
                else:
                    prosody[attr] = int(item.multiplier * 100)
                if not prosody:
                    continue
                # Re-open a single tag carrying every active attribute.
                textList.append("<prosody")
                for attr, val in prosody.items():
                    textList.append(' %s="%d%%"' % (attr, val))
                textList.append(">")
            elif isinstance(item, PhonemeCommand):
                # We can't use str.translate because we want to reject
                # unknown characters.
                try:
                    phonemes = "".join(
                        [self.IPA_TO_ESPEAK[char] for char in item.ipa])
                    # There needs to be a space after the phoneme command.
                    # Otherwise, eSpeak will announce a subsequent SSML tag
                    # instead of processing it.
                    textList.append(u"[[%s]] " % phonemes)
                except KeyError:
                    log.debugWarning(
                        "Unknown character in IPA string: %s" % item.ipa)
                    if item.text:
                        textList.append(self._processText(item.text))
            else:
                log.error("Unknown speech: %s" % item)
        # Close any open tags.
        if langChanged:
            textList.append("</voice>")
        if prosody:
            textList.append("</prosody>")
        text = u"".join(textList)
        _espeak.speak(text)

    def cancel(self):
        _espeak.stop()

    def pause(self, switch):
        _espeak.pause(switch)

    _rateBoost = False
    RATE_BOOST_MULTIPLIER = 3

    def _get_rateBoost(self):
        return self._rateBoost

    def _set_rateBoost(self, enable):
        if enable == self._rateBoost:
            return
        # Read the percentage under the old setting and re-apply it under
        # the new one so the engine value is rescaled.
        rate = self.rate
        self._rateBoost = enable
        self.rate = rate

    def _get_rate(self):
        val = _espeak.getParameter(_espeak.espeakRATE, 1)
        if self._rateBoost:
            val = int(val / self.RATE_BOOST_MULTIPLIER)
        return self._paramToPercent(val, _espeak.minRate, _espeak.maxRate)

    def _set_rate(self, rate):
        val = self._percentToParam(rate, _espeak.minRate, _espeak.maxRate)
        if self._rateBoost:
            val = int(val * self.RATE_BOOST_MULTIPLIER)
        _espeak.setParameter(_espeak.espeakRATE, val, 0)

    def _get_pitch(self):
        val = _espeak.getParameter(_espeak.espeakPITCH, 1)
        return self._paramToPercent(val, _espeak.minPitch, _espeak.maxPitch)

    def _set_pitch(self, pitch):
        val = self._percentToParam(pitch, _espeak.minPitch, _espeak.maxPitch)
        _espeak.setParameter(_espeak.espeakPITCH, val, 0)

    def _get_inflection(self):
        val = _espeak.getParameter(_espeak.espeakRANGE, 1)
        return self._paramToPercent(val, _espeak.minPitch, _espeak.maxPitch)

    def _set_inflection(self, val):
        val = self._percentToParam(val, _espeak.minPitch, _espeak.maxPitch)
        _espeak.setParameter(_espeak.espeakRANGE, val, 0)

    def _get_volume(self):
        return _espeak.getParameter(_espeak.espeakVOLUME, 1)

    def _set_volume(self, volume):
        _espeak.setParameter(_espeak.espeakVOLUME, volume, 0)

    def _getAvailableVoices(self):
        voices = OrderedDict()
        for v in _espeak.getVoiceList():
            l = _espeak.decodeEspeakString(v.languages[1:])
            # #7167: Some languages names contain unicode characters
            # EG: Norwegian Bokmål
            name = _espeak.decodeEspeakString(v.name)
            # #5783: For backwards compatibility, voice identifies should
            # always be lowercase
            identifier = os.path.basename(
                _espeak.decodeEspeakString(v.identifier)).lower()
            voices[identifier] = VoiceInfo(identifier, name, l)
        return voices

    def _get_voice(self):
        curVoice = getattr(self, '_voice', None)
        if curVoice:
            return curVoice
        curVoice = _espeak.getCurrentVoice()
        if not curVoice:
            return ""
        # #5783: For backwards compatibility, voice identifies should
        # always be lowercase
        return _espeak.decodeEspeakString(
            curVoice.identifier).split('+')[0].lower()

    def _set_voice(self, identifier):
        if not identifier:
            return
        # #5783: For backwards compatibility, voice identifies should
        # always be lowercase
        identifier = identifier.lower()
        if "\\" in identifier:
            identifier = os.path.basename(identifier)
        self._voice = identifier
        try:
            _espeak.setVoiceAndVariant(voice=identifier, variant=self._variant)
        except:
            # Roll back the cached voice, then let the error propagate.
            self._voice = None
            raise
        self._language = super(SynthDriver, self).language

    def _onIndexReached(self, index):
        # A None index signals that speaking has finished.
        if index is not None:
            synthIndexReached.notify(synth=self, index=index)
        else:
            synthDoneSpeaking.notify(synth=self)

    def terminate(self):
        _espeak.terminate()

    def _get_variant(self):
        return self._variant

    def _set_variant(self, val):
        # Unknown variants fall back to "max".
        self._variant = val if val in self._variantDict else "max"
        _espeak.setVoiceAndVariant(variant=self._variant)

    def _getAvailableVariants(self):
        return OrderedDict(
            (ID, VoiceInfo(ID, name))
            for ID, name in self._variantDict.items())
class SynthDriver(SynthDriver):
    """NVDA synthesizer driver for eSpeak, using the ``_espeak`` module.

    Speech sequences are rendered to an SSML-like string that is passed to
    ``_espeak.speak``.
    """

    name = "espeak"
    description = "eSpeak"

    supportedSettings = (
        SynthDriver.VoiceSetting(),
        SynthDriver.VariantSetting(),
        SynthDriver.RateSetting(),
        # Translators: This is the name of the rate boost voice toggle
        # which further increases the speaking rate when enabled.
        BooleanSynthSetting("rateBoost", _("Rate boos&t")),
        SynthDriver.PitchSetting(),
        SynthDriver.InflectionSetting(),
        SynthDriver.VolumeSetting(),
    )

    @classmethod
    def check(cls):
        return True

    def __init__(self):
        _espeak.initialize()
        log.info("Using eSpeak version %s" % _espeak.info())
        lang = languageHandler.getLanguage()
        _espeak.setVoiceByLanguage(lang)
        self._language = lang
        self._variantDict = _espeak.getVariantDict()
        self.variant = "max"
        self.rate = 30
        self.pitch = 40
        self.inflection = 75

    def _get_language(self):
        return self._language

    def speak(self, speechSequence):
        """Render *speechSequence* to markup and pass it to ``_espeak.speak``."""
        defaultLanguage = self._language
        textList = []
        langChanged = False
        for item in speechSequence:
            if isinstance(item, basestring):
                s = unicode(item)
                # Replace \01, as this is used for embedded commands.
                # Also replace < and > as espeak handles xml.
                # translate() returns a new string, so the result must be
                # kept; it was previously discarded, which left the text
                # unfiltered. The markup characters are now replaced by
                # their XML entities rather than by themselves.
                s = s.translate({
                    ord(u'\01'): None,
                    ord(u'<'): u'&lt;',
                    ord(u'>'): u'&gt;',
                })
                textList.append(s)
            elif isinstance(item, speech.IndexCommand):
                textList.append("<mark name=\"%d\" />" % item.index)
            elif isinstance(item, speech.CharacterModeCommand):
                textList.append(
                    "<say-as interpret-as=\"characters\">" if item.state
                    else "</say-as>")
            elif isinstance(item, speech.LangChangeCommand):
                if langChanged:
                    # Close the voice tag opened by an earlier change.
                    textList.append("</voice>")
                textList.append(
                    "<voice xml:lang=\"%s\">"
                    % (item.lang if item.lang else defaultLanguage).replace(
                        '_', '-'))
                langChanged = True
            elif isinstance(item, speech.SpeechCommand):
                log.debugWarning("Unsupported speech command: %s" % item)
            else:
                log.error("Unknown speech: %s" % item)
        if langChanged:
            textList.append("</voice>")
        text = u"".join(textList)
        _espeak.speak(text)

    def cancel(self):
        _espeak.stop()

    def pause(self, switch):
        _espeak.pause(switch)

    _rateBoost = False
    RATE_BOOST_MULTIPLIER = 3

    def _get_rateBoost(self):
        return self._rateBoost

    def _set_rateBoost(self, enable):
        if enable == self._rateBoost:
            return
        # Read the percentage under the old setting and re-apply it under
        # the new one so the engine value is rescaled.
        rate = self.rate
        self._rateBoost = enable
        self.rate = rate

    def _get_rate(self):
        val = _espeak.getParameter(_espeak.espeakRATE, 1)
        if self._rateBoost:
            val = int(val / self.RATE_BOOST_MULTIPLIER)
        return self._paramToPercent(val, _espeak.minRate, _espeak.maxRate)

    def _set_rate(self, rate):
        val = self._percentToParam(rate, _espeak.minRate, _espeak.maxRate)
        if self._rateBoost:
            val = int(val * self.RATE_BOOST_MULTIPLIER)
        _espeak.setParameter(_espeak.espeakRATE, val, 0)

    def _get_pitch(self):
        val = _espeak.getParameter(_espeak.espeakPITCH, 1)
        return self._paramToPercent(val, _espeak.minPitch, _espeak.maxPitch)

    def _set_pitch(self, pitch):
        val = self._percentToParam(pitch, _espeak.minPitch, _espeak.maxPitch)
        _espeak.setParameter(_espeak.espeakPITCH, val, 0)

    def _get_inflection(self):
        val = _espeak.getParameter(_espeak.espeakRANGE, 1)
        return self._paramToPercent(val, _espeak.minPitch, _espeak.maxPitch)

    def _set_inflection(self, val):
        val = self._percentToParam(val, _espeak.minPitch, _espeak.maxPitch)
        _espeak.setParameter(_espeak.espeakRANGE, val, 0)

    def _get_volume(self):
        return _espeak.getParameter(_espeak.espeakVOLUME, 1)

    def _set_volume(self, volume):
        _espeak.setParameter(_espeak.espeakVOLUME, volume, 0)

    def _getAvailableVoices(self):
        voices = OrderedDict()
        for v in _espeak.getVoiceList():
            l = v.languages[1:]
            identifier = os.path.basename(v.identifier)
            voices[identifier] = VoiceInfo(identifier, v.name, l)
        return voices

    def _get_voice(self):
        curVoice = getattr(self, '_voice', None)
        if curVoice:
            return curVoice
        curVoice = _espeak.getCurrentVoice()
        if not curVoice:
            return ""
        return curVoice.identifier.split('+')[0]

    def _set_voice(self, identifier):
        if not identifier:
            return
        if "\\" in identifier:
            identifier = os.path.basename(identifier)
        self._voice = identifier
        try:
            _espeak.setVoiceAndVariant(voice=identifier, variant=self._variant)
        except:
            # Roll back the cached voice, then let the error propagate.
            self._voice = None
            raise
        self._language = super(SynthDriver, self).language

    def _get_lastIndex(self):
        return _espeak.lastIndex

    def terminate(self):
        _espeak.terminate()

    def _get_variant(self):
        return self._variant

    def _set_variant(self, val):
        # Unknown variants fall back to "max".
        self._variant = val if val in self._variantDict else "max"
        _espeak.setVoiceAndVariant(variant=self._variant)

    def _getAvailableVariants(self):
        # items() behaves the same as iteritems() here and also exists on
        # Python 3.
        return OrderedDict(
            (ID, VoiceInfo(ID, name))
            for ID, name in self._variantDict.items())