コード例 #1
0
ファイル: oneCore.py プロジェクト: zstanecic/nvda
 def _get_supportedSettings(self):
     """Build, cache on the instance, and return the supported settings.

     Rate boost is only offered when the synth exposes prosody options.
     """
     settings = [
         SynthDriver.VoiceSetting(),
         SynthDriver.RateSetting(),
     ]
     if self.supportsProsodyOptions:
         settings.append(SynthDriver.RateBoostSetting())
     settings.append(SynthDriver.PitchSetting())
     settings.append(SynthDriver.VolumeSetting())
     self.supportedSettings = settings
     return settings
コード例 #2
0
ファイル: espeak.py プロジェクト: yang123vc/nvda
class SynthDriver(SynthDriver):
	"""eSpeak NG synthesizer driver for NVDA.

	Translates NVDA speech sequences into SSML-like markup and forwards
	it to the engine through the ``_espeak`` wrapper module.
	NOTE(review): Python 2 era code (``unicode``, ``basestring``,
	``iteritems``) — keep that in mind before modernizing.
	"""
	name = "espeak"
	description = "eSpeak NG"

	supportedSettings=(
		SynthDriver.VoiceSetting(),
		SynthDriver.VariantSetting(),
		SynthDriver.RateSetting(),
		# Translators: This is the name of the rate boost voice toggle
		# which further increases the speaking rate when enabled.
		BooleanSynthSetting("rateBoost",_("Rate boos&t")),
		SynthDriver.PitchSetting(),
		SynthDriver.InflectionSetting(),
		SynthDriver.VolumeSetting(),
	)

	@classmethod
	def check(cls):
		# eSpeak ships with NVDA, so it is always considered available.
		return True

	def __init__(self):
		"""Initialize the engine and pick a voice matching NVDA's UI language."""
		_espeak.initialize()
		log.info("Using eSpeak version %s" % _espeak.info())
		lang=languageHandler.getLanguage()
		_espeak.setVoiceByLanguage(lang)
		self._language=lang
		self._variantDict=_espeak.getVariantDict()
		self.variant="max"
		self.rate=30
		self.pitch=40
		self.inflection=75

	def _get_language(self):
		return self._language

	# Maps NVDA prosody command classes to the SSML prosody attribute name
	# each one controls in the markup built by speak().
	PROSODY_ATTRS = {
		speech.PitchCommand: "pitch",
		speech.VolumeCommand: "volume",
		speech.RateCommand: "rate",
	}

	# Partial IPA -> eSpeak phoneme mapping. Characters missing from this
	# table cause speak() to fall back to the phoneme command's display text.
	IPA_TO_ESPEAK = {
		u"θ": u"T",
		u"s": u"s",
		u"ˈ": u"'",
	}

	def _processText(self, text):
		"""Escape raw text so it can be embedded safely in the markup sent to eSpeak."""
		text = unicode(text)
		# We need to make several replacements.
		return text.translate({
			0x1: None, # used for embedded commands
			0x3C: u"&lt;", # <: because of XML
			0x3E: u"&gt;", # >: because of XML
			0x5B: u" [", # [: [[ indicates phonemes
		})

	def speak(self,speechSequence):
		"""Convert a speech sequence to markup and send it to the engine."""
		defaultLanguage=self._language
		textList=[]
		langChanged=False
		# Currently open prosody attributes: attr name -> percentage value.
		prosody={}
		# We output malformed XML, as we might close an outer tag after opening an inner one; e.g.
		# <voice><prosody></voice></prosody>.
		# However, eSpeak doesn't seem to mind.
		for item in speechSequence:
			if isinstance(item,basestring):
				textList.append(self._processText(item))
			elif isinstance(item,speech.IndexCommand):
				textList.append("<mark name=\"%d\" />"%item.index)
			elif isinstance(item,speech.CharacterModeCommand):
				textList.append("<say-as interpret-as=\"characters\">" if item.state else "</say-as>")
			elif isinstance(item,speech.LangChangeCommand):
				if langChanged:
					textList.append("</voice>")
				textList.append("<voice xml:lang=\"%s\">"%(item.lang if item.lang else defaultLanguage).replace('_','-'))
				langChanged=True
			elif isinstance(item,speech.BreakCommand):
				textList.append('<break time="%dms" />' % item.time)
			elif type(item) in self.PROSODY_ATTRS:
				if prosody:
					# Close previous prosody tag.
					textList.append("</prosody>")
				attr=self.PROSODY_ATTRS[type(item)]
				if item.multiplier==1:
					# Returning to normal.
					try:
						del prosody[attr]
					except KeyError:
						pass
				else:
					prosody[attr]=int(item.multiplier* 100)
				if not prosody:
					continue
				# Re-open a prosody tag carrying all currently active attributes.
				textList.append("<prosody")
				for attr,val in prosody.iteritems():
					textList.append(' %s="%d%%"'%(attr,val))
				textList.append(">")
			elif isinstance(item,speech.PhonemeCommand):
				# We can't use unicode.translate because we want to reject unknown characters.
				try:
					phonemes="".join([self.IPA_TO_ESPEAK[char] for char in item.ipa])
					# There needs to be a space after the phoneme command.
					# Otherwise, eSpeak will announce a subsequent SSML tag instead of processing it.
					textList.append(u"[[%s]] "%phonemes)
				except KeyError:
					log.debugWarning("Unknown character in IPA string: %s"%item.ipa)
					if item.text:
						textList.append(self._processText(item.text))
			elif isinstance(item,speech.SpeechCommand):
				log.debugWarning("Unsupported speech command: %s"%item)
			else:
				log.error("Unknown speech: %s"%item)
		# Close any open tags.
		if langChanged:
			textList.append("</voice>")
		if prosody:
			textList.append("</prosody>")
		text=u"".join(textList)
		_espeak.speak(text)

	def cancel(self):
		# Stop speech immediately.
		_espeak.stop()

	def pause(self,switch):
		_espeak.pause(switch)

	# Rate boost multiplies the engine rate to exceed the normal 0-100 range.
	_rateBoost = False
	RATE_BOOST_MULTIPLIER = 3

	def _get_rateBoost(self):
		return self._rateBoost

	def _set_rateBoost(self, enable):
		if enable == self._rateBoost:
			return
		# Re-apply the current percentage so the engine rate is rescaled.
		rate = self.rate
		self._rateBoost = enable
		self.rate = rate

	def _get_rate(self):
		val=_espeak.getParameter(_espeak.espeakRATE,1)
		if self._rateBoost:
			val=int(val/self.RATE_BOOST_MULTIPLIER)
		return self._paramToPercent(val,_espeak.minRate,_espeak.maxRate)

	def _set_rate(self,rate):
		val=self._percentToParam(rate, _espeak.minRate, _espeak.maxRate)
		if self._rateBoost:
			val=int(val*self.RATE_BOOST_MULTIPLIER)
		_espeak.setParameter(_espeak.espeakRATE,val,0)

	def _get_pitch(self):
		val=_espeak.getParameter(_espeak.espeakPITCH,1)
		return self._paramToPercent(val,_espeak.minPitch,_espeak.maxPitch)

	def _set_pitch(self,pitch):
		val=self._percentToParam(pitch, _espeak.minPitch, _espeak.maxPitch)
		_espeak.setParameter(_espeak.espeakPITCH,val,0)

	def _get_inflection(self):
		# NOTE(review): inflection (espeakRANGE) reuses the pitch min/max
		# bounds for scaling — confirm that is intentional.
		val=_espeak.getParameter(_espeak.espeakRANGE,1)
		return self._paramToPercent(val,_espeak.minPitch,_espeak.maxPitch)

	def _set_inflection(self,val):
		val=self._percentToParam(val, _espeak.minPitch, _espeak.maxPitch)
		_espeak.setParameter(_espeak.espeakRANGE,val,0)

	def _get_volume(self):
		return _espeak.getParameter(_espeak.espeakVOLUME,1)

	def _set_volume(self,volume):
		_espeak.setParameter(_espeak.espeakVOLUME,volume,0)

	def _getAvailableVoices(self):
		"""Return an OrderedDict of voice identifier -> VoiceInfo."""
		voices=OrderedDict()
		for v in _espeak.getVoiceList():
			# Drop the first element of the language list — presumably a
			# priority/preference marker, not a language; TODO confirm.
			l=v.languages[1:]
			# #5783: For backwards compatibility, voice identifies should always be lowercase
			identifier=os.path.basename(v.identifier).lower()
			voices[identifier]=VoiceInfo(identifier,v.name,l)
		return voices

	def _get_voice(self):
		curVoice=getattr(self,'_voice',None)
		if curVoice: return curVoice
		curVoice = _espeak.getCurrentVoice()
		if not curVoice:
			return ""
		# #5783: For backwards compatibility, voice identifies should always be lowercase
		# Strip any "+variant" suffix from the identifier.
		return curVoice.identifier.split('+')[0].lower()

	def _set_voice(self, identifier):
		if not identifier:
			return
		# #5783: For backwards compatibility, voice identifies should always be lowercase
		identifier=identifier.lower()
		if "\\" in identifier:
			identifier=os.path.basename(identifier)
		self._voice=identifier
		try:
			_espeak.setVoiceAndVariant(voice=identifier,variant=self._variant)
		except:
			# Roll back the cached voice before re-raising so _get_voice
			# falls through to querying the engine.
			self._voice=None
			raise
		self._language=super(SynthDriver,self).language

	def _get_lastIndex(self):
		return _espeak.lastIndex

	def terminate(self):
		_espeak.terminate()

	def _get_variant(self):
		return self._variant

	def _set_variant(self,val):
		# Fall back to "max" for unknown variants.
		self._variant = val if val in self._variantDict else "max"
		_espeak.setVoiceAndVariant(variant=self._variant)

	def _getAvailableVariants(self):
		return OrderedDict((ID,VoiceInfo(ID, name)) for ID, name in self._variantDict.iteritems())
コード例 #3
0
ファイル: __init__.py プロジェクト: stayen/RHVoice
class SynthDriver(SynthDriver):
    """RHVoice synthesizer driver for NVDA.

    Drives the native RHVoice library through ctypes, with audio playback
    and TTS work running on a dedicated queue/thread.
    NOTE(review): Python 2 era code (``xrange``, ``basestring``, ``Queue``).
    """
    name = "RHVoice"
    description = "RHVoice"

    supportedSettings = (SynthDriver.VoiceSetting(), SynthDriver.RateSetting(),
                         SynthDriver.PitchSetting(),
                         SynthDriver.VolumeSetting())

    @classmethod
    def check(cls):
        # Available only if the native library file is present.
        return os.path.isfile(lib_path)

    def __init__(self):
        """Load the library, register callbacks, and pick a default voice profile."""
        self.__lib = load_tts_library()
        self.__cancel_flag = threading.Event()
        self.__player = audio_player(self.__cancel_flag)
        # The ctypes callback wrappers are stored on self — presumably so
        # they stay referenced for the engine's lifetime; confirm.
        self.__sample_rate_callback = sample_rate_callback(
            self.__lib, self.__player)
        self.__c_sample_rate_callback = RHVoice_callback_types.set_sample_rate(
            self.__sample_rate_callback)
        self.__speech_callback = speech_callback(self.__lib, self.__player,
                                                 self.__cancel_flag)
        self.__c_speech_callback = RHVoice_callback_types.play_speech(
            self.__speech_callback)
        self.__mark_callback = mark_callback(self.__lib)
        self.__c_mark_callback = RHVoice_callback_types.process_mark(
            self.__mark_callback)
        # Collect data directories from installed RHVoice language/voice add-ons.
        resource_paths = [
            os.path.join(addon.path, "data").encode("UTF-8")
            for addon in addonHandler.getRunningAddons()
            if (addon.name.startswith("RHVoice-language")
                or addon.name.startswith("RHVoice-voice"))
        ]
        # NULL-terminated C array of resource paths.
        c_resource_paths = (c_char_p * (len(resource_paths) + 1))(
            *(resource_paths + [None]))
        init_params = RHVoice_init_params(
            None, config_path.encode("utf-8"), c_resource_paths,
            RHVoice_callbacks(
                self.__c_sample_rate_callback, self.__c_speech_callback,
                self.__c_mark_callback,
                cast(None, RHVoice_callback_types.word_starts),
                cast(None, RHVoice_callback_types.word_ends),
                cast(None, RHVoice_callback_types.sentence_starts),
                cast(None, RHVoice_callback_types.sentence_ends),
                cast(None, RHVoice_callback_types.play_audio)), 0)
        self.__tts_engine = self.__lib.RHVoice_new_tts_engine(
            byref(init_params))
        if not self.__tts_engine:
            raise RuntimeError("RHVoice: initialization error")
        nvda_language = languageHandler.getLanguage().split("_")[0]
        # Map each installed voice to its language.
        number_of_voices = self.__lib.RHVoice_get_number_of_voices(
            self.__tts_engine)
        native_voices = self.__lib.RHVoice_get_voices(self.__tts_engine)
        self.__voice_languages = dict()
        self.__languages = set()
        for i in xrange(number_of_voices):
            native_voice = native_voices[i]
            self.__voice_languages[native_voice.name] = native_voice.language
            self.__languages.add(native_voice.language)
        # Prefer the first voice profile whose primary voice matches NVDA's language.
        self.__profile = None
        self.__profiles = list()
        number_of_profiles = self.__lib.RHVoice_get_number_of_voice_profiles(
            self.__tts_engine)
        native_profile_names = self.__lib.RHVoice_get_voice_profiles(
            self.__tts_engine)
        for i in xrange(number_of_profiles):
            name = native_profile_names[i]
            self.__profiles.append(name)
            if (self.__profile is
                    None) and (nvda_language
                               == self.__voice_languages[name.split("+")[0]]):
                self.__profile = name
        if self.__profile is None:
            self.__profile = self.__profiles[0]
        self.__rate = 50
        self.__pitch = 50
        self.__volume = 50
        # Speech tasks are executed sequentially on a worker thread.
        self.__tts_queue = Queue.Queue()
        self.__tts_thread = TTSThread(self.__tts_queue)
        self.__tts_thread.start()
        log.info("Using RHVoice version {}".format(
            self.__lib.RHVoice_get_version()))

    def terminate(self):
        """Stop speech, shut down the worker thread, and free the engine."""
        self.cancel()
        # None is the worker thread's sentinel to exit.
        self.__tts_queue.put(None)
        self.__tts_thread.join()
        self.__player.close()
        self.__lib.RHVoice_delete_tts_engine(self.__tts_engine)
        self.__tts_engine = None

    def speak(self, speech_sequence):
        """Build SSML from the speech sequence and queue a speech task."""
        spell_mode = False
        language_changed = False
        text_list = [u"<speak>"]
        for item in speech_sequence:
            if isinstance(item, basestring):
                s = escape_text(unicode(item))
                text_list.append((
                    u'<say-as interpret-as="characters">{}</say-as>'.format(s)
                ) if spell_mode else s)
            elif isinstance(item, speech.IndexCommand):
                text_list.append('<mark name="%d"/>' % item.index)
            elif isinstance(item, speech.CharacterModeCommand):
                if item.state:
                    spell_mode = True
                else:
                    spell_mode = False
            elif isinstance(item, speech.LangChangeCommand):
                if language_changed:
                    text_list.append(u"</voice>")
                    language_changed = False
                if not item.lang:
                    continue
                new_language = item.lang.split("_")[0]
                # Skip languages we have no voice for, and the language
                # already provided by the current profile's primary voice.
                if new_language not in self.__languages:
                    continue
                elif new_language == self.__voice_languages[
                        self.__profile.split("+")[0]]:
                    continue
                text_list.append(u'<voice xml:lang="{}">'.format(new_language))
                language_changed = True
            elif isinstance(item, speech.SpeechCommand):
                log.debugWarning("Unsupported speech command: %s" % item)
            else:
                log.error("Unknown speech: %s" % item)
        if language_changed:
            text_list.append(u"</voice>")
        text_list.append(u"</speak>")
        text = u"".join(text_list)
        task = speak_text(self.__lib, self.__tts_engine, text,
                          self.__cancel_flag, self.__player)
        task.set_voice_profile(self.__profile)
        task.set_rate(self.__rate)
        task.set_pitch(self.__pitch)
        task.set_volume(self.__volume)
        self.__tts_queue.put(task)

    def pause(self, switch):
        self.__player.pause(switch)

    def cancel(self):
        """Drop all queued speech and stop the current utterance."""
        try:
            # Drain the queue; get_nowait raises Empty once it is drained,
            # and the actual cancellation happens in that handler.
            while True:
                self.__tts_queue.get_nowait()
        except Queue.Empty:
            self.__cancel_flag.set()
            # Queue a task that clears the flag once cancellation is processed.
            self.__tts_queue.put(self.__cancel_flag.clear)
            self.__player.stop()

    def _get_lastIndex(self):
        return self.__mark_callback.index

    def _get_availableVoices(self):
        # One entry per voice profile; language comes from the primary voice.
        return OrderedDict(
            (profile,
             VoiceInfo(profile, profile, self.__voice_languages[profile.split(
                 "+")[0]])) for profile in self.__profiles)

    def _get_language(self):
        return self.__voice_languages[self.__profile.split("+")[0]]

    def _get_rate(self):
        return self.__rate

    def _set_rate(self, rate):
        # Clamp to the 0-100 percentage range.
        self.__rate = max(0, min(100, rate))

    def _get_pitch(self):
        return self.__pitch

    def _set_pitch(self, pitch):
        self.__pitch = max(0, min(100, pitch))

    def _get_volume(self):
        return self.__volume

    def _set_volume(self, volume):
        self.__volume = max(0, min(100, volume))

    def _get_voice(self):
        return self.__profile

    def _set_voice(self, voice):
        try:
            self.__profile = self.availableVoices[voice].ID
        except:
            # Unknown voice: keep the current profile unchanged.
            pass
コード例 #4
0
class SynthDriver(synthDriverHandler.SynthDriver):
    """IBMTTS (eloquence-style ECI engine) synthesizer driver for NVDA.

    Speech is expressed as backquote command strings (e.g. ``\u0060ts0``,
    ``\u0060p1``) queued to the _ibmeci worker.
    """
    supportedSettings = (SynthDriver.VoiceSetting(),
                         SynthDriver.VariantSetting(),
                         SynthDriver.RateSetting(),
                         BooleanDriverSetting("rateBoost",
                                              _("Rate boos&t"), True),
                         SynthDriver.PitchSetting(),
                         SynthDriver.InflectionSetting(),
                         SynthDriver.VolumeSetting(),
                         NumericDriverSetting("hsz", _("Head Size"), False),
                         NumericDriverSetting("rgh", _("Roughness"), False),
                         NumericDriverSetting("bth", _("Breathiness"), False),
                         BooleanDriverSetting(
                             "backquoteVoiceTags",
                             _("Enable backquote voice &tags"), False))
    supportedCommands = {
        speech.IndexCommand, speech.CharacterModeCommand,
        speech.LangChangeCommand, speech.BreakCommand, speech.PitchCommand,
        speech.RateCommand, speech.VolumeCommand
    }
    supportedNotifications = {synthIndexReached, synthDoneSpeaking}

    description = 'IBMTTS'
    name = 'ibmeci'
    speakingLanguage = ""

    @classmethod
    def check(cls):
        return _ibmeci.eciCheck()

    def __init__(self):
        _ibmeci.initialize(self._onIndexReached, self._onDoneSpeaking)
        # This information doesn't really need to be displayed, and makes IBMTTS unusable if the addon is not in the same drive as NVDA executable.
        # But display it only on debug mode in case of it can be useful
        log.debug("Using IBMTTS version %s" % _ibmeci.eciVersion())
        lang = languageHandler.getLanguage()
        self.rate = 50
        self.speakingLanguage = lang
        self.variant = "1"

    # Maps NVDA prosody command classes to the ECI voice parameter they set.
    PROSODY_ATTRS = {
        speech.PitchCommand: ECIVoiceParam.eciPitchBaseline,
        speech.VolumeCommand: ECIVoiceParam.eciVolume,
        speech.RateCommand: ECIVoiceParam.eciSpeed,
    }

    def speak(self, speechSequence):
        """Translate a speech sequence into queued (function, args) pairs."""
        last = None
        defaultLanguage = self.language
        outlist = []
        # `ts0: make sure character (spelling) mode starts switched off.
        outlist.append((_ibmeci.speak, (b"`ts0", )))
        for item in speechSequence:
            if isinstance(item, string_types):
                s = self.processText(unicode(item))
                outlist.append((_ibmeci.speak, (s, )))
                last = s
            elif isinstance(item, speech.IndexCommand):
                outlist.append((_ibmeci.index, (item.index, )))
            elif isinstance(item, speech.LangChangeCommand):
                # Look up the language annotation, trying the full tag then
                # the bare 2-letter code; fall back to the default language.
                l = None
                if item.lang in langsAnnotations:
                    l = langsAnnotations[item.lang]
                elif item.lang and item.lang[0:2] in langsAnnotations:
                    l = langsAnnotations[item.lang[0:2]]
                if l:
                    if item.lang != self.speakingLanguage and item.lang != self.speakingLanguage[
                            0:2]:
                        outlist.append((_ibmeci.speak, (l, )))
                        self.speakingLanguage = item.lang
                else:
                    outlist.append(
                        (_ibmeci.speak, (langsAnnotations[defaultLanguage], )))
                    self.speakingLanguage = defaultLanguage
            elif isinstance(item, speech.CharacterModeCommand):
                outlist.append(
                    (_ibmeci.speak, (b"`ts1" if item.state else b"`ts0", )))
            elif isinstance(item, speech.BreakCommand):
                outlist.append((_ibmeci.speak, (b' `p%d ' % item.time, )))
            elif type(item) in self.PROSODY_ATTRS:
                val = max(0, min(item.newValue, 100))
                if type(item) == speech.RateCommand:
                    val = self.percentToRate(val)
                outlist.append((_ibmeci.setProsodyParam,
                                (self.PROSODY_ATTRS[type(item)], val)))
            else:
                log.error("Unknown speech: %s" % item)
        if last is not None and last[-1] not in punctuation:
            # check if a pitch command is at the end of the list, because p1 need to be send before this.
            # index -2 is because -1 always seem to be an index command.
            if outlist[-2][0] == _ibmeci.setProsodyParam:
                outlist.insert(-2, (_ibmeci.speak, (b'`p1. ', )))
            else:
                outlist.append((_ibmeci.speak, (b'`p1. ', )))
        outlist.append((_ibmeci.setEndStringMark, ()))
        outlist.append((_ibmeci.synth, ()))
        #print(outlist)
        _ibmeci.eciQueue.put(outlist)
        _ibmeci.process()

    def processText(self, text):
        """Apply language-specific fixups and encode text for the engine.

        NOTE(review): params[9] appears to be the engine's current
        language/dialect ID — confirm against _ibmeci.
        """
        text = text.rstrip()
        if _ibmeci.params[9] in (65536, 65537):
            text = resub(english_fixes, text)
        if _ibmeci.params[9] in (131072, 131073):
            text = resub(spanish_fixes, text)
        if _ibmeci.params[9] in (196609, 196608):
            text = resub(french_fixes, text)
            text = text.replace(
                'quil', 'qil'
            )  #Sometimes this string make everything buggy with IBMTTS in French
        if self._backquoteVoiceTags:
            #this converts to ansi for anticrash. If this breaks with foreign langs, we can remove it.
            text = text.replace('`',
                                ' ').encode('mbcs',
                                            'replace')  #no embedded commands
            # `pp0 `vv: reset pitch fluctuation and re-assert current volume.
            text = b"`pp0 `vv%d %s" % (_ibmeci.getVParam(
                ECIVoiceParam.eciVolume), text)
            text = resub(anticrash_res, text)
        else:
            #this converts to ansi for anticrash. If this breaks with foreign langs, we can remove it.
            text = text.encode('mbcs', 'replace')
            text = resub(anticrash_res, text)
            text = b"`pp0 `vv%d %s" % (_ibmeci.getVParam(
                ECIVoiceParam.eciVolume), text.replace(b'`', b' ')
                                       )  #no embedded commands
        text = pause_re.sub(br'\1 `p1\2\3', text)
        text = time_re.sub(br'\1:\2 \3', text)
        return text

    def pause(self, switch):
        _ibmeci.pause(switch)

    def terminate(self):
        _ibmeci.terminate()

    _backquoteVoiceTags = False

    def _get_backquoteVoiceTags(self):
        return self._backquoteVoiceTags

    def _set_backquoteVoiceTags(self, enable):
        if enable == self._backquoteVoiceTags:
            return
        self._backquoteVoiceTags = enable

    # Rate boost multiplies the engine speed to exceed the normal range.
    _rateBoost = False
    RATE_BOOST_MULTIPLIER = 1.6

    def _get_rateBoost(self):
        return self._rateBoost

    def _set_rateBoost(self, enable):
        if enable != self._rateBoost:
            # Re-apply the current percentage so the engine rate is rescaled.
            rate = self.rate
            self._rateBoost = enable
            self.rate = rate

    def _get_rate(self):
        val = _ibmeci.getVParam(ECIVoiceParam.eciSpeed)
        if self._rateBoost: val = int(round(val / self.RATE_BOOST_MULTIPLIER))
        return self._paramToPercent(val, minRate, maxRate)

    def percentToRate(self, val):
        """Convert a 0-100 percentage to an engine speed value."""
        val = self._percentToParam(val, minRate, maxRate)
        if self._rateBoost: val = int(round(val * self.RATE_BOOST_MULTIPLIER))
        return val

    def _set_rate(self, val):
        val = self.percentToRate(val)
        # Cache the raw engine value; _set_variant re-applies it.
        self._rate = val
        _ibmeci.setVParam(ECIVoiceParam.eciSpeed, val)

    def _get_pitch(self):
        return _ibmeci.getVParam(ECIVoiceParam.eciPitchBaseline)

    def _set_pitch(self, vl):
        _ibmeci.setVParam(ECIVoiceParam.eciPitchBaseline, vl)

    def _get_volume(self):
        return _ibmeci.getVParam(ECIVoiceParam.eciVolume)

    def _set_volume(self, vl):
        _ibmeci.setVParam(ECIVoiceParam.eciVolume, int(vl))

    def _set_inflection(self, vl):
        vl = int(vl)
        _ibmeci.setVParam(ECIVoiceParam.eciPitchFluctuation, vl)

    def _get_inflection(self):
        return _ibmeci.getVParam(ECIVoiceParam.eciPitchFluctuation)

    def _set_hsz(self, vl):
        vl = int(vl)
        _ibmeci.setVParam(ECIVoiceParam.eciHeadSize, vl)

    def _get_hsz(self):
        return _ibmeci.getVParam(ECIVoiceParam.eciHeadSize)

    def _set_rgh(self, vl):
        vl = int(vl)
        _ibmeci.setVParam(ECIVoiceParam.eciRoughness, vl)

    def _get_rgh(self):
        return _ibmeci.getVParam(ECIVoiceParam.eciRoughness)

    def _set_bth(self, vl):
        vl = int(vl)
        _ibmeci.setVParam(ECIVoiceParam.eciBreathiness, vl)

    def _get_bth(self):
        return _ibmeci.getVParam(ECIVoiceParam.eciBreathiness)

    def _getAvailableVoices(self):
        """Enumerate installed .syn voice files, keyed by language-dialect ID."""
        o = OrderedDict()
        for name in os.listdir(_ibmeci.ttsPath):
            if name.lower().endswith('.syn'):
                info = _ibmeci.langs[name.lower()[:3]]
                o[str(info[0])] = VoiceInfo(str(info[0]), info[1], info[2])
        return o

    def _get_voice(self):
        return str(_ibmeci.params[_ibmeci.ECIParam.eciLanguageDialect])

    def _set_voice(self, vl):
        _ibmeci.set_voice(vl)

    def _get_lastIndex(self):
        #fix?
        return _ibmeci.lastindex

    def cancel(self):
        _ibmeci.stop()

    def _getAvailableVariants(self):
        global variants
        return OrderedDict(
            (str(id), synthDriverHandler.VoiceInfo(str(id), name))
            for id, name in variants.items())

    def _set_variant(self, v):
        global variants
        # NOTE(review): when int(v) is unknown, self._variant is stored as
        # "1" but setVariant is still called with int(v) — confirm intended.
        self._variant = v if int(v) in variants else "1"
        _ibmeci.setVariant(int(v))
        # Changing variant resets speed; restore the cached engine rate.
        _ibmeci.setVParam(ECIVoiceParam.eciSpeed, self._rate)
        #if 'ibmtts' in config.conf['speech']:
        #config.conf['speech']['ibmtts']['pitch'] = self.pitch

    def _get_variant(self):
        return self._variant

    def _onIndexReached(self, index):
        # Forward the engine's bookmark callback to NVDA.
        synthIndexReached.notify(synth=self, index=index)

    def _onDoneSpeaking(self):
        synthDoneSpeaking.notify(synth=self)
コード例 #5
0
class SynthDriver(SynthDriver):
    name = "WorldVoiceXVED2"
    description = "WorldVoice(VE)"
    supportedSettings = [
        SynthDriver.VoiceSetting(),
        # SynthDriver.VariantSetting(),
        SynthDriver.RateSetting(),
        SynthDriver.PitchSetting(),
        SynthDriver.VolumeSetting(),
        driverHandler.DriverSetting(
            "numlan",
            # Translators: Label for a setting in voice settings dialog.
            _("Number &Language"),
            availableInSettingsRing=True,
            defaultVal="default",
            # Translators: Label for a setting in synth settings ring.
            displayName=_("Number Language"),
        ),
        driverHandler.DriverSetting(
            "nummod",
            # Translators: Label for a setting in voice settings dialog.
            _("Number &Mode"),
            availableInSettingsRing=True,
            defaultVal="value",
            # Translators: Label for a setting in synth settings ring.
            displayName=_("Number Mode"),
        ),
        driverHandler.NumericDriverSetting(
            "chinesespace",
            # Translators: Label for a setting in voice settings dialog.
            _("Pause time when encountering spaces between Chinese"),
            defaultVal=0,
            minStep=1,
        ),
        driverHandler.BooleanDriverSetting(
            "cni",
            _("Ignore comma between number"),
            defaultVal=False,
        ),
        driverHandler.BooleanDriverSetting(
            "dli",
            _("Ignore language information of document"),
            defaultVal=False,
        ),
        driverHandler.BooleanDriverSetting(
            "uwv",
            _("Enable WorldVoice setting rules to detect text language"),
            availableInSettingsRing=True,
            defaultVal=True,
            displayName=_("Enable WorldVoice rules"),
        ),
    ]
    supportedCommands = {
        speech.IndexCommand,
        speech.CharacterModeCommand,
        speech.LangChangeCommand,
        speech.BreakCommand,
        speech.PitchCommand,
        speech.RateCommand,
        speech.VolumeCommand,
    }
    supportedNotifications = {synthIndexReached, synthDoneSpeaking}

    @classmethod
    def check(cls):
        # Availability is whatever the Vocalizer pre-open probe reports.
        with _vocalizer.preOpenVocalizer() as check:
            return check

    def __init__(self):
        """Initialize Vocalizer, patch NVDA's speech entry points, and build
        the locale/voice maps used for language switching."""
        _config.load()
        # Initialize the driver
        try:
            _vocalizer.initialize(self._onIndexReached)
            log.debug("Vocalizer info: %s" % self._info())
        except _vocalizer.VeError as e:
            if e.code == _vocalizer.VAUTONVDA_ERROR_INVALID:
                log.info("Vocalizer license for NVDA is Invalid")
            elif e.code == _vocalizer.VAUTONVDA_ERROR_DEMO_EXPIRED:
                log.info("Vocalizer demo license for NVDA as expired.")
            raise
        self._voiceManager = VoiceManager()

        # Monkey-patch NVDA's speech functions; the originals are kept so
        # terminate() can restore them.
        self._realSpeakFunc = speech.speak
        self._realSpellingFunc = speech.speakSpelling
        speech.speak = self.patchedSpeak
        speech.speakSpelling = self.patchedSpeakSpelling

        speechSymbols = SpeechSymbols()
        speechSymbols.load('unicode.dic')
        self._languageDetector = languageDetection.LanguageDetector(
            list(self._voiceManager.languages), speechSymbols)

        speech._speakWithoutPauses = speech.SpeechWithoutPauses(
            speakFunc=self.patchedSpeak)
        speech.speakWithoutPauses = speech._speakWithoutPauses.speakWithoutPauses

        # Only keep locales that actually have at least one voice.
        self._localeToVoices = self._voiceManager.localeToVoicesMap
        self._locales = sorted([
            l for l in self._localeToVoices if len(self._localeToVoices[l]) > 0
        ])
        self._localeNames = list(
            map(self._getLocaleReadableName, self._locales))

        self._voice = None

    def _onIndexReached(self, index):
        """Relay engine callbacks to NVDA: a concrete index means a bookmark
        was reached; None signals that speech has finished."""
        if index is None:
            synthDoneSpeaking.notify(synth=self)
            return
        synthIndexReached.notify(synth=self, index=index)

    def terminate(self):
        """Restore NVDA's original speech functions and shut down Vocalizer."""
        speech.speak = self._realSpeakFunc
        speech.speakSpelling = self._realSpellingFunc

        # Rebuild SpeechWithoutPauses around the restored speak function.
        speech._speakWithoutPauses = speech.SpeechWithoutPauses(
            speakFunc=speech.speak)
        speech.speakWithoutPauses = speech._speakWithoutPauses.speakWithoutPauses

        try:
            self.cancel()
            self._voiceManager.close()
            _vocalizer.terminate()
        except RuntimeError:
            log.error("Vocalizer terminate", exc_info=True)

    def speak(self, speechSequence):
        """Speak a sequence, switching voice instances on language changes."""
        if self.uwv \
         and _config.vocalizerConfig['autoLanguageSwitching']['useUnicodeLanguageDetection'] \
         and _config.vocalizerConfig['autoLanguageSwitching']['afterSymbolDetection']:
            speechSequence = self._languageDetector.add_detected_language_commands(
                speechSequence)
            speechSequence = list(speechSequence)
        speechSequence = self.patchedNumSpeechSequence(speechSequence)
        speechSequence = self.patchedSpaceSpeechSequence(speechSequence)

        currentInstance = defaultInstance = self._voiceManager.defaultVoiceInstance.token
        currentLanguage = defaultLanguage = self.language
        # Text accumulated for the current voice instance; flushed whenever
        # the instance changes or a break/split command arrives.
        chunks = []
        hasText = False
        charMode = False
        for command in speechSequence:
            if isinstance(command, str):
                command = command.strip()
                if not command:
                    continue
                # If character mode is on use lower case characters
                # Because the synth does not allow to turn off the caps reporting
                if charMode or len(command) == 1:
                    command = command.lower()
                # replace the excape character since it is used for parameter changing
                chunks.append(command.replace("\x1b", ""))
                hasText = True
            elif isinstance(command, speech.IndexCommand):
                chunks.append("\x1b\\mrk=%d\\" % command.index)
            elif isinstance(command, speech.BreakCommand):
                maxTime = 6553 if self.variant == "bet2" else 65535
                breakTime = max(1, min(command.time, maxTime))
                self._speak(currentInstance, chunks)
                chunks = []
                hasText = False
                _vocalizer.processBreak(currentInstance, breakTime)
            elif isinstance(command, speech.CharacterModeCommand):
                charMode = command.state
                s = "\x1b\\tn=spell\\" if command.state else "\x1b\\tn=normal\\"
                chunks.append(s)
            elif isinstance(command, speech.LangChangeCommand) or isinstance(
                    command, speechcommand.WVLangChangeCommand):
                if command.lang == currentLanguage:
                    # Keep on the same voice.
                    continue
                if command.lang is None:
                    # No language, use default.
                    currentInstance = defaultInstance
                    currentLanguage = defaultLanguage
                    continue
                # Changed language, lets see what we have.
                currentLanguage = command.lang
                newVoiceName = self._voiceManager.getVoiceNameForLanguage(
                    currentLanguage)
                if newVoiceName is None:
                    # No voice for this language, use default.
                    newInstance = defaultInstance
                else:
                    newInstance = self._voiceManager.getVoiceInstance(
                        newVoiceName).token
                if newInstance == currentInstance:
                    # Same voice, next command.
                    continue
                if hasText:  # We changed voice, send text we already have to vocalizer.
                    self._speak(currentInstance, chunks)
                    chunks = []
                    hasText = False
                currentInstance = newInstance
            elif isinstance(command, speech.PitchCommand):
                # Express the pitch offset relative to the instance's
                # current pitch, as an inline escape sequence.
                pitch = self._voiceManager.getVoiceParameter(
                    currentInstance, _vocalizer.VE_PARAM_PITCH, type_=int)
                pitchOffset = self._percentToParam(
                    command.offset, _vocalizer.PITCH_MIN,
                    _vocalizer.PITCH_MAX) - _vocalizer.PITCH_MIN
                chunks.append("\x1b\\pitch=%d\\" % (pitch + pitchOffset))
            elif isinstance(command, speechcommand.SplitCommand):
                self._speak(currentInstance, chunks)
                chunks = []
                hasText = False
        # Flush any remaining text.
        if chunks:
            self._speak(currentInstance, chunks)

    def _speak(self, voiceInstance, chunks):
        """Join the pending text chunks and send them to the vocalizer engine."""
        joined = speech.CHUNK_SEPARATOR.join(chunks)
        # Drop the spaces the separator leaves before an escape sequence so
        # embedded parameter-change commands arrive intact.
        joined = joined.replace("  \x1b", "\x1b")
        _vocalizer.processText2Speech(voiceInstance, joined)

    def patchedSpeak(self, speechSequence, symbolLevel=None, priority=None):
        """Pre-process a speech sequence, then delegate to the real speak function.

        Applies, in order: comma stripping from numbers (when _cni is set),
        removal of language-change commands (when _dli is set), and Unicode
        based language detection (when enabled in the add-on configuration).
        """
        if self._cni:
            # Remove comma-grouping from digits in every text item.
            speechSequence = [
                comma_number_pattern.sub(lambda m: '', item)
                if isinstance(item, str) else item
                for item in speechSequence
            ]
        if self._dli:
            speechSequence = self.patchedRemoveLangChangeCommandSpeechSequence(
                speechSequence)
        autoSwitch = _config.vocalizerConfig['autoLanguageSwitching']
        if (self.uwv
                and autoSwitch['useUnicodeLanguageDetection']
                and not autoSwitch['afterSymbolDetection']):
            speechSequence = list(
                self._languageDetector.add_detected_language_commands(
                    speechSequence))
        self._realSpeakFunc(speechSequence, symbolLevel, priority=priority)

    def patchedSpeakSpelling(self,
                             text,
                             locale=None,
                             useCharacterDescriptions=False,
                             priority=None):
        """Spell *text*, optionally splitting it per detected language first."""
        useDetection = (
            config.conf["speech"]["autoLanguageSwitching"]
            and _config.vocalizerConfig['autoLanguageSwitching']['useUnicodeLanguageDetection']
            and config.conf["speech"]["trustVoiceLanguage"]
        )
        if not useDetection:
            self._realSpellingFunc(text,
                                   locale,
                                   useCharacterDescriptions,
                                   priority=priority)
            return
        # Spell each detected-language run with its own locale.
        for chunk, chunkLocale in self._languageDetector.process_for_spelling(
                text, locale):
            self._realSpellingFunc(chunk,
                                   chunkLocale,
                                   useCharacterDescriptions,
                                   priority=priority)

    def cancel(self):
        # Stop all speech currently queued or being synthesized.
        _vocalizer.stop()

    def pause(self, switch):
        # Pause (switch=True) or resume (switch=False) audio output.
        if switch:
            _vocalizer.pause()
        else:
            _vocalizer.resume()

    def _get_volume(self):
        # Volume is read from the default voice instance.
        return self._voiceManager.defaultVoiceInstance.volume

    def _set_volume(self, value):
        # commit() pushes the new parameter value to the engine.
        self._voiceManager.defaultVoiceInstance.volume = value
        self._voiceManager.defaultVoiceInstance.commit()

    def _get_rate(self):
        return self._voiceManager.defaultVoiceInstance.rate

    def _set_rate(self, value):
        self._voiceManager.defaultVoiceInstance.rate = value
        self._voiceManager.defaultVoiceInstance.commit()

    def _get_pitch(self):
        return self._voiceManager.defaultVoiceInstance.pitch

    def _set_pitch(self, value):
        self._voiceManager.defaultVoiceInstance.pitch = value
        self._voiceManager.defaultVoiceInstance.commit()

    def _getAvailableVoices(self):
        # The voice manager keeps the catalogue of installed voices.
        return self._voiceManager.voiceInfos

    def _get_voice(self):
        """Return the current voice name, choosing a sensible default when none
        has been explicitly set yet."""
        if self._voice is not None:
            return self._voiceManager.defaultVoiceName
        # No voice chosen yet: prefer one matching the NVDA UI language.
        fallback = self._voiceManager.getVoiceNameForLanguage(
            languageHandler.getLanguage())
        if fallback is not None:
            return fallback
        # Otherwise fall back to the first available voice.
        return next(iter(self.availableVoices))

    def _set_voice(self, voiceName):
        """Make *voiceName* the default voice; no-op if it already is."""
        self._voice = voiceName
        if voiceName == self._voiceManager.defaultVoiceName:
            return
        # Stop speech before setting a new voice to avoid voice instances
        # continuing speaking when changing voices for, e.g., say-all
        # See NVDA ticket #3540
        _vocalizer.stop()
        self._voiceManager.setDefaultVoice(voiceName)
        # Available variants are cached by default. As variants maybe different for each voice remove the cached value
        # if hasattr(self, '_availableVariants'):
        # del self._availableVariants
        # Synchronize with the synth so the parameters
        # we report are not from the previous voice.
        # _vocalizer.sync()

    def _get_variant(self):
        return self._voiceManager.defaultVoiceInstance.variant

    def _set_variant(self, name):
        # Cancel pending speech first so the variant switch takes effect cleanly.
        self.cancel()
        self._voiceManager.defaultVoiceInstance.variant = name

    def _getAvailableVariants(self):
        # Wrap each variant name in a VoiceInfo, keyed and labelled by the name.
        dbs = self._voiceManager.defaultVoiceInstance.variants
        return OrderedDict([(d, VoiceInfo(d, d)) for d in dbs])

    def _get_availableLanguages(self):
        return self._voiceManager.languages

    def _get_language(self):
        return self._voiceManager.getVoiceLanguage()

    def _info(self):
        # Currently only the driver description is reported.
        s = [self.description]
        return ", ".join(s)

    def _get_availableNumlans(self):
        """Return the choices for the number-reading language setting:
        "default" plus every configured locale."""
        options = {
            "default":
            driverHandler.StringParameterInfo("default", _("default")),
        }
        for locale, name in zip(self._locales, self._localeNames):
            options[locale] = driverHandler.StringParameterInfo(locale, name)
        return options

    def _get_numlan(self):
        # Language used to read numbers (see patchedNumSpeechSequence).
        return self._numlan

    def _set_numlan(self, value):
        self._numlan = value

    def _get_availableNummods(self):
        # Number reading mode: whole value vs. digit by digit.
        return dict({
            "value":
            driverHandler.StringParameterInfo("value", _("value")),
            "number":
            driverHandler.StringParameterInfo("number", _("number")),
        })

    def _get_nummod(self):
        return self._nummod

    def _set_nummod(self, value):
        self._nummod = value

    def _get_chinesespace(self):
        # Pause length factor inserted at spaces between Chinese characters
        # (see patchedSpaceSpeechSequence); "0" disables the feature.
        return self._chinesespace

    def _set_chinesespace(self, value):
        self._chinesespace = value

    def _get_cni(self):
        # When true, commas are stripped from grouped numbers (see patchedSpeak).
        return self._cni

    def _set_cni(self, value):
        self._cni = value

    def _get_dli(self):
        # When true, language-change commands are removed (see patchedSpeak).
        return self._dli

    def _set_dli(self, value):
        self._dli = value

    def patchedNumSpeechSequence(self, speechSequence):
        """Re-tag numbers in the sequence with the configured number language
        and reading mode."""
        return self.coercionNumberLangChange(speechSequence, self._numlan,
                                             self._nummod)

    def patchedSpaceSpeechSequence(self, speechSequence):
        """Insert break commands at spaces between Chinese characters.

        Disabled when the chinesespace setting is 0; the break length is
        5 ms times the setting's value.
        """
        if int(self._chinesespace) == 0:
            return speechSequence
        # Pass 1: merge adjacent text items into single strings so the
        # space pattern can match across original item boundaries.
        merged = []
        pending = ""
        for item in speechSequence:
            if isinstance(item, str):
                pending += item
            else:
                merged.append(pending)
                merged.append(item)
                pending = ""
        merged.append(pending)
        # Pass 2: split each text item on the Chinese-space pattern and put a
        # BreakCommand between the fragments.
        breakTime = int(self._chinesespace) * 5
        processed = []
        for item in merged:
            if not isinstance(item, str):
                processed.append(item)
                continue
            fragments = re.split(chinese_space_pattern, item)
            if len(fragments) == 1:
                processed.append(item)
            else:
                for fragment in fragments:
                    processed.append(fragment)
                    processed.append(speech.BreakCommand(breakTime))
                # No break after the final fragment.
                del processed[-1]
        return processed

    def patchedRemoveLangChangeCommandSpeechSequence(self, speechSequence):
        """Return the sequence with every language-change command filtered out."""
        return [
            item for item in speechSequence
            if not isinstance(item, speech.LangChangeCommand)
        ]

    def patchedLengthSpeechSequence(self, speechSequence):
        """Split long text items so each chunk stays around 100 characters."""
        processed = []
        for item in speechSequence:
            if not isinstance(item, str):
                processed.append(item)
                continue
            processed += self.lengthsplit(item, 100)
        return processed

    def lengthsplit(self, string, length):
        """Break *string* at whitespace into pieces of roughly *length*
        characters, separating pieces with SplitCommand markers."""
        whitespace = re.compile(r"[\s]")
        separators = whitespace.findall(string)
        words = whitespace.split(string)
        pieces = []
        current = ""
        for word, sep in zip(words, separators):
            current += word + sep
            if len(current) > length:
                pieces.append(current)
                pieces.append(speechcommand.SplitCommand())
                current = ""
        # split() always yields one more word than there are separators;
        # attach the trailing word to whatever is left over.
        pieces.append(current + words[-1])
        return pieces

    def resplit(self, pattern, string, mode):
        """Split *string* on *pattern* matches and bracket each match with
        StartNumber/EndNumber language-change markers.

        In 'number' mode the matched digits are spaced out (so they are read
        one by one), re-joining any decimal point.
        """
        matches = pattern.findall(string)
        pieces = pattern.split(string)
        out = []
        for piece, match in zip(pieces, matches):
            if mode == 'value':
                out += [
                    piece,
                    speech.LangChangeCommand('StartNumber'),
                    match,
                    speech.LangChangeCommand('EndNumber'),
                ]
            elif mode == 'number':
                out += [
                    piece,
                    speech.LangChangeCommand('StartNumber'),
                    ' '.join(match).replace(" . ", "."),
                    speech.LangChangeCommand('EndNumber'),
                ]
        out.append(pieces[-1])
        return out

    def coercionNumberLangChange(self, speechSequence, numberLanguage, mode):
        """Tag numbers in the sequence so they are spoken in *numberLanguage*.

        First pass: split text items with resplit(), which brackets numbers in
        StartNumber/EndNumber placeholder commands. Second pass: rewrite those
        placeholders in place — StartNumber becomes *numberLanguage*, EndNumber
        becomes whatever language was active before the number.
        """
        result = []
        for command in speechSequence:
            if isinstance(command, str):
                result.extend(self.resplit(number_pattern, command, mode))
            else:
                result.append(command)

        currentLang = self.language
        for command in result:
            if isinstance(command, speech.LangChangeCommand):
                if command.lang == 'StartNumber':
                    command.lang = numberLanguage
                elif command.lang == 'EndNumber':
                    command.lang = currentLang
                else:
                    # A real language change: remember it for later EndNumbers.
                    currentLang = command.lang
        return result

    def _getLocaleReadableName(self, locale):
        """Return the human-readable description for *locale*, falling back to
        the raw locale code when none is known."""
        description = languageHandler.getLanguageDescription(locale)
        return description if description else locale
コード例 #6
0
ファイル: espeak.py プロジェクト: mai-codes/evoHax-nvda
class SynthDriver(SynthDriver):
    """NVDA synthesizer driver for the eSpeak engine (Python 2 era code)."""
    name = "espeak"
    description = "eSpeak"

    supportedSettings = (
        SynthDriver.VoiceSetting(),
        SynthDriver.VariantSetting(),
        SynthDriver.RateSetting(),
        # Translators: This is the name of the rate boost voice toggle
        # which further increases the speaking rate when enabled.
        BooleanSynthSetting("rateBoost", _("Rate boos&t")),
        SynthDriver.PitchSetting(),
        SynthDriver.InflectionSetting(),
        SynthDriver.VolumeSetting(),
    )

    @classmethod
    def check(cls):
        # eSpeak is bundled, so the driver is always available.
        return True

    def __init__(self):
        _espeak.initialize()
        log.info("Using eSpeak version %s" % _espeak.info())
        lang = languageHandler.getLanguage()
        _espeak.setVoiceByLanguage(lang)
        self._language = lang
        self._variantDict = _espeak.getVariantDict()
        self.variant = "max"
        # Initial parameter values (percentages).
        self.rate = 30
        self.pitch = 40
        self.inflection = 75

    def _get_language(self):
        return self._language

    def speak(self, speechSequence):
        """Convert the speech sequence to SSML-ish markup and speak it."""
        defaultLanguage = self._language
        textList = []
        langChanged = False
        for item in speechSequence:
            if isinstance(item, basestring):
                s = unicode(item)
                # Replace \01, as this is used for embedded commands.
                # Also replace < and > as espeak handles xml.
                # FIX: str.translate returns a new string; the original code
                # discarded the result, so the replacements never happened.
                s = s.translate({
                    ord(u'\01'): None,
                    ord(u'<'): u'&lt;',
                    ord(u'>'): u'&gt;'
                })
                textList.append(s)
            elif isinstance(item, speech.IndexCommand):
                textList.append("<mark name=\"%d\" />" % item.index)
            elif isinstance(item, speech.CharacterModeCommand):
                textList.append("<say-as interpret-as=\"characters\">" if item.
                                state else "</say-as>")
            elif isinstance(item, speech.LangChangeCommand):
                # Close any previously opened voice tag before opening a new one.
                if langChanged:
                    textList.append("</voice>")
                textList.append(
                    "<voice xml:lang=\"%s\">" %
                    (item.lang if item.lang else defaultLanguage).replace(
                        '_', '-'))
                langChanged = True
            elif isinstance(item, speech.SpeechCommand):
                log.debugWarning("Unsupported speech command: %s" % item)
            else:
                log.error("Unknown speech: %s" % item)
        if langChanged:
            textList.append("</voice>")
        text = u"".join(textList)
        _espeak.speak(text)

    def cancel(self):
        _espeak.stop()

    def pause(self, switch):
        _espeak.pause(switch)

    _rateBoost = False
    RATE_BOOST_MULTIPLIER = 3

    def _get_rateBoost(self):
        return self._rateBoost

    def _set_rateBoost(self, enable):
        if enable == self._rateBoost:
            return
        # Re-apply the current rate so the multiplier takes effect.
        rate = self.rate
        self._rateBoost = enable
        self.rate = rate

    def _get_rate(self):
        val = _espeak.getParameter(_espeak.espeakRATE, 1)
        if self._rateBoost:
            val = int(val / self.RATE_BOOST_MULTIPLIER)
        return self._paramToPercent(val, _espeak.minRate, _espeak.maxRate)

    def _set_rate(self, rate):
        val = self._percentToParam(rate, _espeak.minRate, _espeak.maxRate)
        if self._rateBoost:
            val = int(val * self.RATE_BOOST_MULTIPLIER)
        _espeak.setParameter(_espeak.espeakRATE, val, 0)

    def _get_pitch(self):
        val = _espeak.getParameter(_espeak.espeakPITCH, 1)
        return self._paramToPercent(val, _espeak.minPitch, _espeak.maxPitch)

    def _set_pitch(self, pitch):
        val = self._percentToParam(pitch, _espeak.minPitch, _espeak.maxPitch)
        _espeak.setParameter(_espeak.espeakPITCH, val, 0)

    def _get_inflection(self):
        # eSpeak exposes inflection as the RANGE parameter; it shares the
        # pitch min/max for percentage conversion.
        val = _espeak.getParameter(_espeak.espeakRANGE, 1)
        return self._paramToPercent(val, _espeak.minPitch, _espeak.maxPitch)

    def _set_inflection(self, val):
        val = self._percentToParam(val, _espeak.minPitch, _espeak.maxPitch)
        _espeak.setParameter(_espeak.espeakRANGE, val, 0)

    def _get_volume(self):
        # Volume is already a 0-100 value; no percentage conversion needed.
        return _espeak.getParameter(_espeak.espeakVOLUME, 1)

    def _set_volume(self, volume):
        _espeak.setParameter(_espeak.espeakVOLUME, volume, 0)

    def _getAvailableVoices(self):
        voices = OrderedDict()
        for v in _espeak.getVoiceList():
            l = v.languages[1:]
            identifier = os.path.basename(v.identifier)
            voices[identifier] = VoiceInfo(identifier, v.name, l)
        return voices

    def _get_voice(self):
        curVoice = getattr(self, '_voice', None)
        if curVoice:
            return curVoice
        curVoice = _espeak.getCurrentVoice()
        if not curVoice:
            return ""
        # Strip any "+variant" suffix from the identifier.
        return curVoice.identifier.split('+')[0]

    def _set_voice(self, identifier):
        if not identifier:
            return
        if "\\" in identifier:
            identifier = os.path.basename(identifier)
        self._voice = identifier
        try:
            _espeak.setVoiceAndVariant(voice=identifier, variant=self._variant)
        except:
            # Roll back so _get_voice re-queries the engine.
            self._voice = None
            raise
        self._language = super(SynthDriver, self).language

    def _get_lastIndex(self):
        return _espeak.lastIndex

    def terminate(self):
        _espeak.terminate()

    def _get_variant(self):
        return self._variant

    def _set_variant(self, val):
        # Unknown variants fall back to "max".
        self._variant = val if val in self._variantDict else "max"
        _espeak.setVoiceAndVariant(variant=self._variant)

    def _getAvailableVariants(self):
        return OrderedDict((ID, VoiceInfo(ID, name))
                           for ID, name in self._variantDict.iteritems())
コード例 #7
0
class SynthDriver(SynthDriver):
	"""NVDA synthesizer driver for Microsoft Speech API version 4 engines."""

	name="sapi4"
	description="Microsoft Speech API version 4"
	supportedSettings=[SynthDriver.VoiceSetting()]

	isRunning = False

	@classmethod
	def check(cls):
		# The driver is usable only if the SAPI4 TTS enumerator is registered.
		try:
			_winreg.OpenKey(_winreg.HKEY_CLASSES_ROOT, r"CLSID\%s" % CLSID_TTSEnumerator).Close()
			return True
		except WindowsError:
			return False

	def _fetchEnginesList(self):
		"""Enumerate all installed SAPI4 TTS engine modes."""
		enginesList=[]
		self._ttsEngines.Reset()
		while True:
			mode=TTSMODEINFO()
			fetched=c_ulong()
			try:
				self._ttsEngines.Next(1,byref(mode),byref(fetched))
			except:
				log.error("can't get next engine",exc_info=True)
				break
			if fetched.value==0:
				break
			enginesList.append(mode)
		return enginesList

	def __init__(self):
		self.lastIndex=None
		self._bufSink=SynthDriverBufSink(self)
		self._bufSinkPtr=self._bufSink.QueryInterface(ITTSBufNotifySink)
		# HACK: Some buggy engines call Release() too many times on our buf sink.
		# Therefore, don't let the buf sink be deleted before we release it ourselves.
		self._bufSink._allowDelete=False
		self._ttsEngines=CoCreateInstance(CLSID_TTSEnumerator, ITTSEnumW)
		self._enginesList=self._fetchEnginesList()
		if len(self._enginesList)==0:
			raise RuntimeError("No Sapi4 engines available")
		self.voice=str(self._enginesList[0].gModeID)
		self._rate = None

	def terminate(self):
		self._bufSink._allowDelete = True

	def speak(self,speechSequence):
		"""Convert the speech sequence to SAPI4 tagged text and speak it."""
		textList=[]
		charMode=False
		for item in speechSequence:
			if isinstance(item,basestring):
				item = item.replace(u"\u2022", '').replace(u"\uf0b7", '') # nvdajp (bullet)
				# Escape backslashes, which introduce SAPI4 control tags.
				textList.append(item.replace('\\','\\\\'))
			elif isinstance(item,speech.IndexCommand):
				textList.append("\\mrk=%d\\"%item.index)
			elif isinstance(item,speech.CharacterModeCommand):
				textList.append("\\RmS=1\\" if item.state else "\\RmS=0\\")
				charMode=item.state
			elif isinstance(item,speech.SpeechCommand):
				log.debugWarning("Unsupported speech command: %s"%item)
			else:
				log.error("Unknown speech: %s"%item)
		if charMode:
			# Some synths stay in character mode if we don't explicitly disable it.
			textList.append("\\RmS=0\\")
		text="".join(textList)
		flags=TTSDATAFLAG_TAGGED
		global isRunning
		isRunning = True
		self._ttsCentral.TextData(VOICECHARSET.CHARSET_TEXT, flags,TextSDATA(text),self._bufSinkPtr,ITTSBufNotifySink._iid_)

	def cancel(self):
		global isRunning
		# FIX: cancel stops speech, so mark the engine as no longer running.
		# The previous code set this to True (copy-paste from speak), which
		# made isSpeaking() report stale state after a cancel.
		isRunning = False
		self._ttsCentral.AudioReset()
		self.lastIndex=None

	def pause(self,switch):
		if switch:
			try:
				self._ttsCentral.AudioPause()
			except COMError:
				pass
		else:
			self._ttsCentral.AudioResume()

	def setSpeaking(self, switch):
		global isRunning
		isRunning = switch

	def isSpeaking(self):
		return isRunning

	def removeSetting(self,name):
		#Putting it here because currently no other synths make use of it. OrderedDict, where you are?
		for i,s in enumerate(self.supportedSettings):
			if s.name==name:
				del self.supportedSettings[i]
				return

	def _set_voice(self,val):
		"""Select an engine mode by GUID and probe its rate/pitch/volume support."""
		try:
			val=GUID(val)
		except:
			val=self._enginesList[0].gModeID
		# FIX: use for/else so an unknown GUID raises instead of silently
		# selecting the last enumerated engine (the old "mode is None" check
		# could never fire after a non-empty loop).
		for mode in self._enginesList:
			if mode.gModeID==val:
				break
		else:
			raise ValueError("no such mode: %s"%val)
		self._currentMode=mode
		self._ttsAudio=CoCreateInstance(CLSID_MMAudioDest,IAudioMultiMediaDevice)
		self._ttsAudio.DeviceNumSet(nvwave.outputDeviceNameToID(config.conf["speech"]["outputDevice"], True))
		self._ttsCentral=POINTER(ITTSCentralW)()
		self._ttsEngines.Select(self._currentMode.gModeID,byref(self._ttsCentral),self._ttsAudio)
		self._ttsAttrs=self._ttsCentral.QueryInterface(ITTSAttributes)
		#Find out rate limits
		hasRate=bool(mode.dwFeatures&TTSFEATURE_SPEED)
		if hasRate:
			try:
				oldVal=DWORD()
				self._ttsAttrs.SpeedGet(byref(oldVal))
				self._ttsAttrs.SpeedSet(TTSATTR_MINSPEED)
				newVal=DWORD()
				self._ttsAttrs.SpeedGet(byref(newVal))
				self._minRate=newVal.value
				self._ttsAttrs.SpeedSet(TTSATTR_MAXSPEED)
				self._ttsAttrs.SpeedGet(byref(newVal))
				# ViaVoice (and perhaps other synths) doesn't seem to like the speed being set to maximum.
				self._maxRate=newVal.value-1
				self._ttsAttrs.SpeedSet(oldVal.value)
				if self._maxRate<=self._minRate:
					hasRate=False
			except COMError:
				hasRate=False
		if hasRate:
			if not self.isSupported('rate'):
				self.supportedSettings.insert(1,SynthDriver.RateSetting())
		else:
			if self.isSupported("rate"): self.removeSetting("rate")
		#Find out pitch limits
		hasPitch=bool(mode.dwFeatures&TTSFEATURE_PITCH)
		if hasPitch:
			try:
				oldVal=WORD()
				self._ttsAttrs.PitchGet(byref(oldVal))
				self._ttsAttrs.PitchSet(TTSATTR_MINPITCH)
				newVal=WORD()
				self._ttsAttrs.PitchGet(byref(newVal))
				self._minPitch=newVal.value
				self._ttsAttrs.PitchSet(TTSATTR_MAXPITCH)
				self._ttsAttrs.PitchGet(byref(newVal))
				self._maxPitch=newVal.value
				self._ttsAttrs.PitchSet(oldVal.value)
				if self._maxPitch<=self._minPitch:
					hasPitch=False
			except COMError:
				hasPitch=False
		if hasPitch:
			if not self.isSupported('pitch'):
				self.supportedSettings.insert(2,SynthDriver.PitchSetting())
		else:
			if self.isSupported('pitch'): self.removeSetting('pitch')
		#Find volume limits
		hasVolume=bool(mode.dwFeatures&TTSFEATURE_VOLUME)
		if hasVolume:
			try:
				oldVal=DWORD()
				self._ttsAttrs.VolumeGet(byref(oldVal))
				self._ttsAttrs.VolumeSet(TTSATTR_MINVOLUME)
				newVal=DWORD()
				self._ttsAttrs.VolumeGet(byref(newVal))
				self._minVolume=newVal.value
				self._ttsAttrs.VolumeSet(TTSATTR_MAXVOLUME)
				self._ttsAttrs.VolumeGet(byref(newVal))
				self._maxVolume=newVal.value
				self._ttsAttrs.VolumeSet(oldVal.value)
				if self._maxVolume<=self._minVolume:
					hasVolume=False
			except COMError:
				hasVolume=False
		if hasVolume:
			if not self.isSupported('volume'):
				self.supportedSettings.insert(3,SynthDriver.VolumeSetting())
		else:
			if self.isSupported('volume'): self.removeSetting('volume')

	def _get_voice(self):
		return str(self._currentMode.gModeID)

	def _getAvailableVoices(self):
		voices=OrderedDict()
		for mode in self._enginesList:
			ID=str(mode.gModeID)
			name="%s - %s"%(mode.szModeName,mode.szProductName)
			try:
				language=locale.windows_locale[mode.language.LanguageID]
			except KeyError:
				language=None
			voices[ID]=VoiceInfo(ID,name,language)
		return voices

	def _get_rate(self):
		# Prefer the cached value set through _set_rate; otherwise query the engine.
		if self._rate is not None:
			return self._rate
		val=DWORD()
		self._ttsAttrs.SpeedGet(byref(val))
		ret = self._paramToPercent(val.value,self._minRate,self._maxRate)
		return min(100, ret)

	def _set_rate(self,val):
		self._rate = val
		val=self._percentToParam(val,self._minRate,self._maxRate)
		val=min(self._maxRate, val)
		self._ttsAttrs.SpeedSet(val)

	def _get_pitch(self):
		val=WORD()
		self._ttsAttrs.PitchGet(byref(val))
		return self._paramToPercent(val.value,self._minPitch,self._maxPitch)

	def _set_pitch(self,val):
		val=self._percentToParam(val,self._minPitch,self._maxPitch)
		self._ttsAttrs.PitchSet(val)

	def _get_volume(self):
		# Volume packs left/right channels into one DWORD; use the low word.
		val=DWORD()
		self._ttsAttrs.VolumeGet(byref(val))
		return self._paramToPercent(val.value&0xffff,self._minVolume&0xffff,self._maxVolume&0xffff)

	def _set_volume(self,val):
		val=self._percentToParam(val,self._minVolume&0xffff,self._maxVolume&0xffff)
		# Duplicate the value into the high word so both channels match.
		val+=val<<16
		self._ttsAttrs.VolumeSet(val)
コード例 #8
0
ファイル: RHVoice.py プロジェクト: yuriy12/RHVoice
class SynthDriver(SynthDriver):
    """NVDA synthesizer driver for RHVoice (Python 2 era code)."""
    name = "RHVoice"
    description = "RHVoice"

    supportedSettings = (SynthDriver.RateSetting(), SynthDriver.PitchSetting(),
                         SynthDriver.VolumeSetting(),
                         SynthDriver.VoiceSetting(),
                         SynthDriver.VariantSetting())

    @classmethod
    def check(cls):
        return os.path.isfile(lib_path)

    def __init__(self):
        # Declare the C API signatures of the RHVoice shared library.
        self.__lib = ctypes.CDLL(lib_path.encode(sys.getfilesystemencoding()))
        self.__lib.RHVoice_initialize.argtypes = (c_char_p, RHVoice_callback,
                                                  c_char_p, c_uint)
        self.__lib.RHVoice_initialize.restype = c_int
        self.__lib.RHVoice_new_message_utf16.argtypes = (c_wchar_p, c_int,
                                                         c_int)
        self.__lib.RHVoice_new_message_utf16.restype = RHVoice_message
        self.__lib.RHVoice_delete_message.argtypes = (RHVoice_message, )
        self.__lib.RHVoice_speak.argtypes = (RHVoice_message, )
        self.__lib.RHVoice_get_min_rate.restype = c_float
        self.__lib.RHVoice_get_rate.restype = c_float
        self.__lib.RHVoice_get_max_rate.restype = c_float
        self.__lib.RHVoice_get_min_pitch.restype = c_float
        self.__lib.RHVoice_get_pitch.restype = c_float
        self.__lib.RHVoice_get_max_pitch.restype = c_float
        self.__lib.RHVoice_get_volume.restype = c_float
        self.__lib.RHVoice_get_max_volume.restype = c_float
        self.__lib.RHVoice_get_voice_count.restype = c_int
        self.__lib.RHVoice_get_variant_count.restype = c_int
        self.__lib.RHVoice_get_voice_name.argtypes = (c_int, )
        self.__lib.RHVoice_get_voice_name.restype = c_char_p
        self.__lib.RHVoice_get_variant_name.argtypes = (c_int, )
        self.__lib.RHVoice_get_variant_name.restype = c_char_p
        self.__lib.RHVoice_find_voice.argtypes = (c_char_p, )
        self.__lib.RHVoice_find_voice.restype = c_int
        self.__lib.RHVoice_find_variant.argtypes = (c_char_p, )
        self.__lib.RHVoice_find_variant.restype = c_int
        self.__lib.RHVoice_get_voice.restype = c_int
        self.__lib.RHVoice_get_variant.restype = c_int
        self.__lib.RHVoice_set_voice.argtypes = (c_int, )
        self.__lib.RHVoice_set_variant.argtypes = (c_int, )
        self.__lib.RHVoice_get_version.restype = c_char_p
        self.__silence_flag = threading.Event()
        self.__audio_callback = AudioCallback(self.__lib, self.__silence_flag)
        self.__audio_callback_wrapper = RHVoice_callback(self.__audio_callback)
        sample_rate = self.__lib.RHVoice_initialize(
            data_path.encode("UTF-8"), self.__audio_callback_wrapper,
            cfg_path.encode("UTF-8"), 0)
        if sample_rate == 0:
            raise RuntimeError("RHVoice: initialization error")
        voice_count = self.__lib.RHVoice_get_voice_count()
        if voice_count == 0:
            raise RuntimeError("RHVoice: initialization error")
        self.__player = nvwave.WavePlayer(
            channels=1,
            samplesPerSec=sample_rate,
            bitsPerSample=16,
            outputDevice=config.conf["speech"]["outputDevice"])
        self.__audio_callback.set_player(self.__player)
        self.__tts_queue = Queue.Queue()
        self.__tts_thread = TTSThread(self.__lib, self.__tts_queue,
                                      self.__player, self.__silence_flag)
        # Voice and variant indexes are 1-based in the RHVoice API.
        self._availableVoices = OrderedDict()
        for id in range(1, voice_count + 1):
            name = self.__lib.RHVoice_get_voice_name(id)
            self._availableVoices[name] = VoiceInfo(name, name, "ru")
        self.__lib.RHVoice_set_voice(1)
        self.__voice = self.__lib.RHVoice_get_voice_name(1)
        variant_count = self.__lib.RHVoice_get_variant_count()
        self._availableVariants = OrderedDict()
        for id in range(1, variant_count + 1):
            name = self.__lib.RHVoice_get_variant_name(id)
            self._availableVariants[name] = VoiceInfo(name, name, "ru")
        self.__lib.RHVoice_set_variant(1)
        self.__variant = self.__lib.RHVoice_get_variant_name(1)
        self.__rate = 50
        self.__pitch = 50
        self.__volume = 50
        self.__native_rate_range = (self.__lib.RHVoice_get_min_rate(),
                                    self.__lib.RHVoice_get_max_rate(),
                                    self.__lib.RHVoice_get_rate())
        self.__native_pitch_range = (self.__lib.RHVoice_get_min_pitch(),
                                     self.__lib.RHVoice_get_max_pitch(),
                                     self.__lib.RHVoice_get_pitch())
        self.__native_volume_range = (0, self.__lib.RHVoice_get_max_volume(),
                                      self.__lib.RHVoice_get_volume())
        # FIX: build the character mapping with flat loops. The original code
        # nested these loops inside each other, redundantly reassigning the
        # same constant entries on every iteration; the resulting mapping is
        # identical, but the intent — control chars to spaces plus XML
        # escaping — was obscured.
        # Note: tab (9), LF (10) and CR (13) are deliberately left unmapped.
        self.__char_mapping = {}
        for c in range(9):
            self.__char_mapping[c] = 32
        self.__char_mapping[11] = 32
        self.__char_mapping[12] = 32
        for c in range(14, 32):
            self.__char_mapping[c] = 32
        self.__char_mapping[ord("<")] = u"&lt;"
        self.__char_mapping[ord("&")] = u"&amp;"
        self.__tts_thread.start()
        log.info("Using RHVoice version %s" % self.__lib.RHVoice_get_version())

    def terminate(self):
        self.cancel()
        # A None item tells the TTS thread to exit.
        self.__tts_queue.put(None)
        self.__tts_thread.join()
        self.__player.close()
        self.__lib.RHVoice_terminate()

    def speak(self, speech_sequence):
        """Render the speech sequence as SSML and queue it for synthesis."""
        spell_mode = False
        text_list = []
        for item in speech_sequence:
            if isinstance(item, basestring):
                s = unicode(item).translate(self.__char_mapping)
                text_list.append((
                    u'<say-as interpret-as="characters">%s</say-as>' %
                    s) if spell_mode else s)
            elif isinstance(item, speech.IndexCommand):
                text_list.append('<mark name="%d"/>' % item.index)
            elif isinstance(item, speech.CharacterModeCommand):
                spell_mode = item.state
            elif isinstance(item, speech.SpeechCommand):
                log.debugWarning("Unsupported speech command: %s" % item)
            else:
                log.error("Unknown speech: %s" % item)
        text = u"".join(text_list)
        fmt_str = u'<speak><voice name="%s" variant="%d"><prosody rate="%f%%" pitch="%f%%" volume="%f%%">%s</prosody></voice></speak>'
        variant = self.__lib.RHVoice_find_variant(self.__variant)
        if variant == 0:
            variant = 1
        rate = convert_to_native_percent(self.__rate,
                                         *self.__native_rate_range)
        pitch = convert_to_native_percent(self.__pitch,
                                          *self.__native_pitch_range)
        volume = convert_to_native_percent(self.__volume,
                                           *self.__native_volume_range)
        ssml = fmt_str % (self.__voice, variant, rate, pitch, volume, text)
        self.__tts_queue.put(ssml)

    def pause(self, switch):
        self.__player.pause(switch)

    def cancel(self):
        # Drain any queued utterances, then silence the current one.
        try:
            while True:
                self.__tts_queue.get_nowait()
        except Queue.Empty:
            self.__silence_flag.set()
            # An empty tuple wakes the TTS thread so it notices the flag.
            self.__tts_queue.put(())
            self.__player.stop()

    def _get_lastIndex(self):
        return self.__audio_callback.index

    def _get_language(self):
        return "ru"

    def _get_rate(self):
        return self.__rate

    def _set_rate(self, rate):
        # Clamp to the 0..100 percentage range.
        self.__rate = max(0, min(100, rate))

    def _get_pitch(self):
        return self.__pitch

    def _set_pitch(self, pitch):
        self.__pitch = max(0, min(100, pitch))

    def _get_volume(self):
        return self.__volume

    def _set_volume(self, volume):
        self.__volume = max(0, min(100, volume))

    def _get_voice(self):
        return self.__voice

    def _set_voice(self, voice):
        try:
            self.__voice = self._availableVoices[voice].ID
        except KeyError:
            # Unknown voice names are ignored; the current voice stays active.
            pass

    def _get_variant(self):
        return self.__variant

    def _set_variant(self, variant):
        try:
            self.__variant = self._availableVariants[variant].ID
        except KeyError:
            # Unknown variant names are ignored; the current variant stays active.
            pass
コード例 #9
0
class SynthDriver(SynthDriver):
    """Speech synthesizer driver for the Windows OneCore (mobile) voices.

    Speech parameters are rendered into SSML by ``_OcSsmlConverter``; audio
    arrives asynchronously via a ctypes callback and is pushed to an nvwave
    player, with bookmark markers used to synchronise ``lastIndex``.
    """
    name = "oneCore"
    # Translators: Description for a speech synthesizer.
    description = _("Windows OneCore voices")
    supportedSettings = (
        SynthDriver.VoiceSetting(),
        SynthDriver.RateSetting(),
        SynthDriver.PitchSetting(),
        SynthDriver.VolumeSetting(),
    )
    # These are all controlled via SSML, so we only need attributes, not properties.
    rate = None
    pitch = None
    volume = None

    @classmethod
    def check(cls):
        if not hasattr(sys, "frozen"):
            # #3793: Source copies don't report the correct version on Windows 10 because Python isn't manifested for higher versions.
            # We want this driver to work for source copies on Windows 10, so just return True here.
            # If this isn't in fact Windows 10, it will fail when constructed, which is okay.
            return True
        # For binary copies, only present this as an available synth if this is Windows 10.
        return winVersion.winVersion.major >= 10

    def __init__(self):
        super(SynthDriver, self).__init__()
        self._dll = ctypes.windll[DLL_FILE]
        self._dll.ocSpeech_getCurrentVoiceLanguage.restype = ctypes.c_wchar_p
        self._handle = self._dll.ocSpeech_initialize()
        # Keep a reference to the ctypes callback wrapper: if it were garbage
        # collected, the native code would call into freed memory.
        self._callbackInst = ocSpeech_Callback(self._callback)
        self._dll.ocSpeech_setCallback(self._handle, self._callbackInst)
        self._dll.ocSpeech_getVoices.restype = bstrReturn
        self._dll.ocSpeech_getCurrentVoiceId.restype = ctypes.c_wchar_p
        self._player = nvwave.WavePlayer(
            1,
            SAMPLES_PER_SEC,
            BITS_PER_SAMPLE,
            outputDevice=config.conf["speech"]["outputDevice"])
        # Initialize state.
        self._queuedSpeech = []
        self._wasCancelled = False
        self._isProcessing = False
        # Set initial values for parameters that can't be queried.
        # This initialises our cache for the value.
        self.rate = 50
        self.pitch = 50
        self.volume = 100

    def terminate(self):
        super(SynthDriver, self).terminate()
        self._dll.ocSpeech_terminate(self._handle)
        # Drop the ctypes function instance for the callback,
        # as it is holding a reference to an instance method, which causes a reference cycle.
        self._callbackInst = None

    def cancel(self):
        """Stop speaking and discard anything still queued."""
        # Set a flag to tell the callback not to push more audio.
        self._wasCancelled = True
        log.debug("Cancelling")
        # There might be more text pending. Throw it away.
        self._queuedSpeech = []
        self._player.stop()

    def speak(self, speechSequence):
        """Convert a speech sequence to SSML and queue it for synthesis."""
        conv = _OcSsmlConverter(self.language, self.rate, self.pitch,
                                self.volume)
        text = conv.convertToXml(speechSequence)
        self._queueSpeech(text)

    def _queueSpeech(self, item):
        self._queuedSpeech.append(item)
        # We only process the queue here if it isn't already being processed.
        if not self._isProcessing:
            self._processQueue()

    def _processQueue(self):
        """Kick off synthesis of the next queued utterance, if any."""
        if self._queuedSpeech:
            item = self._queuedSpeech.pop(0)
            self._wasCancelled = False
            log.debug("Begin processing speech")
            self._isProcessing = True
            # ocSpeech_speak is async.
            # It will call _callback in a background thread once done,
            # which will eventually process the queue again.
            self._dll.ocSpeech_speak(self._handle, item)
            return
        log.debug("Queue empty, done processing")
        self._isProcessing = False

    def _callback(self, wavData, wavLen, markers):
        # Parameters renamed from (bytes, len, ...) so we don't shadow the builtins.
        if wavLen == 0:
            # The C++ code will log an error with details.
            log.debugWarning("ocSpeech_speak failed!")
            self._processQueue()
            return
        # This gets called in a background thread.
        # Strip the wav header.
        assert wavLen > WAV_HEADER_LEN
        wavData += WAV_HEADER_LEN
        wavLen -= WAV_HEADER_LEN
        data = ctypes.string_at(wavData, wavLen)
        if markers:
            markers = markers.split('|')
        else:
            markers = []
        prevMarker = None
        prevPos = 0

        # Push audio up to each marker so we can sync the audio with the markers.
        for marker in markers:
            if self._wasCancelled:
                break
            name, pos = marker.split(':')
            pos = int(pos)
            # pos is a time offset in 100-nanosecond units.
            # Convert this to a byte offset.
            # Order the equation so we don't have to do floating point.
            # Floor division keeps this an integer byte offset (required for slicing).
            pos = pos * BYTES_PER_SEC // HUNDRED_NS_PER_SEC
            # Push audio up to this marker.
            self._player.feed(data[prevPos:pos])
            # _player.feed blocks until the previous chunk of audio is complete, not the chunk we just pushed.
            # Therefore, indicate that we've reached the previous marker.
            if prevMarker:
                self.lastIndex = prevMarker
            prevMarker = int(name)
            prevPos = pos
        if self._wasCancelled:
            log.debug("Cancelled, stopped pushing audio")
        else:
            self._player.feed(data[prevPos:])
            if prevMarker:
                self.lastIndex = prevMarker
            log.debug("Done pushing audio")
        self._processQueue()

    def _getAvailableVoices(self, onlyValid=True):
        """Return an OrderedDict of voice ID -> VoiceInfo.

        @param onlyValid: skip voices whose registry data files are missing.
        """
        voices = OrderedDict()
        voicesStr = self._dll.ocSpeech_getVoices(self._handle).split('|')
        for voiceStr in voicesStr:
            voiceId, name = voiceStr.split(":")
            if onlyValid and not self._isVoiceValid(voiceId):
                continue
            voices[voiceId] = VoiceInfo(voiceId, name)
        return voices

    def _isVoiceValid(self, voiceId):
        """Check the voice's registry entry and verify its data files exist."""
        idParts = voiceId.split('\\')
        rootKey = getattr(_winreg, idParts[0])
        subkey = "\\".join(idParts[1:])
        try:
            hkey = _winreg.OpenKey(rootKey, subkey)
        except WindowsError as e:
            log.debugWarning("Could not open registry key %s, %s" % (voiceId, e))
            return False
        try:
            langDataPath = _winreg.QueryValueEx(hkey, 'langDataPath')
        except WindowsError as e:
            log.debugWarning(
                "Could not open registry value 'langDataPath', %s" % e)
            return False
        if not langDataPath or not isinstance(langDataPath[0], basestring):
            log.debugWarning("Invalid langDataPath value")
            return False
        if not os.path.isfile(os.path.expandvars(langDataPath[0])):
            log.debugWarning("Missing language data file: %s" %
                             langDataPath[0])
            return False
        try:
            voicePath = _winreg.QueryValueEx(hkey, 'voicePath')
        except WindowsError as e:
            # Fixed copy-paste: this message previously reported 'langDataPath'.
            log.debugWarning(
                "Could not open registry value 'voicePath', %s" % e)
            return False
        if not voicePath or not isinstance(voicePath[0], basestring):
            log.debugWarning("Invalid voicePath value")
            return False
        if not os.path.isfile(os.path.expandvars(voicePath[0] + '.apm')):
            log.debugWarning("Missing voice file: %s" % voicePath[0] + ".apm")
            return False
        return True

    def _get_voice(self):
        return self._dll.ocSpeech_getCurrentVoiceId(self._handle)

    def _set_voice(self, voiceId):
        """Select a voice by ID; raises LookupError if it doesn't exist."""
        voices = self._getAvailableVoices(onlyValid=False)
        for index, voice in enumerate(voices):
            if voice == voiceId:
                break
        else:
            raise LookupError("No such voice: %s" % voiceId)
        self._dll.ocSpeech_setVoice(self._handle, index)

    def _get_language(self):
        return self._dll.ocSpeech_getCurrentVoiceLanguage(self._handle)

    def pause(self, switch):
        self._player.pause(switch)
コード例 #10
0
class SynthDriver(synthDriverHandler.SynthDriver):
	"""Speech synthesizer driver for IBMTTS (ECI), wrapping the _ibmeci module.

	Speech is expressed with backquote inline commands (e.g. ``b"`ts1"`` for
	spell mode) and queued as (function, args) tuples on _ibmeci's synth queue.
	"""
	supportedSettings=(SynthDriver.VoiceSetting(), SynthDriver.VariantSetting(),
	SynthDriver.RateSetting(), BooleanSynthSetting("rateBoost", _("Rate boos&t")),
	SynthDriver.PitchSetting(), SynthDriver.InflectionSetting(), SynthDriver.VolumeSetting(), NumericSynthSetting("hsz", _("Head Size"), False), NumericSynthSetting("rgh", _("Roughness"), False), NumericSynthSetting("bth", _("Breathiness"), False), BooleanSynthSetting("backquoteVoiceTags", _("Enable backquote voice &tags"), False))
	description='IBMTTS'
	name='ibmeci'
	speakingLanguage=""
	@classmethod
	def check(cls):
		return _ibmeci.eciCheck()

	def __init__(self):
		_ibmeci.initialize()
		# This information doesn't really need to be displayed, and makes IBMTTS unusable if the addon is not in the same drive as NVDA executable.
		# But display it only on debug mode in case of it can be useful
		log.debug("Using IBMTTS version %s" % _ibmeci.eciVersion())
		lang = languageHandler.getLanguage()
		self.rate=50
		self.speakingLanguage=lang
		self.variant="1"

	def speak(self,speechSequence):
		"""Translate a speech sequence into queued ECI calls and start synthesis."""
		last = None
		defaultLanguage=self.language
		outlist = []
		for item in speechSequence:
			if isinstance(item, string_types):
				s = self.processText(unicode(item))
				outlist.append((_ibmeci.speak, (s,)))
				last = s
			elif isinstance(item,speech.IndexCommand):
				outlist.append((_ibmeci.index, (item.index,)))
			elif isinstance(item,speech.LangChangeCommand):
				# Map the requested language (or its 2-letter prefix) to an ECI annotation.
				l=None
				if item.lang in langsAnnotations: l = langsAnnotations[item.lang]
				elif item.lang and item.lang[0:2] in langsAnnotations: l = langsAnnotations[item.lang[0:2]]
				if l:
					if item.lang != self.speakingLanguage and item.lang != self.speakingLanguage[0:2]:
						outlist.append((_ibmeci.speak, (l,)))
						self.speakingLanguage=item.lang
				else:
					outlist.append((_ibmeci.speak, (langsAnnotations[defaultLanguage],)))
					self.speakingLanguage = defaultLanguage
			elif isinstance(item,speech.CharacterModeCommand):
				# Fixed typo: the off tag was the literal string "b`ts0" (misplaced b prefix),
				# which would be spoken instead of toggling spell mode off.
				outlist.append((_ibmeci.speak, (b"`ts1" if item.state else b"`ts0",)))
			elif isinstance(item,speech.SpeechCommand):
				log.debugWarning("Unsupported speech command: %s"%item)
			else:
				log.error("Unknown speech: %s"%item)
		# Add a short pause if the utterance doesn't already end with punctuation.
		if last is not None and last[-1] not in punctuation: outlist.append((_ibmeci.speak, (b'`p1',)))
		outlist.append((_ibmeci.setEndStringMark, ()))

		outlist.append((_ibmeci.speak, (b"`ts0",)))
		outlist.append((_ibmeci.synth, ()))
		_ibmeci.synthQueue.put(outlist)
		_ibmeci.process()

	def processText(self,text):
		"""Apply language-specific fixes and anticrash filtering; returns mbcs-encoded bytes."""
		text = text.rstrip()
		# params[9] holds the current voice/language ID — see _get_voice.
		if _ibmeci.params[9] in (65536, 65537): text = resub(english_fixes, text)
		if _ibmeci.params[9] in (131072,  131073): text = resub(spanish_fixes, text)
		if _ibmeci.params[9] in (196609, 196608):
			text = resub(french_fixes, text)
			text = text.replace('quil', 'qil') #Sometimes this string make everything buggy with IBMTTS in French
		if self._backquoteVoiceTags:
			# Leave user-authored backquote tags intact; reset voice parameters first.
			text = "`pp0 `vv%d %s" % (self.getVParam(_ibmeci.vlm), text.replace('`', ' ')) #no embedded commands
			text = resub(anticrash_res, text)
			#this converts to ansi for anticrash. If this breaks with foreign langs, we can remove it.
			text = text.encode('mbcs', 'replace')
		else:
			#this converts to ansi for anticrash. If this breaks with foreign langs, we can remove it.
			text = text.encode('mbcs', 'replace')
			text = resub(anticrash_res, text)
			text = b"`pp0 `vv%d %s" % (self.getVParam(_ibmeci.vlm), text.replace(b'`', b' ')) #no embedded commands
		text = pause_re.sub(br'\1 `p1\2\3', text)
		text = time_re.sub(br'\1:\2 \3', text)
		# temporal fix: replace , with `" -" because IBMTTS seems ignore commas at the end.
		# if you know a better solution please let me know to update it.
		if text[-1] == b",": text = text[0:-1]+b" -"
		return text

	def pause(self,switch):
		_ibmeci.pause(switch)

	def terminate(self):
		_ibmeci.terminate()

	# Whether user backquote voice tags are passed through to the engine.
	_backquoteVoiceTags=False
	def _get_backquoteVoiceTags(self):
		return self._backquoteVoiceTags

	def _set_backquoteVoiceTags(self, enable):
		if enable == self._backquoteVoiceTags:
			return
		self._backquoteVoiceTags = enable

	_rateBoost = False
	RATE_BOOST_MULTIPLIER = 1.6
	def _get_rateBoost(self):
		return self._rateBoost

	def _set_rateBoost(self, enable):
		# Re-apply the current rate so the multiplier takes effect immediately.
		if enable != self._rateBoost:
			rate = self.rate
			self._rateBoost = enable
			self.rate = rate

	def _get_rate(self):
		val = self.getVParam(_ibmeci.rate)
		if self._rateBoost: val=int(round(val/self.RATE_BOOST_MULTIPLIER))
		return self._paramToPercent(val, minRate, maxRate)

	def _set_rate(self,vl):
		val = self._percentToParam(vl, minRate, maxRate)
		if self._rateBoost: val = int(round(val *self.RATE_BOOST_MULTIPLIER))
		self._rate = val
		self.setVParam(_ibmeci.rate, val)

	def _get_pitch(self):
		return self.getVParam(_ibmeci.pitch)

	def _set_pitch(self,vl):
		self.setVParam(_ibmeci.pitch,vl)

	def _get_volume(self):
		return self.getVParam(_ibmeci.vlm)

	def _set_volume(self,vl):
		self.setVParam(_ibmeci.vlm,int(vl))

	def _set_inflection(self,vl):
		vl = int(vl)
		self.setVParam(_ibmeci.fluctuation,vl)

	def _get_inflection(self):
		return self.getVParam(_ibmeci.fluctuation)

	def _set_hsz(self,vl):
		vl = int(vl)
		self.setVParam(_ibmeci.hsz,vl)

	def _get_hsz(self):
		return self.getVParam(_ibmeci.hsz)

	def _set_rgh(self,vl):
		vl = int(vl)
		self.setVParam(_ibmeci.rgh,vl)

	def _get_rgh(self):
		return self.getVParam(_ibmeci.rgh)

	def _set_bth(self,vl):
		vl = int(vl)
		self.setVParam(_ibmeci.bth,vl)

	def _get_bth(self):
		return self.getVParam(_ibmeci.bth)

	def _getAvailableVoices(self):
		# Voices are discovered from the .syn files shipped with the engine.
		o = OrderedDict()
		for name in os.listdir(_ibmeci.ttsPath):
			if name.lower().endswith('.syn'):
				info = _ibmeci.langs[name.lower()[:3]]
				o[str(info[0])] = VoiceInfo(str(info[0]), info[1], info[2])
		return o

	def _get_voice(self):
		return str(_ibmeci.params[9])
	def _set_voice(self,vl):
		_ibmeci.set_voice(vl)
	def getVParam(self,pr):
		return _ibmeci.getVParam(pr)

	def setVParam(self, pr,vl):
		_ibmeci.setVParam(pr, vl)

	def _get_lastIndex(self):
		# Last index reported by the engine's callback.
		return _ibmeci.lastindex

	def cancel(self):
		_ibmeci.stop()

	def _getAvailableVariants(self):
		global variants
		return OrderedDict((str(id), synthDriverHandler.VoiceInfo(str(id), name)) for id, name in variants.items())

	def _set_variant(self, v):
		global variants
		self._variant = v if int(v) in variants else "1"
		_ibmeci.setVariant(int(v))
		# Changing variant resets engine parameters; restore the cached rate.
		self.setVParam(_ibmeci.rate, self._rate)

	def _get_variant(self): return self._variant
コード例 #11
0
class SynthDriver(SynthDriver):
    """Speech synthesizer driver for Microsoft Speech API version 5.

    Speech sequences are rendered to SAPI XML markup; pitch is applied via
    markup only (SAPI has no direct pitch API), which is why it is cached in
    ``_pitch`` rather than read back from the engine.
    """
    supportedSettings = (SynthDriver.VoiceSetting(), SynthDriver.RateSetting(),
                         SynthDriver.PitchSetting(),
                         SynthDriver.VolumeSetting())

    COM_CLASS = "SAPI.SPVoice"

    name = "sapi5"
    description = "Microsoft Speech API version 5"

    @classmethod
    def check(cls):
        # SAPI 5 is available if its COM class is registered.
        try:
            r = winreg.OpenKey(winreg.HKEY_CLASSES_ROOT, cls.COM_CLASS)
            r.Close()
            return True
        except:
            return False

    ttsAudioStream = None  #: Holds the ISPAudio interface for the current voice, to aid in stopping and pausing audio

    def __init__(self, _defaultVoiceToken=None):
        """
		@param _defaultVoiceToken: an optional sapi voice token which should be used as the default voice (only useful for subclasses)
		@type _defaultVoiceToken: ISpeechObjectToken
		"""
        ensureWaveOutHooks()
        self._pitch = 50
        self._initTts(_defaultVoiceToken)

    def terminate(self):
        del self.tts

    def _getAvailableVoices(self):
        """Return an OrderedDict of voice ID -> VoiceInfo for all SAPI5 voices."""
        voices = OrderedDict()
        v = self._getVoiceTokens()
        # #2629: Iterating uses IEnumVARIANT and GetBestInterface doesn't work on tokens returned by some token enumerators.
        # Therefore, fetch the items by index, as that method explicitly returns the correct interface.
        for i in xrange(len(v)):
            try:
                ID = v[i].Id
                name = v[i].GetDescription()
                try:
                    language = locale.windows_locale[int(
                        v[i].getattribute('language').split(';')[0], 16)]
                except KeyError:
                    language = None
            except COMError:
                log.warning("Could not get the voice info. Skipping...")
                # Fix: previously fell through and referenced ID/name from a
                # prior iteration (or raised NameError on the first one).
                continue
            voices[ID] = VoiceInfo(ID, name, language)
        return voices

    def _getVoiceTokens(self):
        """Provides a collection of sapi5 voice tokens. Can be overridden by subclasses if tokens should be looked for in some other registry location."""
        return self.tts.getVoices()

    def _get_rate(self):
        # SAPI rate is -10..10; convert to a 0..100 percentage.
        return (self.tts.rate * 5) + 50

    def _get_pitch(self):
        return self._pitch

    def _get_volume(self):
        return self.tts.volume

    def _get_voice(self):
        return self.tts.voice.Id

    def _get_lastIndex(self):
        bookmark = self.tts.status.LastBookmark
        if bookmark != "" and bookmark is not None:
            return int(bookmark)
        else:
            return None

    def _percentToRate(self, percent):
        # Integer (floor) division keeps this an int for the SAPI -10..10 range.
        return (percent - 50) // 5

    def _set_rate(self, rate):
        self.tts.Rate = self._percentToRate(rate)

    def _set_pitch(self, value):
        #pitch is really controled with xml around speak commands
        self._pitch = value

    def _set_volume(self, value):
        self.tts.Volume = value

    def _initTts(self, voice=None):
        """(Re)create the SAPI COM object, optionally selecting a voice token."""
        self.tts = comtypes.client.CreateObject(self.COM_CLASS)
        if voice:
            # #749: It seems that SAPI 5 doesn't reset the audio parameters when the voice is changed,
            # but only when the audio output is changed.
            # Therefore, set the voice before setting the audio output.
            # Otherwise, we will get poor speech quality in some cases.
            self.tts.voice = voice
        outputDeviceID = nvwave.outputDeviceNameToID(
            config.conf["speech"]["outputDevice"], True)
        if outputDeviceID >= 0:
            self.tts.audioOutput = self.tts.getAudioOutputs()[outputDeviceID]
        from comInterfaces.SpeechLib import ISpAudio
        try:
            self.ttsAudioStream = self.tts.audioOutputStream.QueryInterface(
                ISpAudio)
        except COMError:
            log.debugWarning("SAPI5 voice does not support ISPAudio")
            self.ttsAudioStream = None

    def _set_voice(self, value):
        tokens = self._getVoiceTokens()
        # #2629: Iterating uses IEnumVARIANT and GetBestInterface doesn't work on tokens returned by some token enumerators.
        # Therefore, fetch the items by index, as that method explicitly returns the correct interface.
        for i in xrange(len(tokens)):
            voice = tokens[i]
            if value == voice.Id:
                break
        else:
            # Voice not found.
            return
        self._initTts(voice=voice)

    def _percentToPitch(self, percent):
        # Map 0..100 to SAPI's -25..25 absmiddle range (integer arithmetic).
        return percent // 2 - 25

    IPA_TO_SAPI = {
        u"θ": u"th",
        u"s": u"s",
    }

    def _convertPhoneme(self, ipa):
        """Convert an IPA string to SAPI phoneme symbols (US English only)."""
        # We only know about US English phonemes.
        # Rather than just ignoring unknown phonemes, SAPI throws an exception.
        # Therefore, don't bother with any other language.
        if self.tts.voice.GetAttribute("language") != "409":
            raise LookupError("No data for this language")
        out = []
        outAfter = None
        for ipaChar in ipa:
            if ipaChar == u"ˈ":
                # Primary stress applies to the following phoneme in SAPI.
                outAfter = u"1"
                continue
            out.append(self.IPA_TO_SAPI[ipaChar])
            if outAfter:
                out.append(outAfter)
                outAfter = None
        if outAfter:
            out.append(outAfter)
        return u" ".join(out)

    def speak(self, speechSequence):
        """Render the speech sequence as SAPI XML and speak it asynchronously."""
        textList = []

        # NVDA SpeechCommands are linear, but XML is hierarchical.
        # Therefore, we track values for non-empty tags.
        # When a tag changes, we close all previously opened tags and open new ones.
        tags = {}
        # We have to use something mutable here because it needs to be changed by the inner function.
        tagsChanged = [True]
        openedTags = []

        def outputTags():
            # Close all open tags and re-open the current ones, if anything changed.
            if not tagsChanged[0]:
                return
            for tag in reversed(openedTags):
                textList.append("</%s>" % tag)
            del openedTags[:]
            for tag, attrs in tags.iteritems():
                textList.append("<%s" % tag)
                for attr, val in attrs.iteritems():
                    textList.append(' %s="%s"' % (attr, val))
                textList.append(">")
                openedTags.append(tag)
            tagsChanged[0] = False

        pitch = self._pitch
        # Pitch must always be specified in the markup.
        tags["pitch"] = {"absmiddle": self._percentToPitch(pitch)}
        rate = self.rate
        volume = self.volume

        for item in speechSequence:
            if isinstance(item, basestring):
                outputTags()
                textList.append(item.replace("<", "&lt;"))
            elif isinstance(item, speech.IndexCommand):
                textList.append('<Bookmark Mark="%d" />' % item.index)
            elif isinstance(item, speech.CharacterModeCommand):
                if item.state:
                    tags["spell"] = {}
                else:
                    try:
                        del tags["spell"]
                    except KeyError:
                        pass
                tagsChanged[0] = True
            elif isinstance(item, speech.BreakCommand):
                textList.append('<silence msec="%d" />' % item.time)
            elif isinstance(item, speech.PitchCommand):
                tags["pitch"] = {
                    "absmiddle":
                    self._percentToPitch(int(pitch * item.multiplier))
                }
                tagsChanged[0] = True
            elif isinstance(item, speech.VolumeCommand):
                if item.multiplier == 1:
                    # Multiplier of 1 means return to the default volume.
                    try:
                        del tags["volume"]
                    except KeyError:
                        pass
                else:
                    tags["volume"] = {"level": int(volume * item.multiplier)}
                tagsChanged[0] = True
            elif isinstance(item, speech.RateCommand):
                if item.multiplier == 1:
                    try:
                        del tags["rate"]
                    except KeyError:
                        pass
                else:
                    tags["rate"] = {
                        "absspeed":
                        self._percentToRate(int(rate * item.multiplier))
                    }
                tagsChanged[0] = True
            elif isinstance(item, speech.PhonemeCommand):
                try:
                    textList.append(
                        u'<pron sym="%s">%s</pron>' %
                        (self._convertPhoneme(item.ipa), item.text or u""))
                except LookupError:
                    log.debugWarning(
                        "Couldn't convert character in IPA string: %s" %
                        item.ipa)
                    if item.text:
                        textList.append(item.text)
            elif isinstance(item, speech.SpeechCommand):
                log.debugWarning("Unsupported speech command: %s" % item)
            else:
                log.error("Unknown speech: %s" % item)
        # Close any tags that are still open.
        tags.clear()
        tagsChanged[0] = True
        outputTags()

        text = "".join(textList)
        flags = constants.SVSFIsXML | constants.SVSFlagsAsync
        self.tts.Speak(text, flags)

    def cancel(self):
        # SAPI5's default means of stopping speech can sometimes lag at end of speech, especially with Win8 / Win 10 Microsoft Voices.
        # Therefore  instruct the underlying audio interface to stop first, before interupting and purging any remaining speech.
        if self.ttsAudioStream:
            self.ttsAudioStream.setState(SPAS_STOP, 0)
        self.tts.Speak(None, 1 | constants.SVSFPurgeBeforeSpeak)

    def pause(self, switch):
        # SAPI5's default means of pausing in most cases is either extrmemely slow (e.g. takes more than half a second) or does not work at all.
        # Therefore instruct the underlying audio interface to pause instead.
        if self.ttsAudioStream:
            self.ttsAudioStream.setState(SPAS_PAUSE if switch else SPAS_RUN, 0)
コード例 #12
0
ファイル: pico.py プロジェクト: hxebolax/PicoTTS-NVDA
class SynthDriver(SynthDriver):
	"""Speech synthesizer driver for SVOX Pico via ctypes.

	Lingware resources (language + speaker data) are loaded per voice; text is
	synthesised on a background thread which feeds PCM audio to an nvwave
	player.  Resource load/unload and engine reset ordering is dictated by the
	pico C API and must not be changed.
	"""
	name = "pico"
	description = "Svox pico synthesizer"
	supportedSettings=(SynthDriver.VoiceSetting(),SynthDriver.RateSetting(),SynthDriver.PitchSetting(),SynthDriver.VolumeSetting())
	supportedCommands = {
		IndexCommand,
	}

	supportedNotifications = {synthIndexReached, synthDoneSpeaking}
	availableVoices=OrderedDict()
	availableVoices["en-us"] = VoiceInfo('en-us', _('American English'), "en-us")
	availableVoices["en-gb"] = VoiceInfo('en-gb', _('British English'), "en-gb")
	availableVoices["es"] = VoiceInfo('es', _('Spanish'), "es")
	availableVoices["fr"] = VoiceInfo('fr', _('French'), "fr")
	availableVoices["it"] = VoiceInfo('it', _('Italian'), "it")
	availableVoices["de"] = VoiceInfo('de', _('Deutch'), "de")
	_voice = 'en-us'
	pitch = 50
	rate = 50
	volume = 100

	#:tuples of (langName,langData,speakerData)
	voice_resources={
		'en-us': (b'American English', b'en-US_ta.bin', b'en-US_lh0_sg.bin'),
		'en-gb': (b'British English', b'en-GB_ta.bin', b'en-GB_kh0_sg.bin'),
		'es': (b'Spanish', b'es-ES_ta.bin', b'es-ES_zl0_sg.bin'),
		'fr': (b'French', b'fr-FR_ta.bin', b'fr-FR_nk0_sg.bin'),
		'it': (b'Italian', b'it-IT_ta.bin', b'it-IT_cm0_sg.bin'),
		'de': (b'Deutch', b'de-DE_ta.bin', b'de-DE_gl0_sg.bin'),
	}

	@classmethod
	def check(cls):
		return os.path.isfile(os.path.join(BASE_PATH, "svox-pico.dll"))

	def pico_system_errcheck(self,result,func,args):
		# ctypes errcheck for system-level calls: nonzero status is an error.
		if result!=0:
			message=ctypes.create_string_buffer(200)
			self.dll.pico_getSystemStatusMessage(self.pico_system,result,message)
			raise RuntimeError("error while calling '%s' with arguments %s. underlying API reports: '%s'"%(func.__name__,args,message.value))
		return result

	def pico_engine_errcheck(self,result,func,args):
		# ctypes errcheck for engine-level calls: negative status is an error
		# (positive values are valid, e.g. PICO_STEP_BUSY from pico_getData).
		if result<0:
			message=ctypes.create_string_buffer(200)
			self.dll.pico_getEngineStatusMessage(self.pico_engine, result, message)
			raise RuntimeError("error while calling '%s' with arguments %s. underlying API reports: '%s'"%(func.__name__,args,message.value))
		return result

	def __init__(self):
		self.dll=ctypes.cdll.LoadLibrary(os.path.join(BASE_PATH, 'svox-pico.dll'))
		#prepare dll object
		# Attach the appropriate errcheck handler to each exported function.
		system_functs = ('pico_initialize', 'pico_terminate', 'pico_getSystemStatusMessage', 'pico_getNrSystemWarnings',
		'pico_getSystemWarning', 'pico_loadResource', 'pico_unloadResource', 'pico_getResourceName', 'pico_createVoiceDefinition', 'pico_addResourceToVoiceDefinition',
		'pico_releaseVoiceDefinition', 'pico_newEngine', 'pico_disposeEngine')
		for func in system_functs:
			getattr(self.dll,func).errcheck=self.pico_system_errcheck
		engine_funcs = ('pico_putTextUtf8', 'pico_getData', 'pico_resetEngine', 'pico_getEngineStatusMessage', 'pico_getNrEngineWarnings', 'pico_getEngineWarning')
		for func in engine_funcs:
			getattr(self.dll, func).errcheck = self.pico_engine_errcheck
		#init pico system
		# Pico works out of a caller-supplied memory arena; keep a reference so
		# it is not garbage collected while the system lives.
		self._svox_memory = ctypes.create_string_buffer(SVOX_MEMORY_SIZE)
		self.pico_system = pico_system()
		self.dll.pico_initialize(self._svox_memory, SVOX_MEMORY_SIZE, ctypes.byref(self.pico_system))
		self.pico_engine = None
		self.player = nvwave.WavePlayer(channels=1, samplesPerSec=16000, bitsPerSample=16, outputDevice=config.conf["speech"]["outputDevice"])
		self.queue = queue.Queue()
		self.isSpeaking = False
		self.background_thread = threading.Thread(target=self.background_thread_func)
		self.background_thread.daemon  = True
		self.background_thread.start()
		# NOTE(review): initial voice is hard-coded to Spanish here while the
		# class default _voice is 'en-us' — presumably intentional in this fork; confirm.
		self._set_voice("es")
		#log the version
		#version_string=ctypes.create_string_buffer(200)
		#self.dll.picoext_getVersionInfo(version_string,200)
		#log.info("Using pico version '%s'"%version_string.value)

	def load_resources(self, name, langData, speakerData):
		"""Loads lingware data, defines voice"""
		langRes = pico_resource()
		self.dll.pico_loadResource(self.pico_system, os.path.join(BASE_PATH.encode('utf-8'), b'svox-pico-data', langData), ctypes.byref(langRes))
		langResName=ctypes.create_string_buffer(200)
		self.dll.pico_getResourceName(self.pico_system, langRes, langResName)
		speakerRes = pico_resource()
		self.dll.pico_loadResource(self.pico_system, os.path.join(BASE_PATH.encode('utf-8'), b'svox-pico-data', speakerData), ctypes.byref(speakerRes))
		speakerResName=ctypes.create_string_buffer(200)
		self.dll.pico_getResourceName(self.pico_system, speakerRes, speakerResName)
		self.dll.pico_createVoiceDefinition(self.pico_system, name)
		self.dll.pico_addResourceToVoiceDefinition(self.pico_system, name, langResName)
		self.dll.pico_addResourceToVoiceDefinition(self.pico_system, name, speakerResName)
		# Remember what we loaded so free_resources can release it later.
		self._resources = (name, langRes, speakerRes)

	def free_resources(self):
		# Release the voice definition before unloading its backing resources.
		if not self._resources: return
		self.dll.pico_releaseVoiceDefinition(self.pico_system,self._resources[0])
		self.dll.pico_unloadResource(self.pico_system,ctypes.byref(self._resources[1]))
		self.dll.pico_unloadResource(self.pico_system,ctypes.byref(self._resources[2]))
		self._resources=None

	def terminate(self):
		# Orderly shutdown: stop speech, unwind the thread, then tear down
		# player, engine, resources and the pico system, in that order.
		self.cancel()
		self.queue.put((None,None))
		self.background_thread.join()
		self.player.close()
		self.player=None
		if self.pico_engine:
			self.dll.pico_disposeEngine(self.pico_system,ctypes.byref(self.pico_engine))
		self.free_resources()
		self.dll.pico_terminate(ctypes.byref(self.pico_system))
		self.pico_system=None
		del self.dll

	def _get_voice(self):
		return self._voice

	def _set_voice(self,value):
		# Switching voice requires disposing the engine and swapping lingware.
		name = self.voice_resources[value][0]
		if self.pico_engine:
			self.cancel()
			self.queue.join()
			self.dll.pico_disposeEngine(self.pico_system,ctypes.byref(self.pico_engine))
			self.free_resources()
		self.load_resources(*self.voice_resources[value])
		self.pico_engine = pico_engine()
		self.dll.pico_newEngine(self.pico_system,  name, ctypes.byref(self.pico_engine))
		self._voice = value

	def build_string(self,s):
		"""applies voice parameters"""
		# Map NVDA's 0-100 scales onto pico's markup levels.
		pitch=self.pitch+50 if self.pitch<=50 else self.pitch*2
		speed = int(20 +(self.rate/50.0) *80) if self.rate<=50 else 100 +(self.rate-50)*8
		volume = self.volume*0.7
		return ('<pitch level="%d"><speed level="%d"><volume level="%d">%s</volume></speed></pitch>' %(pitch, speed, volume, s)).encode('utf-8')

	def background_thread_func(self):
		# Worker loop: pull (text, index) items, push UTF-8 text into the
		# engine and stream the generated PCM to the player.
		bytes_sent=ctypes.c_int16()
		out_buffer=ctypes.create_string_buffer(OUT_BUFFER_SIZE)
		bytes_received=ctypes.c_int16()
		data_type=ctypes.c_int16()
		while True:
			data, index = self.queue.get()
			if data is None:
				# Sentinel pushed by terminate(): exit the thread.
				break
			synthIndexReached.notify(synth=self, index=index)
			# +1 so the terminating NUL byte is sent to the engine as well.
			remaining=len(data)+1
			while remaining and self.isSpeaking:
				self.dll.pico_putTextUtf8(self.pico_engine, data, remaining, ctypes.byref(bytes_sent))
				remaining-=bytes_sent.value
				data=data[bytes_sent.value:]
				status=PICO_STEP_BUSY
				buf=BytesIO()
				while self.isSpeaking and status==PICO_STEP_BUSY:
					status=self.dll.pico_getData(self.pico_engine, out_buffer, OUT_BUFFER_SIZE, ctypes.byref(bytes_received), ctypes.byref(data_type))
					if status==PICO_STEP_BUSY:
						buf.write(ctypes.string_at(out_buffer, bytes_received.value))
						# Batch small chunks before feeding the player.
						if buf.tell() >= 4096:
							self.player.feed(buf.getvalue())
							buf.seek(0)
							buf.truncate(0)
					else:
						if buf.tell():
							self.player.feed(buf.getvalue())
						synthDoneSpeaking.notify(synth=self)
						self.player.idle()
				if not self.isSpeaking: #stop requested during playback
					self.dll.pico_resetEngine(self.pico_engine,0)
			self.lastIndex=None
			self.queue.task_done()

	def cancel(self):
		#clear queue
		try:
			while True:
				self.queue.get_nowait()
				self.queue.task_done()
		except queue.Empty:
			pass
		# Signal the worker loop to stop and reset the engine.
		self.isSpeaking=False
		self.player.stop()
		self.lastIndex=None

	def speak(self,speechSequence):
		# Only the last IndexCommand in the sequence is honoured.
		self.isSpeaking=True
		textList=[]
		index=None
		for item in speechSequence:
			if isinstance(item, str):
				textList.append(item)
			elif isinstance(item,speech.IndexCommand):
				index=item.index
		text = " ".join(textList)
		if text:
			self.queue.put((self.build_string(text), index))

	def pause(self,switch):
		self.player.pause(switch)
コード例 #13
0
ファイル: newfon.py プロジェクト: mai-codes/evoHax-nvda
class SynthDriver(SynthDriver):
    """NVDA synthesizer driver for the Newfon engine (Russian/Ukrainian).

    Wraps synthDrivers/newfon_nvda.dll via ctypes; the DLL delivers 8-bit,
    10 kHz mono PCM through the module-level processAudio callback, which is
    played by the module-level nvwave.WavePlayer created in __init__.
    """
    name = "newfon"
    description = "Newfon"
    supportedSettings = (
        SynthDriver.VoiceSetting(),
        SynthDriver.LanguageSetting(),
        SynthDriver.RateSetting(),
        SynthSetting("accel", _("&Acceleration")),
        SynthDriver.PitchSetting(),
        SynthDriver.InflectionSetting(10),
        SynthDriver.VolumeSetting(),
    )
    # Cached 0-100 parameter values; the DLL holds the live state.
    _volume = 100
    _language = "ru"
    _pitch = 50
    _accel = 0
    _inflection = 50
    _rate = 70
    # Voice IDs are the stringified indices into this fixed list.
    availableVoices = OrderedDict((
        str(index), VoiceInfo(str(index), name)
    ) for index, name in enumerate(
        [_("male 1"), _("female 1"),
         _("male 2"), _("female 2")]))
    # NOTE(review): xrange is a Python 2 builtin; this driver predates the
    # Python 3 NVDA port.
    availableAccels = OrderedDict(
        (str(x), StringParameterInfo(str(x), str(x))) for x in xrange(8))
    # Per-voice (min, max) base pitch bounds, indexed by voice number.
    pitchTable = [(90, 130), (190, 330), (60, 120), (220, 340)]
    availableLanguages = OrderedDict(
        (("ru", LanguageInfo("ru")), ("uk", LanguageInfo("uk"))))
    newfon_lib = None
    sdrvxpdbDll = None
    dictDll = None

    @classmethod
    def check(cls):
        # Only offer this synth when its DLL ships with the NVDA install.
        return os.path.isfile('synthDrivers/newfon_nvda.dll')

    def calculateMinMaxPitch(self, pitch, inflection):
        """Map the 0-100 pitch/inflection settings onto the engine's absolute
        (min, max) pitch range for the current voice: inflection widens or
        narrows the per-voice band, then pitch shifts the whole band.
        """
        min, max = self.pitchTable[int(self.voice)]  # NOTE(review): shadows builtins min/max
        i = max - min
        i = int((i / 50.0) * ((inflection - 50) / 2))
        min -= i
        max += i
        i = int((pitch - 50) / 1.3)
        min += i
        max += i
        return min, max

    def __init__(self):
        global player
        # One shared player: the engine produces 8-bit mono PCM at 10 kHz.
        player = nvwave.WavePlayer(
            channels=1,
            samplesPerSec=10000,
            bitsPerSample=8,
            outputDevice=config.conf["speech"]["outputDevice"])
        # Optional pronunciation-dictionary support; both DLLs must exist.
        self.hasDictLib = os.path.isfile('synthDrivers/dict.dll')
        if self.hasDictLib:
            self.sdrvxpdb_lib = windll.LoadLibrary(
                r"synthDrivers\sdrvxpdb.dll")
            self.dict_lib = windll.LoadLibrary(r"synthDrivers\dict.dll")
        self.newfon_lib = windll.LoadLibrary(r"synthDrivers\newfon_nvda.dll")
        self.newfon_lib.speakText.argtypes = [c_char_p, c_int]
        if not self.newfon_lib.initialize(): raise Exception
        # processAudio feeds generated samples to the wave player.
        self.newfon_lib.set_callback(processAudio)
        self.newfon_lib.set_dictionary(1)

    def terminate(self):
        """Stop speech, release the audio player and unload the DLLs."""
        self.cancel()
        global player
        player.close()
        player = None
        self.newfon_lib.terminate()
        del self.newfon_lib
        if self.hasDictLib:
            del self.dict_lib
            del self.sdrvxpdb_lib

    def speakText(self, text, index=None):
        """Speak text asynchronously; -1 tells the DLL there is no index."""
        global isSpeaking
        isSpeaking = True
        text = processText(text, self._language)
        if index is not None:
            self.newfon_lib.speakText(text, index)
        else:
            self.newfon_lib.speakText(text, -1)

    def _get_lastIndex(self):
        return self.newfon_lib.get_lastIndex()

    def cancel(self):
        """Stop the engine and silence any audio already queued."""
        self.newfon_lib.cancel()
        global isSpeaking, player
        isSpeaking = False
        player.stop()

    def _get_voice(self):
        return str(self.newfon_lib.get_voice())

    def _set_voice(self, value):
        self.newfon_lib.set_voice(int(value))
        # The pitch range depends on the voice, so recompute it.
        self._set_pitch(self._pitch)

    def _get_volume(self):
        return self._volume

    def _set_volume(self, value):
        self.newfon_lib.set_volume(value)
        self._volume = value

    def _get_rate(self):
        return self._rate

    def _set_rate(self, value):
        self.newfon_lib.set_rate(value)
        self._rate = value

    def _set_pitch(self, value):
        #if value <= 50: value = 50
        #self.newfon_lib.set_accel(value/5 -10 )
        self._pitch = value
        # Pitch and inflection jointly determine the engine's min/max pitch.
        min, max = self.calculateMinMaxPitch(self._pitch, self._inflection)
        self.newfon_lib.set_pitch_min(min)
        self.newfon_lib.set_pitch_max(max)

    def _get_pitch(self):
        return self._pitch

    def pause(self, switch):
        global player
        player.pause(switch)

    def _get_language(self):
        return self._language

    def _set_language(self, language):
        # Dictionary 1 is Russian; anything else uses dictionary 0.
        self._language = language
        if not self.hasDictLib: return
        if language == "ru": self.newfon_lib.set_dictionary(1)
        else: self.newfon_lib.set_dictionary(0)

    def _set_inflection(self, inflection):
        self._inflection = inflection
        # Inflection changes the pitch band, so re-apply pitch.
        self._set_pitch(self._pitch)

    def _get_inflection(self):
        return self._inflection

    def _set_accel(self, a):
        self._accel = a
        self.newfon_lib.set_accel(int(a))

    def _get_accel(self):
        return self._accel
コード例 #14
0
ファイル: espeak.py プロジェクト: marlon-sousa/nvda
class SynthDriver(SynthDriver):
    name = "espeak"
    description = "eSpeak NG"

    supportedSettings = (
        SynthDriver.VoiceSetting(),
        SynthDriver.VariantSetting(),
        SynthDriver.RateSetting(),
        SynthDriver.RateBoostSetting(),
        SynthDriver.PitchSetting(),
        SynthDriver.InflectionSetting(),
        SynthDriver.VolumeSetting(),
    )
    supportedCommands = {
        IndexCommand,
        CharacterModeCommand,
        LangChangeCommand,
        BreakCommand,
        PitchCommand,
        RateCommand,
        VolumeCommand,
        PhonemeCommand,
    }
    supportedNotifications = {synthIndexReached, synthDoneSpeaking}

    # A mapping of commonly used language codes to eSpeak languages.
    # Introduced due to eSpeak issue: https://github.com/espeak-ng/espeak-ng/issues/1200
    # These are used when eSpeak doesn't support a given language code
    # but a default alias is appropriate.
    _defaultLangToLocale = {
        # Languages without locale that aren't supported in eSpeak 7e5457f91e10,
        # with a language with locale that is supported.
        # Found via:
        # set(stripLocaleFromLangCode(lang) for lang in self.availableLanguages).difference(self.availableLanguages)
        "en": "en-gb",
        "chr": "chr-US-Qaaa-x-west",
        "fr": "fr-fr",
    }

    availableLanguages: Set[Optional[str]]
    """
	For eSpeak commit 7e5457f91e10, this is equivalent to:
	{
		'ia',
		'ru',
		'cy',
		'ms',
		'af',
		'fi',
		'fr-fr',
		'nog',
		'gu',
		'hu',
		'eu',
		'om',
		'en-029',
		'de',
		'es',
		'kk',
		'an',
		'nci',
		'uk',
		'vi-vn-x-south',
		'grc',
		'it',
		'vi-vn-x-central',
		'bg',
		'piqd',
		'ug',
		'ar',
		'da',
		'mi',
		'mr',
		'pt-br',
		'fr-ch',
		'py',
		'uz',
		'en-gb',
		'sw',
		'as',
		'shn',
		'vi',
		'nl',
		'bs',
		'ga',
		'pap',
		'sv',
		'kn',
		'gn',
		'th',
		'tr',
		'pa',
		'mt',
		'chr-US-Qaaa-x-west',
		'eo',
		'kok',
		'ky',
		'lfn',
		'is',
		'pt',
		'en-gb-x-gbcwmd',
		'en-gb-x-rp',
		'ht',
		'bpy',
		'fr-be',
		'nb',
		'lt',
		'ja',
		'te',
		'tn',
		'es-419',
		'gd',
		'sjn',
		'he',
		'hyw',
		'et',
		'ro',
		'ru-lv',
		'sq',
		'quc',
		'am',
		'hr',
		'qya',
		'ka',
		'el',
		'tt',
		'or',
		'pl',
		'qu',
		'ba',
		'ta',
		'cmn',
		'io',
		'en-us',
		'ur',
		'hi',
		'en-gb-scotland',
		'fa',
		'kl',
		'tk',
		'ku',
		'si',
		'cv',
		'ca',
		'qdb',
		'hak',
		'fa-latn',
		'lv',
		'en-gb-x-gbclan',
		'ltg',
		'ne',
		'sl',
		'az',
		'yue',
		'sk',
		'hy',
		'my',
		'ko',
		'mk',
		'smj',
		'ml',
		'cmn-latn-pinyin',
		'id',
		'la',
		'sr',
		'bn',
		'sd',
		'cs',
		'jbo',
		'haw'
	}
	"""
    @classmethod
    def check(cls):
        return True

    def __init__(self):
        _espeak.initialize(self._onIndexReached)
        log.info("Using eSpeak NG version %s" % _espeak.info())
        lang = getLanguage()
        _espeak.setVoiceByLanguage(lang)
        self._language = lang
        self._variantDict = _espeak.getVariantDict()
        self.variant = "max"
        self.rate = 30
        self.pitch = 40
        self.inflection = 75

    def _get_language(self):
        return self._language

    PROSODY_ATTRS = {
        PitchCommand: "pitch",
        VolumeCommand: "volume",
        RateCommand: "rate",
    }

    IPA_TO_ESPEAK = {
        u"θ": u"T",
        u"s": u"s",
        u"ˈ": u"'",
    }

    def _processText(self, text):
        # We need to make several replacements.
        return text.translate({
            0x1: None,  # used for embedded commands
            0x3C: u"&lt;",  # <: because of XML
            0x3E: u"&gt;",  # >: because of XML
            0x5B: u" [",  # [: [[ indicates phonemes
        })

    def _normalizeLangCommand(self,
                              command: LangChangeCommand) -> LangChangeCommand:
        """
		Checks if a LangChangeCommand language is compatible with eSpeak.
		If not, find a default mapping occurs in L{_defaultLangToLocale}.
		Otherwise, finds a language of a different dialect exists (e.g. ru-ru to ru).
		Returns an eSpeak compatible LangChangeCommand.
		"""
        lowerCaseAvailableLangs = set(lang.lower()
                                      for lang in self.availableLanguages)
        # Use default language if no command.lang is supplied
        langWithLocale = command.lang if command.lang else self._language
        langWithLocale = langWithLocale.lower().replace('_', '-')

        langWithoutLocale: Optional[str] = stripLocaleFromLangCode(
            langWithLocale)

        # Check for any language where the language code matches, regardless of dialect: e.g. ru-ru to ru
        matchingLangs = filter(
            lambda lang: stripLocaleFromLangCode(lang) == langWithoutLocale,
            lowerCaseAvailableLangs)
        anyLocaleMatchingLang = next(matchingLangs, None)

        # Check from a list of known default mapping locales: e.g. en to en-gb
        # Created due to eSpeak issue: https://github.com/espeak-ng/espeak-ng/issues/1200
        knownDefaultLang = self._defaultLangToLocale.get(
            langWithoutLocale, None)
        if knownDefaultLang is not None and knownDefaultLang not in self.availableLanguages:
            # This means eSpeak has changed and we need to update the mapping
            log.error(
                f"Default mapping unknown to eSpeak {knownDefaultLang} not in {self.availableLanguages}"
            )
            knownDefaultLang = None

        if langWithLocale in lowerCaseAvailableLangs:
            eSpeakLang = langWithLocale
        elif knownDefaultLang is not None:
            eSpeakLang = knownDefaultLang
        elif langWithoutLocale in lowerCaseAvailableLangs:
            eSpeakLang = langWithoutLocale
        elif anyLocaleMatchingLang is not None:
            eSpeakLang = anyLocaleMatchingLang
        else:
            log.debugWarning(
                f"Unable to find an eSpeak language for '{langWithLocale}'")
            eSpeakLang = None
        return LangChangeCommand(eSpeakLang)

    def _handleLangChangeCommand(
        self,
        langChangeCommand: LangChangeCommand,
        langChanged: bool,
    ) -> str:
        """Get language xml tags needed to handle a lang change command.
			- if a language change has already been handled for this speech,
			close the open voice tag.
			- if the language is supported by eSpeak, switch to that language.
			- otherwise, switch to the default synthesizer language.
		"""
        langChangeCommand = self._normalizeLangCommand(langChangeCommand)
        voiceChangeXML = ""
        if langChanged:
            # Close existing voice tag
            voiceChangeXML += "</voice>"
        if langChangeCommand.lang is not None:
            # Open new voice tag using eSpeak compatible language
            voiceChangeXML += f'<voice xml:lang="{langChangeCommand.lang}">'
        else:
            # Open new voice tag using default voice
            voiceChangeXML += "<voice>"
        return voiceChangeXML

    # C901 'speak' is too complex
    # Note: when working on speak, look for opportunities to simplify
    # and move logic out into smaller helper functions.
    def speak(self, speechSequence: SpeechSequence):  # noqa: C901
        textList: List[str] = []
        langChanged = False
        prosody: Dict[str, int] = {}
        # We output malformed XML, as we might close an outer tag after opening an inner one; e.g.
        # <voice><prosody></voice></prosody>.
        # However, eSpeak doesn't seem to mind.
        for item in speechSequence:
            if isinstance(item, str):
                textList.append(self._processText(item))
            elif isinstance(item, IndexCommand):
                textList.append("<mark name=\"%d\" />" % item.index)
            elif isinstance(item, CharacterModeCommand):
                textList.append("<say-as interpret-as=\"characters\">" if item.
                                state else "</say-as>")
            elif isinstance(item, LangChangeCommand):
                langChangeXML = self._handleLangChangeCommand(
                    item, langChanged)
                textList.append(langChangeXML)
                langChanged = True
            elif isinstance(item, BreakCommand):
                # Break commands are ignored at the start of speech unless strength is specified.
                # Refer to eSpeak issue: https://github.com/espeak-ng/espeak-ng/issues/1232
                textList.append(f'<break time="{item.time}ms" strength="1" />')
            elif type(item) in self.PROSODY_ATTRS:
                if prosody:
                    # Close previous prosody tag.
                    textList.append("</prosody>")
                attr = self.PROSODY_ATTRS[type(item)]
                if item.multiplier == 1:
                    # Returning to normal.
                    try:
                        del prosody[attr]
                    except KeyError:
                        pass
                else:
                    prosody[attr] = int(item.multiplier * 100)
                if not prosody:
                    continue
                textList.append("<prosody")
                for attr, val in prosody.items():
                    textList.append(' %s="%d%%"' % (attr, val))
                textList.append(">")
            elif isinstance(item, PhonemeCommand):
                # We can't use str.translate because we want to reject unknown characters.
                try:
                    phonemes = "".join(
                        [self.IPA_TO_ESPEAK[char] for char in item.ipa])
                    # There needs to be a space after the phoneme command.
                    # Otherwise, eSpeak will announce a subsequent SSML tag instead of processing it.
                    textList.append(u"[[%s]] " % phonemes)
                except KeyError:
                    log.debugWarning("Unknown character in IPA string: %s" %
                                     item.ipa)
                    if item.text:
                        textList.append(self._processText(item.text))
            else:
                log.error("Unknown speech: %s" % item)
        # Close any open tags.
        if langChanged:
            textList.append("</voice>")
        if prosody:
            textList.append("</prosody>")
        text = u"".join(textList)
        _espeak.speak(text)

    def cancel(self):
        _espeak.stop()

    def pause(self, switch):
        _espeak.pause(switch)

    _rateBoost = False
    RATE_BOOST_MULTIPLIER = 3

    def _get_rateBoost(self):
        return self._rateBoost

    def _set_rateBoost(self, enable):
        if enable == self._rateBoost:
            return
        rate = self.rate
        self._rateBoost = enable
        self.rate = rate

    def _get_rate(self):
        val = _espeak.getParameter(_espeak.espeakRATE, 1)
        if self._rateBoost:
            val = int(val / self.RATE_BOOST_MULTIPLIER)
        return self._paramToPercent(val, _espeak.minRate, _espeak.maxRate)

    def _set_rate(self, rate):
        val = self._percentToParam(rate, _espeak.minRate, _espeak.maxRate)
        if self._rateBoost:
            val = int(val * self.RATE_BOOST_MULTIPLIER)
        _espeak.setParameter(_espeak.espeakRATE, val, 0)

    def _get_pitch(self):
        val = _espeak.getParameter(_espeak.espeakPITCH, 1)
        return self._paramToPercent(val, _espeak.minPitch, _espeak.maxPitch)

    def _set_pitch(self, pitch):
        val = self._percentToParam(pitch, _espeak.minPitch, _espeak.maxPitch)
        _espeak.setParameter(_espeak.espeakPITCH, val, 0)

    def _get_inflection(self):
        val = _espeak.getParameter(_espeak.espeakRANGE, 1)
        return self._paramToPercent(val, _espeak.minPitch, _espeak.maxPitch)

    def _set_inflection(self, val):
        val = self._percentToParam(val, _espeak.minPitch, _espeak.maxPitch)
        _espeak.setParameter(_espeak.espeakRANGE, val, 0)

    def _get_volume(self):
        return _espeak.getParameter(_espeak.espeakVOLUME, 1)

    def _set_volume(self, volume):
        _espeak.setParameter(_espeak.espeakVOLUME, volume, 0)

    def _getAvailableVoices(self):
        voices = OrderedDict()
        for v in _espeak.getVoiceList():
            l = _espeak.decodeEspeakString(v.languages[1:])
            # #7167: Some languages names contain unicode characters EG: Norwegian Bokmål
            name = _espeak.decodeEspeakString(v.name)
            # #5783: For backwards compatibility, voice identifies should always be lowercase
            identifier = os.path.basename(
                _espeak.decodeEspeakString(v.identifier)).lower()
            voices[identifier] = VoiceInfo(identifier, name, l)
        return voices

    def _get_voice(self):
        curVoice = getattr(self, '_voice', None)
        if curVoice: return curVoice
        curVoice = _espeak.getCurrentVoice()
        if not curVoice:
            return ""
        # #5783: For backwards compatibility, voice identifies should always be lowercase
        return _espeak.decodeEspeakString(
            curVoice.identifier).split('+')[0].lower()

    def _set_voice(self, identifier):
        if not identifier:
            return
        # #5783: For backwards compatibility, voice identifies should always be lowercase
        identifier = identifier.lower()
        if "\\" in identifier:
            identifier = os.path.basename(identifier)
        self._voice = identifier
        try:
            _espeak.setVoiceAndVariant(voice=identifier, variant=self._variant)
        except:
            self._voice = None
            raise
        self._language = super(SynthDriver, self).language

    def _onIndexReached(self, index):
        if index is not None:
            synthIndexReached.notify(synth=self, index=index)
        else:
            synthDoneSpeaking.notify(synth=self)

    def terminate(self):
        _espeak.terminate()

    def _get_variant(self):
        return self._variant

    def _set_variant(self, val):
        self._variant = val if val in self._variantDict else "max"
        _espeak.setVoiceAndVariant(variant=self._variant)

    def _getAvailableVariants(self):
        return OrderedDict((ID, VoiceInfo(ID, name))
                           for ID, name in self._variantDict.items())
コード例 #15
0
class SynthDriver(SynthDriver):
    """NVDA synthesizer driver for nvSpeechPlayer.

    Uses eSpeak only as a text-to-IPA front end (espeak_TextToPhonemes),
    then synthesizes the phonemes itself by queueing speechPlayer frames
    that a background AudioThread renders at 16 kHz.
    """

    # When True, every speechPlayer Frame field is additionally exposed as a
    # 0-100 driver setting named "speechPlayer_<field>" (50 == neutral).
    exposeExtraParams = True

    def __init__(self):
        if self.exposeExtraParams:
            self._extraParamNames = [x[0] for x in speechPlayer.Frame._fields_]
            self.supportedSettings = SynthDriver.supportedSettings + tuple(
                NumericDriverSetting(
                    "speechPlayer_%s" % x, "frame.%s" % x, normalStep=1)
                for x in self._extraParamNames)
            for x in self._extraParamNames:
                setattr(self, "speechPlayer_%s" % x, 50)
        self.player = speechPlayer.SpeechPlayer(16000)
        # eSpeak is initialized purely for phoneme conversion in speak().
        _espeak.initialize()
        _espeak.setVoiceByLanguage('en')
        self.pitch = 50
        self.rate = 50
        self.volume = 90
        self.inflection = 60
        self.audioThread = AudioThread(self, self.player, 16000)

    @classmethod
    def check(cls):
        return True

    name = "nvSpeechPlayer"
    description = "nvSpeechPlayer"

    supportedSettings = (SynthDriver.VoiceSetting(), SynthDriver.RateSetting(),
                         SynthDriver.PitchSetting(),
                         SynthDriver.VolumeSetting(),
                         SynthDriver.InflectionSetting())

    supportedCommands = {
        speech.IndexCommand,
        speech.PitchCommand,
    }

    supportedNotifications = {synthIndexReached, synthDoneSpeaking}

    # Cached parameter state behind the _get_/_set_ properties below.
    _curPitch = 50
    _curVoice = 'Adam'
    _curInflection = 0.5
    _curVolume = 1.0
    _curRate = 1.0

    def speak(self, speakList):
        """Convert the sequence to IPA via eSpeak and queue speechPlayer frames."""
        userIndex = None
        pitchOffset = 0
        # Merge adjacent strings
        index = 0
        while index < len(speakList):
            item = speakList[index]
            if index > 0:
                lastItem = speakList[index - 1]
                if isinstance(item, str) and isinstance(lastItem, str):
                    speakList[index - 1] = " ".join([lastItem, item])
                    del speakList[index]
                    continue
            index += 1
        endPause = 20
        for item in speakList:
            if isinstance(item, speech.PitchCommand):
                pitchOffset = item.offset
            elif isinstance(item, speech.IndexCommand):
                userIndex = item.index
            elif isinstance(item, str):
                textList = re_textPause.split(item)
                lastIndex = len(textList) - 1
                for index, chunk in enumerate(textList):
                    if not chunk: continue
                    chunk = chunk.strip()
                    if not chunk: continue
                    # Clause-final punctuation selects the trailing pause length (ms).
                    clauseType = chunk[-1]
                    if clauseType in ('.', '!'):
                        endPause = 150
                    elif clauseType == '?':
                        endPause = 150
                    elif clauseType == ',':
                        endPause = 120
                    else:
                        endPause = 100
                        clauseType = None
                    # Faster speech shortens the pause proportionally.
                    endPause /= self._curRate
                    textBuf = ctypes.create_unicode_buffer(chunk)
                    textPtr = ctypes.c_void_p(ctypes.addressof(textBuf))
                    chunks = []
                    while textPtr:
                        # espeak_TextToPhonemes advances textPtr as it consumes text;
                        # the magic constant selects eSpeak's IPA phoneme output mode
                        # — presumably; confirm against the eSpeak API docs.
                        phonemeBuf = _espeak.espeakDLL.espeak_TextToPhonemes(
                            ctypes.byref(textPtr), _espeak.espeakCHARS_WCHAR,
                            0x36100 + 0x82)
                        # NOTE(review): if this returns NULL without advancing
                        # textPtr, the `continue` loops forever — verify.
                        if not phonemeBuf: continue
                        chunks.append(ctypes.string_at(phonemeBuf))
                    chunk = b"".join(chunks).decode('utf8')
                    # Substitute diphthongs that nvSpeechPlayer renders better.
                    chunk = chunk.replace('ə͡l', 'ʊ͡l')
                    chunk = chunk.replace('a͡ɪ', 'ɑ͡ɪ')
                    chunk = chunk.replace('e͡ɪ', 'e͡i')
                    chunk = chunk.replace('ə͡ʊ', 'o͡u')
                    chunk = chunk.strip()
                    if not chunk: continue
                    pitch = self._curPitch + pitchOffset
                    basePitch = 25 + (21.25 * (pitch / 12.5))
                    for args in ipa.generateFramesAndTiming(
                            chunk,
                            speed=self._curRate,
                            basePitch=basePitch,
                            inflection=self._curInflection,
                            clauseType=clauseType):
                        frame = args[0]
                        if frame:
                            applyVoiceToFrame(frame, self._curVoice)
                            if self.exposeExtraParams:
                                # Scale each frame field by its extra-param ratio
                                # (setting 50 -> ratio 1.0, i.e. unchanged).
                                for x in self._extraParamNames:
                                    ratio = getattr(
                                        self, "speechPlayer_%s" % x) / 50.0
                                    setattr(frame, x,
                                            getattr(frame, x) * ratio)
                            frame.preFormantGain *= self._curVolume
                        # Attach the pending user index to the first frame only.
                        self.player.queueFrame(*args, userIndex=userIndex)
                        userIndex = None
        # Trailing silence frame carries any still-pending index.
        self.player.queueFrame(None,
                               endPause,
                               max(10.0, 10.0 / self._curRate),
                               userIndex=userIndex)
        self.audioThread.isSpeaking = True
        self.audioThread.synthEvent.set()

    def cancel(self):
        """Purge queued frames and stop playback."""
        self.player.queueFrame(None, 20, 5, purgeQueue=True)
        self.audioThread.isSpeaking = False
        self.audioThread.synthEvent.set()
        self.audioThread.wavePlayer.stop()

    def pause(self, switch):
        self.audioThread.wavePlayer.pause(switch)

    # Rate maps exponentially: setting 50 -> 1.0x, each 25 points doubles it.
    def _get_rate(self):
        return int(math.log(self._curRate / 0.25, 2) * 25.0)

    def _set_rate(self, val):
        self._curRate = 0.25 * (2**(val / 25.0))

    def _get_pitch(self):
        return self._curPitch

    def _set_pitch(self, val):
        self._curPitch = val

    def _get_volume(self):
        return int(self._curVolume * 75)

    def _set_volume(self, val):
        self._curVolume = val / 75.0

    def _get_inflection(self):
        return int(self._curInflection / 0.01)

    def _set_inflection(self, val):
        self._curInflection = val * 0.01

    def _get_voice(self):
        return self._curVoice

    def _set_voice(self, voice):
        # Unknown voices fall back to 'Adam'; extra params reset to neutral.
        if voice not in self.availableVoices:
            voice = 'Adam'
        self._curVoice = voice
        if self.exposeExtraParams:
            for paramName in self._extraParamNames:
                setattr(self, "speechPlayer_%s" % paramName, 50)

    def _getAvailableVoices(self):
        d = OrderedDict()
        for name in sorted(voices):
            d[name] = VoiceInfo(name, name)
        return d

    def terminate(self):
        self.audioThread.terminate()
        del self.player
        _espeak.terminate()
コード例 #16
0
ファイル: oneCore.py プロジェクト: supertanglang/nvda
class SynthDriver(SynthDriver):
	name = "oneCore"
	# Translators: Description for a speech synthesizer.
	description = _("Windows OneCore voices")
	supportedSettings = (
		SynthDriver.VoiceSetting(),
		SynthDriver.RateSetting(),
		SynthDriver.PitchSetting(),
		SynthDriver.VolumeSetting(),
	)
	# These are all controlled via SSML, so we only need attributes, not properties.
	rate = None
	pitch = None
	volume = None

	@classmethod
	def check(cls):
		if not hasattr(sys, "frozen"):
			# #3793: Source copies don't report the correct version on Windows 10 because Python isn't manifested for higher versions.
			# We want this driver to work for source copies on Windows 10, so just return True here.
			# If this isn't in fact Windows 10, it will fail when constructed, which is okay.
			return True
		# For binary copies, only present this as an available synth if this is Windows 10.
		return winVersion.winVersion.major >= 10

	def __init__(self):
		super(SynthDriver, self).__init__()
		self._dll = NVDAHelper.getHelperLocalWin10Dll()
		self._dll.ocSpeech_getCurrentVoiceLanguage.restype = ctypes.c_wchar_p
		self._handle = self._dll.ocSpeech_initialize()
		self._callbackInst = ocSpeech_Callback(self._callback)
		self._dll.ocSpeech_setCallback(self._handle, self._callbackInst)
		self._dll.ocSpeech_getVoices.restype = NVDAHelper.bstrReturn
		self._dll.ocSpeech_getCurrentVoiceId.restype = ctypes.c_wchar_p
		self._player= None
		# Initialize state.
		self._queuedSpeech = []
		self._wasCancelled = False
		self._isProcessing = False
		# Initialize the voice to a sane default
		self.voice=self._getDefaultVoice()
		# Set initial values for parameters that can't be queried.
		# This initialises our cache for the value.
		self.rate = 50
		self.pitch = 50
		self.volume = 100

	def _maybeInitPlayer(self, wav):
		"""Initialize audio playback based on the wave header provided by the synthesizer.
		If the sampling rate has not changed, the existing player is used.
		Otherwise, a new one is created with the appropriate parameters.
		"""
		samplesPerSec = wav.getframerate()
		if self._player and self._player.samplesPerSec == samplesPerSec:
			return
		if self._player:
			# Finalise any pending audio.
			self._player.idle()
		bytesPerSample = wav.getsampwidth()
		self._bytesPerSec = samplesPerSec * bytesPerSample
		self._player = nvwave.WavePlayer(channels=wav.getnchannels(),
			samplesPerSec=samplesPerSec, bitsPerSample=bytesPerSample * 8,
			outputDevice=config.conf["speech"]["outputDevice"])

	def terminate(self):
		super(SynthDriver, self).terminate()
		self._dll.ocSpeech_terminate(self._handle)
		# Drop the ctypes function instance for the callback,
		# as it is holding a reference to an instance method, which causes a reference cycle.
		self._callbackInst = None

	def cancel(self):
		# Set a flag to tell the callback not to push more audio.
		self._wasCancelled = True
		log.debug("Cancelling")
		# There might be more text pending. Throw it away.
		self._queuedSpeech = []
		if self._player:
			self._player.stop()

	def speak(self, speechSequence):
		conv = _OcSsmlConverter(self.language, self.rate, self.pitch, self.volume)
		text = conv.convertToXml(speechSequence)
		# #7495: Calling WaveOutOpen blocks for ~100 ms if called from the callback
		# when the SSML includes marks.
		# We're not quite sure why.
		# To work around this, open the device before queuing.
		if self._player:
			self._player.open()
		self._queueSpeech(text)

	def _queueSpeech(self, item):
		self._queuedSpeech.append(item)
		# We only process the queue here if it isn't already being processed.
		if not self._isProcessing:
			self._processQueue()

	def _processQueue(self):
		if not self._queuedSpeech:
			# There are no more queued utterances at this point, so call idle.
			# This blocks while waiting for the final chunk to play,
			# so by the time this is done, there might be something queued.
			log.debug("Calling idle on audio player")
			self._player.idle()
		if self._queuedSpeech:
			item = self._queuedSpeech.pop(0)
			self._wasCancelled = False
			log.debug("Begin processing speech")
			self._isProcessing = True
			# ocSpeech_speak is async.
			# It will call _callback in a background thread once done,
			# which will eventually process the queue again.
			self._dll.ocSpeech_speak(self._handle, item)
			return
		log.debug("Queue empty, done processing")
		self._isProcessing = False

	def _callback(self, bytes, len, markers):
		if len == 0:
			# The C++ code will log an error with details.
			log.debugWarning("ocSpeech_speak failed!")
			self._processQueue()
			return
		# This gets called in a background thread.
		stream = cStringIO.StringIO(ctypes.string_at(bytes, len))
		wav = wave.open(stream, "r")
		self._maybeInitPlayer(wav)
		data = wav.readframes(wav.getnframes())
		if markers:
			markers = markers.split('|')
		else:
			markers = []
		prevMarker = None
		prevPos = 0

		# Push audio up to each marker so we can sync the audio with the markers.
		for marker in markers:
			if self._wasCancelled:
				break
			name, pos = marker.split(':')
			pos = int(pos)
			# pos is a time offset in 100-nanosecond units.
			# Convert this to a byte offset.
			# Order the equation so we don't have to do floating point.
			pos = pos * self._bytesPerSec / HUNDRED_NS_PER_SEC
			# Push audio up to this marker.
			self._player.feed(data[prevPos:pos])
			# _player.feed blocks until the previous chunk of audio is complete, not the chunk we just pushed.
			# Therefore, indicate that we've reached the previous marker.
			if prevMarker:
				self.lastIndex = prevMarker
			prevMarker = int(name)
			prevPos = pos
		if self._wasCancelled:
			log.debug("Cancelled, stopped pushing audio")
		else:
			self._player.feed(data[prevPos:])
			if prevMarker:
				self.lastIndex = prevMarker
			log.debug("Done pushing audio")
		self._processQueue()

	def _getVoiceInfoFromOnecoreVoiceString(self, voiceStr):
		"""
		Produces an NVDA VoiceInfo object representing the given voice string from Onecore speech.
		"""
		# The voice string is made up of the ID, the language, and the display name.
		ID,language,name=voiceStr.split(':')
		language=language.replace('-','_')
		return VoiceInfo(ID,name,language=language)

	def _getAvailableVoices(self):
		"""Build an ordered mapping of voice ID -> VoiceInfo for all usable Onecore voices."""
		result = OrderedDict()
		# Ask Onecore speech for every voice it knows about.
		# The raw list may contain uninstalled or broken voices, so each one is validated.
		rawVoices = self._dll.ocSpeech_getVoices(self._handle).split('|')
		for onecoreIndex, rawVoice in enumerate(rawVoices):
			info = self._getVoiceInfoFromOnecoreVoiceString(rawVoice)
			if not self._isVoiceValid(info.ID):
				# Registry entry or data files are missing; skip this voice.
				continue
			info.onecoreIndex = onecoreIndex
			result[info.ID] = info
		return result

	def _isVoiceValid(self,ID):
		"""
		Checks that the given voice actually exists and is valid.
		It checks the Registry, and also ensures that its data files actually exist on this machine.
		@param ID: the ID of the requested voice (a full registry path, e.g.
			HKEY_LOCAL_MACHINE\\SOFTWARE\\...).
		@type ID: string
		@returns: True if the voice is valid, false otherwise.
		@rtype: boolean
		"""
		IDParts = ID.split('\\')
		rootKey = getattr(winreg, IDParts[0])
		subkey = "\\".join(IDParts[1:])
		try:
			hkey = winreg.OpenKey(rootKey, subkey)
		except WindowsError as e:
			log.debugWarning("Could not open registry key %s, %r" % (ID, e))
			return False
		try:
			langDataPath = winreg.QueryValueEx(hkey, 'langDataPath')
		except WindowsError as e:
			log.debugWarning("Could not open registry value 'langDataPath', %r" % e)
			return False
		if not langDataPath or not isinstance(langDataPath[0], basestring):
			log.debugWarning("Invalid langDataPath value")
			return False
		if not os.path.isfile(os.path.expandvars(langDataPath[0])):
			log.debugWarning("Missing language data file: %s" % langDataPath[0])
			return False
		try:
			voicePath = winreg.QueryValueEx(hkey, 'voicePath')
		except WindowsError as e:
			# Fixed copy/paste error: this message previously said 'langDataPath'.
			log.debugWarning("Could not open registry value 'voicePath', %r" % e)
			return False
		if not voicePath or not isinstance(voicePath[0],basestring):
			log.debugWarning("Invalid voicePath value")
			return False
		if not os.path.isfile(os.path.expandvars(voicePath[0] + '.apm')):
			log.debugWarning("Missing voice file: %s" % voicePath[0] + ".apm")
			return False
		return True

	def _get_voice(self):
		"""Return the ID of the currently selected Onecore voice."""
		currentVoiceId = self._dll.ocSpeech_getCurrentVoiceId(self._handle)
		return currentVoiceId

	def _set_voice(self, ID):
		"""Select the voice with the given ID.
		@raises LookupError: if no available voice has that ID.
		"""
		match = next(
			(v for v in self.availableVoices.itervalues() if v.ID == ID),
			None)
		if match is None:
			raise LookupError("No such voice: %s"%ID)
		self._dll.ocSpeech_setVoice(self._handle, match.onecoreIndex)

	def _getDefaultVoice(self):
		"""
		Finds the best available voice that can be used as a default.
		Preference order: a voice whose language and country both match the user's
		configured Windows language (e.g. en_AU), then one matching just the base
		language (e.g. en), then simply the first voice available.
		@returns: the ID of the voice, suitable for passing to self.voice for setting.
		@rtype: string
		"""
		voices = self.availableVoices
		fullLanguage = languageHandler.getWindowsLanguage()
		# Exact language+country match.
		exactMatch = next(
			(v.ID for v in voices.itervalues() if v.language == fullLanguage),
			None)
		if exactMatch is not None:
			return exactMatch
		baseLanguage = fullLanguage.split('_')[0]
		if baseLanguage != fullLanguage:
			# Base-language match (e.g. "en" matches "en_GB").
			baseMatch = next(
				(v.ID for v in voices.itervalues() if v.language.startswith(baseLanguage)),
				None)
			if baseMatch is not None:
				return baseMatch
		# Fall back to the first voice, if there is one.
		for v in voices.itervalues():
			return v.ID
		raise RuntimeError("No voices available")

	def _get_language(self):
		# Ask the Onecore speech DLL for the language of the current voice.
		return self._dll.ocSpeech_getCurrentVoiceLanguage(self._handle)

	def pause(self, switch):
		"""Pause (switch=True) or resume (switch=False) audio output.
		The player may not exist yet if nothing has been spoken.
		"""
		if self._player:
			self._player.pause(switch)
コード例 #17
0
        self.voices = self.voices + (self.tmpVoice, )
        self.tmpVoice = ('nk - telugu')
        self.voices = self.voices + (self.tmpVoice, )

        self.index = 0
        self.indexList = [0]
        self.thread = SynthDriver.myThread(1, "Thread", self.indexList)
        self.thread.timeToDie = False
        self.thread.start()
        self.sendText = SynthDriver.sendText()
        self.sendCommand = SynthDriver.sendCommand()
        self.myRate = 50

    name = "openmary"
    description = "Emily - Open Mary"
    supportedSettings = (SynthDriver.RateSetting(), SynthDriver.VoiceSetting())
    myVoice = 'hercules - greek'
    # Fix: this must be a one-element tuple, not a parenthesized string.
    # __init__ later does `self.voices + (self.tmpVoice, )`, which raises
    # TypeError when voices is a plain string.
    voices = ('hercules - greek', )

    @classmethod
    def check(cls):
        # This synthesizer is always reported as available;
        # no installation check is performed here.
        return True

    def speak(self, speechSequence):
        # Flatten the speech sequence into one text string, remembering the most
        # recent index command so speech progress can be reported.
        # NOTE(review): this method appears truncated by the source extraction;
        # nothing visible here sends the accumulated text to the Open Mary server.
        text = ""
        for item in speechSequence:
            if isinstance(item, basestring):
                text = text + item
            elif isinstance(item, speech.IndexCommand):
                self.index = item.index
コード例 #18
0
class SynthDriver(SynthDriver):
	"""NVDA synthesizer driver for Microsoft Speech API version 5 (SAPI5).
	Speech is produced via the SAPI.SPVoice COM object.
	Prosody (pitch, rate, volume), spelling mode, breaks and bookmarks are
	expressed with SAPI XML markup built in L{speak}.
	"""
	supportedSettings=(SynthDriver.VoiceSetting(),SynthDriver.RateSetting(),SynthDriver.PitchSetting(),SynthDriver.VolumeSetting())
	supportedCommands = {
		IndexCommand,
		CharacterModeCommand,
		LangChangeCommand,
		BreakCommand,
		PitchCommand,
		RateCommand,
		VolumeCommand,
		PhonemeCommand,
	}
	supportedNotifications = {synthIndexReached, synthDoneSpeaking}

	COM_CLASS = "SAPI.SPVoice"

	name="sapi5"
	description="Microsoft Speech API version 5"

	@classmethod
	def check(cls):
		# The driver is usable if the SPVoice COM class is registered.
		try:
			r=winreg.OpenKey(winreg.HKEY_CLASSES_ROOT,cls.COM_CLASS)
			r.Close()
			return True
		except:
			return False

	ttsAudioStream=None #: Holds the ISPAudio interface for the current voice, to aid in stopping and pausing audio
	_audioDucker: Optional[audioDucking.AudioDucker] = None

	def __init__(self,_defaultVoiceToken=None):
		"""
		@param _defaultVoiceToken: an optional sapi voice token which should be used as the default voice (only useful for subclasses)
		@type _defaultVoiceToken: ISpeechObjectToken
		"""
		if audioDucking.isAudioDuckingSupported():
			self._audioDucker = audioDucking.AudioDucker()
		self._pitch=50
		self._initTts(_defaultVoiceToken)

	def terminate(self):
		# Drop the event connection and the COM object so they can be released.
		self._eventsConnection = None
		self.tts = None

	def _getAvailableVoices(self):
		"""Return an OrderedDict of voice ID -> VoiceInfo for all installed SAPI5 voices."""
		voices=OrderedDict()
		v=self._getVoiceTokens()
		# #2629: Iterating uses IEnumVARIANT and GetBestInterface doesn't work on tokens returned by some token enumerators.
		# Therefore, fetch the items by index, as that method explicitly returns the correct interface.
		for i in range(len(v)):
			try:
				ID=v[i].Id
				name=v[i].GetDescription()
				try:
					language=locale.windows_locale[int(v[i].getattribute('language').split(';')[0],16)]
				except KeyError:
					language=None
			except COMError:
				log.warning("Could not get the voice info. Skipping...")
				# Fix: actually skip this voice. Previously execution fell through
				# to the assignment below, raising NameError if the first voice
				# failed, or silently duplicating the previous voice's data.
				continue
			voices[ID]=VoiceInfo(ID,name,language)
		return voices

	def _getVoiceTokens(self):
		"""Provides a collection of sapi5 voice tokens. Can be overridden by subclasses if tokens should be looked for in some other registry location."""
		return self.tts.getVoices()

	def _get_rate(self):
		# SAPI rate is -10..10; NVDA uses 0..100.
		return (self.tts.rate*5)+50

	def _get_pitch(self):
		return self._pitch

	def _get_volume(self):
		return self.tts.volume

	def _get_voice(self):
		return self.tts.voice.Id

	def _get_lastIndex(self):
		# Bookmarks carry NVDA speech indexes as stringified integers.
		bookmark=self.tts.status.LastBookmark
		if bookmark!="" and bookmark is not None:
			return int(bookmark)
		else:
			return None

	def _percentToRate(self, percent):
		# Map NVDA's 0..100 range onto SAPI's -10..10.
		return (percent - 50) // 5

	def _set_rate(self,rate):
		self.tts.Rate = self._percentToRate(rate)

	def _set_pitch(self,value):
		#pitch is really controled with xml around speak commands
		self._pitch=value

	def _set_volume(self,value):
		self.tts.Volume = value

	def _initTts(self, voice=None):
		"""(Re)create the SPVoice COM object, select the audio output device and hook up events.
		@param voice: an optional voice token to select before configuring audio output.
		"""
		self.tts=comtypes.client.CreateObject(self.COM_CLASS)
		if voice:
			# #749: It seems that SAPI 5 doesn't reset the audio parameters when the voice is changed,
			# but only when the audio output is changed.
			# Therefore, set the voice before setting the audio output.
			# Otherwise, we will get poor speech quality in some cases.
			self.tts.voice = voice
		outputDeviceID=nvwave.outputDeviceNameToID(config.conf["speech"]["outputDevice"], True)
		if outputDeviceID>=0:
			self.tts.audioOutput=self.tts.getAudioOutputs()[outputDeviceID]
		self._eventsConnection = comtypes.client.GetEvents(self.tts, SapiSink(weakref.ref(self)))
		self.tts.EventInterests = (
			SpeechVoiceEvents.StartInputStream | SpeechVoiceEvents.Bookmark | SpeechVoiceEvents.EndInputStream
		)
		from comInterfaces.SpeechLib import ISpAudio
		try:
			self.ttsAudioStream=self.tts.audioOutputStream.QueryInterface(ISpAudio)
		except COMError:
			log.debugWarning("SAPI5 voice does not support ISPAudio")
			self.ttsAudioStream=None

	def _set_voice(self,value):
		tokens = self._getVoiceTokens()
		# #2629: Iterating uses IEnumVARIANT and GetBestInterface doesn't work on tokens returned by some token enumerators.
		# Therefore, fetch the items by index, as that method explicitly returns the correct interface.
		for i in range(len(tokens)):
			voice=tokens[i]
			if value==voice.Id:
				break
		else:
			# Voice not found.
			return
		self._initTts(voice=voice)

	def _percentToPitch(self, percent):
		# Map NVDA's 0..100 range onto SAPI's -25..25 "absmiddle" pitch.
		return percent // 2 - 25

	IPA_TO_SAPI = {
		u"θ": u"th",
		u"s": u"s",
	}
	def _convertPhoneme(self, ipa):
		"""Convert an IPA string to SAPI phoneme symbols.
		@raises LookupError: if the current voice is not US English or the IPA
			string contains a character we have no mapping for.
		"""
		# We only know about US English phonemes.
		# Rather than just ignoring unknown phonemes, SAPI throws an exception.
		# Therefore, don't bother with any other language.
		if self.tts.voice.GetAttribute("language") != "409":
			raise LookupError("No data for this language")
		out = []
		outAfter = None
		for ipaChar in ipa:
			if ipaChar == u"ˈ":
				# Primary stress applies to the following phoneme.
				outAfter = u"1"
				continue
			out.append(self.IPA_TO_SAPI[ipaChar])
			if outAfter:
				out.append(outAfter)
				outAfter = None
		if outAfter:
			out.append(outAfter)
		return u" ".join(out)

	def speak(self, speechSequence):
		"""Render an NVDA speech sequence as SAPI XML and speak it asynchronously."""
		textList = []

		# NVDA SpeechCommands are linear, but XML is hierarchical.
		# Therefore, we track values for non-empty tags.
		# When a tag changes, we close all previously opened tags and open new ones.
		tags = {}
		# We have to use something mutable here because it needs to be changed by the inner function.
		tagsChanged = [True]
		openedTags = []
		def outputTags():
			if not tagsChanged[0]:
				return
			for tag in reversed(openedTags):
				textList.append("</%s>" % tag)
			del openedTags[:]
			for tag, attrs in tags.items():
				textList.append("<%s" % tag)
				for attr, val in attrs.items():
					textList.append(' %s="%s"' % (attr, val))
				textList.append(">")
				openedTags.append(tag)
			tagsChanged[0] = False

		pitch = self._pitch
		# Pitch must always be specified in the markup.
		tags["pitch"] = {"absmiddle": self._percentToPitch(pitch)}
		rate = self.rate
		volume = self.volume

		for item in speechSequence:
			if isinstance(item, str):
				outputTags()
				textList.append(item.replace("<", "&lt;"))
			elif isinstance(item, IndexCommand):
				textList.append('<Bookmark Mark="%d" />' % item.index)
			elif isinstance(item, CharacterModeCommand):
				if item.state:
					tags["spell"] = {}
				else:
					try:
						del tags["spell"]
					except KeyError:
						pass
				tagsChanged[0] = True
			elif isinstance(item, BreakCommand):
				textList.append('<silence msec="%d" />' % item.time)
			elif isinstance(item, PitchCommand):
				tags["pitch"] = {"absmiddle": self._percentToPitch(int(pitch * item.multiplier))}
				tagsChanged[0] = True
			elif isinstance(item, VolumeCommand):
				if item.multiplier == 1:
					try:
						del tags["volume"]
					except KeyError:
						pass
				else:
					tags["volume"] = {"level": int(volume * item.multiplier)}
				tagsChanged[0] = True
			elif isinstance(item, RateCommand):
				if item.multiplier == 1:
					try:
						del tags["rate"]
					except KeyError:
						pass
				else:
					tags["rate"] = {"absspeed": self._percentToRate(int(rate * item.multiplier))}
				tagsChanged[0] = True
			elif isinstance(item, PhonemeCommand):
				try:
					textList.append(u'<pron sym="%s">%s</pron>'
						% (self._convertPhoneme(item.ipa), item.text or u""))
				except LookupError:
					log.debugWarning("Couldn't convert character in IPA string: %s" % item.ipa)
					if item.text:
						textList.append(item.text)
			elif isinstance(item, SpeechCommand):
				log.debugWarning("Unsupported speech command: %s" % item)
			else:
				log.error("Unknown speech: %s" % item)
		# Close any tags that are still open.
		tags.clear()
		tagsChanged[0] = True
		outputTags()

		text = "".join(textList)
		flags = SpeechVoiceSpeakFlags.IsXML | SpeechVoiceSpeakFlags.Async
		# Ducking should be complete before the synth starts producing audio.
		# For this to happen, the speech method must block until ducking is complete.
		# Ducking should be disabled when the synth is finished producing audio.
		# Note that there may be calls to speak with a string that results in no audio,
		# it is important that in this case the audio does not get stuck ducked.
		# When there is no audio produced the startStream and endStream handlers are not called.
		# To prevent audio getting stuck ducked, it is unducked at the end of speech.
		# There are some known issues:
		# - When there is no audio produced by the synth, a user may notice volume lowering (ducking) temporarily.
		# - If the call to startStream handler is delayed significantly, users may notice a variation in volume
		# (as ducking is disabled at the end of speak, and re-enabled when the startStream handler is called)

		# A note on the synchronicity of components of this approach:
		# SAPISink.StartStream event handler (callback):
		# the synth speech is not blocked by this event callback.
		# SAPISink.EndStream event handler (callback):
		# assumed also to be async but not confirmed. Synchronicity is irrelevant to the current approach.
		# AudioDucker.disable returns before the audio is completely unducked.
		# AudioDucker.enable() ducking will complete before the function returns.
		# It is not possible to "double duck the audio", calling twice yields the same result as calling once.
		# AudioDucker class instances count the number of enables/disables,
		# in order to unduck there must be no remaining enabled audio ducker instances.
		# Due to this a temporary audio ducker is used around the call to speak.
		# SAPISink.StartStream: Ducking here may allow the early speech to start before ducking is completed.
		if audioDucking.isAudioDuckingSupported():
			tempAudioDucker = audioDucking.AudioDucker()
		else:
			tempAudioDucker = None
		if tempAudioDucker:
			if audioDucking._isDebug():
				log.debug("Enabling audio ducking due to speak call")
			tempAudioDucker.enable()
		try:
			self.tts.Speak(text, flags)
		finally:
			if tempAudioDucker:
				if audioDucking._isDebug():
					log.debug("Disabling audio ducking  after speak call")
				tempAudioDucker.disable()

	def cancel(self):
		# SAPI5's default means of stopping speech can sometimes lag at end of speech, especially with Win8 / Win 10 Microsoft Voices.
		# Therefore  instruct the underlying audio interface to stop first, before interupting and purging any remaining speech.
		if self.ttsAudioStream:
			self.ttsAudioStream.setState(SPAudioState.STOP, 0)
		self.tts.Speak(None, SpeechVoiceSpeakFlags.Async | SpeechVoiceSpeakFlags.PurgeBeforeSpeak)
		if self._audioDucker:
			if audioDucking._isDebug():
				log.debug("Disabling audio ducking due to setting output audio state to stop")
			self._audioDucker.disable()

	def pause(self, switch: bool):
		# SAPI5's default means of pausing in most cases is either extremely slow
		# (e.g. takes more than half a second) or does not work at all.
		# Therefore instruct the underlying audio interface to pause instead.
		if self.ttsAudioStream:
			oldState = self.ttsAudioStream.GetStatus().State
			if switch and oldState == SPAudioState.RUN:
				# pausing
				if self._audioDucker:
					if audioDucking._isDebug():
						log.debug("Disabling audio ducking due to setting output audio state to pause")
					self._audioDucker.disable()
				self.ttsAudioStream.setState(SPAudioState.PAUSE, 0)
			elif not switch and oldState == SPAudioState.PAUSE:
				# unpausing
				if self._audioDucker:
					if audioDucking._isDebug():
						log.debug("Enabling audio ducking due to setting output audio state to run")
					self._audioDucker.enable()
				self.ttsAudioStream.setState(SPAudioState.RUN, 0)
コード例 #19
0
class SynthDriver(synthDriverHandler.SynthDriver):
	"""NVDA synthesizer driver for IBMTTS / ViaVoice (ECI).
	Speech commands are translated into ECI backquote annotations and queued
	to the _ibmeci backend, which runs the engine on its own thread.
	"""
	supportedSettings=(SynthDriver.VoiceSetting(), SynthDriver.VariantSetting(), SynthDriver.RateSetting(),
		BooleanDriverSetting("rateBoost", _("Rate boos&t"), True),
		SynthDriver.PitchSetting(), SynthDriver.InflectionSetting(), SynthDriver.VolumeSetting(),
		NumericDriverSetting("hsz", _("Head size"), False),
		NumericDriverSetting("rgh", _("Roughness"), False),
		NumericDriverSetting("bth", _("Breathiness"), False),
		BooleanDriverSetting("backquoteVoiceTags", _("Enable backquote voice &tags"), False),
		BooleanDriverSetting("ABRDICT", _("Enable &abbreviation dictionary"), False),
		BooleanDriverSetting("phrasePrediction", _("Enable phrase prediction"), False),
		BooleanDriverSetting("shortpause", _("&Shorten pauses"), False),
		BooleanDriverSetting("sendParams", _("Always Send Current Speech Settings (enable to prevent some tags from sticking, disable for viavoice binary compatibility)"), False))
	supportedCommands = {
		IndexCommand,
		CharacterModeCommand,
		LangChangeCommand,
		BreakCommand,
		PitchCommand,
		RateCommand,
		VolumeCommand
	}
	supportedNotifications = {synthIndexReached, synthDoneSpeaking}

	description='IBMTTS'
	name='ibmeci'
	speakingLanguage=""

	@classmethod
	def check(cls):
		return _ibmeci.eciCheck()

	def __init__(self):
		_ibmeci.initialize(self._onIndexReached, self._onDoneSpeaking)
		# This information doesn't really need to be displayed, and makes IBMTTS unusable if the addon is not in the same drive as NVDA executable.
		# But display it only on debug mode in case of it can be useful
		log.debug("Using IBMTTS version %s" % _ibmeci.eciVersion())
		lang = languageHandler.getLanguage()
		self.rate=50
		self.speakingLanguage=lang
		self.variant="1"
		self.currentEncoding = "mbcs"

	# Maps NVDA prosody commands to the ECI voice parameter they control.
	PROSODY_ATTRS = {
		PitchCommand: ECIVoiceParam.eciPitchBaseline,
		VolumeCommand: ECIVoiceParam.eciVolume,
		RateCommand: ECIVoiceParam.eciSpeed,
	}

	def speak(self,speechSequence):
		"""Translate an NVDA speech sequence into ECI calls and queue them for processing."""
		last = None
		defaultLanguage=self.language
		outlist = []
		charmode=False
		for item in speechSequence:
			if isinstance(item, string_types):
				s = self.processText(item)
				outlist.append((_ibmeci.speak, (s,)))
				last = s
			elif isinstance(item,IndexCommand):
				outlist.append((_ibmeci.index, (item.index,)))
			elif isinstance(item,LangChangeCommand):
				l=None
				if item.lang in langsAnnotations: l = langsAnnotations[item.lang]
				elif item.lang and item.lang[0:2] in langsAnnotations: l = langsAnnotations[item.lang[0:2]]
				if l:
					if item.lang != self.speakingLanguage and item.lang != self.speakingLanguage[0:2]:
						outlist.append((_ibmeci.speak, (l,)))
						self.speakingLanguage=item.lang
						self.updateEncoding(l)
				else:
					outlist.append((_ibmeci.speak, (langsAnnotations[defaultLanguage],)))
					self.speakingLanguage = defaultLanguage
			elif isinstance(item,CharacterModeCommand):
				outlist.append((_ibmeci.speak, (b"`ts1" if item.state else b"`ts0",)))
				if item.state:
					charmode=True
			elif isinstance(item,BreakCommand):
				# taken from eloquence_threshold (https://github.com/pumper42nickel/eloquence_threshold)
				# Eloquence doesn't respect delay time in milliseconds.
				# Therefore we need to adjust waiting time depending on current speech rate
				# The following table of adjustments has been measured empirically
				# Then we do linear approximation
				coefficients = {
						10:1,
						43:2,
						60:3,
						75:4,
						85:5,
				}
				ck = sorted(coefficients.keys())
				if self.rate <= ck[0]:
					factor = coefficients[ck[0]]
				elif self.rate >= ck[-1]:
					factor = coefficients[ck[-1]]
				elif self.rate in ck:
					factor = coefficients[self.rate]
				else:
					li = [index for index, r in enumerate(ck) if r<self.rate][-1]
					ri = li + 1
					ra = ck[li]
					rb = ck[ri]
					factor = 1.0 * coefficients[ra] + (coefficients[rb] - coefficients[ra]) * (self.rate - ra) / (rb-ra)
				pFactor = factor*item.time
				pFactor = int(pFactor)
				outlist.append((_ibmeci.speak, (b' `p%d '%(pFactor),)))
			elif type(item) in self.PROSODY_ATTRS:
				val = max(0, min(item.newValue, 100))
				if type(item) == RateCommand: val = self.percentToRate(val)
				outlist.append((_ibmeci.setProsodyParam, (self.PROSODY_ATTRS[type(item)], val)))
			else:
				log.error("Unknown speech: %s"%item)
		# Fix: guard against empty text (processText can return b"" after rstrip)
		# which previously raised IndexError on last[-1].
		if last and last[-1] not in punctuation:
			# check if a pitch command is at the end of the list, because p1 need to be send before this.
			# index -2 is because -1 always seem to be an index command.
			# Fix: guard against outlist having fewer than 2 entries, which
			# previously raised IndexError on outlist[-2].
			if len(outlist) >= 2 and outlist[-2][0] == _ibmeci.setProsodyParam:
				outlist.insert(-2, (_ibmeci.speak, (b'`p1 ',)))
			else:
				outlist.append((_ibmeci.speak, (b'`p1 ',)))
		if charmode:
			# Ensure character (spelling) mode is switched off at the end of the utterance.
			outlist.append((_ibmeci.speak, (b"`ts0",)))
		outlist.append((_ibmeci.setEndStringMark, ()))
		outlist.append((_ibmeci.synth, ()))
		_ibmeci.eciQueue.put(outlist)
		_ibmeci.process()

	def processText(self,text):
		"""Encode and sanitize text for the ECI engine, applying per-language fixups
		and the user's backquote-tag / short-pause preferences.
		@returns: the prepared text, as bytes in the current engine encoding.
		"""
		#this converts to ansi for anticrash. If this breaks with foreign langs, we can remove it.
		text = text.encode(self.currentEncoding, 'replace') # special unicode symbols may encode to backquote. For this reason, backquote processing is after this.
		text = text.rstrip()
		if _ibmeci.params[9] in (65536, 65537, 393216, 655360, 720897): text = resub(english_fixes, text) #Applies to all languages with dual language support.
		if _ibmeci.params[9] in (65536, 65537, 393216, 655360, 720897) and _ibmeci.isIBM: text = resub(english_ibm_fixes, text)
		if _ibmeci.params[9] in (131072,  131073) and not _ibmeci.isIBM: text = resub(spanish_fixes, text)
		if _ibmeci.params[9] in ('esp', 131072) and _ibmeci.isIBM: text = resub(spanish_ibm_fixes, text)
		if _ibmeci.params[9] in (196609, 196608):
			text = text.replace(br'quil', br'qil') #Sometimes this string make everything buggy with IBMTTS in French
		if  _ibmeci.params[9] in ('deu', 262144):
			text = resub(german_fixes, text)
		if  _ibmeci.params[9] in ('ptb', 458752) and _ibmeci.isIBM:
			text = resub(portuguese_ibm_fixes, text)
		if not self._backquoteVoiceTags:
			text=text.replace(b'`', b' ') # no embedded commands
		if self._shortpause:
			text = pause_re.sub(br'\1 `p1\2\3\4', text) # this enforces short, JAWS-like pauses.
		if not _ibmeci.isIBM:
			text = time_re.sub(br'\1:\2 \3', text) # apparently if this isn't done strings like 2:30:15 will only announce 2:30
		embeds=b''
		if self._ABRDICT:
			embeds+=b"`da1 "
		else:
			embeds+=b"`da0 "
		if self._phrasePrediction:
			embeds+=b"`pp1 "
		else:
			embeds+=b"`pp0 "
		if self._sendParams:
			embeds+=b"`vv%d `vs%d " % (_ibmeci.getVParam(ECIVoiceParam.eciVolume), _ibmeci.getVParam(ECIVoiceParam.eciSpeed))
		text = b"%s %s" % (embeds.rstrip(), text) # bring all the printf stuff into one call, in one string. This avoids all the concatonation and printf additions of the previous organization.
		return text

	def pause(self,switch):
		_ibmeci.pause(switch)

	def terminate(self):
		_ibmeci.terminate()

	# Backing fields for the boolean driver settings above.
	_backquoteVoiceTags=False
	_ABRDICT=False
	_phrasePrediction=False
	_shortpause=False
	_sendParams=True

	def _get_backquoteVoiceTags(self):
		return self._backquoteVoiceTags

	def _set_backquoteVoiceTags(self, enable):
		if enable == self._backquoteVoiceTags:
			return
		self._backquoteVoiceTags = enable

	def _get_ABRDICT(self):
		return self._ABRDICT

	def _set_ABRDICT(self, enable):
		if enable == self._ABRDICT:
			return
		self._ABRDICT = enable

	def _get_phrasePrediction(self):
		return self._phrasePrediction

	def _set_phrasePrediction(self, enable):
		if enable == self._phrasePrediction:
			return
		self._phrasePrediction = enable

	def _get_shortpause(self):
		return self._shortpause

	def _set_shortpause(self, enable):
		if enable == self._shortpause:
			return
		self._shortpause = enable

	def _get_sendParams(self):
		return self._sendParams

	def _set_sendParams(self, enable):
		if enable == self._sendParams:
			return
		self._sendParams = enable

	_rateBoost = False
	RATE_BOOST_MULTIPLIER = 1.6

	def _get_rateBoost(self):
		return self._rateBoost

	def _set_rateBoost(self, enable):
		# Re-apply the current rate so the engine speed reflects the new multiplier.
		if enable != self._rateBoost:
			rate = self.rate
			self._rateBoost = enable
			self.rate = rate

	def _get_rate(self):
		val = _ibmeci.getVParam(ECIVoiceParam.eciSpeed)
		if self._rateBoost: val=int(round(val/self.RATE_BOOST_MULTIPLIER))
		return self._paramToPercent(val, minRate, maxRate)

	def percentToRate(self, val):
		val = self._percentToParam(val, minRate, maxRate)
		if self._rateBoost: val = int(round(val *self.RATE_BOOST_MULTIPLIER))
		return val

	def _set_rate(self,val):
		val = self.percentToRate(val)
		self._rate = val
		_ibmeci.setVParam(ECIVoiceParam.eciSpeed, val)

	def _get_pitch(self):
		return _ibmeci.getVParam(ECIVoiceParam.eciPitchBaseline)

	def _set_pitch(self,vl):
		_ibmeci.setVParam(ECIVoiceParam.eciPitchBaseline,vl)

	def _get_volume(self):
		return _ibmeci.getVParam(ECIVoiceParam.eciVolume)

	def _set_volume(self,vl):
		_ibmeci.setVParam(ECIVoiceParam.eciVolume,int(vl))

	def _set_inflection(self,vl):
		vl = int(vl)
		_ibmeci.setVParam(ECIVoiceParam.eciPitchFluctuation,vl)

	def _get_inflection(self):
		return _ibmeci.getVParam(ECIVoiceParam.eciPitchFluctuation)

	def _set_hsz(self,vl):
		vl = int(vl)
		_ibmeci.setVParam(ECIVoiceParam.eciHeadSize,vl)

	def _get_hsz(self):
		return _ibmeci.getVParam(ECIVoiceParam.eciHeadSize)

	def _set_rgh(self,vl):
		vl = int(vl)
		_ibmeci.setVParam(ECIVoiceParam.eciRoughness,vl)

	def _get_rgh(self):
		return _ibmeci.getVParam(ECIVoiceParam.eciRoughness)

	def _set_bth(self,vl):
		vl = int(vl)
		_ibmeci.setVParam(ECIVoiceParam.eciBreathiness,vl)

	def _get_bth(self):
		return _ibmeci.getVParam(ECIVoiceParam.eciBreathiness)

	def _getAvailableVoices(self):
		"""Return an OrderedDict of voice ID -> VoiceInfo, one per installed .syn language file."""
		o = OrderedDict()
		for name in os.listdir(_ibmeci.ttsPath):
			if name.lower().endswith('.syn'):
				info = _ibmeci.langs[name.lower()[:3]]
				o[str(info[0])] = VoiceInfo(str(info[0]), info[1], info[2])
		return o

	def _get_voice(self):
		return str(_ibmeci.params[_ibmeci.ECIParam.eciLanguageDialect])

	def _set_voice(self,vl):
		_ibmeci.setVoice(int(vl))
		self.updateEncoding(int(vl))

	def updateEncoding(self, lang):
		"""Update the text encoding used when preparing speech for the engine.
		@param lang: either a numeric IBMTTS language ID, or a bytes annotation
			string such as b"`l1.0" (which is converted to the numeric ID).
		"""
		# currently we don't need to consider the decimal part for the conversion.
		if isinstance(lang, bytes): lang = int(float(lang[2:])) * 65536
		#chinese
		if lang == 393216: self.currentEncoding = "gb2312"
		# japan
		elif lang == 524288: self.currentEncoding = "cp932"
		# korean
		elif lang == 655360: self.currentEncoding = "cp949"
		elif lang == 720897: self.currentEncoding = "big5"
		else: self.currentEncoding = "mbcs"

	def _get_lastIndex(self):
		#fix?
		return _ibmeci.lastindex

	def cancel(self):
		_ibmeci.stop()

	def _getAvailableVariants(self):
		global variants
		return OrderedDict((str(id), synthDriverHandler.VoiceInfo(str(id), name)) for id, name in variants.items())

	def _set_variant(self, v):
		global variants
		self._variant = v if int(v) in variants else "1"
		_ibmeci.setVariant(int(v))
		_ibmeci.setVParam(ECIVoiceParam.eciSpeed, self._rate)
		#if 'ibmtts' in config.conf['speech']:
		#config.conf['speech']['ibmtts']['pitch'] = self.pitch

	def _get_variant(self): return self._variant

	def _onIndexReached(self, index): synthIndexReached.notify(synth=self, index=index)

	def _onDoneSpeaking(self): synthDoneSpeaking.notify(synth=self)
コード例 #20
0
class SynthDriver(synthDriverHandler.SynthDriver):
    """NVDA synth driver for the ETI-Eloquence synthesiser.

    All engine access goes through the _eloquence module; speech is queued as
    a list of (callable, args) pairs and processed on the engine's own thread.
    """
    # Settings surfaced in NVDA's voice settings dialog. hsz/rgh/bth are
    # Eloquence-specific numeric parameters (head size, roughness, breathiness);
    # backquoteVoiceTags lets users embed raw `-prefixed engine commands.
    supportedSettings = (SynthDriver.VoiceSetting(),
                         SynthDriver.VariantSetting(),
                         SynthDriver.RateSetting(), SynthDriver.PitchSetting(),
                         SynthDriver.InflectionSetting(),
                         SynthDriver.VolumeSetting(),
                         driverHandler.NumericDriverSetting(
                             "hsz", "Head Size"),
                         driverHandler.NumericDriverSetting(
                             "rgh", "Roughness"),
                         driverHandler.NumericDriverSetting(
                             "bth", "Breathiness"),
                         driverHandler.BooleanDriverSetting(
                             "backquoteVoiceTags",
                             "Enable backquote voice &tags", True))
    supportedCommands = {
        speech.IndexCommand,
        speech.CharacterModeCommand,
        speech.LangChangeCommand,
        speech.BreakCommand,
        speech.PitchCommand,
        speech.RateCommand,
        speech.VolumeCommand,
        speech.PhonemeCommand,
    }
    supportedNotifications = {synthIndexReached, synthDoneSpeaking}
    # Maps NVDA prosody commands to the matching Eloquence parameter constants.
    PROSODY_ATTRS = {
        speech.PitchCommand: _eloquence.pitch,
        speech.VolumeCommand: _eloquence.vlm,
        speech.RateCommand: _eloquence.rate,
    }

    description = 'ETI-Eloquence'
    name = 'eloquence'

    @classmethod
    def check(cls):
        # Available only when the Eloquence ECI library can be located.
        return _eloquence.eciCheck()

    def __init__(self):
        _eloquence.initialize(self._onIndexReached)
        # Default voice: US English; defaults for rate and variant.
        self.curvoice = "enu"
        self.rate = 50
        self.variant = "1"

    def speak(self, speechSequence):
        """Translate an NVDA speech sequence into queued engine calls."""
        last = None
        outlist = []
        for item in speechSequence:
            if isinstance(item, str):
                s = str(item)
                s = self.xspeakText(s)
                outlist.append((_eloquence.speak, (s, )))
                last = s
            elif isinstance(item, speech.IndexCommand):
                outlist.append((_eloquence.index, (item.index, )))
            elif isinstance(item, speech.BreakCommand):
                # Eloquence doesn't respect delay time in milliseconds.
                # Therefor we need to adjust waiting time depending on curernt speech rate
                # The following table of adjustments has been measured empirically
                # Then we do linear approximation
                coefficients = {
                    10: 1,
                    43: 2,
                    60: 3,
                    75: 4,
                    85: 5,
                }
                ck = sorted(coefficients.keys())
                # Clamp below/above the measured range, use exact matches
                # directly, otherwise linearly interpolate between neighbours.
                if self.rate <= ck[0]:
                    factor = coefficients[ck[0]]
                elif self.rate >= ck[-1]:
                    factor = coefficients[ck[-1]]
                elif self.rate in ck:
                    factor = coefficients[self.rate]
                else:
                    li = [
                        index for index, r in enumerate(ck) if r < self.rate
                    ][-1]
                    ri = li + 1
                    ra = ck[li]
                    rb = ck[ri]
                    factor = 1.0 * coefficients[ra] + (
                        coefficients[rb] - coefficients[ra]) * (self.rate -
                                                                ra) / (rb - ra)
                pFactor = factor * item.time
                pFactor = int(pFactor)
                outlist.append((_eloquence.speak, (f'`p{pFactor}.', )))
            elif type(item) in self.PROSODY_ATTRS:
                pr = self.PROSODY_ATTRS[type(item)]
                if item.multiplier == 1:
                    # Revert back to defaults
                    outlist.append((_eloquence.cmdProsody, (
                        pr,
                        None,
                    )))
                else:
                    outlist.append((_eloquence.cmdProsody, (
                        pr,
                        item.multiplier,
                    )))
        # Append a short pause when the utterance doesn't end in punctuation.
        # NOTE(review): last.rstrip()[-1] raises IndexError when the processed
        # text is empty or whitespace-only — confirm upstream guarantees
        # non-empty text here.
        if last is not None and not last.rstrip()[-1] in punctuation:
            outlist.append((_eloquence.speak, ('`p1.', )))
        # 0xffff marks end-of-utterance for the done-speaking notification.
        outlist.append((_eloquence.index, (0xffff, )))
        outlist.append((_eloquence.synth, ()))
        _eloquence.synth_queue.put(outlist)
        _eloquence.process()

    def xspeakText(self, text, should_pause=False):
        """Apply language-specific fixes and wrap *text* in engine commands.

        params[9] holds the current language/dialect ID; 65536/65537 are
        English, 131072/131073 Spanish, 196608/196609 French variants.
        """
        if _eloquence.params[9] == 65536 or _eloquence.params[9] == 65537:
            text = resub(english_fixes, text)
        if _eloquence.params[9] == 131072 or _eloquence.params[9] == 131073:
            text = resub(spanish_fixes, text)
        if _eloquence.params[9] in (196609, 196608):
            text = resub(french_fixes, text)
        #this converts to ansi for anticrash. If this breaks with foreign langs, we can remove it.
        #text = text.encode('mbcs')
        text = normalizeText(text)
        text = resub(anticrash_res, text)
        # Neutralise backquote engine commands unless the user enabled them.
        if not self._backquoteVoiceTags:
            text = text.replace('`', ' ')
        text = "`pp0 `vv%d %s" % (self.getVParam(_eloquence.vlm), text
                                  )  #no embedded commands
        text = pause_re.sub(r'\1 `p1\2\3', text)
        text = time_re.sub(r'\1:\2 \3', text)
        #if two strings are sent separately, pause between them. This might fix some of the audio issues we're having.
        if should_pause:
            text = text + ' `p1.'
        return text
        #  _eloquence.speak(text, index)

        # def cancel(self):
        #  self.dll.eciStop(self.handle)

    def pause(self, switch):
        # Pause (True) or resume (False) speech output.
        _eloquence.pause(switch)
        #  self.dll.eciPause(self.handle,switch)

    def terminate(self):
        # Shut down the engine and its worker thread.
        _eloquence.terminate()

    # Whether raw backquote engine commands in spoken text are honoured.
    _backquoteVoiceTags = False

    def _get_backquoteVoiceTags(self):
        return self._backquoteVoiceTags

    def _set_backquoteVoiceTags(self, enable):
        if enable == self._backquoteVoiceTags:
            return
        self._backquoteVoiceTags = enable

    def _get_rate(self):
        # NOTE(review): the getter reads the live engine parameter while the
        # setter also caches self._rate (used by _set_variant) — confirm the
        # two stay in sync.
        return self._paramToPercent(self.getVParam(_eloquence.rate), minRate,
                                    maxRate)

    def _set_rate(self, vl):
        # Cache the raw engine value so _set_variant can restore it.
        self._rate = self._percentToParam(vl, minRate, maxRate)
        self.setVParam(_eloquence.rate,
                       self._percentToParam(vl, minRate, maxRate))

    def _get_pitch(self):
        return self.getVParam(_eloquence.pitch)

    def _set_pitch(self, vl):
        self.setVParam(_eloquence.pitch, vl)

    def _get_volume(self):
        return self.getVParam(_eloquence.vlm)

    def _set_volume(self, vl):
        self.setVParam(_eloquence.vlm, int(vl))

    def _set_inflection(self, vl):
        vl = int(vl)
        self.setVParam(_eloquence.fluctuation, vl)

    def _get_inflection(self):
        return self.getVParam(_eloquence.fluctuation)

    def _set_hsz(self, vl):
        # Head size.
        vl = int(vl)
        self.setVParam(_eloquence.hsz, vl)

    def _get_hsz(self):
        return self.getVParam(_eloquence.hsz)

    def _set_rgh(self, vl):
        # Roughness.
        vl = int(vl)
        self.setVParam(_eloquence.rgh, vl)

    def _get_rgh(self):
        return self.getVParam(_eloquence.rgh)

    def _set_bth(self, vl):
        # Breathiness.
        vl = int(vl)
        self.setVParam(_eloquence.bth, vl)

    def _get_bth(self):
        return self.getVParam(_eloquence.bth)

    def _getAvailableVoices(self):
        # Voices are discovered as *.syn files next to the ECI library
        # (eciPath[:-8] strips the library file name from the path).
        o = OrderedDict()
        for name in os.listdir(_eloquence.eciPath[:-8]):
            if not name.lower().endswith('.syn'): continue
            info = _eloquence.langs[name.lower()[:-4]]
            o[str(info[0])] = synthDriverHandler.VoiceInfo(
                str(info[0]), info[1], None)
        return o

    def _get_voice(self):
        # params[9] is the current language/dialect ID.
        return str(_eloquence.params[9])

    def _set_voice(self, vl):
        _eloquence.set_voice(vl)
        self.curvoice = vl

    def getVParam(self, pr):
        return _eloquence.getVParam(pr)

    def setVParam(self, pr, vl):
        _eloquence.setVParam(pr, vl)

    def _get_lastIndex(self):
        #fix?
        return _eloquence.lastindex

    def cancel(self):
        _eloquence.stop()

    def _getAvailableVariants(self):

        global variants
        return OrderedDict(
            (str(id), synthDriverHandler.VoiceInfo(str(id), name))
            for id, name in variants.items())

    def _set_variant(self, v):
        global variants
        # Fall back to variant "1" when *v* is not installed.
        self._variant = v if int(v) in variants else "1"
        # NOTE(review): the raw *v* is passed to the engine even when the
        # fallback above chose "1" — likely should be int(self._variant).
        _eloquence.setVariant(int(v))
        # Changing the variant resets the engine speed; restore the cached rate.
        self.setVParam(_eloquence.rate, self._rate)
        #  if 'eloquence' in config.conf['speech']:
        #   config.conf['speech']['eloquence']['pitch'] = self.pitch

    def _get_variant(self):
        return self._variant

    def _onIndexReached(self, index):
        # The engine signals end-of-speech by passing index=None.
        if index is not None:
            synthIndexReached.notify(synth=self, index=index)
        else:
            synthDoneSpeaking.notify(synth=self)
コード例 #21
0
class SynthDriver(SynthDriver):
    """Dual-voice SAPI5 synth driver.

    Wraps the SAPI.SPVoice COM object and shares runtime state with the
    _realtime module so a secondary voice can be interleaved (via
    _dual_sapi5.nlp) for non-Latin text.
    """
    supportedSettings = (SynthDriver.VoiceSetting(), SynthDriver.RateSetting(),
                         SynthDriver.PitchSetting(),
                         SynthDriver.VolumeSetting())
    supportedCommands = {
        speech.IndexCommand,
        speech.CharacterModeCommand,
        speech.LangChangeCommand,
        speech.BreakCommand,
        speech.PitchCommand,
        speech.RateCommand,
        speech.VolumeCommand,
        speech.PhonemeCommand,
    }
    supportedNotifications = {synthIndexReached, synthDoneSpeaking}

    COM_CLASS = "SAPI.SPVoice"

    name = "dual_sapi5"
    description = "Dual voice using Speech API version 5"

    @classmethod
    def check(cls):
        # Available when the SAPI5 COM class is registered.
        try:
            r = winreg.OpenKey(winreg.HKEY_CLASSES_ROOT, cls.COM_CLASS)
            r.Close()
            return True
        except:
            return False

    ttsAudioStream = None  #: Holds the ISPAudio interface for the current voice, to aid in stopping and pausing audio

    def __init__(self, _defaultVoiceToken=None):
        """
		@param _defaultVoiceToken: an optional sapi voice token which should be used as the default voice (only useful for subclasses)
		@type _defaultVoiceToken: ISpeechObjectToken
		"""
        ensureWaveOutHooks()
        # SAPI5 has no direct pitch control; pitch is applied via XML in _speak.
        self._pitch = 50
        self._initTts(_defaultVoiceToken)
        # Register the add-on's configuration schema and mirror it into the
        # _realtime module shared with the NLP layer.
        confspec = {
            "sapi5SecondVoice": "string(default='')",
            "sapi5SecondRate": "integer(default=50)",
            "sapi5SecondPitch": "integer(default=50)",
            "sapi5SecondVolume": "integer(default=100)",
            "sapi5SecondIsLatin": "boolean(default=False)",
            "sapi5NonLatinPriority": "boolean(default=False)",
            "sapi5ConsiderContext": "boolean(default=False)",
        }
        config.conf.spec["dual_voice"] = confspec

        _realtime.sapi5SecondVoice = config.conf["dual_voice"][
            "sapi5SecondVoice"]
        _realtime.sapi5SecondRate = config.conf["dual_voice"][
            "sapi5SecondRate"]
        _realtime.sapi5SecondPitch = config.conf["dual_voice"][
            "sapi5SecondPitch"]
        _realtime.sapi5SecondVolume = config.conf["dual_voice"][
            "sapi5SecondVolume"]
        _realtime.sapi5SecondIsLatin = config.conf["dual_voice"][
            "sapi5SecondIsLatin"]
        _realtime.sapi5NonLatinPriority = config.conf["dual_voice"][
            "sapi5NonLatinPriority"]
        _realtime.sapi5ConsiderContext = config.conf["dual_voice"][
            "sapi5ConsiderContext"]
        _realtime.primaryVoiceID = _defaultVoiceToken
        _realtime.problemisticPrimaryVoiceID = ''

    def terminate(self):
        # Release the COM event sink before the voice object itself.
        self._eventsConnection = None
        self.tts = None

    def _getAvailableVoices(self):
        """Return an OrderedDict of voice ID -> VoiceInfo, also recording each
        voice's registry Name attribute in _realtime for secondary-voice use."""
        voices = OrderedDict()
        v = self._getVoiceTokens()
        # #2629: Iterating uses IEnumVARIANT and GetBestInterface doesn't work on tokens returned by some token enumerators.
        # Therefore, fetch the items by index, as that method explicitly returns the correct interface.
        for i in range(len(v)):
            try:
                ID = v[i].Id
                name = v[i].GetDescription()
                try:
                    language = locale.windows_locale[int(
                        v[i].getattribute('language').split(';')[0], 16)]
                except KeyError:
                    language = None
                # Extract the name Attribute of each voice which could be used in SAPI5 XML for voice selection.
                voiceAttribName = v[i].getattribute('name')
            except COMError:
                log.warning("Could not get the voice info. Skipping...")
            # NOTE(review): despite the "Skipping..." message there is no
            # `continue` above — after a COMError this line reuses (or never
            # binds) ID/name/voiceAttribName from a previous iteration.
            voices[ID] = VoiceInfo(ID, name, language)
            if voiceAttribName in _realtime.list_VoiceAttribName:
                log.warning(
                    name +
                    ' do not has the required Name attribute in the registry. Hence it could not be used as the secondary voice.'
                )
            else:
                _realtime.list_VoiceAttribName.append(voiceAttribName)
                _realtime.list_VoiceID.append(ID)
                _realtime.list_VoiceName.append(name)
                _realtime.list_VoiceLang.append(language)
        return voices

    def _getVoiceTokens(self):
        """Provides a collection of sapi5 voice tokens. Can be overridden by subclasses if tokens should be looked for in some other registry location."""
        return self.tts.getVoices()

    def _get_rate(self):
        # Map SAPI rate (-10..10) to an NVDA percentage (0..100).
        return (self.tts.rate * 5) + 50

    def _get_pitch(self):
        return self._pitch

    def _get_volume(self):
        # Volume is tracked in _realtime rather than on the COM object.
        #		return self.tts.volume
        return _realtime.sapi5FirstVolume

    def _get_voice(self):
        return self.tts.voice.Id

    def _get_lastIndex(self):
        # Bookmarks carry NVDA speech indexes through SAPI.
        bookmark = self.tts.status.LastBookmark
        if bookmark != "" and bookmark is not None:
            return int(bookmark)
        else:
            return None

    def _percentToRate(self, percent):
        # Inverse of _get_rate: percentage (0..100) -> SAPI rate (-10..10).
        return (percent - 50) // 5

    def _set_rate(self, rate):
        self.tts.Rate = self._percentToRate(rate)

    def _set_pitch(self, value):
        #pitch is really controled with xml around speak commands
        self._pitch = value

    def _set_volume(self, value):
        #		self.tts.Volume = value
        _realtime.sapi5FirstVolume = value

    def _initTts(self, voice=None):
        """(Re)create the SPVoice COM object, select *voice*, wire up the audio
        output device, the event sink and the ISpAudio interface."""
        self.tts = comtypes.client.CreateObject(self.COM_CLASS)
        if voice:
            # #749: It seems that SAPI 5 doesn't reset the audio parameters when the voice is changed,
            # but only when the audio output is changed.
            # Therefore, set the voice before setting the audio output.
            # Otherwise, we will get poor speech quality in some cases.
            self.tts.voice = voice
        outputDeviceID = nvwave.outputDeviceNameToID(
            config.conf["speech"]["outputDevice"], True)
        if outputDeviceID >= 0:
            self.tts.audioOutput = self.tts.getAudioOutputs()[outputDeviceID]
        self._eventsConnection = comtypes.client.GetEvents(
            self.tts, SapiSink(weakref.ref(self)))
        self.tts.EventInterests = constants.SVEBookmark | constants.SVEEndInputStream
        from comInterfaces.SpeechLib import ISpAudio
        try:
            # ISpAudio allows fast stop/pause in cancel()/pause().
            self.ttsAudioStream = self.tts.audioOutputStream.QueryInterface(
                ISpAudio)
        except COMError:
            log.debugWarning("SAPI5 voice does not support ISPAudio")
            self.ttsAudioStream = None

    def _set_voice(self, value):
        tokens = self._getVoiceTokens()
        # #2629: Iterating uses IEnumVARIANT and GetBestInterface doesn't work on tokens returned by some token enumerators.
        # Therefore, fetch the items by index, as that method explicitly returns the correct interface.
        for i in range(len(tokens)):
            voice = tokens[i]
            if value == voice.Id:
                break
        else:
            # Voice not found.
            return
        self._initTts(voice=voice)
        _realtime.primaryVoiceID = voice.Id
        _realtime.problemisticPrimaryVoiceID = ''

    def _percentToPitch(self, percent):
        # Percentage (0..100) -> SAPI absmiddle pitch (-25..25).
        return percent // 2 - 25

    # Minimal IPA -> SAPI phoneme table; only US English is supported.
    IPA_TO_SAPI = {
        u"θ": u"th",
        u"s": u"s",
    }

    def _convertPhoneme(self, ipa):
        # We only know about US English phonemes.
        # Rather than just ignoring unknown phonemes, SAPI throws an exception.
        # Therefore, don't bother with any other language.
        if self.tts.voice.GetAttribute("language") != "409":
            raise LookupError("No data for this language")
        out = []
        outAfter = None
        for ipaChar in ipa:
            if ipaChar == u"ˈ":
                # Primary stress applies to the following phoneme in SAPI.
                outAfter = u"1"
                continue
            out.append(self.IPA_TO_SAPI[ipaChar])
            if outAfter:
                out.append(outAfter)
                outAfter = None
        if outAfter:
            out.append(outAfter)
        return u" ".join(out)

    def _speak(self, speechSequence):
        """Render the speech sequence as SAPI XML and speak it asynchronously."""
        textList = []

        # NVDA SpeechCommands are linear, but XML is hierarchical.
        # Therefore, we track values for non-empty tags.
        # When a tag changes, we close all previously opened tags and open new ones.
        tags = {}
        # We have to use something mutable here because it needs to be changed by the inner function.
        tagsChanged = [True]
        openedTags = []

        def outputTags():
            if not tagsChanged[0]:
                return
            for tag in reversed(openedTags):
                textList.append("</%s>" % tag)
            del openedTags[:]
            for tag, attrs in tags.items():
                textList.append("<%s" % tag)
                for attr, val in attrs.items():
                    textList.append(' %s="%s"' % (attr, val))
                textList.append(">")
                openedTags.append(tag)
            tagsChanged[0] = False

        pitch = self._pitch
        # Pitch must always be specified in the markup.
        tags["pitch"] = {"absmiddle": self._percentToPitch(pitch)}
        rate = self.rate
        #		volume = self.volume
        volume = _realtime.sapi5FirstVolume

        for item in speechSequence:
            if isinstance(item, str):
                outputTags()
                #item = item.replace("1", "Yek") # Mahmood Taghavi
                item = item.replace("<", "&lt;")
                #item = item + '<voice required="Name=Microsoft Anna"> Mahmood Taghavi </voice>'
                # The NLP layer injects secondary-voice XML around non-Latin runs.
                item = _dual_sapi5.nlp(text=item)  # Mahmood Taghavi
                textList.append(item)
                #textList.append(item.replace("<", "&lt;"))
            elif isinstance(item, speech.IndexCommand):
                textList.append('<Bookmark Mark="%d" />' % item.index)
            elif isinstance(item, speech.CharacterModeCommand):
                if item.state:
                    tags["spell"] = {}
                else:
                    try:
                        del tags["spell"]
                    except KeyError:
                        pass
                tagsChanged[0] = True
            elif isinstance(item, speech.BreakCommand):
                textList.append('<silence msec="%d" />' % item.time)
            elif isinstance(item, speech.PitchCommand):
                tags["pitch"] = {
                    "absmiddle":
                    self._percentToPitch(int(pitch * item.multiplier))
                }
                tagsChanged[0] = True
            elif isinstance(item, speech.VolumeCommand):
                if item.multiplier == 1:
                    try:
                        del tags["volume"]
                    except KeyError:
                        pass
                else:
                    tags["volume"] = {"level": int(volume * item.multiplier)}
                tagsChanged[0] = True
            elif isinstance(item, speech.RateCommand):
                if item.multiplier == 1:
                    try:
                        del tags["rate"]
                    except KeyError:
                        pass
                else:
                    tags["rate"] = {
                        "absspeed":
                        self._percentToRate(int(rate * item.multiplier))
                    }
                tagsChanged[0] = True
            elif isinstance(item, speech.PhonemeCommand):
                try:
                    textList.append(
                        u'<pron sym="%s">%s</pron>' %
                        (self._convertPhoneme(item.ipa), item.text or u""))
                except LookupError:
                    log.debugWarning(
                        "Couldn't convert character in IPA string: %s" %
                        item.ipa)
                    if item.text:
                        textList.append(item.text)
            elif isinstance(item, speech.SpeechCommand):
                log.debugWarning("Unsupported speech command: %s" % item)
            else:
                log.error("Unknown speech: %s" % item)
        # Close any tags that are still open.
        tags.clear()
        tagsChanged[0] = True
        outputTags()

        text = "".join(textList)
        flags = constants.SVSFIsXML | constants.SVSFlagsAsync
        self.tts.Speak(text, flags)

    def speak(self, speechSequence):
        """Speak with fallback: on failure, first retry using the primary voice
        as the secondary; if that also fails, fall back to the system default
        primary voice; if both known-bad combos repeat, switch to eSpeak."""
        try:
            self._speak(speechSequence)
        except:
            if (_realtime.problemisticPrimaryVoiceID
                    == _realtime.primaryVoiceID) and (
                        _realtime.problemisticSapi5SecondVoice
                        == _realtime.sapi5SecondVoice):
                log.error(
                    'Dual Voice add-on: Fatal error! It seems the selected voices and the computer default voice have problems. So at least select another voice as the computer default voice!'
                )
                speech.setSynth('espeak')
            else:
                _realtime.problemisticSapi5SecondVoice = _realtime.sapi5SecondVoice
                _realtime.problemisticPrimaryVoiceID = _realtime.primaryVoiceID
                try:
                    # Possible solution 1: find the primary voice and use it also as the secondary voice.
                    index = _realtime.list_VoiceID.index(
                        _realtime.primaryVoiceID)
                    voiceAttribName = _realtime.list_VoiceAttribName[index]
                    log.warning(
                        'Dual Voice add-on: Error in at least one of the selected SAPI 5 voices has been occured! The primary voice was ('
                        + voiceAttribName + ') and the secondary voice was (' +
                        _realtime.sapi5SecondVoice + ')')
                    log.warning(
                        'Dual Voice add-on: Try possible solution 1! Use the primary voice ('
                        + voiceAttribName +
                        ') in place of the possible problematic secondary voice ('
                        + _realtime.sapi5SecondVoice + ').')
                    _realtime.tempStringVar = _realtime.sapi5SecondVoice
                    _realtime.sapi5SecondVoice = voiceAttribName
                    #config.conf["dual_voice"]["sapi5SecondVoice"] = _realtime.sapi5SecondVoice
                    self._speak(speechSequence)
                except:
                    # Possible solution 2: find the default voice and use it as the primary voice.
                    _realtime.sapi5SecondVoice = _realtime.tempStringVar
                    #config.conf["dual_voice"]["sapi5SecondVoice"] = _realtime.sapi5SecondVoice
                    log.warning(
                        'Dual Voice add-on: The possible solution 1 was failed! Hence the selected secondary voice was restored.'
                    )
                    log.warning(
                        'Dual Voice add-on: Try possible solution 2! Use the computer default voice ('
                        + _realtime.list_VoiceAttribName[0] +
                        ') in place of the possible problematic primary voice ('
                        + voiceAttribName + ').')
                    tokens = self._getVoiceTokens()
                    voice = tokens[0]
                    self._initTts(voice=voice)
                    _realtime.primaryVoiceID = voice.Id
                    self._speak(speechSequence)

    def cancel(self):
        # SAPI5's default means of stopping speech can sometimes lag at end of speech, especially with Win8 / Win 10 Microsoft Voices.
        # Therefore  instruct the underlying audio interface to stop first, before interupting and purging any remaining speech.
        if self.ttsAudioStream:
            self.ttsAudioStream.setState(SPAS_STOP, 0)
        self.tts.Speak(None, 1 | constants.SVSFPurgeBeforeSpeak)

    def pause(self, switch):
        # SAPI5's default means of pausing in most cases is either extrmemely slow (e.g. takes more than half a second) or does not work at all.
        # Therefore instruct the underlying audio interface to pause instead.
        if self.ttsAudioStream:
            self.ttsAudioStream.setState(SPAS_PAUSE if switch else SPAS_RUN, 0)
コード例 #22
0
ファイル: sapi5.py プロジェクト: mai-codes/evoHax-nvda
class SynthDriver(SynthDriver):
    supportedSettings = (SynthDriver.VoiceSetting(), SynthDriver.RateSetting(),
                         SynthDriver.PitchSetting(),
                         SynthDriver.VolumeSetting())

    COM_CLASS = "SAPI.SPVoice"

    name = "sapi5"
    description = "Microsoft Speech API version 5"

    @classmethod
    def check(cls):
        # The driver is available when the SAPI5 COM class is registered.
        # NOTE(review): the bare except also swallows unrelated errors;
        # narrowing to WindowsError/OSError would be safer.
        try:
            r = _winreg.OpenKey(_winreg.HKEY_CLASSES_ROOT, cls.COM_CLASS)
            r.Close()
            return True
        except:
            return False

    def __init__(self, _defaultVoiceToken=None):
        """
		@param _defaultVoiceToken: an optional sapi voice token which should be used as the default voice (only useful for subclasses)
		@type _defaultVoiceToken: ISpeechObjectToken
		"""
        # SAPI5 has no direct pitch control; pitch is applied via XML in speak().
        self._pitch = 50
        self._initTts(_defaultVoiceToken)

    def terminate(self):
        # Drop the COM voice object so it is released.
        del self.tts

    def _getAvailableVoices(self):
        """Return an OrderedDict mapping voice token IDs to VoiceInfo objects.

        Voices whose token info cannot be read are skipped with a warning.
        """
        voices = OrderedDict()
        v = self._getVoiceTokens()
        # #2629: Iterating uses IEnumVARIANT and GetBestInterface doesn't work on tokens returned by some token enumerators.
        # Therefore, fetch the items by index, as that method explicitly returns the correct interface.
        for i in xrange(len(v)):
            try:
                ID = v[i].Id
                name = v[i].GetDescription()
                try:
                    language = locale.windows_locale[int(
                        v[i].getattribute('language').split(';')[0], 16)]
                except KeyError:
                    language = None
            except COMError:
                # Bug fix: actually skip the broken token. Previously the log
                # said "Skipping..." but execution fell through, raising
                # NameError on the first iteration or silently reusing the
                # previous token's ID/name/language.
                log.warning("Could not get the voice info. Skipping...")
                continue
            voices[ID] = VoiceInfo(ID, name, language)
        return voices

    def _getVoiceTokens(self):
        """Provides a collection of sapi5 voice tokens. Can be overridden by subclasses if tokens should be looked for in some other registry location."""
        return self.tts.getVoices()

    def _get_rate(self):
        # Map the SAPI rate range (-10..10) to an NVDA percentage (0..100).
        return (self.tts.rate * 5) + 50

    def _get_pitch(self):
        # Pitch is tracked locally; it is only applied via XML in speak().
        return self._pitch

    def _get_volume(self):
        # Volume is read directly from the COM voice object (0..100).
        return self.tts.volume

    def _get_voice(self):
        # The registry token ID of the current voice identifies it uniquely.
        return self.tts.voice.Id

    def _get_lastIndex(self):
        # Bookmarks carry NVDA speech indexes through SAPI; return the last
        # one reached, or None when no bookmark has been hit yet.
        bookmark = self.tts.status.LastBookmark
        if bookmark != "" and bookmark is not None:
            return int(bookmark)
        else:
            return None

    def _set_rate(self, rate):
        # Map an NVDA percentage (0..100) back to the SAPI range (-10..10).
        # NOTE(review): '/' truncates on Python 2 (which this chunk targets —
        # see _winreg/xrange above); on Python 3 it would assign a float.
        self.tts.Rate = (rate - 50) / 5

    def _set_pitch(self, value):
        #pitch is really controled with xml around speak commands
        self._pitch = value

    def _set_volume(self, value):
        # Apply the volume (0..100) directly to the COM voice object.
        self.tts.Volume = value

    def _initTts(self, voice=None):
        # (Re)create the SPVoice COM object, optionally selecting *voice*,
        # and route its output to NVDA's configured audio device.
        self.tts = comtypes.client.CreateObject(self.COM_CLASS)
        if voice:
            # #749: It seems that SAPI 5 doesn't reset the audio parameters when the voice is changed,
            # but only when the audio output is changed.
            # Therefore, set the voice before setting the audio output.
            # Otherwise, we will get poor speech quality in some cases.
            self.tts.voice = voice
        outputDeviceID = nvwave.outputDeviceNameToID(
            config.conf["speech"]["outputDevice"], True)
        if outputDeviceID >= 0:
            self.tts.audioOutput = self.tts.getAudioOutputs()[outputDeviceID]

    def _set_voice(self, value):
        # Find the token whose ID matches *value* and re-initialise the COM
        # object with it; an unknown ID is silently ignored.
        tokens = self._getVoiceTokens()
        # #2629: Iterating uses IEnumVARIANT and GetBestInterface doesn't work on tokens returned by some token enumerators.
        # Therefore, fetch the items by index, as that method explicitly returns the correct interface.
        for i in xrange(len(tokens)):
            voice = tokens[i]
            if value == voice.Id:
                break
        else:
            # Voice not found.
            return
        self._initTts(voice=voice)

    def speak(self, speechSequence):
        # Render the NVDA speech sequence as SAPI XML and speak asynchronously.
        textList = []
        for item in speechSequence:
            if isinstance(item, basestring):
                # Escape '<' so literal text cannot break the XML.
                textList.append(item.replace("<", "&lt;"))
            elif isinstance(item, speech.IndexCommand):
                # Bookmarks carry NVDA indexes back via _get_lastIndex.
                textList.append("<Bookmark Mark=\"%d\" />" % item.index)
            elif isinstance(item, speech.CharacterModeCommand):
                textList.append("<spell>" if item.state else "</spell>")
            elif isinstance(item, speech.SpeechCommand):
                log.debugWarning("Unsupported speech command: %s" % item)
            else:
                log.error("Unknown speech: %s" % item)
        text = "".join(textList)
        #Pitch must always be hardcoded
        # Map the 0..100 pitch percentage to SAPI absmiddle (-25..25).
        pitch = (self._pitch / 2) - 25
        text = "<pitch absmiddle=\"%s\">%s</pitch>" % (pitch, text)
        flags = constants.SVSFIsXML | constants.SVSFlagsAsync
        self.tts.Speak(text, flags)

    def cancel(self):
        # Purge any queued speech; the None utterance with the purge flag
        # stops the current utterance as well.
        #if self.tts.Status.RunningState == 2:
        self.tts.Speak(None, 1 | constants.SVSFPurgeBeforeSpeak)

    def pause(self, switch):
        if switch:
            self.cancel()