Ejemplo n.º 1
0
    def _tokenize(self, text):
        """Split ``text`` into parts that fit in a single TTS request.

        The text is stripped, then passed through every callable in
        ``self.pre_processor_funcs`` in order. If the result is no longer
        than ``self.GOOGLE_TTS_MAX_CHARS`` (measured with ``_len``) it is
        returned as a single cleaned part. Otherwise it is split with
        ``self.tokenizer_func``, cleaned with ``_clean_tokens``, and every
        token is run through ``_minimize`` with a space delimiter and the
        same maximum size.

        Args:
            text (string): The text to split.

        Returns:
            list: The result of ``_clean_tokens([text])`` when the text
            fits in one request, otherwise the minimized tokens with
            empty ones removed.
        """
        # Pre-clean
        text = text.strip()

        # Apply pre-processors
        for pp in self.pre_processor_funcs:
            log.debug("pre-processing: %s", pp)
            text = pp(text)

        # Short enough for one request: no tokenizing needed
        if _len(text) <= self.GOOGLE_TTS_MAX_CHARS:
            return _clean_tokens([text])

        # Tokenize
        log.debug("tokenizing: %s", self.tokenizer_func)
        tokens = self.tokenizer_func(text)

        # Clean
        tokens = _clean_tokens(tokens)

        # Minimize
        min_tokens = []
        for t in tokens:
            min_tokens += _minimize(t, ' ', self.GOOGLE_TTS_MAX_CHARS)

        # Filter empty tokens, post-minimize. Return the *filtered* list:
        # the previous code built it and then returned ``min_tokens``,
        # letting empty parts through to the caller.
        return [t for t in min_tokens if t]
Ejemplo n.º 2
0
    def url(self):
        """Build the Google TTS request URL for the whole of ``self.text``.

        Returns:
            string: ``self.GOOGLE_TTS_URL`` followed by ``?`` and the
            URL-encoded query parameters.

        Raises:
            gTTSError: When the token calculation fails with a
                ``requests`` exception.
        """
        # The token is computed first; a network failure here is reported
        # as a gTTSError rather than a raw requests exception.
        try:
            token = self.token.calculate_token(self.text)
        except requests.exceptions.RequestException as e:  # pragma: no cover
            log.debug(str(e), exc_info=True)
            message = "Connection error during token calculation: %s" % str(e)
            raise gTTSError(message)

        # Query parameters for a single-part request (one part, index 0).
        query_params = {
            'ie': 'UTF-8',
            'q': self.text,
            'tl': self.lang,
            'ttsspeed': self.speed,
            'total': 1,
            'idx': 0,
            'client': 'tw-ob',
            'textlen': _len(self.text),
            'tk': token
        }
        query_string = urllib.parse.urlencode(query_params)

        return self.GOOGLE_TTS_URL + "?" + query_string
Ejemplo n.º 3
0
    def _prepare_requests(self):
        """Create the TTS API request(s) without sending them.

        ``self.text`` is tokenized and one GET request is prepared per
        resulting text part.

        Returns:
            list: ``requests.PreparedRequest`` objects, one per text part.
            See <https://2.python-requests.org/en/master/api/#requests.PreparedRequest>.

        Raises:
            AssertionError: When tokenizing ``self.text`` yields no parts.
            gTTSError: When the token calculation fails with a
                ``requests`` exception.
        """
        # TTS API URL
        translate_url = _translate_url(tld=self.tld, path="translate_tts")

        text_parts = self._tokenize(self.text)
        log.debug("text_parts: %i", len(text_parts))
        # Raised explicitly instead of via ``assert`` so the check is not
        # stripped when Python runs with ``-O``; the exception type is
        # unchanged for existing callers.
        if not text_parts:
            raise AssertionError('No text to send to TTS API')

        prepared_requests = []
        for idx, part in enumerate(text_parts):
            try:
                # Calculate token
                part_tk = self.token.calculate_token(part)
            except requests.exceptions.RequestException as e:  # pragma: no cover
                log.debug(str(e), exc_info=True)
                raise gTTSError(
                    "Connection error during token calculation: %s" % str(e))

            payload = {
                'ie': 'UTF-8',
                'q': part,
                'tl': self.lang,
                'ttsspeed': self.speed,
                'total': len(text_parts),
                'idx': idx,
                'client': 'tw-ob',
                'textlen': _len(part),
                'tk': part_tk
            }

            log.debug("payload-%i: %s", idx, payload)

            # Request
            r = requests.Request(method='GET',
                                 url=translate_url,
                                 params=payload,
                                 headers=self.GOOGLE_TTS_HEADERS)

            # Prepare request
            prepared_requests.append(r.prepare())

        return prepared_requests
Ejemplo n.º 4
0
    def stream_to_fps(self, fps):
        """Do the TTS API request(s) and stream the bytes to file-like objects.

        ``self.text`` is tokenized and one request is made per text part.
        Each response is read in 1024-byte chunks and every chunk is
        written to every object in ``fps``.

        Args:
            fps (file object or list): One file-like object, or a list of
                them, to write the response bytes to. Anything that is
                not a ``list`` is treated as a single target.

        Raises:
            AssertionError: When tokenizing ``self.text`` yields no parts.
            gtts.gTTSError: When the token calculation or the API request
                fails.
        """
        if not isinstance(fps, list):
            fps = [fps]
        # Requests are made with ``verify=False`` below; silence the
        # insecure-request warning urllib3 would otherwise emit.
        urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

        text_parts = self._tokenize(self.text)
        # Raised explicitly instead of via ``assert`` so the check is not
        # stripped when Python runs with ``-O``; the exception type is
        # unchanged for existing callers.
        if not text_parts:
            raise AssertionError('No text to send to TTS API')
        for idx, part in enumerate(text_parts):
            try:
                # Calculate token
                part_tk = self.token.calculate_token(part)
            except requests.exceptions.RequestException as e:  # pragma: no cover
                raise gtts.gTTSError(
                    "Connection error during token calculation: %s" %
                    str(e))

            payload = {'ie': 'UTF-8',
                       'q': part,
                       'tl': self.lang,
                       'ttsspeed': self.speed,
                       'total': len(text_parts),
                       'idx': idx,
                       'client': 'tw-ob',
                       'textlen': _len(part),
                       'tk': part_tk}
            # Pre-bound so the HTTPError handler can always reference it.
            r = None
            try:
                # Request
                r = requests.get(self.GOOGLE_TTS_URL,
                                 params=payload,
                                 headers=self.GOOGLE_TTS_HEADERS,
                                 proxies=urllib.request.getproxies(),
                                 verify=False,
                                 stream=True)

                r.raise_for_status()
            except requests.exceptions.HTTPError:
                # Request successful, bad response
                raise gtts.gTTSError(tts=self, response=r)
            except requests.exceptions.RequestException as e:  # pragma: no cover
                # Request failed
                raise gtts.gTTSError(str(e))
            # Fan each chunk out to every target as it arrives.
            for chunk in r.iter_content(chunk_size=1024):
                for f in fps:
                    f.write(chunk)
Ejemplo n.º 5
0
    def write_to_fp(self, fp):
        """Do the TTS API request and write bytes to a file-like object.

        ``self.text`` is tokenized and one request is made per text part;
        each response is written to ``fp`` in 1024-byte chunks, in order.

        Args:
            fp (file object): Any file-like object to write the ``mp3`` to.

        Raises:
            :class:`gTTSError`: When there's an error with the API request.
            TypeError: When ``fp`` is not a file-like object or does not
                take bytes.
            AssertionError: When tokenizing ``self.text`` yields no parts.

        """
        # When disabling ssl verify in requests (for proxies and firewalls),
        # urllib3 prints an insecure warning on stdout. We disable that.
        urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

        text_parts = self._tokenize(self.text)
        log.debug("text_parts: %i", len(text_parts))
        # Raised explicitly instead of via ``assert`` so the check is not
        # stripped when Python runs with ``-O``; the exception type is
        # unchanged for existing callers.
        if not text_parts:
            raise AssertionError('No text to send to TTS API')

        for idx, part in enumerate(text_parts):
            try:
                # Calculate token
                part_tk = self.token.calculate_token(part)
            except requests.exceptions.RequestException as e:  # pragma: no cover
                log.debug(str(e), exc_info=True)
                raise gTTSError(
                    "Connection error during token calculation: %s" % str(e))

            payload = {
                'ie': 'UTF-8',
                'q': part,
                'tl': self.lang,
                'ttsspeed': self.speed,
                'total': len(text_parts),
                'idx': idx,
                'client': 'tw-ob',
                'textlen': _len(part),
                'tk': part_tk
            }

            log.debug("payload-%i: %s", idx, payload)

            try:
                # Request
                r = requests.get(self.GOOGLE_TTS_URL,
                                 params=payload,
                                 headers=self.GOOGLE_TTS_HEADERS,
                                 proxies=urllib.request.getproxies(),
                                 verify=False)

                log.debug("headers-%i: %s", idx, r.request.headers)
                log.debug("url-%i: %s", idx, r.request.url)
                log.debug("status-%i: %s", idx, r.status_code)

                r.raise_for_status()
            except requests.exceptions.HTTPError:
                # Request successful, bad response
                raise gTTSError(tts=self, response=r)
            except requests.exceptions.RequestException as e:  # pragma: no cover
                # Request failed
                raise gTTSError(str(e))

            try:
                # Write
                for chunk in r.iter_content(chunk_size=1024):
                    fp.write(chunk)
                log.debug("part-%i written to %s", idx, fp)
            except (AttributeError, TypeError) as e:
                # A missing ``write`` attribute or a target rejecting bytes
                # is reported uniformly as a TypeError about ``fp``.
                raise TypeError(
                    "'fp' is not a file-like object or it does not take bytes: %s"
                    % str(e))
Ejemplo n.º 6
0
def test_len_unicode():
    """``_len`` should report 10 for a ten-character Chinese string."""
    sample = u"但在一个重要的任务上"
    expected_length = 10
    assert _len(sample) == expected_length
Ejemplo n.º 7
0
def test_len_ascii():
    """``_len`` should report 45 for a 45-character ASCII sentence."""
    sample = "Bacon ipsum dolor sit amet flank corned beef."
    expected_length = 45
    assert _len(sample) == expected_length
Ejemplo n.º 8
0
 def test_unicode(self):
     """``_len`` should report 10 for a ten-character Chinese string."""
     sample = u"但在一个重要的任务上"
     expected_length = 10
     self.assertEqual(_len(sample), expected_length)
Ejemplo n.º 9
0
 def test_ascii(self):
     """``_len`` should report 45 for a 45-character ASCII sentence."""
     sample = "Bacon ipsum dolor sit amet flank corned beef."
     expected_length = 45
     self.assertEqual(_len(sample), expected_length)