    def test_other_punctuation(self):
        # String of the unique 'other punctuations'
        other_punc_str = ''.join(
            set(symbols.ALL_PUNC) - set(symbols.TONE_MARKS) -
            set(symbols.PERIOD_COMMA))

        t = Tokenizer([other_punctuation])
        self.assertEqual(len(t.run(other_punc_str)) - 1, len(other_punc_str))
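A hedged aside on the arithmetic: Tokenizer.run splits the input once per delimiter, so n punctuation characters yield n + 1 tokens, which is what the - 1 accounts for. A minimal runnable sketch, assuming the gtts package's tokenizer module is importable (the input string and expected count are illustrative):

from gtts.tokenizer import Tokenizer
from gtts.tokenizer.tokenizer_cases import other_punctuation

t = Tokenizer([other_punctuation])
# Three 'other' punctuation marks (';', '(', ')') -> four tokens
print(len(t.run("a;b(c)d")))  # 4, under plain re.split semantics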
Example #2
    def __init__(
            self,
            text,
            tld='com',
            lang='en',
            slow=False,
            lang_check=True,
            pre_processor_funcs=[
                pre_processors.tone_marks,
                pre_processors.end_of_line,
                pre_processors.abbreviations,
                pre_processors.word_sub
            ],
            tokenizer_func=Tokenizer([
                tokenizer_cases.tone_marks,
                tokenizer_cases.period_comma,
                tokenizer_cases.colon,
                tokenizer_cases.other_punctuation
            ]).run
    ):

        # Debug
        for k, v in dict(locals()).items():
            if k == 'self':
                continue
            log.debug("%s: %s", k, v)

        # Text
        assert text, 'No text to speak'
        self.text = text

        # Translate URL top-level domain
        self.tld = tld

        # Language
        self.lang_check = lang_check
        self.lang = lang

        if self.lang_check:
            # Fallback lang in case it is deprecated
            self.lang = _fallback_deprecated_lang(lang)

            try:
                langs = tts_langs()
                if self.lang not in langs:
                    raise ValueError("Language not supported: %s" % lang)
            except RuntimeError as e:
                log.debug(str(e), exc_info=True)
                log.warning(str(e))

        # Read speed
        if slow:
            self.speed = Speed.SLOW
        else:
            self.speed = Speed.NORMAL

        # Pre-processors and tokenizer
        self.pre_processor_funcs = pre_processor_funcs
        self.tokenizer_func = tokenizer_func
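For context, a hedged usage sketch of this constructor; it assumes a gTTS release that accepts the tld parameter (as in the signature above), an installed gtts package, and a network connection. The filename 'hello.mp3' is illustrative:

from gtts import gTTS

tts = gTTS("Hello, World!", tld="com", lang="en", slow=False)
tts.save("hello.mp3")  # fetch the synthesized speech and write an MP3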
Example #3
    def __init__(
            self,
            text,
            lang='en',
            slow=False,
            lang_check=True,
            pre_processor_funcs=[
                pre_processors.tone_marks,
                pre_processors.end_of_line,
                pre_processors.abbreviations,
                pre_processors.word_sub
            ],
            tokenizer_func=Tokenizer([
                tokenizer_cases.tone_marks,
                tokenizer_cases.period_comma,
                tokenizer_cases.colon,
                tokenizer_cases.other_punctuation
            ]).run
    ):

        # Debug
        for k, v in locals().items():
            if k == 'self':
                continue
            log.debug("%s: %s", str(k), str(v))

        # Text
        assert text, 'No text to speak'
        self.text = text

        # Language
        if lang_check:
            try:
                langs = tts_langs()
                if lang.lower() not in langs:
                    raise ValueError("Language not supported: %s" % lang)
            except RuntimeError as e:
                log.debug(str(e), exc_info=True)
                log.warning(str(e))

        self.lang_check = lang_check
        self.lang = lang.lower()

        # Read speed
        if slow:
            self.speed = Speed.SLOW
        else:
            self.speed = Speed.NORMAL

        # Pre-processors and tokenizer
        self.pre_processor_funcs = pre_processor_funcs
        self.tokenizer_func = tokenizer_func

        # Google Translate token
        self.token = gtts_token.Token()
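Both pre_processor_funcs and tokenizer_func are plain callables, so they can be overridden per instance. A minimal sketch, assuming the public gtts.tokenizer API used above, that pre-processes only abbreviations and splits only on tone marks and periods/commas:

from gtts import gTTS
from gtts.tokenizer import Tokenizer, pre_processors, tokenizer_cases

tts = gTTS(
    "Hello, world. How are you?",
    pre_processor_funcs=[pre_processors.abbreviations],
    tokenizer_func=Tokenizer([
        tokenizer_cases.tone_marks,
        tokenizer_cases.period_comma,
    ]).run,
)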
Example #4
    def __init__(self,
                 text,
                 lang='en',
                 slow=False,
                 lang_check=True,
                 pre_processor_funcs=[
                     pre_processors.tone_marks, pre_processors.end_of_line,
                     pre_processors.abbreviations, pre_processors.word_sub
                 ],
                 tokenizer_func=Tokenizer([
                     tokenizer_cases.tone_marks, tokenizer_cases.period_comma,
                     tokenizer_cases.colon, tokenizer_cases.other_punctuation
                 ]).run):

        # Debug
        for k, v in locals().items():
            if k == 'self':
                continue
            log.debug("%s: %s", k, v)

        # Text
        assert text, 'No text to speak'
        self.text = text

        # Language
        if lang_check:
            try:
                langs = tts_langs()
                if lang.lower() not in langs:
                    raise ValueError("Language not supported: %s" % lang)
            except RuntimeError as e:
                log.debug(str(e), exc_info=True)
                log.warning(str(e))

        self.lang_check = lang_check
        self.lang = lang.lower()

        # Read speed
        if slow:
            self.speed = Speed.SLOW
        else:
            self.speed = Speed.NORMAL

        # Pre-processors and tokenizer
        self.pre_processor_funcs = pre_processor_funcs
        self.tokenizer_func = tokenizer_func

        # Google Translate token
        self.token = gtts_token.Token()
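The lang_check branches above depend on tts_langs. A hedged sketch of that lookup, assuming gtts.lang.tts_langs is importable; it may need network access, which is why the constructors catch RuntimeError and fall back to a warning:

from gtts.lang import tts_langs

lang = "en"
langs = tts_langs()  # dict such as {'af': 'Afrikaans', ..., 'en': 'English'}
if lang not in langs:
    raise ValueError("Language not supported: %s" % lang)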
    def test_tone_marks(self):
        t = Tokenizer([tone_marks])
        _in = "Lorem? Ipsum!"
        _out = ['Lorem?', 'Ipsum!']
        self.assertEqual(t.run(_in), _out)

    def test_legacy_all_punctuation(self):
        t = Tokenizer([legacy_all_punctuation])
        self.assertEqual(
            len(t.run(symbols.ALL_PUNC)) - 1, len(symbols.ALL_PUNC))

    def test_period_comma(self):
        t = Tokenizer([period_comma])
        _in = "Hello, it's 24.5 degrees in the U.K. today. $20,000,000."
        _out = ['Hello', "it's 24.5 degrees in the U.K. today", '$20,000,000.']
        self.assertEqual(t.run(_in), _out)

    def test_colon(self):
        t = Tokenizer([colon])
        _in = "It's now 6:30 which means: morning missing:space"
        _out = ["It's now 6:30 which means", ' morning missing', 'space']
        self.assertEqual(t.run(_in), _out)
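Taken together, these are the same cases the constructors above chain into their default tokenizer_func. A minimal sketch, assuming the same imports as the tests, of the combined behavior:

t = Tokenizer([tone_marks, period_comma, colon])
print(t.run("Hello, it's 6:30. Ready? Go!"))
# Per the tests above: tone marks ('?', '!') stay attached to their token,
# sentence-final periods/commas are dropped, and time-like '6:30' is kept.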