Beispiel #1
0
    def _dictionary(self, text: str, destination_language: str,
                    source_language: str) -> str:
        """
        Looks up the dictionary entries of `text` on dict.deepl.com

        When `source_language` is "AUTO", it is first resolved with `self._language(text)`.
        Both languages are then converted to their lowercased `Language(...).name` to build the request URL.

        Returns:
        --------
            tuple:
                `(source_language, entries)` (despite the `str` annotation), where `source_language` is the
                lowercased language name used for the request and `entries` holds the text (newlines removed)
                of every `<a>` element carrying the "dictLink" class.
            None:
                When the response status code is 400 or above.
        """
        if source_language == "AUTO":
            source_language = self._language(text)

        destination_language = Language(destination_language).name.lower()
        source_language = Language(source_language).name.lower()

        endpoint = "".join([
            "https://dict.deepl.com/", source_language, "-",
            destination_language, "/search?ajax=1&source=", source_language,
            "&onlyDictEntries=1&translator=dnsof7h3k2lgh3gda&delay=800&jsStatus=0&kind=full&eventkind=keyup&forleftside=true",
        ])
        request = self.session.post(endpoint, data={"query": text})

        # HTTP errors produce no result at all
        if request.status_code >= 400:
            return None

        soup = BeautifulSoup(request.text, "html.parser")
        # only the anchors flagged with the "dictLink" class are dictionary entries
        entries = [
            link.text.replace("\n", "")
            for link in soup.find_all("a")
            if link.has_attr("class") and "dictLink" in link["class"]
        ]
        return source_language, entries
Beispiel #2
0
    def _example(self, text: str, destination_language: str,
                 source_language: str):
        """
        Fetches usage examples for `text` from the Reverso Context query service

        When `source_language` is "auto", it is first resolved with `self._language(text)`.
        Both languages are converted to their `Language(...).alpha2` code before being sent.

        Returns:
        --------
            tuple:
                `(source_language, examples)` where `source_language` is the alpha2 code used for the
                request and `examples` is the "list" entry of the JSON response.
            None:
                When the response status code is 400 or above.
        """
        # TODO: nrows value

        if source_language == "auto":
            source_language = self._language(text)

        destination_language = Language(destination_language).alpha2
        source_language = Language(source_language).alpha2

        url = "https://context.reverso.net/bst-query-service"
        params = {
            "source_text": text,
            "source_lang": source_language,
            "target_lang": destination_language,
            "npage": 1,
            "nrows": 20,
            "expr_sug": 0,
            "json": 1,
            "dym_apply": True,
            "pos_reorder": 5
        }
        # NOTE(review): TLS certificate verification is disabled for this request (verify=False)
        request = self.session.post(
            url,
            params=params,
            headers={"Content-Type": "application/x-www-form-urlencoded"},
            verify=False)

        # Check the status before decoding the body: an error response is not
        # guaranteed to contain JSON, and decoding it first would raise instead
        # of letting the method report "no result".
        if request.status_code >= 400:
            return None

        return source_language, request.json()["list"]
Beispiel #3
0
 def _language_denormalize(self, language_code):
     """
     Converts a language code used by the translator into a `Language` object

     "auto-detect" (exact match) maps to `Language("auto")`, "zh-cn"/"zh-hans" (case-insensitive)
     map to `Language("zho")` and "zh-tw" (case-insensitive) maps to `Language("och")`.
     Any other code is handed to `Language` unchanged.
     """
     raw_code = str(language_code)
     if raw_code == "auto-detect":
         return Language("auto")

     lowered_code = raw_code.lower()
     if lowered_code in {"zh-cn", "zh-hans"}:
         return Language("zho")
     if lowered_code == "zh-tw":
         return Language("och")

     return Language(language_code)
    def _language_denormalize(self, language_code) -> Language:
        """
        This is the language denormalization function
        It receives the language code used by the translator and returns a "translatepy.language.Language" object

        "zh-cn" and "zh" (case-insensitive) are mapped to `Language("zho")`;
        any other code is handed to `Language` unchanged.
        """
        # The return annotation is `Language` (not `str`) because every branch returns a Language object
        if str(language_code).lower() in {"zh-cn", "zh"}:
            return Language("zho")
        return Language(language_code)
Beispiel #5
0
    def translate_html(self, html: Union[str, PageElement, Tag, BeautifulSoup], destination_language: str, source_language: str = "auto", parser: str = "html.parser", threads_limit: int = 100) -> Union[str, PageElement, Tag, BeautifulSoup]:
        """
        Translates every text node of the given HTML to the given language

        i.e
         English: `<div class="hello"><h1>Hello</h1> everyone and <a href="/welcome">welcome</a> to <span class="w-full">my website</span></div>`
         French: `<div class="hello"><h1>Bonjour</h1>tout le monde et<a href="/welcome">Bienvenue</a>à<span class="w-full">Mon site internet</span></div>`

        Note: Each text node is translated on its own, without any knowledge of the surrounding tags.
        `<span>Hello <strong>everyone</strong></span>` is therefore handled as "Hello" and "everyone" separately,
        not as the sentence "Hello everyone".

        Warning: `bs4.BeautifulSoup`, `bs4.element.PageElement` and `bs4.element.Tag` inputs are mutable and are modified in place.
        Pass the string version of the element to leave the original untouched:
        >>> result = Translate().translate_html(str(page_element), "French")

        Parameters:
        ----------
            html : str | bs4.element.PageElement | bs4.element.Tag | bs4.BeautifulSoup
                The HTML to translate. Anything that is not a BeautifulSoup object is converted with `str()` and parsed.
            destination_language : str
                The language the HTML needs to be translated in.
            source_language : str, default = "auto"
                The language of the HTML.
            parser : str, default = "html.parser"
                The parser handed to BeautifulSoup when `html` has to be parsed.
            threads_limit : int, default = 100
                The size of the thread pool used to translate the text nodes

        Returns:
        --------
            BeautifulSoup:
                The very same object as the `html` input, with its text nodes replaced, when the input is a
                bs4.BeautifulSoup, bs4.element.PageElement or bs4.element.Tag instance.
            str:
                The string version of the translated page in any other case.

        """
        target_language = Language(destination_language)
        origin_language = Language(source_language)

        def _replace_with_translation(node: NavigableString):
            # best effort: a node which fails to translate is left as it is
            try:
                translated = self.translate(str(node), destination_language=target_language, source_language=origin_language)
                node.replace_with(translated.result)
            except Exception:
                pass

        received_soup = isinstance(html, (PageElement, Tag, BeautifulSoup))
        page = html if received_soup else BeautifulSoup(str(html), str(parser))

        # NOTE: elements are not filtered by class here (i.e a "notranslate" class is not skipped)
        text_nodes = []
        for candidate in page.find_all(text=True, recursive=True):
            if isinstance(candidate, PreformattedString):
                continue
            if remove_spaces(candidate) == "":
                continue
            text_nodes.append(candidate)

        with ThreadPool(int(threads_limit)) as pool:
            pool.map(_replace_with_translation, text_nodes)

        return page if received_soup else str(page)
Beispiel #6
0
    def _text_to_speech(self, text, speed, gender, source_language):
        """
        Requests the spoken version of `text` from the Reverso voice service

        When `source_language` is "auto", it is first resolved with `self._language(text)`.
        The list of available voices is downloaded and the first voice matching both the language name
        and the gender ("M" when `gender` is "male", "F" otherwise) is used.
        The text is sent base64-encoded.

        Returns:
        --------
            tuple:
                `(source_language, content)` where `content` is the raw body of the voice stream response.
            None:
                When the voice stream response status code is 400 or above.

        Raises:
        -------
            UnsupportedMethod:
                When no available voice matches the language and gender.
        """
        if source_language == "auto":
            source_language = self._language(text)

        # NOTE(review): TLS certificate verification is disabled for both requests (verify=False)
        voices_response = self.session.get(
            "https://voice.reverso.net/RestPronunciation.svc/v1/output=json/GetAvailableVoices",
            verify=False)
        available_voices = voices_response.json()["Voices"]

        gender_code = "F"
        if gender == "male":
            gender_code = "M"

        encoded_text = base64.b64encode(text.encode()).decode()

        # "eng" is looked up under the "us english" voice name
        if source_language == "eng":
            voice_language = "US English".lower()
        else:
            voice_language = Language.by_reverso(source_language).name.lower()

        matching_voice = next(
            (candidate for candidate in available_voices
             if candidate["Language"].lower() == voice_language
             and candidate["Gender"] == gender_code),
            None)
        if matching_voice is None:
            raise UnsupportedMethod(
                "{source_lang} language not supported by Reverso".format(
                    source_lang=source_language))
        voice = matching_voice["Name"]

        stream_url = "https://voice.reverso.net/RestPronunciation.svc/v1/output=json/GetVoiceStream/voiceName={}?voiceSpeed={}&inputText={}".format(
            voice, speed, encoded_text)
        response = self.session.get(stream_url, verify=False)
        if response.status_code >= 400:
            return None
        return source_language, response.content
Beispiel #7
0
 def _language_normalize(self, language):
     """
     Converts a language into the language code used by the translator

     `language` is wrapped in `Language` first, so the lookup below works on the
     resulting object instead of relying on the raw argument having an `id` attribute.

     Returns "zh-Hans" for the "zho" id, "zh-Hant" for the "och" id
     and the `alpha2` attribute of the language in any other case.
     """
     _language = Language(language)
     # read the id from the Language object, not from the raw argument
     if _language.id == "zho":
         return "zh-Hans"
     elif _language.id == "och":
         return "zh-Hant"
     return _language.alpha2
Beispiel #8
0
    def _language_denormalize(self, language_code: str) -> Language:
        """
        Language denormalization function

        Receives the language code used by the translator and returns the matching
        "translatepy.language.Language" object, built directly from the given code.
        """
        denormalized = Language(language_code)
        return denormalized
Beispiel #9
0
    def translate(self, text: str, destination_language: str, source_language: str = "auto") -> TranslationResult:
        """
        Translates the given text to the given language

        i.e Good morning (en) --> おはようございます (ja)

        Every entry of `self.services` is instantiated with `self._instantiate_translator` before being used.
        When `self.FAST_MODE` is set, all the services are queried in their own thread and the value obtained
        from the result queue is returned. Otherwise the services are tried in order and the first one which
        does not fail gives the result.

        Raises:
        -------
            NoResult:
                When no service returned a valid result.
        """
        target_language = Language(destination_language)
        origin_language = Language(source_language)

        def _ask(service: BaseTranslator, position: int):
            instance = self._instantiate_translator(service, self.services, position)
            outcome = instance.translate(
                text=text, destination_language=target_language, source_language=origin_language
            )
            if outcome is None:
                raise NoResult("{service} did not return any value".format(service=instance.__repr__()))
            return outcome

        if self.FAST_MODE:
            answers = Queue()

            def _worker(service: BaseTranslator, position: int):
                # a failing service simply never puts anything in the queue
                try:
                    answers.put(_ask(service, position))
                except Exception:
                    pass

            workers = []
            for position, service in enumerate(self.services):
                worker = Thread(target=_worker, args=(service, position))
                worker.start()
                workers.append(worker)

            answer = answers.get(threads=workers)  # wait for a value and return it
            if answer is None:
                raise NoResult("No service has returned a valid result")
            return answer

        for position, service in enumerate(self.services):
            try:
                return _ask(service, position)
            except Exception:
                continue

        # reached only when every service failed
        raise NoResult("No service has returned a valid result")
Beispiel #10
0
    def dictionary(self, text: str, destination_language: str, source_language="auto") -> DictionaryResult:
        """
        Returns the dictionary result (a list of possible translations) for the given text

        i.e Hello --> {'featured': ['ハロー', 'こんにちは'], 'less_common': ['hello', '今日は', 'どうも', 'こんにちわ', 'こにちは', 'ほいほい', 'おーい', 'アンニョンハセヨ', 'アニョハセヨ']}

        Every entry of `self.services` is instantiated with `self._instantiate_translator` before being used.
        When `self.FAST_MODE` is set, all the services are queried in their own thread and the value obtained
        from the result queue is returned. Otherwise the services are tried in order and the first one which
        does not fail gives the result.

        Raises:
        -------
            NoResult:
                When no service returned a valid result.
        """
        target_language = Language(destination_language)
        origin_language = Language(source_language)

        def _ask(service: BaseTranslator, position: int):
            instance = self._instantiate_translator(service, self.services, position)
            outcome = instance.dictionary(
                text=text, destination_language=target_language, source_language=origin_language
            )
            if outcome is None:
                raise NoResult("{service} did not return any value".format(service=instance.__repr__()))
            return outcome

        if self.FAST_MODE:
            answers = Queue()

            def _worker(service: BaseTranslator, position: int):
                # a failing service simply never puts anything in the queue
                try:
                    answers.put(_ask(service, position))
                except Exception:
                    pass

            workers = []
            for position, service in enumerate(self.services):
                worker = Thread(target=_worker, args=(service, position))
                worker.start()
                workers.append(worker)

            answer = answers.get(threads=workers)  # wait for a value and return it
            if answer is None:
                raise NoResult("No service has returned a valid result")
            return answer

        for position, service in enumerate(self.services):
            try:
                return _ask(service, position)
            except Exception:
                continue

        # reached only when every service failed
        raise NoResult("No service has returned a valid result")
Beispiel #11
0
    def example(self, text: str, destination_language: str, source_language: str = "auto") -> ExampleResult:
        """
        Returns a set of examples / use cases for the given word

        i.e Hello --> ['Hello friends how are you?', 'Hello im back again.']

        Every entry of `self.services` is instantiated with `self._instantiate_translator` before being used.
        When `self.FAST_MODE` is set, all the services are queried in their own thread and the value obtained
        from the result queue is returned. Otherwise the services are tried in order and the first one which
        does not fail gives the result.

        Raises:
        -------
            NoResult:
                When no service returned a valid result.
        """
        target_language = Language(destination_language)
        origin_language = Language(source_language)

        def _ask(service: BaseTranslator, position: int):
            instance = self._instantiate_translator(service, self.services, position)
            outcome = instance.example(
                text=text, destination_language=target_language, source_language=origin_language
            )
            if outcome is None:
                raise NoResult("{service} did not return any value".format(service=instance.__repr__()))
            return outcome

        if self.FAST_MODE:
            answers = Queue()

            def _worker(service: BaseTranslator, position: int):
                # a failing service simply never puts anything in the queue
                try:
                    answers.put(_ask(service, position))
                except Exception:
                    pass

            workers = []
            for position, service in enumerate(self.services):
                worker = Thread(target=_worker, args=(service, position))
                worker.start()
                workers.append(worker)

            answer = answers.get(threads=workers)  # wait for a value and return it
            if answer is None:
                raise NoResult("No service has returned a valid result")
            return answer

        for position, service in enumerate(self.services):
            try:
                return _ask(service, position)
            except Exception:
                continue

        # reached only when every service failed
        raise NoResult("No service has returned a valid result")
Beispiel #12
0
    def _detect_and_validate_lang(self, language: str) -> str:
        """
        Converts the given language into the format used by the service and checks that it is supported.

        `language` may already be a `Language` instance; anything else is passed to `Language`.
        The value returned is the result of `self._language_normalize`.

        Raises UnsupportedLanguage when `self._supported_languages` is not empty
        and does not contain the normalized language.
        """
        parsed = language if isinstance(language, Language) else Language(language)
        normalized = self._language_normalize(parsed)

        # an empty `_supported_languages` disables the check
        if self._supported_languages and normalized not in self._supported_languages:
            message = "The language {language_code} is not supported by {service}".format(
                language_code=language, service=str(self))
            raise UnsupportedLanguage(message)

        return normalized
Beispiel #13
0
    def spellcheck(self, text: str, source_language: str = "auto") -> SpellcheckResult:
        """
        Checks the spelling of a given text

        i.e God morning --> Good morning

        Every entry of `self.services` is instantiated with `self._instantiate_translator` before being used.
        When `self.FAST_MODE` is set, all the services are queried in their own thread and the value obtained
        from the result queue is returned. Otherwise the services are tried in order and the first one which
        does not fail gives the result.

        Raises:
        -------
            NoResult:
                When no service returned a valid result.
        """
        origin_language = Language(source_language)

        def _ask(service: BaseTranslator, position: int):
            instance = self._instantiate_translator(service, self.services, position)
            outcome = instance.spellcheck(
                text=text, source_language=origin_language
            )
            if outcome is None:
                raise NoResult("{service} did not return any value".format(service=instance.__repr__()))
            return outcome

        if self.FAST_MODE:
            answers = Queue()

            def _worker(service: BaseTranslator, position: int):
                # a failing service simply never puts anything in the queue
                try:
                    answers.put(_ask(service, position))
                except Exception:
                    pass

            workers = []
            for position, service in enumerate(self.services):
                worker = Thread(target=_worker, args=(service, position))
                worker.start()
                workers.append(worker)

            answer = answers.get(threads=workers)  # wait for a value and return it
            if answer is None:
                raise NoResult("No service has returned a valid result")
            return answer

        for position, service in enumerate(self.services):
            try:
                return _ask(service, position)
            except Exception:
                continue

        # reached only when every service failed
        raise NoResult("No service has returned a valid result")
Beispiel #14
0
 def _language_denormalize(self, language_code):
     """
     Converts a language code used by the translator into a `Language` object

     "zh" and "zh-cn" (case-insensitive) map to `Language("zho")`;
     any other code is handed to `Language` unchanged.
     """
     lowered_code = str(language_code).lower()
     is_chinese = lowered_code in {"zh", "zh-cn"}
     return Language("zho") if is_chinese else Language(language_code)
Beispiel #15
0
    def text_to_speech(self, text: str, speed: int = 100, gender: str = "female", source_language: str = "auto") -> TextToSpechResult:
        """
        Gives back the text to speech result for the given text

        Every entry of `self.services` is instantiated with `self._instantiate_translator` before being used.
        When `self.FAST_MODE` is set, all the services are queried in their own thread and the value obtained
        from the result queue is returned. Otherwise the services are tried in order and the first one which
        does not fail gives the result.

        Args:
          text: the given text
          speed: the speed value forwarded to the service
          gender: the gender of the voice, forwarded to the service
          source_language: the source language

        Returns:
            the mp3 file as bytes

        Raises:
            NoResult: when no service returned a valid result

        Example:
            >>> from translatepy import Translator
            >>> t = Translator()
            >>> result = t.text_to_speech("Hello, how are you?")
            >>> with open("output.mp3", "wb") as output: # open a binary (b) file to write (w)
            ...     output.write(result.result)
            # Or you can just use the write_to_file method:
            >>> result.write_to_file("output.mp3")
            >>> print("Output of Text to Speech is available in output.mp3!")

            # the result is an MP3 file with the text to speech output
        """
        origin_language = Language(source_language)

        def _ask(service: BaseTranslator, position: int):
            instance = self._instantiate_translator(service, self.services, position)
            outcome = instance.text_to_speech(
                text=text, speed=speed, gender=gender, source_language=origin_language
            )
            if outcome is None:
                raise NoResult("{service} did not return any value".format(service=instance.__repr__()))
            return outcome

        if self.FAST_MODE:
            answers = Queue()

            def _worker(service: BaseTranslator, position: int):
                # a failing service simply never puts anything in the queue
                try:
                    answers.put(_ask(service, position))
                except Exception:
                    pass

            workers = []
            for position, service in enumerate(self.services):
                worker = Thread(target=_worker, args=(service, position))
                worker.start()
                workers.append(worker)

            answer = answers.get(threads=workers)  # wait for a value and return it
            if answer is None:
                raise NoResult("No service has returned a valid result")
            return answer

        for position, service in enumerate(self.services):
            try:
                return _ask(service, position)
            except Exception:
                continue

        # reached only when every service failed
        raise NoResult("No service has returned a valid result")
Beispiel #16
0
 def _language_denormalize(self, language_code):
     """
     Converts a language code used by the translator into a `Language` object

     "zh-cn" (case-insensitive) maps to `Language("zho")` and "zh-tw" (case-insensitive)
     maps to `Language("och")`. Any other code is handed to `Language` unchanged.
     """
     lowered_code = str(language_code).lower()
     if lowered_code == "zh-cn":
         return Language("zho")
     if lowered_code == "zh-tw":
         return Language("och")
     return Language(language_code)
Beispiel #17
0
def vectorize(string: str, data: Language):
    """
    Builds the serializable vector entry of `string` for the language `data`

    The entry holds the language id ("i"), the vector's set as a list ("s"),
    the vector's length ("l") and the vector's counter as a dict ("c").

    Raises ValueError when the set of the vector is empty.
    """
    vector = StringVector(string)
    # nothing to store for a string which gives an empty set
    if len(vector.set) == 0:
        raise ValueError

    entry = {
        "i": data.id,
        "s": list(vector.set),
        "l": vector.length,
        "c": dict(vector.counter)
    }
    return entry


# Fill `results` with one vector per foreign-language name of every eligible language
for lang in LANGUAGE_DATA:
    l = Language(lang)
    # skip the languages without an alpha2 code or which are neither ancient nor living
    if l.alpha2 is None or l.extra.type not in {Types.ANCIENT, Types.LIVING}:
        continue

    for _, name in l.in_foreign_languages.items():
        # lowercase the name, clean it up with the regex, then drop the spaces
        normalized_language = LANGUAGE_CLEANUP_REGEX.sub("", str(name).lower())
        normalized_language = normalized_language.replace(" ", "")
        try:
            results[normalized_language] = vectorize(normalized_language, l)
        except Exception as e:
            # report the failure and go on with the next name
            print(e)

with open("vector_results.py", "w") as out:
    out.write(
        f"VECTORS = {dumps(results, ensure_ascii=False, separators=(',', ':')).replace('[', '{').replace(']', '}')}"