def _dictionary(self, text: str, destination_language: str, source_language: str) -> str:
    """
    Queries the dict.deepl.com search endpoint for dictionary entries of `text`

    When `source_language` is "AUTO", the language is first resolved with `self._language`.
    Both languages are converted to their lowercased `Language(...).name` to build the URL.

    Returns a `(source_language, entries)` tuple where `entries` is the text (newlines removed)
    of every `<a>` element carrying the "dictLink" class.
    Nothing is returned (None) when the HTTP status code is 400 or above.

    NOTE(review): the return annotation says `str` although a tuple is returned.
    """
    if source_language == "AUTO":
        source_language = self._language(text)

    destination_language = Language(destination_language).name.lower()
    source_language = Language(source_language).name.lower()

    endpoint = (
        "https://dict.deepl.com/{source}-{dest}/search?ajax=1&source={source}"
        "&onlyDictEntries=1&translator=dnsof7h3k2lgh3gda&delay=800&jsStatus=0"
        "&kind=full&eventkind=keyup&forleftside=true"
    ).format(source=source_language, dest=destination_language)

    request = self.session.post(endpoint, data={"query": text})
    if request.status_code >= 400:
        return None

    response = BeautifulSoup(request.text, "html.parser")
    # Every "dictLink" anchor is collected in one flat list: no distinction is
    # made between "featured" and less common entries.
    _result = [
        anchor.text.replace("\n", "")
        for anchor in response.find_all("a")
        if anchor.has_attr('class') and "dictLink" in anchor["class"]
    ]
    return source_language, _result
def _example(self, text: str, destination_language: str, source_language: str):
    """
    Fetches usage examples for `text` from the Reverso Context "bst-query-service" endpoint

    When `source_language` is "auto", the language is first resolved with `self._language`.
    Both languages are sent as their `Language(...).alpha2` code.

    Returns a `(source_language, examples)` tuple where `examples` is the "list" entry of
    the JSON response. Nothing is returned (None) when the HTTP status code is 400 or above.
    """
    # TODO: nrows value
    if source_language == "auto":
        source_language = self._language(text)

    destination_language = Language(destination_language).alpha2
    source_language = Language(source_language).alpha2

    query = dict(
        source_text=text,
        source_lang=source_language,
        target_lang=destination_language,
        npage=1,
        nrows=20,
        expr_sug=0,
        json=1,
        dym_apply=True,
        pos_reorder=5,
    )

    # NOTE(review): certificate verification is disabled for this request.
    request = self.session.post(
        "https://context.reverso.net/bst-query-service",
        params=query,
        headers={"Content-Type": "application/x-www-form-urlencoded"},
        verify=False,
    )

    # The body is decoded before the status check, so a non-JSON body raises here.
    payload = request.json()
    if request.status_code >= 400:
        return None
    return source_language, payload["list"]
def _language_denormalize(self, language_code):
    """
    Converts a language code used by the translator into a `Language` object

    Special cases handled before the generic lookup:
    - "auto-detect" (exact match, case sensitive) --> Language("auto")
    - "zh-cn" / "zh-hans" (case insensitive)      --> Language("zho")
    - "zh-tw" (case insensitive)                  --> Language("och")
    """
    code = str(language_code)
    if code == "auto-detect":
        return Language("auto")

    chinese_variants = {"zh-cn": "zho", "zh-hans": "zho", "zh-tw": "och"}
    # Unknown codes fall through untouched (the original object, not its string form).
    return Language(chinese_variants.get(code.lower(), language_code))
def _language_denormalize(self, language_code) -> "Language":
    """
    This is the language denormalization function
    It receives the language code used by the translator and returns the matching
    "translatepy.language.Language" object (not a string)

    "zh-cn" and "zh" (case insensitive) are both mapped to Language("zho");
    any other code is handed to `Language` unchanged.
    """
    if str(language_code).lower() in {"zh-cn", "zh"}:
        return Language("zho")
    return Language(language_code)
def translate_html(self, html: Union[str, PageElement, Tag, BeautifulSoup], destination_language: str, source_language: str = "auto", parser: str = "html.parser", threads_limit: int = 100) -> Union[str, PageElement, Tag, BeautifulSoup]:
    """
    Translates the given HTML string or BeautifulSoup object to the given language

    i.e
     English: `<div class="hello"><h1>Hello</h1> everyone and <a href="/welcome">welcome</a> to <span class="w-full">my website</span></div>`
     French: `<div class="hello"><h1>Bonjour</h1>tout le monde et<a href="/welcome">Bienvenue</a>à<span class="w-full">Mon site internet</span></div>`

    Note: This method is not perfect since it is not tag/context aware. Example: `<span>Hello <strong>everyone</strong></span>` will not be understood as
    "Hello everyone" with "everyone" in bold but rather "Hello" and "everyone" separately.

    Warning: If you give a `bs4.BeautifulSoup`, `bs4.element.PageElement` or `bs4.element.Tag` input (which are mutable), they will be modified.
    If you don't want this behavior, please make sure to pass the string version of the element:
    >>> result = Translate().translate_html(str(page_element), "French")

    Parameters:
    ----------
        html : str | bs4.element.PageElement | bs4.element.Tag | bs4.BeautifulSoup
            The HTML string to be translated. This can also be an instance of BeautifulSoup's `BeautifulSoup` element, `PageElement` or `Tag` element.
        destination_language : str
            The language the HTML string needs to be translated in.
        source_language : str, default = "auto"
            The language of the HTML string.
        parser : str, default = "html.parser"
            The parser that BeautifulSoup will use to parse the HTML string.
        threads_limit : int, default = 100
            The maximum number of threads that will be spawned by translate_html.
            No more threads than there are text nodes to translate are started.

    Returns:
    --------
        BeautifulSoup:
            The result will be the same element as the input `html` parameter with the values modified if the given
            input is of bs4.BeautifulSoup, bs4.element.PageElement or bs4.element.Tag instance.
        str:
            The result will be a string in any other case.
    """
    dest_lang = Language(destination_language)
    source_lang = Language(source_language)
    is_bs4_input = isinstance(html, (PageElement, Tag, BeautifulSoup))

    def _translate(node: NavigableString):
        try:
            node.replace_with(self.translate(str(node), destination_language=dest_lang, source_language=source_lang).result)
        except Exception:
            # best effort: a node that could not be translated (no result or
            # an error occurred) keeps its original text
            pass

    page = html if is_bs4_input else BeautifulSoup(str(html), str(parser))

    # Every non-blank text node is translated on its own, except the
    # PreformattedString ones.
    # NOTE: text inside elements carrying a "notranslate" class is not filtered out.
    nodes = [
        tag for tag in page.find_all(text=True, recursive=True)
        if not isinstance(tag, PreformattedString) and remove_spaces(tag) != ""
    ]

    max_threads = int(threads_limit)
    if nodes:
        # Starting more worker threads than there are nodes is pure overhead,
        # and an empty document does not need a pool at all.
        with ThreadPool(min(max_threads, len(nodes))) as pool:
            pool.map(_translate, nodes)

    return page if is_bs4_input else str(page)
def _text_to_speech(self, text, speed, gender, source_language):
    """
    Requests the spoken version of `text` from the Reverso voice service

    The list of available voices is downloaded first, then the first voice whose language
    and gender match is used to request the audio stream.
    `gender` is sent as "M" when it equals "male" and as "F" for any other value.
    When `source_language` is "auto", the language is first resolved with `self._language`.

    Returns a `(source_language, audio_bytes)` tuple, or nothing (None) when the stream
    request answers with a status code of 400 or above.
    Raises `UnsupportedMethod` when no voice matches the language and gender.
    """
    if source_language == "auto":
        source_language = self._language(text)

    # NOTE(review): certificate verification is disabled for both requests below.
    voices_response = self.session.get(
        "https://voice.reverso.net/RestPronunciation.svc/v1/output=json/GetAvailableVoices",
        verify=False,
    )
    available_voices = voices_response.json()["Voices"]

    wanted_gender = "M" if gender == "male" else "F"
    # The text travels base64-encoded inside the URL
    encoded_text = base64.b64encode(text.encode()).decode()

    if source_language == "eng":
        wanted_language = "US English".lower()
    else:
        wanted_language = Language.by_reverso(source_language).name.lower()

    matching_voices = (
        candidate for candidate in available_voices
        if candidate["Language"].lower() == wanted_language and candidate["Gender"] == wanted_gender
    )
    chosen = next(matching_voices, None)
    if chosen is None:
        raise UnsupportedMethod(
            "{source_lang} language not supported by Reverso".format(source_lang=source_language))
    voice = chosen["Name"]

    url = "https://voice.reverso.net/RestPronunciation.svc/v1/output=json/GetVoiceStream/voiceName={}?voiceSpeed={}&inputText={}".format(
        voice, speed, encoded_text)
    response = self.session.get(url, verify=False)
    if response.status_code >= 400:
        return None
    return source_language, response.content
def _language_normalize(self, language):
    """
    Converts a language into the code expected by the translator

    `language` is first wrapped in a `Language` object, so anything `Language`
    accepts can be given.

    Returns "zh-Hans" when the language id is "zho", "zh-Hant" when it is "och",
    and the `alpha2` code of the language in every other case.
    """
    _language = Language(language)
    # The id must be read from the constructed `Language` object: the raw
    # `language` argument has no `id` attribute when it is a plain string.
    if _language.id == "zho":
        return "zh-Hans"
    elif _language.id == "och":
        return "zh-Hant"
    return _language.alpha2
def _language_denormalize(self, language_code: str) -> Language:
    """
    This is the language denormalization function
    It receives the language code used by the translator and gives back the
    "translatepy.language.Language" object built from it, without any special casing
    """
    denormalized = Language(language_code)
    return denormalized
def translate(self, text: str, destination_language: str, source_language: str = "auto") -> TranslationResult:
    """
    Translates the given text to the given language

    i.e Good morning (en) --> おはようございます (ja)

    The services in `self.services` are tried in order and the first result obtained is returned.
    When `self.FAST_MODE` is set, every service is queried in its own thread instead and
    the value handed back by the queue is returned.

    Raises `NoResult` when no service gave a result.
    """
    dest_lang = Language(destination_language)
    source_lang = Language(source_language)

    def _ask(service: BaseTranslator, position: int):
        # ask a single service, treating a None answer as a failure
        instance = self._instantiate_translator(service, self.services, position)
        answer = instance.translate(
            text=text,
            destination_language=dest_lang,
            source_language=source_lang
        )
        if answer is None:
            raise NoResult("{service} did not return any value".format(service=repr(instance)))
        return answer

    def _ask_into(queue: Queue, service: BaseTranslator, position: int):
        # thread target: a failing service simply contributes nothing to the queue
        try:
            queue.put(_ask(service, position))
        except Exception:
            pass

    if self.FAST_MODE:
        _queue = Queue()
        threads = []
        for position, service in enumerate(self.services):
            worker = Thread(target=_ask_into, args=(_queue, service, position))
            worker.start()
            threads.append(worker)
        # wait for a value; presumably None comes back once every thread ended empty-handed
        result = _queue.get(threads=threads)
        if result is None:
            raise NoResult("No service has returned a valid result")
        return result

    for position, service in enumerate(self.services):
        try:
            return _ask(service, position)
        except Exception:
            pass  # this service failed, try the next one
    raise NoResult("No service has returned a valid result")
def dictionary(self, text: str, destination_language: str, source_language="auto") -> DictionaryResult:
    """
    Returns a list of translations that are classified between two categories: featured and less common

    i.e Hello --> {'featured': ['ハロー', 'こんにちは'], 'less_common': ['hello', '今日は', 'どうも', 'こんにちわ', 'こにちは', 'ほいほい', 'おーい', 'アンニョンハセヨ', 'アニョハセヨ']}

    The services in `self.services` are tried in order and the first result obtained is returned.
    When `self.FAST_MODE` is set, every service is queried in its own thread instead and
    the value handed back by the queue is returned.

    Raises `NoResult` when no service gave a result.
    """
    dest_lang = Language(destination_language)
    source_lang = Language(source_language)

    def _ask(service: BaseTranslator, position: int):
        # ask a single service, treating a None answer as a failure
        instance = self._instantiate_translator(service, self.services, position)
        answer = instance.dictionary(
            text=text,
            destination_language=dest_lang,
            source_language=source_lang
        )
        if answer is None:
            raise NoResult("{service} did not return any value".format(service=repr(instance)))
        return answer

    def _ask_into(queue: Queue, service: BaseTranslator, position: int):
        # thread target: a failing service simply contributes nothing to the queue
        try:
            queue.put(_ask(service, position))
        except Exception:
            pass

    if self.FAST_MODE:
        _queue = Queue()
        threads = []
        for position, service in enumerate(self.services):
            worker = Thread(target=_ask_into, args=(_queue, service, position))
            worker.start()
            threads.append(worker)
        # wait for a value; presumably None comes back once every thread ended empty-handed
        result = _queue.get(threads=threads)
        if result is None:
            raise NoResult("No service has returned a valid result")
        return result

    for position, service in enumerate(self.services):
        try:
            return _ask(service, position)
        except Exception:
            pass  # this service failed, try the next one
    raise NoResult("No service has returned a valid result")
def example(self, text: str, destination_language: str, source_language: str = "auto") -> ExampleResult:
    """
    Returns a set of examples / use cases for the given word

    i.e Hello --> ['Hello friends how are you?', 'Hello im back again.']

    The services in `self.services` are tried in order and the first result obtained is returned.
    When `self.FAST_MODE` is set, every service is queried in its own thread instead and
    the value handed back by the queue is returned.

    Raises `NoResult` when no service gave a result.
    """
    dest_lang = Language(destination_language)
    source_lang = Language(source_language)

    def _ask(service: BaseTranslator, position: int):
        # ask a single service, treating a None answer as a failure
        instance = self._instantiate_translator(service, self.services, position)
        answer = instance.example(
            text=text,
            destination_language=dest_lang,
            source_language=source_lang
        )
        if answer is None:
            raise NoResult("{service} did not return any value".format(service=repr(instance)))
        return answer

    def _ask_into(queue: Queue, service: BaseTranslator, position: int):
        # thread target: a failing service simply contributes nothing to the queue
        try:
            queue.put(_ask(service, position))
        except Exception:
            pass

    if self.FAST_MODE:
        _queue = Queue()
        threads = []
        for position, service in enumerate(self.services):
            worker = Thread(target=_ask_into, args=(_queue, service, position))
            worker.start()
            threads.append(worker)
        # wait for a value; presumably None comes back once every thread ended empty-handed
        result = _queue.get(threads=threads)
        if result is None:
            raise NoResult("No service has returned a valid result")
        return result

    for position, service in enumerate(self.services):
        try:
            return _ask(service, position)
        except Exception:
            pass  # this service failed, try the next one
    raise NoResult("No service has returned a valid result")
def _detect_and_validate_lang(self, language: str) -> str:
    """
    Validates the language and converts it into the single format given by `self._language_normalize`

    `language` can be a `Language` object or anything `Language` accepts.
    The check against `self._supported_languages` is skipped when that attribute is empty.

    Raises `UnsupportedLanguage` when the normalized code is not part of `self._supported_languages`.
    """
    parsed = language if isinstance(language, Language) else Language(language)
    normalized_code = self._language_normalize(parsed)

    # An empty `_supported_languages` means "no restriction"
    if self._supported_languages and normalized_code not in self._supported_languages:
        message = "The language {language_code} is not supported by {service}".format(
            language_code=language, service=str(self))
        raise UnsupportedLanguage(message)

    return normalized_code
def spellcheck(self, text: str, source_language: str = "auto") -> SpellcheckResult:
    """
    Checks the spelling of a given text

    i.e God morning --> Good morning

    The services in `self.services` are tried in order and the first result obtained is returned.
    When `self.FAST_MODE` is set, every service is queried in its own thread instead and
    the value handed back by the queue is returned.

    Raises `NoResult` when no service gave a result.
    """
    source_lang = Language(source_language)

    def _ask(service: BaseTranslator, position: int):
        # ask a single service, treating a None answer as a failure
        instance = self._instantiate_translator(service, self.services, position)
        answer = instance.spellcheck(
            text=text,
            source_language=source_lang
        )
        if answer is None:
            raise NoResult("{service} did not return any value".format(service=repr(instance)))
        return answer

    def _ask_into(queue: Queue, service: BaseTranslator, position: int):
        # thread target: a failing service simply contributes nothing to the queue
        try:
            queue.put(_ask(service, position))
        except Exception:
            pass

    if self.FAST_MODE:
        _queue = Queue()
        threads = []
        for position, service in enumerate(self.services):
            worker = Thread(target=_ask_into, args=(_queue, service, position))
            worker.start()
            threads.append(worker)
        # wait for a value; presumably None comes back once every thread ended empty-handed
        result = _queue.get(threads=threads)
        if result is None:
            raise NoResult("No service has returned a valid result")
        return result

    for position, service in enumerate(self.services):
        try:
            return _ask(service, position)
        except Exception:
            pass  # this service failed, try the next one
    raise NoResult("No service has returned a valid result")
def _language_denormalize(self, language_code):
    """
    Converts a language code used by the translator into a `Language` object

    "zh" and "zh-cn" (case insensitive) both give Language("zho");
    any other code is handed to `Language` unchanged.
    """
    lowered = str(language_code).lower()
    is_chinese = lowered in ("zh", "zh-cn")
    return Language("zho") if is_chinese else Language(language_code)
def text_to_speech(self, text: str, speed: int = 100, gender: str = "female", source_language: str = "auto") -> TextToSpechResult:
    """
    Gives back the text to speech result for the given text

    Args:
      text: the given text
      speed: the speed value, forwarded as is to the services
      gender: the gender of the voice, forwarded as is to the services
      source_language: the source language

    Returns:
        the mp3 file as bytes

    Example:
        >>> from translatepy import Translator
        >>> t = Translator()
        >>> result = t.text_to_speech("Hello, how are you?")
        >>> with open("output.mp3", "wb") as output: # open a binary (b) file to write (w)
        ...     output.write(result.result)
                # or:
                result.write_to_file(output)
        # Or you can just use write_to_file method:
        >>> result.write_to_file("output.mp3")
        >>> print("Output of Text to Speech is available in output.mp3!")

        # the result is an MP3 file with the text to speech output

    The services in `self.services` are tried in order and the first result obtained is returned.
    When `self.FAST_MODE` is set, every service is queried in its own thread instead and
    the value handed back by the queue is returned.

    Raises `NoResult` when no service gave a result.
    """
    source_lang = Language(source_language)

    def _ask(service: BaseTranslator, position: int):
        # ask a single service, treating a None answer as a failure
        instance = self._instantiate_translator(service, self.services, position)
        answer = instance.text_to_speech(
            text=text,
            speed=speed,
            gender=gender,
            source_language=source_lang
        )
        if answer is None:
            raise NoResult("{service} did not return any value".format(service=repr(instance)))
        return answer

    def _ask_into(queue: Queue, service: BaseTranslator, position: int):
        # thread target: a failing service simply contributes nothing to the queue
        try:
            queue.put(_ask(service, position))
        except Exception:
            pass

    if self.FAST_MODE:
        _queue = Queue()
        threads = []
        for position, service in enumerate(self.services):
            worker = Thread(target=_ask_into, args=(_queue, service, position))
            worker.start()
            threads.append(worker)
        # wait for a value; presumably None comes back once every thread ended empty-handed
        result = _queue.get(threads=threads)
        if result is None:
            raise NoResult("No service has returned a valid result")
        return result

    for position, service in enumerate(self.services):
        try:
            return _ask(service, position)
        except Exception:
            pass  # this service failed, try the next one
    raise NoResult("No service has returned a valid result")
def _language_denormalize(self, language_code):
    """
    Converts a language code used by the translator into a `Language` object

    "zh-cn" gives Language("zho") and "zh-tw" gives Language("och") (both case insensitive);
    any other code is handed to `Language` unchanged.
    """
    chinese_variants = {"zh-cn": "zho", "zh-tw": "och"}
    # Unknown codes fall through untouched (the original object, not its string form).
    return Language(chinese_variants.get(str(language_code).lower(), language_code))
def vectorize(string: str, data: Language): vector = StringVector(string) if len(vector.set) > 0: return { "i": data.id, "s": list(vector.set), "l": vector.length, "c": dict(vector.counter) } else: raise ValueError for lang in LANGUAGE_DATA: l = Language(lang) if l.alpha2 is not None and l.extra.type in {Types.ANCIENT, Types.LIVING}: for _, name in l.in_foreign_languages.items(): normalized_language = LANGUAGE_CLEANUP_REGEX.sub( "", str(name).lower()).replace(" ", "") try: results[normalized_language] = vectorize( normalized_language, l) except Exception as e: print(e) continue with open("vector_results.py", "w") as out: out.write( f"VECTORS = {dumps(results, ensure_ascii=False, separators=(',', ':')).replace('[', '{').replace(']', '}')}"