import os

from tqdm import tqdm

# Ebook and Translator are the project's own classes, defined elsewhere.


class EbookTranslator:

    def __init__(self, path, source_language, target_language, engine="Google"):
        self.source_language = source_language
        self.target_language = target_language
        self.ebook = Ebook(path)
        self.set_out_path()
        self.set_counter_path()
        self.set_start_point()
        self.translator = Translator(source_language, target_language, engine)

    def translate(self):
        # Enumerate from the resume point so the checkpoint always stores the
        # absolute sentence index, even after a restart.
        for counter, original_sentence in enumerate(
                tqdm(self.ebook.sentences), start=self.start_point):
            translated_sentence = self.translator.translate(original_sentence)
            both_sentences = f"{translated_sentence}\n\n{original_sentence}\n\n"
            self.write_to_file(both_sentences, self.out_path)
            self.write_counter_to_file(str(counter))
        self.quit()

    def set_out_path(self):
        self.out_path = self.ebook.path.replace(
            ".txt", f"_translated_to_{self.target_language}.txt")

    def set_counter_path(self):
        self.counter_path = self.ebook.path.replace(".txt", "_counter.txt")

    def write_counter_to_file(self, counter):
        self.write_to_file(counter, self.counter_path, mode="w")

    def write_to_file(self, text, path, mode="a"):
        with open(path, mode) as file:
            file.write(text)

    def read_counter(self):
        with open(self.counter_path) as file:
            return int(file.read())

    def set_start_point(self):
        self.start_point = 0
        if os.path.exists(self.counter_path):
            self.start_point = self.read_counter() + 1
            print(f"Continuing translation from sentence {self.start_point}")
            self.ebook.sentences = self.ebook.sentences[self.start_point:]
        else:
            print("Starting translation from the beginning.")

    def quit(self):
        self.translator.quit()
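
# A minimal usage sketch; the file name "my_ebook.txt" and the language pair
# are placeholders, and it assumes Ebook and Translator are importable from
# this project.
if __name__ == "__main__":
    ebook_translator = EbookTranslator("my_ebook.txt",
                                       source_language="en",
                                       target_language="de")
    ebook_translator.translate()
    # Writes my_ebook_translated_to_de.txt next to the source file and keeps
    # the index of the last translated sentence in my_ebook_counter.txt, so an
    # interrupted run resumes where it stopped.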
import unittest
import warnings

# Translator and get_mock_text come from the project's own modules.


class TranslatorTest(unittest.TestCase):

    def setUp(self):
        # Suppress the "unclosed <ssl.SSLSocket ...>" ResourceWarning that the
        # browser session can leave behind.
        warnings.filterwarnings("ignore", category=ResourceWarning,
                                message="unclosed.*<ssl.SSLSocket.*>")
        self.translator = Translator(source_language="en",
                                     target_language="de",
                                     engine="Google")

    def test_1_set_engine(self):
        self.translator.set_engine("Google")
        self.assertTrue(self.translator.engine)

    def test_2_translate(self):
        text = get_mock_text()
        translation = self.translator.translate(text)
        self.assertTrue(isinstance(translation, str))

    def test_3_quit(self):
        self.translator.quit()
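
# unittest sorts test methods alphabetically, so the numeric prefixes above
# (test_1_, test_2_, test_3_) fix the execution order. A minimal entry point
# so the suite can also be run as a script:
if __name__ == "__main__":
    unittest.main()

# Alternatively, run it through test discovery from the project root:
#     python -m unittest discover -v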
tw["content"] = translation[j] except IndexError: pass news += deepcopy(translated_tweets) translated_tweets = [] return news + tweets if __name__ == '__main__': reader = InterTASSReader('intertass-ES-train-tagged.xml') tweets = list(reader.tweets()) # iterador sobre los tweets langs = list(LANGUAGES.keys())[:50] try: langs.remove("es") langs.remove('zh-cn') langs.remove('zh-tw') except ValueError: pass client = Translator(headless_browser=True, bulk=True) augmented_train = [] for lang in tqdm(langs): try: augmented_train += bulk_translate(tweets, lang) except: pass client.quit() with open("augmented_data.pkl", "wb") as f: pickle.dump(augmented_train, f)