Ejemplo n.º 1
0
class EbookTranslator:
    """Translate an ebook sentence by sentence, with resumable progress.

    Every translated sentence is appended to an output file, followed by the
    original sentence. After each sentence the index of the last completed
    sentence is written to a counter file; when that file exists at
    construction time, translation continues after the recorded sentence.

    Attributes:
        source_language: Source language handed to ``Translator``.
        target_language: Target language handed to ``Translator``; also
            embedded in the output file name.
        ebook: ``Ebook`` built from ``path``; its ``path`` and ``sentences``
            attributes are used here.
        out_path: File the sentence pairs are appended to.
        counter_path: File holding the index of the last completed sentence.
        start_point: Absolute index of the first sentence still to translate.
        translator: ``Translator`` used for the actual translation.
    """

    def __init__(self,
                 path,
                 source_language,
                 target_language,
                 engine="Google"):
        """Load the ebook, derive the file paths and restore progress.

        Args:
            path: Passed unchanged to ``Ebook``.
            source_language: Passed to ``Translator``.
            target_language: Passed to ``Translator``.
            engine: Passed to ``Translator``; defaults to ``"Google"``.
        """
        self.source_language = source_language
        self.target_language = target_language
        self.ebook = Ebook(path)
        self.set_out_path()
        self.set_counter_path()
        self.set_start_point()
        self.translator = Translator(source_language, target_language, engine)

    def translate(self):
        """Translate all remaining sentences, then quit the translator.

        The translator is quit even when a translation or a file write
        raises, so it is not left running after a failure.
        """
        # Number the sentences from the resume offset so the stored counter
        # is always an absolute index into the full ebook; a counter relative
        # to the sliced list would make a second resume start too early.
        first_index = getattr(self, "start_point", 0)
        try:
            for counter, original_sentence in enumerate(
                    tqdm(self.ebook.sentences), start=first_index):
                translated_sentence = self.translator.translate(
                    original_sentence)
                both_sentences = (
                    f"{translated_sentence}\n\n{original_sentence}\n\n")
                self.write_to_file(both_sentences, self.out_path)
                self.write_counter_to_file(counter)
        finally:
            self.quit()

    def _sibling_path(self, suffix):
        """Return the ebook path with its extension replaced by ``suffix``.txt.

        Only the final extension is touched, so a ".txt" occurring elsewhere
        in the path is left alone, and a path without a ".txt" extension
        never maps back onto the source file itself.
        """
        root, _extension = os.path.splitext(self.ebook.path)
        return f"{root}{suffix}.txt"

    def set_out_path(self):
        """Set ``out_path`` next to the ebook, tagged with the target language."""
        self.out_path = self._sibling_path(
            f"_translated_to_{self.target_language}")

    def set_counter_path(self):
        """Set ``counter_path`` next to the ebook."""
        self.counter_path = self._sibling_path("_counter")

    def write_counter_to_file(self, counter):
        """Overwrite the counter file with ``counter``.

        Args:
            counter: Index of the last completed sentence; converted with
                ``str`` so both ints and strings are accepted.
        """
        self.write_to_file(str(counter), self.counter_path, mode="w")

    def write_to_file(self, text, path, mode="a"):
        """Write ``text`` to ``path`` as UTF-8.

        Args:
            text: String to write.
            path: Destination file.
            mode: ``open`` mode; appends by default.
        """
        # Explicit UTF-8: translated text may contain characters that the
        # platform's default encoding cannot represent.
        with open(path, mode, encoding="utf-8") as file:
            file.write(text)

    def read_counter(self):
        """Return the integer stored in the counter file.

        Raises:
            ValueError: If the file content is not an integer.
            OSError: If the file cannot be opened.
        """
        with open(self.counter_path, encoding="utf-8") as file:
            return int(file.read())

    def set_start_point(self):
        """Restore progress from the counter file, if there is one.

        Sets ``start_point`` and drops the already translated sentences from
        ``ebook.sentences``. Intended to run once per instance: a repeated
        call would slice the already shortened list again.
        """
        self.start_point = 0
        if os.path.exists(self.counter_path):
            # The file stores the last completed index; resume one after it.
            self.start_point = self.read_counter() + 1
            print(f"Continuing translation from sentence {self.start_point}")
            self.ebook.sentences = self.ebook.sentences[self.start_point:]
        else:
            print("Starting translation from the beginning.")

    def quit(self):
        """Quit the underlying translator."""
        self.translator.quit()
Ejemplo n.º 2
0
class TranslatorTest(unittest.TestCase):
    """Smoke tests for a ``Translator`` set up for "en" -> "de" with "Google".

    unittest runs test methods in alphabetical order of their names, so the
    numeric prefixes fix the order in which the tests execute.
    """

    def setUp(self):
        """Create a fresh translator for every test."""
        # Silence the "unclosed SSLSocket" ResourceWarning during the tests.
        warnings.filterwarnings("ignore", category=ResourceWarning, message="unclosed.*<ssl.SSLSocket.*>")
        self.translator = Translator(source_language="en", target_language="de", engine="Google")

    def tearDown(self):
        """Quit the translator unless the test already did so itself.

        Without this, only ``test_3_quit`` would ever quit its translator and
        the instances created for the other tests would be left behind.
        """
        if self.translator is not None:
            self.translator.quit()

    def test_1_set_engine(self):
        """Selecting the engine leaves a truthy ``engine`` attribute."""
        self.translator.set_engine("Google")
        self.assertTrue(self.translator.engine)

    def test_2_translate(self):
        """Translating the mock text yields a string."""
        text = get_mock_text()
        translation = self.translator.translate(text)
        self.assertIsInstance(translation, str)

    def test_3_quit(self):
        """Quitting the translator does not raise."""
        # Detach the translator first so tearDown does not quit it again,
        # even if quit() itself fails.
        translator, self.translator = self.translator, None
        translator.quit()
Ejemplo n.º 3
0
                    tw["content"] = translation[j]
                except IndexError:
                    pass
            news += deepcopy(translated_tweets)
            translated_tweets = []
    return news + tweets


if __name__ == '__main__':
    # Script entry point: translate the tagged training tweets into a set of
    # languages and pickle everything bulk_translate() returned.
    import sys

    reader = InterTASSReader('intertass-ES-train-tagged.xml')
    tweets = list(reader.tweets())  # iterator over the tweets

    # Take the first 50 language keys and drop the unwanted ones. Filtering
    # (rather than chained list.remove calls under one try) guarantees that a
    # missing entry does not prevent the others from being removed.
    excluded = {"es", "zh-cn", "zh-tw"}
    langs = [code for code in list(LANGUAGES.keys())[:50]
             if code not in excluded]

    # NOTE(review): `client` is never passed to bulk_translate(); presumably
    # that function reads it as a module-level global, so the name and its
    # module-level scope are kept as they were - confirm against its body.
    client = Translator(headless_browser=True, bulk=True)
    augmented_train = []
    try:
        for lang in tqdm(langs):
            try:
                augmented_train += bulk_translate(tweets, lang)
            except Exception as exc:
                # Best effort: a failing language is skipped, but reported.
                # KeyboardInterrupt/SystemExit are no longer swallowed, so
                # the run can be aborted.
                print("Skipping language {!r}: {!r}".format(lang, exc),
                      file=sys.stderr)
    finally:
        # Always quit the client, even when the loop is interrupted.
        client.quit()

    with open("augmented_data.pkl", "wb") as f:
        pickle.dump(augmented_train, f)