def create_translate(nazwa_katalogu, nazwa_pliku):
    """Translate every cue of a WebVTT file from English to Polish and save it.

    Each cue text is split on '.', every non-blank fragment is translated
    separately, and the translated fragment replaces the original fragment
    inside the cue. The result is saved next to the source file under the
    original name (cut at the first '(') with a ``(pl)`` suffix.

    :param nazwa_katalogu: directory that contains the subtitle file
    :param nazwa_pliku: name of the subtitle file inside that directory
    :return: None
    """
    import os

    # os.path.join replaces the hand-written '\' separator, which was an
    # invalid escape sequence ('\{') and only produced a usable path on Windows.
    vtt = webvtt.read(os.path.join(nazwa_katalogu, nazwa_pliku))
    translator = Translator()

    translated_lines = []
    for caption in vtt:
        new_line = caption.text
        for s_line in caption.text.split('.'):
            fragment = s_line.strip()
            # Skip blank fragments: replacing '' would insert the translation
            # between every character of the cue.
            if not fragment:
                continue
            trans = translator.translate(s_line, src='en', dest='pl')
            print(f'{trans.origin} -> {trans.text}')
            # Dots are removed from the translation because the cue keeps the
            # separators of the original text.
            new_line = new_line.replace(fragment, trans.text.replace('.', ''))
        translated_lines.append(new_line)

    for index, translated in enumerate(translated_lines):
        vtt[index].text = translated
    print(vtt)

    base_name = nazwa_pliku.split("(", maxsplit=1)[0]
    vtt.save(os.path.join(nazwa_katalogu, f'{base_name}(pl)'))
def translate(text, src, dst, max_retries=3):
    """Translate ``text`` from ``src`` to ``dst``, preserving emojis and links.

    Emojis are converted to their textual aliases before translation and
    restored afterwards. Link targets found in the original text are put back
    into the links found in the translated text, matched by position.

    :param text: text to translate; falsy values are returned unchanged
    :param src: source language code
    :param dst: destination language code; if equal to ``src`` nothing is done
    :param max_retries: how many times to retry after an ``HTTPException``
        (the previous implementation recursed without any limit)
    :return: the translated text, or the original ``text`` when translation
        fails
    """
    if not text or src == dst:
        return text

    translator = Translator(service_urls=['translate.google.com'])
    for _ in range(max_retries + 1):
        try:
            # save hidden links
            links = reLink.findall(text)
            # replace emojis
            str_demoji = emoji.demojize(text, delimiters=emoji_delimiters)
            # translate
            tran = translator.translate(str_demoji, src=src, dest=dst).text
            # fix emojis
            for match in reEmoji.findall(tran):
                tran = tran.replace(
                    match[0], emoji.emojize(f":{match[1].lower()}:"))
            # fix hidden links; `offset` tracks how much earlier insertions
            # shifted the positions reported by the regex
            offset = 0
            for i, match in enumerate(reLink.finditer(tran)):
                b, e = match.span()
                insert = f"[{match[1]}]({links[i][1]})"
                tran = f"{tran[:b + offset]}{insert}{tran[e + offset:]}"
                offset += len(insert) - e + b
            return tran
        except HTTPException:
            # transient transport problem: try again, but only a bounded
            # number of times
            continue
        except Exception:
            # best effort: any other failure falls back to the original text
            break
    return text
def do_single_translate(cn_text):
    """
    Translate one text from Simplified Chinese (zh-cn) to Traditional
    Chinese (zh-tw).

    :param cn_text: Text in CN
    :return: Text in TW
    """
    return Translator().translate(cn_text, src='zh-cn', dest='zh-tw').text
def do_translate(cn_texts):
    """
    Translate several texts from Simplified Chinese (zh-cn) to Traditional
    Chinese (zh-tw) with a single translator call.

    :param cn_texts: Texts in CN
    :return: Texts in TW
    """
    engine = Translator()
    return [
        entry.text
        for entry in engine.translate(cn_texts, src='zh-cn', dest='zh-tw')
    ]
async def translate(context):
    """ PagerMaid universal translator.

    Translates the command arguments, or the replied-to message when no
    arguments are given, into the configured application language and edits
    the command message with the result. Results longer than 4096 characters
    are uploaded as ``translation.txt`` instead.

    :param context: the message event that triggered the command
    :return: None
    """
    translator = Translator()
    reply = await context.get_reply_message()
    message = context.arguments
    ap_lang = config['application_language']
    if not message:
        if not reply:
            await context.edit(lang('arg_error'))
            return
        message = reply.text

    try:
        # NOTE(review): `silent` is not defined in this block; presumably a
        # module-level flag - confirm.
        if not silent:
            await context.edit(lang('translate_processing'))
        try:
            result = translator.translate(clear_emojis(message), dest=ap_lang)
        except Exception:
            # `except Exception` (not a bare except) so that task cancellation
            # and interpreter shutdown are not swallowed by the fallback.
            from translate import Translator as trans
            result = trans(to_lang=ap_lang.replace('zh-cn', 'zh')).translate(
                clear_emojis(message))
    except ValueError:
        await context.edit(lang('translate_ValueError'))
        return

    try:
        source_lang = result.src
        source_text = result.origin
        trans_lang = result.dest
    except AttributeError:
        # The result does not expose src/origin/dest, so nothing can be shown.
        await context.edit(lang('google_connection_error'))
        return

    result = f"**{lang('translate_hits')}**\n{lang('translate_original_lang')}: {source_lang}\n{source_text} -> {result.text}"

    if len(result) > 4096:
        await context.edit(lang('translate_tg_limit_uploading_file'))
        await attach_log(result, context.chat_id, "translation.txt",
                         context.id)
        return
    await context.edit(result)
    # Over-long results returned above, so the former `else` branch (which
    # also called translate() instead of lang()) could never run; it was
    # removed.
    await log(
        f"{lang('translate_get')}: `{source_text}` \n{lang('translate_from')} {source_lang} {lang('translate_to')} {trans_lang}"
    )
def transl_it(text, src, dest):
    """Translate ``text`` and return a ``(source, destination, text)`` tuple.

    :param text: the text to translate
    :param src: language code of ``text``
    :param dest: language code to translate into
    :return: ``(LANGUAGES[src], LANGUAGES[dest], translated_text)`` on
        success; ``(False, False, message)`` on failure, where ``message``
        holds the error followed by the language-detection result for
        ``text``
    """
    # Created outside the try block so the error handler below can always
    # rely on `translator` being bound.
    translator = Translator()
    try:
        a = translator.translate(text, src=src, dest=dest)
        # extract the basic info from the output
        return (LANGUAGES[a.src], LANGUAGES[a.dest], a.text)
    except Exception as ee:
        # Bad input text can lead here; add the detection result to help
        # diagnose it. Detection can fail as well, and must not escape the
        # error-tuple contract.
        try:
            confidence = translator.detect(text)
        except Exception as detect_error:
            confidence = f"language detection failed: {detect_error}"
        return (False, False, "error: " + str(ee) + "\n" + str(confidence))
def translate(word, language):
    """Translate ``word`` into ``language`` and describe the result.

    ``language`` is matched case-insensitively against both the keys and the
    values of the ``'sl'`` table returned by ``Translator.glanguage()``.

    :param word: the text to translate
    :param language: a key or a value of the ``'sl'`` language table
    :return: ``'<word> in <language name> => <translation>'``, or
        ``'Language entered is not supported'`` when nothing matches
    """
    translator = Translator()
    supported = translator.glanguage()['sl']

    wanted = language.lower()
    found = None
    for code, name in supported.items():
        if wanted in (code.lower(), name.lower()):
            # no break: like the previous loop, the last match wins
            found = (code, name)

    if found is None:
        return 'Language entered is not supported'

    code, name = found
    # Read the translation from the result's `text` attribute. Slicing the
    # result's str() up to the first ', p' truncated any translation that
    # itself contained ', p'.
    translation = translator.translate(word, dest=code).text
    return f'{word} in {name} => {translation}'
def start_translation():
    """Translate the default-language strings into every other language.

    For each entry of ``language_list`` whose ``"value"`` differs from
    ``DEFAULT_LANGUAGE``, all values of ``resources[DEFAULT_LANGUAGE]`` are
    translated in one call and handed to
    ``add_translations_to_resource_dict``; the time taken is logged.
    """
    translator = Translator()
    log.info("Start of translation service")
    languages_code_list = [entry["value"] for entry in language_list]
    default_language_string_list = list(resources[DEFAULT_LANGUAGE].values())

    for language in languages_code_list:
        if language == DEFAULT_LANGUAGE:
            # the default language is the source, nothing to translate
            continue
        start_time = time.time()
        translations = translator.translate(
            default_language_string_list,
            src=DEFAULT_LANGUAGE,
            dest=language,
        )
        add_translations_to_resource_dict(translations, language)
        log.info(
            f"The {language} translation has taken {time.time()-start_time} seconds"
        )
        log.info(f"End of loading for resources for: {language}")
def test_japanese_punctuation():
    """Check how Japanese punctuation comes out of the translator."""
    from pygoogletranslation import Translator

    translator = Translator()

    # Expected value is what the Google Translate web page returns. With the
    # english-point replacement method the result differs from this, as this
    # test shows.
    greeting = translator.translate("おはよう。げんきですか。").text
    assert greeting == "Good morning. How are you."

    days = translator.translate("今日、明日").text
    assert days == "Today, tomorrow"

    # Disabled check, kept for reference:
    # assert translator.translate("「こんにちは」").text == "\"Hello\""
from pygoogletranslation import Translator

# Minimal usage example: translate one Japanese sentence into English and
# print the translated text.
t = Translator()
translated = t.translate("これはテストです", dest="en")
print(translated.text)
countryCodes = [ "af", "sq", "am", "ar", "hy", "az", "bn", "bs", "bg", "ca", "hr", "cs", "da", "nl", "en", "et", "fa", "tl", "fi", "fr", "ka", "de", "el", "gu", "ht", "ha", "he", "hi", "hu", "is", "id", "it", "ja", "kn", "kk", "ko", "lv", "lt", "mk", "ms", "ml", "mt", "mn", "no", "fa", "ps", "pl", "pt", "ro", "ru", "sr", "si", "sk", "sl", "so", "es", "sw", "sv", "tl", "ta", "te", "th", "tr", "uk", "ur", "uz", "vi", "cy" ] result_dict = dict() for code in countryCodes: result_dict[code] = {} result_dict[code]['title'] = translator.translate('Language Support', dest=code, src="en").text result_dict[code]['selectLabel'] = translator.translate( 'Choose Instruction Language', dest=code, src="en").text result_dict[code]['headerLine'] = translator.translate( 'Please change your language by following the settings below', dest=code, src="en").text result_dict[code]['list'] = {} result_dict[code]['list']['chrome'] = [] results = translator.translate([ "At the top right, click More. Then Settings.", "At the bottom, click Advanced.", "Under 'Languages,' click Language.", "Next to the language you'd like to use, click More.",
"af", "sq", "am", "ar", "hy", "az", "bn", "bs", "bg", "ca", "hr", "cs", "da", "nl", "en", "et", "fa", "tl", "fi", "fr", "ka", "de", "el", "gu", "ht", "ha", "he", "hi", "hu", "is", "id", "it", "ja", "kn", "kk", "ko", "lv", "lt", "mk", "ms", "ml", "mt", "mn", "no", "fa", "ps", "pl", "pt", "ro", "ru", "sr", "si", "sk", "sl", "so", "es", "sw", "sv", "tl", "ta", "te", "th", "tr", "uk", "ur", "uz", "vi", "cy" ] for code in countryCodes: result_dict = dict() result_dict['language'] = code result_dict['title'] = translator.translate('Language Support', dest=code, src="en").text result_dict['selectLabel'] = translator.translate( 'Choose Instruction Language', dest=code, src="en").text result_dict['headerLine'] = translator.translate( 'Please change your language by following the settings below', dest=code, src="en").text result_dict['list'] = {} result_dict['list']['chrome'] = [] results = translator.translate([ "Your Language must be chosen within the chrome settings.", "Within the searchbar there will be a Translate Icon.", "Select your chosen Language from the dropdown menu."
class Maestro:
    """Translate the rows of a DataFrame in worker threads and score them.

    Worker threads take batches of row labels from a job queue, translate the
    last column of those rows and push the translations to a result queue.
    One processing thread drains the result queue, runs SentiStrength on the
    translated texts and appends the rows to ``<output>_raw.csv``. When
    processing ends, the raw file is sorted by row order and written to
    ``<output>.csv``.
    """

    def __init__(self, df, output_path, output_name, batch):
        """Prepare tools, the job queue and the threading primitives.

        :param df: DataFrame with a ``tweetid`` column; the first column is
            used as the row id and the last column as the text to translate
        :param output_path: directory of the output files
        :param output_name: base name (without extension) of the output files
        :param batch: number of rows translated per job
        """
        # storing variables
        self.df = df
        self.filename = Path(output_path) / output_name
        self.raw_file = '{}_raw.csv'.format(self.filename)
        self.batch = batch

        # initialize tools
        self.translator = Translator()
        self.__initialize_senti()

        # collect jobs
        job_list = self.__collect_jobs()
        self.total_job = len(job_list)

        # initialize queues
        self.jobs = Queue(maxsize=self.total_job)
        for job in job_list:
            self.jobs.put(job)
        self.results = Queue(maxsize=self.total_job)

        # setup threading variables
        self.stop = threading.Event()
        # set by the first worker that starts; play() waits on it
        self.worker_started = threading.Event()
        self.worker_ct_lock = threading.Lock()
        self.worker_ct = 0  # num_of_spawned worker

    def __initialize_senti(self):
        """Configure SentiStrength from ./lib and ./lang and smoke-test it."""
        self.senti = PySentiStr()
        self.senti.setSentiStrengthPath(
            str(Path.cwd() / 'lib' / 'SentiStrengthCom.jar'))
        self.senti.setSentiStrengthLanguageFolderPath(str(Path.cwd() / 'lang'))

        # simple test to make sure senti works
        test = self.senti.getSentiment(['You are beautiful'], 'dual')
        assert type(test) is list
        assert type(test[0]) is tuple

    def __collect_jobs(self):
        """Return the batches of row labels that still need processing.

        Rows whose ``tweetid`` already appears in column 1 of the raw output
        file are skipped, so an interrupted run can be resumed.

        :return: list of tuples of row labels, at most ``self.batch`` each
        """
        try:
            out_df = pd.read_csv(self.raw_file, header=None)
            processed_ser = self.df['tweetid'].isin(out_df[1])
        except FileNotFoundError:
            # Nothing processed yet. The mask carries df's own index because
            # the labels taken from it are later used with `self.df.loc`.
            zeros = np.zeros((len(self.df.index), ), dtype=bool)
            processed_ser = pd.Series(zeros, index=self.df.index)

        job_list = processed_ser[~processed_ser].index
        job_list = list(grouper(job_list, self.batch))
        if len(job_list) > 0:
            # drop the None entries found in the last, possibly short, batch
            job_list[-1] = tuple(job for job in job_list[-1]
                                 if job is not None)
        return job_list

    def __despawn_worker(self):
        """Decrease the live-worker counter under its lock."""
        with self.worker_ct_lock:
            self.worker_ct = self.worker_ct - 1

    def __translate(self, thread_num):
        """Worker loop: translate jobs until none are left or stop is set.

        :param thread_num: worker number, used only in error messages
        """
        from queue import Empty

        with self.worker_ct_lock:
            self.worker_ct = self.worker_ct + 1
        self.worker_started.set()

        try:
            while not self.stop.is_set():
                # Non-blocking get: checking empty() and then calling a
                # blocking get() let a worker wait forever when another
                # worker took the last job in between.
                try:
                    job = self.jobs.get_nowait()
                except Empty:
                    break

                try:
                    mini_df = self.df.loc[job, ]  # trailing comma is needed
                    ids = mini_df.iloc[:, 0]
                    items = mini_df.iloc[:, -1].to_numpy().tolist()
                except Exception as e:
                    print('Worker #{} got pandas error: {}'.format(
                        thread_num, e))
                    break

                try:
                    if len(items) == 1:
                        translations = [self.translator.translate(items)]
                    else:
                        translations = self.translator.translate(items)
                except Exception as e:
                    print('Worker #{} got translation error: {}'.format(
                        thread_num, e))
                    break

                self.results.put((job, ids, translations))
        finally:
            # always give the slot back, otherwise play() would wait forever
            self.__despawn_worker()

    def __save(self, results):
        """Append ``results`` (an iterable of rows) to the raw CSV file."""
        with open(self.raw_file, 'a', encoding='utf-8',
                  newline='') as csv_file:
            writer = csv.writer(csv_file,
                                delimiter=',',
                                quotechar='"',
                                quoting=csv.QUOTE_MINIMAL)
            writer.writerows(results)

    def __process(self, score='dual'):
        """Drain translated batches, score them and append them to the raw CSV.

        Runs until the stop event is set and the result queue is empty, or
        until scoring, row building or saving fails. Afterwards the stop event
        is set and the final CSV is rebuilt.

        :param score: score mode passed to ``getSentiment``
        """
        total_batch = int(np.ceil(len(self.df.index) / self.batch))
        pbar = tqdm(total=total_batch, initial=(total_batch - self.total_job))

        while not self.stop.is_set() or not self.results.empty():
            time.sleep(2)
            if not self.results.empty():
                # merges all results
                job_list, id_list, translation_list = ([], [], [])
                steps = 0
                while not self.results.empty():
                    job, ids, translations = self.results.get()
                    job_list.extend(job)
                    id_list.extend(ids)
                    translation_list.extend(translations)
                    steps = steps + 1

                # analyze sentiments
                texts = [tr.text for tr in translation_list]
                try:
                    sentis = self.senti.getSentiment(texts, score)
                except Exception as e:
                    print('Process got sentistrength error:', e)
                    break

                try:
                    rows = [
                        (order, i, *senti, tr.src, text)
                        for order, i, senti, tr, text in zip(
                            job_list, id_list, sentis, translation_list,
                            texts)
                    ]
                except Exception as e:
                    print(e)
                    break

                try:
                    self.__save(rows)
                except Exception as e:
                    print('Process got on save error:', e)
                    break

                pbar.update(steps)
            time.sleep(.1)  # prevent too much loop checking

        if not self.stop.is_set():
            self.stop.set()  # force stop all threads

        print('Rebuilding...')
        self.__rebuild()
        print('Exiting...')
        pbar.close()

    def __rebuild(self):
        """Sort the raw CSV by row order and write it as the final CSV."""
        try:
            sf = pd.read_csv(self.raw_file,
                             header=None,
                             names=[
                                 'order', 'tweetid', '+', '-', 'src_lang',
                                 'translation'
                             ])
            sf.sort_values('order', inplace=True)
            sf.to_csv('{}.csv'.format(self.filename), index=None)
        except FileNotFoundError:
            pass
        except Exception as e:
            print(ERR_STR.format('rebuild', 'on rebuilding csv'), e)

    def play(self, n_thread=1):
        """Run ``n_thread`` translation workers plus one processing thread.

        Blocks until all workers have finished or the run is interrupted from
        the keyboard.

        :param n_thread: number of translation workers; values below 1 make
            the call a no-op
        """
        if n_thread < 1:
            return

        with ThreadPoolExecutor(max_workers=n_thread + 1) as executor:
            try:
                executor.map(self.__translate, range(n_thread))
                print('Spawing {} workers...'.format(n_thread))
                # Wait for the first worker to start. An event is used rather
                # than polling `worker_ct`: workers that finish at once (for
                # example with no jobs left) bring the counter back to 0
                # before it is ever observed, which made the old spin loop
                # hang. The timeout keeps the wait interruptible.
                while not self.worker_started.wait(timeout=.1):
                    pass
                print('Aye, Sir!')
                executor.submit(self.__process)

                # as long as there are atleast a worker
                while self.worker_ct > 0:
                    # wait for any keyboard interrupt
                    time.sleep(.5)  # power napping for half second

                # either no job left or all worker has been despawned
                self.stop.set()
                if self.jobs.empty():
                    print('All done!')
                # `==`, not `is`: identity of int objects is an
                # implementation detail
                if self.worker_ct == 0:
                    print('All workers quit their job!')
            except KeyboardInterrupt:
                print('\nKeyboard interrupt')
            except Exception as e:
                print(ERR_STR.format('play', 'something went wrong'), e)
            finally:
                self.stop.set()
                print('Byee 👋')