def translate(input_phrase):
    """Translate a phrase and append the result to the phrase database CSV.

    The language of ``input_phrase`` is detected first. The phrase is then
    rendered in English, Japanese, Spanish and Simplified Chinese: the column
    matching the detected language keeps the input unchanged, except for
    Chinese, which always comes from ``to_chinese_simplified`` so that its
    pronunciation can be stored as well. The resulting row, indexed by
    ``input_phrase``, is appended to ``phrase_translation_database.csv``.

    If the detected language is none of Japanese, Simplified Chinese, English
    or Spanish, a message is printed and the database is left untouched
    (no empty row is written).

    :param input_phrase: phrase to translate and store
    :return: None
    """
    columns_list = ['English', 'Japanese', 'Spanish', 'Chinese', 'Ch_Prns']
    database_path = 'phrase_translation_database.csv'

    translator = Translator()

    # detect language type of input phrase
    language_type = translator.detect(input_phrase).lang
    print('Language type: %s' % language_type)

    # (language tag, printed label, column that keeps the raw input).
    # Tags are tested in this order; Chinese has no pass-through column
    # because the Chinese text and pronunciation always come from the
    # translator.
    supported_languages = (
        ('ja', 'Japanese', 'Japanese'),
        ('zh-CN', 'Simplified Chinese', None),
        ('en', 'English', 'English'),
        ('es', 'Spanish', 'Spanish'),
    )
    for tag, label, source_column in supported_languages:
        if tag in language_type:
            break
    else:
        print('Input phrase should be Japanese, English, Chinese or Spanish')
        return

    print('Input phrase is %s' % label)

    phrase_in_zh = to_chinese_simplified(translator, input_phrase)
    converters = {
        'English': to_english,
        'Japanese': to_japanese,
        'Spanish': to_spanish,
    }
    row = {
        column: (
            input_phrase
            if column == source_column
            else convert(translator, input_phrase).text
        )
        for column, convert in converters.items()
    }
    row['Chinese'] = phrase_in_zh.text
    row['Ch_Prns'] = phrase_in_zh.pronunciation

    new_row = pd.DataFrame([row], columns=columns_list, index=[input_phrase])

    # write in csv
    try:
        df_trans_phrase = pd.read_csv(
            database_path, encoding='utf-8-sig', index_col=0)
    except FileNotFoundError:
        # No database yet: the new row becomes its first entry.
        df_trans_phrase = new_row
    else:
        # DataFrame.append was removed in pandas 2.0; concat is the
        # supported way to add the row.
        df_trans_phrase = pd.concat([df_trans_phrase, new_row])
    df_trans_phrase.to_csv(database_path, encoding='utf-8-sig')
def detect_language_google(self, sample):
    """Detect the language of a text sample using Google Translate.

    :param sample: sample of text from which the language is detected
    :return: the entry of ``self.LANGUAGES_GOOGLE`` stored under the
        language code reported by the translator
    """
    detected_code = Translator().detect(sample).lang
    return self.LANGUAGES_GOOGLE[detected_code]
# Thai to English
thai_text = 'ลมอ่อนพัดโชยมาน้ำตาก็ไหลรินเหลือเพียงกลิ่นหัวใจฟุ้งไปกับความเหงา'
translations = translator.translate(text=thai_text, src='th', dest='en')
print('Original Text :', translations.origin, sep='\n')
print('Translated Text :', translations.text, sep='\n')

#%% language detection
print('----------- Language Detection --------------')
# Two samples are sent for detection; the first result is overwritten by the
# second, so only the second sample is reported below.
LangDetectObj = translator.detect('이 문장은 한글로 쓰여졌습니다.')
LangDetectObj = translator.detect('ไปกับพี่มั๊ยจ๊ะน้องสาว')
LangDetect, LangConf = LangDetectObj.lang, LangDetectObj.confidence
print('Detect language is : %s with confident : %.3f' % (LangDetect, LangConf))

#%% Translate from list
print('-----------Translator from list --------------')
phrases = ['The quick brown fox', 'jumps over', 'the lazy dog']
translations = translator.translate(phrases, dest='ko')
# The input is a list, so the result is iterated as one item per phrase.
for translation in translations:
    print(translation.origin, ' -> ', translation.text)
class DataTranslation:
    """Translate words into French with the googletrans library.

    A word may start with a two-character language indicator followed by a
    colon (for example ``en:spreads``); when it does not, the language is
    detected with googletrans.
    """

    # Hand-written French translations returned as-is, without calling the
    # translator.
    _FIXED_TRANSLATIONS = {
        'en:spreads': 'Pate à tartiner',
        'en:sweets-spreads': 'Pâte à tartiner sucrée',
        'en:plant-based-spreads': 'Pâte à tartiner végétal',
    }

    def __init__(self) -> None:
        self.translator = Translator()

    def make_translation(self, word: str) -> Union[bool, str]:
        """Translate a word into French.

        The source language is taken from the word's two-character
        indicator when present, otherwise it is detected.

        :param word: word to translate, optionally prefixed with a language
            indicator such as ``en:``
        :return: the French text with hyphens replaced by spaces, or False
            if the word is empty/blank or its language cannot be determined
        """
        fixed = self._FIXED_TRANSLATIONS.get(word)
        if fixed is not None:
            return fixed

        # Nothing to translate: covers '', ' ' and any other blank input.
        if not word.strip():
            return False

        language = self.get_language(word)
        if not language:
            return False

        # Drop the language indicator before sending the word out.
        bare_word = self.slice_language(word)
        translated = self.translator.translate(
            bare_word, src=language, dest='fr').text
        # Only hyphens are replaced; other punctuation is left untouched.
        return translated.replace('-', ' ')

    def detect_lang(self, word: str) -> Union[bool, str]:
        """Detect the language of a word with googletrans.

        :param word: text whose language is detected
        :return: the detected language code, or False when the detection
            confidence is below 0.5 or the code is not a value of LANGCODES
        """
        detection = self.translator.detect(word)
        if detection.confidence < 0.5:
            return False
        if detection.lang in LANGCODES.values():
            return detection.lang
        return False

    def slice_language(self, word: str) -> str:
        """Return the word without its leading language indicator.

        A word is considered prefixed when it is longer than three
        characters and its third character is a colon; any other word is
        returned unchanged.
        """
        if len(word) > 3 and word[2] == ':':
            return word[3:]
        return word

    def get_language(self, word: str) -> Union[bool, str]:
        """Return the language to use as translation source for a word.

        Without a language indicator, the language is detected (see
        ``detect_lang``), which may yield False. With an indicator, the
        first two characters are used unless the translator detects a
        different language with a confidence above 0.9, in which case the
        detected language wins. Detection runs on the full word, indicator
        included.

        :param word: word, optionally prefixed with a language indicator
        :return: a language code, or False if no language could be detected
        """
        if self.slice_language(word) == word:
            # the word has no language indicator
            return self.detect_lang(word)

        indicated = word[:2]
        detection = self.translator.detect(word)
        # Trust the translator over the indicator only when it is both
        # very confident and in disagreement.
        if detection.confidence > 0.9 and detection.lang != indicated:
            return detection.lang
        return indicated