def process_df(source, target, text, with_styles=True):
    """Translate *text* as a whole and then word-by-word, collecting the
    per-word results into a DataFrame.

    The full sentence is printed first with its joined pronunciation, then
    every whitespace-separated token is translated and printed inline.

    :param source: source language code
    :param target: target language code
    :param text: sentence to translate
    :param with_styles: when True, return a styled DataFrame (uses the
        module-level ``styles``); otherwise return the plain DataFrame
    :return: DataFrame with columns word/trans/ips
    """
    import sagas
    opts = {'get_pronounce'}
    whole, tracker = translate(text, source=source, target=target,
                               trans_verbose=False, options=opts)
    print('✁', '%s(%s %s)' % (text, whole, ''.join(tracker.pronounce)))
    rows = []
    for token in text.split(' '):
        res, t = translate(token, source=source, target=target,
                           trans_verbose=False, options=opts)
        print('%s(%s%s)' % (token, res, marks(t)), end=" ")
        rows.append((token, res, marks(t, False)))
    print('.')
    df = sagas.to_df(rows, ['word', 'trans', 'ips'])
    return df.style.set_table_styles(styles) if with_styles else df
def proc_word(self, type_name: Text, word: Text, head: Text, index: int,
              r, lang: Text) -> Dict[Text, Any]:
    """Build the info record for a single word: transliteration plus
    translation, and — when *head* is non-empty — the head's translation
    as well (with auto-correction disabled).

    :param type_name: kind of the word entry
    :param word: surface form of the word
    :param head: the word's syntactic head ('' when absent)
    :param index: position of the word
    :param r: row-like object providing the word's 'lemma'
    :param lang: language code of the word
    :return: dict describing the word (and optionally its head)
    """
    from sagas.tool.misc import translit_chunk, display_synsets, target_lang
    from sagas.nlu.translator import translate

    trans, _ = translate(word, source=lang, target=target_lang(lang),
                         trans_verbose=False)
    info = {
        'type': type_name,
        'text': word,
        'lemma': r['lemma'],
        'translit': translit_chunk(word, lang),
        'translate': trans,
        'lang': lang,
        'index': index,
    }
    if head != '':
        head_trans, _ = translate(head, source=lang,
                                  target=target_lang(lang),
                                  trans_verbose=False,
                                  options={'disable_correct'})
        info['head'] = head
        info['head_trans'] = head_trans
    return info
def deconstructing(self, text, target='ar'):
    """Translate an English sentence as a whole, then token-by-token,
    printing pronunciation and the alternative translations table for
    every token.

    $ python -m sagas.ar.arabic_processor deconstructing 'I am a student'
    $ python -m sagas.ar.arabic_processor deconstructing 'I am a student' de
    $ python -m sagas.ar.arabic_processor deconstructing 'I am a student' fr
    $ python -m sagas.ar.arabic_processor deconstructing 'I am a student' es
    $ python -m sagas.ar.arabic_processor deconstructing 'I am a student' vi
    ## other langs: ru, ja, zh

    :param text: English sentence to deconstruct
    :param target: target language code
    :return: None (results are printed)
    """
    import sagas
    opts = {'get_pronounce', 'get_translations'}
    whole, tracker = translate(text, source='en', target=target,
                               trans_verbose=False, options=opts)
    print('✁', '%s(%s %s)' % (text, whole, ''.join(tracker.pronounce)))
    for token in text.split(' '):
        res, t = translate(token, source='en', target=target,
                           trans_verbose=False, options=opts)
        print('%s(%s%s)' % (token, res, marks_th(t.pronounce)))
        sagas.print_df(t.translations)
    print('.')
def process(self, source, target, text, ips_idx=1):
    """Translate *text* whole and then token-by-token, printing the
    pronunciation entry selected by *ips_idx* next to each translation.

    from sagas.ko.korea_processor import KoreaProcessor
    kp=KoreaProcessor()
    kp.process('ko', 'en', '나는 냉장고가 두 개있다.', 0)

    see also: procs-ko-tr.ipynb

    :param source: source language code
    :param target: target language code
    :param text: sentence to process
    :param ips_idx: index into the tracker's pronounce list to display
    :return: None (results are printed)
    """
    opts = {'get_pronounce'}
    whole, tracker = translate(text, source=source, target=target,
                               trans_verbose=False, options=opts)
    print('✁', '%s(%s %s)' % (text, whole, tracker.pronounce[ips_idx]))
    for token in text.split(' '):
        res, t = translate(token, source=source, target=target,
                           trans_verbose=False, options=opts)
        # drop the leading marker character of the pronounce entry
        print('%s(%s,%s)' % (token, res, t.pronounce[ips_idx][1:]), end=" ")
    print('.')
def process_en_th(text, target='th'):
    """Translate an English sentence to *target*, whole and then
    word-by-word, printing pronunciation marks for each token.

    process_en_th('I have two refrigerators')
    process_en_th('I have two refrigerators', 'ar')
    process_en_th('We are going to the park to play', 'ar')
    # hebrew
    process_en_vi('I have two refrigerators', 'iw')
    process_en_th('I have two refrigerators', 'hi')
    process_en_th('I am a student', 'hi')
    # greek language
    process_en_th('I am a student', 'el')
    process_en_th('I have two refrigerators', 'el')
    process_en_th('I have two refrigerators', 'ko')

    :param text: English text
    :param target: target language code
    :return: None (results are printed)
    """
    opts = {'get_pronounce'}
    whole, tracker = translate(text, source='en', target=target,
                               trans_verbose=False, options=opts)
    print('✁', '%s(%s %s)' % (text, whole, ''.join(tracker.pronounce)))
    for token in text.split(' '):
        res, t = translate(token, source='en', target=target,
                           trans_verbose=False, options=opts)
        print('%s(%s%s)' % (token, res, marks_th(t.pronounce)), end=" ")
    print('.')
def trans_google(self, ctx: TransContext):
    """Translate the context's text into every language in
    ``ctx.target_list`` via the translator service, retrying empty
    results up to ``self.retries`` times per target.

    On success each target contributes a ``xx="translation"`` line (plus
    ``v0="..."`` pronunciation lines) to ``ctx.target_sents`` and an entry
    in ``ctx.sents_map``.

    :param ctx: translation context carrying source/targets/text/says
    :return: True when every target produced a translation, False as soon
        as one stays empty after all retries
    """
    from tqdm import tqdm
    from sagas.nlu.translator import translate

    source, targets, text, says = ctx.pars()
    print('.. translate to', ctx.target_list)
    for target in tqdm(ctx.target_list):
        # start from the configured default options for each target
        options = set(cf.ensure('translator_opts'))
        # pronunciation is requested only for the language being spoken
        if says == target:
            options.add('get_pronounce')
        if says == source and target == 'en':
            options.add('get_pronounce')

        trans, tracker = translate(text, source=source, target=target,
                                   options=options)
        # the service occasionally returns an empty string: retry a few times
        count = 0
        while trans == '':
            print('wait a second, try again ...')
            sleep(1)
            trans, tracker = translate(text, source=source, target=target,
                                       options=options)
            count += 1
            if count > self.retries:
                break

        if trans == '':
            print(
                'translate fail, the clipboard content has not been changed.'
            )
            # will exit
            return False

        line = '%s="%s"' % (target[:2], trans)
        ctx.target_sents.append(line)
        for i, p in enumerate(tracker.pronounce):
            ps = p[2:]
            ctx.target_sents.append(f'v{i}="{ps}"')
        ctx.sents_map[target[:2]] = trans
    return True
def translate(self, text, target='zh-CN', source='auto', verbose=False):
    """Translate *text* and print the result, with both the correction
    and the cache layers disabled.

    $ python -m sagas.nlu.translator_cli translate 'Садись, где хочешь.'
    $ python -m sagas.nlu.translator_cli translate 'Садись, где хочешь.' en
    $ python -m sagas.nlu.translator_cli translate 'Садись, где хочешь.' en ru
    # multi-sentences
    $ python -m sagas.nlu.translator_cli translate 'Что в этом конверте? Письмо и фотографии.' ja auto True
    $ python -m sagas.nlu.translator_cli translate 'Что в этом конверте? Письмо и фотографии.' en auto True
    $ python -m sagas.nlu.translator_cli translate 'I am a student.' ar en True
    $ python -m sagas.nlu.translator_cli translate 'I have two refrigerators' th en True
    $ python -m sagas.nlu.translator_cli translate 'I have two refrigerators' iw en True
    $ python -m sagas.nlu.translator_cli translate '次の信号を右に曲がってください。' zh ja True
    # word translations
    $ python -m sagas.nlu.translator_cli translate 'city' ar en True
    $ python -m sagas.nlu.translator_cli translate 'tiger' lo en True
    $ python -m sagas.nlu.translator_cli translate 'गतिविधि' en hi True
    $ python -m sagas.nlu.translator_cli translate 'fly' en no True

    :param text: text to translate
    :return: None (prints the translation)
    """
    result, _ = translate(text, source=source, target=target,
                          trans_verbose=verbose,
                          options={'disable_correct', 'disable_cache'})
    print(result)
def trans_subs(self, kind: Text, word: Text, lang: Text, pos: Text) -> bool:
    """Evaluate the *kind* predicate for *word*; when the word has no
    synsets of its own, fall back to its English translation candidates.

    Successful fallback matches are memoized via ``self.add_subs``.

    :param kind: predicate kind to check
    :param word: 'text/lemma' pair (or plain word) to test
    :param lang: language code of the word
    :param pos: part-of-speech tag
    :return: True when the predicate holds
    """
    from sagas.nlu.translator import translate, with_words, WordsObserver

    # the word already has synsets: evaluate the predicate directly
    if get_word_sets(word, lang, pos):
        return predicate(kind, word, lang, pos)

    # pick text or lemma from the 'text/lemma' pair (default: lemma)
    word = word.split('/')[self.get_opt('trans_idx', -1)]
    r, t = translate(word, source=lang, target='en',
                     options={'get_pronounce'}, tracker=with_words())
    logger.debug(f"translate {word}: {r}")
    if not r:
        # translation failed: defer to the cached substitutes
        return self.check_subs(kind, word, lang, pos)

    word_r = r.lower()
    candidates = t.observer(WordsObserver).get_axis(word_r, pos)
    result = predicate(kind, candidates, 'en', pos)
    if result:
        self.add_subs(word, word_r, candidates)
    return result
def proc_children_column(self, df, lang: Text) -> List[InferPart]:
    """Translate the children-chunk of each dependency relation in *df*,
    keeping only the first row seen per relation name.

    'punct' and 'head_root' relations are skipped entirely.

    :param df: dataframe with rel/children/index/text/lemma columns
    :param lang: language code of the chunks
    :return: list of InferPart records, one per translated relation
    """
    from sagas.nlu.translator import translate
    from sagas.tool.misc import translit_chunk, display_synsets, target_lang

    parts = []
    seen = []
    for _, row in df.iterrows():
        rel, children, idx = row['rel'], row['children'], row['index']
        if rel in seen:
            # only the first occurrence of a relation is processed
            continue
        seen.append(rel)
        if rel in ('punct', 'head_root'):
            continue
        sent = join_text(children, lang)
        res, _ = translate(sent, source=lang, target=target_lang(lang),
                           trans_verbose=False,
                           options={'disable_correct'})
        parts.append(InferPart(
            name=rel,
            chunk=sent,
            text=row['text'],
            lemma=row['lemma'],
            translit=translit_chunk(sent, lang),
            translate=res,
            index=idx,
        ))
    return parts
def trans_to(self, sents, targets):
    """Translate *sents* from this processor's language into each of the
    given target languages.

    :param sents: text to translate
    :param targets: iterable of target language codes
    :return: list of translations, one per target
    """
    from sagas.nlu.translator import translate
    return [translate(sents, source=self.lang, target=tgt)[0]
            for tgt in targets]
def analyse_en(self, sents):
    """Translate an English sentence into Arabic and analyse the result.

    $ python -m sagas.ar.arabic_processor analyse_en 'I am a student'

    :param sents: English sentence
    :return: None
    """
    arabic, _ = translate(sents, source='en', target='ar')
    print('✔', arabic)
    self.analyse(arabic)
def process_zh_vi(text, target='vi'):
    """Translate a Chinese sentence to *target*, whole and then
    segment-by-segment (segmentation done with jieba).

    process_zh_vi('我是一个学生')
    process_zh_vi('我是一个学生', 'th')

    :param text: Chinese text
    :param target: target language code
    :return: None (results are printed)
    """
    import jieba
    whole, tracker = translate(text, source='zh-CN', target=target,
                               trans_verbose=False)
    print('✁', '%s(%s %s)' % (text, whole, ''.join(tracker.pronounce)))
    for seg in jieba.cut(text):
        res, t = translate(seg, source='zh-CN', target=target,
                           trans_verbose=False)
        print('%s(%s%s)' % (seg, res, marks(t.pronounce)), end=" ")
    print('.')
def process(source, target, text):
    """Translate *text* whole and then word-by-word, printing each token
    with its translation and pronunciation marks.

    :param source: source language code
    :param target: target language code
    :param text: sentence to translate
    :return: None (results are printed)
    """
    opts = {'get_pronounce'}
    whole, tracker = translate(text, source=source, target=target,
                               trans_verbose=False, options=opts)
    print('✁', '%s(%s %s)' % (text, whole, ''.join(tracker.pronounce)))
    for token in text.split(' '):
        res, t = translate(token, source=source, target=target,
                           trans_verbose=False, options=opts)
        print('%s(%s%s)' % (token, res, marks(t.pronounce)), end=" ")
    print('.')
def trans(w, pos: Text):
    """Translate a Hindi word to English and collect candidate words for
    the given part of speech.

    :param w: Hindi word
    :param pos: part-of-speech tag used to select candidates
    :return: dict with the normalized 'word' and its 'candidates'
    """
    r, t = translate(w, source='hi', target='en',
                     options={'get_pronounce'}, tracker=with_words())
    obs: WordsObserver = t.observer(WordsObserver)
    return {'word': obs.get_axis(r.lower(), pos),
            'candidates': obs.get_candidates(pos)}
def process_en_vi(text, target='vi'):
    """Translate an English sentence to *target*, whole and then
    word-by-word, printing pronunciation marks for each token.

    process_en_vi('I am a student')
    # indonesian
    process_en_vi('I am a student', 'id')
    process_en_vi('I am a student', 'ar')
    # turkish
    process_en_vi('I am a student', 'tr')
    process_en_vi('I am a student', 'th')

    :param text: English text
    :param target: target language code
    :return: None (results are printed)
    """
    whole, tracker = translate(text, source='en', target=target,
                               trans_verbose=False)
    print('✁', '%s(%s %s)' % (text, whole, ''.join(tracker.pronounce)))
    for token in text.split(' '):
        res, t = translate(token, source='en', target=target,
                           trans_verbose=False)
        print('%s(%s%s)' % (token, res, marks(t.pronounce)), end=" ")
    print('.')
def trans_en(self, text, target='zh-CN'):
    """Translate an English text verbosely (auto-correction disabled) and
    print the result; debug logging for the google translator is enabled.

    $ python -m sagas.nlu.translator_cli trans_en 'I have two refrigerators' es
    $ python -m sagas.nlu.translator_cli trans_en 'I have two refrigerators' he

    :param text: English text
    :param target: target language code
    :return: None (prints the translation)
    """
    import sagas
    sagas.nlu.google_translator.logger.setLevel(logging.DEBUG)
    result, _ = translate(text, source='en', target=target,
                          trans_verbose=True,
                          options={'disable_correct'})
    print(result)
def analyse(self, sents_zh):
    """Translate a Chinese sentence into Korean, run the word-by-word
    process on the Korean result, then speak it aloud.

    analyse('现在整个世界都变成了一个村庄。')
    $ python -m sagas.ko.korea_processor analyse '现在整个世界都变成了一个村庄。'
    $ ko 美丽的花开得很好。
    $ ko 姐姐买来了新衣服。

    :param sents_zh: Chinese sentence
    :return: None
    """
    # translate from chinese to korean first
    sents_ko, _ = translate(sents_zh, source='zh-CN', target='ko')
    print('✔', sents_ko)
    self.process('ko', 'zh-CN', sents_ko)
    self.say_in(sents_ko, 'ko_KR', 200)
def get_word_trans(self, word, lang, pos='*'):
    """Translate *word* to English and print its candidate translations,
    grouped per part of speech.

    :param word: word to translate
    :param lang: source language code
    :param pos: part-of-speech filter for the candidate axis ('*' = any)
    :return: the lower-cased English translation, or '' when it fails
    """
    import sagas
    from sagas.nlu.translator import translate, with_words, WordsObserver

    r, t = translate(word, source=lang, target='en',
                     options={'get_pronounce'}, tracker=with_words())
    if not r:
        return ''
    word_r = r.lower()
    tc.emp('cyan', f"1. translate: {word_r}")
    obs: WordsObserver = t.observer(WordsObserver)
    dfs = obs.trans_dfs
    if not dfs:
        tc.emp('cyan', f"2. no candidates.")
        return word_r
    tc.emp('cyan', f"2. candidates: {obs.get_axis(word_r, pos)}")
    for k, df in dfs.items():
        print(f"- pos:{k} -")
        sagas.print_df(df)
    return word_r
def contrast(self, text, source, target='en', word_map=None):
    """Translate *text* and print a word-by-word contrast table between
    the source text and its translation.

    $ nlu contrast '저는 허락을 못 받아서 안 왔어요.' ko

    :param text: text to contrast
    :param source: source language code
    :param target: target language code
    :param word_map: optional precomputed word mapping
    :return: None (results are printed)
    """
    from sagas.nlu.translator import get_word_map
    from sagas.nlu.translator import translate
    from sagas.tool.misc import color_print

    r, tracker = translate(text, source=source, target=target,
                           options={'get_pronounce'})
    tc.info(r)
    for i, p in enumerate(tracker.pronounce):
        # drop the leading marker characters of the pronounce entry
        tc.info(f'v{i}="{p[2:]}"')

    use_translit = source in contrast_translit_langs
    rs, trans_table = get_word_map(source, target, text,
                                   words=word_map,
                                   local_translit=use_translit)
    for i, (k, r) in enumerate(rs.items()):
        tc.info(f"{i} - ", r.replace('\n', ' '))
    color_print('cyan', ' '.join(trans_table))
def tra(self, sents):
    """Translate *sents* from English into this processor's language,
    print the translation and return it.

    :param sents: English text
    :return: the translated text
    """
    from sagas.nlu.translator import translate
    result, _ = translate(sents, source='en', target=self.lang)
    print(result)
    return result