def test_bulk_translate(self):
    """Check that ``bulk_translate`` handles a 20-item list, twice in a row.

    The second pass reuses the same ``Translator`` object to confirm that
    an instance stays usable after a bulk request.
    """
    translator = Translator()
    expected = ['사과', '바나나'] * 10

    # Two passes: first a fresh call, then again with the same instance object.
    for _ in range(2):
        words = ['apple', 'banana'] * 10
        assert translator.bulk_translate(words) == expected
"host": os.getenv("POSTGRES_HOST"), "database": os.getenv("POSTGRES_DATABASE"), "user": os.getenv("POSTGRES_USER"), "password": os.getenv("POSTGRES_PASSWORD") } async def connect_to_db(): """Create a pool to the postgres database using asyncpg""" return await asyncpg.create_pool(**postgres_options, command_timeout=60) # papago translation keys - not needed - https://developers.naver.com/docs/papago/ papago_client_id = os.getenv("PAPAGO_CLIENT_ID") papago_client_secret = os.getenv("PAPAGO_CLIENT_SECRET") translator = Translator() # LastFM last_fm_api_key = os.getenv("LAST_API_KEY") last_fm_shared_secret = os.getenv("LAST_SHARED_SECRET") last_fm_root_url = os.getenv("LAST_ROOT_URL") last_fm_headers = {'user-agent': os.getenv("LAST_USER_AGENT")} # Patreon patreon_link = os.getenv("PATREON_LINK") patreon_role_id = make_int(os.getenv("PATREON_ROLE_ID")) patreon_super_role_id = make_int(os.getenv("PATREON_SUPER_ROLE_ID")) # startup time startup_time = datetime.now()
"""
Automatically translate the contents of a file with Papago.

made by 안산드레이아스

Installation:
    pip install pypapago
"""
from pypapago import Translator

# Name of the file to load (located in the same directory as this script).
fileName = './test.txt'

# Create the translator object.
translator = Translator()

# Read the first line of the input; undecodable bytes are dropped.
with open(fileName, encoding='utf-8', errors='ignore') as f:
    line = f.readline()

print(line)        # Show the text read from the file.
print(type(line))  # Show the type of the text.

# Text to translate. readline() keeps the trailing line break, which the
# translation request cannot handle, so it is removed here.
forTranslateString = line.rstrip('\r\n')

# Translate English -> Korean. An empty line is not sent to the service;
# the result is simply an empty string in that case.
if forTranslateString:
    result = translator.translate(
        forTranslateString, source='en', target='ko', verbose=False)
else:
    result = ''

print(result)  # Show the translated text.

# Save the translated text. The encoding is explicit so that Korean output
# does not depend on the platform's default encoding.
with open("afterTranslate.txt", 'w', encoding='utf-8') as f:
    f.write(result)
def __init__(self):
    """Create the translator and the BERT fine-tuning helper used by this object."""
    # What gets built here depends on which embedding model is to be loaded.
    self.translator = Translator()
    self.bft = BertFineTuning()
class Text_preprocess:
    """Preprocessing helpers for headline data.

    Bundles translation (``self.translator``), embedding (``self.bft``),
    subject/relation/object extraction and several clean-up filters that
    operate on a ``'header'`` column.
    """

    def __init__(self):
        # What gets built here depends on which embedding model is to be loaded.
        self.translator = Translator()
        self.bft = BertFineTuning()

    def word_embbeding(self, text):
        """Return ``self.bft.word_embedding(text)``."""
        return self.bft.word_embedding(text)

    def papago(self, text, source = "ko", target = "en"):
        """Translate ``text`` from ``source`` to ``target``.

        Best effort: any translation error yields an empty string instead
        of propagating.
        """
        try:
            return self.translator.translate(text, source=source, target=target)
        except Exception:
            # Deliberately not a bare ``except`` so that KeyboardInterrupt and
            # SystemExit still propagate.
            return ""

    def SVO_extractor(self, data):
        """Collect subject / relation / object triples from every row's header.

        Args:
            data: object whose ``iterrows()`` yields ``(index, row)`` pairs;
                each row is read through ``row['header']`` and ``row['date']``.

        Returns:
            dict with parallel lists under ``'s_pos'``, ``'v_pos'``,
            ``'o_pos'``, ``'label'`` (always 1) and ``'date'``.
        """
        svo_pos = {'s_pos': [], 'v_pos': [], 'o_pos': [], 'label': [], 'date': []}
        with StanfordOpenIE() as client:
            for _, row in tqdm(data.iterrows()):
                try:
                    for sentence in client.annotate(row['header']):
                        svo_pos['s_pos'].append(sentence['subject'])
                        svo_pos['v_pos'].append(sentence['relation'])
                        svo_pos['o_pos'].append(sentence['object'])
                        svo_pos['label'].append(1)
                        svo_pos['date'].append(row['date'])
                except AttributeError:
                    # Best effort: rows that raise AttributeError are skipped.
                    # NOTE(review): presumably non-string headers - confirm.
                    continue
        return svo_pos

    def svo_embedding(self, svo_pos):
        """Replace every subject / relation / object entry by its embedding, in place.

        Returns the same ``svo_pos`` dict that was passed in.
        """
        for i in tqdm(range(len(svo_pos['s_pos']))):
            for key in ('s_pos', 'v_pos', 'o_pos'):
                svo_pos[key][i] = self.word_embbeding(svo_pos[key][i])
        return svo_pos

    def clean_text(self, text):
        """Remove bracketed spans and punctuation, then map selected Hanja to Hangul.

        Non-string input (anything ``re.sub`` would reject) is treated as
        an empty string. The Hanja mapping comes from the module-level
        ``replace_dict``.
        """
        if not isinstance(text, str):
            text = ''

        # (...) spans, [...] spans, <...> spans, then any remaining punctuation.
        patterns = [r"\([^<|>]*\)", r"\[[^<|>]*\]", r"\<[^<|>]*\>", r"[^\w\s]"]
        for pattern in patterns:
            text = re.sub(pattern, '', text)

        # Literal replacement: the keys are plain characters, not regex patterns.
        for hanza, replacement in replace_dict.items():
            text = text.replace(hanza, replacement)
        return text.strip()

    def mkstopwords(self, stopword):
        """Join the stop words with ``'|'`` into one regex alternation.

        The words are not escaped, so regex metacharacters in them keep
        their special meaning when the result is used as a pattern.
        """
        return '|'.join(stopword)

    def mk_del_smallwords(self, data, j):
        """Drop rows whose header is too short or still contains Han characters.

        A row is removed when its header has ``j`` or fewer space-separated
        tokens, or when it contains a character in U+4E00-U+9FFF (its
        meaning cannot be predicted, so the row is discarded).

        Returns:
            A new frame without the removed rows.
        """
        han_char = re.compile(r"[\u4e00-\u9fff]")
        drop_labels = []
        for label, header in data['header'].items():
            header = str(header)
            if len(header.split(' ')) <= j or han_char.search(header):
                drop_labels.append(label)
        return data.drop(drop_labels)

    def stop_words(self, data, stopword, i):
        """Remove rows whose header matches a stop word, then apply ``mk_del_smallwords``.

        Args:
            data: frame with a ``'header'`` column.
            stopword: iterable of stop words (used as a regex alternation).
            i: token-count threshold forwarded to ``mk_del_smallwords``.
        """
        stopwords = self.mkstopwords(stopword)
        # An empty pattern matches every header and would drop the whole
        # frame, so the stop-word filter is skipped when there are no words.
        if stopwords:
            data = data.loc[~data['header'].str.contains(stopwords, na=False)]
        return self.mk_del_smallwords(data, i)
def test_verbose_request(self):
    """A verbose Korean -> English request returns exactly ``self.VERBOSE_KEYS``."""
    response = Translator().translate(
        '사과',
        source='ko',
        target='en',
        verbose=True,
    )
    returned_keys = set(response.keys())
    assert returned_keys == self.VERBOSE_KEYS
def test_default_request(self):
    """With no language arguments, 'Apple' is translated to '사과'."""
    translated = Translator().translate('Apple')
    assert translated == '사과'
def test_create_instance(self):
    """Constructor arguments are stored on ``headers`` and ``regex_pattern``."""
    sample_re_pattern = re.compile('w+')
    constructor_kwargs = {
        'headers': {'test': 1234},
        'regex_pattern': sample_re_pattern,
    }
    translator = Translator(**constructor_kwargs)

    assert translator.headers == {'test': 1234}
    assert translator.regex_pattern == sample_re_pattern
# Example 1: translate with the default language settings.
from pypapago import Translator

translator = Translator()
result = translator.translate('I am GROOT')
print(result)  # expected output: 나는 그루트다

# Example 2: translate Korean -> English with explicit language codes.
from pypapago import Translator

translator = Translator()
result = translator.translate('카카오는 파파고를 좋아해', source='ko', target='en')
print(result)  # expected output: Kakao likes papago.

# Language codes accepted for ``source`` / ``target``:
#
#   Code    Description
#   ko      Korean
#   en      English
#   ja      Japanese
#   zh-CN   Chinese
#   zh-TW   Chinese (traditional)
#   es      Spanish
#   fr      French
#   vi      Vietnamese
#   th      Thai
#   id      Indonesian
def _extract_user_message(slack_event):
    """Return the text element that follows the bot mention, or '' if there is none."""
    try:
        text = slack_event["event"]["blocks"][0]['elements'][0]['elements'][1]['text']
    except (KeyError, IndexError, TypeError):
        # A mention with nothing after it (or an unexpected block layout)
        # has no second text element.
        return ''
    return text if isinstance(text, str) else ''


def event_handler(event_type, slack_event):
    """Answer a Slack event; only ``app_mention`` events are handled.

    The mention text is matched against keywords (notices, cafeteria menu,
    translation, bus information, greeting) and the answer is posted back
    to the originating channel as a single attachment.

    Args:
        event_type: Slack event type string.
        slack_event: decoded event payload; ``slack_event["event"]`` must
            contain ``"channel"`` for ``app_mention`` events.

    Returns:
        The ``make_response(...)`` result: status 200 in both cases, with an
        ``X-Slack-No-Retry`` header when the event type is not handled.
    """
    if event_type == "app_mention":
        channel = slack_event["event"]["channel"]
        # Falls back to '' so that a bare mention gets the "did not
        # understand" answer instead of raising on the missing element.
        userMessage = _extract_user_message(slack_event)

        # Attachment keys used below: 'color', 'pretext', 'text' and
        # 'mrkdwn_in' (the fields in which markdown is applied).
        attachments_dict = dict()
        attachments_dict['color'] = '#2398cf'

        if '공지' in userMessage:
            # Notices: scholarship / academic / general.
            if '장학' in userMessage:
                req = requests.get(scholarshipNoticeUrl)
                noticeType = '장학공지'
            elif '학사' in userMessage:
                req = requests.get(bachelorNoticeUrl)
                noticeType = '학사공지'
            else:
                req = requests.get(generalNoticeUrl)
                noticeType = '일반공지'

            soup = BeautifulSoup(req.text, 'html.parser')
            Notice = NoticeCrawler(soup, noticeType)
            attachments_dict = Notice.crawling().getAnswer()
        elif '학식' in userMessage:
            # Cafeteria menu: the response body is decoded as EUC-KR by hand.
            req = requests.get(cafeteriaUrl)
            soup = BeautifulSoup(req.content.decode('euc-kr', 'replace'),
                                 'html.parser')
            Cafeteria = CafeteriaCrawler(soup)
            attachments_dict['text'] = Cafeteria.crawling().getAnswer()
        elif '번역' in userMessage:
            # Translation: everything after the first three characters of
            # the message is sent to Papago.
            pypapago = Translator()
            attachments_dict['pretext'] = '*[번역] 파파고는 말한다. *:penguin:'
            attachments_dict['text'] = '```' + pypapago.translate(
                userMessage[3:]) + '```'
            attachments_dict['mrkdwn_in'] = ["text", "pretext"]
        elif '버스' in userMessage or '대성' in userMessage:
            # Bus information.
            req = requests.get(businformationUrl)
            soup = BeautifulSoup(req.text, 'html.parser')
            BusInfo = BusInformation(soup)
            attachments_dict = BusInfo.crawling().getAnswer()
        elif '안녕' in userMessage:
            # Greeting.
            attachments_dict[
                'text'] = '나는 *지마블루*:small_blue_diamond: 진리를 찾아 이곳까지 왔죠. \n시간이 얼마 남지 않았습니다. ~*이 활동이 저의 마지막이 될 것 입니다.*~'
        else:
            attachments_dict['text'] = '무슨 말인지 모르겠네요..'

        # Post the answer as one attachment in the channel of the mention.
        slack.chat.post_message(channel,
                                attachments=[attachments_dict],
                                as_user=True)

        return make_response(
            "앱 멘션 메시지가 보내졌습니다.",
            200,
        )

    message = "[%s] 이벤트 핸들러를 찾을 수 없습니다." % event_type
    return make_response(message, 200, {"X-Slack-No-Retry": 1})
decoder=decoder, optimizer=optimizer) ckpt_manager = tf.train.CheckpointManager(ckpt, CKPT_DIR, max_to_keep=5) start_epoch = 0 #assert ckpt_manager.latest_checkpoint != True start_epoch = int(ckpt_manager.latest_checkpoint.split('-')[-1]) # checkpoint_path에서 가장 최근의 checkpoint를 restore합니다. ckpt.restore(ckpt_manager.latest_checkpoint) image_path = INPUT_DIR + '/test.png' result, attention_plot = evaluate(image_path, max_length, attention_features_shape, encoder, decoder, image_features_extract_model, tokenizer) predicted_caption = ' '.join(result) predicted_caption = predicted_caption[:-5] translator = Translator() translated_caption = translator.translate(predicted_caption) print('Prediction Caption:', predicted_caption) print('Korean Caption : ', translated_caption) # 한국어 캡션 저장 f = open(OUTPUT_DIR + "/test.txt", mode='wt', encoding='utf-8') f.write(translated_caption) f.close() # Attention Plot plot_attention(image_path, result, attention_plot)