def eval_coco_online():
    """Translate the COCO 5k Chinese test captions online and score them.

    Reads one caption per line from a hard-coded source file, translates each
    line with ``googletrans.Translator``, writes the lower-cased translations
    (one per line) to ``tmp/coco_test_5k_en_online.txt`` and then passes that
    file stem to ``utils.text2textid``, ``utils.test2cocojson`` and
    ``language_eval_json``.

    Returns nothing; the value returned by ``language_eval_json`` is
    discarded.
    """
    import io

    from googletrans import Translator

    src_path = '/home/jxgu/github/im2text_jxgu/pytorch/tmp/coco_test_5k_zh.txt'
    # io.open gives explicit-encoding text I/O on both Python 2 and 3, and the
    # with-block guarantees the handle is closed.
    with io.open(src_path, 'r', encoding='utf-8') as src:
        zh_lines = src.readlines()

    en_lines = []
    translator = Translator()
    for count, line in enumerate(zh_lines, 1):
        en_lines.append(translator.translate(line).text)
        if count % 100 == 0:
            # A fresh client is created after every 100 requests.
            translator = Translator()
            print('... %d sentences prepared' % count)

    tmp_name = 'tmp/coco_test_5k_en_online'
    with io.open(tmp_name + '.txt', 'w', encoding='utf-8') as out:
        for sentence in en_lines:
            # The translated text may be a byte string depending on the
            # Python / googletrans version; normalise to text once so the
            # file never receives a bytes repr or an implicit ASCII decode.
            if isinstance(sentence, bytes):
                sentence = sentence.decode('utf-8')
            out.write(u"%s\n" % sentence.lower())

    ref_id_json = 'tmp/captions_val_image_info_karpathy_5k_test_11080899_results.json'
    utils.text2textid(tmp_name, ref_id_json)
    print('Calculating scores for the generated results ... ...')
    utils.test2cocojson(tmp_name + '_id')
    language_eval_json(tmp_name)
def eval_coco_online(mscoco_zh_text):
    """Translate a Chinese caption file online and score the result.

    Args:
        mscoco_zh_text: path to a text file with one Chinese caption per
            line. The output stem is derived from it by replacing ``'zh'``
            with ``'en'`` and ``'.txt'`` with ``'offline'``; the reference
            JSON passed to ``utils.text2textid`` is the same path with
            ``'.txt'`` replaced by ``'.json'``.

    Each line is translated with ``googletrans.Translator``; the lower-cased
    translations are written one per line to ``<stem>.txt`` and the stem is
    passed to ``utils.text2textid`` and ``utils.test2cocojson`` before
    ``language_eval`` is called on the loaded ``<stem>_id.json``.

    Returns nothing; the value returned by ``language_eval`` is discarded.
    """
    import io

    from googletrans import Translator

    # io.open gives explicit-encoding text I/O on both Python 2 and 3, and the
    # with-block guarantees the handle is closed.
    with io.open(mscoco_zh_text, 'r', encoding='utf-8') as src:
        zh_lines = src.readlines()

    en_lines = []
    translator = Translator()
    for count, line in enumerate(zh_lines, 1):
        en_lines.append(translator.translate(line).text)
        if count % 100 == 0:
            # A fresh client is created after every 100 requests.
            translator = Translator()
            print('... %d sentences prepared' % count)

    # NOTE(review): both replacements act on the whole path (directories
    # included), and the 'offline' suffix in this *online* function looks like
    # a copy-paste slip; kept as-is because it determines output file names.
    tmp_name = mscoco_zh_text.replace('zh', 'en').replace('.txt', 'offline')
    with io.open(tmp_name + '.txt', 'w', encoding='utf-8') as out:
        for sentence in en_lines:
            # The translated text may be a byte string depending on the
            # Python / googletrans version; normalise to text once so the
            # file never receives a bytes repr or an implicit ASCII decode.
            if isinstance(sentence, bytes):
                sentence = sentence.decode('utf-8')
            out.write(u"%s\n" % sentence.lower())

    print('Calculating scores for the generated results ... ...')
    utils.text2textid(tmp_name, mscoco_zh_text.replace('.txt', '.json'))
    utils.test2cocojson(tmp_name + '_id')
    with open(tmp_name + '_id.json') as results_file:
        results = json.load(results_file)
    language_eval('coco', results, opt.id, '')
def eval_coco_offline():
    """Score offline (NMT) English translations of the COCO 5k test captions.

    With the hard-coded flags below (``use_translation`` and
    ``self_bleu_flag`` both ``False``) the function reads
    ``tmp/coco_test_5k_zh.en.txt``, writes the lower-cased lines to
    ``tmp/coco_test_5k_en_offline.txt`` and passes that stem to
    ``utils.text2textid``, ``utils.test2cocojson`` and ``language_eval_json``.

    Flipping ``use_translation`` first runs ``translate.py`` from
    ``neural_machine_translation``; flipping ``self_bleu_flag`` prints
    ``utils.self_bleu`` scores instead of running the caption evaluation.

    Returns nothing; the value returned by ``language_eval_json`` is
    discarded.
    """
    import io

    root = os.getcwd() + '/'
    use_translation = False
    mscoco_src_text = '/home/jxgu/github/im2text_jxgu/pytorch/tmp/coco_test_5k_zh.txt'

    if use_translation and os.path.exists(mscoco_src_text):
        nmt_model = root + "neural_machine_translation/save/20180308-091231/demo-model-0303-full_acc_54.75_ppl_9.10_e22.pt"
        print("Start translating chinese to english ...")
        # Argument list + cwd instead of a concatenated shell string, so paths
        # containing spaces or shell metacharacters are passed through intact.
        # NOTE(review): the output goes to tmp/coco_test_5k_en.txt, which is
        # not the file either branch below reads - confirm the intended name.
        subprocess.check_output(
            ['python', 'translate.py',
             '-model', nmt_model,
             '-src', mscoco_src_text,
             '-output', root + 'tmp/coco_test_5k_en.txt',
             '-verbose', '-gpu', '0'],
            cwd='neural_machine_translation')
        print("Finish translating chinese to english ...")

    self_bleu_flag = False
    print('------------------------------------------- aic self-bleu')
    if self_bleu_flag:
        with io.open('tmp/coco_test_5k_zh_en.txt', 'r', encoding='utf-8') as src:
            translated = src.readlines()

        en_lines = []
        for count, line in enumerate(translated, 1):
            en_lines.append(line.replace("there is", ""))
            if count % 100 == 0:
                print('... %d sentences prepared' % count)

        reference = [nltk.word_tokenize(line) for line in en_lines]
        for i in range(7):
            print('Self BLEU: {}'.format(utils.self_bleu(reference, i)))
    else:
        tmp_name = 'tmp/coco_test_5k_en_offline'
        # Explicit UTF-8 text I/O behaves the same on Python 2 and 3 and
        # avoids writing a bytes repr / relying on an implicit ASCII decode.
        with io.open('tmp/coco_test_5k_zh.en.txt', 'r', encoding='utf-8') as src:
            en_lines = src.readlines()
        with io.open(tmp_name + '.txt', 'w', encoding='utf-8') as out:
            for line in en_lines:
                # Lines keep their own newline from readlines().
                out.write(line.lower())

        ref_id_json = 'tmp/captions_val_image_info_karpathy_5k_test_11080899_results.json'
        utils.text2textid(tmp_name, ref_id_json)
        print('Calculating scores for the generated results ... ...')
        utils.test2cocojson(tmp_name + '_id')
        language_eval_json(tmp_name)
    print('------------------------------------------- end')
def eval_coco_offline():
    """Caption, translate offline with OpenNMT, and score the translations.

    Steps, in order:
      1. ``i2t_eval()`` supplies the path of a caption JSON dump.
      2. ``utils.cocojson2text`` converts it to a ``.txt`` file next to it.
      3. ``translate.py`` is run from ``../OpenNMT-py`` to translate that
         text file; the output path is the text path with ``'zh'`` replaced
         by ``'en'``.
      4. With ``self_bleu_flag`` hard-coded to ``False``, the translated
         lines are lower-cased into ``<stem>.txt``, and the stem is passed to
         ``utils.text2textid`` and ``utils.test2cocojson`` before
         ``language_eval`` is called on the loaded ``<stem>_id.json``.

    Returns nothing; the value returned by ``language_eval`` is discarded.

    Raises:
        subprocess.CalledProcessError: if ``translate.py`` exits non-zero.
    """
    import io

    dump_json_path = i2t_eval()
    root = os.getcwd() + '/'
    mscoco_zh_text = dump_json_path.replace('.json', '.txt')
    # NOTE(review): replace('zh', 'en') acts on every 'zh' in the whole path,
    # directory components included; kept as-is because it determines the
    # file names used throughout this function.
    mscoco_en_text = mscoco_zh_text.replace('zh', 'en')
    utils.cocojson2text(dump_json_path, mscoco_zh_text)

    nmt_model = root + "save/opennmt/aic_zh2en_nmt_part_acc_61.49_ppl_7.43_e22.pt"
    print("Start translating chinese to english ...")
    # The subprocess runs in another directory, so the text paths must be
    # absolute. abspath() handles both relative and already-absolute dump
    # paths (plain concatenation with the cwd mangled absolute ones), and the
    # argument list keeps paths with spaces intact.
    subprocess.check_output(
        ['python', 'translate.py',
         '-model', nmt_model,
         '-src', os.path.abspath(mscoco_zh_text),
         '-output', os.path.abspath(mscoco_en_text),
         '-verbose', '-gpu', '0'],
        cwd='../OpenNMT-py')
    print("Finish translating chinese to english ...")

    self_bleu_flag = False
    if self_bleu_flag:
        print('------------------------------------------- aic self-bleu')
        with io.open(mscoco_en_text, 'r', encoding='utf-8') as src:
            translated = src.readlines()

        en_lines = []
        for count, line in enumerate(translated, 1):
            en_lines.append(line.replace("there is", ""))
            if count % 100 == 0:
                print('... %d sentences prepared' % count)

        reference = [nltk.word_tokenize(line) for line in en_lines]
        for i in range(7):
            print('Self BLEU: {}'.format(utils.self_bleu(reference, i)))
    else:
        tmp_name = mscoco_en_text.replace('.txt', 'offline')
        # Explicit UTF-8 text I/O behaves the same on Python 2 and 3 and
        # avoids writing a bytes repr / relying on an implicit ASCII decode.
        with io.open(mscoco_en_text, 'r', encoding='utf-8') as src:
            en_lines = src.readlines()
        with io.open(tmp_name + '.txt', 'w', encoding='utf-8') as out:
            for line in en_lines:
                # Lines keep their own newline from readlines().
                out.write(line.lower())

        utils.text2textid(tmp_name, dump_json_path)
        print('Calculating scores for the generated results ... ...')
        utils.test2cocojson(tmp_name + '_id')
        with open(tmp_name + '_id.json') as results_file:
            results = json.load(results_file)
        language_eval('coco', results, opt.id, '')
def eval_30K(type, text_in):
    """Prepare English captions for the Flickr 1k test split and score them.

    Args:
        type: ``'online'`` translates every input line with
            ``googletrans.Translator``; any other value treats the input as
            already-translated text and only removes the substring
            ``"there is"`` from each line. The value is also appended to the
            output stem ``tmp/flickr_test_1k_en_``.
        text_in: path to the input text file, one sentence per line.

    The lower-cased sentences are written one per line to ``<stem>.txt`` and
    the stem is passed to ``utils.text2textid``, ``utils.test2cocojson`` and
    ``language_eval_json_30K``.

    Returns nothing; the value returned by ``language_eval_json_30K`` is
    discarded.
    """
    import io

    online = type == 'online'
    if online:
        from googletrans import Translator
        translator = Translator()

    # io.open gives explicit-encoding text I/O on both Python 2 and 3, and the
    # with-block guarantees the handle is closed.
    with io.open(text_in, 'r', encoding='utf-8') as src:
        zh_lines = src.readlines()

    en_lines = []
    for count, line in enumerate(zh_lines, 1):
        if online:
            sentence = translator.translate(line).text
        else:
            sentence = line.replace("there is", "")
        en_lines.append(sentence)
        if count % 100 == 0:
            if online:
                # A fresh client is created after every 100 requests.
                translator = Translator()
            print('... %d sentences prepared' % count)

    tmp_name = 'tmp/flickr_test_1k_en_' + type
    with io.open(tmp_name + '.txt', 'w', encoding='utf-8') as out:
        for sentence in en_lines:
            # The translated text may be a byte string depending on the
            # Python / googletrans version; normalise to text once.
            if isinstance(sentence, bytes):
                sentence = sentence.decode('utf-8')
            # Lines taken straight from the input still carry their newline;
            # strip it so exactly one sentence is written per output line
            # instead of a sentence followed by a blank line.
            out.write(u"%s\n" % sentence.rstrip(u'\r\n').lower())

    ref_id_json = 'tmp/captions_val_image_info_karpathy_1k_test_11080899_results.json'
    utils.text2textid(tmp_name, ref_id_json)
    print('Calculating scores for the generated results ... ...')
    utils.test2cocojson(tmp_name + '_id')
    language_eval_json_30K(tmp_name)