def eval_coco_online():
    """Translate the COCO 5k-test Chinese captions to English with the
    Google Translate web API, then score them with the COCO caption
    evaluation pipeline.

    Side effects: writes 'tmp/coco_test_5k_en_online.txt' plus the id/json
    files produced by the `utils` helpers. Requires network access.
    """
    from googletrans import Translator

    # Read the Chinese captions, one caption per line.
    zh_path = '/home/jxgu/github/im2text_jxgu/pytorch/tmp/coco_test_5k_zh.txt'
    with open(zh_path, 'r') as zh_file:
        zh_lines = zh_file.readlines()

    en_lines = []
    translator = Translator()
    for count, line in enumerate(zh_lines, start=1):
        translation = translator.translate(line)
        en_lines.append(translation.text)
        if count % 100 == 0:
            # Recreate the client periodically — long-lived googletrans
            # sessions tend to get throttled/expired by the service.
            translator = Translator()
            print('... %d sentences prepared' % count)

    tmp_name = 'tmp/coco_test_5k_en_online'
    # Open with an explicit encoding and write str, not encoded bytes:
    # "%s" % line.encode(...) would emit the "b'...'" repr under Python 3.
    with open(tmp_name + '.txt', 'w', encoding='utf-8') as out_file:
        for line in en_lines:
            out_file.write("%s\n" % line.lower())

    ref_id_json = 'tmp/captions_val_image_info_karpathy_5k_test_11080899_results.json'
    utils.text2textid(tmp_name, ref_id_json)
    print('Calculating scores for the generated results ... ...')
    utils.test2cocojson(tmp_name + '_id')
    lang_stats = language_eval_json(tmp_name)
# --- Example #2 (scrape-site separator; kept as a comment so the file parses) ---
def eval_coco_online(mscoco_zh_text):
    """Translate a file of Chinese captions to English with the Google
    Translate web API and score the result against COCO references.

    Args:
        mscoco_zh_text: path to the Chinese caption file; the path is
            expected to contain 'zh' and end in '.txt' (both are used to
            derive the output file names).

    Side effects: writes the translated text file plus the id/json files
    produced by the `utils` helpers. Requires network access.
    """
    from googletrans import Translator

    with open(mscoco_zh_text, 'r') as zh_file:
        zh_lines = zh_file.readlines()

    en_lines = []
    translator = Translator()
    for count, line in enumerate(zh_lines, start=1):
        translation = translator.translate(line)
        en_lines.append(translation.text)
        if count % 100 == 0:
            # Fresh client every 100 requests to dodge session throttling.
            translator = Translator()
            print('... %d sentences prepared' % count)

    # NOTE(review): replacing '.txt' with 'offline' drops the extension
    # separator and labels an *online* translation "offline" — this looks
    # like a copy/paste slip from eval_coco_offline. Behavior kept as-is;
    # confirm the intended output name.
    tmp_name = mscoco_zh_text.replace('zh', 'en').replace('.txt', 'offline')
    # Explicit encoding; writing encoded bytes would emit "b'...'" strings
    # under Python 3.
    with open(tmp_name + '.txt', 'w', encoding='utf-8') as out_file:
        for line in en_lines:
            out_file.write("%s\n" % line.lower())

    print('Calculating scores for the generated results ... ...')
    utils.text2textid(tmp_name, mscoco_zh_text.replace('.txt', '.json'))
    utils.test2cocojson(tmp_name + '_id')
    lang_stats = language_eval('coco', json.load(open(tmp_name + '_id.json')),
                               opt.id, '')
def eval_coco_offline():
    """Score COCO English captions produced by a local (offline) NMT model.

    When `use_translation` is True and the Chinese source file exists, the
    function first shells out to OpenNMT to translate zh->en. It then either
    computes self-BLEU (`self_bleu_flag`) or runs the COCO caption metrics.
    Both flags are hard-coded below.

    Side effects: runs a shell command, reads/writes files under tmp/.
    """
    root = os.getcwd() + '/'
    use_translation = False
    mscoco_src_text = '/home/jxgu/github/im2text_jxgu/pytorch/tmp/coco_test_5k_zh.txt'
    if os.path.exists(mscoco_src_text) and use_translation:
        nmt_model = root + "neural_machine_translation/save/20180308-091231/demo-model-0303-full_acc_54.75_ppl_9.10_e22.pt"
        print("Start translating chinese to english ...")
        # Parenthesized so the implicit literal concatenation of the output
        # path with " -verbose -gpu 0" (present in the original) is explicit.
        bashCommand = ("cd neural_machine_translation && python translate.py"
                       " -model " + nmt_model +
                       " -src " + mscoco_src_text +
                       " -output " + root + 'tmp/coco_test_5k_en.txt'
                       " -verbose -gpu 0")
        _output = subprocess.check_output(['bash', '-c', bashCommand])
        print("Finish translating chinese to english ...")

    self_bleu_flag = False
    print('------------------------------------------- aic self-bleu')
    if self_bleu_flag:
        with open('tmp/coco_test_5k_zh_en.txt', 'r') as in_file:
            zh_lines = in_file.readlines()
        en_lines = []
        for count, line in enumerate(zh_lines, start=1):
            # Drop the boilerplate phrase before tokenizing.
            en_lines.append(line.replace("there is", ""))
            if count % 100 == 0:
                print('... %d sentences prepared' % count)
        reference = [nltk.word_tokenize(line) for line in en_lines]
        for i in range(7):
            print('Self BLEU: {}'.format(utils.self_bleu(reference, i)))
    else:
        tmp_name = 'tmp/coco_test_5k_en_offline'
        with open('tmp/coco_test_5k_zh.en.txt', 'r') as in_file:
            en_lines = in_file.readlines()
        # Explicit encoding; writing encoded bytes would emit "b'...'"
        # strings under Python 3.
        with open(tmp_name + '.txt', 'w', encoding='utf-8') as out_file:
            for line in en_lines:
                out_file.write("%s" % line.lower())
        ref_id_json = 'tmp/captions_val_image_info_karpathy_5k_test_11080899_results.json'
        utils.text2textid(tmp_name, ref_id_json)
        print('Calculating scores for the generated results ... ...')
        utils.test2cocojson(tmp_name + '_id')
        lang_stats = language_eval_json(tmp_name)
    print('------------------------------------------- end')
# --- Example #4 (scrape-site separator; kept as a comment so the file parses) ---
def eval_coco_offline():
    """Run image-to-text evaluation, translate the dumped Chinese captions
    to English with a local OpenNMT model, then score the translations.

    Either computes self-BLEU (`self_bleu_flag`, hard-coded) or the COCO
    caption metrics on the translated file.

    Side effects: runs a shell command, reads/writes caption/json files.
    """
    dump_json_path = i2t_eval()
    root = os.getcwd() + '/'
    use_translation = False
    mscoco_zh_json = dump_json_path
    mscoco_zh_text = dump_json_path.replace('.json', '.txt')
    utils.cocojson2text(mscoco_zh_json,
                        mscoco_zh_json.replace('.json', '.txt'))
    nmt_model = root + "save/opennmt/aic_zh2en_nmt_part_acc_61.49_ppl_7.43_e22.pt"
    print("Start translating chinese to english ...")
    en_text = mscoco_zh_text.replace('zh', 'en')
    bashCommand = ("cd ../OpenNMT-py && python translate.py"
                   " -model " + nmt_model +
                   " -src " + root + '/' + mscoco_zh_text +
                   " -output " + root + '/' + en_text +
                   " -verbose -gpu 0")
    _output = subprocess.check_output(['bash', '-c', bashCommand])
    print("Finish translating chinese to english ...")

    self_bleu_flag = False
    if self_bleu_flag:
        print('------------------------------------------- aic self-bleu')
        with open(en_text, 'r') as in_file:
            zh_lines = in_file.readlines()
        en_lines = []
        for count, line in enumerate(zh_lines, start=1):
            # Drop the boilerplate phrase before tokenizing.
            en_lines.append(line.replace("there is", ""))
            if count % 100 == 0:
                print('... %d sentences prepared' % count)
        reference = [nltk.word_tokenize(line) for line in en_lines]
        for i in range(7):
            print('Self BLEU: {}'.format(utils.self_bleu(reference, i)))
    else:
        tmp_name = en_text.replace('.txt', 'offline')
        with open(en_text, 'r') as in_file:
            en_lines = in_file.readlines()
        # Explicit encoding; writing encoded bytes would emit "b'...'"
        # strings under Python 3.
        with open(tmp_name + '.txt', 'w', encoding='utf-8') as out_file:
            for line in en_lines:
                out_file.write("%s" % line.lower())
        utils.text2textid(tmp_name, dump_json_path)
        print('Calculating scores for the generated results ... ...')
        utils.test2cocojson(tmp_name + '_id')
        lang_stats = language_eval('coco',
                                   json.load(open(tmp_name + '_id.json')),
                                   opt.id, '')
def eval_30K(type, text_in):
    """Translate Flickr30K Chinese captions to English and score them.

    Args:
        type: 'online' to translate each line with the Google Translate
            web API; any other value treats the input as already-English
            text and only strips the phrase "there is".
        text_in: path to the input caption file, one caption per line.

    Side effects: writes 'tmp/flickr_test_1k_en_<type>.txt' plus id/json
    files; 'online' mode requires network access.

    NB: the parameter name ``type`` shadows the builtin but is kept for
    backward compatibility with keyword callers.
    """
    if type == 'online':
        from googletrans import Translator
        translator = Translator()

    with open(text_in, 'r') as in_file:
        zh_lines = in_file.readlines()

    en_lines = []
    for count, line in enumerate(zh_lines, start=1):
        if type == 'online':
            tmp = translator.translate(line).text
        else:
            tmp = line.replace("there is", "")
        en_lines.append(tmp)
        if count % 100 == 0:
            if type == 'online':
                # Fresh client every 100 requests to dodge throttling.
                translator = Translator()
            print('... %d sentences prepared' % count)

    tmp_name = 'tmp/flickr_test_1k_en_' + type
    # Explicit encoding and str writes: "%s" % line.encode(...) would emit
    # the "b'...'" repr under Python 3.
    with open(tmp_name + '.txt', 'w', encoding='utf-8') as out_file:
        for line in en_lines:
            out_file.write("%s\n" % line.lower())

    ref_id_json = 'tmp/captions_val_image_info_karpathy_1k_test_11080899_results.json'
    utils.text2textid(tmp_name, ref_id_json)
    print('Calculating scores for the generated results ... ...')
    utils.test2cocojson(tmp_name + '_id')
    lang_stats = language_eval_json_30K(tmp_name)