Example #1
def process_data(source):
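    # Build the BERT-format .pt input file from the raw text and point
    # args.bert_data_path at its prefix so the summarizer below reads it.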
    data_builder.str_format_to_bert(source, args,
                                    '../bert_data_test/cnndm.test.0.bert.pt')
    args.bert_data_path = '../bert_data_test/cnndm'
    tgt, time_used = test_text_abs(args, device_id, cp, step, predictor)

    # some postprocessing
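    # tgt is a single string with '<q>' separating the predicted sentences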

    sentences = tgt.split('<q>')
    sentences = [sent.capitalize() for sent in sentences]
    # sentences = '. '.join(sentences).rstrip()
    # sentences = sentences.replace(' ,', ',')
    # sentences = sentences + '.'
    return sentences
Example #2
def translate():
    if request.method == 'POST':
        # user inputs
        req = request.json
        print(req)

        # api call
        res = {}
        if len(req['src']) > 0:
            print("request src " + str(req['src']))

            #globals: args, device_id, cp, step, predictor

            try:
                data_builder.str_format_to_bert(
                    req['src'], args, '../bert_data_test/cnndm.test.0.bert.pt')
                args.bert_data_path = '../bert_data_test/cnndm'
                tgt, time_used = test_text_abs(args, device_id, cp, step,
                                               predictor)

                # some postprocessing

                sentences = tgt.split('<q>')
                sentences = [sent.capitalize() for sent in sentences]
                sentences = '. '.join(sentences).rstrip()
                sentences = sentences.replace(' ,', ',')
                if sentences[-1] not in ['.', '?', '!']:
                    sentences = sentences + '.'
                res['tgt'] = sentences
                print("completed {} ".format(time_used))
            except:
                print("process error, return empty str")

        return jsonify(res)

    return render_template('index.html')
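
For reference, a minimal client sketch for the handler above. It assumes the view is registered at /translate on a local Flask app (the URL rule, host, and port are assumptions; only the 'src' request key and 'tgt' response key come from the handler itself):

import requests

# hypothetical endpoint; the handler only requires POSTed JSON with a 'src' field
resp = requests.post('http://localhost:5000/translate',
                     json={'src': 'Some article text to summarize.'})
print(resp.json().get('tgt', ''))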
Example #3
        elif (args.mode == 'oracle'):
            baseline(args, cal_oracle=True)
        if (args.mode == 'test'):
            cp = args.test_from
            try:
                step = int(cp.split('.')[-2].split('_')[-1])
            except:
                step = 0
            test_abs(args, device_id, cp, step)
        elif (args.mode == 'test_text'):
            cp = args.test_from
            try:
                step = int(cp.split('.')[-2].split('_')[-1])
            except:
                step = 0
            test_text_abs(args, device_id, cp, step)

    elif (args.task == 'ext'):
        if (args.mode == 'train'):
            train_ext(args, device_id)
        elif (args.mode == 'validate'):
            validate_ext(args, device_id)
        if (args.mode == 'test'):
            cp = args.test_from
            try:
                step = int(cp.split('.')[-2].split('_')[-1])
            except:
                step = 0
            test_ext(args, device_id, cp, step)
        elif (args.mode == 'test_text'):
            cp = args.test_from
Example #4
        elif (args.mode == 'oracle'):
            baseline(args, cal_oracle=True)
        if (args.mode == 'test'):
            cp = args.test_from
            try:
                step = int(cp.split('.')[-2].split('_')[-1])
            except:
                step = 0
            test_abs(args, device_id, cp, step)
        elif (args.mode == 'test_text'):
            cp = args.test_from
            try:
                step = int(cp.split('.')[-2].split('_')[-1])
            except:
                step = 0
            test_text_abs(args)

    elif (args.task == 'ext'):
        if (args.mode == 'train'):
            train_ext(args, device_id)
        elif (args.mode == 'validate'):
            validate_ext(args, device_id)
        if (args.mode == 'test'):
            cp = args.test_from
            try:
                step = int(cp.split('.')[-2].split('_')[-1])
            except:
                step = 0
            test_ext(args, device_id, cp, step)
        elif (args.mode == 'test_text'):
            cp = args.test_from
Example #5
                step = 0
            test_abs(args, device_id, cp, step, tokenizer)
        elif (args.mode == 'dev'):
            cp = args.test_from
            try:
                step = int(cp.split('.')[-2].split('_')[-1])
            except:
                step = 0
            dev_abs(args, device_id, cp, step, tokenizer)
        elif (args.mode == 'test_text'):
            cp = args.test_from
            try:
                step = int(cp.split('.')[-2].split('_')[-1])
            except:
                step = 0
            test_text_abs(args, device_id, cp, step, tokenizer)

    elif (args.task == 'ext'):
        if (args.mode == 'train'):
            train_ext(args, device_id)
        elif (args.mode == 'validate'):
            validate_ext(args, device_id)
        if (args.mode == 'test'):
            cp = args.test_from
            try:
                step = int(cp.split('.')[-2].split('_')[-1])
            except:
                step = 0
            test_ext(args, device_id, cp, step)
        elif (args.mode == 'test_text'):
            cp = args.test_from
Example #6
    args.gpu_ranks = [int(i) for i in range(len(args.visible_gpus.split(',')))]
    args.world_size = len(args.gpu_ranks)
    os.environ["CUDA_VISIBLE_DEVICES"] = args.visible_gpus

    init_logger(args.log_file)
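    # visible_gpus == '-1' selects CPU-only mode; otherwise the first visible GPU is used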
    device = "cpu" if args.visible_gpus == '-1' else "cuda"
    device_id = 0 if device == "cuda" else -1

    return args, device_id


if __name__ == '__main__':
    args, device_id = init_args()
    print(args.task, args.mode)

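    # the training step is encoded in the checkpoint file name, e.g. model_step_1200.pt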
    cp = args.test_from
    try:
        step = int(cp.split('.')[-2].split('_')[-1])
    except:
        step = 0

    predictor = load_models_abs(args, device_id, cp, step)

    with open('foo.txt') as f:
        source = f.read().rstrip()

    data_builder.str_format_to_bert(source, args,
                                    '../bert_data_test/cnndm.test.0.bert.pt')
    args.bert_data_path = '../bert_data_test/cnndm'
    test_text_abs(args, device_id, cp, step, predictor)
Example #7
    cp = args.test_from
    try:
        step = int(cp.split('.')[-2].split('_')[-1])
    except:
        step = 0

    predictor = load_models_abs(args, device_id, cp, step)

    all_files = glob.glob(os.path.join('./bert_data/cnndm', '*'))
    print('Files In Input Dir: ' + str(len(all_files)))
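    # For each input file: build the BERT-format data, summarize it, tidy the
    # output, and score it against the source text with ROUGE (r is assumed to
    # be a rouge.Rouge() scorer created elsewhere).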
    for file in all_files:
        with open(file) as f:
            source = f.read().rstrip()

        data_builder.str_format_to_bert(
            source, args, './bert_data_test/cnndm.test.0.bert.pt')
        args.bert_data_path = './bert_data_test/cnndm'
        tgt, time_used = test_text_abs(args, device_id, cp, step, predictor)

        # some postprocessing

        sentences = tgt.split('<q>')
        sentences = [sent.capitalize() for sent in sentences]
        sentences = '. '.join(sentences).rstrip()
        sentences = sentences.replace(' ,', ',')
        sentences = sentences + '.'

        print("summary [{}]".format(sentences))
        print(r.get_scores(sentences, source, avg=True))
        print("time used {}".format(time_used))
Example #8
def run(args, hpspace):
    # hpspace is a sampled hyperparameter dict (presumably from a hyperopt
    # search space); the dict returned in 'train' mode follows hyperopt's
    # objective convention, with STATUS_OK coming from hyperopt.
    if (args.task == 'abs'):
        if (args.mode == 'train'):
            if (args.hyperopt):
                # give each trial its own model/log path based on the current timestamp
                t = time.time()
                newT = str(t).split(".")
                args.model_path = "../models/" + newT[0]
                args.log_file = "../logs/abs_bert_abs_" + newT[0]
                args.lr_bert = hpspace['lr_bert']
                args.lr_dec = hpspace['lr_dec']
                args.accum_count = int(hpspace['accum_count'])
                args.beta1 = hpspace['beta1']
                args.beta2 = hpspace['beta2']
                args.visible_gpus = '0'
                args.bert_model = '..temp/bert-base-danish-uncased-v2'
                args.vocab = '..temp/bert-base-danish-uncased-v2'
            train_stats = train_abs(args, device_id)
            x = train_stats.x
            ppl = train_stats.perplexity
            acc = train_stats.acc
            print(x)
            return {
                'loss': x,
                'eval_time': time.time(),
                'status': STATUS_OK,
                'other_stuff': {
                    'ppl': ppl,
                    'acc': acc
                }
            }
        elif (args.mode == 'validate'):
            validate_abs(args, device_id)
        elif (args.mode == 'lead'):
            baseline(args, cal_lead=True)
        elif (args.mode == 'oracle'):
            baseline(args, cal_oracle=True)
        if (args.mode == 'test'):
            cp = args.test_from
            try:
                step = int(cp.split('.')[-2].split('_')[-1])
            except:
                step = 0
            test_abs(args, device_id, cp, step)
        elif (args.mode == 'test_text'):
            cp = args.test_from
            try:
                step = int(cp.split('.')[-2].split('_')[-1])
            except:
                step = 0
            test_text_abs(args, device_id, cp, step)
    elif (args.task == 'ext'):
        if (args.mode == 'train'):
            train_ext(args, device_id)
        elif (args.mode == 'validate'):
            validate_ext(args, device_id)
        if (args.mode == 'test'):
            cp = args.test_from
            try:
                step = int(cp.split('.')[-2].split('_')[-1])
            except:
                step = 0
            test_ext(args, device_id, cp, step)
        elif (args.mode == 'test_text'):
            cp = args.test_from
            try:
                step = int(cp.split('.')[-2].split('_')[-1])
            except:
                step = 0
            test_text_abs(args, device_id, cp, step)
    if (args.mode == "sent_label"):
        step = 0
        sent_label_ext(args, device_id)
Example #9
    os.makedirs(args.model_path, exist_ok=True)
    os.makedirs(args.log_path, exist_ok=True)
    os.makedirs(args.result_path, exist_ok=True)

    # Train/valid/test
    if args.task == 'abs':
        if args.mode == 'train':
            init_logger(os.path.join(args.log_path, 'train.log'))
            train_abs(args, DEVICE_ID)
        elif args.mode == 'validate':
            init_logger(os.path.join(args.log_path, 'valid.log'))
            validate_abs(args, DEVICE_ID)
        elif args.mode == 'test':
            cp = args.test_from
            try:
                step = int(cp.split('.')[-2].split('_')[-1])
            except:
                print("Not correct model name (EX: model_step_1200.pt)")
                step = 0  # fall back so step is defined for the log file name below
            init_logger(
                os.path.join(args.log_path, 'test.' + str(step) + '.log'))
            test_abs(args, DEVICE_ID, cp, step)
        elif args.mode == 'test_text':
            cp = args.test_from
            try:
                step = int(cp.split('.')[-2].split('_')[-1])
            except:
                print("Not correct model name (EX: model_step_1200.pt)")
                step = 0  # fall back so step is defined for the log file name below
            init_logger(
                os.path.join(args.log_path, 'test_text.' + str(step) + '.log'))
            test_text_abs(args, DEVICE_ID, cp, step)