def process_data(source):
    """Summarize raw *source* text with the module-level predictor.

    Converts the text to the BERT input format, runs abstractive
    inference, and returns the generated summary as a list of
    capitalized sentence fragments (split on the '<q>' separator).
    Relies on module-level `args`, `device_id`, `cp`, `step` and
    `predictor`.
    """
    data_builder.str_format_to_bert(source, args, '../bert_data_test/cnndm.test.0.bert.pt')
    args.bert_data_path = '../bert_data_test/cnndm'
    summary, _elapsed = test_text_abs(args, device_id, cp, step, predictor)
    return [fragment.capitalize() for fragment in summary.split('<q>')]
def translate():
    """Flask view: summarize posted text, or render the input page.

    POST: expects a JSON body with a non-empty 'src' string; returns
    JSON {'tgt': summary} (empty dict on failure).
    GET (or any non-POST): renders 'index.html'.

    Relies on module-level `args`, `device_id`, `cp`, `step`,
    `predictor` — TODO confirm they are initialized before first request.
    """
    if request.method == 'POST':
        # user inputs
        req = request.json
        print(req)
        res = {}
        if len(req['src']) > 0:
            print("request src " + str(req['src']))
            # globals: args, device_id, cp, step, predictor
            try:
                data_builder.str_format_to_bert(
                    req['src'], args,
                    '../bert_data_test/cnndm.test.0.bert.pt')
                args.bert_data_path = '../bert_data_test/cnndm'
                tgt, time_used = test_text_abs(args, device_id, cp, step,
                                               predictor)
                # Postprocessing: '<q>' separates generated sentences.
                sentences = tgt.split('<q>')
                sentences = [sent.capitalize() for sent in sentences]
                sentences = '. '.join(sentences).rstrip()
                sentences = sentences.replace(' ,', ',')
                # BUGFIX: guard against an empty summary before indexing
                # [-1] (previously raised IndexError on empty output).
                if sentences and sentences[-1] not in ['.', '?', '!']:
                    sentences = sentences + '.'
                res['tgt'] = sentences
                print("completed {} ".format(time_used))
            except Exception as e:
                # BUGFIX: was a bare `except:` that swallowed everything
                # (including SystemExit/KeyboardInterrupt) without a trace.
                print("process error, return empty str: {}".format(e))
        return jsonify(res)
    return render_template('index.html')
# NOTE(review): fragment of a task/mode dispatch chain — the enclosing `if`
# chain and function header are outside this view, and the original nesting
# of `elif (args.task == 'ext')` relative to the mode chain cannot be
# recovered from the collapsed source; reconstructed flat.
elif (args.mode == 'oracle'):
    # Oracle (upper-bound) extractive baseline.
    baseline(args, cal_oracle=True)
if (args.mode == 'test'):
    cp = args.test_from
    # Parse the step from a checkpoint name like 'model_step_1200.pt';
    # the bare except falls back to step 0 on any parse failure.
    try:
        step = int(cp.split('.')[-2].split('_')[-1])
    except:
        step = 0
    test_abs(args, device_id, cp, step)
elif (args.mode == 'test_text'):
    cp = args.test_from
    try:
        step = int(cp.split('.')[-2].split('_')[-1])
    except:
        step = 0
    test_text_abs(args, device_id, cp, step)
elif (args.task == 'ext'):
    # Extractive-task dispatch.
    if (args.mode == 'train'):
        train_ext(args, device_id)
    elif (args.mode == 'validate'):
        validate_ext(args, device_id)
    if (args.mode == 'test'):
        cp = args.test_from
        try:
            step = int(cp.split('.')[-2].split('_')[-1])
        except:
            step = 0
        test_ext(args, device_id, cp, step)
    elif (args.mode == 'test_text'):
        # Fragment is truncated here; the branch body is not visible.
        cp = args.test_from
# NOTE(review): fragment of a task/mode dispatch chain (a second variant of
# the one above in this file) — enclosing context is outside this view;
# nesting reconstructed flat.
elif (args.mode == 'oracle'):
    baseline(args, cal_oracle=True)
if (args.mode == 'test'):
    cp = args.test_from
    # Parse step from checkpoint filename; bare except falls back to 0.
    try:
        step = int(cp.split('.')[-2].split('_')[-1])
    except:
        step = 0
    test_abs(args, device_id, cp, step)
elif (args.mode == 'test_text'):
    cp = args.test_from
    try:
        step = int(cp.split('.')[-2].split('_')[-1])
    except:
        step = 0
    # NOTE(review): called with only `args` here, while sibling chunks pass
    # (args, device_id, cp, step) — verify against this variant's
    # test_text_abs signature; `cp`/`step` computed above are unused if the
    # one-argument form is correct.
    test_text_abs(args)
elif (args.task == 'ext'):
    if (args.mode == 'train'):
        train_ext(args, device_id)
    elif (args.mode == 'validate'):
        validate_ext(args, device_id)
    if (args.mode == 'test'):
        cp = args.test_from
        try:
            step = int(cp.split('.')[-2].split('_')[-1])
        except:
            step = 0
        test_ext(args, device_id, cp, step)
    elif (args.mode == 'test_text'):
        # Fragment is truncated here; the branch body is not visible.
        cp = args.test_from
# NOTE(review): fragment of a tokenizer-passing variant of the dispatch
# chain — it begins inside an except-branch (`step = 0`) whose `try` is
# outside this view; nesting reconstructed flat.
step = 0
test_abs(args, device_id, cp, step, tokenizer)
elif (args.mode == 'dev'):
    # Dev-set evaluation of a specific checkpoint.
    cp = args.test_from
    try:
        step = int(cp.split('.')[-2].split('_')[-1])
    except:
        step = 0
    dev_abs(args, device_id, cp, step, tokenizer)
elif (args.mode == 'test_text'):
    cp = args.test_from
    try:
        step = int(cp.split('.')[-2].split('_')[-1])
    except:
        step = 0
    test_text_abs(args, device_id, cp, step, tokenizer)
elif (args.task == 'ext'):
    if (args.mode == 'train'):
        train_ext(args, device_id)
    elif (args.mode == 'validate'):
        validate_ext(args, device_id)
    if (args.mode == 'test'):
        cp = args.test_from
        try:
            step = int(cp.split('.')[-2].split('_')[-1])
        except:
            step = 0
        test_ext(args, device_id, cp, step)
    elif (args.mode == 'test_text'):
        # Fragment is truncated here; the branch body is not visible.
        cp = args.test_from
# NOTE(review): fragment — begins mid-function: the bare `return` below
# belongs to an argument-initialization helper (presumably `init_args`,
# given the call in __main__) whose `def` line is outside this view.
# int() is redundant here: range() already yields ints.
args.gpu_ranks = [int(i) for i in range(len(args.visible_gpus.split(',')))]
args.world_size = len(args.gpu_ranks)
os.environ["CUDA_VISIBLE_DEVICES"] = args.visible_gpus
init_logger(args.log_file)
# '-1' is the CPU-only sentinel for visible_gpus.
device = "cpu" if args.visible_gpus == '-1' else "cuda"
device_id = 0 if device == "cuda" else -1
return args, device_id


if __name__ == '__main__':
    args, device_id = init_args()
    print(args.task, args.mode)
    cp = args.test_from
    # Parse the step from a checkpoint name like 'model_step_1200.pt'.
    # NOTE(review): bare except hides all errors, not just parse failures.
    try:
        step = int(cp.split('.')[-2].split('_')[-1])
    except:
        step = 0
    predictor = load_models_abs(args, device_id, cp, step)
    # NOTE(review): hard-coded input file name — looks like debug scaffolding.
    with open('foo.txt') as f:
        source = f.read().rstrip()
    data_builder.str_format_to_bert(source, args, '../bert_data_test/cnndm.test.0.bert.pt')
    args.bert_data_path = '../bert_data_test/cnndm'
    test_text_abs(args, device_id, cp, step, predictor)
# NOTE(review): fragment — a batch summarize-and-score loop; `args`,
# `device_id`, and `r` (presumably a ROUGE scorer, given get_scores —
# verify) are defined outside this view.
cp = args.test_from
# Parse step from checkpoint name; bare except falls back to 0.
try:
    step = int(cp.split('.')[-2].split('_')[-1])
except:
    step = 0
predictor = load_models_abs(args, device_id, cp, step)
all_files = glob.glob(os.path.join('./bert_data/cnndm', '*'))
print('Files In Input Dir: ' + str(len(all_files)))
for file in all_files:
    with open(file) as f:
        source = f.read().rstrip()
    data_builder.str_format_to_bert(
        source, args, './bert_data_test/cnndm.test.0.bert.pt')
    args.bert_data_path = './bert_data_test/cnndm'
    tgt, time_used = test_text_abs(args, device_id, cp, step, predictor)
    # Postprocessing: '<q>' separates generated sentences.
    sentences = tgt.split('<q>')
    sentences = [sent.capitalize() for sent in sentences]
    sentences = '. '.join(sentences).rstrip()
    sentences = sentences.replace(' ,', ',')
    # NOTE(review): unconditionally appends '.', unlike translate() in
    # this file, which first checks for terminal punctuation.
    sentences = sentences + '.'
    print("summary [{}]".format(sentences))
    print(r.get_scores(sentences, source, avg=True))
    print("time used {}".format(time_used))
def run(args, hpspace): if (args.task == 'abs'): if (args.mode == 'train'): if (args.hyperopt): t = time.time() newT = str(t).split(".") args.model_path = "../models/" + newT[0] args.log_file = "../logs/abs_bert_abs_" + newT[0] args.lr_bert = hpspace['lr_bert'] args.lr_dec = hpspace['lr_dec'] args.accum_count = int(hpspace['accum_count']) args.beta1 = hpspace['beta1'] args.beta2 = hpspace['beta2'] args.visible_gpus = '0' args.bert_model = '..temp/bert-base-danish-uncased-v2' args.vocab = '..temp/bert-base-danish-uncased-v2' train_stats = train_abs(args, device_id) x = train_stats.x ppl = train_stats.perplexity acc = train_stats.acc print(x) return { 'loss': x, 'eval_time': time.time(), 'status': STATUS_OK, 'other_stuff': { 'ppl': ppl, 'acc': acc } } elif (args.mode == 'validate'): validate_abs(args, device_id) elif (args.mode == 'lead'): baseline(args, cal_lead=True) elif (args.mode == 'oracle'): baseline(args, cal_oracle=True) if (args.mode == 'test'): cp = args.test_from try: step = int(cp.split('.')[-2].split('_')[-1]) except: step = 0 test_abs(args, device_id, cp, step) elif (args.mode == 'test_text'): cp = args.test_from try: step = int(cp.split('.')[-2].split('_')[-1]) except: step = 0 test_text_abs(args, device_id, cp, step) elif (args.task == 'ext'): if (args.mode == 'train'): train_ext(args, device_id) elif (args.mode == 'validate'): validate_ext(args, device_id) if (args.mode == 'test'): cp = args.test_from try: step = int(cp.split('.')[-2].split('_')[-1]) except: step = 0 test_ext(args, device_id, cp, step) elif (args.mode == 'test_text'): cp = args.test_from try: step = int(cp.split('.')[-2].split('_')[-1]) except: step = 0 test_text_abs(args, device_id, cp, step) if (args.mode == "sent_label"): step = 0 sent_label_ext(args, device_id)
# Create output directories up front so later writes cannot fail on a
# missing path.
os.makedirs(args.model_path, exist_ok=True)
os.makedirs(args.log_path, exist_ok=True)
os.makedirs(args.result_path, exist_ok=True)

# Train/valid/test dispatch for the abstractive task.
if args.task == 'abs':
    if args.mode == 'train':
        init_logger(os.path.join(args.log_path, 'train.log'))
        train_abs(args, DEVICE_ID)
    elif args.mode == 'validate':
        init_logger(os.path.join(args.log_path, 'valid.log'))
        validate_abs(args, DEVICE_ID)
    elif args.mode == 'test':
        cp = args.test_from
        # BUGFIX: default `step` first — the original bare `except:` only
        # printed a warning and left `step` undefined, so the init_logger
        # call below raised NameError on a malformed checkpoint name.
        step = 0
        try:
            step = int(cp.split('.')[-2].split('_')[-1])
        except (AttributeError, IndexError, ValueError):
            print("Not correct model name (EX: model_step_1200.pt)")
        init_logger(
            os.path.join(args.log_path, 'test.' + str(step) + '.log'))
        test_abs(args, DEVICE_ID, cp, step)
    elif args.mode == 'test_text':
        cp = args.test_from
        # BUGFIX: same undefined-`step` hazard as the 'test' branch.
        step = 0
        try:
            step = int(cp.split('.')[-2].split('_')[-1])
        except (AttributeError, IndexError, ValueError):
            print("Not correct model name (EX: model_step_1200.pt)")
        init_logger(
            os.path.join(args.log_path,
                         'test_text.' + str(step) + '.log'))
        test_text_abs(args, DEVICE_ID, cp, step)