def save(args):
    """Save inference model."""
    gpu_id = 0
    place = fluid.CUDAPlace(gpu_id)

    task = tasks.create_task(args)
    model = models.create_model(args, place)
    model.save_inference_model(args.inference_model_path)
    return
def evaluate(args):
    """Evaluation main function."""
    if args.is_distributed:
        dev_count = fluid.core.get_cuda_device_count()
        gpu_id = int(os.getenv("FLAGS_selected_gpus"))
        phase = "distributed_test"
    else:
        dev_count = 1
        gpu_id = 0
        phase = "test"
    place = fluid.CUDAPlace(gpu_id)

    # setup task and model
    task = tasks.create_task(args)
    model = models.create_model(args, place)

    # setup dataset
    eval_generator = task.get_data_loader(
        model,
        input_file=args.eval_file,
        num_part=model.topo.data_info.size,
        part_id=model.topo.data_info.rank,
        phase=phase)

    if model.topo.pp_info.size != 1:
        raise ValueError("Cannot support pipeline in evaluation now!")
    if model.topo.world.size > dev_count:
        raise ValueError("Cannot support evaluation on multiple nodes now!")

    evaluate_dataset(
        task,
        model,
        eval_generator,
        args,
        dev_count,
        gpu_id,
        training_step=0,
        tag="test")
    return
def infer(args):
    """Inference main function."""
    if args.is_distributed:
        dev_count = fluid.core.get_cuda_device_count()
        gpu_id = int(os.getenv("FLAGS_selected_gpus"))
        phase = "distributed_test"
    else:
        dev_count = 1
        gpu_id = 0
        phase = "test"
    place = fluid.CUDAPlace(gpu_id)

    task = tasks.create_task(args)
    model = models.create_model(args, place)

    # setup dataset
    infer_generator = task.get_data_loader(
        model,
        input_file=args.infer_file,
        num_part=model.topo.data_info.size,
        part_id=model.topo.data_info.rank,
        phase=phase,
        is_infer=True)

    if model.topo.pp_info.size != 1:
        raise ValueError("Cannot support pipeline in inference now!")
    if model.topo.sharding_info.size != 1:
        raise ValueError("Cannot support sharding in inference now!")
    if model.topo.world.size > dev_count:
        raise ValueError("Cannot support inference on multiple nodes now!")

    # run inference
    timer = Timer()
    timer.start()
    infer_out = {}
    step = 0  # fix no input data case.
    for step, data in enumerate(infer_generator(), 1):
        predictions = task.infer_step(model, data)
        for pred in predictions:
            infer_out[pred["data_id"]] = pred
        if step % args.log_steps == 0:
            time_cost = timer.pass_time
            print(f"\tstep: {step}, time: {time_cost:.3f}, "
                  f"queue size: {infer_generator.queue.size()}, "
                  f"speed: {step / time_cost:.3f} steps/s")
    time_cost = timer.pass_time
    print(f"[infer] steps: {step} time cost: {time_cost}, "
          f"speed: {step / time_cost} steps/s")

    if args.is_distributed:
        # merge inference outputs in distributed mode.
        part_file = os.path.join(args.save_path, f"inference_output.part_{gpu_id}")
        with open(part_file, "w") as fp:
            json.dump(infer_out, fp, ensure_ascii=False, indent=2)
        part_finish_file = os.path.join(
            args.save_path, f"inference_output.part_{gpu_id}.finish")
        with open(part_finish_file, "w"):
            pass

    # Only run on master GPU in each node
    if gpu_id != 0:
        return

    if args.is_distributed:
        part_files = "inference_output.part_*.finish"
        while True:
            ret = subprocess.getoutput(
                f"find {args.save_path} -maxdepth 1 -name {part_files}")
            num_completed = len(ret.split("\n"))
            if num_completed != dev_count:
                time.sleep(1)
                continue
            infer_out = {}
            for dev_id in range(dev_count):
                part_file = os.path.join(args.save_path, f"inference_output.part_{dev_id}")
                with open(part_file, "r") as fp:
                    part_infer_out = json.load(fp)
                for data_id in part_infer_out:
                    infer_out[data_id] = part_infer_out[data_id]
            break
        subprocess.getoutput(
            "rm " + os.path.join(args.save_path, "inference_output.part*"))

    # save inference outputs
    inference_output = os.path.join(args.save_path, "inference_output.txt")
    with open(inference_output, "w") as f:
        for data_id in sorted(infer_out.keys(), key=lambda x: int(x)):
            f.write("\t".join(
                map(str, [
                    infer_out[data_id][name]
                    for name in args.output_name.split(",")
                ])) + "\n")
    print(f"save inference result into: {inference_output}")
    return
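# The distributed branch of infer() above polls for per-rank
# "inference_output.part_*.finish" marker files by shelling out to `find`.
# Below is a minimal alternative sketch (an assumption, not the original
# implementation) that performs the same wait with glob, which also treats an
# empty result as zero completed ranks.
import glob
import os
import time


def wait_for_part_files(save_path, dev_count, poll_interval=1.0):
    """Block until every rank has written its .finish marker file."""
    pattern = os.path.join(save_path, "inference_output.part_*.finish")
    while len(glob.glob(pattern)) < dev_count:
        time.sleep(poll_interval)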
def infer(args):
    """Main inference function."""
    place = fluid.CUDAPlace(0)
    task = DialogGeneration(args)
    model = models.create_model(args, place)
    task.debug()

    empty_ds_seq = "<ds/> " + " ".join(flatten_ds({})) + " </ds>"
    post_process = PostProcess(
        args.db_file,
        normalization=args.normalization,
        db_guidance=args.db_guidance)

    # record original order and init status
    output_order = []
    # {"dial_id": {"prev_ds": "", "turns": [], "cur_turn_idx": 0}}
    dial_status = defaultdict(dict)
    with open(args.infer_file, "r") as fin:
        next(fin)
        for line in fin:
            dial_id, turn_idx, utt = line.strip().split("\t")
            output_order.append(f"{dial_id}-{turn_idx}")
            if dial_id not in dial_status:
                dial_status[dial_id]["prev_ds"] = empty_ds_seq
                dial_status[dial_id]["turns"] = []
                dial_status[dial_id]["cur_turn_idx"] = 0
            dial_status[dial_id]["turns"].append({
                "utts": utt,
                "turn_idx": turn_idx
            })
    dial_ids = sorted(list(dial_status.keys()))

    # batch inference
    outputs = {}
    timer = Timer()
    batch_idx = 0
    while len(dial_ids) > 0:
        logger.info(f"Batch index: {batch_idx}")
        batch_idx += 1
        timer.start()
        cur_dial_ids = dial_ids[:args.dial_batch_size]
        cur_inputs = {}
        for cur_dial_id in cur_dial_ids:
            cur_dial_turn = dial_status[cur_dial_id]["turns"][
                dial_status[cur_dial_id]["cur_turn_idx"]]
            cur_utt = cur_dial_turn["utts"]
            prev_ds = dial_status[cur_dial_id]["prev_ds"]
            src = f"{cur_utt} [SEP] {prev_ds}\x010"
            cur_inputs[f"{cur_dial_id}-{cur_dial_turn['turn_idx']}"] = src
        cur_outputs = generate(cur_inputs, model, task)
        time_cost_infer = timer.pass_time
        logger.debug(f"Time cost (prediction): {time_cost_infer}")

        # post process
        cur_outputs_postprocess = {}
        for dial_turn_tag, pred_ds in cur_outputs.items():
            dial_id, _ = dial_turn_tag.split("-")
            cur_dial_turn = dial_status[dial_id]["turns"][
                dial_status[dial_id]["cur_turn_idx"]]
            cur_utt_ls = cur_dial_turn["utts"].split("[SEP]")
            postprocessed_pred_ds = post_process.run(
                pred_ds,
                prev_ds=dial_status[dial_id]["prev_ds"],
                utt_list=cur_utt_ls)
            cur_outputs_postprocess[dial_turn_tag] = postprocessed_pred_ds
        outputs.update(cur_outputs_postprocess)
        time_cost_postprocess = timer.pass_time - time_cost_infer
        logger.debug(f"Time cost (postprocess): {time_cost_postprocess}")

        # update `cur_turn_idx` and `prev_ds`
        for dial_turn_tag in cur_outputs:
            dial_id, _ = dial_turn_tag.split("-")
            dial_status[dial_id]["cur_turn_idx"] += 1
            if dial_status[dial_id]["cur_turn_idx"] >= len(dial_status[dial_id]["turns"]):
                dial_ids.remove(dial_id)
            else:
                dial_status[dial_id]["prev_ds"] = outputs[dial_turn_tag]
        timer.reset()

    # reorder and output
    sample_indices = []
    with open(args.session_to_sample_mapping_file, "r") as fin:
        for line in fin:
            line = line.strip()
            if line:
                sample_indices.append(int(line))
    pred_seqs = [outputs[dial_turn_tag] for dial_turn_tag in output_order]
    pred_sample_labels = [None] * len(pred_seqs)
    for pred_ds_seq, sample_idx in zip(pred_seqs, sample_indices):
        pred_ds_dict = parse_ds(pred_ds_seq, date_prefix="$")
        pred_sample_labels[sample_idx] = pred_ds_dict

    out_seq_file = os.path.join(args.save_path, "inference_output.txt")
    out_sample_label_file = os.path.join(args.save_path, "inference_labels.json")
    with open(out_seq_file, "w") as fout_seq, \
            open(out_sample_label_file, "w") as fout_label:
        fout_seq.write("\n".join(pred_seqs))
        json.dump(pred_sample_labels, fout_label, indent=2)
    logger.info(f"Save inference sequences to `{out_seq_file}`")
    logger.info(f"Save inference sample labels to `{out_sample_label_file}`")
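# A condensed sketch of the rolling dialogue-batch scheme used by infer() above,
# assuming tags of the form "<dial_id>-<turn_idx>" and a hypothetical
# step_fn({tag: src}) -> {tag: pred} standing in for generate() plus
# post-processing. Illustrative only, not the original implementation.
def rolling_batch_infer(dial_status, batch_size, build_src, step_fn):
    """Run dialogues turn by turn, dropping each one once all its turns are done."""
    dial_ids = sorted(dial_status.keys())
    outputs = {}
    while dial_ids:
        inputs = {}
        for dial_id in dial_ids[:batch_size]:
            turn = dial_status[dial_id]["turns"][dial_status[dial_id]["cur_turn_idx"]]
            inputs[f"{dial_id}-{turn['turn_idx']}"] = build_src(
                turn, dial_status[dial_id]["prev_ds"])
        preds = step_fn(inputs)
        outputs.update(preds)
        for tag, pred in preds.items():
            dial_id = tag.split("-")[0]
            dial_status[dial_id]["cur_turn_idx"] += 1
            if dial_status[dial_id]["cur_turn_idx"] >= len(dial_status[dial_id]["turns"]):
                dial_ids.remove(dial_id)
            else:
                # the prediction becomes the previous dialogue state of the next turn
                dial_status[dial_id]["prev_ds"] = pred
    return outputs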
def train(args):
    """The main function of training."""
    if args.is_distributed:
        dev_count = fluid.core.get_cuda_device_count()
        gpu_id = int(os.getenv("FLAGS_selected_gpus"))
    else:
        dev_count = 1
        gpu_id = 0
    place = fluid.CUDAPlace(gpu_id)

    # setup task and model
    task = tasks.create_task(args)
    model = models.create_model(args, place)
    global need_save
    need_save = model.topo.dp_info.rank == 0

    # setup datasets
    train_generator = task.get_data_loader(
        model,
        input_file=args.train_file,
        num_epochs=args.num_epochs,
        num_part=model.topo.data_info.size,
        part_id=model.topo.data_info.rank,
        phase="train")

    if model.topo.pp_info.size == 1:
        assert model.topo.mp_info.size <= dev_count and dev_count % model.topo.mp_info.size == 0
        valid_num_part = dev_count // model.topo.mp_info.size
        valid_part_id = gpu_id // model.topo.mp_info.size
    else:
        raise ValueError("Cannot support pipeline in training now!")
    print("# part in validation:", valid_num_part)
    print("part id in validation:", valid_part_id)

    valid_tags = []
    valid_generators = []
    for valid_file in args.valid_file.split(","):
        if ":" in valid_file:
            valid_tag, valid_file = valid_file.split(":")
        else:
            valid_tag = "valid"
        valid_tags.append(valid_tag)
        valid_generators.append(task.get_data_loader(
            model,
            input_file=valid_file,
            num_part=valid_num_part,
            part_id=valid_part_id,
            phase="distributed_valid" if args.is_distributed else "valid"))

    # maintain best metric (init)
    best_metric = -1e10
    if args.eval_metric.startswith("-"):
        scale = -1.0
        eval_metric = args.eval_metric[1:]
    else:
        scale = 1.0
        eval_metric = args.eval_metric

    # start training
    timer = Timer()
    timer.start()
    print("Training is starting.")
    for step, data in enumerate(train_generator(), args.start_step + 1):
        outputs = task.train_step(model, data)
        timer.pause()

        if step % args.log_steps == 0:
            time_cost = timer.pass_time
            current_epoch, current_file_index, total_file = task.reader.get_train_progress()
            current_lr = outputs.pop("scheduled_lr")
            print(f"[train][{current_epoch}] progress: {current_file_index}/{total_file} "
                  f"step: {step}, time: {time_cost:.3f}, "
                  f"queue size: {train_generator.queue.size()}, "
                  f"speed: {args.log_steps / time_cost:.3f} steps/s")
            print(f"\tcurrent lr: {current_lr:.7f}")
            metrics = task.get_metrics(outputs)
            print("\t" + ", ".join(f"{k}: {v:.4f}" for k, v in metrics.items()))
            timer.reset()

        if step % args.validation_steps == 0:
            for valid_tag, valid_generator in zip(valid_tags, valid_generators):
                eval_metrics = evaluate(task, model, valid_generator, args,
                                        dev_count, gpu_id, step, tag=valid_tag)
                if valid_tag == "valid":
                    valid_metrics = eval_metrics

            # save the latest model
            if args.save_steps <= 0:
                save_model(model, args.save_path, "lastest", dev_count, gpu_id, args)

            # maintain best metric (update)
            if valid_metrics[eval_metric] * scale > best_metric:
                best_metric = valid_metrics[eval_metric] * scale
                print(f"Got better valid metric: {eval_metric} = {valid_metrics[eval_metric]}")
                # save best model (with best evaluation metric)
                save_model(model, args.save_path, "best", dev_count, gpu_id, args)

        if args.save_steps > 0 and step % args.save_steps == 0:
            save_model(model, args.save_path, f"step_{step}", dev_count, gpu_id, args)

        timer.start()

    print("Training is completed.")
    return
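# The best-model bookkeeping in train() relies on a sign trick: prefixing
# args.eval_metric with "-" (e.g. "-loss") means lower is better, and
# best_metric stores the already-scaled value. A minimal sketch of that
# comparison rule, for illustration only:
def is_better(eval_metric, value, best_scaled):
    """Return True if `value` improves on the best (scaled) value seen so far.

    `eval_metric` is the raw configured name, possibly prefixed with "-";
    `best_scaled` holds the best value so far, already multiplied by `scale`.
    """
    scale = -1.0 if eval_metric.startswith("-") else 1.0
    return value * scale > best_scaled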
def infer(args):
    """Inference main function."""
    if args.is_distributed:
        fleet.init(is_collective=True)
        dev_count = fluid.core.get_cuda_device_count()
        gpu_id = int(os.getenv("FLAGS_selected_gpus"))
        trainers_num = fleet.worker_num()
        trainer_id = fleet.worker_index()
        phase = "distributed_test"
    else:
        dev_count = 1
        gpu_id = 0
        trainers_num = 1
        trainer_id = 0
        phase = "test"
    place = fluid.CUDAPlace(gpu_id)

    task = tasks.create_task(args)
    model = models.create_model(args, place)

    infer_generator = task.get_data_loader(
        model,
        input_file=args.infer_file,
        num_part=trainers_num,
        part_id=trainer_id,
        phase=phase,
        is_infer=True)

    # run inference
    timer = Timer()
    timer.start()
    infer_out = {}
    step = 0
    for step, data in enumerate(infer_generator(), 1):
        predictions = task.infer_step(model, data)
        for pred in predictions:
            infer_out[pred["data_id"]] = pred
        if step % args.log_steps == 0:
            time_cost = timer.pass_time
            print(f"\tstep: {step}, time: {time_cost:.3f}, "
                  f"queue size: {infer_generator.queue.size()}, "
                  f"speed: {step / time_cost:.3f} steps/s")
    time_cost = timer.pass_time
    print(f"[infer] steps: {step} time cost: {time_cost}, "
          f"speed: {step / time_cost} steps/s")

    if args.is_distributed:
        # merge inference outputs in distributed mode.
        part_file = os.path.join(args.save_path, f"inference_output.part_{gpu_id}")
        with open(part_file, "w") as fp:
            json.dump(infer_out, fp, ensure_ascii=False, indent=2)
        part_finish_file = os.path.join(
            args.save_path, f"inference_output.part_{gpu_id}.finish")
        with open(part_finish_file, "w"):
            pass

    # Only run on master GPU in each node
    if gpu_id != 0:
        return

    if args.is_distributed:
        part_files = "inference_output.part_*.finish"
        while True:
            ret = subprocess.getoutput(
                f"find {args.save_path} -maxdepth 1 -name {part_files}")
            num_completed = len(ret.split("\n"))
            if num_completed != dev_count:
                time.sleep(1)
                continue
            infer_out = {}
            for dev_id in range(dev_count):
                part_file = os.path.join(args.save_path, f"inference_output.part_{dev_id}")
                with open(part_file, "r") as fp:
                    part_infer_out = json.load(fp)
                for data_id in part_infer_out:
                    infer_out[data_id] = part_infer_out[data_id]
            break
        subprocess.getoutput(
            "rm " + os.path.join(args.save_path, "inference_output.part*"))

    # save inference outputs
    inference_output = os.path.join(args.save_path, args.save_name)
    save_array = []
    for i in range(len(infer_out)):
        save_array.append(infer_out[str(i)]["emb"])
    np_array = np.array(save_array)
    np.save(inference_output, np_array)
    return
def interact(args):
    """Interaction main function."""
    if args.is_distributed:
        dev_count = fluid.core.get_cuda_device_count()
        gpu_id = int(os.getenv("FLAGS_selected_gpus"))
    else:
        dev_count = 1
        gpu_id = 0
    place = fluid.CUDAPlace(gpu_id)

    task = DialogGeneration(args)
    model = models.create_model(args, place)

    if model.topo.pp_info.size != 1:
        raise ValueError("Cannot support pipeline in inference now!")
    if model.topo.sharding_info.size != 1:
        raise ValueError("Cannot support sharding in inference now!")
    if model.topo.world.size > dev_count:
        raise ValueError("Cannot support interaction on multiple nodes now!")

    if args.is_distributed and gpu_id > 0:
        # worker ranks: serve requests forwarded by the master rank over a local socket.
        Example = namedtuple("Example", ["src", "data_id"])
        context = []
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            host, port = "127.0.0.1", args.port + gpu_id
            s.bind((host, port))
            s.listen()
            while True:
                conn, addr = s.accept()
                with conn:
                    data = conn.recv(1024)
                    if data.decode("utf8") == "[EXIT]":
                        break
                    example = Example(src=data.decode("utf8"), data_id=0)
                    task.reader.features[0] = example
                    try:
                        record = task.reader._convert_example_to_record(example, is_infer=True)
                    except ValueError as e:
                        print(f"[FATAL] {e}")
                        raise e
                    data = task.reader._pad_batch_records([record], is_infer=True)
                    pred = task.infer_step(model, data)[0]
                    bot_response = pred["response"]
                    context.append(bot_response)
        return
    else:
        # master rank (or single GPU): read user input and broadcast it to worker ranks.
        def send_request(dst_id, src):
            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
                host, port = "127.0.0.1", args.port + dst_id
                s.connect((host, port))
                data = src.encode("utf8")
                s.sendall(data)

        Example = namedtuple("Example", ["src", "data_id"])
        context = []
        start_info = "Enter [EXIT] to quit the interaction, [NEXT] to start a new conversation."
        cprint(start_info, "yellow", attrs=["bold"])
        while True:
            if args.is_distributed:
                print(colored("[Human]:", "red", attrs=["bold"]))
                user_utt = input().strip()
            else:
                user_utt = input(colored("[Human]: ", "red", attrs=["bold"])).strip()

            if user_utt == "[EXIT]":
                if args.is_distributed:
                    threads = []
                    for i in range(1, dev_count):
                        thread = threading.Thread(target=send_request, args=(i, "[EXIT]"))
                        thread.start()
                        threads.append(thread)
                break
            elif user_utt == "[NEXT]":
                context = []
                cprint(start_info, "yellow", attrs=["bold"])
            else:
                context.append(user_utt)
                src = " [SEP] ".join(context)
                if args.is_distributed:
                    threads = []
                    for i in range(1, dev_count):
                        thread = threading.Thread(target=send_request, args=(i, src))
                        thread.start()
                        threads.append(thread)
                example = Example(src=src, data_id=0)
                task.reader.features[0] = example
                try:
                    record = task.reader._convert_example_to_record(example, is_infer=True)
                except ValueError as e:
                    print(f"[FATAL] {e}")
                    raise e
                data = task.reader._pad_batch_records([record], is_infer=True)
                pred = task.infer_step(model, data)[0]
                bot_response = pred["response"]
                if args.is_distributed:
                    print(colored("[Bot]:", "blue", attrs=["bold"]))
                    print(colored(bot_response, attrs=["bold"]))
                else:
                    print(colored("[Bot]:", "blue", attrs=["bold"]),
                          colored(bot_response, attrs=["bold"]))
                context.append(bot_response)

        if args.is_distributed:
            for thread in threads:
                thread.join()
    return
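# interact() coordinates model-parallel ranks over plain TCP: each worker rank
# listens on 127.0.0.1:(args.port + gpu_id) and the master forwards the current
# source string (or the "[EXIT]" sentinel) via send_request(). The hypothetical
# helper below, written under that assumption, shows how such a worker could be
# shut down from any external script.
import socket


def shutdown_worker(base_port, rank):
    """Send the "[EXIT]" sentinel to the worker listening on base_port + rank."""
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.connect(("127.0.0.1", base_port + rank))
        s.sendall("[EXIT]".encode("utf8"))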
def infer_dst(args):
    """Inference main function."""
    if args.is_distributed:
        fleet.init(is_collective=True)
        dev_count = fluid.core.get_cuda_device_count()
        gpu_id = int(os.getenv("FLAGS_selected_gpus"))
        trainers_num = fleet.worker_num()
        trainer_id = fleet.worker_index()
        phase = "distributed_test"
    else:
        dev_count = 1
        gpu_id = 0
        trainers_num = 1
        trainer_id = 0
        phase = "test"
    place = fluid.CUDAPlace(gpu_id)

    task = tasks.create_task(args)
    model = models.create_model(args, place)
    # task.debug()

    schema = get_schema(args.dataset)
    empty_ds_seq = "<ds/> " + " ".join(flatten_ds({}, schema)) + " </ds>"

    # record original order and init status
    output_order = []
    # {"dial_id": {"prev_ds": "", "turns": [{"utts": utts, "turn_idx": turn_idx}], "cur_idx": 0}}
    dial_status = defaultdict(dict)
    with open(args.infer_file, "r") as fin:
        next(fin)
        for line in fin:
            dial_id, turn_idx, utts = line.strip().split("\t")
            output_order.append(f"{dial_id}-{turn_idx}")
            if dial_id not in dial_status:
                dial_status[dial_id]["prev_ds"] = empty_ds_seq
                dial_status[dial_id]["turns"] = []
                dial_status[dial_id]["cur_idx"] = 0
            dial_status[dial_id]["turns"].append({
                "utts": utts,
                "turn_idx": turn_idx
            })
    dial_ids = list(dial_status.keys())

    # batch inference
    outputs = {}
    timer = Timer()
    while len(dial_ids) > 0:
        timer.start()
        cur_dial_ids = dial_ids[:args.dial_batch_size]
        logger.info(f"Sampled dialogue ids: {cur_dial_ids}")

        # 1st: basic generation
        basic_inputs = {}
        for cur_dial_id in cur_dial_ids:
            cur_idx = dial_status[cur_dial_id]["cur_idx"]
            cur_dial_turn = dial_status[cur_dial_id]["turns"][cur_idx]
            cur_utts = cur_dial_turn["utts"]
            prev_ds = dial_status[cur_dial_id]["prev_ds"]
            src = f"<gen/> {cur_utts} [SEP] {prev_ds} </gen>\x010"
            basic_inputs[f"{cur_dial_id}-{cur_dial_turn['turn_idx']}"] = src
        basic_outputs = generate(basic_inputs, model, task)

        # 2nd: amending generation
        amending_inputs = {}
        for cur_dial_id in cur_dial_ids:
            cur_idx = dial_status[cur_dial_id]["cur_idx"]
            cur_dial_turn = dial_status[cur_dial_id]["turns"][cur_idx]
            cur_utts = cur_dial_turn["utts"]
            basic_ds = basic_outputs[f"{cur_dial_id}-{cur_dial_turn['turn_idx']}"]
            src = f"<amend/> {cur_utts} [SEP] {basic_ds} </amend>\x010"
            amending_inputs[f"{cur_dial_id}-{cur_dial_turn['turn_idx']}"] = src
        amending_outputs = generate(amending_inputs, model, task)
        outputs.update(amending_outputs)
        time_cost_infer = timer.pass_time
        logger.info(f"Time cost: {time_cost_infer}")

        # debug info
        for dial_turn_tag in basic_inputs:
            logger.debug(f"[basic input]: {basic_inputs[dial_turn_tag]}")
            logger.debug(f"[basic output]: {basic_outputs[dial_turn_tag]}")
            logger.debug(f"[amending input]: {amending_inputs[dial_turn_tag]}")
            logger.debug(f"[amending output]: {amending_outputs[dial_turn_tag]}")

        # update dial_status
        for dial_turn_tag in amending_outputs:
            dial_id, _ = dial_turn_tag.split("-")
            dial_status[dial_id]["cur_idx"] += 1
            if dial_status[dial_id]["cur_idx"] >= len(dial_status[dial_id]["turns"]):
                dial_ids.remove(dial_id)
            else:
                dial_status[dial_id]["prev_ds"] = outputs[dial_turn_tag]
        timer.reset()

    # reorder and output
    if gpu_id == 0:
        pred_seqs = []
        pred_labels = []
        for dial_turn_tag in output_order:
            pred_seqs.append(outputs[dial_turn_tag])
            pred_label = parse_ds(outputs[dial_turn_tag], schema)
            pred_labels.append(pred_label)

        out_seq_file = os.path.join(args.save_path, "inference_output.txt")
        out_label_file = os.path.join(args.save_path, "inference_labels.json")
        with open(out_seq_file, "w") as fout_seq, \
                open(out_label_file, "w") as fout_label:
            fout_seq.write("\n".join(pred_seqs))
            json.dump(pred_labels, fout_label, indent=2)
        logger.info(f"Save inference sequences to `{out_seq_file}`")
        logger.info(f"Save inference labels to `{out_label_file}`")
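# infer_dst() decodes every turn twice: a basic pass wrapped in <gen/> markers
# and an amending pass wrapped in <amend/> markers that revises the first
# result. A minimal sketch of that two-pass pattern, with a hypothetical
# gen_fn({tag: src}) -> {tag: output} standing in for generate(model, task):
def two_pass_generate(utts, prev_ds, gen_fn, tag="dial-0"):
    """Return the amended dialogue state for one turn."""
    basic = gen_fn({tag: f"<gen/> {utts} [SEP] {prev_ds} </gen>\x010"})[tag]
    amended = gen_fn({tag: f"<amend/> {utts} [SEP] {basic} </amend>\x010"})[tag]
    return amended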