def main(args):
    """Run static-graph inference for each saved epoch and optionally dump the results.

    For every epoch id in ``[runner.infer_start_epoch, runner.infer_end_epoch)``
    the parameters stored under ``<runner.infer_load_path>/<epoch_id>`` are
    loaded into the default main program, the test data loader is fed through
    the executor, and the fetched variables are logged every
    ``runner.print_interval`` batches.  When ``runner.runner_result_dump_path``
    is configured, the per-batch fetch results of the epoch are written to that
    path as JSON.

    Args:
        args: parsed command-line namespace; ``args.config_yaml`` and
            ``args.abs_dir`` are read.

    Raises:
        ValueError: if ``runner.print_interval`` or ``runner.infer_batch_size``
            is missing/zero, or if the test data loader yields no batch.
    """
    paddle.seed(12345)

    # load config
    config = load_yaml(args.config_yaml)
    config["config_abs_dir"] = args.abs_dir

    # load static model class
    static_model_class = load_static_model_class(config)
    input_data = static_model_class.create_feeds(is_infer=True)
    input_data_names = [data.name for data in input_data]
    fetch_vars = static_model_class.infer_net(input_data)
    logger.info("cpu_num: {}".format(os.getenv("CPU_NUM")))

    use_gpu = config.get("runner.use_gpu", True)
    use_auc = config.get("runner.use_auc", False)
    test_data_dir = config.get("runner.test_data_dir", None)
    print_interval = config.get("runner.print_interval", None)
    model_load_path = config.get("runner.infer_load_path", "model_output")
    start_epoch = config.get("runner.infer_start_epoch", 0)
    end_epoch = config.get("runner.infer_end_epoch", 10)
    batch_size = config.get("runner.infer_batch_size", None)
    runner_result_save_path = config.get("runner.runner_result_dump_path",
                                         None)
    os.environ["CPU_NUM"] = str(config.get("runner.thread_num", 1))

    logger.info("**************common.configs**********")
    logger.info(
        "use_gpu: {}, test_data_dir: {}, start_epoch: {}, end_epoch: {}, print_interval: {}, model_load_path: {}"
        .format(use_gpu, test_data_dir, start_epoch, end_epoch,
                print_interval, model_load_path))
    logger.info("**************common.configs**********")

    # Both values feed the modulo / throughput arithmetic in the batch loop;
    # fail here with a clear message instead of a TypeError in mid-epoch.
    if not print_interval or not batch_size:
        raise ValueError(
            "runner.print_interval and runner.infer_batch_size must be "
            "positive, got print_interval={}, infer_batch_size={}".format(
                print_interval, batch_size))

    place = paddle.set_device('gpu' if use_gpu else 'cpu')
    exe = paddle.static.Executor(place)
    # initialize
    exe.run(paddle.static.default_startup_program())

    test_dataloader = create_data_loader(
        config=config, place=place, mode="test")

    main_program = paddle.static.default_main_program()
    fetch_names = list(fetch_vars)
    fetch_list = list(fetch_vars.values())

    def format_metrics(batch_outputs):
        # First element of every fetched variable, rendered as "name: value, ".
        return "".join("{}: {}, ".format(name, batch_outputs[idx][0])
                       for idx, name in enumerate(fetch_names))

    for epoch_id in range(start_epoch, end_epoch):
        logger.info("load model epoch {}".format(epoch_id))
        model_path = os.path.join(model_load_path, str(epoch_id))
        load_static_model(main_program, model_path, prefix='rec_static')

        runner_results = []
        fetch_batch_var = None
        epoch_begin = time.time()
        interval_begin = time.time()
        if use_auc:
            reset_auc()

        for batch_id, batch_data in enumerate(test_dataloader()):
            fetch_batch_var = exe.run(
                program=main_program,
                feed=dict(zip(input_data_names, batch_data)),
                fetch_list=fetch_list)
            # Keep a JSON-serialisable copy of every fetched variable.
            runner_results.append({
                name: np.array(fetch_batch_var[idx]).tolist()
                for idx, name in enumerate(fetch_names)
            })

            if batch_id % print_interval == 0:
                logger.info(
                    "epoch: {}, batch_id: {}, ".format(epoch_id, batch_id) +
                    format_metrics(fetch_batch_var) +
                    "speed: {:.2f} ins/s".format(print_interval * batch_size /
                                                 (time.time() - interval_begin)))
                interval_begin = time.time()

        # Without at least one batch there is nothing to report for the epoch.
        if fetch_batch_var is None:
            raise ValueError(
                "test_dataloader's size is null, please ensure batch size < dataset size!"
            )

        logger.info("epoch: {} done, ".format(epoch_id) + format_metrics(
            fetch_batch_var) + "epoch time: {:.2f} s".format(time.time() -
                                                             epoch_begin))

        # The same path is rewritten for every epoch, so the file ends up
        # holding the results of the last evaluated epoch.
        if runner_result_save_path:
            logger.info(
                "Dump runner result in {}".format(runner_result_save_path))
            with open(runner_result_save_path, 'w+') as fout:
                json.dump(runner_results, fout)
def main(args):
    """Run static-graph inference for each saved epoch, optionally logging to VisualDL.

    For every epoch id in ``[runner.infer_start_epoch, runner.infer_end_epoch)``
    the parameters stored under ``<runner.infer_load_path>/<epoch_id>`` are
    loaded into the default main program and the test data loader is fed
    through the executor.  Every ``runner.print_interval`` batches the first
    element of each fetched variable is logged and, when ``runner.use_visual``
    is set, also written as a scalar under ``<args.abs_dir>/visualDL_log/infer``.

    Args:
        args: parsed command-line namespace; ``args.config_yaml`` and
            ``args.abs_dir`` are read.

    Raises:
        ValueError: if ``runner.print_interval`` or ``runner.infer_batch_size``
            is missing/zero, or if the test data loader yields no batch.
    """
    paddle.seed(12345)

    # load config
    config = load_yaml(args.config_yaml)
    config["config_abs_dir"] = args.abs_dir

    # load static model class
    static_model_class = load_static_model_class(config)
    input_data = static_model_class.create_feeds(is_infer=True)
    input_data_names = [data.name for data in input_data]
    fetch_vars = static_model_class.infer_net(input_data)
    logger.info("cpu_num: {}".format(os.getenv("CPU_NUM")))

    use_gpu = config.get("runner.use_gpu", True)
    use_auc = config.get("runner.use_auc", False)
    use_visual = config.get("runner.use_visual", False)
    auc_num = config.get("runner.auc_num", 1)
    test_data_dir = config.get("runner.test_data_dir", None)
    print_interval = config.get("runner.print_interval", None)
    model_load_path = config.get("runner.infer_load_path", "model_output")
    start_epoch = config.get("runner.infer_start_epoch", 0)
    end_epoch = config.get("runner.infer_end_epoch", 10)
    batch_size = config.get("runner.infer_batch_size", None)
    os.environ["CPU_NUM"] = str(config.get("runner.thread_num", 1))

    logger.info("**************common.configs**********")
    logger.info(
        "use_gpu: {}, use_visual: {}, test_data_dir: {}, start_epoch: {}, end_epoch: {}, print_interval: {}, model_load_path: {}".
        format(use_gpu, use_visual, test_data_dir, start_epoch, end_epoch,
               print_interval, model_load_path))
    logger.info("**************common.configs**********")

    # Both values feed the modulo / throughput arithmetic in the batch loop;
    # fail here with a clear message instead of a TypeError in mid-epoch.
    if not print_interval or not batch_size:
        raise ValueError(
            "runner.print_interval and runner.infer_batch_size must be "
            "positive, got print_interval={}, infer_batch_size={}".format(
                print_interval, batch_size))

    place = paddle.set_device('gpu' if use_gpu else 'cpu')
    exe = paddle.static.Executor(place)
    # initialize
    exe.run(paddle.static.default_startup_program())

    test_dataloader = create_data_loader(
        config=config, place=place, mode="test")

    # Create a log_visual object and store the data in the path
    log_visual = None
    if use_visual:
        from visualdl import LogWriter
        log_visual = LogWriter(args.abs_dir + "/visualDL_log/infer")

    main_program = paddle.static.default_main_program()
    fetch_names = list(fetch_vars)
    fetch_list = list(fetch_vars.values())

    def format_metrics(batch_outputs):
        # First element of every fetched variable, rendered as "name: value, ".
        return "".join("{}: {}, ".format(name, batch_outputs[idx][0])
                       for idx, name in enumerate(fetch_names))

    step_num = 0
    try:
        for epoch_id in range(start_epoch, end_epoch):
            logger.info("load model epoch {}".format(epoch_id))
            model_path = os.path.join(model_load_path, str(epoch_id))
            load_static_model(main_program, model_path, prefix='rec_static')

            fetch_batch_var = None
            epoch_begin = time.time()
            interval_begin = time.time()
            if use_auc:
                reset_auc(auc_num)

            for batch_id, batch_data in enumerate(test_dataloader()):
                fetch_batch_var = exe.run(
                    program=main_program,
                    feed=dict(zip(input_data_names, batch_data)),
                    fetch_list=fetch_list)

                if batch_id % print_interval == 0:
                    if log_visual is not None:
                        for idx, name in enumerate(fetch_names):
                            log_visual.add_scalar(
                                tag="infer/" + name,
                                step=step_num,
                                value=fetch_batch_var[idx][0])
                    logger.info(
                        "epoch: {}, batch_id: {}, ".format(epoch_id, batch_id)
                        + format_metrics(fetch_batch_var) +
                        "speed: {:.2f} ins/s".format(
                            print_interval * batch_size /
                            (time.time() - interval_begin)))
                    interval_begin = time.time()
                # step_num counts batches across all epochs.
                step_num = step_num + 1

            # Without at least one batch there is nothing to report.
            if fetch_batch_var is None:
                raise ValueError(
                    "test_dataloader's size is null, please ensure batch size < dataset size!"
                )

            logger.info("epoch: {} done, ".format(epoch_id) + format_metrics(
                fetch_batch_var) + "epoch time: {:.2f} s".format(time.time() -
                                                                 epoch_begin))
    finally:
        # Release the VisualDL writer even when inference fails part-way.
        if log_visual is not None:
            log_visual.close()
def main(args):
    """Run static-graph inference for each saved epoch with timing statistics.

    Configuration is loaded from ``args.config_yaml`` and may be overridden by
    ``key=value`` strings in ``args.opt``; an override is converted to the type
    (bool / int / float) of the value already present in the config, otherwise
    it is stored as a string.  For every epoch id in
    ``[runner.infer_start_epoch, runner.infer_end_epoch)`` the parameters under
    ``<runner.infer_load_path>/<epoch_id>`` are loaded, the test data is fed
    through the executor, and every ``runner.print_interval`` batches the
    fetched values plus reader/batch cost figures are logged (and written to
    VisualDL when ``runner.use_visual`` is set).  When ``runner.use_save_data``
    is set, ``save_data`` is called with the last batch's outputs of the epoch.

    Args:
        args: parsed command-line namespace; ``args.config_yaml``,
            ``args.abs_dir`` and ``args.opt`` are read.

    Raises:
        ValueError: if ``runner.print_interval`` or ``runner.infer_batch_size``
            is missing/zero, if ``runner.reader_type`` is not one of the two
            supported values, or if the test data loader yields no batch.
    """
    paddle.seed(12345)

    # load config
    config = load_yaml(args.config_yaml)
    config["config_abs_dir"] = args.abs_dir

    # modify config from command
    if args.opt:
        for parameter in args.opt:
            # Split on the first "=" only so values may themselves contain "=".
            key, value = parameter.strip().split("=", 1)
            current = config.get(key)
            # bool is tested first because bool is a subclass of int.
            if isinstance(current, bool):
                value = value.lower() == "true"
            elif isinstance(current, int):
                value = int(value)
            elif isinstance(current, float):
                value = float(value)
            config[key] = value

    # load static model class
    static_model_class = load_static_model_class(config)
    input_data = static_model_class.create_feeds(is_infer=True)
    input_data_names = [data.name for data in input_data]
    fetch_vars = static_model_class.infer_net(input_data)
    logger.info("cpu_num: {}".format(os.getenv("CPU_NUM")))

    use_gpu = config.get("runner.use_gpu", True)
    use_xpu = config.get("runner.use_xpu", False)
    use_auc = config.get("runner.use_auc", False)
    use_visual = config.get("runner.use_visual", False)
    auc_num = config.get("runner.auc_num", 1)
    test_data_dir = config.get("runner.test_data_dir", None)
    print_interval = config.get("runner.print_interval", None)
    model_load_path = config.get("runner.infer_load_path", "model_output")
    start_epoch = config.get("runner.infer_start_epoch", 0)
    end_epoch = config.get("runner.infer_end_epoch", 10)
    batch_size = config.get("runner.infer_batch_size", None)
    use_save_data = config.get("runner.use_save_data", False)
    reader_type = config.get("runner.reader_type", "DataLoader")
    use_fleet = config.get("runner.use_fleet", False)
    os.environ["CPU_NUM"] = str(config.get("runner.thread_num", 1))

    logger.info("**************common.configs**********")
    logger.info(
        "use_gpu: {}, use_xpu: {}, use_visual: {}, infer_batch_size: {}, test_data_dir: {}, start_epoch: {}, end_epoch: {}, print_interval: {}, model_load_path: {}"
        .format(use_gpu, use_xpu, use_visual, batch_size, test_data_dir,
                start_epoch, end_epoch, print_interval, model_load_path))
    logger.info("**************common.configs**********")

    # Both values feed the modulo / throughput arithmetic in the batch loop;
    # fail here with a clear message instead of a TypeError in mid-epoch.
    if not print_interval or not batch_size:
        raise ValueError(
            "runner.print_interval and runner.infer_batch_size must be "
            "positive, got print_interval={}, infer_batch_size={}".format(
                print_interval, batch_size))

    if use_xpu:
        xpu_device = 'xpu:{0}'.format(os.getenv('FLAGS_selected_xpus', 0))
        place = paddle.set_device(xpu_device)
    else:
        place = paddle.set_device('gpu' if use_gpu else 'cpu')
    exe = paddle.static.Executor(place)
    # initialize
    exe.run(paddle.static.default_startup_program())

    if reader_type == 'DataLoader':
        test_dataloader = create_data_loader(
            config=config, place=place, mode="test")
    elif reader_type == "CustomizeDataLoader":
        test_dataloader = static_model_class.create_data_loader()
    else:
        raise ValueError(
            "unsupported runner.reader_type: {}".format(reader_type))

    # Create a log_visual object and store the data in the path
    log_visual = None
    if use_visual:
        from visualdl import LogWriter
        log_visual = LogWriter(args.abs_dir + "/visualDL_log/infer")

    main_program = paddle.static.default_main_program()
    fetch_names = list(fetch_vars)
    fetch_list = list(fetch_vars.values())

    def format_metrics(batch_outputs):
        # First element of every fetched variable, rendered as "name: value, ".
        return "".join("{}: {}, ".format(name, batch_outputs[idx][0])
                       for idx, name in enumerate(fetch_names))

    step_num = 0
    try:
        for epoch_id in range(start_epoch, end_epoch):
            logger.info("load model epoch {}".format(epoch_id))
            model_path = os.path.join(model_load_path, str(epoch_id))
            load_static_model(main_program, model_path, prefix='rec_static')

            fetch_batch_var = None
            epoch_begin = time.time()
            interval_begin = time.time()
            infer_reader_cost = 0.0
            infer_run_cost = 0.0
            reader_start = time.time()

            if use_auc:
                reset_auc(use_fleet, auc_num)

            for batch_id, batch_data in enumerate(test_dataloader()):
                infer_reader_cost += time.time() - reader_start
                infer_start = time.time()
                fetch_batch_var = exe.run(
                    program=main_program,
                    feed=dict(zip(input_data_names, batch_data)),
                    fetch_list=fetch_list)
                infer_run_cost += time.time() - infer_start

                if batch_id % print_interval == 0:
                    if log_visual is not None:
                        for idx, name in enumerate(fetch_names):
                            log_visual.add_scalar(
                                tag="infer/" + name,
                                step=step_num,
                                value=fetch_batch_var[idx][0])
                    logger.info(
                        "epoch: {}, batch_id: {}, ".format(epoch_id, batch_id)
                        + format_metrics(fetch_batch_var) +
                        "avg_reader_cost: {:.5f} sec, avg_batch_cost: {:.5f} sec, avg_samples: {:.5f}, ips: {:.2f} ins/s"
                        .format(infer_reader_cost / print_interval, (
                            infer_reader_cost + infer_run_cost) /
                                print_interval, batch_size, print_interval *
                                batch_size / (time.time() - interval_begin)))
                    interval_begin = time.time()
                    infer_reader_cost = 0.0
                    infer_run_cost = 0.0
                reader_start = time.time()
                # step_num counts batches across all epochs.
                step_num = step_num + 1

            # we will drop the last incomplete batch when dataset size is not
            # divisible by the batch size, so the loader may yield nothing.
            # Checked after the loop (explicit raise, not assert) so it still
            # fires under -O and no extra loader iterator is created.
            if fetch_batch_var is None:
                raise ValueError(
                    "test_dataloader's size is null, please ensure batch size < dataset size!"
                )

            logger.info("epoch: {} done, ".format(epoch_id) + format_metrics(
                fetch_batch_var) + "epoch time: {:.2f} s".format(time.time() -
                                                                 epoch_begin))
            if use_save_data:
                save_data(fetch_batch_var, model_load_path)
    finally:
        # Release the VisualDL writer even when inference fails part-way.
        if log_visual is not None:
            log_visual.close()
def _merge_interest_results(ids, scores, top_n):
    """Merge the search results of one user's interest vectors into a set of at most ``top_n`` distinct non-zero ids, best score first."""
    ranked = sorted(
        zip(np.reshape(ids, -1), np.reshape(scores, -1)),
        key=lambda pair: pair[1],
        reverse=True)
    merged = set()
    for item_id, _ in ranked:
        if item_id != 0 and item_id not in merged:
            merged.add(item_id)
            if len(merged) >= top_n:
                break
    return merged


def _score_user(retrieved, targets):
    """Return ``(recall, ndcg, hit)`` for one user given the retrieved id set and the non-padding target ids."""
    if not targets:
        # Nothing to retrieve for this user: contributes zero to every metric.
        return 0.0, 0.0, 0
    hits = 0
    dcg = 0.0
    for position, iid in enumerate(targets):
        if iid in retrieved:
            hits += 1
            dcg += 1.0 / math.log(position + 2, 2)
    if hits == 0:
        return 0.0, 0.0, 0
    idcg = 0.0
    for position in range(hits):
        idcg += 1.0 / math.log(position + 2, 2)
    return hits * 1.0 / len(targets), dcg / idcg, 1


def main(args):
    """Evaluate saved epochs by nearest-neighbour retrieval over the item embeddings.

    For every epoch id in ``[runner.infer_start_epoch, runner.infer_end_epoch)``
    the parameters under ``<runner.infer_load_path>/<epoch_id>`` are loaded,
    the ``item_emb`` variable is read from the global scope and indexed with a
    faiss inner-product index, and each batch's user embeddings (first fetched
    variable) are searched for ``args.top_n`` neighbours.  Recall, NDCG and
    hit rate against the last element of each batch are accumulated and logged
    every ``runner.print_interval`` batches and at the end of the epoch.

    A 2-D user embedding array is scored row by row; otherwise the array is
    flattened and the per-user groups of rows are merged by score first.

    Args:
        args: parsed command-line namespace; ``args.config_yaml``,
            ``args.abs_dir`` and ``args.top_n`` are read.

    Raises:
        ValueError: if ``runner.print_interval`` or ``runner.infer_batch_size``
            is missing or zero.
    """
    paddle.seed(12345)

    # load config
    config = load_yaml(args.config_yaml)
    config["config_abs_dir"] = args.abs_dir

    # load static model class
    static_model_class = load_static_model_class(config)
    input_data = static_model_class.create_feeds(is_infer=True)
    input_data_names = [data.name for data in input_data]
    fetch_vars = static_model_class.infer_net(input_data)
    logger.info("cpu_num: {}".format(os.getenv("CPU_NUM")))

    use_gpu = config.get("runner.use_gpu", True)
    test_data_dir = config.get("runner.test_data_dir", None)
    print_interval = config.get("runner.print_interval", None)
    model_load_path = config.get("runner.infer_load_path", "model_output")
    start_epoch = config.get("runner.infer_start_epoch", 0)
    end_epoch = config.get("runner.infer_end_epoch", 10)
    batch_size = config.get("runner.infer_batch_size", None)
    os.environ["CPU_NUM"] = str(config.get("runner.thread_num", 1))

    logger.info("**************common.configs**********")
    logger.info(
        "use_gpu: {}, test_data_dir: {}, start_epoch: {}, end_epoch: {}, print_interval: {}, model_load_path: {}"
        .format(use_gpu, test_data_dir, start_epoch, end_epoch,
                print_interval, model_load_path))
    logger.info("**************common.configs**********")

    # Both values feed the modulo / throughput arithmetic in the batch loop;
    # fail here with a clear message instead of a TypeError in mid-epoch.
    if not print_interval or not batch_size:
        raise ValueError(
            "runner.print_interval and runner.infer_batch_size must be "
            "positive, got print_interval={}, infer_batch_size={}".format(
                print_interval, batch_size))

    place = paddle.set_device('gpu' if use_gpu else 'cpu')
    exe = paddle.static.Executor(place)
    # initialize
    exe.run(paddle.static.default_startup_program())

    test_dataloader = create_data_loader(
        config=config, place=place, mode="test")

    # Imported lazily so the module can be loaded without faiss installed.
    import faiss

    main_program = paddle.static.default_main_program()
    fetch_list = list(fetch_vars.values())

    for epoch_id in range(start_epoch, end_epoch):
        logger.info("load model epoch {}".format(epoch_id))
        model_path = os.path.join(model_load_path, str(epoch_id))
        load_static_model(main_program, model_path, prefix='rec_static')

        epoch_begin = time.time()
        interval_begin = time.time()

        # The index is rebuilt per epoch because each checkpoint carries its
        # own item embedding table.
        item_embs = np.array(paddle.static.global_scope().find_var("item_emb")
                             .get_tensor())
        if use_gpu:
            res = faiss.StandardGpuResources()
            flat_config = faiss.GpuIndexFlatConfig()
            flat_config.device = 0
            faiss_index = faiss.GpuIndexFlatIP(res, item_embs.shape[-1],
                                               flat_config)
        else:
            faiss_index = faiss.IndexFlatIP(item_embs.shape[-1])
        faiss_index.add(item_embs)

        total = 0
        total_recall = 0.0
        total_ndcg = 0.0
        total_hitrate = 0

        def format_metrics():
            # max(..., 1) only guards the empty case; sums are all zero then.
            denominator = max(total, 1)
            metric_str = ""
            metric_str += "recall@%d: %.5f, " % (args.top_n,
                                                 total_recall / denominator)
            metric_str += "ndcg@%d: %.5f, " % (args.top_n,
                                               total_ndcg / denominator)
            metric_str += "hitrate@%d: %.5f, " % (
                args.top_n, total_hitrate * 1.0 / denominator)
            return metric_str

        for batch_id, batch_data in enumerate(test_dataloader()):
            fetch_batch_var = exe.run(
                program=main_program,
                feed=dict(zip(input_data_names, batch_data[:2])),
                fetch_list=fetch_list)
            user_embs = fetch_batch_var[0]
            target_items = np.squeeze(np.array(batch_data[-1]), axis=1)

            multi_interest = len(user_embs.shape) != 2
            if multi_interest:
                # ni rows per user after flattening to (rows, embedding_dim).
                ni = user_embs.shape[1]
                user_embs = np.reshape(user_embs, [-1, user_embs.shape[-1]])
            distances, indices = faiss_index.search(user_embs, args.top_n)

            for i, iid_list in enumerate(target_items):
                if multi_interest:
                    retrieved = _merge_interest_results(
                        indices[i * ni:(i + 1) * ni],
                        distances[i * ni:(i + 1) * ni], args.top_n)
                else:
                    retrieved = set(indices[i])
                # Id 0 is filtered out of the targets before scoring.
                targets = [iid for iid in iid_list if iid != 0]
                recall, ndcg, hit = _score_user(retrieved, targets)
                total_recall += recall
                total_ndcg += ndcg
                total_hitrate += hit

            total += target_items.shape[0]

            if batch_id % print_interval == 0:
                logger.info(
                    "epoch: {}, batch_id: {}, ".format(epoch_id, batch_id) +
                    format_metrics() + "speed: {:.2f} ins/s".format(
                        print_interval * batch_size /
                        (time.time() - interval_begin)))
                # Restart the window so the speed covers one interval only.
                interval_begin = time.time()

        logger.info("epoch: {} done, ".format(epoch_id) + format_metrics() +
                    "epoch time: {:.2f} s".format(time.time() - epoch_begin))
def main(args):
    """Evaluate saved epochs on an analogy task and log the accuracy.

    For every epoch id in ``[runner.infer_start_epoch, runner.infer_end_epoch)``
    the parameters under ``<runner.infer_load_path>/<epoch_id>`` are loaded and
    every test batch is fed as ``analogy_a`` / ``analogy_b`` / ``analogy_c``
    (batch elements 0-2) together with ``all_label``, the id range
    ``0 .. hyper_parameters.sparse_feature_number - 1``.  The first fetched
    variable holds the candidate ids per sample; batch element 3 holds the
    expected id and batch element 4 the input ids that must be skipped.

    Args:
        args: parsed command-line namespace; ``args.config_yaml`` and
            ``args.abs_dir`` are read.

    Raises:
        ValueError: if ``runner.print_interval``,
            ``runner.infer_batch_size`` or
            ``hyper_parameters.sparse_feature_number`` is missing, or one of
            the first two is zero.
    """
    paddle.seed(12345)

    # load config
    config = load_yaml(args.config_yaml)
    config["config_abs_dir"] = args.abs_dir

    # load static model class
    static_model_class = load_static_model_class(config)
    input_data = static_model_class.create_feeds(is_infer=True)
    fetch_vars = static_model_class.infer_net(input_data)
    logger.info("cpu_num: {}".format(os.getenv("CPU_NUM")))

    use_gpu = config.get("runner.use_gpu", True)
    test_data_dir = config.get("runner.test_data_dir", None)
    print_interval = config.get("runner.print_interval", None)
    model_load_path = config.get("runner.infer_load_path", "model_output")
    start_epoch = config.get("runner.infer_start_epoch", 0)
    end_epoch = config.get("runner.infer_end_epoch", 10)
    batch_size = config.get("runner.infer_batch_size", None)
    sparse_feature_number = config.get(
        "hyper_parameters.sparse_feature_number")
    os.environ["CPU_NUM"] = str(config.get("runner.thread_num", 1))

    logger.info("**************common.configs**********")
    logger.info(
        "use_gpu: {}, test_data_dir: {}, start_epoch: {}, end_epoch: {}, print_interval: {}, model_load_path: {}"
        .format(use_gpu, test_data_dir, start_epoch, end_epoch,
                print_interval, model_load_path))
    logger.info("**************common.configs**********")

    # These values feed the arithmetic below; fail here with a clear message
    # instead of a TypeError in mid-epoch.
    if not print_interval or not batch_size:
        raise ValueError(
            "runner.print_interval and runner.infer_batch_size must be "
            "positive, got print_interval={}, infer_batch_size={}".format(
                print_interval, batch_size))
    if sparse_feature_number is None:
        raise ValueError(
            "hyper_parameters.sparse_feature_number must be set")

    place = paddle.set_device('gpu' if use_gpu else 'cpu')
    exe = paddle.static.Executor(place)
    # initialize
    exe.run(paddle.static.default_startup_program())

    test_dataloader = create_data_loader(
        config=config, place=place, mode="test")

    main_program = paddle.static.default_main_program()
    fetch_list = list(fetch_vars.values())
    # Identical for every batch, so it is built once.
    all_label = np.arange(sparse_feature_number).reshape(
        sparse_feature_number).astype("int64")

    for epoch_id in range(start_epoch, end_epoch):
        logger.info("load model epoch {}".format(epoch_id))
        model_path = os.path.join(model_load_path, str(epoch_id))
        load_static_model(main_program, model_path, prefix='rec_static')

        accum_num_sum = 0
        accum_num = 0
        epoch_begin = time.time()
        interval_begin = time.time()

        for batch_id, batch_data in enumerate(test_dataloader()):
            fetch_batch_var = exe.run(
                program=main_program,
                feed={
                    "analogy_a": np.array(batch_data[0]),
                    "analogy_b": np.array(batch_data[1]),
                    "analogy_c": np.array(batch_data[2]),
                    "all_label": all_label
                },
                fetch_list=fetch_list)
            pre = np.array(fetch_batch_var[0])
            label = np.array(batch_data[3])
            inputs_word = np.array(batch_data[4])

            for ii in range(len(label)):
                top4 = pre[ii][0]
                accum_num_sum += 1
                # The first candidate that is not one of the sample's input
                # ids is taken as the answer; later candidates are ignored.
                for idx in top4:
                    if int(idx) in inputs_word[ii]:
                        continue
                    if int(idx) == int(label[ii][0]):
                        accum_num += 1
                    break

            if batch_id % print_interval == 0:
                logger.info(
                    "infer epoch: {}, batch_id: {}, acc: {:.6f}, speed: {:.2f} ins/s"
                    .format(epoch_id, batch_id, accum_num * 1.0 / max(
                        accum_num_sum, 1), print_interval * batch_size / (
                            time.time() - interval_begin)))
                interval_begin = time.time()

        # max(..., 1) keeps the report well-defined when no sample was seen.
        logger.info(
            "infer epoch: {} done, acc: {:.6f}, epoch time: {:.2f} s".format(
                epoch_id, accum_num * 1.0 / max(accum_num_sum, 1),
                time.time() - epoch_begin))