Esempio n. 1
0
def main(args):
    """Train the static-graph model described by a YAML config.

    Builds the feeds, network and optimizer through the static model class,
    runs the startup program, then trains for the configured epochs with the
    reader selected by ``runner.reader_type`` and calls ``save_static_model``
    after every epoch.

    Args:
        args: Parsed command-line arguments; ``config_yaml`` and ``abs_dir``
            are read.
    """
    paddle.seed(12345)

    # Read the YAML config and record where it came from.
    config = load_yaml(args.config_yaml)
    config["yaml_path"] = args.config_yaml
    config["config_abs_dir"] = args.abs_dir

    # Build feeds, forward network and optimizer.
    static_model_class = load_static_model_class(config)
    input_data = static_model_class.create_feeds()
    input_data_names = [feed.name for feed in input_data]
    fetch_vars = static_model_class.net(input_data)
    logger.info("cpu_num: {}".format(os.getenv("CPU_NUM")))
    static_model_class.create_optimizer()

    # Runner settings.
    use_gpu = config.get("runner.use_gpu", True)
    use_auc = config.get("runner.use_auc", False)
    auc_num = config.get("runner.auc_num", 1)
    train_data_dir = config.get("runner.train_data_dir", None)
    epochs = config.get("runner.epochs", None)
    print_interval = config.get("runner.print_interval", None)
    model_save_path = config.get("runner.model_save_path", "model_output")
    model_init_path = config.get("runner.model_init_path", None)
    batch_size = config.get("runner.train_batch_size", None)
    reader_type = config.get("runner.reader_type", "DataLoader")
    os.environ["CPU_NUM"] = str(config.get("runner.thread_num", 1))

    banner = "**************common.configs**********"
    summary = "use_gpu: {}, train_data_dir: {}, epochs: {}, print_interval: {}, model_save_path: {}"
    logger.info(banner)
    logger.info(
        summary.format(use_gpu, train_data_dir, epochs, print_interval,
                       model_save_path))
    logger.info(banner)

    # Pick the device and run the startup program once.
    device = 'gpu' if use_gpu else 'cpu'
    place = paddle.set_device(device)
    exe = paddle.static.Executor(place)
    exe.run(paddle.static.default_startup_program())

    last_epoch_id = config.get("last_epoch", -1)

    with_queue_dataset = reader_type == 'QueueDataset'
    with_dataloader = reader_type == 'DataLoader'
    if with_queue_dataset:
        dataset, file_list = get_reader(input_data, config)
    elif with_dataloader:
        train_dataloader = create_data_loader(config=config, place=place)

    for epoch_id in range(last_epoch_id + 1, epochs):
        epoch_begin = time.time()
        if use_auc:
            reset_auc(auc_num)

        epoch_prefix = "epoch: {} done, ".format(epoch_id)
        if with_dataloader:
            fetch_batch_var = dataloader_train(epoch_id, train_dataloader,
                                               input_data_names, fetch_vars,
                                               exe, config)
            # One "name: value, " fragment per fetched variable.
            metric_str = "".join(
                "{}: {}, ".format(var_name, fetch_batch_var[var_idx])
                for var_idx, var_name in enumerate(fetch_vars))
            elapsed = time.time() - epoch_begin
            logger.info(epoch_prefix + metric_str +
                        "epoch time: {:.2f} s".format(elapsed))
        elif with_queue_dataset:
            fetch_batch_var = dataset_train(epoch_id, dataset, fetch_vars, exe,
                                            config)
            elapsed = time.time() - epoch_begin
            logger.info(epoch_prefix +
                        "epoch time: {:.2f} s".format(elapsed))
        else:
            logger.info("reader type wrong")

        save_static_model(paddle.static.default_main_program(),
                          model_save_path,
                          epoch_id,
                          prefix='rec_static')
Esempio n. 2
0
def main(args):
    """Run inference over a range of saved epochs and optionally dump results.

    For every epoch id in ``[runner.infer_start_epoch, runner.infer_end_epoch)``
    the checkpoint under ``<runner.infer_load_path>/<epoch_id>`` is loaded and
    the test dataloader is run through the inference network. The fetched
    values of every batch are collected and, when
    ``runner.runner_result_dump_path`` is set, written there as JSON (the file
    is rewritten for each epoch).

    Args:
        args: Parsed command-line arguments; ``config_yaml`` and ``abs_dir``
            are read.
    """
    paddle.seed(12345)

    # load config
    config = load_yaml(args.config_yaml)
    config["config_abs_dir"] = args.abs_dir
    # load static model class
    static_model_class = load_static_model_class(config)

    input_data = static_model_class.create_feeds(is_infer=True)
    input_data_names = [data.name for data in input_data]

    fetch_vars = static_model_class.infer_net(input_data)
    logger.info("cpu_num: {}".format(os.getenv("CPU_NUM")))

    use_gpu = config.get("runner.use_gpu", True)
    use_auc = config.get("runner.use_auc", False)
    test_data_dir = config.get("runner.test_data_dir", None)
    print_interval = config.get("runner.print_interval", None)
    model_load_path = config.get("runner.infer_load_path", "model_output")
    start_epoch = config.get("runner.infer_start_epoch", 0)
    end_epoch = config.get("runner.infer_end_epoch", 10)
    batch_size = config.get("runner.infer_batch_size", None)
    # The dump path does not change between epochs, so read it once.
    runner_result_save_path = config.get("runner.runner_result_dump_path",
                                         None)
    os.environ["CPU_NUM"] = str(config.get("runner.thread_num", 1))
    logger.info("**************common.configs**********")
    logger.info(
        "use_gpu: {}, test_data_dir: {}, start_epoch: {}, end_epoch: {}, print_interval: {}, model_load_path: {}"
        .format(use_gpu, test_data_dir, start_epoch, end_epoch, print_interval,
                model_load_path))
    logger.info("**************common.configs**********")

    place = paddle.set_device('gpu' if use_gpu else 'cpu')
    exe = paddle.static.Executor(place)
    # initialize
    exe.run(paddle.static.default_startup_program())

    test_dataloader = create_data_loader(config=config,
                                         place=place,
                                         mode="test")

    # The fetch targets are the same for every batch.
    fetch_list = list(fetch_vars.values())

    def format_metrics(fetched):
        """Return "name: first_value, " for every fetched variable."""
        return "".join(
            "{}: {}, ".format(var_name, fetched[var_idx][0])
            for var_idx, var_name in enumerate(fetch_vars))

    for epoch_id in range(start_epoch, end_epoch):
        logger.info("load model epoch {}".format(epoch_id))
        model_path = os.path.join(model_load_path, str(epoch_id))
        load_static_model(paddle.static.default_main_program(),
                          model_path,
                          prefix='rec_static')
        runner_results = []
        # Reset per epoch so an empty dataloader cannot leave this unbound
        # (first epoch) or stale (later epochs).
        fetch_batch_var = None
        epoch_begin = time.time()
        interval_begin = time.time()
        if use_auc:
            reset_auc()
        for batch_id, batch_data in enumerate(test_dataloader()):
            fetch_batch_var = exe.run(
                program=paddle.static.default_main_program(),
                feed=dict(zip(input_data_names, batch_data)),
                fetch_list=fetch_list)
            runner_results.append({
                var_name: np.array(fetch_batch_var[var_idx]).tolist()
                for var_idx, var_name in enumerate(fetch_vars)
            })
            if batch_id % print_interval == 0:
                logger.info(
                    "epoch: {}, batch_id: {}, ".format(epoch_id, batch_id) +
                    format_metrics(fetch_batch_var) +
                    "speed: {:.2f} ins/s".format(print_interval * batch_size /
                                                 (time.time() -
                                                  interval_begin)))
                interval_begin = time.time()

        if fetch_batch_var is None:
            logger.warning(
                "epoch: {} produced no batches; no metrics to report".format(
                    epoch_id))
            metric_str = ""
        else:
            metric_str = format_metrics(fetch_batch_var)
        logger.info("epoch: {} done, ".format(epoch_id) + metric_str +
                    "epoch time: {:.2f} s".format(time.time() - epoch_begin))

        if runner_result_save_path:
            # Use the module logger like every other message in this function.
            logger.info(
                "Dump runner result in {}".format(runner_result_save_path))
            with open(runner_result_save_path, 'w') as fout:
                json.dump(runner_results, fout)
Esempio n. 3
0
def main(args):
    """Train a static-graph model, with CLI overrides and inference export.

    Loads the YAML config, applies ``key=value`` overrides from ``args.opt``,
    builds the network and optimizer, then trains for the configured epochs.
    ``save_static_model`` is called after every epoch and, when
    ``runner.use_inference`` is set, ``save_inference_model`` as well.

    Args:
        args: Parsed command-line arguments; ``config_yaml``, ``abs_dir`` and
            ``opt`` are read.

    Raises:
        ValueError: If an override has no ``=``, cannot be converted to the
            type of the value it replaces, or a configured inference
            feed/fetch variable is missing from the main program.
    """
    paddle.seed(12345)

    # load config
    config = load_yaml(args.config_yaml)
    config["yaml_path"] = args.config_yaml
    config["config_abs_dir"] = args.abs_dir
    # modify config from command
    if args.opt:
        for parameter in args.opt:
            # Split on the first '=' only so values may themselves contain '='.
            key, value = parameter.strip().split("=", 1)
            # Keep the type of the value being replaced; otherwise an override
            # such as "runner.use_gpu=False" would be stored as the truthy
            # string "False". bool is tested first because it subclasses int.
            current = config.get(key)
            if isinstance(current, bool):
                value = value.lower() == "true"
            elif isinstance(current, int):
                value = int(value)
            elif isinstance(current, float):
                value = float(value)
            config[key] = value
    # load static model class
    static_model_class = load_static_model_class(config)

    input_data = static_model_class.create_feeds()
    input_data_names = [data.name for data in input_data]

    fetch_vars = static_model_class.net(input_data)
    logger.info("cpu_num: {}".format(os.getenv("CPU_NUM")))
    static_model_class.create_optimizer()

    use_gpu = config.get("runner.use_gpu", True)
    use_auc = config.get("runner.use_auc", False)
    use_visual = config.get("runner.use_visual", False)
    use_inference = config.get("runner.use_inference", False)
    auc_num = config.get("runner.auc_num", 1)
    train_data_dir = config.get("runner.train_data_dir", None)
    epochs = config.get("runner.epochs", None)
    print_interval = config.get("runner.print_interval", None)
    model_save_path = config.get("runner.model_save_path", "model_output")
    batch_size = config.get("runner.train_batch_size", None)
    reader_type = config.get("runner.reader_type", "DataLoader")
    os.environ["CPU_NUM"] = str(config.get("runner.thread_num", 1))
    logger.info("**************common.configs**********")
    logger.info(
        "use_gpu: {}, use_visual: {}, train_batch_size: {}, train_data_dir: {}, epochs: {}, print_interval: {}, model_save_path: {}"
        .format(use_gpu, use_visual, batch_size, train_data_dir, epochs,
                print_interval, model_save_path))
    logger.info("**************common.configs**********")

    place = paddle.set_device('gpu' if use_gpu else 'cpu')
    exe = paddle.static.Executor(place)
    # initialize
    exe.run(paddle.static.default_startup_program())

    last_epoch_id = config.get("last_epoch", -1)

    # Create a log_visual object and store the data in the path
    if use_visual:
        from visualdl import LogWriter
        log_visual = LogWriter(args.abs_dir + "/visualDL_log/train")
    else:
        log_visual = None
    step_num = 0

    if reader_type == 'QueueDataset':
        dataset, _ = get_reader(input_data, config)
    elif reader_type == 'DataLoader':
        train_dataloader = create_data_loader(config=config, place=place)

    def resolve_vars(kind, var_names):
        """Look up *var_names* in the main program's global block.

        *kind* ("Feed" or "Fetch") only labels the error message.
        """
        block_vars = paddle.static.default_main_program().global_block().vars
        resolved = []
        for var_name in var_names:
            if var_name not in block_vars:
                raise ValueError(
                    "{} variable: {} not in default_main_program, global block has follow vars: {}"
                    .format(kind, var_name, block_vars.keys()))
            resolved.append(block_vars[var_name])
        return resolved

    for epoch_id in range(last_epoch_id + 1, epochs):

        epoch_begin = time.time()
        if use_auc:
            reset_auc(auc_num)
        if reader_type == 'DataLoader':
            fetch_batch_var, step_num = dataloader_train(
                epoch_id, train_dataloader, input_data_names, fetch_vars, exe,
                config, use_visual, log_visual, step_num)
            metric_str = "".join(
                "{}: {}, ".format(var_name, fetch_batch_var[var_idx])
                for var_idx, var_name in enumerate(fetch_vars))
            logger.info("epoch: {} done, ".format(epoch_id) + metric_str +
                        "epoch time: {:.2f} s".format(time.time() -
                                                      epoch_begin))
        elif reader_type == 'QueueDataset':
            fetch_batch_var = dataset_train(epoch_id, dataset, fetch_vars, exe,
                                            config)
            logger.info("epoch: {} done, ".format(epoch_id) +
                        "epoch time: {:.2f} s".format(time.time() -
                                                      epoch_begin))
        else:
            logger.info("reader type wrong")

        save_static_model(paddle.static.default_main_program(),
                          model_save_path,
                          epoch_id,
                          prefix='rec_static')

        if use_inference:
            feed_var_names = config.get("runner.save_inference_feed_varnames",
                                        [])
            fetch_var_names = config.get(
                "runner.save_inference_fetch_varnames", [])
            # Feeds are resolved before fetches, as before, so a missing feed
            # variable is still the first error reported.
            feedvars = resolve_vars("Feed", feed_var_names)
            fetchvars = resolve_vars("Fetch", fetch_var_names)

            save_inference_model(model_save_path, epoch_id, feedvars,
                                 fetchvars, exe)
Esempio n. 4
0
def main(args):
    """Run inference over a range of saved epochs, optionally logging to VisualDL.

    For every epoch id in ``[runner.infer_start_epoch, runner.infer_end_epoch)``
    the checkpoint under ``<runner.infer_load_path>/<epoch_id>`` is loaded and
    the test dataloader is run through the inference network. Every
    ``runner.print_interval`` batches the first element of each fetched
    variable is logged and, when ``runner.use_visual`` is set, also written as
    a scalar under ``<abs_dir>/visualDL_log/infer``.

    Args:
        args: Parsed command-line arguments; ``config_yaml`` and ``abs_dir``
            are read.
    """
    paddle.seed(12345)

    # load config
    config = load_yaml(args.config_yaml)
    config["config_abs_dir"] = args.abs_dir
    # load static model class
    static_model_class = load_static_model_class(config)

    input_data = static_model_class.create_feeds(is_infer=True)
    input_data_names = [data.name for data in input_data]

    fetch_vars = static_model_class.infer_net(input_data)
    logger.info("cpu_num: {}".format(os.getenv("CPU_NUM")))

    use_gpu = config.get("runner.use_gpu", True)
    use_auc = config.get("runner.use_auc", False)
    use_visual = config.get("runner.use_visual", False)
    auc_num = config.get("runner.auc_num", 1)
    test_data_dir = config.get("runner.test_data_dir", None)
    print_interval = config.get("runner.print_interval", None)
    model_load_path = config.get("runner.infer_load_path", "model_output")
    start_epoch = config.get("runner.infer_start_epoch", 0)
    end_epoch = config.get("runner.infer_end_epoch", 10)
    batch_size = config.get("runner.infer_batch_size", None)
    os.environ["CPU_NUM"] = str(config.get("runner.thread_num", 1))
    logger.info("**************common.configs**********")
    logger.info(
        "use_gpu: {}, use_visual: {}, test_data_dir: {}, start_epoch: {}, end_epoch: {}, print_interval: {}, model_load_path: {}".
        format(use_gpu, use_visual, test_data_dir, start_epoch, end_epoch,
               print_interval, model_load_path))
    logger.info("**************common.configs**********")

    place = paddle.set_device('gpu' if use_gpu else 'cpu')
    exe = paddle.static.Executor(place)
    # initialize
    exe.run(paddle.static.default_startup_program())

    test_dataloader = create_data_loader(
        config=config, place=place, mode="test")

    # Create a log_visual object and store the data in the path
    if use_visual:
        from visualdl import LogWriter
        log_visual = LogWriter(args.abs_dir + "/visualDL_log/infer")
    else:
        log_visual = None
    step_num = 0

    # The fetch targets are the same for every batch.
    fetch_list = list(fetch_vars.values())

    for epoch_id in range(start_epoch, end_epoch):
        logger.info("load model epoch {}".format(epoch_id))
        model_path = os.path.join(model_load_path, str(epoch_id))
        load_static_model(
            paddle.static.default_main_program(),
            model_path,
            prefix='rec_static')

        epoch_begin = time.time()
        interval_begin = time.time()
        # Reset per epoch so an empty dataloader cannot leave this unbound
        # (first epoch) or stale (later epochs).
        fetch_batch_var = None
        if use_auc:
            reset_auc(auc_num)
        for batch_id, batch_data in enumerate(test_dataloader()):
            fetch_batch_var = exe.run(
                program=paddle.static.default_main_program(),
                feed=dict(zip(input_data_names, batch_data)),
                fetch_list=fetch_list)
            if batch_id % print_interval == 0:
                metric_str = ""
                for var_idx, var_name in enumerate(fetch_vars):
                    first_value = fetch_batch_var[var_idx][0]
                    metric_str += "{}: {}, ".format(var_name, first_value)
                    if use_visual:
                        log_visual.add_scalar(
                            tag="infer/" + var_name,
                            step=step_num,
                            value=first_value)
                logger.info("epoch: {}, batch_id: {}, ".format(
                    epoch_id, batch_id) + metric_str + "speed: {:.2f} ins/s".
                            format(print_interval * batch_size / (time.time(
                            ) - interval_begin)))
                interval_begin = time.time()
            step_num = step_num + 1

        if fetch_batch_var is None:
            logger.warning(
                "epoch: {} produced no batches; no metrics to report".format(
                    epoch_id))
            metric_str = ""
        else:
            metric_str = "".join(
                "{}: {}, ".format(var_name, fetch_batch_var[var_idx][0])
                for var_idx, var_name in enumerate(fetch_vars))
        logger.info("epoch: {} done, ".format(epoch_id) + metric_str +
                    "epoch time: {:.2f} s".format(time.time() - epoch_begin))
Esempio n. 5
0
def _merged_interest_items(ids, scores, top_n):
    """Merge the hits of several embeddings of one user into one id set.

    Hits are visited by descending score; padding id 0 and duplicates are
    skipped, and collection stops once *top_n* distinct ids are held.
    """
    ranked = sorted(
        zip(np.reshape(ids, -1), np.reshape(scores, -1)),
        key=lambda pair: pair[1],
        reverse=True)
    merged = set()
    for item_id, _ in ranked:
        if item_id != 0 and item_id not in merged:
            merged.add(item_id)
            if len(merged) >= top_n:
                break
    return merged


def _score_user(target_ids, retrieved):
    """Return ``(recall, ndcg, hit)`` for one user.

    Ids equal to 0 in *target_ids* are treated as padding and ignored. A user
    with no remaining targets, or with no hit, scores ``(0.0, 0.0, 0)``.
    """
    targets = [iid for iid in target_ids if iid != 0]
    if not targets:
        # Guard: the division by len(targets) below would otherwise fail.
        return 0.0, 0.0, 0
    hits = 0
    dcg = 0.0
    for rank, iid in enumerate(targets):
        if iid in retrieved:
            hits += 1
            dcg += 1.0 / math.log(rank + 2, 2)
    if hits == 0:
        return 0.0, 0.0, 0
    idcg = sum(1.0 / math.log(rank + 2, 2) for rank in range(hits))
    return hits * 1.0 / len(targets), dcg / idcg, 1


def _format_topn_metrics(top_n, total_recall, total_ndcg, total_hitrate,
                         total):
    """Format the running recall/ndcg/hitrate averages over *total* users."""
    denom = max(total, 1)  # avoid dividing by zero before any user is seen
    return ("recall@%d: %.5f, " % (top_n, total_recall / denom) +
            "ndcg@%d: %.5f, " % (top_n, total_ndcg / denom) +
            "hitrate@%d: %.5f, " % (top_n, total_hitrate * 1.0 / denom))


def main(args):
    """Evaluate saved epochs with faiss retrieval (recall / ndcg / hitrate).

    For every epoch id in ``[runner.infer_start_epoch, runner.infer_end_epoch)``
    the checkpoint is loaded, the ``item_emb`` tensor is read from the global
    scope and put into a faiss inner-product index, and the first fetched
    variable of every test batch is searched against it for ``args.top_n``
    items. The last element of each batch supplies the target item ids;
    id 0 is treated as padding.

    Args:
        args: Parsed command-line arguments; ``config_yaml``, ``abs_dir`` and
            ``top_n`` are read.
    """
    paddle.seed(12345)

    # load config
    config = load_yaml(args.config_yaml)
    config["config_abs_dir"] = args.abs_dir
    # load static model class
    static_model_class = load_static_model_class(config)

    input_data = static_model_class.create_feeds(is_infer=True)
    input_data_names = [data.name for data in input_data]

    fetch_vars = static_model_class.infer_net(input_data)
    logger.info("cpu_num: {}".format(os.getenv("CPU_NUM")))

    use_gpu = config.get("runner.use_gpu", True)
    test_data_dir = config.get("runner.test_data_dir", None)
    print_interval = config.get("runner.print_interval", None)
    model_load_path = config.get("runner.infer_load_path", "model_output")
    start_epoch = config.get("runner.infer_start_epoch", 0)
    end_epoch = config.get("runner.infer_end_epoch", 10)
    batch_size = config.get("runner.infer_batch_size", None)
    os.environ["CPU_NUM"] = str(config.get("runner.thread_num", 1))
    logger.info("**************common.configs**********")
    logger.info(
        "use_gpu: {}, test_data_dir: {}, start_epoch: {}, end_epoch: {}, print_interval: {}, model_load_path: {}"
        .format(use_gpu, test_data_dir, start_epoch, end_epoch, print_interval,
                model_load_path))
    logger.info("**************common.configs**********")

    place = paddle.set_device('gpu' if use_gpu else 'cpu')
    exe = paddle.static.Executor(place)
    # initialize
    exe.run(paddle.static.default_startup_program())

    test_dataloader = create_data_loader(config=config,
                                         place=place,
                                         mode="test")

    # Imported locally because only this evaluation entry point needs faiss.
    import faiss

    # The fetch targets are the same for every batch.
    fetch_list = list(fetch_vars.values())

    for epoch_id in range(start_epoch, end_epoch):
        logger.info("load model epoch {}".format(epoch_id))
        model_path = os.path.join(model_load_path, str(epoch_id))
        load_static_model(paddle.static.default_main_program(),
                          model_path,
                          prefix='rec_static')

        epoch_begin = time.time()
        interval_begin = time.time()

        # Rebuild the index every epoch: the item embeddings come from the
        # checkpoint that was just loaded.
        item_emb = np.array(paddle.static.global_scope().find_var("item_emb")
                            .get_tensor())
        if use_gpu:
            # `res` is kept as a local of main so it stays referenced for as
            # long as the index built on it is in use.
            res = faiss.StandardGpuResources()
            flat_config = faiss.GpuIndexFlatConfig()
            flat_config.device = 0
            faiss_index = faiss.GpuIndexFlatIP(res, item_emb.shape[-1],
                                               flat_config)
        else:
            faiss_index = faiss.IndexFlatIP(item_emb.shape[-1])
        faiss_index.add(item_emb)

        # Start the user count at 0; starting at 1 added a phantom user to
        # every average.
        total = 0
        total_recall = 0.0
        total_ndcg = 0.0
        total_hitrate = 0

        for batch_id, batch_data in enumerate(test_dataloader()):
            fetch_batch_var = exe.run(
                program=paddle.static.default_main_program(),
                feed=dict(zip(input_data_names, batch_data[:2])),
                fetch_list=fetch_list)

            user_embs = fetch_batch_var[0]
            target_items = np.squeeze(np.array(batch_data[-1]), axis=1)

            # A 2-D result holds one embedding per user; otherwise axis 1
            # indexes several embeddings per user whose hits are merged.
            multi_interest = len(user_embs.shape) != 2
            if multi_interest:
                ni = user_embs.shape[1]
                user_embs = np.reshape(user_embs, [-1, user_embs.shape[-1]])
            D, I = faiss_index.search(user_embs, args.top_n)

            for i, iid_list in enumerate(target_items):
                if multi_interest:
                    retrieved = _merged_interest_items(
                        I[i * ni:(i + 1) * ni], D[i * ni:(i + 1) * ni],
                        args.top_n)
                else:
                    retrieved = set(I[i])
                recall, ndcg, hit = _score_user(iid_list, retrieved)
                total_recall += recall
                total_ndcg += ndcg
                total_hitrate += hit
            total += target_items.shape[0]

            if batch_id % print_interval == 0:
                metric_str = _format_topn_metrics(args.top_n, total_recall,
                                                  total_ndcg, total_hitrate,
                                                  total)
                logger.info(
                    "epoch: {}, batch_id: {}, ".format(epoch_id, batch_id) +
                    metric_str +
                    "speed: {:.2f} ins/s".format(print_interval * batch_size /
                                                 (time.time() -
                                                  interval_begin)))
                # Restart the interval so "speed" covers only the batches
                # since the previous report.
                interval_begin = time.time()

        metric_str = _format_topn_metrics(args.top_n, total_recall,
                                          total_ndcg, total_hitrate, total)

        logger.info("epoch: {} done, ".format(epoch_id) + metric_str +
                    "epoch time: {:.2f} s".format(time.time() - epoch_begin))
Esempio n. 6
0
def main(args):
    """Run inference over a range of saved epochs with timing statistics.

    Loads the YAML config, applies ``key=value`` overrides from ``args.opt``
    (converted to the type of the value they replace), builds the inference
    network and, for every epoch id in
    ``[runner.infer_start_epoch, runner.infer_end_epoch)``, loads the
    checkpoint and runs the test dataloader through it. Reader and run costs
    are logged every ``runner.print_interval`` batches.

    Args:
        args: Parsed command-line arguments; ``config_yaml``, ``abs_dir`` and
            ``opt`` are read.

    Raises:
        ValueError: If an override has no ``=``, cannot be converted, or
            ``runner.reader_type`` is not a supported reader.
        AssertionError: If the test dataloader yields no batch.
    """
    paddle.seed(12345)

    # load config
    config = load_yaml(args.config_yaml)
    config["config_abs_dir"] = args.abs_dir
    # modify config from command
    if args.opt:
        for parameter in args.opt:
            # Split on the first '=' only so values may themselves contain '='.
            key, value = parameter.strip().split("=", 1)
            # Keep the type of the value being replaced. Exact type checks are
            # used because bool is a subclass of int.
            current_type = type(config.get(key))
            if current_type is int:
                value = int(value)
            elif current_type is float:
                value = float(value)
            elif current_type is bool:
                value = value.lower() == "true"
            config[key] = value
    # load static model class
    static_model_class = load_static_model_class(config)

    input_data = static_model_class.create_feeds(is_infer=True)
    input_data_names = [data.name for data in input_data]

    fetch_vars = static_model_class.infer_net(input_data)
    logger.info("cpu_num: {}".format(os.getenv("CPU_NUM")))

    use_gpu = config.get("runner.use_gpu", True)
    use_xpu = config.get("runner.use_xpu", False)
    use_auc = config.get("runner.use_auc", False)
    use_visual = config.get("runner.use_visual", False)
    auc_num = config.get("runner.auc_num", 1)
    test_data_dir = config.get("runner.test_data_dir", None)
    print_interval = config.get("runner.print_interval", None)
    model_load_path = config.get("runner.infer_load_path", "model_output")
    start_epoch = config.get("runner.infer_start_epoch", 0)
    end_epoch = config.get("runner.infer_end_epoch", 10)
    batch_size = config.get("runner.infer_batch_size", None)
    use_save_data = config.get("runner.use_save_data", False)
    reader_type = config.get("runner.reader_type", "DataLoader")
    use_fleet = config.get("runner.use_fleet", False)
    os.environ["CPU_NUM"] = str(config.get("runner.thread_num", 1))
    logger.info("**************common.configs**********")
    logger.info(
        "use_gpu: {}, use_xpu: {}, use_visual: {}, infer_batch_size: {}, test_data_dir: {}, start_epoch: {}, end_epoch: {}, print_interval: {}, model_load_path: {}"
        .format(use_gpu, use_xpu, use_visual, batch_size, test_data_dir,
                start_epoch, end_epoch, print_interval, model_load_path))
    logger.info("**************common.configs**********")

    if use_xpu:
        xpu_device = 'xpu:{0}'.format(os.getenv('FLAGS_selected_xpus', 0))
        place = paddle.set_device(xpu_device)
    else:
        place = paddle.set_device('gpu' if use_gpu else 'cpu')
    exe = paddle.static.Executor(place)
    # initialize
    exe.run(paddle.static.default_startup_program())

    if reader_type == 'DataLoader':
        test_dataloader = create_data_loader(config=config,
                                             place=place,
                                             mode="test")
    elif reader_type == "CustomizeDataLoader":
        test_dataloader = static_model_class.create_data_loader()
    else:
        # Fail here with a clear message instead of an UnboundLocalError at
        # the first use of test_dataloader.
        raise ValueError(
            "unsupported runner.reader_type: {}".format(reader_type))

    # Create a log_visual object and store the data in the path
    if use_visual:
        from visualdl import LogWriter
        log_visual = LogWriter(args.abs_dir + "/visualDL_log/infer")
    else:
        log_visual = None
    step_num = 0

    # The fetch targets are the same for every batch.
    fetch_list = list(fetch_vars.values())

    for epoch_id in range(start_epoch, end_epoch):
        logger.info("load model epoch {}".format(epoch_id))
        model_path = os.path.join(model_load_path, str(epoch_id))
        load_static_model(paddle.static.default_main_program(),
                          model_path,
                          prefix='rec_static')

        epoch_begin = time.time()
        interval_begin = time.time()
        infer_reader_cost = 0.0
        infer_run_cost = 0.0
        reader_start = time.time()

        if use_auc:
            reset_auc(use_fleet, auc_num)

        # we will drop the last incomplete batch when dataset size is not
        # divisible by the batch size, so a dataset smaller than one batch
        # yields nothing. Checked explicitly (not via `assert`, which is
        # stripped under -O) and by batch existence rather than batch
        # truthiness; AssertionError is kept for existing callers.
        if next(iter(test_dataloader()), None) is None:
            raise AssertionError(
                "test_dataloader's size is null, please ensure batch size < dataset size!"
            )

        for batch_id, batch_data in enumerate(test_dataloader()):
            infer_reader_cost += time.time() - reader_start
            infer_start = time.time()
            fetch_batch_var = exe.run(
                program=paddle.static.default_main_program(),
                feed=dict(zip(input_data_names, batch_data)),
                fetch_list=fetch_list)
            infer_run_cost += time.time() - infer_start
            if batch_id % print_interval == 0:
                metric_str = ""
                for var_idx, var_name in enumerate(fetch_vars):
                    first_value = fetch_batch_var[var_idx][0]
                    metric_str += "{}: {}, ".format(var_name, first_value)
                    if use_visual:
                        log_visual.add_scalar(
                            tag="infer/" + var_name,
                            step=step_num,
                            value=first_value)
                logger.info(
                    "epoch: {}, batch_id: {}, ".format(epoch_id, batch_id) +
                    metric_str +
                    "avg_reader_cost: {:.5f} sec, avg_batch_cost: {:.5f} sec, avg_samples: {:.5f}, ips: {:.2f} ins/s"
                    .format(
                        infer_reader_cost / print_interval,
                        (infer_reader_cost + infer_run_cost) / print_interval,
                        batch_size,
                        print_interval * batch_size /
                        (time.time() - interval_begin)))
                interval_begin = time.time()
                infer_reader_cost = 0.0
                infer_run_cost = 0.0
            reader_start = time.time()
            step_num = step_num + 1

        metric_str = "".join(
            "{}: {}, ".format(var_name, fetch_batch_var[var_idx][0])
            for var_idx, var_name in enumerate(fetch_vars))
        logger.info("epoch: {} done, ".format(epoch_id) + metric_str +
                    "epoch time: {:.2f} s".format(time.time() - epoch_begin))
        if use_save_data:
            save_data(fetch_batch_var, model_load_path)
Esempio n. 7
0
def main(args):
    """Evaluate saved static-graph checkpoints on the analogy test data.

    For every epoch id in ``[runner.infer_start_epoch,
    runner.infer_end_epoch)`` the checkpoint found under
    ``<runner.infer_load_path>/<epoch_id>`` is loaded into the default main
    program, the test data loader is run through it, and the accumulated
    accuracy is logged every ``runner.print_interval`` batches and once more
    at the end of the epoch.

    A sample counts as correct when the first predicted id that is not one
    of the sample's input words equals the sample's label.

    Args:
        args: Parsed command-line arguments. ``args.config_yaml`` (path of
            the YAML config) and ``args.abs_dir`` are read.

    Raises:
        TypeError: If ``runner.print_interval`` or
            ``runner.infer_batch_size`` is missing from the config; both
            default to ``None`` and are used in arithmetic.
    """
    paddle.seed(12345)

    # load config
    config = load_yaml(args.config_yaml)
    config["config_abs_dir"] = args.abs_dir
    # load static model class
    static_model_class = load_static_model_class(config)

    input_data = static_model_class.create_feeds(is_infer=True)
    fetch_vars = static_model_class.infer_net(input_data)
    logger.info("cpu_num: {}".format(os.getenv("CPU_NUM")))

    use_gpu = config.get("runner.use_gpu", True)
    test_data_dir = config.get("runner.test_data_dir", None)
    print_interval = config.get("runner.print_interval", None)
    model_load_path = config.get("runner.infer_load_path", "model_output")
    start_epoch = config.get("runner.infer_start_epoch", 0)
    end_epoch = config.get("runner.infer_end_epoch", 10)
    batch_size = config.get("runner.infer_batch_size", None)
    sparse_feature_number = config.get(
        "hyper_parameters.sparse_feature_number")
    os.environ["CPU_NUM"] = str(config.get("runner.thread_num", 1))
    logger.info("**************common.configs**********")
    logger.info(
        "use_gpu: {}, test_data_dir: {}, start_epoch: {}, end_epoch: {}, print_interval: {}, model_load_path: {}".
        format(use_gpu, test_data_dir, start_epoch, end_epoch, print_interval,
               model_load_path))
    logger.info("**************common.configs**********")

    place = paddle.set_device('gpu' if use_gpu else 'cpu')
    exe = paddle.static.Executor(place)
    # initialize
    exe.run(paddle.static.default_startup_program())

    test_dataloader = create_data_loader(
        config=config, place=place, mode="test")

    # Loop-invariant inputs: every batch is scored against the same id range
    # and fetches the same variables, so build them once.
    all_label = np.arange(sparse_feature_number, dtype="int64")
    fetch_list = [var for _, var in fetch_vars.items()]

    for epoch_id in range(start_epoch, end_epoch):
        logger.info("load model epoch {}".format(epoch_id))
        model_path = os.path.join(model_load_path, str(epoch_id))
        load_static_model(
            paddle.static.default_main_program(),
            model_path,
            prefix='rec_static')

        accum_num_sum = 0  # samples evaluated so far in this epoch
        accum_num = 0  # samples predicted correctly so far in this epoch
        epoch_begin = time.time()
        interval_begin = time.time()
        for batch_id, batch_data in enumerate(test_dataloader()):
            fetch_batch_var = exe.run(
                program=paddle.static.default_main_program(),
                feed={
                    "analogy_a": np.array(batch_data[0]),
                    "analogy_b": np.array(batch_data[1]),
                    "analogy_c": np.array(batch_data[2]),
                    "all_label": all_label
                },
                fetch_list=fetch_list)
            pre = np.array(fetch_batch_var[0])
            label = np.array(batch_data[3])
            inputs_word = np.array(batch_data[4])

            for ii, label_row in enumerate(label):
                # presumably the top-k candidate ids, best first -- TODO
                # confirm against the model's infer_net output
                candidates = pre[ii][0]
                accum_num_sum += 1
                for idx in candidates:
                    # Skip candidates that merely repeat one of the inputs.
                    if int(idx) in inputs_word[ii]:
                        continue
                    # Only the first remaining candidate is judged.
                    if int(idx) == int(label_row[0]):
                        accum_num += 1
                    break

            if batch_id % print_interval == 0:
                # Guard both divisions: a batch may contain no samples and a
                # coarse clock may report zero elapsed time.
                acc = (accum_num * 1.0 / accum_num_sum
                       if accum_num_sum else 0.0)
                elapsed = time.time() - interval_begin
                speed = (print_interval * batch_size / elapsed
                         if elapsed > 0 else float("inf"))
                logger.info(
                    "infer epoch: {}, batch_id: {}, acc: {:.6f}, speed: {:.2f} ins/s".
                    format(epoch_id, batch_id, acc, speed))
                interval_begin = time.time()

        # An empty test loader leaves accum_num_sum at 0; report 0.0 instead
        # of raising ZeroDivisionError.
        epoch_acc = accum_num * 1.0 / accum_num_sum if accum_num_sum else 0.0
        logger.info("infer epoch: {} done, acc: {:.6f}, epoch time: {:.2f} s".
                    format(epoch_id, epoch_acc, time.time() - epoch_begin))
Esempio n. 8
0
def main(args):
    """Train the static-graph model described by the YAML config.

    Builds the network and optimizer from the configured static model class,
    then runs ``runner.epochs`` epochs (starting after ``last_epoch``) over
    the training data loader. Timing and fetched metrics are logged every
    ``runner.print_interval`` batches and at the end of each epoch, after
    which the main program is saved under ``runner.model_save_path``.

    Args:
        args: Parsed command-line arguments. ``args.config_yaml`` (path of
            the YAML config) and ``args.abs_dir`` are read.

    Raises:
        TypeError: If ``runner.epochs``, ``runner.print_interval`` or
            ``runner.train_batch_size`` is missing from the config; they
            default to ``None`` and are used in arithmetic.
    """
    paddle.seed(12345)

    # load config
    config = load_yaml(args.config_yaml)
    config["config_abs_dir"] = args.abs_dir
    # load static model class
    static_model_class = load_static_model_class(config)

    input_data = static_model_class.create_feeds()
    input_data_names = [data.name for data in input_data]

    fetch_vars = static_model_class.net(input_data)
    logger.info("cpu_num: {}".format(os.getenv("CPU_NUM")))
    static_model_class.create_optimizer()

    use_gpu = config.get("runner.use_gpu", True)
    use_auc = config.get("runner.use_auc", False)
    train_data_dir = config.get("runner.train_data_dir", None)
    epochs = config.get("runner.epochs", None)
    print_interval = config.get("runner.print_interval", None)
    model_save_path = config.get("runner.model_save_path", "model_output")
    batch_size = config.get("runner.train_batch_size", None)
    os.environ["CPU_NUM"] = str(config.get("runner.thread_num", 1))
    logger.info("**************common.configs**********")
    logger.info(
        "use_gpu: {}, train_data_dir: {}, epochs: {}, print_interval: {}, model_save_path: {}".
        format(use_gpu, train_data_dir, epochs, print_interval,
               model_save_path))
    logger.info("**************common.configs**********")

    place = paddle.set_device('gpu' if use_gpu else 'cpu')
    exe = paddle.static.Executor(place)
    # initialize
    exe.run(paddle.static.default_startup_program())

    last_epoch_id = config.get("last_epoch", -1)
    train_dataloader = create_data_loader(config=config, place=place)

    for epoch_id in range(last_epoch_id + 1, epochs):

        epoch_begin = time.time()
        train_reader_cost = 0.0
        train_run_cost = 0.0
        total_samples = 0
        # Reset per epoch so the end-of-epoch summary can never report the
        # previous epoch's values (or fail on an unbound name) when the
        # loader yields no batches.
        fetch_batch_var = None
        reader_start = time.time()
        if use_auc:
            reset_auc()
        for batch_id, batch_data in enumerate(train_dataloader()):
            train_reader_cost += time.time() - reader_start
            train_start = time.time()

            fetch_batch_var = exe.run(
                program=paddle.static.default_main_program(),
                feed=dict(zip(input_data_names, batch_data)),
                fetch_list=[var for _, var in fetch_vars.items()])
            train_run_cost += time.time() - train_start
            total_samples += batch_size
            if batch_id % print_interval == 0:
                metric_str = "".join(
                    "{}: {}, ".format(var_name, fetch_batch_var[var_idx])
                    for var_idx, var_name in enumerate(fetch_vars))
                total_cost = train_reader_cost + train_run_cost
                # A coarse clock can report zero elapsed time for a fast
                # interval; avoid ZeroDivisionError in that case.
                ips = (total_samples / total_cost
                       if total_cost > 0 else float("inf"))
                logger.info(
                    "epoch: {}, batch_id: {}, ".format(epoch_id,
                                                       batch_id) + metric_str +
                    "avg_reader_cost: {:.5f} sec, avg_batch_cost: {:.5f} sec, avg_samples: {:.5f}, ips: {:.5f} images/sec".
                    format(train_reader_cost / print_interval,
                           total_cost / print_interval,
                           total_samples / print_interval, ips))
                # Start a fresh accumulation window for the next interval.
                train_reader_cost = 0.0
                train_run_cost = 0.0
                total_samples = 0
            reader_start = time.time()

        if fetch_batch_var is None:
            logger.warning(
                "epoch: {} produced no batches; no metrics to report".format(
                    epoch_id))
            metric_str = ""
        else:
            # Summarise the epoch with the metrics of its last batch.
            metric_str = "".join(
                "{}: {}, ".format(var_name, fetch_batch_var[var_idx])
                for var_idx, var_name in enumerate(fetch_vars))
        logger.info("epoch: {} done, ".format(epoch_id) + metric_str +
                    "epoch time: {:.2f} s".format(time.time() - epoch_begin))

        save_static_model(
            paddle.static.default_main_program(),
            model_save_path,
            epoch_id,
            prefix='rec_static')