Example #1
def eval(args):
    model_file = os.path.join(args.model_path, args.model_filename)
    params_file = os.path.join(args.model_path, args.params_filename)
    config = paddle_infer.Config(model_file, params_file)
    config.enable_mkldnn()

    predictor = paddle_infer.create_predictor(config)

    input_names = predictor.get_input_names()
    input_handle = predictor.get_input_handle(input_names[0])
    output_names = predictor.get_output_names()
    output_handle = predictor.get_output_handle(output_names[0])

    val_dataset = dataset.ImageNetDataset(data_dir=args.data_dir, mode='val')
    eval_loader = paddle.io.DataLoader(
        val_dataset, batch_size=args.batch_size, drop_last=True)

    cost_time = 0.
    total_num = 0.
    correct_1_num = 0
    correct_5_num = 0
    for batch_id, data in enumerate(eval_loader()):
        img_np = np.array([tensor.numpy() for tensor in data[0]])
        label_np = np.array([tensor.numpy() for tensor in data[1]])

        input_handle.reshape(img_np.shape)
        input_handle.copy_from_cpu(img_np)

        t1 = time.time()
        predictor.run()
        t2 = time.time()
        cost_time += (t2 - t1)

        output_data = output_handle.copy_to_cpu()

        for i in range(len(label_np)):
            label = label_np[i][0]
            result = output_data[i, :]
            index = result.argsort()
            total_num += 1
            if index[-1] == label:
                correct_1_num += 1
            if label in index[-5:]:
                correct_5_num += 1

        if batch_id % 10 == 0:
            acc1 = correct_1_num / total_num
            acc5 = correct_5_num / total_num
            avg_time = cost_time / total_num
            print(
                "batch_id {}, acc1 {:.3f}, acc5 {:.3f}, avg time {:.5f} sec/img".
                format(batch_id, acc1, acc5, avg_time))

        if args.test_samples > 0 and \
            (batch_id + 1)* args.batch_size >= args.test_samples:
            break

    acc1 = correct_1_num / total_num
    acc5 = correct_5_num / total_num
    print("End test: test_acc1 {:.3f}, test_acc5 {:.5f}".format(acc1, acc5))
Example #2
    def create_paddle_predictor(self, args, inference_model_dir=None):
        if inference_model_dir is None:
            inference_model_dir = args.inference_model_dir
        params_file = os.path.join(inference_model_dir, "inference.pdiparams")
        model_file = os.path.join(inference_model_dir, "inference.pdmodel")
        config = Config(model_file, params_file)

        if args.use_gpu:
            config.enable_use_gpu(args.gpu_mem, 0)
        else:
            config.disable_gpu()
            if args.enable_mkldnn:
                # cache 10 different shapes for mkldnn to avoid memory leak
                config.set_mkldnn_cache_capacity(10)
                config.enable_mkldnn()
        config.set_cpu_math_library_num_threads(args.cpu_num_threads)

        if args.enable_profile:
            config.enable_profile()
        config.disable_glog_info()
        config.switch_ir_optim(args.ir_optim)  # default true
        if args.use_tensorrt:
            config.enable_tensorrt_engine(
                precision_mode=Config.Precision.Half
                if args.use_fp16 else Config.Precision.Float32,
                max_batch_size=args.batch_size,
                min_subgraph_size=30)

        config.enable_memory_optim()
        # use zero copy
        config.switch_use_feed_fetch_ops(False)
        predictor = create_predictor(config)

        return predictor, config
Example #3
    def __init__(self, cfg, name='BMN'):
        name = name.upper()
        self.name = name
        model_file = cfg[name]['model_file']
        params_file = cfg[name]['params_file']
        gpu_mem = cfg[name]['gpu_mem']
        device_id = cfg[name]['device_id']

        self.nms_thread = cfg[name]['nms_thread']
        self.min_pred_score = cfg[name]['score_thread']
        self.min_frame_thread = cfg['COMMON']['fps']

        # model init
        config = Config(model_file, params_file)
        config.enable_use_gpu(gpu_mem, device_id)
        config.switch_ir_optim(True)  # default true
        config.enable_memory_optim()
        # use zero copy
        config.switch_use_feed_fetch_ops(False)
        self.predictor = create_predictor(config)

        input_names = self.predictor.get_input_names()
        self.input_tensor = self.predictor.get_input_handle(input_names[0])

        output_names = self.predictor.get_output_names()
        self.output1_tensor = self.predictor.get_output_handle(output_names[0])
        self.output2_tensor = self.predictor.get_output_handle(output_names[1])
        self.output3_tensor = self.predictor.get_output_handle(output_names[2])
Example #4
def init_predictor(args):
    if args.model_dir != "":
        config = Config(args.model_dir)
    else:
        config = Config(args.model_file, args.params_file)

    config.enable_memory_optim()
    if args.tune:
        config.collect_shape_range_info(shape_file)
    if args.use_gpu:
        config.enable_use_gpu(1000, 0)
        if args.use_trt:
            # using dynamic shape mode, the max_batch_size will be ignored.
            config.enable_tensorrt_engine(workspace_size=1 << 30,
                                          max_batch_size=1,
                                          min_subgraph_size=5,
                                          precision_mode=PrecisionType.Float32,
                                          use_static=False,
                                          use_calib_mode=False)
            if args.tuned_dynamic_shape:
                config.enable_tuned_tensorrt_dynamic_shape(shape_file, True)
    else:
        # If MKL-DNN is not enabled, you can set the BLAS thread count instead.
        # The thread count should not be greater than the number of CPU cores.
        config.set_cpu_math_library_num_threads(4)
        config.enable_mkldnn()

    predictor = create_predictor(config)
    return predictor
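
init_predictor() above only builds the predictor; a minimal usage sketch follows (args is the same namespace the example reads from). The single float32 NCHW input of shape (1, 3, 224, 224) is an assumption for illustration and should be replaced with the deployed model's real input:

import numpy as np

predictor = init_predictor(args)

# feed the first (assumed only) input
input_names = predictor.get_input_names()
input_handle = predictor.get_input_handle(input_names[0])
fake_input = np.ones((1, 3, 224, 224), dtype="float32")  # assumed input shape/dtype
input_handle.reshape(fake_input.shape)
input_handle.copy_from_cpu(fake_input)

predictor.run()

# fetch the first output back to the host
output_names = predictor.get_output_names()
output_handle = predictor.get_output_handle(output_names[0])
print(output_handle.copy_to_cpu().shape)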
Example #5
def create_predictor(args, mode, logger):
    if mode == "det":
        model_dir = args.det_model_dir
    elif mode == 'cls':
        model_dir = args.cls_model_dir
    elif mode == 'rec':
        model_dir = args.rec_model_dir
    else:
        model_dir = args.e2e_model_dir

    if model_dir is None:
        logger.info("not find {} model file path {}".format(mode, model_dir))
        sys.exit(0)
    model_file_path = model_dir + "/inference.pdmodel"
    params_file_path = model_dir + "/inference.pdiparams"
    if not os.path.exists(model_file_path):
        logger.info("not find model file path {}".format(model_file_path))
        sys.exit(0)
    if not os.path.exists(params_file_path):
        logger.info("not find params file path {}".format(params_file_path))
        sys.exit(0)

    config = inference.Config(model_file_path, params_file_path)

    if args.use_gpu:
        config.enable_use_gpu(args.gpu_mem, 0)
        if args.use_tensorrt:
            config.enable_tensorrt_engine(
                precision_mode=inference.PrecisionType.Half
                if args.use_fp16 else inference.PrecisionType.Float32,
                max_batch_size=args.max_batch_size)
    else:
        config.disable_gpu()
        config.set_cpu_math_library_num_threads(6)
        if args.enable_mkldnn:
            # cache 10 different shapes for mkldnn to avoid memory leak
            config.set_mkldnn_cache_capacity(10)
            config.enable_mkldnn()
            # TODO LDOUBLEV: fix mkldnn bug when batch_size > 1
            #config.set_mkldnn_op({'conv2d', 'depthwise_conv2d', 'pool2d', 'batch_norm'})
            args.rec_batch_num = 1

    # enable memory optim
    config.enable_memory_optim()
    config.disable_glog_info()

    config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass")
    config.switch_use_feed_fetch_ops(False)

    # create predictor
    predictor = inference.create_predictor(config)
    input_names = predictor.get_input_names()
    for name in input_names:
        input_tensor = predictor.get_input_handle(name)
    output_names = predictor.get_output_names()
    output_tensors = []
    for output_name in output_names:
        output_tensor = predictor.get_output_handle(output_name)
        output_tensors.append(output_tensor)
    return predictor, input_tensor, output_tensors
Example #6
    def get_model(self):
        # Download and unzip model
        URL = self.args.url
        model_name = self.args.model_name
        file_name = self.args.file_name

        # Save model in temporary directory and load model into memory
        with tempfile.TemporaryDirectory() as tmpdirname:
            os.system("wget -P {} {}".format(tmpdirname, URL))
            os.system("tar -zvxf {0}/{1} -C {0}".format(tmpdirname, file_name))
            with open("{}/{}/inference.pdmodel".format(tmpdirname, model_name),
                      "rb") as f:
                model = f.read()
            with open(
                    "{}/{}/inference.pdiparams".format(tmpdirname, model_name),
                    "rb") as f:
                params = f.read()

            # acquire input names
            paddle_config = self.create_inference_config(ir_optim=False)
            paddle_config.set_model_buffer(model, len(model), params,
                                           len(params))
            predictor = paddle_infer.create_predictor(paddle_config)
            self.input_names = predictor.get_input_names()

            return model, params
Example #7
    def create_predictor(cls, args, config=None):
        if config is None:
            config = inference.Config(
                os.path.join(args.inference_model_dir, "transformer.pdmodel"),
                os.path.join(args.inference_model_dir,
                             "transformer.pdiparams"))
            if args.use_gpu:
                config.enable_use_gpu(100, 0)
            elif args.use_xpu:
                config.enable_xpu(100)
            else:
                # CPU: options such as enable_mkldnn and
                # set_cpu_math_library_num_threads could be set here.
                config.disable_gpu()
            # Use ZeroCopy.
            config.switch_use_feed_fetch_ops(False)

        predictor = inference.create_predictor(config)
        input_handles = [
            predictor.get_input_handle(name)
            for name in predictor.get_input_names()
        ]
        output_handles = [
            predictor.get_output_handle(name)
            for name in predictor.get_output_names()
        ]
        return cls(predictor, input_handles, output_handles)
Example #8
    def __init__(self, cfg, name='PPTSM'): 
        name = name.upper()
        self.name           = name
        model_file          = cfg[name]['model_file']
        params_file         = cfg[name]['params_file']
        gpu_mem             = cfg[name]['gpu_mem']
        device_id           = cfg[name]['device_id']

        # model init
        config = Config(model_file, params_file)
        config.enable_use_gpu(gpu_mem, device_id)
        config.switch_ir_optim(True)  # default true
        config.enable_memory_optim()

        # use zero copy
        config.switch_use_feed_fetch_ops(False)
        self.predictor = create_predictor(config)

        input_names = self.predictor.get_input_names()
        self.input_tensor = self.predictor.get_input_handle(input_names[0])

        output_names = self.predictor.get_output_names()
        print("output_names = ", output_names)
        #self.output_tensor = self.predictor.get_output_handle(output_names[1])
        self.output_tensor = self.predictor.get_output_handle(output_names[0])
Example #9
    def get_truth_val_by_inference(self):
        try:
            import paddle.inference as paddle_infer
        except ImportError:
            # when paddle is not installed, return directly
            return
        data = np.array(
            [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795,
             -0.0332]).astype("float32")[np.newaxis, :]
        input_dict = {"x": data}

        pd_config = paddle_infer.Config("uci_housing_model/")
        pd_config.disable_gpu()
        pd_config.switch_ir_optim(False)

        predictor = paddle_infer.create_predictor(pd_config)

        input_names = predictor.get_input_names()
        for i, input_name in enumerate(input_names):
            input_handle = predictor.get_input_handle(input_name)
            input_handle.copy_from_cpu(input_dict[input_name])

        predictor.run()

        output_data_dict = {}
        output_names = predictor.get_output_names()
        for _, output_data_name in enumerate(output_names):
            output_handle = predictor.get_output_handle(output_data_name)
            output_data = output_handle.copy_to_cpu()
            output_data_dict[output_data_name] = output_data
        # convert to the same format of Serving output
        output_data_dict["prob"] = output_data_dict["fc_0.tmp_1"]
        del output_data_dict["fc_0.tmp_1"]
        self.truth_val = output_data_dict
Example #10
def paddle_inference(args):
    import paddle.inference as paddle_infer

    config = paddle_infer.Config(args.model_file, args.params_file)
    predictor = paddle_infer.create_predictor(config)

    input_names = predictor.get_input_names()
    input_handle = predictor.get_input_handle(input_names[0])

    img = cv2.imread(args.image_path)
    # normalize to mean 0.5, std 0.5
    img = (img - 127.5) * 0.00784313725
    # BGR2RGB
    img = img[:, :, ::-1]
    img = img.transpose((2, 0, 1))
    img = np.expand_dims(img, 0)
    img = img.astype('float32')

    input_handle.copy_from_cpu(img)

    predictor.run()

    output_names = predictor.get_output_names()
    output_handle = predictor.get_output_handle(output_names[0])
    output_data = output_handle.copy_to_cpu()

    print('paddle inference result: ', output_data.shape)
Example #11
def create_paddle_predictor(args):
    config = Config(args.model_file, args.params_file)

    if args.use_gpu:
        config.enable_use_gpu(args.gpu_mem, 0)
    else:
        config.disable_gpu()
        if args.enable_mkldnn:
            # cache 10 different shapes for mkldnn to avoid memory leak
            config.set_mkldnn_cache_capacity(10)
            config.enable_mkldnn()

    #config.disable_glog_info()
    config.switch_ir_optim(args.ir_optim)  # default true
    if args.use_tensorrt:
        config.enable_tensorrt_engine(
            precision_mode=Config.Precision.Half
            if args.use_fp16 else Config.Precision.Float32,
            max_batch_size=args.batch_size)

    config.enable_memory_optim()
    # use zero copy
    config.switch_use_feed_fetch_ops(False)
    predictor = create_predictor(config)

    return predictor
Example #12
    def __init__(self, cfg, name='ACTION'):
        name = name.upper()
        self.name = name
        model_file = cfg[name]['model_file']
        params_file = cfg[name]['params_file']
        gpu_mem = cfg[name]['gpu_mem']
        device_id = cfg[name]['device_id']

        self.topk = cfg[name]['topk']
        self.frame_offset = cfg[name]['nms_offset']
        self.nms_thread = cfg[name]['nms_thread']
        self.cls_thread = cfg[name]['classify_score_thread']
        self.iou_thread = cfg[name]['iou_score_thread']

        self.label_map_file = cfg['COMMON']['label_dic']
        self.fps = cfg['COMMON']['fps']
        self.nms_id = 5

        # model init
        config = Config(model_file, params_file)
        config.enable_use_gpu(gpu_mem, device_id)
        config.switch_ir_optim(True)  # default true
        config.enable_memory_optim()
        # use zero copy
        config.switch_use_feed_fetch_ops(False)
        self.predictor = create_predictor(config)

        input_names = self.predictor.get_input_names()
        self.input1_tensor = self.predictor.get_input_handle(input_names[0])
        #self.input2_tensor = self.predictor.get_input_handle(input_names[1])

        output_names = self.predictor.get_output_names()
        self.output1_tensor = self.predictor.get_output_handle(output_names[0])
        self.output2_tensor = self.predictor.get_output_handle(output_names[1])
Example #13
 def __init__(self, model_path, param_path, use_gpu=False):
     model_path, param_path = self.check_param(model_path, param_path)
     try:
         config = paddle_infer.Config(model_path, param_path)
     except Exception:
         raise ValueError("The model and parameters do not match; please check whether the correct model and parameter files were loaded.")
     if not use_gpu:
         config.enable_mkldnn()
         # TODO: paddle.fluid is being deprecated; find another way to check bfloat16 support
         # if paddle.fluid.core.supports_bfloat16():
         #     config.enable_mkldnn_bfloat16()
         config.switch_ir_optim(True)
         config.set_cpu_math_library_num_threads(10)
     else:
         config.enable_use_gpu(500, 0)
         config.delete_pass("conv_elementwise_add_act_fuse_pass")
         config.delete_pass("conv_elementwise_add2_act_fuse_pass")
         config.delete_pass("conv_elementwise_add_fuse_pass")
         config.switch_ir_optim()
         config.enable_memory_optim()
         # use_tensorrt = False  # TODO: enabling TensorRT currently raises errors on Linux and Windows
         # if use_tensorrt:
         #     config.enable_tensorrt_engine(
         #         workspace_size=1 << 30,
         #         precision_mode=paddle_infer.PrecisionType.Float32,
         #         max_batch_size=1,
         #         min_subgraph_size=5,
         #         use_static=False,
         #         use_calib_mode=False,
         #     )
     self.model = paddle_infer.create_predictor(config)
Example #14
    def load_predictor(self, model_file, params_file):
        config = Config(model_file, params_file)
        if self.predictor_config["use_gpu"]:
            config.enable_use_gpu(200, 0)
            config.switch_ir_optim(True)
        else:
            config.disable_gpu()
            config.set_cpu_math_library_num_threads(
                self.predictor_config["cpu_threads"])

            if self.predictor_config["enable_mkldnn"]:
                try:
                    # cache 10 different shapes for mkldnn to avoid memory leak
                    config.set_mkldnn_cache_capacity(10)
                    config.enable_mkldnn()
                except Exception as e:
                    logging.error(
                        "The current environment does not support `mkldnn`, so disable mkldnn."
                    )
        config.disable_glog_info()
        config.enable_memory_optim()
        # use zero copy
        config.switch_use_feed_fetch_ops(False)
        predictor = create_predictor(config)
        input_names = predictor.get_input_names()
        output_names = predictor.get_output_names()
        return predictor, input_names, output_names
Example #15
    def load(self) -> bool:
        def get_model_files(ext: str) -> str:
            file_list = []
            for filename in os.listdir(model_path):
                if filename.endswith(ext):
                    file_list.append(filename)
            if len(file_list) == 0:
                raise Exception("Missing {} model file".format(ext))
            if len(file_list) > 1:
                raise Exception("More than one {} model file".format(ext))
            return os.path.join(model_path, file_list[0])

        model_path = kserve.Storage.download(self.model_dir)
        config = inference.Config(get_model_files('.pdmodel'),
                                  get_model_files('.pdiparams'))
        # TODO: add GPU support
        config.disable_gpu()

        self.predictor = inference.create_predictor(config)

        # TODO: add support for multiple input_names/output_names
        input_names = self.predictor.get_input_names()
        self.input_tensor = self.predictor.get_input_handle(input_names[0])
        output_names = self.predictor.get_output_names()
        self.output_tensor = self.predictor.get_output_handle(output_names[0])

        self.ready = True
        return self.ready
Example #16
    def eval(self):
        '''
        Create the model predictor from the model config.
        '''
        # create the predictor
        self.predictor = create_predictor(self.config)

        # get the model's input and output names
        self.input_names = self.predictor.get_input_names()
        self.output_names = self.predictor.get_output_names()

        # get the number of model inputs and outputs
        self.input_num = len(self.input_names)
        self.output_num = len(self.output_names)

        # get the input handles
        self.input_handles = []
        for input_name in self.input_names:
            self.input_handles.append(
                self.predictor.get_input_handle(input_name))

        # get the output handles
        self.output_handles = []
        for output_name in self.output_names:
            self.output_handles.append(
                self.predictor.get_output_handle(output_name))
Example #17
 def test_static_save_and_run_inference_predictor(self):
     paddle.enable_static()
     np_data = np.random.random((1, 1, 28, 28)).astype("float32")
     np_label = np.random.random((1, 1)).astype("int64")
     path_prefix = "custom_op_inference/custom_relu"
     from paddle.inference import Config
     from paddle.inference import create_predictor
     for device in self.devices:
         predict = custom_relu_static_inference(
             self.custom_ops[0], device, np_data, np_label, path_prefix)
         # load inference model
         config = Config(path_prefix + ".pdmodel",
                         path_prefix + ".pdiparams")
         predictor = create_predictor(config)
         input_tensor = predictor.get_input_handle(predictor.get_input_names(
         )[0])
         input_tensor.reshape(np_data.shape)
         input_tensor.copy_from_cpu(np_data.copy())
         predictor.run()
         output_tensor = predictor.get_output_handle(
             predictor.get_output_names()[0])
         predict_infer = output_tensor.copy_to_cpu()
         self.assertTrue(
             np.isclose(
                 predict, predict_infer, rtol=5e-5).any(),
             "custom op predict: {},\n custom op infer predict: {}".format(
                 predict, predict_infer))
     paddle.disable_static()
Example #18
    def load_model(self,
                   model_dir,
                   use_gpu=False,
                   enable_mkldnn=False,
                   cpu_threads=1):
        model = os.path.join(model_dir, '__model__')
        params = os.path.join(model_dir, '__params__')
        config = Config(model, params)

        # set config options
        if use_gpu:
            config.enable_use_gpu(100, 0)
        else:
            config.disable_gpu()
            config.set_cpu_math_library_num_threads(cpu_threads)
            if enable_mkldnn:
                config.enable_mkldnn()
                config.set_mkldnn_cache_capacity(10)

        config.disable_glog_info()
        config.switch_ir_optim(True)
        config.enable_memory_optim()
        config.switch_use_feed_fetch_ops(False)
        config.switch_specify_input_names(True)

        # create the predictor from the config
        predictor = create_predictor(config)

        # get the model's input and output handles
        input_names = predictor.get_input_names()
        output_names = predictor.get_output_names()
        input_handle = predictor.get_input_handle(input_names[0])
        output_handle = predictor.get_output_handle(output_names[0])

        return predictor, input_handle, output_handle
Example #19
    def create_predictor(cls,
                         args,
                         config=None,
                         profile=False,
                         model_name=None):
        if config is None:
            config = inference.Config(
                os.path.join(args.inference_model_dir, "transformer.pdmodel"),
                os.path.join(args.inference_model_dir,
                             "transformer.pdiparams"))
            if args.device == "gpu":
                config.enable_use_gpu(100, 0)
            elif args.device == "xpu":
                config.enable_xpu(100)
            else:
                # CPU
                config.disable_gpu()
                if args.use_mkl:
                    config.enable_mkldnn()
                    config.set_cpu_math_library_num_threads(args.threads)
            # Use ZeroCopy.
            config.switch_use_feed_fetch_ops(False)

        if profile:
            if args.mod is recorder:
                autolog = args.mod.Recorder(config, args.infer_batch_size,
                                            args.model_name)
            else:
                pid = os.getpid()
                autolog = args.mod.AutoLogger(
                    model_name=args.model_name,
                    model_precision="fp32",
                    batch_size=args.infer_batch_size,
                    save_path=args.save_log_path,
                    inference_config=config,
                    data_shape="dynamic",
                    pids=pid,
                    process_name=None,
                    gpu_ids=0 if args.device == "gpu" else None,
                    time_keys=[
                        'preprocess_time', 'inference_time', 'postprocess_time'
                    ],
                    warmup=0,
                    logger=logger)
        else:
            autolog = None

        predictor = inference.create_predictor(config)
        input_handles = [
            predictor.get_input_handle(name)
            for name in predictor.get_input_names()
        ]
        output_handles = [
            predictor.get_output_handle(name)
            for name in predictor.get_output_names()
        ]
        return cls(predictor, input_handles, output_handles, autolog)
Example #20
def load_predictor(model_dir,
                   run_mode='fluid',
                   batch_size=1,
                   use_gpu=False,
                   min_subgraph_size=3):
    """set AnalysisConfig, generate AnalysisPredictor
    Args:
        model_dir (str): root path of __model__ and __params__
        use_gpu (bool): whether to use GPU
    Returns:
        predictor (PaddlePredictor): AnalysisPredictor
    Raises:
        ValueError: predicting with TensorRT requires use_gpu == True.
    """
    if not use_gpu and not run_mode == 'fluid':
        raise ValueError(
            "Predict by TensorRT mode: {}, expect use_gpu==True, but use_gpu == {}"
            .format(run_mode, use_gpu))
    if run_mode == 'trt_int8':
        raise ValueError("TensorRT int8 mode is not supported now, "
                         "please use trt_fp32 or trt_fp16 instead.")
    config = Config(os.path.join(model_dir, 'model.pdmodel'),
                    os.path.join(model_dir, 'model.pdiparams'))
    precision_map = {
        'trt_int8': Config.Precision.Int8,
        'trt_fp32': Config.Precision.Float32,
        'trt_fp16': Config.Precision.Half
    }
    if use_gpu:
        # initial GPU memory(M), device ID
        config.enable_use_gpu(200, 0)
        # optimize graph and fuse op
        # FIXME(dkp): ir optimize may prune variable inside graph
        #             and incur error in Paddle 2.0, e.g. in SSDLite
        #             FCOS model, set as False currently and should
        #             be set as True after switch_ir_optim fixed
        config.switch_ir_optim(False)
    else:
        config.disable_gpu()

    if run_mode in precision_map.keys():
        config.enable_tensorrt_engine(workspace_size=1 << 10,
                                      max_batch_size=batch_size,
                                      min_subgraph_size=min_subgraph_size,
                                      precision_mode=precision_map[run_mode],
                                      use_static=False,
                                      use_calib_mode=False)

    # disable print log when predict
    config.disable_glog_info()
    # enable shared memory
    config.enable_memory_optim()
    # disable feed, fetch OP, needed by zero_copy_run
    config.switch_use_feed_fetch_ops(False)
    predictor = create_predictor(config)
    return predictor
Example #21
 def init_resnet50_predictor(model_dir):
     model_file = model_dir + '.pdmodel'
     params_file = model_dir + '.pdiparams'
     config = inference.Config()
     config.set_prog_file(model_file)
     config.set_params_file(params_file)
     config.enable_use_gpu(500, 0)
     predictor = inference.create_predictor(config)
     return predictor
Example #22
def init_predictor(args):
    config = Config()
    if args.model_dir == "":
        config.set_model(args.model_file, args.params_file)
    else:
        config.set_model(args.model_dir)
    #config.disable_glog_info()
    config.enable_use_gpu(1000, 3)
    predictor = create_predictor(config)
    return predictor
Example #23
 def test_wrong_input(self):
     with self.assertRaises(TypeError):
         program, params = get_sample_model()
         config = self.get_config(program, params)
         predictor = create_predictor(config)
         in_names = predictor.get_input_names()
         in_handle = predictor.get_input_handle(in_names[0])
         in_data = np.ones((1, 6, 64, 64)).astype(np.float32)
         in_handle.copy_from_cpu(list(in_data))
         predictor.run()
Example #24
 def test_apis(self):
     print('trt compile version:', get_trt_compile_version())
     print('trt runtime version:', get_trt_runtime_version())
     program, params = get_sample_model()
     config = self.get_config(program, params)
     predictor = create_predictor(config)
     in_names = predictor.get_input_names()
     in_handle = predictor.get_input_handle(in_names[0])
     in_data = np.ones((1, 6, 32, 32)).astype(np.float32)
     in_handle.copy_from_cpu(in_data)
     predictor.run()
Example #25
def init_predictor(model_dir):
    # refer to https://paddle-inference.readthedocs.io/en/latest/api_reference/python_api_doc/Config/GPUConfig.html
    model_file = model_dir+'.pdmodel'
    params_file = model_dir + '.pdiparams'
    config = inference.Config()
    config.set_prog_file(model_file)
    config.set_params_file(params_file)
    # enable GPU for inference: initial GPU memory pool 50 MB, device ID 0
    config.enable_use_gpu(50, 0)
    predictor = inference.create_predictor(config)
    return predictor
Example #26
    def _set_config(self):
        """
        predictor config setting.
        """

        # create default cpu predictor
        cpu_config = Config(self.default_pretrained_model_path)
        cpu_config.disable_glog_info()
        cpu_config.disable_gpu()
        self.cpu_predictor = create_predictor(cpu_config)

        # create predictors using various types of devices

        # npu
        npu_id = self._get_device_id("FLAGS_selected_npus")
        if npu_id != -1:
            # use npu
            npu_config = Config(self.default_pretrained_model_path)
            npu_config.disable_glog_info()
            npu_config.enable_npu(device_id=npu_id)
            self.npu_predictor = create_predictor(npu_config)

        # gpu
        gpu_id = self._get_device_id("CUDA_VISIBLE_DEVICES")
        if gpu_id != -1:
            # use gpu
            gpu_config = Config(self.default_pretrained_model_path)
            gpu_config.disable_glog_info()
            gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000,
                                      device_id=gpu_id)
            self.gpu_predictor = create_predictor(gpu_config)

        # xpu
        xpu_id = self._get_device_id("XPU_VISIBLE_DEVICES")
        if xpu_id != -1:
            # use xpu
            xpu_config = Config(self.default_pretrained_model_path)
            xpu_config.disable_glog_info()
            xpu_config.enable_xpu(100)
            self.xpu_predictor = create_predictor(xpu_config)
Example #27
    def __init__(self, args):
        """
        Prepare for prediction.
        The usage and docs of paddle inference, please refer to
        https://paddleinference.paddlepaddle.org.cn/product_introduction/summary.html
        """
        self.args = args
        self.cfg = DeployConfig(args.cfg)

        self._init_base_config()
        self._init_cpu_config()

        self.predictor = create_predictor(self.pred_cfg)
Example #28
def init_predictor(args):
    if args.model_dir:
        config = Config(args.model_dir)
    else:
        config = Config(args.model_file, args.params_file)

    if args.use_gpu:
        config.enable_use_gpu(1000, 0)
    else:
        config.disable_gpu()
        print(config)
        # config.delete('repeated_fc_relu_fuse_pass')
    predictor = create_predictor(config)
    return predictor
Example #29
def infer(args):
    model_name = 'plato-xl'
    tokenizer = UnifiedTransformerTokenizer.from_pretrained(model_name)

    context = [
        "Hi , Becky , what's up ?",
        "Not much , except that my mother-in-law is driving me up the wall .",
        "What's the problem ?"
    ]

    data = tokenizer.dialogue_encode(
        history=context,
        add_start_token_as_response=True,
        return_length=True,
        return_role_ids=args.use_role,
        position_style=args.position_style)

    # Load FasterTransformer lib. 
    load("FasterTransformer", verbose=True)

    config = paddle_infer.Config(args.inference_model_dir + "plato.pdmodel",
                                 args.inference_model_dir + "plato.pdiparams")
    config.enable_use_gpu(100, 0)
    config.disable_glog_info()
    predictor = paddle_infer.create_predictor(config)

    input_handles = {}
    for name in predictor.get_input_names():
        input_handles[name] = predictor.get_input_handle(name)
        if name == "attention_mask":
            input_handles[name].copy_from_cpu(
                np.expand_dims(
                    np.asarray(
                        data[name], dtype="float32"), axis=(0, 1)))
        else:
            input_handles[name].copy_from_cpu(
                np.asarray(
                    data[name], dtype="int32").reshape([1, -1]))

    output_handles = [
        predictor.get_output_handle(name)
        for name in predictor.get_output_names()
    ]

    predictor.run()

    output = [output_handle.copy_to_cpu() for output_handle in output_handles]

    for sample in output[0].transpose([1, 0]).tolist():
        print(" ".join(postprocess_response(sample, tokenizer)))
Example #30
def init_predictor(args):

    config = Config(os.path.join(args.model_dir, "inference.pdmodel"),
                    os.path.join(args.model_dir, "inference.pdiparams"))

    config.enable_memory_optim()
    if args.use_gpu:
        config.enable_use_gpu(1000, 0)
    else:
        # If MKL-DNN is not enabled, you can set the BLAS thread count instead.
        # The thread count should not be greater than the number of CPU cores.
        config.set_cpu_math_library_num_threads(4)

    predictor = create_predictor(config)
    return predictor