def load_model(self, model_dir, use_gpu=False, enable_mkldnn=False, cpu_threads=1):
    """Build a paddle-inference predictor from a ``__model__``/``__params__`` pair.

    Args:
        model_dir: directory containing ``__model__`` and ``__params__``.
        use_gpu: when True run on GPU 0 with a 100 MB initial workspace.
        enable_mkldnn: enable MKL-DNN acceleration for CPU inference.
        cpu_threads: math-library thread count used on CPU.

    Returns:
        Tuple ``(predictor, input_handle, output_handle)`` for the model's
        first input and first output.
    """
    model_path = os.path.join(model_dir, '__model__')
    params_path = os.path.join(model_dir, '__params__')
    config = Config(model_path, params_path)

    # Device placement: GPU workspace vs. CPU threading / MKL-DNN.
    if use_gpu:
        config.enable_use_gpu(100, 0)
    else:
        config.disable_gpu()
        config.set_cpu_math_library_num_threads(cpu_threads)
        if enable_mkldnn:
            config.enable_mkldnn()
            # Bound the mkldnn shape cache so memory does not grow unboundedly.
            config.set_mkldnn_cache_capacity(10)

    # Quiet logging, graph optimisation and zero-copy feed/fetch.
    config.disable_glog_info()
    config.switch_ir_optim(True)
    config.enable_memory_optim()
    config.switch_use_feed_fetch_ops(False)
    config.switch_specify_input_names(True)

    predictor = create_predictor(config)

    # Grab handles for the first declared input and output.
    first_input = predictor.get_input_names()[0]
    first_output = predictor.get_output_names()[0]
    input_handle = predictor.get_input_handle(first_input)
    output_handle = predictor.get_output_handle(first_output)
    return predictor, input_handle, output_handle
def create_paddle_predictor(self, args, inference_model_dir=None):
    """Create a predictor from an ``inference.pdmodel``/``inference.pdiparams`` dir.

    Args:
        args: namespace with inference options (use_gpu, gpu_mem, enable_mkldnn,
            cpu_num_threads, enable_profile, ir_optim, use_tensorrt, use_fp16,
            batch_size, inference_model_dir).
        inference_model_dir: overrides ``args.inference_model_dir`` when given.

    Returns:
        Tuple ``(predictor, config)``.
    """
    model_dir = (args.inference_model_dir
                 if inference_model_dir is None else inference_model_dir)
    params_file = os.path.join(model_dir, "inference.pdiparams")
    model_file = os.path.join(model_dir, "inference.pdmodel")
    config = Config(model_file, params_file)

    if args.use_gpu:
        config.enable_use_gpu(args.gpu_mem, 0)
    else:
        config.disable_gpu()
        if args.enable_mkldnn:
            # cache 10 different shapes for mkldnn to avoid memory leak
            config.set_mkldnn_cache_capacity(10)
            config.enable_mkldnn()
    config.set_cpu_math_library_num_threads(args.cpu_num_threads)

    if args.enable_profile:
        config.enable_profile()
    config.disable_glog_info()
    config.switch_ir_optim(args.ir_optim)  # default true

    if args.use_tensorrt:
        trt_precision = (Config.Precision.Half
                         if args.use_fp16 else Config.Precision.Float32)
        config.enable_tensorrt_engine(precision_mode=trt_precision,
                                      max_batch_size=args.batch_size,
                                      min_subgraph_size=30)

    config.enable_memory_optim()
    # Zero-copy path: skip the feed/fetch ops.
    config.switch_use_feed_fetch_ops(False)

    predictor = create_predictor(config)
    return predictor, config
def create_paddle_predictor(args):
    """Build a paddle-inference predictor from explicit model/params files.

    Args:
        args: namespace with model_file, params_file, use_gpu, gpu_mem,
            enable_mkldnn, ir_optim, use_tensorrt, use_fp16 and batch_size.

    Returns:
        The created predictor.
    """
    config = Config(args.model_file, args.params_file)

    if args.use_gpu:
        config.enable_use_gpu(args.gpu_mem, 0)
    else:
        config.disable_gpu()
        if args.enable_mkldnn:
            # cache 10 different shapes for mkldnn to avoid memory leak
            config.set_mkldnn_cache_capacity(10)
            config.enable_mkldnn()

    # glog output is intentionally left enabled here.
    config.switch_ir_optim(args.ir_optim)  # default true
    if args.use_tensorrt:
        trt_precision = (Config.Precision.Half
                         if args.use_fp16 else Config.Precision.Float32)
        config.enable_tensorrt_engine(precision_mode=trt_precision,
                                      max_batch_size=args.batch_size)

    config.enable_memory_optim()
    # Zero-copy feed/fetch.
    config.switch_use_feed_fetch_ops(False)
    return create_predictor(config)
def init_predictor(args):
    """Create a predictor, optionally with TensorRT and tuned dynamic shapes.

    Args:
        args: namespace with model_dir, model_file, params_file, tune, use_gpu,
            use_trt and tuned_dynamic_shape flags.

    Returns:
        The created predictor.
    """
    # BUG FIX: the original used `args.model_dir is not ""` — identity
    # comparison with a string literal is implementation-defined and raises
    # SyntaxWarning on Python 3.8+; use an (in)equality test instead.
    if args.model_dir != "":
        config = Config(args.model_dir)
    else:
        config = Config(args.model_file, args.params_file)
    config.enable_memory_optim()

    if args.tune:
        # `shape_file` is presumably a module-level constant — TODO confirm.
        config.collect_shape_range_info(shape_file)
    if args.use_gpu:
        config.enable_use_gpu(1000, 0)
        if args.use_trt:
            # Using dynamic-shape mode, max_batch_size will be ignored.
            config.enable_tensorrt_engine(workspace_size=1 << 30,
                                          max_batch_size=1,
                                          min_subgraph_size=5,
                                          precision_mode=PrecisionType.Float32,
                                          use_static=False,
                                          use_calib_mode=False)
            if args.tuned_dynamic_shape:
                config.enable_tuned_tensorrt_dynamic_shape(shape_file, True)
    else:
        # If not specific mkldnn, you can set the blas thread.
        # The thread num should not be greater than the number of cores in the CPU.
        config.set_cpu_math_library_num_threads(4)
        config.enable_mkldnn()

    predictor = create_predictor(config)
    return predictor
def load_predictor(self, model_file, params_file):
    """Create a predictor driven by ``self.predictor_config``.

    Args:
        model_file: path to the model file.
        params_file: path to the parameters file.

    Returns:
        Tuple ``(predictor, input_names, output_names)``.
    """
    opts = self.predictor_config
    config = Config(model_file, params_file)

    if opts["use_gpu"]:
        config.enable_use_gpu(200, 0)
        config.switch_ir_optim(True)
    else:
        config.disable_gpu()
        config.set_cpu_math_library_num_threads(opts["cpu_threads"])
        if opts["enable_mkldnn"]:
            try:
                # cache 10 different shapes for mkldnn to avoid memory leak
                config.set_mkldnn_cache_capacity(10)
                config.enable_mkldnn()
            except Exception:
                # Best effort: keep running on plain CPU when mkldnn is absent.
                logging.error(
                    "The current environment does not support `mkldnn`, so disable mkldnn."
                )

    config.disable_glog_info()
    config.enable_memory_optim()
    # Zero-copy feed/fetch.
    config.switch_use_feed_fetch_ops(False)

    predictor = create_predictor(config)
    return (predictor,
            predictor.get_input_names(),
            predictor.get_output_names())
def load_config(self, modelpath, use_gpu, gpu_id, use_mkldnn, cpu_threads): ''' load the model config modelpath: inference model path use_gpu: use gpu or not use_mkldnn: use mkldnn or not ''' # 对运行位置进行配置 if use_gpu: try: int(os.environ.get('CUDA_VISIBLE_DEVICES')) except Exception: print( '''Error! Unable to use GPU. Please set the environment variables "CUDA_VISIBLE_DEVICES=GPU_id" to use GPU. Now switch to CPU to continue...''') use_gpu = False if os.path.isdir(modelpath): if os.path.exists(os.path.join(modelpath, "__params__")): # __model__ + __params__ model = os.path.join(modelpath, "__model__") params = os.path.join(modelpath, "__params__") config = Config(model, params) elif os.path.exists(os.path.join(modelpath, "params")): # model + params model = os.path.join(modelpath, "model") params = os.path.join(modelpath, "params") config = Config(model, params) elif os.path.exists(os.path.join(modelpath, "__model__")): # __model__ + others config = Config(modelpath) else: raise Exception( "Error! Can\'t find the model in: %s. Please check your model path." % os.path.abspath(modelpath)) elif os.path.exists(modelpath+".pdmodel"): # *.pdmodel + *.pdiparams model = modelpath+".pdmodel" params = modelpath+".pdiparams" config = Config(model, params) else: raise Exception( "Error! Can\'t find the model in: %s. Please check your model path." % os.path.abspath(modelpath)) # 设置参数 if use_gpu: config.enable_use_gpu(100, gpu_id) else: config.disable_gpu() config.set_cpu_math_library_num_threads(cpu_threads) if use_mkldnn: config.enable_mkldnn() config.disable_glog_info() # 返回配置 return config
def __init__(self, model_file, params_file, use_mkldnn=True, use_gpu=False, device_id=0):
    """Create and store a paddle-inference predictor.

    Args:
        model_file: path to the model file.
        params_file: path to the parameters file.
        use_mkldnn: enable MKL-DNN acceleration (default True).
        use_gpu: run on GPU ``device_id`` with a 100 MB initial workspace.
        device_id: GPU device id used when ``use_gpu`` is True.
    """
    cfg = Config(model_file, params_file)
    cfg.enable_memory_optim()
    if use_gpu:
        print("ENABLE_GPU")
        cfg.enable_use_gpu(100, device_id)
    if use_mkldnn:
        cfg.enable_mkldnn()
    self.predictor = create_predictor(cfg)
def init_predictors(args):
    """Create a pool of predictors sharing a single config.

    Args:
        args: namespace with model_dir (or model_file/params_file), use_gpu
            and thread_num.

    Returns:
        A ``PredictorPool`` of ``args.thread_num`` predictors.
    """
    if args.model_dir is not None:
        cfg = Config(args.model_dir)
    else:
        cfg = Config(args.model_file, args.params_file)
    cfg.enable_memory_optim()

    if args.use_gpu:
        cfg.enable_use_gpu(1000, 0)
    else:
        # If not specific mkldnn, you can set the blas thread.
        # The thread num should not be greater than the number of cores in the CPU.
        cfg.set_cpu_math_library_num_threads(4)
        cfg.enable_mkldnn()

    return PredictorPool(cfg, args.thread_num)
def init_predictor(self, cfg):
    """Create a predictor from a dict-style deployment config.

    Args:
        cfg: mapping with keys 'model_dir', 'params_file' and 'use_gpu'.

    Returns:
        The created predictor.
    """
    config = Config(cfg['model_dir'], cfg['params_file'])
    # memory optim deliberately left off (was commented out upstream)
    if cfg['use_gpu']:
        config.enable_use_gpu(1000, 0)
    else:
        # If not specific mkldnn, you can set the blas thread.
        # The thread num should not be greater than the number of cores in the CPU.
        config.set_cpu_math_library_num_threads(4)
        config.enable_mkldnn()
    config.disable_glog_info()
    return create_predictor(config)
def init_predictor(args):
    """Create a predictor with optional TensorRT (GPU) or MKL-DNN (CPU).

    Args:
        args: namespace with model_dir (or model_file/params_file), use_gpu,
            enable_tensorRT, batchsize, cpu_threads and enable_mkldnn.

    Returns:
        Tuple ``(predictor, config)``.
    """
    cfg = Config(args.model_dir) if args.model_dir else Config(
        args.model_file, args.params_file)

    if args.use_gpu:
        cfg.enable_use_gpu(1000, 0)
        if args.enable_tensorRT:
            cfg.enable_tensorrt_engine(
                max_batch_size=args.batchsize,
                min_subgraph_size=1,
                precision_mode=paddle.inference.PrecisionType.Float32)
    else:
        cfg.disable_gpu()
        # This fuse pass is skipped on the CPU path.
        cfg.delete_pass("repeated_fc_relu_fuse_pass")
        cfg.set_cpu_math_library_num_threads(args.cpu_threads)
        if args.enable_mkldnn:
            cfg.enable_mkldnn()

    return create_predictor(cfg), cfg
def load_config(self, modelpath, use_gpu, use_mkldnn, combined):
    """Build and return an inference Config.

    Args:
        modelpath: inference model path.
        use_gpu: run on GPU 0 when possible.
        use_mkldnn: enable MKL-DNN on CPU.
        combined: True when the model is stored as __model__ + __params__.

    Returns:
        The configured inference Config.
    """
    # GPU is only usable when CUDA_VISIBLE_DEVICES names a numeric device id;
    # otherwise fall back to CPU.
    if use_gpu:
        try:
            int(os.environ.get('CUDA_VISIBLE_DEVICES'))
        except Exception:
            print(
                'Error! Unable to use GPU. Please set the environment variables "CUDA_VISIBLE_DEVICES=GPU_id" to use GPU.'
            )
            use_gpu = False

    # Load model parameters (combined vs. directory layout).
    if combined:
        config = Config(
            os.path.join(modelpath, "__model__"),
            os.path.join(modelpath, "__params__"))
    else:
        config = Config(modelpath)

    # Device placement.
    if use_gpu:
        config.enable_use_gpu(100, 0)
    else:
        config.disable_gpu()
        if use_mkldnn:
            config.enable_mkldnn()

    return config
def create_paddle_predictor(args):
    """Create a predictor from explicit model/params files.

    Args:
        args: namespace with model_file, params_file, use_gpu, gpu_mem,
            use_mkldnn, cpu_num_threads and ir_optim.

    Returns:
        The created predictor.
    """
    cfg = Config(args.model_file, args.params_file)

    if args.use_gpu:
        cfg.enable_use_gpu(args.gpu_mem, 0)
    else:
        cfg.disable_gpu()
        if args.use_mkldnn:
            cfg.enable_mkldnn()
            cfg.set_cpu_math_library_num_threads(args.cpu_num_threads)
            # Bound the mkldnn shape cache.
            cfg.set_mkldnn_cache_capacity(100)

    cfg.disable_glog_info()
    cfg.switch_ir_optim(args.ir_optim)  # default true
    cfg.enable_memory_optim()
    # Zero-copy feed/fetch.
    cfg.switch_use_feed_fetch_ops(False)
    return create_predictor(cfg)
def load_model(self, modelpath, use_gpu, use_mkldnn, combined):
    """Load an inference model and return a ready predictor.

    Args:
        modelpath: inference model path.
        use_gpu: run on GPU 0 when CUDA_VISIBLE_DEVICES is set.
        use_mkldnn: enable MKL-DNN on CPU.
        combined: True when the model is stored as __model__ + __params__.

    Returns:
        The created predictor.
    """
    # GPU is only usable when CUDA_VISIBLE_DEVICES names a numeric device id;
    # otherwise fall back to CPU.
    if use_gpu:
        try:
            int(os.environ.get('CUDA_VISIBLE_DEVICES'))
        except Exception:
            print(
                'Error! Unable to use GPU. Please set the environment variables "CUDA_VISIBLE_DEVICES=GPU_id" to use GPU.'
            )
            use_gpu = False

    # Load model parameters (combined vs. directory layout).
    if combined:
        config = Config(
            os.path.join(modelpath, "__model__"),
            os.path.join(modelpath, "__params__"))
    else:
        config = Config(modelpath)

    # Device placement.
    if use_gpu:
        config.enable_use_gpu(100, 0)
    else:
        config.disable_gpu()
        if use_mkldnn:
            config.enable_mkldnn()

    # Quiet logging, graph optimisation and zero-copy feed/fetch.
    config.disable_glog_info()
    config.switch_ir_optim(True)
    config.enable_memory_optim()
    config.switch_use_feed_fetch_ops(False)
    config.switch_specify_input_names(True)

    return create_predictor(config)
def init_model(self):
    """Build the YOLOv3 TensorRT predictor and store it on ``self.predictor``.

    Reads ``self.param_type`` ('fp16'/'int8'/other → fp32), ``self.model_dir``,
    ``self.batch_size`` and ``self.debug``.
    """
    from paddle.inference import Config
    from paddle.inference import PrecisionType
    from paddle.inference import create_predictor

    # Select TensorRT precision; int8 additionally needs calibration mode.
    if self.param_type == "fp16":
        precision_mode, use_calib_mode = PrecisionType.Half, False
    elif self.param_type == "int8":
        precision_mode, use_calib_mode = PrecisionType.Int8, True
    else:
        precision_mode, use_calib_mode = PrecisionType.Float32, False

    cfg = Config(
        os.path.join(self.model_dir, "yolov3/__model__"),
        os.path.join(self.model_dir, "yolov3/__params__"))
    cfg.enable_use_gpu(100, 0)
    cfg.switch_ir_optim(True)

    # TRT workspace scales with batch size: batch * 101 MiB.
    workspace = (self.batch_size * 101) << 20
    cfg.enable_tensorrt_engine(workspace_size=workspace,
                               max_batch_size=self.batch_size,
                               min_subgraph_size=3,
                               precision_mode=precision_mode,
                               use_static=False,
                               use_calib_mode=use_calib_mode)

    # Debug runs keep glog output and enable the profiler.
    if self.debug:
        cfg.enable_profile()
    else:
        cfg.disable_glog_info()

    cfg.enable_memory_optim()
    cfg.switch_use_feed_fetch_ops(False)
    cfg.enable_mkldnn()
    self.predictor = create_predictor(cfg)
def init_predictor(args):
    """Create a predictor, auto-detecting the model layout inside a directory.

    Args:
        args: namespace with model_dir (or model_file/params_file), use_gpu,
            enable_tensorRT, batchsize, cpu_threads and enable_mkldnn.

    Returns:
        Tuple ``(predictor, config)``.

    Raises:
        ValueError: when ``args.model_dir`` holds no recognizable model files.
    """
    if args.model_dir:
        # IMPROVED: use None/bool sentinels instead of the original integer 0
        # flags, and a plain substring test instead of re.search for a literal.
        has_model = False
        pdmodel_name = None
        pdiparams_name = None
        for file_name in os.listdir(args.model_dir):
            if "__model__" in file_name:
                has_model = True
            if file_name.endswith(".pdmodel"):
                pdmodel_name = os.path.join(args.model_dir, file_name)
            if file_name.endswith(".pdiparams"):
                pdiparams_name = os.path.join(args.model_dir, file_name)
        if has_model:
            # Uncombined layout: let Config load the whole directory.
            config = Config(args.model_dir)
        elif pdmodel_name and pdiparams_name:
            config = Config(pdmodel_name, pdiparams_name)
        else:
            raise ValueError(
                "config setting error, please check your model path")
    else:
        config = Config(args.model_file, args.params_file)

    if args.use_gpu:
        config.enable_use_gpu(1000, 0)
        if args.enable_tensorRT:
            config.enable_tensorrt_engine(
                max_batch_size=args.batchsize,
                min_subgraph_size=9,
                precision_mode=paddle.inference.PrecisionType.Float32)
    else:
        config.disable_gpu()
        config.set_cpu_math_library_num_threads(args.cpu_threads)
        if args.enable_mkldnn:
            config.enable_mkldnn()
            # This fuse pass conflicts with mkldnn here.
            config.delete_pass("scale_matmul_fuse_pass")

    predictor = create_predictor(config)
    return predictor, config
def init_predictor(args):
    """Create a predictor for GPU, GPU-fp16, ONNX Runtime or CPU backends.

    Args:
        args: namespace with model_dir (or model_file/params_file), use_gpu,
            use_gpu_fp16 and use_onnxruntime flags.

    Returns:
        The created predictor.
    """
    # BUG FIX: the original used `args.model_dir is not ""` — identity
    # comparison with a string literal is implementation-defined and raises
    # SyntaxWarning on Python 3.8+; use an (in)equality test instead.
    if args.model_dir != "":
        config = Config(args.model_dir)
    else:
        config = Config(args.model_file, args.params_file)
    config.enable_memory_optim()

    if args.use_gpu:
        config.enable_use_gpu(1000, 0)
    elif args.use_gpu_fp16:
        config.enable_use_gpu(1000, 0)
        # Experimental mixed-precision GPU execution.
        config.exp_enable_use_gpu_fp16()
    elif args.use_onnxruntime:
        config.enable_onnxruntime()
        config.enable_ort_optimization()
        config.set_cpu_math_library_num_threads(4)
    else:
        # If not specific mkldnn, you can set the blas thread.
        # The thread num should not be greater than the number of cores in the CPU.
        config.set_cpu_math_library_num_threads(4)
        config.enable_mkldnn()

    predictor = create_predictor(config)
    return predictor
class Predictor:
    def __init__(self, args):
        """
        Prepare for prediction.
        The usage and docs of paddle inference, please refer to
        https://paddleinference.paddlepaddle.org.cn/product_introduction/summary.html
        """
        self.args = args
        self.cfg = DeployConfig(args.cfg)

        self._init_base_config()
        if args.device == 'cpu':
            self._init_cpu_config()
        else:
            self._init_gpu_config()

        self.predictor = create_predictor(self.pred_cfg)

    def _init_base_config(self):
        # Shared config: quiet logging unless detail printing is requested,
        # memory optimisation and IR optimisation on.
        self.pred_cfg = PredictConfig(self.cfg.model, self.cfg.params)
        if not self.args.print_detail:
            self.pred_cfg.disable_glog_info()
        self.pred_cfg.enable_memory_optim()
        self.pred_cfg.switch_ir_optim(True)

    def _init_cpu_config(self):
        """
        Init the config for x86 cpu.
        """
        logger.info("Using CPU")
        self.pred_cfg.disable_gpu()
        if self.args.enable_mkldnn:
            logger.info("Using MKLDNN")
            # cache 10 different shapes for mkldnn to avoid memory leak
            self.pred_cfg.set_mkldnn_cache_capacity(10)
            self.pred_cfg.enable_mkldnn()
        self.pred_cfg.set_cpu_math_library_num_threads(self.args.cpu_threads)

    def _init_gpu_config(self):
        """
        Init the config for nvidia gpu.
        """
        logger.info("using GPU")
        self.pred_cfg.enable_use_gpu(100, 0)

    def run(self, imgs, trimaps=None, imgs_dir=None):
        """Run batched alpha-matting inference over image paths and save results."""
        self.imgs_dir = imgs_dir
        num = len(imgs)
        input_names = self.predictor.get_input_names()
        input_handle = {}
        for name in input_names:
            input_handle[name] = self.predictor.get_input_handle(name)
        output_names = self.predictor.get_output_names()
        output_handle = self.predictor.get_output_handle(output_names[0])
        args = self.args

        for i in tqdm.tqdm(range(0, num, args.batch_size)):
            img_inputs = []
            if trimaps is not None:
                trimap_inputs = []
            trans_info = []

            # BUG FIX: the original indexed imgs[i]/trimaps[i] inside this
            # loop, so every sample in a batch was the same image — index with
            # j. Also clamp the batch end so the final (possibly partial)
            # batch does not run past the end of `imgs`.
            batch_end = min(i + args.batch_size, num)
            for j in range(i, batch_end):
                img = imgs[j]
                trimap = trimaps[j] if trimaps is not None else None
                data = self._preprocess(img=img, trimap=trimap)
                img_inputs.append(data['img'])
                if trimaps is not None:
                    trimap_inputs.append(data['trimap'][np.newaxis, :, :])
                trans_info.append(data['trans_info'])
            img_inputs = np.array(img_inputs)
            if trimaps is not None:
                trimap_inputs = (np.array(trimap_inputs)).astype('float32')

            input_handle['img'].copy_from_cpu(img_inputs)
            if trimaps is not None:
                input_handle['trimap'].copy_from_cpu(trimap_inputs)
            self.predictor.run()
            results = output_handle.copy_to_cpu()
            results = results.squeeze(1)

            # BUG FIX: iterate only over the real batch size, not
            # args.batch_size, to avoid IndexError on the last batch.
            for j in range(batch_end - i):
                trimap = trimap_inputs[j] if trimaps is not None else None
                result = self._postprocess(
                    results[j], trans_info[j], trimap=trimap)
                self._save_imgs(result, imgs[i + j])
        logger.info("Finish")

    def _preprocess(self, img, trimap=None):
        """Build the transform input dict (optionally with a trimap)."""
        data = {}
        data['img'] = img
        if trimap is not None:
            data['trimap'] = trimap
            data['gt_fields'] = ['trimap']
        data = self.cfg.transforms(data)
        return data

    def _postprocess(self, alpha, trans_info, trimap=None):
        """recover pred to origin shape"""
        if trimap is not None:
            trimap = trimap.squeeze(0)
            # Trimap pixels that are certain background/foreground override
            # the predicted alpha.
            alpha[trimap == 0] = 0
            alpha[trimap == 255] = 1
        # Undo the recorded transforms in reverse order.
        for item in trans_info[::-1]:
            if item[0] == 'resize':
                h, w = item[1][0], item[1][1]
                alpha = cv2.resize(
                    alpha, (w, h), interpolation=cv2.INTER_LINEAR)
            elif item[0] == 'padding':
                h, w = item[1][0], item[1][1]
                alpha = alpha[:, :, 0:h, 0:w]
            else:
                raise Exception("Unexpected info '{}' in im_info".format(
                    item[0]))
        return alpha

    def _save_imgs(self, alpha, img_path):
        """Save the alpha matte and an RGBA clip next to ``save_dir``."""
        ori_img = cv2.imread(img_path)
        alpha = (alpha * 255).astype('uint8')
        if self.imgs_dir is not None:
            img_path = img_path.replace(self.imgs_dir, '')
        name, ext = os.path.splitext(img_path)
        if name[0] == '/':
            name = name[1:]
        # BUG FIX: the original referenced a module-level `args`; use the
        # instance's own args instead.
        alpha_save_path = os.path.join(self.args.save_dir, 'alpha/',
                                       name + '.png')
        clip_save_path = os.path.join(self.args.save_dir, 'clip/',
                                      name + '.png')

        # save alpha
        mkdir(alpha_save_path)
        cv2.imwrite(alpha_save_path, alpha)

        # save clip image (original image + alpha channel)
        mkdir(clip_save_path)
        alpha = alpha[:, :, np.newaxis]
        clip = np.concatenate([ori_img, alpha], axis=-1)
        cv2.imwrite(clip_save_path, clip)
class Predictor:
    def __init__(self, args):
        """
        Prepare for prediction.
        The usage and docs of paddle inference, please refer to
        https://paddleinference.paddlepaddle.org.cn/product_introduction/summary.html
        """
        self.args = args
        self.cfg = DeployConfig(args.cfg)

        self._init_base_config()
        # CPU-only deployment path.
        self._init_cpu_config()

        self.predictor = create_predictor(self.pred_cfg)

    def _init_base_config(self):
        # Shared config: quiet logging unless detail printing is requested,
        # memory optimisation and IR optimisation on.
        self.pred_cfg = PredictConfig(self.cfg.model, self.cfg.params)
        if not self.args.print_detail:
            self.pred_cfg.disable_glog_info()
        self.pred_cfg.enable_memory_optim()
        self.pred_cfg.switch_ir_optim(True)

    def _init_cpu_config(self):
        """
        Init the config for x86 cpu.
        """
        logger.info("Use CPU")
        self.pred_cfg.disable_gpu()
        if self.args.enable_mkldnn:
            logger.info("Use MKLDNN")
            # cache 10 different shapes for mkldnn
            self.pred_cfg.set_mkldnn_cache_capacity(10)
            self.pred_cfg.enable_mkldnn()
        self.pred_cfg.set_cpu_math_library_num_threads(self.args.cpu_threads)

    def run(self, imgs):
        """Run lane-segmentation inference over one path or a list of paths."""
        # Accept a single image path as well as a list/tuple of paths.
        if not isinstance(imgs, (list, tuple)):
            imgs = [imgs]

        input_names = self.predictor.get_input_names()
        input_handle = self.predictor.get_input_handle(input_names[0])
        output_names = self.predictor.get_output_names()
        output_seg_handle = self.predictor.get_output_handle(output_names[0])

        args = self.args
        if not os.path.exists(args.save_dir):
            os.makedirs(args.save_dir)

        # TuSimple lane post-processing parameters.
        # NOTE(review): cut_height/num_classes are hard-coded — confirm they
        # match the trained model.
        cut_height = 160
        num_classes = 7
        postprocessor = tusimple_processor.TusimpleProcessor(
            num_classes=num_classes,
            cut_height=cut_height,
            save_dir=args.save_dir)

        for i, im_path in enumerate(imgs):
            im = cv2.imread(im_path)
            # Crop the top (sky) region before the transforms.
            im = im[cut_height:, :, :]
            im = im.astype('float32')
            im, _ = self.cfg.transforms(im)
            # Add the batch dimension expected by the predictor.
            im = im[np.newaxis, ...]

            input_handle.reshape(im.shape)
            input_handle.copy_from_cpu(im)
            self.predictor.run()

            seg_results = output_seg_handle.copy_to_cpu()

            # get lane points
            seg_results = paddle.to_tensor([seg_results])
            postprocessor.predict(seg_results, im_path)
        logger.info("Finish")
    # Tail of a CLI argument-parsing helper whose `def` lies outside this chunk.
    return parser.parse_args()


if __name__ == '__main__':
    args = parse_args()
    # Either a whole model dir or an explicit model+params file pair is required.
    assert (args.model_dir != "") or \
        (args.model_file != "" and args.params_file != ""), \
        "Set model path error."
    assert args.img_path != "", "Set img_path error."

    # Init config
    if args.model_dir == "":
        config = Config(args.model_file, args.params_file)
    else:
        config = Config(args.model_dir)
    config.enable_mkldnn()
    config.set_cpu_math_library_num_threads(args.threads)
    config.switch_ir_optim()
    config.enable_memory_optim()

    # Create predictor
    predictor = create_predictor(config)

    # Set input: read the image, preprocess it, and copy it into the first
    # input tensor (reshape first so the handle matches the data).
    img = cv2.imread(args.img_path)
    img = preprocess(img)
    input_names = predictor.get_input_names()
    input_tensor = predictor.get_input_handle(input_names[0])
    input_tensor.reshape(img.shape)
    input_tensor.copy_from_cpu(img.copy())
class Predictor:
    def __init__(self, args):
        """
        Prepare for prediction.
        The usage and docs of paddle inference, please refer to
        https://paddleinference.paddlepaddle.org.cn/product_introduction/summary.html
        """
        self.args = args
        self.cfg = DeployConfig(args.cfg)

        self._init_base_config()
        if args.device == 'cpu':
            self._init_cpu_config()
        else:
            self._init_gpu_config()

        self.predictor = create_predictor(self.pred_cfg)

        # Optional auto_log benchmarking of preprocess/inference/postprocess.
        if hasattr(args, 'benchmark') and args.benchmark:
            import auto_log
            pid = os.getpid()
            self.autolog = auto_log.AutoLogger(model_name=args.model_name,
                                               model_precision=args.precision,
                                               batch_size=args.batch_size,
                                               data_shape="dynamic",
                                               save_path=None,
                                               inference_config=self.pred_cfg,
                                               pids=pid,
                                               process_name=None,
                                               gpu_ids=0,
                                               time_keys=[
                                                   'preprocess_time',
                                                   'inference_time',
                                                   'postprocess_time'
                                               ],
                                               warmup=0,
                                               logger=logger)

    def _init_base_config(self):
        # Shared config: quiet logging unless detail printing is requested,
        # memory optimisation and IR optimisation on.
        self.pred_cfg = PredictConfig(self.cfg.model, self.cfg.params)
        if not self.args.print_detail:
            self.pred_cfg.disable_glog_info()
        self.pred_cfg.enable_memory_optim()
        self.pred_cfg.switch_ir_optim(True)

    def _init_cpu_config(self):
        """
        Init the config for x86 cpu.
        """
        logger.info("Using CPU")
        self.pred_cfg.disable_gpu()
        if self.args.enable_mkldnn:
            logger.info("Using MKLDNN")
            # cache 10 different shapes for mkldnn
            self.pred_cfg.set_mkldnn_cache_capacity(10)
            self.pred_cfg.enable_mkldnn()
        self.pred_cfg.set_cpu_math_library_num_threads(self.args.cpu_threads)

    def _init_gpu_config(self):
        """
        Init the config for nvidia gpu.
        """
        logger.info("using GPU")
        self.pred_cfg.enable_use_gpu(100, 0)
        precision_map = {
            "fp16": PrecisionType.Half,
            "fp32": PrecisionType.Float32,
            "int8": PrecisionType.Int8
        }
        precision_mode = precision_map[self.args.precision]

        if self.args.use_trt:
            logger.info("Use TRT")
            self.pred_cfg.enable_tensorrt_engine(workspace_size=1 << 30,
                                                 max_batch_size=1,
                                                 min_subgraph_size=300,
                                                 precision_mode=precision_mode,
                                                 use_static=False,
                                                 use_calib_mode=False)
            # Prefer a previously auto-tuned dynamic-shape file; fall back to
            # a manually chosen shape range for input "x".
            if use_auto_tune(self.args) and \
                os.path.exists(self.args.auto_tuned_shape_file):
                logger.info("Use auto tuned dynamic shape")
                allow_build_at_runtime = True
                self.pred_cfg.enable_tuned_tensorrt_dynamic_shape(
                    self.args.auto_tuned_shape_file, allow_build_at_runtime)
            else:
                logger.info("Use manual set dynamic shape")
                min_input_shape = {"x": [1, 3, 100, 100]}
                max_input_shape = {"x": [1, 3, 2000, 3000]}
                opt_input_shape = {"x": [1, 3, 512, 1024]}
                self.pred_cfg.set_trt_dynamic_shape_info(
                    min_input_shape, max_input_shape, opt_input_shape)

    def run(self, imgs, trimaps=None, imgs_dir=None):
        """Run batched alpha-matting inference over image paths and save results."""
        self.imgs_dir = imgs_dir
        num = len(imgs)
        input_names = self.predictor.get_input_names()
        input_handle = {}
        for i in range(len(input_names)):
            input_handle[input_names[i]] = self.predictor.get_input_handle(
                input_names[i])
        output_names = self.predictor.get_output_names()
        output_handle = self.predictor.get_output_handle(output_names[0])
        args = self.args

        for i in tqdm.tqdm(range(0, num, args.batch_size)):
            # warm up: run the first batch 5 extra times before timing starts.
            if i == 0 and args.benchmark:
                for _ in range(5):
                    img_inputs = []
                    if trimaps is not None:
                        trimap_inputs = []
                    trans_info = []
                    # NOTE(review): imgs[i]/trimaps[i] repeats the same sample
                    # for the whole batch (j is unused) and the range ignores a
                    # partial final batch — looks like it should be imgs[j]
                    # with the end clamped to num; confirm before changing.
                    for j in range(i, i + args.batch_size):
                        img = imgs[i]
                        trimap = trimaps[i] if trimaps is not None else None
                        data = self._preprocess(img=img, trimap=trimap)
                        img_inputs.append(data['img'])
                        if trimaps is not None:
                            trimap_inputs.append(
                                data['trimap'][np.newaxis, :, :])
                        trans_info.append(data['trans_info'])
                    img_inputs = np.array(img_inputs)
                    if trimaps is not None:
                        trimap_inputs = (
                            np.array(trimap_inputs)).astype('float32')
                    input_handle['img'].copy_from_cpu(img_inputs)
                    if trimaps is not None:
                        input_handle['trimap'].copy_from_cpu(trimap_inputs)
                    self.predictor.run()
                    results = output_handle.copy_to_cpu()
                    results = results.squeeze(1)
                    for j in range(args.batch_size):
                        trimap = trimap_inputs[j] if trimaps is not None else None
                        result = self._postprocess(results[j],
                                                   trans_info[j],
                                                   trimap=trimap)

            # inference (timed when benchmarking)
            if args.benchmark:
                self.autolog.times.start()
            img_inputs = []
            if trimaps is not None:
                trimap_inputs = []
            trans_info = []
            # NOTE(review): same suspected imgs[i]-instead-of-imgs[j] indexing
            # as the warm-up loop above.
            for j in range(i, i + args.batch_size):
                img = imgs[i]
                trimap = trimaps[i] if trimaps is not None else None
                data = self._preprocess(img=img, trimap=trimap)
                img_inputs.append(data['img'])
                if trimaps is not None:
                    trimap_inputs.append(data['trimap'][np.newaxis, :, :])
                trans_info.append(data['trans_info'])
            img_inputs = np.array(img_inputs)
            if trimaps is not None:
                trimap_inputs = (np.array(trimap_inputs)).astype('float32')

            input_handle['img'].copy_from_cpu(img_inputs)
            if trimaps is not None:
                input_handle['trimap'].copy_from_cpu(trimap_inputs)
            if args.benchmark:
                self.autolog.times.stamp()
            self.predictor.run()
            if args.benchmark:
                self.autolog.times.stamp()
            results = output_handle.copy_to_cpu()
            results = results.squeeze(1)
            for j in range(args.batch_size):
                trimap = trimap_inputs[j] if trimaps is not None else None
                result = self._postprocess(results[j],
                                           trans_info[j],
                                           trimap=trimap)
                self._save_imgs(result, imgs[i + j])
            if args.benchmark:
                self.autolog.times.end(stamp=True)
        logger.info("Finish")

    def _preprocess(self, img, trimap=None):
        """Build the transform input dict (optionally with a trimap)."""
        data = {}
        data['img'] = img
        if trimap is not None:
            data['trimap'] = trimap
            data['gt_fields'] = ['trimap']
        data = self.cfg.transforms(data)
        return data

    def _postprocess(self, alpha, trans_info, trimap=None):
        """recover pred to origin shape"""
        if trimap is not None:
            trimap = trimap.squeeze(0)
            # Certain trimap regions override the predicted alpha.
            alpha[trimap == 0] = 0
            alpha[trimap == 255] = 1
        # Undo the recorded transforms in reverse order.
        for item in trans_info[::-1]:
            if item[0] == 'resize':
                h, w = item[1][0], item[1][1]
                alpha = cv2.resize(alpha, (w, h),
                                   interpolation=cv2.INTER_LINEAR)
            elif item[0] == 'padding':
                h, w = item[1][0], item[1][1]
                alpha = alpha[:, :, 0:h, 0:w]
            else:
                raise Exception("Unexpected info '{}' in im_info".format(
                    item[0]))
        return alpha

    def _save_imgs(self, alpha, img_path):
        """Save the alpha matte and an RGBA clip under the save dir."""
        ori_img = cv2.imread(img_path)
        alpha = (alpha * 255).astype('uint8')
        if self.imgs_dir is not None:
            img_path = img_path.replace(self.imgs_dir, '')
        name, ext = os.path.splitext(img_path)
        if name[0] == '/':
            name = name[1:]
        # NOTE(review): `args` here is a module-level name, not self.args —
        # confirm a global `args` exists or this raises NameError.
        alpha_save_path = os.path.join(args.save_dir, 'alpha/', name + '.png')
        clip_save_path = os.path.join(args.save_dir, 'clip/', name + '.png')

        # save alpha
        mkdir(alpha_save_path)
        cv2.imwrite(alpha_save_path, alpha)

        # save clip image
        mkdir(clip_save_path)
        alpha = alpha[:, :, np.newaxis]
        clip = np.concatenate([ori_img, alpha], axis=-1)
        cv2.imwrite(clip_save_path, clip)
def load_predictor(model_dir,
                   run_mode='fluid',
                   batch_size=1,
                   device='CPU',
                   min_subgraph_size=3,
                   use_dynamic_shape=False,
                   trt_min_shape=1,
                   trt_max_shape=1280,
                   trt_opt_shape=640,
                   trt_calib_mode=False,
                   cpu_threads=1,
                   enable_mkldnn=False):
    """set AnalysisConfig, generate AnalysisPredictor
    Args:
        model_dir (str): root path of model.pdmodel and model.pdiparams
        device (str): Choose the device you want to run, it can be:
            CPU/GPU/XPU, default is CPU
        run_mode (str): mode of running(fluid/trt_fp32/trt_fp16/trt_int8);
            any trt_* mode requires device == 'GPU'
        batch_size (int): max batch size passed to the TensorRT engine
        min_subgraph_size (int): minimum subgraph size handed to TensorRT
        use_dynamic_shape (bool): use dynamic shape or not
        trt_min_shape (int): min shape for dynamic shape in trt
        trt_max_shape (int): max shape for dynamic shape in trt
        trt_opt_shape (int): opt shape for dynamic shape in trt
        trt_calib_mode (bool): If the model is produced by TRT offline
            quantitative calibration, trt_calib_mode need to set True
        cpu_threads (int): CPU math-library thread count (CPU device only)
        enable_mkldnn (bool): enable MKL-DNN on CPU (best effort)
    Returns:
        predictor (PaddlePredictor): AnalysisPredictor
    Raises:
        ValueError: predict by TensorRT need device == 'GPU'.
    """
    # TensorRT modes are GPU-only; reject early with a clear error.
    if device != 'GPU' and run_mode != 'fluid':
        raise ValueError(
            "Predict by TensorRT mode: {}, expect device=='GPU', but device == {}"
            .format(run_mode, device))
    config = Config(os.path.join(model_dir, 'model.pdmodel'),
                    os.path.join(model_dir, 'model.pdiparams'))
    if device == 'GPU':
        # initial GPU memory(M), device ID
        config.enable_use_gpu(200, 0)
        # optimize graph and fuse op
        config.switch_ir_optim(True)
    elif device == 'XPU':
        config.enable_lite_engine()
        config.enable_xpu(10 * 1024 * 1024)
    else:
        config.disable_gpu()
        config.set_cpu_math_library_num_threads(cpu_threads)
        if enable_mkldnn:
            try:
                # cache 10 different shapes for mkldnn to avoid memory leak
                config.set_mkldnn_cache_capacity(10)
                config.enable_mkldnn()
            except Exception as e:
                # Deliberately best-effort: fall back to plain CPU execution.
                print(
                    "The current environment does not support `mkldnn`, so disable mkldnn."
                )
                pass

    # Map run_mode onto a TensorRT precision; 'fluid' skips TRT entirely.
    precision_map = {
        'trt_int8': Config.Precision.Int8,
        'trt_fp32': Config.Precision.Float32,
        'trt_fp16': Config.Precision.Half
    }
    if run_mode in precision_map.keys():
        config.enable_tensorrt_engine(workspace_size=1 << 10,
                                      max_batch_size=batch_size,
                                      min_subgraph_size=min_subgraph_size,
                                      precision_mode=precision_map[run_mode],
                                      use_static=False,
                                      use_calib_mode=trt_calib_mode)

        if use_dynamic_shape:
            # Square shape ranges for the 'image' input only.
            min_input_shape = {
                'image': [batch_size, 3, trt_min_shape, trt_min_shape]
            }
            max_input_shape = {
                'image': [batch_size, 3, trt_max_shape, trt_max_shape]
            }
            opt_input_shape = {
                'image': [batch_size, 3, trt_opt_shape, trt_opt_shape]
            }
            config.set_trt_dynamic_shape_info(min_input_shape,
                                              max_input_shape,
                                              opt_input_shape)
            print('trt set dynamic shape done!')

    # disable print log when predict
    config.disable_glog_info()
    # enable shared memory
    config.enable_memory_optim()
    # disable feed, fetch OP, needed by zero_copy_run
    config.switch_use_feed_fetch_ops(False)
    predictor = create_predictor(config)
    return predictor, config
class Predictor:
    def __init__(self, args):
        """
        Prepare for prediction.
        The usage and docs of paddle inference, please refer to
        https://paddleinference.paddlepaddle.org.cn/product_introduction/summary.html
        """
        self.args = args
        self.cfg = DeployConfig(args.cfg)

        self._init_base_config()
        if args.device == 'cpu':
            self._init_cpu_config()
        else:
            self._init_gpu_config()

        self.predictor = create_predictor(self.pred_cfg)

        # Optional auto_log benchmarking of preprocess/inference/postprocess.
        if hasattr(args, 'benchmark') and args.benchmark:
            import auto_log
            pid = os.getpid()
            self.autolog = auto_log.AutoLogger(model_name=args.model_name,
                                               model_precision=args.precision,
                                               batch_size=args.batch_size,
                                               data_shape="dynamic",
                                               save_path=None,
                                               inference_config=self.pred_cfg,
                                               pids=pid,
                                               process_name=None,
                                               gpu_ids=0,
                                               time_keys=[
                                                   'preprocess_time',
                                                   'inference_time',
                                                   'postprocess_time'
                                               ],
                                               warmup=0,
                                               logger=logger)

    def _init_base_config(self):
        # Shared config: quiet logging unless detail printing is requested,
        # memory optimisation and IR optimisation on.
        self.pred_cfg = PredictConfig(self.cfg.model, self.cfg.params)
        if not self.args.print_detail:
            self.pred_cfg.disable_glog_info()
        self.pred_cfg.enable_memory_optim()
        self.pred_cfg.switch_ir_optim(True)

    def _init_cpu_config(self):
        """
        Init the config for x86 cpu.
        """
        logger.info("Use CPU")
        self.pred_cfg.disable_gpu()
        if self.args.enable_mkldnn:
            logger.info("Use MKLDNN")
            # cache 10 different shapes for mkldnn
            self.pred_cfg.set_mkldnn_cache_capacity(10)
            self.pred_cfg.enable_mkldnn()
        self.pred_cfg.set_cpu_math_library_num_threads(self.args.cpu_threads)

    def _init_gpu_config(self):
        """
        Init the config for nvidia gpu.
        """
        logger.info("Use GPU")
        self.pred_cfg.enable_use_gpu(100, 0)
        precision_map = {
            "fp16": PrecisionType.Half,
            "fp32": PrecisionType.Float32,
            "int8": PrecisionType.Int8
        }
        precision_mode = precision_map[self.args.precision]

        if self.args.use_trt:
            logger.info("Use TRT")
            self.pred_cfg.enable_tensorrt_engine(workspace_size=1 << 30,
                                                 max_batch_size=1,
                                                 min_subgraph_size=50,
                                                 precision_mode=precision_mode,
                                                 use_static=False,
                                                 use_calib_mode=False)
            # Prefer a previously auto-tuned dynamic-shape file; fall back to
            # a manually chosen shape range for input "x".
            if use_auto_tune(self.args) and \
                os.path.exists(self.args.auto_tuned_shape_file):
                logger.info("Use auto tuned dynamic shape")
                allow_build_at_runtime = True
                self.pred_cfg.enable_tuned_tensorrt_dynamic_shape(
                    self.args.auto_tuned_shape_file, allow_build_at_runtime)
            else:
                logger.info("Use manual set dynamic shape")
                min_input_shape = {"x": [1, 3, 100, 100]}
                max_input_shape = {"x": [1, 3, 2000, 3000]}
                opt_input_shape = {"x": [1, 3, 512, 1024]}
                self.pred_cfg.set_trt_dynamic_shape_info(
                    min_input_shape, max_input_shape, opt_input_shape)

    def run(self, imgs):
        """Run batched segmentation inference over one path or a list of paths."""
        if not isinstance(imgs, (list, tuple)):
            imgs = [imgs]

        num = len(imgs)
        input_names = self.predictor.get_input_names()
        input_handle = self.predictor.get_input_handle(input_names[0])
        output_names = self.predictor.get_output_names()
        output_handle = self.predictor.get_output_handle(output_names[0])
        results = []
        args = self.args

        if not os.path.exists(args.save_dir):
            os.makedirs(args.save_dir)

        for i in range(0, num, args.batch_size):
            if args.benchmark:
                self.autolog.times.start()

            data = np.array(
                [self._preprocess(img) for img in imgs[i:i + args.batch_size]])
            input_handle.reshape(data.shape)
            input_handle.copy_from_cpu(data)
            if args.benchmark:
                self.autolog.times.stamp()

            self.predictor.run()

            results = output_handle.copy_to_cpu()
            if args.benchmark:
                self.autolog.times.stamp()

            results = self._postprocess(results)
            if args.benchmark:
                self.autolog.times.end(stamp=True)
            # NOTE(review): `results` holds only the current batch but `imgs`
            # is the full list, so _save_imgs pairs batch-local index k with
            # imgs[k] — correct only for the first batch. Looks like it should
            # receive imgs[i:i + args.batch_size]; confirm before changing.
            self._save_imgs(results, imgs)
        logger.info("Finish")

    def _preprocess(self, img):
        # First element of the transform tuple is the image tensor.
        return self.cfg.transforms(img)[0]

    def _postprocess(self, results):
        """Optionally argmax the class dimension of raw network output."""
        if self.args.with_argmax:
            results = np.argmax(results, axis=1)
        return results

    def _save_imgs(self, results, imgs):
        """Save one pseudo-color PNG per result, named after the input image."""
        for i in range(results.shape[0]):
            result = get_pseudo_color_map(results[i])
            basename = os.path.basename(imgs[i])
            basename, _ = os.path.splitext(basename)
            basename = f'{basename}.png'
            result.save(os.path.join(self.args.save_dir, basename))