Exemple #1
0
def create_predictor(args, mode, model_path):
    """Build a CPU Paddle predictor from a saved inference model.

    Args:
        args: dict-like with 'enable_mkldnn' and 'use_zero_copy_run' flags.
        mode: unused here; kept for interface compatibility with callers.
        model_path: directory containing the 'model' and 'params' files.

    Returns:
        (predictor, input_tensor, output_tensors) tuple.
    """
    model_dir = model_path
    model_file_path = os.path.join(model_dir, "model")
    params_file_path = os.path.join(model_dir, "params")
    # Fail fast with an informative message rather than a bare assert.
    assert os.path.exists(model_file_path), \
        "model file not found: {}".format(model_file_path)
    assert os.path.exists(params_file_path), \
        "params file not found: {}".format(params_file_path)
    config = AnalysisConfig(model_file_path, params_file_path)

    # use CPU
    config.disable_gpu()
    config.set_cpu_math_library_num_threads(6)
    if args['enable_mkldnn']:
        config.enable_mkldnn()

    # config.enable_memory_optim()
    config.disable_glog_info()

    if args['use_zero_copy_run']:
        # Zero-copy mode: this fuse pass is incompatible with it.
        config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass")
        config.switch_use_feed_fetch_ops(False)
    else:
        config.switch_use_feed_fetch_ops(True)

    predictor = create_paddle_predictor(config)
    input_names = predictor.get_input_names()
    # NOTE: if there are several inputs, only the tensor of the last name
    # is returned (original behavior, preserved).
    for name in input_names:
        input_tensor = predictor.get_input_tensor(name)
    output_names = predictor.get_output_names()
    output_tensors = []
    for output_name in output_names:
        output_tensor = predictor.get_output_tensor(output_name)
        output_tensors.append(output_tensor)
    return predictor, input_tensor, output_tensors
Exemple #2
0
    def _set_config(self):
        """
        Predictor config setting: build self.predictor from the pretrained
        model, on GPU when CUDA_VISIBLE_DEVICES selects a device, and bind
        the first input tensor plus all output tensors for zero-copy use.
        """
        model_file_path = os.path.join(self.pretrained_model_path, 'model')
        params_file_path = os.path.join(self.pretrained_model_path, 'params')

        config = AnalysisConfig(model_file_path, params_file_path)
        try:
            _places = os.environ["CUDA_VISIBLE_DEVICES"]
            int(_places[0])
            use_gpu = True
        except (KeyError, IndexError, ValueError):
            # Env var unset, empty, or not a device id -> run on CPU.
            use_gpu = False

        if use_gpu:
            config.enable_use_gpu(8000, 0)
        else:
            config.disable_gpu()

        config.disable_glog_info()

        # use zero copy: drop the incompatible fuse pass, skip feed/fetch ops
        config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass")
        config.switch_use_feed_fetch_ops(False)
        self.predictor = create_paddle_predictor(config)
        input_names = self.predictor.get_input_names()
        self.input_tensor = self.predictor.get_input_tensor(input_names[0])
        output_names = self.predictor.get_output_names()
        self.output_tensors = []
        for output_name in output_names:
            output_tensor = self.predictor.get_output_tensor(output_name)
            self.output_tensors.append(output_tensor)
Exemple #3
0
    def _set_config(self):
        """
        Predictor config setting: always build a CPU predictor; also build
        a GPU predictor when CUDA_VISIBLE_DEVICES names a device.
        """
        self.model_file_path = self.default_pretrained_model_path
        cpu_config = AnalysisConfig(self.model_file_path)
        # disable_glog_info() was previously called twice; once suffices.
        cpu_config.disable_glog_info()
        cpu_config.switch_ir_optim(True)
        cpu_config.enable_memory_optim()
        cpu_config.switch_use_feed_fetch_ops(False)
        cpu_config.switch_specify_input_names(True)
        cpu_config.disable_gpu()
        self.cpu_predictor = create_paddle_predictor(cpu_config)

        try:
            _places = os.environ["CUDA_VISIBLE_DEVICES"]
            int(_places[0])
            use_gpu = True
        except (KeyError, IndexError, ValueError):
            # Env var unset, empty, or not a device id -> CPU only.
            use_gpu = False
        if use_gpu:
            gpu_config = AnalysisConfig(self.model_file_path)
            gpu_config.disable_glog_info()
            gpu_config.switch_ir_optim(True)
            gpu_config.enable_memory_optim()
            gpu_config.switch_use_feed_fetch_ops(False)
            gpu_config.switch_specify_input_names(True)
            gpu_config.enable_use_gpu(100, 0)
            self.gpu_predictor = create_paddle_predictor(gpu_config)
Exemple #4
0
    def load_model(self, modelpath, use_gpu):
        """Create a Paddle predictor for the model under *modelpath*.

        Falls back to CPU (with MKL-DNN) when GPU was requested but
        CUDA_VISIBLE_DEVICES is not usable.
        """
        # Resolve the execution device first.
        if use_gpu:
            try:
                places = os.environ["CUDA_VISIBLE_DEVICES"]
                places = int(places[0])
            except Exception as e:
                print(
                    'Error: %s. Please set the environment variables "CUDA_VISIBLE_DEVICES".'
                    % e)
                use_gpu = False

        # Load the model parameters.
        config = AnalysisConfig(modelpath)

        # Device plus optimization switches.
        if use_gpu:
            config.enable_use_gpu(100, places)
        else:
            config.disable_gpu()
            config.enable_mkldnn()
        config.disable_glog_info()
        config.switch_ir_optim(True)
        config.enable_memory_optim()
        config.switch_use_feed_fetch_ops(False)
        config.switch_specify_input_names(True)

        # Build and return the predictor.
        return create_paddle_predictor(config)
Exemple #5
0
    def _set_config(self):
        """
        Predictor config setting: build CPU predictors for the encoder and
        decoder nets, plus GPU predictors when a CUDA device is visible.
        """
        def _cpu_predictor(model_path):
            # Plain CPU config with glog silenced.
            cfg = AnalysisConfig(model_path)
            cfg.disable_glog_info()
            cfg.disable_gpu()
            return create_paddle_predictor(cfg)

        # encoder / decoder CPU predictors (identical setup, shared helper)
        self.cpu_predictor_enc = _cpu_predictor(self.pretrained_encoder_net)
        self.cpu_predictor_dec = _cpu_predictor(self.pretrained_decoder_net)

        try:
            _places = os.environ["CUDA_VISIBLE_DEVICES"]
            int(_places[0])
            use_gpu = True
        except (KeyError, IndexError, ValueError):
            # Env var unset, empty, or not a device id -> CPU only.
            use_gpu = False
        if use_gpu:
            def _gpu_predictor(model_path):
                # GPU config on device 0 with a 1 GB initial memory pool.
                cfg = AnalysisConfig(model_path)
                cfg.disable_glog_info()
                cfg.enable_use_gpu(memory_pool_init_size_mb=1000,
                                   device_id=0)
                return create_paddle_predictor(cfg)

            self.gpu_predictor_enc = _gpu_predictor(self.pretrained_encoder_net)
            self.gpu_predictor_dec = _gpu_predictor(self.pretrained_decoder_net)
Exemple #6
0
 def _set_config(self):
     """
     Predictor config setting: always build a CPU predictor and, when a
     CUDA device is visible, a GPU predictor as well.
     """
     self.model_file_path = os.path.join(self.default_pretrained_model_path,
                                         '__model__')
     self.params_file_path = os.path.join(
         self.default_pretrained_model_path, '__params__')
     cpu_config = AnalysisConfig(self.model_file_path,
                                 self.params_file_path)
     cpu_config.disable_glog_info()
     cpu_config.disable_gpu()
     self.cpu_predictor = create_paddle_predictor(cpu_config)
     try:
         _places = os.environ["CUDA_VISIBLE_DEVICES"]
         int(_places[0])
         use_gpu = True
     except (KeyError, IndexError, ValueError):
         # Env var unset, empty, or not a device id -> CPU only.
         use_gpu = False
     if use_gpu:
         gpu_config = AnalysisConfig(self.model_file_path,
                                     self.params_file_path)
         gpu_config.disable_glog_info()
         gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000,
                                   device_id=0)
         self.gpu_predictor = create_paddle_predictor(gpu_config)
Exemple #7
0
    def __init__(self, model_dir, label_id_path, vocab_path,
            gpu_id=None, gpu_mem=8000, zero_copy=True):
        """Build an ERNIE predictor: tokenizer, label map and Paddle engine."""
        self.tokenizer = ErnieTokenizer.from_pretrained(vocab_path)
        # Reverse vocab: token id -> token string.
        self.id_2_token = {idx: tok for tok, idx in self.tokenizer.vocab.items()}

        self.id_label_dict = LabelEncoder(
            label_id_info=label_id_path, isFile=True).id_label_dict

        # Configure AnalysisConfig for the chosen device.
        config = AnalysisConfig(model_dir)
        if gpu_id is None:
            config.disable_gpu()
        else:
            config.enable_use_gpu(gpu_mem, gpu_id)
            logging.info("gpu id: {}".format(config.gpu_device_id()))

        self.zero_copy = zero_copy
        if self.zero_copy:
            config.switch_use_feed_fetch_ops(False)

        # Build the PaddlePredictor.
        self.predictor = create_paddle_predictor(config)

        if self.zero_copy:
            # Bind the first input/output tensors for zero-copy feeding.
            in_names = self.predictor.get_input_names()
            self.input_tensor = self.predictor.get_input_tensor(in_names[0])

            out_names = self.predictor.get_output_names()
            self.output_tensor = self.predictor.get_output_tensor(out_names[0])
Exemple #8
0
def predict(args):
    """Classify one image and log the top-1 class index and its score."""
    # Build the inference config from CLI args.
    config = AnalysisConfig(args.model_file, args.params_file)
    if args.gpu_id < 0:
        config.disable_gpu()
    else:
        config.enable_use_gpu(args.gpu_mem, args.gpu_id)

    # you can enable tensorrt engine if paddle is installed with tensorrt
    # config.enable_tensorrt_engine()

    predictor = create_paddle_predictor(config)

    # Preprocess the image and wrap it as a PaddleTensor.
    inputs = PaddleTensor(preprocess_image(args.image_path))

    # Run inference; flatten the first (and only used) output.
    output = predictor.run([inputs])[0].as_ndarray().flatten()

    # Report the argmax class and its score.
    cls = np.argmax(output)
    score = output[cls]
    logger.info("class: {0}".format(cls))
    logger.info("score: {0}".format(score))
    return
Exemple #9
0
    def _set_config(self):
        # predictor config setting: CPU predictor always, GPU predictor
        # when CUDA_VISIBLE_DEVICES names a device; then lazily load the
        # model's YAML config once.
        cpu_config = AnalysisConfig(self.default_pretrained_model_path)
        cpu_config.disable_glog_info()
        cpu_config.disable_gpu()
        cpu_config.switch_ir_optim(False)
        self.cpu_predictor = create_paddle_predictor(cpu_config)

        try:
            _places = os.environ["CUDA_VISIBLE_DEVICES"]
            int(_places[0])
            use_gpu = True
        except (KeyError, IndexError, ValueError):
            # Env var unset, empty, or not a device id -> CPU only.
            use_gpu = False
        if use_gpu:
            gpu_config = AnalysisConfig(self.default_pretrained_model_path)
            gpu_config.disable_glog_info()
            gpu_config.enable_use_gpu(memory_pool_init_size_mb=500,
                                      device_id=0)
            self.gpu_predictor = create_paddle_predictor(gpu_config)

        # model config setting: read config.yml only on first call.
        if not self.model_config:
            with open(os.path.join(self.directory, 'config.yml')) as fp:
                self.model_config = yaml.load(fp.read(),
                                              Loader=yaml.FullLoader)

        self.multi_box_head_config = self.model_config['MultiBoxHead']
        self.output_decoder_config = self.model_config['SSDOutputDecoder']
        self.mobilenet_config = self.model_config['MobileNet']
Exemple #10
0
    def _get_analysis_config(self,
                             use_gpu=False,
                             use_trt=False,
                             use_mkldnn=False):
        '''
        Return a new object of AnalysisConfig.

        The baseline is CPU with IR optimization and zero-copy tensors;
        GPU, TensorRT and MKL-DNN are layered on via the flags.
        '''
        config = AnalysisConfig(os.path.join(self.path, "model"),
                                os.path.join(self.path, "params"))
        # CPU defaults; enable_use_gpu() below overrides them when asked.
        config.disable_gpu()
        config.switch_specify_input_names(True)
        config.switch_ir_optim(True)
        config.switch_use_feed_fetch_ops(False)
        if use_gpu:
            config.enable_use_gpu(100, 0)
            if use_trt:
                trt = self.trt_parameters
                config.enable_tensorrt_engine(trt.workspace_size,
                                              trt.max_batch_size,
                                              trt.min_subgraph_size,
                                              trt.precision,
                                              trt.use_static,
                                              trt.use_calib_mode)
        elif use_mkldnn:
            config.enable_mkldnn()

        return config
Exemple #11
0
def create_predictor(mode):
    """Create a predictor plus bound input/output tensors for *mode*.

    "detect" selects the detection model; anything else selects the
    recognition model. Device choice comes from the module-level config.
    """
    if mode == "detect":
        model_file_path = config.det_model_dir
        params_file_path = config.det_param_dir
    else:
        model_file_path = config.rec_model_dir
        params_file_path = config.rec_param_dir

    an_config = AnalysisConfig(model_file_path, params_file_path)

    # GPU or CPU, per the global configuration.
    if config.use_gpu:
        an_config.enable_use_gpu(config.gpu_mem, 0)
    else:
        an_config.disable_gpu()

    an_config.disable_glog_info()

    # use zero copy: drop the incompatible fuse pass, skip feed/fetch ops
    an_config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass")
    an_config.switch_use_feed_fetch_ops(False)

    predictor = create_paddle_predictor(an_config)
    input_tensor = predictor.get_input_tensor(predictor.get_input_names()[0])
    output_tensors = [predictor.get_output_tensor(name)
                      for name in predictor.get_output_names()]
    return predictor, input_tensor, output_tensors
Exemple #12
0
def create_predictor(mode, enable_mkldnn=False, use_zero_copy_run=False):
    """
    create predictor for inference
    :param mode: one of 'det', 'cls', 'rec' — selects the model directory
    :param enable_mkldnn: enable MKL-DNN on CPU (previously a hard-coded
        local constant; now a backward-compatible keyword parameter)
    :param use_zero_copy_run: use zero-copy tensors instead of feed/fetch
        ops (previously a hard-coded local constant; now a parameter)
    :return: (predictor, input_tensor, output_tensors)
    """
    if mode == "det":
        model_dir = "./src/ai/ocr_paddle/inference/ch_ppocr_mobile_v1.1_det_infer"
    elif mode == 'cls':
        model_dir = "./src/ai/ocr_paddle/inference/ch_ppocr_mobile_v1.1_cls_infer"
    elif mode == 'rec':
        model_dir = "./src/ai/ocr_paddle/inference/ch_ppocr_mobile_v1.1_rec_infer"
    else:
        raise ValueError(
            "'mode' of create_predictor() can only be one of ['det', 'cls', 'rec']"
        )
    # NOTE: the old "if model_dir is None" check was unreachable — every
    # branch above either assigns model_dir or raises — so it was removed.

    model_file_path = model_dir + "/model"
    params_file_path = model_dir + "/params"
    if not os.path.exists(model_file_path):
        logger.info("not find model file path {}".format(model_file_path))
        sys.exit(0)
    if not os.path.exists(params_file_path):
        logger.info("not find params file path {}".format(params_file_path))
        sys.exit(0)

    config = AnalysisConfig(model_file_path, params_file_path)

    # CPU-only inference.
    config.disable_gpu()
    config.set_cpu_math_library_num_threads(6)
    if enable_mkldnn:
        # cache 10 different shapes for mkldnn to avoid memory leak
        config.set_mkldnn_cache_capacity(10)
        config.enable_mkldnn()

    # config.enable_memory_optim()
    config.disable_glog_info()
    if use_zero_copy_run:
        config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass")
        config.switch_use_feed_fetch_ops(False)
    else:
        config.switch_use_feed_fetch_ops(True)

    predictor = create_paddle_predictor(config)
    input_names = predictor.get_input_names()
    # NOTE: with several inputs only the last tensor is returned
    # (original behavior, preserved).
    for name in input_names:
        input_tensor = predictor.get_input_tensor(name)
    output_names = predictor.get_output_names()
    output_tensors = []
    for output_name in output_names:
        output_tensor = predictor.get_output_tensor(output_name)
        output_tensors.append(output_tensor)
    return predictor, input_tensor, output_tensors
Exemple #13
0
def main():
    """Build a CPU predictor from CLI args and run it on one image."""
    args = parse_args()
    # Separate model/params files live inside the model directory.
    config = AnalysisConfig(args.model_dir + "/__model__",
                            args.model_dir + "/params")
    config.disable_gpu()
    test_image(create_paddle_predictor(config), args.image_path)
Exemple #14
0
def create_predictor(args, mode, logger):
    """Create a Paddle predictor and its bound input/output tensors.

    Args:
        args: namespace with det/cls/rec model dirs and engine flags
            (use_gpu, gpu_mem, use_tensorrt, use_fp16, max_batch_size,
            enable_mkldnn, use_zero_copy_run).
        mode: "det", "cls", or anything else (treated as recognition).
        logger: logger used to report missing files before exiting.

    Returns:
        (predictor, input_tensor, output_tensors).
    """
    if mode == "det":
        model_dir = args.det_model_dir
    elif mode == 'cls':
        model_dir = args.cls_model_dir
    else:
        model_dir = args.rec_model_dir

    if model_dir is None:
        logger.info("not find {} model file path {}".format(mode, model_dir))
        # NOTE(review): exits with status 0 even though this is an error
        # path — confirm whether callers rely on the zero exit code.
        sys.exit(0)
    model_file_path = model_dir + "/inference.pdmodel"
    params_file_path = model_dir + "/inference.pdiparams"
    if not os.path.exists(model_file_path):
        logger.info("not find model file path {}".format(model_file_path))
        sys.exit(0)
    if not os.path.exists(params_file_path):
        logger.info("not find params file path {}".format(params_file_path))
        sys.exit(0)

    config = AnalysisConfig(model_file_path, params_file_path)

    if args.use_gpu:
        config.enable_use_gpu(args.gpu_mem, 0)
        if args.use_tensorrt:
            # FP16 only when explicitly requested; otherwise FP32.
            config.enable_tensorrt_engine(
                precision_mode=AnalysisConfig.Precision.Half
                if args.use_fp16 else AnalysisConfig.Precision.Float32,
                max_batch_size=args.max_batch_size)
    else:
        config.disable_gpu()
        config.set_cpu_math_library_num_threads(6)
        if args.enable_mkldnn:
            # cache 10 different shapes for mkldnn to avoid memory leak
            config.set_mkldnn_cache_capacity(10)
            config.enable_mkldnn()

    # config.enable_memory_optim()
    config.disable_glog_info()

    if args.use_zero_copy_run:
        # Zero-copy mode: this fuse pass is incompatible with it.
        config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass")
        config.switch_use_feed_fetch_ops(False)
    else:
        config.switch_use_feed_fetch_ops(True)

    predictor = create_paddle_predictor(config)
    input_names = predictor.get_input_names()
    # NOTE(review): with several inputs only the last name's tensor is
    # kept — looks intentional for single-input OCR models; confirm.
    for name in input_names:
        input_tensor = predictor.get_input_tensor(name)
    output_names = predictor.get_output_names()
    output_tensors = []
    for output_name in output_names:
        output_tensor = predictor.get_output_tensor(output_name)
        output_tensors.append(output_tensor)
    return predictor, input_tensor, output_tensors
Exemple #15
0
    def __init__(self):
        """
        Create the predictor manager: read device settings from the config
        object and pre-build a fixed-size pool of PaddlePredictor instances.
        """
        # How long callers wait when borrowing a predictor from the pool.
        self.get_predictor_timeout = float(
            config.get('get.predictor.timeout', default_value=0.5))
        predictor_count = 0
        enable_mkl = False
        gpu_memory = 200
        gpu_device_ids = []

        model_dir = config.get('model.dir')
        device_type = config.get('device.type')
        if device_type == PredictorManager.CPU_DEVICE:
            cpu_predictor_count = int(
                config.getint('cpu.predictor.count', default_value=0))
            predictor_count = cpu_predictor_count
            enable_mkl = config.getboolean('cpu.enable_mkl',
                                           default_value=False)
        elif device_type == PredictorManager.GPU_DEVICE:
            gpu_predictor_count = int(
                config.getint('gpu.predictor.count', default_value=0))
            predictor_count = gpu_predictor_count
            gpu_memory = config.getint('gpu.predictor.memory',
                                       default_value=200)
            # One device id per GPU predictor, comma-separated in config.
            gpu_device_ids = config.get('gpu.predictor.device.id').split(',')
            gpu_device_ids = map(int, gpu_device_ids)
            if PYTHON_VERSION == 3:
                # map() is lazy on Python 3; materialize for len()/indexing.
                gpu_device_ids = list(gpu_device_ids)
            assert len(
                gpu_device_ids
            ) == gpu_predictor_count, "gpu predictor count doesn't match device count"
        else:
            raise Exception("no device to run predictor!")
        assert predictor_count > 0, "no device to predict"
        logger.info(
            "device type:{} predictor count:{} model dir:{} get predictor timeout:{}s"
            .format(device_type, predictor_count, model_dir,
                    self.get_predictor_timeout))
        # The bounded queue acts as the predictor pool.
        self.predictor_queue = Queue(maxsize=predictor_count)

        for i in range(predictor_count):
            # Set config
            predictor_config = AnalysisConfig(model_dir)
            # predictor_config.specify_input_name()
            if device_type == PredictorManager.CPU_DEVICE:
                predictor_config.disable_gpu()
                if enable_mkl:
                    predictor_config.enable_mkldnn()
            else:
                # GPU: the i-th predictor is pinned to the i-th device id.
                device_id = gpu_device_ids[i]
                predictor_config.enable_use_gpu(gpu_memory, device_id)

            # Create PaddlePredictor
            predictor = create_paddle_predictor(predictor_config)
            self.predictor_queue.put(predictor)
 def set_config(self, model_flie, params_file, use_feed_fetch_ops,
                specify_input_names):
     """Build a CPU AnalysisConfig with MKL-DNN and IR optimization on.

     NOTE(review): 'model_flie' is a typo for 'model_file', but renaming
     the parameter would break keyword-argument callers, so it is kept.

     :param model_flie: path to the model file
     :param params_file: path to the params file
     :param use_feed_fetch_ops: forwarded to switch_use_feed_fetch_ops
     :param specify_input_names: forwarded to switch_specify_input_names
     :return: the configured AnalysisConfig
     """
     config = AnalysisConfig(model_flie, params_file)
     config.disable_gpu()
     config.enable_mkldnn()
     config.disable_glog_info()
     config.switch_ir_optim(True)
     config.switch_use_feed_fetch_ops(use_feed_fetch_ops)
     config.switch_specify_input_names(specify_input_names)
     return config
    def load_model(self, model_dir, roll_back=False):
        """Load a CPU predictor from *model_dir*.

        When roll_back is set, the previously active predictor is pushed
        onto the history stack before being replaced.
        """
        print("load_model==>", model_dir)
        config = AnalysisConfig(model_dir)
        # CPU-only inference.
        config.disable_gpu()
        # Keep the old predictor around so it can be rolled back to.
        if self.predictor and roll_back:
            self.histroy.push(self.predictor)
        # Build and install the new predictor.
        self.predictor = create_paddle_predictor(config)
        return self.predictor
Exemple #18
0
 def __load_inference_model(self, model_path, use_gpu):
     """Build a native-config Paddle predictor for a saved model.

     :param model_path: directory holding the 'model' and 'params' files
     :param use_gpu: run on GPU when True, otherwise CPU with MKL-DNN
     :return: the created predictor
     """
     check_cuda(use_gpu)
     config = AnalysisConfig(model_path + "/" + "model",
                             model_path + "/" + "params")
     if use_gpu:
         config.enable_use_gpu(1024)
     else:
         config.disable_gpu()
         config.enable_mkldnn()
     # The predictor is built from the legacy NativeConfig form.
     return create_paddle_predictor(config.to_native_config())
Exemple #19
0
def create_predictor(args, mode):
    """Create a predictor plus bound input/output tensors for *mode*.

    "det" selects the detection model; anything else the recognizer.
    Missing model files are logged and the process exits.
    """
    model_dir = args.det_model_dir if mode == "det" else args.rec_model_dir

    if model_dir is None:
        logger.info("not find {} model file path {}".format(mode, model_dir))
        sys.exit(0)
    model_file_path = model_dir + "/model"
    params_file_path = model_dir + "/params"
    if not os.path.exists(model_file_path):
        logger.info("not find model file path {}".format(model_file_path))
        sys.exit(0)
    if not os.path.exists(params_file_path):
        logger.info("not find params file path {}".format(params_file_path))
        sys.exit(0)

    config = AnalysisConfig(model_file_path, params_file_path)

    if args.use_gpu:
        config.enable_use_gpu(args.gpu_mem, 0)
    else:
        # CPU path: fixed thread count, optional MKL-DNN acceleration.
        config.disable_gpu()
        config.set_cpu_math_library_num_threads(6)
        if args.enable_mkldnn:
            config.enable_mkldnn()

    config.disable_glog_info()

    # Zero-copy mode drops an incompatible fuse pass and feed/fetch ops.
    if args.use_zero_copy_run:
        config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass")
        config.switch_use_feed_fetch_ops(False)
    else:
        config.switch_use_feed_fetch_ops(True)

    predictor = create_paddle_predictor(config)
    # With several inputs only the last tensor is kept (original behavior).
    for name in predictor.get_input_names():
        input_tensor = predictor.get_input_tensor(name)
    output_tensors = [predictor.get_output_tensor(name)
                      for name in predictor.get_output_names()]
    return predictor, input_tensor, output_tensors
Exemple #20
0
def create_predictor(args, mode):
    """Create a Paddle predictor plus its bound input/output tensors.

    Args:
        args: namespace with det/rec model dirs, use_gpu, gpu_mem, ir_optim.
        mode: "det" selects the detection model, anything else recognition.

    Returns:
        (predictor, input_tensor, output_tensors).
    """
    if mode == "det":
        model_dir = args.det_model_dir
    else:
        model_dir = args.rec_model_dir

    if model_dir is None:
        logger.info("not find {} model file path {}".format(mode, model_dir))
        sys.exit(0)
    model_file_path = model_dir + "/model"
    params_file_path = model_dir + "/params"
    if not os.path.exists(model_file_path):
        logger.info("not find model file path {}".format(model_file_path))
        sys.exit(0)
    if not os.path.exists(params_file_path):
        logger.info("not find params file path {}".format(params_file_path))
        sys.exit(0)

    config = AnalysisConfig(model_file_path, params_file_path)

    if args.use_gpu:
        config.enable_use_gpu(args.gpu_mem, 0)
    else:
        config.disable_gpu()

    config.disable_glog_info()
    config.switch_ir_optim(args.ir_optim)
    # (Dead, commented-out TensorRT block removed; re-enable via
    # config.enable_tensorrt_engine(...) if TensorRT support returns.)

    config.enable_memory_optim()
    # use zero copy
    config.switch_use_feed_fetch_ops(False)
    predictor = create_paddle_predictor(config)
    input_names = predictor.get_input_names()
    input_tensor = predictor.get_input_tensor(input_names[0])
    output_names = predictor.get_output_names()
    output_tensors = []
    for output_name in output_names:
        output_tensor = predictor.get_output_tensor(output_name)
        output_tensors.append(output_tensor)
    return predictor, input_tensor, output_tensors
Exemple #21
0
    def _get_analysis_config(self,
                             use_gpu=False,
                             use_trt=False,
                             use_mkldnn=False):
        '''
        Return a new object of AnalysisConfig.

        Baseline is CPU with IR optimization and zero-copy tensors; the
        flags layer on GPU, TensorRT (with optional inspector, dynamic
        shapes and varseqlen), or MKL-DNN (with optional bfloat16).
        '''
        config = AnalysisConfig(self.path)
        # CPU defaults; enable_use_gpu() below overrides when requested.
        config.disable_gpu()
        config.switch_specify_input_names(True)
        config.switch_ir_optim(True)
        config.switch_use_feed_fetch_ops(False)
        if use_gpu:
            config.enable_use_gpu(100, 0)
            if use_trt:
                config.enable_tensorrt_engine(
                    self.trt_parameters.workspace_size,
                    self.trt_parameters.max_batch_size,
                    self.trt_parameters.min_subgraph_size,
                    self.trt_parameters.precision,
                    self.trt_parameters.use_static,
                    self.trt_parameters.use_calib_mode)
                if self.trt_parameters.use_inspector:
                    config.enable_tensorrt_inspector()
                    # Test-harness check: the inspector flag must stick.
                    self.assertTrue(
                        config.tensorrt_inspector_enabled(),
                        "The inspector option is not set correctly.")

                if self.dynamic_shape_params:
                    # Dynamic-shape TensorRT needs min/max/opt shapes.
                    config.set_trt_dynamic_shape_info(
                        self.dynamic_shape_params.min_input_shape,
                        self.dynamic_shape_params.max_input_shape,
                        self.dynamic_shape_params.optim_input_shape,
                        self.dynamic_shape_params.disable_trt_plugin_fp16)
                if self.enable_tensorrt_varseqlen:
                    config.enable_tensorrt_varseqlen()

        elif use_mkldnn:
            config.enable_mkldnn()
            if self.enable_mkldnn_bfloat16:
                config.enable_mkldnn_bfloat16()
        print('config summary:', config.summary())
        return config
Exemple #22
0
    def _set_config(self):
        """
        Predictor config setting: always build a CPU predictor; also build
        a GPU predictor when CUDA_VISIBLE_DEVICES names a device.
        """
        cpu_config = AnalysisConfig(self.default_pretrained_model_path)
        cpu_config.disable_glog_info()
        cpu_config.disable_gpu()
        cpu_config.switch_ir_optim(False)
        self.cpu_predictor = create_paddle_predictor(cpu_config)

        try:
            _places = os.environ["CUDA_VISIBLE_DEVICES"]
            int(_places[0])
            use_gpu = True
        except (KeyError, IndexError, ValueError):
            # Env var unset, empty, or not a device id -> CPU only.
            use_gpu = False
        if use_gpu:
            gpu_config = AnalysisConfig(self.default_pretrained_model_path)
            gpu_config.disable_glog_info()
            gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0)
            self.gpu_predictor = create_paddle_predictor(gpu_config)
Exemple #23
0
 def set_config(self,
                model_path,
                num_threads,
                mkldnn_cache_capacity,
                warmup_data=None,
                use_analysis=False,
                enable_ptq=False):
     """Build an AnalysisConfig for CPU inference.

     :param model_path: directory of the saved inference model
     :param num_threads: CPU math library thread count
     :param mkldnn_cache_capacity: MKL-DNN shape-cache size (analysis mode)
     :param warmup_data: calibration batch for post-training quantization
     :param use_analysis: enable the full analysis/MKL-DNN pipeline
     :param enable_ptq: enable post-training quantization (needs warmup_data)
     :return: the configured AnalysisConfig
     """
     config = AnalysisConfig(model_path)
     config.set_cpu_math_library_num_threads(num_threads)
     if use_analysis:
         config.disable_gpu()
         config.switch_use_feed_fetch_ops(True)
         config.switch_ir_optim(True)
         config.enable_mkldnn()
         config.set_mkldnn_cache_capacity(mkldnn_cache_capacity)
         if enable_ptq:
             # This pass to work properly, must be added before fc_fuse_pass
             config.pass_builder().insert_pass(5, "fc_lstm_fuse_pass")
             config.enable_quantizer()
             config.quantizer_config().set_quant_data(warmup_data)
             config.quantizer_config().set_quant_batch_size(1)
     return config
Exemple #24
0
def create_predictor(args):
    """Build a Paddle predictor configured from command-line *args*."""
    config = AnalysisConfig(args.model_file, args.params_file)

    # Device selection.
    if args.use_gpu:
        config.enable_use_gpu(args.gpu_mem, 0)
    else:
        config.disable_gpu()

    config.disable_glog_info()
    config.switch_ir_optim(args.ir_optim)  # default true
    if args.use_tensorrt:
        precision = (AnalysisConfig.Precision.Half
                     if args.use_fp16 else AnalysisConfig.Precision.Float32)
        config.enable_tensorrt_engine(precision_mode=precision,
                                      max_batch_size=args.batch_size)

    config.enable_memory_optim()
    # use zero copy
    config.switch_use_feed_fetch_ops(False)

    return create_paddle_predictor(config)
    def _load_model_and_set_config(self):
        '''
        Load the inference model from disk and build its AnalysisConfig.
        '''
        # Prefer the separate-params layout when the params file exists.
        params_path = os.path.join(self.model_path, self.params_file)
        if os.path.exists(params_path):
            config = AnalysisConfig(
                os.path.join(self.model_path, "__model__"), params_path)
        else:
            config = AnalysisConfig(os.path.join(self.model_path))

        # Pick the device according to how Paddle was compiled.
        if fluid.is_compiled_with_cuda():
            config.enable_use_gpu(100, 0)
        else:
            config.disable_gpu()

        # Common inference switches.
        config.switch_specify_input_names(True)
        config.switch_use_feed_fetch_ops(False)
        config.enable_memory_optim()
        config.disable_glog_info()
        config.switch_ir_optim(True)

        return config
Exemple #26
0
    def _set_config(self, pretrained_model_path):
        """
        Predictor config setup.

        :param pretrained_model_path: directory with 'model'/'params' files
        :return: (predictor, input_tensor, output_tensors)
        """
        model_file_path = os.path.join(pretrained_model_path, 'model')
        params_file_path = os.path.join(pretrained_model_path, 'params')

        config = AnalysisConfig(model_file_path, params_file_path)
        try:
            _places = os.environ["CUDA_VISIBLE_DEVICES"]
            int(_places[0])
            use_gpu = True
        except (KeyError, IndexError, ValueError):
            # Env var unset, empty, or not a device id -> run on CPU.
            use_gpu = False

        if use_gpu:
            config.enable_use_gpu(8000, 0)
        else:
            config.disable_gpu()
            if self.enable_mkldnn:
                # cache 10 different shapes for mkldnn to avoid memory leak
                config.set_mkldnn_cache_capacity(10)
                config.enable_mkldnn()

        config.disable_glog_info()
        # Zero-copy mode: drop the incompatible fuse pass, skip feed/fetch.
        config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass")
        config.switch_use_feed_fetch_ops(False)

        predictor = create_paddle_predictor(config)

        input_names = predictor.get_input_names()
        input_tensor = predictor.get_input_tensor(input_names[0])
        output_names = predictor.get_output_names()
        output_tensors = []
        for output_name in output_names:
            output_tensor = predictor.get_output_tensor(output_name)
            output_tensors.append(output_tensor)

        return predictor, input_tensor, output_tensors
Exemple #27
0
    def _set_config(self):
        """
        Predictor config setting: build self.predictor from the inference
        model, on GPU when CUDA_VISIBLE_DEVICES selects a device.
        """
        model_file_path = os.path.join(self.infer_model_path, 'model')
        params_file_path = os.path.join(self.infer_model_path, 'params')

        config = AnalysisConfig(model_file_path, params_file_path)
        try:
            _places = os.environ["CUDA_VISIBLE_DEVICES"]
            int(_places[0])
            use_gpu = True
        except (KeyError, IndexError, ValueError):
            # Env var unset, empty, or not a device id -> run on CPU.
            use_gpu = False

        if use_gpu:
            config.enable_use_gpu(8000, 0)
        else:
            config.disable_gpu()

        config.disable_glog_info()

        self.predictor = create_paddle_predictor(config)
Exemple #28
0
def main():
    """Predictor main: run TDM first-layer inference over the test files.

    Builds a CPU-only AnalysisConfig (with profiling enabled), replicates
    the model's first-layer node ids across the batch, then streams batches
    from ``args.test_files_path`` through the predictor, logging each
    batch's output.
    """
    args = parse_args()

    # CPU-only inference; thread count comes from the CLI args.
    config = AnalysisConfig(args.model_files_path)
    config.disable_gpu()
    config.enable_profile()
    # config.enable_mkldnn()
    config.set_cpu_math_library_num_threads(args.cpu_num)

    predictor = create_paddle_predictor(config)

    # Replicate the first-layer node ids across the batch; the mask is all
    # zeros, i.e. no first-layer node is masked out.
    tdm_model = TdmInferNet(args)
    first_layer_node = tdm_model.first_layer_node
    first_layer_nums = len(first_layer_node)
    first_layer_node = np.array(first_layer_node)
    first_layer_node = first_layer_node.reshape((1, -1)).astype('int64')
    first_layer_node = first_layer_node.repeat(args.batch_size, axis=0)
    first_layer_mask = np.zeros(
        (args.batch_size, first_layer_nums)).astype('int64')

    # os.path.join instead of manual "/" string concatenation.
    file_list = [
        os.path.join(str(args.test_files_path), x)
        for x in os.listdir(args.test_files_path)
    ]
    test_reader = TDMDataset().infer_reader(file_list, args.batch_size)

    for batch_id, data in enumerate(test_reader()):
        input_emb = data2tensor(data)

        inputs = tdm_input(input_emb, first_layer_node, first_layer_mask)
        outputs = predictor.run(inputs)
        output = outputs[0]
        output_data = output.as_ndarray()

        logger.info("TEST --> batch: {} infer_item {}".format(
            batch_id, output_data))
    def _load_model_and_set_config(self):
        '''
        Load the inference model from disk and build its analysis config.
        '''
        combined_params = os.path.join(self.model_path, self.params_file)
        if os.path.exists(combined_params):
            # Combined format: separate model and params files on disk.
            combined_model = os.path.join(self.model_path, self.model_file)
            config = AnalysisConfig(combined_model, combined_params)
        else:
            # Uncombined format: load everything from the model directory.
            config = AnalysisConfig(os.path.join(self.model_path))

        # Pick GPU or CPU based on how this Paddle build was compiled.
        if fluid.is_compiled_with_cuda():
            config.enable_use_gpu(100, 0)
        else:
            config.disable_gpu()

        config.switch_specify_input_names(True)
        config.switch_use_feed_fetch_ops(False)
        config.enable_memory_optim()
        config.disable_glog_info()
        # TODO: set it to True after PaddleInference fix the precision error
        # in CUDA11
        config.switch_ir_optim(False)

        return config
def main(args):
    """ERNIE classification inference.

    Pipeline:
      1. Build the prediction program from the ERNIE config.
      2. Restore weights from ``args.init_checkpoint`` (required).
      3. Export an inference model under ``args.save_inference_model_path``.
      4. Reload it via AnalysisConfig and predict over ``args.predict_set``,
         printing per-example probabilities and logging throughput.
    """
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    # Inference-mode reader: no labels expected in the input files.
    reader = ClassifyReader(
        vocab_path=args.vocab_path,
        label_map_config=args.label_map_config,
        max_seq_len=args.max_seq_len,
        do_lower_case=args.do_lower_case,
        in_tokens=False,
        is_inference=True)

    predict_prog = fluid.Program()
    predict_startup = fluid.Program()
    with fluid.program_guard(predict_prog, predict_startup):
        with fluid.unique_name.guard():
            predict_pyreader, probs, feed_target_names = create_model(
                args,
                pyreader_name='predict_reader',
                ernie_config=ernie_config,
                is_classify=True,
                is_prediction=True)

    # Freeze the program for test/inference use.
    predict_prog = predict_prog.clone(for_test=True)

    if args.use_cuda:
        place = fluid.CUDAPlace(0)
        dev_count = fluid.core.get_cuda_device_count()
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))

    # NOTE(review): this recomputes `place`, making the branch above redundant
    # except for `dev_count` (which is itself never used below) — candidate
    # for cleanup.
    place = fluid.CUDAPlace(0) if args.use_cuda == True else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(predict_startup)

    if args.init_checkpoint:
        init_pretraining_params(exe, args.init_checkpoint, predict_prog)
    else:
        raise ValueError("args 'init_checkpoint' should be set for prediction!")

    assert args.save_inference_model_path, "args save_inference_model_path should be set for prediction"
    # Name the exported model directory after the checkpoint directory.
    _, ckpt_dir = os.path.split(args.init_checkpoint.rstrip('/'))
    dir_name = ckpt_dir + '_inference_model'
    model_path = os.path.join(args.save_inference_model_path, dir_name)
    log.info("save inference model to %s" % model_path)
    fluid.io.save_inference_model(
        model_path,
        feed_target_names, [probs],
        exe,
        main_program=predict_prog)

    # Set config
    #config = AnalysisConfig(args.model_dir)
    #config = AnalysisConfig(os.path.join(model_path, "__model__"), os.path.join(model_path, ""))
    config = AnalysisConfig(model_path)
    if not args.use_cuda:
        log.info("disable gpu")
        config.disable_gpu()
        config.switch_ir_optim(True) 
    else:
        log.info("using gpu")
        config.enable_use_gpu(1024)

    # Create PaddlePredictor
    predictor = create_paddle_predictor(config)

    predict_data_generator = reader.data_generator(
        input_file=args.predict_set,
        batch_size=args.batch_size,
        epoch=1,
        shuffle=False)

    log.info("-------------- prediction results --------------")
    np.set_printoptions(precision=4, suppress=True)
    index = 0
    total_time = 0
    for sample in predict_data_generator():
        # NOTE(review): task_ids (sample[3]) is unpacked but never fed to the
        # predictor — confirm the exported model expects only these 4 inputs.
        src_ids    = sample[0]
        sent_ids   = sample[1]
        pos_ids    = sample[2]
        task_ids   = sample[3]
        input_mask = sample[4]

        inputs = [array2tensor(ndarray) for ndarray in [src_ids, sent_ids, pos_ids, input_mask]]
        begin_time = time.time()
        outputs = predictor.run(inputs)
        end_time = time.time()
        total_time += end_time - begin_time

        # parse outputs
        output = outputs[0]
        batch_result  = output.as_ndarray()
        for single_example_probs in batch_result:
            print('\t'.join(map(str, single_example_probs.tolist())))
            index += 1
    # NOTE(review): raises ZeroDivisionError when the predict set is empty
    # (total_time stays 0).
    log.info("qps:{}\ttotal_time:{}\ttotal_example:{}\tbatch_size:{}".format(index/total_time, total_time, index, args.batch_size))