Beispiel #1
0
    def transpile(self, program, place, scope=None):
        '''
        Transpile the program desc and cast the weights to float16 data type to
        enable float16 inference.

        Since the operator in a program desc will automatically choose the
        right compute kernel to run based on the data type of the input tensor.
        We actually don't need to change the program desc to run in float16 mode.

        However, in this way, users who are used to feeding and fetching tensors 
        of float32 data type when running typical inference may find it confusing
        and difficult to run inference in float16 mode as they need to convert
        input data to float16 dtype and then convert the results back to float32 
        dtype to match the rest of code.

        So this function appends cast ops to the program desc where necessary so 
        that users are able to run inference in float16 mode while providing input 
        tensor (feed_holder) of float data type and obtaining output tensor 
        (fetch_holder) of float data type. 

        Moreover, it is desired that when we have the scope and program desc to run
        inference in float32 mode, we can use a single API to do the necessary 
        modification and then user can run float16 inference on the fly. To make 
        this happen, this function also create new parameters in the scope to have the 
        converted float16 weights and change the operators in program desc to use 
        these new parameters.

        :param program: program to transpile 
        :type program: Program
        :param place: inference place 
        :type place: Place
        :param scope: inference scope 
        :type scope: Scope         
        '''
        if not isinstance(program, Program):
            raise TypeError("program should be as Program type")
        if not isinstance(place, core.CPUPlace) and not isinstance(
                place, core.CUDAPlace):
            raise TypeError("place should be as CPUPlace/CUDAPlace type")
        if scope is None:
            scope = global_scope()
        if not isinstance(scope, core.Scope):
            raise TypeError("scope should be as Scope type or None")

        self.scope = scope
        self.place = place
        self.block = program.block(0)
        self.input_map = {}  # store the input names should be adjusted 

        self._modify_feed_fetch()
        self._convert_param_to_float16()
        self._adjust_input(skip=True)
        self._remove_unused_var()

        # TODO(luotao): use clone() method to flush the program.desc in force, 
        # since some large program.desc will not be flushed immediately. 
        # And a better solution will be considered later.
        program = program.clone()
Beispiel #2
0
    def transpile(self, program, place, scope=None):
        '''
        Transpile the program desc and cast the weights to float16 data type to
        enable float16 inference.

        Since the operator in a program desc will automatically choose the
        right compute kernel to run based on the data type of the input tensor.
        We actually don't need to change the program desc to run in float16 mode.

        However, in this way, users who are used to feeding and fetching tensors 
        of float32 data type when running typical inference may find it confusing
        and difficult to run inference in float16 mode as they need to convert
        input data to float16 dtype and then convert the results back to float32 
        dtype to match the rest of code.

        So this function appends cast ops to the program desc where necessary so 
        that users are able to run inference in float16 mode while providing input 
        tensor (feed_holder) of float data type and obtaining output tensor 
        (fetch_holder) of float data type. 

        Moreover, it is desired that when we have the scope and program desc to run
        inference in float32 mode, we can use a single API to do the necessary 
        modification and then user can run float16 inference on the fly. To make 
        this happen, this function also create new parameters in the scope to have the 
        converted float16 weights and change the operators in program desc to use 
        these new parameters.

        :param program: program to transpile 
        :type program: Program
        :param place: inference place 
        :type place: Place
        :param scope: inference scope 
        :type scope: Scope         
        '''
        if not isinstance(program, Program):
            raise TypeError("program should be as Program type")
        if not isinstance(place, core.CPUPlace) and not isinstance(
                place, core.CUDAPlace):
            raise TypeError("place should be as CPUPlace/CUDAPlace type")
        if scope is None:
            scope = global_scope()
        if not isinstance(scope, core.Scope):
            raise TypeError("scope should be as Scope type or None")

        self.scope = scope
        self.place = place
        self.block = program.block(0)
        self.input_map = {}  # store the input names should be adjusted 

        self._modify_feed_fetch()
        self._convert_param_to_float16()
        self._adjust_input(skip=True)
        self._remove_unused_var()

        # TODO(luotao): use clone() method to flush the program.desc in force, 
        # since some large program.desc will not be flushed immediately. 
        # And a better solution will be considered later.
        program = program.clone()
 def sample_data(self):
     '''
     Sampling the tensor data of variable.
     '''
     for i in self.sampling_program.list_vars():
         if i.name in self.sampling_vars:
             np_data = np.array(global_scope().find_var(i.name).get_tensor())
             if i.name not in self._sampling_data:
                 self._sampling_data[i.name] = []
             self._sampling_data[i.name].append(np_data)
Beispiel #4
0
 def _set_var(var, ndarray):
     t = global_scope().find_var(var.name).get_tensor()
     p = t._place()
     if p.is_cpu_place():
         place = core.CPUPlace()
     elif p.is_cuda_pinned_place():
         place = core.CUDAPinnedPlace()
     else:
         p = core.Place()
         p.set_place(t._place())
         place = core.CUDAPlace(p.gpu_device_id())
     t.set(ndarray, place)
Beispiel #5
0
    def _load_optimizer(self, state):
        prog = self._progs.get('train', None)
        optim = list(filter(is_belong_to_optimizer, prog.list_vars()))
        if not optim:
            return

        fluid.core._create_loaded_parameter(
            optim, global_scope(), self._executor._default_executor)

        for var in optim:
            assert var.name in state, \
                "variable [{}] is not in optimizer state file".format(var.name)
            self._set_var(var, state[var.name])
Beispiel #6
0
    def _get_parameter(self, name, scope=None):
        if scope is None:
            scope = global_scope()

        p_t = scope.find_var(name).get_tensor()

        master_name = self._used_master_weights.get(name)
        if master_name is not None:
            master_p_t = scope.find_var(master_name).get_tensor()
            assert master_p_t._dtype() != p_t._dtype()
            assert master_p_t.shape() == p_t.shape()
        else:
            master_p_t = None
        return p_t, master_p_t
Beispiel #7
0
    def load(self, param_state_pairs, optim_state):
        if self._executor is None:
            executor = fluid.Executor(fluid.CPUPlace())._default_executor
        else:
            executor = self._executor._default_executor

        # restore parameter states
        fluid.core._create_loaded_parameter(
            [param for param, state in param_state_pairs], global_scope(),
            executor)
        for param, state in param_state_pairs:
            self._set_var(param, state)

        # restore optimizer states
        # FIXME what if a different optimizer is used?
        if not self.model._optimizer or not optim_state:
            return
        self._load_optimizer(optim_state, executor)
    def _get_parameter(self, name, scope=None):
        if scope is None:
            scope = global_scope()

        master_param = self._param_to_master_param.get(name)
        assert master_param is not None

        master_param_t = scope.find_var(master_param).get_tensor()
        assert master_param_t._dtype() == core.VarDesc.VarType.FP32

        param_t = scope.find_var(name).get_tensor()
        if param_t._dtype() == core.VarDesc.VarType.FP32:
            assert param_t._ptr() == master_param_t._ptr()
            return param_t, None
        else:
            assert param_t._dtype() == core.VarDesc.VarType.FP16
            assert param_t.shape() == master_param_t.shape()
            return param_t, master_param_t
Beispiel #9
0
def _serialize_persistables(program, executor):
    """
    Serialize parameters using given program and executor.
    """
    vars_ = list(filter(is_persistable, program.list_vars()))
    # warn if no variable found in model
    if len(vars_) == 0:
        warnings.warn("no variable in your model, please ensure there are any "
                      "variables in your model to save")
        return None
    # create a new program and clone persitable vars to it
    save_program = Program()
    save_block = save_program.global_block()
    save_var_map = {}
    for var in vars_:
        if var.type != core.VarDesc.VarType.RAW:
            var_copy = _clone_var_in_block(save_block, var)
            save_var_map[var_copy.name] = var

    # create in_vars and out_var, then append a save_combine op to save_program
    in_vars = []
    for name in sorted(save_var_map.keys()):
        in_vars.append(save_var_map[name])

    out_var_name = unique_name.generate("out_var")
    out_var = save_block.create_var(type=core.VarDesc.VarType.RAW,
                                    name=out_var_name)
    out_var.desc.set_persistable(True)
    save_block.append_op(type='save_combine',
                         inputs={'X': in_vars},
                         outputs={'Y': out_var},
                         attrs={
                             'file_path': '',
                             'save_to_memory': True
                         })
    # run save_program to save vars
    # NOTE(zhiqiu): save op will add variable kLookupTablePath to save_program.desc,
    # which leads to diff between save_program and its desc. Call _sync_with_cpp
    # to keep consistency.
    save_program._sync_with_cpp()
    executor.run(save_program)
    # return serialized bytes in out_var
    return global_scope().find_var(out_var_name).get_bytes()
Beispiel #10
0
    def load(self, path):
        def _load(path):
            if not os.path.exists(path):
                return
            with open(path, 'rb') as f:
                return pickle.load(f)

        param_path = path + ".pdparams"
        param_state = _load(param_path)
        assert param_state, "failed to load parameters, please check path"

        if self._executor is None:
            executor = fluid.Executor(fluid.CPUPlace())._default_executor
        else:
            executor = self._executor._default_executor

        fluid.core._create_loaded_parameter(
            list(self.model.state_dict().values()), global_scope(), executor)

        for key, var in self.model.state_dict().items():
            assert key in param_state, \
                "parameter [{}] is not found in model file [{}]".format(
                    key, param_path)
            self._set_var(var, param_state[key])

        # FIXME what if a different optimizer is used?
        if not self.model._optimizer:
            return
        optim_path = path + ".pdopt"
        optim_state = _load(optim_path)
        if optim_state is None:
            return
        assert '__static_graph_only__' in optim_state, \
            "optimizer saved in dygraph mode is not usable in static graph"

        if self._executor is not None:
           self._load_optimizer(optim_state)
        else:
           self._lazy_load_optimizer = optim_state
Beispiel #11
0
    def _initialize(self, mode):
        # Get the current content from the distributed context
        self._serial_main_progs[mode] = self._dist_contexts[
            mode].serial_main_program
        self._serial_startup_progs[mode] = self._dist_contexts[
            mode].serial_startup_program
        self._dist_main_progs[mode] = self._dist_contexts[
            mode].dist_main_programs
        self._dist_startup_progs[mode] = self._dist_contexts[
            mode].dist_startup_programs
        self._feed_vars[mode] = self._dist_contexts[mode].serial_feed_vars
        self._fetch_vars[mode] = self._dist_contexts[mode].serial_fetch_vars

        if self._nranks > 1:
            # Traverse different rank programs and traverse each op of them,
            # instantiate communication by process_mapping.
            all_process_groups = get_all_process_groups()
            for process_group in all_process_groups:
                if self._cur_rank not in process_group.ranks:
                    continue
                process_group.instantiate()

        # initialize
        self._place = _get_device()
        if isinstance(self._place, fluid.CUDAPlace):
            self._place = fluid.CUDAPlace(ParallelEnv().dev_id)
        if self._executor is None:
            self._executor = paddle.static.Executor(self._place)
            uninitialized = []
            dist_startup_prog = self._dist_startup_progs[mode][self._cur_rank]
            for var in dist_startup_prog.list_vars():
                scope_var = global_scope().find_var(var.name)
                if scope_var and scope_var.get_tensor()._is_initialized():
                    continue
                uninitialized.append(var)
            if uninitialized:
                prune_startup_prog = dist_startup_prog._prune(uninitialized)
                self._executor.run(prune_startup_prog)
Beispiel #12
0
    def convert_to_int8(self, program, place, scope=None):
        scope = global_scope() if scope is None else scope
        program = default_main_program() if program is None else program

        def _load_var(name):
            return np.array(scope.find_var(name).get_tensor())

        global_block = program.global_block()

        def convert_to_int8(var):
            int8_var_name = var.name + ".int8"
            int8_var = global_block.create_parameter(
                name=int8_var_name.encode('ascii'),
                type=var.type,
                dtype=core.VarDesc.VarType.INT8,
                shape=var.shape)

            tensor = _load_var(var.name)

            scope.var(int8_var_name)
            int8_tensor = scope.find_var(int8_var_name).get_tensor()
            int8_tensor.set(tensor.astype(np.int8), place)
            return int8_var

        input_map = {}
        for block in program.blocks:
            for op in list(block.ops):
                if op.type in _QUANTIZABLE_OP_TYPES:
                    for name in op.input_arg_names:
                        var = block.var(name)
                        if var.persistable:
                            if name not in input_map:
                                int8_var = convert_to_int8(var)
                                input_map[name] = int8_var.name
                            op._rename_input(name, input_map[name])
        self._remove_unused_var(program)
Beispiel #13
0
    def set_state_dict(self,
                       state_dict,
                       include_sublayers=True,
                       use_structured_name=True):
        '''
        Set parameters and persistable buffers from state_dict. All the parameters and buffers will be reset by the tensor in the state_dict

        Parameters:
            state_dict(dict) : Dict contains all the parameters and persistable buffers.
            include_sublayers(bool, optional) : If true, also include the parameters and peresistable buffers from sublayers. Default: True
            use_structured_name(bool, optional) : If true, use structured name as key, otherwise, use parameter or buffer name as key. 
                                                  Default: True
        Returns:
            None

        Examples:
            .. code-block:: python

                import paddle
                
                paddle.disable_static()
                
                emb = paddle.nn.Embedding([10, 10])

                state_dict = emb.state_dict()
                paddle.save(state_dict, "paddle_dy")
                
                para_state_dict, _ = paddle.load("paddle_dy")

                emb.set_state_dict(para_state_dict)

        '''
        def _check_match(key, param):
            state = state_dict.get(key, None)
            if state is None:
                raise ValueError(
                    "{} is not found in the provided dict.".format(key))
            if list(state.shape) != list(param.shape):
                raise ValueError(
                    "{} receives a shape {}, but the expected shape is {}.".
                    format(key, list(state.shape), list(param.shape)))
            return param, state

        matched_param_state = []
        for key, param in self.state_dict().items():
            key_name = key if use_structured_name else param.name
            try:
                match_res = _check_match(key_name, param)
                matched_param_state.append(match_res)
            except ValueError as err:
                warnings.warn(("Skip loading for {}. ".format(key) + str(err)))

        if in_dygraph_mode():
            for param, state in matched_param_state:
                param.set_value(state)
        else:

            def _set_var(var, ndarray):
                t = global_scope().find_var(var.name).get_tensor()
                p = t._place()
                if p.is_cpu_place():
                    place = core.CPUPlace()
                elif p.is_cuda_pinned_place():
                    place = core.CUDAPinnedPlace()
                else:
                    p = core.Place()
                    p.set_place(t._place())
                    place = core.CUDAPlace(p.gpu_device_id())
                t.set(ndarray, place)

            executor = Executor(_get_device())._default_executor
            # restore parameter states
            core._create_loaded_parameter(
                [param for param, state in matched_param_state],
                global_scope(), executor)
            for param, state in matched_param_state:
                _set_var(param, state)
Beispiel #14
0
def train_loop(args, train_program, reader, py_reader, loss, trainer_id):
    train_reader = paddle.batch(
        paddle.reader.shuffle(
            reader.train((args.with_hs or (not args.with_nce))),
            buf_size=args.batch_size * 100),
        batch_size=args.batch_size)

    py_reader.decorate_paddle_reader(train_reader)

    place = fluid.CPUPlace()

    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    exec_strategy = fluid.ExecutionStrategy()

    print("CPU_NUM:" + str(os.getenv("CPU_NUM")))
    exec_strategy.num_threads = int(os.getenv("CPU_NUM"))

    build_strategy = fluid.BuildStrategy()
    if int(os.getenv("CPU_NUM")) > 1:
        build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce

    train_exe = fluid.ParallelExecutor(
        use_cuda=False,
        loss_name=loss.name,
        main_program=train_program,
        build_strategy=build_strategy,
        exec_strategy=exec_strategy)

    profile_state = "CPU"
    profiler_step = 0
    profiler_step_start = 20
    profiler_step_end = 30

    for pass_id in range(args.num_passes):
        epoch_start = time.time()
        py_reader.start()
        batch_id = 0
        start = time.clock()

        try:
            while True:

                if profiler_step == profiler_step_start:
                    fluid.profiler.start_profiler(profile_state)

                loss_val = train_exe.run(fetch_list=[loss.name])
                loss_val = np.mean(loss_val)

                if profiler_step == profiler_step_end:
                    fluid.profiler.stop_profiler('total', 'trainer_profile.log')
                    profiler_step += 1
                else:
                    profiler_step += 1

                if batch_id % 50 == 0:
                    logger.info(
                        "TRAIN --> pass: {} batch: {} loss: {} reader queue:{}".
                        format(pass_id, batch_id,
                               loss_val.mean() / args.batch_size,
                               py_reader.queue.size()))
                if args.with_speed:
                    if batch_id % 1000 == 0 and batch_id != 0:
                        elapsed = (time.clock() - start)
                        start = time.clock()
                        samples = 1001 * args.batch_size * int(
                            os.getenv("CPU_NUM"))
                        logger.info("Time used: {}, Samples/Sec: {}".format(
                            elapsed, samples / elapsed))
                # calculate infer result each 100 batches when using --with_infer_test
                if args.with_infer_test:
                    if batch_id % 1000 == 0 and batch_id != 0:
                        model_dir = args.model_output_dir + '/batch-' + str(
                            batch_id)
                        inference_test(global_scope(), model_dir, args)

                if batch_id % 500000 == 0 and batch_id != 0:
                    model_dir = args.model_output_dir + '/batch-' + str(
                        batch_id)
                    fluid.io.save_persistables(executor=exe, dirname=model_dir)
                    with open(model_dir + "/_success", 'w+') as f:
                        f.write(str(batch_id))
                batch_id += 1

        except fluid.core.EOFException:
            py_reader.reset()
            epoch_end = time.time()
            logger.info("Epoch: {0}, Train total expend: {1} ".format(
                pass_id, epoch_end - epoch_start))

            model_dir = args.model_output_dir + '/pass-' + str(pass_id)
            if trainer_id == 0:
                fluid.io.save_persistables(executor=exe, dirname=model_dir)
                with open(model_dir + "/_success", 'w+') as f:
                    f.write(str(pass_id))
Beispiel #15
0
 def get_tensor(var_name):
     t = global_scope().find_var(var_name).get_tensor()
     return numpy.array(t)
Beispiel #16
0
    def save_quantized_model(self,
                             model,
                             path,
                             input_spec=None,
                             onnx_format=False,
                             **config):
        """
        Save the quantized model for the inference.

        Args:
            model (Layer): The model to be saved.
            path (str): The path prefix to save model. The format is 
                ``dirname/file_prefix`` or ``file_prefix``.
            input_spec (list[InputSpec|Tensor], optional): Describes the input
                of the saved model's forward method, which can be described by
                InputSpec or example Tensor. If None, all input variables of 
                the original Layer's forward method would be the inputs of
                the saved model. Default None.
            onnx_format (bool, optional): Whether to export the quantized model 
                with format of ONNX. Default is False.
            **configs (dict, optional): Other save configuration options for
                compatibility. We do not recommend using these configurations,
                they may be removed in the future. If not necessary, DO NOT use
                them. Default None.
                The following options are currently supported:
                (1) output_spec (list[Tensor]): Selects the output targets of
                the saved model. By default, all return variables of original
                Layer's forward method are kept as the output of the saved model.
                If the provided ``output_spec`` list is not all output variables, 
                the saved model will be pruned according to the given
                ``output_spec`` list. 

        Returns:
            None
        """
        assert isinstance(model, dygraph.Layer), \
            "The model must be the instance of dygraph.Layer."

        paddle.jit.save(layer=model,
                        path=path,
                        input_spec=input_spec,
                        **config)

        is_dynamic_mode = False
        if paddle.in_dynamic_mode():
            is_dynamic_mode = True
            paddle.enable_static()

        place = core.CPUPlace()
        scope = global_scope()
        exe = Executor(place)

        dirname = os.path.dirname(path)
        basename = os.path.basename(path)
        model_filename = basename + INFER_MODEL_SUFFIX
        params_filename = basename + INFER_PARAMS_SUFFIX

        [infer_program, feed_target_names, fetch_targets
         ] = (load_inference_model(dirname=dirname,
                                   executor=exe,
                                   model_filename=model_filename,
                                   params_filename=params_filename))

        self._gather_scales(infer_program, scope, fetch_targets)

        # Remove `moving_average_abs_max_scale` node in sub graphs.
        graph = IrGraph(core.Graph(infer_program.desc), for_test=False)
        for sub_graph in graph.all_sub_graphs():
            for _op in sub_graph.all_op_nodes():
                if _op.name() == "moving_average_abs_max_scale":
                    sub_graph.safe_remove_nodes(_op)
            sub_graph.resolve_hazard()
        infer_program = graph.to_program()

        self._set_skip_quant_attr(infer_program)

        clip_extra = False
        if onnx_format:
            graph = IrGraph(core.Graph(infer_program.desc), for_test=False)
            transform_pass = ReplaceFakeQuantDequantPass(scope, place)
            transform_pass.apply(graph)

            quant_weight_pass = QuantWeightPass(scope, place)
            quant_weight_pass.apply(graph)
            infer_program = graph.to_program()

            clip_extra = True

        save_inference_model(dirname=dirname,
                             feeded_var_names=feed_target_names,
                             target_vars=fetch_targets,
                             executor=exe,
                             main_program=infer_program.clone(),
                             model_filename=model_filename,
                             params_filename=params_filename,
                             clip_extra=clip_extra)

        if is_dynamic_mode:
            paddle.disable_static()
Beispiel #17
0
def create_quant_model(model,
                       params,
                       activation_quantize_type='moving_average_abs_max',
                       weight_quantize_type='channel_wise_abs_max',
                       save=False):
    place = paddle.CUDAPlace(0)
    scope = global_scope()
    exe = paddle.static.Executor(place)
    [inference_program, feed_target_names, fetch_targets
     ] = paddle.static.load_inference_model(path_prefix=None,
                                            executor=exe,
                                            model_filename=model,
                                            params_filename=params)
    graph = IrGraph(core.Graph(inference_program.desc), for_test=True)

    out_scale_op_list = [
        "conv2d",
        "depthwise_conv2d",
        "mul",
        "matmul",
        "relu",
        "leaky_relu",
        "relu6",
        "sigmoid",
        "tanh",
        "prelu",
        "swish",
        "softmax",
        "batch_norm",
        "layer_norm",
        "elementwise_add",
        "pool2d",
        "reshape2",
        "transpose2",
        "concat",
        "elementwise_mul",
        "scale",
        "slice",
        "hard_swish",
        "hard_sigmoid",
        "conv2d_transpose",
        "gru",
        "bilinear_interp",
        "nearest_interp",
        "trilinear_interp",
        "flatten",
        "flatten2",
        "transpose",
        "pad2d",
        "reshape",
        "layer_norm",
    ]
    op_real_in_out_name = {
        "conv2d": [["Input", "Filter"], ["Output"]],
        "depthwise_conv2d": [["Input", "Filter"], ["Output"]],
        "conv2d_transpose": [["Input", "Filter"], ["Output"]],
        "mul": [["X", "Y"], ["Out"]],
        "matmul": [["X", "Y"], ["Out"]],
        "pool2d": [["X"], ["Out"]],
        "elementwise_add": [["X", "Y"], ["Out"]],
        "concat": [["X"], ["Out"]],
        "softmax": [["X"], ["Out"]],
        "argmax": [["X"], ["Out"]],
        "transpose": [["X"], ["Out"]],
        "equal": [["X", "Y"], ["Out"]],
        "gather": [["X"], ["Out"]],
        "greater_equal": [["X", "Y"], ["Out"]],
        "greater_than": [["X", "Y"], ["Out"]],
        "less_equal": [["X", "Y"], ["Out"]],
        "less_than": [["X", "Y"], ["Out"]],
        "mean": [["X"], ["Out"]],
        "not_equal": [["X", "Y"], ["Out"]],
        "reshape": [["X"], ["Out"]],
        "reshape2": [["X"], ["Out"]],
        "transpose2": [["X"], ["Out"]],
        "bilinear_interp": [["X"], ["Out"]],
        "nearest_interp": [["X"], ["Out"]],
        "trilinear_interp": [["X"], ["Out"]],
        "slice": [["Input"], ["Out"]],
        "squeeze": [["X"], ["Out"]],
        "elementwise_sub": [["X", "Y"], ["Out"]],
        "relu": [["X"], ["Out"]],
        "relu6": [["X"], ["Out"]],
        "leaky_relu": [["X"], ["Out"]],
        "prelu": [["X"], ["Out"]],
        "tanh": [["X"], ["Out"]],
        "swish": [["X"], ["Out"]],
        "dropout": [["X"], ["Out"]],
        "batch_norm": [["X"], ["Y"]],
        "layer_norm": [["X"], ["Y"]],
        "sigmoid": [["X"], ["Out"]],
        "elementwise_mul": [["X", "Y"], ["Out"]],
        "scale": [["X"], ["Out"]],
        "hard_swish": [["X"], ["Out"]],
        "hard_sigmoid": [["X"], ["Out"]],
        "gru": [["Input", "Weight"], ["Hidden"]],
        "lstm": [["Input", "Weight"], ["Hidden"]],
        "pad2d": [["X"], ["Out"]],
        "flatten": [["X"], ["Out"]],
        "flatten2": [["X"], ["Out"]],
    }

    def _get_op_output_var_names(op):
        """ """
        assert isinstance(op, (IrNode, Operator)), \
            "The input op should be IrNode or Operator."
        var_names = []
        op_name = op.name() if isinstance(op, IrNode) \
            else op.type
        if op_name not in op_real_in_out_name:
            return []

        name_list = op_real_in_out_name[op_name][1]
        for name in name_list:
            var_name = op.output(name)
            if isinstance(var_name, list):
                var_names.extend(var_name)
            else:
                var_names.append(var_name)
        return var_names

    transform_pass = QuantizationTransformPass(
        scope=scope,
        place=place,
        activation_quantize_type=activation_quantize_type,
        weight_quantize_type=weight_quantize_type)
    transform_pass.apply(graph)

    op_nodes = graph.all_op_nodes()
    for op_node in op_nodes:
        if op_node.name() in out_scale_op_list:
            var_names = _get_op_output_var_names(op_node)
            for var_name in var_names:
                in_node = graph._find_node_by_name(op_node.outputs, var_name)
                if in_node.dtype() not in \
                    [core.VarDesc.VarType.FP64, core.VarDesc.VarType.FP32]:
                    continue

                op_node.op()._set_attr("out_threshold", 3.0)

    # Freeze graph for inference, but the weight of fc/conv is still float type.
    freeze_pass = QuantizationFreezePass(
        scope=scope, place=place, weight_quantize_type=weight_quantize_type)
    freeze_pass.apply(graph)

    main_program = graph.to_program()

    # modify fake_quantize_moving_average_abs_max(InScale) and fake_channel_wise_dequantize_max_abs(Scales)
    op_nodes = graph.all_op_nodes()
    for op_node in op_nodes:
        if op_node.name() == 'fake_quantize_moving_average_abs_max':
            var_name = op_node.input("InScale")[0]
            tensor = scope.var(var_name).get_tensor()
            tensor.set(np.array([1], dtype=np.float32), place)
        elif op_node.name() == 'fake_channel_wise_dequantize_max_abs':
            var_name = op_node.input("Scales")[0]
            tensor = scope.var(var_name).get_tensor()
            tensor.set(np.ones(tensor.shape(), dtype=np.float32), place)

    if save:
        fluid.io.save_inference_model('test_inference_model',
                                      feed_target_names,
                                      fetch_targets,
                                      exe,
                                      main_program=main_program)

    feed_vars = [
        main_program.global_block().var(name) for name in feed_target_names
    ]
    serialized_program = paddle.static.serialize_program(feed_vars,
                                                         fetch_targets,
                                                         program=main_program)
    serialized_params = paddle.static.serialize_persistables(
        feed_vars, fetch_targets, executor=exe, program=main_program)
    return serialized_program, serialized_params
Beispiel #18
0
    def freeze_program(self, program, place, scope=None):
        """Freeze input training program for inference.

        Args:
            program (Program): the input program to be transpile.
        """

        self.is_test = True
        scope = global_scope() if scope is None else scope
        program = default_main_program() if program is None else program

        persistable_vars = [
            v.name
            for v in filter(lambda var: var.persistable, program.list_vars())
        ]
        op_in_rename_map = [
            collections.OrderedDict() for _ in range(len(program.blocks))
        ]
        op_out_rename_map = [
            collections.OrderedDict() for _ in range(len(program.blocks))
        ]
        var_scale_map = [
            collections.OrderedDict() for _ in range(len(program.blocks))
        ]

        def _remove_fake_quant_and_dequant_op(block, op):
            idx = block.ops.index(op)
            block_id = block.idx
            k = op.output('Out')[0]
            v = op.input('X')[0]
            if v not in op_in_rename_map[block_id]:
                op_in_rename_map[block_id][k] = v
            else:
                op_in_rename_map[block_id][k] = op_in_rename_map[block_id][v]
            block._remove_op(idx)

        def _insert_post_dequant_op(block, op):
            idx = block.ops.index(op)
            block_id = block.idx
            max_range = None
            scale_var = None
            for name in op.input_arg_names:
                #rename input name of the op to the input name of last op which has be removed
                if name in op_in_rename_map[block_id]:
                    op._rename_input(name, op_in_rename_map[block_id][name])

                scale_v = var_scale_map[block_id][_original_var_name(name)]
                if _original_var_name(name) in persistable_vars:
                    param_range = (1 << (self.weight_bits - 1)) - 1
                    act_range = (1 << (self.activation_bits - 1)) - 1
                    assert _is_float(scale_v)
                    max_range = param_range * act_range / scale_v
                else:
                    assert isinstance(scale_v, Variable)
                    scale_var = scale_v

            if len(op.output_arg_names) != 1:
                raise ValueError("Only support one output, but op %s has"
                                 " more than one output." % (op.type))
            out_var = block.var(op.output_arg_names[0])
            dequant_var = block.create_var(name=_dequantized_var_name(
                out_var.name),
                                           type=out_var.type,
                                           shape=out_var.shape,
                                           dtype=out_var.dtype)
            # insert fake_dequantize_op
            dequant_op = block._insert_op(
                idx + 1,
                type="fake_dequantize_max_abs",
                attrs={'max_range': float(max_range)},
                inputs={
                    "X": out_var,
                    'Scale': scale_var
                },
                outputs={"Out": dequant_var})
            op_out_rename_map[block_id][out_var.name] = dequant_var.name
            return dequant_var

        def _load_var(name):
            return np.array(scope.find_var(name).get_tensor())

        def _restore_var(name, arr):
            t = scope.find_var(name).get_tensor()
            t.set(arr, place)

        for block in program.blocks:
            ops = list(block.ops)
            block_id = block.idx
            for op in ops:
                op_type = op.type

                # insert dequant_op after fc/conv, need to rename
                # input of the followed ops(of fc/conv) to the dquant_op
                for name in op.input_arg_names:
                    if name in op_out_rename_map[block_id]:
                        op._rename_input(name,
                                         op_out_rename_map[block_id][name])

                if op_type in self.fake_quant_op_types:
                    in_arg_name = op.input('X')[0]
                    if in_arg_name in persistable_vars:
                        if self.weight_quantize_type == 'abs_max':
                            param = _load_var(in_arg_name)
                            scale_v = np.max(np.abs(param))
                        else:
                            scale_v = _load_var(op.output('OutScale')[0])
                        var_scale_map[block_id][in_arg_name] = scale_v
                    else:
                        scale_v = block.var(op.output('OutScale')[0])
                        var_scale_map[block_id][in_arg_name] = scale_v

                    if in_arg_name in persistable_vars:
                        _remove_fake_quant_and_dequant_op(block, op)
                        # quantize weight and restore
                        param_t = _load_var(in_arg_name)
                        param_q_t = quant(param_t, scale_v, self.weight_bits)
                        _restore_var(in_arg_name, param_q_t)

                if op_type in self.fake_dequant_op_types:
                    _remove_fake_quant_and_dequant_op(block, op)

                if op_type in _QUANTIZABLE_OP_TYPES:
                    dequant_var = _insert_post_dequant_op(block, op)

        # remove the unused var in ProgramDesc
        self._remove_unused_var(program)
Beispiel #19
0
    def _load_optimizer(self, state, executor):
        prog = self._progs.get('train', None)
        optim = list(filter(is_belong_to_optimizer, prog.list_vars()))
        if not optim:
            return

        fluid.core._create_loaded_parameter(optim, global_scope(), executor)

        converted_state = dict(state)
        for var in optim:
            if var.name in ["@LR_DECAY_COUNTER@", "global_step"]:
                # When using learning rate scheduler, dygraph would name the
                # global step var as "global_step" to save, while static-graph
                # would has a state var named as "@LR_DECAY_COUNTER@".
                # NOTE: dygraph saved global_step is 1 larger than that in
                # static-graph, since the time of global_step to increase is
                # different.
                state_val = (
                    np.array(converted_state.pop("global_step")) - 1
                ) if "global_step" in converted_state else converted_state.pop(
                    "@LR_DECAY_COUNTER@", None)
                if state_val is not None:
                    converted_state[var.name] = state_val
            elif var.name.startswith("learning_rate_"):
                # When using static learning rate, static-graph would make it
                # a persistable var named 'unique_name.generate("learning_rate")',
                # However, dygraph wouldn't save it.
                if var.name not in state:
                    continue
            else:
                # moment and other accumulators
                if var.name not in converted_state:
                    # try to convert from dygraph name
                    opt_name = self.model._optimizer._name
                    opt_cls_name = self.model._optimizer.__class__.__name__
                    opt_unq_name = None
                    for name in self.model._optimizer._accumulators.keys():
                        accum_name = name if opt_name is None else name[
                            len(opt_name) + 1:]
                        for param_name, state_var in self.model._optimizer._accumulators[
                                name].items():
                            if opt_unq_name is None:
                                # can not infer out the exact unique(opt_name),
                                # thus try to extract rather than generate
                                for state_key in sorted(state.keys(),
                                                        key=lambda x: len(x),
                                                        reverse=True):
                                    prefix = param_name + "_" + (
                                        opt_cls_name if opt_name is None else
                                        opt_name) + "_"
                                    if state_key.startswith(prefix):
                                        prefix_offset = state_key[len(
                                            prefix):].find("_") + len(prefix)
                                        opt_unq_name = state_key[
                                            len(param_name +
                                                "_"):prefix_offset]
                                        # TODO: assert
                                        # assert opt_unq_name is None
                                    # gen(param.name + "_" + gen(opt_name) + "_" + accum_name)
                                    # always end with "_0" since the unique optimizer._name
                            dy_state_name = (param_name + "_" + opt_unq_name +
                                             "_" + accum_name + "_0")
                            converted_state[
                                state_var.name] = converted_state.pop(
                                    dy_state_name)

            assert var.name in converted_state, \
                "variable [{}] is not in optimizer state file".format(var.name)
            self._set_var(var, converted_state[var.name])
Beispiel #20
0
def save_vars(executor,
              dirname,
              main_program=None,
              vars=None,
              predicate=None,
              filename=None):
    """
    This API saves specific variables in the `Program` to files.

    There are two ways to specify the variables to be saved: set variables in
    a list and assign it to the `vars`, or use the `predicate` function to select
    variables that make `predicate(variable) == True`. The first way has a higher priority.

    The `dirname` is used to specify the folder where to save variables.
    If you prefer to save variables in separate files in the `dirname` floder,
    do not set `filename`. If you prefer to save all variables in a single file,
    use `filename` to specify it.

    Args:
        executor(Executor): The executor to run for saving variables.
        dirname(str, optional): The folder where to save variables.
                            When you need to save the parameter to the memory, set it to None.
        main_program(Program, optional): The program whose variables will be saved.
                                    If it is None, the default main program will
                                    be used automatically.
                                    Default: None
        vars(list[Variable], optional): The list contains all variables to be saved.
                                        Default: None
        predicate(function, optional): The function selects the variables that make
                                       `predicate(variable) == True`.
                                       Default: None
        filename(str, optional): If you prefer to save all variables in a single file,
                                 use `filename` to specify it. Otherwise, let `filename` be None.
                                 Default: None

    Returns:
        str: When saving parameters to a file, returns None.
             When saving parameters to memory, returns a binary string containing parameters.

    Raises:
        TypeError: If `main_program` is not an instance of Program nor None.

    Examples:
        .. code-block:: python

            import paddle.fluid as fluid

            main_prog = fluid.Program()
            startup_prog = fluid.Program()
            with fluid.program_guard(main_prog, startup_prog):
                data = fluid.layers.data(name="img", shape=[64, 784], append_batch_size=False)
                w = fluid.layers.create_parameter(shape=[784, 200], dtype='float32', name='fc_w')
                b = fluid.layers.create_parameter(shape=[200], dtype='float32', name='fc_b')
                hidden_w = fluid.layers.matmul(x=data, y=w)
                hidden_b = fluid.layers.elementwise_add(hidden_w, b)
            place = fluid.CPUPlace()
            exe = fluid.Executor(place)
            exe.run(startup_prog)

            # The first usage: use `vars` to set the saved variables.
            var_list = [w, b]
            path = "./my_paddle_vars"
            fluid.io.save_vars(executor=exe, dirname=path, vars=var_list,
                            filename="vars_file")
            # w and b will be save in a file named "var_file".

            # The second usage: use `predicate` to select the saved variable.
            def name_has_fc(var):
                res = "fc" in var.name
                return res
            param_path = "./my_paddle_model"
            fluid.io.save_vars(executor=exe, dirname=param_path, main_program=main_prog, vars=None, predicate = name_has_fc)
            # all variables whose names contain "fc " are saved.
    """
    save_to_memory = False
    if dirname is None and filename is None:
        save_to_memory = True

    main_program = _get_valid_program(main_program)

    if vars is None:
        return save_vars(
            executor,
            main_program=main_program,
            dirname=dirname,
            vars=list(filter(predicate, main_program.list_vars())),
            filename=filename)
    else:
        params_var_name = unique_name.generate("saved_params")
        # give warning when there is no var in model
        if len(list(vars)) == 0:
            warnings.warn(
                "no variable in your model, please ensure there are any variables in your model to save"
            )
            return None

        save_program = Program()
        save_block = save_program.global_block()

        save_var_map = {}
        for each_var in vars:
            # NOTE: don't save the variable which type is RAW
            if each_var.type == core.VarDesc.VarType.RAW:
                continue
            new_var = _clone_var_in_block_(save_block, each_var)
            if filename is None and save_to_memory is False:
                save_file_path = os.path.join(
                    os.path.normpath(dirname), new_var.name)
                save_block.append_op(
                    type='save',
                    inputs={'X': [new_var]},
                    outputs={},
                    attrs={'file_path': os.path.normpath(save_file_path)})
            else:
                save_var_map[new_var.name] = new_var

        if filename is not None or save_to_memory:
            save_var_list = []
            for name in sorted(save_var_map.keys()):
                save_var_list.append(save_var_map[name])

            save_path = str()
            if save_to_memory is False:
                save_path = os.path.join(os.path.normpath(dirname), filename)

            saved_params = save_block.create_var(
                type=core.VarDesc.VarType.RAW, name=params_var_name)
            saved_params.desc.set_persistable(True)
            save_block.append_op(
                type='save_combine',
                inputs={'X': save_var_list},
                outputs={'Y': saved_params},
                attrs={
                    'file_path': save_path,
                    'save_to_memory': save_to_memory
                })

        #NOTE(zhiqiu): save op will add variable kLookupTablePath in save_program.desc,
        # which leads to diff on save_program and its desc. Call _sync_with_cpp
        # to keep consistency.
        save_program._sync_with_cpp()
        executor.run(save_program)
        if save_to_memory:
            return global_scope().find_var(params_var_name).get_bytes()
Beispiel #21
0
 def __get_reader__():
     scope = global_scope()
     var = scope.find_var(reader.name)
     return var.get_reader()
Beispiel #22
0
def to_numpy(var):
    assert isinstance(var, (Variable, fluid.core.VarBase)), "not a variable"
    if isinstance(var, fluid.core.VarBase):
        return var.numpy()
    t = global_scope().find_var(var.name).get_tensor()
    return np.array(t)
Beispiel #23
0
def ctr_reader(
        feed_dict,
        file_type,  # gzip or plain
        file_format,  # csv or svm
        dense_slot_index,
        sparse_slot_index,
        capacity,
        thread_num,
        batch_size,
        file_list,
        slots,
        name=None):
    """
    Create a CTR reader for data feeding in Python

    This layer returns a Reader Variable.
    The Reader provides :code:`decorate_paddle_reader()` and
    :code:`decorate_tensor_provider()` to set a Python generator as the data
    source in Python side. When :code:`Executor::Run()` is invoked in C++
    side, the data from the generator would be read automatically. Unlike
    :code:`DataFeeder.feed()`, the data reading process and
    :code:`Executor::Run()` process can run in parallel using
    :code:`py_reader`. The :code:`start()` method of the Reader should be
    called when each pass begins, while the :code:`reset()` method should be
    called when the pass ends and :code:`fluid.core.EOFException` raises.
    Note that :code:`Program.clone()` method cannot clone :code:`py_reader`.

    Args:
       feed_dict(list(variable)): a list of data variable.
       file_type('gzip'|'plain'): the type of the data file
       file_format('csv'|'svm'): csv data or svm data format.
        cvs data format is :
            label dense_fea,dense_fea sparse_fea,sparse_fea
        the svm data format is :
            label slot1:fea_sign slot2:fea_sign slot1:fea_sign
       dense_slot_index(list(int)): the index of dense slots
       sparse_slot_index(list(int)): the index of sparse slots
       capacity(int): The buffer capacity maintained by :code:`py_reader`.
       thread_num(int): the thread num to read files by cpp reader.
       batch_size(int): batch size of data.
       file_list(list(str)): List of file names that need to read.
       slots(list(int64)): list of slot id.
       name(string): The prefix Python queue name and Reader name. None will
            be generated automatically.

    Returns:
       Variable: A Reader from which we can get feeding data.

    Examples:

        1. The basic usage of :code:`ctr_reader` is as follows:

     .. code-block:: python

        py_reader = fluid.contrib.ctr_reader.ctr_reader(
          feed_dict=datas, file_type='plain', file_format='csv',
          file_list=file_list, dense_slot_indexs=[1, 2, 3, 4], sparse_slot_indexs=[],
          capacity=64, thread_num=20, batch_size=1000, slots=[], name='ctr_reader')

    """
    if name is None:
        queue_name = unique_name('lod_tensor_blocking_queue')
        reader_name = unique_name('create_ctr_reader')
    else:
        queue_name = "_".join([name, "queue"])
        reader_name = "_".join([name, "reader"])

    var = global_scope().var(queue_name)
    feed_queue = core.init_lod_tensor_blocking_queue(var, capacity)

    startup_blk = default_startup_program().current_block()
    reader_var = startup_blk.create_var(name=reader_name)
    startup_blk.append_op(type='create_ctr_reader',
                          inputs={'blocking_queue': [queue_name]},
                          outputs={'Out': [reader_var]},
                          attrs={
                              'use_data_config': False,
                              'thread_num': thread_num,
                              'batch_size': batch_size,
                              'file_list': file_list,
                              'file_type': file_type,
                              'file_format': file_format,
                              'dense_slot_index': dense_slot_index,
                              'sparse_slot_index': sparse_slot_index,
                              'sparse_slots': slots,
                              'ranks': [],
                              'lod_levels': [],
                              'shape_concat': []
                          })

    dtypes = [data.dtype for data in feed_dict]
    reader_var.desc.set_dtypes(dtypes)
    reader_var.persistable = True

    main_prog_reader_var = _copy_reader_var_(
        default_main_program().current_block(), reader_var)

    reader = monkey_patch_reader_methods(main_prog_reader_var)

    # monkey patch py_reader special methods
    reader.queue = feed_queue
    reader.exited = False

    main_blk = default_main_program().current_block()
    main_blk.append_op(type='read',
                       inputs={'Reader': [reader]},
                       attrs={'infer_out': False},
                       outputs={'Out': feed_dict})

    return reader