Exemple #1
0
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 pad_mode='same',
                 padding=0,
                 dilation=1,
                 group=1,
                 eps=1e-5,
                 momentum=0.997,
                 weight_init=None,
                 beta_init=None,
                 gamma_init=None,
                 mean_init=None,
                 var_init=None,
                 quant_delay=0,
                 freeze_bn=100000,
                 fake=True,
                 num_bits=8,
                 per_channel=False,
                 symmetric=False,
                 narrow_range=False):
        """init Conv2dBatchNormQuant layer"""
        super(Conv2dBatchNormQuant, self).__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = twice(kernel_size)
        self.stride = twice(stride)
        self.pad_mode = pad_mode
        self.padding = padding
        self.dilation = twice(dilation)
        self.group = group
        self.eps = eps
        self.momentum = momentum
        self.quant_delay = quant_delay
        self.freeze_bn = freeze_bn
        self.fake = fake
        self.num_bits = num_bits
        self.per_channel = per_channel
        self.symmetric = symmetric
        self.narrow_range = narrow_range
        self.is_gpu = context.get_context('device_target') == "GPU"

        # initialize convolution op and Parameter
        if context.get_context('device_target') == "Ascend" and group > 1:
            validator.check_integer('group', group, in_channels, Rel.EQ)
            validator.check_integer('group', group, out_channels, Rel.EQ)
            self.conv = P.DepthwiseConv2dNative(channel_multiplier=1,
                                                kernel_size=self.kernel_size,
                                                pad_mode=pad_mode,
                                                pad=padding,
                                                stride=self.stride,
                                                dilation=self.dilation)
            if weight_init is None:
                weight_init = initializer('normal',
                                          [1, in_channels, *self.kernel_size])
            channel_axis = 1
        else:
            self.conv = P.Conv2D(out_channel=out_channels,
                                 kernel_size=self.kernel_size,
                                 pad_mode=pad_mode,
                                 pad=padding,
                                 stride=self.stride,
                                 dilation=self.dilation,
                                 group=group)
            if weight_init is None:
                weight_init = initializer(
                    'normal',
                    [out_channels, in_channels // group, *self.kernel_size])
            channel_axis = 0
        self.weight = Parameter(weight_init, name='weight')

        # initialize batchnorm Parameter
        if gamma_init is None:
            gamma_init = initializer('ones', [out_channels])
        self.gamma = Parameter(gamma_init, name='gamma')
        if beta_init is None:
            beta_init = initializer('zeros', [out_channels])
        self.beta = Parameter(beta_init, name='beta')
        if mean_init is None:
            mean_init = initializer('zeros', [out_channels])
        self.moving_mean = Parameter(mean_init,
                                     name='moving_mean',
                                     requires_grad=False)
        if var_init is None:
            var_init = initializer('ones', [out_channels])
        self.moving_variance = Parameter(var_init,
                                         name='moving_variance',
                                         requires_grad=False)

        # initialize fake ops
        self.fake_quant_weight = FakeQuantWithMinMax(min_init=-6,
                                                     max_init=6,
                                                     ema=False,
                                                     num_bits=num_bits,
                                                     quant_delay=quant_delay,
                                                     per_channel=per_channel,
                                                     out_channels=out_channels,
                                                     symmetric=symmetric,
                                                     narrow_range=narrow_range)
        self.batchnorm_fold = BatchNormFoldCell(epsilon=eps,
                                                momentum=momentum,
                                                freeze_bn=freeze_bn)
        self.correct_mul = P.CorrectionMul(channel_axis)
        if context.get_context('device_target') == "Ascend":
            self.batchnorm_fold2_train = P.BatchNormFold2_D(
                freeze_bn=freeze_bn)
            self.batchnorm_fold2_infer = P.BatchNormFold2_D(freeze_bn=0)
        elif context.get_context('device_target') == "GPU":
            self.batchnorm_fold2_train = P.BatchNormFold2(freeze_bn=freeze_bn)
            self.batchnorm_fold2_infer = P.BatchNormFold2(freeze_bn=0)
        else:
            raise ValueError("Unsupported platform: {}".format(
                context.get_context('device_target')))
        self.step = Parameter(initializer('normal', [1], dtype=mstype.int32),
                              name='step',
                              requires_grad=False)
        self.one = Tensor(1, mstype.int32)
        self.assignadd = P.AssignAdd()
Exemple #2
0
    def compile(self,
                obj,
                *args,
                phase='predict',
                do_convert=True,
                auto_parallel_mode=False):
        """
        Compiles graph.

        Args:
            obj (Function/Cell): The function or cell instance need compile.
            args (tuple): Function or cell input arguments.
            phase (str): The name of compile phase. Default: 'predict'.
            do_convert (bool): When set to True, convert ME graph to GE graph after compiling graph.
            auto_parallel_mode: When set to True, use auto parallel mode to compile graph.

        Return:
            Str, the full phase of the cell.
            Bool, if the graph has been compiled before, return False, else return True.
        """
        from mindspore import nn
        from mindspore.ops.composite import GradOperation

        class InputsToAttrCell(nn.Cell):
            """The cell that converts non-tensor inputs to attr."""
            def __init__(self, net, args_names, non_tensor_inputs):
                super(InputsToAttrCell, self).__init__()
                self.net = net
                self.args_names = args_names
                self.non_tensor_inputs = non_tensor_inputs
                self.inputs_to_attr = True

            def construct(self, *tensor_inputs):
                real_inputs = ()
                index = 0
                for i in args_names:
                    if i in self.non_tensor_inputs.keys():
                        real_inputs += (self.non_tensor_inputs[i], )
                    else:
                        real_inputs += (tensor_inputs[index], )
                        index += 1
                return self.net(*real_inputs)

        args_names, args_list = _generate_pip_args(obj, *args)
        if not hasattr(obj, "inputs_to_attr"):
            dic = dict(zip(args_names, args_list))
            key = generate_key(phase, dic)
            obj.phase_prefix = str(key[1])
            if 'export' in phase:
                phase = phase + '.' + obj.phase_prefix + '.' + str(
                    obj.create_time)
            else:
                phase = obj.phase_prefix + phase + '.' + str(obj.create_time)

            if phase in self.compile_cache.keys():
                logger.debug("%r graph has existed.", phase)
                return phase, False

        if getattr(obj, "support_non_tensor_inputs", None):
            for i in obj.__dict__.values():
                if isinstance(i, GradOperation):
                    raise ValueError(
                        "Not support set 'support_non_tensor_inputs' to the 'True' for grad net, "
                        "only support forward net.")
            attrs = {}
            inputs = []
            for key, value in dic.items():
                if not isinstance(value, (Tensor, MetaTensor)):
                    attrs[key] = value
                else:
                    inputs.append(value)
            if attrs:
                inputs_to_attr_cell = InputsToAttrCell(obj, args_names, attrs)
                return self.compile(inputs_to_attr_cell, *inputs, phase=phase)

        obj.check_names()
        _check_full_batch()
        self._set_dataset_mode(args_list)

        is_sink_mode = args and isinstance(args[0],
                                           Tensor) and args[0].virtual_flag
        if auto_parallel_mode and _need_to_full(
        ) and not is_sink_mode and obj.auto_parallel_compile_and_run():
            args_full = _to_full_tensor(args, _get_device_num(),
                                        _get_global_rank())
            _, args_list = _generate_pip_args(obj, *args_full)

        enable_debug_runtime = context.get_context("enable_debug_runtime")
        enable_ge = context.get_context("enable_ge")
        use_vm = not enable_ge or (enable_debug_runtime
                                   and context.get_context("mode")
                                   == context.PYNATIVE_MODE)
        result = self._executor.compile(obj, args_list, phase, use_vm)
        self.compile_cache[phase] = phase
        if not result:
            raise RuntimeError("Executor compile failed.")
        graph = self._executor.get_func_graph(phase)

        if graph is None:
            logger.error("%r graph compile failed.", phase)
        if not do_convert:
            return phase, True

        if auto_parallel_mode:
            obj.parameter_layout_dict = self._executor.get_parameter_layout(
                phase)
        replace = obj.init_parameters_data(
            auto_parallel_mode=auto_parallel_mode)
        if not enable_debug_runtime or enable_ge:
            if auto_parallel_mode:
                obj.load_parameter_slice(None)

        self._updata_param_node_default_input(phase, replace)

        # set parallel inputs in sink mode
        if auto_parallel_mode and is_sink_mode:
            obj.set_parallel_input_with_inputs(*args)

        # the following GE init process is not needed when use vm or ms backend
        if enable_ge:
            self._build_data_graph(obj, phase)

            if "export" not in phase:
                init_phase = "init_subgraph" + "." + str(obj.create_time)
                _exec_init_graph(obj, init_phase)
        elif not enable_ge and "export" in phase:
            self._build_data_graph(obj, phase)
        elif BROADCAST_PHASE not in phase and _get_parameter_broadcast():
            auto_split_param_names = []
            if auto_parallel_mode:
                auto_split_param_names = self._get_auto_split_param_names(
                    obj.parameter_layout_dict)

            broadcast_params_dict = obj.parameters_broadcast_dict()
            if auto_split_param_names and broadcast_params_dict:
                broadcast_params_dict = OrderedDict()
                for param_name, param in obj.parameters_broadcast_dict().items(
                ):
                    if param_name not in auto_split_param_names:
                        broadcast_params_dict[param_name] = param
            broadcast_phase = "_broadcast_subgraph"
            self._build_broadcast_graph(broadcast_params_dict, broadcast_phase)

        return phase, True
Exemple #3
0
def _get_device():
    """Get the current device (`GPU`, `CPU`, `Ascend`)"""
    return context.get_context('device_target')
Exemple #4
0
    def __init__(self,
                 input_size,
                 hidden_size,
                 num_layers=1,
                 has_bias=True,
                 batch_first=False,
                 dropout=0,
                 bidirectional=False):
        super(LSTM, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.has_bias = has_bias
        self.batch_first = validator.check_value_type("batch_first", batch_first, [bool], self.cls_name)
        self.dropout = float(dropout)
        self.bidirectional = bidirectional
        if self.batch_first:
            self.transpose1 = P.Transpose()
            self.transpose2 = P.Transpose()
        num_directions = 2 if self.bidirectional else 1
        self.cpu_target = False
        if context.get_context("device_target") == "CPU":
            self.cpu_target = True
        if not self.cpu_target:
            self.lstm = P.LSTM(input_size=self.input_size,
                               hidden_size=self.hidden_size,
                               num_layers=self.num_layers,
                               has_bias=self.has_bias,
                               bidirectional=self.bidirectional,
                               dropout=self.dropout)
            weight_size = 0
            gate_size = 4 * self.hidden_size
            for layer in range(self.num_layers):
                input_layer_size = self.input_size if layer == 0 else self.hidden_size * num_directions
                increment_size = gate_size * input_layer_size
                increment_size += gate_size * self.hidden_size
                if self.has_bias:
                    increment_size += 2 * gate_size
                weight_size += increment_size * num_directions
            self.weight = Parameter(initializer(0.0, [weight_size, 1, 1]), name='weight')
        else:
            input_size_list = []
            input_size_list.append(self.input_size)
            for i in range(self.num_layers - 1):
                input_size_list.append(self.hidden_size * num_directions)
            weights = []
            layers = []
            bias_size = 0 if not self.has_bias else num_directions * self.hidden_size * 4
            for i in range(num_layers):
                weight_size = (input_size_list[i] + self.hidden_size) * num_directions * self.hidden_size * 4
                w_np = np.ones([weight_size, 1, 1]).astype(np.float32) * 0.01
                if has_bias:
                    bias_np = np.zeros([bias_size, 1, 1]).astype(np.float32)
                    w_np = np.concatenate([w_np, bias_np], axis=0)
                weights.append(Parameter(initializer(Tensor(w_np), w_np.shape), name='weight' + str(i)))

                layers.append(nn.LSTMCell(input_size=input_size_list[i],
                                          hidden_size=self.hidden_size,
                                          has_bias=self.has_bias,
                                          bidirectional=self.bidirectional,
                                          dropout=self.dropout))
            self.lstms = layers
            self.weight = ParameterTuple(tuple(weights))
        self.fill = P.Fill()
        self.shape = P.Shape()
Exemple #5
0
    def _train(self,
               epoch,
               train_dataset,
               callbacks=None,
               dataset_sink_mode=True,
               sink_size=-1):
        """
        Training.

        Args:
            epoch (int): Total number of iterations on the data.
            train_dataset (Dataset): A training dataset iterator. If there is no
                                     loss_fn, a tuple with multiply data (data1, data2, data3, ...) will be
                                     returned and passed to the network. Otherwise, a tuple (data, label) will
                                     be returned, and the data and label are passed to the network and loss
                                     function respectively.
            callbacks (list): List of callback object. Callbacks which should be executed while training. Default: None.
            dataset_sink_mode (bool): Determines whether to pass the data through dataset channel. Default: True.
                                      Configure pynative mode, the training process will be performed with
                                      dataset not sink.
            sink_size (int): Control the amount of data each sink. Default: -1.
        """
        epoch = check_int_positive(epoch)
        self._train_network.set_train()

        if self._parameter_broadcast:
            self._train_network.set_broadcast_flag()

        cb_params = _InternalCallbackParam()
        cb_params.train_network = self._train_network
        cb_params.epoch_num = epoch
        if dataset_sink_mode and sink_size > 0:
            cb_params.batch_num = sink_size
        else:
            cb_params.batch_num = train_dataset.get_dataset_size()
        cb_params.mode = "train"
        cb_params.loss_fn = self._loss_fn
        cb_params.optimizer = self._optimizer
        cb_params.parallel_mode = self._parallel_mode
        cb_params.device_number = self._device_number
        cb_params.train_dataset = train_dataset
        cb_params.list_callback = self._transform_callbacks(callbacks)
        cb_params.train_dataset_element = None
        cb_params.network = self._network
        ms_role = os.getenv("MS_ROLE")
        if ms_role in ("MS_PSERVER", "MS_SCHED"):
            epoch = 1

        # build callback list
        with _CallbackManager(callbacks) as list_callback:
            if not dataset_sink_mode:
                self._train_process(epoch, train_dataset, list_callback,
                                    cb_params)
            elif context.get_context("mode") == context.PYNATIVE_MODE:
                logger.warning(
                    "The pynative mode cannot support dataset sink mode currently."
                    "So the training process will be performed with dataset not sink."
                )
                self._train_process(epoch, train_dataset, list_callback,
                                    cb_params)
            else:
                self._train_dataset_sink_process(epoch, train_dataset,
                                                 list_callback, cb_params,
                                                 sink_size)
Exemple #6
0
    def __init__(self,
                 learning_rate,
                 parameters,
                 weight_decay=0.0,
                 loss_scale=1.0):
        super(Optimizer, self).__init__(auto_prefix=False)
        if parameters is not None and not isinstance(parameters, list):
            parameters = list(parameters)

        if not parameters:
            raise ValueError("Optimizer got an empty parameter list.")

        if not isinstance(parameters[0], (dict, Parameter)):
            raise TypeError(
                "Only a list of Parameter or dict can be supported.")

        if isinstance(loss_scale, int):
            loss_scale = float(loss_scale)
        validator.check_value_type("loss_scale", loss_scale, [float],
                                   self.cls_name)
        validator.check_positive_float(loss_scale, "loss_scale", self.cls_name)
        self.loss_scale = loss_scale

        weight_decay = self._preprocess_weight_decay(weight_decay)

        self._unique = True
        self._target = context.get_context("device_target")
        self.dynamic_lr = False
        self.assignadd = None
        self.global_step = None
        self.is_group = False
        self.is_group_lr = False
        self.is_group_params_ordered = False
        learning_rate = self._preprocess_single_lr(learning_rate)
        if isinstance(parameters[0], dict):
            self.is_group = True
            self.group_params = []
            self.group_lr = []
            self.group_weight_decay = []
            self._init_group_params(parameters, learning_rate, weight_decay)

        # The final value of dynamic_lr can be determined after the process of parse_single_lr and init_group_params
        if self.dynamic_lr:
            self.assignadd = P.AssignAdd()
            self.global_step = Parameter(initializer(0, [1], mindspore.int32),
                                         name='global_step')

        if self.is_group_lr:
            if self.dynamic_lr:
                self.learning_rate = CellList(self.group_lr)
            else:
                self.learning_rate = ParameterTuple(self.group_lr)
        else:
            self.learning_rate = self._build_single_lr(learning_rate,
                                                       'learning_rate')
        if self.is_group:
            self.parameters = ParameterTuple(self.group_params)
            self.weight_decay = tuple(self.group_weight_decay)
            self.weight_decay_tensor_tuple = tuple(
                Tensor(x, mstype.float32) for x in self.group_weight_decay)
            decay_filter = lambda x: x > 0
            self.decay_flags = tuple(
                decay_filter(x) for x in self.weight_decay)
            self.exec_weight_decay = any(self.decay_flags)
        else:
            self.parameters = ParameterTuple(parameters)
            self.weight_decay = weight_decay * loss_scale
            self.weight_decay_tensor = Tensor(self.weight_decay,
                                              mstype.float32)
            decay_filter = lambda x: 'beta' not in x.name and 'gamma' not in x.name
            self.decay_flags = tuple(decay_filter(x) for x in self.parameters)
            self.exec_weight_decay = self.weight_decay > 0
        # when a parameter has been unique, there is no need do another unique in optimizer.
        for param in self.parameters:
            if param.unique:
                self._unique = False
                break
        ps_filter = lambda x: x.is_param_ps
        self.ps_parameters = tuple(ps_filter(x) for x in self.parameters)
        ps_cache_filter = lambda x: x.cache_enable
        self.cache_enable = tuple(ps_cache_filter(x) for x in self.parameters)
        self.reciprocal_scale = Tensor(1.0 / loss_scale, mstype.float32)
        self.need_scale = loss_scale != 1.0
        self.global_step_increase_tensor = Tensor(1, mstype.int32)
        self.param_length = len(self.parameters)
        self.map_ = C.Map()
        if context.get_auto_parallel_context("enable_parallel_optimizer"):
            if _get_parallel_mode(
            ) == ParallelMode.DATA_PARALLEL and context.get_context(
                    "device_target") == "Ascend":
                self.use_parallel = True
            elif _get_parallel_mode() == ParallelMode.DATA_PARALLEL \
                    and context.get_context("device_target") != "Ascend":
                raise RuntimeError(
                    "Parallel optimizer only supports Ascend in data parallel mode."
                )
            elif _get_parallel_mode() in (ParallelMode.STAND_ALONE,
                                          ParallelMode.HYBRID_PARALLEL):
                raise RuntimeError(
                    "Parallel optimizer is not supported in {}.".format(
                        _get_parallel_mode()))
            else:
                self.use_parallel = False
        else:
            self.use_parallel = False
        if self.use_parallel:
            if self.cls_name not in ["Lamb", "AdamWeightDecay"]:
                raise RuntimeError(
                    "Parallel optimizer does not support optimizer {}".format(
                        self.cls_name))
            self.dev_num = _get_device_num()
            if self.dev_num > self.param_length:
                raise RuntimeError(
                    "Parallel optimizer can not be applied when the number of parameters {} is"
                    " less than the number of devices {}".format(
                        self.param_length, self.dev_num))
            self.param_rank = self._get_parameter_group_id()
            self.optim_filter = tuple(
                map(lambda x: x == _get_global_rank(), self.param_rank))
            self.param_names = []
            for param in self.parameters:
                self.param_names.append(param.name)

        else:
            self.optim_filter = (True, ) * self.param_length
Exemple #7
0
    def __init__(self,
                 config,
                 batch_size,
                 num_classes,
                 use_sigmoid_cls,
                 target_means=(.0, .0, .0, .0),
                 target_stds=(1.0, 1.0, 1.0, 1.0)
                 ):
        super(Proposal, self).__init__()
        cfg = config
        self.batch_size = batch_size
        self.num_classes = num_classes
        self.target_means = target_means
        self.target_stds = target_stds
        self.use_sigmoid_cls = use_sigmoid_cls

        if self.use_sigmoid_cls:
            self.cls_out_channels = num_classes - 1
            self.activation = P.Sigmoid()
            self.reshape_shape = (-1, 1)
        else:
            self.cls_out_channels = num_classes
            self.activation = P.Softmax(axis=1)
            self.reshape_shape = (-1, 2)

        if self.cls_out_channels <= 0:
            raise ValueError('num_classes={} is too small'.format(num_classes))

        self.num_pre = cfg.rpn_proposal_nms_pre
        self.min_box_size = cfg.rpn_proposal_min_bbox_size
        self.nms_thr = cfg.rpn_proposal_nms_thr
        self.nms_post = cfg.rpn_proposal_nms_post
        self.nms_across_levels = cfg.rpn_proposal_nms_across_levels
        self.max_num = cfg.rpn_proposal_max_num
        self.num_levels = cfg.fpn_num_outs

        # Op Define
        self.squeeze = P.Squeeze()
        self.reshape = P.Reshape()
        self.cast = P.Cast()

        self.feature_shapes = cfg.feature_shapes

        self.transpose_shape = (1, 2, 0)

        self.decode = P.BoundingBoxDecode(max_shape=(cfg.img_height, cfg.img_width), \
                                          means=self.target_means, \
                                          stds=self.target_stds)

        self.nms = P.NMSWithMask(self.nms_thr)
        self.concat_axis0 = P.Concat(axis=0)
        self.concat_axis1 = P.Concat(axis=1)
        self.split = P.Split(axis=1, output_num=5)
        self.min = P.Minimum()
        self.gatherND = P.GatherNd()
        self.slice = P.Slice()
        self.select = P.Select()
        self.greater = P.Greater()
        self.transpose = P.Transpose()
        self.tile = P.Tile()
        self.set_train_local(config, training=True)

        _mode_16 = bool(context.get_context("device_target") == "Ascend")
        self.dtype = np.float16 if _mode_16 else np.float32
        self.ms_type = mstype.float16 if _mode_16 else mstype.float32

        self.multi_10 = Tensor(10.0, self.ms_type)
Exemple #8
0
    def __init__(self,
                 learning_rate,
                 parameters,
                 weight_decay=0.0,
                 loss_scale=1.0):
        super(Optimizer, self).__init__(auto_prefix=False)
        if parameters is not None and not isinstance(parameters, list):
            parameters = list(parameters)

        if not parameters:
            raise ValueError("Optimizer got an empty parameter list.")

        if not isinstance(parameters[0], (dict, Parameter)):
            raise TypeError(
                "Only a list of Parameter or dict can be supported.")

        if isinstance(loss_scale, int):
            loss_scale = float(loss_scale)
        validator.check_value_type("loss_scale", loss_scale, [float],
                                   self.cls_name)
        validator.check_positive_float(loss_scale, "loss_scale", self.cls_name)
        self.loss_scale = loss_scale

        weight_decay = self._preprocess_weight_decay(weight_decay)
        self.grad_centralization = False

        self._unique = True
        self._target = context.get_context("device_target")
        self.dynamic_lr = False
        self.assignadd = None
        self.global_step = None
        self.is_group = False
        self.is_group_lr = False
        self.is_group_params_ordered = False
        learning_rate = self._preprocess_single_lr(learning_rate)
        if isinstance(parameters[0], dict):
            self.is_group = True
            self.group_params = []
            self.group_lr = []
            self.group_weight_decay = []
            self.group_grad_centralization = []
            self._init_group_params(parameters, learning_rate, weight_decay,
                                    self.grad_centralization)

        # The final value of dynamic_lr can be determined after the process of parse_single_lr and init_group_params
        if self.dynamic_lr:
            self.assignadd = P.AssignAdd()
            self.global_step = Parameter(initializer(0, [1], mindspore.int32),
                                         name='global_step')

        if self.is_group_lr:
            self.learning_rate = CellList(self.group_lr, auto_prefix=False) if self.dynamic_lr \
                else ParameterTuple(self.group_lr)
        else:
            self.learning_rate = self._build_single_lr(learning_rate,
                                                       'learning_rate')

        if self.is_group:
            self.parameters = ParameterTuple(self.group_params)
            self.weight_decay = tuple(self.group_weight_decay)
            self.weight_decay_tensor_tuple = tuple(
                Tensor(x, mstype.float32) for x in self.group_weight_decay)
            decay_filter = lambda x: x > 0
            self.decay_flags = tuple(
                decay_filter(x) for x in self.weight_decay)
            self.exec_weight_decay = any(self.decay_flags)
            self.grad_centralization_flags = tuple(
                self.group_grad_centralization)
        else:
            self.parameters = ParameterTuple(parameters)
            self.weight_decay = weight_decay * loss_scale
            self.weight_decay_tensor = Tensor(self.weight_decay,
                                              mstype.float32)
            decay_filter = lambda x: 'beta' not in x.name and 'gamma' not in x.name
            self.decay_flags = tuple(decay_filter(x) for x in self.parameters)
            self.exec_weight_decay = self.weight_decay > 0
        # when a parameter has been unique, there is no need do another unique in optimizer.
        for param in self.parameters:
            if param.unique:
                self._unique = False
                break
        ps_filter = lambda x: x.is_param_ps
        self.ps_parameters = tuple(ps_filter(x) for x in self.parameters)
        cache_filter = lambda x: x.cache_enable
        self.cache_enable = tuple(cache_filter(x) for x in self.parameters)
        self.reciprocal_scale = Tensor(1.0 / loss_scale, mstype.float32)
        self.need_scale = loss_scale != 1.0
        self.global_step_increase_tensor = Tensor(1, mstype.int32)
        self.param_length = len(self.parameters)
        self.map_ = C.Map()
        self._use_parallel_optimizer()
Exemple #9
0
    def _verify_data_n_settings(self,
                                check_all=False,
                                check_registration=False,
                                check_data_n_network=False,
                                check_saliency=False,
                                check_hoc=False,
                                check_environment=False):
        """
        Verify the validity of dataset and other settings.

        Args:
            check_all (bool): Set it True for checking everything.
            check_registration (bool): Set it True for checking registrations, check if it is enough to invoke run().
            check_data_n_network (bool): Set it True for checking data and network.
            check_saliency (bool): Set it True for checking saliency related settings.
            check_hoc (bool): Set it True for checking HOC related settings.
            check_environment (bool): Set it True for checking environment conditions.

        Raises:
            ValueError: Be raised for any data or settings' value problem.
            TypeError: Be raised for any data or settings' type problem.
            RuntimeError: Be raised for any runtime problem.
        """
        if check_all:
            check_registration = True
            check_data_n_network = True
            check_saliency = True
            check_hoc = True
            check_environment = True

        if check_environment:
            device_target = context.get_context('device_target')
            if device_target not in ("Ascend", "GPU"):
                raise RuntimeError(
                    f"Unsupported device_target: '{device_target}', "
                    f"only 'Ascend' or 'GPU' is supported. "
                    f"Please call context.set_context(device_target='Ascend') or "
                    f"context.set_context(device_target='GPU').")
        if check_environment or check_saliency:
            if self._is_saliency_registered:
                mode = context.get_context('mode')
                if mode != context.PYNATIVE_MODE:
                    raise RuntimeError(
                        "Context mode: GRAPH_MODE is not supported, "
                        "please call context.set_context(mode=context.PYNATIVE_MODE)."
                    )

        if check_registration:
            if self._is_uncertainty_registered and not self._is_saliency_registered:
                raise ValueError(
                    "Function register_uncertainty() is called but register_saliency() is not."
                )
            if not self._is_saliency_registered and not self._is_hoc_registered:
                raise ValueError(
                    "No explanation module was registered, user should at least call register_saliency() "
                    "or register_hierarchical_occlusion() once with proper arguments."
                )

        if check_data_n_network or check_saliency or check_hoc:
            self._verify_data()

        if check_data_n_network:
            self._verify_network()

        if check_saliency:
            self._verify_saliency()
Exemple #10
0
def train():
    """Train function."""
    args = parse_args()
    devid = int(os.getenv('DEVICE_ID', '0'))
    context.set_context(mode=context.GRAPH_MODE, enable_auto_mixed_precision=True,
                        device_target=args.device_target, save_graphs=True, device_id=devid)
    if args.need_profiler:
        from mindspore.profiler.profiling import Profiler
        profiler = Profiler(output_path=args.outputs_dir, is_detail=True, is_show_op_path=True)

    loss_meter = AverageMeter('loss')

    context.reset_auto_parallel_context()
    parallel_mode = ParallelMode.STAND_ALONE
    degree = 1
    if args.is_distributed:
        parallel_mode = ParallelMode.DATA_PARALLEL
        degree = get_group_size()
    context.set_auto_parallel_context(parallel_mode=parallel_mode, gradients_mean=True, device_num=degree)

    network = YOLOV3DarkNet53(is_training=True)
    # default is kaiming-normal
    default_recurisive_init(network)
    load_yolov3_params(args, network)

    network = YoloWithLossCell(network)
    args.logger.info('finish get network')

    config = ConfigYOLOV3DarkNet53()

    config.label_smooth = args.label_smooth
    config.label_smooth_factor = args.label_smooth_factor

    if args.training_shape:
        config.multi_scale = [conver_training_shape(args)]
    if args.resize_rate:
        config.resize_rate = args.resize_rate

    ds, data_size = create_yolo_dataset(image_dir=args.data_root, anno_path=args.annFile, is_training=True,
                                        batch_size=args.per_batch_size, max_epoch=args.max_epoch,
                                        device_num=args.group_size, rank=args.rank, config=config)
    args.logger.info('Finish loading dataset')

    args.steps_per_epoch = int(data_size / args.per_batch_size / args.group_size)

    if not args.ckpt_interval:
        args.ckpt_interval = args.steps_per_epoch

    lr = get_lr(args)

    opt = Momentum(params=get_param_groups(network),
                   learning_rate=Tensor(lr),
                   momentum=args.momentum,
                   weight_decay=args.weight_decay,
                   loss_scale=args.loss_scale)
    is_gpu = context.get_context("device_target") == "GPU"
    if is_gpu:
        loss_scale_value = 1.0
        loss_scale = FixedLossScaleManager(loss_scale_value, drop_overflow_update=False)
        network = amp.build_train_network(network, optimizer=opt, loss_scale_manager=loss_scale,
                                          level="O2", keep_batchnorm_fp32=False)
        keep_loss_fp32(network)
    else:
        network = TrainingWrapper(network, opt)
        network.set_train()

    if args.rank_save_ckpt_flag:
        # checkpoint save
        ckpt_max_num = args.max_epoch * args.steps_per_epoch // args.ckpt_interval
        ckpt_config = CheckpointConfig(save_checkpoint_steps=args.ckpt_interval,
                                       keep_checkpoint_max=ckpt_max_num)
        save_ckpt_path = os.path.join(args.outputs_dir, 'ckpt_' + str(args.rank) + '/')
        ckpt_cb = ModelCheckpoint(config=ckpt_config,
                                  directory=save_ckpt_path,
                                  prefix='{}'.format(args.rank))
        cb_params = _InternalCallbackParam()
        cb_params.train_network = network
        cb_params.epoch_num = ckpt_max_num
        cb_params.cur_epoch_num = 1
        run_context = RunContext(cb_params)
        ckpt_cb.begin(run_context)

    old_progress = -1
    t_end = time.time()
    data_loader = ds.create_dict_iterator(output_numpy=True, num_epochs=1)

    for i, data in enumerate(data_loader):
        images = data["image"]
        input_shape = images.shape[2:4]
        args.logger.info('iter[{}], shape{}'.format(i, input_shape[0]))

        images = Tensor.from_numpy(images)

        batch_y_true_0 = Tensor.from_numpy(data['bbox1'])
        batch_y_true_1 = Tensor.from_numpy(data['bbox2'])
        batch_y_true_2 = Tensor.from_numpy(data['bbox3'])
        batch_gt_box0 = Tensor.from_numpy(data['gt_box1'])
        batch_gt_box1 = Tensor.from_numpy(data['gt_box2'])
        batch_gt_box2 = Tensor.from_numpy(data['gt_box3'])

        input_shape = Tensor(tuple(input_shape[::-1]), ms.float32)
        loss = network(images, batch_y_true_0, batch_y_true_1, batch_y_true_2, batch_gt_box0, batch_gt_box1,
                       batch_gt_box2, input_shape)
        loss_meter.update(loss.asnumpy())

        if args.rank_save_ckpt_flag:
            # ckpt progress
            cb_params.cur_step_num = i + 1  # current step number
            cb_params.batch_num = i + 2
            ckpt_cb.step_end(run_context)

        if i % args.log_interval == 0:
            time_used = time.time() - t_end
            epoch = int(i / args.steps_per_epoch)
            fps = args.per_batch_size * (i - old_progress) * args.group_size / time_used
            if args.rank == 0:
                args.logger.info(
                    'epoch[{}], iter[{}], {}, {:.2f} imgs/sec, lr:{}'.format(epoch, i, loss_meter, fps, lr[i]))
            t_end = time.time()
            loss_meter.reset()
            old_progress = i

        if (i + 1) % args.steps_per_epoch == 0 and args.rank_save_ckpt_flag:
            cb_params.cur_epoch_num += 1

        if args.need_profiler:
            if i == 10:
                profiler.analyse()
                break

    args.logger.info('==========end training===============')
Exemple #11
0
    def __init__(self):
        self.device_cpu = context.get_context('device_target')

        self.arrs = [
            rand_int(2),
            rand_int(2, 3),
            rand_int(2, 3, 4),
            rand_int(2, 3, 4, 5),
        ]

        # scalars expanded across the 0th dimension
        self.scalars = [
            rand_int(),
            rand_int(1),
            rand_int(1, 1),
            rand_int(1, 1, 1, 1),
        ]

        # empty arrays
        self.empty_arrs = [
            rand_int(0),
            rand_int(4, 0),
            rand_int(2, 0, 2),
            rand_int(5, 0, 7, 0),
        ]

        # arrays of the same size expanded across the 0th dimension
        self.expanded_arrs = [
            rand_int(2, 3),
            rand_int(1, 2, 3),
            rand_int(1, 1, 2, 3),
            rand_int(1, 1, 1, 2, 3),
        ]

        # arrays of the same size expanded across the 0th dimension
        self.expanded_arrs = [
            rand_int(2, 3),
            rand_int(1, 2, 3),
            rand_int(1, 1, 2, 3),
            rand_int(1, 1, 1, 2, 3),
        ]

        # arrays with last dimension aligned
        self.aligned_arrs = [
            rand_int(2, 3),
            rand_int(1, 4, 3),
            rand_int(5, 1, 2, 3),
            rand_int(4, 2, 1, 1, 3),
        ]

        # arrays which can be broadcast
        self.broadcastables = [
            rand_int(5),
            rand_int(6, 1),
            rand_int(7, 1, 5),
            rand_int(8, 1, 6, 1)
        ]

        # boolean arrays which can be broadcast
        self.bool_broadcastables = [
            rand_bool(),
            rand_bool(1),
            rand_bool(5),
            rand_bool(6, 1),
            rand_bool(7, 1, 5),
            rand_bool(8, 1, 6, 1),
        ]

        # core dimension 0 is matched for each
        # pair of array[i] and array[i + 1]
        self.core_broadcastables = [
            rand_int(3),
            rand_int(3),
            rand_int(6),
            rand_int(6, 4),
            rand_int(5, 2),
            rand_int(2),
            rand_int(2, 9),
            rand_int(9, 8),
            rand_int(6),
            rand_int(2, 6, 5),
            rand_int(9, 2, 7),
            rand_int(7),
            rand_int(5, 2, 4),
            rand_int(6, 1, 4, 9),
            rand_int(7, 1, 5, 3, 2),
            rand_int(8, 1, 6, 1, 2, 9),
        ]

        # arrays with dimensions of size 1
        self.nested_arrs = [
            rand_int(1),
            rand_int(1, 2),
            rand_int(3, 1, 8),
            rand_int(1, 3, 9, 1),
        ]
Exemple #12
0
def test_switch_mode():
    """ test_switch_mode """
    context.set_context(mode=context.GRAPH_MODE)
    assert context.get_context("mode") == context.GRAPH_MODE
    context.set_context(mode=context.PYNATIVE_MODE)
    assert context.get_context("mode") == context.PYNATIVE_MODE
    def __init__(self, config, batch_size, num_bboxes, add_gt_as_proposals):
        super(BboxAssignSampleForRcnn, self).__init__()
        cfg = config
        _mode_16 = bool(context.get_context("device_target") == "Ascend")
        self.dtype = np.float16 if _mode_16 else np.float32
        self.ms_type = mstype.float16 if _mode_16 else mstype.float32
        self.batch_size = batch_size
        self.neg_iou_thr = cfg.neg_iou_thr_stage2
        self.pos_iou_thr = cfg.pos_iou_thr_stage2
        self.min_pos_iou = cfg.min_pos_iou_stage2
        self.num_gts = cfg.num_gts
        self.num_bboxes = num_bboxes
        self.num_expected_pos = cfg.num_expected_pos_stage2
        self.num_expected_neg = cfg.num_expected_neg_stage2
        self.num_expected_total = cfg.num_expected_total_stage2

        self.add_gt_as_proposals = add_gt_as_proposals
        self.label_inds = Tensor(np.arange(1, self.num_gts + 1).astype(np.int32))
        self.add_gt_as_proposals_valid = Tensor(np.array(self.add_gt_as_proposals * np.ones(self.num_gts),
                                                         dtype=np.int32))

        self.concat = P.Concat(axis=0)
        self.max_gt = P.ArgMaxWithValue(axis=0)
        self.max_anchor = P.ArgMaxWithValue(axis=1)
        self.sum_inds = P.ReduceSum()
        self.iou = P.IOU()
        self.greaterequal = P.GreaterEqual()
        self.greater = P.Greater()
        self.select = P.Select()
        self.gatherND = P.GatherNd()
        self.squeeze = P.Squeeze()
        self.cast = P.Cast()
        self.logicaland = P.LogicalAnd()
        self.less = P.Less()
        self.random_choice_with_mask_pos = P.RandomChoiceWithMask(self.num_expected_pos)
        self.random_choice_with_mask_neg = P.RandomChoiceWithMask(self.num_expected_neg)
        self.reshape = P.Reshape()
        self.equal = P.Equal()
        self.bounding_box_encode = P.BoundingBoxEncode(means=(0.0, 0.0, 0.0, 0.0), stds=(0.1, 0.1, 0.2, 0.2))
        self.concat_axis1 = P.Concat(axis=1)
        self.logicalnot = P.LogicalNot()
        self.tile = P.Tile()

        # Check
        self.check_gt_one = Tensor(np.array(-1 * np.ones((self.num_gts, 4)), dtype=self.dtype))
        self.check_anchor_two = Tensor(np.array(-2 * np.ones((self.num_bboxes, 4)), dtype=self.dtype))

        # Init tensor
        self.assigned_gt_inds = Tensor(np.array(-1 * np.ones(num_bboxes), dtype=np.int32))
        self.assigned_gt_zeros = Tensor(np.array(np.zeros(num_bboxes), dtype=np.int32))
        self.assigned_gt_ones = Tensor(np.array(np.ones(num_bboxes), dtype=np.int32))
        self.assigned_gt_ignores = Tensor(np.array(-1 * np.ones(num_bboxes), dtype=np.int32))
        self.assigned_pos_ones = Tensor(np.array(np.ones(self.num_expected_pos), dtype=np.int32))

        self.gt_ignores = Tensor(np.array(-1 * np.ones(self.num_gts), dtype=np.int32))
        self.range_pos_size = Tensor(np.arange(self.num_expected_pos).astype(self.dtype))
        self.check_neg_mask = Tensor(np.array(np.ones(self.num_expected_neg - self.num_expected_pos), dtype=np.bool))
        self.bboxs_neg_mask = Tensor(np.zeros((self.num_expected_neg, 4), dtype=self.dtype))
        self.labels_neg_mask = Tensor(np.array(np.zeros(self.num_expected_neg), dtype=np.uint8))

        self.reshape_shape_pos = (self.num_expected_pos, 1)
        self.reshape_shape_neg = (self.num_expected_neg, 1)

        self.scalar_zero = Tensor(0.0, dtype=self.ms_type)
        self.scalar_neg_iou_thr = Tensor(self.neg_iou_thr, dtype=self.ms_type)
        self.scalar_pos_iou_thr = Tensor(self.pos_iou_thr, dtype=self.ms_type)
        self.scalar_min_pos_iou = Tensor(self.min_pos_iou, dtype=self.ms_type)
Exemple #14
0
def _get_mode():
    """Get the current mode (0 is Graph mode, 1 is PyNative mode)"""
    return context.get_context('mode')
Exemple #15
0
def fasterrcnn_eval(dataset_path, ckpt_path, ann_file):
    """FasterRcnn evaluation."""
    ds = create_fasterrcnn_dataset(dataset_path,
                                   batch_size=config.test_batch_size,
                                   is_training=False)
    net = Faster_Rcnn_Resnet50(config)
    param_dict = load_checkpoint(ckpt_path)
    if args_opt.device_target == "GPU":
        for key, value in param_dict.items():
            tensor = value.asnumpy().astype(np.float32)
            param_dict[key] = Parameter(tensor, key)
    load_param_into_net(net, param_dict)

    net.set_train(False)
    device_type = "Ascend" if context.get_context(
        "device_target") == "Ascend" else "Others"
    if device_type == "Ascend":
        net.to_float(mstype.float16)

    eval_iter = 0
    total = ds.get_dataset_size()
    outputs = []
    dataset_coco = COCO(ann_file)

    print("\n========================================\n")
    print("total images num: ", total)
    print("Processing, please wait a moment.")
    max_num = 128
    for data in ds.create_dict_iterator(num_epochs=1):
        eval_iter = eval_iter + 1

        img_data = data['image']
        img_metas = data['image_shape']
        gt_bboxes = data['box']
        gt_labels = data['label']
        gt_num = data['valid_num']

        start = time.time()
        # run net
        output = net(img_data, img_metas, gt_bboxes, gt_labels, gt_num)
        end = time.time()
        print("Iter {} cost time {}".format(eval_iter, end - start))

        # output
        all_bbox = output[0]
        all_label = output[1]
        all_mask = output[2]

        for j in range(config.test_batch_size):
            all_bbox_squee = np.squeeze(all_bbox.asnumpy()[j, :, :])
            all_label_squee = np.squeeze(all_label.asnumpy()[j, :, :])
            all_mask_squee = np.squeeze(all_mask.asnumpy()[j, :, :])

            all_bboxes_tmp_mask = all_bbox_squee[all_mask_squee, :]
            all_labels_tmp_mask = all_label_squee[all_mask_squee]

            if all_bboxes_tmp_mask.shape[0] > max_num:
                inds = np.argsort(-all_bboxes_tmp_mask[:, -1])
                inds = inds[:max_num]
                all_bboxes_tmp_mask = all_bboxes_tmp_mask[inds]
                all_labels_tmp_mask = all_labels_tmp_mask[inds]

            outputs_tmp = bbox2result_1image(all_bboxes_tmp_mask,
                                             all_labels_tmp_mask,
                                             config.num_classes)

            outputs.append(outputs_tmp)

    eval_types = ["bbox"]
    result_files = results2json(dataset_coco, outputs, "./results.pkl")

    coco_eval(result_files, eval_types, dataset_coco, single_result=True)
Exemple #16
0
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride,
                 pad_mode,
                 padding,
                 dilation,
                 group,
                 has_bias,
                 weight_init,
                 bias_init,
                 data_format='NCHW',
                 transposed=False):
        super(_Conv, self).__init__()
        self.in_channels = Validator.check_positive_int(in_channels)
        self.out_channels = Validator.check_positive_int(out_channels)
        self.kernel_size = kernel_size
        self.stride = stride
        self.pad_mode = pad_mode
        self.weight_init = weight_init
        self.bias_init = bias_init
        self.format = Validator.check_string(data_format, ['NCHW', 'NHWC'], 'format', self.cls_name)
        if context.get_context("device_target") != "GPU" and self.format == "NHWC":
            raise ValueError("NHWC format only support in GPU target.")
        if isinstance(padding, int):
            Validator.check_non_negative_int(padding, 'padding', self.cls_name)
            self.padding = padding
        elif isinstance(padding, tuple):
            for pad in padding:
                Validator.check_non_negative_int(pad, 'padding item', self.cls_name)
            self.padding = padding
        else:
            raise TypeError("padding type must be int/tuple(int) cannot be {}!".format(type(padding)))

        self.dilation = dilation
        self.group = Validator.check_positive_int(group)
        self.has_bias = has_bias
        if (not isinstance(kernel_size[0], int)) or (not isinstance(kernel_size[1], int)) or \
                isinstance(kernel_size[0], bool) or isinstance(kernel_size[1], bool) or \
                kernel_size[0] < 1 or kernel_size[1] < 1:
            raise ValueError("Attr 'kernel_size' of 'Conv2D' Op passed "
                             + str(self.kernel_size) + ", should be a int or tuple and equal to or greater than 1.")
        if (not isinstance(stride[0], int)) or (not isinstance(stride[1], int)) or \
                isinstance(stride[0], bool) or isinstance(stride[1], bool) or stride[0] < 1 or stride[1] < 1:
            raise ValueError("Attr 'stride' of 'Conv2D' Op passed "
                             + str(self.stride) + ", should be a int or tuple and equal to or greater than 1.")
        if (not isinstance(dilation[0], int)) or (not isinstance(dilation[1], int)) or \
                isinstance(dilation[0], bool) or isinstance(dilation[1], bool) or dilation[0] < 1 or dilation[1] < 1:
            raise ValueError("Attr 'dilation' of 'Conv2D' Op passed "
                             + str(self.dilation) + ", should be a int or tuple and equal to or greater than 1.")
        if in_channels % group != 0:
            raise ValueError("Attr 'in_channels' of 'Conv2D' Op must be divisible by "
                             "attr 'group' of 'Conv2D' Op.")
        if out_channels % group != 0:
            raise ValueError("Attr 'out_channels' of 'Conv2D' Op must be divisible by "
                             "attr 'group' of 'Conv2D' Op.")
        if transposed:
            shape = [in_channels, out_channels // group, *kernel_size]
        else:
            shape = [out_channels, in_channels // group, *kernel_size] if self.format == "NCHW" else \
                [out_channels, *kernel_size, in_channels // group]
        self.weight = Parameter(initializer(self.weight_init, shape), name='weight')

        if Validator.check_bool(has_bias):
            self.bias = Parameter(initializer(self.bias_init, [out_channels]), name='bias')
        else:
            if self.bias_init != 'zeros':
                logger.warning("Value of 'has_bias' is False, value of 'bias_init' will be ignored.")
            self.bias = None
    max_queries = 6000

    def_evaluate = BlackDefenseEvaluate(bb_raw_preds, bb_def_preds,
                                        np.array(raw_query_counts),
                                        np.array(def_query_counts),
                                        np.array(raw_query_time),
                                        np.array(def_query_time),
                                        np.array(def_detection_counts),
                                        true_label, max_queries)

    LOGGER.info(
        TAG, 'query count variance of adversaries is : {:.2f}'.format(
            def_evaluate.qcv()))
    LOGGER.info(
        TAG, 'attack success rate variance of adversaries '
        'is : {:.2f}'.format(def_evaluate.asv()))
    LOGGER.info(
        TAG, 'false positive rate (FPR) of the query-based detector '
        'is : {:.2f}'.format(def_evaluate.fpr()))
    LOGGER.info(
        TAG, 'the benign query response time variance (QRV) '
        'is : {:.2f}'.format(def_evaluate.qrv()))


if __name__ == '__main__':
    # device_target can be "CPU", "GPU" or "Ascend"
    context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
    DEVICE = context.get_context("device_target")
    if DEVICE in ("Ascend", "GPU"):
        test_black_defense()
 def __init__(self):
     self.device_target = context.get_context('device_target')
     self.rank_size = None
     self.device_id = None
     self.global_rank_id = None
Exemple #19
0
def _check_mode(class_name):
    """Check for PyNative mode."""
    mode = context.get_context('mode')
    if mode == context.PYNATIVE_MODE:
        raise RuntimeError(
            f'{class_name} operator does not support PyNative mode.')
Exemple #20
0
    def compile(self,
                obj,
                *args,
                phase='predict',
                params=None,
                do_convert=True,
                auto_parallel_mode=False):
        """
        Compiles graph.

        Args:
            obj (Function/Cell): The function or cell instance need compile.
            args (tuple): Function or cell input arguments.
            phase (str): The name of compile phase. Default: 'predict'.
            params (OrderedDict): The parameters dictionary used for init data graph. Default: None.
            do_convert (bool): When set to True, convert ME graph to GE graph after compiling graph.
            auto_parallel_mode: When set to True, use auto parallel mode to compile graph.

        Return:
            Str, the full phase of the cell.
            Bool, if the graph has been compiled before, return False, else return True.
        """
        obj.check_names()
        args_names, args_list = _generate_pip_args(obj, *args)
        dic = dict(zip(args_names, args_list))
        key = generate_key(phase, dic)
        self.phase_prefix = str(key[1])
        if phase == 'export':
            phase = phase + '.' + str(obj.create_time)
        else:
            phase = self.phase_prefix + phase + '.' + str(obj.create_time)
        enable_debug_runtime = context.get_context("enable_debug_runtime")
        enable_ge = context.get_context("enable_ge")

        use_vm = not enable_ge or (enable_debug_runtime
                                   and context.get_context("mode")
                                   == context.PYNATIVE_MODE)

        if phase in self.compile_cache.keys():
            logger.debug("%r graph has existed.", phase)
            return phase, False

        result = self._executor.compile(obj, args_list, phase, use_vm)
        self.compile_cache[phase] = phase
        if not result:
            raise RuntimeError("Executor compile failed.")
        graph = self._executor.get_func_graph(phase)

        if graph is None:
            logger.error("%r graph compile failed.", phase)
        if not do_convert:
            return phase, True

        if auto_parallel_mode and "train" in phase:
            obj.parameter_layout_dict = self._executor.get_parameter_layout(
                phase)
        self._params_init_data(obj, params)
        if not enable_debug_runtime or enable_ge:
            if auto_parallel_mode and "train" in phase:
                obj.load_parameter_slice(params)

        # set parallel inputs in sink mode
        if auto_parallel_mode and (args and isinstance(args[0], Tensor)
                                   and args[0].virtual_flag):
            obj.set_parallel_input_with_inputs(*args)

        # the following GE init process is not needed when use vm or ms backend
        if enable_ge:
            # decide whether to sink based on whether the inputs is virtual or not
            if args_list and isinstance(args_list[0],
                                        Tensor) and args_list[0].virtual_flag:
                _set_dataset_mode_config('sink')
            else:
                _set_dataset_mode_config('normal')

            self._build_data_graph(obj, params, phase)

            if "export" not in phase:
                init_phase = "init_subgraph" + "." + str(obj.create_time)
                _exec_init_graph(obj, init_phase)
        elif not enable_ge and "export" in phase:
            self._build_data_graph(obj, params, phase)

        return phase, True
Exemple #21
0
    def __init__(self,
                 input_size,
                 hidden_size,
                 num_layers=1,
                 has_bias=True,
                 batch_first=False,
                 dropout=0,
                 bidirectional=False):
        super(LSTM, self).__init__()
        validator.check_value_type("batch_first", batch_first, [bool],
                                   self.cls_name)
        validator.check_positive_int(hidden_size, "hidden_size", self.cls_name)
        validator.check_positive_int(num_layers, "num_layers", self.cls_name)
        self.is_ascend = context.get_context("device_target") == "Ascend"

        self.batch_first = batch_first
        self.transpose = P.Transpose()
        self.num_layers = num_layers
        self.bidirectional = bidirectional
        self.dropout = dropout
        self.reverse_seq = P.ReverseSequence(batch_dim=1, seq_dim=0)
        self.concat = P.Concat(axis=0)
        self.concat_2dim = P.Concat(axis=2)
        self.cast = P.Cast()
        self.shape = P.Shape()
        if dropout != 0:
            self.dropout_op = nn.Dropout(float(dropout))
        self.lstm = P.LSTM(input_size=input_size,
                           hidden_size=hidden_size,
                           num_layers=num_layers,
                           has_bias=has_bias,
                           bidirectional=bidirectional,
                           dropout=float(dropout))

        weight_size = 0
        gate_size = 4 * hidden_size
        stdv = 1 / math.sqrt(hidden_size)
        num_directions = 2 if bidirectional else 1
        b0 = np.zeros(gate_size, dtype=np.float16)
        self.w_list = []
        self.b_list = []
        self.rnns_fw = P.DynamicRNN(forget_bias=0.0)
        self.rnns_bw = P.DynamicRNN(forget_bias=0.0)
        for layer in range(num_layers):
            input_layer_size = input_size if layer == 0 else hidden_size * num_directions
            increment_size = gate_size * input_layer_size
            increment_size += gate_size * hidden_size
            w_shape = input_size if layer == 0 else (num_directions *
                                                     hidden_size)
            w_np = np.random.uniform(
                -stdv, stdv,
                (w_shape + hidden_size, gate_size)).astype(np.float16)
            self.w_list.append(
                Parameter(initializer(Tensor(w_np),
                                      [w_shape + hidden_size, gate_size]),
                          name='weight_fw' + str(layer)))
            if has_bias:
                increment_size += 2 * gate_size
                b_np = np.random.uniform(-stdv, stdv,
                                         gate_size).astype(np.float16)
                self.b_list.append(
                    Parameter(initializer(Tensor(b_np), [gate_size]),
                              name='bias_fw' + str(layer)))
            else:
                self.b_list.append(
                    Parameter(initializer(Tensor(b0), [gate_size]),
                              name='bias_fw' + str(layer)))
            weight_size += increment_size * num_directions
            if bidirectional:
                w_bw_np = np.random.uniform(
                    -stdv, stdv,
                    (w_shape + hidden_size, gate_size)).astype(np.float16)
                self.w_list.append(
                    Parameter(initializer(Tensor(w_bw_np),
                                          [w_shape + hidden_size, gate_size]),
                              name='weight_bw' + str(layer)))
                b_bw_np = np.random.uniform(-stdv, stdv,
                                            (4 * hidden_size)).astype(
                                                np.float16) if has_bias else b0
                self.b_list.append(
                    Parameter(initializer(Tensor(b_bw_np), [gate_size]),
                              name='bias_bw' + str(layer)))
        self.w_list = ParameterTuple(self.w_list)
        self.b_list = ParameterTuple(self.b_list)
        w_np = np.random.uniform(-stdv, stdv,
                                 (weight_size, 1, 1)).astype(np.float32)
        self.weight = Parameter(initializer(Tensor(w_np), [weight_size, 1, 1]),
                                name='weight')
Exemple #22
0
def train():
    """Train function."""
    args = parse_args()
    devid = int(os.getenv('DEVICE_ID', '0'))
    context.set_context(mode=context.GRAPH_MODE,
                        enable_auto_mixed_precision=True,
                        device_target=args.device_target,
                        save_graphs=False,
                        device_id=devid)
    loss_meter = AverageMeter('loss')

    network = YOLOV4CspDarkNet53(is_training=True)
    # default is kaiming-normal
    default_recursive_init(network)

    if args.pretrained_backbone:
        pretrained_backbone_slice = args.pretrained_backbone.split('/')
        backbone_ckpt_file = pretrained_backbone_slice[
            len(pretrained_backbone_slice) - 1]
        local_backbone_ckpt_path = '/cache/' + backbone_ckpt_file
        # download backbone checkpoint
        mox.file.copy_parallel(src_url=args.pretrained_backbone,
                               dst_url=local_backbone_ckpt_path)
        args.pretrained_backbone = local_backbone_ckpt_path
    load_yolov4_params(args, network)

    network = YoloWithLossCell(network)
    args.logger.info('finish get network')

    config = ConfigYOLOV4CspDarkNet53()

    config.label_smooth = args.label_smooth
    config.label_smooth_factor = args.label_smooth_factor

    if args.training_shape:
        config.multi_scale = [convert_training_shape(args)]
    if args.resize_rate:
        config.resize_rate = args.resize_rate

    # data download
    local_data_path = '/cache/data'
    local_ckpt_path = '/cache/ckpt_file'
    print('Download data.')
    mox.file.copy_parallel(src_url=args.data_url, dst_url=local_data_path)

    ds, data_size = create_yolo_dataset(
        image_dir=os.path.join(local_data_path, 'images'),
        anno_path=os.path.join(local_data_path, 'annotation.json'),
        is_training=True,
        batch_size=args.per_batch_size,
        max_epoch=args.max_epoch,
        device_num=args.group_size,
        rank=args.rank,
        config=config)
    args.logger.info('Finish loading dataset')

    args.steps_per_epoch = int(data_size / args.per_batch_size /
                               args.group_size)

    if not args.ckpt_interval:
        args.ckpt_interval = args.steps_per_epoch * 10

    lr = get_lr(args)

    opt = Momentum(params=get_param_groups(network),
                   learning_rate=Tensor(lr),
                   momentum=args.momentum,
                   weight_decay=args.weight_decay,
                   loss_scale=args.loss_scale)
    is_gpu = context.get_context("device_target") == "GPU"
    if is_gpu:
        loss_scale_value = 1.0
        loss_scale = FixedLossScaleManager(loss_scale_value,
                                           drop_overflow_update=False)
        network = amp.build_train_network(network,
                                          optimizer=opt,
                                          loss_scale_manager=loss_scale,
                                          level="O2",
                                          keep_batchnorm_fp32=False)
        keep_loss_fp32(network)
    else:
        network = TrainingWrapper(network, opt)
        network.set_train()

    # checkpoint save
    ckpt_max_num = 10
    ckpt_config = CheckpointConfig(save_checkpoint_steps=args.ckpt_interval,
                                   keep_checkpoint_max=ckpt_max_num)
    ckpt_cb = ModelCheckpoint(config=ckpt_config,
                              directory=local_ckpt_path,
                              prefix='yolov4')
    cb_params = _InternalCallbackParam()
    cb_params.train_network = network
    cb_params.epoch_num = ckpt_max_num
    cb_params.cur_epoch_num = 1
    run_context = RunContext(cb_params)
    ckpt_cb.begin(run_context)

    old_progress = -1
    t_end = time.time()
    data_loader = ds.create_dict_iterator(output_numpy=True, num_epochs=1)

    for i, data in enumerate(data_loader):
        images = data["image"]
        input_shape = images.shape[2:4]
        images = Tensor.from_numpy(images)

        batch_y_true_0 = Tensor.from_numpy(data['bbox1'])
        batch_y_true_1 = Tensor.from_numpy(data['bbox2'])
        batch_y_true_2 = Tensor.from_numpy(data['bbox3'])
        batch_gt_box0 = Tensor.from_numpy(data['gt_box1'])
        batch_gt_box1 = Tensor.from_numpy(data['gt_box2'])
        batch_gt_box2 = Tensor.from_numpy(data['gt_box3'])

        input_shape = Tensor(tuple(input_shape[::-1]), ms.float32)
        loss = network(images, batch_y_true_0, batch_y_true_1, batch_y_true_2,
                       batch_gt_box0, batch_gt_box1, batch_gt_box2,
                       input_shape)
        loss_meter.update(loss.asnumpy())

        # ckpt progress
        cb_params.cur_step_num = i + 1  # current step number
        cb_params.batch_num = i + 2
        ckpt_cb.step_end(run_context)

        if i % args.log_interval == 0:
            time_used = time.time() - t_end
            epoch = int(i / args.steps_per_epoch)
            fps = args.per_batch_size * (
                i - old_progress) * args.group_size / time_used
            if args.rank == 0:
                args.logger.info(
                    'epoch[{}], iter[{}], {}, {:.2f} imgs/sec, lr:{}'.format(
                        epoch, i, loss_meter, fps, lr[i]))
            t_end = time.time()
            loss_meter.reset()
            old_progress = i

        if (i + 1) % args.steps_per_epoch == 0:
            cb_params.cur_epoch_num += 1

    args.logger.info('==========end training===============')

    # upload checkpoint files
    print('Upload checkpoint.')
    mox.file.copy_parallel(src_url=local_ckpt_path, dst_url=args.train_url)
Exemple #23
0
    def init(self, train_dataset=None, valid_dataset=None):
        """
        Initializes compute graphs and data graphs with sink mode.

        Note:
            Pre-init process only supports `GRAPH_MODE` and `Ascend` target currently.

        Args:
            train_dataset (Dataset): A training dataset iterator. If define `train_dataset`, training graphs will be
                                     initialized. Default: None.
            valid_dataset (Dataset): A evaluating dataset iterator. If define `valid_dataset`, evaluation graphs will
                                     be initialized, and `metrics` in `Model` can not be None. Default: None.

        Examples:
            >>> train_dataset = get_train_dataset()
            >>> valid_dataset = get_valid_dataset()
            >>> net = Net()
            >>> loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
            >>> optim = Momentum(params=net.trainable_params(), learning_rate=0.1, momentum=0.9)
            >>> model = Model(net, loss_fn=loss, optimizer=optim, metrics={'acc'})
            >>> model.init(train_dataset, valid_dataset)
            >>> model.train(2, train_dataset)
            >>> model.eval(valid_dataset)
        """
        if context.get_context(
                "mode") != context.GRAPH_MODE or context.get_context(
                    "device_target") != "Ascend":
            raise RuntimeError(
                'Pre-init process only supports GRAPH MODE and Ascend target currently.'
            )

        if not train_dataset and not valid_dataset:
            raise ValueError(
                'Both train_dataset and valid_dataset can not be None or empty.'
            )

        _device_number_check(self._parallel_mode, self._device_number)

        if train_dataset:
            _parameter_broadcast_check(self._parallel_mode,
                                       self._parameter_broadcast)
            self._train_network.set_train()
            self._train_network.phase = 'train'

            if self._parameter_broadcast:
                self._train_network.set_broadcast_flag()
            train_dataset.__no_send__ = True
            train_dataset_helper, train_network = self._exec_preprocess(
                self._train_network,
                is_train=True,
                phase='train',
                dataset=train_dataset,
                dataset_sink_mode=True)
            self._train_network = train_network
            for inputs in train_dataset_helper:
                self._train_network.compile(*inputs)
                break

        if valid_dataset:
            if not self._metric_fns:
                raise RuntimeError(
                    'If define `valid_dataset`, metric fn can not be None or empty.'
                )

            self._eval_network.set_train(False)
            self._eval_network.phase = 'eval'
            valid_dataset.__no_send__ = True
            valid_dataset_helper, eval_network = self._exec_preprocess(
                self._eval_network,
                is_train=False,
                phase='eval',
                dataset=valid_dataset,
                dataset_sink_mode=True)
            self._eval_network = eval_network
            for inputs in valid_dataset_helper:
                self._eval_network.compile(*inputs)
                break
    def __init__(self,
                 num_features,
                 eps=1e-5,
                 momentum=0.9,
                 affine=True,
                 gamma_init='ones',
                 beta_init='zeros',
                 moving_mean_init='zeros',
                 moving_var_init='ones',
                 use_batch_statistics=None,
                 device_num_each_group=1,
                 input_dims='2d'):
        super(_BatchNorm, self).__init__()
        if num_features < 1:
            raise ValueError("num_features must be at least 1")

        if momentum < 0 or momentum > 1:
            raise ValueError(
                "momentum should be a number in range [0, 1], but got {}".
                format(momentum))

        self.use_batch_statistics = use_batch_statistics
        self.num_features = num_features
        self.eps = eps
        self.input_dims = input_dims
        self.moving_mean = Parameter(initializer(moving_mean_init,
                                                 num_features),
                                     name="mean",
                                     requires_grad=False)
        self.moving_variance = Parameter(initializer(moving_var_init,
                                                     num_features),
                                         name="variance",
                                         requires_grad=False)
        self.gamma = Parameter(initializer(gamma_init, num_features),
                               name="gamma",
                               requires_grad=affine)
        self.beta = Parameter(initializer(beta_init, num_features),
                              name="beta",
                              requires_grad=affine)
        self.group = check_int_positive(device_num_each_group)
        self.is_global = False
        if self.group != 1:
            self.rank_id = get_rank()
            self.rank_size = get_group_size()
            self.device_list = [i for i in range(0, self.rank_size)]
            self.rank_list = self.list_group(self.device_list, self.group)
            self.rank_list_idx = len(self.rank_list)
            for i in range(self.rank_list_idx):
                if self.rank_id in self.rank_list[i] and self.group != 1:
                    self.is_global = True
                    management.create_group('group' + str(i),
                                            self.rank_list[i])
                    self.all_reduce = P.AllReduce(
                        P.ReduceOp.SUM,
                        'group' + str(i)).add_prim_attr('fusion', 1)
        self.shape = P.Shape()
        self.reduce_mean = P.ReduceMean(keep_dims=True)
        self.square = P.Square()
        self.sqrt = P.Sqrt()
        self.cast = P.Cast()
        self.dtype = P.DType()
        self.reshape = P.Reshape()
        self.is_ascend = context.get_context("device_target") == "Ascend"
        self.is_graph_mode = context.get_context("mode") == context.GRAPH_MODE
        self.momentum = 1.0 - momentum
        if context.get_context("enable_ge"):
            self.is_ge_backend = True
        else:
            self.is_ge_backend = False

        if self.is_graph_mode and (self.is_ge_backend or self.is_ascend):
            self.bn_train = P.BatchNorm(is_training=True, epsilon=self.eps)
        else:
            self.bn_train = P.FusedBatchNorm(mode=1,
                                             epsilon=self.eps,
                                             momentum=self.momentum)
        self.bn_infer = P.BatchNorm(is_training=False, epsilon=self.eps)
        self.enable_global_sync = self.is_global and (self.is_ge_backend or
                                                      (self.is_graph_mode
                                                       and self.is_ascend))
        self.enable_default_train = self.is_graph_mode and not self.is_global and \
                                    (self.is_ge_backend or self.is_ascend)

        data_parallel_strategy = ((1, ), (1, ))
        data_parallel_strategy_one = ((1, ), ())
        self.sub_mean = P.Sub().set_strategy(data_parallel_strategy)
        self.sub_var = P.Sub().set_strategy(data_parallel_strategy)
        self.mul_mean = P.Mul().set_strategy(data_parallel_strategy_one)
        self.mul_var = P.Mul().set_strategy(data_parallel_strategy_one)
        self.assign_sub_mean = P.AssignSub().set_strategy(
            data_parallel_strategy)
        self.assign_sub_var = P.AssignSub().set_strategy(
            data_parallel_strategy)
Exemple #25
0
    def compile(self, obj, *args, phase='predict', do_convert=True, auto_parallel_mode=False):
        """
        Compiles graph.

        Args:
            obj (Function/Cell): The function or cell instance need compile.
            args (tuple): Function or cell input arguments.
            phase (str): The name of compile phase. Default: 'predict'.
            do_convert (bool): When set to True, convert ME graph to GE graph after compiling graph.
            auto_parallel_mode: When set to True, use auto parallel mode to compile graph.

        Return:
            Str, the full phase of the cell.
            Bool, if the graph has been compiled before, return False, else return True.
        """
        args_names, args_list = _generate_pip_args(obj, *args)
        dic = dict(zip(args_names, args_list))
        key = generate_key(phase, dic)
        self.phase_prefix = str(key[1])
        if 'export' in phase:
            phase = phase + '.' + self.phase_prefix + '.' + str(obj.create_time)
        else:
            phase = self.phase_prefix + phase + '.' + str(obj.create_time)

        if phase in self.compile_cache.keys():
            logger.debug("%r graph has existed.", phase)
            return phase, False

        obj.check_names()
        _check_full_batch()
        self._set_dataset_mode(args_list)

        is_sink_mode = args and isinstance(args[0], Tensor) and args[0].virtual_flag
        if auto_parallel_mode and _need_to_full() and not is_sink_mode and obj.auto_parallel_compile_and_run():
            args_full = _to_full_tensor(args, _get_device_num(), _get_global_rank())
            _, args_list = _generate_pip_args(obj, *args_full)

        enable_debug_runtime = context.get_context("enable_debug_runtime")
        enable_ge = context.get_context("enable_ge")
        use_vm = not enable_ge or (enable_debug_runtime and context.get_context("mode") == context.PYNATIVE_MODE)
        result = self._executor.compile(obj, args_list, phase, use_vm)
        self.compile_cache[phase] = phase
        if not result:
            raise RuntimeError("Executor compile failed.")
        graph = self._executor.get_func_graph(phase)

        if graph is None:
            logger.error("%r graph compile failed.", phase)
        if not do_convert:
            return phase, True

        if auto_parallel_mode:
            obj.parameter_layout_dict = self._executor.get_parameter_layout(phase)
        replace = obj.init_parameters_data(auto_parallel_mode=auto_parallel_mode)
        if not enable_debug_runtime or enable_ge:
            if auto_parallel_mode:
                obj.load_parameter_slice(None)

        self._updata_param_node_default_input(phase, replace)

        # set parallel inputs in sink mode
        if auto_parallel_mode and is_sink_mode:
            obj.set_parallel_input_with_inputs(*args)

        # the following GE init process is not needed when use vm or ms backend
        if enable_ge:
            self._build_data_graph(obj, phase)

            if "export" not in phase:
                init_phase = "init_subgraph" + "." + str(obj.create_time)
                _exec_init_graph(obj, init_phase)
        elif not enable_ge and "export" in phase:
            self._build_data_graph(obj, phase)

        return phase, True
Exemple #26
0
    def __init__(self,
                 min_init=-6,
                 max_init=6,
                 num_bits=8,
                 ema=False,
                 ema_decay=0.999,
                 per_channel=False,
                 channel_axis=1,
                 out_channels=1,
                 quant_delay=0,
                 symmetric=False,
                 narrow_range=False):
        """init FakeQuantWithMinMax layer"""
        super(FakeQuantWithMinMax, self).__init__()
        self.min_init = min_init
        self.max_init = max_init
        self.num_bits = num_bits
        self.ema = ema
        self.ema_decay = ema_decay
        self.per_channel = per_channel
        self.out_channels = out_channels
        self.channel_axis = channel_axis
        self.quant_delay = quant_delay
        self.symmetric = symmetric
        self.narrow_range = narrow_range
        self.is_ascend = context.get_context('device_target') == "Ascend"

        # init tensor min and max for fake quant op
        if self.per_channel:
            min_array = np.array([
                self.min_init for i in range(0, self.out_channels)
            ]).astype(np.float32)
            max_array = np.array([
                self.max_init for i in range(0, self.out_channels)
            ]).astype(np.float32)
        else:
            min_array = np.array([self.min_init]).reshape(1).astype(np.float32)
            max_array = np.array([self.max_init]).reshape(1).astype(np.float32)
        self.minq = Parameter(Tensor(min_array),
                              name='quant_min',
                              requires_grad=False)
        self.maxq = Parameter(Tensor(max_array),
                              name='quant_max',
                              requires_grad=False)

        # init fake quant relative op
        if per_channel:
            quant_fun = partial(P.FakeQuantPerChannel,
                                channel_axis=self.channel_axis)
            ema_fun = partial(P.FakeQuantMinMaxPerChannelUpdate,
                              channel_axis=self.channel_axis)
        else:
            quant_fun = P.FakeQuantPerLayer
            ema_fun = P.FakeQuantMinMaxPerLayerUpdate

        if self.is_ascend:
            self.fake_quant = quant_fun(num_bits=self.num_bits,
                                        symmetric=self.symmetric,
                                        narrow_range=self.narrow_range)
        else:
            self.fake_quant = quant_fun(num_bits=self.num_bits,
                                        ema=self.ema,
                                        ema_decay=ema_decay,
                                        quant_delay=quant_delay,
                                        symmetric=self.symmetric,
                                        narrow_range=self.narrow_range)
        self.ema_update = ema_fun(num_bits=self.num_bits,
                                  ema=self.ema,
                                  ema_decay=self.ema_decay,
                                  symmetric=self.symmetric,
                                  narrow_range=self.narrow_range)
Exemple #27
0
 def __call__(self, obj, *args, phase='predict'):
     if context.get_context("precompile_only") or _is_role_pserver():
         return None
     return self.run(obj, *args, phase=phase)
Exemple #28
0
    def __init__(self,
                 min_init=-6,
                 max_init=6,
                 num_bits=8,
                 ema=False,
                 ema_decay=0.999,
                 per_channel=False,
                 out_channels=1,
                 quant_delay=0,
                 symmetric=False,
                 narrow_range=False):
        """init FakeQuantWithMinMax layer"""
        super(FakeQuantWithMinMax, self).__init__()

        self.min_init = min_init
        self.num_bits = num_bits
        self.max_init = max_init
        self.ema = ema
        self.ema_decay = ema_decay
        self.per_channel = per_channel
        self.out_channels = out_channels
        self.quant_delay = quant_delay
        self.symmetric = symmetric
        self.narrow_range = narrow_range

        if per_channel:
            min_array = np.array([
                self.min_init for i in range(0, self.out_channels)
            ]).astype(np.float32)
            max_array = np.array([
                self.max_init for i in range(0, self.channel_size)
            ]).astype(np.float32)
            self.minq = Parameter(Tensor(min_array),
                                  name='quant_min',
                                  requires_grad=False)
            self.maxq = Parameter(Tensor(max_array),
                                  name='quant_max',
                                  requires_grad=False)
            self.fake_quant_train = P.FakeQuantWithMinMaxPerChannel(
                num_bits=self.num_bits,
                ema=self.ema,
                ema_decay=self.ema_decay,
                quant_delay=self.quant_delay,
                symmetric=self.symmetric,
                narrow_range=self.narrow_range,
                training=True)
            self.fake_quant_infer = P.FakeQuantWithMinMaxPerChannel(
                num_bits=self.num_bits,
                ema=self.ema,
                ema_decay=self.ema_decay,
                quant_delay=self.quant_delay,
                symmetric=self.symmetric,
                narrow_range=self.narrow_range,
                training=False)
        else:
            min_array = np.array([min_init]).reshape(1).astype(np.float32)
            max_array = np.array([max_init]).reshape(1).astype(np.float32)
            self.minq = Parameter(Tensor(min_array),
                                  name='quant_min',
                                  requires_grad=False)
            self.maxq = Parameter(Tensor(max_array),
                                  name='quant_max',
                                  requires_grad=False)
            if context.get_context('device_target') == "Ascend":
                self.fake_quant_train = FakeQuantWithMinMaxD(
                    num_bits=self.num_bits,
                    ema=self.ema,
                    ema_decay=self.ema_decay,
                    quant_delay=self.quant_delay,
                    symmetric=self.symmetric,
                    narrow_range=self.narrow_range,
                    training=True,
                    min_init=self.minq,
                    max_init=self.maxq)
                self.fake_quant_infer = FakeQuantWithMinMaxD(
                    num_bits=self.num_bits,
                    ema=self.ema,
                    ema_decay=self.ema_decay,
                    quant_delay=self.quant_delay,
                    symmetric=self.symmetric,
                    narrow_range=self.narrow_range,
                    training=False,
                    min_init=self.minq,
                    max_init=self.maxq)
            elif context.get_context('device_target') == "GPU":
                self.fake_quant_train = P.FakeQuantWithMinMax(
                    num_bits=self.num_bits,
                    ema=self.ema,
                    ema_decay=self.ema_decay,
                    quant_delay=self.quant_delay,
                    symmetric=self.symmetric,
                    narrow_range=self.narrow_range,
                    training=True)
                self.fake_quant_infer = P.FakeQuantWithMinMax(
                    num_bits=self.num_bits,
                    ema=self.ema,
                    ema_decay=ema_decay,
                    quant_delay=quant_delay,
                    symmetric=self.symmetric,
                    narrow_range=self.narrow_range,
                    training=False)
            else:
                raise ValueError("Not support platform.")