def __init__(self, in_channels, out_channels, kernel_size, stride=1, pad_mode='same', padding=0, dilation=1, group=1, eps=1e-5, momentum=0.997, weight_init=None, beta_init=None, gamma_init=None, mean_init=None, var_init=None, quant_delay=0, freeze_bn=100000, fake=True, num_bits=8, per_channel=False, symmetric=False, narrow_range=False):
    """init Conv2dBatchNormQuant layer

    Builds a quantization-aware Conv2d cell with folded BatchNorm: the
    convolution weight passes through a fake-quantization op and the
    BatchNorm statistics are folded into the convolution during training.

    Args:
        in_channels (int): Number of input feature-map channels.
        out_channels (int): Number of output feature-map channels.
        kernel_size (int or tuple): Convolution kernel size (expanded to a pair).
        stride (int or tuple): Convolution stride. Default: 1.
        pad_mode (str): Padding mode forwarded to the conv op. Default: 'same'.
        padding (int): Explicit padding amount. Default: 0.
        dilation (int or tuple): Convolution dilation. Default: 1.
        group (int): Number of convolution groups. Default: 1.
        eps (float): BatchNorm epsilon. Default: 1e-5.
        momentum (float): BatchNorm moving-average momentum. Default: 0.997.
        weight_init, beta_init, gamma_init, mean_init, var_init: Optional
            initializers; defaults are created below when None.
        quant_delay (int): Steps to wait before fake quantization activates.
        freeze_bn (int): Step after which BatchNorm statistics are frozen.
        fake (bool): Whether fake quantization is enabled.
        num_bits (int): Quantization bit width. Default: 8.
        per_channel (bool): Per-channel (vs per-tensor) quantization.
        symmetric (bool): Use a symmetric quantization range.
        narrow_range (bool): Use the narrow quantization range.
    """
    super(Conv2dBatchNormQuant, self).__init__()
    self.in_channels = in_channels
    self.out_channels = out_channels
    # twice() normalizes scalar arguments into (h, w) pairs
    self.kernel_size = twice(kernel_size)
    self.stride = twice(stride)
    self.pad_mode = pad_mode
    self.padding = padding
    self.dilation = twice(dilation)
    self.group = group
    self.eps = eps
    self.momentum = momentum
    self.quant_delay = quant_delay
    self.freeze_bn = freeze_bn
    self.fake = fake
    self.num_bits = num_bits
    self.per_channel = per_channel
    self.symmetric = symmetric
    self.narrow_range = narrow_range
    self.is_gpu = context.get_context('device_target') == "GPU"

    # initialize convolution op and Parameter
    if context.get_context('device_target') == "Ascend" and group > 1:
        # On Ascend, grouped conv is only supported as a depthwise conv,
        # which requires group == in_channels == out_channels.
        validator.check_integer('group', group, in_channels, Rel.EQ)
        validator.check_integer('group', group, out_channels, Rel.EQ)
        self.conv = P.DepthwiseConv2dNative(channel_multiplier=1,
                                            kernel_size=self.kernel_size,
                                            pad_mode=pad_mode,
                                            pad=padding,
                                            stride=self.stride,
                                            dilation=self.dilation)
        if weight_init is None:
            weight_init = initializer('normal', [1, in_channels, *self.kernel_size])
        # depthwise weights are laid out with channels on axis 1
        channel_axis = 1
    else:
        self.conv = P.Conv2D(out_channel=out_channels,
                             kernel_size=self.kernel_size,
                             pad_mode=pad_mode,
                             pad=padding,
                             stride=self.stride,
                             dilation=self.dilation,
                             group=group)
        if weight_init is None:
            weight_init = initializer(
                'normal', [out_channels, in_channels // group, *self.kernel_size])
        channel_axis = 0
    self.weight = Parameter(weight_init, name='weight')

    # initialize batchnorm Parameter (moving stats are not trainable)
    if gamma_init is None:
        gamma_init = initializer('ones', [out_channels])
    self.gamma = Parameter(gamma_init, name='gamma')
    if beta_init is None:
        beta_init = initializer('zeros', [out_channels])
    self.beta = Parameter(beta_init, name='beta')
    if mean_init is None:
        mean_init = initializer('zeros', [out_channels])
    self.moving_mean = Parameter(mean_init, name='moving_mean', requires_grad=False)
    if var_init is None:
        var_init = initializer('ones', [out_channels])
    self.moving_variance = Parameter(var_init, name='moving_variance', requires_grad=False)

    # initialize fake ops
    self.fake_quant_weight = FakeQuantWithMinMax(min_init=-6,
                                                 max_init=6,
                                                 ema=False,
                                                 num_bits=num_bits,
                                                 quant_delay=quant_delay,
                                                 per_channel=per_channel,
                                                 out_channels=out_channels,
                                                 symmetric=symmetric,
                                                 narrow_range=narrow_range)
    self.batchnorm_fold = BatchNormFoldCell(epsilon=eps, momentum=momentum, freeze_bn=freeze_bn)
    self.correct_mul = P.CorrectionMul(channel_axis)
    # BatchNormFold2 has platform-specific implementations; the "infer"
    # variant uses freeze_bn=0 so statistics are always treated as frozen.
    if context.get_context('device_target') == "Ascend":
        self.batchnorm_fold2_train = P.BatchNormFold2_D(freeze_bn=freeze_bn)
        self.batchnorm_fold2_infer = P.BatchNormFold2_D(freeze_bn=0)
    elif context.get_context('device_target') == "GPU":
        self.batchnorm_fold2_train = P.BatchNormFold2(freeze_bn=freeze_bn)
        self.batchnorm_fold2_infer = P.BatchNormFold2(freeze_bn=0)
    else:
        raise ValueError("Unsupported platform: {}".format(
            context.get_context('device_target')))
    # global step counter used to decide when BN statistics freeze
    self.step = Parameter(initializer('normal', [1], dtype=mstype.int32),
                          name='step', requires_grad=False)
    self.one = Tensor(1, mstype.int32)
    self.assignadd = P.AssignAdd()
def compile(self, obj, *args, phase='predict', do_convert=True, auto_parallel_mode=False):
    """
    Compiles graph.

    Args:
        obj (Function/Cell): The function or cell instance need compile.
        args (tuple): Function or cell input arguments.
        phase (str): The name of compile phase. Default: 'predict'.
        do_convert (bool): When set to True, convert ME graph to GE graph after compiling graph.
        auto_parallel_mode: When set to True, use auto parallel mode to compile graph.

    Return:
        Str, the full phase of the cell.
        Bool, if the graph has been compiled before, return False, else return True.
    """
    from mindspore import nn
    from mindspore.ops.composite import GradOperation

    class InputsToAttrCell(nn.Cell):
        """The cell that converts non-tensor inputs to attr."""

        def __init__(self, net, args_names, non_tensor_inputs):
            super(InputsToAttrCell, self).__init__()
            self.net = net
            self.args_names = args_names
            self.non_tensor_inputs = non_tensor_inputs
            self.inputs_to_attr = True

        def construct(self, *tensor_inputs):
            # Re-interleave the captured non-tensor values with the tensor
            # inputs, preserving the original argument order.
            real_inputs = ()
            index = 0
            for i in args_names:
                if i in self.non_tensor_inputs.keys():
                    real_inputs += (self.non_tensor_inputs[i], )
                else:
                    real_inputs += (tensor_inputs[index], )
                    index += 1
            return self.net(*real_inputs)

    args_names, args_list = _generate_pip_args(obj, *args)
    if not hasattr(obj, "inputs_to_attr"):
        # Build the unique phase key from the argument signature so that
        # a graph compiled for the same signature can be reused.
        dic = dict(zip(args_names, args_list))
        key = generate_key(phase, dic)
        obj.phase_prefix = str(key[1])
        if 'export' in phase:
            phase = phase + '.' + obj.phase_prefix + '.' + str(obj.create_time)
        else:
            phase = obj.phase_prefix + phase + '.' + str(obj.create_time)

        if phase in self.compile_cache.keys():
            logger.debug("%r graph has existed.", phase)
            return phase, False

    if getattr(obj, "support_non_tensor_inputs", None):
        # Non-tensor inputs are only supported for forward nets; a grad net
        # would differentiate through attrs, which is not allowed.
        for i in obj.__dict__.values():
            if isinstance(i, GradOperation):
                raise ValueError("Not support set 'support_non_tensor_inputs' to the 'True' for grad net, "
                                 "only support forward net.")
        attrs = {}
        inputs = []
        for key, value in dic.items():
            if not isinstance(value, (Tensor, MetaTensor)):
                attrs[key] = value
            else:
                inputs.append(value)
        if attrs:
            # Wrap the net so non-tensor args become attributes, then
            # recursively compile the wrapper with only tensor inputs.
            inputs_to_attr_cell = InputsToAttrCell(obj, args_names, attrs)
            return self.compile(inputs_to_attr_cell, *inputs, phase=phase)

    obj.check_names()
    _check_full_batch()
    self._set_dataset_mode(args_list)

    # virtual_flag on the first input marks dataset-sink-mode compilation
    is_sink_mode = args and isinstance(args[0], Tensor) and args[0].virtual_flag
    if auto_parallel_mode and _need_to_full() and not is_sink_mode and obj.auto_parallel_compile_and_run():
        # Expand per-device shards to full-batch tensors before compiling.
        args_full = _to_full_tensor(args, _get_device_num(), _get_global_rank())
        _, args_list = _generate_pip_args(obj, *args_full)

    enable_debug_runtime = context.get_context("enable_debug_runtime")
    enable_ge = context.get_context("enable_ge")
    use_vm = not enable_ge or (enable_debug_runtime and context.get_context("mode") == context.PYNATIVE_MODE)
    result = self._executor.compile(obj, args_list, phase, use_vm)
    self.compile_cache[phase] = phase
    if not result:
        raise RuntimeError("Executor compile failed.")
    graph = self._executor.get_func_graph(phase)

    if graph is None:
        logger.error("%r graph compile failed.", phase)
    if not do_convert:
        return phase, True

    if auto_parallel_mode:
        obj.parameter_layout_dict = self._executor.get_parameter_layout(phase)
    replace = obj.init_parameters_data(auto_parallel_mode=auto_parallel_mode)
    if not enable_debug_runtime or enable_ge:
        if auto_parallel_mode:
            obj.load_parameter_slice(None)

    self._updata_param_node_default_input(phase, replace)

    # set parallel inputs in sink mode
    if auto_parallel_mode and is_sink_mode:
        obj.set_parallel_input_with_inputs(*args)

    # the following GE init process is not needed when use vm or ms backend
    if enable_ge:
        self._build_data_graph(obj, phase)
        if "export" not in phase:
            init_phase = "init_subgraph" + "." + str(obj.create_time)
            _exec_init_graph(obj, init_phase)
    elif not enable_ge and "export" in phase:
        self._build_data_graph(obj, phase)
    elif BROADCAST_PHASE not in phase and _get_parameter_broadcast():
        # Broadcast parameters from rank 0, excluding those already split
        # by the auto-parallel layout (they differ per device by design).
        auto_split_param_names = []
        if auto_parallel_mode:
            auto_split_param_names = self._get_auto_split_param_names(obj.parameter_layout_dict)

        broadcast_params_dict = obj.parameters_broadcast_dict()
        if auto_split_param_names and broadcast_params_dict:
            broadcast_params_dict = OrderedDict()
            for param_name, param in obj.parameters_broadcast_dict().items():
                if param_name not in auto_split_param_names:
                    broadcast_params_dict[param_name] = param
        broadcast_phase = "_broadcast_subgraph"
        self._build_broadcast_graph(broadcast_params_dict, broadcast_phase)

    return phase, True
def _get_device():
    """Return the current device target string ('GPU', 'CPU' or 'Ascend')."""
    target = context.get_context('device_target')
    return target
def __init__(self, input_size, hidden_size, num_layers=1, has_bias=True, batch_first=False, dropout=0, bidirectional=False):
    """Initialize the LSTM layer.

    On non-CPU targets a single fused P.LSTM op with one flat weight
    Parameter is used; on CPU the layer is built from stacked nn.LSTMCell
    instances, one weight Parameter per layer.

    Args:
        input_size (int): Number of features in the input.
        hidden_size (int): Number of features in the hidden state.
        num_layers (int): Number of stacked LSTM layers. Default: 1.
        has_bias (bool): Whether the cell uses bias weights. Default: True.
        batch_first (bool): If True, input is (batch, seq, feature). Default: False.
        dropout (float): Dropout applied between layers. Default: 0.
        bidirectional (bool): Whether the LSTM is bidirectional. Default: False.
    """
    super(LSTM, self).__init__()
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.has_bias = has_bias
    self.batch_first = validator.check_value_type("batch_first", batch_first, [bool], self.cls_name)
    self.dropout = float(dropout)
    self.bidirectional = bidirectional
    if self.batch_first:
        # transposes used to convert between batch-first and time-first layout
        self.transpose1 = P.Transpose()
        self.transpose2 = P.Transpose()
    num_directions = 2 if self.bidirectional else 1
    self.cpu_target = False
    if context.get_context("device_target") == "CPU":
        self.cpu_target = True
    if not self.cpu_target:
        # Fused kernel path: one op, one flat weight buffer.
        self.lstm = P.LSTM(input_size=self.input_size,
                           hidden_size=self.hidden_size,
                           num_layers=self.num_layers,
                           has_bias=self.has_bias,
                           bidirectional=self.bidirectional,
                           dropout=self.dropout)
        # Compute the total flat weight size: each layer holds input-hidden
        # and hidden-hidden matrices (4 gates each) plus optional biases,
        # multiplied by the number of directions.
        weight_size = 0
        gate_size = 4 * self.hidden_size
        for layer in range(self.num_layers):
            input_layer_size = self.input_size if layer == 0 else self.hidden_size * num_directions
            increment_size = gate_size * input_layer_size
            increment_size += gate_size * self.hidden_size
            if self.has_bias:
                increment_size += 2 * gate_size
            weight_size += increment_size * num_directions
        self.weight = Parameter(initializer(0.0, [weight_size, 1, 1]), name='weight')
    else:
        # CPU path: build the stack from per-layer LSTMCell instances.
        input_size_list = []
        input_size_list.append(self.input_size)
        for i in range(self.num_layers - 1):
            input_size_list.append(self.hidden_size * num_directions)
        weights = []
        layers = []
        bias_size = 0 if not self.has_bias else num_directions * self.hidden_size * 4
        for i in range(num_layers):
            weight_size = (input_size_list[i] + self.hidden_size) * num_directions * self.hidden_size * 4
            # small constant init for weights; zeros for the (optional) bias
            w_np = np.ones([weight_size, 1, 1]).astype(np.float32) * 0.01
            if has_bias:
                bias_np = np.zeros([bias_size, 1, 1]).astype(np.float32)
                w_np = np.concatenate([w_np, bias_np], axis=0)
            weights.append(Parameter(initializer(Tensor(w_np), w_np.shape), name='weight' + str(i)))
            layers.append(nn.LSTMCell(input_size=input_size_list[i],
                                      hidden_size=self.hidden_size,
                                      has_bias=self.has_bias,
                                      bidirectional=self.bidirectional,
                                      dropout=self.dropout))
        self.lstms = layers
        self.weight = ParameterTuple(tuple(weights))
    self.fill = P.Fill()
    self.shape = P.Shape()
def _train(self, epoch, train_dataset, callbacks=None, dataset_sink_mode=True, sink_size=-1):
    """
    Training.

    Args:
        epoch (int): Total number of iterations on the data.
        train_dataset (Dataset): A training dataset iterator. If there is no
                                 loss_fn, a tuple with multiply data (data1, data2, data3, ...) will be
                                 returned and passed to the network. Otherwise, a tuple (data, label) will
                                 be returned, and the data and label are passed to the network and loss
                                 function respectively.
        callbacks (list): List of callback object. Callbacks which should be executed while training. Default: None.
        dataset_sink_mode (bool): Determines whether to pass the data through dataset channel. Default: True.
                                  Configure pynative mode, the training process will be performed
                                  with dataset not sink.
        sink_size (int): Control the amount of data each sink. Default: -1.
    """
    epoch = check_int_positive(epoch)
    self._train_network.set_train()

    if self._parameter_broadcast:
        self._train_network.set_broadcast_flag()

    # Populate the callback parameter bundle shared by all callbacks.
    cb_params = _InternalCallbackParam()
    cb_params.train_network = self._train_network
    cb_params.epoch_num = epoch
    # In sink mode with an explicit sink_size, each "epoch" processes
    # sink_size steps rather than the full dataset.
    if dataset_sink_mode and sink_size > 0:
        cb_params.batch_num = sink_size
    else:
        cb_params.batch_num = train_dataset.get_dataset_size()
    cb_params.mode = "train"
    cb_params.loss_fn = self._loss_fn
    cb_params.optimizer = self._optimizer
    cb_params.parallel_mode = self._parallel_mode
    cb_params.device_number = self._device_number
    cb_params.train_dataset = train_dataset
    cb_params.list_callback = self._transform_callbacks(callbacks)
    cb_params.train_dataset_element = None
    cb_params.network = self._network
    # Parameter-server and scheduler roles only need a single pass.
    ms_role = os.getenv("MS_ROLE")
    if ms_role in ("MS_PSERVER", "MS_SCHED"):
        epoch = 1

    # build callback list
    with _CallbackManager(callbacks) as list_callback:
        if not dataset_sink_mode:
            self._train_process(epoch, train_dataset, list_callback, cb_params)
        elif context.get_context("mode") == context.PYNATIVE_MODE:
            # Sink mode is silently downgraded in pynative mode.
            logger.warning("The pynative mode cannot support dataset sink mode currently."
                           "So the training process will be performed with dataset not sink.")
            self._train_process(epoch, train_dataset, list_callback, cb_params)
        else:
            self._train_dataset_sink_process(epoch, train_dataset, list_callback, cb_params, sink_size)
def __init__(self, learning_rate, parameters, weight_decay=0.0, loss_scale=1.0):
    """Initialize the base Optimizer.

    Validates and normalizes the learning rate, parameter list, weight decay
    and loss scale; supports both a flat Parameter list and grouped
    parameters (a list of dicts), and configures the optional parallel
    optimizer for Ascend data-parallel training.

    Args:
        learning_rate (Union[float, Tensor, Iterable, LearningRateSchedule]):
            Scalar, schedule, or per-group learning rate(s).
        parameters (Union[list[Parameter], list[dict]]): Parameters to update,
            or group dicts describing per-group settings.
        weight_decay (float): L2 weight-decay factor. Default: 0.0.
        loss_scale (float): Loss-scaling factor; gradients are divided by it.
            Default: 1.0.

    Raises:
        ValueError: If `parameters` is empty or `loss_scale` is not positive.
        TypeError: If `parameters` holds neither Parameter nor dict entries.
        RuntimeError: If the parallel-optimizer configuration is unsupported.
    """
    super(Optimizer, self).__init__(auto_prefix=False)
    if parameters is not None and not isinstance(parameters, list):
        parameters = list(parameters)

    if not parameters:
        raise ValueError("Optimizer got an empty parameter list.")

    if not isinstance(parameters[0], (dict, Parameter)):
        raise TypeError("Only a list of Parameter or dict can be supported.")

    if isinstance(loss_scale, int):
        loss_scale = float(loss_scale)
    validator.check_value_type("loss_scale", loss_scale, [float], self.cls_name)
    validator.check_positive_float(loss_scale, "loss_scale", self.cls_name)
    self.loss_scale = loss_scale

    weight_decay = self._preprocess_weight_decay(weight_decay)

    self._unique = True
    self._target = context.get_context("device_target")
    self.dynamic_lr = False
    self.assignadd = None
    self.global_step = None
    self.is_group = False
    self.is_group_lr = False
    self.is_group_params_ordered = False
    learning_rate = self._preprocess_single_lr(learning_rate)
    if isinstance(parameters[0], dict):
        # Grouped-parameter mode: per-group lr / weight decay.
        self.is_group = True
        self.group_params = []
        self.group_lr = []
        self.group_weight_decay = []
        self._init_group_params(parameters, learning_rate, weight_decay)

    # The final value of dynamic_lr can be determined after the process of parse_single_lr and init_group_params
    if self.dynamic_lr:
        self.assignadd = P.AssignAdd()
        self.global_step = Parameter(initializer(0, [1], mindspore.int32), name='global_step')

    if self.is_group_lr:
        if self.dynamic_lr:
            self.learning_rate = CellList(self.group_lr)
        else:
            self.learning_rate = ParameterTuple(self.group_lr)
    else:
        self.learning_rate = self._build_single_lr(learning_rate, 'learning_rate')
    if self.is_group:
        self.parameters = ParameterTuple(self.group_params)
        self.weight_decay = tuple(self.group_weight_decay)
        self.weight_decay_tensor_tuple = tuple(Tensor(x, mstype.float32) for x in self.group_weight_decay)
        decay_filter = lambda x: x > 0
        self.decay_flags = tuple(decay_filter(x) for x in self.weight_decay)
        self.exec_weight_decay = any(self.decay_flags)
    else:
        self.parameters = ParameterTuple(parameters)
        self.weight_decay = weight_decay * loss_scale
        self.weight_decay_tensor = Tensor(self.weight_decay, mstype.float32)
        # by convention, BatchNorm beta/gamma parameters are not decayed
        decay_filter = lambda x: 'beta' not in x.name and 'gamma' not in x.name
        self.decay_flags = tuple(decay_filter(x) for x in self.parameters)
        self.exec_weight_decay = self.weight_decay > 0
    # when a parameter has been unique, there is no need do another unique in optimizer.
    for param in self.parameters:
        if param.unique:
            self._unique = False
            break
    ps_filter = lambda x: x.is_param_ps
    self.ps_parameters = tuple(ps_filter(x) for x in self.parameters)
    ps_cache_filter = lambda x: x.cache_enable
    self.cache_enable = tuple(ps_cache_filter(x) for x in self.parameters)
    # multiply by the reciprocal instead of dividing by loss_scale
    self.reciprocal_scale = Tensor(1.0 / loss_scale, mstype.float32)
    self.need_scale = loss_scale != 1.0
    self.global_step_increase_tensor = Tensor(1, mstype.int32)
    self.param_length = len(self.parameters)
    self.map_ = C.Map()

    # Parallel optimizer: shard optimizer state across devices. Only valid
    # for data-parallel mode on Ascend.
    if context.get_auto_parallel_context("enable_parallel_optimizer"):
        if _get_parallel_mode() == ParallelMode.DATA_PARALLEL and context.get_context("device_target") == "Ascend":
            self.use_parallel = True
        elif _get_parallel_mode() == ParallelMode.DATA_PARALLEL \
                and context.get_context("device_target") != "Ascend":
            raise RuntimeError("Parallel optimizer only supports Ascend in data parallel mode.")
        elif _get_parallel_mode() in (ParallelMode.STAND_ALONE, ParallelMode.HYBRID_PARALLEL):
            raise RuntimeError("Parallel optimizer is not supported in {}.".format(_get_parallel_mode()))
        else:
            self.use_parallel = False
    else:
        self.use_parallel = False
    if self.use_parallel:
        if self.cls_name not in ["Lamb", "AdamWeightDecay"]:
            raise RuntimeError("Parallel optimizer does not support optimizer {}".format(self.cls_name))
        self.dev_num = _get_device_num()
        if self.dev_num > self.param_length:
            raise RuntimeError("Parallel optimizer can not be applied when the number of parameters {} is"
                               " less than the number of devices {}".format(self.param_length, self.dev_num))
        # Each rank only updates the parameters assigned to its group.
        self.param_rank = self._get_parameter_group_id()
        self.optim_filter = tuple(map(lambda x: x == _get_global_rank(), self.param_rank))
        self.param_names = []
        for param in self.parameters:
            self.param_names.append(param.name)
    else:
        self.optim_filter = (True, ) * self.param_length
def __init__(self, config, batch_size, num_classes, use_sigmoid_cls, target_means=(.0, .0, .0, .0), target_stds=(1.0, 1.0, 1.0, 1.0)):
    """Initialize the RPN proposal-generation cell.

    Args:
        config: Model configuration object providing rpn_proposal_* and
            feature-pyramid settings (read below via `cfg`).
        batch_size (int): Number of images per batch.
        num_classes (int): Number of classes including background.
        use_sigmoid_cls (bool): If True, use sigmoid scores over
            num_classes - 1 foreground channels; otherwise softmax over
            two channels.
        target_means (tuple): Bounding-box decode means. Default: zeros.
        target_stds (tuple): Bounding-box decode stds. Default: ones.

    Raises:
        ValueError: If the derived number of class output channels is <= 0.
    """
    super(Proposal, self).__init__()
    cfg = config
    self.batch_size = batch_size
    self.num_classes = num_classes
    self.target_means = target_means
    self.target_stds = target_stds
    self.use_sigmoid_cls = use_sigmoid_cls

    # Sigmoid scoring drops the background channel; softmax keeps two
    # channels (background/foreground) and needs a different reshape.
    if self.use_sigmoid_cls:
        self.cls_out_channels = num_classes - 1
        self.activation = P.Sigmoid()
        self.reshape_shape = (-1, 1)
    else:
        self.cls_out_channels = num_classes
        self.activation = P.Softmax(axis=1)
        self.reshape_shape = (-1, 2)

    if self.cls_out_channels <= 0:
        raise ValueError('num_classes={} is too small'.format(num_classes))

    # RPN proposal filtering thresholds and limits from the config.
    self.num_pre = cfg.rpn_proposal_nms_pre
    self.min_box_size = cfg.rpn_proposal_min_bbox_size
    self.nms_thr = cfg.rpn_proposal_nms_thr
    self.nms_post = cfg.rpn_proposal_nms_post
    self.nms_across_levels = cfg.rpn_proposal_nms_across_levels
    self.max_num = cfg.rpn_proposal_max_num
    self.num_levels = cfg.fpn_num_outs

    # Op Define
    self.squeeze = P.Squeeze()
    self.reshape = P.Reshape()
    self.cast = P.Cast()

    self.feature_shapes = cfg.feature_shapes

    self.transpose_shape = (1, 2, 0)

    self.decode = P.BoundingBoxDecode(max_shape=(cfg.img_height, cfg.img_width), \
                                      means=self.target_means, \
                                      stds=self.target_stds)

    self.nms = P.NMSWithMask(self.nms_thr)
    self.concat_axis0 = P.Concat(axis=0)
    self.concat_axis1 = P.Concat(axis=1)
    self.split = P.Split(axis=1, output_num=5)
    self.min = P.Minimum()
    self.gatherND = P.GatherNd()
    self.slice = P.Slice()
    self.select = P.Select()
    self.greater = P.Greater()
    self.transpose = P.Transpose()
    self.tile = P.Tile()
    self.set_train_local(config, training=True)

    # Ascend runs this stage in float16; other targets use float32.
    _mode_16 = bool(context.get_context("device_target") == "Ascend")
    self.dtype = np.float16 if _mode_16 else np.float32
    self.ms_type = mstype.float16 if _mode_16 else mstype.float32
    self.multi_10 = Tensor(10.0, self.ms_type)
def __init__(self, learning_rate, parameters, weight_decay=0.0, loss_scale=1.0):
    """Initialize the base Optimizer.

    Validates and normalizes the learning rate, parameter list, weight decay
    and loss scale; supports both a flat Parameter list and grouped
    parameters (a list of dicts, which may additionally carry a
    grad_centralization setting), then configures the optional parallel
    optimizer via `_use_parallel_optimizer()`.

    Args:
        learning_rate (Union[float, Tensor, Iterable, LearningRateSchedule]):
            Scalar, schedule, or per-group learning rate(s).
        parameters (Union[list[Parameter], list[dict]]): Parameters to update,
            or group dicts describing per-group settings.
        weight_decay (float): L2 weight-decay factor. Default: 0.0.
        loss_scale (float): Loss-scaling factor; gradients are divided by it.
            Default: 1.0.

    Raises:
        ValueError: If `parameters` is empty or `loss_scale` is not positive.
        TypeError: If `parameters` holds neither Parameter nor dict entries.
    """
    super(Optimizer, self).__init__(auto_prefix=False)
    if parameters is not None and not isinstance(parameters, list):
        parameters = list(parameters)

    if not parameters:
        raise ValueError("Optimizer got an empty parameter list.")

    if not isinstance(parameters[0], (dict, Parameter)):
        raise TypeError("Only a list of Parameter or dict can be supported.")

    if isinstance(loss_scale, int):
        loss_scale = float(loss_scale)
    validator.check_value_type("loss_scale", loss_scale, [float], self.cls_name)
    validator.check_positive_float(loss_scale, "loss_scale", self.cls_name)
    self.loss_scale = loss_scale

    weight_decay = self._preprocess_weight_decay(weight_decay)
    self.grad_centralization = False

    self._unique = True
    self._target = context.get_context("device_target")
    self.dynamic_lr = False
    self.assignadd = None
    self.global_step = None
    self.is_group = False
    self.is_group_lr = False
    self.is_group_params_ordered = False
    learning_rate = self._preprocess_single_lr(learning_rate)
    if isinstance(parameters[0], dict):
        # Grouped-parameter mode: per-group lr / weight decay /
        # gradient centralization.
        self.is_group = True
        self.group_params = []
        self.group_lr = []
        self.group_weight_decay = []
        self.group_grad_centralization = []
        self._init_group_params(parameters, learning_rate, weight_decay, self.grad_centralization)

    # The final value of dynamic_lr can be determined after the process of parse_single_lr and init_group_params
    if self.dynamic_lr:
        self.assignadd = P.AssignAdd()
        self.global_step = Parameter(initializer(0, [1], mindspore.int32), name='global_step')

    if self.is_group_lr:
        self.learning_rate = CellList(self.group_lr, auto_prefix=False) if self.dynamic_lr \
            else ParameterTuple(self.group_lr)
    else:
        self.learning_rate = self._build_single_lr(learning_rate, 'learning_rate')

    if self.is_group:
        self.parameters = ParameterTuple(self.group_params)
        self.weight_decay = tuple(self.group_weight_decay)
        self.weight_decay_tensor_tuple = tuple(Tensor(x, mstype.float32) for x in self.group_weight_decay)
        decay_filter = lambda x: x > 0
        self.decay_flags = tuple(decay_filter(x) for x in self.weight_decay)
        self.exec_weight_decay = any(self.decay_flags)
        self.grad_centralization_flags = tuple(self.group_grad_centralization)
    else:
        self.parameters = ParameterTuple(parameters)
        self.weight_decay = weight_decay * loss_scale
        self.weight_decay_tensor = Tensor(self.weight_decay, mstype.float32)
        # by convention, BatchNorm beta/gamma parameters are not decayed
        decay_filter = lambda x: 'beta' not in x.name and 'gamma' not in x.name
        self.decay_flags = tuple(decay_filter(x) for x in self.parameters)
        self.exec_weight_decay = self.weight_decay > 0
    # when a parameter has been unique, there is no need do another unique in optimizer.
    for param in self.parameters:
        if param.unique:
            self._unique = False
            break
    ps_filter = lambda x: x.is_param_ps
    self.ps_parameters = tuple(ps_filter(x) for x in self.parameters)
    cache_filter = lambda x: x.cache_enable
    self.cache_enable = tuple(cache_filter(x) for x in self.parameters)
    # multiply by the reciprocal instead of dividing by loss_scale
    self.reciprocal_scale = Tensor(1.0 / loss_scale, mstype.float32)
    self.need_scale = loss_scale != 1.0
    self.global_step_increase_tensor = Tensor(1, mstype.int32)
    self.param_length = len(self.parameters)
    self.map_ = C.Map()
    self._use_parallel_optimizer()
def _verify_data_n_settings(self, check_all=False, check_registration=False, check_data_n_network=False, check_saliency=False, check_hoc=False, check_environment=False):
    """
    Verify the validity of dataset and other settings.

    Args:
        check_all (bool): Set it True for checking everything.
        check_registration (bool): Set it True for checking registrations, check if it is enough to invoke run().
        check_data_n_network (bool): Set it True for checking data and network.
        check_saliency (bool): Set it True for checking saliency related settings.
        check_hoc (bool): Set it True for checking HOC related settings.
        check_environment (bool): Set it True for checking environment conditions.

    Raises:
        ValueError: Be raised for any data or settings' value problem.
        TypeError: Be raised for any data or settings' type problem.
        RuntimeError: Be raised for any runtime problem.
    """
    # check_all is shorthand for enabling every individual check flag.
    if check_all:
        check_registration = check_data_n_network = check_saliency = check_hoc = check_environment = True

    if check_environment:
        device_target = context.get_context('device_target')
        if device_target not in ("Ascend", "GPU"):
            raise RuntimeError(
                f"Unsupported device_target: '{device_target}', "
                f"only 'Ascend' or 'GPU' is supported. "
                f"Please call context.set_context(device_target='Ascend') or "
                f"context.set_context(device_target='GPU').")

    # Saliency explanations require pynative execution mode.
    if (check_environment or check_saliency) and self._is_saliency_registered:
        if context.get_context('mode') != context.PYNATIVE_MODE:
            raise RuntimeError(
                "Context mode: GRAPH_MODE is not supported, "
                "please call context.set_context(mode=context.PYNATIVE_MODE).")

    if check_registration:
        # Uncertainty only makes sense on top of a saliency registration.
        if self._is_uncertainty_registered and not self._is_saliency_registered:
            raise ValueError(
                "Function register_uncertainty() is called but register_saliency() is not.")
        if not (self._is_saliency_registered or self._is_hoc_registered):
            raise ValueError(
                "No explanation module was registered, user should at least call register_saliency() "
                "or register_hierarchical_occlusion() once with proper arguments.")

    if check_data_n_network or check_saliency or check_hoc:
        self._verify_data()

    if check_data_n_network:
        self._verify_network()

    if check_saliency:
        self._verify_saliency()
def train():
    """Train function.

    End-to-end YOLOv3-DarkNet53 training driver: parses CLI args, configures
    the MindSpore context (optionally distributed data-parallel), builds the
    network/loss/optimizer, then runs a manual step loop with loss logging,
    optional profiling, and per-rank checkpointing.
    """
    args = parse_args()
    devid = int(os.getenv('DEVICE_ID', '0'))
    context.set_context(mode=context.GRAPH_MODE, enable_auto_mixed_precision=True,
                        device_target=args.device_target, save_graphs=True, device_id=devid)
    if args.need_profiler:
        from mindspore.profiler.profiling import Profiler
        profiler = Profiler(output_path=args.outputs_dir, is_detail=True, is_show_op_path=True)

    loss_meter = AverageMeter('loss')

    context.reset_auto_parallel_context()
    parallel_mode = ParallelMode.STAND_ALONE
    degree = 1
    if args.is_distributed:
        parallel_mode = ParallelMode.DATA_PARALLEL
        degree = get_group_size()
    context.set_auto_parallel_context(parallel_mode=parallel_mode, gradients_mean=True, device_num=degree)

    network = YOLOV3DarkNet53(is_training=True)
    # default is kaiming-normal
    default_recurisive_init(network)
    load_yolov3_params(args, network)

    network = YoloWithLossCell(network)
    args.logger.info('finish get network')

    config = ConfigYOLOV3DarkNet53()

    config.label_smooth = args.label_smooth
    config.label_smooth_factor = args.label_smooth_factor

    if args.training_shape:
        config.multi_scale = [conver_training_shape(args)]

    if args.resize_rate:
        config.resize_rate = args.resize_rate

    ds, data_size = create_yolo_dataset(image_dir=args.data_root, anno_path=args.annFile, is_training=True,
                                        batch_size=args.per_batch_size, max_epoch=args.max_epoch,
                                        device_num=args.group_size, rank=args.rank, config=config)
    args.logger.info('Finish loading dataset')

    args.steps_per_epoch = int(data_size / args.per_batch_size / args.group_size)

    if not args.ckpt_interval:
        args.ckpt_interval = args.steps_per_epoch

    lr = get_lr(args)

    opt = Momentum(params=get_param_groups(network),
                   learning_rate=Tensor(lr),
                   momentum=args.momentum,
                   weight_decay=args.weight_decay,
                   loss_scale=args.loss_scale)
    is_gpu = context.get_context("device_target") == "GPU"
    if is_gpu:
        # GPU path: mixed-precision (O2) training wrapper with a fixed
        # loss scale of 1.0 and the loss kept in fp32.
        loss_scale_value = 1.0
        loss_scale = FixedLossScaleManager(loss_scale_value, drop_overflow_update=False)
        network = amp.build_train_network(network, optimizer=opt, loss_scale_manager=loss_scale,
                                          level="O2", keep_batchnorm_fp32=False)
        keep_loss_fp32(network)
    else:
        network = TrainingWrapper(network, opt)
        network.set_train()

    if args.rank_save_ckpt_flag:
        # checkpoint save
        ckpt_max_num = args.max_epoch * args.steps_per_epoch // args.ckpt_interval
        ckpt_config = CheckpointConfig(save_checkpoint_steps=args.ckpt_interval,
                                       keep_checkpoint_max=ckpt_max_num)
        save_ckpt_path = os.path.join(args.outputs_dir, 'ckpt_' + str(args.rank) + '/')
        ckpt_cb = ModelCheckpoint(config=ckpt_config,
                                  directory=save_ckpt_path,
                                  prefix='{}'.format(args.rank))
        # Drive the checkpoint callback manually since we run our own loop.
        cb_params = _InternalCallbackParam()
        cb_params.train_network = network
        cb_params.epoch_num = ckpt_max_num
        cb_params.cur_epoch_num = 1
        run_context = RunContext(cb_params)
        ckpt_cb.begin(run_context)

    old_progress = -1
    t_end = time.time()
    data_loader = ds.create_dict_iterator(output_numpy=True, num_epochs=1)

    for i, data in enumerate(data_loader):
        images = data["image"]
        input_shape = images.shape[2:4]
        args.logger.info('iter[{}], shape{}'.format(i, input_shape[0]))

        images = Tensor.from_numpy(images)

        batch_y_true_0 = Tensor.from_numpy(data['bbox1'])
        batch_y_true_1 = Tensor.from_numpy(data['bbox2'])
        batch_y_true_2 = Tensor.from_numpy(data['bbox3'])
        batch_gt_box0 = Tensor.from_numpy(data['gt_box1'])
        batch_gt_box1 = Tensor.from_numpy(data['gt_box2'])
        batch_gt_box2 = Tensor.from_numpy(data['gt_box3'])

        # network expects (width, height) order, hence the reversal
        input_shape = Tensor(tuple(input_shape[::-1]), ms.float32)
        loss = network(images, batch_y_true_0, batch_y_true_1, batch_y_true_2, batch_gt_box0,
                       batch_gt_box1, batch_gt_box2, input_shape)
        loss_meter.update(loss.asnumpy())

        if args.rank_save_ckpt_flag:
            # ckpt progress
            cb_params.cur_step_num = i + 1  # current step number
            cb_params.batch_num = i + 2
            ckpt_cb.step_end(run_context)

        if i % args.log_interval == 0:
            time_used = time.time() - t_end
            epoch = int(i / args.steps_per_epoch)
            fps = args.per_batch_size * (i - old_progress) * args.group_size / time_used
            if args.rank == 0:
                args.logger.info(
                    'epoch[{}], iter[{}], {}, {:.2f} imgs/sec, lr:{}'.format(epoch, i, loss_meter, fps, lr[i]))
            t_end = time.time()
            loss_meter.reset()
            old_progress = i

        if (i + 1) % args.steps_per_epoch == 0 and args.rank_save_ckpt_flag:
            cb_params.cur_epoch_num += 1

        # Profile only the first few steps, then stop training early.
        if args.need_profiler:
            if i == 10:
                profiler.analyse()
                break

    args.logger.info('==========end training===============')
def __init__(self):
    """Create the shared random-array fixtures used by broadcasting tests.

    Each attribute holds arrays exercising a particular NumPy broadcasting
    situation (scalars, empty arrays, aligned last dims, size-1 dims, ...).
    """
    # NOTE(review): despite the name, this stores the device-target string
    # (e.g. 'CPU', 'GPU', 'Ascend'), not a CPU flag — confirm intent.
    self.device_cpu = context.get_context('device_target')

    self.arrs = [
        rand_int(2),
        rand_int(2, 3),
        rand_int(2, 3, 4),
        rand_int(2, 3, 4, 5),
    ]

    # scalars expanded across the 0th dimension
    self.scalars = [
        rand_int(),
        rand_int(1),
        rand_int(1, 1),
        rand_int(1, 1, 1, 1),
    ]

    # empty arrays
    self.empty_arrs = [
        rand_int(0),
        rand_int(4, 0),
        rand_int(2, 0, 2),
        rand_int(5, 0, 7, 0),
    ]

    # arrays of the same size expanded across the 0th dimension
    # (the original code assigned this list twice with identical contents;
    # the redundant duplicate assignment was removed)
    self.expanded_arrs = [
        rand_int(2, 3),
        rand_int(1, 2, 3),
        rand_int(1, 1, 2, 3),
        rand_int(1, 1, 1, 2, 3),
    ]

    # arrays with last dimension aligned
    self.aligned_arrs = [
        rand_int(2, 3),
        rand_int(1, 4, 3),
        rand_int(5, 1, 2, 3),
        rand_int(4, 2, 1, 1, 3),
    ]

    # arrays which can be broadcast
    self.broadcastables = [
        rand_int(5),
        rand_int(6, 1),
        rand_int(7, 1, 5),
        rand_int(8, 1, 6, 1)
    ]

    # boolean arrays which can be broadcast
    self.bool_broadcastables = [
        rand_bool(),
        rand_bool(1),
        rand_bool(5),
        rand_bool(6, 1),
        rand_bool(7, 1, 5),
        rand_bool(8, 1, 6, 1),
    ]

    # core dimension 0 is matched for each
    # pair of array[i] and array[i + 1]
    self.core_broadcastables = [
        rand_int(3), rand_int(3),
        rand_int(6), rand_int(6, 4),
        rand_int(5, 2), rand_int(2),
        rand_int(2, 9), rand_int(9, 8),
        rand_int(6), rand_int(2, 6, 5),
        rand_int(9, 2, 7), rand_int(7),
        rand_int(5, 2, 4), rand_int(6, 1, 4, 9),
        rand_int(7, 1, 5, 3, 2), rand_int(8, 1, 6, 1, 2, 9),
    ]

    # arrays with dimensions of size 1
    self.nested_arrs = [
        rand_int(1),
        rand_int(1, 2),
        rand_int(3, 1, 8),
        rand_int(1, 3, 9, 1),
    ]
def test_switch_mode():
    """ test_switch_mode """
    # Toggle between the two execution modes and confirm each one sticks.
    for wanted_mode in (context.GRAPH_MODE, context.PYNATIVE_MODE):
        context.set_context(mode=wanted_mode)
        assert context.get_context("mode") == wanted_mode
def __init__(self, config, batch_size, num_bboxes, add_gt_as_proposals):
    """Init BboxAssignSampleForRcnn: stage-2 proposal assignment and sampling.

    Args:
        config: stage-2 assigner/sampler config (thresholds, expected counts).
        batch_size (int): images per batch.
        num_bboxes (int): number of candidate proposals per image.
        add_gt_as_proposals (bool/int): whether GT boxes are appended to proposals.
    """
    super(BboxAssignSampleForRcnn, self).__init__()
    cfg = config
    # Ascend runs this stage in fp16; every other backend uses fp32.
    _mode_16 = bool(context.get_context("device_target") == "Ascend")
    self.dtype = np.float16 if _mode_16 else np.float32
    self.ms_type = mstype.float16 if _mode_16 else mstype.float32
    self.batch_size = batch_size
    self.neg_iou_thr = cfg.neg_iou_thr_stage2
    self.pos_iou_thr = cfg.pos_iou_thr_stage2
    self.min_pos_iou = cfg.min_pos_iou_stage2
    self.num_gts = cfg.num_gts
    self.num_bboxes = num_bboxes
    self.num_expected_pos = cfg.num_expected_pos_stage2
    self.num_expected_neg = cfg.num_expected_neg_stage2
    self.num_expected_total = cfg.num_expected_total_stage2
    self.add_gt_as_proposals = add_gt_as_proposals
    self.label_inds = Tensor(np.arange(1, self.num_gts + 1).astype(np.int32))
    self.add_gt_as_proposals_valid = Tensor(np.array(self.add_gt_as_proposals * np.ones(self.num_gts),
                                                     dtype=np.int32))
    self.concat = P.Concat(axis=0)
    self.max_gt = P.ArgMaxWithValue(axis=0)
    self.max_anchor = P.ArgMaxWithValue(axis=1)
    self.sum_inds = P.ReduceSum()
    self.iou = P.IOU()
    self.greaterequal = P.GreaterEqual()
    self.greater = P.Greater()
    self.select = P.Select()
    self.gatherND = P.GatherNd()
    self.squeeze = P.Squeeze()
    self.cast = P.Cast()
    self.logicaland = P.LogicalAnd()
    self.less = P.Less()
    self.random_choice_with_mask_pos = P.RandomChoiceWithMask(self.num_expected_pos)
    self.random_choice_with_mask_neg = P.RandomChoiceWithMask(self.num_expected_neg)
    self.reshape = P.Reshape()
    self.equal = P.Equal()
    self.bounding_box_encode = P.BoundingBoxEncode(means=(0.0, 0.0, 0.0, 0.0), stds=(0.1, 0.1, 0.2, 0.2))
    self.concat_axis1 = P.Concat(axis=1)
    self.logicalnot = P.LogicalNot()
    self.tile = P.Tile()
    # Check
    self.check_gt_one = Tensor(np.array(-1 * np.ones((self.num_gts, 4)), dtype=self.dtype))
    self.check_anchor_two = Tensor(np.array(-2 * np.ones((self.num_bboxes, 4)), dtype=self.dtype))
    # Init tensor
    self.assigned_gt_inds = Tensor(np.array(-1 * np.ones(num_bboxes), dtype=np.int32))
    self.assigned_gt_zeros = Tensor(np.array(np.zeros(num_bboxes), dtype=np.int32))
    self.assigned_gt_ones = Tensor(np.array(np.ones(num_bboxes), dtype=np.int32))
    self.assigned_gt_ignores = Tensor(np.array(-1 * np.ones(num_bboxes), dtype=np.int32))
    self.assigned_pos_ones = Tensor(np.array(np.ones(self.num_expected_pos), dtype=np.int32))
    self.gt_ignores = Tensor(np.array(-1 * np.ones(self.num_gts), dtype=np.int32))
    self.range_pos_size = Tensor(np.arange(self.num_expected_pos).astype(self.dtype))
    # BUG FIX: use builtin `bool` instead of `np.bool`; the `np.bool` alias is
    # deprecated since NumPy 1.20 and removed in 1.24 (same semantics).
    self.check_neg_mask = Tensor(np.array(np.ones(self.num_expected_neg - self.num_expected_pos), dtype=bool))
    self.bboxs_neg_mask = Tensor(np.zeros((self.num_expected_neg, 4), dtype=self.dtype))
    self.labels_neg_mask = Tensor(np.array(np.zeros(self.num_expected_neg), dtype=np.uint8))
    self.reshape_shape_pos = (self.num_expected_pos, 1)
    self.reshape_shape_neg = (self.num_expected_neg, 1)
    self.scalar_zero = Tensor(0.0, dtype=self.ms_type)
    self.scalar_neg_iou_thr = Tensor(self.neg_iou_thr, dtype=self.ms_type)
    self.scalar_pos_iou_thr = Tensor(self.pos_iou_thr, dtype=self.ms_type)
    self.scalar_min_pos_iou = Tensor(self.min_pos_iou, dtype=self.ms_type)
def _get_mode():
    """Return the active execution mode (0 = Graph mode, 1 = PyNative mode)."""
    current_mode = context.get_context('mode')
    return current_mode
def fasterrcnn_eval(dataset_path, ckpt_path, ann_file):
    """FasterRcnn evaluation: run inference over the dataset and report COCO bbox metrics.

    Args:
        dataset_path: path to the (MindRecord) evaluation dataset.
        ckpt_path: checkpoint file to load into the network.
        ann_file: COCO-format annotation json used for metric computation.
    """
    ds = create_fasterrcnn_dataset(dataset_path, batch_size=config.test_batch_size, is_training=False)
    net = Faster_Rcnn_Resnet50(config)
    param_dict = load_checkpoint(ckpt_path)
    # On GPU the checkpoint weights are forced to fp32 before loading
    # (presumably the checkpoint may hold fp16 values from Ascend training — TODO confirm).
    if args_opt.device_target == "GPU":
        for key, value in param_dict.items():
            tensor = value.asnumpy().astype(np.float32)
            param_dict[key] = Parameter(tensor, key)
    load_param_into_net(net, param_dict)
    net.set_train(False)
    device_type = "Ascend" if context.get_context("device_target") == "Ascend" else "Others"
    # Ascend inference runs the network in fp16.
    if device_type == "Ascend":
        net.to_float(mstype.float16)
    eval_iter = 0
    total = ds.get_dataset_size()
    outputs = []
    dataset_coco = COCO(ann_file)
    print("\n========================================\n")
    print("total images num: ", total)
    print("Processing, please wait a moment.")
    # Keep at most this many detections per image (top scores kept).
    max_num = 128
    for data in ds.create_dict_iterator(num_epochs=1):
        eval_iter = eval_iter + 1
        img_data = data['image']
        img_metas = data['image_shape']
        gt_bboxes = data['box']
        gt_labels = data['label']
        gt_num = data['valid_num']
        start = time.time()
        # run net
        output = net(img_data, img_metas, gt_bboxes, gt_labels, gt_num)
        end = time.time()
        print("Iter {} cost time {}".format(eval_iter, end - start))
        # output: per-batch boxes, labels and a validity mask
        all_bbox = output[0]
        all_label = output[1]
        all_mask = output[2]
        for j in range(config.test_batch_size):
            all_bbox_squee = np.squeeze(all_bbox.asnumpy()[j, :, :])
            all_label_squee = np.squeeze(all_label.asnumpy()[j, :, :])
            all_mask_squee = np.squeeze(all_mask.asnumpy()[j, :, :])
            # Keep only detections flagged valid by the mask.
            all_bboxes_tmp_mask = all_bbox_squee[all_mask_squee, :]
            all_labels_tmp_mask = all_label_squee[all_mask_squee]
            if all_bboxes_tmp_mask.shape[0] > max_num:
                # Sort by score (last bbox column) descending and truncate.
                inds = np.argsort(-all_bboxes_tmp_mask[:, -1])
                inds = inds[:max_num]
                all_bboxes_tmp_mask = all_bboxes_tmp_mask[inds]
                all_labels_tmp_mask = all_labels_tmp_mask[inds]
            outputs_tmp = bbox2result_1image(all_bboxes_tmp_mask, all_labels_tmp_mask, config.num_classes)
            outputs.append(outputs_tmp)
    eval_types = ["bbox"]
    result_files = results2json(dataset_coco, outputs, "./results.pkl")
    coco_eval(result_files, eval_types, dataset_coco, single_result=True)
def __init__(self,
             in_channels,
             out_channels,
             kernel_size,
             stride,
             pad_mode,
             padding,
             dilation,
             group,
             has_bias,
             weight_init,
             bias_init,
             data_format='NCHW',
             transposed=False):
    """Validate convolution hyper-parameters and create weight/bias Parameters.

    `kernel_size`, `stride` and `dilation` are expected as 2-tuples here
    (presumably normalized by the caller — TODO confirm).
    """
    super(_Conv, self).__init__()
    self.in_channels = Validator.check_positive_int(in_channels)
    self.out_channels = Validator.check_positive_int(out_channels)
    self.kernel_size = kernel_size
    self.stride = stride
    self.pad_mode = pad_mode
    self.weight_init = weight_init
    self.bias_init = bias_init
    self.format = Validator.check_string(data_format, ['NCHW', 'NHWC'], 'format', self.cls_name)
    # NHWC layout is only implemented for the GPU backend.
    if context.get_context("device_target") != "GPU" and self.format == "NHWC":
        raise ValueError("NHWC format only support in GPU target.")
    # padding may be a single non-negative int or a tuple of them.
    if isinstance(padding, int):
        Validator.check_non_negative_int(padding, 'padding', self.cls_name)
        self.padding = padding
    elif isinstance(padding, tuple):
        for pad in padding:
            Validator.check_non_negative_int(pad, 'padding item', self.cls_name)
        self.padding = padding
    else:
        raise TypeError("padding type must be int/tuple(int) cannot be {}!".format(type(padding)))
    self.dilation = dilation
    self.group = Validator.check_positive_int(group)
    self.has_bias = has_bias
    # Each of kernel_size/stride/dilation must be a pair of true ints >= 1
    # (bool is excluded explicitly since bool is a subclass of int).
    if (not isinstance(kernel_size[0], int)) or (not isinstance(kernel_size[1], int)) or \
            isinstance(kernel_size[0], bool) or isinstance(kernel_size[1], bool) or \
            kernel_size[0] < 1 or kernel_size[1] < 1:
        raise ValueError("Attr 'kernel_size' of 'Conv2D' Op passed " + str(self.kernel_size) +
                         ", should be a int or tuple and equal to or greater than 1.")
    if (not isinstance(stride[0], int)) or (not isinstance(stride[1], int)) or \
            isinstance(stride[0], bool) or isinstance(stride[1], bool) or stride[0] < 1 or stride[1] < 1:
        raise ValueError("Attr 'stride' of 'Conv2D' Op passed " + str(self.stride) +
                         ", should be a int or tuple and equal to or greater than 1.")
    if (not isinstance(dilation[0], int)) or (not isinstance(dilation[1], int)) or \
            isinstance(dilation[0], bool) or isinstance(dilation[1], bool) or dilation[0] < 1 or dilation[1] < 1:
        raise ValueError("Attr 'dilation' of 'Conv2D' Op passed " + str(self.dilation) +
                         ", should be a int or tuple and equal to or greater than 1.")
    # Grouped convolution requires both channel counts divisible by group.
    if in_channels % group != 0:
        raise ValueError("Attr 'in_channels' of 'Conv2D' Op must be divisible by "
                         "attr 'group' of 'Conv2D' Op.")
    if out_channels % group != 0:
        raise ValueError("Attr 'out_channels' of 'Conv2D' Op must be divisible by "
                         "attr 'group' of 'Conv2D' Op.")
    # Weight layout: transposed conv swaps the channel dims; NHWC moves
    # the spatial dims ahead of the input-channel dim.
    if transposed:
        shape = [in_channels, out_channels // group, *kernel_size]
    else:
        shape = [out_channels, in_channels // group, *kernel_size] if self.format == "NCHW" else \
            [out_channels, *kernel_size, in_channels // group]
    self.weight = Parameter(initializer(self.weight_init, shape), name='weight')
    if Validator.check_bool(has_bias):
        self.bias = Parameter(initializer(self.bias_init, [out_channels]), name='bias')
    else:
        # A non-default bias_init with has_bias=False is almost certainly a
        # caller mistake, so warn about it.
        if self.bias_init != 'zeros':
            logger.warning("Value of 'has_bias' is False, value of 'bias_init' will be ignored.")
        self.bias = None
max_queries = 6000 def_evaluate = BlackDefenseEvaluate(bb_raw_preds, bb_def_preds, np.array(raw_query_counts), np.array(def_query_counts), np.array(raw_query_time), np.array(def_query_time), np.array(def_detection_counts), true_label, max_queries) LOGGER.info( TAG, 'query count variance of adversaries is : {:.2f}'.format( def_evaluate.qcv())) LOGGER.info( TAG, 'attack success rate variance of adversaries ' 'is : {:.2f}'.format(def_evaluate.asv())) LOGGER.info( TAG, 'false positive rate (FPR) of the query-based detector ' 'is : {:.2f}'.format(def_evaluate.fpr())) LOGGER.info( TAG, 'the benign query response time variance (QRV) ' 'is : {:.2f}'.format(def_evaluate.qrv())) if __name__ == '__main__': # device_target can be "CPU", "GPU" or "Ascend" context.set_context(mode=context.GRAPH_MODE, device_target="GPU") DEVICE = context.get_context("device_target") if DEVICE in ("Ascend", "GPU"): test_black_defense()
def __init__(self):
    """Capture the configured backend and reserve slots for rank/device info."""
    # The device target is known immediately from the global context.
    self.device_target = context.get_context('device_target')
    # Rank and device identifiers are not known yet; filled in later.
    for pending_attr in ('rank_size', 'device_id', 'global_rank_id'):
        setattr(self, pending_attr, None)
def _check_mode(class_name):
    """Reject PyNative mode: raise RuntimeError if it is currently active."""
    current_mode = context.get_context('mode')
    if current_mode != context.PYNATIVE_MODE:
        return
    raise RuntimeError(
        f'{class_name} operator does not support PyNative mode.')
def compile(self, obj, *args, phase='predict', params=None, do_convert=True, auto_parallel_mode=False):
    """
    Compiles graph.

    Args:
        obj (Function/Cell): The function or cell instance need compile.
        args (tuple): Function or cell input arguments.
        phase (str): The name of compile phase. Default: 'predict'.
        params (OrderedDict): The parameters dictionary used for init data graph. Default: None.
        do_convert (bool): When set to True, convert ME graph to GE graph after compiling graph.
        auto_parallel_mode: When set to True, use auto parallel mode to compile graph.

    Return:
        Str, the full phase of the cell.
        Bool, if the graph has been compiled before, return False, else return True.
    """
    obj.check_names()
    args_names, args_list = _generate_pip_args(obj, *args)
    dic = dict(zip(args_names, args_list))
    # The cache key is derived from the phase plus the argument signature,
    # so different input shapes/types compile to different graphs.
    key = generate_key(phase, dic)
    self.phase_prefix = str(key[1])
    if phase == 'export':
        phase = phase + '.' + str(obj.create_time)
    else:
        phase = self.phase_prefix + phase + '.' + str(obj.create_time)
    enable_debug_runtime = context.get_context("enable_debug_runtime")
    enable_ge = context.get_context("enable_ge")
    # VM backend is used unless GE is enabled (except debug runtime in PyNative mode).
    use_vm = not enable_ge or (enable_debug_runtime and context.get_context("mode") == context.PYNATIVE_MODE)
    if phase in self.compile_cache.keys():
        logger.debug("%r graph has existed.", phase)
        return phase, False
    result = self._executor.compile(obj, args_list, phase, use_vm)
    self.compile_cache[phase] = phase
    if not result:
        raise RuntimeError("Executor compile failed.")
    graph = self._executor.get_func_graph(phase)
    # NOTE(review): a None graph is only logged, not raised — presumably
    # downstream run() surfaces the failure; confirm before changing.
    if graph is None:
        logger.error("%r graph compile failed.", phase)
    if not do_convert:
        return phase, True
    if auto_parallel_mode and "train" in phase:
        obj.parameter_layout_dict = self._executor.get_parameter_layout(phase)
    self._params_init_data(obj, params)
    if not enable_debug_runtime or enable_ge:
        if auto_parallel_mode and "train" in phase:
            obj.load_parameter_slice(params)

    # set parallel inputs in sink mode
    if auto_parallel_mode and (args and isinstance(args[0], Tensor) and args[0].virtual_flag):
        obj.set_parallel_input_with_inputs(*args)

    # the following GE init process is not needed when use vm or ms backend
    if enable_ge:
        # decide whether to sink based on whether the inputs is virtual or not
        if args_list and isinstance(args_list[0], Tensor) and args_list[0].virtual_flag:
            _set_dataset_mode_config('sink')
        else:
            _set_dataset_mode_config('normal')
        self._build_data_graph(obj, params, phase)
        if "export" not in phase:
            init_phase = "init_subgraph" + "." + str(obj.create_time)
            _exec_init_graph(obj, init_phase)
    elif not enable_ge and "export" in phase:
        self._build_data_graph(obj, params, phase)
    return phase, True
def __init__(self,
             input_size,
             hidden_size,
             num_layers=1,
             has_bias=True,
             batch_first=False,
             dropout=0,
             bidirectional=False):
    """Build LSTM: a fused P.LSTM op plus per-layer DynamicRNN weights/biases
    used on the Ascend backend.

    Args:
        input_size (int): feature size of each input step.
        hidden_size (int): hidden state size.
        num_layers (int): number of stacked layers. Default: 1.
        has_bias (bool): whether bias weights are created. Default: True.
        batch_first (bool): input is (batch, seq, feature) when True. Default: False.
        dropout (float): inter-layer dropout rate. Default: 0.
        bidirectional (bool): create backward-direction weights too. Default: False.
    """
    super(LSTM, self).__init__()
    validator.check_value_type("batch_first", batch_first, [bool], self.cls_name)
    validator.check_positive_int(hidden_size, "hidden_size", self.cls_name)
    validator.check_positive_int(num_layers, "num_layers", self.cls_name)
    self.is_ascend = context.get_context("device_target") == "Ascend"

    self.batch_first = batch_first
    self.transpose = P.Transpose()
    self.num_layers = num_layers
    self.bidirectional = bidirectional
    self.dropout = dropout
    self.reverse_seq = P.ReverseSequence(batch_dim=1, seq_dim=0)
    self.concat = P.Concat(axis=0)
    self.concat_2dim = P.Concat(axis=2)
    self.cast = P.Cast()
    self.shape = P.Shape()
    if dropout != 0:
        self.dropout_op = nn.Dropout(float(dropout))

    # Fused multi-layer LSTM op (used on non-Ascend backends).
    self.lstm = P.LSTM(input_size=input_size,
                       hidden_size=hidden_size,
                       num_layers=num_layers,
                       has_bias=has_bias,
                       bidirectional=bidirectional,
                       dropout=float(dropout))

    weight_size = 0
    # Each gate block holds the 4 LSTM gates stacked together.
    gate_size = 4 * hidden_size
    stdv = 1 / math.sqrt(hidden_size)
    num_directions = 2 if bidirectional else 1
    b0 = np.zeros(gate_size, dtype=np.float16)
    self.w_list = []
    self.b_list = []
    self.rnns_fw = P.DynamicRNN(forget_bias=0.0)
    self.rnns_bw = P.DynamicRNN(forget_bias=0.0)

    for layer in range(num_layers):
        # Layers above the first consume the (possibly concatenated) hidden output.
        input_layer_size = input_size if layer == 0 else hidden_size * num_directions
        increment_size = gate_size * input_layer_size
        increment_size += gate_size * hidden_size
        w_shape = input_size if layer == 0 else (num_directions * hidden_size)
        # DynamicRNN weight: [input + hidden, 4 * hidden], fp16 on Ascend.
        w_np = np.random.uniform(-stdv, stdv, (w_shape + hidden_size, gate_size)).astype(np.float16)
        self.w_list.append(Parameter(initializer(Tensor(w_np), [w_shape + hidden_size, gate_size]),
                                     name='weight_fw' + str(layer)))
        if has_bias:
            increment_size += 2 * gate_size
            b_np = np.random.uniform(-stdv, stdv, gate_size).astype(np.float16)
            self.b_list.append(Parameter(initializer(Tensor(b_np), [gate_size]),
                                         name='bias_fw' + str(layer)))
        else:
            # Zero bias placeholder keeps the parameter list layout uniform.
            self.b_list.append(Parameter(initializer(Tensor(b0), [gate_size]),
                                         name='bias_fw' + str(layer)))
        weight_size += increment_size * num_directions
        if bidirectional:
            w_bw_np = np.random.uniform(-stdv, stdv, (w_shape + hidden_size, gate_size)).astype(np.float16)
            self.w_list.append(Parameter(initializer(Tensor(w_bw_np), [w_shape + hidden_size, gate_size]),
                                         name='weight_bw' + str(layer)))
            b_bw_np = np.random.uniform(-stdv, stdv, (4 * hidden_size)).astype(
                np.float16) if has_bias else b0
            self.b_list.append(Parameter(initializer(Tensor(b_bw_np), [gate_size]),
                                         name='bias_bw' + str(layer)))
    self.w_list = ParameterTuple(self.w_list)
    self.b_list = ParameterTuple(self.b_list)
    # Flat fp32 weight consumed by the fused P.LSTM op; its total size is the
    # sum accumulated above across layers/directions.
    w_np = np.random.uniform(-stdv, stdv, (weight_size, 1, 1)).astype(np.float32)
    self.weight = Parameter(initializer(Tensor(w_np), [weight_size, 1, 1]), name='weight')
def train():
    """Train YOLOv4 (CSPDarkNet53) on ModelArts: stage data, build the train
    network, run the manual step loop, and upload checkpoints."""
    args = parse_args()
    devid = int(os.getenv('DEVICE_ID', '0'))
    context.set_context(mode=context.GRAPH_MODE, enable_auto_mixed_precision=True,
                        device_target=args.device_target, save_graphs=False, device_id=devid)
    loss_meter = AverageMeter('loss')

    network = YOLOV4CspDarkNet53(is_training=True)
    # default is kaiming-normal
    default_recursive_init(network)

    if args.pretrained_backbone:
        pretrained_backbone_slice = args.pretrained_backbone.split('/')
        backbone_ckpt_file = pretrained_backbone_slice[len(pretrained_backbone_slice) - 1]
        local_backbone_ckpt_path = '/cache/' + backbone_ckpt_file
        # download backbone checkpoint from OBS to the local cache
        mox.file.copy_parallel(src_url=args.pretrained_backbone, dst_url=local_backbone_ckpt_path)
        args.pretrained_backbone = local_backbone_ckpt_path
    load_yolov4_params(args, network)

    network = YoloWithLossCell(network)
    args.logger.info('finish get network')

    config = ConfigYOLOV4CspDarkNet53()
    config.label_smooth = args.label_smooth
    config.label_smooth_factor = args.label_smooth_factor
    if args.training_shape:
        config.multi_scale = [convert_training_shape(args)]
    if args.resize_rate:
        config.resize_rate = args.resize_rate

    # data download (OBS -> local cache)
    local_data_path = '/cache/data'
    local_ckpt_path = '/cache/ckpt_file'
    print('Download data.')
    mox.file.copy_parallel(src_url=args.data_url, dst_url=local_data_path)

    ds, data_size = create_yolo_dataset(
        image_dir=os.path.join(local_data_path, 'images'),
        anno_path=os.path.join(local_data_path, 'annotation.json'),
        is_training=True,
        batch_size=args.per_batch_size,
        max_epoch=args.max_epoch,
        device_num=args.group_size,
        rank=args.rank,
        config=config)
    args.logger.info('Finish loading dataset')

    args.steps_per_epoch = int(data_size / args.per_batch_size / args.group_size)
    if not args.ckpt_interval:
        args.ckpt_interval = args.steps_per_epoch * 10

    lr = get_lr(args)
    opt = Momentum(params=get_param_groups(network),
                   learning_rate=Tensor(lr),
                   momentum=args.momentum,
                   weight_decay=args.weight_decay,
                   loss_scale=args.loss_scale)

    is_gpu = context.get_context("device_target") == "GPU"
    if is_gpu:
        # GPU path: AMP O2 with a fixed (no-op) loss scale; loss kept in fp32.
        loss_scale_value = 1.0
        loss_scale = FixedLossScaleManager(loss_scale_value, drop_overflow_update=False)
        network = amp.build_train_network(network, optimizer=opt, loss_scale_manager=loss_scale,
                                          level="O2", keep_batchnorm_fp32=False)
        keep_loss_fp32(network)
    else:
        network = TrainingWrapper(network, opt)
        network.set_train()

    # checkpoint save
    ckpt_max_num = 10
    ckpt_config = CheckpointConfig(save_checkpoint_steps=args.ckpt_interval,
                                   keep_checkpoint_max=ckpt_max_num)
    ckpt_cb = ModelCheckpoint(config=ckpt_config, directory=local_ckpt_path, prefix='yolov4')
    cb_params = _InternalCallbackParam()
    cb_params.train_network = network
    cb_params.epoch_num = ckpt_max_num
    cb_params.cur_epoch_num = 1
    run_context = RunContext(cb_params)
    ckpt_cb.begin(run_context)

    old_progress = -1
    t_end = time.time()
    data_loader = ds.create_dict_iterator(output_numpy=True, num_epochs=1)

    for i, data in enumerate(data_loader):
        images = data["image"]
        input_shape = images.shape[2:4]
        images = Tensor.from_numpy(images)
        batch_y_true_0 = Tensor.from_numpy(data['bbox1'])
        batch_y_true_1 = Tensor.from_numpy(data['bbox2'])
        batch_y_true_2 = Tensor.from_numpy(data['bbox3'])
        batch_gt_box0 = Tensor.from_numpy(data['gt_box1'])
        batch_gt_box1 = Tensor.from_numpy(data['gt_box2'])
        batch_gt_box2 = Tensor.from_numpy(data['gt_box3'])
        # Network expects (width, height), hence the reversed spatial dims.
        input_shape = Tensor(tuple(input_shape[::-1]), ms.float32)
        loss = network(images, batch_y_true_0, batch_y_true_1, batch_y_true_2, batch_gt_box0,
                       batch_gt_box1, batch_gt_box2, input_shape)
        loss_meter.update(loss.asnumpy())

        # ckpt progress
        cb_params.cur_step_num = i + 1  # current step number
        cb_params.batch_num = i + 2
        ckpt_cb.step_end(run_context)

        if i % args.log_interval == 0:
            time_used = time.time() - t_end
            epoch = int(i / args.steps_per_epoch)
            fps = args.per_batch_size * (i - old_progress) * args.group_size / time_used
            if args.rank == 0:
                args.logger.info('epoch[{}], iter[{}], {}, {:.2f} imgs/sec, lr:{}'.format(
                    epoch, i, loss_meter, fps, lr[i]))
            t_end = time.time()
            loss_meter.reset()
            old_progress = i

        if (i + 1) % args.steps_per_epoch == 0:
            cb_params.cur_epoch_num += 1

    args.logger.info('==========end training===============')

    # upload checkpoint files (local cache -> OBS)
    print('Upload checkpoint.')
    mox.file.copy_parallel(src_url=local_ckpt_path, dst_url=args.train_url)
def init(self, train_dataset=None, valid_dataset=None):
    """
    Initializes compute graphs and data graphs with sink mode.

    Note:
        Pre-init process only supports `GRAPH_MODE` and `Ascend` target currently.

    Args:
        train_dataset (Dataset): A training dataset iterator. If define `train_dataset`, training graphs will be
                                 initialized. Default: None.
        valid_dataset (Dataset): A evaluating dataset iterator. If define `valid_dataset`, evaluation graphs will
                                 be initialized, and `metrics` in `Model` can not be None. Default: None.

    Examples:
        >>> train_dataset = get_train_dataset()
        >>> valid_dataset = get_valid_dataset()
        >>> net = Net()
        >>> loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
        >>> optim = Momentum(params=net.trainable_params(), learning_rate=0.1, momentum=0.9)
        >>> model = Model(net, loss_fn=loss, optimizer=optim, metrics={'acc'})
        >>> model.init(train_dataset, valid_dataset)
        >>> model.train(2, train_dataset)
        >>> model.eval(valid_dataset)
    """
    # Pre-compilation is only implemented for graph mode on Ascend.
    if context.get_context("mode") != context.GRAPH_MODE or context.get_context("device_target") != "Ascend":
        raise RuntimeError('Pre-init process only supports GRAPH MODE and Ascend target currently.')
    if not train_dataset and not valid_dataset:
        raise ValueError('Both train_dataset and valid_dataset can not be None or empty.')
    _device_number_check(self._parallel_mode, self._device_number)
    if train_dataset:
        _parameter_broadcast_check(self._parallel_mode, self._parameter_broadcast)
        self._train_network.set_train()
        self._train_network.phase = 'train'
        if self._parameter_broadcast:
            self._train_network.set_broadcast_flag()
        # Prevent the helper from actually pushing data to the device;
        # we only want graph compilation here.
        train_dataset.__no_send__ = True
        train_dataset_helper, train_network = self._exec_preprocess(self._train_network,
                                                                    is_train=True,
                                                                    phase='train',
                                                                    dataset=train_dataset,
                                                                    dataset_sink_mode=True)
        self._train_network = train_network
        # Pull exactly one batch to trigger compilation, then stop.
        for inputs in train_dataset_helper:
            self._train_network.compile(*inputs)
            break
    if valid_dataset:
        if not self._metric_fns:
            raise RuntimeError('If define `valid_dataset`, metric fn can not be None or empty.')
        self._eval_network.set_train(False)
        self._eval_network.phase = 'eval'
        valid_dataset.__no_send__ = True
        valid_dataset_helper, eval_network = self._exec_preprocess(self._eval_network,
                                                                   is_train=False,
                                                                   phase='eval',
                                                                   dataset=valid_dataset,
                                                                   dataset_sink_mode=True)
        self._eval_network = eval_network
        # Same one-batch compile trick for the eval graph.
        for inputs in valid_dataset_helper:
            self._eval_network.compile(*inputs)
            break
def __init__(self,
             num_features,
             eps=1e-5,
             momentum=0.9,
             affine=True,
             gamma_init='ones',
             beta_init='zeros',
             moving_mean_init='zeros',
             moving_var_init='ones',
             use_batch_statistics=None,
             device_num_each_group=1,
             input_dims='2d'):
    """Base batch-norm layer: creates the affine/moving-stat Parameters and
    selects the backend-appropriate BatchNorm primitives (including optional
    cross-device "global" sync groups)."""
    super(_BatchNorm, self).__init__()
    if num_features < 1:
        raise ValueError("num_features must be at least 1")

    if momentum < 0 or momentum > 1:
        raise ValueError("momentum should be a number in range [0, 1], but got {}".format(momentum))

    self.use_batch_statistics = use_batch_statistics
    self.num_features = num_features
    self.eps = eps
    self.input_dims = input_dims
    # Running statistics are buffers, not trainable.
    self.moving_mean = Parameter(initializer(moving_mean_init, num_features), name="mean", requires_grad=False)
    self.moving_variance = Parameter(initializer(moving_var_init, num_features), name="variance",
                                     requires_grad=False)
    # gamma/beta are only trainable when affine=True.
    self.gamma = Parameter(initializer(gamma_init, num_features), name="gamma", requires_grad=affine)
    self.beta = Parameter(initializer(beta_init, num_features), name="beta", requires_grad=affine)
    self.group = check_int_positive(device_num_each_group)
    self.is_global = False
    if self.group != 1:
        # Partition all devices into groups of `self.group`; if this rank is in
        # one, create a communication group for synchronized statistics.
        self.rank_id = get_rank()
        self.rank_size = get_group_size()
        self.device_list = [i for i in range(0, self.rank_size)]
        self.rank_list = self.list_group(self.device_list, self.group)
        self.rank_list_idx = len(self.rank_list)
        for i in range(self.rank_list_idx):
            if self.rank_id in self.rank_list[i] and self.group != 1:
                self.is_global = True
                management.create_group('group' + str(i), self.rank_list[i])
                self.all_reduce = P.AllReduce(P.ReduceOp.SUM,
                                              'group' + str(i)).add_prim_attr('fusion', 1)
    self.shape = P.Shape()
    self.reduce_mean = P.ReduceMean(keep_dims=True)
    self.square = P.Square()
    self.sqrt = P.Sqrt()
    self.cast = P.Cast()
    self.dtype = P.DType()
    self.reshape = P.Reshape()
    self.is_ascend = context.get_context("device_target") == "Ascend"
    self.is_graph_mode = context.get_context("mode") == context.GRAPH_MODE
    # Stored momentum is inverted relative to the argument (update factor).
    self.momentum = 1.0 - momentum
    if context.get_context("enable_ge"):
        self.is_ge_backend = True
    else:
        self.is_ge_backend = False
    # Graph mode on GE/Ascend uses the plain BatchNorm primitive; other
    # configurations use the fused variant that also updates moving stats.
    if self.is_graph_mode and (self.is_ge_backend or self.is_ascend):
        self.bn_train = P.BatchNorm(is_training=True, epsilon=self.eps)
    else:
        self.bn_train = P.FusedBatchNorm(mode=1, epsilon=self.eps, momentum=self.momentum)
    self.bn_infer = P.BatchNorm(is_training=False, epsilon=self.eps)

    self.enable_global_sync = self.is_global and (self.is_ge_backend or (self.is_graph_mode and self.is_ascend))
    self.enable_default_train = self.is_graph_mode and not self.is_global and \
        (self.is_ge_backend or self.is_ascend)

    data_parallel_strategy = ((1, ), (1, ))
    data_parallel_strategy_one = ((1, ), ())
    self.sub_mean = P.Sub().set_strategy(data_parallel_strategy)
    self.sub_var = P.Sub().set_strategy(data_parallel_strategy)
    self.mul_mean = P.Mul().set_strategy(data_parallel_strategy_one)
    self.mul_var = P.Mul().set_strategy(data_parallel_strategy_one)
    self.assign_sub_mean = P.AssignSub().set_strategy(data_parallel_strategy)
    self.assign_sub_var = P.AssignSub().set_strategy(data_parallel_strategy)
def compile(self, obj, *args, phase='predict', do_convert=True, auto_parallel_mode=False):
    """
    Compiles graph.

    Args:
        obj (Function/Cell): The function or cell instance need compile.
        args (tuple): Function or cell input arguments.
        phase (str): The name of compile phase. Default: 'predict'.
        do_convert (bool): When set to True, convert ME graph to GE graph after compiling graph.
        auto_parallel_mode: When set to True, use auto parallel mode to compile graph.

    Return:
        Str, the full phase of the cell.
        Bool, if the graph has been compiled before, return False, else return True.
    """
    args_names, args_list = _generate_pip_args(obj, *args)
    dic = dict(zip(args_names, args_list))
    # Cache key combines the phase with the argument signature.
    key = generate_key(phase, dic)
    self.phase_prefix = str(key[1])
    if 'export' in phase:
        phase = phase + '.' + self.phase_prefix + '.' + str(obj.create_time)
    else:
        phase = self.phase_prefix + phase + '.' + str(obj.create_time)

    if phase in self.compile_cache.keys():
        logger.debug("%r graph has existed.", phase)
        return phase, False

    obj.check_names()
    _check_full_batch()
    self._set_dataset_mode(args_list)

    # Sink mode is signalled by a leading virtual Tensor argument.
    is_sink_mode = args and isinstance(args[0], Tensor) and args[0].virtual_flag
    if auto_parallel_mode and _need_to_full() and not is_sink_mode and obj.auto_parallel_compile_and_run():
        # Expand the per-device slice into full-batch tensors for compilation.
        args_full = _to_full_tensor(args, _get_device_num(), _get_global_rank())
        _, args_list = _generate_pip_args(obj, *args_full)

    enable_debug_runtime = context.get_context("enable_debug_runtime")
    enable_ge = context.get_context("enable_ge")
    use_vm = not enable_ge or (enable_debug_runtime and context.get_context("mode") == context.PYNATIVE_MODE)
    result = self._executor.compile(obj, args_list, phase, use_vm)
    self.compile_cache[phase] = phase
    if not result:
        raise RuntimeError("Executor compile failed.")
    graph = self._executor.get_func_graph(phase)
    # NOTE(review): a None graph is only logged here, not raised — confirm the
    # failure is surfaced by a later stage before tightening this.
    if graph is None:
        logger.error("%r graph compile failed.", phase)
    if not do_convert:
        return phase, True
    if auto_parallel_mode:
        obj.parameter_layout_dict = self._executor.get_parameter_layout(phase)
    replace = obj.init_parameters_data(auto_parallel_mode=auto_parallel_mode)
    if not enable_debug_runtime or enable_ge:
        if auto_parallel_mode:
            obj.load_parameter_slice(None)

    self._updata_param_node_default_input(phase, replace)

    # set parallel inputs in sink mode
    if auto_parallel_mode and is_sink_mode:
        obj.set_parallel_input_with_inputs(*args)

    # the following GE init process is not needed when use vm or ms backend
    if enable_ge:
        self._build_data_graph(obj, phase)
        if "export" not in phase:
            init_phase = "init_subgraph" + "." + str(obj.create_time)
            _exec_init_graph(obj, init_phase)
    elif not enable_ge and "export" in phase:
        self._build_data_graph(obj, phase)
    return phase, True
def __init__(self,
             min_init=-6,
             max_init=6,
             num_bits=8,
             ema=False,
             ema_decay=0.999,
             per_channel=False,
             channel_axis=1,
             out_channels=1,
             quant_delay=0,
             symmetric=False,
             narrow_range=False):
    """init FakeQuantWithMinMax layer: create min/max Parameters and select the
    per-layer or per-channel fake-quant primitives for the current backend."""
    super(FakeQuantWithMinMax, self).__init__()
    self.min_init = min_init
    self.max_init = max_init
    self.num_bits = num_bits
    self.ema = ema
    self.ema_decay = ema_decay
    self.per_channel = per_channel
    self.out_channels = out_channels
    self.channel_axis = channel_axis
    self.quant_delay = quant_delay
    self.symmetric = symmetric
    self.narrow_range = narrow_range
    self.is_ascend = context.get_context('device_target') == "Ascend"

    # init tensor min and max for fake quant op:
    # per-channel mode keeps one (min, max) pair per output channel.
    if self.per_channel:
        min_array = np.array([self.min_init for i in range(0, self.out_channels)]).astype(np.float32)
        max_array = np.array([self.max_init for i in range(0, self.out_channels)]).astype(np.float32)
    else:
        min_array = np.array([self.min_init]).reshape(1).astype(np.float32)
        max_array = np.array([self.max_init]).reshape(1).astype(np.float32)
    self.minq = Parameter(Tensor(min_array), name='quant_min', requires_grad=False)
    self.maxq = Parameter(Tensor(max_array), name='quant_max', requires_grad=False)

    # init fake quant relative op
    if per_channel:
        quant_fun = partial(P.FakeQuantPerChannel, channel_axis=self.channel_axis)
        ema_fun = partial(P.FakeQuantMinMaxPerChannelUpdate, channel_axis=self.channel_axis)
    else:
        quant_fun = P.FakeQuantPerLayer
        ema_fun = P.FakeQuantMinMaxPerLayerUpdate
    # Ascend's fake-quant op does not take EMA/delay arguments; a separate
    # ema_update op maintains the min/max statistics there.
    if self.is_ascend:
        self.fake_quant = quant_fun(num_bits=self.num_bits,
                                    symmetric=self.symmetric,
                                    narrow_range=self.narrow_range)
    else:
        self.fake_quant = quant_fun(num_bits=self.num_bits,
                                    ema=self.ema,
                                    ema_decay=ema_decay,
                                    quant_delay=quant_delay,
                                    symmetric=self.symmetric,
                                    narrow_range=self.narrow_range)
    self.ema_update = ema_fun(num_bits=self.num_bits,
                              ema=self.ema,
                              ema_decay=self.ema_decay,
                              symmetric=self.symmetric,
                              narrow_range=self.narrow_range)
def __call__(self, obj, *args, phase='predict'):
    """Run `obj`; a no-op (None) under precompile-only or on a parameter server."""
    should_skip = context.get_context("precompile_only") or _is_role_pserver()
    if should_skip:
        return None
    return self.run(obj, *args, phase=phase)
def __init__(self,
             min_init=-6,
             max_init=6,
             num_bits=8,
             ema=False,
             ema_decay=0.999,
             per_channel=False,
             out_channels=1,
             quant_delay=0,
             symmetric=False,
             narrow_range=False):
    """init FakeQuantWithMinMax layer: create min/max Parameters and build
    train/infer fake-quant ops for the current backend."""
    super(FakeQuantWithMinMax, self).__init__()
    self.min_init = min_init
    self.num_bits = num_bits
    self.max_init = max_init
    self.ema = ema
    self.ema_decay = ema_decay
    self.per_channel = per_channel
    self.out_channels = out_channels
    self.quant_delay = quant_delay
    self.symmetric = symmetric
    self.narrow_range = narrow_range

    if per_channel:
        # One (min, max) pair per output channel.
        min_array = np.array([self.min_init for i in range(0, self.out_channels)]).astype(np.float32)
        # BUG FIX: this previously iterated over `self.channel_size`, an attribute
        # never defined on this class (AttributeError at runtime); mirror min_array
        # and size the per-channel maxima by `self.out_channels`.
        max_array = np.array([self.max_init for i in range(0, self.out_channels)]).astype(np.float32)
        self.minq = Parameter(Tensor(min_array), name='quant_min', requires_grad=False)
        self.maxq = Parameter(Tensor(max_array), name='quant_max', requires_grad=False)
        self.fake_quant_train = P.FakeQuantWithMinMaxPerChannel(num_bits=self.num_bits,
                                                                ema=self.ema,
                                                                ema_decay=self.ema_decay,
                                                                quant_delay=self.quant_delay,
                                                                symmetric=self.symmetric,
                                                                narrow_range=self.narrow_range,
                                                                training=True)
        self.fake_quant_infer = P.FakeQuantWithMinMaxPerChannel(num_bits=self.num_bits,
                                                                ema=self.ema,
                                                                ema_decay=self.ema_decay,
                                                                quant_delay=self.quant_delay,
                                                                symmetric=self.symmetric,
                                                                narrow_range=self.narrow_range,
                                                                training=False)
    else:
        # Per-layer mode: a single scalar (min, max) pair.
        min_array = np.array([min_init]).reshape(1).astype(np.float32)
        max_array = np.array([max_init]).reshape(1).astype(np.float32)
        self.minq = Parameter(Tensor(min_array), name='quant_min', requires_grad=False)
        self.maxq = Parameter(Tensor(max_array), name='quant_max', requires_grad=False)
        if context.get_context('device_target') == "Ascend":
            # Ascend uses the dedicated FakeQuantWithMinMaxD cell seeded with
            # the min/max Parameters.
            self.fake_quant_train = FakeQuantWithMinMaxD(num_bits=self.num_bits,
                                                         ema=self.ema,
                                                         ema_decay=self.ema_decay,
                                                         quant_delay=self.quant_delay,
                                                         symmetric=self.symmetric,
                                                         narrow_range=self.narrow_range,
                                                         training=True,
                                                         min_init=self.minq,
                                                         max_init=self.maxq)
            self.fake_quant_infer = FakeQuantWithMinMaxD(num_bits=self.num_bits,
                                                         ema=self.ema,
                                                         ema_decay=self.ema_decay,
                                                         quant_delay=self.quant_delay,
                                                         symmetric=self.symmetric,
                                                         narrow_range=self.narrow_range,
                                                         training=False,
                                                         min_init=self.minq,
                                                         max_init=self.maxq)
        elif context.get_context('device_target') == "GPU":
            self.fake_quant_train = P.FakeQuantWithMinMax(num_bits=self.num_bits,
                                                          ema=self.ema,
                                                          ema_decay=self.ema_decay,
                                                          quant_delay=self.quant_delay,
                                                          symmetric=self.symmetric,
                                                          narrow_range=self.narrow_range,
                                                          training=True)
            self.fake_quant_infer = P.FakeQuantWithMinMax(num_bits=self.num_bits,
                                                          ema=self.ema,
                                                          ema_decay=ema_decay,
                                                          quant_delay=quant_delay,
                                                          symmetric=self.symmetric,
                                                          narrow_range=self.narrow_range,
                                                          training=False)
        else:
            raise ValueError("Not support platform.")