def construct(self, pred, gt):
    """Construct method: penalty-reduced focal loss (alpha=2, beta=4)."""
    pos_inds = P.Select()(P.Equal()(gt, 1.0),
                          P.Fill()(P.DType()(gt), P.Shape()(gt), 1.0),
                          P.Fill()(P.DType()(gt), P.Shape()(gt), 0.0))
    neg_inds = P.Select()(P.Less()(gt, 1.0),
                          P.Fill()(P.DType()(gt), P.Shape()(gt), 1.0),
                          P.Fill()(P.DType()(gt), P.Shape()(gt), 0.0))
    neg_weights = self.pow(1 - gt, 4)  # beta = 4
    # alpha = 2
    pos_loss = self.log(pred) * self.pow(1 - pred, 2) * pos_inds
    neg_loss = self.log(1 - pred) * self.pow(pred, 2) * neg_weights * neg_inds

    num_pos = self.sum(pos_inds, ())
    num_pos = P.Select()(P.Equal()(num_pos, 0.0),
                         P.Fill()(P.DType()(num_pos), P.Shape()(num_pos), 1.0),
                         num_pos)
    pos_loss = self.sum(pos_loss, ())
    neg_loss = self.sum(neg_loss, ())
    loss = -(pos_loss + neg_loss) / num_pos
    return loss
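# A hedged NumPy reference (illustrative addition, not part of the original
# source): re-derives the same penalty-reduced focal loss (alpha=2, beta=4)
# so the MindSpore graph above can be sanity-checked. The eps guard is an
# assumption added here to keep log() finite; the original relies on pred
# staying inside (0, 1).
import numpy as np

def focal_loss_reference(pred, gt, eps=1e-12):
    """NumPy mirror of construct(); pred and gt are arrays of equal shape."""
    pos_inds = (gt == 1.0).astype(np.float32)
    neg_inds = (gt < 1.0).astype(np.float32)
    neg_weights = np.power(1.0 - gt, 4)                      # beta = 4
    pos_loss = np.log(pred + eps) * np.power(1.0 - pred, 2) * pos_inds
    neg_loss = np.log(1.0 - pred + eps) * np.power(pred, 2) * neg_weights * neg_inds
    num_pos = max(pos_inds.sum(), 1.0)                       # mirror the Select() guard
    return -(pos_loss.sum() + neg_loss.sum()) / num_pos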
def erfc_f32_generic(x):
    """
    Calculate erfc for dtype of f32.
    """
    k_maxlog = 88.72283905206835
    k_erfc_pcoefficient = [+2.326819970068386e-2, -1.387039388740657e-1,
                           +3.687424674597105e-1, -5.824733027278666e-1,
                           +6.210004621745983e-1, -4.944515323274145e-1,
                           +3.404879937665872e-1, -2.741127028184656e-1,
                           +5.638259427386472e-1]
    k_erfc_rcoefficient = [-1.047766399936249e+1, +1.297719955372516e+1,
                           -7.495518717768503e+0, +2.921019019210786e+0,
                           -1.015265279202700e+0, +4.218463358204948e-1,
                           -2.820767439740514e-1, +5.641895067754075e-1]
    abs_cal = P.Abs()
    select = P.Select()
    less = P.Less()
    fill = P.Fill()
    dtype = P.DType()
    shape = P.Shape()

    abs_x = abs_cal(x)
    z = exp_generic(-x * x)
    q = 1 / abs_x
    y = q * q
    poly1 = _evaluate_polynomial(y, k_erfc_pcoefficient)
    poly2 = _evaluate_polynomial(y, k_erfc_rcoefficient)
    p = select(less(abs_x, 2.0), poly1, poly2)
    y = z * q * p
    zeros = fill(dtype(x), shape(x), 0)
    # Underflow guard from the Cephes original: the check belongs on the
    # exponent -x*x (the original code compared z = exp(-x*x) against
    # -k_maxlog, which can never be true since z >= 0).
    y_clamp = select(less(-x * x, -k_maxlog), zeros, y)
    return select(less(x, 0), 2.0 - y_clamp, y_clamp)
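# Hedged usage sketch (assumes MindSpore in PyNative mode and that this
# module's exp_generic/_evaluate_polynomial helpers are in scope): cross-check
# the polynomial approximation against the stdlib scalar math.erfc. Inputs are
# kept at |x| >= 1, the regime this kernel targets; for |x| < 1 the q = 1/|x|
# factor blows up and callers are expected to go through erf_f32_generic
# instead (see erf_generic/erfc_generic below).
import math
import numpy as np
from mindspore import Tensor

xs = np.array([-3.0, -1.5, 1.0, 2.0, 4.0], dtype=np.float32)
got = erfc_f32_generic(Tensor(xs)).asnumpy()
want = np.array([math.erfc(float(v)) for v in xs], dtype=np.float32)
assert np.allclose(got, want, rtol=1e-4, atol=1e-7)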
def __init__(self, bijector, distribution, seed=None, name="transformed_distribution"):
    """
    Constructor of transformed_distribution class.
    """
    param = dict(locals())
    validator.check_value_type('bijector', bijector,
                               [nn.probability.bijector.Bijector], type(self).__name__)
    validator.check_value_type('distribution', distribution,
                               [Distribution], type(self).__name__)
    super(TransformedDistribution, self).__init__(seed, distribution.dtype, name, param)

    self._bijector = bijector
    self._distribution = distribution
    self._is_linear_transformation = bijector.is_constant_jacobian
    self.default_parameters = distribution.default_parameters
    self.parameter_names = distribution.parameter_names

    self.exp = exp_generic
    self.log = log_generic
    self.equal_base = P.Equal()
    self.select_base = P.Select()
def __init__(self, low=None, high=None, seed=0, dtype=mstype.float32, name="Uniform"):
    """
    Constructor of Uniform distribution.
    """
    param = dict(locals())
    super(Uniform, self).__init__(dtype, name, param)
    if low is not None and high is not None:
        self._low = convert_to_batch(low, self._broadcast_shape, dtype)
        self._high = convert_to_batch(high, self._broadcast_shape, dtype)
        check_greater(self.low, self.high, "low value", "high value")
    else:
        self._low = low
        self._high = high

    # ops needed for the class
    self.const = P.ScalarToArray()
    self.dtypeop = P.DType()
    self.exp = P.Exp()
    self.fill = P.Fill()
    self.less = P.Less()
    self.lessequal = P.LessEqual()
    self.log = P.Log()
    self.logicaland = P.LogicalAnd()
    self.select = P.Select()
    self.shape = P.Shape()
    self.sq = P.Square()
    self.sqrt = P.Sqrt()
    self.uniform = P.UniformReal(seed=seed)
    self.zeroslike = P.ZerosLike()
def __init__(self):
    super(DiGamma, self).__init__()
    # const numbers
    self.k_lanczos_gamma = 7
    self.k_base_lanczos_coeff = 0.99999999999980993227684700473478
    self.k_lanczos_coefficients = [676.520368121885098567009190444019,
                                   -1259.13921672240287047156078755283,
                                   771.3234287776530788486528258894,
                                   -176.61502916214059906584551354,
                                   12.507343278686904814458936853,
                                   -0.13857109526572011689554707,
                                   9.984369578019570859563e-6,
                                   1.50563273514931155834e-7]
    self.nan = np.nan
    self.pi = np.pi
    self.lanczos_gamma_plus_one_half = self.k_lanczos_gamma + 0.5
    self.log_lanczos_gamma_plus_one_half = np.log(self.lanczos_gamma_plus_one_half)

    # operations
    self.log1p = P.Log1p()
    self.abs = P.Abs()
    self.shape = P.Shape()
    self.dtype = P.DType()
    self.fill = P.Fill()
    self.floor = P.Floor()
    self.equal = P.Equal()
    self.less = P.Less()
    self.select = P.Select()
    self.sin = P.Sin()
    self.cos = P.Cos()
    self.logicaland = P.LogicalAnd()
def log_generic(input_x):
    """
    Log op on Ascend is calculated as log(abs(x)), so negative inputs must be
    mapped to nan explicitly. Log op on Ascend also doesn't support int types,
    so integer inputs are cast to float32 first.
    """
    log = P.Log()
    less = P.Less()
    lessequal = P.LessEqual()
    fill = P.Fill()
    cast = P.Cast()
    dtype = P.DType()
    shape = P.Shape()
    select = P.Select()
    checktype = P.IsSubClass()

    if not checktype(dtype(input_x), mstype.float_):
        input_x = cast(input_x, mstype.float32)
    nan = fill(dtype(input_x), shape(input_x), np.nan)
    inf = fill(dtype(input_x), shape(input_x), np.inf)
    neg_x = less(input_x, 0.0)
    nonpos_x = lessequal(input_x, 0.0)
    log_x = log(input_x)
    result = select(nonpos_x, -inf, log_x)
    return select(neg_x, nan, result)
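# Hedged usage sketch (assumes MindSpore in PyNative mode): log_generic gives
# NumPy-style log semantics that a bare P.Log() lacks on Ascend.
import numpy as np
from mindspore import Tensor

vals = Tensor(np.array([-1.0, 0.0, 1.0, np.e], dtype=np.float32))
out = log_generic(vals).asnumpy()
assert np.isnan(out[0])           # log of a negative number -> nan
assert np.isneginf(out[1])        # log(0) -> -inf
assert np.allclose(out[2:], [0.0, 1.0], atol=1e-6)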
def __init__(self, config, roi_layer, out_channels, featmap_strides,
             batch_size=1, finest_scale=56, mask=False):
    super(SingleRoIExtractor, self).__init__()
    cfg = config
    self.train_batch_size = batch_size
    self.out_channels = out_channels
    self.featmap_strides = featmap_strides
    self.num_levels = len(self.featmap_strides)
    self.out_size = roi_layer['mask_out_size'] if mask else roi_layer['out_size']
    self.mask = mask
    self.sample_num = roi_layer['sample_num']
    self.roi_layers = self.build_roi_layers(self.featmap_strides)
    self.roi_layers = L.CellList(self.roi_layers)
    self.sqrt = P.Sqrt()
    self.log = P.Log()
    self.finest_scale_ = finest_scale
    self.clamp = C.clip_by_value
    self.cast = P.Cast()
    self.equal = P.Equal()
    self.select = P.Select()

    _mode_16 = False
    self.dtype = np.float16 if _mode_16 else np.float32
    self.ms_dtype = mstype.float16 if _mode_16 else mstype.float32
    self.set_train_local(cfg, training=True)
def __init__(self, probs=None, seed=None, dtype=mstype.int32, name="Bernoulli"):
    """
    Constructor of Bernoulli.
    """
    param = dict(locals())
    param['param_dict'] = {'probs': probs}
    valid_dtype = mstype.int_type + mstype.uint_type + mstype.float_type
    Validator.check_type_name("dtype", dtype, valid_dtype, type(self).__name__)
    super(Bernoulli, self).__init__(seed, dtype, name, param)

    self._probs = self._add_parameter(probs, 'probs')
    if self._probs is not None:
        check_prob(self.probs)

    # ops needed for the class
    self.exp = exp_generic
    self.log = log_generic
    self.squeeze = P.Squeeze(0)
    self.cast = P.Cast()
    self.const = P.ScalarToArray()
    self.floor = P.Floor()
    self.fill = P.Fill()
    self.less = P.Less()
    self.shape = P.Shape()
    self.select = P.Select()
    self.uniform = C.uniform
def __init__(self, probs=None, seed=0, dtype=mstype.int32, name="Geometric"):
    """
    Constructor of Geometric distribution.
    """
    param = dict(locals())
    super(Geometric, self).__init__(dtype, name, param)
    if probs is not None:
        self._probs = cast_to_tensor(probs, dtype=mstype.float32)
        check_prob(self._probs)
    else:
        self._probs = probs

    # smallest positive double (np.float was an alias of float64; the alias
    # was removed in NumPy >= 1.24)
    self.minval = np.finfo(np.float64).tiny

    # ops needed for the class
    self.const = P.ScalarToArray()
    self.dtypeop = P.DType()
    self.fill = P.Fill()
    self.floor = P.Floor()
    self.issubclass = P.IsSubClass()
    self.less = P.Less()
    self.log = P.Log()
    self.pow = P.Pow()
    self.select = P.Select()
    self.shape = P.Shape()
    self.sq = P.Square()
    self.sqrt = P.Sqrt()
    self.uniform = P.UniformReal(seed=seed)
def __init__(self, batch_size=4):
    super(DiceLoss, self).__init__()
    self.threshold0 = Tensor(0.5, mstype.float32)
    self.zero_float32 = Tensor(0.0, mstype.float32)
    self.k = int(640 * 640)
    self.negative_one_int32 = Tensor(-1, mstype.int32)
    self.batch_size = batch_size
    self.concat = P.Concat()
    self.less_equal = P.LessEqual()
    self.greater = P.Greater()
    self.reduce_sum = P.ReduceSum()
    self.reduce_sum_keep_dims = P.ReduceSum(keep_dims=True)
    self.reduce_mean = P.ReduceMean()
    self.reduce_min = P.ReduceMin()
    self.cast = P.Cast()
    self.minimum = P.Minimum()
    self.expand_dims = P.ExpandDims()
    self.select = P.Select()
    self.fill = P.Fill()
    self.topk = P.TopK(sorted=True)
    self.shape = P.Shape()
    self.sigmoid = P.Sigmoid()
    self.reshape = P.Reshape()
    self.slice = P.Slice()
    self.logical_and = P.LogicalAnd()
    self.logical_or = P.LogicalOr()
    self.equal = P.Equal()
    self.zeros_like = P.ZerosLike()
    self.add = P.TensorAdd()
    self.gather = P.Gather()
def __init__(self, rate=None, seed=0, dtype=mstype.float32, name="Exponential"):
    """
    Constructor of Exponential distribution.
    """
    param = dict(locals())
    super(Exponential, self).__init__(dtype, name, param)
    if rate is not None:
        self._rate = cast_to_tensor(rate, mstype.float32)
        check_greater_zero(self._rate, "rate")
    else:
        self._rate = rate

    self.minval = np.finfo(np.float64).tiny  # np.float alias removed in NumPy >= 1.24

    # ops needed for the class
    self.const = P.ScalarToArray()
    self.dtypeop = P.DType()
    self.exp = P.Exp()
    self.fill = P.Fill()
    self.less = P.Less()
    self.log = P.Log()
    self.select = P.Select()
    self.shape = P.Shape()
    self.sqrt = P.Sqrt()
    self.sq = P.Square()
    self.uniform = P.UniformReal(seed=seed)
def __init__(self,
             config,
             batch_size,
             num_classes,
             use_sigmoid_cls,
             target_means=(.0, .0, .0, .0),
             target_stds=(1.0, 1.0, 1.0, 1.0)):
    super(Proposal, self).__init__()
    cfg = config
    self.batch_size = batch_size
    self.num_classes = num_classes
    self.target_means = target_means
    self.target_stds = target_stds
    self.use_sigmoid_cls = config.use_sigmoid_cls

    if self.use_sigmoid_cls:
        self.cls_out_channels = 1
        self.activation = P.Sigmoid()
        self.reshape_shape = (-1, 1)
    else:
        self.cls_out_channels = num_classes
        self.activation = P.Softmax(axis=1)
        self.reshape_shape = (-1, 2)

    if self.cls_out_channels <= 0:
        raise ValueError('num_classes={} is too small'.format(num_classes))

    self.num_pre = cfg.rpn_proposal_nms_pre
    self.min_box_size = cfg.rpn_proposal_min_bbox_size
    self.nms_thr = cfg.rpn_proposal_nms_thr
    self.nms_post = cfg.rpn_proposal_nms_post
    self.nms_across_levels = cfg.rpn_proposal_nms_across_levels
    self.max_num = cfg.rpn_proposal_max_num

    # Op Define
    self.squeeze = P.Squeeze()
    self.reshape = P.Reshape()
    self.cast = P.Cast()
    self.feature_shapes = cfg.feature_shapes
    self.transpose_shape = (1, 2, 0)
    self.decode = BoundingBoxDecode()
    self.nms = P.NMSWithMask(self.nms_thr)
    self.concat_axis0 = P.Concat(axis=0)
    self.concat_axis1 = P.Concat(axis=1)
    self.split = P.Split(axis=1, output_num=5)
    self.min = P.Minimum()
    self.gatherND = P.GatherNd()
    self.slice = P.Slice()
    self.select = P.Select()
    self.greater = P.Greater()
    self.transpose = P.Transpose()
    self.tile = P.Tile()
    self.set_train_local(config, training=True)
    self.multi_10 = Tensor(10.0, mstype.float16)
def __init__(self, rate=None, seed=None, dtype=mstype.float32, name="Poisson"):
    """
    Constructor of Poisson.
    """
    param = dict(locals())
    param['param_dict'] = {'rate': rate}
    valid_dtype = mstype.int_type + mstype.uint_type + mstype.float_type
    Validator.check_type_name("dtype", dtype, valid_dtype, type(self).__name__)
    super(Poisson, self).__init__(seed, dtype, name, param)

    self._rate = self._add_parameter(rate, 'rate')
    if self.rate is not None:
        check_greater_zero(self.rate, 'rate')

    # ops needed for the class
    self.exp = exp_generic
    self.log = log_generic
    self.squeeze = P.Squeeze(0)
    self.cast = P.Cast()
    self.floor = P.Floor()
    self.dtypeop = P.DType()
    self.shape = P.Shape()
    self.fill = P.Fill()
    self.less = P.Less()
    self.equal = P.Equal()
    self.select = P.Select()
    self.lgamma = nn.LGamma()
    self.igamma = nn.IGamma()
    self.poisson = C.poisson
def erf_generic(x):
    """Compute erf(x), dispatching on |x| to the kernel that is accurate there."""
    select = P.Select()
    less = P.Less()
    abs_cal = P.Abs()

    return select(less(abs_cal(x), 1), erf_f32_generic(x), 1 - erfc_f32_generic(x))
def erfc_generic(x):
    """Compute erfc(x), dispatching on |x| to the kernel that is accurate there."""
    select = P.Select()
    greater = P.Greater()
    abs_cal = P.Abs()

    return select(greater(abs_cal(x), 1), erfc_f32_generic(x), 1 - erf_f32_generic(x))
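# Hedged sanity sketch (same environment assumptions as the erfc check above):
# the two dispatchers route |x| < 1 to the erf kernel and |x| > 1 to the erfc
# kernel, and stay complementary by construction: erf(x) + erfc(x) == 1.
import numpy as np
from mindspore import Tensor

xs = Tensor(np.linspace(-3.0, 3.0, 13).astype(np.float32))
total = (erf_generic(xs) + erfc_generic(xs)).asnumpy()
assert np.allclose(total, 1.0, atol=1e-5)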
def __init__(self, rate=None, seed=0, dtype=mstype.float32, name="Exponential"):
    """
    Constructor of Exponential distribution.
    """
    param = dict(locals())
    valid_dtype = mstype.float_type
    check_type(dtype, valid_dtype, type(self).__name__)
    super(Exponential, self).__init__(seed, dtype, name, param)
    self.parameter_type = dtype
    if rate is not None:
        self._rate = cast_to_tensor(rate, self.parameter_type)
        check_greater_zero(self._rate, "rate")
    else:
        self._rate = rate

    self.minval = np.finfo(np.float64).tiny  # np.float alias removed in NumPy >= 1.24

    # ops needed for the class
    self.exp = exp_generic
    self.log = log_generic
    self.squeeze = P.Squeeze(0)
    self.cast = P.Cast()
    self.const = P.ScalarToArray()
    self.dtypeop = P.DType()
    self.fill = P.Fill()
    self.less = P.Less()
    self.select = P.Select()
    self.shape = P.Shape()
    self.sqrt = P.Sqrt()
    self.sq = P.Square()
    self.uniform = C.uniform
def __init__(self, probs=None, seed=0, dtype=mstype.int32, name="Bernoulli"):
    """
    Constructor of Bernoulli distribution.
    """
    param = dict(locals())
    valid_dtype = mstype.int_type + mstype.uint_type + mstype.float_type
    check_type(dtype, valid_dtype, type(self).__name__)
    super(Bernoulli, self).__init__(seed, dtype, name, param)
    self.parameter_type = mstype.float32
    if probs is not None:
        self._probs = cast_to_tensor(probs, mstype.float32)
        check_prob(self.probs)
    else:
        self._probs = probs

    # ops needed for the class
    self.exp = exp_generic
    self.log = log_generic
    self.erf = erf_generic
    self.squeeze = P.Squeeze(0)
    self.cast = P.Cast()
    self.const = P.ScalarToArray()
    self.dtypeop = P.DType()
    self.floor = P.Floor()
    self.fill = P.Fill()
    self.less = P.Less()
    self.shape = P.Shape()
    self.select = P.Select()
    self.sq = P.Square()
    self.sqrt = P.Sqrt()
    self.uniform = C.uniform
def __init__(self, teacher_config, teacher_ckpt, student_config, is_training,
             use_one_hot_embeddings=False, is_att_fit=True, is_rep_fit=True):
    super(BertNetworkWithLoss_gd, self).__init__()
    # load teacher model
    self.teacher = BertModel(teacher_config, False, use_one_hot_embeddings)
    param_dict = load_checkpoint(teacher_ckpt)
    new_param_dict = {}
    for key, value in param_dict.items():
        # escape the dots so the pattern only matches the literal prefix
        new_key = re.sub(r'^bert\.bert\.', 'teacher.', key)
        new_param_dict[new_key] = value
    load_param_into_net(self.teacher, new_param_dict)

    # freeze the teacher: no gradients flow into its parameters
    self.teacher.set_train(False)
    params = self.teacher.trainable_params()
    for param in params:
        param.requires_grad = False

    # student model
    self.bert = TinyBertModel(student_config, is_training, use_one_hot_embeddings)
    self.cast = P.Cast()
    self.fit_dense = nn.Dense(student_config.hidden_size,
                              teacher_config.hidden_size).to_float(teacher_config.compute_type)
    self.teacher_layers_num = teacher_config.num_hidden_layers
    self.student_layers_num = student_config.num_hidden_layers
    self.layers_per_block = int(self.teacher_layers_num / self.student_layers_num)
    self.is_att_fit = is_att_fit
    self.is_rep_fit = is_rep_fit
    self.loss_mse = nn.MSELoss()
    self.select = P.Select()
    self.zeroslike = P.ZerosLike()
    self.dtype = teacher_config.dtype
def __init__(self, rate=None, seed=None, dtype=mstype.float32, name="Exponential"):
    """
    Constructor of Exponential.
    """
    param = dict(locals())
    param['param_dict'] = {'rate': rate}
    valid_dtype = mstype.float_type
    Validator.check_type_name("dtype", dtype, valid_dtype, type(self).__name__)
    super(Exponential, self).__init__(seed, dtype, name, param)

    self._rate = self._add_parameter(rate, 'rate')
    if self.rate is not None:
        check_greater_zero(self.rate, 'rate')

    self.minval = np.finfo(np.float64).tiny  # np.float alias removed in NumPy >= 1.24

    # ops needed for the class
    self.exp = exp_generic
    self.log = log_generic
    self.squeeze = P.Squeeze(0)
    self.cast = P.Cast()
    self.const = P.ScalarToArray()
    self.dtypeop = P.DType()
    self.fill = P.Fill()
    self.less = P.Less()
    self.select = P.Select()
    self.shape = P.Shape()
    self.uniform = C.uniform
def __init__(self, batch_size, conv_out_dim, encoder_hidden_size,
             decoder_hidden_size, decoder_output_size, max_length, dropout_p=0.1):
    super(AttentionOCR, self).__init__()
    self.encoder = Encoder(batch_size=batch_size,
                           conv_out_dim=conv_out_dim,
                           hidden_size=encoder_hidden_size)
    self.decoder = Decoder(hidden_size=decoder_hidden_size,
                           output_size=decoder_output_size,
                           max_length=max_length,
                           dropout_p=dropout_p)
    self.init_decoder_hidden = Tensor(np.zeros((1, batch_size, decoder_hidden_size),
                                               dtype=np.float16), mstype.float16)
    self.shape = P.Shape()
    self.split = P.Split(axis=1, output_num=max_length)
    self.concat = P.Concat()
    self.expand_dims = P.ExpandDims()
    self.argmax = P.Argmax()
    self.select = P.Select()
def __init__(self, network, optimizer, sens=1.0):
    super(TrainOneStepCellWithGradClip, self).__init__(auto_prefix=False)
    self.network = network
    self.network.set_grad()
    self.network.add_flags(defer_inline=True)
    self.weights = optimizer.parameters
    self.optimizer = optimizer
    self.grad = C.GradOperation('grad', get_by_list=True, sens_param=True)
    self.sens = sens
    self.reducer_flag = False
    self.grad_reducer = None
    self.hyper_map = C.HyperMap()
    self.greater = P.Greater()
    self.select = P.Select()
    self.norm = nn.Norm(keep_dims=True)
    self.dtype = P.DType()
    self.cast = P.Cast()
    self.concat = P.Concat(axis=0)
    self.ten = Tensor(np.array([10.0]).astype(np.float32))
    parallel_mode = _get_parallel_mode()
    if parallel_mode in (ParallelMode.DATA_PARALLEL, ParallelMode.HYBRID_PARALLEL):
        self.reducer_flag = True
    if self.reducer_flag:
        mean = _get_mirror_mean()
        degree = _get_device_num()
        self.grad_reducer = DistributedGradReducer(optimizer.parameters, mean, degree)
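# Hedged reading (an assumption, not code from the source): the ops registered
# above -- nn.Norm, Greater, Select, and the constant self.ten -- suggest the
# elided construct() clips gradients whose L2 norm exceeds 10. A plain NumPy
# sketch of that clipping rule, with illustrative names:
import numpy as np

def clip_by_norm_reference(grad, clip_norm=10.0):
    """Scale grad down so its L2 norm is at most clip_norm; identity otherwise."""
    norm = np.linalg.norm(grad)
    return grad * (clip_norm / norm) if norm > clip_norm else grad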
def __init__(self, sharpness=1.0, name='Softplus'):
    """
    Constructor of Softplus Bijector.
    """
    param = dict(locals())
    validator.check_value_type('sharpness', sharpness, [int, float], type(self).__name__)
    super(Softplus, self).__init__(name=name, param=param)
    self._sharpness = cast_to_tensor(sharpness)

    self.exp = exp_generic
    self.log = log_generic
    self.expm1 = expm1_generic
    self.abs = P.Abs()
    self.dtypeop = P.DType()
    self.fill = P.Fill()
    self.greater = P.Greater()
    self.less = P.Less()
    self.log_sigmoid = LogSigmoid()
    self.logicalor = P.LogicalOr()
    self.select = P.Select()
    self.shape = P.Shape()
    self.sigmoid = P.Sigmoid()
    self.softplus = self._softplus
    self.inverse_softplus = self._inverse_softplus

    self.threshold = np.log(np.finfo(np.float32).eps) + 1
    self.tiny = np.exp(self.threshold)
def __init__(self):
    super(IGamma, self).__init__()
    # const numbers
    # If more data types are supported, this float max value needs to be
    # selected per dtype.
    self.log_maxfloat32 = Tensor(np.log(np.finfo(np.float32).max), mstype.float32)

    # operations
    self.logicaland = P.LogicalAnd()
    self.logicalor = P.LogicalOr()
    self.logicalnot = P.LogicalNot()
    self.equal = P.Equal()
    self.greater = P.Greater()
    self.less = P.Less()
    self.neg = P.Neg()
    self.log = P.Log()
    self.exp = P.Exp()
    self.select = P.Select()
    self.zeroslike = P.ZerosLike()
    self.fill = P.Fill()
    self.shape = P.Shape()
    self.dtype = P.DType()
    self.lgamma = LGamma()
    self.const = P.ScalarToArray()
    self.cast = P.Cast()
def __init__(self, probs=None, seed=0, dtype=mstype.int32, name="Bernoulli"):
    """
    Constructor of Bernoulli distribution.
    """
    param = dict(locals())
    super(Bernoulli, self).__init__(dtype, name, param)
    if probs is not None:
        self._probs = cast_to_tensor(probs, dtype=mstype.float32)
        check_prob(self.probs)
    else:
        self._probs = probs
    self.seed = seed

    # ops needed for the class
    self.cast = P.Cast()
    self.const = P.ScalarToArray()
    self.dtypeop = P.DType()
    self.erf = P.Erf()
    self.fill = P.Fill()
    self.log = P.Log()
    self.less = P.Less()
    self.shape = P.Shape()
    self.select = P.Select()
    self.sq = P.Square()
    self.sqrt = P.Sqrt()
    self.uniform = P.UniformReal(seed=seed)
def construct(self, output, ind, target, wight_mask=None):
    '''
    Compute the masked smooth-L1 regression loss.

    :param output: prediction, [b, c, h, w], transposed below to [b, h, w, c]
    :param ind: index mask, 1 marks positions that contribute to the loss
    :param target: regression target, same layout as the transposed output
    :param wight_mask: optional per-element loss weights
    :return: scalar loss averaged over the valid positions
    '''
    output = self.transpose(output, (0, 2, 3, 1))
    # dim = self.shape(output)[3]
    mask = P.Select()(P.Equal()(ind, 1),
                      P.Fill()(mstype.float32, P.Shape()(ind), 1.0),
                      P.Fill()(mstype.float32, P.Shape()(ind), 0.0))
    # ind = self.cast(ind, mstype.float32)
    target = self.cast(target, mstype.float32)
    output = self.cast(output, mstype.float32)
    num = self.cast(self.sum(mask, ()), mstype.float32)
    mask = self.expand_dims(mask, -1)  # [b, h, w] -> [b, h, w, 1], broadcast over channels
    output = output * mask
    target = target * mask
    loss = self.smooth_l1_loss(output, target)
    if wight_mask is not None:
        loss = loss * wight_mask
        loss = self.sum(loss, ())
    else:
        # some versions need: F.depend(loss, F.sqrt(F.cast(wight_mask, mstype.float32)))
        loss = self.sum(loss, ())
    loss = loss / (num + 1e-4)
    return loss
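# Hedged NumPy reference (illustrative addition, not part of the original
# source): the masked smooth-L1 loss above, averaged over the positions that
# ind marks as valid. SmoothL1Loss's default beta of 1.0 is assumed.
import numpy as np

def masked_smooth_l1_reference(output, ind, target):
    """output/target: [b, h, w, c] (output already transposed); ind: [b, h, w]."""
    mask = (ind == 1).astype(np.float32)
    num = mask.sum()
    diff = (output - target) * mask[..., None]   # broadcast the mask over channels
    loss = np.where(np.abs(diff) < 1.0, 0.5 * diff * diff, np.abs(diff) - 0.5)
    return loss.sum() / (num + 1e-4)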
def __init__(self, bijector, distribution, seed=None, name="transformed_distribution"):
    """
    Constructor of transformed_distribution class.
    """
    param = dict(locals())
    validator.check_value_type('bijector', bijector,
                               [nn.probability.bijector.Bijector], type(self).__name__)
    validator.check_value_type('distribution', distribution,
                               [Distribution], type(self).__name__)
    super(TransformedDistribution, self).__init__(seed, distribution.dtype, name, param)

    self._bijector = bijector
    self._distribution = distribution
    self._is_linear_transformation = bijector.is_constant_jacobian
    self.default_parameters = distribution.default_parameters
    self.parameter_names = distribution.parameter_names

    self.exp = exp_generic
    self.log = log_generic
    self.isnan = P.IsNan()
    self.equal_base = P.Equal()
    self.select_base = P.Select()
    self.fill = P.Fill()

    # check whether the batch shape of the distribution and the bijector's
    # event shape are broadcastable
    if hasattr(self.bijector, 'event_shape'):
        event_shape_tensor = self.fill(self.dtype, self.bijector.event_shape, 0.0)
        broadcast_shape_tensor = self.fill(self.dtype, self.broadcast_shape, 0.0)
        self._batch_event = (event_shape_tensor + broadcast_shape_tensor).shape
    else:
        self._batch_event = self.broadcast_shape
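# Hedged note on the fill-and-add trick above (illustrative, NumPy-only): it
# computes the broadcast of two shapes by materializing zero tensors and
# letting the add op broadcast them, raising if the shapes are incompatible.
import numpy as np

assert (np.zeros((3, 1)) + np.zeros((1, 4))).shape == (3, 4)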
def __init__(self, loc=None, scale=None, seed=0, dtype=mstype.float32, name="LogNormal"):
    """
    Constructor of LogNormal distribution.
    """
    super(LogNormal, self).__init__(distribution=msd.Normal(loc, scale, dtype=dtype),
                                    bijector=msb.Exp(),
                                    seed=seed, name=name)

    # overwrite default_parameters and parameter_names
    self._reset_parameters()
    self._loc = self._add_parameter(loc, 'loc')
    self._scale = self._add_parameter(scale, 'scale')

    self.log_2pi = np.log(2 * np.pi)

    # ops needed for the class
    self.dtypeop = P.DType()
    self.exp = exp_generic
    self.expm1 = P.Expm1()
    self.log = log_generic
    self.const = P.ScalarToArray()
    self.erf = P.Erf()
    self.fill = P.Fill()
    self.greater = P.Greater()
    self.select = P.Select()
    self.shape = P.Shape()
    self.sq = P.Square()
    self.sqrt = P.Sqrt()
    self.cast = P.Cast()
    self.squeeze = P.Squeeze(0)
def __init__(self, sharpness=1.0, name='Softplus'):
    """
    Constructor of Softplus Bijector.
    """
    param = dict(locals())
    param['param_dict'] = {'sharpness': sharpness}
    super(Softplus, self).__init__(name=name, dtype=None, param=param)
    self._sharpness = self._add_parameter(sharpness, 'sharpness')

    self.exp = exp_generic
    self.log = log_generic
    self.expm1 = P.Expm1()
    self.abs = P.Abs()
    self.dtypeop = P.DType()
    self.cast = P.Cast()
    self.fill = P.Fill()
    self.greater = P.Greater()
    self.less = P.Less()
    self.log_sigmoid = LogSigmoid()
    self.logicalor = P.LogicalOr()
    self.select = P.Select()
    self.shape = P.Shape()
    self.sigmoid = P.Sigmoid()
    self.softplus = self._softplus
    self.inverse_softplus = self._inverse_softplus

    self.threshold = np.log(np.finfo(np.float32).eps) + 1
    self.tiny = np.exp(self.threshold)
def __init__(self, network, optimizer, scale_update_cell=None,
             accumulation_steps=1, enable_global_norm=False):
    super(BertTrainAccumulateStepsWithLossScaleCell, self).__init__(auto_prefix=False)
    self.network = network
    self.network.set_grad()
    self.weights = optimizer.parameters
    self.optimizer = optimizer
    self.accumulation_steps = accumulation_steps
    self.enable_global_norm = enable_global_norm
    self.one = Tensor(np.array([1]).astype(np.int32))
    self.zero = Tensor(np.array([0]).astype(np.int32))
    self.local_step = Parameter(initializer(0, [1], mstype.int32), name="local_step")
    self.accu_grads = self.weights.clone(prefix="accu_grads", init='zeros')
    self.accu_overflow = Parameter(initializer(0, [1], mstype.int32), name="accu_overflow")
    self.loss = Parameter(initializer(0, [1], mstype.float32), name="accu_loss")

    self.grad = C.GradOperation(get_by_list=True, sens_param=True)
    self.reducer_flag = False
    self.parallel_mode = context.get_auto_parallel_context("parallel_mode")
    if self.parallel_mode in [ParallelMode.DATA_PARALLEL, ParallelMode.HYBRID_PARALLEL]:
        self.reducer_flag = True
    self.grad_reducer = F.identity
    self.degree = 1
    if self.reducer_flag:
        self.degree = get_group_size()
        self.grad_reducer = DistributedGradReducer(optimizer.parameters, False, self.degree)
    self.is_distributed = (self.parallel_mode != ParallelMode.STAND_ALONE)
    self.overflow_reducer = F.identity
    if self.is_distributed:
        self.overflow_reducer = P.AllReduce()
    self.cast = P.Cast()
    self.alloc_status = P.NPUAllocFloatStatus()
    self.get_status = P.NPUGetFloatStatus()
    self.clear_before_grad = P.NPUClearFloatStatus()
    self.reduce_sum = P.ReduceSum(keep_dims=False)
    self.base = Tensor(1, mstype.float32)
    self.less_equal = P.LessEqual()
    self.logical_or = P.LogicalOr()
    self.not_equal = P.NotEqual()
    self.select = P.Select()
    self.reshape = P.Reshape()
    self.hyper_map = C.HyperMap()
    self.loss_scale = None
    self.loss_scaling_manager = scale_update_cell
    if scale_update_cell:
        self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(),
                                           dtype=mstype.float32),
                                    name="loss_scale")
def test_select():
    select = P.Select()
    cond = Tensor(np.array([[True, False, False], [False, True, True]]))
    x = Tensor(np.array([[1, 2, 3], [4, 5, 6]]))
    y = Tensor(np.array([[7, 8, 9], [10, 11, 12]]))
    output = select(cond, x, y)
    expect = np.array([[1, 8, 9], [10, 5, 6]])
    assert np.all(output.asnumpy() == expect)