def __init__(self, query_vector_dim, input_vector_dim):
    super(Attention, self).__init__()
    self.dense1 = nn.Dense(input_vector_dim, query_vector_dim, has_bias=True, activation='tanh')
    self.dense2 = nn.Dense(query_vector_dim, 1, has_bias=False)
    self.softmax = nn.Softmax()
    self.sum_keep_dims = ops.ReduceSum(keep_dims=True)
    self.sum = ops.ReduceSum(keep_dims=False)
def __init__(self, sparse=False):
    super(SoftmaxCrossEntropyExpand, self).__init__()
    self.exp = ops.Exp()
    self.sum = ops.ReduceSum(keep_dims=True)
    self.onehot = ops.OneHot()
    self.on_value = Tensor(1.0, mstype.float32)
    self.off_value = Tensor(0.0, mstype.float32)
    self.div = ops.RealDiv()
    self.log = ops.Log()
    self.sum_cross_entropy = ops.ReduceSum(keep_dims=False)
    self.mul = ops.Mul()
    self.mul2 = ops.Mul()
    self.mean = ops.ReduceMean(keep_dims=False)
    self.sparse = sparse
    self.max = ops.ReduceMax(keep_dims=True)
    self.sub = ops.Sub()
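# Hedged sketch (an assumption, not code from this file): the operators above are the
# usual ingredients of a hand-expanded, numerically stable softmax cross entropy,
# roughly combined as follows (argument names `logit` and `label` are illustrative):
#
#     def construct(self, logit, label):
#         logit_max = self.max(logit, -1)
#         exp = self.exp(self.sub(logit, logit_max))      # shift by the row max for stability
#         exp_sum = self.sum(exp, -1)
#         softmax_result = self.div(exp, exp_sum)
#         if self.sparse:
#             label = self.onehot(label, logit.shape[1], self.on_value, self.off_value)
#         log_softmax = self.log(softmax_result)
#         loss = self.sum_cross_entropy(self.mul(log_softmax, label), -1)
#         loss = self.mul2(loss, self.off_value - self.on_value)   # multiply by -1
#         loss = self.mean(loss, -1)
#         return loss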
def __init__(self, network, optimizer, scale_update_cell=None):
    super(BertTrainOneStepWithLossScaleCell, self).__init__(auto_prefix=False)
    self.network = network
    self.weights = ParameterTuple(network.trainable_params())
    self.optimizer = optimizer
    self.grad = ops.GradOperation(get_by_list=True, sens_param=True)
    self.reducer_flag = False
    self.allreduce = ops.AllReduce()
    self.parallel_mode = context.get_auto_parallel_context("parallel_mode")
    if self.parallel_mode in [ParallelMode.DATA_PARALLEL, ParallelMode.HYBRID_PARALLEL]:
        self.reducer_flag = True
    self.grad_reducer = ops.identity
    self.degree = 1
    if self.reducer_flag:
        self.degree = get_group_size()
        self.grad_reducer = DistributedGradReducer(optimizer.parameters, False, self.degree)
    self.is_distributed = (self.parallel_mode != ParallelMode.STAND_ALONE)
    self.cast = ops.Cast()
    self.alloc_status = ops.NPUAllocFloatStatus()
    self.get_status = ops.NPUGetFloatStatus()
    self.clear_before_grad = ops.NPUClearFloatStatus()
    self.reduce_sum = ops.ReduceSum(keep_dims=False)
    self.depend_parameter_use = ops.ControlDepend(depend_mode=1)
    self.base = Tensor(1, mstype.float32)
    self.less_equal = ops.LessEqual()
    self.hyper_map = ops.HyperMap()
    self.loss_scale = None
    self.loss_scaling_manager = scale_update_cell
    if scale_update_cell:
        self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32),
                                    name="loss_scale")
def construct(self, s_t_hat, encoder_outputs, encoder_feature, enc_padding_mask, coverage):
    b, t_k, n = encoder_outputs.shape
    dec_fea = self.decode_proj(s_t_hat)                          # (B, 2 * hidden_dim)
    dec_fea_expand = P.ExpandDims()(dec_fea, 1)
    dec_fea_expand = P.BroadcastTo((b, t_k, n))(dec_fea_expand)  # target shape is an init argument of BroadcastTo
    att_features = encoder_feature + dec_fea_expand
    if self.is_coverage:
        coverage_input = coverage.view(-1, 1)                    # (B * t_k, 1)
        coverage_feature = self.W_c(coverage_input)              # (B * t_k, 2 * hidden_dim)
        att_features = att_features + coverage_feature
    e = P.Tanh()(att_features)                                   # (B * t_k, 2 * hidden_dim)
    scores = self.v(e)                                           # (B * t_k, 1)
    scores = scores.view(-1, t_k)                                # (B, t_k)
    attn_dist_ = P.Softmax(1)(scores) * enc_padding_mask         # (B, t_k)
    normalization_factor = P.ReduceSum(True)(attn_dist_, 1)
    attn_dist = attn_dist_ / normalization_factor
    attn_dist = P.ExpandDims()(attn_dist, 1)                     # (B, 1, t_k)
    c_t = P.BatchMatMul()(attn_dist, encoder_outputs)            # (B, 1, n); the primitive must be instantiated before the call
    c_t = c_t.view(-1, self.hidden_dim * 2)                      # (B, 2 * hidden_dim)
    attn_dist = attn_dist.view(-1, t_k)
    if self.is_coverage:
        coverage = coverage.view(-1, t_k)
        coverage = coverage + attn_dist
    return c_t, attn_dist, coverage
def __init__(self, net_config):
    super(CenterNetMultiPoseLossCell, self).__init__()
    self.network = GatherMultiPoseFeatureCell(net_config)
    self.reduce_sum = ops.ReduceSum()
    self.crit = FocalLoss()
    self.crit_hm_hp = nn.MSELoss() if net_config.mse_loss else self.crit
    self.crit_kp = RegWeightedL1Loss() if not net_config.dense_hp else nn.L1Loss(reduction='sum')
    self.crit_reg = RegLoss(net_config.reg_loss)
    self.hm_weight = net_config.hm_weight
    self.hm_hp_weight = net_config.hm_hp_weight
    self.hp_weight = net_config.hp_weight
    self.wh_weight = net_config.wh_weight
    self.off_weight = net_config.off_weight
    self.hm_hp = net_config.hm_hp
    self.dense_hp = net_config.dense_hp
    self.reg_offset = net_config.reg_offset
    self.reg_hp_offset = net_config.reg_hp_offset
    self.hm_hp_ind = 3 if self.hm_hp else 2
    self.reg_ind = self.hm_hp_ind + 1 if self.reg_offset else self.hm_hp_ind
    self.reg_hp_ind = self.reg_ind + 1 if self.reg_hp_offset else self.reg_ind
    # just used for check
    self.print = ops.Print()
    self.concat = ops.Concat(axis=1)
    self.reshape = ops.Reshape()
def __init__(self):
    super(log_sum_exp, self).__init__()
    self.maxi = P.ReduceMax()
    self.maxi_dim = P.ReduceMax(keep_dims=True)
    self.log = P.Log()
    self.sums = P.ReduceSum()
    self.exp = P.Exp()
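# Hedged standalone sketch (an assumption, not part of the cell above): the operators in
# log_sum_exp are typically combined via the numerically stable identity
# log(sum(exp(x))) = max(x) + log(sum(exp(x - max(x)))).
import mindspore.ops as ops

def stable_log_sum_exp(x, axis=-1):
    """Illustrative helper: numerically stable log-sum-exp along `axis`."""
    m_keep = ops.ReduceMax(keep_dims=True)(x, axis)   # per-row maximum, kept for broadcasting
    shifted_exp = ops.Exp()(x - m_keep)               # exp of shifted values cannot overflow
    return ops.ReduceMax()(x, axis) + ops.Log()(ops.ReduceSum()(shifted_exp, axis))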
def __init__(self, bins=10, momentum=0.0, mu=0.02):
    super(GHMRLoss, self).__init__()
    self.bins = bins
    self.momentum = momentum
    self.mu = mu
    edges_left = np.array([float(x) / bins for x in range(bins)], dtype=np.float32)
    self.edges_left = Tensor(edges_left.reshape((bins, 1, 1, 1, 1)))
    edges_right = np.array([float(x) / bins for x in range(1, bins + 1)], dtype=np.float32)
    edges_right[-1] += 1e-4
    self.edges_right = Tensor(edges_right.reshape((bins, 1, 1, 1, 1)))
    if momentum >= 0:
        self.acc_sum = Parameter(initializer(0, [bins], mstype.float32))
    self.abs = ops.Abs()
    self.sqrt = ops.Sqrt()
    self.cast = ops.Cast()
    self.select = ops.Select()
    self.reshape = ops.Reshape()
    self.reduce_sum = ops.ReduceSum()
    self.max = ops.Maximum()
    self.less = ops.Less()
    self.equal = ops.Equal()
    self.greater = ops.Greater()
    self.logical_and = ops.LogicalAnd()
    self.greater_equal = ops.GreaterEqual()
    self.zeros_like = ops.ZerosLike()
    self.expand_dims = ops.ExpandDims()
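# Background note (hedged, taken from the GHM-R formulation rather than this file): the
# regression loss is the authentic smooth L1, ASL1(d) = sqrt(d^2 + mu^2) - mu, whose
# gradient magnitude g = d / sqrt(d^2 + mu^2) lies in [0, 1). Each sample falls into one of
# `bins` buckets [edges_left, edges_right) over g and is reweighted by the inverse gradient
# density of its bucket, optionally smoothed across steps with `momentum` via `acc_sum`.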
def construct(self, teacher, student, neg):
    expand_dims = ops.ExpandDims()                                 # equivalent of unsqueeze
    teacher_vgg, student_vgg, neg_vgg = self.vgg(teacher), self.vgg(student), self.vgg(neg)
    loss = 0
    for i in range(len(teacher_vgg)):
        neg_i = expand_dims(neg_vgg[i], 0)                         # [8, n_feats, w, h]
        # neg_i = neg_i.repeat(student_vgg[i].shape[0], axis=0)    # TODO: Tensor.repeat is only supported from MindSpore 1.3
        neg_i = np.repeat(neg_i, student_vgg[i].shape[0], axis=0)  # [16, 8, n_feats, w, h]
        neg_i = neg_i.transpose((1, 0, 2, 3, 4))                   # [8, 16, n_feats, w, h]
        d_ts = self.l1(stop_gradient(teacher_vgg[i]), student_vgg[i])
        # d_sn = (stop_gradient(neg_i) - student_vgg[i]).abs().sum(axis=0).mean()  # TODO: Tensor.sum is only supported from MindSpore 1.3
        d_sn = (stop_gradient(neg_i) - student_vgg[i]).abs()       # [8, 16, n_feats, w, h]
        # print(d_sn.shape)
        reduce_sum = ops.ReduceSum()
        d_sn = reduce_sum(d_sn, 0).mean()
        # print(d_sn)
        contrastive = d_ts / (d_sn + 1e-7)
        loss += self.weights[i] * contrastive
    return self.get_loss(loss)
def __init__(self, network, optimizer, sens=1):
    super(CenterNetWithLossScaleCell, self).__init__(auto_prefix=False)
    self.image = ImagePreProcess()
    self.network = network
    self.network.set_grad()
    self.weights = optimizer.parameters
    self.optimizer = optimizer
    self.grad = ops.GradOperation(get_by_list=True, sens_param=True)
    self.reducer_flag = False
    self.allreduce = ops.AllReduce()
    self.parallel_mode = context.get_auto_parallel_context("parallel_mode")
    if self.parallel_mode in [ParallelMode.DATA_PARALLEL, ParallelMode.HYBRID_PARALLEL]:
        self.reducer_flag = True
    self.grad_reducer = ops.identity
    self.degree = 1
    if self.reducer_flag:
        self.degree = get_group_size()
        self.grad_reducer = DistributedGradReducer(optimizer.parameters, False, self.degree)
    self.is_distributed = (self.parallel_mode != ParallelMode.STAND_ALONE)
    self.cast = ops.Cast()
    self.alloc_status = ops.NPUAllocFloatStatus()
    self.get_status = ops.NPUGetFloatStatus()
    self.clear_before_grad = ops.NPUClearFloatStatus()
    self.reduce_sum = ops.ReduceSum(keep_dims=False)
    self.base = Tensor(1, mstype.float32)
    self.less_equal = ops.LessEqual()
    self.grad_scale = GradScale()
    self.loss_scale = sens
def __init__(self):
    super(log_softmax, self).__init__()
    self.maxi = P.ReduceMax()
    self.log = P.Log()
    self.sums = P.ReduceSum()
    self.exp = P.Exp()
    self.axis = -1
    self.concat = P.Concat(-1)
    self.expanddims = P.ExpandDims()
def __init__(self, reduction="mean"): super(CrossEntropyLoss, self).__init__() self.cross_entropy = P.SoftmaxCrossEntropyWithLogits() if reduction == "sum": self.reduction = P.ReduceSum() if reduction == "mean": self.reduction = P.ReduceMean() self.one_hot = P.OneHot() self.one = Tensor(1.0, mstype.float32) self.zero = Tensor(0.0, mstype.float32)
def construct(self, x):
    pow_op = P.Pow()
    reduce_sum = P.ReduceSum()
    div = P.Div()
    norm = pow_op(x, self.power)
    norm = reduce_sum(norm)              # sum of x ** power
    norm = pow_op(norm, 1. / self.power)
    out = div(x, norm)
    return out
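# Hedged note on the behavior implied by the operators above: the cell performs an Lp
# normalization, out = x / (sum(x ** power)) ** (1 / power). For power = 2, an input of
# [3., 4.] maps to [0.6, 0.8].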
def __init__(self, mixture_size: int, do_layer_norm: bool = False) -> None:
    super(Scalar_mix, self).__init__()
    self.mixture_size = mixture_size
    self.do_layer_norm = do_layer_norm
    self.scalar_parameters = ParameterTuple([Parameter(Tensor(np.array([0.0]), mindspore.float32))
                                             for _ in range(mixture_size)])
    self.gamma = Parameter(Tensor(np.array([0.0]), mindspore.float32))
    self.sum = P.ReduceSum()
    self.sqrt = P.Sqrt()
    self.cat = P.Concat()
    self.unsqueeze = P.ExpandDims()   # ExpandDims takes no init arguments; the axis is passed at call time
def __init__(self, config):
    super(BertPretrainingLoss, self).__init__()
    self.vocab_size = config.vocab_size
    self.onehot = ops.OneHot()
    self.on_value = Tensor(1.0, mstype.float32)
    self.off_value = Tensor(0.0, mstype.float32)
    self.reduce_sum = ops.ReduceSum()
    self.reduce_mean = ops.ReduceMean()
    self.reshape = ops.Reshape()
    self.last_idx = (-1,)
    self.neg = ops.Neg()
    self.cast = ops.Cast()
def __init__(self, mode='l1'):
    super(RegLoss, self).__init__()
    self.reduce_sum = ops.ReduceSum()
    self.cast = ops.Cast()
    self.expand_dims = ops.ExpandDims()
    self.reshape = ops.Reshape()
    self.gather_feature = TransposeGatherFeature()
    if mode == 'l1':
        self.loss = nn.L1Loss(reduction='sum')
    elif mode == 'sl1':
        self.loss = nn.SmoothL1Loss()
    else:
        self.loss = None
def __init__(self, learning_rate, multi_epochs, steps_per_epoch, factor=10):
    super(MultiEpochsDecayLR, self).__init__()
    if not isinstance(multi_epochs, (list, tuple)):
        raise TypeError("multi_epochs must be list or tuple.")
    self.multi_epochs = Tensor(np.array(multi_epochs, dtype=np.float32) * steps_per_epoch)
    self.num = len(multi_epochs)
    self.start_learning_rate = learning_rate
    self.steps_per_epoch = steps_per_epoch
    self.factor = factor
    self.pow = ops.Pow()
    self.cast = ops.Cast()
    self.less_equal = ops.LessEqual()
    self.reduce_sum = ops.ReduceSum()
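# Hedged note (an assumption about the matching construct): these operators imply a step
# decay schedule, lr(step) = start_learning_rate / factor ** k, where k is the number of
# milestone steps (multi_epochs * steps_per_epoch) that are <= the current global step,
# counted with less_equal -> cast -> reduce_sum.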
def __init__(self, alpha=2, beta=4):
    super(FocalLoss, self).__init__()
    self.alpha = alpha
    self.beta = beta
    self.pow = ops.Pow()
    self.log = ops.Log()
    self.select = ops.Select()
    self.equal = ops.Equal()
    self.less = ops.Less()
    self.cast = ops.Cast()
    self.fill = ops.Fill()
    self.dtype = ops.DType()
    self.shape = ops.Shape()
    self.reduce_sum = ops.ReduceSum()
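# Background note (hedged): with the default alpha=2 and beta=4 these operators correspond
# to the penalty-reduced pixel-wise focal loss used on CornerNet/CenterNet heatmaps:
#   L = -1/N * sum over pixels of
#         (1 - p) ** alpha * log(p)                  where the ground truth y == 1,
#         (1 - y) ** beta * p ** alpha * log(1 - p)  otherwise,
# with p the predicted heatmap value and N the number of keypoints.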
def __init__(self, model, config, is_training, dropout_prob=0.0, use_one_hot_embeddings=False):
    super(BertPoetry, self).__init__(auto_prefix=False)
    self.num_tokens = 3191
    self.poetry = model
    self.onehot = ops.OneHot()
    self.on_value = Tensor(1.0, mstype.float32)
    self.off_value = Tensor(0.0, mstype.float32)
    self.reduce_sum = ops.ReduceSum()
    self.reduce_mean = ops.ReduceMean()
    self.reshape = ops.Reshape()
    self.neg = ops.Neg()
    self.cast = ops.Cast()
    self.last_idx = (-1,)
    self.log = ops.Log()
    self.max = ops.ArgMaxWithValue(axis=-1)
def __init__(self, reduction='mean'):
    super(BCEWithLogits, self).__init__()
    if reduction is None:
        reduction = 'none'
    if reduction not in ('mean', 'sum', 'none'):
        raise ValueError(f"reduction method '{reduction}' is not supported")
    self.loss = ops.SigmoidCrossEntropyWithLogits()
    self.reduce = False
    if reduction == 'sum':
        self.reduce_mode = ops.ReduceSum()
        self.reduce = True
    elif reduction == 'mean':
        self.reduce_mode = ops.ReduceMean()
        self.reduce = True
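# Hedged sketch of the matching construct (an assumption following the usual pattern):
#
#     def construct(self, predict, target):
#         loss = self.loss(predict, target)     # element-wise sigmoid cross entropy
#         if self.reduce:
#             loss = self.reduce_mode(loss)     # ReduceSum or ReduceMean over all elements
#         return loss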
def __init__(self, net_config):
    super(CenterNetLossCell, self).__init__()
    self.network = GatherDetectionFeatureCell(net_config)
    self.net_config = net_config
    self.reduce_sum = ops.ReduceSum()
    self.Sigmoid = Sigmoid()
    self.FocalLoss = FocalLoss()
    self.crit = nn.MSELoss() if net_config.mse_loss else self.FocalLoss
    self.crit_reg = RegLoss(net_config.reg_loss)
    self.crit_wh = RegLoss(net_config.reg_loss)
    self.num_stacks = net_config.num_stacks
    self.wh_weight = net_config.wh_weight
    self.hm_weight = net_config.hm_weight
    self.off_weight = net_config.off_weight
    self.reg_offset = net_config.reg_offset
    self.not_enable_mse_loss = not net_config.mse_loss
    self.Print = ops.Print()
def __init__(self, log_scale_min=-7.0, reduce=True):
    super(mix_gaussian_loss, self).__init__()
    self.log_scale_min = log_scale_min
    self.reduce = reduce
    self.transpose_op = P.Transpose()
    self.maximum = P.Maximum()
    self.tile = P.Tile()
    self.exp = P.Exp()
    self.logsoftmax = P.LogSoftmax(-1)
    self.expand_dims = P.ExpandDims()
    self.sums = P.ReduceSum()
    self.lse = log_sum_exp()
    self.sq = P.Square()
    self.sqrt = P.Sqrt()
    self.const = P.ScalarToArray()
    self.log = P.Log()
def __init__(self, num_classes=256, log_scale_min=-7.0, reduce=True):
    super(discretized_mix_logistic_loss, self).__init__()
    self.num_classes = num_classes
    self.log_scale_min = log_scale_min
    self.reduce = reduce
    self.transpose_op = P.Transpose()
    self.exp = P.Exp()
    self.sigmoid = P.Sigmoid()
    self.softplus = Stable_softplus()
    self.log = P.Log()
    self.cast = P.Cast()
    self.logsoftmax = P.LogSoftmax(-1)
    self.expand_dims = P.ExpandDims()
    self.tile = P.Tile()
    self.maximum = P.Maximum()
    self.sums = P.ReduceSum()
    self.lse = log_sum_exp()
    self.reshape = P.Reshape()
    self.factor = self.log(Tensor((self.num_classes - 1) / 2, ms.float32))
def __init__(self, log_scale_min=-7.0, reduce=True):
    super(mix_gaussian_loss, self).__init__()
    self.log_scale_min = log_scale_min
    self.reduce = reduce
    self.transpose_op = P.Transpose()
    self.maximum = P.Maximum()
    self.tile = P.Tile()
    self.exp = P.Exp()
    self.expand_dims = P.ExpandDims()
    self.sums = P.ReduceSum()
    self.lse = log_sum_exp()
    self.sq = P.Square()
    self.sqrt = P.Sqrt()
    self.const = P.ScalarToArray()
    self.log = P.Log()
    self.tensor_one = Tensor(1., ms.float32)
    if context.get_context("device_target") == "CPU":
        self.logsoftmax = log_softmax()
    else:
        self.logsoftmax = P.LogSoftmax(-1)
def __init__(self, network, optimizer, scale_update_cell=None):
    super(BertPoetryCell, self).__init__(network, optimizer, scale_update_cell)
    self.network = network
    self.weights = ParameterTuple(network.trainable_params())
    self.optimizer = optimizer
    self.grad = ops.GradOperation(get_by_list=True, sens_param=True)
    self.reducer_flag = False
    self.allreduce = ops.AllReduce()
    self.parallel_mode = context.get_auto_parallel_context("parallel_mode")
    if self.parallel_mode in [ParallelMode.DATA_PARALLEL, ParallelMode.HYBRID_PARALLEL]:
        self.reducer_flag = True
    self.grad_reducer = None
    if self.reducer_flag:
        mean = context.get_auto_parallel_context("mirror_mean")
        degree = get_group_size()
        self.grad_reducer = DistributedGradReducer(optimizer.parameters, mean, degree)
    self.is_distributed = (self.parallel_mode != ParallelMode.STAND_ALONE)
    self.cast = ops.Cast()
    self.gpu_target = False
    if context.get_context("device_target") == "GPU":
        self.gpu_target = True
        self.float_status = ops.FloatStatus()
        self.addn = ops.AddN()
        self.reshape = ops.Reshape()
    else:
        self.alloc_status = ops.NPUAllocFloatStatus()
        self.get_status = ops.NPUGetFloatStatus()
        self.clear_before_grad = ops.NPUClearFloatStatus()
        self.reduce_sum = ops.ReduceSum(keep_dims=False)
    self.base = Tensor(1, mstype.float32)
    self.less_equal = ops.LessEqual()
    self.hyper_map = ops.HyperMap()
    self.loss_scale = None
    self.loss_scaling_manager = scale_update_cell
    if scale_update_cell:
        self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32),
                                    name="loss_scale")
def __init__(self, net_config, K=100, enable_nms_fp16=True):
    super(MultiPoseDecode, self).__init__()
    self.K = K
    self.nms = NMS(enable_nms_fp16=enable_nms_fp16)
    self.shape = ops.Shape()
    self.gather_topk = GatherTopK()
    self.gather_topk_channel = GatherTopKChannel()
    self.gather_by_ind = GatherFeatureByInd()
    self.half = ops.Split(axis=-1, output_num=2)
    self.half_first = ops.Split(axis=0, output_num=2)
    self.split = ops.Split(axis=-1, output_num=4)
    self.flip_lr = FlipLR()
    self.flip_lr_off = FlipLROff()
    self.flip_tensor = FlipTensor()
    self.concat = ops.Concat(axis=1)
    self.concat_a2 = ops.Concat(axis=2)
    self.concat_a3 = ops.Concat(axis=3)
    self.trans_gather_feature = TransposeGatherFeature()
    self.expand_dims = ops.ExpandDims()
    self.reshape = ops.Reshape()
    self.add = ops.TensorAdd()
    self.dtype = ops.DType()
    self.cast = ops.Cast()
    self.thresh = 0.1
    self.transpose = ops.Transpose()
    self.perm_list = (0, 2, 1, 3)
    self.tile = ops.Tile()
    self.greater = ops.Greater()
    self.square = ops.Square()
    self.sqrt = ops.Sqrt()
    self.reduce_sum = ops.ReduceSum()
    self.min = ops.ArgMinWithValue(axis=3)
    self.max = ops.Maximum()
    self.hm_hp = net_config.hm_hp
    self.dense_hp = net_config.dense_hp
    self.reg_offset = net_config.reg_offset
    self.reg_hp_offset = net_config.reg_hp_offset
    self.hm_hp_ind = 3 if self.hm_hp else 2
    self.reg_ind = self.hm_hp_ind + 1 if self.reg_offset else self.hm_hp_ind
    self.reg_hp_ind = self.reg_ind + 1 if self.reg_hp_offset else self.reg_ind
def construct(self, img1, img2, label1, label2, modal=0, cpa=False):
    feat, feat_att, out, out_att = self._backbone(img1, x2=img2, modal=modal, cpa=False)
    concat = P.Concat()
    label = concat((label1, label2))
    cast = P.Cast()
    label_ = cast(label, ms.int32)
    loss_id = self._ce_loss(out, label_)
    # loss_id_att = self._ce_loss(out_att, label_)
    reduce_sum = P.ReduceSum()
    loss_id = reduce_sum(loss_id) / label_.shape[0]
    # print("loss id is", loss_id)
    loss_tri = self._tri_loss(feat, label)
    # loss_tri_att = self._tri_loss(feat_att, label)
    # print("triplet id is", loss_tri)
    return loss_id + loss_tri
def global_norm(x):
    sqrt = P.Sqrt()
    reduce_sum = P.ReduceSum()
    l2 = P.L2Normalize()
    x = sqrt(reduce_sum(P.functional.square(l2(x))))
    return x
def __init__(self):
    super(RegWeightedL1Loss, self).__init__()
    self.reduce_sum = ops.ReduceSum()
    self.gather_feature = TransposeGatherFeature()
    self.cast = ops.Cast()
    self.l1_loss = nn.L1Loss(reduction='sum')
def __init__(self):
    super().__init__()
    self.exp = ops.Exp()
    self.reduce_sum = ops.ReduceSum(keep_dims=True)
def construct(self, inputs, targets):
    """
    Args:
        - inputs: feature matrix with shape (batch_size, feat_dim)
        - targets: ground truth labels with shape (batch_size,)
    """
    n = inputs.shape[0]
    # Compute pairwise distance; replace by the official op when merged
    pow_op = P.Pow()
    reduce_sum = P.ReduceSum(keep_dims=True)
    expand = P.BroadcastTo((n, n))
    transpose = P.Transpose()
    mul = P.Mul()
    add = P.Add()
    sqrt = P.Sqrt()
    equal = P.Equal()
    cat = P.Concat()
    ones_like = P.OnesLike()

    dist = pow_op(inputs, 2)
    dist = reduce_sum(dist, axis=1)
    dist = expand(dist)
    dist = dist + transpose(dist, (1, 0))
    temp1 = P.matmul(inputs, transpose(inputs, (1, 0)))
    temp1 = mul(-2, temp1)
    dist = add(dist, temp1)
    dist = P.composite.clip_by_value(
        dist, clip_value_min=1e-12, clip_value_max=100000000
    )  # for numerical stability; clip_value_max=? why must set?
    dist = sqrt(dist)

    # For each anchor, find the hardest positive and negative
    targets = expand(targets)
    mask = equal(targets, transpose(targets, (1, 0)))
    dist_ap = []
    dist_an = []
    # only for debugging
    #####################
    # print("dist is")
    # print(dist.shape)
    # print(dist)
    # print("mask is")
    # print(mask.shape)
    # print(mask)
    # print(mask[0])
    #####################
    for i in range(n):
        minval = -1.0
        maxval = -1.0
        for j in range(n):
            if mask[i][j] and dist[i][j] > maxval:
                maxval = dist[i][j]
            if not mask[i][j] and (dist[i][j] < minval or minval == -1):
                minval = dist[i][j]
        if (not isinstance(minval, Tensor) or not isinstance(maxval, Tensor)
                or minval == -1.0 or maxval == -1.0):
            if self.error_msg is not None:
                print("Error Msg", file=self.error_msg)
                print("mask {} is".format(i), file=self.error_msg)
                print(mask[i], file=self.error_msg)
                print("dist is:", file=self.error_msg)
                print(dist[i], file=self.error_msg)
                print(maxval, file=self.error_msg)
                print(minval, file=self.error_msg)
                print(type(maxval), file=self.error_msg)
                print(type(minval), file=self.error_msg)
                self.error_msg.flush()
        # assert minval != -1.0 and isinstance(minval, Tensor)
        # assert maxval != -1.0 and isinstance(maxval, Tensor)
        dist_ap.append(maxval.asnumpy())
        dist_an.append(minval.asnumpy())
    dist_ap = Tensor(dist_ap, ms.float32)
    dist_an = Tensor(dist_an, ms.float32)
    # only for debugging
    #####################
    # print(dist_ap)
    # print(dist_ap.shape)
    # print(dist_an)
    #####################

    # Compute ranking hinge loss
    y = ones_like(dist_an)
    loss = self.ranking_loss(dist_an, dist_ap, y)
    # # compute accuracy
    # correct = torch.ge(dist_an, dist_ap).sum().item()
    return loss

# class GradOriTripletLoss(nn.Cell):
#     def __init__(self, net):
#         super(GradOriTripletLoss, self).__init__()
#         self.net = net
#         self.grad_op = P.GradOperation(get_all=True)
#
#     def construct(self, inputs, targets):
#         gradient_function = self.grad_op(self.net)
#         return gradient_function(inputs, targets)
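# Hedged usage sketch (cell and variable names are assumptions): the construct above expects
# a batch of embeddings and one identity label per sample, e.g.
#     features = Tensor(np.random.randn(8, 2048), ms.float32)
#     labels = Tensor(np.array([0, 0, 1, 1, 2, 2, 3, 3]), ms.float32)
#     loss = triplet_loss_cell(features, labels)   # scalar margin ranking loss on hardest pairs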