def construct(self, x, hx):
    if self.batch_first:
        x = self.transpose(x, (1, 0, 2))
    h, c = hx
    if self.is_ascend:
        _check_input_dtype(F.dtype(x), "x", [mstype.float32, mstype.float16], self.cls_name)
        _check_input_dtype(F.dtype(h), "h", [mstype.float32, mstype.float16], self.cls_name)
        _check_input_dtype(F.dtype(c), "c", [mstype.float32, mstype.float16], self.cls_name)
        x = self.cast(x, mstype.float16)
        h = self.cast(h, mstype.float16)
        c = self.cast(c, mstype.float16)
        if self.bidirectional:
            x, h, c = self._stacked_bi_dynamic_rnn(x, h, c, self.w_list, self.b_list)
        else:
            x, h, c = self._stacked_dynamic_rnn(x, h, c, self.w_list, self.b_list)
    else:
        x, h, c, _, _ = self.lstm(x, h, c, self.weight)
    if self.batch_first:
        x = self.transpose(x, (1, 0, 2))
    return x, (h, c)
def bprop(x, z, out, dout):
    if mean_flag:
        if F.issubclass_(F.typeof(dout), mstype.tensor):
            if do_mirror:
                z = F.depend(z, F.assign_add(z, dout))
                real_grad = all_reduce(z)
                dx = real_grad
            else:
                dx = dout
            float_one = F.scalar_cast(1.0, F.dtype(dx))
            num = F.scalar_cast(dev_num, F.dtype(dx))
            dx = mul(dx, cast(F.scalar_to_array(float_one / num), F.dtype(dx)))
        else:
            dx = zeros_like(x)  # Gradient accumulation does not support row tensors yet
    else:
        if F.issubclass_(F.typeof(dout), mstype.tensor):
            if do_mirror:
                z = F.depend(z, F.assign_add(z, dout))
                real_grad = all_reduce(z)
                dx = real_grad
            else:
                dx = dout
        else:
            dx = zeros_like(x)  # Gradient accumulation does not support row tensors yet
    return (dx, zeros_like(z))
def _tensors_allreduce_ps(degree, mean, allgather, allreduce, allreduce_filter, grad, ps_parameter):
    """
    Apply allreduce on gradient.

    Args:
        degree (int): The mean coefficient.
        mean (bool): When mean is true, the mean coefficient (degree) would apply on gradients.
        allgather (Primitive): The communication operator for sparse gradients.
        allreduce (Primitive): The communication operator for gradients.
        allreduce_filter (bool): When it is true, allreduce would apply.
        grad (Tensor): The gradient tensor before operation.
        ps_parameter (bool): Use parameter server or not.

    Returns:
        Tensor, the gradient tensor after operation.
    """
    if ps_parameter:
        return grad

    if allreduce_filter:
        grad = allreduce(grad)
        if mean:
            degree = F.scalar_cast(degree, F.dtype(grad))
            cast_op = P.Cast()
            mul_op = P.Mul()
            grad = mul_op(grad, cast_op(F.scalar_to_array(1.0 / degree), F.dtype(grad)))
        return grad
    return grad
def construct(self, img1, img2):
    _check_input_4d(F.shape(img1), "img1", self.cls_name)
    _check_input_4d(F.shape(img2), "img2", self.cls_name)
    _check_input_dtype(F.dtype(img1), 'img1', mstype.number_type, self.cls_name)
    P.SameTypeShape()(img1, img2)
    dtype_max_val = _get_dtype_max(F.dtype(img1))
    max_val = F.scalar_cast(self.max_val, F.dtype(img1))
    max_val = _convert_img_dtype_to_float32(max_val, dtype_max_val)
    img1 = _convert_img_dtype_to_float32(img1, dtype_max_val)
    img2 = _convert_img_dtype_to_float32(img2, dtype_max_val)

    c1 = (self.k1 * max_val) ** 2
    c2 = (self.k2 * max_val) ** 2

    sim = ()
    mcs = ()
    for i in range(self.level):
        sim, cs = _compute_multi_channel_loss(c1, c2, img1, img2,
                                              self.multi_convs_list[i], self.concat, self.reduce_mean)
        mcs += (self.relu(cs),)
        img1, img2 = _downsample(img1, img2, self.avg_pool)

    mcs = mcs[0:-1:1]
    mcs_and_ssim = self.pack(mcs + (self.relu(sim),))
    mcs_and_ssim = self.pow(mcs_and_ssim, self.weight_tensor)
    ms_ssim = self.prod(mcs_and_ssim, -1)
    loss = self.reduce_mean(ms_ssim, -1)

    return loss
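
# Illustrative only (not part of the original source): a minimal NumPy sketch of how the
# per-level contrast-structure (cs) values and the final-level ssim value above are combined,
# assuming one scalar per level and, as an example, the commonly used 5-level MS-SSIM weights.
import numpy as np

def ms_ssim_combine(cs_values, ssim_value, weights):
    """cs_values: relu'd cs scores for all but the last level; ssim_value: last-level ssim."""
    terms = np.concatenate([np.maximum(cs_values, 0.0), [max(ssim_value, 0.0)]])
    return float(np.prod(terms ** np.asarray(weights)))

print(ms_ssim_combine([0.9, 0.92, 0.95, 0.97], 0.98,
                      [0.0448, 0.2856, 0.3001, 0.2363, 0.1333]))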
def TensorDot(x1, x2, axes):
    """
    Computation of Tensor contraction on arbitrary axes between tensors `a` and `b`.

    Contraction allows for the summation of products of elements of `a` and `b` on specified axes.
    The same number of axes must be specified for both x1 and x2, and values must be within range
    of number of dims of both `a` and `b`. Selected dims in both inputs must also match.

    axes = 0 leads to outer product, and axes = 1 leads to normal matrix multiplication.
    axes = 1 is the same as axes = ((0,), (1,)) where length of input shape is 2 for both `a` and `b`.
    axes = 2 is the same as axes = ((0, 1), (1, 2)) where length of input shape is 3 for both `a` and `b`.

    Inputs:
        - **x1** (Tensor) - First tensor in TensorDot op with datatype float16 or float32.
        - **x2** (Tensor) - Second tensor in TensorDot op with datatype float16 or float32.
        - **axes** (Union[int, tuple(int), tuple(tuple(int)), list(list(int))]) - Single value or
          tuple/list of length 2 with dimensions specified for `a` and `b` each. If single value `N` passed,
          automatically picks up first N dims from `a` input shape and last N dims from `b` input shape.

    Outputs:
        Tensor, the shape of the output tensor is :math:`(N + M)`, where :math:`N` and :math:`M` are the
        free axes not contracted in both inputs.

    Examples:
        >>> input_x1 = Tensor(np.ones(shape=[1, 2, 3]), mindspore.float32)
        >>> input_x2 = Tensor(np.ones(shape=[3, 1, 2]), mindspore.float32)
        >>> output = C.TensorDot(input_x1, input_x2, ((0, 1), (1, 2)))
        >>> print(output)
        [[2. 2. 2.]
         [2. 2. 2.]
         [2. 2. 2.]]
    """
    shape_op = P.Shape()
    reshape_op = P.Reshape()
    transpose_op = P.Transpose()
    matmul_op = P.MatMul(False, False)
    # input validity checks
    x1_shape = shape_op(x1)
    x2_shape = shape_op(x2)
    x1_type = F.dtype(x1)
    x2_type = F.dtype(x2)
    axes = _check_axes(axes)
    _typecheck_input(x1_type, x2_type)
    # input compatibility check & axes format update
    axes = _validate_input(x1_shape, x2_shape, axes)
    x1_reshape_fwd, x1_transpose_fwd, x1_ret = _calc_new_shape(x1_shape, axes, 0)
    x2_reshape_fwd, x2_transpose_fwd, x2_ret = _calc_new_shape(x2_shape, axes, 1)
    output_shape = x1_ret + x2_ret  # combine free axes from both inputs
    # run TensorDot op
    x1_transposed = transpose_op(x1, x1_transpose_fwd)
    x2_transposed = transpose_op(x2, x2_transpose_fwd)
    x1_reshaped = reshape_op(x1_transposed, x1_reshape_fwd)
    x2_reshaped = reshape_op(x2_transposed, x2_reshape_fwd)
    mul_result = matmul_op(x1_reshaped, x2_reshaped)
    final_result = reshape_op(mul_result, output_shape)
    return final_result
def _tensors_allreduce_with_sparse_ps(degree, mean, allgather, allreduce, allreduce_filter, grad, ps_parameter):
    """
    Apply allgather on gradient instead of allreduce for sparse feature.
    Allgather is a communication operation used for distributed deep learning.

    Args:
        degree (int): The mean coefficient.
        mean (bool): When mean is true, the mean coefficient (degree) would apply on gradients.
        allgather (Primitive): The communication operator for sparse gradients.
        allreduce (Primitive): The communication operator for gradients.
        allreduce_filter (bool): When it is true, allgather would apply.
        grad (tuple): The indices, gradient tensor and tensor_shape before operation.
        ps_parameter (bool): Use parameter server or not.

    Returns:
        RowTensor, the gradient after operation.
    """
    if ps_parameter:
        return grad

    if allreduce_filter:
        indices = allgather(grad.indices)
        dout = allgather(grad.values)
        if mean:
            degree = F.scalar_cast(degree, F.dtype(grad.values))
            cast_op = P.Cast()
            mul_op = P.Mul()
            dout = mul_op(dout, cast_op(F.scalar_to_array(1.0 / degree), F.dtype(dout)))
        grad = RowTensor(indices, dout, grad.dense_shape)
    return grad
def _attn(self, query, key, value, attention_mask):
    """
    Get the weighted score along the seq_length.

    Inputs:
        query: the query matrix
        key: the key matrix
        value: the value matrix
        attention_mask: the attention mask matrix with shape (batch_size, 1, seq_length, seq_length)

    Returns:
        weighted_values: Tensor, the weighted sum scores
    """
    if not self.scale:
        query = query / F.cast(self.coeff, F.dtype(query))
        key = key / F.cast(self.coeff, F.dtype(key))

    score = self.batch_matmul(query, key)
    if self.scale:
        score = score / P.Cast()(self.scale_factor, P.DType()(score))

    ori_dtype = P.DType()(score)
    score = P.Cast()(score, mstype.float32)
    multiply_out = P.Sub()(P.Cast()(F.tuple_to_array((1.0,)), P.DType()(score)),
                           P.Cast()(attention_mask, P.DType()(score)))
    adder = P.Mul()(multiply_out, self.multiply_data)
    attention_scores = adder + score

    attention_scores = P.Cast()(attention_scores, ori_dtype)
    attention_probs = Softmax()(attention_scores)
    attention_probs = self.prob_dropout(attention_probs)

    weighted_values = self.batch_matmul(attention_probs, value)
    return weighted_values
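
# Illustrative only (not from the original source): a minimal NumPy sketch of the masking trick
# used above, assuming `self.multiply_data` is a large negative constant (e.g. -10000.0).
# Positions where attention_mask == 0 receive a large negative additive bias, so they end up
# with near-zero probability after softmax.
import numpy as np

def masked_softmax(score, attention_mask, neg_bias=-10000.0):
    adder = (1.0 - attention_mask) * neg_bias            # 0 where allowed, large negative where masked
    logits = score + adder
    logits = logits - logits.max(axis=-1, keepdims=True)
    probs = np.exp(logits)
    return probs / probs.sum(axis=-1, keepdims=True)

score = np.array([[1.0, 2.0, 3.0]])
mask = np.array([[1.0, 1.0, 0.0]])                       # last position is masked out
print(masked_softmax(score, mask))                       # masked position gets ~0 probability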
def _tensors_allreduce_with_sparse(degree, mean, allgather, allreduce_filter, grad, allreduce):
    """
    Apply allgather on gradient instead of allreduce for sparse feature.
    Allgather is a communication operation used for distributed deep learning.

    Args:
        degree (int): The mean coefficient.
        mean (bool): When mean is true, the mean coefficient (degree) would apply on gradients.
        allgather (Primitive): The communication operator for sparse gradients.
        allreduce_filter (bool): When it is true, allgather would apply.
        grad (IndexedSlices): The gradient before operation.
        allreduce (Primitive): The communication operator for gradients.

    Returns:
        IndexedSlices, the gradient after operation.
    """
    if allreduce_filter:
        indices = allgather(grad.indices())
        dout = allgather(grad.values())
        if mean:
            degree = F.scalar_cast(degree, F.dtype(grad.values()))
            cast_op = P.Cast()
            mul_op = P.Mul()
            dout = mul_op(dout, cast_op(F.scalar_to_array(1.0 / degree), F.dtype(dout)))
        grad = IndexedSlices(indices, dout, grad.dense_shape())
    return grad
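
# Illustrative only (not from the original source): a minimal NumPy sketch of the sparse path
# above. Instead of allreducing a dense tensor, each device's (indices, values) rows are gathered
# (concatenated) across devices, and the values are scaled by 1/degree when mean is enabled.
import numpy as np

def allgather_sparse_mean(per_device_indices, per_device_values, degree):
    indices = np.concatenate(per_device_indices)            # what allgather(grad.indices()) returns
    values = np.concatenate(per_device_values) / degree     # gathered values scaled by the mean coefficient
    return indices, values

idx = [np.array([0, 2]), np.array([1, 2])]
val = [np.ones((2, 3)), 2 * np.ones((2, 3))]
print(allgather_sparse_mean(idx, val, degree=2))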
def _update_run_op(beta1, beta2, eps, lr, weight_decay, param, m, v, gradient, decay_flag, optim_filter):
    """
    Update parameters.

    Args:
        beta1 (Tensor): The exponential decay rate for the 1st moment estimations. Should be in range (0.0, 1.0).
        beta2 (Tensor): The exponential decay rate for the 2nd moment estimations. Should be in range (0.0, 1.0).
        eps (Tensor): Term added to the denominator to improve numerical stability. Should be greater than 0.
        lr (Tensor): Learning rate.
        weight_decay (Number): Weight decay. Should be equal to or greater than 0.
        param (Tensor): Parameters.
        m (Tensor): m value of parameters.
        v (Tensor): v value of parameters.
        gradient (Tensor): Gradient of parameters.
        decay_flag (bool): Applies weight decay or not.
        optim_filter (bool): Applies parameter update or not.

    Returns:
        Tensor, the new value of the parameter after updating.
    """
    if optim_filter:
        op_mul = P.Mul()
        op_square = P.Square()
        op_sqrt = P.Sqrt()
        op_cast = P.Cast()
        op_reshape = P.Reshape()
        op_shape = P.Shape()

        param_fp32 = op_cast(param, mstype.float32)
        m_fp32 = op_cast(m, mstype.float32)
        v_fp32 = op_cast(v, mstype.float32)
        gradient_fp32 = op_cast(gradient, mstype.float32)

        next_m = op_mul(beta1, m_fp32) + op_mul(op_cast(F.tuple_to_array((1.0,)), mstype.float32) - beta1,
                                                gradient_fp32)
        next_v = op_mul(beta2, v_fp32) + op_mul(op_cast(F.tuple_to_array((1.0,)), mstype.float32) - beta2,
                                                op_square(gradient_fp32))

        update = next_m / (eps + op_sqrt(next_v))
        if decay_flag:
            update = op_mul(weight_decay, param_fp32) + update

        update_with_lr = op_mul(lr, update)
        next_param = param_fp32 - op_reshape(update_with_lr, op_shape(param_fp32))

        next_param = F.depend(next_param, F.assign(param, op_cast(next_param, F.dtype(param))))
        next_param = F.depend(next_param, F.assign(m, op_cast(next_m, F.dtype(m))))
        next_param = F.depend(next_param, F.assign(v, op_cast(next_v, F.dtype(v))))

        return op_cast(next_param, F.dtype(param))
    return gradient
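
# Illustrative only (not from the original source): a minimal NumPy sketch of the update rule
# implemented above, for a single parameter tensor and a single step, with all the hypothetical
# hyperparameters passed in explicitly.
import numpy as np

def adam_weight_decay_step(param, m, v, grad, lr, beta1, beta2, eps, weight_decay, decay_flag):
    next_m = beta1 * m + (1.0 - beta1) * grad
    next_v = beta2 * v + (1.0 - beta2) * np.square(grad)
    update = next_m / (eps + np.sqrt(next_v))
    if decay_flag:
        update = weight_decay * param + update
    next_param = param - lr * update
    return next_param, next_m, next_v

p, m, v = np.ones(3), np.zeros(3), np.zeros(3)
g = np.array([0.1, -0.2, 0.3])
print(adam_weight_decay_step(p, m, v, g, lr=1e-3, beta1=0.9, beta2=0.999,
                             eps=1e-8, weight_decay=0.01, decay_flag=True))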
def construct(self, input_indices, input_values, field_ids):
    _check_input_2d(F.shape(input_indices), "input_indices", self.cls_name)
    _check_input_2d(F.shape(input_values), "input_values", self.cls_name)
    _check_input_2d(F.shape(field_ids), "field_ids", self.cls_name)
    _check_input_dtype(F.dtype(input_indices), "input_indices", [mstype.int32, mstype.int64], self.cls_name)
    _check_input_dtype(F.dtype(input_values), "input_values", [mstype.float32], self.cls_name)
    _check_input_dtype(F.dtype(field_ids), "field_ids", [mstype.int32], self.cls_name)

    batch_size = self.shape(input_indices)[0]
    num_segments = batch_size * self.field_size
    bias = Range(0, num_segments, self.field_size)()
    bias = self.reshape(bias, (batch_size, -1))
    field_ids = self.bias_add(field_ids, bias)

    if self.target == "CPU":
        out = self.embeddinglookup(self.embedding_table, input_indices, 0)
    else:
        if self.forward_unique:
            shp = self.shape(input_indices) + (self.embedding_size,)
            indices_flatten = self.reshape(input_indices, (-1,))
            unique_id, unique_idx = self.unique(indices_flatten)
            weight_unique = self.gatherv2(self.embedding_table, unique_id, 0)
            weight_flatten = self.gather_revert(weight_unique, unique_idx, 0)
            out = self.reshape(weight_flatten, shp)
        else:
            out = self.gatherv2(self.embedding_table, input_indices, 0)
    if self.max_norm is not None:
        axis = _make_axis_range(F.rank(input_indices), F.rank(out))
        clip_by_norm = ClipByNorm(axis)
        out = clip_by_norm(out, self.max_norm)

    weights = self.reshape(input_values, (batch_size, self.shape(input_indices)[1], 1))
    embedding = self.mul(weights, out)

    if self.operator == 'MAX':
        # Fill the padding value to -inf, so the padded value will not influence the results
        negative_inf_mask = self.cast(self.equal(weights, 0), mstype.float32)
        inf_mask = self.inf_mask_mul(negative_inf_mask, self.negative_inf_value)
        embedding = self.inf_add(embedding, inf_mask)

    embedding = self.reshape(embedding, (-1, self.embedding_size))
    field_ids = self.reshape(field_ids, (-1,))

    merged_vectors = self.merge_op(embedding, field_ids, num_segments)

    if self.operator == 'MAX':
        value_count = self.count_op(self.abs(self.reshape(input_values, (-1,))), field_ids, num_segments)
        value_zeros = self.cast(self.max_no_equal(value_count, 0.0), mstype.float32)
        count = self.expand(value_zeros, -1)
        merged_vectors = self.max_mask_mul(merged_vectors, count)

    if self.operator == 'MEAN':
        value_count = self.count_op(self.abs(input_values), field_ids, num_segments)
        value_count = self.expand(value_count, -1)
        merged_vectors = self.div_no_nan(merged_vectors, value_count)

    merged_vectors = self.reshape(merged_vectors, (batch_size, self.field_size, -1))
    return merged_vectors
def bprop(x, out, dout):
    if mean_flag:
        dx = all_reduce(dout)
        float_one = F.scalar_cast(1.0, F.dtype(dx))
        num = F.scalar_cast(dev_num, F.dtype(dx))
        dx = mul(dx, cast(F.scalar_to_array(float_one / num), F.dtype(dx)))
    else:
        dx = all_reduce(dout)
    return (dx,)
def construct(self, img1, img2):
    _check_input_4d(F.shape(img1), "img1", self.cls_name)
    _check_input_4d(F.shape(img2), "img2", self.cls_name)
    P.SameTypeShape()(img1, img2)
    dtype_max_val = _get_dtype_max(F.dtype(img1))
    max_val = F.scalar_cast(self.max_val, F.dtype(img1))
    max_val = _convert_img_dtype_to_float32(max_val, dtype_max_val)
    img1 = _convert_img_dtype_to_float32(img1, dtype_max_val)
    img2 = _convert_img_dtype_to_float32(img2, dtype_max_val)

    mse = P.ReduceMean()(F.square(img1 - img2), (-3, -2, -1))
    psnr = 10 * P.Log()(F.square(max_val) / mse) / F.scalar_log(10.0)

    return psnr
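
# Illustrative only (not from the original source): a minimal NumPy sketch of the PSNR computed
# above. The mse is averaged over the (C, H, W) axes of each NCHW image, and
# psnr = 10 * log10(max_val**2 / mse).
import numpy as np

def psnr(img1, img2, max_val=1.0):
    mse = np.mean(np.square(img1 - img2), axis=(-3, -2, -1))
    return 10.0 * np.log10(np.square(max_val) / mse)

a = np.zeros((1, 3, 4, 4), dtype=np.float32)
b = np.full((1, 3, 4, 4), 0.1, dtype=np.float32)
print(psnr(a, b))   # ~20 dB for a uniform 0.1 error with max_val = 1.0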
def dot(x1, x2):
    """
    Computes a dot product between samples in two tensors.

    Inputs:
        - **x1** (Tensor) - First tensor in Dot op with datatype float16 or float32.
        - **x2** (Tensor) - Second tensor in Dot op with datatype float16 or float32.

    Outputs:
        Tensor, dot product of x1 and x2.

    Raises:
        TypeError: If type of x1 and x2 are not the same.
        TypeError: If dtype of x1 or x2 is not float16 or float32.
        ValueError: If rank of x1 or x2 is less than 2.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> input_x1 = Tensor(np.ones(shape=[2, 3]), mindspore.float32)
        >>> input_x2 = Tensor(np.ones(shape=[1, 3, 2]), mindspore.float32)
        >>> output = C.dot(input_x1, input_x2)
        >>> print(output)
        [[[3. 3.]]
         [[3. 3.]]]
    """
    shape_op = P.Shape()
    reshape_op = P.Reshape()
    transpose_op = P.Transpose()
    matmul_op = P.MatMul(False, False)
    x1_shape = shape_op(x1)
    x2_shape = shape_op(x2)
    x1_type = F.dtype(x1)
    x2_type = F.dtype(x2)
    _typecheck_input_dot(x1_type, x2_type)
    _check_invalid_input(x1_shape, x2_shape)

    if len(x1_shape) > 2 or len(x2_shape) > 2:
        x2_shape_transpose = _get_transpose_shape(x2_shape)
        x2_transpose = transpose_op(x2, x2_shape_transpose)
        x1_reshape = reshape_op(x1, (-1, x1_shape[-1]))
        x2_reshape = reshape_op(x2_transpose, (x2_shape[-2], -1))
        mul_result = matmul_op(x1_reshape, x2_reshape)
        return reshape_op(mul_result, x1_shape[:-1] + x2_shape[:-2] + x2_shape[-1:])
    return matmul_op(x1, x2)
def construct(self, img1, img2):
    _check_input_dtype(F.dtype(img1), "img1", [mstype.float32, mstype.float16], self.cls_name)
    _check_input_filter_size(F.shape(img1), "img1", self.filter_size, self.cls_name)
    P.SameTypeShape()(img1, img2)
    dtype_max_val = _get_dtype_max(F.dtype(img1))
    max_val = F.scalar_cast(self.max_val, F.dtype(img1))
    max_val = _convert_img_dtype_to_float32(max_val, dtype_max_val)
    img1 = _convert_img_dtype_to_float32(img1, dtype_max_val)
    img2 = _convert_img_dtype_to_float32(img2, dtype_max_val)

    c1 = (self.k1 * max_val) ** 2
    c2 = (self.k2 * max_val) ** 2

    ssim_ave_channel, _ = _compute_multi_channel_loss(c1, c2, img1, img2,
                                                      self.conv, self.concat, self.reduce_mean)
    loss = self.reduce_mean(ssim_ave_channel, -1)

    return loss
def _tensor_apply_decay_with_sparse(weight_decay, if_apply, weight, gradient):
    """Get grad with weight_decay."""
    if if_apply:
        indices = gradient.indices
        values = op_add((op_gather(weight, indices, 0) * F.cast(weight_decay, F.dtype(weight)), gradient.values))
        shape = gradient.dense_shape
        return RowTensor(indices, values, shape)
    return gradient
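
# Illustrative only (not from the original source): a minimal NumPy sketch of the row-sparse
# weight-decay step above. Only the rows of `weight` actually touched by the sparse gradient
# (selected by `indices`) contribute a decay term, which is added to the gradient values.
import numpy as np

def apply_decay_sparse(weight_decay, weight, indices, values):
    decayed_values = values + weight[indices] * weight_decay
    return indices, decayed_values

weight = np.arange(12, dtype=np.float32).reshape(4, 3)
indices = np.array([0, 2])
values = np.ones((2, 3), dtype=np.float32)
print(apply_decay_sparse(0.01, weight, indices, values))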
def _tensors_allreduce_mean(mul, degree, allreduce, parameters):
    """
    Apply allreduce on parameters.

    Args:
        mul (Primitive): The mul operator for parameters.
        degree (int): The mean coefficient.
        allreduce (Primitive): The communication operator for parameters.
        parameters (Tensor): The parameters before operation.

    Returns:
        Tensor, the parameters after operation.
    """
    degree = F.scalar_cast(degree, F.dtype(parameters))
    parameters = allreduce(parameters)
    cast_op = P.Cast()
    return mul(parameters, cast_op(F.scalar_to_array(1.0 / degree), F.dtype(parameters)))
def sequence_mask(lengths, maxlen=None):
    """
    Returns a mask tensor representing the first N positions of each cell.

    If `lengths` has shape [d_1, d_2, ..., d_n], then the resulting boolean mask has shape
    [d_1, d_2, ..., d_n, maxlen], with

    mask[i_1, i_2, ..., i_n, j] = (j < lengths[i_1, i_2, ..., i_n])

    Inputs:
        - **lengths** (Tensor) - Tensor to calculate the mask for. All values in this tensor should be
          less than or equal to `maxlen`. Values greater than `maxlen` will be treated as `maxlen`.
          Must be type int32 or int64.
        - **maxlen** (int) - size of the last dimension of returned tensor. Must be positive and same
          type as elements in `lengths`.

    Outputs:
        One mask tensor of shape lengths.shape + (maxlen,).

    Supported Platforms:
        ``GPU``

    Examples:
        >>> x = Tensor(np.array([[1, 3], [2, 0]]))
        >>> output = C.sequence_mask(x, 3)
        >>> print(output)
        [[[True, False, False],
          [True, True, True]],
         [[True, True, False],
          [False, False, False]]]
    """
    argmax_op = P.ArgMaxWithValue()
    reshape_op = P.Reshape()
    range_op = P.Range()
    expand_op = P.ExpandDims()
    cast_op = P.Cast()
    shape_op = P.Shape()
    to_tensor_op = P.ScalarToArray()

    const_utils.check_type_valid(F.dtype(lengths), [mstype.int64, mstype.int32], 'lengths')
    _check_sequence_mask_input_len(shape_op(lengths))

    if maxlen is None:
        flatten_data = reshape_op(lengths, (-1,))
        flatten_data = cast_op(flatten_data, mstype.float32)
        _, value = argmax_op(flatten_data)
        maxlen = cast_op(value, mstype.int32)
    else:
        maxlen = _check_positive_int(maxlen, "maxlen", "sequence_mask")
        maxlen = to_tensor_op(maxlen)

    range_vector = range_op(to_tensor_op(0), maxlen, to_tensor_op(1))
    mask = expand_op(lengths, -1)
    result = range_vector < mask
    return result
def _tensors_allreduce_mean(mul, degree, allreduce_filter, grad):
    """
    Apply mean and allreduce on gradient. Allreduce is a communication operation used for distributed deep learning.

    Args:
        mul (Primitive): The mul operator used to apply the mean coefficient.
        degree (int): The mean coefficient.
        allreduce_filter (bool): When it is true, allreduce would apply.
        grad (Tensor): The gradient tensor before operation.

    Returns:
        Tensor, the gradient tensor after operation.
    """
    if allreduce_filter:
        degree = F.scalar_cast(degree, F.dtype(grad))
        grad = _all_reduce(grad)
        cast_op = P.Cast()
        return mul(grad, cast_op(F.scalar_to_array(1.0 / degree), F.dtype(grad)))
    return grad
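
# Illustrative only (not from the original source): a minimal NumPy sketch of the mean step
# above. An allreduce sums the gradient across `degree` devices; multiplying the summed
# gradient by 1/degree then yields the average gradient on every device.
import numpy as np

def allreduce_mean(per_device_grads):
    degree = len(per_device_grads)
    summed = np.sum(per_device_grads, axis=0)   # what allreduce(grad) would return on each device
    return summed * (1.0 / degree)

grads = [np.array([1.0, 2.0]), np.array([3.0, 4.0])]
print(allreduce_mean(grads))                    # [2. 3.]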
def _tensors_get_datatype(grad):
    """
    Acquire gradient datatype.

    Args:
        grad (Tensor): The gradient tensor before operation.

    Returns:
        mstype, the datatype of gradient.
    """
    return F.dtype(grad)
def _tensors_get_datatype_with_sparse(grad):
    """
    Acquire gradient datatype.

    Args:
        grad (RowTensor): The gradient before operation.

    Returns:
        mstype, the datatype of gradient.
    """
    return F.dtype(grad.values)
def _tensors_get_datatype(parameters):
    """
    Acquire parameters datatype.

    Args:
        parameters (Tensor): The parameters before operation.

    Returns:
        mstype, the datatype of parameters.
    """
    return F.dtype(parameters)
def _clip_grad(clip_type, clip_value, grad):
    """
    Clip a gradient either by value or by norm.

    Args:
        clip_type (int): 0 clips each element into [-clip_value, clip_value]; any other value clips by L2 norm.
        clip_value (float): The clipping threshold.
        grad (Tensor): The gradient to clip.

    Returns:
        Tensor, the clipped gradient.
    """
    dt = F.dtype(grad)
    if clip_type == 0:
        new_grad = C.clip_by_value(grad, F.cast(F.tuple_to_array((-clip_value,)), dt),
                                   F.cast(F.tuple_to_array((clip_value,)), dt))
    else:
        new_grad = nn.ClipByNorm()(grad, F.cast(F.tuple_to_array((clip_value,)), dt))
    return new_grad
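
# Illustrative only (not from the original source): a minimal NumPy sketch of the two clipping
# modes above. clip_type == 0 clips each element into [-clip_value, clip_value]; otherwise the
# whole tensor is rescaled so its L2 norm does not exceed clip_value.
import numpy as np

def clip_grad(clip_type, clip_value, grad):
    if clip_type == 0:
        return np.clip(grad, -clip_value, clip_value)
    norm = np.linalg.norm(grad)
    return grad if norm <= clip_value else grad * (clip_value / norm)

g = np.array([3.0, -4.0])
print(clip_grad(0, 1.0, g))   # [ 1. -1.]
print(clip_grad(1, 1.0, g))   # [ 0.6 -0.8]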
def construct(self, x1, x2, y):
    F.same_type_shape(x1, x2)
    _check_reduced_shape_valid(F.shape(x1), F.shape(y), (1,), self.cls_name)
    # if target > 0, 1 - cosine(x1, x2)
    # else, max(0, cosine(x1, x2) - margin)
    np_eps = const_utils.get_np_eps(F.dtype(x1))
    eps = F.cast(np_eps, F.dtype(x1))
    prod_sum = self.reduce_sum(x1 * x2, (1,))
    square1 = self.reduce_sum(F.square(x1), (1,)) + eps
    square2 = self.reduce_sum(F.square(x2), (1,)) + eps
    denom = F.sqrt(square1 * square2)
    cosine = prod_sum / denom

    pos_value = 1.0 - cosine
    neg_value = self.maximum(cosine - self.margin, 0.0)

    zeros = F.zeros_like(cosine)
    pos_part = F.select(y == 1, pos_value, zeros)
    neg_part = F.select(y == -1, neg_value, zeros)
    output_unreduced = pos_part + neg_part

    return self.get_loss(output_unreduced)
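
# Illustrative only (not from the original source): a minimal NumPy sketch of the per-sample
# loss computed above (before reduction): 1 - cos(x1, x2) when y == 1, and
# max(0, cos(x1, x2) - margin) when y == -1.
import numpy as np

def cosine_embedding_loss(x1, x2, y, margin=0.0, eps=1e-12):
    cosine = np.sum(x1 * x2, axis=1) / np.sqrt(
        (np.sum(np.square(x1), axis=1) + eps) * (np.sum(np.square(x2), axis=1) + eps))
    pos = 1.0 - cosine
    neg = np.maximum(cosine - margin, 0.0)
    return np.where(y == 1, pos, np.where(y == -1, neg, 0.0))

x1 = np.array([[1.0, 0.0], [1.0, 0.0]])
x2 = np.array([[0.0, 1.0], [1.0, 0.0]])
y = np.array([1, -1])
print(cosine_embedding_loss(x1, x2, y))   # [1. 1.] (orthogonal positive pair, identical negative pair)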
def bprop(x, out, dout):
    if mean_flag:
        if F.issubclass_(F.typeof(dout), mstype.tensor):
            dx = all_reduce(dout)
            float_one = F.scalar_cast(1.0, F.dtype(dx))
            num = F.scalar_cast(dev_num, F.dtype(dx))
            dx = mul(dx, cast(F.scalar_to_array(float_one / num), F.dtype(dx)))
        else:
            indices = all_gather(dout.indices)
            grad = all_gather(dout.values)
            float_one = F.scalar_cast(1.0, F.dtype(grad))
            num = F.scalar_cast(dev_num, F.dtype(grad))
            grad = mul(grad, cast(F.scalar_to_array(float_one / num), F.dtype(grad)))
            dx = RowTensor(indices, grad, dout.dense_shape)
    else:
        if F.issubclass_(F.typeof(dout), mstype.tensor):
            dx = all_reduce(dout)
        else:
            indices = all_gather(dout.indices)
            grad = all_gather(dout.values)
            dx = RowTensor(indices, grad, dout.dense_shape)

    return (dx,)
def bprop(x, out, dout):
    if F.issubclass_(F.typeof(dout), mstype.tensor):
        if F.issubclass_(F.dtype(dout), mstype.bool_) or F.issubclass_(F.dtype(dout), mstype.int32) \
                or F.issubclass_(F.dtype(dout), mstype.int16):
            return (dout,)
        dx = op(dout, cast(F.scalar_to_array(divisor), dtype(dout)))
        return (dx,)

    if F.issubclass_(F.typeof(dout), mstype.tuple_):
        dx = ()
        input_nums = F.tuple_len(dout)
        for i in range(input_nums):
            ele_grad = op(dout[i], cast(F.scalar_to_array(divisor), dtype(dout[i])))
            dx = dx + (ele_grad,)
        return (dx,)

    dx = []
    input_nums = F.list_len(dout)
    for i in range(input_nums):
        ele_grad = op(dout[i], cast(F.scalar_to_array(divisor), dtype(dout[i])))
        dx.append(ele_grad)
    return (dx,)
def _tensors_allreduce_mean_with_sparse(mul, degree, allreduce_filter, grad):
    """
    Apply mean and allgather on gradient instead of allreduce for sparse feature.
    Allgather is a communication operation used for distributed deep learning.

    Args:
        mul (Primitive): The mul operator used to apply the mean coefficient.
        degree (int): The mean coefficient.
        allreduce_filter (bool): When it is true, allgather would apply.
        grad (Tuple): The indices, gradient tensor and tensor_shape before operation.

    Returns:
        Tuple, include indices, the gradient tensor and tensor_shape after operation.
    """
    if allreduce_filter:
        indices = _all_gather(grad[0])
        degree = F.scalar_cast(degree, F.dtype(grad[1]))
        dout = _all_gather(grad[1])
        cast_op = P.Cast()
        dout = mul(dout, cast_op(F.scalar_to_array(1.0 / degree), F.dtype(dout)))
        grad = (indices, dout, grad[2])
    return grad
def repeat_elements(x, rep, axis=0):
    """
    Repeat elements of a tensor along an axis, like np.repeat.

    Args:
        x (Tensor): The tensor to repeat values for. Must be of type: float16, float32,
            int8, uint8, int16, int32, or int64.
        rep (int): The number of times to repeat, must be positive, required.
        axis (int): The axis along which to repeat, default 0.

    Outputs:
        One tensor with values repeated along the specified axis. If x has shape
        (s1, s2, ..., sn) and axis is i, the output will have shape (s1, s2, ..., si * rep, ..., sn).
        The output type will be the same as the type of `x`.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> x = Tensor(np.array([[0, 1, 2], [3, 4, 5]]), mindspore.int32)
        >>> output = C.repeat_elements(x, rep = 2, axis = 0)
        >>> print(output)
        [[0 1 2]
         [0 1 2]
         [3 4 5]
         [3 4 5]]
    """
    const_utils.check_type_valid(F.dtype(x), mstype.number_type, 'input x')
    rep = _check_positive_int(rep, "rep", "repeat_elements")
    axis = _check_is_int(axis, "axis", "repeat_elements")

    shape_op = P.Shape()
    rank_op = P.Rank()
    tile_op = P.Tile()
    expand_dims_op = P.ExpandDims()
    reshape_op = P.Reshape()

    x_rank = rank_op(x)
    axis = _check_axis_range(axis, x_rank, "axis", "repeat_elements")

    expand_axis = axis + 1
    x_expand = expand_dims_op(x, expand_axis)
    rep_dims = _cal_repeat_dims(x_rank, rep, expand_axis)
    x_expand = tile_op(x_expand, rep_dims)
    x_shape = shape_op(x)
    x_reshape = _cal_reshape(x_shape, rep, axis)
    x_rep = reshape_op(x_expand, x_reshape)

    return x_rep
def construct(self, x):
    tensor_dtype = F.dtype(x)
    _check_input_dtype("input x", tensor_dtype, [mstype.float16, mstype.float32], self.cls_name)
    if tensor_dtype == mstype.float16:
        x = self.cast(x, mstype.float32)
    mean = self.reduce_mean(x, self.axis)
    variance = self.reduce_mean(self.square_diff(x, F.stop_gradient(mean)), self.axis)
    if not self.keep_dims:
        mean = self.squeeze(mean)
        variance = self.squeeze(variance)
    if tensor_dtype == mstype.float16:
        mean = self.cast(mean, mstype.float16)
        variance = self.cast(variance, mstype.float16)
        return mean, variance
    return mean, variance
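
# Illustrative only (not from the original source): a minimal NumPy sketch of the moments
# computed above: the mean over the reduction axis, and the variance as the mean of the
# squared difference from that (gradient-stopped) mean.
import numpy as np

def moments(x, axis, keep_dims=False):
    mean = np.mean(x, axis=axis, keepdims=True)
    variance = np.mean(np.square(x - mean), axis=axis, keepdims=True)
    if not keep_dims:
        mean, variance = np.squeeze(mean, axis), np.squeeze(variance, axis)
    return mean, variance

x = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
print(moments(x, axis=1))   # means [2. 5.], variances [0.6667 0.6667]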
def _tensors_allreduce(degree, mean, allgather, allreduce, allreduce_filter, grad):
    """
    Apply allreduce on gradient.

    Args:
        degree (int): The mean coefficient.
        mean (bool): When mean is true, the mean coefficient (degree) would apply on gradients.
        allgather (Primitive): The communication operator for sparse gradients.
        allreduce (Primitive): The communication operator for gradients.
        allreduce_filter (bool): When it is true, allreduce would apply.
        grad (Tensor): The gradient tensor before operation.

    Returns:
        Tensor, the gradient tensor after operation.
    """
    if allreduce_filter:
        grad = allreduce(grad)
        if mean:
            degree = F.scalar_cast(degree, F.dtype(grad))
            grad = F.tensor_mul(grad, F.cast(F.scalar_to_array(1.0 / degree), F.dtype(grad)))
        return grad
    return grad
def construct(self, img1, img2):
    _check_input_dtype(F.dtype(img1), "img1", [mstype.float32, mstype.float16], self.cls_name)
    _check_input_filter_size(F.shape(img1), "img1", self.filter_size, self.cls_name)
    P.SameTypeShape()(img1, img2)
    max_val = _convert_img_dtype_to_float32(self.max_val, self.max_val)
    img1 = _convert_img_dtype_to_float32(img1, self.max_val)
    img2 = _convert_img_dtype_to_float32(img2, self.max_val)

    kernel = self._fspecial_gauss(self.filter_size, self.filter_sigma)
    kernel = P.Tile()(kernel, (1, P.Shape()(img1)[1], 1, 1))

    mean_ssim = self._calculate_mean_ssim(img1, img2, kernel, max_val, self.k1, self.k2)

    return mean_ssim