def construct(self, x, seq_lengths):
    """Defines the computation performed by the ReverseSequence operator."""
    batch_size = x.shape[self.batch_dim]
    max_seq_len = x.shape[self.seq_dim]
    seq_lens_type = seq_lengths.dtype

    # Index of the last valid element in each sequence: seq_lengths - 1.
    back = ops.Sub()(seq_lengths, ops.OnesLike()(seq_lengths))

    batch_idx = self.make_shape((batch_size, max_seq_len), seq_lens_type, 0)
    forward_idx = self.make_shape((batch_size, max_seq_len), seq_lens_type, 1)

    # Reversed position (len - 1 - j); where it goes negative (past the valid
    # length) fall back to the identity index so padding stays in place.
    back = back.view(-1, 1)
    reverse_idx = ops.Sub()(back, forward_idx)
    condition = ops.Less()(reverse_idx, ops.ZerosLike()(reverse_idx))
    reverse_idx = ops.Select()(condition, forward_idx, reverse_idx)

    # Stack (batch, position) pairs into indices for GatherNd.
    reverse_idx = ops.ExpandDims()(reverse_idx, 2)
    batch_idx = ops.ExpandDims()(batch_idx, 2)
    if self.batch_dim > self.seq_dim:
        batch_idx = ops.Transpose()(batch_idx, (1, 0, 2))
        reverse_idx = ops.Transpose()(reverse_idx, (1, 0, 2))
        x = ops.Transpose()(x, (1, 0, 2))
    start_indices = ops.Concat(2)((batch_idx, reverse_idx))
    output = ops.GatherNd()(x, start_indices)
    return output
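# A minimal NumPy sketch of the index trick used above, assuming batch_dim=0,
# seq_dim=1, and a 2-D input: positions before each row's length map to
# (len - 1 - j), positions past the length keep the identity index j, so
# padding stays in place. Names here are illustrative, not from the source.
import numpy as np

def reverse_sequence_np(x, seq_lengths):
    batch_size, max_seq_len = x.shape
    forward_idx = np.tile(np.arange(max_seq_len), (batch_size, 1))
    reverse_idx = (seq_lengths - 1)[:, None] - forward_idx
    reverse_idx = np.where(reverse_idx < 0, forward_idx, reverse_idx)
    return np.take_along_axis(x, reverse_idx, axis=1)

x = np.array([[1, 2, 3, 0], [4, 5, 6, 7]])
print(reverse_sequence_np(x, np.array([3, 4])))
# [[3 2 1 0]
#  [7 6 5 4]]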
def __init__(self, length, max_relative_position):
    super(RelaPosMatrixGenerator, self).__init__()
    self._length = length
    self._max_relative_position = Tensor(max_relative_position, dtype=mstype.int32)
    self._min_relative_position = Tensor(-max_relative_position, dtype=mstype.int32)
    self.range_length = -length + 1

    self.tile = ops.Tile()
    self.range_mat = ops.Reshape()
    self.sub = ops.Sub()
    self.expanddims = ops.ExpandDims()
    self.cast = ops.Cast()
def __init__(self, length, max_relative_position):
    super(RelaPosMatrixGenerator, self).__init__()
    self._length = length
    self._max_relative_position = max_relative_position
    self._min_relative_position = -max_relative_position
    self.range_length = -length + 1

    self.tile = P.Tile()
    self.range_mat = P.Reshape()
    self.sub = P.Sub()
    self.expanddims = P.ExpandDims()
    self.cast = P.Cast()
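# A minimal NumPy sketch of what this generator presumably computes in its
# construct method (not shown here), assuming the standard Shaw-style
# relative-position matrix: pairwise distances j - i, clipped to
# [-max_relative_position, max_relative_position], then shifted so the values
# are non-negative and can index an embedding table.
import numpy as np

def relative_position_matrix(length, max_relative_position):
    range_vec = np.arange(length)
    distance_mat = range_vec[None, :] - range_vec[:, None]  # j - i
    clipped = np.clip(distance_mat, -max_relative_position, max_relative_position)
    return clipped + max_relative_position                  # indices in [0, 2*max]

print(relative_position_matrix(4, 2))
# [[2 3 4 4]
#  [1 2 3 4]
#  [0 1 2 3]
#  [0 0 1 2]]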
def __init__(self, sparse=False):
    super(SoftmaxCrossEntropyExpand, self).__init__()
    self.exp = ops.Exp()
    self.sum = ops.ReduceSum(keep_dims=True)
    self.onehot = ops.OneHot()
    self.on_value = Tensor(1.0, mstype.float32)
    self.off_value = Tensor(0.0, mstype.float32)
    self.div = ops.RealDiv()
    self.log = ops.Log()
    self.sum_cross_entropy = ops.ReduceSum(keep_dims=False)
    self.mul = ops.Mul()
    self.mul2 = ops.Mul()
    self.mean = ops.ReduceMean(keep_dims=False)
    self.sparse = sparse
    self.max = ops.ReduceMax(keep_dims=True)
    self.sub = ops.Sub()
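# A minimal NumPy sketch of the computation these primitives compose, assuming
# construct subtracts the row max before exp for numerical stability
# (ReduceMax + Sub), normalizes to probabilities (Exp + ReduceSum + RealDiv),
# and averages the per-sample cross entropy (Mul + ReduceSum + ReduceMean).
import numpy as np

def softmax_cross_entropy_np(logits, labels, sparse=False):
    shifted = logits - logits.max(axis=-1, keepdims=True)
    probs = np.exp(shifted) / np.exp(shifted).sum(axis=-1, keepdims=True)
    if sparse:                                     # OneHot path for int labels
        labels = np.eye(logits.shape[-1])[labels]
    return -(labels * np.log(probs)).sum(axis=-1).mean()

logits = np.array([[2.0, 1.0, 0.1]])
print(softmax_cross_entropy_np(logits, np.array([0]), sparse=True))  # ~0.417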
def ranking_loss(self, input1, input2, y):
    """Margin ranking loss: mean over max(0, -y * (input1 - input2) + margin)."""
    sub = P.Sub()
    mul = P.Mul()
    add = P.Add()
    temp1 = -sub(input1, input2)
    temp2 = mul(temp1, y)
    temp3 = add(temp2, self.margin)
    # Keep only the violating pairs (hinge at zero). Convert to NumPy explicitly
    # for the element-wise mask, since NumPy ufuncs do not operate on MindSpore
    # tensors directly.
    temp3_mask = np.greater_equal(temp3.asnumpy(), 0)
    loss = 0
    for i in range(temp3.shape[0]):
        if temp3_mask[i]:
            loss += temp3[i]
    loss = Tensor(loss / temp3.shape[0])
    return loss
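# A minimal NumPy sketch of the same margin ranking loss, vectorized: the
# Python loop above is equivalent to clamping at zero and taking the mean,
# i.e. mean(max(0, -y * (input1 - input2) + margin)). Names are illustrative.
import numpy as np

def margin_ranking_loss_np(input1, input2, y, margin):
    return np.maximum(-(input1 - input2) * y + margin, 0).mean()

x1 = np.array([0.8, 0.2, 0.5])
x2 = np.array([0.1, 0.9, 0.5])
y = np.array([1.0, 1.0, -1.0])  # +1: input1 should rank higher; -1: input2
print(margin_ranking_loss_np(x1, x2, y, margin=0.3))  # ~0.433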
def __init__(self,
             batch_size,
             from_tensor_width,
             to_tensor_width,
             from_seq_length,
             to_seq_length,
             num_attention_heads=1,
             size_per_head=512,
             query_act=None,
             key_act=None,
             value_act=None,
             has_attention_mask=False,
             attention_probs_dropout_prob=0.0,
             use_one_hot_embeddings=False,
             initializer_range=0.02,
             do_return_2d_tensor=False,
             use_relative_positions=False,
             compute_type=mstype.float32):
    super(BertAttention, self).__init__()
    self.batch_size = batch_size
    self.from_seq_length = from_seq_length
    self.to_seq_length = to_seq_length
    self.num_attention_heads = num_attention_heads
    self.size_per_head = size_per_head
    self.has_attention_mask = has_attention_mask
    self.use_relative_positions = use_relative_positions

    self.scores_mul = Tensor([1.0 / math.sqrt(float(self.size_per_head))], dtype=compute_type)
    self.reshape = ops.Reshape()
    self.shape_from_2d = (-1, from_tensor_width)
    self.shape_to_2d = (-1, to_tensor_width)
    weight = TruncatedNormal(initializer_range)
    units = num_attention_heads * size_per_head
    self.query_layer = nn.Dense(from_tensor_width,
                                units,
                                activation=query_act,
                                weight_init=weight).to_float(compute_type)
    self.key_layer = nn.Dense(to_tensor_width,
                              units,
                              activation=key_act,
                              weight_init=weight).to_float(compute_type)
    self.value_layer = nn.Dense(to_tensor_width,
                                units,
                                activation=value_act,
                                weight_init=weight).to_float(compute_type)

    self.shape_from = (batch_size, from_seq_length, num_attention_heads, size_per_head)
    self.shape_to = (batch_size, to_seq_length, num_attention_heads, size_per_head)

    self.matmul_trans_b = ops.BatchMatMul(transpose_b=True)
    self.multiply = ops.Mul()
    self.transpose = ops.Transpose()
    self.trans_shape = (0, 2, 1, 3)
    self.trans_shape_relative = (2, 0, 1, 3)
    self.trans_shape_position = (1, 2, 0, 3)
    #self.multiply_data = Tensor([-10000.0,], dtype=compute_type)
    self.multiply_data = Tensor([-10000.0,], dtype=mstype.float32)
    self.batch_num = batch_size * num_attention_heads
    self.matmul = ops.BatchMatMul()

    self.softmax = nn.Softmax()
    self.dropout = nn.Dropout(1 - attention_probs_dropout_prob)

    if self.has_attention_mask:
        self.expand_dims = ops.ExpandDims()
        self.sub = ops.Sub()
        self.add = ops.TensorAdd()
        self.cast = ops.Cast()
        self.get_dtype = ops.DType()
    if do_return_2d_tensor:
        self.shape_return = (batch_size * from_seq_length, num_attention_heads * size_per_head)
    else:
        self.shape_return = (batch_size, from_seq_length, num_attention_heads * size_per_head)

    self.cast_compute_type = SaturateCast(dst_type=compute_type)
    if self.use_relative_positions:
        self._generate_relative_positions_embeddings = \
            RelaPosEmbeddingsGenerator(length=to_seq_length,
                                       depth=size_per_head,
                                       max_relative_position=16,
                                       initializer_range=initializer_range,
                                       use_one_hot_embeddings=use_one_hot_embeddings)
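# A minimal NumPy sketch of the attention-mask trick these primitives set up
# (Sub, Mul with multiply_data = -10000, TensorAdd): positions where the mask
# is 0 receive a large negative additive bias, so they vanish after softmax.
# Shapes and values here are illustrative, not from the source.
import numpy as np

batch, heads, seq = 1, 1, 4
scores = np.random.randn(batch, heads, seq, seq)
mask = np.array([[1, 1, 1, 0]])                    # last position is padding
adder = (1.0 - mask[:, None, None, :]) * -10000.0  # Sub + Mul(multiply_data)
masked = scores + adder                            # TensorAdd
probs = np.exp(masked) / np.exp(masked).sum(-1, keepdims=True)  # Softmax
print(probs[0, 0, 0])  # last entry ~0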