def construct(self, x, seq_lengths):
        """Defines the ReverseSequence operator computation performed."""
        batch_size = x.shape[self.batch_dim]
        max_seq_len = x.shape[self.seq_dim]
        seq_lens_type = seq_lengths.dtype

        back = ops.Sub()(seq_lengths, ops.OnesLike()(seq_lengths))

        batch_idx = self.make_shape((batch_size, max_seq_len), seq_lens_type,
                                    0)
        forward_idx = self.make_shape((batch_size, max_seq_len), seq_lens_type,
                                      1)

        back = back.view(-1, 1)
        reverse_idx = ops.Sub()(back, forward_idx)

        condition = ops.Less()(reverse_idx, ops.ZerosLike()(reverse_idx))
        reverse_idx = ops.Select()(condition, forward_idx, reverse_idx)

        reverse_idx = ops.ExpandDims()(reverse_idx, 2)
        batch_idx = ops.ExpandDims()(batch_idx, 2)

        if self.batch_dim > self.seq_dim:
            batch_idx = ops.Transpose()(batch_idx, (1, 0, 2))
            reverse_idx = ops.Transpose()(reverse_idx, (1, 0, 2))
            x = ops.Transpose()(x, (1, 0, 2))
        start_indices = ops.Concat(2)((batch_idx, reverse_idx))

        output = ops.GatherNd()(x, start_indices)

        return output
Beispiel #2
0
    def __init__(self, length, max_relative_position):
        super(RelaPosMatrixGenerator, self).__init__()
        self._length = length
        self._max_relative_position = Tensor(max_relative_position, dtype=mstype.int32)
        self._min_relative_position = Tensor(-max_relative_position, dtype=mstype.int32)
        self.range_length = -length + 1

        self.tile = ops.Tile()
        self.range_mat = ops.Reshape()
        self.sub = ops.Sub()
        self.expanddims = ops.ExpandDims()
        self.cast = ops.Cast()
Beispiel #3
0
    def __init__(self, length, max_relative_position):
        super(RelaPosMatrixGenerator, self).__init__()
        self._length = length
        self._max_relative_position = max_relative_position
        self._min_relative_position = -max_relative_position
        self.range_length = -length + 1

        self.tile = P.Tile()
        self.range_mat = P.Reshape()
        self.sub = P.Sub()
        self.expanddims = P.ExpandDims()
        self.cast = P.Cast()
Beispiel #4
0
 def __init__(self, sparse=False):
     super(SoftmaxCrossEntropyExpand, self).__init__()
     self.exp = ops.Exp()
     self.sum = ops.ReduceSum(keep_dims=True)
     self.onehot = ops.OneHot()
     self.on_value = Tensor(1.0, mstype.float32)
     self.off_value = Tensor(0.0, mstype.float32)
     self.div = ops.RealDiv()
     self.log = ops.Log()
     self.sum_cross_entropy = ops.ReduceSum(keep_dims=False)
     self.mul = ops.Mul()
     self.mul2 = ops.Mul()
     self.mean = ops.ReduceMean(keep_dims=False)
     self.sparse = sparse
     self.max = ops.ReduceMax(keep_dims=True)
     self.sub = ops.Sub()
Beispiel #5
0
    def ranking_loss(self, input1, input2, y):
        sub = P.Sub()
        mul = P.Mul()
        add = P.Add()

        temp1 = -sub(input1, input2)
        temp2 = mul(temp1, y)
        temp3 = add(temp2, self.margin)
        temp3_mask = np.greater_equal(temp3, 0)

        loss = 0
        for i in range(temp3.shape[0]):
            if temp3_mask[i]:
                loss += temp3[i]

        loss = Tensor(loss / temp3.shape[0])
        # print(loss)
        return loss
Beispiel #6
0
    def __init__(self,
                 batch_size,
                 from_tensor_width,
                 to_tensor_width,
                 from_seq_length,
                 to_seq_length,
                 num_attention_heads=1,
                 size_per_head=512,
                 query_act=None,
                 key_act=None,
                 value_act=None,
                 has_attention_mask=False,
                 attention_probs_dropout_prob=0.0,
                 use_one_hot_embeddings=False,
                 initializer_range=0.02,
                 do_return_2d_tensor=False,
                 use_relative_positions=False,
                 compute_type=mstype.float32):

        super(BertAttention, self).__init__()
        self.batch_size = batch_size
        self.from_seq_length = from_seq_length
        self.to_seq_length = to_seq_length
        self.num_attention_heads = num_attention_heads
        self.size_per_head = size_per_head
        self.has_attention_mask = has_attention_mask
        self.use_relative_positions = use_relative_positions

        self.scores_mul = Tensor([1.0 / math.sqrt(float(self.size_per_head))], dtype=compute_type)
        self.reshape = ops.Reshape()
        self.shape_from_2d = (-1, from_tensor_width)
        self.shape_to_2d = (-1, to_tensor_width)
        weight = TruncatedNormal(initializer_range)
        units = num_attention_heads * size_per_head
        self.query_layer = nn.Dense(from_tensor_width,
                                    units,
                                    activation=query_act,
                                    weight_init=weight).to_float(compute_type)
        self.key_layer = nn.Dense(to_tensor_width,
                                  units,
                                  activation=key_act,
                                  weight_init=weight).to_float(compute_type)
        self.value_layer = nn.Dense(to_tensor_width,
                                    units,
                                    activation=value_act,
                                    weight_init=weight).to_float(compute_type)

        self.shape_from = (batch_size, from_seq_length, num_attention_heads, size_per_head)
        self.shape_to = (
            batch_size, to_seq_length, num_attention_heads, size_per_head)

        self.matmul_trans_b = ops.BatchMatMul(transpose_b=True)
        self.multiply = ops.Mul()
        self.transpose = ops.Transpose()
        self.trans_shape = (0, 2, 1, 3)
        self.trans_shape_relative = (2, 0, 1, 3)
        self.trans_shape_position = (1, 2, 0, 3)
        #self.multiply_data = Tensor([-10000.0,], dtype=compute_type)
        self.multiply_data = Tensor([-10000.0,], dtype=mstype.float32)
        self.batch_num = batch_size * num_attention_heads
        self.matmul = ops.BatchMatMul()

        self.softmax = nn.Softmax()
        self.dropout = nn.Dropout(1 - attention_probs_dropout_prob)

        if self.has_attention_mask:
            self.expand_dims = ops.ExpandDims()
            self.sub = ops.Sub()
            self.add = ops.TensorAdd()
            self.cast = ops.Cast()
            self.get_dtype = ops.DType()
        if do_return_2d_tensor:
            self.shape_return = (batch_size * from_seq_length, num_attention_heads * size_per_head)
        else:
            self.shape_return = (batch_size, from_seq_length, num_attention_heads * size_per_head)

        self.cast_compute_type = SaturateCast(dst_type=compute_type)
        if self.use_relative_positions:
            self._generate_relative_positions_embeddings = \
                RelaPosEmbeddingsGenerator(length=to_seq_length,
                                           depth=size_per_head,
                                           max_relative_position=16,
                                           initializer_range=initializer_range,
                                           use_one_hot_embeddings=use_one_hot_embeddings)