Example #1
0
    def __init__(self,
                 length,
                 depth,
                 max_relative_position,
                 initializer_range,
                 use_one_hot_embeddings=False):
        """Create the relative-position embedding table and helper operators.

        Args:
            length: Forwarded to ``RelaPosMatrixGenerator`` as ``length``.
            depth: Number of columns in the embedding table.
            max_relative_position: Forwarded to ``RelaPosMatrixGenerator``;
                the table has ``2 * max_relative_position + 1`` rows.
            initializer_range: Argument passed to ``TruncatedNormal`` when
                initializing the table.
            use_one_hot_embeddings: Flag stored on the instance for later use.
        """
        super(RelaPosEmbeddingsGenerator, self).__init__()
        self.depth = depth
        self.vocab_size = max_relative_position * 2 + 1
        self.use_one_hot_embeddings = use_one_hot_embeddings

        # Trainable table with one row per relative-position index.
        table_shape = [self.vocab_size, self.depth]
        table_init = initializer(TruncatedNormal(initializer_range), table_shape)
        self.embeddings_table = Parameter(table_init,
                                          name='embeddings_for_position')

        self.relative_positions_matrix = RelaPosMatrixGenerator(
            length=length,
            max_relative_position=max_relative_position)

        # Operators and constants used for the lookup.
        self.reshape = ops.Reshape()
        self.one_hot = ops.OneHot()
        self.on_value = Tensor(1.0, mstype.float32)
        self.off_value = Tensor(0.0, mstype.float32)
        self.shape = ops.Shape()
        self.gather = ops.GatherV2()  # index_select
        self.matmul = ops.BatchMatMul()
Example #2
0
    def construct(self, s_t_hat, encoder_outputs, encoder_feature,
                  enc_padding_mask, coverage):
        """Compute the attention context vector and attention distribution.

        Shape notes below repeat the inline comments of the original code and
        are not verified here -- TODO confirm against the callers.

        Args:
            s_t_hat: Decoder state fed to ``self.decode_proj``.
            encoder_outputs: Rank-3 tensor unpacked as ``(b, t_k, n)``.
            encoder_feature: Encoder features added to the projected decoder
                state.
            enc_padding_mask: Mask multiplied onto the softmax scores,
                presumably ``(B, t_k)``.
            coverage: Coverage tensor; only read when ``self.is_coverage``.

        Returns:
            Tuple ``(c_t, attn_dist, coverage)``: the context vector reshaped
            to ``(-1, 2 * self.hidden_dim)``, the re-normalized attention
            distribution reshaped to ``(-1, t_k)``, and the coverage (updated
            with ``attn_dist`` when ``self.is_coverage``, otherwise returned
            as passed in).
        """
        b, t_k, n = encoder_outputs.shape

        dec_fea = self.decode_proj(s_t_hat)  # (B, 2 * hidden_dim)
        dec_fea_expand = P.ExpandDims()(dec_fea, 1)
        dec_fea_expand = P.BroadcastTo()(dec_fea_expand, (b, t_k, n))

        # NOTE(review): dec_fea_expand is (b, t_k, n) here while the comments
        # below describe (B * t_k, ...) tensors -- confirm encoder_feature's
        # layout broadcasts as intended.
        att_features = encoder_feature + dec_fea_expand
        if self.is_coverage:
            coverage_input = coverage.view(-1, 1)  # (B * t_k, 1)
            coverage_feature = self.W_c(
                coverage_input)  # (B * t_k, 2 * hidden_dim)
            att_features = att_features + coverage_feature

        e = P.Tanh()(att_features)  # (B * t_k, 2 * hidden_dim)
        scores = self.v(e)  # (B * t_k, 1)
        scores = scores.view(-1, t_k)  # (B, t_k)

        # Mask the softmax output, then re-normalize so each row sums to 1
        # over the unmasked positions.
        attn_dist_ = P.Softmax(1)(scores) * enc_padding_mask  # (B, t_k)
        normalization_factor = P.ReduceSum(True)(attn_dist_, 1)
        attn_dist = attn_dist_ / normalization_factor

        attn_dist = P.ExpandDims()(attn_dist, 1)  # (B, 1, t_k)
        # BatchMatMul is an operator class: instantiate it first, then apply
        # it to the tensors. Passing the tensors to the constructor would use
        # them as the transpose_a/transpose_b arguments.
        c_t = P.BatchMatMul()(attn_dist, encoder_outputs)  # (B, 1, n)
        c_t = c_t.view(-1, self.hidden_dim * 2)  # (B, 2 * hidden_dim)

        attn_dist = attn_dist.view(-1, t_k)

        if self.is_coverage:
            coverage = coverage.view(-1, t_k)
            coverage = coverage + attn_dist

        return c_t, attn_dist, coverage
Example #3
0
    def __init__(self, config):
        """Prepare constants and operators for building an attention mask.

        Args:
            config: Object providing ``input_mask_from_dataset``,
                ``batch_size`` and ``seq_length``.
        """
        super(CreateAttentionMaskFromInputMask, self).__init__()
        self.input_mask_from_dataset = config.input_mask_from_dataset
        batch_size = config.batch_size
        seq_length = config.seq_length

        # When the dataset does not supply a mask, use an all-ones int32 one.
        self.input_mask = None
        if not self.input_mask_from_dataset:
            default_mask = initializer("ones", [batch_size, seq_length], mstype.int32)
            self.input_mask = default_mask.to_tensor()

        self.cast = ops.Cast()
        self.reshape = ops.Reshape()
        self.shape = (batch_size, 1, seq_length)

        # All-ones float32 column of shape (batch_size, seq_length, 1).
        ones_column = initializer("ones", [batch_size, seq_length, 1], mstype.float32)
        self.broadcast_ones = ones_column.to_tensor()
        self.batch_matmul = ops.BatchMatMul()
Example #4
0
 def __init__(self, config, is_training, num_tokens, dropout_prob=0.0, use_one_hot_embeddings=False):
     """Build the BERT backbone plus the layers and masks of the output head.

     Args:
         config: Object providing ``seq_length``, ``hidden_size`` and
             ``batch_size``; also forwarded to ``BertModel``.
         is_training: Forwarded to ``BertModel``.
         num_tokens: Length of the zero-initialized output bias parameter.
         dropout_prob: Accepted but not used in this constructor.
         use_one_hot_embeddings: Forwarded to ``BertModel``.
     """
     super(BertPoetryModel, self).__init__()
     self.bert = BertModel(config, is_training, use_one_hot_embeddings)
     self.num_tokens = num_tokens

     # Boolean matrix that is True where column index <= row index.
     positions = np.arange(config.seq_length)
     causal = positions.reshape(1, -1) <= positions.reshape(-1, 1)
     self.mask = Tensor([causal], mstype.float32)

     dense = nn.Dense(config.hidden_size,
                      config.hidden_size,
                      has_bias=True,
                      weight_init=TruncatedNormal(0.02),
                      activation='gelu')
     self.MLM_Dense = dense.to_float(mstype.float16)
     self.layer_norm = nn.LayerNorm((config.hidden_size,))
     self.matmul = ops.MatMul(transpose_b=True)
     self.biasadd = Parameter(initializer('zero', self.num_tokens),
                              name='MLM_output_biasadd')
     self.softmax = ops.Softmax(axis=-1)
     self.seq_length = config.seq_length
     self.hidden_size = config.hidden_size
     self.cast = ops.Cast()
     self.reshape = ops.Reshape()
     self.batch_matmul = ops.BatchMatMul()

     # Lower-triangular ones, one (seq_length, seq_length) slice per batch item.
     tri_shape = (config.batch_size, config.seq_length, config.seq_length)
     self.lower_triangle_mask = Tensor(np.tril(np.ones(tri_shape)),
                                       dtype=mstype.float32)
     self.multiply = ops.Mul()
Example #5
0
    def __init__(self,
                 length,
                 depth,
                 max_relative_position,
                 initializer_range,
                 use_one_hot_embeddings=False):
        """Create the relative-position embedding table and helper operators.

        Args:
            length: Forwarded to ``RelaPosMatrixGenerator`` as ``length``.
            depth: Number of columns in the embedding table.
            max_relative_position: Forwarded to ``RelaPosMatrixGenerator``;
                the table has ``2 * max_relative_position + 1`` rows.
            initializer_range: Argument passed to ``TruncatedNormal`` when
                initializing the table.
            use_one_hot_embeddings: Flag stored on the instance for later use.
        """
        super(RelaPosEmbeddingsGenerator, self).__init__()
        self.depth = depth
        self.vocab_size = max_relative_position * 2 + 1
        self.use_one_hot_embeddings = use_one_hot_embeddings

        # Trainable table with one row per relative-position index.
        table_shape = [self.vocab_size, self.depth]
        table_init = initializer(TruncatedNormal(initializer_range), table_shape)
        self.embeddings_table = Parameter(table_init)

        self.relative_positions_matrix = RelaPosMatrixGenerator(
            length=length,
            max_relative_position=max_relative_position)

        # Operators used for the lookup.
        self.reshape = P.Reshape()
        self.one_hot = nn.OneHot(depth=self.vocab_size)
        self.shape = P.Shape()
        self.gather = P.Gather()  # index_select
        self.matmul = P.BatchMatMul()
Example #6
0
    def __init__(self,
                 batch_size,
                 from_tensor_width,
                 to_tensor_width,
                 from_seq_length,
                 to_seq_length,
                 num_attention_heads=1,
                 size_per_head=512,
                 query_act=None,
                 key_act=None,
                 value_act=None,
                 has_attention_mask=False,
                 attention_probs_dropout_prob=0.0,
                 use_one_hot_embeddings=False,
                 initializer_range=0.02,
                 do_return_2d_tensor=False,
                 use_relative_positions=False,
                 compute_type=mstype.float32):
        """Set up the projections, shapes and operators of the attention cell.

        Args:
            batch_size: Batch size used in the precomputed reshape targets.
            from_tensor_width: Input width of the query projection.
            to_tensor_width: Input width of the key and value projections.
            from_seq_length: Sequence length used in the "from" shapes.
            to_seq_length: Sequence length used in the "to" shapes.
            num_attention_heads: Number of attention heads.
            size_per_head: Width of each head.
            query_act: Activation passed to the query ``nn.Dense``.
            key_act: Activation passed to the key ``nn.Dense``.
            value_act: Activation passed to the value ``nn.Dense``.
            has_attention_mask: When true, the mask-related operators are
                created.
            attention_probs_dropout_prob: ``nn.Dropout`` is constructed with
                ``1 - attention_probs_dropout_prob``.
            use_one_hot_embeddings: Forwarded to ``RelaPosEmbeddingsGenerator``.
            initializer_range: Argument of ``TruncatedNormal`` for the dense
                weights; also forwarded to ``RelaPosEmbeddingsGenerator``.
            do_return_2d_tensor: Selects a 2-D instead of a 3-D
                ``shape_return``.
            use_relative_positions: When true, a
                ``RelaPosEmbeddingsGenerator`` is created.
            compute_type: dtype used for the dense layers, the score scale
                and ``SaturateCast``.
        """
        super(BertAttention, self).__init__()

        # Plain configuration values.
        self.batch_size = batch_size
        self.from_seq_length = from_seq_length
        self.to_seq_length = to_seq_length
        self.num_attention_heads = num_attention_heads
        self.size_per_head = size_per_head
        self.has_attention_mask = has_attention_mask
        self.use_relative_positions = use_relative_positions

        # Scale factor 1 / sqrt(size_per_head) applied to attention scores.
        inv_sqrt_head = 1.0 / math.sqrt(float(size_per_head))
        self.scores_mul = Tensor([inv_sqrt_head], dtype=compute_type)
        self.reshape = ops.Reshape()
        self.shape_from_2d = (-1, from_tensor_width)
        self.shape_to_2d = (-1, to_tensor_width)

        weight = TruncatedNormal(initializer_range)
        units = num_attention_heads * size_per_head

        def _projection(in_width, act):
            # Dense layer mapping in_width -> units, run in compute_type.
            layer = nn.Dense(in_width,
                             units,
                             activation=act,
                             weight_init=weight)
            return layer.to_float(compute_type)

        self.query_layer = _projection(from_tensor_width, query_act)
        self.key_layer = _projection(to_tensor_width, key_act)
        self.value_layer = _projection(to_tensor_width, value_act)

        # Per-head reshape targets for the "from" and "to" sides.
        head_dims = (num_attention_heads, size_per_head)
        self.shape_from = (batch_size, from_seq_length) + head_dims
        self.shape_to = (batch_size, to_seq_length) + head_dims

        self.matmul_trans_b = ops.BatchMatMul(transpose_b=True)
        self.multiply = ops.Mul()
        self.transpose = ops.Transpose()
        self.trans_shape = (0, 2, 1, 3)
        self.trans_shape_relative = (2, 0, 1, 3)
        self.trans_shape_position = (1, 2, 0, 3)
        # Kept in float32 rather than compute_type.
        self.multiply_data = Tensor([-10000.0], dtype=mstype.float32)
        self.batch_num = batch_size * num_attention_heads
        self.matmul = ops.BatchMatMul()

        self.softmax = nn.Softmax()
        self.dropout = nn.Dropout(1 - attention_probs_dropout_prob)

        if self.has_attention_mask:
            self.expand_dims = ops.ExpandDims()
            self.sub = ops.Sub()
            self.add = ops.TensorAdd()
            self.cast = ops.Cast()
            self.get_dtype = ops.DType()

        # Output shape: batch and sequence merged when a 2-D result is wanted.
        if do_return_2d_tensor:
            leading_dims = (batch_size * from_seq_length,)
        else:
            leading_dims = (batch_size, from_seq_length)
        self.shape_return = leading_dims + (num_attention_heads * size_per_head,)

        self.cast_compute_type = SaturateCast(dst_type=compute_type)
        if self.use_relative_positions:
            self._generate_relative_positions_embeddings = RelaPosEmbeddingsGenerator(
                length=to_seq_length,
                depth=size_per_head,
                max_relative_position=16,
                initializer_range=initializer_range,
                use_one_hot_embeddings=use_one_hot_embeddings)
Example #7
0
 def __init__(self):
     """Initialize the cell and create its batched matrix-multiply operator."""
     super().__init__()
     self.matmul = ops.BatchMatMul()