    def test_generate_square_subsequent_mask(self):
        length = 5
        d_model, n_head, dim_feedforward = 8, 4, 64
        transformer = Transformer(d_model,
                                  n_head,
                                  dim_feedforward=dim_feedforward)
        mask = transformer.generate_square_subsequent_mask(length)
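        # The mask is expected to be a [length, length] float tensor that is 0
        # on and below the diagonal and -inf above it, so each position can
        # only attend to itself and earlier positions. Assuming that layout, a
        # sanity check could be added here, e.g.:
        # expected = np.triu(np.full((length, length), -np.inf), k=1)
        # np.testing.assert_allclose(mask.numpy(), expected)
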
    def test_transformer(self):
        batch_size, d_model, n_head, dim_feedforward, dropout, _, _, source_length, target_length = generate_basic_params(
            mode="decoder_layer")

        # batch_size, source_length, target_length, d_model, n_head = 4, 8, 8, 64, 8
        with fluid.dygraph.guard(fluid.CPUPlace()):
            transformer = Transformer(d_model,
                                      n_head,
                                      dim_feedforward=dim_feedforward,
                                      dropout=dropout)
            src = paddle.to_variable(
                np.random.rand(batch_size, source_length,
                               d_model).astype("float32"))
            tgt = paddle.to_variable(
                np.random.rand(batch_size, target_length,
                               d_model).astype("float32"))
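            # The masks below are additive attention masks: 0 keeps a position
            # visible, while -inf (or a large negative value such as -1e9)
            # effectively removes it from the softmax over attention scores.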
            src_mask = np.zeros((batch_size, n_head, source_length,
                                 source_length)).astype("float32")
            src_mask[0][0][0][0] = -np.inf
            src_mask = paddle.to_variable(src_mask)
            tgt_mask = np.zeros((batch_size, n_head, target_length,
                                 target_length)).astype("float32")
            tgt_mask[0][0][0][0] = -1e9
            memory_mask = np.zeros((batch_size, n_head, target_length,
                                    source_length)).astype("float32")
            memory_mask[0][0][0][0] = -1e9
            tgt_mask, memory_mask = paddle.to_variable(
                tgt_mask), paddle.to_variable(memory_mask)
            trans_output = transformer(src, tgt, src_mask, tgt_mask,
                                       memory_mask)
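            # The decoder output is expected to have shape
            # [batch_size, target_length, d_model]; assuming that, a shape
            # assertion could be added here, e.g.:
            # self.assertEqual(trans_output.shape,
            #                  [batch_size, target_length, d_model])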