def test_generate_square_subsequent_mask(self):
    """Verify the causal mask produced by generate_square_subsequent_mask.

    The mask must be a [length, length] additive attention mask with 0 on
    and below the diagonal (position may attend) and -inf strictly above
    it (future positions are blocked).
    """
    length = 5
    d_model, n_head, dim_feedforward = 8, 4, 64
    transformer = Transformer(
        d_model, n_head, dim_feedforward=dim_feedforward)
    mask = transformer.generate_square_subsequent_mask(length)
    # The original test built the mask but asserted nothing, so it could
    # never fail. Check the full expected pattern: -inf strictly above
    # the diagonal, 0 elsewhere.
    expected = np.triu(np.full([length, length], -np.inf), 1)
    np.testing.assert_allclose(np.array(mask), expected)
def test_transformer(self):
    """End-to-end forward pass of Transformer with all three masks.

    Builds random src/tgt batches, applies additive attention masks on
    the encoder self-attention, decoder self-attention and decoder
    cross-attention, and checks the decoder output shape.
    """
    (batch_size, d_model, n_head, dim_feedforward, dropout, _, _,
     source_length, target_length) = generate_basic_params(
         mode="decoder_layer")
    with fluid.dygraph.guard(fluid.CPUPlace()):
        transformer = Transformer(
            d_model,
            n_head,
            dim_feedforward=dim_feedforward,
            dropout=dropout)
        src = paddle.to_variable(
            np.random.rand(batch_size, source_length,
                           d_model).astype("float32"))
        tgt = paddle.to_variable(
            np.random.rand(batch_size, target_length,
                           d_model).astype("float32"))
        # Masks use the additive convention: 0 keeps a position, a large
        # negative value removes it. One entry is masked in each mask to
        # exercise the masking path; -inf and -1e9 are both used so both
        # styles of "masked" value go through the kernel.
        src_mask = np.zeros((batch_size, n_head, source_length,
                             source_length)).astype("float32")
        src_mask[0][0][0][0] = -np.inf
        src_mask = paddle.to_variable(src_mask)
        tgt_mask = np.zeros((batch_size, n_head, target_length,
                             target_length)).astype("float32")
        tgt_mask[0][0][0][0] = -1e9
        memory_mask = np.zeros((batch_size, n_head, target_length,
                                source_length)).astype("float32")
        memory_mask[0][0][0][0] = -1e9
        tgt_mask, memory_mask = paddle.to_variable(
            tgt_mask), paddle.to_variable(memory_mask)
        trans_output = transformer(src, tgt, src_mask, tgt_mask,
                                   memory_mask)
        # The original test ran the forward pass but asserted nothing.
        # At minimum the decoder output must be shaped
        # [batch_size, target_length, d_model].
        self.assertEqual(
            list(trans_output.shape),
            [batch_size, target_length, d_model])