def bert_trans():
    """Create a BertTransformer with a fixed configuration and return it.

    The network is constructed with the hard-coded settings below, then
    ``set_train()`` is called on it before it is handed back to the caller.

    Returns:
        The configured ``BertTransformer`` instance.
    """
    # Keyword arguments are kept in the same order the constructor receives them.
    transformer_kwargs = {
        "batch_size": 1,
        "hidden_size": 768,
        "seq_length": 128,
        "num_hidden_layers": 1,
        "num_attention_heads": 12,
        "intermediate_size": 768,
        "attention_probs_dropout_prob": 0.1,
        "use_one_hot_embeddings": False,
        "initializer_range": 0.02,
        "use_relative_positions": False,
        "hidden_act": "gelu",
        "compute_type": mstype.float32,
        "return_all_encoders": True,
    }
    transformer = BertTransformer(**transformer_kwargs)
    transformer.set_train()
    return transformer
initializer_range=0.02, max_position_embeddings=512, dropout_prob=0.0) }, { 'id': 'BertTransformer', 'group': 'BertTransformer', 'block': BertTransformer(batch_size=1, hidden_size=1024, seq_length=128, num_hidden_layers=2, num_attention_heads=16, intermediate_size=4096, attention_probs_dropout_prob=0.0, use_one_hot_embeddings=False, initializer_range=0.02, use_relative_positions=True, hidden_act="gelu", compute_type=mstype.float32, return_all_encoders=True) }, { 'id': 'BertEncoderCell', 'group': 'BertEncoderCell', 'block': BertEncoderCell(batch_size=1, hidden_size=1024, seq_length=128,