def get_config(version='base', batch_size=1):
    """Build a BertConfig for the requested model *version*.

    Args:
        version: 'base' or 'large'; any other value falls back to the
            BertConfig defaults (only batch_size is forwarded).
        batch_size: batch size passed through to BertConfig.

    Returns:
        A BertConfig instance.
    """
    # Settings common to both the 'base' and 'large' presets.
    shared = dict(
        batch_size=batch_size,
        seq_length=128,
        vocab_size=21136,
        num_hidden_layers=2,
        hidden_act="gelu",
        max_position_embeddings=512,
        type_vocab_size=2,
        initializer_range=0.02,
        input_mask_from_dataset=True,
        token_type_ids_from_dataset=True,
        dtype=mstype.float32,
    )
    if version == 'base':
        return BertConfig(
            hidden_size=768,
            num_attention_heads=12,
            intermediate_size=3072,
            hidden_dropout_prob=0.1,
            attention_probs_dropout_prob=0.1,
            use_relative_positions=True,
            compute_type=mstype.float32,
            **shared,
        )
    if version == 'large':
        return BertConfig(
            hidden_size=1024,
            num_attention_heads=16,
            intermediate_size=4096,
            hidden_dropout_prob=0.0,
            attention_probs_dropout_prob=0.0,
            use_relative_positions=False,
            compute_type=mstype.float16,
            enable_fused_layernorm=False,
            **shared,
        )
    # Unrecognized version: rely on BertConfig's own defaults.
    return BertConfig(batch_size=batch_size)
train_steps = 200 batch_size = 12 frequency = 100 momentum = 0.9 weight_decay = 5e-4 loss_scale = 1.0 bert_net_cfg = BertConfig( seq_length=512, vocab_size=30522, hidden_size=1024, num_hidden_layers=4, num_attention_heads=16, intermediate_size=4096, hidden_act="gelu", hidden_dropout_prob=0.1, attention_probs_dropout_prob=0.1, max_position_embeddings=512, type_vocab_size=2, initializer_range=0.02, use_relative_positions=False, dtype=mstype.float32, compute_type=mstype.float16 ) np.random.seed(1) ds.config.set_seed(1) os.environ['GLOG_v'] = str(2) class TimeMonitor(Callback):