def get_config(version='base', batch_size=1):
    """Build a BertConfig for the requested preset.

    Args:
        version: 'base' or 'large' selects one of the two presets defined
            below. Any other value produces a BertConfig constructed with
            only ``batch_size``, leaving every other field to BertConfig.
        batch_size: Forwarded unchanged to BertConfig in every case.

    Returns:
        The newly constructed BertConfig instance.
    """
    if version == 'base':
        # 768-wide / 12-head preset: dropout 0.1, relative positions enabled,
        # float32 for both dtype and compute_type.
        base_settings = {
            'seq_length': 128,
            'vocab_size': 21136,
            'hidden_size': 768,
            'num_hidden_layers': 2,
            'num_attention_heads': 12,
            'intermediate_size': 3072,
            'hidden_act': "gelu",
            'hidden_dropout_prob': 0.1,
            'attention_probs_dropout_prob': 0.1,
            'max_position_embeddings': 512,
            'type_vocab_size': 2,
            'initializer_range': 0.02,
            'use_relative_positions': True,
            'input_mask_from_dataset': True,
            'token_type_ids_from_dataset': True,
            'dtype': mstype.float32,
            'compute_type': mstype.float32,
        }
        return BertConfig(batch_size=batch_size, **base_settings)

    if version == 'large':
        # 1024-wide / 16-head preset: dropout disabled, no relative positions,
        # float16 compute_type, fused layernorm explicitly turned off.
        large_settings = {
            'seq_length': 128,
            'vocab_size': 21136,
            'hidden_size': 1024,
            'num_hidden_layers': 2,
            'num_attention_heads': 16,
            'intermediate_size': 4096,
            'hidden_act': "gelu",
            'hidden_dropout_prob': 0.0,
            'attention_probs_dropout_prob': 0.0,
            'max_position_embeddings': 512,
            'type_vocab_size': 2,
            'initializer_range': 0.02,
            'use_relative_positions': False,
            'input_mask_from_dataset': True,
            'token_type_ids_from_dataset': True,
            'dtype': mstype.float32,
            'compute_type': mstype.float16,
            'enable_fused_layernorm': False,
        }
        return BertConfig(batch_size=batch_size, **large_settings)

    # Unrecognized version: rely on BertConfig's own defaults.
    return BertConfig(batch_size=batch_size)
Example #2
0
# Module-level settings for this script. Their consumers are outside this
# view, so the meanings below are inferred from the names only.
# NOTE(review): presumably step count, batch size, a reporting/update
# frequency, and optimizer hyperparameters -- confirm against the code that
# reads them.
train_steps = 200
batch_size = 12
frequency = 100
momentum = 0.9
weight_decay = 5e-4
loss_scale = 1.0

# BERT network configuration: sequence length 512, hidden size 1024,
# 4 hidden layers, 16 attention heads; float32 dtype with float16 compute_type.
# NOTE(review): batch_size is not passed here, so the module-level batch_size
# above does not reach this config; presumably BertConfig falls back to its
# own default -- confirm.
bert_net_cfg = BertConfig(
    seq_length=512,
    vocab_size=30522,
    hidden_size=1024,
    num_hidden_layers=4,
    num_attention_heads=16,
    intermediate_size=4096,
    hidden_act="gelu",
    hidden_dropout_prob=0.1,
    attention_probs_dropout_prob=0.1,
    max_position_embeddings=512,
    type_vocab_size=2,
    initializer_range=0.02,
    use_relative_positions=False,
    dtype=mstype.float32,
    compute_type=mstype.float16
)

# Import-time side effects: seed NumPy's global RNG and the `ds` config with
# the same fixed value (1).
np.random.seed(1)
ds.config.set_seed(1)
# Sets the GLOG_v environment variable to "2" for this process.
# NOTE(review): presumably selects the backend log verbosity -- confirm which
# level "2" maps to.
os.environ['GLOG_v'] = str(2)


class TimeMonitor(Callback):