Exemple #1
0
def convert_tf_config(json_cfg_path, vocab_size, model_type):
    """Convert a TF-style JSON config file into a BERT/ALBERT model config.

    Parameters
    ----------
    json_cfg_path
        Path to the JSON configuration file. It is read as UTF-8.
    vocab_size
        Vocabulary size stored in ``cfg.MODEL.vocab_size``. For ``'albert'``
        it must equal the ``vocab_size`` entry of the JSON file. For ``'bert'``
        it is not checked here.
    model_type
        Either ``'bert'`` or ``'albert'``.

    Returns
    -------
    cfg
        A frozen clone of ``BertModel.get_cfg()`` or ``AlbertModel.get_cfg()``
        with its fields filled in from the JSON file.

    Raises
    ------
    NotImplementedError
        If ``model_type`` is neither ``'bert'`` nor ``'albert'``.
    AssertionError
        If ``model_type`` is ``'albert'`` and ``vocab_size`` differs from the
        ``vocab_size`` entry of the JSON file.
    KeyError
        If a required entry is missing from the JSON file.
    """

    with open(json_cfg_path, encoding='utf-8') as f:
        json_cfg = json.load(f)
    if model_type == 'bert':
        # For the bert model, the config file is copied from a local
        # configuration file, leaving the vocab_size indistinguishable.
        # The verification of vocab_size is done during the conversion of
        # the embedding weights instead.
        cfg = BertModel.get_cfg().clone()
    elif model_type == 'albert':
        # Explicit check rather than `assert`, so the validation still runs
        # when Python is started with -O. AssertionError is kept so that the
        # exception type seen by existing callers does not change.
        if vocab_size != json_cfg['vocab_size']:
            raise AssertionError(
                'vocab_size mismatch: got {!r}, but the config file declares '
                '{!r}'.format(vocab_size, json_cfg['vocab_size']))
        cfg = AlbertModel.get_cfg().clone()
    else:
        raise NotImplementedError(
            "Unsupported model_type {!r}; expected 'bert' or "
            "'albert'".format(model_type))
    cfg.defrost()
    cfg.MODEL.vocab_size = vocab_size

    # Naming differs between the two formats: the JSON 'hidden_size' maps to
    # `units`, and the JSON 'intermediate_size' maps to `hidden_size`.
    cfg.MODEL.units = json_cfg['hidden_size']
    cfg.MODEL.hidden_size = json_cfg['intermediate_size']
    cfg.MODEL.max_length = json_cfg['max_position_embeddings']
    cfg.MODEL.num_heads = json_cfg['num_attention_heads']
    cfg.MODEL.num_layers = json_cfg['num_hidden_layers']
    cfg.MODEL.pos_embed_type = 'learned'
    # NOTE(review): 'gelu' is mapped to 'gelu(tanh)', presumably because the
    # source model uses the tanh approximation of GELU -- confirm.
    if json_cfg['hidden_act'] == 'gelu':
        cfg.MODEL.activation = 'gelu(tanh)'
    else:
        cfg.MODEL.activation = json_cfg['hidden_act']
    # The epsilon is fixed here; it is not read from the JSON file.
    cfg.MODEL.layer_norm_eps = 1E-12
    cfg.MODEL.num_token_types = json_cfg['type_vocab_size']
    cfg.MODEL.hidden_dropout_prob = float(json_cfg['hidden_dropout_prob'])
    cfg.MODEL.attention_dropout_prob = float(
        json_cfg['attention_probs_dropout_prob'])
    cfg.MODEL.dtype = 'float32'
    # e.g. TruncNorm(0, 0.02) when initializer_range is 0.02
    cfg.INITIALIZER.weight = ['truncnorm', 0, json_cfg['initializer_range']]
    cfg.INITIALIZER.bias = ['zeros']
    cfg.VERSION = 1
    if model_type == 'albert':
        # The configurations below are not supported in bert
        cfg.MODEL.embed_size = json_cfg['embedding_size']
        cfg.MODEL.num_groups = json_cfg['num_hidden_groups']
    cfg.freeze()
    return cfg
def get_test_cfg():
    """Build the AlbertModel configuration used for testing.

    Returns
    -------
    cfg
        A clone of ``AlbertModel.get_cfg()`` with the ``MODEL`` fields listed
        below overridden. The clone is defrosted and ``freeze()`` is not
        called on it before it is returned.
    """
    # (field name, value) pairs, applied to cfg.MODEL in this order.
    model_overrides = (
        ('vocab_size', 500),
        ('num_token_types', 3),
        ('units', 64),
        ('hidden_size', 96),
        ('num_heads', 2),
        ('num_layers', 3),
        ('hidden_dropout_prob', 0.0),
        ('attention_dropout_prob', 0.0),
    )
    test_cfg = AlbertModel.get_cfg().clone()
    test_cfg.defrost()
    for field_name, field_value in model_overrides:
        setattr(test_cfg.MODEL, field_name, field_value)
    return test_cfg