Example #1
def test_albert_backbone(static_alloc, static_shape, compute_layout):
    batch_size = 3
    cfg = get_test_cfg()
    cfg.defrost()
    cfg.MODEL.compute_layout = compute_layout
    cfg.freeze()
    model = AlbertModel.from_cfg(cfg, use_pooler=True)
    model.initialize()
    model.hybridize(static_alloc=static_alloc, static_shape=static_shape)
    cfg_tn = cfg.clone()
    cfg_tn.defrost()
    cfg_tn.MODEL.layout = 'TN'
    cfg_tn.freeze()
    model_tn = AlbertModel.from_cfg(cfg_tn, use_pooler=True)
    model_tn.share_parameters(model.collect_params())
    model_tn.hybridize(static_alloc=static_alloc, static_shape=static_shape)

    for seq_length in [64, 96]:
        valid_length = mx.np.random.randint(seq_length // 2, seq_length,
                                            (batch_size, ))
        inputs = mx.np.random.randint(0, cfg.MODEL.vocab_size,
                                      (batch_size, seq_length))
        token_types = mx.np.random.randint(0, cfg.MODEL.num_token_types,
                                           (batch_size, seq_length))
        contextual_embedding, pooled_out = model(inputs, token_types,
                                                 valid_length)
        contextual_embedding_tn, pooled_out_tn = model_tn(
            inputs.T, token_types.T, valid_length)
        # Verify layout
        assert_allclose(np.swapaxes(contextual_embedding_tn.asnumpy(), 0, 1),
                        contextual_embedding.asnumpy(), 1E-4, 1E-4)
        assert_allclose(pooled_out_tn.asnumpy(), pooled_out.asnumpy(), 1E-4,
                        1E-4)
        assert contextual_embedding.shape == (batch_size, seq_length,
                                              cfg.MODEL.units)
        assert pooled_out.shape == (batch_size, cfg.MODEL.units)
        # Ensure the embeddings that exceed valid_length are masked
        contextual_embedding_np = contextual_embedding.asnumpy()
        pooled_out_np = pooled_out.asnumpy()
        for i in range(batch_size):
            ele_valid_length = valid_length[i].asnumpy()
            assert_allclose(
                contextual_embedding_np[i, ele_valid_length:],
                np.zeros_like(contextual_embedding_np[i, ele_valid_length:]),
                1E-5, 1E-5)
        # Ensure that tokens appended beyond valid_length do not change the valid outputs
        new_inputs = mx.np.concatenate([inputs, inputs[:, :5]], axis=-1)
        new_token_types = mx.np.concatenate([token_types, token_types[:, :5]],
                                            axis=-1)
        new_contextual_embedding, new_pooled_out = \
            model(new_inputs, new_token_types, valid_length)
        new_contextual_embedding_np = new_contextual_embedding.asnumpy()
        new_pooled_out_np = new_pooled_out.asnumpy()
        for i in range(batch_size):
            ele_valid_length = valid_length[i].asnumpy()
            assert_allclose(new_contextual_embedding_np[i, :ele_valid_length],
                            contextual_embedding_np[i, :ele_valid_length],
                            1E-5, 1E-5)
        assert_allclose(new_pooled_out_np, pooled_out_np, 1E-4, 1E-4)
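This test assumes the usual GluonNLP test-file imports and the get_test_cfg helper shown in Example #4. A minimal preamble that runs it as a plain script, with one concrete choice for the pytest-parametrized arguments, could look like the sketch below (an assumption about the harness, not the original test setup):

import mxnet as mx
import numpy as np
from numpy.testing import assert_allclose
from gluonnlp.models.albert import AlbertModel

mx.npx.set_np()  # enable the mx.np numpy-compatible interface used above

# Pick one combination of the parametrized arguments (others: static flags False, layout 'NT'/'TN').
test_albert_backbone(static_alloc=True, static_shape=True, compute_layout='auto')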
Example #2
def test_albert_get_pretrained(model_name):
    assert len(list_pretrained_albert()) > 0
    with tempfile.TemporaryDirectory() as root:
        cfg, tokenizer, backbone_params_path, mlm_params_path =\
            get_pretrained_albert(model_name, load_backbone=True, load_mlm=True, root=root)
        assert cfg.MODEL.vocab_size == len(tokenizer.vocab)
        albert_model = AlbertModel.from_cfg(cfg)
        albert_model.load_parameters(backbone_params_path)
        albert_mlm_model = AlbertForMLM(cfg)
        if mlm_params_path is not None:
            albert_mlm_model.load_parameters(mlm_params_path)
        # Just load the backbone
        albert_mlm_model = AlbertForMLM(cfg)
        albert_mlm_model.backbone_model.load_parameters(backbone_params_path)
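As a usage sketch (not part of the original test), the function can be driven with any name returned by list_pretrained_albert(); 'google_albert_base_v2' is assumed to be one of them:

import tempfile
from gluonnlp.models.albert import (AlbertModel, AlbertForMLM,
                                    list_pretrained_albert, get_pretrained_albert)

print(list_pretrained_albert())                       # available checkpoint names
test_albert_get_pretrained('google_albert_base_v2')   # assumed to appear in the list above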
Example #3
def convert_tf_config(json_cfg_path, vocab_size, model_type):
    """Convert the config file"""

    with open(json_cfg_path, encoding='utf-8') as f:
        json_cfg = json.load(f)
    if model_type == 'bert':
        # For the BERT model, the config file is copied from a local configuration
        # file, so vocab_size cannot be checked here. It is actually verified later,
        # when the embedding weights are converted.
        cfg = BertModel.get_cfg().clone()
    elif model_type == 'albert':
        assert vocab_size == json_cfg['vocab_size']
        cfg = AlbertModel.get_cfg().clone()
    else:
        raise NotImplementedError
    cfg.defrost()
    cfg.MODEL.vocab_size = vocab_size

    cfg.MODEL.units = json_cfg['hidden_size']
    cfg.MODEL.hidden_size = json_cfg['intermediate_size']
    cfg.MODEL.max_length = json_cfg['max_position_embeddings']
    cfg.MODEL.num_heads = json_cfg['num_attention_heads']
    cfg.MODEL.num_layers = json_cfg['num_hidden_layers']
    cfg.MODEL.pos_embed_type = 'learned'
    if json_cfg['hidden_act'] == 'gelu':
        cfg.MODEL.activation = 'gelu(tanh)'
    else:
        cfg.MODEL.activation = json_cfg['hidden_act']
    cfg.MODEL.layer_norm_eps = 1E-12
    cfg.MODEL.num_token_types = json_cfg['type_vocab_size']
    cfg.MODEL.hidden_dropout_prob = float(json_cfg['hidden_dropout_prob'])
    cfg.MODEL.attention_dropout_prob = float(
        json_cfg['attention_probs_dropout_prob'])
    cfg.MODEL.dtype = 'float32'
    # TruncNorm(0, 0.02)
    cfg.INITIALIZER.weight = ['truncnorm', 0, json_cfg['initializer_range']]
    cfg.INITIALIZER.bias = ['zeros']
    cfg.VERSION = 1
    if model_type == 'albert':
        # The below configurations are not supported in bert
        cfg.MODEL.embed_size = json_cfg['embedding_size']
        cfg.MODEL.num_groups = json_cfg['num_hidden_groups']
    cfg.freeze()
    return cfg
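A hedged sketch of how this converter might be invoked; the JSON path and vocabulary size below are placeholders for illustration, not values taken from the original conversion script:

# Hypothetical inputs for illustration only.
json_cfg_path = 'albert_config.json'  # TF-style config shipped with an ALBERT checkpoint
vocab_size = 30000                    # for ALBERT this must equal json_cfg['vocab_size']
cfg = convert_tf_config(json_cfg_path, vocab_size, model_type='albert')
print(cfg.MODEL.units, cfg.MODEL.embed_size, cfg.MODEL.num_groups)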
Example #4
def get_test_cfg():
    vocab_size = 500
    num_token_types = 3
    num_layers = 3
    num_heads = 2
    units = 64
    hidden_size = 96
    hidden_dropout_prob = 0.0
    attention_dropout_prob = 0.0
    cfg = AlbertModel.get_cfg().clone()
    cfg.defrost()
    cfg.MODEL.vocab_size = vocab_size
    cfg.MODEL.num_token_types = num_token_types
    cfg.MODEL.units = units
    cfg.MODEL.hidden_size = hidden_size
    cfg.MODEL.num_heads = num_heads
    cfg.MODEL.num_layers = num_layers
    cfg.MODEL.hidden_dropout_prob = hidden_dropout_prob
    cfg.MODEL.attention_dropout_prob = attention_dropout_prob
    return cfg
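This is the small configuration that Example #1 feeds into AlbertModel.from_cfg. A minimal forward pass with it (a sketch, assuming GluonNLP's numpy-based MXNet API) could look like:

import mxnet as mx
from gluonnlp.models.albert import AlbertModel

mx.npx.set_np()
cfg = get_test_cfg()
cfg.freeze()
model = AlbertModel.from_cfg(cfg, use_pooler=True)
model.initialize()
tokens = mx.np.random.randint(0, cfg.MODEL.vocab_size, (2, 16))
token_types = mx.np.random.randint(0, cfg.MODEL.num_token_types, (2, 16))
valid_length = mx.np.array([16, 10], dtype='int32')
contextual_embedding, pooled_out = model(tokens, token_types, valid_length)
print(contextual_embedding.shape, pooled_out.shape)  # (2, 16, 64) and (2, 64) given units=64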