Example #1
def test_gpt2_small_config(compute_layout, ctx):
    cfg = GPT2Model.get_cfg()
    cfg.defrost()
    cfg.MODEL.vocab_size = 1000
    cfg.MODEL.units = 128
    cfg.MODEL.num_layers = 2
    cfg.MODEL.num_heads = 2
    cfg.MODEL.compute_layout = compute_layout
    cfg.freeze()

    # Generate TN layout
    cfg_tn = cfg.clone()
    cfg_tn.defrost()
    cfg_tn.MODEL.layout = 'TN'
    cfg_tn.freeze()

    with ctx:
        batch_size = 4
        sequence_length = 16
        inputs = mx.np.random.randint(0, 1000, (batch_size, sequence_length), ctx=ctx)

        gpt2_model = GPT2Model.from_cfg(cfg)
        gpt2_model.initialize(ctx=ctx)
        gpt2_model.hybridize()
        hiddens, _ = gpt2_model(
            inputs,
            gpt2_model.init_states(batch_size, ctx)
        )
        # A TN-layout model sharing the same parameters should produce the
        # same hiddens, with the batch and time axes swapped.
        gpt2_model_tn = GPT2Model.from_cfg(cfg_tn)
        gpt2_model_tn.share_parameters(gpt2_model.collect_params())
        gpt2_model_tn.hybridize()
        hiddens_tn, _ = gpt2_model_tn(
            inputs.T,
            gpt2_model_tn.init_states(batch_size, ctx)
        )
        assert_allclose(np.swapaxes(hiddens_tn.asnumpy(), 0, 1),
                        hiddens.asnumpy(), 1E-4, 1E-4)

        # Test for GPT2ForLM
        gpt2_lm_model = GPT2ForLM(cfg)
        gpt2_lm_model.initialize(ctx=ctx)
        gpt2_lm_model.hybridize()
        logits, states = gpt2_lm_model(
            inputs,
            gpt2_lm_model.init_states(batch_size, ctx)
        )
        gpt2_lm_model_tn = GPT2ForLM(cfg_tn)
        gpt2_lm_model_tn.share_parameters(gpt2_lm_model.collect_params())
        gpt2_lm_model_tn.hybridize()
        logits_tn, states_tn = gpt2_lm_model_tn(
            inputs.T,
            gpt2_lm_model_tn.init_states(batch_size, ctx)
        )
        assert_allclose(np.swapaxes(logits_tn.asnumpy(), 0, 1),
                        logits.asnumpy(), 1E-4, 1E-4)
        assert_allclose(np.swapaxes(states_tn.asnumpy(), 2, 3),
                        states.asnumpy(), 1E-4, 1E-4)
Example #2
def test_gpt2_incremental_states(ctx):
    with ctx:
        batch_size = 4
        sequence_length = 5
        inputs = mx.np.random.randint(0, 1000, (batch_size, sequence_length), ctx=ctx)

        cfg = GPT2Model.get_cfg()
        gpt2_model = GPT2Model.from_cfg(cfg)
        gpt2_model.initialize(ctx=ctx)
        gpt2_model.hybridize()

        one_time_hiddens, one_time_states = gpt2_model(
            inputs,
            gpt2_model.init_states(batch_size, ctx)
        )

        # Decode one token at a time, carrying the cached states forward,
        # and check the result matches the single forward pass above.
        states = gpt2_model.init_states(batch_size, ctx)
        hiddens_l = []
        for i in range(sequence_length):
            hiddens, states = gpt2_model(
                inputs[:, i:i+1],
                states
            )
            hiddens_l.append(hiddens)
        hiddens_concat = mx.np.concatenate(hiddens_l, axis=1)
        assert_allclose(one_time_states.asnumpy(),
                        states.asnumpy(), 1E-4, 1E-4)
        assert_allclose(one_time_hiddens.asnumpy(),
                        hiddens_concat.asnumpy(), 1E-4, 1E-4)
Example #3
def test_gpt2(model_name, ctx):
    # test from pretrained
    assert len(list_pretrained_gpt2()) > 0
    with tempfile.TemporaryDirectory() as root, ctx:
        cfg, tokenizer, params_path, lm_params_path =\
            get_pretrained_gpt2(model_name, load_backbone=True, load_lm=True, root=root)
        assert cfg.MODEL.vocab_size == len(tokenizer.vocab)
        # test backbone
        gpt2_model = GPT2Model.from_cfg(cfg)
        gpt2_model.load_parameters(params_path)
        # test lm model
        gpt2_lm_model = GPT2ForLM(cfg)
        gpt2_lm_model.load_parameters(lm_params_path)

        # test forward
        batch_size = 3
        seq_length = 32
        vocab_size = len(tokenizer.vocab)
        input_ids = mx.np.array(np.random.randint(2, vocab_size,
                                                  (batch_size, seq_length)),
                                dtype=np.int32,
                                ctx=ctx)
        logits, _ = gpt2_lm_model(input_ids,
                                  gpt2_lm_model.init_states(batch_size, ctx))
        mx.npx.waitall()
        # test backward
        label_smooth_loss = LabelSmoothCrossEntropyLoss(num_labels=vocab_size)
        with mx.autograd.record():
            logits, _ = gpt2_lm_model(
                input_ids, gpt2_lm_model.init_states(batch_size, ctx))
            loss = label_smooth_loss(logits, input_ids)
            loss.backward()
        mx.npx.waitall()
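test_gpt2 takes a model_name argument, so a natural harness parametrizes it over whatever list_pretrained_gpt2() returns. The decorator below is one plausible way to wire that up; the actual test suite may instead restrict the list to the smallest checkpoint to keep CI time down.

# Hypothetical harness: run test_gpt2 against every released checkpoint name.
@pytest.mark.parametrize('model_name', list_pretrained_gpt2())
@pytest.mark.parametrize('ctx', [mx.cpu()])
def test_gpt2_pretrained(model_name, ctx):
    test_gpt2(model_name, ctx)  # delegate to the body shown above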
Example #4
def test_gpt2_incremental_states(ctx):
    with ctx:
        batch_size = 4
        sequence_length = 5
        inputs = mx.np.random.randint(0, 1000, (batch_size, sequence_length), ctx=ctx)

        cfg = GPT2Model.get_cfg()
        gpt2_model = GPT2Model.from_cfg(cfg)
        gpt2_model.initialize(ctx=ctx)
        gpt2_model.hybridize()

        one_time_hiddens, one_time_states = gpt2_model(
            inputs,
            gpt2_model.init_states(batch_size, ctx),
            mx.np.array(0, dtype=np.int32, ctx=ctx)
        )

        # Decode one token at a time, passing the explicit position offset
        # of each new token along with the cached states.
        states = gpt2_model.init_states(batch_size, ctx)
        for i in range(sequence_length):
            hiddens, states = gpt2_model(
                inputs[:, i:i+1],
                states,
                mx.np.array(i, dtype=np.int32, ctx=ctx)
            )
        incremental_states = states
        incremental_hiddens = hiddens
        # Compare the incrementally decoded states/hiddens against the
        # single-pass run; the last incremental step only covers one token,
        # hence the [:, -1:] slice on the one-shot hiddens.
        assert_allclose(one_time_states.asnumpy(),
                        incremental_states.asnumpy(), 1E-4, 1E-4)
        assert_allclose(one_time_hiddens[:, -1:].asnumpy(),
                        incremental_hiddens.asnumpy(), 1E-4, 1E-4)
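Compared with Example #2, this variant threads an explicit position index into every call. Continuing with the variables of the test above, the snippet below sketches how decoding would resume after the prompt: the offset equals the number of tokens already cached in states, so the positional embeddings keep lining up. This reading of the third argument is inferred from how the loop advances it, and next_token_ids is a hypothetical array.

# Sketch: one decoding step resumed after a prompt of length `sequence_length`
# (interpretation of the position argument is an assumption, not documented behavior).
next_hiddens, states = gpt2_model(
    next_token_ids,                                         # shape (batch_size, 1), hypothetical
    states,
    mx.np.array(sequence_length, dtype=np.int32, ctx=ctx))  # position of the new token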
Example #5
def test_gpt2_small_config(compute_layout, ctx):
    cfg = GPT2Model.get_cfg()
    cfg.defrost()
    cfg.MODEL.vocab_size = 1000
    cfg.MODEL.units = 128
    cfg.MODEL.num_layers = 2
    cfg.MODEL.num_heads = 2
    cfg.MODEL.compute_layout = compute_layout
    cfg.freeze()

    # Generate TN layout
    cfg_tn = cfg.clone()
    cfg_tn.defrost()
    cfg_tn.MODEL.layout = 'TN'
    cfg_tn.freeze()

    with ctx:
        batch_size = 4
        sequence_length = 16
        inputs = mx.np.random.randint(0,
                                      1000, (batch_size, sequence_length),
                                      ctx=ctx)

        gpt2_model = GPT2Model.from_cfg(cfg)
        gpt2_model.initialize(ctx=ctx)
        gpt2_model.hybridize()
        hiddens, _ = gpt2_model(inputs,
                                gpt2_model.init_states(batch_size, ctx))

        gpt2_model_tn = GPT2Model.from_cfg(cfg_tn)
        gpt2_model_tn.share_parameters(gpt2_model.collect_params())
        gpt2_model_tn.hybridize()
        hiddens_tn, _ = gpt2_model_tn(
            inputs.T, gpt2_model_tn.init_states(batch_size, ctx))
        assert_allclose(np.swapaxes(hiddens_tn.asnumpy(), 0, 1),
                        hiddens.asnumpy(), 1E-4, 1E-4)

        # Test for GPT2ForLM
        gpt2_lm_model = GPT2ForLM(cfg)
        gpt2_lm_model.initialize(ctx=ctx)
        gpt2_lm_model.hybridize()
        logits, states = gpt2_lm_model(
            inputs, gpt2_lm_model.init_states(batch_size, ctx))
        gpt2_lm_model_tn = GPT2ForLM(cfg_tn)
        gpt2_lm_model_tn.share_parameters(gpt2_lm_model.collect_params())
        gpt2_lm_model_tn.hybridize()
        logits_tn, states_tn = gpt2_lm_model_tn(
            inputs.T, gpt2_lm_model_tn.init_states(batch_size, ctx))
        assert_allclose(np.swapaxes(logits_tn.asnumpy(), 0, 1),
                        logits.asnumpy(), 1E-4, 1E-4)
        assert_allclose(np.swapaxes(states_tn.asnumpy(), 2, 3),
                        states.asnumpy(), 1E-4, 1E-4)

        # Verify Float16
        if ctx.device_type == 'gpu':
            verify_backbone_fp16(
                model_cls=GPT2Model,
                cfg=cfg,
                ctx=ctx,
                inputs=[inputs,
                        gpt2_model.init_states(batch_size, ctx)],
                check_amp=False)
            pytest.skip(
                'GPT-2 test has been turned off. '
                'Issue: https://github.com/apache/incubator-mxnet/issues/19463'
            )