Code Example #1
# Imports assumed for this test snippet (mirroring GluonNLP's test setup; not part of the original excerpt).
import mxnet as mx
import numpy as np
from numpy.testing import assert_allclose
from gluonnlp.models.roberta import RobertaModel, RobertaForMLM
# verify_backbone_fp16 is assumed to come from GluonNLP's testing utilities.
from gluonnlp.utils.testing import verify_backbone_fp16

def test_robert_small_config(compute_layout, ctx):
    with ctx:
        cfg = RobertaModel.get_cfg()
        cfg.defrost()
        cfg.MODEL.vocab_size = 1000
        cfg.MODEL.num_layers = 2
        cfg.MODEL.hidden_size = 128
        cfg.MODEL.num_heads = 2
        cfg.MODEL.compute_layout = compute_layout
        cfg.freeze()

        # Generate TN layout
        cfg_tn = cfg.clone()
        cfg_tn.defrost()
        cfg_tn.MODEL.layout = 'TN'
        cfg_tn.freeze()

        batch_size = 4
        sequence_length = 16
        num_mask = 3
        inputs = mx.np.random.randint(0, 10, (batch_size, sequence_length))
        valid_length = mx.np.random.randint(3, sequence_length, (batch_size,))
        masked_positions = mx.np.random.randint(0, 3, (batch_size, num_mask))

        roberta_model = RobertaModel.from_cfg(cfg)
        roberta_model.initialize()
        roberta_model.hybridize()
        contextual_embeddings, pooled_out = roberta_model(inputs, valid_length)
        roberta_model_tn = RobertaModel.from_cfg(cfg_tn)
        roberta_model_tn.share_parameters(roberta_model.collect_params())
        roberta_model_tn.hybridize()
        contextual_embeddings_tn, pooled_out_tn = roberta_model_tn(inputs.T, valid_length)
        assert_allclose(np.swapaxes(contextual_embeddings_tn.asnumpy(), 0, 1),
                        contextual_embeddings.asnumpy(), 1E-3, 1E-3)
        assert_allclose(pooled_out_tn.asnumpy(), pooled_out.asnumpy(), 1E-3, 1E-3)

        # Test for RobertaForMLM
        roberta_mlm_model = RobertaForMLM(cfg)
        roberta_mlm_model.initialize()
        roberta_mlm_model.hybridize()
        contextual_embedding, pooled_out, mlm_score = roberta_mlm_model(inputs, valid_length,
                                                                         masked_positions)
        roberta_mlm_model_tn = RobertaForMLM(cfg_tn)
        roberta_mlm_model_tn.share_parameters(roberta_mlm_model.collect_params())
        roberta_mlm_model_tn.hybridize()
        contextual_embedding_tn, pooled_out_tn, mlm_score_tn =\
            roberta_mlm_model_tn(inputs.T, valid_length.T, masked_positions)
        assert_allclose(np.swapaxes(contextual_embedding_tn.asnumpy(), 0, 1),
                        contextual_embedding.asnumpy(), 1E-3, 1E-3)
        assert_allclose(pooled_out_tn.asnumpy(), pooled_out.asnumpy(), 1E-3, 1E-3)
        assert_allclose(mlm_score_tn.asnumpy(), mlm_score.asnumpy(), 1E-3, 1E-3)

        # Test for fp16
        if ctx.device_type == 'gpu':
            verify_backbone_fp16(model_cls=RobertaModel, cfg=cfg, ctx=ctx,
                                 inputs=[inputs, valid_length])
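
A minimal sketch of how this test could be driven with pytest follows; the wrapper name, the parametrized layout values, and the use of mx.cpu() are assumptions chosen to match the 'NT'/'TN' layouts exercised above, not part of the original test suite.

import pytest
import mxnet as mx

# Hypothetical pytest driver: parametrize compute_layout and run the test above on CPU.
@pytest.mark.parametrize('compute_layout', ['NT', 'TN'])
def test_robert_small_config_cpu(compute_layout):
    test_robert_small_config(compute_layout, ctx=mx.cpu())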
Code Example #2
# Imports assumed for this test snippet (not part of the original excerpt).
import tempfile
import mxnet as mx
import numpy as np
from gluonnlp.models.roberta import RobertaModel, RobertaForMLM, get_pretrained_roberta
from gluonnlp.loss import LabelSmoothCrossEntropyLoss

def test_roberta(model_name):
    # test from pretrained
    with tempfile.TemporaryDirectory() as root:
        cfg, tokenizer, params_path, mlm_params_path =\
            get_pretrained_roberta(model_name, load_backbone=True, load_mlm=True, root=root)
        assert cfg.MODEL.vocab_size == len(tokenizer.vocab)
        # test backbone
        roberta_model = RobertaModel.from_cfg(cfg)
        roberta_model.load_parameters(params_path)
        roberta_model.hybridize()
        # test mlm model: load the full MLM head parameters when they are available
        roberta_mlm_model = RobertaForMLM(cfg)
        if mlm_params_path is not None:
            roberta_mlm_model.load_parameters(mlm_params_path)
        # also exercise the path where only the backbone parameters are loaded into a fresh MLM model
        roberta_mlm_model = RobertaForMLM(cfg)
        roberta_mlm_model.backbone_model.load_parameters(params_path)

        # test forward
        batch_size = 3
        seq_length = 32
        vocab_size = len(tokenizer.vocab)
        input_ids = mx.np.array(
            np.random.randint(
                2,
                vocab_size,
                (batch_size, seq_length)
            ),
            dtype=np.int32
        )
        valid_length = mx.np.array(
            np.random.randint(
                seq_length // 2,
                seq_length,
                (batch_size,)
            ),
            dtype=np.int32
        )
        roberta_model(input_ids, valid_length)
        mx.npx.waitall()
        # test backward
        label_smooth_loss = LabelSmoothCrossEntropyLoss(num_labels=vocab_size)
        with mx.autograd.record():
            contextual_embeddings, pooled_out = roberta_model(input_ids, valid_length)
            loss = label_smooth_loss(contextual_embeddings, input_ids)
            loss.backward()
        mx.npx.waitall()
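
This test assumes model_name names one of GluonNLP's registered pretrained RoBERTa checkpoints. A hedged sketch of driving it over every registered model (the wrapper itself is not part of the original test file):

import pytest
from gluonnlp.models.roberta import list_pretrained_roberta

# Hypothetical driver: run the test above for each registered pretrained RoBERTa model.
@pytest.mark.parametrize('model_name', list_pretrained_roberta())
def test_roberta_all_pretrained(model_name):
    test_roberta(model_name)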
Code Example #3
# Imports assumed for this conversion-script snippet (not part of the original excerpt);
# convert_vocab, convert_config, convert_params, test_model and naming_convention are
# helper functions defined elsewhere in the same script.
import os
import shutil
import logging
import mxnet as mx
from fairseq.models.roberta import RobertaModel as fairseq_RobertaModel
from gluonnlp.models.roberta import RobertaModel

def convert_fairseq_model(args):
    if not args.save_dir:
        args.save_dir = os.path.basename(args.fairseq_model_path) + '_gluon'
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    fairseq_roberta = fairseq_RobertaModel.from_pretrained(
        args.fairseq_model_path, checkpoint_file='model.pt')
    vocab_size = convert_vocab(args, fairseq_roberta)

    gluon_cfg = convert_config(fairseq_roberta.args, vocab_size,
                               RobertaModel.get_cfg().clone())
    with open(os.path.join(args.save_dir, 'model.yml'), 'w') as of:
        of.write(gluon_cfg.dump())

    ctx = mx.gpu(args.gpu) if args.gpu is not None else mx.cpu()
    gluon_roberta = convert_params(fairseq_roberta, gluon_cfg, ctx)
    if args.test:
        test_model(fairseq_roberta, gluon_roberta, args.gpu)

    gluon_roberta.save_parameters(os.path.join(args.save_dir,
                                               'model_mlm.params'),
                                  deduplicate=True)
    logging.info('Converted the RoBERTa MLM model from {} to {}'.format(
        os.path.join(args.fairseq_model_path, 'model.pt'),
        os.path.join(args.save_dir, 'model_mlm.params')))
    gluon_roberta.backbone_model.save_parameters(os.path.join(
        args.save_dir, 'model.params'),
                                                 deduplicate=True)
    logging.info('Converted the RoBERTa backbone model from {} to {}'.format(
        os.path.join(args.fairseq_model_path, 'model.pt'),
        os.path.join(args.save_dir, 'model.params')))

    logging.info('Conversion finished!')
    logging.info('Statistics:')
    old_names = os.listdir(args.save_dir)
    for old_name in old_names:
        new_name, long_hash = naming_convention(args.save_dir, old_name)
        old_path = os.path.join(args.save_dir, old_name)
        new_path = os.path.join(args.save_dir, new_name)
        shutil.move(old_path, new_path)
        file_size = os.path.getsize(new_path)
        logging.info('\t{}/{} {} {}'.format(args.save_dir, new_name, long_hash,
                                            file_size))
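
convert_fairseq_model only reads args.fairseq_model_path, args.save_dir, args.gpu and args.test, so a command-line entry point could look like the sketch below; the flag names, defaults, and help strings are assumptions, not the original script's interface.

import argparse
import logging

def parse_args():
    # Hypothetical CLI mirroring the attributes convert_fairseq_model reads.
    parser = argparse.ArgumentParser(description='Convert a fairseq RoBERTa checkpoint to GluonNLP parameters.')
    parser.add_argument('--fairseq_model_path', required=True, help='Directory containing the fairseq model.pt')
    parser.add_argument('--save_dir', default=None, help='Output directory (defaults to <model>_gluon)')
    parser.add_argument('--gpu', type=int, default=None, help='GPU id to use; CPU is used when omitted')
    parser.add_argument('--test', action='store_true', help='Compare fairseq and Gluon outputs after conversion')
    return parser.parse_args()

if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    convert_fairseq_model(parse_args())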