Example #1
def _bert_lm_model(model_name=None, dataset_name=None, vocab=None,
                   pretrained=True, ctx=mx.cpu(),
                   root=os.path.join('~', '.mxnet', 'models'), **kwargs):
    """BERT based pretrained language model.

    Returns
    -------
    BERTRNN, gluonnlp.vocab.BERTVocab
    """
    predefined_args = bert_lm_hparams[model_name].copy()
    mutable_args = ['use_residual', 'dropout', 'embed_dropout', 'word_embed',
                    'rnn_dropout', 'rnn_weight_drop', 'rnn_drop_h', 'rnn_drop_i',
                    'rnn_drop_e', 'rnn_drop_l']
    mutable_args = frozenset(mutable_args)
    assert all((k not in kwargs or k in mutable_args) for k in predefined_args), \
        'Cannot override predefined model settings.'
    predefined_args.update(kwargs)
    # encoder
    encoder = BERTMaskedEncoder(attention_cell=predefined_args['attention_cell'],
                                num_layers=predefined_args['num_layers'],
                                units=predefined_args['units'],
                                hidden_size=predefined_args['hidden_size'],
                                max_length=predefined_args['max_length'],
                                num_heads=predefined_args['num_heads'],
                                scaled=predefined_args['scaled'],
                                dropout=predefined_args['dropout'],
                                use_residual=predefined_args['use_residual'])
    # bert_vocab
    from gluonnlp.vocab.bert import BERTVocab
    bert_vocab = _load_vocab(bert_vocabs[model_name], vocab, root, cls=BERTVocab)
    # BERT
    bert = BERTMaskedModel(encoder, len(bert_vocab),
                           token_type_vocab_size=predefined_args['token_type_vocab_size'],
                           units=predefined_args['units'],
                           embed_size=predefined_args['embed_size'],
                           embed_dropout=predefined_args['embed_dropout'],
                           word_embed=predefined_args['word_embed'],
                           use_pooler=False, use_decoder=False,
                           use_classifier=False)

    # BERT LM
    net = BERTRNN(embedding=bert, mode=predefined_args['rnn_mode'], vocab_size=len(bert_vocab),
                  embed_size=predefined_args['rnn_embed_size'],
                  hidden_size=predefined_args['rnn_hidden_size'],
                  hidden_size_last=predefined_args['rnn_hidden_size_last'],
                  num_layers=predefined_args['rnn_num_layers'],
                  tie_weights=predefined_args['rnn_tie_weights'],
                  dropout=predefined_args['rnn_dropout'],
                  weight_drop=predefined_args['rnn_weight_drop'],
                  drop_h=predefined_args['rnn_drop_h'],
                  drop_i=predefined_args['rnn_drop_i'],
                  drop_e=predefined_args['rnn_drop_e'],
                  drop_l=predefined_args['rnn_drop_l'],
                  num_experts=predefined_args['rnn_num_experts'],
                  upperbound_fixed_layer=predefined_args['upperbound_fixed_layer'], **kwargs)

    if pretrained:
        _load_pretrained_params(net, model_name, dataset_name, root, ctx,
                                ignore_extra=True)
    return net, bert_vocab
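
A minimal usage sketch (the model and dataset names below are assumptions about the keys in bert_lm_hparams and bert_vocabs; the helper is private, so the real entries may differ):

import mxnet as mx

# Load the BERT-backed RNN language model and its vocabulary on CPU
# (hypothetical model/dataset names, for illustration only).
lm_net, lm_vocab = _bert_lm_model(model_name='bert_12_768_12',
                                  dataset_name='book_corpus_wiki_en_uncased',
                                  pretrained=True, ctx=mx.cpu())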
Example #2
def xlnet_cased_l24_h1024_a16(dataset_name: Optional[str] = None, vocab: Optional[nlp.Vocab] = None,
                              tokenizer: Optional[XLNetTokenizer] = None, pretrained: bool = True,
                              ctx: mx.Context = mx.cpu(),
                              root=os.path.join(get_home_dir(), 'models'),
                              do_lower_case=False, **kwargs):
    """XLNet model.

    References:
    Yang, Z., Dai, Z., Yang, Y., Carbonell, J., Salakhutdinov, R., & Le, Q. V.
    (2019). XLNet: Generalized Autoregressive Pretraining for Language
    Understanding. arXiv preprint arXiv:1906.08237.


    Parameters
    ----------
    dataset_name : str or None, default None
        If not None, the dataset name is used to load a vocabulary for the
        dataset. If the `pretrained` argument is set to True, the dataset name
        is further used to select the pretrained parameters to load.
        Options include 'books_enwiki_giga5_clueweb2012b_commoncrawl'.
    vocab : gluonnlp.vocab.Vocab or None, default None
        Vocabulary for the dataset. Must be provided if dataset_name is not
        specified. Ignored if dataset_name is specified.
    tokenizer : XLNetTokenizer or None, default None
        XLNetTokenizer for the dataset. Must be provided if dataset_name is not
        specified. Ignored if dataset_name is specified.
    pretrained : bool, default True
        Whether to load the pretrained weights for the model.
    ctx : Context, default CPU
        The context in which to load the pretrained weights.
    root : str, default '$MXNET_HOME/models'
        Location for keeping the model parameters.
        MXNET_HOME defaults to '~/.mxnet'.

    Returns
    -------
    XLNet, gluonnlp.Vocab, XLNetTokenizer

    """
    kwargs.update({
        'hidden_size': 4096,
        'units': 1024,
        'activation': 'approx_gelu',
        'num_heads': 16,
        'num_layers': 24,
    })
    if vocab is None or dataset_name is not None:
        vocab = _load_vocab('xlnet_' + dataset_name, vocab, root)
    net = XLNet(vocab_size=len(vocab), **kwargs)
    if pretrained:
        _load_pretrained_params(net=net, model_name='xlnet_cased_l24_h1024_a16',
                                dataset_name=dataset_name, root=root, ctx=ctx,
                                ignore_extra=not kwargs.get('use_decoder', True))
    if tokenizer is None or dataset_name is not None:
        tokenizer = _get_xlnet_tokenizer(dataset_name, root, do_lower_case)
    return net, vocab, tokenizer
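
A usage sketch based on the options documented above (loading the pretrained weights downloads them on first use):

import mxnet as mx

# Load the 24-layer cased XLNet together with its vocabulary and tokenizer.
net, vocab, tokenizer = xlnet_cased_l24_h1024_a16(
    dataset_name='books_enwiki_giga5_clueweb2012b_commoncrawl',
    pretrained=True, ctx=mx.cpu())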
Example #3
def get_bort_model(model_name=None,
                   dataset_name=None,
                   vocab=None,
                   pretrained=True,
                   ctx=mx.cpu(),
                   use_decoder=True,
                   output_attention=False,
                   output_all_encodings=False,
                   root=os.path.join(get_home_dir(), 'models'),
                   **kwargs):
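    """Bort model, built on a BERT-style encoder.

    Returns
    -------
    BortModel, gluonnlp.vocab.Vocab
    """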
    predefined_args = predefined_borts[model_name].copy()
    logging.info(f'get_bort_model: {model_name}, predefined_args: {predefined_args}')
    mutable_args = ['use_residual', 'dropout', 'embed_dropout', 'word_embed']
    mutable_args = frozenset(mutable_args)
    assert all((k not in kwargs or k in mutable_args) for k in predefined_args), \
        'Cannot override predefined model settings.'
    predefined_args.update(kwargs)
    # encoder
    encoder = BERTEncoder(attention_cell=predefined_args['attention_cell'],
                          num_layers=predefined_args['num_layers'],
                          units=predefined_args['units'],
                          hidden_size=predefined_args['hidden_size'],
                          max_length=predefined_args['max_length'],
                          num_heads=predefined_args['num_heads'],
                          scaled=predefined_args['scaled'],
                          dropout=predefined_args['dropout'],
                          output_attention=output_attention,
                          output_all_encodings=output_all_encodings,
                          use_residual=predefined_args['use_residual'],
                          activation=predefined_args.get('activation', 'gelu'),
                          layer_norm_eps=predefined_args.get(
                              'layer_norm_eps', None))

    from gluonnlp.vocab import Vocab
    bort_vocab = _load_vocab(dataset_name, vocab, root, cls=Vocab)

    net = BortModel(encoder,
                    len(bort_vocab),
                    units=predefined_args['units'],
                    embed_size=predefined_args['embed_size'],
                    embed_dropout=predefined_args['embed_dropout'],
                    word_embed=predefined_args['word_embed'],
                    use_decoder=use_decoder)
    if pretrained:
        ignore_extra = not use_decoder
        _load_pretrained_params(net,
                                model_name,
                                dataset_name,
                                root,
                                ctx,
                                ignore_extra=ignore_extra,
                                allow_missing=False)
    return net, bort_vocab
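
A usage sketch (the model and dataset names are hypothetical placeholders for entries in predefined_borts; the real names depend on the Bort release):

import mxnet as mx

# Load a Bort model and its vocabulary (hypothetical names, for illustration only).
bort_net, bort_vocab = get_bort_model(model_name='bort_4_8_768_1024',
                                      dataset_name='openwebtext_ccnews_stories_books_cased',
                                      pretrained=True, ctx=mx.cpu())

Example #4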
def _get_gpt2_model(model_name=None,
                    dataset_name=None,
                    vocab=None,
                    pretrained=True,
                    ctx=mx.cpu(),
                    root=os.path.join(get_home_dir(), 'models'),
                    **kwargs):
    """Any predefined GPT-2 model.

    Parameters
    ----------
    model_name : str or None, default None
        Options include 'gpt2_117m' and 'gpt2_345m'.
    dataset_name : str or None, default None
        If not None, the dataset name is used to load a vocabulary for the
        dataset. If the `pretrained` argument is set to True, the dataset name
        is further used to select the pretrained parameters to load.
        The supported dataset for both 'gpt2_117m' and 'gpt2_345m' is
        'openai_webtext'.
    vocab : gluonnlp.vocab.Vocab or None, default None
        Vocabulary for the dataset. Must be provided if dataset_name is not
        specified. Ignored if dataset_name is specified.
    pretrained : bool, default True
        Whether to load the pretrained weights for the model.
    ctx : Context, default CPU
        The context in which to load the pretrained weights.
    root : str, default '$MXNET_HOME/models'
        Location for keeping the model parameters.
        MXNET_HOME defaults to '~/.mxnet'.

    Returns
    -------
    GPT2Model, gluonnlp.vocab.Vocab
    """
    predefined_args = gpt2_hparams[model_name].copy()
    mutable_args = ['dropout']
    mutable_args = frozenset(mutable_args)
    assert all((k not in kwargs or k in mutable_args) for k in predefined_args), \
        'Cannot override predefined model settings.'
    predefined_args.update(kwargs)
    vocab = _load_vocab(dataset_name, vocab, root)
    # GPT-2
    net = GPT2Model(units=predefined_args['units'],
                    vocab_size=len(vocab),
                    max_length=predefined_args['max_length'],
                    num_layers=predefined_args['num_layers'],
                    num_heads=predefined_args['num_heads'],
                    dropout=predefined_args['dropout'],
                    **kwargs)
    if pretrained:
        _load_pretrained_params(net, model_name, dataset_name, root, ctx)
    # Disable the erf-based GELU path in each position-wise FFN activation
    # (falls back to the approximate GELU formula).
    for i in range(net._num_layers):
        net._ffn_layers[i]._act._support_erf = False
    return net, vocab
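
A usage sketch with the options documented in the docstring above (the function is private, so it is normally reached through a public wrapper):

import mxnet as mx

# Load the 117M-parameter GPT-2 and its vocabulary on CPU.
gpt2_net, gpt2_vocab = _get_gpt2_model(model_name='gpt2_117m',
                                       dataset_name='openai_webtext',
                                       pretrained=True, ctx=mx.cpu())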
Example #5
def _get_gpt2_model(model_name=None,
                    dataset_name=None,
                    vocab=None,
                    pretrained=True,
                    ctx=mx.cpu(),
                    root=os.path.join(get_home_dir(), 'models'),
                    hparam_allow_override=False,
                    **kwargs):
    """Any predefined GPT-2 model.

    Parameters
    ----------
    model_name : str or None, default None
        Options include 'gpt2_117m' and 'gpt2_345m'.
    dataset_name : str or None, default None
        If not None, the dataset name is used to load a vocabulary for the
        dataset. If the `pretrained` argument is set to True, the dataset name
        is further used to select the pretrained parameters to load.
        The supported dataset for both 'gpt2_117m' and 'gpt2_345m' is
        'openai_webtext'.
    vocab : gluonnlp.vocab.Vocab or None, default None
        Vocabulary for the dataset. Must be provided if dataset_name is not
        specified. Ignored if dataset_name is specified.
    pretrained : bool, default True
        Whether to load the pretrained weights for the model.
    ctx : Context, default CPU
        The context in which to load the pretrained weights.
    root : str, default '$MXNET_HOME/models'
        Location for keeping the model parameters.
        MXNET_HOME defaults to '~/.mxnet'.
    hparam_allow_override : bool, default False
        If set to True, pre-defined hyper-parameters of the model
        (e.g. the number of layers, hidden units) can be overridden.

    Returns
    -------
    GPT2Model, gluonnlp.vocab.Vocab
    """
    predefined_args = gpt2_hparams[model_name].copy()
    if not hparam_allow_override:
        mutable_args = ['dropout']
        mutable_args = frozenset(mutable_args)
        assert all((k not in kwargs or k in mutable_args) for k in predefined_args), \
            'Cannot override predefined model settings.'
    predefined_args.update(kwargs)
    vocab = _load_vocab(dataset_name, vocab, root)
    # GPT2
    net = GPT2Model(vocab_size=len(vocab), **predefined_args)
    if pretrained:
        _load_pretrained_params(net, model_name, dataset_name, root, ctx)
    return net, vocab
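
A sketch of the hparam_allow_override path: with the flag set, predefined hyper-parameters can be overridden through kwargs (num_layers is assumed to be one of the gpt2_hparams keys, as it is passed explicitly in the previous example; the result is a smaller, randomly initialised variant, so pretrained is set to False):

# Build a reduced GPT-2 variant by overriding the predefined layer count.
small_net, small_vocab = _get_gpt2_model(model_name='gpt2_117m',
                                         dataset_name='openai_webtext',
                                         pretrained=False,
                                         hparam_allow_override=True,
                                         num_layers=6)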