def _bert_lm_model(model_name=None, dataset_name=None, vocab=None, pretrained=True, ctx=mx.cpu(),
                   root=os.path.join('~', '.mxnet', 'models'), **kwargs):
    """BERT-based pretrained language model.

    Returns
    -------
    BERTRNN, gluonnlp.vocab.BERTVocab
    """
    predefined_args = bert_lm_hparams[model_name]
    mutable_args = ['use_residual', 'dropout', 'embed_dropout', 'word_embed', 'rnn_dropout',
                    'rnn_weight_drop', 'rnn_drop_h', 'rnn_drop_i', 'rnn_drop_e', 'rnn_drop_l']
    mutable_args = frozenset(mutable_args)
    assert all((k not in kwargs or k in mutable_args) for k in predefined_args), \
        'Cannot override predefined model settings.'
    predefined_args.update(kwargs)

    # encoder
    encoder = BERTMaskedEncoder(attention_cell=predefined_args['attention_cell'],
                                num_layers=predefined_args['num_layers'],
                                units=predefined_args['units'],
                                hidden_size=predefined_args['hidden_size'],
                                max_length=predefined_args['max_length'],
                                num_heads=predefined_args['num_heads'],
                                scaled=predefined_args['scaled'],
                                dropout=predefined_args['dropout'],
                                use_residual=predefined_args['use_residual'])

    # BERT vocabulary
    from gluonnlp.vocab.bert import BERTVocab
    bert_vocab = _load_vocab(bert_vocabs[model_name], vocab, root, cls=BERTVocab)

    # BERT encoder model without the pooler/decoder/classifier heads
    bert = BERTMaskedModel(encoder, len(bert_vocab),
                           token_type_vocab_size=predefined_args['token_type_vocab_size'],
                           units=predefined_args['units'],
                           embed_size=predefined_args['embed_size'],
                           embed_dropout=predefined_args['embed_dropout'],
                           word_embed=predefined_args['word_embed'],
                           use_pooler=False, use_decoder=False, use_classifier=False)

    # BERT language model: the BERT embedding feeds an RNN language-model head
    net = BERTRNN(embedding=bert, mode=predefined_args['rnn_mode'],
                  vocab_size=len(bert_vocab),
                  embed_size=predefined_args['rnn_embed_size'],
                  hidden_size=predefined_args['rnn_hidden_size'],
                  hidden_size_last=predefined_args['rnn_hidden_size_last'],
                  num_layers=predefined_args['rnn_num_layers'],
                  tie_weights=predefined_args['rnn_tie_weights'],
                  dropout=predefined_args['rnn_dropout'],
                  weight_drop=predefined_args['rnn_weight_drop'],
                  drop_h=predefined_args['rnn_drop_h'],
                  drop_i=predefined_args['rnn_drop_i'],
                  drop_e=predefined_args['rnn_drop_e'],
                  drop_l=predefined_args['rnn_drop_l'],
                  num_experts=predefined_args['rnn_num_experts'],
                  upperbound_fixed_layer=predefined_args['upperbound_fixed_layer'],
                  **kwargs)

    if pretrained:
        _load_pretrained_params(net, model_name, dataset_name, root, ctx, ignore_extra=True)
    return net, bert_vocab
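

# Minimal usage sketch for the loader above. The key names are placeholders:
# 'bert_lm_12_768_12' and 'wikitext-2' are hypothetical and must be replaced by
# keys actually registered in bert_lm_hparams / bert_vocabs and by an available
# dataset name for pretrained parameters.
def _example_bert_lm_usage():
    net, bert_vocab = _bert_lm_model(model_name='bert_lm_12_768_12',  # hypothetical hparam key
                                     dataset_name='wikitext-2',       # hypothetical dataset name
                                     pretrained=False, ctx=mx.cpu())
    net.initialize(ctx=mx.cpu())  # required because pretrained=False leaves parameters uninitialized
    return net, bert_vocab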


def xlnet_cased_l24_h1024_a16(dataset_name: Optional[str] = None, vocab: Optional[nlp.Vocab] = None,
                              tokenizer: Optional[XLNetTokenizer] = None, pretrained: bool = True,
                              ctx: mx.Context = mx.cpu(),
                              root=os.path.join(get_home_dir(), 'models'), do_lower_case=False,
                              **kwargs):
    """XLNet model.

    References:
    Yang, Z., Dai, Z., Yang, Y., Carbonell, J., Salakhutdinov, R., & Le, Q. V. (2019).
    XLNet: Generalized Autoregressive Pretraining for Language Understanding.
    arXiv preprint arXiv:1906.08237.

    Parameters
    ----------
    dataset_name : str or None, default None
        If not None, the dataset name is used to load a vocabulary for the dataset. If the
        `pretrained` argument is set to True, the dataset name is further used to select the
        pretrained parameters to load.
        Options include 'books_enwiki_giga5_clueweb2012b_commoncrawl'.
    vocab : gluonnlp.vocab.Vocab or None, default None
        Vocabulary for the dataset. Must be provided if dataset_name is not specified. Ignored
        if dataset_name is specified.
    tokenizer : XLNetTokenizer or None, default None
        XLNetTokenizer for the dataset. Must be provided if dataset_name is not specified.
        Ignored if dataset_name is specified.
    pretrained : bool, default True
        Whether to load the pretrained weights for model.
    ctx : Context, default CPU
        The context in which to load the pretrained weights.
    root : str, default '$MXNET_HOME/models'
        Location for keeping the model parameters. MXNET_HOME defaults to '~/.mxnet'.
    do_lower_case : bool, default False
        Whether the tokenizer loaded for `dataset_name` lower-cases its input.

    Returns
    -------
    XLNet, gluonnlp.Vocab, XLNetTokenizer
    """
    kwargs.update(**{
        'hidden_size': 4096,
        'units': 1024,
        'activation': 'approx_gelu',
        'num_heads': 16,
        'num_layers': 24,
    })
    if vocab is None or dataset_name is not None:
        vocab = _load_vocab('xlnet_' + dataset_name, vocab, root)
    net = XLNet(vocab_size=len(vocab), **kwargs)
    if pretrained:
        _load_pretrained_params(net=net, model_name='xlnet_cased_l24_h1024_a16',
                                dataset_name=dataset_name, root=root, ctx=ctx,
                                ignore_extra=not kwargs.get('use_decoder', True))
    if tokenizer is None or dataset_name is not None:
        tokenizer = _get_xlnet_tokenizer(dataset_name, root, do_lower_case)
    return net, vocab, tokenizer
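

# Minimal usage sketch, assuming the pretrained artifacts for the
# 'books_enwiki_giga5_clueweb2012b_commoncrawl' dataset (the option named in the
# docstring above) are available; the call downloads vocabulary, tokenizer files
# and parameters into `root` as a side effect.
def _example_xlnet_usage():
    net, vocab, tokenizer = xlnet_cased_l24_h1024_a16(
        dataset_name='books_enwiki_giga5_clueweb2012b_commoncrawl',
        pretrained=True, ctx=mx.cpu())
    # The returned vocabulary and tokenizer are the ones the pretrained weights
    # expect; len(vocab) matches the embedding size of the returned net.
    return net, vocab, tokenizer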


def get_bort_model(model_name=None, dataset_name=None, vocab=None, pretrained=True, ctx=mx.cpu(),
                   use_decoder=True, output_attention=False, output_all_encodings=False,
                   root=os.path.join(get_home_dir(), 'models'), **kwargs):
    """Any predefined Bort model.

    Returns
    -------
    BortModel, gluonnlp.vocab.Vocab
    """
    predefined_args = predefined_borts[model_name]
    logging.info('get_bort_model: %s, predefined_args: %s', model_name, predefined_args)
    mutable_args = ['use_residual', 'dropout', 'embed_dropout', 'word_embed']
    mutable_args = frozenset(mutable_args)
    assert all((k not in kwargs or k in mutable_args) for k in predefined_args), \
        'Cannot override predefined model settings.'
    predefined_args.update(kwargs)

    # encoder
    encoder = BERTEncoder(attention_cell=predefined_args['attention_cell'],
                          num_layers=predefined_args['num_layers'],
                          units=predefined_args['units'],
                          hidden_size=predefined_args['hidden_size'],
                          max_length=predefined_args['max_length'],
                          num_heads=predefined_args['num_heads'],
                          scaled=predefined_args['scaled'],
                          dropout=predefined_args['dropout'],
                          output_attention=output_attention,
                          output_all_encodings=output_all_encodings,
                          use_residual=predefined_args['use_residual'],
                          activation=predefined_args.get('activation', 'gelu'),
                          layer_norm_eps=predefined_args.get('layer_norm_eps', None))

    from gluonnlp.vocab import Vocab
    bort_vocab = _load_vocab(dataset_name, vocab, root, cls=Vocab)

    net = BortModel(encoder, len(bort_vocab),
                    units=predefined_args['units'],
                    embed_size=predefined_args['embed_size'],
                    embed_dropout=predefined_args['embed_dropout'],
                    word_embed=predefined_args['word_embed'],
                    use_decoder=use_decoder)
    if pretrained:
        ignore_extra = not use_decoder
        _load_pretrained_params(net, model_name, dataset_name, root, ctx,
                                ignore_extra=ignore_extra, allow_missing=False)
    return net, bort_vocab
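

# Minimal usage sketch for the Bort loader. 'bort_4_8_768_1024' and
# 'openwebtext_ccnews_stories_books_cased' are illustrative placeholders only,
# not confirmed keys; use whichever entries are actually present in
# predefined_borts and the published vocabularies.
def _example_bort_usage():
    net, bort_vocab = get_bort_model(
        model_name='bort_4_8_768_1024',                         # hypothetical hparam key
        dataset_name='openwebtext_ccnews_stories_books_cased',  # hypothetical dataset name
        pretrained=False, use_decoder=False, ctx=mx.cpu())
    net.initialize(ctx=mx.cpu())  # required because pretrained=False
    return net, bort_vocab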


def _get_gpt2_model(model_name=None, dataset_name=None, vocab=None, pretrained=True, ctx=mx.cpu(),
                    root=os.path.join(get_home_dir(), 'models'), **kwargs):
    """Any predefined GPT-2 model.

    Parameters
    ----------
    model_name : str or None, default None
        Options include 'gpt2_117m' and 'gpt2_345m'.
    dataset_name : str or None, default None
        If not None, the dataset name is used to load a vocabulary for the dataset. If the
        `pretrained` argument is set to True, the dataset name is further used to select the
        pretrained parameters to load.
        The supported dataset for model_name of either 'gpt2_117m' or 'gpt2_345m' is
        'openai_webtext'.
    vocab : gluonnlp.vocab.Vocab or None, default None
        Vocabulary for the dataset. Must be provided if dataset_name is not specified. Ignored
        if dataset_name is specified.
    pretrained : bool, default True
        Whether to load the pretrained weights for model.
    ctx : Context, default CPU
        The context in which to load the pretrained weights.
    root : str, default '$MXNET_HOME/models'
        Location for keeping the model parameters. MXNET_HOME defaults to '~/.mxnet'.

    Returns
    -------
    GPT2Model, gluonnlp.vocab.Vocab
    """
    predefined_args = gpt2_hparams[model_name]
    mutable_args = ['dropout']
    mutable_args = frozenset(mutable_args)
    assert all((k not in kwargs or k in mutable_args) for k in predefined_args), \
        'Cannot override predefined model settings.'
    predefined_args.update(kwargs)
    vocab = _load_vocab(dataset_name, vocab, root)

    # GPT-2
    net = GPT2Model(units=predefined_args['units'],
                    vocab_size=len(vocab),
                    max_length=predefined_args['max_length'],
                    num_layers=predefined_args['num_layers'],
                    num_heads=predefined_args['num_heads'],
                    dropout=predefined_args['dropout'],
                    **kwargs)
    if pretrained:
        _load_pretrained_params(net, model_name, dataset_name, root, ctx)
    for i in range(net._num_layers):
        # Disable the erf-based GELU in the position-wise FFN layers so the
        # tanh approximation is used instead.
        net._ffn_layers[i]._act._support_erf = False
    return net, vocab
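

# The loop above switches the feed-forward activations from the erf-based GELU
# to its tanh approximation. A standalone sketch of the two variants follows for
# reference; _gelu_exact and _gelu_tanh_approx are illustrative helpers operating
# on mx.nd.NDArray inputs and are not part of the GPT2Model internals.
import math

def _gelu_exact(x):
    # GELU(x) = x * Phi(x), with Phi the standard normal CDF written via erf.
    return x * 0.5 * (1.0 + mx.nd.erf(x / math.sqrt(2.0)))

def _gelu_tanh_approx(x):
    # Tanh approximation used when the erf path is unavailable or disabled.
    return 0.5 * x * (1.0 + mx.nd.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * (x ** 3))))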


def _get_gpt2_model(model_name=None, dataset_name=None, vocab=None, pretrained=True, ctx=mx.cpu(),
                    root=os.path.join(get_home_dir(), 'models'), hparam_allow_override=False,
                    **kwargs):
    """Any predefined GPT-2 model.

    Parameters
    ----------
    model_name : str or None, default None
        Options include 'gpt2_117m' and 'gpt2_345m'.
    dataset_name : str or None, default None
        If not None, the dataset name is used to load a vocabulary for the dataset. If the
        `pretrained` argument is set to True, the dataset name is further used to select the
        pretrained parameters to load.
        The supported dataset for model_name of either 'gpt2_117m' or 'gpt2_345m' is
        'openai_webtext'.
    vocab : gluonnlp.vocab.Vocab or None, default None
        Vocabulary for the dataset. Must be provided if dataset_name is not specified. Ignored
        if dataset_name is specified.
    pretrained : bool, default True
        Whether to load the pretrained weights for model.
    ctx : Context, default CPU
        The context in which to load the pretrained weights.
    root : str, default '$MXNET_HOME/models'
        Location for keeping the model parameters. MXNET_HOME defaults to '~/.mxnet'.
    hparam_allow_override : bool, default False
        If set to True, pre-defined hyper-parameters of the model
        (e.g. the number of layers, hidden units) can be overridden.

    Returns
    -------
    GPT2Model, gluonnlp.vocab.Vocab
    """
    predefined_args = gpt2_hparams[model_name].copy()
    if not hparam_allow_override:
        mutable_args = ['dropout']
        mutable_args = frozenset(mutable_args)
        assert all((k not in kwargs or k in mutable_args) for k in predefined_args), \
            'Cannot override predefined model settings.'
    predefined_args.update(kwargs)
    vocab = _load_vocab(dataset_name, vocab, root)

    # GPT-2
    net = GPT2Model(vocab_size=len(vocab), **predefined_args)
    if pretrained:
        _load_pretrained_params(net, model_name, dataset_name, root, ctx)
    return net, vocab
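

# Minimal usage sketch of the hparam_allow_override escape hatch: load the
# 'gpt2_117m' / 'openai_webtext' configuration documented above but shrink the
# network for quick experiments. With the default hparam_allow_override=False,
# passing num_layers here would trip the assertion instead.
def _example_gpt2_override_usage():
    net, vocab = _get_gpt2_model(model_name='gpt2_117m',
                                 dataset_name='openai_webtext',
                                 pretrained=False,             # random init; shapes no longer match the zoo
                                 hparam_allow_override=True,
                                 num_layers=4, dropout=0.1)    # override pre-defined hyper-parameters
    net.initialize(ctx=mx.cpu())
    return net, vocab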