Beispiel #1
0
    nlp.data.batchify.Pad(axis=0, pad_val=vocab[vocab.padding_token]),
    nlp.data.batchify.Pad(axis=0, pad_val=vocab[vocab.padding_token]),
    nlp.data.batchify.Stack('float32'), nlp.data.batchify.Stack('float32'),
    nlp.data.batchify.Stack('float32'))

# BERT tokenizer built over `vocab`; `lower` is forwarded as the lower-casing flag.
berttoken = nlp.data.BERTTokenizer(vocab=vocab, lower=lower)

# Question-answering model wrapping the BERT encoder `bert`.
net = BertForQA(bert=bert)
if model_parameters:
    # A complete BertForQA checkpoint was supplied: restore all parameters from it.
    net.load_parameters(model_parameters, ctx=ctx)
else:
    if pretrained_bert_parameters:
        # Restore the encoder only; ignore_extra=True tolerates checkpoint
        # entries that the encoder itself does not define.
        bert.load_parameters(pretrained_bert_parameters, ctx=ctx, ignore_extra=True)
    # No checkpoint covers the span classifier here, so it is freshly
    # initialised with Normal(0.02).
    # NOTE(review): presumably `bert` already holds weights when no
    # pretrained_bert_parameters file is given -- confirm where it is created.
    net.span_classifier.initialize(init=mx.init.Normal(0.02), ctx=ctx)
net.hybridize(static_alloc=True)

# Loss block for the QA model, hybridized with the same static allocation setting.
loss_function = BertForQALoss()
loss_function.hybridize(static_alloc=True)


def train():
    """Training function."""
    segment = 'train' if not args.test_mode else 'dev'
    log.info('Loading %s data...', segment)
    if version_2:
        train_data = SQuAD(segment, version='2.0')
    else:
        train_data = SQuAD(segment, version='1.1')
    log.info('Number of records in Train data:{}'.format(len(train_data)))
Beispiel #2
0
    nlp.data.batchify.Stack(),
    nlp.data.batchify.Pad(axis=0, pad_val=vocab[vocab.padding_token]),
    nlp.data.batchify.Pad(axis=0, pad_val=vocab[vocab.padding_token]),
    nlp.data.batchify.Stack('float32'), nlp.data.batchify.Stack('float32'),
    nlp.data.batchify.Stack('float32'))

# Value passed as `bert_out_dim` for each supported BERT model name.
BERT_DIM = {
    'bert_12_768_12': 768,
    'bert_24_1024_16': 1024,
}

# Question-answering model wrapping `bert`, configured from the command-line args.
net = BertForQA(
    bert=bert,
    n_rnn_layers=args.n_rnn_layers,
    apply_coattention=args.apply_coattention,
    bert_out_dim=BERT_DIM[args.bert_model],
    remove_special_token=args.remove_special_token,
    mask_output=args.mask_output,
)

if model_parameters:
    # Restore the complete BertForQA parameter set from a checkpoint.
    net.load_parameters(model_parameters, ctx=ctx, cast_dtype=True)
elif pretrained_bert_parameters or pretrained:
    if pretrained_bert_parameters:
        # Restore only the BertModel parameters from the given file.
        bert.load_parameters(
            pretrained_bert_parameters,
            ctx=ctx,
            ignore_extra=True,
            cast_dtype=True,
        )
    # In both pretrained cases only the span classifier is initialised here.
    # NOTE(review): with `pretrained` set and no parameter file, nothing is
    # loaded in this block -- presumably `bert` was created with its
    # pretrained weights already; confirm at the construction site.
    net.span_classifier.initialize(init=mx.init.Normal(0.02), ctx=ctx)
else:
    # No checkpoint of any kind: initialise the whole network from scratch.
    net.initialize(init=mx.init.Normal(0.02), ctx=ctx)

if args.apply_coattention: