def load_model(ctx):
    """get model"""
    # model
    pretrained = args.pretrained
    dataset = args.dataset_name
    model, vocabulary = get_model(args.model,
                                  dataset_name=dataset,
                                  pretrained=pretrained,
                                  ctx=ctx)
    if not pretrained:
        model.initialize(init=mx.init.Normal(0.02), ctx=ctx)

    if args.ckpt_dir and args.start_step:
        # Cast the model in case we're loading a fine-tuned float16 model.
        model.cast(args.dtype)
        param_path = os.path.join(args.ckpt_dir,
                                  '%07d.params' % args.start_step)
        model.load_parameters(param_path, ctx=ctx)
        logging.info('Loaded step %d checkpoint from %s.', args.start_step,
                     param_path)

    model.cast(args.dtype)
    model.hybridize(static_alloc=True)

    # losses
    nsp_loss = gluon.loss.SoftmaxCELoss()
    mlm_loss = gluon.loss.SoftmaxCELoss()
    nsp_loss.hybridize(static_alloc=True)
    mlm_loss.hybridize(static_alloc=True)

    return model, nsp_loss, mlm_loss, vocabulary
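
# Minimal usage sketch (hypothetical context setup; `args` is the script-level
# argparse namespace that load_model assumes):
#
#     ctx = [mx.cpu()]
#     model, nsp_loss, mlm_loss, vocab = load_model(ctx)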
Example #2
def get_bert_datasets(class_labels,
                      vectorizer,
                      train_ds,
                      dev_ds,
                      batch_size,
                      max_len,
                      bert_model_name='bert_12_768_12',
                      bert_dataset='book_corpus_wiki_en_uncased',
                      pad=False,
                      use_bert_vocab=False,
                      ctx=mx.cpu()):
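    """Load a pre-trained BERT model and build train/dev data loaders for
    single-sentence classification."""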
    bert, bert_vocabulary = get_model(
        name=bert_model_name,
        dataset_name=bert_dataset,
        pretrained=True,
        ctx=ctx,
        use_pooler=True,
        use_decoder=False,
        use_classifier=False)
    do_lower_case = 'uncased' in bert_dataset
    bert_tokenizer = BERTTokenizer(bert_vocabulary, lower=do_lower_case)
    trans = BERTDatasetTransform(bert_tokenizer, max_len,
                                 class_labels=class_labels,
                                 label_alias=None,
                                 pad=pad, pair=False,
                                 has_label=True,
                                 vectorizer=vectorizer,
                                 bert_vocab_size=len(bert_vocabulary) if use_bert_vocab else 0)
    train_data, dev_data, test_data, num_train_examples = preprocess_data(
        trans, class_labels, train_ds, dev_ds, batch_size, max_len, pad)
    return train_data, dev_data, num_train_examples, bert, bert_vocabulary
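
# Usage sketch (hypothetical names: `my_vectorizer`, `train_ds` and `dev_ds`
# stand in for the caller's vectorizer and labelled datasets):
#
#     train_data, dev_data, n_train, bert, vocab = get_bert_datasets(
#         class_labels=['neg', 'pos'], vectorizer=my_vectorizer,
#         train_ds=train_ds, dev_ds=dev_ds, batch_size=32, max_len=128,
#         ctx=mx.cpu())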
Example #3
def get_bert_datasets(class_labels,
                      vectorizer,
                      train_ds,
                      dev_ds,
                      batch_size,
                      max_len,
                      aux_ds=None,
                      bert_model_name='bert_12_768_12',
                      bert_dataset='book_corpus_wiki_en_uncased',
                      pad=False,
                      use_bert_vocab=False,
                      label_alias=None,
                      num_classes=None,
                      ctx=mx.cpu()):
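    """Load a pre-trained BERT model and build train/dev data loaders,
    optionally mixing in an auxiliary dataset and accepting `num_classes`
    in place of explicit class labels."""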
    if class_labels is None and num_classes is None:
        raise ValueError("Must provide either class_labels or num_classes")
    bert, bert_vocabulary = get_model(name=bert_model_name,
                                      dataset_name=bert_dataset,
                                      pretrained=True,
                                      ctx=ctx,
                                      use_pooler=True,
                                      use_decoder=False,
                                      use_classifier=False)
    do_lower_case = 'uncased' in bert_dataset
    bert_tokenizer = BERTTokenizer(bert_vocabulary, lower=do_lower_case)
    trans = BERTDatasetTransform(
        bert_tokenizer,
        max_len,
        class_labels=class_labels,
        label_alias=label_alias,
        pad=pad,
        pair=False,
        has_label=True,
        vectorizer=vectorizer,
        bert_vocab_size=len(bert_vocabulary) if use_bert_vocab else 0,
        num_classes=num_classes)
    train_data, num_train_examples = preprocess_seq_data(trans,
                                                         class_labels,
                                                         train_ds,
                                                         batch_size,
                                                         max_len,
                                                         train_mode=True,
                                                         pad=pad,
                                                         aux_dataset=aux_ds)
    dev_data, _ = preprocess_seq_data(trans,
                                      class_labels,
                                      dev_ds,
                                      batch_size,
                                      max_len,
                                      train_mode=False,
                                      pad=pad)
    return train_data, dev_data, num_train_examples, bert, bert_vocabulary
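
# Usage sketch: when the label set is not known in advance, this variant can be
# called with `num_classes` instead of `class_labels` (names below are
# hypothetical):
#
#     train_data, dev_data, n_train, bert, vocab = get_bert_datasets(
#         class_labels=None, vectorizer=my_vectorizer, train_ds=train_ds,
#         dev_ds=dev_ds, batch_size=32, max_len=128, num_classes=20,
#         ctx=mx.cpu())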
Example #4
def get_dual_bert_datasets(class_labels,
                           vectorizer,
                           train_ds1,
                           train_ds2,
                           model_name,
                           dataset,
                           batch_size,
                           dev_bs,
                           max_len1,
                           max_len2,
                           pad,
                           use_bert_vocab=False,
                           shuffle=True,
                           ctx=mx.cpu()):
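    """Load a pre-trained BERT model and build paired train data loaders over
    two datasets (e.g. for metric learning), with one transform per maximum
    sequence length."""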
    bert, bert_vocabulary = get_model(
        name=model_name,
        dataset_name=dataset,
        pretrained=True,
        ctx=ctx,
        use_pooler=True,
        use_decoder=False,
        use_classifier=False)
    do_lower_case = 'uncased' in dataset
    bert_tokenizer = BERTTokenizer(bert_vocabulary, lower=do_lower_case)

    # transformation for data train and dev
    trans1 = BERTDatasetTransform(bert_tokenizer, max_len1,
                                  class_labels=class_labels,
                                  label_alias=None,
                                  pad=pad, pair=False,
                                  has_label=True,
                                  vectorizer=vectorizer,
                                  bert_vocab_size=len(bert_vocabulary) if use_bert_vocab else 0)

    trans2 = BERTDatasetTransform(bert_tokenizer, max_len2,
                                  class_labels=class_labels,
                                  label_alias=None,
                                  pad=pad, pair=False,
                                  has_label=True,
                                  vectorizer=vectorizer,
                                  bert_vocab_size=len(bert_vocabulary) if use_bert_vocab else 0)
    
    # train_data, num_train_examples = preprocess_data_metriclearn(
    #     trans, class_labels, train_ds1, train_ds2, batch_size, max_len, pad)
    # Use the entire second training dataset as a single batch.
    batch_size = len(train_ds2)
    a_train_data, num_train_examples, b_train_data = preprocess_data_metriclearn_separate(
        trans1, trans2, class_labels, train_ds1, train_ds2, batch_size, shuffle=shuffle)
    return a_train_data, num_train_examples, bert, b_train_data, bert_vocabulary
def test_get_tokenizer():
    test_sent = 'Apple, 사과, 沙果'
    models = [
        (
            'roberta_12_768_12', 'openwebtext_ccnews_stories_books_cased', [
                'Apple', ',', 'Ġì', 'Ĥ¬', 'ê', '³', '¼', ',', 'Ġæ', '²', 'Ļ',
                'æ', 'ŀ', 'ľ'
            ]
        ), (
            'roberta_24_1024_16', 'openwebtext_ccnews_stories_books_cased', [
                'Apple', ',', 'Ġì', 'Ĥ¬', 'ê', '³', '¼', ',', 'Ġæ', '²', 'Ļ',
                'æ', 'ŀ', 'ľ'
            ]
        ), (
            'bert_12_768_12', 'book_corpus_wiki_en_cased',
            ['Apple', ',', '[UNK]', ',', '[UNK]', '[UNK]']
        ), (
            'bert_12_768_12', 'book_corpus_wiki_en_uncased',
            ['apple', ',', 'ᄉ', '##ᅡ', '##ᄀ', '##ᅪ', ',', '[UNK]', '[UNK]']
        ), (
            'bert_12_768_12', 'openwebtext_book_corpus_wiki_en_uncased',
            ['apple', ',', 'ᄉ', '##ᅡ', '##ᄀ', '##ᅪ', ',', '[UNK]', '[UNK]']
        ), (
            'bert_12_768_12', 'wiki_multilingual_cased',
            ['app', '##le', ',', '[UNK]', ',', '沙', '果']
        ), (
            'bert_12_768_12', 'wiki_multilingual_uncased',
            ['[UNK]', ',', 'ᄉ', '##ᅡ', u'##\u1100\u116a', ',', '沙', '果']
        ), (
            'bert_12_768_12', 'wiki_cn_cased',
            ['[UNK]', ',', 'ᄉ', '##ᅡ', '##ᄀ', '##ᅪ', ',', '沙', '果']
        ), (
            'bert_24_1024_16', 'book_corpus_wiki_en_cased',
            ['Apple', ',', '[UNK]', ',', '[UNK]', '[UNK]']
        ), (
            'bert_24_1024_16', 'book_corpus_wiki_en_uncased',
            ['apple', ',', 'ᄉ', '##ᅡ', '##ᄀ', '##ᅪ', ',', '[UNK]', '[UNK]']
        ), (
            'bert_12_768_12', 'scibert_scivocab_uncased',
            ['apple', ',', '[UNK]', ',', '[UNK]', '[UNK]']
        ), (
            'bert_12_768_12', 'scibert_scivocab_cased',
            ['Appl', '##e', ',', '[UNK]', ',', '[UNK]', '[UNK]']
        ), (
            'bert_12_768_12', 'scibert_basevocab_uncased',
            ['apple', ',', 'ᄉ', '##ᅡ', '##ᄀ', '##ᅪ', ',', '[UNK]', '[UNK]']
        ), (
            'bert_12_768_12', 'scibert_basevocab_cased',
            ['Apple', ',', '[UNK]', ',', '[UNK]', '[UNK]']
        ), (
            'bert_12_768_12', 'biobert_v1.0_pmc_cased',
            ['Apple', ',', '[UNK]', ',', '[UNK]', '[UNK]']
        ), (
            'bert_12_768_12', 'biobert_v1.0_pubmed_cased',
            ['Apple', ',', '[UNK]', ',', '[UNK]', '[UNK]']
        ), (
            'bert_12_768_12', 'biobert_v1.0_pubmed_pmc_cased',
            ['Apple', ',', '[UNK]', ',', '[UNK]', '[UNK]']
        ), (
            'bert_12_768_12', 'biobert_v1.1_pubmed_cased',
            ['Apple', ',', '[UNK]', ',', '[UNK]', '[UNK]']
        ), (
            'bert_12_768_12', 'clinicalbert_uncased',
            ['apple', ',', 'ᄉ', '##ᅡ', '##ᄀ', '##ᅪ', ',', '[UNK]', '[UNK]']
        ), (
            'bert_12_768_12', 'kobert_news_wiki_ko_cased',
            ['▁A', 'p', 'p', 'le', ',', '▁사과', ',', '▁', '沙果']
        ), (
            'ernie_12_768_12', 'baidu_ernie_uncased',
            ['apple', ',', '[UNK]', ',', '沙', '果']
        )
    ]
    for model_nm, dataset_nm, expected in models:
        _, vocab = get_model(
            model_nm, dataset_name=dataset_nm, pretrained=False
        )
        tok = get_tokenizer(
            model_name=model_nm, dataset_name=dataset_nm, vocab=vocab
        )
        predicted = tok(test_sent)
        assert predicted == expected
# model and loss
only_inference = args.only_inference
model_name = args.bert_model
dataset = args.bert_dataset
pretrained_bert_parameters = args.pretrained_bert_parameters
model_parameters = args.model_parameters
if only_inference and not model_parameters:
    warnings.warn('model_parameters is not set. '
                  'Randomly initialized model will be used for inference.')

get_pretrained = (pretrained_bert_parameters is None
                  and model_parameters is None)
bert, vocabulary = get_model(name=model_name,
                             dataset_name=dataset,
                             pretrained=get_pretrained,
                             ctx=ctx,
                             use_pooler=True,
                             use_decoder=False,
                             use_classifier=False)

if not task.class_labels:
    # STS-B is a regression task.
    # STSBTask().class_labels returns None
    model = BERTRegression(bert, dropout=0.1)
    if not model_parameters:
        model.regression.initialize(init=mx.init.Normal(0.02), ctx=ctx)
    loss_function = gluon.loss.L2Loss()
else:
    model = BERTClassifier(bert,
                           dropout=0.1,
                           num_classes=len(task.class_labels))
Example #7
console.setLevel(logging.INFO)
console.setFormatter(formatter)
log.addHandler(console)
log.addHandler(fh)
log.info(args)

###############################################################################
#                              Hybridize the model                            #
###############################################################################

seq_length = args.seq_length

if args.task == 'classification':
    bert, _ = get_model(name=args.model_name,
                        dataset_name=args.dataset_name,
                        pretrained=False,
                        use_pooler=True,
                        use_decoder=False,
                        use_classifier=False)
    net = BERTClassifier(bert, num_classes=2, dropout=args.dropout)
elif args.task == 'regression':
    bert, _ = get_model(name=args.model_name,
                        dataset_name=args.dataset_name,
                        pretrained=False,
                        use_pooler=True,
                        use_decoder=False,
                        use_classifier=False)
    net = BERTClassifier(bert, num_classes=1, dropout=args.dropout)
elif args.task == 'question_answering':
    bert, _ = get_model(name=args.model_name,
                        dataset_name=args.dataset_name,
                        pretrained=False,
Example #8
    dev_dataloader = DataLoader(transformed_dev_dataset,
                                batch_size=args.batch_size,
                                shuffle=True,
                                num_workers=multiprocessing.cpu_count() - 3,
                                batchify_fn=batchify_fn)

    slots_count = len(train_dataset.get_slots_map())
    intents_count = len(train_dataset.get_intent_map())

    assert len(
        set(train_dataset.get_slots_map())
        ^ set(dev_dataset.get_slots_map())) == 0

    crf = CRF(train_dataset.get_slots_map(), ctx=context)
    elmo, _ = get_model('elmo_2x4096_512_2048cnn_2xhighway',
                        dataset_name='gbw',
                        pretrained=True,
                        ctx=context)
    model = OneNet(elmo, crf, intents_count, slots_count)
    model.initialize(mx.init.Xavier(magnitude=2.24), ctx=context)

    trainer = Trainer(model.collect_params(), 'ftml',
                      {'learning_rate': args.learning_rate})
    best_model_path = run_training(model, trainer, train_dataloader,
                                   dev_dataloader, intents_count, args.epochs,
                                   args.model_path, context)

    print('Model to use: {}'.format(best_model_path))
    model.load_parameters(best_model_path, ctx=context)
    eval_result = run_evaluation(model, train_dataset.get_intent_map(),
                                 train_dataset.get_slots_map(), context,
                                 args.batch_size)