def load_model(ctx):
    """get model"""
    # model
    pretrained = args.pretrained
    dataset = args.dataset_name
    model, vocabulary = get_model(args.model, dataset_name=dataset,
                                  pretrained=pretrained, ctx=ctx)
    if not pretrained:
        model.initialize(init=mx.init.Normal(0.02), ctx=ctx)

    if args.ckpt_dir and args.start_step:
        # Cast the model in case we're loading a fine-tuned float16 model.
        model.cast(args.dtype)
        param_path = os.path.join(args.ckpt_dir, '%07d.params' % args.start_step)
        model.load_parameters(param_path, ctx=ctx)
        logging.info('Loading step %d checkpoints from %s.', args.start_step, param_path)

    model.cast(args.dtype)
    model.hybridize(static_alloc=True)

    # losses
    nsp_loss = gluon.loss.SoftmaxCELoss()
    mlm_loss = gluon.loss.SoftmaxCELoss()
    nsp_loss.hybridize(static_alloc=True)
    mlm_loss.hybridize(static_alloc=True)

    return model, nsp_loss, mlm_loss, vocabulary
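# Minimal usage sketch (not part of the original script): assumes the
# module-level `args` namespace has already been populated by the script's
# argument parser; the GPU-vs-CPU context selection below is illustrative.
import mxnet as mx

ctx = mx.gpu(0) if mx.context.num_gpus() > 0 else mx.cpu()
model, nsp_loss, mlm_loss, vocabulary = load_model(ctx)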
def get_bert_datasets(class_labels, vectorizer, train_ds, dev_ds, batch_size, max_len,
                      bert_model_name='bert_12_768_12',
                      bert_dataset='book_corpus_wiki_en_uncased',
                      pad=False, use_bert_vocab=False, ctx=mx.cpu()):
    bert, bert_vocabulary = get_model(
        name=bert_model_name,
        dataset_name=bert_dataset,
        pretrained=True,
        ctx=ctx,
        use_pooler=True,
        use_decoder=False,
        use_classifier=False)
    do_lower_case = 'uncased' in bert_dataset
    bert_tokenizer = BERTTokenizer(bert_vocabulary, lower=do_lower_case)
    trans = BERTDatasetTransform(bert_tokenizer, max_len,
                                 class_labels=class_labels,
                                 label_alias=None,
                                 pad=pad, pair=False,
                                 has_label=True,
                                 vectorizer=vectorizer,
                                 bert_vocab_size=len(bert_vocabulary) if use_bert_vocab else 0)
    train_data, dev_data, test_data, num_train_examples = preprocess_data(
        trans, class_labels, train_ds, dev_ds, batch_size, max_len, pad)
    return train_data, dev_data, num_train_examples, bert, bert_vocabulary
def get_bert_datasets(class_labels, vectorizer, train_ds, dev_ds, batch_size, max_len,
                      aux_ds=None,
                      bert_model_name='bert_12_768_12',
                      bert_dataset='book_corpus_wiki_en_uncased',
                      pad=False, use_bert_vocab=False, label_alias=None,
                      num_classes=None, ctx=mx.cpu()):
    if class_labels is None and num_classes is None:
        raise ValueError("Must provide class_labels or num_classes")
    bert, bert_vocabulary = get_model(name=bert_model_name,
                                      dataset_name=bert_dataset,
                                      pretrained=True, ctx=ctx,
                                      use_pooler=True,
                                      use_decoder=False,
                                      use_classifier=False)
    do_lower_case = 'uncased' in bert_dataset
    bert_tokenizer = BERTTokenizer(bert_vocabulary, lower=do_lower_case)
    trans = BERTDatasetTransform(
        bert_tokenizer, max_len,
        class_labels=class_labels,
        label_alias=label_alias,
        pad=pad, pair=False,
        has_label=True,
        vectorizer=vectorizer,
        bert_vocab_size=len(bert_vocabulary) if use_bert_vocab else 0,
        num_classes=num_classes)
    train_data, num_train_examples = preprocess_seq_data(trans, class_labels, train_ds,
                                                         batch_size, max_len,
                                                         train_mode=True, pad=pad,
                                                         aux_dataset=aux_ds)
    dev_data, _ = preprocess_seq_data(trans, class_labels, dev_ds, batch_size, max_len,
                                      train_mode=False, pad=pad)
    return train_data, dev_data, num_train_examples, bert, bert_vocabulary
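# Usage sketch for the variant above (illustrative, not from the original code):
# `my_vectorizer`, `my_train_ds`, and `my_dev_ds` are hypothetical placeholders
# for a fitted vectorizer and the raw labeled train/dev datasets expected by
# preprocess_seq_data; the label set and batch settings are assumptions.
import mxnet as mx

train_data, dev_data, num_train_examples, bert, bert_vocab = get_bert_datasets(
    class_labels=['0', '1'],      # illustrative binary labels
    vectorizer=my_vectorizer,
    train_ds=my_train_ds,
    dev_ds=my_dev_ds,
    batch_size=32,
    max_len=128,
    ctx=mx.cpu())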
def get_dual_bert_datasets(class_labels, vectorizer, train_ds1, train_ds2, model_name,
                           dataset, batch_size, dev_bs, max_len1, max_len2, pad,
                           use_bert_vocab=False, shuffle=True, ctx=mx.cpu()):
    bert, bert_vocabulary = get_model(
        name=model_name,
        dataset_name=dataset,
        pretrained=True,
        ctx=ctx,
        use_pooler=True,
        use_decoder=False,
        use_classifier=False)
    do_lower_case = 'uncased' in dataset
    bert_tokenizer = BERTTokenizer(bert_vocabulary, lower=do_lower_case)
    # transformations for the train and dev data
    trans1 = BERTDatasetTransform(bert_tokenizer, max_len1,
                                  class_labels=class_labels,
                                  label_alias=None,
                                  pad=pad, pair=False,
                                  has_label=True,
                                  vectorizer=vectorizer,
                                  bert_vocab_size=len(bert_vocabulary) if use_bert_vocab else 0)
    trans2 = BERTDatasetTransform(bert_tokenizer, max_len2,
                                  class_labels=class_labels,
                                  label_alias=None,
                                  pad=pad, pair=False,
                                  has_label=True,
                                  vectorizer=vectorizer,
                                  bert_vocab_size=len(bert_vocabulary) if use_bert_vocab else 0)
    # train_data, num_train_examples = preprocess_data_metriclearn(
    #     trans, class_labels, train_ds1, train_ds2, batch_size, max_len, pad)
    # Override the requested batch size with the full length of the second training set.
    batch_size = len(train_ds2)
    a_train_data, num_train_examples, b_train_data = preprocess_data_metriclearn_separate(
        trans1, trans2, class_labels, train_ds1, train_ds2, batch_size, shuffle=shuffle)
    return a_train_data, num_train_examples, bert, b_train_data, bert_vocabulary
def test_get_tokenizer():
    test_sent = 'Apple, 사과, 沙果'
    models = [
        ('roberta_12_768_12', 'openwebtext_ccnews_stories_books_cased',
         ['Apple', ',', 'Ġì', 'Ĥ¬', 'ê', '³', '¼', ',', 'Ġæ', '²', 'Ļ', 'æ', 'ŀ', 'ľ']),
        ('roberta_24_1024_16', 'openwebtext_ccnews_stories_books_cased',
         ['Apple', ',', 'Ġì', 'Ĥ¬', 'ê', '³', '¼', ',', 'Ġæ', '²', 'Ļ', 'æ', 'ŀ', 'ľ']),
        ('bert_12_768_12', 'book_corpus_wiki_en_cased',
         ['Apple', ',', '[UNK]', ',', '[UNK]', '[UNK]']),
        ('bert_12_768_12', 'book_corpus_wiki_en_uncased',
         ['apple', ',', 'ᄉ', '##ᅡ', '##ᄀ', '##ᅪ', ',', '[UNK]', '[UNK]']),
        ('bert_12_768_12', 'openwebtext_book_corpus_wiki_en_uncased',
         ['apple', ',', 'ᄉ', '##ᅡ', '##ᄀ', '##ᅪ', ',', '[UNK]', '[UNK]']),
        ('bert_12_768_12', 'wiki_multilingual_cased',
         ['app', '##le', ',', '[UNK]', ',', '沙', '果']),
        ('bert_12_768_12', 'wiki_multilingual_uncased',
         ['[UNK]', ',', 'ᄉ', '##ᅡ', u'##\u1100\u116a', ',', '沙', '果']),
        ('bert_12_768_12', 'wiki_cn_cased',
         ['[UNK]', ',', 'ᄉ', '##ᅡ', '##ᄀ', '##ᅪ', ',', '沙', '果']),
        ('bert_24_1024_16', 'book_corpus_wiki_en_cased',
         ['Apple', ',', '[UNK]', ',', '[UNK]', '[UNK]']),
        ('bert_24_1024_16', 'book_corpus_wiki_en_uncased',
         ['apple', ',', 'ᄉ', '##ᅡ', '##ᄀ', '##ᅪ', ',', '[UNK]', '[UNK]']),
        ('bert_12_768_12', 'scibert_scivocab_uncased',
         ['apple', ',', '[UNK]', ',', '[UNK]', '[UNK]']),
        ('bert_12_768_12', 'scibert_scivocab_cased',
         ['Appl', '##e', ',', '[UNK]', ',', '[UNK]', '[UNK]']),
        ('bert_12_768_12', 'scibert_basevocab_uncased',
         ['apple', ',', 'ᄉ', '##ᅡ', '##ᄀ', '##ᅪ', ',', '[UNK]', '[UNK]']),
        ('bert_12_768_12', 'scibert_basevocab_cased',
         ['Apple', ',', '[UNK]', ',', '[UNK]', '[UNK]']),
        ('bert_12_768_12', 'biobert_v1.0_pmc_cased',
         ['Apple', ',', '[UNK]', ',', '[UNK]', '[UNK]']),
        ('bert_12_768_12', 'biobert_v1.0_pubmed_cased',
         ['Apple', ',', '[UNK]', ',', '[UNK]', '[UNK]']),
        ('bert_12_768_12', 'biobert_v1.0_pubmed_pmc_cased',
         ['Apple', ',', '[UNK]', ',', '[UNK]', '[UNK]']),
        ('bert_12_768_12', 'biobert_v1.1_pubmed_cased',
         ['Apple', ',', '[UNK]', ',', '[UNK]', '[UNK]']),
        ('bert_12_768_12', 'clinicalbert_uncased',
         ['apple', ',', 'ᄉ', '##ᅡ', '##ᄀ', '##ᅪ', ',', '[UNK]', '[UNK]']),
        ('bert_12_768_12', 'kobert_news_wiki_ko_cased',
         ['▁A', 'p', 'p', 'le', ',', '▁사과', ',', '▁', '沙果']),
        ('ernie_12_768_12', 'baidu_ernie_uncased',
         ['apple', ',', '[UNK]', ',', '沙', '果']),
    ]
    for model_nm, dataset_nm, expected in models:
        _, vocab = get_model(model_nm, dataset_name=dataset_nm, pretrained=False)
        tok = get_tokenizer(model_name=model_nm, dataset_name=dataset_nm, vocab=vocab)
        predicted = tok(test_sent)
        assert predicted == expected
# model and loss
only_inference = args.only_inference
model_name = args.bert_model
dataset = args.bert_dataset
pretrained_bert_parameters = args.pretrained_bert_parameters
model_parameters = args.model_parameters

if only_inference and not model_parameters:
    warnings.warn('model_parameters is not set. '
                  'Randomly initialized model will be used for inference.')

get_pretrained = not (pretrained_bert_parameters is not None
                      or model_parameters is not None)
bert, vocabulary = get_model(
    name=model_name,
    dataset_name=dataset,
    pretrained=get_pretrained,
    ctx=ctx,
    use_pooler=True,
    use_decoder=False,
    use_classifier=False)

if not task.class_labels:
    # STS-B is a regression task.
    # STSBTask().class_labels returns None
    model = BERTRegression(bert, dropout=0.1)
    if not model_parameters:
        model.regression.initialize(init=mx.init.Normal(0.02), ctx=ctx)
    loss_function = gluon.loss.L2Loss()
else:
    model = BERTClassifier(bert, dropout=0.1,
                           num_classes=len(task.class_labels))
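    # Hedged sketch of the likely continuation of this branch (an assumption,
    # not verbatim from the original source): initialize the classifier head
    # and use a cross-entropy loss, mirroring the regression branch above.
    if not model_parameters:
        model.classifier.initialize(init=mx.init.Normal(0.02), ctx=ctx)
    loss_function = gluon.loss.SoftmaxCELoss()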
console.setLevel(logging.INFO)
console.setFormatter(formatter)
log.addHandler(console)
log.addHandler(fh)
log.info(args)

###############################################################################
#                            Hybridize the model                              #
###############################################################################
seq_length = args.seq_length

if args.task == 'classification':
    bert, _ = get_model(
        name=args.model_name,
        dataset_name=args.dataset_name,
        pretrained=False,
        use_pooler=True,
        use_decoder=False,
        use_classifier=False)
    net = BERTClassifier(bert, num_classes=2, dropout=args.dropout)
elif args.task == 'regression':
    bert, _ = get_model(
        name=args.model_name,
        dataset_name=args.dataset_name,
        pretrained=False,
        use_pooler=True,
        use_decoder=False,
        use_classifier=False)
    net = BERTClassifier(bert, num_classes=1, dropout=args.dropout)
elif args.task == 'question_answering':
    bert, _ = get_model(
        name=args.model_name,
        dataset_name=args.dataset_name,
        pretrained=False,
dev_dataloader = DataLoader(transformed_dev_dataset,
                            batch_size=args.batch_size,
                            shuffle=True,
                            num_workers=multiprocessing.cpu_count() - 3,
                            batchify_fn=batchify_fn)

slots_count = len(train_dataset.get_slots_map())
intents_count = len(train_dataset.get_intent_map())
# Train and dev sets must share an identical slot inventory.
assert len(set(train_dataset.get_slots_map()) ^ set(dev_dataset.get_slots_map())) == 0

crf = CRF(train_dataset.get_slots_map(), ctx=context)
elmo, _ = get_model('elmo_2x4096_512_2048cnn_2xhighway',
                    dataset_name='gbw',
                    pretrained=True,
                    ctx=context)
model = OneNet(elmo, crf, intents_count, slots_count)
model.initialize(mx.init.Xavier(magnitude=2.24), ctx=context)

trainer = Trainer(model.collect_params(), 'ftml',
                  {'learning_rate': args.learning_rate})

best_model_path = run_training(model, trainer, train_dataloader, dev_dataloader,
                               intents_count, args.epochs, args.model_path, context)
print('Model to use: {}'.format(best_model_path))
model.load_parameters(best_model_path, ctx=context)

eval_result = run_evaluation(model, train_dataset.get_intent_map(),
                             train_dataset.get_slots_map(), context,
                             args.batch_size)
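# Illustrative follow-up (an assumption, not part of the original script):
# report whatever metrics run_evaluation returned for the selected checkpoint.
print('Evaluation result: {}'.format(eval_result))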