Example #1

import nemo
import nemo_nlp  # NeMo 0.x NLP collection; the package layout varies by release
from nemo_nlp import NemoBertTokenizer, SentencePieceTokenizer

work_dir = f'{args.work_dir}/{args.dataset_name.upper()}'
nf = nemo.core.NeuralModuleFactory(backend=nemo.core.Backend.PyTorch,
                                   local_rank=args.local_rank,
                                   optimization_level=args.amp_opt_level,
                                   log_dir=work_dir,
                                   create_tb_writer=True,
                                   files_to_copy=[__file__],
                                   add_time_to_log_dir=True)
output_file = f'{nf.work_dir}/output.txt'
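
# `args` is assumed to come from the script's argparse parser (flags such as
# --work_dir, --dataset_name, --local_rank and --amp_opt_level are read here,
# but the parser itself is not part of this excerpt).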

if args.bert_checkpoint is None:
    """ Use this if you're using a standard BERT model.
    To see the list of pretrained models, call:
    nemo_nlp.huggingface.BERT.list_pretrained_models()
    """
    tokenizer = NemoBertTokenizer(args.pretrained_bert_model)
    pretrained_bert_model = nemo_nlp.huggingface.BERT(
        pretrained_model_name=args.pretrained_bert_model, factory=nf)
else:
    """ Use this if you're using a BERT model that you pre-trained yourself.
    Replace BERT-STEP-150000.pt with the path to your checkpoint.
    """
    tokenizer = SentencePieceTokenizer(model_path=args.tokenizer_model)
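    # A custom SentencePiece model lacks BERT's special tokens, so register
    # [MASK], [CLS] and [SEP] explicitly before use.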
    tokenizer.add_special_tokens(["[MASK]", "[CLS]", "[SEP]"])

    pretrained_bert_model = nemo_nlp.huggingface.BERT(
        config_filename=args.bert_config, factory=nf)
    pretrained_bert_model.restore_from(args.bert_checkpoint)

hidden_size = pretrained_bert_model.local_parameters["hidden_size"]
# num_classes must match the size of the NER label set; labels_dict maps label
# ids to tag names (loaded with get_vocab, as in Example #2 below).
ner_classifier = nemo_nlp.TokenClassifier(hidden_size=hidden_size,
                                          num_classes=len(labels_dict),
                                          dropout=args.fc_dropout)

Example #2

import os

import numpy as np

import nemo
import nemo_nlp
from nemo_nlp import NemoBertTokenizer
# get_vocab is the small NeMo helper that loads the {id: label} file saved
# during training; its exact import path differs across NeMo 0.x releases.

if not os.path.exists(args.labels_dict):
    raise ValueError(
        f'Dictionary with ids to labels not found at {args.labels_dict}')

nf = nemo.core.NeuralModuleFactory(backend=nemo.core.Backend.PyTorch,
                                   optimization_level=args.amp_opt_level,
                                   log_dir=None)

labels_dict = get_vocab(args.labels_dict)
""" Load the pretrained BERT parameters
See the list of pretrained models, call:
nemo_nlp.huggingface.BERT.list_pretrained_models()
"""
pretrained_bert_model = nemo_nlp.huggingface.BERT(
    pretrained_model_name=args.pretrained_bert_model)
hidden_size = pretrained_bert_model.local_parameters["hidden_size"]
tokenizer = NemoBertTokenizer(args.pretrained_bert_model)

data_layer = nemo_nlp.BertTokenClassificationInferDataLayer(
    queries=args.queries,
    tokenizer=tokenizer,
    max_seq_length=args.max_seq_length,
    batch_size=1)
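
# subtokens_mask (returned below) marks the first subtoken of every word, so
# predictions can be mapped back to whole words after inference.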

classifier = nemo_nlp.TokenClassifier(hidden_size=hidden_size,
                                      num_classes=len(labels_dict),
                                      dropout=args.fc_dropout)
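
# Note: the classifier head is created with fresh weights here; the trained
# weights are restored when nf.infer is pointed at a checkpoint directory
# (see the sketch at the end of this example).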

input_ids, input_type_ids, input_mask, _, subtokens_mask = data_layer()

hidden_states = pretrained_bert_model(input_ids=input_ids,
                                      token_type_ids=input_type_ids,
                                      attention_mask=input_mask)
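
# A minimal sketch of finishing the inference pipeline in NeMo 0.x, assuming a
# --checkpoints_dir flag that points at the checkpoints saved during training
# (the flag name is illustrative, not from the original excerpt).
logits = classifier(hidden_states=hidden_states)

# nf.infer runs the DAG and restores module weights from checkpoint_dir.
evaluated = nf.infer(tensors=[logits, subtokens_mask],
                     checkpoint_dir=args.checkpoints_dir)

# Each entry of `evaluated` is a list of per-batch tensors; concatenate them
# and take the argmax over the label dimension.
logits_np = np.concatenate([t.cpu().numpy() for t in evaluated[0]])
preds = np.argmax(logits_np, axis=2)
# Map prediction ids to tag names with labels_dict, keeping only positions
# where subtokens_mask is 1 (the first subtoken of each word).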