work_dir = f'{args.work_dir}/{args.dataset_name.upper()}' nf = nemo.core.NeuralModuleFactory(backend=nemo.core.Backend.PyTorch, local_rank=args.local_rank, optimization_level=args.amp_opt_level, log_dir=work_dir, create_tb_writer=True, files_to_copy=[__file__], add_time_to_log_dir=True) output_file = f'{nf.work_dir}/output.txt' if args.bert_checkpoint is None: """ Use this if you're using a standard BERT model. To see the list of pretrained models, call: nemo_nlp.huggingface.BERT.list_pretrained_models() """ tokenizer = NemoBertTokenizer(args.pretrained_bert_model) pretrained_bert_model = nemo_nlp.huggingface.BERT( pretrained_model_name=args.pretrained_bert_model, factory=nf) else: """ Use this if you're using a BERT model that you pre-trained yourself. Replace BERT-STEP-150000.pt with the path to your checkpoint. """ tokenizer = SentencePieceTokenizer(model_path=tokenizer_model) tokenizer.add_special_tokens(["[MASK]", "[CLS]", "[SEP]"]) bert_model = nemo_nlp.huggingface.BERT(config_filename=args.bert_config, factory=nf) pretrained_bert_model.restore_from(args.bert_checkpoint) hidden_size = pretrained_bert_model.local_parameters["hidden_size"] ner_classifier = nemo_nlp.TokenClassifier(hidden_size=hidden_size,
raise ValueError( f'Dictionary with ids to labels not found at {args.labels_dict}') nf = nemo.core.NeuralModuleFactory(backend=nemo.core.Backend.PyTorch, optimization_level=args.amp_opt_level, log_dir=None) labels_dict = get_vocab(args.labels_dict) """ Load the pretrained BERT parameters See the list of pretrained models, call: nemo_nlp.huggingface.BERT.list_pretrained_models() """ pretrained_bert_model = nemo_nlp.huggingface.BERT( pretrained_model_name=args.pretrained_bert_model) hidden_size = pretrained_bert_model.local_parameters["hidden_size"] tokenizer = NemoBertTokenizer(args.pretrained_bert_model) data_layer = nemo_nlp.BertTokenClassificationInferDataLayer( queries=args.queries, tokenizer=tokenizer, max_seq_length=args.max_seq_length, batch_size=1) classifier = nemo_nlp.TokenClassifier(hidden_size=hidden_size, num_classes=len(labels_dict), dropout=args.fc_dropout) input_ids, input_type_ids, input_mask, _, subtokens_mask = data_layer() hidden_states = pretrained_bert_model(input_ids=input_ids, token_type_ids=input_type_ids,