) args = parser.parse_args() if not os.path.exists(args.checkpoint_dir): raise ValueError(f'Checkpoints folder not found at {args.checkpoint_dir}') if not (os.path.exists(args.punct_labels_dict) and os.path.exists(args.capit_labels_dict)): raise ValueError( f'Dictionary with ids to labels not found at {args.punct_labels_dict} \ or {args.punct_labels_dict}') nf = nemo.core.NeuralModuleFactory(backend=nemo.core.Backend.PyTorch, log_dir=None) punct_labels_dict = get_vocab(args.punct_labels_dict) capit_labels_dict = get_vocab(args.capit_labels_dict) model = nemo_nlp.nm.trainables.get_pretrained_lm_model( pretrained_model_name=args.pretrained_model_name, config=args.bert_config, vocab=args.vocab_file) tokenizer = nemo.collections.nlp.data.tokenizers.get_tokenizer( tokenizer_name=args.tokenizer, pretrained_model_name=args.pretrained_model_name, tokenizer_model=args.tokenizer_model, vocab_file=args.vocab_file, do_lower_case=args.do_lower_case, )
parser.add_argument("--labels_dict", default='label_ids.csv', type=str)
args = parser.parse_args()
logging.info(args)

# Validate all filesystem inputs up front so failures happen before model load.
if not os.path.exists(args.checkpoint_dir):
    raise ValueError(
        f'Checkpoint directory not found at {args.checkpoint_dir}')
if not os.path.exists(args.labels_dict):
    raise ValueError(
        f'Dictionary with ids to labels not found at {args.labels_dict}')

# Factory for inference only — no logging directory is required.
nf = nemo.core.NeuralModuleFactory(backend=nemo.core.Backend.PyTorch, log_dir=None)

# Mapping from label ids back to human-readable label strings.
labels_dict = get_vocab(args.labels_dict)

"""
Load the pretrained BERT parameters
See the list of pretrained models, call:
nemo_nlp.huggingface.BERT.list_pretrained_models()
"""
pretrained_bert_model = nemo_nlp.nm.trainables.get_huggingface_model(
    bert_config=args.bert_config,
    pretrained_model_name=args.pretrained_model_name)

# Tokenizer selection must agree with the pretrained backbone above.
tokenizer = nemo.collections.nlp.data.tokenizers.get_tokenizer(
    tokenizer_name=args.tokenizer,
    pretrained_model_name=args.pretrained_model_name,
    tokenizer_model=args.tokenizer_model,
)

# Classifier head dimensions are derived from the encoder's hidden size.
hidden_size = pretrained_bert_model.hidden_size
LABELS_DICT = 'ner_label_ids.csv' BERT_CONFIG = None TOKENIZER = 'nemobert' TOKENIZER_MODEL = None NONE_LABEL = 'O' ADD_BRACKETS = True #QUERIES_PLACEHOLDER = ['we bought four shirts from the nvidia gear store in santa clara', 'Nvidia is a company', 'The Adventures of Tom Sawyer by Mark Twain is an 1876 novel about a young boy growing up along the Mississippi River',] nf = nemo.core.NeuralModuleFactory(backend=nemo.core.Backend.PyTorch, log_dir=None) in_file = open('cleaned_ner_note_test.txt') out_file = open('ner_output_test.txt', 'w+') QUERIES_PLACEHOLDER = in_file.readlines() labels_dict = get_vocab(LABELS_DICT) """ Load the pretrained BERT parameters See the list of pretrained models, call: nemo_nlp.huggingface.BERT.list_pretrained_models() """ pretrained_bert_model = nemo_nlp.nm.trainables.get_huggingface_model( bert_config=BERT_CONFIG, pretrained_model_name=PRETRAINED_MODEL_NAME) tokenizer = nemo.collections.nlp.data.tokenizers.get_tokenizer( tokenizer_name=TOKENIZER, pretrained_model_name=PRETRAINED_MODEL_NAME, tokenizer_model=TOKENIZER_MODEL, ) hidden_size = pretrained_bert_model.hidden_size data_layer = nemo_nlp.nm.data_layers.BertTokenClassificationInferDataLayer(