# Example 1: punctuation and capitalization inference.
import argparse
import os

import nemo
import nemo.collections.nlp as nemo_nlp
from nemo import logging

# NOTE: the import path of get_vocab varies across NeMo 0.x releases; the
# location below is an assumption.
from nemo.collections.nlp.data.datasets.datasets_utils import get_vocab

# Reconstructed argparse setup: the original snippet was truncated here, and
# only the closing parenthesis of the parser definition survived. The
# arguments below are inferred from the args.* attributes used later; the
# defaults are assumptions.
parser = argparse.ArgumentParser(
    description='Punctuation and capitalization inference')
parser.add_argument('--checkpoint_dir', required=True, type=str)
parser.add_argument('--punct_labels_dict', default='punct_label_ids.csv',
                    type=str)
parser.add_argument('--capit_labels_dict', default='capit_label_ids.csv',
                    type=str)
parser.add_argument('--pretrained_model_name', default='bert-base-uncased',
                    type=str)
parser.add_argument('--bert_config', default=None, type=str)
parser.add_argument('--vocab_file', default=None, type=str)
parser.add_argument('--tokenizer', default='nemobert', type=str)
parser.add_argument('--tokenizer_model', default=None, type=str)
parser.add_argument('--do_lower_case', action='store_true')

args = parser.parse_args()

if not os.path.exists(args.checkpoint_dir):
    raise ValueError(f'Checkpoints folder not found at {args.checkpoint_dir}')
if not (os.path.exists(args.punct_labels_dict)
        and os.path.exists(args.capit_labels_dict)):
    raise ValueError(
        f'Dictionary with ids to labels not found at {args.punct_labels_dict} '
        f'or {args.capit_labels_dict}')

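# The NeuralModuleFactory manages the PyTorch backend and device placement;
# log_dir=None skips creating a logging directory for this inference run.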
nf = nemo.core.NeuralModuleFactory(backend=nemo.core.Backend.PyTorch,
                                   log_dir=None)

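# Map label ids back to the punctuation and capitalization label strings
# saved during training.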
punct_labels_dict = get_vocab(args.punct_labels_dict)

capit_labels_dict = get_vocab(args.capit_labels_dict)

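# Load the pretrained language model; config and vocab are only needed when
# overriding the defaults that ship with the pretrained checkpoint.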
model = nemo_nlp.nm.trainables.get_pretrained_lm_model(
    pretrained_model_name=args.pretrained_model_name,
    config=args.bert_config,
    vocab=args.vocab_file)

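# The tokenizer must match the pretrained model so that token ids line up
# with the checkpoint's embedding table.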
tokenizer = nemo.collections.nlp.data.tokenizers.get_tokenizer(
    tokenizer_name=args.tokenizer,
    pretrained_model_name=args.pretrained_model_name,
    tokenizer_model=args.tokenizer_model,
    vocab_file=args.vocab_file,
    do_lower_case=args.do_lower_case,
)
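
# Example 2: token classification (NER) inference. The line below extends the
# parser defined above; this example's own argparse setup was truncated in
# the source.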
parser.add_argument("--labels_dict", default='label_ids.csv', type=str)

args = parser.parse_args()
logging.info(args)

if not os.path.exists(args.checkpoint_dir):
    raise ValueError(
        f'Checkpoint directory not found at {args.checkpoint_dir}')
if not os.path.exists(args.labels_dict):
    raise ValueError(
        f'Dictionary with ids to labels not found at {args.labels_dict}')

nf = nemo.core.NeuralModuleFactory(backend=nemo.core.Backend.PyTorch,
                                   log_dir=None)

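# Map label ids back to the NER label strings saved during training.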
labels_dict = get_vocab(args.labels_dict)
""" Load the pretrained BERT parameters
See the list of pretrained models, call:
nemo_nlp.huggingface.BERT.list_pretrained_models()
"""
pretrained_bert_model = nemo_nlp.nm.trainables.get_huggingface_model(
    bert_config=args.bert_config,
    pretrained_model_name=args.pretrained_model_name)

tokenizer = nemo.collections.nlp.data.tokenizers.get_tokenizer(
    tokenizer_name=args.tokenizer,
    pretrained_model_name=args.pretrained_model_name,
    tokenizer_model=args.tokenizer_model,
)
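# hidden_size is used downstream to size the token-classification head that
# is restored from the checkpoint.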
hidden_size = pretrained_bert_model.hidden_size
# Example 3: NER inference over queries read from a file.
# PRETRAINED_MODEL_NAME is used below but was not defined in the original
# snippet; the value here is an assumption.
PRETRAINED_MODEL_NAME = 'bert-base-uncased'
LABELS_DICT = 'ner_label_ids.csv'
BERT_CONFIG = None
TOKENIZER = 'nemobert'
TOKENIZER_MODEL = None
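# 'O' is the conventional "outside" tag in BIO-style tagging; ADD_BRACKETS
# wraps predicted entities in brackets in the printed output.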
NONE_LABEL = 'O'
ADD_BRACKETS = True
#QUERIES_PLACEHOLDER = ['we bought four shirts from the nvidia gear store in santa clara', 'Nvidia is a company', 'The Adventures of Tom Sawyer by Mark Twain is an 1876 novel about a young boy growing up along the Mississippi River',]

nf = nemo.core.NeuralModuleFactory(backend=nemo.core.Backend.PyTorch,
                                   log_dir=None)

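# Read one query per line from the input file; out_file receives the tagged
# output later in the script.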
with open('cleaned_ner_note_test.txt') as in_file:
    QUERIES_PLACEHOLDER = in_file.readlines()
out_file = open('ner_output_test.txt', 'w+')

labels_dict = get_vocab(LABELS_DICT)
""" Load the pretrained BERT parameters
See the list of pretrained models, call:
nemo_nlp.huggingface.BERT.list_pretrained_models()
"""
pretrained_bert_model = nemo_nlp.nm.trainables.get_huggingface_model(
    bert_config=BERT_CONFIG, pretrained_model_name=PRETRAINED_MODEL_NAME)

tokenizer = nemo.collections.nlp.data.tokenizers.get_tokenizer(
    tokenizer_name=TOKENIZER,
    pretrained_model_name=PRETRAINED_MODEL_NAME,
    tokenizer_model=TOKENIZER_MODEL,
)
hidden_size = pretrained_bert_model.hidden_size

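# The inference data layer tokenizes the raw queries and batches them for the
# forward pass (no labels required at inference time).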
data_layer = nemo_nlp.nm.data_layers.BertTokenClassificationInferDataLayer(