def main(cfg: DictConfig) -> None:
    """Evaluate a pretrained token classification model on a test set and run sample inference.

    Requires ``cfg.pretrained_model`` to be either a path to a ``.nemo`` checkpoint or the
    name of an available pretrained model. Evaluation artifacts (confusion matrix, predictions)
    are written under the experiment directory returned by ``exp_manager``.

    Raises:
        ValueError: if ``model.test_ds`` is missing from the config, if no pretrained model
            is provided, or if the provided model name/path cannot be resolved.
    """
    logging.info(
        'During evaluation/testing, it is currently advisable to construct a new Trainer '
        'with single GPU and no DDP to obtain accurate results'
    )

    # Guard clauses instead of nested else-pyramid: fail fast on bad config.
    if not hasattr(cfg.model, 'test_ds'):
        raise ValueError('model.test_ds was not found in the config, skipping evaluation')

    # Single-GPU (or CPU) trainer without DDP so evaluation metrics are not skewed.
    gpu = 1 if cfg.trainer.gpus != 0 else 0
    trainer = pl.Trainer(
        gpus=gpu,
        precision=cfg.trainer.precision,
        amp_level=cfg.trainer.amp_level,
        logger=False,
        checkpoint_callback=False,
    )
    exp_dir = exp_manager(trainer, cfg.exp_manager)

    if not cfg.pretrained_model:
        raise ValueError(
            'To run evaluation and inference script a pre-trained model or .nemo file must be provided.'
            f'Choose from {TokenClassificationModel.list_available_models()} or "pretrained_model"="your_model.nemo"'
        )

    # Resolve the model: local .nemo checkpoint first, then cloud-hosted pretrained name.
    if os.path.exists(cfg.pretrained_model):
        model = TokenClassificationModel.restore_from(cfg.pretrained_model)
    elif cfg.pretrained_model in TokenClassificationModel.get_available_model_names():
        model = TokenClassificationModel.from_pretrained(cfg.pretrained_model)
    else:
        raise ValueError(
            f'Provide path to the pre-trained .nemo checkpoint or choose from '
            f'{TokenClassificationModel.list_available_models()}'
        )

    data_dir = cfg.model.dataset.get('data_dir', None)
    if data_dir is None:
        logging.error(
            'No dataset directory provided. Skipping evaluation. '
            'To run evaluation on a file, specify path to the directory that contains '
            'test_ds.text_file and test_ds.labels_file with "model.dataset.data_dir" argument.'
        )
    elif not os.path.exists(data_dir):
        logging.error(f'{data_dir} is not found, skipping evaluation on the test set.')
    else:
        model.update_data_dir(data_dir=data_dir)
        # Overwrite the restored model's dataset config so test dataloaders use this run's settings.
        model._cfg.dataset = cfg.model.dataset

        if not hasattr(cfg.model, 'test_ds'):
            logging.error('model.test_ds was not found in the config, skipping evaluation')
        elif model.prepare_test(trainer):
            model.setup_test_data(cfg.model.test_ds)
            trainer.test(model)
            model.evaluate_from_file(
                text_file=os.path.join(data_dir, cfg.model.test_ds.text_file),
                labels_file=os.path.join(data_dir, cfg.model.test_ds.labels_file),
                output_dir=exp_dir,
                add_confusion_matrix=True,
                normalize_confusion_matrix=True,
            )
        else:
            logging.error('Skipping the evaluation. The trainer is not setup properly.')

    # run an inference on a few examples
    queries = [
        'we bought four shirts from the nvidia gear store in santa clara.',
        'Nvidia is a company.',
    ]
    results = model.add_predictions(queries, output_file='predictions.txt')

    for query, result in zip(queries, results):
        logging.info(f'Query : {query}')
        logging.info(f'Result: {result.strip()}\n')

    logging.info(f'Results are saved at {exp_dir}')
def main(cfg: DictConfig) -> None:
    """Train (or finetune) a token classification model, then evaluate it and run sample inference.

    If ``cfg.pretrained_model`` is set, the pretrained weights are loaded and finetuned
    when ``model.dataset.data_dir`` is provided; otherwise training is skipped and the
    pretrained weights are used as-is. After training, evaluation runs on the validation
    files and a couple of example queries are tagged.
    """
    trainer = pl.Trainer(**cfg.trainer)
    exp_dir = exp_manager(trainer, cfg.get("exp_manager", None))
    do_training = True

    if not cfg.pretrained_model:
        logging.info(f'Config: {OmegaConf.to_yaml(cfg)}')
        model = TokenClassificationModel(cfg.model, trainer=trainer)
    else:
        logging.info(f'Loading pretrained model {cfg.pretrained_model}')
        model = TokenClassificationModel.from_pretrained(cfg.pretrained_model)
        data_dir = cfg.model.dataset.get('data_dir', None)
        if data_dir:
            # we can also do finetunining of the pretrained model but it will require
            # setting up train and validation Pytorch DataLoaders

            # setup the data dir to get class weights statistics
            model.update_data_dir(data_dir=data_dir)
            # then we're setting up loss, use model.dataset.class_balancing,
            # if you want to add class weights to the CrossEntropyLoss
            model.setup_loss(class_balancing=cfg.model.dataset.class_balancing)
            # finally, setup train and validation Pytorch DataLoaders
            model.setup_training_data()
            model.setup_validation_data()
            logging.info('Using config file of the pretrained model')
        else:
            do_training = False
            logging.info(
                'Data dir should be specified for finetuning the pretrained model. '
                f'Using pretrained {cfg.pretrained_model} model weights and skipping finetuning.'
            )

    if do_training:
        trainer.fit(model)
        if cfg.model.nemo_path:
            model.save_to(cfg.model.nemo_path)

    # After model training is done, you can use the model for inference.
    # You can either evaluate data from a text_file that follows training data format,
    # or provide a list of queries you want to add entities to.
    logging.info(
        'During evaluation/testing, it is currently advisable to construct a new Trainer with single GPU '
        'and no DDP to obtain accurate results'
    )
    gpu = 1 if cfg.trainer.gpus != 0 else 0
    trainer = pl.Trainer(gpus=gpu)
    model.set_trainer(trainer)

    if do_training:
        # run evaluation on a dataset from file
        # only possible if model.dataset.data_dir is specified
        # change the path to the file you want to use for the final evaluation
        model.evaluate_from_file(
            text_file=os.path.join(cfg.model.dataset.data_dir, cfg.model.validation_ds.text_file),
            labels_file=os.path.join(cfg.model.dataset.data_dir, cfg.model.validation_ds.labels_file),
            output_dir=exp_dir,
            add_confusion_matrix=True,
            normalize_confusion_matrix=True,
        )

    # run an inference on a few examples
    queries = [
        'we bought four shirts from the nvidia gear store in santa clara.',
        'Nvidia is a company.',
    ]
    results = model.add_predictions(queries)

    for query, result in zip(queries, results):
        logging.info(f'Query : {query}')
        logging.info(f'Result: {result.strip()}\n')
def main(cfg: DictConfig) -> None:
    """Train a token classification model, optionally starting from a pretrained checkpoint.

    Builds a DDP-enabled trainer from ``cfg.trainer``, resolves the model from config,
    a local ``.nemo`` path, or a pretrained model name, wires up the dataset, trains,
    and saves the resulting model to ``cfg.model.nemo_path`` if set.

    Raises:
        ValueError: if the pretrained model name/path cannot be resolved, or if
            ``model.dataset.data_dir`` is missing or does not exist when finetuning.
    """
    trainer = pl.Trainer(plugins=[NLPDDPPlugin()], **cfg.trainer)
    exp_manager(trainer, cfg.get("exp_manager", None))

    if not cfg.pretrained_model:
        logging.info(f'Config: {OmegaConf.to_yaml(cfg)}')
        model = TokenClassificationModel(cfg.model, trainer=trainer)
    else:
        # Resolve the model: local .nemo checkpoint first, then cloud-hosted pretrained name.
        if os.path.exists(cfg.pretrained_model):
            model = TokenClassificationModel.restore_from(cfg.pretrained_model, trainer=trainer)
        elif cfg.pretrained_model in TokenClassificationModel.get_available_model_names():
            model = TokenClassificationModel.from_pretrained(cfg.pretrained_model)
        else:
            raise ValueError(
                f'Provide path to the pre-trained .nemo file or choose from '
                f'{TokenClassificationModel.list_available_models()}'
            )

        data_dir = cfg.model.dataset.get('data_dir', None)
        if data_dir:
            if not os.path.exists(data_dir):
                raise ValueError(f'{data_dir} is not found')

            # we can also do finetuning of the pretrained model but it will require
            # setup the data dir to get class weights statistics
            model.update_data_dir(data_dir=data_dir)
            # finally, setup train and validation Pytorch DataLoaders
            model.setup_training_data()
            model.setup_validation_data()
            # then we're setting up loss, use model.dataset.class_balancing,
            # if you want to add class weights to the CrossEntropyLoss
            model.setup_loss(class_balancing=cfg.model.dataset.class_balancing)
            logging.info('Using config file of the pretrained model')
        else:
            raise ValueError(
                'Specify a valid dataset directory that contains test_ds.text_file and '
                'test_ds.labels_file with "model.dataset.data_dir" argument'
            )

    trainer.fit(model)
    if cfg.model.nemo_path:
        model.save_to(cfg.model.nemo_path)
        logging.info(f'The model was saved to {cfg.model.nemo_path}')