def run_config(config):
    params = Params(json.loads(config))
    params_copy = params.duplicate()

    if 'dataset_reader' in params:
        reader = DatasetReader.from_params(params.pop('dataset_reader'))
    else:
        raise RuntimeError('`dataset_reader` section is required')

    all_instances = []
    if 'train_data_path' in params:
        print('Reading the training data...')
        train_data = reader.read(params.pop('train_data_path'))
        all_instances.extend(train_data)
    else:
        raise RuntimeError('`train_data_path` section is required')

    validation_data = None
    if 'validation_data_path' in params:
        print('Reading the validation data...')
        validation_data = reader.read(params.pop('validation_data_path'))
        all_instances.extend(validation_data)

    print('Building the vocabulary...')
    vocab = Vocabulary.from_instances(all_instances)

    model = None
    iterator = None
    if 'model' not in params:
        # 'dataset' mode -- just preview the (first 10) instances
        print('Showing the first 10 instances:')
        for inst in all_instances[:10]:
            print(inst)
    else:
        model = Model.from_params(vocab=vocab, params=params.pop('model'))

        # DataLoader.from_params consumes the params it is given, so each
        # loader needs its own copy.
        loader_params = deepcopy(params.pop("data_loader"))
        train_data_loader = DataLoader.from_params(dataset=train_data,
                                                   params=deepcopy(loader_params))
        dev_data_loader = DataLoader.from_params(dataset=validation_data,
                                                 params=loader_params)
        train_data.index_with(vocab)
        if validation_data is not None:
            validation_data.index_with(vocab)

        # set up a temporary, empty directory for serialization
        with tempfile.TemporaryDirectory() as serialization_dir:
            trainer = Trainer.from_params(
                model=model,
                serialization_dir=serialization_dir,
                data_loader=train_data_loader,
                validation_data_loader=dev_data_loader,
                params=params.pop('trainer'))
            trainer.train()

    return {
        'params': params_copy,
        'dataset_reader': reader,
        'vocab': vocab,
        'iterator': iterator,
        'model': model
    }
def run_config(config):
    params = Params(json.loads(config))
    params_copy = params.duplicate()

    if "dataset_reader" in params:
        reader = DatasetReader.from_params(params.pop("dataset_reader"))
    else:
        raise RuntimeError("`dataset_reader` section is required")

    loader_params = params.pop("data_loader")
    train_data_loader = DataLoader.from_params(
        reader=reader,
        data_path=params.pop("train_data_path"),
        params=loader_params.duplicate(),
    )
    dev_data_loader = DataLoader.from_params(
        reader=reader,
        data_path=params.pop("validation_data_path"),
        params=loader_params,
    )

    print("Building the vocabulary...")
    vocab = Vocabulary.from_instances(train_data_loader.iter_instances())

    if "model" not in params:
        # 'dataset' mode -- just preview the first 10 instances
        print("Showing the first 10 instances:")
        for inst in itertools.islice(train_data_loader.iter_instances(), 10):
            print(inst)
        return None

    model = Model.from_params(vocab=vocab, params=params.pop("model"))

    train_data_loader.index_with(vocab)
    dev_data_loader.index_with(vocab)

    # set up a temporary, empty directory for serialization
    with tempfile.TemporaryDirectory() as serialization_dir:
        trainer = Trainer.from_params(
            model=model,
            serialization_dir=serialization_dir,
            data_loader=train_data_loader,
            validation_data_loader=dev_data_loader,
            params=params.pop("trainer"),
        )
        trainer.train()

    return {
        "params": params_copy,
        "dataset_reader": reader,
        "vocab": vocab,
        "model": model,
    }
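# A minimal sketch of driving the run_config above, assuming the built-in
# "sequence_tagging" reader and "simple_tagger" model are registered (as in the
# test fixtures later in this file); the fixture path is hypothetical.
minimal_config = """
{
    "dataset_reader": {"type": "sequence_tagging"},
    "train_data_path": "test_fixtures/data/sequence_tagging.tsv",
    "validation_data_path": "test_fixtures/data/sequence_tagging.tsv",
    "model": {
        "type": "simple_tagger",
        "text_field_embedder": {
            "token_embedders": {"tokens": {"type": "embedding", "embedding_dim": 5}}
        },
        "encoder": {"type": "lstm", "input_size": 5, "hidden_size": 7}
    },
    "data_loader": {"batch_size": 2},
    "trainer": {"cuda_device": -1, "num_epochs": 1, "optimizer": "adam"}
}
"""
components = run_config(minimal_config)
print(components["vocab"])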
def evaluate_from_args(args: argparse.Namespace) -> Dict[str, Any]:
    # Disable some of the more verbose logging statements
    logging.getLogger("allennlp.common.params").disabled = True
    logging.getLogger("allennlp.nn.initializers").disabled = True
    logging.getLogger("allennlp.modules.token_embedders.embedding").setLevel(logging.INFO)

    # Load from archive
    archive = load_archive(
        args.archive_file,
        weights_file=args.weights_file,
        cuda_device=args.cuda_device,
        overrides=args.overrides,
    )
    config = archive.config
    prepare_environment(config)
    model = archive.model
    model.eval()

    # Load the evaluation data

    # Try to use the validation dataset reader if there is one - otherwise fall back
    # to the default dataset_reader used for both training and validation.
    validation_dataset_reader_params = config.pop("validation_dataset_reader", None)
    if validation_dataset_reader_params is not None:
        dataset_reader = DatasetReader.from_params(validation_dataset_reader_params)
    else:
        dataset_reader = DatasetReader.from_params(config.pop("dataset_reader"))
    evaluation_data_path = args.input_file
    logger.info("Reading evaluation data from %s", evaluation_data_path)
    instances = dataset_reader.read(evaluation_data_path)

    embedding_sources = (
        json.loads(args.embedding_sources_mapping) if args.embedding_sources_mapping else {}
    )

    if args.extend_vocab:
        logger.info("Vocabulary is being extended with test instances.")
        model.vocab.extend_from_instances(instances=instances)
        model.extend_embedder_vocab(embedding_sources)

    instances.index_with(model.vocab)
    data_loader_params = config.pop("validation_data_loader", None)
    if data_loader_params is None:
        data_loader_params = config.pop("data_loader")
    if args.batch_size:
        data_loader_params["batch_size"] = args.batch_size
    data_loader = DataLoader.from_params(dataset=instances, params=data_loader_params)

    metrics = evaluate(model, data_loader, args.cuda_device, args.batch_weight_key)

    logger.info("Finished evaluating.")

    dump_metrics(args.output_file, metrics, log=True)

    return metrics
def evaluate_from_args(args: argparse.Namespace) -> Dict[str, Any]:
    common_logging.FILE_FRIENDLY_LOGGING = args.file_friendly_logging

    # Disable some of the more verbose logging statements
    logging.getLogger("allennlp.common.params").disabled = True
    logging.getLogger("allennlp.nn.initializers").disabled = True
    logging.getLogger("allennlp.modules.token_embedders.embedding").setLevel(logging.INFO)

    # Load from archive
    archive = load_archive(
        args.archive_file,
        weights_file=args.weights_file,
        cuda_device=args.cuda_device,
        overrides=args.overrides,
    )
    config = archive.config
    prepare_environment(config)
    model = archive.model
    model.eval()

    # Load the evaluation data
    dataset_reader = archive.validation_dataset_reader
    evaluation_data_path = args.input_file
    logger.info("Reading evaluation data from %s", evaluation_data_path)

    data_loader_params = config.pop("validation_data_loader", None)
    if data_loader_params is None:
        data_loader_params = config.pop("data_loader")
    if args.batch_size:
        data_loader_params["batch_size"] = args.batch_size
    data_loader = DataLoader.from_params(
        params=data_loader_params, reader=dataset_reader, data_path=evaluation_data_path
    )

    embedding_sources = (
        json.loads(args.embedding_sources_mapping) if args.embedding_sources_mapping else {}
    )

    if args.extend_vocab:
        logger.info("Vocabulary is being extended with test instances.")
        model.vocab.extend_from_instances(instances=data_loader.iter_instances())
        model.extend_embedder_vocab(embedding_sources)

    data_loader.index_with(model.vocab)

    metrics = evaluate(
        model,
        data_loader,
        args.cuda_device,
        args.batch_weight_key,
        output_file=args.output_file,
        predictions_output_file=args.predictions_output_file,
    )

    logger.info("Finished evaluating.")

    return metrics
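# A hedged sketch of calling evaluate_from_args directly instead of through the
# command-line parser; every attribute below mirrors one the function reads, and
# the archive/input paths are hypothetical.
args = argparse.Namespace(
    archive_file="model.tar.gz",
    input_file="dev.jsonl",
    weights_file=None,
    cuda_device=-1,
    overrides="",
    batch_size=None,
    extend_vocab=False,
    embedding_sources_mapping=None,
    output_file=None,
    predictions_output_file=None,
    batch_weight_key="",
    file_friendly_logging=False,
)
metrics = evaluate_from_args(args)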
def setUp(self):
    super().setUp()
    param_file = self.FIXTURES_ROOT / "simple_tagger" / "experiment_with_regularization.json"
    self.set_up_model(param_file, self.FIXTURES_ROOT / "data" / "sequence_tagging.tsv")
    params = Params.from_file(param_file)
    self.reader = DatasetReader.from_params(params["dataset_reader"])
    self.data_loader = DataLoader.from_params(dataset=self.instances, params=params["data_loader"])
    self.trainer = Trainer.from_params(
        model=self.model,
        data_loader=self.data_loader,
        serialization_dir=self.TEST_DIR,
        params=params.get("trainer"),
    )
def setUp(self):
    super().setUp()
    params = Params(
        {
            "model": {
                "type": "simple_tagger",
                "text_field_embedder": {
                    "token_embedders": {"tokens": {"type": "embedding", "embedding_dim": 5}}
                },
                "encoder": {"type": "lstm", "input_size": 5, "hidden_size": 7, "num_layers": 2},
            },
            "dataset_reader": {"type": "sequence_tagging"},
            "train_data_path": str(self.FIXTURES_ROOT / "data" / "sequence_tagging.tsv"),
            "validation_data_path": str(self.FIXTURES_ROOT / "data" / "sequence_tagging.tsv"),
            "data_loader": {"batch_size": 2},
            "trainer": {"cuda_device": -1, "num_epochs": 2, "optimizer": "adam"},
        }
    )
    all_datasets = datasets_from_params(params)
    vocab = Vocabulary.from_params(
        params.pop("vocabulary", {}),
        instances=(instance for dataset in all_datasets.values() for instance in dataset),
    )
    model = Model.from_params(vocab=vocab, params=params.pop("model"))
    train_data = all_datasets["train"]
    train_data.index_with(vocab)

    data_loader = DataLoader.from_params(dataset=train_data, params=params.pop("data_loader"))
    trainer_params = params.pop("trainer")
    serialization_dir = os.path.join(self.TEST_DIR, "test_search_learning_rate")

    self.trainer = TrainerBase.from_params(
        model=model,
        serialization_dir=serialization_dir,
        data_loader=data_loader,
        train_data=train_data,
        params=trainer_params,
        validation_data=None,
        validation_iterator=None,
    )
def benchmark_xlmr_mdl():
    from allennlp.data import DataLoader
    from allennlp.training.util import evaluate

    xlmr = load_xlmr_coref_model()
    data_loader_params = xlmr.config.pop("data_loader")
    instances = xlmr.dataset_reader.load_dataset(testset)
    instances.index_with(xlmr.model.vocab)
    data_loader = DataLoader.from_params(dataset=instances, params=data_loader_params)

    start = time.time()
    metrics = evaluate(xlmr.model, data_loader)
    print('**XLM-R model**')
    print_speed_performance(start, num_sentences, num_tokens)
    print('Precision : ', metrics['coref_precision'])
    print('Recall : ', metrics['coref_recall'])
    print('F1 : ', metrics['coref_f1'])
    print('Mention Recall : ', metrics['mention_recall'])
def evaluate_from_args(args: argparse.Namespace) -> Dict[str, Any]:
    # Disable some of the more verbose logging statements
    logging.getLogger("allennlp.common.params").disabled = True
    logging.getLogger("allennlp.nn.initializers").disabled = True
    logging.getLogger("transformers.modeling_utils").disabled = True
    logging.getLogger("transformers.tokenization_utils").disabled = True
    logging.getLogger("transformers.configuration_utils").disabled = True
    logging.basicConfig(level=logging.INFO)

    # Load from archive
    archive = load_archive(
        args.archive_file,
        weights_file=args.weights_file,
        cuda_device=args.cuda_device,
        overrides=args.overrides,
    )
    config = archive.config
    prepare_environment(config)
    model = archive.model
    model.eval()

    # Load the evaluation data

    # Try to use the validation dataset reader if there is one - otherwise fall back
    # to the default dataset_reader used for both training and validation.
    validation_dataset_reader_params = config.pop("validation_dataset_reader", None)
    if validation_dataset_reader_params is not None:
        dataset_reader = DatasetReader.from_params(validation_dataset_reader_params)
    else:
        dataset_reader = DatasetReader.from_params(config.pop("dataset_reader"))
    evaluation_data_path = args.input_file
    logger.info("Reading evaluation data from %s", evaluation_data_path)
    instances = dataset_reader.read(evaluation_data_path)

    embedding_sources = (
        json.loads(args.embedding_sources_mapping) if args.embedding_sources_mapping else {}
    )

    if args.extend_vocab:
        logger.info("Vocabulary is being extended with test instances.")
        model.vocab.extend_from_instances(instances=instances)
        model.extend_embedder_vocab(embedding_sources)

    instances.index_with(model.vocab)
    data_loader_params = config.pop("validation_data_loader", None)
    if data_loader_params is None:
        data_loader_params = config.pop("data_loader")
    if args.batch_size:
        data_loader_params["batch_size"] = args.batch_size
    data_loader = DataLoader.from_params(dataset=instances, params=data_loader_params)

    if "iter_norm" in dir(model.text_field_embedder._token_embedders["tokens"]):
        iter_num = model.text_field_embedder._token_embedders["tokens"].iter_norm
    else:
        iter_num = None

    if iter_num:
        # Obtain evaluation info for iterative normalization:
        iter_mean_eval = []
        for iter_norm_i in range(iter_num):
            logging.info(
                "Iterative normalization for evaluation: pass {}".format(iter_norm_i)
            )
            mean, embeddings = get_iter_norm_mean_eval(
                model, data_loader, iter_mean_eval, args.cuda_device
            )
            logger.info(
                "The degree of isotropy of vectors is {}".format(
                    degree_anisotropy(embeddings.t(), args.cuda_device)
                )
            )
            iter_mean_eval.append(mean)
        model.text_field_embedder._token_embedders["tokens"].iter_norm = None
        model.text_field_embedder._token_embedders["tokens"]._matched_embedder.mean_emb_eval = iter_mean_eval
        model.text_field_embedder._token_embedders["tokens"]._matched_embedder.is_train = False

    metrics = evaluate(model, data_loader, args.cuda_device, args.batch_weight_key)

    logger.info("Finished evaluating.")

    dump_metrics(args.output_file, metrics, log=True)

    return metrics
def ensure_model_can_train_save_and_load(
    self,
    param_file: Union[PathLike, str],
    tolerance: float = 1e-4,
    cuda_device: int = -1,
    gradients_to_ignore: Set[str] = None,
    overrides: str = "",
    metric_to_check: str = None,
    metric_terminal_value: float = None,
    metric_tolerance: float = 1e-4,
    disable_dropout: bool = True,
):
    save_dir = self.TEST_DIR / "save_and_load_test"
    archive_file = save_dir / "model.tar.gz"
    model = train_model_from_file(param_file, save_dir, overrides=overrides)
    metrics_file = save_dir / "metrics.json"
    if metric_to_check is not None:
        # Load the metrics file before reading values from it.
        metrics = json.loads(metrics_file.read_text())
        metric_value = metrics.get(f"best_validation_{metric_to_check}") or metrics.get(
            f"training_{metric_to_check}"
        )
        assert metric_value is not None, f"Cannot find {metric_to_check} in metrics.json file"
        assert metric_terminal_value is not None, "Please specify metric terminal value"
        assert abs(metric_value - metric_terminal_value) < metric_tolerance
    loaded_model = load_archive(archive_file, cuda_device=cuda_device).model
    # Define the key sets before comparing them.
    state_keys = model.state_dict().keys()
    loaded_state_keys = loaded_model.state_dict().keys()
    assert state_keys == loaded_state_keys
    for key in state_keys:
        assert_allclose(
            model.state_dict()[key].cpu().numpy(),
            loaded_model.state_dict()[key].cpu().numpy(),
            err_msg=key,
        )
    params = Params.from_file(param_file, params_overrides=overrides)
    reader = DatasetReader.from_params(params["dataset_reader"])

    print("Reading with original model")
    model_dataset = reader.read(params["validation_data_path"])
    model_dataset.index_with(model.vocab)

    print("Reading with loaded model")
    loaded_dataset = reader.read(params["validation_data_path"])
    loaded_dataset.index_with(loaded_model.vocab)

    # Need to duplicate params because DataLoader.from_params will consume them.
    data_loader_params = params["data_loader"]
    data_loader_params["shuffle"] = False
    data_loader_params2 = Params(copy.deepcopy(data_loader_params.as_dict()))

    data_loader = DataLoader.from_params(dataset=model_dataset, params=data_loader_params)
    data_loader2 = DataLoader.from_params(dataset=loaded_dataset, params=data_loader_params2)

    model_batch = next(iter(data_loader))
    loaded_batch = next(iter(data_loader2))

    self.check_model_computes_gradients_correctly(
        model, model_batch, gradients_to_ignore, disable_dropout
    )

    for key in model_batch.keys():
        self.assert_fields_equal(model_batch[key], loaded_batch[key], key, 1e-6)

    # Reset stateful modules so both models predict from a clean state.
    for model_ in [model, loaded_model]:
        for module in model_.modules():
            if hasattr(module, "stateful") and module.stateful:
                module.reset_states()
    print("Predicting with original model")
    model_predictions = model(**model_batch)
    print("Predicting with loaded model")
    loaded_model_predictions = loaded_model(**loaded_batch)

    # Compare each output key between the two models.
    for key in model_predictions.keys():
        self.assert_fields_equal(
            model_predictions[key], loaded_model_predictions[key], name=key, tolerance=tolerance
        )
def ensure_model_can_train_save_and_load(
    self,
    param_file: str,
    tolerance: float = 1e-4,
    cuda_device: int = -1,
    gradients_to_ignore: Set[str] = None,
    overrides: str = "",
    metric_to_check: str = None,
    metric_terminal_value: float = None,
    metric_tolerance: float = 1e-4,
    disable_dropout: bool = True,
):
    """
    # Parameters

    param_file : `str`
        Path to a training configuration file that we will use to train the model for this
        test.
    tolerance : `float`, optional (default=1e-4)
        When comparing model predictions between the originally-trained model and the model
        after saving and loading, we will use this tolerance value (passed as `rtol` to
        `numpy.testing.assert_allclose`).
    cuda_device : `int`, optional (default=-1)
        The device to run the test on.
    gradients_to_ignore : `Set[str]`, optional (default=None)
        This test runs a gradient check to make sure that we're actually computing gradients
        for all of the parameters in the model. If you really want to ignore certain
        parameters when doing that check, you can pass their names here. This is not
        recommended unless you're `really` sure you don't need to have non-zero gradients for
        those parameters (e.g., some of the beam search / state machine models have
        infrequently-used parameters that are hard to force the model to use in a small test).
    overrides : `str`, optional (default = "")
        A JSON string that we will use to override values in the input parameter file.
    metric_to_check : `str`, optional (default = None)
        We may want to automatically perform a check that the model reaches a given metric
        when training (on the validation set, if it is specified). It may be useful in CI,
        for example. You can pass any metric that is in your model's returned metrics.
    metric_terminal_value : `str`, optional (default = None)
        When you set `metric_to_check`, you need to set the value this metric must converge to.
    metric_tolerance : `float`, optional (default=1e-4)
        Tolerance for checking your model metric against the metric terminal value. One can
        expect some variance in model metrics when the training process is highly stochastic.
    disable_dropout : `bool`, optional (default = True)
        If True we will set all dropout to 0 before checking gradients. (Otherwise, with small
        datasets, you may get zero gradients because of unlucky dropout.)
    """
    save_dir = self.TEST_DIR / "save_and_load_test"
    archive_file = save_dir / "model.tar.gz"
    model = train_model_from_file(param_file, save_dir, overrides=overrides)
    metrics_file = save_dir / "metrics.json"
    if metric_to_check is not None:
        metrics = json.loads(metrics_file.read_text())
        metric_value = metrics.get(f"best_validation_{metric_to_check}") or metrics.get(
            f"training_{metric_to_check}"
        )
        assert metric_value is not None, f"Cannot find {metric_to_check} in metrics.json file"
        assert metric_terminal_value is not None, "Please specify metric terminal value"
        assert abs(metric_value - metric_terminal_value) < metric_tolerance
    loaded_model = load_archive(archive_file, cuda_device=cuda_device).model
    state_keys = model.state_dict().keys()
    loaded_state_keys = loaded_model.state_dict().keys()
    assert state_keys == loaded_state_keys

    # First we make sure that the state dict (the parameters) are the same for both models.
    for key in state_keys:
        assert_allclose(
            model.state_dict()[key].cpu().numpy(),
            loaded_model.state_dict()[key].cpu().numpy(),
            err_msg=key,
        )
    params = Params.from_file(param_file, params_overrides=overrides)
    reader = DatasetReader.from_params(params["dataset_reader"])

    print("Reading with original model")
    model_dataset = reader.read(params["validation_data_path"])
    model_dataset.index_with(model.vocab)

    print("Reading with loaded model")
    loaded_dataset = reader.read(params["validation_data_path"])
    loaded_dataset.index_with(loaded_model.vocab)

    # Need to duplicate params because DataLoader.from_params will consume.
    data_loader_params = params["data_loader"]
    data_loader_params["shuffle"] = False
    data_loader_params2 = Params(copy.deepcopy(data_loader_params.as_dict()))

    data_loader = DataLoader.from_params(dataset=model_dataset, params=data_loader_params)
    data_loader2 = DataLoader.from_params(dataset=loaded_dataset, params=data_loader_params2)

    # We'll check that even if we index the dataset with each model separately, we still get
    # the same result out.
    model_batch = next(iter(data_loader))
    loaded_batch = next(iter(data_loader2))

    # Check gradients are None for non-trainable parameters and check that
    # trainable parameters receive some gradient if they are trainable.
    self.check_model_computes_gradients_correctly(
        model, model_batch, gradients_to_ignore, disable_dropout
    )

    # The datasets themselves should be identical.
    assert model_batch.keys() == loaded_batch.keys()
    for key in model_batch.keys():
        self.assert_fields_equal(model_batch[key], loaded_batch[key], key, 1e-6)

    # Set eval mode, to turn off things like dropout, then get predictions.
    model.eval()
    loaded_model.eval()

    # Models with stateful RNNs need their states reset to have consistent
    # behavior after loading.
    for model_ in [model, loaded_model]:
        for module in model_.modules():
            if hasattr(module, "stateful") and module.stateful:
                module.reset_states()
    print("Predicting with original model")
    model_predictions = model(**model_batch)
    print("Predicting with loaded model")
    loaded_model_predictions = loaded_model(**loaded_batch)

    # Check loaded model's loss exists and we can compute gradients, for continuing training.
    loaded_model_loss = loaded_model_predictions["loss"]
    assert loaded_model_loss is not None
    loaded_model_loss.backward()

    # Both outputs should have the same keys and the values for these keys should be close.
    for key in model_predictions.keys():
        self.assert_fields_equal(
            model_predictions[key], loaded_model_predictions[key], name=key, tolerance=tolerance
        )

    return model, loaded_model
def find_learning_rate_model(
    params: Params,
    serialization_dir: str,
    start_lr: float = 1e-5,
    end_lr: float = 10,
    num_batches: int = 100,
    linear_steps: bool = False,
    stopping_factor: float = None,
    force: bool = False,
) -> None:
    """
    Runs the learning rate search for the given `num_batches` and saves the results in
    `serialization_dir`.

    # Parameters

    params : `Params`
        A parameter object specifying an AllenNLP Experiment.
    serialization_dir : `str`
        The directory in which to save results.
    start_lr : `float`
        Learning rate at which to start the search.
    end_lr : `float`
        Learning rate up to which the search is done.
    num_batches : `int`
        Number of mini-batches to run the learning rate finder for.
    linear_steps : `bool`
        Increase the learning rate linearly if `True`, exponentially if `False`.
    stopping_factor : `float`
        Stop the search when the current loss exceeds the best loss recorded by a multiple
        of the stopping factor. If `None`, the search proceeds until `end_lr`.
    force : `bool`
        If `True` and the serialization directory already exists, everything in it will be
        removed prior to finding the learning rate.
    """
    create_serialization_dir(params, serialization_dir, recover=False, force=force)

    prepare_environment(params)

    cuda_device = params.params.get("trainer").get("cuda_device", -1)
    check_for_gpu(cuda_device)

    distributed_params = params.params.get("distributed")
    # See https://github.com/allenai/allennlp/issues/3658
    assert not distributed_params, "find-lr is not compatible with DistributedDataParallel."

    all_datasets = datasets_from_params(params)
    datasets_for_vocab_creation = set(params.pop("datasets_for_vocab_creation", all_datasets))

    for dataset in datasets_for_vocab_creation:
        if dataset not in all_datasets:
            raise ConfigurationError(f"invalid 'dataset_for_vocab_creation' {dataset}")

    logger.info(
        "From dataset instances, %s will be considered for vocabulary creation.",
        ", ".join(datasets_for_vocab_creation),
    )

    vocab = Vocabulary.from_params(
        params.pop("vocabulary", {}),
        instances=(
            instance
            for key, dataset in all_datasets.items()
            for instance in dataset
            if key in datasets_for_vocab_creation
        ),
    )

    train_data = all_datasets["train"]
    train_data.index_with(vocab)
    model = Model.from_params(vocab=vocab, params=params.pop("model"))
    data_loader = DataLoader.from_params(dataset=train_data, params=params.pop("data_loader"))

    trainer_params = params.pop("trainer")

    no_grad_regexes = trainer_params.pop("no_grad", ())
    for name, parameter in model.named_parameters():
        if any(re.search(regex, name) for regex in no_grad_regexes):
            parameter.requires_grad_(False)

    trainer_choice = trainer_params.pop("type", "gradient_descent")
    if trainer_choice != "gradient_descent":
        raise ConfigurationError(
            "currently find-learning-rate only works with the GradientDescentTrainer"
        )
    trainer: GradientDescentTrainer = Trainer.from_params(  # type: ignore
        model=model,
        serialization_dir=serialization_dir,
        data_loader=data_loader,
        params=trainer_params,
    )

    logger.info(
        f"Starting learning rate search from {start_lr} to {end_lr} in {num_batches} iterations."
    )
    learning_rates, losses = search_learning_rate(
        trainer,
        start_lr=start_lr,
        end_lr=end_lr,
        num_batches=num_batches,
        linear_steps=linear_steps,
        stopping_factor=stopping_factor,
    )
    logger.info("Finished learning rate search.")
    losses = _smooth(losses, 0.98)

    _save_plot(learning_rates, losses, os.path.join(serialization_dir, "lr-losses.png"))
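# A short usage sketch for find_learning_rate_model, assuming an AllenNLP
# experiment config on disk; both paths here are hypothetical.
params = Params.from_file("experiment.jsonnet")
find_learning_rate_model(
    params,
    "lr_search_output",
    start_lr=1e-5,
    end_lr=1.0,
    num_batches=100,
    force=True,
)
# The smoothed loss curve ends up in lr_search_output/lr-losses.png.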
def data_loaders_from_params(
    params: Params,
    train: bool = True,
    validation: bool = True,
    test: bool = True,
    serialization_dir: Optional[Union[str, PathLike]] = None,
) -> Dict[str, DataLoader]:
    """
    Instantiate data loaders specified by the config.
    """
    data_loaders: Dict[str, DataLoader] = {}

    train = train and ("train_data_path" in params)
    validation = validation and ("validation_data_path" in params)
    test = test and ("test_data_path" in params)

    if not any((train, validation, test)):
        # Return early so we don't unnecessarily initialize the train data reader.
        return data_loaders

    dataset_reader_params = params.pop("dataset_reader")
    dataset_reader = DatasetReader.from_params(
        dataset_reader_params, serialization_dir=serialization_dir
    )

    data_loader_params = params.pop("data_loader")

    if train:
        train_data_path = params.pop("train_data_path")
        logger.info("Reading training data from %s", train_data_path)
        data_loaders["train"] = DataLoader.from_params(
            data_loader_params.duplicate(), reader=dataset_reader, data_path=train_data_path
        )

    if not validation and not test:
        # Return early so we don't unnecessarily initialize the validation/test data reader.
        return data_loaders

    validation_and_test_dataset_reader: DatasetReader = dataset_reader
    validation_dataset_reader_params = params.pop("validation_dataset_reader", None)
    if validation_dataset_reader_params is not None:
        logger.info("Using a separate dataset reader to load validation and test data.")
        validation_and_test_dataset_reader = DatasetReader.from_params(
            validation_dataset_reader_params, serialization_dir=serialization_dir
        )

    validation_data_loader_params = params.pop("validation_data_loader", data_loader_params)

    if validation:
        validation_data_path = params.pop("validation_data_path")
        logger.info("Reading validation data from %s", validation_data_path)
        data_loaders["validation"] = DataLoader.from_params(
            validation_data_loader_params.duplicate(),
            reader=validation_and_test_dataset_reader,
            data_path=validation_data_path,
        )

    if test:
        test_data_path = params.pop("test_data_path")
        logger.info("Reading test data from %s", test_data_path)
        data_loaders["test"] = DataLoader.from_params(
            validation_data_loader_params,
            reader=validation_and_test_dataset_reader,
            data_path=test_data_path,
        )

    return data_loaders
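# A hedged usage sketch for data_loaders_from_params: build the loaders, create a
# vocabulary from their instances, then index each loader with it (mirroring the
# iter_instances/index_with pattern used elsewhere in this file). The config path
# is hypothetical.
params = Params.from_file("experiment.jsonnet")
loaders = data_loaders_from_params(params, test=False)
vocab = Vocabulary.from_instances(
    instance for loader in loaders.values() for instance in loader.iter_instances()
)
for loader in loaders.values():
    loader.index_with(vocab)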
tempdir = tempfile.mkdtemp()
with tarfile.open(resolved_archive_file, "r:gz") as archive:
    archive.extractall(tempdir)
atexit.register(_cleanup_archive_dir, tempdir)

serialization_dir = tempdir
config = Params.from_file(os.path.join(serialization_dir, "config.json"), "")

model = SemanticRoleLabeler.from_archive(args.archive_file)
archive = Archive(model=model, config=config)

prepare_environment(config)
model.eval()

validation_dataset_reader_params = config.pop("validation_dataset_reader", None)
if validation_dataset_reader_params is not None:
    dataset_reader = DatasetReader.from_params(validation_dataset_reader_params)
else:
    dataset_reader = DatasetReader.from_params(config.pop("dataset_reader"))

instances = dataset_reader.read(args.evaluation_data_path)
instances.index_with(model.vocab)

data_loader_params = config.pop("validation_data_loader", None)
if data_loader_params is None:
    data_loader_params = config.pop("data_loader")
data_loader = DataLoader.from_params(dataset=instances, params=data_loader_params)

metrics = evaluate(model, data_loader, -1, "")
dump_metrics(args.output_file, metrics)
def evaluate_from_args(args: argparse.Namespace) -> Dict[str, Any]:
    common_logging.FILE_FRIENDLY_LOGGING = args.file_friendly_logging

    # Disable some of the more verbose logging statements
    logging.getLogger("allennlp.common.params").disabled = True
    logging.getLogger("allennlp.nn.initializers").disabled = True
    logging.getLogger("allennlp.modules.token_embedders.embedding").setLevel(logging.INFO)

    # Load from archive
    archive = load_archive(
        args.archive_file,
        weights_file=args.weights_file,
        cuda_device=args.cuda_device,
        overrides=args.overrides,
    )
    config = deepcopy(archive.config)
    prepare_environment(config)
    model = archive.model
    model.eval()

    # Load the evaluation data
    dataset_reader = archive.validation_dataset_reader

    # split files
    evaluation_data_path_list = args.input_file.split(":")
    if args.output_file is not None:
        output_file_list = args.output_file.split(":")
        assert len(output_file_list) == len(evaluation_data_path_list), (
            "The number of `output_file` paths must be equal to the number "
            "of datasets being evaluated."
        )
    if args.predictions_output_file is not None:
        predictions_output_file_list = args.predictions_output_file.split(":")
        assert len(predictions_output_file_list) == len(evaluation_data_path_list), (
            "The number of `predictions_output_file` paths must be equal "
            "to the number of datasets being evaluated."
        )

    # output file
    output_file_path = None
    predictions_output_file_path = None

    # embedding sources
    if args.extend_vocab:
        logger.info("Vocabulary is being extended with embedding sources.")
        embedding_sources = (
            json.loads(args.embedding_sources_mapping) if args.embedding_sources_mapping else {}
        )

    for index in range(len(evaluation_data_path_list)):
        config = deepcopy(archive.config)
        evaluation_data_path = evaluation_data_path_list[index]
        if args.output_file is not None:
            output_file_path = output_file_list[index]
        if args.predictions_output_file is not None:
            predictions_output_file_path = predictions_output_file_list[index]

        logger.info("Reading evaluation data from %s", evaluation_data_path)
        data_loader_params = config.get("validation_data_loader", None)
        if data_loader_params is None:
            data_loader_params = config.get("data_loader")
        if args.batch_size:
            data_loader_params["batch_size"] = args.batch_size
        data_loader = DataLoader.from_params(
            params=data_loader_params, reader=dataset_reader, data_path=evaluation_data_path
        )

        if args.extend_vocab:
            logger.info("Vocabulary is being extended with test instances.")
            model.vocab.extend_from_instances(instances=data_loader.iter_instances())
            model.extend_embedder_vocab(embedding_sources)

        data_loader.index_with(model.vocab)

        metrics = evaluate(
            model,
            data_loader,
            args.cuda_device,
            args.batch_weight_key,
            output_file=output_file_path,
            predictions_output_file=predictions_output_file_path,
        )
    logger.info("Finished evaluating.")

    return metrics
def test_model(self):
    xlmr_model = load_xlmr_coref_model()

    doc = [["Lotte", "arbejder", "med", "Mads", "."], ["Hun", "er", "tandlæge", "."]]

    # prediction
    preds = xlmr_model.predict(doc)
    self.assertEqual(preds['top_spans'], [[0, 0], [1, 3], [5, 5]])
    self.assertEqual(preds['antecedent_indices'], [[0, 1, 2], [0, 1, 2], [0, 1, 2]])
    self.assertEqual(preds['predicted_antecedents'], [-1, -1, 0])
    self.assertEqual(preds['clusters'], [[[0, 0], [5, 5]]])

    # evaluation
    data_loader_params = xlmr_model.config.pop("data_loader")

    from collections import OrderedDict
    sentences = [
        [
            OrderedDict([('id', 1), ('form', 'Lotte'), ('lemma', 'Lotte'), ('upos', 'PROPN'),
                         ('coref_rel', '(1086)'), ('doc_id', '1'), ('qid', '-')]),
            OrderedDict([('id', 2), ('form', 'arbejder'), ('lemma', 'arbejde'), ('upos', 'VERB'),
                         ('coref_rel', '-'), ('doc_id', '1'), ('qid', '-')]),
            OrderedDict([('id', 3), ('form', 'med'), ('lemma', 'med'), ('upos', 'ADV'),
                         ('coref_rel', '-'), ('doc_id', '1'), ('qid', '-')]),
            OrderedDict([('id', 4), ('form', 'Mads'), ('lemma', 'Mads'), ('upos', 'PROPN'),
                         ('coref_rel', '(902)'), ('doc_id', '1'), ('qid', '-')]),
            OrderedDict([('id', 5), ('form', '.'), ('lemma', '.'), ('upos', 'PUNCT'),
                         ('coref_rel', '-'), ('doc_id', '1'), ('qid', '-')]),
        ],
        [
            OrderedDict([('id', 1), ('form', 'Hun'), ('lemma', 'hun'), ('upos', 'PRON'),
                         ('coref_rel', '(1086)'), ('doc_id', '1'), ('qid', '-')]),
            OrderedDict([('id', 2), ('form', 'er'), ('lemma', 'vær'), ('upos', 'VERB'),
                         ('coref_rel', '-'), ('doc_id', '1'), ('qid', '-')]),
            OrderedDict([('id', 3), ('form', 'tandlæge'), ('lemma', 'tandlæge'), ('upos', 'NOUN'),
                         ('coref_rel', '-'), ('doc_id', '1'), ('qid', '-')]),
            OrderedDict([('id', 5), ('form', '.'), ('lemma', '.'), ('upos', 'PUNCT'),
                         ('coref_rel', '-'), ('doc_id', '1'), ('qid', '-')]),
        ],
    ]
    instances = xlmr_model.dataset_reader.load_dataset(sentences)
    instances.index_with(xlmr_model.model.vocab)
    data_loader = DataLoader.from_params(dataset=instances, params=data_loader_params)
    metrics = evaluate(xlmr_model.model, data_loader)
    self.assertEqual(metrics['coref_precision'], 1.0)
import logging

from allennlp.common.params import Params
from allennlp.data import DataLoader, DatasetReader

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("cache_image_features.py")

CONFIG = "./vilbert_vqa_from_huggingface.jsonnet"

logger.info("Reading params")
params = Params.from_file(CONFIG)

logger.info("Instantiating validation dataset reader and data loader")
validation_reader = DatasetReader.from_params(params["validation_dataset_reader"])
validation_data_loader = DataLoader.from_params(
    params["data_loader"].duplicate(),
    reader=validation_reader,
    data_path=params["validation_data_path"],
)
# Iterating over the instances is enough to populate the image feature cache.
for instance in validation_data_loader.iter_instances():
    pass
del validation_data_loader

logger.info("Instantiating train dataset reader and data loader")
train_reader = DatasetReader.from_params(params["dataset_reader"])
data_loader = DataLoader.from_params(
    params["data_loader"].duplicate(),
    reader=train_reader,
    data_path=params["train_data_path"],
)