def from_params(cls,  # type: ignore
                params: Params,
                serialization_dir: str,
                recover: bool = False) -> 'GanTestTrainer':
    dataset_reader = DatasetReader.from_params(params.pop("data_reader"))
    data = dataset_reader.read("")
    noise_reader = DatasetReader.from_params(params.pop("noise_reader"))
    noise = noise_reader.read("")
    generator = Model.from_params(params.pop("generator"))
    discriminator = Model.from_params(params.pop("discriminator"))
    iterator = DataIterator.from_params(params.pop("iterator"))
    noise_iterator = DataIterator.from_params(params.pop("noise_iterator"))
    generator_optimizer = Optimizer.from_params(
        [[n, p] for n, p in generator.named_parameters() if p.requires_grad],
        params.pop("generator_optimizer"))
    discriminator_optimizer = Optimizer.from_params(
        [[n, p] for n, p in discriminator.named_parameters() if p.requires_grad],
        params.pop("discriminator_optimizer"))
    num_epochs = params.pop_int("num_epochs")
    batches_per_epoch = params.pop_int("batches_per_epoch")
    params.pop("trainer")
    params.assert_empty(__name__)
    return cls(serialization_dir, data, noise, generator, discriminator,
               iterator, noise_iterator, generator_optimizer,
               discriminator_optimizer, batches_per_epoch, num_epochs)

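# Minimal usage sketch for the factory above. The "type" values below are
# hypothetical placeholders for registered subclasses; only the top-level keys
# come from the from_params body itself.
from allennlp.common import Params

gan_params = Params({
    "data_reader": {"type": "sampling"},             # hypothetical reader type
    "noise_reader": {"type": "sampling"},            # hypothetical reader type
    "generator": {"type": "generator-test"},         # hypothetical model type
    "discriminator": {"type": "discriminator-test"}, # hypothetical model type
    "iterator": {"type": "basic", "batch_size": 32},
    "noise_iterator": {"type": "basic", "batch_size": 32},
    "generator_optimizer": {"type": "sgd", "lr": 0.05},
    "discriminator_optimizer": {"type": "sgd", "lr": 0.05},
    "num_epochs": 5,
    "batches_per_epoch": 10,
    "trainer": {},
})
trainer = GanTestTrainer.from_params(gan_params, serialization_dir="/tmp/gan_test")
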
def _create_iterator(params: Params) -> DataIterator:
    val_iterr_params = params.pop("validation_iterator", None)
    if val_iterr_params is None:
        raise ValueError("Config file should have validation_iterator")
    return DataIterator.from_params(val_iterr_params)

def evaluate_from_args(args: argparse.Namespace) -> Dict[str, Any]:
    # Disable some of the more verbose logging statements
    logging.getLogger('allennlp.common.params').disabled = True
    logging.getLogger('allennlp.nn.initializers').disabled = True
    logging.getLogger('allennlp.modules.token_embedders.embedding').setLevel(logging.INFO)

    # Load from archive
    archive = load_archive(args.archive_file, args.cuda_device, args.overrides)
    config = archive.config
    prepare_environment(config)
    model = archive.model
    model.eval()

    # Load the evaluation data
    dataset_reader = DatasetReader.from_params(config.pop('dataset_reader'))
    evaluation_data_path = args.evaluation_data_file
    logger.info("Reading evaluation data from %s", evaluation_data_path)
    dataset = dataset_reader.read(evaluation_data_path)
    dataset.index_instances(model.vocab)

    iterator = DataIterator.from_params(config.pop("iterator"))
    metrics = evaluate(model, dataset, iterator, args.cuda_device)

    logger.info("Finished evaluating.")
    logger.info("Metrics:")
    for key, metric in metrics.items():
        logger.info("%s: %s", key, metric)

    return metrics

def get_trainer_from_config(config: Params,
                            train_instances: List[Instance],
                            val_instances: List[Instance],
                            device: int,
                            serialization_dir: Optional[str] = None) -> Trainer:
    trainer_params = config.pop("trainer")
    trainer_params["cuda_device"] = device
    model_params = config.pop("model")
    vocab_dir = config.pop("vocab_dir", None)
    if vocab_dir is None:
        vocab = Vocabulary.from_instances(train_instances)
    else:
        vocab = Vocabulary.from_files(vocab_dir)
    model = Model.from_params(model_params, vocab=vocab)
    iterator = DataIterator.from_params(config.pop("iterator"))
    trainer_params["num_serialized_models_to_keep"] = 1
    iterator.index_with(vocab)
    trainer = Trainer.from_params(model=model,
                                  iterator=iterator,
                                  train_data=train_instances,
                                  validation_data=val_instances,
                                  serialization_dir=serialization_dir,
                                  params=trainer_params)
    return trainer

def _evaluate_nn(self, model_path: str, evaluation_data_file: str, cuda_device: int):
    """
    Evaluate a trained OntoEmma neural model against a labeled data file.

    :param model_path: path to the trained model archive
    :param evaluation_data_file: path to the evaluation data
    :param cuda_device: CUDA device id (-1 for CPU)
    :return: evaluation metrics
    """
    # import allennlp ontoemma classes (to register -- necessary, do not remove)
    from emma.allennlp_classes.ontoemma_dataset_reader import OntologyMatchingDatasetReader
    from emma.allennlp_classes.ontoemma_model import OntoEmmaNN

    # Load from archive
    archive = load_archive(model_path, cuda_device)
    config = archive.config
    prepare_environment(config)
    model = archive.model
    model.eval()

    # Load the evaluation data
    dataset_reader = DatasetReader.from_params(config.pop('dataset_reader'))
    evaluation_data_path = evaluation_data_file
    dataset = dataset_reader.read(evaluation_data_path)

    # compute metrics
    dataset.index_instances(model.vocab)
    iterator = DataIterator.from_params(config.pop("iterator"))
    metrics = evaluate_allennlp(model, dataset, iterator, cuda_device)
    return metrics

def evaluate_from_args(archive_path, overrides=None):
    """Evaluate on test data."""
    # Load from archive
    device = 0
    archive = load_archive(archive_path, device, overrides)
    config = archive.config
    prepare_environment(config)
    model = archive.model
    model.eval()

    corpus = Corpus.from_params(config.pop('corpus'))
    iterator_params = config.pop('validation_iterator', None)
    if not iterator_params:
        iterator_params = config.pop('iterator', None)
    iterator = DataIterator.from_params(iterator_params)
    iterator.index_with(model.vocab)

    try:
        evaluate_dataset(corpus.valid, 'validation', model, iterator, device, archive_path)
        evaluate_dataset(corpus.test, 'test', model, iterator, device, archive_path)
    except KeyboardInterrupt as e:
        logger.warning(f'Evaluation is interrupted due to {e}. Exiting.')

def evaluate_from_args(args: argparse.Namespace) -> Dict[str, Any]:
    # Disable some of the more verbose logging statements
    logging.getLogger("allennlp.common.params").disabled = True
    logging.getLogger("allennlp.nn.initializers").disabled = True
    logging.getLogger("allennlp.modules.token_embedders.embedding").setLevel(logging.INFO)

    # Load from archive
    archive = load_archive(args.archive_file, args.cuda_device,
                           args.overrides, args.weights_file)
    config = archive.config
    prepare_environment(config)
    model = archive.model
    model.eval()

    # Load the evaluation data
    # Try to use the validation dataset reader if there is one - otherwise fall back
    # to the default dataset_reader used for both training and validation.
    validation_dataset_reader_params = config.pop("validation_dataset_reader", None)
    if validation_dataset_reader_params is not None:
        dataset_reader = DatasetReader.from_params(validation_dataset_reader_params)
    else:
        dataset_reader = DatasetReader.from_params(config.pop("dataset_reader"))
    evaluation_data_path = args.input_file
    logger.info("Reading evaluation data from %s", evaluation_data_path)
    instances = dataset_reader.read(evaluation_data_path)

    embedding_sources: Dict[str, str] = (json.loads(args.embedding_sources_mapping)
                                         if args.embedding_sources_mapping else {})
    if args.extend_vocab:
        logger.info("Vocabulary is being extended with test instances.")
        model.vocab.extend_from_instances(Params({}), instances=instances)
        model.extend_embedder_vocab(embedding_sources)

    iterator_params = config.pop("validation_iterator", None)
    if iterator_params is None:
        iterator_params = config.pop("iterator")
    if args.batch_size:
        iterator_params["batch_size"] = args.batch_size
    iterator = DataIterator.from_params(iterator_params)
    iterator.index_with(model.vocab)

    metrics = evaluate(model, instances, iterator, args.cuda_device,
                       args.batch_weight_key)

    logger.info("Finished evaluating.")
    logger.info("Metrics:")
    for key, metric in metrics.items():
        logger.info("%s: %s", key, metric)

    output_file = args.output_file
    if output_file:
        with open(output_file, "w") as file:
            json.dump(metrics, file, indent=4)
    return metrics

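# Programmatic usage sketch for evaluate_from_args above: the Namespace fields
# mirror the attributes the function reads; the file paths are illustrative.
import argparse

eval_args = argparse.Namespace(
    archive_file="model.tar.gz",   # illustrative path
    input_file="dev.jsonl",        # illustrative path
    weights_file=None,
    cuda_device=-1,
    overrides="",
    batch_size=None,
    batch_weight_key="",
    extend_vocab=False,
    embedding_sources_mapping="",
    output_file=None,
)
eval_metrics = evaluate_from_args(eval_args)
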
def evaluate_from_args(args: argparse.Namespace) -> Dict[str, Any]:
    # Disable some of the more verbose logging statements
    logging.getLogger('allennlp.common.params').disabled = True
    logging.getLogger('allennlp.nn.initializers').disabled = True
    logging.getLogger('allennlp.modules.token_embedders.embedding').setLevel(logging.INFO)

    # Load parameter file
    with open(args.config_file) as config_file:
        config = Params(replace_none(json.loads(config_file.read())))

    model = Model.load(config, weights_file=args.weights_file, cuda_device=args.cuda_device)
    model.eval()
    vocab = model._vocab  # pylint: disable=protected-access

    # Load the evaluation data
    dataset_reader = DatasetReader.from_params(config.pop('dataset_reader'))
    evaluation_data_path = args.evaluation_data_file
    logger.info("Reading evaluation data from %s", evaluation_data_path)
    dataset = dataset_reader.read(evaluation_data_path)
    dataset.index_instances(vocab)

    iterator = DataIterator.from_params(config.pop("iterator"))
    metrics = evaluate(model, dataset, iterator, args.cuda_device)

    logger.info("Finished evaluating.")
    logger.info("Metrics:")
    for key, metric in metrics.items():
        logger.info("%s: %s", key, metric)

    return metrics

def evaluate_from_args(args: argparse.Namespace) -> Dict[str, Any]:
    # Disable some of the more verbose logging statements
    logging.getLogger('allennlp.common.params').disabled = True
    logging.getLogger('allennlp.nn.initializers').disabled = True
    logging.getLogger('allennlp.modules.token_embedders.embedding').setLevel(logging.INFO)

    # Load from archive
    archive = load_archive(args.archive_file, args.cuda_device,
                           args.overrides, args.weights_file)
    config = archive.config
    prepare_environment(config)
    model = archive.model
    model.eval()

    # Load the evaluation data
    # Try to use the validation dataset reader if there is one - otherwise fall back
    # to the default dataset_reader used for both training and validation.
    validation_dataset_reader_params = config.pop('validation_dataset_reader', None)
    if validation_dataset_reader_params is not None:
        dataset_reader = DatasetReader.from_params(validation_dataset_reader_params)
    else:
        dataset_reader = DatasetReader.from_params(config.pop('dataset_reader'))
    evaluation_data_path = args.input_file
    logger.info("Reading evaluation data from %s", evaluation_data_path)
    instances = dataset_reader.read(evaluation_data_path)

    iterator_params = config.pop("validation_iterator", None)
    if iterator_params is None:
        iterator_params = config.pop("iterator")
    iterator = DataIterator.from_params(iterator_params)
    iterator.index_with(model.vocab)

    metrics = evaluate(model, instances, iterator, args.cuda_device)

    logger.info("Finished evaluating.")
    logger.info("Metrics:")
    for key, metric in metrics.items():
        logger.info("%s: %s", key, metric)
    return metrics

def evaluate_from_file(archive_path, model_path, overrides=None, eval_suffix='', device=0):
    if archive_path.endswith('gz'):
        archive = load_archive(archive_path, device, overrides)
        config = archive.config
        prepare_environment(config)
        model = archive.model
        serialization_dir = os.path.dirname(archive_path)
    elif archive_path.endswith('yaml'):
        config = yaml_to_params(archive_path, overrides)
        prepare_environment(config)
        config_dir = os.path.dirname(archive_path)
        serialization_dir = os.path.join(config_dir, 'serialization')

    all_datasets = datasets_from_params(config)
    # We want to create the vocab from scratch since it might be of a
    # different type. Vocabulary.from_files will always create the base
    # Vocabulary instance.
    # if os.path.exists(os.path.join(serialization_dir, "vocabulary")):
    #     vocab_path = os.path.join(serialization_dir, "vocabulary")
    #     vocab = Vocabulary.from_files(vocab_path)
    vocab = Vocabulary.from_params(config.pop('vocabulary'))
    model = Model.from_params(vocab=vocab, params=config.pop('model'))
    if model_path:
        best_model_state = torch.load(model_path)
        model.load_state_dict(best_model_state)

    instances = all_datasets.get('test')
    iterator = DataIterator.from_params(config.pop("validation_iterator"))
    iterator.index_with(model.vocab)
    model.eval().to(device)
    model.evaluate_mode = True

    metrics = evaluate(model, instances, iterator, device,
                       serialization_dir, eval_suffix, batch_weight_key='')

    logger.info("Finished evaluating.")
    logger.info("Metrics:")
    for key, metric in metrics.items():
        logger.info("%s: %s", key, metric)

    output_file = os.path.join(serialization_dir, f"evaluate-metrics{eval_suffix}.json")
    if output_file:
        with open(output_file, "w") as file:
            json.dump(metrics, file, indent=4)
    return metrics

def load_model(url, batch_size=BATCH_SIZE):
    archive = load_archive(url, cuda_device=CUDA_ID)
    model = archive.model
    reader = DatasetReader.from_params(archive.config["dataset_reader"])
    iterator_params = archive.config["iterator"]
    iterator_params["batch_size"] = batch_size
    data_iterator = DataIterator.from_params(iterator_params)
    data_iterator.index_with(model.vocab)
    return model, reader, data_iterator

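# Usage sketch for load_model above: read a dataset with the returned reader and
# batch it with the returned iterator. The archive URL and data path are
# illustrative, and BATCH_SIZE / CUDA_ID are assumed module-level constants.
model, reader, data_iterator = load_model("https://example.org/model.tar.gz",
                                          batch_size=16)
instances = reader.read("data/validation.jsonl")   # illustrative path
for batch in data_iterator(instances, num_epochs=1, shuffle=False):
    output_dict = model(**batch)
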
def test_model_decode(self):
    params = Params.from_file(self.param_file)
    iterator_params = params["iterator"]
    iterator = DataIterator.from_params(iterator_params)
    iterator.index_with(self.model.vocab)

    model_batch = next(iterator(self.dataset, shuffle=False))
    self.model.training = False
    forward_output = self.model(**model_batch)
    decode_output = self.model.decode(forward_output)
    assert "predicted_actions" in decode_output

def setUp(self):
    super().setUp()
    param_file = self.FIXTURES_ROOT / "simple_tagger" / "experiment_with_regularization.json"
    self.set_up_model(param_file, self.FIXTURES_ROOT / "data" / "sequence_tagging.tsv")
    params = Params.from_file(param_file)
    self.reader = DatasetReader.from_params(params["dataset_reader"])
    self.iterator = DataIterator.from_params(params["iterator"])
    self.trainer = Trainer.from_params(self.model, self.TEST_DIR, self.iterator,
                                       self.dataset, None, params.get("trainer"))

def setUp(self):
    super().setUp()
    param_file = self.FIXTURES_ROOT / 'simple_tagger' / 'experiment_with_regularization.json'
    self.set_up_model(param_file, self.FIXTURES_ROOT / 'data' / 'sequence_tagging.tsv')
    params = Params.from_file(param_file)
    self.reader = DatasetReader.from_params(params['dataset_reader'])
    self.iterator = DataIterator.from_params(params['iterator'])
    self.trainer = Trainer.from_params(self.model, self.TEST_DIR, self.iterator,
                                       self.dataset, None, params.get('trainer'))

def test_multi_iterator(self):
    params, file_paths = get_dataset_params_paths(['ner', 'ccg'])
    multitask_reader = DatasetReader.from_params(params)
    dataset = multitask_reader.read(file_paths)

    iterator_params = Params({
        "type": "multitask_iterator",
        "iterators": {
            "ner": {
                "type": "bucket",
                "sorting_keys": [["tokens", "num_tokens"]],
                "padding_noise": 0.0,
                "batch_size": 2
            },
            "ccg": {
                "type": "basic",
                "batch_size": 1
            }
        },
        "names_to_index": ["ner", "ccg"],
    })
    multi_iterator = DataIterator.from_params(iterator_params)

    # make the vocab
    vocab = Vocabulary.from_params(Params({}),
                                   (instance for instance in dataset))
    multi_iterator.index_with(vocab)

    all_batches = []
    for epoch in range(2):
        all_batches.append([])
        for batch in multi_iterator(dataset, shuffle=True, num_epochs=1):
            all_batches[-1].append(batch)

    # 3 batches per epoch
    self.assertEqual([len(b) for b in all_batches], [3, 3])

    ner_batches = []
    ccg_batches = []
    for epoch_batches in all_batches:
        ner_batches.append(0)
        ccg_batches.append(0)
        for batch in epoch_batches:
            if 'original_pos_tags' not in batch:
                ner_batches[-1] += 1
            if 'original_pos_tags' in batch:
                ccg_batches[-1] += 1

    # 1 NER batch per epoch, 2 CCG per epoch
    self.assertEqual(ner_batches, [1, 1])
    self.assertEqual(ccg_batches, [2, 2])

def evaluate_from_args(args: argparse.Namespace) -> Dict[str, Any]:
    # Disable some of the more verbose logging statements
    logging.getLogger('allennlp.common.params').disabled = True
    logging.getLogger('allennlp.nn.initializers').disabled = True
    logging.getLogger('allennlp.modules.token_embedders.embedding').setLevel(logging.INFO)

    # Load model from archive
    model_archive = load_archive(args.model_archive_file, args.cuda_device,
                                 args.overrides, args.weights_file)
    config = model_archive.config
    prepare_environment(config)
    model = model_archive.model
    model.eval()

    # Load sampler
    sampler_archive = load_archive(args.sampler_archive_file, args.cuda_device,
                                   args.overrides, args.weights_file)
    sampler = sampler_archive.model
    sampler.eval()

    # Load the evaluation data. NOTE: We are using the model's reader!
    validation_dataset_reader_params = config.pop('validation_dataset_reader', None)
    if validation_dataset_reader_params is not None:
        dataset_reader = DatasetReader.from_params(validation_dataset_reader_params)
    else:
        dataset_reader = DatasetReader.from_params(config.pop('dataset_reader'))
    evaluation_data_path = args.input_file
    logger.info('Reading evaluation data from: %s', evaluation_data_path)
    instances = dataset_reader.read(evaluation_data_path)

    # To avoid hairy issues with splitting, we opt to use a basic iterator so that we can
    # generate samples for entire sequences.
    iterator_params = config.pop('iterator', None)
    iterator = DataIterator.from_params(iterator_params)
    iterator.index_with(model.vocab)
    # iterator.eval()

    metrics = evaluate_perplexity(model, sampler, args.num_samples, instances,
                                  iterator, args.cuda_device)

    logger.info('Finished evaluating.')
    logger.info('Metrics:')
    for key, metric in metrics.items():
        logger.info('%s: %s', key, metric)

    output_file = args.output_file
    if output_file:
        with open(output_file, 'w') as f:
            json.dump(metrics, f, indent=4)
    return metrics

def evaluate_from_args(args):
    # Disable some of the more verbose logging statements
    logging.getLogger('allennlp.common.params').disabled = True
    logging.getLogger('allennlp.nn.initializers').disabled = True
    logging.getLogger('allennlp.modules.token_embedders.embedding').setLevel(logging.INFO)

    # Load from archive.
    # Do not pass a value to use the default archive handling.
    if args.unziped_archive_directory != "default":
        if args.elmo:
            model, config = _load_elmo(args.unziped_archive_directory,
                                       args.archive_file,
                                       weights_file=None,
                                       cuda_device=args.cuda_device)
        else:
            model, config = _load(args.unziped_archive_directory,
                                  weights_file=None,
                                  cuda_device=args.cuda_device)
    else:
        archive = load_archive(args.archive_file, args.cuda_device,
                               args.overrides, args.weights_file)
        config = archive.config
        prepare_environment(config)
        model = archive.model
    model.eval()

    # Load the evaluation data.
    # Pop (and discard) any validation dataset reader params; this command always
    # builds a SquadReaderEval from the training 'dataset_reader' config.
    validation_dataset_reader_params = config.pop('validation_dataset_reader', None)
    dataset_reader = SquadReaderEval.from_params(config.pop('dataset_reader'))
    evaluation_data_path = args.evaluation_data_file
    logger.info("Reading evaluation data from %s", evaluation_data_path)
    instances = dataset_reader.read(evaluation_data_path)

    iterator = DataIterator.from_params(config.pop("iterator"))
    iterator.index_with(model.vocab)
    metrics = evaluate(model, instances, iterator, args.cuda_device)

    logger.info("Finished evaluating.")
    logger.info("Metrics:")
    for key, metric in metrics.items():
        logger.info("%s: %s", key, metric)

    return metrics

def create_and_set_iterators(params: Params,
                             task_list: List[Task],
                             vocab: Vocabulary) -> List[Task]:
    '''
    Each task/dataset can have its own specific data iterator. If none is
    specified, we use a shared/common data iterator.

    Parameters
    ----------
    params: ``Params``
        A parameter object specifying an experiment.
    task_list: ``List[Task]``
        A list containing the tasks of the model to train.
    vocab: ``Vocabulary``
        The vocabulary used to index the iterators.

    Returns
    -------
    task_list: ``List[Task]``
        The list containing the tasks of the model to train, where each task
        has a new attribute: the data iterator.
    '''
    ### Load the default iterator ###
    iterators_params = params.pop("iterators")
    default_iterator_params = iterators_params.pop("iterator")
    default_iterator = DataIterator.from_params(default_iterator_params)
    default_iterator.index_with(vocab)

    ### Load dataset-specific iterators ###
    for task in task_list:
        specific_iterator_params = iterators_params.pop("iterator_" + task._name, None)
        if specific_iterator_params is not None:
            specific_iterator = DataIterator.from_params(specific_iterator_params)
            specific_iterator.index_with(vocab)
            task.set_data_iterator(specific_iterator)
        else:
            task.set_data_iterator(default_iterator)

    return task_list

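# Sketch of the "iterators" block the function above expects (key names taken
# from its pops; the iterator types and the "ner" task name are illustrative).
from allennlp.common import Params

multitask_params = Params({
    "iterators": {
        # shared/common iterator, used by any task without its own entry
        "iterator": {"type": "basic", "batch_size": 32},
        # task-specific iterator, matched via "iterator_" + task._name
        "iterator_ner": {"type": "bucket",
                         "sorting_keys": [["tokens", "num_tokens"]],
                         "batch_size": 16},
    }
})
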
def evaluate_from_args(args):
    # Disable some of the more verbose logging statements
    logging.getLogger('allennlp.common.params').disabled = True
    logging.getLogger('allennlp.nn.initializers').disabled = True
    logging.getLogger('allennlp.modules.token_embedders.embedding').setLevel(logging.INFO)

    # Load from archive
    archive = load_archive(args.archive_file, args.cuda_device,
                           args.overrides, args.weights_file)
    config = archive.config
    prepare_environment(config)
    model = archive.model
    model.eval()

    # Load the evaluation data
    # Try to use the validation dataset reader if there is one - otherwise fall back
    # to the default dataset_reader used for both training and validation.
    validation_dataset_reader_params = config.pop('validation_dataset_reader', None)
    if validation_dataset_reader_params is not None:
        dataset_reader = DatasetReader.from_params(validation_dataset_reader_params)
    else:
        dataset_reader = DatasetReader.from_params(config.pop('dataset_reader'))
    evaluation_data_path = args.input_file
    logger.info("Reading evaluation data from %s", evaluation_data_path)
    instances = dataset_reader.read(evaluation_data_path)

    iterator_params = config.pop("validation_iterator", None)
    if iterator_params is None:
        iterator_params = config.pop("iterator")
    iterator = DataIterator.from_params(iterator_params)
    iterator.index_with(model.vocab)

    metrics = evaluate(model, instances, iterator, args.cuda_device)

    logger.info("Finished evaluating.")
    logger.info("Metrics:")
    for key, metric in list(metrics.items()):
        logger.info("%s: %s", key, metric)

    output_file = args.output_file
    if output_file:
        with open(output_file, "w") as file:
            json.dump(metrics, file, indent=4)
    return metrics

def load_iterator(name: str = 'validation_iterator',
                  serialization_dir: str = None,
                  config: Params = None,
                  overrides_dict: Dict = None) -> DataIterator:
    if config is None:
        if serialization_dir is None:
            raise ValueError("Either config or serialization_dir must be provided")
        config = load_config(serialization_dir, overrides_dict=overrides_dict)
    val_iterr_params = config.pop(name, None)
    if val_iterr_params is None:
        raise ValueError("Config file should have {}".format(name))
    return DataIterator.from_params(val_iterr_params)

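# Usage sketch for load_iterator above: pull the validation iterator out of a
# trained model's config. The serialization directory is an illustrative path.
iterator = load_iterator(name='validation_iterator',
                         serialization_dir='runs/my_model')
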
def evaluate_from_args(args: argparse.Namespace) -> Dict[str, Any]:
    # Disable some of the more verbose logging statements
    logging.getLogger('allennlp.common.params').disabled = True
    logging.getLogger('allennlp.nn.initializers').disabled = True
    logging.getLogger('allennlp.modules.token_embedders.embedding').setLevel(logging.INFO)

    # Load from archive
    archive = load_archive(args.archive_file, args.cuda_device,
                           args.overrides, args.weights_file)
    config = archive.config
    prepare_environment(config)
    model = archive.model
    model.eval()

    # Set the model to error analysis mode
    model.error_analysis = True

    # Load the evaluation data
    # Try to use the validation dataset reader if there is one - otherwise fall back
    # to the default dataset_reader used for both training and validation.
    validation_dataset_reader_params = config.pop('validation_dataset_reader', None)
    if validation_dataset_reader_params is not None:
        dataset_reader = DatasetReader.from_params(validation_dataset_reader_params)
    else:
        dataset_reader = DatasetReader.from_params(config.pop('dataset_reader'))
    evaluation_data_path = args.evaluation_data_file
    logger.info("Reading evaluation data from %s", evaluation_data_path)
    instances = dataset_reader.read(evaluation_data_path)

    iterator = DataIterator.from_params(config.pop("iterator"))
    iterator.index_with(model.vocab)
    metrics = evaluate(model, instances, iterator, args.cuda_device)

    logger.info("Finished evaluating.")
    print("All Metrics")
    print("=" * 79)
    for key, metric in metrics.items():
        print("{}\t{}".format(key, metric))

    # Turn off error analysis mode
    model.error_analysis = False

    return metrics

def evaluate_from_args(args: argparse.Namespace) -> Dict[str, Any]:
    # Disable some of the more verbose logging statements
    logging.getLogger('allennlp.common.params').disabled = True
    logging.getLogger('allennlp.nn.initializers').disabled = True
    logging.getLogger('allennlp.modules.token_embedders.embedding').setLevel(logging.INFO)

    # Import any additional modules needed (to register custom classes)
    for package_name in args.include_package:
        import_submodules(package_name)

    # Load from archive
    archive = load_archive(args.archive_file, args.cuda_device,
                           args.overrides, args.weights_file)
    config = archive.config
    prepare_environment(config)
    model = archive.model
    model.eval()

    # Load the evaluation data
    # Try to use the validation dataset reader if there is one - otherwise fall back
    # to the default dataset_reader used for both training and validation.
    validation_dataset_reader_params = config.pop('validation_dataset_reader', None)
    if validation_dataset_reader_params is not None:
        dataset_reader = DatasetReader.from_params(validation_dataset_reader_params)
    else:
        dataset_reader = DatasetReader.from_params(config.pop('dataset_reader'))
    evaluation_data_path = args.evaluation_data_file
    logger.info("Reading evaluation data from %s", evaluation_data_path)
    instances = dataset_reader.read(evaluation_data_path)

    iterator = DataIterator.from_params(config.pop("iterator"))
    iterator.index_with(model.vocab)
    metrics = evaluate(model, instances, iterator, args.cuda_device)

    logger.info("Finished evaluating.")
    logger.info("Metrics:")
    for key, metric in metrics.items():
        logger.info("%s: %s", key, metric)

    return metrics

def evaluate_from_args(args: argparse.Namespace) -> Dict[str, Any]:
    # Disable some of the more verbose logging statements
    logging.getLogger('allennlp.common.params').disabled = True
    logging.getLogger('allennlp.nn.initializers').disabled = True
    logging.getLogger('allennlp.modules.token_embedders.embedding').setLevel(logging.INFO)

    # Load from archive
    archive = load_archive(args.archive_file, args.cuda_device,
                           args.overrides, args.weights_file)
    config = archive.config
    prepare_environment(config)
    model = archive.model
    model.eval()

    # Load the evaluation data
    # Try to use the validation dataset reader if there is one - otherwise fall back
    # to the default dataset_reader used for both training and validation.
    validation_dataset_reader_params = config.pop('validation_dataset_reader', None)
    if validation_dataset_reader_params is not None:
        dataset_reader = DatasetReader.from_params(validation_dataset_reader_params)
    else:
        dataset_reader = DatasetReader.from_params(config.pop('dataset_reader'))
    evaluation_data_path = args.input_file
    logger.info("Reading evaluation data from %s", evaluation_data_path)
    instances = dataset_reader.read(evaluation_data_path)

    iterator_params = config.pop("validation_iterator", None)
    if iterator_params is None:
        iterator_params = config.pop("iterator")
    iterator = DataIterator.from_params(iterator_params)
    iterator.index_with(model.vocab)

    metrics = evaluate(model, instances, iterator, args.cuda_device,
                       args.batch_weight_key)

    logger.info("Finished evaluating.")
    logger.info("Metrics:")
    for key, metric in metrics.items():
        logger.info("%s: %s", key, metric)

    output_file = args.output_file
    if output_file:
        with open(output_file, "w") as file:
            json.dump(metrics, file, indent=4)
    return metrics

def evaluate_from_args(args: argparse.Namespace) -> Dict[str, Any]:
    # Disable some of the more verbose logging statements
    logging.getLogger('allennlp.common.params').disabled = True
    logging.getLogger('allennlp.nn.initializers').disabled = True
    logging.getLogger('allennlp.modules.token_embedders.embedding').setLevel(logging.INFO)

    # Load from archive
    torch.cuda.set_device(args.cuda_device)
    archive = load_archive(args.archive_file, args.cuda_device, args.overrides)
    config = archive.config
    prepare_environment(config)
    model = archive.model
    model.cuda(args.cuda_device)
    model.eval()

    # Load the evaluation data
    dataset_reader = DatasetReader.from_params(config.pop('dataset_reader'))
    # dataset_reader.for_training = False
    evaluation_data_path = args.evaluation_data_file
    logger.info("Reading evaluation data from %s", evaluation_data_path)
    dataset = dataset_reader.read(evaluation_data_path)
    dataset.index_instances(model.vocab)

    iterator = DataIterator.from_params(config.pop("iterator"))
    metrics = evaluate(model, dataset, iterator, args.cuda_device)

    if args.print_predictions:
        directory = os.path.dirname(args.archive_file)
        predict_filename = args.print_predictions
        predict_file = open(predict_filename, 'w')
        gold_file = open(os.path.join(directory, 'gold.conll'), 'w')
        predictions = evaluate_predict(model, dataset, iterator, args.cuda_device,
                                       predict_file, gold_file)

    logger.info("Finished evaluating.")
    logger.info("Metrics:")
    for key, metric in metrics.items():
        logger.info("%s: %s", key, metric)

    return metrics

def evaluating(**params):
    param_is_exist(["model_file", "input_file", "include_package"], params)
    for package_name in params["include_package"]:
        import_submodules(package_name)

    cuda_device = params.get("cuda_device", -1)
    overrides = params.get("overrides", "")
    weights_file = params.get("weights_file", "")

    archive = load_archive(params["model_file"], cuda_device, overrides, weights_file)
    config = archive.config
    prepare_environment(config)
    model = archive.model
    model.eval()

    # Load the evaluation data
    # Try to use the validation dataset reader if there is one - otherwise fall back
    # to the default dataset_reader used for both training and validation.
    validation_dataset_reader_params = config.pop('validation_dataset_reader', None)
    if validation_dataset_reader_params is not None:
        dataset_reader = DatasetReader.from_params(validation_dataset_reader_params)
    else:
        dataset_reader = DatasetReader.from_params(config.pop('dataset_reader'))
    evaluation_data_path = params["input_file"]
    logger.info("Reading evaluation data from %s", evaluation_data_path)
    instances = dataset_reader.read(evaluation_data_path)

    iterator_params = config.pop("validation_iterator", None)
    if iterator_params is None:
        iterator_params = config.pop("iterator")
    iterator = DataIterator.from_params(iterator_params)
    iterator.index_with(model.vocab)

    metrics = evaluate(model, instances, iterator, cuda_device)

    logger.info("Finished evaluating.")
    logger.info("Metrics:")
    for key, metric in metrics.items():
        logger.info("%s: %s", key, metric)
    return metrics

def get_trainer_from_config(config: Params,
                            train_instances: List[Instance],
                            val_instances: List[Instance],
                            vocab: Optional[Vocabulary] = None,
                            device: Optional[int] = -1) -> Trainer:
    trainer_params = config.pop("trainer")
    trainer_params["cuda_device"] = device
    model_params = config.pop("model")
    vocab = vocab or Vocabulary.from_instances(train_instances)
    model = Model.from_params(model_params, vocab=vocab)
    iterator = DataIterator.from_params(config.pop("iterator"))
    iterator.index_with(vocab)
    trainer = Trainer.from_params(model=model,
                                  iterator=iterator,
                                  train_data=train_instances,
                                  validation_data=val_instances,
                                  serialization_dir=None,
                                  params=trainer_params)
    return trainer

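# Usage sketch for get_trainer_from_config above: build a trainer from an
# experiment config and in-memory instances. The config path is illustrative,
# and train_instances / val_instances are assumed to be lists of allennlp
# Instance objects produced by a DatasetReader.
config = Params.from_file("experiment.jsonnet")   # illustrative path
trainer = get_trainer_from_config(config,
                                  train_instances=train_instances,
                                  val_instances=val_instances,
                                  device=-1)
trainer.train()
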
def get_model_from_file(archive_path, model_path, overrides=None, eval_suffix='', device=0):
    if archive_path.endswith('gz'):
        archive = load_archive(archive_path, device, overrides)
        config = archive.config
        prepare_environment(config)
        model = archive.model
        serialization_dir = os.path.dirname(archive_path)
    elif archive_path.endswith('yaml'):
        config = yaml_to_params(archive_path, overrides)
        prepare_environment(config)
        config_dir = os.path.dirname(archive_path)
        serialization_dir = os.path.join(config_dir, 'serialization')
        all_datasets = datasets_from_params(config)
        # We want to create the vocab from scratch since it might be of a
        # different type. Vocabulary.from_files will always create the base
        # Vocabulary instance.
        # if os.path.exists(os.path.join(serialization_dir, "vocabulary")):
        #     vocab_path = os.path.join(serialization_dir, "vocabulary")
        #     vocab = Vocabulary.from_files(vocab_path)
        vocab = Vocabulary.from_params(config.pop('vocabulary'))
        model = Model.from_params(vocab=vocab, params=config.pop('model'))
        if model_path:
            best_model_state = torch.load(model_path)
            model.load_state_dict(best_model_state)

    # instances = all_datasets.get('test')
    iterator = DataIterator.from_params(config.pop("validation_iterator"))
    iterator.index_with(model.vocab)
    model.eval().to(device)
    model.evaluate_mode = True
    return model

def from_params(  # type: ignore
    cls,
    params: Params,
    serialization_dir: str,
    recover: bool = False,
    cache_directory: str = None,
    cache_prefix: str = None,
) -> "MultiTaskTrainer":
    readers = {
        name: DatasetReader.from_params(reader_params)
        for name, reader_params in params.pop("train_dataset_readers").items()
    }
    train_file_paths = params.pop("train_file_paths").as_dict()

    datasets = {
        name: reader.read(train_file_paths[name])
        for name, reader in readers.items()
    }

    instances = (instance for dataset in datasets.values() for instance in dataset)
    vocab = Vocabulary.from_params(Params({}), instances=instances)
    model = Model.from_params(params.pop("model"), vocab=vocab)
    iterator = DataIterator.from_params(params.pop("iterator"))
    iterator.index_with(vocab)
    mingler = DatasetMingler.from_params(params.pop("mingler"))

    parameters = [[n, p] for n, p in model.named_parameters() if p.requires_grad]
    optimizer = Optimizer.from_params(parameters, params.pop("optimizer"))

    num_epochs = params.pop_int("num_epochs", 10)

    _ = params.pop("trainer", Params({}))
    params.assert_empty(__name__)

    return MultiTaskTrainer(model, serialization_dir, iterator, mingler,
                            optimizer, datasets, num_epochs)

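# Sketch of the Params layout this trainer factory expects (top-level keys taken
# from the pops above; the reader/model/mingler type names and file paths are
# illustrative placeholders).
from allennlp.common import Params

mtl_params = Params({
    "train_dataset_readers": {
        "task_a": {"type": "sequence_tagging"},   # hypothetical reader configs
        "task_b": {"type": "sequence_tagging"},
    },
    "train_file_paths": {
        "task_a": "data/task_a.tsv",              # illustrative paths
        "task_b": "data/task_b.tsv",
    },
    "model": {"type": "simple_tagger"},           # hypothetical model config
    "iterator": {"type": "basic", "batch_size": 32},
    "mingler": {"type": "round-robin"},           # hypothetical mingler type
    "optimizer": {"type": "adam"},
    "num_epochs": 10,
})
trainer = MultiTaskTrainer.from_params(mtl_params, serialization_dir="/tmp/mtl")
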
    # Load the evaluation data
    # Try to use the validation dataset reader if there is one - otherwise fall back
    # to the default dataset_reader used for both training and validation.
    validation_dataset_reader_params = config.pop('validation_dataset_reader', None)
    if validation_dataset_reader_params is not None:
        dataset_reader = DatasetReader.from_params(validation_dataset_reader_params)
    else:
        dataset_reader = DatasetReader.from_params(config.pop('dataset_reader'))
    evaluation_data_path = args.input_file
    logger.info("Reading evaluation data from %s", evaluation_data_path)
    instances = dataset_reader.read(evaluation_data_path)

    # Force a basic (non-sorting) iterator so batches keep the input order.
    config['iterator']['type'] = 'basic'
    del config['iterator']['sorting_keys']
    data_iterator = DataIterator.from_params(config.pop("iterator"))
    data_iterator.index_with(model.vocab)
    cuda_device = args.cuda_device

    #### EVALUATION HERE
    model.eval()
    iterator = data_iterator(instances,
                             num_epochs=1,
                             shuffle=False,
                             cuda_device=cuda_device,
                             for_training=False)
    logger.info("Iterating over dataset")
    generator_tqdm = Tqdm.tqdm(iterator, total=data_iterator.get_num_batches(instances))
    label_probs = []
    for batch in generator_tqdm:
        output_dict = model(**batch)

def evaluate_from_args(args):
    logging.getLogger('allennlp.common.params').disabled = True
    logging.getLogger('allennlp.nn.initializers').disabled = True
    logging.getLogger('allennlp.modules.token_embedders.embedding').setLevel(logging.INFO)
    print(args.version)

    # Load from archive
    if args.unziped_archive_directory != "default":
        if args.elmo:
            model, config = _load_elmo(args.unziped_archive_directory,
                                       args.archive_file,
                                       weights_file=None,
                                       cuda_device=args.cuda_device)
        else:
            model, config = _load(args.unziped_archive_directory,
                                  weights_file=None,
                                  cuda_device=args.cuda_device)
    else:
        archive = load_archive(args.archive_file, args.cuda_device,
                               args.overrides, args.weights_file)
        config = archive.config
        prepare_environment(config)
        model = archive.model
    model.eval()

    # Load the evaluation dataset for multilingual evaluation.
    # The validation dataset reader params come from the archived model,
    # so the multilingual file has to be reloaded.
    validation_dataset_reader_params = config.pop('validation_dataset_reader', None)
    if validation_dataset_reader_params is not None:
        dataset_reader = DatasetReader.from_params(validation_dataset_reader_params)
    else:
        dataset_reader = SQuADReaderML.from_params(config.pop('dataset_reader'))

    dataset_reader.set_nmt_models_resources(args.trans_embedding_model,
                                            args.trans_encdec_model,
                                            args.trans_train_source,
                                            args.trans_train_target,
                                            args.use_question_tag,
                                            args.replace_UNK,
                                            args.version,
                                            args.online_trans,
                                            args.beam,
                                            args.soft)
    if args.language == "Fr":
        dataset_reader.set_squad_test_resources_fr()
    elif args.language == "Ja":
        dataset_reader.set_squad_test_resources_ja()
    dataset_reader.set_google_translate_mode(args.use_google_translate)
    dataset_reader.set_bing_translate_mode(args.use_bing_translate)

    evaluation_data_path = args.evaluation_data_file
    logger.info("Reading evaluation data from %s", evaluation_data_path)
    # TODO: Fix this file path argument, it is not used and misleading.
    instances = dataset_reader.read("inputdata_tmp/ja_question_v2.csv")

    iterator = DataIterator.from_params(config.pop("iterator"))
    iterator.index_with(model.vocab)

    if args.use_google_translate:
        metrics = evaluate_mlqa_google_translate(model, instances, iterator,
                                                 args.cuda_device, args.language,
                                                 args.version)
    elif args.use_bing_translate:
        metrics = evaluate_mlqa_bing_translate(model, instances, iterator,
                                               args.cuda_device, args.language,
                                               args.version, "bing",
                                               args.back_trans_bing)
    else:
        if args.back_trans_ours:
            metrics = evaluate_mlqa_back_trans_ours(model, instances, iterator,
                                                    args.cuda_device, args.language,
                                                    args.version, args.enja_emb,
                                                    args.enja_encdec,
                                                    args.enja_train_source,
                                                    args.enja_train_target)
        else:
            metrics = evaluate_mlqa(model, instances, iterator, args.cuda_device,
                                    args.language, args.version,
                                    args.trans_embedding_model, args.beam, args.soft)

    logger.info("Finished evaluating.")
    logger.info("Metrics:")
    for key, metric in metrics.items():
        logger.info("%s: %s", key, metric)
    return metrics

"The loaded model seems not to be an am-parser (GraphDependencyParser)" ) # Load the evaluation data # Try to use the validation dataset reader if there is one - otherwise fall back # to the default dataset_reader used for both training and validation. validation_dataset_reader_params = config.pop('validation_dataset_reader', None) if validation_dataset_reader_params is not None: dataset_reader = DatasetReader.from_params( validation_dataset_reader_params) else: dataset_reader = DatasetReader.from_params(config.pop('dataset_reader')) instances = dataset_reader.read([[args.formalism, args.input_file]]) iterator_params = config.pop("validation_iterator", None) if iterator_params is None: iterator_params = config.pop("iterator") iterator = DataIterator.from_params(iterator_params) iterator.index_with(model.vocab) metrics = evaluate(model, instances, iterator, args.cuda_device, args.batch_weight_key) logger.info("Finished evaluating.") logger.info("Metrics:") for key, metric in metrics.items(): logger.info("%s: %s", key, metric)