def test_batch_predictions_are_consistent(self):
    # The CNN encoder has problems with this kind of test - it's not properly masked yet, so
    # changing the amount of padding in the batch will result in small differences in the
    # output of the encoder. Because BiDAF is so deep, these differences get magnified through
    # the network and make this test impossible. So, we'll remove the CNN encoder entirely
    # from the model for this test. If/when we fix the CNN encoder to work correctly with
    # masking, we can change this back to how the other models run this test, with just a
    # single line.
    # pylint: disable=protected-access,attribute-defined-outside-init

    # Save some state.
    saved_model = self.model
    saved_instances = self.instances

    # Modify the state, run the test with modified state.
    params = Params.from_file(self.param_file)
    reader = DatasetReader.from_params(params['dataset_reader'])
    reader._token_indexers = {'tokens': reader._token_indexers['tokens']}
    self.instances = reader.read('tests/fixtures/data/squad.json')
    vocab = Vocabulary.from_instances(self.instances)
    for instance in self.instances:
        instance.index_fields(vocab)
    del params['model']['text_field_embedder']['token_characters']
    params['model']['phrase_layer']['input_size'] = 2
    self.model = Model.from_params(vocab, params['model'])

    self.ensure_batch_predictions_are_consistent()

    # Restore the state.
    self.model = saved_model
    self.instances = saved_instances
def setUp(self):
    super(TestCopyNetReader, self).setUp()
    params = Params.from_file(self.FIXTURES_ROOT / "encoder_decoder" /
                              "copynet_seq2seq" / "experiment.json")
    self.reader = DatasetReader.from_params(params["dataset_reader"])
    instances = self.reader.read(self.FIXTURES_ROOT / "data" / "copynet" / "copyover.tsv")
    self.instances = ensure_list(instances)
    self.vocab = Vocabulary.from_params(params=params["vocabulary"], instances=instances)
def set_up_model(self, param_file, dataset_file):
    # pylint: disable=attribute-defined-outside-init
    self.param_file = param_file
    params = Params.from_file(self.param_file)

    reader = DatasetReader.from_params(params['dataset_reader'])
    instances = reader.read(dataset_file)
    vocab = Vocabulary.from_instances(instances)
    self.vocab = vocab
    self.instances = instances
    self.model = Model.from_params(self.vocab, params['model'])

    # TODO(joelgrus) get rid of these
    # (a lot of the model tests use them, so they'll have to be changed)
    self.dataset = Batch(self.instances)
    self.dataset.index_instances(self.vocab)
def main(serialization_directory, device):
    """
    serialization_directory : str, required.
        The directory containing the serialized weights.
    device: int, default = -1
        The device to run the evaluation on.
    """
    config = Params.from_file(os.path.join(serialization_directory, "config.json"))
    dataset_reader = DatasetReader.from_params(config['dataset_reader'])
    evaluation_data_path = config['validation_data_path']

    model = Model.load(config, serialization_dir=serialization_directory, cuda_device=device)

    prediction_file_path = os.path.join(serialization_directory, "predictions.txt")
    gold_file_path = os.path.join(serialization_directory, "gold.txt")
    prediction_file = open(prediction_file_path, "w+")
    gold_file = open(gold_file_path, "w+")

    # Load the evaluation data and index it.
    print("Reading evaluation data from {}".format(evaluation_data_path))
    instances = dataset_reader.read(evaluation_data_path)
    iterator = BasicIterator(batch_size=32)
    iterator.index_with(model.vocab)

    model_predictions = []
    batches = iterator(instances, num_epochs=1, shuffle=False,
                       cuda_device=device, for_training=False)
    for batch in Tqdm.tqdm(batches):
        result = model(**batch)
        predictions = model.decode(result)
        model_predictions.extend(predictions["tags"])

    for instance, prediction in zip(instances, model_predictions):
        fields = instance.fields
        try:
            # Most sentences have a verbal predicate, but not all.
            verb_index = fields["verb_indicator"].labels.index(1)
        except ValueError:
            verb_index = None
        gold_tags = fields["tags"].labels
        sentence = fields["tokens"].tokens
        write_to_conll_eval_file(prediction_file, gold_file,
                                 verb_index, sentence, prediction, gold_tags)
    prediction_file.close()
    gold_file.close()
def set_up_model(self, param_file, dataset_file):
    # pylint: disable=attribute-defined-outside-init
    self.param_file = param_file
    params = Params.from_file(self.param_file)

    reader = DatasetReader.from_params(params['dataset_reader'])
    instances = reader.read(dataset_file)
    # Use parameters for vocabulary if they are present in the config file, so that choices like
    # "non_padded_namespaces", "min_count" etc. can be set if needed.
    if 'vocabulary' in params:
        vocab_params = params['vocabulary']
        vocab = Vocabulary.from_params(params=vocab_params, instances=instances)
    else:
        vocab = Vocabulary.from_instances(instances)
    self.vocab = vocab
    self.instances = instances
    self.model = Model.from_params(vocab=self.vocab, params=params['model'])

    # TODO(joelgrus) get rid of these
    # (a lot of the model tests use them, so they'll have to be changed)
    self.dataset = Batch(self.instances)
    self.dataset.index_instances(self.vocab)
def set_up_model(self, param_file, dataset_file):
    # pylint: disable=attribute-defined-outside-init
    self.param_file = param_file
    params = Params.from_file(self.param_file)

    reader = DatasetReader.from_params(params['dataset_reader'])
    # The dataset reader might be lazy, but a lazy list here breaks some of our tests.
    instances = list(reader.read(dataset_file))
    # Use parameters for vocabulary if they are present in the config file, so that choices like
    # "non_padded_namespaces", "min_count" etc. can be set if needed.
    if 'vocabulary' in params:
        vocab_params = params['vocabulary']
        vocab = Vocabulary.from_params(params=vocab_params, instances=instances)
    else:
        vocab = Vocabulary.from_instances(instances)
    self.vocab = vocab
    self.instances = instances
    self.model = Model.from_params(vocab=self.vocab, params=params['model'])

    # TODO(joelgrus) get rid of these
    # (a lot of the model tests use them, so they'll have to be changed)
    self.dataset = Batch(self.instances)
    self.dataset.index_instances(self.vocab)
def from_archive(cls, archive: Archive, predictor_name: str = None) -> 'Predictor':
    """
    Instantiate a :class:`Predictor` from an :class:`~allennlp.models.archival.Archive`;
    that is, from the result of training a model. Optionally specify which `Predictor`
    subclass; otherwise, the default one for the model will be used.
    """
    # Duplicate the config so that the config inside the archive doesn't get consumed
    config = archive.config.duplicate()

    if not predictor_name:
        model_type = config.get("model").get("type")
        if model_type not in DEFAULT_PREDICTORS:
            raise ConfigurationError(f"No default predictor for model type {model_type}.\n"
                                     f"Please specify a predictor explicitly.")
        predictor_name = DEFAULT_PREDICTORS[model_type]

    dataset_reader_params = config["dataset_reader"]
    dataset_reader = DatasetReader.from_params(dataset_reader_params)

    model = archive.model
    model.eval()

    return Predictor.by_name(predictor_name)(model, dataset_reader)
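# --- Usage sketch for the classmethod above (not part of the original sources). ---
# A minimal, hedged example of going from a trained archive to predictions; the
# archive path and the input key "sentence" are assumptions for illustration and
# depend on which model/predictor the archive actually contains.
from allennlp.models.archival import load_archive
from allennlp.predictors import Predictor

archive = load_archive("model.tar.gz")        # hypothetical result of training a model
predictor = Predictor.from_archive(archive)   # picks the model's default predictor
print(predictor.predict_json({"sentence": "AllenNLP is great."}))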
def write_for_official_eval(model_archive_file, test_file, output_file):
    archive = load_archive(model_archive_file)
    model = archive.model

    reader = DatasetReader.from_params(archive.config['dataset_reader'])

    iterator = DataIterator.from_params(Params({"type": "basic", "batch_size": 32}))
    vocab = Vocabulary.from_params(archive.config['vocabulary'])
    iterator.index_with(vocab)

    model.cuda()
    model.eval()

    label_ids_to_label = {0: 'F', 1: 'T'}

    instances = reader.read(test_file)
    predictions = []
    for batch in iterator(instances, num_epochs=1, shuffle=False):
        batch = move_to_device(batch, cuda_device=0)
        output = model(**batch)
        batch_labels = [
            label_ids_to_label[i]
            for i in output['predictions'].cpu().numpy().tolist()
        ]
        predictions.extend(batch_labels)

    assert len(predictions) == 1400

    with open(output_file, 'w') as fout:
        for p in predictions:
            fout.write("{}\n".format(p))
def set_up_model(
    self,
    param_file: PathLike,
    dataset_file: PathLike,
    serialization_dir: PathLike = None,
    seed: int = None,
):
    if seed is not None:
        random.seed(seed)
        numpy.random.seed(seed)
        torch.manual_seed(seed)

    self.param_file = str(param_file)
    params = Params.from_file(self.param_file)

    reader = DatasetReader.from_params(
        params["dataset_reader"], serialization_dir=serialization_dir
    )
    # The dataset reader might be lazy, but a lazy list here breaks some of our tests.
    instances = list(reader.read(str(dataset_file)))
    # Use parameters for vocabulary if they are present in the config file, so that choices like
    # "non_padded_namespaces", "min_count" etc. can be set if needed.
    if "vocabulary" in params:
        vocab_params = params["vocabulary"]
        vocab = Vocabulary.from_params(params=vocab_params, instances=instances)
    else:
        vocab = Vocabulary.from_instances(instances)
    self.vocab = vocab
    self.instances = instances
    self.model = Model.from_params(
        vocab=self.vocab, params=params["model"], serialization_dir=serialization_dir
    )

    # TODO(joelgrus) get rid of these
    # (a lot of the model tests use them, so they'll have to be changed)
    self.dataset = Batch(self.instances)
    self.dataset.index_instances(self.vocab)
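# --- Usage sketch for the helper above (not part of the original sources). ---
# A hedged example of wiring set_up_model into a test class; the fixture paths
# are placeholders, not files that necessarily exist in any repository.
from allennlp.common.testing import ModelTestCase

class MyModelTest(ModelTestCase):
    def setUp(self):
        super().setUp()
        self.set_up_model(
            self.FIXTURES_ROOT / "my_model" / "experiment.json",  # training config
            self.FIXTURES_ROOT / "data" / "sample.jsonl",         # small data fixture
            seed=1,                                               # make reads deterministic
        )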
def from_archive(
    cls,
    archive: Archive,
    predictor_name: str = None,
    dataset_reader_to_load: str = "validation",
    frozen: bool = True,
) -> "Predictor":
    """
    Instantiate a `Predictor` from an [`Archive`](../models/archival.md);
    that is, from the result of training a model.

    Optionally specify which `Predictor` subclass; otherwise, we try to find a corresponding
    predictor in `DEFAULT_PREDICTORS`, or if one is not found, the base class
    (i.e. `Predictor`) will be used. Optionally specify which
    [`DatasetReader`](../data/dataset_readers/dataset_reader.md) should be loaded;
    otherwise, the validation one will be used if it exists followed by the training
    dataset reader. Optionally specify if the loaded model should be frozen, meaning
    `model.eval()` will be called.
    """
    # Duplicate the config so that the config inside the archive doesn't get consumed
    config = archive.config.duplicate()

    if not predictor_name:
        model_type = config.get("model").get("type")
        model_class, _ = Model.resolve_class_name(model_type)
        predictor_name = model_class.default_predictor
    predictor_class: Type[Predictor] = (
        Predictor.by_name(predictor_name) if predictor_name is not None else cls
    )  # type: ignore

    if dataset_reader_to_load == "validation" and "validation_dataset_reader" in config:
        dataset_reader_params = config["validation_dataset_reader"]
    else:
        dataset_reader_params = config["dataset_reader"]
    dataset_reader = DatasetReader.from_params(dataset_reader_params)

    model = archive.model
    if frozen:
        model.eval()

    return predictor_class(model, dataset_reader)
def write_for_official_eval(model_archive_file, test_file, output_file,
                            label_ids_to_label):
    archive = load_archive(model_archive_file)
    model = archive.model

    reader = DatasetReader.from_params(archive.config['dataset_reader'])

    iterator = DataIterator.from_params(Params({"type": "basic", "batch_size": 4}))
    vocab = Vocabulary.from_params(archive.config['vocabulary'])
    iterator.index_with(vocab)

    model.cuda()
    model.eval()

    instances = reader.read(test_file)
    predictions = []
    for batch in iterator(instances, num_epochs=1, shuffle=False):
        batch = move_to_device(batch, cuda_device=0)
        output = model(**batch)
        batch_labels = [
            label_ids_to_label[i]
            for i in output['predictions'].cpu().numpy().tolist()
        ]
        predictions.extend(batch_labels)

    to_write = ''.join(["{}\t{}\n".format(i + 8001, e)
                        for i, e in enumerate(model.metrics[0].pred)])
    with open(output_file, 'w') as fout:
        fout.write(to_write)
def __init__(self,
             archive_file=DEFAULT_ARCHIVE_FILE,
             cuda_device=DEFAULT_CUDA_DEVICE,
             model_file=None,
             context_size=3):
    """ Constructor for NLU class. """
    self.context_size = context_size
    check_for_gpu(cuda_device)

    if not os.path.isfile(archive_file):
        if not model_file:
            raise Exception("No model for MILU is specified!")
        archive_file = cached_path(model_file)

    archive = load_archive(archive_file, cuda_device=cuda_device)
    self.tokenizer = SpacyWordSplitter(language="en_core_web_sm")

    dataset_reader_params = archive.config["dataset_reader"]
    self.dataset_reader = DatasetReader.from_params(dataset_reader_params)
    self.model = archive.model
    self.model.eval()
def predict(archive_file, test_file, output_file, cuda_device, score_dir):
    import_submodules("dygie")
    gold_test_data = load_json(test_file)
    archive = load_archive(archive_file, cuda_device)
    model = archive.model
    model.eval()
    config = archive.config.duplicate()
    dataset_reader_params = config["dataset_reader"]
    dataset_reader = DatasetReader.from_params(dataset_reader_params)
    instances = dataset_reader.read(test_file)

    batch = Batch(instances)
    batch.index_instances(model.vocab)

    iterator = DocumentIterator()
    with open(output_file, "w") as f:
        for doc, gold_data in zip(iterator(batch.instances, num_epochs=1, shuffle=False),
                                  gold_test_data):
            doc = nn_util.move_to_device(doc, cuda_device)  # Put on GPU.
            sentence_lengths = [len(entry["sentence"]) for entry in doc["metadata"]]
            sentence_starts = np.cumsum(sentence_lengths)
            sentence_starts = np.roll(sentence_starts, 1)
            sentence_starts[0] = 0
            pred = model(**doc)
            if score_dir is not None:
                dump_scores(doc, pred, score_dir)
            decoded = model.decode(pred)
            predictions = {}
            for k, v in decoded.items():
                predictions[decode_names[k]] = cleanup(k, v[decode_fields[k]], sentence_starts)
            res = {}
            res.update(gold_data)
            res.update(predictions)
            if "dataset" in res:
                del res["dataset"]
            check_lengths(res)
            encoded = json.dumps(res, default=int)
            f.write(encoded + "\n")
def predictor_from_archive(archive: Archive, predictor_name: str = None,
                           paper_features_path: str = None) -> 'Predictor':
    """
    Source: https://github.com/allenai/specter/blob/master/scripts/embed.py
    Extends allennlp.predictors.predictor.from_archive to allow processing multiprocess reader
    paper_features_path is passed to replace the correct one if the dataset_reader is multiprocess
    """
    # Duplicate the config so that the config inside the archive doesn't get consumed
    config = archive.config.duplicate()

    if not predictor_name:
        model_type = config.get("model").get("type")
        if model_type not in DEFAULT_PREDICTORS:
            raise ConfigurationError(f"No default predictor for model type {model_type}.\n"
                                     f"Please specify a predictor explicitly.")
        predictor_name = DEFAULT_PREDICTORS[model_type]

    dataset_config = config["dataset_reader"].as_dict()
    if dataset_config['type'] == 'multiprocess':
        dataset_config = dataset_config['base_reader']
        if paper_features_path:
            dataset_config['paper_features_path'] = paper_features_path
        dataset_reader_params = Params(dataset_config)
    else:
        dataset_reader_params = config["dataset_reader"]

    dataset_reader = DatasetReader.from_params(dataset_reader_params)

    model = archive.model
    model.eval()

    return Predictor.by_name(predictor_name)(model, dataset_reader)
def from_archive(cls, archive_path: Pathlike, dataset_reader_to_load: str = VALIDATION):
    """Construct from an `allennlp` `Archive` file."""
    # Lazy import because allennlp is an extra requirement.
    try:
        from allennlp.data import DatasetReader
        from allennlp.models.archival import load_archive
    except ImportError:
        Errors.E0("unofficial-allennlp-nightly")

    archive = load_archive(str(archive_path))
    config = archive.config
    if dataset_reader_to_load == VALIDATION and VALIDATION_DATASET_READER in config:
        dataset_reader_params = config[VALIDATION_DATASET_READER]
    else:
        dataset_reader_params = config[DATASET_READER]
    dataset_reader = DatasetReader.from_params(dataset_reader_params)
    return cls(
        model=archive.model,
        dataset_reader=dataset_reader,
        config={"allen_archive": archive.config},
        archive_path=Path(archive_path).absolute(),
    )
def set_up_model(self, param_file, dataset_file):
    self.param_file = param_file
    params = Params.from_file(self.param_file)

    reader = DatasetReader.from_params(params["dataset_reader"])
    # The dataset reader might be lazy, but a lazy list here breaks some of our tests.
    instances = reader.read(str(dataset_file))
    # Use parameters for vocabulary if they are present in the config file, so that choices like
    # "non_padded_namespaces", "min_count" etc. can be set if needed.
    if "vocabulary" in params:
        vocab_params = params["vocabulary"]
        vocab = Vocabulary.from_params(params=vocab_params, instances=instances)
    else:
        vocab = Vocabulary.from_instances(instances)
    self.vocab = vocab
    self.instances = instances
    self.instances.index_with(vocab)
    self.model = Model.from_params(vocab=self.vocab, params=params["model"])

    # TODO(joelgrus) get rid of these
    # (a lot of the model tests use them, so they'll have to be changed)
    self.dataset = Batch(list(self.instances))
    self.dataset.index_instances(self.vocab)
def __init__(self) -> None:
    super().__init__(lazy=True)
    self.reader = DatasetReader.from_params(Params({'type': 'sequence_tagging'}))
def predict(archive_folder, span_file, cluster_file, output_file, cuda_device):
    combine_span_and_cluster_file(span_file, cluster_file)
    test_file = 'tmp_relation_42424242.jsonl'

    relation_threshold = json.load(
        open(archive_folder + '/metrics.json'))['best_validation__n_ary_rel_global_threshold']
    print(relation_threshold)

    import_submodules("scirex")
    logging.info("Loading Model from %s", archive_folder)
    archive_file = os.path.join(archive_folder, "model.tar.gz")
    archive = load_archive(archive_file, cuda_device)
    model = archive.model
    model.eval()
    model.prediction_mode = True

    config = archive.config.duplicate()
    dataset_reader_params = config["dataset_reader"]
    dataset_reader = DatasetReader.from_params(dataset_reader_params)
    dataset_reader.prediction_mode = True
    instances = dataset_reader.read(test_file)

    for instance in instances:
        batch = Batch([instance])
        batch.index_instances(model.vocab)

    data_iterator = DataIterator.from_params(config["validation_iterator"])
    iterator = data_iterator(instances, num_epochs=1, shuffle=False)

    with open(output_file, "w") as f:
        documents = {}
        for batch in tqdm(iterator):
            with torch.no_grad():
                batch = nn_util.move_to_device(batch, cuda_device)
                output_res = model.decode_relations(batch)

            n_ary_relations = output_res['n_ary_relation']
            predicted_relations, scores = n_ary_relations['candidates'], n_ary_relations['scores']
            try:
                metadata = output_res['n_ary_relation']['metadata'][0]
            except (KeyError, IndexError):
                continue
            doc_id = metadata['doc_id']
            coref_key_map = {k: i for i, k in
                             metadata['document_metadata']['cluster_name_to_id'].items()}

            for i, rel in enumerate(predicted_relations):
                predicted_relations[i] = tuple(
                    [coref_key_map[k] if k in coref_key_map else None for k in rel])

            if doc_id not in documents:
                documents[doc_id] = {'predicted_relations': [], 'doc_id': doc_id}

            scores_ = list(scores.ravel())
            if not scores_:
                warnings.warn(f"no relation scores defined for {doc_id}")
                continue

            label = [1 if x > relation_threshold else 0 for x in scores_]
            if all(l == 0 for l in label):
                decoding_mode = os.environ.get("SCIREX_RELATION_DECODING")
                if decoding_mode == "report_single_most_likely":
                    label[scores.argmax()] = 1
                elif decoding_mode == "report_probabilistically":
                    idxs_sorted_by_score = sorted(
                        range(len(label)),
                        key=lambda i: scores[i],
                        reverse=True  # highest score first
                    )
                    possible_decoding_idxs = \
                        [idxs_sorted_by_score[:i] for i in range(1, 11)]  # assuming that >10 relationships would never happen

                    def score_decoding(candidate_idxs):
                        """likelihood function for a geometric distribution fit to the training
                        distribution of number-of-relationships-per-document

                        :param candidate_idxs (List[int]): a list of idxs that represents a
                            relationship distribution
                        :return: likelihood of that distribution
                        """
                        score_from_n_relationships = st.geom.pmf(
                            len(candidate_idxs),
                            0.4046692607003891  # MLE from training distribution, i.e.: 1 / (1 + E[X])
                        )
                        score_from_indiv_relationships = scores[candidate_idxs]
                        return score_from_n_relationships * np.prod(score_from_indiv_relationships)

                    best_decoding_idxs = max(possible_decoding_idxs, key=score_decoding)
                    for idx in best_decoding_idxs:
                        label[idx] = 1

            scores = [round(float(x), 4) for x in list(scores.ravel())]
            documents[doc_id]['predicted_relations'] += list(
                zip(predicted_relations, scores, label))

        for d in documents.values():
            predicted_relations = {}
            for r, s, l in d['predicted_relations']:
                r = tuple(r)
                if r not in predicted_relations or predicted_relations[r][0] < s:
                    predicted_relations[r] = (s, l)
            d['predicted_relations'] = [(r, s, l) for r, (s, l) in predicted_relations.items()]

        f.write("\n".join([json.dumps(x) for x in documents.values()]))
def ensure_model_can_train_save_and_load(
    self,
    param_file: str,
    tolerance: float = 1e-4,
    cuda_device: int = -1,
    gradients_to_ignore: Set[str] = None,
    overrides: str = "",
    metric_to_check: str = None,
    metric_terminal_value: float = None,
    metric_tolerance: float = 1e-4,
    disable_dropout: bool = True,
):
    """
    # Parameters

    param_file : `str`
        Path to a training configuration file that we will use to train the model for this
        test.
    tolerance : `float`, optional (default=1e-4)
        When comparing model predictions between the originally-trained model and the model
        after saving and loading, we will use this tolerance value (passed as `rtol` to
        `numpy.testing.assert_allclose`).
    cuda_device : `int`, optional (default=-1)
        The device to run the test on.
    gradients_to_ignore : `Set[str]`, optional (default=None)
        This test runs a gradient check to make sure that we're actually computing gradients
        for all of the parameters in the model.  If you really want to ignore certain
        parameters when doing that check, you can pass their names here.  This is not
        recommended unless you're `really` sure you don't need to have non-zero gradients for
        those parameters (e.g., some of the beam search / state machine models have
        infrequently-used parameters that are hard to force the model to use in a small test).
    overrides : `str`, optional (default = "")
        A JSON string that we will use to override values in the input parameter file.
    metric_to_check: `str`, optional (default = None)
        We may want to automatically perform a check that model reaches given metric when
        training (on validation set, if it is specified). It may be useful in CI, for example.
        You can pass any metric that is in your model returned metrics.
    metric_terminal_value: `str`, optional (default = None)
        When you set `metric_to_check`, you need to set the value this metric must converge to
    metric_tolerance: `float`, optional (default=1e-4)
        Tolerance to check you model metric against metric terminal value. One can expect
        some variance in model metrics when the training process is highly stochastic.
    disable_dropout : `bool`, optional (default = True)
        If True we will set all dropout to 0 before checking gradients. (Otherwise, with
        small datasets, you may get zero gradients because of unlucky dropout.)
    """
    save_dir = self.TEST_DIR / "save_and_load_test"
    archive_file = save_dir / "model.tar.gz"
    model = train_model_from_file(param_file, save_dir, overrides=overrides)
    metrics_file = save_dir / "metrics.json"
    if metric_to_check is not None:
        metrics = json.loads(metrics_file.read_text())
        metric_value = metrics.get(f"best_validation_{metric_to_check}") or metrics.get(
            f"training_{metric_to_check}"
        )
        assert metric_value is not None, f"Cannot find {metric_to_check} in metrics.json file"
        assert metric_terminal_value is not None, "Please specify metric terminal value"
        assert abs(metric_value - metric_terminal_value) < metric_tolerance
    loaded_model = load_archive(archive_file, cuda_device=cuda_device).model
    state_keys = model.state_dict().keys()
    loaded_state_keys = loaded_model.state_dict().keys()
    assert state_keys == loaded_state_keys
    # First we make sure that the state dict (the parameters) are the same for both models.
    for key in state_keys:
        assert_allclose(
            model.state_dict()[key].cpu().numpy(),
            loaded_model.state_dict()[key].cpu().numpy(),
            err_msg=key,
        )
    params = Params.from_file(param_file, params_overrides=overrides)
    reader = DatasetReader.from_params(params["dataset_reader"])

    print("Reading with original model")
    model_dataset = reader.read(params["validation_data_path"])
    model_dataset.index_with(model.vocab)

    print("Reading with loaded model")
    loaded_dataset = reader.read(params["validation_data_path"])
    loaded_dataset.index_with(loaded_model.vocab)

    # Need to duplicate params because DataLoader.from_params will consume.
    data_loader_params = params["data_loader"]
    data_loader_params["shuffle"] = False
    data_loader_params2 = Params(copy.deepcopy(data_loader_params.as_dict()))

    data_loader = DataLoader.from_params(dataset=model_dataset, params=data_loader_params)
    data_loader2 = DataLoader.from_params(dataset=loaded_dataset, params=data_loader_params2)

    # We'll check that even if we index the dataset with each model separately, we still get
    # the same result out.
    model_batch = next(iter(data_loader))
    loaded_batch = next(iter(data_loader2))

    # Check gradients are None for non-trainable parameters and check that
    # trainable parameters receive some gradient if they are trainable.
    self.check_model_computes_gradients_correctly(
        model, model_batch, gradients_to_ignore, disable_dropout
    )

    # The datasets themselves should be identical.
    assert model_batch.keys() == loaded_batch.keys()
    for key in model_batch.keys():
        self.assert_fields_equal(model_batch[key], loaded_batch[key], key, 1e-6)

    # Set eval mode, to turn off things like dropout, then get predictions.
    model.eval()
    loaded_model.eval()

    # Models with stateful RNNs need their states reset to have consistent
    # behavior after loading.
    for model_ in [model, loaded_model]:
        for module in model_.modules():
            if hasattr(module, "stateful") and module.stateful:
                module.reset_states()
    print("Predicting with original model")
    model_predictions = model(**model_batch)
    print("Predicting with loaded model")
    loaded_model_predictions = loaded_model(**loaded_batch)

    # Check loaded model's loss exists and we can compute gradients, for continuing training.
    loaded_model_loss = loaded_model_predictions["loss"]
    assert loaded_model_loss is not None
    loaded_model_loss.backward()

    # Both outputs should have the same keys and the values for these keys should be close.
    for key in model_predictions.keys():
        self.assert_fields_equal(
            model_predictions[key], loaded_model_predictions[key], name=key, tolerance=tolerance
        )

    return model, loaded_model
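# --- Usage sketch for the helper above (not part of the original sources). ---
# A hedged example of the typical one-line invocation inside a ModelTestCase
# subclass; the fixture path and metric name are assumptions for illustration.
def test_model_can_train_save_and_load(self):
    self.ensure_model_can_train_save_and_load(
        self.FIXTURES_ROOT / "my_model" / "experiment.json",
        metric_to_check="accuracy",    # any metric key your model reports
        metric_terminal_value=1.0,     # value it should reach on the tiny fixture
    )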
def ensure_model_can_train_save_and_load(self, param_file: str):
    save_dir = os.path.join(self.TEST_DIR, "save_and_load_test")
    archive_file = os.path.join(save_dir, "model.tar.gz")
    model = train_model_from_file(param_file, save_dir)
    loaded_model = load_archive(archive_file).model
    state_keys = model.state_dict().keys()
    loaded_state_keys = loaded_model.state_dict().keys()
    assert state_keys == loaded_state_keys
    # First we make sure that the state dict (the parameters) are the same for both models.
    for key in state_keys:
        assert_allclose(model.state_dict()[key].numpy(),
                        loaded_model.state_dict()[key].numpy(),
                        err_msg=key)
    params = Params.from_file(self.param_file)
    reader = DatasetReader.from_params(params['dataset_reader'])
    iterator = DataIterator.from_params(params['iterator'])

    # We'll check that even if we index the dataset with each model separately, we still get
    # the same result out.
    model_dataset = reader.read(params['validation_data_path'])
    model_dataset.index_instances(model.vocab)
    model_batch_arrays = next(iterator(model_dataset, shuffle=False))
    model_batch = arrays_to_variables(model_batch_arrays, for_training=False)
    loaded_dataset = reader.read(params['validation_data_path'])
    loaded_dataset.index_instances(loaded_model.vocab)
    loaded_batch_arrays = next(iterator(loaded_dataset, shuffle=False))
    loaded_batch = arrays_to_variables(loaded_batch_arrays, for_training=False)

    # The datasets themselves should be identical.
    for key in model_batch.keys():
        if key == 'metadata':
            assert model_batch[key] == loaded_batch[key]
            continue
        field = model_batch[key]
        if isinstance(field, dict):
            for subfield in field:
                assert_allclose(model_batch[key][subfield].data.numpy(),
                                loaded_batch[key][subfield].data.numpy(),
                                rtol=1e-6,
                                err_msg=key + "." + subfield)
        else:
            assert_allclose(model_batch[key].data.numpy(),
                            loaded_batch[key].data.numpy(),
                            rtol=1e-6,
                            err_msg=key)

    # Set eval mode, to turn off things like dropout, then get predictions.
    model.eval()
    loaded_model.eval()

    if 'metadata' in model_batch and 'metadata' not in signature(model.forward).parameters:
        del model_batch['metadata']
        del loaded_batch['metadata']

    model_predictions = model.forward(**model_batch)
    loaded_model_predictions = loaded_model.forward(**loaded_batch)

    # Check loaded model's loss exists and we can compute gradients, for continuing training.
    loaded_model_loss = loaded_model_predictions["loss"]
    assert loaded_model_loss is not None
    loaded_model_loss.backward()

    # Both outputs should have the same keys and the values for these keys should be close.
    for key in model_predictions.keys():
        if isinstance(model_predictions[key], torch.autograd.Variable):
            assert_allclose(model_predictions[key].data.numpy(),
                            loaded_model_predictions[key].data.numpy(),
                            rtol=1e-4,
                            err_msg=key)
        else:
            assert model_predictions[key] == loaded_model_predictions[key]

    return model, loaded_model
    # break
    json.dump(correct_labels, open("/backup2/jfchen/data/hotpot/dev/dev_easy_ids.json", 'w'))
    json.dump(wrong_labels, open("/backup2/jfchen/data/hotpot/dev/dev_hard_ids.json", 'w'))
    print('correct_labels:', correct_labels)
    print('wrong_labels', wrong_labels)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='hotpot predictor test')
    parser.add_argument('--model_path', type=str, help='path to the tgz model file')
    parser.add_argument('--data_path', type=str, help='path to the input data file')
    args = parser.parse_args()

    archive = load_archive(args.model_path, cuda_device=0)
    config = archive.config.duplicate()
    dataset_reader_params = config["dataset_reader"]
    dataset_reader = DatasetReader.from_params(dataset_reader_params)
    model = archive.model
    model.eval()

    # predictor = HotpotPredictor.from_archive(archive, 'hotpot_predictor')
    test(model, dataset_reader, args.data_path, 'Train')
    # test(predictor, '/scratch/cluster/jfchen/jason/multihopQA/hotpot/dev/dev_distractor.json', 'Validation')
def make_files_for_official_eval(model_archive_file, evaluation_files, output_file,
                                 cuda_device):
    archive = load_archive(model_archive_file)
    model = archive.model
    model.eval()

    if cuda_device != -1:
        model.cuda(cuda_device)

    def find_key(d, func):
        ret = None
        stack = [d]
        while len(stack) > 0 and ret is None:
            s = stack.pop()
            for k, v in s.items():
                if func(k, v):
                    ret = s
                    break
                elif isinstance(v, dict):
                    stack.append(v)
        return ret

    # load reader
    full_reader_params = copy.deepcopy(archive.config['dataset_reader'].as_dict())
    reader_params = find_key(full_reader_params,
                             lambda k, v: k == 'type' and v == 'wordnet_fine_grained')
    reader_params['is_training'] = False
    reader_params['should_remap_span_indices'] = True

    if 'extra_candidate_generators' in reader_params:
        candidate_generator_params = find_key(
            full_reader_params,
            lambda k, v: k == 'tokenizer_and_candidate_generator'
        )['tokenizer_and_candidate_generator']
        candidate_generator = TokenizerAndCandidateGenerator.from_params(
            Params(candidate_generator_params)
        )

    reader_params = Params(reader_params)

    print("====================")
    print(reader_params.as_dict())
    print("====================")

    reader = DatasetReader.from_params(reader_params)

    synset_to_lemmas = {}
    for lemma_id, synset_id in reader.mention_generator._lemma_to_synset.items():
        if synset_id not in synset_to_lemmas:
            synset_to_lemmas[synset_id] = []
        synset_to_lemmas[synset_id].append(lemma_id)

    vocab_params = archive.config['vocabulary']
    vocab = Vocabulary.from_params(vocab_params)

    iterator = BasicIterator(batch_size=24)
    iterator.index_with(vocab)

    fout = open(output_file, 'w')

    for ds_file in [evaluation_files]:
        instances = reader.read(ds_file)

        # get the metadata ids from the raw file
        raw_lines = []
        with JsonFile(ds_file, 'r') as fin:
            for sentence in fin:
                raw_ids = [[token['id'], token['lemma']]
                           for token in sentence if 'senses' in token]
                if len(raw_ids) > 0:
                    raw_lines.append(raw_ids)

        raw_i = 0
        for batch in iterator(instances, num_epochs=1, shuffle=False):
            print(raw_i)
            if cuda_device > -1:
                b = move_to_device(batch, cuda_device)
            else:
                b = batch

            b['candidates'] = {'wordnet': {
                'candidate_entities': b.pop('candidate_entities'),
                'candidate_entity_priors': b.pop('candidate_entity_prior'),
                'candidate_segment_ids': b.pop('candidate_segment_ids'),
                'candidate_spans': b.pop('candidate_spans')}}
            gold_entities = b.pop('gold_entities')
            b['gold_entities'] = {'wordnet': gold_entities}

            if 'extra_candidates' in b:
                extra_candidates = b.pop('extra_candidates')
                seq_len = b['tokens']['tokens'].shape[1]
                bbb = []
                for e in extra_candidates:
                    for k in e.keys():
                        e[k]['candidate_segment_ids'] = [0] * len(e[k]['candidate_spans'])
                    ee = {'tokens': ['[CLS]'] * seq_len,
                          'segment_ids': [0] * seq_len,
                          'candidates': e}
                    ee_fields = candidate_generator.convert_tokens_candidates_to_fields(ee)
                    bbb.append(Instance(ee_fields))
                eb = Batch(bbb)
                eb.index_instances(vocab)
                padding_lengths = eb.get_padding_lengths()
                tensor_dict = eb.as_tensor_dict(padding_lengths)
                b['candidates'].update(tensor_dict['candidates'])
                if cuda_device > -1:
                    b = move_to_device(b, cuda_device)

            output = model(**b)

            # predicted entities is list of (batch_index, (start, end), entity_id)
            predicted_entities = model.soldered_kgs['wordnet'].entity_linker._decode(
                output['wordnet']['linking_scores'],
                b['candidates']['wordnet']['candidate_spans'],
                b['candidates']['wordnet']['candidate_entities']['ids']
            )

            # make output file
            predicted_entities_batch_indices = []
            batch_size = batch['tokens']['tokens'].shape[0]
            for k in range(batch_size):
                predicted_entities_batch_indices.append([])
            for b_index, start_end, eid in predicted_entities:
                try:
                    synset_id = vocab.get_token_from_index(eid, 'entity')
                except KeyError:
                    synset_id = vocab.get_token_from_index(eid, 'entity_wordnet')
                all_lemma_ids = synset_to_lemmas[synset_id]
                predicted_entities_batch_indices[b_index].append(all_lemma_ids)

            # output lines look like semeval2013.d000.s001.t003 reader%1:19:00::
            for k in range(batch_size):
                raw_ids = raw_lines[raw_i]
                predicted_lemmas = predicted_entities_batch_indices[k]
                assert len(predicted_lemmas) == len(raw_ids)
                for (ii, gold_lemma), pl in zip(raw_ids, predicted_lemmas):
                    # get the predicted lemma_id
                    predicted_lemma_id = None
                    for pp in pl:
                        if pp.partition('%')[0] == gold_lemma:
                            predicted_lemma_id = pp
                    assert predicted_lemma_id is not None
                    line = "{} {}\n".format(ii, predicted_lemma_id)
                    fout.write(line)
                raw_i += 1

    fout.close()
def ensure_model_can_train_save_and_load(self,
                                         param_file: str,
                                         tolerance: float = 1e-4,
                                         cuda_device: int = -1,
                                         gradients_to_ignore: Set[str] = None,
                                         overrides: str = ""):
    """
    Parameters
    ----------
    param_file : ``str``
        Path to a training configuration file that we will use to train the model for this
        test.
    tolerance : ``float``, optional (default=1e-4)
        When comparing model predictions between the originally-trained model and the model
        after saving and loading, we will use this tolerance value (passed as ``rtol`` to
        ``numpy.testing.assert_allclose``).
    cuda_device : ``int``, optional (default=-1)
        The device to run the test on.
    gradients_to_ignore : ``Set[str]``, optional (default=None)
        This test runs a gradient check to make sure that we're actually computing gradients
        for all of the parameters in the model.  If you really want to ignore certain
        parameters when doing that check, you can pass their names here.  This is not
        recommended unless you're `really` sure you don't need to have non-zero gradients for
        those parameters (e.g., some of the beam search / state machine models have
        infrequently-used parameters that are hard to force the model to use in a small test).
    overrides : ``str``, optional (default = "")
        A JSON string that we will use to override values in the input parameter file.
    """
    save_dir = self.TEST_DIR / "save_and_load_test"
    archive_file = save_dir / "model.tar.gz"
    model = train_model_from_file(param_file, save_dir, overrides=overrides)
    loaded_model = load_archive(archive_file, cuda_device=cuda_device).model
    state_keys = model.state_dict().keys()
    loaded_state_keys = loaded_model.state_dict().keys()
    assert state_keys == loaded_state_keys
    # First we make sure that the state dict (the parameters) are the same for both models.
    for key in state_keys:
        assert_allclose(model.state_dict()[key].cpu().numpy(),
                        loaded_model.state_dict()[key].cpu().numpy(),
                        err_msg=key)
    params = Params.from_file(param_file)
    reader = DatasetReader.from_params(params['dataset_reader'])

    # Need to duplicate params because Iterator.from_params will consume.
    iterator_params = params['iterator']
    iterator_params2 = Params(copy.deepcopy(iterator_params.as_dict()))

    iterator = DataIterator.from_params(iterator_params)
    iterator2 = DataIterator.from_params(iterator_params2)

    # We'll check that even if we index the dataset with each model separately, we still get
    # the same result out.
    model_dataset = reader.read(params['validation_data_path'])
    iterator.index_with(model.vocab)
    model_batch = next(iterator(model_dataset, shuffle=False))

    loaded_dataset = reader.read(params['validation_data_path'])
    iterator2.index_with(loaded_model.vocab)
    loaded_batch = next(iterator2(loaded_dataset, shuffle=False))

    # Check gradients are None for non-trainable parameters and check that
    # trainable parameters receive some gradient if they are trainable.
    self.check_model_computes_gradients_correctly(model, model_batch, gradients_to_ignore)

    # The datasets themselves should be identical.
    assert model_batch.keys() == loaded_batch.keys()
    for key in model_batch.keys():
        self.assert_fields_equal(model_batch[key], loaded_batch[key], key, 1e-6)

    # Set eval mode, to turn off things like dropout, then get predictions.
    model.eval()
    loaded_model.eval()

    # Models with stateful RNNs need their states reset to have consistent
    # behavior after loading.
    for model_ in [model, loaded_model]:
        for module in model_.modules():
            if hasattr(module, 'stateful') and module.stateful:
                module.reset_states()

    model_predictions = model(**model_batch)
    loaded_model_predictions = loaded_model(**loaded_batch)

    # Check loaded model's loss exists and we can compute gradients, for continuing training.
    loaded_model_loss = loaded_model_predictions["loss"]
    assert loaded_model_loss is not None
    loaded_model_loss.backward()

    # Both outputs should have the same keys and the values for these keys should be close.
    for key in model_predictions.keys():
        self.assert_fields_equal(model_predictions[key],
                                 loaded_model_predictions[key],
                                 name=key,
                                 tolerance=tolerance)

    return model, loaded_model
if load_pretrained_BiDAF:
    archive = load_archive("https://s3-us-west-2.amazonaws.com/allennlp/models/bidaf-model-2017.09.15-charpad.tar.gz")

    # Get the model and the config file
    model = archive.model
    config = archive.config.duplicate()

    keys_config = list(config.keys())
    print("Key config list: ", keys_config)
    for key in keys_config:
        print("Params of %s" % key)
        print(config[key])

    ### Get the elements
    ## Data Readers ##
    dataset_reader_params = config["dataset_reader"]
    dataset_reader = DatasetReader.from_params(dataset_reader_params)

    ## Vocabulary ##
    vocab = model.vocab

    """
    ############ Propagate an instance text #############
    """
    instance = dataset_reader.text_to_instance(
        "What kind of test succeeded on its first attempt?",
        "One time I was writing a unit test, and it succeeded on the first attempt.",
        char_spans=[(6, 10)])
    print("Keys instance: ", instance.fields.keys())

    # Batch instances and convert to indices using the vocabulary.
    instances = [instance]
    dataset = Batch(instances)
def test_kwargs_are_passed_to_superclass(self):
    params = Params({"type": "text_classification_json", "max_instances": 50})
    reader = DatasetReader.from_params(params)
    assert reader.max_instances == 50
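# --- Illustration of the dispatch the test above relies on (not part of the
# original sources). ---
# A minimal sketch of how from_params works: "type" selects the registered
# DatasetReader subclass, and remaining keys are forwarded as constructor
# kwargs. "sequence_tagging" ships with AllenNLP; the max_instances value here
# is arbitrary, and the class-name assert assumes a recent AllenNLP release.
from allennlp.common import Params
from allennlp.data import DatasetReader

reader = DatasetReader.from_params(Params({"type": "sequence_tagging", "max_instances": 10}))
assert type(reader).__name__ == "SequenceTaggingDatasetReader"
assert reader.max_instances == 10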
def main(serialization_directory: str,
         device: int,
         data: str,
         prefix: str,
         domain: str = None):
    """
    serialization_directory : str, required.
        The directory containing the serialized weights.
    device: int, default = -1
        The device to run the evaluation on.
    data: str, default = None
        The data to evaluate on. By default, we use the validation data from
        the original experiment.
    prefix: str, default=""
        The prefix to prepend to the generated gold and prediction files, to distinguish
        different models/data.
    domain: str, optional (default = None)
        If passed, filters the ontonotes evaluation/test dataset to only contain the specified
        domain. This overwrites the domain in the config file from the model, to allow
        evaluation on domains other than the one the model was trained on.
    """
    config = Params.from_file(os.path.join(serialization_directory, "config.json"))

    if domain is not None:
        # Hack to allow evaluation on different domains than the
        # model was trained on.
        config["dataset_reader"]["domain_identifier"] = domain
        prefix = f"{domain}_{prefix}"
    else:
        config["dataset_reader"].pop("domain_identifier", None)

    dataset_reader = DatasetReader.from_params(config['dataset_reader'])
    evaluation_data_path = data if data else config['validation_data_path']

    archive = load_archive(os.path.join(serialization_directory, "model.tar.gz"),
                           cuda_device=device)
    model = archive.model
    model.eval()

    prediction_file_path = os.path.join(serialization_directory, prefix + "_predictions.txt")
    gold_file_path = os.path.join(serialization_directory, prefix + "_gold.txt")
    prediction_file = open(prediction_file_path, "w+")
    gold_file = open(gold_file_path, "w+")

    # Load the evaluation data and index it.
    print("reading evaluation data from {}".format(evaluation_data_path))
    instances = dataset_reader.read(evaluation_data_path)

    with torch.autograd.no_grad():
        iterator = BasicIterator(batch_size=32)
        iterator.index_with(model.vocab)

        model_predictions = []
        batches = iterator(instances, num_epochs=1, shuffle=False, cuda_device=device)
        for batch in Tqdm.tqdm(batches):
            result = model(**batch)
            predictions = model.decode(result)
            model_predictions.extend(predictions["tags"])

        for instance, prediction in zip(instances, model_predictions):
            fields = instance.fields
            try:
                # Most sentences have a verbal predicate, but not all.
                verb_index = fields["verb_indicator"].labels.index(1)
            except ValueError:
                verb_index = None
            gold_tags = fields["tags"].labels
            sentence = [x.text for x in fields["tokens"].tokens]
            write_to_conll_eval_file(prediction_file, gold_file,
                                     verb_index, sentence, prediction, gold_tags)
        prediction_file.close()
        gold_file.close()
def main(serialization_directory: str,
         device: int,
         data: str,
         prefix: str,
         domain: str = None):
    """
    serialization_directory : str, required.
        The directory containing the serialized weights.
    device: int, default = -1
        The device to run the evaluation on.
    data: str, default = None
        The data to evaluate on. By default, we use the validation data from
        the original experiment.
    prefix: str, default=""
        The prefix to prepend to the generated gold and prediction files, to distinguish
        different models/data.
    domain: str, optional (default = None)
        If passed, filters the ontonotes evaluation/test dataset to only contain the specified
        domain. This overwrites the domain in the config file from the model, to allow
        evaluation on domains other than the one the model was trained on.
    """
    config = Params.from_file(os.path.join(serialization_directory, "config.json"))

    if domain is not None:
        # Hack to allow evaluation on different domains than the
        # model was trained on.
        config["dataset_reader"]["domain_identifier"] = domain
        prefix = f"{domain}_{prefix}"
    else:
        config["dataset_reader"].pop("domain_identifier", None)

    dataset_reader = DatasetReader.from_params(config["dataset_reader"])
    evaluation_data_path = data if data else config["validation_data_path"]

    archive = load_archive(os.path.join(serialization_directory, "model.tar.gz"),
                           cuda_device=device)
    model = archive.model
    model.eval()

    prediction_file_path = os.path.join(serialization_directory, prefix + "_predictions.txt")
    gold_file_path = os.path.join(serialization_directory, prefix + "_gold.txt")
    prediction_file = open(prediction_file_path, "w+")
    gold_file = open(gold_file_path, "w+")

    # Load the evaluation data and index it.
    print("reading evaluation data from {}".format(evaluation_data_path))
    dataset = list(dataset_reader.read(evaluation_data_path))

    with torch.autograd.no_grad():
        loader = SimpleDataLoader(dataset, 32)
        model_predictions: List[List[str]] = []
        for batch in Tqdm.tqdm(loader):
            batch = move_to_device(batch, device)
            result = model(**batch)
            predictions = model.decode(result)
            model_predictions.extend(predictions["tags"])

        for instance, prediction in zip(dataset, model_predictions):
            fields = instance.fields
            verb_index = fields["metadata"]["verb_index"]
            gold_tags = fields["metadata"]["gold_tags"]
            sentence = fields["metadata"]["words"]
            write_to_conll_eval_file(prediction_file, gold_file,
                                     verb_index, sentence, prediction, gold_tags)
        prediction_file.close()
        gold_file.close()
def ensure_model_can_train_save_and_load(
        self,
        param_file: str,
        tolerance: float = 1e-4,
        cuda_device: int = -1,
        gradients_to_ignore: Set[str] = None,
        overrides: str = ""):
    """
    Parameters
    ----------
    param_file : ``str``
        Path to a training configuration file that we will use to train the model for this
        test.
    tolerance : ``float``, optional (default=1e-4)
        When comparing model predictions between the originally-trained model and the model
        after saving and loading, we will use this tolerance value (passed as ``rtol`` to
        ``numpy.testing.assert_allclose``).
    cuda_device : ``int``, optional (default=-1)
        The device to run the test on.
    gradients_to_ignore : ``Set[str]``, optional (default=None)
        This test runs a gradient check to make sure that we're actually computing gradients
        for all of the parameters in the model.  If you really want to ignore certain
        parameters when doing that check, you can pass their names here.  This is not
        recommended unless you're `really` sure you don't need to have non-zero gradients for
        those parameters (e.g., some of the beam search / state machine models have
        infrequently-used parameters that are hard to force the model to use in a small test).
    overrides : ``str``, optional (default = "")
        A JSON string that we will use to override values in the input parameter file.
    """
    save_dir = self.TEST_DIR / "save_and_load_test"
    archive_file = save_dir / "model.tar.gz"
    model = train_model_from_file(param_file, save_dir, overrides=overrides)
    loaded_model = load_archive(archive_file, cuda_device=cuda_device).model
    state_keys = model.state_dict().keys()
    loaded_state_keys = loaded_model.state_dict().keys()
    assert state_keys == loaded_state_keys
    # First we make sure that the state dict (the parameters) are the same for both models.
    for key in state_keys:
        assert_allclose(model.state_dict()[key].cpu().numpy(),
                        loaded_model.state_dict()[key].cpu().numpy(),
                        err_msg=key)
    params = Params.from_file(param_file)

    # Need to duplicate params because DatasetReader.from_params will consume.
    reader_params = params['dataset_reader']
    reader_params2 = Params(copy.deepcopy(reader_params.as_dict()))

    reader = DatasetReader.from_params(reader_params)
    reader2 = DatasetReader.from_params(reader_params2)

    # Need to duplicate params because Iterator.from_params will consume.
    iterator_params = params['iterator']
    iterator_params2 = Params(copy.deepcopy(iterator_params.as_dict()))

    iterator = DataIterator.from_params(iterator_params)
    iterator2 = DataIterator.from_params(iterator_params2)

    # We'll check that even if we index the dataset with each model separately, we still get
    # the same result out.
    seed_params = Params({"random_seed": 5, "numpy_seed": 5, "pytorch_seed": 5})
    prepare_environment(seed_params)
    model_dataset = reader.read(params['validation_data_path'])
    iterator.index_with(model.vocab)
    model_batch = next(iterator(model_dataset, shuffle=False))

    seed_params = Params({"random_seed": 5, "numpy_seed": 5, "pytorch_seed": 5})
    prepare_environment(seed_params)
    loaded_dataset = reader2.read(params['validation_data_path'])
    iterator2.index_with(loaded_model.vocab)
    loaded_batch = next(iterator2(loaded_dataset, shuffle=False))

    # Check gradients are None for non-trainable parameters and check that
    # trainable parameters receive some gradient if they are trainable.
    self.check_model_computes_gradients_correctly(model, model_batch, gradients_to_ignore)

    # The datasets themselves should be identical.
    assert model_batch.keys() == loaded_batch.keys()
    for key in model_batch.keys():
        self.assert_fields_equal(model_batch[key], loaded_batch[key], key, 1e-6)

    # Set eval mode, to turn off things like dropout, then get predictions.
    model.eval()
    loaded_model.eval()

    # Models with stateful RNNs need their states reset to have consistent
    # behavior after loading.
    for model_ in [model, loaded_model]:
        for module in model_.modules():
            if hasattr(module, 'stateful') and module.stateful:
                module.reset_states()

    model_predictions = model(**model_batch)
    loaded_model_predictions = loaded_model(**loaded_batch)

    # Check loaded model's loss exists and we can compute gradients, for continuing training.
    loaded_model_loss = loaded_model_predictions["loss"]
    assert loaded_model_loss is not None
    loaded_model_loss.backward()

    # Both outputs should have the same keys and the values for these keys should be close.
    for key in model_predictions.keys():
        self.assert_fields_equal(model_predictions[key],
                                 loaded_model_predictions[key],
                                 name=key,
                                 tolerance=tolerance)

    return model, loaded_model
def ensure_model_can_train_save_and_load(self, param_file: str):
    save_dir = os.path.join(self.TEST_DIR, "save_and_load_test")
    archive_file = os.path.join(save_dir, "model.tar.gz")
    model = train_model_from_file(param_file, save_dir)
    loaded_model = load_archive(archive_file).model
    state_keys = model.state_dict().keys()
    loaded_state_keys = loaded_model.state_dict().keys()
    assert state_keys == loaded_state_keys
    # First we make sure that the state dict (the parameters) are the same for both models.
    for key in state_keys:
        assert_allclose(model.state_dict()[key].numpy(),
                        loaded_model.state_dict()[key].numpy(),
                        err_msg=key)
    params = Params.from_file(self.param_file)
    reader = DatasetReader.from_params(params['dataset_reader'])
    iterator = DataIterator.from_params(params['iterator'])

    # We'll check that even if we index the dataset with each model separately, we still get
    # the same result out.
    model_dataset = reader.read(params['validation_data_path'])
    model_dataset.index_instances(model.vocab)
    model_batch_arrays = next(iterator(model_dataset, shuffle=False))
    model_batch = arrays_to_variables(model_batch_arrays, for_training=False)
    loaded_dataset = reader.read(params['validation_data_path'])
    loaded_dataset.index_instances(loaded_model.vocab)
    loaded_batch_arrays = next(iterator(loaded_dataset, shuffle=False))
    loaded_batch = arrays_to_variables(loaded_batch_arrays, for_training=False)

    # The datasets themselves should be identical.
    for key in model_batch.keys():
        field = model_batch[key]
        if isinstance(field, dict):
            for subfield in field:
                self.assert_fields_equal(model_batch[key][subfield],
                                         loaded_batch[key][subfield],
                                         tolerance=1e-6,
                                         name=key + '.' + subfield)
        else:
            self.assert_fields_equal(model_batch[key], loaded_batch[key], 1e-6, key)

    # Set eval mode, to turn off things like dropout, then get predictions.
    model.eval()
    loaded_model.eval()

    # Models with stateful RNNs need their states reset to have consistent
    # behavior after loading.
    for model_ in [model, loaded_model]:
        for module in model_.modules():
            if hasattr(module, 'stateful') and module.stateful:
                module.reset_states()

    model_predictions = model.forward(**model_batch)
    loaded_model_predictions = loaded_model.forward(**loaded_batch)

    # Check loaded model's loss exists and we can compute gradients, for continuing training.
    loaded_model_loss = loaded_model_predictions["loss"]
    assert loaded_model_loss is not None
    loaded_model_loss.backward()

    # Both outputs should have the same keys and the values for these keys should be close.
    for key in model_predictions.keys():
        self.assert_fields_equal(model_predictions[key],
                                 loaded_model_predictions[key],
                                 tolerance=1e-4,
                                 name=key)

    return model, loaded_model
def ensure_model_can_train_save_and_load(
    self,
    param_file: Union[PathLike, str],
    tolerance: float = 1e-4,
    cuda_device: int = -1,
    gradients_to_ignore: Set[str] = None,
    overrides: str = "",
    metric_to_check: str = None,
    metric_terminal_value: float = None,
    metric_tolerance: float = 1e-4,
    disable_dropout: bool = True,
):
    save_dir = self.TEST_DIR / "save_and_load_test"
    archive_file = save_dir / "model.tar.gz"
    model = train_model_from_file(param_file, save_dir, overrides=overrides)
    metrics_file = save_dir / "metrics.json"
    if metric_to_check is not None:
        metrics = json.loads(metrics_file.read_text())
        metric_value = metrics.get(f"best_validation_{metric_to_check}") or metrics.get(
            f"training_{metric_to_check}"
        )
        assert metric_value is not None, f"Cannot find {metric_to_check} in metrics.json file"
        assert metric_terminal_value is not None, "Please specify metric terminal value"
        assert abs(metric_value - metric_terminal_value) < metric_tolerance
    loaded_model = load_archive(archive_file, cuda_device=cuda_device).model
    state_keys = model.state_dict().keys()
    loaded_state_keys = loaded_model.state_dict().keys()
    assert state_keys == loaded_state_keys
    for key in state_keys:
        assert_allclose(
            model.state_dict()[key].cpu().numpy(),
            loaded_model.state_dict()[key].cpu().numpy(),
            err_msg=key,
        )
    params = Params.from_file(param_file, params_overrides=overrides)
    reader = DatasetReader.from_params(params["dataset_reader"])

    print("Reading with original model")
    model_dataset = reader.read(params["validation_data_path"])
    model_dataset.index_with(model.vocab)

    print("Reading with loaded model")
    loaded_dataset = reader.read(params["validation_data_path"])
    loaded_dataset.index_with(loaded_model.vocab)

    data_loader_params = params["data_loader"]
    data_loader_params["shuffle"] = False
    data_loader_params2 = Params(copy.deepcopy(data_loader_params.as_dict()))

    data_loader = DataLoader.from_params(dataset=model_dataset, params=data_loader_params)
    data_loader2 = DataLoader.from_params(dataset=loaded_dataset, params=data_loader_params2)

    model_batch = next(iter(data_loader))
    loaded_batch = next(iter(data_loader2))

    self.check_model_computes_gradients_correctly(
        model, model_batch, gradients_to_ignore, disable_dropout
    )

    assert model_batch.keys() == loaded_batch.keys()
    for key in model_batch.keys():
        self.assert_fields_equal(model_batch[key], loaded_batch[key], key, 1e-6)

    # Set eval mode, to turn off things like dropout, then get predictions.
    model.eval()
    loaded_model.eval()

    for model_ in [model, loaded_model]:
        for module in model_.modules():
            if hasattr(module, "stateful") and module.stateful:
                module.reset_states()

    print("Predicting with original model")
    model_predictions = model(**model_batch)
    print("Predicting with loaded model")
    loaded_model_predictions = loaded_model(**loaded_batch)

    # Both outputs should have the same keys and the values for these keys should be close.
    for key in model_predictions.keys():
        self.assert_fields_equal(
            model_predictions[key], loaded_model_predictions[key], name=key, tolerance=tolerance
        )

    return model, loaded_model
                          'cannot already exist for you to save a model to it')
aux_model_save_fp = None
if args.mtl:
    aux_model_save_fp = Path(model_dir, f'task_{args.aux_name}_model_{run_number}.tar.gz')
    if aux_model_save_fp.exists():
        raise FileExistsError(f'The model run file {aux_model_save_fp} '
                              'cannot already exist for you to save a model to it')

with tempfile.TemporaryDirectory() as temp_data_dir:
    results = train_model_from_file(args.model_config_fp,
                                    serialization_dir=temp_data_dir,
                                    overrides=overrides_string)
    params = Params.from_file(str(args.model_config_fp))
    if 'dataset_reader' not in params:
        reader = DatasetReader.from_params(params['task_sentiment']['dataset_reader'])
    else:
        reader = DatasetReader.from_params(params['dataset_reader'])
    results.eval()

    for save_fp, original_fp in zip(save_fps, original_fps):
        predicted_tags: List[List[str]] = []
        for pred_tokens in create_input_sentences(original_fp):
            instance = reader.text_to_instance(pred_tokens)
            pred_output = results.forward_on_instance(instance)
            tags = pred_output['tags']
            assert_err = (f'Number of predicted tags {len(tags)} should '
                          f'match the number of tokens being predicted {len(pred_tokens)}')
            assert len(tags) == len(pred_tokens), assert_err
def __init__(self):
    super(LazyFakeReader, self).__init__(lazy=True)
    self.reader = DatasetReader.from_params(Params({u'type': u'sequence_tagging'}))
def test_can_build_from_params(self):
    reader = DatasetReader.from_params(Params({"type": "squad2"}))

    assert reader._tokenizer.__class__.__name__ == "SpacyTokenizer"  # type: ignore
    assert reader._token_indexers["tokens"].__class__.__name__ == "SingleIdTokenIndexer"  # type: ignore
def ensure_model_can_train_save_and_load(self,
                                         param_file: str,
                                         tolerance: float = 1e-4,
                                         cuda_device: int = -1):
    save_dir = os.path.join(self.TEST_DIR, "save_and_load_test")
    archive_file = os.path.join(save_dir, "model.tar.gz")
    model = train_model_from_file(param_file, save_dir)
    loaded_model = load_archive(archive_file, cuda_device=cuda_device).model
    state_keys = model.state_dict().keys()
    loaded_state_keys = loaded_model.state_dict().keys()
    assert state_keys == loaded_state_keys
    # First we make sure that the state dict (the parameters) are the same for both models.
    for key in state_keys:
        assert_allclose(model.state_dict()[key].cpu().numpy(),
                        loaded_model.state_dict()[key].cpu().numpy(),
                        err_msg=key)
    params = Params.from_file(self.param_file)
    reader = DatasetReader.from_params(params['dataset_reader'])

    # Need to duplicate params because Iterator.from_params will consume.
    iterator_params = params['iterator']
    iterator_params2 = Params(copy.deepcopy(iterator_params.as_dict()))

    iterator = DataIterator.from_params(iterator_params)
    iterator2 = DataIterator.from_params(iterator_params2)

    # We'll check that even if we index the dataset with each model separately, we still get
    # the same result out.
    model_dataset = reader.read(params['validation_data_path'])
    iterator.index_with(model.vocab)
    model_batch = next(iterator(model_dataset, shuffle=False, cuda_device=cuda_device))

    loaded_dataset = reader.read(params['validation_data_path'])
    iterator2.index_with(loaded_model.vocab)
    loaded_batch = next(iterator2(loaded_dataset, shuffle=False, cuda_device=cuda_device))

    # Check gradients are None for non-trainable parameters and check that
    # trainable parameters receive some gradient if they are trainable.
    self.check_model_computes_gradients_correctly(model, model_batch)

    # The datasets themselves should be identical.
    assert model_batch.keys() == loaded_batch.keys()
    for key in model_batch.keys():
        self.assert_fields_equal(model_batch[key], loaded_batch[key], key, 1e-6)

    # Set eval mode, to turn off things like dropout, then get predictions.
    model.eval()
    loaded_model.eval()

    # Models with stateful RNNs need their states reset to have consistent
    # behavior after loading.
    for model_ in [model, loaded_model]:
        for module in model_.modules():
            if hasattr(module, 'stateful') and module.stateful:
                module.reset_states()

    model_predictions = model(**model_batch)
    loaded_model_predictions = loaded_model(**loaded_batch)

    # Check loaded model's loss exists and we can compute gradients, for continuing training.
    loaded_model_loss = loaded_model_predictions["loss"]
    assert loaded_model_loss is not None
    loaded_model_loss.backward()

    # Both outputs should have the same keys and the values for these keys should be close.
    for key in model_predictions.keys():
        self.assert_fields_equal(model_predictions[key],
                                 loaded_model_predictions[key],
                                 name=key,
                                 tolerance=tolerance)

    return model, loaded_model
def predict(archive_folder, span_file, cluster_file, output_file, cuda_device):
    combine_span_and_cluster_file(span_file, cluster_file)
    test_file = 'tmp_relation_42424242.jsonl'

    relation_threshold = json.load(
        open(archive_folder + '/metrics.json'))['test__n_ary_rel_global_threshold']
    print(relation_threshold)

    import_submodules("scirex")
    logging.info("Loading Model from %s", archive_folder)
    archive_file = os.path.join(archive_folder, "model.tar.gz")
    archive = load_archive(archive_file, cuda_device)
    model = archive.model
    model.eval()
    model.prediction_mode = True

    config = archive.config.duplicate()
    dataset_reader_params = config["dataset_reader"]
    dataset_reader = DatasetReader.from_params(dataset_reader_params)
    dataset_reader.prediction_mode = True
    instances = dataset_reader.read(test_file)

    # This loop is for its side effect only: wrapping each instance in a
    # Batch and indexing it registers the vocabulary indices in place.
    for instance in instances:
        batch = Batch([instance])
        batch.index_instances(model.vocab)

    data_iterator = DataIterator.from_params(config["validation_iterator"])
    iterator = data_iterator(instances, num_epochs=1, shuffle=False)

    with open(output_file, "w") as f:
        documents = {}
        for batch in tqdm(iterator):
            with torch.no_grad():
                batch = nn_util.move_to_device(batch, cuda_device)
                output_res = model.decode_relations(batch)

            n_ary_relations = output_res['n_ary_relation']
            predicted_relations, scores = (n_ary_relations['candidates'],
                                           n_ary_relations['scores'])
            if 'metadata' not in output_res['n_ary_relation']:
                continue
            metadata = output_res['n_ary_relation']['metadata'][0]
            doc_id = metadata['doc_id']
            # Invert cluster_name_to_id so predicted id tuples can be mapped
            # back to cluster names.
            coref_key_map = {
                k: i
                for i, k in metadata['document_metadata']['cluster_name_to_id'].items()
            }

            for i, rel in enumerate(predicted_relations):
                predicted_relations[i] = tuple(
                    coref_key_map[k] if k in coref_key_map else None for k in rel)

            if doc_id not in documents:
                documents[doc_id] = {'predicted_relations': [], 'doc_id': doc_id}

            label = [1 if x > relation_threshold else 0 for x in list(scores.ravel())]
            scores = [round(float(x), 4) for x in list(scores.ravel())]
            documents[doc_id]['predicted_relations'] += list(
                zip(predicted_relations, scores, label))

        # Keep only the highest-scoring copy of each distinct relation tuple.
        for d in documents.values():
            predicted_relations = {}
            for r, s, l in d['predicted_relations']:
                r = tuple(r)
                if r not in predicted_relations or predicted_relations[r][0] < s:
                    predicted_relations[r] = (s, l)
            d['predicted_relations'] = [
                (r, s, l) for r, (s, l) in predicted_relations.items()
            ]

        f.write("\n".join([json.dumps(x) for x in documents.values()]))
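The coref_key_map comprehension above is easy to misread: items() yields (name, id) pairs, so unpacking them as i, k and keying the new dict on k inverts the names-to-ids mapping into ids-to-names. The same step in isolation, with made-up values:

cluster_name_to_id = {'BERT': 0, 'SQuAD': 1, 'F1': 2}
coref_key_map = {k: i for i, k in cluster_name_to_id.items()}  # id -> name
relation = (0, 1, 2, 7)  # id 7 belongs to no known cluster
named = tuple(coref_key_map[k] if k in coref_key_map else None for k in relation)
assert named == ('BERT', 'SQuAD', 'F1', None)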
def __init__(self) -> None:
    super().__init__(lazy=True)
    self.reader = DatasetReader.from_params(
        Params({"type": "sequence_tagging", "lazy": True}))
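For context, a self-contained sketch of what such a lazy wrapper typically looks like once _read is filled in. The delegation and the fixture path are assumptions; the grounded point is that with lazy=True, read() returns a lazily re-evaluated iterable rather than a list:

from allennlp.common import Params
from allennlp.data.dataset_readers import DatasetReader


class LazyWrapper(DatasetReader):
    def __init__(self) -> None:
        super().__init__(lazy=True)
        self.reader = DatasetReader.from_params(
            Params({"type": "sequence_tagging", "lazy": True}))

    def _read(self, file_path: str):
        # Delegate to the wrapped reader; because the outer reader is lazy,
        # this generator is re-run each time the dataset is iterated.
        return self.reader._read(file_path)


dataset = LazyWrapper().read('tests/fixtures/data/sequence_tagging.tsv')
assert not isinstance(dataset, list)  # instances are produced on demand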
def ensure_model_can_train_save_and_load(self,
                                         param_file: str,
                                         tolerance: float = 1e-4,
                                         cuda_device: int = -1):
    save_dir = self.TEST_DIR / "save_and_load_test"
    archive_file = save_dir / "model.tar.gz"
    model = train_model_from_file(param_file, save_dir)
    loaded_model = load_archive(archive_file, cuda_device=cuda_device).model
    state_keys = model.state_dict().keys()
    loaded_state_keys = loaded_model.state_dict().keys()
    assert state_keys == loaded_state_keys
    # First we make sure that the state dict (the parameters) are the same
    # for both models.
    for key in state_keys:
        assert_allclose(model.state_dict()[key].cpu().numpy(),
                        loaded_model.state_dict()[key].cpu().numpy(),
                        err_msg=key)
    params = Params.from_file(param_file)
    reader = DatasetReader.from_params(params['dataset_reader'])
    # Need to duplicate params because DataIterator.from_params will consume.
    iterator_params = params['iterator']
    iterator_params2 = Params(copy.deepcopy(iterator_params.as_dict()))
    iterator = DataIterator.from_params(iterator_params)
    iterator2 = DataIterator.from_params(iterator_params2)
    # We'll check that even if we index the dataset with each model
    # separately, we still get the same result out.
    model_dataset = reader.read(params['validation_data_path'])
    iterator.index_with(model.vocab)
    model_batch = next(iterator(model_dataset, shuffle=False, cuda_device=cuda_device))
    loaded_dataset = reader.read(params['validation_data_path'])
    iterator2.index_with(loaded_model.vocab)
    loaded_batch = next(iterator2(loaded_dataset, shuffle=False, cuda_device=cuda_device))
    # Check gradients are None for non-trainable parameters and check that
    # trainable parameters receive some gradient if they are trainable.
    self.check_model_computes_gradients_correctly(model, model_batch)
    # The datasets themselves should be identical.
    assert model_batch.keys() == loaded_batch.keys()
    for key in model_batch.keys():
        self.assert_fields_equal(model_batch[key], loaded_batch[key], key, 1e-6)
    # Set eval mode, to turn off things like dropout, then get predictions.
    model.eval()
    loaded_model.eval()
    # Models with stateful RNNs need their states reset to have consistent
    # behavior after loading.
    for model_ in [model, loaded_model]:
        for module in model_.modules():
            if hasattr(module, 'stateful') and module.stateful:
                module.reset_states()
    model_predictions = model(**model_batch)
    loaded_model_predictions = loaded_model(**loaded_batch)
    # Check loaded model's loss exists and we can compute gradients, for
    # continuing training.
    loaded_model_loss = loaded_model_predictions["loss"]
    assert loaded_model_loss is not None
    loaded_model_loss.backward()
    # Both outputs should have the same keys and the values for these keys
    # should be close.
    for key in model_predictions.keys():
        self.assert_fields_equal(model_predictions[key],
                                 loaded_model_predictions[key],
                                 name=key,
                                 tolerance=tolerance)
    return model, loaded_model
def __init__(self, attacker_params: Dict[str, Any], reader: DatasetReader):
    super().__init__()
    self.attacker_params = Params(attacker_params)
    # Prefer an explicitly supplied reader; fall back to building one from
    # the "reader" sub-config in the attacker params.
    self.reader = reader if reader is not None else DatasetReader.from_params(
        self.attacker_params["reader"])
def test_semeval2010_task8_reader_with_entity_markers(self):
    reader_params = Params({
        "type": "semeval2010_task8",
        "entity_masking": "entity_markers",
        "tokenizer_and_candidate_generator": {
            "type": "bert_tokenizer_and_candidate_generator",
            "entity_candidate_generators": {
                "wordnet": {
                    "type": "wordnet_mention_generator",
                    "entity_file": "tests/fixtures/wordnet/entities_fixture.jsonl"
                }
            },
            "entity_indexers": {
                "wordnet": {
                    "type": "characters_tokenizer",
                    "tokenizer": {
                        "type": "word",
                        "word_splitter": {"type": "just_spaces"},
                    },
                    "namespace": "entity"
                }
            },
            "bert_model_type": "tests/fixtures/evaluation/semeval2010_task8/vocab_entity_markers.txt",
            "do_lower_case": True,
        },
    })

    reader = DatasetReader.from_params(reader_params)
    train_file = 'tests/fixtures/evaluation/semeval2010_task8/semeval2010_task8.json'
    instances = reader.read(train_file)

    # Check that the offsets are right!
    segment_ids = instances[0]['segment_ids'].array.tolist()
    tokens = [t.text for t in instances[0]['tokens'].tokens]
    tokens_and_segments = list(zip(tokens, segment_ids))
    expected_tokens_and_segments = [
        ('[CLS]', 0), ('the', 0), ('[e1start]', 0), ('big', 0), ('cat', 0),
        ('##s', 0), ('[e1end]', 0), ('jumped', 0), ('[UNK]', 0), ('the', 0),
        ('[e2start]', 0), ('la', 0), ('##zie', 0), ('##st', 0), ('brown', 0),
        ('dog', 0), ('##s', 0), ('[e2end]', 0), ('.', 0), ('[SEP]', 0)
    ]
    self.assertEqual(tokens_and_segments, expected_tokens_and_segments)

    tokens_1 = [t.text for t in instances[1]['tokens'].tokens]
    expected_tokens_1 = [
        '[CLS]', 'the', '[e2start]', 'big', 'cat', '##s', '[e2end]', 'jumped',
        '[e1start]', '[UNK]', 'the', 'la', '##zie', '##st', 'brown', 'dog',
        '##s', '[e1end]', '.', '[SEP]'
    ]
    self.assertEqual(tokens_1, expected_tokens_1)

    self.assertEqual(instances[0].fields['label_ids'].label, 0)
    self.assertEqual(instances[1].fields['label_ids'].label, 8)

    all_tokens = [[t.text for t in instances[k]['tokens'].tokens] for k in range(2)]
    for k in range(2):
        self.assertEqual(all_tokens[k][instances[k].fields['index_a'].label], '[e1start]')
        self.assertEqual(all_tokens[k][instances[k].fields['index_b'].label], '[e2start]')