Example #1
    def test_batch_predictions_are_consistent(self):
        # The CNN encoder has problems with this kind of test - it's not properly masked yet, so
        # changing the amount of padding in the batch will result in small differences in the
        # output of the encoder.  Because BiDAF is so deep, these differences get magnified through
        # the network and make this test impossible.  So, we'll remove the CNN encoder entirely
        # from the model for this test.  If/when we fix the CNN encoder to work correctly with
        # masking, we can change this back to how the other models run this test, with just a
        # single line.
        # pylint: disable=protected-access,attribute-defined-outside-init

        # Save some state.
        saved_model = self.model
        saved_instances = self.instances

        # Modify the state, run the test with modified state.
        params = Params.from_file(self.param_file)
        reader = DatasetReader.from_params(params['dataset_reader'])
        reader._token_indexers = {'tokens': reader._token_indexers['tokens']}
        self.instances = reader.read('tests/fixtures/data/squad.json')
        vocab = Vocabulary.from_instances(self.instances)
        for instance in self.instances:
            instance.index_fields(vocab)
        del params['model']['text_field_embedder']['token_characters']
        params['model']['phrase_layer']['input_size'] = 2
        self.model = Model.from_params(vocab, params['model'])

        self.ensure_batch_predictions_are_consistent()

        # Restore the state.
        self.model = saved_model
        self.instances = saved_instances
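For models whose encoders handle masking correctly, the comment above implies the whole test collapses to a single call; a minimal sketch, assuming the same ModelTestCase-style base class:

    def test_batch_predictions_are_consistent(self):
        # Sketch of the "single line" variant mentioned in the comment above.
        self.ensure_batch_predictions_are_consistent()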
Example #2
    def setUp(self):
        super(TestCopyNetReader, self).setUp()
        params = Params.from_file(self.FIXTURES_ROOT / "encoder_decoder" / "copynet_seq2seq" / "experiment.json")
        self.reader = DatasetReader.from_params(params["dataset_reader"])
        instances = self.reader.read(self.FIXTURES_ROOT / "data" / "copynet" / "copyover.tsv")
        self.instances = ensure_list(instances)
        self.vocab = Vocabulary.from_params(params=params["vocabulary"], instances=instances)
Example #3
    def set_up_model(self, param_file, dataset_file):
        # pylint: disable=attribute-defined-outside-init
        self.param_file = param_file
        params = Params.from_file(self.param_file)

        reader = DatasetReader.from_params(params['dataset_reader'])
        instances = reader.read(dataset_file)
        vocab = Vocabulary.from_instances(instances)
        self.vocab = vocab
        self.instances = instances
        self.model = Model.from_params(self.vocab, params['model'])

        # TODO(joelgrus) get rid of these
        # (a lot of the model tests use them, so they'll have to be changed)
        self.dataset = Batch(self.instances)
        self.dataset.index_instances(self.vocab)
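A typical caller of this helper is a model test's setUp; a minimal sketch, assuming a ModelTestCase-style base class and placeholder fixture paths:

class SimpleTaggerModelTest(ModelTestCase):  # hypothetical subclass; fixture paths are placeholders
    def setUp(self):
        super().setUp()
        self.set_up_model("tests/fixtures/simple_tagger/experiment.json",
                          "tests/fixtures/data/sequence_tagging.tsv")

    def test_model_can_train_save_and_load(self):
        self.ensure_model_can_train_save_and_load(self.param_file)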
Example #4
def main(serialization_directory, device):
    """
    serialization_directory : str, required.
        The directory containing the serialized weights.
    device: int, default = -1
        The device to run the evaluation on.
    """

    config = Params.from_file(os.path.join(serialization_directory, "config.json"))
    dataset_reader = DatasetReader.from_params(config['dataset_reader'])
    evaluation_data_path = config['validation_data_path']

    model = Model.load(config, serialization_dir=serialization_directory, cuda_device=device)

    prediction_file_path = os.path.join(serialization_directory, "predictions.txt")
    gold_file_path = os.path.join(serialization_directory, "gold.txt")
    prediction_file = open(prediction_file_path, "w+")
    gold_file = open(gold_file_path, "w+")

    # Load the evaluation data and index it.
    print("Reading evaluation data from {}".format(evaluation_data_path))
    instances = dataset_reader.read(evaluation_data_path)
    iterator = BasicIterator(batch_size=32)
    iterator.index_with(model.vocab)

    model_predictions = []
    batches = iterator(instances, num_epochs=1, shuffle=False, cuda_device=device, for_training=False)
    for batch in Tqdm.tqdm(batches):
        result = model(**batch)
        predictions = model.decode(result)
        model_predictions.extend(predictions["tags"])

    for instance, prediction in zip(instances, model_predictions):
        fields = instance.fields
        try:
            # Most sentences have a verbal predicate, but not all.
            verb_index = fields["verb_indicator"].labels.index(1)
        except ValueError:
            verb_index = None

        gold_tags = fields["tags"].labels
        sentence = fields["tokens"].tokens

        write_to_conll_eval_file(prediction_file, gold_file,
                                 verb_index, sentence, prediction, gold_tags)
    prediction_file.close()
    gold_file.close()
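The function takes its arguments from a caller; a minimal argparse entry point is sketched below (the flag names are assumptions, not part of the original script):

if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser(description="Write CoNLL-formatted SRL predictions and gold labels.")
    parser.add_argument("--serialization-directory", type=str, required=True,
                        help="Directory containing config.json and the serialized weights.")
    parser.add_argument("--device", type=int, default=-1,
                        help="CUDA device id, or -1 for CPU.")
    args = parser.parse_args()
    main(args.serialization_directory, args.device)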
Example #5
    def set_up_model(self, param_file, dataset_file):
        # pylint: disable=attribute-defined-outside-init
        self.param_file = param_file
        params = Params.from_file(self.param_file)

        reader = DatasetReader.from_params(params['dataset_reader'])
        instances = reader.read(dataset_file)
        # Use parameters for vocabulary if they are present in the config file, so that choices like
        # "non_padded_namespaces", "min_count" etc. can be set if needed.
        if 'vocabulary' in params:
            vocab_params = params['vocabulary']
            vocab = Vocabulary.from_params(params=vocab_params, instances=instances)
        else:
            vocab = Vocabulary.from_instances(instances)
        self.vocab = vocab
        self.instances = instances
        self.model = Model.from_params(vocab=self.vocab, params=params['model'])

        # TODO(joelgrus) get rid of these
        # (a lot of the model tests use them, so they'll have to be changed)
        self.dataset = Batch(self.instances)
        self.dataset.index_instances(self.vocab)
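The comment about vocabulary parameters refers to an optional "vocabulary" block in the experiment config; a minimal sketch of what such a block might contain (the exact keys depend on the AllenNLP version, and `instances` is the list read by the snippet above):

# Hypothetical excerpt of an experiment config that pins down vocabulary construction.
vocab_params = Params({
    "min_count": {"tokens": 3},                   # drop tokens seen fewer than 3 times
    "non_padded_namespaces": ["labels", "tags"],  # namespaces that get no padding/OOV entries
})
vocab = Vocabulary.from_params(params=vocab_params, instances=instances)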
Example #6
    def set_up_model(self, param_file, dataset_file):
        # pylint: disable=attribute-defined-outside-init
        self.param_file = param_file
        params = Params.from_file(self.param_file)

        reader = DatasetReader.from_params(params['dataset_reader'])
        # The dataset reader might be lazy, but a lazy list here breaks some of our tests.
        instances = list(reader.read(dataset_file))
        # Use parameters for vocabulary if they are present in the config file, so that choices like
        # "non_padded_namespaces", "min_count" etc. can be set if needed.
        if 'vocabulary' in params:
            vocab_params = params['vocabulary']
            vocab = Vocabulary.from_params(params=vocab_params, instances=instances)
        else:
            vocab = Vocabulary.from_instances(instances)
        self.vocab = vocab
        self.instances = instances
        self.model = Model.from_params(vocab=self.vocab, params=params['model'])

        # TODO(joelgrus) get rid of these
        # (a lot of the model tests use them, so they'll have to be changed)
        self.dataset = Batch(self.instances)
        self.dataset.index_instances(self.vocab)
Example #7
    def from_archive(cls, archive: Archive, predictor_name: str = None) -> 'Predictor':
        """
        Instantiate a :class:`Predictor` from an :class:`~allennlp.models.archival.Archive`;
        that is, from the result of training a model. Optionally specify which `Predictor`
        subclass; otherwise, the default one for the model will be used.
        """
        # Duplicate the config so that the config inside the archive doesn't get consumed
        config = archive.config.duplicate()

        if not predictor_name:
            model_type = config.get("model").get("type")
            if not model_type in DEFAULT_PREDICTORS:
                raise ConfigurationError(f"No default predictor for model type {model_type}.\n"\
                                         f"Please specify a predictor explicitly.")
            predictor_name = DEFAULT_PREDICTORS[model_type]

        dataset_reader_params = config["dataset_reader"]
        dataset_reader = DatasetReader.from_params(dataset_reader_params)

        model = archive.model
        model.eval()

        return Predictor.by_name(predictor_name)(model, dataset_reader)
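A sketch of a typical call site for the classmethod above; the archive path and input are placeholders, and the exact predictor names and JSON keys depend on the model:

from allennlp.models.archival import load_archive
from allennlp.predictors import Predictor

archive = load_archive("/path/to/model.tar.gz")        # placeholder path
predictor = Predictor.from_archive(archive)            # default predictor for the model type
# predictor = Predictor.from_archive(archive, "sentence-tagger")  # or name one explicitly
result = predictor.predict_json({"sentence": "AllenNLP parses this sentence."})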
Example #9
def write_for_official_eval(model_archive_file, test_file, output_file):
    archive = load_archive(model_archive_file)
    model = archive.model

    reader = DatasetReader.from_params(archive.config['dataset_reader'])

    iterator = DataIterator.from_params(
        Params({
            "type": "basic",
            "batch_size": 32
        }))
    vocab = Vocabulary.from_params(archive.config['vocabulary'])
    iterator.index_with(vocab)

    model.cuda()
    model.eval()

    label_ids_to_label = {0: 'F', 1: 'T'}

    instances = reader.read(test_file)
    predictions = []
    for batch in iterator(instances, num_epochs=1, shuffle=False):
        batch = move_to_device(batch, cuda_device=0)
        output = model(**batch)

        batch_labels = [
            label_ids_to_label[i]
            for i in output['predictions'].cpu().numpy().tolist()
        ]

        predictions.extend(batch_labels)

    assert len(predictions) == 1400

    with open(output_file, 'w') as fout:
        for p in predictions:
            fout.write("{}\n".format(p))
Example #10
    def set_up_model(
        self,
        param_file: PathLike,
        dataset_file: PathLike,
        serialization_dir: PathLike = None,
        seed: int = None,
    ):
        if seed is not None:
            random.seed(seed)
            numpy.random.seed(seed)
            torch.manual_seed(seed)

        self.param_file = str(param_file)
        params = Params.from_file(self.param_file)

        reader = DatasetReader.from_params(
            params["dataset_reader"], serialization_dir=serialization_dir
        )
        # The dataset reader might be lazy, but a lazy list here breaks some of our tests.
        instances = list(reader.read(str(dataset_file)))
        # Use parameters for vocabulary if they are present in the config file, so that choices like
        # "non_padded_namespaces", "min_count" etc. can be set if needed.
        if "vocabulary" in params:
            vocab_params = params["vocabulary"]
            vocab = Vocabulary.from_params(params=vocab_params, instances=instances)
        else:
            vocab = Vocabulary.from_instances(instances)
        self.vocab = vocab
        self.instances = instances
        self.model = Model.from_params(
            vocab=self.vocab, params=params["model"], serialization_dir=serialization_dir
        )

        # TODO(joelgrus) get rid of these
        # (a lot of the model tests use them, so they'll have to be changed)
        self.dataset = Batch(self.instances)
        self.dataset.index_instances(self.vocab)
Example #11
    def from_archive(
        cls,
        archive: Archive,
        predictor_name: str = None,
        dataset_reader_to_load: str = "validation",
        frozen: bool = True,
    ) -> "Predictor":
        """
        Instantiate a `Predictor` from an [`Archive`](../models/archival.md);
        that is, from the result of training a model. Optionally specify which `Predictor`
        subclass; otherwise, we try to find a corresponding predictor in `DEFAULT_PREDICTORS`, or if
        one is not found, the base class (i.e. `Predictor`) will be used. Optionally specify
        which [`DatasetReader`](../data/dataset_readers/dataset_reader.md) should be loaded;
        otherwise, the validation one will be used if it exists followed by the training dataset reader.
        Optionally specify if the loaded model should be frozen, meaning `model.eval()` will be called.
        """
        # Duplicate the config so that the config inside the archive doesn't get consumed
        config = archive.config.duplicate()

        if not predictor_name:
            model_type = config.get("model").get("type")
            model_class, _ = Model.resolve_class_name(model_type)
            predictor_name = model_class.default_predictor
        predictor_class: Type[Predictor] = Predictor.by_name(  # type: ignore
            predictor_name) if predictor_name is not None else cls

        if dataset_reader_to_load == "validation" and "validation_dataset_reader" in config:
            dataset_reader_params = config["validation_dataset_reader"]
        else:
            dataset_reader_params = config["dataset_reader"]
        dataset_reader = DatasetReader.from_params(dataset_reader_params)

        model = archive.model
        if frozen:
            model.eval()

        return predictor_class(model, dataset_reader)
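Unlike the older variant shown earlier, this version can also pick the training-time reader and keep the model unfrozen; a brief sketch, with load_archive and Predictor imported as in the previous sketch and a placeholder path:

archive = load_archive("/path/to/model.tar.gz")   # placeholder path
predictor = Predictor.from_archive(
    archive,
    dataset_reader_to_load="train",   # anything other than "validation" falls back to the training reader
    frozen=False,                     # skip model.eval(), e.g. to keep fine-tuning
)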
Example #12
def write_for_official_eval(model_archive_file, test_file, output_file,
                            label_ids_to_label):
    archive = load_archive(model_archive_file)
    model = archive.model

    reader = DatasetReader.from_params(archive.config['dataset_reader'])

    iterator = DataIterator.from_params(
        Params({
            "type": "basic",
            "batch_size": 4
        }))
    vocab = Vocabulary.from_params(archive.config['vocabulary'])
    iterator.index_with(vocab)

    model.cuda()
    model.eval()

    instances = reader.read(test_file)
    predictions = []
    for batch in iterator(instances, num_epochs=1, shuffle=False):
        batch = move_to_device(batch, cuda_device=0)
        output = model(**batch)

        batch_labels = [
            label_ids_to_label[i]
            for i in output['predictions'].cpu().numpy().tolist()
        ]

        predictions.extend(batch_labels)

    to_write = ''.join([
        "{}\t{}\n".format(i + 8001, e)
        for i, e in enumerate(model.metrics[0].pred)
    ])
    with open(output_file, 'w') as fout:
        fout.write(to_write)
Example #13
    def __init__(self,
                archive_file=DEFAULT_ARCHIVE_FILE,
                cuda_device=DEFAULT_CUDA_DEVICE,
                model_file=None,
                context_size=3):
        """ Constructor for NLU class. """

        self.context_size = context_size

        check_for_gpu(cuda_device)

        if not os.path.isfile(archive_file):
            if not model_file:
                raise Exception("No model for MILU is specified!")

            archive_file = cached_path(model_file)

        archive = load_archive(archive_file,
                            cuda_device=cuda_device)
        self.tokenizer = SpacyWordSplitter(language="en_core_web_sm")
        dataset_reader_params = archive.config["dataset_reader"]
        self.dataset_reader = DatasetReader.from_params(dataset_reader_params)
        self.model = archive.model
        self.model.eval()
Example #14
def predict(archive_file, test_file, output_file, cuda_device, score_dir):
    import_submodules("dygie")
    gold_test_data = load_json(test_file)
    archive = load_archive(archive_file, cuda_device)
    model = archive.model
    model.eval()
    config = archive.config.duplicate()
    dataset_reader_params = config["dataset_reader"]
    dataset_reader = DatasetReader.from_params(dataset_reader_params)
    instances = dataset_reader.read(test_file)
    batch = Batch(instances)
    batch.index_instances(model.vocab)
    iterator = DocumentIterator()
    with open(output_file, "w") as f:
        for doc, gold_data in zip(iterator(batch.instances, num_epochs=1, shuffle=False),
                                  gold_test_data):
            doc = nn_util.move_to_device(doc, cuda_device)  # Put on GPU.
            sentence_lengths = [len(entry["sentence"]) for entry in doc["metadata"]]
            sentence_starts = np.cumsum(sentence_lengths)
            sentence_starts = np.roll(sentence_starts, 1)
            sentence_starts[0] = 0
            pred = model(**doc)
            if score_dir is not None:
                dump_scores(doc, pred, score_dir)
            decoded = model.decode(pred)
            predictions = {}
            for k, v in decoded.items():
                predictions[decode_names[k]] = cleanup(k, v[decode_fields[k]], sentence_starts)
            res = {}
            res.update(gold_data)
            res.update(predictions)
            if "dataset" in res:
                del res["dataset"]
            check_lengths(res)
            encoded = json.dumps(res, default=int)
            f.write(encoded + "\n")
Example #15
def predictor_from_archive(archive: Archive,
                           predictor_name: str = None,
                           paper_features_path: str = None) -> 'Predictor':
    """
    Source: https://github.com/allenai/specter/blob/master/scripts/embed.py

    Extends allennlp.predictors.predictor.from_archive to allow processing multiprocess reader
    paper_features_path is passed to replace the correct one if the dataset_reader is multiprocess
    """

    # Duplicate the config so that the config inside the archive doesn't get consumed
    config = archive.config.duplicate()

    if not predictor_name:
        model_type = config.get("model").get("type")
        if not model_type in DEFAULT_PREDICTORS:
            raise ConfigurationError(f"No default predictor for model type {model_type}.\n" \
                                     f"Please specify a predictor explicitly.")
        predictor_name = DEFAULT_PREDICTORS[model_type]

    dataset_config = config["dataset_reader"].as_dict()
    if dataset_config['type'] == 'multiprocess':
        dataset_config = dataset_config['base_reader']
        if paper_features_path:
            dataset_config['paper_features_path'] = paper_features_path
        dataset_reader_params = Params(dataset_config)

    else:
        dataset_reader_params = config["dataset_reader"]

    dataset_reader = DatasetReader.from_params(dataset_reader_params)

    model = archive.model
    model.eval()

    return Predictor.by_name(predictor_name)(model, dataset_reader)
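A sketch of how this helper might be invoked; the archive path, predictor name, and features path are all placeholders, and as the docstring notes the helper only swaps in paper_features_path when the configured reader is a multiprocess one:

archive = load_archive("specter-model.tar.gz")                  # placeholder archive
predictor = predictor_from_archive(
    archive,
    predictor_name="specter_predictor",                         # hypothetical predictor name
    paper_features_path="data/paper-metadata.json",             # swapped into the multiprocess base reader
)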
Example #16
    def from_archive(cls,
                     archive_path: Pathlike,
                     dataset_reader_to_load: str = VALIDATION):
        """Construct from `allnlp.Archive`'s file."""
        # Uses lazy import because allennlp is an extra requirements.
        try:
            from allennlp.data import DatasetReader
            from allennlp.models.archival import load_archive
        except ImportError:
            Errors.E0("unofficial-allennlp-nightly")

        archive = load_archive(str(archive_path))
        config = archive.config
        if dataset_reader_to_load == VALIDATION and VALIDATION_DATASET_READER in config:
            dataset_reader_params = config[VALIDATION_DATASET_READER]
        else:
            dataset_reader_params = config[DATASET_READER]
        dataset_reader = DatasetReader.from_params(dataset_reader_params)
        return cls(
            model=archive.model,
            dataset_reader=dataset_reader,
            config={"allen_archive": archive.config},
            archive_path=Path(archive_path).absolute(),
        )
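The module-level constants referenced above are not shown in the excerpt; a plausible sketch, inferred from the config keys used throughout these examples:

# Assumed definitions; the excerpt above does not include them.
VALIDATION = "validation"
DATASET_READER = "dataset_reader"
VALIDATION_DATASET_READER = "validation_dataset_reader"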
Example #17
    def set_up_model(self, param_file, dataset_file):

        self.param_file = param_file
        params = Params.from_file(self.param_file)

        reader = DatasetReader.from_params(params["dataset_reader"])
        # The dataset reader might be lazy, but a lazy list here breaks some of our tests.
        instances = reader.read(str(dataset_file))
        # Use parameters for vocabulary if they are present in the config file, so that choices like
        # "non_padded_namespaces", "min_count" etc. can be set if needed.
        if "vocabulary" in params:
            vocab_params = params["vocabulary"]
            vocab = Vocabulary.from_params(params=vocab_params, instances=instances)
        else:
            vocab = Vocabulary.from_instances(instances)
        self.vocab = vocab
        self.instances = instances
        self.instances.index_with(vocab)
        self.model = Model.from_params(vocab=self.vocab, params=params["model"])

        # TODO(joelgrus) get rid of these
        # (a lot of the model tests use them, so they'll have to be changed)
        self.dataset = Batch(list(self.instances))
        self.dataset.index_instances(self.vocab)
Example #18
    def __init__(self) -> None:
        super().__init__(lazy=True)
        self.reader = DatasetReader.from_params(Params({'type': 'sequence_tagging'}))
Example #19
def predict(archive_folder, span_file, cluster_file, output_file, cuda_device):
    combine_span_and_cluster_file(span_file, cluster_file)

    test_file = 'tmp_relation_42424242.jsonl'
    relation_threshold = json.load(
        open(archive_folder +
             '/metrics.json'))['best_validation__n_ary_rel_global_threshold']
    print(relation_threshold)

    import_submodules("scirex")
    logging.info("Loading Model from %s", archive_folder)
    archive_file = os.path.join(archive_folder, "model.tar.gz")
    archive = load_archive(archive_file, cuda_device)
    model = archive.model
    model.eval()

    model.prediction_mode = True
    config = archive.config.duplicate()
    dataset_reader_params = config["dataset_reader"]
    dataset_reader = DatasetReader.from_params(dataset_reader_params)
    dataset_reader.prediction_mode = True
    instances = dataset_reader.read(test_file)

    for instance in instances:
        batch = Batch([instance])
        batch.index_instances(model.vocab)

    data_iterator = DataIterator.from_params(config["validation_iterator"])
    iterator = data_iterator(instances, num_epochs=1, shuffle=False)

    with open(output_file, "w") as f:
        documents = {}
        for batch in tqdm(iterator):
            with torch.no_grad():
                batch = nn_util.move_to_device(batch, cuda_device)
                output_res = model.decode_relations(batch)

            n_ary_relations = output_res['n_ary_relation']
            predicted_relations, scores = n_ary_relations[
                'candidates'], n_ary_relations['scores']
            try:
                metadata = output_res['n_ary_relation']['metadata'][0]
            except (KeyError, IndexError):
                continue
            doc_id = metadata['doc_id']
            coref_key_map = {
                k: i
                for i, k in metadata['document_metadata']
                ['cluster_name_to_id'].items()
            }

            for i, rel in enumerate(predicted_relations):
                predicted_relations[i] = tuple([
                    coref_key_map[k] if k in coref_key_map else None
                    for k in rel
                ])

            if doc_id not in documents:
                documents[doc_id] = {
                    'predicted_relations': [],
                    'doc_id': doc_id
                }
            scores_ = list(scores.ravel())
            if not scores_:
                warnings.warn(f"no relation scores defined for {doc_id}")
                continue
            label = [1 if x > relation_threshold else 0 for x in scores_]
            if all(l == 0 for l in label):
                decoding_mode = os.environ.get("SCIREX_RELATION_DECODING")
                if decoding_mode == "report_single_most_likely":
                    label[scores.argmax()] = 1
                elif decoding_mode == "report_probabilistically":
                    idxs_sorted_by_score = sorted(
                        range(len(label)),
                        key=lambda i: scores[i],
                        reverse=True  # highest score first
                    )
                    possible_decoding_idxs = \
                        [idxs_sorted_by_score[:i] for i in range(1, 11)]  # assuming that >10 relationships would never happen

                    def score_decoding(candidate_idxs):
                        """likelihood function for a geometric distribution fit
                        to the training distribution of number-of-relationships-per-document

                        :param candidate_idxs (List[int]): a list of idxs that represents a relationship distribution
                        :return: likelihood that distribution
                        """
                        score_from_n_relationships = st.geom.pmf(
                            len(candidate_idxs),
                            0.4046692607003891  # MLE from training distribution, i.e.: 1 / (1 + E[X])
                        )
                        score_from_indiv_relationships = scores[candidate_idxs]
                        return score_from_n_relationships * np.prod(
                            score_from_indiv_relationships)

                    best_decoding_idxs = max(possible_decoding_idxs,
                                             key=score_decoding)
                    for idx in best_decoding_idxs:
                        label[idx] = 1

            scores = [round(float(x), 4) for x in list(scores.ravel())]
            documents[doc_id]['predicted_relations'] += list(
                zip(predicted_relations, scores, label))

        for d in documents.values():
            predicted_relations = {}
            for r, s, l in d['predicted_relations']:
                r = tuple(r)
                if r not in predicted_relations or predicted_relations[r][
                        0] < s:
                    predicted_relations[r] = (s, l)

            d['predicted_relations'] = [
                (r, s, l) for r, (s, l) in predicted_relations.items()
            ]

        f.write("\n".join([json.dumps(x) for x in documents.values()]))
Example #20
    def ensure_model_can_train_save_and_load(
        self,
        param_file: str,
        tolerance: float = 1e-4,
        cuda_device: int = -1,
        gradients_to_ignore: Set[str] = None,
        overrides: str = "",
        metric_to_check: str = None,
        metric_terminal_value: float = None,
        metric_tolerance: float = 1e-4,
        disable_dropout: bool = True,
    ):
        """
        # Parameters

        param_file : `str`
            Path to a training configuration file that we will use to train the model for this
            test.
        tolerance : `float`, optional (default=1e-4)
            When comparing model predictions between the originally-trained model and the model
            after saving and loading, we will use this tolerance value (passed as `rtol` to
            `numpy.testing.assert_allclose`).
        cuda_device : `int`, optional (default=-1)
            The device to run the test on.
        gradients_to_ignore : `Set[str]`, optional (default=None)
            This test runs a gradient check to make sure that we're actually computing gradients
            for all of the parameters in the model.  If you really want to ignore certain
            parameters when doing that check, you can pass their names here.  This is not
            recommended unless you're `really` sure you don't need to have non-zero gradients for
            those parameters (e.g., some of the beam search / state machine models have
            infrequently-used parameters that are hard to force the model to use in a small test).
        overrides : `str`, optional (default = "")
            A JSON string that we will use to override values in the input parameter file.
        metric_to_check: `str`, optional (default = None)
            We may want to automatically perform a check that model reaches given metric when
            training (on validation set, if it is specified). It may be useful in CI, for example.
            You can pass any metric that is in your model returned metrics.
        metric_terminal_value: `float`, optional (default = None)
            When you set `metric_to_check`, you need to set the value this metric must converge to.
        metric_tolerance: `float`, optional (default=1e-4)
            Tolerance to check your model metric against the metric terminal value. One can expect some
            variance in model metrics when the training process is highly stochastic.
        disable_dropout : `bool`, optional (default = True)
            If True we will set all dropout to 0 before checking gradients. (Otherwise, with small
            datasets, you may get zero gradients because of unlucky dropout.)
        """
        save_dir = self.TEST_DIR / "save_and_load_test"
        archive_file = save_dir / "model.tar.gz"
        model = train_model_from_file(param_file,
                                      save_dir,
                                      overrides=overrides)
        metrics_file = save_dir / "metrics.json"
        if metric_to_check is not None:
            metrics = json.loads(metrics_file.read_text())
            metric_value = metrics.get(
                f"best_validation_{metric_to_check}") or metrics.get(
                    f"training_{metric_to_check}")
            assert metric_value is not None, f"Cannot find {metric_to_check} in metrics.json file"
            assert metric_terminal_value is not None, "Please specify metric terminal value"
            assert abs(metric_value - metric_terminal_value) < metric_tolerance
        loaded_model = load_archive(archive_file,
                                    cuda_device=cuda_device).model
        state_keys = model.state_dict().keys()
        loaded_state_keys = loaded_model.state_dict().keys()
        assert state_keys == loaded_state_keys
        # First we make sure that the state dict (the parameters) are the same for both models.
        for key in state_keys:
            assert_allclose(
                model.state_dict()[key].cpu().numpy(),
                loaded_model.state_dict()[key].cpu().numpy(),
                err_msg=key,
            )
        params = Params.from_file(param_file, params_overrides=overrides)
        reader = DatasetReader.from_params(params["dataset_reader"])

        print("Reading with original model")
        model_dataset = reader.read(params["validation_data_path"])
        model_dataset.index_with(model.vocab)

        print("Reading with loaded model")
        loaded_dataset = reader.read(params["validation_data_path"])
        loaded_dataset.index_with(loaded_model.vocab)

        # Need to duplicate params because DataLoader.from_params will consume.
        data_loader_params = params["data_loader"]
        data_loader_params["shuffle"] = False
        data_loader_params2 = Params(
            copy.deepcopy(data_loader_params.as_dict()))

        data_loader = DataLoader.from_params(dataset=model_dataset,
                                             params=data_loader_params)
        data_loader2 = DataLoader.from_params(dataset=loaded_dataset,
                                              params=data_loader_params2)

        # We'll check that even if we index the dataset with each model separately, we still get
        # the same result out.
        model_batch = next(iter(data_loader))

        loaded_batch = next(iter(data_loader2))

        # Check gradients are None for non-trainable parameters and check that
        # trainable parameters receive some gradient if they are trainable.
        self.check_model_computes_gradients_correctly(model, model_batch,
                                                      gradients_to_ignore,
                                                      disable_dropout)

        # The datasets themselves should be identical.
        assert model_batch.keys() == loaded_batch.keys()
        for key in model_batch.keys():
            self.assert_fields_equal(model_batch[key], loaded_batch[key], key,
                                     1e-6)

        # Set eval mode, to turn off things like dropout, then get predictions.
        model.eval()
        loaded_model.eval()
        # Models with stateful RNNs need their states reset to have consistent
        # behavior after loading.
        for model_ in [model, loaded_model]:
            for module in model_.modules():
                if hasattr(module, "stateful") and module.stateful:
                    module.reset_states()
        print("Predicting with original model")
        model_predictions = model(**model_batch)
        print("Predicting with loaded model")
        loaded_model_predictions = loaded_model(**loaded_batch)

        # Check loaded model's loss exists and we can compute gradients, for continuing training.
        loaded_model_loss = loaded_model_predictions["loss"]
        assert loaded_model_loss is not None
        loaded_model_loss.backward()

        # Both outputs should have the same keys and the values for these keys should be close.
        for key in model_predictions.keys():
            self.assert_fields_equal(model_predictions[key],
                                     loaded_model_predictions[key],
                                     name=key,
                                     tolerance=tolerance)

        return model, loaded_model
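A sketch of how a test might exercise the metric check described in the docstring; the metric name and terminal value are placeholders and must match something the model actually reports:

    def test_model_trains_to_terminal_metric(self):   # hypothetical test method
        self.ensure_model_can_train_save_and_load(
            self.param_file,
            tolerance=1e-4,
            metric_to_check="accuracy",
            metric_terminal_value=1.0,
            metric_tolerance=1e-2,
        )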
Example #21
    def ensure_model_can_train_save_and_load(self, param_file: str):
        save_dir = os.path.join(self.TEST_DIR, "save_and_load_test")
        archive_file = os.path.join(save_dir, "model.tar.gz")
        model = train_model_from_file(param_file, save_dir)
        loaded_model = load_archive(archive_file).model
        state_keys = model.state_dict().keys()
        loaded_state_keys = loaded_model.state_dict().keys()
        assert state_keys == loaded_state_keys
        # First we make sure that the state dict (the parameters) are the same for both models.
        for key in state_keys:
            assert_allclose(model.state_dict()[key].numpy(),
                            loaded_model.state_dict()[key].numpy(),
                            err_msg=key)
        params = Params.from_file(self.param_file)
        reader = DatasetReader.from_params(params['dataset_reader'])
        iterator = DataIterator.from_params(params['iterator'])

        # We'll check that even if we index the dataset with each model separately, we still get
        # the same result out.
        model_dataset = reader.read(params['validation_data_path'])
        model_dataset.index_instances(model.vocab)
        model_batch_arrays = next(iterator(model_dataset, shuffle=False))
        model_batch = arrays_to_variables(model_batch_arrays,
                                          for_training=False)
        loaded_dataset = reader.read(params['validation_data_path'])
        loaded_dataset.index_instances(loaded_model.vocab)
        loaded_batch_arrays = next(iterator(loaded_dataset, shuffle=False))
        loaded_batch = arrays_to_variables(loaded_batch_arrays,
                                           for_training=False)

        # The datasets themselves should be identical.
        for key in model_batch.keys():
            if key == 'metadata':
                assert model_batch[key] == loaded_batch[key]
                continue
            field = model_batch[key]
            if isinstance(field, dict):
                for subfield in field:
                    assert_allclose(model_batch[key][subfield].data.numpy(),
                                    loaded_batch[key][subfield].data.numpy(),
                                    rtol=1e-6,
                                    err_msg=key + "." + subfield)
            else:
                assert_allclose(model_batch[key].data.numpy(),
                                loaded_batch[key].data.numpy(),
                                rtol=1e-6,
                                err_msg=key)

        # Set eval mode, to turn off things like dropout, then get predictions.
        model.eval()
        loaded_model.eval()
        if 'metadata' in model_batch and 'metadata' not in signature(
                model.forward).parameters:
            del model_batch['metadata']
            del loaded_batch['metadata']
        model_predictions = model.forward(**model_batch)
        loaded_model_predictions = loaded_model.forward(**loaded_batch)

        # Check loaded model's loss exists and we can compute gradients, for continuing training.
        loaded_model_loss = loaded_model_predictions["loss"]
        assert loaded_model_loss is not None
        loaded_model_loss.backward()

        # Both outputs should have the same keys and the values for these keys should be close.
        for key in model_predictions.keys():
            if isinstance(model_predictions[key], torch.autograd.Variable):
                assert_allclose(model_predictions[key].data.numpy(),
                                loaded_model_predictions[key].data.numpy(),
                                rtol=1e-4,
                                err_msg=key)
            else:
                assert model_predictions[key] == loaded_model_predictions[key]

        return model, loaded_model
Example #22
        #         break
    json.dump(correct_labels,
              open("/backup2/jfchen/data/hotpot/dev/dev_easy_ids.json", 'w'))
    json.dump(wrong_labels,
              open("/backup2/jfchen/data/hotpot/dev/dev_hard_ids.json", 'w'))
    print('correct_labels:', correct_labels)
    print('wrong_labels', wrong_labels)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='hotpot predictor test')
    parser.add_argument('--model_path',
                        type=str,
                        help='path to the tgz model file')
    parser.add_argument('--data_path',
                        type=str,
                        help='path to the input data file')
    args = parser.parse_args()

    archive = load_archive(args.model_path, cuda_device=0)
    config = archive.config.duplicate()
    dataset_reader_params = config["dataset_reader"]
    dataset_reader = DatasetReader.from_params(dataset_reader_params)
    model = archive.model
    model.eval()

    # predictor = HotpotPredictor.from_archive(archive, 'hotpot_predictor')

    test(model, dataset_reader, args.data_path, 'Train')
    # test(predictor, '/scratch/cluster/jfchen/jason/multihopQA/hotpot/dev/dev_distractor.json', 'Validation')
Example #23
def make_files_for_official_eval(model_archive_file, evaluation_files, output_file,
                                 cuda_device):

    archive = load_archive(model_archive_file)
    model = archive.model

    model.eval()
    if cuda_device != -1:
        model.cuda(cuda_device)

    def find_key(d, func):
        ret = None
        stack = [d]
        while len(stack) > 0 and ret is None:
            s = stack.pop()
            for k, v in s.items():
                if func(k, v):
                    ret = s
                    break
                elif isinstance(v, dict):
                    stack.append(v)
        return ret

    # load reader
    full_reader_params = copy.deepcopy(archive.config['dataset_reader'].as_dict())
    reader_params = find_key(full_reader_params,
                             lambda k, v: k == 'type' and v == 'wordnet_fine_grained')
    reader_params['is_training'] = False
    reader_params['should_remap_span_indices'] = True
    if 'extra_candidate_generators' in reader_params:
        candidate_generator_params = find_key(
                full_reader_params,
                lambda k, v: k == 'tokenizer_and_candidate_generator'
        )['tokenizer_and_candidate_generator']
        candidate_generator = TokenizerAndCandidateGenerator.from_params(
                Params(candidate_generator_params)
        )

    reader_params = Params(reader_params)

    print("====================")
    print(reader_params.as_dict())
    print("====================")

    reader = DatasetReader.from_params(reader_params)

    synset_to_lemmas = {}
    for lemma_id, synset_id in reader.mention_generator._lemma_to_synset.items():
        if synset_id not in synset_to_lemmas:
            synset_to_lemmas[synset_id] = []
        synset_to_lemmas[synset_id].append(lemma_id)

    vocab_params = archive.config['vocabulary']
    vocab = Vocabulary.from_params(vocab_params)

    iterator = BasicIterator(batch_size=24)
    iterator.index_with(vocab)

    fout = open(output_file, 'w')

    for ds_file in [evaluation_files]:
        instances = reader.read(ds_file)

        # get the metadata ids from the raw file
        raw_lines = []
        with JsonFile(ds_file, 'r') as fin:
            for sentence in fin:
                raw_ids = [[token['id'], token['lemma']] for token in sentence if 'senses' in token]
                if len(raw_ids) > 0:
                    raw_lines.append(raw_ids)

        raw_i = 0
        for batch in iterator(instances, num_epochs=1, shuffle=False):
            print(raw_i)

            if cuda_device > -1:
                b = move_to_device(batch, cuda_device)
            else:
                b = batch

            b['candidates'] = {'wordnet': {
                    'candidate_entities': b.pop('candidate_entities'),
                    'candidate_entity_priors': b.pop('candidate_entity_prior'),
                    'candidate_segment_ids': b.pop('candidate_segment_ids'),
                    'candidate_spans': b.pop('candidate_spans')}}
            gold_entities = b.pop('gold_entities')
            b['gold_entities'] = {'wordnet': gold_entities}

            if 'extra_candidates' in b:
                extra_candidates = b.pop('extra_candidates')
                seq_len = b['tokens']['tokens'].shape[1]
                bbb = []
                for e in extra_candidates:
                    for k in e.keys():
                        e[k]['candidate_segment_ids'] = [0] * len(e[k]['candidate_spans'])
                    ee = {'tokens': ['[CLS]'] * seq_len, 'segment_ids': [0] * seq_len,
                          'candidates': e}
                    ee_fields = candidate_generator.convert_tokens_candidates_to_fields(ee)
                    bbb.append(Instance(ee_fields))
                eb = Batch(bbb)
                eb.index_instances(vocab)
                padding_lengths = eb.get_padding_lengths()
                tensor_dict = eb.as_tensor_dict(padding_lengths)
                b['candidates'].update(tensor_dict['candidates'])

            if cuda_device > -1:
                b = move_to_device(b, cuda_device)

            output = model(**b)
    
            # predicted entities is list of (batch_index, (start, end), entity_id)
            predicted_entities = model.soldered_kgs['wordnet'].entity_linker._decode(
                          output['wordnet']['linking_scores'], b['candidates']['wordnet']['candidate_spans'], 
                          b['candidates']['wordnet']['candidate_entities']['ids']
            )

            # make output file
            predicted_entities_batch_indices = []
            batch_size = batch['tokens']['tokens'].shape[0]
            for k in range(batch_size):
                predicted_entities_batch_indices.append([])
            for b_index, start_end, eid in predicted_entities:
                try:
                    synset_id = vocab.get_token_from_index(eid, 'entity')
                except KeyError:
                    synset_id = vocab.get_token_from_index(eid, 'entity_wordnet')
                all_lemma_ids = synset_to_lemmas[synset_id]
                predicted_entities_batch_indices[b_index].append(all_lemma_ids)

            # output lines look like semeval2013.d000.s001.t003 reader%1:19:00::
            for k in range(batch_size):
                raw_ids = raw_lines[raw_i]
                predicted_lemmas = predicted_entities_batch_indices[k]
                assert len(predicted_lemmas) == len(raw_ids)
                for (ii, gold_lemma), pl in zip(raw_ids, predicted_lemmas):
                    # get the predicted lemma_id
                    predicted_lemma_id = None
                    for pp in pl:
                        if pp.partition('%')[0] == gold_lemma:
                            predicted_lemma_id = pp
                    assert predicted_lemma_id is not None
                    line = "{} {}\n".format(ii, predicted_lemma_id)
                    fout.write(line)
                raw_i += 1

    fout.close()
Example #24
    def ensure_model_can_train_save_and_load(self,
                                             param_file: str,
                                             tolerance: float = 1e-4,
                                             cuda_device: int = -1,
                                             gradients_to_ignore: Set[str] = None,
                                             overrides: str = ""):
        """
        Parameters
        ----------
        param_file : ``str``
            Path to a training configuration file that we will use to train the model for this
            test.
        tolerance : ``float``, optional (default=1e-4)
            When comparing model predictions between the originally-trained model and the model
            after saving and loading, we will use this tolerance value (passed as ``rtol`` to
            ``numpy.testing.assert_allclose``).
        cuda_device : ``int``, optional (default=-1)
            The device to run the test on.
        gradients_to_ignore : ``Set[str]``, optional (default=None)
            This test runs a gradient check to make sure that we're actually computing gradients
            for all of the parameters in the model.  If you really want to ignore certain
            parameters when doing that check, you can pass their names here.  This is not
            recommended unless you're `really` sure you don't need to have non-zero gradients for
            those parameters (e.g., some of the beam search / state machine models have
            infrequently-used parameters that are hard to force the model to use in a small test).
        overrides : ``str``, optional (default = "")
            A JSON string that we will use to override values in the input parameter file.
        """
        save_dir = self.TEST_DIR / "save_and_load_test"
        archive_file = save_dir / "model.tar.gz"
        model = train_model_from_file(param_file, save_dir, overrides=overrides)
        loaded_model = load_archive(archive_file, cuda_device=cuda_device).model
        state_keys = model.state_dict().keys()
        loaded_state_keys = loaded_model.state_dict().keys()
        assert state_keys == loaded_state_keys
        # First we make sure that the state dict (the parameters) are the same for both models.
        for key in state_keys:
            assert_allclose(model.state_dict()[key].cpu().numpy(),
                            loaded_model.state_dict()[key].cpu().numpy(),
                            err_msg=key)
        params = Params.from_file(param_file)
        reader = DatasetReader.from_params(params['dataset_reader'])

        # Need to duplicate params because Iterator.from_params will consume.
        iterator_params = params['iterator']
        iterator_params2 = Params(copy.deepcopy(iterator_params.as_dict()))

        iterator = DataIterator.from_params(iterator_params)
        iterator2 = DataIterator.from_params(iterator_params2)

        # We'll check that even if we index the dataset with each model separately, we still get
        # the same result out.
        model_dataset = reader.read(params['validation_data_path'])
        iterator.index_with(model.vocab)
        model_batch = next(iterator(model_dataset, shuffle=False))

        loaded_dataset = reader.read(params['validation_data_path'])
        iterator2.index_with(loaded_model.vocab)
        loaded_batch = next(iterator2(loaded_dataset, shuffle=False))

        # Check gradients are None for non-trainable parameters and check that
        # trainable parameters receive some gradient if they are trainable.
        self.check_model_computes_gradients_correctly(model, model_batch, gradients_to_ignore)

        # The datasets themselves should be identical.
        assert model_batch.keys() == loaded_batch.keys()
        for key in model_batch.keys():
            self.assert_fields_equal(model_batch[key], loaded_batch[key], key, 1e-6)

        # Set eval mode, to turn off things like dropout, then get predictions.
        model.eval()
        loaded_model.eval()
        # Models with stateful RNNs need their states reset to have consistent
        # behavior after loading.
        for model_ in [model, loaded_model]:
            for module in model_.modules():
                if hasattr(module, 'stateful') and module.stateful:
                    module.reset_states()
        model_predictions = model(**model_batch)
        loaded_model_predictions = loaded_model(**loaded_batch)

        # Check loaded model's loss exists and we can compute gradients, for continuing training.
        loaded_model_loss = loaded_model_predictions["loss"]
        assert loaded_model_loss is not None
        loaded_model_loss.backward()

        # Both outputs should have the same keys and the values for these keys should be close.
        for key in model_predictions.keys():
            self.assert_fields_equal(model_predictions[key],
                                     loaded_model_predictions[key],
                                     name=key,
                                     tolerance=tolerance)

        return model, loaded_model
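The overrides argument described in the docstring is a JSON string merged over the training configuration; a short sketch of a call that shrinks training for a quick test (the keys are placeholders and must exist in the config being overridden):

    def test_can_train_quickly(self):   # hypothetical test method
        self.ensure_model_can_train_save_and_load(
            self.param_file,
            tolerance=1e-2,
            overrides='{"trainer": {"num_epochs": 1}, "iterator": {"batch_size": 2}}',
        )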
Example #25
if load_pretrained_BiDAF:
    archive = load_archive("https://s3-us-west-2.amazonaws.com/allennlp/models/bidaf-model-2017.09.15-charpad.tar.gz")
    
    # Get the model and the config file
    model = archive.model
    config = archive.config.duplicate()
    
    keys_config = list(config.keys())
    print ("Key config list: ", keys_config)
    for key in keys_config:
        print ("Params of %s"%(key))
        print (config[key])
    ### Get the elements
    ## Data Readers ##
    dataset_reader_params = config["dataset_reader"]
    dataset_reader = DatasetReader.from_params(dataset_reader_params)
    ## Vocabulary ##
    vocab = model.vocab 

    """
    ############  Propagate an instance text #############
    """
    instance = dataset_reader.text_to_instance("What kind of test succeeded on its first attempt?", 
                                               "One time I was writing a unit test, and it succeeded on the first attempt.", 
                                               char_spans=[(6, 10)])
    
    print ("Keys instance: ", instance.fields.keys())
    
    # Batch intances and convert to index using the vocabulary.
    instances = [instance]
    dataset = Batch(instances)
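The snippet stops after batching the instance; a hypothetical continuation sketch, assuming the 0.x Batch and BiDAF APIs used above, that indexes the batch, converts it to tensors, and runs a forward pass:

    # Hypothetical continuation: index with the model's vocabulary, tensorize, and run the model.
    dataset.index_instances(vocab)
    padding_lengths = dataset.get_padding_lengths()
    tensor_dict = dataset.as_tensor_dict(padding_lengths)
    output = model(**tensor_dict)
    print ("Output keys: ", list(output.keys()))
    print ("Best span: ", output.get("best_span"))   # BiDAF typically reports a best answer span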
Example #26
    def test_kwargs_are_passed_to_superclass(self):
        params = Params({"type": "text_classification_json", "max_instances": 50})
        reader = DatasetReader.from_params(params)
        assert reader.max_instances == 50
Example #27
def main(serialization_directory: str,
         device: int,
         data: str,
         prefix: str,
         domain: str = None):
    """
    serialization_directory : str, required.
        The directory containing the serialized weights.
    device: int, default = -1
        The device to run the evaluation on.
    data: str, default = None
        The data to evaluate on. By default, we use the validation data from
        the original experiment.
    prefix: str, default=""
        The prefix to prepend to the generated gold and prediction files, to distinguish
        different models/data.
    domain: str, optional (default = None)
        If passed, filters the ontonotes evaluation/test dataset to only contain the
        specified domain. This overwrites the domain in the config file from the model,
        to allow evaluation on domains other than the one the model was trained on.
    """
    config = Params.from_file(os.path.join(serialization_directory, "config.json"))

    if domain is not None:
        # Hack to allow evaluation on different domains than the
        # model was trained on.
        config["dataset_reader"]["domain_identifier"] = domain
        prefix = f"{domain}_{prefix}"
    else:
        config["dataset_reader"].pop("domain_identifier", None)

    dataset_reader = DatasetReader.from_params(config['dataset_reader'])
    evaluation_data_path = data if data else config['validation_data_path']

    archive = load_archive(os.path.join(serialization_directory, "model.tar.gz"), cuda_device=device)
    model = archive.model
    model.eval()

    prediction_file_path = os.path.join(serialization_directory, prefix + "_predictions.txt")
    gold_file_path = os.path.join(serialization_directory, prefix + "_gold.txt")
    prediction_file = open(prediction_file_path, "w+")
    gold_file = open(gold_file_path, "w+")

    # Load the evaluation data and index it.
    print("reading evaluation data from {}".format(evaluation_data_path))
    instances = dataset_reader.read(evaluation_data_path)

    with torch.autograd.no_grad():
        iterator = BasicIterator(batch_size=32)
        iterator.index_with(model.vocab)

        model_predictions = []
        batches = iterator(instances, num_epochs=1, shuffle=False, cuda_device=device)
        for batch in Tqdm.tqdm(batches):
            result = model(**batch)
            predictions = model.decode(result)
            model_predictions.extend(predictions["tags"])

        for instance, prediction in zip(instances, model_predictions):
            fields = instance.fields
            try:
                # Most sentences have a verbal predicate, but not all.
                verb_index = fields["verb_indicator"].labels.index(1)
            except ValueError:
                verb_index = None

            gold_tags = fields["tags"].labels
            sentence = [x.text for x in fields["tokens"].tokens]

            write_to_conll_eval_file(prediction_file, gold_file,
                                     verb_index, sentence, prediction, gold_tags)
        prediction_file.close()
        gold_file.close()
Example #28
def main(serialization_directory: str,
         device: int,
         data: str,
         prefix: str,
         domain: str = None):
    """
    serialization_directory : str, required.
        The directory containing the serialized weights.
    device: int, default = -1
        The device to run the evaluation on.
    data: str, default = None
        The data to evaluate on. By default, we use the validation data from
        the original experiment.
    prefix: str, default=""
        The prefix to prepend to the generated gold and prediction files, to distinguish
        different models/data.
    domain: str, optional (default = None)
        If passed, filters the ontonotes evaluation/test dataset to only contain the
        specified domain. This overwrites the domain in the config file from the model,
        to allow evaluation on domains other than the one the model was trained on.
    """
    config = Params.from_file(
        os.path.join(serialization_directory, "config.json"))

    if domain is not None:
        # Hack to allow evaluation on different domains than the
        # model was trained on.
        config["dataset_reader"]["domain_identifier"] = domain
        prefix = f"{domain}_{prefix}"
    else:
        config["dataset_reader"].pop("domain_identifier", None)

    dataset_reader = DatasetReader.from_params(config["dataset_reader"])
    evaluation_data_path = data if data else config["validation_data_path"]

    archive = load_archive(os.path.join(serialization_directory,
                                        "model.tar.gz"),
                           cuda_device=device)
    model = archive.model
    model.eval()

    prediction_file_path = os.path.join(serialization_directory,
                                        prefix + "_predictions.txt")
    gold_file_path = os.path.join(serialization_directory,
                                  prefix + "_gold.txt")
    prediction_file = open(prediction_file_path, "w+")
    gold_file = open(gold_file_path, "w+")

    # Load the evaluation data and index it.
    print("reading evaluation data from {}".format(evaluation_data_path))
    dataset = list(dataset_reader.read(evaluation_data_path))

    with torch.autograd.no_grad():
        loader = SimpleDataLoader(dataset, 32)
        model_predictions: List[List[str]] = []
        for batch in Tqdm.tqdm(loader):
            batch = move_to_device(batch, device)
            result = model(**batch)
            predictions = model.decode(result)
            model_predictions.extend(predictions["tags"])

        for instance, prediction in zip(dataset, model_predictions):
            fields = instance.fields
            verb_index = fields["metadata"]["verb_index"]
            gold_tags = fields["metadata"]["gold_tags"]
            sentence = fields["metadata"]["words"]
            write_to_conll_eval_file(prediction_file, gold_file, verb_index,
                                     sentence, prediction, gold_tags)
        prediction_file.close()
        gold_file.close()
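
As a usage note (not part of the original example), here is a minimal sketch of how this main function could be exposed on the command line with argparse; the flag names and help strings are assumptions, not the original script's interface.

import argparse

if __name__ == "__main__":
    # Hedged sketch: hypothetical CLI wrapper for main(); flag names are illustrative only.
    parser = argparse.ArgumentParser(
        description="Write SRL predictions and gold labels to CoNLL-formatted files.")
    parser.add_argument("--path", type=str, required=True,
                        help="Serialization directory of the trained model.")
    parser.add_argument("--device", type=int, default=-1,
                        help="CUDA device id, or -1 for CPU.")
    parser.add_argument("--data", type=str, default=None,
                        help="Optional evaluation data path; defaults to the config's validation data.")
    parser.add_argument("--prefix", type=str, default="",
                        help="Prefix for the generated gold and prediction files.")
    parser.add_argument("--domain", type=str, default=None,
                        help="Optional OntoNotes domain filter.")
    args = parser.parse_args()
    main(args.path, args.device, args.data, args.prefix, args.domain)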
Example #29
0
    def ensure_model_can_train_save_and_load(
            self,
            param_file: str,
            tolerance: float = 1e-4,
            cuda_device: int = -1,
            gradients_to_ignore: Set[str] = None,
            overrides: str = ""):
        """
        Parameters
        ----------
        param_file : ``str``
            Path to a training configuration file that we will use to train the model for this
            test.
        tolerance : ``float``, optional (default=1e-4)
            When comparing model predictions between the originally-trained model and the model
            after saving and loading, we will use this tolerance value (passed as ``rtol`` to
            ``numpy.testing.assert_allclose``).
        cuda_device : ``int``, optional (default=-1)
            The device to run the test on.
        gradients_to_ignore : ``Set[str]``, optional (default=None)
            This test runs a gradient check to make sure that we're actually computing gradients
            for all of the parameters in the model.  If you really want to ignore certain
            parameters when doing that check, you can pass their names here.  This is not
            recommended unless you're `really` sure you don't need to have non-zero gradients for
            those parameters (e.g., some of the beam search / state machine models have
            infrequently-used parameters that are hard to force the model to use in a small test).
        overrides : ``str``, optional (default = "")
            A JSON string that we will use to override values in the input parameter file.
        """
        save_dir = self.TEST_DIR / "save_and_load_test"
        archive_file = save_dir / "model.tar.gz"
        model = train_model_from_file(param_file,
                                      save_dir,
                                      overrides=overrides)
        loaded_model = load_archive(archive_file,
                                    cuda_device=cuda_device).model
        state_keys = model.state_dict().keys()
        loaded_state_keys = loaded_model.state_dict().keys()
        assert state_keys == loaded_state_keys
        # First we make sure that the state dict (the parameters) are the same for both models.
        for key in state_keys:
            assert_allclose(model.state_dict()[key].cpu().numpy(),
                            loaded_model.state_dict()[key].cpu().numpy(),
                            err_msg=key)
        params = Params.from_file(param_file)

        # Need to duplicate params because DatasetReader.from_params will consume.
        reader_params = params['dataset_reader']
        reader_params2 = Params(copy.deepcopy(reader_params.as_dict()))

        reader = DatasetReader.from_params(reader_params)
        reader2 = DatasetReader.from_params(reader_params2)

        # Need to duplicate params because Iterator.from_params will consume.
        iterator_params = params['iterator']
        iterator_params2 = Params(copy.deepcopy(iterator_params.as_dict()))

        iterator = DataIterator.from_params(iterator_params)
        iterator2 = DataIterator.from_params(iterator_params2)

        # We'll check that even if we index the dataset with each model separately, we still get
        # the same result out.
        seed_params = Params({
            "random_seed": 5,
            "numpy_seed": 5,
            "pytorch_seed": 5
        })
        prepare_environment(seed_params)
        model_dataset = reader.read(params['validation_data_path'])
        iterator.index_with(model.vocab)
        model_batch = next(iterator(model_dataset, shuffle=False))

        seed_params = Params({
            "random_seed": 5,
            "numpy_seed": 5,
            "pytorch_seed": 5
        })
        prepare_environment(seed_params)
        loaded_dataset = reader2.read(params['validation_data_path'])
        iterator2.index_with(loaded_model.vocab)
        loaded_batch = next(iterator2(loaded_dataset, shuffle=False))

        # Check that gradients are None for non-trainable parameters and that
        # trainable parameters receive a non-zero gradient.
        self.check_model_computes_gradients_correctly(model, model_batch,
                                                      gradients_to_ignore)

        # The datasets themselves should be identical.
        assert model_batch.keys() == loaded_batch.keys()
        for key in model_batch.keys():
            self.assert_fields_equal(model_batch[key], loaded_batch[key], key,
                                     1e-6)

        # Set eval mode, to turn off things like dropout, then get predictions.
        model.eval()
        loaded_model.eval()
        # Models with stateful RNNs need their states reset to have consistent
        # behavior after loading.
        for model_ in [model, loaded_model]:
            for module in model_.modules():
                if hasattr(module, 'stateful') and module.stateful:
                    module.reset_states()
        model_predictions = model(**model_batch)
        loaded_model_predictions = loaded_model(**loaded_batch)

        # Check loaded model's loss exists and we can compute gradients, for continuing training.
        loaded_model_loss = loaded_model_predictions["loss"]
        assert loaded_model_loss is not None
        loaded_model_loss.backward()

        # Both outputs should have the same keys and the values for these keys should be close.
        for key in model_predictions.keys():
            self.assert_fields_equal(model_predictions[key],
                                     loaded_model_predictions[key],
                                     name=key,
                                     tolerance=tolerance)

        return model, loaded_model
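
For orientation (this is not from the original source), the following is a hedged sketch of how a concrete model test typically calls the helper above; the class name and fixture paths are placeholders.

class MyModelTest(ModelTestCase):
    # Hedged sketch: hypothetical test class; the fixture paths below are placeholders.
    def setUp(self):
        super().setUp()
        self.set_up_model("tests/fixtures/my_model/experiment.json",
                          "tests/fixtures/data/my_dataset.json")

    def test_model_can_train_save_and_load(self):
        self.ensure_model_can_train_save_and_load(self.param_file)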
Example #30
0
    def ensure_model_can_train_save_and_load(self, param_file: str):
        save_dir = os.path.join(self.TEST_DIR, "save_and_load_test")
        archive_file = os.path.join(save_dir, "model.tar.gz")
        model = train_model_from_file(param_file, save_dir)
        loaded_model = load_archive(archive_file).model
        state_keys = model.state_dict().keys()
        loaded_state_keys = loaded_model.state_dict().keys()
        assert state_keys == loaded_state_keys
        # First we make sure that the state dict (the parameters) are the same for both models.
        for key in state_keys:
            assert_allclose(model.state_dict()[key].numpy(),
                            loaded_model.state_dict()[key].numpy(),
                            err_msg=key)
        params = Params.from_file(self.param_file)
        reader = DatasetReader.from_params(params['dataset_reader'])
        iterator = DataIterator.from_params(params['iterator'])

        # We'll check that even if we index the dataset with each model separately, we still get
        # the same result out.
        model_dataset = reader.read(params['validation_data_path'])
        model_dataset.index_instances(model.vocab)
        model_batch_arrays = next(iterator(model_dataset, shuffle=False))
        model_batch = arrays_to_variables(model_batch_arrays, for_training=False)
        loaded_dataset = reader.read(params['validation_data_path'])
        loaded_dataset.index_instances(loaded_model.vocab)
        loaded_batch_arrays = next(iterator(loaded_dataset, shuffle=False))
        loaded_batch = arrays_to_variables(loaded_batch_arrays, for_training=False)

        # The datasets themselves should be identical.
        for key in model_batch.keys():
            field = model_batch[key]
            if isinstance(field, dict):
                for subfield in field:
                    self.assert_fields_equal(model_batch[key][subfield],
                                             loaded_batch[key][subfield],
                                             tolerance=1e-6,
                                             name=key + '.' + subfield)
            else:
                self.assert_fields_equal(model_batch[key], loaded_batch[key], 1e-6, key)

        # Set eval mode, to turn off things like dropout, then get predictions.
        model.eval()
        loaded_model.eval()
        # Models with stateful RNNs need their states reset to have consistent
        # behavior after loading.
        for model_ in [model, loaded_model]:
            for module in model_.modules():
                if hasattr(module, 'stateful') and module.stateful:
                    module.reset_states()
        model_predictions = model.forward(**model_batch)
        loaded_model_predictions = loaded_model.forward(**loaded_batch)

        # Check loaded model's loss exists and we can compute gradients, for continuing training.
        loaded_model_loss = loaded_model_predictions["loss"]
        assert loaded_model_loss is not None
        loaded_model_loss.backward()

        # Both outputs should have the same keys and the values for these keys should be close.
        for key in model_predictions.keys():
            self.assert_fields_equal(model_predictions[key],
                                     loaded_model_predictions[key],
                                     tolerance=1e-4,
                                     name=key)

        return model, loaded_model
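
Several of these helpers also call self.assert_fields_equal, which is not shown in any of the snippets (and its argument order varies between versions). As a hedged sketch (an assumption, not the library's actual implementation), such a helper typically compares nested dictionaries of tensors recursively with numpy's assert_allclose:

    def assert_fields_equal(self, field1, field2, name: str, tolerance: float = 1e-6) -> None:
        # Hedged sketch: recursively compare nested dicts, tensors, and plain values.
        if isinstance(field1, dict):
            assert field1.keys() == field2.keys()
            for key in field1:
                self.assert_fields_equal(field1[key], field2[key],
                                         name=f"{name}.{key}", tolerance=tolerance)
        elif isinstance(field1, torch.Tensor):
            assert_allclose(field1.detach().cpu().numpy(),
                            field2.detach().cpu().numpy(),
                            rtol=tolerance, err_msg=name)
        else:
            assert field1 == field2, name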
Example #31
0
 def __init__(self) -> None:
     super().__init__(lazy=True)
     self.reader = DatasetReader.from_params(Params({'type': 'sequence_tagging'}))
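
A lazy wrapper like this usually just delegates reading to the wrapped reader. The following is a minimal, hedged sketch of what its _read method might look like (assumed; it is not shown in the original snippet):

 def _read(self, file_path: str):
     # Delegate to the wrapped sequence_tagging reader and stream its instances lazily.
     yield from self.reader.read(file_path)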
Example #32
0
    def ensure_model_can_train_save_and_load(
        self,
        param_file: Union[PathLike, str],
        tolerance: float = 1e-4,
        cuda_device: int = -1,
        gradients_to_ignore: Set[str] = None,
        overrides: str = "",
        metric_to_check: str = None,
        metric_terminal_value: float = None,
        metric_tolerance: float = 1e-4,
        disable_dropout: bool = True,
    ):
        save_dir = self.TEST_DIR / "save_and_load_test"
        archive_file = save_dir / "model.tar.gz"
        model = train_model_from_file(param_file, save_dir, overrides=overrides)
        metrics_file = save_dir / "metrics.json"
        if metric_to_check is not None:
            # Read the metrics written during training so the requested metric can be checked.
            metrics = json.loads(metrics_file.read_text("utf-8"))
            metric_value = metrics.get(f"best_validation_{metric_to_check}") or metrics.get(
                f"training_{metric_to_check}"
            )
            assert metric_value is not None, f"Cannot find {metric_to_check} in metrics.json file"
            assert metric_terminal_value is not None, "Please specify metric terminal value"
            assert abs(metric_value - metric_terminal_value) < metric_tolerance
        loaded_model = load_archive(archive_file, cuda_device=cuda_device).model
        state_keys = model.state_dict().keys()
        loaded_state_keys = loaded_model.state_dict().keys()
        assert state_keys == loaded_state_keys
        for key in state_keys:
            assert_allclose(
                model.state_dict()[key].cpu().numpy(),
                loaded_model.state_dict()[key].cpu().numpy(),
                err_msg=key,
            )
        params = Params.from_file(param_file, params_overrides=overrides)
        reader = DatasetReader.from_params(params["dataset_reader"])

        print("Reading with original model")
        model_dataset = reader.read(params["validation_data_path"])

        print("Reading with loaded model")
        loaded_dataset = reader.read(params["validation_data_path"])

        data_loader_params = params["data_loader"]
        data_loader_params["shuffle"] = False
        data_loader_params2 = Params(copy.deepcopy(data_loader_params.as_dict()))

        data_loader = DataLoader.from_params(dataset=model_dataset, params=data_loader_params)
        data_loader2 = DataLoader.from_params(dataset=loaded_dataset, params=data_loader_params2)

        model_batch = next(iter(data_loader))

        loaded_batch = next(iter(data_loader2))

        self.check_model_computes_gradients_correctly(
            model, model_batch, gradients_to_ignore, disable_dropout
        )

        for key in model_batch.keys():
            self.assert_fields_equal(model_batch[key], loaded_batch[key], key, 1e-6)

        for model_ in [model, loaded_model]:
            for module in model_.modules():
                if hasattr(module, "stateful") and module.stateful:
                    module.reset_states()
        print("Predicting with original model")
        model_predictions = model(**model_batch)
        print("Predicting with loaded model")
        loaded_model_predictions = loaded_model(**loaded_batch)

        # Both outputs should have the same keys and the values for these keys should be close.
        for key in model_predictions.keys():
            self.assert_fields_equal(
                model_predictions[key], loaded_model_predictions[key], name=key, tolerance=tolerance
            )
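
As a usage illustration (not from the original test suite), here is a hedged sketch of calling this newer helper with an overrides string and a metric check; the metric name and the override key are assumptions.

    def test_model_can_train_save_and_load_with_metric_check(self):
        # Hedged sketch: hypothetical test method; the values below are illustrative only.
        self.ensure_model_can_train_save_and_load(
            self.param_file,
            overrides='{"trainer.num_epochs": 1}',
            metric_to_check="accuracy",
            metric_terminal_value=1.0,
            metric_tolerance=0.05,
        )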
Example #33
0
                'cannot already exist for you to save a model to it')
        aux_model_save_fp = None
        if args.mtl:
            aux_model_save_fp = Path(
                model_dir, f'task_{args.aux_name}_model_{run_number}.tar.gz')
            if aux_model_save_fp.exists():
                raise FileExistsError(
                    f'The model run file {aux_model_save_fp} '
                    'cannot already exist for you to save a model to it')
        with tempfile.TemporaryDirectory() as temp_data_dir:
            results = train_model_from_file(args.model_config_fp,
                                            serialization_dir=temp_data_dir,
                                            overrides=overrides_string)
            params = Params.from_file(str(args.model_config_fp))
            if 'dataset_reader' not in params:
                reader = DatasetReader.from_params(
                    params['task_sentiment']['dataset_reader'])
            else:
                reader = DatasetReader.from_params(params['dataset_reader'])
            results.eval()

            for save_fp, original_fp in zip(save_fps, original_fps):
                predicted_tags: List[List[str]] = []
                for pred_tokens in create_input_sentences(original_fp):
                    instance = reader.text_to_instance(pred_tokens)
                    pred_output = results.forward_on_instance(instance)
                    tags = pred_output['tags']
                    assert_err = (
                        f'Number of predicted tags {len(tags)} should '
                        f'match the number of tokens being predicted {len(pred_tokens)}'
                    )
                    assert len(tags) == len(pred_tokens), assert_err
Example #34
0
 def __init__(self):
     super(LazyFakeReader, self).__init__(lazy=True)
     self.reader = DatasetReader.from_params(
         Params({u'type': u'sequence_tagging'}))
Example #35
0
    def test_can_build_from_params(self):
        reader = DatasetReader.from_params(Params({"type": "squad2"}))

        assert reader._tokenizer.__class__.__name__ == "SpacyTokenizer"  # type: ignore
        assert reader._token_indexers["tokens"].__class__.__name__ == "SingleIdTokenIndexer"  # type: ignore
    def ensure_model_can_train_save_and_load(self,
                                             param_file: str,
                                             tolerance: float = 1e-4,
                                             cuda_device: int = -1):
        save_dir = os.path.join(self.TEST_DIR, "save_and_load_test")
        archive_file = os.path.join(save_dir, "model.tar.gz")
        model = train_model_from_file(param_file, save_dir)
        loaded_model = load_archive(archive_file, cuda_device=cuda_device).model
        state_keys = model.state_dict().keys()
        loaded_state_keys = loaded_model.state_dict().keys()
        assert state_keys == loaded_state_keys
        # First we make sure that the state dict (the parameters) are the same for both models.
        for key in state_keys:
            assert_allclose(model.state_dict()[key].cpu().numpy(),
                            loaded_model.state_dict()[key].cpu().numpy(),
                            err_msg=key)
        params = Params.from_file(self.param_file)
        reader = DatasetReader.from_params(params['dataset_reader'])

        # Need to duplicate params because Iterator.from_params will consume.
        iterator_params = params['iterator']
        iterator_params2 = Params(copy.deepcopy(iterator_params.as_dict()))

        iterator = DataIterator.from_params(iterator_params)
        iterator2 = DataIterator.from_params(iterator_params2)

        # We'll check that even if we index the dataset with each model separately, we still get
        # the same result out.
        model_dataset = reader.read(params['validation_data_path'])
        iterator.index_with(model.vocab)
        model_batch = next(iterator(model_dataset, shuffle=False, cuda_device=cuda_device))

        loaded_dataset = reader.read(params['validation_data_path'])
        iterator2.index_with(loaded_model.vocab)
        loaded_batch = next(iterator2(loaded_dataset, shuffle=False, cuda_device=cuda_device))

        # Check that gradients are None for non-trainable parameters and that
        # trainable parameters receive a non-zero gradient.
        self.check_model_computes_gradients_correctly(model, model_batch)

        # The datasets themselves should be identical.
        assert model_batch.keys() == loaded_batch.keys()
        for key in model_batch.keys():
            self.assert_fields_equal(model_batch[key], loaded_batch[key], key, 1e-6)

        # Set eval mode, to turn off things like dropout, then get predictions.
        model.eval()
        loaded_model.eval()
        # Models with stateful RNNs need their states reset to have consistent
        # behavior after loading.
        for model_ in [model, loaded_model]:
            for module in model_.modules():
                if hasattr(module, 'stateful') and module.stateful:
                    module.reset_states()
        model_predictions = model(**model_batch)
        loaded_model_predictions = loaded_model(**loaded_batch)

        # Check loaded model's loss exists and we can compute gradients, for continuing training.
        loaded_model_loss = loaded_model_predictions["loss"]
        assert loaded_model_loss is not None
        loaded_model_loss.backward()

        # Both outputs should have the same keys and the values for these keys should be close.
        for key in model_predictions.keys():
            self.assert_fields_equal(model_predictions[key],
                                     loaded_model_predictions[key],
                                     name=key,
                                     tolerance=tolerance)

        return model, loaded_model
def predict(archive_folder, span_file, cluster_file, output_file, cuda_device):
    combine_span_and_cluster_file(span_file, cluster_file)

    test_file = 'tmp_relation_42424242.jsonl'
    with open(os.path.join(archive_folder, 'metrics.json')) as metrics_f:
        relation_threshold = json.load(metrics_f)['test__n_ary_rel_global_threshold']
    print(relation_threshold)

    import_submodules("scirex")
    logging.info("Loading Model from %s", archive_folder)
    archive_file = os.path.join(archive_folder, "model.tar.gz")
    archive = load_archive(archive_file, cuda_device)
    model = archive.model
    model.eval()

    model.prediction_mode = True
    config = archive.config.duplicate()
    dataset_reader_params = config["dataset_reader"]
    dataset_reader = DatasetReader.from_params(dataset_reader_params)
    dataset_reader.prediction_mode = True
    instances = dataset_reader.read(test_file)

    for instance in instances:
        batch = Batch([instance])
        batch.index_instances(model.vocab)

    data_iterator = DataIterator.from_params(config["validation_iterator"])
    iterator = data_iterator(instances, num_epochs=1, shuffle=False)

    with open(output_file, "w") as f:
        documents = {}
        for batch in tqdm(iterator):
            with torch.no_grad():
                batch = nn_util.move_to_device(batch, cuda_device)
                output_res = model.decode_relations(batch)

            n_ary_relations = output_res['n_ary_relation']
            predicted_relations = n_ary_relations['candidates']
            scores = n_ary_relations['scores']

            if 'metadata' not in output_res['n_ary_relation']:
                continue

            metadata = output_res['n_ary_relation']['metadata'][0]
            doc_id = metadata['doc_id']
            coref_key_map = {
                idx: name
                for name, idx in metadata['document_metadata']['cluster_name_to_id'].items()
            }

            for i, rel in enumerate(predicted_relations):
                predicted_relations[i] = tuple([
                    coref_key_map[k] if k in coref_key_map else None
                    for k in rel
                ])

            if doc_id not in documents:
                documents[doc_id] = {
                    'predicted_relations': [],
                    'doc_id': doc_id
                }

            label = [
                1 if x > relation_threshold else 0
                for x in list(scores.ravel())
            ]
            scores = [round(float(x), 4) for x in list(scores.ravel())]
            documents[doc_id]['predicted_relations'] += list(
                zip(predicted_relations, scores, label))

        for d in documents.values():
            predicted_relations = {}
            for r, s, l in d['predicted_relations']:
                r = tuple(r)
                if r not in predicted_relations or predicted_relations[r][0] < s:
                    predicted_relations[r] = (s, l)

            d['predicted_relations'] = [
                (r, s, l) for r, (s, l) in predicted_relations.items()
            ]

        f.write("\n".join([json.dumps(x) for x in documents.values()]))
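
For downstream use (not part of the original function), a hedged sketch of reading the JSON-lines file written above; the field names follow the keys used by the writer.

import json

def load_predicted_relations(output_file: str):
    # Hedged sketch: iterate over the JSON-lines output produced by predict().
    with open(output_file) as f:
        for line in f:
            doc = json.loads(line)
            yield doc["doc_id"], doc["predicted_relations"]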
Example #38
0
 def __init__(self) -> None:
     super().__init__(lazy=True)
     self.reader = DatasetReader.from_params(Params({"type": "sequence_tagging", "lazy": True}))
Example #39
0
    def ensure_model_can_train_save_and_load(self,
                                             param_file: str,
                                             tolerance: float = 1e-4,
                                             cuda_device: int = -1):
        save_dir = self.TEST_DIR / "save_and_load_test"
        archive_file = save_dir / "model.tar.gz"
        model = train_model_from_file(param_file, save_dir)
        loaded_model = load_archive(archive_file, cuda_device=cuda_device).model
        state_keys = model.state_dict().keys()
        loaded_state_keys = loaded_model.state_dict().keys()
        assert state_keys == loaded_state_keys
        # First we make sure that the state dict (the parameters) are the same for both models.
        for key in state_keys:
            assert_allclose(model.state_dict()[key].cpu().numpy(),
                            loaded_model.state_dict()[key].cpu().numpy(),
                            err_msg=key)
        params = Params.from_file(param_file)
        reader = DatasetReader.from_params(params['dataset_reader'])

        # Need to duplicate params because Iterator.from_params will consume.
        iterator_params = params['iterator']
        iterator_params2 = Params(copy.deepcopy(iterator_params.as_dict()))

        iterator = DataIterator.from_params(iterator_params)
        iterator2 = DataIterator.from_params(iterator_params2)

        # We'll check that even if we index the dataset with each model separately, we still get
        # the same result out.
        model_dataset = reader.read(params['validation_data_path'])
        iterator.index_with(model.vocab)
        model_batch = next(iterator(model_dataset, shuffle=False, cuda_device=cuda_device))

        loaded_dataset = reader.read(params['validation_data_path'])
        iterator2.index_with(loaded_model.vocab)
        loaded_batch = next(iterator2(loaded_dataset, shuffle=False, cuda_device=cuda_device))

        # Check that gradients are None for non-trainable parameters and that
        # trainable parameters receive a non-zero gradient.
        self.check_model_computes_gradients_correctly(model, model_batch)

        # The datasets themselves should be identical.
        assert model_batch.keys() == loaded_batch.keys()
        for key in model_batch.keys():
            self.assert_fields_equal(model_batch[key], loaded_batch[key], key, 1e-6)

        # Set eval mode, to turn off things like dropout, then get predictions.
        model.eval()
        loaded_model.eval()
        # Models with stateful RNNs need their states reset to have consistent
        # behavior after loading.
        for model_ in [model, loaded_model]:
            for module in model_.modules():
                if hasattr(module, 'stateful') and module.stateful:
                    module.reset_states()
        model_predictions = model(**model_batch)
        loaded_model_predictions = loaded_model(**loaded_batch)

        # Check loaded model's loss exists and we can compute gradients, for continuing training.
        loaded_model_loss = loaded_model_predictions["loss"]
        assert loaded_model_loss is not None
        loaded_model_loss.backward()

        # Both outputs should have the same keys and the values for these keys should be close.
        for key in model_predictions.keys():
            self.assert_fields_equal(model_predictions[key],
                                     loaded_model_predictions[key],
                                     name=key,
                                     tolerance=tolerance)

        return model, loaded_model
Example #40
0
 def __init__(self, attacker_params: Dict[str, Any], reader: DatasetReader):
     super().__init__()
     self.attacker_params = Params(attacker_params)
     self.reader = DatasetReader.from_params(self.attacker_params["reader"])
     self.reader = reader
Example #41
0
    def test_semeval2010_task8_reader_with_entity_markers(self):
        reader_params = Params({
            "type": "semeval2010_task8",
            "entity_masking": "entity_markers",
            "tokenizer_and_candidate_generator": {
                "type": "bert_tokenizer_and_candidate_generator",
                "entity_candidate_generators": {
                    "wordnet": {
                        "type":
                        "wordnet_mention_generator",
                        "entity_file":
                        "tests/fixtures/wordnet/entities_fixture.jsonl"
                    }
                },
                "entity_indexers": {
                    "wordnet": {
                        "type": "characters_tokenizer",
                        "tokenizer": {
                            "type": "word",
                            "word_splitter": {
                                "type": "just_spaces"
                            },
                        },
                        "namespace": "entity"
                    }
                },
                "bert_model_type":
                "tests/fixtures/evaluation/semeval2010_task8/vocab_entity_markers.txt",
                "do_lower_case": True,
            },
        })

        reader = DatasetReader.from_params(reader_params)
        train_file = 'tests/fixtures/evaluation/semeval2010_task8/semeval2010_task8.json'

        instances = reader.read(train_file)

        # check that the offsets are right!
        segment_ids = instances[0]['segment_ids'].array.tolist()
        tokens = [t.text for t in instances[0]['tokens'].tokens]

        tokens_and_segments = list(zip(tokens, segment_ids))

        expected_tokens_and_segments = [('[CLS]', 0), ('the', 0),
                                        ('[e1start]', 0),
                                        ('big', 0), ('cat', 0), ('##s', 0),
                                        ('[e1end]', 0), ('jumped', 0),
                                        ('[UNK]', 0), ('the', 0),
                                        ('[e2start]', 0), ('la', 0),
                                        ('##zie', 0), ('##st', 0),
                                        ('brown', 0), ('dog', 0), ('##s', 0),
                                        ('[e2end]', 0), ('.', 0), ('[SEP]', 0)]

        self.assertEqual(tokens_and_segments, expected_tokens_and_segments)

        tokens_1 = [t.text for t in instances[1]['tokens'].tokens]
        expected_tokens_1 = [
            '[CLS]', 'the', '[e2start]', 'big', 'cat', '##s', '[e2end]',
            'jumped', '[e1start]', '[UNK]', 'the', 'la', '##zie', '##st',
            'brown', 'dog', '##s', '[e1end]', '.', '[SEP]'
        ]

        self.assertEqual(tokens_1, expected_tokens_1)

        self.assertEqual(instances[0].fields['label_ids'].label, 0)
        self.assertEqual(instances[1].fields['label_ids'].label, 8)

        all_tokens = [[t.text for t in instances[k]['tokens'].tokens]
                      for k in range(2)]

        for k in range(2):
            self.assertEqual(
                all_tokens[k][instances[k].fields['index_a'].label],
                '[e1start]')
            self.assertEqual(
                all_tokens[k][instances[k].fields['index_b'].label],
                '[e2start]')
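
The index_a / index_b fields checked above point at the [e1start] and [e2start] marker tokens. As a hedged sketch (not from this test or its library), such marker positions are typically used to pool the encoder's hidden states at the two markers into a relation representation:

import torch

def entity_marker_pool(hidden_states: torch.Tensor,
                       index_a: torch.Tensor,
                       index_b: torch.Tensor) -> torch.Tensor:
    # hidden_states: (batch, seq_len, dim); index_a / index_b: (batch,) marker positions.
    batch_indices = torch.arange(hidden_states.size(0), device=hidden_states.device)
    e1 = hidden_states[batch_indices, index_a]   # (batch, dim) states at [e1start]
    e2 = hidden_states[batch_indices, index_b]   # (batch, dim) states at [e2start]
    return torch.cat([e1, e2], dim=-1)           # (batch, 2 * dim) relation representation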