Example 1
    def from_archive(cls,
                     generative_archive: Archive,
                     discriminative_archive: Archive,
                     predictor_name: Optional[str] = None) -> 'Predictor':
        """
        Instantiate a :class:`CompleteTheSentencePredictor` from a :class:`~allennlp.models.archival.Archive`;
        that is, from the result of training a model. Optionally specify which `Predictor`
        subclass; otherwise, the default one for the model will be used.
        """
        # We need to duplicate the configs so that they do not get consumed inside the archive
        generative_config = generative_archive.config.duplicate()
        discriminative_config = discriminative_archive.config.duplicate()

        model = generative_archive.model
        sampler = discriminative_archive.model

        dataset_reader_params_model = generative_config['dataset_reader']
        dataset_reader_params_sampler = discriminative_config['dataset_reader']

        dataset_reader_model = DatasetReader.from_params(
            dataset_reader_params_model)
        dataset_reader_sampler = DatasetReader.from_params(
            dataset_reader_params_sampler)

        model.eval()
        sampler.eval()

        if predictor_name is None:
            # No explicit predictor name given; fall back to the model's
            # registered type (this assumes a predictor is registered under
            # the same name as the model).
            predictor_name = generative_config['model']['type']

        return Predictor.by_name(predictor_name)(model, sampler,
                                                 dataset_reader_model,
                                                 dataset_reader_sampler)

    def _predict_iter(
        self, data: Union[Iterable[Dict[str, Any]], List[Dict[str, Any]]]
    ) -> Iterable[Dict[str, Any]]:
        '''
        Iterates over the predictions and yields one prediction at a time.
        This is a useful wrapper as it performs the data pre-processing and
        assertion checks.

        Predictions are made in batches so that the model does not load
        all of the data at once and run into memory issues.

        :param data: Iterable or list of dictionaries that the predictor can 
                     take as input e.g. `target-tagger` predictor expects at 
                     most a `text` key and value.
        :yields: A dictionary containing all the values the model outputs e.g.
                 For the `target_tagger` model it would return `logits`, 
                 `class_probabilities`, `mask`, and `tags`.
        :raises AssertionError: If the `model` attribute is None. This can be 
                                overcome by either fitting or loading a model.
        :raises TypeError: If the data given is not of type Iterable.
        '''
        no_model_error = 'There is no model to make predictions, either fit '\
                         'or load a model to resolve this.'
        assert self.model, no_model_error
        self.model.eval()

        all_model_params = Params.from_file(self._param_fp)

        reader_params = all_model_params.get("dataset_reader")
        dataset_reader = DatasetReader.from_params(reader_params)
        predictor = Predictor.by_name(self._predictor_name)(self.model,
                                                            dataset_reader)

        batch_size = 64
        if 'iterator' in all_model_params:
            iter_params = all_model_params.get("iterator")
            if 'batch_size' in iter_params:
                batch_size = iter_params['batch_size']

        # Data has to be an iterator
        if isinstance(data, collections.abc.Iterable):
            data = iter(data)
        else:
            raise TypeError(
                f'Data given has to be of type {collections.abc.Iterable}'
                f' and not {type(data)}')
        data_exists = True
        while data_exists:
            data_batch = []
            for _ in range(batch_size):
                try:
                    data_batch.append(next(data))
                except StopIteration:
                    data_exists = False
            if data_batch:
                predictions = predictor.predict_batch_json(data_batch)
                for prediction in predictions:
                    yield prediction
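
A minimal usage sketch for the `from_archive` classmethod above, assuming it belongs to the `CompleteTheSentencePredictor` named in its docstring, that both archives were produced by `allennlp train`, and that the predictor was registered under a hypothetical name such as `complete-the-sentence` (the paths are placeholders):

from allennlp.models.archival import load_archive

# Placeholder paths; point these at real training archives.
generative_archive = load_archive('generative/model.tar.gz')
discriminative_archive = load_archive('discriminative/model.tar.gz')

predictor = CompleteTheSentencePredictor.from_archive(
    generative_archive, discriminative_archive,
    predictor_name='complete-the-sentence')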
Example 3
def smart_data_evaluate(model_path, predictor, test_data, include_package,
                        output_dir, overwrite_dir, use_mock_predictor):
    prepare_dir(output_dir, overwrite_dir)
    common_util.import_module_and_submodules(include_package)
    if "mock" in predictor.lower():
        use_mock_predictor = True
    if use_mock_predictor:
        # Resolve the mock predictor class from its fully qualified
        # dotted path.
        module_name, _, class_name = predictor.rpartition(".")
        module = __import__(module_name, fromlist=[class_name])
        current_predictor_class = getattr(module, class_name)
    else:
        current_predictor_class = Predictor.by_name(predictor)

    @timeit
    def load_model(path):
        archive = load_archive(path)
        archive.model.eval()
        return current_predictor_class.from_archive(archive)

    @timeit
    def eval_model(predictor, test_data_path):
        evaluate_runner = EvaluationRunner(predictor, test_data_path,
                                           output_dir)
        evaluate_runner.evaluate(
            MRE=respect_only_mandatory_args,
            Cl=only_relation_classification,
            CRE=all_args_mandatory,
            AR=named_entity_recognition_v2,
            BRE=spert_only_two_mandatory_args,
            MRE_no_trigger=respect_only_mandatory_args_no_trigger,
            AR_no_trigger=named_entity_recognition_v2_no_trigger)
        evaluate_runner.save_report()

    if not use_mock_predictor:
        predictor = load_model(model_path)
    else:
        predictor = current_predictor_class(model_path)

    eval_model(predictor, test_data)
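
A hypothetical invocation of `smart_data_evaluate`; the paths, package name, and predictor name below are placeholders, not part of the original code. Note that any predictor name containing "mock" forces `use_mock_predictor` to True, in which case the class is resolved from its dotted path instead of via `Predictor.by_name`:

smart_data_evaluate(
    model_path='runs/model.tar.gz',
    predictor='my-registered-predictor',  # a name registered via @Predictor.register
    test_data='data/test.json',
    include_package='my_project',
    output_dir='eval_out',
    overwrite_dir=True,
    use_mock_predictor=False)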
Example 4
    def _predict_iter(
        self,
        data: Union[Iterable[Dict[str, Any]], List[Dict[str, Any]]],
        batch_size: Optional[int] = None,
        yield_original_target: bool = False
    ) -> Iterable[Union[Dict[str, Any], Tuple[Dict[str, Any], Dict[str,
                                                                   Any]]]]:
        '''
        Iterates over the predictions and yields one prediction at a time.
        This is a useful wrapper as it performs the data pre-processing and
        assertion checks.

        Predictions are made in batches so that the model does not load
        all of the data at once and run into memory issues.

        :param data: Iterable or list of dictionaries that the predictor can 
                     take as input e.g. `target-tagger` predictor expects at 
                     most a `text` key and value.
        :param batch_size: Batch size to predict with. If None, the batch
                           size from the `iterator` section of the model
                           parameter file (`model_param_fp` in the
                           constructor) is used, falling back to 64 if none
                           is specified there.
        :param yield_original_target: If True, also yield the input
                                      dictionary that was predicted on.
        :yields: A dictionary containing all the values the model outputs e.g.
                 for the `target_tagger` model it would return `logits`,
                 `class_probabilities`, `mask`, `tags`, `words`, and `text`.
                 If `yield_original_target` is True, yields a tuple of two
                 dictionaries: the first as described above and the second
                 being the input dictionary that was predicted on.
        :raises AssertionError: If the `model` attribute is None. This can be 
                                overcome by either fitting or loading a model.
        :raises TypeError: If the data given is not of type Iterable.
        '''
        no_model_error = 'There is no model to make predictions, either fit '\
                         'or load a model to resolve this.'
        assert self.model, no_model_error
        self.model.eval()

        all_model_params = Params.from_file(self._param_fp)

        reader_params = all_model_params.get("dataset_reader")
        dataset_reader = DatasetReader.from_params(reader_params)
        predictor = Predictor.by_name(self._predictor_name)(self.model,
                                                            dataset_reader)

        # Precedence: the batch_size argument, then the model param file,
        # then a default of 64
        if batch_size is None:
            if 'iterator' in all_model_params:
                iter_params = all_model_params.get("iterator")
                if 'batch_size' in iter_params:
                    batch_size = iter_params['batch_size']
            batch_size = batch_size or 64

        # Data has to be an iterator
        if isinstance(data, collections.abc.Iterable):
            data = iter(data)
        else:
            raise TypeError(
                f'Data given has to be of type {collections.abc.Iterable}'
                f' and not {type(data)}')
        data_exists = True
        while data_exists:
            data_batch = []
            for _ in range(batch_size):
                try:
                    data_batch.append(next(data))
                except StopIteration:
                    data_exists = False
            if data_batch:
                predictions = predictor.predict_batch_json(data_batch)
                for prediction_index, prediction in enumerate(predictions):
                    if yield_original_target:
                        yield (prediction, data_batch[prediction_index])
                    else:
                        yield prediction
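
To see the batching behaviour of `_predict_iter` in isolation, here is a distilled, self-contained sketch of the loop above; the `batch_iter` name and the toy data are illustrative, not part of the original class:

import collections.abc
from typing import Any, Dict, Iterable, Iterator, List

def batch_iter(data: Iterable[Dict[str, Any]],
               batch_size: int) -> Iterator[List[Dict[str, Any]]]:
    # Same pattern as above: pull up to `batch_size` items from the
    # iterator per batch, stopping once it is exhausted.
    if not isinstance(data, collections.abc.Iterable):
        raise TypeError(f'Data given has to be of type '
                        f'{collections.abc.Iterable} and not {type(data)}')
    data = iter(data)
    data_exists = True
    while data_exists:
        batch = []
        for _ in range(batch_size):
            try:
                batch.append(next(data))
            except StopIteration:
                data_exists = False
        if batch:
            yield batch

# A 10-item generator yields batches of 4, 4, and 2.
for batch in batch_iter(({'text': str(i)} for i in range(10)), batch_size=4):
    print(len(batch))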