def from_archive(cls, generative_archive: Archive,
                 discriminative_archive: Archive,
                 predictor_name: str = None) -> 'Predictor':
    """
    Instantiate a predictor from two trained-model
    :class:`~allennlp.models.archival.Archive` objects: a generative one
    (the model) and a discriminative one (the sampler).

    :param generative_archive: Archive holding the generative model and
        its training config.
    :param discriminative_archive: Archive holding the discriminative
        model (used as the sampler) and its training config.
    :param predictor_name: Optional registered name of the `Predictor`
        subclass to build. If None, the class this method was called on
        is used (the promised default).
    :returns: A predictor wrapping both models and their dataset readers.
    """
    # Duplicate the configs so that reading them does not consume the
    # Params held inside the archives.
    generative_config = generative_archive.config.duplicate()
    discriminative_config = discriminative_archive.config.duplicate()

    model = generative_archive.model
    sampler = discriminative_archive.model

    dataset_reader_model = DatasetReader.from_params(
        generative_config['dataset_reader'])
    dataset_reader_sampler = DatasetReader.from_params(
        discriminative_config['dataset_reader'])

    # Both models are only used for inference here.
    model.eval()
    sampler.eval()

    # Fix: the original always called Predictor.by_name(predictor_name),
    # which crashes when predictor_name is None despite the documented
    # "otherwise, the default one ... will be used" contract.
    if predictor_name is not None:
        predictor_class = Predictor.by_name(predictor_name)
    else:
        predictor_class = cls
    return predictor_class(model, sampler, dataset_reader_model,
                           dataset_reader_sampler)
def _predict_iter(
        self, data: Union[Iterable[Dict[str, Any]], List[Dict[str, Any]]]
) -> Iterable[Dict[str, Any]]:
    '''
    Iterates over the predictions and yields one prediction at a time.

    This is a useful wrapper as it performs the data pre-processing and
    assertion checks. The predictions are predicted in batches so that
    the model does not load lots of data at once and run into memory
    issues.

    :param data: Iterable or list of dictionaries that the predictor can
                 take as input e.g. `target-tagger` predictor expects at
                 most a `text` key and value.
    :yields: A dictionary containing all the values the model outputs
             e.g. For the `target_tagger` model it would return `logits`,
             `class_probabilities`, `mask`, and `tags`.
    :raises AssertionError: If the `model` attribute is None. This can
                            be overcome by either fitting or loading a
                            model.
    :raises TypeError: If the data given is not of Type List or Iterable.
    '''
    # Local imports: collections.Iterable was removed in Python 3.10;
    # collections.abc is the supported location.
    import collections.abc
    from itertools import islice

    no_model_error = 'There is no model to make predictions, either fit '\
                     'or load a model to resolve this.'
    assert self.model, no_model_error
    self.model.eval()

    # Rebuild the dataset reader from the saved training params so the
    # predictor pre-processes input exactly as during training.
    all_model_params = Params.from_file(self._param_fp)
    reader_params = all_model_params.get("dataset_reader")
    dataset_reader = DatasetReader.from_params(reader_params)
    predictor = Predictor.by_name(self._predictor_name)(self.model,
                                                        dataset_reader)

    # Batch size from the model param file when present, else 64.
    batch_size = 64
    if 'iterator' in all_model_params:
        iter_params = all_model_params.get("iterator")
        if 'batch_size' in iter_params:
            batch_size = iter_params['batch_size']

    # A list is already Iterable, so a single isinstance check suffices.
    if not isinstance(data, collections.abc.Iterable):
        raise TypeError(
            f'Data given has to be of type {collections.abc.Iterable}'
            f' and not {type(data)}')
    data = iter(data)

    # islice consumes up to batch_size items per pass; an empty slice
    # means the iterator is exhausted (avoids the repeated
    # StopIteration raise/catch of a manual next() loop).
    while True:
        data_batch = list(islice(data, batch_size))
        if not data_batch:
            break
        predictions = predictor.predict_batch_json(data_batch)
        for prediction in predictions:
            yield prediction
def smart_data_evaluate(model_path, predictor, test_data, include_package,
                        output_dir, overwrite_dir, use_mock_predictor):
    """
    Load a (real or mock) predictor and run the full evaluation suite on
    the given test data, writing reports into ``output_dir``.

    :param model_path: Path to the model archive (real predictor) or the
        value passed to the mock predictor's constructor.
    :param predictor: Registered predictor name, or a dotted class path
        when a mock predictor is used.
    :param test_data: Path to the evaluation data.
    :param include_package: Package to import so custom registrables are
        registered.
    :param output_dir: Directory the evaluation reports are written to.
    :param overwrite_dir: Whether an existing output_dir may be reused.
    :param use_mock_predictor: Force the mock-predictor code path.
    """
    import importlib

    prepare_dir(output_dir, overwrite_dir)
    common_util.import_module_and_submodules(include_package)

    # Any predictor whose name mentions "mock" is forced onto the mock path.
    if "mock" in predictor.lower():
        use_mock_predictor = True

    if use_mock_predictor:
        # `predictor` is a dotted class path, e.g. "pkg.mod.MockPredictor":
        # import the module and fetch the class (idiomatic importlib
        # instead of raw __import__ with fromlist).
        module_name, _, class_name = predictor.rpartition(".")
        current_predictor_class = getattr(
            importlib.import_module(module_name), class_name)
    else:
        current_predictor_class = Predictor.by_name(predictor)

    @timeit
    def load_model(path):
        # Load the archive and put the model in inference mode.
        archive = load_archive(path)
        archive.model.eval()
        return current_predictor_class.from_archive(archive)

    @timeit
    def eval_model(predictor, test_data_path):
        evaluate_runner = EvaluationRunner(predictor, test_data_path,
                                           output_dir)
        evaluate_runner.evaluate(
            MRE=respect_only_mandatory_args,
            Cl=only_relation_classification,
            CRE=all_args_mandatory,
            AR=named_entity_recognition_v2,
            BRE=spert_only_two_mandatory_args,
            MRE_no_trigger=respect_only_mandatory_args_no_trigger,
            AR_no_trigger=named_entity_recognition_v2_no_trigger)
        evaluate_runner.save_report()

    if not use_mock_predictor:
        predictor = load_model(model_path)
    else:
        # Mock predictors are constructed directly from the path.
        predictor = current_predictor_class(model_path)
    eval_model(predictor, test_data)
def _predict_iter(
        self, data: Union[Iterable[Dict[str, Any]], List[Dict[str, Any]]],
        batch_size: Optional[int] = None,
        yield_original_target: bool = False
) -> Iterable[Union[Dict[str, Any],
                    Tuple[Dict[str, Any], Dict[str, Any]]]]:
    '''
    Iterates over the predictions and yields one prediction at a time.

    This is a useful wrapper as it performs the data pre-processing and
    assertion checks. The predictions are predicted in batches so that
    the model does not load lots of data at once and run into memory
    issues.

    :param data: Iterable or list of dictionaries that the predictor can
                 take as input e.g. `target-tagger` predictor expects at
                 most a `text` key and value.
    :param batch_size: Specify the batch size to predict on. If left None
                       defaults to 64 unless it is specified in the
                       `model_param_fp` within the constructor then the
                       batch size from the param file is used.
    :param yield_original_target: If True it will then yield the
                                  dictionary that has been predicted on.
    :yields: A dictionary containing all the values the model outputs
             e.g. For the `target_tagger` model it would return `logits`,
             `class_probabilities`, `mask`, `tags`, `words`, and `text`.
             If `yield_original_target` is True it will then yield a
             Tuple of 2 dictionaries the first being what has already
             been stated and the second being the dictionary that is
             being predicted on.
    :raises AssertionError: If the `model` attribute is None. This can
                            be overcome by either fitting or loading a
                            model.
    :raises TypeError: If the data given is not of Type List or Iterable.
    '''
    # Local imports: collections.Iterable was removed in Python 3.10;
    # collections.abc is the supported location.
    import collections.abc
    from itertools import islice

    no_model_error = 'There is no model to make predictions, either fit '\
                     'or load a model to resolve this.'
    assert self.model, no_model_error
    self.model.eval()

    # Rebuild the dataset reader from the saved training params so the
    # predictor pre-processes input exactly as during training.
    all_model_params = Params.from_file(self._param_fp)
    reader_params = all_model_params.get("dataset_reader")
    dataset_reader = DatasetReader.from_params(reader_params)
    predictor = Predictor.by_name(self._predictor_name)(self.model,
                                                        dataset_reader)

    # Precedence: explicit argument, then model param file, then 64.
    if batch_size is None:
        if 'iterator' in all_model_params:
            iter_params = all_model_params.get("iterator")
            if 'batch_size' in iter_params:
                batch_size = iter_params['batch_size']
    batch_size = batch_size or 64

    # A list is already Iterable, so a single isinstance check suffices.
    if not isinstance(data, collections.abc.Iterable):
        raise TypeError(
            f'Data given has to be of type {collections.abc.Iterable}'
            f' and not {type(data)}')
    data = iter(data)

    # islice consumes up to batch_size items per pass; an empty slice
    # means the iterator is exhausted (avoids the repeated
    # StopIteration raise/catch of a manual next() loop).
    while True:
        data_batch = list(islice(data, batch_size))
        if not data_batch:
            break
        predictions = predictor.predict_batch_json(data_batch)
        # zip pairs each prediction with the input it came from,
        # replacing the index-based enumerate lookup.
        for prediction, original in zip(predictions, data_batch):
            if yield_original_target:
                yield (prediction, original)
            else:
                yield prediction