Ejemplo n.º 1
0
    def predict_dataset(self, dataset, progress_bar=True, apply_preproc=True):
        """ Predict a complete dataset

        Parameters
        ----------
        dataset : Dataset
            Dataset to predict
        progress_bar : bool, optional
            hide or show a progress bar
        apply_preproc : bool, optional
            if True (default), apply this predictor's data preprocessor and
            text postprocessor to the dataset; if False, feed the data as-is

        Yields
        -------
        PredictionResult
            Single PredictionResult
        dict
            Dataset entry of the prediction result
        """
        # Wrap the dataset so pre-/post-processing is applied lazily
        # (or skipped entirely when apply_preproc is False).
        input_dataset = InputDataset(
            dataset, self.data_preproc if apply_preproc else None,
            self.text_postproc if apply_preproc else None)
        prediction_results = self.predict_input_dataset(
            input_dataset, progress_bar)

        # Pair each prediction with the dataset sample it originated from.
        for prediction, sample in zip(prediction_results, dataset.samples()):
            yield prediction, sample
Ejemplo n.º 2
0
    def run(self,
            _sentinel=None,
            gt_dataset=None,
            pred_dataset=None,
            processes=1,
            progress_bar=False):
        """ Evaluate on the given dataset.

        Parameters
        ----------
        _sentinel : do not use
            Forcing the use of `gt_dataset` and `pred_dataset` for safety
        gt_dataset : Dataset, optional
            the ground truth
        pred_dataset : Dataset
            the prediction dataset
        processes : int, optional
            the processes to use for preprocessing and evaluation
        progress_bar : bool, optional
            show a progress bar

        Returns
        -------
        evaluation dictionary
        """
        if _sentinel:
            raise Exception("You must call run by using parameter names.")

        def load_texts(dataset):
            # Run the text preprocessor over a dataset and collect its text
            # samples; shared by the ground-truth and prediction loading paths.
            input_dataset = InputDataset(dataset,
                                         None,
                                         self.text_preprocessor,
                                         processes=processes)
            return [
                txt for _, txt, _ in tqdm_wrapper(
                    input_dataset.generator(text_only=True),
                    total=len(dataset),
                    progress_bar=progress_bar,
                )
            ]

        # Prefer ground truth that was preloaded at construction time;
        # otherwise preprocess the given gt_dataset now.
        if self.preloaded_gt:
            gt_data = self.preloaded_gt
        else:
            gt_data = load_texts(gt_dataset)

        pred_data = load_texts(pred_dataset)

        return self.evaluate(gt_data=gt_data,
                             pred_data=pred_data,
                             processes=processes,
                             progress_bar=progress_bar,
                             skip_empty_gt=self.skip_empty_gt)
Ejemplo n.º 3
0
    def predict_dataset(self, dataset, progress_bar=True):
        """ Predict a dataset with every predictor of this ensemble.

        Parameters
        ----------
        dataset : Dataset
            dataset to predict
        progress_bar : bool, optional
            show a progress bar over the prediction batches

        Yields
        ------
        tuple
            the per-predictor prediction results for one sample
        dict
            dataset entry of the prediction result
        """
        start_time = time.time()
        # preprocessing step (only valid if all predictors share the same
        # preprocessor)
        if not self.same_preproc:
            raise Exception(
                'Different preprocessors are currently not allowed during prediction'
            )

        input_dataset = InputDataset(
            dataset,
            self.predictors[0].data_preproc,
            self.predictors[0].text_postproc,
            None,
            processes=self.processes,
        )

        # Hoisted: fetch the sample list once instead of calling
        # dataset.samples() for every element of every batch.
        all_samples = dataset.samples()

        def progress_bar_wrapper(iterable):
            # Optionally wrap an iterable with a tqdm progress bar that counts
            # batches (hence the division by batch_size).
            if progress_bar:
                return tqdm(iterable,
                            total=int(np.ceil(len(dataset) / self.batch_size)),
                            desc="Prediction")
            else:
                return iterable

        def batched_data_params():
            # Group the generated (index, image, params) tuples into batches
            # of self.batch_size.
            batch = []
            for data_idx, (image, _, params) in enumerate(
                    input_dataset.generator(epochs=1)):
                batch.append((data_idx, image, params))
                if len(batch) == self.batch_size:
                    yield batch
                    batch = []

            # flush the trailing partial batch
            if len(batch) > 0:
                yield batch

        for batch in progress_bar_wrapper(batched_data_params()):
            sample_ids, batch_images, batch_params = zip(*batch)
            samples = [all_samples[i] for i in sample_ids]
            # One raw dataset per predictor, all sharing the same batch data.
            raw_dataset = [
                RawInputDataset(
                    DataSetMode.PREDICT,
                    batch_images,
                    [None] * len(batch_images),
                    batch_params,
                    None,
                    None,
                ) for _ in self.predictors
            ]

            # predict_raw returns list of prediction objects
            prediction = [
                predictor.predict_input_dataset(ds, progress_bar=False)
                for ds, predictor in zip(raw_dataset, self.predictors)
            ]

            # Zip the per-predictor results back together sample-by-sample.
            for result, sample in zip(zip(*prediction), samples):
                yield result, sample

        print("Prediction of {} models took {}s".format(
            len(self.predictors),
            time.time() - start_time))