Example #1
 def batch_predict_partition(dataset):
     # remote_model, predictor_kwargs, args, kwargs and output_columns are
     # captured from the enclosing scope (this is a nested function)
     model = remote_model.load()
     predictor = Predictor(**predictor_kwargs)
     predictions = predictor.batch_predict(model, dataset, *args, **kwargs)
     # reorder the prediction columns into the expected output order
     ordered_predictions = predictions[output_columns]
     return ordered_predictions
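A partition function like this is typically mapped over the chunks of a distributed dataframe. The sketch below is a minimal, hypothetical illustration of that pattern with Dask; the toy dataframe and its column name are assumptions, and the function still relies on the closure variables from the example above.

    # Hypothetical sketch, not from the source: apply the partition function
    # to every chunk of a Dask dataframe of preprocessed features.
    import dask.dataframe as dd
    import pandas as pd

    # toy preprocessed frame; in practice this is the real prediction dataset
    pdf = pd.DataFrame({"feature_proc": [1.0, 2.0, 3.0, 4.0]})
    df = dd.from_pandas(pdf, npartitions=2)

    # each partition is handed to batch_predict_partition as a pandas DataFrame;
    # a meta= argument describing the output columns may be needed in practice
    predictions = df.map_partitions(batch_predict_partition)
    result = predictions.compute()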
Example #2
File: ray.py Project: cxz/ludwig
 def __init__(self):
     # remote_model, output_columns, features, data_hdf5_fp, predictor_kwargs,
     # args and kwargs are captured from the enclosing scope
     self.model = remote_model.load()
     self.output_columns = output_columns
     self.features = features
     self.data_hdf5_fp = data_hdf5_fp
     predictor = Predictor(**predictor_kwargs)
     # pre-bind the shared prediction arguments with functools.partial
     self.batch_predict = partial(predictor.batch_predict, *args, **kwargs)
Example #3
    def evaluation(
        self,
        dataset: "Dataset",  # noqa: F821
        dataset_name: str,
        metrics_log: Dict[str, Dict[str, List[TrainerMetric]]],
        tables: Dict[str, List[List[str]]],
        batch_size: int,
        progress_tracker: ProgressTracker,
    ):
        predictor = Predictor(self.model,
                              batch_size=batch_size,
                              horovod=self.horovod,
                              report_tqdm_to_ray=self.report_tqdm_to_ray)
        metrics, predictions = predictor.batch_evaluation(
            dataset, collect_predictions=False, dataset_name=dataset_name)

        self.append_metrics(dataset_name, metrics, metrics_log, tables,
                            progress_tracker)

        return metrics_log, tables
Example #4
 def __init__(self):
     # model, output_columns, features, training_set_metadata, predictor_kwargs,
     # args and kwargs are captured from the enclosing scope
     self.model = model
     self.output_columns = output_columns
     self.features = features
     self.training_set_metadata = training_set_metadata
     # map each processed column name to the reshape spec (if any) recorded
     # in the training set metadata
     self.reshape_map = {
         f[PROC_COLUMN]: training_set_metadata[f[NAME]].get("reshape")
         for f in features.values()
     }
     predictor = Predictor(model, **predictor_kwargs)
     self.predict = partial(predictor.predict_single, *args, **kwargs)
Example #5
    def collect_activations(
            self,
            layer_names,
            dataset,
            data_format=None,
            batch_size=128,
            # output_directory='results',
            debug=False,
            **kwargs
    ):
        self._check_initialization()
        logger.debug('Preprocessing')
        # copy the input feature list with [:]; assigning it directly would
        # let later additions of output features mutate the model definition
        features_to_load = self.model_definition['input_features'][:]

        # preprocessing
        dataset, training_set_metadata = preprocess_for_prediction(
            self.model_definition,
            dataset=dataset,
            data_format=data_format,
            training_set_metadata=self.training_set_metadata,
            include_outputs=False,
        )

        logger.debug('Predicting')
        predictor = Predictor(
            batch_size=batch_size, horovod=self._horovod, debug=debug
        )
        activations = predictor.batch_collect_activations(
            self.model,
            layer_names,
            dataset,
        )

        return activations
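For context, a call to the method above could look like the following sketch; the model handle, layer name and dataset path are placeholders, not taken from the source.

    # Hypothetical usage sketch for collect_activations(); ludwig_model,
    # 'utterance_encoder' and 'data.csv' are placeholder names.
    activations = ludwig_model.collect_activations(
        layer_names=['utterance_encoder'],
        dataset='data.csv',
        batch_size=128,
    )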
Example #6
            def __init__(self):
                # fetch the model from the Ray object store and move it to
                # the GPU when one is available
                model = ray.get(model_ref)
                device = "cuda" if torch.cuda.is_available() else "cpu"
                self.model = model.to(device)

                self.output_columns = output_columns
                self.features = features
                self.training_set_metadata = training_set_metadata
                self.reshape_map = {
                    f[PROC_COLUMN]: training_set_metadata[f[NAME]].get("reshape")
                    for f in features.values()
                }
                predictor = Predictor(model, **predictor_kwargs)
                self.predict = partial(predictor.predict_single, *args, **kwargs)
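The `ray.get(model_ref)` call above is one half of Ray's usual object-store handoff. A minimal sketch of both halves follows; the surrounding actor class is omitted and `model` is a placeholder, not taken from the example.

    # Minimal sketch of the Ray object-store pattern assumed above.
    import ray

    ray.init(ignore_reinit_error=True)
    model_ref = ray.put(model)   # driver side: store the model once
    model = ray.get(model_ref)   # worker/actor side: fetch a shared copy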
Example #7
 def create_predictor(self, **kwargs):
     return Predictor(**kwargs)
Example #8
 def create_predictor(self, model: ECD, **kwargs):
     return Predictor(model, horovod=self._horovod, **kwargs)
Example #9
 def create_predictor(self, **kwargs):
     return Predictor(horovod=self._horovod, **kwargs)
Example #10
    def create_predictor(self, model: ECD, **kwargs):
        from ludwig.models.predictor import Predictor

        return Predictor(model, **kwargs)
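Examples #7 through #10 are variants of the same backend factory hook. A hedged sketch of how a caller might use it follows; `backend`, `model` and `dataset` are placeholders, and whether the model is passed to the constructor or to batch_predict differs between Ludwig versions (compare Example #1 with Example #10).

    # Hypothetical caller of the create_predictor() factory shown above;
    # all names are placeholders, not taken from the examples.
    predictor = backend.create_predictor(model, batch_size=128)
    predictions = predictor.batch_predict(dataset)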
Example #11
    def evaluate(
            self,
            dataset=None,
            data_format=None,
            batch_size=128,
            skip_save_unprocessed_output=True,
            skip_save_predictions=True,
            skip_save_eval_stats=True,
            collect_predictions=False,
            collect_overall_stats=False,
            output_directory='results',
            return_type=pd.DataFrame,
            debug=False,
            **kwargs
    ):
        self._check_initialization()

        logger.debug('Preprocessing')

        # preprocessing
        dataset, training_set_metadata = preprocess_for_prediction(
            self.model_definition,
            dataset=dataset,
            data_format=data_format,
            training_set_metadata=self.training_set_metadata,
            include_outputs=True,
        )

        logger.debug('Predicting')
        predictor = Predictor(
            batch_size=batch_size, horovod=self._horovod, debug=debug
        )
        stats, predictions = predictor.batch_evaluation(
            self.model,
            dataset,
            collect_predictions=collect_predictions or collect_overall_stats,
        )

        # calculate the overall metrics
        if collect_overall_stats:
            overall_stats = calculate_overall_stats(
                self.model.output_features,
                predictions,
                dataset,
                training_set_metadata
            )
            stats = {
                of_name: {**stats[of_name], **overall_stats[of_name]}
                # account for presence of 'combined' key
                if of_name in overall_stats else {**stats[of_name]}
                for of_name in stats
            }

        if is_on_master():
            # if we are skipping all saving,
            # there is no need to create a directory that will remain empty
            should_create_exp_dir = not (
                    skip_save_unprocessed_output and
                    skip_save_predictions and
                    skip_save_eval_stats
            )
            if should_create_exp_dir:
                os.makedirs(output_directory, exist_ok=True)

        if collect_predictions:
            logger.debug('Postprocessing')
            postproc_predictions = postprocess(
                predictions,
                self.model.output_features,
                self.training_set_metadata,
                output_directory=output_directory,
                skip_save_unprocessed_output=skip_save_unprocessed_output
                                             or not is_on_master(),
            )
        else:
            postproc_predictions = predictions  # = {}

        if is_on_master():
            if postproc_predictions is not None and not skip_save_predictions:
                save_prediction_outputs(postproc_predictions,
                                        output_directory)

            print_evaluation_stats(stats)
            if not skip_save_eval_stats:
                save_evaluation_stats(stats, output_directory)

            if not skip_save_predictions or not skip_save_eval_stats:
                logger.info('Saved to: {0}'.format(output_directory))

        if collect_predictions:
            postproc_predictions = convert_predictions(
                postproc_predictions,
                self.model.output_features,
                self.training_set_metadata,
                return_type=return_type)

        return stats, postproc_predictions, output_directory
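A call to the evaluate() method above might look like this sketch; the dataset path and model handle are placeholders, not taken from the source.

    # Hypothetical usage sketch for evaluate(); 'test.csv' and ludwig_model
    # are placeholder names.
    stats, predictions, output_dir = ludwig_model.evaluate(
        dataset='test.csv',
        collect_predictions=True,
        collect_overall_stats=True,
        output_directory='results',
    )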
Example #12
    def predict(
            self,
            dataset=None,
            data_format=None,
            batch_size=128,
            skip_save_unprocessed_output=True,
            skip_save_predictions=True,
            output_directory='results',
            return_type=pd.DataFrame,
            debug=False,
            **kwargs
    ):
        self._check_initialization()

        logger.debug('Preprocessing')
        # copy the input feature list with [:]; assigning it directly would
        # let later additions of output features mutate the model definition
        features_to_load = self.model_definition['input_features'][:]

        # preprocessing
        dataset, training_set_metadata = preprocess_for_prediction(
            self.model_definition,
            dataset=dataset,
            data_format=data_format,
            training_set_metadata=self.training_set_metadata,
            include_outputs=False,
        )

        logger.debug('Predicting')
        predictor = Predictor(
            batch_size=batch_size, horovod=self._horovod, debug=debug
        )
        predictions = predictor.batch_predict(
            self.model,
            dataset,
        )

        if is_on_master():
            # if we are skipping all saving,
            # there is no need to create a directory that will remain empty
            should_create_exp_dir = not (
                    skip_save_unprocessed_output and skip_save_predictions
            )
            if should_create_exp_dir:
                os.makedirs(output_directory, exist_ok=True)

        logger.debug('Postprocessing')
        postproc_predictions = convert_predictions(
            postprocess(
                predictions,
                self.model.output_features,
                self.training_set_metadata,
                output_directory=output_directory,
                skip_save_unprocessed_output=skip_save_unprocessed_output
                                             or not is_on_master(),
            ),
            self.model.output_features,
            self.training_set_metadata,
            return_type=return_type
        )

        if is_on_master():
            if not skip_save_predictions:
                save_prediction_outputs(postproc_predictions,
                                        output_directory)

                logger.info('Saved to: {0}'.format(output_directory))

        return postproc_predictions, output_directory
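The companion predict() method above could be driven in the same way; again the file name and model handle are placeholders, not taken from the source.

    # Hypothetical usage sketch for predict(); 'unlabeled.csv' and
    # ludwig_model are placeholder names.
    predictions, output_dir = ludwig_model.predict(
        dataset='unlabeled.csv',
        skip_save_predictions=False,
        output_directory='results',
    )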
Example #13
    def evaluate(self,
                 dataset=None,
                 data_format=None,
                 batch_size=128,
                 skip_save_unprocessed_output=True,
                 skip_save_predictions=True,
                 skip_save_eval_stats=True,
                 collect_predictions=False,
                 collect_overall_stats=False,
                 output_directory='results',
                 return_type=pd.DataFrame,
                 debug=False,
                 **kwargs):
        self._check_initialization()

        logger.debug('Preprocessing')
        # concatenation builds a new list, so the feature lists stored in the
        # model definition are not mutated when output features are added later
        features_to_load = self.model_definition['input_features'] + \
                           self.model_definition['output_features']

        # preprocessing
        # todo refactoring: maybe replace the self.model_definition parameter
        #  here with features_to_load
        dataset, training_set_metadata = preprocess_for_prediction(
            self.model_definition,
            dataset=dataset,
            data_format=data_format,
            training_set_metadata=self.training_set_metadata,
            include_outputs=True,
        )

        logger.debug('Predicting')
        predictor = Predictor(batch_size=batch_size,
                              horovod=self._horovod,
                              debug=debug)
        stats, predictions = predictor.batch_evaluation(
            self.model,
            dataset,
            collect_predictions=collect_predictions or collect_overall_stats,
        )

        # calculate the overall metrics
        if collect_overall_stats:
            overall_stats = calculate_overall_stats(self.model.output_features,
                                                    predictions, dataset,
                                                    training_set_metadata)
            stats = {
                of_name: {
                    **stats[of_name],
                    **overall_stats[of_name]
                }
                # account for presence of 'combined' key
                if of_name in overall_stats else {
                    **stats[of_name]
                }
                for of_name in stats
            }

        if is_on_master():
            # if we are skipping all saving,
            # there is no need to create a directory that will remain empty
            should_create_exp_dir = not (skip_save_unprocessed_output
                                         and skip_save_predictions
                                         and skip_save_eval_stats)
            if should_create_exp_dir:
                os.makedirs(output_directory, exist_ok=True)

        if collect_predictions:
            logger.debug('Postprocessing')
            postproc_predictions = postprocess(
                predictions,
                self.model.output_features,
                self.training_set_metadata,
                output_directory=output_directory,
                skip_save_unprocessed_output=skip_save_unprocessed_output
                or not is_on_master(),
            )
        else:
            postproc_predictions = predictions  # = {}

        if is_on_master():
            if postproc_predictions is not None and not skip_save_predictions:
                save_prediction_outputs(postproc_predictions, output_directory)

            print_evaluation_stats(stats)
            if not skip_save_eval_stats:
                save_evaluation_stats(stats, output_directory)

            if not skip_save_predictions or not skip_save_eval_stats:
                logger.info('Saved to: {0}'.format(output_directory))

        if collect_predictions:
            postproc_predictions = convert_predictions(
                postproc_predictions,
                self.model.output_features,
                self.training_set_metadata,
                return_type=return_type)

        return stats, postproc_predictions, output_directory
Example #14
 def create_predictor(self, model: ECD, **kwargs):
     return Predictor(model, **kwargs)