def batch_predict_partition(dataset):
    model = remote_model.load()
    predictor = Predictor(**predictor_kwargs)
    predictions = predictor.batch_predict(model, dataset, *args, **kwargs)
    ordered_predictions = predictions[output_columns]
    return ordered_predictions
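
# Note on the snippet above (and the nested __init__ snippets below):
# names such as `remote_model`, `predictor_kwargs`, `output_columns`,
# `args`, and `kwargs` are free variables closed over from an enclosing
# function. A typical use (an assumption for illustration, not code
# from this repo) is mapping the closure over the partitions of a
# distributed DataFrame:
#
#     predictions = dask_df.map_partitions(batch_predict_partition)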

def __init__(self):
    self.model = remote_model.load()
    self.output_columns = output_columns
    self.features = features
    self.data_hdf5_fp = data_hdf5_fp
    predictor = Predictor(**predictor_kwargs)
    self.batch_predict = partial(predictor.batch_predict, *args, **kwargs)

def evaluation(
    self,
    dataset: "Dataset",  # noqa: F821
    dataset_name: str,
    metrics_log: Dict[str, Dict[str, List[TrainerMetric]]],
    tables: Dict[str, List[List[str]]],
    batch_size: int,
    progress_tracker: ProgressTracker,
):
    predictor = Predictor(
        self.model,
        batch_size=batch_size,
        horovod=self.horovod,
        report_tqdm_to_ray=self.report_tqdm_to_ray,
    )
    metrics, predictions = predictor.batch_evaluation(
        dataset, collect_predictions=False, dataset_name=dataset_name
    )

    self.append_metrics(dataset_name, metrics, metrics_log, tables, progress_tracker)

    return metrics_log, tables

def __init__(self):
    self.model = model
    self.output_columns = output_columns
    self.features = features
    self.training_set_metadata = training_set_metadata
    self.reshape_map = {
        f[PROC_COLUMN]: training_set_metadata[f[NAME]].get("reshape")
        for f in features.values()
    }
    predictor = Predictor(model, **predictor_kwargs)
    self.predict = partial(predictor.predict_single, *args, **kwargs)

def collect_activations(
        self,
        layer_names,
        dataset,
        data_format=None,
        batch_size=128,
        # output_directory='results',
        debug=False,
        **kwargs
):
    self._check_initialization()
    logger.debug('Preprocessing')

    # Copy with [:] rather than assigning, so that appending output
    # features later does not mutate the caller's input feature list.
    features_to_load = self.model_definition['input_features'][:]

    # preprocessing
    dataset, training_set_metadata = preprocess_for_prediction(
        self.model_definition,
        dataset=dataset,
        data_format=data_format,
        training_set_metadata=self.training_set_metadata,
        include_outputs=False,
    )

    logger.debug('Predicting')
    predictor = Predictor(
        batch_size=batch_size, horovod=self._horovod, debug=debug
    )
    activations = predictor.batch_collect_activations(
        self.model,
        layer_names,
        dataset,
    )

    return activations
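
# A minimal usage sketch for collect_activations() above, assuming it
# is a method on a LudwigModel-style API object; the layer names and
# file path are placeholders:
#
#     activations = model.collect_activations(
#         layer_names=['fc_1', 'fc_2'],
#         dataset='test.csv',
#         batch_size=128,
#     )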

def __init__(self):
    model = ray.get(model_ref)
    device = "cuda" if torch.cuda.is_available() else "cpu"
    self.model = model.to(device)
    self.output_columns = output_columns
    self.features = features
    self.training_set_metadata = training_set_metadata
    self.reshape_map = {
        f[PROC_COLUMN]: training_set_metadata[f[NAME]].get("reshape")
        for f in features.values()
    }
    predictor = Predictor(model, **predictor_kwargs)
    self.predict = partial(predictor.predict_single, *args, **kwargs)
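
# Hedged sketch: a class whose __init__ looks like the one above is
# typically used as a per-worker inference actor, so ray.get() pulls
# the model out of the object store once per process rather than once
# per batch. Illustrative only (`BatchInferModel` and the map_batches
# arguments are assumptions, not code from this repo):
#
#     predictions = ray_dataset.map_batches(
#         BatchInferModel, batch_size=128, compute="actors"
#     )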

def create_predictor(self, **kwargs):
    return Predictor(**kwargs)

def create_predictor(self, model: ECD, **kwargs):
    return Predictor(model, horovod=self._horovod, **kwargs)

def create_predictor(self, **kwargs):
    return Predictor(horovod=self._horovod, **kwargs)

def create_predictor(self, model: ECD, **kwargs):
    from ludwig.models.predictor import Predictor

    return Predictor(model, **kwargs)
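
# The create_predictor() variants above follow a backend-factory
# pattern: each backend injects its own distributed context or defers
# heavy imports. A minimal sketch of the idea (LocalBackend and
# HorovodBackend are hypothetical names for illustration, not classes
# from this repo):
class LocalBackend:
    def create_predictor(self, model, **kwargs):
        # No distributed context to inject.
        return Predictor(model, **kwargs)

class HorovodBackend:
    def __init__(self, horovod):
        self._horovod = horovod

    def create_predictor(self, model, **kwargs):
        # Inject the Horovod context so callers stay backend-agnostic.
        return Predictor(model, horovod=self._horovod, **kwargs)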

def evaluate(
        self,
        dataset=None,
        data_format=None,
        batch_size=128,
        skip_save_unprocessed_output=True,
        skip_save_predictions=True,
        skip_save_eval_stats=True,
        collect_predictions=False,
        collect_overall_stats=False,
        output_directory='results',
        return_type=pd.DataFrame,
        debug=False,
        **kwargs
):
    self._check_initialization()
    logger.debug('Preprocessing')

    # preprocessing
    dataset, training_set_metadata = preprocess_for_prediction(
        self.model_definition,
        dataset=dataset,
        data_format=data_format,
        training_set_metadata=self.training_set_metadata,
        include_outputs=True,
    )

    logger.debug('Predicting')
    predictor = Predictor(
        batch_size=batch_size, horovod=self._horovod, debug=debug
    )
    stats, predictions = predictor.batch_evaluation(
        self.model,
        dataset,
        collect_predictions=collect_predictions or collect_overall_stats,
    )

    # calculate the overall metrics
    if collect_overall_stats:
        overall_stats = calculate_overall_stats(
            self.model.output_features,
            predictions,
            dataset,
            training_set_metadata
        )
        stats = {
            of_name: {**stats[of_name], **overall_stats[of_name]}
            # account for presence of 'combined' key
            if of_name in overall_stats
            else {**stats[of_name]}
            for of_name in stats
        }

    if is_on_master():
        # if we are skipping all saving,
        # there is no need to create a directory that will remain empty
        should_create_exp_dir = not (
            skip_save_unprocessed_output
            and skip_save_predictions
            and skip_save_eval_stats
        )
        if should_create_exp_dir:
            os.makedirs(output_directory, exist_ok=True)

    if collect_predictions:
        logger.debug('Postprocessing')
        postproc_predictions = postprocess(
            predictions,
            self.model.output_features,
            self.training_set_metadata,
            output_directory=output_directory,
            skip_save_unprocessed_output=skip_save_unprocessed_output
            or not is_on_master(),
        )
    else:
        postproc_predictions = predictions  # = {}

    if is_on_master():
        if postproc_predictions is not None and not skip_save_predictions:
            save_prediction_outputs(postproc_predictions, output_directory)

        print_evaluation_stats(stats)
        if not skip_save_eval_stats:
            save_evaluation_stats(stats, output_directory)

        if not skip_save_predictions or not skip_save_eval_stats:
            logger.info('Saved to: {0}'.format(output_directory))

    if collect_predictions:
        postproc_predictions = convert_predictions(
            postproc_predictions,
            self.model.output_features,
            self.training_set_metadata,
            return_type=return_type
        )

    return stats, postproc_predictions, output_directory

def predict(
        self,
        dataset=None,
        data_format=None,
        batch_size=128,
        skip_save_unprocessed_output=True,
        skip_save_predictions=True,
        output_directory='results',
        return_type=pd.DataFrame,
        debug=False,
        **kwargs
):
    self._check_initialization()
    logger.debug('Preprocessing')

    # Copy with [:] rather than assigning, so that appending output
    # features later does not mutate the caller's input feature list.
    features_to_load = self.model_definition['input_features'][:]

    # preprocessing
    dataset, training_set_metadata = preprocess_for_prediction(
        self.model_definition,
        dataset=dataset,
        data_format=data_format,
        training_set_metadata=self.training_set_metadata,
        include_outputs=False,
    )

    logger.debug('Predicting')
    predictor = Predictor(
        batch_size=batch_size, horovod=self._horovod, debug=debug
    )
    predictions = predictor.batch_predict(
        self.model,
        dataset,
    )

    if is_on_master():
        # if we are skipping all saving,
        # there is no need to create a directory that will remain empty
        should_create_exp_dir = not (
            skip_save_unprocessed_output and skip_save_predictions
        )
        if should_create_exp_dir:
            os.makedirs(output_directory, exist_ok=True)

    logger.debug('Postprocessing')
    postproc_predictions = convert_predictions(
        postprocess(
            predictions,
            self.model.output_features,
            self.training_set_metadata,
            output_directory=output_directory,
            skip_save_unprocessed_output=skip_save_unprocessed_output
            or not is_on_master(),
        ),
        self.model.output_features,
        self.training_set_metadata,
        return_type=return_type
    )

    if is_on_master():
        if not skip_save_predictions:
            save_prediction_outputs(postproc_predictions, output_directory)

            logger.info('Saved to: {0}'.format(output_directory))

    return postproc_predictions, output_directory

def evaluate(
        self,
        dataset=None,
        data_format=None,
        batch_size=128,
        skip_save_unprocessed_output=True,
        skip_save_predictions=True,
        skip_save_eval_stats=True,
        collect_predictions=False,
        collect_overall_stats=False,
        output_directory='results',
        return_type=pd.DataFrame,
        debug=False,
        **kwargs
):
    self._check_initialization()
    logger.debug('Preprocessing')

    # Concatenation builds a new list, so the feature lists inside the
    # model definition are not mutated when output features are added.
    features_to_load = self.model_definition['input_features'] + \
        self.model_definition['output_features']

    # preprocessing
    # todo refactoring: maybe replace the self.model_definition parameter
    #  here with features_to_load
    dataset, training_set_metadata = preprocess_for_prediction(
        self.model_definition,
        dataset=dataset,
        data_format=data_format,
        training_set_metadata=self.training_set_metadata,
        include_outputs=True,
    )

    logger.debug('Predicting')
    predictor = Predictor(
        batch_size=batch_size, horovod=self._horovod, debug=debug
    )
    stats, predictions = predictor.batch_evaluation(
        self.model,
        dataset,
        collect_predictions=collect_predictions or collect_overall_stats,
    )

    # calculate the overall metrics
    if collect_overall_stats:
        overall_stats = calculate_overall_stats(
            self.model.output_features,
            predictions,
            dataset,
            training_set_metadata
        )
        stats = {
            of_name: {**stats[of_name], **overall_stats[of_name]}
            # account for presence of 'combined' key
            if of_name in overall_stats
            else {**stats[of_name]}
            for of_name in stats
        }

    if is_on_master():
        # if we are skipping all saving,
        # there is no need to create a directory that will remain empty
        should_create_exp_dir = not (
            skip_save_unprocessed_output
            and skip_save_predictions
            and skip_save_eval_stats
        )
        if should_create_exp_dir:
            os.makedirs(output_directory, exist_ok=True)

    if collect_predictions:
        logger.debug('Postprocessing')
        postproc_predictions = postprocess(
            predictions,
            self.model.output_features,
            self.training_set_metadata,
            output_directory=output_directory,
            skip_save_unprocessed_output=skip_save_unprocessed_output
            or not is_on_master(),
        )
    else:
        postproc_predictions = predictions  # = {}

    if is_on_master():
        if postproc_predictions is not None and not skip_save_predictions:
            save_prediction_outputs(postproc_predictions, output_directory)

        print_evaluation_stats(stats)
        if not skip_save_eval_stats:
            save_evaluation_stats(stats, output_directory)

        if not skip_save_predictions or not skip_save_eval_stats:
            logger.info('Saved to: {0}'.format(output_directory))

    if collect_predictions:
        postproc_predictions = convert_predictions(
            postproc_predictions,
            self.model.output_features,
            self.training_set_metadata,
            return_type=return_type)

    return stats, postproc_predictions, output_directory
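
# A hedged usage sketch for the predict()/evaluate() methods above,
# assuming they live on a LudwigModel-style API object; the file paths
# are placeholders:
#
#     stats, predictions, out_dir = model.evaluate(
#         dataset='test.csv',
#         collect_predictions=True,
#         collect_overall_stats=True,
#     )
#     predictions, out_dir = model.predict(dataset='new_data.csv')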

def create_predictor(self, model: ECD, **kwargs):
    return Predictor(model, **kwargs)