def batch_predict(
        self,
        model,
        dataset,
        dataset_name=None
):
    batcher = initialize_batcher(
        dataset, self._batch_size,
        should_shuffle=False,
        horovod=self._horovod
    )

    progress_bar = None
    if is_on_master():
        progress_bar = tqdm(
            desc='Prediction' if dataset_name is None
            else 'Prediction {0: <5.5}'.format(dataset_name),
            total=batcher.steps_per_epoch,
            file=sys.stdout,
            disable=is_progressbar_disabled()
        )

    predictions = {}
    while not batcher.last_batch():
        batch = batcher.next_batch()

        inputs = {
            i_feat.feature_name: batch[i_feat.feature_name]
            for i_feat in model.input_features.values()
        }

        preds = model.predict_step(inputs)

        # accumulate predictions from batch for each output feature
        for of_name, of_preds in preds.items():
            if of_name not in predictions:
                predictions[of_name] = {}
            for pred_name, pred_values in of_preds.items():
                if pred_name not in EXCLUE_PRED_SET:
                    if pred_name not in predictions[of_name]:
                        predictions[of_name][pred_name] = [pred_values]
                    else:
                        predictions[of_name][pred_name].append(pred_values)

        if is_on_master():
            progress_bar.update(1)

    if is_on_master():
        progress_bar.close()

    # consolidate predictions from each batch to a single tensor
    for of_name, of_predictions in predictions.items():
        for pred_name, pred_value_list in of_predictions.items():
            predictions[of_name][pred_name] = tf.concat(
                pred_value_list, axis=0
            )

    return predictions
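
# Hedged usage sketch (not part of the original module): assumes `predictor` is an
# instance of the class that defines batch_predict above, and that `model` and
# `dataset` come from the usual preprocessing/training pipeline. The helper name
# `summarize_predictions` is hypothetical and only illustrates the return shape,
# i.e. {output_feature: {pred_name: tf.Tensor}}.
def summarize_predictions(predictor, model, dataset):
    # run batched inference over the dataset
    predictions = predictor.batch_predict(model, dataset, dataset_name='test')
    # convert each concatenated tensor to numpy for downstream inspection
    return {
        of_name: {pred_name: tensor.numpy()
                  for pred_name, tensor in of_preds.items()}
        for of_name, of_preds in predictions.items()
    }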
def setup_model_scaffolding(
        raw_df,
        input_features,
        output_features
):
    # setup input features for testing
    config = {
        'input_features': input_features,
        'output_features': output_features
    }

    # setup model scaffolding for testing
    model = LudwigModel(config)
    training_set, _, _, training_set_metadata = preprocess_for_training(
        config,
        training_set=raw_df,
        skip_save_processed_input=True
    )
    model.training_set_metadata = training_set_metadata
    update_config_with_metadata(
        model.config,
        training_set_metadata
    )
    model.model = model.create_model(model.config)

    # setup batcher to go through synthetic data
    batcher = initialize_batcher(training_set)

    return model, batcher
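
# Hedged usage sketch (assumption, not part of the original test module): builds a
# tiny in-memory DataFrame and illustrative feature configs, then reuses the
# scaffolding helper above. The feature names, types, and data are made up for
# illustration only.
def example_scaffolding_usage():
    import pandas as pd  # local import to keep the sketch self-contained

    raw_df = pd.DataFrame({
        'x1': [0.1, 0.2, 0.3, 0.4],
        'y': ['a', 'b', 'a', 'b']
    })
    input_features = [{'name': 'x1', 'type': 'numerical'}]
    output_features = [{'name': 'y', 'type': 'category'}]

    model, batcher = setup_model_scaffolding(
        raw_df, input_features, output_features
    )

    # iterate the batcher the same way the prediction/evaluation loops do
    while not batcher.last_batch():
        batch = batcher.next_batch()
        # batch is a dict keyed by preprocessed feature name

    return model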
def batch_collect_activations(
        self,
        model,
        layer_names,
        dataset,
        bucketing_field=None
):
    if bucketing_field:
        raise ValueError('BucketedBatcher is not supported yet')

    # Build static graph for the trained model
    tf.keras.backend.reset_uids()
    keras_model_inputs = model.get_model_inputs(training=False)
    keras_model = model.get_connected_model(inputs=keras_model_inputs,
                                            training=False)

    # Create a new model that routes activations to outputs
    tf.keras.backend.reset_uids()
    output_nodes = {
        layer_name: keras_model.get_layer(layer_name).output
        for layer_name in layer_names
    }
    activation_model = tf.keras.Model(inputs=keras_model_inputs,
                                      outputs=output_nodes)

    batcher = initialize_batcher(
        dataset, self._batch_size,
        should_shuffle=False
    )

    progress_bar = tqdm(
        desc='Collecting Tensors',
        total=batcher.steps_per_epoch,
        file=sys.stdout,
        disable=is_progressbar_disabled()
    )

    collected_tensors = []
    while not batcher.last_batch():
        batch = batcher.next_batch()

        inputs = {
            i_feat.feature_name: batch[i_feat.feature_name]
            for i_feat in model.input_features.values()
        }
        outputs = activation_model(inputs)

        for layer_name, output in outputs.items():
            if isinstance(output, tuple):
                output = list(output)

            if isinstance(output, tf.Tensor):
                output = [('', output)]
            elif isinstance(output, dict):
                output = [(f'_{key}', tensor)
                          for key, tensor in output.items()]
            elif isinstance(output, list):
                output = [(f'_{idx}', tensor)
                          for idx, tensor in enumerate(output)]

            for suffix, tensor in output:
                full_name = f'{layer_name}{suffix}'
                collected_tensors.append((full_name, tensor))

        progress_bar.update(1)

    progress_bar.close()

    return collected_tensors
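
# Hedged usage sketch (assumption): `collector` is an instance of the class that
# defines batch_collect_activations above; the layer names passed in must match
# layers of the connected Keras model, and the names used here are placeholders.
def example_collect_activations(collector, model, dataset):
    tensors = collector.batch_collect_activations(
        model,
        layer_names=['dense', 'dense_1'],  # placeholder layer names
        dataset=dataset
    )
    # the result is a flat list of (name, tensor) pairs, one entry per layer output
    for name, tensor in tensors:
        print(name, tensor.shape)
    return tensors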
def batch_evaluation(
        self,
        model,
        dataset,
        collect_predictions=False,
        dataset_name=None
):
    batcher = initialize_batcher(
        dataset, self._batch_size,
        should_shuffle=False,
        horovod=self._horovod
    )

    progress_bar = None
    if is_on_master():
        progress_bar = tqdm(
            desc='Evaluation' if dataset_name is None
            else 'Evaluation {0: <5.5}'.format(dataset_name),
            total=batcher.steps_per_epoch,
            file=sys.stdout,
            disable=is_progressbar_disabled()
        )

    predictions = {}
    while not batcher.last_batch():
        batch = batcher.next_batch()

        inputs = {
            i_feat.feature_name: batch[i_feat.feature_name]
            for i_feat in model.input_features.values()
        }
        targets = {
            o_feat.feature_name: batch[o_feat.feature_name]
            for o_feat in model.output_features.values()
        }

        preds = model.evaluation_step(inputs, targets)

        # todo refactoring: remove logits from predictions

        # accumulate predictions from batch for each output feature
        if collect_predictions:
            for of_name, of_preds in preds.items():
                if of_name not in predictions:
                    predictions[of_name] = {}

                for pred_name, pred_values in of_preds.items():
                    if pred_name not in EXCLUE_PRED_SET \
                            and pred_values is not None:
                        if pred_name not in predictions[of_name]:
                            predictions[of_name][pred_name] = [pred_values]
                        else:
                            predictions[of_name][pred_name].append(
                                pred_values
                            )

        if is_on_master():
            progress_bar.update(1)

    if is_on_master():
        progress_bar.close()

    # consolidate predictions from each batch to a single tensor
    if collect_predictions:
        for of_name, of_predictions in predictions.items():
            for pred_name, pred_value_list in of_predictions.items():
                predictions[of_name][pred_name] = tf.concat(
                    pred_value_list, axis=0
                )

    metrics = model.get_metrics()
    metrics = self.merge_workers_metrics(metrics)

    model.reset_metrics()

    return metrics, predictions
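
# Hedged usage sketch (assumption): `predictor` is an instance of the class that
# defines batch_evaluation above; `model` and `dataset` come from the usual
# preprocessing/training pipeline. Illustrates the two return values only.
def example_batch_evaluation(predictor, model, dataset):
    metrics, predictions = predictor.batch_evaluation(
        model,
        dataset,
        collect_predictions=True,
        dataset_name='validation'
    )
    # metrics: per-output-feature metric values, merged across workers
    # predictions: {output_feature: {pred_name: tf.Tensor}} when collection is on
    return metrics, predictions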