예제 #1
0
    def batch_predict(
        self,
        dataset: Dataset,
        dataset_name: str = None,
    ):
        """Run prediction over ``dataset`` batch by batch and return the results.

        Batches are drawn, unshuffled, from a batcher created with this
        predictor's batch size and horovod handle. Every batch is passed to
        ``self._predict`` and its output is folded into a shared accumulator
        by ``self._accumulate_preds``. After the last batch,
        ``self._concat_preds`` consolidates the accumulator in place.

        Args:
            dataset: Object exposing ``initialize_batcher``.
            dataset_name: Optional label added to the progress-bar description.

        Returns:
            The value produced by ``from_numpy_dataset`` for the consolidated
            predictions.
        """
        collected = defaultdict(list)

        with dataset.initialize_batcher(
            self._batch_size, should_shuffle=False, horovod=self._horovod
        ) as batcher:
            # Only the coordinator owns a progress bar; other workers keep None.
            bar = None
            if self.is_coordinator():
                if dataset_name is None:
                    bar_label = "Prediction"
                else:
                    bar_label = f"Prediction {dataset_name: <5.5}"
                bar = tqdm(
                    desc=bar_label,
                    total=batcher.steps_per_epoch,
                    file=sys.stdout,
                    disable=is_progressbar_disabled(),
                )

            while True:
                if batcher.last_batch():
                    break

                current_batch = batcher.next_batch()
                batch_output = self._predict(self.model, current_batch)
                self._accumulate_preds(batch_output, collected)

                if self.is_coordinator():
                    bar.update(1)

            if self.is_coordinator():
                bar.close()

        # Merge the per-batch pieces stored under each key.
        self._concat_preds(collected)

        return from_numpy_dataset(collected)
예제 #2
0
파일: predictor.py 프로젝트: yarenty/ludwig
    def batch_collect_activations(self, layer_names, dataset, bucketing_field=None):
        """Run the model over ``dataset`` in eval mode and return its output tensors.

        The model is switched to eval mode and gradients are disabled for the
        duration of the pass. The model's previous training mode is restored
        and the progress bar is closed even if a batch raises.

        Args:
            layer_names: Accepted for interface compatibility; this
                implementation does not read it.
            dataset: Object exposing ``initialize_batcher``.
            bucketing_field: Must be falsy; bucketed batching is not supported.

        Returns:
            A list of ``(name, tensor)`` pairs taken from the model's output
            mapping. The list is replaced on every batch, so it reflects the
            last batch processed (empty if there were no batches).

        Raises:
            ValueError: If ``bucketing_field`` is truthy.
        """
        if bucketing_field:
            raise ValueError("BucketedBatcher is not supported yet")

        prev_model_training_mode = self.model.training  # remembered so it can be restored below
        self.model.eval()

        collected_tensors = []
        try:
            with torch.no_grad():
                with dataset.initialize_batcher(self._batch_size, should_shuffle=False) as batcher:
                    progress_bar = tqdm(
                        desc="Collecting Tensors",
                        total=batcher.steps_per_epoch,
                        file=sys.stdout,
                        disable=is_progressbar_disabled(),
                    )
                    try:
                        while not batcher.last_batch():
                            batch = batcher.next_batch()

                            inputs = {
                                i_feat.feature_name: torch.from_numpy(batch[i_feat.proc_column]).to(self.device)
                                for i_feat in self.model.input_features.values()
                            }
                            outputs = self.model(inputs)
                            # NOTE(review): this replaces, rather than extends, the result
                            # of earlier batches. Kept as-is because callers may rely on
                            # the current return shape -- confirm whether accumulation
                            # across batches was intended.
                            collected_tensors = list(outputs.items())

                            progress_bar.update(1)
                    finally:
                        # Close the bar even when a batch fails.
                        progress_bar.close()
        finally:
            # Always put the model back into the mode the caller left it in.
            self.model.train(prev_model_training_mode)

        return collected_tensors
예제 #3
0
    def batch_evaluation(self, dataset, collect_predictions=False, dataset_name=None):
        """Evaluate ``self.model`` on ``dataset`` and return metrics and predictions.

        Each batch is converted to input and target tensors on ``self.device``
        and passed to ``self.model.evaluation_step``. When
        ``collect_predictions`` is true, every prediction whose name is not in
        ``EXCLUDE_PRED_SET`` is stored under ``"<output feature>_<prediction>"``
        and the per-batch pieces are concatenated into numpy arrays at the end.

        Args:
            dataset: Object exposing ``initialize_batcher``.
            collect_predictions: Whether to keep the per-batch predictions.
            dataset_name: Optional label used in the progress bar and debug logs.

        Returns:
            A ``(metrics, predictions)`` tuple: the model metrics after
            ``merge_workers_metrics``, and the result of
            ``from_numpy_dataset`` on the collected predictions.
        """
        with dataset.initialize_batcher(self._batch_size, should_shuffle=False, horovod=self._horovod) as batcher:
            # Only the coordinator owns a progress bar; other workers keep None.
            bar = None
            if self.is_coordinator():
                if dataset_name is None:
                    bar_label = "Evaluation"
                else:
                    bar_label = f"Evaluation {dataset_name: <5.5}"
                bar = tqdm(
                    desc=bar_label,
                    total=batcher.steps_per_epoch,
                    file=sys.stdout,
                    disable=is_progressbar_disabled(),
                )

            predictions = defaultdict(list)
            while True:
                if batcher.last_batch():
                    break

                current_batch = batcher.next_batch()
                logger.debug(
                    f"evaluation for {dataset_name}: obtained next batch "
                    f"memory used: {psutil.Process(os.getpid()).memory_info()[0] / 1e6:0.2f}MB"
                )

                inputs = {}
                for in_feature in self.model.input_features.values():
                    column = current_batch[in_feature.proc_column]
                    inputs[in_feature.feature_name] = torch.from_numpy(column).to(self.device)

                targets = {}
                for out_feature in self.model.output_features.values():
                    column = current_batch[out_feature.proc_column]
                    targets[out_feature.feature_name] = torch.from_numpy(column).to(self.device)

                step_preds = self.model.evaluation_step(inputs, targets)

                # Keep this batch's predictions, flattened to one key per
                # (output feature, prediction name) pair.
                if collect_predictions:
                    for of_name, of_preds in step_preds.items():
                        for pred_name, pred_values in of_preds.items():
                            if pred_name in EXCLUDE_PRED_SET:
                                continue
                            predictions[f"{of_name}_{pred_name}"].append(pred_values)

                if self.is_coordinator():
                    bar.update(1)
                    logger.debug(
                        f"evaluation for {dataset_name}: completed batch {bar.n} "
                        f"memory used: {psutil.Process(os.getpid()).memory_info()[0] / 1e6:0.2f}MB"
                    )

            if self.is_coordinator():
                bar.close()

        # Join the per-batch tensors of every key into a single numpy array.
        if collect_predictions:
            for key, chunks in predictions.items():
                joined = torch.cat(chunks, dim=0)
                predictions[key] = joined.clone().detach().cpu().numpy()

        merged_metrics = self.merge_workers_metrics(self.model.get_metrics())
        self.model.reset_metrics()

        return merged_metrics, from_numpy_dataset(predictions)
예제 #4
0
    def batch_evaluation(self,
                         model,
                         dataset,
                         collect_predictions=False,
                         dataset_name=None):
        """Evaluate ``model`` on ``dataset`` and return metrics and predictions.

        Each batch is split into inputs and targets keyed by feature name and
        passed to ``model.evaluation_step``. When ``collect_predictions`` is
        true, every prediction whose name is not in ``EXCLUE_PRED_SET`` is
        stored under ``'<output feature>_<prediction>'`` and the per-batch
        pieces are concatenated and converted with ``.numpy()`` at the end.

        Args:
            model: Model exposing ``input_features``, ``output_features``,
                ``evaluation_step``, ``get_metrics`` and ``reset_metrics``.
            dataset: Object exposing ``initialize_batcher``.
            collect_predictions: Whether to keep the per-batch predictions.
            dataset_name: Optional label added to the progress-bar description.

        Returns:
            A ``(metrics, predictions)`` tuple: the model metrics after
            ``merge_workers_metrics``, and the result of
            ``from_numpy_dataset`` on the collected predictions.
        """
        with dataset.initialize_batcher(
                self._batch_size,
                should_shuffle=False,
                horovod=self._horovod) as batcher:

            # Only the coordinator owns a progress bar; other workers keep None.
            bar = None
            if self.is_coordinator():
                if dataset_name is None:
                    bar_label = 'Evaluation'
                else:
                    bar_label = 'Evaluation {0: <5.5}'.format(dataset_name)
                bar = tqdm(desc=bar_label,
                           total=batcher.steps_per_epoch,
                           file=sys.stdout,
                           disable=is_progressbar_disabled())

            predictions = defaultdict(list)
            while True:
                if batcher.last_batch():
                    break

                current_batch = batcher.next_batch()

                inputs = {}
                for in_feature in model.input_features.values():
                    inputs[in_feature.feature_name] = \
                        current_batch[in_feature.proc_column]

                targets = {}
                for out_feature in model.output_features.values():
                    targets[out_feature.feature_name] = \
                        current_batch[out_feature.proc_column]

                step_preds = model.evaluation_step(inputs, targets)

                # Keep this batch's predictions, flattened to one key per
                # (output feature, prediction name) pair.
                if collect_predictions:
                    for of_name, of_preds in step_preds.items():
                        for pred_name, pred_values in of_preds.items():
                            if pred_name in EXCLUE_PRED_SET:
                                continue
                            predictions[f'{of_name}_{pred_name}'].append(
                                pred_values)

                if self.is_coordinator():
                    bar.update(1)

            if self.is_coordinator():
                bar.close()

        # Join the per-batch tensors of every key into a single numpy array.
        if collect_predictions:
            for key, chunks in predictions.items():
                joined = tf.concat(chunks, axis=0)
                predictions[key] = joined.numpy()

        merged_metrics = self.merge_workers_metrics(model.get_metrics())
        model.reset_metrics()

        return merged_metrics, from_numpy_dataset(predictions)
예제 #5
0
    def batch_predict(
            self,
            model,
            dataset,
            dataset_name=None
    ):
        """Run ``model.predict_step`` over ``dataset`` and gather the predictions.

        Predictions are grouped first by output feature name and then by
        prediction name, skipping prediction names found in
        ``EXCLUE_PRED_SET``. After the last batch, the per-batch values of
        every entry are concatenated along axis 0 with ``tf.concat``.

        Args:
            model: Model exposing ``input_features`` and ``predict_step``.
            dataset: Object exposing ``initialize_batcher``.
            dataset_name: Optional label added to the progress-bar description.

        Returns:
            A nested dict ``{output feature: {prediction name: tensor}}``.
        """
        batcher = dataset.initialize_batcher(
            self._batch_size,
            should_shuffle=False,
            horovod=self._horovod
        )

        # Only the master process owns a progress bar; others keep None.
        bar = None
        if is_on_master():
            if dataset_name is None:
                bar_label = 'Prediction'
            else:
                bar_label = 'Prediction {0: <5.5}'.format(dataset_name)
            bar = tqdm(
                desc=bar_label,
                total=batcher.steps_per_epoch,
                file=sys.stdout,
                disable=is_progressbar_disabled()
            )

        predictions = {}
        while True:
            if batcher.last_batch():
                break

            current_batch = batcher.next_batch()

            inputs = {}
            for in_feature in model.input_features.values():
                inputs[in_feature.feature_name] = \
                    current_batch[in_feature.proc_column]

            step_preds = model.predict_step(inputs)

            # File this batch's values under [output feature][prediction name].
            # The feature entry is created even if every prediction is skipped.
            for of_name, of_preds in step_preds.items():
                feature_store = predictions.setdefault(of_name, {})
                for pred_name, pred_values in of_preds.items():
                    if pred_name in EXCLUE_PRED_SET:
                        continue
                    feature_store.setdefault(pred_name, []).append(pred_values)

            if is_on_master():
                bar.update(1)

        if is_on_master():
            bar.close()

        # Replace each list of per-batch values with one concatenated tensor.
        for feature_store in predictions.values():
            for pred_name, chunks in feature_store.items():
                feature_store[pred_name] = tf.concat(chunks, axis=0)

        return predictions
예제 #6
0
    def batch_collect_activations(self, layer_names, dataset, bucketing_field=None):
        """Run ``self.model`` over ``dataset`` batch by batch to collect tensors.

        NOTE(review): this snippet appears to be truncated. As shown, the loop
        computes ``outputs`` but never stores them, never updates or closes
        the progress bar, and the method has no return statement. Confirm
        against the full source before relying on it.

        Args:
            layer_names: Not read by the visible part of this method.
            dataset: Object exposing ``initialize_batcher``.
            bucketing_field: Must be falsy; bucketed batching is not supported.

        Raises:
            ValueError: If ``bucketing_field`` is truthy.
        """
        if bucketing_field:
            raise ValueError("BucketedBatcher is not supported yet")

        # The model itself is used to produce the outputs for each batch.
        activation_model = self.model

        with dataset.initialize_batcher(self._batch_size, should_shuffle=False) as batcher:
            progress_bar = tqdm(
                desc="Collecting Tensors",
                total=batcher.steps_per_epoch,
                file=sys.stdout,
                disable=is_progressbar_disabled(),
            )

            collected_tensors = []
            while not batcher.last_batch():
                batch = batcher.next_batch()

                # Convert each input feature's column to a tensor on self.device.
                inputs = {
                    i_feat.feature_name: torch.from_numpy(batch[i_feat.proc_column]).to(self.device)
                    for i_feat in self.model.input_features.values()
                }
                outputs = activation_model(inputs)
예제 #7
0
    def batch_collect_activations(self,
                                  model,
                                  layer_names,
                                  dataset,
                                  bucketing_field=None):
        """Collect the outputs of the named layers for every batch of ``dataset``.

        A connected Keras model is obtained from ``model``, then wrapped in a
        second ``tf.keras.Model`` whose outputs are the requested layers'
        outputs. That second model is run on each unshuffled batch and every
        resulting tensor is appended to the result under a derived name.

        Naming: a plain tensor keeps the layer name; entries of a dict output
        get the suffix ``_<key>``; entries of a list or tuple output get the
        suffix ``_<index>``.

        Args:
            model: Model exposing ``get_model_inputs``, ``get_connected_model``
                and ``input_features``.
            layer_names: Names of the layers whose outputs are collected.
            dataset: Object exposing ``initialize_batcher``.
            bucketing_field: Must be falsy; bucketed batching is not supported.

        Returns:
            A list of ``(name, tensor)`` pairs, in batch order.

        Raises:
            ValueError: If ``bucketing_field`` is truthy.
        """
        if bucketing_field:
            raise ValueError('BucketedBatcher is not supported yet')

        # Build the connected (static) graph of the trained model.
        tf.keras.backend.reset_uids()
        graph_inputs = model.get_model_inputs(training=False)
        connected = model.get_connected_model(inputs=graph_inputs,
                                              training=False)

        # Second model: same inputs, but the requested layers as its outputs.
        tf.keras.backend.reset_uids()
        layer_outputs = {}
        for wanted in layer_names:
            layer_outputs[wanted] = connected.get_layer(wanted).output
        activation_model = tf.keras.Model(inputs=graph_inputs,
                                          outputs=layer_outputs)

        with dataset.initialize_batcher(
                self._batch_size, should_shuffle=False) as batcher:

            bar = tqdm(desc='Collecting Tensors',
                       total=batcher.steps_per_epoch,
                       file=sys.stdout,
                       disable=is_progressbar_disabled())

            collected_tensors = []
            while True:
                if batcher.last_batch():
                    break

                current_batch = batcher.next_batch()

                inputs = {}
                for in_feature in model.input_features.values():
                    inputs[in_feature.feature_name] = \
                        current_batch[in_feature.proc_column]

                batch_outputs = activation_model(inputs)

                for layer_name, output in batch_outputs.items():
                    # Tuples are handled exactly like lists below.
                    if isinstance(output, tuple):
                        output = list(output)

                    # Normalise every supported shape to (suffix, tensor)
                    # pairs; any other value is iterated as-is.
                    if isinstance(output, tf.Tensor):
                        suffixed = [('', output)]
                    elif isinstance(output, dict):
                        suffixed = [(f'_{key}', tensor)
                                    for key, tensor in output.items()]
                    elif isinstance(output, list):
                        suffixed = [(f'_{idx}', tensor)
                                    for idx, tensor in enumerate(output)]
                    else:
                        suffixed = output

                    for suffix, tensor in suffixed:
                        collected_tensors.append(
                            (f'{layer_name}{suffix}', tensor))

                bar.update(1)

            bar.close()

        return collected_tensors
예제 #8
0
파일: predictor.py 프로젝트: tru247/ludwig
    def batch_evaluation(self,
                         model,
                         dataset,
                         collect_predictions=False,
                         dataset_name=None):
        """Evaluate ``model`` on ``dataset`` and return metrics and predictions.

        Each batch is split into inputs and targets keyed by feature name and
        passed to ``model.evaluation_step``. When ``collect_predictions`` is
        true, predictions are grouped by output feature name and then by
        prediction name, skipping names in ``EXCLUE_PRED_SET`` and values that
        are ``None``. After the last batch, the per-batch values of every
        entry are concatenated along axis 0 with ``tf.concat``.

        Args:
            model: Model exposing ``input_features``, ``output_features``,
                ``evaluation_step``, ``get_metrics`` and ``reset_metrics``.
            dataset: Dataset handed to ``initialize_batcher``.
            collect_predictions: Whether to keep the per-batch predictions.
            dataset_name: Optional label added to the progress-bar description.

        Returns:
            A ``(metrics, predictions)`` tuple: the model metrics after
            ``merge_workers_metrics``, and a nested dict
            ``{output feature: {prediction name: tensor}}`` (empty when
            predictions are not collected).
        """
        batcher = initialize_batcher(dataset,
                                     self._batch_size,
                                     should_shuffle=False,
                                     horovod=self._horovod)

        # Only the master process owns a progress bar; others keep None.
        bar = None
        if is_on_master():
            if dataset_name is None:
                bar_label = 'Evaluation'
            else:
                bar_label = 'Evaluation {0: <5.5}'.format(dataset_name)
            bar = tqdm(desc=bar_label,
                       total=batcher.steps_per_epoch,
                       file=sys.stdout,
                       disable=is_progressbar_disabled())

        predictions = {}
        while True:
            if batcher.last_batch():
                break

            current_batch = batcher.next_batch()

            inputs = {}
            for in_feature in model.input_features.values():
                inputs[in_feature.feature_name] = \
                    current_batch[in_feature.feature_name]

            targets = {}
            for out_feature in model.output_features.values():
                targets[out_feature.feature_name] = \
                    current_batch[out_feature.feature_name]

            step_preds = model.evaluation_step(inputs, targets)

            # File this batch's values under [output feature][prediction name].
            # The feature entry is created even if every prediction is skipped.
            if collect_predictions:
                for of_name, of_preds in step_preds.items():
                    feature_store = predictions.setdefault(of_name, {})
                    for pred_name, pred_values in of_preds.items():
                        if pred_name in EXCLUE_PRED_SET or pred_values is None:
                            continue
                        feature_store.setdefault(pred_name, []).append(
                            pred_values)

            if is_on_master():
                bar.update(1)

        if is_on_master():
            bar.close()

        # Replace each list of per-batch values with one concatenated tensor.
        if collect_predictions:
            for feature_store in predictions.values():
                for pred_name, chunks in feature_store.items():
                    feature_store[pred_name] = tf.concat(chunks, axis=0)

        merged_metrics = self.merge_workers_metrics(model.get_metrics())
        model.reset_metrics()

        return merged_metrics, predictions