コード例 #1
0
ファイル: predictor.py プロジェクト: tru247/ludwig
    def batch_predict(self, model, dataset, dataset_name=None):
        """Run ``model.predict_step`` over every batch and gather the results.

        Args:
            model: object exposing ``input_features`` (a mapping whose values
                carry a ``feature_name``) and ``predict_step(inputs)``.
            dataset: data handed to ``initialize_batcher``.
            dataset_name: optional label appended to the progress bar
                description.

        Returns:
            A dict keyed by output feature name. Each value is a dict mapping
            a prediction name to the ``tf.concat`` (axis 0) of the values
            produced for it by each batch. Prediction names contained in
            ``EXCLUE_PRED_SET`` are left out.
        """
        batcher = initialize_batcher(
            dataset,
            self._batch_size,
            should_shuffle=False,
            horovod=self._horovod,
        )

        # A progress bar is only created when is_on_master() is true.
        progress_bar = None
        if is_on_master():
            if dataset_name is None:
                description = 'Prediction'
            else:
                description = 'Prediction {0: <5.5}'.format(dataset_name)
            progress_bar = tqdm(
                desc=description,
                total=batcher.steps_per_epoch,
                file=sys.stdout,
                disable=is_progressbar_disabled(),
            )

        # output feature name -> prediction name -> list of per-batch values
        chunks_by_feature = {}
        while not batcher.last_batch():
            batch = batcher.next_batch()

            inputs = {}
            for feature in model.input_features.values():
                inputs[feature.feature_name] = batch[feature.feature_name]

            step_output = model.predict_step(inputs)

            for of_name, of_preds in step_output.items():
                feature_chunks = chunks_by_feature.setdefault(of_name, {})
                for pred_name, pred_values in of_preds.items():
                    if pred_name in EXCLUE_PRED_SET:
                        continue
                    feature_chunks.setdefault(pred_name, []).append(
                        pred_values)

            if is_on_master():
                progress_bar.update(1)

        if is_on_master():
            progress_bar.close()

        # Join the per-batch pieces of every prediction into one tensor.
        return {
            of_name: {
                pred_name: tf.concat(chunks, axis=0)
                for pred_name, chunks in feature_chunks.items()
            }
            for of_name, feature_chunks in chunks_by_feature.items()
        }
コード例 #2
0
def setup_model_scaffolding(raw_df, input_features, output_features):
    """Assemble a ``LudwigModel`` and a batcher over ``raw_df`` for testing.

    Args:
        raw_df: raw data passed to ``preprocess_for_training`` as the
            training set.
        input_features: value stored under the config's ``'input_features'``.
        output_features: value stored under the config's
            ``'output_features'``.

    Returns:
        A ``(model, batcher)`` tuple: the ``LudwigModel`` with its
        ``training_set_metadata`` and ``model`` attributes populated, and the
        result of ``initialize_batcher`` on the preprocessed training set.
    """
    scaffold_config = dict(
        input_features=input_features,
        output_features=output_features,
    )

    # The model is instantiated before preprocessing; it is filled in with
    # the resulting metadata afterwards.
    model = LudwigModel(scaffold_config)

    preprocessed = preprocess_for_training(
        scaffold_config,
        training_set=raw_df,
        skip_save_processed_input=True,
    )
    # Only the first and last of the four returned values are used here.
    training_set, _, _, metadata = preprocessed

    model.training_set_metadata = metadata
    update_config_with_metadata(model.config, metadata)
    model.model = model.create_model(model.config)

    # Batcher that walks through the preprocessed data.
    return model, initialize_batcher(training_set)
コード例 #3
0
ファイル: predictor.py プロジェクト: tru247/ludwig
    def batch_collect_activations(self,
                                  model,
                                  layer_names,
                                  dataset,
                                  bucketing_field=None):
        """Collect the outputs of the named layers for every batch.

        Args:
            model: object exposing ``get_model_inputs``,
                ``get_connected_model`` and ``input_features``.
            layer_names: names of the layers whose outputs are collected;
                each is looked up with ``get_layer`` on the connected model.
            dataset: data handed to ``initialize_batcher``.
            bucketing_field: must be falsy; any truthy value is rejected.

        Returns:
            A list of ``(name, tensor)`` tuples in batch order. ``name`` is
            the layer name, followed by ``_<key>`` for dict outputs or
            ``_<index>`` for tuple/list outputs.

        Raises:
            ValueError: if ``bucketing_field`` is truthy.
        """
        if bucketing_field:
            raise ValueError('BucketedBatcher is not supported yet')

        # Connect the model with training=False.
        tf.keras.backend.reset_uids()
        keras_model_inputs = model.get_model_inputs(training=False)
        keras_model = model.get_connected_model(inputs=keras_model_inputs,
                                                training=False)

        # Second Keras model whose outputs are the requested layers' outputs.
        tf.keras.backend.reset_uids()
        output_nodes = {}
        for name in layer_names:
            output_nodes[name] = keras_model.get_layer(name).output
        activation_model = tf.keras.Model(inputs=keras_model_inputs,
                                          outputs=output_nodes)

        batcher = initialize_batcher(dataset,
                                     self._batch_size,
                                     should_shuffle=False)

        progress_bar = tqdm(desc='Collecting Tensors',
                            total=batcher.steps_per_epoch,
                            file=sys.stdout,
                            disable=is_progressbar_disabled())

        collected_tensors = []
        while not batcher.last_batch():
            batch = batcher.next_batch()

            inputs = {}
            for feature in model.input_features.values():
                inputs[feature.feature_name] = batch[feature.feature_name]

            layer_outputs = activation_model(inputs)

            for layer_name, output in layer_outputs.items():
                # Normalise each layer output to (suffix, tensor) pairs.
                if isinstance(output, tf.Tensor):
                    suffixed = [('', output)]
                elif isinstance(output, dict):
                    suffixed = [(f'_{key}', tensor)
                                for key, tensor in output.items()]
                elif isinstance(output, (tuple, list)):
                    suffixed = [(f'_{idx}', tensor)
                                for idx, tensor in enumerate(output)]
                else:
                    # Any other type is iterated as-is.
                    suffixed = output

                collected_tensors.extend(
                    (f'{layer_name}{suffix}', tensor)
                    for suffix, tensor in suffixed
                )

            progress_bar.update(1)

        progress_bar.close()

        return collected_tensors
コード例 #4
0
ファイル: predictor.py プロジェクト: prmrreddy/ludwig
    def batch_evaluation(self,
                         model,
                         dataset,
                         collect_predictions=False,
                         dataset_name=None):
        """Run ``model.evaluation_step`` over every batch and return metrics.

        Args:
            model: object exposing ``input_features`` / ``output_features``
                (mappings whose values carry a ``feature_name``),
                ``evaluation_step``, ``get_metrics`` and ``reset_metrics``.
            dataset: data handed to ``initialize_batcher``.
            collect_predictions: when true, per-batch predictions are
                accumulated and concatenated; otherwise the returned
                predictions dict is empty.
            dataset_name: optional label appended to the progress bar
                description.

        Returns:
            A ``(metrics, predictions)`` tuple. ``metrics`` is the result of
            ``self.merge_workers_metrics(model.get_metrics())``.
            ``predictions`` maps output feature name to a dict of prediction
            name -> ``tf.concat`` (axis 0) of the per-batch values, skipping
            names in ``EXCLUE_PRED_SET`` and values that are ``None``.
        """
        batcher = initialize_batcher(
            dataset,
            self._batch_size,
            should_shuffle=False,
            horovod=self._horovod,
        )

        # A progress bar is only created when is_on_master() is true.
        progress_bar = None
        if is_on_master():
            if dataset_name is None:
                description = 'Evaluation'
            else:
                description = 'Evaluation {0: <5.5}'.format(dataset_name)
            progress_bar = tqdm(
                desc=description,
                total=batcher.steps_per_epoch,
                file=sys.stdout,
                disable=is_progressbar_disabled(),
            )

        # output feature name -> prediction name -> list of per-batch values
        chunks_by_feature = {}
        while not batcher.last_batch():
            batch = batcher.next_batch()

            inputs = {}
            for in_feature in model.input_features.values():
                inputs[in_feature.feature_name] = batch[
                    in_feature.feature_name]

            targets = {}
            for out_feature in model.output_features.values():
                targets[out_feature.feature_name] = batch[
                    out_feature.feature_name]

            step_output = model.evaluation_step(inputs, targets)

            # todo refactoring: remove logits from predictions

            if collect_predictions:
                for of_name, of_preds in step_output.items():
                    feature_chunks = chunks_by_feature.setdefault(of_name, {})
                    for pred_name, pred_values in of_preds.items():
                        if pred_name in EXCLUE_PRED_SET or pred_values is None:
                            continue
                        feature_chunks.setdefault(pred_name, []).append(
                            pred_values)

            if is_on_master():
                progress_bar.update(1)

        if is_on_master():
            progress_bar.close()

        # Join the per-batch pieces of every prediction into one tensor.
        predictions = {}
        if collect_predictions:
            predictions = {
                of_name: {
                    pred_name: tf.concat(chunks, axis=0)
                    for pred_name, chunks in feature_chunks.items()
                }
                for of_name, feature_chunks in chunks_by_feature.items()
            }

        # Metrics are read and merged before the model's metrics are reset.
        metrics = self.merge_workers_metrics(model.get_metrics())
        model.reset_metrics()

        return metrics, predictions