Example 1
0
    def batch_predict(self, dataset: RayDataset, *args, **kwargs):
        """Run distributed batch inference over ``dataset``.

        Builds an actor-based batch predictor from the backend's model,
        casts the processed input-feature columns to tensor dtype, maps
        the predictor over the Ray dataset, and converts the result to a
        Dask DataFrame with each output feature unflattened.

        :param dataset: the :class:`RayDataset` to predict over
        :return: a Dask DataFrame of predictions, one column per output
        """
        self._check_dataset(dataset)

        infer_model = self.get_batch_infer_model(
            self.model,
            self.predictor_kwargs,
            get_output_columns(self.model.output_features),
            dataset.features,
            dataset.training_set_metadata,
            *args,
            **kwargs,
        )

        input_proc_columns = [
            feature.proc_column for feature in self.model.input_features.values()
        ]

        def cast_columns(batch: pd.DataFrame) -> pd.DataFrame:
            # Input columns must carry TensorDtype before inference.
            for col in input_proc_columns:
                batch[col] = batch[col].astype(TensorDtype())
            return batch

        # Request one GPU per actor only when the cluster exposes any GPUs.
        gpus_per_actor = int(ray.cluster_resources().get("GPU", 0) > 0)

        tensor_ds = dataset.ds.map_batches(cast_columns, batch_format="pandas")
        prediction_ds = tensor_ds.map_batches(
            infer_model,
            batch_size=self.batch_size,
            compute="actors",
            batch_format="pandas",
            num_gpus=gpus_per_actor,
        )
        result = prediction_ds.to_dask()

        for output_feature in self.model.output_features.values():
            result = output_feature.unflatten(result)

        return result
Example 2
0
    def batch_predict(self,
                      dataset: RayDataset,
                      *args,
                      collect_logits: bool = False,
                      **kwargs):
        """Run distributed batch inference over ``dataset``.

        Builds an actor-based batch predictor, casts the processed input
        columns to tensor dtype, maps the predictor over the Ray dataset
        with per-worker CPU/GPU resources, and returns the predictions
        as a dataframe with each output feature unflattened.

        :param dataset: the :class:`RayDataset` to predict over
        :param collect_logits: when True, also include logits columns in
            the output (forwarded to the output-column selection and the
            batch predictor)
        :return: a dataframe of predictions via ``self.df_engine``
        """
        self._check_dataset(dataset)

        infer_model = self.get_batch_infer_model(
            self.model,
            self.predictor_kwargs,
            get_output_columns(self.model.output_features,
                               include_logits=collect_logits),
            dataset.features,
            dataset.training_set_metadata,
            *args,
            collect_logits=collect_logits,
            **kwargs,
        )

        input_proc_columns = [
            feature.proc_column for feature in self.model.input_features.values()
        ]

        def cast_columns(batch: pd.DataFrame) -> pd.DataFrame:
            # Input columns must be tensor-typed before inference.
            for col in input_proc_columns:
                batch[col] = cast_as_tensor_dtype(batch[col])
            return batch

        # TODO(shreya): self.trainer_kwargs should have the correct resources; debug.
        # trainer_kwargs = {**get_trainer_kwargs(), **self.trainer_kwargs}
        cpus_per_worker, gpus_per_worker = self.get_resources_per_worker()

        tensor_ds = dataset.ds.map_batches(cast_columns, batch_format="pandas")
        prediction_ds = tensor_ds.map_batches(
            infer_model,
            batch_size=self.batch_size,
            compute="actors",
            batch_format="pandas",
            num_cpus=cpus_per_worker,
            num_gpus=gpus_per_worker,
        )

        predictions = self.df_engine.from_ray_dataset(prediction_ds)

        for output_feature in self.model.output_features.values():
            predictions = output_feature.unflatten(predictions)

        return predictions
Example 3
0
    def batch_predict(self, model, dataset, *args, **kwargs):
        """Predict over each dataset partition with a remotely shared model.

        Wraps ``model`` in a :class:`RayRemoteModel` so each partition
        worker can load it, runs a local :class:`Predictor` per partition,
        and returns the predictions restricted to the expected output
        columns, in order.
        """
        self._check_dataset(dataset)

        shared_model = RayRemoteModel(model)
        kwargs_for_predictor = self.predictor_kwargs
        ordered_columns = get_output_columns(model.output_features)

        def predict_partition(partition):
            # Load the model inside the worker and predict on its partition.
            local_model = shared_model.load()
            preds = Predictor(**kwargs_for_predictor).batch_predict(
                local_model, partition, *args, **kwargs)
            return preds[ordered_columns]

        return dataset.map_dataset_partitions(
            predict_partition,
            meta=[(col, 'object') for col in ordered_columns]
        )
Example 4
0
    def batch_predict(self, model: ECD, dataset: RayDataset, *args, **kwargs):
        """Run actor-based batch inference over ``dataset`` for ``model``.

        Shares the model with workers via :class:`RayRemoteModel`, maps an
        actor-backed batch predictor over the Ray dataset, and returns the
        result as a Dask DataFrame with each output feature unflattened.
        """
        self._check_dataset(dataset)

        shared_model = RayRemoteModel(model)
        infer_model = self.get_batch_infer_model(
            shared_model,
            self.predictor_kwargs,
            get_output_columns(model.output_features),
            dataset.features,
            dataset.data_hdf5_fp,
            *args,
            **kwargs,
        )

        # Request one GPU per actor only when the cluster actually has GPUs.
        gpus_per_actor = int(ray.cluster_resources().get('GPU', 0) > 0)

        result = dataset.ds.map_batches(
            infer_model,
            compute='actors',
            batch_format='pandas',
            num_gpus=gpus_per_actor,
        ).to_dask()

        for output_feature in model.output_features.values():
            result = output_feature.unflatten(result)

        return result