Example #1
0
    def _convert_to_tensor(
        self,
        data: pd.DataFrame,
        feature_columns: Optional[Union[List[str], List[List[str]], List[int],
                                        List[List[int]]]] = None,
        dtypes: Optional[torch.dtype] = None,
        unsqueeze: bool = True,
    ) -> torch.Tensor:
        """Turn a pandas batch into torch tensor(s).

        The arguments mirror those of ``convert_pandas_to_torch_tensor``.

        When ``feature_columns`` is a dict, the conversion is run once per
        entry and the results are returned in a dict with the same keys. In
        that case ``dtypes`` may itself be a dict (looked up per key) or a
        single value that is reused for every entry. For any other
        ``feature_columns`` value a single conversion call is made.
        """
        # TODO(amog): Add `_convert_numpy_to_torch_tensor` to use based on input
        # type. Reduce conversion cost if input is in Numpy.
        if not isinstance(feature_columns, dict):
            return convert_pandas_to_torch_tensor(
                data,
                columns=feature_columns,
                column_dtypes=dtypes,
                unsqueeze=unsqueeze,
            )

        dtypes_are_per_key = isinstance(dtypes, dict)
        tensors_by_key = {}
        for name, columns in feature_columns.items():
            tensors_by_key[name] = convert_pandas_to_torch_tensor(
                data,
                columns,
                dtypes[name] if dtypes_are_per_key else dtypes,
                unsqueeze=unsqueeze,
            )
        return tensors_by_key
Example #2
0
    def test_multi_input(self):
        """A list of column lists yields one ``(rows, 1)`` tensor per list."""
        column_groups = [["A"], ["B"]]
        tensors = convert_pandas_to_torch_tensor(data_batch,
                                                 columns=column_groups)
        assert len(tensors) == 2

        for position, tensor in enumerate(tensors):
            assert tensor.size() == (len(data_batch), 1)
            # Each tensor must hold exactly the column at the same position.
            source_column = data_batch.columns[position]
            expected_values = data_batch[[source_column]].to_numpy()
            assert np.array_equal(tensor.numpy(), expected_values)
Example #3
0
 def test_single_tensor_columns(self):
     """Selecting a flat list of columns yields one tensor of just those."""
     selected = ["A"]
     tensor = convert_pandas_to_torch_tensor(data_batch, columns=selected)
     # The expected width follows the number of *selected* columns. Deriving
     # it from ``len(data_batch.columns) - 1`` is only right while the
     # fixture happens to have exactly two columns.
     assert tensor.size() == (len(data_batch), len(selected))
     assert np.array_equal(tensor.numpy(), data_batch[selected].to_numpy())
Example #4
0
 def test_single_tensor_dtype(self):
     """An explicit ``column_dtypes`` is applied to the all-columns tensor."""
     requested_dtype = torch.float
     tensor = convert_pandas_to_torch_tensor(
         data_batch, column_dtypes=requested_dtype
     )

     # No ``columns`` given, so every column of the batch is expected.
     expected_shape = (len(data_batch), len(data_batch.columns))
     assert tensor.size() == expected_shape
     assert tensor.dtype == requested_dtype
     assert np.array_equal(tensor.numpy(), data_batch.to_numpy())
Example #5
0
 def test_invalid_args(self):
     """A flat column list paired with a list of dtypes raises TypeError."""
     flat_columns = ["A", "B"]
     dtype_list = [torch.float, torch.float]
     with pytest.raises(TypeError):
         convert_pandas_to_torch_tensor(
             data_batch, columns=flat_columns, column_dtypes=dtype_list
         )
Example #6
0
    def predict(
        self,
        data: DataBatchType,
        feature_columns: Optional[
            Union[List[str], List[List[str]], List[int], List[List[int]]]
        ] = None,
        dtype: Optional[torch.dtype] = None,
    ) -> DataBatchType:
        """Run inference on data batch.

        The batch is first passed through ``self.preprocessor`` (when one is
        set), then converted to torch tensor(s) with
        ``convert_pandas_to_torch_tensor`` and fed to ``self.model``. The
        forward pass runs under ``torch.no_grad()``.

        Args:
            data: A batch of input data. Either a pandas DataFrame or numpy
                array. A numpy array is wrapped in a DataFrame before
                conversion.
            feature_columns: The names or indices of the columns in the
                data to use as features to predict on. If this arg is a
                list of lists, the data batch is converted into multiple
                tensors (one per inner list), and the result of that
                conversion is passed to the model unchanged. This is useful
                for multi-input models. If None, then use all columns in
                ``data``.
            dtype: The torch dtype to use when creating the torch tensor.
                If set to None, then automatically infer the dtype.

        Examples:

        .. code-block:: python

            import numpy as np
            import torch
            from ray.ml.predictors.torch import TorchPredictor

            data = np.array([[1, 2], [3, 4]])

            # Use both columns as features.
            predictor = TorchPredictor(model=torch.nn.Linear(2, 1))
            predictions = predictor.predict(data, dtype=torch.float)

            # Only use first column as the feature
            predictor = TorchPredictor(model=torch.nn.Linear(1, 1))
            predictions = predictor.predict(
                data, feature_columns=[0], dtype=torch.float
            )

        .. code-block:: python

            import pandas as pd
            import torch
            from ray.ml.predictors.torch import TorchPredictor

            # Pandas dataframe.
            data = pd.DataFrame([[1, 2], [3, 4]], columns=["A", "B"])

            # Use both columns as features.
            predictor = TorchPredictor(model=torch.nn.Linear(2, 1))
            predictions = predictor.predict(data, dtype=torch.float)

            # Only use first column as the feature
            predictor = TorchPredictor(model=torch.nn.Linear(1, 1))
            predictions = predictor.predict(
                data, feature_columns=["A"], dtype=torch.float
            )


        Returns:
            DataBatchType: Prediction result, as a pandas DataFrame with a
            single ``predictions`` column.
        """
        if self.preprocessor:
            data = self.preprocessor.transform_batch(data)

        if isinstance(data, np.ndarray):
            # The conversion helper below takes a DataFrame, so wrap raw
            # arrays first.
            data = pd.DataFrame(data)

        # TODO(amog): Add `_convert_numpy_to_torch_tensor` to use based on input
        # type. Reduce conversion cost if input is in Numpy.
        tensor = convert_pandas_to_torch_tensor(
            data, columns=feature_columns, column_dtypes=dtype
        )

        # Inference never backpropagates: disable gradient tracking so the
        # forward pass does not build and hold on to an autograd graph.
        with torch.no_grad():
            output = self.model(tensor)

        prediction = output.cpu().detach().numpy()
        return pd.DataFrame(prediction, columns=["predictions"])