def _convert_to_tensor(
    self,
    data: pd.DataFrame,
    feature_columns: Optional[
        Union[List[str], List[List[str]], List[int], List[List[int]]]
    ] = None,
    dtypes: Optional[torch.dtype] = None,
    unsqueeze: bool = True,
) -> torch.Tensor:
    """Convert a DataFrame batch to tensor(s) via ``convert_pandas_to_torch_tensor``.

    The arguments mirror those of ``convert_pandas_to_torch_tensor``, with one
    addition: ``feature_columns`` may also be a dict. In that case every key
    is converted separately and a dict with the same keys is returned.
    ``dtypes`` may then be a dict indexed by the same keys, or a single value
    that is applied to every key.

    Args:
        data: The batch to convert.
        feature_columns: Columns to convert, or a dict mapping a key to the
            columns that should be converted for that key.
        dtypes: Dtype(s) forwarded to the conversion helper, or a dict of
            them when ``feature_columns`` is a dict.
        unsqueeze: Forwarded unchanged to the conversion helper.

    Returns:
        Whatever ``convert_pandas_to_torch_tensor`` returns for the given
        columns, or a dict of such results when ``feature_columns`` is a dict.
    """
    # TODO(amog): Add `_convert_numpy_to_torch_tensor` to use based on input
    #  type. Reduce conversion cost if input is in Numpy.
    if not isinstance(feature_columns, dict):
        return convert_pandas_to_torch_tensor(
            data,
            columns=feature_columns,
            column_dtypes=dtypes,
            unsqueeze=unsqueeze,
        )

    # Decide once whether dtypes are looked up per key or shared by all keys.
    dtypes_are_keyed = isinstance(dtypes, dict)
    tensors_by_key = {}
    for key in feature_columns:
        key_columns = feature_columns[key]
        key_dtypes = dtypes[key] if dtypes_are_keyed else dtypes
        tensors_by_key[key] = convert_pandas_to_torch_tensor(
            data,
            key_columns,
            key_dtypes,
            unsqueeze=unsqueeze,
        )
    return tensors_by_key
def test_multi_input(self):
    """A list of column lists yields one tensor per inner list."""
    column_groups = [["A"], ["B"]]
    tensors = convert_pandas_to_torch_tensor(data_batch, columns=column_groups)
    assert len(tensors) == len(column_groups)
    # Compare each tensor against the columns that were actually requested
    # for it, rather than against whatever sits at the same position in
    # ``data_batch.columns``.
    for tensor, group in zip(tensors, column_groups):
        assert tensor.size() == (len(data_batch), len(group))
        assert np.array_equal(tensor.numpy(), data_batch[group].to_numpy())
def test_single_tensor_columns(self):
    """Selecting a subset of columns yields a tensor with only those columns."""
    columns = ["A"]
    tensor = convert_pandas_to_torch_tensor(data_batch, columns=columns)
    # The expected width is the number of requested columns; deriving it from
    # ``len(data_batch.columns) - 1`` only holds for a two-column fixture.
    assert tensor.size() == (len(data_batch), len(columns))
    assert np.array_equal(tensor.numpy(), data_batch[columns].to_numpy())
def test_single_tensor_dtype(self):
    """Converting the whole batch with an explicit dtype keeps shape and values."""
    converted = convert_pandas_to_torch_tensor(
        data_batch, column_dtypes=torch.float
    )
    expected_shape = (len(data_batch), len(data_batch.columns))
    assert converted.size() == expected_shape
    assert converted.dtype == torch.float
    expected_values = data_batch.to_numpy()
    assert np.array_equal(converted.numpy(), expected_values)
def test_invalid_args(self):
    """A list of dtypes combined with a flat list of columns raises TypeError."""
    flat_columns = ["A", "B"]
    dtype_list = [torch.float, torch.float]
    with pytest.raises(TypeError):
        convert_pandas_to_torch_tensor(
            data_batch,
            columns=flat_columns,
            column_dtypes=dtype_list,
        )
def predict(
    self,
    data: DataBatchType,
    feature_columns: Optional[
        Union[List[str], List[List[str]], List[int], List[List[int]]]
    ] = None,
    dtype: Optional[torch.dtype] = None,
) -> DataBatchType:
    """Run inference on data batch.

    The data is converted into a torch Tensor before being inputted to
    the model. If a preprocessor is set, it is applied to the batch first.

    Args:
        data: A batch of input data. Either a pandas DataFrame or numpy
            array.
        feature_columns: The names or indices of the columns in the
            data to use as features to predict on. If this arg is a
            list of lists, then the data batch will be converted into
            multiple tensors, which are passed to the model as returned
            by the conversion. This is useful for multi-input models. If
            None, then use all columns in ``data``.
        dtype: The torch dtype to use when creating the torch tensor.
            If set to None, then automatically infer the dtype.

    Examples:

    .. code-block:: python

        import numpy as np
        import torch
        from ray.ml.predictors.torch import TorchPredictor

        model = torch.nn.Linear(1, 1)
        predictor = TorchPredictor(model=model)

        data = np.array([[1, 2], [3, 4]])
        predictions = predictor.predict(data)

        # Only use first column as the feature
        predictions = predictor.predict(data, feature_columns=[0])

    .. code-block:: python

        import pandas as pd
        import torch
        from ray.ml.predictors.torch import TorchPredictor

        model = torch.nn.Linear(1, 1)
        predictor = TorchPredictor(model=model)

        # Pandas dataframe.
        data = pd.DataFrame([[1, 2], [3, 4]], columns=["A", "B"])

        predictions = predictor.predict(data)

        # Only use first column as the feature
        predictions = predictor.predict(data, feature_columns=["A"])

    Returns:
        DataBatchType: Prediction result, as a pandas DataFrame with a
        single ``predictions`` column.
    """
    if self.preprocessor:
        data = self.preprocessor.transform_batch(data)

    if isinstance(data, np.ndarray):
        # If numpy array, then convert to pandas dataframe.
        data = pd.DataFrame(data)

    # TODO(amog): Add `_convert_numpy_to_torch_tensor` to use based on input
    #  type. Reduce conversion cost if input is in Numpy.
    tensor = convert_pandas_to_torch_tensor(
        data, columns=feature_columns, column_dtypes=dtype
    )

    # Inference never needs gradients; disabling autograd avoids building a
    # graph that would be thrown away by the ``detach`` below.
    with torch.no_grad():
        output = self.model(tensor)
    prediction = output.cpu().detach().numpy()

    return pd.DataFrame(prediction, columns=["predictions"])