def predict(self, data: DataBatchType, **kwargs) -> DataBatchType:
    """Run inference on one batch and hand the result back in the input's format.

    The batch is converted to a pandas DataFrame, run through the
    preprocessor when one is set, passed to ``_predict_pandas``, and the
    resulting DataFrame is converted to the batch type that
    ``TYPE_TO_ENUM`` associates with ``type(data)``.

    Args:
        data: A batch of input data of type ``DataBatchType``.
        kwargs: Predictor-specific arguments, forwarded unchanged to
            ``_predict_pandas``.

    Returns:
        DataBatchType: The prediction result, in the batch type looked up
        from the type of ``data``.

    Raises:
        NotImplementedError: If the instance has no ``_preprocessor``
            attribute.
    """
    frame = convert_batch_type_to_pandas(data)

    # The attribute must exist (even if falsy); its absence means the
    # subclass skipped the base-class initializer.
    initialized = hasattr(self, "_preprocessor")
    if not initialized:
        raise NotImplementedError(
            "Subclasses of Predictor must call Predictor.__init__(preprocessor)."
        )

    preprocessor = self._preprocessor
    if preprocessor:
        frame = preprocessor.transform_batch(frame)

    predicted = self._predict_pandas(frame, **kwargs)

    # Resolve the output format only after prediction has succeeded.
    output_type = TYPE_TO_ENUM[type(data)]
    return convert_pandas_to_batch_type(predicted, type=output_type)
def test_pandas_pandas():
    """A DataFrame passes through both converters and still equals itself."""
    frame = pd.DataFrame({"x": [1, 2, 3]})

    converted = convert_batch_type_to_pandas(frame)
    # The expected result of the forward conversion is the input itself.
    assert frame.equals(converted)

    restored = convert_pandas_to_batch_type(converted, type=DataType.PANDAS)
    assert restored.equals(frame)
def test_dict_pandas():
    """A one-key dict of arrays converts to a TensorArray column and back."""
    batch = {"x": np.array([1, 2, 3])}
    expected_frame = pd.DataFrame({"x": TensorArray(batch["x"])})

    converted = convert_batch_type_to_pandas(batch)
    assert expected_frame.equals(converted)

    # The reverse conversion is compared against the bare array stored
    # under "x", not against the original dict.
    restored = convert_pandas_to_batch_type(converted, type=DataType.NUMPY)
    assert np.array_equal(restored, batch["x"])
def test_arrow_pandas():
    """An Arrow table built from a DataFrame converts to that frame and back."""
    frame = pd.DataFrame({"x": [1, 2, 3]})
    table = pa.Table.from_pandas(frame)

    converted = convert_batch_type_to_pandas(table)
    assert frame.equals(converted)

    restored = convert_pandas_to_batch_type(converted, type=DataType.ARROW)
    assert restored.equals(table)
def test_dict_multi_dim_to_pandas():
    """A dict holding a (3, 2, 2) array round-trips through pandas."""
    cube = np.arange(12).reshape((3, 2, 2))
    batch = {"x": cube}
    expected_frame = pd.DataFrame({"x": TensorArray(cube)})

    converted = convert_batch_type_to_pandas(batch)
    assert expected_frame.equals(converted)

    # The reverse conversion is compared against the array stored under "x".
    restored = convert_pandas_to_batch_type(converted, type=DataType.NUMPY)
    assert np.array_equal(restored, batch["x"])
def test_numpy_object_pandas():
    """An object-dtype array of unequal-length lists round-trips via pandas."""
    ragged = np.array([[1, 2, 3], [1]], dtype=object)
    # A bare array is expected to land in the column named TENSOR_COLUMN_NAME.
    expected_frame = pd.DataFrame({TENSOR_COLUMN_NAME: TensorArray(ragged)})

    converted = convert_batch_type_to_pandas(ragged)
    assert expected_frame.equals(converted)

    restored = convert_pandas_to_batch_type(converted, type=DataType.NUMPY)
    assert np.array_equal(restored, ragged)
def test_numpy_multi_dim_pandas():
    """A bare (3, 2, 2) array round-trips through the pandas converters."""
    cube = np.arange(12).reshape((3, 2, 2))
    # A bare array is expected to land in the column named TENSOR_COLUMN_NAME.
    expected_frame = pd.DataFrame({TENSOR_COLUMN_NAME: TensorArray(cube)})

    converted = convert_batch_type_to_pandas(cube)
    assert expected_frame.equals(converted)

    restored = convert_pandas_to_batch_type(converted, type=DataType.NUMPY)
    assert np.array_equal(restored, cube)
def test_predict(batch_type):
    """TorchPredictor accepts ``batch_type`` input and yields three predictions.

    Args:
        batch_type: Key into ``TYPE_TO_ENUM`` selecting the batch format
            the input frame is converted to before prediction.
    """
    predictor = TorchPredictor(model=DummyModelMultiInput())

    frame = pd.DataFrame({"X0": [0.0, 0.0, 0.0], "X1": [1.0, 2.0, 3.0]})
    batch = convert_pandas_to_batch_type(frame, type=TYPE_TO_ENUM[batch_type])

    output = predictor.predict(batch, dtype=torch.float)
    result_frame = convert_batch_type_to_pandas(output)

    assert len(result_frame) == 3
    flat_values = result_frame.to_numpy().flatten().tolist()
    assert flat_values == [1.0, 2.0, 3.0]
def test_predict(batch_type):
    """TensorflowPredictor accepts ``batch_type`` input and yields three predictions.

    Args:
        batch_type: Key into ``TYPE_TO_ENUM`` selecting the batch format
            the input frame is converted to before prediction.
    """
    predictor = TensorflowPredictor(model_definition=build_model_multi_input)

    frame = pd.DataFrame({"A": [0.0, 0.0, 0.0], "B": [1.0, 2.0, 3.0]})
    batch = convert_pandas_to_batch_type(frame, type=TYPE_TO_ENUM[batch_type])

    output = predictor.predict(batch)
    result_frame = convert_batch_type_to_pandas(output)

    assert len(result_frame) == 3
    flat_values = result_frame.to_numpy().flatten().tolist()
    assert flat_values == [1.0, 2.0, 3.0]
def test_arrow_tensor_pandas():
    """An Arrow table with a tensor column converts to a TensorArray frame and back."""
    values = np.array([1, 2, 3])
    expected_frame = pd.DataFrame({"x": TensorArray(values)})
    table = pa.Table.from_arrays(
        [ArrowTensorArray.from_numpy(values)],
        names=["x"],
    )

    converted = convert_batch_type_to_pandas(table)
    assert expected_frame.equals(converted)

    restored = convert_pandas_to_batch_type(converted, type=DataType.ARROW)
    assert restored.equals(table)
def test_dict_pandas_multi_column():
    """A two-key dict of arrays round-trips, column by column, through pandas."""
    array_dict = {"x": np.array([1, 2, 3]), "y": np.array([4, 5, 6])}
    expected_frame = pd.DataFrame(
        {name: TensorArray(column) for name, column in array_dict.items()}
    )

    converted = convert_batch_type_to_pandas(array_dict)
    assert expected_frame.equals(converted)

    restored = convert_pandas_to_batch_type(converted, type=DataType.NUMPY)
    # Every returned entry must match the input array stored under the same key.
    for name, column in restored.items():
        assert np.array_equal(column, array_dict[name])
def test_predict_no_preprocessor(batch_type, batch_size):
    """RLPredictor restored without a preprocessor returns one action per row.

    Args:
        batch_type: Key into ``TYPE_TO_ENUM`` selecting the observation
            batch format.
        batch_size: Number of observation rows to feed the predictor.
    """
    predictor = RLPredictor.from_checkpoint(create_checkpoint())

    # Observations: ``batch_size`` rows of ten 1.0 values each.
    observation_frame = pd.DataFrame([[1.0] * 10] * batch_size)
    observations = convert_pandas_to_batch_type(
        observation_frame, type=TYPE_TO_ENUM[batch_type]
    )

    # Predictions
    raw_actions = predictor.predict(observations)
    actions = convert_batch_type_to_pandas(raw_actions)

    assert len(actions) == batch_size
    # We add [0., 1.) to 1.0, so actions should be in [1., 2.)
    action_rows = np.array(actions)
    assert all(1.0 <= row.item() < 2.0 for row in action_rows)
def test_predict_with_preprocessor(batch_type, batch_size):
    """RLPredictor restored with a preprocessor returns one action per row.

    Args:
        batch_type: Key into ``TYPE_TO_ENUM`` selecting the observation
            batch format.
        batch_size: Number of observation rows to feed the predictor.
    """
    checkpoint = create_checkpoint(preprocessor=_DummyPreprocessor())
    predictor = RLPredictor.from_checkpoint(checkpoint)

    # Observations: ``batch_size`` rows of ten 1.0 values each.
    observation_frame = pd.DataFrame([[1.0] * 10] * batch_size)
    observations = convert_pandas_to_batch_type(
        observation_frame, type=TYPE_TO_ENUM[batch_type]
    )

    # Predictions
    raw_actions = predictor.predict(observations)
    actions = convert_batch_type_to_pandas(raw_actions)

    assert len(actions) == batch_size
    # Preprocessor doubles observations to 2.0, then we add [0., 1.),
    # so actions should be in [2., 3.)
    action_rows = np.array(actions)
    assert all(2.0 <= row.item() < 3.0 for row in action_rows)
def predict(self, data: DataBatchType, **kwargs) -> DataBatchType:
    """Run inference on one batch and hand the result back in the input's format.

    The batch is converted to a pandas DataFrame, run through
    ``self.preprocessor`` when that attribute exists and is truthy, passed
    to ``_predict_pandas``, and the resulting DataFrame is converted to
    the batch type that ``TYPE_TO_ENUM`` associates with ``type(data)``.

    Args:
        data: A batch of input data of type ``DataBatchType``.
        kwargs: Predictor-specific arguments, forwarded unchanged to
            ``_predict_pandas``.

    Returns:
        DataBatchType: Prediction result.
    """
    frame = convert_batch_type_to_pandas(data)

    # A missing attribute is treated the same as "no preprocessor".
    preprocessor = getattr(self, "preprocessor", None)
    if preprocessor:
        frame = preprocessor.transform_batch(frame)

    predicted = self._predict_pandas(frame, **kwargs)

    # Resolve the output format only after prediction has succeeded.
    output_type = TYPE_TO_ENUM[type(data)]
    return convert_pandas_to_batch_type(predicted, type=output_type)
def __call__(self, batch):
    """Predict on ``batch`` and return the result as a pandas DataFrame.

    ``predict_kwargs`` is not defined in this method; it is resolved from
    an enclosing or module scope and forwarded to ``predictor.predict``.

    Args:
        batch: The batch passed straight to ``self.predictor.predict``.

    Returns:
        The predictor output after ``convert_batch_type_to_pandas``.
    """
    return convert_batch_type_to_pandas(
        self.predictor.predict(batch, **predict_kwargs)
    )
def test_dict_fail():
    """Converting a dict whose value is a plain string raises ``ValueError``."""
    bad_batch = {"x": "y"}

    with pytest.raises(ValueError):
        convert_batch_type_to_pandas(bad_batch)