Esempio n. 1
0
    def predict(self, data: DataBatchType, **kwargs) -> DataBatchType:
        """Perform inference on a batch of data.

        The batch is converted to a pandas DataFrame, run through the
        preprocessor when one is set, handed to ``_predict_pandas``, and the
        result is converted back to the batch type of the input.

        Args:
            data: A batch of input data of type ``DataBatchType``.
            kwargs: Arguments specific to predictor implementations. These are
                passed directly to ``_predict_pandas``.

        Returns:
            DataBatchType: Prediction result. The return type will be the same as the
                input type.

        Raises:
            NotImplementedError: If the instance has no ``_preprocessor``
                attribute, i.e. the subclass never called
                ``Predictor.__init__(preprocessor)``.
        """
        # Validate the instance before touching the data, so a misconfigured
        # subclass is reported immediately rather than after (or instead of)
        # the batch conversion.
        if not hasattr(self, "_preprocessor"):
            raise NotImplementedError(
                "Subclasses of Predictor must call Predictor.__init__(preprocessor)."
            )

        data_df = convert_batch_type_to_pandas(data)

        # A falsy preprocessor (e.g. ``None``) means "no preprocessing".
        if self._preprocessor:
            data_df = self._preprocessor.transform_batch(data_df)

        predictions_df = self._predict_pandas(data_df, **kwargs)
        return convert_pandas_to_batch_type(
            predictions_df, type=TYPE_TO_ENUM[type(data)]
        )
Esempio n. 2
0
def test_pandas_pandas():
    """A pandas batch must survive both converters unchanged."""
    frame = pd.DataFrame({"x": [1, 2, 3]})

    # pandas -> pandas: the converted frame must equal the input.
    as_pandas = convert_batch_type_to_pandas(frame)
    assert frame.equals(as_pandas)

    # Converting back with the PANDAS data type must equal the input as well.
    round_tripped = convert_pandas_to_batch_type(as_pandas, type=DataType.PANDAS)
    assert round_tripped.equals(frame)
Esempio n. 3
0
def test_dict_pandas():
    """A one-column dict of ndarrays converts to a TensorArray column and back."""
    values = np.array([1, 2, 3])
    batch = {"x": values}

    # dict -> pandas: the column is expected to be wrapped in a TensorArray.
    wanted = pd.DataFrame({"x": TensorArray(values)})
    as_pandas = convert_batch_type_to_pandas(batch)
    assert wanted.equals(as_pandas)

    # pandas -> numpy: the result is compared against the original array.
    round_tripped = convert_pandas_to_batch_type(as_pandas, type=DataType.NUMPY)
    assert np.array_equal(round_tripped, values)
Esempio n. 4
0
def test_arrow_pandas():
    """An Arrow table converts to the equivalent DataFrame and back."""
    frame = pd.DataFrame({"x": [1, 2, 3]})
    table = pa.Table.from_pandas(frame)

    # arrow -> pandas: must reproduce the frame the table was built from.
    as_pandas = convert_batch_type_to_pandas(table)
    assert frame.equals(as_pandas)

    # pandas -> arrow: must reproduce the original table.
    round_tripped = convert_pandas_to_batch_type(as_pandas, type=DataType.ARROW)
    assert round_tripped.equals(table)
Esempio n. 5
0
def test_dict_multi_dim_to_pandas():
    """A dict holding a (3, 2, 2) ndarray converts to a TensorArray column and back."""
    values = np.arange(12).reshape((3, 2, 2))
    batch = {"x": values}

    # dict -> pandas: the multi-dimensional array is expected as a TensorArray.
    wanted = pd.DataFrame({"x": TensorArray(values)})
    as_pandas = convert_batch_type_to_pandas(batch)
    assert wanted.equals(as_pandas)

    # pandas -> numpy: the result is compared against the original tensor.
    round_tripped = convert_pandas_to_batch_type(as_pandas, type=DataType.NUMPY)
    assert np.array_equal(round_tripped, batch["x"])
Esempio n. 6
0
def test_numpy_object_pandas():
    """A ragged object-dtype ndarray converts to the tensor column and back."""
    ragged = np.array([[1, 2, 3], [1]], dtype=object)

    # numpy -> pandas: a bare array is expected under TENSOR_COLUMN_NAME.
    wanted = pd.DataFrame({TENSOR_COLUMN_NAME: TensorArray(ragged)})
    as_pandas = convert_batch_type_to_pandas(ragged)
    assert wanted.equals(as_pandas)

    # pandas -> numpy: the result is compared against the original array.
    round_tripped = convert_pandas_to_batch_type(as_pandas, type=DataType.NUMPY)
    assert np.array_equal(round_tripped, ragged)
Esempio n. 7
0
def test_numpy_multi_dim_pandas():
    """A (3, 2, 2) ndarray converts to the tensor column and back."""
    tensor = np.arange(12).reshape((3, 2, 2))

    # numpy -> pandas: a bare array is expected under TENSOR_COLUMN_NAME.
    wanted = pd.DataFrame({TENSOR_COLUMN_NAME: TensorArray(tensor)})
    as_pandas = convert_batch_type_to_pandas(tensor)
    assert wanted.equals(as_pandas)

    # pandas -> numpy: the result is compared against the original tensor.
    round_tripped = convert_pandas_to_batch_type(as_pandas, type=DataType.NUMPY)
    assert np.array_equal(round_tripped, tensor)
Esempio n. 8
0
def test_predict(batch_type):
    """TorchPredictor.predict yields the expected values for ``batch_type`` input.

    Args:
        batch_type: Batch type used as a key into ``TYPE_TO_ENUM``; supplied
            from outside this function (presumably by test parametrization).
    """
    frame = pd.DataFrame({"X0": [0.0, 0.0, 0.0], "X1": [1.0, 2.0, 3.0]})
    predictor = TorchPredictor(model=DummyModelMultiInput())

    # Feed the predictor a batch of the requested type, then normalise the
    # output back to pandas so the assertions are type-independent.
    batch = convert_pandas_to_batch_type(frame, type=TYPE_TO_ENUM[batch_type])
    predictions = convert_batch_type_to_pandas(
        predictor.predict(batch, dtype=torch.float)
    )

    assert len(predictions) == 3
    flat_values = predictions.to_numpy().flatten().tolist()
    assert flat_values == [1.0, 2.0, 3.0]
Esempio n. 9
0
def test_predict(batch_type):
    """TensorflowPredictor.predict yields the expected values for ``batch_type`` input.

    Args:
        batch_type: Batch type used as a key into ``TYPE_TO_ENUM``; supplied
            from outside this function (presumably by test parametrization).
    """
    frame = pd.DataFrame({"A": [0.0, 0.0, 0.0], "B": [1.0, 2.0, 3.0]})
    predictor = TensorflowPredictor(model_definition=build_model_multi_input)

    # Feed the predictor a batch of the requested type, then normalise the
    # output back to pandas so the assertions are type-independent.
    batch = convert_pandas_to_batch_type(frame, type=TYPE_TO_ENUM[batch_type])
    predictions = convert_batch_type_to_pandas(predictor.predict(batch))

    assert len(predictions) == 3
    flat_values = predictions.to_numpy().flatten().tolist()
    assert flat_values == [1.0, 2.0, 3.0]
Esempio n. 10
0
def test_arrow_tensor_pandas():
    """An Arrow table with a tensor column converts to a TensorArray frame and back."""
    values = np.array([1, 2, 3])
    frame = pd.DataFrame({"x": TensorArray(values)})
    tensor_column = ArrowTensorArray.from_numpy(values)
    table = pa.Table.from_arrays([tensor_column], names=["x"])

    # arrow -> pandas: the tensor column is expected as a TensorArray.
    as_pandas = convert_batch_type_to_pandas(table)
    assert frame.equals(as_pandas)

    # pandas -> arrow: must reproduce the original table.
    round_tripped = convert_pandas_to_batch_type(as_pandas, type=DataType.ARROW)
    assert round_tripped.equals(table)
Esempio n. 11
0
def test_dict_pandas_multi_column():
    """A two-column dict of ndarrays converts to TensorArray columns and back."""
    array_dict = {"x": np.array([1, 2, 3]), "y": np.array([4, 5, 6])}

    # dict -> pandas: every column is expected to be wrapped in a TensorArray.
    wrapped_columns = {}
    for name, values in array_dict.items():
        wrapped_columns[name] = TensorArray(values)
    wanted = pd.DataFrame(wrapped_columns)
    as_pandas = convert_batch_type_to_pandas(array_dict)
    assert wanted.equals(as_pandas)

    # pandas -> numpy: each returned column is compared with its source array.
    round_tripped = convert_pandas_to_batch_type(as_pandas, type=DataType.NUMPY)
    for name, values in round_tripped.items():
        assert np.array_equal(values, array_dict[name])
Esempio n. 12
0
def test_predict_no_preprocessor(batch_type, batch_size):
    """RLPredictor restored without a preprocessor returns actions in [1., 2.).

    Args:
        batch_type: Batch type used as a key into ``TYPE_TO_ENUM``.
        batch_size: Number of observation rows to generate.
    """
    predictor = RLPredictor.from_checkpoint(create_checkpoint())

    # Observations: ``batch_size`` rows of ten 1.0 values each.
    row = [1.0] * 10
    frame = pd.DataFrame([row] * batch_size)
    observations = convert_pandas_to_batch_type(
        frame, type=TYPE_TO_ENUM[batch_type]
    )

    # Predictions, normalised back to pandas.
    actions = convert_batch_type_to_pandas(predictor.predict(observations))

    assert len(actions) == batch_size
    # We add [0., 1.) to 1.0, so actions should be in [1., 2.)
    assert all(1.0 <= entry.item() < 2.0 for entry in np.array(actions))
Esempio n. 13
0
def test_predict_with_preprocessor(batch_type, batch_size):
    """RLPredictor restored with ``_DummyPreprocessor`` returns actions in [2., 3.).

    Args:
        batch_type: Batch type used as a key into ``TYPE_TO_ENUM``.
        batch_size: Number of observation rows to generate.
    """
    checkpoint = create_checkpoint(preprocessor=_DummyPreprocessor())
    predictor = RLPredictor.from_checkpoint(checkpoint)

    # Observations: ``batch_size`` rows of ten 1.0 values each.
    row = [1.0] * 10
    frame = pd.DataFrame([row] * batch_size)
    observations = convert_pandas_to_batch_type(
        frame, type=TYPE_TO_ENUM[batch_type]
    )

    # Predictions, normalised back to pandas.
    actions = convert_batch_type_to_pandas(predictor.predict(observations))

    assert len(actions) == batch_size
    # Preprocessor doubles observations to 2.0, then we add [0., 1.),
    # so actions should be in [2., 3.)
    assert all(2.0 <= entry.item() < 3.0 for entry in np.array(actions))
Esempio n. 14
0
    def predict(self, data: DataBatchType, **kwargs) -> DataBatchType:
        """Run inference on one batch and return the result in the input's type.

        The batch is converted to a pandas DataFrame, transformed by the
        ``preprocessor`` attribute when it exists and is truthy, and then
        passed to ``_predict_pandas``.

        Args:
            data: A batch of input data of type ``DataBatchType``.
            kwargs: Arguments specific to predictor implementations. These are
                forwarded unchanged to ``_predict_pandas``.

        Returns:
            DataBatchType: Prediction result, converted using the batch type
                looked up from ``type(data)``.
        """
        frame = convert_batch_type_to_pandas(data)

        # A missing or falsy ``preprocessor`` attribute means the batch is
        # used as-is.
        if getattr(self, "preprocessor", None):
            frame = self.preprocessor.transform_batch(frame)

        predictions = self._predict_pandas(frame, **kwargs)
        output_type = TYPE_TO_ENUM[type(data)]
        return convert_pandas_to_batch_type(predictions, type=output_type)
Esempio n. 15
0
 def __call__(self, batch):
     """Predict on ``batch`` and return the output converted to pandas.

     ``predict_kwargs`` is not defined in this method; it is read from an
     enclosing scope and forwarded to ``self.predictor.predict``.
     """
     return convert_batch_type_to_pandas(
         self.predictor.predict(batch, **predict_kwargs)
     )
Esempio n. 16
0
def test_dict_fail():
    """Converting a dict whose value is a plain string must raise ValueError."""
    bad_batch = {"x": "y"}
    with pytest.raises(ValueError):
        convert_batch_type_to_pandas(bad_batch)