def test_predict():
    preprocessor = DummyPreprocessor()
    predictor = SklearnPredictor(estimator=model, preprocessor=preprocessor)

    data_batch = np.array([[1, 2], [3, 4], [5, 6]])
    predictions = predictor.predict(data_batch)

    assert len(predictions) == 3
    assert hasattr(predictor.get_preprocessor(), "_batch_transformed")

def test_predict_feature_columns():
    preprocessor = DummyPreprocessor()
    predictor = SklearnPredictor(estimator=model, preprocessor=preprocessor)

    data_batch = np.array([[1, 2, 7], [3, 4, 8], [5, 6, 9]])
    predictions = predictor.predict(data_batch, feature_columns=[0, 1])

    assert len(predictions) == 3
    assert hasattr(predictor.get_preprocessor(), "_batch_transformed")

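# The next test relies on a ``ray_start_4_cpus`` fixture. If it is not already
# provided by a shared conftest or earlier in this module, a minimal sketch
# (assuming the usual init/shutdown fixture pattern in Ray test suites and that
# ``ray`` and ``pytest`` are imported at the top of the file) looks like this:
@pytest.fixture
def ray_start_4_cpus():
    # Start a local Ray cluster with 4 CPUs for the duration of the test.
    address_info = ray.init(num_cpus=4)
    yield address_info
    # Tear the cluster down so subsequent tests start from a clean state.
    ray.shutdown()
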
def test_predict_set_cpus(ray_start_4_cpus):
    preprocessor = DummyPreprocessor()
    predictor = SklearnPredictor(estimator=model, preprocessor=preprocessor)

    data_batch = np.array([[1, 2], [3, 4], [5, 6]])
    predictions = predictor.predict(data_batch, num_estimator_cpus=2)

    assert len(predictions) == 3
    assert hasattr(predictor.get_preprocessor(), "_batch_transformed")
    assert predictor.estimator.n_jobs == 2

# Renamed from ``test_predict`` to avoid shadowing the ndarray-only test above.
# The parametrization values are assumed; extend the list with any other batch
# types present in TYPE_TO_ENUM (e.g. pyarrow Tables) as needed.
@pytest.mark.parametrize("batch_type", [np.ndarray, pd.DataFrame])
def test_predict_batch_type(batch_type):
    preprocessor = DummyPreprocessor()
    predictor = SklearnPredictor(estimator=model, preprocessor=preprocessor)

    raw_batch = pd.DataFrame([[1, 2], [3, 4], [5, 6]])
    data_batch = convert_pandas_to_batch_type(raw_batch, type=TYPE_TO_ENUM[batch_type])
    predictions = predictor.predict(data_batch)

    assert len(predictions) == 3
    assert hasattr(predictor.get_preprocessor(), "_batch_transformed")

def test_predict_feature_columns_pandas():
    pandas_data = pd.DataFrame(dummy_data, columns=["A", "B"])
    pandas_target = pd.Series(dummy_target)
    pandas_model = RandomForestClassifier(n_estimators=10, random_state=0).fit(
        pandas_data, pandas_target
    )
    preprocessor = DummyPreprocessor()
    predictor = SklearnPredictor(estimator=pandas_model, preprocessor=preprocessor)

    data_batch = pd.DataFrame(
        np.array([[1, 2, 7], [3, 4, 8], [5, 6, 9]]), columns=["A", "B", "C"]
    )
    predictions = predictor.predict(data_batch, feature_columns=["A", "B"])

    assert len(predictions) == 3
    assert hasattr(predictor.get_preprocessor(), "_batch_transformed")

def test_init():
    preprocessor = DummyPreprocessor()
    preprocessor.attr = 1
    predictor = SklearnPredictor(estimator=model, preprocessor=preprocessor)

    with tempfile.TemporaryDirectory() as tmpdir:
        with open(os.path.join(tmpdir, MODEL_KEY), "wb") as f:
            cpickle.dump(model, f)
        save_preprocessor_to_dir(preprocessor, tmpdir)

        checkpoint = Checkpoint.from_directory(tmpdir)
        checkpoint_predictor = SklearnPredictor.from_checkpoint(checkpoint)

    assert np.allclose(
        checkpoint_predictor.estimator.feature_importances_,
        predictor.estimator.feature_importances_,
    )
    assert (
        checkpoint_predictor.get_preprocessor().attr
        == predictor.get_preprocessor().attr
    )

def test_predict_no_preprocessor():
    with tempfile.TemporaryDirectory() as tmpdir:
        with open(os.path.join(tmpdir, MODEL_KEY), "wb") as f:
            cpickle.dump(model, f)

        checkpoint = Checkpoint.from_directory(tmpdir)
        predictor = SklearnPredictor.from_checkpoint(checkpoint)

    data_batch = np.array([[1, 2], [3, 4], [5, 6]])
    predictions = predictor.predict(data_batch)

    assert len(predictions) == 3
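
# Standard pytest entry point used across Ray test files; a minimal sketch,
# assuming this module is also meant to be runnable directly with `python`.
if __name__ == "__main__":
    import sys

    import pytest

    sys.exit(pytest.main(["-v", "-x", __file__]))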