def test_automl():
    """Run an AutoMLTransformer built around a fitted ColumnTransformer through ``_test_model_impl``.

    The feature side is a ``RobustImputer`` (constant strategy, NaN fill value)
    applied to all four columns via a ``ColumnTransformer``; the target side is
    an ``NALabelEncoder`` fitted on the same sample matrix.
    """
    helper = SklearnTestHelper()

    # 4x4 float32 sample matrix; three of the four rows contain one NaN each.
    nan = np.nan
    samples = np.array(
        [
            [4, 5, nan, 7],
            [0, nan, 2, 3],
            [8, 9, 10, 11],
            [nan, 13, 14, 15],
        ],
        dtype=np.float32,
    )

    # Inner pipeline: a single constant-strategy imputer step.
    imputer_steps = [("robustimputer", RobustImputer(fill_values=np.nan, strategy="constant"))]
    numeric_pipeline = Pipeline(steps=imputer_steps)

    # The column transformer is fitted before it is wrapped in the outer pipeline.
    column_transformer = ColumnTransformer(
        transformers=[("numeric_processing", numeric_pipeline, [0, 1, 2, 3])]
    )
    column_transformer.fit(samples)
    feature_pipeline = Pipeline(steps=[("column_transformer", column_transformer)])

    header = Header(column_names=["x1", "x2", "x3", "class"], target_column_name="class")

    label_encoder = NALabelEncoder()
    label_encoder.fit(samples)

    transformer = AutoMLTransformer(header, feature_pipeline, label_encoder)

    # Both entries of the input shape are given as relay.Any().
    input_shape = (relay.Any(), relay.Any())
    _test_model_impl(helper, transformer, input_shape, samples, auto_ml=True)
def test_na_label_encoder():
    """Fit an ``NALabelEncoder`` and pass it, with NaN-containing data, to ``_test_model_impl``."""
    helper = SklearnTestHelper()

    encoder = NALabelEncoder()
    encoder.fit(np.array([[1, 2, 2, 6]], dtype=np.float32))

    # Evaluation input: a single row whose first entry is NaN.
    samples = np.array([[np.nan, 0, 1, 2, 6]], dtype=np.float32)

    # NOTE(review): the second shape entry is the number of rows in ``samples``
    # (1), not its 5 columns -- confirm this is what the helper expects.
    row_count = samples.shape[0]
    input_shape = (relay.Any(), row_count)

    _test_model_impl(helper, encoder, input_shape, samples)
def test_automl_transformer_regression():
    """Check AutoMLTransformer on a regression dataset with non-finite target values.

    The dataset is loaded from ``regression_na_labels.csv``; its first three
    columns are used as features and the fourth as the target. The test
    asserts that transforming features alone keeps their shape, and that
    transforming features plus target yields the expected 3x4 matrix.
    """
    dataset = read_csv_data(source="test/data/csv/regression_na_labels.csv")
    features, target = dataset[:, :3], dataset[:, 3]

    column_header = Header(
        column_names=["x1", "x2", "x3", "class"],
        target_column_name="class",
    )
    transformer = AutoMLTransformer(
        header=column_header,
        feature_transformer=RobustImputer(strategy="constant", fill_values=0),
        target_transformer=NALabelEncoder(),
    )
    fitted = transformer.fit(features, target)

    # Features only: the output shape must match the input shape.
    features_out = fitted.transform(features)
    assert features_out.shape == features.shape

    # Features with the target appended as a trailing column.
    target_column = target.reshape(-1, 1)
    combined = np.concatenate((features, target_column), axis=1)
    combined_out = fitted.transform(combined)
    assert combined_out.shape == (3, 4)

    expected = np.array(
        [
            [1.1, 1.0, 2.0, 3.0],
            [2.2, 4.0, 0.0, 5.0],
            [3.3, 12.0, 13.0, 14.0],
        ]
    )
    assert np.array_equal(combined_out, expected)
def test_na_label_encoder(y, y_expected):
    """Fit ``NALabelEncoder`` on ``y`` and check that transforming ``y`` gives ``y_expected``.

    ``y`` and ``y_expected`` are supplied from outside this function
    (presumably by a parametrization or fixtures not visible here).
    """
    encoder = NALabelEncoder()
    encoder.fit(y)
    assert_array_equal(encoder.transform(y), y_expected)
from sagemaker_sklearn_extension.impute import RobustMissingIndicator
from sagemaker_sklearn_extension.preprocessing import LogExtremeValuesTransformer
from sagemaker_sklearn_extension.preprocessing import NALabelEncoder
from sagemaker_sklearn_extension.preprocessing import QuadraticFeatures
from sagemaker_sklearn_extension.preprocessing import QuantileExtremeValuesTransformer
from sagemaker_sklearn_extension.preprocessing import RemoveConstantColumnsTransformer
from sagemaker_sklearn_extension.preprocessing import RobustLabelEncoder
from sagemaker_sklearn_extension.preprocessing import RobustStandardScaler
from sagemaker_sklearn_extension.preprocessing import ThresholdOneHotEncoder


# Each entry is a default-constructed estimator instance; the test below runs
# once per entry.
@pytest.mark.parametrize(
    "Estimator",
    [
        DateTimeVectorizer(),
        LogExtremeValuesTransformer(),
        MultiColumnTfidfVectorizer(),
        NALabelEncoder(),
        QuadraticFeatures(),
        QuantileExtremeValuesTransformer(),
        RobustImputer(),
        RemoveConstantColumnsTransformer(),
        RobustLabelEncoder(),
        RobustMissingIndicator(),
        RobustStandardScaler(),
        ThresholdOneHotEncoder(),
    ],
)
def test_all_estimators(Estimator):
    """Pass the given estimator instance to ``check_estimator`` and return its result."""
    outcome = check_estimator(Estimator)
    return outcome
def to_csr(X): return csr_matrix(X.shape, dtype=np.int8) impute_pca_pipeline = Pipeline( steps=[("impute", SimpleImputer()), ("pca", PCA(n_components=2))]) @pytest.mark.parametrize( "feature_transformer, target_transformer, " "expected_X_transformed_shape, expected_Xy_transformed_shape", [ (impute_pca_pipeline, LabelEncoder(), (10, 2), (10, 3)), (impute_pca_pipeline, NALabelEncoder(), (10, 2), (9, 3)), (FunctionTransformer(to_csr, validate=False), None, (10, 3), (9, 4)), ], ) def test_automl_transformer(feature_transformer, target_transformer, expected_X_transformed_shape, expected_Xy_transformed_shape): X = np.arange(0, 3 * 10).reshape((10, 3)).astype(np.str) y = np.array([0] * 5 + [1] * 4 + [np.nan]).astype(np.str) header = Header(column_names=["x1", "x2", "x3", "class"], target_column_name="class") automl_transformer = AutoMLTransformer( header=header, feature_transformer=feature_transformer, target_transformer=target_transformer,