Beispiel #1
0
def test_log_extreme_values_transformer():
    """Run LogExtremeValuesTransformer through the shared model test helper.

    The input is built from a 12x3 float32 matrix whose first two columns
    end in a very large magnitude value (-1e5 and 1e6). Those two columns are
    passed through ``log_transform`` before being stacked with the untouched
    third column, and the stacked matrix is what the transformer is fitted on
    and what ``_test_model_impl`` receives.
    """
    st_helper = SklearnTestHelper()
    levt = LogExtremeValuesTransformer(threshold_std=2.0)
    x_extreme_vals = np.array(
        [
            [0.0, 0.0, 0.0],
            [-1.0, 1.0, 1.0],
            [-2.0, 2.0, 2.0],
            [-3.0, 3.0, 3.0],
            [-4.0, 4.0, 4.0],
            [-5.0, 5.0, 5.0],
            [-6.0, 6.0, 6.0],
            [-7.0, 7.0, 7.0],
            [-8.0, 8.0, 8.0],
            [-9.0, 9.0, 9.0],
            [-10.0, 10.0, 10.0],
            [-1e5, 1e6, 11.0],
        ],
        dtype=np.float32,
    )
    # Each log_transform call gets a slice of a fresh copy, so the source
    # matrix is never handed to log_transform directly.
    x_log_extreme_vals = np.column_stack(
        [
            log_transform(x_extreme_vals.copy()[:, 0]),
            log_transform(x_extreme_vals.copy()[:, 1]),
            x_extreme_vals[:, 2],
        ]
    )
    # Only the fitting side effect is needed here; the transformed output is
    # intentionally discarded.
    # NOTE(review): presumably _test_model_impl expects an already-fitted
    # estimator -- confirm against the helper.
    levt.fit_transform(x_log_extreme_vals)
    # First dimension is left as relay.Any(); second is the column count.
    dshape = (relay.Any(), x_log_extreme_vals.shape[1])
    _test_model_impl(st_helper, levt, dshape, x_log_extreme_vals)
Beispiel #2
0
def build_feature_transform():
    """Build the feature-processing model definition.

    Returns:
        A ``Pipeline`` with three steps, in order: a ``ColumnTransformer``
        applied to the numeric feature indices, ``RobustPCA`` and
        ``RobustStandardScaler``.
    """
    # Names of the features that can be parsed as numeric.
    numeric_feature_names = [
        'Account Length', 'VMail Message', 'Day Mins', 'Day Calls', 'Eve Mins',
        'Eve Calls', 'Night Mins', 'Night Calls', 'Intl Mins', 'Intl Calls',
        'CustServ Calls', 'State_AK', 'State_AL', 'State_AR', 'State_AZ',
        'State_CA', 'State_CO', 'State_CT', 'State_DC', 'State_DE', 'State_FL',
        'State_GA', 'State_HI', 'State_IA', 'State_ID', 'State_IL', 'State_IN',
        'State_KS', 'State_KY', 'State_LA', 'State_MA', 'State_MD', 'State_ME',
        'State_MI', 'State_MN', 'State_MO', 'State_MS', 'State_MT', 'State_NC',
        'State_ND', 'State_NE', 'State_NH', 'State_NJ', 'State_NM', 'State_NV',
        'State_NY', 'State_OH', 'State_OK', 'State_OR', 'State_PA', 'State_RI',
        'State_SC', 'State_SD', 'State_TN', 'State_TX', 'State_UT', 'State_VA',
        'State_VT', 'State_WA', 'State_WI', 'State_WV', 'State_WY',
        'Area Code_408', 'Area Code_415', 'Area Code_510', "Int'l Plan_no",
        "Int'l Plan_yes", 'VMail Plan_no', 'VMail Plan_yes',
    ]
    numeric = HEADER.as_feature_indices(numeric_feature_names)

    # Imputed values and missing-value indicators are produced side by side.
    impute_and_flag = FeatureUnion([
        ('robust_imputer', RobustImputer(strategy='constant', fill_values=nan)),
        ('robust_missing_indicator', RobustMissingIndicator()),
    ])
    numeric_processors = Pipeline(steps=[
        ('featureunion', impute_and_flag),
        ('logextremevaluestransformer', LogExtremeValuesTransformer()),
    ])

    column_transformer = ColumnTransformer(
        transformers=[('numeric_processing', numeric_processors, numeric)],
    )

    model_steps = [
        ('column_transformer', column_transformer),
        ('robustpca', RobustPCA(n_components=338)),
        ('robuststandardscaler', RobustStandardScaler()),
    ]
    return Pipeline(steps=model_steps)
Beispiel #3
0
def test_log_extreme_value_transformer(X, X_expected):
    """Check that fit_transform on ``X`` matches ``X_expected``.

    Uses ``threshold_std=2.0`` and compares with
    ``assert_array_almost_equal``.
    """
    X_observed = LogExtremeValuesTransformer(threshold_std=2.0).fit_transform(X)
    assert_array_almost_equal(X_observed, X_expected)
Beispiel #4
0
def test_log_extreme_value_transformer_state():
    """Check the fitted state and the output of the transformer.

    After ``fit_transform`` on ``X_extreme_vals``, ``nonnegative_cols_`` must
    equal ``[1, 2]`` and the output must match ``X_log_extreme_vals``.
    """
    transformer = LogExtremeValuesTransformer(threshold_std=2.0)
    transformed = transformer.fit_transform(X_extreme_vals)

    expected_nonnegative_cols = [1, 2]
    assert_array_almost_equal(transformer.nonnegative_cols_, expected_nonnegative_cols)
    assert_array_almost_equal(transformed, X_log_extreme_vals)
from sagemaker_sklearn_extension.impute import RobustMissingIndicator
from sagemaker_sklearn_extension.preprocessing import LogExtremeValuesTransformer
from sagemaker_sklearn_extension.preprocessing import NALabelEncoder
from sagemaker_sklearn_extension.preprocessing import QuadraticFeatures
from sagemaker_sklearn_extension.preprocessing import QuantileExtremeValuesTransformer
from sagemaker_sklearn_extension.preprocessing import RemoveConstantColumnsTransformer
from sagemaker_sklearn_extension.preprocessing import RobustLabelEncoder
from sagemaker_sklearn_extension.preprocessing import RobustStandardScaler
from sagemaker_sklearn_extension.preprocessing import ThresholdOneHotEncoder


@pytest.mark.parametrize(
    "Estimator",
    [
        DateTimeVectorizer(),
        LogExtremeValuesTransformer(),
        MultiColumnTfidfVectorizer(),
        NALabelEncoder(),
        QuadraticFeatures(),
        QuantileExtremeValuesTransformer(),
        RobustImputer(),
        RemoveConstantColumnsTransformer(),
        RobustLabelEncoder(),
        RobustMissingIndicator(),
        RobustStandardScaler(),
        ThresholdOneHotEncoder(),
    ],
)
def test_all_estimators(Estimator):
    """Run scikit-learn's ``check_estimator`` on each listed estimator.

    Each parametrized case receives one default-constructed estimator
    instance.
    """
    # The result of check_estimator is deliberately not returned: pytest
    # expects test functions to return None and flags any other return value.
    check_estimator(Estimator)