def test_robust_standard_scaler_sparse():
    """Check RobustStandardScaler on sparse input.

    The transformed output must still be sparse, and its dense form is
    compared against ``X / np.std(X, axis=0)``.
    """
    transformed = RobustStandardScaler().fit_transform(X_sparse)
    assert issparse(transformed)

    dense_result = transformed.toarray()
    expected = X / np.std(X, axis=0)
    np.testing.assert_array_almost_equal(dense_result, expected)
Exemple #2
0
def test_robust_scaler():
    """Fit a RobustStandardScaler on a tiny float32 array and run the shared model check."""
    helper = SklearnTestHelper()
    scaler = RobustStandardScaler()

    samples = np.array(
        [[-1, 0], [0, 0], [1, 1], [1, 1]],
        dtype=np.float32,
    )
    scaler.fit(samples)

    # Dynamic first dimension, fixed number of columns taken from the data.
    input_shape = (relay.Any(), samples.shape[1])
    _test_model_impl(helper, scaler, input_shape, samples)
def build_feature_transform():
    """Build the feature-processing pipeline for the 'features' text column.

    Returns:
        A Pipeline whose first step is a ColumnTransformer applying a
        MultiColumnTfidfVectorizer to the text features, followed by a
        RobustStandardScaler.
    """
    # Features handled as natural language.
    text_columns = HEADER.as_feature_indices(['features'])

    tfidf = MultiColumnTfidfVectorizer(
        max_df=0.9684,
        min_df=0.013108614232209739,
        analyzer='word',
        max_features=10000,
    )
    text_pipeline = Pipeline(steps=[('multicolumntfidfvectorizer', tfidf)])

    columns = ColumnTransformer(
        transformers=[('text_processing', text_pipeline, text_columns)]
    )

    pipeline_steps = [
        ('column_transformer', columns),
        ('robuststandardscaler', RobustStandardScaler()),
    ]
    return Pipeline(steps=pipeline_steps)
Exemple #4
0
def build_feature_transform():
    """Build the feature-processing pipeline for numeric and categorical columns.

    Returns:
        A Pipeline made of a ColumnTransformer (RobustImputer on the numeric
        features, ThresholdOneHotEncoder on the categorical ones), then
        RobustPCA with 98 components, then a RobustStandardScaler.
    """
    # Features handled as numeric values.
    numeric_names = [
        'age', 'duration', 'campaign', 'pdays', 'previous', 'emp.var.rate',
        'cons.price.idx', 'cons.conf.idx', 'euribor3m', 'nr.employed',
    ]
    numeric_columns = HEADER.as_feature_indices(numeric_names)

    # Features with a relatively small number of unique items.
    categorical_names = [
        'job', 'marital', 'education', 'default', 'housing', 'loan', 'contact',
        'month', 'day_of_week', 'poutcome',
    ]
    categorical_columns = HEADER.as_feature_indices(categorical_names)

    numeric_pipeline = Pipeline(steps=[('robustimputer', RobustImputer())])

    encoder = ThresholdOneHotEncoder(threshold=11)
    categorical_pipeline = Pipeline(steps=[('thresholdonehotencoder', encoder)])

    columns = ColumnTransformer(
        transformers=[
            ('numeric_processing', numeric_pipeline, numeric_columns),
            ('categorical_processing', categorical_pipeline,
             categorical_columns),
        ]
    )

    pipeline_steps = [
        ('column_transformer', columns),
        ('robustpca', RobustPCA(n_components=98)),
        ('robuststandardscaler', RobustStandardScaler()),
    ]
    return Pipeline(steps=pipeline_steps)
def build_feature_transform():
    """Build the feature-processing pipeline for the numeric V-columns and 'amt'.

    Returns:
        A Pipeline made of a ColumnTransformer (constant-strategy
        RobustImputer on the numeric features) followed by a
        RobustStandardScaler.
    """
    # Features handled as numeric values.
    numeric_names = [
        'Unnamed: 0', 'V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9',
        'V10', 'V11', 'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V18',
        'V19', 'V20', 'V21', 'V22', 'V23', 'V24', 'V25', 'V26', 'V27',
        'V28', 'amt',
    ]
    numeric_columns = HEADER.as_feature_indices(numeric_names)

    imputer = RobustImputer(strategy='constant', fill_values=nan)
    numeric_pipeline = Pipeline(steps=[('robustimputer', imputer)])

    columns = ColumnTransformer(
        transformers=[
            ('numeric_processing', numeric_pipeline, numeric_columns),
        ]
    )

    pipeline_steps = [
        ('column_transformer', columns),
        ('robuststandardscaler', RobustStandardScaler()),
    ]
    return Pipeline(steps=pipeline_steps)
Exemple #6
0
def build_feature_transform():
    """Build the feature-processing pipeline for the numeric churn columns.

    Returns:
        A Pipeline made of a ColumnTransformer (RobustImputer on the numeric
        features), then RobustPCA with 117 components, then a
        RobustStandardScaler.
    """
    # Features handled as numeric values.
    numeric_names = [
        'Account Length', 'VMail Message', 'Day Mins', 'Day Calls', 'Eve Mins',
        'Eve Calls', 'Night Mins', 'Night Calls', 'Intl Mins', 'Intl Calls',
        'CustServ Calls', 'State_AK', 'State_AL', 'State_AR', 'State_AZ',
        'State_CA', 'State_CO', 'State_CT', 'State_DC', 'State_DE', 'State_FL',
        'State_GA', 'State_HI', 'State_IA', 'State_ID', 'State_IL', 'State_IN',
        'State_KS', 'State_KY', 'State_LA', 'State_MA', 'State_MD', 'State_ME',
        'State_MI', 'State_MN', 'State_MO', 'State_MS', 'State_MT', 'State_NC',
        'State_ND', 'State_NE', 'State_NH', 'State_NJ', 'State_NM', 'State_NV',
        'State_NY', 'State_OH', 'State_OK', 'State_OR', 'State_PA', 'State_RI',
        'State_SC', 'State_SD', 'State_TN', 'State_TX', 'State_UT', 'State_VA',
        'State_VT', 'State_WA', 'State_WI', 'State_WV', 'State_WY',
        'Area Code_408', 'Area Code_415', 'Area Code_510', "Int'l Plan_no",
        "Int'l Plan_yes", 'VMail Plan_no', 'VMail Plan_yes',
    ]
    numeric_columns = HEADER.as_feature_indices(numeric_names)

    numeric_pipeline = Pipeline(steps=[('robustimputer', RobustImputer())])

    columns = ColumnTransformer(
        transformers=[
            ('numeric_processing', numeric_pipeline, numeric_columns),
        ]
    )

    pipeline_steps = [
        ('column_transformer', columns),
        ('robustpca', RobustPCA(n_components=117)),
        ('robuststandardscaler', RobustStandardScaler()),
    ]
    return Pipeline(steps=pipeline_steps)
Exemple #7
0
def build_feature_transform():
    """Build the feature-processing pipeline for the 'review_body' text column.

    Returns:
        A Pipeline made of a ColumnTransformer (character-level 'char_wb'
        MultiColumnTfidfVectorizer on the text features), then RobustPCA
        with 5 components, then a RobustStandardScaler.
    """
    # Features handled as natural language.
    text_columns = HEADER.as_feature_indices(['review_body'])

    tfidf = MultiColumnTfidfVectorizer(
        max_df=0.99,
        min_df=0.0021,
        analyzer='char_wb',
        max_features=10000,
    )
    text_pipeline = Pipeline(steps=[('multicolumntfidfvectorizer', tfidf)])

    columns = ColumnTransformer(
        transformers=[('text_processing', text_pipeline, text_columns)]
    )

    pipeline_steps = [
        ('column_transformer', columns),
        ('robustpca', RobustPCA(n_components=5)),
        ('robuststandardscaler', RobustStandardScaler()),
    ]
    return Pipeline(steps=pipeline_steps)
def build_feature_transform():
    """Build the feature-processing pipeline for the V-columns with 'amt' also one-hot encoded.

    Returns:
        A Pipeline made of a ColumnTransformer (constant-strategy
        RobustImputer on the numeric features, ThresholdOneHotEncoder with
        threshold 635 on the categorical ones) followed by a
        RobustStandardScaler.
    """
    # Features handled as numeric values.
    numeric_names = [
        'Unnamed: 0', 'V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9',
        'V10', 'V11', 'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19',
        'V20', 'V21', 'V22', 'V23', 'V24', 'V25', 'V26', 'V27', 'V28', 'amt',
    ]
    numeric_columns = HEADER.as_feature_indices(numeric_names)

    # Features with a relatively small number of unique items.
    # 'amt' is listed in both groups, exactly as in the numeric list above.
    categorical_columns = HEADER.as_feature_indices(['amt'])

    imputer = RobustImputer(strategy='constant', fill_values=nan)
    numeric_pipeline = Pipeline(steps=[('robustimputer', imputer)])

    encoder = ThresholdOneHotEncoder(threshold=635)
    categorical_pipeline = Pipeline(steps=[('thresholdonehotencoder', encoder)])

    columns = ColumnTransformer(
        transformers=[
            ('numeric_processing', numeric_pipeline, numeric_columns),
            ('categorical_processing', categorical_pipeline,
             categorical_columns),
        ]
    )

    pipeline_steps = [
        ('column_transformer', columns),
        ('robuststandardscaler', RobustStandardScaler()),
    ]
    return Pipeline(steps=pipeline_steps)
Exemple #9
0
def build_feature_transform():
    """Build the feature-processing pipeline for device/persona columns.

    Returns:
        A Pipeline made of a ColumnTransformer (ThresholdOneHotEncoder on
        the categorical features, MultiColumnTfidfVectorizer on the text
        features), then RobustPCA with 53 components, then a
        RobustStandardScaler.
    """
    # Features with a relatively small number of unique items.
    categorical_columns = HEADER.as_feature_indices(['dev_platform_vec'])

    # Features handled as natural language.
    text_names = [
        'ifa', 'bundle_vec', 'persona_segment_vec', 'persona_L1_vec',
        'persona_L2_vec', 'persona_L3_vec', 'device_vendor_vec',
        'device_name_vec', 'device_manufacturer_vec', 'device_model_vec',
        'device_year_of_release_vec', 'major_os_vec',
    ]
    text_columns = HEADER.as_feature_indices(text_names)

    encoder = ThresholdOneHotEncoder(threshold=5)
    categorical_pipeline = Pipeline(steps=[('thresholdonehotencoder', encoder)])

    tfidf = MultiColumnTfidfVectorizer(
        max_df=0.9365,
        min_df=0.011235955056179775,
        analyzer='word',
        max_features=10000,
    )
    text_pipeline = Pipeline(steps=[('multicolumntfidfvectorizer', tfidf)])

    columns = ColumnTransformer(
        transformers=[
            ('categorical_processing', categorical_pipeline,
             categorical_columns),
            ('text_processing', text_pipeline, text_columns),
        ]
    )

    pipeline_steps = [
        ('column_transformer', columns),
        ('robustpca', RobustPCA(n_components=53)),
        ('robuststandardscaler', RobustStandardScaler()),
    ]
    return Pipeline(steps=pipeline_steps)
Exemple #10
0
def test_pipeline():
    """Fit an imputer + scaler Pipeline on a small float32 array and run the shared model check."""
    helper = SklearnTestHelper()

    stages = [("imputer", RobustImputer()), ("scaler", RobustStandardScaler())]
    model = Pipeline(stages)

    samples = np.array(
        [[0.0, 1.0, 3], [2.0, 2.0, 5]],
        dtype=np.float32,
    )
    model.fit(samples)

    # Dynamic first dimension, fixed number of columns taken from the data.
    input_shape = (relay.Any(), samples.shape[1])
    _test_model_impl(helper, model, input_shape, samples)
Exemple #11
0
def build_feature_transform():
    """Build the feature-processing pipeline for the churn columns (numeric + categorical).

    Returns:
        A Pipeline made of a ColumnTransformer (constant-strategy
        RobustImputer on the numeric features, ThresholdOneHotEncoder with
        threshold 6 on the categorical ones) followed by a
        RobustStandardScaler.
    """
    # Features handled as numeric values.
    numeric_names = [
        'Account Length', 'VMail Message', 'Day Mins', 'Day Calls', 'Eve Mins',
        'Eve Calls', 'Night Mins', 'Night Calls', 'Intl Mins', 'Intl Calls',
        'CustServ Calls', 'State_AK', 'State_AL', 'State_AR', 'State_AZ',
        'State_CA', 'State_CO', 'State_CT', 'State_DC', 'State_DE', 'State_FL',
        'State_GA', 'State_HI', 'State_IA', 'State_ID', 'State_IL', 'State_IN',
        'State_KS', 'State_KY', 'State_LA', 'State_MA', 'State_MD', 'State_ME',
        'State_MI', 'State_MN', 'State_MO', 'State_MS', 'State_MT', 'State_NC',
        'State_ND', 'State_NE', 'State_NH', 'State_NJ', 'State_NM', 'State_NV',
        'State_NY', 'State_OH', 'State_OK', 'State_OR', 'State_PA', 'State_RI',
        'State_SC', 'State_SD', 'State_TN', 'State_TX', 'State_UT', 'State_VA',
        'State_VT', 'State_WA', 'State_WI', 'State_WV', 'State_WY',
        'Area Code_408', 'Area Code_415', 'Area Code_510', "Int'l Plan_no",
        "Int'l Plan_yes", 'VMail Plan_no', 'VMail Plan_yes',
    ]
    numeric_columns = HEADER.as_feature_indices(numeric_names)

    # Features with a relatively small number of unique items.
    categorical_names = [
        'Account Length', 'VMail Message', 'Day Calls', 'Eve Calls',
        'Night Calls', 'Intl Mins', 'Intl Calls', 'CustServ Calls', 'State_AK',
        'State_AL', 'State_AR', 'State_AZ', 'State_CA', 'State_CO', 'State_CT',
        'State_DC', 'State_DE', 'State_FL', 'State_GA', 'State_HI', 'State_IA',
        'State_ID', 'State_IL', 'State_IN', 'State_KS', 'State_KY', 'State_LA',
        'State_MA', 'State_MD', 'State_ME', 'State_MI', 'State_MN', 'State_MO',
        'State_MS', 'State_MT', 'State_NC', 'State_ND', 'State_NE', 'State_NH',
        'State_NJ', 'State_NM', 'State_NV', 'State_NY', 'State_OH', 'State_OK',
        'State_OR', 'State_PA', 'State_RI', 'State_SC', 'State_SD', 'State_TN',
        'State_TX', 'State_UT', 'State_VA', 'State_VT', 'State_WA', 'State_WI',
        'State_WV', 'State_WY', 'Area Code_408', 'Area Code_415',
        'Area Code_510', "Int'l Plan_no", "Int'l Plan_yes", 'VMail Plan_no',
        'VMail Plan_yes',
    ]
    categorical_columns = HEADER.as_feature_indices(categorical_names)

    imputer = RobustImputer(strategy='constant', fill_values=nan)
    numeric_pipeline = Pipeline(steps=[('robustimputer', imputer)])

    encoder = ThresholdOneHotEncoder(threshold=6)
    categorical_pipeline = Pipeline(steps=[('thresholdonehotencoder', encoder)])

    columns = ColumnTransformer(
        transformers=[
            ('numeric_processing', numeric_pipeline, numeric_columns),
            ('categorical_processing', categorical_pipeline,
             categorical_columns),
        ]
    )

    pipeline_steps = [
        ('column_transformer', columns),
        ('robuststandardscaler', RobustStandardScaler()),
    ]
    return Pipeline(steps=pipeline_steps)
Exemple #12
0
def build_feature_transform():
    """Build the feature-processing pipeline for the "review_body" text column.

    Returns:
        A Pipeline made of a ColumnTransformer (word-level
        MultiColumnTfidfVectorizer on the text features) followed by a
        RobustStandardScaler.
    """
    # Features handled as natural language.
    text_columns = HEADER.as_feature_indices(["review_body"])

    tfidf = MultiColumnTfidfVectorizer(
        max_df=0.9941,
        min_df=0.0007,
        analyzer="word",
        max_features=10000,
    )
    text_pipeline = Pipeline(steps=[("multicolumntfidfvectorizer", tfidf)])

    columns = ColumnTransformer(
        transformers=[("text_processing", text_pipeline, text_columns)]
    )

    pipeline_steps = [
        ("column_transformer", columns),
        ("robuststandardscaler", RobustStandardScaler()),
    ]
    return Pipeline(steps=pipeline_steps)
Exemple #13
0
def build_feature_transform():
    """Build the feature-processing pipeline for the V-columns with missing-value indicators.

    Returns:
        A Pipeline made of a ColumnTransformer, then RobustPCA with 147
        components, then a RobustStandardScaler. Inside the
        ColumnTransformer the numeric features go through a FeatureUnion of
        RobustImputer and RobustMissingIndicator followed by a
        QuantileExtremeValuesTransformer, and the categorical features go
        through a ThresholdOneHotEncoder with threshold 31.
    """
    # Features handled as numeric values.
    numeric_names = [
        'Unnamed: 0', 'V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9',
        'V10', 'V11', 'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19',
        'V20', 'V21', 'V22', 'V23', 'V24', 'V25', 'V26', 'V27', 'V28', 'amt',
    ]
    numeric_columns = HEADER.as_feature_indices(numeric_names)

    # Features with a relatively small number of unique items.
    # 'amt' is listed in both groups, exactly as in the numeric list above.
    categorical_columns = HEADER.as_feature_indices(['amt'])

    impute_and_flag = FeatureUnion([
        ('robust_imputer', RobustImputer()),
        ('robust_missing_indicator', RobustMissingIndicator()),
    ])
    extreme_values = QuantileExtremeValuesTransformer()
    numeric_pipeline = Pipeline(
        steps=[
            ('featureunion', impute_and_flag),
            ('quantileextremevaluestransformer', extreme_values),
        ]
    )

    encoder = ThresholdOneHotEncoder(threshold=31)
    categorical_pipeline = Pipeline(steps=[('thresholdonehotencoder', encoder)])

    columns = ColumnTransformer(
        transformers=[
            ('numeric_processing', numeric_pipeline, numeric_columns),
            ('categorical_processing', categorical_pipeline,
             categorical_columns),
        ]
    )

    pipeline_steps = [
        ('column_transformer', columns),
        ('robustpca', RobustPCA(n_components=147)),
        ('robuststandardscaler', RobustStandardScaler()),
    ]
    return Pipeline(steps=pipeline_steps)
def test_robust_standard_scaler_dense():
    """Check that RobustStandardScaler on dense ``X`` reproduces ``X_standardized`` exactly."""
    transformed = RobustStandardScaler().fit_transform(X)
    np.testing.assert_array_equal(transformed, X_standardized)
from sagemaker_sklearn_extension.impute import RobustMissingIndicator
from sagemaker_sklearn_extension.preprocessing import LogExtremeValuesTransformer
from sagemaker_sklearn_extension.preprocessing import NALabelEncoder
from sagemaker_sklearn_extension.preprocessing import QuadraticFeatures
from sagemaker_sklearn_extension.preprocessing import QuantileExtremeValuesTransformer
from sagemaker_sklearn_extension.preprocessing import RemoveConstantColumnsTransformer
from sagemaker_sklearn_extension.preprocessing import RobustLabelEncoder
from sagemaker_sklearn_extension.preprocessing import RobustStandardScaler
from sagemaker_sklearn_extension.preprocessing import ThresholdOneHotEncoder


@pytest.mark.parametrize(
    "Estimator",
    [
        DateTimeVectorizer(),
        LogExtremeValuesTransformer(),
        MultiColumnTfidfVectorizer(),
        NALabelEncoder(),
        QuadraticFeatures(),
        QuantileExtremeValuesTransformer(),
        RobustImputer(),
        RemoveConstantColumnsTransformer(),
        RobustLabelEncoder(),
        RobustMissingIndicator(),
        RobustStandardScaler(),
        ThresholdOneHotEncoder(),
    ],
)
def test_all_estimators(Estimator):
    """Run scikit-learn's ``check_estimator`` against each parametrized estimator instance.

    Args:
        Estimator: an already-constructed estimator instance supplied by the
            ``parametrize`` list above (despite the class-like name).
    """
    # Do not return the result: pytest expects test functions to return None,
    # and a failing check surfaces through the exception check_estimator raises.
    check_estimator(Estimator)
Exemple #16
0
def build_feature_transform():
    """ Returns the model definition representing feature processing."""

    # These features can be parsed as numeric.
    numeric = HEADER.as_feature_indices([
        'tBodyAcc.mean.X', 'tBodyAcc.mean.Y', 'tBodyAcc.mean.Z',
        'tBodyAcc.std.X', 'tBodyAcc.std.Y', 'tBodyAcc.std.Z', 'tBodyAcc.mad.X',
        'tBodyAcc.mad.Y', 'tBodyAcc.mad.Z', 'tBodyAcc.max.X', 'tBodyAcc.max.Y',
        'tBodyAcc.max.Z', 'tBodyAcc.min.X', 'tBodyAcc.min.Y', 'tBodyAcc.min.Z',
        'tBodyAcc.sma', 'tBodyAcc.energy.X', 'tBodyAcc.energy.Y',
        'tBodyAcc.energy.Z', 'tBodyAcc.iqr.X', 'tBodyAcc.iqr.Y',
        'tBodyAcc.iqr.Z', 'tBodyAcc.entropy.X', 'tBodyAcc.entropy.Y',
        'tBodyAcc.entropy.Z', 'tBodyAcc.arCoeff.X.1', 'tBodyAcc.arCoeff.X.2',
        'tBodyAcc.arCoeff.X.3', 'tBodyAcc.arCoeff.X.4', 'tBodyAcc.arCoeff.Y.1',
        'tBodyAcc.arCoeff.Y.2', 'tBodyAcc.arCoeff.Y.3', 'tBodyAcc.arCoeff.Y.4',
        'tBodyAcc.arCoeff.Z.1', 'tBodyAcc.arCoeff.Z.2', 'tBodyAcc.arCoeff.Z.3',
        'tBodyAcc.arCoeff.Z.4', 'tBodyAcc.correlation.X.Y',
        'tBodyAcc.correlation.X.Z', 'tBodyAcc.correlation.Y.Z',
        'tGravityAcc.mean.X', 'tGravityAcc.mean.Y', 'tGravityAcc.mean.Z',
        'tGravityAcc.std.X', 'tGravityAcc.std.Y', 'tGravityAcc.std.Z',
        'tGravityAcc.mad.X', 'tGravityAcc.mad.Y', 'tGravityAcc.mad.Z',
        'tGravityAcc.max.X', 'tGravityAcc.max.Y', 'tGravityAcc.max.Z',
        'tGravityAcc.min.X', 'tGravityAcc.min.Y', 'tGravityAcc.min.Z',
        'tGravityAcc.sma', 'tGravityAcc.energy.X', 'tGravityAcc.energy.Y',
        'tGravityAcc.energy.Z', 'tGravityAcc.iqr.X', 'tGravityAcc.iqr.Y',
        'tGravityAcc.iqr.Z', 'tGravityAcc.entropy.X', 'tGravityAcc.entropy.Y',
        'tGravityAcc.entropy.Z', 'tGravityAcc.arCoeff.X.1',
        'tGravityAcc.arCoeff.X.2', 'tGravityAcc.arCoeff.X.3',
        'tGravityAcc.arCoeff.X.4', 'tGravityAcc.arCoeff.Y.1',
        'tGravityAcc.arCoeff.Y.2', 'tGravityAcc.arCoeff.Y.3',
        'tGravityAcc.arCoeff.Y.4', 'tGravityAcc.arCoeff.Z.1',
        'tGravityAcc.arCoeff.Z.2', 'tGravityAcc.arCoeff.Z.3',
        'tGravityAcc.arCoeff.Z.4', 'tGravityAcc.correlation.X.Y',
        'tGravityAcc.correlation.X.Z', 'tGravityAcc.correlation.Y.Z',
        'tBodyAccJerk.mean.X', 'tBodyAccJerk.mean.Y', 'tBodyAccJerk.mean.Z',
        'tBodyAccJerk.std.X', 'tBodyAccJerk.std.Y', 'tBodyAccJerk.std.Z',
        'tBodyAccJerk.mad.X', 'tBodyAccJerk.mad.Y', 'tBodyAccJerk.mad.Z',
        'tBodyAccJerk.max.X', 'tBodyAccJerk.max.Y', 'tBodyAccJerk.max.Z',
        'tBodyAccJerk.min.X', 'tBodyAccJerk.min.Y', 'tBodyAccJerk.min.Z',
        'tBodyAccJerk.sma', 'tBodyAccJerk.energy.X', 'tBodyAccJerk.energy.Y',
        'tBodyAccJerk.energy.Z', 'tBodyAccJerk.iqr.X', 'tBodyAccJerk.iqr.Y',
        'tBodyAccJerk.iqr.Z', 'tBodyAccJerk.entropy.X',
        'tBodyAccJerk.entropy.Y', 'tBodyAccJerk.entropy.Z',
        'tBodyAccJerk.arCoeff.X.1', 'tBodyAccJerk.arCoeff.X.2',
        'tBodyAccJerk.arCoeff.X.3', 'tBodyAccJerk.arCoeff.X.4',
        'tBodyAccJerk.arCoeff.Y.1', 'tBodyAccJerk.arCoeff.Y.2',
        'tBodyAccJerk.arCoeff.Y.3', 'tBodyAccJerk.arCoeff.Y.4',
        'tBodyAccJerk.arCoeff.Z.1', 'tBodyAccJerk.arCoeff.Z.2',
        'tBodyAccJerk.arCoeff.Z.3', 'tBodyAccJerk.arCoeff.Z.4',
        'tBodyAccJerk.correlation.X.Y', 'tBodyAccJerk.correlation.X.Z',
        'tBodyAccJerk.correlation.Y.Z', 'tBodyGyro.mean.X', 'tBodyGyro.mean.Y',
        'tBodyGyro.mean.Z', 'tBodyGyro.std.X', 'tBodyGyro.std.Y',
        'tBodyGyro.std.Z', 'tBodyGyro.mad.X', 'tBodyGyro.mad.Y',
        'tBodyGyro.mad.Z', 'tBodyGyro.max.X', 'tBodyGyro.max.Y',
        'tBodyGyro.max.Z', 'tBodyGyro.min.X', 'tBodyGyro.min.Y',
        'tBodyGyro.min.Z', 'tBodyGyro.sma', 'tBodyGyro.energy.X',
        'tBodyGyro.energy.Y', 'tBodyGyro.energy.Z', 'tBodyGyro.iqr.X',
        'tBodyGyro.iqr.Y', 'tBodyGyro.iqr.Z', 'tBodyGyro.entropy.X',
        'tBodyGyro.entropy.Y', 'tBodyGyro.entropy.Z', 'tBodyGyro.arCoeff.X.1',
        'tBodyGyro.arCoeff.X.2', 'tBodyGyro.arCoeff.X.3',
        'tBodyGyro.arCoeff.X.4', 'tBodyGyro.arCoeff.Y.1',
        'tBodyGyro.arCoeff.Y.2', 'tBodyGyro.arCoeff.Y.3',
        'tBodyGyro.arCoeff.Y.4', 'tBodyGyro.arCoeff.Z.1',
        'tBodyGyro.arCoeff.Z.2', 'tBodyGyro.arCoeff.Z.3',
        'tBodyGyro.arCoeff.Z.4', 'tBodyGyro.correlation.X.Y',
        'tBodyGyro.correlation.X.Z', 'tBodyGyro.correlation.Y.Z',
        'tBodyGyroJerk.mean.X', 'tBodyGyroJerk.mean.Y', 'tBodyGyroJerk.mean.Z',
        'tBodyGyroJerk.std.X', 'tBodyGyroJerk.std.Y', 'tBodyGyroJerk.std.Z',
        'tBodyGyroJerk.mad.X', 'tBodyGyroJerk.mad.Y', 'tBodyGyroJerk.mad.Z',
        'tBodyGyroJerk.max.X', 'tBodyGyroJerk.max.Y', 'tBodyGyroJerk.max.Z',
        'tBodyGyroJerk.min.X', 'tBodyGyroJerk.min.Y', 'tBodyGyroJerk.min.Z',
        'tBodyGyroJerk.sma', 'tBodyGyroJerk.energy.X',
        'tBodyGyroJerk.energy.Y', 'tBodyGyroJerk.energy.Z',
        'tBodyGyroJerk.iqr.X', 'tBodyGyroJerk.iqr.Y', 'tBodyGyroJerk.iqr.Z',
        'tBodyGyroJerk.entropy.X', 'tBodyGyroJerk.entropy.Y',
        'tBodyGyroJerk.entropy.Z', 'tBodyGyroJerk.arCoeff.X.1',
        'tBodyGyroJerk.arCoeff.X.2', 'tBodyGyroJerk.arCoeff.X.3',
        'tBodyGyroJerk.arCoeff.X.4', 'tBodyGyroJerk.arCoeff.Y.1',
        'tBodyGyroJerk.arCoeff.Y.2', 'tBodyGyroJerk.arCoeff.Y.3',
        'tBodyGyroJerk.arCoeff.Y.4', 'tBodyGyroJerk.arCoeff.Z.1',
        'tBodyGyroJerk.arCoeff.Z.2', 'tBodyGyroJerk.arCoeff.Z.3',
        'tBodyGyroJerk.arCoeff.Z.4', 'tBodyGyroJerk.correlation.X.Y',
        'tBodyGyroJerk.correlation.X.Z', 'tBodyGyroJerk.correlation.Y.Z',
        'tBodyAccMag.mean', 'tBodyAccMag.std', 'tBodyAccMag.mad',
        'tBodyAccMag.max', 'tBodyAccMag.min', 'tBodyAccMag.sma',
        'tBodyAccMag.energy', 'tBodyAccMag.iqr', 'tBodyAccMag.entropy',
        'tBodyAccMag.arCoeff1', 'tBodyAccMag.arCoeff2', 'tBodyAccMag.arCoeff3',
        'tBodyAccMag.arCoeff4', 'tGravityAccMag.mean', 'tGravityAccMag.std',
        'tGravityAccMag.mad', 'tGravityAccMag.max', 'tGravityAccMag.min',
        'tGravityAccMag.sma', 'tGravityAccMag.energy', 'tGravityAccMag.iqr',
        'tGravityAccMag.entropy', 'tGravityAccMag.arCoeff1',
        'tGravityAccMag.arCoeff2', 'tGravityAccMag.arCoeff3',
        'tGravityAccMag.arCoeff4', 'tBodyAccJerkMag.mean',
        'tBodyAccJerkMag.std', 'tBodyAccJerkMag.mad', 'tBodyAccJerkMag.max',
        'tBodyAccJerkMag.min', 'tBodyAccJerkMag.sma', 'tBodyAccJerkMag.energy',
        'tBodyAccJerkMag.iqr', 'tBodyAccJerkMag.entropy',
        'tBodyAccJerkMag.arCoeff1', 'tBodyAccJerkMag.arCoeff2',
        'tBodyAccJerkMag.arCoeff3', 'tBodyAccJerkMag.arCoeff4',
        'tBodyGyroMag.mean', 'tBodyGyroMag.std', 'tBodyGyroMag.mad',
        'tBodyGyroMag.max', 'tBodyGyroMag.min', 'tBodyGyroMag.sma',
        'tBodyGyroMag.energy', 'tBodyGyroMag.iqr', 'tBodyGyroMag.entropy',
        'tBodyGyroMag.arCoeff1', 'tBodyGyroMag.arCoeff2',
        'tBodyGyroMag.arCoeff3', 'tBodyGyroMag.arCoeff4',
        'tBodyGyroJerkMag.mean', 'tBodyGyroJerkMag.std',
        'tBodyGyroJerkMag.mad', 'tBodyGyroJerkMag.max', 'tBodyGyroJerkMag.min',
        'tBodyGyroJerkMag.sma', 'tBodyGyroJerkMag.energy',
        'tBodyGyroJerkMag.iqr', 'tBodyGyroJerkMag.entropy',
        'tBodyGyroJerkMag.arCoeff1', 'tBodyGyroJerkMag.arCoeff2',
        'tBodyGyroJerkMag.arCoeff3', 'tBodyGyroJerkMag.arCoeff4',
        'fBodyAcc.mean.X', 'fBodyAcc.mean.Y', 'fBodyAcc.mean.Z',
        'fBodyAcc.std.X', 'fBodyAcc.std.Y', 'fBodyAcc.std.Z', 'fBodyAcc.mad.X',
        'fBodyAcc.mad.Y', 'fBodyAcc.mad.Z', 'fBodyAcc.max.X', 'fBodyAcc.max.Y',
        'fBodyAcc.max.Z', 'fBodyAcc.min.X', 'fBodyAcc.min.Y', 'fBodyAcc.min.Z',
        'fBodyAcc.sma', 'fBodyAcc.energy.X', 'fBodyAcc.energy.Y',
        'fBodyAcc.energy.Z', 'fBodyAcc.iqr.X', 'fBodyAcc.iqr.Y',
        'fBodyAcc.iqr.Z', 'fBodyAcc.entropy.X', 'fBodyAcc.entropy.Y',
        'fBodyAcc.entropy.Z', 'fBodyAcc.maxInds.X', 'fBodyAcc.maxInds.Y',
        'fBodyAcc.maxInds.Z', 'fBodyAcc.meanFreq.X', 'fBodyAcc.meanFreq.Y',
        'fBodyAcc.meanFreq.Z', 'fBodyAcc.skewness.X', 'fBodyAcc.kurtosis.X',
        'fBodyAcc.skewness.Y', 'fBodyAcc.kurtosis.Y', 'fBodyAcc.skewness.Z',
        'fBodyAcc.kurtosis.Z', 'fBodyAcc.bandsEnergy.1.8',
        'fBodyAcc.bandsEnergy.9.16', 'fBodyAcc.bandsEnergy.17.24',
        'fBodyAcc.bandsEnergy.25.32', 'fBodyAcc.bandsEnergy.33.40',
        'fBodyAcc.bandsEnergy.41.48', 'fBodyAcc.bandsEnergy.49.56',
        'fBodyAcc.bandsEnergy.57.64', 'fBodyAcc.bandsEnergy.1.16',
        'fBodyAcc.bandsEnergy.17.32', 'fBodyAcc.bandsEnergy.33.48',
        'fBodyAcc.bandsEnergy.49.64', 'fBodyAcc.bandsEnergy.1.24',
        'fBodyAcc.bandsEnergy.25.48', 'fBodyAcc.bandsEnergy.1.8.1',
        'fBodyAcc.bandsEnergy.9.16.1', 'fBodyAcc.bandsEnergy.17.24.1',
        'fBodyAcc.bandsEnergy.25.32.1', 'fBodyAcc.bandsEnergy.33.40.1',
        'fBodyAcc.bandsEnergy.41.48.1', 'fBodyAcc.bandsEnergy.49.56.1',
        'fBodyAcc.bandsEnergy.57.64.1', 'fBodyAcc.bandsEnergy.1.16.1',
        'fBodyAcc.bandsEnergy.17.32.1', 'fBodyAcc.bandsEnergy.33.48.1',
        'fBodyAcc.bandsEnergy.49.64.1', 'fBodyAcc.bandsEnergy.1.24.1',
        'fBodyAcc.bandsEnergy.25.48.1', 'fBodyAcc.bandsEnergy.1.8.2',
        'fBodyAcc.bandsEnergy.9.16.2', 'fBodyAcc.bandsEnergy.17.24.2',
        'fBodyAcc.bandsEnergy.25.32.2', 'fBodyAcc.bandsEnergy.33.40.2',
        'fBodyAcc.bandsEnergy.41.48.2', 'fBodyAcc.bandsEnergy.49.56.2',
        'fBodyAcc.bandsEnergy.57.64.2', 'fBodyAcc.bandsEnergy.1.16.2',
        'fBodyAcc.bandsEnergy.17.32.2', 'fBodyAcc.bandsEnergy.33.48.2',
        'fBodyAcc.bandsEnergy.49.64.2', 'fBodyAcc.bandsEnergy.1.24.2',
        'fBodyAcc.bandsEnergy.25.48.2', 'fBodyAccJerk.mean.X',
        'fBodyAccJerk.mean.Y', 'fBodyAccJerk.mean.Z', 'fBodyAccJerk.std.X',
        'fBodyAccJerk.std.Y', 'fBodyAccJerk.std.Z', 'fBodyAccJerk.mad.X',
        'fBodyAccJerk.mad.Y', 'fBodyAccJerk.mad.Z', 'fBodyAccJerk.max.X',
        'fBodyAccJerk.max.Y', 'fBodyAccJerk.max.Z', 'fBodyAccJerk.min.X',
        'fBodyAccJerk.min.Y', 'fBodyAccJerk.min.Z', 'fBodyAccJerk.sma',
        'fBodyAccJerk.energy.X', 'fBodyAccJerk.energy.Y',
        'fBodyAccJerk.energy.Z', 'fBodyAccJerk.iqr.X', 'fBodyAccJerk.iqr.Y',
        'fBodyAccJerk.iqr.Z', 'fBodyAccJerk.entropy.X',
        'fBodyAccJerk.entropy.Y', 'fBodyAccJerk.entropy.Z',
        'fBodyAccJerk.maxInds.X', 'fBodyAccJerk.maxInds.Y',
        'fBodyAccJerk.maxInds.Z', 'fBodyAccJerk.meanFreq.X',
        'fBodyAccJerk.meanFreq.Y', 'fBodyAccJerk.meanFreq.Z',
        'fBodyAccJerk.skewness.X', 'fBodyAccJerk.kurtosis.X',
        'fBodyAccJerk.skewness.Y', 'fBodyAccJerk.kurtosis.Y',
        'fBodyAccJerk.skewness.Z', 'fBodyAccJerk.kurtosis.Z',
        'fBodyAccJerk.bandsEnergy.1.8', 'fBodyAccJerk.bandsEnergy.9.16',
        'fBodyAccJerk.bandsEnergy.17.24', 'fBodyAccJerk.bandsEnergy.25.32',
        'fBodyAccJerk.bandsEnergy.33.40', 'fBodyAccJerk.bandsEnergy.41.48',
        'fBodyAccJerk.bandsEnergy.49.56', 'fBodyAccJerk.bandsEnergy.57.64',
        'fBodyAccJerk.bandsEnergy.1.16', 'fBodyAccJerk.bandsEnergy.17.32',
        'fBodyAccJerk.bandsEnergy.33.48', 'fBodyAccJerk.bandsEnergy.49.64',
        'fBodyAccJerk.bandsEnergy.1.24', 'fBodyAccJerk.bandsEnergy.25.48',
        'fBodyAccJerk.bandsEnergy.1.8.1', 'fBodyAccJerk.bandsEnergy.9.16.1',
        'fBodyAccJerk.bandsEnergy.17.24.1', 'fBodyAccJerk.bandsEnergy.25.32.1',
        'fBodyAccJerk.bandsEnergy.33.40.1', 'fBodyAccJerk.bandsEnergy.41.48.1',
        'fBodyAccJerk.bandsEnergy.49.56.1', 'fBodyAccJerk.bandsEnergy.57.64.1',
        'fBodyAccJerk.bandsEnergy.1.16.1', 'fBodyAccJerk.bandsEnergy.17.32.1',
        'fBodyAccJerk.bandsEnergy.33.48.1', 'fBodyAccJerk.bandsEnergy.49.64.1',
        'fBodyAccJerk.bandsEnergy.1.24.1', 'fBodyAccJerk.bandsEnergy.25.48.1',
        'fBodyAccJerk.bandsEnergy.1.8.2', 'fBodyAccJerk.bandsEnergy.9.16.2',
        'fBodyAccJerk.bandsEnergy.17.24.2', 'fBodyAccJerk.bandsEnergy.25.32.2',
        'fBodyAccJerk.bandsEnergy.33.40.2', 'fBodyAccJerk.bandsEnergy.41.48.2',
        'fBodyAccJerk.bandsEnergy.49.56.2', 'fBodyAccJerk.bandsEnergy.57.64.2',
        'fBodyAccJerk.bandsEnergy.1.16.2', 'fBodyAccJerk.bandsEnergy.17.32.2',
        'fBodyAccJerk.bandsEnergy.33.48.2', 'fBodyAccJerk.bandsEnergy.49.64.2',
        'fBodyAccJerk.bandsEnergy.1.24.2', 'fBodyAccJerk.bandsEnergy.25.48.2',
        'fBodyGyro.mean.X', 'fBodyGyro.mean.Y', 'fBodyGyro.mean.Z',
        'fBodyGyro.std.X', 'fBodyGyro.std.Y', 'fBodyGyro.std.Z',
        'fBodyGyro.mad.X', 'fBodyGyro.mad.Y', 'fBodyGyro.mad.Z',
        'fBodyGyro.max.X', 'fBodyGyro.max.Y', 'fBodyGyro.max.Z',
        'fBodyGyro.min.X', 'fBodyGyro.min.Y', 'fBodyGyro.min.Z',
        'fBodyGyro.sma', 'fBodyGyro.energy.X', 'fBodyGyro.energy.Y',
        'fBodyGyro.energy.Z', 'fBodyGyro.iqr.X', 'fBodyGyro.iqr.Y',
        'fBodyGyro.iqr.Z', 'fBodyGyro.entropy.X', 'fBodyGyro.entropy.Y',
        'fBodyGyro.entropy.Z', 'fBodyGyro.maxInds.X', 'fBodyGyro.maxInds.Y',
        'fBodyGyro.maxInds.Z', 'fBodyGyro.meanFreq.X', 'fBodyGyro.meanFreq.Y',
        'fBodyGyro.meanFreq.Z', 'fBodyGyro.skewness.X', 'fBodyGyro.kurtosis.X',
        'fBodyGyro.skewness.Y', 'fBodyGyro.kurtosis.Y', 'fBodyGyro.skewness.Z',
        'fBodyGyro.kurtosis.Z', 'fBodyGyro.bandsEnergy.1.8',
        'fBodyGyro.bandsEnergy.9.16', 'fBodyGyro.bandsEnergy.17.24',
        'fBodyGyro.bandsEnergy.25.32', 'fBodyGyro.bandsEnergy.33.40',
        'fBodyGyro.bandsEnergy.41.48', 'fBodyGyro.bandsEnergy.49.56',
        'fBodyGyro.bandsEnergy.57.64', 'fBodyGyro.bandsEnergy.1.16',
        'fBodyGyro.bandsEnergy.17.32', 'fBodyGyro.bandsEnergy.33.48',
        'fBodyGyro.bandsEnergy.49.64', 'fBodyGyro.bandsEnergy.1.24',
        'fBodyGyro.bandsEnergy.25.48', 'fBodyGyro.bandsEnergy.1.8.1',
        'fBodyGyro.bandsEnergy.9.16.1', 'fBodyGyro.bandsEnergy.17.24.1',
        'fBodyGyro.bandsEnergy.25.32.1', 'fBodyGyro.bandsEnergy.33.40.1',
        'fBodyGyro.bandsEnergy.41.48.1', 'fBodyGyro.bandsEnergy.49.56.1',
        'fBodyGyro.bandsEnergy.57.64.1', 'fBodyGyro.bandsEnergy.1.16.1',
        'fBodyGyro.bandsEnergy.17.32.1', 'fBodyGyro.bandsEnergy.33.48.1',
        'fBodyGyro.bandsEnergy.49.64.1', 'fBodyGyro.bandsEnergy.1.24.1',
        'fBodyGyro.bandsEnergy.25.48.1', 'fBodyGyro.bandsEnergy.1.8.2',
        'fBodyGyro.bandsEnergy.9.16.2', 'fBodyGyro.bandsEnergy.17.24.2',
        'fBodyGyro.bandsEnergy.25.32.2', 'fBodyGyro.bandsEnergy.33.40.2',
        'fBodyGyro.bandsEnergy.41.48.2', 'fBodyGyro.bandsEnergy.49.56.2',
        'fBodyGyro.bandsEnergy.57.64.2', 'fBodyGyro.bandsEnergy.1.16.2',
        'fBodyGyro.bandsEnergy.17.32.2', 'fBodyGyro.bandsEnergy.33.48.2',
        'fBodyGyro.bandsEnergy.49.64.2', 'fBodyGyro.bandsEnergy.1.24.2',
        'fBodyGyro.bandsEnergy.25.48.2', 'fBodyAccMag.mean', 'fBodyAccMag.std',
        'fBodyAccMag.mad', 'fBodyAccMag.max', 'fBodyAccMag.min',
        'fBodyAccMag.sma', 'fBodyAccMag.energy', 'fBodyAccMag.iqr',
        'fBodyAccMag.entropy', 'fBodyAccMag.maxInds', 'fBodyAccMag.meanFreq',
        'fBodyAccMag.skewness', 'fBodyAccMag.kurtosis',
        'fBodyBodyAccJerkMag.mean', 'fBodyBodyAccJerkMag.std',
        'fBodyBodyAccJerkMag.mad', 'fBodyBodyAccJerkMag.max',
        'fBodyBodyAccJerkMag.min', 'fBodyBodyAccJerkMag.sma',
        'fBodyBodyAccJerkMag.energy', 'fBodyBodyAccJerkMag.iqr',
        'fBodyBodyAccJerkMag.entropy', 'fBodyBodyAccJerkMag.maxInds',
        'fBodyBodyAccJerkMag.meanFreq', 'fBodyBodyAccJerkMag.skewness',
        'fBodyBodyAccJerkMag.kurtosis', 'fBodyBodyGyroMag.mean',
        'fBodyBodyGyroMag.std', 'fBodyBodyGyroMag.mad', 'fBodyBodyGyroMag.max',
        'fBodyBodyGyroMag.min', 'fBodyBodyGyroMag.sma',
        'fBodyBodyGyroMag.energy', 'fBodyBodyGyroMag.iqr',
        'fBodyBodyGyroMag.entropy', 'fBodyBodyGyroMag.maxInds',
        'fBodyBodyGyroMag.meanFreq', 'fBodyBodyGyroMag.skewness',
        'fBodyBodyGyroMag.kurtosis', 'fBodyBodyGyroJerkMag.mean',
        'fBodyBodyGyroJerkMag.std', 'fBodyBodyGyroJerkMag.mad',
        'fBodyBodyGyroJerkMag.max', 'fBodyBodyGyroJerkMag.min',
        'fBodyBodyGyroJerkMag.sma', 'fBodyBodyGyroJerkMag.energy',
        'fBodyBodyGyroJerkMag.iqr', 'fBodyBodyGyroJerkMag.entropy',
        'fBodyBodyGyroJerkMag.maxInds', 'fBodyBodyGyroJerkMag.meanFreq',
        'fBodyBodyGyroJerkMag.skewness', 'fBodyBodyGyroJerkMag.kurtosis',
        'angle.tBodyAccMean.gravity', 'angle.tBodyAccJerkMean.gravityMean',
        'angle.tBodyGyroMean.gravityMean',
        'angle.tBodyGyroJerkMean.gravityMean', 'angle.X.gravityMean',
        'angle.Y.gravityMean', 'angle.Z.gravityMean'
    ])

    # These features contain a relatively small number of unique items.
    categorical = HEADER.as_feature_indices([
        'tBodyAcc.mean.X', 'tBodyAcc.energy.Y', 'tBodyAcc.energy.Z',
        'tGravityAcc.std.X', 'tGravityAcc.std.Y', 'tGravityAcc.std.Z',
        'tGravityAcc.mad.X', 'tGravityAcc.mad.Y', 'tGravityAcc.mad.Z',
        'tGravityAcc.iqr.X', 'tGravityAcc.iqr.Y', 'tGravityAcc.iqr.Z',
        'tGravityAcc.entropy.Y', 'tBodyAccJerk.energy.Z', 'tBodyGyro.energy.X',
        'tBodyGyro.energy.Y', 'tBodyGyro.energy.Z', 'tBodyGyroJerk.energy.X',
        'tBodyGyroJerk.energy.Y', 'tBodyGyroJerk.energy.Z', 'tBodyAccMag.min',
        'tGravityAccMag.min', 'tBodyAccJerkMag.energy',
        'tBodyGyroJerkMag.energy', 'fBodyAcc.min.Y', 'fBodyAcc.min.Z',
        'fBodyAcc.maxInds.X', 'fBodyAcc.maxInds.Y', 'fBodyAcc.maxInds.Z',
        'fBodyAcc.bandsEnergy.9.16', 'fBodyAcc.bandsEnergy.25.32',
        'fBodyAcc.bandsEnergy.33.40', 'fBodyAcc.bandsEnergy.41.48',
        'fBodyAcc.bandsEnergy.49.56', 'fBodyAcc.bandsEnergy.57.64',
        'fBodyAcc.bandsEnergy.33.48', 'fBodyAcc.bandsEnergy.49.64',
        'fBodyAcc.bandsEnergy.25.48', 'fBodyAcc.bandsEnergy.25.32.1',
        'fBodyAcc.bandsEnergy.33.40.1', 'fBodyAcc.bandsEnergy.41.48.1',
        'fBodyAcc.bandsEnergy.49.56.1', 'fBodyAcc.bandsEnergy.57.64.1',
        'fBodyAcc.bandsEnergy.33.48.1', 'fBodyAcc.bandsEnergy.49.64.1',
        'fBodyAcc.bandsEnergy.25.48.1', 'fBodyAcc.bandsEnergy.9.16.2',
        'fBodyAcc.bandsEnergy.17.24.2', 'fBodyAcc.bandsEnergy.25.32.2',
        'fBodyAcc.bandsEnergy.33.40.2', 'fBodyAcc.bandsEnergy.41.48.2',
        'fBodyAcc.bandsEnergy.49.56.2', 'fBodyAcc.bandsEnergy.57.64.2',
        'fBodyAcc.bandsEnergy.1.16.2', 'fBodyAcc.bandsEnergy.17.32.2',
        'fBodyAcc.bandsEnergy.33.48.2', 'fBodyAcc.bandsEnergy.49.64.2',
        'fBodyAcc.bandsEnergy.25.48.2', 'fBodyAccJerk.min.X',
        'fBodyAccJerk.min.Z', 'fBodyAccJerk.energy.Z',
        'fBodyAccJerk.maxInds.X', 'fBodyAccJerk.maxInds.Y',
        'fBodyAccJerk.maxInds.Z', 'fBodyAccJerk.kurtosis.Y',
        'fBodyAccJerk.kurtosis.Z', 'fBodyAccJerk.bandsEnergy.1.8',
        'fBodyAccJerk.bandsEnergy.9.16', 'fBodyAccJerk.bandsEnergy.17.24',
        'fBodyAccJerk.bandsEnergy.25.32', 'fBodyAccJerk.bandsEnergy.33.40',
        'fBodyAccJerk.bandsEnergy.41.48', 'fBodyAccJerk.bandsEnergy.49.56',
        'fBodyAccJerk.bandsEnergy.57.64', 'fBodyAccJerk.bandsEnergy.1.16',
        'fBodyAccJerk.bandsEnergy.33.48', 'fBodyAccJerk.bandsEnergy.49.64',
        'fBodyAccJerk.bandsEnergy.9.16.1', 'fBodyAccJerk.bandsEnergy.25.32.1',
        'fBodyAccJerk.bandsEnergy.33.40.1', 'fBodyAccJerk.bandsEnergy.41.48.1',
        'fBodyAccJerk.bandsEnergy.49.56.1', 'fBodyAccJerk.bandsEnergy.57.64.1',
        'fBodyAccJerk.bandsEnergy.33.48.1', 'fBodyAccJerk.bandsEnergy.49.64.1',
        'fBodyAccJerk.bandsEnergy.25.48.1', 'fBodyAccJerk.bandsEnergy.1.8.2',
        'fBodyAccJerk.bandsEnergy.9.16.2', 'fBodyAccJerk.bandsEnergy.17.24.2',
        'fBodyAccJerk.bandsEnergy.25.32.2', 'fBodyAccJerk.bandsEnergy.33.40.2',
        'fBodyAccJerk.bandsEnergy.41.48.2', 'fBodyAccJerk.bandsEnergy.49.56.2',
        'fBodyAccJerk.bandsEnergy.57.64.2', 'fBodyAccJerk.bandsEnergy.1.16.2',
        'fBodyAccJerk.bandsEnergy.17.32.2', 'fBodyAccJerk.bandsEnergy.33.48.2',
        'fBodyAccJerk.bandsEnergy.49.64.2', 'fBodyAccJerk.bandsEnergy.1.24.2',
        'fBodyAccJerk.bandsEnergy.25.48.2', 'fBodyGyro.min.X',
        'fBodyGyro.min.Y', 'fBodyGyro.min.Z', 'fBodyGyro.energy.X',
        'fBodyGyro.energy.Y', 'fBodyGyro.energy.Z', 'fBodyGyro.maxInds.X',
        'fBodyGyro.maxInds.Y', 'fBodyGyro.maxInds.Z',
        'fBodyGyro.bandsEnergy.1.8', 'fBodyGyro.bandsEnergy.9.16',
        'fBodyGyro.bandsEnergy.17.24', 'fBodyGyro.bandsEnergy.25.32',
        'fBodyGyro.bandsEnergy.33.40', 'fBodyGyro.bandsEnergy.41.48',
        'fBodyGyro.bandsEnergy.49.56', 'fBodyGyro.bandsEnergy.57.64',
        'fBodyGyro.bandsEnergy.1.16', 'fBodyGyro.bandsEnergy.17.32',
        'fBodyGyro.bandsEnergy.33.48', 'fBodyGyro.bandsEnergy.49.64',
        'fBodyGyro.bandsEnergy.1.24', 'fBodyGyro.bandsEnergy.25.48',
        'fBodyGyro.bandsEnergy.1.8.1', 'fBodyGyro.bandsEnergy.9.16.1',
        'fBodyGyro.bandsEnergy.17.24.1', 'fBodyGyro.bandsEnergy.25.32.1',
        'fBodyGyro.bandsEnergy.33.40.1', 'fBodyGyro.bandsEnergy.41.48.1',
        'fBodyGyro.bandsEnergy.49.56.1', 'fBodyGyro.bandsEnergy.57.64.1',
        'fBodyGyro.bandsEnergy.1.16.1', 'fBodyGyro.bandsEnergy.17.32.1',
        'fBodyGyro.bandsEnergy.33.48.1', 'fBodyGyro.bandsEnergy.49.64.1',
        'fBodyGyro.bandsEnergy.1.24.1', 'fBodyGyro.bandsEnergy.25.48.1',
        'fBodyGyro.bandsEnergy.1.8.2', 'fBodyGyro.bandsEnergy.9.16.2',
        'fBodyGyro.bandsEnergy.17.24.2', 'fBodyGyro.bandsEnergy.25.32.2',
        'fBodyGyro.bandsEnergy.33.40.2', 'fBodyGyro.bandsEnergy.41.48.2',
        'fBodyGyro.bandsEnergy.49.56.2', 'fBodyGyro.bandsEnergy.57.64.2',
        'fBodyGyro.bandsEnergy.1.16.2', 'fBodyGyro.bandsEnergy.17.32.2',
        'fBodyGyro.bandsEnergy.33.48.2', 'fBodyGyro.bandsEnergy.49.64.2',
        'fBodyGyro.bandsEnergy.1.24.2', 'fBodyGyro.bandsEnergy.25.48.2',
        'fBodyAccMag.min', 'fBodyAccMag.maxInds',
        'fBodyBodyAccJerkMag.maxInds', 'fBodyBodyGyroMag.min',
        'fBodyBodyGyroMag.energy', 'fBodyBodyGyroMag.maxInds',
        'fBodyBodyGyroJerkMag.min', 'fBodyBodyGyroJerkMag.energy',
        'fBodyBodyGyroJerkMag.maxInds'
    ])

    numeric_processors = Pipeline(steps=[('robustimputer', RobustImputer())])

    categorical_processors = Pipeline(steps=[('thresholdonehotencoder',
                                              ThresholdOneHotEncoder(
                                                  threshold=7))])

    column_transformer = ColumnTransformer(
        transformers=[('numeric_processing', numeric_processors, numeric),
                      ('categorical_processing', categorical_processors,
                       categorical)])

    return Pipeline(steps=[(
        'column_transformer',
        column_transformer), ('robustpca', RobustPCA(
            n_components=171)), ('robuststandardscaler',
                                 RobustStandardScaler())])
Exemple #17
0
def build_feature_transform():
    """Build the model definition used for feature processing.

    All listed columns are handled as numeric features: their indices are
    looked up through ``HEADER``, they are routed through a ``RobustImputer``
    inside a ``ColumnTransformer``, and the transformer output is then fed
    to a ``RobustStandardScaler``.

    Returns:
        A ``Pipeline`` whose steps are ``'column_transformer'`` followed by
        ``'robuststandardscaler'``.
    """
    # Names of the columns that can be parsed as numeric.
    numeric_feature_names = [
        'tBodyAcc.mean.X', 'tBodyAcc.mean.Y', 'tBodyAcc.mean.Z',
        'tBodyAcc.std.X', 'tBodyAcc.std.Y', 'tBodyAcc.std.Z',
        'tBodyAcc.mad.X', 'tBodyAcc.mad.Y', 'tBodyAcc.mad.Z',
        'tBodyAcc.max.X', 'tBodyAcc.max.Y', 'tBodyAcc.max.Z',
        'tBodyAcc.min.X', 'tBodyAcc.min.Y', 'tBodyAcc.min.Z',
        'tBodyAcc.sma', 'tBodyAcc.energy.X', 'tBodyAcc.energy.Y',
        'tBodyAcc.energy.Z', 'tBodyAcc.iqr.X', 'tBodyAcc.iqr.Y',
        'tBodyAcc.iqr.Z', 'tBodyAcc.entropy.X', 'tBodyAcc.entropy.Y',
        'tBodyAcc.entropy.Z', 'tBodyAcc.arCoeff.X.1',
        'tBodyAcc.arCoeff.X.2', 'tBodyAcc.arCoeff.X.3',
        'tBodyAcc.arCoeff.X.4', 'tBodyAcc.arCoeff.Y.1',
        'tBodyAcc.arCoeff.Y.2', 'tBodyAcc.arCoeff.Y.3',
        'tBodyAcc.arCoeff.Y.4', 'tBodyAcc.arCoeff.Z.1',
        'tBodyAcc.arCoeff.Z.2', 'tBodyAcc.arCoeff.Z.3',
        'tBodyAcc.arCoeff.Z.4', 'tBodyAcc.correlation.X.Y',
        'tBodyAcc.correlation.X.Z', 'tBodyAcc.correlation.Y.Z',
        'tGravityAcc.mean.X', 'tGravityAcc.mean.Y', 'tGravityAcc.mean.Z',
        'tGravityAcc.std.X', 'tGravityAcc.std.Y', 'tGravityAcc.std.Z',
        'tGravityAcc.mad.X', 'tGravityAcc.mad.Y', 'tGravityAcc.mad.Z',
        'tGravityAcc.max.X', 'tGravityAcc.max.Y', 'tGravityAcc.max.Z',
        'tGravityAcc.min.X', 'tGravityAcc.min.Y', 'tGravityAcc.min.Z',
        'tGravityAcc.sma', 'tGravityAcc.energy.X', 'tGravityAcc.energy.Y',
        'tGravityAcc.energy.Z', 'tGravityAcc.iqr.X', 'tGravityAcc.iqr.Y',
        'tGravityAcc.iqr.Z', 'tGravityAcc.entropy.X',
        'tGravityAcc.entropy.Y', 'tGravityAcc.entropy.Z',
        'tGravityAcc.arCoeff.X.1', 'tGravityAcc.arCoeff.X.2',
        'tGravityAcc.arCoeff.X.3', 'tGravityAcc.arCoeff.X.4',
        'tGravityAcc.arCoeff.Y.1', 'tGravityAcc.arCoeff.Y.2',
        'tGravityAcc.arCoeff.Y.3', 'tGravityAcc.arCoeff.Y.4',
        'tGravityAcc.arCoeff.Z.1', 'tGravityAcc.arCoeff.Z.2',
        'tGravityAcc.arCoeff.Z.3', 'tGravityAcc.arCoeff.Z.4',
        'tGravityAcc.correlation.X.Y', 'tGravityAcc.correlation.X.Z',
        'tGravityAcc.correlation.Y.Z', 'tBodyAccJerk.mean.X',
        'tBodyAccJerk.mean.Y', 'tBodyAccJerk.mean.Z', 'tBodyAccJerk.std.X',
        'tBodyAccJerk.std.Y', 'tBodyAccJerk.std.Z', 'tBodyAccJerk.mad.X',
        'tBodyAccJerk.mad.Y', 'tBodyAccJerk.mad.Z', 'tBodyAccJerk.max.X',
        'tBodyAccJerk.max.Y', 'tBodyAccJerk.max.Z', 'tBodyAccJerk.min.X',
        'tBodyAccJerk.min.Y', 'tBodyAccJerk.min.Z', 'tBodyAccJerk.sma',
        'tBodyAccJerk.energy.X', 'tBodyAccJerk.energy.Y',
        'tBodyAccJerk.energy.Z', 'tBodyAccJerk.iqr.X', 'tBodyAccJerk.iqr.Y',
        'tBodyAccJerk.iqr.Z', 'tBodyAccJerk.entropy.X',
        'tBodyAccJerk.entropy.Y', 'tBodyAccJerk.entropy.Z',
        'tBodyAccJerk.arCoeff.X.1', 'tBodyAccJerk.arCoeff.X.2',
        'tBodyAccJerk.arCoeff.X.3', 'tBodyAccJerk.arCoeff.X.4',
        'tBodyAccJerk.arCoeff.Y.1', 'tBodyAccJerk.arCoeff.Y.2',
        'tBodyAccJerk.arCoeff.Y.3', 'tBodyAccJerk.arCoeff.Y.4',
        'tBodyAccJerk.arCoeff.Z.1', 'tBodyAccJerk.arCoeff.Z.2',
        'tBodyAccJerk.arCoeff.Z.3', 'tBodyAccJerk.arCoeff.Z.4',
        'tBodyAccJerk.correlation.X.Y', 'tBodyAccJerk.correlation.X.Z',
        'tBodyAccJerk.correlation.Y.Z', 'tBodyGyro.mean.X',
        'tBodyGyro.mean.Y', 'tBodyGyro.mean.Z', 'tBodyGyro.std.X',
        'tBodyGyro.std.Y', 'tBodyGyro.std.Z', 'tBodyGyro.mad.X',
        'tBodyGyro.mad.Y', 'tBodyGyro.mad.Z', 'tBodyGyro.max.X',
        'tBodyGyro.max.Y', 'tBodyGyro.max.Z', 'tBodyGyro.min.X',
        'tBodyGyro.min.Y', 'tBodyGyro.min.Z', 'tBodyGyro.sma',
        'tBodyGyro.energy.X', 'tBodyGyro.energy.Y', 'tBodyGyro.energy.Z',
        'tBodyGyro.iqr.X', 'tBodyGyro.iqr.Y', 'tBodyGyro.iqr.Z',
        'tBodyGyro.entropy.X', 'tBodyGyro.entropy.Y', 'tBodyGyro.entropy.Z',
        'tBodyGyro.arCoeff.X.1', 'tBodyGyro.arCoeff.X.2',
        'tBodyGyro.arCoeff.X.3', 'tBodyGyro.arCoeff.X.4',
        'tBodyGyro.arCoeff.Y.1', 'tBodyGyro.arCoeff.Y.2',
        'tBodyGyro.arCoeff.Y.3', 'tBodyGyro.arCoeff.Y.4',
        'tBodyGyro.arCoeff.Z.1', 'tBodyGyro.arCoeff.Z.2',
        'tBodyGyro.arCoeff.Z.3', 'tBodyGyro.arCoeff.Z.4',
        'tBodyGyro.correlation.X.Y', 'tBodyGyro.correlation.X.Z',
        'tBodyGyro.correlation.Y.Z', 'tBodyGyroJerk.mean.X',
        'tBodyGyroJerk.mean.Y', 'tBodyGyroJerk.mean.Z',
        'tBodyGyroJerk.std.X', 'tBodyGyroJerk.std.Y', 'tBodyGyroJerk.std.Z',
        'tBodyGyroJerk.mad.X', 'tBodyGyroJerk.mad.Y', 'tBodyGyroJerk.mad.Z',
        'tBodyGyroJerk.max.X', 'tBodyGyroJerk.max.Y', 'tBodyGyroJerk.max.Z',
        'tBodyGyroJerk.min.X', 'tBodyGyroJerk.min.Y', 'tBodyGyroJerk.min.Z',
        'tBodyGyroJerk.sma', 'tBodyGyroJerk.energy.X',
        'tBodyGyroJerk.energy.Y', 'tBodyGyroJerk.energy.Z',
        'tBodyGyroJerk.iqr.X', 'tBodyGyroJerk.iqr.Y', 'tBodyGyroJerk.iqr.Z',
        'tBodyGyroJerk.entropy.X', 'tBodyGyroJerk.entropy.Y',
        'tBodyGyroJerk.entropy.Z', 'tBodyGyroJerk.arCoeff.X.1',
        'tBodyGyroJerk.arCoeff.X.2', 'tBodyGyroJerk.arCoeff.X.3',
        'tBodyGyroJerk.arCoeff.X.4', 'tBodyGyroJerk.arCoeff.Y.1',
        'tBodyGyroJerk.arCoeff.Y.2', 'tBodyGyroJerk.arCoeff.Y.3',
        'tBodyGyroJerk.arCoeff.Y.4', 'tBodyGyroJerk.arCoeff.Z.1',
        'tBodyGyroJerk.arCoeff.Z.2', 'tBodyGyroJerk.arCoeff.Z.3',
        'tBodyGyroJerk.arCoeff.Z.4', 'tBodyGyroJerk.correlation.X.Y',
        'tBodyGyroJerk.correlation.X.Z', 'tBodyGyroJerk.correlation.Y.Z',
        'tBodyAccMag.mean', 'tBodyAccMag.std', 'tBodyAccMag.mad',
        'tBodyAccMag.max', 'tBodyAccMag.min', 'tBodyAccMag.sma',
        'tBodyAccMag.energy', 'tBodyAccMag.iqr', 'tBodyAccMag.entropy',
        'tBodyAccMag.arCoeff1', 'tBodyAccMag.arCoeff2',
        'tBodyAccMag.arCoeff3', 'tBodyAccMag.arCoeff4',
        'tGravityAccMag.mean', 'tGravityAccMag.std', 'tGravityAccMag.mad',
        'tGravityAccMag.max', 'tGravityAccMag.min', 'tGravityAccMag.sma',
        'tGravityAccMag.energy', 'tGravityAccMag.iqr',
        'tGravityAccMag.entropy', 'tGravityAccMag.arCoeff1',
        'tGravityAccMag.arCoeff2', 'tGravityAccMag.arCoeff3',
        'tGravityAccMag.arCoeff4', 'tBodyAccJerkMag.mean',
        'tBodyAccJerkMag.std', 'tBodyAccJerkMag.mad', 'tBodyAccJerkMag.max',
        'tBodyAccJerkMag.min', 'tBodyAccJerkMag.sma',
        'tBodyAccJerkMag.energy', 'tBodyAccJerkMag.iqr',
        'tBodyAccJerkMag.entropy', 'tBodyAccJerkMag.arCoeff1',
        'tBodyAccJerkMag.arCoeff2', 'tBodyAccJerkMag.arCoeff3',
        'tBodyAccJerkMag.arCoeff4', 'tBodyGyroMag.mean', 'tBodyGyroMag.std',
        'tBodyGyroMag.mad', 'tBodyGyroMag.max', 'tBodyGyroMag.min',
        'tBodyGyroMag.sma', 'tBodyGyroMag.energy', 'tBodyGyroMag.iqr',
        'tBodyGyroMag.entropy', 'tBodyGyroMag.arCoeff1',
        'tBodyGyroMag.arCoeff2', 'tBodyGyroMag.arCoeff3',
        'tBodyGyroMag.arCoeff4', 'tBodyGyroJerkMag.mean',
        'tBodyGyroJerkMag.std', 'tBodyGyroJerkMag.mad',
        'tBodyGyroJerkMag.max', 'tBodyGyroJerkMag.min',
        'tBodyGyroJerkMag.sma', 'tBodyGyroJerkMag.energy',
        'tBodyGyroJerkMag.iqr', 'tBodyGyroJerkMag.entropy',
        'tBodyGyroJerkMag.arCoeff1', 'tBodyGyroJerkMag.arCoeff2',
        'tBodyGyroJerkMag.arCoeff3', 'tBodyGyroJerkMag.arCoeff4',
        'fBodyAcc.mean.X', 'fBodyAcc.mean.Y', 'fBodyAcc.mean.Z',
        'fBodyAcc.std.X', 'fBodyAcc.std.Y', 'fBodyAcc.std.Z',
        'fBodyAcc.mad.X', 'fBodyAcc.mad.Y', 'fBodyAcc.mad.Z',
        'fBodyAcc.max.X', 'fBodyAcc.max.Y', 'fBodyAcc.max.Z',
        'fBodyAcc.min.X', 'fBodyAcc.min.Y', 'fBodyAcc.min.Z',
        'fBodyAcc.sma', 'fBodyAcc.energy.X', 'fBodyAcc.energy.Y',
        'fBodyAcc.energy.Z', 'fBodyAcc.iqr.X', 'fBodyAcc.iqr.Y',
        'fBodyAcc.iqr.Z', 'fBodyAcc.entropy.X', 'fBodyAcc.entropy.Y',
        'fBodyAcc.entropy.Z', 'fBodyAcc.maxInds.X', 'fBodyAcc.maxInds.Y',
        'fBodyAcc.maxInds.Z', 'fBodyAcc.meanFreq.X', 'fBodyAcc.meanFreq.Y',
        'fBodyAcc.meanFreq.Z', 'fBodyAcc.skewness.X', 'fBodyAcc.kurtosis.X',
        'fBodyAcc.skewness.Y', 'fBodyAcc.kurtosis.Y', 'fBodyAcc.skewness.Z',
        'fBodyAcc.kurtosis.Z', 'fBodyAcc.bandsEnergy.1.8',
        'fBodyAcc.bandsEnergy.9.16', 'fBodyAcc.bandsEnergy.17.24',
        'fBodyAcc.bandsEnergy.25.32', 'fBodyAcc.bandsEnergy.33.40',
        'fBodyAcc.bandsEnergy.41.48', 'fBodyAcc.bandsEnergy.49.56',
        'fBodyAcc.bandsEnergy.57.64', 'fBodyAcc.bandsEnergy.1.16',
        'fBodyAcc.bandsEnergy.17.32', 'fBodyAcc.bandsEnergy.33.48',
        'fBodyAcc.bandsEnergy.49.64', 'fBodyAcc.bandsEnergy.1.24',
        'fBodyAcc.bandsEnergy.25.48', 'fBodyAcc.bandsEnergy.1.8.1',
        'fBodyAcc.bandsEnergy.9.16.1', 'fBodyAcc.bandsEnergy.17.24.1',
        'fBodyAcc.bandsEnergy.25.32.1', 'fBodyAcc.bandsEnergy.33.40.1',
        'fBodyAcc.bandsEnergy.41.48.1', 'fBodyAcc.bandsEnergy.49.56.1',
        'fBodyAcc.bandsEnergy.57.64.1', 'fBodyAcc.bandsEnergy.1.16.1',
        'fBodyAcc.bandsEnergy.17.32.1', 'fBodyAcc.bandsEnergy.33.48.1',
        'fBodyAcc.bandsEnergy.49.64.1', 'fBodyAcc.bandsEnergy.1.24.1',
        'fBodyAcc.bandsEnergy.25.48.1', 'fBodyAcc.bandsEnergy.1.8.2',
        'fBodyAcc.bandsEnergy.9.16.2', 'fBodyAcc.bandsEnergy.17.24.2',
        'fBodyAcc.bandsEnergy.25.32.2', 'fBodyAcc.bandsEnergy.33.40.2',
        'fBodyAcc.bandsEnergy.41.48.2', 'fBodyAcc.bandsEnergy.49.56.2',
        'fBodyAcc.bandsEnergy.57.64.2', 'fBodyAcc.bandsEnergy.1.16.2',
        'fBodyAcc.bandsEnergy.17.32.2', 'fBodyAcc.bandsEnergy.33.48.2',
        'fBodyAcc.bandsEnergy.49.64.2', 'fBodyAcc.bandsEnergy.1.24.2',
        'fBodyAcc.bandsEnergy.25.48.2', 'fBodyAccJerk.mean.X',
        'fBodyAccJerk.mean.Y', 'fBodyAccJerk.mean.Z', 'fBodyAccJerk.std.X',
        'fBodyAccJerk.std.Y', 'fBodyAccJerk.std.Z', 'fBodyAccJerk.mad.X',
        'fBodyAccJerk.mad.Y', 'fBodyAccJerk.mad.Z', 'fBodyAccJerk.max.X',
        'fBodyAccJerk.max.Y', 'fBodyAccJerk.max.Z', 'fBodyAccJerk.min.X',
        'fBodyAccJerk.min.Y', 'fBodyAccJerk.min.Z', 'fBodyAccJerk.sma',
        'fBodyAccJerk.energy.X', 'fBodyAccJerk.energy.Y',
        'fBodyAccJerk.energy.Z', 'fBodyAccJerk.iqr.X', 'fBodyAccJerk.iqr.Y',
        'fBodyAccJerk.iqr.Z', 'fBodyAccJerk.entropy.X',
        'fBodyAccJerk.entropy.Y', 'fBodyAccJerk.entropy.Z',
        'fBodyAccJerk.maxInds.X', 'fBodyAccJerk.maxInds.Y',
        'fBodyAccJerk.maxInds.Z', 'fBodyAccJerk.meanFreq.X',
        'fBodyAccJerk.meanFreq.Y', 'fBodyAccJerk.meanFreq.Z',
        'fBodyAccJerk.skewness.X', 'fBodyAccJerk.kurtosis.X',
        'fBodyAccJerk.skewness.Y', 'fBodyAccJerk.kurtosis.Y',
        'fBodyAccJerk.skewness.Z', 'fBodyAccJerk.kurtosis.Z',
        'fBodyAccJerk.bandsEnergy.1.8', 'fBodyAccJerk.bandsEnergy.9.16',
        'fBodyAccJerk.bandsEnergy.17.24', 'fBodyAccJerk.bandsEnergy.25.32',
        'fBodyAccJerk.bandsEnergy.33.40', 'fBodyAccJerk.bandsEnergy.41.48',
        'fBodyAccJerk.bandsEnergy.49.56', 'fBodyAccJerk.bandsEnergy.57.64',
        'fBodyAccJerk.bandsEnergy.1.16', 'fBodyAccJerk.bandsEnergy.17.32',
        'fBodyAccJerk.bandsEnergy.33.48', 'fBodyAccJerk.bandsEnergy.49.64',
        'fBodyAccJerk.bandsEnergy.1.24', 'fBodyAccJerk.bandsEnergy.25.48',
        'fBodyAccJerk.bandsEnergy.1.8.1', 'fBodyAccJerk.bandsEnergy.9.16.1',
        'fBodyAccJerk.bandsEnergy.17.24.1',
        'fBodyAccJerk.bandsEnergy.25.32.1',
        'fBodyAccJerk.bandsEnergy.33.40.1',
        'fBodyAccJerk.bandsEnergy.41.48.1',
        'fBodyAccJerk.bandsEnergy.49.56.1',
        'fBodyAccJerk.bandsEnergy.57.64.1',
        'fBodyAccJerk.bandsEnergy.1.16.1',
        'fBodyAccJerk.bandsEnergy.17.32.1',
        'fBodyAccJerk.bandsEnergy.33.48.1',
        'fBodyAccJerk.bandsEnergy.49.64.1',
        'fBodyAccJerk.bandsEnergy.1.24.1',
        'fBodyAccJerk.bandsEnergy.25.48.1',
        'fBodyAccJerk.bandsEnergy.1.8.2', 'fBodyAccJerk.bandsEnergy.9.16.2',
        'fBodyAccJerk.bandsEnergy.17.24.2',
        'fBodyAccJerk.bandsEnergy.25.32.2',
        'fBodyAccJerk.bandsEnergy.33.40.2',
        'fBodyAccJerk.bandsEnergy.41.48.2',
        'fBodyAccJerk.bandsEnergy.49.56.2',
        'fBodyAccJerk.bandsEnergy.57.64.2',
        'fBodyAccJerk.bandsEnergy.1.16.2',
        'fBodyAccJerk.bandsEnergy.17.32.2',
        'fBodyAccJerk.bandsEnergy.33.48.2',
        'fBodyAccJerk.bandsEnergy.49.64.2',
        'fBodyAccJerk.bandsEnergy.1.24.2',
        'fBodyAccJerk.bandsEnergy.25.48.2', 'fBodyGyro.mean.X',
        'fBodyGyro.mean.Y', 'fBodyGyro.mean.Z', 'fBodyGyro.std.X',
        'fBodyGyro.std.Y', 'fBodyGyro.std.Z', 'fBodyGyro.mad.X',
        'fBodyGyro.mad.Y', 'fBodyGyro.mad.Z', 'fBodyGyro.max.X',
        'fBodyGyro.max.Y', 'fBodyGyro.max.Z', 'fBodyGyro.min.X',
        'fBodyGyro.min.Y', 'fBodyGyro.min.Z', 'fBodyGyro.sma',
        'fBodyGyro.energy.X', 'fBodyGyro.energy.Y', 'fBodyGyro.energy.Z',
        'fBodyGyro.iqr.X', 'fBodyGyro.iqr.Y', 'fBodyGyro.iqr.Z',
        'fBodyGyro.entropy.X', 'fBodyGyro.entropy.Y', 'fBodyGyro.entropy.Z',
        'fBodyGyro.maxInds.X', 'fBodyGyro.maxInds.Y', 'fBodyGyro.maxInds.Z',
        'fBodyGyro.meanFreq.X', 'fBodyGyro.meanFreq.Y',
        'fBodyGyro.meanFreq.Z', 'fBodyGyro.skewness.X',
        'fBodyGyro.kurtosis.X', 'fBodyGyro.skewness.Y',
        'fBodyGyro.kurtosis.Y', 'fBodyGyro.skewness.Z',
        'fBodyGyro.kurtosis.Z', 'fBodyGyro.bandsEnergy.1.8',
        'fBodyGyro.bandsEnergy.9.16', 'fBodyGyro.bandsEnergy.17.24',
        'fBodyGyro.bandsEnergy.25.32', 'fBodyGyro.bandsEnergy.33.40',
        'fBodyGyro.bandsEnergy.41.48', 'fBodyGyro.bandsEnergy.49.56',
        'fBodyGyro.bandsEnergy.57.64', 'fBodyGyro.bandsEnergy.1.16',
        'fBodyGyro.bandsEnergy.17.32', 'fBodyGyro.bandsEnergy.33.48',
        'fBodyGyro.bandsEnergy.49.64', 'fBodyGyro.bandsEnergy.1.24',
        'fBodyGyro.bandsEnergy.25.48', 'fBodyGyro.bandsEnergy.1.8.1',
        'fBodyGyro.bandsEnergy.9.16.1', 'fBodyGyro.bandsEnergy.17.24.1',
        'fBodyGyro.bandsEnergy.25.32.1', 'fBodyGyro.bandsEnergy.33.40.1',
        'fBodyGyro.bandsEnergy.41.48.1', 'fBodyGyro.bandsEnergy.49.56.1',
        'fBodyGyro.bandsEnergy.57.64.1', 'fBodyGyro.bandsEnergy.1.16.1',
        'fBodyGyro.bandsEnergy.17.32.1', 'fBodyGyro.bandsEnergy.33.48.1',
        'fBodyGyro.bandsEnergy.49.64.1', 'fBodyGyro.bandsEnergy.1.24.1',
        'fBodyGyro.bandsEnergy.25.48.1', 'fBodyGyro.bandsEnergy.1.8.2',
        'fBodyGyro.bandsEnergy.9.16.2', 'fBodyGyro.bandsEnergy.17.24.2',
        'fBodyGyro.bandsEnergy.25.32.2', 'fBodyGyro.bandsEnergy.33.40.2',
        'fBodyGyro.bandsEnergy.41.48.2', 'fBodyGyro.bandsEnergy.49.56.2',
        'fBodyGyro.bandsEnergy.57.64.2', 'fBodyGyro.bandsEnergy.1.16.2',
        'fBodyGyro.bandsEnergy.17.32.2', 'fBodyGyro.bandsEnergy.33.48.2',
        'fBodyGyro.bandsEnergy.49.64.2', 'fBodyGyro.bandsEnergy.1.24.2',
        'fBodyGyro.bandsEnergy.25.48.2', 'fBodyAccMag.mean',
        'fBodyAccMag.std', 'fBodyAccMag.mad', 'fBodyAccMag.max',
        'fBodyAccMag.min', 'fBodyAccMag.sma', 'fBodyAccMag.energy',
        'fBodyAccMag.iqr', 'fBodyAccMag.entropy', 'fBodyAccMag.maxInds',
        'fBodyAccMag.meanFreq', 'fBodyAccMag.skewness',
        'fBodyAccMag.kurtosis', 'fBodyBodyAccJerkMag.mean',
        'fBodyBodyAccJerkMag.std', 'fBodyBodyAccJerkMag.mad',
        'fBodyBodyAccJerkMag.max', 'fBodyBodyAccJerkMag.min',
        'fBodyBodyAccJerkMag.sma', 'fBodyBodyAccJerkMag.energy',
        'fBodyBodyAccJerkMag.iqr', 'fBodyBodyAccJerkMag.entropy',
        'fBodyBodyAccJerkMag.maxInds', 'fBodyBodyAccJerkMag.meanFreq',
        'fBodyBodyAccJerkMag.skewness', 'fBodyBodyAccJerkMag.kurtosis',
        'fBodyBodyGyroMag.mean', 'fBodyBodyGyroMag.std',
        'fBodyBodyGyroMag.mad', 'fBodyBodyGyroMag.max',
        'fBodyBodyGyroMag.min', 'fBodyBodyGyroMag.sma',
        'fBodyBodyGyroMag.energy', 'fBodyBodyGyroMag.iqr',
        'fBodyBodyGyroMag.entropy', 'fBodyBodyGyroMag.maxInds',
        'fBodyBodyGyroMag.meanFreq', 'fBodyBodyGyroMag.skewness',
        'fBodyBodyGyroMag.kurtosis', 'fBodyBodyGyroJerkMag.mean',
        'fBodyBodyGyroJerkMag.std', 'fBodyBodyGyroJerkMag.mad',
        'fBodyBodyGyroJerkMag.max', 'fBodyBodyGyroJerkMag.min',
        'fBodyBodyGyroJerkMag.sma', 'fBodyBodyGyroJerkMag.energy',
        'fBodyBodyGyroJerkMag.iqr', 'fBodyBodyGyroJerkMag.entropy',
        'fBodyBodyGyroJerkMag.maxInds', 'fBodyBodyGyroJerkMag.meanFreq',
        'fBodyBodyGyroJerkMag.skewness', 'fBodyBodyGyroJerkMag.kurtosis',
        'angle.tBodyAccMean.gravity', 'angle.tBodyAccJerkMean.gravityMean',
        'angle.tBodyGyroMean.gravityMean',
        'angle.tBodyGyroJerkMean.gravityMean', 'angle.X.gravityMean',
        'angle.Y.gravityMean', 'angle.Z.gravityMean',
    ]
    numeric = HEADER.as_feature_indices(numeric_feature_names)

    # Numeric columns get a constant-strategy imputer with nan as fill value.
    imputer = RobustImputer(strategy='constant', fill_values=nan)
    numeric_processors = Pipeline(steps=[('robustimputer', imputer)])

    column_transformer = ColumnTransformer(
        transformers=[('numeric_processing', numeric_processors, numeric)]
    )

    # Scaling is applied to the column transformer's output as the last step.
    scaling_step = ('robuststandardscaler', RobustStandardScaler())
    return Pipeline(
        steps=[('column_transformer', column_transformer), scaling_step]
    )
def test_robust_standard_dense_with_low_nnz_columns():
    """Fit-transform ``X_low_nnz`` and compare with the expected fixture."""
    transformed = RobustStandardScaler().fit_transform(X_low_nnz)
    np.testing.assert_array_almost_equal(transformed, X_low_nnz_standardized)