def test_robust_imputer_categorical_custom_function():
    """Check constant imputation of object data driven by a custom mask.

    ``mask_function`` flags entries equal to "hot dog" and the imputer is
    configured to fill with "not hot dog"; the transformed fixture is
    compared against ``X_imputed_categorical``.
    """
    settings = {
        "dtype": np.dtype("O"),
        "strategy": "constant",
        "fill_values": "not hot dog",
        "mask_function": lambda x: x == "hot dog",
    }
    imputer = RobustImputer(**settings)
    imputer.fit(X_impute_categorical)
    actual = imputer.transform(X_impute_categorical)
    assert_array_equal(actual, X_imputed_categorical)
def test_robust_imputer():
    """Run the shared model check on a fitted constant-strategy RobustImputer.

    The float32 fixture contains NaN and +inf entries; the imputer is built
    with ``fill_values=np.nan`` and no custom mask function.
    """
    helper = SklearnTestHelper()
    samples = np.array(
        [
            [4, 5, np.nan, 7],
            [0, np.nan, 2, 3],
            [8, 9, 10, 11],
            [np.inf, 13, 14, 15],
        ],
        dtype=np.float32,
    )
    imputer = RobustImputer(dtype=None, strategy="constant", fill_values=np.nan, mask_function=None)
    imputer.fit(samples)
    # First dimension is left as relay.Any(); the second is the fixture's column count.
    n_columns = samples.shape[1]
    input_shape = (relay.Any(), n_columns)
    _test_model_impl(helper, imputer, input_shape, samples)
def build_feature_transform():
    """Build the feature-processing pipeline.

    The listed columns are resolved to indices through ``HEADER`` and routed
    through a ``RobustImputer``; the column transformer output is then passed
    to ``RobustPCA(n_components=117)`` and ``RobustStandardScaler``.

    Returns
    -------
    Pipeline
        The unfitted feature-processing pipeline.
    """
    # Columns that can be parsed as numeric.
    numeric_columns = [
        'Account Length', 'VMail Message', 'Day Mins', 'Day Calls',
        'Eve Mins', 'Eve Calls', 'Night Mins', 'Night Calls', 'Intl Mins',
        'Intl Calls', 'CustServ Calls',
        'State_AK', 'State_AL', 'State_AR', 'State_AZ', 'State_CA',
        'State_CO', 'State_CT', 'State_DC', 'State_DE', 'State_FL',
        'State_GA', 'State_HI', 'State_IA', 'State_ID', 'State_IL',
        'State_IN', 'State_KS', 'State_KY', 'State_LA', 'State_MA',
        'State_MD', 'State_ME', 'State_MI', 'State_MN', 'State_MO',
        'State_MS', 'State_MT', 'State_NC', 'State_ND', 'State_NE',
        'State_NH', 'State_NJ', 'State_NM', 'State_NV', 'State_NY',
        'State_OH', 'State_OK', 'State_OR', 'State_PA', 'State_RI',
        'State_SC', 'State_SD', 'State_TN', 'State_TX', 'State_UT',
        'State_VA', 'State_VT', 'State_WA', 'State_WI', 'State_WV',
        'State_WY',
        'Area Code_408', 'Area Code_415', 'Area Code_510',
        "Int'l Plan_no", "Int'l Plan_yes", 'VMail Plan_no', 'VMail Plan_yes',
    ]
    numeric = HEADER.as_feature_indices(numeric_columns)

    imputing_steps = [('robustimputer', RobustImputer())]
    numeric_processors = Pipeline(steps=imputing_steps)

    routing = [('numeric_processing', numeric_processors, numeric)]
    column_transformer = ColumnTransformer(transformers=routing)

    stages = [
        ('column_transformer', column_transformer),
        ('robustpca', RobustPCA(n_components=117)),
        ('robuststandardscaler', RobustStandardScaler()),
    ]
    return Pipeline(steps=stages)
def test_pipeline():
    """Run the shared model check on a fitted imputer + scaler pipeline."""
    helper = SklearnTestHelper()
    stages = [
        ("imputer", RobustImputer()),
        ("scaler", RobustStandardScaler()),
    ]
    pipe = Pipeline(stages)
    samples = np.array(
        [
            [0.0, 1.0, 3],
            [2.0, 2.0, 5],
        ],
        dtype=np.float32,
    )
    pipe.fit(samples)
    # First dimension is left as relay.Any(); the second is the fixture's column count.
    input_shape = (relay.Any(), samples.shape[1])
    _test_model_impl(helper, pipe, input_shape, samples)
def fit(self, y):
    """Fit the encoder on y.

    Parameters
    ----------
    y : {array-like}, shape (n_samples,)
        Input column, where `n_samples` is the number of samples. Any
        array-like (ndarray, list, tuple, ...) is accepted; it is coerced
        with ``np.asarray`` before being reshaped into a single column.

    Returns
    -------
    self : NALabelEncoder
    """
    self.model_ = RobustImputer(strategy="constant", fill_values=np.nan, mask_function=self.mask_function)
    # Plain sequences have no ``reshape``; coerce first so the documented
    # array-like contract holds. An ndarray input passes through unchanged.
    y = np.asarray(y).reshape(-1, 1)
    self.model_.fit(X=y)
    return self
def build_feature_transform():
    """Build the feature-processing pipeline.

    Numeric columns go through a ``RobustImputer``; categorical columns go
    through ``ThresholdOneHotEncoder(threshold=11)``. The combined output is
    passed to ``RobustPCA(n_components=98)`` and ``RobustStandardScaler``.

    Returns
    -------
    Pipeline
        The unfitted feature-processing pipeline.
    """
    # Columns that can be parsed as numeric.
    numeric_columns = [
        'age', 'duration', 'campaign', 'pdays', 'previous',
        'emp.var.rate', 'cons.price.idx', 'cons.conf.idx',
        'euribor3m', 'nr.employed',
    ]
    numeric = HEADER.as_feature_indices(numeric_columns)

    # Columns holding a relatively small number of unique items.
    categorical_columns = [
        'job', 'marital', 'education', 'default', 'housing',
        'loan', 'contact', 'month', 'day_of_week', 'poutcome',
    ]
    categorical = HEADER.as_feature_indices(categorical_columns)

    numeric_processors = Pipeline(
        steps=[('robustimputer', RobustImputer())]
    )
    encoder = ThresholdOneHotEncoder(threshold=11)
    categorical_processors = Pipeline(
        steps=[('thresholdonehotencoder', encoder)]
    )

    routing = [
        ('numeric_processing', numeric_processors, numeric),
        ('categorical_processing', categorical_processors, categorical),
    ]
    column_transformer = ColumnTransformer(transformers=routing)

    stages = [
        ('column_transformer', column_transformer),
        ('robustpca', RobustPCA(n_components=98)),
        ('robuststandardscaler', RobustStandardScaler()),
    ]
    return Pipeline(steps=stages)
def build_feature_transform():
    """Build the feature-processing pipeline.

    Numeric columns are imputed with a constant ``nan`` fill; the 'amt'
    column is additionally routed through
    ``ThresholdOneHotEncoder(threshold=635)``. The combined output is scaled
    by ``RobustStandardScaler``.

    Returns
    -------
    Pipeline
        The unfitted feature-processing pipeline.
    """
    # Columns that can be parsed as numeric: 'Unnamed: 0', 'V1'..'V28', 'amt'.
    numeric_columns = (
        ['Unnamed: 0']
        + ['V{}'.format(i) for i in range(1, 29)]
        + ['amt']
    )
    numeric = HEADER.as_feature_indices(numeric_columns)

    # Columns holding a relatively small number of unique items.
    categorical = HEADER.as_feature_indices(['amt'])

    imputer = RobustImputer(strategy='constant', fill_values=nan)
    numeric_processors = Pipeline(steps=[('robustimputer', imputer)])

    encoder = ThresholdOneHotEncoder(threshold=635)
    categorical_processors = Pipeline(
        steps=[('thresholdonehotencoder', encoder)]
    )

    routing = [
        ('numeric_processing', numeric_processors, numeric),
        ('categorical_processing', categorical_processors, categorical),
    ]
    column_transformer = ColumnTransformer(transformers=routing)

    stages = [
        ('column_transformer', column_transformer),
        ('robuststandardscaler', RobustStandardScaler()),
    ]
    return Pipeline(steps=stages)
def test_automl():
    """Run the shared model check on an AutoMLTransformer.

    The transformer wraps a column-transformer pipeline (constant-fill
    RobustImputer over columns 0-3) and an NALabelEncoder, both fitted on the
    same float32 fixture containing NaN entries.
    """
    helper = SklearnTestHelper()
    samples = np.array(
        [
            [4, 5, np.nan, 7],
            [0, np.nan, 2, 3],
            [8, 9, 10, 11],
            [np.nan, 13, 14, 15],
        ],
        dtype=np.float32,
    )

    imputing_pipeline = Pipeline(
        steps=[("robustimputer", RobustImputer(fill_values=np.nan, strategy="constant"))]
    )
    column_transformer = ColumnTransformer(
        transformers=[("numeric_processing", imputing_pipeline, [0, 1, 2, 3])]
    )
    column_transformer.fit(samples)
    feature_pipeline = Pipeline(steps=[("column_transformer", column_transformer)])

    header = Header(column_names=["x1", "x2", "x3", "class"], target_column_name="class")

    label_encoder = NALabelEncoder()
    label_encoder.fit(samples)

    automl_transformer = AutoMLTransformer(header, feature_pipeline, label_encoder)
    # Both dimensions of the input shape are left as relay.Any().
    input_shape = (relay.Any(), relay.Any())
    _test_model_impl(helper, automl_transformer, input_shape, samples, auto_ml=True)
def build_feature_transform():
    """Build the feature-processing pipeline.

    All listed columns are imputed with a constant ``nan`` fill and then
    scaled by ``RobustStandardScaler``.

    Returns
    -------
    Pipeline
        The unfitted feature-processing pipeline.
    """
    # Columns that can be parsed as numeric: 'Unnamed: 0', 'V1'..'V28', 'amt'.
    numeric_columns = ['Unnamed: 0']
    numeric_columns.extend('V{}'.format(i) for i in range(1, 29))
    numeric_columns.append('amt')
    numeric = HEADER.as_feature_indices(numeric_columns)

    imputer = RobustImputer(strategy='constant', fill_values=nan)
    numeric_processors = Pipeline(steps=[('robustimputer', imputer)])

    routing = [('numeric_processing', numeric_processors, numeric)]
    column_transformer = ColumnTransformer(transformers=routing)

    stages = [
        ('column_transformer', column_transformer),
        ('robuststandardscaler', RobustStandardScaler()),
    ]
    return Pipeline(steps=stages)
def build_feature_transform():
    """Build the feature-processing pipeline.

    Numeric columns are imputed with a constant ``nan`` fill; the categorical
    columns go through ``ThresholdOneHotEncoder(threshold=6)``. The combined
    output is scaled by ``RobustStandardScaler``.

    Returns
    -------
    Pipeline
        The unfitted feature-processing pipeline.
    """
    # Tail shared verbatim by the numeric and categorical column lists.
    shared_tail = [
        'State_AK', 'State_AL', 'State_AR', 'State_AZ', 'State_CA',
        'State_CO', 'State_CT', 'State_DC', 'State_DE', 'State_FL',
        'State_GA', 'State_HI', 'State_IA', 'State_ID', 'State_IL',
        'State_IN', 'State_KS', 'State_KY', 'State_LA', 'State_MA',
        'State_MD', 'State_ME', 'State_MI', 'State_MN', 'State_MO',
        'State_MS', 'State_MT', 'State_NC', 'State_ND', 'State_NE',
        'State_NH', 'State_NJ', 'State_NM', 'State_NV', 'State_NY',
        'State_OH', 'State_OK', 'State_OR', 'State_PA', 'State_RI',
        'State_SC', 'State_SD', 'State_TN', 'State_TX', 'State_UT',
        'State_VA', 'State_VT', 'State_WA', 'State_WI', 'State_WV',
        'State_WY',
        'Area Code_408', 'Area Code_415', 'Area Code_510',
        "Int'l Plan_no", "Int'l Plan_yes", 'VMail Plan_no', 'VMail Plan_yes',
    ]

    # Columns that can be parsed as numeric.
    numeric_head = [
        'Account Length', 'VMail Message', 'Day Mins', 'Day Calls',
        'Eve Mins', 'Eve Calls', 'Night Mins', 'Night Calls', 'Intl Mins',
        'Intl Calls', 'CustServ Calls',
    ]
    numeric = HEADER.as_feature_indices(numeric_head + shared_tail)

    # Columns holding a relatively small number of unique items.
    categorical_head = [
        'Account Length', 'VMail Message', 'Day Calls', 'Eve Calls',
        'Night Calls', 'Intl Mins', 'Intl Calls', 'CustServ Calls',
    ]
    categorical = HEADER.as_feature_indices(categorical_head + shared_tail)

    imputer = RobustImputer(strategy='constant', fill_values=nan)
    numeric_processors = Pipeline(steps=[('robustimputer', imputer)])

    encoder = ThresholdOneHotEncoder(threshold=6)
    categorical_processors = Pipeline(
        steps=[('thresholdonehotencoder', encoder)]
    )

    routing = [
        ('numeric_processing', numeric_processors, numeric),
        ('categorical_processing', categorical_processors, categorical),
    ]
    column_transformer = ColumnTransformer(transformers=routing)

    stages = [
        ('column_transformer', column_transformer),
        ('robuststandardscaler', RobustStandardScaler()),
    ]
    return Pipeline(steps=stages)
def build_feature_transform():
    """Build the feature-processing pipeline.

    Numeric columns pass through a ``FeatureUnion`` of ``RobustImputer`` and
    ``RobustMissingIndicator`` followed by
    ``QuantileExtremeValuesTransformer``; the 'amt' column is additionally
    routed through ``ThresholdOneHotEncoder(threshold=31)``. The combined
    output goes to ``RobustPCA(n_components=147)`` and
    ``RobustStandardScaler``.

    Returns
    -------
    Pipeline
        The unfitted feature-processing pipeline.
    """
    # Columns that can be parsed as numeric: 'Unnamed: 0', 'V1'..'V28', 'amt'.
    v_columns = ['V%d' % i for i in range(1, 29)]
    numeric = HEADER.as_feature_indices(['Unnamed: 0'] + v_columns + ['amt'])

    # Columns holding a relatively small number of unique items.
    categorical = HEADER.as_feature_indices(['amt'])

    impute_and_flag = FeatureUnion([
        ('robust_imputer', RobustImputer()),
        ('robust_missing_indicator', RobustMissingIndicator()),
    ])
    numeric_processors = Pipeline(steps=[
        ('featureunion', impute_and_flag),
        ('quantileextremevaluestransformer', QuantileExtremeValuesTransformer()),
    ])

    encoder = ThresholdOneHotEncoder(threshold=31)
    categorical_processors = Pipeline(
        steps=[('thresholdonehotencoder', encoder)]
    )

    routing = [
        ('numeric_processing', numeric_processors, numeric),
        ('categorical_processing', categorical_processors, categorical),
    ]
    column_transformer = ColumnTransformer(transformers=routing)

    stages = [
        ('column_transformer', column_transformer),
        ('robustpca', RobustPCA(n_components=147)),
        ('robuststandardscaler', RobustStandardScaler()),
    ]
    return Pipeline(steps=stages)
def test_automl_transformer_regression():
    """Tests that rows in a regression dataset where the target column is not a finite numeric are imputed"""
    raw = read_csv_data(source="test/data/csv/regression_na_labels.csv")
    # First three columns are the features; the fourth is the target.
    X, y = raw[:, :3], raw[:, 3]

    header = Header(column_names=["x1", "x2", "x3", "class"], target_column_name="class")
    feature_transformer = RobustImputer(strategy="constant", fill_values=0)
    target_transformer = NALabelEncoder()
    automl_transformer = AutoMLTransformer(
        header=header,
        feature_transformer=feature_transformer,
        target_transformer=target_transformer,
    )
    model = automl_transformer.fit(X, y)

    # Feature-only input keeps its shape.
    features_out = model.transform(X)
    assert features_out.shape == X.shape

    # Input with the target appended as the last column.
    combined = np.concatenate((X, y.reshape(-1, 1)), axis=1)
    combined_out = model.transform(combined)
    assert combined_out.shape == (3, 4)

    expected = np.array([[1.1, 1.0, 2.0, 3.0], [2.2, 4.0, 0.0, 5.0], [3.3, 12.0, 13.0, 14.0]])
    assert np.array_equal(combined_out, expected)
def test_robust_imputer_fill_values_dim_error():
    """Expect a ValueError matching ``fill_values_error_msg``.

    The imputer is built with a length-5 ``fill_values`` array and then
    fitted on ``X_impute``; both steps run inside the ``pytest.raises``
    context.
    """
    bad_fill_values = dict(strategy="constant", fill_values=np.zeros(5))
    with pytest.raises(ValueError, match=fill_values_error_msg):
        imputer = RobustImputer(**bad_fill_values)
        imputer.fit(X_impute)
def build_feature_transform():
    """Build the feature-processing pipeline.

    All listed columns are resolved to indices through ``HEADER``, imputed
    with a constant ``nan`` fill, and then scaled by ``RobustStandardScaler``.

    Returns
    -------
    Pipeline
        The unfitted feature-processing pipeline.
    """
    # Columns that can be parsed as numeric.
    numeric_columns = [
        'tBodyAcc.mean.X', 'tBodyAcc.mean.Y', 'tBodyAcc.mean.Z',
        'tBodyAcc.std.X', 'tBodyAcc.std.Y', 'tBodyAcc.std.Z',
        'tBodyAcc.mad.X', 'tBodyAcc.mad.Y', 'tBodyAcc.mad.Z',
        'tBodyAcc.max.X', 'tBodyAcc.max.Y', 'tBodyAcc.max.Z',
        'tBodyAcc.min.X', 'tBodyAcc.min.Y', 'tBodyAcc.min.Z',
        'tBodyAcc.sma',
        'tBodyAcc.energy.X', 'tBodyAcc.energy.Y', 'tBodyAcc.energy.Z',
        'tBodyAcc.iqr.X', 'tBodyAcc.iqr.Y', 'tBodyAcc.iqr.Z',
        'tBodyAcc.entropy.X', 'tBodyAcc.entropy.Y', 'tBodyAcc.entropy.Z',
        'tBodyAcc.arCoeff.X.1', 'tBodyAcc.arCoeff.X.2', 'tBodyAcc.arCoeff.X.3', 'tBodyAcc.arCoeff.X.4',
        'tBodyAcc.arCoeff.Y.1', 'tBodyAcc.arCoeff.Y.2', 'tBodyAcc.arCoeff.Y.3', 'tBodyAcc.arCoeff.Y.4',
        'tBodyAcc.arCoeff.Z.1', 'tBodyAcc.arCoeff.Z.2', 'tBodyAcc.arCoeff.Z.3', 'tBodyAcc.arCoeff.Z.4',
        'tBodyAcc.correlation.X.Y', 'tBodyAcc.correlation.X.Z', 'tBodyAcc.correlation.Y.Z',
        'tGravityAcc.mean.X', 'tGravityAcc.mean.Y', 'tGravityAcc.mean.Z',
        'tGravityAcc.std.X', 'tGravityAcc.std.Y', 'tGravityAcc.std.Z',
        'tGravityAcc.mad.X', 'tGravityAcc.mad.Y', 'tGravityAcc.mad.Z',
        'tGravityAcc.max.X', 'tGravityAcc.max.Y', 'tGravityAcc.max.Z',
        'tGravityAcc.min.X', 'tGravityAcc.min.Y', 'tGravityAcc.min.Z',
        'tGravityAcc.sma',
        'tGravityAcc.energy.X', 'tGravityAcc.energy.Y', 'tGravityAcc.energy.Z',
        'tGravityAcc.iqr.X', 'tGravityAcc.iqr.Y', 'tGravityAcc.iqr.Z',
        'tGravityAcc.entropy.X', 'tGravityAcc.entropy.Y', 'tGravityAcc.entropy.Z',
        'tGravityAcc.arCoeff.X.1', 'tGravityAcc.arCoeff.X.2', 'tGravityAcc.arCoeff.X.3', 'tGravityAcc.arCoeff.X.4',
        'tGravityAcc.arCoeff.Y.1', 'tGravityAcc.arCoeff.Y.2', 'tGravityAcc.arCoeff.Y.3', 'tGravityAcc.arCoeff.Y.4',
        'tGravityAcc.arCoeff.Z.1', 'tGravityAcc.arCoeff.Z.2', 'tGravityAcc.arCoeff.Z.3', 'tGravityAcc.arCoeff.Z.4',
        'tGravityAcc.correlation.X.Y', 'tGravityAcc.correlation.X.Z', 'tGravityAcc.correlation.Y.Z',
        'tBodyAccJerk.mean.X', 'tBodyAccJerk.mean.Y', 'tBodyAccJerk.mean.Z',
        'tBodyAccJerk.std.X', 'tBodyAccJerk.std.Y', 'tBodyAccJerk.std.Z',
        'tBodyAccJerk.mad.X', 'tBodyAccJerk.mad.Y', 'tBodyAccJerk.mad.Z',
        'tBodyAccJerk.max.X', 'tBodyAccJerk.max.Y', 'tBodyAccJerk.max.Z',
        'tBodyAccJerk.min.X', 'tBodyAccJerk.min.Y', 'tBodyAccJerk.min.Z',
        'tBodyAccJerk.sma',
        'tBodyAccJerk.energy.X', 'tBodyAccJerk.energy.Y', 'tBodyAccJerk.energy.Z',
        'tBodyAccJerk.iqr.X', 'tBodyAccJerk.iqr.Y', 'tBodyAccJerk.iqr.Z',
        'tBodyAccJerk.entropy.X', 'tBodyAccJerk.entropy.Y', 'tBodyAccJerk.entropy.Z',
        'tBodyAccJerk.arCoeff.X.1', 'tBodyAccJerk.arCoeff.X.2', 'tBodyAccJerk.arCoeff.X.3', 'tBodyAccJerk.arCoeff.X.4',
        'tBodyAccJerk.arCoeff.Y.1', 'tBodyAccJerk.arCoeff.Y.2', 'tBodyAccJerk.arCoeff.Y.3', 'tBodyAccJerk.arCoeff.Y.4',
        'tBodyAccJerk.arCoeff.Z.1', 'tBodyAccJerk.arCoeff.Z.2', 'tBodyAccJerk.arCoeff.Z.3', 'tBodyAccJerk.arCoeff.Z.4',
        'tBodyAccJerk.correlation.X.Y', 'tBodyAccJerk.correlation.X.Z', 'tBodyAccJerk.correlation.Y.Z',
        'tBodyGyro.mean.X', 'tBodyGyro.mean.Y', 'tBodyGyro.mean.Z',
        'tBodyGyro.std.X', 'tBodyGyro.std.Y', 'tBodyGyro.std.Z',
        'tBodyGyro.mad.X', 'tBodyGyro.mad.Y', 'tBodyGyro.mad.Z',
        'tBodyGyro.max.X', 'tBodyGyro.max.Y', 'tBodyGyro.max.Z',
        'tBodyGyro.min.X', 'tBodyGyro.min.Y', 'tBodyGyro.min.Z',
        'tBodyGyro.sma',
        'tBodyGyro.energy.X', 'tBodyGyro.energy.Y', 'tBodyGyro.energy.Z',
        'tBodyGyro.iqr.X', 'tBodyGyro.iqr.Y', 'tBodyGyro.iqr.Z',
        'tBodyGyro.entropy.X', 'tBodyGyro.entropy.Y', 'tBodyGyro.entropy.Z',
        'tBodyGyro.arCoeff.X.1', 'tBodyGyro.arCoeff.X.2', 'tBodyGyro.arCoeff.X.3', 'tBodyGyro.arCoeff.X.4',
        'tBodyGyro.arCoeff.Y.1', 'tBodyGyro.arCoeff.Y.2', 'tBodyGyro.arCoeff.Y.3', 'tBodyGyro.arCoeff.Y.4',
        'tBodyGyro.arCoeff.Z.1', 'tBodyGyro.arCoeff.Z.2', 'tBodyGyro.arCoeff.Z.3', 'tBodyGyro.arCoeff.Z.4',
        'tBodyGyro.correlation.X.Y', 'tBodyGyro.correlation.X.Z', 'tBodyGyro.correlation.Y.Z',
        'tBodyGyroJerk.mean.X', 'tBodyGyroJerk.mean.Y', 'tBodyGyroJerk.mean.Z',
        'tBodyGyroJerk.std.X', 'tBodyGyroJerk.std.Y', 'tBodyGyroJerk.std.Z',
        'tBodyGyroJerk.mad.X', 'tBodyGyroJerk.mad.Y', 'tBodyGyroJerk.mad.Z',
        'tBodyGyroJerk.max.X', 'tBodyGyroJerk.max.Y', 'tBodyGyroJerk.max.Z',
        'tBodyGyroJerk.min.X', 'tBodyGyroJerk.min.Y', 'tBodyGyroJerk.min.Z',
        'tBodyGyroJerk.sma',
        'tBodyGyroJerk.energy.X', 'tBodyGyroJerk.energy.Y', 'tBodyGyroJerk.energy.Z',
        'tBodyGyroJerk.iqr.X', 'tBodyGyroJerk.iqr.Y', 'tBodyGyroJerk.iqr.Z',
        'tBodyGyroJerk.entropy.X', 'tBodyGyroJerk.entropy.Y', 'tBodyGyroJerk.entropy.Z',
        'tBodyGyroJerk.arCoeff.X.1', 'tBodyGyroJerk.arCoeff.X.2', 'tBodyGyroJerk.arCoeff.X.3', 'tBodyGyroJerk.arCoeff.X.4',
        'tBodyGyroJerk.arCoeff.Y.1', 'tBodyGyroJerk.arCoeff.Y.2', 'tBodyGyroJerk.arCoeff.Y.3', 'tBodyGyroJerk.arCoeff.Y.4',
        'tBodyGyroJerk.arCoeff.Z.1', 'tBodyGyroJerk.arCoeff.Z.2', 'tBodyGyroJerk.arCoeff.Z.3', 'tBodyGyroJerk.arCoeff.Z.4',
        'tBodyGyroJerk.correlation.X.Y', 'tBodyGyroJerk.correlation.X.Z', 'tBodyGyroJerk.correlation.Y.Z',
        'tBodyAccMag.mean', 'tBodyAccMag.std', 'tBodyAccMag.mad', 'tBodyAccMag.max', 'tBodyAccMag.min',
        'tBodyAccMag.sma', 'tBodyAccMag.energy', 'tBodyAccMag.iqr', 'tBodyAccMag.entropy',
        'tBodyAccMag.arCoeff1', 'tBodyAccMag.arCoeff2', 'tBodyAccMag.arCoeff3', 'tBodyAccMag.arCoeff4',
        'tGravityAccMag.mean', 'tGravityAccMag.std', 'tGravityAccMag.mad', 'tGravityAccMag.max', 'tGravityAccMag.min',
        'tGravityAccMag.sma', 'tGravityAccMag.energy', 'tGravityAccMag.iqr', 'tGravityAccMag.entropy',
        'tGravityAccMag.arCoeff1', 'tGravityAccMag.arCoeff2', 'tGravityAccMag.arCoeff3', 'tGravityAccMag.arCoeff4',
        'tBodyAccJerkMag.mean', 'tBodyAccJerkMag.std', 'tBodyAccJerkMag.mad', 'tBodyAccJerkMag.max', 'tBodyAccJerkMag.min',
        'tBodyAccJerkMag.sma', 'tBodyAccJerkMag.energy', 'tBodyAccJerkMag.iqr', 'tBodyAccJerkMag.entropy',
        'tBodyAccJerkMag.arCoeff1', 'tBodyAccJerkMag.arCoeff2', 'tBodyAccJerkMag.arCoeff3', 'tBodyAccJerkMag.arCoeff4',
        'tBodyGyroMag.mean', 'tBodyGyroMag.std', 'tBodyGyroMag.mad', 'tBodyGyroMag.max', 'tBodyGyroMag.min',
        'tBodyGyroMag.sma', 'tBodyGyroMag.energy', 'tBodyGyroMag.iqr', 'tBodyGyroMag.entropy',
        'tBodyGyroMag.arCoeff1', 'tBodyGyroMag.arCoeff2', 'tBodyGyroMag.arCoeff3', 'tBodyGyroMag.arCoeff4',
        'tBodyGyroJerkMag.mean', 'tBodyGyroJerkMag.std', 'tBodyGyroJerkMag.mad', 'tBodyGyroJerkMag.max', 'tBodyGyroJerkMag.min',
        'tBodyGyroJerkMag.sma', 'tBodyGyroJerkMag.energy', 'tBodyGyroJerkMag.iqr', 'tBodyGyroJerkMag.entropy',
        'tBodyGyroJerkMag.arCoeff1', 'tBodyGyroJerkMag.arCoeff2', 'tBodyGyroJerkMag.arCoeff3', 'tBodyGyroJerkMag.arCoeff4',
        'fBodyAcc.mean.X', 'fBodyAcc.mean.Y', 'fBodyAcc.mean.Z',
        'fBodyAcc.std.X', 'fBodyAcc.std.Y', 'fBodyAcc.std.Z',
        'fBodyAcc.mad.X', 'fBodyAcc.mad.Y', 'fBodyAcc.mad.Z',
        'fBodyAcc.max.X', 'fBodyAcc.max.Y', 'fBodyAcc.max.Z',
        'fBodyAcc.min.X', 'fBodyAcc.min.Y', 'fBodyAcc.min.Z',
        'fBodyAcc.sma',
        'fBodyAcc.energy.X', 'fBodyAcc.energy.Y', 'fBodyAcc.energy.Z',
        'fBodyAcc.iqr.X', 'fBodyAcc.iqr.Y', 'fBodyAcc.iqr.Z',
        'fBodyAcc.entropy.X', 'fBodyAcc.entropy.Y', 'fBodyAcc.entropy.Z',
        'fBodyAcc.maxInds.X', 'fBodyAcc.maxInds.Y', 'fBodyAcc.maxInds.Z',
        'fBodyAcc.meanFreq.X', 'fBodyAcc.meanFreq.Y', 'fBodyAcc.meanFreq.Z',
        'fBodyAcc.skewness.X', 'fBodyAcc.kurtosis.X',
        'fBodyAcc.skewness.Y', 'fBodyAcc.kurtosis.Y',
        'fBodyAcc.skewness.Z', 'fBodyAcc.kurtosis.Z',
        'fBodyAcc.bandsEnergy.1.8', 'fBodyAcc.bandsEnergy.9.16', 'fBodyAcc.bandsEnergy.17.24', 'fBodyAcc.bandsEnergy.25.32',
        'fBodyAcc.bandsEnergy.33.40', 'fBodyAcc.bandsEnergy.41.48', 'fBodyAcc.bandsEnergy.49.56', 'fBodyAcc.bandsEnergy.57.64',
        'fBodyAcc.bandsEnergy.1.16', 'fBodyAcc.bandsEnergy.17.32', 'fBodyAcc.bandsEnergy.33.48', 'fBodyAcc.bandsEnergy.49.64',
        'fBodyAcc.bandsEnergy.1.24', 'fBodyAcc.bandsEnergy.25.48',
        'fBodyAcc.bandsEnergy.1.8.1', 'fBodyAcc.bandsEnergy.9.16.1', 'fBodyAcc.bandsEnergy.17.24.1', 'fBodyAcc.bandsEnergy.25.32.1',
        'fBodyAcc.bandsEnergy.33.40.1', 'fBodyAcc.bandsEnergy.41.48.1', 'fBodyAcc.bandsEnergy.49.56.1', 'fBodyAcc.bandsEnergy.57.64.1',
        'fBodyAcc.bandsEnergy.1.16.1', 'fBodyAcc.bandsEnergy.17.32.1', 'fBodyAcc.bandsEnergy.33.48.1', 'fBodyAcc.bandsEnergy.49.64.1',
        'fBodyAcc.bandsEnergy.1.24.1', 'fBodyAcc.bandsEnergy.25.48.1',
        'fBodyAcc.bandsEnergy.1.8.2', 'fBodyAcc.bandsEnergy.9.16.2', 'fBodyAcc.bandsEnergy.17.24.2', 'fBodyAcc.bandsEnergy.25.32.2',
        'fBodyAcc.bandsEnergy.33.40.2', 'fBodyAcc.bandsEnergy.41.48.2', 'fBodyAcc.bandsEnergy.49.56.2', 'fBodyAcc.bandsEnergy.57.64.2',
        'fBodyAcc.bandsEnergy.1.16.2', 'fBodyAcc.bandsEnergy.17.32.2', 'fBodyAcc.bandsEnergy.33.48.2', 'fBodyAcc.bandsEnergy.49.64.2',
        'fBodyAcc.bandsEnergy.1.24.2', 'fBodyAcc.bandsEnergy.25.48.2',
        'fBodyAccJerk.mean.X', 'fBodyAccJerk.mean.Y', 'fBodyAccJerk.mean.Z',
        'fBodyAccJerk.std.X', 'fBodyAccJerk.std.Y', 'fBodyAccJerk.std.Z',
        'fBodyAccJerk.mad.X', 'fBodyAccJerk.mad.Y', 'fBodyAccJerk.mad.Z',
        'fBodyAccJerk.max.X', 'fBodyAccJerk.max.Y', 'fBodyAccJerk.max.Z',
        'fBodyAccJerk.min.X', 'fBodyAccJerk.min.Y', 'fBodyAccJerk.min.Z',
        'fBodyAccJerk.sma',
        'fBodyAccJerk.energy.X', 'fBodyAccJerk.energy.Y', 'fBodyAccJerk.energy.Z',
        'fBodyAccJerk.iqr.X', 'fBodyAccJerk.iqr.Y', 'fBodyAccJerk.iqr.Z',
        'fBodyAccJerk.entropy.X', 'fBodyAccJerk.entropy.Y', 'fBodyAccJerk.entropy.Z',
        'fBodyAccJerk.maxInds.X', 'fBodyAccJerk.maxInds.Y', 'fBodyAccJerk.maxInds.Z',
        'fBodyAccJerk.meanFreq.X', 'fBodyAccJerk.meanFreq.Y', 'fBodyAccJerk.meanFreq.Z',
        'fBodyAccJerk.skewness.X', 'fBodyAccJerk.kurtosis.X',
        'fBodyAccJerk.skewness.Y', 'fBodyAccJerk.kurtosis.Y',
        'fBodyAccJerk.skewness.Z', 'fBodyAccJerk.kurtosis.Z',
        'fBodyAccJerk.bandsEnergy.1.8', 'fBodyAccJerk.bandsEnergy.9.16', 'fBodyAccJerk.bandsEnergy.17.24', 'fBodyAccJerk.bandsEnergy.25.32',
        'fBodyAccJerk.bandsEnergy.33.40', 'fBodyAccJerk.bandsEnergy.41.48', 'fBodyAccJerk.bandsEnergy.49.56', 'fBodyAccJerk.bandsEnergy.57.64',
        'fBodyAccJerk.bandsEnergy.1.16', 'fBodyAccJerk.bandsEnergy.17.32', 'fBodyAccJerk.bandsEnergy.33.48', 'fBodyAccJerk.bandsEnergy.49.64',
        'fBodyAccJerk.bandsEnergy.1.24', 'fBodyAccJerk.bandsEnergy.25.48',
        'fBodyAccJerk.bandsEnergy.1.8.1', 'fBodyAccJerk.bandsEnergy.9.16.1', 'fBodyAccJerk.bandsEnergy.17.24.1', 'fBodyAccJerk.bandsEnergy.25.32.1',
        'fBodyAccJerk.bandsEnergy.33.40.1', 'fBodyAccJerk.bandsEnergy.41.48.1', 'fBodyAccJerk.bandsEnergy.49.56.1', 'fBodyAccJerk.bandsEnergy.57.64.1',
        'fBodyAccJerk.bandsEnergy.1.16.1', 'fBodyAccJerk.bandsEnergy.17.32.1', 'fBodyAccJerk.bandsEnergy.33.48.1', 'fBodyAccJerk.bandsEnergy.49.64.1',
        'fBodyAccJerk.bandsEnergy.1.24.1', 'fBodyAccJerk.bandsEnergy.25.48.1',
        'fBodyAccJerk.bandsEnergy.1.8.2', 'fBodyAccJerk.bandsEnergy.9.16.2', 'fBodyAccJerk.bandsEnergy.17.24.2', 'fBodyAccJerk.bandsEnergy.25.32.2',
        'fBodyAccJerk.bandsEnergy.33.40.2', 'fBodyAccJerk.bandsEnergy.41.48.2', 'fBodyAccJerk.bandsEnergy.49.56.2', 'fBodyAccJerk.bandsEnergy.57.64.2',
        'fBodyAccJerk.bandsEnergy.1.16.2', 'fBodyAccJerk.bandsEnergy.17.32.2', 'fBodyAccJerk.bandsEnergy.33.48.2', 'fBodyAccJerk.bandsEnergy.49.64.2',
        'fBodyAccJerk.bandsEnergy.1.24.2', 'fBodyAccJerk.bandsEnergy.25.48.2',
        'fBodyGyro.mean.X', 'fBodyGyro.mean.Y', 'fBodyGyro.mean.Z',
        'fBodyGyro.std.X', 'fBodyGyro.std.Y', 'fBodyGyro.std.Z',
        'fBodyGyro.mad.X', 'fBodyGyro.mad.Y', 'fBodyGyro.mad.Z',
        'fBodyGyro.max.X', 'fBodyGyro.max.Y', 'fBodyGyro.max.Z',
        'fBodyGyro.min.X', 'fBodyGyro.min.Y', 'fBodyGyro.min.Z',
        'fBodyGyro.sma',
        'fBodyGyro.energy.X', 'fBodyGyro.energy.Y', 'fBodyGyro.energy.Z',
        'fBodyGyro.iqr.X', 'fBodyGyro.iqr.Y', 'fBodyGyro.iqr.Z',
        'fBodyGyro.entropy.X', 'fBodyGyro.entropy.Y', 'fBodyGyro.entropy.Z',
        'fBodyGyro.maxInds.X', 'fBodyGyro.maxInds.Y', 'fBodyGyro.maxInds.Z',
        'fBodyGyro.meanFreq.X', 'fBodyGyro.meanFreq.Y', 'fBodyGyro.meanFreq.Z',
        'fBodyGyro.skewness.X', 'fBodyGyro.kurtosis.X',
        'fBodyGyro.skewness.Y', 'fBodyGyro.kurtosis.Y',
        'fBodyGyro.skewness.Z', 'fBodyGyro.kurtosis.Z',
        'fBodyGyro.bandsEnergy.1.8', 'fBodyGyro.bandsEnergy.9.16', 'fBodyGyro.bandsEnergy.17.24', 'fBodyGyro.bandsEnergy.25.32',
        'fBodyGyro.bandsEnergy.33.40', 'fBodyGyro.bandsEnergy.41.48', 'fBodyGyro.bandsEnergy.49.56', 'fBodyGyro.bandsEnergy.57.64',
        'fBodyGyro.bandsEnergy.1.16', 'fBodyGyro.bandsEnergy.17.32', 'fBodyGyro.bandsEnergy.33.48', 'fBodyGyro.bandsEnergy.49.64',
        'fBodyGyro.bandsEnergy.1.24', 'fBodyGyro.bandsEnergy.25.48',
        'fBodyGyro.bandsEnergy.1.8.1', 'fBodyGyro.bandsEnergy.9.16.1', 'fBodyGyro.bandsEnergy.17.24.1', 'fBodyGyro.bandsEnergy.25.32.1',
        'fBodyGyro.bandsEnergy.33.40.1', 'fBodyGyro.bandsEnergy.41.48.1', 'fBodyGyro.bandsEnergy.49.56.1', 'fBodyGyro.bandsEnergy.57.64.1',
        'fBodyGyro.bandsEnergy.1.16.1', 'fBodyGyro.bandsEnergy.17.32.1', 'fBodyGyro.bandsEnergy.33.48.1', 'fBodyGyro.bandsEnergy.49.64.1',
        'fBodyGyro.bandsEnergy.1.24.1', 'fBodyGyro.bandsEnergy.25.48.1',
        'fBodyGyro.bandsEnergy.1.8.2', 'fBodyGyro.bandsEnergy.9.16.2', 'fBodyGyro.bandsEnergy.17.24.2', 'fBodyGyro.bandsEnergy.25.32.2',
        'fBodyGyro.bandsEnergy.33.40.2', 'fBodyGyro.bandsEnergy.41.48.2', 'fBodyGyro.bandsEnergy.49.56.2', 'fBodyGyro.bandsEnergy.57.64.2',
        'fBodyGyro.bandsEnergy.1.16.2', 'fBodyGyro.bandsEnergy.17.32.2', 'fBodyGyro.bandsEnergy.33.48.2', 'fBodyGyro.bandsEnergy.49.64.2',
        'fBodyGyro.bandsEnergy.1.24.2', 'fBodyGyro.bandsEnergy.25.48.2',
        'fBodyAccMag.mean', 'fBodyAccMag.std', 'fBodyAccMag.mad', 'fBodyAccMag.max', 'fBodyAccMag.min',
        'fBodyAccMag.sma', 'fBodyAccMag.energy', 'fBodyAccMag.iqr', 'fBodyAccMag.entropy',
        'fBodyAccMag.maxInds', 'fBodyAccMag.meanFreq', 'fBodyAccMag.skewness', 'fBodyAccMag.kurtosis',
        'fBodyBodyAccJerkMag.mean', 'fBodyBodyAccJerkMag.std', 'fBodyBodyAccJerkMag.mad', 'fBodyBodyAccJerkMag.max', 'fBodyBodyAccJerkMag.min',
        'fBodyBodyAccJerkMag.sma', 'fBodyBodyAccJerkMag.energy', 'fBodyBodyAccJerkMag.iqr', 'fBodyBodyAccJerkMag.entropy',
        'fBodyBodyAccJerkMag.maxInds', 'fBodyBodyAccJerkMag.meanFreq', 'fBodyBodyAccJerkMag.skewness', 'fBodyBodyAccJerkMag.kurtosis',
        'fBodyBodyGyroMag.mean', 'fBodyBodyGyroMag.std', 'fBodyBodyGyroMag.mad', 'fBodyBodyGyroMag.max', 'fBodyBodyGyroMag.min',
        'fBodyBodyGyroMag.sma', 'fBodyBodyGyroMag.energy', 'fBodyBodyGyroMag.iqr', 'fBodyBodyGyroMag.entropy',
        'fBodyBodyGyroMag.maxInds', 'fBodyBodyGyroMag.meanFreq', 'fBodyBodyGyroMag.skewness', 'fBodyBodyGyroMag.kurtosis',
        'fBodyBodyGyroJerkMag.mean', 'fBodyBodyGyroJerkMag.std', 'fBodyBodyGyroJerkMag.mad', 'fBodyBodyGyroJerkMag.max', 'fBodyBodyGyroJerkMag.min',
        'fBodyBodyGyroJerkMag.sma', 'fBodyBodyGyroJerkMag.energy', 'fBodyBodyGyroJerkMag.iqr', 'fBodyBodyGyroJerkMag.entropy',
        'fBodyBodyGyroJerkMag.maxInds', 'fBodyBodyGyroJerkMag.meanFreq', 'fBodyBodyGyroJerkMag.skewness', 'fBodyBodyGyroJerkMag.kurtosis',
        'angle.tBodyAccMean.gravity', 'angle.tBodyAccJerkMean.gravityMean',
        'angle.tBodyGyroMean.gravityMean', 'angle.tBodyGyroJerkMean.gravityMean',
        'angle.X.gravityMean', 'angle.Y.gravityMean', 'angle.Z.gravityMean',
    ]
    numeric = HEADER.as_feature_indices(numeric_columns)

    imputer = RobustImputer(strategy='constant', fill_values=nan)
    numeric_processors = Pipeline(steps=[('robustimputer', imputer)])

    routing = [('numeric_processing', numeric_processors, numeric)]
    column_transformer = ColumnTransformer(transformers=routing)

    stages = [
        ('column_transformer', column_transformer),
        ('robuststandardscaler', RobustStandardScaler()),
    ]
    return Pipeline(steps=stages)
def test_robust_imputer_transform_dim_error():
    """Expect a ValueError matching ``transform_error_msg``.

    A default imputer is fitted on ``X_impute`` and then asked to transform
    a (3, 4) array of zeros; all three steps run inside the
    ``pytest.raises`` context.
    """
    with pytest.raises(ValueError, match=transform_error_msg):
        imputer = RobustImputer()
        imputer.fit(X_impute)
        wrong_shape_input = np.zeros((3, 4))
        imputer.transform(wrong_shape_input)
def build_feature_transform():
    """Return the model definition representing feature processing.

    Two groups of column names are resolved to indices through
    ``HEADER.as_feature_indices``. The numeric group goes through a
    ``RobustImputer``; the categorical group goes through a
    ``ThresholdOneHotEncoder(threshold=7)``. Both branches are combined in a
    ``ColumnTransformer``, followed by ``RobustPCA(n_components=171)`` and a
    ``RobustStandardScaler``.

    Returns
    -------
    Pipeline
        The unfitted three-stage pipeline described above.
    """
    # These features can be parsed as numeric.
    numeric_names = [
        'tBodyAcc.mean.X', 'tBodyAcc.mean.Y', 'tBodyAcc.mean.Z',
        'tBodyAcc.std.X', 'tBodyAcc.std.Y', 'tBodyAcc.std.Z',
        'tBodyAcc.mad.X', 'tBodyAcc.mad.Y', 'tBodyAcc.mad.Z',
        'tBodyAcc.max.X', 'tBodyAcc.max.Y', 'tBodyAcc.max.Z',
        'tBodyAcc.min.X', 'tBodyAcc.min.Y', 'tBodyAcc.min.Z',
        'tBodyAcc.sma',
        'tBodyAcc.energy.X', 'tBodyAcc.energy.Y', 'tBodyAcc.energy.Z',
        'tBodyAcc.iqr.X', 'tBodyAcc.iqr.Y', 'tBodyAcc.iqr.Z',
        'tBodyAcc.entropy.X', 'tBodyAcc.entropy.Y', 'tBodyAcc.entropy.Z',
        'tBodyAcc.arCoeff.X.1', 'tBodyAcc.arCoeff.X.2', 'tBodyAcc.arCoeff.X.3', 'tBodyAcc.arCoeff.X.4',
        'tBodyAcc.arCoeff.Y.1', 'tBodyAcc.arCoeff.Y.2', 'tBodyAcc.arCoeff.Y.3', 'tBodyAcc.arCoeff.Y.4',
        'tBodyAcc.arCoeff.Z.1', 'tBodyAcc.arCoeff.Z.2', 'tBodyAcc.arCoeff.Z.3', 'tBodyAcc.arCoeff.Z.4',
        'tBodyAcc.correlation.X.Y', 'tBodyAcc.correlation.X.Z', 'tBodyAcc.correlation.Y.Z',
        'tGravityAcc.mean.X', 'tGravityAcc.mean.Y', 'tGravityAcc.mean.Z',
        'tGravityAcc.std.X', 'tGravityAcc.std.Y', 'tGravityAcc.std.Z',
        'tGravityAcc.mad.X', 'tGravityAcc.mad.Y', 'tGravityAcc.mad.Z',
        'tGravityAcc.max.X', 'tGravityAcc.max.Y', 'tGravityAcc.max.Z',
        'tGravityAcc.min.X', 'tGravityAcc.min.Y', 'tGravityAcc.min.Z',
        'tGravityAcc.sma',
        'tGravityAcc.energy.X', 'tGravityAcc.energy.Y', 'tGravityAcc.energy.Z',
        'tGravityAcc.iqr.X', 'tGravityAcc.iqr.Y', 'tGravityAcc.iqr.Z',
        'tGravityAcc.entropy.X', 'tGravityAcc.entropy.Y', 'tGravityAcc.entropy.Z',
        'tGravityAcc.arCoeff.X.1', 'tGravityAcc.arCoeff.X.2', 'tGravityAcc.arCoeff.X.3', 'tGravityAcc.arCoeff.X.4',
        'tGravityAcc.arCoeff.Y.1', 'tGravityAcc.arCoeff.Y.2', 'tGravityAcc.arCoeff.Y.3', 'tGravityAcc.arCoeff.Y.4',
        'tGravityAcc.arCoeff.Z.1', 'tGravityAcc.arCoeff.Z.2', 'tGravityAcc.arCoeff.Z.3', 'tGravityAcc.arCoeff.Z.4',
        'tGravityAcc.correlation.X.Y', 'tGravityAcc.correlation.X.Z', 'tGravityAcc.correlation.Y.Z',
        'tBodyAccJerk.mean.X', 'tBodyAccJerk.mean.Y', 'tBodyAccJerk.mean.Z',
        'tBodyAccJerk.std.X', 'tBodyAccJerk.std.Y', 'tBodyAccJerk.std.Z',
        'tBodyAccJerk.mad.X', 'tBodyAccJerk.mad.Y', 'tBodyAccJerk.mad.Z',
        'tBodyAccJerk.max.X', 'tBodyAccJerk.max.Y', 'tBodyAccJerk.max.Z',
        'tBodyAccJerk.min.X', 'tBodyAccJerk.min.Y', 'tBodyAccJerk.min.Z',
        'tBodyAccJerk.sma',
        'tBodyAccJerk.energy.X', 'tBodyAccJerk.energy.Y', 'tBodyAccJerk.energy.Z',
        'tBodyAccJerk.iqr.X', 'tBodyAccJerk.iqr.Y', 'tBodyAccJerk.iqr.Z',
        'tBodyAccJerk.entropy.X', 'tBodyAccJerk.entropy.Y', 'tBodyAccJerk.entropy.Z',
        'tBodyAccJerk.arCoeff.X.1', 'tBodyAccJerk.arCoeff.X.2', 'tBodyAccJerk.arCoeff.X.3', 'tBodyAccJerk.arCoeff.X.4',
        'tBodyAccJerk.arCoeff.Y.1', 'tBodyAccJerk.arCoeff.Y.2', 'tBodyAccJerk.arCoeff.Y.3', 'tBodyAccJerk.arCoeff.Y.4',
        'tBodyAccJerk.arCoeff.Z.1', 'tBodyAccJerk.arCoeff.Z.2', 'tBodyAccJerk.arCoeff.Z.3', 'tBodyAccJerk.arCoeff.Z.4',
        'tBodyAccJerk.correlation.X.Y', 'tBodyAccJerk.correlation.X.Z', 'tBodyAccJerk.correlation.Y.Z',
        'tBodyGyro.mean.X', 'tBodyGyro.mean.Y', 'tBodyGyro.mean.Z',
        'tBodyGyro.std.X', 'tBodyGyro.std.Y', 'tBodyGyro.std.Z',
        'tBodyGyro.mad.X', 'tBodyGyro.mad.Y', 'tBodyGyro.mad.Z',
        'tBodyGyro.max.X', 'tBodyGyro.max.Y', 'tBodyGyro.max.Z',
        'tBodyGyro.min.X', 'tBodyGyro.min.Y', 'tBodyGyro.min.Z',
        'tBodyGyro.sma',
        'tBodyGyro.energy.X', 'tBodyGyro.energy.Y', 'tBodyGyro.energy.Z',
        'tBodyGyro.iqr.X', 'tBodyGyro.iqr.Y', 'tBodyGyro.iqr.Z',
        'tBodyGyro.entropy.X', 'tBodyGyro.entropy.Y', 'tBodyGyro.entropy.Z',
        'tBodyGyro.arCoeff.X.1', 'tBodyGyro.arCoeff.X.2', 'tBodyGyro.arCoeff.X.3', 'tBodyGyro.arCoeff.X.4',
        'tBodyGyro.arCoeff.Y.1', 'tBodyGyro.arCoeff.Y.2', 'tBodyGyro.arCoeff.Y.3', 'tBodyGyro.arCoeff.Y.4',
        'tBodyGyro.arCoeff.Z.1', 'tBodyGyro.arCoeff.Z.2', 'tBodyGyro.arCoeff.Z.3', 'tBodyGyro.arCoeff.Z.4',
        'tBodyGyro.correlation.X.Y', 'tBodyGyro.correlation.X.Z', 'tBodyGyro.correlation.Y.Z',
        'tBodyGyroJerk.mean.X', 'tBodyGyroJerk.mean.Y', 'tBodyGyroJerk.mean.Z',
        'tBodyGyroJerk.std.X', 'tBodyGyroJerk.std.Y', 'tBodyGyroJerk.std.Z',
        'tBodyGyroJerk.mad.X', 'tBodyGyroJerk.mad.Y', 'tBodyGyroJerk.mad.Z',
        'tBodyGyroJerk.max.X', 'tBodyGyroJerk.max.Y', 'tBodyGyroJerk.max.Z',
        'tBodyGyroJerk.min.X', 'tBodyGyroJerk.min.Y', 'tBodyGyroJerk.min.Z',
        'tBodyGyroJerk.sma',
        'tBodyGyroJerk.energy.X', 'tBodyGyroJerk.energy.Y', 'tBodyGyroJerk.energy.Z',
        'tBodyGyroJerk.iqr.X', 'tBodyGyroJerk.iqr.Y', 'tBodyGyroJerk.iqr.Z',
        'tBodyGyroJerk.entropy.X', 'tBodyGyroJerk.entropy.Y', 'tBodyGyroJerk.entropy.Z',
        'tBodyGyroJerk.arCoeff.X.1', 'tBodyGyroJerk.arCoeff.X.2', 'tBodyGyroJerk.arCoeff.X.3', 'tBodyGyroJerk.arCoeff.X.4',
        'tBodyGyroJerk.arCoeff.Y.1', 'tBodyGyroJerk.arCoeff.Y.2', 'tBodyGyroJerk.arCoeff.Y.3', 'tBodyGyroJerk.arCoeff.Y.4',
        'tBodyGyroJerk.arCoeff.Z.1', 'tBodyGyroJerk.arCoeff.Z.2', 'tBodyGyroJerk.arCoeff.Z.3', 'tBodyGyroJerk.arCoeff.Z.4',
        'tBodyGyroJerk.correlation.X.Y', 'tBodyGyroJerk.correlation.X.Z', 'tBodyGyroJerk.correlation.Y.Z',
        'tBodyAccMag.mean', 'tBodyAccMag.std', 'tBodyAccMag.mad', 'tBodyAccMag.max', 'tBodyAccMag.min',
        'tBodyAccMag.sma', 'tBodyAccMag.energy', 'tBodyAccMag.iqr', 'tBodyAccMag.entropy',
        'tBodyAccMag.arCoeff1', 'tBodyAccMag.arCoeff2', 'tBodyAccMag.arCoeff3', 'tBodyAccMag.arCoeff4',
        'tGravityAccMag.mean', 'tGravityAccMag.std', 'tGravityAccMag.mad', 'tGravityAccMag.max', 'tGravityAccMag.min',
        'tGravityAccMag.sma', 'tGravityAccMag.energy', 'tGravityAccMag.iqr', 'tGravityAccMag.entropy',
        'tGravityAccMag.arCoeff1', 'tGravityAccMag.arCoeff2', 'tGravityAccMag.arCoeff3', 'tGravityAccMag.arCoeff4',
        'tBodyAccJerkMag.mean', 'tBodyAccJerkMag.std', 'tBodyAccJerkMag.mad', 'tBodyAccJerkMag.max', 'tBodyAccJerkMag.min',
        'tBodyAccJerkMag.sma', 'tBodyAccJerkMag.energy', 'tBodyAccJerkMag.iqr', 'tBodyAccJerkMag.entropy',
        'tBodyAccJerkMag.arCoeff1', 'tBodyAccJerkMag.arCoeff2', 'tBodyAccJerkMag.arCoeff3', 'tBodyAccJerkMag.arCoeff4',
        'tBodyGyroMag.mean', 'tBodyGyroMag.std', 'tBodyGyroMag.mad', 'tBodyGyroMag.max', 'tBodyGyroMag.min',
        'tBodyGyroMag.sma', 'tBodyGyroMag.energy', 'tBodyGyroMag.iqr', 'tBodyGyroMag.entropy',
        'tBodyGyroMag.arCoeff1', 'tBodyGyroMag.arCoeff2', 'tBodyGyroMag.arCoeff3', 'tBodyGyroMag.arCoeff4',
        'tBodyGyroJerkMag.mean', 'tBodyGyroJerkMag.std', 'tBodyGyroJerkMag.mad', 'tBodyGyroJerkMag.max', 'tBodyGyroJerkMag.min',
        'tBodyGyroJerkMag.sma', 'tBodyGyroJerkMag.energy', 'tBodyGyroJerkMag.iqr', 'tBodyGyroJerkMag.entropy',
        'tBodyGyroJerkMag.arCoeff1', 'tBodyGyroJerkMag.arCoeff2', 'tBodyGyroJerkMag.arCoeff3', 'tBodyGyroJerkMag.arCoeff4',
        'fBodyAcc.mean.X', 'fBodyAcc.mean.Y', 'fBodyAcc.mean.Z',
        'fBodyAcc.std.X', 'fBodyAcc.std.Y', 'fBodyAcc.std.Z',
        'fBodyAcc.mad.X', 'fBodyAcc.mad.Y', 'fBodyAcc.mad.Z',
        'fBodyAcc.max.X', 'fBodyAcc.max.Y', 'fBodyAcc.max.Z',
        'fBodyAcc.min.X', 'fBodyAcc.min.Y', 'fBodyAcc.min.Z',
        'fBodyAcc.sma',
        'fBodyAcc.energy.X', 'fBodyAcc.energy.Y', 'fBodyAcc.energy.Z',
        'fBodyAcc.iqr.X', 'fBodyAcc.iqr.Y', 'fBodyAcc.iqr.Z',
        'fBodyAcc.entropy.X', 'fBodyAcc.entropy.Y', 'fBodyAcc.entropy.Z',
        'fBodyAcc.maxInds.X', 'fBodyAcc.maxInds.Y', 'fBodyAcc.maxInds.Z',
        'fBodyAcc.meanFreq.X', 'fBodyAcc.meanFreq.Y', 'fBodyAcc.meanFreq.Z',
        'fBodyAcc.skewness.X', 'fBodyAcc.kurtosis.X',
        'fBodyAcc.skewness.Y', 'fBodyAcc.kurtosis.Y',
        'fBodyAcc.skewness.Z', 'fBodyAcc.kurtosis.Z',
        'fBodyAcc.bandsEnergy.1.8', 'fBodyAcc.bandsEnergy.9.16', 'fBodyAcc.bandsEnergy.17.24', 'fBodyAcc.bandsEnergy.25.32',
        'fBodyAcc.bandsEnergy.33.40', 'fBodyAcc.bandsEnergy.41.48', 'fBodyAcc.bandsEnergy.49.56', 'fBodyAcc.bandsEnergy.57.64',
        'fBodyAcc.bandsEnergy.1.16', 'fBodyAcc.bandsEnergy.17.32', 'fBodyAcc.bandsEnergy.33.48', 'fBodyAcc.bandsEnergy.49.64',
        'fBodyAcc.bandsEnergy.1.24', 'fBodyAcc.bandsEnergy.25.48',
        'fBodyAcc.bandsEnergy.1.8.1', 'fBodyAcc.bandsEnergy.9.16.1', 'fBodyAcc.bandsEnergy.17.24.1', 'fBodyAcc.bandsEnergy.25.32.1',
        'fBodyAcc.bandsEnergy.33.40.1', 'fBodyAcc.bandsEnergy.41.48.1', 'fBodyAcc.bandsEnergy.49.56.1', 'fBodyAcc.bandsEnergy.57.64.1',
        'fBodyAcc.bandsEnergy.1.16.1', 'fBodyAcc.bandsEnergy.17.32.1', 'fBodyAcc.bandsEnergy.33.48.1', 'fBodyAcc.bandsEnergy.49.64.1',
        'fBodyAcc.bandsEnergy.1.24.1', 'fBodyAcc.bandsEnergy.25.48.1',
        'fBodyAcc.bandsEnergy.1.8.2', 'fBodyAcc.bandsEnergy.9.16.2', 'fBodyAcc.bandsEnergy.17.24.2', 'fBodyAcc.bandsEnergy.25.32.2',
        'fBodyAcc.bandsEnergy.33.40.2', 'fBodyAcc.bandsEnergy.41.48.2', 'fBodyAcc.bandsEnergy.49.56.2', 'fBodyAcc.bandsEnergy.57.64.2',
        'fBodyAcc.bandsEnergy.1.16.2', 'fBodyAcc.bandsEnergy.17.32.2', 'fBodyAcc.bandsEnergy.33.48.2', 'fBodyAcc.bandsEnergy.49.64.2',
        'fBodyAcc.bandsEnergy.1.24.2', 'fBodyAcc.bandsEnergy.25.48.2',
        'fBodyAccJerk.mean.X', 'fBodyAccJerk.mean.Y', 'fBodyAccJerk.mean.Z',
        'fBodyAccJerk.std.X', 'fBodyAccJerk.std.Y', 'fBodyAccJerk.std.Z',
        'fBodyAccJerk.mad.X', 'fBodyAccJerk.mad.Y', 'fBodyAccJerk.mad.Z',
        'fBodyAccJerk.max.X', 'fBodyAccJerk.max.Y', 'fBodyAccJerk.max.Z',
        'fBodyAccJerk.min.X', 'fBodyAccJerk.min.Y', 'fBodyAccJerk.min.Z',
        'fBodyAccJerk.sma',
        'fBodyAccJerk.energy.X', 'fBodyAccJerk.energy.Y', 'fBodyAccJerk.energy.Z',
        'fBodyAccJerk.iqr.X', 'fBodyAccJerk.iqr.Y', 'fBodyAccJerk.iqr.Z',
        'fBodyAccJerk.entropy.X', 'fBodyAccJerk.entropy.Y', 'fBodyAccJerk.entropy.Z',
        'fBodyAccJerk.maxInds.X', 'fBodyAccJerk.maxInds.Y', 'fBodyAccJerk.maxInds.Z',
        'fBodyAccJerk.meanFreq.X', 'fBodyAccJerk.meanFreq.Y', 'fBodyAccJerk.meanFreq.Z',
        'fBodyAccJerk.skewness.X', 'fBodyAccJerk.kurtosis.X',
        'fBodyAccJerk.skewness.Y', 'fBodyAccJerk.kurtosis.Y',
        'fBodyAccJerk.skewness.Z', 'fBodyAccJerk.kurtosis.Z',
        'fBodyAccJerk.bandsEnergy.1.8', 'fBodyAccJerk.bandsEnergy.9.16', 'fBodyAccJerk.bandsEnergy.17.24', 'fBodyAccJerk.bandsEnergy.25.32',
        'fBodyAccJerk.bandsEnergy.33.40', 'fBodyAccJerk.bandsEnergy.41.48', 'fBodyAccJerk.bandsEnergy.49.56', 'fBodyAccJerk.bandsEnergy.57.64',
        'fBodyAccJerk.bandsEnergy.1.16', 'fBodyAccJerk.bandsEnergy.17.32', 'fBodyAccJerk.bandsEnergy.33.48', 'fBodyAccJerk.bandsEnergy.49.64',
        'fBodyAccJerk.bandsEnergy.1.24', 'fBodyAccJerk.bandsEnergy.25.48',
        'fBodyAccJerk.bandsEnergy.1.8.1', 'fBodyAccJerk.bandsEnergy.9.16.1', 'fBodyAccJerk.bandsEnergy.17.24.1', 'fBodyAccJerk.bandsEnergy.25.32.1',
        'fBodyAccJerk.bandsEnergy.33.40.1', 'fBodyAccJerk.bandsEnergy.41.48.1', 'fBodyAccJerk.bandsEnergy.49.56.1', 'fBodyAccJerk.bandsEnergy.57.64.1',
        'fBodyAccJerk.bandsEnergy.1.16.1', 'fBodyAccJerk.bandsEnergy.17.32.1', 'fBodyAccJerk.bandsEnergy.33.48.1', 'fBodyAccJerk.bandsEnergy.49.64.1',
        'fBodyAccJerk.bandsEnergy.1.24.1', 'fBodyAccJerk.bandsEnergy.25.48.1',
        'fBodyAccJerk.bandsEnergy.1.8.2', 'fBodyAccJerk.bandsEnergy.9.16.2', 'fBodyAccJerk.bandsEnergy.17.24.2', 'fBodyAccJerk.bandsEnergy.25.32.2',
        'fBodyAccJerk.bandsEnergy.33.40.2', 'fBodyAccJerk.bandsEnergy.41.48.2', 'fBodyAccJerk.bandsEnergy.49.56.2', 'fBodyAccJerk.bandsEnergy.57.64.2',
        'fBodyAccJerk.bandsEnergy.1.16.2', 'fBodyAccJerk.bandsEnergy.17.32.2', 'fBodyAccJerk.bandsEnergy.33.48.2', 'fBodyAccJerk.bandsEnergy.49.64.2',
        'fBodyAccJerk.bandsEnergy.1.24.2', 'fBodyAccJerk.bandsEnergy.25.48.2',
        'fBodyGyro.mean.X', 'fBodyGyro.mean.Y', 'fBodyGyro.mean.Z',
        'fBodyGyro.std.X', 'fBodyGyro.std.Y', 'fBodyGyro.std.Z',
        'fBodyGyro.mad.X', 'fBodyGyro.mad.Y', 'fBodyGyro.mad.Z',
        'fBodyGyro.max.X', 'fBodyGyro.max.Y', 'fBodyGyro.max.Z',
        'fBodyGyro.min.X', 'fBodyGyro.min.Y', 'fBodyGyro.min.Z',
        'fBodyGyro.sma',
        'fBodyGyro.energy.X', 'fBodyGyro.energy.Y', 'fBodyGyro.energy.Z',
        'fBodyGyro.iqr.X', 'fBodyGyro.iqr.Y', 'fBodyGyro.iqr.Z',
        'fBodyGyro.entropy.X', 'fBodyGyro.entropy.Y', 'fBodyGyro.entropy.Z',
        'fBodyGyro.maxInds.X', 'fBodyGyro.maxInds.Y', 'fBodyGyro.maxInds.Z',
        'fBodyGyro.meanFreq.X', 'fBodyGyro.meanFreq.Y', 'fBodyGyro.meanFreq.Z',
        'fBodyGyro.skewness.X', 'fBodyGyro.kurtosis.X',
        'fBodyGyro.skewness.Y', 'fBodyGyro.kurtosis.Y',
        'fBodyGyro.skewness.Z', 'fBodyGyro.kurtosis.Z',
        'fBodyGyro.bandsEnergy.1.8', 'fBodyGyro.bandsEnergy.9.16', 'fBodyGyro.bandsEnergy.17.24', 'fBodyGyro.bandsEnergy.25.32',
        'fBodyGyro.bandsEnergy.33.40', 'fBodyGyro.bandsEnergy.41.48', 'fBodyGyro.bandsEnergy.49.56', 'fBodyGyro.bandsEnergy.57.64',
        'fBodyGyro.bandsEnergy.1.16', 'fBodyGyro.bandsEnergy.17.32', 'fBodyGyro.bandsEnergy.33.48', 'fBodyGyro.bandsEnergy.49.64',
        'fBodyGyro.bandsEnergy.1.24', 'fBodyGyro.bandsEnergy.25.48',
        'fBodyGyro.bandsEnergy.1.8.1', 'fBodyGyro.bandsEnergy.9.16.1', 'fBodyGyro.bandsEnergy.17.24.1', 'fBodyGyro.bandsEnergy.25.32.1',
        'fBodyGyro.bandsEnergy.33.40.1', 'fBodyGyro.bandsEnergy.41.48.1', 'fBodyGyro.bandsEnergy.49.56.1', 'fBodyGyro.bandsEnergy.57.64.1',
        'fBodyGyro.bandsEnergy.1.16.1', 'fBodyGyro.bandsEnergy.17.32.1', 'fBodyGyro.bandsEnergy.33.48.1', 'fBodyGyro.bandsEnergy.49.64.1',
        'fBodyGyro.bandsEnergy.1.24.1', 'fBodyGyro.bandsEnergy.25.48.1',
        'fBodyGyro.bandsEnergy.1.8.2', 'fBodyGyro.bandsEnergy.9.16.2', 'fBodyGyro.bandsEnergy.17.24.2', 'fBodyGyro.bandsEnergy.25.32.2',
        'fBodyGyro.bandsEnergy.33.40.2', 'fBodyGyro.bandsEnergy.41.48.2', 'fBodyGyro.bandsEnergy.49.56.2', 'fBodyGyro.bandsEnergy.57.64.2',
        'fBodyGyro.bandsEnergy.1.16.2', 'fBodyGyro.bandsEnergy.17.32.2', 'fBodyGyro.bandsEnergy.33.48.2', 'fBodyGyro.bandsEnergy.49.64.2',
        'fBodyGyro.bandsEnergy.1.24.2', 'fBodyGyro.bandsEnergy.25.48.2',
        'fBodyAccMag.mean', 'fBodyAccMag.std', 'fBodyAccMag.mad', 'fBodyAccMag.max', 'fBodyAccMag.min',
        'fBodyAccMag.sma', 'fBodyAccMag.energy', 'fBodyAccMag.iqr', 'fBodyAccMag.entropy',
        'fBodyAccMag.maxInds', 'fBodyAccMag.meanFreq', 'fBodyAccMag.skewness', 'fBodyAccMag.kurtosis',
        'fBodyBodyAccJerkMag.mean', 'fBodyBodyAccJerkMag.std', 'fBodyBodyAccJerkMag.mad', 'fBodyBodyAccJerkMag.max', 'fBodyBodyAccJerkMag.min',
        'fBodyBodyAccJerkMag.sma', 'fBodyBodyAccJerkMag.energy', 'fBodyBodyAccJerkMag.iqr', 'fBodyBodyAccJerkMag.entropy',
        'fBodyBodyAccJerkMag.maxInds', 'fBodyBodyAccJerkMag.meanFreq', 'fBodyBodyAccJerkMag.skewness', 'fBodyBodyAccJerkMag.kurtosis',
        'fBodyBodyGyroMag.mean', 'fBodyBodyGyroMag.std', 'fBodyBodyGyroMag.mad', 'fBodyBodyGyroMag.max', 'fBodyBodyGyroMag.min',
        'fBodyBodyGyroMag.sma', 'fBodyBodyGyroMag.energy', 'fBodyBodyGyroMag.iqr', 'fBodyBodyGyroMag.entropy',
        'fBodyBodyGyroMag.maxInds', 'fBodyBodyGyroMag.meanFreq', 'fBodyBodyGyroMag.skewness', 'fBodyBodyGyroMag.kurtosis',
        'fBodyBodyGyroJerkMag.mean', 'fBodyBodyGyroJerkMag.std', 'fBodyBodyGyroJerkMag.mad', 'fBodyBodyGyroJerkMag.max', 'fBodyBodyGyroJerkMag.min',
        'fBodyBodyGyroJerkMag.sma', 'fBodyBodyGyroJerkMag.energy', 'fBodyBodyGyroJerkMag.iqr', 'fBodyBodyGyroJerkMag.entropy',
        'fBodyBodyGyroJerkMag.maxInds', 'fBodyBodyGyroJerkMag.meanFreq', 'fBodyBodyGyroJerkMag.skewness', 'fBodyBodyGyroJerkMag.kurtosis',
        'angle.tBodyAccMean.gravity', 'angle.tBodyAccJerkMean.gravityMean',
        'angle.tBodyGyroMean.gravityMean', 'angle.tBodyGyroJerkMean.gravityMean',
        'angle.X.gravityMean', 'angle.Y.gravityMean', 'angle.Z.gravityMean',
    ]

    # These features contain a relatively small number of unique items.
    # Every name below also appears in the numeric list above.
    categorical_names = [
        'tBodyAcc.mean.X', 'tBodyAcc.energy.Y', 'tBodyAcc.energy.Z',
        'tGravityAcc.std.X', 'tGravityAcc.std.Y', 'tGravityAcc.std.Z',
        'tGravityAcc.mad.X', 'tGravityAcc.mad.Y', 'tGravityAcc.mad.Z',
        'tGravityAcc.iqr.X', 'tGravityAcc.iqr.Y', 'tGravityAcc.iqr.Z',
        'tGravityAcc.entropy.Y',
        'tBodyAccJerk.energy.Z',
        'tBodyGyro.energy.X', 'tBodyGyro.energy.Y', 'tBodyGyro.energy.Z',
        'tBodyGyroJerk.energy.X', 'tBodyGyroJerk.energy.Y', 'tBodyGyroJerk.energy.Z',
        'tBodyAccMag.min', 'tGravityAccMag.min',
        'tBodyAccJerkMag.energy', 'tBodyGyroJerkMag.energy',
        'fBodyAcc.min.Y', 'fBodyAcc.min.Z',
        'fBodyAcc.maxInds.X', 'fBodyAcc.maxInds.Y', 'fBodyAcc.maxInds.Z',
        'fBodyAcc.bandsEnergy.9.16', 'fBodyAcc.bandsEnergy.25.32', 'fBodyAcc.bandsEnergy.33.40',
        'fBodyAcc.bandsEnergy.41.48', 'fBodyAcc.bandsEnergy.49.56', 'fBodyAcc.bandsEnergy.57.64',
        'fBodyAcc.bandsEnergy.33.48', 'fBodyAcc.bandsEnergy.49.64', 'fBodyAcc.bandsEnergy.25.48',
        'fBodyAcc.bandsEnergy.25.32.1', 'fBodyAcc.bandsEnergy.33.40.1', 'fBodyAcc.bandsEnergy.41.48.1',
        'fBodyAcc.bandsEnergy.49.56.1', 'fBodyAcc.bandsEnergy.57.64.1', 'fBodyAcc.bandsEnergy.33.48.1',
        'fBodyAcc.bandsEnergy.49.64.1', 'fBodyAcc.bandsEnergy.25.48.1',
        'fBodyAcc.bandsEnergy.9.16.2', 'fBodyAcc.bandsEnergy.17.24.2', 'fBodyAcc.bandsEnergy.25.32.2',
        'fBodyAcc.bandsEnergy.33.40.2', 'fBodyAcc.bandsEnergy.41.48.2', 'fBodyAcc.bandsEnergy.49.56.2',
        'fBodyAcc.bandsEnergy.57.64.2', 'fBodyAcc.bandsEnergy.1.16.2', 'fBodyAcc.bandsEnergy.17.32.2',
        'fBodyAcc.bandsEnergy.33.48.2', 'fBodyAcc.bandsEnergy.49.64.2', 'fBodyAcc.bandsEnergy.25.48.2',
        'fBodyAccJerk.min.X', 'fBodyAccJerk.min.Z', 'fBodyAccJerk.energy.Z',
        'fBodyAccJerk.maxInds.X', 'fBodyAccJerk.maxInds.Y', 'fBodyAccJerk.maxInds.Z',
        'fBodyAccJerk.kurtosis.Y', 'fBodyAccJerk.kurtosis.Z',
        'fBodyAccJerk.bandsEnergy.1.8', 'fBodyAccJerk.bandsEnergy.9.16', 'fBodyAccJerk.bandsEnergy.17.24',
        'fBodyAccJerk.bandsEnergy.25.32', 'fBodyAccJerk.bandsEnergy.33.40', 'fBodyAccJerk.bandsEnergy.41.48',
        'fBodyAccJerk.bandsEnergy.49.56', 'fBodyAccJerk.bandsEnergy.57.64', 'fBodyAccJerk.bandsEnergy.1.16',
        'fBodyAccJerk.bandsEnergy.33.48', 'fBodyAccJerk.bandsEnergy.49.64',
        'fBodyAccJerk.bandsEnergy.9.16.1', 'fBodyAccJerk.bandsEnergy.25.32.1', 'fBodyAccJerk.bandsEnergy.33.40.1',
        'fBodyAccJerk.bandsEnergy.41.48.1', 'fBodyAccJerk.bandsEnergy.49.56.1', 'fBodyAccJerk.bandsEnergy.57.64.1',
        'fBodyAccJerk.bandsEnergy.33.48.1', 'fBodyAccJerk.bandsEnergy.49.64.1', 'fBodyAccJerk.bandsEnergy.25.48.1',
        'fBodyAccJerk.bandsEnergy.1.8.2', 'fBodyAccJerk.bandsEnergy.9.16.2', 'fBodyAccJerk.bandsEnergy.17.24.2',
        'fBodyAccJerk.bandsEnergy.25.32.2', 'fBodyAccJerk.bandsEnergy.33.40.2', 'fBodyAccJerk.bandsEnergy.41.48.2',
        'fBodyAccJerk.bandsEnergy.49.56.2', 'fBodyAccJerk.bandsEnergy.57.64.2', 'fBodyAccJerk.bandsEnergy.1.16.2',
        'fBodyAccJerk.bandsEnergy.17.32.2', 'fBodyAccJerk.bandsEnergy.33.48.2', 'fBodyAccJerk.bandsEnergy.49.64.2',
        'fBodyAccJerk.bandsEnergy.1.24.2', 'fBodyAccJerk.bandsEnergy.25.48.2',
        'fBodyGyro.min.X', 'fBodyGyro.min.Y', 'fBodyGyro.min.Z',
        'fBodyGyro.energy.X', 'fBodyGyro.energy.Y', 'fBodyGyro.energy.Z',
        'fBodyGyro.maxInds.X', 'fBodyGyro.maxInds.Y', 'fBodyGyro.maxInds.Z',
        'fBodyGyro.bandsEnergy.1.8', 'fBodyGyro.bandsEnergy.9.16', 'fBodyGyro.bandsEnergy.17.24', 'fBodyGyro.bandsEnergy.25.32',
        'fBodyGyro.bandsEnergy.33.40', 'fBodyGyro.bandsEnergy.41.48', 'fBodyGyro.bandsEnergy.49.56', 'fBodyGyro.bandsEnergy.57.64',
        'fBodyGyro.bandsEnergy.1.16', 'fBodyGyro.bandsEnergy.17.32', 'fBodyGyro.bandsEnergy.33.48', 'fBodyGyro.bandsEnergy.49.64',
        'fBodyGyro.bandsEnergy.1.24', 'fBodyGyro.bandsEnergy.25.48',
        'fBodyGyro.bandsEnergy.1.8.1', 'fBodyGyro.bandsEnergy.9.16.1', 'fBodyGyro.bandsEnergy.17.24.1', 'fBodyGyro.bandsEnergy.25.32.1',
        'fBodyGyro.bandsEnergy.33.40.1', 'fBodyGyro.bandsEnergy.41.48.1', 'fBodyGyro.bandsEnergy.49.56.1', 'fBodyGyro.bandsEnergy.57.64.1',
        'fBodyGyro.bandsEnergy.1.16.1', 'fBodyGyro.bandsEnergy.17.32.1', 'fBodyGyro.bandsEnergy.33.48.1', 'fBodyGyro.bandsEnergy.49.64.1',
        'fBodyGyro.bandsEnergy.1.24.1', 'fBodyGyro.bandsEnergy.25.48.1',
        'fBodyGyro.bandsEnergy.1.8.2', 'fBodyGyro.bandsEnergy.9.16.2', 'fBodyGyro.bandsEnergy.17.24.2', 'fBodyGyro.bandsEnergy.25.32.2',
        'fBodyGyro.bandsEnergy.33.40.2', 'fBodyGyro.bandsEnergy.41.48.2', 'fBodyGyro.bandsEnergy.49.56.2', 'fBodyGyro.bandsEnergy.57.64.2',
        'fBodyGyro.bandsEnergy.1.16.2', 'fBodyGyro.bandsEnergy.17.32.2', 'fBodyGyro.bandsEnergy.33.48.2', 'fBodyGyro.bandsEnergy.49.64.2',
        'fBodyGyro.bandsEnergy.1.24.2', 'fBodyGyro.bandsEnergy.25.48.2',
        'fBodyAccMag.min', 'fBodyAccMag.maxInds',
        'fBodyBodyAccJerkMag.maxInds',
        'fBodyBodyGyroMag.min', 'fBodyBodyGyroMag.energy', 'fBodyBodyGyroMag.maxInds',
        'fBodyBodyGyroJerkMag.min', 'fBodyBodyGyroJerkMag.energy', 'fBodyBodyGyroJerkMag.maxInds',
    ]

    # Resolve names to column indices, numeric group first.
    numeric_columns = HEADER.as_feature_indices(numeric_names)
    categorical_columns = HEADER.as_feature_indices(categorical_names)

    # One sub-pipeline per column group.
    impute_numeric = Pipeline(steps=[('robustimputer', RobustImputer())])
    encode_categorical = Pipeline(
        steps=[('thresholdonehotencoder', ThresholdOneHotEncoder(threshold=7))]
    )

    branches = [
        ('numeric_processing', impute_numeric, numeric_columns),
        ('categorical_processing', encode_categorical, categorical_columns),
    ]
    stages = [
        ('column_transformer', ColumnTransformer(transformers=branches)),
        ('robustpca', RobustPCA(n_components=171)),
        ('robuststandardscaler', RobustStandardScaler()),
    ]
    return Pipeline(steps=stages)
def test_robust_imputer(X, X_expected, strategy, fill_values):
    """Fit a ``RobustImputer`` on ``X`` and compare its output to ``X_expected``.

    The imputer is configured with the given ``strategy`` and ``fill_values``,
    fitted on ``X``, and then applied to that same ``X``.
    """
    imputer = RobustImputer(fill_values=fill_values, strategy=strategy)
    imputer.fit(X)
    assert_array_equal(imputer.transform(X), X_expected)
class NALabelEncoder(BaseEstimator, TransformerMixin):
    """Encoder for transforming labels to NA values.

    Uses `RobustImputer` on 1D inputs of labels
    - Uses `is_finite_numeric` mask for encoding by default
    - Only uses the `RobustImputer` strategy `constant` and fills using `np.nan`
    - Default behavior encodes non-float and non-finite values as nan values in
      the target column of a given regression dataset

    Parameters
    ----------
    mask_function : callable -> np.array, dtype('bool') (default=None)
        A vectorized python function, accepts np.array, returns np.array
        with dtype('bool')

        For each value, if mask_function(val) == False, that value will
        be imputed. mask_function is used to create a boolean mask that determines
        which values in the input to impute.

        Use np.vectorize to vectorize singular python functions.

    """

    def __init__(self, mask_function=None):
        self.mask_function = mask_function

    @staticmethod
    def _as_column(y):
        """Return ``y`` as a 2D column array of shape (n_samples, 1).

        ``np.asarray`` is applied first so that any array-like (for example a
        plain list) is accepted, not only objects that expose ``reshape``.
        An ``ndarray`` input passes through ``np.asarray`` uncopied.
        """
        return np.asarray(y).reshape(-1, 1)

    def fit(self, y):
        """Fit the encoder on y.

        Parameters
        ----------
        y : {array-like}, shape (n_samples,)
            Input column, where `n_samples` is the number of samples.

        Returns
        -------
        self : NALabelEncoder
        """
        # A fresh imputer is created on every fit, so refitting discards any
        # previously fitted state.
        self.model_ = RobustImputer(strategy="constant", fill_values=np.nan, mask_function=self.mask_function)
        self.model_.fit(X=self._as_column(y))
        return self

    def transform(self, y):
        """Encode all non-float and non-finite values in y as NA values.

        Parameters
        ----------
        y : {array-like}, shape (n_samples)
            The input column to encode.

        Returns
        -------
        yt : {ndarray}, shape (n_samples,)
            The encoded input column.
        """
        check_is_fitted(self, "model_")
        # The imputer works on a 2D column; flatten restores the 1D layout.
        return self.model_.transform(self._as_column(y)).flatten()

    def inverse_transform(self, y):
        """Returns input column"""
        return y

    def _more_tags(self):
        return {"X_types": ["1dlabels"]}
from sagemaker_sklearn_extension.impute import RobustMissingIndicator
from sagemaker_sklearn_extension.preprocessing import LogExtremeValuesTransformer
from sagemaker_sklearn_extension.preprocessing import NALabelEncoder
from sagemaker_sklearn_extension.preprocessing import QuadraticFeatures
from sagemaker_sklearn_extension.preprocessing import QuantileExtremeValuesTransformer
from sagemaker_sklearn_extension.preprocessing import RemoveConstantColumnsTransformer
from sagemaker_sklearn_extension.preprocessing import RobustLabelEncoder
from sagemaker_sklearn_extension.preprocessing import RobustStandardScaler
from sagemaker_sklearn_extension.preprocessing import ThresholdOneHotEncoder


@pytest.mark.parametrize(
    "Estimator",
    [
        DateTimeVectorizer(),
        LogExtremeValuesTransformer(),
        MultiColumnTfidfVectorizer(),
        NALabelEncoder(),
        QuadraticFeatures(),
        QuantileExtremeValuesTransformer(),
        RobustImputer(),
        RemoveConstantColumnsTransformer(),
        RobustLabelEncoder(),
        RobustMissingIndicator(),
        RobustStandardScaler(),
        ThresholdOneHotEncoder(),
    ],
)
def test_all_estimators(Estimator):
    """Run ``check_estimator`` on each default-constructed estimator instance.

    ``Estimator`` is an already-constructed instance supplied by the
    parametrization above, not a class.
    """
    # Deliberately no ``return``: pytest treats a non-None return value from a
    # test function as a mistake, and a failing check surfaces as an exception
    # raised by ``check_estimator`` itself.
    check_estimator(Estimator)