def test_column_transformer_get_set_params_with_remainder():
    """Exercise get_params/set_params with an estimator as ``remainder``.

    The ColumnTransformer holds one named StandardScaler step plus a
    StandardScaler remainder; ``get_params()`` is compared against the
    expected flat dict before and after the ``set_params`` calls.
    """
    ct = ColumnTransformer(
        [('trans1', StandardScaler(), [0])],
        remainder=StandardScaler(),
    )
    step = ct.transformers[0][1]

    expected = {
        'n_jobs': 1,
        'transformer_weights': None,
        'transformers': ct.transformers,
        'remainder': ct.remainder,
        'remainder__copy': True,
        'remainder__with_mean': True,
        'remainder__with_std': True,
        'trans1': step,
        'trans1__copy': True,
        'trans1__with_mean': True,
        'trans1__with_std': True,
    }
    assert ct.get_params() == expected

    # Nested parameters of the remainder estimator are settable by prefix.
    ct.set_params(remainder__with_std=False)
    current = ct.get_params()
    assert not current['remainder__with_std']

    # Once the step is the string 'passthrough', its nested ``trans1__*``
    # entries are no longer expected in the parameter dict.
    ct.set_params(trans1='passthrough')
    expected = {
        key: value
        for key, value in expected.items()
        if not key.startswith('trans1__')
    }
    expected.update(
        remainder=ct.remainder,
        remainder__with_std=False,
        trans1='passthrough',
        transformers=ct.transformers,
    )
    assert ct.get_params() == expected
# Example #2
# 0
def test_column_transformer_get_set_params_with_remainder():
    """Check the flat parameter dict when ``remainder`` is an estimator.

    Compares ``get_params()`` with the expected mapping right after
    construction, after changing a nested remainder parameter, and after
    swapping the named step for ``'passthrough'``.
    """
    ct = ColumnTransformer(
        [('trans1', StandardScaler(), [0])],
        remainder=StandardScaler(),
    )

    # Top-level parameters of the ColumnTransformer itself.
    expected = {
        'n_jobs': None,
        'sparse_threshold': 0.3,
        'transformer_weights': None,
        'transformers': ct.transformers,
    }
    # The remainder estimator and its nested parameters.
    expected.update({
        'remainder': ct.remainder,
        'remainder__copy': True,
        'remainder__with_mean': True,
        'remainder__with_std': True,
    })
    # The named step and its nested parameters.
    expected.update({
        'trans1': ct.transformers[0][1],
        'trans1__copy': True,
        'trans1__with_mean': True,
        'trans1__with_std': True,
    })
    assert ct.get_params() == expected

    ct.set_params(remainder__with_std=False)
    with_std = ct.get_params()['remainder__with_std']
    assert not with_std

    ct.set_params(trans1='passthrough')
    # The step is now a plain string, so no ``trans1__*`` keys are expected;
    # the remainder keeps the ``with_std=False`` set above.
    expected = {
        'n_jobs': None,
        'sparse_threshold': 0.3,
        'transformer_weights': None,
        'transformers': ct.transformers,
        'remainder': ct.remainder,
        'remainder__copy': True,
        'remainder__with_mean': True,
        'remainder__with_std': False,
        'trans1': 'passthrough',
    }
    assert ct.get_params() == expected
def test_column_transformer_get_set_params():
    """Check get_params/set_params on a two-step ColumnTransformer.

    ``get_params()`` is compared against the expected flat dict after
    construction and again after the first step is replaced by
    ``'passthrough'``; in between, a nested parameter of the first step is
    switched off through ``set_params``.
    """
    ct = ColumnTransformer([('trans1', StandardScaler(), [0]),
                            ('trans2', StandardScaler(), [1])])

    exp = {'n_jobs': 1,
           'remainder': 'drop',
           'trans1': ct.transformers[0][1],
           'trans1__copy': True,
           'trans1__with_mean': True,
           'trans1__with_std': True,
           'trans2': ct.transformers[1][1],
           'trans2__copy': True,
           'trans2__with_mean': True,
           'trans2__with_std': True,
           'transformers': ct.transformers,
           'transformer_weights': None}

    # Plain asserts (as in the sibling tests) instead of the nose-style
    # assert_dict_equal / assert_false helpers.
    assert ct.get_params() == exp

    ct.set_params(trans1__with_mean=False)
    assert not ct.get_params()['trans1__with_mean']

    # With the step set to the string 'passthrough' there are no nested
    # ``trans1__*`` entries left to expect.
    ct.set_params(trans1='passthrough')
    exp = {'n_jobs': 1,
           'remainder': 'drop',
           'trans1': 'passthrough',
           'trans2': ct.transformers[1][1],
           'trans2__copy': True,
           'trans2__with_mean': True,
           'trans2__with_std': True,
           'transformers': ct.transformers,
           'transformer_weights': None}

    assert ct.get_params() == exp
def test_column_transformer_get_set_params():
    """Check get_params/set_params on a two-step ColumnTransformer.

    This variant expects ``'remainder'`` to be reported as
    ``'passthrough'`` although no ``remainder`` argument is passed.
    NOTE(review): that presumably matches the default of the library
    version this test was written for -- confirm before reusing.
    """
    ct = ColumnTransformer([('trans1', StandardScaler(), [0]),
                            ('trans2', StandardScaler(), [1])])

    exp = {'n_jobs': 1,
           'remainder': 'passthrough',
           'trans1': ct.transformers[0][1],
           'trans1__copy': True,
           'trans1__with_mean': True,
           'trans1__with_std': True,
           'trans2': ct.transformers[1][1],
           'trans2__copy': True,
           'trans2__with_mean': True,
           'trans2__with_std': True,
           'transformers': ct.transformers,
           'transformer_weights': None}

    # Plain asserts (as in the sibling tests) instead of the nose-style
    # assert_dict_equal / assert_false helpers.
    assert ct.get_params() == exp

    ct.set_params(trans1__with_mean=False)
    assert not ct.get_params()['trans1__with_mean']

    # With the step set to the string 'passthrough' there are no nested
    # ``trans1__*`` entries left to expect.
    ct.set_params(trans1='passthrough')
    exp = {'n_jobs': 1,
           'remainder': 'passthrough',
           'trans1': 'passthrough',
           'trans2': ct.transformers[1][1],
           'trans2__copy': True,
           'trans2__with_mean': True,
           'trans2__with_std': True,
           'transformers': ct.transformers,
           'transformer_weights': None}

    assert ct.get_params() == exp
def test_column_transformer_get_set_params():
    """Check get_params/set_params on a two-step ColumnTransformer.

    The expected flat parameter dict is compared with ``get_params()``
    after construction and after the first step has been replaced by
    ``'passthrough'``.
    """
    steps = [('trans1', StandardScaler(), [0]),
             ('trans2', StandardScaler(), [1])]
    ct = ColumnTransformer(steps)

    expected = {
        'n_jobs': None,
        'remainder': 'drop',
        'sparse_threshold': 0.3,
        'transformer_weights': None,
        'transformers': ct.transformers,
        'verbose': False,
        'trans1': ct.transformers[0][1],
        'trans1__copy': True,
        'trans1__with_mean': True,
        'trans1__with_std': True,
        'trans2': ct.transformers[1][1],
        'trans2__copy': True,
        'trans2__with_mean': True,
        'trans2__with_std': True,
    }
    assert ct.get_params() == expected

    # A nested parameter of a named step is addressed as <name>__<param>.
    ct.set_params(trans1__with_mean=False)
    params = ct.get_params()
    assert not params['trans1__with_mean']

    # After the swap the step is a plain string, so its nested
    # ``trans1__*`` entries drop out of the expectation.
    ct.set_params(trans1='passthrough')
    expected = {
        name: value
        for name, value in expected.items()
        if not name.startswith('trans1__')
    }
    expected.update({
        'trans1': 'passthrough',
        'trans2': ct.transformers[1][1],
        'transformers': ct.transformers,
    })
    assert ct.get_params() == expected
class DFColumnTransformer(BaseEstimator, TransformerMixin):
    """ColumnTransformer wrapper whose ``transform`` returns a DataFrame.

    Parameters
    ----------
    transformers : list of tuples
        List of tuples in the form: (name, transformer, columns).
    remainder : str, default='drop'
        Strategy for the features that were not selected. Possible values are
        ['drop', 'passthrough'].

    Attributes
    ----------
    ct : ColumnTransformer
        The wrapped transformer that is fitted and applied.

    Notes
    -----
    Output column names are taken from the ``columns`` entry of each
    transformer tuple, so every transformer is assumed to emit exactly one
    output column per selected input column, in the same order.
    """
    def __init__(self, transformers, remainder='drop'):
        self.transformers = transformers
        self.ct = ColumnTransformer(self.transformers)
        self.remainder = remainder

    def fit(self, X, y=None):
        """Fit the wrapped ColumnTransformer on ``X``.

        Parameters
        ----------
        X : DataFrame
            Input data; ``X.columns`` is read when ``remainder`` is
            'passthrough'.
        y : optional
            Forwarded unchanged to the wrapped transformer's ``fit``.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``remainder`` is neither 'drop' nor 'passthrough'.
        """
        if self.remainder not in ('drop', 'passthrough'):
            raise ValueError(
                "remainder must be 'drop' or 'passthrough', got %r"
                % (self.remainder,))

        # Work on a copy: appending to ``self.transformers`` itself would
        # mutate the caller's list and add a second 'passthrough' entry on
        # every repeated fit.
        transformers = list(self.transformers)

        if self.remainder == 'passthrough':
            selected = {col for tpl in self.transformers for col in tpl[-1]}
            # Keep the DataFrame's own column order so the output layout is
            # deterministic (a set difference has arbitrary order).
            passed_features = [col for col in X.columns
                               if col not in selected]
            if passed_features:
                transformers.append(
                    ('passthrough', PassThrough(), passed_features))

        # Always push the current list into the wrapped transformer so it
        # cannot go stale relative to ``self.transformers``.
        self.ct.set_params(transformers=transformers).fit(X, y)

        return self

    def transform(self, X, y=None):
        """Transform ``X`` and wrap the result in a DataFrame.

        Parameters
        ----------
        X : DataFrame
            Data to transform; its index is reused for the result.
        y : optional
            Unused.

        Returns
        -------
        DataFrame
            Transformed data, indexed like ``X`` and labelled with the
            column names listed in the wrapped transformer's tuples.
        """
        col_names = np.concatenate([tple[-1] for tple in self.ct.transformers])
        return pd.DataFrame(data=self.ct.transform(X),
                            index=X.index,
                            columns=col_names)
# Example #7
# 0
# categorical_features = [3, 4, 7, 8]
# Categorical columns: fill gaps with the most frequent value, then one-hot
# encode into a dense float64 array (unknown categories raise an error).
categorical_imputer = impute.SimpleImputer(strategy="most_frequent")
categorical_encoder = preprocessing.OneHotEncoder(
    categories=categories,
    dtype=np.float64,
    handle_unknown="error",
    sparse=False,
)
categorical_transformer = Pipeline(steps=[
    ('imp', categorical_imputer),
    ('enc', categorical_encoder),
])

start = time.time()
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features),
    ])

# Override the imputation strategy of the 'imp' step inside the 'num'
# pipeline through the nested parameter name.
preprocessor.set_params(num__imp__strategy='median')

# Timing loop: fit a fresh clone ten times so the original preprocessor
# stays unfitted until the real fit below.
for _ in range(10):
    sklearn.base.clone(preprocessor).fit(x_pipe, y_pipe)

end = time.time()
duration = end - start
print("scale", duration)
# About 0.4 of a second to scale

# Fit and transform in a single pass; the original fitted via fit_transform,
# discarded that result, and then transformed the same data a second time.
x_pipe = preprocessor.fit_transform(x_pipe, y_pipe)

#Selector	LVT_threshold	Reducer	PCA_iterated_power	
#LVT,	    0,	            PCA,	auto,