def __init__(self):
    """Assemble the reusable preprocessing pipelines.

    Builds two pipelines per feature family: a plain preprocessor and a
    variant that appends a 6-component PCA for dimensionality reduction.
    """
    # Categorical route: collapse rare levels, then encode.
    grouperEncoder = [e.CategoricalGrouper(), e.CategoricalEncoder()]
    self.catPreprocessor = make_pipeline(*grouperEncoder)
    self.catPCA = make_pipeline(
        e.CategoricalGrouper(),
        e.CategoricalEncoder(),
        PCA(n_components=6, random_state=0),
    )
    # Continuous route: map to a normal distribution, then z-score.
    self.contPreprocessor = make_pipeline(
        QuantileTransformer(output_distribution='normal', random_state=0),
        StandardScaler(),
    )
    self.contPCA = make_pipeline(
        QuantileTransformer(output_distribution='normal', random_state=0),
        StandardScaler(),
        PCA(n_components=6, random_state=0),
    )
# Load the interim research dataset and split features from the target.
LOGGER.info('Load data')
df = pd.read_pickle(p.joinpath('data', 'interim', 'research.pkl'))
X = df.drop(labels='loss', axis=1)
y = df['loss'].copy()

# Gaussianize the skewed loss target (sklearn's default power transform).
LOGGER.info('Process target')
y = pd.Series(data=power_transform(y.values.reshape(-1, 1)).flatten(),
              name='loss', index=y.index)

# Read the list of zero-variance categorical columns to exclude.
# FIX: use a context manager — the original open(...) handle was never closed.
LOGGER.info('Load categorical features to drop')
with open(file=p.joinpath('src', 'meta', 'NoVariance.json'), mode='r') as fh:
    noVarFeatures = json.load(fh)

# Group rare levels and encode the surviving categorical columns.
LOGGER.info('Process categorical features')
# Hoist the repeated filter/drop expression so it is evaluated once.
catFrame = X.filter(like='cat').drop(labels=noVarFeatures, axis=1)
catf = pd.DataFrame(
    data=make_pipeline(e.CategoricalGrouper(),
                       e.CategoricalEncoder()).fit_transform(catFrame, y),
    columns=catFrame.columns,
    index=X.index)

# Map continuous features to a standard normal, then z-score them.
# FIX: the original DataFrame call was missing its closing parenthesis.
LOGGER.info('Process continuous features')
contf = pd.DataFrame(
    data=scale(quantile_transform(X=X.filter(like='cont'),
                                  output_distribution='normal',
                                  random_state=0)),
    columns=X.filter(like='cont').columns,
    index=X.index)
# Restrict the analysis to the feature subset flagged as highly correlated.
# FIX: use a context manager — the original open(...) handle was never closed.
LOGGER.info('Load correlated features')
with open(file=p.joinpath('src', 'meta', 'Correlated.json'), mode='r') as fh:
    CORRELATED = json.load(fh)

# Load the interim research dataset, keeping only the correlated columns.
LOGGER.info('Load data')
df = pd.read_pickle(p.joinpath('data', 'interim', 'research.pkl'))
X = df.filter(CORRELATED)
y = df['loss'].copy()

# Gaussianize the skewed loss target (sklearn's default power transform).
LOGGER.info('Process target')
y = pd.Series(data=power_transform(y.values.reshape(-1, 1)).flatten(),
              name='loss', index=y.index)

# Group rare levels and encode the categorical columns.
LOGGER.info('Process categorical features')
catf = pd.DataFrame(
    data=make_pipeline(e.CategoricalGrouper(),
                       e.CategoricalEncoder()).fit_transform(
        X.filter(like='cat'), y),
    columns=X.filter(like='cat').columns,
    index=X.index)

# Map continuous features to a standard normal, then z-score them.
LOGGER.info('Process continuous features')
contf = pd.DataFrame(
    data=scale(quantile_transform(X=X.filter(like='cont'),
                                  output_distribution='normal',
                                  random_state=0)),
    columns=X.filter(like='cont').columns,
    index=X.index)

LOGGER.info(r'Figure 1: Correlations above 75%')
# Recombine the processed categorical and continuous features on the index.
X = catf.join(contf)
# Load the interim research dataset and split features from the target.
df = pd.read_pickle(p.joinpath('data', 'interim', 'research.pkl'))
X = df.drop(labels='loss', axis=1)
y = df['loss'].copy()

# Gaussianize the skewed loss target (sklearn's default power transform).
LOGGER.info('Process target')
y = pd.Series(data=power_transform(y.values.reshape(-1, 1)).flatten(),
              name='loss', index=y.index)

# Read the list of zero-variance categorical columns to exclude.
# FIX: use a context manager — the original open(...) handle was never closed.
LOGGER.info('Load categorical features to drop')
with open(file=p.joinpath('src', 'meta', 'NoVariance.json'), mode='r') as fh:
    noVarFeatures = json.load(fh)

# Group rare levels and encode the surviving categorical columns.
LOGGER.info('Process categorical features')
# Hoist the repeated filter/drop expression so it is evaluated once.
catFrame = X.filter(like='cat').drop(labels=noVarFeatures, axis=1)
catf = pd.DataFrame(
    data=make_pipeline(e.CategoricalGrouper(),
                       e.CategoricalEncoder()).fit_transform(catFrame, y),
    columns=catFrame.columns,
    index=X.index)

# Map continuous features to a standard normal, then z-score them.
LOGGER.info('Process continuous features')
contf = pd.DataFrame(
    data=scale(quantile_transform(X=X.filter(like='cont'),
                                  output_distribution='normal',
                                  random_state=0)),
    columns=X.filter(like='cont').columns,
    index=X.index)

# Pairwise correlation matrix over all processed features.
LOGGER.info('Find correlations')
corr = catf.join(contf).corr()
def make_pipeline(model: Estimator) -> Pipeline:
    """Wrap *model* behind the standard categorical preprocessing steps.

    Returns a Pipeline that groups rare categorical levels, encodes them,
    and then feeds the result into the supplied estimator.

    NOTE(review): this shadows sklearn's ``make_pipeline`` within this
    module — presumably intentional; confirm no caller needs the sklearn
    helper under the same name.
    """
    steps = [
        ('grouper', e.CategoricalGrouper()),
        ('encoder', e.CategoricalEncoder()),
        ('clf', model),
    ]
    return Pipeline(steps)