Example #1
0
 def __init__(self):
     """Build the four preprocessing pipelines used by this object.

     Two categorical pipelines (group rare levels, then encode; optionally
     followed by PCA) and two continuous pipelines (rank-gaussianize, then
     standardize; optionally followed by PCA).
     """
     # Local factories so each pipeline gets its own, independently
     # fitted estimator instances (sharing instances would entangle fits).
     def cat_steps():
         return [e.CategoricalGrouper(), e.CategoricalEncoder()]

     def cont_steps():
         return [QuantileTransformer(output_distribution='normal',
                                     random_state=0),
                 StandardScaler()]

     def pca():
         return PCA(n_components=6, random_state=0)

     self.catPreprocessor = make_pipeline(*cat_steps())
     self.catPCA = make_pipeline(*cat_steps(), pca())
     self.contPreprocessor = make_pipeline(*cont_steps())
     self.contPCA = make_pipeline(*cont_steps(), pca())
Example #2
0
LOGGER.info('Load data')
df = pd.read_pickle(p.joinpath('data', 'interim', 'research.pkl'))
X = df.drop(labels='loss', axis=1)
y = df['loss'].copy()

LOGGER.info('Process target')
# Gaussianize the target; power_transform expects a 2-D array, so reshape
# in and flatten back out, preserving the original index.
y = pd.Series(data=power_transform(y.values.reshape(-1, 1)).flatten(),
              name='loss', index=y.index)

LOGGER.info('Load categorical features to drop')
# Use a context manager: the original json.load(open(...)) leaked the
# file handle.
with open(file=p.joinpath('src', 'meta', 'NoVariance.json'), mode='r') as fh:
    noVarFeatures = json.load(fh)

LOGGER.info('Process categorical features')
# Hoist the repeated filter/drop so it is computed once, not twice.
catX = X.filter(like='cat').drop(labels=noVarFeatures, axis=1)
catf = pd.DataFrame(
    data=make_pipeline(
        e.CategoricalGrouper(),
        e.CategoricalEncoder()
    ).fit_transform(catX, y),
    columns=catX.columns,
    index=X.index
)

LOGGER.info('Process continuous features')
# Rank-gaussianize then standardize the continuous columns.
# NOTE: the original snippet never closed the pd.DataFrame( call — the
# closing parenthesis below restores the form seen in the sibling examples.
contX = X.filter(like='cont')
contf = pd.DataFrame(
    data=scale(quantile_transform(
        X=contX,
        output_distribution='normal',
        random_state=0
    )),
    columns=contX.columns,
    index=X.index
)
Example #3
0
LOGGER.info('Load correlated features')
# Context manager ensures the JSON handle is closed deterministically
# (the original json.load(open(...)) leaked it).
with open(file=p.joinpath('src', 'meta', 'Correlated.json'), mode='r') as fh:
    CORRELATED = json.load(fh)

LOGGER.info('Load data')
df = pd.read_pickle(p.joinpath('data', 'interim', 'research.pkl'))
X = df.filter(CORRELATED)
y = df['loss'].copy()

LOGGER.info('Process target')
# power_transform needs a 2-D array; flatten back and keep the index.
y = pd.Series(data=power_transform(y.values.reshape(-1, 1)).flatten(),
              name='loss',
              index=y.index)

LOGGER.info('Process categorical features')
# Hoist the repeated X.filter(like='cat') so it is computed once.
catX = X.filter(like='cat')
catf = pd.DataFrame(data=make_pipeline(
    e.CategoricalGrouper(),
    e.CategoricalEncoder()).fit_transform(catX, y),
                    columns=catX.columns,
                    index=X.index)

LOGGER.info('Process continuous features')
# Hoist the repeated X.filter(like='cont') as well.
contX = X.filter(like='cont')
contf = pd.DataFrame(data=scale(
    quantile_transform(X=contX,
                       output_distribution='normal',
                       random_state=0)),
                     columns=contX.columns,
                     index=X.index)

LOGGER.info(r'Figure 1: Correlations above 75%')
X = catf.join(contf)
Example #4
0
df = pd.read_pickle(p.joinpath('data', 'interim', 'research.pkl'))
X = df.drop(labels='loss', axis=1)
y = df['loss'].copy()

LOGGER.info('Process target')
# power_transform needs a 2-D array; flatten back and keep the index.
y = pd.Series(data=power_transform(y.values.reshape(-1, 1)).flatten(),
              name='loss',
              index=y.index)

LOGGER.info('Load categorical features to drop')
# Context manager so the JSON handle is closed deterministically
# (the original json.load(open(...)) leaked it).
with open(file=p.joinpath('src', 'meta', 'NoVariance.json'), mode='r') as fh:
    noVarFeatures = json.load(fh)

LOGGER.info('Process categorical features')
# Hoist the repeated filter/drop so it is computed once, not twice.
catX = X.filter(like='cat').drop(labels=noVarFeatures, axis=1)
catf = pd.DataFrame(data=make_pipeline(
    e.CategoricalGrouper(), e.CategoricalEncoder()).fit_transform(catX, y),
                    columns=catX.columns,
                    index=X.index)

LOGGER.info('Process continuous features')
# Hoist the repeated X.filter(like='cont') as well.
contX = X.filter(like='cont')
contf = pd.DataFrame(data=scale(
    quantile_transform(X=contX,
                       output_distribution='normal',
                       random_state=0)),
                     columns=contX.columns,
                     index=X.index)

LOGGER.info('Find correlations')
corr = catf.join(contf).corr()
Example #5
0
def make_pipeline(model: Estimator) -> Pipeline:
    """Wrap *model* in the standard categorical preprocessing pipeline.

    The pipeline groups rare categorical levels, encodes them, and then
    hands the result to the supplied estimator under the 'clf' step name.
    """
    steps = [
        ('grouper', e.CategoricalGrouper()),
        ('encoder', e.CategoricalEncoder()),
        ('clf', model),
    ]
    return Pipeline(steps)