Exemple #1
0
        train = data.drop(target, axis=1)
        cv = StratifiedKFold(5)
        scorer = rev_weighted_quad_kappa
        predictors = [
            LabelsClipper(regressor=LinearRegression()),
        ]
        binner = CustomBinaryBinner({col: {'bins': 7} for col in continuous})
        # BINNER_CONFIG = [{ col: {'bins': 3} for col in continuous },
        #     { col: {'bins': 5} for col in continuous },
        #     { col: {'bins': 7} for col in continuous },
        #     { col: {'values': [train[col].max()]} for col in continuous }]
        BOX_COX = BOX_COX_P
        precision = 4

    elif dataset == 'boston':
        data, labels, continuous, discrete, dummy, categorical, target, missing = get_boston(
            missing=MISSING)
        train = data.drop(target, axis=1)
        cv = KFold(5, shuffle=True, random_state=0)
        scorer = rmse
        predictors = [
            LinearRegression(),
            # SVR(),
            # RandomForestRegressor(),
            # DecisionTreeRegressor(),
            # KNeighborsRegressor(n_neighbors=5),
        ]
        binner = CustomBinner(
            {col: {
                'bins': 7
            }
             for col in continuous + discrete})
Exemple #2
0
def get_test_config_boston(missing=True):
    data, labels, continuous, discrete, dummy, categorical, target, missing = get_boston(
        test=False, missing=missing)
    test_data, test_labels = get_boston(test=True, missing=missing)[0:2]
    test = test_data.drop(target, axis=1)
    scorer = rmse

    model = Pipeline([('clipper', None), ('binner', None), ('binner2', None),
                      ('simple_imputer', None), ('zero_filler', ZeroFiller()),
                      ('main_imputer', None),
                      ('dropper', FeatureDropper(drop=[])), ('poly', None),
                      ('combinations', None), ('boxcox', None),
                      ('scaler', None), ('reduce_dim', None),
                      ('predictor', None)])

    params = {
        #     0
        'XGBRegressor_best': {
            'params': {
                'binner2':
                None,
                'boxcox':
                BoxCoxTransformer(lambdas_per_column={
                    'age': 2,
                    'tax': 0,
                    'lstat': 0
                }),
                'clipper':
                OutliersClipper(columns=[
                    'crim', 'zn', 'nox', 'indus', 'rm', 'age', 'tax',
                    'ptratio', 'b', 'lstat', 'dis'
                ]),
                'combinations':
                None,
                'dropper__drop': [],
                'main_imputer':
                ModelBasedFullImputer(
                    columns=[
                        'crim', 'zn', 'nox', 'indus', 'rm', 'age', 'tax',
                        'ptratio', 'b', 'dis'
                    ],
                    model=DecisionTreeRegressor(max_depth=None)),
                'poly':
                PolynomialsAdder(
                    powers_per_column={
                        'crim': [2, 3],
                        'zn': [2, 3],
                        'nox': [2, 3],
                        'indus': [2, 3],
                        'rm': [2, 3],
                        'age': [2, 3],
                        'tax': [2, 3],
                        'ptratio': [2, 3],
                        'b': [2, 3],
                        'lstat': [2, 3],
                        'dis': [2, 3]
                    }),
                'predictor':
                XGBRegressor(learning_rate=0.05, max_depth=6,
                             n_estimators=500),
                'reduce_dim':
                None,
                'scaler':
                None,
                'simple_imputer':
                FillNaTransformer(from_dict={},
                                  mean=[],
                                  median=[
                                      'crim', 'zn', 'nox', 'indus', 'rm',
                                      'age', 'tax', 'ptratio', 'b', 'dis'
                                  ],
                                  nan_flag=[
                                      'crim', 'zn', 'nox', 'indus', 'rm',
                                      'age', 'tax', 'ptratio', 'b', 'dis'
                                  ],
                                  zero=[])
            },
            'score': 3.7821358047127682,
            'std': 0.4967512627490983
        },

        # 0
        'Lasso_best': {
            'params': {
                'binner2':
                None,
                'boxcox':
                BoxCoxTransformer(lambdas_per_column={
                    'age': 2,
                    'tax': 0,
                    'lstat': 0
                }),
                'clipper':
                OutliersClipper(columns=[
                    'crim', 'zn', 'nox', 'indus', 'rm', 'age', 'tax',
                    'ptratio', 'b', 'lstat', 'dis'
                ]),
                'combinations':
                FeatureProduct(columns=[
                    'crim', 'zn', 'nox', 'indus', 'rm', 'age', 'tax',
                    'ptratio', 'b', 'lstat', 'dis'
                ]),
                'dropper__drop': [
                    'crim_nan', 'zn_nan', 'nox_nan', 'indus_nan', 'rm_nan',
                    'age_nan', 'tax_nan', 'ptratio_nan', 'b_nan', 'dis_nan'
                ],
                'main_imputer':
                ModelBasedFullImputer(
                    columns=[
                        'crim', 'zn', 'nox', 'indus', 'rm', 'age', 'tax',
                        'ptratio', 'b', 'dis'
                    ],
                    model=DecisionTreeRegressor(max_depth=None)),
                'poly':
                None,
                'predictor':
                Lasso(alpha=0.01),
                'reduce_dim':
                None,
                'scaler':
                RobustScaler(),
                'simple_imputer':
                FillNaTransformer(from_dict={},
                                  mean=[],
                                  median=[
                                      'crim', 'zn', 'nox', 'indus', 'rm',
                                      'age', 'tax', 'ptratio', 'b', 'dis'
                                  ],
                                  nan_flag=[
                                      'crim', 'zn', 'nox', 'indus', 'rm',
                                      'age', 'tax', 'ptratio', 'b', 'dis'
                                  ],
                                  zero=[])
            },
            'score': 3.993797473454735,
            'std': 0.5808956921355953
        },

        #     0
        'LinearRegression_best': {
            'params': {
                'binner2':
                None,
                'boxcox':
                BoxCoxTransformer(lambdas_per_column={
                    'age': 2,
                    'tax': 0,
                    'lstat': 0
                }),
                'clipper':
                OutliersClipper(columns=[
                    'crim', 'zn', 'nox', 'indus', 'rm', 'age', 'tax',
                    'ptratio', 'b', 'lstat', 'dis'
                ]),
                'combinations':
                None,
                'dropper__drop': [
                    'crim_nan', 'zn_nan', 'nox_nan', 'indus_nan', 'rm_nan',
                    'age_nan', 'tax_nan', 'ptratio_nan', 'b_nan', 'dis_nan'
                ],
                'main_imputer':
                ModelBasedFullImputer(
                    columns=[
                        'crim', 'zn', 'nox', 'indus', 'rm', 'age', 'tax',
                        'ptratio', 'b', 'dis'
                    ],
                    model=DecisionTreeRegressor(max_depth=8)),
                'poly':
                PolynomialsAdder(
                    powers_per_column={
                        'crim': [2, 3],
                        'zn': [2, 3],
                        'nox': [2, 3],
                        'indus': [2, 3],
                        'rm': [2, 3],
                        'age': [2, 3],
                        'tax': [2, 3],
                        'ptratio': [2, 3],
                        'b': [2, 3],
                        'lstat': [2, 3],
                        'dis': [2, 3]
                    }),
                'predictor':
                LinearRegression(),
                'reduce_dim':
                None,
                'scaler':
                None,
                'simple_imputer':
                FillNaTransformer(from_dict={},
                                  mean=[],
                                  median=[
                                      'crim', 'zn', 'nox', 'indus', 'rm',
                                      'age', 'tax', 'ptratio', 'b', 'dis'
                                  ],
                                  nan_flag=[
                                      'crim', 'zn', 'nox', 'indus', 'rm',
                                      'age', 'tax', 'ptratio', 'b', 'dis'
                                  ],
                                  zero=[])
            },
            'score': 4.514645815970899,
            'std': 0.7631593234069367
        },
        'DecisionTreeRegressor_base': {
            'params': {
                'predictor':
                DecisionTreeRegressor(criterion='mse',
                                      max_depth=4,
                                      max_features=None,
                                      max_leaf_nodes=None,
                                      min_impurity_decrease=0.0,
                                      min_impurity_split=None,
                                      min_samples_leaf=1,
                                      min_samples_split=2,
                                      min_weight_fraction_leaf=0.0,
                                      presort=False,
                                      random_state=None,
                                      splitter='best'),
                'scaler':
                None,
                'simple_imputer':
                FillNaTransformer(from_dict={},
                                  mean=[],
                                  median=[],
                                  nan_flag=[],
                                  zero=[
                                      'crim', 'zn', 'nox', 'indus', 'rm',
                                      'age', 'tax', 'ptratio', 'b', 'dis'
                                  ])
            },
            'score': 5.5088106991425985,
            'std': 0.293662905734789
        },
        'KNeighborsRegressor_base': {
            'params': {
                'predictor':
                KNeighborsRegressor(algorithm='auto',
                                    leaf_size=30,
                                    metric='minkowski',
                                    metric_params=None,
                                    n_jobs=1,
                                    n_neighbors=7,
                                    p=2,
                                    weights='uniform'),
                'scaler':
                RobustScaler(copy=True,
                             quantile_range=(25.0, 75.0),
                             with_centering=True,
                             with_scaling=True),
                'simple_imputer':
                FillNaTransformer(from_dict={},
                                  mean=[
                                      'crim', 'zn', 'nox', 'indus', 'rm',
                                      'age', 'tax', 'ptratio', 'b', 'dis'
                                  ],
                                  median=[],
                                  nan_flag=[],
                                  zero=[])
            },
            'score': 5.859771905373064,
            'std': 0.90721907618626
        },
        'LinearRegression_base': {
            'params': {
                'predictor':
                LinearRegression(copy_X=True,
                                 fit_intercept=True,
                                 n_jobs=1,
                                 normalize=False),
                'scaler':
                RobustScaler(copy=True,
                             quantile_range=(25.0, 75.0),
                             with_centering=True,
                             with_scaling=True),
                'simple_imputer':
                FillNaTransformer(from_dict={},
                                  mean=[],
                                  median=[
                                      'crim', 'zn', 'nox', 'indus', 'rm',
                                      'age', 'tax', 'ptratio', 'b', 'dis'
                                  ],
                                  nan_flag=[],
                                  zero=[])
            },
            'score': 5.494688479501426,
            'std': 0.5377531716144219
        },
        'Lasso_base': {
            'params': {
                'predictor':
                Lasso(alpha=0.01),
                'scaler':
                RobustScaler(copy=True,
                             quantile_range=(25.0, 75.0),
                             with_centering=True,
                             with_scaling=True),
                'simple_imputer':
                FillNaTransformer(from_dict={},
                                  mean=[],
                                  median=[
                                      'crim', 'zn', 'nox', 'indus', 'rm',
                                      'age', 'tax', 'ptratio', 'b', 'dis'
                                  ],
                                  nan_flag=[],
                                  zero=[])
            },
            'score': 0,
            'std': 0
        },

        #     'XGBRegressor_base': {'params': {
        #         'predictor': XGBRegressor(max_depth=4),
        #         'scaler': None,
        #         'simple_imputer': FillNaTransformer(from_dict={},
        #                 mean=['crim', 'zn', 'nox', 'indus', 'rm', 'age', 'tax', 'ptratio', 'b', 'dis'],
        #                 median=[], nan_flag=[], zero=[])},
        #   'score': 4.088217989236429,
        #   'std': 0.5303490714753816,
        # },
        'XGBRegressor_tuned_base': {
            'params': {
                'predictor':
                XGBRegressor(learning_rate=0.05, max_depth=6,
                             n_estimators=500),
                'scaler':
                None,
                'simple_imputer':
                FillNaTransformer(from_dict={},
                                  mean=[
                                      'crim', 'zn', 'nox', 'indus', 'rm',
                                      'age', 'tax', 'ptratio', 'b', 'dis'
                                  ],
                                  median=[],
                                  nan_flag=[],
                                  zero=[])
            },
            'score': 3.942697483564859,
            'std': 0.6029251513214098
        },
        'DecisionTreeRegressor_best': {
            'params': {
                'binner2':
                None,
                'boxcox':
                BoxCoxTransformer(lambdas_per_column={
                    'age': 2,
                    'tax': 0,
                    'lstat': 0
                }),
                'clipper':
                OutliersClipper(columns=[
                    'crim', 'zn', 'nox', 'indus', 'rm', 'age', 'tax',
                    'ptratio', 'b', 'lstat', 'dis'
                ]),
                'combinations':
                None,
                'dropper__drop': [],
                'main_imputer':
                ModelBasedFullImputer(columns=[
                    'crim', 'zn', 'nox', 'indus', 'rm', 'age', 'tax',
                    'ptratio', 'b', 'dis'
                ],
                                      model=DecisionTreeRegressor(
                                          criterion='mse',
                                          max_depth=None,
                                          max_features=None,
                                          max_leaf_nodes=None,
                                          min_impurity_decrease=0.0,
                                          min_impurity_split=None,
                                          min_samples_leaf=1,
                                          min_samples_split=2,
                                          min_weight_fraction_leaf=0.0,
                                          presort=False,
                                          random_state=None,
                                          splitter='best')),
                'poly':
                None,
                'predictor':
                DecisionTreeRegressor(criterion='mse',
                                      max_depth=8,
                                      max_features=None,
                                      max_leaf_nodes=None,
                                      min_impurity_decrease=0.0,
                                      min_impurity_split=None,
                                      min_samples_leaf=1,
                                      min_samples_split=2,
                                      min_weight_fraction_leaf=0.0,
                                      presort=False,
                                      random_state=None,
                                      splitter='best'),
                'reduce_dim':
                None,
                'scaler':
                None,
                'simple_imputer':
                FillNaTransformer(from_dict={},
                                  mean=[],
                                  median=[
                                      'crim', 'zn', 'nox', 'indus', 'rm',
                                      'age', 'tax', 'ptratio', 'b', 'dis'
                                  ],
                                  nan_flag=[
                                      'crim', 'zn', 'nox', 'indus', 'rm',
                                      'age', 'tax', 'ptratio', 'b', 'dis'
                                  ],
                                  zero=[])
            },
            'score': 4.840076015850126,
            'std': 0.718619669114889
        },
    }

    return data, test, test_labels, scorer, model, params, target