Beispiel #1
0
def numeric_pipeline_complex(impute_strategy=None, seq_no=0):
    """Build the search-space pipeline applied to numeric columns.

    The pipeline chains a ``SimpleImputer`` with an ``Optional`` wrapper
    around a ``ModuleChoice`` of four scalers (standard, min-max, max-abs
    and robust).

    Args:
        impute_strategy: Strategy forwarded to ``SimpleImputer``. ``None``
            searches over 'mean', 'median', 'constant' and 'most_frequent';
            a list is wrapped in a ``Choice``; any other value is passed
            through unchanged.
        seq_no: Number appended to every module name created here, so that
            several instances of this pipeline get distinct names.

    Returns:
        A ``Pipeline`` named ``numeric_pipeline_complex_<seq_no>`` whose
        ``columns`` selector is ``column_number_exclude_timedelta``.
    """
    # A missing strategy falls through to the list branch below, so both
    # cases end up wrapped in a Choice.
    if impute_strategy is None:
        impute_strategy = ['mean', 'median', 'constant', 'most_frequent']
    if isinstance(impute_strategy, list):
        impute_strategy = Choice(impute_strategy)

    # NOTE: a skewness/kurtosis reduction step was sketched here at some
    # point but is not part of the pipeline.
    imputer = SimpleImputer(missing_values=np.nan,
                            strategy=impute_strategy,
                            name=f'numeric_imputer_{seq_no}')

    scalers = [
        StandardScaler(name=f'numeric_standard_scaler_{seq_no}'),
        MinMaxScaler(name=f'numeric_minmax_scaler_{seq_no}'),
        MaxAbsScaler(name=f'numeric_maxabs_scaler_{seq_no}'),
        RobustScaler(name=f'numeric_robust_scaler_{seq_no}'),
    ]
    any_scaler = ModuleChoice(scalers, name=f'numeric_or_scaler_{seq_no}')
    # Wrapping the choice in Optional makes "no scaling at all" part of the
    # search space as well.
    maybe_scaler = Optional(any_scaler,
                            keep_link=True,
                            name=f'numeric_scaler_optional_{seq_no}')

    return Pipeline([imputer, maybe_scaler],
                    name=f'numeric_pipeline_complex_{seq_no}',
                    columns=column_number_exclude_timedelta)
Beispiel #2
0
def categorical_pipeline_complex(impute_strategy=None,
                                 svd_components=3,
                                 seq_no=0):
    """Build the search-space pipeline applied to categorical columns.

    After imputation the pipeline chooses between two encodings: a
    ``MultiLabelEncoder``, or a ``OneHotEncoder`` followed by an optional
    ``TruncatedSVD``.

    Args:
        impute_strategy: Strategy forwarded to ``SimpleImputer``. ``None``
            searches over 'constant' and 'most_frequent'; a list is wrapped
            in a ``Choice``; any other value is passed through unchanged.
        svd_components: ``n_components`` for ``TruncatedSVD``. A list is
            wrapped in a ``Choice``; any other value is used as given.
        seq_no: Number appended to every module name created here, so that
            several instances of this pipeline get distinct names.

    Returns:
        A ``Pipeline`` named ``categorical_pipeline_complex_<seq_no>`` whose
        ``columns`` selector is ``column_object_category_bool``.
    """
    # A missing strategy falls through to the list branch below, so both
    # cases end up wrapped in a Choice.
    if impute_strategy is None:
        impute_strategy = ['constant', 'most_frequent']
    if isinstance(impute_strategy, list):
        impute_strategy = Choice(impute_strategy)
    if isinstance(svd_components, list):
        svd_components = Choice(svd_components)

    imputer = SimpleImputer(missing_values=np.nan,
                            strategy=impute_strategy,
                            name=f'categorical_imputer_{seq_no}')
    label_branch = MultiLabelEncoder(
        name=f'categorical_label_encoder_{seq_no}')

    # One-hot branch: the encoder is linked into an optional SVD step, and
    # the result of that link is what enters the choice below.
    onehot_encoder = OneHotEncoder(name=f'categorical_onehot_{seq_no}')
    svd = TruncatedSVD(n_components=svd_components,
                       name=f'categorical_svd_{seq_no}')
    svd_optional = Optional(svd,
                            name=f'categorical_optional_svd_{seq_no}',
                            keep_link=True)
    onehot_branch = svd_optional(onehot_encoder)

    encoder_choice = ModuleChoice(
        [label_branch, onehot_branch],
        name=f'categorical_le_or_onehot_pca_{seq_no}')
    return Pipeline([imputer, encoder_choice],
                    name=f'categorical_pipeline_complex_{seq_no}',
                    columns=column_object_category_bool)
def conv_cell(hp_dict,
              type,
              cell_no,
              node_no,
              left_or_right,
              inputs,
              filters,
              is_reduction=False,
              data_format=None):
    """Build one branch of a cell node: pick an input, then pick an op.

    The two hyperparameters driving the branch (which input to use and which
    operation to apply) are looked up in ``hp_dict`` and created and stored
    there when missing. The lookup keys are built from ``type[2:]``,
    ``node_no`` and ``left_or_right`` and do not contain ``cell_no``, so
    calls that differ only in ``cell_no`` reuse the same hyperparameter
    objects.

    Args:
        hp_dict: Mutable mapping used as a hyperparameter cache; it is
            updated in place when a key is missing.
        type: Cell type label. The full value prefixes module names; the
            value without its first two characters is used in cache keys.
        cell_no: Cell index, used only in module name prefixes.
        node_no: Node index within the cell.
        left_or_right: Branch label within the node.
        inputs: List of ``ModuleSpace`` candidates to choose the input from.
        filters: Filter count forwarded to the candidate operations.
        is_reduction: Currently unused; strides are fixed to ``(1, 1)``.
        data_format: Forwarded to the candidate operations.

    Returns:
        The result of applying the operation ``ModuleChoice`` to the input
        choice.
    """
    assert isinstance(inputs, list)
    assert all(isinstance(m, ModuleSpace) for m in inputs)
    name_prefix = f'{type}_C{cell_no}_N{node_no}_{left_or_right}_'

    input_choice_key = f'{type[2:]}_N{node_no}_{left_or_right}_input_choice'
    op_choice_key = f'{type[2:]}_N{node_no}_{left_or_right}_op_choice'

    hp_choice = hp_dict.get(input_choice_key)
    if hp_choice is None:
        hp_choice = MultipleChoice(list(range(len(inputs))),
                                   1,
                                   name=input_choice_key)
        hp_dict[input_choice_key] = hp_choice
    # hp_choice is always set at this point, so no second cache write-back
    # from the InputChoice is needed.
    ic1 = InputChoice(inputs, 1, hp_choice=hp_choice)(inputs)

    # NOTE: strides were once meant to depend on is_reduction and the chosen
    # input; they are currently fixed.
    hp_strides = (1, 1)
    hp_op_choice = hp_dict.get(op_choice_key)
    module_candidates = [
        sepconv5x5(name_prefix,
                   filters,
                   strides=hp_strides,
                   data_format=data_format),
        sepconv3x3(name_prefix,
                   filters,
                   strides=hp_strides,
                   data_format=data_format),
        avgpooling3x3(name_prefix,
                      filters,
                      strides=hp_strides,
                      data_format=data_format),
        maxpooling3x3(name_prefix,
                      filters,
                      strides=hp_strides,
                      data_format=data_format),
        identity(name_prefix)
    ]
    if hp_op_choice is None:
        hp_op_choice = Choice(list(range(len(module_candidates))),
                              name=op_choice_key)
        hp_dict[op_choice_key] = hp_op_choice
    op_choice = ModuleChoice(module_candidates, hp_or=hp_op_choice)(ic1)

    return op_choice
Beispiel #4
0
def get_space_num_cat_pipeline_complex(dataframe_mapper_default=False,
                                       lightgbm_fit_kwargs=None,
                                       xgb_fit_kwargs=None,
                                       catboost_fit_kwargs=None):
    """Build a search space: numeric + categorical pipelines, then a model.

    The input is fed into ``numeric_pipeline_complex`` and
    ``categorical_pipeline_complex``; their outputs are merged by a
    ``DataFrameMapper`` and passed to a ``ModuleChoice`` between a LightGBM
    and an XGBoost Dask estimator, both configured for ``task='binary'``.

    Args:
        dataframe_mapper_default: Forwarded as ``default`` to
            ``DataFrameMapper``.
        lightgbm_fit_kwargs: ``fit_kwargs`` for the LightGBM estimator.
            ``None`` (the default) means a fresh empty dict per call.
        xgb_fit_kwargs: ``fit_kwargs`` for the XGBoost estimator. ``None``
            (the default) means a fresh empty dict per call.
        catboost_fit_kwargs: Accepted for interface compatibility; no
            CatBoost estimator is currently added to the space.

    Returns:
        The populated ``HyperSpace`` with its input set.
    """
    # Fresh dicts per call: a shared mutable default would be handed to
    # every estimator built without explicit fit kwargs.
    if lightgbm_fit_kwargs is None:
        lightgbm_fit_kwargs = {}
    if xgb_fit_kwargs is None:
        xgb_fit_kwargs = {}

    space = HyperSpace()
    with space.as_default():
        space_input = HyperInput(name='input1')
        p1 = numeric_pipeline_complex()(space_input)
        p2 = categorical_pipeline_complex()(space_input)
        p3 = DataFrameMapper(default=dataframe_mapper_default,
                             input_df=True,
                             df_out=True,
                             df_out_dtype_transforms=[(column_object, 'int')
                                                      ])([p1, p2])

        lightgbm_init_kwargs = {
            'boosting_type': Choice(['gbdt', 'dart', 'goss']),
            'num_leaves': Choice([11, 31, 101, 301, 501]),
            'learning_rate': Real(0.001, 0.1, step=0.005),
            'n_estimators': 100,
            'max_depth': -1,
            'tree_learner': 'data'  # add for dask
        }
        lightgbm_est = LightGBMDaskEstimator(task='binary',
                                             fit_kwargs=lightgbm_fit_kwargs,
                                             **lightgbm_init_kwargs)

        xgb_init_kwargs = {
            'tree_method': 'approx'  # add for dask
        }
        xgb_est = XGBoostDaskEstimator(task='binary',
                                       fit_kwargs=xgb_fit_kwargs,
                                       **xgb_init_kwargs)

        # NOTE: a CatBoost estimator is intentionally not part of the choice.
        # The call links the estimator choice after the mapper; its return
        # value is not needed here.
        ModuleChoice([lightgbm_est, xgb_est],
                     name='estimator_options')(p3)

        space.set_inputs(space_input)
    return space
Beispiel #5
0
def get_space_num_cat_pipeline_multi_complex(dataframe_mapper_default=False,
                                             lightgbm_fit_kwargs=None,
                                             xgb_fit_kwargs=None):
    """Build a search space with two stacked preprocessing stages.

    Stage one feeds the input into ``numeric_pipeline_complex`` and
    ``categorical_pipeline_complex`` and merges them with a
    ``DataFrameMapper``. Stage two repeats the same layout (with
    ``seq_no=1``) on the output of stage one. The result goes to a
    ``ModuleChoice`` between a LightGBM and an XGBoost estimator, both
    configured for ``task='binary'``.

    Args:
        dataframe_mapper_default: Forwarded as ``default`` to both
            ``DataFrameMapper`` instances.
        lightgbm_fit_kwargs: ``fit_kwargs`` for the LightGBM estimator.
            ``None`` (the default) means a fresh empty dict per call.
        xgb_fit_kwargs: ``fit_kwargs`` for the XGBoost estimator. ``None``
            (the default) means a fresh empty dict per call.

    Returns:
        The populated ``HyperSpace`` with its input set.
    """
    # Fresh dicts per call: a shared mutable default would be handed to
    # every estimator built without explicit fit kwargs.
    if lightgbm_fit_kwargs is None:
        lightgbm_fit_kwargs = {}
    if xgb_fit_kwargs is None:
        xgb_fit_kwargs = {}

    space = HyperSpace()
    with space.as_default():
        space_input = HyperInput(name='input1')

        # Stage one.
        p1 = numeric_pipeline_complex()(space_input)
        p2 = categorical_pipeline_complex()(space_input)
        p3 = DataFrameMapper(default=dataframe_mapper_default,
                             input_df=True,
                             df_out=True,
                             df_out_dtype_transforms=[(column_object,
                                                       'category')])([p1, p2])

        # Stage two: seq_no=1 keeps module names distinct from stage one.
        p4 = numeric_pipeline_complex(seq_no=1)(p3)
        p5 = categorical_pipeline_complex(seq_no=1)(p3)
        p6 = DataFrameMapper(default=dataframe_mapper_default,
                             input_df=True,
                             df_out=True,
                             df_out_dtype_transforms=[(column_object,
                                                       'category')])([p4, p5])

        lightgbm_init_kwargs = {
            'boosting_type': Choice(['gbdt', 'dart', 'goss']),
            'num_leaves': Choice([11, 31, 101, 301, 501]),
            'learning_rate': Real(0.001, 0.1, step=0.005),
            'n_estimators': 100,
            'max_depth': -1,
        }

        lightgbm_est = LightGBMEstimator(task='binary',
                                         fit_kwargs=lightgbm_fit_kwargs,
                                         **lightgbm_init_kwargs)

        xgb_init_kwargs = {}
        xgb_est = XGBoostEstimator(task='binary',
                                   fit_kwargs=xgb_fit_kwargs,
                                   **xgb_init_kwargs)

        # The call links the estimator choice after the second mapper; its
        # return value is not needed here.
        ModuleChoice([lightgbm_est, xgb_est])(p6)
        space.set_inputs(space_input)
    return space
Beispiel #6
0
    def __call__(self, *args, **kwargs):
        """Build a search space that chooses among the enabled estimators.

        Each estimator spec is read from ``self`` only when its ``enable_*``
        flag is truthy. A spec is a mapping whose ``"cls"`` entry supplies
        the module name (via ``__name__``); the whole mapping is also
        forwarded to ``ModuleSpace`` as keyword arguments.

        Positional and keyword arguments are accepted but ignored.

        Returns:
            A ``HyperSpace`` whose input is linked to a ``ModuleChoice`` over
            the enabled estimators.
        """
        # (flag attribute, spec attribute), in the order the candidates are
        # offered to the choice.
        switches = (
            ('enable_dt', 'dt'),
            ('enable_dtr', 'dtr'),
            ('enable_lr', 'lr'),
            ('enable_nn', 'nn'),
        )

        space = HyperSpace()
        with space.as_default():
            hyper_input = HyperInput(name='input1')

            # The spec attribute is only touched when its flag is set.
            enabled_specs = [
                getattr(self, spec_attr)
                for flag_attr, spec_attr in switches
                if getattr(self, flag_attr)
            ]
            candidates = [
                ModuleSpace(name=spec["cls"].__name__, **spec)
                for spec in enabled_specs
            ]
            ModuleChoice(candidates)(hyper_input)
            space.set_inputs(hyper_input)

        return space
def conv_block(block_no, hp_pooling, hp_filters, hp_kernel_size, hp_bn_act, hp_use_bn, hp_activation, strides=(1, 1)):
    """Build one convolutional block: repeated conv units, then pooling.

    Blocks with ``block_no < 2`` repeat the conv unit exactly twice and use
    the base filter count. Later blocks repeat it 3, 4 or 5 times and
    multiply the filter count by ``2 ** (block_no - 1)``.

    Args:
        block_no: Index of the block; controls repeat choices and the
            filter multiplier.
        hp_pooling: Hyperparameter selecting max vs. average pooling.
        hp_filters: Base filter count hyperparameter.
        hp_kernel_size: Kernel size forwarded to ``Conv2D``.
        hp_bn_act: Hyperparameter ordering batch-norm and activation.
        hp_use_bn: Hyperparameter toggling the batch-norm step.
        hp_activation: Activation forwarded to ``Activation``.
        strides: Strides forwarded to ``Conv2D``.

    Returns:
        A ``Sequential`` of the repeated conv units followed by pooling.
    """
    early_block = block_no < 2
    repeat_num_choices = [2] if early_block else [3, 4, 5]
    multiplier = 1 if early_block else 2 ** (block_no - 1)

    def scale_filters(filters):
        # Scales the sampled base filter count for this block.
        return filters * multiplier

    conv_filters = Dynamic(scale_filters, filters=hp_filters)

    def conv_bn(step):
        # One conv unit: convolution followed by activation and optional
        # batch-norm, whose relative order is itself searched.
        conv_layer = Conv2D(filters=conv_filters,
                            kernel_size=hp_kernel_size,
                            strides=strides,
                            padding='same')
        activation = Activation(activation=hp_activation)
        # Batch-norm is wrapped in Optional and governed by hp_use_bn.
        maybe_bn = Optional(BatchNormalization(),
                            keep_link=True,
                            hp_opt=hp_use_bn)
        bn_act_order = Permutation([maybe_bn, activation], hp_seq=hp_bn_act)
        return Sequential([conv_layer, bn_act_order])

    repeated_conv = Repeat(conv_bn, repeat_times=repeat_num_choices)
    pooling_options = [
        MaxPooling2D(padding='same'),
        AveragePooling2D(padding='same'),
    ]
    pooling = ModuleChoice(pooling_options, hp_or=hp_pooling)
    return Sequential([repeated_conv, pooling])