Ejemplo n.º 1
0
class ModelRepresentationBase(_AbstractModelRepresentation):
    """Base representation storing the shared default hyper-parameter spaces."""

    # Search spaces used during the random-search phase. An entry is used when:
    #  * the model accepts a parameter with that name, and
    #  * the class does not override it (in 'custom_hyper').
    default_hyper = {
        # Decomposition-like estimators
        "n_components": hp.HyperRangeFloat(start=0.1, end=1, step=0.05),
        # Forest-like estimators
        "n_estimators": hp.HyperComposition(
            [
                (0.75, hp.HyperRangeInt(start=25, end=175, step=25)),
                (0.25, hp.HyperRangeInt(start=200, end=1000, step=100)),
            ]
        ),
        "max_features": hp.HyperComposition(
            [
                (0.25, ["sqrt", "auto"]),
                (0.75, hp.HyperRangeBetaFloat(start=0, end=1, alpha=3, beta=1)),
            ]
        ),
        "max_depth": hp.HyperChoice(
            [
                None, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 20, 25,
                30, 50, 100,
            ]
        ),
        "min_samples_split": hp.HyperRangeBetaInt(start=2, end=100, alpha=1, beta=5),
        # Linear models
        "C": hp.HyperLogRangeFloat(start=0.00001, end=10, n=50),
        "alpha": hp.HyperLogRangeFloat(start=0.00001, end=10, n=50),
        # CV (presumably CountVectorizer-style text encoders — confirm)
        "analyzer": hp.HyperChoice(["word", "char", "char_wb"]),
        "penalty": ["l1", "l2"],
        # Fixed seed: any model exposing 'random_state' gets a reproducible value
        "random_state": [123],
        "drop_used_columns": [True],
        "drop_unused_columns": [True],
    }

    # Defaults used during the default-model phase. An entry is used when:
    #  * the model accepts a parameter with that name, and
    #  * no default is specified within the class (in 'default_parameters').
    default_default_hyper = {
        "random_state": 123,
        "drop_used_columns": True,
        "drop_unused_columns": True,
    }

    depends_on = ()
Ejemplo n.º 2
0
    def get_hyper_parameter(cls):
        """Build the hyper-parameter space with one dependency handled:
        ngram_range is forced to 1 when analyzer == 'word'; larger n-grams
        are explored only with the char analyzers."""
        common = {
            "min_df": [1, 0.001, 0.01, 0.05],
            "max_df": [0.999, 0.99, 0.95],
            "tfidf": [True, False],
        }
        # Branch 1: word analyzer, unigrams only
        word_space = hp.HyperCrossProduct(
            {"ngram_range": 1, "analyzer": "word", **common}
        )
        # Branch 2: char analyzers with a beta-skewed ngram_range
        # 1 = 1.5% ; 2 = 12% ; 3 = 25% ; 4 = 37% ; 5 = 24%
        char_space = hp.HyperCrossProduct(
            {
                "ngram_range": hp.HyperRangeBetaInt(start=1, end=5, alpha=2, beta=1),
                "analyzer": hp.HyperChoice(("char", "char_wb")),
                **common,
            }
        )
        return hp.HyperComposition([(0.5, word_space), (0.5, char_space)])
Ejemplo n.º 3
0
class ModelRepresentationBase(_AbstractModelRepresentation):
    """Class just to store the default hyper-parameters.

    These search spaces are used during the random-search phase; an entry
    applies when the model accepts a parameter of that name and the class
    does not override it (in 'custom_hyper').
    """

    default_hyper = {
        "n_components":
        hp.HyperRangeFloat(start=0.1, end=1, step=0.05),
        # Forest like estimators
        "n_estimators":
        hp.HyperComposition([
            (0.75, hp.HyperRangeInt(start=25, end=175, step=25)),
            (0.25, hp.HyperRangeInt(start=200, end=1000, step=100)),
        ]),
        "max_features":
        hp.HyperComposition([(0.25, ["sqrt", "auto"]),
                             (0.75,
                              hp.HyperRangeBetaFloat(start=0,
                                                     end=1,
                                                     alpha=3,
                                                     beta=1))]),
        "max_depth":
        hp.HyperChoice([
            None, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 20, 25,
            30, 50, 100
        ]),
        # start=2 (not 1): scikit-learn requires an integer min_samples_split >= 2,
        # so sampling 1 would crash the estimator at fit time
        "min_samples_split":
        hp.HyperRangeBetaInt(start=2, end=100, alpha=1, beta=5),
        # Linear model
        "C":
        hp.HyperLogRangeFloat(start=0.00001, end=10, n=50),
        "alpha":
        hp.HyperLogRangeFloat(start=0.00001, end=10, n=50),
        # CV
        "analyzer":
        hp.HyperChoice(["word", "char", "char_wb"]),
        "penalty": ["l1", "l2"],
        "random_state": [
            123
        ],  # So that for every model with a random_state attribute, it will be passed and fixed
        "columns_to_encode": ["--object--"]
    }
Ejemplo n.º 4
0
class BoxCoxTargetTransformer_TargetModifier(ModelRepresentationBase):
    """Registration entry for BoxCoxTargetTransformer (a regression target transformer)."""

    klass = BoxCoxTargetTransformer
    category = StepCategories.TargetTransformer

    type_of_variable = None
    type_of_model = TypeOfProblem.REGRESSION

    # 'll' is 0 with probability 0.1, otherwise drawn uniformly from [0, 2]
    custom_hyper = {
        "ll": hp.HyperComposition(
            [(0.1, [0]), (0.9, hp.HyperRangeFloat(0, 2))]
        )
    }

    use_y = True
Ejemplo n.º 5
0
class TargetEncoderRegressor_CatEncoder(ModelRepresentationBase):
    """Registration entry for TargetEncoderRegressor (category encoder, regression)."""

    klass = TargetEncoderRegressor
    category = StepCategories.CategoryEncoder

    type_of_variable = (TypeOfVariables.CAT, TypeOfVariables.NUM)
    type_of_model = TypeOfProblem.REGRESSION

    custom_hyper = {
        "cv": [None, 2, 5, 10],
        # noise is disabled half of the time, otherwise drawn from [0, 1]
        "noise_level": hp.HyperComposition(
            [(0.5, [None]), (0.5, hp.HyperRangeFloat(0, 1))]
        ),
        "smoothing_min": hp.HyperRangeFloat(0, 10),
        "smoothing_value": hp.HyperRangeFloat(0, 10),
    }

    use_y = True
Ejemplo n.º 6
0
 def get_hyper_parameter(cls):
     """Build the hyper-parameter space, handling the dependency between
     'bagging_fraction' and 'bagging_freq': bagging is either fully off
     (freq=0, fraction=1.0) or fully on (freq=1, fraction<1).

     NOTE(review): takes 'cls' — presumably decorated with @classmethod
     outside this view; confirm.
     """
     res = hp.HyperComposition([
         ##################
         ### No Bagging ###
         ##################
         # * bagging_freq == 0
         # * bagging_fraction  == 1.0
         # * no random forest here : 'boosting_type' != 'rf'
         #   (LightGBM's 'rf' mode requires bagging to be enabled)
         (
             0.5,
             hp.HyperCrossProduct({
                 "boosting_type": ["gbdt", "dart"],
                 "learning_rate":
                 hp.HyperLogRangeFloat(0.0001, 0.1),
                 "max_depth":
                 hp.HyperChoice([
                     -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                     20, 25, 30, 50, 100
                 ]),
                 "n_estimators":
                 hp.HyperComposition([
                     (0.50, hp.HyperRangeInt(start=25, end=175, step=25)),
                     (0.25, hp.HyperRangeInt(start=200, end=900, step=100)),
                     (0.25, hp.HyperRangeInt(start=1000,
                                             end=10000,
                                             step=100)),
                 ]),
                 "colsample_bytree":
                 hp.HyperRangeBetaFloat(start=0.1, end=1, alpha=3,
                                        beta=1),  # Mean = 0.75
                 "min_child_samples":
                 hp.HyperRangeInt(2, 50),
                 "num_leaves":
                 hp.HyperRangeInt(10, 200),
                 "bagging_fraction": [1.0],
                 "bagging_freq": [0],
                 "n_jobs": [1],
             }),
         ),
         ###############
         ### Bagging ###
         ###############
         # * bagging_freq = 1
         # * bagging_fraction < 1
         # Same space as above, except 'rf' is allowed and bagging_fraction
         # is sampled (beta-skewed toward 1).
         (
             0.5,
             hp.HyperCrossProduct({
                 "boosting_type": ["rf", "gbdt", "dart"],
                 "learning_rate":
                 hp.HyperLogRangeFloat(0.0001, 0.1),
                 "max_depth":
                 hp.HyperChoice([
                     -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                     20, 25, 30, 50, 100
                 ]),
                 "n_estimators":
                 hp.HyperComposition([
                     (0.50, hp.HyperRangeInt(start=25, end=175, step=25)),
                     (0.25, hp.HyperRangeInt(start=200, end=900, step=100)),
                     (0.25, hp.HyperRangeInt(start=1000,
                                             end=10000,
                                             step=100)),
                 ]),
                 "colsample_bytree":
                 hp.HyperRangeBetaFloat(start=0.1, end=1, alpha=3,
                                        beta=1),  # Mean = 0.75
                 "min_child_samples":
                 hp.HyperRangeInt(2, 50),
                 "num_leaves":
                 hp.HyperRangeInt(10, 200),
                 "bagging_fraction":
                 hp.HyperRangeBetaFloat(start=0.1, end=1, alpha=3, beta=1),
                 "bagging_freq": [1],
                 "n_jobs": [1],
             }),
         ),
     ])
     return res