Exemple #1
0
    def get_hyper_parameter(cls):
        """Return the hyper-parameter space, coupling ``ngram_range`` to ``analyzer``.

        The space is an equal-weight (0.5 / 0.5) composition of two sub-spaces:

        * ``analyzer`` is "word" and ``ngram_range`` is fixed to 1;
        * ``analyzer`` is "char" or "char_wb" and ``ngram_range`` is drawn
          between 1 and 5.

        This keeps ``ngram_range`` from differing from 1 when the analyzer is
        "word". The ``min_df``, ``max_df`` and ``tfidf`` candidates are the
        same in both sub-spaces.
        """

        def _shared_settings():
            # Fresh lists on every call so the two sub-spaces never share objects.
            return {
                "min_df": [1, 0.001, 0.01, 0.05],
                "max_df": [0.999, 0.99, 0.95],
                "tfidf": [True, False],
            }

        # Word analyzer: ngram_range is pinned to 1.
        word_settings = {"ngram_range": 1, "analyzer": "word"}
        word_settings.update(_shared_settings())
        word_space = hp.HyperCrossProduct(word_settings)

        # Character analyzers: ngram_range is random.
        # Original author's note on the draw: 1 = 1.5% ; 2 = 12% ; 3 = 25% ; 4 = 37% ; 5 = 24%
        char_settings = {
            "ngram_range": hp.HyperRangeBetaInt(start=1, end=5, alpha=2, beta=1),
            "analyzer": hp.HyperChoice(("char", "char_wb")),
        }
        char_settings.update(_shared_settings())
        char_space = hp.HyperCrossProduct(char_settings)

        return hp.HyperComposition([(0.5, word_space), (0.5, char_space)])
Exemple #2
0
    def get_hyper_parameter(cls):
        """Return the hyper-parameter space associated with ``cls.klass``.

        If ``cls.hyper`` is set (not None) it is returned unchanged. Otherwise
        a ``hp.HyperCrossProduct`` is built over the init parameters of
        ``cls.klass`` (as reported by ``get_init_parameters``). Each
        parameter's entry is taken from ``cls.custom_hyper`` when present
        there, else from ``cls.default_hyper``. Parameters found in neither
        mapping are left out.

        Raises:
            ValueError: if ``cls.klass`` is None.
        """
        if cls.klass is None:
            raise ValueError("I need a klass")

        preset = cls.hyper
        if preset is not None:
            return preset

        selected = {}
        for name in get_init_parameters(cls.klass).keys():
            # Custom entries take precedence over the defaults.
            if name in cls.custom_hyper:
                choice = cls.custom_hyper[name]
            elif name in cls.default_hyper:
                choice = cls.default_hyper[name]
            else:
                continue
            selected[name] = choice

        return hp.HyperCrossProduct(selected)  # TODO : fix seed here
Exemple #3
0
 def get_hyper_parameter(cls):
     """Return the hyper-parameter space, tying ``bagging_fraction`` to ``bagging_freq``.

     Two sub-spaces are composed with equal weight (0.5 each):

     * no bagging: ``bagging_freq`` is 0, ``bagging_fraction`` is 1.0 and
       ``boosting_type`` is limited to "gbdt" / "dart" (no "rf");
     * bagging: ``bagging_freq`` is 1, ``bagging_fraction`` is random and
       "rf" is also an allowed ``boosting_type``.

     Every other entry is declared identically in both sub-spaces.
     """

     def _sub_space(bagging):
         # Entries are added in one fixed order for both sub-spaces.
         if bagging:
             boosting_types = ["rf", "gbdt", "dart"]
         else:
             boosting_types = ["gbdt", "dart"]

         space = {"boosting_type": boosting_types}
         space["learning_rate"] = hp.HyperLogRangeFloat(0.0001, 0.1)
         space["max_depth"] = hp.HyperChoice(
             [-1] + list(range(1, 16)) + [20, 25, 30, 50, 100]
         )
         space["n_estimators"] = hp.HyperComposition([
             (0.50, hp.HyperRangeInt(start=25, end=175, step=25)),
             (0.25, hp.HyperRangeInt(start=200, end=900, step=100)),
             (0.25, hp.HyperRangeInt(start=1000, end=10000, step=100)),
         ])
         # Beta(alpha=3, beta=1) has mean 0.75.
         space["colsample_bytree"] = hp.HyperRangeBetaFloat(
             start=0.1, end=1, alpha=3, beta=1
         )
         space["min_child_samples"] = hp.HyperRangeInt(2, 50)
         space["num_leaves"] = hp.HyperRangeInt(10, 200)

         if bagging:
             # bagging_freq = 1 together with bagging_fraction < 1
             space["bagging_fraction"] = hp.HyperRangeBetaFloat(
                 start=0.1, end=1, alpha=3, beta=1
             )
             space["bagging_freq"] = [1]
         else:
             # bagging_freq == 0 together with bagging_fraction == 1.0
             space["bagging_fraction"] = [1.0]
             space["bagging_freq"] = [0]

         space["n_jobs"] = [1]
         return hp.HyperCrossProduct(space)

     without_bagging = _sub_space(bagging=False)
     with_bagging = _sub_space(bagging=True)
     return hp.HyperComposition([(0.5, without_bagging), (0.5, with_bagging)])