class ModelRepresentationBase(_AbstractModelRepresentation):
    """ class just to store the default HyperParameters """

    default_hyper = {
        "n_components": hp.HyperRangeFloat(start=0.1, end=1, step=0.05),
        # Forest-like estimators
        "n_estimators": hp.HyperComposition(
            [
                (0.75, hp.HyperRangeInt(start=25, end=175, step=25)),
                (0.25, hp.HyperRangeInt(start=200, end=1000, step=100)),
            ]
        ),
        "max_features": hp.HyperComposition(
            [
                (0.25, ["sqrt", "auto"]),
                (0.75, hp.HyperRangeBetaFloat(start=0, end=1, alpha=3, beta=1)),
            ]
        ),
        "max_depth": hp.HyperChoice([None, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 20, 25, 30, 50, 100]),
        "min_samples_split": hp.HyperRangeBetaInt(start=2, end=100, alpha=1, beta=5),
        # Linear models
        "C": hp.HyperLogRangeFloat(start=0.00001, end=10, n=50),
        "alpha": hp.HyperLogRangeFloat(start=0.00001, end=10, n=50),
        # CountVectorizer
        "analyzer": hp.HyperChoice(["word", "char", "char_wb"]),
        "penalty": ["l1", "l2"],
        "random_state": [123],  # so that every model with a 'random_state' attribute gets a fixed, reproducible value
        "drop_used_columns": [True],
        "drop_unused_columns": [True],
    }
    # This dictionary specifies the default hyper-parameters used during the random search phase.
    # They will be used if:
    #  * the model has a parameter among that list
    #  * the parameter is not specified within the class (within 'custom_hyper')

    default_default_hyper = {
        "random_state": 123,
        "drop_used_columns": True,
        "drop_unused_columns": True,
    }
    # This dictionary specifies the default hyper-parameters used during the default model phase.
    # They will be used if:
    #  * the model has a parameter among that list
    #  * the default parameter is not specified within the class (within 'default_parameters')

    depends_on = ()
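
# Illustrative sketch (not part of the registration API): the hp.HyperComposition entries above
# pair a probability weight with a sub-distribution, e.g. "n_estimators" draws from 25-175 with
# probability 0.75 and from 200-1000 with probability 0.25. The helper below mimics that weighted
# sampling with plain Python; the actual hp.HyperComposition object may be implemented differently,
# and this helper name is hypothetical.
def _sample_weighted_composition(weighted_choices, random_state=None):
    """Draw one value from a list of (weight, candidates) pairs, as an assumed
    approximation of what hp.HyperComposition does."""
    import random

    rng = random.Random(random_state)
    weights = [weight for weight, _ in weighted_choices]
    # pick a sub-distribution according to its weight, then a value inside it
    _, candidates = rng.choices(weighted_choices, weights=weights, k=1)[0]
    return rng.choice(list(candidates))


# Example: roughly 3 draws out of 4 should fall in the 25-175 range
# _sample_weighted_composition([(0.75, range(25, 176, 25)), (0.25, range(200, 1001, 100))], random_state=123)
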
class TargetEncoderRegressor_CatEncoder(ModelRepresentationBase):
    klass = TargetEncoderRegressor
    category = StepCategories.CategoryEncoder
    type_of_variable = (TypeOfVariables.CAT, TypeOfVariables.NUM)

    custom_hyper = {
        "cv": [None, 2, 5, 10],
        "noise_level": hp.HyperComposition([(0.5, [None]), (0.5, hp.HyperRangeFloat(0, 1))]),
        "smoothing_min": hp.HyperRangeFloat(0, 10),
        "smoothing_value": hp.HyperRangeFloat(0, 10),
    }

    # is_regression = True
    type_of_model = TypeOfProblem.REGRESSION

    use_y = True
class BoxCoxTargetTransformer_TargetModifier(ModelRepresentationBase):
    klass = BoxCoxTargetTransformer
    category = StepCategories.TargetTransformer
    type_of_variable = None

    # is_regression = True
    type_of_model = TypeOfProblem.REGRESSION

    custom_hyper = {"ll": hp.HyperComposition([(0.1, [0]), (0.9, hp.HyperRangeFloat(0, 2))])}

    use_y = True
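
# Illustrative sketch of the resolution order described in ModelRepresentationBase above
# (an assumption about the framework's behaviour, not the actual implementation): for a given
# model, a hyper-parameter is taken from the class 'custom_hyper' if it is declared there, and
# otherwise falls back to the shared 'default_hyper', restricted to the parameters the model
# accepts. Both the helper name and the 'model_params' argument are hypothetical.
def _resolve_search_space(model_params, custom_hyper, default_hyper):
    """Return {parameter: hyper-distribution} for the parameters in 'model_params',
    preferring 'custom_hyper' over 'default_hyper'."""
    space = {}
    for param in model_params:
        if param in custom_hyper:
            space[param] = custom_hyper[param]
        elif param in default_hyper:
            space[param] = default_hyper[param]
    return space


# Example: for TargetEncoderRegressor, 'cv' and 'noise_level' would come from the class
# 'custom_hyper' above, while 'random_state' would fall back to the shared default_hyper.
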