def test_add_to_registry(self):
    """Register a minimal stand-in model class with ``AlgorithmsRegistry``.

    A throwaway class exposing only ``algorithm_short_name`` is registered
    for the ``"binary_classification"`` task with empty parameter,
    preprocessing and additional settings. The test contains no explicit
    assertions: it passes when ``AlgorithmsRegistry.add`` completes without
    raising.
    """

    class Model1:
        # Only attribute the stand-in class defines; left as an empty string.
        algorithm_short_name = ""

    # Keyword arguments forwarded unchanged to the registry.
    registration_kwargs = dict(
        task_name="binary_classification",
        model_class=Model1,
        model_params={},
        required_preprocessing={},
        additional={},
    )

    AlgorithmsRegistry.add(**registration_kwargs)
"trees_in_step": 100, "max_steps": 50, "early_stopping_rounds": 50, "max_rows_limit": None, "max_cols_limit": None, } required_preprocessing = [ "missing_values_inputation", "convert_categorical", "target_as_integer", ] AlgorithmsRegistry.add( BINARY_CLASSIFICATION, ExtraTreesAlgorithm, et_params, required_preprocessing, additional, classification_default_params, ) AlgorithmsRegistry.add( MULTICLASS_CLASSIFICATION, ExtraTreesAlgorithm, et_params, required_preprocessing, additional, classification_default_params, ) # # REGRESSION
"max_cols_limit": None, } required_preprocessing = [ "missing_values_inputation", "convert_categorical", "datetime_transform", "text_transform", "scale", "target_as_integer", ] AlgorithmsRegistry.add( BINARY_CLASSIFICATION, NeuralNetworkAlgorithm, nn_params, required_preprocessing, additional, default_nn_params, ) required_preprocessing = [ "missing_values_inputation", "convert_categorical", "datetime_transform", "text_transform", "scale", "target_as_one_hot", ] AlgorithmsRegistry.add( MULTICLASS_CLASSIFICATION, NeuralNetworkAlgorithm,
self.library_version = sklearn.__version__ self.max_iters = additional.get("max_steps", 1) self.model = DummyRegressor(strategy="mean") def file_extenstion(self): return "baseline" additional = {"max_steps": 1, "max_rows_limit": None, "max_cols_limit": None} required_preprocessing = ["target_as_integer"] AlgorithmsRegistry.add( BINARY_CLASSIFICATION, BaselineClassifierAlgorithm, {}, required_preprocessing, additional, ) AlgorithmsRegistry.add( MULTICLASS_CLASSIFICATION, BaselineClassifierAlgorithm, {}, required_preprocessing, additional, ) AlgorithmsRegistry.add(REGRESSION, BaselineRegressorAlgorithm, {}, {}, additional)
"early_stopping_rounds": 50, "max_rows_limit": None, "max_cols_limit": None, } required_preprocessing = [ "missing_values_inputation", "convert_categorical", "datetime_transform", "text_transform", "target_as_integer", ] AlgorithmsRegistry.add( BINARY_CLASSIFICATION, XgbAlgorithm, xgb_bin_class_params, required_preprocessing, additional, classification_bin_default_params, ) AlgorithmsRegistry.add( MULTICLASS_CLASSIFICATION, XgbAlgorithm, xgb_multi_class_params, required_preprocessing, additional, classification_multi_default_params, ) regression_required_preprocessing = [ "missing_values_inputation",
"trees_in_step": 1, "train_cant_improve_limit": 0, "max_steps": 1, "max_rows_limit": None, "max_cols_limit": None, } required_preprocessing = [ "missing_values_inputation", "convert_categorical", "target_as_integer", ] AlgorithmsRegistry.add( BINARY_CLASSIFICATION, DecisionTreeAlgorithm, dt_params, required_preprocessing, additional, classification_default_params, ) AlgorithmsRegistry.add( MULTICLASS_CLASSIFICATION, DecisionTreeAlgorithm, dt_params, required_preprocessing, additional, classification_default_params, ) dt_regression_params = { "criterion": [
"convert_categorical", "target_as_integer", ] lgbm_multi_params = copy.deepcopy(lgbm_bin_params) lgbm_multi_params["objective"] = ["multiclass"] lgbm_multi_params["metric"] = ["multi_logloss", "multi_error"] lgbr_params = copy.deepcopy(lgbm_bin_params) lgbr_params["objective"] = ["regression"] lgbr_params["metric"] = ["l1", "l2"] AlgorithmsRegistry.add( BINARY_CLASSIFICATION, LightgbmAlgorithm, lgbm_bin_params, required_preprocessing, additional, ) AlgorithmsRegistry.add( MULTICLASS_CLASSIFICATION, LightgbmAlgorithm, lgbm_multi_params, required_preprocessing, additional, ) regression_required_preprocessing = [ "missing_values_inputation", "convert_categorical",
"train_cant_improve_limit": 5, "max_steps": 500, "max_rows_limit": None, "max_cols_limit": None, } required_preprocessing = [ "missing_values_inputation", "convert_categorical", "target_as_integer", "target_scale", ] AlgorithmsRegistry.add( BINARY_CLASSIFICATION, RandomForestAlgorithm, rf_params, required_preprocessing, additional, ) AlgorithmsRegistry.add( MULTICLASS_CLASSIFICATION, RandomForestAlgorithm, rf_params, required_preprocessing, additional, ) # # REGRESSION #
"max_rounds": 10000, "early_stopping_rounds": 50, "max_rows_limit": None, "max_cols_limit": None, } required_preprocessing = [ "missing_values_inputation", "datetime_transform", "text_transform", "target_as_integer", ] AlgorithmsRegistry.add( BINARY_CLASSIFICATION, CatBoostAlgorithm, classification_params, required_preprocessing, additional, classification_default_params, ) multiclass_classification_params = copy.deepcopy(classification_params) multiclass_classification_params["loss_function"] = ["MultiClass"] multiclass_classification_params["depth"] = [3, 4, 5, 6] multiclass_classification_params["learning_rate"] = [0.1, 0.15, 0.2] multiclass_classification_default_params = copy.deepcopy( classification_default_params) multiclass_classification_default_params["loss_function"] = "MultiClass" multiclass_classification_default_params["depth"] = 5 multiclass_classification_default_params["learning_rate"] = 0.15
) df.to_csv( os.path.join(model_file_path, f"{learner_name}_coefs.csv"), index=False ) additional = {"max_steps": 1, "max_rows_limit": None, "max_cols_limit": None} required_preprocessing = [ "missing_values_inputation", "convert_categorical", "scale", "target_as_integer", ] AlgorithmsRegistry.add( BINARY_CLASSIFICATION, LinearAlgorithm, {}, required_preprocessing, additional ) AlgorithmsRegistry.add( MULTICLASS_CLASSIFICATION, LinearAlgorithm, {}, required_preprocessing, additional ) regression_required_preprocessing = [ "missing_values_inputation", "convert_categorical", "scale", "target_scale", ] AlgorithmsRegistry.add( REGRESSION, LinearRegressorAlgorithm,
additional = { "one_step": 50, "train_cant_improve_limit": 5, "max_steps": 500, "max_rows_limit": None, "max_cols_limit": None, } required_preprocessing = [ "missing_values_inputation", "convert_categorical", "target_preprocessing", ] """ AlgorithmsRegistry.add( BINARY_CLASSIFICATION, CatBoostAlgorithm, bin_class_params, required_preprocessing, additional, ) """ # switch off for now # maybe my misuse or bug https://github.com/catboost/catboost/issues/861 """ multi_class_params = copy.deepcopy(bin_class_params) multi_class_params["loss_function"] = ["MultiClass"]
default_params = {"n_neighbors": 5, "weights": "uniform"} additional = {"max_rows_limit": 100000, "max_cols_limit": 100} required_preprocessing = [ "missing_values_inputation", "convert_categorical", "scale", "target_as_integer", ] AlgorithmsRegistry.add( BINARY_CLASSIFICATION, KNeighborsAlgorithm, knn_params, required_preprocessing, additional, default_params, ) AlgorithmsRegistry.add( MULTICLASS_CLASSIFICATION, KNeighborsAlgorithm, knn_params, required_preprocessing, additional, default_params, ) AlgorithmsRegistry.add( REGRESSION, KNeighborsRegressorAlgorithm,