Ejemplo n.º 1
0
    def test_add_to_registry(self):
        class Model1:
            algorithm_short_name = ""

        model1 = {
            "task_name": "binary_classification",
            "model_class": Model1,
            "model_params": {},
            "required_preprocessing": {},
            "additional": {},
        }
        AlgorithmsRegistry.add(**model1)
Ejemplo n.º 2
0
    "trees_in_step": 100,
    "max_steps": 50,
    "early_stopping_rounds": 50,
    "max_rows_limit": None,
    "max_cols_limit": None,
}
required_preprocessing = [
    "missing_values_inputation",
    "convert_categorical",
    "target_as_integer",
]

AlgorithmsRegistry.add(
    BINARY_CLASSIFICATION,
    ExtraTreesAlgorithm,
    et_params,
    required_preprocessing,
    additional,
    classification_default_params,
)

AlgorithmsRegistry.add(
    MULTICLASS_CLASSIFICATION,
    ExtraTreesAlgorithm,
    et_params,
    required_preprocessing,
    additional,
    classification_default_params,
)

#
# REGRESSION
Ejemplo n.º 3
0
    "max_cols_limit": None,
}

required_preprocessing = [
    "missing_values_inputation",
    "convert_categorical",
    "datetime_transform",
    "text_transform",
    "scale",
    "target_as_integer",
]

AlgorithmsRegistry.add(
    BINARY_CLASSIFICATION,
    NeuralNetworkAlgorithm,
    nn_params,
    required_preprocessing,
    additional,
    default_nn_params,
)

required_preprocessing = [
    "missing_values_inputation",
    "convert_categorical",
    "datetime_transform",
    "text_transform",
    "scale",
    "target_as_one_hot",
]
AlgorithmsRegistry.add(
    MULTICLASS_CLASSIFICATION,
    NeuralNetworkAlgorithm,
Ejemplo n.º 4
0
        self.library_version = sklearn.__version__
        self.max_iters = additional.get("max_steps", 1)
        self.model = DummyRegressor(strategy="mean")

    def file_extenstion(self):
        return "baseline"


additional = {"max_steps": 1, "max_rows_limit": None, "max_cols_limit": None}
required_preprocessing = ["target_as_integer"]

AlgorithmsRegistry.add(
    BINARY_CLASSIFICATION,
    BaselineClassifierAlgorithm,
    {},
    required_preprocessing,
    additional,
)

AlgorithmsRegistry.add(
    MULTICLASS_CLASSIFICATION,
    BaselineClassifierAlgorithm,
    {},
    required_preprocessing,
    additional,
)

AlgorithmsRegistry.add(REGRESSION, BaselineRegressorAlgorithm, {}, {},
                       additional)
Ejemplo n.º 5
0
    "early_stopping_rounds": 50,
    "max_rows_limit": None,
    "max_cols_limit": None,
}
required_preprocessing = [
    "missing_values_inputation",
    "convert_categorical",
    "datetime_transform",
    "text_transform",
    "target_as_integer",
]

AlgorithmsRegistry.add(
    BINARY_CLASSIFICATION,
    XgbAlgorithm,
    xgb_bin_class_params,
    required_preprocessing,
    additional,
    classification_bin_default_params,
)

AlgorithmsRegistry.add(
    MULTICLASS_CLASSIFICATION,
    XgbAlgorithm,
    xgb_multi_class_params,
    required_preprocessing,
    additional,
    classification_multi_default_params,
)

regression_required_preprocessing = [
    "missing_values_inputation",
Ejemplo n.º 6
0
    "trees_in_step": 1,
    "train_cant_improve_limit": 0,
    "max_steps": 1,
    "max_rows_limit": None,
    "max_cols_limit": None,
}
required_preprocessing = [
    "missing_values_inputation",
    "convert_categorical",
    "target_as_integer",
]

AlgorithmsRegistry.add(
    BINARY_CLASSIFICATION,
    DecisionTreeAlgorithm,
    dt_params,
    required_preprocessing,
    additional,
    classification_default_params,
)

AlgorithmsRegistry.add(
    MULTICLASS_CLASSIFICATION,
    DecisionTreeAlgorithm,
    dt_params,
    required_preprocessing,
    additional,
    classification_default_params,
)

dt_regression_params = {
    "criterion": [
Ejemplo n.º 7
0
    "convert_categorical",
    "target_as_integer",
]

lgbm_multi_params = copy.deepcopy(lgbm_bin_params)
lgbm_multi_params["objective"] = ["multiclass"]
lgbm_multi_params["metric"] = ["multi_logloss", "multi_error"]

lgbr_params = copy.deepcopy(lgbm_bin_params)
lgbr_params["objective"] = ["regression"]
lgbr_params["metric"] = ["l1", "l2"]

AlgorithmsRegistry.add(
    BINARY_CLASSIFICATION,
    LightgbmAlgorithm,
    lgbm_bin_params,
    required_preprocessing,
    additional,
)

AlgorithmsRegistry.add(
    MULTICLASS_CLASSIFICATION,
    LightgbmAlgorithm,
    lgbm_multi_params,
    required_preprocessing,
    additional,
)

regression_required_preprocessing = [
    "missing_values_inputation",
    "convert_categorical",
    "train_cant_improve_limit": 5,
    "max_steps": 500,
    "max_rows_limit": None,
    "max_cols_limit": None,
}
required_preprocessing = [
    "missing_values_inputation",
    "convert_categorical",
    "target_as_integer",
    "target_scale",
]

AlgorithmsRegistry.add(
    BINARY_CLASSIFICATION,
    RandomForestAlgorithm,
    rf_params,
    required_preprocessing,
    additional,
)

AlgorithmsRegistry.add(
    MULTICLASS_CLASSIFICATION,
    RandomForestAlgorithm,
    rf_params,
    required_preprocessing,
    additional,
)

#
# REGRESSION
#
Ejemplo n.º 9
0
    "max_rounds": 10000,
    "early_stopping_rounds": 50,
    "max_rows_limit": None,
    "max_cols_limit": None,
}
required_preprocessing = [
    "missing_values_inputation",
    "datetime_transform",
    "text_transform",
    "target_as_integer",
]

AlgorithmsRegistry.add(
    BINARY_CLASSIFICATION,
    CatBoostAlgorithm,
    classification_params,
    required_preprocessing,
    additional,
    classification_default_params,
)

multiclass_classification_params = copy.deepcopy(classification_params)
multiclass_classification_params["loss_function"] = ["MultiClass"]
multiclass_classification_params["depth"] = [3, 4, 5, 6]
multiclass_classification_params["learning_rate"] = [0.1, 0.15, 0.2]

multiclass_classification_default_params = copy.deepcopy(
    classification_default_params)
multiclass_classification_default_params["loss_function"] = "MultiClass"
multiclass_classification_default_params["depth"] = 5
multiclass_classification_default_params["learning_rate"] = 0.15
Ejemplo n.º 10
0
        )
        df.to_csv(
            os.path.join(model_file_path, f"{learner_name}_coefs.csv"), index=False
        )


additional = {"max_steps": 1, "max_rows_limit": None, "max_cols_limit": None}
required_preprocessing = [
    "missing_values_inputation",
    "convert_categorical",
    "scale",
    "target_as_integer",
]

AlgorithmsRegistry.add(
    BINARY_CLASSIFICATION, LinearAlgorithm, {}, required_preprocessing, additional
)
AlgorithmsRegistry.add(
    MULTICLASS_CLASSIFICATION, LinearAlgorithm, {}, required_preprocessing, additional
)

regression_required_preprocessing = [
    "missing_values_inputation",
    "convert_categorical",
    "scale",
    "target_scale",
]

AlgorithmsRegistry.add(
    REGRESSION,
    LinearRegressorAlgorithm,
Ejemplo n.º 11
0

additional = {
    "one_step": 50,
    "train_cant_improve_limit": 5,
    "max_steps": 500,
    "max_rows_limit": None,
    "max_cols_limit": None,
}
required_preprocessing = [
    "missing_values_inputation",
    "convert_categorical",
    "target_preprocessing",
]

"""
AlgorithmsRegistry.add(
    BINARY_CLASSIFICATION,
    CatBoostAlgorithm,
    bin_class_params,
    required_preprocessing,
    additional,
)
"""

# switch off for now
# maybe my misuse or bug https://github.com/catboost/catboost/issues/861
"""
multi_class_params = copy.deepcopy(bin_class_params)
multi_class_params["loss_function"] = ["MultiClass"]
Ejemplo n.º 12
0
default_params = {"n_neighbors": 5, "weights": "uniform"}

additional = {"max_rows_limit": 100000, "max_cols_limit": 100}

required_preprocessing = [
    "missing_values_inputation",
    "convert_categorical",
    "scale",
    "target_as_integer",
]

AlgorithmsRegistry.add(
    BINARY_CLASSIFICATION,
    KNeighborsAlgorithm,
    knn_params,
    required_preprocessing,
    additional,
    default_params,
)
AlgorithmsRegistry.add(
    MULTICLASS_CLASSIFICATION,
    KNeighborsAlgorithm,
    knn_params,
    required_preprocessing,
    additional,
    default_params,
)

AlgorithmsRegistry.add(
    REGRESSION,
    KNeighborsRegressorAlgorithm,