import copy
from typing import Tuple

import catboost
import matplotlib.pyplot as plt
import pandas as pd

# config, examples, train_clf, axs_iter, QUANTILE and YEAR_IN_TRADING_DAYS are
# assumed to be defined elsewhere in the module this excerpt is taken from.


def partial_dependence_curve(tickers: Tuple[str, ...], date: pd.Timestamp):
    """Plots partial dependence curves for numerical features.

    :param tickers:
        Tickers for which the ML model is built.
    :param date:
        Date as of which the ML model is built.
    """
    params = config.ML_PARAMS
    cases = examples.Examples(tickers, date, params["data"])
    clf, train_pool_params = train_clf(cases, params)
    n_plots = len(train_pool_params["data"].columns) - len(cases.categorical_features())
    axs = axs_iter(n_plots)
    results = []
    for n, name in enumerate(train_pool_params["data"]):
        if n in cases.categorical_features():
            continue
        ax = next(axs)
        pool_params = copy.deepcopy(train_pool_params)
        quantiles = pool_params["data"].iloc[:, n].quantile(QUANTILE).values
        y = []
        for quantile in quantiles:
            pool_params["data"].iloc[:, n] = quantile
            predict_pool = catboost.Pool(**pool_params)
            raw_prediction = clf.predict(predict_pool)
            prediction = (
                raw_prediction * pool_params["data"].iloc[:, 0] * YEAR_IN_TRADING_DAYS
            )
            y.append(prediction.values.mean())
        ax.set_title(f"{name}")
        ax.tick_params(labelleft=True)
        ax.plot(quantiles, y)
        results.append((quantiles, y))
    plt.show()
    return results
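# A minimal usage sketch (an assumption, not part of the original module): it presumes
# that config.ML_PARAMS is populated and that local quote data for the tickers is
# available, as in the tests below.
if __name__ == "__main__":
    curves = partial_dependence_curve(
        ("LSNGP", "LKOH", "GMKN"), pd.Timestamp("2018-12-14")
    )
    # Each element is a (quantiles, mean predictions) pair for one numerical feature.
    for quantiles, means in curves:
        print(list(quantiles), means)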
def test_cv_model_raise_max_iter(monkeypatch):
    fake_tech_params = dict(**cv.TECH_PARAMS)
    fake_max_iter = 3
    fake_tech_params["iterations"] = fake_max_iter
    monkeypatch.setattr(cv, "TECH_PARAMS", fake_tech_params)
    monkeypatch.setattr(cv, "MAX_ITERATIONS", fake_max_iter)
    data = examples.Examples(
        ("LSNGP", "LKOH", "GMKN"), pd.Timestamp("2018-12-14"), PARAMS["data"]
    )
    with pytest.raises(POptimizerError) as error:
        cv.valid_model(PARAMS, data)
    # The error message is emitted in Russian: "MAX_ITERATIONS needs to be increased =".
    assert "Необходимо увеличить MAX_ITERATIONS =" in str(error.value)
def test_valid_model():
    data = examples.Examples(
        ("LSNGP", "LKOH", "GMKN"), pd.Timestamp("2018-12-14"), PARAMS["data"]
    )
    result = cv.valid_model(PARAMS, data)
    assert isinstance(result, dict)
    assert len(result) == 6
    assert result["loss"] == pytest.approx(0.014_495_100_438_051_8)
    assert result["status"] == "ok"
    assert result["std"] == pytest.approx(0.160_839_952_004_336)
    assert result["r2"] == pytest.approx(-0.014_495_100_438_051_8)
    assert result["data"] == PARAMS["data"]
    for key, value in PARAMS["model"].items():
        assert result["model"][key] == value
    for key, value in cv.TECH_PARAMS.items():
        if key == "iterations":
            assert result["model"][key] < value
        else:
            assert result["model"][key] == value
# Presumably registered as a pytest fixture in the original test module
# (the decorator is not shown in this excerpt), hence the monkeypatch argument.
def make_cases(monkeypatch):
    return examples.Examples(
        ("SNGSP", "VSMO", "DSKY"), pd.Timestamp("2018-12-14"), PARAMS["data"]
    )
def test_optimize_hyper(monkeypatch, capsys):
    space = {
        "data": (
            ("Label", {"days": hp.choice("label", list(range(21, 31)))}),
            ("STD", {"days": 186}),
            ("Ticker", {"on_off": False}),
            ("Mom12m", {"days": 279}),
            ("DivYield", {"days": 252, "periods": 1}),
        ),
        "model": {
            "bagging_temperature": 1,
            "depth": 6,
            "l2_leaf_reg": 3,
            "learning_rate": 0.1,
            "one_hot_max_size": 2,
            "random_strength": 1,
            "ignored_features": [],
        },
    }
    cases = examples.Examples(
        ("LSNGP", "LKOH", "GMKN"), pd.Timestamp("2018-12-14"), PARAMS["data"]
    )
    monkeypatch.setattr(cv, "MAX_SEARCHES", 10)
    monkeypatch.setattr(cases, "get_params_space", lambda: space["data"])
    monkeypatch.setattr(cv, "get_model_space", lambda: space["model"])
    result = cv.optimize_hyper(cases)
    captured = capsys.readouterr()
    # The warning is printed in Russian: "needs to be widened" (the search space).
    assert "Необходимо расширить" in captured.out
    assert isinstance(result, dict)
    assert len(result) == 2
    assert result["data"] == (
        ("Label", {"days": 29}),
        ("STD", {"days": 186}),
        ("Ticker", {"on_off": False}),
        ("Mom12m", {"days": 279}),
        ("DivYield", {"days": 252, "periods": 1}),
    )
    model_params = {
        "bagging_temperature": 1,
        "depth": 6,
        "l2_leaf_reg": 3,
        "learning_rate": 0.1,
        "one_hot_max_size": 2,
        "random_strength": 1,
    }
    for k, v in model_params.items():
        assert result["model"][k] == pytest.approx(v)
def create_examples():
    yield examples.Examples(
        ("AKRN", "CHMF", "BANEP"), pd.Timestamp("2018-12-13"), FEAT_PARAMS
    )
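# A minimal sketch (an assumption, not from the original tests) of how generator
# fixtures such as create_examples are normally registered and consumed in pytest.
# The fixture name "example_cases" and the smoke test below are hypothetical;
# pytest, examples and FEAT_PARAMS are assumed to be imported in the test module.
@pytest.fixture(name="example_cases")
def _example_cases_fixture():
    yield examples.Examples(
        ("AKRN", "CHMF", "BANEP"), pd.Timestamp("2018-12-13"), FEAT_PARAMS
    )


def test_example_cases_smoke(example_cases):
    # Hypothetical smoke test: the fixture yields an Examples instance.
    assert isinstance(example_cases, examples.Examples)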