예제 #1
0
def partial_dependence_curve(tickers: Tuple[str, ...], date: pd.Timestamp):
    """Plot partial dependence curves for the numerical features.

    A model and its training pool parameters are obtained from ``train_clf``.
    Every non-categorical column is then overwritten, one quantile level at a
    time, and the mean scaled prediction is recorded for each level.

    :param tickers:
        Tickers for which the ML model has to be built.
    :param date:
        Date for which the ML model is built.
    :return:
        List of ``(quantiles, y)`` pairs, one per non-categorical column, in
        column order.
    """
    ml_params = config.ML_PARAMS
    cases = examples.Examples(tickers, date, ml_params["data"])
    clf, train_pool_params = train_clf(cases, ml_params)
    train_data = train_pool_params["data"]
    numeric_count = len(train_data.columns) - len(cases.categorical_features())
    axes = axs_iter(numeric_count)
    curves = []
    for col_idx, col_name in enumerate(train_data):
        if col_idx in cases.categorical_features():
            continue
        ax = next(axes)
        # Work on a deep copy: the column is overwritten below and the
        # training parameters must stay untouched for the next feature.
        varied_params = copy.deepcopy(train_pool_params)
        varied_data = varied_params["data"]
        levels = varied_data.iloc[:, col_idx].quantile(QUANTILE).values
        means = []
        for level in levels:
            varied_params["data"].iloc[:, col_idx] = level
            pool = catboost.Pool(**varied_params)
            raw = clf.predict(pool)
            # Raw prediction is scaled by the first data column and by the
            # number of trading days in a year.
            scaled = raw * varied_params["data"].iloc[:, 0] * YEAR_IN_TRADING_DAYS
            means.append(scaled.values.mean())
        ax.set_title(f"{col_name}")
        ax.tick_params(labelleft=True)
        ax.plot(levels, means)
        curves.append((levels, means))
    plt.show()
    return curves
예제 #2
0
def test_cv_model_raise_max_iter(monkeypatch):
    """``cv.valid_model`` must raise ``POptimizerError`` with a tiny iteration cap.

    Both ``cv.TECH_PARAMS["iterations"]`` and ``cv.MAX_ITERATIONS`` are patched
    to the same small value before the model is validated.
    """
    iteration_cap = 3
    # Copy of the technical parameters with only the iteration count replaced.
    patched_tech_params = {**cv.TECH_PARAMS, "iterations": iteration_cap}
    monkeypatch.setattr(cv, "TECH_PARAMS", patched_tech_params)
    monkeypatch.setattr(cv, "MAX_ITERATIONS", iteration_cap)
    tickers = ("LSNGP", "LKOH", "GMKN")
    data = examples.Examples(tickers, pd.Timestamp("2018-12-14"), PARAMS["data"])
    with pytest.raises(POptimizerError) as error:
        cv.valid_model(PARAMS, data)
    message = str(error.value)
    assert "Необходимо увеличить MAX_ITERATIONS =" in message
예제 #3
0
def test_valid_model():
    """Check the structure and the pinned metric values of ``cv.valid_model``.

    The result must be a six-key dict whose ``data`` equals the input data
    parameters and whose ``model`` carries the input model parameters plus
    ``cv.TECH_PARAMS``, with ``iterations`` strictly below its configured
    value.
    """
    tickers = ("LSNGP", "LKOH", "GMKN")
    data = examples.Examples(tickers, pd.Timestamp("2018-12-14"), PARAMS["data"])
    result = cv.valid_model(PARAMS, data)

    assert isinstance(result, dict)
    assert len(result) == 6
    assert result["loss"] == pytest.approx(0.0144951004380518)
    assert result["status"] == "ok"
    assert result["std"] == pytest.approx(0.160839952004336)
    assert result["r2"] == pytest.approx(-0.0144951004380518)
    assert result["data"] == PARAMS["data"]

    fitted = result["model"]
    for name, expected in PARAMS["model"].items():
        assert fitted[name] == expected
    for name, expected in cv.TECH_PARAMS.items():
        if name != "iterations":
            assert fitted[name] == expected
        else:
            # The fitted iteration count has to stay below the configured one.
            assert fitted[name] < expected
예제 #4
0
def make_cases(monkeypatch):
    """Build the ``Examples`` object for SNGSP, VSMO and DSKY on 2018-12-14.

    ``monkeypatch`` is accepted but not used by this function.
    """
    tickers = ("SNGSP", "VSMO", "DSKY")
    date = pd.Timestamp("2018-12-14")
    return examples.Examples(tickers, date, PARAMS["data"])
예제 #5
0
def test_optimize_hyper(monkeypatch, capsys):
    """Run ``cv.optimize_hyper`` on a patched search space and check the result.

    Only the ``Label`` period is a real choice (21..30 days); every other data
    and model parameter is a constant. The test checks the message written to
    stdout, the selected data parameters and the returned model parameters.
    """

    def constant_features():
        # Fresh objects on every call, so the search space and the expected
        # value never share mutable dicts.
        return (
            ("STD", {"days": 186}),
            ("Ticker", {"on_off": False}),
            ("Mom12m", {"days": 279}),
            ("DivYield", {"days": 252, "periods": 1}),
        )

    def constant_model():
        return {
            "bagging_temperature": 1,
            "depth": 6,
            "l2_leaf_reg": 3,
            "learning_rate": 0.1,
            "one_hot_max_size": 2,
            "random_strength": 1,
        }

    label_days = hp.choice("label", list(range(21, 31)))
    data_space = (("Label", {"days": label_days}),) + constant_features()
    model_space = dict(constant_model(), ignored_features=[])

    cases = examples.Examples(
        ("LSNGP", "LKOH", "GMKN"), pd.Timestamp("2018-12-14"), PARAMS["data"]
    )
    monkeypatch.setattr(cv, "MAX_SEARCHES", 10)
    monkeypatch.setattr(cases, "get_params_space", lambda: data_space)
    monkeypatch.setattr(cv, "get_model_space", lambda: model_space)

    result = cv.optimize_hyper(cases)

    captured = capsys.readouterr()
    assert "Необходимо расширить" in captured.out

    assert isinstance(result, dict)
    assert len(result) == 2
    assert result["data"] == (("Label", {"days": 29}),) + constant_features()
    for name, expected in constant_model().items():
        assert result["model"][name] == pytest.approx(expected)
예제 #6
0
def create_examples():
    """Yield one ``Examples`` object for AKRN, CHMF and BANEP on 2018-12-13."""
    tickers = ("AKRN", "CHMF", "BANEP")
    date = pd.Timestamp("2018-12-13")
    yield examples.Examples(tickers, date, FEAT_PARAMS)