Example #1
0
def test_fit_model_via_design_matrix_stats_models():
    """Tests ``fit_model_via_design_matrix`` with the statsmodels-based algorithms.

    Fits "statsmodels_ols", "statsmodels_wls" and "statsmodels_gls" on the
    same design matrix and checks that their parameters, rounded to one
    decimal, equal one shared expected vector. The "statsmodels_glm" fit is
    then checked against its own expected vector.
    """
    data = generate_test_data_for_fitting(
        n=50,
        seed=41,
        heteroscedastic=False)
    df = data["df"]
    # Make the response at least 1.
    # NOTE(review): presumably so that the GLM fit sees a strictly positive
    # response -- confirm against the GLM family used by the fit function.
    df["y"] = df["y"].abs() + 1

    design_mat_info = design_mat_from_formula(
        df,
        "y ~ x1_categ + x2 + x3",
        pred_cols=None,
        y_col=None)
    features = design_mat_info["x_mat"]
    response = design_mat_info["y"]

    def rounded_params(fit_algorithm):
        """Fits with ``fit_algorithm``; returns its params rounded to 1 decimal."""
        fitted = fit_model_via_design_matrix(
            x_train=features,
            y_train=response,
            fit_algorithm=fit_algorithm)
        return list(round(fitted.params, 1).values)

    # No sample weights are passed; the three least-squares variants are
    # expected to produce the same rounded estimates.
    least_squares_expected = [14.0, 1.0, -3.1, -2.1, -0.3, 2.3, 0.7]
    for fit_algorithm in ("statsmodels_ols",
                          "statsmodels_wls",
                          "statsmodels_gls"):
        assert rounded_params(fit_algorithm) == least_squares_expected

    assert rounded_params("statsmodels_glm") == [0.1, 0, 0, 0, 0, 0, 0]
Example #2
0
def test_fit_model_via_design_matrix_error(design_mat_info):
    """Tests that ``fit_model_via_design_matrix`` rejects an unknown
        ``fit_algorithm`` with a ``ValueError``"""
    fit_kwargs = dict(
        x_train=design_mat_info["x_mat"],
        y_train=design_mat_info["y"],
        fit_algorithm="unknown_model")
    expected_message = "The fit algorithm requested was not found"

    with pytest.raises(ValueError, match=expected_message):
        fit_model_via_design_matrix(**fit_kwargs)
Example #3
0
def test_fit_model_via_design_matrix_with_weights(data_with_weights):
    """Tests ridge fits from ``fit_model_via_design_matrix`` with and
        without sample weights.

    The design matrix and response come from the ``data_with_weights``
    fixture; the weights are the "w" column of the fixture's "df".
    """
    df = data_with_weights["df"]
    design_mat_info = data_with_weights["design_mat_info"]
    ridge_kwargs = dict(
        x_train=design_mat_info["x_mat"],
        y_train=design_mat_info["y"],
        fit_algorithm="ridge")

    # Ridge without weights
    unweighted_model = fit_model_via_design_matrix(
        sample_weight=None,
        **ridge_kwargs)

    assert np.round(unweighted_model.intercept_, 0) == 15.0
    assert unweighted_model.coef_[0].round() == 0.0
    assert unweighted_model.coef_[1].round() == 0.0

    # Ridge with weights
    # Here we expect to recover the coefficients of the relationship that
    # holds in the second half of the data, because the weights are zero in
    # the first half and so only the second half is relevant to the fit.
    # NOTE(review): the assertions below check intercept 20 and an x2 slope
    # of -2, i.e. roughly ``y[(n//2):] = 20 + -2 * x2[(n//2):]``; an earlier
    # version of this comment gave the slope as -20 -- confirm against the
    # ``data_with_weights`` fixture.
    weighted_model = fit_model_via_design_matrix(
        sample_weight=df["w"],
        **ridge_kwargs)

    # Optional graphical check (kept disabled):
    # we expect to see two trends for y w.r.t x2
    #
    #     import plotly
    #     from plotly import graph_objs as go
    #     trace = go.Scatter(
    #                 x=df['x2'].values,
    #                 y=df['y'].values,
    #                 mode='markers')
    #     data = [trace]
    #     fig = go.Figure(data)
    #     plotly.io.show(fig)

    assert np.round(weighted_model.intercept_, 0) == 20.0
    assert weighted_model.coef_[0].round() == 0.0
    assert weighted_model.coef_[1].round() == -2.0
Example #4
0
def test_fit_model_via_design_matrix2(design_mat_info):
    """Tests ``fit_model_via_design_matrix`` with the "elastic_net"
        algorithm and user-supplied ``fit_algorithm_params``"""
    algorithm_params = {"n_alphas": 100, "eps": 1e-2}

    fitted = fit_model_via_design_matrix(
        x_train=design_mat_info["x_mat"],
        y_train=design_mat_info["y"],
        fit_algorithm="elastic_net",
        fit_algorithm_params=algorithm_params)

    assert fitted.coef_[0].round() == 0

    # ``n_alphas`` and ``eps`` are the values passed above; ``cv`` is not
    # passed, so the expected 5 has to come from the default parameters.
    expected_attributes = (
        ("n_alphas", 100),
        ("eps", 1e-2),
        ("cv", 5),
    )
    for attribute, expected_value in expected_attributes:
        assert getattr(fitted, attribute) == expected_value
Example #5
0
def test_fit_model_via_design_matrix3(design_mat_info):
    """Tests ``fit_model_via_design_matrix`` with the "lasso_lars"
        algorithm and ``fit_algorithm_params`` that override a default"""
    algorithm_params = {"max_n_alphas": 100, "eps": 1e-2, "cv": 2}

    fitted = fit_model_via_design_matrix(
        x_train=design_mat_info["x_mat"],
        y_train=design_mat_info["y"],
        fit_algorithm="lasso_lars",
        fit_algorithm_params=algorithm_params)

    assert fitted.coef_[0].round() == 0

    # Every checked attribute is passed explicitly above; in particular
    # ``cv=2`` is meant to override the default.
    expected_attributes = (
        ("max_n_alphas", 100),
        ("eps", 1e-2),
        ("cv", 2),
    )
    for attribute, expected_value in expected_attributes:
        assert getattr(fitted, attribute) == expected_value
Example #6
0
def test_fit_model_via_design_matrix(design_mat_info):
    """Tests ``fit_model_via_design_matrix`` on the ``design_mat_info`` fixture.

    Covers "linear", "ridge" (without and with sample weights) and
    "statsmodels_wls" with sample weights, and checks that "lasso" combined
    with sample weights raises a ``ValueError``.
    """
    features = design_mat_info["x_mat"]
    response = design_mat_info["y"]
    weights = np.array([1, 2, 3, 4, 5])

    def fit(**options):
        """Fits on the fixture data, forwarding ``options`` unchanged."""
        return fit_model_via_design_matrix(
            x_train=features,
            y_train=response,
            **options)

    # Linear
    linear_model = fit(fit_algorithm="linear")
    assert linear_model.coef_[0].round() == 10.0
    assert np.round(linear_model.intercept_, 1) == 0.0

    # Ridge without weights
    ridge_model = fit(fit_algorithm="ridge", sample_weight=None)
    assert ridge_model.coef_[0].round() == 0.0
    assert np.round(ridge_model.intercept_, 1) == 10.0

    # Ridge with weights
    weighted_ridge_model = fit(fit_algorithm="ridge", sample_weight=weights)
    assert weighted_ridge_model.coef_[0].round() == 0.0
    assert np.round(weighted_ridge_model.intercept_, 1) == 10.0

    # statsmodels_wls with weights
    wls_model = fit(fit_algorithm="statsmodels_wls", sample_weight=weights)
    assert wls_model.coef_[0].round() == 10.0
    assert np.round(wls_model.intercept_, 1) == 0.0

    # Lasso is expected to reject sample weights.
    with pytest.raises(ValueError, match="sample weights are passed."):
        fit(fit_algorithm="lasso", sample_weight=weights)