Ejemplo n.º 1
0
 def test_feature_onehot_not_exist(self, titanic_data):
     """Expect ``ValueError`` for a one-hot group that includes ``Embarked_F``.

     NOTE(review): the test name suggests ``Embarked_F`` is not a column of
     the ``titanic_data`` fixture; the fixture is not visible here -- confirm.
     """
     onehot_columns = ["Embarked_C", "Embarked_S", "Embarked_Q", "Embarked_F"]
     with pytest.raises(ValueError):
         _check_feature(feature=onehot_columns, df=titanic_data)
Ejemplo n.º 2
0
def plot_pdp(
    X: pd.DataFrame,
    y: np.ndarray,
    model,
    var_name: str,
    target_name: str,
    num_grid_points: int = 10,
) -> None:
    """Show the binned mean target and the partial dependence of one feature.

    Two lines are drawn on a single Plotly figure, both against the bins
    that ``_prepare_info_plot_data`` builds for ``var_name``:

    * ``mean_target`` -- the mean of ``y`` inside each bin;
    * ``partial_dependence_plot`` -- the values returned by
      ``pdp.pdp_isolate`` for ``model``, averaged over each pair of adjacent
      grid points so that there is one value per bin.

    Args:
        X: Feature matrix; it is also passed to ``pdp.pdp_isolate`` as the
            dataset, with ``X.columns`` as the model features.
        y: Target values, one per row of ``X`` and in the same row order.
        model: Fitted estimator, handed to ``pdp.pdp_isolate`` unchanged.
        var_name: Column of ``X`` to analyse.
        target_name: Name given to the target column; also the y-axis title.
        num_grid_points: Number of grid points requested from both
            ``_prepare_info_plot_data`` and ``pdp.pdp_isolate``.

    Returns:
        None. The figure is displayed through ``fig.show()``, whose result
        (None) is all the function has ever returned.
    """
    # Attach the target by position. A bare ``pd.DataFrame({target_name: y})``
    # gets a fresh RangeIndex, and ``pd.concat(axis=1)`` aligns on index
    # labels, so an ``X`` with a non-default index (e.g. after a shuffle or a
    # train/test split) would end up with misaligned targets and NaN rows.
    target_column = pd.DataFrame({target_name: np.asarray(y)}, index=X.index)
    df = pd.concat([X, target_column], axis=1)

    # Bin the feature; only the per-row data and the bin summary are used.
    data_x, _, summary_df, _, _, _ = _prepare_info_plot_data(
        feature=var_name,
        feature_type=_check_feature(var_name, df),
        data=df,
        num_grid_points=num_grid_points,
        grid_type="percentile",
        percentile_range=None,
        grid_range=None,
        cust_grid_points=None,
        show_percentile=False,
        show_outliers=False,
        endpoint=True,
    )

    # Mean target per bin, keyed by the bin id column "x".
    target_line = (
        data_x.groupby("x", as_index=False)
        .agg({target_name: "mean"})
        .sort_values("x", ascending=True)
    )
    summary_df = summary_df.merge(target_line, on="x", how="outer")
    summary_df = summary_df[["display_column", target_name]].rename(
        {"display_column": var_name, target_name: "mean_target"},
        axis=1,
    )

    # Partial dependence at the grid points, then the midpoint of each pair
    # of neighbours so the curve has one value per bin.
    pdp_array = pdp.pdp_isolate(
        model=model,
        dataset=X,
        model_features=X.columns,
        feature=var_name,
        num_grid_points=num_grid_points,
    ).pdp
    pdp_array = 0.5 * (pdp_array[1:] + pdp_array[:-1])

    # NOTE(review): this assignment requires len(pdp_array) to equal the
    # number of rows in summary_df -- presumably both helpers produce the same
    # grid for the same num_grid_points; confirm for features with few
    # distinct values.
    summary_df["pdp"] = pdp_array

    # One line per series, sharing the same x values (the bin labels).
    fig = go.Figure()
    series = (
        ("mean_target", "mean_target", "rgb(46, 186, 175)"),
        ("pdp", "partial_dependence_plot", "rgb(255, 161, 105)"),
    )
    for column, trace_name, color in series:
        fig.add_trace(
            go.Scatter(
                x=summary_df[var_name],
                y=summary_df[column],
                mode="lines",
                name=trace_name,
                line=dict(color=color, width=4),
            ))

    title = "Mean conversion rate and Partial dependence plot"
    fig.update_layout(
        title={
            "text": title,
            "y": 0.9,
            "x": 0.5,
            "xanchor": "center",
            "yanchor": "top",
        },
        legend=dict(x=0.65, y=0.9),
        xaxis_title=var_name,
        yaxis_title=target_name,
        # The y-axis is pinned to this fixed range; values outside it are
        # clipped from view.
        yaxis_range=[0, 0.25],
        autosize=False,
        width=800,
        height=500,
        plot_bgcolor="rgb(240, 240, 240)",
    )

    fig.show()
Ejemplo n.º 3
0
 def test_feature_onehot_incomplete(self, titanic_data):
     """Expect ``ValueError`` when the feature list holds only ``Embarked_C``."""
     lone_column = ["Embarked_C"]
     with pytest.raises(ValueError):
         _check_feature(feature=lone_column, df=titanic_data)
Ejemplo n.º 4
0
 def test_feature_not_exist(self, titanic_data):
     """Expect ``ValueError`` when ``"gender"`` is passed as the feature.

     NOTE(review): presumably ``gender`` is absent from the ``titanic_data``
     fixture, as the test name implies -- the fixture is not visible here.
     """
     unknown_feature = "gender"
     with pytest.raises(ValueError):
         _check_feature(feature=unknown_feature, df=titanic_data)
Ejemplo n.º 5
0
 def test_feature_onehot(self, titanic_data):
     """The three ``Embarked_*`` columns together are reported as ``"onehot"``."""
     embarked_columns = ["Embarked_C", "Embarked_S", "Embarked_Q"]
     detected = _check_feature(feature=embarked_columns, df=titanic_data)
     assert detected == "onehot"
Ejemplo n.º 6
0
 def test_feature_numeric(self, titanic_data):
     """The ``Fare`` column is reported as ``"numeric"``."""
     assert _check_feature(feature="Fare", df=titanic_data) == "numeric"
Ejemplo n.º 7
0
 def test_feature_binary(self, titanic_data):
     """The ``Sex`` column is reported as ``"binary"``."""
     assert _check_feature(feature="Sex", df=titanic_data) == "binary"
Ejemplo n.º 8
0
 def test_feature_onehot_not_exist(self, titanic_data):
     """A one-hot group containing ``Embarked_F`` must raise ``ValueError``.

     NOTE(review): ``Embarked_F`` is presumably missing from the fixture, as
     the test name implies -- verify against the ``titanic_data`` fixture.
     """
     # Same four names as the literal list, in the same order: C, S, Q, F.
     columns = ['Embarked_' + suffix for suffix in 'CSQF']
     with pytest.raises(ValueError):
         _check_feature(feature=columns, df=titanic_data)
Ejemplo n.º 9
0
 def test_feature_numeric(self, titanic_data):
     """Check that ``_check_feature`` returns ``'numeric'`` for ``Fare``."""
     expected = 'numeric'
     actual = _check_feature(feature='Fare', df=titanic_data)
     assert actual == expected
Ejemplo n.º 10
0
 def test_feature_onehot(self, titanic_data):
     """Check that the ``Embarked_C/S/Q`` group is returned as ``'onehot'``."""
     assert _check_feature(
         feature=['Embarked_C', 'Embarked_S', 'Embarked_Q'],
         df=titanic_data,
     ) == 'onehot'
Ejemplo n.º 11
0
 def test_feature_binary(self, titanic_data):
     """Check that ``_check_feature`` returns ``'binary'`` for ``Sex``."""
     expected = 'binary'
     actual = _check_feature(feature='Sex', df=titanic_data)
     assert actual == expected