Example #1
    def test_vDF_score(self, base, titanic_vd):
        from verticapy.learn.linear_model import LogisticRegression

        model = LogisticRegression(
            name="public.LR_titanic",
            cursor=base.cursor,
            tol=1e-4,
            C=1.0,
            max_iter=100,
            solver="CGD",
            l1_ratio=0.5,
        )

        model.drop()  # drop the model in case it already exists
        model.fit("public.titanic", ["fare", "age"], "survived")
        model.predict(titanic_vd, name="survived_pred")

        # Computing AUC
        auc = titanic_vd.score(y_true="survived",
                               y_score="survived_pred",
                               method="auc")
        assert auc == pytest.approx(0.697476274)

        # Computing MSE
        mse = titanic_vd.score(y_true="survived",
                               y_score="survived_pred",
                               method="mse")
        assert mse == pytest.approx(0.224993557)

        # Drawing ROC Curve
        roc_res = titanic_vd.score(y_true="survived",
                                   y_score="survived_pred",
                                   method="roc")
        assert roc_res["threshold"][3] == 0.003
        assert roc_res["false_positive"][3] == 1.0
        assert roc_res["true_positive"][3] == 1.0
        assert roc_res["threshold"][300] == 0.3
        assert roc_res["false_positive"][300] == pytest.approx(0.9900826446)
        assert roc_res["true_positive"][300] == pytest.approx(0.9974424552)
        assert roc_res["threshold"][900] == 0.9
        assert roc_res["false_positive"][900] == pytest.approx(0.01818181818)
        assert roc_res["true_positive"][900] == pytest.approx(0.06649616368)

        # Drawing PRC Curve
        prc_res = titanic_vd.score(y_true="survived",
                                   y_score="survived_pred",
                                   method="prc")
        assert prc_res["threshold"][3] == 0.002
        assert prc_res["recall"][3] == 1.0
        assert prc_res["precision"][3] == pytest.approx(0.3925702811)
        assert prc_res["threshold"][300] == 0.299
        assert prc_res["recall"][300] == pytest.approx(1.0)
        assert prc_res["precision"][300] == pytest.approx(0.3949494949)
        assert prc_res["threshold"][900] == 0.899
        assert prc_res["recall"][900] == pytest.approx(0.06649616368)
        assert prc_res["precision"][900] == pytest.approx(0.7027027027)

        # dropping the created model
        model.drop()
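Note: these snippets reference pytest fixtures (base, titanic_vd) that the excerpts do not define. The sketch below is one possible, hypothetical way to provide them with the older verticapy API used here; the connection settings, the fixture scope, and the assumption that public.titanic already exists are placeholders, not part of the original test suite.

import pytest
import vertica_python
from verticapy import vDataFrame


@pytest.fixture(scope="module")
def base():
    # Placeholder connection details; point these at your own Vertica instance.
    conn = vertica_python.connect(
        host="localhost", port=5433, user="dbadmin", password="", database="testdb"
    )

    class Base:
        cursor = conn.cursor()

    yield Base
    conn.close()


@pytest.fixture(scope="module")
def titanic_vd(base):
    # Assumes public.titanic has already been loaded (e.g. from verticapy's sample datasets).
    yield vDataFrame("public.titanic", cursor=base.cursor)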

Example #2

    def test_contour(self, titanic_vd):
        model_test = LogisticRegression("model_contour",)
        model_test.drop()
        model_test.fit(
            titanic_vd, ["age", "fare"], "survived",
        )
        result = model_test.contour()
        assert len(result.get_default_bbox_extra_artists()) == 38
        model_test.drop()

Example #3

    def test_vDF_score(self, base, titanic_vd):
        from verticapy.learn.linear_model import LogisticRegression

        model = LogisticRegression(
            name="public.LR_titanic",
            cursor=base.cursor,
            tol=1e-4,
            C=1.0,
            max_iter=100,
            solver="CGD",
            penalty="ENet",
            l1_ratio=0.5,
        )

        model.drop()  # drop the model in case it already exists
        model.fit("public.titanic", ["fare", "age"], "survived")
        model.predict(titanic_vd, name="survived_pred")

        # Computing AUC
        auc = titanic_vd.score(y_true="survived", y_score="survived_pred", method="auc")
        assert auc == pytest.approx(0.7051784997146537)

        # Computing MSE
        mse = titanic_vd.score(y_true="survived", y_score="survived_pred", method="mse")
        assert mse == pytest.approx(0.228082579110535)

        # Drawing ROC Curve
        roc_res = titanic_vd.score(
            y_true="survived", y_score="survived_pred", method="roc", nbins=1000,
        )
        assert roc_res["threshold"][3] == 0.003
        assert roc_res["false_positive"][3] == 1.0
        assert roc_res["true_positive"][3] == 1.0
        assert roc_res["threshold"][300] == 0.3
        assert roc_res["false_positive"][300] == pytest.approx(1.0)
        assert roc_res["true_positive"][300] == pytest.approx(1.0)
        assert roc_res["threshold"][900] == 0.9
        assert roc_res["false_positive"][900] == pytest.approx(0.0148760330578512)
        assert roc_res["true_positive"][900] == pytest.approx(0.061381074168798)

        # Drawing PRC Curve
        prc_res = titanic_vd.score(
            y_true="survived", y_score="survived_pred", method="prc", nbins=1000,
        )
        assert prc_res["threshold"][3] == 0.002
        assert prc_res["recall"][3] == 1.0
        assert prc_res["precision"][3] == pytest.approx(0.3925702811)
        assert prc_res["threshold"][300] == 0.299
        assert prc_res["recall"][300] == pytest.approx(1.0)
        assert prc_res["precision"][300] == pytest.approx(0.392570281124498)
        assert prc_res["threshold"][900] == 0.899
        assert prc_res["recall"][900] == pytest.approx(0.061381074168798)
        assert prc_res["precision"][900] == pytest.approx(0.727272727272727)

        # dropping the created model
        model.drop()

Example #4

    def test_model_from_vDF(self, base, titanic_vd):
        base.cursor.execute("DROP MODEL IF EXISTS logreg_from_vDF")
        model_test = LogisticRegression("logreg_from_vDF", cursor=base.cursor)
        model_test.fit(titanic_vd, ["age", "fare"], "survived")

        base.cursor.execute(
            "SELECT model_name FROM models WHERE model_name = 'logreg_from_vDF'"
        )
        assert base.cursor.fetchone()[0] == "logreg_from_vDF"

        model_test.drop()
Example #5
    def test_get_plot(self, base, winequality_vd):
        # 1D
        base.cursor.execute("DROP MODEL IF EXISTS model_test_plot")
        model_test = LogisticRegression("model_test_plot", cursor=base.cursor)
        model_test.fit(winequality_vd, ["alcohol"], "good")
        result = model_test.plot(color="r")
        assert len(result.get_default_bbox_extra_artists()) == 11
        plt.close("all")
        model_test.drop()
        # 2D
        model_test.fit(winequality_vd, ["alcohol", "residual_sugar"], "good")
        result = model_test.plot(color="r")
        assert len(result.get_default_bbox_extra_artists()) == 5
        plt.close("all")
        model_test.drop()
Example #6
    def test_drop(self, base):
        base.cursor.execute("DROP MODEL IF EXISTS logreg_model_test_drop")
        model_test = LogisticRegression("logreg_model_test_drop", cursor=base.cursor)
        model_test.fit("public.titanic", ["age", "fare"], "survived")

        base.cursor.execute(
            "SELECT model_name FROM models WHERE model_name = 'logreg_model_test_drop'"
        )
        assert base.cursor.fetchone()[0] == "logreg_model_test_drop"

        model_test.drop()
        base.cursor.execute(
            "SELECT model_name FROM models WHERE model_name = 'logreg_model_test_drop'"
        )
        assert base.cursor.fetchone() is None
    def test_drop(self):
        current_cursor().execute("DROP MODEL IF EXISTS logreg_model_test_drop")
        model_test = LogisticRegression("logreg_model_test_drop",)
        model_test.fit("public.titanic", ["age", "fare"], "survived")

        current_cursor().execute(
            "SELECT model_name FROM models WHERE model_name = 'logreg_model_test_drop'"
        )
        assert current_cursor().fetchone()[0] == "logreg_model_test_drop"

        model_test.drop()
        current_cursor().execute(
            "SELECT model_name FROM models WHERE model_name = 'logreg_model_test_drop'"
        )
        assert current_cursor().fetchone() is None
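The test_drop variant just above uses the newer verticapy interface, where current_cursor() returns the cursor of a globally registered connection. A minimal, hypothetical setup for running such snippets might look like the following; the connection name and credentials are placeholders.

import verticapy as vp
from verticapy.datasets import load_titanic

# Register and activate a connection (placeholder credentials).
vp.new_connection(
    {"host": "localhost", "port": "5433", "user": "dbadmin",
     "password": "", "database": "testdb"},
    name="doc_example",
)
vp.connect("doc_example")

# load_titanic creates public.titanic if necessary and returns a vDataFrame,
# so tests that fit on "public.titanic" can run against it.
titanic_vd = load_titanic()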
from verticapy import drop
from verticapy.learn.linear_model import LogisticRegression


def train(churn):
    # Drop any previous model, then fit a logistic regression on every column
    # except the label ("churn") and the identifier ("customerID").
    drop(name="public.churn_model")
    model = LogisticRegression(
        "churn_model",
        penalty="L2",
        tol=1e-6,
        max_iter=1000,
        solver="BFGS",
    )
    # print("Running cross_validate function\n")
    # cross_validate(model, churn, churn.get_columns(exclude_columns=["churn"]), "churn")
    print("Fitting logistic regression model...")
    model.fit(
        churn,
        churn.get_columns(exclude_columns=["churn", "customerID"]),
        "churn",
    )
    print("Success! public.churn_model created")
    print("Model AUC: " + str(model.score(method="auc")) + "\n")
    # Add predicted probabilities, sort by them, and drop missing predictions.
    model.predict(
        churn,
        X=churn.get_columns(exclude_columns=["churn", "customerID"]),
        name="pred_probs",
    )
    churn.sort({"pred_probs": "desc"})
    churn["pred_probs"].dropna()
    return churn
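For context, train() expects a vDataFrame containing at least a churn label column and a customerID column. A hypothetical driver (the file path, schema, and table name are placeholders) could be:

import verticapy as vp

# Load a Telco-style churn CSV into Vertica and score it with train() above.
churn_vd = vp.read_csv("telco_churn.csv", schema="public", table_name="churn")
scored = train(churn_vd)
print(scored[["customerID", "pred_probs"]].head(10))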
def model(base, titanic_vd):
    base.cursor.execute("DROP MODEL IF EXISTS logreg_model_test")
    model_class = LogisticRegression("logreg_model_test", cursor=base.cursor)
    model_class.fit("public.titanic", ["age", "fare"], "survived")
    yield model_class
    model_class.drop()
def model(titanic_vd):
    model_class = LogisticRegression("logreg_model_test",)
    model_class.drop()
    model_class.fit("public.titanic", ["age", "fare"], "survived")
    yield model_class
    model_class.drop()