Exemple #1
0
def _train_pipeline(model: str, destination: str, ignore_prints: bool,
                    ignore_html: bool) -> None:
    """Load the dataset and hand it to ``train_pipeline``.

    Args:
        model: Forwarded unchanged to ``train_pipeline`` as ``model``.
        destination: Echoed to the user and forwarded to ``train_pipeline``
            as ``destination``.
        ignore_prints: Forwarded unchanged to ``train_pipeline``.
        ignore_html: Forwarded unchanged to ``train_pipeline``.
    """
    click.echo(f"Train and save pipeline in {destination}")

    features, target = load_dataset()

    # Group the pass-through options so the call reads as "data + options".
    training_options = {
        "model": model,
        "destination": destination,
        "ignore_prints": ignore_prints,
        "ignore_html": ignore_html,
    }
    train_pipeline(X=features, y=target, **training_options)
Exemple #2
0
def _evaluate_pipeline(pipeline: str, threshold: str, prefix: str,
                       destination: str) -> None:
    """Load the dataset and hand it to ``evaluate_pipeline``.

    Args:
        pipeline: Forwarded unchanged to ``evaluate_pipeline`` as ``pipeline``.
        threshold: Forwarded unchanged to ``evaluate_pipeline`` as
            ``threshold``.
        prefix: Forwarded unchanged to ``evaluate_pipeline`` as ``prefix``.
        destination: Echoed to the user and forwarded to
            ``evaluate_pipeline`` as ``destination``.
    """
    message = f"Evaluate and save pipeline performance metrics in {destination}"
    click.echo(message)

    features, target = load_dataset()

    # Group the pass-through options so the call reads as "data + options".
    evaluation_options = {
        "pipeline": pipeline,
        "threshold": threshold,
        "prefix": prefix,
        "destination": destination,
    }
    evaluate_pipeline(X=features, y=target, **evaluation_options)
Exemple #3
0
    def test_evaluate_pipeline(self):
        """Run the ``evaluate`` CLI command on a freshly trained dummy pipeline.

        A constant ``DummyClassifier`` pipeline is trained into a temporary
        directory, the ``evaluate`` command is invoked on the saved
        ``.joblib`` file, and the test checks that the directory then holds
        at least one ``.png``, one ``.json`` and one ``.csv`` file.
        """
        runner = CliRunner()
        pattern = "/*.joblib"
        X, y = load_dataset()

        dummy_pipeline = Pipeline(
            [("dummy_classifier", DummyClassifier(strategy="constant", constant=0))]
        )

        with tempfile.TemporaryDirectory() as destination:
            threshold = destination + "/DUMMY_threshold.json"
            train_pipeline(
                X=X,
                y=y,
                model="DUMMY",
                pipeline=dummy_pipeline,
                destination=destination,
                ignore_prints=True,
                ignore_html=True,
            )
            pipeline_path = glob.glob(destination + pattern)
            # Fail with a clear message instead of an IndexError on
            # pipeline_path[0] when training saved nothing.
            self.assertTrue(
                pipeline_path,
                msg=f"no file matching {pattern} found in {destination}",
            )

            result = runner.invoke(
                main,
                [
                    "evaluate",
                    "--pipeline",
                    pipeline_path[0],
                    "--threshold",
                    threshold,
                    "--prefix",
                    "DUMMY",
                    "--destination",
                    destination,
                ],
            )
            # CliRunner.invoke captures exceptions raised by the command, so
            # the exit code must be checked explicitly; otherwise a crashing
            # command only surfaces as a confusing "missing file" failure.
            self.assertEqual(result.exit_code, 0, msg=result.output)

            files = glob.glob(destination + "/*")
            for extension in (".png", ".json", ".csv"):
                self.assertTrue(
                    any(extension in file for file in files),
                    msg=f"no {extension} file found in {destination}",
                )
Exemple #4
0
    def test_train_pipeline(self):
        """Train the XGBOOST, RF and DUMMY models and check their saved files.

        Each model is trained into the same temporary directory; the test
        then checks that, for every model name, at least one ``.joblib`` path
        in that directory contains the name.
        """
        features, target = load_dataset()

        constant_classifier = DummyClassifier(strategy="constant", constant=0)
        dummy_pipeline = Pipeline([("dummy_classifier", constant_classifier)])

        # One entry per training run, in execution order. Only the DUMMY run
        # supplies its own pipeline object.
        training_runs = (
            {"model": "XGBOOST"},
            {"model": "RF"},
            {"model": "DUMMY", "pipeline": dummy_pipeline},
        )

        with tempfile.TemporaryDirectory() as destination:
            for run_options in training_runs:
                train_pipeline(
                    X=features,
                    y=target,
                    destination=destination,
                    ignore_prints=True,
                    ignore_html=True,
                    **run_options,
                )

            saved_paths = glob.glob(destination + "/*.joblib")
            for model_name in ("RF", "XGBOOST", "DUMMY"):
                self.assertTrue(any(model_name in path for path in saved_paths))
Exemple #5
0
 def test_load(self):
     """Check that ``load_dataset`` leaves the dataset file in the registry.

     ``pyro_risks.config.DATA_REGISTRY`` is patched to a temporary directory
     for the duration of the call; afterwards the file named ``cfg.DATASET``
     must exist inside that directory.
     """
     with tempfile.TemporaryDirectory() as registry_dir, mock.patch(
         "pyro_risks.config.DATA_REGISTRY", registry_dir
     ):
         expected_file = os.path.join(registry_dir, cfg.DATASET)
         load_dataset()
         self.assertTrue(os.path.isfile(expected_file))