Exemple #1
0
    def save(self, results_path, model_subpath):
        """Write this model's artifacts under ``results_path/model_subpath``.

        In order, this writes the predictions CSV, ``framework.json`` (the
        model descriptor), the learning-curve output produced by
        ``LearningCurves.plot``, the additional-metrics output produced by
        ``AdditionalMetrics.save`` and finally ``status.txt``.

        Paths stored inside ``framework.json`` are relative to
        ``results_path`` (they are built from ``model_subpath``).

        Args:
            results_path: Root directory of the results.
            model_subpath: Directory of this model, relative to
                ``results_path``.
        """
        start_time = time.time()
        model_path = os.path.join(results_path, model_subpath)
        logger.info(f"Save the model {model_path}")

        # Without k-fold validation the predictions come from a single
        # validation split; otherwise they are out-of-folds predictions.
        if "k_folds" not in self.validation_params:
            type_of_predictions = "validation"
        else:
            type_of_predictions = "out_of_folds"
        predictions_fname = os.path.join(
            model_subpath, f"predictions_{type_of_predictions}.csv"
        )
        self._oof_predictions_fname = os.path.join(results_path, predictions_fname)
        self.get_out_of_folds().to_csv(self._oof_predictions_fname, index=False)

        # File name of every learner, relative to results_path.
        saved = []
        for learner in self.learners:
            saved.append(os.path.join(model_subpath, learner.get_fname()))

        descriptor_path = os.path.join(model_path, "framework.json")
        with open(descriptor_path, "w") as handle:
            preprocessing = [step.to_json() for step in self.preprocessings]
            learners_params = [item.get_params() for item in self.learners]
            descriptor = {
                "uid": self.uid,
                "name": self._name,
                "preprocessing": preprocessing,
                "learners": learners_params,
                "params": self.params,
                "saved": saved,
                "predictions_fname": predictions_fname,
                "metric_name": self.get_metric_name(),
                "final_loss": self.get_final_loss(),
                "train_time": self.get_train_time(),
                "is_stacked": self._is_stacked,
            }
            # The threshold key is only present when a threshold is set.
            if self._threshold is not None:
                descriptor["threshold"] = self._threshold
            serialized = json.dumps(descriptor, indent=4)
            handle.write(serialized)

        # Prefer the metric reported by the first learner; fall back to the
        # framework-level metric when the learner reports None.
        learning_curve_metric = self.learners[0].get_metric_name()
        if learning_curve_metric is None:
            learning_curve_metric = self.get_metric_name()

        learner_names = [learner.name for learner in self.learners]
        LearningCurves.plot(
            learner_names,
            learning_curve_metric,
            model_path,
            trees_in_iteration=self.additional_params.get("trees_in_step"),
        )

        # Call additional metrics just to be sure they are computed.
        self._additional_metrics = self.get_additional_metrics()

        AdditionalMetrics.save(
            self._additional_metrics,
            self._ml_task,
            self.model_markdown(),
            model_path,
        )

        # status.txt is written last, after every other artifact.
        status_path = os.path.join(model_path, "status.txt")
        with open(status_path, "w") as handle:
            handle.write("ALL OK!")

        # Save always follows training, so the time spent saving is added
        # to the total train time.
        self.train_time += time.time() - start_time
Exemple #2
0
    def save(self, model_path):
        """Write the ensemble's artifacts into ``model_path``.

        In order, this writes ``ensemble.json`` (the ensemble descriptor),
        ``predictions_ensemble.csv``, the learning-curve output produced by
        ``LearningCurves.plot_for_ensemble``, the additional-metrics output
        produced by ``AdditionalMetrics.save`` and finally ``status.txt``.

        Args:
            model_path: Directory that receives all the files.
        """
        logger.info(f"Save the ensemble to {model_path}")

        descriptor_path = os.path.join(model_path, "ensemble.json")
        with open(descriptor_path, "w") as handle:
            # One entry per selected model: its name and its repeat value.
            members = [
                {"model": entry["model"]._name, "repeat": entry["repeat"]}
                for entry in self.selected_models
            ]

            descriptor = {
                "name": self._name,
                "ml_task": self._ml_task,
                "optimize_metric": self._optimize_metric,
                "selected_models": members,
            }

            # The threshold key is only present when a threshold is set.
            if self._threshold is not None:
                descriptor["threshold"] = self._threshold
            serialized = json.dumps(descriptor, indent=4)
            handle.write(serialized)

        oof_predictions = self.get_out_of_folds()
        predictions_path = os.path.join(model_path, "predictions_ensemble.csv")
        oof_predictions.to_csv(predictions_path, index=False)

        LearningCurves.plot_for_ensemble(self._scores, self.metric.name, model_path)

        self._additional_metrics = self.get_additional_metrics()

        AdditionalMetrics.save(
            self._additional_metrics,
            self._ml_task,
            self.model_markdown(),
            model_path,
        )

        # status.txt is written last, after every other artifact.
        status_path = os.path.join(model_path, "status.txt")
        with open(status_path, "w") as handle:
            handle.write("ALL OK!")
Exemple #3
0
    def save(self, model_path):
        """Write this model's artifacts into ``model_path``.

        In order, this writes the predictions CSV, ``framework.json`` (the
        model descriptor), the learning-curve output produced by
        ``LearningCurves.plot``, the additional-metrics output produced by
        ``AdditionalMetrics.save`` and finally ``status.txt``.

        Args:
            model_path: Directory that receives all the files.
        """
        start_time = time.time()
        logger.info(f"Save the model {model_path}")

        # Without k-fold validation the predictions come from a single
        # validation split; otherwise they are out-of-folds predictions.
        if "k_folds" not in self.validation_params:
            type_of_predictions = "validation"
        else:
            type_of_predictions = "out_of_folds"
        predictions_fname = os.path.join(
            model_path, f"predictions_{type_of_predictions}.csv"
        )
        oof_predictions = self.get_out_of_folds()
        oof_predictions.to_csv(predictions_fname, index=False)

        # NOTE: only the learner file paths are recorded here; this method
        # does not call save() on the learners themselves.
        saved = [
            os.path.join(model_path, f"learner_{number}.{learner.file_extension()}")
            for number, learner in enumerate(self.learners, start=1)
        ]

        descriptor_path = os.path.join(model_path, "framework.json")
        with open(descriptor_path, "w") as handle:
            preprocessing = [step.to_json() for step in self.preprocessings]
            learners_params = [item.get_params() for item in self.learners]
            descriptor = {
                "uid": self.uid,
                "name": self._name,
                "preprocessing": preprocessing,
                "learners": learners_params,
                "params": self.params,
                "saved": saved,
                "predictions_fname": predictions_fname,
                "metric_name": self.get_metric_name(),
                "final_loss": self.get_final_loss(),
                "train_time": self.get_train_time(),
                "is_stacked": self._is_stacked,
            }
            # The threshold key is only present when a threshold is set.
            if self._threshold is not None:
                descriptor["threshold"] = self._threshold
            serialized = json.dumps(descriptor, indent=4)
            handle.write(serialized)

        n_splits = self.validation.get_n_splits()
        metric_name = self.get_metric_name()
        LearningCurves.plot(
            n_splits,
            metric_name,
            model_path,
            trees_in_iteration=self.additional_params.get("trees_in_step"),
        )

        self._additional_metrics = self.get_additional_metrics()

        AdditionalMetrics.save(
            self._additional_metrics,
            self._ml_task,
            self.model_markdown(),
            model_path,
        )

        # status.txt is written last, after every other artifact.
        status_path = os.path.join(model_path, "status.txt")
        with open(status_path, "w") as handle:
            handle.write("ALL OK!")

        # Save always follows training, so the time spent saving is added
        # to the total train time.
        self.train_time += time.time() - start_time
    def test_plot_close(self):
        """
        Check that plots are closed after being drawn, to avoid this warning:
        RuntimeWarning: More than 20 figures have been opened.
        Figures created through the pyplot interface (`matplotlib.pyplot.figure`)
        are retained until explicitly closed and may consume too much memory.
        """
        # Kept at a single repetition so the test stays fast; raise it to
        # actually stress the number of open figures.
        repetitions = 1
        for _ in range(repetitions):
            LearningCurves.plot_for_ensemble([3, 2, 1], "random_metrics", ".")

        # Remove the file named by LearningCurves.output_file_name, which
        # the plot call above is expected to have created.
        os.remove(LearningCurves.output_file_name)
    def save(self, results_path, model_subpath):
        """Write the ensemble's artifacts under ``results_path/model_subpath``.

        In order, this writes ``predictions_ensemble.csv``,
        ``ensemble.json`` (the ensemble descriptor), the learning-curve
        output produced by ``LearningCurves.plot_for_ensemble``, the
        additional-metrics output produced by ``AdditionalMetrics.save`` and
        finally ``status.txt``.

        The predictions file name stored in ``ensemble.json`` is relative
        to ``results_path`` (it is built from ``model_subpath``).

        Args:
            results_path: Root directory of the results.
            model_subpath: Directory of this ensemble, relative to
                ``results_path``.
        """
        model_path = os.path.join(results_path, model_subpath)
        logger.info(f"Save the ensemble to {model_path}")

        oof_predictions = self.get_out_of_folds()
        predictions_fname = os.path.join(model_subpath, "predictions_ensemble.csv")
        self._oof_predictions_fname = os.path.join(results_path, predictions_fname)
        oof_predictions.to_csv(self._oof_predictions_fname, index=False)

        descriptor_path = os.path.join(model_path, "ensemble.json")
        with open(descriptor_path, "w") as handle:
            # One entry per selected model: its name and its repeat value.
            members = [
                {"model": entry["model"]._name, "repeat": entry["repeat"]}
                for entry in self.selected_models
            ]

            descriptor = {
                "name": self._name,
                "ml_task": self._ml_task,
                "optimize_metric": self._optimize_metric,
                "selected_models": members,
                "predictions_fname": predictions_fname,
                "metric_name": self.get_metric_name(),
                "final_loss": self.get_final_loss(),
                "train_time": self.get_train_time(),
                "is_stacked": self._is_stacked,
            }

            # The threshold key is only present when a threshold is set.
            if self._threshold is not None:
                descriptor["threshold"] = self._threshold
            serialized = json.dumps(descriptor, indent=4)
            handle.write(serialized)

        LearningCurves.plot_for_ensemble(self._scores, self.metric.name, model_path)

        # Call additional metrics just to be sure they are computed.
        self._additional_metrics = self.get_additional_metrics()

        AdditionalMetrics.save(
            self._additional_metrics,
            self._ml_task,
            self.model_markdown(),
            model_path,
        )

        # status.txt is written last, after every other artifact.
        status_path = os.path.join(model_path, "status.txt")
        with open(status_path, "w") as handle:
            handle.write("ALL OK!")
Exemple #6
0
    def save(self, model_path):
        """Write this model's artifacts into ``model_path``.

        In order, this saves every learner to its own file, then writes
        ``framework.json`` (the model descriptor), the predictions CSV, the
        learning-curve output produced by ``LearningCurves.plot``, the
        additional-metrics output produced by ``AdditionalMetrics.save`` and
        finally ``status.txt``.

        Args:
            model_path: Directory that receives all the files.
        """
        logger.info(f"Save the model {model_path}")

        # Kept as an explicit loop because each iteration writes a learner
        # to disk before its path is recorded.
        saved = []
        for number, learner in enumerate(self.learners, start=1):
            # NOTE(review): "file_extenstion" looks misspelled; confirm it
            # matches the learner API before renaming it.
            learner_path = os.path.join(
                model_path, f"learner_{number}.{learner.file_extenstion()}"
            )
            learner.save(learner_path)
            saved.append(learner_path)

        descriptor_path = os.path.join(model_path, "framework.json")
        with open(descriptor_path, "w") as handle:
            preprocessing = [step.to_json() for step in self.preprocessings]
            learners_params = [item.get_params() for item in self.learners]
            descriptor = {
                "uid": self.uid,
                "name": self._name,
                "preprocessing": preprocessing,
                "learners": learners_params,
                "params": self.params,
                "saved": saved,
            }
            # The threshold key is only present when a threshold is set.
            if self._threshold is not None:
                descriptor["threshold"] = self._threshold
            serialized = json.dumps(descriptor, indent=4)
            handle.write(serialized)

        # Without k-fold validation the predictions come from a single
        # validation split; otherwise they are out-of-folds predictions.
        if "k_folds" not in self.validation_params:
            type_of_predictions = "validation"
        else:
            type_of_predictions = "out_of_folds"
        oof_predictions = self.get_out_of_folds()
        predictions_path = os.path.join(
            model_path, f"predictions_{type_of_predictions}.csv"
        )
        oof_predictions.to_csv(predictions_path, index=False)

        n_splits = self.validation.get_n_splits()
        metric_name = self.get_metric_name()
        LearningCurves.plot(n_splits, metric_name, model_path)

        self._additional_metrics = self.get_additional_metrics()

        AdditionalMetrics.save(
            self._additional_metrics,
            self._ml_task,
            self.model_markdown(),
            model_path,
        )

        # status.txt is written last, after every other artifact.
        status_path = os.path.join(model_path, "status.txt")
        with open(status_path, "w") as handle:
            handle.write("ALL OK!")