Example #1
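    # Excerpt of an ensemble model's save() method; assumes os, json, a
    # module-level logger, LearningCurves, and AdditionalMetrics are imported
    # in the enclosing module.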
    def save(self, model_path):
        logger.info(f"Save the ensemble to {model_path}")

        with open(os.path.join(model_path, "ensemble.json"), "w") as fout:
            ms = []
            for selected in self.selected_models:
                ms += [{"model": selected["model"]._name, "repeat": selected["repeat"]}]

            desc = {
                "name": self._name,
                "ml_task": self._ml_task,
                "optimize_metric": self._optimize_metric,
                "selected_models": ms,
            }

            if self._threshold is not None:
                desc["threshold"] = self._threshold
            fout.write(json.dumps(desc, indent=4))

        predictions = self.get_out_of_folds()
        predictions.to_csv(
            os.path.join(model_path, "predictions_ensemble.csv"), index=False
        )

        LearningCurves.plot_for_ensemble(self._scores, self.metric.name, model_path)

        self._additional_metrics = self.get_additional_metrics()

        AdditionalMetrics.save(
            self._additional_metrics, self._ml_task, self.model_markdown(), model_path
        )

        with open(os.path.join(model_path, "status.txt"), "w") as fout:
            fout.write("ALL OK!")

Example #2

    def test_plot_close(self):
        """
        Test that figures are closed after plotting, to avoid the following warning:
        RuntimeWarning: More than 20 figures have been opened.
        Figures created through the pyplot interface (`matplotlib.pyplot.figure`)
        are retained until explicitly closed and may consume too much memory.
        """
        for _ in range(1):  # increase the range if needed; kept at 1 for test speed
            LearningCurves.plot_for_ensemble([3, 2, 1], "random_metrics", ".")

        os.remove(LearningCurves.output_file_name)
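
The docstring's warning fires when more than 20 pyplot figures stay open at
once. Below is a minimal sketch of the close-after-save pattern this test
exercises; plot_scores and the output filename are hypothetical stand-ins,
not the library's actual plot_for_ensemble implementation:

    import os
    import matplotlib.pyplot as plt

    def plot_scores(scores, metric_name, model_path):
        fig = plt.figure()
        plt.plot(range(len(scores)), scores)
        plt.xlabel("iteration")
        plt.ylabel(metric_name)
        fig.savefig(os.path.join(model_path, "learning_curves.png"))
        plt.close(fig)  # release the figure so repeated calls cannot
                        # accumulate open figures and trigger the warning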
Example #3
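    # As in Example #1, this excerpt assumes os, json, a module-level logger,
    # LearningCurves, and AdditionalMetrics are available in the enclosing module.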
    def save(self, results_path, model_subpath):
        model_path = os.path.join(results_path, model_subpath)
        logger.info(f"Save the ensemble to {model_path}")

        predictions = self.get_out_of_folds()
        predictions_fname = os.path.join(model_subpath,
                                         "predictions_ensemble.csv")
        self._oof_predictions_fname = os.path.join(results_path,
                                                   predictions_fname)
        predictions.to_csv(self._oof_predictions_fname, index=False)

        with open(os.path.join(model_path, "ensemble.json"), "w") as fout:
            ms = []
            for selected in self.selected_models:
                ms += [{
                    "model": selected["model"]._name,
                    "repeat": selected["repeat"]
                }]

            desc = {
                "name": self._name,
                "ml_task": self._ml_task,
                "optimize_metric": self._optimize_metric,
                "selected_models": ms,
                "predictions_fname": predictions_fname,
                "metric_name": self.get_metric_name(),
                "final_loss": self.get_final_loss(),
                "train_time": self.get_train_time(),
                "is_stacked": self._is_stacked,
            }

            if self._threshold is not None:
                desc["threshold"] = self._threshold
            fout.write(json.dumps(desc, indent=4))

        LearningCurves.plot_for_ensemble(self._scores, self.metric.name,
                                         model_path)

        # call get_additional_metrics() to make sure the metrics are computed
        self._additional_metrics = self.get_additional_metrics()

        AdditionalMetrics.save(self._additional_metrics, self._ml_task,
                               self.model_markdown(), model_path)

        with open(os.path.join(model_path, "status.txt"), "w") as fout:
            fout.write("ALL OK!")