def save(self, model_path):
    logger.info(f"Save the ensemble to {model_path}")

    with open(os.path.join(model_path, "ensemble.json"), "w") as fout:
        ms = []
        for selected in self.selected_models:
            ms += [{"model": selected["model"]._name, "repeat": selected["repeat"]}]

        desc = {
            "name": self._name,
            "ml_task": self._ml_task,
            "optimize_metric": self._optimize_metric,
            "selected_models": ms,
        }

        if self._threshold is not None:
            desc["threshold"] = self._threshold
        fout.write(json.dumps(desc, indent=4))

    predictions = self.get_out_of_folds()
    predictions.to_csv(
        os.path.join(model_path, "predictions_ensemble.csv"), index=False
    )

    LearningCurves.plot_for_ensemble(self._scores, self.metric.name, model_path)

    self._additional_metrics = self.get_additional_metrics()

    AdditionalMetrics.save(
        self._additional_metrics, self._ml_task, self.model_markdown(), model_path
    )

    with open(os.path.join(model_path, "status.txt"), "w") as fout:
        fout.write("ALL OK!")
def test_plot_close(self):
    """
    Test that we close plots, to avoid the following warning:

    RuntimeWarning: More than 20 figures have been opened.
    Figures created through the pyplot interface (`matplotlib.pyplot.figure`)
    are retained until explicitly closed and may consume too much memory.
    """
    # the range can be increased; it is kept low to keep the tests fast
    for _ in range(1):
        LearningCurves.plot_for_ensemble([3, 2, 1], "random_metrics", ".")
    os.remove(LearningCurves.output_file_name)
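# A minimal sketch of the save-then-close pattern that the test above checks for,
# assuming matplotlib. The helper name, signature, and file name below are
# illustrative only, not the actual LearningCurves.plot_for_ensemble implementation.
import os

import matplotlib.pyplot as plt


def plot_scores_and_close(scores, metric_name, model_path, fname="learning_curve.png"):
    # draw the ensemble learning curve and write it to disk
    fig = plt.figure(figsize=(10, 7))
    plt.plot(range(1, len(scores) + 1), scores, label=f"Ensemble ({metric_name})")
    plt.xlabel("#Iteration")
    plt.ylabel(metric_name)
    plt.legend()
    fig.savefig(os.path.join(model_path, fname))
    # close the figure explicitly so repeated calls do not trigger the
    # "More than 20 figures have been opened" warning
    plt.close(fig)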
def save(self, results_path, model_subpath):
    model_path = os.path.join(results_path, model_subpath)
    logger.info(f"Save the ensemble to {model_path}")

    # store out-of-folds predictions next to the model files
    predictions = self.get_out_of_folds()
    predictions_fname = os.path.join(model_subpath, "predictions_ensemble.csv")
    self._oof_predictions_fname = os.path.join(results_path, predictions_fname)
    predictions.to_csv(self._oof_predictions_fname, index=False)

    with open(os.path.join(model_path, "ensemble.json"), "w") as fout:
        ms = []
        for selected in self.selected_models:
            ms += [{"model": selected["model"]._name, "repeat": selected["repeat"]}]

        desc = {
            "name": self._name,
            "ml_task": self._ml_task,
            "optimize_metric": self._optimize_metric,
            "selected_models": ms,
            "predictions_fname": predictions_fname,
            "metric_name": self.get_metric_name(),
            "final_loss": self.get_final_loss(),
            "train_time": self.get_train_time(),
            "is_stacked": self._is_stacked,
        }
        if self._threshold is not None:
            desc["threshold"] = self._threshold
        fout.write(json.dumps(desc, indent=4))

    LearningCurves.plot_for_ensemble(self._scores, self.metric.name, model_path)

    # call additional metrics just to be sure they are computed
    self._additional_metrics = self.get_additional_metrics()

    AdditionalMetrics.save(
        self._additional_metrics, self._ml_task, self.model_markdown(), model_path
    )

    with open(os.path.join(model_path, "status.txt"), "w") as fout:
        fout.write("ALL OK!")
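# For illustration, a hypothetical counterpart that reads back what save() wrote;
# the library's real loading code may differ. Assumes pandas and the file layout
# produced above: predictions_fname is stored relative to results_path.
import json
import os

import pandas as pd


def load_ensemble_description(results_path, model_subpath):
    # read the JSON description written by save()
    model_path = os.path.join(results_path, model_subpath)
    with open(os.path.join(model_path, "ensemble.json")) as fin:
        desc = json.load(fin)
    # out-of-folds predictions live under results_path / desc["predictions_fname"]
    oof_predictions = pd.read_csv(os.path.join(results_path, desc["predictions_fname"]))
    return desc, oof_predictions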