def save(self, results_path, model_subpath):
    start_time = time.time()
    model_path = os.path.join(results_path, model_subpath)
    logger.info(f"Save the model {model_path}")

    type_of_predictions = (
        "validation" if "k_folds" not in self.validation_params else "out_of_folds"
    )
    predictions_fname = os.path.join(
        model_subpath, f"predictions_{type_of_predictions}.csv"
    )
    self._oof_predictions_fname = os.path.join(results_path, predictions_fname)
    predictions = self.get_out_of_folds()
    predictions.to_csv(self._oof_predictions_fname, index=False)

    saved = [os.path.join(model_subpath, l.get_fname()) for l in self.learners]

    with open(os.path.join(model_path, "framework.json"), "w") as fout:
        preprocessing = [p.to_json() for p in self.preprocessings]
        learners_params = [learner.get_params() for learner in self.learners]
        desc = {
            "uid": self.uid,
            "name": self._name,
            "preprocessing": preprocessing,
            "learners": learners_params,
            "params": self.params,
            "saved": saved,
            "predictions_fname": predictions_fname,
            "metric_name": self.get_metric_name(),
            "final_loss": self.get_final_loss(),
            "train_time": self.get_train_time(),
            "is_stacked": self._is_stacked,
        }
        if self._threshold is not None:
            desc["threshold"] = self._threshold
        fout.write(json.dumps(desc, indent=4))

    learning_curve_metric = self.learners[0].get_metric_name()
    if learning_curve_metric is None:
        learning_curve_metric = self.get_metric_name()

    LearningCurves.plot(
        [l.name for l in self.learners],
        learning_curve_metric,
        model_path,
        trees_in_iteration=self.additional_params.get("trees_in_step"),
    )

    # call additional metrics just to be sure they are computed
    self._additional_metrics = self.get_additional_metrics()

    AdditionalMetrics.save(
        self._additional_metrics, self._ml_task, self.model_markdown(), model_path
    )

    with open(os.path.join(model_path, "status.txt"), "w") as fout:
        fout.write("ALL OK!")

    # add the save time to the total train time,
    # because there is always a save after the training
    self.train_time += time.time() - start_time
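A minimal usage sketch, assuming a hypothetical trained framework instance mf and example directory names; this variant takes the results directory and the model sub-directory separately, so that the paths recorded in framework.json stay relative to results_path:

import json
import os

# hypothetical call; "AutoML_1" and "1_DecisionTree" are example directories
mf.save(results_path="AutoML_1", model_subpath="1_DecisionTree")

# framework.json now describes the saved model and can be read back
# with the standard library
with open(os.path.join("AutoML_1", "1_DecisionTree", "framework.json")) as fin:
    desc = json.load(fin)
print(desc["metric_name"], desc["final_loss"])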
def save(self, model_path): logger.info(f"Save the ensemble to {model_path}") with open(os.path.join(model_path, "ensemble.json"), "w") as fout: ms = [] for selected in self.selected_models: ms += [{"model": selected["model"]._name, "repeat": selected["repeat"]}] desc = { "name": self._name, "ml_task": self._ml_task, "optimize_metric": self._optimize_metric, "selected_models": ms, } if self._threshold is not None: desc["threshold"] = self._threshold fout.write(json.dumps(desc, indent=4)) predictions = self.get_out_of_folds() predictions.to_csv( os.path.join(model_path, f"predictions_ensemble.csv"), index=False ) LearningCurves.plot_for_ensemble(self._scores, self.metric.name, model_path) self._additional_metrics = self.get_additional_metrics() AdditionalMetrics.save( self._additional_metrics, self._ml_task, self.model_markdown(), model_path ) with open(os.path.join(model_path, "status.txt"), "w") as fout: fout.write("ALL OK!")
def save(self, model_path):
    start_time = time.time()
    logger.info(f"Save the model {model_path}")

    type_of_predictions = (
        "validation" if "k_folds" not in self.validation_params else "out_of_folds"
    )
    predictions_fname = os.path.join(
        model_path, f"predictions_{type_of_predictions}.csv"
    )
    predictions = self.get_out_of_folds()
    predictions.to_csv(predictions_fname, index=False)

    saved = []
    for i, l in enumerate(self.learners):
        p = os.path.join(model_path, f"learner_{i+1}.{l.file_extension()}")
        # the learner file is not written here; only its expected path
        # is recorded in `saved`
        # l.save(p)
        saved += [p]

    with open(os.path.join(model_path, "framework.json"), "w") as fout:
        preprocessing = [p.to_json() for p in self.preprocessings]
        learners_params = [learner.get_params() for learner in self.learners]
        desc = {
            "uid": self.uid,
            "name": self._name,
            "preprocessing": preprocessing,
            "learners": learners_params,
            "params": self.params,
            "saved": saved,
            "predictions_fname": predictions_fname,
            "metric_name": self.get_metric_name(),
            "final_loss": self.get_final_loss(),
            "train_time": self.get_train_time(),
            "is_stacked": self._is_stacked,
        }
        if self._threshold is not None:
            desc["threshold"] = self._threshold
        fout.write(json.dumps(desc, indent=4))

    LearningCurves.plot(
        self.validation.get_n_splits(),
        self.get_metric_name(),
        model_path,
        trees_in_iteration=self.additional_params.get("trees_in_step"),
    )

    self._additional_metrics = self.get_additional_metrics()

    AdditionalMetrics.save(
        self._additional_metrics, self._ml_task, self.model_markdown(), model_path
    )

    with open(os.path.join(model_path, "status.txt"), "w") as fout:
        fout.write("ALL OK!")

    # add the save time to the total train time,
    # because there is always a save after the training
    self.train_time += time.time() - start_time
def test_plot_close(self):
    """
    Test that plots are closed, to avoid the following warning:

    RuntimeWarning: More than 20 figures have been opened.
    Figures created through the pyplot interface (`matplotlib.pyplot.figure`)
    are retained until explicitly closed and may consume too much memory.
    """
    # the range can be increased; it is kept low for test speed
    for _ in range(1):
        LearningCurves.plot_for_ensemble([3, 2, 1], "random_metrics", ".")
    os.remove(LearningCurves.output_file_name)
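The warning the test guards against disappears when each figure created by a plotting helper is closed after saving. A minimal sketch of that pattern with matplotlib (plot_and_save and its arguments are illustrative, not part of the library):

import matplotlib.pyplot as plt

def plot_and_save(scores, output_path):
    # create the figure, write it to disk, then close it explicitly
    # so pyplot does not retain it in memory
    fig = plt.figure()
    plt.plot(scores)
    fig.savefig(output_path)
    plt.close(fig)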
def save(self, results_path, model_subpath):
    model_path = os.path.join(results_path, model_subpath)
    logger.info(f"Save the ensemble to {model_path}")

    predictions = self.get_out_of_folds()
    predictions_fname = os.path.join(model_subpath, "predictions_ensemble.csv")
    self._oof_predictions_fname = os.path.join(results_path, predictions_fname)
    predictions.to_csv(self._oof_predictions_fname, index=False)

    with open(os.path.join(model_path, "ensemble.json"), "w") as fout:
        ms = []
        for selected in self.selected_models:
            ms += [{"model": selected["model"]._name, "repeat": selected["repeat"]}]

        desc = {
            "name": self._name,
            "ml_task": self._ml_task,
            "optimize_metric": self._optimize_metric,
            "selected_models": ms,
            "predictions_fname": predictions_fname,
            "metric_name": self.get_metric_name(),
            "final_loss": self.get_final_loss(),
            "train_time": self.get_train_time(),
            "is_stacked": self._is_stacked,
        }
        if self._threshold is not None:
            desc["threshold"] = self._threshold
        fout.write(json.dumps(desc, indent=4))

    LearningCurves.plot_for_ensemble(self._scores, self.metric.name, model_path)

    # call additional metrics just to be sure they are computed
    self._additional_metrics = self.get_additional_metrics()

    AdditionalMetrics.save(
        self._additional_metrics, self._ml_task, self.model_markdown(), model_path
    )

    with open(os.path.join(model_path, "status.txt"), "w") as fout:
        fout.write("ALL OK!")
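For reference, reading the ensemble.json written above back with json.load would give a dict along these lines; the keys mirror the desc dict, and all values below are illustrative examples:

ensemble_desc = {
    "name": "Ensemble",
    "ml_task": "binary_classification",
    "optimize_metric": "logloss",
    "selected_models": [
        {"model": "1_DecisionTree", "repeat": 1},
        {"model": "2_Xgboost", "repeat": 3},
    ],
    "predictions_fname": "Ensemble/predictions_ensemble.csv",
    "metric_name": "logloss",
    "final_loss": 0.345,
    "train_time": 12.3,
    "is_stacked": False,
    # "threshold" appears only when self._threshold was set
}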
def save(self, model_path): logger.info(f"Save the model {model_path}") saved = [] for i, l in enumerate(self.learners): p = os.path.join(model_path, f"learner_{i+1}.{l.file_extenstion()}") l.save(p) saved += [p] with open(os.path.join(model_path, "framework.json"), "w") as fout: preprocessing = [p.to_json() for p in self.preprocessings] learners_params = [ learner.get_params() for learner in self.learners ] desc = { "uid": self.uid, "name": self._name, "preprocessing": preprocessing, "learners": learners_params, "params": self.params, "saved": saved, } if self._threshold is not None: desc["threshold"] = self._threshold fout.write(json.dumps(desc, indent=4)) type_of_predictions = ("validation" if "k_folds" not in self.validation_params else "out_of_folds") predictions = self.get_out_of_folds() predictions.to_csv( os.path.join(model_path, f"predictions_{type_of_predictions}.csv"), index=False, ) LearningCurves.plot(self.validation.get_n_splits(), self.get_metric_name(), model_path) self._additional_metrics = self.get_additional_metrics() AdditionalMetrics.save(self._additional_metrics, self._ml_task, self.model_markdown(), model_path) with open(os.path.join(model_path, "status.txt"), "w") as fout: fout.write("ALL OK!")