def save_artifacts(predictor, leaderboard, config):
    """Persist the requested AutoGluon artifacts for this run.

    Which artifacts are written is driven by the ``_save_artifacts`` entry of
    ``config.framework_params`` (defaults to just the leaderboard):
      - 'leaderboard': leaderboard DataFrame as models/leaderboard.csv
      - 'info': ``predictor.info()`` pickled as info/info.pkl
      - 'models': the whole models directory zipped as models/models.zip

    Failures are logged (best-effort) rather than raised.
    """
    requested = config.framework_params.get('_save_artifacts', ['leaderboard'])
    try:
        models_dir = output_subdir("models", config)
        # Drop the framework's "utils" scratch directory before archiving.
        shutil.rmtree(os.path.join(models_dir, "utils"), ignore_errors=True)

        if 'leaderboard' in requested:
            save_pd.save(path=os.path.join(models_dir, "leaderboard.csv"), df=leaderboard)

        if 'info' in requested:
            predictor_info = predictor.info()
            info_dir = output_subdir("info", config)
            save_pkl.save(path=os.path.join(info_dir, "info.pkl"), object=predictor_info)

        if 'models' in requested:
            utils.zip_path(models_dir, os.path.join(models_dir, "models.zip"))

        def _cleanup(path, isdir):
            # Remove nested directories entirely; at the top level keep
            # everything except raw .pkl model files (the zip/csv survive).
            if isdir:
                shutil.rmtree(path, ignore_errors=True)
                return
            if os.path.splitext(path)[1] == '.pkl':
                os.remove(path)

        utils.walk_apply(models_dir, _cleanup, max_depth=0)
    except Exception:
        log.warning("Error when saving artifacts.", exc_info=True)
def save_artifacts(estimator, config):
    """Persist optional auto-sklearn artifacts for this run.

    Driven by the ``_save_artifacts`` entry of ``config.framework_params``
    (defaults to none):
      - 'models': the fitted ensemble repr as models/models.txt
      - 'debug_as_files': copy the automl temporary directory tree into the
        debug output dir, skipping bulky binary extensions
      - 'debug_as_zip': same content, archived as debug/artifacts.zip

    Failures are logged at debug level (best-effort) rather than raised.
    """
    try:
        models_repr = estimator.show_models()
        log.debug("Trained Ensemble:\n%s", models_repr)
        artifacts = config.framework_params.get('_save_artifacts', [])
        if 'models' in artifacts:
            models_file = os.path.join(output_subdir('models', config), 'models.txt')
            with open(models_file, 'w') as f:
                f.write(models_repr)

        if 'debug_as_files' in artifacts or 'debug_as_zip' in artifacts:
            print('Saving debug artifacts!')
            debug_dir = output_subdir('debug', config)
            # Large binary intermediates that are not useful for debugging.
            ignore_extensions = [
                '.npy', '.pcs', '.model', '.cv_model', '.ensemble', '.pkl'
            ]
            # NOTE(review): reaches into a private attribute of the estimator
            # (`automl_._backend`) — fragile across auto-sklearn versions.
            tmp_directory = estimator.automl_._backend.temporary_directory
            if 'debug_as_files' in artifacts:
                def _copy(filename, **_):
                    # Mirror the tmp tree under debug_dir, creating parents.
                    dst = filename.replace(tmp_directory, debug_dir + '/')
                    os.makedirs(os.path.dirname(dst), exist_ok=True)
                    shutil.copyfile(filename, dst)
                utils.walk_apply(
                    tmp_directory, _copy,
                    filtr=lambda path: (os.path.splitext(path)[1] not in ignore_extensions
                                        and not os.path.isdir(path)),
                )
            else:
                utils.zip_path(tmp_directory,
                               os.path.join(debug_dir, "artifacts.zip"),
                               filtr=lambda p: os.path.splitext(p)[1] not in ignore_extensions)
    except Exception as e:
        # Bug fix: the original called `"... {e}.".format(e)`, passing `e`
        # positionally to a *named* placeholder — str.format raises
        # KeyError('e') inside the handler and masks the real error.
        # Use lazy %-style logging args instead.
        log.debug("Error when saving artifacts: %s", e, exc_info=True)
def save_artifacts(automl, dataset, config):
    """Persist the requested H2O AutoML artifacts for this run.

    Driven by the ``_save_artifacts`` entry of ``config.framework_params``
    (defaults to just the leaderboard):
      - 'leaderboard': leaderboard as models/leaderboard.csv
      - 'models' (+ optional 'mojos' flag to select MOJO format): saved
        models, archived as models/models.zip when saved individually
      - 'models_predictions': per-model predictions on the test frame,
        archived as predictions/models_predictions.zip
      - 'logs': the logs directory archived as logs/h2o_logs.zip

    Failures are logged at debug level (best-effort) rather than raised.
    """
    artifacts = config.framework_params.get('_save_artifacts', ['leaderboard'])
    try:
        lb = automl.leaderboard.as_data_frame()
        log.debug("Leaderboard:\n%s", lb.to_string())
        if 'leaderboard' in artifacts:
            models_dir = output_subdir("models", config)
            write_csv(lb, os.path.join(models_dir, "leaderboard.csv"))
        if 'models' in artifacts:
            models_dir = output_subdir("models", config)
            # The "all models" stacked ensemble, if the leaderboard has one.
            all_models_se = next(
                (mid for mid in lb['model_id']
                 if mid.startswith("StackedEnsemble_AllModels")), None)
            # The extra 'mojos' artifact flag switches output to MOJO format.
            mformat = 'mojo' if 'mojos' in artifacts else 'json'
            if all_models_se and mformat == 'mojo':
                # A MOJO of the all-models ensemble is self-contained:
                # saving just that one model is enough.
                save_model(all_models_se, dest_dir=models_dir, mformat=mformat)
            else:
                for mid in lb['model_id']:
                    save_model(mid, dest_dir=models_dir, mformat=mformat)
                models_archive = os.path.join(models_dir, "models.zip")
                utils.zip_path(models_dir, models_archive)

                def delete(path, isdir):
                    # Remove the individual model files once archived,
                    # keeping the archive itself (and the leaderboard csv).
                    if path != models_archive and os.path.splitext(
                            path)[1] in ['.json', '.zip']:
                        os.remove(path)
                utils.walk_apply(models_dir, delete, max_depth=0)
        if 'models_predictions' in artifacts:
            predictions_dir = output_subdir("predictions", config)
            test = h2o.get_frame(frame_name('test', config))
            for mid in lb['model_id']:
                model = h2o.get_model(mid)
                h2o_preds = model.predict(test)
                preds = extract_preds(h2o_preds, test, dataset=dataset)
                if preds.probabilities_labels is None:
                    # Fall back to the labels H2O itself reports.
                    preds.probabilities_labels = preds.h2o_labels
                write_preds(
                    preds,
                    os.path.join(predictions_dir, mid, 'predictions.csv'))
            utils.zip_path(
                predictions_dir,
                os.path.join(predictions_dir, "models_predictions.zip"))

            def delete(path, isdir):
                # Drop the per-model subdirectories once archived.
                if isdir:
                    shutil.rmtree(path, ignore_errors=True)
            utils.walk_apply(predictions_dir, delete, max_depth=0)
        if 'logs' in artifacts:
            logs_dir = output_subdir("logs", config)
            logs_zip = os.path.join(logs_dir, "h2o_logs.zip")
            utils.zip_path(logs_dir, logs_zip)
            # h2o.download_all_logs(dirname=logs_dir)

            def delete(path, isdir):
                # Keep only the zip; remove everything else under logs_dir.
                if isdir:
                    shutil.rmtree(path, ignore_errors=True)
                elif path != logs_zip:
                    os.remove(path)
            utils.walk_apply(logs_dir, delete, max_depth=0)
    except Exception:
        log.debug("Error when saving artifacts.", exc_info=True)
def save_artifacts(automl, dataset, config):
    """Persist the requested H2O AutoML artifacts for this run.

    Driven by the ``_save_artifacts`` entry of ``config.framework_params``
    (defaults to just the leaderboard):
      - 'leaderboard': leaderboard as models/leaderboard.csv
      - 'models' or 'models[format]' or 'models[format,N]': saved models,
        where format is json|binary|mojo (default json) and N limits the
        save to the top-N leaderboard models; archived per format
      - 'model_predictions': per-model predictions on the test frame,
        archived as predictions/model_predictions.zip
      - 'logs': the logs directory archived as logs/h2o_logs.zip

    Failures are logged at debug level (best-effort) rather than raised.
    """
    artifacts = config.framework_params.get('_save_artifacts', ['leaderboard'])
    try:
        # Paths of final artifacts to keep; everything else in the models
        # dir is treated as intermediate and cleaned up / excluded from zips.
        models_artifacts = []
        lb = automl.leaderboard.as_data_frame()
        log.debug("Leaderboard:\n%s", lb.to_string())
        if 'leaderboard' in artifacts:
            models_dir = output_subdir("models", config)
            lb_path = os.path.join(models_dir, "leaderboard.csv")
            write_csv(lb, lb_path)
            models_artifacts.append(lb_path)

        # Matches 'models', 'models[mojo]', 'models[binary,5]', etc.
        models_pat = re.compile(r"models(\[(json|binary|mojo)(?:,(\d+))?\])?")
        models = list(filter(models_pat.fullmatch, artifacts))
        for m in models:
            models_dir = output_subdir("models", config)
            # The "all models" stacked ensemble, if the leaderboard has one.
            all_models_se = next(
                (mid for mid in lb['model_id']
                 if mid.startswith("StackedEnsemble_AllModels")), None)
            match = models_pat.fullmatch(m)
            mformat = match.group(2) or 'json'
            # topN < 0 means "save all models".
            topN = int(match.group(3) or -1)
            if topN < 0 and mformat != 'json' and all_models_se:
                # Binary/MOJO export of the all-models ensemble is
                # self-contained: saving just that one model is enough.
                models_artifacts.append(
                    save_model(all_models_se, dest_dir=models_dir,
                               mformat=mformat))
            else:
                count = 0
                for mid in lb['model_id']:
                    if topN < 0 or count < topN:
                        save_model(mid, dest_dir=models_dir, mformat=mformat)
                        count += 1
                    else:
                        break
            models_archive = os.path.join(models_dir, f"models_{mformat}.zip")
            # Exclude already-finalized artifacts from the new archive.
            utils.zip_path(models_dir, models_archive,
                           filtr=lambda p: p not in models_artifacts)
            models_artifacts.append(models_archive)

            def delete(path, isdir):
                # Remove archived intermediates ('' catches extensionless
                # binary model files), keeping the finalized artifacts.
                if not isdir and path not in models_artifacts and os.path.splitext(
                        path)[1] in ['.json', '.zip', '']:
                    os.remove(path)
            utils.walk_apply(models_dir, delete, max_depth=0)
        if 'model_predictions' in artifacts:
            predictions_dir = output_subdir("predictions", config)
            test = h2o.get_frame(frame_name('test', config))
            for mid in lb['model_id']:
                model = h2o.get_model(mid)
                h2o_preds = model.predict(test)
                preds = extract_preds(h2o_preds, test, dataset=dataset)
                if preds.probabilities_labels is None:
                    # Fall back to the labels H2O itself reports.
                    preds.probabilities_labels = preds.h2o_labels
                write_preds(
                    preds,
                    os.path.join(predictions_dir, mid, 'predictions.csv'))
            utils.zip_path(
                predictions_dir,
                os.path.join(predictions_dir, "model_predictions.zip"))

            def delete(path, isdir):
                # Drop the per-model subdirectories once archived.
                if isdir:
                    shutil.rmtree(path, ignore_errors=True)
            utils.walk_apply(predictions_dir, delete, max_depth=0)
        if 'logs' in artifacts:
            logs_dir = output_subdir("logs", config)
            logs_zip = os.path.join(logs_dir, "h2o_logs.zip")
            utils.zip_path(logs_dir, logs_zip)
            # h2o.download_all_logs(dirname=logs_dir)

            def delete(path, isdir):
                # Keep only the zip; remove everything else under logs_dir.
                if isdir:
                    shutil.rmtree(path, ignore_errors=True)
                elif path != logs_zip:
                    os.remove(path)
            utils.walk_apply(logs_dir, delete, max_depth=0)
    except Exception:
        log.debug("Error when saving artifacts.", exc_info=True)