Example #1
# Write the requested artifacts (leaderboard CSV, predictor info pickle,
# zipped model files) under the run's output directory. `output_subdir`,
# `utils`, `save_pd` and `save_pkl` are assumed to be supplied by the
# surrounding module.
def save_artifacts(predictor, leaderboard, config):
    artifacts = config.framework_params.get('_save_artifacts', ['leaderboard'])
    try:
        models_dir = output_subdir("models", config)
        shutil.rmtree(os.path.join(models_dir, "utils"), ignore_errors=True)

        if 'leaderboard' in artifacts:
            save_pd.save(path=os.path.join(models_dir, "leaderboard.csv"),
                         df=leaderboard)

        if 'info' in artifacts:
            ag_info = predictor.info()
            info_dir = output_subdir("info", config)
            save_pkl.save(path=os.path.join(info_dir, "info.pkl"),
                          object=ag_info)

        if 'models' in artifacts:
            utils.zip_path(models_dir, os.path.join(models_dir, "models.zip"))

        # Prune the raw model directories and any loose .pkl files left at the
        # top level of models_dir, keeping leaderboard.csv / models.zip.
        def delete(path, isdir):
            if isdir:
                shutil.rmtree(path, ignore_errors=True)
            elif os.path.splitext(path)[1] == '.pkl':
                os.remove(path)

        utils.walk_apply(models_dir, delete, max_depth=0)

    except Exception:
        log.warning("Error when saving artifacts.", exc_info=True)
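
A minimal sketch of how example #1's `_save_artifacts` switch might be driven from the caller's side. The `SimpleNamespace` config, the `output_dir` field name, and the body of `output_subdir` below are illustrative assumptions, not the benchmark's real API; only the `_save_artifacts` lookup and the per-artifact subdirectory idea come from the example above.

import os
from types import SimpleNamespace

# Hypothetical config: only the fields the example actually reads are modeled.
config = SimpleNamespace(
    output_dir="/tmp/benchmark_run",  # assumed field name
    framework_params={"_save_artifacts": ["leaderboard", "info"]},
)

def output_subdir(name, config):
    # Assumed behavior: create (if missing) and return a per-run subdirectory.
    path = os.path.join(config.output_dir, name)
    os.makedirs(path, exist_ok=True)
    return path

# With this config, example #1 would write models/leaderboard.csv and
# info/info.pkl, skip the models.zip branch, and finally prune loose *.pkl
# files and model directories from the top level of models/.
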
Example #2
# Dump a text description of the trained ensemble and, on request, the
# framework's debug files, either copied as-is or bundled into one zip archive.
# `output_subdir` and `utils` are assumed to come from the surrounding module.
def save_artifacts(estimator, config):
    try:
        models_repr = estimator.show_models()
        log.debug("Trained Ensemble:\n%s", models_repr)
        artifacts = config.framework_params.get('_save_artifacts', [])
        if 'models' in artifacts:
            models_file = os.path.join(output_subdir('models', config),
                                       'models.txt')
            with open(models_file, 'w') as f:
                f.write(models_repr)
        if 'debug_as_files' in artifacts or 'debug_as_zip' in artifacts:
            print('Saving debug artifacts!')
            debug_dir = output_subdir('debug', config)
            ignore_extensions = [
                '.npy', '.pcs', '.model', '.cv_model', '.ensemble', '.pkl'
            ]
            tmp_directory = estimator.automl_._backend.temporary_directory
            if 'debug_as_files' in artifacts:

                def _copy(filename, **_):
                    dst = filename.replace(tmp_directory, debug_dir + '/')
                    os.makedirs(os.path.dirname(dst), exist_ok=True)
                    shutil.copyfile(filename, dst)

                utils.walk_apply(
                    tmp_directory,
                    _copy,
                    filtr=lambda path: (
                        os.path.splitext(path)[1] not in ignore_extensions
                        and not os.path.isdir(path)
                    ),
                )
            else:
                utils.zip_path(
                    tmp_directory,
                    os.path.join(debug_dir, "artifacts.zip"),
                    filtr=lambda p: os.path.splitext(p)[1] not in ignore_extensions,
                )
    except Exception as e:
        log.debug("Error when saving artifacts: %s", e, exc_info=True)
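
All four examples lean on `utils.walk_apply` and `utils.zip_path`, whose contract is only implied by how they are called. The sketch below reconstructs that contract from usage alone (callback invoked as `fn(path, isdir=...)`, optional `filtr` predicate on paths, `max_depth=0` limiting the walk to direct children); it is an assumption-laden stand-in, not the helpers' actual implementation.

import os
import zipfile

def walk_apply(dir_path, apply, max_depth=-1, filtr=None):
    # Assumed semantics: call apply(path, isdir=...) for every entry that
    # passes filtr; max_depth=0 restricts the walk to dir_path's direct children.
    for root, dirs, files in os.walk(dir_path):
        rel = os.path.relpath(root, dir_path)
        depth = 0 if rel == "." else rel.count(os.sep) + 1
        if 0 <= max_depth < depth:
            dirs[:] = []  # do not descend further
            continue
        for name in dirs + files:
            path = os.path.join(root, name)
            if filtr is None or filtr(path):
                apply(path, isdir=os.path.isdir(path))

def zip_path(dir_path, dest_archive, filtr=None):
    # Assumed semantics: archive every file under dir_path that passes filtr,
    # skipping the destination archive itself.
    with zipfile.ZipFile(dest_archive, "w", zipfile.ZIP_DEFLATED) as zf:
        for root, _, files in os.walk(dir_path):
            for name in files:
                path = os.path.join(root, name)
                if path != dest_archive and (filtr is None or filtr(path)):
                    zf.write(path, arcname=os.path.relpath(path, dir_path))
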
Example #3
# H2O AutoML variant: export the leaderboard, the trained models (MOJO or JSON),
# per-model predictions on the test frame, and the H2O logs, zipping and pruning
# each output directory as it goes.
def save_artifacts(automl, dataset, config):
    artifacts = config.framework_params.get('_save_artifacts', ['leaderboard'])
    try:
        lb = automl.leaderboard.as_data_frame()
        log.debug("Leaderboard:\n%s", lb.to_string())
        if 'leaderboard' in artifacts:
            models_dir = output_subdir("models", config)
            write_csv(lb, os.path.join(models_dir, "leaderboard.csv"))
        if 'models' in artifacts:
            models_dir = output_subdir("models", config)
            all_models_se = next(
                (mid for mid in lb['model_id']
                 if mid.startswith("StackedEnsemble_AllModels")), None)
            mformat = 'mojo' if 'mojos' in artifacts else 'json'
            if all_models_se and mformat == 'mojo':
                save_model(all_models_se, dest_dir=models_dir, mformat=mformat)
            else:
                for mid in lb['model_id']:
                    save_model(mid, dest_dir=models_dir, mformat=mformat)
                models_archive = os.path.join(models_dir, "models.zip")
                utils.zip_path(models_dir, models_archive)

                def delete(path, isdir):
                    ext = os.path.splitext(path)[1]
                    if path != models_archive and ext in ['.json', '.zip']:
                        os.remove(path)

                utils.walk_apply(models_dir, delete, max_depth=0)

        if 'models_predictions' in artifacts:
            predictions_dir = output_subdir("predictions", config)
            test = h2o.get_frame(frame_name('test', config))
            for mid in lb['model_id']:
                model = h2o.get_model(mid)
                h2o_preds = model.predict(test)
                preds = extract_preds(h2o_preds, test, dataset=dataset)
                if preds.probabilities_labels is None:
                    preds.probabilities_labels = preds.h2o_labels
                write_preds(
                    preds, os.path.join(predictions_dir, mid,
                                        'predictions.csv'))
            utils.zip_path(
                predictions_dir,
                os.path.join(predictions_dir, "models_predictions.zip"))

            def delete(path, isdir):
                if isdir:
                    shutil.rmtree(path, ignore_errors=True)

            utils.walk_apply(predictions_dir, delete, max_depth=0)

        if 'logs' in artifacts:
            logs_dir = output_subdir("logs", config)
            logs_zip = os.path.join(logs_dir, "h2o_logs.zip")
            utils.zip_path(logs_dir, logs_zip)

            # h2o.download_all_logs(dirname=logs_dir)

            def delete(path, isdir):
                if isdir:
                    shutil.rmtree(path, ignore_errors=True)
                elif path != logs_zip:
                    os.remove(path)

            utils.walk_apply(logs_dir, delete, max_depth=0)
    except Exception:
        log.debug("Error when saving artifacts.", exc_info=True)
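
For reference, these are the artifact flags example #3 actually tests for, collected from its `in artifacts` membership checks (the default when `_save_artifacts` is unset is `['leaderboard']`); anything else in the list is silently ignored.

# Flags recognized by example #3:
_save_artifacts = [
    'leaderboard',          # models/leaderboard.csv
    'models',               # per-model exports, zipped into models/models.zip
    'mojos',                # with 'models': export MOJOs instead of JSON
                            # (if an AllModels stacked ensemble exists, only its MOJO is saved)
    'models_predictions',   # per-model test predictions, zipped
    'logs',                 # logs/h2o_logs.zip
]
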
Example #4
# Extended H2O variant: the 'models' artifact can now carry an export format and
# a top-N limit (e.g. 'models[mojo,3]'), and every saved artifact path is tracked
# in models_artifacts so the cleanup passes leave those files in place.
def save_artifacts(automl, dataset, config):
    artifacts = config.framework_params.get('_save_artifacts', ['leaderboard'])
    try:
        models_artifacts = []
        lb = automl.leaderboard.as_data_frame()
        log.debug("Leaderboard:\n%s", lb.to_string())
        if 'leaderboard' in artifacts:
            models_dir = output_subdir("models", config)
            lb_path = os.path.join(models_dir, "leaderboard.csv")
            write_csv(lb, lb_path)
            models_artifacts.append(lb_path)

        # Accepted specifiers: 'models', 'models[json|binary|mojo]' or
        # 'models[<format>,<topN>]'.
        models_pat = re.compile(r"models(\[(json|binary|mojo)(?:,(\d+))?\])?")
        models = list(filter(models_pat.fullmatch, artifacts))
        for m in models:
            models_dir = output_subdir("models", config)
            all_models_se = next(
                (mid for mid in lb['model_id']
                 if mid.startswith("StackedEnsemble_AllModels")), None)
            match = models_pat.fullmatch(m)
            mformat = match.group(2) or 'json'
            topN = int(match.group(3) or -1)
            if topN < 0 and mformat != 'json' and all_models_se:
                models_artifacts.append(
                    save_model(all_models_se,
                               dest_dir=models_dir,
                               mformat=mformat))
            else:
                count = 0
                for mid in lb['model_id']:
                    if topN < 0 or count < topN:
                        save_model(mid, dest_dir=models_dir, mformat=mformat)
                        count += 1
                    else:
                        break

                models_archive = os.path.join(models_dir,
                                              f"models_{mformat}.zip")
                utils.zip_path(models_dir,
                               models_archive,
                               filtr=lambda p: p not in models_artifacts)
                models_artifacts.append(models_archive)

                def delete(path, isdir):
                    ext = os.path.splitext(path)[1]
                    if (not isdir and path not in models_artifacts
                            and ext in ['.json', '.zip', '']):
                        os.remove(path)

                utils.walk_apply(models_dir, delete, max_depth=0)

        if 'model_predictions' in artifacts:
            predictions_dir = output_subdir("predictions", config)
            test = h2o.get_frame(frame_name('test', config))
            for mid in lb['model_id']:
                model = h2o.get_model(mid)
                h2o_preds = model.predict(test)
                preds = extract_preds(h2o_preds, test, dataset=dataset)
                if preds.probabilities_labels is None:
                    preds.probabilities_labels = preds.h2o_labels
                write_preds(
                    preds, os.path.join(predictions_dir, mid,
                                        'predictions.csv'))
            utils.zip_path(
                predictions_dir,
                os.path.join(predictions_dir, "model_predictions.zip"))

            def delete(path, isdir):
                if isdir:
                    shutil.rmtree(path, ignore_errors=True)

            utils.walk_apply(predictions_dir, delete, max_depth=0)

        if 'logs' in artifacts:
            logs_dir = output_subdir("logs", config)
            logs_zip = os.path.join(logs_dir, "h2o_logs.zip")
            utils.zip_path(logs_dir, logs_zip)

            # h2o.download_all_logs(dirname=logs_dir)

            def delete(path, isdir):
                if isdir:
                    shutil.rmtree(path, ignore_errors=True)
                elif path != logs_zip:
                    os.remove(path)

            utils.walk_apply(logs_dir, delete, max_depth=0)
    except Exception:
        log.debug("Error when saving artifacts.", exc_info=True)
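
A quick illustration of the `models[...]` specifier grammar that example #4 parses; the sample specifiers below are made up, but the regex and the defaulting logic are copied from the code above.

import re

models_pat = re.compile(r"models(\[(json|binary|mojo)(?:,(\d+))?\])?")

for spec in ['models', 'models[json]', 'models[mojo,3]', 'models[binary,10]']:
    match = models_pat.fullmatch(spec)
    mformat = match.group(2) or 'json'   # format defaults to json
    topN = int(match.group(3) or -1)     # -1 means "all models"
    print(f"{spec:<18} -> format={mformat}, topN={topN}")

# models             -> format=json, topN=-1
# models[json]       -> format=json, topN=-1
# models[mojo,3]     -> format=mojo, topN=3
# models[binary,10]  -> format=binary, topN=10

Note that for a non-JSON format with no top-N limit, example #4 exports only the StackedEnsemble_AllModels model when one is present on the leaderboard.
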