예제 #1
0
def run(dataset: Dataset, config: TaskConfig):
    log.info("\n**** Oboe ****\n")

    is_classification = config.type == 'classification'
    if not is_classification:
        # regression currently fails (as of 26.02.2019: still under development state by oboe team)
        raise ValueError('Regression is not yet supported (under development).')

    X_train, X_test = impute(dataset.train.X_enc, dataset.test.X_enc)
    y_train, y_test = dataset.train.y_enc, dataset.test.y_enc

    training_params = {k: v for k, v in config.framework_params.items() if not k.startswith('_')}
    n_cores = config.framework_params.get('_n_cores', config.cores)

    log.info('Running oboe with a maximum time of {}s on {} cores.'.format(config.max_runtime_seconds, n_cores))
    log.warning('We completely ignore the advice to optimize towards metric: {}.'.format(config.metric))

    aml = AutoLearner(p_type='classification' if is_classification else 'regression',
                      n_cores=n_cores,
                      runtime_limit=config.max_runtime_seconds,
                      **training_params)

    aml_models = lambda: [aml.ensemble, *aml.ensemble.base_learners] if len(aml.ensemble.base_learners) > 0 else []

    with Timer() as training:
        try:
            aml.fit(X_train, y_train)
        except IndexError as e:
            if len(aml_models()) == 0:  # incorrect handling of some IndexError in oboe if ensemble is empty
                raise NoResultError("Oboe could not produce any model in the requested time.") from e
            raise e

    predictions = aml.predict(X_test).reshape(len(X_test))

    if is_classification:
        target_values_enc = dataset.target.label_encoder.transform(dataset.target.values)
        probabilities = Encoder('one-hot', target=False, encoded_type=float).fit(target_values_enc).transform(predictions)
    else:
        probabilities = None

    save_predictions_to_file(dataset=dataset,
                             output_file=config.output_predictions_file,
                             probabilities=probabilities,
                             predictions=predictions,
                             truth=y_test,
                             target_is_encoded=True)

    return dict(
        models_count=len(aml_models()),
        training_duration=training.duration
    )
예제 #2
0
def run_in_venv(caller_file,
                script_file: str,
                *args,
                input_data: Union[dict, ns],
                dataset: Dataset,
                config: TaskConfig,
                process_results=None,
                python_exec=None):

    here = dir_of(caller_file)
    venv_bin_path = os.path.join(here, 'venv', 'bin')
    if python_exec is None:  # use local virtual env by default
        python_exec = os.path.join(venv_bin_path, 'python -W ignore')
    script_path = os.path.join(here, script_file)
    cmd = f"{python_exec} {script_path}"

    input_data = ns.from_dict(input_data)
    with TemporaryDirectory() as tmpdir:

        def make_path(k, v, parents=None):
            if isinstance(v, np.ndarray):
                path = os.path.join(tmpdir, '.'.join(parents + [k, 'npy']))
                if vector_keys.match(k):
                    v = v.reshape(-1, 1)
                np.save(path, v, allow_pickle=True)
                return k, path
            return k, v

        ds = ns.walk(input_data, make_path)
        dataset.release()

        config.result_dir = tmpdir
        config.result_file = mktemp(dir=tmpdir)

        params = json_dumps(dict(dataset=ds, config=config), style='compact')
        with Timer() as proc_timer:
            output, err = run_cmd(
                cmd,
                *args,
                _input_str_=params,
                _live_output_=True,
                _error_level_=logging.DEBUG,
                _env_=dict(PATH=os.pathsep.join(
                    [venv_bin_path, os.environ['PATH']]),
                           PYTHONPATH=os.pathsep.join([
                               rconfig().root_dir,
                           ]),
                           AMLB_PATH=os.path.join(rconfig().root_dir, "amlb")),
            )

        res = ns(lambda: None)
        if os.path.exists(config.result_file):
            res = json_load(config.result_file, as_namespace=True)

        log.debug("Result from subprocess:\n%s", res)

        if not res:
            raise NoResultError(f"Process crashed:\n{err}")

        if res.error_message is not None:
            raise NoResultError(res.error_message)

        for name in ['predictions', 'truth', 'probabilities']:
            res[name] = np.load(
                res[name],
                allow_pickle=True) if res[name] is not None else None

        if callable(process_results):
            res = process_results(res)

        if res.output_file:
            save_predictions(
                dataset=dataset,
                output_file=res.output_file,
                predictions=res.predictions.reshape(-1)
                if res.predictions is not None else None,
                truth=res.truth.reshape(-1) if res.truth is not None else None,
                probabilities=res.probabilities,
                probabilities_labels=res.probabilities_labels,
                target_is_encoded=res.target_is_encoded)

        return dict(models_count=res.models_count
                    if res.models_count is not None else 1,
                    training_duration=res.training_duration if
                    res.training_duration is not None else proc_timer.duration,
                    predict_duration=res.predict_duration,
                    **res.others.__dict__)
예제 #3
0
def run_in_venv(caller_file,
                script_file: str,
                *args,
                input_data: Union[dict, ns],
                dataset: Dataset,
                config: TaskConfig,
                process_results=None,
                python_exec=None):

    here = dir_of(caller_file)
    if python_exec is None:  # use local virtual env by default
        python_exec = os.path.join(here, 'venv/bin/python -W ignore')
    script_path = os.path.join(here, script_file)
    cmd = f"{python_exec} {script_path}"

    input_data = ns.from_dict(input_data)
    with TmpDir() as tmpdir:

        def make_path(k, v, parents=None):
            if isinstance(v, np.ndarray):
                path = os.path.join(tmpdir, '.'.join(parents + [k, 'npy']))
                if vector_keys.match(k):
                    v = v.reshape(-1, 1)
                np.save(path, v, allow_pickle=True)
                return k, path
            return k, v

        ds = ns.walk(input_data, make_path)
        dataset.release()

        config.result_token = str(uuid.uuid1())
        config.result_dir = tmpdir

        params = json_dumps(dict(dataset=ds, config=config), style='compact')
        with Timer() as proc_timer:
            output, err = run_cmd(cmd,
                                  *args,
                                  _input_str_=params,
                                  _live_output_=True,
                                  _env_=dict(PYTHONPATH=os.pathsep.join([
                                      rconfig().root_dir,
                                      os.path.join(rconfig().root_dir, "amlb"),
                                  ])))

        out = io.StringIO(output)
        res = ns()
        for line in out:
            li = line.rstrip()
            if li == config.result_token:
                res = json_loads(out.readline(), as_namespace=True)
                break

        if res.error_message is not None:
            raise NoResultError(res.error_message)

        for name in ['predictions', 'truth', 'probabilities']:
            res[name] = np.load(
                res[name],
                allow_pickle=True) if res[name] is not None else None

        log.debug("Result from subprocess:\n%s", res)
        if callable(process_results):
            res = process_results(res)

        save_predictions_to_file(
            dataset=dataset,
            output_file=res.output_file,
            predictions=res.predictions.reshape(-1)
            if res.predictions is not None else None,
            truth=res.truth.reshape(-1) if res.truth is not None else None,
            probabilities=res.probabilities,
            target_is_encoded=res.target_is_encoded)

        return dict(models_count=res.models_count
                    if res.models_count is not None else 1,
                    training_duration=res.training_duration if
                    res.training_duration is not None else proc_timer.duration)
예제 #4
0
def run(dataset: Dataset, config: TaskConfig):
    log.info(f"\n**** AutoWEKA [v{config.framework_version}]****\n")
    save_metadata(config)

    is_classification = config.type == 'classification'
    if not is_classification:
        raise ValueError('Regression is not supported.')

    # Mapping of benchmark metrics to Weka metrics
    metrics_mapping = dict(acc='errorRate',
                           auc='areaUnderROC',
                           logloss='kBInformation')
    metric = metrics_mapping[
        config.metric] if config.metric in metrics_mapping else None
    if metric is None:
        raise ValueError("Performance metric {} not supported.".format(
            config.metric))

    train_file = dataset.train.path
    test_file = dataset.test.path
    # Weka to requires target as the last attribute
    if dataset.target.index != len(dataset.predictors):
        train_file = reorder_dataset(dataset.train.path,
                                     target_src=dataset.target.index)
        test_file = reorder_dataset(dataset.test.path,
                                    target_src=dataset.target.index)

    training_params = {
        k: v
        for k, v in config.framework_params.items() if not k.startswith('_')
    }
    parallelRuns = config.framework_params.get('_parallelRuns', config.cores)

    memLimit = config.framework_params.get('_memLimit', 'auto')
    if memLimit == 'auto':
        memLimit = max(
            min(config.max_mem_size_mb,
                math.ceil(config.max_mem_size_mb / parallelRuns)),
            1024)  # AutoWEKA default memLimit
    log.info("Using %sMB memory per run on %s parallel runs.", memLimit,
             parallelRuns)

    f = split_path(config.output_predictions_file)
    f.extension = '.weka_pred.csv'
    weka_file = path_from_split(f)
    cmd_root = "java -cp {here}/lib/autoweka/autoweka.jar weka.classifiers.meta.AutoWEKAClassifier ".format(
        here=dir_of(__file__))
    cmd_params = dict(
        t='"{}"'.format(train_file),
        T='"{}"'.format(test_file),
        memLimit=memLimit,
        classifications=
        '"weka.classifiers.evaluation.output.prediction.CSV -distribution -file \\\"{}\\\""'
        .format(weka_file),
        timeLimit=int(config.max_runtime_seconds / 60),
        parallelRuns=parallelRuns,
        metric=metric,
        seed=config.seed % (1 << 16),  # weka accepts only int16 as seeds
        **training_params)
    cmd = cmd_root + ' '.join(
        ["-{} {}".format(k, v) for k, v in cmd_params.items()])
    with Timer() as training:
        run_cmd(cmd, _live_output_=True)

    # if target values are not sorted alphabetically in the ARFF file, then class probabilities are returned in the original order
    # interestingly, other frameworks seem to always sort the target values first
    # that's why we need to specify the probabilities labels here: sorting+formatting is done in saving function
    probabilities_labels = dataset.target.values
    if not os.path.exists(weka_file):
        raise NoResultError("AutoWEKA failed producing any prediction.")
    with open(weka_file, 'r') as weka_file:
        probabilities = []
        predictions = []
        truth = []
        for line in weka_file.readlines()[1:-1]:
            inst, actual, predicted, error, *distribution = line.split(',')
            pred_probabilities = [
                pred_probability.replace('*', '').replace('\n', '')
                for pred_probability in distribution
            ]
            _, pred = predicted.split(':')
            _, tru = actual.split(':')
            probabilities.append(pred_probabilities)
            predictions.append(pred)
            truth.append(tru)

    save_predictions(dataset=dataset,
                     output_file=config.output_predictions_file,
                     probabilities=probabilities,
                     predictions=predictions,
                     truth=truth,
                     probabilities_labels=probabilities_labels)

    return dict(training_duration=training.duration)
예제 #5
0
def run(dataset: Dataset, config: TaskConfig):
    log.info(f"\n**** MLNet [v{config.framework_version}] ****\n")

    avaible_task_list = ['classification', 'regression']
    if config.type not in avaible_task_list:
        raise ValueError(f'{config.type} is not supported.')

    dir_path = os.path.dirname(os.path.realpath(__file__))
    DOTNET_INSTALL_DIR = os.path.join(dir_path, 'lib')
    os.environ['DOTNET_ROOT'] = DOTNET_INSTALL_DIR
    os.environ['MLNetCLIEnablePredict'] = 'True'
    os.environ['MLNET_MAX_THREAD'] = str(config.cores)
    mlnet = os.path.join(DOTNET_INSTALL_DIR, 'mlnet')
    train_time_in_seconds = config.max_runtime_seconds
    sub_command = config.type

    # set up MODELBUILDER_AUTOML
    MODELBUILDER_AUTOML = config.framework_params.get('automl_type', 'NNI')
    os.environ['MODELBUILDER_AUTOML'] = MODELBUILDER_AUTOML

    artifacts = config.framework_params.get('_save_artifacts', [])
    tmpdir = tempfile.mkdtemp()
    tmp_output_folder = os.path.join(tmpdir, str(config.fold))
    output_dir = output_subdir(
        'models',
        config=config) if 'models' in artifacts else tmp_output_folder
    log_dir = output_subdir(
        'logs', config=config) if 'logs' in artifacts else tmp_output_folder
    log_path = os.path.join(log_dir, 'log.txt')

    try:
        label = dataset.target.index
        train_dataset_path = dataset.train.data_path('csv')
        test_dataset_path = dataset.test.data_path('csv')

        log.info(f'train dataset: {train_dataset_path}')
        log.info(f'test dataset: {test_dataset_path}')

        cmd = (
            f"{mlnet} {sub_command}"
            f" --dataset {train_dataset_path} --test-dataset {test_dataset_path} --train-time {train_time_in_seconds}"
            f" --label-col {label} --output {os.path.dirname(output_dir)} --name {config.fold}"
            f" --verbosity q --log-file-path {log_path}")

        with Timer() as training:
            run_cmd(cmd)

        train_result_json = os.path.join(output_dir,
                                         '{}.mbconfig'.format(config.fold))
        if not os.path.exists(train_result_json):
            raise NoResultError("MLNet failed producing any prediction.")

        with open(train_result_json, 'r') as f:
            json_str = f.read()
            mb_config = json.loads(json_str)
            model_path = os.path.join(output_dir, f"{config.fold}.zip")
            output_prediction_path = os.path.join(
                log_dir, "prediction.txt"
            )  # keeping this in log dir as it contains useful error when prediction fails
            models_count = len(mb_config['RunHistory']['Trials'])
            # predict
            predict_cmd = (
                f"{mlnet} predict --task-type {config.type}"
                f" --model {model_path} --dataset {test_dataset_path} --label-col {dataset.target.name} > {output_prediction_path}"
            )
            with Timer() as prediction:
                run_cmd(predict_cmd)
            if config.type == 'classification':
                prediction_df = pd.read_csv(output_prediction_path,
                                            dtype={'PredictedLabel': 'object'})

                save_predictions(
                    dataset=dataset,
                    output_file=config.output_predictions_file,
                    predictions=prediction_df['PredictedLabel'].values,
                    truth=dataset.test.y,
                    probabilities=prediction_df.values[:, :-1],
                    probabilities_labels=list(
                        prediction_df.columns.values[:-1]),
                )

            if config.type == 'regression':
                prediction_df = pd.read_csv(output_prediction_path)
                save_predictions(
                    dataset=dataset,
                    output_file=config.output_predictions_file,
                    predictions=prediction_df['Score'].values,
                    truth=dataset.test.y,
                )

            return dict(
                models_count=models_count,
                training_duration=training.duration,
                predict_duration=prediction.duration,
            )
    finally:
        if 'logs' in artifacts:
            logs_zip = os.path.join(log_dir, "logs.zip")
            zip_path(log_dir, logs_zip)
            clean_dir(log_dir, filter_=lambda p: p != logs_zip)
        if 'models' in artifacts:
            models_zip = os.path.join(output_dir, "models.zip")
            zip_path(output_dir, models_zip)
            clean_dir(output_dir, filter_=lambda p: p != models_zip)

        shutil.rmtree(tmpdir, ignore_errors=True)
예제 #6
0
def run(dataset: Dataset, config: TaskConfig):
    log.info("\n**** H2O AutoML ****\n")
    # Mapping of benchmark metrics to H2O metrics
    metrics_mapping = dict(acc='mean_per_class_error',
                           auc='AUC',
                           logloss='logloss',
                           mae='mae',
                           mse='mse',
                           r2='r2',
                           rmse='rmse',
                           rmsle='rmsle')
    sort_metric = metrics_mapping[
        config.metric] if config.metric in metrics_mapping else None
    if sort_metric is None:
        # TODO: Figure out if we are going to blindly pass metrics through, or if we use a strict mapping
        log.warning("Performance metric %s not supported, defaulting to AUTO.",
                    config.metric)

    try:
        training_params = {
            k: v
            for k, v in config.framework_params.items()
            if not k.startswith('_')
        }
        nthreads = config.framework_params.get('_nthreads', config.cores)
        jvm_memory = str(
            round(config.max_mem_size_mb * 2 /
                  3)) + "M"  # leaving 1/3rd of available memory for XGBoost

        log.info("Starting H2O cluster with %s cores, %s memory.", nthreads,
                 jvm_memory)
        max_port_range = 49151
        min_port_range = 1024
        rnd_port = os.getpid() % (max_port_range -
                                  min_port_range) + min_port_range
        port = config.framework_params.get('_port', rnd_port)

        h2o.init(
            nthreads=nthreads,
            port=port,
            min_mem_size=jvm_memory,
            max_mem_size=jvm_memory,
            strict_version_check=config.framework_params.get(
                '_strict_version_check', True)
            # log_dir=os.path.join(config.output_dir, 'logs', config.name, str(config.fold))
        )

        # Load train as an H2O Frame, but test as a Pandas DataFrame
        log.debug("Loading train data from %s.", dataset.train.path)
        train = h2o.import_file(dataset.train.path,
                                destination_frame=frame_name('train', config))
        # train.impute(method='mean')
        log.debug("Loading test data from %s.", dataset.test.path)
        test = h2o.import_file(dataset.test.path,
                               destination_frame=frame_name('test', config))
        # test.impute(method='mean')

        log.info("Running model on task %s, fold %s.", config.name,
                 config.fold)
        log.debug(
            "Running H2O AutoML with a maximum time of %ss on %s core(s), optimizing %s.",
            config.max_runtime_seconds, config.cores, sort_metric)

        aml = H2OAutoML(
            max_runtime_secs=config.max_runtime_seconds,
            max_runtime_secs_per_model=round(
                config.max_runtime_seconds /
                2),  # to prevent timeout on ensembles
            sort_metric=sort_metric,
            seed=config.seed,
            **training_params)

        monitor = (
            BackendMemoryMonitoring(
                frequency_seconds=rconfig().monitoring.frequency_seconds,
                check_on_exit=True,
                verbosity=rconfig().monitoring.verbosity)
            if config.framework_params.get('_monitor_backend', False)
            # else contextlib.nullcontext  # Py 3.7+ only
            else contextlib.contextmanager(iter)([0]))
        with Timer() as training:
            with monitor:
                aml.train(y=dataset.target.index, training_frame=train)

        if not aml.leader:
            raise NoResultError(
                "H2O could not produce any model in the requested time.")

        save_predictions(aml, test, dataset=dataset, config=config)
        save_artifacts(aml, dataset=dataset, config=config)

        return dict(models_count=len(aml.leaderboard),
                    training_duration=training.duration)

    finally:
        if h2o.connection():
            # h2o.remove_all()
            h2o.connection().close()
        if h2o.connection().local_server:
            h2o.connection().local_server.shutdown()
예제 #7
0
def run(dataset: Dataset, config: TaskConfig):
    log.info("\n**** H2O AutoML ****\n")
    # Mapping of benchmark metrics to H2O metrics
    metrics_mapping = dict(acc='mean_per_class_error',
                           auc='AUC',
                           logloss='logloss',
                           mae='mae',
                           mse='mse',
                           rmse='rmse',
                           rmsle='rmsle')
    sort_metric = metrics_mapping[
        config.metric] if config.metric in metrics_mapping else None
    if sort_metric is None:
        # TODO: Figure out if we are going to blindly pass metrics through, or if we use a strict mapping
        log.warning("Performance metric %s not supported, defaulting to AUTO.",
                    config.metric)

    try:
        training_params = {
            k: v
            for k, v in config.framework_params.items()
            if not k.startswith('_')
        }
        nthreads = config.framework_params.get('_nthreads', config.cores)

        log.info("Starting H2O cluster with %s cores, %sMB memory.", nthreads,
                 config.max_mem_size_mb)
        h2o.init(
            nthreads=nthreads,
            min_mem_size=str(config.max_mem_size_mb) + "M",
            max_mem_size=str(config.max_mem_size_mb) + "M",
            # log_dir=os.path.join(config.output_dir, 'logs', config.name, str(config.fold))
        )

        # Load train as an H2O Frame, but test as a Pandas DataFrame
        log.debug("Loading train data from %s.", dataset.train.path)
        train = h2o.import_file(dataset.train.path,
                                destination_frame=frame_name('train', config))
        # train.impute(method='mean')
        log.debug("Loading test data from %s.", dataset.test.path)
        test = h2o.import_file(dataset.test.path,
                               destination_frame=frame_name('test', config))
        # test.impute(method='mean')

        log.info("Running model on task %s, fold %s.", config.name,
                 config.fold)
        log.debug(
            "Running H2O AutoML with a maximum time of %ss on %s core(s), optimizing %s.",
            config.max_runtime_seconds, config.cores, sort_metric)

        aml = H2OAutoML(max_runtime_secs=config.max_runtime_seconds,
                        sort_metric=sort_metric,
                        seed=config.seed,
                        **training_params)

        with Timer() as training:
            aml.train(y=dataset.target.index, training_frame=train)

        if not aml.leader:
            raise NoResultError(
                "H2O could not produce any model in the requested time.")

        save_predictions(aml, test, dataset=dataset, config=config)
        save_artifacts(aml, dataset=dataset, config=config)

        return dict(models_count=len(aml.leaderboard),
                    training_duration=training.duration)

    finally:
        if h2o.connection():
            h2o.remove_all()
            h2o.connection().close()
        if h2o.connection().local_server:
            h2o.connection().local_server.shutdown()
예제 #8
0
def run_in_venv(caller_file,
                script_file: str,
                *args,
                input_data: Union[dict, ns],
                dataset: Dataset,
                config: TaskConfig,
                options: Union[None, dict, ns] = None,
                process_results=None,
                python_exec=None):
    here = dir_of(caller_file)
    if python_exec is None:  # use local virtual env by default
        python_exec = venv_python_exec(here)
    script_path = os.path.join(here, script_file)
    cmd = f"{python_exec} {script_path}"

    options = ns.from_dict(options) if options else ns()
    ser_config = options['serialization']
    env = options['env'] or ns()

    with TemporaryDirectory() as tmpdir:

        ds = _make_input_dataset(input_data,
                                 dataset,
                                 tmpdir,
                                 serialization=ser_config)

        config.result_dir = tmpdir
        config.result_file = mktemp(dir=tmpdir)

        params = json_dumps(dict(dataset=ds, config=config, options=options),
                            style='compact')
        log.debug("Params passed to subprocess:\n%s", params)
        cmon = rconfig().monitoring
        monitor = (dict(interval_seconds=cmon.interval_seconds,
                        verbosity=cmon.verbosity)
                   if 'sub_proc_memory' in cmon.statistics else None)
        env = dict(PATH=os.pathsep.join([venv_bin(here), os.environ['PATH']]),
                   PYTHONPATH=os.pathsep.join([
                       rconfig().root_dir,
                   ]),
                   AMLB_PATH=os.path.join(rconfig().root_dir, "amlb"),
                   AMLB_LOG_TRACE=str(
                       logging.TRACE if hasattr(logging, 'TRACE') else ''),
                   **{k: str(v)
                      for k, v in env})

        with Timer() as proc_timer:
            output, err = run_cmd(cmd,
                                  *args,
                                  _input_str_=params,
                                  _live_output_=True,
                                  _error_level_=logging.DEBUG,
                                  _env_=env,
                                  _monitor_=monitor)

        res = ns(lambda: None)
        if os.path.exists(config.result_file):
            res = json_load(config.result_file, as_namespace=True)

        log.debug("Result from subprocess:\n%s", res)

        if not res:
            raise NoResultError(f"Process crashed:\n{err}")

        if res.error_message is not None:
            raise NoResultError(res.error_message)

        for name in ['predictions', 'truth', 'probabilities']:
            res[name] = deserialize_data(
                res[name],
                config=ser_config) if res[name] is not None else None

        if callable(process_results):
            res = process_results(res)

        if res.output_file:
            save_predictions(dataset=dataset,
                             output_file=res.output_file,
                             predictions=as_vec(res.predictions),
                             truth=(as_vec(res.truth) if res.truth is not None
                                    else dataset.test.y_enc if
                                    res.target_is_encoded else dataset.test.y),
                             probabilities=res.probabilities,
                             probabilities_labels=res.probabilities_labels,
                             target_is_encoded=res.target_is_encoded)

        return dict(models_count=res.models_count
                    if res.models_count is not None else 1,
                    training_duration=res.training_duration if
                    res.training_duration is not None else proc_timer.duration,
                    predict_duration=res.predict_duration,
                    **res.others.__dict__)
예제 #9
0
def run(dataset: Dataset, config: TaskConfig):
    log.info("\n**** H2O AutoML ****\n")
    # Mapping of benchmark metrics to H2O metrics
    metrics_mapping = dict(acc='mean_per_class_error',
                           auc='AUC',
                           logloss='logloss',
                           mae='mae',
                           mse='mse',
                           rmse='rmse',
                           rmsle='rmsle')
    sort_metric = metrics_mapping[
        config.metric] if config.metric in metrics_mapping else None
    if sort_metric is None:
        # TODO: Figure out if we are going to blindly pass metrics through, or if we use a strict mapping
        log.warning("Performance metric %s not supported, defaulting to AUTO.",
                    config.metric)

    try:
        training_params = {
            k: v
            for k, v in config.framework_params.items()
            if not k.startswith('_')
        }
        nthreads = config.framework_params.get('_nthreads', config.cores)

        log.info("Starting H2O cluster with %s cores, %sMB memory.", nthreads,
                 config.max_mem_size_mb)
        h2o.init(nthreads=nthreads,
                 min_mem_size=str(config.max_mem_size_mb) + "M",
                 max_mem_size=str(config.max_mem_size_mb) + "M",
                 log_dir=os.path.join(config.output_dir, 'logs', config.name,
                                      str(config.fold)))

        # Load train as an H2O Frame, but test as a Pandas DataFrame
        log.debug("Loading train data from %s.", dataset.train.path)
        train = h2o.import_file(dataset.train.path)
        # train.impute(method='mean')
        log.debug("Loading test data from %s.", dataset.test.path)
        test = h2o.import_file(dataset.test.path)
        # test.impute(method='mean')

        log.info("Running model on task %s, fold %s.", config.name,
                 config.fold)
        log.debug(
            "Running H2O AutoML with a maximum time of %ss on %s core(s), optimizing %s.",
            config.max_runtime_seconds, config.cores, sort_metric)

        aml = H2OAutoML(max_runtime_secs=config.max_runtime_seconds,
                        sort_metric=sort_metric,
                        seed=config.seed,
                        **training_params)

        with Timer() as training:
            aml.train(y=dataset.target.index, training_frame=train)

        if not aml.leader:
            raise NoResultError(
                "H2O could not produce any model in the requested time.")

        lb = aml.leaderboard.as_data_frame()
        log.debug("Leaderboard:\n%s", lb.to_string())
        lbf = split_path(config.output_predictions_file)
        lbf.extension = '.leaderboard.csv'
        lbf = path_from_split(lbf)
        write_csv(lb, lbf)

        h2o_preds = aml.predict(test).as_data_frame(use_pandas=False)
        preds = to_data_frame(h2o_preds[1:], columns=h2o_preds[0])
        y_pred = preds.iloc[:, 0]

        h2o_truth = test[:,
                         dataset.target.index].as_data_frame(use_pandas=False,
                                                             header=False)
        y_truth = to_data_frame(h2o_truth)

        predictions = y_pred.values
        probabilities = preds.iloc[:, 1:].values
        truth = y_truth.values

        save_predictions_to_file(dataset=dataset,
                                 output_file=config.output_predictions_file,
                                 probabilities=probabilities,
                                 predictions=predictions,
                                 truth=truth)

        return dict(models_count=len(aml.leaderboard),
                    training_duration=training.duration)

    finally:
        if h2o.connection():
            h2o.remove_all()
            h2o.connection().close()
        if h2o.connection().local_server:
            h2o.connection().local_server.shutdown()