Example #1
    def evaluate_model(self, item, model, run_hash):
        from mlflow.tracking.client import MlflowClient
        from mlflow.entities import ViewType

        exp_name = ':'.join([
            self.runner.analysis_name, self.runner.current_pipeline_name,
            str(item.get('base', 'None')), str(item['func']), item['hash'],
        ])

        client = MlflowClient()
        experiments = [
            exp for exp in client.list_experiments() if exp.name == exp_name
        ]
        if len(experiments) != 1:
            raise ValueError(
                'Expected exactly one experiment named %r, found %d.' %
                (exp_name, len(experiments)))
        experiment = experiments[0]

        runs = client.search_runs(
            experiment_ids=[experiment.experiment_id],
            filter_string='tags."mlflow.runName" = \'{}\''.format(run_hash),
            run_view_type=ViewType.ACTIVE_ONLY,
            max_results=1,
        )
        if not runs:
            raise ValueError('Unable to find run %r in experiment %r.' %
                             (run_hash, exp_name))
        run = runs[0]

        run_id = run.info.run_id
        Tracker.resume_run(run_id)

        process = self.get_process(item)

        for source in model.sources:
            source.load_files()
            source.load()
        model.load()

        print('Evaluating run ' + run_hash + '...')
        process.run_id = run_hash
        process.evaluate(model)

        Tracker.end_run()
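
Note: the lookup above fetches every experiment via client.list_experiments() and filters by name on the client side. Depending on the MLflow version available, the experiment can be fetched by name directly; a minimal sketch reusing exp_name from the example above:

from mlflow.tracking.client import MlflowClient

client = MlflowClient()
experiment = client.get_experiment_by_name(exp_name)
if experiment is None:
    raise ValueError('Unable to find experiment %r.' % exp_name)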
Example #2
def search(data, max_runs, metric, algo):
    tracking_client = mlflow.tracking.MlflowClient()
    _inf = np.finfo(np.float64).max
    
    space = [
        hp.quniform('max_depth', 2, 12, 1),
        hp.quniform('min_samples_leaf', 2, 20, 1),
    ]
    
    with mlflow.start_run() as run:
        exp_id = run.info.experiment_id

        best = fmin(
            fn=train_fn(exp_id, _inf, _inf),
            space=space,
            algo=tpe.suggest if algo == "tpe.suggest" else rand.suggest,
            max_evals=max_runs,
        )
        mlflow.set_tag("best params", str(best))
        # find all runs generated by this search
        client = MlflowClient()
        query = "tags.mlflow.parentRunId = '{run_id}' ".format(run_id=run.info.run_id)
        runs = client.search_runs([exp_id], query)
        # iterate over all runs to find best one
        best_train, best_valid = _inf, _inf
        best_run = None
        for r in runs:
            if r.data.metrics["val_auc"] < best_valid:
                best_run = r
                best_train = r.data.metrics["train_auc"]
                best_valid = r.data.metrics["val_auc"]
        # log best run metrics as the final metrics of this run.
        mlflow.set_tag("best_run", best_run.info.run_id)
        mlflow.log_metrics({
            "train_{}".format(metric): best_train,
            "val_{}".format(metric): best_valid,
        })
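
train_fn is not shown in this example. fmin calls the returned objective with one sampled point (a two-element list here, because space is a list) and minimizes the returned loss. A hypothetical sketch of such a factory, matching the train_fn(exp_id, _inf, _inf) call above; the model training itself is a placeholder:

import mlflow
from hyperopt import STATUS_OK

def train_fn(experiment_id, null_train_loss, null_valid_loss):
    def objective(params):
        max_depth, min_samples_leaf = int(params[0]), int(params[1])
        with mlflow.start_run(nested=True):
            mlflow.log_params({
                "max_depth": max_depth,
                "min_samples_leaf": min_samples_leaf,
            })
            # ... train the model here and compute its AUC scores (elided) ...
            train_auc, val_auc = 0.5, 0.5  # placeholders
            mlflow.log_metrics({"train_auc": train_auc, "val_auc": val_auc})
        # fmin minimizes the returned loss; the direction here follows the
        # example's own selection loop, which treats a lower val_auc as better.
        return {"loss": val_auc, "status": STATUS_OK}
    return objective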
Example #3
def run(training_data, max_runs, batch_size, max_p, epochs, metric, gpy_model,
        gpy_acquisition, initial_design, seed):
    bounds = [
        {
            'name': 'lr',
            'type': 'continuous',
            'domain': (1e-5, 1e-1)
        },
        {
            'name': 'momentum',
            'type': 'continuous',
            'domain': (0.0, 1.0)
        },
    ]
    # client used to read back the metrics logged by the training runs
    tracking_client = mlflow.tracking.MlflowClient()

    def new_eval(nepochs,
                 experiment_id,
                 null_train_loss,
                 null_valid_loss,
                 null_test_loss,
                 return_all=False):
        """
        Create a new eval function

        :param nepochs: Number of epochs to train the model.
        :experiment_id: Experiment id for the training run
        :valid_null_loss: Loss of a null model on the validation dataset
        :test_null_loss: Loss of a null model on the test dataset.
        :return_test_loss: Return both validation and test loss if set.

        :return: new eval function.
        """
        def eval(params):
            """
            Train Keras model with given parameters by invoking MLflow run.

            Notice we store runUuid and resulting metric in a file. We will later use these to pick
            the best run and to log the runUuids of the child runs as an artifact. This is a
            temporary workaround until MLflow offers better mechanism of linking runs together.

            :param params: Parameters to the train_keras script we optimize over:
                          learning_rate, momentum
            :return: The metric value evaluated on the validation data.
            """
            lr, momentum = params[0]
            with mlflow.start_run(nested=True) as child_run:
                p = mlflow.projects.run(run_id=child_run.info.run_id,
                                        uri=".",
                                        entry_point="train",
                                        parameters={
                                            "training_data": training_data,
                                            "epochs": str(nepochs),
                                            "learning_rate": str(lr),
                                            "momentum": str(momentum),
                                            "seed": str(seed)
                                        },
                                        experiment_id=experiment_id,
                                        synchronous=False)
                succeeded = p.wait()
            if succeeded:
                training_run = tracking_client.get_run(p.run_id)
                metrics = training_run.data.metrics

                # cap the loss at the loss of the null model
                train_loss = min(null_train_loss,
                                 metrics["train_{}".format(metric)])
                valid_loss = min(null_valid_loss,
                                 metrics["val_{}".format(metric)])
                test_loss = min(null_test_loss,
                                metrics["test_{}".format(metric)])
            else:
                # run failed => return null loss
                tracking_client.set_terminated(p.run_id, "FAILED")
                train_loss = null_train_loss
                valid_loss = null_valid_loss
                test_loss = null_test_loss

            mlflow.log_metrics({
                "train_{}".format(metric): train_loss,
                "val_{}".format(metric): valid_loss,
                "test_{}".format(metric): test_loss
            })

            if return_all:
                return train_loss, valid_loss, test_loss
            else:
                return valid_loss

        return eval

    with mlflow.start_run() as run:
        experiment_id = run.info.experiment_id
        # Evaluate null model first.
        # We use null model (predict everything to the mean) as a reasonable upper bound on loss.
        # We need an upper bound to handle the failed runs (e.g. return NaNs) because GPyOpt can not
        # handle Infs.
        # Always including a null model in our results is also a good ML practice.
        train_null_loss, valid_null_loss, test_null_loss = new_eval(
            0, experiment_id, _inf, _inf, _inf, True)(params=[[0, 0]])
        myProblem = GPyOpt.methods.BayesianOptimization(
            new_eval(epochs, experiment_id, train_null_loss, valid_null_loss,
                     test_null_loss),
            bounds,
            evaluator_type="local_penalization"
            if min(batch_size, max_p) > 1 else "sequential",
            batch_size=batch_size,
            num_cores=max_p,
            model_type=gpy_model,
            acquisition_type=gpy_acquisition,
            initial_design_type=initial_design,
            initial_design_numdata=max_runs >> 2,
            exact_feval=False)
        myProblem.run_optimization(max_runs)
        matplotlib.use('agg')
        plt.switch_backend('agg')
        with TempDir() as tmp:
            acquisition_plot = tmp.path("acquisition_plot.png")
            convergence_plot = tmp.path("convergence_plot.png")
            myProblem.plot_acquisition(filename=acquisition_plot)
            myProblem.plot_convergence(filename=convergence_plot)
            if os.path.exists(convergence_plot):
                mlflow.log_artifact(convergence_plot, "convergence_plot")
            if os.path.exists(acquisition_plot):
                mlflow.log_artifact(acquisition_plot, "acquisition_plot")

        # find the best run, log its metrics as the final metrics of this run.
        client = MlflowClient()
        runs = client.search_runs(
            [experiment_id], "tags.mlflow.parentRunId = '{run_id}' ".format(
                run_id=run.info.run_id))
        best_val_train = _inf
        best_val_valid = _inf
        best_val_test = _inf
        best_run = None
        for r in runs:
            if r.data.metrics["val_rmse"] < best_val_valid:
                best_run = r
                best_val_train = r.data.metrics["train_rmse"]
                best_val_valid = r.data.metrics["val_rmse"]
                best_val_test = r.data.metrics["test_rmse"]
        mlflow.set_tag("best_run", best_run.info.run_id)
        mlflow.log_metrics({
            "train_{}".format(metric): best_val_train,
            "val_{}".format(metric): best_val_valid,
            "test_{}".format(metric): best_val_test
        })
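
The "train" entry point launched by mlflow.projects.run above is not shown in this listing. For the parent search to work, that script must log the train_/val_/test_ metrics the parent later reads back (rmse in the final selection loop). A hypothetical minimal shape, with the actual Keras training elided:

import mlflow

def train(training_data, epochs, learning_rate, momentum, seed):
    # Resumes the child run the parent created (it passed run_id to projects.run).
    with mlflow.start_run():
        # ... build and fit the model on training_data (elided) ...
        train_rmse, val_rmse, test_rmse = 0.0, 0.0, 0.0  # placeholders
        mlflow.log_metrics({
            "train_rmse": train_rmse,
            "val_rmse": val_rmse,
            "test_rmse": test_rmse,
        })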
Example #4
def run(training_data, config_path, max_runs, max_p, metric, seed):
    val_metric = f"val_{metric}"

    np.random.seed(seed)
    tracking_client = mlflow.tracking.MlflowClient()

    def new_eval(experiment_id):
        def eval(parms):
            with mlflow.start_run(nested=True) as child_run:
                p = mlflow.projects.run(run_id=child_run.info.run_id,
                                        uri=".",
                                        entry_point="train",
                                        parameters={
                                            "training_data": training_data,
                                            "colsample_bytree": str(parms['colsample_bytree']),
                                            "subsample": str(parms['subsample']),
                                            "target-name": str(parms['target_name']),
                                        },
                                        experiment_id=experiment_id,
                                        synchronous=True,
                                        use_conda=False)
                succeeded = p.wait()
            if succeeded:
                training_run = tracking_client.get_run(p.run_id)
                metrics = training_run.data.metrics
                val_loss = metrics[val_metric]
            else:
                tracking_client.set_terminated(p.run_id, "FAILED")
                val_loss = _inf
            mlflow.log_metrics({
                val_metric: val_loss,
            })
            return p.run_id, val_loss

        return eval

    with mlflow.start_run() as run:
        experiment_id = run.info.experiment_id

        with ThreadPoolExecutor(max_workers=max_p) as executor:
            _ = executor.map(new_eval(experiment_id),
                             generate_configs(config_path, max_runs))

        # find the best run, log its metrics as the final metrics of this run.
        client = MlflowClient()
        runs = client.search_runs(
            [experiment_id], f"tags.mlflow.parentRunId = '{run.info.run_id}' ")

        print(runs)

        best_val_valid = _inf
        best_run = None
        for r in runs:
            if r.data.metrics[val_metric] < best_val_valid:
                best_run = r
                best_val_valid = r.data.metrics[val_metric]
        mlflow.set_tag("best_run", best_run.info.run_id)
        mlflow.log_metrics({
            "val_{}".format(metric): best_val_valid,
        })
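
generate_configs is not shown; the eval function above expects each item it yields to be a dict with the keys colsample_bytree, subsample and target_name. A hypothetical sketch, under the assumption that the config file supplies the target column name and value ranges for the two sampled parameters:

import json
import numpy as np

def generate_configs(config_path, max_runs):
    # The config keys used here (*_range, target_name) are assumptions about
    # the file layout, not taken from the original example.
    with open(config_path) as f:
        config = json.load(f)
    for _ in range(max_runs):
        yield {
            "colsample_bytree": np.random.uniform(*config["colsample_bytree_range"]),
            "subsample": np.random.uniform(*config["subsample_range"]),
            "target_name": config["target_name"],
        }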
Example #5
def train(training_data, max_runs, epochs, metric, algo, seed):
    """
    Run hyperparameter optimization.
    """
    # client used to read back the metrics logged by the training runs
    tracking_client = mlflow.tracking.MlflowClient()

    def new_eval(nepochs,
                 experiment_id,
                 null_train_loss,
                 null_valid_loss,
                 null_test_loss,
                 return_all=False):
        """
        Create a new eval function

        :param nepochs: Number of epochs to train the model.
        :experiment_id: Experiment id for the training run
        :valid_null_loss: Loss of a null model on the validation dataset
        :test_null_loss: Loss of a null model on the test dataset.
        :return_test_loss: Return both validation and test loss if set.

        :return: new eval function.
        """
        def eval(params):
            """
            Train Keras model with given parameters by invoking MLflow run.

            Notice we store runUuid and resulting metric in a file. We will later use these to pick
            the best run and to log the runUuids of the child runs as an artifact. This is a
            temporary workaround until MLflow offers better mechanism of linking runs together.

            :param params: Parameters to the train_keras script we optimize over:
                          learning_rate, momentum
            :return: The metric value evaluated on the validation data.
            """
            import mlflow.tracking

            lr, momentum = params
            with mlflow.start_run(nested=True) as child_run:
                params = {
                    "training_data": training_data,
                    "epochs": str(nepochs),
                    "learning_rate": str(lr),
                    "momentum": str(momentum),
                    "seed": str(seed),
                }
                mlflow.log_params(params)
                p = mlflow.projects.run(
                    uri=".",
                    entry_point="train",
                    run_id=child_run.info.run_id,
                    parameters={
                        "training_data": training_data,
                        "epochs": str(nepochs),
                        "learning_rate": str(lr),
                        "momentum": str(momentum),
                        "seed": seed,
                    },
                    experiment_id=experiment_id,
                    use_conda=False,  # We are already in the environment
                    synchronous=False,  # Allow the run to fail if a model is not properly created
                )
                succeeded = p.wait()
            if succeeded:
                training_run = tracking_client.get_run(p.run_id)
                metrics = training_run.data.metrics
                # cap the loss at the loss of the null model
                train_loss = min(null_train_loss,
                                 metrics["train_{}".format(metric)])
                valid_loss = min(null_valid_loss,
                                 metrics["val_{}".format(metric)])
                test_loss = min(null_test_loss,
                                metrics["test_{}".format(metric)])
            else:
                # run failed => return null loss
                tracking_client.set_terminated(p.run_id, "FAILED")
                train_loss = null_train_loss
                valid_loss = null_valid_loss
                test_loss = null_test_loss

            mlflow.log_metrics({
                "train_{}".format(metric): train_loss,
                "val_{}".format(metric): valid_loss,
                "test_{}".format(metric): test_loss,
            })

            if return_all:
                return train_loss, valid_loss, test_loss
            else:
                return valid_loss

        return eval

    space = [
        hp.uniform("lr", 1e-5, 1e-1),
        hp.uniform("momentum", 0.0, 1.0),
    ]

    with mlflow.start_run() as run:
        experiment_id = run.info.experiment_id
        # Evaluate null model first.
        train_null_loss, valid_null_loss, test_null_loss = new_eval(
            0, experiment_id, _inf, _inf, _inf, True)(params=[0, 0])
        best = fmin(
            fn=new_eval(epochs, experiment_id, train_null_loss,
                        valid_null_loss, test_null_loss),
            space=space,
            algo=tpe.suggest if algo == "tpe.suggest" else rand.suggest,
            max_evals=max_runs,
        )
        mlflow.set_tag("best params", str(best))
        # find the best run, log its metrics as the final metrics of this run.
        client = MlflowClient()
        runs = client.search_runs(
            [experiment_id], "tags.mlflow.parentRunId = '{run_id}' ".format(
                run_id=run.info.run_id))
        best_val_train = _inf
        best_val_valid = _inf
        best_val_test = _inf
        best_run = None
        for r in runs:
            if r.data.metrics["val_rmse"] < best_val_valid:
                best_run = r
                best_val_train = r.data.metrics["train_rmse"]
                best_val_valid = r.data.metrics["val_rmse"]
                best_val_test = r.data.metrics["test_rmse"]
        mlflow.set_tag("best_run", best_run.info.run_id)
        mlflow.log_metrics({
            "train_{}".format(metric): best_val_train,
            "val_{}".format(metric): best_val_valid,
            "test_{}".format(metric): best_val_test,
        })
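
As an aside, reasonably recent MLflow clients let search_runs sort and limit results server-side, which can replace the manual best-run loop above; a sketch (not part of the original example) that reuses client, experiment_id and run from this function:

best_child = client.search_runs(
    [experiment_id],
    "tags.mlflow.parentRunId = '{}'".format(run.info.run_id),
    order_by=["metrics.val_rmse ASC"],  # lowest validation RMSE first
    max_results=1,
)[0]
mlflow.set_tag("best_run", best_child.info.run_id)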
Example #6
def run(training_data, max_runs, max_p, epochs, metric, seed):
    train_metric = "train_{}".format(metric)
    val_metric = "val_{}".format(metric)
    test_metric = "test_{}".format(metric)
    np.random.seed(seed)
    tracking_client = mlflow.tracking.MlflowClient()

    def new_eval(nepochs,
                 experiment_id,
                 null_train_loss=_inf,
                 null_val_loss=_inf,
                 null_test_loss=_inf):
        def eval(parms):
            lr, momentum = parms
            with mlflow.start_run(nested=True) as child_run:
                params = {
                    "training_data": training_data,
                    "epochs": str(nepochs),
                    "learning_rate": str(lr),
                    "momentum": str(momentum),
                    "seed": str(seed),
                }
                mlflow.log_params(params)
                p = mlflow.projects.run(
                    run_id=child_run.info.run_id,
                    uri=".",
                    entry_point="train",
                    parameters={
                        "training_data": training_data,
                        "epochs": str(nepochs),
                        "learning_rate": str(lr),
                        "momentum": str(momentum),
                        "seed": str(seed),
                    },
                    experiment_id=experiment_id,
                    synchronous=False,
                )
                succeeded = p.wait()
            if succeeded:
                training_run = tracking_client.get_run(p.run_id)
                metrics = training_run.data.metrics
                # cap the loss at the loss of the null model
                train_loss = min(null_train_loss, metrics[train_metric])
                val_loss = min(null_val_loss, metrics[val_metric])
                test_loss = min(null_test_loss, metrics[test_metric])
            else:
                # run failed => return null loss
                tracking_client.set_terminated(p.run_id, "FAILED")
                train_loss = null_train_loss
                val_loss = null_val_loss
                test_loss = null_test_loss
            mlflow.log_metrics({
                "train_{}".format(metric): train_loss,
                "val_{}".format(metric): val_loss,
                "test_{}".format(metric): test_loss,
            })
            return p.run_id, train_loss, val_loss, test_loss

        return eval

    with mlflow.start_run() as run:
        experiment_id = run.info.experiment_id
        _, null_train_loss, null_val_loss, null_test_loss = new_eval(
            0, experiment_id)((0, 0))
        runs = [(np.random.uniform(1e-5, 1e-1), np.random.uniform(0, 1.0))
                for _ in range(max_runs)]
        with ThreadPoolExecutor(max_workers=max_p) as executor:
            _ = executor.map(
                new_eval(epochs, experiment_id, null_train_loss, null_val_loss,
                         null_test_loss),
                runs,
            )

        # find the best run, log its metrics as the final metrics of this run.
        client = MlflowClient()
        runs = client.search_runs(
            [experiment_id], "tags.mlflow.parentRunId = '{run_id}' ".format(
                run_id=run.info.run_id))
        best_val_train = _inf
        best_val_valid = _inf
        best_val_test = _inf
        best_run = None
        for r in runs:
            if r.data.metrics["val_rmse"] < best_val_valid:
                best_run = r
                best_val_train = r.data.metrics["train_rmse"]
                best_val_valid = r.data.metrics["val_rmse"]
                best_val_test = r.data.metrics["test_rmse"]
        mlflow.set_tag("best_run", best_run.info.run_id)
        mlflow.log_metrics({
            "train_{}".format(metric): best_val_train,
            "val_{}".format(metric): best_val_valid,
            "test_{}".format(metric): best_val_test,
        })
def run(max_runs, max_p):
    tracking_client = mlflow.tracking.MlflowClient()
    np.random.seed(_random_state)

    def new_eval(experiment_id):
        def eval(parms):
            md, msl = parms
            with mlflow.start_run(nested=True) as child_run:
                p = mlflow.projects.run(
                    run_id=child_run.info.run_id,
                    uri=".",
                    entry_point="train",
                    parameters={
                        "max_depth": md,
                        "min_samples_leaf": msl,
                    },
                    experiment_id=experiment_id,
                    synchronous=False,
                )
                succeeded = p.wait()
                if succeeded:
                    training_run = tracking_client.get_run(p.run_id)
                    metrics = training_run.data.metrics
                    # read the accuracies logged by the child training run
                    train_acc = metrics["train_acc"]
                    test_acc = metrics["test_acc"]
                else:
                    # run failed => record the worst possible accuracy
                    tracking_client.set_terminated(p.run_id, "FAILED")
                    train_acc = -np.finfo(np.float64).max
                    test_acc = -np.finfo(np.float64).max
                mlflow.log_params({
                    "param_max_depth": md,
                    "param_min_samples_leaf": msl,
                })
                mlflow.log_metrics({
                    "train_acc": train_acc,
                    "test_acc": test_acc,
                })
            return p.run_id

        return eval

    with mlflow.start_run() as run:
        experiment_id = run.info.experiment_id
        runs = [(np.random.randint(1, 10), np.random.randint(1, 10))
                for _ in range(max_runs)]

        with ThreadPoolExecutor(max_workers=max_p) as executor:
            _ = executor.map(
                new_eval(experiment_id),
                runs,
            )

        # find the best run, log its metrics as the final metrics of this run.
        client = MlflowClient()
        runs = client.search_runs(
            [experiment_id], "tags.mlflow.parentRunId = '{run_id}' ".format(
                run_id=run.info.run_id))

        best_val_train = -np.finfo(np.float64).max
        best_val_test = -np.finfo(np.float64).max
        best_run = None
        for r in runs:
            if r.data.metrics["test_acc"] > best_val_test:
                best_run = r
                best_val_train = r.data.metrics["train_acc"]
                best_val_test = r.data.metrics["test_acc"]
        mlflow.set_tag("best_run", best_run.info.run_id)
        mlflow.log_metrics({
            "train_acc": best_val_train,
            "test_acc": best_val_test,
        })
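
The second run() above drives a "train" entry point that takes max_depth and min_samples_leaf, and its child-run selection expects train_acc and test_acc to be logged. A hypothetical minimal shape of that script, with the actual model fitting elided:

import mlflow

def train(max_depth, min_samples_leaf):
    # Resumes the child run the parent created (it passed run_id to projects.run).
    with mlflow.start_run():
        # ... load data, fit a model with these hyperparameters, score it (elided) ...
        train_acc, test_acc = 0.0, 0.0  # placeholders
        mlflow.log_metrics({"train_acc": train_acc, "test_acc": test_acc})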