Example #1
def _create_child_runs_for_parameter_search(parent_estimator, parent_model,
                                            parent_run, child_tags):
    from itertools import zip_longest

    client = MlflowClient()
    # Use the start time of the parent parameter search run as a rough estimate for the
    # start time of child runs, since we cannot precisely determine when each point
    # in the parameter search space was explored
    child_run_start_time = parent_run.info.start_time
    child_run_end_time = int(time.time() * 1000)

    estimator_param_maps = parent_estimator.getEstimatorParamMaps()
    tuned_estimator = parent_estimator.getEstimator()

    metrics_dict, _ = _get_param_search_metrics_and_best_index(
        parent_estimator, parent_model)
    for i in range(len(estimator_param_maps)):
        child_estimator = tuned_estimator.copy(estimator_param_maps[i])
        tags_to_log = dict(child_tags) if child_tags else {}
        tags_to_log.update({MLFLOW_PARENT_RUN_ID: parent_run.info.run_id})
        tags_to_log.update(_get_estimator_info_tags(child_estimator))

        child_run = client.create_run(
            experiment_id=parent_run.info.experiment_id,
            start_time=child_run_start_time,
            tags=tags_to_log,
        )

        params_to_log = _get_instance_param_map(
            child_estimator,
            parent_estimator._autologging_metadata.uid_to_indexed_name_map)
        param_batches_to_log = _chunk_dict(
            params_to_log, chunk_size=MAX_PARAMS_TAGS_PER_BATCH)
        metrics_to_log = {k: v[i] for k, v in metrics_dict.items()}
        for params_batch, metrics_batch in zip_longest(param_batches_to_log,
                                                       [metrics_to_log],
                                                       fillvalue={}):
            # Trim any parameter keys / values and metric keys that exceed the limits
            # imposed by corresponding MLflow Tracking APIs (e.g., LogParam, LogMetric)
            truncated_params_batch = _truncate_dict(params_batch,
                                                    MAX_ENTITY_KEY_LENGTH,
                                                    MAX_PARAM_VAL_LENGTH)
            truncated_metrics_batch = _truncate_dict(
                metrics_batch, max_key_length=MAX_ENTITY_KEY_LENGTH)
            client.log_batch(
                run_id=child_run.info.run_id,
                params=[
                    Param(str(key), str(value))
                    for key, value in truncated_params_batch.items()
                ],
                metrics=[
                    Metric(key=str(key),
                           value=value,
                           timestamp=child_run_end_time,
                           step=0)
                    for key, value in truncated_metrics_batch.items()
                ],
            )
        client.set_terminated(run_id=child_run.info.run_id,
                              end_time=child_run_end_time)
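
For reference, the parent/child linkage above relies on the public MlflowClient API and the mlflow.parentRunId tag. The following is a minimal, self-contained sketch of that pattern; the experiment name, parameter, and metric values are illustrative assumptions, not part of the original snippet.

# Minimal sketch (assumed names/values): link a child run to a parent run via
# the MLFLOW_PARENT_RUN_ID tag, as the function above does for each point in
# the parameter search space.
from mlflow.tracking import MlflowClient
from mlflow.utils.mlflow_tags import MLFLOW_PARENT_RUN_ID

client = MlflowClient()
experiment_id = client.create_experiment("param-search-demo")  # assumed name

parent_run = client.create_run(experiment_id=experiment_id)
child_run = client.create_run(
    experiment_id=experiment_id,
    tags={MLFLOW_PARENT_RUN_ID: parent_run.info.run_id},
)
client.log_param(child_run.info.run_id, "maxDepth", "5")    # illustrative
client.log_metric(child_run.info.run_id, "avg_rmse", 0.87)  # illustrative
client.set_terminated(child_run.info.run_id)
client.set_terminated(parent_run.info.run_id)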
Example #2
def _create_child_runs_for_parameter_search(cv_estimator,
                                            parent_run,
                                            child_tags=None):
    """
    Creates a collection of child runs for a parameter search training session.
    Runs are reconstructed from the `cv_results_` attribute of the specified trained
    parameter search estimator - `cv_estimator`, which provides relevant performance
    metrics for each point in the parameter search space. One child run is created
    for each point in the parameter search space. For additional information, see
    `https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html`_. # noqa: E501

    :param cv_estimator: The trained parameter search estimator for which to create
                         child runs.
    :param parent_run: A :py:class:`mlflow.entities.Run` object referring to the parent
                       parameter search run for which child runs should be created.
    :param child_tags: An optional dictionary of MLflow tag keys and values to log
                       for each child run.
    """
    import pandas as pd
    from itertools import zip_longest

    client = MlflowClient()
    # Use the start time of the parent parameter search run as a rough estimate for the
    # start time of child runs, since we cannot precisely determine when each point
    # in the parameter search space was explored
    child_run_start_time = parent_run.info.start_time
    child_run_end_time = int(time.time() * 1000)

    seed_estimator = cv_estimator.estimator
    # In the unlikely case that the seed of a parameter search estimator is,
    # itself, a parameter search estimator, we should avoid logging the untuned
    # parameters of the seed's seed estimator
    should_log_params_deeply = not _is_parameter_search_estimator(
        seed_estimator)
    # Each row of `cv_results_` only provides parameters that vary across
    # the user-specified parameter grid. In order to log the complete set
    # of parameters for each child run, we fetch the parameters defined by
    # the seed estimator and update them with the parameter subset specified
    # in the result row
    base_params = seed_estimator.get_params(deep=should_log_params_deeply)

    cv_results_df = pd.DataFrame.from_dict(cv_estimator.cv_results_)
    for _, result_row in cv_results_df.iterrows():
        tags_to_log = dict(child_tags) if child_tags else {}
        tags_to_log.update({MLFLOW_PARENT_RUN_ID: parent_run.info.run_id})
        tags_to_log.update(_get_estimator_info_tags(seed_estimator))
        child_run = client.create_run(
            experiment_id=parent_run.info.experiment_id,
            start_time=child_run_start_time,
            tags=tags_to_log,
        )

        params_to_log = dict(base_params)
        params_to_log.update(result_row.get("params", {}))
        param_batches_to_log = _chunk_dict(
            params_to_log, chunk_size=MAX_PARAMS_TAGS_PER_BATCH)

        # Parameter values are recorded twice in the search's `cv_results_`:
        # once within a `params` column with dictionary values and once within
        # a separate dataframe column that is created for each parameter. To prevent
        # duplication of parameters, we log the consolidated values from the parameter
        # dictionary column and filter out the other parameter-specific columns with
        # names of the form `param_{param_name}`. Additionally, `cv_results_` produces
        # metrics for each training split, which is fairly verbose; accordingly, we filter
        # out per-split metrics in favor of aggregate metrics (mean, std, etc.)
        excluded_metric_prefixes = ["param", "split"]
        metric_batches_to_log = _chunk_dict(
            {
                key: value
                for key, value in result_row.items() if not any([
                    key.startswith(prefix)
                    for prefix in excluded_metric_prefixes
                ]) and isinstance(value, Number)
            },
            chunk_size=min(MAX_ENTITIES_PER_BATCH - MAX_PARAMS_TAGS_PER_BATCH,
                           MAX_METRICS_PER_BATCH),
        )

        for params_batch, metrics_batch in zip_longest(param_batches_to_log,
                                                       metric_batches_to_log,
                                                       fillvalue={}):
            # Trim any parameter keys / values and metric keys that exceed the limits
            # imposed by corresponding MLflow Tracking APIs (e.g., LogParam, LogMetric)
            truncated_params_batch = _truncate_dict(params_batch,
                                                    MAX_ENTITY_KEY_LENGTH,
                                                    MAX_PARAM_VAL_LENGTH)
            truncated_metrics_batch = _truncate_dict(
                metrics_batch, max_key_length=MAX_ENTITY_KEY_LENGTH)
            client.log_batch(
                run_id=child_run.info.run_id,
                params=[
                    Param(str(key), str(value))
                    for key, value in truncated_params_batch.items()
                ],
                metrics=[
                    Metric(key=str(key),
                           value=value,
                           timestamp=child_run_end_time,
                           step=0)
                    for key, value in truncated_metrics_batch.items()
                ],
            )

        client.set_terminated(run_id=child_run.info.run_id,
                              end_time=child_run_end_time)
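
To make the `cv_results_` layout that this function iterates over concrete, here is a small scikit-learn sketch; the estimator, parameter grid, and dataset are assumptions chosen for demonstration. It shows why the code filters the `param` and `split` prefixes: each parameter appears both inside the consolidated `params` column and as its own `param_{param_name}` column, next to verbose per-split metrics.

# Illustrative sketch (assumed estimator/grid/data): inspect the cv_results_
# structure consumed by the function above.
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV

X, y = load_iris(return_X_y=True)
search = GridSearchCV(
    LogisticRegression(max_iter=200),
    param_grid={"C": [0.1, 1.0]},
    cv=3,
)
search.fit(X, y)

cv_results_df = pd.DataFrame.from_dict(search.cv_results_)
# Parameters show up twice (the `params` dict column and a dedicated `param_C`
# column), and per-split metrics such as `split0_test_score` sit alongside
# aggregates such as `mean_test_score` and `std_test_score`.
print(cv_results_df.columns.tolist())
print(cv_results_df[["params", "param_C", "mean_test_score"]])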
Example #3
import mlflow
import mlflow.pytorch
import numpy as np
import torch
from mlflow.tracking import MlflowClient
from omegaconf import DictConfig, ListConfig


class MLflowWriter(object):
    def __init__(self, exp_name, save_dir, log_every, **mlflow_cfg):
        mlflow.set_tracking_uri(save_dir)
        self.client = MlflowClient(**mlflow_cfg)
        mlflow.set_experiment(exp_name)
        self.experiment_id = self.client.get_experiment_by_name(
            exp_name).experiment_id
        self.run_id = self.client.create_run(self.experiment_id).info.run_id

        self.log_every = log_every
        self.clear()

    def log_params_from_omegaconf(self, params):
        self._explore_recursive("", params)

    def _explore_recursive(self, parent_name, element):
        if isinstance(element, DictConfig):
            iterator = element.items()
        elif isinstance(element, ListConfig):
            iterator = enumerate(element)
        else:
            # Nothing to iterate over for scalar / non-config elements
            return

        for k, v in iterator:
            if isinstance(v, DictConfig) or isinstance(v, ListConfig):
                self._explore_recursive(f"{parent_name}{k}.", v)
            else:
                self.client.log_param(
                    self.run_id, f"{parent_name}{k}", v)

    def log_torch_model(self, model, epoch):
        with mlflow.start_run(run_id=self.run_id):
            mlflow.pytorch.log_model(model, "model_%04d" % epoch)

    def log_metric(self, key, value, is_training):
        if isinstance(value, torch.Tensor):
            value = float(value.detach().cpu().numpy())

        metric_name = "train/" if is_training else "valid/"
        metric_name += str(key)

        if metric_name in self.metrics:
            self.metrics[metric_name].append(value)
        else:
            self.metrics[metric_name] = [value]

    def next_iteration(self):
        self.iterations += 1
        if self.iterations % self.log_every == 0:
            self.toMlflow(nb_data=self.log_every)

    def toMlflow(self, nb_data=0, step=0):
        for key, value in self.metrics.items():
            self.client.log_metric(
                self.run_id, key,
                np.mean(value[-nb_data:]), step=step)

    def get_mean(self, key, is_training):
        metric_name = "train/" if is_training else "valid/"
        metric_name += str(key)

        return np.mean(self.metrics[metric_name])

    def clear(self):
        self.metrics = {}
        self.iterations = 0

    def log_artifact(self, path):
        self.client.log_artifact(self.run_id, local_path=path)

    def terminate(self):
        self.client.set_terminated(self.run_id)
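
A short usage sketch for the writer above; the experiment name, save directory, config fields, and loss values are assumptions for illustration only.

# Usage sketch (assumed names/values) for MLflowWriter: log an OmegaConf
# config as flattened parameters, then record and periodically flush metrics.
from omegaconf import OmegaConf

cfg = OmegaConf.create({"optimizer": {"lr": 1e-3}, "layers": [64, 32]})
writer = MLflowWriter(exp_name="demo-exp", save_dir="./mlruns", log_every=10)
writer.log_params_from_omegaconf(cfg)

for step in range(10):
    loss = torch.tensor(0.5)  # placeholder for a real training loss
    writer.log_metric("loss", loss, is_training=True)
    writer.next_iteration()   # flushes averaged metrics every `log_every` calls

writer.terminate()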