Example no. 1
def test_mlflow_bad_metric_name_handling(dirname):
    import mlflow

    true_values = [123.0, 23.4, 333.4]
    with MLflowLogger(os.path.join(dirname, "mlruns")) as mlflow_logger:

        active_run = mlflow.active_run()

        handler = OutputHandler(tag="training", metric_names="all")
        engine = Engine(lambda e, b: None)
        engine.state = State(metrics={
            "metric:0 in %": 123.0,
            "metric 0": 1000.0,
        })

        with pytest.warns(UserWarning, match=r"MLflowLogger output_handler encountered an invalid metric name"):

            engine.state.epoch = 1
            handler(engine, mlflow_logger, event_name=Events.EPOCH_COMPLETED)

            for i, v in enumerate(true_values):
                engine.state.epoch += 1
                engine.state.metrics['metric 0'] = v
                handler(engine, mlflow_logger, event_name=Events.EPOCH_COMPLETED)

    from mlflow.tracking import MlflowClient

    client = MlflowClient(tracking_uri=os.path.join(dirname, "mlruns"))
    stored_values = client.get_metric_history(active_run.info.run_id, "training metric 0")

    for t, s in zip([1000.0, ] + true_values, stored_values):
        assert t == s.value
Example no. 2
def test_autolog_early_stopping_callback():
    mlflow.paddle.autolog()

    early_stopping = paddle.callbacks.EarlyStopping("loss",
                                                    mode="min",
                                                    patience=1,
                                                    min_delta=0)
    with mlflow.start_run() as run:
        train_model(callbacks=[early_stopping])

    client = MlflowClient()
    data = client.get_run(run.info.run_id).data

    for param_key in ["monitor", "patience", "min_delta", "baseline"]:
        assert param_key in data.params
        assert data.params[param_key] == str(getattr(early_stopping,
                                                     param_key))

    for metric_key in ["stopped_epoch", "best_value"]:
        assert metric_key in data.metrics
        assert float(data.metrics[metric_key]) == getattr(
            early_stopping, metric_key)

    for metric_key in ["loss", "step"]:
        assert metric_key in data.metrics
        metric_history = client.get_metric_history(run.info.run_id, metric_key)
        assert len(metric_history) == NUM_EPOCHS
Example no. 3
def test_autolog_logs_expected_data():
    mlflow.paddle.autolog()

    with mlflow.start_run() as run:
        train_model()

    client = MlflowClient()
    data = client.get_run(run.info.run_id).data

    # Testing params are logged
    for param_key, expected_param_value in [("optimizer_name", "Adam"),
                                            ("learning_rate", "0.01")]:
        assert param_key in data.params
        assert data.params[param_key] == expected_param_value

    # Testing metrics are logged
    for metric_key in [
            "batch_size", "loss", "step", "eval_batch_size", "eval_loss",
            "eval_step"
    ]:
        assert metric_key in data.metrics
        metric_history = client.get_metric_history(run.info.run_id, metric_key)
        assert len(metric_history) == NUM_EPOCHS

    # Testing model_summary.txt is saved
    artifacts = client.list_artifacts(run.info.run_id)
    assert any(x.path == "model_summary.txt" for x in artifacts)
Example no. 4
def test_mlflow(ray_start_4_cpus, tmp_path):
    config = TestConfig()

    params = {"p1": "p1"}

    temp_dir = tmp_path
    num_workers = 4

    def train_func(config):
        train.report(episode_reward_mean=4)
        train.report(episode_reward_mean=5)
        train.report(episode_reward_mean=6)
        return 1

    callback = MLflowLoggerCallback(experiment_name="test_exp",
                                    logdir=temp_dir)
    trainer = Trainer(config, num_workers=num_workers)
    trainer.start()
    trainer.run(train_func, config=params, callbacks=[callback])

    from mlflow.tracking import MlflowClient

    client = MlflowClient(
        tracking_uri=callback.mlflow_util._mlflow.get_tracking_uri())

    experiment_id = client.get_experiment_by_name("test_exp").experiment_id
    all_runs = callback.mlflow_util._mlflow.search_runs(
        experiment_ids=[experiment_id])
    assert len(all_runs) == 1
    # all_runs is a pandas dataframe.
    all_runs = all_runs.to_dict(orient="records")
    run_id = all_runs[0]["run_id"]
    run = client.get_run(run_id)

    assert run.data.params == params
    assert ("episode_reward_mean" in run.data.metrics
            and run.data.metrics["episode_reward_mean"] == 6.0)
    assert (TRAINING_ITERATION in run.data.metrics
            and run.data.metrics[TRAINING_ITERATION] == 3.0)

    metric_history = client.get_metric_history(run_id=run_id,
                                               key="episode_reward_mean")

    assert len(metric_history) == 3
    iterations = [metric.step for metric in metric_history]
    assert iterations == [1, 2, 3]
    rewards = [metric.value for metric in metric_history]
    assert rewards == [4, 5, 6]
Example no. 5
def test_integration(dirname):

    n_epochs = 5
    data = list(range(50))

    losses = torch.rand(n_epochs * len(data))
    losses_iter = iter(losses)

    def update_fn(engine, batch):
        return next(losses_iter)

    trainer = Engine(update_fn)

    mlflow_logger = MLflowLogger(tracking_uri=os.path.join(dirname, "mlruns"))

    true_values = []

    def dummy_handler(engine, logger, event_name):
        global_step = engine.state.get_event_attrib_value(event_name)
        v = global_step * 0.1
        true_values.append(v)
        logger.log_metrics({"test_value": v}, step=global_step)

    mlflow_logger.attach(trainer,
                         log_handler=dummy_handler,
                         event_name=Events.EPOCH_COMPLETED)

    import mlflow

    active_run = mlflow.active_run()

    trainer.run(data, max_epochs=n_epochs)
    mlflow_logger.close()

    from mlflow.tracking import MlflowClient

    client = MlflowClient(tracking_uri=os.path.join(dirname, "mlruns"))
    stored_values = client.get_metric_history(active_run.info.run_id,
                                              "test_value")

    for t, s in zip(true_values, stored_values):
        assert pytest.approx(t) == s.value
Example no. 6
    def _load(self) -> MetricsDict:
        """Load MlflowMetricDataSet.

        Returns:
            Dict[str, Union[int, float]]: Dictionary with MLflow metrics dataset.
        """
        client = MlflowClient()
        run_id = self.run_id
        all_metrics = client._tracking_client.store.get_all_metrics(run_uuid=run_id)
        dataset_metrics = filter(self._is_dataset_metric, all_metrics)
        dataset = reduce(
            lambda xs, x: self._update_metric(
                # get_all_metrics returns last saved values per metric key.
                # All values are required here.
                client.get_metric_history(run_id, x.key),
                xs,
            ),
            dataset_metrics,
            {},
        )
        return dataset
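
The loader above reaches into the private client._tracking_client.store.get_all_metrics helper just to enumerate the logged metric keys. As a rough, non-authoritative sketch, the same idea can be expressed with only the public client API; the standalone is_dataset_metric predicate below is a hypothetical stand-in for self._is_dataset_metric:

from mlflow.tracking import MlflowClient

def load_full_histories(run_id, is_dataset_metric):
    # run.data.metrics only holds the last value per key, so it is used here
    # purely to enumerate the keys before pulling each full history.
    client = MlflowClient()
    last_values = client.get_run(run_id).data.metrics
    return {
        key: client.get_metric_history(run_id, key)
        for key in last_values
        if is_dataset_metric(key)  # hypothetical predicate over metric key names
    }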
Example no. 7
    def _load(self):
        self._validate_run_id()
        mlflow_client = MlflowClient()
        metric_history = mlflow_client.get_metric_history(
            run_id=self.run_id,
            key=self.key)  # gets active run if no run_id was given

        # the metric history is always a list of mlflow.entities.metric.Metric;
        # we want the value of the last one stored because this dataset deals with a single metric
        step = self._load_args.get("step")

        if step is None:
            # we take the last value recorded
            metric_value = metric_history[-1].value
        else:
            # we should take the last historical value with the given step
            # (it is possible to have several values with the same step)
            metric_value = next(metric.value
                                for metric in reversed(metric_history)
                                if metric.step == step)

        return metric_value
Example no. 8
    def _save(self, data: float):
        if self._logging_activated:
            self._validate_run_id()
            # access it once instead of calling self.run_id everywhere, to avoid
            # looking up the active run each time
            run_id = self.run_id

            mlflow_client = MlflowClient()

            # fetch the metric history if this key has been saved previously, so that
            # the right step is used below
            # reminder: _exists() is True even if no run_id was originally specified but a run is active
            metric_history = (mlflow_client.get_metric_history(
                run_id=run_id, key=self.key) if self._exists() else [])

            save_args = deepcopy(self._save_args)
            step = save_args.pop("step", None)
            if step is None:
                if self.mode == "overwrite":
                    step = max([metric.step for metric in metric_history],
                               default=0)
                elif self.mode == "append":
                    # max([]) defaults to -1 so that the first appended step is 0
                    step = (max([metric.step for metric in metric_history],
                                default=-1) + 1)
                else:
                    raise ValueError(
                        f"save_args['mode'] must be one of {self.SUPPORTED_SAVE_MODES}, got '{self.mode}' instead."
                    )

            mlflow_client.log_metric(
                run_id=run_id,
                key=self.key,
                value=data,
                step=step,
                **save_args,
            )
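
As a quick illustration of how the two save modes above pick the step to log at (the numbers are invented, not taken from a real run):

# Steps already stored for this key, e.g. extracted from get_metric_history.
existing_steps = [0, 1, 2]

# "overwrite" re-logs at the latest recorded step (0 when the history is empty),
overwrite_step = max(existing_steps, default=0)        # -> 2
# while "append" logs at the next free step (also 0 when the history is empty).
append_step = max(existing_steps, default=-1) + 1      # -> 3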
Example no. 9
def assert_are_metrics_logged(
    data: Dict[str, Union[float, List[float]]],
    client: MlflowClient,
    run_id: str,
    prefix: Optional[str] = None,
) -> bool:
    """Helper function which checks if given metrics where logged.

    Args:
        data: (Dict[str, Union[float, List[float]]]): Logged metrics.
        client: (MlflowClient): MLflow client instance.
        run_id: (str): id of run where data was logged.
        prefix: (Optional[str]): Optional prefix the metrics were logged under (keys are looked up as "{prefix}.{key}").
    """
    for key in data.keys():
        metric_key = f"{prefix}.{key}" if prefix else key
        metric = client.get_metric_history(run_id, metric_key)
        data_len = len(data[key]) if isinstance(data[key], list) else 1
        assert len(metric) == data_len
        for idx, item in enumerate(metric):
            data_value = (data[key][idx]["value"] if isinstance(
                data[key], list) else data[key]["value"])
            assert item.value == data_value and item.key == metric_key
    return True
Example no. 10
    def _load(self):
        self._validate_run_id()
        mode = self._load_args.get("mode", "list")
        mlflow_client = MlflowClient()

        metric_history = mlflow_client.get_metric_history(self.run_id,
                                                          key=self.key)

        if mode == "list":
            simplified_history = [metric.value for metric in metric_history]
        elif mode == "dict":
            simplified_history = {
                metric.step: metric.value
                for metric in metric_history
            }
        elif mode == "history":
            # history is a list of dicts whose keys are "log_metric" arguments, e.g.:
            # [{"step": 0, "value": 0.1}, {"step": 1, "value": 0.2}, {"step": 2, "value": 0.3}]
            simplified_history = [{
                "step": metric.step,
                "value": metric.value,
                "timestamp": metric.timestamp,
            } for metric in metric_history]
        return simplified_history
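
To make the three return shapes concrete, here is a toy history with invented values and the result each mode would produce from it:

# Toy stand-in for the entries returned by get_metric_history (invented values).
toy_history = [
    {"step": 0, "value": 0.1, "timestamp": 1000},
    {"step": 1, "value": 0.2, "timestamp": 2000},
    {"step": 2, "value": 0.3, "timestamp": 3000},
]
as_list = [m["value"] for m in toy_history]             # mode="list"
as_dict = {m["step"]: m["value"] for m in toy_history}  # mode="dict"
# mode="history" keeps all three fields, i.e. entries shaped like toy_history.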
Example no. 11
class MlflowHelper:
    def __init__(
        self,
        tracking_uri: str = "http://localhost:5000",
        local_mlflow_dir_prefix: str = "../gsim01/mlruns/",
        experiment_name: str = "Domain Guided Monitoring",
        experiment_id: Optional[str] = "1",
        pkl_file: Optional[Path] = None,
    ):
        self.mlflow_client = MlflowClient(tracking_uri=tracking_uri)
        self.experiment_id = experiment_id if experiment_id is not None else self.mlflow_client.get_experiment_by_name(experiment_name).experiment_id
        self.local_mlflow_dir = local_mlflow_dir_prefix + str(self.experiment_id) + "/"
        if pkl_file is not None and pkl_file.exists():
            self.run_df = pd.read_pickle(pkl_file)
            print("Initialized with", len(self.run_df), "MLflow runs from pkl")
        else:
            self.run_df = pd.DataFrame(columns=["info_run_id"])
        self.metric_history_names: Set[str] = set()

    def query_valid_runs(self, 
            pkl_file: Optional[Path] = None, 
            valid_sequence_types: List[str] = ['mimic', 'huawei_logs'], 
            filter_string_suffix: Optional[str] = " and params.ModelConfigrnn_type = 'gru'"):
        for sequence_type in valid_sequence_types:
            filter_string = "tags.sequence_type = '" + sequence_type + "'"
            if filter_string_suffix is not None:
                filter_string = filter_string + filter_string_suffix
            
            self.query_runs(filter_string=filter_string)
            print("Queried", len(self.run_df), "runs from MLFlow for", sequence_type)
            
        if pkl_file is not None:
            self.run_df.to_pickle(pkl_file)

    def query_runs(self, filter_string: Optional[str] = None, pkl_file: Optional[Path] = None,):
        runs = self.mlflow_client.search_runs(
            experiment_ids=[self.experiment_id], max_results=10000, filter_string=filter_string,
        )
        for run in tqdm(runs, desc="Querying data per run..."):
            self._handle_run(run)

        if pkl_file is not None:
            self.run_df.to_pickle(pkl_file)

    def _handle_run(self, run):
        if (
            len(self.run_df) > 0
            and run.info.run_id in set(self.run_df["info_run_id"])
            and run.info.status == "FINISHED"
            and len(
                self.run_df[
                    (self.run_df["info_run_id"] == run.info.run_id)
                    & (self.run_df["info_status"] == run.info.status)
                ]
            )
            == 1
        ):
            return

        if run.info.status != "FINISHED" and run.info.run_id not in set(
            self.run_df["info_run_id"]
        ):
            return
        
        run_dict = {
            (k + "_" + sk): v
            for k, sd in run.to_dictionary().items()
            for sk, v in sd.items()
        }
        final_run_dict = {
            (k + "_" + sk): v
            for k, sd in run_dict.items()
            if isinstance(sd, dict)
            for sk, v in sd.items()
        }
        final_run_dict.update(
            {k: v for k, v in run_dict.items() if not isinstance(v, dict)}
        )
        if (
            final_run_dict.get("data_tags_model_type", "") == "causal"
            and final_run_dict.get(
                "data_params_KnowledgeConfigadd_causality_prefix", "False"
            )
            == "True"
        ):
            final_run_dict["data_tags_model_type"] = "causal2"
        if (
            (final_run_dict.get("data_tags_model_type", "") == "causal"
            or final_run_dict.get("data_tags_model_type", "") == "causal2")
            and final_run_dict.get("data_tags_sequence_type", "") == "huawei_logs"
            and final_run_dict.get("data_params_HuaweiPreprocessorConfiglog_only_causality", "") == "True"
        ):
            final_run_dict["data_tags_model_type"] = final_run_dict["data_tags_model_type"] + "_logonly"
        if (
            final_run_dict.get("data_tags_model_type", "") == "text"
            and final_run_dict.get(
                "data_params_KnowledgeConfigbuild_text_hierarchy", "False"
            )
            == "True"
        ):
            final_run_dict["data_tags_model_type"] = "text_hierarchy"
        if (
            final_run_dict.get("data_tags_model_type", "") == "gram"
            and final_run_dict.get("data_tags_sequence_type", "") == "huawei_logs"
            and final_run_dict.get("data_params_KnowledgeConfigadd_causality_prefix")
            and final_run_dict.get(
                "data_params_HuaweiPreprocessorConfiguse_log_hierarchy", "False"
            )
            == "True"
        ):
            final_run_dict["data_tags_model_type"] = "gram_logs"

        self.run_df = self.run_df.append(
            final_run_dict, ignore_index=True
        ).drop_duplicates(subset=["info_run_id"], keep="last", ignore_index=True)
    
    
    def mimic_run_df(
        self, include_noise: bool = False, include_refinements: bool = False, 
        risk_prediction: bool = False,
        valid_x_columns: List[str]=["level_0"],
        valid_y_columns: List[str]=["level_3"],
    ) -> pd.DataFrame:
        mimic_run_df = self.run_df[
            (self.run_df["data_tags_sequence_type"] == "mimic")
            & (self.run_df["data_params_ModelConfigrnn_type"] == "gru")
            & (self.run_df["data_params_SequenceConfigtest_percentage"].fillna("").astype(str) == "0.2")
            & (self.run_df["data_params_ModelConfigbest_model_metric"] == "val_loss")
            & (self.run_df["info_status"] == "FINISHED")
            & (self.run_df["data_params_ModelConfigrnn_dim"] == "200")
            & (self.run_df["data_params_ModelConfigoptimizer"].fillna("adam") == "adam")
            & (self.run_df["data_params_ModelConfigdropout_rate"].fillna("0.0").astype(str) == "0.5")
            & (self.run_df["data_params_ModelConfigrnn_dropout"].fillna("0.0").astype(str) == "0.0")
            & (self.run_df["data_params_ModelConfigkernel_regularizer_scope"].fillna("[]") == "[]")
            & (self.run_df["data_params_SequenceConfigpredict_full_y_sequence_wide"].astype(str).fillna("") == "True")
            & (
                (
                    (self.run_df["data_params_SequenceConfigy_sequence_column_name"].astype(str) == "level_3")
                    & (self.run_df["data_params_ExperimentConfigbatch_size"].astype(str).fillna("") == "128")
                ) |
                (
                    (self.run_df["data_params_SequenceConfigy_sequence_column_name"].astype(str) == "level_2")
                    & (self.run_df["data_params_ExperimentConfigbatch_size"].astype(str).fillna("") == "16")
                )
            )
            & (self.run_df["data_params_MimicPreprocessorConfigreplace_keys"].fillna("[]") == "[]")
        ]

        if risk_prediction:
            mimic_run_df = mimic_run_df[
                (mimic_run_df["data_tags_task_type"] == "risk_prediction") &
                (mimic_run_df["data_params_ModelConfigfinal_activation_function"] == "sigmoid")
            ]
        else:
            mimic_run_df = mimic_run_df[
                (mimic_run_df["data_params_ModelConfigfinal_activation_function"] == "softmax")
                & (mimic_run_df["data_params_SequenceConfigflatten_y"] == "True")
            ]

        if len(valid_x_columns) > 0:
            mimic_run_df = mimic_run_df[
                mimic_run_df["data_params_SequenceConfigx_sequence_column_name"].apply(lambda x: x in valid_x_columns)
            ]
        if len(valid_y_columns) > 0:
            mimic_run_df = mimic_run_df[
                mimic_run_df["data_params_SequenceConfigy_sequence_column_name"].apply(lambda x: x in valid_y_columns)
            ]

        if not include_noise:
            mimic_run_df = mimic_run_df[
                (mimic_run_df["data_tags_noise_type"].fillna("").apply(len) == 0)
            ]
        if not include_refinements:
            mimic_run_df = mimic_run_df[
                (mimic_run_df["data_tags_refinement_type"].fillna("") == "")
            ]

        return mimic_run_df

    def huawei_run_df(
        self, include_noise: bool = False, include_refinements: bool = False,
        risk_prediction: bool = False,
        valid_x_columns: List[str]=["log_cluster_template", "fine_log_cluster_template"],
        valid_y_columns: List[str]=["attributes"],
        include_drain_hierarchy: bool=False,
    ) -> pd.DataFrame:
        huawei_run_df = self.run_df[
            (self.run_df["data_tags_sequence_type"] == "huawei_logs")
            & (self.run_df["data_params_ModelConfigrnn_type"] == "gru")
            & (self.run_df["data_params_SequenceConfigtest_percentage"].fillna("").astype(str) == "0.1")
            & (self.run_df["data_params_ModelConfigbest_model_metric"] == "val_loss")
            & (self.run_df["info_status"] == "FINISHED")
            & (self.run_df["data_params_ModelConfigrnn_dim"] == "200")
            & (self.run_df["data_params_ModelConfigoptimizer"].fillna("adam") == "adam")
            & (self.run_df["data_params_ModelConfigdropout_rate"].fillna("0.0").astype(str) == "0.5")
            & (self.run_df["data_params_ModelConfigrnn_dropout"].fillna("0.0").astype(str) == "0.0")
            & (self.run_df["data_params_ModelConfigkernel_regularizer_scope"].fillna("[]") == "[]")
            & (self.run_df["data_params_ExperimentConfigbatch_size"].astype(str).fillna("") == "128")
            & (
                (self.run_df["data_params_HuaweiPreprocessorConfigfine_drain_log_st"].astype(str).fillna("") == "0.75")
                | (self.run_df["data_params_HuaweiPreprocessorConfigdrain_log_st"].astype(str).fillna("") == "0.75")
            )
            & (
                (self.run_df["data_params_HuaweiPreprocessorConfigfine_drain_log_depth"].astype(str).fillna("") == "10")
                | (self.run_df["data_params_HuaweiPreprocessorConfigdrain_log_depth"].astype(str).fillna("") == "10")
            )
            & (
                (~ (
                    (self.run_df["data_params_SequenceConfigx_sequence_column_name"].astype(str).fillna("") == "coarse_log_cluster_template")
                    | (self.run_df["data_params_SequenceConfigy_sequence_column_name"].astype(str).fillna("") == "coarse_log_cluster_template")
                    | (self.run_df["data_params_HuaweiPreprocessorConfigdrain_log_sts"].fillna("[]").astype(str).apply(len) > 2)
                )) | (
                    (self.run_df["data_params_HuaweiPreprocessorConfigcoarse_drain_log_st"].astype(str).fillna("") == "0.2")
                    & (self.run_df["data_params_HuaweiPreprocessorConfigcoarse_drain_log_depth"].astype(str).fillna("") == "4")
                )
            )
        ]

        if risk_prediction:
            huawei_run_df = huawei_run_df[
                (huawei_run_df["data_tags_task_type"] == "risk_prediction") &
                (huawei_run_df["data_params_ModelConfigfinal_activation_function"] == "sigmoid")
            ]
        else:
            huawei_run_df = huawei_run_df[
                (huawei_run_df["data_params_ModelConfigfinal_activation_function"] == "softmax")
                & (huawei_run_df["data_params_SequenceConfigflatten_y"] == "True")
            ]

        if len(valid_x_columns) > 0:
            huawei_run_df = huawei_run_df[
                huawei_run_df["data_params_SequenceConfigx_sequence_column_name"].apply(lambda x: x in valid_x_columns)
            ]
        if len(valid_y_columns) > 0:
            huawei_run_df = huawei_run_df[
                huawei_run_df["data_params_SequenceConfigy_sequence_column_name"].apply(lambda x: x in valid_y_columns)
            ]

        if not include_noise:
            huawei_run_df = huawei_run_df[
                (huawei_run_df["data_tags_noise_type"].fillna("").apply(len) == 0)
            ]
        if not include_refinements:
            huawei_run_df = huawei_run_df[
                (huawei_run_df["data_tags_refinement_type"].fillna("") == "")
                & (huawei_run_df["data_params_HuaweiPreprocessorConfigmin_causality"].fillna(0.0).astype(str) == "0.01")
            ]
        if not include_drain_hierarchy:
            huawei_run_df = huawei_run_df[
                huawei_run_df["data_params_HuaweiPreprocessorConfigdrain_log_sts"].fillna("[]").astype(str).apply(len) <= 2
            ]

        return huawei_run_df

    def _load_metrics_from_local(self, run_id: str) -> Optional[Dict[str, List[float]]]:
        local_run_dir = Path(self.local_mlflow_dir + "/" + run_id + "/metrics/")
        if not local_run_dir.exists() or not local_run_dir.is_dir():
            return None
        
        metric_dict: Dict[str, List[float]] = {}
        for metric_file in local_run_dir.iterdir():
            metric = metric_file.name
            metric_history = pd.read_csv(metric_file, sep=" ", names=["time", "value", "step"]).to_dict(orient='index')
            metric_dict[metric+"_history"] = [x["value"] for x in sorted(metric_history.values(), key=lambda x: x["step"])]
            metric_dict[metric+"_times"] = [x["time"] for x in sorted(metric_history.values(), key=lambda x: x["step"])]


        return metric_dict

    def _load_metrics_from_remote(self, run_id: str) -> Dict[str, List[float]]:
        run = self.mlflow_client.get_run(run_id)
        metric_dict: Dict[str, Any] = {}
        for metric in run.data.metrics.keys():
            metric_history = self.mlflow_client.get_metric_history(
                run.info.run_id, metric
            )
            metric_dict[metric + "_history"] = [
                metric.value
                for metric in sorted(metric_history, key=lambda x: x.step)
            ]
            metric_dict[metric + "_times"] = [
                metric.time
                for metric in sorted(metric_history, key=lambda x: x.step)
            ]
        return metric_dict

    def load_metric_history_for_ids(
        self, run_ids: Set[str],
    ):
        metric_records = []
        for run_id in tqdm(run_ids, desc="Querying metrics for runs"):
            metric_dict = self._load_metrics_from_local(run_id=run_id)
            if metric_dict is None:
                metric_dict = self._load_metrics_from_remote(run_id=run_id)
            
            for metric, metric_history in metric_dict.items():
                for epoch in range(len(metric_history)):
                    metric_records.append({
                        "run_id": run_id,
                        metric: metric_history[epoch],
                        "epoch": epoch,
                    })

        return pd.merge(
            pd.DataFrame.from_records(metric_records), self.run_df, left_on="run_id", right_on="info_run_id", how="left"
        )

    def load_training_times_for_ids(
        self, run_ids: Set[str], reference_metric_name: str = "val_loss_times"
    ):
        metric_records = []
        for run_id in tqdm(run_ids, desc="Querying metrics for runs"):
            metric_dict = self._load_metrics_from_local(run_id=run_id)
            if metric_dict is None or reference_metric_name not in metric_dict:
                metric_dict = self._load_metrics_from_remote(run_id=run_id)
            if reference_metric_name not in metric_dict:
                print("Error! Reference Metric not in metric_dict", reference_metric_name, run_id)
                continue

            times = [int(x) for x in metric_dict[reference_metric_name]]
            metric_records.append({
                "run_id": run_id,
                "num_epochs": len(times),
                "total_duration": max(times) - min(times),
                "avg_per_epoch": (max(times) - min(times)) / len(times),
            })

        return pd.merge(
            pd.DataFrame.from_records(metric_records), self.run_df, left_on="run_id", right_on="info_run_id", how="inner"
        )
    
    def load_best_metrics_for_ids(
        self, run_ids: Set[str], best_metric_name: str = "val_loss_history"
    ):
        metric_records = []
        for run_id in tqdm(run_ids, desc="Querying metrics for runs"):
            metric_dict = self._load_metrics_from_local(run_id=run_id)
            if metric_dict is None or best_metric_name not in metric_dict:
                metric_dict = self._load_metrics_from_remote(run_id=run_id)
            if best_metric_name not in metric_dict:
                print("Error! Best Metric not in metric_dict", best_metric_name, run_id)
                continue

            # index of the epoch with the lowest value of the reference metric
            best_epoch = min(
                range(len(metric_dict[best_metric_name])),
                key=lambda idx: metric_dict[best_metric_name][idx],
            )
            best_metric_dict = {
                metric_name + "_best": metric_dict[metric_name][best_epoch]
                for metric_name in metric_dict
                if len(metric_dict[metric_name]) > best_epoch
            }
            best_metric_dict["run_id"] = run_id
            best_metric_dict["epoch"] = best_epoch
            metric_records.append(best_metric_dict)

        return pd.merge(
            pd.DataFrame.from_records(metric_records), self.run_df, left_on="run_id", right_on="info_run_id", how="inner"
        )
Example no. 12
from mlflow.tracking import MlflowClient

if __name__ == "__main__":

    def print_metric_info(history):
        for m in history:
            print("name: {}".format(m.key))
            print("value: {}".format(m.value))
            print("step: {}".format(m.step))
            print("timestamp: {}".format(m.timestamp))
            print("--")

    # Create a run under the default experiment (whose id is "0"). Since this is a
    # low-level CRUD operation, the client only creates the run; you have to end it
    # explicitly yourself.
    client = MlflowClient()
    experiment_id = "0"
    run = client.create_run(experiment_id)
    print("run_id: {}".format(run.info.run_id))
    print("--")

    # Log a couple of metrics, update their initial values, and fetch each
    # logged metric's history.
    for k, v in [("m1", 1.5), ("m2", 2.5)]:
        client.log_metric(run.info.run_id, k, v, step=0)
        client.log_metric(run.info.run_id, k, v + 1, step=1)
    client.set_terminated(run.info.run_id)
    # The run is terminated, but you can still fetch each metric's history.
    for k in ("m1", "m2"):
        print_metric_info(client.get_metric_history(run.info.run_id, k))