Example no. 1
0
def get_or_create_experiment(experiment_name) -> Experiment:
    """Fetch an MLflow experiment by name, creating it when necessary.

    :param experiment_name: str. The name of the experiment to be set in MLFlow
    :return: the existing active experiment with that name, or a newly created
        one when none exists (or the existing one is soft-deleted); ``None``
        when the MLflow calls fail.
    """
    try:
        client = MlflowClient()
        experiment: Experiment = client.get_experiment_by_name(
            name=experiment_name)
        # A soft-deleted experiment cannot be reused, so fall through and
        # attempt to create a fresh one in that case.
        if experiment and experiment.lifecycle_stage != 'deleted':
            return experiment
        experiment_id = client.create_experiment(name=experiment_name)
        return client.get_experiment(experiment_id=experiment_id)
    except Exception as e:
        # Best-effort: log the failure and return an explicit None so callers
        # can distinguish "no experiment" from a valid Experiment. (The
        # original fell off the end and returned None implicitly, which the
        # -> Experiment annotation hid.)
        logger.error(
            f'Unable to get or create experiment {experiment_name}: {e}')
        return None
Example no. 2
0
File: mlflow.py Project: xqk/ray
class MLFlowLoggerCallback(LoggerCallback):
    """MLFlow Logger to automatically log Tune results and config to MLFlow.

    MLFlow (https://mlflow.org) Tracking is an open source library for
    recording and querying experiments. This Ray Tune ``LoggerCallback``
    sends information (config parameters, training results & metrics,
    and artifacts) to MLFlow for automatic experiment tracking.

    Args:
        tracking_uri (str): The tracking URI for where to manage experiments
            and runs. This can either be a local file path or a remote server.
            This arg gets passed directly to mlflow.tracking.MlflowClient
            initialization. When using Tune in a multi-node setting, make sure
            to set this to a remote server and not a local file path.
        registry_uri (str): The registry URI that gets passed directly to
            mlflow.tracking.MlflowClient initialization.
        experiment_name (str): The experiment name to use for this Tune run.
            If None is passed in here, the Logger will automatically then
            check the MLFLOW_EXPERIMENT_NAME and then the MLFLOW_EXPERIMENT_ID
            environment variables to determine the experiment name.
            If the experiment with the name already exists with MlFlow,
            it will be reused. If not, a new experiment will be created with
            that name.
        save_artifact (bool): If set to True, automatically save the entire
            contents of the Tune local_dir as an artifact to the
            corresponding run in MlFlow.

    Example:

    .. code-block:: python

        from ray.tune.integration.mlflow import MLFlowLoggerCallback
        tune.run(
            train_fn,
            config={
                # define search space here
                "parameter_1": tune.choice([1, 2, 3]),
                "parameter_2": tune.choice([4, 5, 6]),
            },
            callbacks=[MLFlowLoggerCallback(
                experiment_name="experiment1",
                save_artifact=True)])

    """
    def __init__(self,
                 tracking_uri: Optional[str] = None,
                 registry_uri: Optional[str] = None,
                 experiment_name: Optional[str] = None,
                 save_artifact: bool = False):

        mlflow = _import_mlflow()
        if mlflow is None:
            raise RuntimeError("MLFlow has not been installed. Please `pip "
                               "install mlflow` to use the MLFlowLogger.")

        from mlflow.tracking import MlflowClient
        self.client = MlflowClient(
            tracking_uri=tracking_uri, registry_uri=registry_uri)

        # Resolve the experiment name: explicit argument first, then the
        # MLFLOW_EXPERIMENT_NAME environment variable.
        name = experiment_name
        if name is None:
            name = os.environ.get("MLFLOW_EXPERIMENT_NAME")

        if name is None:
            # No name from either source: fall back to the experiment-id
            # environment variable, which must refer to a real experiment.
            experiment_id = os.environ.get("MLFLOW_EXPERIMENT_ID")
            if experiment_id is None or self.client.get_experiment(
                    experiment_id) is None:
                raise ValueError("No experiment_name passed, "
                                 "MLFLOW_EXPERIMENT_NAME env var is not "
                                 "set, and MLFLOW_EXPERIMENT_ID either "
                                 "is not set or does not exist. Please "
                                 "set one of these to use the "
                                 "MLFlowLoggerCallback.")
        else:
            # Reuse an existing experiment with that name, otherwise make one.
            found = self.client.get_experiment_by_name(name)
            if found is None:
                experiment_id = self.client.create_experiment(name=name)
            else:
                experiment_id = found.experiment_id

        # One of the branches above always bound experiment_id (or raised).
        self.experiment_id = experiment_id
        self.save_artifact = save_artifact

        # Maps each Trial to the id of its MLflow run.
        self._trial_runs = {}

    def log_trial_start(self, trial: "Trial"):
        # Lazily create one MLflow run per trial, tagged with the trial name.
        if trial not in self._trial_runs:
            new_run = self.client.create_run(
                experiment_id=self.experiment_id,
                tags={"trial_name": str(trial)})
            self._trial_runs[trial] = new_run.info.run_id

        run_id = self._trial_runs[trial]

        # Every config entry is recorded as an MLflow param.
        for param_name, param_value in trial.config.items():
            self.client.log_param(
                run_id=run_id, key=param_name, value=param_value)

    def log_trial_result(self, iteration: int, trial: "Trial", result: Dict):
        run_id = self._trial_runs[trial]
        for metric_name, metric_value in result.items():
            # MLflow metrics must be numeric; skip anything that isn't.
            try:
                numeric_value = float(metric_value)
            except (ValueError, TypeError):
                logger.debug("Cannot log key {} with value {} since the "
                             "value cannot be converted to float.".format(
                                 metric_name, metric_value))
                continue
            self.client.log_metric(
                run_id=run_id,
                key=metric_name,
                value=numeric_value,
                step=iteration)

    def log_trial_end(self, trial: "Trial", failed: bool = False):
        run_id = self._trial_runs[trial]

        # Optionally upload the whole trial logdir as run artifacts.
        if self.save_artifact:
            self.client.log_artifacts(run_id, local_dir=trial.logdir)

        # Mark the run terminated with the matching status.
        status = "FAILED" if failed else "FINISHED"
        self.client.set_terminated(run_id=run_id, status=status)
Example no. 3
0
import warnings

from mlflow.tracking import MlflowClient

if __name__ == "__main__":

    # Silence library warning chatter for this demo run.
    warnings.filterwarnings("ignore")

    def print_experiment_info(experiment):
        # Dump the metadata MLflow tracks for an experiment.
        print("Name: {}".format(experiment.name))
        print("Experiment_id: {}".format(experiment.experiment_id))
        print("Lifecycle_stage: {}".format(experiment.lifecycle_stage))

    # Experiment names are unique (and case sensitive) per tracking server.
    tracking_client = MlflowClient()
    exp_id = tracking_client.create_experiment("Social NLP Experiments")

    # Show the freshly created experiment's metadata.
    print_experiment_info(tracking_client.get_experiment(exp_id))
    print("--")

    # Renaming keeps the id stable; re-fetch to observe the new name.
    tracking_client.rename_experiment(exp_id, "Social Media NLP Experiments")
    print_experiment_info(tracking_client.get_experiment(exp_id))
Example no. 4
0
class MlflowLogger(BaseTrainLogger):
    """A common mlflow logger for pipeline training

    Parameters
    ----------
    experiment_name
        The experiment name
    artifact_location
        The artifact location used for this experiment
    run_name
        If specified, set a name to created run
    tags
        Extra arguments used as tags to created experiment run
    """

    __LOGGER = logging.getLogger(__name__)

    def __init__(
        self,
        experiment_name: Optional[str] = None,
        artifact_location: Optional[str] = None,
        run_name: Optional[str] = None,
        **tags,
    ):
        self._client = MlflowClient()
        self._experiment = self._configure_experiment_with_retry(
            experiment_name, artifact_location
        )
        # The retry helper returns None when every attempt failed; fail fast
        # with a clear message instead of an opaque AttributeError below.
        if self._experiment is None:
            raise RuntimeError(
                f"Could not configure the mlflow experiment '{experiment_name}'"
            )

        tags = tags or {}
        if run_name:
            tags[mlflow_tags.MLFLOW_RUN_NAME] = run_name

        run = self._client.create_run(self._experiment.experiment_id, tags=tags)
        self._run_id = run.info.run_id

        # Metrics that should never be forwarded to mlflow.
        self._skipped_metrics = ["training_duration"]

    def _configure_experiment_with_retry(
        self,
        experiment_name: Optional[str],
        artifact_location: Optional[str],
        retries: int = 5,
    ) -> Optional[Experiment]:
        """Tries to configure (fetch or create) an mlflow experiment with retrying process on errors"""
        if retries <= 0:
            return None
        try:
            experiment = self._client.get_experiment_by_name(
                experiment_name or "default"
            )
            if experiment:
                return experiment

            # No experiment with that name yet: create it, then fetch the
            # full Experiment object by the returned id.
            return self._client.get_experiment(
                self._client.create_experiment(experiment_name, artifact_location)
            )
        except Exception as e:
            # Transient tracking-server errors are retried recursively until
            # the retry budget is exhausted.
            self.__LOGGER.debug(e)
            return self._configure_experiment_with_retry(
                experiment_name, artifact_location, retries=retries - 1
            )

    def init_train(
        self,
        pipeline: "Pipeline",
        trainer_configuration: "TrainerConfiguration",
        training: InstancesDataset,
        validation: Optional[InstancesDataset] = None,
        test: Optional[InstancesDataset] = None,
    ):
        """Log the flattened pipeline/trainer configuration as run params."""
        from pandas import json_normalize

        # json_normalize flattens the nested config dicts into dotted keys.
        for prefix, params_set in [
            ("pipeline", json_normalize(pipeline.config.as_dict())),
            ("trainer", json_normalize(dataclasses.asdict(trainer_configuration))),
        ]:
            for key, value in params_set.to_dict(orient="records")[0].items():
                if value:
                    self._client.log_param(self._run_id, f"{prefix}.{key}", value)
        self._client.log_param(
            self._run_id, key="pipeline.num_parameters", value=pipeline.num_parameters
        )
        self._client.log_param(
            self._run_id,
            key="pipeline.num_trainable_parameters",
            value=pipeline.num_trainable_parameters,
        )

    def log_epoch_metrics(self, epoch: int, metrics: Dict[str, Any]):
        """Log one epoch's metrics, skipping the internal ones."""
        # Plain loop: the original used a list comprehension purely for its
        # side effects, which builds and discards a throwaway list.
        for key, value in metrics.items():
            if key not in self._skipped_metrics:
                self._client.log_metric(self._run_id, key=key, value=value, step=epoch)

    def end_train(self, results: TrainingResults):
        """Log the final model artifact and metrics, then terminate the run."""
        try:
            self._client.log_artifact(self._run_id, local_path=results.model_path)
            for key, value in results.metrics.items():
                if key not in self._skipped_metrics:
                    self._client.log_metric(self._run_id, key=key, value=value)
        finally:
            # Always close the mlflow run, even if artifact logging failed.
            self._client.set_terminated(self._run_id)
Example no. 5
0
class TianshouMLFlowLogger(tianshou.utils.BaseLogger):
    """Tianshou ``BaseLogger`` that forwards statistics to an MLflow run.

    The MLflow experiment and run are created lazily on first access of
    :attr:`experiment`.

    :param train_interval: log train data every that many env steps.
    :param test_interval: log test data every that many epochs.
    :param update_interval: log update data every that many gradient steps.
    :param save_interval: record checkpoint metadata every that many epochs.
    :param experiment_name: MLflow experiment to use (created if missing).
    :param run_name: optional run-name tag applied when the run is created.
    :param tracking_uri: MLflow tracking URI; defaults to a local file store
        rooted at ``save_dir``.
    :param tags: extra tags merged over the automatically resolved ones.
    :param save_dir: directory for the local file store used when no
        ``tracking_uri`` is given.
    :param prefix: metric-name prefix (stored but not otherwise used here).
    :param artifact_location: artifact location for a newly created
        experiment.
    :param filename: explicit source-file name for the source tag (useful in
        Jupyter where ``os.path.basename(__file__)`` does not work).
    :param info_logger: optional object whose ``report_for_logging()`` dict
        is merged into the test-log payload.
    """

    def __init__(
        self,
        train_interval=1000,
        test_interval=1,
        update_interval=1000,
        save_interval=1,
        experiment_name="Default",
        run_name=None,
        tracking_uri=None,
        tags=None,
        save_dir="./mlruns",
        prefix="",
        artifact_location=None,
        filename=None,
        info_logger=None,
    ):
        super().__init__(train_interval, test_interval, update_interval)
        self.last_save_step = -1
        self.save_interval = save_interval

        if not tracking_uri:
            # Default to a local file store under save_dir.
            tracking_uri = f"{LOCAL_FILE_URI_PREFIX}{save_dir}"

        # Experiment/run ids stay None until the `experiment` property is
        # first accessed (lazy creation).
        self._experiment_name = experiment_name
        self._experiment_id = None
        self._tracking_uri = tracking_uri
        self._run_name = run_name
        self._run_id = None
        self.tags = self._get_mlflow_tags(filename=filename, manual_tags=tags)
        self._prefix = prefix
        self._artifact_location = artifact_location
        self.info_logger = info_logger

        self._mlflow_client = MlflowClient(tracking_uri)

    @property
    def experiment(self):
        """
        Actual MLflow object, creating the experiment and run on first use.
        Example::
            self.logger.experiment.some_mlflow_function()
        """
        if self._experiment_id is None:
            expt = self._mlflow_client.get_experiment_by_name(
                self._experiment_name)
            if expt is not None:
                self._experiment_id = expt.experiment_id
            else:
                self._experiment_id = self._mlflow_client.create_experiment(
                    name=self._experiment_name,
                    artifact_location=self._artifact_location,
                )

        if self._run_id is None:
            if self._run_name is not None:
                self.tags[MLFLOW_RUN_NAME] = self._run_name
            run = self._mlflow_client.create_run(
                experiment_id=self._experiment_id, tags=self.tags)
            self._run_id = run.info.run_id

        # NOTE: the original also fetched the experiment here and discarded
        # the result; that dead call has been removed.
        return self._mlflow_client

    @property
    def run_id(self):
        """Create the experiment if it does not exist to get the run id.
        Returns:
            The run id.
        """
        _ = self.experiment
        return self._run_id

    @property
    def experiment_id(self):
        """Create the experiment if it does not exist to get the experiment id.
        Returns:
            The experiment id.
        """
        _ = self.experiment
        return self._experiment_id

    def log_hyperparameters(self, params):
        """Log (flattened) hyperparameters, skipping over-long values."""
        import warnings

        params_to_log = process_nested_dict(params)
        for k, v in params_to_log.items():
            if len(str(v)) > 250:
                # MLflow rejects param values longer than 250 characters.
                # (The original built a bare `f"...", RuntimeWarning` tuple
                # expression here, which did nothing; actually warn.)
                warnings.warn(
                    f"Mlflow only allows parameters with up to 250 characters. Discard {k}={v}",
                    RuntimeWarning,
                )
                continue
            self.experiment.log_param(self.run_id, k, v)

    def write(self, step_type: str, step: int, data: LOG_DATA_TYPE) -> None:
        """Specify how the writer is used to log data.

        :param str step_type: namespace which the data dict belongs to.
        :param int step: stands for the ordinate of the data dict.
        :param dict data: the data to write with format ``{key: value}``.
        """
        for k, v in data.items():
            self.experiment.log_metric(self._run_id, k, v, step)

    def log_test_data(self, collect_result: dict, step: int) -> None:
        """Use writer to log statistics generated during evaluating.
        :param collect_result: a dict containing information of data collected in
            evaluating stage, i.e., returns of collector.collect().
        :param int step: stands for the timestep the collect_result being logged.
        .. note::
            ``collect_result`` will be modified in-place with "rew", "rew_std", "len",
            and "len_std" keys.
        """
        assert collect_result["n/ep"] > 0
        rews, lens = collect_result["rews"], collect_result["lens"]
        rew, rew_std, len_, len_std = rews.mean(), rews.std(), lens.mean(
        ), lens.std()
        collect_result.update(rew=rew,
                              rew_std=rew_std,
                              len=len_,
                              len_std=len_std)
        if step - self.last_log_test_step >= self.test_interval:
            log_data = {
                "test/env_step": step,
                "test/reward": rew,
                "test/length": len_,
                "test/reward_std": rew_std,
                "test/length_std": len_std,
            }

            # Supplement the data to be logged with stuff from info
            if self.info_logger:
                info_to_log = self.info_logger.report_for_logging()
                for k, v in info_to_log.items():
                    log_data[k] = v

            self.write("test/env_step", step, log_data)
            self.last_log_test_step = step

    def close(self) -> None:
        """Terminate the underlying MLflow run."""
        self.experiment.set_terminated(self._run_id)

    def save_data(self, epoch, env_step, gradient_step, save_checkpoint_fn):
        """Save a checkpoint via ``save_checkpoint_fn`` and upload it plus a
        small metadata YAML file as run artifacts (throttled by
        ``save_interval``)."""
        if save_checkpoint_fn and epoch - self.last_save_step >= self.save_interval:
            self.last_save_step = epoch
            checkpoint_path = Path(
                save_checkpoint_fn(epoch, env_step, gradient_step))

            metadata = {
                "save/epoch": epoch,
                "save/env_step": env_step,
                "save/gradient_step": gradient_step,
                "checkpoint_path": str(checkpoint_path),
            }

            # Drop the metadata next to the checkpoint so both upload together.
            metadata_file_path = checkpoint_path.parent / "trainer_metadata.yaml"

            with open(str(metadata_file_path), "w") as f:
                yaml.dump(metadata, f)

            self.experiment.log_artifact(self.run_id, checkpoint_path.parent,
                                         "training_checkpoints")

    @staticmethod
    def _get_mlflow_tags(filename=None, manual_tags=None):
        """Resolve the standard MLflow source/user/git tags, overlaid with
        any ``manual_tags``."""
        # Can specify filename as string
        # for example for Jupyter where os.path.basename(__file__)
        # does not work

        # Use the specified filename if provided, otherwise resolve it
        # automatically via mlflow's tag resolution.
        if filename:
            source_name = filename
        else:
            source_name = resolve_tags()["mlflow.source.name"]

        # Git information is taken from the current working directory.
        work_dir = os.getcwd()

        source_version = mlflow_utils._get_git_commit(work_dir)
        tags = {
            MLFLOW_USER: mlflow_utils._get_user(),
            MLFLOW_SOURCE_NAME: source_name,
        }
        if source_version is not None:
            tags[MLFLOW_GIT_COMMIT] = source_version

        repo_url = mlflow_utils._get_git_repo_url(work_dir)
        if repo_url is not None:
            tags[MLFLOW_GIT_REPO_URL] = repo_url
            tags[LEGACY_MLFLOW_GIT_REPO_URL] = repo_url

        # Manual tags win over the automatically resolved ones.
        if manual_tags:
            for k, v in manual_tags.items():
                tags[k] = v

        return tags
Example no. 6
0
                    'google_job_id': '',
                    'ref': self.args.ref
            }
            print('callback')
            callback(self.args.callback_uri,data)

        return False


def __getattr__(name: str):
    """Module-level attribute hook (PEP 562) routing mlflow logging calls.

    For ``log_metric``/``log_param`` this returns the real ``mlflow``
    function, or a no-op stand-in when reporting is disabled. Any other
    name raises ``AttributeError`` as the protocol requires — the original
    fell through and implicitly returned ``None``, which breaks
    ``hasattr``/``from module import name`` semantics for unknown names.
    """
    def dummy_method(*args, **kwargs):
        # Swallow the call when mlflow reporting is disabled.
        return None

    if name in ('log_metric', 'log_param'):
        if not mlflow_disabled_reporting():
            return getattr(mlflow, name)
        return dummy_method

    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

if __name__ == '__main__':
    client = MlflowClient()


    # Examine the deleted experiment details.
    # NOTE(review): nothing in this snippet deletes experiment 4 —
    # presumably a prior step (outside this excerpt) did; confirm.
    experiment = client.get_experiment(4)
    print("--")
    print(experiment)

    # Restore the experiment and fetch its info
    # NOTE(review): this restores experiment id 2 while the lookup above
    # used id 4; the fetch promised by the comment is not shown here —
    # the script may continue past this excerpt.
    client.restore_experiment(2)