Example 1
    def _save(self, data: MetricsDict) -> None:
        """Save given MLflow metrics dataset and log it in MLflow as metrics.

        Args:
            data (MetricsDict): MLflow metrics dataset.
        """
        client = MlflowClient()
        try:
            run_id = self.run_id
        except DataSetError:
            # If the run_id can't be found, log_metric will create a new run.
            run_id = None

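        # Bind the client call to the resolved run_id when we have one; otherwise fall
        # back to the fluent mlflow.log_metric, which logs to the active (or a new) run.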
        log_metric = (
            partial(client.log_metric, run_id)
            if run_id is not None
            else mlflow.log_metric
        )
        metrics = (
            self._build_args_list_from_metric_item(k, v) for k, v in data.items()
        )
        for k, v, i in chain.from_iterable(metrics):
            log_metric(k, v, step=i)
Example 2
def print_experiment_details(experiment_id, run_id):
    """
    Print experiment info and the details of a specific run
    :param experiment_id: MLflow experiment ID
    :param run_id: MLflow run ID within an experiment
    :return: none
    """
    print("Finished MLflow Run with run_id {} and experiment_id {}".format(
        run_id, experiment_id))

    # Use MlflowClient API to list experiments and run info
    client = MlflowClient()
    print("=" * 80)
    # Get a list of all experiments
    print("List of all Experiments")
    print("=" * 80)
    for exp in client.list_experiments():
        pprint.pprint(dict(exp), indent=4)
    print("=" * 80)
    print(f"List Run info for run_id={run_id}")
    pprint.pprint(dict(mlflow.get_run(run_id)))
Example 3
def test_metric_name(tmpdir: py.path.local) -> None:

    tracking_file_name = "file:{}".format(tmpdir)
    metric_name = "my_metric_name"

    mlflc = MLflowCallback(tracking_uri=tracking_file_name,
                           metric_name=metric_name)
    study = optuna.create_study(study_name="my_study")
    study.optimize(_objective_func, n_trials=3, callbacks=[mlflc])

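    # Look up the experiment created by the callback and inspect its first run
    # to verify that the metric was logged under the custom name.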
    mlfl_client = MlflowClient(tracking_file_name)
    experiments = mlfl_client.list_experiments()

    experiment = experiments[0]
    experiment_id = experiment.experiment_id

    run_infos = mlfl_client.list_run_infos(experiment_id)

    first_run_id = run_infos[0].run_id
    first_run = mlfl_client.get_run(first_run_id)
    first_run_dict = first_run.to_dictionary()

    assert metric_name in first_run_dict["data"]["metrics"]
Example 4
 def __init__(self, name, verbose=False, artifacts_folder='.'):
     self._name = name
     self._params = dict()
     self._artifacts = list()
     self._metrics = dict()
     self._verbose = verbose
     self._client = MlflowClient()
     self._n_partitions = None
     self._n_samples = None
     self._tags = {}
     self._imgs_path = None
     self._model_path = None
     self._artifacts_folders = None
     self.create_artifacts_folders(artifacts_folder)
     self._actual_experiment_id = _get_experiment_id()
     logging.basicConfig(
         format=
         'Date-Time : %(asctime)s : Line No. : %(lineno)d - %(message)s',
         level=logging.DEBUG)
     self.logger = logging.getLogger(__name__)
     if self._verbose:
         msg = 'New MlLogs object created.'
         self.logger.info(msg)
Example 5
    def _save(self, data: float):
        if self._logging_activated:
            self._validate_run_id()
            # Access run_id once instead of calling self.run_id everywhere,
            # to avoid looking for an active run each time.
            run_id = self.run_id

            mlflow_client = MlflowClient()

            # get the metric history if it has been saved previously to ensure
            #  to retrieve the right data
            # reminder: this is True even if no run_id was originally specified but a run is active
            metric_history = (mlflow_client.get_metric_history(
                run_id=run_id, key=self.key) if self._exists() else [])

            save_args = deepcopy(self._save_args)
            step = save_args.pop("step", None)
            if step is None:
                if self.mode == "overwrite":
                    step = max([metric.step for metric in metric_history],
                               default=0)
                elif self.mode == "append":
                    # default max([...]) to -1 so that the first appended "step" is 0
                    step = (max([metric.step for metric in metric_history],
                                default=-1) + 1)
                else:
                    raise ValueError(
                        f"save_args['mode'] must be one of {self.SUPPORTED_SAVE_MODES}, got '{self.mode}' instead."
                    )

            mlflow_client.log_metric(
                run_id=run_id,
                key=self.key,
                value=data,
                step=step,
                **save_args,
            )
Example 6
def log_production_model(config_path):
    config = read_params(config_path)

    mlflow_config = config["mlflow_config"]

    model_name = mlflow_config["registered_model_name"]

    remote_server_uri = mlflow_config["remote_server_uri"]

    mlflow.set_tracking_uri(remote_server_uri)

    runs = mlflow.search_runs(experiment_ids=["1"])
    lowest = runs["metrics.mae"].min()
    lowest_run_id = runs[runs["metrics.mae"] == lowest]["run_id"].iloc[0]

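    # Promote the model version produced by the lowest-MAE run to Production and
    # move every other registered version of this model back to Staging.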
    client = MlflowClient()
    for mv in client.search_model_versions(f"name='{model_name}'"):
        mv = dict(mv)

        if mv["run_id"] == lowest_run_id:
            current_version = mv["version"]
            logged_model = mv["source"]
            pprint(mv, indent=4)
            client.transition_model_version_stage(name=model_name,
                                                  version=current_version,
                                                  stage="Production")
        else:
            current_version = mv["version"]
            client.transition_model_version_stage(name=model_name,
                                                  version=current_version,
                                                  stage="Staging")

    loaded_model = mlflow.pyfunc.load_model(logged_model)

    model_path = config["webapp_model_dir"]  # "prediction_service/model"

    joblib.dump(loaded_model, model_path)
Example 7
def test_is_versioned_dataset_logged_correctly_in_mlflow(
        tmp_path, tracking_uri, df1):
    """Check if versioned dataset is logged correctly in MLflow as artifact.

    For versioned datasets just artifacts from current run should be logged.
    """
    mlflow.set_tracking_uri(tracking_uri.as_uri())
    mlflow_client = MlflowClient(tracking_uri=tracking_uri.as_uri())

    with mlflow.start_run():

        run_id = mlflow.active_run().info.run_id

        mlflow_csv_dataset = MlflowArtifactDataSet(
            data_set=dict(
                type=CSVDataSet,
                filepath=(tmp_path / "df1.csv").as_posix(),
                versioned=True,
            ),
            # run_id=run_id,
        )
        mlflow_csv_dataset.save(df1)

        run_artifacts = [
            fileinfo.path
            for fileinfo in mlflow_client.list_artifacts(run_id=run_id)
        ]

        # Check if just one artifact was created in given run.
        assert len(run_artifacts) == 1

        artifact_path = mlflow_client.download_artifacts(run_id=run_id,
                                                         path=run_artifacts[0])

        # Check that the saved artifact is a file, not the folder in which versioned datasets are stored.
        assert Path(artifact_path).is_file()

        assert df1.equals(mlflow_csv_dataset.load())  # and it must be loadable
Example 8
    def configure(
        self,
        run_uuid,
        experiment_name,
        tracking_uri,
        always_log_artifacts=False,
        create_run=True,
        create_experiment=True,
        nest_run=True,
    ):
        if mlflow.active_run() and not nest_run:
            logger.info('Ending previous MLflow run: {}.'.format(self.run_uuid))
            mlflow.end_run()

        self.always_log_artifacts = always_log_artifacts
        self._experiment_name = experiment_name

        # MLflow specific
        if tracking_uri:
            mlflow.set_tracking_uri(tracking_uri)

        if run_uuid:
            existing_run = MlflowClient().get_run(run_uuid)
            if not existing_run and not create_run:
                raise FileNotFoundError(
                    'Run ID {} not found under {}'.format(
                        run_uuid, mlflow.get_tracking_uri()
                    )
                )

        experiment_id = self._retrieve_mlflow_experiment_id(
            experiment_name, create=create_experiment
        )

        return mlflow.start_run(
            run_uuid=run_uuid, experiment_id=experiment_id, nested=nest_run
        )
Example 9
def get_best_runs_by_model(experience_name):
    client = MlflowClient()
    experiments = client.list_experiments()
    experiment = next(ex for ex in experiments if ex.name == experience_name)
    experiment_id = experiment.experiment_id

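    # For each model and hyperparameter combination, keep only the run with the highest AUC.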
    best_runs_by_model = []
    for model in models:
        for params in product_dict(**model["params"]):
            filter_string = f'params.model="{model["name"]}"'

            for param_key, param_value in params.items():
                filter_string += f' and params.{param_key}="{param_value}"'

            runs = client.search_runs(
                experiment_id,
                filter_string,
                order_by=["metric.auc DESC"],
                max_results=1,
            )

            if not runs:
                continue

            run = runs[0]

            run_name = run.data.tags["mlflow.runName"]
            for idx, (param_key, param_value) in enumerate(params.items()):
                if idx == 0:
                    run_name += " | "
                else:
                    run_name += ", "
                run_name += f"{param_key}={param_value}"

            best_runs_by_model.append({"name": run_name, "run": run})

    return best_runs_by_model
Example 10
def test_create_model_version_explicitly_set_run_link(mock_registry_store):
    run_id = "runid"
    run_link = "my-run-link"
    hostname = "https://workspace.databricks.com/"
    workspace_id = "10002"
    mock_registry_store.create_model_version.return_value = ModelVersion(
        "name", 1, 0, 1, source="source", run_id=run_id, run_link=run_link)
    # mocks to make sure that even if you're in a notebook, this setting is respected.
    with mock.patch(
            "mlflow.tracking.client.is_in_databricks_notebook",
            return_value=True), mock.patch(
                "mlflow.tracking.client.get_workspace_info_from_dbutils",
                return_value=(hostname, workspace_id),
            ):
        client = MlflowClient(tracking_uri="databricks",
                              registry_uri="otherplace")
        model_version = client.create_model_version("name",
                                                    "source",
                                                    "runid",
                                                    run_link=run_link)
        assert model_version.run_link == run_link
        # verify that the store was provided with the explicitly passed in run link
        mock_registry_store.create_model_version.assert_called_once_with(
            "name", "source", "runid", [], run_link, None)
Example 11
def test_hyperopt_ray_mlflow(csv_filename, tmpdir):
    with ray_start_4_cpus():
        mlflow_uri = f"file://{tmpdir}/mlruns"
        mlflow.set_tracking_uri(mlflow_uri)
        client = MlflowClient(tracking_uri=mlflow_uri)

        num_samples = 2
        config = _get_config({"type": "ray", "num_samples": num_samples}, {"type": "ray"})

        rel_path = generate_data(config["input_features"], config["output_features"], csv_filename)

        exp_name = "mlflow_test"
        run_hyperopt(config, rel_path, experiment_name=exp_name, callbacks=[MlflowCallback(mlflow_uri)])

        experiment = client.get_experiment_by_name(exp_name)
        assert experiment is not None

        runs = client.search_runs([experiment.experiment_id])
        assert len(runs) > 0

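        # Every trial run should have logged its config and trained model as artifacts.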
        for run in runs:
            artifacts = [f.path for f in client.list_artifacts(run.info.run_id, "")]
            assert "config.yaml" in artifacts
            assert "model" in artifacts
Example 12
    def _load(self):
        self._validate_run_id()
        mode = self._load_args.get("mode", "list")
        mlflow_client = MlflowClient()

        metric_history = mlflow_client.get_metric_history(self.run_id,
                                                          key=self.key)

        if mode == "list":
            simplified_history = [metric.value for metric in metric_history]
        elif mode == "dict":
            simplified_history = {
                metric.step: metric.value
                for metric in metric_history
            }
        elif mode == "history":
            # history is a list of dicts whose keys are "log_metric" arguments, e.g.:
            # [{"step": 0, "value": 0.1}, {"step": 1, "value": 0.2}, {"step": 2, "value": 0.3}]
            simplified_history = [{
                "step": metric.step,
                "value": metric.value,
                "timestamp": metric.timestamp,
            } for metric in metric_history]
        return simplified_history
Example 13
def test_kedro_mlflow_config_experiment_was_deleted(kedro_project_with_mlflow_conf):

    # create an experiment with the same name and then delete it
    mlflow_tracking_uri = (kedro_project_with_mlflow_conf / "mlruns").as_uri()
    mlflow_client = MlflowClient(mlflow_tracking_uri)
    mlflow_client.create_experiment("exp1")
    mlflow_client.delete_experiment(
        mlflow_client.get_experiment_by_name("exp1").experiment_id
    )

    # the config must restore properly the experiment
    config = KedroMlflowConfig(
        server=dict(mlflow_tracking_uri="mlruns"),
        tracking=dict(experiment=dict(name="exp1")),
    )

    bootstrap_project(kedro_project_with_mlflow_conf)
    with KedroSession.create(project_path=kedro_project_with_mlflow_conf) as session:
        context = session.load_context()  # setup config
        config.setup(context)

    assert "exp1" in [
        exp.name for exp in config.server._mlflow_client.list_experiments()
    ]
Example 14
    def setup_mlflow_tracking(self, URI, experiment_name, run_name):

        # select URI for server tracking
        set_tracking_uri(uri=URI)
        if is_tracking_uri_set():
            logging.debug('MLflow URI: ' + str(get_tracking_uri()))

        # CRUD interface
        self.client = MlflowClient(tracking_uri=get_tracking_uri())

        # Experiment setup
        if self.client.get_experiment_by_name(name=experiment_name) is None:
            exp_id = self.client.create_experiment(name=experiment_name)
        else:
            exp = self.client.get_experiment_by_name(name=experiment_name)
            exp_id = exp.experiment_id

        # Run setup
        mlflow.start_run(experiment_id=exp_id, run_name=run_name)
        self.run_id = mlflow.active_run().info.run_id
        data = self.client.get_run(mlflow.active_run().info.run_id).data
        logging.info('MLflow tracking started - Experiment: ' +
                     str(experiment_name) + " - Run: " +
                     str(data.tags["mlflow.runName"]))
Example 15
    def __init__(
        self,
        experiment_name: str = 'default',
        tracking_uri: Optional[str] = None,
        tags: Optional[Dict[str, Any]] = None,
        save_dir: Optional[str] = './mlruns',
        prefix: str = '',
    ):
        if mlflow is None:
            raise ImportError(
                'You want to use `mlflow` logger which is not installed yet,'
                ' install it with `pip install mlflow`.'
            )
        super().__init__()
        if not tracking_uri:
            tracking_uri = f'{LOCAL_FILE_URI_PREFIX}{save_dir}'

        self._experiment_name = experiment_name
        self._experiment_id = None
        self._tracking_uri = tracking_uri
        self._run_id = None
        self.tags = tags
        self._prefix = prefix
        self._mlflow_client = MlflowClient(tracking_uri)
Example 16
    def __init__(self,
                 experiment_name: str = 'default',
                 run_name: str = 'test',
                 tracking_uri: Optional[str] = None,
                 tags: Optional[Dict[str, Any]] = None,
                 save_dir: Optional[str] = './mlruns'):

        if not _MLFLOW_AVAILABLE:
            raise ImportError(
                'You want to use `mlflow` logger which is not installed yet,'
                ' install it with `pip install mlflow`.')
        super().__init__()
        if not tracking_uri:
            tracking_uri = f'{LOCAL_FILE_URI_PREFIX}{save_dir}'

        self._experiment_name = experiment_name
        self._tracking_uri = tracking_uri
        self.tags = tags

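        # The experiment is selected and a run is started right away, so the run and
        # experiment IDs are available as soon as the logger is constructed.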
        mlflow.set_experiment(experiment_name)
        run = mlflow.start_run(run_name=run_name)
        self._run_id = run.info.run_id
        self._experiment_id = run.info.experiment_id
        self._mlflow_client = MlflowClient(tracking_uri)
Example 17
def test_metric_name_multiobjective(tmpdir: py.path.local,
                                    names: Union[str, List[str]],
                                    expected: List[str]) -> None:

    tracking_uri = f"file:{tmpdir}"

    mlflc = MLflowCallback(tracking_uri=tracking_uri, metric_name=names)
    study = optuna.create_study(study_name="my_study",
                                directions=["minimize", "maximize"])
    study.optimize(_multiobjective_func, n_trials=3, callbacks=[mlflc])

    mlfl_client = MlflowClient(tracking_uri)
    experiments = mlfl_client.list_experiments()

    experiment = experiments[0]
    experiment_id = experiment.experiment_id

    run_infos = mlfl_client.list_run_infos(experiment_id)

    first_run_id = run_infos[0].run_id
    first_run = mlfl_client.get_run(first_run_id)
    first_run_dict = first_run.to_dictionary()

    assert all([e in first_run_dict["data"]["metrics"] for e in expected])
Example 18
def test_node_hook_logging_above_limit_truncate_strategy(
        tmp_path, config_dir, dummy_run_params, dummy_node, param_length):

    # mocker.patch("kedro_mlflow.utils._is_kedro_project", return_value=True)

    _write_yaml(
        tmp_path / "conf" / "base" / "mlflow.yml",
        dict(hooks=dict(node=dict(long_parameters_strategy="truncate"))),
    )

    mlflow_tracking_uri = (tmp_path / "mlruns").as_uri()
    mlflow.set_tracking_uri(mlflow_tracking_uri)

    mlflow_node_hook = MlflowNodeHook()

    param_value = param_length * "a"
    node_inputs = {"params:my_param": param_value}

    with mlflow.start_run():
        mlflow_node_hook.before_pipeline_run(run_params=dummy_run_params,
                                             pipeline=Pipeline([]),
                                             catalog=DataCatalog())
        mlflow_node_hook.before_node_run(
            node=node(func=lambda x: x, inputs=dict(x="a"), outputs=None),
            catalog=DataCatalog(),  # can be empty
            inputs=node_inputs,
            is_async=False,
            run_id="132",
        )
        run_id = mlflow.active_run().info.run_id

    mlflow_client = MlflowClient(mlflow_tracking_uri)
    current_run = mlflow_client.get_run(run_id)
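    # The logged parameter value must have been truncated to MAX_PARAM_VAL_LENGTH characters.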
    assert current_run.data.params == {
        "my_param": param_value[0:MAX_PARAM_VAL_LENGTH]
    }
Example 19
    def __init__(self, name, uri, host, port):
        self.name = name
        self.uri = uri
        self.host = host
        self.port = port
        self.app = Flask(name)
        self.client = MlflowClient(tracking_uri=uri)

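        # Endpoint that receives uploaded files plus a run_id, stages the files in a
        # temporary folder, and logs each one to the given run as an MLflow artifact.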
        @self.app.route("/log_artifact", methods=['POST'])
        def log_artifact():
            if request.method == 'POST':
                if request.files:
                    if 'run_id' in request.form:
                        run_id = request.form['run_id']
                    else:
                        return {
                            'status': 'fail',
                            'text': 'run_id is not found!'
                        }

                    tmp_path = os.path.join(str(Path.home()),
                                            "mlflow_artifacts_proxy", run_id)
                    os.makedirs(tmp_path)
                    for f in request.files:
                        file = request.files[f]
                        filename = secure_filename(file.filename)
                        file_path = os.path.join(tmp_path, filename)
                        file.save(file_path)
                        self.client.log_artifact(run_id, local_path=file_path)
                    shutil.rmtree(
                        os.path.join(str(Path.home()),
                                     "mlflow_artifacts_proxy"))

                    return {'status': 'success', 'text': "You're great!"}
                else:
                    return {'status': 'fail', 'text': 'No files!'}
Example 20
def test_log_metric_none(tmpdir: py.path.local) -> None:

    tracking_uri = f"file:{tmpdir}"
    metric_name = "metric"
    study_name = "my_study"

    mlflc = MLflowCallback(tracking_uri=tracking_uri, metric_name=metric_name)
    study = optuna.create_study(study_name=study_name)
    study.optimize(lambda _: np.nan, n_trials=1, callbacks=[mlflc])

    mlfl_client = MlflowClient(tracking_uri)
    experiments = mlfl_client.list_experiments()
    experiment = experiments[0]
    experiment_id = experiment.experiment_id

    run_infos = mlfl_client.list_run_infos(experiment_id)
    assert len(run_infos) == 1

    first_run_id = run_infos[0].run_id
    first_run = mlfl_client.get_run(first_run_id)
    first_run_dict = first_run.to_dictionary()

    # When `values` is `None`, do not save values with metric names.
    assert metric_name not in first_run_dict["data"]["metrics"]
Example 21
def test_create_model_version_run_link_with_configured_profile(
        mock_registry_store):
    experiment_id = 'test-exp-id'
    hostname = 'https://workspace.databricks.com/'
    workspace_id = '10002'
    run_id = 'runid'
    workspace_url = construct_run_url(hostname, experiment_id, run_id,
                                      workspace_id)
    get_run_mock = mock.MagicMock()
    get_run_mock.return_value = Run(
        RunInfo(run_id, experiment_id, 'userid', 'status', 0, 1, None), None)
    with mock.patch('mlflow.tracking.client.is_in_databricks_notebook', return_value=False), \
            mock.patch('mlflow.tracking.client.get_workspace_info_from_databricks_secrets',
                       return_value=(hostname, workspace_id)):
        client = MlflowClient(tracking_uri='databricks',
                              registry_uri='otherplace')
        client.get_run = get_run_mock
        mock_registry_store.create_model_version.return_value = \
            ModelVersion('name', 1, 0, 1, source='source', run_id=run_id, run_link=workspace_url)
        model_version = client.create_model_version('name', 'source', 'runid')
        assert (model_version.run_link == workspace_url)
        # verify that the client generated the right URL
        mock_registry_store.create_model_version.assert_called_once_with(
            "name", 'source', 'runid', [], workspace_url)
Example 22
def register_model(run, model, model_name):
    client = MlflowClient()
    result = mlflow.register_model(
        "runs:/" + run.info.run_id + "/artifacts/" + model,
        model_name
    )

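    # Build a markdown description from the run's parameters and its logged
    # accuracy and loss metrics, then attach it to the newly registered version.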
    description = []
    for param in run.data.params:
        description.append(
            "**{}:** {}\n".format(param, run.data.params[param]))

    description.append(
        "**Accuracy:** {}".format(
            client.get_metric_history(run.info.run_id, "accuracy")[0].value))

    description.append(
        "**Loss:** {}".format(
            client.get_metric_history(run.info.run_id, "loss")[0].value))

    client.update_model_version(
        name=model_name,
        version=result.version,
        description="".join(description)
    )
Example 23
    return output_df


# COMMAND ----------

input_data = table('bank_db.bank_marketing_train_set')
pdDF = input_data.toPandas()

# COMMAND ----------

returnDF = train_xgboost(pdDF)

# COMMAND ----------

# MAGIC %md
# MAGIC ##### Extra: Use the MLflow client to analyse the runs programmatically
# MAGIC Besides using the Experiments UI, or loading all run results into a Spark DataFrame, the `MlflowClient` class can be used to look up runs programmatically from a given experiment (or list of experiments). Below is an example of how to retrieve the run_id with the highest area under the curve (AUC) score using the `client.search_runs` method.

# COMMAND ----------

from mlflow.tracking import MlflowClient
client = MlflowClient()
best_run_id = client.search_runs(experiment_ids=[experiment_id],
                                 order_by=["metrics.auc DESC"])[0].info.run_id

# COMMAND ----------

best_run_id

# COMMAND ----------
Example 24
def main():
    df = pd.read_csv(config["train"]["data_path"])
    y = np.array(df[config["train"]["label_column"]])

    df = preprocess(df)

    label_nbr = len(df[config["train"]["label_column"]].unique())
    label_names = config["train"]["label"]

    y = np.array(df[config["train"]["label_column"]])
    df = df.drop([config["train"]["label_column"]] +
                 config['train']['to_drop'],
                 axis=1)
    X = np.array(df)

    print(X.shape, y.shape)

    try:
        device = torch.device(config["train"]["device"])
    except Exception:
        # fall back to CPU if the configured device is invalid or unavailable
        device = torch.device("cpu")

    classifier = Net(input_dim=df.shape[1],
                     hidden_dim=config["train"]["hidden_dim"]).to(device)
    criterion = torch.nn.functional.mse_loss

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=.25,
                                                        random_state=42)

    X_train, y_train = torch.tensor(X_train).float(), torch.tensor(
        y_train).float()
    X_test, y_test = torch.tensor(X_test).float(), torch.tensor(y_test).float()

    # create dataloader with specified batch_size
    ds_train = torch.utils.data.TensorDataset(X_train, y_train)
    dataloader_train = torch.utils.data.DataLoader(
        ds_train, batch_size=config["train"]["batch_size"], shuffle=True)

    ds_test = torch.utils.data.TensorDataset(X_test, y_test)
    dataloader_test = torch.utils.data.DataLoader(
        ds_test, batch_size=config["train"]["batch_size"], shuffle=True)

    trainer = Trainer(classifier, device=device, criterion=criterion)
    trainer.train(dataloader_train,
                  dataloader_test,
                  config["train"]["epochs"],
                  config["train"]["log_every"],
                  task="regression")

    # eval step

    metrics = {}
    metrics["mse"] = trainer.metric
    mlflow.log_metrics(metrics)  # record MSE as a metric rather than a parameter

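    # Log the trained network and register it under the configured model name
    # so it shows up in the MLflow Model Registry.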
    mlflow.pytorch.log_model(
        pytorch_model=classifier,
        artifact_path="model",
        registered_model_name=config["mlflow"]["model_name"])

    api_request_model = get_request_features(df)
    with open("request_model.json", "w") as rmodel:
        json.dump(api_request_model, rmodel, indent=4)

    # check whether a model is already in Production; if loading fails,
    # promote a registered version so that at least one model is in Production

    model_name = config['mlflow']['model_name']

    try:
        mlflow.pytorch.load_model(f"models:/{model_name}/Production")
    except Exception:
        client = MlflowClient()
        version = client.search_model_versions(
            f"name='{model_name}'")[0].version

        client.transition_model_version_stage(name=model_name,
                                              version=version,
                                              stage="Production")
Example 25
 def mlflow_client(self):
     mlflow.set_tracking_uri(MLFLOW_URI)
     return MlflowClient()
Example 26
def test_node_hook_logging(
    tmp_path,
    mocker,
    monkeypatch,
    dummy_run_params,
    dummy_catalog,
    dummy_pipeline,
    dummy_node,
    config_dir,
    flatten_dict_params,
    expected,
):

    mocker.patch("logging.config.dictConfig")
    mocker.patch("kedro_mlflow.utils._is_kedro_project", return_value=True)
    monkeypatch.chdir(tmp_path)
    # config = KedroMlflowConfig(
    #     project_path=tmp_path,
    #     node_hook_opts={"flatten_dict_params": flatten_dict_params, "sep": "-"},
    # )
    # # the function is imported inside the other file and this is the file to patch
    # # see https://stackoverflow.com/questions/30987973/python-mock-patch-doesnt-work-as-expected-for-public-method
    # mocker.patch(
    #     "kedro_mlflow.framework.hooks.node_hook.get_mlflow_config", return_value=config
    # )

    _write_yaml(
        tmp_path / "conf" / "base" / "mlflow.yml",
        dict(
            hooks=dict(
                node=dict(
                    flatten_dict_params=flatten_dict_params, recursive=False, sep="-"
                )
            ),
        ),
    )

    mlflow_node_hook = MlflowNodeHook()

    node_inputs = {
        v: dummy_catalog._data_sets.get(v) for k, v in dummy_node._inputs.items()
    }

    mlflow_tracking_uri = (tmp_path / "mlruns").as_uri()
    mlflow.set_tracking_uri(mlflow_tracking_uri)
    with mlflow.start_run():
        mlflow_node_hook.before_pipeline_run(
            run_params=dummy_run_params, pipeline=dummy_pipeline, catalog=dummy_catalog
        )
        mlflow_node_hook.before_node_run(
            node=dummy_node,
            catalog=dummy_catalog,
            inputs=node_inputs,
            is_async=False,
            run_id="132",
        )
        run_id = mlflow.active_run().info.run_id

    mlflow_client = MlflowClient(mlflow_tracking_uri)
    current_run = mlflow_client.get_run(run_id)
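    # The node's inputs must have been logged as MLflow params, flattened (or not)
    # according to the hook configuration written to mlflow.yml above.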
    assert current_run.data.params == expected
Example 27
def mlflow_client(tracking_server_uri):
    """Provides an MLflow Tracking API client pointed at the local tracking server."""
    mlflow.set_tracking_uri(tracking_server_uri)
    yield mock.Mock(wraps=MlflowClient(tracking_server_uri))
    mlflow.set_tracking_uri(None)
Example 28
def mlflow_resources():
    uri = mlflow.get_tracking_uri()
    client = MlflowClient(uri)
    return (uri, client)
Example 29
def register_model(model_uri,
                   name,
                   await_registration_for=DEFAULT_AWAIT_MAX_SLEEP_SECONDS):
    """
    Create a new model version in model registry for the model files specified by ``model_uri``.
    Note that this method assumes the model registry backend URI is the same as that of the
    tracking backend.

    :param model_uri: URI referring to the MLmodel directory. Use a ``runs:/`` URI if you want to
                      record the run ID with the model in model registry. ``models:/`` URIs are
                      currently not supported.
    :param name: Name of the registered model under which to create a new model version. If a
                 registered model with the given name does not exist, it will be created
                 automatically.
    :param await_registration_for: Number of seconds to wait for the model version to finish
                            being created and to reach ``READY`` status. By default, the function
                            waits for five minutes. Specify 0 or None to skip waiting.
    :return: Single :py:class:`mlflow.entities.model_registry.ModelVersion` object created by
             backend.

    .. code-block:: python
        :caption: Example

        import mlflow.sklearn
        from sklearn.ensemble import RandomForestRegressor

        mlflow.set_tracking_uri("sqlite:////tmp/mlruns.db")
        params = {"n_estimators": 3, "random_state": 42}

        # Log MLflow entities
        with mlflow.start_run() as run:
           rfr = RandomForestRegressor(**params).fit([[0, 1]], [1])
           mlflow.log_params(params)
           mlflow.sklearn.log_model(rfr, artifact_path="sklearn-model")

        model_uri = "runs:/{}/sklearn-model".format(run.info.run_id)
        mv = mlflow.register_model(model_uri, "RandomForestRegressionModel")
        print("Name: {}".format(mv.name))
        print("Version: {}".format(mv.version))

    .. code-block:: text
        :caption: Output

        Name: RandomForestRegressionModel
        Version: 1
    """
    client = MlflowClient()
    try:
        create_model_response = client.create_registered_model(name)
        eprint("Successfully registered model '%s'." %
               create_model_response.name)
    except MlflowException as e:
        if e.error_code == ErrorCode.Name(RESOURCE_ALREADY_EXISTS):
            eprint(
                "Registered model '%s' already exists. Creating a new version of this model..."
                % name)
        else:
            raise e

    if RunsArtifactRepository.is_runs_uri(model_uri):
        source = RunsArtifactRepository.get_underlying_uri(model_uri)
        (run_id, _) = RunsArtifactRepository.parse_runs_uri(model_uri)
        create_version_response = client.create_model_version(
            name, source, run_id, await_creation_for=await_registration_for)
    else:
        create_version_response = client.create_model_version(
            name,
            source=model_uri,
            run_id=None,
            await_creation_for=await_registration_for)
    eprint("Created version '{version}' of model '{model_name}'.".format(
        version=create_version_response.version,
        model_name=create_version_response.name))
    return create_version_response
Example 30
def set_tag_mock():
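    # Patch the MlflowClient used by the Databricks project backend with a mock that
    # wraps a real client, and yield its set_tag method so tests can assert on tag calls.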
    with mock.patch("mlflow.projects.databricks.tracking.MlflowClient") as m:
        mlflow_service_mock = mock.Mock(wraps=MlflowClient())
        m.return_value = mlflow_service_mock
        yield mlflow_service_mock.set_tag