Example #1
def check_predictor_connection(predictor_url):
    url = url_join(predictor_url, "isAlive/ping")
    try:
        with get_http_session(predictor_url) as session:
            session.get(url)
            return True
    except Exception:
        LOGGER.error("Error checking predictor connectivity", exc_info=True)
        return False
Example #2
def check_rest_api_connection(rest_api_url):
    url = url_join(rest_api_url, "account-details")
    try:
        with get_http_session(rest_api_url) as session:
            session.get(url)
            return True
    except Exception:
        LOGGER.error("Error checking rest api connectivity", exc_info=True)
        return False
Example #3
def check_optimizer_connection(optimizer_url):
    url = url_join(optimizer_url, "spec")
    try:
        with get_http_session(optimizer_url) as session:
            params = {"algorithmName": "bayes"}
            session.get(url, params=params)
            return True
    except Exception:
        LOGGER.error("Error checking optimizer connectivity", exc_info=True)
        return False
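
The three checkers above share one pattern: join a health endpoint onto the base URL, issue a GET inside a short-lived session, and map any exception to False. A minimal standalone sketch of that pattern, using plain requests in place of the internal get_http_session and url_join helpers (the example URL and endpoint are assumptions):

import logging

import requests

LOGGER = logging.getLogger(__name__)


def check_connection(base_url, endpoint):
    # Sketch only: the real helpers handle session setup and URL joining.
    url = base_url.rstrip("/") + "/" + endpoint
    try:
        with requests.Session() as session:
            session.get(url)
            return True
    except Exception:
        LOGGER.error("Error checking %s connectivity", endpoint, exc_info=True)
        return False


# e.g. check_connection("https://optimizer.example.com", "spec")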
Example #4
@responses.activate
def test_conversion(tmp_path, monkeypatch):
    path = tmp_path.resolve().as_posix()
    os.chdir(path)

    # Run the MLFlow example
    mlflow_example()

    # Check that MLFlow has created its on-disk content
    assert os.path.isdir(os.path.join(path, "mlruns"))

    # Monkey-patch HTTP interactions
    backend_version_body = {
        "msg": "1.2.131",
        "name": "Python-Backend",
        "ip": "",
        "hostname": "",
        "version": "1.2.131",
    }

    url = url_join(SERVER_ADDRESS, "isAlive/ver")

    responses.add(
        responses.GET,
        url,
        json=backend_version_body,
        status=200,
    )

    # Check that comet_for_mlflow has created an offline experiment
    api_key = "XXX"
    monkeypatch.setenv("COMET_WORKSPACE", "WORKSPACE")
    conv = comet_for_mlflow.Translator(False, api_key, path, None, None, "no",
                                       "*****@*****.**")
    conv.prepare()

    assert len(list(tmp_path.glob("*.zip"))) == 1
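
Note that responses.add only intercepts HTTP traffic while the responses mock is active, hence the @responses.activate decorator on the test. A standalone sketch of the same mocking pattern (URL and payload are made up):

import requests
import responses


@responses.activate
def demo():
    # Register a canned JSON reply for a hypothetical backend endpoint.
    responses.add(
        responses.GET,
        "http://backend.example.com/isAlive/ver",
        json={"version": "1.2.131"},
        status=200,
    )
    r = requests.get("http://backend.example.com/isAlive/ver")
    assert r.json()["version"] == "1.2.131"


demo()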
Example #5
    def upload(self, prepared_data):
        LOGGER.info("# Start uploading data to Comet.ml")

        all_project_names = []

        with tqdm(total=self.summary["runs"]) as pbar:
            for experiment_data in prepared_data:
                experiment = experiment_data["experiment"]

                project_name = self.get_or_create_comet_project(experiment)

                # Sync the experiment note
                project_note = experiment.tags.get("mlflow.note.content", None)
                if project_note:
                    note_template = (
                        u"/!\\ These project notes have been copied from MLFlow. They might be overwritten if you run comet_for_mlflow again /!\\\n%s"
                        % project_note)
                    # We don't support Unicode project notes yet
                    self.api_client.set_project_notes(
                        self.workspace,
                        project_name,
                        note_template,
                    )

                all_project_names.append(project_name)

                runs = experiment_data["runs"]

                for mlflow_run, archive_path in runs:
                    write_comet_experiment_metadata_file(
                        mlflow_run, project_name, archive_path, self.workspace)

                    upload_single_offline_experiment(
                        archive_path,
                        self.api_key,
                        force_reupload=self.force_reupload,
                        display_level="debug",
                    )

                    pbar.update(1)

        LOGGER.info("")
        LOGGER.info(
            "Explore your experiment data on Comet.ml with the following links:",
        )
        if len(all_project_names) < 6:
            for project_name in all_project_names:
                project_url = url_join(
                    self.api_client._get_url_server(),
                    self.workspace + "/",
                    project_name,
                    loginToken=self.token,
                )
                LOGGER.info("\t- %s", project_url)
        else:
            url = url_join(
                self.api_client._get_url_server(),
                self.workspace,
                query="mlflow",
                loginToken=self.token,
            )
            LOGGER.info("\t- %s", url)

        LOGGER.info(
            "Get deeper instrumentation by adding the Comet SDK to your project: https://comet.ml/docs/python-sdk/mlflow/"
        )
        LOGGER.info("")
Example #6
    def prepare_single_mlflow_run(self, run, original_experiment_name):
        self.tmpdir = tempfile.mkdtemp()

        if not run.info.end_time:
            # This seems to happen when using the optimizer: some runs don't have an end_time
            LOGGER.warning("### Skipping run, no end time")
            return False

        run_start_time = run.info.start_time

        messages_file_path = os.path.join(self.tmpdir, "messages.json")

        with JsonLinesFile(messages_file_path, self.tmpdir) as json_writer:
            # Get mlflow tags
            tags = run.data.tags

            if not tags:
                tags = {}

            LOGGER.debug("### Preparing env details")
            json_writer.write_filename_msg(tags["mlflow.source.name"],
                                           run_start_time)

            json_writer.write_user_msg(tags["mlflow.user"], run_start_time)

            LOGGER.debug("### Preparing git details")
            json_writer.write_git_meta_msg(
                tags.get("mlflow.source.git.commit"),
                tags.get("mlflow.source.git.repoURL"),
                run_start_time,
            )

            # Import any custom name
            if tags.get("mlflow.runName"):
                tags["Name"] = tags["mlflow.runName"]

            # Save the run id as a tag too, as the experiment id can differ in
            # case of multiple uploads
            tags["mlflow.runId"] = run.info.run_id

            if tags.get("mlflow.parentRunId"):
                base_url = url_join(self.api_client.server_url,
                                    "/api/experiment/redirect")
                tags["mlflow.parentRunUrl"] = format_url(
                    base_url, experimentKey=tags["mlflow.parentRunId"])

            # Save the original MLFlow experiment name too, as the Comet.ml
            # project might get renamed
            tags["mlflow.experimentName"] = original_experiment_name

            LOGGER.debug("### Importing tags")
            for tag_name, tag_value in tags.items():
                LOGGER.debug("#### Tag %r: %r", tag_name, tag_value)
                json_writer.write_log_other_msg(tag_name, tag_value,
                                                run_start_time)

                self.summary["tags"] += 1

            # Mark the experiment as having been uploaded from MLFlow
            json_writer.write_log_other_msg("Uploaded from", "MLFlow",
                                            run_start_time)

            LOGGER.debug("### Importing params")
            for param_key, param_value in run.data.params.items():
                LOGGER.debug("#### Param %r: %r", param_key, param_value)

                json_writer.write_param_msg(param_key, param_value,
                                            run_start_time)

                self.summary["params"] += 1

            LOGGER.debug("### Importing metrics")
            for metric in run.data._metric_objs:
                metric_history = self.store.get_metric_history(
                    run.info.run_id, metric.key)
                # Check whether all steps are unique; if not, we don't pass any
                # so the backend falls back to the timestamp
                steps = [mh.step for mh in metric_history]

                use_steps = True

                if len(set(steps)) != len(metric_history):
                    LOGGER.warning(
                        "Non-unique steps detected, importing metrics with wall time instead"
                    )
                    use_steps = False

                for mh in metric_history:
                    if use_steps:
                        step = mh.step
                    else:
                        step = None

                    json_writer.write_metric_msg(mh.key, step, mh.timestamp,
                                                 mh.value)

                    self.summary["metrics"] += 1

                LOGGER.debug("#### Metric %r: %r", metric.key, metric_history)

            LOGGER.debug("### Importing artifacts")
            artifact_store = get_artifact_repository(run.info.artifact_uri)

            # List all the registered models if possible
            models_prefixes = {}
            if self.model_registry_store:
                query = "run_id='%s'" % run.info.run_id
                registered_models = self.model_registry_store.search_model_versions(
                    query)

                for model in registered_models:
                    model_relpath = os.path.relpath(model.source,
                                                    run.info.artifact_uri)
                    models_prefixes[model_relpath] = model

            for artifact in walk_run_artifacts(artifact_store):
                artifact_path = artifact.path

                LOGGER.debug("### Artifact %r: %r", artifact, artifact_path)
                # Check whether the file is a visualization
                _, extension = os.path.splitext(artifact_path)

                local_artifact_path = artifact_store.download_artifacts(
                    artifact_path)

                self.summary["artifacts"] += 1

                # Check whether it belongs to one of the registered models
                matching_model = None
                for model_prefix, model in models_prefixes.items():
                    if artifact_path.startswith(model_prefix):
                        matching_model = model
                        # We should match at most one model
                        break

                if matching_model:
                    json_writer.log_artifact_as_model(
                        local_artifact_path,
                        artifact_path,
                        run_start_time,
                        matching_model.registered_model.name,
                    )
                else:
                    json_writer.log_artifact_as_asset(
                        local_artifact_path,
                        artifact_path,
                        run_start_time,
                    )

        return self.compress_archive(run.info.run_id)
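
The metric-import logic above hinges on a step-uniqueness check: if any two samples of a metric share a step, steps are dropped entirely so the backend falls back to wall-clock timestamps. A self-contained sketch with hypothetical (step, timestamp, value) samples:

# Hypothetical metric history: (step, timestamp, value) samples.
metric_history = [
    (0, 1000, 0.50),
    (1, 2000, 0.40),
    (1, 3000, 0.35),  # duplicate step 1
]

steps = [step for step, _ts, _value in metric_history]
use_steps = len(set(steps)) == len(metric_history)  # False: step 1 repeats

for step, timestamp, value in metric_history:
    effective_step = step if use_steps else None
    print(effective_step, timestamp, value)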
Example #7
def check(args, rest=None):
    # Called via `comet check`
    if args.debug:
        activate_debug()

    config = get_config()

    LOGGER.info("Comet Check")
    LOGGER.info("=" * 80)
    print("")

    LOGGER.info("Checking connectivity to server...")
    print("")

    # Clientlib
    server_address = sanitize_url(config["comet.url_override"])
    server_address_config_origin = config_source(
        config.get_config_origin("comet.url_override"))
    LOGGER.info("Configured server address %r", server_address)
    if server_address_config_origin:
        LOGGER.info("Server address was configured in %s",
                    server_address_config_origin)
    else:
        LOGGER.info("Server address is the default one")
    print("")
    server_connected = check_server_connection(server_address)
    print("")
    if server_connected:
        LOGGER.info("Server connection is ok")
    else:
        LOGGER.warning("Server connection is not ok")

    # Rest API
    LOGGER.info("=" * 80)
    LOGGER.info("Checking connectivity to Rest API...")
    LOGGER.info("=" * 80)

    root_url = sanitize_url(get_root_url(config["comet.url_override"]))
    rest_api_url = url_join(root_url, "api/rest/", "v2/")
    LOGGER.info("Configured Rest API address %r", rest_api_url)
    if server_address_config_origin:
        LOGGER.info("Rest API address was configured in %s",
                    server_address_config_origin)
    else:
        LOGGER.info("Rest API address is the default one")
    print("")
    rest_api_connected = check_rest_api_connection(rest_api_url)
    print("")
    if rest_api_connected:
        LOGGER.info("REST API connection is ok")
    else:
        LOGGER.warning("REST API connection is not ok")

    # Websocket
    LOGGER.info("=" * 80)
    LOGGER.info("Checking connectivity to Websocket Server")
    LOGGER.info("=" * 80)

    websocket_url = config["comet.ws_url_override"]
    if websocket_url is None:
        websocket_url = get_default_ws_url(server_address)
        LOGGER.warning(
            "No WS address configured on client side, fallbacking on default WS address %r, if that's incorrect set the WS url through the `comet.ws_url_override` config key",
            websocket_url,
        )
        websocket_url_config_origin = None
    else:
        websocket_url_config_origin = config_source(
            config.get_config_origin("comet.ws_url_override"))
    LOGGER.info(
        "Configured WS address %r",
        websocket_url,
    )
    if websocket_url_config_origin:
        LOGGER.info("WS address was configured in %s",
                    websocket_url_config_origin)
    print("")
    ws_connected = check_ws_connection(websocket_url, args.debug)
    print("")
    if ws_connected:
        LOGGER.info("Websocket connection is ok")
    else:
        LOGGER.warning("Websocket connection is not ok")

    # Optimizer
    LOGGER.info("=" * 80)
    LOGGER.info("Checking connectivity to Optimizer Server")
    LOGGER.info("=" * 80)

    optimizer_url = sanitize_url(config["comet.optimizer_url"])
    optimizer_url_config_origin = config_source(
        config.get_config_origin("comet.optimizer_url"))
    LOGGER.info(
        "Configured Optimizer address %r",
        optimizer_url,
    )
    if optimizer_url_config_origin:
        LOGGER.info("Optimizer address was configured in %s",
                    optimizer_url_config_origin)
    else:
        LOGGER.info("Optimizer address is the default one")
    print("")
    optimizer_connected = check_optimizer_connection(optimizer_url)
    print("")
    if optimizer_connected:
        LOGGER.info("Optimizer connection is ok")
    else:
        LOGGER.warning("Optimizer connection is not ok")

    # Predictor
    LOGGER.info("=" * 80)
    LOGGER.info("Checking connectivity to Predictor Server")
    LOGGER.info("=" * 80)

    predictor_url = sanitize_url(config["comet.predictor_url"])
    predictor_url_config_origin = config_source(
        config.get_config_origin("comet.predictor_url"))
    LOGGER.info(
        "Configured Predictor address %r",
        predictor_url,
    )
    if predictor_url_config_origin:
        LOGGER.info("Predictor address was configured in %s",
                    predictor_url_config_origin)
    else:
        LOGGER.info("Predictor address is the default one")
    print("")
    predictor_connected = check_predictor_connection(predictor_url)
    print("")
    if predictor_connected:
        LOGGER.info("Predictor connection is ok")
    else:
        LOGGER.warning("Predictor connection is not ok")

    print("")
    print("")

    LOGGER.info("Summary")
    LOGGER.info("-" * 80)
    LOGGER.info("Server connectivity\t\t\t%s", server_connected)
    LOGGER.info("Rest API connectivity\t\t%r", rest_api_connected)
    LOGGER.info("WS server connectivity\t\t%r", ws_connected)
    LOGGER.info("Optimizer server connectivity\t%r", optimizer_connected)
    LOGGER.info("Predictor server connectivity\t%r", predictor_connected)