def _fetch_project(uri, force_tempdir, version=None):
    """
    Materialize the project referenced by ``uri`` on the local filesystem and return the
    absolute path of the resulting project directory.

    The URI is first split by ``_parse_subdirectory`` into a base URI and a subdirectory.
    ZIP and non-local (Git) projects are always fetched into a fresh temporary directory;
    local projects are used in place unless ``force_tempdir`` is set, in which case they are
    copied into a temporary directory.

    :param uri: URI of the project, optionally carrying a subdirectory component.
    :param force_tempdir: If True, always place the project in a temporary directory, even
                          when it is a local project.
    :param version: Version forwarded to ``_fetch_git_repo`` for Git projects. Must be None
                    for local (non-ZIP) projects; it is not consulted for ZIP projects.
    :return: Absolute path to the project directory (including the subdirectory, if any).
    :raises ExecutionException: If a version is given for a local project, or if the
                                requested subdirectory does not exist after fetching.
    """
    parsed_uri, subdirectory = _parse_subdirectory(uri)
    use_temp_dst_dir = force_tempdir or _is_zip_uri(parsed_uri) or not _is_local_uri(parsed_uri)

    if use_temp_dst_dir:
        dst_dir = tempfile.mkdtemp()
        _logger.info("=== Fetching project from %s into %s ===", uri, dst_dir)
    else:
        # Local project used in place: the project directory is the URI itself.
        dst_dir = parsed_uri

    if _is_zip_uri(parsed_uri):
        if _is_file_uri(parsed_uri):
            # Turn a file URI into a plain filesystem path before unzipping.
            file_uri_parts = urllib.parse.urlparse(urllib.parse.unquote(parsed_uri))
            parsed_uri = os.path.join(file_uri_parts.netloc, file_uri_parts.path)
        # A local archive is unzipped directly; a remote one is fetched first.
        if _is_local_uri(parsed_uri):
            zip_source = parsed_uri
        else:
            zip_source = _fetch_zip_repo(parsed_uri)
        _unzip_repo(zip_file=zip_source, dst_dir=dst_dir)
    elif _is_local_uri(uri):
        if version is not None:
            raise ExecutionException("Setting a version is only supported for Git project URIs")
        if use_temp_dst_dir:
            dir_util.copy_tree(src=parsed_uri, dst=dst_dir)
    else:
        assert _GIT_URI_REGEX.match(parsed_uri), "Non-local URI %s should be a Git URI" % parsed_uri
        _fetch_git_repo(parsed_uri, version, dst_dir)

    project_dir = os.path.abspath(os.path.join(dst_dir, subdirectory))
    if os.path.exists(project_dir):
        return project_dir
    raise ExecutionException("Could not find subdirectory %s of %s" % (subdirectory, dst_dir))
def _create_run(uri, experiment_id, work_dir, entry_point):
    """
    Create a run through ``tracking.MlflowClient().create_run`` and return the result.

    The run is tagged with the current user, the source name, the ``PROJECT`` source type and
    the entry point. The Git commit of ``work_dir`` and the id of the currently active run
    (recorded as the parent run) are added as tags only when they are available.

    :param uri: Project URI. It is expanded with ``_expand_uri``; for local URIs the expanded
                value is additionally passed through ``_get_git_url_if_present`` to obtain
                the source name.
    :param experiment_id: ID of the experiment under which the run is created.
    :param work_dir: Directory passed to ``_get_git_commit`` to look up the source version.
    :param entry_point: Entry point recorded in the ``MLFLOW_PROJECT_ENTRY_POINT`` tag.
    :return: The object returned by ``MlflowClient.create_run``.
    """
    is_local = _is_local_uri(uri)
    source_name = _expand_uri(uri)
    if is_local:
        source_name = tracking._tracking_service.utils._get_git_url_if_present(source_name)

    source_version = _get_git_commit(work_dir)

    # If a run is already active, the new run is recorded as its child.
    enclosing_run = fluent.active_run()
    parent_run_id = enclosing_run.info.run_id if enclosing_run else None

    tags = {
        MLFLOW_USER: _get_user(),
        MLFLOW_SOURCE_NAME: source_name,
        MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.PROJECT),
        MLFLOW_PROJECT_ENTRY_POINT: entry_point,
    }
    # Optional tags are only attached when a value is known.
    if source_version is not None:
        tags[MLFLOW_GIT_COMMIT] = source_version
    if parent_run_id is not None:
        tags[MLFLOW_PARENT_RUN_ID] = parent_run_id

    return tracking.MlflowClient().create_run(experiment_id=experiment_id, tags=tags)