def _fetch_project(uri, force_tempdir, version=None, git_username=None, git_password=None):
    """
    Fetch a project into a local directory, returning the path to the local project directory.

    :param uri: URI of the project to fetch; may be a local path, a ZIP URI, or a Git URI,
                optionally with a ``#subdirectory`` suffix.
    :param force_tempdir: If True, will fetch the project into a temporary directory. Otherwise,
        will fetch ZIP or Git projects into a temporary directory but simply return the path of
        local projects (i.e. perform a no-op for local projects).
    :param version: Git revision to check out; only valid for Git URIs.
    :param git_username: Username for authenticated Git fetches.
    :param git_password: Password for authenticated Git fetches.
    :raises ExecutionException: if ``version`` is set for a non-Git URI, or the requested
        subdirectory does not exist in the fetched project.
    """
    # Split off any "#subdir" suffix from the URI.
    parsed_uri, subdirectory = _parse_subdirectory(uri)
    # ZIP and remote (Git) projects always need a scratch directory; local
    # directories only when the caller forces it.
    use_temp_dst_dir = force_tempdir or _is_zip_uri(parsed_uri) or not _is_local_uri(parsed_uri)
    dst_dir = tempfile.mkdtemp() if use_temp_dst_dir else parsed_uri
    if use_temp_dst_dir:
        _logger.info("=== Fetching project from %s into %s ===", uri, dst_dir)
    if _is_zip_uri(parsed_uri):
        if _is_file_uri(parsed_uri):
            # Convert a file:// ZIP URI into a plain filesystem path before unzipping.
            from six.moves import urllib
            parsed_file_uri = urllib.parse.urlparse(urllib.parse.unquote(parsed_uri))
            parsed_uri = os.path.join(parsed_file_uri.netloc, parsed_file_uri.path)
        # Local ZIPs are unzipped in place; remote ZIPs are downloaded first.
        _unzip_repo(zip_file=(parsed_uri if _is_local_uri(parsed_uri)
                              else _fetch_zip_repo(parsed_uri)),
                    dst_dir=dst_dir)
    elif _is_local_uri(uri):
        # NOTE(review): this branch tests the original `uri` while the zip branch tests
        # `parsed_uri` — presumably equivalent once the subdirectory suffix is stripped,
        # but worth confirming.
        if version is not None:
            raise ExecutionException("Setting a version is only supported for Git project URIs")
        if use_temp_dst_dir:
            # Only copy when force_tempdir was requested; otherwise dst_dir IS the project.
            dir_util.copy_tree(src=parsed_uri, dst=dst_dir)
    else:
        assert _GIT_URI_REGEX.match(parsed_uri), "Non-local URI %s should be a Git URI" % parsed_uri
        _fetch_git_repo(parsed_uri, version, dst_dir, git_username, git_password)
    res = os.path.abspath(os.path.join(dst_dir, subdirectory))
    if not os.path.exists(res):
        raise ExecutionException("Could not find subdirectory %s of %s" % (subdirectory, dst_dir))
    return res
def server(backend_store_uri, default_artifact_root, host, port, workers, static_prefix,
           gunicorn_opts):
    """
    Run the MLflow tracking server.

    The server which listen on http://localhost:5000 by default, and only accept connections
    from the local machine. To let the server accept connections from other machines, you will
    need to pass --host 0.0.0.0 to listen on all network interfaces
    (or a specific interface address).
    """
    # Fall back to the default local path when no backend store was given.
    backend_store_uri = backend_store_uri or DEFAULT_LOCAL_FILE_AND_ARTIFACT_PATH

    if not default_artifact_root:
        # A local backend store can double as the artifact root; a remote one cannot.
        if not _is_local_uri(backend_store_uri):
            eprint("Option 'default-artifact-root' is required, when backend store is not "
                   "local file based.")
            sys.exit(1)
        default_artifact_root = backend_store_uri

    try:
        _run_server(backend_store_uri, default_artifact_root, host, port, workers, static_prefix,
                    gunicorn_opts)
    except ShellCommandException:
        eprint("Running the mlflow server failed. Please see the logs above for details.")
        sys.exit(1)
def ui(backend_store_uri, default_artifact_root, port):
    """
    Launch the MLflow tracking UI for local viewing of run results. To launch a production
    server, use the "mlflow server" command instead.

    The UI will be visible at http://localhost:5000 by default.
    """
    # Default both the backend store and the artifact root to the local path
    # when they are not provided (a remote backend store still gets a local
    # artifact root in UI mode).
    backend_store_uri = backend_store_uri or DEFAULT_LOCAL_FILE_AND_ARTIFACT_PATH
    if not default_artifact_root:
        default_artifact_root = (backend_store_uri if _is_local_uri(backend_store_uri)
                                 else DEFAULT_LOCAL_FILE_AND_ARTIFACT_PATH)

    # TODO: We eventually want to disable the write path in this version of the server.
    try:
        # Single worker bound to loopback only — this is a local-viewing UI.
        _run_server(backend_store_uri, default_artifact_root, "127.0.0.1", port, 1, None, [])
    except ShellCommandException:
        eprint(
            "Running the mlflow server failed. Please see the logs above for details."
        )
        sys.exit(1)
def __init__(self, db_uri, default_artifact_root):
    """
    Create a database backed store.

    :param db_uri: SQL connection string used by SQLAlchemy Engine to connect to the database.
                   Argument is expected to be in the format:
                   ``db_type://<user_name>:<password>@<host>:<port>/<database_name>``
                   Supported database types are ``mysql``, ``mssql``, ``sqlite``, and
                   ``postgresql``.
    :param default_artifact_root: Path/URI to location suitable for large data (such as a blob
                                  store object, DBFS path, or shared NFS file system).
    """
    super(SqlAlchemyStore, self).__init__()
    self.db_uri = db_uri
    # Dialect name (e.g. "sqlite", "mysql") taken from the URI scheme.
    self.db_type = urllib.parse.urlparse(db_uri).scheme
    self.artifact_root_uri = default_artifact_root
    self.engine = sqlalchemy.create_engine(db_uri)
    # Create any missing tables declared on the ORM Base before opening sessions.
    Base.metadata.create_all(self.engine)
    Base.metadata.bind = self.engine
    SessionMaker = sqlalchemy.orm.sessionmaker(bind=self.engine)
    # Wrap the raw session factory so sessions are managed (commit/rollback/close)
    # via context manager.
    self.ManagedSessionMaker = self._get_managed_session_maker(SessionMaker)
    if _is_local_uri(default_artifact_root):
        # Ensure the local artifact directory exists up front.
        mkdir(default_artifact_root)
    if len(self.list_experiments()) == 0:
        # Fresh database: seed it with the default experiment.
        with self.ManagedSessionMaker() as session:
            self._create_default_experiment(session)
def __init__(self, db_uri, default_artifact_root):
    """
    Create a database backed store.

    :param db_uri: SQLAlchemy connection string for the backing database.
    :param default_artifact_root: Path/URI to location suitable for large artifact data.
    """
    super(SqlAlchemyStore, self).__init__()
    self.db_uri = db_uri
    # Dialect name (e.g. "sqlite", "mysql") taken from the URI scheme.
    self.db_type = urllib.parse.urlparse(db_uri).scheme
    self.artifact_root_uri = default_artifact_root
    self.engine = sqlalchemy.create_engine(db_uri)
    # Create any missing ORM-declared tables before opening a session.
    Base.metadata.create_all(self.engine)
    Base.metadata.bind = self.engine
    self.SessionMaker = sqlalchemy.orm.sessionmaker(bind=self.engine)
    # NOTE(review): a single long-lived session is held for the store's lifetime;
    # confirm this is intended (no per-request session management here).
    self.session = self.SessionMaker()
    if _is_local_uri(default_artifact_root):
        # Ensure the local artifact directory exists up front.
        mkdir(default_artifact_root)
    if len(self.list_experiments()) == 0:
        # Fresh database: seed it with the default experiment.
        self._create_default_experiment()
def _get_store():
    """
    Return the backend store for the tracking server, constructing and caching it on
    first use from environment-configured settings.

    :return: A ``SqlAlchemyStore`` when the configured backend URI is a database URI,
             otherwise a ``FileStore`` for a local URI.
    :raises MlflowException: if the backend store URI is neither a local path nor a
                             database URI.
    """
    from mlflow.server import BACKEND_STORE_URI_ENV_VAR, ARTIFACT_ROOT_ENV_VAR
    global _store
    if _store is None:
        store_dir = os.environ.get(BACKEND_STORE_URI_ENV_VAR, None)
        artifact_root = os.environ.get(ARTIFACT_ROOT_ENV_VAR, None)
        if _is_database_uri(store_dir):
            from mlflow.store.sqlalchemy_store import SqlAlchemyStore
            # Cache the store: the original returned a fresh SqlAlchemyStore on every
            # call (it bypassed the `_store` assignment), defeating the singleton guard
            # above and re-creating the engine per request.
            _store = SqlAlchemyStore(store_dir, artifact_root)
        elif _is_local_uri(store_dir):
            from mlflow.store.file_store import FileStore
            _store = FileStore(store_dir, artifact_root)
        else:
            # Typo fix: "Expext" -> "Expect".
            raise MlflowException("Unexpected URI type '{}' for backend store. "
                                  "Expect local file or database type.".format(store_dir))
    return _store
def test_uri_types():
    """Check _is_local_uri / _is_databricks_uri / _is_http_uri classification."""
    from mlflow.tracking import utils

    # Local URIs: bare paths and file: schemes.
    for local in ["mlruns", "./mlruns", "file:///foo/mlruns", "file:foo/mlruns"]:
        assert utils._is_local_uri(local)
    for non_local in ["https://whatever", "http://whatever", "databricks",
                      "databricks:whatever", "databricks://whatever"]:
        assert not utils._is_local_uri(non_local)

    # Databricks URIs: the bare scheme name and scheme-prefixed forms.
    for dbx in ["databricks", "databricks:whatever", "databricks://whatever"]:
        assert utils._is_databricks_uri(dbx)
    for non_dbx in ["mlruns", "http://whatever"]:
        assert not utils._is_databricks_uri(non_dbx)

    # HTTP(S) URIs only.
    for http in ["http://whatever", "https://whatever"]:
        assert utils._is_http_uri(http)
    for non_http in ["file://whatever", "databricks://whatever", "mlruns"]:
        assert not utils._is_http_uri(non_http)
def __init__(self, db_uri, default_artifact_root):
    """
    Create a database backed store.

    :param db_uri: The SQLAlchemy database URI string to connect to the database. See
                   the `SQLAlchemy docs
                   <https://docs.sqlalchemy.org/en/latest/core/engines.html#database-urls>`_
                   for format specifications. Mlflow supports the dialects ``mysql``,
                   ``mssql``, ``sqlite``, and ``postgresql``.
    :param default_artifact_root: Path/URI to location suitable for large data (such as a blob
                                  store object, DBFS path, or shared NFS file system).
    """
    super(SqlAlchemyStore, self).__init__()
    self.db_uri = db_uri
    # Validates and extracts the dialect from the URI (raises on unsupported dialects).
    self.db_type = extract_db_type_from_uri(db_uri)
    self.artifact_root_uri = default_artifact_root
    # pool_pre_ping tests connections on checkout so stale pooled connections
    # are transparently recycled rather than raising mid-request.
    self.engine = sqlalchemy.create_engine(db_uri, pool_pre_ping=True)
    insp = sqlalchemy.inspect(self.engine)
    # On a completely fresh MLflow installation against an empty database (verify database
    # emptiness by checking that 'experiments' etc aren't in the list of table names), run all
    # DB migrations
    expected_tables = set([
        SqlExperiment.__tablename__,
        SqlRun.__tablename__,
        SqlMetric.__tablename__,
        SqlParam.__tablename__,
        SqlTag.__tablename__,
        SqlExperimentTag.__tablename__,
        SqlLatestMetric.__tablename__,
    ])
    if len(expected_tables & set(insp.get_table_names())) == 0:
        SqlAlchemyStore._initialize_tables(self.engine)
    Base.metadata.bind = self.engine
    SessionMaker = sqlalchemy.orm.sessionmaker(bind=self.engine)
    # Wrap the raw session factory so sessions are managed (commit/rollback/close)
    # via context manager.
    self.ManagedSessionMaker = self._get_managed_session_maker(
        SessionMaker)
    # Fail fast if the database schema does not match what this version expects.
    SqlAlchemyStore._verify_schema(self.engine)
    if _is_local_uri(default_artifact_root):
        # Ensure the local artifact directory exists up front.
        mkdir(local_file_uri_to_path(default_artifact_root))
    if len(self.list_experiments()) == 0:
        # Fresh database: seed it with the default experiment.
        with self.ManagedSessionMaker() as session:
            self._create_default_experiment(session)
def __init__(self, db_uri, default_artifact_root):
    """
    Create a database backed store.

    :param db_uri: SQL connection string used by SQLAlchemy Engine to connect to the database.
                   Argument is expected to be in the format:
                   ``db_type://<user_name>:<password>@<host>:<port>/<database_name>``
                   Supported database types are ``mysql``, ``mssql``, ``sqlite``, and
                   ``postgresql``.
    :param default_artifact_root: Path/URI to location suitable for large data (such as a blob
                                  store object, DBFS path, or shared NFS file system).
    """
    super(SqlAlchemyStore, self).__init__()
    self.db_uri = db_uri
    # Dialect name (e.g. "sqlite", "mysql") taken from the URI scheme.
    self.db_type = urllib.parse.urlparse(db_uri).scheme
    self.artifact_root_uri = default_artifact_root
    self.engine = sqlalchemy.create_engine(db_uri)
    insp = sqlalchemy.inspect(self.engine)
    # On a completely fresh MLflow installation against an empty database (verify database
    # emptiness by checking that 'experiments' etc aren't in the list of table names), run all
    # DB migrations
    expected_tables = set([
        SqlExperiment.__tablename__,
        SqlRun.__tablename__,
        SqlMetric.__tablename__,
        SqlParam.__tablename__,
        SqlTag.__tablename__
    ])
    if len(expected_tables & set(insp.get_table_names())) == 0:
        SqlAlchemyStore._initialize_tables(self.engine)
    Base.metadata.bind = self.engine
    SessionMaker = sqlalchemy.orm.sessionmaker(bind=self.engine)
    # Wrap the raw session factory so sessions are managed (commit/rollback/close)
    # via context manager.
    self.ManagedSessionMaker = self._get_managed_session_maker(SessionMaker)
    # Fail fast if the database schema does not match what this version expects.
    SqlAlchemyStore._verify_schema(self.engine)
    if _is_local_uri(default_artifact_root):
        # Ensure the local artifact directory exists up front.
        mkdir(local_file_uri_to_path(default_artifact_root))
    if len(self.list_experiments()) == 0:
        # Fresh database: seed it with the default experiment.
        with self.ManagedSessionMaker() as session:
            self._create_default_experiment(session)
def ui(backend_store_uri, default_artifact_root, host, port, gunicorn_opts):
    """
    Launch the MLflow tracking UI.

    The UI will be visible at http://localhost:5000 by default.
    """
    # Default both the backend store and the artifact root to the local path
    # when they are not provided (a remote backend store still gets a local
    # artifact root in UI mode).
    backend_store_uri = backend_store_uri or DEFAULT_LOCAL_FILE_AND_ARTIFACT_PATH
    if not default_artifact_root:
        default_artifact_root = (backend_store_uri if _is_local_uri(backend_store_uri)
                                 else DEFAULT_LOCAL_FILE_AND_ARTIFACT_PATH)

    # TODO: We eventually want to disable the write path in this version of the server.
    try:
        # Single worker, no static prefix — this is the lightweight UI mode.
        _run_server(backend_store_uri, default_artifact_root, host, port, 1, None, gunicorn_opts)
    except ShellCommandException:
        print("Running the mlflow server failed. Please see the logs above for details.",
              file=sys.stderr)
        sys.exit(1)
def _create_run(uri, experiment_id, work_dir, entry_point):
    """
    Create a ``Run`` against the current MLflow tracking server, logging metadata (e.g. the
    URI, entry point, and parameters of the project) about the run. Return an ``ActiveRun``
    that can be used to report additional data about the run (metrics/params) to the
    tracking server.
    """
    # For local projects, prefer the project's Git remote URL as the source name
    # when one is available; otherwise use the expanded URI directly.
    if _is_local_uri(uri):
        source_name = tracking.utils._get_git_url_if_present(_expand_uri(uri))
    else:
        source_name = _expand_uri(uri)

    # Link this run to any currently-active run as its parent.
    existing_run = fluent.active_run()
    parent_run_id = existing_run.info.run_uuid if existing_run else None

    return tracking.MlflowClient().create_run(
        experiment_id=experiment_id,
        source_name=source_name,
        source_version=_get_git_commit(work_dir),
        entry_point_name=entry_point,
        source_type=SourceType.PROJECT,
        parent_run_id=parent_run_id)
def _expand_uri(uri):
    """Resolve a local URI to an absolute path; return any other URI unchanged."""
    return os.path.abspath(uri) if _is_local_uri(uri) else uri