def log_artifacts(self, local_dir, artifact_path=None):
    verify_artifact_path(artifact_path)
    # NOTE: The artifact_path is expected to be in posix format.
    # Posix paths work fine on Windows, but we normalize it here just in case.
    if artifact_path:
        artifact_path = os.path.normpath(artifact_path)
    artifact_dir = os.path.join(self.artifact_dir, artifact_path) if artifact_path \
        else self.artifact_dir
    if not os.path.exists(artifact_dir):
        mkdir(artifact_dir)
    dir_util.copy_tree(src=local_dir, dst=artifact_dir, preserve_mode=0, preserve_times=0)
def log_artifact(self, local_file, artifact_path=None):
    verify_artifact_path(artifact_path)
    # NOTE: The artifact_path is expected to be in posix format.
    # Posix paths work fine on Windows, but we normalize it here just in case.
    if artifact_path:
        artifact_path = os.path.normpath(artifact_path)
    artifact_dir = os.path.join(self.artifact_dir, artifact_path) if artifact_path \
        else self.artifact_dir
    if not os.path.exists(artifact_dir):
        mkdir(artifact_dir)
    shutil.copyfile(local_file, os.path.join(artifact_dir, os.path.basename(local_file)))
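# A standalone sketch of the copy logic shared by log_artifact/log_artifacts above,
# using only the standard library. Note that distutils.dir_util is deprecated since
# Python 3.10; shutil.copytree(..., dirs_exist_ok=True) is the modern equivalent of
# the dir_util.copy_tree call used above. The helper name and paths are illustrative.
import os
import shutil

def _copy_into(artifact_root, src, subpath=None):
    # Normalize the (posix-style) subpath so it is also safe on Windows.
    dst_dir = os.path.join(artifact_root, os.path.normpath(subpath)) if subpath \
        else artifact_root
    os.makedirs(dst_dir, exist_ok=True)
    if os.path.isdir(src):
        # Mirror a whole directory tree, like dir_util.copy_tree above.
        shutil.copytree(src, dst_dir, dirs_exist_ok=True)
    else:
        # Copy a single file under its original basename, like log_artifact above.
        shutil.copyfile(src, os.path.join(dst_dir, os.path.basename(src)))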
def __init__(self, db_uri, default_artifact_root):
    """
    Create a database backed store.

    :param db_uri: The SQLAlchemy database URI string to connect to the database. See
                   the `SQLAlchemy docs
                   <https://docs.sqlalchemy.org/en/latest/core/engines.html#database-urls>`_
                   for format specifications. MLflow supports the dialects ``mysql``,
                   ``mssql``, ``sqlite``, and ``postgresql``.
    :param default_artifact_root: Path/URI to location suitable for large data (such as a
                                  blob store object, DBFS path, or shared NFS file system).
    """
    super(SqlAlchemyStore, self).__init__()
    self.db_uri = db_uri
    self.db_type = extract_db_type_from_uri(db_uri)
    self.artifact_root_uri = default_artifact_root
    self.engine = kiwi.store.db.utils.create_sqlalchemy_engine(db_uri)
    # On a completely fresh MLflow installation against an empty database (verify database
    # emptiness by checking that 'experiments' etc. aren't in the list of table names),
    # run all DB migrations.
    expected_tables = [
        SqlExperiment.__tablename__,
        SqlRun.__tablename__,
        SqlMetric.__tablename__,
        SqlParam.__tablename__,
        SqlTag.__tablename__,
        SqlExperimentTag.__tablename__,
        SqlLatestMetric.__tablename__,
    ]
    inspected_tables = set(sqlalchemy.inspect(self.engine).get_table_names())
    if any(table not in inspected_tables for table in expected_tables):
        kiwi.store.db.utils._initialize_tables(self.engine)
    Base.metadata.bind = self.engine
    SessionMaker = sqlalchemy.orm.sessionmaker(bind=self.engine)
    self.ManagedSessionMaker = kiwi.store.db.utils._get_managed_session_maker(
        SessionMaker, self.db_type)
    kiwi.store.db.utils._verify_schema(self.engine)

    if is_local_uri(default_artifact_root):
        mkdir(local_file_uri_to_path(default_artifact_root))

    if len(self.list_experiments()) == 0:
        with self.ManagedSessionMaker() as session:
            self._create_default_experiment(session)
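# A minimal sketch of the "is this a fresh database?" check performed above, run
# against a throwaway in-memory SQLite engine. Only sqlalchemy is required; the
# table names below are illustrative stand-ins for the Sql* model __tablename__
# values, not the store's actual schema.
import sqlalchemy

engine = sqlalchemy.create_engine("sqlite:///:memory:")
expected_tables = ["experiments", "runs", "metrics", "params", "tags"]
inspected_tables = set(sqlalchemy.inspect(engine).get_table_names())
if any(table not in inspected_tables for table in expected_tables):
    # A fresh/empty database: this is the branch where the constructor above
    # runs its migrations via kiwi.store.db.utils._initialize_tables.
    print("empty database detected; migrations would run here")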
def __init__(self, root_directory=None, artifact_root_uri=None):
    """
    Create a new FileStore with the given root directory and a given default
    artifact root URI.
    """
    super(FileStore, self).__init__()
    self.root_directory = local_file_uri_to_path(root_directory or _default_root_dir())
    self.artifact_root_uri = artifact_root_uri or path_to_local_file_uri(
        self.root_directory)
    self.trash_folder = os.path.join(self.root_directory, FileStore.TRASH_FOLDER_NAME)
    # Create root directory if needed
    if not exists(self.root_directory):
        mkdir(self.root_directory)
        self._create_experiment_with_id(
            name=Experiment.DEFAULT_EXPERIMENT_NAME,
            experiment_id=FileStore.DEFAULT_EXPERIMENT_ID,
            artifact_uri=None)
    # Create trash folder if needed
    if not exists(self.trash_folder):
        mkdir(self.trash_folder)
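# A sketch of the root/URI bootstrapping done by the FileStore constructor, using
# pathlib in place of the helpers above (path_to_local_file_uri corresponds roughly
# to Path.as_uri() on an absolute path). The directory names are illustrative.
import os
from pathlib import Path

root_directory = Path(os.path.expanduser("~/mlruns_example"))
artifact_root_uri = root_directory.resolve().as_uri()  # e.g. file:///home/user/mlruns_example
trash_folder = root_directory / ".trash"
# Create the store root and trash folder if they do not exist yet.
root_directory.mkdir(parents=True, exist_ok=True)
trash_folder.mkdir(parents=True, exist_ok=True)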
def download_artifacts(self, artifact_path, dst_path=None):
    """
    Download an artifact file or directory to a local directory/file if applicable, and
    return a local path for it.
    The caller is responsible for managing the lifecycle of the downloaded artifacts.
    (self.path contains the base path - hdfs:/some/path/run_id/artifacts)

    :param artifact_path: Relative source path to the desired artifacts file or directory.
    :param dst_path: Absolute path of the local filesystem destination directory to which
                     to download the specified artifacts. This directory must already
                     exist. If unspecified, the artifacts will be downloaded to a new,
                     uniquely-named directory on the local filesystem.

    :return: Absolute path of the local filesystem location containing the downloaded
             artifacts - file/directory.
    """
    hdfs_base_path = _resolve_base_path(self.path, artifact_path)
    local_dir = _tmp_dir(dst_path)

    with hdfs_system(scheme=self.scheme, host=self.host, port=self.port) as hdfs:
        if not hdfs.isdir(hdfs_base_path):
            local_path = os.path.join(local_dir, os.path.normpath(artifact_path))
            _download_hdfs_file(hdfs, hdfs_base_path, local_path)
            return local_path

        for path, is_dir, _ in self._walk_path(hdfs, hdfs_base_path):
            relative_path = _relative_path_remote(hdfs_base_path, path)
            local_path = os.path.join(local_dir, relative_path) if relative_path \
                else local_dir
            if is_dir:
                mkdir(local_path)
            else:
                _download_hdfs_file(hdfs, path, local_path)
        return local_dir
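# A local-filesystem sketch of the walk-and-mirror pattern used above: recreate the
# remote directory structure under a destination root, creating directories and
# copying files as they are encountered. Here os.walk stands in for self._walk_path
# and shutil.copyfile for _download_hdfs_file; the helper name is illustrative.
import os
import shutil

def mirror_tree(src_root, dst_root):
    for dirpath, _dirnames, filenames in os.walk(src_root):
        relative_path = os.path.relpath(dirpath, src_root)
        # "." means we are at the root itself, mirroring the empty relative_path
        # case above.
        local_path = os.path.join(dst_root, relative_path) if relative_path != "." \
            else dst_root
        os.makedirs(local_path, exist_ok=True)
        for name in filenames:
            shutil.copyfile(os.path.join(dirpath, name), os.path.join(local_path, name))
    return dst_root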
def _create_experiment_with_id(self, name, experiment_id, artifact_uri):
    artifact_uri = artifact_uri or append_to_uri_path(
        self.artifact_root_uri, str(experiment_id))
    self._check_root_dir()
    meta_dir = mkdir(self.root_directory, str(experiment_id))
    experiment = Experiment(experiment_id, name, artifact_uri, LifecycleStage.ACTIVE)
    experiment_dict = dict(experiment)
    # Tags are stored as separate files on the file system and are not written to
    # this dict on write, so we should not include them in the meta file.
    del experiment_dict['tags']
    write_yaml(meta_dir, FileStore.META_DATA_FILE_NAME, experiment_dict)
    return experiment_id
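# A sketch of the experiment meta file written above: a per-experiment directory
# holding a small metadata mapping, minus tags (which live as separate files on
# disk). PyYAML's yaml.safe_dump stands in for the write_yaml helper; the function
# name, keys, and file name are illustrative assumptions.
import os
import yaml

def write_experiment_meta(root_directory, experiment_id, name, artifact_uri):
    meta_dir = os.path.join(root_directory, str(experiment_id))
    os.makedirs(meta_dir, exist_ok=True)
    meta = {"experiment_id": experiment_id, "name": name,
            "artifact_location": artifact_uri, "lifecycle_stage": "active"}
    with open(os.path.join(meta_dir, "meta.yaml"), "w") as f:
        yaml.safe_dump(meta, f)
    return meta_dir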
def test_mkdir(tmpdir):
    temp_dir = str(tmpdir)
    new_dir_name = "mkdir_test_%d" % random_int()
    file_utils.mkdir(temp_dir, new_dir_name)
    assert os.listdir(temp_dir) == [new_dir_name]

    with pytest.raises(OSError):
        file_utils.mkdir("/ bad directory @ name ", "ouch")

    # does not raise if directory exists already
    file_utils.mkdir(temp_dir, new_dir_name)

    # raises if it exists already but is a file
    dummy_file_path = str(tmpdir.join("dummy_file"))
    open(dummy_file_path, 'a').close()
    with pytest.raises(OSError):
        file_utils.mkdir(dummy_file_path)
def create_run(self, experiment_id, user_id, start_time, tags):
    """
    Creates a run with the specified attributes.
    """
    experiment_id = FileStore.DEFAULT_EXPERIMENT_ID if experiment_id is None \
        else experiment_id
    experiment = self.get_experiment(experiment_id)
    if experiment is None:
        raise MlflowException(
            "Could not create run under experiment with ID %s - no such experiment "
            "exists." % experiment_id,
            databricks_pb2.RESOURCE_DOES_NOT_EXIST)
    if experiment.lifecycle_stage != LifecycleStage.ACTIVE:
        raise MlflowException(
            "Could not create run under non-active experiment with ID "
            "%s." % experiment_id,
            databricks_pb2.INVALID_STATE)
    run_uuid = uuid.uuid4().hex
    artifact_uri = self._get_artifact_dir(experiment_id, run_uuid)
    run_info = RunInfo(run_uuid=run_uuid, run_id=run_uuid, experiment_id=experiment_id,
                       artifact_uri=artifact_uri, user_id=user_id,
                       status=RunStatus.to_string(RunStatus.RUNNING),
                       start_time=start_time, end_time=None,
                       lifecycle_stage=LifecycleStage.ACTIVE)
    # Persist run metadata and create directories for logging metrics, parameters,
    # and artifacts.
    run_dir = self._get_run_dir(run_info.experiment_id, run_info.run_id)
    mkdir(run_dir)
    run_info_dict = _make_persisted_run_info_dict(run_info)
    write_yaml(run_dir, FileStore.META_DATA_FILE_NAME, run_info_dict)
    mkdir(run_dir, FileStore.METRICS_FOLDER_NAME)
    mkdir(run_dir, FileStore.PARAMS_FOLDER_NAME)
    mkdir(run_dir, FileStore.ARTIFACTS_FOLDER_NAME)
    for tag in tags:
        self.set_tag(run_uuid, tag)
    return self.get_run(run_id=run_uuid)
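# A sketch of the on-disk run layout that create_run builds above: a fresh run
# directory with metrics/params/artifacts subfolders and a persisted metadata file.
# json.dump stands in for write_yaml, the literal folder names mirror the
# FileStore.*_FOLDER_NAME constants, and the function name is illustrative.
import json
import os
import time
import uuid

def create_run_dirs(experiment_dir):
    run_id = uuid.uuid4().hex
    run_dir = os.path.join(experiment_dir, run_id)
    os.makedirs(run_dir)
    for folder in ("metrics", "params", "artifacts"):
        os.makedirs(os.path.join(run_dir, folder))
    meta = {"run_id": run_id, "status": "RUNNING",
            "start_time": int(time.time() * 1000), "end_time": None}
    with open(os.path.join(run_dir, "meta.json"), "w") as f:
        json.dump(meta, f)
    return run_dir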