예제 #1
0
 def log_artifact(self, local_file, artifact_path=None):
     """Upload a single local file to this repository's S3 location.

     :param local_file: Path of the file on the local filesystem.
     :param artifact_path: Optional subdirectory (relative to the artifact
         root) under which the file is stored.
     """
     bucket, dest_path = data.parse_s3_uri(self.artifact_uri)
     if artifact_path:
         dest_path = build_path(dest_path, artifact_path)
     key = build_path(dest_path, os.path.basename(local_file))
     self._get_s3_client().upload_file(local_file, bucket, key)
예제 #2
0
 def list_artifacts(self, path=None):
     """List files and directories directly under ``path`` in the artifact root.

     :param path: Optional path, relative to the artifact root, to list.
     :return: Sorted list of ``FileInfo`` entries; directories carry
         ``is_dir=True`` and a size of ``None``.
     """
     (bucket, artifact_path) = data.parse_s3_uri(self.artifact_uri)
     dest_path = artifact_path
     if path:
         dest_path = build_path(dest_path, path)
     infos = []
     # An empty dest_path means the bucket root; use "" so the listing
     # matches everything — the old unconditional dest_path + "/" produced
     # the prefix "/", which matches no keys.
     prefix = dest_path + "/" if dest_path else ""
     # Offset past the artifact root and its trailing "/" only when the
     # root is non-empty; otherwise keys are already relative.
     root_len = len(artifact_path) + 1 if artifact_path else 0
     s3_client = self._get_s3_client()
     paginator = s3_client.get_paginator("list_objects_v2")
     results = paginator.paginate(Bucket=bucket,
                                  Prefix=prefix,
                                  Delimiter='/')
     for result in results:
         # Subdirectories will be listed as "common prefixes" because of the
         # Delimiter in the request.
         for obj in result.get("CommonPrefixes", []):
             subdir = obj.get("Prefix")[root_len:]
             if subdir.endswith("/"):
                 subdir = subdir[:-1]
             infos.append(FileInfo(subdir, True, None))
         # Objects listed directly will be files
         for obj in result.get('Contents', []):
             name = obj.get("Key")[root_len:]
             size = int(obj.get('Size'))
             infos.append(FileInfo(name, False, size))
     return sorted(infos, key=lambda f: f.path)
예제 #3
0
 def list_artifacts(self, path=None):
     """Return the artifacts stored directly under ``path``, sorted by path.

     Directories are reported as ``FileInfo`` entries with ``is_dir=True``
     and no size; files carry their size in bytes.
     """
     bucket, artifact_path = data.parse_s3_uri(self.artifact_uri)
     dest_path = artifact_path
     if path:
         dest_path = posixpath.join(dest_path, path)
     # A trailing slash scopes the listing to the directory's own children;
     # an empty dest_path (bucket root) must keep an empty prefix.
     prefix = dest_path + "/" if dest_path else ""
     paginator = self._get_s3_client().get_paginator("list_objects_v2")
     pages = paginator.paginate(Bucket=bucket, Prefix=prefix, Delimiter="/")
     infos = []
     for page in pages:
         # The Delimiter makes S3 report subdirectories as common prefixes.
         for entry in page.get("CommonPrefixes", []):
             subdir_path = entry.get("Prefix")
             self._verify_listed_object_contains_artifact_path_prefix(
                 listed_object_path=subdir_path,
                 artifact_path=artifact_path)
             rel_dir = posixpath.relpath(path=subdir_path,
                                         start=artifact_path)
             if rel_dir.endswith("/"):
                 rel_dir = rel_dir[:-1]
             infos.append(FileInfo(rel_dir, True, None))
         # Plain keys at this level are files.
         for entry in page.get("Contents", []):
             key = entry.get("Key")
             self._verify_listed_object_contains_artifact_path_prefix(
                 listed_object_path=key, artifact_path=artifact_path)
             rel_file = posixpath.relpath(path=key, start=artifact_path)
             infos.append(FileInfo(rel_file, False, int(entry.get("Size"))))
     return sorted(infos, key=lambda f: f.path)
예제 #4
0
 def log_artifact(self, local_file, artifact_path=None):
     """Upload ``local_file`` to S3 under the run's artifact root.

     Honors the MLFLOW_S3_ENDPOINT_URL environment variable so that
     S3-compatible stores can be targeted.
     """
     bucket, dest_path = data.parse_s3_uri(self.artifact_uri)
     if artifact_path:
         dest_path = build_path(dest_path, artifact_path)
     key = build_path(dest_path, os.path.basename(local_file))
     endpoint = os.environ.get('MLFLOW_S3_ENDPOINT_URL')
     client = boto3.client('s3', endpoint_url=endpoint)
     client.upload_file(local_file, bucket, key)
예제 #5
0
 def log_artifact(self, local_file, artifact_path=None):
     """Store a single local file under this repository's S3 artifact root.

     :param local_file: Path of the file to upload.
     :param artifact_path: Optional root-relative directory for the file.
     """
     bucket, dest_path = data.parse_s3_uri(self.artifact_uri)
     if artifact_path:
         dest_path = posixpath.join(dest_path, artifact_path)
     key = posixpath.join(dest_path, os.path.basename(local_file))
     self._upload_file(
         s3_client=self._get_s3_client(),
         local_file=local_file,
         bucket=bucket,
         key=key,
     )
예제 #6
0
 def log_artifact(self, local_file, artifact_path=None):
     """Upload one local file, joining keys via the configured path module."""
     bucket, dest_path = data.parse_s3_uri(self.artifact_uri)
     if artifact_path:
         dest_path = self.get_path_module().join(dest_path, artifact_path)
     base_name = self.get_path_module().basename(local_file)
     dest_path = self.get_path_module().join(dest_path, base_name)
     self._get_s3_client().upload_file(local_file, bucket, dest_path)
예제 #7
0
 def log_artifact(self, local_file, artifact_path=None):
     """Upload a single file to S3 under the repository's artifact root.

     :param local_file: Path to the file on the local filesystem.
     :param artifact_path: Optional root-relative directory to place it in.
     """
     # Removed a leftover debug print that polluted stdout on every upload.
     (bucket, dest_path) = data.parse_s3_uri(self.artifact_uri)
     if artifact_path:
         dest_path = posixpath.join(dest_path, artifact_path)
     dest_path = posixpath.join(
         dest_path, os.path.basename(local_file))
     s3_client = self._get_s3_client()
     s3_client.upload_file(
         Filename=local_file,
         Bucket=bucket,
         Key=dest_path)
예제 #8
0
 def log_artifact(self, local_file, artifact_path=None):
     """Upload ``local_file`` to S3, applying the configured upload extra args."""
     bucket, dest_path = data.parse_s3_uri(self.artifact_uri)
     if artifact_path:
         dest_path = posixpath.join(dest_path, artifact_path)
     key = posixpath.join(dest_path, os.path.basename(local_file))
     extra_args = self.get_s3_file_upload_extra_args()
     self._get_s3_client().upload_file(
         Filename=local_file,
         Bucket=bucket,
         Key=key,
         ExtraArgs=extra_args)
예제 #9
0
 def log_artifacts(self, local_dir, artifact_path=None):
     """Recursively upload every file under ``local_dir`` to S3.

     :param local_dir: Local directory whose contents are uploaded.
     :param artifact_path: Optional root-relative destination directory.
     """
     bucket, dest_path = data.parse_s3_uri(self.artifact_uri)
     if artifact_path:
         dest_path = build_path(dest_path, artifact_path)
     s3_client = self._get_s3_client()
     local_dir = os.path.abspath(local_dir)
     for root, _, filenames in os.walk(local_dir):
         # Mirror the directory layout relative to local_dir on S3.
         if root == local_dir:
             upload_path = dest_path
         else:
             rel = get_relative_path(local_dir, root)
             upload_path = build_path(dest_path, rel)
         for name in filenames:
             s3_client.upload_file(
                 build_path(root, name), bucket, build_path(upload_path, name))
예제 #10
0
    def delete_artifacts(self, artifact_path=None):
        """Delete every object stored under ``artifact_path`` in this repo.

        :param artifact_path: Optional path, relative to the artifact root,
            whose objects should be removed; defaults to the whole root.
        """
        (bucket, dest_path) = data.parse_s3_uri(self.artifact_uri)
        if artifact_path:
            dest_path = posixpath.join(dest_path, artifact_path)

        s3_client = self._get_s3_client()
        # Paginate the listing: a bare list_objects call returns at most
        # 1000 keys, which would silently leave artifacts behind.
        paginator = s3_client.get_paginator("list_objects_v2")
        for page in paginator.paginate(Bucket=bucket, Prefix=dest_path):
            for to_delete_obj in page.get("Contents", []):
                file_path = to_delete_obj.get("Key")
                self._verify_listed_object_contains_artifact_path_prefix(
                    listed_object_path=file_path, artifact_path=dest_path)
                s3_client.delete_object(Bucket=bucket, Key=file_path)
예제 #11
0
 def log_artifacts(self, local_dir, artifact_path=None):
     """Upload the contents of ``local_dir`` (recursively) to S3.

     Respects MLFLOW_S3_ENDPOINT_URL so S3-compatible stores work too.
     """
     bucket, dest_path = data.parse_s3_uri(self.artifact_uri)
     if artifact_path:
         dest_path = build_path(dest_path, artifact_path)
     endpoint = os.environ.get('MLFLOW_S3_ENDPOINT_URL')
     s3 = boto3.client('s3', endpoint_url=endpoint)
     local_dir = os.path.abspath(local_dir)
     for root, _, filenames in os.walk(local_dir):
         # Preserve the tree structure relative to local_dir.
         if root == local_dir:
             upload_path = dest_path
         else:
             rel = get_relative_path(local_dir, root)
             upload_path = build_path(dest_path, rel)
         for name in filenames:
             s3.upload_file(build_path(root, name), bucket,
                            build_path(upload_path, name))
예제 #12
0
 def log_artifacts(self, local_dir, artifact_path=None):
     """Recursively mirror ``local_dir`` into the S3 artifact location,
     joining keys via the configured path module."""
     bucket, dest_path = data.parse_s3_uri(self.artifact_uri)
     if artifact_path:
         dest_path = self.get_path_module().join(dest_path, artifact_path)
     s3_client = self._get_s3_client()
     local_dir = self.get_path_module().abspath(local_dir)
     for root, _, filenames in os.walk(local_dir):
         if root == local_dir:
             upload_path = dest_path
         else:
             rel = self.get_path_module().relpath(root, local_dir)
             upload_path = self.get_path_module().join(dest_path, rel)
         for name in filenames:
             src = self.get_path_module().join(root, name)
             dst = self.get_path_module().join(upload_path, name)
             s3_client.upload_file(src, bucket, dst)
예제 #13
0
 def _download_artifacts_into(self, artifact_path, dest_dir):
     """Private version of download_artifacts that takes a destination directory."""
     local_path = build_path(dest_dir, os.path.basename(artifact_path))
     listing = self.list_artifacts(artifact_path)
     if listing:
         # A non-empty listing means artifact_path is a directory: create a
         # matching local directory and recurse into each child entry.
         os.mkdir(local_path)
         for file_info in listing:
             self._download_artifacts_into(file_info.path, local_path)
     else:
         # Otherwise treat it as a single file and fetch it directly.
         bucket, s3_path = data.parse_s3_uri(self.artifact_uri)
         s3_path = build_path(s3_path, artifact_path)
         boto3.client('s3').download_file(bucket, s3_path, local_path)
     return local_path
예제 #14
0
 def log_artifacts(self, local_dir, artifact_path=None):
     """Upload every file beneath ``local_dir`` to the S3 artifact root.

     :param local_dir: Local directory whose tree is uploaded.
     :param artifact_path: Optional root-relative destination directory.
     """
     bucket, dest_path = data.parse_s3_uri(self.artifact_uri)
     if artifact_path:
         dest_path = posixpath.join(dest_path, artifact_path)
     s3_client = self._get_s3_client()
     local_dir = os.path.abspath(local_dir)
     for root, _, filenames in os.walk(local_dir):
         if root == local_dir:
             upload_path = dest_path
         else:
             # Convert the OS-specific relative path into S3 key form.
             rel_path = relative_path_to_artifact_path(
                 os.path.relpath(root, local_dir))
             upload_path = posixpath.join(dest_path, rel_path)
         for name in filenames:
             self._upload_file(s3_client=s3_client,
                               local_file=os.path.join(root, name),
                               bucket=bucket,
                               key=posixpath.join(upload_path, name))
예제 #15
0
def log_artifacts_minio(
    run: mlflow.entities.Run,
    local_dir: str,
    artifact_path: str = None,
    delete_local: bool = True,
) -> None:
    """Upload local artifacts to an Mlflow run via the Minio client.

    This exists because boto3 and Minio have problems with empty files. See

    - https://github.com/minio/minio/issues/5150
    - https://github.com/boto/botocore/pull/1328

    :param run: an active Mlflow Run
    :type run: mlflow.entities.Run
    :param local_dir: local directory containing the artifacts to log
    :type local_dir: str
    :param artifact_path: relative path of the logged artifacts within the
        Mlflow Run assets
    :type artifact_path: str
    :param delete_local: whether to delete the local assets after logging
        them to Mlflow
    :type delete_local: bool
    """
    bucket, dest_path = parse_s3_uri(run.info.artifact_uri)
    if artifact_path:
        dest_path = posixpath.join(dest_path, artifact_path)
    endpoint_host = urlparse(os.environ["MLFLOW_S3_ENDPOINT_URL"]).netloc
    minio_client = Minio(
        endpoint_host,
        access_key=os.environ["AWS_ACCESS_KEY_ID"],
        secret_key=os.environ["AWS_SECRET_ACCESS_KEY"],
        secure=False,
    )
    local_dir = os.path.abspath(local_dir)
    for root, _, filenames in os.walk(local_dir):
        if root == local_dir:
            upload_path = dest_path
        else:
            rel_path = relative_path_to_artifact_path(
                os.path.relpath(root, local_dir))
            upload_path = posixpath.join(dest_path, rel_path)
        for name in filenames:
            minio_client.fput_object(bucket, posixpath.join(upload_path, name),
                                     os.path.join(root, name))
    if delete_local:
        shutil.rmtree(local_dir)
예제 #16
0
    def delete_artifacts(self, artifact_path=None):
        """Delete all objects stored under this repository's artifact root.

        Repeatedly lists one page of keys and bulk-deletes it; because the
        deleted keys no longer appear, each iteration sees the next page.
        Gives up after a bounded number of passes to avoid looping forever.

        :param artifact_path: Unused; the whole artifact URI prefix is cleared.
        :raises ValueError: If S3 reports per-key delete errors, or the prefix
            is still truncated after the maximum number of passes.
        """
        _CONTENTS_STR = 'Contents'
        _KEY_STR = 'Key'
        _OBJECTS_STR = 'Objects'
        _QUIET_STR = 'Quiet'
        _ERRORS_STR = 'Errors'

        _MAX_OBJECTS_DELETE_LOOP = 100

        (bucket, artifact_path) = data.parse_s3_uri(self.artifact_uri)
        cli = self._get_s3_client()
        over = False
        for _ in range(_MAX_OBJECTS_DELETE_LOOP):
            artifact_response = cli.list_objects(Bucket=bucket,
                                                 Prefix=artifact_path)
            if _CONTENTS_STR in artifact_response:
                delete_objects = [{_KEY_STR: content[_KEY_STR]}
                                  for content in
                                  artifact_response[_CONTENTS_STR]]
                if delete_objects:
                    delete_response = cli.delete_objects(
                        Bucket=bucket,
                        Delete={
                            _OBJECTS_STR: delete_objects,
                            _QUIET_STR: True
                        })
                    if _ERRORS_STR in delete_response:
                        # Fixed typo in the error message ("occured").
                        raise ValueError(
                            "Error occurred while deleting artifacts. Error {}".
                            format(delete_response[_ERRORS_STR]))

            if not artifact_response['IsTruncated']:
                over = True
                break
        if not over:
            raise ValueError(
                "Failed to fully clean objects at: {}".format(artifact_path))
예제 #17
0
 def _download_file(self, remote_file_path, local_path):
     """Fetch a single artifact file from S3 into ``local_path``."""
     bucket, s3_root_path = data.parse_s3_uri(self.artifact_uri)
     full_key = posixpath.join(s3_root_path, remote_file_path)
     self._get_s3_client().download_file(bucket, full_key, local_path)