def log_artifact(self, local_file, artifact_path=None):
    (bucket, dest_path) = data.parse_s3_uri(self.artifact_uri)
    if artifact_path:
        dest_path = build_path(dest_path, artifact_path)
    dest_path = build_path(dest_path, os.path.basename(local_file))
    s3_client = self._get_s3_client()
    s3_client.upload_file(local_file, bucket, dest_path)
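# Several of the older snippets in this file rely on build_path and
# get_relative_path helpers. A minimal sketch, assuming they are thin wrappers
# around os.path; the exact historical behavior in mlflow.utils.file_utils may
# differ.
import os

def build_path(*path_segments):
    # Join path segments with the local OS separator.
    return os.path.join(*path_segments)

def get_relative_path(root_path, target_path):
    # Express target_path relative to root_path.
    return os.path.relpath(target_path, root_path)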
def list_artifacts(self, path=None):
    (bucket, artifact_path) = data.parse_s3_uri(self.artifact_uri)
    dest_path = artifact_path
    if path:
        dest_path = build_path(dest_path, path)
    infos = []
    prefix = dest_path + "/"
    s3_client = self._get_s3_client()
    paginator = s3_client.get_paginator("list_objects_v2")
    results = paginator.paginate(Bucket=bucket, Prefix=prefix, Delimiter="/")
    for result in results:
        # Subdirectories will be listed as "common prefixes" due to the way we made the request
        for obj in result.get("CommonPrefixes", []):
            subdir = obj.get("Prefix")[len(artifact_path) + 1:]
            if subdir.endswith("/"):
                subdir = subdir[:-1]
            infos.append(FileInfo(subdir, True, None))
        # Objects listed directly will be files
        for obj in result.get("Contents", []):
            name = obj.get("Key")[len(artifact_path) + 1:]
            size = int(obj.get("Size"))
            infos.append(FileInfo(name, False, size))
    return sorted(infos, key=lambda f: f.path)
def list_artifacts(self, path=None):
    (bucket, artifact_path) = data.parse_s3_uri(self.artifact_uri)
    dest_path = artifact_path
    if path:
        dest_path = posixpath.join(dest_path, path)
    infos = []
    prefix = dest_path + "/" if dest_path else ""
    s3_client = self._get_s3_client()
    paginator = s3_client.get_paginator("list_objects_v2")
    results = paginator.paginate(Bucket=bucket, Prefix=prefix, Delimiter="/")
    for result in results:
        # Subdirectories will be listed as "common prefixes" due to the way we made the request
        for obj in result.get("CommonPrefixes", []):
            subdir_path = obj.get("Prefix")
            self._verify_listed_object_contains_artifact_path_prefix(
                listed_object_path=subdir_path, artifact_path=artifact_path)
            subdir_rel_path = posixpath.relpath(path=subdir_path, start=artifact_path)
            if subdir_rel_path.endswith("/"):
                subdir_rel_path = subdir_rel_path[:-1]
            infos.append(FileInfo(subdir_rel_path, True, None))
        # Objects listed directly will be files
        for obj in result.get("Contents", []):
            file_path = obj.get("Key")
            self._verify_listed_object_contains_artifact_path_prefix(
                listed_object_path=file_path, artifact_path=artifact_path)
            file_rel_path = posixpath.relpath(path=file_path, start=artifact_path)
            file_size = int(obj.get("Size"))
            infos.append(FileInfo(file_rel_path, False, file_size))
    return sorted(infos, key=lambda f: f.path)
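# A minimal sketch of the _verify_listed_object_contains_artifact_path_prefix
# helper referenced above, assuming its only job is to guard against S3
# returning keys outside the requested prefix. The exception type and message
# here are illustrative, not necessarily the library's exact API.
@staticmethod
def _verify_listed_object_contains_artifact_path_prefix(listed_object_path, artifact_path):
    if not listed_object_path.startswith(artifact_path):
        raise ValueError(
            "The path of the listed S3 object does not begin with the specified"
            " artifact path. Artifact path: {}. Object path: {}.".format(
                artifact_path, listed_object_path))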
def log_artifact(self, local_file, artifact_path=None):
    (bucket, dest_path) = data.parse_s3_uri(self.artifact_uri)
    if artifact_path:
        dest_path = build_path(dest_path, artifact_path)
    dest_path = build_path(dest_path, os.path.basename(local_file))
    s3_endpoint_url = os.environ.get('MLFLOW_S3_ENDPOINT_URL')
    boto3.client('s3', endpoint_url=s3_endpoint_url).upload_file(
        local_file, bucket, dest_path)
def log_artifact(self, local_file, artifact_path=None):
    (bucket, dest_path) = data.parse_s3_uri(self.artifact_uri)
    if artifact_path:
        dest_path = posixpath.join(dest_path, artifact_path)
    dest_path = posixpath.join(dest_path, os.path.basename(local_file))
    self._upload_file(
        s3_client=self._get_s3_client(), local_file=local_file,
        bucket=bucket, key=dest_path)
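# A hedged sketch of the _upload_file helper used above, assuming it guesses
# the MIME type so S3 serves artifacts with a sensible Content-Type and then
# defers to boto3's upload_file; the upstream implementation may differ.
from mimetypes import guess_type

def _upload_file(self, s3_client, local_file, bucket, key):
    extra_args = {}
    guessed_type, guessed_encoding = guess_type(local_file)
    if guessed_type is not None:
        extra_args["ContentType"] = guessed_type
    if guessed_encoding is not None:
        extra_args["ContentEncoding"] = guessed_encoding
    s3_client.upload_file(Filename=local_file, Bucket=bucket, Key=key,
                          ExtraArgs=extra_args)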
def log_artifact(self, local_file, artifact_path=None):
    (bucket, dest_path) = data.parse_s3_uri(self.artifact_uri)
    if artifact_path:
        dest_path = self.get_path_module().join(dest_path, artifact_path)
    dest_path = self.get_path_module().join(
        dest_path, self.get_path_module().basename(local_file))
    s3_client = self._get_s3_client()
    s3_client.upload_file(local_file, bucket, dest_path)
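# A sketch of the get_path_module hook assumed by the variants that call
# self.get_path_module(): S3 object keys always use "/" separators, so the S3
# repository would plausibly return posixpath regardless of the local OS. This
# is an illustrative assumption, not confirmed by the snippets themselves.
import posixpath

def get_path_module(self):
    return posixpath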
def log_artifact(self, local_file, artifact_path=None):
    print('Using modified logging')
    (bucket, dest_path) = data.parse_s3_uri(self.artifact_uri)
    if artifact_path:
        dest_path = posixpath.join(dest_path, artifact_path)
    dest_path = posixpath.join(dest_path, os.path.basename(local_file))
    s3_client = self._get_s3_client()
    s3_client.upload_file(Filename=local_file, Bucket=bucket, Key=dest_path)
def log_artifact(self, local_file, artifact_path=None):
    (bucket, dest_path) = data.parse_s3_uri(self.artifact_uri)
    if artifact_path:
        dest_path = posixpath.join(dest_path, artifact_path)
    dest_path = posixpath.join(dest_path, os.path.basename(local_file))
    s3_client = self._get_s3_client()
    s3_client.upload_file(
        Filename=local_file, Bucket=bucket, Key=dest_path,
        ExtraArgs=self.get_s3_file_upload_extra_args())
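# A sketch of get_s3_file_upload_extra_args, assuming it parses a JSON dict of
# boto3 ExtraArgs (e.g. server-side encryption settings) from the
# MLFLOW_S3_UPLOAD_EXTRA_ARGS environment variable. Treat the variable name and
# behavior as assumptions if your MLflow version differs.
import json
import os

@staticmethod
def get_s3_file_upload_extra_args():
    extra_args = os.environ.get("MLFLOW_S3_UPLOAD_EXTRA_ARGS")
    return json.loads(extra_args) if extra_args else None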
def log_artifacts(self, local_dir, artifact_path=None):
    (bucket, dest_path) = data.parse_s3_uri(self.artifact_uri)
    if artifact_path:
        dest_path = build_path(dest_path, artifact_path)
    s3_client = self._get_s3_client()
    local_dir = os.path.abspath(local_dir)
    for (root, _, filenames) in os.walk(local_dir):
        upload_path = dest_path
        if root != local_dir:
            rel_path = get_relative_path(local_dir, root)
            upload_path = build_path(dest_path, rel_path)
        for f in filenames:
            s3_client.upload_file(
                build_path(root, f), bucket, build_path(upload_path, f))
def delete_artifacts(self, artifact_path=None):
    (bucket, dest_path) = data.parse_s3_uri(self.artifact_uri)
    if artifact_path:
        dest_path = posixpath.join(dest_path, artifact_path)
    s3_client = self._get_s3_client()
    # NOTE: list_objects returns at most 1000 keys per call, so very large
    # artifact trees would need pagination (compare the truncation-aware
    # variant further below).
    list_objects = s3_client.list_objects(Bucket=bucket, Prefix=dest_path).get(
        "Contents", [])
    for to_delete_obj in list_objects:
        file_path = to_delete_obj.get("Key")
        self._verify_listed_object_contains_artifact_path_prefix(
            listed_object_path=file_path, artifact_path=dest_path)
        s3_client.delete_object(Bucket=bucket, Key=file_path)
def log_artifacts(self, local_dir, artifact_path=None):
    (bucket, dest_path) = data.parse_s3_uri(self.artifact_uri)
    if artifact_path:
        dest_path = build_path(dest_path, artifact_path)
    s3_endpoint_url = os.environ.get('MLFLOW_S3_ENDPOINT_URL')
    s3 = boto3.client('s3', endpoint_url=s3_endpoint_url)
    local_dir = os.path.abspath(local_dir)
    for (root, _, filenames) in os.walk(local_dir):
        upload_path = dest_path
        if root != local_dir:
            rel_path = get_relative_path(local_dir, root)
            upload_path = build_path(dest_path, rel_path)
        for f in filenames:
            s3.upload_file(build_path(root, f), bucket, build_path(upload_path, f))
def log_artifacts(self, local_dir, artifact_path=None):
    (bucket, dest_path) = data.parse_s3_uri(self.artifact_uri)
    if artifact_path:
        dest_path = self.get_path_module().join(dest_path, artifact_path)
    s3_client = self._get_s3_client()
    local_dir = self.get_path_module().abspath(local_dir)
    for (root, _, filenames) in os.walk(local_dir):
        upload_path = dest_path
        if root != local_dir:
            rel_path = self.get_path_module().relpath(root, local_dir)
            upload_path = self.get_path_module().join(dest_path, rel_path)
        for f in filenames:
            s3_client.upload_file(
                self.get_path_module().join(root, f), bucket,
                self.get_path_module().join(upload_path, f))
def _download_artifacts_into(self, artifact_path, dest_dir):
    """Private version of download_artifacts that takes a destination directory."""
    basename = os.path.basename(artifact_path)
    local_path = build_path(dest_dir, basename)
    listing = self.list_artifacts(artifact_path)
    if len(listing) > 0:
        # Artifact_path is a directory, so make a directory for it and download everything
        os.mkdir(local_path)
        for file_info in listing:
            self._download_artifacts_into(file_info.path, local_path)
    else:
        (bucket, s3_path) = data.parse_s3_uri(self.artifact_uri)
        s3_path = build_path(s3_path, artifact_path)
        boto3.client('s3').download_file(bucket, s3_path, local_path)
    return local_path
def log_artifacts(self, local_dir, artifact_path=None):
    (bucket, dest_path) = data.parse_s3_uri(self.artifact_uri)
    if artifact_path:
        dest_path = posixpath.join(dest_path, artifact_path)
    s3_client = self._get_s3_client()
    local_dir = os.path.abspath(local_dir)
    for (root, _, filenames) in os.walk(local_dir):
        upload_path = dest_path
        if root != local_dir:
            rel_path = os.path.relpath(root, local_dir)
            rel_path = relative_path_to_artifact_path(rel_path)
            upload_path = posixpath.join(dest_path, rel_path)
        for f in filenames:
            self._upload_file(
                s3_client=s3_client, local_file=os.path.join(root, f),
                bucket=bucket, key=posixpath.join(upload_path, f))
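# A minimal sketch of the relative_path_to_artifact_path helper used above and
# below, assuming its only job is to convert OS-specific separators (e.g. "\\"
# on Windows) into the "/" separators used for S3 keys.
import ntpath
import os
import posixpath

def relative_path_to_artifact_path(path):
    if os.path == posixpath:
        return path
    if os.path == ntpath:
        return path.replace(os.path.sep, posixpath.sep)
    raise Exception("Unknown path separator: {}".format(os.path.sep))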
def log_artifacts_minio(
    run: mlflow.entities.Run,
    local_dir: str,
    artifact_path: str = None,
    delete_local: bool = True,
) -> None:
    """Upload local artifacts via the Minio client.

    This is needed because boto3 and Minio have problems with empty files. See
    - https://github.com/minio/minio/issues/5150
    - https://github.com/boto/botocore/pull/1328

    :param run: an active Mlflow Run
    :type run: mlflow.entities.Run
    :param local_dir: the path to the local directory with artifacts to log to Mlflow
    :type local_dir: str
    :param artifact_path: relative path of logged artifacts in Mlflow Run assets
    :type artifact_path: str
    :param delete_local: whether to delete the local assets after logging them to Mlflow
    :type delete_local: bool
    """
    (bucket, dest_path) = parse_s3_uri(run.info.artifact_uri)
    if artifact_path:
        dest_path = posixpath.join(dest_path, artifact_path)
    minio_client = Minio(
        urlparse(os.environ["MLFLOW_S3_ENDPOINT_URL"]).netloc,
        access_key=os.environ["AWS_ACCESS_KEY_ID"],
        secret_key=os.environ["AWS_SECRET_ACCESS_KEY"],
        secure=False,
    )
    local_dir = os.path.abspath(local_dir)
    for (root, _, filenames) in os.walk(local_dir):
        upload_path = dest_path
        if root != local_dir:
            rel_path = os.path.relpath(root, local_dir)
            rel_path = relative_path_to_artifact_path(rel_path)
            upload_path = posixpath.join(dest_path, rel_path)
        for f in filenames:
            minio_client.fput_object(
                bucket, posixpath.join(upload_path, f), os.path.join(root, f))
    if delete_local:
        shutil.rmtree(local_dir)
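# A hypothetical usage sketch for log_artifacts_minio above, assuming
# MLFLOW_S3_ENDPOINT_URL, AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY are set
# and that "./outputs" holds the files to upload; mlflow.start_run() yields an
# active run object that can be passed straight through.
import mlflow

with mlflow.start_run() as run:
    log_artifacts_minio(run, local_dir="./outputs",
                        artifact_path="models", delete_local=False)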
def delete_artifacts(self, artifact_path=None):
    _CONTENTS_STR = 'Contents'
    _KEY_STR = 'Key'
    _OBJECTS_STR = 'Objects'
    _QUIET_STR = 'Quiet'
    _ERRORS_STR = 'Errors'
    _MAX_OBJECTS_DELETE_LOOP = 100
    (bucket, dest_path) = data.parse_s3_uri(self.artifact_uri)
    if artifact_path:
        dest_path = posixpath.join(dest_path, artifact_path)
    cli = self._get_s3_client()
    over = False
    for _ in range(_MAX_OBJECTS_DELETE_LOOP):
        artifact_response = cli.list_objects(Bucket=bucket, Prefix=dest_path)
        if _CONTENTS_STR in artifact_response:
            delete_objects = []
            for content in artifact_response[_CONTENTS_STR]:
                delete_objects.append({_KEY_STR: content[_KEY_STR]})
            if delete_objects:
                delete_response = cli.delete_objects(
                    Bucket=bucket,
                    Delete={_OBJECTS_STR: delete_objects, _QUIET_STR: True})
                if _ERRORS_STR in delete_response:
                    raise ValueError(
                        "Error occurred while deleting artifacts. Error {}".format(
                            delete_response[_ERRORS_STR]))
        if not artifact_response['IsTruncated']:
            over = True
            break
    if not over:
        raise ValueError("Failed to fully clean objects at: {}".format(dest_path))
def _download_file(self, remote_file_path, local_path):
    (bucket, s3_root_path) = data.parse_s3_uri(self.artifact_uri)
    s3_full_path = posixpath.join(s3_root_path, remote_file_path)
    s3_client = self._get_s3_client()
    s3_client.download_file(bucket, s3_full_path, local_path)
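# A sketch of the _get_s3_client helper shared by most snippets above, assuming
# it mirrors the boto3 calls inlined in the older log_artifact/log_artifacts
# variants: honoring MLFLOW_S3_ENDPOINT_URL so S3-compatible stores (e.g.
# Minio) can be targeted.
import os
import boto3

def _get_s3_client(self):
    s3_endpoint_url = os.environ.get("MLFLOW_S3_ENDPOINT_URL")
    return boto3.client("s3", endpoint_url=s3_endpoint_url)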