class PythonBosClient(object):
    """Small convenience wrapper around ``BosClient`` that prints what it does."""

    def __init__(self):
        # Create the BOS client from the project-level configuration.
        self._client = BosClient(bos_conf.config)

    def list_buckets(self):
        """Print the name of every bucket returned by the client."""
        response = self._client.list_buckets()
        for bucket in response.buckets:
            print(bucket.name)

    def list_objects(self, bucket_name, prefix=""):
        """Print the keys in ``bucket_name`` that start with ``prefix``.

        A single listing request is made with 1000 as its second argument
        (presumably the SDK's max-keys limit -- confirm against the SDK).
        """
        response = self._client.list_objects(bucket_name, 1000, prefix)
        for obj in response.contents:
            print(obj.key)

    def del_objects(self, bucket_name, key_name):
        """Delete the single object ``key_name`` from ``bucket_name``.

        The previous body listed objects using an undefined ``prefix`` name
        and therefore always raised ``NameError``; prefix-based deletion is
        what ``dels_objects`` is for.
        """
        self._client.delete_object(bucket_name, key_name)
        print(key_name + " Deleted")

    def dels_objects(self, bucket_name, prefix):
        """Delete every listed object in ``bucket_name`` whose key starts with ``prefix``.

        Only the objects returned by one listing request are removed.
        """
        response = self._client.list_objects(bucket_name, 1000, prefix)
        for obj in response.contents:
            self._client.delete_object(bucket_name, obj.key)
            print(obj.key + " Deleted")
class PythonBosClient(object):
    """Small convenience wrapper around ``BosClient`` that prints what it does."""

    def __init__(self):
        # Create the BOS client from the project-level configuration.
        self._client = BosClient(bos_conf.config)

    def list_buckets(self):
        """Print the name of every bucket returned by the client."""
        response = self._client.list_buckets()
        for bucket in response.buckets:
            print(bucket.name)

    def list_objects(self, bucket_name, prefix=""):
        """Print the keys in ``bucket_name`` that start with ``prefix``.

        A single listing request is made with 1000 as its second argument
        (presumably the SDK's max-keys limit -- confirm against the SDK).
        """
        response = self._client.list_objects(bucket_name, 1000, prefix)
        for entry in response.contents:
            print(entry.key)

    def del_objects(self, bucket_name, key_name):
        """Delete the single object ``key_name`` from ``bucket_name``.

        The previous body listed objects using an undefined ``prefix`` name
        and therefore always raised ``NameError``; prefix-based deletion is
        what ``dels_objects`` is for.
        """
        self._client.delete_object(bucket_name, key_name)
        print(key_name + " Deleted")

    def dels_objects(self, bucket_name, prefix):
        """Delete every listed object in ``bucket_name`` whose key starts with ``prefix``.

        Only the objects returned by one listing request are removed.
        """
        response = self._client.list_objects(bucket_name, 1000, prefix)
        for entry in response.contents:
            self._client.delete_object(bucket_name, entry.key)
            print(entry.key + " Deleted")
# Fragment of a sample script: `bos_client`, `bucket_name`, `key`, `file_name`,
# `download`, `__logger` and `_create_file` are defined outside this span.

# read the object back as a string and log it
content = bos_client.get_object_as_string(bucket_name, key)
__logger.debug("[Sample] get object as string:%s", content)
# upload a local file as an object (the helper is called with 4096 --
# presumably the file size in bytes; confirm against _create_file)
_create_file(file_name, 4096)
bos_client.put_object_from_file(bucket_name, key, file_name)
# download the object into a local file and log the resulting file size
bos_client.get_object_to_file(bucket_name, key, download)
__logger.debug("[Sample] get object into file, file size:%s", os.path.getsize(download))
# copy an object within the same bucket to "<key>.copy"
bos_client.copy_object(bucket_name, key, bucket_name, key + ".copy",)
# list objects in a bucket (up to 1000)
response = bos_client.list_objects(bucket_name)
for obj in response.contents:
    __logger.debug("[Sample] list objects key:%s", obj.key)
# delete an object
bos_client.delete_object(bucket_name, key)
######################################################################################################
# acl operation samples
######################################################################################################
# set bucket canned acl to "private"
bos_client.set_bucket_canned_acl(bucket_name, canned_acl.PRIVATE)
# get bucket acl
response = bos_client.get_bucket_acl(bucket_name)
# get object into file bos_client.get_object_to_file(bucket_name, key, download) __logger.debug("[Sample] get object into file, file size:%s", os.path.getsize(download)) # copy a object bos_client.copy_object( bucket_name, key, bucket_name, key + ".copy", ) # list objects in a bucket(up to 1000) response = bos_client.list_objects(bucket_name) for obj in response.contents: __logger.debug("[Sample] list objects key:%s", obj.key) # delete an object bos_client.delete_object(bucket_name, key) ###################################################################################################### # acl operation samples ###################################################################################################### # set bucket canned acl to "private" bos_client.set_bucket_canned_acl(bucket_name, canned_acl.PRIVATE) # get bucket acl response = bos_client.get_bucket_acl(bucket_name)
class BosFileSystem(object):
    """File-system style adapter over a BOS client.

    Paths handled by this class look like ``bos://<bucket>/<object key>``:
    ``_get_object_info`` drops the first six characters and splits on the
    first ``/``. Data passed to ``append`` is buffered in memory and flushed
    once a call-count or elapsed-time threshold is reached.
    """

    def __init__(self):
        # Defaults; get_bos_config() overwrites both from the environment.
        self.max_contents_count = 1
        self.max_contents_time = 1
        self.get_bos_config()
        self.bos_client = BosClient(self.config)
        self.file_length_map = {}

        # Pending-append buffer. It is shared by every file written through
        # this instance (it is not keyed by filename).
        self._file_contents_to_add = b''
        self._file_contents_count = 0
        self._start_append_time = time.time()

    def get_bos_config(self):
        """Build ``self.config`` and the buffering limits from environment variables.

        Reads ``BOS_HOST``, ``BOS_AK`` and ``BOS_SK`` (all required), the
        optional ``BOS_STS`` security token, and the optional
        ``BOS_CACHE_COUNT`` / ``BOS_CACHE_TIME`` limits (default 1 each).

        Raises:
            KeyError: if one of the required variables is unset or empty.
        """
        bos_host = os.getenv("BOS_HOST")
        if not bos_host:
            raise KeyError('${BOS_HOST} is not found.')
        access_key_id = os.getenv("BOS_AK")
        if not access_key_id:
            raise KeyError('${BOS_AK} is not found.')
        secret_access_key = os.getenv("BOS_SK")
        if not secret_access_key:
            raise KeyError('${BOS_SK} is not found.')
        self.max_contents_count = int(os.getenv('BOS_CACHE_COUNT', 1))
        self.max_contents_time = int(os.getenv('BOS_CACHE_TIME', 1))
        bos_sts = os.getenv("BOS_STS")
        self.config = BceClientConfiguration(
            credentials=BceCredentials(access_key_id, secret_access_key),
            endpoint=bos_host,
            security_token=bos_sts)

    def isfile(self, filename):
        """Return whatever the ``exists`` helper reports for ``filename``."""
        # NOTE(review): this calls a bare name ``exists`` rather than
        # ``self.exists`` -- presumably a module-level helper defined outside
        # this class; confirm it is in scope.
        return exists(filename)

    def read_file(self, filename, binary=True):
        """Return the whole object at ``filename``; ``binary`` is not used."""
        bucket_name, object_key = BosFileSystem._get_object_info(filename)
        return self.bos_client.get_object_as_string(bucket_name, object_key)

    @staticmethod
    def _get_object_info(path):
        """Split ``bos://bucket/key`` into ``(bucket, key)``.

        Raises:
            ValueError: if nothing after the six-character scheme prefix
                contains a ``/``.
        """
        path = path[6:]  # drop the leading "bos://"
        index = path.index('/')
        return path[0:index], path[index + 1:]

    def exists(self, path):
        """Return True when the object's metadata can be fetched, False on ``BceError``."""
        bucket_name, object_key = BosFileSystem._get_object_info(path)
        try:
            self.bos_client.get_object_meta_data(bucket_name, object_key)
        except exception.BceError:
            return False
        return True

    def get_meta(self, bucket_name, object_key):
        """Return the client's metadata response for the given object."""
        return self.bos_client.get_object_meta_data(bucket_name, object_key)

    def makedirs(self, path):
        """Create an empty object whose key ends with ``/`` unless it already exists."""
        if not path.endswith('/'):
            path += '/'
        if self.exists(path):
            return
        bucket_name, object_key = BosFileSystem._get_object_info(path)
        if not object_key.endswith('/'):
            object_key += '/'
        init_data = b''
        self.bos_client.append_object(bucket_name=bucket_name,
                                      key=object_key,
                                      data=init_data,
                                      content_md5=content_md5(init_data),
                                      content_length=len(init_data))

    @staticmethod
    def join(path, *paths):
        """Join path components and normalise separators to ``/``.

        ``str.replace`` returns a new string; the previous code discarded it,
        so backslashes were never actually converted.
        """
        return os.path.join(path, *paths).replace('\\', '/')

    def read(self, filename, binary_mode=False, size=0, continue_from=None):
        """Read the object from the last known offset to its current end.

        Args:
            filename: ``bos://`` path of the object.
            binary_mode: accepted but not used.
            size: accepted but not used.
            continue_from: token returned by a previous call; its
                ``"last_offset"`` entry is where reading resumes.

        Returns:
            ``(data, token)`` where ``token`` is ``{"last_offset": length}``
            for the next call. ``data`` is ``b''`` when there is nothing new.
        """
        bucket_name, object_key = BosFileSystem._get_object_info(filename)
        offset = 0
        if continue_from is not None:
            offset = continue_from.get("last_offset", 0)
        length = int(
            self.get_meta(bucket_name, object_key).metadata.content_length)
        if offset < length:
            # The range end is inclusive, hence ``length - 1``.
            data = self.bos_client.get_object_as_string(
                bucket_name=bucket_name,
                key=object_key,
                range=[offset, length - 1])
        else:
            data = b''
        continue_from_token = {"last_offset": length}
        return data, continue_from_token

    def ready_to_append(self):
        """Return True once enough chunks are buffered or enough time has elapsed."""
        return (self._file_contents_count >= self.max_contents_count
                or time.time() - self._start_append_time > self.max_contents_time)

    def append(self, filename, file_content, binary_mode=False, force=False):
        """Buffer ``file_content`` and flush the buffer to ``filename`` when due.

        Args:
            filename: ``bos://`` path of the object to append to.
            file_content: bytes to add to the buffer.
            binary_mode: accepted but not used.
            force: flush immediately regardless of the thresholds.
        """
        self._file_contents_to_add += file_content
        self._file_contents_count += 1

        if not force and not self.ready_to_append():
            return
        file_content = self._file_contents_to_add
        bucket_name, object_key = BosFileSystem._get_object_info(filename)
        if not self.exists(filename):
            # Create the object empty first so its length can be queried below.
            init_data = b''
            self.bos_client.append_object(bucket_name=bucket_name,
                                          key=object_key,
                                          data=init_data,
                                          content_md5=content_md5(init_data),
                                          content_length=len(init_data))
        content_length = len(file_content)

        # Append at the current end of the object.
        offset = self.get_meta(bucket_name, object_key).metadata.content_length
        self.bos_client.append_object(bucket_name=bucket_name,
                                      key=object_key,
                                      data=file_content,
                                      content_md5=content_md5(file_content),
                                      content_length=content_length,
                                      offset=offset)
        # Reset the buffer for the next batch.
        self._file_contents_to_add = b''
        self._file_contents_count = 0
        self._start_append_time = time.time()

    def write(self, filename, file_content, binary_mode=False):
        """Write by delegating to ``append`` (data is appended, not replaced)."""
        self.append(filename, file_content, binary_mode=binary_mode)

    def walk(self, dir):
        """Iterate ``[dirpath, [], filenames]`` entries for objects under ``dir``.

        Keys returned by one ``list_objects`` call are grouped by everything
        before their last ``/``. The directory-names slot is always empty.
        """

        class WalkGenerator():
            """Iterator over the grouped listing built by ``handle_contents``."""

            def __init__(self, bucket_name, contents):
                self.contents = None
                self.length = 0
                self.bucket = bucket_name
                self.handle_contents(contents)
                self.count = 0

            def handle_contents(self, contents):
                """Group keys by parent path and store them as walk entries."""
                contents_map = {}
                for item in contents:
                    try:
                        rindex = item.rindex('/')
                        key = item[0:rindex]
                        value = item[rindex + 1:]
                    except ValueError:
                        # No slash: the key sits at the bucket root.
                        key = '.'
                        value = item
                    contents_map.setdefault(key, []).append(value)
                temp_walk = [
                    [BosFileSystem.join('bos://' + self.bucket, key), [], value]
                    for key, value in contents_map.items()
                ]
                self.length = len(temp_walk)
                self.contents = temp_walk

            def __iter__(self):
                return self

            def __next__(self):
                if self.count < self.length:
                    self.count += 1
                    return self.contents[self.count - 1]
                raise StopIteration

        bucket_name, object_key = BosFileSystem._get_object_info(dir)

        if object_key in ['.', './']:
            prefix = None
        else:
            prefix = object_key if object_key.endswith(
                '/') else object_key + '/'
        response = self.bos_client.list_objects(bucket_name, prefix=prefix)
        contents = [content.key for content in response.contents]
        return WalkGenerator(bucket_name, contents)
# Fragment of a sample script: `bos_client`, `bucket_name`, `key`,
# `append_key` and `__logger` are defined outside this span.

# read back the appended object, log its value, then remove it
response = bos_client.get_object_as_string(bucket_name=bucket_name, key=append_key)
__logger.debug("[Sample] append object value:%s", response)
bos_client.delete_object(bucket_name, append_key)
# copy an object within the same bucket to "<key>.copy"
bos_client.copy_object(
    bucket_name,
    key,
    bucket_name,
    key + ".copy",
)
# list objects in a bucket (up to 1000)
response = bos_client.list_objects(bucket_name)
for obj in response.contents:
    __logger.debug("[Sample] list objects key:%s", obj.key)
# delete an object
bos_client.delete_object(bucket_name, key)
# delete multiple objects
# NOTE(review): other code in this file calls `delete_multiple_objects`
# (plural) -- confirm which name the installed SDK actually provides.
key_list = ['key1', 'key2', 'key3']
bos_client.delete_multiple_object(bucket_name, key_list)
######################################################################################################
# acl operation samples
######################################################################################################
# set bucket canned acl to "private"
class BosFileSystem(object):
    """File-system style adapter over a BOS client with credential renewal.

    Data passed to ``append`` is buffered in memory and flushed once a
    call-count or elapsed-time threshold is reached. When a flush fails with
    a BCE server/HTTP error, temporary credentials are fetched from the
    configured server and the operation is retried once.
    """

    def __init__(self, write_flag=True):
        """Initialise from the environment when ``write_flag`` is true.

        With ``write_flag=False`` nothing is set up here; the caller is
        expected to configure the instance itself (e.g. ``set_bos_config``).
        """
        if write_flag:
            # Defaults; get_bos_config() overwrites both from the environment.
            self.max_contents_count = 1
            self.max_contents_time = 1
            self.get_bos_config()
            self.bos_client = BosClient(self.config)
            self.file_length_map = {}

            # Pending-append buffer, shared by every file written through
            # this instance (it is not keyed by filename).
            self._file_contents_to_add = b''
            self._file_contents_count = 0
            self._start_append_time = time.time()

    def get_bos_config(self):
        """Build ``self.config`` and the buffering limits from environment variables.

        Reads ``BOS_HOST``, ``BOS_AK`` and ``BOS_SK`` (all required), the
        optional ``BOS_STS`` security token, and the optional
        ``BOS_CACHE_COUNT`` / ``BOS_CACHE_TIME`` limits (default 1 each).

        Raises:
            KeyError: if one of the required variables is unset or empty.
        """
        bos_host = os.getenv("BOS_HOST")
        if not bos_host:
            raise KeyError('${BOS_HOST} is not found.')
        access_key_id = os.getenv("BOS_AK")
        if not access_key_id:
            raise KeyError('${BOS_AK} is not found.')
        secret_access_key = os.getenv("BOS_SK")
        if not secret_access_key:
            raise KeyError('${BOS_SK} is not found.')
        self.max_contents_count = int(os.getenv('BOS_CACHE_COUNT', 1))
        self.max_contents_time = int(os.getenv('BOS_CACHE_TIME', 1))
        bos_sts = os.getenv("BOS_STS")
        self.config = BceClientConfiguration(
            credentials=BceCredentials(access_key_id, secret_access_key),
            endpoint=bos_host,
            security_token=bos_sts)

    def set_bos_config(self, bos_ak, bos_sk, bos_sts, bos_host="bj.bcebos.com"):
        """Replace the configuration and client with the given credentials."""
        self.config = BceClientConfiguration(
            credentials=BceCredentials(bos_ak, bos_sk),
            endpoint=bos_host,
            security_token=bos_sts)
        self.bos_client = BosClient(self.config)

    def renew_bos_client_from_server(self):
        """Fetch temporary credentials from the configured server and rebuild the client.

        The server URL is read from the JSON file at ``CONFIG_PATH``; a POST to
        ``<server_url>/sts/`` is expected to answer with ``code`` and ``msg``.
        On any code other than ``'000000'`` the message is printed and the
        current client is left untouched.
        """
        import requests
        import json
        from visualdl.utils.dir import CONFIG_PATH
        with open(CONFIG_PATH, 'r') as fp:
            server_url = json.load(fp)['server_url']
        url = server_url + '/sts/'
        res = requests.post(url=url).json()
        err_code = res.get('code')
        msg = res.get('msg')
        if '000000' == err_code:
            sts_ak = msg.get('sts_ak')
            sts_sk = msg.get('sts_sk')
            sts_token = msg.get('token')
            self.set_bos_config(sts_ak, sts_sk, sts_token)
        else:
            print('Renew bos client error. Error msg: {}'.format(msg))
            return

    def isfile(self, filename):
        """Return whatever the ``exists`` helper reports for ``filename``."""
        # NOTE(review): this calls a bare name ``exists`` rather than
        # ``self.exists`` -- presumably a module-level helper defined outside
        # this class; confirm it is in scope.
        return exists(filename)

    def read_file(self, filename, binary=True):
        """Return the whole object at ``filename``; ``binary`` is not used."""
        bucket_name, object_key = get_object_info(filename)
        return self.bos_client.get_object_as_string(bucket_name, object_key)

    def exists(self, path):
        """Return True when the object's metadata can be fetched, False on ``BceError``."""
        bucket_name, object_key = get_object_info(path)
        try:
            self.bos_client.get_object_meta_data(bucket_name, object_key)
        except exception.BceError:
            return False
        return True

    def get_meta(self, bucket_name, object_key):
        """Return the client's metadata response for the given object."""
        return self.bos_client.get_object_meta_data(bucket_name, object_key)

    def makedirs(self, path):
        """Create an empty object whose key ends with ``/`` unless it already exists."""
        if not path.endswith('/'):
            path += '/'
        if self.exists(path):
            return
        bucket_name, object_key = get_object_info(path)
        if not object_key.endswith('/'):
            object_key += '/'
        init_data = b''
        self.bos_client.append_object(
            bucket_name=bucket_name,
            key=object_key,
            data=init_data,
            content_md5=content_md5(init_data),
            content_length=len(init_data))

    @staticmethod
    def join(path, *paths):
        """Join path components and normalise separators to ``/``.

        ``str.replace`` returns a new string; the previous code discarded it,
        so backslashes were never actually converted.
        """
        return os.path.join(path, *paths).replace('\\', '/')

    def read(self, filename, binary_mode=False, size=0, continue_from=None):
        """Read the object from the last known offset to its current end.

        Args:
            filename: path of the object.
            binary_mode: accepted but not used.
            size: accepted but not used.
            continue_from: token returned by a previous call; its
                ``"last_offset"`` entry is where reading resumes.

        Returns:
            ``(data, token)`` where ``token`` is ``{"last_offset": length}``
            for the next call. ``data`` is ``b''`` when there is nothing new.
        """
        bucket_name, object_key = get_object_info(filename)
        offset = 0
        if continue_from is not None:
            offset = continue_from.get("last_offset", 0)
        length = int(
            self.get_meta(bucket_name, object_key).metadata.content_length)
        if offset < length:
            # The range end is inclusive, hence ``length - 1``.
            data = self.bos_client.get_object_as_string(
                bucket_name=bucket_name,
                key=object_key,
                range=[offset, length - 1])
        else:
            data = b''
        continue_from_token = {"last_offset": length}
        return data, continue_from_token

    def ready_to_append(self):
        """Return True once enough chunks are buffered or enough time has elapsed."""
        return (self._file_contents_count >= self.max_contents_count
                or time.time() - self._start_append_time > self.max_contents_time)

    def _retry_after_renewal(self, operation):
        """Run ``operation()``; on a BCE server/HTTP error renew the client and retry once."""
        try:
            return operation()
        except (exception.BceServerError, exception.BceHttpClientError):
            self.renew_bos_client_from_server()
            return operation()

    def append(self, filename, file_content, binary_mode=False, force=False):
        """Buffer ``file_content`` and flush the buffer to ``filename`` when due.

        Args:
            filename: path of the object to append to.
            file_content: bytes to add to the buffer.
            binary_mode: accepted but not used.
            force: flush immediately regardless of the thresholds.

        Previously, when the object had to be created and the first attempt
        failed, the method returned right after the retried creation and left
        the buffered data unwritten; a forced final flush could therefore drop
        data. The flush now always continues to the actual append.
        """
        self._file_contents_to_add += file_content
        self._file_contents_count += 1

        if not force and not self.ready_to_append():
            return
        file_content = self._file_contents_to_add
        bucket_name, object_key = get_object_info(filename)
        content_length = len(file_content)

        # The closures read ``self.bos_client`` at call time, so a retry uses
        # the renewed client.
        def create_empty_object():
            init_data = b''
            self.bos_client.append_object(
                bucket_name=bucket_name,
                key=object_key,
                data=init_data,
                content_md5=content_md5(init_data),
                content_length=len(init_data))

        def append_buffered_data():
            # Append at the current end of the object.
            offset = self.get_meta(bucket_name,
                                   object_key).metadata.content_length
            self.bos_client.append_object(
                bucket_name=bucket_name,
                key=object_key,
                data=file_content,
                content_md5=content_md5(file_content),
                content_length=content_length,
                offset=offset)

        if not self.exists(filename):
            self._retry_after_renewal(create_empty_object)
        self._retry_after_renewal(append_buffered_data)

        # Reset the buffer for the next batch.
        self._file_contents_to_add = b''
        self._file_contents_count = 0
        self._start_append_time = time.time()

    def write(self, filename, file_content, binary_mode=False):
        """Write by delegating to ``append`` (data is appended, not replaced)."""
        self.append(filename, file_content, binary_mode=binary_mode)

    def walk(self, dir):
        """Iterate ``[dirpath, [], filenames]`` entries for objects under ``dir``.

        Keys returned by one ``list_objects`` call are grouped by everything
        before their last ``/``. The directory-names slot is always empty.
        """

        class WalkGenerator():
            """Iterator over the grouped listing built by ``handle_contents``."""

            def __init__(self, bucket_name, contents):
                self.contents = None
                self.length = 0
                self.bucket = bucket_name
                self.handle_contents(contents)
                self.count = 0

            def handle_contents(self, contents):
                """Group keys by parent path and store them as walk entries."""
                contents_map = {}
                for item in contents:
                    try:
                        rindex = item.rindex('/')
                        key = item[0:rindex]
                        value = item[rindex + 1:]
                    except ValueError:
                        # No slash: the key sits at the bucket root.
                        key = '.'
                        value = item
                    contents_map.setdefault(key, []).append(value)
                temp_walk = [
                    [BosFileSystem.join('bos://' + self.bucket, key), [], value]
                    for key, value in contents_map.items()
                ]
                self.length = len(temp_walk)
                self.contents = temp_walk

            def __iter__(self):
                return self

            def __next__(self):
                if self.count < self.length:
                    self.count += 1
                    return self.contents[self.count - 1]
                raise StopIteration

        bucket_name, object_key = get_object_info(dir)

        if object_key in ['.', './']:
            prefix = None
        else:
            prefix = object_key if object_key.endswith(
                '/') else object_key + '/'
        response = self.bos_client.list_objects(bucket_name, prefix=prefix)
        contents = [content.key for content in response.contents]
        return WalkGenerator(bucket_name, contents)
class BaiduBos:
    """Upload files or bytes to one BOS bucket and look up the keys stored in it."""

    # Files larger than this are uploaded in parts of _PART_SIZE bytes.
    _MULTIPART_THRESHOLD = 25 * 1024 * 1024
    _PART_SIZE = 5 * 1024 * 1024

    def __init__(self, bucket):
        self._bucket = bucket
        self._bos_client = BosClient(_config)

    def _already_uploaded(self, key):
        """Return True (and log a warning) if ``key`` is already in the bucket."""
        for obj in self._bos_client.list_all_objects(self._bucket):
            if obj.key == key:
                LOGGER.warning(
                    "the key '{0}' has already existed, upload canceled".
                    format(key))
                return True
        return False

    def _signed_url(self, key, expiration_in_seconds):
        """Return the pre-signed URL for ``key`` decoded to text."""
        url = self._bos_client.generate_pre_signed_url(
            self._bucket, key, expiration_in_seconds=expiration_in_seconds)
        return url.decode("utf-8")

    def upload_file(self,
                    fn,
                    key,
                    get_url=False,
                    absent=True,
                    expiration_in_seconds=-1):
        """Upload a local file; files over 25 MB are uploaded in parts.

        If the key already exists (and ``absent`` is true) nothing is
        uploaded, but the URL is still returned when requested.

        :param fn: path of the local file
        :param key: object key to store the file under
        :param get_url: whether to return the pre-signed URL of ``key``
        :param absent: when True, skip the upload if the key already exists
        :param expiration_in_seconds: forwarded to the pre-signed URL request
        :return: the URL when ``get_url`` is true, otherwise None
        """
        if not (absent and self._already_uploaded(key)):
            size = os.path.getsize(fn)
            if size > self._MULTIPART_THRESHOLD:
                self._multipart_upload(fn, key)
            else:
                with open(fn, mode='rb') as f:
                    self._bos_client.put_object(self._bucket, key, f, size,
                                                self.md5_file(fn))
        if get_url:
            return self._signed_url(key, expiration_in_seconds)
        return None

    def upload_bytes(self,
                     byte_arr,
                     key,
                     get_url=False,
                     absent=True,
                     expiration_in_seconds=-1):
        """Upload a bytes object.

        If the key already exists (and ``absent`` is true) nothing is
        uploaded, but the URL is still returned when requested.

        :param byte_arr: the bytes to upload
        :param key: object key to store the data under
        :param get_url: whether to return the pre-signed URL of ``key``
        :param absent: when True, skip the upload if the key already exists
        :param expiration_in_seconds: forwarded to the pre-signed URL request
        :return: the URL when ``get_url`` is true, otherwise None
        """
        if not (absent and self._already_uploaded(key)):
            # Upload to this instance's bucket -- the same one the existence
            # check and the URL use -- rather than a module-level constant.
            self._bos_client.put_object(self._bucket, key,
                                        io.BytesIO(byte_arr), len(byte_arr),
                                        self.md5_obj(byte_arr))
        if get_url:
            return self._signed_url(key, expiration_in_seconds)
        return None

    def _multipart_upload(self, fn, key):
        """Upload ``fn`` to ``key`` in 5 MB parts and return the completion response.

        :arg fn: path of the local file
        :arg key: object key to store the file under
        """
        upload_id = self._bos_client.initiate_multipart_upload(
            self._bucket, key).upload_id
        left_size = os.path.getsize(fn)
        offset = 0  # start position of the next part within the file
        part_number = 1
        part_list = []
        while left_size > 0:
            part_size = min(self._PART_SIZE, left_size)
            response = self._bos_client.upload_part_from_file(
                self._bucket, key, upload_id, part_number, part_size, fn,
                offset)
            print(part_number)
            left_size -= part_size
            offset += part_size
            part_list.append({
                "partNumber": part_number,
                "eTag": response.metadata.etag
            })
            part_number += 1

        location = self._bos_client.complete_multipart_upload(
            self._bucket, key, upload_id, part_list)
        print(location.location)
        return location

    def md5_file(self, fn):
        """Return the base64-encoded MD5 digest of the file at ``fn``."""
        buf_size = 8192
        md5 = hashlib.md5()
        with open(fn, mode='rb') as fp:
            # Read in fixed-size chunks so large files are not loaded at once.
            for buf in iter(lambda: fp.read(buf_size), b''):
                md5.update(buf)
        return base64.standard_b64encode(md5.digest())

    def md5_obj(self, bs):
        """Return the base64-encoded MD5 digest of the bytes ``bs``."""
        md5 = hashlib.md5()
        md5.update(bs)
        return base64.standard_b64encode(md5.digest())

    def list_uploaded_objects(self, prefix=None):
        """List the keys in the bucket.

        When ``prefix`` is given, a single request for at most 1000 keys is
        made; use the SDK directly if that is not enough.

        :arg prefix: only return keys starting with this prefix
        """
        if prefix is not None:
            response = self._bos_client.list_objects(self._bucket,
                                                     prefix=prefix,
                                                     max_keys=1000)
            return [obj.key for obj in response.contents]
        # list_all_objects is iterated directly (as the existence check does);
        # the old code read a ``.contents`` attribute from its result.
        return [
            obj.key for obj in self._bos_client.list_all_objects(self._bucket)
        ]

    def file_exists(self, fn):
        """Look for an uploaded object whose file name equals ``fn``.

        The file name of an object is its key with everything up to and
        including the last slash removed; a key without a slash is its own
        file name.

        :arg fn: file name to look for
        :return: the object's pre-signed URL if found, otherwise None
        """
        for key in self.list_uploaded_objects():
            # rfind returns -1 when there is no slash, so the slice is the whole key.
            file_name = key[key.rfind("/") + 1:]
            if file_name == fn:
                url = self._bos_client.generate_pre_signed_url(
                    bucket_name=self._bucket,
                    key=key,
                    expiration_in_seconds=-1)
                return url.decode("utf-8")
        return None

    def key_exists(self, key):
        """Return True if ``key`` is among the bucket's keys, else False."""
        # ``list.index`` raises ValueError for a missing item, so the old
        # ``index(key) >= 0`` check could never return False.
        return key in self.list_uploaded_objects()
class UBosClient(object):
    """Wrapper around ``BosClient`` bound to a single bucket."""

    # Number of attempts for each part upload and for the completion call.
    _MAX_ATTEMPTS = 300
    # Size of each part in a multipart upload, and the threshold above which
    # ``upload_file`` switches to multipart.
    _PART_SIZE = 5 * 1024 * 1024

    def __init__(self,
                 access_key_id,
                 secret_access_key,
                 bucket_name='',
                 endpoint=''):
        """Create the underlying client and remember the target bucket."""
        super(UBosClient, self).__init__()

        # Build the client configuration from the credentials and endpoint.
        config = BceClientConfiguration(credentials=BceCredentials(
            access_key_id, secret_access_key),
                                        endpoint=endpoint)
        # Connection timeout, in milliseconds.
        config.connection_timeout_in_mills = 3000
        self.client = BosClient(config)
        self.bucket = bucket_name

    def check_bucket(self):
        """Return True if the configured bucket exists."""
        return bool(self.client.does_bucket_exist(self.bucket))

    def check_object_key(self, object_key):
        """Return True if the bucket exists and the object's metadata can be fetched."""
        if not self.check_bucket():
            return False
        try:
            self.client.get_object_meta_data(self.bucket, object_key)
        except Exception:
            return False
        return True

    def mkdir(self, dir_name):
        """Create an empty ``<dir_name>/`` object; return False on any failure."""
        if not self.check_bucket():
            return False
        try:
            self.client.put_object_from_string(self.bucket,
                                               '{}/'.format(dir_name), '')
        except Exception:
            return False
        return True

    def get_all_files(self):
        """Return ``{'name', 'size'}`` dicts for every object in the bucket."""
        if not self.check_bucket():
            return []
        # The old code returned an undefined name (``filelist``) here.
        return [{'name': fobj.key, 'size': fobj.size}
                for fobj in self.client.list_all_objects(self.bucket)]

    def get_files_by_dir(self, dir_name):
        """Return ``{'name', 'size'}`` dicts for the objects listed under ``<dir_name>/``.

        The directory placeholder object itself is left out.
        """
        if not self.check_bucket():
            return []
        prefix = '{}/'.format(dir_name)
        response = self.client.list_objects(self.bucket, prefix=prefix)
        return [{'name': fobj.key, 'size': fobj.size}
                for fobj in response.contents if fobj.key != prefix]

    def rmfile(self, object_key):
        """Delete one object; return False only when the bucket is missing."""
        if not self.check_bucket():
            return False
        self.client.delete_object(self.bucket, object_key)
        return True

    def rmfiles(self, object_keys):
        """Delete several objects in one request; return False only when the bucket is missing."""
        if not self.check_bucket():
            return False
        self.client.delete_multiple_objects(self.bucket, object_keys)
        return True

    def rmdir(self, dir_name):
        """Delete the objects listed under ``<dir_name>/`` and then the directory object."""
        if not self.check_bucket():
            return False
        prefix = '{}/'.format(dir_name)
        file_list = self.get_files_by_dir(dir_name)
        object_keys = [fobj['name'] for fobj in file_list if 'name' in fobj]
        if object_keys:
            # Skip the batch call for an empty directory.
            self.rmfiles(object_keys)
        self.client.delete_object(self.bucket, prefix)
        return True

    def single_upload(self, object_key, file_path):
        """Upload ``file_path`` in one request, setting a content type for mp4/jpeg files.

        Returns False when the bucket is missing, True otherwise.
        """
        if not self.check_bucket():
            return False
        # The extension comes from the local path (the old code read an
        # undefined ``filename``).
        suffix = file_path.split('.')[-1].lower()
        if suffix == 'mp4':
            ret = self.client.put_object_from_file(self.bucket,
                                                   object_key,
                                                   file_path,
                                                   content_type='video/mp4')
        elif suffix in ['jpg', 'jpeg']:
            ret = self.client.put_object_from_file(self.bucket,
                                                   object_key,
                                                   file_path,
                                                   content_type='image/jpeg')
        else:
            ret = self.client.put_object_from_file(self.bucket, object_key,
                                                   file_path)
        print(ret)
        return True

    def get_upload_id(self, object_key):
        """Return the id of a pending multipart upload for ``object_key``, starting one if needed."""
        upload_id = None
        response = self.client.list_multipart_uploads(self.bucket)
        for item in response.uploads:
            if item.key == object_key:
                upload_id = item.upload_id
                break
        if not upload_id:
            upload_id = self.client.initiate_multipart_upload(
                self.bucket, object_key, content_type='video/mp4').upload_id
        return upload_id

    def _call_with_retries(self, func, *args):
        """Call ``func(*args)`` up to ``_MAX_ATTEMPTS`` times; return ``(succeeded, result)``."""
        for _ in range(self._MAX_ATTEMPTS):
            try:
                return True, func(*args)
            except Exception:
                continue
        return False, None

    def multipart_upload(self, object_key, file_path):
        """Upload ``file_path`` in 5 MB parts, retrying each request.

        Returns True once the upload has been completed, and False when no
        upload id is available or a request keeps failing. (The old code
        returned False on success, and after exhausted part retries carried
        on with an unbound or stale response.)
        """
        upload_id = self.get_upload_id(object_key)
        if not upload_id:
            return False
        left_size = os.path.getsize(file_path)
        offset, part_number, part_list = 0, 1, []
        while left_size > 0:
            part_size = min(self._PART_SIZE, left_size)
            uploaded, response = self._call_with_retries(
                self.client.upload_part_from_file, self.bucket, object_key,
                upload_id, part_number, part_size, file_path, offset)
            if not uploaded:
                return False
            left_size -= part_size
            offset += part_size
            part_list.append({
                "partNumber": part_number,
                "eTag": response.metadata.etag
            })
            part_number += 1

        completed, ret = self._call_with_retries(
            self.client.complete_multipart_upload, self.bucket, object_key,
            upload_id, part_list)
        if not completed:
            return False
        print(ret)
        return True

    def upload_file(self, object_key, file_path):
        """Upload ``file_path``: multipart above 5 MB, a single request otherwise.

        Returns the result of the chosen upload method.
        """
        # The size comes from ``file_path`` (the old code read an undefined
        # ``file_name``).
        file_size = os.path.getsize(file_path)
        if file_size > self._PART_SIZE:
            return self.multipart_upload(object_key, file_path)
        return self.single_upload(object_key, file_path)