def get_bos_file_bytes_io(path):
    """Fetch the object at ``path`` from Baidu BOS and return it as a stream.

    Args:
        path: Object key inside the module-level ``bucket_name`` bucket.

    Returns:
        io.BytesIO wrapping the object bytes, positioned at offset 0.

    NOTE(review): relies on module-level ``access_key_id``,
    ``secret_access_key``, ``bos_host`` and ``bucket_name`` defined elsewhere
    in this file — confirm they are set before calling.
    """
    logger = logging.getLogger("baidubce.http.bce_http_client")
    logger.setLevel(logging.DEBUG)
    # Fix: the original called logging.info(...) on the root logger, whose
    # default WARNING level silently drops the message; log through the
    # logger configured just above instead.
    logger.info('Getting file from Baidu BOS...')
    bos_config = BceClientConfiguration(
        credentials=BceCredentials(access_key_id, secret_access_key),
        endpoint=bos_host)
    bos_client = BosClient(bos_config)
    content = bos_client.get_object_as_string(bucket_name=bucket_name,
                                              key=path)
    audio = io.BytesIO(content)  # a fresh BytesIO is already at position 0
    return audio
# Bucket operations: make sure the bucket exists, then enumerate the buckets
# owned by this account.
bos_client.create_bucket(bucket_name)
response = bos_client.list_buckets()
for bucket in response.buckets:
    __logger.debug("[Sample] list buckets:%s", bucket.name)

# ---------------------------------------------------------------------------
# Object operation samples
# ---------------------------------------------------------------------------

# Round-trip a plain string through the bucket.
bos_client.put_object_from_string(bucket_name, key, "This is string content.")
content = bos_client.get_object_as_string(bucket_name, key)
__logger.debug("[Sample] get object as string:%s", content)

# Upload a 4096-byte local file, then download it to another path.
_create_file(file_name, 4096)
bos_client.put_object_from_file(bucket_name, key, file_name)
bos_client.get_object_to_file(bucket_name, key, download)
__logger.debug("[Sample] get object into file, file size:%s",
               os.path.getsize(download))

# Server-side copy of the object, then list the bucket (up to 1000 keys).
bos_client.copy_object(bucket_name, key, bucket_name, key + ".copy")
response = bos_client.list_objects(bucket_name)
# list your buckets response = bos_client.list_buckets() for bucket in response.buckets: __logger.debug("[Sample] list buckets:%s", bucket.name) ###################################################################################################### # object operation samples ###################################################################################################### # put a string as object bos_client.put_object_from_string(bucket_name, key, "This is string content.") # get a object as string content = bos_client.get_object_as_string(bucket_name, key) __logger.debug("[Sample] get object as string:%s", content) # put a file as object _create_file(file_name, 4096) bos_client.put_object_from_file(bucket_name, key, file_name) # get object into file bos_client.get_object_to_file(bucket_name, key, download) __logger.debug("[Sample] get object into file, file size:%s", os.path.getsize(download)) # copy a object bos_client.copy_object( bucket_name, key,
class BosFileSystem(object):
    """Filesystem-like interface over a Baidu BOS bucket.

    Paths look like ``bos://<bucket>/<object_key>``.  Credentials and the
    endpoint are read from environment variables (``BOS_HOST``, ``BOS_AK``,
    ``BOS_SK``, optional ``BOS_STS``).  Content handed to :meth:`append` is
    buffered locally and flushed to BOS once ``max_contents_count`` pieces or
    ``max_contents_time`` seconds have accumulated.
    """

    def __init__(self):
        # Defaults; get_bos_config() may override them from the environment.
        self.max_contents_count = 1
        self.max_contents_time = 1
        self.get_bos_config()
        self.bos_client = BosClient(self.config)
        self.file_length_map = {}
        # Local append buffer and its bookkeeping.
        self._file_contents_to_add = b''
        self._file_contents_count = 0
        self._start_append_time = time.time()

    def get_bos_config(self):
        """Populate ``self.config`` from environment variables.

        Raises:
            KeyError: if BOS_HOST, BOS_AK or BOS_SK is unset or empty.
        """
        bos_host = os.getenv("BOS_HOST")
        if not bos_host:
            raise KeyError('${BOS_HOST} is not found.')
        access_key_id = os.getenv("BOS_AK")
        if not access_key_id:
            raise KeyError('${BOS_AK} is not found.')
        secret_access_key = os.getenv("BOS_SK")
        if not secret_access_key:
            raise KeyError('${BOS_SK} is not found.')
        self.max_contents_count = int(os.getenv('BOS_CACHE_COUNT', 1))
        self.max_contents_time = int(os.getenv('BOS_CACHE_TIME', 1))
        bos_sts = os.getenv("BOS_STS")  # optional STS token, may be None
        self.config = BceClientConfiguration(
            credentials=BceCredentials(access_key_id, secret_access_key),
            endpoint=bos_host,
            security_token=bos_sts)

    def isfile(self, filename):
        """Return True if ``filename`` refers to an existing object."""
        # Fix: the original called the bare name ``exists`` (a NameError at
        # runtime unless a module-level helper shadows it); route through the
        # instance method.
        return self.exists(filename)

    def read_file(self, filename, binary=True):
        """Return the whole object addressed by ``filename`` as bytes."""
        bucket_name, object_key = BosFileSystem._get_object_info(filename)
        result = self.bos_client.get_object_as_string(bucket_name, object_key)
        return result

    @staticmethod
    def _get_object_info(path):
        """Split a ``bos://bucket/key`` path into ``(bucket, key)``."""
        path = path[6:]  # drop the leading 'bos://'
        index = path.index('/')
        bucket_name = path[0:index]
        object_key = path[index + 1:]
        return bucket_name, object_key

    def exists(self, path):
        """Return True if the object's metadata can be fetched."""
        bucket_name, object_key = BosFileSystem._get_object_info(path)
        try:
            self.bos_client.get_object_meta_data(bucket_name, object_key)
            return True
        except exception.BceError:
            return False

    def get_meta(self, bucket_name, object_key):
        """Return the BOS metadata response for ``object_key``."""
        return self.bos_client.get_object_meta_data(bucket_name, object_key)

    def makedirs(self, path):
        """Create an empty 'directory' marker object whose key ends in '/'."""
        if not path.endswith('/'):
            path += '/'
        if self.exists(path):
            return
        bucket_name, object_key = BosFileSystem._get_object_info(path)
        if not object_key.endswith('/'):
            object_key += '/'
        init_data = b''
        self.bos_client.append_object(bucket_name=bucket_name,
                                      key=object_key,
                                      data=init_data,
                                      content_md5=content_md5(init_data),
                                      content_length=len(init_data))

    @staticmethod
    def join(path, *paths):
        """Join path components and normalize separators to '/'."""
        result = os.path.join(path, *paths)
        # Fix: str.replace returns a new string; the original discarded the
        # result, so Windows-style backslashes were never normalized.
        return result.replace('\\', '/')

    def read(self, filename, binary_mode=False, size=0, continue_from=None):
        """Read ``filename`` from the offset stored in ``continue_from``.

        Returns:
            Tuple of (data bytes, token dict holding the new "last_offset").
        """
        bucket_name, object_key = BosFileSystem._get_object_info(filename)
        offset = 0
        if continue_from is not None:
            offset = continue_from.get("last_offset", 0)
        length = int(
            self.get_meta(bucket_name, object_key).metadata.content_length)
        if offset < length:
            data = self.bos_client.get_object_as_string(
                bucket_name=bucket_name,
                key=object_key,
                range=[offset, length - 1])
        else:
            data = b''
        continue_from_token = {"last_offset": length}
        return data, continue_from_token

    def ready_to_append(self):
        """Return True when the local buffer should be flushed to BOS."""
        return (self._file_contents_count >= self.max_contents_count
                or time.time() - self._start_append_time >
                self.max_contents_time)

    def append(self, filename, file_content, binary_mode=False, force=False):
        """Buffer ``file_content``; flush to BOS when full, stale or forced."""
        self._file_contents_to_add += file_content
        self._file_contents_count += 1
        if not force and not self.ready_to_append():
            return
        file_content = self._file_contents_to_add
        bucket_name, object_key = BosFileSystem._get_object_info(filename)
        if not self.exists(filename):
            # First touch: create an empty appendable object.
            init_data = b''
            self.bos_client.append_object(bucket_name=bucket_name,
                                          key=object_key,
                                          data=init_data,
                                          content_md5=content_md5(init_data),
                                          content_length=len(init_data))
        content_length = len(file_content)
        offset = self.get_meta(bucket_name,
                               object_key).metadata.content_length
        self.bos_client.append_object(bucket_name=bucket_name,
                                      key=object_key,
                                      data=file_content,
                                      content_md5=content_md5(file_content),
                                      content_length=content_length,
                                      offset=offset)
        # Reset the buffer after a successful flush.
        self._file_contents_to_add = b''
        self._file_contents_count = 0
        self._start_append_time = time.time()

    def write(self, filename, file_content, binary_mode=False):
        """Write by delegating to the buffered append."""
        self.append(filename, file_content, binary_mode=False)

    def walk(self, dir):
        """Iterate ``(dir_path, [], file_names)`` triples, os.walk-style,
        over the objects stored under ``dir``."""

        class WalkGenerator():
            """Iterator over (dir, [], files) built from a flat key list."""

            def __init__(self, bucket_name, contents):
                self.contents = None
                self.length = 0
                self.bucket = bucket_name
                self.handle_contents(contents)
                self.count = 0

            def handle_contents(self, contents):
                # Group object keys by their parent 'directory'; keys with no
                # '/' fall under '.'.
                contents_map = {}
                for item in contents:
                    try:
                        rindex = item.rindex('/')
                        key = item[0:rindex]
                        value = item[rindex + 1:]
                    except ValueError:
                        key = '.'
                        value = item
                    if key in contents_map.keys():
                        contents_map[key].append(value)
                    else:
                        contents_map[key] = [value]
                temp_walk = []
                for key, value in contents_map.items():
                    temp_walk.append([
                        BosFileSystem.join('bos://' + self.bucket, key), [],
                        value
                    ])
                self.length = len(temp_walk)
                self.contents = temp_walk

            def __iter__(self):
                return self

            def __next__(self):
                if self.count < self.length:
                    self.count += 1
                    return self.contents[self.count - 1]
                else:
                    raise StopIteration

        bucket_name, object_key = BosFileSystem._get_object_info(dir)
        if object_key in ['.', './']:
            prefix = None
        else:
            prefix = object_key if object_key.endswith(
                '/') else object_key + '/'
        response = self.bos_client.list_objects(bucket_name, prefix=prefix)
        contents = [content.key for content in response.contents]
        return WalkGenerator(bucket_name, contents)
# list your buckets response = bos_client.list_buckets() for bucket in response.buckets: __logger.debug("[Sample] list buckets:%s", bucket.name) ###################################################################################################### # object operation samples ###################################################################################################### # put a string as object bos_client.put_object_from_string(bucket_name, key, "This is string content.") # get a object as string content = bos_client.get_object_as_string(bucket_name, key) __logger.debug("[Sample] get object as string:%s", content) # put a file as object _create_file(file_name, 4096) bos_client.put_object_from_file(bucket_name, key, file_name) # get object into file bos_client.get_object_to_file(bucket_name, key, download) __logger.debug("[Sample] get object into file, file size:%s", os.path.getsize(download)) # put an appendable object append_key = 'test_append_key' result = bos_client.append_object_from_string( bucket_name=bucket_name,
class BosFileSystem(object):
    """Filesystem-like interface over a Baidu BOS bucket with STS renewal.

    Paths look like ``bos://<bucket>/<object_key>``.  Credentials come either
    from the environment (``BOS_HOST``, ``BOS_AK``, ``BOS_SK``, optional
    ``BOS_STS``) or from :meth:`set_bos_config`.  When an append fails with a
    server/HTTP error, fresh STS credentials are fetched from the VisualDL
    server and the operation is retried once.
    """

    def __init__(self, write_flag=True):
        # When write_flag is False nothing is initialized here; the caller is
        # expected to configure the client later via set_bos_config().
        if write_flag:
            self.max_contents_count = 1
            self.max_contents_time = 1
            self.get_bos_config()
            self.bos_client = BosClient(self.config)
            self.file_length_map = {}
            # Local append buffer and its bookkeeping.
            self._file_contents_to_add = b''
            self._file_contents_count = 0
            self._start_append_time = time.time()

    def get_bos_config(self):
        """Populate ``self.config`` from environment variables.

        Raises:
            KeyError: if BOS_HOST, BOS_AK or BOS_SK is unset or empty.
        """
        bos_host = os.getenv("BOS_HOST")
        if not bos_host:
            raise KeyError('${BOS_HOST} is not found.')
        access_key_id = os.getenv("BOS_AK")
        if not access_key_id:
            raise KeyError('${BOS_AK} is not found.')
        secret_access_key = os.getenv("BOS_SK")
        if not secret_access_key:
            raise KeyError('${BOS_SK} is not found.')
        self.max_contents_count = int(os.getenv('BOS_CACHE_COUNT', 1))
        self.max_contents_time = int(os.getenv('BOS_CACHE_TIME', 1))
        bos_sts = os.getenv("BOS_STS")  # optional STS token, may be None
        self.config = BceClientConfiguration(
            credentials=BceCredentials(access_key_id, secret_access_key),
            endpoint=bos_host,
            security_token=bos_sts)

    def set_bos_config(self, bos_ak, bos_sk, bos_sts,
                       bos_host="bj.bcebos.com"):
        """Rebuild the client from explicit (possibly STS) credentials."""
        self.config = BceClientConfiguration(
            credentials=BceCredentials(bos_ak, bos_sk),
            endpoint=bos_host,
            security_token=bos_sts)
        self.bos_client = BosClient(self.config)

    def renew_bos_client_from_server(self):
        """Fetch fresh STS credentials from the VisualDL server and apply
        them via set_bos_config(); print the server message on failure."""
        import requests
        import json
        from visualdl.utils.dir import CONFIG_PATH
        with open(CONFIG_PATH, 'r') as fp:
            server_url = json.load(fp)['server_url']
        url = server_url + '/sts/'
        res = requests.post(url=url).json()
        err_code = res.get('code')
        msg = res.get('msg')
        if '000000' == err_code:
            sts_ak = msg.get('sts_ak')
            sts_sk = msg.get('sts_sk')
            sts_token = msg.get('token')
            self.set_bos_config(sts_ak, sts_sk, sts_token)
        else:
            # NOTE(review): this literal was split across lines by extraction;
            # reconstructed as a single-line message.
            print('Renew bos client error. Error msg: {}'.format(msg))
        return

    def isfile(self, filename):
        """Return True if ``filename`` refers to an existing object."""
        # Fix: the original called the bare name ``exists`` (a NameError at
        # runtime unless a module-level helper shadows it); route through the
        # instance method.
        return self.exists(filename)

    def read_file(self, filename, binary=True):
        """Return the whole object addressed by ``filename`` as bytes."""
        # get_object_info is a module-level helper defined elsewhere in the
        # file (outside this block's view).
        bucket_name, object_key = get_object_info(filename)
        result = self.bos_client.get_object_as_string(bucket_name, object_key)
        return result

    def exists(self, path):
        """Return True if the object's metadata can be fetched."""
        bucket_name, object_key = get_object_info(path)
        try:
            self.bos_client.get_object_meta_data(bucket_name, object_key)
            return True
        except exception.BceError:
            return False

    def get_meta(self, bucket_name, object_key):
        """Return the BOS metadata response for ``object_key``."""
        return self.bos_client.get_object_meta_data(bucket_name, object_key)

    def makedirs(self, path):
        """Create an empty 'directory' marker object whose key ends in '/'."""
        if not path.endswith('/'):
            path += '/'
        if self.exists(path):
            return
        bucket_name, object_key = get_object_info(path)
        if not object_key.endswith('/'):
            object_key += '/'
        init_data = b''
        self.bos_client.append_object(
            bucket_name=bucket_name,
            key=object_key,
            data=init_data,
            content_md5=content_md5(init_data),
            content_length=len(init_data))

    @staticmethod
    def join(path, *paths):
        """Join path components and normalize separators to '/'."""
        result = os.path.join(path, *paths)
        # Fix: str.replace returns a new string; the original discarded the
        # result, so Windows-style backslashes were never normalized.
        return result.replace('\\', '/')

    def read(self, filename, binary_mode=False, size=0, continue_from=None):
        """Read ``filename`` from the offset stored in ``continue_from``.

        Returns:
            Tuple of (data bytes, token dict holding the new "last_offset").
        """
        bucket_name, object_key = get_object_info(filename)
        offset = 0
        if continue_from is not None:
            offset = continue_from.get("last_offset", 0)
        length = int(
            self.get_meta(bucket_name, object_key).metadata.content_length)
        if offset < length:
            data = self.bos_client.get_object_as_string(
                bucket_name=bucket_name,
                key=object_key,
                range=[offset, length - 1])
        else:
            data = b''
        continue_from_token = {"last_offset": length}
        return data, continue_from_token

    def ready_to_append(self):
        """Return True when the local buffer should be flushed to BOS."""
        return (self._file_contents_count >= self.max_contents_count
                or time.time() - self._start_append_time >
                self.max_contents_time)

    def append(self, filename, file_content, binary_mode=False, force=False):
        """Buffer ``file_content``; flush to BOS when full, stale or forced.

        On a server/HTTP error the STS credentials are renewed and the call
        is retried once.  Note: when the object does not exist yet, only the
        empty object is created and the method returns — the buffered content
        is flushed by a subsequent call (original behavior, preserved).
        """
        self._file_contents_to_add += file_content
        self._file_contents_count += 1
        if not force and not self.ready_to_append():
            return
        file_content = self._file_contents_to_add
        bucket_name, object_key = get_object_info(filename)
        if not self.exists(filename):
            init_data = b''
            try:
                self.bos_client.append_object(
                    bucket_name=bucket_name,
                    key=object_key,
                    data=init_data,
                    content_md5=content_md5(init_data),
                    content_length=len(init_data))
            except (exception.BceServerError, exception.BceHttpClientError):
                # Credentials likely expired: renew and retry once.
                self.renew_bos_client_from_server()
                self.bos_client.append_object(
                    bucket_name=bucket_name,
                    key=object_key,
                    data=init_data,
                    content_md5=content_md5(init_data),
                    content_length=len(init_data))
            return
        content_length = len(file_content)
        try:
            offset = self.get_meta(bucket_name,
                                   object_key).metadata.content_length
            self.bos_client.append_object(
                bucket_name=bucket_name,
                key=object_key,
                data=file_content,
                content_md5=content_md5(file_content),
                content_length=content_length,
                offset=offset)
        except (exception.BceServerError, exception.BceHttpClientError):
            # Credentials likely expired: renew, re-read the offset, retry.
            self.renew_bos_client_from_server()
            offset = self.get_meta(bucket_name,
                                   object_key).metadata.content_length
            self.bos_client.append_object(
                bucket_name=bucket_name,
                key=object_key,
                data=file_content,
                content_md5=content_md5(file_content),
                content_length=content_length,
                offset=offset)
        # Reset the buffer after a successful flush.
        self._file_contents_to_add = b''
        self._file_contents_count = 0
        self._start_append_time = time.time()

    def write(self, filename, file_content, binary_mode=False):
        """Write by delegating to the buffered append."""
        self.append(filename, file_content, binary_mode=False)

    def walk(self, dir):
        """Iterate ``(dir_path, [], file_names)`` triples, os.walk-style,
        over the objects stored under ``dir``."""

        class WalkGenerator():
            """Iterator over (dir, [], files) built from a flat key list."""

            def __init__(self, bucket_name, contents):
                self.contents = None
                self.length = 0
                self.bucket = bucket_name
                self.handle_contents(contents)
                self.count = 0

            def handle_contents(self, contents):
                # Group object keys by their parent 'directory'; keys with no
                # '/' fall under '.'.
                contents_map = {}
                for item in contents:
                    try:
                        rindex = item.rindex('/')
                        key = item[0:rindex]
                        value = item[rindex + 1:]
                    except ValueError:
                        key = '.'
                        value = item
                    if key in contents_map.keys():
                        contents_map[key].append(value)
                    else:
                        contents_map[key] = [value]
                temp_walk = []
                for key, value in contents_map.items():
                    temp_walk.append([
                        BosFileSystem.join('bos://' + self.bucket, key), [],
                        value
                    ])
                self.length = len(temp_walk)
                self.contents = temp_walk

            def __iter__(self):
                return self

            def __next__(self):
                if self.count < self.length:
                    self.count += 1
                    return self.contents[self.count - 1]
                else:
                    raise StopIteration

        bucket_name, object_key = get_object_info(dir)
        if object_key in ['.', './']:
            prefix = None
        else:
            prefix = object_key if object_key.endswith(
                '/') else object_key + '/'
        response = self.bos_client.list_objects(bucket_name, prefix=prefix)
        contents = [content.key for content in response.contents]
        return WalkGenerator(bucket_name, contents)