class BosConfigClient(object):
    """Thin wrapper around a BOS client built from explicit credentials.

    Provides existence checks, directory creation and file upload for a
    BOS (Baidu Object Storage) bucket.  Paths are expected in the form
    ``bos://<bucket>/<object_key>`` (parsed by the module-level
    ``get_object_info`` helper).
    """

    def __init__(self, bos_ak, bos_sk, bos_sts, bos_host="bj.bcebos.com"):
        """Build the underlying ``BosClient``.

        Args:
            bos_ak: Access key id.
            bos_sk: Secret access key.
            bos_sts: STS security token.
            bos_host: BOS endpoint; defaults to the Beijing region.
        """
        self.config = BceClientConfiguration(
            credentials=BceCredentials(bos_ak, bos_sk),
            endpoint=bos_host,
            security_token=bos_sts)
        self.bos_client = BosClient(self.config)

    def exists(self, path):
        """Return True if ``path`` refers to an existing object.

        Any BCE-level error (including "not found") is reported as absent.
        """
        bucket_name, object_key = get_object_info(path)
        try:
            self.bos_client.get_object_meta_data(bucket_name, object_key)
            return True
        except exception.BceError:
            return False

    def makedirs(self, path):
        """Create a directory placeholder (an empty object whose key ends in '/')."""
        if not path.endswith('/'):
            path += '/'
        if self.exists(path):
            return
        bucket_name, object_key = get_object_info(path)
        if not object_key.endswith('/'):
            object_key += '/'
        # A zero-byte appendable object acts as the directory marker.
        init_data = b''
        self.bos_client.append_object(
            bucket_name=bucket_name,
            key=object_key,
            data=init_data,
            content_md5=content_md5(init_data),
            content_length=len(init_data))

    @staticmethod
    def join(path, *paths):
        """Join path components and normalize separators to forward slashes."""
        result = os.path.join(path, *paths)
        # BUG FIX: str.replace returns a new string; the original discarded
        # the result, so backslashes were never actually normalized.
        return result.replace('\\', '/')

    def upload_object_from_file(self, path, filename):
        """Upload local file ``filename`` under remote directory ``path``.

        Creates the remote directory marker first if it does not exist.
        """
        if not self.exists(path):
            self.makedirs(path)
        bucket_name, object_key = get_object_info(path)
        object_key = self.join(object_key, filename)
        print('Uploading file `%s`' % filename)
        self.bos_client.put_object_from_file(
            bucket=bucket_name, key=object_key, file_name=filename)
class BosFileSystem(object):
    """File-system-style facade over BOS configured from BOS_* env vars.

    Paths are of the form ``bos://<bucket>/<object_key>``.  Appends are
    buffered in memory and flushed once ``max_contents_count`` writes or
    ``max_contents_time`` seconds have accumulated, to reduce round trips.
    """

    def __init__(self):
        # Flush thresholds; may be overridden from env in get_bos_config().
        self.max_contents_count = 1
        self.max_contents_time = 1
        self.get_bos_config()
        self.bos_client = BosClient(self.config)
        self.file_length_map = {}
        # In-memory append buffer and its bookkeeping.
        self._file_contents_to_add = b''
        self._file_contents_count = 0
        self._start_append_time = time.time()

    def get_bos_config(self):
        """Read endpoint/credentials from the environment into ``self.config``.

        Raises:
            KeyError: if BOS_HOST, BOS_AK or BOS_SK is unset.
        """
        bos_host = os.getenv("BOS_HOST")
        if not bos_host:
            raise KeyError('${BOS_HOST} is not found.')
        access_key_id = os.getenv("BOS_AK")
        if not access_key_id:
            raise KeyError('${BOS_AK} is not found.')
        secret_access_key = os.getenv("BOS_SK")
        if not secret_access_key:
            raise KeyError('${BOS_SK} is not found.')
        # Optional buffering tuning knobs.
        self.max_contents_count = int(os.getenv('BOS_CACHE_COUNT', 1))
        self.max_contents_time = int(os.getenv('BOS_CACHE_TIME', 1))
        bos_sts = os.getenv("BOS_STS")
        self.config = BceClientConfiguration(
            credentials=BceCredentials(access_key_id, secret_access_key),
            endpoint=bos_host,
            security_token=bos_sts)

    def isfile(self, filename):
        # NOTE(review): calls the bare name `exists`, not `self.exists` —
        # presumably a module-level dispatcher outside this view; confirm it
        # exists, otherwise this should be `self.exists(filename)`.
        return exists(filename)

    def read_file(self, filename, binary=True):
        """Return the full object contents as bytes."""
        bucket_name, object_key = BosFileSystem._get_object_info(filename)
        result = self.bos_client.get_object_as_string(bucket_name, object_key)
        return result

    @staticmethod
    def _get_object_info(path):
        """Split ``bos://bucket/key`` into (bucket, key)."""
        path = path[6:]  # strip the 'bos://' scheme prefix
        index = path.index('/')
        bucket_name = path[0:index]
        object_key = path[index + 1:]
        return bucket_name, object_key

    def exists(self, path):
        """Return True if ``path`` refers to an existing object."""
        bucket_name, object_key = BosFileSystem._get_object_info(path)
        try:
            self.bos_client.get_object_meta_data(bucket_name, object_key)
            return True
        except exception.BceError:
            return False

    def get_meta(self, bucket_name, object_key):
        """Return the object's metadata (size, etag, ...)."""
        return self.bos_client.get_object_meta_data(bucket_name, object_key)

    def makedirs(self, path):
        """Create a directory placeholder (empty object with key ending '/')."""
        if not path.endswith('/'):
            path += '/'
        if self.exists(path):
            return
        bucket_name, object_key = BosFileSystem._get_object_info(path)
        if not object_key.endswith('/'):
            object_key += '/'
        init_data = b''
        self.bos_client.append_object(
            bucket_name=bucket_name,
            key=object_key,
            data=init_data,
            content_md5=content_md5(init_data),
            content_length=len(init_data))

    @staticmethod
    def join(path, *paths):
        """Join path components and normalize separators to forward slashes."""
        result = os.path.join(path, *paths)
        # BUG FIX: str.replace returns a new string; the original discarded
        # the result, so backslashes were never actually normalized.
        return result.replace('\\', '/')

    def read(self, filename, binary_mode=False, size=0, continue_from=None):
        """Read any bytes appended since the last call.

        Args:
            continue_from: optional dict with 'last_offset' from a previous
                call; reading resumes from there.

        Returns:
            (data, token) where token carries the new 'last_offset'.
        """
        bucket_name, object_key = BosFileSystem._get_object_info(filename)
        offset = 0
        if continue_from is not None:
            offset = continue_from.get("last_offset", 0)
        length = int(
            self.get_meta(bucket_name, object_key).metadata.content_length)
        if offset < length:
            # Range read of the not-yet-seen tail [offset, length-1].
            data = self.bos_client.get_object_as_string(
                bucket_name=bucket_name,
                key=object_key,
                range=[offset, length - 1])
        else:
            data = b''
        continue_from_token = {"last_offset": length}
        return data, continue_from_token

    def ready_to_append(self):
        """True once the buffer is large enough or old enough to flush."""
        if self._file_contents_count >= self.max_contents_count or \
                time.time() - self._start_append_time > self.max_contents_time:
            return True
        else:
            return False

    def append(self, filename, file_content, binary_mode=False, force=False):
        """Buffer ``file_content``; flush the buffer to BOS when due.

        With force=True the flush happens immediately.
        """
        self._file_contents_to_add += file_content
        self._file_contents_count += 1
        if not force and not self.ready_to_append():
            return
        file_content = self._file_contents_to_add
        bucket_name, object_key = BosFileSystem._get_object_info(filename)
        if not self.exists(filename):
            # First touch: create the appendable object with empty content.
            init_data = b''
            self.bos_client.append_object(
                bucket_name=bucket_name,
                key=object_key,
                data=init_data,
                content_md5=content_md5(init_data),
                content_length=len(init_data))
        content_length = len(file_content)
        # Append at the current end of the remote object.
        offset = self.get_meta(bucket_name, object_key).metadata.content_length
        self.bos_client.append_object(
            bucket_name=bucket_name,
            key=object_key,
            data=file_content,
            content_md5=content_md5(file_content),
            content_length=content_length,
            offset=offset)
        # Reset the buffer after a successful flush.
        self._file_contents_to_add = b''
        self._file_contents_count = 0
        self._start_append_time = time.time()

    def write(self, filename, file_content, binary_mode=False):
        """Write via the buffered append path."""
        self.append(filename, file_content, binary_mode=False)

    def walk(self, dir):
        """One-level walk over the objects under ``dir``.

        Returns an iterator of [directory, [], filenames] triples in the
        style of os.walk (subdirectory lists are always empty).
        """

        class WalkGenerator():
            """Iterator over (dir, [], files) triples built from object keys."""

            def __init__(self, bucket_name, contents):
                self.contents = None
                self.length = 0
                self.bucket = bucket_name
                self.handle_contents(contents)
                self.count = 0

            def handle_contents(self, contents):
                # Group every object key by its parent "directory".
                contents_map = {}
                for item in contents:
                    try:
                        rindex = item.rindex('/')
                        key = item[0:rindex]
                        value = item[rindex + 1:]
                    except ValueError:
                        # No slash at all: the object sits at the bucket root.
                        key = '.'
                        value = item
                    if key in contents_map.keys():
                        contents_map[key].append(value)
                    else:
                        contents_map[key] = [value]
                temp_walk = []
                for key, value in contents_map.items():
                    temp_walk.append([
                        BosFileSystem.join('bos://' + self.bucket, key), [],
                        value
                    ])
                self.length = len(temp_walk)
                self.contents = temp_walk

            def __iter__(self):
                return self

            def __next__(self):
                if self.count < self.length:
                    self.count += 1
                    return self.contents[self.count - 1]
                else:
                    raise StopIteration

        bucket_name, object_key = BosFileSystem._get_object_info(dir)
        if object_key in ['.', './']:
            prefix = None
        else:
            prefix = object_key if object_key.endswith(
                '/') else object_key + '/'
        response = self.bos_client.list_objects(bucket_name, prefix=prefix)
        contents = [content.key for content in response.contents]
        return WalkGenerator(bucket_name, contents)
class BosFileSystem(object):
    """File-system-style facade over BOS with STS credential renewal.

    Paths are of the form ``bos://<bucket>/<object_key>`` and are parsed
    by the module-level ``get_object_info`` helper.  Appends are buffered
    in memory and flushed once ``max_contents_count`` writes or
    ``max_contents_time`` seconds have accumulated.  When a write fails
    with a BCE server/HTTP error the client is rebuilt with fresh STS
    credentials fetched from the VisualDL server and the write retried.
    """

    def __init__(self, write_flag=True):
        """Set up the client.

        Args:
            write_flag: when True, read credentials from the environment
                and build the BOS client immediately.  When False the
                caller is expected to configure via ``set_bos_config``.
        """
        if write_flag:
            self.max_contents_count = 1
            self.max_contents_time = 1
            self.get_bos_config()
            self.bos_client = BosClient(self.config)
            self.file_length_map = {}
            # In-memory append buffer and its bookkeeping.
            self._file_contents_to_add = b''
            self._file_contents_count = 0
            self._start_append_time = time.time()

    def get_bos_config(self):
        """Read endpoint/credentials from the environment into ``self.config``.

        Raises:
            KeyError: if BOS_HOST, BOS_AK or BOS_SK is unset.
        """
        bos_host = os.getenv("BOS_HOST")
        if not bos_host:
            raise KeyError('${BOS_HOST} is not found.')
        access_key_id = os.getenv("BOS_AK")
        if not access_key_id:
            raise KeyError('${BOS_AK} is not found.')
        secret_access_key = os.getenv("BOS_SK")
        if not secret_access_key:
            raise KeyError('${BOS_SK} is not found.')
        # Optional buffering tuning knobs.
        self.max_contents_count = int(os.getenv('BOS_CACHE_COUNT', 1))
        self.max_contents_time = int(os.getenv('BOS_CACHE_TIME', 1))
        bos_sts = os.getenv("BOS_STS")
        self.config = BceClientConfiguration(
            credentials=BceCredentials(access_key_id, secret_access_key),
            endpoint=bos_host,
            security_token=bos_sts)

    def set_bos_config(self, bos_ak, bos_sk, bos_sts,
                       bos_host="bj.bcebos.com"):
        """Rebuild the client from explicit (possibly temporary) credentials."""
        self.config = BceClientConfiguration(
            credentials=BceCredentials(bos_ak, bos_sk),
            endpoint=bos_host,
            security_token=bos_sts)
        self.bos_client = BosClient(self.config)

    def renew_bos_client_from_server(self):
        """Fetch fresh STS credentials from the VisualDL server and rebuild."""
        import requests
        import json
        from visualdl.utils.dir import CONFIG_PATH
        with open(CONFIG_PATH, 'r') as fp:
            server_url = json.load(fp)['server_url']
        url = server_url + '/sts/'
        res = requests.post(url=url).json()
        err_code = res.get('code')
        msg = res.get('msg')
        if '000000' == err_code:
            sts_ak = msg.get('sts_ak')
            sts_sk = msg.get('sts_sk')
            sts_token = msg.get('token')
            self.set_bos_config(sts_ak, sts_sk, sts_token)
        else:
            # Best-effort: report and keep the stale client.
            print('Renew bos client error. Error msg: {}'.format(msg))
        return

    def isfile(self, filename):
        # NOTE(review): calls the bare name `exists`, not `self.exists` —
        # presumably a module-level dispatcher outside this view; confirm it
        # exists, otherwise this should be `self.exists(filename)`.
        return exists(filename)

    def read_file(self, filename, binary=True):
        """Return the full object contents as bytes."""
        bucket_name, object_key = get_object_info(filename)
        result = self.bos_client.get_object_as_string(bucket_name, object_key)
        return result

    def exists(self, path):
        """Return True if ``path`` refers to an existing object."""
        bucket_name, object_key = get_object_info(path)
        try:
            self.bos_client.get_object_meta_data(bucket_name, object_key)
            return True
        except exception.BceError:
            return False

    def get_meta(self, bucket_name, object_key):
        """Return the object's metadata (size, etag, ...)."""
        return self.bos_client.get_object_meta_data(bucket_name, object_key)

    def makedirs(self, path):
        """Create a directory placeholder (empty object with key ending '/')."""
        if not path.endswith('/'):
            path += '/'
        if self.exists(path):
            return
        bucket_name, object_key = get_object_info(path)
        if not object_key.endswith('/'):
            object_key += '/'
        init_data = b''
        self.bos_client.append_object(
            bucket_name=bucket_name,
            key=object_key,
            data=init_data,
            content_md5=content_md5(init_data),
            content_length=len(init_data))

    @staticmethod
    def join(path, *paths):
        """Join path components and normalize separators to forward slashes."""
        result = os.path.join(path, *paths)
        # BUG FIX: str.replace returns a new string; the original discarded
        # the result, so backslashes were never actually normalized.
        return result.replace('\\', '/')

    def read(self, filename, binary_mode=False, size=0, continue_from=None):
        """Read any bytes appended since the last call.

        Args:
            continue_from: optional dict with 'last_offset' from a previous
                call; reading resumes from there.

        Returns:
            (data, token) where token carries the new 'last_offset'.
        """
        bucket_name, object_key = get_object_info(filename)
        offset = 0
        if continue_from is not None:
            offset = continue_from.get("last_offset", 0)
        length = int(
            self.get_meta(bucket_name, object_key).metadata.content_length)
        if offset < length:
            # Range read of the not-yet-seen tail [offset, length-1].
            data = self.bos_client.get_object_as_string(
                bucket_name=bucket_name,
                key=object_key,
                range=[offset, length - 1])
        else:
            data = b''
        continue_from_token = {"last_offset": length}
        return data, continue_from_token

    def ready_to_append(self):
        """True once the buffer is large enough or old enough to flush."""
        if self._file_contents_count >= self.max_contents_count or \
                time.time() - self._start_append_time > self.max_contents_time:
            return True
        else:
            return False

    def append(self, filename, file_content, binary_mode=False, force=False):
        """Buffer ``file_content``; flush the buffer to BOS when due.

        On BCE server/HTTP errors the client is renewed once and the
        operation retried.  When the remote object does not exist yet it
        is only created on this call; the buffered contents stay pending
        and flush on a later call.
        """
        self._file_contents_to_add += file_content
        self._file_contents_count += 1
        if not force and not self.ready_to_append():
            return
        file_content = self._file_contents_to_add
        bucket_name, object_key = get_object_info(filename)
        if not self.exists(filename):
            init_data = b''
            try:
                self.bos_client.append_object(
                    bucket_name=bucket_name,
                    key=object_key,
                    data=init_data,
                    content_md5=content_md5(init_data),
                    content_length=len(init_data))
            except (exception.BceServerError, exception.BceHttpClientError):
                # Likely expired STS credentials: renew and retry once.
                self.renew_bos_client_from_server()
                self.bos_client.append_object(
                    bucket_name=bucket_name,
                    key=object_key,
                    data=init_data,
                    content_md5=content_md5(init_data),
                    content_length=len(init_data))
            # Buffer intentionally NOT reset: contents flush on a later call.
            return
        content_length = len(file_content)
        try:
            offset = self.get_meta(bucket_name,
                                   object_key).metadata.content_length
            self.bos_client.append_object(
                bucket_name=bucket_name,
                key=object_key,
                data=file_content,
                content_md5=content_md5(file_content),
                content_length=content_length,
                offset=offset)
        except (exception.BceServerError, exception.BceHttpClientError):
            # Renew credentials, re-read the tail offset, retry once.
            self.renew_bos_client_from_server()
            offset = self.get_meta(bucket_name,
                                   object_key).metadata.content_length
            self.bos_client.append_object(
                bucket_name=bucket_name,
                key=object_key,
                data=file_content,
                content_md5=content_md5(file_content),
                content_length=content_length,
                offset=offset)
        # Reset the buffer after a successful flush.
        self._file_contents_to_add = b''
        self._file_contents_count = 0
        self._start_append_time = time.time()

    def write(self, filename, file_content, binary_mode=False):
        """Write via the buffered append path."""
        self.append(filename, file_content, binary_mode=False)

    def walk(self, dir):
        """One-level walk over the objects under ``dir``.

        Returns an iterator of [directory, [], filenames] triples in the
        style of os.walk (subdirectory lists are always empty).
        """

        class WalkGenerator():
            """Iterator over (dir, [], files) triples built from object keys."""

            def __init__(self, bucket_name, contents):
                self.contents = None
                self.length = 0
                self.bucket = bucket_name
                self.handle_contents(contents)
                self.count = 0

            def handle_contents(self, contents):
                # Group every object key by its parent "directory".
                contents_map = {}
                for item in contents:
                    try:
                        rindex = item.rindex('/')
                        key = item[0:rindex]
                        value = item[rindex + 1:]
                    except ValueError:
                        # No slash at all: the object sits at the bucket root.
                        key = '.'
                        value = item
                    if key in contents_map.keys():
                        contents_map[key].append(value)
                    else:
                        contents_map[key] = [value]
                temp_walk = []
                for key, value in contents_map.items():
                    temp_walk.append([
                        BosFileSystem.join('bos://' + self.bucket, key), [],
                        value
                    ])
                self.length = len(temp_walk)
                self.contents = temp_walk

            def __iter__(self):
                return self

            def __next__(self):
                if self.count < self.length:
                    self.count += 1
                    return self.contents[self.count - 1]
                else:
                    raise StopIteration

        bucket_name, object_key = get_object_info(dir)
        if object_key in ['.', './']:
            prefix = None
        else:
            prefix = object_key if object_key.endswith(
                '/') else object_key + '/'
        response = self.bos_client.list_objects(bucket_name, prefix=prefix)
        contents = [content.key for content in response.contents]
        return WalkGenerator(bucket_name, contents)
"partNumber": part_number, "eTag": response.metadata.etag }) part_number += 1 # copy a object part by part # step 1: init multi-upload upload_id = bos_client.initiate_multipart_upload(target_bucket, target_key).upload_id upload_id_about = bos_client.initiate_multipart_upload( target_bucket, target_key + "_about").upload_id # step 2: upload copy part by part left_size = int( bos_client.get_object_meta_data(source_bucket, source_key).metadata.content_length) offset = 0 part_number = 1 part_list = [] while left_size > 0: part_size = 5 * 1024 * 1024 if left_size < part_size: part_size = left_size response = bos_client.upload_part_copy(source_bucket, source_key, target_bucket, target_key, upload_id, part_number, part_size, offset) left_size -= part_size offset += part_size part_list.append({"partNumber": part_number, "eTag": response.etag}) part_number += 1
class UBosClient(object):
    """BosClient wrapper bound to a single bucket.

    Operations verify the bucket exists first and report failure by
    returning False / empty results instead of raising.
    """

    def __init__(self, access_key_id, secret_access_key, bucket_name='',
                 endpoint=''):
        """Build the underlying BosClient with a 3s connection timeout."""
        super(UBosClient, self).__init__()
        config = BceClientConfiguration(
            credentials=BceCredentials(access_key_id, secret_access_key),
            endpoint=endpoint)
        # Request connection timeout, in milliseconds.
        config.connection_timeout_in_mills = 3000
        self.client = BosClient(config)
        self.bucket = bucket_name

    def check_bucket(self):
        """Return True if the bound bucket exists."""
        return bool(self.client.does_bucket_exist(self.bucket))

    def check_object_key(self, object_key):
        """Return True if ``object_key`` exists in the bucket."""
        if not self.check_bucket():
            return False
        try:
            self.client.get_object_meta_data(self.bucket, object_key)
            return True
        except Exception:  # narrowed from bare except
            return False

    def mkdir(self, dir_name):
        """Create a directory marker object ('<dir_name>/')."""
        if not self.check_bucket():
            return False
        try:
            self.client.put_object_from_string(self.bucket,
                                               '{}/'.format(dir_name), '')
            return True
        except Exception:  # narrowed from bare except
            return False

    def get_all_files(self):
        """Return [{'name', 'size'}, ...] for every object in the bucket."""
        file_list = []
        if not self.check_bucket():
            return file_list
        for fobj in self.client.list_all_objects(self.bucket):
            file_list.append({'name': fobj.key, 'size': fobj.size})
        # BUG FIX: the original returned the undefined name `filelist`.
        return file_list

    def get_files_by_dir(self, dir_name):
        """Return [{'name', 'size'}, ...] for objects under ``dir_name``/.

        The directory marker object itself is excluded.
        """
        file_list = []
        if not self.check_bucket():
            return file_list
        prefix = '{}/'.format(dir_name)
        response = self.client.list_objects(self.bucket, prefix=prefix)
        for fobj in response.contents:
            if fobj.key == prefix:
                # Skip the directory marker itself.
                continue
            file_list.append({'name': fobj.key, 'size': fobj.size})
        return file_list

    def rmfile(self, object_key):
        """Delete a single object."""
        if not self.check_bucket():
            return False
        self.client.delete_object(self.bucket, object_key)
        return True

    def rmfiles(self, object_keys):
        """Delete multiple objects in one call."""
        if not self.check_bucket():
            return False
        self.client.delete_multiple_objects(self.bucket, object_keys)
        return True

    def rmdir(self, dir_name):
        """Delete a directory: its contained objects first, then the marker."""
        if not self.check_bucket():
            return False
        prefix = '{}/'.format(dir_name)
        file_list = self.get_files_by_dir(dir_name)
        object_keys = [fobj['name'] for fobj in file_list if 'name' in fobj]
        self.rmfiles(object_keys)
        self.client.delete_object(self.bucket, prefix)
        return True

    def single_upload(self, object_key, file_path):
        """Upload a local file in one shot, picking content-type by suffix."""
        if not self.check_bucket():
            return False
        # BUG FIX: the original read the undefined name `filename`; the
        # suffix must come from the local file path.
        suffix = file_path.split('.')[-1].lower()
        if suffix == 'mp4':
            ret = self.client.put_object_from_file(
                self.bucket, object_key, file_path, content_type='video/mp4')
        elif suffix in ['jpg', 'jpeg']:
            ret = self.client.put_object_from_file(
                self.bucket, object_key, file_path, content_type='image/jpeg')
        else:
            ret = self.client.put_object_from_file(self.bucket, object_key,
                                                   file_path)
        print(ret)  # BUG FIX: Python 3 print function (was py2 `print ret`)
        return True

    def get_upload_id(self, object_key):
        """Resume an in-flight multipart upload if one exists, else start one."""
        upload_id = None
        response = self.client.list_multipart_uploads(self.bucket)
        for item in response.uploads:
            if item.key == object_key:
                upload_id = item.upload_id
                break
        if not upload_id:
            upload_id = self.client.initiate_multipart_upload(
                self.bucket, object_key, content_type='video/mp4').upload_id
        return upload_id

    def multipart_upload(self, object_key, file_path):
        """Resumable multipart upload in 5MB parts.

        Returns:
            True on success, False when any part or the completion step
            keeps failing after 300 retries.
        """
        upload_id = self.get_upload_id(object_key)
        if not upload_id:
            return False
        left_size = os.path.getsize(file_path)
        offset, part_number, part_list = 0, 1, []
        while left_size > 0:
            part_size = 5 * 1024 * 1024  # 5MB per part
            if left_size < part_size:
                part_size = left_size
            response = None
            for _ in range(300):  # retry each part up to 300 times
                try:
                    response = self.client.upload_part_from_file(
                        self.bucket, object_key, upload_id, part_number,
                        part_size, file_path, offset)
                    break
                except Exception:
                    pass
            if response is None:
                # BUG FIX: the original fell through with `response` unbound
                # (NameError) when every retry failed.
                return False
            left_size -= part_size
            offset += part_size
            part_list.append({
                "partNumber": part_number,
                "eTag": response.metadata.etag
            })
            part_number += 1
        for _ in range(300):
            try:
                ret = self.client.complete_multipart_upload(
                    self.bucket, object_key, upload_id, part_list)
                print(ret)
                # BUG FIX: the original returned False on success.
                return True
            except Exception:
                pass
        return False

    def upload_file(self, object_key, file_path):
        """Upload a file, choosing multipart for files larger than 5MB."""
        # BUG FIX: the original called os.path.getsize on the undefined
        # name `file_name`.
        file_size = os.path.getsize(file_path)
        if file_size > 5 * 1024 * 1024:
            # Large file: resumable multipart upload.
            self.multipart_upload(object_key, file_path)
        else:
            # Small file: single-shot upload.
            self.single_upload(object_key, file_path)