# imports assumed by the samples below (standard library plus the official
# baidubce SDK); bos_sample_conf, bucket_name, __logger, _config and LOGGER
# are expected to be provided by the surrounding sample modules
import base64
import hashlib
import io
import os

from baidubce.bce_client_configuration import BceClientConfiguration
from baidubce.auth.bce_credentials import BceCredentials
from baidubce.services.bos.bos_client import BosClient

download = 'download'

######################################################################################################
# bucket operation samples
######################################################################################################

# create a bos client
bos_client = BosClient(bos_sample_conf.config)

# check if bucket exists
if not bos_client.does_bucket_exist(bucket_name):
    bos_client.create_bucket(bucket_name)

# delete a bucket (you can't delete a bucket which is not empty),
# so clear it first
for obj in bos_client.list_all_objects(bucket_name):
    bos_client.delete_object(bucket_name, obj.key)
bos_client.delete_bucket(bucket_name)

# create the bucket again
bos_client.create_bucket(bucket_name)

# list your buckets
response = bos_client.list_buckets()
for bucket in response.buckets:
    __logger.debug("[Sample] list buckets:%s", bucket.name)

######################################################################################################
# object operation samples
######################################################################################################
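# A minimal, illustrative sketch to fill in the object-operation banner
# above; the key and payload are placeholders, not from the original sample.
# It reuses bos_client and bucket_name and calls only documented BosClient
# methods (put_object_from_string, get_object_as_string, delete_object).
object_key = 'sample-object'
bos_client.put_object_from_string(bucket_name, object_key, 'hello BOS')
data = bos_client.get_object_as_string(bucket_name, object_key)
__logger.debug("[Sample] get object:%s", data)
bos_client.delete_object(bucket_name, object_key)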
class BaiduBos:

    def __init__(self, bucket):
        self._bucket = bucket
        self._bos_client = BosClient(_config)

    def upload_file(self, fn, key, get_url=False, absent=True,
                    expiration_in_seconds=-1):
        """
        Upload a file; files larger than 25 MB are uploaded in parts.
        :param fn: path of the local file
        :param key: object key to upload to
        :param get_url: whether to return a pre-signed URL for the key
        :param absent: when True, skip the upload if the key already exists
        :return: the pre-signed URL if get_url is True, otherwise None
        """
        exists = False
        if absent:
            for obj in self._bos_client.list_all_objects(self._bucket):
                if obj.key == key:
                    LOGGER.warning(
                        "the key '{0}' already exists, upload canceled"
                        .format(key))
                    exists = True
                    break
        if not exists:
            fs = os.path.getsize(fn)
            with open(fn, mode='rb') as f:
                if fs > 25 * 1024 * 1024:
                    self._multipart_upload(fn, key)
                else:
                    self._bos_client.put_object(self._bucket, key, f, fs,
                                                self.md5_file(fn))
        if get_url:
            url = self._bos_client.generate_pre_signed_url(
                self._bucket, key,
                expiration_in_seconds=expiration_in_seconds)
            return url.decode("utf-8")
        return None

    def upload_bytes(self, byte_arr, key, get_url=False, absent=True,
                     expiration_in_seconds=-1):
        """
        Upload a byte string.
        :param byte_arr: the bytes to upload
        :param key: object key to upload to
        :param get_url: whether to return a pre-signed URL for the key
        :param absent: when True, skip the upload if the key already exists
        :return: the pre-signed URL if get_url is True, otherwise None
        """
        exists = False
        if absent:
            for obj in self._bos_client.list_all_objects(self._bucket):
                if obj.key == key:
                    LOGGER.warning(
                        "the key '{0}' already exists, upload canceled"
                        .format(key))
                    exists = True
                    break
        if not exists:
            self._bos_client.put_object(self._bucket, key,
                                        io.BytesIO(byte_arr), len(byte_arr),
                                        self.md5_obj(byte_arr))
        if get_url:
            url = self._bos_client.generate_pre_signed_url(
                self._bucket, key,
                expiration_in_seconds=expiration_in_seconds)
            return url.decode("utf-8")
        return None

    def _multipart_upload(self, fn, key):
        """
        Upload a file in 5 MB parts.
        :param fn: path of the local file
        :param key: object key to upload to
        """
        upload_id = self._bos_client.initiate_multipart_upload(
            self._bucket, key).upload_id
        left_size = os.path.getsize(fn)
        # offset marks where the next part starts within the file
        offset = 0
        part_number = 1
        part_list = []
        while left_size > 0:
            # each part is 5 MB, except possibly the last one
            part_size = 5 * 1024 * 1024
            if left_size < part_size:
                part_size = left_size
            response = self._bos_client.upload_part_from_file(
                self._bucket, key, upload_id, part_number, part_size, fn,
                offset)
            left_size -= part_size
            offset += part_size
            part_list.append({
                "partNumber": part_number,
                "eTag": response.metadata.etag
            })
            part_number += 1
        location = self._bos_client.complete_multipart_upload(
            self._bucket, key, upload_id, part_list)
        return location

    def md5_file(self, fn):
        """Return the base64-encoded MD5 digest of a file, read in chunks."""
        buf_size = 8192
        md5 = hashlib.md5()
        with open(fn, mode='rb') as fp:
            while True:
                buf = fp.read(buf_size)
                if not buf:
                    break
                md5.update(buf)
        return base64.standard_b64encode(md5.digest())

    def md5_obj(self, bs):
        """Return the base64-encoded MD5 digest of a bytes object."""
        md5 = hashlib.md5()
        md5.update(bs)
        return base64.standard_b64encode(md5.digest())

    def list_uploaded_objects(self, prefix=None):
        """
        List the keys in the bucket. With a prefix, at most 1000 entries are
        returned; use the SDK API directly if you need more.
        :param prefix: only return keys that start with this prefix
        """
        keys = []
        if prefix is not None:
            response = self._bos_client.list_objects(self._bucket,
                                                     prefix=prefix,
                                                     max_keys=1000)
            for obj in response.contents:
                keys.append(obj.key)
            return keys
        # list_all_objects pages through the bucket and yields objects
        for obj in self._bos_client.list_all_objects(self._bucket):
            keys.append(obj.key)
        return keys

    def file_exists(self, fn):
        """
        Check whether a file name exists; the server-side file name is the
        key with everything up to and including the last slash stripped.
        :return: the file's pre-signed URL if it exists, otherwise None
        """
        keys = self.list_uploaded_objects()
        for key in keys:
            slash_index = key.rfind("/")
            if slash_index > 0:
                file_name = key[slash_index + 1:]
                if file_name == fn:
                    url = self._bos_client.generate_pre_signed_url(
                        bucket_name=self._bucket, key=key,
                        expiration_in_seconds=-1)
                    return url.decode("utf-8")
        return None

    def key_exists(self, key):
        """Check whether a key exists in the bucket."""
        return key in self.list_uploaded_objects()
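# A hedged usage sketch for BaiduBos, assuming the module-level _config and
# LOGGER referenced above are defined; the bucket name and paths below are
# illustrative placeholders, not values from the original.
bos = BaiduBos('my-bucket')
# files over 25 MB are split into 5 MB parts by _multipart_upload
url = bos.upload_file('/tmp/report.pdf', 'reports/report.pdf',
                      get_url=True, expiration_in_seconds=3600)
print(url)
print(bos.key_exists('reports/report.pdf'))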
class UBosClient(object):
    """BosClient wrapper"""

    def __init__(self, access_key_id, secret_access_key, bucket_name='',
                 endpoint=''):
        """Initialize the client."""
        super(UBosClient, self).__init__()
        # create the BceClientConfiguration
        config = BceClientConfiguration(
            credentials=BceCredentials(access_key_id, secret_access_key),
            endpoint=endpoint)
        # set the request timeout
        config.connection_timeout_in_mills = 3000
        # create the BosClient
        self.client = BosClient(config)
        self.bucket = bucket_name

    def check_bucket(self):
        """Check whether the bucket exists."""
        return bool(self.client.does_bucket_exist(self.bucket))

    def check_object_key(self, object_key):
        """Check whether an object exists."""
        if not self.check_bucket():
            return False
        try:
            self.client.get_object_meta_data(self.bucket, object_key)
            return True
        except Exception:
            return False

    def mkdir(self, dir_name):
        """Create a folder (an empty object whose key ends with a slash)."""
        if not self.check_bucket():
            return False
        try:
            self.client.put_object_from_string(self.bucket,
                                               '{}/'.format(dir_name), '')
            return True
        except Exception:
            return False

    def get_all_files(self):
        """Get every file object in the bucket."""
        file_list = []
        if not self.check_bucket():
            return file_list
        for fobj in self.client.list_all_objects(self.bucket):
            file_list.append({'name': fobj.key, 'size': fobj.size})
        return file_list

    def get_files_by_dir(self, dir_name):
        """Get the file objects under a folder."""
        file_list = []
        if not self.check_bucket():
            return file_list
        prefix = '{}/'.format(dir_name)
        response = self.client.list_objects(self.bucket, prefix=prefix)
        for fobj in response.contents:
            if fobj.key == prefix:
                continue
            file_list.append({'name': fobj.key, 'size': fobj.size})
        return file_list

    def rmfile(self, object_key):
        """Delete a single object."""
        if not self.check_bucket():
            return False
        self.client.delete_object(self.bucket, object_key)
        return True

    def rmfiles(self, object_keys):
        """Delete objects in batch."""
        if not self.check_bucket():
            return False
        self.client.delete_multiple_objects(self.bucket, object_keys)
        return True

    def rmdir(self, dir_name):
        """Delete a folder; its objects must be removed first, so do that."""
        if not self.check_bucket():
            return False
        prefix = '{}/'.format(dir_name)
        file_list = self.get_files_by_dir(dir_name)
        object_keys = [fobj['name'] for fobj in file_list if 'name' in fobj]
        self.rmfiles(object_keys)
        self.client.delete_object(self.bucket, prefix)
        return True

    def single_upload(self, object_key, file_path):
        """Upload a file in a single request."""
        if not self.check_bucket():
            return False
        suffix = file_path.split('.')[-1].lower()
        if suffix == 'mp4':
            ret = self.client.put_object_from_file(self.bucket, object_key,
                                                   file_path,
                                                   content_type='video/mp4')
        elif suffix in ['jpg', 'jpeg']:
            ret = self.client.put_object_from_file(self.bucket, object_key,
                                                   file_path,
                                                   content_type='image/jpeg')
        else:
            ret = self.client.put_object_from_file(self.bucket, object_key,
                                                   file_path)
        print(ret)
        return True

    def get_upload_id(self, object_key):
        """Get the upload_id for a resumable upload, reusing an unfinished
        multipart upload for the key if one exists."""
        upload_id = None
        response = self.client.list_multipart_uploads(self.bucket)
        for item in response.uploads:
            if item.key == object_key:
                upload_id = item.upload_id
                break
        if not upload_id:
            upload_id = self.client.initiate_multipart_upload(
                self.bucket, object_key, content_type='video/mp4').upload_id
        return upload_id

    def multipart_upload(self, object_key, file_path):
        """Resumable (multipart) upload."""
        upload_id = self.get_upload_id(object_key)
        if not upload_id:
            return False
        left_size = os.path.getsize(file_path)
        offset, part_number, part_list = 0, 1, []
        while left_size > 0:
            # each part is 5 MB, except possibly the last one
            part_size = 5 * 1024 * 1024
            if left_size < part_size:
                part_size = left_size
            # retry the part upload, up to 300 attempts
            for _ in range(300):
                try:
                    response = self.client.upload_part_from_file(
                        self.bucket, object_key, upload_id, part_number,
                        part_size, file_path, offset)
                    break
                except Exception:
                    pass
            else:
                # every attempt failed; give up rather than reference an
                # unset response below
                return False
            left_size -= part_size
            offset += part_size
            part_list.append({
                "partNumber": part_number,
                "eTag": response.metadata.etag
            })
            part_number += 1
        # retry completing the upload, up to 300 attempts
        for _ in range(300):
            try:
                ret = self.client.complete_multipart_upload(
                    self.bucket, object_key, upload_id, part_list)
                print(ret)
                return True
            except Exception:
                pass
        return False

    def upload_file(self, object_key, file_path):
        """Upload a file, choosing single or multipart upload by size."""
        file_size = os.path.getsize(file_path)
        if file_size > 5 * 1024 * 1024:
            # resumable multipart upload
            self.multipart_upload(object_key, file_path)
        else:
            # single-request upload
            self.single_upload(object_key, file_path)
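# A hedged usage sketch for UBosClient; the credentials, endpoint, bucket
# and file paths are illustrative placeholders, not values from the source.
client = UBosClient('your-ak', 'your-sk', bucket_name='my-bucket',
                    endpoint='bj.bcebos.com')
if client.check_bucket():
    client.mkdir('videos')
    # upload_file routes files over 5 MB to the resumable multipart path
    client.upload_file('videos/demo.mp4', '/tmp/demo.mp4')
    for fobj in client.get_files_by_dir('videos'):
        print(fobj['name'], fobj['size'])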