コード例 #1
0
ファイル: bfile.py プロジェクト: zangqx/VisualDL
class BosConfigClient(object):
    """Thin wrapper around a Baidu BOS client built from explicit credentials.

    Args:
        bos_ak: BOS access key id.
        bos_sk: BOS secret access key.
        bos_sts: STS security token.
        bos_host: BOS endpoint host, defaults to "bj.bcebos.com".
    """

    def __init__(self, bos_ak, bos_sk, bos_sts, bos_host="bj.bcebos.com"):
        self.config = BceClientConfiguration(
            credentials=BceCredentials(bos_ak, bos_sk),
            endpoint=bos_host,
            security_token=bos_sts)
        self.bos_client = BosClient(self.config)

    def exists(self, path):
        """Return True if the BOS object at `path` exists.

        Existence is probed with a metadata request; any BceError
        (including "not found") is treated as "does not exist".
        """
        bucket_name, object_key = get_object_info(path)
        try:
            self.bos_client.get_object_meta_data(bucket_name, object_key)
            return True
        except exception.BceError:
            return False

    def makedirs(self, path):
        """Create an empty "directory" marker object for `path`.

        BOS has no real directories; a zero-byte object whose key ends
        with '/' stands in for one.  No-op if the marker already exists.
        """
        if not path.endswith('/'):
            path += '/'
        if self.exists(path):
            return
        bucket_name, object_key = get_object_info(path)
        if not object_key.endswith('/'):
            object_key += '/'
        init_data = b''
        self.bos_client.append_object(bucket_name=bucket_name,
                                      key=object_key,
                                      data=init_data,
                                      content_md5=content_md5(init_data),
                                      content_length=len(init_data))

    @staticmethod
    def join(path, *paths):
        """Join path components and normalize '\\' to '/' for BOS keys."""
        result = os.path.join(path, *paths)
        # BUGFIX: str.replace returns a new string; the original code
        # discarded it, so Windows-style separators leaked into keys.
        return result.replace('\\', '/')

    def upload_object_from_file(self, path, filename):
        """Upload local file `filename` under the BOS directory `path`.

        Creates the directory marker first when it does not exist yet.
        """
        if not self.exists(path):
            self.makedirs(path)
        bucket_name, object_key = get_object_info(path)

        object_key = self.join(object_key, filename)
        print('Uploading file `%s`' % filename)
        self.bos_client.put_object_from_file(bucket=bucket_name,
                                             key=object_key,
                                             file_name=filename)
コード例 #2
0
class BosFileSystem(object):
    """File-system-style interface to Baidu BOS used by VisualDL.

    Paths take the form 'bos://<bucket>/<object_key>'.  Appends are
    buffered in memory and flushed to BOS once `max_contents_count`
    pieces have been queued or `max_contents_time` seconds have
    elapsed (tunable via BOS_CACHE_COUNT / BOS_CACHE_TIME).
    """

    def __init__(self):
        self.max_contents_count = 1
        self.max_contents_time = 1
        self.get_bos_config()
        self.bos_client = BosClient(self.config)
        self.file_length_map = {}

        # Pending append buffer plus bookkeeping for the flush policy.
        self._file_contents_to_add = b''
        self._file_contents_count = 0
        self._start_append_time = time.time()

    def get_bos_config(self):
        """Populate self.config and the cache limits from the environment.

        Reads BOS_HOST / BOS_AK / BOS_SK (required), BOS_STS (optional)
        and the flush knobs BOS_CACHE_COUNT / BOS_CACHE_TIME.

        Raises:
            KeyError: if a required variable is missing or empty.
        """
        bos_host = os.getenv("BOS_HOST")
        if not bos_host:
            raise KeyError('${BOS_HOST} is not found.')
        access_key_id = os.getenv("BOS_AK")
        if not access_key_id:
            raise KeyError('${BOS_AK} is not found.')
        secret_access_key = os.getenv("BOS_SK")
        if not secret_access_key:
            raise KeyError('${BOS_SK} is not found.')
        self.max_contents_count = int(os.getenv('BOS_CACHE_COUNT', 1))
        self.max_contents_time = int(os.getenv('BOS_CACHE_TIME', 1))
        bos_sts = os.getenv("BOS_STS")
        self.config = BceClientConfiguration(
            credentials=BceCredentials(access_key_id, secret_access_key),
            endpoint=bos_host, security_token=bos_sts)

    def isfile(self, filename):
        # NOTE(review): this calls a module-level `exists`, not
        # self.exists — confirm such a helper is defined in this module.
        return exists(filename)

    def read_file(self, filename, binary=True):
        """Return the full contents of `filename` as bytes.

        `binary` is accepted for interface compatibility but unused.
        """
        bucket_name, object_key = BosFileSystem._get_object_info(filename)
        result = self.bos_client.get_object_as_string(bucket_name, object_key)
        return result

    @staticmethod
    def _get_object_info(path):
        """Split 'bos://<bucket>/<key>' into (bucket, key).

        Assumes the 6-character 'bos://' prefix; raises ValueError when
        no '/' separates the bucket from the key.
        """
        path = path[6:]
        index = path.index('/')
        bucket_name = path[0:index]
        object_key = path[index + 1:]
        return bucket_name, object_key

    def exists(self, path):
        """Return True if the BOS object at `path` exists.

        Probed via a metadata request; any BceError counts as absent.
        """
        bucket_name, object_key = BosFileSystem._get_object_info(path)
        try:
            self.bos_client.get_object_meta_data(bucket_name, object_key)
            return True
        except exception.BceError:
            return False

    def get_meta(self, bucket_name, object_key):
        """Return the BOS metadata response for one object."""
        return self.bos_client.get_object_meta_data(bucket_name, object_key)

    def makedirs(self, path):
        """Create a zero-byte '<key>/' marker object acting as a directory."""
        if not path.endswith('/'):
            path += '/'
        if self.exists(path):
            return
        bucket_name, object_key = BosFileSystem._get_object_info(path)
        if not object_key.endswith('/'):
            object_key += '/'
        init_data = b''
        self.bos_client.append_object(bucket_name=bucket_name,
                                      key=object_key,
                                      data=init_data,
                                      content_md5=content_md5(init_data),
                                      content_length=len(init_data))

    @staticmethod
    def join(path, *paths):
        """Join path components, normalizing '\\' to '/' for BOS keys."""
        result = os.path.join(path, *paths)
        # BUGFIX: str.replace returns a new string; the original code
        # discarded it, leaving backslashes untouched.
        return result.replace('\\', '/')

    def read(self, filename, binary_mode=False, size=0, continue_from=None):
        """Read `filename` from continue_from['last_offset'] to the end.

        Returns:
            (data, token) where token['last_offset'] is the current
            object length, suitable to pass back as `continue_from`.

        `binary_mode` and `size` are accepted but unused.
        """
        bucket_name, object_key = BosFileSystem._get_object_info(filename)
        offset = 0
        if continue_from is not None:
            offset = continue_from.get("last_offset", 0)
        length = int(
            self.get_meta(bucket_name, object_key).metadata.content_length)
        if offset < length:
            # Ranged read: [offset, length - 1] is inclusive.
            data = self.bos_client.get_object_as_string(bucket_name=bucket_name,
                                                        key=object_key,
                                                        range=[offset,
                                                               length - 1])
        else:
            data = b''

        continue_from_token = {"last_offset": length}
        return data, continue_from_token

    def ready_to_append(self):
        """Return True once the buffered appends should be flushed."""
        return (self._file_contents_count >= self.max_contents_count
                or time.time() - self._start_append_time >
                self.max_contents_time)

    def append(self, filename, file_content, binary_mode=False, force=False):
        """Queue `file_content`; flush the whole buffer to BOS when due.

        Content accumulates in memory until `force` is set or
        ready_to_append() reports a limit was reached.  The target
        object is created empty first if it does not exist.
        """
        self._file_contents_to_add += file_content
        self._file_contents_count += 1

        if not force and not self.ready_to_append():
            return
        file_content = self._file_contents_to_add
        bucket_name, object_key = BosFileSystem._get_object_info(filename)
        if not self.exists(filename):
            init_data = b''
            self.bos_client.append_object(bucket_name=bucket_name,
                                          key=object_key,
                                          data=init_data,
                                          content_md5=content_md5(init_data),
                                          content_length=len(init_data))
        content_length = len(file_content)

        # Append at the current end of the object.
        offset = self.get_meta(bucket_name, object_key).metadata.content_length
        self.bos_client.append_object(bucket_name=bucket_name,
                                      key=object_key,
                                      data=file_content,
                                      content_md5=content_md5(file_content),
                                      content_length=content_length,
                                      offset=offset)
        # Reset the buffer and the flush timer.
        self._file_contents_to_add = b''
        self._file_contents_count = 0
        self._start_append_time = time.time()

    def write(self, filename, file_content, binary_mode=False):
        """Write by delegating to the buffered append path."""
        self.append(filename, file_content, binary_mode=False)

    def walk(self, dir):
        """Iterate (dir_path, [], file_names) triples under `dir`, os.walk-style."""
        class WalkGenerator():
            """Iterator over object listings grouped by parent 'directory'."""
            def __init__(self, bucket_name, contents):
                self.contents = None
                self.length = 0
                self.bucket = bucket_name
                self.handle_contents(contents)
                self.count = 0

            def handle_contents(self, contents):
                # Group object keys by their parent prefix.
                contents_map = {}
                for item in contents:
                    try:
                        rindex = item.rindex('/')
                        key = item[0:rindex]
                        value = item[rindex + 1:]
                    except ValueError:
                        # No '/': the object lives at the bucket root.
                        key = '.'
                        value = item
                    if key in contents_map.keys():
                        contents_map[key].append(value)
                    else:
                        contents_map[key] = [value]
                temp_walk = []
                for key, value in contents_map.items():
                    temp_walk.append(
                        [BosFileSystem.join('bos://' + self.bucket, key), [],
                         value])
                self.length = len(temp_walk)
                self.contents = temp_walk

            def __iter__(self):
                return self

            def __next__(self):
                if self.count < self.length:
                    self.count += 1
                    return self.contents[self.count - 1]
                else:
                    raise StopIteration

        bucket_name, object_key = BosFileSystem._get_object_info(dir)

        if object_key in ['.', './']:
            prefix = None
        else:
            prefix = object_key if object_key.endswith(
                '/') else object_key + '/'
        response = self.bos_client.list_objects(bucket_name,
                                                prefix=prefix)
        contents = [content.key for content in response.contents]
        return WalkGenerator(bucket_name, contents)
コード例 #3
0
ファイル: bfile.py プロジェクト: PeterPanZH/VisualDL
class BosFileSystem(object):
    """File-system-style interface to Baidu BOS used by VisualDL.

    Paths take the form 'bos://<bucket>/<object_key>'.  Appends are
    buffered in memory and flushed once `max_contents_count` pieces or
    `max_contents_time` seconds have accumulated.  Credentials come
    from BOS_* environment variables, an explicit set_bos_config()
    call, or are renewed from a VisualDL STS server on auth failures.
    """

    def __init__(self, write_flag=True):
        # With write_flag=False nothing is configured; callers are then
        # expected to invoke set_bos_config() before using the client.
        if write_flag:
            self.max_contents_count = 1
            self.max_contents_time = 1
            self.get_bos_config()
            self.bos_client = BosClient(self.config)
            self.file_length_map = {}

            # Pending append buffer plus bookkeeping for flushing.
            self._file_contents_to_add = b''
            self._file_contents_count = 0
            self._start_append_time = time.time()

    def get_bos_config(self):
        """Populate self.config and the cache limits from the environment.

        Raises:
            KeyError: if BOS_HOST, BOS_AK or BOS_SK is missing/empty.
        """
        bos_host = os.getenv("BOS_HOST")
        if not bos_host:
            raise KeyError('${BOS_HOST} is not found.')
        access_key_id = os.getenv("BOS_AK")
        if not access_key_id:
            raise KeyError('${BOS_AK} is not found.')
        secret_access_key = os.getenv("BOS_SK")
        if not secret_access_key:
            raise KeyError('${BOS_SK} is not found.')
        self.max_contents_count = int(os.getenv('BOS_CACHE_COUNT', 1))
        self.max_contents_time = int(os.getenv('BOS_CACHE_TIME', 1))
        bos_sts = os.getenv("BOS_STS")
        self.config = BceClientConfiguration(
            credentials=BceCredentials(access_key_id, secret_access_key),
            endpoint=bos_host,
            security_token=bos_sts)

    def set_bos_config(self, bos_ak, bos_sk, bos_sts,
                       bos_host="bj.bcebos.com"):
        """Rebuild the BOS client from explicit credentials and token."""
        self.config = BceClientConfiguration(
            credentials=BceCredentials(bos_ak, bos_sk),
            endpoint=bos_host,
            security_token=bos_sts)
        self.bos_client = BosClient(self.config)

    def renew_bos_client_from_server(self):
        """Fetch fresh STS credentials from the VisualDL server.

        POSTs to '<server_url>/sts/' (server_url read from the local
        config file); on success (code '000000') installs the returned
        temporary credentials via set_bos_config, otherwise logs the
        error and leaves the current client untouched.
        """
        import requests
        import json
        from visualdl.utils.dir import CONFIG_PATH
        with open(CONFIG_PATH, 'r') as fp:
            server_url = json.load(fp)['server_url']
        url = server_url + '/sts/'
        res = requests.post(url=url).json()
        err_code = res.get('code')
        msg = res.get('msg')
        if '000000' == err_code:
            sts_ak = msg.get('sts_ak')
            sts_sk = msg.get('sts_sk')
            sts_token = msg.get('token')
            self.set_bos_config(sts_ak, sts_sk, sts_token)
        else:
            print('Renew bos client error. Error msg: {}'.format(msg))
            return

    def isfile(self, filename):
        # NOTE(review): this calls a module-level `exists`, not
        # self.exists — confirm such a helper is defined in this module.
        return exists(filename)

    def read_file(self, filename, binary=True):
        """Return the full contents of `filename` as bytes.

        `binary` is accepted for interface compatibility but unused.
        """
        bucket_name, object_key = get_object_info(filename)
        result = self.bos_client.get_object_as_string(bucket_name, object_key)
        return result

    def exists(self, path):
        """Return True if the BOS object at `path` exists.

        Probed via a metadata request; any BceError counts as absent.
        """
        bucket_name, object_key = get_object_info(path)
        try:
            self.bos_client.get_object_meta_data(bucket_name, object_key)
            return True
        except exception.BceError:
            return False

    def get_meta(self, bucket_name, object_key):
        """Return the BOS metadata response for one object."""
        return self.bos_client.get_object_meta_data(bucket_name, object_key)

    def makedirs(self, path):
        """Create a zero-byte '<key>/' marker object acting as a directory."""
        if not path.endswith('/'):
            path += '/'
        if self.exists(path):
            return
        bucket_name, object_key = get_object_info(path)
        if not object_key.endswith('/'):
            object_key += '/'
        init_data = b''
        self.bos_client.append_object(
            bucket_name=bucket_name,
            key=object_key,
            data=init_data,
            content_md5=content_md5(init_data),
            content_length=len(init_data))

    @staticmethod
    def join(path, *paths):
        """Join path components, normalizing '\\' to '/' for BOS keys."""
        result = os.path.join(path, *paths)
        # BUGFIX: str.replace returns a new string; the original code
        # discarded it, leaving backslashes untouched.
        return result.replace('\\', '/')

    def read(self, filename, binary_mode=False, size=0, continue_from=None):
        """Read `filename` from continue_from['last_offset'] to the end.

        Returns:
            (data, token) where token['last_offset'] is the current
            object length, suitable to pass back as `continue_from`.

        `binary_mode` and `size` are accepted but unused.
        """
        bucket_name, object_key = get_object_info(filename)
        offset = 0
        if continue_from is not None:
            offset = continue_from.get("last_offset", 0)
        length = int(
            self.get_meta(bucket_name, object_key).metadata.content_length)
        if offset < length:
            # Ranged read: [offset, length - 1] is inclusive.
            data = self.bos_client.get_object_as_string(
                bucket_name=bucket_name,
                key=object_key,
                range=[offset, length - 1])
        else:
            data = b''

        continue_from_token = {"last_offset": length}
        return data, continue_from_token

    def ready_to_append(self):
        """Return True once the buffered appends should be flushed."""
        return (self._file_contents_count >= self.max_contents_count
                or time.time() - self._start_append_time >
                self.max_contents_time)

    def append(self, filename, file_content, binary_mode=False, force=False):
        """Queue `file_content`; flush the whole buffer to BOS when due.

        On BceServerError/BceHttpClientError the client is renewed from
        the STS server and the request is retried once.
        """
        self._file_contents_to_add += file_content
        self._file_contents_count += 1

        if not force and not self.ready_to_append():
            return
        file_content = self._file_contents_to_add
        bucket_name, object_key = get_object_info(filename)
        if not self.exists(filename):
            init_data = b''
            try:
                self.bos_client.append_object(
                    bucket_name=bucket_name,
                    key=object_key,
                    data=init_data,
                    content_md5=content_md5(init_data),
                    content_length=len(init_data))
            except (exception.BceServerError, exception.BceHttpClientError):
                self.renew_bos_client_from_server()
                self.bos_client.append_object(
                    bucket_name=bucket_name,
                    key=object_key,
                    data=init_data,
                    content_md5=content_md5(init_data),
                    content_length=len(init_data))
                # NOTE(review): returning here keeps the buffered
                # content queued for the next flush — confirm intended.
                return
        content_length = len(file_content)

        try:
            # Append at the current end of the object.
            offset = self.get_meta(bucket_name,
                                   object_key).metadata.content_length
            self.bos_client.append_object(
                bucket_name=bucket_name,
                key=object_key,
                data=file_content,
                content_md5=content_md5(file_content),
                content_length=content_length,
                offset=offset)
        except (exception.BceServerError, exception.BceHttpClientError):
            # Credentials may have expired: renew and retry once.
            self.renew_bos_client_from_server()
            offset = self.get_meta(bucket_name,
                                   object_key).metadata.content_length
            self.bos_client.append_object(
                bucket_name=bucket_name,
                key=object_key,
                data=file_content,
                content_md5=content_md5(file_content),
                content_length=content_length,
                offset=offset)

        # Reset the buffer and the flush timer.
        self._file_contents_to_add = b''
        self._file_contents_count = 0
        self._start_append_time = time.time()

    def write(self, filename, file_content, binary_mode=False):
        """Write by delegating to the buffered append path."""
        self.append(filename, file_content, binary_mode=False)

    def walk(self, dir):
        """Iterate (dir_path, [], file_names) triples under `dir`, os.walk-style."""
        class WalkGenerator():
            """Iterator over object listings grouped by parent 'directory'."""
            def __init__(self, bucket_name, contents):
                self.contents = None
                self.length = 0
                self.bucket = bucket_name
                self.handle_contents(contents)
                self.count = 0

            def handle_contents(self, contents):
                # Group object keys by their parent prefix.
                contents_map = {}
                for item in contents:
                    try:
                        rindex = item.rindex('/')
                        key = item[0:rindex]
                        value = item[rindex + 1:]
                    except ValueError:
                        # No '/': the object lives at the bucket root.
                        key = '.'
                        value = item
                    if key in contents_map.keys():
                        contents_map[key].append(value)
                    else:
                        contents_map[key] = [value]
                temp_walk = []
                for key, value in contents_map.items():
                    temp_walk.append([
                        BosFileSystem.join('bos://' + self.bucket, key), [],
                        value
                    ])
                self.length = len(temp_walk)
                self.contents = temp_walk

            def __iter__(self):
                return self

            def __next__(self):
                if self.count < self.length:
                    self.count += 1
                    return self.contents[self.count - 1]
                else:
                    raise StopIteration

        bucket_name, object_key = get_object_info(dir)

        if object_key in ['.', './']:
            prefix = None
        else:
            prefix = object_key if object_key.endswith(
                '/') else object_key + '/'
        response = self.bos_client.list_objects(bucket_name, prefix=prefix)
        contents = [content.key for content in response.contents]
        return WalkGenerator(bucket_name, contents)
コード例 #4
0
            "partNumber": part_number,
            "eTag": response.metadata.etag
        })
        part_number += 1

    # copy a object part by part

    # step 1: init multi-upload
    upload_id = bos_client.initiate_multipart_upload(target_bucket,
                                                     target_key).upload_id
    upload_id_about = bos_client.initiate_multipart_upload(
        target_bucket, target_key + "_about").upload_id

    # step 2: upload copy part by part
    left_size = int(
        bos_client.get_object_meta_data(source_bucket,
                                        source_key).metadata.content_length)
    offset = 0
    part_number = 1
    part_list = []
    while left_size > 0:
        part_size = 5 * 1024 * 1024
        if left_size < part_size:
            part_size = left_size
        response = bos_client.upload_part_copy(source_bucket, source_key,
                                               target_bucket, target_key,
                                               upload_id, part_number,
                                               part_size, offset)
        left_size -= part_size
        offset += part_size
        part_list.append({"partNumber": part_number, "eTag": response.etag})
        part_number += 1
コード例 #5
0
class UBosClient(object):
    """Convenience wrapper around a Baidu BOS client bound to one bucket."""

    def __init__(self,
                 access_key_id,
                 secret_access_key,
                 bucket_name='',
                 endpoint=''):
        """Create the underlying BosClient with a 3-second connect timeout."""
        super(UBosClient, self).__init__()

        config = BceClientConfiguration(credentials=BceCredentials(
            access_key_id, secret_access_key),
                                        endpoint=endpoint)
        # Request timeout, in milliseconds.
        config.connection_timeout_in_mills = 3000
        self.client = BosClient(config)
        self.bucket = bucket_name

    def check_bucket(self):
        """Return True if the configured bucket exists."""
        return bool(self.client.does_bucket_exist(self.bucket))

    def check_object_key(self, object_key):
        """Return True if `object_key` exists in the bucket."""
        if not self.check_bucket():
            return False
        try:
            self.client.get_object_meta_data(self.bucket, object_key)
            return True
        except Exception:  # narrowed from a bare except
            return False

    def mkdir(self, dir_name):
        """Create a '<dir_name>/' marker object; return True on success."""
        if not self.check_bucket():
            return False
        try:
            self.client.put_object_from_string(self.bucket,
                                               '{}/'.format(dir_name), '')
            return True
        except Exception:  # narrowed from a bare except
            return False

    def get_all_files(self):
        """Return [{'name', 'size'}, ...] for every object in the bucket."""
        file_list = []
        if not self.check_bucket():
            return file_list
        for fobj in self.client.list_all_objects(self.bucket):
            file_list.append({'name': fobj.key, 'size': fobj.size})
        # BUGFIX: the original returned the undefined name `filelist`,
        # which raised NameError on every successful listing.
        return file_list

    def get_files_by_dir(self, dir_name):
        """Return the objects under '<dir_name>/', excluding the marker itself."""
        file_list = []
        if not self.check_bucket():
            return file_list
        prefix = '{}/'.format(dir_name)
        response = self.client.list_objects(self.bucket, prefix=prefix)
        for fobj in response.contents:
            if fobj.key == prefix:
                continue
            file_list.append({'name': fobj.key, 'size': fobj.size})
        return file_list

    def rmfile(self, object_key):
        """Delete a single object; True unless the bucket is missing."""
        if not self.check_bucket():
            return False
        self.client.delete_object(self.bucket, object_key)
        return True

    def rmfiles(self, object_keys):
        """Batch-delete objects; True unless the bucket is missing."""
        if not self.check_bucket():
            return False
        self.client.delete_multiple_objects(self.bucket, object_keys)
        return True

    def rmdir(self, dir_name):
        """Delete every object under the directory, then its '/' marker.

        The caller is expected to ensure the directory can be emptied.
        """
        if not self.check_bucket():
            return False
        prefix = '{}/'.format(dir_name)
        file_list = self.get_files_by_dir(dir_name)
        object_keys = [fobj['name'] for fobj in file_list if 'name' in fobj]
        self.rmfiles(object_keys)
        self.client.delete_object(self.bucket, prefix)
        return True

    def single_upload(self, object_key, file_path):
        """Upload a file in one request, guessing content type by suffix."""
        if not self.check_bucket():
            return False
        # BUGFIX: the original read the undefined name `filename`,
        # raising NameError; the suffix comes from the local file path.
        suffix = file_path.split('.')[-1].lower()
        if suffix == 'mp4':
            ret = self.client.put_object_from_file(self.bucket,
                                                   object_key,
                                                   file_path,
                                                   content_type='video/mp4')
        elif suffix in ['jpg', 'jpeg']:
            ret = self.client.put_object_from_file(self.bucket,
                                                   object_key,
                                                   file_path,
                                                   content_type='image/jpeg')
        else:
            ret = self.client.put_object_from_file(self.bucket, object_key,
                                                   file_path)
        print(ret)
        return True

    def get_upload_id(self, object_key):
        """Return the upload id of an in-flight multipart upload, or start one."""
        upload_id = None
        response = self.client.list_multipart_uploads(self.bucket)
        for item in response.uploads:
            if item.key == object_key:
                upload_id = item.upload_id
                break
        if not upload_id:
            upload_id = self.client.initiate_multipart_upload(
                self.bucket, object_key, content_type='video/mp4').upload_id
        return upload_id

    def multipart_upload(self, object_key, file_path):
        """Resumable multipart upload in 5 MB parts with per-part retries."""
        upload_id = self.get_upload_id(object_key)
        if not upload_id:
            return False
        left_size = os.path.getsize(file_path)
        offset, part_number, part_list = 0, 1, []
        while left_size > 0:
            # 5 MB parts; the final part may be smaller.
            part_size = 5 * 1024 * 1024
            if left_size < part_size:
                part_size = left_size
            response = None
            # Retry each part up to 300 times.
            for _ in range(300):
                try:
                    response = self.client.upload_part_from_file(
                        self.bucket, object_key, upload_id, part_number,
                        part_size, file_path, offset)
                    break
                except Exception:  # narrowed from a bare except
                    pass
            if response is None:
                # BUGFIX: previously fell through with `response`
                # unbound and raised NameError after 300 failures.
                return False
            left_size -= part_size
            offset += part_size
            part_list.append({
                "partNumber": part_number,
                "eTag": response.metadata.etag
            })
            part_number += 1

        for _ in range(300):
            try:
                ret = self.client.complete_multipart_upload(
                    self.bucket, object_key, upload_id, part_list)
                print(ret)
                # BUGFIX: the original returned False on success.
                return True
            except Exception:  # narrowed from a bare except
                pass
        return False

    def upload_file(self, object_key, file_path):
        """Upload a file: multipart for files over 5 MB, one-shot otherwise."""
        # BUGFIX: the original referenced the undefined name `file_name`.
        file_size = os.path.getsize(file_path)
        if file_size > 5 * 1024 * 1024:
            self.multipart_upload(object_key, file_path)
        else:
            self.single_upload(object_key, file_path)