Ejemplo n.º 1
0
def get_bos_file_bytes_io(path):
    """Fetch an object from Baidu BOS and wrap its content in a BytesIO.

    The bucket, endpoint and credentials are taken from the module-level
    names ``bucket_name``, ``bos_host``, ``access_key_id`` and
    ``secret_access_key``.

    Args:
        path: Key of the object inside ``bucket_name``.

    Returns:
        An ``io.BytesIO`` holding the object's content.
    """
    # Turn on debug output of the BCE HTTP client for this download.
    http_logger = logging.getLogger("baidubce.http.bce_http_client")
    http_logger.setLevel(logging.DEBUG)
    logging.info('Getting file from Baidu BOS...')

    credentials = BceCredentials(access_key_id, secret_access_key)
    client = BosClient(
        BceClientConfiguration(credentials=credentials, endpoint=bos_host))
    payload = client.get_object_as_string(bucket_name=bucket_name, key=path)

    # A freshly constructed BytesIO starts at position 0, so no explicit
    # seek is needed before handing it to a reader.
    return io.BytesIO(payload)
Ejemplo n.º 2
0
    bos_client.create_bucket(bucket_name)

    # list your buckets
    response = bos_client.list_buckets()
    for bucket in response.buckets:
        __logger.debug("[Sample] list buckets:%s", bucket.name)

    ######################################################################################################
    #            object operation samples
    ######################################################################################################

    # put a string as object
    bos_client.put_object_from_string(bucket_name, key, "This is string content.")

    # get an object as string
    content = bos_client.get_object_as_string(bucket_name, key)
    __logger.debug("[Sample] get object as string:%s", content)

    # put a file as object
    _create_file(file_name, 4096)
    bos_client.put_object_from_file(bucket_name, key, file_name)

    # get object into file
    bos_client.get_object_to_file(bucket_name, key, download)
    __logger.debug("[Sample] get object into file, file size:%s", os.path.getsize(download))

    # copy an object
    bos_client.copy_object(bucket_name, key, bucket_name, key + ".copy",)

    # list objects in a bucket(up to 1000)
    response = bos_client.list_objects(bucket_name)
Ejemplo n.º 3
0
    # list your buckets
    response = bos_client.list_buckets()
    for bucket in response.buckets:
        __logger.debug("[Sample] list buckets:%s", bucket.name)

    ######################################################################################################
    #            object operation samples
    ######################################################################################################

    # put a string as object
    bos_client.put_object_from_string(bucket_name, key,
                                      "This is string content.")

    # get an object as string
    content = bos_client.get_object_as_string(bucket_name, key)
    __logger.debug("[Sample] get object as string:%s", content)

    # put a file as object
    _create_file(file_name, 4096)
    bos_client.put_object_from_file(bucket_name, key, file_name)

    # get object into file
    bos_client.get_object_to_file(bucket_name, key, download)
    __logger.debug("[Sample] get object into file, file size:%s",
                   os.path.getsize(download))

    # copy an object
    bos_client.copy_object(
        bucket_name,
        key,
Ejemplo n.º 4
0
class BosFileSystem(object):
    """File-system style facade over a Baidu BOS client.

    Paths handled by this class have the form ``bos://<bucket>/<object key>``.
    Writes are buffered in memory and flushed to BOS as append operations
    once enough chunks have accumulated or enough time has elapsed.
    """

    def __init__(self):
        # Defaults; get_bos_config() overrides them from the environment.
        self.max_contents_count = 1
        self.max_contents_time = 1
        self.get_bos_config()
        self.bos_client = BosClient(self.config)
        self.file_length_map = {}

        # Pending (not yet uploaded) content and bookkeeping for flushing.
        self._file_contents_to_add = b''
        self._file_contents_count = 0
        self._start_append_time = time.time()

    def get_bos_config(self):
        """Build ``self.config`` from environment variables.

        Required: ``BOS_HOST``, ``BOS_AK``, ``BOS_SK``.
        Optional: ``BOS_CACHE_COUNT``, ``BOS_CACHE_TIME`` (both default to 1)
        and ``BOS_STS`` (security token, may be unset).

        Raises:
            KeyError: If one of the required variables is missing or empty.
        """
        bos_host = os.getenv("BOS_HOST")
        if not bos_host:
            raise KeyError('${BOS_HOST} is not found.')
        access_key_id = os.getenv("BOS_AK")
        if not access_key_id:
            raise KeyError('${BOS_AK} is not found.')
        secret_access_key = os.getenv("BOS_SK")
        if not secret_access_key:
            raise KeyError('${BOS_SK} is not found.')
        self.max_contents_count = int(os.getenv('BOS_CACHE_COUNT', 1))
        self.max_contents_time = int(os.getenv('BOS_CACHE_TIME', 1))
        bos_sts = os.getenv("BOS_STS")
        self.config = BceClientConfiguration(
            credentials=BceCredentials(access_key_id, secret_access_key),
            endpoint=bos_host, security_token=bos_sts)

    def isfile(self, filename):
        """Return True if an object exists at ``filename``."""
        # Delegate to the method on this instance; the bare name ``exists``
        # is not defined by this class.
        return self.exists(filename)

    def read_file(self, filename, binary=True):
        """Return the whole content of the object at ``filename``.

        ``binary`` is accepted for interface compatibility and is not used.
        """
        bucket_name, object_key = BosFileSystem._get_object_info(filename)
        return self.bos_client.get_object_as_string(bucket_name, object_key)

    @staticmethod
    def _get_object_info(path):
        """Split ``bos://<bucket>/<key>`` into ``(bucket, key)``.

        The first six characters (the ``bos://`` scheme) are dropped without
        being checked.

        Raises:
            ValueError: If nothing follows the bucket name (no ``/``).
        """
        path = path[6:]
        index = path.index('/')
        return path[:index], path[index + 1:]

    def exists(self, path):
        """Return True if metadata can be fetched for the object at ``path``.

        Any ``BceError`` raised by the metadata request is reported as
        "does not exist".
        """
        bucket_name, object_key = BosFileSystem._get_object_info(path)
        try:
            self.bos_client.get_object_meta_data(bucket_name, object_key)
            return True
        except exception.BceError:
            return False

    def get_meta(self, bucket_name, object_key):
        """Return the metadata response for ``object_key`` in ``bucket_name``."""
        return self.bos_client.get_object_meta_data(bucket_name, object_key)

    def makedirs(self, path):
        """Create an empty object whose key ends with ``/`` if it is missing."""
        if not path.endswith('/'):
            path += '/'
        if self.exists(path):
            return
        bucket_name, object_key = BosFileSystem._get_object_info(path)
        if not object_key.endswith('/'):
            object_key += '/'
        init_data = b''
        self.bos_client.append_object(bucket_name=bucket_name,
                                      key=object_key,
                                      data=init_data,
                                      content_md5=content_md5(init_data),
                                      content_length=len(init_data))

    @staticmethod
    def join(path, *paths):
        """Join path components and normalise separators to ``/``.

        ``os.path.join`` produces backslashes on Windows, which are not valid
        separators in BOS keys.
        """
        result = os.path.join(path, *paths)
        # str.replace returns a new string; the result must be kept.
        return result.replace('\\', '/')

    def read(self, filename, binary_mode=False, size=0, continue_from=None):
        """Read the object at ``filename`` from a resumable offset.

        Args:
            filename: ``bos://`` path of the object.
            binary_mode: Accepted for interface compatibility; not used.
            size: Accepted for interface compatibility; not used.
            continue_from: Token returned by a previous call, or None to
                read from the beginning.

        Returns:
            ``(data, token)`` where ``data`` is everything from the offset to
            the current end of the object (``b''`` if nothing is new) and
            ``token`` is ``{"last_offset": <current length>}``.
        """
        bucket_name, object_key = BosFileSystem._get_object_info(filename)
        offset = 0
        if continue_from is not None:
            offset = continue_from.get("last_offset", 0)
        length = int(
            self.get_meta(bucket_name, object_key).metadata.content_length)
        if offset < length:
            # The requested range is inclusive on both ends.
            data = self.bos_client.get_object_as_string(
                bucket_name=bucket_name,
                key=object_key,
                range=[offset, length - 1])
        else:
            data = b''

        return data, {"last_offset": length}

    def ready_to_append(self):
        """Return True when the pending buffer should be flushed.

        That is the case once at least ``max_contents_count`` chunks are
        buffered or more than ``max_contents_time`` seconds have passed since
        the last flush.
        """
        return (self._file_contents_count >= self.max_contents_count
                or time.time() - self._start_append_time
                > self.max_contents_time)

    def append(self, filename, file_content, binary_mode=False, force=False):
        """Buffer ``file_content`` and flush the buffer to BOS when due.

        Args:
            filename: ``bos://`` path of the target object.
            file_content: Bytes to add.
            binary_mode: Accepted for interface compatibility; not used.
            force: Flush now even if ``ready_to_append()`` is False.

        NOTE(review): there is a single buffer per instance, not per
        filename; everything buffered is written to the ``filename`` of the
        call that triggers the flush.
        """
        self._file_contents_to_add += file_content
        self._file_contents_count += 1

        if not force and not self.ready_to_append():
            return
        pending = self._file_contents_to_add
        bucket_name, object_key = BosFileSystem._get_object_info(filename)
        if not self.exists(filename):
            # Create the object empty first so its length can be queried.
            init_data = b''
            self.bos_client.append_object(bucket_name=bucket_name,
                                          key=object_key,
                                          data=init_data,
                                          content_md5=content_md5(init_data),
                                          content_length=len(init_data))

        # Append at the current end of the object.
        offset = self.get_meta(bucket_name, object_key).metadata.content_length
        self.bos_client.append_object(bucket_name=bucket_name,
                                      key=object_key,
                                      data=pending,
                                      content_md5=content_md5(pending),
                                      content_length=len(pending),
                                      offset=offset)
        self._file_contents_to_add = b''
        self._file_contents_count = 0
        self._start_append_time = time.time()

    def write(self, filename, file_content, binary_mode=False):
        """Write ``file_content`` to ``filename`` via the buffered ``append``."""
        self.append(filename, file_content, binary_mode=False)

    def walk(self, dir):
        """Iterate over the objects below ``dir``, grouped by parent key.

        Returns:
            An iterator yielding ``[dirpath, [], filenames]`` lists, where
            ``dirpath`` is a ``bos://`` path and the sub-directory list is
            always empty.

        NOTE(review): only a single ``list_objects`` call is made, so
        presumably only the first page of results is walked -- confirm.
        """
        class WalkGenerator():
            """Single-pass iterator over the grouped listing."""

            def __init__(self, bucket_name, contents):
                self.contents = None
                self.length = 0
                self.bucket = bucket_name
                self.handle_contents(contents)
                self.count = 0

            def handle_contents(self, contents):
                """Group object keys by the part before their last ``/``."""
                contents_map = {}
                for item in contents:
                    if '/' in item:
                        key, value = item.rsplit('/', 1)
                    else:
                        # Keys without a separator live at the bucket root.
                        key, value = '.', item
                    contents_map.setdefault(key, []).append(value)
                self.contents = [
                    [BosFileSystem.join('bos://' + self.bucket, key), [],
                     value]
                    for key, value in contents_map.items()
                ]
                self.length = len(self.contents)

            def __iter__(self):
                return self

            def __next__(self):
                if self.count < self.length:
                    self.count += 1
                    return self.contents[self.count - 1]
                raise StopIteration

        bucket_name, object_key = BosFileSystem._get_object_info(dir)

        if object_key in ['.', './']:
            prefix = None
        else:
            prefix = object_key if object_key.endswith(
                '/') else object_key + '/'
        response = self.bos_client.list_objects(bucket_name,
                                                prefix=prefix)
        contents = [content.key for content in response.contents]
        return WalkGenerator(bucket_name, contents)
Ejemplo n.º 5
0
    # list your buckets
    response = bos_client.list_buckets()
    for bucket in response.buckets:
        __logger.debug("[Sample] list buckets:%s", bucket.name)

    ######################################################################################################
    #            object operation samples
    ######################################################################################################

    # put a string as object
    bos_client.put_object_from_string(bucket_name, key,
                                      "This is string content.")

    # get an object as string
    content = bos_client.get_object_as_string(bucket_name, key)
    __logger.debug("[Sample] get object as string:%s", content)

    # put a file as object
    _create_file(file_name, 4096)
    bos_client.put_object_from_file(bucket_name, key, file_name)

    # get object into file
    bos_client.get_object_to_file(bucket_name, key, download)
    __logger.debug("[Sample] get object into file, file size:%s",
                   os.path.getsize(download))

    # put an appendable object
    append_key = 'test_append_key'
    result = bos_client.append_object_from_string(
        bucket_name=bucket_name,
Ejemplo n.º 6
0
class BosFileSystem(object):
    """File-system style facade over a Baidu BOS client.

    Paths handled by this class have the form ``bos://<bucket>/<object key>``.
    Writes are buffered in memory and flushed to BOS as append operations
    once enough chunks have accumulated or enough time has elapsed. When an
    upload fails, the client is renewed once with temporary credentials
    fetched from the configured server and the upload is retried.
    """

    def __init__(self, write_flag=True):
        """Set up buffering state and, if requested, the BOS client.

        Args:
            write_flag: When True, read the configuration from the
                environment and create ``self.bos_client`` immediately.
                When False, no client is created here; it has to be
                supplied later, e.g. through ``set_bos_config``.
        """
        # Defaults; get_bos_config() overrides them from the environment.
        self.max_contents_count = 1
        self.max_contents_time = 1
        if write_flag:
            self.get_bos_config()
            self.bos_client = BosClient(self.config)

        # Buffering state is initialised regardless of write_flag so that
        # append()/ready_to_append() work once a client has been configured.
        self.file_length_map = {}
        self._file_contents_to_add = b''
        self._file_contents_count = 0
        self._start_append_time = time.time()

    def get_bos_config(self):
        """Build ``self.config`` from environment variables.

        Required: ``BOS_HOST``, ``BOS_AK``, ``BOS_SK``.
        Optional: ``BOS_CACHE_COUNT``, ``BOS_CACHE_TIME`` (both default to 1)
        and ``BOS_STS`` (security token, may be unset).

        Raises:
            KeyError: If one of the required variables is missing or empty.
        """
        bos_host = os.getenv("BOS_HOST")
        if not bos_host:
            raise KeyError('${BOS_HOST} is not found.')
        access_key_id = os.getenv("BOS_AK")
        if not access_key_id:
            raise KeyError('${BOS_AK} is not found.')
        secret_access_key = os.getenv("BOS_SK")
        if not secret_access_key:
            raise KeyError('${BOS_SK} is not found.')
        self.max_contents_count = int(os.getenv('BOS_CACHE_COUNT', 1))
        self.max_contents_time = int(os.getenv('BOS_CACHE_TIME', 1))
        bos_sts = os.getenv("BOS_STS")
        self.config = BceClientConfiguration(
            credentials=BceCredentials(access_key_id, secret_access_key),
            endpoint=bos_host,
            security_token=bos_sts)

    def set_bos_config(self, bos_ak, bos_sk, bos_sts,
                       bos_host="bj.bcebos.com"):
        """Replace the configuration and client with the given credentials."""
        self.config = BceClientConfiguration(
            credentials=BceCredentials(bos_ak, bos_sk),
            endpoint=bos_host,
            security_token=bos_sts)
        self.bos_client = BosClient(self.config)

    def renew_bos_client_from_server(self):
        """Fetch temporary credentials from the server and rebuild the client.

        The server URL is read from the JSON file at ``CONFIG_PATH`` and a
        POST request is sent to ``<server_url>/sts/``. On a response code
        other than ``'000000'`` the error is printed and the current client
        is left unchanged.
        """
        import requests
        import json
        from visualdl.utils.dir import CONFIG_PATH
        with open(CONFIG_PATH, 'r') as fp:
            server_url = json.load(fp)['server_url']
        url = server_url + '/sts/'
        res = requests.post(url=url).json()
        err_code = res.get('code')
        msg = res.get('msg')
        if err_code != '000000':
            print('Renew bos client error. Error msg: {}'.format(msg))
            return
        self.set_bos_config(
            msg.get('sts_ak'), msg.get('sts_sk'), msg.get('token'))

    def isfile(self, filename):
        """Return True if an object exists at ``filename``."""
        # Delegate to the method on this instance; the bare name ``exists``
        # is not defined by this class.
        return self.exists(filename)

    def read_file(self, filename, binary=True):
        """Return the whole content of the object at ``filename``.

        ``binary`` is accepted for interface compatibility and is not used.
        """
        bucket_name, object_key = get_object_info(filename)
        return self.bos_client.get_object_as_string(bucket_name, object_key)

    def exists(self, path):
        """Return True if metadata can be fetched for the object at ``path``.

        Any ``BceError`` raised by the metadata request is reported as
        "does not exist".
        """
        bucket_name, object_key = get_object_info(path)
        try:
            self.bos_client.get_object_meta_data(bucket_name, object_key)
            return True
        except exception.BceError:
            return False

    def get_meta(self, bucket_name, object_key):
        """Return the metadata response for ``object_key`` in ``bucket_name``."""
        return self.bos_client.get_object_meta_data(bucket_name, object_key)

    def _create_empty_object(self, bucket_name, object_key):
        """Create ``object_key`` as an empty appendable object."""
        init_data = b''
        self.bos_client.append_object(
            bucket_name=bucket_name,
            key=object_key,
            data=init_data,
            content_md5=content_md5(init_data),
            content_length=len(init_data))

    def _append_at_end(self, bucket_name, object_key, data):
        """Append ``data`` at the current end of an existing object."""
        offset = self.get_meta(bucket_name,
                               object_key).metadata.content_length
        self.bos_client.append_object(
            bucket_name=bucket_name,
            key=object_key,
            data=data,
            content_md5=content_md5(data),
            content_length=len(data),
            offset=offset)

    def _retry_after_renewal(self, operation):
        """Run ``operation``; on a BCE error renew the client and retry once.

        ``operation`` must look up ``self.bos_client`` when called so that
        the retry uses the renewed client.
        """
        try:
            return operation()
        except (exception.BceServerError, exception.BceHttpClientError):
            self.renew_bos_client_from_server()
            return operation()

    def makedirs(self, path):
        """Create an empty object whose key ends with ``/`` if it is missing."""
        if not path.endswith('/'):
            path += '/'
        if self.exists(path):
            return
        bucket_name, object_key = get_object_info(path)
        if not object_key.endswith('/'):
            object_key += '/'
        self._create_empty_object(bucket_name, object_key)

    @staticmethod
    def join(path, *paths):
        """Join path components and normalise separators to ``/``.

        ``os.path.join`` produces backslashes on Windows, which are not valid
        separators in BOS keys.
        """
        result = os.path.join(path, *paths)
        # str.replace returns a new string; the result must be kept.
        return result.replace('\\', '/')

    def read(self, filename, binary_mode=False, size=0, continue_from=None):
        """Read the object at ``filename`` from a resumable offset.

        Args:
            filename: ``bos://`` path of the object.
            binary_mode: Accepted for interface compatibility; not used.
            size: Accepted for interface compatibility; not used.
            continue_from: Token returned by a previous call, or None to
                read from the beginning.

        Returns:
            ``(data, token)`` where ``data`` is everything from the offset to
            the current end of the object (``b''`` if nothing is new) and
            ``token`` is ``{"last_offset": <current length>}``.
        """
        bucket_name, object_key = get_object_info(filename)
        offset = 0
        if continue_from is not None:
            offset = continue_from.get("last_offset", 0)
        length = int(
            self.get_meta(bucket_name, object_key).metadata.content_length)
        if offset < length:
            # The requested range is inclusive on both ends.
            data = self.bos_client.get_object_as_string(
                bucket_name=bucket_name,
                key=object_key,
                range=[offset, length - 1])
        else:
            data = b''

        return data, {"last_offset": length}

    def ready_to_append(self):
        """Return True when the pending buffer should be flushed.

        That is the case once at least ``max_contents_count`` chunks are
        buffered or more than ``max_contents_time`` seconds have passed since
        the last flush.
        """
        return (self._file_contents_count >= self.max_contents_count
                or time.time() - self._start_append_time
                > self.max_contents_time)

    def append(self, filename, file_content, binary_mode=False, force=False):
        """Buffer ``file_content`` and flush the buffer to BOS when due.

        Args:
            filename: ``bos://`` path of the target object.
            file_content: Bytes to add.
            binary_mode: Accepted for interface compatibility; not used.
            force: Flush now even if ``ready_to_append()`` is False.

        Each upload step is retried once after renewing the client if it
        fails with a BCE server or HTTP client error.

        NOTE(review): there is a single buffer per instance, not per
        filename; everything buffered is written to the ``filename`` of the
        call that triggers the flush.
        """
        self._file_contents_to_add += file_content
        self._file_contents_count += 1

        if not force and not self.ready_to_append():
            return
        pending = self._file_contents_to_add
        bucket_name, object_key = get_object_info(filename)
        if not self.exists(filename):
            # Create the object empty first so its length can be queried.
            # After a renewal the flow continues to the real append below
            # instead of returning with the buffered data still unwritten.
            self._retry_after_renewal(
                lambda: self._create_empty_object(bucket_name, object_key))

        self._retry_after_renewal(
            lambda: self._append_at_end(bucket_name, object_key, pending))

        self._file_contents_to_add = b''
        self._file_contents_count = 0
        self._start_append_time = time.time()

    def write(self, filename, file_content, binary_mode=False):
        """Write ``file_content`` to ``filename`` via the buffered ``append``."""
        self.append(filename, file_content, binary_mode=False)

    def walk(self, dir):
        """Iterate over the objects below ``dir``, grouped by parent key.

        Returns:
            An iterator yielding ``[dirpath, [], filenames]`` lists, where
            ``dirpath`` is a ``bos://`` path and the sub-directory list is
            always empty.

        NOTE(review): only a single ``list_objects`` call is made, so
        presumably only the first page of results is walked -- confirm.
        """
        class WalkGenerator():
            """Single-pass iterator over the grouped listing."""

            def __init__(self, bucket_name, contents):
                self.contents = None
                self.length = 0
                self.bucket = bucket_name
                self.handle_contents(contents)
                self.count = 0

            def handle_contents(self, contents):
                """Group object keys by the part before their last ``/``."""
                contents_map = {}
                for item in contents:
                    if '/' in item:
                        key, value = item.rsplit('/', 1)
                    else:
                        # Keys without a separator live at the bucket root.
                        key, value = '.', item
                    contents_map.setdefault(key, []).append(value)
                self.contents = [
                    [BosFileSystem.join('bos://' + self.bucket, key), [],
                     value]
                    for key, value in contents_map.items()
                ]
                self.length = len(self.contents)

            def __iter__(self):
                return self

            def __next__(self):
                if self.count < self.length:
                    self.count += 1
                    return self.contents[self.count - 1]
                raise StopIteration

        bucket_name, object_key = get_object_info(dir)

        if object_key in ['.', './']:
            prefix = None
        else:
            prefix = object_key if object_key.endswith(
                '/') else object_key + '/'
        response = self.bos_client.list_objects(bucket_name, prefix=prefix)
        contents = [content.key for content in response.contents]
        return WalkGenerator(bucket_name, contents)