예제 #1
0
파일: slo.py 프로젝트: pchng/swift
    def get_or_head_response(self, req, resp_headers, resp_iter):
        with closing_if_possible(resp_iter):
            resp_body = "".join(resp_iter)
        try:
            segments = json.loads(resp_body)
        except ValueError:
            segments = []

        etag = md5()
        content_length = 0
        for seg_dict in segments:
            if seg_dict.get("range"):
                etag.update("%s:%s;" % (seg_dict["hash"], seg_dict["range"]))
            else:
                etag.update(seg_dict["hash"])

            if config_true_value(seg_dict.get("sub_slo")):
                override_bytes_from_content_type(seg_dict, logger=self.slo.logger)
            content_length += self._segment_length(seg_dict)

        response_headers = [(h, v) for h, v in resp_headers if h.lower() not in ("etag", "content-length")]
        response_headers.append(("Content-Length", str(content_length)))
        response_headers.append(("Etag", '"%s"' % etag.hexdigest()))

        if req.method == "HEAD":
            return self._manifest_head_response(req, response_headers)
        else:
            return self._manifest_get_response(req, content_length, response_headers, segments)
예제 #2
0
    def get_or_head_response(self, req, resp_headers, resp_iter):
        segments = self._get_manifest_read(resp_iter)

        etag = md5()
        content_length = 0
        for seg_dict in segments:
            if seg_dict.get('range'):
                etag.update('%s:%s;' % (seg_dict['hash'], seg_dict['range']))
            else:
                etag.update(seg_dict['hash'])

            if config_true_value(seg_dict.get('sub_slo')):
                override_bytes_from_content_type(
                    seg_dict, logger=self.slo.logger)
            content_length += self._segment_length(seg_dict)

        response_headers = [(h, v) for h, v in resp_headers
                            if h.lower() not in ('etag', 'content-length')]
        response_headers.append(('Content-Length', str(content_length)))
        response_headers.append(('Etag', '"%s"' % etag.hexdigest()))

        if req.method == 'HEAD':
            return self._manifest_head_response(req, response_headers)
        else:
            return self._manifest_get_response(
                req, content_length, response_headers, segments)
예제 #3
0
    def update_data_record(self, record, versions=False):
        if 'subdir' in record:
            return {'subdir': record['name']}

        props = record.get('properties', {})
        # This metadata is added by encryption middleware.
        if 'x-object-sysmeta-container-update-override-etag' in props:
            hash_ = props['x-object-sysmeta-container-update-override-etag']
        else:
            hash_ = record.get('hash')
            if hash_ is not None:
                hash_ = hash_.lower()

        response = {
            'name': record['name'],
            'bytes': record['size'],
            'last_modified': Timestamp(record['mtime']).isoformat,
            'content_type': record.get('mime_type', 'application/octet-stream')
        }
        if hash_:
            response['hash'] = hash_
        else:
            response['hash'] = ''
        if record.get('deleted', False):
            response['content_type'] = DELETE_MARKER_CONTENT_TYPE
        if versions:
            response['version'] = record.get('version', 'null')
        override_bytes_from_content_type(response)
        return response
예제 #4
0
파일: server.py 프로젝트: bigbang009/swift
    def update_data_record(self, record):
        """
        Perform any mutations to container listing records that are common to
        all serialization formats, and returns it as a dict.

        Converts created time to iso timestamp.
        Replaces size with 'swift_bytes' content type parameter.

        :params record: object entry record
        :returns: modified record
        """
        if isinstance(record, ShardRange):
            created = record.timestamp
            response = dict(record)
        else:
            (name, created, size, content_type, etag) = record[:5]
            name_ = name.decode('utf8') if six.PY2 else name
            if content_type is None:
                return {'subdir': name_}
            response = {
                'bytes': size,
                'hash': etag,
                'name': name_,
                'content_type': content_type
            }
            override_bytes_from_content_type(response, logger=self.logger)
        response['last_modified'] = Timestamp(created).isoformat
        return response
예제 #5
0
    def get_or_head_response(self, req, resp_headers, resp_iter):
        with closing_if_possible(resp_iter):
            resp_body = ''.join(resp_iter)
        try:
            segments = json.loads(resp_body)
        except ValueError:
            segments = []

        etag = md5()
        content_length = 0
        for seg_dict in segments:
            if seg_dict.get('range'):
                etag.update('%s:%s;' % (seg_dict['hash'], seg_dict['range']))
            else:
                etag.update(seg_dict['hash'])

            if config_true_value(seg_dict.get('sub_slo')):
                override_bytes_from_content_type(
                    seg_dict, logger=self.slo.logger)
            content_length += self._segment_length(seg_dict)

        response_headers = [(h, v) for h, v in resp_headers
                            if h.lower() not in ('etag', 'content-length')]
        response_headers.append(('Content-Length', str(content_length)))
        response_headers.append(('Etag', '"%s"' % etag.hexdigest()))

        if req.method == 'HEAD':
            return self._manifest_head_response(req, response_headers)
        else:
            return self._manifest_get_response(
                req, content_length, response_headers, segments)
예제 #6
0
    def get_or_head_response(self, req, resp_headers, resp_iter):
        resp_body = ''.join(resp_iter)
        try:
            segments = json.loads(resp_body)
        except ValueError:
            segments = []

        etag = md5()
        content_length = 0
        for seg_dict in segments:
            etag.update(seg_dict['hash'])

            if config_true_value(seg_dict.get('sub_slo')):
                override_bytes_from_content_type(seg_dict,
                                                 logger=self.slo.logger)
            content_length += int(seg_dict['bytes'])

        response_headers = [(h, v) for h, v in resp_headers
                            if h.lower() not in ('etag', 'content-length')]
        response_headers.append(('Content-Length', str(content_length)))
        response_headers.append(('Etag', '"%s"' % etag.hexdigest()))

        if req.method == 'HEAD':
            return self._manifest_head_response(req, response_headers)
        else:
            return self._manifest_get_response(req, content_length,
                                               response_headers, segments)
예제 #7
0
    def update_data_record(self, record, list_meta=False):
        """
        Perform any mutations to container listing records that are common to
        all serialization formats, and returns it as a dict.

        Converts created time to iso timestamp.
        Replaces size with 'swift_bytes' content type parameter.

        :params record: object entry record
        :returns: modified record
        """
        (name, created, size, content_type, etag, metadata) = record
        if content_type is None:
            return {'subdir': name}
        response = {'bytes': size, 'hash': etag, 'name': name,
                    'content_type': content_type}
        if list_meta:
            metadata = json.loads(metadata)
            utf8encodekeys(metadata)
            response['metadata'] = metadata
        last_modified = datetime.utcfromtimestamp(float(created)).isoformat()
        # python isoformat() doesn't include msecs when zero
        if len(last_modified) < len("1970-01-01T00:00:00.000000"):
            last_modified += ".000000"
        response['last_modified'] = last_modified
        override_bytes_from_content_type(response, logger=self.logger)
        return response
예제 #8
0
파일: server.py 프로젝트: openstack/swift
    def update_data_record(self, record):
        """
        Perform any mutations to container listing records that are common to
        all serialization formats, and returns it as a dict.

        Converts created time to iso timestamp.
        Replaces size with 'swift_bytes' content type parameter.

        :params record: object entry record
        :returns: modified record
        """
        if isinstance(record, ShardRange):
            created = record.timestamp
            response = dict(record)
        else:
            (name, created, size, content_type, etag) = record[:5]
            name_ = name.decode('utf8') if six.PY2 else name
            if content_type is None:
                return {'subdir': name_}
            response = {
                'bytes': size, 'hash': etag, 'name': name_,
                'content_type': content_type}
            override_bytes_from_content_type(response, logger=self.logger)
        response['last_modified'] = Timestamp(created).isoformat
        return response
예제 #9
0
파일: slo.py 프로젝트: stjordanis/oio-swift
    def get_or_head_response(self, req, resp_headers, resp_iter):
        segments = self._get_manifest_read(resp_iter)
        slo_etag = None
        content_length = None
        response_headers = []
        for header, value in resp_headers:
            lheader = header.lower()
            if lheader not in ('etag', 'content-length'):
                response_headers.append((header, value))

            if lheader == SYSMETA_SLO_ETAG:
                slo_etag = value
            elif lheader == SYSMETA_SLO_SIZE:
                # it's from sysmeta, so we don't worry about non-integer
                # values here
                content_length = int(value)

        # Prep to calculate content_length & etag if necessary
        if slo_etag is None:
            calculated_etag = md5()
        if content_length is None:
            calculated_content_length = 0

        for seg_dict in segments:
            # Decode any inlined data; it's important that we do this *before*
            # calculating the segment length and etag
            if 'data' in seg_dict:
                seg_dict['raw_data'] = base64.b64decode(seg_dict.pop('data'))

            if slo_etag is None:
                if 'raw_data' in seg_dict:
                    calculated_etag.update(
                        md5(seg_dict['raw_data']).hexdigest())
                elif seg_dict.get('range'):
                    calculated_etag.update(
                        '%s:%s;' % (seg_dict['hash'], seg_dict['range']))
                else:
                    calculated_etag.update(seg_dict['hash'])

            if content_length is None:
                if config_true_value(seg_dict.get('sub_slo')):
                    override_bytes_from_content_type(seg_dict,
                                                     logger=self.slo.logger)
                calculated_content_length += self._segment_length(seg_dict)

        if slo_etag is None:
            slo_etag = calculated_etag.hexdigest()
        if content_length is None:
            content_length = calculated_content_length

        response_headers.append(('Content-Length', str(content_length)))
        response_headers.append(('Etag', '"%s"' % slo_etag))

        if req.method == 'HEAD':
            return self._manifest_head_response(req, response_headers)
        else:
            return self._manifest_get_response(req, content_length,
                                               response_headers, segments)
예제 #10
0
파일: slo.py 프로젝트: AfonsoFGarcia/swift
    def _segment_listing_iterator(self, req, version, account, segments,
                                  recursion_depth=1):
        for seg_dict in segments:
            if config_true_value(seg_dict.get('sub_slo')):
                override_bytes_from_content_type(seg_dict,
                                                 logger=self.slo.logger)

        # We handle the range stuff here so that we can be smart about
        # skipping unused submanifests. For example, if our first segment is a
        # submanifest referencing 50 MiB total, but self.first_byte falls in
        # the 51st MiB, then we can avoid fetching the first submanifest.
        #
        # If we were to make SegmentedIterable handle all the range
        # calculations, we would be unable to make this optimization.
        total_length = sum(int(seg['bytes']) for seg in segments)
        if self.first_byte is None:
            self.first_byte = 0
        if self.last_byte is None:
            self.last_byte = total_length - 1

        for seg_dict in segments:
            seg_length = int(seg_dict['bytes'])

            if self.first_byte >= seg_length:
                # don't need any bytes from this segment
                self.first_byte = max(self.first_byte - seg_length, -1)
                self.last_byte = max(self.last_byte - seg_length, -1)
                continue

            if self.last_byte < 0:
                # no bytes are needed from this or any future segment
                break

            if config_true_value(seg_dict.get('sub_slo')):
                # do this check here so that we can avoid fetching this last
                # manifest before raising the exception
                if recursion_depth >= self.max_slo_recursion_depth:
                    raise ListingIterError("Max recursion depth exceeded")

                sub_path = get_valid_utf8_str(seg_dict['name'])
                sub_cont, sub_obj = split_path(sub_path, 2, 2, True)
                sub_segments = self._fetch_sub_slo_segments(
                    req, version, account, sub_cont, sub_obj)
                for sub_seg_dict, sb, eb in self._segment_listing_iterator(
                        req, version, account, sub_segments,
                        recursion_depth=recursion_depth + 1):
                    yield sub_seg_dict, sb, eb
            else:
                if isinstance(seg_dict['name'], unicode):
                    seg_dict['name'] = seg_dict['name'].encode("utf-8")
                seg_length = int(seg_dict['bytes'])
                yield (seg_dict,
                       (None if self.first_byte <= 0 else self.first_byte),
                       (None if self.last_byte >=
                        seg_length - 1 else self.last_byte))
                self.first_byte = max(self.first_byte - seg_length, -1)
                self.last_byte = max(self.last_byte - seg_length, -1)
예제 #11
0
    def update_data_record(self, record):
        if 'subdir' in record:
            return {'subdir': record['name']}

        sysmeta = extract_sysmeta(record.get('system_metadata', None))
        response = {'name': record['name'],
                    'bytes': record['size'],
                    'hash': record['hash'].lower(),
                    'last_modified': Timestamp(record['ctime']).isoformat,
                    'content_type': sysmeta.get('mime-type',
                                                'application/octet-stream')}
        override_bytes_from_content_type(response)
        return response
예제 #12
0
    def update_data_record(self, record, versions=False):
        if 'subdir' in record:
            return {'subdir': record['name']}

        response = {'name': record['name'],
                    'bytes': record['size'],
                    'hash': record['hash'].lower(),
                    'last_modified': Timestamp(record['ctime']).isoformat,
                    'content_type': record.get(
                        'mime_type', 'application/octet-stream')}
        if record.get('deleted', False):
            response['content_type'] = DELETE_MARKER_CONTENT_TYPE
        if versions:
            response['version'] = record.get('version', 'null')
        override_bytes_from_content_type(response)
        return response
예제 #13
0
파일: slo.py 프로젝트: bebule/swift
    def get_or_head_response(self, req, resp_headers, resp_iter):
        segments = self._get_manifest_read(resp_iter)

        slo_etag = None
        content_length = None
        response_headers = []
        for header, value in resp_headers:
            lheader = header.lower()
            if lheader == SYSMETA_SLO_ETAG:
                slo_etag = value
            elif lheader == SYSMETA_SLO_SIZE:
                content_length = value
            elif lheader not in ('etag', 'content-length'):
                response_headers.append((header, value))

        if slo_etag is None or content_length is None:
            etag = md5()
            content_length = 0
            for seg_dict in segments:
                if seg_dict.get('range'):
                    etag.update('%s:%s;' % (seg_dict['hash'],
                                            seg_dict['range']))
                else:
                    etag.update(seg_dict['hash'])

                if config_true_value(seg_dict.get('sub_slo')):
                    override_bytes_from_content_type(
                        seg_dict, logger=self.slo.logger)
                content_length += self._segment_length(seg_dict)
            slo_etag = etag.hexdigest()

        response_headers.append(('Content-Length', str(content_length)))
        response_headers.append(('Etag', '"%s"' % slo_etag))

        if req.method == 'HEAD':
            return self._manifest_head_response(req, response_headers)
        else:
            return self._manifest_get_response(
                req, content_length, response_headers, segments)
예제 #14
0
    def get_or_head_response(self, req, resp_headers, resp_iter):
        segments = self._get_manifest_read(resp_iter)

        slo_etag = None
        content_length = None
        response_headers = []
        for header, value in resp_headers:
            lheader = header.lower()
            if lheader == SYSMETA_SLO_ETAG:
                slo_etag = value
            elif lheader == SYSMETA_SLO_SIZE:
                content_length = value
            elif lheader not in ('etag', 'content-length'):
                response_headers.append((header, value))

        if slo_etag is None or content_length is None:
            etag = md5()
            content_length = 0
            for seg_dict in segments:
                if seg_dict.get('range'):
                    etag.update('%s:%s;' %
                                (seg_dict['hash'], seg_dict['range']))
                else:
                    etag.update(seg_dict['hash'])

                if config_true_value(seg_dict.get('sub_slo')):
                    override_bytes_from_content_type(seg_dict,
                                                     logger=self.slo.logger)
                content_length += self._segment_length(seg_dict)
            slo_etag = etag.hexdigest()

        response_headers.append(('Content-Length', str(content_length)))
        response_headers.append(('Etag', '"%s"' % slo_etag))

        if req.method == 'HEAD':
            return self._manifest_head_response(req, response_headers)
        else:
            return self._manifest_get_response(req, content_length,
                                               response_headers, segments)
예제 #15
0
파일: slo.py 프로젝트: bebule/swift
    def _segment_listing_iterator(self, req, version, account, segments,
                                  byteranges):
        for seg_dict in segments:
            if config_true_value(seg_dict.get('sub_slo')):
                override_bytes_from_content_type(seg_dict,
                                                 logger=self.slo.logger)

        # We handle the range stuff here so that we can be smart about
        # skipping unused submanifests. For example, if our first segment is a
        # submanifest referencing 50 MiB total, but start_byte falls in
        # the 51st MiB, then we can avoid fetching the first submanifest.
        #
        # If we were to make SegmentedIterable handle all the range
        # calculations, we would be unable to make this optimization.
        total_length = sum(self._segment_length(seg) for seg in segments)
        if not byteranges:
            byteranges = [(0, total_length - 1)]

        # Cache segments from sub-SLOs in case more than one byterange
        # includes data from a particular sub-SLO. We only cache a few sets
        # of segments so that a malicious user cannot build a giant SLO tree
        # and then GET it to run the proxy out of memory.
        #
        # LRUCache is a little awkward to use this way, but it beats doing
        # things manually.
        #
        # 20 is sort of an arbitrary choice; it's twice our max recursion
        # depth, so we know this won't expand memory requirements by too
        # much.
        cached_fetch_sub_slo_segments = \
            LRUCache(maxsize=20)(self._fetch_sub_slo_segments)

        for first_byte, last_byte in byteranges:
            byterange_listing_iter = self._byterange_listing_iterator(
                req, version, account, segments, first_byte, last_byte,
                cached_fetch_sub_slo_segments)
            for seg_info in byterange_listing_iter:
                yield seg_info
예제 #16
0
    def _segment_listing_iterator(self, req, version, account, segments,
                                  byteranges):
        for seg_dict in segments:
            if config_true_value(seg_dict.get('sub_slo')):
                override_bytes_from_content_type(seg_dict,
                                                 logger=self.slo.logger)

        # We handle the range stuff here so that we can be smart about
        # skipping unused submanifests. For example, if our first segment is a
        # submanifest referencing 50 MiB total, but start_byte falls in
        # the 51st MiB, then we can avoid fetching the first submanifest.
        #
        # If we were to make SegmentedIterable handle all the range
        # calculations, we would be unable to make this optimization.
        total_length = sum(self._segment_length(seg) for seg in segments)
        if not byteranges:
            byteranges = [(0, total_length - 1)]

        # Cache segments from sub-SLOs in case more than one byterange
        # includes data from a particular sub-SLO. We only cache a few sets
        # of segments so that a malicious user cannot build a giant SLO tree
        # and then GET it to run the proxy out of memory.
        #
        # LRUCache is a little awkward to use this way, but it beats doing
        # things manually.
        #
        # 20 is sort of an arbitrary choice; it's twice our max recursion
        # depth, so we know this won't expand memory requirements by too
        # much.
        cached_fetch_sub_slo_segments = \
            LRUCache(maxsize=20)(self._fetch_sub_slo_segments)

        for first_byte, last_byte in byteranges:
            byterange_listing_iter = self._byterange_listing_iterator(
                req, version, account, segments, first_byte, last_byte,
                cached_fetch_sub_slo_segments)
            for seg_info in byterange_listing_iter:
                yield seg_info
예제 #17
0
    def update_data_record(self, record, versions=False):
        if 'subdir' in record:
            return {'subdir': record['name']}

        props = record.get('properties', {})
        # This metadata is added by encryption middleware.
        if 'x-object-sysmeta-container-update-override-etag' in props:
            hash_ = props['x-object-sysmeta-container-update-override-etag']
        else:
            hash_ = record['hash'].lower()

        response = {'name': record['name'],
                    'bytes': record['size'],
                    'hash': hash_,
                    'last_modified': Timestamp(record['ctime']).isoformat,
                    'content_type': record.get(
                        'mime_type', 'application/octet-stream')}
        if record.get('deleted', False):
            response['content_type'] = DELETE_MARKER_CONTENT_TYPE
        if versions:
            response['version'] = record.get('version', 'null')
        override_bytes_from_content_type(response)
        return response
예제 #18
0
    def _segment_listing_iterator(self, req, version, account, segments,
                                  recursion_depth=1):
        for seg_dict in segments:
            if config_true_value(seg_dict.get('sub_slo')):
                override_bytes_from_content_type(seg_dict,
                                                 logger=self.slo.logger)

        # We handle the range stuff here so that we can be smart about
        # skipping unused submanifests. For example, if our first segment is a
        # submanifest referencing 50 MiB total, but start_byte falls in
        # the 51st MiB, then we can avoid fetching the first submanifest.
        #
        # If we were to make SegmentedIterable handle all the range
        # calculations, we would be unable to make this optimization.
        total_length = sum(self._segment_length(seg) for seg in segments)
        if self.first_byte is None:
            self.first_byte = 0
        if self.last_byte is None:
            self.last_byte = total_length - 1

        last_sub_path = None
        for seg_dict in segments:
            seg_length = self._segment_length(seg_dict)
            if self.first_byte >= seg_length:
                # don't need any bytes from this segment
                self.first_byte -= seg_length
                self.last_byte -= seg_length
                continue

            if self.last_byte < 0:
                # no bytes are needed from this or any future segment
                break

            range = seg_dict.get('range')
            if range is None:
                range_start, range_end = 0, seg_length - 1
            else:
                # We already validated and supplied concrete values
                # for the range on upload
                range_start, range_end = map(int, range.split('-'))

            if config_true_value(seg_dict.get('sub_slo')):
                # do this check here so that we can avoid fetching this last
                # manifest before raising the exception
                if recursion_depth >= self.max_slo_recursion_depth:
                    raise ListingIterError("Max recursion depth exceeded")

                sub_path = get_valid_utf8_str(seg_dict['name'])
                sub_cont, sub_obj = split_path(sub_path, 2, 2, True)
                if last_sub_path != sub_path:
                    sub_segments = self._fetch_sub_slo_segments(
                        req, version, account, sub_cont, sub_obj)
                last_sub_path = sub_path

                # Use the existing machinery to slice into the sub-SLO.
                # This requires that we save off our current state, and
                # restore at the other end.
                orig_start, orig_end = self.first_byte, self.last_byte
                self.first_byte = range_start + max(0, self.first_byte)
                self.last_byte = min(range_end, range_start + self.last_byte)

                for sub_seg_dict, sb, eb in self._segment_listing_iterator(
                        req, version, account, sub_segments,
                        recursion_depth=recursion_depth + 1):
                    yield sub_seg_dict, sb, eb

                # Restore the first/last state
                self.first_byte, self.last_byte = orig_start, orig_end
            else:
                if isinstance(seg_dict['name'], six.text_type):
                    seg_dict['name'] = seg_dict['name'].encode("utf-8")
                yield (seg_dict,
                       max(0, self.first_byte) + range_start,
                       min(range_end, range_start + self.last_byte))

            self.first_byte -= seg_length
            self.last_byte -= seg_length
예제 #19
0
    def _segment_listing_iterator(self,
                                  req,
                                  version,
                                  account,
                                  segments,
                                  recursion_depth=1):
        for seg_dict in segments:
            if config_true_value(seg_dict.get('sub_slo')):
                override_bytes_from_content_type(seg_dict,
                                                 logger=self.slo.logger)

        # We handle the range stuff here so that we can be smart about
        # skipping unused submanifests. For example, if our first segment is a
        # submanifest referencing 50 MiB total, but self.first_byte falls in
        # the 51st MiB, then we can avoid fetching the first submanifest.
        #
        # If we were to make SegmentedIterable handle all the range
        # calculations, we would be unable to make this optimization.
        total_length = sum(int(seg['bytes']) for seg in segments)
        if self.first_byte is None:
            self.first_byte = 0
        if self.last_byte is None:
            self.last_byte = total_length - 1

        for seg_dict in segments:
            seg_length = int(seg_dict['bytes'])

            if self.first_byte >= seg_length:
                # don't need any bytes from this segment
                self.first_byte = max(self.first_byte - seg_length, -1)
                self.last_byte = max(self.last_byte - seg_length, -1)
                continue

            if self.last_byte < 0:
                # no bytes are needed from this or any future segment
                break

            if config_true_value(seg_dict.get('sub_slo')):
                # do this check here so that we can avoid fetching this last
                # manifest before raising the exception
                if recursion_depth >= self.max_slo_recursion_depth:
                    raise ListingIterError("Max recursion depth exceeded")

                sub_path = get_valid_utf8_str(seg_dict['name'])
                sub_cont, sub_obj = split_path(sub_path, 2, 2, True)
                sub_segments = self._fetch_sub_slo_segments(
                    req, version, account, sub_cont, sub_obj)
                for sub_seg_dict, sb, eb in self._segment_listing_iterator(
                        req,
                        version,
                        account,
                        sub_segments,
                        recursion_depth=recursion_depth + 1):
                    yield sub_seg_dict, sb, eb
            else:
                if isinstance(seg_dict['name'], unicode):
                    seg_dict['name'] = seg_dict['name'].encode("utf-8")
                seg_length = int(seg_dict['bytes'])
                yield (seg_dict,
                       (None if self.first_byte <= 0 else self.first_byte),
                       (None if self.last_byte >= seg_length - 1 else
                        self.last_byte))
                self.first_byte = max(self.first_byte - seg_length, -1)
                self.last_byte = max(self.last_byte - seg_length, -1)
예제 #20
0
    def _segment_listing_iterator(self, req, version, account, segments,
                                  recursion_depth=1):
        for seg_dict in segments:
            if config_true_value(seg_dict.get('sub_slo')):
                override_bytes_from_content_type(seg_dict,
                                                 logger=self.slo.logger)

        # We handle the range stuff here so that we can be smart about
        # skipping unused submanifests. For example, if our first segment is a
        # submanifest referencing 50 MiB total, but start_byte falls in
        # the 51st MiB, then we can avoid fetching the first submanifest.
        #
        # If we were to make SegmentedIterable handle all the range
        # calculations, we would be unable to make this optimization.
        total_length = sum(self._segment_length(seg) for seg in segments)
        if self.first_byte is None:
            self.first_byte = 0
        if self.last_byte is None:
            self.last_byte = total_length - 1

        last_sub_path = None
        for seg_dict in segments:
            seg_length = self._segment_length(seg_dict)
            if self.first_byte >= seg_length:
                # don't need any bytes from this segment
                self.first_byte -= seg_length
                self.last_byte -= seg_length
                continue

            if self.last_byte < 0:
                # no bytes are needed from this or any future segment
                break

            seg_range = seg_dict.get('range')
            if seg_range is None:
                range_start, range_end = 0, seg_length - 1
            else:
                # We already validated and supplied concrete values
                # for the range on upload
                range_start, range_end = map(int, seg_range.split('-'))

            if config_true_value(seg_dict.get('sub_slo')):
                # do this check here so that we can avoid fetching this last
                # manifest before raising the exception
                if recursion_depth >= self.max_slo_recursion_depth:
                    raise ListingIterError("Max recursion depth exceeded")

                sub_path = get_valid_utf8_str(seg_dict['name'])
                sub_cont, sub_obj = split_path(sub_path, 2, 2, True)
                if last_sub_path != sub_path:
                    sub_segments = self._fetch_sub_slo_segments(
                        req, version, account, sub_cont, sub_obj)
                last_sub_path = sub_path

                # Use the existing machinery to slice into the sub-SLO.
                # This requires that we save off our current state, and
                # restore at the other end.
                orig_start, orig_end = self.first_byte, self.last_byte
                self.first_byte = range_start + max(0, self.first_byte)
                self.last_byte = min(range_end, range_start + self.last_byte)

                for sub_seg_dict, sb, eb in self._segment_listing_iterator(
                        req, version, account, sub_segments,
                        recursion_depth=recursion_depth + 1):
                    yield sub_seg_dict, sb, eb

                # Restore the first/last state
                self.first_byte, self.last_byte = orig_start, orig_end
            else:
                if isinstance(seg_dict['name'], six.text_type):
                    seg_dict['name'] = seg_dict['name'].encode("utf-8")
                yield (seg_dict,
                       max(0, self.first_byte) + range_start,
                       min(range_end, range_start + self.last_byte))

            self.first_byte -= seg_length
            self.last_byte -= seg_length