Example #1
0
 def test_get_response_body(self):
     """Check get_response_body output for an unknown format and for XML."""
     # 'bad_formay' is not a recognised content type; the assertion below
     # expects the plain "key: value" rendering in that case.
     txt_body = bulk.get_response_body(
         'bad_formay', {'hey': 'there'}, [['json > xml', '202 Accepted']])
     # assertIn replaces the deprecated assert_ alias and reports both
     # operands when the check fails.
     self.assertIn('hey: there', txt_body)
     xml_body = bulk.get_response_body(
         'text/xml', {'hey': 'there'}, [['json > xml', '202 Accepted']])
     # The '>' in the failed-item name must come back XML-escaped.
     self.assertIn('&gt', xml_body)
Example #2
0
 def test_get_response_body(self):
     """Check get_response_body output for an unknown format and for XML."""
     # 'bad_formay' is not a recognised content type; the assertion below
     # expects the plain "key: value" rendering in that case.
     txt_body = bulk.get_response_body('bad_formay', {'hey': 'there'},
                                       [['json > xml', '202 Accepted']])
     # assertIn reports both operands on failure, unlike assertTrue(a in b).
     self.assertIn('hey: there', txt_body)
     xml_body = bulk.get_response_body('text/xml', {'hey': 'there'},
                                       [['json > xml', '202 Accepted']])
     # The '>' in the failed-item name must come back XML-escaped.
     self.assertIn('&gt', xml_body)
Example #3
0
 def test_get_response_body(self):
     """Exercise get_response_body for a bogus format and for text/xml."""

     def render(content_type, root_tag):
         # Fresh data/failure literals on every call, as in a direct call.
         return bulk.get_response_body(
             content_type,
             {'hey': 'there'},
             [['json > xml', '202 Accepted']],
             root_tag)

     # An unrecognised content type: the body is expected to contain the
     # "key: value" line, and the root tag argument is irrelevant.
     txt_body = render('bad_formay', "doesn't matter for text")
     self.assertIn(b'hey: there', txt_body)

     # XML output: '>' is escaped and the body is wrapped in the root tag.
     xml_body = render('text/xml', 'root_tag')
     self.assertIn(b'&gt', xml_body)
     opens_with_root = xml_body.startswith(b'<root_tag>\n')
     self.assertTrue(opens_with_root)
     closes_with_root = xml_body.endswith(b'\n</root_tag>\n')
     self.assertTrue(closes_with_root)
Example #4
0
 def test_get_response_body(self):
     """Check get_response_body for an unknown format and for text/xml."""
     # 'bad_formay' is not a recognised content type; the assertion below
     # expects the plain "key: value" rendering, whatever the root tag is.
     txt_body = bulk.get_response_body(
         'bad_formay', {'hey': 'there'}, [['json > xml', '202 Accepted']],
         "doesn't matter for text")
     # assertIn reports both operands on failure, unlike assertTrue(a in b).
     self.assertIn('hey: there', txt_body)
     xml_body = bulk.get_response_body(
         'text/xml', {'hey': 'there'}, [['json > xml', '202 Accepted']],
         'root_tag')
     # The '>' in the failed-item name must come back XML-escaped.
     self.assertIn('&gt', xml_body)
     # The whole XML document is wrapped in the requested root tag.
     self.assertTrue(xml_body.startswith('<root_tag>\n'))
     self.assertTrue(xml_body.endswith('\n</root_tag>\n'))
Example #5
0
    def handle_multipart_put(self, req, start_response):
        """
        Will handle the PUT of a SLO manifest.
        Heads every object in manifest to check if is valid and if so will
        save a manifest generated from the user input. Uses WSGIContext to
        call self and start_response and returns a WSGI iterator.

        Segments are HEADed one at a time in manifest order; consecutive
        entries that name the same object reuse the previous HEAD response.
        Per-segment problems are collected and reported together in a single
        400 response instead of failing on the first bad segment.

        :params req: a swob.Request with an obj in path
        :params start_response: the WSGI start_response callable
        :returns: a WSGI iterator
        :raises: HttpException on errors
        """
        try:
            vrs, account, container, obj = req.split_path(1, 4, True)
        except ValueError:
            # Path could not be split; pass the request through untouched.
            return self.app(req.environ, start_response)
        # content_length may be None (a chunked upload, validated just
        # below). Comparing None with an int raises TypeError on Python 3,
        # so only size-check a known length.
        if req.content_length is not None and \
                req.content_length > self.max_manifest_size:
            raise HTTPRequestEntityTooLarge(
                "Manifest File > %d bytes" % self.max_manifest_size)
        if req.headers.get('X-Copy-From'):
            raise HTTPMethodNotAllowed(
                'Multipart Manifest PUTs cannot be COPY requests')
        if req.content_length is None and \
                req.headers.get('transfer-encoding', '').lower() != 'chunked':
            raise HTTPLengthRequired(request=req)
        parsed_data = parse_and_validate_input(
            req.body_file.read(self.max_manifest_size),
            req.path, self.min_segment_size)
        problem_segments = []

        if len(parsed_data) > self.max_manifest_segments:
            raise HTTPRequestEntityTooLarge(
                'Number of segments must be <= %d' %
                self.max_manifest_segments)
        total_size = 0
        out_content_type = req.accept.best_match(ACCEPTABLE_FORMATS)
        if not out_content_type:
            out_content_type = 'text/plain'
        data_for_storage = []
        slo_etag = md5()
        last_obj_path = None
        for index, seg_dict in enumerate(parsed_data):
            obj_name = seg_dict['path']
            if isinstance(obj_name, six.text_type):
                obj_name = obj_name.encode('utf-8')
            obj_path = '/'.join(['', vrs, account, obj_name.lstrip('/')])

            if obj_path != last_obj_path:
                # Only build a HEAD environ when a new object has to be
                # looked up; a repeated path reuses head_seg_resp.
                last_obj_path = obj_path
                new_env = req.environ.copy()
                new_env['PATH_INFO'] = obj_path
                new_env['REQUEST_METHOD'] = 'HEAD'
                new_env['swift.source'] = 'SLO'
                # pop() rather than del: either key may legitimately be
                # missing from the incoming environ.
                new_env.pop('wsgi.input', None)
                new_env.pop('QUERY_STRING', None)
                new_env['CONTENT_LENGTH'] = 0
                new_env['HTTP_USER_AGENT'] = \
                    '%s MultipartPUT' % req.environ.get('HTTP_USER_AGENT')
                head_seg_resp = \
                    Request.blank(obj_path, new_env).get_response(self)

            if head_seg_resp.is_success:
                segment_length = head_seg_resp.content_length
                if seg_dict.get('range'):
                    # Since we now know the length, we can normalize the
                    # range. We know that there is exactly one range
                    # requested since we checked that earlier in
                    # parse_and_validate_input().
                    ranges = seg_dict['range'].ranges_for_length(
                        head_seg_resp.content_length)

                    if not ranges:
                        problem_segments.append([quote(obj_name),
                                                 'Unsatisfiable Range'])
                    elif ranges == [(0, head_seg_resp.content_length)]:
                        # Just one range, and it exactly matches the object.
                        # Why'd we do this again?
                        del seg_dict['range']
                        segment_length = head_seg_resp.content_length
                    else:
                        # ranges_for_length yields (start, end) with an
                        # exclusive end; store it as inclusive "start-end".
                        rng = ranges[0]
                        seg_dict['range'] = '%d-%d' % (rng[0], rng[1] - 1)
                        segment_length = rng[1] - rng[0]

                if segment_length < self.min_segment_size and \
                        index < len(parsed_data) - 1:
                    problem_segments.append(
                        [quote(obj_name),
                         'Too small; each segment, except the last, must be '
                         'at least %d bytes.' % self.min_segment_size])
                total_size += segment_length
                if seg_dict['size_bytes'] is not None and \
                        seg_dict['size_bytes'] != head_seg_resp.content_length:
                    problem_segments.append([quote(obj_name), 'Size Mismatch'])
                if seg_dict['etag'] is None or \
                        seg_dict['etag'] == head_seg_resp.etag:
                    # The manifest etag is a hash over each segment's etag
                    # (plus its range, when one is given).
                    if seg_dict.get('range'):
                        slo_etag.update('%s:%s;' % (head_seg_resp.etag,
                                                    seg_dict['range']))
                    else:
                        slo_etag.update(head_seg_resp.etag)
                else:
                    problem_segments.append([quote(obj_name), 'Etag Mismatch'])
                if head_seg_resp.last_modified:
                    last_modified = head_seg_resp.last_modified
                else:
                    # shouldn't happen
                    last_modified = datetime.now()

                last_modified_formatted = \
                    last_modified.strftime('%Y-%m-%dT%H:%M:%S.%f')
                seg_data = {'name': '/' + seg_dict['path'].lstrip('/'),
                            'bytes': head_seg_resp.content_length,
                            'hash': head_seg_resp.etag,
                            'content_type': head_seg_resp.content_type,
                            'last_modified': last_modified_formatted}
                if seg_dict.get('range'):
                    seg_data['range'] = seg_dict['range']

                if config_true_value(
                        head_seg_resp.headers.get('X-Static-Large-Object')):
                    seg_data['sub_slo'] = True
                data_for_storage.append(seg_data)

            else:
                problem_segments.append([quote(obj_name),
                                         head_seg_resp.status])
        if problem_segments:
            resp_body = get_response_body(
                out_content_type, {}, problem_segments)
            raise HTTPBadRequest(resp_body, content_type=out_content_type)
        env = req.environ

        if not env.get('CONTENT_TYPE'):
            guessed_type, _junk = mimetypes.guess_type(req.path_info)
            env['CONTENT_TYPE'] = guessed_type or 'application/octet-stream'
        env['swift.content_type_overridden'] = True
        env['CONTENT_TYPE'] += ";swift_bytes=%d" % total_size
        env['HTTP_X_STATIC_LARGE_OBJECT'] = 'True'
        # Replace the client's request body with the generated manifest.
        json_data = json.dumps(data_for_storage)
        if six.PY3:
            json_data = json_data.encode('utf-8')
        env['CONTENT_LENGTH'] = str(len(json_data))
        env['wsgi.input'] = BytesIO(json_data)

        slo_put_context = SloPutContext(self, slo_etag)
        return slo_put_context.handle_slo_put(req, start_response)
Example #6
0
    def handle_multipart_put(self, req, start_response):
        """
        Will handle the PUT of a SLO manifest.
        Heads every object in manifest to check if is valid and if so will
        save a manifest generated from the user input. Uses WSGIContext to
        call self and start_response and returns a WSGI iterator.

        Each distinct segment path is HEADed once, through a StreamingPile
        sized by self.concurrency, and the response is validated against
        every manifest entry that names that path. Per-segment problems are
        collected and reported together in a single 400 response.

        :params req: a swob.Request with an obj in path
        :params start_response: the WSGI start_response callable
        :returns: a WSGI iterator
        :raises: HttpException on errors
        """
        try:
            vrs, account, container, obj = req.split_path(1, 4, True)
        except ValueError:
            # Path could not be split; pass the request through untouched.
            return self.app(req.environ, start_response)
        # content_length may be None (a chunked upload, validated just
        # below). Comparing None with an int raises TypeError on Python 3,
        # so only size-check a known length.
        if req.content_length is not None and \
                req.content_length > self.max_manifest_size:
            raise HTTPRequestEntityTooLarge("Manifest File > %d bytes" %
                                            self.max_manifest_size)
        if req.headers.get('X-Copy-From'):
            raise HTTPMethodNotAllowed(
                'Multipart Manifest PUTs cannot be COPY requests')
        if req.content_length is None and \
                req.headers.get('transfer-encoding', '').lower() != 'chunked':
            raise HTTPLengthRequired(request=req)
        parsed_data = parse_and_validate_input(
            req.body_file.read(self.max_manifest_size), req.path)
        problem_segments = []

        if len(parsed_data) > self.max_manifest_segments:
            raise HTTPRequestEntityTooLarge(
                'Number of segments must be <= %d' %
                self.max_manifest_segments)
        total_size = 0
        out_content_type = req.accept.best_match(ACCEPTABLE_FORMATS)
        if not out_content_type:
            out_content_type = 'text/plain'
        # Map each segment path to every manifest position that uses it, so
        # a path repeated in the manifest is only HEADed once.
        path2indices = defaultdict(list)
        for index, seg_dict in enumerate(parsed_data):
            path2indices[seg_dict['path']].append(index)

        def do_head(obj_name):
            # HEAD one segment; hand the name back with the response so the
            # caller can match results to manifest entries.
            obj_path = '/'.join(
                ['', vrs, account,
                 get_valid_utf8_str(obj_name).lstrip('/')])

            sub_req = make_subrequest(
                req.environ,
                path=obj_path + '?',  # kill the query string
                method='HEAD',
                headers={'x-auth-token': req.headers.get('x-auth-token')},
                agent='%(orig)s SLO MultipartPUT',
                swift_source='SLO')
            return obj_name, sub_req.get_response(self)

        def validate_seg_dict(seg_dict, head_seg_resp):
            # Check one manifest entry against its HEAD response. Returns
            # (segment_length, seg_data); problems are appended to
            # problem_segments, and a failed HEAD yields (0, None).
            #
            # Take the name from the entry itself instead of relying on the
            # enclosing loop variable: entries are grouped by this same
            # path, so the value is the same.
            obj_name = seg_dict['path']
            if not head_seg_resp.is_success:
                problem_segments.append(
                    [quote(obj_name), head_seg_resp.status])
                return 0, None

            segment_length = head_seg_resp.content_length
            if seg_dict.get('range'):
                # Since we now know the length, we can normalize the
                # range. We know that there is exactly one range
                # requested since we checked that earlier in
                # parse_and_validate_input().
                ranges = seg_dict['range'].ranges_for_length(
                    head_seg_resp.content_length)

                if not ranges:
                    problem_segments.append(
                        [quote(obj_name), 'Unsatisfiable Range'])
                elif ranges == [(0, head_seg_resp.content_length)]:
                    # Just one range, and it exactly matches the object.
                    # Why'd we do this again?
                    del seg_dict['range']
                    segment_length = head_seg_resp.content_length
                else:
                    # ranges_for_length yields (start, end) with an
                    # exclusive end; store it as inclusive "start-end".
                    rng = ranges[0]
                    seg_dict['range'] = '%d-%d' % (rng[0], rng[1] - 1)
                    segment_length = rng[1] - rng[0]

            if segment_length < 1:
                problem_segments.append([
                    quote(obj_name),
                    'Too small; each segment must be at least 1 byte.'
                ])
            if seg_dict['size_bytes'] is not None and \
                    seg_dict['size_bytes'] != head_seg_resp.content_length:
                problem_segments.append([quote(obj_name), 'Size Mismatch'])
            if seg_dict['etag'] is not None and \
                    seg_dict['etag'] != head_seg_resp.etag:
                problem_segments.append([quote(obj_name), 'Etag Mismatch'])
            if head_seg_resp.last_modified:
                last_modified = head_seg_resp.last_modified
            else:
                # shouldn't happen
                last_modified = datetime.now()

            last_modified_formatted = \
                last_modified.strftime('%Y-%m-%dT%H:%M:%S.%f')
            seg_data = {
                'name': '/' + seg_dict['path'].lstrip('/'),
                'bytes': head_seg_resp.content_length,
                'hash': head_seg_resp.etag,
                'content_type': head_seg_resp.content_type,
                'last_modified': last_modified_formatted
            }
            if seg_dict.get('range'):
                seg_data['range'] = seg_dict['range']
            if config_true_value(
                    head_seg_resp.headers.get('X-Static-Large-Object')):
                seg_data['sub_slo'] = True
            return segment_length, seg_data

        # Slots are filled by manifest index, so the stored manifest keeps
        # the client's segment order whatever order the HEADs are handled in.
        data_for_storage = [None] * len(parsed_data)
        with StreamingPile(self.concurrency) as pile:
            for obj_name, resp in pile.asyncstarmap(
                    do_head, ((path, ) for path in path2indices)):
                for i in path2indices[obj_name]:
                    segment_length, seg_data = validate_seg_dict(
                        parsed_data[i], resp)
                    data_for_storage[i] = seg_data
                    total_size += segment_length

        if problem_segments:
            resp_body = get_response_body(out_content_type, {},
                                          problem_segments)
            raise HTTPBadRequest(resp_body, content_type=out_content_type)

        # The manifest etag is a hash over each segment's etag (plus its
        # range, when one is given), taken in manifest order.
        slo_etag = md5()
        for seg_data in data_for_storage:
            if seg_data.get('range'):
                slo_etag.update('%s:%s;' %
                                (seg_data['hash'], seg_data['range']))
            else:
                slo_etag.update(seg_data['hash'])

        slo_etag = slo_etag.hexdigest()
        req.headers.update({
            SYSMETA_SLO_ETAG: slo_etag,
            SYSMETA_SLO_SIZE: total_size,
            'X-Static-Large-Object': 'True',
        })

        # Replace the client's request body with the generated manifest.
        json_data = json.dumps(data_for_storage)
        if six.PY3:
            json_data = json_data.encode('utf-8')
        req.body = json_data

        env = req.environ
        if not env.get('CONTENT_TYPE'):
            guessed_type, _junk = mimetypes.guess_type(req.path_info)
            env['CONTENT_TYPE'] = guessed_type or 'application/octet-stream'
        env['swift.content_type_overridden'] = True
        env['CONTENT_TYPE'] += ";swift_bytes=%d" % total_size

        def start_response_wrapper(status, headers, exc_info=None):
            # Swap the Etag header in the backend response for the quoted
            # SLO etag computed above.
            for i, (header, _value) in enumerate(headers):
                if header.lower() == 'etag':
                    headers[i] = ('Etag', '"%s"' % slo_etag)
                    break
            return start_response(status, headers, exc_info)

        return self.app(env, start_response_wrapper)
Example #7
0
    def handle_multipart_put(self, req):
        """
        Will handle the PUT of a SLO manifest.
        Heads every object in manifest to check if is valid and if so will
        save a manifest generated from the user input.

        On success req.environ is rewritten in place (content type, content
        length, body and the static-large-object header) so that it carries
        the generated manifest instead of the client's input.

        :params req: a swob.Request with an obj in path
        :returns: self.app (the wrapped app itself; it is not called here)
        :raises: HttpException on errors
        """
        try:
            vrs, account, container, obj = req.split_path(1, 4, True)
        except ValueError:
            # Path could not be split; hand back the wrapped app unchanged.
            return self.app
        # NOTE(review): content_length may be None here (it is checked for
        # None a few lines down); this comparison relies on Python 2
        # ordering of None against an int.
        if req.content_length > self.max_manifest_size:
            raise HTTPRequestEntityTooLarge(
                "Manifest File > %d bytes" % self.max_manifest_size)
        if req.headers.get('X-Copy-From'):
            raise HTTPMethodNotAllowed(
                'Multipart Manifest PUTs cannot be Copy requests')
        if req.content_length is None and \
                req.headers.get('transfer-encoding', '').lower() != 'chunked':
            raise HTTPLengthRequired(request=req)
        parsed_data = parse_input(req.body_file.read(self.max_manifest_size))
        # Collected [quoted path, reason] pairs; reported together below.
        problem_segments = []

        if len(parsed_data) > self.max_manifest_segments:
            raise HTTPRequestEntityTooLarge(
                'Number segments must be <= %d' % self.max_manifest_segments)
        total_size = 0
        out_content_type = req.accept.best_match(ACCEPTABLE_FORMATS)
        if not out_content_type:
            out_content_type = 'text/plain'
        data_for_storage = []
        for index, seg_dict in enumerate(parsed_data):
            obj_path = '/'.join(
                ['', vrs, account, seg_dict['path'].lstrip('/')])
            try:
                seg_size = int(seg_dict['size_bytes'])
            except (ValueError, TypeError):
                raise HTTPBadRequest('Invalid Manifest File')
            # The size rule applies to the first segment and to every
            # segment that is not the last one; unlike the per-segment
            # checks below, a violation aborts immediately.
            if seg_size < self.min_segment_size and \
                    (index == 0 or index < len(parsed_data) - 1):
                raise HTTPBadRequest(
                    'Each segment, except the last, must be larger than '
                    '%d bytes.' % self.min_segment_size)

            # Build a body-less HEAD request for the segment from a copy of
            # the client's environ.
            new_env = req.environ.copy()
            if isinstance(obj_path, unicode):
                obj_path = obj_path.encode('utf-8')
            new_env['PATH_INFO'] = obj_path
            new_env['REQUEST_METHOD'] = 'HEAD'
            new_env['swift.source'] = 'SLO'
            del(new_env['wsgi.input'])
            del(new_env['QUERY_STRING'])
            new_env['CONTENT_LENGTH'] = 0
            new_env['HTTP_USER_AGENT'] = \
                '%s MultipartPUT' % req.environ.get('HTTP_USER_AGENT')
            head_seg_resp = \
                Request.blank(obj_path, new_env).get_response(self.app)
            if head_seg_resp.status_int // 100 == 2:
                total_size += seg_size
                if seg_size != head_seg_resp.content_length:
                    problem_segments.append([quote(obj_path), 'Size Mismatch'])
                if seg_dict['etag'] != head_seg_resp.etag:
                    problem_segments.append([quote(obj_path), 'Etag Mismatch'])
                if head_seg_resp.last_modified:
                    last_modified = head_seg_resp.last_modified
                else:
                    # shouldn't happen
                    last_modified = datetime.now()

                last_modified_formatted = \
                    last_modified.strftime('%Y-%m-%dT%H:%M:%S.%f')
                # Size and hash are stored as the client declared them; any
                # disagreement with the HEAD response was recorded above.
                data_for_storage.append(
                    {'name': '/' + seg_dict['path'].lstrip('/'),
                     'bytes': seg_size,
                     'hash': seg_dict['etag'],
                     'content_type': head_seg_resp.content_type,
                     'last_modified': last_modified_formatted})

            else:
                problem_segments.append([quote(obj_path),
                                         head_seg_resp.status])
        if problem_segments:
            resp_body = get_response_body(
                out_content_type, {}, problem_segments)
            raise HTTPBadRequest(resp_body, content_type=out_content_type)
        env = req.environ

        if not env.get('CONTENT_TYPE'):
            guessed_type, _junk = mimetypes.guess_type(req.path_info)
            env['CONTENT_TYPE'] = guessed_type or 'application/octet-stream'
        # NOTE(review): the key is spelled 'overriden' (single 'd');
        # presumably this matches whatever reads it downstream -- confirm
        # before correcting the spelling.
        env['swift.content_type_overriden'] = True
        env['CONTENT_TYPE'] += ";swift_bytes=%d" % total_size
        env['HTTP_X_STATIC_LARGE_OBJECT'] = 'True'
        # Replace the client's request body with the generated manifest.
        json_data = json.dumps(data_for_storage)
        env['CONTENT_LENGTH'] = str(len(json_data))
        env['wsgi.input'] = StringIO(json_data)
        return self.app
Example #8
0
    def handle_multipart_put(self, req, start_response):
        """
        Will handle the PUT of a SLO manifest.
        Heads every object in manifest to check if is valid and if so will
        save a manifest generated from the user input. Uses WSGIContext to
        call self and start_response and returns a WSGI iterator.

        Segments are HEADed one at a time in manifest order; consecutive
        entries that name the same object reuse the previous HEAD response.
        Per-segment problems are collected and reported together in a single
        400 response instead of failing on the first bad segment.

        :params req: a swob.Request with an obj in path
        :params start_response: the WSGI start_response callable
        :returns: a WSGI iterator
        :raises: HttpException on errors
        """
        try:
            vrs, account, container, obj = req.split_path(1, 4, True)
        except ValueError:
            # Path could not be split; pass the request through untouched.
            return self.app(req.environ, start_response)
        # content_length may be None (a chunked upload, validated just
        # below). Comparing None with an int raises TypeError on Python 3,
        # so only size-check a known length.
        if req.content_length is not None and \
                req.content_length > self.max_manifest_size:
            raise HTTPRequestEntityTooLarge(
                "Manifest File > %d bytes" % self.max_manifest_size)
        if req.headers.get('X-Copy-From'):
            raise HTTPMethodNotAllowed(
                'Multipart Manifest PUTs cannot be COPY requests')
        if req.content_length is None and \
                req.headers.get('transfer-encoding', '').lower() != 'chunked':
            raise HTTPLengthRequired(request=req)
        parsed_data = parse_and_validate_input(
            req.body_file.read(self.max_manifest_size),
            req.path)
        problem_segments = []

        if len(parsed_data) > self.max_manifest_segments:
            raise HTTPRequestEntityTooLarge(
                'Number of segments must be <= %d' %
                self.max_manifest_segments)
        total_size = 0
        out_content_type = req.accept.best_match(ACCEPTABLE_FORMATS)
        if not out_content_type:
            out_content_type = 'text/plain'
        data_for_storage = []
        slo_etag = md5()
        last_obj_path = None
        for index, seg_dict in enumerate(parsed_data):
            obj_name = seg_dict['path']
            if isinstance(obj_name, six.text_type):
                obj_name = obj_name.encode('utf-8')
            obj_path = '/'.join(['', vrs, account, obj_name.lstrip('/')])

            if obj_path != last_obj_path:
                # A repeated path reuses head_seg_resp from the previous
                # iteration instead of issuing another HEAD.
                last_obj_path = obj_path
                sub_req = make_subrequest(
                    req.environ, path=obj_path + '?',  # kill the query string
                    method='HEAD',
                    headers={'x-auth-token': req.headers.get('x-auth-token')},
                    agent='%(orig)s SLO MultipartPUT', swift_source='SLO')
                head_seg_resp = sub_req.get_response(self)

            if head_seg_resp.is_success:
                segment_length = head_seg_resp.content_length
                if seg_dict.get('range'):
                    # Since we now know the length, we can normalize the
                    # range. We know that there is exactly one range
                    # requested since we checked that earlier in
                    # parse_and_validate_input().
                    ranges = seg_dict['range'].ranges_for_length(
                        head_seg_resp.content_length)

                    if not ranges:
                        problem_segments.append([quote(obj_name),
                                                 'Unsatisfiable Range'])
                    elif ranges == [(0, head_seg_resp.content_length)]:
                        # Just one range, and it exactly matches the object.
                        # Why'd we do this again?
                        del seg_dict['range']
                        segment_length = head_seg_resp.content_length
                    else:
                        # ranges_for_length yields (start, end) with an
                        # exclusive end; store it as inclusive "start-end".
                        rng = ranges[0]
                        seg_dict['range'] = '%d-%d' % (rng[0], rng[1] - 1)
                        segment_length = rng[1] - rng[0]

                if segment_length < 1:
                    problem_segments.append(
                        [quote(obj_name),
                         'Too small; each segment must be at least 1 byte.'])
                total_size += segment_length
                if seg_dict['size_bytes'] is not None and \
                        seg_dict['size_bytes'] != head_seg_resp.content_length:
                    problem_segments.append([quote(obj_name), 'Size Mismatch'])
                if seg_dict['etag'] is None or \
                        seg_dict['etag'] == head_seg_resp.etag:
                    # The manifest etag is a hash over each segment's etag
                    # (plus its range, when one is given).
                    if seg_dict.get('range'):
                        slo_etag.update('%s:%s;' % (head_seg_resp.etag,
                                                    seg_dict['range']))
                    else:
                        slo_etag.update(head_seg_resp.etag)
                else:
                    problem_segments.append([quote(obj_name), 'Etag Mismatch'])
                if head_seg_resp.last_modified:
                    last_modified = head_seg_resp.last_modified
                else:
                    # shouldn't happen
                    last_modified = datetime.now()

                last_modified_formatted = \
                    last_modified.strftime('%Y-%m-%dT%H:%M:%S.%f')
                seg_data = {'name': '/' + seg_dict['path'].lstrip('/'),
                            'bytes': head_seg_resp.content_length,
                            'hash': head_seg_resp.etag,
                            'content_type': head_seg_resp.content_type,
                            'last_modified': last_modified_formatted}
                if seg_dict.get('range'):
                    seg_data['range'] = seg_dict['range']

                if config_true_value(
                        head_seg_resp.headers.get('X-Static-Large-Object')):
                    seg_data['sub_slo'] = True
                data_for_storage.append(seg_data)

            else:
                problem_segments.append([quote(obj_name),
                                         head_seg_resp.status])
        if problem_segments:
            resp_body = get_response_body(
                out_content_type, {}, problem_segments)
            raise HTTPBadRequest(resp_body, content_type=out_content_type)
        env = req.environ

        if not env.get('CONTENT_TYPE'):
            guessed_type, _junk = mimetypes.guess_type(req.path_info)
            env['CONTENT_TYPE'] = guessed_type or 'application/octet-stream'
        env['swift.content_type_overridden'] = True
        env['CONTENT_TYPE'] += ";swift_bytes=%d" % total_size
        env['HTTP_X_STATIC_LARGE_OBJECT'] = 'True'
        # Replace the client's request body with the generated manifest.
        json_data = json.dumps(data_for_storage)
        if six.PY3:
            json_data = json_data.encode('utf-8')
        env['CONTENT_LENGTH'] = str(len(json_data))
        env['wsgi.input'] = BytesIO(json_data)

        slo_put_context = SloPutContext(self, slo_etag)
        return slo_put_context.handle_slo_put(req, start_response)
Example #9
0
 def test_get_response_body(self):
     """Check get_response_body rejects a bad format and escapes XML."""
     # An unrecognised content type is expected to raise HTTPException.
     self.assertRaises(HTTPException, bulk.get_response_body, 'badformat',
                       {}, [])
     xml_body = bulk.get_response_body('text/xml', {'hey': 'there'},
                                       [['json > xml', '202 Accepted']])
     # assertIn replaces the deprecated assert_ alias and reports both
     # operands when the check fails; the '>' must come back XML-escaped.
     self.assertIn('&gt', xml_body)
Example #10
0
        def resp_iter(total_size=total_size):
            # wsgi won't propagate start_response calls until some data has
            # been yielded so make sure first heartbeat is sent immediately
            if heartbeat:
                yield ' '
            last_yield_time = time.time()

            # BEGIN: New OpenIO code
            sub_req = make_subrequest(
                req.environ,
                path='%s?format=json&prefix=%s&limit=%d' %
                (segments_container_path, seg_prefix,
                 self.max_manifest_segments),
                method='GET',
                headers={'x-auth-token': req.headers.get('x-auth-token')},
                agent='%(orig)s SLO MultipartPUT',
                swift_source='SLO')
            sub_req.environ.setdefault('oio.query', {})
            # All meta2 databases may not be synchronized
            sub_req.environ['oio.query']['force_master'] = True
            sub_req.environ['oio.query']['slo'] = True
            list_seg_resp = sub_req.get_response(self)

            with closing_if_possible(list_seg_resp.app_iter):
                segments_resp = json.loads(list_seg_resp.body)

            seg_resp_dict = dict()
            for seg_resp in segments_resp:
                obj_name = '/'.join(('', segments_container, seg_resp['name']))
                seg_resp_dict[obj_name] = seg_resp

            for obj_name in path2indices:
                now = time.time()
                if heartbeat and (now - last_yield_time >
                                  self.yield_frequency):
                    # Make sure we've called start_response before
                    # sending data
                    yield ' '
                    last_yield_time = now

                for i in path2indices[obj_name]:
                    if not list_seg_resp.is_success:
                        problem_segments.append(
                            [quote(obj_name), list_seg_resp.status])
                        segment_length = 0
                        seg_data = None
                    else:
                        seg_resp = seg_resp_dict.get(obj_name)
                        if seg_resp:
                            segment_length, seg_data = validate_seg_dict(
                                parsed_data[i], seg_resp,
                                (i == len(parsed_data) - 1))
                        else:
                            problem_segments.append([quote(obj_name), 404])
                            segment_length = 0
                            seg_data = None
                    data_for_storage[i] = seg_data
                    total_size += segment_length
            # END: New OpenIO code

            if problem_segments:
                err = HTTPBadRequest(content_type=out_content_type)
                resp_dict = {}
                if heartbeat:
                    resp_dict['Response Status'] = err.status
                    resp_dict['Response Body'] = err.body or '\n'.join(
                        RESPONSE_REASONS.get(err.status_int, ['']))
                else:
                    start_response(err.status,
                                   [(h, v) for h, v in err.headers.items()
                                    if h.lower() != 'content-length'])
                yield separator + get_response_body(
                    out_content_type, resp_dict, problem_segments, 'upload')
                return

            slo_etag = md5()
            for seg_data in data_for_storage:
                if 'data' in seg_data:
                    raw_data = base64.b64decode(seg_data['data'])
                    slo_etag.update(md5(raw_data).hexdigest())
                elif seg_data.get('range'):
                    slo_etag.update('%s:%s;' %
                                    (seg_data['hash'], seg_data['range']))
                else:
                    slo_etag.update(seg_data['hash'])

            slo_etag = slo_etag.hexdigest()
            client_etag = req.headers.get('Etag')
            if client_etag and client_etag.strip('"') != slo_etag:
                err = HTTPUnprocessableEntity(request=req)
                if heartbeat:
                    yield separator + get_response_body(
                        out_content_type, {
                            'Response Status':
                            err.status,
                            'Response Body':
                            err.body or '\n'.join(
                                RESPONSE_REASONS.get(err.status_int, [''])),
                        }, problem_segments, 'upload')
                else:
                    for chunk in err(req.environ, start_response):
                        yield chunk
                return

            json_data = json.dumps(data_for_storage)
            if six.PY3:
                json_data = json_data.encode('utf-8')
            req.body = json_data
            req.headers.update({
                SYSMETA_SLO_ETAG: slo_etag,
                SYSMETA_SLO_SIZE: total_size,
                'X-Static-Large-Object': 'True',
                'Etag': md5(json_data).hexdigest(),
            })

            # Ensure container listings have both etags. However, if any
            # middleware to the left of us touched the base value, trust them.
            override_header = 'X-Object-Sysmeta-Container-Update-Override-Etag'
            val, sep, params = req.headers.get(override_header,
                                               '').partition(';')
            req.headers[override_header] = '%s; slo_etag=%s' % (
                (val or req.headers['Etag']) + sep + params, slo_etag)

            env = req.environ
            if not env.get('CONTENT_TYPE'):
                guessed_type, _junk = mimetypes.guess_type(req.path_info)
                env['CONTENT_TYPE'] = (guessed_type
                                       or 'application/octet-stream')
            env['swift.content_type_overridden'] = True
            env['CONTENT_TYPE'] += ";swift_bytes=%d" % total_size

            resp = req.get_response(self.app)
            resp_dict = {'Response Status': resp.status}
            if resp.is_success:
                resp.etag = slo_etag
                resp_dict['Etag'] = resp.headers['Etag']
                resp_dict['Last Modified'] = resp.headers['Last-Modified']

            if heartbeat:
                resp_dict['Response Body'] = resp.body
                yield separator + get_response_body(out_content_type,
                                                    resp_dict, [], 'upload')
            else:
                for chunk in resp(req.environ, start_response):
                    yield chunk
Example #11
0
File: slo.py Project: bebule/swift
    def handle_multipart_put(self, req, start_response):
        """
        Will handle the PUT of a SLO manifest.
        HEADs every distinct object path in the manifest (concurrently, via
        a StreamingPile) to check that it is valid and, if so, saves a
        manifest generated from the user input by forwarding the rewritten
        request to the wrapped app.

        :params req: a swob.Request with an obj in path
        :params start_response: the WSGI start_response callable
        :returns: a WSGI iterator
        :raises: HttpException on errors
        """
        try:
            vrs, account, container, obj = req.split_path(1, 4, True)
        except ValueError:
            return self.app(req.environ, start_response)
        # content_length is None when the client sent no Content-Length
        # (e.g. a chunked upload); only a known length can be compared,
        # since None > int raises TypeError on Python 3.
        if req.content_length is not None and \
                req.content_length > self.max_manifest_size:
            raise HTTPRequestEntityTooLarge(
                "Manifest File > %d bytes" % self.max_manifest_size)
        if req.headers.get('X-Copy-From'):
            raise HTTPMethodNotAllowed(
                'Multipart Manifest PUTs cannot be COPY requests')
        if req.content_length is None and \
                req.headers.get('transfer-encoding', '').lower() != 'chunked':
            raise HTTPLengthRequired(request=req)
        parsed_data = parse_and_validate_input(
            req.body_file.read(self.max_manifest_size),
            req.path)
        problem_segments = []

        if len(parsed_data) > self.max_manifest_segments:
            raise HTTPRequestEntityTooLarge(
                'Number of segments must be <= %d' %
                self.max_manifest_segments)
        total_size = 0
        out_content_type = req.accept.best_match(ACCEPTABLE_FORMATS)
        if not out_content_type:
            out_content_type = 'text/plain'
        # Map each distinct segment path to every manifest position that
        # uses it, so each path is HEADed only once.
        path2indices = defaultdict(list)
        for index, seg_dict in enumerate(parsed_data):
            path2indices[seg_dict['path']].append(index)

        def do_head(obj_name):
            # HEAD one segment through a subrequest that re-enters this
            # middleware; returns the name alongside the response so the
            # caller can match results to manifest entries.
            obj_path = '/'.join(['', vrs, account,
                                 get_valid_utf8_str(obj_name).lstrip('/')])

            sub_req = make_subrequest(
                req.environ, path=obj_path + '?',  # kill the query string
                method='HEAD',
                headers={'x-auth-token': req.headers.get('x-auth-token')},
                agent='%(orig)s SLO MultipartPUT', swift_source='SLO')
            return obj_name, sub_req.get_response(self)

        def validate_seg_dict(seg_dict, head_seg_resp, obj_name):
            # Check one manifest entry against its HEAD response.  Problems
            # are appended to problem_segments; returns a tuple of
            # (segment_length, seg_data), or (0, None) if the HEAD failed.
            if not head_seg_resp.is_success:
                problem_segments.append([quote(obj_name),
                                         head_seg_resp.status])
                return 0, None

            segment_length = head_seg_resp.content_length
            if seg_dict.get('range'):
                # Since we now know the length, we can normalize the
                # range. We know that there is exactly one range
                # requested since we checked that earlier in
                # parse_and_validate_input().
                ranges = seg_dict['range'].ranges_for_length(
                    head_seg_resp.content_length)

                if not ranges:
                    problem_segments.append([quote(obj_name),
                                             'Unsatisfiable Range'])
                elif ranges == [(0, head_seg_resp.content_length)]:
                    # Just one range, and it exactly matches the object.
                    # Why'd we do this again?
                    del seg_dict['range']
                    segment_length = head_seg_resp.content_length
                else:
                    # ranges_for_length() yields half-open (start, stop)
                    # pairs; the stored form is an inclusive "first-last".
                    rng = ranges[0]
                    seg_dict['range'] = '%d-%d' % (rng[0], rng[1] - 1)
                    segment_length = rng[1] - rng[0]

            if segment_length < 1:
                problem_segments.append(
                    [quote(obj_name),
                     'Too small; each segment must be at least 1 byte.'])
            if seg_dict.get('size_bytes') is not None and \
                    seg_dict['size_bytes'] != head_seg_resp.content_length:
                problem_segments.append([quote(obj_name), 'Size Mismatch'])
            if seg_dict.get('etag') is not None and \
                    seg_dict['etag'] != head_seg_resp.etag:
                problem_segments.append([quote(obj_name), 'Etag Mismatch'])
            if head_seg_resp.last_modified:
                last_modified = head_seg_resp.last_modified
            else:
                # shouldn't happen
                last_modified = datetime.now()

            last_modified_formatted = \
                last_modified.strftime('%Y-%m-%dT%H:%M:%S.%f')
            seg_data = {'name': '/' + seg_dict['path'].lstrip('/'),
                        'bytes': head_seg_resp.content_length,
                        'hash': head_seg_resp.etag,
                        'content_type': head_seg_resp.content_type,
                        'last_modified': last_modified_formatted}
            if seg_dict.get('range'):
                seg_data['range'] = seg_dict['range']
            if config_true_value(
                    head_seg_resp.headers.get('X-Static-Large-Object')):
                seg_data['sub_slo'] = True
            return segment_length, seg_data

        # HEAD responses arrive in completion order, so results are written
        # back by manifest index to preserve the client's segment order.
        data_for_storage = [None] * len(parsed_data)
        with StreamingPile(self.concurrency) as pile:
            for obj_name, resp in pile.asyncstarmap(do_head, (
                    (path, ) for path in path2indices)):
                for i in path2indices[obj_name]:
                    segment_length, seg_data = validate_seg_dict(
                        parsed_data[i], resp, obj_name)
                    data_for_storage[i] = seg_data
                    total_size += segment_length

        if problem_segments:
            resp_body = get_response_body(
                out_content_type, {}, problem_segments)
            raise HTTPBadRequest(resp_body, content_type=out_content_type)

        # The SLO etag is the md5 of the per-segment etags (with the range
        # appended for ranged segments), in manifest order.
        slo_etag = md5()
        for seg_data in data_for_storage:
            if seg_data.get('range'):
                etag_part = '%s:%s;' % (seg_data['hash'], seg_data['range'])
            else:
                etag_part = seg_data['hash']
            # hashlib only accepts bytes on Python 3
            slo_etag.update(
                etag_part.encode('ascii') if six.PY3 else etag_part)

        slo_etag = slo_etag.hexdigest()
        req.headers.update({
            SYSMETA_SLO_ETAG: slo_etag,
            SYSMETA_SLO_SIZE: total_size,
            'X-Static-Large-Object': 'True',
        })

        json_data = json.dumps(data_for_storage)
        if six.PY3:
            json_data = json_data.encode('utf-8')
        req.body = json_data

        env = req.environ
        if not env.get('CONTENT_TYPE'):
            guessed_type, _junk = mimetypes.guess_type(req.path_info)
            env['CONTENT_TYPE'] = guessed_type or 'application/octet-stream'
        env['swift.content_type_overridden'] = True
        env['CONTENT_TYPE'] += ";swift_bytes=%d" % total_size

        def start_response_wrapper(status, headers, exc_info=None):
            # Report the SLO etag to the client in place of whatever Etag
            # the wrapped app put in its response headers.
            for i, (header, _value) in enumerate(headers):
                if header.lower() == 'etag':
                    headers[i] = ('Etag', '"%s"' % slo_etag)
                    break
            return start_response(status, headers, exc_info)

        return self.app(env, start_response_wrapper)
Example #12
0
 def test_get_response_body(self):
     # An unrecognised output format is expected to raise.
     self.assertRaises(
         HTTPException, bulk.get_response_body, 'badformat', {}, [])
     xml_body = bulk.get_response_body(
         'text/xml', {'hey': 'there'}, [['json > xml', '202 Accepted']])
     # The '>' in the failed-item name must come back entity-escaped.
     # assertIn replaces the deprecated assert_ alias and reports both
     # operands on failure.
     self.assertIn('&gt', xml_body)
Example #13
0
    def handle_multipart_put(self, req):
        """
        Handle the PUT of a SLO manifest.

        HEADs every object listed in the manifest and compares the size and
        etag supplied by the client with the HEAD response.  When every
        segment checks out, the request environ is rewritten in place so
        that its body is the generated manifest.

        :params req: a request object with an obj in path
        :returns: self.app
        :raises: an HTTP error response on invalid input or bad segments
        """
        try:
            vrs, account, container, obj = split_path(req.path, 1, 4, True)
        except ValueError:
            return self.app
        # content_length is None when the client sent no Content-Length;
        # compare only a known length instead of relying on Python 2's
        # None-vs-int ordering.
        if req.content_length is not None and \
                req.content_length > self.max_manifest_size:
            raise HTTPRequestEntityTooLarge(
                "Manifest File > %d bytes" % self.max_manifest_size)

        if req.headers.get('X-Copy-From') or req.headers.get('Destination'):
            raise HTTPMethodNotAllowed(
                'Multipart Manifest PUTs cannot be Copy requests')
        if req.content_length is None and \
                req.headers.get('transfer-encoding', '').lower() != 'chunked':
            raise HTTPLengthRequired(request=req)
        parsed_data = parse_input(
            req.environ['wsgi.input'].read(self.max_manifest_size))
        problem_segments = []

        if len(parsed_data) > self.max_manifest_segments:
            raise HTTPRequestEntityTooLarge(
                'Number segments must be <= %d' % self.max_manifest_segments)

        # Problem reports are always rendered as JSON in this variant.
        out_content_type = 'application/json'
        data_for_storage = []
        for seg_dict in parsed_data:
            obj_path = '/'.join(
                ['', vrs, account, seg_dict['path'].lstrip('/')])
            try:
                seg_size = int(seg_dict['size_bytes'])
            except (ValueError, TypeError):
                raise HTTPBadRequest('Invalid Manifest File')

            # Build a body-less HEAD request for the segment from a copy of
            # the client's environ.
            new_env = req.environ.copy()
            if isinstance(obj_path, unicode):
                obj_path = obj_path.encode('utf-8')
            new_env['PATH_INFO'] = obj_path
            new_env['REQUEST_METHOD'] = 'HEAD'
            new_env['swift.source'] = 'SLO'
            # QUERY_STRING may legitimately be absent from a WSGI environ,
            # so drop these keys without assuming they exist.
            new_env.pop('wsgi.input', None)
            new_env.pop('QUERY_STRING', None)
            new_env['CONTENT_LENGTH'] = 0
            new_env['HTTP_USER_AGENT'] = \
                '%s MultipartPUT' % req.environ.get('HTTP_USER_AGENT')
            head_seg_resp = \
                Request.blank(obj_path, new_env).get_response(self.app)

            if head_seg_resp.status_int // 100 == 2:
                if seg_size != head_seg_resp.content_length:
                    problem_segments.append([quote(obj_path), 'Size Mismatch'])
                if seg_dict['etag'] != head_seg_resp.etag:
                    problem_segments.append([quote(obj_path), 'Etag Mismatch'])

                data_for_storage.append(
                    {'name': '/' + seg_dict['path'].lstrip('/'),
                     'bytes': seg_size,
                     'hash': seg_dict['etag']})

            else:
                problem_segments.append([quote(obj_path),
                                         head_seg_resp.status])
        if problem_segments:
            resp_body = get_response_body(
                out_content_type, {}, problem_segments)
            raise jresponse('-1', 'badrequest', req, 400, param=resp_body)
        env = req.environ

        # NOTE(review): 'overriden' (sic) is kept byte-for-byte; confirm the
        # spelling against whatever reads this environ key downstream.
        env['swift.content_type_overriden'] = True

        # Replace the client's manifest with the generated one.
        env['HTTP_X_STATIC_LARGE_OBJECT'] = 'True'
        json_data = json.dumps(data_for_storage)
        env['CONTENT_LENGTH'] = str(len(json_data))
        env['wsgi.input'] = StringIO(json_data)
        return self.app
Example #14
0
File: slo.py Project: pchng/swift
    def handle_multipart_put(self, req, start_response):
        """
        Will handle the PUT of a SLO manifest.
        Heads every object in manifest to check if is valid and if so will
        save a manifest generated from the user input. The final PUT is
        delegated to SloPutContext.handle_slo_put().

        :params req: a swob.Request with an obj in path
        :params start_response: the WSGI start_response callable
        :returns: the result of handle_slo_put() (or of the wrapped app when
                  the path does not parse)
        :raises: HttpException on errors
        """
        try:
            vrs, account, container, obj = req.split_path(1, 4, True)
        except ValueError:
            return self.app(req.environ, start_response)
        # content_length is None when no Content-Length was sent; only a known
        # length can be compared (None > int raises TypeError on Python 3).
        if req.content_length is not None and req.content_length > self.max_manifest_size:
            raise HTTPRequestEntityTooLarge("Manifest File > %d bytes" % self.max_manifest_size)
        if req.headers.get("X-Copy-From"):
            raise HTTPMethodNotAllowed("Multipart Manifest PUTs cannot be COPY requests")
        if req.content_length is None and req.headers.get("transfer-encoding", "").lower() != "chunked":
            raise HTTPLengthRequired(request=req)
        parsed_data = parse_input(req.body_file.read(self.max_manifest_size))
        problem_segments = []

        if len(parsed_data) > self.max_manifest_segments:
            raise HTTPRequestEntityTooLarge("Number of segments must be <= %d" % self.max_manifest_segments)
        total_size = 0
        out_content_type = req.accept.best_match(ACCEPTABLE_FORMATS)
        if not out_content_type:
            out_content_type = "text/plain"
        data_for_storage = []
        slo_etag = md5()
        last_obj_path = None
        for index, seg_dict in enumerate(parsed_data):
            obj_name = seg_dict["path"]
            if isinstance(obj_name, unicode):
                obj_name = obj_name.encode("utf-8")
            obj_path = "/".join(["", vrs, account, obj_name.lstrip("/")])
            if req.path == quote(obj_path):
                raise HTTPConflict('Manifest object name "%s" cannot be included in the manifest' % obj_name)
            try:
                seg_size = int(seg_dict["size_bytes"])
            except (ValueError, TypeError):
                # A null size_bytes means "don't check the size".
                if seg_dict["size_bytes"] is None:
                    seg_size = None
                else:
                    raise HTTPBadRequest("Invalid Manifest File")
            if seg_size is not None and seg_size < self.min_segment_size and index < len(parsed_data) - 1:
                raise HTTPBadRequest(
                    "Each segment, except the last, must be at least %d bytes." % self.min_segment_size
                )

            if obj_path != last_obj_path:
                # Consecutive entries for the same object reuse the previous
                # HEAD response, so the HEAD environ is only built when a new
                # subrequest is actually needed.
                last_obj_path = obj_path
                new_env = req.environ.copy()
                new_env["PATH_INFO"] = obj_path
                new_env["REQUEST_METHOD"] = "HEAD"
                new_env["swift.source"] = "SLO"
                # QUERY_STRING may legitimately be absent from a WSGI environ,
                # so drop these keys without assuming they exist.
                new_env.pop("wsgi.input", None)
                new_env.pop("QUERY_STRING", None)
                new_env["CONTENT_LENGTH"] = 0
                new_env["HTTP_USER_AGENT"] = "%s MultipartPUT" % req.environ.get("HTTP_USER_AGENT")
                head_seg_resp = Request.blank(obj_path, new_env).get_response(self)

            if head_seg_resp.is_success:
                segment_length = head_seg_resp.content_length
                if seg_dict.get("range"):
                    # Since we now know the length, we can normalize the ranges
                    ranges = seg_dict["range"].ranges_for_length(head_seg_resp.content_length)

                    if not ranges:
                        problem_segments.append([quote(obj_name), "Unsatisfiable Range"])
                    elif len(ranges) > 1:
                        problem_segments.append([quote(obj_name), "Multiple Ranges"])
                    elif ranges == [(0, head_seg_resp.content_length)]:
                        # Just one range, and it exactly matches the object.
                        # Why'd we do this again?
                        seg_dict["range"] = None
                        segment_length = head_seg_resp.content_length
                    else:
                        # (start, stop) is half-open; the stored form is an
                        # inclusive "first-last".
                        rng = ranges[0]
                        seg_dict["range"] = "%d-%d" % (rng[0], rng[1] - 1)
                        segment_length = rng[1] - rng[0]

                if segment_length < self.min_segment_size and index < len(parsed_data) - 1:
                    raise HTTPBadRequest(
                        "Each segment, except the last, must be at least %d bytes." % self.min_segment_size
                    )
                total_size += segment_length
                if seg_size is not None and seg_size != head_seg_resp.content_length:
                    problem_segments.append([quote(obj_name), "Size Mismatch"])
                # A null etag in the manifest means "don't check the etag".
                if seg_dict["etag"] is None or seg_dict["etag"] == head_seg_resp.etag:
                    if seg_dict.get("range"):
                        slo_etag.update("%s:%s;" % (head_seg_resp.etag, seg_dict["range"]))
                    else:
                        slo_etag.update(head_seg_resp.etag)
                else:
                    problem_segments.append([quote(obj_name), "Etag Mismatch"])
                if head_seg_resp.last_modified:
                    last_modified = head_seg_resp.last_modified
                else:
                    # shouldn't happen
                    last_modified = datetime.now()

                last_modified_formatted = last_modified.strftime("%Y-%m-%dT%H:%M:%S.%f")
                seg_data = {
                    "name": "/" + seg_dict["path"].lstrip("/"),
                    "bytes": head_seg_resp.content_length,
                    "hash": head_seg_resp.etag,
                    "content_type": head_seg_resp.content_type,
                    "last_modified": last_modified_formatted,
                }
                if seg_dict.get("range"):
                    seg_data["range"] = seg_dict["range"]

                if config_true_value(head_seg_resp.headers.get("X-Static-Large-Object")):
                    seg_data["sub_slo"] = True
                data_for_storage.append(seg_data)

            else:
                problem_segments.append([quote(obj_name), head_seg_resp.status])
        if problem_segments:
            resp_body = get_response_body(out_content_type, {}, problem_segments)
            raise HTTPBadRequest(resp_body, content_type=out_content_type)
        env = req.environ

        if not env.get("CONTENT_TYPE"):
            guessed_type, _junk = mimetypes.guess_type(req.path_info)
            env["CONTENT_TYPE"] = guessed_type or "application/octet-stream"
        env["swift.content_type_overridden"] = True
        env["CONTENT_TYPE"] += ";swift_bytes=%d" % total_size
        env["HTTP_X_STATIC_LARGE_OBJECT"] = "True"
        # Replace the client's manifest with the generated one.
        json_data = json.dumps(data_for_storage)
        if six.PY3:
            json_data = json_data.encode("utf-8")
        env["CONTENT_LENGTH"] = str(len(json_data))
        env["wsgi.input"] = BytesIO(json_data)

        slo_put_context = SloPutContext(self, slo_etag)
        return slo_put_context.handle_slo_put(req, start_response)
Example #15
0
    def handle_multipart_put(self, req, start_response):
        """
        Will handle the PUT of a SLO manifest.
        Heads every object in manifest to check if is valid and if so will
        save a manifest generated from the user input. The final PUT is
        delegated to SloPutContext.handle_slo_put().

        :params req: a swob.Request with an obj in path
        :params start_response: the WSGI start_response callable
        :returns: the result of handle_slo_put() (or of the wrapped app when
                  the path does not parse)
        :raises: HttpException on errors
        """
        try:
            vrs, account, container, obj = req.split_path(1, 4, True)
        except ValueError:
            return self.app(req.environ, start_response)
        # content_length is None when no Content-Length was sent; only a
        # known length can be compared (None > int raises TypeError on
        # Python 3).
        if req.content_length is not None and \
                req.content_length > self.max_manifest_size:
            raise HTTPRequestEntityTooLarge("Manifest File > %d bytes" %
                                            self.max_manifest_size)
        if req.headers.get('X-Copy-From'):
            raise HTTPMethodNotAllowed(
                'Multipart Manifest PUTs cannot be COPY requests')
        if req.content_length is None and \
                req.headers.get('transfer-encoding', '').lower() != 'chunked':
            raise HTTPLengthRequired(request=req)
        parsed_data = parse_input(req.body_file.read(self.max_manifest_size))
        problem_segments = []

        if len(parsed_data) > self.max_manifest_segments:
            raise HTTPRequestEntityTooLarge(
                'Number of segments must be <= %d' %
                self.max_manifest_segments)
        total_size = 0
        out_content_type = req.accept.best_match(ACCEPTABLE_FORMATS)
        if not out_content_type:
            out_content_type = 'text/plain'
        data_for_storage = []
        slo_etag = md5()
        last_obj_path = None
        for index, seg_dict in enumerate(parsed_data):
            obj_name = seg_dict['path']
            if isinstance(obj_name, unicode):
                obj_name = obj_name.encode('utf-8')
            obj_path = '/'.join(['', vrs, account, obj_name.lstrip('/')])
            if req.path == quote(obj_path):
                raise HTTPConflict('Manifest object name "%s" '
                                   'cannot be included in the manifest' %
                                   obj_name)
            try:
                seg_size = int(seg_dict['size_bytes'])
            except (ValueError, TypeError):
                # A null size_bytes means "don't check the size".
                if seg_dict['size_bytes'] is None:
                    seg_size = None
                else:
                    raise HTTPBadRequest('Invalid Manifest File')
            if seg_size is not None and seg_size < self.min_segment_size and \
                    index < len(parsed_data) - 1:
                raise HTTPBadRequest(
                    'Each segment, except the last, must be at least '
                    '%d bytes.' % self.min_segment_size)

            if obj_path != last_obj_path:
                # Consecutive entries for the same object reuse the previous
                # HEAD response, so the HEAD environ is only built when a
                # new subrequest is actually needed.
                last_obj_path = obj_path
                new_env = req.environ.copy()
                new_env['PATH_INFO'] = obj_path
                new_env['REQUEST_METHOD'] = 'HEAD'
                new_env['swift.source'] = 'SLO'
                # QUERY_STRING may legitimately be absent from a WSGI
                # environ, so drop these keys without assuming they exist.
                new_env.pop('wsgi.input', None)
                new_env.pop('QUERY_STRING', None)
                new_env['CONTENT_LENGTH'] = 0
                new_env['HTTP_USER_AGENT'] = \
                    '%s MultipartPUT' % req.environ.get('HTTP_USER_AGENT')
                head_seg_resp = \
                    Request.blank(obj_path, new_env).get_response(self)

            if head_seg_resp.is_success:
                segment_length = head_seg_resp.content_length
                if seg_dict.get('range'):
                    # Since we now know the length, we can normalize the ranges
                    ranges = seg_dict['range'].ranges_for_length(
                        head_seg_resp.content_length)

                    if not ranges:
                        problem_segments.append(
                            [quote(obj_name), 'Unsatisfiable Range'])
                    elif len(ranges) > 1:
                        problem_segments.append(
                            [quote(obj_name), 'Multiple Ranges'])
                    elif ranges == [(0, head_seg_resp.content_length)]:
                        # Just one range, and it exactly matches the object.
                        # Why'd we do this again?
                        seg_dict['range'] = None
                        segment_length = head_seg_resp.content_length
                    else:
                        # (start, stop) is half-open; the stored form is an
                        # inclusive "first-last".
                        rng = ranges[0]
                        seg_dict['range'] = '%d-%d' % (rng[0], rng[1] - 1)
                        segment_length = rng[1] - rng[0]

                if segment_length < self.min_segment_size and \
                        index < len(parsed_data) - 1:
                    raise HTTPBadRequest(
                        'Each segment, except the last, must be at least '
                        '%d bytes.' % self.min_segment_size)
                total_size += segment_length
                if seg_size is not None and \
                        seg_size != head_seg_resp.content_length:
                    problem_segments.append([quote(obj_name), 'Size Mismatch'])
                # A null etag in the manifest means "don't check the etag".
                if seg_dict['etag'] is None or \
                        seg_dict['etag'] == head_seg_resp.etag:
                    if seg_dict.get('range'):
                        slo_etag.update(
                            '%s:%s;' % (head_seg_resp.etag, seg_dict['range']))
                    else:
                        slo_etag.update(head_seg_resp.etag)
                else:
                    problem_segments.append([quote(obj_name), 'Etag Mismatch'])
                if head_seg_resp.last_modified:
                    last_modified = head_seg_resp.last_modified
                else:
                    # shouldn't happen
                    last_modified = datetime.now()

                last_modified_formatted = \
                    last_modified.strftime('%Y-%m-%dT%H:%M:%S.%f')
                seg_data = {
                    'name': '/' + seg_dict['path'].lstrip('/'),
                    'bytes': head_seg_resp.content_length,
                    'hash': head_seg_resp.etag,
                    'content_type': head_seg_resp.content_type,
                    'last_modified': last_modified_formatted
                }
                if seg_dict.get('range'):
                    seg_data['range'] = seg_dict['range']

                if config_true_value(
                        head_seg_resp.headers.get('X-Static-Large-Object')):
                    seg_data['sub_slo'] = True
                data_for_storage.append(seg_data)

            else:
                problem_segments.append(
                    [quote(obj_name), head_seg_resp.status])
        if problem_segments:
            resp_body = get_response_body(out_content_type, {},
                                          problem_segments)
            raise HTTPBadRequest(resp_body, content_type=out_content_type)
        env = req.environ

        if not env.get('CONTENT_TYPE'):
            guessed_type, _junk = mimetypes.guess_type(req.path_info)
            env['CONTENT_TYPE'] = guessed_type or 'application/octet-stream'
        env['swift.content_type_overridden'] = True
        env['CONTENT_TYPE'] += ";swift_bytes=%d" % total_size
        env['HTTP_X_STATIC_LARGE_OBJECT'] = 'True'
        # Replace the client's manifest with the generated one.
        json_data = json.dumps(data_for_storage)
        if six.PY3:
            json_data = json_data.encode('utf-8')
        env['CONTENT_LENGTH'] = str(len(json_data))
        env['wsgi.input'] = BytesIO(json_data)

        slo_put_context = SloPutContext(self, slo_etag)
        return slo_put_context.handle_slo_put(req, start_response)
Example #16
0
 def test_get_response_body(self):
     # An unrecognised output format is expected to raise.
     self.assertRaises(HTTPException, bulk.get_response_body, "badformat", {}, [])
     xml_body = bulk.get_response_body("text/xml", {"hey": "there"}, [["json > xml", "202 Accepted"]])
     # The '>' in the failed-item name must come back entity-escaped.
     # assertIn replaces the deprecated assert_ alias and reports both
     # operands on failure.
     self.assertIn("&gt", xml_body)
Example #17
0
 def test_get_response_body(self):
     # An unrecognised output format is expected to produce plain
     # "key: value" text.
     txt_body = bulk.get_response_body("bad_formay", {"hey": "there"}, [["json > xml", "202 Accepted"]])
     # assertIn replaces the deprecated assert_ alias and reports both
     # operands on failure.
     self.assertIn("hey: there", txt_body)
     xml_body = bulk.get_response_body("text/xml", {"hey": "there"}, [["json > xml", "202 Accepted"]])
     # The '>' in the failed-item name must come back entity-escaped.
     self.assertIn("&gt", xml_body)
Example #18
0
    def handle_multipart_put(self, req):
        """
        Will handle the PUT of a SLO manifest.
        Heads every object in manifest to check if is valid and if so will
        save a manifest generated from the user input, by rewriting the
        request environ in place so that its body is the generated manifest.

        :params req: a swob.Request with an obj in path
        :returns: self.app
        :raises: HttpException on errors
        """
        try:
            vrs, account, container, obj = req.split_path(1, 4, True)
        except ValueError:
            return self.app
        # content_length is None when no Content-Length was sent; compare only
        # a known length instead of relying on Python 2's None-vs-int ordering.
        if req.content_length is not None and req.content_length > self.max_manifest_size:
            raise HTTPRequestEntityTooLarge("Manifest File > %d bytes" % self.max_manifest_size)
        if req.headers.get("X-Copy-From"):
            raise HTTPMethodNotAllowed("Multipart Manifest PUTs cannot be Copy requests")
        if req.content_length is None and req.headers.get("transfer-encoding", "").lower() != "chunked":
            raise HTTPLengthRequired(request=req)
        parsed_data = parse_input(req.body_file.read(self.max_manifest_size))
        problem_segments = []

        if len(parsed_data) > self.max_manifest_segments:
            raise HTTPRequestEntityTooLarge("Number segments must be <= %d" % self.max_manifest_segments)
        total_size = 0
        out_content_type = req.accept.best_match(ACCEPTABLE_FORMATS)
        if not out_content_type:
            out_content_type = "text/plain"
        data_for_storage = []
        for index, seg_dict in enumerate(parsed_data):
            obj_path = "/".join(["", vrs, account, seg_dict["path"].lstrip("/")])
            try:
                seg_size = int(seg_dict["size_bytes"])
            except (ValueError, TypeError):
                raise HTTPBadRequest("Invalid Manifest File")
            # The minimum applies to every segment but the last -- and also to
            # the first, even when it is the only one (index == 0).
            # NOTE(review): the check is "<", so a segment of exactly
            # min_segment_size passes although the message says "larger
            # than"; the wording is left untouched in case clients match it.
            if seg_size < self.min_segment_size and (index == 0 or index < len(parsed_data) - 1):
                raise HTTPBadRequest(
                    "Each segment, except the last, must be larger than %d bytes." % self.min_segment_size
                )

            # Build a body-less HEAD request for the segment from a copy of
            # the client's environ.
            new_env = req.environ.copy()
            if isinstance(obj_path, unicode):
                obj_path = obj_path.encode("utf-8")
            new_env["PATH_INFO"] = obj_path
            new_env["REQUEST_METHOD"] = "HEAD"
            new_env["swift.source"] = "SLO"
            # QUERY_STRING may legitimately be absent from a WSGI environ, so
            # drop these keys without assuming they exist.
            new_env.pop("wsgi.input", None)
            new_env.pop("QUERY_STRING", None)
            new_env["CONTENT_LENGTH"] = 0
            new_env["HTTP_USER_AGENT"] = "%s MultipartPUT" % req.environ.get("HTTP_USER_AGENT")
            head_seg_resp = Request.blank(obj_path, new_env).get_response(self.app)
            if head_seg_resp.is_success:
                total_size += seg_size
                if seg_size != head_seg_resp.content_length:
                    problem_segments.append([quote(obj_path), "Size Mismatch"])
                if seg_dict["etag"] != head_seg_resp.etag:
                    problem_segments.append([quote(obj_path), "Etag Mismatch"])
                if head_seg_resp.last_modified:
                    last_modified = head_seg_resp.last_modified
                else:
                    # shouldn't happen
                    last_modified = datetime.now()

                last_modified_formatted = last_modified.strftime("%Y-%m-%dT%H:%M:%S.%f")
                seg_data = {
                    "name": "/" + seg_dict["path"].lstrip("/"),
                    "bytes": seg_size,
                    "hash": seg_dict["etag"],
                    "content_type": head_seg_resp.content_type,
                    "last_modified": last_modified_formatted,
                }
                if config_true_value(head_seg_resp.headers.get("X-Static-Large-Object")):
                    seg_data["sub_slo"] = True
                data_for_storage.append(seg_data)

            else:
                problem_segments.append([quote(obj_path), head_seg_resp.status])
        if problem_segments:
            resp_body = get_response_body(out_content_type, {}, problem_segments)
            raise HTTPBadRequest(resp_body, content_type=out_content_type)
        env = req.environ

        if not env.get("CONTENT_TYPE"):
            guessed_type, _junk = mimetypes.guess_type(req.path_info)
            env["CONTENT_TYPE"] = guessed_type or "application/octet-stream"
        # NOTE(review): 'overriden' (sic) is kept byte-for-byte; confirm the
        # spelling against whatever reads this environ key downstream.
        env["swift.content_type_overriden"] = True
        env["CONTENT_TYPE"] += ";swift_bytes=%d" % total_size
        env["HTTP_X_STATIC_LARGE_OBJECT"] = "True"
        # Replace the client's manifest with the generated one.
        json_data = json.dumps(data_for_storage)
        env["CONTENT_LENGTH"] = str(len(json_data))
        env["wsgi.input"] = StringIO(json_data)
        return self.app