Esempio n. 1
0
 def _handle_sync_response(self, node, response, info, broker, http,
                           different_region=False):
     """
     Act on a remote node's reply to a sync request.

     A not-found reply triggers a whole-database rsync, an
     insufficient-storage reply raises DriveNotMounted, and a 2xx reply is
     parsed as JSON and leads to one of: "already in sync", rsync followed
     by a remote merge, or sending row diffs.  Any other status returns
     None.

     :param node: remote node, forwarded to _rsync_db
     :param response: reply object exposing ``status`` and ``data``
     :param info: local database info; ``id``, ``max_row`` and ``count``
                  are read here
     :param broker: local database broker
     :param http: connection object forwarded to the sync helpers
     :param different_region: forwarded to _rsync_db
     :returns: True when already in sync, otherwise the result of the
               chosen sync helper (None for unhandled statuses)
     :raises DriveNotMounted: on an insufficient-storage reply
     """
     status = response.status
     if status == HTTP_NOT_FOUND:
         # The remote has no database at all: ship the whole file.
         self.stats['rsync'] += 1
         self.logger.increment('rsyncs')
         return self._rsync_db(broker, node, http, info['id'],
                               different_region=different_region)
     if status == HTTP_INSUFFICIENT_STORAGE:
         raise DriveNotMounted()
     if not 200 <= status < 300:
         return None

     remote_info = json.loads(response.data)
     local_sync = broker.get_sync(remote_info['id'], incoming=False)
     remote_metadata = remote_info.get('metadata', '')
     if remote_metadata:
         broker.update_metadata(json.loads(remote_metadata))
     if self._in_sync(remote_info, info, broker, local_sync):
         return True

     # When the remote holds fewer than half of our rows AND the absolute
     # gap exceeds per_diff, rsync the file and merge remotely.  The
     # per_diff guard keeps small databases, which have only a few rows
     # to sync, on the cheaper diff path.
     far_behind = (
         remote_info['max_row'] / float(info['max_row']) < 0.5 and
         info['max_row'] - remote_info['max_row'] > self.per_diff)
     if far_behind:
         self.stats['remote_merge'] += 1
         self.logger.increment('remote_merges')
         return self._rsync_db(broker, node, http, info['id'],
                               replicate_method='rsync_then_merge',
                               replicate_timeout=(info['count'] / 2000),
                               different_region=different_region)

     # Otherwise send only the rows the remote is missing.
     sync_point = max(remote_info['point'], local_sync)
     return self._usync_db(sync_point, broker, http,
                           remote_info['id'], info['id'])
Esempio n. 2
0
 def test_bulk_delete_500_resp(self):
     # The response must list a 500 for each requested path and report an
     # overall 502 status.
     req = Request.blank("/broke/AUTH_acc/", body="/c/f\nc/f2\n",
                         headers={"Accept": "application/json"})
     req.method = "DELETE"
     resp_body = self.handle_delete_and_iter(req)
     resp_data = json.loads(resp_body)
     expected_errors = [["/c/f", "500 Internal Error"],
                        ["c/f2", "500 Internal Error"]]
     # assertEqual rather than the deprecated assertEquals alias.
     self.assertEqual(resp_data["Errors"], expected_errors)
     self.assertEqual(resp_data["Response Status"], "502 Bad Gateway")
Esempio n. 3
0
File: server.py Progetto: pkit/zwift
    def update_data_record(self, record, list_meta=False):
        """
        Turn a container listing record into the dict used for responses,
        applying the mutations shared by all serialization formats.

        A record whose content type is None is reported as a ``subdir``
        entry.  Otherwise the created time becomes an ISO timestamp with
        microseconds, and the dict is handed to
        override_bytes_from_content_type (per the original notes, this
        replaces the size with the 'swift_bytes' content type parameter).

        :param record: tuple of (name, created, size, content_type, etag,
                       metadata)
        :param list_meta: when true, JSON-decode the record's metadata and
                          include it under 'metadata'
        :returns: dict describing the record
        """
        name, created, size, content_type, etag, metadata = record
        if content_type is None:
            return {'subdir': name}

        entry = {
            'bytes': size,
            'hash': etag,
            'name': name,
            'content_type': content_type,
        }
        if list_meta:
            decoded_meta = json.loads(metadata)
            utf8encodekeys(decoded_meta)
            entry['metadata'] = decoded_meta

        stamp = datetime.utcfromtimestamp(float(created)).isoformat()
        # isoformat() omits the fractional part when it is zero; pad it so
        # every timestamp has the same width.
        if len(stamp) < len("1970-01-01T00:00:00.000000"):
            stamp += ".000000"
        entry['last_modified'] = stamp

        override_bytes_from_content_type(entry, logger=self.logger)
        return entry
Esempio n. 4
0
    def DELETE(self, req):
        """
        Handles Abort Multipart Upload.

        Issues a request for the upload's own entry in the multipart
        container, lists everything stored under the upload's prefix and
        issues one request per listed segment.  None of the sub-responses
        are inspected here.

        :param req: request carrying the ``uploadId`` query parameter
        :returns: HTTPNoContent
        """
        upload_id = req.params['uploadId']
        _check_upload_info(req, self.app, upload_id)

        # NOTE(review): these sub-requests pass no explicit method, so they
        # presumably reuse the incoming DELETE -- confirm against
        # get_response().
        segments_container = req.container_name + MULTIUPLOAD_SUFFIX
        upload_entry = '%s/%s' % (req.object_name, upload_id)
        req.get_response(self.app, container=segments_container,
                         obj=upload_entry)

        # List the segments uploaded for this UploadID.
        listing_query = {
            'format': 'json',
            'prefix': '%s/%s/' % (req.object_name, upload_id),
            'delimiter': '/',
        }
        listing = req.get_response(self.app, 'GET', segments_container, '',
                                   query=listing_query)

        # Handle the segment objects one at a time.
        for segment in json.loads(listing.body):
            req.get_response(
                self.app,
                container=req.container_name + MULTIUPLOAD_SUFFIX,
                obj=segment['name'])

        return HTTPNoContent()
Esempio n. 5
0
File: obj.py Progetto: saebyuk/swift
 def _listing_pages_iter(self, lcontainer, lprefix, env):
     """
     Yield successive pages of a container listing as decoded JSON lists.

     Each page is fetched with a marker set to the last name of the
     previous page; iteration stops at the first empty body or empty page.

     :param lcontainer: container to list
     :param lprefix: only names with this prefix are requested
     :param env: WSGI environment the listing requests are built from
     :raises ListingIterNotAuthorized: when env['swift.authorize'] returns
                                       a response
     :raises ListingIterNotFound: when the listing request gets a
                                  not-found status
     :raises ListingIterError: on any other unsuccessful status
     """
     part, nodes = self.app.container_ring.get_nodes(
         self.account_name, lcontainer)
     marker = ''
     while True:
         page_req = Request.blank('i will be overridden by env', environ=env)
         # Don't quote PATH_INFO, by WSGI spec
         page_req.environ['PATH_INFO'] = \
             '/%s/%s' % (self.account_name, lcontainer)
         page_req.environ['REQUEST_METHOD'] = 'GET'
         page_req.environ['QUERY_STRING'] = \
             'format=json&prefix=%s&marker=%s' % (quote(lprefix),
                                                  quote(marker))
         nodes = self.app.sort_nodes(nodes)
         page_resp = self.GETorHEAD_base(
             page_req, _('Container'), part, nodes, page_req.path_info,
             len(nodes))

         # Authorization is evaluated before the status is looked at.
         if 'swift.authorize' in env:
             page_req.acl = page_resp.headers.get('x-container-read')
             denied = env['swift.authorize'](page_req)
             if denied:
                 raise ListingIterNotAuthorized(denied)

         if page_resp.status_int == HTTP_NOT_FOUND:
             raise ListingIterNotFound()
         if not is_success(page_resp.status_int):
             raise ListingIterError()

         if not page_resp.body:
             return
         sublisting = json.loads(page_resp.body)
         if not sublisting:
             return
         marker = sublisting[-1]['name'].encode('utf-8')
         yield sublisting
Esempio n. 6
0
File: slo.py Progetto: pchng/swift
    def get_or_head_response(self, req, resp_headers, resp_iter):
        """
        Build the GET or HEAD response for a manifest from its raw body.

        The body is read from resp_iter and JSON-decoded (an undecodable
        body is treated as having no segments).  An MD5 over the segment
        hashes/ranges and the summed segment lengths replace the Etag and
        Content-Length headers.

        :param req: the client request; only ``method`` is read here
        :param resp_headers: iterable of (name, value) header pairs
        :param resp_iter: iterable yielding the manifest body
        :returns: result of _manifest_head_response for HEAD, otherwise of
                  _manifest_get_response
        """
        with closing_if_possible(resp_iter):
            manifest_body = "".join(resp_iter)
        try:
            segments = json.loads(manifest_body)
        except ValueError:
            segments = []

        etag = md5()
        content_length = 0
        for seg_dict in segments:
            seg_range = seg_dict.get("range")
            if seg_range:
                etag.update("%s:%s;" % (seg_dict["hash"], seg_range))
            else:
                etag.update(seg_dict["hash"])

            if config_true_value(seg_dict.get("sub_slo")):
                override_bytes_from_content_type(seg_dict, logger=self.slo.logger)
            content_length += self._segment_length(seg_dict)

        # Drop the stored values; the computed ones are appended below.
        replaced = ("etag", "content-length")
        response_headers = [(h, v) for h, v in resp_headers if h.lower() not in replaced]
        response_headers.extend([
            ("Content-Length", str(content_length)),
            ("Etag", '"%s"' % etag.hexdigest()),
        ])

        if req.method == "HEAD":
            return self._manifest_head_response(req, response_headers)
        return self._manifest_get_response(req, content_length, response_headers, segments)
Esempio n. 7
0
    def test_GET_OBJscope_conAttrs_metadata(self):
        """
        In object scope, specifying container attrs.
        We should get back the container that the object
        belongs to.
        """
        attrs = Cattrs
        req2 = Request.blank(
            '/v1/TEST_acc1/TEST_con1/TEST_obj1',
            environ={'REQUEST_METHOD': 'GET',
                     'HTTP_X_TIMESTAMP': '0'},
            headers={'attributes': attrs, 'format': 'json'})
        resp2 = req2.get_response(self.controller)
        # assertTrue/assertEqual replace the deprecated assert_ and
        # assertEquals aliases.
        self.assertTrue(resp2.status.startswith('200'))
        testList = json.loads(resp2.body)
        self.assertEqual(len(testList), 1)
        testDict = testList[0]
        self.assertTrue('/TEST_acc1/TEST_con1' in testDict)
        metaReturned = testDict['/TEST_acc1/TEST_con1']
        self.assertEqual(
            metaReturned['container_uri'], '/TEST_acc1/TEST_con1')

        self.assertEqual(metaReturned['container_name'], 'TEST_con1')
        self.assertEqual(metaReturned['container_account_name'], 'TEST_acc1')
        self.assertEqual(metaReturned['container_create_time'], self.t)
        self.assertEqual(metaReturned['container_object_count'], 33)
        self.assertEqual(metaReturned['container_bytes_used'], 3342)
        self.assertEqual(metaReturned['container_meta_TESTCUSTOM'], 'CUSTOM')
Esempio n. 8
0
 def handle_multipart_delete(self, req):
     """
     Fetch the manifest named by the request and bulk-delete the objects
     it lists.

     :param req: the incoming request; its environ is copied for the
                 manifest GET
     :returns: the manifest GET response when that GET is not 2xx, the
               bulk-delete response when the delete is not 2xx, and
               self.app when both succeed
     :raises HTTPBadRequest: when the fetched object is not flagged
                             X-Static-Large-Object
     :raises HTTPServerError: when the manifest body is not valid JSON
     """
     manifest_env = req.environ.copy()
     manifest_env['REQUEST_METHOD'] = 'GET'
     del manifest_env['wsgi.input']
     manifest_env['QUERY_STRING'] = 'multipart-manifest=get'
     manifest_env['CONTENT_LENGTH'] = 0
     manifest_env['HTTP_USER_AGENT'] = \
         '%s MultipartDELETE' % req.environ.get('HTTP_USER_AGENT')
     manifest_env['swift.source'] = 'SLO'
     manifest_resp = Request.blank('', manifest_env).get_response(self.app)

     if manifest_resp.status_int // 100 != 2:
         return manifest_resp

     is_slo = config_true_value(
         manifest_resp.headers.get('X-Static-Large-Object'))
     if not is_slo:
         raise HTTPBadRequest('Not an SLO manifest')
     try:
         manifest = json.loads(manifest_resp.body)
     except ValueError:
         raise HTTPServerError('Invalid manifest file')

     segment_names = [entry['name'].encode('utf-8') for entry in manifest]
     delete_resp = self.bulk_deleter.handle_delete(
         req, objs_to_delete=segment_names,
         user_agent='MultipartDELETE', swift_source='SLO')
     if delete_resp.status_int // 100 != 2:
         return delete_resp
     # NOTE(review): presumably the caller uses the returned app to let
     # the original DELETE remove the manifest itself -- confirm.
     return self.app
Esempio n. 9
0
    def test_handle_multipart_put_check_data_bad(self):
        # Each of the four segments must show up in "Errors"; the expected
        # (path, reason) pairs are checked in response order.
        bad_data = json.dumps(
            [
                {"path": "/checktest/a_1", "etag": "a", "size_bytes": "2"},
                {"path": "/checktest/badreq", "etag": "a", "size_bytes": "1"},
                {"path": "/checktest/b_2", "etag": "not-b", "size_bytes": "2"},
                {"path": "/checktest/slob", "etag": "not-slob", "size_bytes": "2"},
            ]
        )
        req = Request.blank(
            "/v1/AUTH_test/checktest/man?multipart-manifest=put",
            environ={"REQUEST_METHOD": "PUT"},
            headers={"Accept": "application/json"},
            body=bad_data,
        )

        status, headers, body = self.call_slo(req)
        # assertEqual rather than the deprecated assertEquals alias.
        self.assertEqual(self.app.call_count, 4)
        errors = json.loads(body)["Errors"]
        self.assertEqual(len(errors), 5)

        expected = [
            ("/checktest/a_1", "Size Mismatch"),
            ("/checktest/badreq", "400 Bad Request"),
            ("/checktest/b_2", "Etag Mismatch"),
            ("/checktest/slob", "Size Mismatch"),
            ("/checktest/slob", "Etag Mismatch"),
        ]
        for position, (path, reason) in enumerate(expected):
            self.assertEqual(errors[position][0], path)
            self.assertEqual(errors[position][1], reason)
Esempio n. 10
0
 def is_strict_mode(url, token, parsed, conn):
     """
     Report the ``strict_cors_mode`` setting found in the cluster's /info.

     :param url: unused here
     :param token: unused here
     :param parsed: unused here
     :param conn: connection offering request() and getresponse()
     :returns: the ``swift.strict_cors_mode`` value from a 2xx /info reply,
               False when the key is absent or the reply is not 2xx
     """
     conn.request('GET', '/info')
     info_resp = conn.getresponse()
     if info_resp.status // 100 != 2:
         return False
     swift_info = json.loads(info_resp.read()).get('swift', {})
     return swift_info.get('strict_cors_mode', False)
Esempio n. 11
0
 def test_bulk_delete_container_delete(self):
     # Nothing may be counted as deleted and the first error must be a
     # 409 Conflict.
     req = Request.blank("/delete_cont_fail/AUTH_Acc", body="c\n",
                         headers={"Accept": "application/json"})
     req.method = "DELETE"
     resp_body = self.handle_delete_and_iter(req)
     resp_data = json.loads(resp_body)
     # assertEqual rather than the deprecated assertEquals alias.
     self.assertEqual(resp_data["Number Deleted"], 0)
     self.assertEqual(resp_data["Errors"][0][1], "409 Conflict")
Esempio n. 12
0
    def _fetch_sub_slo_segments(self, req, version, acc, con, obj):
        """
        Fetch the submanifest, parse it, and return it.
        Raise exception on failures.

        :param req: the original request; a GET copy of it is re-targeted
                    at the submanifest
        :param version: first path component of the submanifest path
        :param acc: account component of the submanifest path
        :param con: container component of the submanifest path
        :param obj: object component of the submanifest path
        :returns: the JSON-decoded submanifest body
        :raises ListingIterError: when the GET is unsuccessful or the body
                                  is not valid JSON
        """
        sub_req = req.copy_get()
        sub_req.range = None
        sub_req.environ['PATH_INFO'] = '/'.join(['', version, acc, con, obj])
        sub_req.environ['swift.source'] = 'SLO'
        sub_req.user_agent = "%s SLO MultipartGET" % sub_req.user_agent
        sub_resp = sub_req.get_response(self.slo.app)

        if not is_success(sub_resp.status_int):
            # Close the unread body before bailing out; otherwise the
            # failed response's iterator is never closed.
            with closing_if_possible(sub_resp.app_iter):
                pass
            raise ListingIterError(
                'ERROR: while fetching %s, GET of submanifest %s '
                'failed with status %d' % (req.path, sub_req.path,
                                           sub_resp.status_int))

        try:
            with closing_if_possible(sub_resp.app_iter):
                return json.loads(''.join(sub_resp.app_iter))
        except ValueError as err:
            raise ListingIterError(
                'ERROR: while fetching %s, JSON-decoding of submanifest %s '
                'failed with %s' % (req.path, sub_req.path, err))
Esempio n. 13
0
    def get(self, key):
        """
        Gets the object specified by key.  It will also unserialize the object
        before returning if it is serialized in memcache with JSON, or if it
        is pickled and unpickling is allowed.

        Each connection from _get_conns is tried in turn; a connection that
        raises (or times out) is reported via _exception_occurred and the
        next one is tried.  If every connection fails, None is returned.

        :param key: key
        :returns: value of the key in memcache
        """
        # The hashed key, not the caller's key, is what goes on the wire.
        key = md5hash(key)
        value = None
        for (server, fp, sock) in self._get_conns(key):
            try:
                with Timeout(self._io_timeout):
                    sock.sendall('get %s\r\n' % key)
                    line = fp.readline().strip().split()
                    # Read reply lines until the END marker.
                    while line[0].upper() != 'END':
                        # Fields used from a VALUE line: [1] key, [2] flags,
                        # [3] payload size in bytes.
                        if line[0].upper() == 'VALUE' and line[1] == key:
                            size = int(line[3])
                            value = fp.read(size)
                            if int(line[2]) & PICKLE_FLAG:
                                # NOTE(review): unpickling data read from the
                                # cache server can execute arbitrary code if
                                # that data is not trusted; it only happens
                                # when _allow_unpickle is set.
                                if self._allow_unpickle:
                                    value = pickle.loads(value)
                                else:
                                    value = None
                            elif int(line[2]) & JSON_FLAG:
                                value = json.loads(value)
                            # Consume the rest of the payload line.
                            fp.readline()
                        line = fp.readline().strip().split()
                    self._return_conn(server, fp, sock)
                    return value
            except (Exception, Timeout) as e:
                self._exception_occurred(server, e, sock=sock, fp=fp)
Esempio n. 14
0
 def test_handle_multipart_put_check_data_bad(self):
     # handle_multipart_put must raise for this manifest; the errors
     # carried by the exception body are then checked.
     bad_data = json.dumps(
         [
             {"path": "/c/a_1", "etag": "a", "size_bytes": "1"},
             {"path": "/c/a_2", "etag": "a", "size_bytes": "1"},
             {"path": "/d/b_2", "etag": "b", "size_bytes": "2"},
         ]
     )
     req = Request.blank(
         "/test_good/A/c/man?multipart-manifest=put",
         environ={"REQUEST_METHOD": "PUT"},
         headers={"Accept": "application/json"},
         body=bad_data,
     )
     try:
         self.slo.handle_multipart_put(req)
     except HTTPException as e:
         # "as" form: valid on Python 2.6+ and Python 3, unlike
         # "except HTTPException, e".
         self.assertEqual(self.app.calls, 3)
         data = json.loads(e.body)
         errors = data["Errors"]
         self.assertEqual(errors[0][0], "/test_good/A/c/a_1")
         self.assertEqual(errors[0][1], "Size Mismatch")
         self.assertEqual(errors[2][1], "400 Bad Request")
         self.assertEqual(errors[-1][0], "/test_good/A/d/b_2")
         self.assertEqual(errors[-1][1], "Etag Mismatch")
     else:
         # Without this the test passes vacuously when nothing is raised.
         self.fail("handle_multipart_put did not raise HTTPException")
0
    def _reclaim(self, conn, timestamp):
        """
        Removes any empty metadata values older than the timestamp using the
        given database connection. This function will not call commit on the
        conn, but will instead return True if the database needs committing.
        This function was created as a worker to limit transactions and commits
        from other related functions.

        :param conn: Database connection to reclaim metadata within.
        :param timestamp: Empty metadata items last updated before this
                          timestamp will be removed.
        :returns: True if conn.commit() should be called
        """
        try:
            md = conn.execute('SELECT metadata FROM %s_stat' %
                              self.db_type).fetchone()[0]
            if md:
                md = json.loads(md)
                keys_to_delete = []
                for key, (value, value_timestamp) in md.iteritems():
                    if value == '' and value_timestamp < timestamp:
                        keys_to_delete.append(key)
                if keys_to_delete:
                    for key in keys_to_delete:
                        del md[key]
                    conn.execute('UPDATE %s_stat SET metadata = ?' %
                                 self.db_type, (json.dumps(md),))
                    return True
        except sqlite3.OperationalError as err:
            if 'no such column: metadata' not in str(err):
                raise
        return False
Esempio n. 16
0
    def deserialize_v1(cls, gz_file, metadata_only=False):
        """
        Read a v1 ring file and return its contents as a dictionary.

        The stream holds a 4-byte big-endian length, that many bytes of
        JSON, and then one block of unsigned 16-bit values per replica.
        With `metadata_only` set, the per-replica blocks are not read and
        `replica2part2dev_id` is left as `[]`.

        :param file gz_file: An opened file-like object which has already
                             consumed the 6 bytes of magic and version.
        :param bool metadata_only: If True, stop after the JSON section
        :returns: The decoded JSON dict with `replica2part2dev_id` added
        """
        (json_len,) = struct.unpack('!I', gz_file.read(4))
        ring_dict = json.loads(gz_file.read(json_len))
        assignments = ring_dict['replica2part2dev_id'] = []

        if not metadata_only:
            # One 2-byte entry per partition, per replica.
            bytes_per_replica = 2 * (1 << (32 - ring_dict['part_shift']))
            for _replica in xrange(ring_dict['replica_count']):
                assignments.append(
                    array.array('H', gz_file.read(bytes_per_replica)))
        return ring_dict
Esempio n. 17
0
File: slo.py Progetto: pchng/swift
    def _fetch_sub_slo_segments(self, req, version, acc, con, obj):
        """
        GET a submanifest through the wrapped app and return its decoded
        JSON body.

        :param req: the original request; its environ and x-auth-token
                    header seed the subrequest
        :param version: first path component of the submanifest path
        :param acc: account component of the submanifest path
        :param con: container component of the submanifest path
        :param obj: object component of the submanifest path
        :returns: the JSON-decoded submanifest body
        :raises ListingIterError: when the GET is unsuccessful or the body
                                  is not valid JSON
        """
        submanifest_path = "/".join(["", version, acc, con, obj])
        auth_headers = {"x-auth-token": req.headers.get("x-auth-token")}
        sub_req = make_subrequest(
            req.environ,
            path=submanifest_path,
            method="GET",
            headers=auth_headers,
            agent=("%(orig)s " + "SLO MultipartGET"),
            swift_source="SLO",
        )
        sub_resp = sub_req.get_response(self.slo.app)
        body_iter = sub_resp.app_iter

        if not is_success(sub_resp.status_int):
            # The body will not be read, so close it before raising.
            close_if_possible(body_iter)
            failure = (
                "ERROR: while fetching %s, GET of submanifest %s "
                "failed with status %d" % (req.path, sub_req.path, sub_resp.status_int)
            )
            raise ListingIterError(failure)

        try:
            with closing_if_possible(body_iter):
                return json.loads("".join(body_iter))
        except ValueError as err:
            raise ListingIterError(
                "ERROR: while fetching %s, JSON-decoding of submanifest %s "
                "failed with %s" % (req.path, sub_req.path, err)
            )
Esempio n. 18
0
    def test_GET_OBJscope_objAttrs_metadata(self):
        """
        In object scope, give me object attrs.
        Should give back the object in the path.
        """
        attrs = Oattrs
        req2 = Request.blank(
            '/v1/TEST_acc1/TEST_con1/TEST_obj1',
            environ={'REQUEST_METHOD': 'GET',
                     'HTTP_X_TIMESTAMP': '0'},
            headers={'attributes': attrs, 'format': 'json'})
        resp2 = req2.get_response(self.controller)
        # assertTrue/assertEqual replace the deprecated assert_ and
        # assertEquals aliases.
        self.assertTrue(resp2.status.startswith('200'))
        testList = json.loads(resp2.body)
        self.assertEqual(len(testList), 1)
        testDict = testList[0]
        self.assertTrue('/TEST_acc1/TEST_con1/TEST_obj1' in testDict)
        metaReturned = testDict['/TEST_acc1/TEST_con1/TEST_obj1']
        self.assertEqual(
            metaReturned['object_uri'], '/TEST_acc1/TEST_con1/TEST_obj1')

        self.assertEqual(metaReturned['object_name'], 'TEST_obj1')
        self.assertEqual(metaReturned['object_account_name'], 'TEST_acc1')
        self.assertEqual(metaReturned['object_container_name'], 'TEST_con1')
        self.assertEqual(metaReturned['object_uri_create_time'], self.t)
        self.assertEqual(metaReturned['object_etag_hash'], '0000000000000000')
        self.assertEqual(metaReturned['object_content_type'], 'text/plain')
        # Compared as a string, so either an int or a str 42 is accepted.
        self.assertEqual(str(metaReturned['object_content_length']), '42')
        self.assertEqual(metaReturned['object_content_encoding'], 'gzip')
        self.assertEqual(metaReturned['object_content_language'], 'en')
        self.assertEqual(metaReturned['object_meta_TESTCUSTOM'], 'CUSTOM')
Esempio n. 19
0
    def test_direct_get_account(self):
        # direct_get_account must issue a GET on the account path, hand
        # back the stubbed headers and decoded body, and pass every
        # listing option through in the query string.
        stub_headers = HeaderKeyDict({
            'X-Account-Container-Count': '1',
            'X-Account-Object-Count': '1',
            'X-Account-Bytes-Used': '1',
            'X-Timestamp': '1234567890',
            'X-PUT-Timestamp': '1234567890'})
        body = '[{"count": 1, "bytes": 20971520, "name": "c1"}]'

        with mocked_http_conn(200, stub_headers, body) as conn:
            resp_headers, resp = direct_client.direct_get_account(
                self.node, self.part, self.account, marker='marker',
                prefix='prefix', delimiter='delimiter', limit=1000)
            self.assertEqual(conn.method, 'GET')
            self.assertEqual(conn.path, self.account_path)

        self.assertEqual(conn.req_headers['user-agent'], self.user_agent)
        self.assertEqual(resp_headers, stub_headers)
        self.assertEqual(json.loads(body), resp)
        expected_params = ('marker=marker', 'delimiter=delimiter',
                           'limit=1000', 'prefix=prefix', 'format=json')
        for param in expected_params:
            self.assertTrue(param in conn.query_string)
Esempio n. 20
0
File: slo.py Progetto: pchng/swift
def parse_input(raw_data):
    """
    Parse and validate the JSON body of a manifest.

    The body must decode to a list.  Every entry must have exactly the
    keys "path", "etag" and "size_bytes", optionally plus "range", and its
    path must contain a "/" after leading slashes are stripped.  A truthy
    "range" value is replaced in place by ``Range("bytes=<value>")``.

    :param raw_data: the JSON text to parse
    :raises: HTTPException on parse errors
    :returns: a list of dictionaries on success
    """
    try:
        parsed_data = json.loads(raw_data)
    except ValueError:
        raise HTTPBadRequest("Manifest must be valid json.")

    # Dicts and strings are iterable too, so without this check an empty
    # JSON object or empty string would skip the loop below and be
    # returned as if it were a valid (empty) manifest.
    if not isinstance(parsed_data, list):
        raise HTTPBadRequest("Invalid SLO Manifest File")

    req_keys = set(["path", "etag", "size_bytes"])
    opt_keys = set(["range"])
    allowed_keys = req_keys | opt_keys
    try:
        for seg_dict in parsed_data:
            keys_ok = req_keys <= set(seg_dict) <= allowed_keys
            if not keys_ok or "/" not in seg_dict["path"].lstrip("/"):
                raise HTTPBadRequest("Invalid SLO Manifest File")

            if seg_dict.get("range"):
                try:
                    seg_dict["range"] = Range("bytes=%s" % seg_dict["range"])
                except ValueError:
                    raise HTTPBadRequest("Invalid SLO Manifest File")
    except (AttributeError, TypeError):
        # Entries that are not dicts, or paths that are not strings.
        raise HTTPBadRequest("Invalid SLO Manifest File")

    return parsed_data
Esempio n. 21
0
 def _listing_pages_iter(self, lcontainer, lprefix, env):
     """
     Yield successive pages of a container listing as decoded JSON lists.

     Each page is requested with a marker equal to the last name of the
     previous page; iteration ends at the first empty body or empty page.

     :param lcontainer: container to list
     :param lprefix: only names with this prefix are requested
     :param env: WSGI environment the listing requests are built from
     :raises ListingIterNotAuthorized: when env["swift.authorize"] returns
                                       a response
     :raises ListingIterNotFound: when the listing request gets a
                                  not-found status
     :raises ListingIterError: on any other unsuccessful status
     """
     part = self.app.container_ring.get_part(self.account_name, lcontainer)
     marker = ""
     while True:
         page_req = Request.blank("i will be overridden by env", environ=env)
         # Don't quote PATH_INFO, by WSGI spec
         page_req.environ["PATH_INFO"] = "/v1/%s/%s" % (self.account_name, lcontainer)
         page_req.environ["REQUEST_METHOD"] = "GET"
         page_req.environ["QUERY_STRING"] = "format=json&prefix=%s&marker=%s" % (
             quote(lprefix), quote(marker))
         page_resp = self.GETorHEAD_base(
             page_req, _("Container"), self.app.container_ring, part,
             page_req.swift_entity_path
         )

         # Authorization is evaluated before the status is looked at.
         if "swift.authorize" in env:
             page_req.acl = page_resp.headers.get("x-container-read")
             denied = env["swift.authorize"](page_req)
             if denied:
                 raise ListingIterNotAuthorized(denied)

         if page_resp.status_int == HTTP_NOT_FOUND:
             raise ListingIterNotFound()
         if not is_success(page_resp.status_int):
             raise ListingIterError()

         if not page_resp.body:
             return
         sublisting = json.loads(page_resp.body)
         if not sublisting:
             return
         marker = sublisting[-1]["name"].encode("utf-8")
         yield sublisting
Esempio n. 22
0
 def __call__(self, req):
     """
     Serve a request, optionally decorating JSON listings in "web mode".

     Requests without an account in the path or without an x-web-mode
     header are passed straight through.  For JSON listings, a listing
     containing index.html yields a redirect to it (when no template is
     available, or when a container is addressed); otherwise the listing
     is rendered through the template if there is one.

     :param req: the incoming request
     :returns: the wrapped app's response, an HTTPSeeOther redirect, or a
               Response holding the rendered template
     """
     account = None
     try:
         version, account, container, obj = split_path(
             req.path_info, 2, 4, True)
     except ValueError:
         pass
     if not (account and req.headers.get('x-web-mode')):
         return req.get_response(self.app)

     if not obj:
         req.query_string = 'format=json'
     resp = req.get_response(self.app)
     if resp.content_type != 'application/json':
         return resp

     listing = json.loads(resp.body)
     template = self.get_template(req, account, container)

     # With a template, the redirect is only considered for containers;
     # without one it is always considered.
     if container or not template:
         index = [o for o in listing if o['name'] == 'index.html']
         if index:
             location = '/v1/%s/%s/index.html' % (account, container)
             return HTTPSeeOther(headers={'Location': location})

     if template:
         ctx = {
             'account': account,
             'container': container,
             'listing': listing,
         }
         return Response(body=template.render(**ctx))
     return resp
Esempio n. 23
0
File: slo.py Progetto: bkolli/swift
    def get_or_head_response(self, req, resp_headers, resp_iter):
        """
        Build the GET or HEAD response for a manifest from its raw body.

        The body is read from resp_iter and JSON-decoded (an undecodable
        body is treated as having no segments).  An MD5 over the segment
        hashes/ranges and the summed segment lengths replace the Etag and
        Content-Length headers.

        :param req: the client request; only ``method`` is read here
        :param resp_headers: iterable of (name, value) header pairs
        :param resp_iter: iterable yielding the manifest body
        :returns: result of _manifest_head_response for HEAD, otherwise of
                  _manifest_get_response
        """
        with closing_if_possible(resp_iter):
            manifest_body = ''.join(resp_iter)
        try:
            segments = json.loads(manifest_body)
        except ValueError:
            segments = []

        etag = md5()
        content_length = 0
        for seg_dict in segments:
            seg_range = seg_dict.get('range')
            if seg_range:
                etag.update('%s:%s;' % (seg_dict['hash'], seg_range))
            else:
                etag.update(seg_dict['hash'])

            if config_true_value(seg_dict.get('sub_slo')):
                override_bytes_from_content_type(
                    seg_dict, logger=self.slo.logger)
            content_length += self._segment_length(seg_dict)

        # Drop the stored values; the computed ones are appended below.
        replaced = ('etag', 'content-length')
        response_headers = [(h, v) for h, v in resp_headers
                            if h.lower() not in replaced]
        response_headers.extend([
            ('Content-Length', str(content_length)),
            ('Etag', '"%s"' % etag.hexdigest()),
        ])

        if req.method == 'HEAD':
            return self._manifest_head_response(req, response_headers)
        return self._manifest_get_response(
            req, content_length, response_headers, segments)
Esempio n. 24
0
    def _listing_pages_iter(self, account_name, lcontainer, lprefix, env):
        """
        Yield successive pages of a container listing as decoded JSON
        lists, using pre-authed GET requests.

        Each page is requested with a marker equal to the last name of the
        previous page; iteration ends at the first empty body or empty
        page.

        :param account_name: account owning the container
        :param lcontainer: container to list
        :param lprefix: only names with this prefix are requested
        :param env: WSGI environment the listing requests are built from
        :raises ListingIterNotFound: on a not-found status
        :raises HTTPPreconditionFailed: on any other client-error status
        :raises ListingIterError: on any other unsuccessful status
        """
        marker = ''
        listing_path = '/v1/%s/%s' % (account_name, lcontainer)
        while True:
            page_req = make_pre_authed_request(
                env, method='GET', swift_source='VW', path=listing_path)
            page_req.environ['QUERY_STRING'] = \
                'format=json&prefix=%s&marker=%s' % (quote(lprefix),
                                                     quote(marker))
            page_resp = page_req.get_response(self.app)

            status = page_resp.status_int
            if not is_success(status):
                if status == HTTP_NOT_FOUND:
                    raise ListingIterNotFound()
                if is_client_error(status):
                    raise HTTPPreconditionFailed()
                raise ListingIterError()

            if not page_resp.body:
                return
            sublisting = json.loads(page_resp.body)
            if not sublisting:
                return
            marker = sublisting[-1]['name'].encode('utf-8')
            yield sublisting
Esempio n. 25
0
File: slo.py Progetto: pchng/swift
    def get_slo_segments(self, obj_name, req):
        """
        Fetch an SLO manifest with an internal GET and return its segments.

        The GET reuses a copy of the request's environ, re-targeted at
        obj_name within the request's account.

        :param obj_name: object path below the account (leading slashes
                         are stripped)
        :param req: the request whose environ and path seed the GET
        :raises HTTPServerError: on unable to load obj_name or
                                 on unable to load the SLO manifest data.
        :raises HTTPBadRequest: on not an SLO manifest
        :raises HTTPNotFound: on SLO manifest not found
        :raises HTTPUnauthorized: when the GET is answered as unauthorized
        :returns: SLO manifest's segments
        """
        vrs, account, _junk = req.split_path(2, 3, True)
        manifest_path = "/%s/%s/%s" % (vrs, account, obj_name.lstrip("/"))

        new_env = req.environ.copy()
        new_env["REQUEST_METHOD"] = "GET"
        del new_env["wsgi.input"]
        new_env["QUERY_STRING"] = "multipart-manifest=get"
        new_env["CONTENT_LENGTH"] = 0
        new_env["HTTP_USER_AGENT"] = "%s MultipartDELETE" % new_env.get("HTTP_USER_AGENT")
        new_env["swift.source"] = "SLO"
        new_env["PATH_INFO"] = manifest_path.encode("utf-8")
        resp = Request.blank("", new_env).get_response(self.app)

        if resp.is_success:
            if not config_true_value(resp.headers.get("X-Static-Large-Object")):
                raise HTTPBadRequest("Not an SLO manifest")
            try:
                return json.loads(resp.body)
            except ValueError:
                raise HTTPServerError("Unable to load SLO manifest")

        if resp.status_int == HTTP_NOT_FOUND:
            raise HTTPNotFound("SLO manifest not found")
        if resp.status_int == HTTP_UNAUTHORIZED:
            raise HTTPUnauthorized("401 Unauthorized")
        raise HTTPServerError("Unable to load SLO manifest or segment.")
Esempio n. 26
0
 def _handle_sync_response(self, node, response, info, broker, http,
                           different_region=False):
     """
     Act on a remote node's reply to a sync request.

     A not-found reply triggers a whole-database rsync, an
     insufficient-storage reply raises DriveNotMounted, and a 2xx reply is
     parsed as JSON and leads to one of: "already in sync", rsync followed
     by a remote merge, or sending row diffs.  Any other status returns
     None.

     :param node: remote node, forwarded to _rsync_db
     :param response: reply object exposing ``status`` and ``data``
     :param info: local database info; ``id``, ``max_row`` and ``count``
                  are read here
     :param broker: local database broker
     :param http: connection object forwarded to the sync helpers
     :param different_region: forwarded to _rsync_db
     :returns: True when already in sync, otherwise the result of the
               chosen sync helper (None for unhandled statuses)
     :raises DriveNotMounted: on an insufficient-storage reply
     """
     status = response.status
     if status == HTTP_NOT_FOUND:
         # The remote has no database at all: ship the whole file.
         self.stats['rsync'] += 1
         self.logger.increment('rsyncs')
         return self._rsync_db(broker, node, http, info['id'],
                               different_region=different_region)
     if status == HTTP_INSUFFICIENT_STORAGE:
         raise DriveNotMounted()
     if not (status >= 200 and status < 300):
         return None

     remote_info = json.loads(response.data)
     local_sync = broker.get_sync(remote_info['id'], incoming=False)
     if self._in_sync(remote_info, info, broker, local_sync):
         return True

     # When the remote holds fewer than half of our rows, rsync the file
     # and then do a remote merge.
     row_ratio = remote_info['max_row'] / float(info['max_row'])
     if row_ratio < 0.5:
         self.stats['remote_merge'] += 1
         self.logger.increment('remote_merges')
         return self._rsync_db(broker, node, http, info['id'],
                               replicate_method='rsync_then_merge',
                               replicate_timeout=(info['count'] / 2000),
                               different_region=different_region)

     # Otherwise send only the rows the remote is missing.
     sync_point = max(remote_info['point'], local_sync)
     return self._usync_db(sync_point, broker, http,
                           remote_info['id'], info['id'])
Esempio n. 27
0
    def test_no_attributes_in_request_con_scope(self):
        """
        A container-scope GET without attributes returns four entries, each
        keyed by its URI and carrying that URI in the matching ``*_uri``
        field.
        """
        req = Request.blank(
            '/v1/TEST_acc1/TEST_con1',
            environ={'REQUEST_METHOD': 'GET', 'HTTP_X_TIMESTAMP': '0'},
            headers={'format': 'json'})
        resp = req.get_response(self.controller)
        self.assert_(resp.status.startswith('200'))
        listing = json.loads(resp.body)
        self.assertEquals(len(listing), 4)

        # (expected key of the entry, field that must echo that key)
        expected = (
            ('/TEST_acc1', 'account_uri'),
            ('/TEST_acc1/TEST_con1', 'container_uri'),
            ('/TEST_acc1/TEST_con1/TEST_obj1', 'object_uri'),
            ('/TEST_acc1/TEST_con1/TEST_obj2', 'object_uri'),
        )
        for entry, (uri, uri_field) in zip(listing, expected):
            self.assert_(uri in entry)
            returned_meta = entry[uri]
            self.assertEquals(returned_meta[uri_field], uri)
Esempio n. 28
0
 def test_handle_multipart_put_check_data_bad(self):
     """
     A manifest PUT whose segments do not match must raise an HTTPException
     whose JSON body lists one error per bad segment.
     """
     segments = (
         ('/c/a_1', 'a', '1'),
         ('/c/a_2', 'a', '1'),
         ('/d/b_2', 'b', '2'),
         ('/d/slob', 'a', '2'),
     )
     bad_data = json.dumps(
         [{'path': path, 'etag': etag, 'size_bytes': size}
          for path, etag, size in segments])
     req = Request.blank(
         '/test_good/A/c/man?multipart-manifest=put',
         environ={'REQUEST_METHOD': 'PUT'},
         headers={'Accept': 'application/json'},
         body=bad_data)

     raised = None
     try:
         self.slo.handle_multipart_put(req, fake_start_response)
     except HTTPException as err:
         raised = err
     # The request has to be rejected.
     self.assert_(raised is not None)

     self.assertEquals(self.app.calls, 4)
     errors = json.loads(raised.body)['Errors']
     # (index into the error list, expected path, expected message)
     expected = (
         (0, '/c/a_1', 'Size Mismatch'),
         (2, '/c/a_2', '400 Bad Request'),
         (4, '/d/b_2', 'Etag Mismatch'),
         (-1, '/d/slob', 'Etag Mismatch'),
     )
     for index, path, message in expected:
         self.assertEquals(errors[index][0], path)
         self.assertEquals(errors[index][1], message)
Esempio n. 29
0
    def test_extract_tar_works(self):
        """
        Extract the same directory tree from plain, gzip and bzip2 tarballs
        and check the JSON report, the XML report and the 406 answer given
        for an unsupported Accept type.

        Every archive handle is opened in binary mode and closed as soon as
        the request that reads it has been fully consumed.
        """
        # On systems where $TMPDIR is long (like OS X), we need to do this
        # or else every upload will fail due to the path being too long.
        self.app.max_pathlen += len(self.testdir)

        def fake_start_response(*args, **kwargs):
            pass

        for compress_format in ['', 'gz', 'bz2']:
            base_name = 'base_works_%s' % compress_format
            dir_tree = [
                {base_name: [{'sub_dir1': ['sub1_file1', 'sub1_file2']},
                             {'sub_dir2': ['sub2_file1', u'test obj \u2661']},
                             'sub_file1',
                             {'sub_dir3': [{'sub4_dir1': '../sub4 file1'}]},
                             {'sub_dir4': None},
                             ]}]

            build_dir_tree(self.testdir, dir_tree)
            mode = 'w'
            extension = ''
            if compress_format:
                mode += ':' + compress_format
                extension += '.' + compress_format
            tar_path = os.path.join(self.testdir,
                                    'tar_works.tar' + extension)
            tar = tarfile.open(name=tar_path, mode=mode)
            try:
                tar.add(os.path.join(self.testdir, base_name))
            finally:
                tar.close()

            # default (JSON) report
            req = Request.blank('/tar_works/acc/cont/')
            req.headers['transfer-encoding'] = 'chunked'
            with open(tar_path, 'rb') as tar_file:
                req.environ['wsgi.input'] = tar_file
                resp_body = self.handle_extract_and_iter(
                    req, compress_format)
            resp_data = json.loads(resp_body)
            self.assertEquals(resp_data['Number Files Created'], 6)

            # test out xml
            req = Request.blank('/tar_works/acc/cont/')
            req.headers['transfer-encoding'] = 'chunked'
            with open(tar_path, 'rb') as tar_file:
                req.environ['wsgi.input'] = tar_file
                resp_body = self.handle_extract_and_iter(
                    req, compress_format, 'application/xml')
            self.assert_('<response_status>201 Created</response_status>' in
                         resp_body)
            self.assert_('<number_files_created>6</number_files_created>' in
                         resp_body)

            # test out nonexistent format
            req = Request.blank('/tar_works/acc/cont/?extract-archive=tar',
                                headers={'Accept': 'good_xml'})
            req.environ['REQUEST_METHOD'] = 'PUT'
            req.headers['transfer-encoding'] = 'chunked'
            with open(tar_path, 'rb') as tar_file:
                req.environ['wsgi.input'] = tar_file
                app_iter = self.bulk(req.environ, fake_start_response)
                # Drain the response while the archive is still open.
                resp_body = ''.join(app_iter)

            self.assert_('Response Status: 406' in resp_body)
Esempio n. 30
0
File: slo.py Progetto: bkolli/swift
    def _fetch_sub_slo_segments(self, req, version, acc, con, obj):
        """
        GET a sub-manifest and return its JSON-decoded content.

        :param req: the original request; its environ and x-auth-token
                    header are reused for the sub-request
        :param version: API version path component
        :param acc: account path component
        :param con: container path component
        :param obj: object path component
        :returns: the decoded sub-manifest
        :raises ListingIterError: if the GET is not successful or the body
                                  is not valid JSON
        """
        sub_path = '/'.join(('', version, acc, con, obj))
        auth_headers = {'x-auth-token': req.headers.get('x-auth-token')}
        sub_req = make_subrequest(
            req.environ, path=sub_path, method='GET', headers=auth_headers,
            agent='%(orig)s SLO MultipartGET', swift_source='SLO')
        sub_resp = sub_req.get_response(self.slo.app)

        if is_success(sub_resp.status_int):
            try:
                with closing_if_possible(sub_resp.app_iter):
                    raw_manifest = ''.join(sub_resp.app_iter)
                    return json.loads(raw_manifest)
            except ValueError as err:
                message = (
                    'ERROR: while fetching %s, JSON-decoding of '
                    'submanifest %s failed with %s'
                    % (req.path, sub_req.path, err))
                raise ListingIterError(message)

        # The sub-request failed: release the body before reporting.
        close_if_possible(sub_resp.app_iter)
        message = (
            'ERROR: while fetching %s, GET of submanifest %s '
            'failed with status %d'
            % (req.path, sub_req.path, sub_resp.status_int))
        raise ListingIterError(message)
Esempio n. 31
0
 def update_metadata(self, metadata_updates):
     """
     Merge timestamped metadata updates into the database's metadata.

     Metadata values are ``(value, timestamp)`` tuples. An incoming item
     replaces the stored one only when its key is not stored yet or its
     timestamp is newer. To delete a key, set its value to
     ``('', timestamp)``; such empty keys will eventually be removed by
     :func:`reclaim`.

     :param metadata_updates: dict of key -> ``(value, timestamp)``
     """
     current = self.metadata
     if set(metadata_updates).issubset(set(current)):
         # Every key is already known: only touch the database when at
         # least one incoming timestamp is newer than the current one.
         has_newer = any(
             stamp > current[name][1]
             for name, (_value, stamp) in metadata_updates.iteritems())
         if not has_newer:
             return
     with self.get() as conn:
         try:
             row = conn.execute('SELECT metadata FROM %s_stat' %
                                self.db_type).fetchone()
             raw = row[0]
             if raw:
                 md = json.loads(raw)
             else:
                 md = {}
             utf8encodekeys(md)
         except sqlite3.OperationalError as err:
             if 'no such column: metadata' not in str(err):
                 raise
             # The stat table has no metadata column yet: add it.
             conn.execute("""
                 ALTER TABLE %s_stat
                 ADD COLUMN metadata TEXT DEFAULT '' """ % self.db_type)
             md = {}
         for name, pair in metadata_updates.iteritems():
             _value, stamp = pair
             if name in md and not stamp > md[name][1]:
                 # the stored entry is at least as recent; keep it
                 continue
             md[name] = pair
         conn.execute('UPDATE %s_stat SET metadata = ?' % self.db_type,
                      (json.dumps(md), ))
         conn.commit()
Esempio n. 32
0
 def test_extract_tar_fail_max_cont(self):
     """
     Extraction must fail with 400 once the archive would need more
     containers than ``max_containers`` (patched to 1 here).

     The tarball is read in binary mode through a context manager so the
     file handle is closed before the request is sent.
     """
     dir_tree = [
         {'sub_dir1': ['sub1_file1']},
         {'sub_dir2': ['sub2_file1', 'sub2_file2']},
         'f' * 101,
         {'sub_dir3': [{'sub4_dir1': 'sub4_file1'}]},
     ]
     self.build_tar(dir_tree)
     with patch.object(self.bulk, 'max_containers', 1):
         self.app.calls = 0
         tar_path = os.path.join(self.testdir, 'tar_fails.tar')
         with open(tar_path, 'rb') as tar_file:
             body = tar_file.read()
         req = Request.blank('/tar_works_cont_head_fail/acc/',
                             body=body,
                             headers={'Accept': 'application/json'})
         req.headers['transfer-encoding'] = 'chunked'
         resp_body = self.handle_extract_and_iter(req, '')
         self.assertEquals(self.app.calls, 5)
         resp_data = json.loads(resp_body)
         self.assertEquals(resp_data['Response Status'], '400 Bad Request')
         self.assertEquals(resp_data['Response Body'],
                           'More than 1 containers to create from tar.')
    def test_direct_get_container(self):
        """
        direct_get_container returns the response headers and the decoded
        JSON listing, and sends the listing options in the query string.
        """
        headers = HeaderKeyDict({'key': 'value'})
        body = '[{"hash": "8f4e3", "last_modified": "317260", "bytes": 209}]'
        listing_options = {
            'marker': 'marker',
            'prefix': 'prefix',
            'delimiter': 'delimiter',
            'limit': 1000,
        }

        with mocked_http_conn(200, headers, body) as conn:
            resp_headers, resp = direct_client.direct_get_container(
                self.node, self.part, self.account, self.container,
                **listing_options)

        expected_agent = 'direct-client %s' % os.getpid()
        self.assertEqual(conn.req_headers['user-agent'], expected_agent)
        self.assertEqual(headers, resp_headers)
        self.assertEqual(json.loads(body), resp)

        expected_params = ('marker=marker', 'delimiter=delimiter',
                           'limit=1000', 'prefix=prefix', 'format=json')
        for param in expected_params:
            self.assertTrue(param in conn.query_string)
Esempio n. 34
0
    def GET(self, req):
        """
        Handle GET Bucket (List Objects) request

        The validated 'max-keys' value is reported back as-is, while the
        number of entries actually returned is capped by
        ``conf.max_bucket_listing``.

        :param req: the incoming request
        :returns: HTTPOk carrying the XML listing
        """
        listing_cap = self.conf.max_bucket_listing
        requested_max_keys = req.get_validated_param('max-keys', listing_cap)
        # TODO: Separate max_bucket_listing and default_bucket_listing
        max_keys = min(requested_max_keys, listing_cap)

        options = self._parse_request_options(req, max_keys)
        encoding_type, query, listing_type, fetch_owner = options

        resp = req.get_response(self.app, query=query)
        all_objects = json.loads(resp.body)

        # More entries than max_keys means there is at least one more page.
        is_truncated = max_keys > 0 and len(all_objects) > max_keys
        objects = all_objects[:max_keys]

        if listing_type == 'version-2':
            elem = self._build_list_bucket_result_type_two(
                req, objects, is_truncated)
        elif listing_type == 'object-versions':
            elem = self._build_versions_result(req, objects, is_truncated)
        else:
            elem = self._build_list_bucket_result_type_one(
                req, objects, encoding_type, is_truncated)

        self._finish_result(
            req, elem, requested_max_keys, encoding_type, is_truncated)
        self._add_objects_to_result(
            req, elem, objects, encoding_type, listing_type, fetch_owner)

        return HTTPOk(body=tostring(elem), content_type='application/xml')
Esempio n. 35
0
    def GET(self, req):
        """
        Handles List Multipart Uploads

        Lists the ``<bucket> + MULTIUPLOAD_SUFFIX`` container, keeps the
        entries whose name contains exactly one '/' (split below into key
        and upload id) and renders them as a ListMultipartUploadsResult
        document.

        :param req: the incoming request
        :returns: HTTPOk carrying the XML document
        :raises InvalidArgument: if 'encoding-type' is given and is not
                                 'url'
        """
        encoding_type = req.params.get('encoding-type')
        if encoding_type is not None and encoding_type != 'url':
            err_msg = 'Invalid Encoding Method specified in Request'
            raise InvalidArgument('encoding-type', encoding_type, err_msg)

        # TODO: add support for delimiter query.

        keymarker = req.params.get('key-marker', '')
        uploadid = req.params.get('upload-id-marker', '')
        maxuploads = req.get_validated_param('max-uploads',
                                             DEFAULT_MAX_UPLOADS,
                                             DEFAULT_MAX_UPLOADS)

        # Ask for one extra entry so that truncation can be detected.
        query = {
            'format': 'json',
            'limit': maxuploads + 1,
        }

        if uploadid and keymarker:
            query['marker'] = '%s/%s' % (keymarker, uploadid)
        elif keymarker:
            query['marker'] = '%s/~' % (keymarker)
        if 'prefix' in req.params:
            query['prefix'] = req.params['prefix']

        container = req.container_name + MULTIUPLOAD_SUFFIX
        resp = req.get_response(self.app, container=container, query=query)
        listing = json.loads(resp.body)

        # Build a real list (not a lazy filter object) because it is
        # measured with len() and sliced just below.
        objects = [o for o in listing
                   if o.get('name', '').count('/') == 1]

        truncated = len(objects) > maxuploads
        if truncated:
            objects = objects[:maxuploads]

        uploads = []
        # Never filled for now: see the delimiter TODO above.
        prefixes = []
        for o in objects:
            obj, upid = split_path('/' + o['name'], 1, 2)
            uploads.append({
                'key': obj,
                'upload_id': upid,
                'last_modified': o['last_modified']
            })

        nextkeymarker = ''
        nextuploadmarker = ''
        # NOTE(review): with exactly one upload listed the next markers stay
        # empty, even when the listing is truncated -- confirm whether
        # "> 1" is intended before changing it.
        if len(uploads) > 1:
            nextuploadmarker = uploads[-1]['upload_id']
            nextkeymarker = uploads[-1]['key']

        result_elem = Element('ListMultipartUploadsResult')
        SubElement(result_elem, 'Bucket').text = req.container_name
        SubElement(result_elem, 'KeyMarker').text = keymarker
        SubElement(result_elem, 'UploadIdMarker').text = uploadid
        SubElement(result_elem, 'NextKeyMarker').text = nextkeymarker
        SubElement(result_elem, 'NextUploadIdMarker').text = nextuploadmarker
        if 'prefix' in req.params:
            SubElement(result_elem, 'Prefix').text = req.params['prefix']
        SubElement(result_elem, 'MaxUploads').text = str(maxuploads)
        if encoding_type is not None:
            SubElement(result_elem, 'EncodingType').text = encoding_type
        SubElement(result_elem, 'IsTruncated').text = \
            'true' if truncated else 'false'

        # TODO: don't show uploads which are initiated before this bucket is
        # created.
        for u in uploads:
            upload_elem = SubElement(result_elem, 'Upload')
            SubElement(upload_elem, 'Key').text = u['key']
            SubElement(upload_elem, 'UploadId').text = u['upload_id']
            initiator_elem = SubElement(upload_elem, 'Initiator')
            SubElement(initiator_elem, 'ID').text = req.user_id
            SubElement(initiator_elem, 'DisplayName').text = req.user_id
            owner_elem = SubElement(upload_elem, 'Owner')
            SubElement(owner_elem, 'ID').text = req.user_id
            SubElement(owner_elem, 'DisplayName').text = req.user_id
            SubElement(upload_elem, 'StorageClass').text = 'STANDARD'
            # Drop the last three characters of the timestamp and mark it
            # as UTC with a trailing 'Z'.
            SubElement(upload_elem, 'Initiated').text = \
                u['last_modified'][:-3] + 'Z'

        for p in prefixes:
            elem = SubElement(result_elem, 'CommonPrefixes')
            SubElement(elem, 'Prefix').text = p

        body = tostring(result_elem, encoding_type=encoding_type)

        return HTTPOk(body=body, content_type='application/xml')
Esempio n. 36
0
    def handle_object(self, env, start_response):
        """
        Handles a possible static web request for an object. This object could
        resolve into an index or listing request.

        The request is first tried as-is. Only when it answers 404 are the
        container's index and listing settings consulted.

        :param env: The original WSGI environment dict.
        :param start_response: The original WSGI start_response hook.
        :returns: the response body iterable to hand back to the WSGI server
        """
        # First attempt: the plain object request, tagged as coming from
        # StaticWeb.
        tmp_env = dict(env)
        tmp_env['HTTP_USER_AGENT'] = \
            '%s StaticWeb' % env.get('HTTP_USER_AGENT')
        tmp_env['swift.source'] = 'SW'
        resp = self._app_call(tmp_env)
        status_int = self._get_status_int()
        if is_success(status_int) or is_redirection(status_int):
            # The object exists (or redirects): pass the response through.
            start_response(self._response_status, self._response_headers,
                           self._response_exc_info)
            return resp
        if status_int != HTTP_NOT_FOUND:
            # Any error other than 404 is reported as-is.
            return self._error_response(resp, env, start_response)
        # 404 from here on: look at the container's settings.
        self._get_container_info(env)
        if not self._listings and not self._index:
            # Neither listings nor an index are set: let the wrapped app
            # answer the original request.
            return self.app(env, start_response)
        # Reset to 404 so that the listing fallback below still runs when no
        # index is configured.
        status_int = HTTP_NOT_FOUND
        if self._index:
            # Second attempt: treat the path as a pseudo-directory and look
            # for its index object.
            tmp_env = dict(env)
            tmp_env['HTTP_USER_AGENT'] = \
                '%s StaticWeb' % env.get('HTTP_USER_AGENT')
            tmp_env['swift.source'] = 'SW'
            if tmp_env['PATH_INFO'][-1] != '/':
                tmp_env['PATH_INFO'] += '/'
            tmp_env['PATH_INFO'] += self._index
            resp = self._app_call(tmp_env)
            status_int = self._get_status_int()
            if is_success(status_int) or is_redirection(status_int):
                if env['PATH_INFO'][-1] != '/':
                    # The index exists but the request lacked the trailing
                    # slash: redirect to the slash-terminated path.
                    resp = HTTPMovedPermanently(location=env['PATH_INFO'] +
                                                '/')
                    return resp(env, start_response)
                start_response(self._response_status, self._response_headers,
                               self._response_exc_info)
                return resp
        if status_int == HTTP_NOT_FOUND:
            if env['PATH_INFO'][-1] != '/':
                # Check with a pre-authed container listing whether anything
                # exists under "<obj>/" before redirecting to it.
                tmp_env = make_pre_authed_env(
                    env,
                    'GET',
                    '/%s/%s/%s' % (self.version, self.account, self.container),
                    self.agent,
                    swift_source='SW')
                # NOTE(review): 'limit=1' appears twice in this query string.
                tmp_env['QUERY_STRING'] = 'limit=1&format=json&delimiter' \
                    '=/&limit=1&prefix=%s' % quote(self.obj + '/')
                resp = self._app_call(tmp_env)
                body = ''.join(resp)
                if not is_success(self._get_status_int()) or not body or \
                        not json.loads(body):
                    # Listing failed or is empty: nothing lives under the
                    # prefix, so answer 404.
                    resp = HTTPNotFound()(env, self._start_response)
                    return self._error_response(resp, env, start_response)
                resp = HTTPMovedPermanently(location=env['PATH_INFO'] + '/')
                return resp(env, start_response)
            # The path already ends with a slash: render a listing for it.
            return self._listing(env, start_response, self.obj)
Esempio n. 37
0
    def POST(self, req):
        """
        Handles Complete Multipart Upload.

        Checks the parts listed in the request body against the objects
        stored under ``<object>/<upload id>/`` in the segments container,
        PUTs a manifest made of those parts, deletes the
        ``<object>/<upload id>`` object and answers with a
        CompleteMultipartUploadResult document.

        :param req: the incoming request; 'uploadId' must be in its params
        :returns: the manifest PUT response, rewritten with status 200 and
                  an XML body
        :raises InvalidPartOrder: if part numbers are not strictly increasing
        :raises InvalidPart: if a part is unknown or its ETag does not match
        :raises MalformedXML: if the request body fails XML parsing or
                              validation
        :raises EntityTooSmall: if the manifest PUT is rejected because a
                                segment is too small
        """
        upload_id = req.params['uploadId']
        resp = _get_upload_info(req, self.app, upload_id)
        headers = {}
        # Carry the user metadata of the upload over to the final object:
        # 'x-amz-meta-' is 11 characters long, hence the [11:] slice.
        for key, val in resp.headers.iteritems():
            _key = key.lower()
            if _key.startswith('x-amz-meta-'):
                headers['x-object-meta-' + _key[11:]] = val

        # Query for the objects in the segments area to make sure it completed
        query = {
            'format': 'json',
            'prefix': '%s/%s/' % (req.object_name, upload_id),
            'delimiter': '/'
        }

        container = req.container_name + MULTIUPLOAD_SUFFIX
        resp = req.get_response(self.app, 'GET', container, '', query=query)
        objinfo = json.loads(resp.body)
        # Map each stored part name to the manifest entry describing it.
        objtable = dict((o['name'], {
            'path': '/'.join(['', container, o['name']]),
            'etag': o['hash'],
            'size_bytes': o['bytes']
        }) for o in objinfo)

        manifest = []
        previous_number = 0
        try:
            xml = req.xml(MAX_COMPLETE_UPLOAD_BODY_SIZE)
            complete_elem = fromstring(xml, 'CompleteMultipartUpload')
            for part_elem in complete_elem.iterchildren('Part'):
                part_number = int(part_elem.find('./PartNumber').text)

                # Parts must be listed in strictly increasing order.
                if part_number <= previous_number:
                    raise InvalidPartOrder(upload_id=upload_id)
                previous_number = part_number

                etag = part_elem.find('./ETag').text
                if len(etag) >= 2 and etag[0] == '"' and etag[-1] == '"':
                    # strip double quotes
                    etag = etag[1:-1]

                info = objtable.get("%s/%s/%s" %
                                    (req.object_name, upload_id, part_number))
                if info is None or info['etag'] != etag:
                    raise InvalidPart(upload_id=upload_id,
                                      part_number=part_number)

                manifest.append(info)
        except (XMLSyntaxError, DocumentInvalid):
            raise MalformedXML()
        except ErrorResponse:
            # Re-raised untouched so the generic handler below does not log
            # it.
            raise
        except Exception as e:
            LOGGER.error(e)
            raise

        try:
            # TODO: add support for versioning
            resp = req.get_response(self.app,
                                    'PUT',
                                    body=json.dumps(manifest),
                                    query={'multipart-manifest': 'put'},
                                    headers=headers)
        except BadSwiftRequest as e:
            msg = str(e)
            if msg.startswith('Each segment, except the last, '
                              'must be at least '):
                # FIXME: AWS S3 allows a smaller object than 5 MB if there is
                # only one part.  Use a COPY request to copy the part object
                # from the segments container instead.
                raise EntityTooSmall(msg)
            else:
                raise

        # Remove the '<object>/<upload id>' object from the segments
        # container (presumably the marker created when the upload was
        # initiated -- confirm against the initiate handler).
        obj = '%s/%s' % (req.object_name, upload_id)
        req.get_response(self.app, 'DELETE', container, obj)

        result_elem = Element('CompleteMultipartUploadResult')
        SubElement(result_elem, 'Location').text = req.host_url + req.path
        SubElement(result_elem, 'Bucket').text = req.container_name
        SubElement(result_elem, 'Key').text = req.object_name
        SubElement(result_elem, 'ETag').text = resp.etag

        # Reuse the manifest PUT response, replacing its body, status and
        # content type.
        resp.body = tostring(result_elem)
        resp.status = 200
        resp.content_type = "application/xml"

        return resp
Esempio n. 38
0
    def GETorHEAD(self, req):
        """
        Handle a GET or HEAD request on a part of a multipart object.

        The manifest of the object is fetched first. The request is then
        redirected to the segment matching the part number, and the manifest
        response is returned with the status, body and Content-Length of
        that segment.

        :param req: the incoming request; its container/object names and
                    conditional headers are modified in place
        :returns: the manifest response updated with the part's status, body
                  and length, or the plain object response when the object
                  is not an SLO and part 1 was requested
        :raises InvalidRange: if the object is not an SLO and the part is
                              not 1, or if the part number is past the end
                              of the manifest
        """
        part_number = self.parse_part_number(req)

        had_match = False
        for match_header in ('if-match', 'if-none-match'):
            if match_header not in req.headers:
                continue
            had_match = True
            for value in list_from_csv(req.headers[match_header]):
                if value.startswith('"') and value.endswith('"'):
                    value = value[1:-1]
                if value.endswith('-N'):
                    # Deal with fake S3-like etags for SLOs uploaded via Swift
                    req.headers[match_header] += ', ' + value[:-2]

        if had_match:
            # Update where to look
            update_etag_is_at_header(req, sysmeta_header('object', 'etag'))

        # Get the list of parts. Must be raw to get all response headers.
        slo_resp = req.get_response(self.app,
                                    'GET',
                                    req.container_name,
                                    req.object_name,
                                    query={
                                        'multipart-manifest': 'get',
                                        'format': 'raw'
                                    })

        # Check if the object is really a SLO. If not, and user asked
        # for the first part, do a regular request.
        if 'X-Static-Large-Object' not in slo_resp.sw_headers:
            if part_number == 1:
                if slo_resp.is_success and req.method == 'HEAD':
                    # Clear body
                    slo_resp.body = ''
                return slo_resp
            else:
                close_if_possible(slo_resp.app_iter)
                raise InvalidRange()

        # Locate the part
        slo = json.loads(slo_resp.body)
        try:
            # Part numbers are 1-based, manifest entries 0-based.
            # NOTE(review): a part number below 1 would index from the end
            # of the list -- presumably parse_part_number rejects it; verify.
            part = slo[part_number - 1]
        except IndexError:
            raise InvalidRange()

        # Redirect the request on the part
        _, req.container_name, req.object_name = part['path'].split('/', 2)
        # XXX enforce container_name and object_name to be <str>
        # or it will raise issues in swift3/requests when merging both
        req.container_name = req.container_name.encode('utf-8')
        req.object_name = req.object_name.encode('utf8')
        # The etag check was performed with the manifest
        if had_match:
            for match_header in ('if-match', 'if-none-match'):
                req.headers.pop(match_header, None)
        resp = req.get_response(self.app)

        # Replace status
        slo_resp.status = resp.status
        # Replace body
        slo_resp.app_iter = resp.app_iter
        # Update with the size of the part
        slo_resp.headers['Content-Length'] = \
            resp.headers.get('Content-Length', 0)
        slo_resp.sw_headers['Content-Length'] = \
            slo_resp.headers['Content-Length']
        # Add the number of parts in this object
        slo_resp.headers['X-Amz-Mp-Parts-Count'] = len(slo)
        return slo_resp
Esempio n. 39
0
    def handle_container_listing(self, env, start_response):
        """
        Handle a container listing, emulating a versioning container when
        the requested container name carries the versioning suffix.

        For such a request the listing is made with versions enabled, the
        latest version of each object is dropped unless it is marked
        deleted, and the remaining entries are renamed to include their
        version. Any other request gets the regular listing untouched.

        :param env: the WSGI environment of the listing request
        :param start_response: the WSGI start_response hook
        :returns: the response body (a JSON string for a rewritten versioned
                  listing, otherwise the parent handler's result)
        """
        # This code may be clearer by using Request(env).get_response()
        # instead of self._app_call(env)
        api_vers, account, container_name = split_path(
            env['PATH_INFO'], 3, 3, True)
        sub_env = env.copy()
        orig_container = get_unversioned_container(container_name)
        if orig_container != container_name:
            # Check that container_name is actually the versioning
            # container for orig_container
            sub_env['PATH_INFO'] = '/%s/%s/%s' % (api_vers, account,
                                                  orig_container)
            info = get_container_info(sub_env, self.app,
                                      swift_source='VW')
            vers_loc = info.get('sysmeta', {}).get('versions-location')
            # Sometimes we receive versioned listing requests whereas
            # versioning is not enabled (vers_loc is None or empty).
            if vers_loc and vers_loc != container_name:
                # The container specified in the request ends with the
                # versioning suffix, but user has asked the versions to
                # be saved elsewhere, thus we will consider this as a
                # regular listing request.
                orig_container = container_name

        if orig_container != container_name:
            # Strip the version part from marker and prefix, and force a
            # JSON listing so that it can be post-processed below.
            qs = parse_qs(sub_env.get('QUERY_STRING', ''))
            if 'marker' in qs:
                marker, _ = swift3_split_object_name_version(qs['marker'][0])
                qs['marker'] = [marker]
            if 'prefix' in qs:
                prefix, _ = swift3_split_object_name_version(qs['prefix'][0])
                qs['prefix'] = prefix
            qs['format'] = 'json'
            sub_env['QUERY_STRING'] = urlencode(qs, True)
            sub_env['oio.query'] = {'versions': True}

        # The real start_response is only called at the end, once the
        # headers are final.
        resp = super(OioVersionedWritesContext, self).handle_container_request(
            sub_env, lambda x, y, z: None)

        if orig_container != container_name and \
                self._response_status == '200 OK':
            with closing_if_possible(resp):
                versioned_objects = json.loads("".join(resp))

            # Discard the latest version of each object, because it is
            # not supposed to appear in the versioning container.

            # Also keep object prefixes as some of them may be shadowed
            # from the "main" container.
            latest = dict()
            subdirs = []
            for obj in versioned_objects:
                if 'subdir' in obj:
                    subdirs.append(obj)
                    continue
                ver = int(obj.get('version', '0'))
                known = latest.get(obj['name'])
                # Test for None explicitly: ordering an integer against
                # None only works on Python 2 and is a TypeError on
                # Python 3.
                if known is None or ver > known:
                    latest[obj['name']] = ver
            versioned_objects = [
                obj for obj in versioned_objects
                if 'subdir' not in obj
                and (int(obj.get('version', '0')) != latest[obj['name']]
                     or is_deleted(obj))
            ]

            for obj in versioned_objects:
                obj['name'] = swift3_versioned_object_name(
                    obj['name'], obj.get('version', ''))

            versioned_objects += subdirs
            resp = json.dumps(versioned_objects)
            # The body was rewritten, so Content-Length must be recomputed.
            self._response_headers = [x for x in self._response_headers
                                      if x[0] != 'Content-Length']
            self._response_headers.append(('Content-Length', str(len(resp))))

        start_response(self._response_status, self._response_headers,
                       self._response_exc_info)
        return resp
Esempio n. 40
0
    def POST(self, req):
        """
        Handles Complete Multipart Upload.
        """
        upload_id = req.params['uploadId']
        resp = _get_upload_info(req, self.app, upload_id)
        headers = {'Accept': 'application/json'}
        for key, val in resp.headers.items():
            _key = key.lower()
            if _key.startswith('x-amz-meta-'):
                headers['x-object-meta-' + _key[11:]] = val

        hct_header = sysmeta_header('object', 'has-content-type')
        if resp.sysmeta_headers.get(hct_header) == 'yes':
            content_type = resp.sysmeta_headers.get(
                sysmeta_header('object', 'content-type'))
        elif hct_header in resp.sysmeta_headers:
            # has-content-type is present but false, so no content type was
            # set on initial upload. In that case, we won't set one on our
            # PUT request. Swift will end up guessing one based on the
            # object name.
            content_type = None
        else:
            content_type = resp.headers.get('Content-Type')

        if content_type:
            headers['Content-Type'] = content_type

        # Query for the objects in the segments area to make sure it completed
        query = {
            'format': 'json',
            'prefix': '%s/%s/' % (req.object_name, upload_id),
            'delimiter': '/'
        }

        container = req.container_name + MULTIUPLOAD_SUFFIX
        resp = req.get_response(self.app, 'GET', container, '', query=query)
        objinfo = json.loads(resp.body)
        objtable = dict((o['name'], {
            'path': '/'.join(['', container, o['name']]),
            'etag': o['hash'],
            'size_bytes': o['bytes']
        }) for o in objinfo)

        s3_etag_hasher = md5()
        manifest = []
        previous_number = 0
        try:
            xml = req.xml(MAX_COMPLETE_UPLOAD_BODY_SIZE)
            if not xml:
                raise InvalidRequest(msg='You must specify at least one part')

            complete_elem = fromstring(xml, 'CompleteMultipartUpload',
                                       self.logger)
            for part_elem in complete_elem.iterchildren('Part'):
                part_number = int(part_elem.find('./PartNumber').text)

                if part_number <= previous_number:
                    raise InvalidPartOrder(upload_id=upload_id)
                previous_number = part_number

                etag = part_elem.find('./ETag').text
                if len(etag) >= 2 and etag[0] == '"' and etag[-1] == '"':
                    # strip double quotes
                    etag = etag[1:-1]

                info = objtable.get("%s/%s/%s" %
                                    (req.object_name, upload_id, part_number))
                if info is None or info['etag'] != etag:
                    raise InvalidPart(upload_id=upload_id,
                                      part_number=part_number)

                s3_etag_hasher.update(etag.decode('hex'))
                info['size_bytes'] = int(info['size_bytes'])
                manifest.append(info)
        except (XMLSyntaxError, DocumentInvalid):
            # NB: our schema definitions catch uploads with no parts here
            raise MalformedXML()
        except ErrorResponse:
            raise
        except Exception as e:
            self.logger.error(e)
            raise

        s3_etag = '%s-%d' % (s3_etag_hasher.hexdigest(), len(manifest))
        headers[sysmeta_header('object', 'etag')] = s3_etag
        # Leave base header value blank; SLO will populate
        c_etag = '; s3_etag=%s' % s3_etag
        headers['X-Object-Sysmeta-Container-Update-Override-Etag'] = c_etag

        # Check the size of each segment except the last and make sure they are
        # all more than the minimum upload chunk size
        for info in manifest[:-1]:
            if info['size_bytes'] < self.conf.min_segment_size:
                raise EntityTooSmall()

        def response_iter():
            # NB: XML requires that the XML declaration, if present, be at the
            # very start of the document. Clients *will* call us out on not
            # being valid XML if we pass through whitespace before it.
            # Track whether we've sent anything yet so we can yield out that
            # declaration *first*
            yielded_anything = False

            try:
                try:
                    # TODO: add support for versioning
                    put_resp = req.get_response(self.app,
                                                'PUT',
                                                body=json.dumps(manifest),
                                                query={
                                                    'multipart-manifest':
                                                    'put',
                                                    'heartbeat': 'on'
                                                },
                                                headers=headers)
                    if put_resp.status_int == 202:
                        body = []
                        put_resp.fix_conditional_response()
                        for chunk in put_resp.response_iter:
                            if not chunk.strip():
                                if not yielded_anything:
                                    yield ('<?xml version="1.0" '
                                           'encoding="UTF-8"?>\n')
                                yielded_anything = True
                                yield chunk
                            body.append(chunk)
                        body = json.loads(''.join(body))
                        if body['Response Status'] != '201 Created':
                            raise InvalidRequest(
                                status=body['Response Status'],
                                msg='\n'.join(': '.join(err)
                                              for err in body['Errors']))
                except BadSwiftRequest as e:
                    msg = str(e)
                    expected_msg = ('too small; each segment must be '
                                    'at least 1 byte')
                    if expected_msg in msg:
                        # FIXME: AWS S3 allows a smaller object than 5 MB if
                        # there is only one part.  Use a COPY request to copy
                        # the part object from the segments container instead.
                        raise EntityTooSmall(msg)
                    else:
                        raise

                # clean up the multipart-upload record
                obj = '%s/%s' % (req.object_name, upload_id)
                try:
                    req.get_response(self.app, 'DELETE', container, obj)
                except NoSuchKey:
                    # We know that this existed long enough for us to HEAD
                    pass

                result_elem = Element('CompleteMultipartUploadResult')

                # NOTE: boto with sig v4 appends port to HTTP_HOST value at
                # the request header when the port is non default value and it
                # makes req.host_url like as http://localhost:8080:8080/path
                # that obviously invalid. Probably it should be resolved at
                # swift.common.swob though, tentatively we are parsing and
                # reconstructing the correct host_url info here.
                # in detail, https://github.com/boto/boto/pull/3513
                parsed_url = urlparse(req.host_url)
                host_url = '%s://%s' % (parsed_url.scheme, parsed_url.hostname)
                if parsed_url.port:
                    host_url += ':%s' % parsed_url.port

                SubElement(result_elem, 'Location').text = host_url + req.path
                SubElement(result_elem, 'Bucket').text = req.container_name
                SubElement(result_elem, 'Key').text = req.object_name
                SubElement(result_elem, 'ETag').text = '"%s"' % s3_etag
                resp.headers.pop('ETag', None)
                if yielded_anything:
                    yield '\n'
                yield tostring(result_elem,
                               xml_declaration=not yielded_anything)
            except ErrorResponse as err_resp:
                if yielded_anything:
                    err_resp.xml_declaration = False
                    yield '\n'
                else:
                    # Oh good, we can still change HTTP status code, too!
                    resp.status = err_resp.status
                for chunk in err_resp({}, lambda *a: None):
                    yield chunk

        resp = HTTPOk()  # assume we're good for now... but see above!
        resp.app_iter = reiterate(response_iter())
        resp.content_type = "application/xml"

        return resp
Esempio n. 41
0
    def GETorHEAD(self, req):
        """
        Handle HTTP GET or HEAD requests.

        Authorizes the request against the container's read ACL, fetches the
        object through ``GETorHEAD_base`` and then inspects the response
        headers to decide whether the object is a static (SLO) or dynamic
        (DLO) large-object manifest whose segment listing must be loaded.

        :param req: the incoming request
        :returns: the authorization callback's response when it denies the
                  request, ``HTTPServiceUnavailable`` when an SLO manifest
                  cannot be re-fetched, or ``HTTPNotFound`` /
                  ``err.aresp`` / ``HTTPServerError`` when the DLO segment
                  listing fails.
                  NOTE(review): in this excerpt the success path ends
                  without an explicit ``return``; ``large_object``,
                  ``listing`` and ``listing_page1`` are prepared but not
                  consumed here -- confirm against the complete handler.
        """
        container_info = self.container_info(
            self.account_name, self.container_name, req)
        req.acl = container_info['read_acl']
        if 'swift.authorize' in req.environ:
            # A truthy value from the authorize hook is returned to the
            # caller as-is instead of serving the object.
            aresp = req.environ['swift.authorize'](req)
            if aresp:
                return aresp

        partition = self.app.object_ring.get_part(
            self.account_name, self.container_name, self.object_name)
        resp = self.GETorHEAD_base(
            req, _('Object'), self.app.object_ring, partition, req.path_info)

        if ';' in resp.headers.get('content-type', ''):
            # strip off swift_bytes from content-type
            content_type, check_extra_meta = \
                resp.headers['content-type'].rsplit(';', 1)
            if check_extra_meta.lstrip().startswith('swift_bytes='):
                resp.content_type = content_type

        large_object = None
        # Raw SLO manifest requested (multipart-manifest=get): the body is the
        # manifest itself, so advertise it as JSON.
        if config_true_value(resp.headers.get('x-static-large-object')) and \
                req.params.get('multipart-manifest') == 'get' and \
                'X-Copy-From' not in req.headers and \
                self.app.allow_static_large_object:
            resp.content_type = 'application/json'

        if config_true_value(resp.headers.get('x-static-large-object')) and \
                req.params.get('multipart-manifest') != 'get' and \
                self.app.allow_static_large_object:
            large_object = 'SLO'
            listing_page1 = ()
            listing = []
            lcontainer = None  # container name is included in listing
            if resp.status_int == HTTP_OK and \
                    req.method == 'GET' and not req.range:
                # The response we already hold carries the whole manifest.
                try:
                    listing = json.loads(resp.body)
                except ValueError:
                    listing = []
            else:
                # need to make a second request to get whole manifest
                new_req = req.copy_get()
                new_req.method = 'GET'
                new_req.range = None
                new_resp = self.GETorHEAD_base(
                    new_req, _('Object'), self.app.object_ring, partition,
                    req.path_info)
                if new_resp.status_int // 100 == 2:
                    try:
                        listing = json.loads(new_resp.body)
                    except ValueError:
                        listing = []
                else:
                    return HTTPServiceUnavailable(
                        "Unable to load SLO manifest", request=req)

        if 'x-object-manifest' in resp.headers and \
                req.params.get('multipart-manifest') != 'get':
            large_object = 'DLO'
            # The header value has the form <container>/<prefix>, with both
            # parts URL-quoted.
            lcontainer, lprefix = \
                resp.headers['x-object-manifest'].split('/', 1)
            lcontainer = unquote(lcontainer)
            lprefix = unquote(lprefix)
            try:
                pages_iter = iter(self._listing_pages_iter(lcontainer, lprefix,
                                                           req.environ))
                # Pull the first page eagerly so listing errors surface here
                # and can be mapped to a response; next() and "except .. as"
                # are valid on both Python 2.6+ and Python 3.
                listing_page1 = next(pages_iter)
                listing = itertools.chain(listing_page1,
                                          self._remaining_items(pages_iter))
            except ListingIterNotFound:
                return HTTPNotFound(request=req)
            except ListingIterNotAuthorized as err:
                return err.aresp
            except ListingIterError:
                return HTTPServerError(request=req)
Esempio n. 42
0
    def test_get_endpoint(self):
        # Exercises the list_endpoints middleware for object, container and
        # account paths, quoted/unquoted names, disallowed methods,
        # pass-through of unrelated paths and custom endpoint prefixes.
        #
        # Expected results for objects taken from test_ring
        # Expected results for others computed by manually invoking
        # ring.get_nodes().
        resp = Request.blank('/endpoints/a/c/o1').get_response(
            self.list_endpoints)
        self.assertEqual(resp.status_int, 200)
        self.assertEqual(resp.content_type, 'application/json')
        self.assertEqual(json.loads(resp.body), [
            "http://10.1.1.1:6000/sdb1/1/a/c/o1",
            "http://10.1.2.2:6000/sdd1/1/a/c/o1"
        ])

        # Here, 'o1/' is the object name.
        resp = Request.blank('/endpoints/a/c/o1/').get_response(
            self.list_endpoints)
        self.assertEqual(resp.status_int, 200)
        self.assertEqual(json.loads(resp.body), [
            "http://10.1.1.1:6000/sdb1/3/a/c/o1/",
            "http://10.1.2.2:6000/sdd1/3/a/c/o1/"
        ])

        # container path
        resp = Request.blank('/endpoints/a/c2').get_response(
            self.list_endpoints)
        self.assertEqual(resp.status_int, 200)
        self.assertEqual(json.loads(resp.body), [
            "http://10.1.1.1:6000/sda1/2/a/c2",
            "http://10.1.2.1:6000/sdc1/2/a/c2"
        ])

        # account path
        resp = Request.blank('/endpoints/a1').get_response(
            self.list_endpoints)
        self.assertEqual(resp.status_int, 200)
        self.assertEqual(json.loads(resp.body), [
            "http://10.1.2.1:6000/sdc1/0/a1",
            "http://10.1.1.1:6000/sda1/0/a1",
            "http://10.1.1.1:6000/sdb1/0/a1"
        ])

        # no account at all is a bad request
        resp = Request.blank('/endpoints/').get_response(
            self.list_endpoints)
        self.assertEqual(resp.status_int, 400)

        # unquoted and quoted spaces must yield the same quoted endpoints
        resp = Request.blank('/endpoints/a/c 2').get_response(
            self.list_endpoints)
        self.assertEqual(resp.status_int, 200)
        self.assertEqual(json.loads(resp.body), [
            "http://10.1.1.1:6000/sdb1/3/a/c%202",
            "http://10.1.2.2:6000/sdd1/3/a/c%202"
        ])

        resp = Request.blank('/endpoints/a/c%202').get_response(
            self.list_endpoints)
        self.assertEqual(resp.status_int, 200)
        self.assertEqual(json.loads(resp.body), [
            "http://10.1.1.1:6000/sdb1/3/a/c%202",
            "http://10.1.2.2:6000/sdd1/3/a/c%202"
        ])

        resp = Request.blank('/endpoints/ac%20count/con%20tainer/ob%20ject') \
            .get_response(self.list_endpoints)
        self.assertEqual(resp.status_int, 200)
        self.assertEqual(json.loads(resp.body), [
            "http://10.1.1.1:6000/sdb1/3/ac%20count/con%20tainer/ob%20ject",
            "http://10.1.2.2:6000/sdd1/3/ac%20count/con%20tainer/ob%20ject"
        ])

        # only GET is allowed on the endpoints path
        resp = Request.blank('/endpoints/a/c/o1', {'REQUEST_METHOD': 'POST'}) \
            .get_response(self.list_endpoints)
        self.assertEqual(resp.status_int, 405)
        self.assertEqual(resp.status, '405 Method Not Allowed')
        self.assertEqual(resp.headers['allow'], 'GET')

        # paths outside the endpoints prefix fall through to the wrapped app
        resp = Request.blank('/not-endpoints').get_response(
            self.list_endpoints)
        self.assertEqual(resp.status_int, 200)
        self.assertEqual(resp.status, '200 OK')
        self.assertEqual(resp.body, 'FakeApp')

        # test custom path with trailing slash
        custom_path_le = list_endpoints.filter_factory({
            'swift_dir': self.testdir,
            'list_endpoints_path': '/some/another/path/'
        })(self.app)
        resp = Request.blank('/some/another/path/a/c/o1') \
            .get_response(custom_path_le)
        self.assertEqual(resp.status_int, 200)
        self.assertEqual(resp.content_type, 'application/json')
        self.assertEqual(json.loads(resp.body), [
            "http://10.1.1.1:6000/sdb1/1/a/c/o1",
            "http://10.1.2.2:6000/sdd1/1/a/c/o1"
        ])

        # test custom path without trailing slash
        custom_path_le = list_endpoints.filter_factory({
            'swift_dir': self.testdir,
            'list_endpoints_path': '/some/another/path'
        })(self.app)
        resp = Request.blank('/some/another/path/a/c/o1') \
            .get_response(custom_path_le)
        self.assertEqual(resp.status_int, 200)
        self.assertEqual(resp.content_type, 'application/json')
        self.assertEqual(json.loads(resp.body), [
            "http://10.1.1.1:6000/sdb1/1/a/c/o1",
            "http://10.1.2.2:6000/sdd1/1/a/c/o1"
        ])
Esempio n. 43
0
    def GET(self, req):
        """
        Handle GET Bucket (List Objects) request

        Maps the S3 listing parameters onto a JSON container listing and
        renders the result as a ``ListBucketResult`` XML document.
        """
        requested_max_keys = req.get_validated_param(
            'max-keys', CONF.max_bucket_listing)
        # TODO: Separate max_bucket_listing and default_bucket_listing
        # The client's value is echoed back in <MaxKeys>; the listing itself
        # is capped at the configured maximum.
        limit = min(requested_max_keys, CONF.max_bucket_listing)

        encoding_type = req.params.get('encoding-type')
        if encoding_type not in (None, 'url'):
            raise InvalidArgument(
                'encoding-type', encoding_type,
                'Invalid Encoding Method specified in Request')

        # Ask for one entry more than needed so truncation can be detected.
        query = {'format': 'json', 'limit': limit + 1}
        for param in ('marker', 'prefix', 'delimiter'):
            if param in req.params:
                query[param] = req.params[param]

        resp = req.get_response(self.app, query=query)
        listing = json.loads(resp.body)

        root = Element('ListBucketResult')
        SubElement(root, 'Name').text = req.container_name
        SubElement(root, 'Prefix').text = req.params.get('prefix')
        SubElement(root, 'Marker').text = req.params.get('marker')

        # The listing is truncated when the extra (limit + 1 th) entry came
        # back from swift.
        truncated = limit > 0 and len(listing) > limit
        listing = listing[:limit]

        has_delimiter = 'delimiter' in req.params
        if truncated and has_delimiter:
            last_entry = listing[-1]
            for field in ('name', 'subdir'):
                if field in last_entry:
                    SubElement(root, 'NextMarker').text = last_entry[field]

        SubElement(root, 'MaxKeys').text = str(requested_max_keys)

        if has_delimiter:
            SubElement(root, 'Delimiter').text = req.params['delimiter']

        if encoding_type is not None:
            SubElement(root, 'EncodingType').text = encoding_type

        if truncated:
            SubElement(root, 'IsTruncated').text = 'true'
        else:
            SubElement(root, 'IsTruncated').text = 'false'

        # All <Contents> entries are emitted before any <CommonPrefixes>.
        plain_objects = [item for item in listing if 'subdir' not in item]
        subdirs = [item for item in listing if 'subdir' in item]

        for item in plain_objects:
            contents = SubElement(root, 'Contents')
            SubElement(contents, 'Key').text = item['name']
            SubElement(contents, 'LastModified').text = \
                item['last_modified'][:-3] + 'Z'
            SubElement(contents, 'ETag').text = '"%s"' % item['hash']
            SubElement(contents, 'Size').text = str(item['bytes'])
            owner = SubElement(contents, 'Owner')
            SubElement(owner, 'ID').text = req.user_id
            SubElement(owner, 'DisplayName').text = req.user_id
            SubElement(contents, 'StorageClass').text = 'STANDARD'

        for item in subdirs:
            common_prefixes = SubElement(root, 'CommonPrefixes')
            SubElement(common_prefixes, 'Prefix').text = item['subdir']

        return HTTPOk(body=tostring(root, encoding_type=encoding_type),
                      content_type='application/xml')
Esempio n. 44
0
    def handle_object(self, env, start_response):
        """
        Handles a possible static web request for an object. This object could
        resolve into an index or listing request.

        The object itself is requested first. A successful or redirect
        response is passed through unless it looks like a directory marker
        (content type equal to ``self._dir_type`` and at most one byte long),
        which is treated as not found. For a not-found object the method
        tries, in order: the index object (when ``self._index`` is set), a
        redirect to the slash-terminated path, and a listing.

        :param env: The original WSGI environment dict.
        :param start_response: The original WSGI start_response hook.
        :returns: the result of whichever response path was taken
        """
        # Work on a copy of the environment so the caller's env is untouched;
        # tag the sub-request's user agent and source as StaticWeb ('SW').
        tmp_env = dict(env)
        tmp_env['HTTP_USER_AGENT'] = \
            '%s StaticWeb' % env.get('HTTP_USER_AGENT')
        tmp_env['swift.source'] = 'SW'
        resp = self._app_call(tmp_env)
        status_int = self._get_status_int()
        # NOTE(review): presumably populates the per-container settings read
        # below (self._dir_type, self._listings, self._index) -- confirm
        # against _get_container_info.
        self._get_container_info(env)
        if is_success(status_int) or is_redirection(status_int):
            # Treat directory marker objects as not found
            if not self._dir_type:
                self._dir_type = 'application/directory'
            content_length = self._response_header_value('content-length')
            content_length = int(content_length) if content_length else 0
            if self._response_header_value('content-type') == self._dir_type \
                    and content_length <= 1:
                status_int = HTTP_NOT_FOUND
            else:
                # A real object: relay the captured status/headers and body.
                start_response(self._response_status, self._response_headers,
                               self._response_exc_info)
                return resp
        if status_int != HTTP_NOT_FOUND:
            # Retaining the previous code's behavior of not using custom error
            # pages for non-404 errors.
            self._error = None
            return self._error_response(resp, env, start_response)
        # From here on the object is (or is treated as) not found.
        if not self._listings and not self._index:
            # Neither listings nor an index are configured: pass the
            # captured response through unchanged.
            start_response(self._response_status, self._response_headers,
                           self._response_exc_info)
            return resp
        # Default so that, without an index, the 404 branch below runs.
        status_int = HTTP_NOT_FOUND
        if self._index:
            # Retry the request with the index object name appended to the
            # (slash-terminated) path.
            tmp_env = dict(env)
            tmp_env['HTTP_USER_AGENT'] = \
                '%s StaticWeb' % env.get('HTTP_USER_AGENT')
            tmp_env['swift.source'] = 'SW'
            if tmp_env['PATH_INFO'][-1] != '/':
                tmp_env['PATH_INFO'] += '/'
            tmp_env['PATH_INFO'] += self._index
            resp = self._app_call(tmp_env)
            status_int = self._get_status_int()
            if is_success(status_int) or is_redirection(status_int):
                # The index exists. If the original path lacked a trailing
                # slash, redirect to the slash-terminated path instead of
                # serving the index body directly.
                if env['PATH_INFO'][-1] != '/':
                    resp = HTTPMovedPermanently(location=env['PATH_INFO'] +
                                                '/')
                    return resp(env, start_response)
                start_response(self._response_status, self._response_headers,
                               self._response_exc_info)
                return resp
        # NOTE(review): if the index request above failed with a status other
        # than 404, this condition is false and the method falls off the end,
        # implicitly returning None.
        if status_int == HTTP_NOT_FOUND:
            if env['PATH_INFO'][-1] != '/':
                # Pre-authorized container listing limited to a single entry
                # under the "<obj>/" prefix, used to check whether anything
                # exists beneath this path.
                tmp_env = make_pre_authed_env(
                    env,
                    'GET',
                    '/%s/%s/%s' % (self.version, self.account, self.container),
                    self.agent,
                    swift_source='SW')
                tmp_env['QUERY_STRING'] = 'limit=1&format=json&delimiter' \
                    '=/&limit=1&prefix=%s' % quote(self.obj + '/')
                resp = self._app_call(tmp_env)
                body = ''.join(resp)
                # A failed listing, an empty body or an empty JSON list all
                # mean there is nothing under the prefix: answer 404.
                if not is_success(self._get_status_int()) or not body or \
                        not json.loads(body):
                    resp = HTTPNotFound()(env, self._start_response)
                    return self._error_response(resp, env, start_response)
                # Something exists under the prefix: redirect to the
                # slash-terminated path.
                resp = HTTPMovedPermanently(location=env['PATH_INFO'] + '/')
                return resp(env, start_response)
            return self._listing(env, start_response, self.obj)
Esempio n. 45
0
    def GET(self, req):
        """
        Handle GET Bucket (List Objects) request

        Three listing flavours are served, selected by query parameters:
        ``versions`` (rendered as ``ListVersionsResult``), ``list-type=2``
        (``ListBucketResult`` with continuation tokens) and the default
        version-1 listing (``ListBucketResult`` with markers). The container
        is listed as JSON and converted to the matching XML document.

        :param req: the S3 request being handled
        :returns: ``HTTPOk`` carrying the XML listing
        :raises InvalidArgument: if ``encoding-type`` is given and is not
                                 ``url``, or if ``version-id-marker`` is
                                 given without ``key-marker``
        """

        max_keys = req.get_validated_param('max-keys',
                                           self.conf.max_bucket_listing)
        # TODO: Separate max_bucket_listing and default_bucket_listing
        # tag_max_keys is what the client asked for and is echoed back in
        # <MaxKeys>; max_keys is capped at the configured maximum.
        tag_max_keys = max_keys
        max_keys = min(max_keys, self.conf.max_bucket_listing)

        encoding_type = req.params.get('encoding-type')
        if encoding_type is not None and encoding_type != 'url':
            err_msg = 'Invalid Encoding Method specified in Request'
            raise InvalidArgument('encoding-type', encoding_type, err_msg)

        # Request one extra entry so truncation can be detected below.
        query = {
            'format': 'json',
            'limit': max_keys + 1,
        }
        if 'prefix' in req.params:
            query.update({'prefix': req.params['prefix']})
        if 'delimiter' in req.params:
            query.update({'delimiter': req.params['delimiter']})
        fetch_owner = False
        if 'versions' in req.params:
            listing_type = 'object-versions'
            if 'key-marker' in req.params:
                query.update({'marker': req.params['key-marker']})
            elif 'version-id-marker' in req.params:
                err_msg = ('A version-id marker cannot be specified without '
                           'a key marker.')
                raise InvalidArgument('version-id-marker',
                                      req.params['version-id-marker'], err_msg)
        elif int(req.params.get('list-type', '1')) == 2:
            listing_type = 'version-2'
            if 'start-after' in req.params:
                query.update({'marker': req.params['start-after']})
            # continuation-token overrides start-after
            if 'continuation-token' in req.params:
                decoded = b64decode(req.params['continuation-token'])
                query.update({'marker': decoded})
            if 'fetch-owner' in req.params:
                fetch_owner = config_true_value(req.params['fetch-owner'])
        else:
            listing_type = 'version-1'
            if 'marker' in req.params:
                query.update({'marker': req.params['marker']})

        resp = req.get_response(self.app, query=query)

        objects = json.loads(resp.body)

        # in order to judge that truncated is valid, check whether
        # max_keys + 1 th element exists in swift.
        is_truncated = max_keys > 0 and len(objects) > max_keys
        objects = objects[:max_keys]

        if listing_type == 'object-versions':
            elem = Element('ListVersionsResult')
            SubElement(elem, 'Name').text = req.container_name
            SubElement(elem, 'Prefix').text = req.params.get('prefix')
            SubElement(elem, 'KeyMarker').text = req.params.get('key-marker')
            SubElement(
                elem,
                'VersionIdMarker').text = req.params.get('version-id-marker')
            if is_truncated:
                if 'name' in objects[-1]:
                    SubElement(elem, 'NextKeyMarker').text = \
                        objects[-1]['name']
                if 'subdir' in objects[-1]:
                    SubElement(elem, 'NextKeyMarker').text = \
                        objects[-1]['subdir']
                SubElement(elem, 'NextVersionIdMarker').text = 'null'
        else:
            elem = Element('ListBucketResult')
            SubElement(elem, 'Name').text = req.container_name
            SubElement(elem, 'Prefix').text = req.params.get('prefix')
            if listing_type == 'version-1':
                SubElement(elem, 'Marker').text = req.params.get('marker')
                if is_truncated and 'delimiter' in req.params:
                    if 'name' in objects[-1]:
                        name = objects[-1]['name']
                    else:
                        name = objects[-1]['subdir']
                    if encoding_type == 'url':
                        # Encode to UTF-8 before quoting, exactly as done for
                        # <Key> and <CommonPrefixes>/<Prefix> below; quoting
                        # a non-ASCII unicode name directly fails on Python 2.
                        name = quote(name.encode('utf-8'))
                    SubElement(elem, 'NextMarker').text = name
            elif listing_type == 'version-2':
                if is_truncated:
                    # The continuation token is the base64 of the last name
                    # returned; it is decoded back into a marker above.
                    if 'name' in objects[-1]:
                        SubElement(elem, 'NextContinuationToken').text = \
                            b64encode(objects[-1]['name'].encode('utf8'))
                    if 'subdir' in objects[-1]:
                        SubElement(elem, 'NextContinuationToken').text = \
                            b64encode(objects[-1]['subdir'].encode('utf8'))
                if 'continuation-token' in req.params:
                    SubElement(elem, 'ContinuationToken').text = \
                        req.params['continuation-token']
                if 'start-after' in req.params:
                    SubElement(elem, 'StartAfter').text = \
                        req.params['start-after']
                SubElement(elem, 'KeyCount').text = str(len(objects))

        SubElement(elem, 'MaxKeys').text = str(tag_max_keys)

        if 'delimiter' in req.params:
            SubElement(elem, 'Delimiter').text = req.params['delimiter']

        if encoding_type == 'url':
            SubElement(elem, 'EncodingType').text = encoding_type

        SubElement(elem, 'IsTruncated').text = \
            'true' if is_truncated else 'false'

        # Object entries first; common prefixes are appended afterwards.
        for o in objects:
            if 'subdir' not in o:
                name = o['name']
                if encoding_type == 'url':
                    name = quote(name.encode('utf-8'))

                if listing_type == 'object-versions':
                    contents = SubElement(elem, 'Version')
                    SubElement(contents, 'Key').text = name
                    SubElement(contents, 'VersionId').text = 'null'
                    SubElement(contents, 'IsLatest').text = 'true'
                else:
                    contents = SubElement(elem, 'Contents')
                    SubElement(contents, 'Key').text = name
                # Drop the last three characters of the timestamp and mark
                # it as UTC with a trailing 'Z'.
                SubElement(contents, 'LastModified').text = \
                    o['last_modified'][:-3] + 'Z'
                if 's3_etag' in o:
                    # New-enough MUs are already in the right format
                    etag = o['s3_etag']
                elif 'slo_etag' in o:
                    # SLOs may be in something *close* to the MU format
                    etag = '"%s-N"' % o['slo_etag'].strip('"')
                else:
                    # Normal objects just use the MD5
                    etag = '"%s"' % o['hash']
                    # This also catches sufficiently-old SLOs, but we have
                    # no way to identify those from container listings
                SubElement(contents, 'ETag').text = etag
                SubElement(contents, 'Size').text = str(o['bytes'])
                # Version-2 listings include the owner only on request.
                if fetch_owner or listing_type != 'version-2':
                    owner = SubElement(contents, 'Owner')
                    SubElement(owner, 'ID').text = req.user_id
                    SubElement(owner, 'DisplayName').text = req.user_id
                SubElement(contents, 'StorageClass').text = 'STANDARD'

        for o in objects:
            if 'subdir' in o:
                common_prefixes = SubElement(elem, 'CommonPrefixes')
                name = o['subdir']
                if encoding_type == 'url':
                    name = quote(name.encode('utf-8'))
                SubElement(common_prefixes, 'Prefix').text = name

        body = tostring(elem)

        return HTTPOk(body=body, content_type='application/xml')
Esempio n. 46
0
    def POST(self, req):
        """
        Handles Complete Multipart Upload.
        """
        upload_id = req.params['uploadId']
        resp = _get_upload_info(req, self.app, upload_id)
        headers = {}
        for key, val in resp.headers.iteritems():
            _key = key.lower()
            if _key.startswith('x-amz-meta-'):
                headers['x-object-meta-' + _key[11:]] = val
            elif _key == 'content-type':
                headers['Content-Type'] = val

        # Query for the objects in the segments area to make sure it completed
        query = {
            'format': 'json',
            'prefix': '%s/%s/' % (req.object_name, upload_id),
            'delimiter': '/'
        }

        container = req.container_name + MULTIUPLOAD_SUFFIX
        resp = req.get_response(self.app, 'GET', container, '', query=query)
        objinfo = json.loads(resp.body)
        objtable = dict((o['name'], {
            'path': '/'.join(['', container, o['name']]),
            'etag': o['hash'],
            'size_bytes': o['bytes']
        }) for o in objinfo)

        etag_hash = md5()
        for obj in objinfo:
            etag_hash.update(unhexlify(obj['hash']))
        s3_etag = "%s-%d" % (etag_hash.hexdigest(), len(objinfo))
        headers['Content-Type'] += ";s3_etag=%s" % s3_etag

        manifest = []
        previous_number = 0
        try:
            xml = req.xml(MAX_COMPLETE_UPLOAD_BODY_SIZE)
            complete_elem = fromstring(xml, 'CompleteMultipartUpload')
            for part_elem in complete_elem.iterchildren('Part'):
                part_number = int(part_elem.find('./PartNumber').text)

                if part_number <= previous_number:
                    raise InvalidPartOrder(upload_id=upload_id)
                previous_number = part_number

                etag = part_elem.find('./ETag').text
                if len(etag) >= 2 and etag[0] == '"' and etag[-1] == '"':
                    # strip double quotes
                    etag = etag[1:-1]

                info = objtable.get("%s/%s/%s" %
                                    (req.object_name, upload_id, part_number))
                if info is None or info['etag'] != etag:
                    raise InvalidPart(upload_id=upload_id,
                                      part_number=part_number)

                info['size_bytes'] = int(info['size_bytes'])
                manifest.append(info)
        except (XMLSyntaxError, DocumentInvalid):
            raise MalformedXML()
        except ErrorResponse:
            raise
        except Exception as e:
            exc_type, exc_value, exc_traceback = sys.exc_info()
            LOGGER.error(e)
            raise exc_type, exc_value, exc_traceback

        # Following swift commit 7f636a5, zero-byte segments aren't allowed,
        # even as the final segment
        empty_seg = None
        if manifest[-1]['size_bytes'] == 0:
            empty_seg = manifest.pop()

            # We'll check the sizes of all except the last segment below, but
            # since we just popped off a zero-byte segment, we should check
            # that last segment, too.
            if manifest and manifest[-1]['size_bytes'] < CONF.min_segment_size:
                raise EntityTooSmall()

        # Check the size of each segment except the last and make sure they are
        # all more than the minimum upload chunk size
        for info in manifest[:-1]:
            if info['size_bytes'] < CONF.min_segment_size:
                raise EntityTooSmall()

        try:
            # TODO: add support for versioning
            if manifest:
                resp = req.get_response(self.app,
                                        'PUT',
                                        body=json.dumps(manifest),
                                        query={'multipart-manifest': 'put'},
                                        headers=headers)
            else:
                # the upload must have consisted of a single zero-length part
                # just write it directly
                resp = req.get_response(self.app,
                                        'PUT',
                                        body='',
                                        headers=headers)
        except BadSwiftRequest as e:
            msg = str(e)
            expected_msg = 'too small; each segment must be at least 1 byte'
            if expected_msg in msg:
                # FIXME: AWS S3 allows a smaller object than 5 MB if there is
                # only one part.  Use a COPY request to copy the part object
                # from the segments container instead.
                raise EntityTooSmall(msg)
            else:
                raise

        if empty_seg:
            # clean up the zero-byte segment
            _, empty_seg_cont, empty_seg_name = empty_seg['path'].split('/', 2)
            req.get_response(self.app,
                             'DELETE',
                             container=empty_seg_cont,
                             obj=empty_seg_name)

        # clean up the multipart-upload record
        obj = '%s/%s' % (req.object_name, upload_id)
        req.get_response(self.app, 'DELETE', container, obj)

        result_elem = Element('CompleteMultipartUploadResult')

        # NOTE: boto with sig v4 appends port to HTTP_HOST value at the
        # request header when the port is non default value and it makes
        # req.host_url like as http://localhost:8080:8080/path
        # that obviously invalid. Probably it should be resolved at
        # swift.common.swob though, tentatively we are parsing and
        # reconstructing the correct host_url info here.
        # in detail, https://github.com/boto/boto/pull/3513
        parsed_url = urlparse(req.host_url)
        host_url = '%s://%s' % (parsed_url.scheme, parsed_url.hostname)
        if parsed_url.port:
            host_url += ':%s' % parsed_url.port

        SubElement(result_elem, 'Location').text = host_url + req.path
        SubElement(result_elem, 'Bucket').text = req.container_name
        SubElement(result_elem, 'Key').text = req.object_name
        SubElement(result_elem, 'ETag').text = '"%s"' % s3_etag
        del resp.headers['ETag']

        resp.body = tostring(result_elem)
        resp.status = 200
        resp.content_type = "application/xml"

        return resp
Esempio n. 47
0
    def GET(self, req):
        """
        Handles List Parts.

        Pages through the segment container listing under the
        ``<object>/<uploadId>/`` prefix, keeps the entries whose last path
        component is an integer greater than ``part-number-marker``, orders
        them by part number, caps the list at ``max-parts`` and renders the
        outcome as a ``ListPartsResult`` XML document.

        :param req: the incoming request; ``uploadId`` must be present in
                    its params
        :raises InvalidArgument: if ``encoding-type`` is supplied with a
                                 value other than ``url``
        :returns: an HTTPOk response carrying the XML body
        """
        encoding_type = req.params.get('encoding-type')
        if encoding_type not in (None, 'url'):
            err_msg = 'Invalid Encoding Method specified in Request'
            raise InvalidArgument('encoding-type', encoding_type, err_msg)

        upload_id = req.params['uploadId']
        # The return value is not needed here; the call is made before any
        # listing work is done.
        _get_upload_info(req, self.app, upload_id)

        maxparts = req.get_validated_param(
            'max-parts', DEFAULT_MAX_PARTS_LISTING,
            self.conf.max_parts_listing)
        part_num_marker = req.get_validated_param('part-number-marker', 0)

        def is_past_marker(entry):
            # Entries whose basename is not an integer are not parts.
            try:
                return int(os.path.basename(entry['name'])) > part_num_marker
            except ValueError:
                return False

        query = {
            'format': 'json',
            'prefix': '%s/%s/' % (req.object_name, upload_id),
            'delimiter': '/',
            'marker': '',
        }
        container = req.container_name + MULTIUPLOAD_SUFFIX

        # Because the parts are out of order in Swift, we list up to the
        # maximum number of parts and then apply the marker and limit options.
        listing = []
        while True:
            resp = req.get_response(
                self.app, container=container, obj='', query=query)
            page = json.loads(resp.body)
            if not page:
                break
            listing.extend(page)
            # Resume the next page right after the last name seen.
            query['marker'] = page[-1]['name']

        # If the caller requested a list starting at a specific part number,
        # only the entries past that marker survive; order them numerically.
        parts = sorted(
            (entry for entry in listing if is_past_marker(entry)),
            key=lambda entry: int(entry['name'].split('/')[-1]))

        truncated = len(parts) > maxparts
        if truncated:
            parts = parts[:maxparts]
        # TODO: We have to retrieve object list again when truncated is True
        # and some objects filtered by invalid name because there could be no
        # enough objects for limit defined by maxparts.

        last_part = os.path.basename(parts[-1]['name']) if parts else 0

        result_elem = Element('ListPartsResult')
        SubElement(result_elem, 'Bucket').text = req.container_name
        key_text = req.object_name
        if encoding_type == 'url':
            key_text = quote(key_text)
        SubElement(result_elem, 'Key').text = key_text
        SubElement(result_elem, 'UploadId').text = upload_id

        # Initiator and Owner are both reported as the requesting user.
        for tag in ('Initiator', 'Owner'):
            holder_elem = SubElement(result_elem, tag)
            SubElement(holder_elem, 'ID').text = req.user_id
            SubElement(holder_elem, 'DisplayName').text = req.user_id

        SubElement(result_elem, 'StorageClass').text = 'STANDARD'
        SubElement(result_elem, 'PartNumberMarker').text = str(part_num_marker)
        SubElement(result_elem, 'NextPartNumberMarker').text = str(last_part)
        SubElement(result_elem, 'MaxParts').text = str(maxparts)
        if 'encoding-type' in req.params:
            SubElement(result_elem, 'EncodingType').text = \
                req.params['encoding-type']
        SubElement(result_elem, 'IsTruncated').text = \
            'true' if truncated else 'false'

        for entry in parts:
            part_elem = SubElement(result_elem, 'Part')
            fields = (
                ('PartNumber', entry['name'].split('/')[-1]),
                ('LastModified', entry['last_modified'][:-3] + 'Z'),
                ('ETag', '"%s"' % entry['hash']),
                ('Size', str(entry['bytes'])),
            )
            for tag, text in fields:
                SubElement(part_elem, tag).text = text

        return HTTPOk(body=tostring(result_elem),
                      content_type='application/xml')
Esempio n. 48
0
    def test_GET_ACCscope_objAttrs_metadata(self):
        """
        In account scope, give me object attrs
        Should return info for all 3 objects
        we uploaded in setup

        The response is expected to be a JSON list of three single-entry
        dicts, each keyed by the object URI, in the order obj1, obj2, obj3.
        """
        req2 = Request.blank('/v1/TEST_acc1',
                             environ={
                                 'REQUEST_METHOD': 'GET',
                                 'HTTP_X_TIMESTAMP': '0'
                             },
                             headers={
                                 'attributes': Oattrs,
                                 'format': 'json'
                             })
        resp2 = req2.get_response(self.controller)
        self.assertTrue(resp2.status.startswith('200'))
        testList = json.loads(resp2.body)
        self.assertEqual(len(testList), 3)

        # (container, object) for each expected entry, in listing order.
        expected_objects = [
            ('TEST_con1', 'TEST_obj1'),
            ('TEST_con1', 'TEST_obj2'),
            ('TEST_con2', 'TEST_obj3'),
        ]
        for testDict, (con_name, obj_name) in zip(testList,
                                                  expected_objects):
            uri = '/TEST_acc1/%s/%s' % (con_name, obj_name)
            self.assertIn(uri, testDict)
            metaReturned = testDict[uri]

            # Attributes that must match exactly for every object.
            expected_attrs = [
                ('object_uri', uri),
                ('object_name', obj_name),
                ('object_account_name', 'TEST_acc1'),
                ('object_container_name', con_name),
                ('object_uri_create_time', self.t),
                ('object_etag_hash', '0000000000000000'),
                ('object_content_type', 'text/plain'),
                ('object_content_encoding', 'gzip'),
                ('object_content_language', 'en'),
                ('object_meta_TESTCUSTOM', 'CUSTOM'),
            ]
            for attr, value in expected_attrs:
                self.assertEqual(metaReturned[attr], value)
            # The length is compared as a string, so either an int or a
            # str in the payload is accepted.
            self.assertEqual(
                str(metaReturned['object_content_length']), '42')
Esempio n. 49
0
        def response_iter():
            """
            Generate the body of the Complete Multipart Upload response.

            This is a closure: ``req``, ``self``, ``manifest``, ``headers``,
            ``container``, ``upload_id``, ``s3_etag`` and ``resp`` all come
            from the enclosing scope.

            The generator PUTs the manifest with ``heartbeat`` turned on,
            forwards whitespace-only chunks of a 202 response to the caller
            as they arrive, deletes the multipart-upload record and finally
            yields the ``CompleteMultipartUploadResult`` document. If an
            ``ErrorResponse`` is raised along the way, its body is yielded
            instead.
            """
            # NB: XML requires that the XML declaration, if present, be at the
            # very start of the document. Clients *will* call us out on not
            # being valid XML if we pass through whitespace before it.
            # Track whether we've sent anything yet so we can yield out that
            # declaration *first*
            yielded_anything = False

            try:
                try:
                    # TODO: add support for versioning
                    put_resp = req.get_response(self.app,
                                                'PUT',
                                                body=json.dumps(manifest),
                                                query={
                                                    'multipart-manifest':
                                                    'put',
                                                    'heartbeat': 'on'
                                                },
                                                headers=headers)
                    if put_resp.status_int == 202:
                        # Every chunk is collected so the complete body can
                        # be parsed as JSON once the stream ends.
                        body = []
                        put_resp.fix_conditional_response()
                        for chunk in put_resp.response_iter:
                            if not chunk.strip():
                                # Whitespace-only chunk (presumably a
                                # heartbeat keep-alive): pass it through,
                                # emitting the XML declaration first if
                                # nothing has been sent yet.
                                if not yielded_anything:
                                    yield ('<?xml version="1.0" '
                                           'encoding="UTF-8"?>\n')
                                yielded_anything = True
                                yield chunk
                            body.append(chunk)
                        body = json.loads(''.join(body))
                        # Anything other than '201 Created' in the parsed
                        # body is reported as an error, one "a: b" line per
                        # entry in 'Errors'.
                        if body['Response Status'] != '201 Created':
                            raise InvalidRequest(
                                status=body['Response Status'],
                                msg='\n'.join(': '.join(err)
                                              for err in body['Errors']))
                except BadSwiftRequest as e:
                    msg = str(e)
                    expected_msg = ('too small; each segment must be '
                                    'at least 1 byte')
                    if expected_msg in msg:
                        # FIXME: AWS S3 allows a smaller object than 5 MB if
                        # there is only one part.  Use a COPY request to copy
                        # the part object from the segments container instead.
                        raise EntityTooSmall(msg)
                    else:
                        raise

                # clean up the multipart-upload record
                obj = '%s/%s' % (req.object_name, upload_id)
                try:
                    req.get_response(self.app, 'DELETE', container, obj)
                except NoSuchKey:
                    # We know that this existed long enough for us to HEAD
                    pass

                result_elem = Element('CompleteMultipartUploadResult')

                # NOTE: boto with sig v4 appends port to HTTP_HOST value at
                # the request header when the port is non default value and it
                # makes req.host_url like as http://localhost:8080:8080/path
                # that obviously invalid. Probably it should be resolved at
                # swift.common.swob though, tentatively we are parsing and
                # reconstructing the correct host_url info here.
                # in detail, https://github.com/boto/boto/pull/3513
                parsed_url = urlparse(req.host_url)
                host_url = '%s://%s' % (parsed_url.scheme, parsed_url.hostname)
                if parsed_url.port:
                    host_url += ':%s' % parsed_url.port

                SubElement(result_elem, 'Location').text = host_url + req.path
                SubElement(result_elem, 'Bucket').text = req.container_name
                SubElement(result_elem, 'Key').text = req.object_name
                SubElement(result_elem, 'ETag').text = '"%s"' % s3_etag
                resp.headers.pop('ETag', None)
                # If output was already sent, the XML declaration has gone
                # out too, so separate with a newline and do not repeat it.
                if yielded_anything:
                    yield '\n'
                yield tostring(result_elem,
                               xml_declaration=not yielded_anything)
            except ErrorResponse as err_resp:
                if yielded_anything:
                    # Output has already been sent, so only the error body
                    # can still be delivered; suppress a second declaration.
                    err_resp.xml_declaration = False
                    yield '\n'
                else:
                    # Oh good, we can still change HTTP status code, too!
                    resp.status = err_resp.status
                # NOTE(review): err_resp looks like it is invoked as a WSGI
                # app with an empty environ and a no-op start_response, just
                # to obtain its body chunks -- confirm.
                for chunk in err_resp({}, lambda *a: None):
                    yield chunk
Esempio n. 50
0
    def POST(self, req):
        """
        Handles Complete Multipart Upload.

        Copies ``x-amz-meta-*`` headers and the content type recorded on the
        upload record onto the final object, validates the client-supplied
        part list against the segment container listing, PUTs the resulting
        manifest (or an empty object when no parts were listed), removes the
        multipart-upload record and answers with a
        ``CompleteMultipartUploadResult`` document.

        :param req: the incoming request; ``uploadId`` must be present in
                    its params
        :raises InvalidRequest: if the request body is empty
        :raises InvalidPartOrder: if part numbers are not strictly
                                  increasing
        :raises InvalidPart: if a listed part is not in the segment listing
                             or its ETag does not match
        :raises MalformedXML: if the body fails XML parsing or validation
        :raises EntityTooSmall: if any segment but the last is smaller than
                                ``self.conf.min_segment_size``, or the
                                manifest PUT is rejected as too small
        :returns: the PUT response, rewritten to status 200 with the XML
                  result as its body
        """
        upload_id = req.params['uploadId']
        resp = _get_upload_info(req, self.app, upload_id)
        headers = {}
        # items() instead of the Python 2-only iteritems(): identical
        # iteration on Python 2 and does not break on Python 3.
        for key, val in resp.headers.items():
            _key = key.lower()
            if _key.startswith('x-amz-meta-'):
                # 11 == len('x-amz-meta-')
                headers['x-object-meta-' + _key[11:]] = val

        hct_header = sysmeta_header('object', 'has-content-type')
        if resp.sysmeta_headers.get(hct_header) == 'yes':
            content_type = resp.sysmeta_headers.get(
                sysmeta_header('object', 'content-type'))
        elif hct_header in resp.sysmeta_headers:
            # has-content-type is present but false, so no content type was
            # set on initial upload. In that case, we won't set one on our
            # PUT request. Swift will end up guessing one based on the
            # object name.
            content_type = None
        else:
            content_type = resp.headers.get('Content-Type')

        if content_type:
            headers['Content-Type'] = content_type

        # Query for the objects in the segments area to make sure it completed
        query = {
            'format': 'json',
            'prefix': '%s/%s/' % (req.object_name, upload_id),
            'delimiter': '/'
        }

        container = req.container_name + MULTIUPLOAD_SUFFIX
        resp = req.get_response(self.app, 'GET', container, '', query=query)
        objinfo = json.loads(resp.body)
        # Segment name -> manifest entry for that segment.
        objtable = {
            o['name']: {
                'path': '/'.join(['', container, o['name']]),
                'etag': o['hash'],
                'size_bytes': o['bytes'],
            }
            for o in objinfo
        }

        manifest = []
        previous_number = 0
        try:
            xml = req.xml(MAX_COMPLETE_UPLOAD_BODY_SIZE)
            if not xml:
                raise InvalidRequest(msg='You must specify at least one part')

            complete_elem = fromstring(xml, 'CompleteMultipartUpload',
                                       self.logger)
            for part_elem in complete_elem.iterchildren('Part'):
                part_number = int(part_elem.find('./PartNumber').text)

                # Parts must be listed in strictly ascending order.
                if part_number <= previous_number:
                    raise InvalidPartOrder(upload_id=upload_id)
                previous_number = part_number

                etag = part_elem.find('./ETag').text
                if len(etag) >= 2 and etag[0] == '"' and etag[-1] == '"':
                    # strip double quotes
                    etag = etag[1:-1]

                info = objtable.get("%s/%s/%s" %
                                    (req.object_name, upload_id, part_number))
                if info is None or info['etag'] != etag:
                    raise InvalidPart(upload_id=upload_id,
                                      part_number=part_number)

                info['size_bytes'] = int(info['size_bytes'])
                manifest.append(info)
        except (XMLSyntaxError, DocumentInvalid):
            raise MalformedXML()
        except ErrorResponse:
            raise
        except Exception as e:
            # Unexpected failure: record it, then let it propagate unchanged.
            self.logger.error(e)
            raise

        # Check the size of each segment except the last and make sure they are
        # all more than the minimum upload chunk size
        for info in manifest[:-1]:
            if info['size_bytes'] < self.conf.min_segment_size:
                raise EntityTooSmall()

        try:
            # TODO: add support for versioning
            if manifest:
                resp = req.get_response(self.app,
                                        'PUT',
                                        body=json.dumps(manifest),
                                        query={'multipart-manifest': 'put'},
                                        headers=headers)
            else:
                # the upload must have consisted of a single zero-length part
                # just write it directly
                resp = req.get_response(self.app,
                                        'PUT',
                                        body='',
                                        headers=headers)
        except BadSwiftRequest as e:
            msg = str(e)
            expected_msg = 'too small; each segment must be at least 1 byte'
            if expected_msg in msg:
                # FIXME: AWS S3 allows a smaller object than 5 MB if there is
                # only one part.  Use a COPY request to copy the part object
                # from the segments container instead.
                raise EntityTooSmall(msg)
            else:
                raise

        # clean up the multipart-upload record
        obj = '%s/%s' % (req.object_name, upload_id)
        try:
            req.get_response(self.app, 'DELETE', container, obj)
        except NoSuchKey:
            pass  # We know that this existed long enough for us to HEAD

        result_elem = Element('CompleteMultipartUploadResult')

        # NOTE: boto with sig v4 appends port to HTTP_HOST value at the
        # request header when the port is non default value and it makes
        # req.host_url like as http://localhost:8080:8080/path
        # that obviously invalid. Probably it should be resolved at
        # swift.common.swob though, tentatively we are parsing and
        # reconstructing the correct host_url info here.
        # in detail, https://github.com/boto/boto/pull/3513
        parsed_url = urlparse(req.host_url)
        host_url = '%s://%s' % (parsed_url.scheme, parsed_url.hostname)
        if parsed_url.port:
            host_url += ':%s' % parsed_url.port

        SubElement(result_elem, 'Location').text = host_url + req.path
        SubElement(result_elem, 'Bucket').text = req.container_name
        SubElement(result_elem, 'Key').text = req.object_name
        SubElement(result_elem, 'ETag').text = resp.etag

        # Reuse the PUT response object as the reply to the client.
        resp.body = tostring(result_elem)
        resp.status = 200
        resp.content_type = "application/xml"

        return resp
Esempio n. 51
0
    def map_objects_to_targets(self):
        """ Map objects to their local storage server data replicas
        and create a request for the generic backend interface

        Reads the JSON document in ``self.request_in`` (keys used:
        ``request``, ``objects``, ``swift_dir``, ``storage_policy_index``;
        each entry of ``objects`` carries ``object`` and ``device``) and
        stores the result in ``self.request_out`` as a dict with the
        original ``request`` value and an ``objects`` list of
        ``{'object': ..., 'file': ...}`` entries.

        An object whose device is reported unavailable is logged and left
        out of ``self.request_out['objects']``; it is never mapped using the
        disk file of a previously processed object.

        :raises ValueError: if an object path cannot be split into
                            account/container/object
        """
        self.logger.debug('Mapping objects to files')
        self.logger.debug('request_in(first 1024 bytes): %s',
                          str(self.request_in)[:1024])

        request_in_dict = json.loads(self.request_in)
        # TODO consider modifying incoming request in place
        self.request_out = {}
        self.request_out['request'] = request_in_dict['request']
        objects_and_files = []
        oc = ObjectController(self.conf, self.logger)
        self.logger.debug('oc.node_timeout: %s', oc.node_timeout)
        for obj_and_dev in request_in_dict['objects']:
            obj_and_file = {}
            obj_and_file['object'] = obj_and_dev['object']
            self.logger.debug('obj: %s', obj_and_dev)
            try:
                (account, container,
                 obj) = split_path(obj_and_dev['object'].encode('utf-8'), 3, 3,
                                   True)
            except ValueError:
                self.logger.debug('split_path exception')
                raise
            device = obj_and_dev['device']
            # TODO, can can storage_policy_index be determined from storage
            # node to not have to pass from proxy?
            # container_info = get_container_info(
            #     {'PATH_INFO': '/v1/%s/%s' % (account, container)},
            #     self.app, swift_source='LE')
            # storage_policy_index = container_info['storage_policy']
            # obj_ring = self.get_object_ring(storage_policy_index)
            swift_dir = request_in_dict['swift_dir']
            storage_policy_index = request_in_dict['storage_policy_index']
            obj_ring = POLICIES.get_object_ring(storage_policy_index,
                                                swift_dir)
            # need partition, same comment as for storage_policy_index
            partition, nodes = obj_ring.get_nodes(account, container, obj)
            self.logger.debug('Storage nodes: %s', nodes)
            self.logger.debug('partition: %s', partition)
            # scor (aux)
            # key = hash_path(account, container, obj, raw_digest=True)
            key = hash_path(account, container, obj)
            self.logger.debug('hash_path or key: %s', key)

            # Create/use Object Controller to map objects to files
            policy = POLICIES.get_by_index(storage_policy_index)
            self.logger.debug('policy: %s index: %s', policy, str(int(policy)))
            try:
                oc.disk_file = oc.get_diskfile(device,
                                               partition,
                                               account,
                                               container,
                                               obj,
                                               policy=policy)
            except DiskFileDeviceUnavailable:  # scor
                self.logger.error(
                    "Unavailable device: %s, for object: %s, "
                    "storage policy: %s", device, obj_and_dev['object'],
                    policy)
                # Without a disk file for this object there is nothing to
                # map. Falling through would reuse oc.disk_file from the
                # previous iteration (wrong file) or fail on a missing
                # attribute, so skip this object.
                continue
            data_dir = oc.disk_file._datadir
            self.logger.debug('data_dir: %s', data_dir)
            # Swift-on-File detection
            sof_detected = False
            # Get the device path from the object server config file
            devpath = self.conf.get('devices', None)
            # The Swift-on-File device directory is a symlink
            # in the devpath directory constructed like shown below
            sofpath = devpath + '/' + device
            if data_dir.startswith(sofpath) and os.path.islink(sofpath):
                # data_dir starts with sofpath and sofpath is a symlink -> SoF
                sof_detected = True
                self.logger.debug('SOF detected, sofpath: %s, realpath: %s',
                                  sofpath, os.path.realpath(sofpath))
                # Follow the symlink and append a/c/o to get the data file path
                oc._data_file = os.path.realpath(sofpath) + \
                    obj_and_file['object']
            elif not self.gbi_provide_dirpaths_instead_of_filepaths:
                files = os.listdir(oc.disk_file._datadir)
                # DiskFile method got renamed between Liberty and Mitaka
                try:
                    file_info = oc.disk_file._get_ondisk_file(files)
                except AttributeError:
                    file_info = oc.disk_file._get_ondisk_files(files)
                oc._data_file = file_info.get('data_file')
                self.logger.debug('data_file: %s', oc._data_file)
            # Add file path to the request
            self.logger.debug('obj_and_dev: %s', obj_and_dev)
            if (not self.gbi_provide_dirpaths_instead_of_filepaths) or \
               sof_detected:
                obj_and_file['file'] = oc._data_file
            else:
                obj_and_file['file'] = data_dir
            self.logger.debug('obj_and_file: %s', obj_and_file)
            objects_and_files.append(obj_and_file)

        # Truncate the rendered string, not the list, so the message really
        # shows the first 1024 characters.
        self.logger.debug('objects_and_files(first 1024 bytes): %s',
                          str(objects_and_files)[:1024])
        self.request_out['objects'] = objects_and_files

        self.logger.debug('request_in(first 1024 bytes): %s',
                          str(self.request_in)[:1024])
        self.logger.debug('request_out(first 1024 bytes): %s',
                          str(self.request_out)[:1024])
Esempio n. 52
0
    def test_extract_tar_works(self):
        """
        Extracting a tar archive creates the expected number of objects.

        For each of the plain, gzip and bzip2 formats a directory tree is
        built and archived, then uploaded three times: once checking the
        JSON report, once checking the XML report, and once with an
        unsupported Accept type, which must be answered with a 406.
        """
        # On systems where $TMPDIR is long (like OS X), we need to do this
        # or else every upload will fail due to the path being too long.
        self.app.max_pathlen += len(self.testdir)

        def fake_start_response(*args, **kwargs):
            pass

        for compress_format in ['', 'gz', 'bz2']:
            base_name = 'base_works_%s' % compress_format
            dir_tree = [{
                base_name: [
                    {
                        'sub_dir1': ['sub1_file1', 'sub1_file2']
                    },
                    {
                        'sub_dir2': ['sub2_file1', u'test obj \u2661']
                    },
                    'sub_file1',
                    {
                        'sub_dir3': [{
                            'sub4_dir1': '../sub4 file1'
                        }]
                    },
                    {
                        'sub_dir4': None
                    },
                ]
            }]

            build_dir_tree(self.testdir, dir_tree)
            mode = 'w'
            extension = ''
            if compress_format:
                mode += ':' + compress_format
                extension += '.' + compress_format
            tar_path = os.path.join(self.testdir,
                                    'tar_works.tar' + extension)
            # The context manager closes the archive even if add() fails.
            with tarfile.open(name=tar_path, mode=mode) as tar:
                tar.add(os.path.join(self.testdir, base_name))

            # The archive is binary data, so it is opened with 'rb'; each
            # handle is closed as soon as the request using it is finished.
            req = Request.blank('/tar_works/acc/cont/')
            req.headers['transfer-encoding'] = 'chunked'
            with open(tar_path, 'rb') as tar_file:
                req.environ['wsgi.input'] = tar_file
                resp_body = self.handle_extract_and_iter(req, compress_format)
            resp_data = json.loads(resp_body)
            self.assertEqual(resp_data['Number Files Created'], 6)

            # test out xml
            req = Request.blank('/tar_works/acc/cont/')
            req.headers['transfer-encoding'] = 'chunked'
            with open(tar_path, 'rb') as tar_file:
                req.environ['wsgi.input'] = tar_file
                resp_body = self.handle_extract_and_iter(
                    req, compress_format, 'application/xml')
            self.assertIn(
                '<response_status>201 Created</response_status>', resp_body)
            self.assertIn(
                '<number_files_created>6</number_files_created>', resp_body)

            # test out nonexistent format
            req = Request.blank('/tar_works/acc/cont/?extract-archive=tar',
                                headers={'Accept': 'good_xml'})
            req.environ['REQUEST_METHOD'] = 'PUT'
            req.headers['transfer-encoding'] = 'chunked'
            with open(tar_path, 'rb') as tar_file:
                req.environ['wsgi.input'] = tar_file
                # Consume the iterator while the input file is still open.
                app_iter = self.bulk(req.environ, fake_start_response)
                resp_body = ''.join(app_iter)

            self.assertIn('Response Status: 406', resp_body)
Esempio n. 53
0
def parse_and_validate_input(req_body, req_path, min_segment_size):
    """
    Parse a manifest request body and validate each of its segment entries.

    The body must be a JSON list of JSON objects. The returned list holds
    the parsed entries, modified in place as follows:

      * 'etag' and 'path' are left exactly as given;

      * a non-null 'size_bytes' (string "12" or integer 12) is replaced by
        the corresponding integer;

      * a non-empty 'range' string ("M-N", "M-" or "-N") is replaced by the
        swob.Range object built from it; entries without a 'range' key do
        not gain one.

    Validation stops at the first problem found in an entry, so at most one
    error line is reported per entry.

    :param req_body: raw request body, expected to be JSON text
    :param req_path: path of the manifest being written; it is split into
                     version, account and remainder to detect a manifest
                     listing itself as a segment
    :param min_segment_size: minimum size required of every segment except
                             the last one
    :raises: HTTPException on parse errors or semantic errors (e.g. bogus
        JSON structure, syntactically invalid ranges)

    :returns: a list of dictionaries on success
    """
    try:
        segments = json.loads(req_body)
    except ValueError:
        raise HTTPBadRequest("Manifest must be valid JSON.\n")

    if not isinstance(segments, list):
        raise HTTPBadRequest("Manifest must be a list.\n")

    # If we got here, req_path refers to an object, so this won't ever raise
    # ValueError.
    vrs, account, _junk = split_path(req_path, 3, 3, True)
    last_index = len(segments) - 1

    def first_problem(index, segment):
        # Returns the error message for the first failed check on this
        # entry, or None when the entry is acceptable. The checks run in a
        # fixed order and later ones rely on earlier ones having passed.
        if not isinstance(segment, dict):
            return "Index %d: not a JSON object" % index

        absent = sorted(k for k in REQUIRED_SLO_KEYS if k not in segment)
        if absent:
            return ("Index %d: missing keys %s" %
                    (index, ", ".join('"%s"' % (key, ) for key in absent)))

        unknown = sorted(k for k in segment if k not in ALLOWED_SLO_KEYS)
        if unknown:
            return ("Index %d: extraneous keys %s" %
                    (index, ", ".join('"%s"' % (key, ) for key in unknown)))

        path = segment['path']
        if not isinstance(path, basestring):
            return "Index %d: \"path\" must be a string" % index

        etag = segment['etag']
        if etag is not None and not isinstance(etag, basestring):
            return "Index %d: \"etag\" must be a string or null" % index

        if '/' not in path.strip('/'):
            return (
                "Index %d: path does not refer to an object. Path must be of "
                "the form /container/object." % index)

        size = segment['size_bytes']
        if size is not None:
            try:
                size = int(size)
            except (TypeError, ValueError):
                return "Index %d: invalid size_bytes" % index
            else:
                segment['size_bytes'] = size
            # Only the final segment is allowed to be undersized.
            if size < min_segment_size and index < last_index:
                return ("Index %d: too small; each segment, except "
                        "the last, must be at least %d bytes." %
                        (index, min_segment_size))

        obj_path = '/'.join(['', vrs, account, path.lstrip('/')])
        if req_path == quote(obj_path):
            return (
                "Index %d: manifest must not include itself as a segment" %
                index)

        if segment.get('range'):
            try:
                byte_range = Range('bytes=%s' % segment['range'])
            except ValueError:
                return "Index %d: invalid range" % index
            segment['range'] = byte_range

            if len(byte_range.ranges) > 1:
                return ("Index %d: multiple ranges (only one allowed)" %
                        index)

            # If the user *told* us the object's size, we can check range
            # satisfiability right now. If they lied about the size, we'll
            # fail that validation later.
            if (size is not None and
                    len(byte_range.ranges_for_length(size)) != 1):
                return "Index %d: unsatisfiable range" % index

        return None

    errors = []
    for index, segment in enumerate(segments):
        problem = first_problem(index, segment)
        if problem is not None:
            errors.append(problem)

    if errors:
        raise HTTPBadRequest("".join(msg + "\n" for msg in errors),
                             headers={"Content-Type": "text/plain"})

    return segments
Esempio n. 54
0
        def response_iter():
            """
            Generate the body of the Complete Multipart Upload response.

            Issues the manifest PUT with heartbeats enabled, relays
            whitespace-only chunks of a 202 response to the client while the
            PUT is in progress, deletes the multipart-upload record and
            finally yields the CompleteMultipartUploadResult XML. An
            ErrorResponse raised along the way is rendered into the body
            instead; the HTTP status is only changed if nothing has been
            yielded yet.

            Relies on names from the enclosing scope (req, resp, manifest,
            headers, start_time, upload_id, container, s3_etag,
            too_small_message).
            """
            # NB: XML requires that the XML declaration, if present, be at the
            # very start of the document. Clients *will* call us out on not
            # being valid XML if we pass through whitespace before it.
            # Track whether we've sent anything yet so we can yield out that
            # declaration *first*
            yielded_anything = False

            try:
                try:
                    # TODO: add support for versioning
                    put_resp = req.get_response(self.app,
                                                'PUT',
                                                body=json.dumps(manifest),
                                                query={
                                                    'multipart-manifest':
                                                    'put',
                                                    'heartbeat': 'on'
                                                },
                                                headers=headers)
                    if put_resp.status_int == 202:
                        # Whitespace-only chunks are treated as heartbeats
                        # and forwarded; everything else is buffered and
                        # parsed as the JSON result once the stream ends.
                        body = []
                        put_resp.fix_conditional_response()
                        for chunk in put_resp.response_iter:
                            if not chunk.strip():
                                if time.time() - start_time < 10:
                                    # Include some grace period to keep
                                    # ceph-s3tests happy
                                    continue
                                if not yielded_anything:
                                    yield (b'<?xml version="1.0" '
                                           b'encoding="UTF-8"?>\n')
                                yielded_anything = True
                                yield chunk
                                continue
                            body.append(chunk)
                        body = json.loads(b''.join(body))
                        if body['Response Status'] != '201 Created':
                            # Map known per-segment errors onto specific S3
                            # errors; anything else becomes a generic
                            # InvalidRequest carrying all the messages.
                            for seg, err in body['Errors']:
                                if err == too_small_message:
                                    raise EntityTooSmall()
                                elif err in ('Etag Mismatch', '404 Not Found'):
                                    raise InvalidPart(upload_id=upload_id)
                            raise InvalidRequest(
                                status=body['Response Status'],
                                msg='\n'.join(': '.join(err)
                                              for err in body['Errors']))
                except BadSwiftRequest as e:
                    # Same error mapping, but based on the text of the
                    # exception raised for the PUT.
                    msg = str(e)
                    if too_small_message in msg:
                        raise EntityTooSmall(msg)
                    elif ', Etag Mismatch' in msg:
                        raise InvalidPart(upload_id=upload_id)
                    elif ', 404 Not Found' in msg:
                        raise InvalidPart(upload_id=upload_id)
                    else:
                        raise

                # clean up the multipart-upload record
                obj = '%s/%s' % (req.object_name, upload_id)
                try:
                    req.get_response(self.app, 'DELETE', container, obj)
                except NoSuchKey:
                    # We know that this existed long enough for us to HEAD
                    pass

                result_elem = Element('CompleteMultipartUploadResult')

                # NOTE: boto with sig v4 appends port to HTTP_HOST value at
                # the request header when the port is non default value and it
                # makes req.host_url like as http://localhost:8080:8080/path
                # that obviously invalid. Probably it should be resolved at
                # swift.common.swob though, tentatively we are parsing and
                # reconstructing the correct host_url info here.
                # in detail, https://github.com/boto/boto/pull/3513
                parsed_url = urlparse(req.host_url)
                host_url = '%s://%s' % (parsed_url.scheme, parsed_url.hostname)
                # Why are we doing our own port parsing? Because py3 decided
                # to start raising ValueErrors on access after parsing such
                # an invalid port
                # Drop any "user@" part and anything up to a closing "]",
                # then keep only the first port that follows the host.
                netloc = parsed_url.netloc.split('@')[-1].split(']')[-1]
                if ':' in netloc:
                    port = netloc.split(':', 2)[1]
                    host_url += ':%s' % port

                SubElement(result_elem, 'Location').text = host_url + req.path
                SubElement(result_elem, 'Bucket').text = req.container_name
                SubElement(result_elem, 'Key').text = req.object_name
                SubElement(result_elem, 'ETag').text = '"%s"' % s3_etag
                resp.headers.pop('ETag', None)
                if yielded_anything:
                    yield b'\n'
                # The XML declaration is emitted here only if it was not
                # already sent ahead of the heartbeats.
                yield tostring(result_elem,
                               xml_declaration=not yielded_anything)
            except ErrorResponse as err_resp:
                if yielded_anything:
                    # The declaration has already gone out, so the error
                    # document must not repeat it.
                    err_resp.xml_declaration = False
                    yield b'\n'
                else:
                    # Oh good, we can still change HTTP status code, too!
                    resp.status = err_resp.status
                # Render the error document with a dummy environ and a no-op
                # start_response.
                for chunk in err_resp({}, lambda *a: None):
                    yield chunk
Esempio n. 55
0
    def POST(self, req):
        """
        Handles Complete Multipart Upload.

        Checks the parts named in the request body against the segments
        stored for the upload, writes the final object (a manifest PUT, or
        an empty object when the only part is zero bytes long), removes the
        multipart-upload record and answers with a
        CompleteMultipartUploadResult document.

        :param req: the S3 request; its 'uploadId' query parameter and its
                    CompleteMultipartUpload XML body are read
        :returns: the PUT response, rewritten to status 200 with the XML
                  result as its body
        :raises InvalidRequest: if the body is empty or names no part
        :raises BadDigest: if a Content-MD5 was sent and does not match the
                           body
        :raises MalformedXML: if the body cannot be parsed or validated
        :raises InvalidPartOrder: if part numbers are not strictly
                                  increasing
        :raises InvalidPart: if a part is unknown, carries no ETag, or its
                             ETag differs from the stored segment
        :raises EntityTooSmall: if a part that is subject to
                                CONF.min_segment_size is smaller than it
        """
        log_s3api_command(req, 'complete-multipart-upload')
        upload_id = req.params['uploadId']
        resp = _get_upload_info(req, self.app, upload_id)

        # Carry selected headers of the upload record over to the final
        # object: user metadata, content type and the tagging sysmeta.
        headers = {}
        for key, val in resp.headers.iteritems():
            _key = key.lower()
            if _key.startswith('x-amz-meta-'):
                headers['x-object-meta-' + _key[11:]] = val
            elif _key == 'content-type':
                headers['Content-Type'] = val
        for key, val in resp.sysmeta_headers.items():
            _key = key.lower()
            if _key == OBJECT_TAGGING_HEADER.lower():
                headers[key] = val

        # Query for the objects in the segments area to make sure it completed
        query = {
            'format': 'json',
            'prefix': '%s/%s/' % (req.object_name, upload_id),
            'delimiter': '/'
        }

        # Force the master to be sure to fetch all uploaded parts
        req.environ.setdefault('oio.query', {})
        req.environ['oio.query']['force_master'] = True

        container = req.container_name + MULTIUPLOAD_SUFFIX
        resp = req.get_response(self.app, 'GET', container, '', query=query)
        objinfo = json.loads(resp.body)

        # Order the segments by their part number (last path component).
        # pylint: disable-msg=no-member
        objinfo.sort(key=lambda o: int(o['name'].split('/')[-1]))

        # Map each stored segment name to the manifest entry describing it.
        objtable = {
            o['name'].encode('utf-8'): {
                'path': '/'.join(['', container, o['name']]),
                'etag': o['hash'],
                'size_bytes': o['bytes']
            }
            for o in objinfo
        }

        s3_etag_hasher = md5()
        manifest = []
        previous_number = 0
        try:
            xml = req.xml(MAX_COMPLETE_UPLOAD_BODY_SIZE)
            if not xml:
                raise InvalidRequest(msg='You must specify at least one part')
            if 'content-md5' in req.headers:
                # If an MD5 was provided, we need to verify it.
                # Note that S3Request already took care of translating to ETag
                if req.headers['etag'] != md5(xml).hexdigest():
                    raise BadDigest(content_md5=req.headers['content-md5'])
                # We're only interested in the body here, in the
                # multipart-upload controller -- *don't* let it get
                # plumbed down to the object-server
                del req.headers['etag']

            complete_elem = fromstring(xml, 'CompleteMultipartUpload')
            for part_elem in complete_elem.iterchildren('Part'):
                part_number = int(part_elem.find('./PartNumber').text)

                if part_number <= previous_number:
                    raise InvalidPartOrder(upload_id=upload_id)
                previous_number = part_number

                etag = part_elem.find('./ETag').text
                if etag is None:
                    # An empty <ETag/> element has no text; report it as an
                    # invalid part instead of failing on len(None) below.
                    raise InvalidPart(upload_id=upload_id,
                                      part_number=part_number)
                if len(etag) >= 2 and etag[0] == '"' and etag[-1] == '"':
                    # strip double quotes
                    etag = etag[1:-1]

                info = objtable.get("%s/%s/%s" %
                                    (req.object_name, upload_id, part_number))
                if info is None or info['etag'] != etag:
                    raise InvalidPart(upload_id=upload_id,
                                      part_number=part_number)

                # The S3 ETag is the MD5 of the concatenated binary part
                # digests, suffixed below with the number of parts.
                s3_etag_hasher.update(binascii.a2b_hex(etag))
                info['size_bytes'] = int(info['size_bytes'])
                manifest.append(info)
        except (XMLSyntaxError, DocumentInvalid):
            raise MalformedXML()
        except ErrorResponse:
            raise
        except Exception as e:
            LOGGER.error(e)
            raise

        if not manifest:
            # Defensive: document validation may already reject a body
            # without any Part, but manifest[-1] below must never be reached
            # with an empty list.
            raise InvalidRequest(msg='You must specify at least one part')

        s3_etag = '%s-%d' % (s3_etag_hasher.hexdigest(), len(manifest))
        headers[sysmeta_header('object', 'etag')] = s3_etag
        # Leave base header value blank; SLO will populate
        c_etag = '; s3_etag=%s' % s3_etag
        headers['X-Object-Sysmeta-Container-Update-Override-Etag'] = c_etag

        # Following swift commit 7f636a5, zero-byte segments aren't allowed,
        # even as the final segment
        empty_seg = None
        if manifest[-1]['size_bytes'] == 0:
            empty_seg = manifest.pop()

            # We'll check the sizes of all except the last segment below, but
            # since we just popped off a zero-byte segment, we should check
            # that last segment, too.
            if manifest and manifest[-1]['size_bytes'] < CONF.min_segment_size:
                raise EntityTooSmall()

        # Check the size of each segment except the last and make sure they are
        # all more than the minimum upload chunk size
        for info in manifest[:-1]:
            if info['size_bytes'] < CONF.min_segment_size:
                raise EntityTooSmall()

        try:
            # TODO: add support for versioning
            if manifest:
                resp = req.get_response(self.app,
                                        'PUT',
                                        body=json.dumps(manifest),
                                        query={'multipart-manifest': 'put'},
                                        headers=headers)
            else:
                # the upload must have consisted of a single zero-length part
                # just write it directly
                resp = req.get_response(self.app,
                                        'PUT',
                                        body='',
                                        headers=headers)
        except ErrorResponse as e:
            msg = str(e._msg)
            expected_msg = 'too small; each segment must be at least 1 byte'
            if expected_msg in msg:
                # FIXME: AWS S3 allows a smaller object than 5 MB if there is
                # only one part.  Use a COPY request to copy the part object
                # from the segments container instead.
                raise EntityTooSmall(msg)
            else:
                raise

        if empty_seg:
            # clean up the zero-byte segment
            _, empty_seg_cont, empty_seg_name = empty_seg['path'].split('/', 2)
            req.get_response(self.app,
                             'DELETE',
                             container=empty_seg_cont,
                             obj=empty_seg_name)

        # clean up the multipart-upload record
        obj = '%s/%s' % (req.object_name, upload_id)
        req.environ['oio.ephemeral_object'] = True
        req.get_response(self.app, 'DELETE', container, obj)

        result_elem = Element('CompleteMultipartUploadResult')

        # NOTE: boto with sig v4 appends port to HTTP_HOST value at the
        # request header when the port is non default value and it makes
        # req.host_url like as http://localhost:8080:8080/path
        # that obviously invalid. Probably it should be resolved at
        # swift.common.swob though, tentatively we are parsing and
        # reconstructing the correct host_url info here.
        # in detail, https://github.com/boto/boto/pull/3513
        parsed_url = urlparse(req.host_url)
        host_url = '%s://%s' % (parsed_url.scheme, parsed_url.hostname)
        if parsed_url.port:
            host_url += ':%s' % parsed_url.port

        SubElement(result_elem, 'Location').text = host_url + req.path
        SubElement(result_elem, 'Bucket').text = req.container_name
        SubElement(result_elem, 'Key').text = req.object_name
        SubElement(result_elem, 'ETag').text = '"%s"' % s3_etag
        # The ETag is reported inside the XML document, not as a header.
        del resp.headers['ETag']

        resp.body = tostring(result_elem)
        resp.status = 200
        resp.content_type = "application/xml"

        return resp
Esempio n. 56
0
    def _listing(self, env, start_response, prefix=None):
        """
        Render the container listing as an HTML page for the remote client.

        :param env: The original WSGI environment dict.
        :param start_response: The original WSGI start_response hook.
        :param prefix: Any prefix desired for the container listing; it is
                       also stripped from the front of the displayed names.
        :returns: the WSGI response iterable: the HTML page on success,
                  otherwise the result of self._error_response() (a 404 is
                  used when listings are disabled or the listing is empty)
        """
        def not_found():
            # Build a 404 and hand it to the common error path.
            missing = HTTPNotFound()(env, self._start_response)
            return self._error_response(missing, env, start_response)

        if not config_true_value(self._listings):
            return not_found()

        # Fetch the JSON listing of the container with a pre-authorized
        # sub-request.
        listing_env = make_pre_authed_env(
            env,
            'GET',
            '/%s/%s/%s' % (self.version, self.account, self.container),
            self.agent,
            swift_source='SW')
        query_string = 'delimiter=/&format=json'
        if prefix:
            query_string += '&prefix=%s' % quote(prefix)
        else:
            prefix = ''
        listing_env['QUERY_STRING'] = query_string

        app_iter = self._app_call(listing_env)
        if not is_success(self._get_status_int()):
            return self._error_response(app_iter, env, start_response)

        raw_listing = ''.join(app_iter)
        listing = json.loads(raw_listing) if raw_listing else None
        if not listing:
            return not_found()

        headers = {'Content-Type': 'text/html; charset=UTF-8'}
        escaped_path = cgi.escape(env['PATH_INFO'])

        # The page is assembled as a list of fragments and joined once.
        html = [
            '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 '
            'Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">\n'
            '<html>\n'
            ' <head>\n'
            '  <title>Listing of %s</title>\n' % escaped_path
        ]
        if self._listings_css:
            html.append('  <link rel="stylesheet" type="text/css" '
                        'href="%s" />\n' % (self._build_css_path(prefix)))
        else:
            html.append('  <style type="text/css">\n'
                        '   h1 {font-size: 1em; font-weight: bold;}\n'
                        '   th {text-align: left; padding: 0px 1em 0px 1em;}\n'
                        '   td {padding: 0px 1em 0px 1em;}\n'
                        '   a {text-decoration: none;}\n'
                        '  </style>\n')
        html.append(' </head>\n'
                    ' <body>\n'
                    '  <h1 id="title">Listing of %s</h1>\n'
                    '  <table id="listing">\n'
                    '   <tr id="heading">\n'
                    '    <th class="colname">Name</th>\n'
                    '    <th class="colsize">Size</th>\n'
                    '    <th class="coldate">Date</th>\n'
                    '   </tr>\n' % escaped_path)
        if prefix:
            # Inside a pseudo-directory: offer a link to the parent.
            html.append('   <tr id="parent" class="item">\n'
                        '    <td class="colname"><a href="../">../</a></td>\n'
                        '    <td class="colsize">&nbsp;</td>\n'
                        '    <td class="coldate">&nbsp;</td>\n'
                        '   </tr>\n')

        # Sub-directories are listed first, then the objects.
        for entry in listing:
            if 'subdir' not in entry:
                continue
            subdir = entry['subdir']
            if prefix:
                subdir = subdir[len(prefix):]
            html.append('   <tr class="item subdir">\n'
                        '    <td class="colname"><a href="%s">%s</a></td>\n'
                        '    <td class="colsize">&nbsp;</td>\n'
                        '    <td class="coldate">&nbsp;</td>\n'
                        '   </tr>\n' %
                        (quote(subdir), cgi.escape(subdir)))

        for entry in listing:
            if 'name' not in entry:
                continue
            name = entry['name']
            if prefix:
                name = name[len(prefix):]
            # One CSS class per component of the content type.
            type_classes = ' '.join(
                'type-' + cgi.escape(part.lower(), quote=True)
                for part in entry['content_type'].split('/'))
            href = quote(name)
            label = cgi.escape(name)
            size = human_readable(entry['bytes'])
            # Drop the fractional seconds and the 'T' date/time separator.
            escaped_stamp = cgi.escape(entry['last_modified'])
            modified = escaped_stamp.split('.')[0].replace('T', ' ')
            html.append('   <tr class="item %s">\n'
                        '    <td class="colname"><a href="%s">%s</a></td>\n'
                        '    <td class="colsize">%s</td>\n'
                        '    <td class="coldate">%s</td>\n'
                        '   </tr>\n' %
                        (type_classes, href, label, size, modified))

        html.append('  </table>\n'
                    ' </body>\n'
                    '</html>\n')
        resp = Response(headers=headers, body=''.join(html))
        return resp(env, start_response)
Esempio n. 57
0
    def GET(self, req):
        """
        Handles List Multipart Uploads

        Lists the objects of the bucket's multipart-upload container,
        drops the per-part segment objects, optionally groups keys by
        delimiter, and renders a ListMultipartUploadsResult document.

        :param req: the S3 request; the 'encoding-type', 'key-marker',
                    'upload-id-marker', 'max-uploads', 'prefix' and
                    'delimiter' query parameters are honoured
        :returns: an HTTPOk response with the XML listing as body
        :raises InvalidArgument: if 'encoding-type' is given and is not
                                 'url'
        """
        def separate_uploads(uploads, prefix, delimiter):
            """
            Split uploads into non_delimited_uploads (a subset of uploads)
            and common_prefixes, according to the given delimiter.
            non_delimited_uploads is the list of uploads whose key does not
            contain the delimiter after the prefix. common_prefixes is the
            sorted list of distinct key prefixes up to and including the
            delimiter.

            e.g. with a '/' delimiter and uploads whose keys are
            ['foo', 'foo/bar'], this function returns the 'foo' upload and
            ['foo/'].

            :param uploads: A list of upload dictionaries
            :param prefix: The prefix reserved on the upload path (the
                           delimiter is only searched for after the prefix)
            :param delimiter: The delimiter used to split the key of each
                              upload

            :return (non_delimited_uploads, common_prefixes)
            """
            if six.PY2:
                (prefix, delimiter) = utf8encode(prefix, delimiter)
            non_delimited_uploads = []
            common_prefixes = set()
            for upload in uploads:
                key = upload['key']
                end = key.find(delimiter, len(prefix))
                if end >= 0:
                    common_prefix = key[:end + len(delimiter)]
                    common_prefixes.add(common_prefix)
                else:
                    non_delimited_uploads.append(upload)
            return non_delimited_uploads, sorted(common_prefixes)

        encoding_type = req.params.get('encoding-type')
        if encoding_type is not None and encoding_type != 'url':
            err_msg = 'Invalid Encoding Method specified in Request'
            raise InvalidArgument('encoding-type', encoding_type, err_msg)

        keymarker = req.params.get('key-marker', '')
        uploadid = req.params.get('upload-id-marker', '')
        maxuploads = req.get_validated_param('max-uploads',
                                             DEFAULT_MAX_UPLOADS,
                                             DEFAULT_MAX_UPLOADS)

        # Ask for one entry more than requested so truncation can be
        # detected below.
        query = {
            'format': 'json',
            'limit': maxuploads + 1,
        }

        if uploadid and keymarker:
            query.update({'marker': '%s/%s' % (keymarker, uploadid)})
        elif keymarker:
            query.update({'marker': '%s/~' % (keymarker)})
        if 'prefix' in req.params:
            query.update({'prefix': req.params['prefix']})

        container = req.container_name + MULTIUPLOAD_SUFFIX
        try:
            resp = req.get_response(self.app, container=container, query=query)
            objects = json.loads(resp.body)
        except NoSuchBucket:
            # Assume NoSuchBucket as no uploads
            objects = []

        def object_to_upload(object_info):
            # The object name is "<key>/<upload id>"; split on the last '/'.
            obj, upid = object_info['name'].rsplit('/', 1)
            obj_dict = {
                'key': obj,
                'upload_id': upid,
                'last_modified': object_info['last_modified']
            }
            return obj_dict

        # uploads is a list consists of dict, {key, upload_id, last_modified}
        # Note that pattern matcher will drop whole segments objects like as
        # object_name/upload_id/1.
        pattern = re.compile('/[0-9]+$')
        uploads = [
            object_to_upload(obj) for obj in objects
            if pattern.search(obj.get('name', '')) is None
        ]

        prefixes = []
        if 'delimiter' in req.params:
            prefix = req.params.get('prefix', '')
            delimiter = req.params['delimiter']
            uploads, prefixes = separate_uploads(uploads, prefix, delimiter)

        if len(uploads) > maxuploads:
            uploads = uploads[:maxuploads]
            truncated = True
        else:
            truncated = False

        # The markers point at the last upload returned. They must be set
        # as soon as there is at least one upload: otherwise a truncated
        # page holding a single upload (e.g. max-uploads=1) gives the
        # client nothing to continue from.
        nextkeymarker = ''
        nextuploadmarker = ''
        if uploads:
            nextuploadmarker = uploads[-1]['upload_id']
            nextkeymarker = uploads[-1]['key']

        result_elem = Element('ListMultipartUploadsResult')
        SubElement(result_elem, 'Bucket').text = req.container_name
        SubElement(result_elem, 'KeyMarker').text = keymarker
        SubElement(result_elem, 'UploadIdMarker').text = uploadid
        SubElement(result_elem, 'NextKeyMarker').text = nextkeymarker
        SubElement(result_elem, 'NextUploadIdMarker').text = nextuploadmarker
        if 'delimiter' in req.params:
            SubElement(result_elem, 'Delimiter').text = req.params['delimiter']
        if 'prefix' in req.params:
            SubElement(result_elem, 'Prefix').text = req.params['prefix']
        SubElement(result_elem, 'MaxUploads').text = str(maxuploads)
        if encoding_type is not None:
            SubElement(result_elem, 'EncodingType').text = encoding_type
        SubElement(result_elem, 'IsTruncated').text = \
            'true' if truncated else 'false'

        # TODO: don't show uploads which are initiated before this bucket is
        # created.
        for u in uploads:
            upload_elem = SubElement(result_elem, 'Upload')
            name = u['key']
            if encoding_type == 'url':
                name = quote(name)
            SubElement(upload_elem, 'Key').text = name
            SubElement(upload_elem, 'UploadId').text = u['upload_id']
            # The requesting user is reported as both initiator and owner.
            initiator_elem = SubElement(upload_elem, 'Initiator')
            SubElement(initiator_elem, 'ID').text = req.user_id
            SubElement(initiator_elem, 'DisplayName').text = req.user_id
            owner_elem = SubElement(upload_elem, 'Owner')
            SubElement(owner_elem, 'ID').text = req.user_id
            SubElement(owner_elem, 'DisplayName').text = req.user_id
            SubElement(upload_elem, 'StorageClass').text = 'STANDARD'
            # Drop the last three characters of the timestamp and mark it
            # with 'Z'.
            SubElement(upload_elem, 'Initiated').text = \
                u['last_modified'][:-3] + 'Z'

        for p in prefixes:
            elem = SubElement(result_elem, 'CommonPrefixes')
            SubElement(elem, 'Prefix').text = p

        body = tostring(result_elem)

        return HTTPOk(body=body, content_type='application/xml')
    def handle_container_listing(self, env, start_response):
        """
        Handle a container listing, with special treatment for a container
        that is the versioning container of another one.

        When the requested container is confirmed (through the
        'versions-location' sysmeta of the unversioned container) to be a
        versioning container, the listing is requested for the original
        container with versions enabled, the latest version of each object
        is dropped unless it is a delete marker, and every remaining entry
        is renamed with swift3_versioned_object_name(). Otherwise the
        parent class's response is passed through unchanged.

        :param env: the WSGI environment of the request
        :param start_response: the WSGI start_response callable
        :returns: the response body: the iterable obtained from the parent
                  class, or a JSON string when the listing was rewritten
        """
        # This code may be clearer by using Request(env).get_response()
        # instead of self._app_call(env)
        api_vers, account, container_name = split_path(
            env['PATH_INFO'], 3, 3, True)
        sub_env = env.copy()
        orig_container = get_unversioned_container(container_name)
        if orig_container != container_name:
            # Check that container_name is actually the versioning
            # container for orig_container
            sub_env['PATH_INFO'] = '/%s/%s/%s' % (api_vers, account,
                                                  orig_container)
            info = get_container_info(sub_env, self.app,
                                      swift_source='VW')
            if info.get('sysmeta', {}).get('versions-location') != \
                    container_name:
                # We were wrong, do a standard listing
                orig_container = container_name

        if orig_container != container_name:
            # Markers and prefixes are passed through
            # swift3_split_object_name_version() and only the first element
            # of its result is kept (presumably the bare object name).
            qs = parse_qs(sub_env.get('QUERY_STRING', ''))
            if 'marker' in qs:
                marker, _ = swift3_split_object_name_version(qs['marker'][0])
                qs['marker'] = [marker]
            if 'prefix' in qs:
                prefix, _ = swift3_split_object_name_version(qs['prefix'][0])
                qs['prefix'] = prefix
            sub_env['QUERY_STRING'] = urlencode(qs, True)
            # NOTE(review): confirm 'oio_query' (underscore) is the key the
            # backend reads; a dotted 'oio.query' spelling also exists.
            sub_env['oio_query'] = {'versions': True}

        # The real start_response is called at the end, once the headers
        # are final, so a no-op is handed to the parent here.
        resp = super(OioVersionedWritesContext, self).handle_container_request(
            sub_env, lambda x, y, z: None)

        if orig_container != container_name and \
                self._response_status == '200 OK':
            with closing_if_possible(resp):
                versioned_objects = json.loads("".join(resp))

            # Discard the latest version of each object, because it is
            # not supposed to appear in the versioning container.
            # Every name gets an entry here, including objects whose
            # version is missing or not positive, so the lookup in the
            # filter below cannot fail.
            latest = dict()
            for obj in versioned_objects:
                ver = int(obj.get('version', '0'))
                name = obj['name']
                if name not in latest or ver > latest[name]:
                    latest[name] = ver
            versioned_objects = [obj for obj in versioned_objects
                                 if int(obj.get('version', '0')) !=
                                 latest[obj['name']] or
                                 is_deleted(obj)]

            for obj in versioned_objects:
                obj['name'] = swift3_versioned_object_name(
                    obj['name'], obj.get('version', ''))
            resp = json.dumps(versioned_objects)
            # The body changed, so the advertised length must follow.
            self._response_headers = [x for x in self._response_headers
                                      if x[0] != 'Content-Length']
            self._response_headers.append(('Content-Length', str(len(resp))))

        start_response(self._response_status, self._response_headers,
                       self._response_exc_info)
        return resp
Esempio n. 59
0
        def response_iter():
            """
            Generate the body of the Complete Multipart Upload response.

            Sends the manifest PUT (with ``multipart-manifest=put`` and
            ``heartbeat=on``), relays whitespace-only chunks of a 202
            response to the client once the grace period has elapsed,
            removes the multipart-upload marker object and finally yields
            the completion document built by ``_make_complete_body``.

            Any ``ErrorResponse`` raised along the way is rendered into the
            stream; the HTTP status of ``resp`` is only overwritten when
            nothing has been yielded yet.
            """
            # NB: an XML declaration, when present, has to be the very first
            # thing in the document; clients *will* complain about invalid
            # XML if whitespace precedes it. Remember whether anything has
            # been sent so the declaration can be emitted *first*.
            sent_any = False

            try:
                try:
                    # TODO: add support for versioning
                    manifest_query = {
                        'multipart-manifest': 'put',
                        'heartbeat': 'on',
                    }
                    put_resp = req.get_response(
                        self.app, 'PUT', body=json.dumps(manifest),
                        query=manifest_query, headers=headers)
                    if put_resp.status_int == 202:
                        json_chunks = []
                        put_resp.fix_conditional_response()
                        for chunk in put_resp.response_iter:
                            if chunk.strip():
                                # Non-blank data belongs to the JSON summary
                                json_chunks.append(chunk)
                                continue
                            # Whitespace-only chunk from here on.
                            if time.time() - start_time < 10:
                                # Include some grace period to keep
                                # ceph-s3tests happy
                                continue
                            if not sent_any:
                                yield (b'<?xml version="1.0" '
                                       b'encoding="UTF-8"?>\n')
                            sent_any = True
                            yield chunk
                        summary = json.loads(b''.join(json_chunks))
                        if summary['Response Status'] != '201 Created':
                            for _seg, err in summary['Errors']:
                                if err == too_small_message:
                                    raise EntityTooSmall()
                                if err in ('Etag Mismatch', '404 Not Found'):
                                    raise InvalidPart(upload_id=upload_id)
                            # No specific mapping matched: report all errors
                            raise InvalidRequest(
                                status=summary['Response Status'],
                                msg='\n'.join(
                                    ': '.join(pair)
                                    for pair in summary['Errors']))
                except BadSwiftRequest as bad_req:
                    text = str(bad_req)
                    if too_small_message in text:
                        raise EntityTooSmall(text)
                    if ', Etag Mismatch' in text or \
                            ', 404 Not Found' in text:
                        raise InvalidPart(upload_id=upload_id)
                    raise

                # clean up the multipart-upload record
                marker_obj = '%s/%s' % (req.object_name, upload_id)
                try:
                    req.get_response(self.app, 'DELETE', container,
                                     marker_obj)
                except NoSuchKey:
                    # What matters is that a tombstone was written so the
                    # marker gets cleaned up. If it is already gone (e.g.,
                    # concurrent completes or a retried complete), so much
                    # the better.
                    pass

                yield _make_complete_body(req, s3_etag, sent_any)
            except ErrorResponse as err_resp:
                if not sent_any:
                    # Oh good, we can still change HTTP status code, too!
                    resp.status = err_resp.status
                else:
                    # Part of the document is already out: suppress a second
                    # XML declaration and start the error on a fresh line.
                    err_resp.xml_declaration = False
                    yield b'\n'
                for piece in err_resp({}, lambda *a: None):
                    yield piece
Esempio n. 60
0
    def POST(self, req):
        """
        Handles Complete Multipart Upload.
        """
        upload_id = req.params['uploadId']
        req.headers['x-object-meta-object-type'] = 'Multipart'
        resp = _get_upload_info(req, self.app, upload_id)
        headers = {}
        for key, val in resp.headers.iteritems():
            _key = key.lower()
            if _key.startswith('x-oss-meta-'):
                headers['x-object-meta-' + _key[11:]] = val
            elif _key == 'content-type':
                headers['Content-Type'] = val

        # Query for the objects in the segments area to make sure it completed
        query = {
            'format': 'json',
            'prefix': '%s/%s/' % (req.object_name, upload_id),
            'delimiter': '/'
        }

        container = req.container_name + MULTIUPLOAD_SUFFIX
        resp = req.get_response(self.app, 'GET', container, '', query=query)
        objinfo = json.loads(resp.body)
        objtable = dict((o['name'],
                         {'path': '/'.join(['', container, o['name']]),
                          'etag': o['hash'],
                          'size_bytes': o['bytes']}) for o in objinfo)

        manifest = []
        previous_number = 0
        try:
            xml = req.xml(MAX_COMPLETE_UPLOAD_BODY_SIZE)
            complete_elem = fromstring(xml, 'CompleteMultipartUpload')
            for part_elem in complete_elem.iterchildren('Part'):
                part_number = int(part_elem.find('./PartNumber').text)

                if part_number <= previous_number:
                    raise InvalidPartOrder(upload_id=upload_id)
                previous_number = part_number

                etag = part_elem.find('./ETag').text
                if len(etag) >= 2 and etag[0] == '"' and etag[-1] == '"':
                    # strip double quotes
                    etag = etag[1:-1]

                info = objtable.get("%s/%s/%s" % (req.object_name, upload_id,
                                                  part_number))
                if info is None or info['etag'] != etag:
                    raise InvalidPart(upload_id=upload_id,
                                      part_number=part_number)

                manifest.append(info)
        except (XMLSyntaxError, DocumentInvalid):
            raise MalformedXML()
        except ErrorResponse:
            raise
        except Exception as e:
            exc_type, exc_value, exc_traceback = sys.exc_info()
            LOGGER.error(e)
            raise exc_type, exc_value, exc_traceback

        # Following swift commit 7f636a5, zero-byte segments aren't allowed,
        # even as the final segment
        if int(info['size_bytes']) == 0:
            manifest.pop()

            # Ordinarily, we just let SLO check segment sizes. However, we
            # just popped off a zero-byte segment; if there was a second
            # zero-byte segment and it was at the end, it would succeed on
            # Swift < 2.6.0 and fail on newer Swift. It seems reasonable that
            # it should always fail.
            if manifest and int(manifest[-1]['size_bytes']) == 0:
                raise EntityTooSmall()

        try:
            # TODO: add support for versioning
            if manifest:
                resp = req.get_response(self.app, 'PUT',
                                        body=json.dumps(manifest),
                                        query={'multipart-manifest': 'put'},
                                        headers=headers)
            else:
                # the upload must have consisted of a single zero-length part
                # just write it directly
                resp = req.get_response(self.app, 'PUT', body='',
                                        headers=headers)
        except BadSwiftRequest as e:
            msg = str(e)
            msg_pre_260 = 'Each segment, except the last, must be at least '
            # see https://github.com/openstack/swift/commit/c0866ce
            msg_260 = ('too small; each segment, except the last, must be '
                       'at least ')
            # see https://github.com/openstack/swift/commit/7f636a5
            msg_post_260 = 'too small; each segment must be at least 1 byte'
            if msg.startswith(msg_pre_260) or \
                    msg_260 in msg or msg_post_260 in msg:
                # FIXME: Alibaba OSS allows a smaller object than 5 MB if there is
                # only one part.  Use a COPY request to copy the part object
                # from the segments container instead.
                raise EntityTooSmall(msg)
            else:
                raise

        if int(info['size_bytes']) == 0:
            # clean up the zero-byte segment
            empty_seg_cont, empty_seg_name = info['path'].split('/', 2)[1:]
            req.get_response(self.app, 'DELETE',
                             container=empty_seg_cont, obj=empty_seg_name)

        # clean up the multipart-upload record
        obj = '%s/%s' % (req.object_name, upload_id)
        req.get_response(self.app, 'DELETE', container, obj)

        result_elem = Element('CompleteMultipartUploadResult')

        # NOTE: boto with sig v4 appends port to HTTP_HOST value at the
        # request header when the port is non default value and it makes
        # req.host_url like as http://localhost:8080:8080/path
        # that obviously invalid. Probably it should be resolved at
        # swift.common.swob though, tentatively we are parsing and
        # reconstructing the correct host_url info here.
        # in detail, https://github.com/boto/boto/pull/3513
        parsed_url = urlparse(req.host_url)
        host_url = '%s://%s' % (parsed_url.scheme, parsed_url.hostname)
        if parsed_url.port:
            host_url += ':%s' % parsed_url.port

        SubElement(result_elem, 'Location').text = host_url + req.path
        SubElement(result_elem, 'Bucket').text = req.container_name
        SubElement(result_elem, 'Key').text = req.object_name
        SubElement(result_elem, 'ETag').text = resp.etag

        resp.body = tostring(result_elem)
        resp.status = 200
        resp.content_type = "application/xml"

        return resp