def _handle_sync_response(self, node, response, info, broker, http,
                          different_region=False):
    if response.status == HTTP_NOT_FOUND:  # completely missing, rsync
        self.stats['rsync'] += 1
        self.logger.increment('rsyncs')
        return self._rsync_db(broker, node, http, info['id'],
                              different_region=different_region)
    elif response.status == HTTP_INSUFFICIENT_STORAGE:
        raise DriveNotMounted()
    elif 200 <= response.status < 300:
        rinfo = json.loads(response.data)
        local_sync = broker.get_sync(rinfo['id'], incoming=False)
        if rinfo.get('metadata', ''):
            broker.update_metadata(json.loads(rinfo['metadata']))
        if self._in_sync(rinfo, info, broker, local_sync):
            return True
        # if the difference in rowids between the two differs by
        # more than 50% and the difference is greater than per_diff,
        # rsync then do a remote merge.
        # NOTE: difference > per_diff stops us from dropping to rsync
        # on smaller containers, who have only a few rows to sync.
        if rinfo['max_row'] / float(info['max_row']) < 0.5 and \
                info['max_row'] - rinfo['max_row'] > self.per_diff:
            self.stats['remote_merge'] += 1
            self.logger.increment('remote_merges')
            return self._rsync_db(broker, node, http, info['id'],
                                  replicate_method='rsync_then_merge',
                                  replicate_timeout=(info['count'] / 2000),
                                  different_region=different_region)
        # else send diffs over to the remote server
        return self._usync_db(max(rinfo['point'], local_sync),
                              broker, http, rinfo['id'], info['id'])
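# Worked example of the branch logic above (numbers illustrative, assuming
# the default per_diff of 1000): with a local info['max_row'] of 10000 and
# a remote rinfo['max_row'] of 4000, the ratio is 0.4 < 0.5 and the
# difference 6000 > per_diff, so the replicator falls back to
# rsync_then_merge. With rinfo['max_row'] = 9000 the ratio is 0.9, so only
# the missing rows are pushed via _usync_db.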
def test_bulk_delete_500_resp(self):
    req = Request.blank("/broke/AUTH_acc/", body="/c/f\nc/f2\n",
                        headers={"Accept": "application/json"})
    req.method = "DELETE"
    resp_body = self.handle_delete_and_iter(req)
    resp_data = json.loads(resp_body)
    self.assertEquals(resp_data["Errors"],
                      [["/c/f", "500 Internal Error"],
                       ["c/f2", "500 Internal Error"]])
    self.assertEquals(resp_data["Response Status"], "502 Bad Gateway")
def update_data_record(self, record, list_meta=False):
    """
    Performs any mutations to container listing records that are common
    to all serialization formats, and returns the record as a dict.

    Converts created time to iso timestamp.
    Replaces size with 'swift_bytes' content type parameter.

    :param record: object entry record
    :returns: modified record
    """
    (name, created, size, content_type, etag, metadata) = record
    if content_type is None:
        return {'subdir': name}
    response = {'bytes': size, 'hash': etag, 'name': name,
                'content_type': content_type}
    if list_meta:
        metadata = json.loads(metadata)
        utf8encodekeys(metadata)
        response['metadata'] = metadata
    last_modified = datetime.utcfromtimestamp(float(created)).isoformat()
    # python isoformat() doesn't include msecs when zero
    if len(last_modified) < len("1970-01-01T00:00:00.000000"):
        last_modified += ".000000"
    response['last_modified'] = last_modified
    override_bytes_from_content_type(response, logger=self.logger)
    return response
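# Illustrative record-to-dict conversion performed by the method above
# (values made up; the 'swift_bytes' stripping is done by
# override_bytes_from_content_type):
#
#   record = ('img.png', '0.000000', 10,
#             'image/png; swift_bytes=10',
#             'd41d8cd98f00b204e9800998ecf8427e', '{}')
#   update_data_record(record)
#   -> {'name': 'img.png', 'bytes': 10,
#       'hash': 'd41d8cd98f00b204e9800998ecf8427e',
#       'content_type': 'image/png',
#       'last_modified': '1970-01-01T00:00:00.000000'}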
def DELETE(self, req):
    """
    Handles Abort Multipart Upload.
    """
    upload_id = req.params['uploadId']
    _check_upload_info(req, self.app, upload_id)

    # First check to see if this multi-part upload was already
    # completed.  Look in the primary container, if the object exists,
    # then it was completed and we return an error here.
    container = req.container_name + MULTIUPLOAD_SUFFIX
    obj = '%s/%s' % (req.object_name, upload_id)
    req.get_response(self.app, container=container, obj=obj)

    # The completed object was not found so this
    # must be a multipart upload abort.
    # We must delete any uploaded segments for this UploadID and then
    # delete the object in the main container as well
    query = {
        'format': 'json',
        'prefix': '%s/%s/' % (req.object_name, upload_id),
        'delimiter': '/',
    }

    resp = req.get_response(self.app, 'GET', container, '', query=query)

    # Iterate over the segment objects and delete them individually
    objects = json.loads(resp.body)
    for o in objects:
        container = req.container_name + MULTIUPLOAD_SUFFIX
        req.get_response(self.app, container=container, obj=o['name'])

    return HTTPNoContent()
def _listing_pages_iter(self, lcontainer, lprefix, env):
    lpartition, lnodes = self.app.container_ring.get_nodes(
        self.account_name, lcontainer)
    marker = ''
    while True:
        lreq = Request.blank('i will be overridden by env', environ=env)
        # Don't quote PATH_INFO, by WSGI spec
        lreq.environ['PATH_INFO'] = \
            '/%s/%s' % (self.account_name, lcontainer)
        lreq.environ['REQUEST_METHOD'] = 'GET'
        lreq.environ['QUERY_STRING'] = \
            'format=json&prefix=%s&marker=%s' % (quote(lprefix),
                                                 quote(marker))
        lnodes = self.app.sort_nodes(lnodes)
        lresp = self.GETorHEAD_base(
            lreq, _('Container'), lpartition, lnodes, lreq.path_info,
            len(lnodes))
        if 'swift.authorize' in env:
            lreq.acl = lresp.headers.get('x-container-read')
            aresp = env['swift.authorize'](lreq)
            if aresp:
                raise ListingIterNotAuthorized(aresp)
        if lresp.status_int == HTTP_NOT_FOUND:
            raise ListingIterNotFound()
        elif not is_success(lresp.status_int):
            raise ListingIterError()
        if not lresp.body:
            break
        sublisting = json.loads(lresp.body)
        if not sublisting:
            break
        marker = sublisting[-1]['name'].encode('utf-8')
        yield sublisting
def get_or_head_response(self, req, resp_headers, resp_iter):
    with closing_if_possible(resp_iter):
        resp_body = "".join(resp_iter)
    try:
        segments = json.loads(resp_body)
    except ValueError:
        segments = []

    etag = md5()
    content_length = 0
    for seg_dict in segments:
        if seg_dict.get("range"):
            etag.update("%s:%s;" % (seg_dict["hash"], seg_dict["range"]))
        else:
            etag.update(seg_dict["hash"])

        if config_true_value(seg_dict.get("sub_slo")):
            override_bytes_from_content_type(seg_dict,
                                             logger=self.slo.logger)
        content_length += self._segment_length(seg_dict)

    response_headers = [(h, v) for h, v in resp_headers
                        if h.lower() not in ("etag", "content-length")]
    response_headers.append(("Content-Length", str(content_length)))
    response_headers.append(("Etag", '"%s"' % etag.hexdigest()))

    if req.method == "HEAD":
        return self._manifest_head_response(req, response_headers)
    else:
        return self._manifest_get_response(req, content_length,
                                           response_headers, segments)
def test_GET_OBJscope_conAttrs_metadata(self):
    """
    In object scope, specifying container attrs.
    We should get back the container that the object belongs to.
    """
    attrs = Cattrs
    req2 = Request.blank(
        '/v1/TEST_acc1/TEST_con1/TEST_obj1',
        environ={'REQUEST_METHOD': 'GET', 'HTTP_X_TIMESTAMP': '0'},
        headers={'attributes': attrs, 'format': 'json'})
    resp2 = req2.get_response(self.controller)
    self.assert_(resp2.status.startswith('200'))
    testList = json.loads(resp2.body)
    self.assert_(len(testList) == 1)
    testDict = testList[0]
    self.assert_('/TEST_acc1/TEST_con1' in testDict)
    metaReturned = testDict['/TEST_acc1/TEST_con1']
    self.assertEquals(
        metaReturned['container_uri'], '/TEST_acc1/TEST_con1')
    self.assertEquals(metaReturned['container_name'], 'TEST_con1')
    self.assertEquals(metaReturned['container_account_name'], 'TEST_acc1')
    self.assertEquals(metaReturned['container_create_time'], self.t)
    self.assertEquals(metaReturned['container_object_count'], 33)
    self.assertEquals(metaReturned['container_bytes_used'], 3342)
    self.assertEquals(metaReturned['container_meta_TESTCUSTOM'], 'CUSTOM')
def handle_multipart_delete(self, req):
    new_env = req.environ.copy()
    new_env['REQUEST_METHOD'] = 'GET'
    del(new_env['wsgi.input'])
    new_env['QUERY_STRING'] = 'multipart-manifest=get'
    new_env['CONTENT_LENGTH'] = 0
    new_env['HTTP_USER_AGENT'] = \
        '%s MultipartDELETE' % req.environ.get('HTTP_USER_AGENT')
    new_env['swift.source'] = 'SLO'
    get_man_resp = \
        Request.blank('', new_env).get_response(self.app)
    if get_man_resp.status_int // 100 == 2:
        if not config_true_value(
                get_man_resp.headers.get('X-Static-Large-Object')):
            raise HTTPBadRequest('Not an SLO manifest')
        try:
            manifest = json.loads(get_man_resp.body)
        except ValueError:
            raise HTTPServerError('Invalid manifest file')
        delete_resp = self.bulk_deleter.handle_delete(
            req,
            objs_to_delete=[o['name'].encode('utf-8') for o in manifest],
            user_agent='MultipartDELETE', swift_source='SLO')
        if delete_resp.status_int // 100 == 2:
            # delete the manifest file itself
            return self.app
        else:
            return delete_resp
    return get_man_resp
def test_handle_multipart_put_check_data_bad(self):
    bad_data = json.dumps(
        [{"path": "/checktest/a_1", "etag": "a", "size_bytes": "2"},
         {"path": "/checktest/badreq", "etag": "a", "size_bytes": "1"},
         {"path": "/checktest/b_2", "etag": "not-b", "size_bytes": "2"},
         {"path": "/checktest/slob", "etag": "not-slob",
          "size_bytes": "2"}])
    req = Request.blank(
        "/v1/AUTH_test/checktest/man?multipart-manifest=put",
        environ={"REQUEST_METHOD": "PUT"},
        headers={"Accept": "application/json"},
        body=bad_data)

    status, headers, body = self.call_slo(req)
    self.assertEquals(self.app.call_count, 4)
    errors = json.loads(body)["Errors"]
    self.assertEquals(len(errors), 5)
    self.assertEquals(errors[0][0], "/checktest/a_1")
    self.assertEquals(errors[0][1], "Size Mismatch")
    self.assertEquals(errors[1][0], "/checktest/badreq")
    self.assertEquals(errors[1][1], "400 Bad Request")
    self.assertEquals(errors[2][0], "/checktest/b_2")
    self.assertEquals(errors[2][1], "Etag Mismatch")
    self.assertEquals(errors[3][0], "/checktest/slob")
    self.assertEquals(errors[3][1], "Size Mismatch")
    self.assertEquals(errors[4][0], "/checktest/slob")
    self.assertEquals(errors[4][1], "Etag Mismatch")
def is_strict_mode(url, token, parsed, conn):
    conn.request('GET', '/info')
    resp = conn.getresponse()
    if resp.status // 100 == 2:
        info = json.loads(resp.read())
        return info.get('swift', {}).get('strict_cors_mode', False)
    return False
def test_bulk_delete_container_delete(self):
    req = Request.blank("/delete_cont_fail/AUTH_Acc", body="c\n",
                        headers={"Accept": "application/json"})
    req.method = "DELETE"
    resp_body = self.handle_delete_and_iter(req)
    resp_data = json.loads(resp_body)
    self.assertEquals(resp_data["Number Deleted"], 0)
    self.assertEquals(resp_data["Errors"][0][1], "409 Conflict")
def _fetch_sub_slo_segments(self, req, version, acc, con, obj):
    """
    Fetch the submanifest, parse it, and return it.
    Raise exception on failures.
    """
    sub_req = req.copy_get()
    sub_req.range = None
    sub_req.environ['PATH_INFO'] = '/'.join(['', version, acc, con, obj])
    sub_req.environ['swift.source'] = 'SLO'
    sub_req.user_agent = "%s SLO MultipartGET" % sub_req.user_agent
    sub_resp = sub_req.get_response(self.slo.app)

    if not is_success(sub_resp.status_int):
        raise ListingIterError(
            'ERROR: while fetching %s, GET of submanifest %s '
            'failed with status %d' % (req.path, sub_req.path,
                                       sub_resp.status_int))

    try:
        with closing_if_possible(sub_resp.app_iter):
            return json.loads(''.join(sub_resp.app_iter))
    except ValueError as err:
        raise ListingIterError(
            'ERROR: while fetching %s, JSON-decoding of submanifest %s '
            'failed with %s' % (req.path, sub_req.path, err))
def get(self, key):
    """
    Gets the object specified by key.  It will also unserialize the
    object before returning if it is serialized in memcache with JSON,
    or if it is pickled and unpickling is allowed.

    :param key: key
    :returns: value of the key in memcache
    """
    key = md5hash(key)
    value = None
    for (server, fp, sock) in self._get_conns(key):
        try:
            with Timeout(self._io_timeout):
                sock.sendall('get %s\r\n' % key)
                line = fp.readline().strip().split()
                while line[0].upper() != 'END':
                    if line[0].upper() == 'VALUE' and line[1] == key:
                        size = int(line[3])
                        value = fp.read(size)
                        if int(line[2]) & PICKLE_FLAG:
                            if self._allow_unpickle:
                                value = pickle.loads(value)
                            else:
                                value = None
                        elif int(line[2]) & JSON_FLAG:
                            value = json.loads(value)
                        fp.readline()
                    line = fp.readline().strip().split()
                self._return_conn(server, fp, sock)
                return value
        except (Exception, Timeout) as e:
            self._exception_occurred(server, e, sock=sock, fp=fp)
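# A minimal sketch of the memcached text-protocol exchange the loop above
# parses (standard protocol; key, flags and payload are made up). The
# server answers a `get` with "VALUE <key> <flags> <bytes>", the payload,
# then "END":
import json

JSON_FLAG = 2  # assumed flag bit, mirroring the constant used above

header = 'VALUE 5eb63bbbe01eeed093cb22bb8f5acdc3 2 13'
line = header.strip().split()
flags, size = int(line[2]), int(line[3])
payload = '{"a": "json"}'  # the next `size` bytes on the wire
value = json.loads(payload) if flags & JSON_FLAG else payload
assert value == {'a': 'json'}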
def test_handle_multipart_put_check_data_bad(self):
    bad_data = json.dumps(
        [{"path": "/c/a_1", "etag": "a", "size_bytes": "1"},
         {"path": "/c/a_2", "etag": "a", "size_bytes": "1"},
         {"path": "/d/b_2", "etag": "b", "size_bytes": "2"}])
    req = Request.blank(
        "/test_good/A/c/man?multipart-manifest=put",
        environ={"REQUEST_METHOD": "PUT"},
        headers={"Accept": "application/json"},
        body=bad_data)
    try:
        self.slo.handle_multipart_put(req)
    except HTTPException as e:
        self.assertEquals(self.app.calls, 3)
        data = json.loads(e.body)
        errors = data["Errors"]
        self.assertEquals(errors[0][0], "/test_good/A/c/a_1")
        self.assertEquals(errors[0][1], "Size Mismatch")
        self.assertEquals(errors[2][1], "400 Bad Request")
        self.assertEquals(errors[-1][0], "/test_good/A/d/b_2")
        self.assertEquals(errors[-1][1], "Etag Mismatch")
def _reclaim(self, conn, timestamp):
    """
    Removes any empty metadata values older than the timestamp using the
    given database connection. This function will not call commit on the
    conn, but will instead return True if the database needs committing.
    This function was created as a worker to limit transactions and
    commits from other related functions.

    :param conn: Database connection to reclaim metadata within.
    :param timestamp: Empty metadata items last updated before this
                      timestamp will be removed.
    :returns: True if conn.commit() should be called
    """
    try:
        md = conn.execute('SELECT metadata FROM %s_stat' %
                          self.db_type).fetchone()[0]
        if md:
            md = json.loads(md)
            keys_to_delete = []
            for key, (value, value_timestamp) in md.iteritems():
                if value == '' and value_timestamp < timestamp:
                    keys_to_delete.append(key)
            if keys_to_delete:
                for key in keys_to_delete:
                    del md[key]
                conn.execute('UPDATE %s_stat SET metadata = ?' %
                             self.db_type, (json.dumps(md),))
                return True
    except sqlite3.OperationalError as err:
        if 'no such column: metadata' not in str(err):
            raise
    return False
def deserialize_v1(cls, gz_file, metadata_only=False):
    """
    Deserialize a v1 ring file into a dictionary with `devs`,
    `part_shift`, and `replica2part2dev_id` keys.

    If the optional kwarg `metadata_only` is True, then the
    `replica2part2dev_id` is not loaded and that key in the returned
    dictionary just has the value `[]`.

    :param file gz_file: An opened file-like object which has already
                         consumed the 6 bytes of magic and version.
    :param bool metadata_only: If True, only load `devs` and `part_shift`
    :returns: A dict containing `devs`, `part_shift`, and
              `replica2part2dev_id`
    """
    json_len, = struct.unpack('!I', gz_file.read(4))
    ring_dict = json.loads(gz_file.read(json_len))
    ring_dict['replica2part2dev_id'] = []

    if metadata_only:
        return ring_dict

    partition_count = 1 << (32 - ring_dict['part_shift'])
    for x in xrange(ring_dict['replica_count']):
        ring_dict['replica2part2dev_id'].append(
            array.array('H', gz_file.read(2 * partition_count)))
    return ring_dict
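# For orientation, a minimal sketch of a writer producing the v1 layout
# consumed above (assumed field set; the real serializer includes more
# metadata and a byte-order check for the arrays):
import array
import gzip
import json
import struct

def write_v1(path, devs, part_shift, replica2part2dev_id):
    ring = {'devs': devs, 'part_shift': part_shift,
            'replica_count': len(replica2part2dev_id)}
    json_text = json.dumps(ring)
    gz_file = gzip.open(path, 'wb')
    gz_file.write('R1NG')                       # 4-byte magic ...
    gz_file.write(struct.pack('!H', 1))         # ... plus 2-byte version
    gz_file.write(struct.pack('!I', len(json_text)))
    gz_file.write(json_text)
    for row in replica2part2dev_id:             # one uint16 per partition
        gz_file.write(array.array('H', row).tostring())
    gz_file.close()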
def _fetch_sub_slo_segments(self, req, version, acc, con, obj):
    """
    Fetch the submanifest, parse it, and return it.
    Raise exception on failures.
    """
    sub_req = make_subrequest(
        req.environ,
        path="/".join(["", version, acc, con, obj]),
        method="GET",
        headers={"x-auth-token": req.headers.get("x-auth-token")},
        agent=("%(orig)s " + "SLO MultipartGET"),
        swift_source="SLO")
    sub_resp = sub_req.get_response(self.slo.app)

    if not is_success(sub_resp.status_int):
        close_if_possible(sub_resp.app_iter)
        raise ListingIterError(
            "ERROR: while fetching %s, GET of submanifest %s "
            "failed with status %d" % (req.path, sub_req.path,
                                       sub_resp.status_int))

    try:
        with closing_if_possible(sub_resp.app_iter):
            return json.loads("".join(sub_resp.app_iter))
    except ValueError as err:
        raise ListingIterError(
            "ERROR: while fetching %s, JSON-decoding of submanifest %s "
            "failed with %s" % (req.path, sub_req.path, err))
def test_GET_OBJscope_objAttrs_metadata(self):
    """
    In object scope give me object attrs.
    Should give back the object in the path.
    """
    attrs = Oattrs
    req2 = Request.blank(
        '/v1/TEST_acc1/TEST_con1/TEST_obj1',
        environ={'REQUEST_METHOD': 'GET', 'HTTP_X_TIMESTAMP': '0'},
        headers={'attributes': attrs, 'format': 'json'})
    resp2 = req2.get_response(self.controller)
    self.assert_(resp2.status.startswith('200'))
    testList = json.loads(resp2.body)
    self.assert_(len(testList) == 1)
    testDict = testList[0]
    self.assert_('/TEST_acc1/TEST_con1/TEST_obj1' in testDict)
    metaReturned = testDict['/TEST_acc1/TEST_con1/TEST_obj1']
    self.assertEquals(
        metaReturned['object_uri'], '/TEST_acc1/TEST_con1/TEST_obj1')
    self.assertEquals(metaReturned['object_name'], 'TEST_obj1')
    self.assertEquals(metaReturned['object_account_name'], 'TEST_acc1')
    self.assertEquals(metaReturned['object_container_name'], 'TEST_con1')
    self.assertEquals(metaReturned['object_uri_create_time'], self.t)
    self.assertEquals(metaReturned['object_etag_hash'],
                      '0000000000000000')
    self.assertEquals(metaReturned['object_content_type'], 'text/plain')
    self.assertEquals(str(metaReturned['object_content_length']), '42')
    self.assertEquals(metaReturned['object_content_encoding'], 'gzip')
    self.assertEquals(metaReturned['object_content_language'], 'en')
    self.assertEquals(metaReturned['object_meta_TESTCUSTOM'], 'CUSTOM')
def test_direct_get_account(self):
    stub_headers = HeaderKeyDict({
        'X-Account-Container-Count': '1',
        'X-Account-Object-Count': '1',
        'X-Account-Bytes-Used': '1',
        'X-Timestamp': '1234567890',
        'X-PUT-Timestamp': '1234567890'})

    body = '[{"count": 1, "bytes": 20971520, "name": "c1"}]'

    with mocked_http_conn(200, stub_headers, body) as conn:
        resp_headers, resp = direct_client.direct_get_account(
            self.node, self.part, self.account, marker='marker',
            prefix='prefix', delimiter='delimiter', limit=1000)
        self.assertEqual(conn.method, 'GET')
        self.assertEqual(conn.path, self.account_path)
        self.assertEqual(conn.req_headers['user-agent'], self.user_agent)

    self.assertEqual(resp_headers, stub_headers)
    self.assertEqual(json.loads(body), resp)
    self.assertTrue('marker=marker' in conn.query_string)
    self.assertTrue('delimiter=delimiter' in conn.query_string)
    self.assertTrue('limit=1000' in conn.query_string)
    self.assertTrue('prefix=prefix' in conn.query_string)
    self.assertTrue('format=json' in conn.query_string)
def parse_input(raw_data):
    """
    Given a manifest body, parses the JSON and returns a list of
    dictionaries.

    :raises: HTTPException on parse errors
    :returns: a list of dictionaries on success
    """
    try:
        parsed_data = json.loads(raw_data)
    except ValueError:
        raise HTTPBadRequest("Manifest must be valid json.")

    req_keys = set(["path", "etag", "size_bytes"])
    opt_keys = set(["range"])
    try:
        for seg_dict in parsed_data:
            if (not (req_keys <= set(seg_dict) <= req_keys | opt_keys) or
                    "/" not in seg_dict["path"].lstrip("/")):
                raise HTTPBadRequest("Invalid SLO Manifest File")

            if seg_dict.get("range"):
                try:
                    seg_dict["range"] = Range("bytes=%s" %
                                              seg_dict["range"])
                except ValueError:
                    raise HTTPBadRequest("Invalid SLO Manifest File")
    except (AttributeError, TypeError):
        raise HTTPBadRequest("Invalid SLO Manifest File")

    return parsed_data
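# Illustrative input for parse_input ("path", "etag" and "size_bytes" are
# required; "range" is optional and gets promoted to a swob Range):
import json

manifest_body = json.dumps([
    {"path": "/cont/seg_1", "etag": "a" * 32, "size_bytes": 1048576},
    {"path": "/cont/seg_2", "etag": "b" * 32, "size_bytes": 1048576,
     "range": "0-524287"},
])
segments = parse_input(manifest_body)
# segments[1]["range"] is now a Range instance covering bytes 0-524287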
def _listing_pages_iter(self, lcontainer, lprefix, env):
    lpartition = self.app.container_ring.get_part(
        self.account_name, lcontainer)
    marker = ""
    while True:
        lreq = Request.blank("i will be overridden by env", environ=env)
        # Don't quote PATH_INFO, by WSGI spec
        lreq.environ["PATH_INFO"] = \
            "/v1/%s/%s" % (self.account_name, lcontainer)
        lreq.environ["REQUEST_METHOD"] = "GET"
        lreq.environ["QUERY_STRING"] = \
            "format=json&prefix=%s&marker=%s" % (quote(lprefix),
                                                 quote(marker))
        lresp = self.GETorHEAD_base(
            lreq, _("Container"), self.app.container_ring, lpartition,
            lreq.swift_entity_path)
        if "swift.authorize" in env:
            lreq.acl = lresp.headers.get("x-container-read")
            aresp = env["swift.authorize"](lreq)
            if aresp:
                raise ListingIterNotAuthorized(aresp)
        if lresp.status_int == HTTP_NOT_FOUND:
            raise ListingIterNotFound()
        elif not is_success(lresp.status_int):
            raise ListingIterError()
        if not lresp.body:
            break
        sublisting = json.loads(lresp.body)
        if not sublisting:
            break
        marker = sublisting[-1]["name"].encode("utf-8")
        yield sublisting
def __call__(self, req):
    account = None
    try:
        (version, account, container, obj) = \
            split_path(req.path_info, 2, 4, True)
    except ValueError:
        pass
    if not account or not req.headers.get('x-web-mode'):
        return req.get_response(self.app)
    if not obj:
        req.query_string = 'format=json'
        resp = req.get_response(self.app)
        if resp.content_type == 'application/json':
            listing = json.loads(resp.body)
            template = self.get_template(req, account, container)
            if template:
                ctx = {
                    'account': account,
                    'container': container,
                    'listing': listing,
                }
                if container:
                    index = [o for o in listing
                             if o['name'] == 'index.html']
                    if index:
                        headers = {'Location': '/v1/%s/%s/index.html' %
                                   (account, container)}
                        return HTTPSeeOther(headers=headers)
                    return Response(body=template.render(**ctx))
                else:
                    index = [o for o in listing
                             if o['name'] == 'index.html']
                    if index:
                        headers = {'Location': '/v1/%s/%s/index.html' %
                                   (account, container)}
                        return HTTPSeeOther(headers=headers)
        return resp
def get_or_head_response(self, req, resp_headers, resp_iter):
    with closing_if_possible(resp_iter):
        resp_body = ''.join(resp_iter)
    try:
        segments = json.loads(resp_body)
    except ValueError:
        segments = []

    etag = md5()
    content_length = 0
    for seg_dict in segments:
        if seg_dict.get('range'):
            etag.update('%s:%s;' % (seg_dict['hash'], seg_dict['range']))
        else:
            etag.update(seg_dict['hash'])

        if config_true_value(seg_dict.get('sub_slo')):
            override_bytes_from_content_type(
                seg_dict, logger=self.slo.logger)
        content_length += self._segment_length(seg_dict)

    response_headers = [(h, v) for h, v in resp_headers
                        if h.lower() not in ('etag', 'content-length')]
    response_headers.append(('Content-Length', str(content_length)))
    response_headers.append(('Etag', '"%s"' % etag.hexdigest()))

    if req.method == 'HEAD':
        return self._manifest_head_response(req, response_headers)
    else:
        return self._manifest_get_response(
            req, content_length, response_headers, segments)
def _listing_pages_iter(self, account_name, lcontainer, lprefix, env):
    marker = ''
    while True:
        lreq = make_pre_authed_request(
            env, method='GET', swift_source='VW',
            path='/v1/%s/%s' % (account_name, lcontainer))
        lreq.environ['QUERY_STRING'] = \
            'format=json&prefix=%s&marker=%s' % (quote(lprefix),
                                                 quote(marker))
        lresp = lreq.get_response(self.app)
        if not is_success(lresp.status_int):
            if lresp.status_int == HTTP_NOT_FOUND:
                raise ListingIterNotFound()
            elif is_client_error(lresp.status_int):
                raise HTTPPreconditionFailed()
            else:
                raise ListingIterError()

        if not lresp.body:
            break

        sublisting = json.loads(lresp.body)
        if not sublisting:
            break
        marker = sublisting[-1]['name'].encode('utf-8')
        yield sublisting
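# Each yielded page is a decoded JSON sublisting; a caller typically
# flattens the pages into one stream of entries, e.g. (hypothetical
# helper, not part of the middleware):
def _iter_all_objects(self, account_name, lcontainer, lprefix, env):
    for sublisting in self._listing_pages_iter(account_name, lcontainer,
                                               lprefix, env):
        for obj in sublisting:
            yield obj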
def get_slo_segments(self, obj_name, req):
    """
    Performs a swob.Request and returns the SLO manifest's segments.

    :raises HTTPServerError: on unable to load obj_name or
                             on unable to load the SLO manifest data.
    :raises HTTPBadRequest: on not an SLO manifest
    :raises HTTPNotFound: on SLO manifest not found
    :returns: SLO manifest's segments
    """
    vrs, account, _junk = req.split_path(2, 3, True)
    new_env = req.environ.copy()
    new_env["REQUEST_METHOD"] = "GET"
    del(new_env["wsgi.input"])
    new_env["QUERY_STRING"] = "multipart-manifest=get"
    new_env["CONTENT_LENGTH"] = 0
    new_env["HTTP_USER_AGENT"] = \
        "%s MultipartDELETE" % new_env.get("HTTP_USER_AGENT")
    new_env["swift.source"] = "SLO"
    new_env["PATH_INFO"] = (
        "/%s/%s/%s" % (vrs, account, obj_name.lstrip("/"))
    ).encode("utf-8")
    resp = Request.blank("", new_env).get_response(self.app)

    if resp.is_success:
        if config_true_value(resp.headers.get("X-Static-Large-Object")):
            try:
                return json.loads(resp.body)
            except ValueError:
                raise HTTPServerError("Unable to load SLO manifest")
        else:
            raise HTTPBadRequest("Not an SLO manifest")
    elif resp.status_int == HTTP_NOT_FOUND:
        raise HTTPNotFound("SLO manifest not found")
    elif resp.status_int == HTTP_UNAUTHORIZED:
        raise HTTPUnauthorized("401 Unauthorized")
    else:
        raise HTTPServerError("Unable to load SLO manifest or segment.")
def _handle_sync_response(self, node, response, info, broker, http,
                          different_region=False):
    if response.status == HTTP_NOT_FOUND:  # completely missing, rsync
        self.stats['rsync'] += 1
        self.logger.increment('rsyncs')
        return self._rsync_db(broker, node, http, info['id'],
                              different_region=different_region)
    elif response.status == HTTP_INSUFFICIENT_STORAGE:
        raise DriveNotMounted()
    elif response.status >= 200 and response.status < 300:
        rinfo = json.loads(response.data)
        local_sync = broker.get_sync(rinfo['id'], incoming=False)
        if self._in_sync(rinfo, info, broker, local_sync):
            return True
        # if the difference in rowids between the two differs by
        # more than 50%, rsync then do a remote merge.
        if rinfo['max_row'] / float(info['max_row']) < 0.5:
            self.stats['remote_merge'] += 1
            self.logger.increment('remote_merges')
            return self._rsync_db(broker, node, http, info['id'],
                                  replicate_method='rsync_then_merge',
                                  replicate_timeout=(info['count'] / 2000),
                                  different_region=different_region)
        # else send diffs over to the remote server
        return self._usync_db(max(rinfo['point'], local_sync),
                              broker, http, rinfo['id'], info['id'])
def test_no_attributes_in_request_con_scope(self):
    req = Request.blank(
        '/v1/TEST_acc1/TEST_con1',
        environ={'REQUEST_METHOD': 'GET', 'HTTP_X_TIMESTAMP': '0'},
        headers={'format': 'json'})
    resp = req.get_response(self.controller)
    self.assert_(resp.status.startswith('200'))
    testList = json.loads(resp.body)
    self.assertEquals(len(testList), 4)

    testDict = testList[0]
    self.assert_('/TEST_acc1' in testDict)
    metaReturned = testDict['/TEST_acc1']
    self.assertEquals(metaReturned['account_uri'], '/TEST_acc1')

    testDict = testList[1]
    self.assert_('/TEST_acc1/TEST_con1' in testDict)
    metaReturned = testDict['/TEST_acc1/TEST_con1']
    self.assertEquals(
        metaReturned['container_uri'], '/TEST_acc1/TEST_con1')

    testDict = testList[2]
    self.assert_('/TEST_acc1/TEST_con1/TEST_obj1' in testDict)
    metaReturned = testDict['/TEST_acc1/TEST_con1/TEST_obj1']
    self.assertEquals(
        metaReturned['object_uri'], '/TEST_acc1/TEST_con1/TEST_obj1')

    testDict = testList[3]
    self.assert_('/TEST_acc1/TEST_con1/TEST_obj2' in testDict)
    metaReturned = testDict['/TEST_acc1/TEST_con1/TEST_obj2']
    self.assertEquals(
        metaReturned['object_uri'], '/TEST_acc1/TEST_con1/TEST_obj2')
def test_handle_multipart_put_check_data_bad(self):
    bad_data = json.dumps(
        [{'path': '/c/a_1', 'etag': 'a', 'size_bytes': '1'},
         {'path': '/c/a_2', 'etag': 'a', 'size_bytes': '1'},
         {'path': '/d/b_2', 'etag': 'b', 'size_bytes': '2'},
         {'path': '/d/slob', 'etag': 'a', 'size_bytes': '2'}])
    req = Request.blank(
        '/test_good/A/c/man?multipart-manifest=put',
        environ={'REQUEST_METHOD': 'PUT'},
        headers={'Accept': 'application/json'},
        body=bad_data)
    try:
        self.slo.handle_multipart_put(req, fake_start_response)
    except HTTPException as e:
        self.assertEquals(self.app.calls, 4)
        data = json.loads(e.body)
        errors = data['Errors']
        self.assertEquals(errors[0][0], '/c/a_1')
        self.assertEquals(errors[0][1], 'Size Mismatch')
        self.assertEquals(errors[2][0], '/c/a_2')
        self.assertEquals(errors[2][1], '400 Bad Request')
        self.assertEquals(errors[4][0], '/d/b_2')
        self.assertEquals(errors[4][1], 'Etag Mismatch')
        self.assertEquals(errors[-1][0], '/d/slob')
        self.assertEquals(errors[-1][1], 'Etag Mismatch')
    else:
        self.assert_(False)
def test_extract_tar_works(self):
    # On systems where $TMPDIR is long (like OS X), we need to do this
    # or else every upload will fail due to the path being too long.
    self.app.max_pathlen += len(self.testdir)
    for compress_format in ['', 'gz', 'bz2']:
        base_name = 'base_works_%s' % compress_format
        dir_tree = [
            {base_name: [{'sub_dir1': ['sub1_file1', 'sub1_file2']},
                         {'sub_dir2': ['sub2_file1', u'test obj \u2661']},
                         'sub_file1',
                         {'sub_dir3': [{'sub4_dir1': '../sub4 file1'}]},
                         {'sub_dir4': None},
                         ]}]
        build_dir_tree(self.testdir, dir_tree)
        mode = 'w'
        extension = ''
        if compress_format:
            mode += ':' + compress_format
            extension += '.' + compress_format
        tar = tarfile.open(name=os.path.join(self.testdir,
                                             'tar_works.tar' + extension),
                           mode=mode)
        tar.add(os.path.join(self.testdir, base_name))
        tar.close()
        req = Request.blank('/tar_works/acc/cont/')
        req.environ['wsgi.input'] = open(
            os.path.join(self.testdir, 'tar_works.tar' + extension))
        req.headers['transfer-encoding'] = 'chunked'
        resp_body = self.handle_extract_and_iter(req, compress_format)
        resp_data = json.loads(resp_body)
        self.assertEquals(resp_data['Number Files Created'], 6)

        # test out xml
        req = Request.blank('/tar_works/acc/cont/')
        req.environ['wsgi.input'] = open(
            os.path.join(self.testdir, 'tar_works.tar' + extension))
        req.headers['transfer-encoding'] = 'chunked'
        resp_body = self.handle_extract_and_iter(
            req, compress_format, 'application/xml')
        self.assert_('<response_status>201 Created</response_status>' in
                     resp_body)
        self.assert_('<number_files_created>6</number_files_created>' in
                     resp_body)

        # test out nonexistent format
        req = Request.blank('/tar_works/acc/cont/?extract-archive=tar',
                            headers={'Accept': 'good_xml'})
        req.environ['REQUEST_METHOD'] = 'PUT'
        req.environ['wsgi.input'] = open(
            os.path.join(self.testdir, 'tar_works.tar' + extension))
        req.headers['transfer-encoding'] = 'chunked'

        def fake_start_response(*args, **kwargs):
            pass

        app_iter = self.bulk(req.environ, fake_start_response)
        resp_body = ''.join([i for i in app_iter])
        self.assert_('Response Status: 406' in resp_body)
def _fetch_sub_slo_segments(self, req, version, acc, con, obj):
    """
    Fetch the submanifest, parse it, and return it.
    Raise exception on failures.
    """
    sub_req = make_subrequest(
        req.environ,
        path='/'.join(['', version, acc, con, obj]),
        method='GET',
        headers={'x-auth-token': req.headers.get('x-auth-token')},
        agent=('%(orig)s ' + 'SLO MultipartGET'),
        swift_source='SLO')
    sub_resp = sub_req.get_response(self.slo.app)

    if not is_success(sub_resp.status_int):
        close_if_possible(sub_resp.app_iter)
        raise ListingIterError(
            'ERROR: while fetching %s, GET of submanifest %s '
            'failed with status %d' % (req.path, sub_req.path,
                                       sub_resp.status_int))

    try:
        with closing_if_possible(sub_resp.app_iter):
            return json.loads(''.join(sub_resp.app_iter))
    except ValueError as err:
        raise ListingIterError(
            'ERROR: while fetching %s, JSON-decoding of submanifest %s '
            'failed with %s' % (req.path, sub_req.path, err))
def update_metadata(self, metadata_updates):
    """
    Updates the metadata dict for the database. The metadata dict values
    are tuples of (value, timestamp) where the timestamp indicates when
    that key was set to that value. Key/values will only be overwritten
    if the timestamp is newer. To delete a key, set its value to
    ('', timestamp). These empty keys will eventually be removed by
    :func:`reclaim`
    """
    old_metadata = self.metadata
    if set(metadata_updates).issubset(set(old_metadata)):
        for key, (value, timestamp) in metadata_updates.iteritems():
            if timestamp > old_metadata[key][1]:
                break
        else:
            return
    with self.get() as conn:
        try:
            md = conn.execute('SELECT metadata FROM %s_stat' %
                              self.db_type).fetchone()[0]
            md = json.loads(md) if md else {}
            utf8encodekeys(md)
        except sqlite3.OperationalError as err:
            if 'no such column: metadata' not in str(err):
                raise
            conn.execute("""
                ALTER TABLE %s_stat
                ADD COLUMN metadata TEXT DEFAULT ''
            """ % self.db_type)
            md = {}
        for key, value_timestamp in metadata_updates.iteritems():
            value, timestamp = value_timestamp
            if key not in md or timestamp > md[key][1]:
                md[key] = value_timestamp
        conn.execute('UPDATE %s_stat SET metadata = ?' % self.db_type,
                     (json.dumps(md),))
        conn.commit()
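# Timestamp-wins semantics of update_metadata in a nutshell (broker and
# timestamps are illustrative):
#
#   broker.update_metadata({'X-Container-Meta-Color':
#                           ('blue', '0000000001.00000')})
#   broker.update_metadata({'X-Container-Meta-Color':
#                           ('red', '0000000002.00000')})
#   # -> 'red' wins; its timestamp is newer.
#   broker.update_metadata({'X-Container-Meta-Color':
#                           ('', '0000000003.00000')})
#   # -> marks the key deleted; _reclaim() drops it later.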
def test_extract_tar_fail_max_cont(self):
    dir_tree = [{'sub_dir1': ['sub1_file1']},
                {'sub_dir2': ['sub2_file1', 'sub2_file2']},
                'f' * 101,
                {'sub_dir3': [{'sub4_dir1': 'sub4_file1'}]}]
    self.build_tar(dir_tree)
    with patch.object(self.bulk, 'max_containers', 1):
        self.app.calls = 0
        body = open(os.path.join(self.testdir, 'tar_fails.tar')).read()
        req = Request.blank('/tar_works_cont_head_fail/acc/', body=body,
                            headers={'Accept': 'application/json'})
        req.headers['transfer-encoding'] = 'chunked'
        resp_body = self.handle_extract_and_iter(req, '')
        self.assertEquals(self.app.calls, 5)
        resp_data = json.loads(resp_body)
        self.assertEquals(resp_data['Response Status'], '400 Bad Request')
        self.assertEquals(
            resp_data['Response Body'],
            'More than 1 containers to create from tar.')
def test_direct_get_container(self):
    headers = HeaderKeyDict({'key': 'value'})
    body = '[{"hash": "8f4e3", "last_modified": "317260", "bytes": 209}]'

    with mocked_http_conn(200, headers, body) as conn:
        resp_headers, resp = direct_client.direct_get_container(
            self.node, self.part, self.account, self.container,
            marker='marker', prefix='prefix', delimiter='delimiter',
            limit=1000)

    self.assertEqual(conn.req_headers['user-agent'],
                     'direct-client %s' % os.getpid())
    self.assertEqual(headers, resp_headers)
    self.assertEqual(json.loads(body), resp)
    self.assertTrue('marker=marker' in conn.query_string)
    self.assertTrue('delimiter=delimiter' in conn.query_string)
    self.assertTrue('limit=1000' in conn.query_string)
    self.assertTrue('prefix=prefix' in conn.query_string)
    self.assertTrue('format=json' in conn.query_string)
def GET(self, req):
    """
    Handle GET Bucket (List Objects) request
    """
    max_keys = req.get_validated_param(
        'max-keys', self.conf.max_bucket_listing)
    tag_max_keys = max_keys
    # TODO: Separate max_bucket_listing and default_bucket_listing
    max_keys = min(max_keys, self.conf.max_bucket_listing)

    encoding_type, query, listing_type, fetch_owner = \
        self._parse_request_options(req, max_keys)

    resp = req.get_response(self.app, query=query)

    objects = json.loads(resp.body)

    is_truncated = max_keys > 0 and len(objects) > max_keys
    objects = objects[:max_keys]

    if listing_type == 'object-versions':
        elem = self._build_versions_result(req, objects, is_truncated)
    elif listing_type == 'version-2':
        elem = self._build_list_bucket_result_type_two(
            req, objects, is_truncated)
    else:
        elem = self._build_list_bucket_result_type_one(
            req, objects, encoding_type, is_truncated)

    self._finish_result(
        req, elem, tag_max_keys, encoding_type, is_truncated)

    self._add_objects_to_result(
        req, elem, objects, encoding_type, listing_type, fetch_owner)

    body = tostring(elem)

    return HTTPOk(body=body, content_type='application/xml')
def GET(self, req):
    """
    Handles List Multipart Uploads
    """
    def filter_max_uploads(o):
        name = o.get('name', '')
        return name.count('/') == 1

    encoding_type = req.params.get('encoding-type')
    if encoding_type is not None and encoding_type != 'url':
        err_msg = 'Invalid Encoding Method specified in Request'
        raise InvalidArgument('encoding-type', encoding_type, err_msg)

    # TODO: add support for delimiter query.
    keymarker = req.params.get('key-marker', '')
    uploadid = req.params.get('upload-id-marker', '')
    maxuploads = req.get_validated_param('max-uploads',
                                         DEFAULT_MAX_UPLOADS,
                                         DEFAULT_MAX_UPLOADS)

    query = {
        'format': 'json',
        'limit': maxuploads + 1,
    }

    if uploadid and keymarker:
        query.update({'marker': '%s/%s' % (keymarker, uploadid)})
    elif keymarker:
        query.update({'marker': '%s/~' % (keymarker)})
    if 'prefix' in req.params:
        query.update({'prefix': req.params['prefix']})

    container = req.container_name + MULTIUPLOAD_SUFFIX
    resp = req.get_response(self.app, container=container, query=query)
    objects = json.loads(resp.body)

    objects = filter(filter_max_uploads, objects)

    if len(objects) > maxuploads:
        objects = objects[:maxuploads]
        truncated = True
    else:
        truncated = False

    uploads = []
    prefixes = []
    for o in objects:
        obj, upid = split_path('/' + o['name'], 1, 2)
        uploads.append({
            'key': obj,
            'upload_id': upid,
            'last_modified': o['last_modified'],
        })

    nextkeymarker = ''
    nextuploadmarker = ''
    if len(uploads) > 1:
        nextuploadmarker = uploads[-1]['upload_id']
        nextkeymarker = uploads[-1]['key']

    result_elem = Element('ListMultipartUploadsResult')
    SubElement(result_elem, 'Bucket').text = req.container_name
    SubElement(result_elem, 'KeyMarker').text = keymarker
    SubElement(result_elem, 'UploadIdMarker').text = uploadid
    SubElement(result_elem, 'NextKeyMarker').text = nextkeymarker
    SubElement(result_elem, 'NextUploadIdMarker').text = nextuploadmarker
    if 'prefix' in req.params:
        SubElement(result_elem, 'Prefix').text = req.params['prefix']
    SubElement(result_elem, 'MaxUploads').text = str(maxuploads)
    if encoding_type is not None:
        SubElement(result_elem, 'EncodingType').text = encoding_type
    SubElement(result_elem, 'IsTruncated').text = \
        'true' if truncated else 'false'

    # TODO: don't show uploads which are initiated before this bucket is
    # created.
    for u in uploads:
        upload_elem = SubElement(result_elem, 'Upload')
        SubElement(upload_elem, 'Key').text = u['key']
        SubElement(upload_elem, 'UploadId').text = u['upload_id']
        initiator_elem = SubElement(upload_elem, 'Initiator')
        SubElement(initiator_elem, 'ID').text = req.user_id
        SubElement(initiator_elem, 'DisplayName').text = req.user_id
        owner_elem = SubElement(upload_elem, 'Owner')
        SubElement(owner_elem, 'ID').text = req.user_id
        SubElement(owner_elem, 'DisplayName').text = req.user_id
        SubElement(upload_elem, 'StorageClass').text = 'STANDARD'
        SubElement(upload_elem, 'Initiated').text = \
            u['last_modified'][:-3] + 'Z'

    for p in prefixes:
        elem = SubElement(result_elem, 'CommonPrefixes')
        SubElement(elem, 'Prefix').text = p

    body = tostring(result_elem, encoding_type=encoding_type)

    return HTTPOk(body=body, content_type='application/xml')
def handle_object(self, env, start_response):
    """
    Handles a possible static web request for an object. This object
    could resolve into an index or listing request.

    :param env: The original WSGI environment dict.
    :param start_response: The original WSGI start_response hook.
    """
    tmp_env = dict(env)
    tmp_env['HTTP_USER_AGENT'] = \
        '%s StaticWeb' % env.get('HTTP_USER_AGENT')
    tmp_env['swift.source'] = 'SW'
    resp = self._app_call(tmp_env)
    status_int = self._get_status_int()
    if is_success(status_int) or is_redirection(status_int):
        start_response(self._response_status, self._response_headers,
                       self._response_exc_info)
        return resp
    if status_int != HTTP_NOT_FOUND:
        return self._error_response(resp, env, start_response)
    self._get_container_info(env)
    if not self._listings and not self._index:
        return self.app(env, start_response)
    status_int = HTTP_NOT_FOUND
    if self._index:
        tmp_env = dict(env)
        tmp_env['HTTP_USER_AGENT'] = \
            '%s StaticWeb' % env.get('HTTP_USER_AGENT')
        tmp_env['swift.source'] = 'SW'
        if tmp_env['PATH_INFO'][-1] != '/':
            tmp_env['PATH_INFO'] += '/'
        tmp_env['PATH_INFO'] += self._index
        resp = self._app_call(tmp_env)
        status_int = self._get_status_int()
        if is_success(status_int) or is_redirection(status_int):
            if env['PATH_INFO'][-1] != '/':
                resp = HTTPMovedPermanently(
                    location=env['PATH_INFO'] + '/')
                return resp(env, start_response)
            start_response(self._response_status, self._response_headers,
                           self._response_exc_info)
            return resp
    if status_int == HTTP_NOT_FOUND:
        if env['PATH_INFO'][-1] != '/':
            tmp_env = make_pre_authed_env(
                env, 'GET', '/%s/%s/%s' % (
                    self.version, self.account, self.container),
                self.agent, swift_source='SW')
            tmp_env['QUERY_STRING'] = 'limit=1&format=json&delimiter' \
                '=/&limit=1&prefix=%s' % quote(self.obj + '/')
            resp = self._app_call(tmp_env)
            body = ''.join(resp)
            if not is_success(self._get_status_int()) or not body or \
                    not json.loads(body):
                resp = HTTPNotFound()(env, self._start_response)
                return self._error_response(resp, env, start_response)
            resp = HTTPMovedPermanently(location=env['PATH_INFO'] + '/')
            return resp(env, start_response)
        return self._listing(env, start_response, self.obj)
def POST(self, req):
    """
    Handles Complete Multipart Upload.
    """
    upload_id = req.params['uploadId']
    resp = _get_upload_info(req, self.app, upload_id)
    headers = {}
    for key, val in resp.headers.iteritems():
        _key = key.lower()
        if _key.startswith('x-amz-meta-'):
            headers['x-object-meta-' + _key[11:]] = val

    # Query for the objects in the segments area to make sure it completed
    query = {
        'format': 'json',
        'prefix': '%s/%s/' % (req.object_name, upload_id),
        'delimiter': '/'
    }

    container = req.container_name + MULTIUPLOAD_SUFFIX
    resp = req.get_response(self.app, 'GET', container, '', query=query)
    objinfo = json.loads(resp.body)
    objtable = dict((o['name'],
                     {'path': '/'.join(['', container, o['name']]),
                      'etag': o['hash'],
                      'size_bytes': o['bytes']}) for o in objinfo)

    manifest = []
    previous_number = 0
    try:
        xml = req.xml(MAX_COMPLETE_UPLOAD_BODY_SIZE)
        complete_elem = fromstring(xml, 'CompleteMultipartUpload')
        for part_elem in complete_elem.iterchildren('Part'):
            part_number = int(part_elem.find('./PartNumber').text)

            if part_number <= previous_number:
                raise InvalidPartOrder(upload_id=upload_id)
            previous_number = part_number

            etag = part_elem.find('./ETag').text
            if len(etag) >= 2 and etag[0] == '"' and etag[-1] == '"':
                # strip double quotes
                etag = etag[1:-1]

            info = objtable.get("%s/%s/%s" % (req.object_name, upload_id,
                                              part_number))
            if info is None or info['etag'] != etag:
                raise InvalidPart(upload_id=upload_id,
                                  part_number=part_number)
            manifest.append(info)
    except (XMLSyntaxError, DocumentInvalid):
        raise MalformedXML()
    except ErrorResponse:
        raise
    except Exception as e:
        LOGGER.error(e)
        raise

    try:
        # TODO: add support for versioning
        resp = req.get_response(self.app, 'PUT',
                                body=json.dumps(manifest),
                                query={'multipart-manifest': 'put'},
                                headers=headers)
    except BadSwiftRequest as e:
        msg = str(e)
        if msg.startswith('Each segment, except the last, '
                          'must be at least '):
            # FIXME: AWS S3 allows a smaller object than 5 MB if there is
            # only one part. Use a COPY request to copy the part object
            # from the segments container instead.
            raise EntityTooSmall(msg)
        else:
            raise

    obj = '%s/%s' % (req.object_name, upload_id)
    req.get_response(self.app, 'DELETE', container, obj)

    result_elem = Element('CompleteMultipartUploadResult')
    SubElement(result_elem, 'Location').text = req.host_url + req.path
    SubElement(result_elem, 'Bucket').text = req.container_name
    SubElement(result_elem, 'Key').text = req.object_name
    SubElement(result_elem, 'ETag').text = resp.etag

    resp.body = tostring(result_elem)
    resp.status = 200
    resp.content_type = "application/xml"

    return resp
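# The XML body parsed above looks like this on the wire (standard S3
# CompleteMultipartUpload request; ETags are illustrative):
#
#   <CompleteMultipartUpload>
#     <Part>
#       <PartNumber>1</PartNumber>
#       <ETag>"79b281060d337b9b2b84ccf390adcf74"</ETag>
#     </Part>
#     <Part>
#       <PartNumber>2</PartNumber>
#       <ETag>"0c78aae747d8f7e34eaa742d7dd82a6e"</ETag>
#     </Part>
#   </CompleteMultipartUpload>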
def GETorHEAD(self, req):
    """
    Handles GET or HEAD requests on a part of a multipart object.
    """
    part_number = self.parse_part_number(req)

    had_match = False
    for match_header in ('if-match', 'if-none-match'):
        if match_header not in req.headers:
            continue
        had_match = True
        for value in list_from_csv(req.headers[match_header]):
            if value.startswith('"') and value.endswith('"'):
                value = value[1:-1]
            if value.endswith('-N'):
                # Deal with fake S3-like etags for SLOs uploaded
                # via Swift
                req.headers[match_header] += ', ' + value[:-2]

    if had_match:
        # Update where to look
        update_etag_is_at_header(req, sysmeta_header('object', 'etag'))

    # Get the list of parts. Must be raw to get all response headers.
    slo_resp = req.get_response(
        self.app, 'GET', req.container_name, req.object_name,
        query={'multipart-manifest': 'get', 'format': 'raw'})

    # Check if the object is really a SLO. If not, and user asked
    # for the first part, do a regular request.
    if 'X-Static-Large-Object' not in slo_resp.sw_headers:
        if part_number == 1:
            if slo_resp.is_success and req.method == 'HEAD':
                # Clear body
                slo_resp.body = ''
            return slo_resp
        else:
            close_if_possible(slo_resp.app_iter)
            raise InvalidRange()

    # Locate the part
    slo = json.loads(slo_resp.body)
    try:
        part = slo[part_number - 1]
    except IndexError:
        raise InvalidRange()

    # Redirect the request on the part
    _, req.container_name, req.object_name = part['path'].split('/', 2)
    # XXX enforce container_name and object_name to be <str>
    # or it will raise issues in swift3/requests when merging both
    req.container_name = req.container_name.encode('utf-8')
    req.object_name = req.object_name.encode('utf8')
    # The etag check was performed with the manifest
    if had_match:
        for match_header in ('if-match', 'if-none-match'):
            req.headers.pop(match_header, None)
    resp = req.get_response(self.app)

    # Replace status
    slo_resp.status = resp.status
    # Replace body
    slo_resp.app_iter = resp.app_iter
    # Update with the size of the part
    slo_resp.headers['Content-Length'] = \
        resp.headers.get('Content-Length', 0)
    slo_resp.sw_headers['Content-Length'] = \
        slo_resp.headers['Content-Length']
    # Add the number of parts in this object
    slo_resp.headers['X-Amz-Mp-Parts-Count'] = len(slo)

    return slo_resp
def handle_container_listing(self, env, start_response):
    # This code may be clearer by using Request(env).get_response()
    # instead of self._app_call(env)
    api_vers, account, container_name = split_path(
        env['PATH_INFO'], 3, 3, True)
    sub_env = env.copy()
    orig_container = get_unversioned_container(container_name)
    if orig_container != container_name:
        # Check that container_name is actually the versioning
        # container for orig_container
        sub_env['PATH_INFO'] = '/%s/%s/%s' % (api_vers, account,
                                              orig_container)
        info = get_container_info(sub_env, self.app,
                                  swift_source='VW')
        vers_loc = info.get('sysmeta', {}).get('versions-location')
        # Sometimes we receive versioned listing requests whereas
        # versioning is not enabled (vers_loc is None or empty).
        if vers_loc and vers_loc != container_name:
            # The container specified in the request ends with the
            # versioning suffix, but user has asked the versions to
            # be saved elsewhere, thus we will consider this as a
            # regular listing request.
            orig_container = container_name

    if orig_container != container_name:
        qs = parse_qs(sub_env.get('QUERY_STRING', ''))
        if 'marker' in qs:
            marker, _ = swift3_split_object_name_version(qs['marker'][0])
            qs['marker'] = [marker]
        if 'prefix' in qs:
            prefix, _ = swift3_split_object_name_version(qs['prefix'][0])
            qs['prefix'] = prefix
        qs['format'] = 'json'
        sub_env['QUERY_STRING'] = urlencode(qs, True)
        sub_env['oio.query'] = {'versions': True}

    resp = super(OioVersionedWritesContext,
                 self).handle_container_request(sub_env,
                                                lambda x, y, z: None)

    if orig_container != container_name and \
            self._response_status == '200 OK':
        with closing_if_possible(resp):
            versioned_objects = json.loads("".join(resp))

        # Discard the latest version of each object, because it is
        # not supposed to appear in the versioning container.
        # Also keep object prefixes as some of them may be shadowed
        # from the "main" container.
        latest = dict()
        subdirs = []
        for obj in versioned_objects:
            if 'subdir' in obj:
                subdirs.append(obj)
                continue
            ver = int(obj.get('version', '0'))
            # An integer is always strictly greater than None
            if ver > latest.get(obj['name']):
                latest[obj['name']] = ver
        versioned_objects = [
            obj for obj in versioned_objects
            if 'subdir' not in obj and
            (int(obj.get('version', '0')) != latest[obj['name']] or
             is_deleted(obj))
        ]

        for obj in versioned_objects:
            obj['name'] = swift3_versioned_object_name(
                obj['name'], obj.get('version', ''))
        versioned_objects += subdirs

        resp = json.dumps(versioned_objects)
        self._response_headers = [x for x in self._response_headers
                                  if x[0] != 'Content-Length']
        self._response_headers.append(('Content-Length',
                                       str(len(resp))))

    start_response(self._response_status, self._response_headers,
                   self._response_exc_info)
    return resp
def POST(self, req):
    """
    Handles Complete Multipart Upload.
    """
    upload_id = req.params['uploadId']
    resp = _get_upload_info(req, self.app, upload_id)
    headers = {'Accept': 'application/json'}
    for key, val in resp.headers.items():
        _key = key.lower()
        if _key.startswith('x-amz-meta-'):
            headers['x-object-meta-' + _key[11:]] = val

    hct_header = sysmeta_header('object', 'has-content-type')
    if resp.sysmeta_headers.get(hct_header) == 'yes':
        content_type = resp.sysmeta_headers.get(
            sysmeta_header('object', 'content-type'))
    elif hct_header in resp.sysmeta_headers:
        # has-content-type is present but false, so no content type was
        # set on initial upload. In that case, we won't set one on our
        # PUT request. Swift will end up guessing one based on the
        # object name.
        content_type = None
    else:
        content_type = resp.headers.get('Content-Type')

    if content_type:
        headers['Content-Type'] = content_type

    # Query for the objects in the segments area to make sure it completed
    query = {
        'format': 'json',
        'prefix': '%s/%s/' % (req.object_name, upload_id),
        'delimiter': '/'
    }

    container = req.container_name + MULTIUPLOAD_SUFFIX
    resp = req.get_response(self.app, 'GET', container, '', query=query)
    objinfo = json.loads(resp.body)
    objtable = dict((o['name'],
                     {'path': '/'.join(['', container, o['name']]),
                      'etag': o['hash'],
                      'size_bytes': o['bytes']}) for o in objinfo)

    s3_etag_hasher = md5()
    manifest = []
    previous_number = 0
    try:
        xml = req.xml(MAX_COMPLETE_UPLOAD_BODY_SIZE)
        if not xml:
            raise InvalidRequest(msg='You must specify at least one part')

        complete_elem = fromstring(
            xml, 'CompleteMultipartUpload', self.logger)
        for part_elem in complete_elem.iterchildren('Part'):
            part_number = int(part_elem.find('./PartNumber').text)

            if part_number <= previous_number:
                raise InvalidPartOrder(upload_id=upload_id)
            previous_number = part_number

            etag = part_elem.find('./ETag').text
            if len(etag) >= 2 and etag[0] == '"' and etag[-1] == '"':
                # strip double quotes
                etag = etag[1:-1]

            info = objtable.get("%s/%s/%s" % (req.object_name, upload_id,
                                              part_number))
            if info is None or info['etag'] != etag:
                raise InvalidPart(upload_id=upload_id,
                                  part_number=part_number)

            s3_etag_hasher.update(etag.decode('hex'))
            info['size_bytes'] = int(info['size_bytes'])
            manifest.append(info)
    except (XMLSyntaxError, DocumentInvalid):
        # NB: our schema definitions catch uploads with no parts here
        raise MalformedXML()
    except ErrorResponse:
        raise
    except Exception as e:
        self.logger.error(e)
        raise

    s3_etag = '%s-%d' % (s3_etag_hasher.hexdigest(), len(manifest))
    headers[sysmeta_header('object', 'etag')] = s3_etag
    # Leave base header value blank; SLO will populate
    c_etag = '; s3_etag=%s' % s3_etag
    headers['X-Object-Sysmeta-Container-Update-Override-Etag'] = c_etag

    # Check the size of each segment except the last and make sure they
    # are all more than the minimum upload chunk size
    for info in manifest[:-1]:
        if info['size_bytes'] < self.conf.min_segment_size:
            raise EntityTooSmall()

    def response_iter():
        # NB: XML requires that the XML declaration, if present, be at the
        # very start of the document. Clients *will* call us out on not
        # being valid XML if we pass through whitespace before it.
        # Track whether we've sent anything yet so we can yield out that
        # declaration *first*
        yielded_anything = False

        try:
            try:
                # TODO: add support for versioning
                put_resp = req.get_response(
                    self.app, 'PUT', body=json.dumps(manifest),
                    query={'multipart-manifest': 'put',
                           'heartbeat': 'on'},
                    headers=headers)
                if put_resp.status_int == 202:
                    body = []
                    put_resp.fix_conditional_response()
                    for chunk in put_resp.response_iter:
                        if not chunk.strip():
                            if not yielded_anything:
                                yield ('<?xml version="1.0" '
                                       'encoding="UTF-8"?>\n')
                            yielded_anything = True
                            yield chunk
                        body.append(chunk)
                    body = json.loads(''.join(body))
                    if body['Response Status'] != '201 Created':
                        raise InvalidRequest(
                            status=body['Response Status'],
                            msg='\n'.join(': '.join(err)
                                          for err in body['Errors']))
            except BadSwiftRequest as e:
                msg = str(e)
                expected_msg = ('too small; each segment must be '
                                'at least 1 byte')
                if expected_msg in msg:
                    # FIXME: AWS S3 allows a smaller object than 5 MB if
                    # there is only one part. Use a COPY request to copy
                    # the part object from the segments container instead.
                    raise EntityTooSmall(msg)
                else:
                    raise

            # clean up the multipart-upload record
            obj = '%s/%s' % (req.object_name, upload_id)
            try:
                req.get_response(self.app, 'DELETE', container, obj)
            except NoSuchKey:
                # We know that this existed long enough for us to HEAD
                pass

            result_elem = Element('CompleteMultipartUploadResult')

            # NOTE: boto with sig v4 appends port to HTTP_HOST value at
            # the request header when the port is non default value and it
            # makes req.host_url like as http://localhost:8080:8080/path
            # that obviously invalid. Probably it should be resolved at
            # swift.common.swob though, tentatively we are parsing and
            # reconstructing the correct host_url info here.
            # in detail, https://github.com/boto/boto/pull/3513
            parsed_url = urlparse(req.host_url)
            host_url = '%s://%s' % (parsed_url.scheme, parsed_url.hostname)
            if parsed_url.port:
                host_url += ':%s' % parsed_url.port

            SubElement(result_elem, 'Location').text = host_url + req.path
            SubElement(result_elem, 'Bucket').text = req.container_name
            SubElement(result_elem, 'Key').text = req.object_name
            SubElement(result_elem, 'ETag').text = '"%s"' % s3_etag
            resp.headers.pop('ETag', None)
            if yielded_anything:
                yield '\n'
            yield tostring(result_elem,
                           xml_declaration=not yielded_anything)
        except ErrorResponse as err_resp:
            if yielded_anything:
                err_resp.xml_declaration = False
                yield '\n'
            else:
                # Oh good, we can still change HTTP status code, too!
                resp.status = err_resp.status
            for chunk in err_resp({}, lambda *a: None):
                yield chunk

    resp = HTTPOk()  # assume we're good for now... but see above!
    resp.app_iter = reiterate(response_iter())
    resp.content_type = "application/xml"

    return resp
def GETorHEAD(self, req):
    """Handle HTTP GET or HEAD requests."""
    container_info = self.container_info(
        self.account_name, self.container_name, req)
    req.acl = container_info['read_acl']
    if 'swift.authorize' in req.environ:
        aresp = req.environ['swift.authorize'](req)
        if aresp:
            return aresp
    partition = self.app.object_ring.get_part(
        self.account_name, self.container_name, self.object_name)
    resp = self.GETorHEAD_base(
        req, _('Object'), self.app.object_ring, partition, req.path_info)

    if ';' in resp.headers.get('content-type', ''):
        # strip off swift_bytes from content-type
        content_type, check_extra_meta = \
            resp.headers['content-type'].rsplit(';', 1)
        if check_extra_meta.lstrip().startswith('swift_bytes='):
            resp.content_type = content_type

    large_object = None
    if config_true_value(resp.headers.get('x-static-large-object')) and \
            req.params.get('multipart-manifest') == 'get' and \
            'X-Copy-From' not in req.headers and \
            self.app.allow_static_large_object:
        resp.content_type = 'application/json'

    if config_true_value(resp.headers.get('x-static-large-object')) and \
            req.params.get('multipart-manifest') != 'get' and \
            self.app.allow_static_large_object:
        large_object = 'SLO'
        listing_page1 = ()
        listing = []
        lcontainer = None  # container name is included in listing
        if resp.status_int == HTTP_OK and \
                req.method == 'GET' and not req.range:
            try:
                listing = json.loads(resp.body)
            except ValueError:
                listing = []
        else:
            # need to make a second request to get whole manifest
            new_req = req.copy_get()
            new_req.method = 'GET'
            new_req.range = None
            new_resp = self.GETorHEAD_base(
                new_req, _('Object'), self.app.object_ring, partition,
                req.path_info)
            if new_resp.status_int // 100 == 2:
                try:
                    listing = json.loads(new_resp.body)
                except ValueError:
                    listing = []
            else:
                return HTTPServiceUnavailable(
                    "Unable to load SLO manifest", request=req)

    if 'x-object-manifest' in resp.headers and \
            req.params.get('multipart-manifest') != 'get':
        large_object = 'DLO'
        lcontainer, lprefix = \
            resp.headers['x-object-manifest'].split('/', 1)
        lcontainer = unquote(lcontainer)
        lprefix = unquote(lprefix)
        try:
            pages_iter = iter(self._listing_pages_iter(
                lcontainer, lprefix, req.environ))
            listing_page1 = pages_iter.next()
            listing = itertools.chain(listing_page1,
                                      self._remaining_items(pages_iter))
        except ListingIterNotFound:
            return HTTPNotFound(request=req)
        except ListingIterNotAuthorized as err:
            return err.aresp
        except ListingIterError:
            return HTTPServerError(request=req)
def test_get_endpoint(self):
    # Expected results for objects taken from test_ring
    # Expected results for others computed by manually invoking
    # ring.get_nodes().
    resp = Request.blank('/endpoints/a/c/o1').get_response(
        self.list_endpoints)
    self.assertEquals(resp.status_int, 200)
    self.assertEquals(resp.content_type, 'application/json')
    self.assertEquals(json.loads(resp.body), [
        "http://10.1.1.1:6000/sdb1/1/a/c/o1",
        "http://10.1.2.2:6000/sdd1/1/a/c/o1"
    ])

    # Here, 'o1/' is the object name.
    resp = Request.blank('/endpoints/a/c/o1/').get_response(
        self.list_endpoints)
    self.assertEquals(resp.status_int, 200)
    self.assertEquals(json.loads(resp.body), [
        "http://10.1.1.1:6000/sdb1/3/a/c/o1/",
        "http://10.1.2.2:6000/sdd1/3/a/c/o1/"
    ])

    resp = Request.blank('/endpoints/a/c2').get_response(
        self.list_endpoints)
    self.assertEquals(resp.status_int, 200)
    self.assertEquals(json.loads(resp.body), [
        "http://10.1.1.1:6000/sda1/2/a/c2",
        "http://10.1.2.1:6000/sdc1/2/a/c2"
    ])

    resp = Request.blank('/endpoints/a1').get_response(
        self.list_endpoints)
    self.assertEquals(resp.status_int, 200)
    self.assertEquals(json.loads(resp.body), [
        "http://10.1.2.1:6000/sdc1/0/a1",
        "http://10.1.1.1:6000/sda1/0/a1",
        "http://10.1.1.1:6000/sdb1/0/a1"
    ])

    resp = Request.blank('/endpoints/').get_response(
        self.list_endpoints)
    self.assertEquals(resp.status_int, 400)

    resp = Request.blank('/endpoints/a/c 2').get_response(
        self.list_endpoints)
    self.assertEquals(resp.status_int, 200)
    self.assertEquals(json.loads(resp.body), [
        "http://10.1.1.1:6000/sdb1/3/a/c%202",
        "http://10.1.2.2:6000/sdd1/3/a/c%202"
    ])

    resp = Request.blank('/endpoints/a/c%202').get_response(
        self.list_endpoints)
    self.assertEquals(resp.status_int, 200)
    self.assertEquals(json.loads(resp.body), [
        "http://10.1.1.1:6000/sdb1/3/a/c%202",
        "http://10.1.2.2:6000/sdd1/3/a/c%202"
    ])

    resp = Request.blank('/endpoints/ac%20count/con%20tainer/ob%20ject') \
        .get_response(self.list_endpoints)
    self.assertEquals(resp.status_int, 200)
    self.assertEquals(json.loads(resp.body), [
        "http://10.1.1.1:6000/sdb1/3/ac%20count/con%20tainer/ob%20ject",
        "http://10.1.2.2:6000/sdd1/3/ac%20count/con%20tainer/ob%20ject"
    ])

    resp = Request.blank('/endpoints/a/c/o1', {'REQUEST_METHOD': 'POST'}) \
        .get_response(self.list_endpoints)
    self.assertEquals(resp.status_int, 405)
    self.assertEquals(resp.status, '405 Method Not Allowed')
    self.assertEquals(resp.headers['allow'], 'GET')

    resp = Request.blank('/not-endpoints').get_response(
        self.list_endpoints)
    self.assertEquals(resp.status_int, 200)
    self.assertEquals(resp.status, '200 OK')
    self.assertEquals(resp.body, 'FakeApp')

    # test custom path with trailing slash
    custom_path_le = list_endpoints.filter_factory({
        'swift_dir': self.testdir,
        'list_endpoints_path': '/some/another/path/'
    })(self.app)
    resp = Request.blank('/some/another/path/a/c/o1') \
        .get_response(custom_path_le)
    self.assertEquals(resp.status_int, 200)
    self.assertEquals(resp.content_type, 'application/json')
    self.assertEquals(json.loads(resp.body), [
        "http://10.1.1.1:6000/sdb1/1/a/c/o1",
        "http://10.1.2.2:6000/sdd1/1/a/c/o1"
    ])

    # test custom path without trailing slash
    custom_path_le = list_endpoints.filter_factory({
        'swift_dir': self.testdir,
        'list_endpoints_path': '/some/another/path'
    })(self.app)
    resp = Request.blank('/some/another/path/a/c/o1') \
        .get_response(custom_path_le)
    self.assertEquals(resp.status_int, 200)
    self.assertEquals(resp.content_type, 'application/json')
    self.assertEquals(json.loads(resp.body), [
        "http://10.1.1.1:6000/sdb1/1/a/c/o1",
        "http://10.1.2.2:6000/sdd1/1/a/c/o1"
    ])
def GET(self, req): """ Handle GET Bucket (List Objects) request """ max_keys = req.get_validated_param('max-keys', CONF.max_bucket_listing) # TODO: Separate max_bucket_listing and default_bucket_listing tag_max_keys = max_keys max_keys = min(max_keys, CONF.max_bucket_listing) encoding_type = req.params.get('encoding-type') if encoding_type is not None and encoding_type != 'url': err_msg = 'Invalid Encoding Method specified in Request' raise InvalidArgument('encoding-type', encoding_type, err_msg) query = { 'format': 'json', 'limit': max_keys + 1, } if 'marker' in req.params: query.update({'marker': req.params['marker']}) if 'prefix' in req.params: query.update({'prefix': req.params['prefix']}) if 'delimiter' in req.params: query.update({'delimiter': req.params['delimiter']}) resp = req.get_response(self.app, query=query) objects = json.loads(resp.body) elem = Element('ListBucketResult') SubElement(elem, 'Name').text = req.container_name SubElement(elem, 'Prefix').text = req.params.get('prefix') SubElement(elem, 'Marker').text = req.params.get('marker') # in order to judge that truncated is valid, check whether # max_keys + 1 th element exists in swift. is_truncated = max_keys > 0 and len(objects) > max_keys objects = objects[:max_keys] if is_truncated and 'delimiter' in req.params: if 'name' in objects[-1]: SubElement(elem, 'NextMarker').text = \ objects[-1]['name'] if 'subdir' in objects[-1]: SubElement(elem, 'NextMarker').text = \ objects[-1]['subdir'] SubElement(elem, 'MaxKeys').text = str(tag_max_keys) if 'delimiter' in req.params: SubElement(elem, 'Delimiter').text = req.params['delimiter'] if encoding_type is not None: SubElement(elem, 'EncodingType').text = encoding_type SubElement(elem, 'IsTruncated').text = \ 'true' if is_truncated else 'false' for o in objects: if 'subdir' not in o: contents = SubElement(elem, 'Contents') SubElement(contents, 'Key').text = o['name'] SubElement(contents, 'LastModified').text = \ o['last_modified'][:-3] + 'Z' SubElement(contents, 'ETag').text = '"%s"' % o['hash'] SubElement(contents, 'Size').text = str(o['bytes']) owner = SubElement(contents, 'Owner') SubElement(owner, 'ID').text = req.user_id SubElement(owner, 'DisplayName').text = req.user_id SubElement(contents, 'StorageClass').text = 'STANDARD' for o in objects: if 'subdir' in o: common_prefixes = SubElement(elem, 'CommonPrefixes') SubElement(common_prefixes, 'Prefix').text = o['subdir'] body = tostring(elem, encoding_type=encoding_type) return HTTPOk(body=body, content_type='application/xml')
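# The listing above asks Swift for max_keys + 1 entries so it can tell a
# full final page from a truncated one without issuing a second request.
# The pattern in isolation (a sketch, not the middleware's API):
def paginate(entries, max_keys):
    # Request max_keys + 1 upstream; the extra entry only proves that
    # more data exists and is never returned to the client.
    is_truncated = max_keys > 0 and len(entries) > max_keys
    return entries[:max_keys], is_truncated

assert paginate(['a', 'b', 'c'], 2) == (['a', 'b'], True)
assert paginate(['a', 'b'], 2) == (['a', 'b'], False)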
def handle_object(self, env, start_response): """ Handles a possible static web request for an object. This object could resolve into an index or listing request. :param env: The original WSGI environment dict. :param start_response: The original WSGI start_response hook. """ tmp_env = dict(env) tmp_env['HTTP_USER_AGENT'] = \ '%s StaticWeb' % env.get('HTTP_USER_AGENT') tmp_env['swift.source'] = 'SW' resp = self._app_call(tmp_env) status_int = self._get_status_int() self._get_container_info(env) if is_success(status_int) or is_redirection(status_int): # Treat directory marker objects as not found if not self._dir_type: self._dir_type = 'application/directory' content_length = self._response_header_value('content-length') content_length = int(content_length) if content_length else 0 if self._response_header_value('content-type') == self._dir_type \ and content_length <= 1: status_int = HTTP_NOT_FOUND else: start_response(self._response_status, self._response_headers, self._response_exc_info) return resp if status_int != HTTP_NOT_FOUND: # Retaining the previous code's behavior of not using custom error # pages for non-404 errors. self._error = None return self._error_response(resp, env, start_response) if not self._listings and not self._index: start_response(self._response_status, self._response_headers, self._response_exc_info) return resp status_int = HTTP_NOT_FOUND if self._index: tmp_env = dict(env) tmp_env['HTTP_USER_AGENT'] = \ '%s StaticWeb' % env.get('HTTP_USER_AGENT') tmp_env['swift.source'] = 'SW' if tmp_env['PATH_INFO'][-1] != '/': tmp_env['PATH_INFO'] += '/' tmp_env['PATH_INFO'] += self._index resp = self._app_call(tmp_env) status_int = self._get_status_int() if is_success(status_int) or is_redirection(status_int): if env['PATH_INFO'][-1] != '/': resp = HTTPMovedPermanently(location=env['PATH_INFO'] + '/') return resp(env, start_response) start_response(self._response_status, self._response_headers, self._response_exc_info) return resp if status_int == HTTP_NOT_FOUND: if env['PATH_INFO'][-1] != '/': tmp_env = make_pre_authed_env( env, 'GET', '/%s/%s/%s' % (self.version, self.account, self.container), self.agent, swift_source='SW') tmp_env['QUERY_STRING'] = 'limit=1&format=json&delimiter' \ '=/&limit=1&prefix=%s' % quote(self.obj + '/') resp = self._app_call(tmp_env) body = ''.join(resp) if not is_success(self._get_status_int()) or not body or \ not json.loads(body): resp = HTTPNotFound()(env, self._start_response) return self._error_response(resp, env, start_response) resp = HTTPMovedPermanently(location=env['PATH_INFO'] + '/') return resp(env, start_response) return self._listing(env, start_response, self.obj)
def GET(self, req): """ Handle GET Bucket (List Objects) request """ max_keys = req.get_validated_param('max-keys', self.conf.max_bucket_listing) # TODO: Separate max_bucket_listing and default_bucket_listing tag_max_keys = max_keys max_keys = min(max_keys, self.conf.max_bucket_listing) encoding_type = req.params.get('encoding-type') if encoding_type is not None and encoding_type != 'url': err_msg = 'Invalid Encoding Method specified in Request' raise InvalidArgument('encoding-type', encoding_type, err_msg) query = { 'format': 'json', 'limit': max_keys + 1, } if 'prefix' in req.params: query.update({'prefix': req.params['prefix']}) if 'delimiter' in req.params: query.update({'delimiter': req.params['delimiter']}) fetch_owner = False if 'versions' in req.params: listing_type = 'object-versions' if 'key-marker' in req.params: query.update({'marker': req.params['key-marker']}) elif 'version-id-marker' in req.params: err_msg = ('A version-id marker cannot be specified without ' 'a key marker.') raise InvalidArgument('version-id-marker', req.params['version-id-marker'], err_msg) elif int(req.params.get('list-type', '1')) == 2: listing_type = 'version-2' if 'start-after' in req.params: query.update({'marker': req.params['start-after']}) # continuation-token overrides start-after if 'continuation-token' in req.params: decoded = b64decode(req.params['continuation-token']) query.update({'marker': decoded}) if 'fetch-owner' in req.params: fetch_owner = config_true_value(req.params['fetch-owner']) else: listing_type = 'version-1' if 'marker' in req.params: query.update({'marker': req.params['marker']}) resp = req.get_response(self.app, query=query) objects = json.loads(resp.body) # in order to judge that truncated is valid, check whether # max_keys + 1 th element exists in swift. 
is_truncated = max_keys > 0 and len(objects) > max_keys objects = objects[:max_keys] if listing_type == 'object-versions': elem = Element('ListVersionsResult') SubElement(elem, 'Name').text = req.container_name SubElement(elem, 'Prefix').text = req.params.get('prefix') SubElement(elem, 'KeyMarker').text = req.params.get('key-marker') SubElement( elem, 'VersionIdMarker').text = req.params.get('version-id-marker') if is_truncated: if 'name' in objects[-1]: SubElement(elem, 'NextKeyMarker').text = \ objects[-1]['name'] if 'subdir' in objects[-1]: SubElement(elem, 'NextKeyMarker').text = \ objects[-1]['subdir'] SubElement(elem, 'NextVersionIdMarker').text = 'null' else: elem = Element('ListBucketResult') SubElement(elem, 'Name').text = req.container_name SubElement(elem, 'Prefix').text = req.params.get('prefix') if listing_type == 'version-1': SubElement(elem, 'Marker').text = req.params.get('marker') if is_truncated and 'delimiter' in req.params: if 'name' in objects[-1]: name = objects[-1]['name'] else: name = objects[-1]['subdir'] if encoding_type == 'url': name = quote(name) SubElement(elem, 'NextMarker').text = name elif listing_type == 'version-2': if is_truncated: if 'name' in objects[-1]: SubElement(elem, 'NextContinuationToken').text = \ b64encode(objects[-1]['name'].encode('utf8')) if 'subdir' in objects[-1]: SubElement(elem, 'NextContinuationToken').text = \ b64encode(objects[-1]['subdir'].encode('utf8')) if 'continuation-token' in req.params: SubElement(elem, 'ContinuationToken').text = \ req.params['continuation-token'] if 'start-after' in req.params: SubElement(elem, 'StartAfter').text = \ req.params['start-after'] SubElement(elem, 'KeyCount').text = str(len(objects)) SubElement(elem, 'MaxKeys').text = str(tag_max_keys) if 'delimiter' in req.params: SubElement(elem, 'Delimiter').text = req.params['delimiter'] if encoding_type == 'url': SubElement(elem, 'EncodingType').text = encoding_type SubElement(elem, 'IsTruncated').text = \ 'true' if is_truncated else 'false' for o in objects: if 'subdir' not in o: name = o['name'] if encoding_type == 'url': name = quote(name.encode('utf-8')) if listing_type == 'object-versions': contents = SubElement(elem, 'Version') SubElement(contents, 'Key').text = name SubElement(contents, 'VersionId').text = 'null' SubElement(contents, 'IsLatest').text = 'true' else: contents = SubElement(elem, 'Contents') SubElement(contents, 'Key').text = name SubElement(contents, 'LastModified').text = \ o['last_modified'][:-3] + 'Z' if 's3_etag' in o: # New-enough MUs are already in the right format etag = o['s3_etag'] elif 'slo_etag' in o: # SLOs may be in something *close* to the MU format etag = '"%s-N"' % o['slo_etag'].strip('"') else: # Normal objects just use the MD5 etag = '"%s"' % o['hash'] # This also catches sufficiently-old SLOs, but we have # no way to identify those from container listings SubElement(contents, 'ETag').text = etag SubElement(contents, 'Size').text = str(o['bytes']) if fetch_owner or listing_type != 'version-2': owner = SubElement(contents, 'Owner') SubElement(owner, 'ID').text = req.user_id SubElement(owner, 'DisplayName').text = req.user_id SubElement(contents, 'StorageClass').text = 'STANDARD' for o in objects: if 'subdir' in o: common_prefixes = SubElement(elem, 'CommonPrefixes') name = o['subdir'] if encoding_type == 'url': name = quote(name.encode('utf-8')) SubElement(common_prefixes, 'Prefix').text = name body = tostring(elem) return HTTPOk(body=body, content_type='application/xml')
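# ListObjectsV2 above round-trips the Swift marker through an opaque
# continuation token: base64 of the last name returned. A sketch of the
# encode/decode pair the handler relies on; helper names are illustrative:
from base64 import b64encode, b64decode

def make_token(last_name):
    # NextContinuationToken handed back to the client
    return b64encode(last_name.encode('utf8'))

def token_to_marker(token):
    # 'continuation-token' decoded back into a Swift listing marker
    return b64decode(token)

token = make_token(u'photos/2006/')
assert token_to_marker(token) == b'photos/2006/'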
def POST(self, req): """ Handles Complete Multipart Upload. """ upload_id = req.params['uploadId'] resp = _get_upload_info(req, self.app, upload_id) headers = {} for key, val in resp.headers.iteritems(): _key = key.lower() if _key.startswith('x-amz-meta-'): headers['x-object-meta-' + _key[11:]] = val elif _key == 'content-type': headers['Content-Type'] = val # Query for the objects in the segments area to make sure it completed query = { 'format': 'json', 'prefix': '%s/%s/' % (req.object_name, upload_id), 'delimiter': '/' } container = req.container_name + MULTIUPLOAD_SUFFIX resp = req.get_response(self.app, 'GET', container, '', query=query) objinfo = json.loads(resp.body) objtable = dict((o['name'], { 'path': '/'.join(['', container, o['name']]), 'etag': o['hash'], 'size_bytes': o['bytes'] }) for o in objinfo) etag_hash = md5() for obj in objinfo: etag_hash.update(unhexlify(obj['hash'])) s3_etag = "%s-%d" % (etag_hash.hexdigest(), len(objinfo)) headers['Content-Type'] += ";s3_etag=%s" % s3_etag manifest = [] previous_number = 0 try: xml = req.xml(MAX_COMPLETE_UPLOAD_BODY_SIZE) complete_elem = fromstring(xml, 'CompleteMultipartUpload') for part_elem in complete_elem.iterchildren('Part'): part_number = int(part_elem.find('./PartNumber').text) if part_number <= previous_number: raise InvalidPartOrder(upload_id=upload_id) previous_number = part_number etag = part_elem.find('./ETag').text if len(etag) >= 2 and etag[0] == '"' and etag[-1] == '"': # strip double quotes etag = etag[1:-1] info = objtable.get("%s/%s/%s" % (req.object_name, upload_id, part_number)) if info is None or info['etag'] != etag: raise InvalidPart(upload_id=upload_id, part_number=part_number) info['size_bytes'] = int(info['size_bytes']) manifest.append(info) except (XMLSyntaxError, DocumentInvalid): raise MalformedXML() except ErrorResponse: raise except Exception as e: exc_type, exc_value, exc_traceback = sys.exc_info() LOGGER.error(e) raise exc_type, exc_value, exc_traceback # Following swift commit 7f636a5, zero-byte segments aren't allowed, # even as the final segment empty_seg = None if manifest[-1]['size_bytes'] == 0: empty_seg = manifest.pop() # We'll check the sizes of all except the last segment below, but # since we just popped off a zero-byte segment, we should check # that last segment, too. if manifest and manifest[-1]['size_bytes'] < CONF.min_segment_size: raise EntityTooSmall() # Check the size of each segment except the last and make sure they are # all more than the minimum upload chunk size for info in manifest[:-1]: if info['size_bytes'] < CONF.min_segment_size: raise EntityTooSmall() try: # TODO: add support for versioning if manifest: resp = req.get_response(self.app, 'PUT', body=json.dumps(manifest), query={'multipart-manifest': 'put'}, headers=headers) else: # the upload must have consisted of a single zero-length part # just write it directly resp = req.get_response(self.app, 'PUT', body='', headers=headers) except BadSwiftRequest as e: msg = str(e) expected_msg = 'too small; each segment must be at least 1 byte' if expected_msg in msg: # FIXME: AWS S3 allows a smaller object than 5 MB if there is # only one part. Use a COPY request to copy the part object # from the segments container instead. 
raise EntityTooSmall(msg) else: raise if empty_seg: # clean up the zero-byte segment _, empty_seg_cont, empty_seg_name = empty_seg['path'].split('/', 2) req.get_response(self.app, 'DELETE', container=empty_seg_cont, obj=empty_seg_name) # clean up the multipart-upload record obj = '%s/%s' % (req.object_name, upload_id) req.get_response(self.app, 'DELETE', container, obj) result_elem = Element('CompleteMultipartUploadResult') # NOTE: boto with sig v4 appends port to HTTP_HOST value at the # request header when the port is non default value and it makes # req.host_url like as http://localhost:8080:8080/path # that obviously invalid. Probably it should be resolved at # swift.common.swob though, tentatively we are parsing and # reconstructing the correct host_url info here. # in detail, https://github.com/boto/boto/pull/3513 parsed_url = urlparse(req.host_url) host_url = '%s://%s' % (parsed_url.scheme, parsed_url.hostname) if parsed_url.port: host_url += ':%s' % parsed_url.port SubElement(result_elem, 'Location').text = host_url + req.path SubElement(result_elem, 'Bucket').text = req.container_name SubElement(result_elem, 'Key').text = req.object_name SubElement(result_elem, 'ETag').text = '"%s"' % s3_etag del resp.headers['ETag'] resp.body = tostring(result_elem) resp.status = 200 resp.content_type = "application/xml" return resp
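# The s3_etag computed above is MD5-of-MD5s: concatenate the raw (binary)
# MD5 of every part in order, hash that, and append "-<part count>". The
# same arithmetic as a standalone sketch:
from binascii import unhexlify
from hashlib import md5

def multipart_etag(part_md5_hexdigests):
    h = md5()
    for hexdigest in part_md5_hexdigests:
        h.update(unhexlify(hexdigest))
    return '%s-%d' % (h.hexdigest(), len(part_md5_hexdigests))

parts = [md5(b'part-one').hexdigest(), md5(b'part-two').hexdigest()]
etag = multipart_etag(parts)
assert etag.endswith('-2') and len(etag.split('-')[0]) == 32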
def GET(self, req): """ Handles List Parts. """ def filter_part_num_marker(o): try: num = int(os.path.basename(o['name'])) return num > part_num_marker except ValueError: return False encoding_type = req.params.get('encoding-type') if encoding_type is not None and encoding_type != 'url': err_msg = 'Invalid Encoding Method specified in Request' raise InvalidArgument('encoding-type', encoding_type, err_msg) upload_id = req.params['uploadId'] _get_upload_info(req, self.app, upload_id) maxparts = req.get_validated_param('max-parts', DEFAULT_MAX_PARTS_LISTING, self.conf.max_parts_listing) part_num_marker = req.get_validated_param('part-number-marker', 0) query = { 'format': 'json', 'prefix': '%s/%s/' % (req.object_name, upload_id), 'delimiter': '/', 'marker': '', } container = req.container_name + MULTIUPLOAD_SUFFIX # Because the parts are out of order in Swift, we list up to the # maximum number of parts and then apply the marker and limit options. objects = [] while True: resp = req.get_response(self.app, container=container, obj='', query=query) new_objects = json.loads(resp.body) if not new_objects: break objects.extend(new_objects) query['marker'] = new_objects[-1]['name'] last_part = 0 # If the caller requested a list starting at a specific part number, # construct a sub-set of the object list. objList = [obj for obj in objects if filter_part_num_marker(obj)] # pylint: disable-msg=E1103 objList.sort(key=lambda o: int(o['name'].split('/')[-1])) if len(objList) > maxparts: objList = objList[:maxparts] truncated = True else: truncated = False # TODO: We have to retrieve object list again when truncated is True # and some objects filtered by invalid name because there could be no # enough objects for limit defined by maxparts. if objList: o = objList[-1] last_part = os.path.basename(o['name']) result_elem = Element('ListPartsResult') SubElement(result_elem, 'Bucket').text = req.container_name name = req.object_name if encoding_type == 'url': name = quote(name) SubElement(result_elem, 'Key').text = name SubElement(result_elem, 'UploadId').text = upload_id initiator_elem = SubElement(result_elem, 'Initiator') SubElement(initiator_elem, 'ID').text = req.user_id SubElement(initiator_elem, 'DisplayName').text = req.user_id owner_elem = SubElement(result_elem, 'Owner') SubElement(owner_elem, 'ID').text = req.user_id SubElement(owner_elem, 'DisplayName').text = req.user_id SubElement(result_elem, 'StorageClass').text = 'STANDARD' SubElement(result_elem, 'PartNumberMarker').text = str(part_num_marker) SubElement(result_elem, 'NextPartNumberMarker').text = str(last_part) SubElement(result_elem, 'MaxParts').text = str(maxparts) if 'encoding-type' in req.params: SubElement(result_elem, 'EncodingType').text = \ req.params['encoding-type'] SubElement(result_elem, 'IsTruncated').text = \ 'true' if truncated else 'false' for i in objList: part_elem = SubElement(result_elem, 'Part') SubElement(part_elem, 'PartNumber').text = i['name'].split('/')[-1] SubElement(part_elem, 'LastModified').text = \ i['last_modified'][:-3] + 'Z' SubElement(part_elem, 'ETag').text = '"%s"' % i['hash'] SubElement(part_elem, 'Size').text = str(i['bytes']) body = tostring(result_elem) return HTTPOk(body=body, content_type='application/xml')
def test_GET_ACCscope_objAttrs_metadata(self): """ In account scope, give me object attrs Should return info for all 3 objects we uploaded in setup """ attrs = Oattrs req2 = Request.blank('/v1/TEST_acc1', environ={ 'REQUEST_METHOD': 'GET', 'HTTP_X_TIMESTAMP': '0' }, headers={ 'attributes': attrs, 'format': 'json' }) resp2 = req2.get_response(self.controller) self.assert_(resp2.status.startswith('200')) testList = json.loads(resp2.body) self.assert_(len(testList) == 3) testDict = testList[0] self.assert_('/TEST_acc1/TEST_con1/TEST_obj1' in testDict) metaReturned = testDict['/TEST_acc1/TEST_con1/TEST_obj1'] self.assertEquals(metaReturned['object_uri'], '/TEST_acc1/TEST_con1/TEST_obj1') self.assertEquals(metaReturned['object_name'], 'TEST_obj1') self.assertEquals(metaReturned['object_account_name'], 'TEST_acc1') self.assertEquals(metaReturned['object_container_name'], 'TEST_con1') self.assertEquals(metaReturned['object_uri_create_time'], self.t) self.assertEquals(metaReturned['object_etag_hash'], '0000000000000000') self.assertEquals(metaReturned['object_content_type'], 'text/plain') self.assertEquals(str(metaReturned['object_content_length']), '42') self.assertEquals(metaReturned['object_content_encoding'], 'gzip') self.assertEquals(metaReturned['object_content_language'], 'en') self.assertEquals(metaReturned['object_meta_TESTCUSTOM'], 'CUSTOM') testDict = testList[1] self.assert_('/TEST_acc1/TEST_con1/TEST_obj2' in testDict) metaReturned = testDict['/TEST_acc1/TEST_con1/TEST_obj2'] self.assertEquals(metaReturned['object_uri'], '/TEST_acc1/TEST_con1/TEST_obj2') self.assertEquals(metaReturned['object_name'], 'TEST_obj2') self.assertEquals(metaReturned['object_account_name'], 'TEST_acc1') self.assertEquals(metaReturned['object_container_name'], 'TEST_con1') self.assertEquals(metaReturned['object_uri_create_time'], self.t) self.assertEquals(metaReturned['object_etag_hash'], '0000000000000000') self.assertEquals(metaReturned['object_content_type'], 'text/plain') self.assertEquals(str(metaReturned['object_content_length']), '42') self.assertEquals(metaReturned['object_content_encoding'], 'gzip') self.assertEquals(metaReturned['object_content_language'], 'en') self.assertEquals(metaReturned['object_meta_TESTCUSTOM'], 'CUSTOM') testDict = testList[2] self.assert_('/TEST_acc1/TEST_con2/TEST_obj3' in testDict) metaReturned = testDict['/TEST_acc1/TEST_con2/TEST_obj3'] self.assertEquals(metaReturned['object_uri'], '/TEST_acc1/TEST_con2/TEST_obj3') self.assertEquals(metaReturned['object_name'], 'TEST_obj3') self.assertEquals(metaReturned['object_account_name'], 'TEST_acc1') self.assertEquals(metaReturned['object_container_name'], 'TEST_con2') self.assertEquals(metaReturned['object_uri_create_time'], self.t) self.assertEquals(metaReturned['object_etag_hash'], '0000000000000000') self.assertEquals(metaReturned['object_content_type'], 'text/plain') self.assertEquals(str(metaReturned['object_content_length']), '42') self.assertEquals(metaReturned['object_content_encoding'], 'gzip') self.assertEquals(metaReturned['object_content_language'], 'en') self.assertEquals(metaReturned['object_meta_TESTCUSTOM'], 'CUSTOM')
def response_iter(): # NB: XML requires that the XML declaration, if present, be at the # very start of the document. Clients *will* call us out on not # being valid XML if we pass through whitespace before it. # Track whether we've sent anything yet so we can yield out that # declaration *first* yielded_anything = False try: try: # TODO: add support for versioning put_resp = req.get_response(self.app, 'PUT', body=json.dumps(manifest), query={ 'multipart-manifest': 'put', 'heartbeat': 'on' }, headers=headers) if put_resp.status_int == 202: body = [] put_resp.fix_conditional_response() for chunk in put_resp.response_iter: if not chunk.strip(): if not yielded_anything: yield ('<?xml version="1.0" ' 'encoding="UTF-8"?>\n') yielded_anything = True yield chunk body.append(chunk) body = json.loads(''.join(body)) if body['Response Status'] != '201 Created': raise InvalidRequest( status=body['Response Status'], msg='\n'.join(': '.join(err) for err in body['Errors'])) except BadSwiftRequest as e: msg = str(e) expected_msg = ('too small; each segment must be ' 'at least 1 byte') if expected_msg in msg: # FIXME: AWS S3 allows a smaller object than 5 MB if # there is only one part. Use a COPY request to copy # the part object from the segments container instead. raise EntityTooSmall(msg) else: raise # clean up the multipart-upload record obj = '%s/%s' % (req.object_name, upload_id) try: req.get_response(self.app, 'DELETE', container, obj) except NoSuchKey: # We know that this existed long enough for us to HEAD pass result_elem = Element('CompleteMultipartUploadResult') # NOTE: boto with sig v4 appends port to HTTP_HOST value at # the request header when the port is non default value and it # makes req.host_url like as http://localhost:8080:8080/path # that obviously invalid. Probably it should be resolved at # swift.common.swob though, tentatively we are parsing and # reconstructing the correct host_url info here. # in detail, https://github.com/boto/boto/pull/3513 parsed_url = urlparse(req.host_url) host_url = '%s://%s' % (parsed_url.scheme, parsed_url.hostname) if parsed_url.port: host_url += ':%s' % parsed_url.port SubElement(result_elem, 'Location').text = host_url + req.path SubElement(result_elem, 'Bucket').text = req.container_name SubElement(result_elem, 'Key').text = req.object_name SubElement(result_elem, 'ETag').text = '"%s"' % s3_etag resp.headers.pop('ETag', None) if yielded_anything: yield '\n' yield tostring(result_elem, xml_declaration=not yielded_anything) except ErrorResponse as err_resp: if yielded_anything: err_resp.xml_declaration = False yield '\n' else: # Oh good, we can still change HTTP status code, too! resp.status = err_resp.status for chunk in err_resp({}, lambda *a: None): yield chunk
def POST(self, req): """ Handles Complete Multipart Upload. """ upload_id = req.params['uploadId'] resp = _get_upload_info(req, self.app, upload_id) headers = {} for key, val in resp.headers.iteritems(): _key = key.lower() if _key.startswith('x-amz-meta-'): headers['x-object-meta-' + _key[11:]] = val hct_header = sysmeta_header('object', 'has-content-type') if resp.sysmeta_headers.get(hct_header) == 'yes': content_type = resp.sysmeta_headers.get( sysmeta_header('object', 'content-type')) elif hct_header in resp.sysmeta_headers: # has-content-type is present but false, so no content type was # set on initial upload. In that case, we won't set one on our # PUT request. Swift will end up guessing one based on the # object name. content_type = None else: content_type = resp.headers.get('Content-Type') if content_type: headers['Content-Type'] = content_type # Query for the objects in the segments area to make sure it completed query = { 'format': 'json', 'prefix': '%s/%s/' % (req.object_name, upload_id), 'delimiter': '/' } container = req.container_name + MULTIUPLOAD_SUFFIX resp = req.get_response(self.app, 'GET', container, '', query=query) objinfo = json.loads(resp.body) objtable = dict((o['name'], { 'path': '/'.join(['', container, o['name']]), 'etag': o['hash'], 'size_bytes': o['bytes'] }) for o in objinfo) manifest = [] previous_number = 0 try: xml = req.xml(MAX_COMPLETE_UPLOAD_BODY_SIZE) if not xml: raise InvalidRequest(msg='You must specify at least one part') complete_elem = fromstring(xml, 'CompleteMultipartUpload', self.logger) for part_elem in complete_elem.iterchildren('Part'): part_number = int(part_elem.find('./PartNumber').text) if part_number <= previous_number: raise InvalidPartOrder(upload_id=upload_id) previous_number = part_number etag = part_elem.find('./ETag').text if len(etag) >= 2 and etag[0] == '"' and etag[-1] == '"': # strip double quotes etag = etag[1:-1] info = objtable.get("%s/%s/%s" % (req.object_name, upload_id, part_number)) if info is None or info['etag'] != etag: raise InvalidPart(upload_id=upload_id, part_number=part_number) info['size_bytes'] = int(info['size_bytes']) manifest.append(info) except (XMLSyntaxError, DocumentInvalid): raise MalformedXML() except ErrorResponse: raise except Exception as e: self.logger.error(e) raise # Check the size of each segment except the last and make sure they are # all more than the minimum upload chunk size for info in manifest[:-1]: if info['size_bytes'] < self.conf.min_segment_size: raise EntityTooSmall() try: # TODO: add support for versioning if manifest: resp = req.get_response(self.app, 'PUT', body=json.dumps(manifest), query={'multipart-manifest': 'put'}, headers=headers) else: # the upload must have consisted of a single zero-length part # just write it directly resp = req.get_response(self.app, 'PUT', body='', headers=headers) except BadSwiftRequest as e: msg = str(e) expected_msg = 'too small; each segment must be at least 1 byte' if expected_msg in msg: # FIXME: AWS S3 allows a smaller object than 5 MB if there is # only one part. Use a COPY request to copy the part object # from the segments container instead. 
raise EntityTooSmall(msg) else: raise # clean up the multipart-upload record obj = '%s/%s' % (req.object_name, upload_id) try: req.get_response(self.app, 'DELETE', container, obj) except NoSuchKey: pass # We know that this existed long enough for us to HEAD result_elem = Element('CompleteMultipartUploadResult') # NOTE: boto with sig v4 appends port to HTTP_HOST value at the # request header when the port is non default value and it makes # req.host_url like as http://localhost:8080:8080/path # that obviously invalid. Probably it should be resolved at # swift.common.swob though, tentatively we are parsing and # reconstructing the correct host_url info here. # in detail, https://github.com/boto/boto/pull/3513 parsed_url = urlparse(req.host_url) host_url = '%s://%s' % (parsed_url.scheme, parsed_url.hostname) if parsed_url.port: host_url += ':%s' % parsed_url.port SubElement(result_elem, 'Location').text = host_url + req.path SubElement(result_elem, 'Bucket').text = req.container_name SubElement(result_elem, 'Key').text = req.object_name SubElement(result_elem, 'ETag').text = resp.etag resp.body = tostring(result_elem) resp.status = 200 resp.content_type = "application/xml" return resp
def map_objects_to_targets(self):
    """
    Map objects to their local storage server data replicas and create a
    request for the generic backend interface
    """
    self.logger.debug('Mapping objects to files')
    self.logger.debug('request_in(first 1024 bytes): %s',
                      str(self.request_in)[0:1023])
    request_in_dict = json.loads(self.request_in)
    # TODO consider modifying incoming request in place
    self.request_out = {}
    self.request_out['request'] = request_in_dict['request']
    objects_and_files = []
    oc = ObjectController(self.conf, self.logger)
    self.logger.debug('oc.node_timeout: %s', oc.node_timeout)
    for obj_and_dev in request_in_dict['objects']:
        obj_and_file = {}
        obj_and_file['object'] = obj_and_dev['object']
        self.logger.debug('obj: %s', obj_and_dev)
        try:
            (account, container, obj) = split_path(
                obj_and_dev['object'].encode('utf-8'), 3, 3, True)
        except ValueError:
            self.logger.debug('split_path exception')
            raise
        device = obj_and_dev['device']
        # TODO: can storage_policy_index be determined from the storage
        # node, so it does not have to be passed from the proxy?
        # container_info = get_container_info(
        #     {'PATH_INFO': '/v1/%s/%s' % (account, container)},
        #     self.app, swift_source='LE')
        # storage_policy_index = container_info['storage_policy']
        # obj_ring = self.get_object_ring(storage_policy_index)
        swift_dir = request_in_dict['swift_dir']
        storage_policy_index = request_in_dict['storage_policy_index']
        obj_ring = POLICIES.get_object_ring(storage_policy_index,
                                            swift_dir)
        # need partition, same comment as for storage_policy_index
        partition, nodes = obj_ring.get_nodes(account, container, obj)
        self.logger.debug('Storage nodes: %s' % str(nodes))
        self.logger.debug('partition: %s', partition)
        # scor (aux)
        # key = hash_path(account, container, obj, raw_digest=True)
        key = hash_path(account, container, obj)
        self.logger.debug('hash_path or key: %s', key)
        # Create/use Object Controller to map objects to files
        policy = POLICIES.get_by_index(storage_policy_index)
        self.logger.debug('policy: %s index: %s', policy,
                          str(int(policy)))
        try:
            oc.disk_file = oc.get_diskfile(device, partition, account,
                                           container, obj, policy=policy)
        except DiskFileDeviceUnavailable:  # scor
            self.logger.error(
                "Unavailable device: %s, for object: %s, "
                "storage policy: %s", device, obj_and_dev['object'],
                policy)
        data_dir = oc.disk_file._datadir
        self.logger.debug('data_dir: %s', data_dir)
        # Swift-on-File detection
        sof_detected = False
        # Get the device path from the object server config file
        devpath = self.conf.get('devices', None)
        # The Swift-on-File device directory is a symlink
        # in the devpath directory constructed like shown below
        sofpath = devpath + '/' + obj_and_dev['device']
        if data_dir.find(sofpath) == 0 and os.path.islink(sofpath):
            # data_dir starts with sofpath and sofpath is a symlink -> SoF
            sof_detected = True
            self.logger.debug('SOF detected, sofpath: %s, realpath: %s',
                              sofpath, os.path.realpath(sofpath))
            # Follow the symlink and append a/c/o to get the data file path
            oc._data_file = os.path.realpath(sofpath) + \
                obj_and_file['object']
        elif not self.gbi_provide_dirpaths_instead_of_filepaths:
            files = os.listdir(oc.disk_file._datadir)
            file_info = {}
            # DiskFile method got renamed between Liberty and Mitaka
            try:
                file_info = oc.disk_file._get_ondisk_file(files)
            except AttributeError:
                file_info = oc.disk_file._get_ondisk_files(files)
            oc._data_file = file_info.get('data_file')
            self.logger.debug('data_file: %s', oc._data_file)
        # Add file path to the request
        self.logger.debug('obj_and_dev: %s', obj_and_dev)
        if (not self.gbi_provide_dirpaths_instead_of_filepaths) or \
                sof_detected:
            obj_and_file['file'] = oc._data_file
        else:
            obj_and_file['file'] = data_dir
        self.logger.debug('obj_and_file: %s', obj_and_file)
        objects_and_files.append(obj_and_file)

    self.logger.debug('objects_and_files(first 1024 bytes): %s',
                      str(objects_and_files)[0:1023])
    self.request_out['objects'] = objects_and_files
    self.logger.debug('request_in(first 1024 bytes): %s',
                      str(self.request_in)[0:1023])
    self.logger.debug('request_out(first 1024 bytes): %s',
                      str(self.request_out)[0:1023])
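# map_objects_to_targets leans on swift.common.utils.split_path to explode
# '/account/container/object' into its three components. A quick sketch of
# the call it makes (minsegs=3, maxsegs=3, rest_with_last=True), assuming a
# Swift environment where swift.common.utils is importable:
from swift.common.utils import split_path

account, container, obj = split_path(
    '/AUTH_test/photos/2006/summer.jpg', 3, 3, True)
# rest_with_last=True keeps any further slashes inside the object name
assert (account, container, obj) == \
    ('AUTH_test', 'photos', '2006/summer.jpg')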
def test_extract_tar_works(self): # On systems where $TMPDIR is long (like OS X), we need to do this # or else every upload will fail due to the path being too long. self.app.max_pathlen += len(self.testdir) for compress_format in ['', 'gz', 'bz2']: base_name = 'base_works_%s' % compress_format dir_tree = [{ base_name: [ { 'sub_dir1': ['sub1_file1', 'sub1_file2'] }, { 'sub_dir2': ['sub2_file1', u'test obj \u2661'] }, 'sub_file1', { 'sub_dir3': [{ 'sub4_dir1': '../sub4 file1' }] }, { 'sub_dir4': None }, ] }] build_dir_tree(self.testdir, dir_tree) mode = 'w' extension = '' if compress_format: mode += ':' + compress_format extension += '.' + compress_format tar = tarfile.open(name=os.path.join(self.testdir, 'tar_works.tar' + extension), mode=mode) tar.add(os.path.join(self.testdir, base_name)) tar.close() req = Request.blank('/tar_works/acc/cont/') req.environ['wsgi.input'] = open( os.path.join(self.testdir, 'tar_works.tar' + extension)) req.headers['transfer-encoding'] = 'chunked' resp_body = self.handle_extract_and_iter(req, compress_format) resp_data = json.loads(resp_body) self.assertEquals(resp_data['Number Files Created'], 6) # test out xml req = Request.blank('/tar_works/acc/cont/') req.environ['wsgi.input'] = open( os.path.join(self.testdir, 'tar_works.tar' + extension)) req.headers['transfer-encoding'] = 'chunked' resp_body = self.handle_extract_and_iter(req, compress_format, 'application/xml') self.assert_( '<response_status>201 Created</response_status>' in resp_body) self.assert_( '<number_files_created>6</number_files_created>' in resp_body) # test out nonexistent format req = Request.blank('/tar_works/acc/cont/?extract-archive=tar', headers={'Accept': 'good_xml'}) req.environ['REQUEST_METHOD'] = 'PUT' req.environ['wsgi.input'] = open( os.path.join(self.testdir, 'tar_works.tar' + extension)) req.headers['transfer-encoding'] = 'chunked' def fake_start_response(*args, **kwargs): pass app_iter = self.bulk(req.environ, fake_start_response) resp_body = ''.join([i for i in app_iter]) self.assert_('Response Status: 406' in resp_body)
def parse_and_validate_input(req_body, req_path, min_segment_size): """ Given a request body, parses it and returns a list of dictionaries. The output structure is nearly the same as the input structure, but it is not an exact copy. Given a valid input dictionary `d_in`, its corresponding output dictionary `d_out` will be as follows: * d_out['etag'] == d_in['etag'] * d_out['path'] == d_in['path'] * d_in['size_bytes'] can be a string ("12") or an integer (12), but d_out['size_bytes'] is an integer. * (optional) d_in['range'] is a string of the form "M-N", "M-", or "-N", where M and N are non-negative integers. d_out['range'] is the corresponding swob.Range object. If d_in does not have a key 'range', neither will d_out. :raises: HTTPException on parse errors or semantic errors (e.g. bogus JSON structure, syntactically invalid ranges) :returns: a list of dictionaries on success """ try: parsed_data = json.loads(req_body) except ValueError: raise HTTPBadRequest("Manifest must be valid JSON.\n") if not isinstance(parsed_data, list): raise HTTPBadRequest("Manifest must be a list.\n") # If we got here, req_path refers to an object, so this won't ever raise # ValueError. vrs, account, _junk = split_path(req_path, 3, 3, True) errors = [] num_segs = len(parsed_data) for seg_index, seg_dict in enumerate(parsed_data): if not isinstance(seg_dict, dict): errors.append("Index %d: not a JSON object" % seg_index) continue missing_keys = [k for k in REQUIRED_SLO_KEYS if k not in seg_dict] if missing_keys: errors.append( "Index %d: missing keys %s" % (seg_index, ", ".join('"%s"' % (mk, ) for mk in sorted(missing_keys)))) continue extraneous_keys = [k for k in seg_dict if k not in ALLOWED_SLO_KEYS] if extraneous_keys: errors.append( "Index %d: extraneous keys %s" % (seg_index, ", ".join('"%s"' % (ek, ) for ek in sorted(extraneous_keys)))) continue if not isinstance(seg_dict['path'], basestring): errors.append("Index %d: \"path\" must be a string" % seg_index) continue if not (seg_dict['etag'] is None or isinstance(seg_dict['etag'], basestring)): errors.append("Index %d: \"etag\" must be a string or null" % seg_index) continue if '/' not in seg_dict['path'].strip('/'): errors.append( "Index %d: path does not refer to an object. Path must be of " "the form /container/object." % seg_index) continue seg_size = seg_dict['size_bytes'] if seg_size is not None: try: seg_size = int(seg_size) seg_dict['size_bytes'] = seg_size except (TypeError, ValueError): errors.append("Index %d: invalid size_bytes" % seg_index) continue if (seg_size < min_segment_size and seg_index < num_segs - 1): errors.append("Index %d: too small; each segment, except " "the last, must be at least %d bytes." % (seg_index, min_segment_size)) continue obj_path = '/'.join(['', vrs, account, seg_dict['path'].lstrip('/')]) if req_path == quote(obj_path): errors.append( "Index %d: manifest must not include itself as a segment" % seg_index) continue if seg_dict.get('range'): try: seg_dict['range'] = Range('bytes=%s' % seg_dict['range']) except ValueError: errors.append("Index %d: invalid range" % seg_index) continue if len(seg_dict['range'].ranges) > 1: errors.append("Index %d: multiple ranges (only one allowed)" % seg_index) continue # If the user *told* us the object's size, we can check range # satisfiability right now. If they lied about the size, we'll # fail that validation later. 
if (seg_size is not None and len(seg_dict['range'].ranges_for_length(seg_size)) != 1): errors.append("Index %d: unsatisfiable range" % seg_index) continue if errors: error_message = "".join(e + "\n" for e in errors) raise HTTPBadRequest(error_message, headers={"Content-Type": "text/plain"}) return parsed_data
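# Given the contract in the docstring above, a minimal manifest accepted by
# parse_and_validate_input looks like the one below; the paths, etag, and
# sizes are made-up values for illustration only:
import json

manifest = json.dumps([
    {'path': '/segments/part1', 'etag': None, 'size_bytes': '1048576'},
    {'path': '/segments/part2',
     'etag': 'd41d8cd98f00b204e9800998ecf8427e',
     'size_bytes': 1048576, 'range': '0-524287'},
])
parsed = parse_and_validate_input(
    manifest, '/v1/AUTH_test/c/manifest', 1)
# size_bytes is normalized to an int even when supplied as a string
assert parsed[0]['size_bytes'] == 1048576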
def response_iter(): # NB: XML requires that the XML declaration, if present, be at the # very start of the document. Clients *will* call us out on not # being valid XML if we pass through whitespace before it. # Track whether we've sent anything yet so we can yield out that # declaration *first* yielded_anything = False try: try: # TODO: add support for versioning put_resp = req.get_response(self.app, 'PUT', body=json.dumps(manifest), query={ 'multipart-manifest': 'put', 'heartbeat': 'on' }, headers=headers) if put_resp.status_int == 202: body = [] put_resp.fix_conditional_response() for chunk in put_resp.response_iter: if not chunk.strip(): if time.time() - start_time < 10: # Include some grace period to keep # ceph-s3tests happy continue if not yielded_anything: yield (b'<?xml version="1.0" ' b'encoding="UTF-8"?>\n') yielded_anything = True yield chunk continue body.append(chunk) body = json.loads(b''.join(body)) if body['Response Status'] != '201 Created': for seg, err in body['Errors']: if err == too_small_message: raise EntityTooSmall() elif err in ('Etag Mismatch', '404 Not Found'): raise InvalidPart(upload_id=upload_id) raise InvalidRequest( status=body['Response Status'], msg='\n'.join(': '.join(err) for err in body['Errors'])) except BadSwiftRequest as e: msg = str(e) if too_small_message in msg: raise EntityTooSmall(msg) elif ', Etag Mismatch' in msg: raise InvalidPart(upload_id=upload_id) elif ', 404 Not Found' in msg: raise InvalidPart(upload_id=upload_id) else: raise # clean up the multipart-upload record obj = '%s/%s' % (req.object_name, upload_id) try: req.get_response(self.app, 'DELETE', container, obj) except NoSuchKey: # We know that this existed long enough for us to HEAD pass result_elem = Element('CompleteMultipartUploadResult') # NOTE: boto with sig v4 appends port to HTTP_HOST value at # the request header when the port is non default value and it # makes req.host_url like as http://localhost:8080:8080/path # that obviously invalid. Probably it should be resolved at # swift.common.swob though, tentatively we are parsing and # reconstructing the correct host_url info here. # in detail, https://github.com/boto/boto/pull/3513 parsed_url = urlparse(req.host_url) host_url = '%s://%s' % (parsed_url.scheme, parsed_url.hostname) # Why are we doing our own port parsing? Because py3 decided # to start raising ValueErrors on access after parsing such # an invalid port netloc = parsed_url.netloc.split('@')[-1].split(']')[-1] if ':' in netloc: port = netloc.split(':', 2)[1] host_url += ':%s' % port SubElement(result_elem, 'Location').text = host_url + req.path SubElement(result_elem, 'Bucket').text = req.container_name SubElement(result_elem, 'Key').text = req.object_name SubElement(result_elem, 'ETag').text = '"%s"' % s3_etag resp.headers.pop('ETag', None) if yielded_anything: yield b'\n' yield tostring(result_elem, xml_declaration=not yielded_anything) except ErrorResponse as err_resp: if yielded_anything: err_resp.xml_declaration = False yield b'\n' else: # Oh good, we can still change HTTP status code, too! resp.status = err_resp.status for chunk in err_resp({}, lambda *a: None): yield chunk
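# The netloc-splitting workaround above avoids urlparse(...).port, which on
# py3 raises ValueError for malformed ports such as the doubled one boto can
# send. The same extraction in isolation (a sketch; it strips userinfo and
# IPv6 brackets the same way the handler does):
def port_from_netloc(netloc):
    netloc = netloc.split('@')[-1].split(']')[-1]
    if ':' in netloc:
        return netloc.split(':', 2)[1]
    return None

assert port_from_netloc('localhost:8080:8080') == '8080'
assert port_from_netloc('user@[::1]:6000') == '6000'
assert port_from_netloc('example.com') is None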
def POST(self, req): """ Handles Complete Multipart Upload. """ log_s3api_command(req, 'complete-multipart-upload') upload_id = req.params['uploadId'] resp = _get_upload_info(req, self.app, upload_id) headers = {} for key, val in resp.headers.iteritems(): _key = key.lower() if _key.startswith('x-amz-meta-'): headers['x-object-meta-' + _key[11:]] = val elif _key == 'content-type': headers['Content-Type'] = val for key, val in resp.sysmeta_headers.items(): _key = key.lower() if _key == OBJECT_TAGGING_HEADER.lower(): headers[key] = val # Query for the objects in the segments area to make sure it completed query = { 'format': 'json', 'prefix': '%s/%s/' % (req.object_name, upload_id), 'delimiter': '/' } # Force the master to be sure to fetch all uploaded parts req.environ.setdefault('oio.query', {}) req.environ['oio.query']['force_master'] = True container = req.container_name + MULTIUPLOAD_SUFFIX resp = req.get_response(self.app, 'GET', container, '', query=query) objinfo = json.loads(resp.body) # pylint: disable-msg=no-member objinfo.sort(key=lambda o: int(o['name'].split('/')[-1])) objtable = dict((o['name'].encode('utf-8'), { 'path': '/'.join(['', container, o['name']]), 'etag': o['hash'], 'size_bytes': o['bytes'] }) for o in objinfo) s3_etag_hasher = md5() manifest = [] previous_number = 0 try: xml = req.xml(MAX_COMPLETE_UPLOAD_BODY_SIZE) if not xml: raise InvalidRequest(msg='You must specify at least one part') if 'content-md5' in req.headers: # If an MD5 was provided, we need to verify it. # Note that S3Request already took care of translating to ETag if req.headers['etag'] != md5(xml).hexdigest(): raise BadDigest(content_md5=req.headers['content-md5']) # We're only interested in the body here, in the # multipart-upload controller -- *don't* let it get # plumbed down to the object-server del req.headers['etag'] complete_elem = fromstring(xml, 'CompleteMultipartUpload') for part_elem in complete_elem.iterchildren('Part'): part_number = int(part_elem.find('./PartNumber').text) if part_number <= previous_number: raise InvalidPartOrder(upload_id=upload_id) previous_number = part_number etag = part_elem.find('./ETag').text if len(etag) >= 2 and etag[0] == '"' and etag[-1] == '"': # strip double quotes etag = etag[1:-1] info = objtable.get("%s/%s/%s" % (req.object_name, upload_id, part_number)) if info is None or info['etag'] != etag: raise InvalidPart(upload_id=upload_id, part_number=part_number) s3_etag_hasher.update(binascii.a2b_hex(etag)) info['size_bytes'] = int(info['size_bytes']) manifest.append(info) except (XMLSyntaxError, DocumentInvalid): raise MalformedXML() except ErrorResponse: raise except Exception as e: LOGGER.error(e) raise s3_etag = '%s-%d' % (s3_etag_hasher.hexdigest(), len(manifest)) headers[sysmeta_header('object', 'etag')] = s3_etag # Leave base header value blank; SLO will populate c_etag = '; s3_etag=%s' % s3_etag headers['X-Object-Sysmeta-Container-Update-Override-Etag'] = c_etag # Following swift commit 7f636a5, zero-byte segments aren't allowed, # even as the final segment empty_seg = None if manifest[-1]['size_bytes'] == 0: empty_seg = manifest.pop() # We'll check the sizes of all except the last segment below, but # since we just popped off a zero-byte segment, we should check # that last segment, too. 
if manifest and manifest[-1]['size_bytes'] < CONF.min_segment_size: raise EntityTooSmall() # Check the size of each segment except the last and make sure they are # all more than the minimum upload chunk size for info in manifest[:-1]: if info['size_bytes'] < CONF.min_segment_size: raise EntityTooSmall() try: # TODO: add support for versioning if manifest: resp = req.get_response(self.app, 'PUT', body=json.dumps(manifest), query={'multipart-manifest': 'put'}, headers=headers) else: # the upload must have consisted of a single zero-length part # just write it directly resp = req.get_response(self.app, 'PUT', body='', headers=headers) except ErrorResponse as e: msg = str(e._msg) expected_msg = 'too small; each segment must be at least 1 byte' if expected_msg in msg: # FIXME: AWS S3 allows a smaller object than 5 MB if there is # only one part. Use a COPY request to copy the part object # from the segments container instead. raise EntityTooSmall(msg) else: raise if empty_seg: # clean up the zero-byte segment _, empty_seg_cont, empty_seg_name = empty_seg['path'].split('/', 2) req.get_response(self.app, 'DELETE', container=empty_seg_cont, obj=empty_seg_name) # clean up the multipart-upload record obj = '%s/%s' % (req.object_name, upload_id) req.environ['oio.ephemeral_object'] = True req.get_response(self.app, 'DELETE', container, obj) result_elem = Element('CompleteMultipartUploadResult') # NOTE: boto with sig v4 appends port to HTTP_HOST value at the # request header when the port is non default value and it makes # req.host_url like as http://localhost:8080:8080/path # that obviously invalid. Probably it should be resolved at # swift.common.swob though, tentatively we are parsing and # reconstructing the correct host_url info here. # in detail, https://github.com/boto/boto/pull/3513 parsed_url = urlparse(req.host_url) host_url = '%s://%s' % (parsed_url.scheme, parsed_url.hostname) if parsed_url.port: host_url += ':%s' % parsed_url.port SubElement(result_elem, 'Location').text = host_url + req.path SubElement(result_elem, 'Bucket').text = req.container_name SubElement(result_elem, 'Key').text = req.object_name SubElement(result_elem, 'ETag').text = '"%s"' % s3_etag del resp.headers['ETag'] resp.body = tostring(result_elem) resp.status = 200 resp.content_type = "application/xml" return resp
def _listing(self, env, start_response, prefix=None): """ Sends an HTML object listing to the remote client. :param env: The original WSGI environment dict. :param start_response: The original WSGI start_response hook. :param prefix: Any prefix desired for the container listing. """ if not config_true_value(self._listings): resp = HTTPNotFound()(env, self._start_response) return self._error_response(resp, env, start_response) tmp_env = make_pre_authed_env( env, 'GET', '/%s/%s/%s' % (self.version, self.account, self.container), self.agent, swift_source='SW') tmp_env['QUERY_STRING'] = 'delimiter=/&format=json' if prefix: tmp_env['QUERY_STRING'] += '&prefix=%s' % quote(prefix) else: prefix = '' resp = self._app_call(tmp_env) if not is_success(self._get_status_int()): return self._error_response(resp, env, start_response) listing = None body = ''.join(resp) if body: listing = json.loads(body) if not listing: resp = HTTPNotFound()(env, self._start_response) return self._error_response(resp, env, start_response) headers = {'Content-Type': 'text/html; charset=UTF-8'} body = '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 ' \ 'Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">\n' \ '<html>\n' \ ' <head>\n' \ ' <title>Listing of %s</title>\n' % \ cgi.escape(env['PATH_INFO']) if self._listings_css: body += ' <link rel="stylesheet" type="text/css" ' \ 'href="%s" />\n' % (self._build_css_path(prefix)) else: body += ' <style type="text/css">\n' \ ' h1 {font-size: 1em; font-weight: bold;}\n' \ ' th {text-align: left; padding: 0px 1em 0px 1em;}\n' \ ' td {padding: 0px 1em 0px 1em;}\n' \ ' a {text-decoration: none;}\n' \ ' </style>\n' body += ' </head>\n' \ ' <body>\n' \ ' <h1 id="title">Listing of %s</h1>\n' \ ' <table id="listing">\n' \ ' <tr id="heading">\n' \ ' <th class="colname">Name</th>\n' \ ' <th class="colsize">Size</th>\n' \ ' <th class="coldate">Date</th>\n' \ ' </tr>\n' % \ cgi.escape(env['PATH_INFO']) if prefix: body += ' <tr id="parent" class="item">\n' \ ' <td class="colname"><a href="../">../</a></td>\n' \ ' <td class="colsize"> </td>\n' \ ' <td class="coldate"> </td>\n' \ ' </tr>\n' for item in listing: if 'subdir' in item: subdir = item['subdir'] if prefix: subdir = subdir[len(prefix):] body += ' <tr class="item subdir">\n' \ ' <td class="colname"><a href="%s">%s</a></td>\n' \ ' <td class="colsize"> </td>\n' \ ' <td class="coldate"> </td>\n' \ ' </tr>\n' % \ (quote(subdir), cgi.escape(subdir)) for item in listing: if 'name' in item: name = item['name'] if prefix: name = name[len(prefix):] body += ' <tr class="item %s">\n' \ ' <td class="colname"><a href="%s">%s</a></td>\n' \ ' <td class="colsize">%s</td>\n' \ ' <td class="coldate">%s</td>\n' \ ' </tr>\n' % \ (' '.join('type-' + cgi.escape(t.lower(), quote=True) for t in item['content_type'].split('/')), quote(name), cgi.escape(name), human_readable(item['bytes']), cgi.escape(item['last_modified']).split('.')[0]. replace('T', ' ')) body += ' </table>\n' \ ' </body>\n' \ '</html>\n' resp = Response(headers=headers, body=body) return resp(env, start_response)
def GET(self, req):
    """
    Handles List Multipart Uploads
    """
    def separate_uploads(uploads, prefix, delimiter):
        """
        separate_uploads will separate uploads into non_delimited_uploads
        (a subset of uploads) and common_prefixes according to the
        specified delimiter. non_delimited_uploads is a list of uploads
        which exclude the delimiter. common_prefixes is a set of prefixes
        prior to the specified delimiter. Note that the prefix in the
        common_prefixes includes the delimiter itself.

        i.e. if the delimiter is '/' and the uploads consist of
        ['foo', 'foo/bar'], this function will return
        (['foo'], ['foo/']).

        :param uploads: A list of upload dictionaries
        :param prefix: A string prefix reserved on the upload path
                       (i.e. the delimiter must be searched for behind
                       the prefix)
        :param delimiter: A string delimiter used to split the path of
                          each upload
        :return (non_delimited_uploads, common_prefixes)
        """
        if six.PY2:
            (prefix, delimiter) = utf8encode(prefix, delimiter)
        non_delimited_uploads = []
        common_prefixes = set()
        for upload in uploads:
            key = upload['key']
            end = key.find(delimiter, len(prefix))
            if end >= 0:
                common_prefix = key[:end + len(delimiter)]
                common_prefixes.add(common_prefix)
            else:
                non_delimited_uploads.append(upload)
        return non_delimited_uploads, sorted(common_prefixes)

    encoding_type = req.params.get('encoding-type')
    if encoding_type is not None and encoding_type != 'url':
        err_msg = 'Invalid Encoding Method specified in Request'
        raise InvalidArgument('encoding-type', encoding_type, err_msg)

    keymarker = req.params.get('key-marker', '')
    uploadid = req.params.get('upload-id-marker', '')
    maxuploads = req.get_validated_param(
        'max-uploads', DEFAULT_MAX_UPLOADS, DEFAULT_MAX_UPLOADS)

    query = {
        'format': 'json',
        'limit': maxuploads + 1,
    }

    if uploadid and keymarker:
        query.update({'marker': '%s/%s' % (keymarker, uploadid)})
    elif keymarker:
        query.update({'marker': '%s/~' % (keymarker)})
    if 'prefix' in req.params:
        query.update({'prefix': req.params['prefix']})

    container = req.container_name + MULTIUPLOAD_SUFFIX
    try:
        resp = req.get_response(self.app, container=container,
                                query=query)
        objects = json.loads(resp.body)
    except NoSuchBucket:
        # Assume NoSuchBucket as no uploads
        objects = []

    def object_to_upload(object_info):
        obj, upid = object_info['name'].rsplit('/', 1)
        obj_dict = {'key': obj,
                    'upload_id': upid,
                    'last_modified': object_info['last_modified']}
        return obj_dict

    # uploads is a list of dicts: {key, upload_id, last_modified}
    # Note that the pattern matcher below drops whole segment objects
    # such as object_name/upload_id/1.
pattern = re.compile('/[0-9]+$') uploads = [ object_to_upload(obj) for obj in objects if pattern.search(obj.get('name', '')) is None ] prefixes = [] if 'delimiter' in req.params: prefix = req.params.get('prefix', '') delimiter = req.params['delimiter'] uploads, prefixes = separate_uploads(uploads, prefix, delimiter) if len(uploads) > maxuploads: uploads = uploads[:maxuploads] truncated = True else: truncated = False nextkeymarker = '' nextuploadmarker = '' if len(uploads) > 1: nextuploadmarker = uploads[-1]['upload_id'] nextkeymarker = uploads[-1]['key'] result_elem = Element('ListMultipartUploadsResult') SubElement(result_elem, 'Bucket').text = req.container_name SubElement(result_elem, 'KeyMarker').text = keymarker SubElement(result_elem, 'UploadIdMarker').text = uploadid SubElement(result_elem, 'NextKeyMarker').text = nextkeymarker SubElement(result_elem, 'NextUploadIdMarker').text = nextuploadmarker if 'delimiter' in req.params: SubElement(result_elem, 'Delimiter').text = req.params['delimiter'] if 'prefix' in req.params: SubElement(result_elem, 'Prefix').text = req.params['prefix'] SubElement(result_elem, 'MaxUploads').text = str(maxuploads) if encoding_type is not None: SubElement(result_elem, 'EncodingType').text = encoding_type SubElement(result_elem, 'IsTruncated').text = \ 'true' if truncated else 'false' # TODO: don't show uploads which are initiated before this bucket is # created. for u in uploads: upload_elem = SubElement(result_elem, 'Upload') name = u['key'] if encoding_type == 'url': name = quote(name) SubElement(upload_elem, 'Key').text = name SubElement(upload_elem, 'UploadId').text = u['upload_id'] initiator_elem = SubElement(upload_elem, 'Initiator') SubElement(initiator_elem, 'ID').text = req.user_id SubElement(initiator_elem, 'DisplayName').text = req.user_id owner_elem = SubElement(upload_elem, 'Owner') SubElement(owner_elem, 'ID').text = req.user_id SubElement(owner_elem, 'DisplayName').text = req.user_id SubElement(upload_elem, 'StorageClass').text = 'STANDARD' SubElement(upload_elem, 'Initiated').text = \ u['last_modified'][:-3] + 'Z' for p in prefixes: elem = SubElement(result_elem, 'CommonPrefixes') SubElement(elem, 'Prefix').text = p body = tostring(result_elem) return HTTPOk(body=body, content_type='application/xml')
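# The '/[0-9]+$' pattern above is what separates upload markers
# ('key/upload_id') from their segment objects ('key/upload_id/partnum')
# in the +segments listing. Demonstrating the filter on sample names
# (the upload IDs here are made up):
import re

pattern = re.compile('/[0-9]+$')
names = ['photo/X1Y2', 'photo/X1Y2/1', 'photo/X1Y2/2', 'doc/Z9']
markers = [n for n in names if pattern.search(n) is None]
assert markers == ['photo/X1Y2', 'doc/Z9']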
def handle_container_listing(self, env, start_response): # This code may be clearer by using Request(env).get_response() # instead of self._app_call(env) api_vers, account, container_name = split_path( env['PATH_INFO'], 3, 3, True) sub_env = env.copy() orig_container = get_unversioned_container(container_name) if orig_container != container_name: # Check that container_name is actually the versioning # container for orig_container sub_env['PATH_INFO'] = '/%s/%s/%s' % (api_vers, account, orig_container) info = get_container_info(sub_env, self.app, swift_source='VW') if info.get('sysmeta', {}).get('versions-location') != \ container_name: # We were wrong, do a standard listing orig_container = container_name if orig_container != container_name: qs = parse_qs(sub_env.get('QUERY_STRING', '')) if 'marker' in qs: marker, _ = swift3_split_object_name_version(qs['marker'][0]) qs['marker'] = [marker] if 'prefix' in qs: prefix, _ = swift3_split_object_name_version(qs['prefix'][0]) qs['prefix'] = prefix sub_env['QUERY_STRING'] = urlencode(qs, True) sub_env['oio_query'] = {'versions': True} resp = super(OioVersionedWritesContext, self).handle_container_request( sub_env, lambda x, y, z: None) if orig_container != container_name and \ self._response_status == '200 OK': with closing_if_possible(resp): versioned_objects = json.loads("".join(resp)) # Discard the latest version of each object, because it is # not supposed to appear in the versioning container. latest = dict() for obj in versioned_objects: ver = int(obj.get('version', '0')) if ver > latest.get(obj['name'], 0): latest[obj['name']] = ver versioned_objects = [obj for obj in versioned_objects if int(obj.get('version', '0')) != latest[obj['name']] or is_deleted(obj)] for obj in versioned_objects: obj['name'] = swift3_versioned_object_name( obj['name'], obj.get('version', '')) resp = json.dumps(versioned_objects) self._response_headers = [x for x in self._response_headers if x[0] != 'Content-Length'] self._response_headers.append(('Content-Length', str(len(resp)))) start_response(self._response_status, self._response_headers, self._response_exc_info) return resp
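# The listing filter above hides the *current* version of each object,
# since only superseded versions belong in a versioning container. The core
# of that filter, sketched without the WSGI plumbing (the real code also
# keeps the latest entry when is_deleted(obj) marks it as a delete marker):
def older_versions(objects):
    latest = {}
    for obj in objects:
        latest[obj['name']] = max(latest.get(obj['name'], 0),
                                  int(obj.get('version', '0')))
    return [obj for obj in objects
            if int(obj.get('version', '0')) != latest[obj['name']]]

objs = [{'name': 'o', 'version': '1'}, {'name': 'o', 'version': '2'}]
assert older_versions(objs) == [{'name': 'o', 'version': '1'}]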
def response_iter():
    # NB: XML requires that the XML declaration, if present, be at the
    # very start of the document. Clients *will* call us out on not
    # being valid XML if we pass through whitespace before it.
    # Track whether we've sent anything yet so we can yield out that
    # declaration *first*
    yielded_anything = False

    try:
        try:
            # TODO: add support for versioning
            put_resp = req.get_response(
                self.app, 'PUT', body=json.dumps(manifest),
                query={'multipart-manifest': 'put',
                       'heartbeat': 'on'},
                headers=headers)
            if put_resp.status_int == 202:
                body = []
                put_resp.fix_conditional_response()
                for chunk in put_resp.response_iter:
                    if not chunk.strip():
                        if time.time() - start_time < 10:
                            # Include some grace period to keep
                            # ceph-s3tests happy
                            continue
                        if not yielded_anything:
                            yield (b'<?xml version="1.0" '
                                   b'encoding="UTF-8"?>\n')
                        yielded_anything = True
                        yield chunk
                        continue
                    body.append(chunk)
                body = json.loads(b''.join(body))
                if body['Response Status'] != '201 Created':
                    for seg, err in body['Errors']:
                        if err == too_small_message:
                            raise EntityTooSmall()
                        elif err in ('Etag Mismatch', '404 Not Found'):
                            raise InvalidPart(upload_id=upload_id)
                    raise InvalidRequest(
                        status=body['Response Status'],
                        msg='\n'.join(': '.join(err)
                                      for err in body['Errors']))
        except BadSwiftRequest as e:
            msg = str(e)
            if too_small_message in msg:
                raise EntityTooSmall(msg)
            elif ', Etag Mismatch' in msg:
                raise InvalidPart(upload_id=upload_id)
            elif ', 404 Not Found' in msg:
                raise InvalidPart(upload_id=upload_id)
            else:
                raise

        # clean up the multipart-upload record
        obj = '%s/%s' % (req.object_name, upload_id)
        try:
            req.get_response(self.app, 'DELETE', container, obj)
        except NoSuchKey:
            # The important thing is that we wrote out a tombstone to
            # make sure the marker got cleaned up. If it's already
            # gone (e.g., because of concurrent completes or a retried
            # complete), so much the better.
            pass

        yield _make_complete_body(req, s3_etag, yielded_anything)
    except ErrorResponse as err_resp:
        if yielded_anything:
            err_resp.xml_declaration = False
            yield b'\n'
        else:
            # Oh good, we can still change HTTP status code, too!
            resp.status = err_resp.status
        for chunk in err_resp({}, lambda *a: None):
            yield chunk
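_make_complete_body is referenced above but not included in this excerpt. A plausible sketch, assuming the module's Element/SubElement/tostring helpers and a tostring that can suppress the XML declaration once keepalive whitespace has already been sent (the exact signature is an assumption):

def _make_complete_body(req, s3_etag, yielded_anything):
    # Sketch only: build the CompleteMultipartUploadResult success
    # document. If whitespace keepalives were already yielded, the XML
    # declaration must be suppressed, since it is only legal at the
    # very start of a document.
    result_elem = Element('CompleteMultipartUploadResult')
    SubElement(result_elem, 'Location').text = req.host_url + req.path
    SubElement(result_elem, 'Bucket').text = req.container_name
    SubElement(result_elem, 'Key').text = req.object_name
    SubElement(result_elem, 'ETag').text = '"%s"' % s3_etag
    return tostring(result_elem, xml_declaration=not yielded_anything)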
def POST(self, req):
    """
    Handles Complete Multipart Upload.
    """
    upload_id = req.params['uploadId']
    req.headers['x-object-meta-object-type'] = 'Multipart'
    resp = _get_upload_info(req, self.app, upload_id)
    headers = {}
    for key, val in resp.headers.iteritems():
        _key = key.lower()
        if _key.startswith('x-oss-meta-'):
            headers['x-object-meta-' + _key[11:]] = val
        elif _key == 'content-type':
            headers['Content-Type'] = val

    # Query for the objects in the segments area to make sure it completed
    query = {
        'format': 'json',
        'prefix': '%s/%s/' % (req.object_name, upload_id),
        'delimiter': '/'
    }

    container = req.container_name + MULTIUPLOAD_SUFFIX
    resp = req.get_response(self.app, 'GET', container, '', query=query)
    objinfo = json.loads(resp.body)
    objtable = dict((o['name'],
                     {'path': '/'.join(['', container, o['name']]),
                      'etag': o['hash'],
                      'size_bytes': o['bytes']}) for o in objinfo)

    manifest = []
    previous_number = 0
    try:
        xml = req.xml(MAX_COMPLETE_UPLOAD_BODY_SIZE)
        complete_elem = fromstring(xml, 'CompleteMultipartUpload')
        for part_elem in complete_elem.iterchildren('Part'):
            part_number = int(part_elem.find('./PartNumber').text)

            if part_number <= previous_number:
                raise InvalidPartOrder(upload_id=upload_id)
            previous_number = part_number

            etag = part_elem.find('./ETag').text
            if len(etag) >= 2 and etag[0] == '"' and etag[-1] == '"':
                # strip double quotes
                etag = etag[1:-1]

            info = objtable.get("%s/%s/%s" % (req.object_name, upload_id,
                                              part_number))
            if info is None or info['etag'] != etag:
                raise InvalidPart(upload_id=upload_id,
                                  part_number=part_number)

            manifest.append(info)
    except (XMLSyntaxError, DocumentInvalid):
        raise MalformedXML()
    except ErrorResponse:
        raise
    except Exception as e:
        exc_type, exc_value, exc_traceback = sys.exc_info()
        LOGGER.error(e)
        raise exc_type, exc_value, exc_traceback

    # Following swift commit 7f636a5, zero-byte segments aren't allowed,
    # even as the final segment
    if int(info['size_bytes']) == 0:
        manifest.pop()

        # Ordinarily, we just let SLO check segment sizes. However, we
        # just popped off a zero-byte segment; if there was a second
        # zero-byte segment and it was at the end, it would succeed on
        # Swift < 2.6.0 and fail on newer Swift. It seems reasonable that
        # it should always fail.
        if manifest and int(manifest[-1]['size_bytes']) == 0:
            raise EntityTooSmall()

    try:
        # TODO: add support for versioning
        if manifest:
            resp = req.get_response(self.app, 'PUT',
                                    body=json.dumps(manifest),
                                    query={'multipart-manifest': 'put'},
                                    headers=headers)
        else:
            # the upload must have consisted of a single zero-length part;
            # just write it directly
            resp = req.get_response(self.app, 'PUT', body='',
                                    headers=headers)
    except BadSwiftRequest as e:
        msg = str(e)
        msg_pre_260 = 'Each segment, except the last, must be at least '
        # see https://github.com/openstack/swift/commit/c0866ce
        msg_260 = ('too small; each segment, except the last, must be '
                   'at least ')
        # see https://github.com/openstack/swift/commit/7f636a5
        msg_post_260 = 'too small; each segment must be at least 1 byte'
        if msg.startswith(msg_pre_260) or \
                msg_260 in msg or msg_post_260 in msg:
            # FIXME: Alibaba OSS allows an object smaller than 5 MB if
            # there is only one part. Use a COPY request to copy the part
            # object from the segments container instead.
            raise EntityTooSmall(msg)
        else:
            raise

    if int(info['size_bytes']) == 0:
        # clean up the zero-byte segment
        empty_seg_cont, empty_seg_name = info['path'].split('/', 2)[1:]
        req.get_response(self.app, 'DELETE',
                         container=empty_seg_cont, obj=empty_seg_name)

    # clean up the multipart-upload record
    obj = '%s/%s' % (req.object_name, upload_id)
    req.get_response(self.app, 'DELETE', container, obj)

    result_elem = Element('CompleteMultipartUploadResult')

    # NOTE: boto with sig v4 appends the port to the HTTP_HOST request
    # header when the port is a non-default value, which makes
    # req.host_url look like http://localhost:8080:8080/path, which is
    # obviously invalid. This should probably be resolved in
    # swift.common.swob; tentatively, we parse and reconstruct the
    # correct host_url here.
    # For details, see https://github.com/boto/boto/pull/3513
    parsed_url = urlparse(req.host_url)
    host_url = '%s://%s' % (parsed_url.scheme, parsed_url.hostname)
    if parsed_url.port:
        host_url += ':%s' % parsed_url.port

    SubElement(result_elem, 'Location').text = host_url + req.path
    SubElement(result_elem, 'Bucket').text = req.container_name
    SubElement(result_elem, 'Key').text = req.object_name
    SubElement(result_elem, 'ETag').text = resp.etag
    resp.body = tostring(result_elem)
    resp.status = 200
    resp.content_type = "application/xml"

    return resp
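For reference, a request body of the shape the parsing loop above accepts: part numbers must strictly increase, and each ETag (surrounding quotes optional) must match the hash of the corresponding segment object. The values below are hypothetical:

complete_body = '''<?xml version="1.0" encoding="UTF-8"?>
<CompleteMultipartUpload>
  <Part>
    <PartNumber>1</PartNumber>
    <ETag>"0123456789abcdef0123456789abcdef"</ETag>
  </Part>
  <Part>
    <PartNumber>2</PartNumber>
    <ETag>fedcba9876543210fedcba9876543210</ETag>
  </Part>
</CompleteMultipartUpload>'''

A mismatched ETag or an out-of-order PartNumber raises InvalidPart or InvalidPartOrder, respectively, before any manifest is written.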