def __call__(self, req):
    """Save a trash copy of an object before letting its DELETE through.

    Requests that are not object DELETEs are handed to the wrapped app
    unchanged.

    :param req: the incoming request; ``method``, ``split_path()`` and
        ``environ`` are read from it.
    :returns: ``self.app`` when the request should proceed down the
        pipeline, or a swob response when the request is rejected or the
        trash copy could not be made.
    """
    # We only want to step in on object DELETE requests
    if req.method != 'DELETE':
        return self.app
    try:
        vrs, acc, con, obj = req.split_path(4, 4, rest_with_last=True)
    except ValueError:
        # not an object request
        return self.app

    # Okay, this is definitely an object DELETE request; let's see if it's
    # one we want to step in for.
    if self.is_trash(con) and self.block_trash_deletes:
        return swob.HTTPMethodNotAllowed(
            content_type="text/plain",
            body=("Attempted to delete from a trash container, but "
                  "block_trash_deletes is enabled\n"))
    elif not self.should_save_copy(req.environ, con, obj):
        return self.app

    trash_container = self.trash_prefix + con
    copy_status, copy_headers, copy_body = self.copy_object(
        req, trash_container, obj)
    if copy_status == 404:
        # Create the trash container and try the copy once more.
        self.create_trash_container(req, vrs, acc, trash_container)
        copy_status, copy_headers, copy_body = self.copy_object(
            req, trash_container, obj)

    # Check the outcome of whichever copy attempt ran last. Previously a
    # failed retry was ignored and the DELETE went through without a trash
    # copy. A 404 is still passed through, as before.
    # NOTE(review): presumably a 404 after the retry means the source
    # object itself is missing -- confirm against copy_object().
    if copy_status != 404 and not http.is_success(copy_status):
        # other error; propagate this to the client
        return swob.Response(
            body=friendly_error(copy_body),
            status=copy_status,
            headers=copy_headers)
    return self.app
def test_copy_hook_passthrough(self):
    """The copy hook returns the very same response for a non-manifest."""
    request = swob.Request.blank('/v1/AUTH_test/c/man')
    # no X-Object-Manifest header, so do nothing
    plain_resp = swob.Response(request=request, status=200)

    hooked_resp = self.copy_hook(request, plain_resp)

    self.assertTrue(hooked_resp is plain_resp)
def get_err_response():
    """Build the XML error response sent when an MP4 cannot be processed.

    :returns: a ``text/xml`` swob response whose status is
        ``HTTP_BAD_REQUEST`` and whose body is an ``<Error>`` document.
    """
    error_template = (
        '<?xml version="1.0" encoding="UTF-8"?>\r\n<Error>\r\n '
        '<Code>%s</Code>\r\n <Message>%s</Message>\r\n</Error>\r\n')
    error_xml = error_template % (
        HTTP_BAD_REQUEST, 'Unable to process requested MP4')

    err = swob.Response(content_type='text/xml')
    err.status = HTTP_BAD_REQUEST
    err.body = error_xml
    return err
def test_mark_for_deletion_one_update_no_yield(self):
    """One listing page of three objects produces a single UPDATE call."""
    ts = '1558463777.42739'
    listing = [
        {'name': '/obj1'},
        {'name': 'obj2'},
        {'name': 'obj3'},
    ]
    canned_responses = [
        swob.Response(json.dumps(listing)),
        swob.Response(json.dumps([])),
        swob.Response(status=202),
    ]
    with FakeInternalClient(canned_responses) as swift:
        marked = container_deleter.mark_for_deletion(
            swift,
            'account',
            'container',
            '',
            '',
            '',
            timestamp=utils.Timestamp(ts),
            yield_time=None,
        )
        self.assertEqual(marked, 3)

        expected_calls = [
            ('GET', '/v1/account/container',
             'format=json&marker=&end_marker=&prefix=',
             {}, None),
            ('GET', '/v1/account/container',
             'format=json&marker=obj3&end_marker=&prefix=',
             {}, None),
            ('UPDATE', '/v1/.expiring_objects/' + ts.split('.')[0], '',
             {
                 'X-Backend-Allow-Private-Methods': 'True',
                 'X-Backend-Storage-Policy-Index': '0',
                 'X-Timestamp': ts
             },
             mock.ANY),
        ]
        self.assertEqual(swift.calls, expected_calls)

        # The body of the final (UPDATE) call must be the delete jobs for
        # exactly the names that were listed.
        sent_jobs = json.loads(swift.calls[-1].body)
        expected_jobs = container_deleter.make_delete_jobs(
            'account', 'container', ['/obj1', 'obj2', 'obj3'],
            utils.Timestamp(ts))
        self.assertEqual(sent_jobs, expected_jobs)
def test_copy_hook_passthrough(self):
    """Without X-Object-Manifest the hook hands back the source response."""
    object_path = '/v1/AUTH_test/c/man'
    get_req = swob.Request.blank(
        object_path, environ={'REQUEST_METHOD': 'GET'})
    put_req = swob.Request.blank(
        object_path, environ={'REQUEST_METHOD': 'PUT'})
    get_resp = swob.Response(request=get_req, status=200)

    # no X-Object-Manifest header, so do nothing
    hooked_resp = self.copy_hook(get_req, get_resp, put_req)

    self.assertTrue(hooked_resp is get_resp)
def test_copy_hook_manifest(self):
    """A manifest response is replaced and carries the combined etag."""
    request = swob.Request.blank('/v1/AUTH_test/c/man')
    manifest_resp = swob.Response(
        request=request,
        status=200,
        headers={"X-Object-Manifest": "c/o"},
        app_iter=["manifest"])

    hooked_resp = self.copy_hook(request, manifest_resp)

    self.assertTrue(hooked_resp is not manifest_resp)
    expected_etag = hashlib.md5("o1-etago2-etag").hexdigest()
    self.assertEqual(hooked_resp.etag, expected_etag)
def __call__(self, req):
    """Route a request by path depth and guard object DELETEs.

    Account and container requests are handed to
    ``translate_sysmeta_and_complete()``. Object DELETEs get a trash copy
    made first. Everything else goes to the wrapped app unchanged.

    :param req: the incoming request; ``method``, ``split_path()`` and
        ``environ`` are read from it.
    :returns: ``self.app`` when the request should proceed down the
        pipeline, the result of ``translate_sysmeta_and_complete()`` for
        account/container requests, or a swob response when the request
        is rejected or the trash copy could not be made.
    """
    try:
        vrs, acc, con, obj = req.split_path(2, 4, rest_with_last=True)
    except ValueError:
        # /info or similar...
        return self.app

    # Check if it's an account request...
    if con is None:
        return self.translate_sysmeta_and_complete(
            req, {'x-' + SYSMETA_UNDELETE_ENABLED: SYSMETA_ACCOUNT})

    # ...or a container request...
    if obj is None:
        return self.translate_sysmeta_and_complete(
            req, {'x-' + SYSMETA_UNDELETE_ENABLED: SYSMETA_CONTAINER})

    # ...must be object.
    # We only want to step in on object DELETE requests
    if req.method != 'DELETE':
        return self.app

    # Okay, this is definitely an object DELETE request; let's see if it's
    # one we want to step in for.
    if self.is_trash(con) and self.block_trash_deletes:
        return swob.HTTPMethodNotAllowed(
            content_type="text/plain",
            body=("Attempted to delete from a trash container, but "
                  "block_trash_deletes is enabled\n"))
    elif self.is_trash(con) and not self.is_superuser(req.environ):
        return swob.HTTPForbidden(
            content_type="text/plain",
            body=("Attempted to delete from a trash container, but "
                  "user is not a superuser\n"))
    elif not self.should_save_copy(req.environ, con, obj):
        return self.app

    trash_container = self.trash_prefix + con
    copy_status, copy_headers, copy_body = self.copy_object(
        req, trash_container, obj)
    if copy_status == 404:
        # Create the trash container and try the copy once more.
        self.create_trash_container(req, vrs, acc, trash_container)
        copy_status, copy_headers, copy_body = self.copy_object(
            req, trash_container, obj)

    # Check the outcome of whichever copy attempt ran last. Previously a
    # failed retry was ignored and the DELETE went through without a trash
    # copy. A 404 is still passed through, as before.
    # NOTE(review): presumably a 404 after the retry means the source
    # object itself is missing -- confirm against copy_object().
    if copy_status != 404 and not http.is_success(copy_status):
        # other error; propagate this to the client
        return swob.Response(
            body=friendly_error(copy_body),
            status=copy_status,
            headers=copy_headers)
    return self.app
def __call__(self, env, start_response):
    """WSGI entry point that replays a pre-registered response.

    Records the call in ``self._calls``, runs the ``swift.authorize``
    callback when one is set (and not overridden), then looks up the
    response registered for ``(method, path)`` in ``self._responses``.
    For PUT and COALESCE the request body is drained and checked against
    the declared Content-Length.

    :param env: the WSGI environment.
    :param start_response: the WSGI start_response callable.
    :returns: the WSGI body iterable of the chosen response.
    :raises KeyError: if no response is registered for the method/path.
    """
    method = env['REQUEST_METHOD']
    path = env['PATH_INFO']
    req = swob.Request(env)
    self._calls.append((method, path, swob.HeaderKeyDict(req.headers)))

    if (env.get('swift.authorize') and
            not env.get('swift.authorize_override')):
        denial_response = env['swift.authorize'](req)
        if denial_response:
            # The denial is itself a WSGI application. It has to be
            # invoked so that start_response is called and a body iterable
            # is returned; handing back the response object un-invoked
            # breaks the WSGI contract.
            return denial_response(env, start_response)

    try:
        status_int, headers, body = self._responses[(method, path)]
    except KeyError:
        print("Didn't find \"%s %s\" in registered responses" % (
            method, path))
        raise

    if method in ('PUT', 'COALESCE'):
        bytes_read = 0
        # consume the whole request body, just like a PUT would
        for chunk in iter(env['wsgi.input'].read, b''):
            bytes_read += len(chunk)
        cl = req.headers.get('Content-Length')

        if cl is not None and int(cl) != bytes_read:
            error_resp = swob.HTTPClientDisconnect(
                request=req,
                body=("Content-Length didn't match"
                      " body length (says FakeProxy)"))
            return error_resp(env, start_response)

        if cl is None \
                and "chunked" not in req.headers.get("Transfer-Encoding", ""):
            error_resp = swob.HTTPLengthRequired(
                request=req,
                body="No Content-Length (says FakeProxy)")
            return error_resp(env, start_response)

    resp = swob.Response(
        body=body, status=status_int, headers=headers,
        # We cheat a little here and use swob's handling of the Range
        # header instead of doing it ourselves.
        conditional_response=True)

    return resp(env, start_response)
def test_copy_hook_manifest(self):
    """Copying a manifest yields a new response and no sink manifest."""
    object_path = '/v1/AUTH_test/c/man'
    get_req = swob.Request.blank(
        object_path, environ={'REQUEST_METHOD': 'GET'})
    put_req = swob.Request.blank(
        object_path, environ={'REQUEST_METHOD': 'PUT'})
    manifest_resp = swob.Response(
        request=get_req,
        status=200,
        headers={"X-Object-Manifest": "c/o"},
        app_iter=["manifest"])

    # it's a manifest, so copy the segments to make a normal object
    hooked_resp = self.copy_hook(get_req, manifest_resp, put_req)

    self.assertTrue(hooked_resp is not manifest_resp)
    expected_etag = hashlib.md5("o1-etago2-etag").hexdigest()
    self.assertEqual(hooked_resp.etag, expected_etag)
    self.assertEqual(put_req.headers.get('X-Object-Manifest'), None)
def handle_request(self, req):
    """Answer ``req`` with the next queued body and status code.

    Increments ``self.call_count`` and pops the first path segment off
    ``req``. When ``self.body`` is a list, its first element is consumed
    (an empty string is used once the list is exhausted); otherwise
    ``self.body`` is used as is. The status comes from
    ``self.status_codes``, falling back to ``self.default_status_code``.

    :param req: the request being answered.
    :returns: a conditional swob response iterating over the chosen body.
    """
    self.call_count += 1
    req.path_info_pop()

    queued = self.body
    if isinstance(queued, list):
        payload = queued.pop(0) if queued else ''
    else:
        payload = queued

    reply = swob.Response(
        request=req, body=payload, conditional_response=True)
    try:
        next_status = self.status_codes.pop(0)
    except IndexError:
        next_status = self.default_status_code
    reply.status_int = next_status
    reply.app_iter = iter(payload)
    return reply
def forward_raw_swift_req(swift_baseurl, req, logger, object_chunk_size):
    """Replay ``req`` against the Swift endpoint at ``swift_baseurl``.

    The request line and non-hop-by-hop headers are sent as is, except
    that Host is replaced by the Swift host. A request body is streamed in
    ``object_chunk_size`` pieces when Content-Length is non-zero. An
    ``x-storage-url`` response header has its host replaced with the Host
    the client originally sent.

    :param swift_baseurl: URL whose scheme and netloc select the endpoint.
    :param req: the swob request to forward.
    :param logger: unused by this function.
    :param object_chunk_size: number of bytes per body read.
    :returns: a swob response whose app_iter wraps the upstream
        connection and response.
    """
    # logger.info('swift_baseurl: %s' % swift_baseurl)
    scheme, netloc, _, _, _ = urlsplit(swift_baseurl)
    use_tls = scheme == 'https'
    default_port = 443 if use_tls else 80
    swift_host, raw_port = utils.parse_socket_string(netloc, default_port)
    swift_port = int(raw_port)

    connection_cls = (bufferedhttp.HTTPSConnection if use_tls
                      else bufferedhttp.BufferedHTTPConnection)
    conn = connection_cls(swift_host, port=swift_port)
    conn.path = req.path_qs
    conn.putrequest(req.method, req.path_qs, skip_host=True)

    # Remember the Host the client used; it is not forwarded upstream but
    # is needed to rewrite x-storage-url in the response.
    client_host = ''
    for name, value in filter_hop_by_hop_headers(req.headers.items()):
        if name.lower() != 'host':
            conn.putheader(name, str(value))
        else:
            client_host = value
    conn.putheader('Host', str(swift_host))
    conn.endheaders()

    if int(req.headers.get('content-length', '0')) != 0:
        while True:
            piece = req.body_file.read(object_chunk_size)
            if not piece:
                break
            conn.send(piece)

    upstream = conn.getresponse()
    headers = dict(filter_hop_by_hop_headers(upstream.getheaders()))
    if 'x-storage-url' in headers:
        storage_scheme, _, storage_path, _, _ = \
            urlsplit(headers['x-storage-url'])
        headers['x-storage-url'] = ''.join(
            (storage_scheme, "://", client_host, storage_path))

    if req.method == 'HEAD':
        body_len = 0
    else:
        body_len = int(headers.get('content-length', "0"))
    body_iter = ClosingResourceIterable(
        resource=conn, data_src=upstream, length=body_len)
    return swob.Response(app_iter=body_iter,
                         status=upstream.status,
                         headers=headers,
                         request=req)
def test_copy_hook_manifest_with_multipart_manifest_get(self):
    """With multipart-manifest=get the manifest header moves to the sink."""
    object_path = '/v1/AUTH_test/c/man'
    get_env = {'REQUEST_METHOD': 'GET',
               'QUERY_STRING': 'multipart-manifest=get'}
    get_req = swob.Request.blank(object_path, environ=get_env)
    put_req = swob.Request.blank(
        object_path, environ={'REQUEST_METHOD': 'PUT'})
    manifest_resp = swob.Response(
        request=get_req,
        status=200,
        headers={"X-Object-Manifest": "c/o"},
        app_iter=["manifest"])

    # make sure the sink request (the backend PUT) gets X-Object-Manifest
    # on it, but that's all
    hooked_resp = self.copy_hook(get_req, manifest_resp, put_req)

    self.assertTrue(hooked_resp is manifest_resp)
    self.assertEqual(put_req.headers.get('X-Object-Manifest'), 'c/o')
def test_mark_for_deletion_empty_no_yield(self):
    """An empty listing marks nothing and issues a single GET."""
    empty_listing = swob.Response(json.dumps([]))
    with FakeInternalClient([empty_listing]) as swift:
        marked = container_deleter.mark_for_deletion(
            swift,
            'account',
            'container',
            'marker',
            'end',
            'prefix',
            timestamp=None,
            yield_time=None,
        )
        self.assertEqual(marked, 0)

        expected_calls = [
            ('GET', '/v1/account/container',
             'format=json&marker=marker&end_marker=end&prefix=prefix',
             {}, None),
        ]
        self.assertEqual(swift.calls, expected_calls)
def GET(self, req):
    """Serves a GET to the middleware.

    Runs a search for the account named in the request path and renders
    the hits as JSON, XML or plain text.

    Query parameters read from ``req``:

    * ``format`` -- ``json`` or ``xml``; when absent, the Accept header is
      inspected instead. Anything else yields plain text.
    * ``q`` and ``q.<field>`` -- search terms; surrounding ``*`` are
      stripped.
    * ``limit`` / ``rows`` -- integer page size, 100 when zero or absent.
    * ``start`` / ``offset`` -- integer offset, 0 when absent.
    * ``sort``, ``marker`` -- passed to the searcher unchanged.
    * ``type`` -- ``object``, ``container`` or empty.
    * ``field`` / ``df`` -- field searched by ``q``; ``_all`` by default.
    * ``recursive`` -- false for ``false``, ``0`` or ``f``
      (case-insensitive), true otherwise.

    :param req: the swob request.
    :returns: a swob response: 400 for a malformed path, an unknown
        ``type`` or a non-integer paging parameter; 503 when the search
        backend times out; otherwise the rendered results.
    """
    try:
        version, account, path = swift_utils.split_path(
            req.path, 2, 3, True)
    except ValueError:
        return swob.HTTPBadRequest(request=req)

    if path:
        path = utils.unicode_unquote(path).rstrip("/")

    self.logger.debug("Searching")

    # Get all of the request variables that we need.
    fmt = req.params.get('format', '').lower()
    accept_header = req.headers.get('Accept', '').lower()

    # Check for Accept header as well
    if fmt == '' and accept_header != '':
        if 'json' in accept_header:
            fmt = 'json'
        elif 'xml' in accept_header:
            fmt = 'xml'

    queries = []
    for key, value in req.str_params.items():
        if key.startswith('q.'):
            val = value.decode("utf-8").strip('*')
            queries.append((key[2:], val))
    query = req.str_params.get('q')
    if query:
        query = query.decode("utf-8").strip('*')

    # Paging values come straight from the client; a non-numeric value is
    # a client error, not a server fault.
    try:
        limit = int(req.params.get('limit', 0) or
                    req.params.get('rows', 0)) or 100
        start = int(req.params.get('start', 0) or
                    req.params.get('offset', 0))
    except ValueError:
        return swob.HTTPBadRequest(request=req)

    sort = req.params.get('sort', None)
    _type = req.params.get('type', None)
    if _type not in ['object', 'container', None, '']:
        return swob.HTTPBadRequest(request=req)
    field = (req.params.get('field', None) or
             req.params.get('df', None) or
             '_all')
    marker = req.params.get('marker', None)

    recursive = req.params.get('recursive', True)
    if not isinstance(recursive, bool):
        recursive = recursive.lower() not in ['false', '0', 'f']

    srch = index.Searcher(self.elastic_hosts,
                          self.search_index_name,
                          account,
                          logger=self.logger)
    srch.logger = self.logger
    if query:
        srch.add_condition(field, query)
    for f, q in queries:
        if f.startswith("meta-"):
            # "meta-foo" on the query string becomes the "meta.foo" field.
            f = "meta." + f[5:]
        srch.add_condition(f, q)
    srch.path = path
    srch.recursive = recursive
    srch.type = _type
    srch.sort = sort
    srch.limit = limit
    srch.start = start
    srch.marker = marker

    try:
        results = srch.execute()
    except socket.timeout:
        # Response's keyword is "request", as on every other response
        # built in this method.
        return swob.HTTPServiceUnavailable(request=req)

    self.logger.debug(results)

    result_list = [index.filter_result_props(item) for item in results]

    headers = [
        ('X-Search-Items-Count', len(result_list)),
        ('X-Search-Items-Total', results.total),
        ('X-Search-Items-Offset', start),
    ]

    if fmt == 'json':
        headers.append(('Content-Type', 'application/json; charset=utf-8'))
        return swob.Response(request=req,
                             body=json.dumps(result_list),
                             headers=headers)
    elif fmt == 'xml':
        headers.append(('Content-Type', 'application/xml; charset=utf-8'))
        output_list = [
            '<?xml version="1.0" encoding="UTF-8"?>',
            '<results>',
        ]
        for res in result_list:
            fields = ['<%s>%s</%s>' % (key, saxutils.escape(str(val)), key)
                      for key, val in res.iteritems()]
            output_list.append('<object>' + ''.join(fields) + '</object>')
        output_list.append('</results>')
        res_body = '\n'.join(output_list)
        return swob.Response(request=req, body=res_body, headers=headers)
    else:
        headers.append(('Content-Type', 'text/plain'))
        # One "key: value" line per property, blank line between results.
        pieces = []
        for res in result_list:
            for key, val in res.iteritems():
                pieces.append(str(key) + ': ' + str(val) + '\n')
            pieces.append('\n')
        res_body = ''.join(pieces)
        return swob.Response(request=req, body=res_body, headers=headers)
def __call__(self, env, start_response):
    """WSGI entry point: reply with the canned status, headers and body.

    The three values are unpacked from ``self.status_headers_body``.
    """
    status, headers, body = self.status_headers_body
    canned = swob.Response(status=status, headers=headers, body=body)
    return canned(env, start_response)
def handle_object_listing(self, req):
    """Return one object listing merged from the local and remote providers.

    Both providers are listed with the same marker, limit, prefix and
    delimiter, and the two result streams are spliced together. Status
    and headers come from the local response when it succeeded, otherwise
    from the remote one. A non-404 local failure is returned immediately;
    if both sides fail, the remote failure is returned.

    :param req: the swob request for the listing.
    :returns: a swob response carrying the formatted listing, or the
        failing provider's response converted with ``to_swob_response()``.
    """
    # XXX(darrell): this is still uncomfortably-duplicated with
    # S3SyncShunt.handle_listing()
    resp_type = get_listing_content_type(req)
    limit, marker, prefix, delimiter, path = get_list_params(req, 1000)
    # TODO(darrell): handle "path" presence kind of like the Shunt does?
    # Figure that out when adding Swift API support.

    local_resp, local_iter = iter_listing(
        self.local_to_me_provider.list_objects,
        self.app.logger, marker, limit, prefix, delimiter)
    if local_resp.success:
        final_status = local_resp.status
        final_headers = local_resp.headers
    else:
        if local_resp.status != 404:
            self.app.logger.debug(
                'handle_object_listing: local-to-me '
                'for %s got %d', self.aco_str, local_resp.status)
            return local_resp.to_swob_response(req=req)
        # This is ok because splice_listing() only iterates over the
        # local_iter--it doesn't try to call next() or anything if it's
        # empty.
        local_iter = []

    remote_resp, remote_iter = iter_listing(
        self.remote_to_me_provider.list_objects,
        self.app.logger, marker, limit, prefix, delimiter)
    if not remote_resp.success:
        if not local_resp.success:
            # Two strikes and you're OUT!
            # If we got here, we know the first "error" was 404, so we'll
            # return whatever we have, here, which will either be a 404
            # (fine) or some other error (also fine since that's more
            # interesting than any 404).
            if remote_resp.status != 404:
                self.app.logger.debug(
                    'handle_object_listing: '
                    'remote-to-me for %s got %d', self.aco_str,
                    remote_resp.status)
            return remote_resp.to_swob_response(req=req)
        # This one does need to be an actual iterator and conform to the
        # contract of yielding (None, None) when it's "done".
        remote_iter = iter([(None, None)])
    elif not local_resp.success:
        final_status = remote_resp.status
        final_headers = remote_resp.headers
    self.app.logger.debug(
        'handle_object_listing: final_status/headers: '
        '%r %r', final_status, final_headers)

    spliced = splice_listing(local_iter, remote_iter, limit)
    self.app.logger.debug('handle_object_listing: spliced: %r', spliced)

    response_body = format_listing_response(spliced, resp_type,
                                            self.container_name)
    self.app.logger.debug('handle_object_listing: response_body: %r',
                          response_body)
    # The body is regenerated here, so the provider's Content-Type and
    # Content-Length must not be carried over. lower() has to be *called*:
    # comparing the bound method itself never matched anything, so these
    # two headers used to leak through.
    encoded_headers = {
        k.encode('utf8'): v.encode('utf8')
        for k, v in final_headers.items()
        if k.lower() not in ('content-type', 'content-length')
    }
    no_hop_headers = dict(
        filter_hop_by_hop_headers(encoded_headers.items()))
    return swob.Response(body=response_body,
                         status=final_status,
                         headers=no_hop_headers,
                         request=req,
                         content_type=resp_type)
def _handle_request(self, env, start_response):
    """Rewrite an upload-style URL to a Swift object path and serve it.

    The request path is matched against a series of patterns (regular
    uploads, timeline, math, score, monitoring, root files). The first
    match determines the container name and object path, the request is
    re-pointed at ``/v1/<account>/<container>/<object>`` and passed to the
    wrapped app. A 404 for a thumbnail is delegated to ``handle404()``.

    :param env: the WSGI environment; modified in place through ``req``.
    :param start_response: the WSGI start_response callable.
    :returns: the WSGI body iterable of the response that was produced.
    """
    req = swob.Request(env)

    # Double (or triple, etc.) slashes in the URL should be ignored;
    # collapse them. fixes T34864
    req.path_info = re.sub(r'/{2,}', '/', req.path_info)

    # Keep a copy of the original request so we can ask the scalers for it
    reqorig = swob.Request(req.environ.copy())

    # Containers have 5 components: project, language, repo, zone, and shard.
    # If there's no zone in the URL, the zone is assumed to be 'public' (for b/c).
    # Shard is optional (and configurable), and is only used for large containers.
    #
    # Projects are wikipedia, wikinews, etc.
    # Languages are en, de, fr, commons, etc.
    # Repos are local, timeline, etc.
    # Zones are public, thumb, temp, etc.
    # Shard is extracted from "hash paths" in the URL and is 2 hex digits.
    #
    # These attributes are mapped to container names in the form of either:
    # (a) proj-lang-repo-zone (if not sharded)
    # (b) proj-lang-repo-zone.shard (if sharded)
    # (c) global-data-repo-zone (if not sharded)
    # (d) global-data-repo-zone.shard (if sharded)
    #
    # Rewrite wiki-global URLs of these forms:
    # (a) http://upload.wikimedia.org/math/<relpath>
    #     => http://msfe/v1/AUTH_<hash>/global-data-math-render/<relpath>
    # (b) http://upload.wikimedia.org/<proj>/<lang>/math/<relpath> (legacy)
    #     => http://msfe/v1/AUTH_<hash>/global-data-math-render/<relpath>
    #
    # Rewrite wiki-relative URLs of these forms:
    # (a) http://upload.wikimedia.org/<proj>/<lang>/<relpath>
    #     => http://msfe/v1/AUTH_<hash>/<proj>-<lang>-local-public/<relpath>
    # (b) http://upload.wikimedia.org/<proj>/<lang>/archive/<relpath>
    #     => http://msfe/v1/AUTH_<hash>/<proj>-<lang>-local-public/archive/<relpath>
    # (c) http://upload.wikimedia.org/<proj>/<lang>/thumb/<relpath>
    #     => http://msfe/v1/AUTH_<hash>/<proj>-<lang>-local-thumb/<relpath>
    # (d) http://upload.wikimedia.org/<proj>/<lang>/thumb/archive/<relpath>
    #     => http://msfe/v1/AUTH_<hash>/<proj>-<lang>-local-thumb/archive/<relpath>
    # (e) http://upload.wikimedia.org/<proj>/<lang>/thumb/temp/<relpath>
    #     => http://msfe/v1/AUTH_<hash>/<proj>-<lang>-local-thumb/temp/<relpath>
    # (f) http://upload.wikimedia.org/<proj>/<lang>/transcoded/<relpath>
    #     => http://msfe/v1/AUTH_<hash>/<proj>-<lang>-local-transcoded/<relpath>
    # (g) http://upload.wikimedia.org/<proj>/<lang>/timeline/<relpath>
    #     => http://msfe/v1/AUTH_<hash>/<proj>-<lang>-timeline-render/<relpath>

    # regular uploads
    match = re.match(
        (r'^/(?P<proj>[^/]+)/(?P<lang>[^/]+)/'
         r'((?P<zone>transcoded|thumb)/)?'
         r'(?P<path>((temp|archive)/)?[0-9a-f]/(?P<shard>[0-9a-f]{2})/.+)$'
         ), req.path)
    if match:
        proj = match.group('proj')
        lang = match.group('lang')
        repo = 'local'  # the upload repo name is "local"
        # Get the repo zone (if not provided that means "public")
        zone = (match.group('zone') if match.group('zone') else 'public')
        # Get the object path relative to the zone (and thus container)
        obj = match.group('path')  # e.g. "archive/a/ab/..."
        shard = match.group('shard')

    # Each following pattern is tried only while nothing has matched yet.

    # timeline renderings
    if match is None:
        # /wikipedia/en/timeline/a876297c277d80dfd826e1f23dbfea3f.png
        match = re.match(
            r'^/(?P<proj>[^/]+)/(?P<lang>[^/]+)/(?P<repo>timeline)/(?P<path>.+)$',
            req.path)
        if match:
            proj = match.group('proj')  # wikipedia
            lang = match.group('lang')  # en
            repo = match.group('repo')  # timeline
            zone = 'render'
            obj = match.group(
                'path')  # a876297c277d80dfd826e1f23dbfea3f.png
            shard = ''

    # math renderings
    if match is None:
        # /math/c/9/f/c9f2055dadfb49853eff822a453d9ceb.png
        # /wikipedia/en/math/c/9/f/c9f2055dadfb49853eff822a453d9ceb.png (legacy)
        match = re.match(
            (r'^(/(?P<proj>[^/]+)/(?P<lang>[^/]+))?/(?P<repo>math)/'
             r'(?P<path>(?P<shard1>[0-9a-f])/(?P<shard2>[0-9a-f])/.+)$'),
            req.path)

        if match:
            # Any proj/lang captured from a legacy URL is discarded.
            proj = 'global'
            lang = 'data'
            repo = match.group('repo')  # math
            zone = 'render'
            obj = match.group(
                'path')  # c/9/f/c9f2055dadfb49853eff822a453d9ceb.png
            shard = match.group('shard1') + match.group('shard2')  # c9

    # score renderings
    if match is None:
        # /score/j/q/jqn99bwy8777srpv45hxjoiu24f0636/jqn99bwy.png
        # /score/override-midi/8/i/8i9pzt87wtpy45lpz1rox8wusjkt7ki.ogg
        match = re.match(r'^/(?P<repo>score)/(?P<path>.+)$', req.path)
        if match:
            proj = 'global'
            lang = 'data'
            repo = match.group('repo')  # score
            zone = 'render'
            obj = match.group(
                'path')  # j/q/jqn99bwy8777srpv45hxjoiu24f0636/jqn99bwy.png
            shard = ''

    # Monitoring endpoints are answered here and never reach the
    # container rewrite further down.
    if match is None:
        match = re.match(r'^/monitoring/(?P<what>.+)$', req.path)
        if match:
            what = match.group('what')
            if what == 'frontend':
                headers = {'Content-Type': 'application/octet-stream'}
                resp = swob.Response(headers=headers, body="OK\n")
            elif what == 'backend':
                req.host = '127.0.0.1:%s' % self.bind_port
                req.path_info = "/v1/%s/monitoring/backend" % self.account

                app_iter = self._app_call(env)
                status = self._get_status_int()
                headers = self._response_headers

                resp = swob.Response(status=status,
                                     headers=headers,
                                     app_iter=app_iter)
            else:
                resp = swob.HTTPNotFound('Monitoring type not found "%s"' %
                                         (req.path))
            return resp(env, start_response)

    if match is None:
        match = re.match(r'^/(?P<path>[^/]+)?$', req.path)
        # /index.html /favicon.ico /robots.txt etc.
        # serve from a default "root" container
        if match:
            path = match.group('path')
            if not path:
                path = 'index.html'

            req.host = '127.0.0.1:%s' % self.bind_port
            req.path_info = "/v1/%s/root/%s" % (self.account, path)

            app_iter = self._app_call(env)
            status = self._get_status_int()
            headers = self._response_headers

            resp = swob.Response(status=status,
                                 headers=headers,
                                 app_iter=app_iter)
            return resp(env, start_response)

    # Internally rewrite the URL based on the regex it matched...
    if match:
        # Get the per-project "conceptual" container name, e.g. "<proj><lang><repo><zone>"
        container = "%s-%s-%s-%s" % (proj, lang, repo, zone)
        # Add 2-digit shard to the container if it is supposed to be sharded.
        # We may thus have an "actual" container name like "<proj><lang><repo><zone>.<shard>"
        if container in self.shard_container_list:
            container += ".%s" % shard

        # Save a url with just the account name in it.
        req.path_info = "/v1/%s" % (self.account)
        port = self.bind_port
        req.host = '127.0.0.1:%s' % port
        url = req.url[:]
        # Create a path to our object's name.
        req.path_info = "/v1/%s/%s/%s" % (self.account, container,
                                          urllib2.unquote(obj))
        # self.logger.warn("new path is %s" % req.path_info)

        # do_start_response just remembers what it got called with,
        # because our 404 handler will generate a different response.
        app_iter = self._app_call(env)
        status = self._get_status_int()
        headers = self._response_headers

        if status == 404:
            # only send thumbs to the 404 handler; just return a 404 for everything else.
            if repo == 'local' and zone == 'thumb':
                resp = self.handle404(reqorig, url, container, obj)
                return resp(env, start_response)
            else:
                resp = swob.HTTPNotFound('File not found: %s' % req.path)
                return resp(env, start_response)
        else:
            if zone == 'thumb':
                for key, value in headers:
                    if key == 'X-Delete-At' and self.thumbnail_update_expiry_headers:
                        # Update expiry header asynchronously
                        eventlet.spawn(self.update_expiry, env)
                        break

            # Return the response verbatim
            return swob.Response(status=status,
                                 headers=headers,
                                 app_iter=app_iter)(env, start_response)
    else:
        resp = swob.HTTPNotFound('Regexp failed to match URI: "%s"' %
                                 (req.path))
        return resp(env, start_response)
def to_swob_response(self, req=None):
    """Convert this response object into a ``swob.Response``.

    Hop-by-hop headers are filtered out; the body is exposed as an
    iterator over ``self.body``.

    :param req: optional request to attach to the swob response.
    :returns: the equivalent ``swob.Response``.
    """
    response_kwargs = {
        'headers': dict(filter_hop_by_hop_headers(self.headers.items())),
        'app_iter': iter(self.body),
        'status': self.status,
        'request': req,
    }
    return swob.Response(**response_kwargs)
def handle404(self, reqorig, url, container, obj):
    """
    Return a swob.Response which fetches the thumbnail from the thumb
    host and returns it. Note also that the thumb host might write it out
    to Swift so it won't 404 next time.

    :param reqorig: copy of the original client request; its host is
        overwritten with ``self.thumbhost`` here.
    :param url: not used in this method.
    :param container: not used in this method.
    :param obj: not used in this method.
    :returns: a swob response streaming the upstream thumbnail, or a swob
        error response when the upstream request fails or the URL is not
        a thumb URL in 'sitelang' mode.
    """
    # go to the thumb media store for unknown files
    reqorig.host = self.thumbhost
    # upload doesn't like our User-agent, otherwise we could call it
    # using urllib2.url()
    proxy_handler = urllib2.ProxyHandler({'http': self.thumbhost})
    redirect_handler = DumbRedirectHandler()
    opener = urllib2.build_opener(redirect_handler, proxy_handler)
    # Thumbor doesn't need (and doesn't like) the proxy
    thumbor_opener = urllib2.build_opener(redirect_handler)

    # Pass on certain headers from the caller squid to the scalers
    opener.addheaders = []
    if reqorig.headers.get('User-Agent') is not None:
        opener.addheaders.append(
            ('User-Agent', reqorig.headers.get('User-Agent')))
    else:
        opener.addheaders.append(('User-Agent', self.user_agent))
    for header_to_pass in [
            'X-Forwarded-For', 'X-Forwarded-Proto', 'Accept',
            'Accept-Encoding', 'X-Original-URI'
    ]:
        if reqorig.headers.get(header_to_pass) is not None:
            opener.addheaders.append(
                (header_to_pass, reqorig.headers.get(header_to_pass)))

    # Both openers now share the SAME list object, so the X-Original-URI
    # header appended to opener.addheaders further down is also seen by
    # thumbor_opener.
    thumbor_opener.addheaders = opener.addheaders

    # At least in theory, we shouldn't be handing out links to originals
    # that we don't have (or in the case of thumbs, can't generate).
    # However, someone may have a formerly valid link to a file, so we
    # should do them the favor of giving them a 404.
    try:
        # break apart the url, url-encode it, and put it back together
        urlobj = list(urlparse.urlsplit(reqorig.url))
        # encode the URL but don't encode %s and /s
        urlobj[2] = urllib2.quote(urlobj[2], '%/')
        encodedurl = urlparse.urlunsplit(urlobj)

        # Thumbor never needs URL mangling and it needs a different host
        # NOTE(review): thumbor_encodedurl is only bound inside this
        # branch but is used unconditionally at the end of the try block;
        # a falsy self.thumborhost would lead to a NameError there --
        # confirm that thumborhost is always configured.
        if self.thumborhost:
            thumbor_reqorig = swob.Request(reqorig.environ.copy())
            thumbor_reqorig.host = self.thumborhost
            thumbor_urlobj = list(urlparse.urlsplit(thumbor_reqorig.url))
            thumbor_urlobj[2] = urllib2.quote(thumbor_urlobj[2], '%/')
            thumbor_encodedurl = urlparse.urlunsplit(thumbor_urlobj)

        # if sitelang, we're supposed to mangle the URL so that
        # http://upload.wm.o/wikipedia/commons/thumb/a/a2/Foo_.jpg/330px-Foo_.jpg
        # changes to
        # http://commons.wp.o/w/thumb_handler.php/a/a2/Foo_.jpg/330px-Foo_.jpg
        if self.backend_url_format == 'sitelang':
            match = re.match(
                r'^http://(?P<host>[^/]+)/(?P<proj>[^-/]+)/(?P<lang>[^/]+)/thumb/(?P<path>.+)',
                encodedurl)
            if match:
                proj = match.group('proj')
                lang = match.group('lang')
                # and here are all the legacy special cases, imported from thumb_handler.php
                if (proj == 'wikipedia'):
                    if (lang in ['meta', 'commons', 'internal', 'grants']):
                        proj = 'wikimedia'
                    if (lang in ['mediawiki']):
                        lang = 'www'
                        proj = 'mediawiki'
                hostname = '%s.%s.%s' % (lang, proj, self.tld)
                if (proj == 'wikipedia' and lang == 'sources'):
                    # yay special case
                    hostname = 'wikisource.%s' % self.tld
                # ok, replace the URL with just the part starting with thumb/
                # take off the first two parts of the path
                # (eg /wikipedia/commons/); make sure the string starts
                # with a /
                encodedurl = 'http://%s/w/thumb_handler.php/%s' % (
                    hostname, match.group('path'))
                # add in the X-Original-URI with the swift got (minus the hostname)
                opener.addheaders.append(
                    ('X-Original-URI',
                     list(urlparse.urlsplit(reqorig.url))[2]))
            else:
                # ASSERT this code should never be hit since only thumbs
                # should call the 404 handler
                self.logger.warn(
                    "non-thumb in 404 handler! encodedurl = %s" %
                    encodedurl)
                resp = swob.HTTPNotFound('Unexpected error')
                return resp
        else:
            # log the result of the match here to test and make sure it's
            # sane before enabling the config
            match = re.match(
                r'^http://(?P<host>[^/]+)/(?P<proj>[^-/]+)/(?P<lang>[^/]+)/thumb/(?P<path>.+)',
                encodedurl)
            if match:
                proj = match.group('proj')
                lang = match.group('lang')
                self.logger.warn(
                    "sitelang match has proj %s lang %s encodedurl %s" %
                    (proj, lang, encodedurl))
            else:
                self.logger.warn("no sitelang match on encodedurl: %s" %
                                 encodedurl)

        # To turn thumbor off and have thumbnail traffic served by image scalers,
        # replace the line below with this one:
        # upcopy = opener.open(encodedurl)
        upcopy = thumbor_opener.open(thumbor_encodedurl)
    except urllib2.HTTPError as error:
        # Wrap the urllib2 HTTPError into a swob HTTPException
        status = error.code
        if status not in swob.RESPONSE_REASONS:
            # Generic status description in case of unknown status reasons.
            status = "%s Error" % status
        return swob.HTTPException(status=status,
                                  body=error.msg,
                                  headers=error.hdrs.items())
    except urllib2.URLError as error:
        msg = 'There was a problem while contacting the thumbnailing service: %s' % \
              error.reason
        return swob.HTTPServiceUnavailable(msg)

    # get the Content-Type.
    uinfo = upcopy.info()
    c_t = uinfo.gettype()

    resp = swob.Response(app_iter=upcopy, content_type=c_t)

    headers_whitelist = [
        'Content-Length', 'Content-Disposition', 'Last-Modified',
        'Accept-Ranges', 'XKey', 'Thumbor-Engine', 'Server',
        'Nginx-Request-Date', 'Nginx-Response-Date',
        'Thumbor-Processing-Time', 'Thumbor-Processing-Utime',
        'Thumbor-Request-Id', 'Thumbor-Request-Date'
    ]

    # add in the headers if we've got them
    # NOTE(review): the test compares against '' -- confirm what
    # getheader() returns for an absent header; if it is None the
    # assignment below still runs.
    for header in headers_whitelist:
        if (uinfo.getheader(header) != ''):
            resp.headers[header] = uinfo.getheader(header)

    # also add CORS; see also our CORS middleware
    resp.headers['Access-Control-Allow-Origin'] = '*'

    return resp
def handle404(self, reqorig, url, container, obj):
    """
    Fetch a missing thumbnail from Thumbor and wrap it in a swob.Response.

    The request is sent to ``self.thumborhost``. On success the same
    request is also fired at ``self.inactivedc_thumborhost`` in a
    separate greenthread, without waiting for its result.

    :param reqorig: copy of the original client request.
    :param url: not used in this method.
    :param container: not used in this method.
    :param obj: not used in this method.
    :returns: a swob response streaming the thumbnail, or a swob error
        response when the Thumbor request fails.
    """
    # upload doesn't like our User-agent, otherwise we could call it
    # using urllib2.url()
    thumbor_opener = urllib2.build_opener(DumbRedirectHandler())

    # Pass on certain headers from Varnish to Thumbor
    client_agent = reqorig.headers.get('User-Agent')
    if client_agent is None:
        forwarded = [('User-Agent', self.user_agent)]
    else:
        forwarded = [('User-Agent', client_agent)]
    passthrough_names = [
        'X-Forwarded-For', 'X-Forwarded-Proto', 'Accept',
        'Accept-Encoding', 'X-Original-URI'
    ]
    for name in passthrough_names:
        value = reqorig.headers.get(name)
        if value is not None:
            forwarded.append((name, value))
    thumbor_opener.addheaders = forwarded

    # At least in theory, we shouldn't be handing out links to originals
    # that we don't have (or in the case of thumbs, can't generate).
    # However, someone may have a formerly valid link to a file, so we
    # should do them the favor of giving them a 404.
    try:
        thumbor_encodedurl = self.thumborify_url(reqorig, self.thumborhost)
        upcopy = thumbor_opener.open(thumbor_encodedurl)
    except urllib2.HTTPError as error:
        # Wrap the urllib2 HTTPError into a swob HTTPException
        status = error.code
        body = error.fp.read()
        headers = error.hdrs.items()
        if status not in swob.RESPONSE_REASONS:
            # Generic status description in case of unknown status reasons.
            status = "%s Error" % status
        return swob.HTTPException(status=status,
                                  body=body,
                                  headers=headers)
    except urllib2.URLError as error:
        msg = 'There was a problem while contacting the thumbnailing service: %s' % \
              error.reason
        return swob.HTTPServiceUnavailable(msg)

    # We successfully generated a thumbnail on the active DC, send the same request
    # blindly to the inactive DC to populate Swift there, not waiting for the response
    inactivedc_encodedurl = self.thumborify_url(
        reqorig, self.inactivedc_thumborhost)
    eventlet.spawn(self.inactivedc_request, thumbor_opener,
                   inactivedc_encodedurl)

    # get the Content-Type.
    upstream_info = upcopy.info()
    resp = swob.Response(app_iter=upcopy,
                         content_type=upstream_info.gettype())

    # add in the headers if we've got them
    for name in ('Content-Length', 'Content-Disposition', 'Last-Modified',
                 'Accept-Ranges', 'XKey', 'Thumbor-Engine', 'Server',
                 'Nginx-Request-Date', 'Nginx-Response-Date',
                 'Thumbor-Processing-Time', 'Thumbor-Processing-Utime',
                 'Thumbor-Request-Id', 'Thumbor-Request-Date'):
        upstream_value = upstream_info.getheader(name)
        if upstream_value != '':
            resp.headers[name] = upstream_value

    # also add CORS; see also our CORS middleware
    resp.headers['Access-Control-Allow-Origin'] = '*'

    return resp
def __call__(self, env, start_response):
    """WSGI entry point: always answer with the body 'Fake Test App'."""
    fake_resp = swob.Response('Fake Test App')
    return fake_resp(env, start_response)