Exemple #1
0
    def handle_request(self, env, start_response):
        """Validate the inspector headers and chain the requested inspectors.

        The ``Inspector`` request header is lower-cased and split on
        whitespace into a list of inspector names.  When ``self.hmac_key``
        is set, the ``Inspector-Sig`` and ``Inspector-Expires`` headers must
        be present, the expiry must not lie in the past, and the signature
        must match ``create_sig(inspector, expires, self.hmac_key)``;
        otherwise the request is answered through ``self.handle_error``.

        Each name found in ``inspector_handlers['object']`` wraps the current
        ``start_response`` callable; unknown names are reported to the client
        in an ``Inspector-Error`` response header.

        :param env: WSGI environment of the request.
        :param start_response: WSGI ``start_response`` callable.
        :returns: whatever ``self.app`` (or ``self.handle_error``) returns.
        """
        # Local import so the block does not depend on a file-level import.
        import hmac

        req = swob.Request(env)

        inspector = req.headers.get('inspector', '').lower().split()
        if self.hmac_key:
            expires = req.headers.get('inspector_expires', '')
            sig = req.headers.get('inspector_sig', '')
            try:
                if sig == '':
                    raise InspectorError('Missing Header: Inspector-Sig')
                if expires == '':
                    raise InspectorError('Missing Header: Inspector-Expires')
                # int() raises ValueError for a non-numeric header; that is
                # turned into a dedicated error message below.
                if int(expires) < int(time.time()):
                    raise InspectorError(
                        'Invalid Header: Inspector-Expires has expired')
                valid_sig = create_sig(inspector, expires, self.hmac_key)
                # The signature comes from an untrusted client: compare in
                # constant time so the comparison does not leak how many
                # leading characters were correct.
                try:
                    sig_matches = hmac.compare_digest(sig, valid_sig)
                except TypeError:
                    # Mismatched types or non-ASCII text can never be a
                    # valid signature.
                    sig_matches = False
                if not sig_matches:
                    raise InspectorError('Invalid Signature')
            except InspectorError as e:
                return self.handle_error(str(e), env, start_response)
            except ValueError:
                return self.handle_error(
                    'Invalid Header: Inspector-Expires must be an integer',
                    env, start_response)

        _start_response = start_response
        inspector_errors = []

        def inspector_start_response(status, headers, exc_info=None):
            # Report unknown inspector names, then delegate to the outermost
            # wrapper built by the loop below (looked up at call time).
            if inspector_errors:
                errors = ', '.join(inspector_errors).title()
                headers.append(('Inspector-Error',
                                'Invalid Inspectors: {0}'.format(errors)))
            return _start_response(status, headers, exc_info)

        handlers = inspector_handlers['object']
        for i in inspector:
            if i not in handlers:
                inspector_errors.append(i)
                continue
            # Each handler receives the previous start_response and returns
            # a new callable wrapping it.
            _start_response = handlers[i](
                env, _start_response, self.app, {
                    'swift_dir': self.swift_dir
                })
        return self.app(env, inspector_start_response)
Exemple #2
0
    def __call__(self, env, start_response):
        """Record the incoming request and answer with a canned response.

        Responses are consumed from the front of ``self.responses``; once
        only one is left it is reused for every further call.

        :param env: WSGI environment of the request.
        :param start_response: WSGI ``start_response`` callable.
        :returns: the ``body_iter`` of the selected response (``[]`` when
                  the response does not define one).
        """
        request = swob.Request(env)

        # The header mapping is mutable, so store a snapshot that later
        # calls cannot alter.
        recorded_call = (request.method,
                         request.path,
                         swob.HeaderKeyDict(request.headers))
        self._calls.append(recorded_call)

        queue = self.responses
        canned = queue.pop(0) if len(queue) > 1 else queue[0]

        response_status = canned['status']
        response_headers = canned.get('headers', [])
        response_body = canned.get('body_iter', [])
        start_response(response_status, response_headers)
        return response_body
Exemple #3
0
    def _handle_request(self, env, start_response):
        """Map an upload-style URL onto a Swift object path and proxy it.

        The request path is matched, in order, against patterns for regular
        uploads, timeline renderings, math renderings, score renderings,
        ``/monitoring/<what>`` and single-component "root" paths.  Monitoring
        and root requests are answered directly.  For every other match the
        request is rewritten to ``/v1/<account>/<container>/<object>`` and
        passed to the wrapped app via ``self._app_call``.  A 404 from the
        backend is delegated to ``self.handle404`` for ``local``/``thumb``
        requests and returned as a plain 404 otherwise.  Paths that match
        no pattern get a 404.

        :param env: WSGI environment of the request.
        :param start_response: WSGI ``start_response`` callable.
        :returns: the WSGI response iterable.
        """
        req = swob.Request(env)

        # Double (or triple, etc.) slashes in the URL should be ignored;
        # collapse them. fixes T34864
        req.path_info = re.sub(r'/{2,}', '/', req.path_info)

        # Keep a copy of the original request so we can ask the scalers for it
        # (the copy is taken before any of the rewrites below touch `req`).
        reqorig = swob.Request(req.environ.copy())

        # Containers have 5 components: project, language, repo, zone, and shard.
        # If there's no zone in the URL, the zone is assumed to be 'public' (for b/c).
        # Shard is optional (and configurable), and is only used for large containers.
        #
        # Projects are wikipedia, wikinews, etc.
        # Languages are en, de, fr, commons, etc.
        # Repos are local, timeline, etc.
        # Zones are public, thumb, temp, etc.
        # Shard is extracted from "hash paths" in the URL and is 2 hex digits.
        #
        # These attributes are mapped to container names in the form of either:
        # (a) proj-lang-repo-zone (if not sharded)
        # (b) proj-lang-repo-zone.shard (if sharded)
        # (c) global-data-repo-zone (if not sharded)
        # (d) global-data-repo-zone.shard (if sharded)
        #
        # Rewrite wiki-global URLs of these forms:
        # (a) http://upload.wikimedia.org/math/<relpath>
        #         => http://msfe/v1/AUTH_<hash>/global-data-math-render/<relpath>
        # (b) http://upload.wikimedia.org/<proj>/<lang>/math/<relpath> (legacy)
        #         => http://msfe/v1/AUTH_<hash>/global-data-math-render/<relpath>
        #
        # Rewrite wiki-relative URLs of these forms:
        # (a) http://upload.wikimedia.org/<proj>/<lang>/<relpath>
        #         => http://msfe/v1/AUTH_<hash>/<proj>-<lang>-local-public/<relpath>
        # (b) http://upload.wikimedia.org/<proj>/<lang>/archive/<relpath>
        #         => http://msfe/v1/AUTH_<hash>/<proj>-<lang>-local-public/archive/<relpath>
        # (c) http://upload.wikimedia.org/<proj>/<lang>/thumb/<relpath>
        #         => http://msfe/v1/AUTH_<hash>/<proj>-<lang>-local-thumb/<relpath>
        # (d) http://upload.wikimedia.org/<proj>/<lang>/thumb/archive/<relpath>
        #         => http://msfe/v1/AUTH_<hash>/<proj>-<lang>-local-thumb/archive/<relpath>
        # (e) http://upload.wikimedia.org/<proj>/<lang>/thumb/temp/<relpath>
        #         => http://msfe/v1/AUTH_<hash>/<proj>-<lang>-local-thumb/temp/<relpath>
        # (f) http://upload.wikimedia.org/<proj>/<lang>/transcoded/<relpath>
        #         => http://msfe/v1/AUTH_<hash>/<proj>-<lang>-local-transcoded/<relpath>
        # (g) http://upload.wikimedia.org/<proj>/<lang>/timeline/<relpath>
        #         => http://msfe/v1/AUTH_<hash>/<proj>-<lang>-timeline-render/<relpath>

        # regular uploads
        match = re.match(
            (r'^/(?P<proj>[^/]+)/(?P<lang>[^/]+)/'
             r'((?P<zone>transcoded|thumb)/)?'
             r'(?P<path>((temp|archive)/)?[0-9a-f]/(?P<shard>[0-9a-f]{2})/.+)$'
             ), req.path)
        if match:
            proj = match.group('proj')
            lang = match.group('lang')
            repo = 'local'  # the upload repo name is "local"
            # Get the repo zone (if not provided that means "public")
            zone = (match.group('zone') if match.group('zone') else 'public')
            # Get the object path relative to the zone (and thus container)
            obj = match.group('path')  # e.g. "archive/a/ab/..."
            shard = match.group('shard')

        # timeline renderings
        if match is None:
            # /wikipedia/en/timeline/a876297c277d80dfd826e1f23dbfea3f.png
            match = re.match(
                r'^/(?P<proj>[^/]+)/(?P<lang>[^/]+)/(?P<repo>timeline)/(?P<path>.+)$',
                req.path)
            if match:
                proj = match.group('proj')  # wikipedia
                lang = match.group('lang')  # en
                repo = match.group('repo')  # timeline
                zone = 'render'
                obj = match.group(
                    'path')  # a876297c277d80dfd826e1f23dbfea3f.png
                shard = ''

        # math renderings
        if match is None:
            # /math/c/9/f/c9f2055dadfb49853eff822a453d9ceb.png
            # /wikipedia/en/math/c/9/f/c9f2055dadfb49853eff822a453d9ceb.png (legacy)
            match = re.match(
                (r'^(/(?P<proj>[^/]+)/(?P<lang>[^/]+))?/(?P<repo>math)/'
                 r'(?P<path>(?P<shard1>[0-9a-f])/(?P<shard2>[0-9a-f])/.+)$'),
                req.path)

            if match:
                # The project/language captured from a legacy URL is
                # ignored; math always lives in the global-data containers.
                proj = 'global'
                lang = 'data'
                repo = match.group('repo')  # math
                zone = 'render'
                obj = match.group(
                    'path')  # c/9/f/c9f2055dadfb49853eff822a453d9ceb.png
                shard = match.group('shard1') + match.group('shard2')  # c9

        # score renderings
        if match is None:
            # /score/j/q/jqn99bwy8777srpv45hxjoiu24f0636/jqn99bwy.png
            # /score/override-midi/8/i/8i9pzt87wtpy45lpz1rox8wusjkt7ki.ogg
            match = re.match(r'^/(?P<repo>score)/(?P<path>.+)$', req.path)
            if match:
                proj = 'global'
                lang = 'data'
                repo = match.group('repo')  # score
                zone = 'render'
                obj = match.group(
                    'path')  # j/q/jqn99bwy8777srpv45hxjoiu24f0636/jqn99bwy.png
                shard = ''

        # monitoring endpoints: answered here, never rewritten to a container
        if match is None:
            match = re.match(r'^/monitoring/(?P<what>.+)$', req.path)
            if match:
                what = match.group('what')
                if what == 'frontend':
                    # Static reply; the wrapped app is not called.
                    headers = {'Content-Type': 'application/octet-stream'}
                    resp = swob.Response(headers=headers, body="OK\n")
                elif what == 'backend':
                    # Fetch the "monitoring/backend" path under the
                    # configured account through the wrapped app.
                    req.host = '127.0.0.1:%s' % self.bind_port
                    req.path_info = "/v1/%s/monitoring/backend" % self.account

                    app_iter = self._app_call(env)
                    status = self._get_status_int()
                    headers = self._response_headers

                    resp = swob.Response(status=status,
                                         headers=headers,
                                         app_iter=app_iter)
                else:
                    resp = swob.HTTPNotFound('Monitoring type not found "%s"' %
                                             (req.path))
                return resp(env, start_response)

        if match is None:
            match = re.match(r'^/(?P<path>[^/]+)?$', req.path)
            # /index.html /favicon.ico /robots.txt etc.
            # serve from a default "root" container
            if match:
                path = match.group('path')
                # A bare "/" has no path component; serve index.html instead.
                if not path:
                    path = 'index.html'

                req.host = '127.0.0.1:%s' % self.bind_port
                req.path_info = "/v1/%s/root/%s" % (self.account, path)

                app_iter = self._app_call(env)
                status = self._get_status_int()
                headers = self._response_headers

                resp = swob.Response(status=status,
                                     headers=headers,
                                     app_iter=app_iter)
                return resp(env, start_response)

        # Internally rewrite the URL based on the regex it matched...
        if match:
            # Get the per-project "conceptual" container name, i.e.
            # "<proj>-<lang>-<repo>-<zone>"
            container = "%s-%s-%s-%s" % (proj, lang, repo, zone)
            # Add 2-digit shard to the container if it is supposed to be sharded.
            # We may thus have an "actual" container name like
            # "<proj>-<lang>-<repo>-<zone>.<shard>"
            if container in self.shard_container_list:
                container += ".%s" % shard

            # Save a url with just the account name in it.
            # (path_info must be set before req.url is read below.)
            req.path_info = "/v1/%s" % (self.account)
            port = self.bind_port
            req.host = '127.0.0.1:%s' % port
            url = req.url[:]
            # Create a path to our object's name.
            req.path_info = "/v1/%s/%s/%s" % (self.account, container,
                                              urllib2.unquote(obj))
            # self.logger.warn("new path is %s" % req.path_info)

            # Call the wrapped app and read the status/headers back from
            # self rather than sending them straight to the client, because
            # a 404 may be replaced by a different response below.
            app_iter = self._app_call(env)
            status = self._get_status_int()
            headers = self._response_headers

            if status == 404:
                # only send thumbs to the 404 handler; just return a 404 for everything else.
                if repo == 'local' and zone == 'thumb':
                    resp = self.handle404(reqorig, url, container, obj)
                    return resp(env, start_response)
                else:
                    resp = swob.HTTPNotFound('File not found: %s' % req.path)
                    return resp(env, start_response)
            else:
                if zone == 'thumb':
                    # Spawn at most one expiry update, and only when the
                    # response carries an X-Delete-At header and the
                    # feature flag is enabled.
                    for key, value in headers:
                        if key == 'X-Delete-At' and self.thumbnail_update_expiry_headers:
                            # Update expiry header asynchronously
                            eventlet.spawn(self.update_expiry, env)
                            break

                # Return the response verbatim
                return swob.Response(status=status,
                                     headers=headers,
                                     app_iter=app_iter)(env, start_response)
        else:
            # None of the patterns above matched the request path.
            resp = swob.HTTPNotFound('Regexp failed to match URI: "%s"' %
                                     (req.path))
            return resp(env, start_response)
Exemple #4
0
    def __call__(self, env, start_response):
        """Serve a registered fake response for the incoming request.

        The response is looked up in ``self._responses`` by
        ``(method, path)``, where ``path`` includes the query string.
        Fallbacks, in order: the same method without the query string, the
        ``GET`` entry for a ``HEAD`` request (body dropped), and an object
        previously stored by a simulated ``PUT``.  When nothing matches, a
        diagnostic line is printed and the ``KeyError`` is re-raised.

        An object ``PUT`` stores the request body in ``self.uploaded`` so a
        later ``GET`` can return it.

        :param env: WSGI environment of the request.
        :param start_response: WSGI ``start_response`` callable.
        :returns: the WSGI response iterable.
        :raises KeyError: if no response is registered for the request.
        """
        if CONF.s3_acl:
            self._fake_auth_middleware(env)

        req = swob.Request(env)
        method = env['REQUEST_METHOD']
        path = env['PATH_INFO']
        _, acc, cont, obj = split_path(env['PATH_INFO'],
                                       0,
                                       4,
                                       rest_with_last=True)
        if env.get('QUERY_STRING'):
            path += '?' + env['QUERY_STRING']

        if 'swift.authorize' in env:
            resp = env['swift.authorize'](req)
            if resp:
                return resp(env, start_response)

        headers = req.headers
        self._calls.append((method, path, headers))
        self.swift_sources.append(env.get('swift.source'))

        try:
            resp_class, raw_headers, body = self._responses[(method, path)]
            headers = swob.HeaderKeyDict(raw_headers)
        except KeyError:
            if (env.get('QUERY_STRING')
                    and (method, env['PATH_INFO']) in self._responses):
                resp_class, raw_headers, body = self._responses[(
                    method, env['PATH_INFO'])]
                headers = swob.HeaderKeyDict(raw_headers)
            elif method == 'HEAD' and ('GET', path) in self._responses:
                resp_class, raw_headers, _ = self._responses[('GET', path)]
                body = None
                headers = swob.HeaderKeyDict(raw_headers)
            elif method == 'GET' and obj and path in self.uploaded:
                resp_class = swob.HTTPOk
                headers, body = self.uploaded[path]
            else:
                # A single parenthesised argument prints the same text with
                # the Python 2 print statement and the Python 3 print
                # function, so this line is valid on both.
                print("Didn't find %r in allowed responses" % (
                    (method, path), ))
                raise

        # simulate object PUT
        if method == 'PUT' and obj:
            put_body = env['wsgi.input'].read()
            etag = md5(put_body).hexdigest()
            headers.setdefault('Etag', etag)
            headers.setdefault('Content-Length', len(put_body))

            # keep it for subsequent GET requests later
            self.uploaded[path] = (deepcopy(headers), put_body)
            if "CONTENT_TYPE" in env:
                self.uploaded[path][0]['Content-Type'] = env["CONTENT_TYPE"]

        # range requests ought to work, but copies are special
        support_range_and_conditional = not (method == 'PUT'
                                             and 'X-Copy-From' in req.headers
                                             and 'Range' in req.headers)
        resp = resp_class(req=req,
                          headers=headers,
                          body=body,
                          conditional_response=support_range_and_conditional)
        return resp(env, start_response)
Exemple #5
0
    def __call__(self, env, start_response):
        """Serve a registered fake response and simulate object PUT/POST.

        The response is resolved through ``self._find_response`` using the
        path including the query string.  Fallbacks, in order: the same
        method without the query string, the ``GET`` entry for a ``HEAD``
        request (body dropped), and an object stored by an earlier
        simulated ``PUT``.

        An object ``PUT`` stores the request headers and body in
        ``self.uploaded``.  An object ``POST`` keeps the stored items that
        are neither user metadata nor transient sysmeta, and overlays the
        user metadata, transient sysmeta and Content-Type from the request.

        :param env: WSGI environment of the request.
        :param start_response: WSGI ``start_response`` callable.
        :returns: a ``LeakTrackingIter`` wrapping the WSGI response iterable.
        :raises HTTPNotImplemented: if the method is not in
                                    ``self.ALLOWED_METHODS``.
        :raises KeyError: if no response is registered for the request.
        """
        method = env['REQUEST_METHOD']
        if method not in self.ALLOWED_METHODS:
            raise HTTPNotImplemented()

        path = env['PATH_INFO']
        _, acc, cont, obj = split_path(env['PATH_INFO'],
                                       0,
                                       4,
                                       rest_with_last=True)
        if env.get('QUERY_STRING'):
            path += '?' + env['QUERY_STRING']

        if 'swift.authorize' in env:
            resp = env['swift.authorize'](swob.Request(env))
            if resp:
                return resp(env, start_response)

        req = swob.Request(env)
        self.swift_sources.append(env.get('swift.source'))
        self.txn_ids.append(env.get('swift.trans_id'))

        try:
            resp_class, raw_headers, body = self._find_response(method, path)
            headers = HeaderKeyDict(raw_headers)
        except KeyError:
            if (env.get('QUERY_STRING')
                    and (method, env['PATH_INFO']) in self._responses):
                resp_class, raw_headers, body = self._find_response(
                    method, env['PATH_INFO'])
                headers = HeaderKeyDict(raw_headers)
            elif method == 'HEAD' and ('GET', path) in self._responses:
                resp_class, raw_headers, body = self._find_response(
                    'GET', path)
                body = None
                headers = HeaderKeyDict(raw_headers)
            elif method == 'GET' and obj and path in self.uploaded:
                resp_class = swob.HTTPOk
                headers, body = self.uploaded[path]
            else:
                raise KeyError("Didn't find %r in allowed responses" %
                               ((method, path), ))

        # simulate object PUT
        if method == 'PUT' and obj:
            # Drain the input stream: read() is called until it returns ''.
            put_body = ''.join(iter(env['wsgi.input'].read, ''))
            if 'swift.callback.update_footers' in env:
                footers = HeaderKeyDict()
                env['swift.callback.update_footers'](footers)
                req.headers.update(footers)
            etag = md5(put_body).hexdigest()
            headers.setdefault('Etag', etag)
            headers.setdefault('Content-Length', len(put_body))

            # keep it for subsequent GET requests later
            self.uploaded[path] = (dict(req.headers), put_body)
            if "CONTENT_TYPE" in env:
                self.uploaded[path][0]['Content-Type'] = env["CONTENT_TYPE"]

        # simulate object POST
        elif method == 'POST' and obj:
            metadata, data = self.uploaded.get(path, ({}, None))
            # select items to keep from existing...
            new_metadata = dict((k, v) for k, v in metadata.items()
                                if (not is_user_meta('object', k)
                                    and not is_object_transient_sysmeta(k)))
            # apply from new; note lower() must be *called*: comparing the
            # bound method itself to a string is always False, which
            # silently dropped a POSTed Content-Type.
            new_metadata.update(
                dict((k, v) for k, v in req.headers.items() if (
                    is_user_meta('object', k) or is_object_transient_sysmeta(k)
                    or k.lower() == 'content-type')))
            self.uploaded[path] = new_metadata, data

        # note: tests may assume this copy of req_headers is case insensitive
        # so we deliberately use a HeaderKeyDict
        self._calls.append(
            FakeSwiftCall(method, path, HeaderKeyDict(req.headers)))

        # Apply conditional etag overrides
        conditional_etag = resolve_etag_is_at_header(req, headers)

        # A list body is passed as app_iter, anything else as body; all
        # other response arguments are the same in both cases.
        body_kwarg = 'app_iter' if isinstance(body, list) else 'body'
        # range requests ought to work, hence conditional_response for
        # GET and HEAD
        resp = resp_class(req=req,
                          headers=headers,
                          conditional_response=req.method in ('GET', 'HEAD'),
                          conditional_etag=conditional_etag,
                          **{body_kwarg: body})
        wsgi_iter = resp(env, start_response)
        self.mark_opened(path)
        return LeakTrackingIter(wsgi_iter, self.mark_closed, path)
Exemple #6
0
def proxy_wrapper(env, start_response, app, config):
    """Build a ``start_response`` wrapper that reports ring node locations.

    The returned callable appends two response headers before delegating to
    the wrapped ``start_response``:

    * ``Inspector-Nodes``: URLs of the nodes returned by the ring's
      ``get_nodes`` for the requested account, container or object.
    * ``Inspector-More-Nodes``: URLs of the nodes returned by the ring's
      ``get_more_nodes`` for the same partition.

    If the request path cannot be split into version/account[/container
    [/object]], only an empty ``Inspector-Nodes`` header is added.

    :param env: WSGI environment of the request.
    :param start_response: WSGI ``start_response`` callable to wrap.
    :param app: application passed to ``get_container_info`` when looking
                up the container of an object request.
    :param config: mapping; its ``swift_dir`` entry is passed to the ring
                   constructors.
    :returns: the wrapping ``start_response`` callable.
    """
    swift_dir = config.get('swift_dir')
    request = swob.Request(env)

    def _format_nodes(nodes, partition):
        """Render nodes as a comma separated list of device URLs."""
        return ', '.join(
            'http://{ip}:{port}/{device}/{partition}'.format(
                ip=node['ip'],
                port=node['port'],
                device=node['device'],
                partition=partition)
            for node in nodes)

    def _start_response(status, headers, exc_info=None):
        """Append the node headers, then call the wrapped start_response."""
        try:
            version, account, container, obj = request.split_path(
                2, 4, rest_with_last=True)
        except ValueError:
            headers.append(('Inspector-Nodes', ''))
            return start_response(status, headers, exc_info)

        if account is not None:
            account = urllib.unquote(account)
        if container is not None:
            container = urllib.unquote(container)
        if obj is not None:
            obj = urllib.unquote(obj)

        # Pick the ring matching the deepest path component present, and
        # build it once so both lookups below share the same instance.
        if obj is not None:
            container_info = controllers.get_container_info(
                {
                    'PATH_INFO': '/{0}/{1}/{2}'.format(version, account,
                                                       container)
                },
                app,
                swift_source='LE')
            node_ring = storage_policy.POLICIES.get_object_ring(
                container_info['storage_policy'], swift_dir)
            ring_args = (account, container, obj)
        elif container is not None:
            node_ring = ring.Ring(swift_dir, ring_name='container')
            ring_args = (account, container)
        else:
            node_ring = ring.Ring(swift_dir, ring_name='account')
            ring_args = (account, )

        partition, nodes = node_ring.get_nodes(*ring_args)
        more_nodes = node_ring.get_more_nodes(partition)

        headers.append(('Inspector-Nodes', _format_nodes(nodes, partition)))
        headers.append(('Inspector-More-Nodes',
                        _format_nodes(more_nodes, partition)))

        return start_response(status, headers, exc_info)

    return _start_response
Exemple #7
0
    def __call__(self, env, start_response):
        """Serve a registered fake response for the incoming request.

        The response is resolved through ``self._get_response`` using the
        path including the query string.  Fallbacks, in order: the same
        method without the query string, the ``GET`` entry for a ``HEAD``
        request (body dropped), and an object stored by an earlier
        simulated ``PUT``.

        An object ``PUT`` stores the request body in ``self.uploaded`` so a
        later ``GET`` can return it.

        :param env: WSGI environment of the request.
        :param start_response: WSGI ``start_response`` callable.
        :returns: a ``LeakTrackingIter`` wrapping the WSGI response iterable.
        :raises KeyError: if no response is registered for the request.
        """
        method = env['REQUEST_METHOD']
        path = env['PATH_INFO']
        _, acc, cont, obj = split_path(env['PATH_INFO'],
                                       0,
                                       4,
                                       rest_with_last=True)
        if env.get('QUERY_STRING'):
            path += '?' + env['QUERY_STRING']

        if 'swift.authorize' in env:
            # The authorize callback is handed the request it should judge;
            # calling it without arguments fails for callbacks that expect
            # one.
            resp = env['swift.authorize'](swob.Request(env))
            if resp:
                return resp(env, start_response)

        req_headers = swob.Request(env).headers
        self.swift_sources.append(env.get('swift.source'))

        try:
            resp_class, raw_headers, body = self._get_response(method, path)
            headers = swob.HeaderKeyDict(raw_headers)
        except KeyError:
            if (env.get('QUERY_STRING')
                    and (method, env['PATH_INFO']) in self._responses):
                resp_class, raw_headers, body = self._get_response(
                    method, env['PATH_INFO'])
                headers = swob.HeaderKeyDict(raw_headers)
            elif method == 'HEAD' and ('GET', path) in self._responses:
                resp_class, raw_headers, body = self._get_response('GET', path)
                body = None
                headers = swob.HeaderKeyDict(raw_headers)
            elif method == 'GET' and obj and path in self.uploaded:
                resp_class = swob.HTTPOk
                headers, body = self.uploaded[path]
            else:
                raise KeyError("Didn't find %r in allowed responses" %
                               ((method, path), ))

        # Only requests that resolved to a response are recorded.
        self._calls.append((method, path, req_headers))

        # simulate object PUT
        if method == 'PUT' and obj:
            put_body = env['wsgi.input'].read()
            etag = md5(put_body).hexdigest()
            headers.setdefault('Etag', etag)
            headers.setdefault('Content-Length', len(put_body))

            # keep it for subsequent GET requests later
            self.uploaded[path] = (deepcopy(headers), put_body)
            if "CONTENT_TYPE" in env:
                self.uploaded[path][0]['Content-Type'] = env["CONTENT_TYPE"]

        # range requests ought to work, hence conditional_response=True
        req = swob.Request(env)
        resp = resp_class(req=req,
                          headers=headers,
                          body=body,
                          conditional_response=True)
        wsgi_iter = resp(env, start_response)
        self.mark_opened(path)
        return LeakTrackingIter(wsgi_iter, self, path)
Exemple #8
0
    def __call__(self, env, start_response):
        """Route a request to a shunt handler or pass it to the wrapped app.

        The request goes straight to ``self.app`` when its path cannot be
        split, the bypass header is set, the API version is invalid, or no
        sync profile applies.  Otherwise it is dispatched on the request
        method and on whether a container/object is addressed; several
        handlers are used only when the profile has ``migration`` set.

        :param env: WSGI environment of the request.
        :param start_response: WSGI ``start_response`` callable.
        :returns: the WSGI response iterable of the chosen handler or app.
        """
        # Refresh the configuration once the reload deadline has passed.
        if time() > self._rtime:
            self._reload()

        req = swob.Request(env)
        try:
            version, account, container, obj = req.split_path(2, 4, True)
        except ValueError:
            return self.app(env, start_response)

        if req.headers.get(SHUNT_BYPASS_HEADER, ''):
            self.logger.debug('Bypassing shunt (%s header) for %r',
                              SHUNT_BYPASS_HEADER, req.path_info)
            return self.app(env, start_response)

        if not constraints.valid_api_version(version):
            return self.app(env, start_response)

        # Account-level request: only a GET on a migration profile that
        # covers all containers is handled here.
        if not container:
            account_profile = self.sync_profiles.get((account, '/*'))
            handled_here = (req.method == 'GET' and account_profile
                            and account_profile.get('migration'))
            if not handled_here:
                return self.app(env, start_response)
            # TODO: make the container an optional parameter
            munged, _ = maybe_munge_profile_for_all_containers(
                account_profile, '.stub-container')
            return self.handle_account(req, start_response, munged, account)

        # Prefer a container-specific profile over the all-containers one.
        matched_profile = None
        for container_key in (container, '/*'):
            if (account, container_key) in self.sync_profiles:
                matched_profile = self.sync_profiles[(account, container_key)]
                break
        if matched_profile is None:
            return self.app(env, start_response)

        profile, per_account = maybe_munge_profile_for_all_containers(
            matched_profile, container)
        method = req.method

        if method == 'DELETE' and profile.get('migration'):
            return self.handle_delete(req, start_response, profile, obj,
                                      per_account)

        if obj:
            # TODO: think about what to do for POST, COPY
            if method in ('GET', 'HEAD'):
                return self.handle_object(req, start_response, profile, obj,
                                          per_account)
        else:
            if method == 'GET':
                return self.handle_listing(req, start_response, profile,
                                           container, per_account)
            if method == 'HEAD' and profile.get('migration'):
                return self.handle_container_head(req, start_response,
                                                  profile, container,
                                                  per_account)

        if method == 'POST' and profile.get('migration'):
            return self.handle_post(req, start_response, profile, obj,
                                    per_account)

        if obj and method == 'PUT' and profile.get('migration'):
            return self.handle_object_put(req, start_response, profile,
                                          per_account)

        return self.app(env, start_response)
Exemple #9
0
    def handle404(self, reqorig, url, container, obj):
        """
        Return a swob.Response which fetches the thumbnail from the thumb
        host and returns it. Note also that the thumb host might write it out
        to Swift so it won't 404 next time.
        """
        # go to the thumb media store for unknown files
        reqorig.host = self.thumbhost
        # upload doesn't like our User-agent, otherwise we could call it
        # using urllib2.url()
        proxy_handler = urllib2.ProxyHandler({'http': self.thumbhost})
        redirect_handler = DumbRedirectHandler()
        opener = urllib2.build_opener(redirect_handler, proxy_handler)
        # Thumbor doesn't need (and doesn't like) the proxy
        thumbor_opener = urllib2.build_opener(redirect_handler)

        # Pass on certain headers from the caller squid to the scalers
        opener.addheaders = []
        if reqorig.headers.get('User-Agent') is not None:
            opener.addheaders.append(
                ('User-Agent', reqorig.headers.get('User-Agent')))
        else:
            opener.addheaders.append(('User-Agent', self.user_agent))
        for header_to_pass in [
                'X-Forwarded-For', 'X-Forwarded-Proto', 'Accept',
                'Accept-Encoding', 'X-Original-URI'
        ]:
            if reqorig.headers.get(header_to_pass) is not None:
                opener.addheaders.append(
                    (header_to_pass, reqorig.headers.get(header_to_pass)))

        thumbor_opener.addheaders = opener.addheaders

        # At least in theory, we shouldn't be handing out links to originals
        # that we don't have (or in the case of thumbs, can't generate).
        # However, someone may have a formerly valid link to a file, so we
        # should do them the favor of giving them a 404.
        try:
            # break apach the url, url-encode it, and put it back together
            urlobj = list(urlparse.urlsplit(reqorig.url))
            # encode the URL but don't encode %s and /s
            urlobj[2] = urllib2.quote(urlobj[2], '%/')
            encodedurl = urlparse.urlunsplit(urlobj)

            # Thumbor never needs URL mangling and it needs a different host
            if self.thumborhost:
                thumbor_reqorig = swob.Request(reqorig.environ.copy())
                thumbor_reqorig.host = self.thumborhost
                thumbor_urlobj = list(urlparse.urlsplit(thumbor_reqorig.url))
                thumbor_urlobj[2] = urllib2.quote(thumbor_urlobj[2], '%/')
                thumbor_encodedurl = urlparse.urlunsplit(thumbor_urlobj)

            # if sitelang, we're supposed to mangle the URL so that
            # http://upload.wm.o/wikipedia/commons/thumb/a/a2/Foo_.jpg/330px-Foo_.jpg
            # changes to
            # http://commons.wp.o/w/thumb_handler.php/a/a2/Foo_.jpg/330px-Foo_.jpg
            if self.backend_url_format == 'sitelang':
                match = re.match(
                    r'^http://(?P<host>[^/]+)/(?P<proj>[^-/]+)/(?P<lang>[^/]+)/thumb/(?P<path>.+)',
                    encodedurl)
                if match:
                    proj = match.group('proj')
                    lang = match.group('lang')
                    # and here are all the legacy special cases, imported from thumb_handler.php
                    if (proj == 'wikipedia'):
                        if (lang in ['meta', 'commons', 'internal', 'grants']):
                            proj = 'wikimedia'
                        if (lang in ['mediawiki']):
                            lang = 'www'
                            proj = 'mediawiki'
                    hostname = '%s.%s.%s' % (lang, proj, self.tld)
                    if (proj == 'wikipedia' and lang == 'sources'):
                        # yay special case
                        hostname = 'wikisource.%s' % self.tld
                    # ok, replace the URL with just the part starting with thumb/
                    # take off the first two parts of the path
                    # (eg /wikipedia/commons/); make sure the string starts
                    # with a /
                    encodedurl = 'http://%s/w/thumb_handler.php/%s' % (
                        hostname, match.group('path'))
                    # add in the X-Original-URI header with the URI that swift
                    # got (minus the hostname)
                    opener.addheaders.append(
                        ('X-Original-URI',
                         list(urlparse.urlsplit(reqorig.url))[2]))
                else:
                    # ASSERT this code should never be hit since only thumbs
                    # should call the 404 handler
                    self.logger.warn(
                        "non-thumb in 404 handler! encodedurl = %s" %
                        encodedurl)
                    resp = swob.HTTPNotFound('Unexpected error')
                    return resp
            else:
                # log the result of the match here to test and make sure it's
                # sane before enabling the config
                match = re.match(
                    r'^http://(?P<host>[^/]+)/(?P<proj>[^-/]+)/(?P<lang>[^/]+)/thumb/(?P<path>.+)',
                    encodedurl)
                if match:
                    proj = match.group('proj')
                    lang = match.group('lang')
                    self.logger.warn(
                        "sitelang match has proj %s lang %s encodedurl %s" %
                        (proj, lang, encodedurl))
                else:
                    self.logger.warn("no sitelang match on encodedurl: %s" %
                                     encodedurl)

            # To turn thumbor off and have thumbnail traffic served by image scalers,
            # replace the line below with this one:
            # upcopy = opener.open(encodedurl)
            upcopy = thumbor_opener.open(thumbor_encodedurl)
        except urllib2.HTTPError as error:
            # Wrap the urllib2 HTTPError into a swob HTTPException
            status = error.code
            if status not in swob.RESPONSE_REASONS:
                # Generic status description in case of unknown status reasons.
                status = "%s Error" % status
            return swob.HTTPException(status=status,
                                      body=error.msg,
                                      headers=error.hdrs.items())
        except urllib2.URLError as error:
            msg = 'There was a problem while contacting the thumbnailing service: %s' % \
                  error.reason
            return swob.HTTPServiceUnavailable(msg)

        # get the Content-Type.
        uinfo = upcopy.info()
        c_t = uinfo.gettype()

        resp = swob.Response(app_iter=upcopy, content_type=c_t)

        headers_whitelist = [
            'Content-Length', 'Content-Disposition', 'Last-Modified',
            'Accept-Ranges', 'XKey', 'Thumbor-Engine', 'Server',
            'Nginx-Request-Date', 'Nginx-Response-Date',
            'Thumbor-Processing-Time', 'Thumbor-Processing-Utime',
            'Thumbor-Request-Id', 'Thumbor-Request-Date'
        ]

        # add in the headers if we've got them
        for header in headers_whitelist:
            if (uinfo.getheader(header) != ''):
                resp.headers[header] = uinfo.getheader(header)

        # also add CORS; see also our CORS middleware
        resp.headers['Access-Control-Allow-Origin'] = '*'

        return resp