예제 #1
0
    def __call__(self, req):
        """
        Tag the request with bimodal-account information for later
        middlewares.

        The account is parsed out of ``req.path``; if there is one and the
        API version is valid, ``_fetch_owning_proxyfs`` is asked whether the
        account is bimodal and which proxyfsd owns it. The answers, plus a
        reference to this object, are stored in ``req.environ`` under
        ``utils.ENV_IS_BIMODAL``, ``utils.ENV_OWNING_PROXYFS`` and
        ``utils.ENV_BIMODAL_CHECKER``.

        :param req: the incoming request; ``path`` and ``environ`` are used.

        :returns: ``self.app`` when the request should carry on down the
            pipeline, or a 503 ``swob.HTTPServiceUnavailable`` response
            (plain-text body holding the error message) when the ProxyFS
            lookup fails.
        """
        # NOTE(review): returning self.app presumably relies on a
        # wsgify-style decorator applied outside this view -- confirm.
        vrs, acc, con, obj = utils.parse_path(req.path)

        if not acc or not constraints.valid_api_version(vrs):
            # could be a GET /info request or something made up by some
            # other middleware; get out of the way.
            return self.app

        try:
            is_bimodal, owner_addrinfo = self._fetch_owning_proxyfs(req, acc)
        except (utils.RpcError, utils.RpcTimeout,
                utils.NoSuchHostnameError) as err:
            # _fetch_owning_proxyfs also raises NoSuchHostnameError when the
            # owning proxyfsd's hostname cannot be resolved. Report that the
            # same way as the RPC failures rather than letting it escape as
            # an unhandled exception.
            return swob.HTTPServiceUnavailable(
                request=req,
                headers={'Content-Type': 'text/plain'},
                body=str(err))

        # Other middlewares will find and act on this
        req.environ[utils.ENV_IS_BIMODAL] = is_bimodal
        req.environ[utils.ENV_OWNING_PROXYFS] = owner_addrinfo
        req.environ[utils.ENV_BIMODAL_CHECKER] = self

        return self.app
예제 #2
0
    def __call__(self):
        """
        Run one SSYNC exchange as a generator of response body pieces.

        After emitting an initial line to get the response headers sent, the
        replication semaphore (when one is configured) is taken without
        blocking, the device's replication lock is held, and the output of
        ``missing_check()`` followed by ``updates()`` is streamed out.

        Failures are reported in-band as ``:ERROR: <code> <message>`` lines:

        * ``exceptions.ReplicationLockTimeout`` -- debug-logged, code 0.
        * ``exceptions.MessageTimeout`` -- error-logged, code 408.
        * ``swob.HTTPException`` -- not logged; the exception's own status
          and rendered body are sent back.
        * any other ``Exception`` -- logged with a stack trace, code 0.

        If ``self.disconnect`` is still true when the exchange is over, the
        request's socket is shut down and closed early.
        """
        # Helpers called from here are expected to raise MessageTimeout for
        # client timeouts, swob.HTTPException subclasses for errors that go
        # back to the caller without local logging, and anything else for
        # genuine surprises. The peer is our own code, so unexpected
        # failures get a full stack trace in the log.
        try:
            # The outer try is a backstop in case one of the handlers below
            # itself fails.
            try:
                # Something has to be sent so wsgi returns the response
                # headers and the ssync exchange can start.
                yield '\r\n'
                # Non-blocking acquire; a busy semaphore becomes a 503.
                if (self.app.replication_semaphore and
                        not self.app.replication_semaphore.acquire(False)):
                    raise swob.HTTPServiceUnavailable()
                try:
                    with self.diskfile_mgr.replication_lock(self.device):
                        for chunk in self.missing_check():
                            yield chunk
                        for chunk in self.updates():
                            yield chunk
                    # Nothing was raised, so the request may finish
                    # normally.
                    self.disconnect = False
                finally:
                    if self.app.replication_semaphore:
                        self.app.replication_semaphore.release()
            except exceptions.ReplicationLockTimeout as exc:
                log_args = (self.request.remote_addr, self.device,
                            self.partition, exc)
                self.app.logger.debug(
                    '%s/%s/%s SSYNC LOCK TIMEOUT: %s' % log_args)
                yield ':ERROR: %d %r\n' % (0, str(exc))
            except exceptions.MessageTimeout as exc:
                log_args = (self.request.remote_addr, self.device,
                            self.partition, exc)
                self.app.logger.error(
                    '%s/%s/%s TIMEOUT in ssync.Receiver: %s' % log_args)
                yield ':ERROR: %d %r\n' % (408, str(exc))
            except swob.HTTPException as exc:
                def ignore_start_response(*args):
                    return None

                # Render the exception as a WSGI app to obtain its body.
                payload = ''.join(exc({}, ignore_start_response))
                yield ':ERROR: %d %r\n' % (exc.status_int, payload)
            except Exception as exc:
                log_args = (self.request.remote_addr, self.device,
                            self.partition)
                self.app.logger.exception(
                    '%s/%s/%s EXCEPTION in ssync.Receiver' % log_args)
                yield ':ERROR: %d %r\n' % (0, str(exc))
        except Exception:
            self.app.logger.exception('EXCEPTION in ssync.Receiver')

        if not self.disconnect:
            return

        # Close the socket early so the remote side does not have to send
        # its whole request only for the lower Eventlet level to read it and
        # throw it away; the peer gets a broken-pipe exception instead.
        try:
            sock = self.request.environ['wsgi.input'].get_socket()
            eventlet.greenio.shutdown_safe(sock)
            sock.close()
        except Exception:
            pass  # We're okay with the above failing.
예제 #3
0
    def handle404(self, reqorig, url, container, obj):
        """
        Fetch a missing thumbnail from Thumbor and stream it to the caller.

        Note also that the thumb host might write the thumbnail out to Swift
        so it won't 404 next time. After a successful fetch the same request
        is fired, without waiting for the result, at the inactive
        datacenter's Thumbor.

        :param reqorig: the original request; its headers are forwarded
            selectively and it is handed to ``self.thumborify_url``.
        :param url: not used by this method.
        :param container: not used by this method.
        :param obj: not used by this method.

        :returns: a ``swob.Response`` streaming the Thumbor reply with the
            whitelisted headers copied over; a ``swob.HTTPException``
            mirroring Thumbor's status, body and headers if Thumbor answered
            with an HTTP error; or ``swob.HTTPServiceUnavailable`` if
            Thumbor could not be reached.
        """
        # upload doesn't like our User-agent, otherwise we could call it
        # using urllib2.url()
        thumbor_opener = urllib2.build_opener(DumbRedirectHandler())

        # Pass on certain headers from Varnish to Thumbor. The caller's
        # User-Agent wins; ours is only the fallback.
        user_agent = reqorig.headers.get('User-Agent')
        if user_agent is None:
            user_agent = self.user_agent
        forwarded = [('User-Agent', user_agent)]
        for header_to_pass in [
                'X-Forwarded-For', 'X-Forwarded-Proto', 'Accept',
                'Accept-Encoding', 'X-Original-URI'
        ]:
            value = reqorig.headers.get(header_to_pass)
            if value is not None:
                forwarded.append((header_to_pass, value))
        thumbor_opener.addheaders = forwarded

        # At least in theory, we shouldn't be handing out links to originals
        # that we don't have (or in the case of thumbs, can't generate).
        # However, someone may have a formerly valid link to a file, so we
        # should do them the favor of giving them a 404.
        try:
            thumbor_encodedurl = self.thumborify_url(reqorig, self.thumborhost)
            upcopy = thumbor_opener.open(thumbor_encodedurl)
        except urllib2.HTTPError as error:
            # Wrap the urllib2 HTTPError into a swob HTTPException
            status = error.code
            try:
                body = error.fp.read()
            finally:
                # The error object is also the open upstream response;
                # release it once the body has been read.
                error.close()
            headers = error.hdrs.items()
            if status not in swob.RESPONSE_REASONS:
                # Generic status description in case of unknown status reasons.
                status = "%s Error" % status
            return swob.HTTPException(status=status,
                                      body=body,
                                      headers=headers)
        except urllib2.URLError as error:
            msg = 'There was a problem while contacting the thumbnailing service: %s' % \
                  error.reason
            return swob.HTTPServiceUnavailable(msg)

        # We successfully generated a thumbnail on the active DC, send the same request
        # blindly to the inactive DC to populate Swift there, not waiting for the response
        inactivedc_encodedurl = self.thumborify_url(
            reqorig, self.inactivedc_thumborhost)
        eventlet.spawn(self.inactivedc_request, thumbor_opener,
                       inactivedc_encodedurl)

        # get the Content-Type.
        uinfo = upcopy.info()
        c_t = uinfo.gettype()

        resp = swob.Response(app_iter=upcopy, content_type=c_t)

        headers_whitelist = [
            'Content-Length', 'Content-Disposition', 'Last-Modified',
            'Accept-Ranges', 'XKey', 'Thumbor-Engine', 'Server',
            'Nginx-Request-Date', 'Nginx-Response-Date',
            'Thumbor-Processing-Time', 'Thumbor-Processing-Utime',
            'Thumbor-Request-Id', 'Thumbor-Request-Date'
        ]

        # Add in the headers if we've got them. getheader() returns None
        # for a header that is absent, so test truthiness rather than
        # comparing against '' (which let None through).
        for header in headers_whitelist:
            value = uinfo.getheader(header)
            if value:
                resp.headers[header] = value

        # also add CORS; see also our CORS middleware
        resp.headers['Access-Control-Allow-Origin'] = '*'

        return resp
예제 #4
0
    def _fetch_owning_proxyfs(self, req, account_name):
        """
        Work out whether an account is bimodal and, if so, which proxyfsd
        owns it.

        Lookup order:

        1. The local in-memory cache, if its entry is no older than
           ``self.bimodal_recheck_interval`` seconds.
        2. Swift's account info: if the bimodal sysmeta flag is not set, the
           answer is "not bimodal" and ProxyFS is never contacted. This
           keeps non-ProxyFS accounts working during a ProxyFS outage.
        3. A ProxyFS RPC, whose answer names the owning proxyfsd. That name
           is passed through ``socket.getaddrinfo()`` so hostnames are
           resolved and IP addresses pass through unchanged.

        Results are cached in this process rather than in memcached;
        ProxyFS answers from memory, so memcached would not be any faster
        than simply asking ProxyFS again.

        :param req: the request being served; its environ is copied for the
            account-info lookup and it is attached to any 503 raised here.
        :param account_name: name of the account to look up.

        :returns: 2-tuple (is-bimodal, proxyfsd-addrinfo). The addrinfo is
            None for non-bimodal accounts and for bimodal accounts that
            currently have no owner.

        :raises utils.NoSuchHostnameError: if owning proxyfsd is
            unresolvable

        :raises utils.RpcTimeout: if proxyfsd is too slow in responding

        :raises utils.RpcError: if proxyfsd's response indicates an error

        :raises swob.HTTPServiceUnavailable: if Swift marks the account as
            bimodal but ProxyFS says it is not
        """
        cache_entry = self._cached_is_bimodal.get(account_name)
        if cache_entry:
            cached_answer, cached_at = cache_entry
            if time.time() <= cached_at + self.bimodal_recheck_interval:
                # Still fresh enough to trust.
                return cached_answer

        # Ask Swift first. The lookup runs against a copy of the environ
        # with any /proxyfs/ prefix mapped to /v1/ and the bimodal marker
        # forced off.
        swift_env = req.environ.copy()
        path_info = swift_env["PATH_INFO"]
        if path_info.startswith("/proxyfs/"):
            swift_env["PATH_INFO"] = path_info.replace(
                "/proxyfs/", "/v1/", 1)
        swift_env[utils.ENV_IS_BIMODAL] = False
        acct_info = get_account_info(swift_env, self.app, swift_source="PFS")
        bimodal_flag = acct_info["sysmeta"].get(SYSMETA_BIMODAL_INDICATOR)
        if not swift_code.config_true_value(bimodal_flag):
            not_bimodal = (False, None)
            self._cached_is_bimodal[account_name] = (not_bimodal, time.time())
            return not_bimodal

        # Any proxyfsd we know about will give the same answer to this
        # query.
        rpc_request = rpc.is_account_bimodal_request(account_name)
        rpc_response = self._rpc_call(self.proxyfsd_addrinfos, rpc_request)
        is_bimodal, owner_host = \
            rpc.parse_is_account_bimodal_response(rpc_response)

        if not is_bimodal:
            # Swift and ProxyFS disagree about this account.
            raise swob.HTTPServiceUnavailable(
                request=req,
                headers={"Content-Type": "text/plain"},
                body=("The Swift account says it has bimodal access, but "
                      "ProxyFS disagrees. Unable to proceed."))

        if not owner_host:
            # An account that is moving between proxyfsd nodes is bimodal
            # but momentarily ownerless. That state is normally very
            # short-lived, so it is deliberately not cached.
            return (True, None)

        # ProxyFS does not hand out port numbers (yet), so every proxyfsd
        # is assumed to listen on the same configured port.
        try:
            resolved = socket.getaddrinfo(owner_host,
                                          self.proxyfsd_port,
                                          socket.AF_UNSPEC,
                                          socket.SOCK_STREAM)
        except socket.gaierror:
            raise utils.NoSuchHostnameError(
                "Owning ProxyFS is at %s, but that could not "
                "be resolved to an IP address" % owner_host)

        # getaddrinfo already sorts its results per RFC 3484, so the first
        # entry is the preferred one. Resolution succeeded, so the list has
        # at least one element.
        owner = (True, resolved[0])
        self._cached_is_bimodal[account_name] = (owner, time.time())
        return owner
예제 #5
0
    def GET(self, req):
        """
        Serve a search GET to the middleware.

        The request path is split into version, account and an optional
        path; the search itself is configured from these query parameters:

        * ``format`` -- ``json`` or ``xml``; when absent the ``Accept``
          header is consulted. Anything else yields plain text.
        * ``q`` -- query matched against ``field`` / ``df`` (default
          ``_all``).
        * ``q.<name>`` -- query against a specific field; a ``meta-`` name
          prefix is rewritten to ``meta.``.
        * ``limit`` / ``rows`` -- page size, 100 when absent or zero.
        * ``start`` / ``offset`` -- index of the first result.
        * ``sort``, ``marker`` -- handed to the searcher unchanged.
        * ``type`` -- ``object``, ``container`` or empty.
        * ``recursive`` -- false for ``false``, ``0`` or ``f`` (any case),
          true otherwise.

        :param req: the incoming swob request.

        :returns: a ``swob.Response`` carrying the results plus the
            ``X-Search-Items-Count``, ``X-Search-Items-Total`` and
            ``X-Search-Items-Offset`` headers; ``swob.HTTPBadRequest`` for a
            malformed path or parameter; ``swob.HTTPServiceUnavailable`` if
            the search times out.
        """
        try:
            version, account, path = swift_utils.split_path(
                req.path, 2, 3, True)
        except ValueError:
            return swob.HTTPBadRequest(request=req)

        if path:
            path = utils.unicode_unquote(path).rstrip("/")

        self.logger.debug("Searching")

        # Response format: explicit parameter first, Accept header second.
        fmt = req.params.get('format', '').lower()
        if not fmt:
            accept_header = req.headers.get('Accept', '').lower()
            if 'json' in accept_header:
                fmt = 'json'
            elif 'xml' in accept_header:
                fmt = 'xml'

        # Query strings arrive as raw bytes; undecodable input is a client
        # error, not a server error.
        try:
            queries = [(key[2:], value.decode("utf-8").strip('*'))
                       for key, value in req.str_params.items()
                       if key.startswith('q.')]
            query = req.str_params.get('q')
            if query:
                query = query.decode("utf-8").strip('*')
        except UnicodeDecodeError:
            return swob.HTTPBadRequest(request=req)

        # Non-numeric paging parameters are likewise a client error.
        try:
            limit = int(req.params.get('limit', 0)
                        or req.params.get('rows', 0)) or 100
            start = int(req.params.get('start', 0)
                        or req.params.get('offset', 0))
        except ValueError:
            return swob.HTTPBadRequest(request=req)
        sort = req.params.get('sort', None)

        _type = req.params.get('type', None)

        if _type not in ['object', 'container', None, '']:
            return swob.HTTPBadRequest(request=req)

        field = (req.params.get('field', None) or req.params.get('df', None)
                 or '_all')

        marker = req.params.get('marker', None)

        # The default is the bool True; anything supplied by the client is
        # a string that has to be interpreted.
        recursive = req.params.get('recursive', True)
        if not isinstance(recursive, bool):
            recursive = recursive.lower() not in ['false', '0', 'f']

        srch = index.Searcher(self.elastic_hosts,
                              self.search_index_name,
                              account,
                              logger=self.logger)
        srch.logger = self.logger
        if query:
            srch.add_condition(field, query)
        for f, q in queries:
            if f.startswith("meta-"):
                f = "meta." + f[5:]
            srch.add_condition(f, q)
        srch.path = path
        srch.recursive = recursive
        srch.type = _type
        srch.sort = sort
        srch.limit = limit
        srch.start = start
        srch.marker = marker

        try:
            results = srch.execute()
        except socket.timeout:
            # Pass the request with the ``request`` keyword, as every other
            # response in this method does.
            return swob.HTTPServiceUnavailable(request=req)

        self.logger.debug(results)

        result_list = [index.filter_result_props(item) for item in results]

        headers = [
            ('X-Search-Items-Count', len(result_list)),
            ('X-Search-Items-Total', results.total),
            ('X-Search-Items-Offset', start),
        ]

        if fmt == 'json':
            headers.append(('Content-Type', 'application/json; charset=utf-8'))
            return swob.Response(request=req,
                                 body=json.dumps(result_list),
                                 headers=headers)
        elif fmt == 'xml':
            headers.append(('Content-Type', 'application/xml; charset=utf-8'))
            output_list = [
                '<?xml version="1.0" encoding="UTF-8"?>', '<results>'
            ]
            # NOTE(review): str(val) raises under Python 2 for non-ASCII
            # unicode values, and keys are emitted unescaped -- confirm what
            # filter_result_props can return.
            for res in result_list:
                fields = ''.join(
                    '<%s>%s</%s>' % (key, saxutils.escape(str(val)), key)
                    for key, val in res.iteritems())
                output_list.append('<object>' + fields + '</object>')
            output_list.append('</results>')
            res_body = '\n'.join(output_list)
            return swob.Response(request=req, body=res_body, headers=headers)
        else:
            headers.append(('Content-Type', 'text/plain'))
            # One "key: value" line per property, blank line between items.
            lines = []
            for res in result_list:
                for key, val in res.iteritems():
                    lines.append(str(key) + ': ' + str(val) + '\n')
                lines.append('\n')
            res_body = ''.join(lines)
            return swob.Response(request=req, body=res_body, headers=headers)
예제 #6
0
    def handle404(self, reqorig, url, container, obj):
        """
        Fetch a missing thumbnail from the thumbnailing backend and stream
        it to the caller.

        Note also that the thumb host might write the thumbnail out to Swift
        so it won't 404 next time. The thumbnail is requested from Thumbor
        when ``self.thumborhost`` is set; otherwise the request goes through
        the proxied opener to the (possibly "sitelang"-rewritten) scaler
        URL.

        :param reqorig: the original request. Its ``host`` is overwritten
            with ``self.thumbhost`` and selected headers are forwarded.
        :param url: not used by this method.
        :param container: not used by this method.
        :param obj: not used by this method.

        :returns: a ``swob.Response`` streaming the backend reply with the
            whitelisted headers copied over; ``swob.HTTPNotFound`` when
            sitelang rewriting is enabled and the URL is not a thumb URL; a
            ``swob.HTTPException`` mirroring the backend's status and
            headers if it answered with an HTTP error; or
            ``swob.HTTPServiceUnavailable`` if it could not be reached.
        """
        # go to the thumb media store for unknown files
        reqorig.host = self.thumbhost
        # upload doesn't like our User-agent, otherwise we could call it
        # using urllib2.url()
        proxy_handler = urllib2.ProxyHandler({'http': self.thumbhost})
        redirect_handler = DumbRedirectHandler()
        opener = urllib2.build_opener(redirect_handler, proxy_handler)
        # Thumbor doesn't need (and doesn't like) the proxy
        thumbor_opener = urllib2.build_opener(redirect_handler)

        # Pass on certain headers from the caller squid to the scalers
        opener.addheaders = []
        if reqorig.headers.get('User-Agent') is not None:
            opener.addheaders.append(
                ('User-Agent', reqorig.headers.get('User-Agent')))
        else:
            opener.addheaders.append(('User-Agent', self.user_agent))
        for header_to_pass in [
                'X-Forwarded-For', 'X-Forwarded-Proto', 'Accept',
                'Accept-Encoding', 'X-Original-URI'
        ]:
            if reqorig.headers.get(header_to_pass) is not None:
                opener.addheaders.append(
                    (header_to_pass, reqorig.headers.get(header_to_pass)))

        # Both openers share the very same list object, so the
        # X-Original-URI header appended in the sitelang branch below is
        # sent by thumbor_opener as well.
        thumbor_opener.addheaders = opener.addheaders

        thumb_url_pattern = (
            r'^http://(?P<host>[^/]+)/(?P<proj>[^-/]+)/(?P<lang>[^/]+)'
            r'/thumb/(?P<path>.+)')

        # At least in theory, we shouldn't be handing out links to originals
        # that we don't have (or in the case of thumbs, can't generate).
        # However, someone may have a formerly valid link to a file, so we
        # should do them the favor of giving them a 404.
        try:
            # break apart the url, url-encode it, and put it back together
            urlobj = list(urlparse.urlsplit(reqorig.url))
            # encode the URL but don't encode %s and /s
            urlobj[2] = urllib2.quote(urlobj[2], '%/')
            encodedurl = urlparse.urlunsplit(urlobj)

            # Thumbor never needs URL mangling and it needs a different host.
            # Stays None when no Thumbor host is configured.
            thumbor_encodedurl = None
            if self.thumborhost:
                thumbor_reqorig = swob.Request(reqorig.environ.copy())
                thumbor_reqorig.host = self.thumborhost
                thumbor_urlobj = list(urlparse.urlsplit(thumbor_reqorig.url))
                thumbor_urlobj[2] = urllib2.quote(thumbor_urlobj[2], '%/')
                thumbor_encodedurl = urlparse.urlunsplit(thumbor_urlobj)

            match = re.match(thumb_url_pattern, encodedurl)

            # if sitelang, we're supposed to mangle the URL so that
            # http://upload.wm.o/wikipedia/commons/thumb/a/a2/Foo_.jpg/330px-Foo_.jpg
            # changes to
            # http://commons.wp.o/w/thumb_handler.php/a/a2/Foo_.jpg/330px-Foo_.jpg
            if self.backend_url_format == 'sitelang':
                if match:
                    proj = match.group('proj')
                    lang = match.group('lang')
                    # and here are all the legacy special cases, imported from thumb_handler.php
                    if (proj == 'wikipedia'):
                        if (lang in ['meta', 'commons', 'internal', 'grants']):
                            proj = 'wikimedia'
                        if (lang in ['mediawiki']):
                            lang = 'www'
                            proj = 'mediawiki'
                    hostname = '%s.%s.%s' % (lang, proj, self.tld)
                    if (proj == 'wikipedia' and lang == 'sources'):
                        # yay special case
                        hostname = 'wikisource.%s' % self.tld
                    # ok, replace the URL with just the part starting with thumb/
                    # take off the first two parts of the path
                    # (eg /wikipedia/commons/); make sure the string starts
                    # with a /
                    encodedurl = 'http://%s/w/thumb_handler.php/%s' % (
                        hostname, match.group('path'))
                    # add in the X-Original-URI with the swift got (minus the hostname)
                    opener.addheaders.append(
                        ('X-Original-URI',
                         list(urlparse.urlsplit(reqorig.url))[2]))
                else:
                    # ASSERT this code should never be hit since only thumbs
                    # should call the 404 handler
                    self.logger.warn(
                        "non-thumb in 404 handler! encodedurl = %s" %
                        encodedurl)
                    resp = swob.HTTPNotFound('Unexpected error')
                    return resp
            else:
                # log the result of the match here to test and make sure it's
                # sane before enabling the config
                if match:
                    proj = match.group('proj')
                    lang = match.group('lang')
                    self.logger.warn(
                        "sitelang match has proj %s lang %s encodedurl %s" %
                        (proj, lang, encodedurl))
                else:
                    self.logger.warn("no sitelang match on encodedurl: %s" %
                                     encodedurl)

            # Thumbor serves the thumbnail when it is configured. Without a
            # Thumbor host there is no Thumbor URL (the name used to be left
            # unbound here), so fall back to the image scalers.
            if thumbor_encodedurl is not None:
                upcopy = thumbor_opener.open(thumbor_encodedurl)
            else:
                upcopy = opener.open(encodedurl)
        except urllib2.HTTPError as error:
            # Wrap the urllib2 HTTPError into a swob HTTPException
            status = error.code
            body = error.msg
            headers = error.hdrs.items()
            # The error object is also the open upstream response; release
            # it now that everything needed has been copied out.
            error.close()
            if status not in swob.RESPONSE_REASONS:
                # Generic status description in case of unknown status reasons.
                status = "%s Error" % status
            return swob.HTTPException(status=status,
                                      body=body,
                                      headers=headers)
        except urllib2.URLError as error:
            msg = 'There was a problem while contacting the thumbnailing service: %s' % \
                  error.reason
            return swob.HTTPServiceUnavailable(msg)

        # get the Content-Type.
        uinfo = upcopy.info()
        c_t = uinfo.gettype()

        resp = swob.Response(app_iter=upcopy, content_type=c_t)

        headers_whitelist = [
            'Content-Length', 'Content-Disposition', 'Last-Modified',
            'Accept-Ranges', 'XKey', 'Thumbor-Engine', 'Server',
            'Nginx-Request-Date', 'Nginx-Response-Date',
            'Thumbor-Processing-Time', 'Thumbor-Processing-Utime',
            'Thumbor-Request-Id', 'Thumbor-Request-Date'
        ]

        # Add in the headers if we've got them. getheader() returns None
        # for a header that is absent, so test truthiness rather than
        # comparing against '' (which let None through).
        for header in headers_whitelist:
            value = uinfo.getheader(header)
            if value:
                resp.headers[header] = value

        # also add CORS; see also our CORS middleware
        resp.headers['Access-Control-Allow-Origin'] = '*'

        return resp