Beispiel #1
0
    def get_sig(self, request_method, path, x_timestamp, nonce, realm_key,
                user_key):
        """
        Returns the hexdigest string of the HMAC-SHA1 (RFC 2104) for
        the information given.

        :param request_method: HTTP method of the request.
        :param path: The path to the resource (url-encoded).
        :param x_timestamp: The X-Timestamp header value for the request.
        :param nonce: A unique value for the request.
        :param realm_key: Shared secret at the cluster operator level.
        :param user_key: Shared secret at the user's container level.
        :returns: hexdigest str of the HMAC-SHA1 for the request.
        """
        nonce = get_valid_utf8_str(nonce)
        realm_key = get_valid_utf8_str(realm_key)
        user_key = get_valid_utf8_str(user_key)
        # XXX We don't know what is the best here yet; wait for container
        # sync to be tested.
        if isinstance(path, six.text_type):
            path = path.encode('utf-8')
        return hmac.new(
            realm_key,
            b'%s\n%s\n%s\n%s\n%s' % (
                request_method.encode('ascii'), path,
                x_timestamp.encode('ascii'), nonce, user_key),
            hashlib.sha1).hexdigest()
Beispiel #2
0
    def get_sig(self, request_method, path, x_timestamp, nonce, realm_key,
                user_key):
        """
        Returns the hexdigest string of the HMAC-SHA1 (RFC 2104) for
        the information given.

        :param request_method: HTTP method of the request.
        :param path: The path to the resource (url-encoded).
        :param x_timestamp: The X-Timestamp header value for the request.
        :param nonce: A unique value for the request.
        :param realm_key: Shared secret at the cluster operator level.
        :param user_key: Shared secret at the user's container level.
        :returns: hexdigest str of the HMAC-SHA1 for the request.
        """
        nonce = get_valid_utf8_str(nonce)
        realm_key = get_valid_utf8_str(realm_key)
        user_key = get_valid_utf8_str(user_key)
        # XXX We don't know what is the best here yet; wait for container
        # sync to be tested.
        if isinstance(path, six.text_type):
            path = path.encode('utf-8')
        return hmac.new(
            realm_key, b'%s\n%s\n%s\n%s\n%s' %
            (request_method.encode('ascii'), path, x_timestamp.encode('ascii'),
             nonce, user_key), hashlib.sha1).hexdigest()
Beispiel #3
0
 def test_get_valid_utf8_str(self):
     unicode_sample = u'\uc77c\uc601'
     valid_utf8_str = unicode_sample.encode('utf-8')
     invalid_utf8_str = unicode_sample.encode('utf-8')[::-1]
     self.assertEquals(valid_utf8_str,
                       utils.get_valid_utf8_str(valid_utf8_str))
     self.assertEquals(valid_utf8_str,
                       utils.get_valid_utf8_str(unicode_sample))
     self.assertEquals('\xef\xbf\xbd\xef\xbf\xbd\xec\xbc\x9d\xef\xbf\xbd',
                       utils.get_valid_utf8_str(invalid_utf8_str))
Beispiel #4
0
    def _byterange_listing_iterator(self, req, version, account, segments,
                                    first_byte, last_byte,
                                    cached_fetch_sub_slo_segments,
                                    recursion_depth=1):
        last_sub_path = None
        for seg_dict in segments:
            seg_length = self._segment_length(seg_dict)
            if first_byte >= seg_length:
                # don't need any bytes from this segment
                first_byte -= seg_length
                last_byte -= seg_length
                continue

            if last_byte < 0:
                # no bytes are needed from this or any future segment
                return

            seg_range = seg_dict.get('range')
            if seg_range is None:
                range_start, range_end = 0, seg_length - 1
            else:
                # We already validated and supplied concrete values
                # for the range on upload
                range_start, range_end = map(int, seg_range.split('-'))

            if config_true_value(seg_dict.get('sub_slo')):
                # do this check here so that we can avoid fetching this last
                # manifest before raising the exception
                if recursion_depth >= self.max_slo_recursion_depth:
                    raise ListingIterError("Max recursion depth exceeded")

                sub_path = get_valid_utf8_str(seg_dict['name'])
                sub_cont, sub_obj = split_path(sub_path, 2, 2, True)
                if last_sub_path != sub_path:
                    sub_segments = cached_fetch_sub_slo_segments(
                        req, version, account, sub_cont, sub_obj)
                last_sub_path = sub_path

                # Use the existing machinery to slice into the sub-SLO.
                for sub_seg_dict, sb, eb in self._byterange_listing_iterator(
                        req, version, account, sub_segments,
                        # This adjusts first_byte and last_byte to be
                        # relative to the sub-SLO.
                        range_start + max(0, first_byte),
                        min(range_end, range_start + last_byte),

                        cached_fetch_sub_slo_segments,
                        recursion_depth=recursion_depth + 1):
                    yield sub_seg_dict, sb, eb
            else:
                if isinstance(seg_dict['name'], six.text_type):
                    seg_dict['name'] = seg_dict['name'].encode("utf-8")
                yield (seg_dict,
                       max(0, first_byte) + range_start,
                       min(range_end, range_start + last_byte))

            first_byte -= seg_length
            last_byte -= seg_length
Beispiel #5
0
    def _segment_listing_iterator(self, req, version, account, segments,
                                  recursion_depth=1):
        for seg_dict in segments:
            if config_true_value(seg_dict.get('sub_slo')):
                override_bytes_from_content_type(seg_dict,
                                                 logger=self.slo.logger)

        # We handle the range stuff here so that we can be smart about
        # skipping unused submanifests. For example, if our first segment is a
        # submanifest referencing 50 MiB total, but self.first_byte falls in
        # the 51st MiB, then we can avoid fetching the first submanifest.
        #
        # If we were to make SegmentedIterable handle all the range
        # calculations, we would be unable to make this optimization.
        total_length = sum(int(seg['bytes']) for seg in segments)
        if self.first_byte is None:
            self.first_byte = 0
        if self.last_byte is None:
            self.last_byte = total_length - 1

        for seg_dict in segments:
            seg_length = int(seg_dict['bytes'])

            if self.first_byte >= seg_length:
                # don't need any bytes from this segment
                self.first_byte = max(self.first_byte - seg_length, -1)
                self.last_byte = max(self.last_byte - seg_length, -1)
                continue

            if self.last_byte < 0:
                # no bytes are needed from this or any future segment
                break

            if config_true_value(seg_dict.get('sub_slo')):
                # do this check here so that we can avoid fetching this last
                # manifest before raising the exception
                if recursion_depth >= self.max_slo_recursion_depth:
                    raise ListingIterError("Max recursion depth exceeded")

                sub_path = get_valid_utf8_str(seg_dict['name'])
                sub_cont, sub_obj = split_path(sub_path, 2, 2, True)
                sub_segments = self._fetch_sub_slo_segments(
                    req, version, account, sub_cont, sub_obj)
                for sub_seg_dict, sb, eb in self._segment_listing_iterator(
                        req, version, account, sub_segments,
                        recursion_depth=recursion_depth + 1):
                    yield sub_seg_dict, sb, eb
            else:
                if isinstance(seg_dict['name'], unicode):
                    seg_dict['name'] = seg_dict['name'].encode("utf-8")
                seg_length = int(seg_dict['bytes'])
                yield (seg_dict,
                       (None if self.first_byte <= 0 else self.first_byte),
                       (None if self.last_byte >=
                        seg_length - 1 else self.last_byte))
                self.first_byte = max(self.first_byte - seg_length, -1)
                self.last_byte = max(self.last_byte - seg_length, -1)
Beispiel #6
0
        def do_head(obj_name):
            obj_path = '/'.join(['', vrs, account,
                                 get_valid_utf8_str(obj_name).lstrip('/')])

            sub_req = make_subrequest(
                req.environ, path=obj_path + '?',  # kill the query string
                method='HEAD',
                headers={'x-auth-token': req.headers.get('x-auth-token')},
                agent='%(orig)s SLO MultipartPUT', swift_source='SLO')
            return obj_name, sub_req.get_response(self)
Beispiel #7
0
    def log_request(self, req, status_int, bytes_received, bytes_sent,
                    request_time):
        """
        Log a request.

        :param req: swob.Request object for the request
        :param status_int: integer code for the response status
        :param bytes_received: bytes successfully read from the request body
        :param bytes_sent: bytes yielded to the WSGI server
        :param request_time: time taken to satisfy the request, in seconds
        """
        if self.req_already_logged(req):
            return
        req_path = get_valid_utf8_str(req.path)
        the_request = quote(unquote(req_path), QUOTE_SAFE)
        if req.query_string:
            the_request = the_request + '?' + req.query_string
        logged_headers = None
        if self.log_hdrs:
            logged_headers = '\n'.join('%s: %s' % (k, v)
                                       for k, v in req.headers.items())
        method = self.method_from_req(req)
        self.access_logger.info(' '.join(
            quote(str(x) if x else '-', QUOTE_SAFE)
            for x in (
                get_remote_client(req),
                req.remote_addr,
                time.strftime('%d/%b/%Y/%H/%M/%S', time.gmtime()),
                method,
                the_request,
                req.environ.get('SERVER_PROTOCOL'),
                status_int,
                req.referer,
                req.user_agent,
                self.obscure_sensitive(req.headers.get('x-auth-token')),
                bytes_received,
                bytes_sent,
                req.headers.get('etag', None),
                req.environ.get('swift.trans_id'),
                logged_headers,
                '%.4f' % request_time,
                req.environ.get('swift.source'),
                ','.join(req.environ.get('swift.log_info') or '-'),
            )))
        self.mark_req_logged(req)
        # Log timing and bytes-transfered data to StatsD
        metric_name = self.statsd_metric_name(req, status_int, method)
        # Only log data for valid controllers (or SOS) to keep the metric count
        # down (egregious errors will get logged by the proxy server itself).
        if metric_name:
            self.access_logger.timing(metric_name + '.timing',
                                      request_time * 1000)
            self.access_logger.update_stats(metric_name + '.xfer',
                                            bytes_received + bytes_sent)
Beispiel #8
0
    def log_request(self, req, status_int, bytes_received, bytes_sent,
                    request_time):
        """
        Log a request.

        :param req: swob.Request object for the request
        :param status_int: integer code for the response status
        :param bytes_received: bytes successfully read from the request body
        :param bytes_sent: bytes yielded to the WSGI server
        :param request_time: time taken to satisfy the request, in seconds
        """
        if self.req_already_logged(req):
            return
        req_path = get_valid_utf8_str(req.path)
        the_request = quote(unquote(req_path), QUOTE_SAFE)
        if req.query_string:
            the_request = the_request + '?' + req.query_string
        logged_headers = None
        if self.log_hdrs:
            logged_headers = '\n'.join('%s: %s' % (k, v)
                                       for k, v in req.headers.items())
        method = self.method_from_req(req)
        self.access_logger.info(' '.join(
            quote(str(x) if x else '-', QUOTE_SAFE)
            for x in (
                get_remote_client(req),
                req.remote_addr,
                time.strftime('%d/%b/%Y/%H/%M/%S', time.gmtime()),
                method,
                the_request,
                req.environ.get('SERVER_PROTOCOL'),
                status_int,
                req.referer,
                req.user_agent,
                self.obscure_sensitive(req.headers.get('x-auth-token')),
                bytes_received,
                bytes_sent,
                req.headers.get('etag', None),
                req.environ.get('swift.trans_id'),
                logged_headers,
                '%.4f' % request_time,
                req.environ.get('swift.source'),
                ','.join(req.environ.get('swift.log_info') or ''),
            )))
        self.mark_req_logged(req)
        # Log timing and bytes-transfered data to StatsD
        metric_name = self.statsd_metric_name(req, status_int, method)
        # Only log data for valid controllers (or SOS) to keep the metric count
        # down (egregious errors will get logged by the proxy server itself).
        if metric_name:
            self.access_logger.timing(metric_name + '.timing',
                                      request_time * 1000)
            self.access_logger.update_stats(metric_name + '.xfer',
                                            bytes_received + bytes_sent)
    def get_sig(self, request_method, path, x_timestamp, nonce, realm_key,
                user_key):
        """
        Returns the hexdigest string of the HMAC-SHA1 (RFC 2104) for
        the information given.

        :param request_method: HTTP method of the request.
        :param path: The path to the resource.
        :param x_timestamp: The X-Timestamp header value for the request.
        :param nonce: A unique value for the request.
        :param realm_key: Shared secret at the cluster operator level.
        :param user_key: Shared secret at the user's container level.
        :returns: hexdigest str of the HMAC-SHA1 for the request.
        """
        nonce = get_valid_utf8_str(nonce)
        realm_key = get_valid_utf8_str(realm_key)
        user_key = get_valid_utf8_str(user_key)
        return hmac.new(
            realm_key, '%s\n%s\n%s\n%s\n%s' %
            (request_method, path, x_timestamp, nonce, user_key),
            hashlib.sha1).hexdigest()
Beispiel #10
0
    def get_sig(self, request_method, path, x_timestamp, nonce, realm_key,
                user_key):
        """
        Returns the hexdigest string of the HMAC-SHA1 (RFC 2104) for
        the information given.

        :param request_method: HTTP method of the request.
        :param path: The path to the resource.
        :param x_timestamp: The X-Timestamp header value for the request.
        :param nonce: A unique value for the request.
        :param realm_key: Shared secret at the cluster operator level.
        :param user_key: Shared secret at the user's container level.
        :returns: hexdigest str of the HMAC-SHA1 for the request.
        """
        nonce = get_valid_utf8_str(nonce)
        realm_key = get_valid_utf8_str(realm_key)
        user_key = get_valid_utf8_str(user_key)
        return hmac.new(
            realm_key,
            '%s\n%s\n%s\n%s\n%s' % (
                request_method, path, x_timestamp, nonce, user_key),
            hashlib.sha1).hexdigest()
Beispiel #11
0
        def do_head(obj_name):
            obj_path = '/'.join(
                ['', vrs, account,
                 get_valid_utf8_str(obj_name).lstrip('/')])

            sub_req = make_subrequest(
                req.environ,
                path=obj_path + '?',  # kill the query string
                method='HEAD',
                headers={'x-auth-token': req.headers.get('x-auth-token')},
                agent='%(orig)s SLO MultipartPUT',
                swift_source='SLO')
            return obj_name, sub_req.get_response(self)
Beispiel #12
0
def get_tempurl_keys_from_metadata(meta):
    """
    Extracts the tempurl keys from metadata.

    :param meta: account metadata
    :returns: list of keys found (possibly empty if no keys set)

    Example:
      meta = get_account_info(...)['meta']
      keys = get_tempurl_keys_from_metadata(meta)
    """
    return [get_valid_utf8_str(value) for key, value in meta.items()
            if key.lower() in ('temp-url-key', 'temp-url-key-2')]
Beispiel #13
0
    def log_request(self, env, status_int, bytes_received, bytes_sent, request_time, client_disconnect):
        """
        Log a request.

        :param env: WSGI environment
        :param status_int: integer code for the response status
        :param bytes_received: bytes successfully read from the request body
        :param bytes_sent: bytes yielded to the WSGI server
        :param request_time: time taken to satisfy the request, in seconds
        """
        req = Request(env)
        if client_disconnect:  # log disconnected clients as '499' status code
            status_int = 499
        req_path = get_valid_utf8_str(req.path)
        the_request = quote(unquote(req_path))
        if req.query_string:
            the_request = the_request + "?" + req.query_string
        logged_headers = None
        if self.log_hdrs:
            logged_headers = "\n".join("%s: %s" % (k, v) for k, v in req.headers.items())
        self.access_logger.info(
            " ".join(
                quote(str(x) if x else "-")
                for x in (
                    get_remote_client(req),
                    req.remote_addr,
                    time.strftime("%d/%b/%Y/%H/%M/%S", time.gmtime()),
                    req.method,
                    the_request,
                    req.environ.get("SERVER_PROTOCOL"),
                    status_int,
                    req.referer,
                    req.user_agent,
                    req.headers.get("x-auth-token"),
                    bytes_received,
                    bytes_sent,
                    req.headers.get("etag", None),
                    req.environ.get("swift.trans_id"),
                    logged_headers,
                    "%.4f" % request_time,
                    req.environ.get("swift.source"),
                )
            )
        )
        self.access_logger.txn_id = None
Beispiel #14
0
    def __call__(self, env, start_response):
        req = Request(env)
        if env['REQUEST_METHOD'] not in ['GET']:
            return self.app(env, start_response)

        version, account, container, obj = split_path(
            env['PATH_INFO'], 1, 4, True)
        try:
            resp = self._app_call(env)
        except Exception:
            resp = HTTPServerError(request=req, body="error")
            return resp(env, start_response)
        status = int(self._response_status.split()[0])

        if status < 200 or status > 300:
            start_response(self._response_status, self._response_headers,
                           self._response_exc_info)
            return resp
        SLOmd5 = ''
        if req.params.get('multipart-manifest') == 'get':
            if req.params.get('format') == 'raw':
                resp = self.convert_segment_listing(
                    self._response_headers, resp)
            else:
                h = hashlib.md5()
                segments = self._get_manifest_read(resp)
                for seg_dict in segments:
                    if 'data' in seg_dict:
                        continue
                    sub_path = get_valid_utf8_str(seg_dict['name'])
                    sub_cont, sub_obj = split_path(sub_path, 2, 2, True)
                    h.update(self._fetch_sub_slo_segments(req, version,
                             account, sub_cont, sub_obj))
                SLOmd5 = h.hexdigest()
        self._post_slomd5_header(env, SLOmd5)
        return self.app(env, start_response)
Beispiel #15
0
    def _segment_listing_iterator(self, req, version, account, segments,
                                  recursion_depth=1):
        for seg_dict in segments:
            if config_true_value(seg_dict.get('sub_slo')):
                override_bytes_from_content_type(seg_dict,
                                                 logger=self.slo.logger)

        # We handle the range stuff here so that we can be smart about
        # skipping unused submanifests. For example, if our first segment is a
        # submanifest referencing 50 MiB total, but start_byte falls in
        # the 51st MiB, then we can avoid fetching the first submanifest.
        #
        # If we were to make SegmentedIterable handle all the range
        # calculations, we would be unable to make this optimization.
        total_length = sum(self._segment_length(seg) for seg in segments)
        if self.first_byte is None:
            self.first_byte = 0
        if self.last_byte is None:
            self.last_byte = total_length - 1

        last_sub_path = None
        for seg_dict in segments:
            seg_length = self._segment_length(seg_dict)
            if self.first_byte >= seg_length:
                # don't need any bytes from this segment
                self.first_byte -= seg_length
                self.last_byte -= seg_length
                continue

            if self.last_byte < 0:
                # no bytes are needed from this or any future segment
                break

            range = seg_dict.get('range')
            if range is None:
                range_start, range_end = 0, seg_length - 1
            else:
                # We already validated and supplied concrete values
                # for the range on upload
                range_start, range_end = map(int, range.split('-'))

            if config_true_value(seg_dict.get('sub_slo')):
                # do this check here so that we can avoid fetching this last
                # manifest before raising the exception
                if recursion_depth >= self.max_slo_recursion_depth:
                    raise ListingIterError("Max recursion depth exceeded")

                sub_path = get_valid_utf8_str(seg_dict['name'])
                sub_cont, sub_obj = split_path(sub_path, 2, 2, True)
                if last_sub_path != sub_path:
                    sub_segments = self._fetch_sub_slo_segments(
                        req, version, account, sub_cont, sub_obj)
                last_sub_path = sub_path

                # Use the existing machinery to slice into the sub-SLO.
                # This requires that we save off our current state, and
                # restore at the other end.
                orig_start, orig_end = self.first_byte, self.last_byte
                self.first_byte = range_start + max(0, self.first_byte)
                self.last_byte = min(range_end, range_start + self.last_byte)

                for sub_seg_dict, sb, eb in self._segment_listing_iterator(
                        req, version, account, sub_segments,
                        recursion_depth=recursion_depth + 1):
                    yield sub_seg_dict, sb, eb

                # Restore the first/last state
                self.first_byte, self.last_byte = orig_start, orig_end
            else:
                if isinstance(seg_dict['name'], six.text_type):
                    seg_dict['name'] = seg_dict['name'].encode("utf-8")
                yield (seg_dict,
                       max(0, self.first_byte) + range_start,
                       min(range_end, range_start + self.last_byte))

            self.first_byte -= seg_length
            self.last_byte -= seg_length
Beispiel #16
0
    def _byterange_listing_iterator(self,
                                    req,
                                    version,
                                    account,
                                    segments,
                                    first_byte,
                                    last_byte,
                                    cached_fetch_sub_slo_segments,
                                    recursion_depth=1):
        last_sub_path = None
        for seg_dict in segments:
            seg_length = self._segment_length(seg_dict)
            if first_byte >= seg_length:
                # don't need any bytes from this segment
                first_byte -= seg_length
                last_byte -= seg_length
                continue

            if last_byte < 0:
                # no bytes are needed from this or any future segment
                return

            seg_range = seg_dict.get('range')
            if seg_range is None:
                range_start, range_end = 0, seg_length - 1
            else:
                # We already validated and supplied concrete values
                # for the range on upload
                range_start, range_end = map(int, seg_range.split('-'))

            if config_true_value(seg_dict.get('sub_slo')):
                # do this check here so that we can avoid fetching this last
                # manifest before raising the exception
                if recursion_depth >= self.max_slo_recursion_depth:
                    raise ListingIterError("Max recursion depth exceeded")

                sub_path = get_valid_utf8_str(seg_dict['name'])
                sub_cont, sub_obj = split_path(sub_path, 2, 2, True)
                if last_sub_path != sub_path:
                    sub_segments = cached_fetch_sub_slo_segments(
                        req, version, account, sub_cont, sub_obj)
                last_sub_path = sub_path

                # Use the existing machinery to slice into the sub-SLO.
                for sub_seg_dict, sb, eb in self._byterange_listing_iterator(
                        req,
                        version,
                        account,
                        sub_segments,
                        # This adjusts first_byte and last_byte to be
                        # relative to the sub-SLO.
                        range_start + max(0, first_byte),
                        min(range_end, range_start + last_byte),
                        cached_fetch_sub_slo_segments,
                        recursion_depth=recursion_depth + 1):
                    yield sub_seg_dict, sb, eb
            else:
                if isinstance(seg_dict['name'], six.text_type):
                    seg_dict['name'] = seg_dict['name'].encode("utf-8")
                yield (seg_dict, max(0, first_byte) + range_start,
                       min(range_end, range_start + last_byte))

            first_byte -= seg_length
            last_byte -= seg_length
Beispiel #17
0
    def handle_multipart_put(self, req, start_response):
        """
        Will handle the PUT of a SLO manifest.
        List every object in manifest to check if is valid and if so will
        save a manifest generated from the user input. Uses WSGIContext to
        call self and start_response and returns a WSGI iterator.

        :param req: a :class:`~swift.common.swob.Request` with an obj in path
        :param start_response: WSGI start_response callable
        :raises HttpException: on errors
        """
        vrs, account, container, obj = req.split_path(4, rest_with_last=True)
        if req.content_length > self.max_manifest_size:
            raise HTTPRequestEntityTooLarge("Manifest File > %d bytes" %
                                            self.max_manifest_size)
        if req.headers.get('X-Copy-From'):
            raise HTTPMethodNotAllowed(
                'Multipart Manifest PUTs cannot be COPY requests')
        if req.content_length is None and \
                req.headers.get('transfer-encoding', '').lower() != 'chunked':
            raise HTTPLengthRequired(request=req)
        parsed_data = parse_and_validate_input(
            req.body_file.read(self.max_manifest_size), req.path)
        problem_segments = []

        object_segments = [seg for seg in parsed_data if 'path' in seg]
        if len(object_segments) > self.max_manifest_segments:
            raise HTTPRequestEntityTooLarge(
                'Number of object-backed segments must be <= %d' %
                self.max_manifest_segments)
        try:
            out_content_type = req.accept.best_match(ACCEPTABLE_FORMATS)
        except ValueError:
            out_content_type = 'text/plain'  # Ignore invalid header
        if not out_content_type:
            out_content_type = 'text/plain'
        data_for_storage = [None] * len(parsed_data)
        total_size = 0
        path2indices = defaultdict(list)
        for index, seg_dict in enumerate(parsed_data):
            if 'data' in seg_dict:
                data_for_storage[index] = seg_dict
                total_size += len(base64.b64decode(seg_dict['data']))
            else:
                path2indices[seg_dict['path']].append(index)

        # BEGIN: New OpenIO code
        obj_path = get_valid_utf8_str(object_segments[0]['path']).lstrip('/')
        split_path = obj_path.split('/')
        segments_container = split_path[0]
        seg_prefix = '/'.join(split_path[1:-1])
        segments_container_path = '/'.join(
            ['', vrs, account, segments_container])

        # END: New OpenIO code

        # BEGIN: Adapt for OpenIO code
        def validate_seg_dict(seg_dict, seg_resp, allow_empty_segment):
            obj_name = seg_dict['path']

            segment_length = seg_resp['bytes']
            if seg_dict.get('range'):
                # Since we now know the length, we can normalize the
                # range. We know that there is exactly one range
                # requested since we checked that earlier in
                # parse_and_validate_input().
                ranges = seg_dict['range'].ranges_for_length(seg_resp['bytes'])

                if not ranges:
                    problem_segments.append(
                        [quote(obj_name), 'Unsatisfiable Range'])
                elif ranges == [(0, seg_resp['bytes'])]:
                    # Just one range, and it exactly matches the object.
                    # Why'd we do this again?
                    del seg_dict['range']
                    segment_length = seg_resp['bytes']
                else:
                    rng = ranges[0]
                    seg_dict['range'] = '%d-%d' % (rng[0], rng[1] - 1)
                    segment_length = rng[1] - rng[0]

            if segment_length < 1 and not allow_empty_segment:
                problem_segments.append([
                    quote(obj_name),
                    'Too small; each segment must be at least 1 byte.'
                ])

            _size_bytes = seg_dict.get('size_bytes')
            size_mismatch = (_size_bytes is not None
                             and _size_bytes != seg_resp['bytes'])
            if size_mismatch:
                problem_segments.append([quote(obj_name), 'Size Mismatch'])

            _etag = seg_dict.get('etag')
            etag_mismatch = (_etag is not None and _etag != seg_resp['hash'])
            if etag_mismatch:
                problem_segments.append([quote(obj_name), 'Etag Mismatch'])

            last_modified_formatted = seg_resp.get('last_modified')
            if not last_modified_formatted:
                # shouldn't happen
                last_modified_formatted = datetime.now().strftime(
                    '%Y-%m-%dT%H:%M:%S.%f')
            seg_data = {
                'name': '/' + seg_dict['path'].lstrip('/'),
                'bytes': seg_resp['bytes'],
                'hash': seg_resp['hash'],
                'content_type': seg_resp['content_type'],
                'last_modified': last_modified_formatted
            }
            if seg_dict.get('range'):
                seg_data['range'] = seg_dict['range']
            if config_true_value(seg_resp['slo']):
                seg_data['sub_slo'] = True

            return segment_length, seg_data

        # END: Adapt for OpenIO code

        heartbeat = config_true_value(req.params.get('heartbeat'))
        separator = ''
        if heartbeat:
            # Apparently some ways of deploying require that this to happens
            # *before* the return? Not sure why.
            req.environ['eventlet.minimum_write_chunk_size'] = 0
            start_response(
                '202 Accepted',
                [  # NB: not 201 !
                    ('Content-Type', out_content_type),
                ])
            separator = '\r\n\r\n'

        def resp_iter(total_size=total_size):
            # wsgi won't propagate start_response calls until some data has
            # been yielded so make sure first heartbeat is sent immediately
            if heartbeat:
                yield ' '
            last_yield_time = time.time()

            # BEGIN: New OpenIO code
            sub_req = make_subrequest(
                req.environ,
                path='%s?format=json&prefix=%s&limit=%d' %
                (segments_container_path, seg_prefix,
                 self.max_manifest_segments),
                method='GET',
                headers={'x-auth-token': req.headers.get('x-auth-token')},
                agent='%(orig)s SLO MultipartPUT',
                swift_source='SLO')
            sub_req.environ.setdefault('oio.query', {})
            # All meta2 databases may not be synchronized
            sub_req.environ['oio.query']['force_master'] = True
            sub_req.environ['oio.query']['slo'] = True
            list_seg_resp = sub_req.get_response(self)

            with closing_if_possible(list_seg_resp.app_iter):
                segments_resp = json.loads(list_seg_resp.body)

            seg_resp_dict = dict()
            for seg_resp in segments_resp:
                obj_name = '/'.join(('', segments_container, seg_resp['name']))
                seg_resp_dict[obj_name] = seg_resp

            for obj_name in path2indices:
                now = time.time()
                if heartbeat and (now - last_yield_time >
                                  self.yield_frequency):
                    # Make sure we've called start_response before
                    # sending data
                    yield ' '
                    last_yield_time = now

                for i in path2indices[obj_name]:
                    if not list_seg_resp.is_success:
                        problem_segments.append(
                            [quote(obj_name), list_seg_resp.status])
                        segment_length = 0
                        seg_data = None
                    else:
                        seg_resp = seg_resp_dict.get(obj_name)
                        if seg_resp:
                            segment_length, seg_data = validate_seg_dict(
                                parsed_data[i], seg_resp,
                                (i == len(parsed_data) - 1))
                        else:
                            problem_segments.append([quote(obj_name), 404])
                            segment_length = 0
                            seg_data = None
                    data_for_storage[i] = seg_data
                    total_size += segment_length
            # END: New OpenIO code

            if problem_segments:
                err = HTTPBadRequest(content_type=out_content_type)
                resp_dict = {}
                if heartbeat:
                    resp_dict['Response Status'] = err.status
                    resp_dict['Response Body'] = err.body or '\n'.join(
                        RESPONSE_REASONS.get(err.status_int, ['']))
                else:
                    start_response(err.status,
                                   [(h, v) for h, v in err.headers.items()
                                    if h.lower() != 'content-length'])
                yield separator + get_response_body(
                    out_content_type, resp_dict, problem_segments, 'upload')
                return

            slo_etag = md5()
            for seg_data in data_for_storage:
                if 'data' in seg_data:
                    raw_data = base64.b64decode(seg_data['data'])
                    slo_etag.update(md5(raw_data).hexdigest())
                elif seg_data.get('range'):
                    slo_etag.update('%s:%s;' %
                                    (seg_data['hash'], seg_data['range']))
                else:
                    slo_etag.update(seg_data['hash'])

            slo_etag = slo_etag.hexdigest()
            client_etag = req.headers.get('Etag')
            if client_etag and client_etag.strip('"') != slo_etag:
                err = HTTPUnprocessableEntity(request=req)
                if heartbeat:
                    yield separator + get_response_body(
                        out_content_type, {
                            'Response Status':
                            err.status,
                            'Response Body':
                            err.body or '\n'.join(
                                RESPONSE_REASONS.get(err.status_int, [''])),
                        }, problem_segments, 'upload')
                else:
                    for chunk in err(req.environ, start_response):
                        yield chunk
                return

            json_data = json.dumps(data_for_storage)
            if six.PY3:
                json_data = json_data.encode('utf-8')
            req.body = json_data
            req.headers.update({
                SYSMETA_SLO_ETAG: slo_etag,
                SYSMETA_SLO_SIZE: total_size,
                'X-Static-Large-Object': 'True',
                'Etag': md5(json_data).hexdigest(),
            })

            # Ensure container listings have both etags. However, if any
            # middleware to the left of us touched the base value, trust them.
            override_header = 'X-Object-Sysmeta-Container-Update-Override-Etag'
            val, sep, params = req.headers.get(override_header,
                                               '').partition(';')
            req.headers[override_header] = '%s; slo_etag=%s' % (
                (val or req.headers['Etag']) + sep + params, slo_etag)

            env = req.environ
            if not env.get('CONTENT_TYPE'):
                guessed_type, _junk = mimetypes.guess_type(req.path_info)
                env['CONTENT_TYPE'] = (guessed_type
                                       or 'application/octet-stream')
            env['swift.content_type_overridden'] = True
            env['CONTENT_TYPE'] += ";swift_bytes=%d" % total_size

            resp = req.get_response(self.app)
            resp_dict = {'Response Status': resp.status}
            if resp.is_success:
                resp.etag = slo_etag
                resp_dict['Etag'] = resp.headers['Etag']
                resp_dict['Last Modified'] = resp.headers['Last-Modified']

            if heartbeat:
                resp_dict['Response Body'] = resp.body
                yield separator + get_response_body(out_content_type,
                                                    resp_dict, [], 'upload')
            else:
                for chunk in resp(req.environ, start_response):
                    yield chunk

        return resp_iter()
Beispiel #18
0
    def log_request(self, req, status_int, bytes_received, bytes_sent,
                    start_time, end_time, resp_headers=None):
        """
        Log a request.

        :param req: swob.Request object for the request
        :param status_int: integer code for the response status
        :param bytes_received: bytes successfully read from the request body
        :param bytes_sent: bytes yielded to the WSGI server
        :param start_time: timestamp request started
        :param end_time: timestamp request completed
        :param resp_headers: dict of the response headers
        """
        resp_headers = resp_headers or {}
        req_path = get_valid_utf8_str(req.path)
        the_request = quote(unquote(req_path), QUOTE_SAFE)
        if req.query_string:
            the_request = the_request + '?' + req.query_string
        logged_headers = None
        if self.log_hdrs:
            if self.log_hdrs_only:
                logged_headers = '\n'.join('%s: %s' % (k, v)
                                           for k, v in req.headers.items()
                                           if k in self.log_hdrs_only)
            else:
                logged_headers = '\n'.join('%s: %s' % (k, v)
                                           for k, v in req.headers.items())

        method = self.method_from_req(req)
        end_gmtime_str = time.strftime('%d/%b/%Y/%H/%M/%S',
                                       time.gmtime(end_time))
        duration_time_str = "%.4f" % (end_time - start_time)
        start_time_str = "%.9f" % start_time
        end_time_str = "%.9f" % end_time
        policy_index = get_policy_index(req.headers, resp_headers)
        self.access_logger.info(' '.join(
            quote(str(x) if x else '-', QUOTE_SAFE)
            for x in (
                get_remote_client(req),
                req.remote_addr,
                end_gmtime_str,
                method,
                the_request,
                req.environ.get('SERVER_PROTOCOL'),
                status_int,
                req.referer,
                req.user_agent,
                self.obscure_sensitive(req.headers.get('x-auth-token')),
                bytes_received,
                bytes_sent,
                req.headers.get('etag', None),
                req.environ.get('swift.trans_id'),
                logged_headers,
                duration_time_str,
                req.environ.get('swift.source'),
                ','.join(req.environ.get('swift.log_info') or ''),
                start_time_str,
                end_time_str,
                policy_index
            )))
        # Log timing and bytes-transferred data to StatsD
        metric_name = self.statsd_metric_name(req, status_int, method)
        # Only log data for valid controllers (or SOS) to keep the metric count
        # down (egregious errors will get logged by the proxy server itself).
        if metric_name:
            self.access_logger.timing(metric_name + '.timing',
                                      (end_time - start_time) * 1000)
            self.access_logger.update_stats(metric_name + '.xfer',
                                            bytes_received + bytes_sent)
    def usage_request(self, req, status_int, bytes_sent):
        """
        Send the bytes_sent to a zmq queue
        """
        if self.req_already_accounted:
            return
        req_path = get_valid_utf8_str(req.path)
        the_request = quote(unquote(req_path), QUOTE_SAFE)
        if self.log_hdrs:
            logged_headers = '\n'.join('%s: %s' % (k, v) for k, v in req.headers.items())
        method = self.method_from_req(req)
        if not req.environ.get('swift.source'):
            accounting_data = {
                        'remote_address': req.remote_addr,
                        'method': method,
                        'bytes_sent': bytes_sent
                        'headers': logged_headers,
                        'request': the_request
                    }
            print accounting_data
            self.mark_req_accounted(req)


        def __call__(self, env, start_response):
            start_response_args = [None]
            input_proxy = InputProxy(env['wsgi.input'])
            env['wsgi.input'] = input_proxy
            
            def my_start_response(status, headers, exc_info=None):
                start_response_args[0] = (status, list(headers), exc_info)

        def status_int_for_logging(client_disconnect=False, start_status=None):
            # log disconnected clients as '499' status code
            if client_disconnect or input_proxy.client_disconnect:
                return 499
            elif start_status is None:
                return int(start_response_args[0][0].split(' ', 1)[0])
            return start_status

        def iter_response(iterable):
            iterator = iter(iterable)
            try:
                chunk = iterator.next()
                while not chunk:
                    chunk = iterator.next()
            except StopIteration:
                chunk = ''
            for h, v in start_response_args[0][1]:
                if h.lower() in ('content-length', 'transfer-encoding'):
                    break
            else:
                if not chunk:
                    start_response_args[0][1].append(('content-length', '0'))
                elif isinstance(iterable, list):
                    start_response_args[0][1].append(
                        ('content-length', str(sum(len(i) for i in iterable))))
            start_response(*start_response_args[0])
            req = Request(env)

            bytes_sent = 0
            client_disconnect = False
            try:
                while chunk:
                    bytes_sent += len(chunk)
                    yield chunk
                    chunk = iterator.next()
            except GeneratorExit:  # generator was closed before we finished
                client_disconnect = True
                raise
            finally:
                status_int = status_int_for_logging(client_disconnect)
                self.log_request(
                    req, status_int, bytes_sent)

        try:
            iterable = self.app(env, my_start_response)
        except Exception:
            req = Request(env)
            status_int = status_int_for_logging(start_status=500)
            self.log_request(
                req, status_int, 0)
            raise
        else:
            return iter_response(iterable)
Beispiel #20
0
    def _listing(self, env, start_response, prefix=None):
        """
        Sends an HTML object listing to the remote client.

        :param env: The original WSGI environment dict.
        :param start_response: The original WSGI start_response hook.
        :param prefix: Any prefix desired for the container listing.
        """
        if not config_true_value(self._listings):
            resp = HTTPNotFound()(env, self._start_response)
            return self._error_response(resp, env, start_response)
        tmp_env = make_pre_authed_env(
            env, "GET", "/%s/%s/%s" % (self.version, self.account, self.container), self.agent, swift_source="SW"
        )
        tmp_env["QUERY_STRING"] = "delimiter=/&format=json"
        if prefix:
            tmp_env["QUERY_STRING"] += "&prefix=%s" % quote(prefix)
        else:
            prefix = ""
        resp = self._app_call(tmp_env)
        if not is_success(self._get_status_int()):
            return self._error_response(resp, env, start_response)
        listing = None
        body = "".join(resp)
        if body:
            listing = json.loads(body)
        if not listing:
            resp = HTTPNotFound()(env, self._start_response)
            return self._error_response(resp, env, start_response)
        headers = {"Content-Type": "text/html; charset=UTF-8"}
        body = (
            '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 '
            'Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">\n'
            "<html>\n"
            " <head>\n"
            "  <title>Listing of %s</title>\n" % cgi.escape(env["PATH_INFO"])
        )
        if self._listings_css:
            body += '  <link rel="stylesheet" type="text/css" ' 'href="%s" />\n' % (self._build_css_path(prefix))
        else:
            body += (
                '  <style type="text/css">\n'
                "   h1 {font-size: 1em; font-weight: bold;}\n"
                "   th {text-align: left; padding: 0px 1em 0px 1em;}\n"
                "   td {padding: 0px 1em 0px 1em;}\n"
                "   a {text-decoration: none;}\n"
                "  </style>\n"
            )
        body += (
            " </head>\n"
            " <body>\n"
            '  <h1 id="title">Listing of %s</h1>\n'
            '  <table id="listing">\n'
            '   <tr id="heading">\n'
            '    <th class="colname">Name</th>\n'
            '    <th class="colsize">Size</th>\n'
            '    <th class="coldate">Date</th>\n'
            "   </tr>\n" % cgi.escape(env["PATH_INFO"])
        )
        if prefix:
            body += (
                '   <tr id="parent" class="item">\n'
                '    <td class="colname"><a href="../">../</a></td>\n'
                '    <td class="colsize">&nbsp;</td>\n'
                '    <td class="coldate">&nbsp;</td>\n'
                "   </tr>\n"
            )
        for item in listing:
            if "subdir" in item:
                subdir = get_valid_utf8_str(item["subdir"])
                if prefix:
                    subdir = subdir[len(prefix) :]
                body += (
                    '   <tr class="item subdir">\n'
                    '    <td class="colname"><a href="%s">%s</a></td>\n'
                    '    <td class="colsize">&nbsp;</td>\n'
                    '    <td class="coldate">&nbsp;</td>\n'
                    "   </tr>\n" % (quote(subdir), cgi.escape(subdir))
                )
        for item in listing:
            if "name" in item:
                name = get_valid_utf8_str(item["name"])
                if prefix:
                    name = name[len(prefix) :]
                content_type = get_valid_utf8_str(item["content_type"])
                bytes = get_valid_utf8_str(human_readable(item["bytes"]))
                last_modified = cgi.escape(item["last_modified"]).split(".")[0].replace("T", " ")
                body += (
                    '   <tr class="item %s">\n'
                    '    <td class="colname"><a href="%s">%s</a></td>\n'
                    '    <td class="colsize">%s</td>\n'
                    '    <td class="coldate">%s</td>\n'
                    "   </tr>\n"
                    % (
                        " ".join("type-" + cgi.escape(t.lower(), quote=True) for t in content_type.split("/")),
                        quote(name),
                        cgi.escape(name),
                        bytes,
                        get_valid_utf8_str(last_modified),
                    )
                )
        body += "  </table>\n" " </body>\n" "</html>\n"
        resp = Response(headers=headers, body=body)
        return resp(env, start_response)
Beispiel #21
0
    def _segment_listing_iterator(self, req, version, account, segments,
                                  recursion_depth=1):
        for seg_dict in segments:
            if config_true_value(seg_dict.get('sub_slo')):
                override_bytes_from_content_type(seg_dict,
                                                 logger=self.slo.logger)

        # We handle the range stuff here so that we can be smart about
        # skipping unused submanifests. For example, if our first segment is a
        # submanifest referencing 50 MiB total, but start_byte falls in
        # the 51st MiB, then we can avoid fetching the first submanifest.
        #
        # If we were to make SegmentedIterable handle all the range
        # calculations, we would be unable to make this optimization.
        total_length = sum(self._segment_length(seg) for seg in segments)
        if self.first_byte is None:
            self.first_byte = 0
        if self.last_byte is None:
            self.last_byte = total_length - 1

        last_sub_path = None
        for seg_dict in segments:
            seg_length = self._segment_length(seg_dict)
            if self.first_byte >= seg_length:
                # don't need any bytes from this segment
                self.first_byte -= seg_length
                self.last_byte -= seg_length
                continue

            if self.last_byte < 0:
                # no bytes are needed from this or any future segment
                break

            seg_range = seg_dict.get('range')
            if seg_range is None:
                range_start, range_end = 0, seg_length - 1
            else:
                # We already validated and supplied concrete values
                # for the range on upload
                range_start, range_end = map(int, seg_range.split('-'))

            if config_true_value(seg_dict.get('sub_slo')):
                # do this check here so that we can avoid fetching this last
                # manifest before raising the exception
                if recursion_depth >= self.max_slo_recursion_depth:
                    raise ListingIterError("Max recursion depth exceeded")

                sub_path = get_valid_utf8_str(seg_dict['name'])
                sub_cont, sub_obj = split_path(sub_path, 2, 2, True)
                if last_sub_path != sub_path:
                    sub_segments = self._fetch_sub_slo_segments(
                        req, version, account, sub_cont, sub_obj)
                last_sub_path = sub_path

                # Use the existing machinery to slice into the sub-SLO.
                # This requires that we save off our current state, and
                # restore at the other end.
                orig_start, orig_end = self.first_byte, self.last_byte
                self.first_byte = range_start + max(0, self.first_byte)
                self.last_byte = min(range_end, range_start + self.last_byte)

                for sub_seg_dict, sb, eb in self._segment_listing_iterator(
                        req, version, account, sub_segments,
                        recursion_depth=recursion_depth + 1):
                    yield sub_seg_dict, sb, eb

                # Restore the first/last state
                self.first_byte, self.last_byte = orig_start, orig_end
            else:
                if isinstance(seg_dict['name'], six.text_type):
                    seg_dict['name'] = seg_dict['name'].encode("utf-8")
                yield (seg_dict,
                       max(0, self.first_byte) + range_start,
                       min(range_end, range_start + self.last_byte))

            self.first_byte -= seg_length
            self.last_byte -= seg_length
Beispiel #22
0
    def _segment_listing_iterator(self,
                                  req,
                                  version,
                                  account,
                                  segments,
                                  recursion_depth=1):
        for seg_dict in segments:
            if config_true_value(seg_dict.get('sub_slo')):
                override_bytes_from_content_type(seg_dict,
                                                 logger=self.slo.logger)

        # We handle the range stuff here so that we can be smart about
        # skipping unused submanifests. For example, if our first segment is a
        # submanifest referencing 50 MiB total, but self.first_byte falls in
        # the 51st MiB, then we can avoid fetching the first submanifest.
        #
        # If we were to make SegmentedIterable handle all the range
        # calculations, we would be unable to make this optimization.
        total_length = sum(int(seg['bytes']) for seg in segments)
        if self.first_byte is None:
            self.first_byte = 0
        if self.last_byte is None:
            self.last_byte = total_length - 1

        for seg_dict in segments:
            seg_length = int(seg_dict['bytes'])

            if self.first_byte >= seg_length:
                # don't need any bytes from this segment
                self.first_byte = max(self.first_byte - seg_length, -1)
                self.last_byte = max(self.last_byte - seg_length, -1)
                continue

            if self.last_byte < 0:
                # no bytes are needed from this or any future segment
                break

            if config_true_value(seg_dict.get('sub_slo')):
                # do this check here so that we can avoid fetching this last
                # manifest before raising the exception
                if recursion_depth >= self.max_slo_recursion_depth:
                    raise ListingIterError("Max recursion depth exceeded")

                sub_path = get_valid_utf8_str(seg_dict['name'])
                sub_cont, sub_obj = split_path(sub_path, 2, 2, True)
                sub_segments = self._fetch_sub_slo_segments(
                    req, version, account, sub_cont, sub_obj)
                for sub_seg_dict, sb, eb in self._segment_listing_iterator(
                        req,
                        version,
                        account,
                        sub_segments,
                        recursion_depth=recursion_depth + 1):
                    yield sub_seg_dict, sb, eb
            else:
                if isinstance(seg_dict['name'], unicode):
                    seg_dict['name'] = seg_dict['name'].encode("utf-8")
                seg_length = int(seg_dict['bytes'])
                yield (seg_dict,
                       (None if self.first_byte <= 0 else self.first_byte),
                       (None if self.last_byte >= seg_length - 1 else
                        self.last_byte))
                self.first_byte = max(self.first_byte - seg_length, -1)
                self.last_byte = max(self.last_byte - seg_length, -1)
Beispiel #23
0
    def log_request(self, env, status_int, bytes_received, bytes_sent, request_time, client_disconnect):
        """
        Log a request.

        :param env: WSGI environment
        :param status_int: integer code for the response status
        :param bytes_received: bytes successfully read from the request body
        :param bytes_sent: bytes yielded to the WSGI server
        :param request_time: time taken to satisfy the request, in seconds
        """
        if env.get("swift.proxy_access_log_made"):
            return
        req = Request(env)
        if client_disconnect:  # log disconnected clients as '499' status code
            status_int = 499
        req_path = get_valid_utf8_str(req.path)
        the_request = quote(unquote(req_path))
        if req.query_string:
            the_request = the_request + "?" + req.query_string
        logged_headers = None
        if self.log_hdrs:
            logged_headers = "\n".join("%s: %s" % (k, v) for k, v in req.headers.items())
        method = req.environ.get("swift.orig_req_method", req.method)
        self.access_logger.info(
            " ".join(
                quote(str(x) if x else "-")
                for x in (
                    get_remote_client(req),
                    req.remote_addr,
                    time.strftime("%d/%b/%Y/%H/%M/%S", time.gmtime()),
                    method,
                    the_request,
                    req.environ.get("SERVER_PROTOCOL"),
                    status_int,
                    req.referer,
                    req.user_agent,
                    req.headers.get("x-auth-token"),
                    bytes_received,
                    bytes_sent,
                    req.headers.get("etag", None),
                    req.environ.get("swift.trans_id"),
                    logged_headers,
                    "%.4f" % request_time,
                    req.environ.get("swift.source"),
                )
            )
        )
        env["swift.proxy_access_log_made"] = True
        # Log timing and bytes-transfered data to StatsD
        if req.path.startswith("/v1/"):
            try:
                stat_type = [None, "account", "container", "object"][req.path.strip("/").count("/")]
            except IndexError:
                stat_type = "object"
        else:
            stat_type = env.get("swift.source")
        # Only log data for valid controllers (or SOS) to keep the metric count
        # down (egregious errors will get logged by the proxy server itself).
        if stat_type:
            stat_method = method if method in self.valid_methods else "BAD_METHOD"
            metric_name = ".".join((stat_type, stat_method, str(status_int)))
            self.access_logger.timing(metric_name + ".timing", request_time * 1000)
            self.access_logger.update_stats(metric_name + ".xfer", bytes_received + bytes_sent)
Beispiel #24
0
    def log_request(self, req, status_int, bytes_received, bytes_sent,
                    start_time, end_time, resp_headers=None):
        """
        Log a request.

        :param req: swob.Request object for the request
        :param status_int: integer code for the response status
        :param bytes_received: bytes successfully read from the request body
        :param bytes_sent: bytes yielded to the WSGI server
        :param start_time: timestamp request started
        :param end_time: timestamp request completed
        :param resp_headers: dict of the response headers
        """
        resp_headers = resp_headers or {}
        req_path = get_valid_utf8_str(req.path)
        the_request = quote(unquote(req_path), QUOTE_SAFE)
        if req.query_string:
            the_request = the_request + '?' + req.query_string
        logged_headers = None
        if self.log_hdrs:
            if self.log_hdrs_only:
                logged_headers = '\n'.join('%s: %s' % (k, v)
                                           for k, v in req.headers.items()
                                           if k in self.log_hdrs_only)
            else:
                logged_headers = '\n'.join('%s: %s' % (k, v)
                                           for k, v in req.headers.items())

        method = self.method_from_req(req)
        end_gmtime_str = time.strftime('%d/%b/%Y/%H/%M/%S',
                                       time.gmtime(end_time))
        duration_time_str = "%.4f" % (end_time - start_time)
        start_time_str = "%.9f" % start_time
        end_time_str = "%.9f" % end_time
        policy_index = get_policy_index(req.headers, resp_headers)
        self.access_logger.info(' '.join(
            quote(str(x) if x else '-', QUOTE_SAFE)
            for x in (
                get_remote_client(req),
                req.remote_addr,
                end_gmtime_str,
                method,
                the_request,
                req.environ.get('SERVER_PROTOCOL'),
                status_int,
                req.referer,
                req.user_agent,
                self.obscure_sensitive(req.headers.get('x-auth-token')),
                bytes_received,
                bytes_sent,
                req.headers.get('etag', None),
                req.environ.get('swift.trans_id'),
                logged_headers,
                duration_time_str,
                req.environ.get('swift.source'),
                ','.join(req.environ.get('swift.log_info') or ''),
                start_time_str,
                end_time_str,
                policy_index
            )))

        # Log timing and bytes-transferred data to StatsD
        metric_name = self.statsd_metric_name(req, status_int, method)
        metric_name_policy = self.statsd_metric_name_policy(req, status_int,
                                                            method,
                                                            policy_index)
        # Only log data for valid controllers (or SOS) to keep the metric count
        # down (egregious errors will get logged by the proxy server itself).

        if metric_name:
            self.access_logger.timing(metric_name + '.timing',
                                      (end_time - start_time) * 1000)
            self.access_logger.update_stats(metric_name + '.xfer',
                                            bytes_received + bytes_sent)
        if metric_name_policy:
            self.access_logger.timing(metric_name_policy + '.timing',
                                      (end_time - start_time) * 1000)
            self.access_logger.update_stats(metric_name_policy + '.xfer',
                                            bytes_received + bytes_sent)
Beispiel #25
0
    def _listing(self, env, start_response, prefix=None):
        """
        Sends an HTML object listing to the remote client.

        :param env: The original WSGI environment dict.
        :param start_response: The original WSGI start_response hook.
        :param prefix: Any prefix desired for the container listing.
        """
        if not config_true_value(self._listings):
            body = '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 ' \
                'Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">\n' \
                '<html>\n' \
                '<head>\n' \
                '<title>Listing of %s</title>\n' % cgi.escape(env['PATH_INFO'])
            if self._listings_css:
                body += '  <link rel="stylesheet" type="text/css" ' \
                    'href="%s" />\n' % self._build_css_path(prefix or '')
            else:
                body += '  <style type="text/css">\n' \
                    '   h1 {font-size: 1em; font-weight: bold;}\n' \
                    '   p {font-size: 2}\n' \
                    '  </style>\n'
            body += '</head>\n<body>' \
                '  <h1>Web Listing Disabled</h1>' \
                '   <p>The owner of this web site has disabled web listing.' \
                '   <p>If you are the owner of this web site, you can enable' \
                '   web listing by setting X-Container-Meta-Web-Listings.</p>'
            if self._index:
                body += '<h1>Index File Not Found</h1>' \
                    ' <p>The owner of this web site has set ' \
                    ' <b>X-Container-Meta-Web-Index: %s</b>. ' \
                    ' However, this file is not found.</p>' % self._index
            body += ' </body>\n</html>\n'
            resp = HTTPNotFound(body=body)(env, self._start_response)
            return self._error_response(resp, env, start_response)
        tmp_env = make_pre_authed_env(
            env, 'GET', '/%s/%s/%s' % (
                self.version, self.account, self.container),
            self.agent, swift_source='SW')
        tmp_env['QUERY_STRING'] = 'delimiter=/&format=json'
        if prefix:
            tmp_env['QUERY_STRING'] += '&prefix=%s' % quote(prefix)
        else:
            prefix = ''
        resp = self._app_call(tmp_env)
        if not is_success(self._get_status_int()):
            return self._error_response(resp, env, start_response)
        listing = None
        body = ''.join(resp)
        if body:
            listing = json.loads(body)
        if not listing:
            resp = HTTPNotFound()(env, self._start_response)
            return self._error_response(resp, env, start_response)
        headers = {'Content-Type': 'text/html; charset=UTF-8'}
        body = '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 ' \
               'Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">\n' \
               '<html>\n' \
               ' <head>\n' \
               '  <title>Listing of %s</title>\n' % \
               cgi.escape(env['PATH_INFO'])
        if self._listings_css:
            body += '  <link rel="stylesheet" type="text/css" ' \
                    'href="%s" />\n' % (self._build_css_path(prefix))
        else:
            body += '  <style type="text/css">\n' \
                    '   h1 {font-size: 1em; font-weight: bold;}\n' \
                    '   th {text-align: left; padding: 0px 1em 0px 1em;}\n' \
                    '   td {padding: 0px 1em 0px 1em;}\n' \
                    '   a {text-decoration: none;}\n' \
                    '  </style>\n'
        body += ' </head>\n' \
                ' <body>\n' \
                '  <h1 id="title">Listing of %s</h1>\n' \
                '  <table id="listing">\n' \
                '   <tr id="heading">\n' \
                '    <th class="colname">Name</th>\n' \
                '    <th class="colsize">Size</th>\n' \
                '    <th class="coldate">Date</th>\n' \
                '   </tr>\n' % \
                cgi.escape(env['PATH_INFO'])
        if prefix:
            body += '   <tr id="parent" class="item">\n' \
                    '    <td class="colname"><a href="../">../</a></td>\n' \
                    '    <td class="colsize">&nbsp;</td>\n' \
                    '    <td class="coldate">&nbsp;</td>\n' \
                    '   </tr>\n'
        for item in listing:
            if 'subdir' in item:
                subdir = get_valid_utf8_str(item['subdir'])
                if prefix:
                    subdir = subdir[len(prefix):]
                body += '   <tr class="item subdir">\n' \
                        '    <td class="colname"><a href="%s">%s</a></td>\n' \
                        '    <td class="colsize">&nbsp;</td>\n' \
                        '    <td class="coldate">&nbsp;</td>\n' \
                        '   </tr>\n' % \
                        (quote(subdir), cgi.escape(subdir))
        for item in listing:
            if 'name' in item:
                name = get_valid_utf8_str(item['name'])
                if prefix:
                    name = name[len(prefix):]
                content_type = get_valid_utf8_str(item['content_type'])
                bytes = get_valid_utf8_str(human_readable(item['bytes']))
                last_modified = (cgi.escape(item['last_modified']).
                                 split('.')[0].replace('T', ' '))
                body += '   <tr class="item %s">\n' \
                        '    <td class="colname"><a href="%s">%s</a></td>\n' \
                        '    <td class="colsize">%s</td>\n' \
                        '    <td class="coldate">%s</td>\n' \
                        '   </tr>\n' % \
                        (' '.join('type-' + cgi.escape(t.lower(), quote=True)
                                  for t in content_type.split('/')),
                         quote(name), cgi.escape(name),
                         bytes, get_valid_utf8_str(last_modified))
        body += '  </table>\n' \
                ' </body>\n' \
                '</html>\n'
        resp = Response(headers=headers, body=body)
        return resp(env, start_response)
    def log_request(self, env, status_int, bytes_received, bytes_sent,
                    request_time, client_disconnect):
        """
        Log a request.

        :param env: WSGI environment
        :param status_int: integer code for the response status
        :param bytes_received: bytes successfully read from the request body
        :param bytes_sent: bytes yielded to the WSGI server
        :param request_time: time taken to satisfy the request, in seconds
        """
        req = Request(env)
        if client_disconnect:  # log disconnected clients as '499' status code
            status_int = 499
        req_path = get_valid_utf8_str(req.path)
        the_request = quote(unquote(req_path))
        if req.query_string:
            the_request = the_request + '?' + req.query_string
        logged_headers = None
        if self.log_hdrs:
            logged_headers = '\n'.join('%s: %s' % (k, v)
                                       for k, v in req.headers.items())
        method = req.environ.get('swift.orig_req_method', req.method)
        self.access_logger.info(' '.join(
            quote(str(x) if x else '-') for x in (
                get_remote_client(req),
                req.remote_addr,
                time.strftime('%d/%b/%Y/%H/%M/%S', time.gmtime()),
                method,
                the_request,
                req.environ.get('SERVER_PROTOCOL'),
                status_int,
                req.referer,
                req.user_agent,
                req.headers.get('x-auth-token'),
                bytes_received,
                bytes_sent,
                req.headers.get('etag', None),
                req.environ.get('swift.trans_id'),
                logged_headers,
                '%.4f' % request_time,
                req.environ.get('swift.source'),
            )))
        # Log timing and bytes-transfered data to StatsD
        if req.path.startswith('/v1/'):
            try:
                stat_type = [None, 'account', 'container',
                             'object'][req.path.strip('/').count('/')]
            except IndexError:
                stat_type = 'object'
        else:
            stat_type = env.get('swift.source')
        # Only log data for valid controllers (or SOS) to keep the metric count
        # down (egregious errors will get logged by the proxy server itself).
        if stat_type:
            stat_method = method if method in self.valid_methods \
                else 'BAD_METHOD'
            metric_name = '.'.join((stat_type, stat_method, str(status_int)))
            self.access_logger.timing(metric_name + '.timing',
                                      request_time * 1000)
            self.access_logger.update_stats(metric_name + '.xfer',
                                            bytes_received + bytes_sent)
Beispiel #27
0
    def log_request(self, env, status_int, bytes_received, bytes_sent,
                    request_time, client_disconnect):
        """
        Log a request.

        :param env: WSGI environment
        :param status_int: integer code for the response status
        :param bytes_received: bytes successfully read from the request body
        :param bytes_sent: bytes yielded to the WSGI server
        :param request_time: time taken to satisfy the request, in seconds
        """
        req = Request(env)
        if client_disconnect:  # log disconnected clients as '499' status code
            status_int = 499
        req_path = get_valid_utf8_str(req.path)
        the_request = quote(unquote(req_path))
        if req.query_string:
            the_request = the_request + '?' + req.query_string
        logged_headers = None
        if self.log_hdrs:
            logged_headers = '\n'.join('%s: %s' % (k, v)
                                       for k, v in req.headers.items())
        method = req.environ.get('swift.orig_req_method', req.method)
        self.access_logger.info(' '.join(
            quote(str(x) if x else '-')
            for x in (
                get_remote_client(req),
                req.remote_addr,
                time.strftime('%d/%b/%Y/%H/%M/%S', time.gmtime()),
                method,
                the_request,
                req.environ.get('SERVER_PROTOCOL'),
                status_int,
                req.referer,
                req.user_agent,
                req.headers.get('x-auth-token'),
                bytes_received,
                bytes_sent,
                req.headers.get('etag', None),
                req.environ.get('swift.trans_id'),
                logged_headers,
                '%.4f' % request_time,
                req.environ.get('swift.source'),
            )))
        # Log timing and bytes-transfered data to StatsD
        if req.path.startswith('/v1/'):
            try:
                stat_type = [None, 'account', 'container',
                             'object'][req.path.strip('/').count('/')]
            except IndexError:
                stat_type = 'object'
        else:
            stat_type = env.get('swift.source')
        # Only log data for valid controllers (or SOS) to keep the metric count
        # down (egregious errors will get logged by the proxy server itself).
        if stat_type:
            stat_method = method if method in self.valid_methods \
                else 'BAD_METHOD'
            metric_name = '.'.join((stat_type, stat_method, str(status_int)))
            self.access_logger.timing(metric_name + '.timing',
                                      request_time * 1000)
            self.access_logger.update_stats(metric_name + '.xfer',
                                            bytes_received + bytes_sent)
Beispiel #28
0
    def _listing(self, env, start_response, prefix=None):
        """
        Sends an HTML object listing to the remote client.

        :param env: The original WSGI environment dict.
        :param start_response: The original WSGI start_response hook.
        :param prefix: Any prefix desired for the container listing.
        """
        if not config_true_value(self._listings):
            resp = HTTPNotFound()(env, self._start_response)
            return self._error_response(resp, env, start_response)
        tmp_env = make_pre_authed_env(
            env,
            'GET',
            '/%s/%s/%s' % (self.version, self.account, self.container),
            self.agent,
            swift_source='SW')
        tmp_env['QUERY_STRING'] = 'delimiter=/&format=json'
        if prefix:
            tmp_env['QUERY_STRING'] += '&prefix=%s' % quote(prefix)
        else:
            prefix = ''
        resp = self._app_call(tmp_env)
        if not is_success(self._get_status_int()):
            return self._error_response(resp, env, start_response)
        listing = None
        body = ''.join(resp)
        if body:
            listing = json.loads(body)
        if not listing:
            resp = HTTPNotFound()(env, self._start_response)
            return self._error_response(resp, env, start_response)
        headers = {'Content-Type': 'text/html; charset=UTF-8'}
        body = '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 ' \
               'Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">\n' \
               '<html>\n' \
               ' <head>\n' \
               '  <title>Listing of %s</title>\n' % \
               cgi.escape(env['PATH_INFO'])
        if self._listings_css:
            body += '  <link rel="stylesheet" type="text/css" ' \
                    'href="%s" />\n' % (self._build_css_path(prefix))
        else:
            body += '  <style type="text/css">\n' \
                    '   h1 {font-size: 1em; font-weight: bold;}\n' \
                    '   th {text-align: left; padding: 0px 1em 0px 1em;}\n' \
                    '   td {padding: 0px 1em 0px 1em;}\n' \
                    '   a {text-decoration: none;}\n' \
                    '  </style>\n'
        body += ' </head>\n' \
                ' <body>\n' \
                '  <h1 id="title">Listing of %s</h1>\n' \
                '  <table id="listing">\n' \
                '   <tr id="heading">\n' \
                '    <th class="colname">Name</th>\n' \
                '    <th class="colsize">Size</th>\n' \
                '    <th class="coldate">Date</th>\n' \
                '   </tr>\n' % \
                cgi.escape(env['PATH_INFO'])
        if prefix:
            body += '   <tr id="parent" class="item">\n' \
                    '    <td class="colname"><a href="../">../</a></td>\n' \
                    '    <td class="colsize">&nbsp;</td>\n' \
                    '    <td class="coldate">&nbsp;</td>\n' \
                    '   </tr>\n'
        for item in listing:
            if 'subdir' in item:
                subdir = get_valid_utf8_str(item['subdir'])
                if prefix:
                    subdir = subdir[len(prefix):]
                body += '   <tr class="item subdir">\n' \
                        '    <td class="colname"><a href="%s">%s</a></td>\n' \
                        '    <td class="colsize">&nbsp;</td>\n' \
                        '    <td class="coldate">&nbsp;</td>\n' \
                        '   </tr>\n' % \
                        (quote(subdir), cgi.escape(subdir))
        for item in listing:
            if 'name' in item:
                name = get_valid_utf8_str(item['name'])
                if prefix:
                    name = name[len(prefix):]
                content_type = get_valid_utf8_str(item['content_type'])
                bytes = get_valid_utf8_str(human_readable(item['bytes']))
                last_modified = (cgi.escape(
                    item['last_modified']).split('.')[0].replace('T', ' '))
                body += '   <tr class="item %s">\n' \
                        '    <td class="colname"><a href="%s">%s</a></td>\n' \
                        '    <td class="colsize">%s</td>\n' \
                        '    <td class="coldate">%s</td>\n' \
                        '   </tr>\n' % \
                        (' '.join('type-' + cgi.escape(t.lower(), quote=True)
                                  for t in content_type.split('/')),
                         quote(name), cgi.escape(name),
                         bytes, get_valid_utf8_str(last_modified))
        body += '  </table>\n' \
                ' </body>\n' \
                '</html>\n'
        resp = Response(headers=headers, body=body)
        return resp(env, start_response)