Example #1
0
File: dlo.py Project: mahak/swift
    def _get_container_listing(self, req, version, account, container,
                               prefix, marker=''):
        '''
        :param version: whatever
        :param account: native
        :param container: native
        :param prefix: native
        :param marker: native
        '''
        con_req = make_subrequest(
            req.environ,
            path=wsgi_quote('/'.join([
                '', str_to_wsgi(version),
                str_to_wsgi(account), str_to_wsgi(container)])),
            method='GET',
            headers={'x-auth-token': req.headers.get('x-auth-token')},
            agent=('%(orig)s ' + 'DLO MultipartGET'), swift_source='DLO')
        con_req.query_string = 'prefix=%s' % quote(prefix)
        if marker:
            con_req.query_string += '&marker=%s' % quote(marker)

        con_resp = con_req.get_response(self.dlo.app)
        if not is_success(con_resp.status_int):
            if req.method == 'HEAD':
                con_resp.body = b''
            return con_resp, None
        with closing_if_possible(con_resp.app_iter):
            return None, json.loads(b''.join(con_resp.app_iter))
Example #2
0
    def _iter_items(
            self, path, marker='', end_marker='',
            acceptable_statuses=(2, HTTP_NOT_FOUND)):
        """
        Returns an iterator of items from a json listing.  Assumes listing has
        'name' key defined and uses markers.

        :param path: Path to do GET on.
        :param marker: Prefix of first desired item, defaults to ''.
        :param end_marker: Last item returned will be 'less' than this,
                           defaults to ''.
        :param acceptable_statuses: List of status for valid responses,
                                    defaults to (2, HTTP_NOT_FOUND).

        :raises UnexpectedResponse: Exception raised when requests fail
                                    to get a response with an acceptable status
        :raises Exception: Exception is raised when code fails in an
                           unexpected way.
        """

        while True:
            resp = self.make_request(
                'GET', '%s?format=json&marker=%s&end_marker=%s' %
                (path, quote(marker), quote(end_marker)),
                {}, acceptable_statuses)
            if not resp.status_int == 200:
                if resp.status_int >= HTTP_MULTIPLE_CHOICES:
                    ''.join(resp.app_iter)
                break
            data = json.loads(resp.body)
            if not data:
                break
            for item in data:
                yield item
            marker = data[-1]['name'].encode('utf8')
Example #3
0
    def base_request(self, method, container=None, name=None, prefix=None,
                     headers={}, proxy=None, contents=None, full_listing=None):
        # Common request method
        url = self.url

        if self.token:
            headers['X-Auth-Token'] = self.token

        if container:
            url = '%s/%s' % (url.rstrip('/'), quote(container))

        if name:
            url = '%s/%s' % (url.rstrip('/'), quote(name))

        url += '?format=json'

        if prefix:
            url += '&prefix=%s' % prefix

        if proxy:
            proxy = urlparse.urlparse(proxy)
            proxy = urllib2.ProxyHandler({proxy.scheme: proxy.netloc})
            opener = urllib2.build_opener(proxy)
            urllib2.install_opener(opener)

        req = urllib2.Request(url, headers=headers, data=contents)
        req.get_method = lambda: method
        urllib2.urlopen(req)
        conn = urllib2.urlopen(req)
        body = conn.read()
        try:
            body_data = json.loads(body)
        except ValueError:
            body_data = None
        return [None, body_data]
Example #4
0
def _get_direct_account_container(path, stype, node, part,
                                  marker=None, limit=None,
                                  prefix=None, delimiter=None, conn_timeout=5,
                                  response_timeout=15):
    """Base class for get direct account and container.

    Do not use directly use the get_direct_account or
    get_direct_container instead.
    """
    qs = 'format=json'
    if marker:
        qs += '&marker=%s' % quote(marker)
    if limit:
        qs += '&limit=%d' % limit
    if prefix:
        qs += '&prefix=%s' % quote(prefix)
    if delimiter:
        qs += '&delimiter=%s' % quote(delimiter)
    with Timeout(conn_timeout):
        conn = http_connect(node['ip'], node['port'], node['device'], part,
                            'GET', path, query_string=qs,
                            headers=gen_headers())
    with Timeout(response_timeout):
        resp = conn.getresponse()
    if not is_success(resp.status):
        resp.read()
        raise DirectClientException(stype, 'GET', node, part, path, resp)

    resp_headers = HeaderKeyDict()
    for header, value in resp.getheaders():
        resp_headers[header] = value
    if resp.status == HTTP_NO_CONTENT:
        resp.read()
        return resp_headers, []
    return resp_headers, json.loads(resp.read())
Example #5
0
    def _test_redirect_slash_remap_acct(self, anonymous):
        host = self.domain_remap_acct
        path = '/%s' % quote(self.env.container.name)
        self._test_redirect_with_slash(host, path, anonymous=anonymous)

        path = '/%s/%s' % (quote(self.env.container.name),
                           self.env.objects['dir/'].name)
        self._test_redirect_with_slash(host, path, anonymous=anonymous)
Example #6
0
def disposition_format(disposition_type, filename):
    # Content-Disposition in HTTP is defined in
    # https://tools.ietf.org/html/rfc6266 and references
    # https://tools.ietf.org/html/rfc5987#section-3.2
    # to explain the filename*= encoding format. The summary
    # is that it's the charset, then an optional (and empty) language
    # then the filename. Looks funny, but it's right.
    return '''%s; filename="%s"; filename*=UTF-8''%s''' % (
        disposition_type, quote(filename, safe=' /'), quote(filename))
Example #7
0
File: slo.py Project: bebule/swift
        def validate_seg_dict(seg_dict, head_seg_resp):
            if not head_seg_resp.is_success:
                problem_segments.append([quote(obj_name),
                                         head_seg_resp.status])
                return 0, None

            segment_length = head_seg_resp.content_length
            if seg_dict.get('range'):
                # Since we now know the length, we can normalize the
                # range. We know that there is exactly one range
                # requested since we checked that earlier in
                # parse_and_validate_input().
                ranges = seg_dict['range'].ranges_for_length(
                    head_seg_resp.content_length)

                if not ranges:
                    problem_segments.append([quote(obj_name),
                                             'Unsatisfiable Range'])
                elif ranges == [(0, head_seg_resp.content_length)]:
                    # Just one range, and it exactly matches the object.
                    # Why'd we do this again?
                    del seg_dict['range']
                    segment_length = head_seg_resp.content_length
                else:
                    rng = ranges[0]
                    seg_dict['range'] = '%d-%d' % (rng[0], rng[1] - 1)
                    segment_length = rng[1] - rng[0]

            if segment_length < 1:
                problem_segments.append(
                    [quote(obj_name),
                     'Too small; each segment must be at least 1 byte.'])
            if seg_dict.get('size_bytes') is not None and \
                    seg_dict['size_bytes'] != head_seg_resp.content_length:
                problem_segments.append([quote(obj_name), 'Size Mismatch'])
            if seg_dict.get('etag') is not None and \
                    seg_dict['etag'] != head_seg_resp.etag:
                problem_segments.append([quote(obj_name), 'Etag Mismatch'])
            if head_seg_resp.last_modified:
                last_modified = head_seg_resp.last_modified
            else:
                # shouldn't happen
                last_modified = datetime.now()

            last_modified_formatted = \
                last_modified.strftime('%Y-%m-%dT%H:%M:%S.%f')
            seg_data = {'name': '/' + seg_dict['path'].lstrip('/'),
                        'bytes': head_seg_resp.content_length,
                        'hash': head_seg_resp.etag,
                        'content_type': head_seg_resp.content_type,
                        'last_modified': last_modified_formatted}
            if seg_dict.get('range'):
                seg_data['range'] = seg_dict['range']
            if config_true_value(
                    head_seg_resp.headers.get('X-Static-Large-Object')):
                seg_data['sub_slo'] = True
            return segment_length, seg_data
Example #8
0
    def _test_redirect_slash_direct(self, anonymous):
        host = self.env.account.conn.storage_netloc
        path = '%s/%s' % (self.env.account.conn.storage_path,
                          quote(self.env.container.name))
        self._test_redirect_with_slash(host, path, anonymous=anonymous)

        path = '%s/%s/%s' % (self.env.account.conn.storage_path,
                             quote(self.env.container.name),
                             quote(self.env.objects['dir/'].name))
        self._test_redirect_with_slash(host, path, anonymous=anonymous)
Example #9
0
    def _test_index_direct(self, anonymous):
        objects = self.env.objects
        host = self.env.account.conn.storage_netloc
        path = '%s/%s/' % (self.env.account.conn.storage_path,
                           quote(self.env.container.name))
        self._test_index(host, path, anonymous=anonymous)

        path = '%s/%s/%s/' % (self.env.account.conn.storage_path,
                              quote(self.env.container.name),
                              quote(objects['dir/'].name))
        self._test_index(host, path, anonymous=anonymous, expected_status=404)
Example #10
0
    def _build_css_path(self, prefix=""):
        """
        Constructs a relative path from a given prefix within the container.
        URLs and paths starting with '/' are not modified.

        :param prefix: The prefix for the container listing.
        """
        if self._listings_css.startswith(("/", "http://", "https://")):
            css_path = quote(self._listings_css, ":/")
        else:
            css_path = "../" * prefix.count("/") + quote(self._listings_css)
        return css_path
Example #11
0
    def _build_css_path(self, prefix=''):
        """
        Constructs a relative path from a given prefix within the container.
        URLs and paths starting with '/' are not modified.

        :param prefix: The prefix for the container listing.
        """
        if self._listings_css.startswith(('/', 'http://', 'https://')):
            css_path = quote(self._listings_css, ':/')
        else:
            css_path = '../' * prefix.count('/') + quote(self._listings_css)
        return css_path
Example #12
0
    def _get_container_listing(self, req, version, account, container,
                               prefix, marker=''):
        con_req = make_request(
            req.environ, path='/'.join(['', version, account, container]),
            method='GET',
            headers={'x-auth-token': req.headers.get('x-auth-token')},
            agent=('%(orig)s ' + 'DLO MultipartGET'), swift_source='DLO')
        con_req.query_string = 'format=json&prefix=%s' % quote(prefix)
        if marker:
            con_req.query_string += '&marker=%s' % quote(marker)

        con_resp = con_req.get_response(self.dlo.app)
        if not is_success(con_resp.status_int):
            return con_resp, None
        return None, json.loads(''.join(con_resp.app_iter))
Example #13
0
File: dlo.py Project: charint/swift
    def _get_container_listing(self, req, version, account, container,
                               prefix, marker=''):
        con_req = req.copy_get()
        con_req.script_name = ''
        con_req.range = None
        con_req.path_info = '/'.join(['', version, account, container])
        con_req.query_string = 'format=json&prefix=%s' % quote(prefix)
        con_req.user_agent = '%s DLO MultipartGET' % con_req.user_agent
        if marker:
            con_req.query_string += '&marker=%s' % quote(marker)

        con_resp = con_req.get_response(self.dlo.app)
        if not is_success(con_resp.status_int):
            return con_resp, None
        return None, json.loads(''.join(con_resp.app_iter))
Example #14
0
    def _update_sync_to_headers(self, name, sync_to, user_key,
                                realm, realm_key, method, headers):
        """
        Updates container sync headers

        :param name: The name of the object
        :param sync_to: The URL to the remote container.
        :param user_key: The X-Container-Sync-Key to use when sending requests
                         to the other container.
        :param realm: The realm from self.realms_conf, if there is one.
            If None, fallback to using the older allowed_sync_hosts
            way of syncing.
        :param realm_key: The realm key from self.realms_conf, if there
            is one. If None, fallback to using the older
            allowed_sync_hosts way of syncing.
        :param method: HTTP method to create sig with
        :param headers: headers to update with container sync headers
        """
        if realm and realm_key:
            nonce = uuid.uuid4().hex
            path = urlparse(sync_to).path + '/' + quote(name)
            sig = self.realms_conf.get_sig(method, path,
                                           headers.get('x-timestamp', 0),
                                           nonce, realm_key,
                                           user_key)
            headers['x-container-sync-auth'] = '%s %s %s' % (realm,
                                                             nonce,
                                                             sig)
        else:
            headers['x-container-sync-key'] = user_key
Example #15
0
 def _test_listing(self, host, path, title=None, links=[], notins=[],
                   css=None, anonymous=False):
     self._set_staticweb_headers(listings=True,
                                 listings_css=(css is not None))
     if title is None:
         title = unquote(path)
     expected_in = ['Listing of %s' % title] + [
         '<a href="{0}">{1}</a>'.format(quote(link), link)
         for link in links]
     expected_not_in = notins
     if css:
         expected_in.append('<link rel="stylesheet" type="text/css" '
                            'href="%s" />' % quote(css))
     self._test_get_path(host, path, anonymous=anonymous,
                         expected_in=expected_in,
                         expected_not_in=expected_not_in)
    def test_retry_client_exception(self):
        logger = debug_logger('direct-client-test')

        with mock.patch('swift.common.direct_client.sleep') as mock_sleep, \
                mocked_http_conn(500) as conn:
            with self.assertRaises(direct_client.ClientException) as err_ctx:
                direct_client.retry(direct_client.direct_delete_object,
                                    self.node, self.part,
                                    self.account, self.container, self.obj,
                                    retries=2, error_log=logger.error)
        self.assertEqual('DELETE', conn.method)
        self.assertEqual(err_ctx.exception.http_status, 500)
        self.assertIn('DELETE', err_ctx.exception.message)
        self.assertIn(quote('/%s/%s/%s/%s/%s'
                            % (self.node['device'], self.part, self.account,
                               self.container, self.obj)),
                      err_ctx.exception.message)
        self.assertIn(self.node['ip'], err_ctx.exception.message)
        self.assertIn(self.node['port'], err_ctx.exception.message)
        self.assertEqual(self.node['ip'], err_ctx.exception.http_host)
        self.assertEqual(self.node['port'], err_ctx.exception.http_port)
        self.assertEqual(self.node['device'], err_ctx.exception.http_device)
        self.assertEqual(500, err_ctx.exception.http_status)
        self.assertEqual([mock.call(1), mock.call(2)],
                         mock_sleep.call_args_list)
        error_lines = logger.get_lines_for_level('error')
        self.assertEqual(3, len(error_lines))
        for line in error_lines:
            self.assertIn('500 Internal Error', line)
Example #17
0
    def _get_container_listing(self, req, version, account, container, prefix, marker=""):
        con_req = make_subrequest(
            req.environ,
            path="/".join(["", version, account, container]),
            method="GET",
            headers={"x-auth-token": req.headers.get("x-auth-token")},
            agent=("%(orig)s " + "DLO MultipartGET"),
            swift_source="DLO",
        )
        con_req.query_string = "format=json&prefix=%s" % quote(prefix)
        if marker:
            con_req.query_string += "&marker=%s" % quote(marker)

        con_resp = con_req.get_response(self.dlo.app)
        if not is_success(con_resp.status_int):
            return con_resp, None
        return None, json.loads("".join(con_resp.app_iter))
Example #18
0
def _get_direct_account_container(path,
                                  stype,
                                  node,
                                  part,
                                  marker=None,
                                  limit=None,
                                  prefix=None,
                                  delimiter=None,
                                  conn_timeout=5,
                                  response_timeout=15):
    """Base class for get direct account and container.

    Do not use directly use the get_direct_account or
    get_direct_container instead.
    """
    qs = 'format=json'
    if marker:
        qs += '&marker=%s' % quote(marker)
    if limit:
        qs += '&limit=%d' % limit
    if prefix:
        qs += '&prefix=%s' % quote(prefix)
    if delimiter:
        qs += '&delimiter=%s' % quote(delimiter)
    with Timeout(conn_timeout):
        conn = http_connect(node['ip'],
                            node['port'],
                            node['device'],
                            part,
                            'GET',
                            path,
                            query_string=qs,
                            headers=gen_headers())
    with Timeout(response_timeout):
        resp = conn.getresponse()
    if not is_success(resp.status):
        resp.read()
        raise DirectClientException(stype, 'GET', node, part, path, resp)

    resp_headers = HeaderKeyDict()
    for header, value in resp.getheaders():
        resp_headers[header] = value
    if resp.status == HTTP_NO_CONTENT:
        resp.read()
        return resp_headers, []
    return resp_headers, json.loads(resp.read())
Example #19
0
    def _iter_items(self,
                    path,
                    marker='',
                    end_marker='',
                    prefix='',
                    acceptable_statuses=(2, HTTP_NOT_FOUND)):
        """
        Returns an iterator of items from a json listing.  Assumes listing has
        'name' key defined and uses markers.

        :param path: Path to do GET on.
        :param marker: Prefix of first desired item, defaults to ''.
        :param end_marker: Last item returned will be 'less' than this,
                           defaults to ''.
        :param prefix: Prefix of items
        :param acceptable_statuses: List of status for valid responses,
                                    defaults to (2, HTTP_NOT_FOUND).

        :raises UnexpectedResponse: Exception raised when requests fail
                                    to get a response with an acceptable status
        :raises Exception: Exception is raised when code fails in an
                           unexpected way.
        """
        if not isinstance(marker, bytes):
            marker = marker.encode('utf8')
        if not isinstance(end_marker, bytes):
            end_marker = end_marker.encode('utf8')
        if not isinstance(prefix, bytes):
            prefix = prefix.encode('utf8')

        while True:
            resp = self.make_request(
                'GET', '%s?format=json&marker=%s&end_marker=%s&prefix=%s' %
                (path, bytes_to_wsgi(quote(marker)),
                 bytes_to_wsgi(quote(end_marker)), bytes_to_wsgi(
                     quote(prefix))), {}, acceptable_statuses)
            if not resp.status_int == 200:
                if resp.status_int >= HTTP_MULTIPLE_CHOICES:
                    b''.join(resp.app_iter)
                break
            data = json.loads(resp.body)
            if not data:
                break
            for item in data:
                yield item
            marker = data[-1]['name'].encode('utf8')
Example #20
0
def _get_direct_account_container(path, stype, node, part,
                                  marker=None, limit=None,
                                  prefix=None, delimiter=None,
                                  conn_timeout=5, response_timeout=15,
                                  end_marker=None, reverse=None, headers=None):
    """Base class for get direct account and container.

    Do not use directly use the get_direct_account or
    get_direct_container instead.
    """
    if headers is None:
        headers = {}

    params = ['format=json']
    if marker:
        params.append('marker=%s' % quote(marker))
    if limit:
        params.append('limit=%d' % limit)
    if prefix:
        params.append('prefix=%s' % quote(prefix))
    if delimiter:
        params.append('delimiter=%s' % quote(delimiter))
    if end_marker:
        params.append('end_marker=%s' % quote(end_marker))
    if reverse:
        params.append('reverse=%s' % quote(reverse))
    qs = '&'.join(params)

    ip, port = get_ip_port(node, headers)
    with Timeout(conn_timeout):
        conn = http_connect(ip, port, node['device'], part,
                            'GET', path, query_string=qs,
                            headers=gen_headers(hdrs_in=headers))
    with Timeout(response_timeout):
        resp = conn.getresponse()
    if not is_success(resp.status):
        resp.read()
        raise DirectClientException(stype, 'GET', node, part, path, resp)

    resp_headers = HeaderKeyDict()
    for header, value in resp.getheaders():
        resp_headers[header] = value
    if resp.status == HTTP_NO_CONTENT:
        resp.read()
        return resp_headers, []
    return resp_headers, json.loads(resp.read())
Example #21
0
    def base_request(self,
                     method,
                     container=None,
                     name=None,
                     prefix=None,
                     headers=None,
                     proxy=None,
                     contents=None,
                     full_listing=None):
        # Common request method
        url = self.url

        if headers is None:
            headers = {}

        if self.token:
            headers['X-Auth-Token'] = self.token

        if container:
            url = '%s/%s' % (url.rstrip('/'), quote(container))

        if name:
            url = '%s/%s' % (url.rstrip('/'), quote(name))

        url += '?format=json'

        if prefix:
            url += '&prefix=%s' % prefix

        if proxy:
            proxy = urlparse.urlparse(proxy)
            proxy = urllib2.ProxyHandler({proxy.scheme: proxy.netloc})
            opener = urllib2.build_opener(proxy)
            urllib2.install_opener(opener)

        req = urllib2.Request(url, headers=headers, data=contents)
        req.get_method = lambda: method
        urllib2.urlopen(req)
        conn = urllib2.urlopen(req)
        body = conn.read()
        try:
            body_data = json.loads(body)
        except ValueError:
            body_data = None
        return [None, body_data]
Example #22
0
 def __init__(self, stype, method, node, part, path, resp):
     full_path = quote('/%s/%s%s' % (node['device'], part, path))
     msg = '%s server %s:%s direct %s %r gave status %s' % (
         stype, node['ip'], node['port'], method, full_path, resp.status)
     headers = HeaderKeyDict(resp.getheaders())
     super(DirectClientException, self).__init__(
         msg, http_host=node['ip'], http_port=node['port'],
         http_device=node['device'], http_status=resp.status,
         http_reason=resp.reason, http_headers=headers)
Example #23
0
 def __init__(self, app, conf):
     self.app = app
     self.conf = conf
     self.logger = get_logger(conf, log_route='tempauth')
     self.log_headers = config_true_value(conf.get('log_headers', 'f'))
     self.reseller_prefixes, self.account_rules = \
         config_read_reseller_options(conf, dict(require_group=''))
     self.reseller_prefix = self.reseller_prefixes[0]
     self.logger.set_statsd_prefix(
         'tempauth.%s' %
         (self.reseller_prefix if self.reseller_prefix else 'NONE', ))
     self.auth_prefix = conf.get('auth_prefix', '/auth/')
     if not self.auth_prefix or not self.auth_prefix.strip('/'):
         self.logger.warning('Rewriting invalid auth prefix "%s" to '
                             '"/auth/" (Non-empty auth prefix path '
                             'is required)' % self.auth_prefix)
         self.auth_prefix = '/auth/'
     if not self.auth_prefix.startswith('/'):
         self.auth_prefix = '/' + self.auth_prefix
     if not self.auth_prefix.endswith('/'):
         self.auth_prefix += '/'
     self.token_life = int(conf.get('token_life', DEFAULT_TOKEN_LIFE))
     self.allow_overrides = config_true_value(
         conf.get('allow_overrides', 't'))
     self.storage_url_scheme = conf.get('storage_url_scheme', 'default')
     self.users = {}
     for conf_key in conf:
         if conf_key.startswith(('user_', 'user64_')):
             try:
                 account, username = conf_key.split('_', 1)[1].split('_')
             except ValueError:
                 raise ValueError("key %s was provided in an "
                                  "invalid format" % conf_key)
             if conf_key.startswith('user64_'):
                 # Because trailing equal signs would screw up config file
                 # parsing, we auto-pad with '=' chars.
                 account += '=' * (len(account) % 4)
                 account = base64.b64decode(account)
                 username += '=' * (len(username) % 4)
                 username = base64.b64decode(username)
                 if not six.PY2:
                     account = account.decode('utf8')
                     username = username.decode('utf8')
             values = conf[conf_key].split()
             if not values:
                 raise ValueError('%s has no key set' % conf_key)
             key = values.pop(0)
             if values and ('://' in values[-1] or '$HOST' in values[-1]):
                 url = values.pop()
             else:
                 url = '$HOST/v1/%s%s' % (self.reseller_prefix,
                                          quote(account))
             self.users[account + ':' + username] = {
                 'key': key,
                 'url': url,
                 'groups': values
             }
Example #24
0
 def __init__(self, stype, method, node, part, path, resp):
     full_path = quote('/%s/%s%s' % (node['device'], part, path))
     msg = '%s server %s:%s direct %s %r gave status %s' % (
         stype, node['ip'], node['port'], method, full_path, resp.status)
     headers = HeaderKeyDict(resp.getheaders())
     super(DirectClientException, self).__init__(
         msg, http_host=node['ip'], http_port=node['port'],
         http_device=node['device'], http_status=resp.status,
         http_reason=resp.reason, http_headers=headers)
Example #25
0
    def handle_object_put(self, req, start_response, sync_profile,
                          per_account):

        status, headers, app_iter = req.call_application(self.app)

        if not status.startswith('404 '):
            status, headers, app_iter = req.call_application(self.app)
            start_response(status, headers)
            return app_iter

        provider = create_provider(sync_profile,
                                   max_conns=1,
                                   per_account=per_account)
        headers = {}
        if sync_profile.get('protocol') == 'swift':
            try:
                headers = get_container_headers(provider)
            except RemoteHTTPError as e:
                self.logger.warning(
                    'Failed to query the remote container (%d): %s' %
                    (e.resp.status, e.resp.body))
                status, headers, app_iter = req.call_application(self.app)
                start_response(status, headers)
                return app_iter

        vers, acct, cont, _ = req.split_path(4, 4, True)
        container_path = '/%s' % '/'.join(
            [vers, utils.quote(acct),
             utils.quote(cont)])
        put_container_req = make_subrequest(req.environ,
                                            method='PUT',
                                            path=container_path,
                                            headers=headers,
                                            swift_source='CloudSync Shunt')
        put_container_req.environ['swift_owner'] = True
        status, headers, body = put_container_req.call_application(self.app)
        utils.close_if_possible(body)

        if int(status.split()[0]) // 100 != 2:
            self.logger.warning('Failed to create container: %s' % status)

        status, headers, app_iter = req.call_application(self.app)
        start_response(status, headers)
        return app_iter
Example #26
0
def validate_device_partition(device, partition):
    """
    Validate that a device and a partition are valid and won't lead to
    directory traversal when used.

    :param device: device to validate
    :param partition: partition to validate
    :raises: ValueError if given an invalid device or partition
    """
    invalid_device = False
    invalid_partition = False
    if not device or '/' in device or device in ['.', '..']:
        invalid_device = True
    if not partition or '/' in partition or partition in ['.', '..']:
        invalid_partition = True

    if invalid_device:
        raise ValueError('Invalid device: %s' % quote(device or ''))
    elif invalid_partition:
        raise ValueError('Invalid partition: %s' % quote(partition or ''))
Example #27
0
def validate_device_partition(device, partition):
    """
    Validate that a device and a partition are valid and won't lead to
    directory traversal when used.

    :param device: device to validate
    :param partition: partition to validate
    :raises: ValueError if given an invalid device or partition
    """
    invalid_device = False
    invalid_partition = False
    if not device or '/' in device or device in ['.', '..']:
        invalid_device = True
    if not partition or '/' in partition or partition in ['.', '..']:
        invalid_partition = True

    if invalid_device:
        raise ValueError('Invalid device: %s' % quote(device or ''))
    elif invalid_partition:
        raise ValueError('Invalid partition: %s' % quote(partition or ''))
Example #28
0
def _get_direct_account_container(path, stype, node, part,
                                  marker=None, limit=None,
                                  prefix=None, delimiter=None,
                                  conn_timeout=5, response_timeout=15,
                                  end_marker=None, reverse=None, headers=None):
    """Base class for get direct account and container.

    Do not use directly use the get_direct_account or
    get_direct_container instead.
    """
    params = ['format=json']
    if marker:
        params.append('marker=%s' % quote(marker))
    if limit:
        params.append('limit=%d' % limit)
    if prefix:
        params.append('prefix=%s' % quote(prefix))
    if delimiter:
        params.append('delimiter=%s' % quote(delimiter))
    if end_marker:
        params.append('end_marker=%s' % quote(end_marker))
    if reverse:
        params.append('reverse=%s' % quote(reverse))
    qs = '&'.join(params)
    with Timeout(conn_timeout):
        conn = http_connect(node['ip'], node['port'], node['device'], part,
                            'GET', path, query_string=qs,
                            headers=gen_headers(hdrs_in=headers))
    with Timeout(response_timeout):
        resp = conn.getresponse()
    if not is_success(resp.status):
        resp.read()
        raise DirectClientException(stype, 'GET', node, part, path, resp)

    resp_headers = HeaderKeyDict()
    for header, value in resp.getheaders():
        resp_headers[header] = value
    if resp.status == HTTP_NO_CONTENT:
        resp.read()
        return resp_headers, []
    return resp_headers, json.loads(resp.read())
Example #29
0
def _get_direct_account_container(path, stype, node, part,
                                  account, marker=None, limit=None,
                                  prefix=None, delimiter=None, conn_timeout=5,
                                  response_timeout=15):
    """Base class for get direct account and container.

    Do not use directly use the get_direct_account or
    get_direct_container instead.
    """
    qs = 'format=json'
    if marker:
        qs += '&marker=%s' % quote(marker)
    if limit:
        qs += '&limit=%d' % limit
    if prefix:
        qs += '&prefix=%s' % quote(prefix)
    if delimiter:
        qs += '&delimiter=%s' % quote(delimiter)
    with Timeout(conn_timeout):
        conn = http_connect(node['ip'], node['port'], node['device'], part,
                            'GET', path, query_string=qs,
                            headers=gen_headers())
    with Timeout(response_timeout):
        resp = conn.getresponse()
    if not is_success(resp.status):
        resp.read()
        raise ClientException(
            '%s server %s:%s direct GET %s gave stats %s' %
            (stype, node['ip'], node['port'],
             repr('/%s/%s%s' % (node['device'], part, path)),
             resp.status),
            http_host=node['ip'], http_port=node['port'],
            http_device=node['device'], http_status=resp.status,
            http_reason=resp.reason)
    resp_headers = {}
    for header, value in resp.getheaders():
        resp_headers[header.lower()] = value
    if resp.status == HTTP_NO_CONTENT:
        resp.read()
        return resp_headers, []
    return resp_headers, json_loads(resp.read())
Example #30
0
 def __init__(self, stype, method, node, part, path, resp, host=None):
     # host can be used to override the node ip and port reported in
     # the exception
     host = host if host is not None else node
     full_path = quote('/%s/%s%s' % (node['device'], part, path))
     msg = '%s server %s:%s direct %s %r gave status %s' % (
         stype, host['ip'], host['port'], method, full_path, resp.status)
     headers = HeaderKeyDict(resp.getheaders())
     super(DirectClientException, self).__init__(
         msg, http_host=host['ip'], http_port=host['port'],
         http_device=node['device'], http_status=resp.status,
         http_reason=resp.reason, http_headers=headers)
Example #31
0
    def _test_listing_direct(self, anonymous, listings_css):
        objects = self.env.objects
        host = self.env.account.conn.storage_netloc
        path = '%s/%s/' % (self.env.account.conn.storage_path,
                           quote(self.env.container.name))
        css = objects['listings_css'].name if listings_css else None
        self._test_listing(host, path, anonymous=True, css=css,
                           links=[objects['index'].name,
                                  objects['dir/'].name + '/'],
                           notins=[objects['dir/obj'].name])

        path = '%s/%s/%s/' % (self.env.account.conn.storage_path,
                              quote(self.env.container.name),
                              quote(objects['dir/'].name))
        css = '../%s' % objects['listings_css'].name if listings_css else None
        self._test_listing(
            host, path, anonymous=anonymous, css=css,
            links=[objects['dir/obj'].name.split('/')[-1],
                   objects['dir/some sub%dir/'].name.split('/')[-1] + '/'],
            notins=[objects['index'].name,
                    objects['dir/some sub%dir/obj'].name])
Example #32
0
    def _test_listing_direct(self, anonymous, listings_css):
        objects = self.env.objects
        host = self.env.account.conn.storage_netloc
        path = '%s/%s/' % (self.env.account.conn.storage_path,
                           quote(self.env.container.name))
        css = objects['listings_css'].name if listings_css else None
        self._test_listing(host, path, anonymous=True, css=css,
                           links=[objects['index'].name,
                                  objects['dir/'].name + '/'],
                           notins=[objects['dir/obj'].name])

        path = '%s/%s/%s/' % (self.env.account.conn.storage_path,
                              quote(self.env.container.name),
                              quote(objects['dir/'].name))
        css = '../%s' % objects['listings_css'].name if listings_css else None
        self._test_listing(
            host, path, anonymous=anonymous, css=css,
            links=[objects['dir/obj'].name.split('/')[-1],
                   objects['dir/some sub%dir/'].name.split('/')[-1] + '/'],
            notins=[objects['index'].name,
                    objects['dir/some sub%dir/obj'].name])
Example #33
0
    def _get_container_listing(self,
                               req,
                               version,
                               account,
                               container,
                               prefix,
                               marker=''):
        con_req = make_subrequest(
            req.environ,
            path='/'.join(['', version, account, container]),
            method='GET',
            headers={'x-auth-token': req.headers.get('x-auth-token')},
            agent=('%(orig)s ' + 'DLO MultipartGET'),
            swift_source='DLO')
        con_req.query_string = 'format=json&prefix=%s' % quote(prefix)
        if marker:
            con_req.query_string += '&marker=%s' % quote(marker)

        con_resp = con_req.get_response(self.dlo.app)
        if not is_success(con_resp.status_int):
            return con_resp, None
        return None, json.loads(''.join(con_resp.app_iter))
Example #34
0
    def make_path(self, account, container=None, obj=None):
        """
        Returns a swift path for a request quoting and utf-8 encoding the path
        parts as need be.

        :param account: swift account
        :param container: container, defaults to None
        :param obj: object, defaults to None

        :raises ValueError: Is raised if obj is specified and container is
                            not.
        """

        path = '/v1/%s' % quote(account)
        if container:
            path += '/%s' % quote(container)

            if obj:
                path += '/%s' % quote(obj)
        elif obj:
            raise ValueError('Object specified without container')
        return path
Example #35
0
    def make_path(self, account, container=None, obj=None):
        """
        Returns a swift path for a request quoting and utf-8 encoding the path
        parts as need be.

        :param account: swift account
        :param container: container, defaults to None
        :param obj: object, defaults to None

        :raises ValueError: Is raised if obj is specified and container is
                            not.
        """

        path = '/v1/%s' % quote(account)
        if container:
            path += '/%s' % quote(container)

            if obj:
                path += '/%s' % quote(obj)
        elif obj:
            raise ValueError('Object specified without container')
        return path
Example #36
0
    def _get_container_listing(self,
                               req,
                               version,
                               account,
                               container,
                               prefix,
                               marker=''):
        '''
        :param version: whatever
        :param account: native
        :param container: native
        :param prefix: native
        :param marker: native
        '''
        con_req = make_subrequest(
            req.environ,
            path=wsgi_quote('/'.join([
                '',
                str_to_wsgi(version),
                str_to_wsgi(account),
                str_to_wsgi(container)
            ])),
            method='GET',
            headers={'x-auth-token': req.headers.get('x-auth-token')},
            agent=('%(orig)s ' + 'DLO MultipartGET'),
            swift_source='DLO')
        con_req.query_string = 'prefix=%s' % quote(prefix)
        if marker:
            con_req.query_string += '&marker=%s' % quote(marker)

        con_resp = con_req.get_response(self.dlo.app)
        if not is_success(con_resp.status_int):
            if req.method == 'HEAD':
                con_resp.body = b''
            return con_resp, None
        with closing_if_possible(con_resp.app_iter):
            return None, json.loads(b''.join(con_resp.app_iter))
Example #37
0
 def __init__(self, app, conf):
     self.app = app
     self.conf = conf
     self.logger = get_logger(conf, log_route='tempauth')
     self.log_headers = config_true_value(conf.get('log_headers', 'f'))
     self.reseller_prefixes, self.account_rules = \
         config_read_reseller_options(conf, dict(require_group=''))
     self.reseller_prefix = self.reseller_prefixes[0]
     self.logger.set_statsd_prefix('tempauth.%s' % (
         self.reseller_prefix if self.reseller_prefix else 'NONE',))
     self.auth_prefix = conf.get('auth_prefix', '/auth/')
     if not self.auth_prefix or not self.auth_prefix.strip('/'):
         self.logger.warning('Rewriting invalid auth prefix "%s" to '
                             '"/auth/" (Non-empty auth prefix path '
                             'is required)' % self.auth_prefix)
         self.auth_prefix = '/auth/'
     if not self.auth_prefix.startswith('/'):
         self.auth_prefix = '/' + self.auth_prefix
     if not self.auth_prefix.endswith('/'):
         self.auth_prefix += '/'
     self.token_life = int(conf.get('token_life', DEFAULT_TOKEN_LIFE))
     self.allow_overrides = config_true_value(
         conf.get('allow_overrides', 't'))
     self.storage_url_scheme = conf.get('storage_url_scheme', 'default')
     self.users = {}
     for conf_key in conf:
         if conf_key.startswith(('user_', 'user64_')):
             account, username = conf_key.split('_', 1)[1].split('_')
             if conf_key.startswith('user64_'):
                 # Because trailing equal signs would screw up config file
                 # parsing, we auto-pad with '=' chars.
                 account += '=' * (len(account) % 4)
                 account = base64.b64decode(account)
                 username += '=' * (len(username) % 4)
                 username = base64.b64decode(username)
                 if not six.PY2:
                     account = account.decode('utf8')
                     username = username.decode('utf8')
             values = conf[conf_key].split()
             if not values:
                 raise ValueError('%s has no key set' % conf_key)
             key = values.pop(0)
             if values and ('://' in values[-1] or '$HOST' in values[-1]):
                 url = values.pop()
             else:
                 url = '$HOST/v1/%s%s' % (
                     self.reseller_prefix, quote(account))
             self.users[account + ':' + username] = {
                 'key': key, 'url': url, 'groups': values}
Example #38
0
 def _create_req_container(self,
                           req,
                           headers,
                           migration=False,
                           storage_policy=None):
     vers, acct, cont, _ = req.split_path(3, 4, True)
     container_path = '/%s' % '/'.join(
         [vers, utils.quote(acct),
          utils.quote(cont)])
     headers = dict(headers)
     if migration:
         headers[get_sys_migrator_header('container')] =\
             MigrationContainerStates.MIGRATING
     if storage_policy:
         headers['X-Storage-Policy'] = storage_policy
     put_container_req = make_subrequest(req.environ,
                                         method='PUT',
                                         path=container_path,
                                         headers=headers,
                                         swift_source='1space-shunt')
     put_container_req.environ['swift_owner'] = True
     status, headers, body = put_container_req.call_application(self.app)
     utils.close_if_possible(body)
     return status, headers
Example #39
0
 def _test_listing(self,
                   host,
                   path,
                   title=None,
                   links=[],
                   notins=[],
                   css=None,
                   anonymous=False):
     self._set_staticweb_headers(listings=True,
                                 listings_css=(css is not None))
     if title is None:
         title = unquote(path)
     expected_in = ['Listing of %s' % title] + [
         '<a href="{0}">{1}</a>'.format(quote(link), link) for link in links
     ]
     expected_not_in = notins
     if css:
         expected_in.append('<link rel="stylesheet" type="text/css" '
                            'href="%s" />' % quote(css))
     self._test_get_path(host,
                         path,
                         anonymous=anonymous,
                         expected_in=expected_in,
                         expected_not_in=expected_not_in)
Example #40
0
 def _test_direct_get_suffix_hashes_fail(self, status_code):
     with mocked_http_conn(status_code):
         with self.assertRaises(DirectClientException) as cm:
             direct_client.direct_get_suffix_hashes(
                 self.node, self.part, ['a83', 'b52'])
     self.assertIn('REPLICATE', cm.exception.message)
     self.assertIn(quote('/%s/%s/a83-b52'
                         % (self.node['device'], self.part)),
                   cm.exception.message)
     self.assertIn(self.node['replication_ip'], cm.exception.message)
     self.assertIn(self.node['replication_port'], cm.exception.message)
     self.assertEqual(self.node['replication_ip'], cm.exception.http_host)
     self.assertEqual(self.node['replication_port'], cm.exception.http_port)
     self.assertEqual(self.node['device'], cm.exception.http_device)
     self.assertEqual(status_code, cm.exception.http_status)
Example #41
0
 def build_traversal_req(symlink_target):
     """
     :returns: new request for target path if it's symlink otherwise
               None
     """
     version, account, _junk = req.split_path(2, 3, True)
     account = self._response_header_value(
         TGT_ACCT_SYSMETA_SYMLINK_HDR) or quote(account)
     target_path = os.path.join('/', version, account,
                                symlink_target.lstrip('/'))
     self._last_target_path = target_path
     new_req = make_subrequest(req.environ,
                               path=target_path,
                               method=req.method,
                               headers=req.headers,
                               swift_source='SYM')
     new_req.headers.pop('X-Backend-Storage-Policy-Index', None)
     return new_req
Example #42
0
    def handle_post(self, req):
        """
        Handle post request. If POSTing to a symlink, a HTTPTemporaryRedirect
        error message is returned to client.

        Clients that POST to symlinks should understand that the POST is not
        redirected to the target object like in a HEAD/GET request. POSTs to a
        symlink will be handled just like a normal object by the object server.
        It cannot reject it because it may not have symlink state when the POST
        lands.  The object server has no knowledge of what is a symlink object
        is. On the other hand, on POST requests, the object server returns all
        sysmeta of the object. This method uses that sysmeta to determine if
        the stored object is a symlink or not.

        :param req: HTTP POST object request
        :raises: HTTPTemporaryRedirect if POSTing to a symlink.
        :returns: Response Iterator
        """
        if TGT_OBJ_SYMLINK_HDR in req.headers:
            raise HTTPBadRequest(
                body='A PUT request is required to set a symlink target',
                request=req,
                content_type='text/plain')

        resp = self._app_call(req.environ)
        if not is_success(self._get_status_int()):
            return resp

        tgt_co = self._response_header_value(TGT_OBJ_SYSMETA_SYMLINK_HDR)
        if tgt_co:
            version, account, _junk = req.split_path(2, 3, True)
            target_acc = self._response_header_value(
                TGT_ACCT_SYSMETA_SYMLINK_HDR) or quote(account)
            location_hdr = os.path.join('/', version, target_acc, tgt_co)
            req.environ['swift.leave_relative_location'] = True
            errmsg = 'The requested POST was applied to a symlink. POST ' +\
                     'directly to the target to apply requested metadata.'
            raise HTTPTemporaryRedirect(body=errmsg,
                                        headers={'location': location_hdr})
        else:
            return resp
Example #43
0
    def _listing(self, env, start_response, prefix=None):
        """
        Sends an HTML object listing to the remote client.

        :param env: The original WSGI environment dict.
        :param start_response: The original WSGI start_response hook.
        :param prefix: Any prefix desired for the container listing.
        """
        if not config_true_value(self._listings):
            body = '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 ' \
                'Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">\n' \
                '<html>\n' \
                '<head>\n' \
                '<title>Listing of %s</title>\n' % cgi.escape(env['PATH_INFO'])
            if self._listings_css:
                body += '  <link rel="stylesheet" type="text/css" ' \
                    'href="%s" />\n' % self._build_css_path(prefix or '')
            else:
                body += '  <style type="text/css">\n' \
                    '   h1 {font-size: 1em; font-weight: bold;}\n' \
                    '   p {font-size: 2}\n' \
                    '  </style>\n'
            body += '</head>\n<body>' \
                '  <h1>Web Listing Disabled</h1>' \
                '   <p>The owner of this web site has disabled web listing.' \
                '   <p>If you are the owner of this web site, you can enable' \
                '   web listing by setting X-Container-Meta-Web-Listings.</p>'
            if self._index:
                body += '<h1>Index File Not Found</h1>' \
                    ' <p>The owner of this web site has set ' \
                    ' <b>X-Container-Meta-Web-Index: %s</b>. ' \
                    ' However, this file is not found.</p>' % self._index
            body += ' </body>\n</html>\n'
            resp = HTTPNotFound(body=body)(env, self._start_response)
            return self._error_response(resp, env, start_response)
        tmp_env = make_env(env,
                           'GET',
                           '/%s/%s/%s' %
                           (self.version, self.account, self.container),
                           self.agent,
                           swift_source='SW')
        tmp_env['QUERY_STRING'] = 'delimiter=/&format=json'
        if prefix:
            tmp_env['QUERY_STRING'] += '&prefix=%s' % quote(prefix)
        else:
            prefix = ''
        resp = self._app_call(tmp_env)
        if not is_success(self._get_status_int()):
            return self._error_response(resp, env, start_response)
        listing = None
        body = ''.join(resp)
        if body:
            listing = json.loads(body)
        if not listing:
            resp = HTTPNotFound()(env, self._start_response)
            return self._error_response(resp, env, start_response)
        headers = {'Content-Type': 'text/html; charset=UTF-8'}
        body = '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 ' \
               'Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">\n' \
               '<html>\n' \
               ' <head>\n' \
               '  <title>Listing of %s</title>\n' % \
               cgi.escape(env['PATH_INFO'])
        if self._listings_css:
            body += '  <link rel="stylesheet" type="text/css" ' \
                    'href="%s" />\n' % (self._build_css_path(prefix))
        else:
            body += '  <style type="text/css">\n' \
                    '   h1 {font-size: 1em; font-weight: bold;}\n' \
                    '   th {text-align: left; padding: 0px 1em 0px 1em;}\n' \
                    '   td {padding: 0px 1em 0px 1em;}\n' \
                    '   a {text-decoration: none;}\n' \
                    '  </style>\n'
        body += ' </head>\n' \
                ' <body>\n' \
                '  <h1 id="title">Listing of %s</h1>\n' \
                '  <table id="listing">\n' \
                '   <tr id="heading">\n' \
                '    <th class="colname">Name</th>\n' \
                '    <th class="colsize">Size</th>\n' \
                '    <th class="coldate">Date</th>\n' \
                '   </tr>\n' % \
                cgi.escape(env['PATH_INFO'])
        if prefix:
            body += '   <tr id="parent" class="item">\n' \
                    '    <td class="colname"><a href="../">../</a></td>\n' \
                    '    <td class="colsize">&nbsp;</td>\n' \
                    '    <td class="coldate">&nbsp;</td>\n' \
                    '   </tr>\n'
        for item in listing:
            if 'subdir' in item:
                subdir = item['subdir'].encode("utf-8")
                if prefix:
                    subdir = subdir[len(prefix):]
                body += '   <tr class="item subdir">\n' \
                        '    <td class="colname"><a href="%s">%s</a></td>\n' \
                        '    <td class="colsize">&nbsp;</td>\n' \
                        '    <td class="coldate">&nbsp;</td>\n' \
                        '   </tr>\n' % \
                        (quote(subdir), cgi.escape(subdir))
        for item in listing:
            if 'name' in item:
                name = item['name'].encode("utf-8")
                if prefix:
                    name = name[len(prefix):]
                content_type = item['content_type'].encode("utf-8")
                bytes = human_readable(item['bytes'])
                last_modified = (cgi.escape(item['last_modified'].encode(
                    "utf-8")).split('.')[0].replace('T', ' '))
                body += '   <tr class="item %s">\n' \
                        '    <td class="colname"><a href="%s">%s</a></td>\n' \
                        '    <td class="colsize">%s</td>\n' \
                        '    <td class="coldate">%s</td>\n' \
                        '   </tr>\n' % \
                        (' '.join('type-' + cgi.escape(t.lower(), quote=True)
                                  for t in content_type.split('/')),
                         quote(name), cgi.escape(name),
                         bytes, last_modified)
        body += '  </table>\n' \
                ' </body>\n' \
                '</html>\n'
        resp = Response(headers=headers, body=body)
        return resp(env, start_response)
Example #44
0
    def container_sync_row(self, row, sync_to, user_key, broker, info,
                           realm, realm_key):
        """
        Sends the update the row indicates to the sync_to container.

        :param row: The updated row in the local database triggering the sync
                    update.
        :param sync_to: The URL to the remote container.
        :param user_key: The X-Container-Sync-Key to use when sending requests
                         to the other container.
        :param broker: The local container database broker.
        :param info: The get_info result from the local container database
                     broker.
        :param realm: The realm from self.realms_conf, if there is one.
            If None, fallback to using the older allowed_sync_hosts
            way of syncing.
        :param realm_key: The realm key from self.realms_conf, if there
            is one. If None, fallback to using the older
            allowed_sync_hosts way of syncing.
        :returns: True on success
        """
        try:
            start_time = time()
            if row['deleted']:
                try:
                    headers = {'x-timestamp': row['created_at']}
                    if realm and realm_key:
                        nonce = uuid.uuid4().hex
                        path = urlparse(sync_to).path + '/' + quote(
                            row['name'])
                        sig = self.realms_conf.get_sig(
                            'DELETE', path, headers['x-timestamp'], nonce,
                            realm_key, user_key)
                        headers['x-container-sync-auth'] = '%s %s %s' % (
                            realm, nonce, sig)
                    else:
                        headers['x-container-sync-key'] = user_key
                    delete_object(sync_to, name=row['name'], headers=headers,
                                  proxy=self.select_http_proxy(),
                                  logger=self.logger,
                                  timeout=self.conn_timeout)
                except ClientException as err:
                    if err.http_status != HTTP_NOT_FOUND:
                        raise
                self.container_deletes += 1
                self.logger.increment('deletes')
                self.logger.timing_since('deletes.timing', start_time)
            else:
                part, nodes = \
                    self.get_object_ring(info['storage_policy_index']). \
                    get_nodes(info['account'], info['container'],
                              row['name'])
                shuffle(nodes)
                exc = None
                looking_for_timestamp = Timestamp(row['created_at'])
                timestamp = -1
                headers = body = None
                # look up for the newest one
                headers_out = {'X-Newest': True,
                               'X-Backend-Storage-Policy-Index':
                               str(info['storage_policy_index'])}
                try:
                    source_obj_status, source_obj_info, source_obj_iter = \
                        self.swift.get_object(info['account'],
                                              info['container'], row['name'],
                                              headers=headers_out,
                                              acceptable_statuses=(2, 4))

                except (Exception, UnexpectedResponse, Timeout) as err:
                    source_obj_info = {}
                    source_obj_iter = None
                    exc = err
                timestamp = Timestamp(source_obj_info.get(
                                      'x-timestamp', 0))
                headers = source_obj_info
                body = source_obj_iter
                if timestamp < looking_for_timestamp:
                    if exc:
                        raise exc
                    raise Exception(
                        _('Unknown exception trying to GET: '
                          '%(account)r %(container)r %(object)r'),
                        {'account': info['account'],
                         'container': info['container'],
                         'object': row['name']})
                for key in ('date', 'last-modified'):
                    if key in headers:
                        del headers[key]
                if 'etag' in headers:
                    headers['etag'] = headers['etag'].strip('"')
                if 'content-type' in headers:
                    headers['content-type'] = clean_content_type(
                        headers['content-type'])
                headers['x-timestamp'] = row['created_at']
                if realm and realm_key:
                    nonce = uuid.uuid4().hex
                    path = urlparse(sync_to).path + '/' + quote(row['name'])
                    sig = self.realms_conf.get_sig(
                        'PUT', path, headers['x-timestamp'], nonce, realm_key,
                        user_key)
                    headers['x-container-sync-auth'] = '%s %s %s' % (
                        realm, nonce, sig)
                else:
                    headers['x-container-sync-key'] = user_key
                put_object(sync_to, name=row['name'], headers=headers,
                           contents=FileLikeIter(body),
                           proxy=self.select_http_proxy(), logger=self.logger,
                           timeout=self.conn_timeout)
                self.container_puts += 1
                self.logger.increment('puts')
                self.logger.timing_since('puts.timing', start_time)
        except ClientException as err:
            if err.http_status == HTTP_UNAUTHORIZED:
                self.logger.info(
                    _('Unauth %(sync_from)r => %(sync_to)r'),
                    {'sync_from': '%s/%s' %
                        (quote(info['account']), quote(info['container'])),
                     'sync_to': sync_to})
            elif err.http_status == HTTP_NOT_FOUND:
                self.logger.info(
                    _('Not found %(sync_from)r => %(sync_to)r \
                      - object %(obj_name)r'),
                    {'sync_from': '%s/%s' %
                        (quote(info['account']), quote(info['container'])),
                     'sync_to': sync_to, 'obj_name': row['name']})
            else:
                self.logger.exception(
                    _('ERROR Syncing %(db_file)s %(row)s'),
                    {'db_file': str(broker), 'row': row})
            self.container_failures += 1
            self.logger.increment('failures')
            return False
        except (Exception, Timeout) as err:
            self.logger.exception(
                _('ERROR Syncing %(db_file)s %(row)s'),
                {'db_file': str(broker), 'row': row})
            self.container_failures += 1
            self.logger.increment('failures')
            return False
        return True
Example #45
0
def disposition_format(filename):
    return '''attachment; filename="%s"; filename*=UTF-8''%s''' % (quote(
        filename, safe=' /'), quote(filename))
Example #46
0
    def container_sync_row(self, row, sync_to, user_key, broker, info, realm,
                           realm_key):
        """
        Sends the update the row indicates to the sync_to container.
        Update can be either delete or put.

        :param row: The updated row in the local database triggering the sync
                    update.
        :param sync_to: The URL to the remote container.
        :param user_key: The X-Container-Sync-Key to use when sending requests
                         to the other container.
        :param broker: The local container database broker.
        :param info: The get_info result from the local container database
                     broker.
        :param realm: The realm from self.realms_conf, if there is one.
            If None, fallback to using the older allowed_sync_hosts
            way of syncing.
        :param realm_key: The realm key from self.realms_conf, if there
            is one. If None, fallback to using the older
            allowed_sync_hosts way of syncing.
        :returns: True on success
        """
        try:
            start_time = time()
            # extract last modified time from the created_at value
            ts_data, ts_ctype, ts_meta = decode_timestamps(row['created_at'])
            if row['deleted']:
                # when sync'ing a deleted object, use ts_data - this is the
                # timestamp of the source tombstone
                try:
                    headers = {'x-timestamp': ts_data.internal}
                    self._update_sync_to_headers(row['name'], sync_to,
                                                 user_key, realm, realm_key,
                                                 'DELETE', headers)
                    delete_object(sync_to,
                                  name=row['name'],
                                  headers=headers,
                                  proxy=self.select_http_proxy(),
                                  logger=self.logger,
                                  timeout=self.conn_timeout)
                except ClientException as err:
                    if err.http_status != HTTP_NOT_FOUND:
                        raise
                self.container_deletes += 1
                self.container_stats['deletes'] += 1
                self.logger.increment('deletes')
                self.logger.timing_since('deletes.timing', start_time)
            else:
                # when sync'ing a live object, use ts_meta - this is the time
                # at which the source object was last modified by a PUT or POST
                if self._object_in_remote_container(row['name'], sync_to,
                                                    user_key, realm, realm_key,
                                                    ts_meta):
                    return True
                exc = None
                # look up for the newest one; the symlink=get query-string has
                # no effect unless symlinks are enabled in the internal client
                # in which case it ensures that symlink objects retain their
                # symlink property when sync'd.
                headers_out = {
                    'X-Newest':
                    True,
                    'X-Backend-Storage-Policy-Index':
                    str(info['storage_policy_index'])
                }
                try:
                    source_obj_status, headers, body = \
                        self.swift.get_object(info['account'],
                                              info['container'], row['name'],
                                              headers=headers_out,
                                              acceptable_statuses=(2, 4),
                                              params={'symlink': 'get'})

                except (Exception, UnexpectedResponse, Timeout) as err:
                    headers = {}
                    body = None
                    exc = err
                timestamp = Timestamp(headers.get('x-timestamp', 0))
                if timestamp < ts_meta:
                    if exc:
                        raise exc
                    raise Exception(
                        _('Unknown exception trying to GET: '
                          '%(account)r %(container)r %(object)r'), {
                              'account': info['account'],
                              'container': info['container'],
                              'object': row['name']
                          })
                for key in ('date', 'last-modified'):
                    if key in headers:
                        del headers[key]
                if 'etag' in headers:
                    headers['etag'] = normalize_etag(headers['etag'])
                if 'content-type' in headers:
                    headers['content-type'] = clean_content_type(
                        headers['content-type'])
                self._update_sync_to_headers(row['name'], sync_to, user_key,
                                             realm, realm_key, 'PUT', headers)
                put_object(sync_to,
                           name=row['name'],
                           headers=headers,
                           contents=FileLikeIter(body),
                           proxy=self.select_http_proxy(),
                           logger=self.logger,
                           timeout=self.conn_timeout)
                self.container_puts += 1
                self.container_stats['puts'] += 1
                self.container_stats['bytes'] += row['size']
                self.logger.increment('puts')
                self.logger.timing_since('puts.timing', start_time)
        except ClientException as err:
            if err.http_status == HTTP_UNAUTHORIZED:
                self.logger.info(
                    _('Unauth %(sync_from)r => %(sync_to)r'), {
                        'sync_from':
                        '%s/%s' %
                        (quote(info['account']), quote(info['container'])),
                        'sync_to':
                        sync_to
                    })
            elif err.http_status == HTTP_NOT_FOUND:
                self.logger.info(
                    _('Not found %(sync_from)r => %(sync_to)r \
                      - object %(obj_name)r'), {
                        'sync_from':
                        '%s/%s' %
                        (quote(info['account']), quote(info['container'])),
                        'sync_to':
                        sync_to,
                        'obj_name':
                        row['name']
                    })
            else:
                self.logger.exception(_('ERROR Syncing %(db_file)s %(row)s'), {
                    'db_file': str(broker),
                    'row': row
                })
            self.container_failures += 1
            self.logger.increment('failures')
            return False
        except (Exception, Timeout) as err:
            self.logger.exception(_('ERROR Syncing %(db_file)s %(row)s'), {
                'db_file': str(broker),
                'row': row
            })
            self.container_failures += 1
            self.logger.increment('failures')
            return False
        return True
Example #47
0
def disposition_format(filename):
    return 'attachment; filename="%s"' % quote(filename, safe='/ ')
Example #48
0
    def base_request(self, method, container=None, name=None, prefix=None,
                     headers=None, proxy=None, contents=None,
                     full_listing=None, logger=None, additional_info=None,
                     timeout=None):
        # Common request method
        trans_start = time()
        url = self.url

        if headers is None:
            headers = {}

        if self.token:
            headers['X-Auth-Token'] = self.token

        if container:
            url = '%s/%s' % (url.rstrip('/'), quote(container))

        if name:
            url = '%s/%s' % (url.rstrip('/'), quote(name))
        else:
            url += '?format=json'
            if prefix:
                url += '&prefix=%s' % prefix

        req = urllib2.Request(url, headers=headers, data=contents)
        if proxy:
            proxy = urllib.parse.urlparse(proxy)
            req.set_proxy(proxy.netloc, proxy.scheme)
        req.get_method = lambda: method
        conn = urllib2.urlopen(req, timeout=timeout)
        body = conn.read()
        try:
            body_data = json.loads(body)
        except ValueError:
            body_data = None
        trans_stop = time()
        if logger:
            sent_content_length = 0
            for n, v in headers.items():
                nl = n.lower()
                if nl == 'content-length':
                    try:
                        sent_content_length = int(v)
                        break
                    except ValueError:
                        pass
            logger.debug("-> " + " ".join(
                quote(str(x) if x else "-", ":/")
                for x in (
                    strftime('%Y-%m-%dT%H:%M:%S', gmtime(trans_stop)),
                    method,
                    url,
                    conn.getcode(),
                    sent_content_length,
                    conn.info()['content-length'],
                    trans_start,
                    trans_stop,
                    trans_stop - trans_start,
                    additional_info
                )))
        return [None, body_data]
Example #49
0
    def _coalesce_requests(self):
        pending_req = pending_etag = pending_size = None
        try:
            for seg_dict in self.listing_iter:
                if 'raw_data' in seg_dict:
                    if pending_req:
                        yield pending_req, pending_etag, pending_size

                    to_yield = seg_dict['raw_data'][
                        seg_dict['first_byte']:seg_dict['last_byte'] + 1]
                    yield to_yield, None, len(seg_dict['raw_data'])
                    pending_req = pending_etag = pending_size = None
                    continue

                seg_path, seg_etag, seg_size, first_byte, last_byte = (
                    seg_dict['path'], seg_dict.get('hash'),
                    seg_dict.get('bytes'), seg_dict['first_byte'],
                    seg_dict['last_byte'])
                if seg_size is not None:
                    seg_size = int(seg_size)
                first_byte = first_byte or 0
                go_to_end = last_byte is None or (seg_size is not None and
                                                  last_byte == seg_size - 1)
                # The "multipart-manifest=get" query param ensures that the
                # segment is a plain old object, not some flavor of large
                # object; therefore, its etag is its MD5sum and hence we can
                # check it.
                path = quote(seg_path) + '?multipart-manifest=get'
                seg_req = make_subrequest(
                    self.req.environ,
                    path=path,
                    method='GET',
                    headers={
                        'x-auth-token': self.req.headers.get('x-auth-token')
                    },
                    agent=('%(orig)s ' + self.ua_suffix),
                    swift_source=self.swift_source)

                seg_req_rangeval = None
                if first_byte != 0 or not go_to_end:
                    seg_req_rangeval = "%s-%s" % (first_byte, ''
                                                  if go_to_end else last_byte)
                    seg_req.headers['Range'] = "bytes=" + seg_req_rangeval

                # We can only coalesce if paths match and we know the segment
                # size (so we can check that the ranges will be allowed)
                if pending_req and pending_req.path == seg_req.path and \
                        seg_size is not None:

                    # Make a new Range object so that we don't goof up the
                    # existing one in case of invalid ranges. Note that a
                    # range set with too many individual byteranges is
                    # invalid, so we can combine N valid byteranges and 1
                    # valid byterange and get an invalid range set.
                    if pending_req.range:
                        new_range_str = str(pending_req.range)
                    else:
                        new_range_str = "bytes=0-%d" % (seg_size - 1)

                    if seg_req.range:
                        new_range_str += "," + seg_req_rangeval
                    else:
                        new_range_str += ",0-%d" % (seg_size - 1)

                    if Range(new_range_str).ranges_for_length(seg_size):
                        # Good news! We can coalesce the requests
                        pending_req.headers['Range'] = new_range_str
                        continue
                    # else, Too many ranges, or too much backtracking, or ...

                if pending_req:
                    yield pending_req, pending_etag, pending_size
                pending_req = seg_req
                pending_etag = seg_etag
                pending_size = seg_size

        except ListingIterError:
            e_type, e_value, e_traceback = sys.exc_info()
            if pending_req:
                yield pending_req, pending_etag, pending_size
            six.reraise(e_type, e_value, e_traceback)

        if pending_req:
            yield pending_req, pending_etag, pending_size
Example #50
0
    def handle_multipart_put(self, req, start_response):
        """
        Will handle the PUT of a SLO manifest.
        Heads every object in manifest to check if is valid and if so will
        save a manifest generated from the user input. Uses WSGIContext to
        call self and start_response and returns a WSGI iterator.

        :params req: a swob.Request with an obj in path
        :raises: HttpException on errors
        """
        try:
            vrs, account, container, obj = req.split_path(1, 4, True)
        except ValueError:
            return self.app(req.environ, start_response)
        if req.content_length > self.max_manifest_size:
            raise HTTPRequestEntityTooLarge(
                "Manifest File > %d bytes" % self.max_manifest_size)
        if req.headers.get('X-Copy-From'):
            raise HTTPMethodNotAllowed(
                'Multipart Manifest PUTs cannot be COPY requests')
        if req.content_length is None and \
                req.headers.get('transfer-encoding', '').lower() != 'chunked':
            raise HTTPLengthRequired(request=req)
        parsed_data = parse_and_validate_input(
            req.body_file.read(self.max_manifest_size),
            req.path, self.min_segment_size)
        problem_segments = []

        if len(parsed_data) > self.max_manifest_segments:
            raise HTTPRequestEntityTooLarge(
                'Number of segments must be <= %d' %
                self.max_manifest_segments)
        total_size = 0
        out_content_type = req.accept.best_match(ACCEPTABLE_FORMATS)
        if not out_content_type:
            out_content_type = 'text/plain'
        data_for_storage = []
        slo_etag = md5()
        last_obj_path = None
        for index, seg_dict in enumerate(parsed_data):
            obj_name = seg_dict['path']
            if isinstance(obj_name, six.text_type):
                obj_name = obj_name.encode('utf-8')
            obj_path = '/'.join(['', vrs, account, obj_name.lstrip('/')])

            new_env = req.environ.copy()
            new_env['PATH_INFO'] = obj_path
            new_env['REQUEST_METHOD'] = 'HEAD'
            new_env['swift.source'] = 'SLO'
            del(new_env['wsgi.input'])
            del(new_env['QUERY_STRING'])
            new_env['CONTENT_LENGTH'] = 0
            new_env['HTTP_USER_AGENT'] = \
                '%s MultipartPUT' % req.environ.get('HTTP_USER_AGENT')
            if obj_path != last_obj_path:
                last_obj_path = obj_path
                head_seg_resp = \
                    Request.blank(obj_path, new_env).get_response(self)

            if head_seg_resp.is_success:
                segment_length = head_seg_resp.content_length
                if seg_dict.get('range'):
                    # Since we now know the length, we can normalize the
                    # range. We know that there is exactly one range
                    # requested since we checked that earlier in
                    # parse_and_validate_input().
                    ranges = seg_dict['range'].ranges_for_length(
                        head_seg_resp.content_length)

                    if not ranges:
                        problem_segments.append([quote(obj_name),
                                                 'Unsatisfiable Range'])
                    elif ranges == [(0, head_seg_resp.content_length)]:
                        # Just one range, and it exactly matches the object.
                        # Why'd we do this again?
                        del seg_dict['range']
                        segment_length = head_seg_resp.content_length
                    else:
                        rng = ranges[0]
                        seg_dict['range'] = '%d-%d' % (rng[0], rng[1] - 1)
                        segment_length = rng[1] - rng[0]

                if segment_length < self.min_segment_size and \
                        index < len(parsed_data) - 1:
                    problem_segments.append(
                        [quote(obj_name),
                         'Too small; each segment, except the last, must be '
                         'at least %d bytes.' % self.min_segment_size])
                total_size += segment_length
                if seg_dict['size_bytes'] is not None and \
                        seg_dict['size_bytes'] != head_seg_resp.content_length:
                    problem_segments.append([quote(obj_name), 'Size Mismatch'])
                if seg_dict['etag'] is None or \
                        seg_dict['etag'] == head_seg_resp.etag:
                    if seg_dict.get('range'):
                        slo_etag.update('%s:%s;' % (head_seg_resp.etag,
                                                    seg_dict['range']))
                    else:
                        slo_etag.update(head_seg_resp.etag)
                else:
                    problem_segments.append([quote(obj_name), 'Etag Mismatch'])
                if head_seg_resp.last_modified:
                    last_modified = head_seg_resp.last_modified
                else:
                    # shouldn't happen
                    last_modified = datetime.now()

                last_modified_formatted = \
                    last_modified.strftime('%Y-%m-%dT%H:%M:%S.%f')
                seg_data = {'name': '/' + seg_dict['path'].lstrip('/'),
                            'bytes': head_seg_resp.content_length,
                            'hash': head_seg_resp.etag,
                            'content_type': head_seg_resp.content_type,
                            'last_modified': last_modified_formatted}
                if seg_dict.get('range'):
                    seg_data['range'] = seg_dict['range']

                if config_true_value(
                        head_seg_resp.headers.get('X-Static-Large-Object')):
                    seg_data['sub_slo'] = True
                data_for_storage.append(seg_data)

            else:
                problem_segments.append([quote(obj_name),
                                         head_seg_resp.status])
        if problem_segments:
            resp_body = get_response_body(
                out_content_type, {}, problem_segments)
            raise HTTPBadRequest(resp_body, content_type=out_content_type)
        env = req.environ

        if not env.get('CONTENT_TYPE'):
            guessed_type, _junk = mimetypes.guess_type(req.path_info)
            env['CONTENT_TYPE'] = guessed_type or 'application/octet-stream'
        env['swift.content_type_overridden'] = True
        env['CONTENT_TYPE'] += ";swift_bytes=%d" % total_size
        env['HTTP_X_STATIC_LARGE_OBJECT'] = 'True'
        json_data = json.dumps(data_for_storage)
        if six.PY3:
            json_data = json_data.encode('utf-8')
        env['CONTENT_LENGTH'] = str(len(json_data))
        env['wsgi.input'] = BytesIO(json_data)

        slo_put_context = SloPutContext(self, slo_etag)
        return slo_put_context.handle_slo_put(req, start_response)
Example #51
0
def parse_and_validate_input(req_body, req_path, min_segment_size):
    """
    Given a request body, parses it and returns a list of dictionaries.

    The output structure is nearly the same as the input structure, but it
    is not an exact copy. Given a valid input dictionary `d_in`, its
    corresponding output dictionary `d_out` will be as follows:

      * d_out['etag'] == d_in['etag']

      * d_out['path'] == d_in['path']

      * d_in['size_bytes'] can be a string ("12") or an integer (12), but
        d_out['size_bytes'] is an integer.

      * (optional) d_in['range'] is a string of the form "M-N", "M-", or
        "-N", where M and N are non-negative integers. d_out['range'] is the
        corresponding swob.Range object. If d_in does not have a key
        'range', neither will d_out.

    :raises: HTTPException on parse errors or semantic errors (e.g. bogus
        JSON structure, syntactically invalid ranges)

    :returns: a list of dictionaries on success
    """
    try:
        parsed_data = json.loads(req_body)
    except ValueError:
        raise HTTPBadRequest("Manifest must be valid JSON.\n")

    if not isinstance(parsed_data, list):
        raise HTTPBadRequest("Manifest must be a list.\n")

    # If we got here, req_path refers to an object, so this won't ever raise
    # ValueError.
    vrs, account, _junk = split_path(req_path, 3, 3, True)

    errors = []
    num_segs = len(parsed_data)
    for seg_index, seg_dict in enumerate(parsed_data):
        if not isinstance(seg_dict, dict):
            errors.append("Index %d: not a JSON object" % seg_index)
            continue

        missing_keys = [k for k in REQUIRED_SLO_KEYS if k not in seg_dict]
        if missing_keys:
            errors.append(
                "Index %d: missing keys %s"
                % (seg_index,
                   ", ".join('"%s"' % (mk,) for mk in sorted(missing_keys))))
            continue

        extraneous_keys = [k for k in seg_dict if k not in ALLOWED_SLO_KEYS]
        if extraneous_keys:
            errors.append(
                "Index %d: extraneous keys %s"
                % (seg_index,
                   ", ".join('"%s"' % (ek,)
                             for ek in sorted(extraneous_keys))))
            continue

        if not isinstance(seg_dict['path'], basestring):
            errors.append("Index %d: \"path\" must be a string" % seg_index)
            continue
        if not (seg_dict['etag'] is None or
                isinstance(seg_dict['etag'], basestring)):
            errors.append(
                "Index %d: \"etag\" must be a string or null" % seg_index)
            continue

        if '/' not in seg_dict['path'].strip('/'):
            errors.append(
                "Index %d: path does not refer to an object. Path must be of "
                "the form /container/object." % seg_index)
            continue

        seg_size = seg_dict['size_bytes']
        if seg_size is not None:
            try:
                seg_size = int(seg_size)
                seg_dict['size_bytes'] = seg_size
            except (TypeError, ValueError):
                errors.append("Index %d: invalid size_bytes" % seg_index)
                continue
            if (seg_size < min_segment_size and seg_index < num_segs - 1):
                errors.append("Index %d: too small; each segment, except "
                              "the last, must be at least %d bytes."
                              % (seg_index, min_segment_size))
                continue

        obj_path = '/'.join(['', vrs, account, seg_dict['path'].lstrip('/')])
        if req_path == quote(obj_path):
            errors.append(
                "Index %d: manifest must not include itself as a segment"
                % seg_index)
            continue

        if seg_dict.get('range'):
            try:
                seg_dict['range'] = Range('bytes=%s' % seg_dict['range'])
            except ValueError:
                errors.append("Index %d: invalid range" % seg_index)
                continue

            if len(seg_dict['range'].ranges) > 1:
                errors.append("Index %d: multiple ranges (only one allowed)"
                              % seg_index)
                continue

            # If the user *told* us the object's size, we can check range
            # satisfiability right now. If they lied about the size, we'll
            # fail that validation later.
            if (seg_size is not None and
                    len(seg_dict['range'].ranges_for_length(seg_size)) != 1):
                errors.append("Index %d: unsatisfiable range" % seg_index)
                continue

    if errors:
        error_message = "".join(e + "\n" for e in errors)
        raise HTTPBadRequest(error_message,
                             headers={"Content-Type": "text/plain"})

    return parsed_data
Example #52
0
def parse_and_validate_input(req_body, req_path):
    """
    Given a request body, parses it and returns a list of dictionaries.

    The output structure is nearly the same as the input structure, but it
    is not an exact copy. Given a valid input dictionary `d_in`, its
    corresponding output dictionary `d_out` will be as follows:

      * d_out['etag'] == d_in['etag']

      * d_out['path'] == d_in['path']

      * d_in['size_bytes'] can be a string ("12") or an integer (12), but
        d_out['size_bytes'] is an integer.

      * (optional) d_in['range'] is a string of the form "M-N", "M-", or
        "-N", where M and N are non-negative integers. d_out['range'] is the
        corresponding swob.Range object. If d_in does not have a key
        'range', neither will d_out.

    :raises: HTTPException on parse errors or semantic errors (e.g. bogus
        JSON structure, syntactically invalid ranges)

    :returns: a list of dictionaries on success
    """
    try:
        parsed_data = json.loads(req_body)
    except ValueError:
        raise HTTPBadRequest("Manifest must be valid JSON.\n")

    if not isinstance(parsed_data, list):
        raise HTTPBadRequest("Manifest must be a list.\n")

    # If we got here, req_path refers to an object, so this won't ever raise
    # ValueError.
    vrs, account, _junk = split_path(req_path, 3, 3, True)

    errors = []
    for seg_index, seg_dict in enumerate(parsed_data):
        if not isinstance(seg_dict, dict):
            errors.append("Index %d: not a JSON object" % seg_index)
            continue

        missing_keys = [k for k in REQUIRED_SLO_KEYS if k not in seg_dict]
        if missing_keys:
            errors.append(
                "Index %d: missing keys %s" %
                (seg_index, ", ".join('"%s"' % (mk, )
                                      for mk in sorted(missing_keys))))
            continue

        extraneous_keys = [k for k in seg_dict if k not in ALLOWED_SLO_KEYS]
        if extraneous_keys:
            errors.append(
                "Index %d: extraneous keys %s" %
                (seg_index, ", ".join('"%s"' % (ek, )
                                      for ek in sorted(extraneous_keys))))
            continue

        if not isinstance(seg_dict['path'], six.string_types):
            errors.append("Index %d: \"path\" must be a string" % seg_index)
            continue
        if not (seg_dict['etag'] is None
                or isinstance(seg_dict['etag'], six.string_types)):
            errors.append("Index %d: \"etag\" must be a string or null" %
                          seg_index)
            continue

        if '/' not in seg_dict['path'].strip('/'):
            errors.append(
                "Index %d: path does not refer to an object. Path must be of "
                "the form /container/object." % seg_index)
            continue

        seg_size = seg_dict['size_bytes']
        if seg_size is not None:
            try:
                seg_size = int(seg_size)
                seg_dict['size_bytes'] = seg_size
            except (TypeError, ValueError):
                errors.append("Index %d: invalid size_bytes" % seg_index)
                continue
            if seg_size < 1:
                errors.append("Index %d: too small; each segment must be "
                              "at least 1 byte." % (seg_index, ))
                continue

        obj_path = '/'.join(['', vrs, account, seg_dict['path'].lstrip('/')])
        if req_path == quote(obj_path):
            errors.append(
                "Index %d: manifest must not include itself as a segment" %
                seg_index)
            continue

        if seg_dict.get('range'):
            try:
                seg_dict['range'] = Range('bytes=%s' % seg_dict['range'])
            except ValueError:
                errors.append("Index %d: invalid range" % seg_index)
                continue

            if len(seg_dict['range'].ranges) > 1:
                errors.append("Index %d: multiple ranges (only one allowed)" %
                              seg_index)
                continue

            # If the user *told* us the object's size, we can check range
            # satisfiability right now. If they lied about the size, we'll
            # fail that validation later.
            if (seg_size is not None and
                    len(seg_dict['range'].ranges_for_length(seg_size)) != 1):
                errors.append("Index %d: unsatisfiable range" % seg_index)
                continue

    if errors:
        error_message = "".join(e + "\n" for e in errors)
        raise HTTPBadRequest(error_message,
                             headers={"Content-Type": "text/plain"})

    return parsed_data
Example #53
0
    def _replicate_object(self, partition, object_file, node_id):
        """
        Replicate the db, choosing method based on whether or not it
        already exists on peers.

        :param partition: partition to be replicated to
        :param object_file: DB file name to be replicated
        :param node_id: node id of the node to be replicated to
        :returns: a tuple (success, responses). ``success`` is a boolean that
            is True if the method completed successfully, False otherwise.
            ``responses`` is a list of booleans each of which indicates the
            success or not of replicating to a peer node if replication has
            been attempted. ``success`` is False if any of ``responses`` is
            False; when ``responses`` is empty, ``success`` may be either True
            or False.
        """
        start_time = now = time.time()
        self.logger.debug('Replicating db %s', object_file)
        self.stats['attempted'] += 1
        self.logger.increment('attempts')
        shouldbehere = True
        responses = []
        try:
            broker = self.brokerclass(object_file, pending_timeout=30)
            broker.reclaim(now - self.reclaim_age,
                           now - (self.reclaim_age * 2))
            info = broker.get_replication_info()
            bpart = self.ring.get_part(info['account'], info.get('container'))
            if bpart != int(partition):
                partition = bpart
                # Important to set this false here since the later check only
                # checks if it's on the proper device, not partition.
                shouldbehere = False
                name = '/' + quote(info['account'])
                if 'container' in info:
                    name += '/' + quote(info['container'])
                self.logger.error(
                    'Found %s for %s when it should be on partition %s; will '
                    'replicate out and remove.' % (object_file, name, bpart))
        except (Exception, Timeout) as e:
            if 'no such table' in str(e):
                self.logger.error(_('Quarantining DB %s'), object_file)
                quarantine_db(broker.db_file, broker.db_type)
            else:
                self.logger.exception(_('ERROR reading db %s'), object_file)
            nodes = self.ring.get_part_nodes(int(partition))
            self._add_failure_stats([(failure_dev['replication_ip'],
                                      failure_dev['device'])
                                     for failure_dev in nodes])
            self.logger.increment('failures')
            return False, responses
        if broker.is_reclaimable(now, self.reclaim_age):
            if self.report_up_to_date(info):
                self.delete_db(broker)
            self.logger.timing_since('timing', start_time)
            return True, responses
        failure_devs_info = set()
        nodes = self.ring.get_part_nodes(int(partition))
        local_dev = None
        for node in nodes:
            if node['id'] == node_id:
                local_dev = node
                break
        if shouldbehere:
            shouldbehere = bool([n for n in nodes if n['id'] == node_id])
        # See Footnote [1] for an explanation of the repl_nodes assignment.
        if len(nodes) > 1:
            i = 0
            while i < len(nodes) and nodes[i]['id'] != node_id:
                i += 1
            repl_nodes = nodes[i + 1:] + nodes[:i]
        else:  # Special case if using only a single replica
            repl_nodes = nodes
        more_nodes = self.ring.get_more_nodes(int(partition))
        if not local_dev:
            # Check further if local device is a handoff node
            for node in self.ring.get_more_nodes(int(partition)):
                if node['id'] == node_id:
                    local_dev = node
                    break
        for node in repl_nodes:
            different_region = False
            if local_dev and local_dev['region'] != node['region']:
                # This additional information will help later if we
                # want to handle syncing to a node in different
                # region with some optimizations.
                different_region = True
            success = False
            try:
                success = self._repl_to_node(node, broker, partition, info,
                                             different_region)
            except DriveNotMounted:
                try:
                    repl_nodes.append(next(more_nodes))
                except StopIteration:
                    self.logger.error(
                        _('ERROR There are not enough handoff nodes to reach '
                          'replica count for partition %s'), partition)
                self.logger.error(_('ERROR Remote drive not mounted %s'), node)
            except (Exception, Timeout):
                self.logger.exception(
                    _('ERROR syncing %(file)s with node'
                      ' %(node)s'), {
                          'file': object_file,
                          'node': node
                      })
            if not success:
                failure_devs_info.add((node['replication_ip'], node['device']))
            self.logger.increment('successes' if success else 'failures')
            responses.append(success)
        try:
            self._post_replicate_hook(broker, info, responses)
        except (Exception, Timeout):
            self.logger.exception(
                'UNHANDLED EXCEPTION: in post replicate '
                'hook for %s', broker.db_file)
        if not shouldbehere:
            if not self.cleanup_post_replicate(broker, info, responses):
                failure_devs_info.update([(failure_dev['replication_ip'],
                                           failure_dev['device'])
                                          for failure_dev in repl_nodes])
        target_devs_info = set([(target_dev['replication_ip'],
                                 target_dev['device'])
                                for target_dev in repl_nodes])
        self.stats['success'] += len(target_devs_info - failure_devs_info)
        self._add_failure_stats(failure_devs_info)

        self.logger.timing_since('timing', start_time)
        if shouldbehere:
            responses.append(True)
        return all(responses), responses
Example #54
0
    def base_request(self,
                     method,
                     container=None,
                     name=None,
                     prefix=None,
                     headers=None,
                     proxy=None,
                     contents=None,
                     full_listing=None,
                     logger=None,
                     additional_info=None):
        # Common request method
        trans_start = time()
        url = self.url

        if headers is None:
            headers = {}

        if self.token:
            headers['X-Auth-Token'] = self.token

        if container:
            url = '%s/%s' % (url.rstrip('/'), quote(container))

        if name:
            url = '%s/%s' % (url.rstrip('/'), quote(name))
        else:
            url += '?format=json'
            if prefix:
                url += '&prefix=%s' % prefix

        if proxy:
            proxy = urlparse.urlparse(proxy)
            proxy = urllib2.ProxyHandler({proxy.scheme: proxy.netloc})
            opener = urllib2.build_opener(proxy)
            urllib2.install_opener(opener)

        req = urllib2.Request(url, headers=headers, data=contents)
        req.get_method = lambda: method
        urllib2.urlopen(req)
        conn = urllib2.urlopen(req)
        body = conn.read()
        try:
            body_data = json.loads(body)
        except ValueError:
            body_data = None
        trans_stop = time()
        if logger:
            sent_content_length = 0
            for n, v in headers.items():
                nl = n.lower()
                if nl == 'content-length':
                    try:
                        sent_content_length = int(v)
                        break
                    except ValueError:
                        pass
            logger.debug("-> " + " ".join(
                quote(str(x) if x else "-", ":/")
                for x in (strftime('%Y-%m-%dT%H:%M:%S', gmtime(trans_stop)),
                          method, url, conn.getcode(), sent_content_length,
                          conn.info()['content-length'], trans_start,
                          trans_stop, trans_stop - trans_start,
                          additional_info)))
        return [None, body_data]
Example #55
0
    def handle_multipart_put(self, req, start_response):
        """
        Will handle the PUT of a SLO manifest.
        Heads every object in manifest to check if is valid and if so will
        save a manifest generated from the user input. Uses WSGIContext to
        call self and start_response and returns a WSGI iterator.

        :params req: a swob.Request with an obj in path
        :raises: HttpException on errors
        """
        try:
            vrs, account, container, obj = req.split_path(1, 4, True)
        except ValueError:
            return self.app(req.environ, start_response)
        if req.content_length > self.max_manifest_size:
            raise HTTPRequestEntityTooLarge("Manifest File > %d bytes" %
                                            self.max_manifest_size)
        if req.headers.get('X-Copy-From'):
            raise HTTPMethodNotAllowed(
                'Multipart Manifest PUTs cannot be COPY requests')
        if req.content_length is None and \
                req.headers.get('transfer-encoding', '').lower() != 'chunked':
            raise HTTPLengthRequired(request=req)
        parsed_data = parse_input(req.body_file.read(self.max_manifest_size))
        problem_segments = []

        if len(parsed_data) > self.max_manifest_segments:
            raise HTTPRequestEntityTooLarge(
                'Number of segments must be <= %d' %
                self.max_manifest_segments)
        total_size = 0
        out_content_type = req.accept.best_match(ACCEPTABLE_FORMATS)
        if not out_content_type:
            out_content_type = 'text/plain'
        data_for_storage = []
        slo_etag = md5()
        for index, seg_dict in enumerate(parsed_data):
            obj_name = seg_dict['path']
            if isinstance(obj_name, unicode):
                obj_name = obj_name.encode('utf-8')
            obj_path = '/'.join(['', vrs, account, obj_name.lstrip('/')])
            try:
                seg_size = int(seg_dict['size_bytes'])
            except (ValueError, TypeError):
                raise HTTPBadRequest('Invalid Manifest File')
            if seg_size < self.min_segment_size and \
                    (index == 0 or index < len(parsed_data) - 1):
                raise HTTPBadRequest(
                    'Each segment, except the last, must be at least '
                    '%d bytes.' % self.min_segment_size)

            new_env = req.environ.copy()
            new_env['PATH_INFO'] = obj_path
            new_env['REQUEST_METHOD'] = 'HEAD'
            new_env['swift.source'] = 'SLO'
            del (new_env['wsgi.input'])
            del (new_env['QUERY_STRING'])
            new_env['CONTENT_LENGTH'] = 0
            new_env['HTTP_USER_AGENT'] = \
                '%s MultipartPUT' % req.environ.get('HTTP_USER_AGENT')
            head_seg_resp = \
                Request.blank(obj_path, new_env).get_response(self)
            if head_seg_resp.is_success:
                total_size += seg_size
                if seg_size != head_seg_resp.content_length:
                    problem_segments.append([quote(obj_name), 'Size Mismatch'])
                if seg_dict['etag'] == head_seg_resp.etag:
                    slo_etag.update(seg_dict['etag'])
                else:
                    problem_segments.append([quote(obj_name), 'Etag Mismatch'])
                if head_seg_resp.last_modified:
                    last_modified = head_seg_resp.last_modified
                else:
                    # shouldn't happen
                    last_modified = datetime.now()

                last_modified_formatted = \
                    last_modified.strftime('%Y-%m-%dT%H:%M:%S.%f')
                seg_data = {
                    'name': '/' + seg_dict['path'].lstrip('/'),
                    'bytes': seg_size,
                    'hash': seg_dict['etag'],
                    'content_type': head_seg_resp.content_type,
                    'last_modified': last_modified_formatted
                }
                if config_true_value(
                        head_seg_resp.headers.get('X-Static-Large-Object')):
                    seg_data['sub_slo'] = True
                data_for_storage.append(seg_data)

            else:
                problem_segments.append(
                    [quote(obj_name), head_seg_resp.status])
        if problem_segments:
            resp_body = get_response_body(out_content_type, {},
                                          problem_segments)
            raise HTTPBadRequest(resp_body, content_type=out_content_type)
        env = req.environ

        if not env.get('CONTENT_TYPE'):
            guessed_type, _junk = mimetypes.guess_type(req.path_info)
            env['CONTENT_TYPE'] = guessed_type or 'application/octet-stream'
        env['swift.content_type_overridden'] = True
        env['CONTENT_TYPE'] += ";swift_bytes=%d" % total_size
        env['HTTP_X_STATIC_LARGE_OBJECT'] = 'True'
        json_data = json.dumps(data_for_storage)
        env['CONTENT_LENGTH'] = str(len(json_data))
        env['wsgi.input'] = StringIO(json_data)

        slo_put_context = SloPutContext(self, slo_etag)
        return slo_put_context.handle_slo_put(req, start_response)
Example #56
0
    def handle_object(self, env, start_response):
        """
        Handles a possible static web request for an object. This object could
        resolve into an index or listing request.

        :param env: The original WSGI environment dict.
        :param start_response: The original WSGI start_response hook.
        """
        tmp_env = dict(env)
        tmp_env['HTTP_USER_AGENT'] = \
            '%s StaticWeb' % env.get('HTTP_USER_AGENT')
        tmp_env['swift.source'] = 'SW'
        resp = self._app_call(tmp_env)
        status_int = self._get_status_int()
        self._get_container_info(env)
        if is_success(status_int) or is_redirection(status_int):
            # Treat directory marker objects as not found
            if not self._dir_type:
                self._dir_type = 'application/directory'
            content_length = self._response_header_value('content-length')
            content_length = int(content_length) if content_length else 0
            if self._response_header_value('content-type') == self._dir_type \
                    and content_length <= 1:
                status_int = HTTP_NOT_FOUND
            else:
                start_response(self._response_status, self._response_headers,
                               self._response_exc_info)
                return resp
        if status_int != HTTP_NOT_FOUND:
            # Retaining the previous code's behavior of not using custom error
            # pages for non-404 errors.
            self._error = None
            return self._error_response(resp, env, start_response)
        if not self._listings and not self._index:
            start_response(self._response_status, self._response_headers,
                           self._response_exc_info)
            return resp
        status_int = HTTP_NOT_FOUND
        if self._index:
            tmp_env = dict(env)
            tmp_env['HTTP_USER_AGENT'] = \
                '%s StaticWeb' % env.get('HTTP_USER_AGENT')
            tmp_env['swift.source'] = 'SW'
            if not tmp_env['PATH_INFO'].endswith('/'):
                tmp_env['PATH_INFO'] += '/'
            tmp_env['PATH_INFO'] += self._index
            resp = self._app_call(tmp_env)
            status_int = self._get_status_int()
            if is_success(status_int) or is_redirection(status_int):
                if not env['PATH_INFO'].endswith('/'):
                    resp = HTTPMovedPermanently(location=env['PATH_INFO'] +
                                                '/')
                    return resp(env, start_response)
                start_response(self._response_status, self._response_headers,
                               self._response_exc_info)
                return resp
        if status_int == HTTP_NOT_FOUND:
            if not env['PATH_INFO'].endswith('/'):
                tmp_env = make_env(
                    env,
                    'GET',
                    '/%s/%s/%s' % (self.version, self.account, self.container),
                    self.agent,
                    swift_source='SW')
                tmp_env['QUERY_STRING'] = 'limit=1&format=json&delimiter' \
                    '=/&limit=1&prefix=%s' % quote(self.obj + '/')
                resp = self._app_call(tmp_env)
                body = ''.join(resp)
                if not is_success(self._get_status_int()) or not body or \
                        not json.loads(body):
                    resp = HTTPNotFound()(env, self._start_response)
                    return self._error_response(resp, env, start_response)
                resp = HTTPMovedPermanently(location=env['PATH_INFO'] + '/')
                return resp(env, start_response)
            return self._listing(env, start_response, self.obj)
Example #57
0
    def base_request(self,
                     method,
                     container=None,
                     name=None,
                     prefix=None,
                     headers=None,
                     proxy=None,
                     contents=None,
                     full_listing=None,
                     logger=None,
                     additional_info=None,
                     timeout=None,
                     marker=None):
        # Common request method
        trans_start = time()
        url = self.url

        if full_listing:
            info, body_data = self.base_request(method,
                                                container,
                                                name,
                                                prefix,
                                                headers,
                                                proxy,
                                                timeout=timeout,
                                                marker=marker)
            listing = body_data
            while listing:
                marker = listing[-1]['name']
                info, listing = self.base_request(method,
                                                  container,
                                                  name,
                                                  prefix,
                                                  headers,
                                                  proxy,
                                                  timeout=timeout,
                                                  marker=marker)
                if listing:
                    body_data.extend(listing)
            return [info, body_data]

        if headers is None:
            headers = {}

        if self.token:
            headers['X-Auth-Token'] = self.token

        if container:
            url = '%s/%s' % (url.rstrip('/'), quote(container))

        if name:
            url = '%s/%s' % (url.rstrip('/'), quote(name))
        else:
            params = ['format=json']
            if prefix:
                params.append('prefix=%s' % prefix)

            if marker:
                params.append('marker=%s' % quote(marker))

            url += '?' + '&'.join(params)

        req = urllib2.Request(url, headers=headers, data=contents)
        if proxy:
            proxy = urllib.parse.urlparse(proxy)
            req.set_proxy(proxy.netloc, proxy.scheme)
        req.get_method = lambda: method
        conn = urllib2.urlopen(req, timeout=timeout)
        body = conn.read()
        info = conn.info()
        try:
            body_data = json.loads(body)
        except ValueError:
            body_data = None
        trans_stop = time()
        if logger:
            sent_content_length = 0
            for n, v in headers.items():
                nl = n.lower()
                if nl == 'content-length':
                    try:
                        sent_content_length = int(v)
                        break
                    except ValueError:
                        pass
            logger.debug("-> " + " ".join(
                quote(str(x) if x else "-", ":/")
                for x in (strftime('%Y-%m-%dT%H:%M:%S', gmtime(trans_stop)),
                          method, url, conn.getcode(), sent_content_length,
                          info['content-length'], trans_start, trans_stop,
                          trans_stop - trans_start, additional_info)))
        return [info, body_data]
Example #58
0
    def handle_multipart_put(self, req, start_response):
        """
        Will handle the PUT of a SLO manifest.
        Heads every object in manifest to check if is valid and if so will
        save a manifest generated from the user input. Uses WSGIContext to
        call self and start_response and returns a WSGI iterator.

        :params req: a swob.Request with an obj in path
        :raises: HttpException on errors
        """
        try:
            vrs, account, container, obj = req.split_path(1, 4, True)
        except ValueError:
            return self.app(req.environ, start_response)
        if req.content_length > self.max_manifest_size:
            raise HTTPRequestEntityTooLarge(
                "Manifest File > %d bytes" % self.max_manifest_size)
        if req.headers.get('X-Copy-From'):
            raise HTTPMethodNotAllowed(
                'Multipart Manifest PUTs cannot be COPY requests')
        if req.content_length is None and \
                req.headers.get('transfer-encoding', '').lower() != 'chunked':
            raise HTTPLengthRequired(request=req)
        parsed_data = parse_and_validate_input(
            req.body_file.read(self.max_manifest_size),
            req.path)
        problem_segments = []

        if len(parsed_data) > self.max_manifest_segments:
            raise HTTPRequestEntityTooLarge(
                'Number of segments must be <= %d' %
                self.max_manifest_segments)
        total_size = 0
        out_content_type = req.accept.best_match(ACCEPTABLE_FORMATS)
        if not out_content_type:
            out_content_type = 'text/plain'
        data_for_storage = []
        slo_etag = md5()
        last_obj_path = None
        for index, seg_dict in enumerate(parsed_data):
            obj_name = seg_dict['path']
            if isinstance(obj_name, six.text_type):
                obj_name = obj_name.encode('utf-8')
            obj_path = '/'.join(['', vrs, account, obj_name.lstrip('/')])

            if obj_path != last_obj_path:
                last_obj_path = obj_path
                sub_req = make_subrequest(
                    req.environ, path=obj_path + '?',  # kill the query string
                    method='HEAD',
                    headers={'x-auth-token': req.headers.get('x-auth-token')},
                    agent='%(orig)s SLO MultipartPUT', swift_source='SLO')
                head_seg_resp = sub_req.get_response(self)

            if head_seg_resp.is_success:
                segment_length = head_seg_resp.content_length
                if seg_dict.get('range'):
                    # Since we now know the length, we can normalize the
                    # range. We know that there is exactly one range
                    # requested since we checked that earlier in
                    # parse_and_validate_input().
                    ranges = seg_dict['range'].ranges_for_length(
                        head_seg_resp.content_length)

                    if not ranges:
                        problem_segments.append([quote(obj_name),
                                                 'Unsatisfiable Range'])
                    elif ranges == [(0, head_seg_resp.content_length)]:
                        # Just one range, and it exactly matches the object.
                        # Why'd we do this again?
                        del seg_dict['range']
                        segment_length = head_seg_resp.content_length
                    else:
                        rng = ranges[0]
                        seg_dict['range'] = '%d-%d' % (rng[0], rng[1] - 1)
                        segment_length = rng[1] - rng[0]

                if segment_length < 1:
                    problem_segments.append(
                        [quote(obj_name),
                         'Too small; each segment must be at least 1 byte.'])
                total_size += segment_length
                if seg_dict['size_bytes'] is not None and \
                        seg_dict['size_bytes'] != head_seg_resp.content_length:
                    problem_segments.append([quote(obj_name), 'Size Mismatch'])
                if seg_dict['etag'] is None or \
                        seg_dict['etag'] == head_seg_resp.etag:
                    if seg_dict.get('range'):
                        slo_etag.update('%s:%s;' % (head_seg_resp.etag,
                                                    seg_dict['range']))
                    else:
                        slo_etag.update(head_seg_resp.etag)
                else:
                    problem_segments.append([quote(obj_name), 'Etag Mismatch'])
                if head_seg_resp.last_modified:
                    last_modified = head_seg_resp.last_modified
                else:
                    # shouldn't happen
                    last_modified = datetime.now()

                last_modified_formatted = \
                    last_modified.strftime('%Y-%m-%dT%H:%M:%S.%f')
                seg_data = {'name': '/' + seg_dict['path'].lstrip('/'),
                            'bytes': head_seg_resp.content_length,
                            'hash': head_seg_resp.etag,
                            'content_type': head_seg_resp.content_type,
                            'last_modified': last_modified_formatted}
                if seg_dict.get('range'):
                    seg_data['range'] = seg_dict['range']

                if config_true_value(
                        head_seg_resp.headers.get('X-Static-Large-Object')):
                    seg_data['sub_slo'] = True
                data_for_storage.append(seg_data)

            else:
                problem_segments.append([quote(obj_name),
                                         head_seg_resp.status])
        if problem_segments:
            resp_body = get_response_body(
                out_content_type, {}, problem_segments)
            raise HTTPBadRequest(resp_body, content_type=out_content_type)
        env = req.environ

        if not env.get('CONTENT_TYPE'):
            guessed_type, _junk = mimetypes.guess_type(req.path_info)
            env['CONTENT_TYPE'] = guessed_type or 'application/octet-stream'
        env['swift.content_type_overridden'] = True
        env['CONTENT_TYPE'] += ";swift_bytes=%d" % total_size
        env['HTTP_X_STATIC_LARGE_OBJECT'] = 'True'
        json_data = json.dumps(data_for_storage)
        if six.PY3:
            json_data = json_data.encode('utf-8')
        env['CONTENT_LENGTH'] = str(len(json_data))
        env['wsgi.input'] = BytesIO(json_data)

        slo_put_context = SloPutContext(self, slo_etag)
        return slo_put_context.handle_slo_put(req, start_response)