Example #1
 def _integral_keystone_identity(self, environ):
     """Extract the identity from the Keystone auth component."""
     if environ.get("HTTP_X_IDENTITY_STATUS") != "Confirmed":
         return
     roles = list_from_csv(environ.get("HTTP_X_ROLES", ""))
     service_roles = list_from_csv(environ.get("HTTP_X_SERVICE_ROLES", ""))
     identity = {
         "user": (environ.get("HTTP_X_USER_ID"), environ.get("HTTP_X_USER_NAME")),
         "tenant": (environ.get("HTTP_X_TENANT_ID"), environ.get("HTTP_X_TENANT_NAME")),
         "roles": roles,
         "service_roles": service_roles,
     }
     token_info = environ.get("keystone.token_info", {})
     auth_version = 0
     user_domain = project_domain = (None, None)
     if "access" in token_info:
         # ignore any domain id headers that authtoken may have set
         auth_version = 2
     elif "token" in token_info:
         auth_version = 3
         user_domain = (environ.get("HTTP_X_USER_DOMAIN_ID"), environ.get("HTTP_X_USER_DOMAIN_NAME"))
         project_domain = (environ.get("HTTP_X_PROJECT_DOMAIN_ID"), environ.get("HTTP_X_PROJECT_DOMAIN_NAME"))
     identity["user_domain"] = user_domain
     identity["project_domain"] = project_domain
     identity["auth_version"] = auth_version
     return identity
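Every example in this section leans on swift.common.utils.list_from_csv. As a point of reference, here is a minimal sketch of that helper, consistent with how the call sites above use it (a simplified reading, not necessarily the exact upstream code):

def list_from_csv(comma_separated_str):
    # Split on commas, strip whitespace, drop empties; tolerate None.
    if comma_separated_str:
        return [v.strip() for v in comma_separated_str.split(',')
                if v.strip()]
    return []

# list_from_csv('admin, swiftoperator,, ') -> ['admin', 'swiftoperator']
# list_from_csv(None) -> []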
Example #2
    def run_once(self, *args, **kwargs):
        start = time.time()
        self.logger.info(_("Running object replicator in script mode."))

        override_devices = list_from_csv(kwargs.get('devices'))
        override_partitions = list_from_csv(kwargs.get('partitions'))
        override_policies = list_from_csv(kwargs.get('policies'))
        if not override_devices:
            override_devices = None
        if not override_partitions:
            override_partitions = None
        if not override_policies:
            override_policies = None

        self.replicate(
            override_devices=override_devices,
            override_partitions=override_partitions,
            override_policies=override_policies)
        total = (time.time() - start) / 60
        self.logger.info(
            _("Object replication complete (once). (%.02f minutes)"), total)
        if not (override_partitions or override_devices):
            dump_recon_cache({'object_replication_time': total,
                              'object_replication_last': time.time()},
                             self.rcache, self.logger)
Example #3
 def _integral_keystone_identity(self, environ):
     """Extract the identity from the Keystone auth component."""
     if environ.get('HTTP_X_IDENTITY_STATUS') != 'Confirmed':
         return
     roles = list_from_csv(environ.get('HTTP_X_ROLES', ''))
     service_roles = list_from_csv(environ.get('HTTP_X_SERVICE_ROLES', ''))
     identity = {'user': (environ.get('HTTP_X_USER_ID'),
                          environ.get('HTTP_X_USER_NAME')),
                 'tenant': (environ.get('HTTP_X_TENANT_ID'),
                            environ.get('HTTP_X_TENANT_NAME')),
                 'roles': roles,
                 'service_roles': service_roles}
     token_info = environ.get('keystone.token_info', {})
     auth_version = 0
     user_domain = project_domain = (None, None)
     if 'access' in token_info:
         # ignore any domain id headers that authtoken may have set
         auth_version = 2
     elif 'token' in token_info:
         auth_version = 3
         user_domain = (environ.get('HTTP_X_USER_DOMAIN_ID'),
                        environ.get('HTTP_X_USER_DOMAIN_NAME'))
         project_domain = (environ.get('HTTP_X_PROJECT_DOMAIN_ID'),
                           environ.get('HTTP_X_PROJECT_DOMAIN_NAME'))
     identity['user_domain'] = user_domain
     identity['project_domain'] = project_domain
     identity['auth_version'] = auth_version
     return identity
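To make the flow above concrete, here is a self-contained sketch of the environ such a middleware would see; every header value is hypothetical:

def list_from_csv(s):
    # simplified stand-in for swift.common.utils.list_from_csv
    return [v.strip() for v in s.split(',') if v.strip()] if s else []

environ = {
    'HTTP_X_IDENTITY_STATUS': 'Confirmed',
    'HTTP_X_USER_ID': 'u123',              # hypothetical values
    'HTTP_X_USER_NAME': 'tester',
    'HTTP_X_ROLES': 'admin, swiftoperator',
    'keystone.token_info': {'token': {}},  # v3-shaped token info
}
roles = list_from_csv(environ.get('HTTP_X_ROLES', ''))
assert roles == ['admin', 'swiftoperator']
# 'token' in token_info selects auth_version 3; 'access' would select 2.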
Example #4
    def run_once(self, *args, **kwargs):
        self._zero_stats()
        self.logger.info(_("Running object replicator in script mode."))

        override_devices = list_from_csv(kwargs.get('devices'))
        override_partitions = list_from_csv(kwargs.get('partitions'))
        override_policies = list_from_csv(kwargs.get('policies'))
        if not override_devices:
            override_devices = None
        if not override_partitions:
            override_partitions = None
        if not override_policies:
            override_policies = None

        self.replicate(
            override_devices=override_devices,
            override_partitions=override_partitions,
            override_policies=override_policies)
        total = (time.time() - self.stats['start']) / 60
        self.logger.info(
            _("Object replication complete (once). (%.02f minutes)"), total)
        if not (override_partitions or override_devices):
            replication_last = time.time()
            dump_recon_cache({'replication_stats': self.stats,
                              'replication_time': total,
                              'replication_last': replication_last,
                              'object_replication_time': total,
                              'object_replication_last': replication_last},
                             self.rcache, self.logger)
Example #5
    def run_once(self, *args, **kwargs):
        self._zero_stats()
        self.logger.info(_("Running object replicator in script mode."))

        override_devices = list_from_csv(kwargs.get("devices"))
        override_partitions = list_from_csv(kwargs.get("partitions"))
        override_policies = list_from_csv(kwargs.get("policies"))
        if not override_devices:
            override_devices = None
        if not override_partitions:
            override_partitions = None
        if not override_policies:
            override_policies = None

        self.replicate(
            override_devices=override_devices,
            override_partitions=override_partitions,
            override_policies=override_policies,
        )
        total = (time.time() - self.stats["start"]) / 60
        self.logger.info(_("Object replication complete (once). (%.02f minutes)"), total)
        if not (override_partitions or override_devices):
            replication_last = time.time()
            dump_recon_cache(
                {
                    "replication_stats": self.stats,
                    "replication_time": total,
                    "replication_last": replication_last,
                    "object_replication_time": total,
                    "object_replication_last": replication_last,
                },
                self.rcache,
                self.logger,
            )
Example #6
 def __init__(self, app, conf):
     if not MODULE_DEPENDENCY_MET:
         # reraise the exception if the dependency wasn't met
         raise ImportError("dnspython is required for this module")
     self.app = app
     storage_domain = conf.get("storage_domain", "example.com")
     self.storage_domain = ["." + s for s in list_from_csv(storage_domain) if not s.startswith(".")]
     self.storage_domain += [s for s in list_from_csv(storage_domain) if s.startswith(".")]
     self.lookup_depth = int(conf.get("lookup_depth", "1"))
     self.memcache = None
     self.logger = get_logger(conf, log_route="cname-lookup")
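The two comprehensions above normalize every configured domain to a leading-dot form while preserving order (bare domains first, pre-dotted ones appended). A standalone sketch with made-up domains:

def list_from_csv(s):
    # simplified stand-in for swift.common.utils.list_from_csv
    return [v.strip() for v in s.split(',') if v.strip()] if s else []

storage_domain = 'example.com, .internal.example.org'
domains = ['.' + s for s in list_from_csv(storage_domain)
           if not s.startswith('.')]
domains += [s for s in list_from_csv(storage_domain) if s.startswith('.')]
assert domains == ['.example.com', '.internal.example.org']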
Example #7
 def run_once(self, *args, **kwargs):
     start = time.time()
     self.logger.info(_("Running object replicator in script mode."))
     override_devices = list_from_csv(kwargs.get("devices"))
     override_partitions = list_from_csv(kwargs.get("partitions"))
     self.replicate(override_devices=override_devices, override_partitions=override_partitions)
     total = (time.time() - start) / 60
     self.logger.info(_("Object replication complete (once). (%.02f minutes)"), total)
     if not (override_partitions or override_devices):
         dump_recon_cache(
             {"object_replication_time": total, "object_replication_last": time.time()}, self.rcache, self.logger
         )
Example #8
 def __init__(self, app, conf):
     if not MODULE_DEPENDENCY_MET:
         # reraise the exception if the dependency wasn't met
         raise ImportError('dnspython is required for this module')
     self.app = app
     storage_domain = conf.get('storage_domain', 'example.com')
     self.storage_domain = ['.' + s for s in
                            list_from_csv(storage_domain)
                            if not s.startswith('.')]
     self.storage_domain += [s for s in list_from_csv(storage_domain)
                             if s.startswith('.')]
     self.lookup_depth = int(conf.get('lookup_depth', '1'))
     self.memcache = None
     self.logger = get_logger(conf, log_route='cname-lookup')
Example #9
 def __init__(self, app, conf):
     self.app = app
     storage_domain = conf.get('storage_domain', 'example.com')
     self.storage_domain = ['.' + s for s in
                            list_from_csv(storage_domain)
                            if not s.startswith('.')]
     self.storage_domain += [s for s in list_from_csv(storage_domain)
                             if s.startswith('.')]
     self.path_root = '/' + conf.get('path_root', 'v1').strip('/')
     prefixes = conf.get('reseller_prefixes', 'AUTH')
     self.reseller_prefixes = list_from_csv(prefixes)
     self.reseller_prefixes_lower = [x.lower()
                                     for x in self.reseller_prefixes]
     self.default_reseller_prefix = conf.get('default_reseller_prefix')
Example #10
    def __init__(self, idx, name='', is_default=False, is_deprecated=False,
                 object_ring=None, aliases=''):
        # do not allow BaseStoragePolicy class to be instantiated directly
        if type(self) == BaseStoragePolicy:
            raise TypeError("Can't instantiate BaseStoragePolicy directly")
        # policy parameter validation
        try:
            self.idx = int(idx)
        except ValueError:
            raise PolicyError('Invalid index', idx)
        if self.idx < 0:
            raise PolicyError('Invalid index', idx)
        self.alias_list = []
        self.add_name(name)
        if aliases:
            names_list = list_from_csv(aliases)
            for alias in names_list:
                if alias == name:
                    continue
                self.add_name(alias)
        self.is_deprecated = config_true_value(is_deprecated)
        self.is_default = config_true_value(is_default)
        if self.policy_type not in BaseStoragePolicy.policy_type_to_policy_cls:
            raise PolicyError('Invalid type', self.policy_type)
        if self.is_deprecated and self.is_default:
            raise PolicyError('Deprecated policy can not be default.  '
                              'Invalid config', self.idx)

        self.ring_name = _get_policy_string('object', self.idx)
        self.object_ring = object_ring
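The aliases handling above registers the primary name first, then each CSV alias, skipping duplicates of the primary name. A standalone sketch with hypothetical policy names:

def list_from_csv(s):
    # simplified stand-in for swift.common.utils.list_from_csv
    return [v.strip() for v in s.split(',') if v.strip()] if s else []

name, aliases = 'gold', 'gold, premium, fast'
alias_list = [name]                       # what add_name(name) records
for alias in list_from_csv(aliases):
    if alias == name:
        continue                          # never register the name twice
    alias_list.append(alias)              # what add_name(alias) records
assert alias_list == ['gold', 'premium', 'fast']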
Example #11
 def run_once(self, *args, **kwargs):
     start = time.time()
     self.logger.info(_("Running object reconstructor in script mode."))
     override_devices = list_from_csv(kwargs.get('devices'))
     override_partitions = [int(p) for p in
                            list_from_csv(kwargs.get('partitions'))]
     self.reconstruct(
         override_devices=override_devices,
         override_partitions=override_partitions)
     total = (time.time() - start) / 60
     self.logger.info(
         _("Object reconstruction complete (once). (%.02f minutes)"), total)
     if not (override_partitions or override_devices):
         dump_recon_cache({'object_reconstruction_time': total,
                           'object_reconstruction_last': time.time()},
                          self.rcache, self.logger)
Example #12
 def __init__(self, conf, logger, rcache, devices, zero_byte_only_at_fps=0):
     self.conf = conf
     self.logger = logger
     self.devices = devices
     self.diskfile_mgr = diskfile.DiskFileManager(conf, self.logger)
     self.max_files_per_second = float(conf.get('files_per_second', 20))
     self.max_bytes_per_second = float(conf.get('bytes_per_second',
                                                10000000))
     self.auditor_type = 'ALL'
     self.zero_byte_only_at_fps = zero_byte_only_at_fps
     if self.zero_byte_only_at_fps:
         self.max_files_per_second = float(self.zero_byte_only_at_fps)
         self.auditor_type = 'ZBF'
     self.log_time = int(conf.get('log_time', 3600))
     self.last_logged = 0
     self.files_running_time = 0
     self.bytes_running_time = 0
     self.bytes_processed = 0
     self.total_bytes_processed = 0
     self.total_files_processed = 0
     self.passes = 0
     self.quarantines = 0
     self.errors = 0
     self.rcache = rcache
     self.stats_sizes = sorted(
         [int(s) for s in list_from_csv(conf.get('object_size_stats'))])
     self.stats_buckets = dict(
         [(s, 0) for s in self.stats_sizes + ['OVER']])
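The last two assignments above turn a CSV of byte thresholds into sorted bucket keys plus a catch-all 'OVER' bucket. A standalone sketch with a hypothetical object_size_stats setting:

def list_from_csv(s):
    # simplified stand-in for swift.common.utils.list_from_csv
    return [v.strip() for v in s.split(',') if v.strip()] if s else []

conf = {'object_size_stats': '10485760, 1024, 1048576'}
stats_sizes = sorted(int(s) for s in
                     list_from_csv(conf.get('object_size_stats')))
stats_buckets = {s: 0 for s in stats_sizes + ['OVER']}
assert stats_sizes == [1024, 1048576, 10485760]
assert stats_buckets == {1024: 0, 1048576: 0, 10485760: 0, 'OVER': 0}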
Example #13
def reload_constraints():
    """
    Parse SWIFT_CONF_FILE and reset module level global constraint attrs,
    populating OVERRIDE_CONSTRAINTS and EFFECTIVE_CONSTRAINTS along the way.
    """
    global SWIFT_CONSTRAINTS_LOADED, OVERRIDE_CONSTRAINTS
    SWIFT_CONSTRAINTS_LOADED = False
    OVERRIDE_CONSTRAINTS = {}
    constraints_conf = ConfigParser()
    if constraints_conf.read(utils.SWIFT_CONF_FILE):
        SWIFT_CONSTRAINTS_LOADED = True
        for name in DEFAULT_CONSTRAINTS:
            try:
                value = constraints_conf.get('swift-constraints', name)
            except NoOptionError:
                pass
            except NoSectionError:
                # We are never going to find the section for another option
                break
            else:
                try:
                    value = int(value)
                except ValueError:
                    value = utils.list_from_csv(value)
                OVERRIDE_CONSTRAINTS[name] = value
    for name, default in DEFAULT_CONSTRAINTS.items():
        value = OVERRIDE_CONSTRAINTS.get(name, default)
        EFFECTIVE_CONSTRAINTS[name] = value
        # "globals" in this context is module level globals, always.
        globals()[name.upper()] = value
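The int()/list_from_csv fallback above means a constraint value that parses as an integer stays an integer, while anything else is treated as a CSV list. A standalone sketch with made-up values:

def list_from_csv(s):
    # simplified stand-in for swift.common.utils.list_from_csv
    return [v.strip() for v in s.split(',') if v.strip()] if s else []

def coerce_constraint(value):
    try:
        return int(value)
    except ValueError:
        return list_from_csv(value)

assert coerce_constraint('5') == 5
assert coerce_constraint('slo, dlo') == ['slo', 'dlo']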
Example #14
def main():
    options, commands = parser.parse_args()
    commands.remove('split-brain')
    if not commands:
        parser.print_help()
        return 'ERROR: must specify at least one command'
    for cmd_args in commands:
        cmd = cmd_args.split(':', 1)[0]
        if cmd not in BrainSplitter.__commands__:
            parser.print_help()
            return 'ERROR: unknown command %s' % cmd
    url, token = get_auth('http://127.0.0.1:8080/auth/v1.0',
                          'test:tester', 'testing')
    brain = BrainSplitter(url, token, options.container, options.object)
    for cmd_args in commands:
        parts = cmd_args.split(':', 1)
        command = parts[0]
        if len(parts) > 1:
            args = utils.list_from_csv(parts[1])
        else:
            args = ()
        try:
            brain.run(command, *args)
        except ClientException as e:
            print '**WARNING**: %s raised %s' % (command, e)
    print 'STATUS'.join(['*' * 25] * 2)
    brain.servers.status()
    sys.exit()
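Each command token in main() above is either a bare name or of the form name:csv,of,args. A standalone sketch of that parsing, with made-up commands:

def list_from_csv(s):
    # simplified stand-in for swift.common.utils.list_from_csv
    return [v.strip() for v in s.split(',') if v.strip()] if s else []

def parse_command(cmd_args):
    parts = cmd_args.split(':', 1)
    args = list_from_csv(parts[1]) if len(parts) > 1 else ()
    return parts[0], args

assert parse_command('delete') == ('delete', ())
assert parse_command('put:arg1, arg2') == ('put', ['arg1', 'arg2'])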
Example #15
    def __init__(self, app, conf, logger=None):
        self.app = app
        self.log_hdrs = config_true_value(conf.get("access_log_headers", conf.get("log_headers", "no")))
        log_hdrs_only = list_from_csv(conf.get("access_log_headers_only", ""))
        self.log_hdrs_only = [x.title() for x in log_hdrs_only]

        # The leading access_* check is in case someone assumes that
        # log_statsd_valid_http_methods behaves like the other log_statsd_*
        # settings.
        self.valid_methods = conf.get(
            "access_log_statsd_valid_http_methods",
            conf.get("log_statsd_valid_http_methods", "GET,HEAD,POST,PUT,DELETE,COPY,OPTIONS"),
        )
        self.valid_methods = [m.strip().upper() for m in self.valid_methods.split(",") if m.strip()]
        access_log_conf = {}
        for key in (
            "log_facility",
            "log_name",
            "log_level",
            "log_udp_host",
            "log_udp_port",
            "log_statsd_host",
            "log_statsd_port",
            "log_statsd_default_sample_rate",
            "log_statsd_sample_rate_factor",
            "log_statsd_metric_prefix",
        ):
            value = conf.get("access_" + key, conf.get(key, None))
            if value:
                access_log_conf[key] = value
        self.access_logger = logger or get_logger(access_log_conf, log_route="proxy-access")
        self.access_logger.set_statsd_prefix("proxy-server")
        self.reveal_sensitive_prefix = int(conf.get("reveal_sensitive_prefix", MAX_HEADER_SIZE))
Example #16
    def __init__(self, app, conf, logger=None):
        self.app = app
        self.log_hdrs = config_true_value(conf.get(
            'access_log_headers',
            conf.get('log_headers', 'no')))
        log_hdrs_only = list_from_csv(conf.get(
            'access_log_headers_only', ''))
        self.log_hdrs_only = [x.title() for x in log_hdrs_only]

        # The leading access_* check is in case someone assumes that
        # log_statsd_valid_http_methods behaves like the other log_statsd_*
        # settings.
        self.valid_methods = conf.get(
            'access_log_statsd_valid_http_methods',
            conf.get('log_statsd_valid_http_methods',
                     'GET,HEAD,POST,PUT,DELETE,COPY,OPTIONS'))
        self.valid_methods = [m.strip().upper() for m in
                              self.valid_methods.split(',') if m.strip()]
        access_log_conf = {}
        for key in ('log_facility', 'log_name', 'log_level', 'log_udp_host',
                    'log_udp_port', 'log_statsd_host', 'log_statsd_port',
                    'log_statsd_default_sample_rate',
                    'log_statsd_sample_rate_factor',
                    'log_statsd_metric_prefix'):
            value = conf.get('access_' + key, conf.get(key, None))
            if value:
                access_log_conf[key] = value
        self.access_logger = logger or get_logger(access_log_conf,
                                                  log_route='proxy-access')
        self.access_logger.set_statsd_prefix('proxy-server')
        self.reveal_sensitive_prefix = int(
            conf.get('reveal_sensitive_prefix', 16))
Example #17
 def __init__(self, conf, logger, zero_byte_only_at_fps=0):
     self.conf = conf
     self.logger = logger
     self.devices = conf.get("devices", "/srv/node")
     self.mount_check = config_true_value(conf.get("mount_check", "true"))
     self.max_files_per_second = float(conf.get("files_per_second", 20))
     self.max_bytes_per_second = float(conf.get("bytes_per_second", 10000000))
     self.auditor_type = "ALL"
     self.zero_byte_only_at_fps = zero_byte_only_at_fps
     if self.zero_byte_only_at_fps:
         self.max_files_per_second = float(self.zero_byte_only_at_fps)
         self.auditor_type = "ZBF"
     self.log_time = int(conf.get("log_time", 3600))
     self.files_running_time = 0
     self.bytes_running_time = 0
     self.bytes_processed = 0
     self.total_bytes_processed = 0
     self.total_files_processed = 0
     self.passes = 0
     self.quarantines = 0
     self.errors = 0
     self.recon_cache_path = conf.get("recon_cache_path", "/var/cache/swift")
     self.rcache = os.path.join(self.recon_cache_path, "object.recon")
     self.stats_sizes = sorted([int(s) for s in list_from_csv(conf.get("object_size_stats"))])
     self.stats_buckets = dict([(s, 0) for s in self.stats_sizes + ["OVER"]])
Example #18
 def __init__(self, conf, logger, zero_byte_only_at_fps=0):
     self.conf = conf
     self.logger = logger
     self.devices = conf.get('devices', '/srv/node')
     self.mount_check = config_true_value(conf.get('mount_check', 'true'))
     self.max_files_per_second = float(conf.get('files_per_second', 20))
     self.max_bytes_per_second = float(conf.get('bytes_per_second',
                                                10000000))
     self.auditor_type = 'ALL'
     self.zero_byte_only_at_fps = zero_byte_only_at_fps
     if self.zero_byte_only_at_fps:
         self.max_files_per_second = float(self.zero_byte_only_at_fps)
         self.auditor_type = 'ZBF'
     self.log_time = int(conf.get('log_time', 3600))
     self.files_running_time = 0
     self.bytes_running_time = 0
     self.bytes_processed = 0
     self.total_bytes_processed = 0
     self.total_files_processed = 0
     self.passes = 0
     self.quarantines = 0
     self.errors = 0
     self.recon_cache_path = conf.get('recon_cache_path',
                                      '/var/cache/swift')
     self.rcache = os.path.join(self.recon_cache_path, "object.recon")
     self.stats_sizes = sorted(
         [int(s) for s in list_from_csv(conf.get('object_size_stats'))])
     self.stats_buckets = dict(
         [(s, 0) for s in self.stats_sizes + ['OVER']])
Example #19
    def OPTIONS(self, req):
        """
        Base handler for OPTIONS requests

        :param req: swob.Request object
        :returns: swob.Response object
        """
        # Prepare the default response
        headers = {'Allow': ', '.join(self.allowed_methods)}
        resp = Response(status=200, request=req, headers=headers)

        # If this isn't a CORS pre-flight request then return now
        req_origin_value = req.headers.get('Origin', None)
        if not req_origin_value:
            return resp

        # This is a CORS preflight request so check it's allowed
        try:
            container_info = \
                self.container_info(self.account_name,
                                    self.container_name, req)
        except AttributeError:
            # This should only happen for requests to the Account. A future
            # change could allow CORS requests to the Account level as well.
            return resp

        cors = container_info.get('cors', {})

        # If the CORS origin isn't allowed return a 401
        if not self.is_origin_allowed(cors, req_origin_value) or (
                req.headers.get('Access-Control-Request-Method') not in
                self.allowed_methods):
            resp.status = HTTP_UNAUTHORIZED
            return resp

        # Allow all headers requested in the request. The CORS
        # specification does leave the door open for this, as mentioned in
        # http://www.w3.org/TR/cors/#resource-preflight-requests
        # Note: Since the list of headers can be unbounded
        # simply returning headers can be enough.
        allow_headers = set()
        if req.headers.get('Access-Control-Request-Headers'):
            allow_headers.update(
                list_from_csv(req.headers['Access-Control-Request-Headers']))

        # Populate the response with the CORS preflight headers
        if cors.get('allow_origin', '').strip() == '*':
            headers['access-control-allow-origin'] = '*'
        else:
            headers['access-control-allow-origin'] = req_origin_value
        if cors.get('max_age') is not None:
            headers['access-control-max-age'] = cors.get('max_age')
        headers['access-control-allow-methods'] = \
            ', '.join(self.allowed_methods)
        if allow_headers:
            headers['access-control-allow-headers'] = ', '.join(allow_headers)
        resp.headers = headers

        return resp
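For the preflight branch above, Access-Control-Request-Headers is itself a CSV header, so the same helper applies. A standalone sketch with hypothetical header names:

def list_from_csv(s):
    # simplified stand-in for swift.common.utils.list_from_csv
    return [v.strip() for v in s.split(',') if v.strip()] if s else []

requested = 'X-Object-Meta-Color, Content-Type'
allow_headers = set()
if requested:
    allow_headers.update(list_from_csv(requested))
assert allow_headers == {'X-Object-Meta-Color', 'Content-Type'}
# later joined back into access-control-allow-headers with ', '.join(...)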
Example #20
    def __init__(self, conf, logger, rcache, devices, zero_byte_only_at_fps=0):
        self.conf = conf
        self.logger = logger
        self.devices = devices
        self.max_files_per_second = float(conf.get('files_per_second', 20))
        self.max_bytes_per_second = float(conf.get('bytes_per_second',
                                                   10000000))
        try:
            # ideally unless ops overrides the rsync_tempfile_timeout in the
            # auditor section we can base our behavior on whatever they
            # configure for their replicator
            replicator_config = readconf(self.conf['__file__'],
                                         'object-replicator')
        except (KeyError, ValueError, IOError):
            # if we can't parse the real config (generally a KeyError on
            # __file__, or ValueError on no object-replicator section, or
            # IOError if reading the file failed) we use
            # a very conservative default for rsync_timeout
            default_rsync_timeout = 86400
        else:
            replicator_rsync_timeout = int(replicator_config.get(
                'rsync_timeout', replicator.DEFAULT_RSYNC_TIMEOUT))
            # Here we can do some light math for ops and use the *replicator's*
            # rsync_timeout (plus 15 mins to avoid deleting local tempfiles
            # before the remote replicator kills its rsync)
            default_rsync_timeout = replicator_rsync_timeout + 900
            # there's not really a good reason to assume the replicator
            # section's reclaim_age is more appropriate than the reconstructor
            # reclaim_age - but we're already parsing the config so we can set
            # the default value in our config if it's not already set
            if 'reclaim_age' in replicator_config:
                conf.setdefault('reclaim_age',
                                replicator_config['reclaim_age'])
        self.rsync_tempfile_timeout = config_auto_int_value(
            self.conf.get('rsync_tempfile_timeout'), default_rsync_timeout)
        self.diskfile_router = diskfile.DiskFileRouter(conf, self.logger)

        self.auditor_type = 'ALL'
        self.zero_byte_only_at_fps = zero_byte_only_at_fps
        if self.zero_byte_only_at_fps:
            self.max_files_per_second = float(self.zero_byte_only_at_fps)
            self.auditor_type = 'ZBF'
        self.log_time = int(conf.get('log_time', 3600))
        self.last_logged = 0
        self.files_running_time = 0
        self.bytes_running_time = 0
        self.bytes_processed = 0
        self.total_bytes_processed = 0
        self.total_files_processed = 0
        self.passes = 0
        self.quarantines = 0
        self.errors = 0
        self.rcache = rcache
        self.stats_sizes = sorted(
            [int(s) for s in list_from_csv(conf.get('object_size_stats'))])
        self.stats_buckets = dict(
            [(s, 0) for s in self.stats_sizes + ['OVER']])
Example #21
 def __init__(self, app, conf):
     self.app = app
     self.storage_domain = conf.get("storage_domain", "example.com")
     if self.storage_domain and not self.storage_domain.startswith("."):
         self.storage_domain = "." + self.storage_domain
     self.path_root = conf.get("path_root", "v1").strip("/")
     prefixes = conf.get("reseller_prefixes", "AUTH")
     self.reseller_prefixes = list_from_csv(prefixes)
     self.reseller_prefixes_lower = [x.lower() for x in self.reseller_prefixes]
     self.default_reseller_prefix = conf.get("default_reseller_prefix")
Example #22
 def run_once(self, *args, **kwargs):
     self.reset_stats()
     override_devices = list_from_csv(kwargs.get('devices'))
     devices = override_devices or self._get_devices()
     self.logger.info('Starting sweep of %r', devices)
     start = time.time()
     for device in devices:
         self.audit_device(device)
     self.logger.info('Finished sweep of %r (%ds) => %r', devices,
                      time.time() - start, self.stats)
Example #23
 def __init__(self, app, conf):
     self.app = app
     self.storage_domain = conf.get('storage_domain', 'example.com')
     if self.storage_domain and self.storage_domain[0] != '.':
         self.storage_domain = '.' + self.storage_domain
     self.path_root = conf.get('path_root', 'v1').strip('/')
     prefixes = conf.get('reseller_prefixes', 'AUTH')
     self.reseller_prefixes = list_from_csv(prefixes)
     self.reseller_prefixes_lower = [x.lower()
                                     for x in self.reseller_prefixes]
     self.default_reseller_prefix = conf.get('default_reseller_prefix')
Example #24
 def _keystone_identity(self, environ):
     """Extract the identity from the Keystone auth component."""
     # In a future release, the user id will be added to
     # env['keystone.identity'] by using _integral_keystone_identity to
     # replace the current _keystone_identity. It is kept in this release
     # for backward compatibility.
     if (environ.get('HTTP_X_IDENTITY_STATUS') != 'Confirmed'
         or environ.get(
             'HTTP_X_SERVICE_IDENTITY_STATUS') not in (None, 'Confirmed')):
         return
     roles = list_from_csv(environ.get('HTTP_X_ROLES', ''))
     identity = {'user': environ.get('HTTP_X_USER_NAME'),
                 'tenant': (environ.get('HTTP_X_TENANT_ID'),
                            environ.get('HTTP_X_TENANT_NAME')),
                 'roles': roles}
     return identity
Example #25
 def run_once(self, *args, **kwargs):
     self.reset_stats()
     override_devices = list_from_csv(kwargs.get('devices'))
     devices = override_devices or self._get_devices()
     self.logger.info('Starting sweep of %r', devices)
     start = time.time()
     for device in devices:
         for location in self._find_objects(device):
             self.stats['found_objects'] += 1
             success = self._audit_object(device, location)
             if success:
                 self.stats['success'] += 1
             else:
                 self.stats['failures'] += 1
     self.logger.info('Finished sweep of %r (%ds) => %r', devices,
                      time.time() - start, self.stats)
Example #26
    def run_once(self, *args, **kwargs):
        """Run the object audit once"""
        # zero byte only command line option
        zbo_fps = kwargs.get("zero_byte_fps", 0)
        override_devices = list_from_csv(kwargs.get("devices"))
        # Remove bogus entries and duplicates from override_devices
        override_devices = list(set(listdir(self.devices)).intersection(set(override_devices)))
        parent = False
        if zbo_fps:
            # only start parent
            parent = True
        kwargs = {"mode": "once"}

        try:
            self.audit_loop(parent, zbo_fps, override_devices=override_devices, **kwargs)
        except (Exception, Timeout) as err:
            self.logger.exception(_("ERROR auditing: %s"), err)
Example #27
    def __init__(self, app, conf, logger=None):
        self.app = app
        self.log_formatter = LogStringFormatter(default='-', quote=True)
        self.log_msg_template = conf.get(
            'log_msg_template', (
                '{client_ip} {remote_addr} {end_time.datetime} {method} '
                '{path} {protocol} {status_int} {referer} {user_agent} '
                '{auth_token} {bytes_recvd} {bytes_sent} {client_etag} '
                '{transaction_id} {headers} {request_time} {source} '
                '{log_info} {start_time} {end_time} {policy_index}'))
        # The salt is only used in StrAnonymizer, which requires bytes;
        # convert it now to prevent a useless conversion later.
        self.anonymization_method = conf.get('log_anonymization_method', 'md5')
        self.anonymization_salt = conf.get('log_anonymization_salt', '')
        self.log_hdrs = config_true_value(conf.get(
            'access_log_headers',
            conf.get('log_headers', 'no')))
        log_hdrs_only = list_from_csv(conf.get(
            'access_log_headers_only', ''))
        self.log_hdrs_only = [x.title() for x in log_hdrs_only]

        # The leading access_* check is in case someone assumes that
        # log_statsd_valid_http_methods behaves like the other log_statsd_*
        # settings.
        self.valid_methods = conf.get(
            'access_log_statsd_valid_http_methods',
            conf.get('log_statsd_valid_http_methods',
                     'GET,HEAD,POST,PUT,DELETE,COPY,OPTIONS'))
        self.valid_methods = [m.strip().upper() for m in
                              self.valid_methods.split(',') if m.strip()]
        access_log_conf = {}
        for key in ('log_facility', 'log_name', 'log_level', 'log_udp_host',
                    'log_udp_port', 'log_statsd_host', 'log_statsd_port',
                    'log_statsd_default_sample_rate',
                    'log_statsd_sample_rate_factor',
                    'log_statsd_metric_prefix'):
            value = conf.get('access_' + key, conf.get(key, None))
            if value:
                access_log_conf[key] = value
        self.access_logger = logger or get_logger(access_log_conf,
                                                  log_route='proxy-access')
        self.access_logger.set_statsd_prefix('proxy-server')
        self.reveal_sensitive_prefix = int(
            conf.get('reveal_sensitive_prefix', 16))
        self.check_log_msg_template_validity()
Example #28
 def __init__(self, app, conf):
     self.app = app
     self.logger = get_logger(conf, log_route='sos-python')
     self.valid_request_types = list_from_csv(
         conf.get('valid_request_types', 'SOS_DB,SOS_ORIGIN,SOS_ADMIN'))
     self.conf = OriginServer._translate_conf(conf)
     self.origin_prefix = self.conf.get('origin_prefix', '/origin/')
     self.origin_db_hosts = [
         host.strip() for host in
         self.conf.get('origin_db_hosts', '').split(',') if host.strip()]
     self.origin_cdn_host_suffixes = [
         host.strip() for host in
         self.conf.get('origin_cdn_host_suffixes', '').split(',')
         if host.strip()]
     if not self.origin_cdn_host_suffixes:
         raise InvalidConfiguration('Please add origin_cdn_host_suffixes')
     self.log_access_requests = \
         self.conf.get('log_access_requests', 't') in TRUE_VALUES
Example #29
    def run_once(self, *args, **kwargs):
        """Run the object audit once"""
        # zero byte only command line option
        zbo_fps = kwargs.get('zero_byte_fps', 0)
        override_devices = list_from_csv(kwargs.get('devices'))
        # Remove bogus entries and duplicates from override_devices
        override_devices = list(
            set(listdir(self.devices)).intersection(set(override_devices)))
        parent = False
        if zbo_fps:
            # only start parent
            parent = True
        kwargs = {'mode': 'once'}

        try:
            self.audit_loop(parent, zbo_fps, override_devices=override_devices,
                            **kwargs)
        except (Exception, Timeout) as err:
            self.logger.exception(_('ERROR auditing: %s'), err)
Example #30
 def run_once(self, *args, **kwargs):
     self.stats = defaultdict(int)
     override_devices = list_from_csv(kwargs.get('devices'))
     devices = override_devices or self._get_devices()
     for device in devices:
         success = False
         try:
             self.object_sweep(device)
         except DiskFileDeviceUnavailable:
             self.logger.warning('Unable to connect to %s', device)
         except Exception:
             self.logger.exception('Unhandled exception trying to '
                                   'sweep object updates on %s', device)
         else:
             success = True
         if success:
             self.stats['device.success'] += 1
         else:
             self.stats['device.failures'] += 1
Example #31
 def _keystone_identity(self, environ):
     """Extract the identity from the Keystone auth component."""
     # In a future release, the user id will be added to
     # env['keystone.identity'] by using _integral_keystone_identity to
     # replace the current _keystone_identity. It is kept in this release
     # for backward compatibility.
     if (environ.get('HTTP_X_IDENTITY_STATUS') != 'Confirmed'
             or environ.get('HTTP_X_SERVICE_IDENTITY_STATUS')
             not in (None, 'Confirmed')):
         return
     roles = list_from_csv(environ.get('HTTP_X_ROLES', ''))
     identity = {
         'user':
         environ.get('HTTP_X_USER_NAME'),
         'tenant': (environ.get('HTTP_X_TENANT_ID'),
                    environ.get('HTTP_X_TENANT_NAME')),
         'roles':
         roles
     }
     return identity
Example #32
def main():
    options, commands = parser.parse_args()
    if not commands:
        parser.print_help()
        return 'ERROR: must specify at least one command'
    for cmd_args in commands:
        cmd = cmd_args.split(':', 1)[0]
        if cmd not in BrainSplitter.__commands__:
            parser.print_help()
            return 'ERROR: unknown command %s' % cmd
    url, token = get_auth('http://127.0.0.1:8080/auth/v1.0', 'test:tester',
                          'testing')
    if options.server_type == 'object' and not options.policy_name:
        options.policy_name = POLICIES.default.name
    if options.policy_name:
        options.server_type = 'object'
        policy = POLICIES.get_by_name(options.policy_name)
        if not policy:
            return 'ERROR: unknown policy %r' % options.policy_name
    else:
        policy = None
    brain = BrainSplitter(url,
                          token,
                          options.container,
                          options.object,
                          options.server_type,
                          policy=policy)
    for cmd_args in commands:
        parts = cmd_args.split(':', 1)
        command = parts[0]
        if len(parts) > 1:
            args = utils.list_from_csv(parts[1])
        else:
            args = ()
        try:
            brain.run(command, *args)
        except ClientException as e:
            print '**WARNING**: %s raised %s' % (command, e)
    print 'STATUS'.join(['*' * 25] * 2)
    brain.servers.status()
    sys.exit()
Example #33
    def __init__(self, idx, name='', is_default=False, is_deprecated=False,
                 object_ring=None, aliases=''):
        # do not allow BaseStoragePolicy class to be instantiated directly
        if type(self) == BaseStoragePolicy:
            raise TypeError("Can't instantiate BaseStoragePolicy directly")
        # policy parameter validation
        # set the storage policy index
        try:
            self.idx = int(idx)
        except ValueError:
            raise PolicyError('Invalid index', idx)
        if self.idx < 0:
            raise PolicyError('Invalid index', idx)
        # list holding this storage policy's aliases
        self.alias_list = []

        # add name to the alias list
        self.add_name(name)

        # read the aliases recorded in the config and add them to the list
        if aliases:
            names_list = list_from_csv(aliases)
            for alias in names_list:
                if alias == name:
                    continue
                self.add_name(alias)
        self.is_deprecated = config_true_value(is_deprecated)
        self.is_default = config_true_value(is_default)

        # raise if the policy type is not a key in
        # BaseStoragePolicy.policy_type_to_policy_cls
        if self.policy_type not in BaseStoragePolicy.policy_type_to_policy_cls:
            raise PolicyError('Invalid type', self.policy_type)
        if self.is_deprecated and self.is_default:
            raise PolicyError('Deprecated policy can not be default.  '
                              'Invalid config', self.idx)

        # build the policy ring name, 'object' or 'object-<index>'
        self.ring_name = _get_policy_string('object', self.idx)
        self.object_ring = object_ring
Example #34
    def __init__(self,
                 idx,
                 name='',
                 is_default=False,
                 is_deprecated=False,
                 object_ring=None,
                 aliases='',
                 diskfile_module='egg:swift#replication.fs'):
        # do not allow BaseStoragePolicy class to be instantiated directly
        if type(self) == BaseStoragePolicy:
            raise TypeError("Can't instantiate BaseStoragePolicy directly")
        # policy parameter validation
        try:
            self.idx = int(idx)
        except ValueError:
            raise PolicyError('Invalid index', idx)
        if self.idx < 0:
            raise PolicyError('Invalid index', idx)
        self.alias_list = []
        self.add_name(name)
        if aliases:
            names_list = list_from_csv(aliases)
            for alias in names_list:
                if alias == name:
                    continue
                self.add_name(alias)
        self.is_deprecated = config_true_value(is_deprecated)
        self.is_default = config_true_value(is_default)
        if self.policy_type not in BaseStoragePolicy.policy_type_to_policy_cls:
            raise PolicyError('Invalid type', self.policy_type)
        if self.is_deprecated and self.is_default:
            raise PolicyError(
                'Deprecated policy can not be default.  '
                'Invalid config', self.idx)

        self.ring_name = _get_policy_string('object', self.idx)
        self.object_ring = object_ring

        self.diskfile_module = diskfile_module
Example #35
def resolve_etag_is_at_header(req, metadata):
    """
    Helper function to resolve an alternative etag value that may be stored in
    metadata under an alternate name.

    The value of the request's X-Backend-Etag-Is-At header (if it exists) is a
    comma separated list of alternate names in the metadata at which an
    alternate etag value may be found. This list is processed in order until an
    alternate etag is found.

    The left most value in X-Backend-Etag-Is-At will have been set by the left
    most middleware, or if no middleware, by ECObjectController, if an EC
    policy is in use. The left most middleware is assumed to be the authority
    on what the etag value of the object content is.

    The resolver will work from left to right in the list until it finds a
    value that is a name in the given metadata. So the left most wins, IF it
    exists in the metadata.

    By way of example, assume the encrypter middleware is installed. If an
    object is *not* encrypted then the resolver will not find the encrypter
    middleware's alternate etag sysmeta (X-Object-Sysmeta-Crypto-Etag) but will
    then find the EC alternate etag (if EC policy). But if the object *is*
    encrypted then X-Object-Sysmeta-Crypto-Etag is found and used, which is
    correct because it should be preferred over X-Object-Sysmeta-Ec-Etag.

    :param req: a swob Request
    :param metadata: a dict containing object metadata
    :return: an alternate etag value if any is found, otherwise None
    """
    alternate_etag = None
    metadata = HeaderKeyDict(metadata)
    if "X-Backend-Etag-Is-At" in req.headers:
        names = list_from_csv(req.headers["X-Backend-Etag-Is-At"])
        for name in names:
            if name in metadata:
                alternate_etag = metadata[name]
                break
    return alternate_etag
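A standalone sketch of the left-to-right resolution the docstring describes; a plain dict stands in for HeaderKeyDict, so unlike the real helper this version is case-sensitive, and the header names and etag value are hypothetical:

def list_from_csv(s):
    # simplified stand-in for swift.common.utils.list_from_csv
    return [v.strip() for v in s.split(',') if v.strip()] if s else []

etag_is_at = 'X-Object-Sysmeta-Crypto-Etag, X-Object-Sysmeta-Ec-Etag'
metadata = {'X-Object-Sysmeta-Ec-Etag': 'abc123'}  # object not encrypted

alternate_etag = None
for name in list_from_csv(etag_is_at):
    if name in metadata:                  # left-most name present wins
        alternate_etag = metadata[name]
        break
assert alternate_etag == 'abc123'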
Example #36
    def GETorHEAD(self, req):
        had_match = False
        for match_header in ('if-match', 'if-none-match'):
            if match_header not in req.headers:
                continue
            had_match = True
            for value in list_from_csv(req.headers[match_header]):
                value = normalize_etag(value)
                if value.endswith('-N'):
                    # Deal with fake S3-like etags for SLOs uploaded via Swift
                    req.headers[match_header] += ', ' + value[:-2]

        if had_match:
            # Update where to look
            update_etag_is_at_header(req, sysmeta_header('object', 'etag'))

        object_name = req.object_name
        version_id = req.params.get('versionId')
        if version_id not in ('null', None) and \
                'object_versioning' not in get_swift_info():
            raise S3NotImplemented()
        query = {} if version_id is None else {'version-id': version_id}
        resp = req.get_response(self.app, query=query)

        if req.method == 'HEAD':
            resp.app_iter = None

        if 'x-amz-meta-deleted' in resp.headers:
            raise NoSuchKey(object_name)

        for key in ('content-type', 'content-language', 'expires',
                    'cache-control', 'content-disposition',
                    'content-encoding'):
            if 'response-' + key in req.params:
                resp.headers[key] = req.params['response-' + key]

        return resp
Example #37
    def __init__(self,
                 conf,
                 logger,
                 rcache,
                 devices,
                 zero_byte_only_at_fps=0,
                 watcher_defs=None):
        if watcher_defs is None:
            watcher_defs = {}
        self.conf = conf
        self.logger = logger
        self.devices = devices
        self.max_files_per_second = float(conf.get('files_per_second', 20))
        self.max_bytes_per_second = float(
            conf.get('bytes_per_second', 10000000))
        try:
            # ideally unless ops overrides the rsync_tempfile_timeout in the
            # auditor section we can base our behavior on whatever they
            # configure for their replicator
            replicator_config = readconf(self.conf['__file__'],
                                         'object-replicator')
        except (KeyError, ValueError, IOError):
            # if we can't parse the real config (generally a KeyError on
            # __file__, or ValueError on no object-replicator section, or
            # IOError if reading the file failed) we use
            # a very conservative default for rsync_timeout
            default_rsync_timeout = 86400
        else:
            replicator_rsync_timeout = int(
                replicator_config.get('rsync_timeout',
                                      replicator.DEFAULT_RSYNC_TIMEOUT))
            # Here we can do some light math for ops and use the *replicator's*
            # rsync_timeout (plus 15 mins to avoid deleting local tempfiles
            # before the remote replicator kills its rsync)
            default_rsync_timeout = replicator_rsync_timeout + 900
            # there's not really a good reason to assume the replicator
            # section's reclaim_age is more appropriate than the reconstructor
            # reclaim_age - but we're already parsing the config so we can set
            # the default value in our config if it's not already set
            if 'reclaim_age' in replicator_config:
                conf.setdefault('reclaim_age',
                                replicator_config['reclaim_age'])
        self.rsync_tempfile_timeout = config_auto_int_value(
            self.conf.get('rsync_tempfile_timeout'), default_rsync_timeout)
        self.diskfile_router = diskfile.DiskFileRouter(conf, self.logger)

        self.auditor_type = 'ALL'
        self.zero_byte_only_at_fps = zero_byte_only_at_fps
        if self.zero_byte_only_at_fps:
            self.max_files_per_second = float(self.zero_byte_only_at_fps)
            self.auditor_type = 'ZBF'
        self.log_time = int(conf.get('log_time', 3600))
        self.last_logged = 0
        self.files_running_time = 0
        self.bytes_running_time = 0
        self.bytes_processed = 0
        self.total_bytes_processed = 0
        self.total_files_processed = 0
        self.passes = 0
        self.quarantines = 0
        self.errors = 0
        self.rcache = rcache
        self.stats_sizes = sorted(
            [int(s) for s in list_from_csv(conf.get('object_size_stats'))])
        self.stats_buckets = dict([(s, 0)
                                   for s in self.stats_sizes + ['OVER']])

        self.watchers = [
            WatcherWrapper(wdef['klass'], name, wdef['conf'], logger)
            for name, wdef in watcher_defs.items()
        ]
        logger.debug("%d audit watcher(s) loaded", len(self.watchers))
Example #38
    def __init__(self,
                 conf,
                 memcache=None,
                 logger=None,
                 account_ring=None,
                 container_ring=None):
        if conf is None:
            conf = {}
        if logger is None:
            self.logger = get_logger(conf, log_route='proxy-server')
        else:
            self.logger = logger

        self._error_limiting = {}

        swift_dir = conf.get('swift_dir', '/etc/swift')
        self.swift_dir = swift_dir
        self.node_timeout = float(conf.get('node_timeout', 10))
        self.recoverable_node_timeout = float(
            conf.get('recoverable_node_timeout', self.node_timeout))
        self.conn_timeout = float(conf.get('conn_timeout', 0.5))
        self.client_timeout = int(conf.get('client_timeout', 60))
        self.put_queue_depth = int(conf.get('put_queue_depth', 10))
        self.object_chunk_size = int(conf.get('object_chunk_size', 65536))
        self.client_chunk_size = int(conf.get('client_chunk_size', 65536))
        self.trans_id_suffix = conf.get('trans_id_suffix', '')
        self.post_quorum_timeout = float(conf.get('post_quorum_timeout', 0.5))
        self.error_suppression_interval = \
            int(conf.get('error_suppression_interval', 60))
        self.error_suppression_limit = \
            int(conf.get('error_suppression_limit', 10))
        self.recheck_container_existence = \
            int(conf.get('recheck_container_existence', 60))
        self.recheck_account_existence = \
            int(conf.get('recheck_account_existence', 60))
        self.allow_account_management = \
            config_true_value(conf.get('allow_account_management', 'no'))
        self.object_post_as_copy = \
            config_true_value(conf.get('object_post_as_copy', 'true'))
        self.container_ring = container_ring or Ring(swift_dir,
                                                     ring_name='container')
        self.account_ring = account_ring or Ring(swift_dir,
                                                 ring_name='account')
        # ensure rings are loaded for all configured storage policies
        for policy in POLICIES:
            policy.load_ring(swift_dir)
        self.obj_controller_router = ObjectControllerRouter()
        self.memcache = memcache
        mimetypes.init(mimetypes.knownfiles +
                       [os.path.join(swift_dir, 'mime.types')])
        self.account_autocreate = \
            config_true_value(conf.get('account_autocreate', 'no'))
        self.auto_create_account_prefix = (
            conf.get('auto_create_account_prefix') or '.')
        self.expiring_objects_account = self.auto_create_account_prefix + \
            (conf.get('expiring_objects_account_name') or 'expiring_objects')
        self.expiring_objects_container_divisor = \
            int(conf.get('expiring_objects_container_divisor') or 86400)
        self.max_containers_per_account = \
            int(conf.get('max_containers_per_account') or 0)
        self.max_containers_whitelist = [
            a.strip()
            for a in conf.get('max_containers_whitelist', '').split(',')
            if a.strip()
        ]
        self.deny_host_headers = [
            host.strip()
            for host in conf.get('deny_host_headers', '').split(',')
            if host.strip()
        ]
        self.log_handoffs = config_true_value(conf.get('log_handoffs', 'true'))
        self.cors_allow_origin = [
            a.strip() for a in conf.get('cors_allow_origin', '').split(',')
            if a.strip()
        ]
        self.strict_cors_mode = config_true_value(
            conf.get('strict_cors_mode', 't'))
        self.node_timings = {}
        self.timing_expiry = int(conf.get('timing_expiry', 300))
        self.sorting_method = conf.get('sorting_method', 'shuffle').lower()
        self.concurrent_gets = \
            config_true_value(conf.get('concurrent_gets'))
        self.concurrency_timeout = float(
            conf.get('concurrency_timeout', self.conn_timeout))
        value = conf.get('request_node_count', '2 * replicas').lower().split()
        if len(value) == 1:
            rnc_value = int(value[0])
            self.request_node_count = lambda replicas: rnc_value
        elif len(value) == 3 and value[1] == '*' and value[2] == 'replicas':
            rnc_value = int(value[0])
            self.request_node_count = lambda replicas: rnc_value * replicas
        else:
            raise ValueError('Invalid request_node_count value: %r' %
                             ''.join(value))
        try:
            self._read_affinity = read_affinity = conf.get('read_affinity', '')
            self.read_affinity_sort_key = affinity_key_function(read_affinity)
        except ValueError as err:
            # make the message a little more useful
            raise ValueError("Invalid read_affinity value: %r (%s)" %
                             (read_affinity, err.message))
        try:
            write_affinity = conf.get('write_affinity', '')
            self.write_affinity_is_local_fn \
                = affinity_locality_predicate(write_affinity)
        except ValueError as err:
            # make the message a little more useful
            raise ValueError("Invalid write_affinity value: %r (%s)" %
                             (write_affinity, err.message))
        value = conf.get('write_affinity_node_count',
                         '2 * replicas').lower().split()
        if len(value) == 1:
            wanc_value = int(value[0])
            self.write_affinity_node_count = lambda replicas: wanc_value
        elif len(value) == 3 and value[1] == '*' and value[2] == 'replicas':
            wanc_value = int(value[0])
            self.write_affinity_node_count = \
                lambda replicas: wanc_value * replicas
        else:
            raise ValueError('Invalid write_affinity_node_count value: %r' %
                             ''.join(value))
        # swift_owner_headers are stripped by the account and container
        # controllers; we should extend header stripping to object controller
        # when a privileged object header is implemented.
        swift_owner_headers = conf.get(
            'swift_owner_headers', 'x-container-read, x-container-write, '
            'x-container-sync-key, x-container-sync-to, '
            'x-account-meta-temp-url-key, x-account-meta-temp-url-key-2, '
            'x-container-meta-temp-url-key, x-container-meta-temp-url-key-2, '
            'x-account-access-control')
        self.swift_owner_headers = [
            name.strip().title() for name in swift_owner_headers.split(',')
            if name.strip()
        ]
        # Initialization was successful, so now apply the client chunk size
        # parameter as the default read / write buffer size for the network
        # sockets.
        #
        # NOTE WELL: This is a class setting, so until we get set this on a
        # per-connection basis, this affects reading and writing on ALL
        # sockets, those between the proxy servers and external clients, and
        # those between the proxy servers and the other internal servers.
        #
        # ** Because it affects the client as well, currently, we use the
        # client chunk size as the govenor and not the object chunk size.
        socket._fileobject.default_bufsize = self.client_chunk_size
        self.expose_info = config_true_value(conf.get('expose_info', 'yes'))
        self.disallowed_sections = list_from_csv(
            conf.get('disallowed_sections', 'swift.valid_api_versions'))
        self.admin_key = conf.get('admin_key', None)
        register_swift_info(
            version=swift_version,
            strict_cors_mode=self.strict_cors_mode,
            policies=POLICIES.get_policy_info(),
            allow_account_management=self.allow_account_management,
            account_autocreate=self.account_autocreate,
            **constraints.EFFECTIVE_CONSTRAINTS)
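The request_node_count parsing in the long __init__ above accepts either a bare integer or the form '<N> * replicas'. A standalone sketch of just that piece:

def parse_node_count(value_str):
    value = value_str.lower().split()
    if len(value) == 1:
        rnc_value = int(value[0])
        return lambda replicas: rnc_value
    if len(value) == 3 and value[1] == '*' and value[2] == 'replicas':
        rnc_value = int(value[0])
        return lambda replicas: rnc_value * replicas
    raise ValueError('Invalid request_node_count value: %r' % value_str)

assert parse_node_count('2 * replicas')(3) == 6
assert parse_node_count('10')(3) == 10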
Example #39
    def __call__(self, env, start_response):
        req = Request(env)

        try:
            version, account, container, obj = req.split_path(1, 4, True)
        except ValueError:
            return self.app(env, start_response)

        if account is None:
            return self.app(env, start_response)

        if env.get('swift.authorize_override', False):
            return self.app(env, start_response)

        # First, restrict modification of auth meta-data to only users with
        # the admin role (or roles that have been specially enabled in
        # the swift config).
        if req.method == "POST":
            # following code to get roles is borrowed from keystoneauth
            roles = set()
            if (env.get('HTTP_X_IDENTITY_STATUS') == 'Confirmed'
                    or env.get('HTTP_X_SERVICE_IDENTITY_STATUS')
                    in (None, 'Confirmed')):
                roles = set(list_from_csv(env.get('HTTP_X_ROLES', '')))

            if not roles.intersection(self.allowed_meta_write_roles):
                for k, v in req.headers.iteritems():
                    if k.startswith('X-Container-Meta-Allowed-Iprange') \
                            or k.startswith('X-Account-Meta-Allowed-Iprange'):
                        return Response(status=403,
                                        body=deny_meta_change,
                                        request=req)(env, start_response)

        # Grab the metadata for the account and container
        if container is not None and container != "":
            try:
                container_info = \
                    get_container_info(req.environ, self.app,
                                       swift_source='IPRangeACLMiddleware')
            except ValueError:
                # if we can't get container info, then we deny the request
                return Response(status=403,
                                body="Invalid container (%s)" % container,
                                request=req)(env, start_response)
        else:
            container_info = None

        try:
            acc_info = get_account_info(req.environ,
                                        self.app,
                                        swift_source='IPRangeACLMiddleware')
        except ValueError:
            # if we can't get account info, then we deny the request
            return Response(status=403,
                            body="Invalid account (%s)" % account,
                            request=req)(env, start_response)

        remote_ip = get_remote_client(req)

        allowed = set()
        default = "denied"

        # Read any account-level ACLs
        meta = acc_info['meta']
        for k, v in meta.iteritems():
            if k.startswith("allowed-iprange") and len(v) > 0:
                allowed.add(v)

            # This key is used to set the default access policy in
            # cases where no ACLs are present in the meta-data.
            if k == "ipacl-default":
                default = v

        # If the request is for a container or object, check for any
        # container-level ACLs
        if container_info is not None:
            meta = container_info['meta']
            for k, v in meta.iteritems():
                # Each allowed range must have a unique meta-data key, but
                # the key must begin with 'allowed-iprange-'
                if k.startswith('allowed-iprange-') and len(v) > 0:
                    allowed.add(v)

                # This key is used to set the default access policy in
                # cases where no ACLs are present in the meta-data.

                # NOTE: Container-level default behaviour will override
                # account-level defaults.
                if k == "ipacl-default":
                    default = v

        # XXX Could probably condense this into one tree, but not sure
        # whether Pytricia is OK with mixing IPv4 and IPv6 prefixes.
        self.pyt = pytricia.PyTricia(32)
        self.pyt6 = pytricia.PyTricia(128)

        # If there are no IP range ACLs in the meta-data and the
        # default policy is "allowed", then we can grant access.
        if len(allowed) == 0 and default == "allowed":
            return self.app(env, start_response)
        else:
            # Build the patricia tree of allowed IP prefixes
            for pref in allowed:

                if ':' in pref:
                    try:
                        addrcheck = ipaddress.IPv6Network(unicode(pref), False)
                    except ipaddress.AddressValueError:
                        self.logger.debug(
                            "iprange_acl -- skipping invalid IP prefix: %(pref)s",
                            {'pref': pref})
                        continue
                    self.pyt6[pref] = "allowed"
                else:
                    try:
                        addrcheck = ipaddress.IPv4Network(unicode(pref), False)
                    except ipaddress.AddressValueError:
                        self.logger.debug(
                            "iprange_acl -- skipping invalid IP prefix: %(pref)s",
                            {'pref': pref})
                        continue

                    self.pyt[pref] = "allowed"

        # Always allow our own IP, otherwise we could lock ourselves out from
        # the container!
        if ':' in self.local_ip:
            self.pyt6[self.local_ip] = "allowed"
        else:
            self.pyt[self.local_ip] = "allowed"

        # Add our default allowed IP ranges to the patricia tree
        for default_range in self.default_ranges:
            if ':' in default_range:
                try:
                    # parsed only to validate the prefix; the string form
                    # is what gets stored in the tree
                    addrcheck = ipaddress.IPv6Network(unicode(default_range),
                                                      False)
                except ipaddress.AddressValueError:
                    self.logger.debug("Invalid always_allow prefix for "
                                      "IPv6: %s" % default_range)
                else:
                    self.pyt6[default_range] = "allowed"
            else:
                try:
                    addrcheck = ipaddress.IPv4Network(unicode(default_range),
                                                      False)
                except ipaddress.AddressValueError:
                    self.logger.debug("Invalid always_allow prefix for "
                                      "IPv4: %s" % default_range)
                else:
                    self.pyt[default_range] = "allowed"

        # Look up the address of the client in the patricia tree
        if ':' in remote_ip:
            status = self.pyt6.get(remote_ip)
        else:
            status = self.pyt.get(remote_ip)

        if status == "allowed":
            return self.app(env, start_response)

        return Response(status=403, body=self.deny_message,
                        request=req)(env, start_response)
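
The allow/deny decision above hinges on pytricia's longest-prefix match:
storing a CIDR prefix as a key lets a later lookup by plain address find the
covering prefix, or None when nothing covers it. A minimal sketch, assuming
the third-party pytricia package is installed:

import pytricia

pyt = pytricia.PyTricia(32)      # 32-bit keys for IPv4
pyt['192.0.2.0/24'] = 'allowed'

print(pyt.get('192.0.2.17'))     # 'allowed' -- covered by the /24
print(pyt.get('198.51.100.1'))   # None -- no covering prefix
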
Example #40
    def __init__(self,
                 conf,
                 logger=None,
                 account_ring=None,
                 container_ring=None):
        if conf is None:
            conf = {}
        if logger is None:
            self.logger = get_logger(conf, log_route='proxy-server')
        else:
            self.logger = logger
        self._error_limiting = {}

        swift_dir = conf.get('swift_dir', '/etc/swift')
        self.swift_dir = swift_dir
        self.node_timeout = float(conf.get('node_timeout', 10))
        self.recoverable_node_timeout = float(
            conf.get('recoverable_node_timeout', self.node_timeout))
        self.conn_timeout = float(conf.get('conn_timeout', 0.5))
        self.client_timeout = float(conf.get('client_timeout', 60))
        self.object_chunk_size = int(conf.get('object_chunk_size', 65536))
        self.client_chunk_size = int(conf.get('client_chunk_size', 65536))
        self.trans_id_suffix = conf.get('trans_id_suffix', '')
        self.post_quorum_timeout = float(conf.get('post_quorum_timeout', 0.5))
        self.error_suppression_interval = \
            int(conf.get('error_suppression_interval', 60))
        self.error_suppression_limit = \
            int(conf.get('error_suppression_limit', 10))
        self.recheck_container_existence = \
            int(conf.get('recheck_container_existence',
                         DEFAULT_RECHECK_CONTAINER_EXISTENCE))
        self.recheck_updating_shard_ranges = \
            int(conf.get('recheck_updating_shard_ranges',
                         DEFAULT_RECHECK_UPDATING_SHARD_RANGES))
        self.recheck_account_existence = \
            int(conf.get('recheck_account_existence',
                         DEFAULT_RECHECK_ACCOUNT_EXISTENCE))
        self.allow_account_management = \
            config_true_value(conf.get('allow_account_management', 'no'))
        self.container_ring = container_ring or Ring(swift_dir,
                                                     ring_name='container')
        self.account_ring = account_ring or Ring(swift_dir,
                                                 ring_name='account')
        # ensure rings are loaded for all configured storage policies
        for policy in POLICIES:
            policy.load_ring(swift_dir)
        self.obj_controller_router = ObjectControllerRouter()
        mimetypes.init(mimetypes.knownfiles +
                       [os.path.join(swift_dir, 'mime.types')])
        self.account_autocreate = \
            config_true_value(conf.get('account_autocreate', 'no'))
        if conf.get('auto_create_account_prefix'):
            self.logger.warning('Option auto_create_account_prefix is '
                                'deprecated. Configure '
                                'auto_create_account_prefix under the '
                                'swift-constraints section of '
                                'swift.conf. This option will '
                                'be ignored in a future release.')
            self.auto_create_account_prefix = \
                conf['auto_create_account_prefix']
        else:
            self.auto_create_account_prefix = \
                constraints.AUTO_CREATE_ACCOUNT_PREFIX
        self.expiring_objects_account = self.auto_create_account_prefix + \
            (conf.get('expiring_objects_account_name') or 'expiring_objects')
        self.expiring_objects_container_divisor = \
            int(conf.get('expiring_objects_container_divisor') or 86400)
        self.max_containers_per_account = \
            int(conf.get('max_containers_per_account') or 0)
        self.max_containers_whitelist = [
            a.strip()
            for a in conf.get('max_containers_whitelist', '').split(',')
            if a.strip()
        ]
        self.deny_host_headers = [
            host.strip()
            for host in conf.get('deny_host_headers', '').split(',')
            if host.strip()
        ]
        self.log_handoffs = config_true_value(conf.get('log_handoffs', 'true'))
        self.cors_allow_origin = [
            a.strip() for a in conf.get('cors_allow_origin', '').split(',')
            if a.strip()
        ]
        self.cors_expose_headers = [
            a.strip() for a in conf.get('cors_expose_headers', '').split(',')
            if a.strip()
        ]
        self.strict_cors_mode = config_true_value(
            conf.get('strict_cors_mode', 't'))
        self.node_timings = {}
        self.timing_expiry = int(conf.get('timing_expiry', 300))
        value = conf.get('request_node_count', '2 * replicas').lower().split()
        if len(value) == 1:
            rnc_value = int(value[0])
            self.request_node_count = lambda replicas: rnc_value
        elif len(value) == 3 and value[1] == '*' and value[2] == 'replicas':
            rnc_value = int(value[0])
            self.request_node_count = lambda replicas: rnc_value * replicas
        else:
            raise ValueError('Invalid request_node_count value: %r' %
                             ''.join(value))
        # swift_owner_headers are stripped by the account and container
        # controllers; we should extend header stripping to object controller
        # when a privileged object header is implemented.
        swift_owner_headers = conf.get(
            'swift_owner_headers', 'x-container-read, x-container-write, '
            'x-container-sync-key, x-container-sync-to, '
            'x-account-meta-temp-url-key, x-account-meta-temp-url-key-2, '
            'x-container-meta-temp-url-key, x-container-meta-temp-url-key-2, '
            'x-account-access-control')
        self.swift_owner_headers = [
            name.strip().title() for name in swift_owner_headers.split(',')
            if name.strip()
        ]

        # When upgrading from liberasurecode<=1.5.0, you may want to continue
        # writing legacy CRCs until all nodes are upgraded and capable of
        # reading fragments with zlib CRCs.
        # See https://bugs.launchpad.net/liberasurecode/+bug/1886088 for more
        # information.
        if 'write_legacy_ec_crc' in conf:
            os.environ['LIBERASURECODE_WRITE_LEGACY_CRC'] = \
                '1' if config_true_value(conf['write_legacy_ec_crc']) else '0'
        # else, assume operators know what they're doing and leave env alone

        # Initialization was successful, so now apply the client chunk size
        # parameter as the default read / write buffer size for the network
        # sockets.
        #
        # NOTE WELL: This is a class setting, so until we can set this on a
        # per-connection basis, this affects reading and writing on ALL
        # sockets: those between the proxy servers and external clients, and
        # those between the proxy servers and the other internal servers.
        #
        # ** Because it affects the client as well, we currently use the
        # client chunk size as the governor, not the object chunk size.
        if sys.version_info < (3, ):
            socket._fileobject.default_bufsize = self.client_chunk_size
        # TODO: find a way to enable similar functionality in py3

        self.expose_info = config_true_value(conf.get('expose_info', 'yes'))
        self.disallowed_sections = list_from_csv(
            conf.get(
                'disallowed_sections', ', '.join([
                    'swift.auto_create_account_prefix',
                    'swift.valid_api_versions',
                ])))
        self.admin_key = conf.get('admin_key', None)
        self._override_options = self._load_per_policy_config(conf)
        self.sorts_by_timing = any(pc.sorting_method == 'timing'
                                   for pc in self._override_options.values())

        register_swift_info(
            version=swift_version,
            strict_cors_mode=self.strict_cors_mode,
            policies=POLICIES.get_policy_info(),
            allow_account_management=self.allow_account_management,
            account_autocreate=self.account_autocreate,
            **constraints.EFFECTIVE_CONSTRAINTS)
        self.watchdog = Watchdog()
        self.watchdog.spawn()
Example #41
    def GET(self, req):
        """
        Handle HTTP GET request.

        The body of the response to a successful GET request contains a listing
        of either objects or shard ranges. The exact content of the listing is
        determined by a combination of request headers and query string
        parameters, as follows:

        * The type of the listing is determined by the
          ``X-Backend-Record-Type`` header. If this header has value ``shard``
          then the response body will be a list of shard ranges; if this header
          has value ``auto``, and the container state is ``sharding`` or
          ``sharded``, then the listing will be a list of shard ranges;
          otherwise the response body will be a list of objects.

        * Both shard range and object listings may be constrained to a name
          range by the ``marker`` and ``end_marker`` query string parameters.
          Object listings will only contain objects whose names are greater
          than any ``marker`` value and less than any ``end_marker`` value.
          Shard range listings will only contain shard ranges whose namespace
          is greater than or includes any ``marker`` value and is less than or
          includes any ``end_marker`` value.

        * Shard range listings may also be constrained by an ``includes`` query
          string parameter. If this parameter is present, the listing will
          only contain shard ranges whose namespace includes the value of
          the parameter; any ``marker`` or ``end_marker`` parameters are
          ignored.

        * The length of an object listing may be constrained by the ``limit``
          parameter. Object listings may also be constrained by ``prefix``,
          ``delimiter`` and ``path`` query string parameters.

        * Shard range listings will include deleted shard ranges if and only if
          the ``X-Backend-Include-Deleted`` header value is one of
          :attr:`swift.common.utils.TRUE_VALUES`. Object listings never
          include deleted objects.

        * Shard range listings may be constrained to include only shard ranges
          whose state is specified by a query string ``states`` parameter. If
          present, the ``states`` parameter should be a comma separated list of
          either the string or integer representation of
          :data:`~swift.common.utils.ShardRange.STATES`.

          Two alias values may be used in a ``states`` parameter value:
          ``listing`` will cause the listing to include all shard ranges in a
          state suitable for contributing to an object listing; ``updating``
          will cause the listing to include all shard ranges in a state
          suitable to accept an object update.

          If either of these aliases is used then the shard range listing will
          if necessary be extended with a synthesised 'filler' range in order
          to satisfy the requested name range when insufficient actual shard
          ranges are found. Any 'filler' shard range will cover the otherwise
          uncovered tail of the requested name range and will point back to the
          same container.

        * Listings are not normally returned from a deleted container. However,
          the ``X-Backend-Override-Deleted`` header may be used with a value in
          :attr:`swift.common.utils.TRUE_VALUES` to force a shard range
          listing to be returned from a deleted container whose DB file still
          exists.

        :param req: an instance of :class:`swift.common.swob.Request`
        :returns: an instance of :class:`swift.common.swob.Response`
        """
        drive, part, account, container, obj = get_obj_name_and_placement(req)
        path = get_param(req, 'path')
        prefix = get_param(req, 'prefix')
        delimiter = get_param(req, 'delimiter')
        marker = get_param(req, 'marker', '')
        end_marker = get_param(req, 'end_marker')
        limit = constraints.CONTAINER_LISTING_LIMIT
        given_limit = get_param(req, 'limit')
        reverse = config_true_value(get_param(req, 'reverse'))
        if given_limit and given_limit.isdigit():
            limit = int(given_limit)
            if limit > constraints.CONTAINER_LISTING_LIMIT:
                return HTTPPreconditionFailed(
                    request=req,
                    body='Maximum limit is %d' %
                    constraints.CONTAINER_LISTING_LIMIT)
        out_content_type = listing_formats.get_listing_content_type(req)
        try:
            check_drive(self.root, drive, self.mount_check)
        except ValueError:
            return HTTPInsufficientStorage(drive=drive, request=req)
        broker = self._get_container_broker(drive,
                                            part,
                                            account,
                                            container,
                                            pending_timeout=0.1,
                                            stale_reads_ok=True)
        info, is_deleted = broker.get_info_is_deleted()
        record_type = req.headers.get('x-backend-record-type', '').lower()
        if record_type == 'auto' and info.get('db_state') in (SHARDING,
                                                              SHARDED):
            record_type = 'shard'
        if record_type == 'shard':
            override_deleted = info and config_true_value(
                req.headers.get('x-backend-override-deleted', False))
            resp_headers = gen_resp_headers(info,
                                            is_deleted=is_deleted
                                            and not override_deleted)
            if is_deleted and not override_deleted:
                return HTTPNotFound(request=req, headers=resp_headers)
            resp_headers['X-Backend-Record-Type'] = 'shard'
            includes = get_param(req, 'includes')
            states = get_param(req, 'states')
            fill_gaps = False
            if states:
                states = list_from_csv(states)
                fill_gaps = any(('listing' in states, 'updating' in states))
                try:
                    states = broker.resolve_shard_range_states(states)
                except ValueError:
                    return HTTPBadRequest(request=req, body='Bad state')
            include_deleted = config_true_value(
                req.headers.get('x-backend-include-deleted', False))
            container_list = broker.get_shard_ranges(
                marker,
                end_marker,
                includes,
                reverse,
                states=states,
                include_deleted=include_deleted,
                fill_gaps=fill_gaps)
        else:
            resp_headers = gen_resp_headers(info, is_deleted=is_deleted)
            if is_deleted:
                return HTTPNotFound(request=req, headers=resp_headers)
            resp_headers['X-Backend-Record-Type'] = 'object'
            # Use the retired db while container is in process of sharding,
            # otherwise use current db
            src_broker = broker.get_brokers()[0]
            container_list = src_broker.list_objects_iter(
                limit,
                marker,
                end_marker,
                prefix,
                delimiter,
                path,
                storage_policy_index=info['storage_policy_index'],
                reverse=reverse,
                allow_reserved=req.allow_reserved_names)
        return self.create_listing(req, out_content_type, info, resp_headers,
                                   broker.metadata, container_list, container)
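
As a rough illustration of the docstring above, a backend caller selects the
shard-range branch with the X-Backend-Record-Type header and narrows the
listing with query parameters. The path and the 'controller' instance below
are placeholders, not taken from the source:

from swift.common.swob import Request

req = Request.blank(
    '/sda1/0/AUTH_test/c?states=listing&marker=a&end_marker=m',
    headers={'X-Backend-Record-Type': 'shard'})
resp = controller.GET(req)   # expected: a JSON listing of shard ranges
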
Example #42
    def __init__(self, app, conf, logger=None):
        self.app = app
        self.pid = os.getpid()
        self.logger = get_logger(
            conf, log_route=conf.get('log_name', 'proxy-logging'))
        self.log_formatter = LogStringFormatter(default='-', quote=True)
        self.log_msg_template = conf.get(
            'log_msg_template', (
                '{client_ip} {remote_addr} {end_time.datetime} {method} '
                '{path} {protocol} {status_int} {referer} {user_agent} '
                '{auth_token} {bytes_recvd} {bytes_sent} {client_etag} '
                '{transaction_id} {headers} {request_time} {source} '
                '{log_info} {start_time} {end_time} {policy_index}'))
        # The salt is only used in StrAnonymizer. That class requires bytes,
        # so convert it now to prevent useless conversion later.
        self.anonymization_method = conf.get('log_anonymization_method', 'md5')
        self.anonymization_salt = conf.get('log_anonymization_salt', '')
        self.log_hdrs = config_true_value(conf.get(
            'access_log_headers',
            conf.get('log_headers', 'no')))
        log_hdrs_only = list_from_csv(conf.get(
            'access_log_headers_only', ''))
        self.log_hdrs_only = [x.title() for x in log_hdrs_only]

        # The leading access_* check is in case someone assumes that
        # log_statsd_valid_http_methods behaves like the other log_statsd_*
        # settings.
        self.valid_methods = conf.get(
            'access_log_statsd_valid_http_methods',
            conf.get('log_statsd_valid_http_methods',
                     'GET,HEAD,POST,PUT,DELETE,COPY,OPTIONS'))
        self.valid_methods = [m.strip().upper() for m in
                              self.valid_methods.split(',') if m.strip()]
        access_log_conf = {}
        for key in ('log_facility', 'log_name', 'log_level', 'log_udp_host',
                    'log_udp_port', 'log_statsd_host', 'log_statsd_port',
                    'log_statsd_default_sample_rate',
                    'log_statsd_sample_rate_factor',
                    'log_statsd_metric_prefix'):
            value = conf.get('access_' + key, conf.get(key, None))
            if value:
                access_log_conf[key] = value
        self.access_logger = logger or get_logger(
            access_log_conf,
            log_route=conf.get('access_log_route', 'proxy-access'))
        self.access_logger.set_statsd_prefix('proxy-server')
        self.reveal_sensitive_prefix = int(
            conf.get('reveal_sensitive_prefix', 16))
        self.check_log_msg_template_validity()

        self.perfdata = config_true_value(conf.get('perfdata', 'false'))
        self.perfdata_user_agents = None
        if self.perfdata:
            pattern_dict = {k: v for k, v in conf.items()
                            if k.startswith('perfdata_user_agent')}
            self.perfdata_user_agents = [re.compile(pattern_dict[k])
                                         for k in sorted(pattern_dict.keys())]
            if not self.perfdata_user_agents:
                self.logger.warning('No user-agent pattern defined, '
                                    'performance data will be logged '
                                    'for every request.')
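
The log_msg_template above is an ordinary str.format-style template; each
request is rendered by substituting per-request fields (with '-' for any
missing value, courtesy of LogStringFormatter). A toy rendering with
invented field values:

template = '{client_ip} {method} {path} {status_int}'
fields = {'client_ip': '203.0.113.9', 'method': 'GET',
          'path': '/v1/AUTH_test/c/o', 'status_int': 200}
print(template.format(**fields))   # 203.0.113.9 GET /v1/AUTH_test/c/o 200
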
Example #43
 def GET(self, req):
     """Handle HTTP GET request."""
     drive, part, account, container, obj = split_and_validate_path(
         req, 4, 5, True)
     path = get_param(req, 'path')
     prefix = get_param(req, 'prefix')
     delimiter = get_param(req, 'delimiter')
     if delimiter and (len(delimiter) > 1 or ord(delimiter) > 254):
         # delimiters can be made more flexible later
         return HTTPPreconditionFailed(body='Bad delimiter')
     marker = get_param(req, 'marker', '')
     end_marker = get_param(req, 'end_marker')
     limit = constraints.CONTAINER_LISTING_LIMIT
     given_limit = get_param(req, 'limit')
     reverse = config_true_value(get_param(req, 'reverse'))
     if given_limit and given_limit.isdigit():
         limit = int(given_limit)
         if limit > constraints.CONTAINER_LISTING_LIMIT:
             return HTTPPreconditionFailed(
                 request=req,
                 body='Maximum limit is %d' %
                 constraints.CONTAINER_LISTING_LIMIT)
     out_content_type = listing_formats.get_listing_content_type(req)
     if not check_drive(self.root, drive, self.mount_check):
         return HTTPInsufficientStorage(drive=drive, request=req)
     broker = self._get_container_broker(drive,
                                         part,
                                         account,
                                         container,
                                         pending_timeout=0.1,
                                         stale_reads_ok=True)
     info, is_deleted = broker.get_info_is_deleted()
     record_type = req.headers.get('x-backend-record-type', '').lower()
     if record_type == 'auto' and info.get('db_state') in (SHARDING,
                                                           SHARDED):
         record_type = 'shard'
     if record_type == 'shard':
         override_deleted = info and config_true_value(
             req.headers.get('x-backend-override-deleted', False))
         resp_headers = gen_resp_headers(info,
                                         is_deleted=is_deleted
                                         and not override_deleted)
         if is_deleted and not override_deleted:
             return HTTPNotFound(request=req, headers=resp_headers)
         resp_headers['X-Backend-Record-Type'] = 'shard'
         includes = get_param(req, 'includes')
         states = get_param(req, 'states')
         fill_gaps = False
         if states:
             states = list_from_csv(states)
             fill_gaps = any(('listing' in states, 'updating' in states))
             try:
                 states = broker.resolve_shard_range_states(states)
             except ValueError:
                 return HTTPBadRequest(request=req, body='Bad state')
         include_deleted = config_true_value(
             req.headers.get('x-backend-include-deleted', False))
         container_list = broker.get_shard_ranges(
             marker,
             end_marker,
             includes,
             reverse,
             states=states,
             include_deleted=include_deleted,
             fill_gaps=fill_gaps)
     else:
         resp_headers = gen_resp_headers(info, is_deleted=is_deleted)
         if is_deleted:
             return HTTPNotFound(request=req, headers=resp_headers)
         resp_headers['X-Backend-Record-Type'] = 'object'
         # Use the retired db while container is in process of sharding,
         # otherwise use current db
         src_broker = broker.get_brokers()[0]
         container_list = src_broker.list_objects_iter(
             limit,
             marker,
             end_marker,
             prefix,
             delimiter,
             path,
             storage_policy_index=info['storage_policy_index'],
             reverse=reverse)
     return self.create_listing(req, out_content_type, info, resp_headers,
                                broker.metadata, container_list, container)
Example #44
 def run_once(self, *args, **kwargs):
     override_devices = list_from_csv(kwargs.get('devices'))
     devices = override_devices or self._get_devices()
     for device in devices:
         self.object_sweep(device)
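
list_from_csv (from swift.common.utils) is the workhorse of nearly every
example here. A minimal re-implementation of its observed behaviour --
split on commas, strip whitespace, drop empties, and map None/'' to []:

def list_from_csv(comma_separated_str):
    if comma_separated_str:
        return [v.strip() for v in comma_separated_str.split(',')
                if v.strip()]
    return []

# list_from_csv('sda, sdb,,sdc') -> ['sda', 'sdb', 'sdc']
# list_from_csv(None)            -> []
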
Example #45
    def __init__(self,
                 conf,
                 memcache=None,
                 logger=None,
                 account_ring=None,
                 container_ring=None):
        if conf is None:
            conf = {}
        if logger is None:
            self.logger = get_logger(conf, log_route='proxy-server')
        else:
            self.logger = logger

        swift_dir = conf.get('swift_dir', '/etc/swift')
        self.swift_dir = swift_dir
        self.node_timeout = float(conf.get('node_timeout', 10))
        self.recoverable_node_timeout = float(
            conf.get('recoverable_node_timeout', self.node_timeout))
        self.conn_timeout = float(conf.get('conn_timeout', 0.5))
        self.client_timeout = int(conf.get('client_timeout', 60))
        self.put_queue_depth = int(conf.get('put_queue_depth', 10))
        self.object_chunk_size = int(conf.get('object_chunk_size', 65536))
        self.client_chunk_size = int(conf.get('client_chunk_size', 65536))
        self.trans_id_suffix = conf.get('trans_id_suffix', '')
        self.allow_account_management = \
            config_true_value(conf.get('allow_account_management', 'no'))
        self.memcache = memcache
        mimetypes.init(mimetypes.knownfiles +
                       [os.path.join(swift_dir, 'mime.types')])
        self.account_autocreate = \
            config_true_value(conf.get('account_autocreate', 'no'))
        self.deny_host_headers = [
            host.strip()
            for host in conf.get('deny_host_headers', '').split(',')
            if host.strip()
        ]
        self.strict_cors_mode = config_true_value(
            conf.get('strict_cors_mode', 't'))
        value = conf.get('request_node_count', '2 * replicas').lower().split()
        if len(value) == 1:
            rnc_value = int(value[0])
            self.request_node_count = lambda replicas: rnc_value
        elif len(value) == 3 and value[1] == '*' and value[2] == 'replicas':
            rnc_value = int(value[0])
            self.request_node_count = lambda replicas: rnc_value * replicas
        else:
            raise ValueError('Invalid request_node_count value: %r' %
                             ''.join(value))
        swift_owner_headers = conf.get(
            'swift_owner_headers', 'x-container-read, x-container-write, '
            'x-container-sync-key, x-container-sync-to, '
            'x-account-meta-temp-url-key, x-account-meta-temp-url-key-2, '
            'x-container-meta-temp-url-key, x-container-meta-temp-url-key-2, '
            'x-account-access-control')
        self.swift_owner_headers = [
            name.strip().title() for name in swift_owner_headers.split(',')
            if name.strip()
        ]
        socket._fileobject.default_bufsize = self.client_chunk_size
        self.expose_info = config_true_value(conf.get('expose_info', 'yes'))
        self.disallowed_sections = list_from_csv(
            conf.get('disallowed_sections', 'swift.valid_api_versions'))
        self.admin_key = conf.get('admin_key', None)
        register_swift_info(
            version=swift_version,
            strict_cors_mode=self.strict_cors_mode,
            policies=POLICIES.get_policy_info(),
            allow_account_management=self.allow_account_management,
            account_autocreate=self.account_autocreate,
            **constraints.EFFECTIVE_CONSTRAINTS)
        self.swift_baseurl = conf.get('swift_baseurl')
Example #46
    def GETorHEAD(self, req):
        """
        Handle GET or HEAD requests on a part of a multipart object.
        """
        part_number = self.parse_part_number(req)

        had_match = False
        for match_header in ('if-match', 'if-none-match'):
            if match_header not in req.headers:
                continue
            had_match = True
            for value in list_from_csv(req.headers[match_header]):
                if value.startswith('"') and value.endswith('"'):
                    value = value[1:-1]
                if value.endswith('-N'):
                    # Deal with fake S3-like etags for SLOs uploaded via Swift
                    req.headers[match_header] += ', ' + value[:-2]

        if had_match:
            # Update where to look
            update_etag_is_at_header(req, sysmeta_header('object', 'etag'))

        # Get the list of parts. Must be raw to get all response headers.
        slo_resp = req.get_response(self.app,
                                    'GET',
                                    req.container_name,
                                    req.object_name,
                                    query={
                                        'multipart-manifest': 'get',
                                        'format': 'raw'
                                    })

        # Check if the object is really a SLO. If not, and user asked
        # for the first part, do a regular request.
        if 'X-Static-Large-Object' not in slo_resp.sw_headers:
            if part_number == 1:
                if slo_resp.is_success and req.method == 'HEAD':
                    # Clear body
                    slo_resp.body = ''
                return slo_resp
            else:
                close_if_possible(slo_resp.app_iter)
                raise InvalidRange()

        # Locate the part
        slo = json.loads(slo_resp.body)
        try:
            part = slo[part_number - 1]
        except IndexError:
            raise InvalidRange()

        # Redirect the request on the part
        _, req.container_name, req.object_name = part['path'].split('/', 2)
        # XXX enforce container_name and object_name to be <str>
        # or it will raise issues in swift3/requests when merging both
        req.container_name = req.container_name.encode('utf-8')
        req.object_name = req.object_name.encode('utf8')
        # The etag check was performed with the manifest
        if had_match:
            for match_header in ('if-match', 'if-none-match'):
                req.headers.pop(match_header, None)
        resp = req.get_response(self.app)

        # Replace status
        slo_resp.status = resp.status
        # Replace body
        slo_resp.app_iter = resp.app_iter
        # Update with the size of the part
        slo_resp.headers['Content-Length'] = \
            resp.headers.get('Content-Length', 0)
        slo_resp.sw_headers['Content-Length'] = \
            slo_resp.headers['Content-Length']
        # Add the number of parts in this object
        slo_resp.headers['X-Amz-Mp-Parts-Count'] = len(slo)
        return slo_resp
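
The if-match rewrite near the top of this handler can be traced with a
single invented value: a quoted S3-style etag with the '-N' suffix gains an
extra, unquoted candidate so the manifest's real etag can also match:

header = '"d41d8cd98f00b204e9800998ecf8427e-N"'
value = header[1:-1]                 # quotes stripped by the loop
assert value.endswith('-N')
header += ', ' + value[:-2]
# header now offers both forms:
# '"d41d8cd98f00b204e9800998ecf8427e-N", d41d8cd98f00b204e9800998ecf8427e'
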
Example #47
    def __init__(self, app, conf, logger=None):
        self.app = app

        if logger:
            self.logger = logger
        else:
            self.logger = get_logger(conf, log_route='avrofilter')

        # Any roles specified as "nostrip_roles" will always receive the
        # full uncensored Avro data
        if 'nostrip_roles' in conf:
            self.nostrip_roles = set(
                x.strip() for x in conf['nostrip_roles'].split(','))
        else:
            self.nostrip_roles = set()

        # admin should always be a nostrip role
        self.nostrip_roles.add('admin')

        self.defaultstrip = {}
        self.dontstrip = {}

        # Any field mentioned in a "retain_keys" option will be stripped
        # by default, unless the user matches a role where that field is
        # explicitly listed as being retained

        # In other words: defaultstrip is the union of all of the fields that
        # are explicitly configured as retainable. Any "public" fields should
        # NOT be listed as a retained field for any role.
        for k, v in conf.iteritems():
            # The role that this option applies to is specified in the
            # prefix of the configuration option name
            # e.g. "swiftro_retain_keys" -> role = "swiftro"
            if not k.endswith("_retain_keys"):
                continue

            role = k[:-12]

            if role in self.dontstrip:
                self.logger.info(
                    "Warning: role '%s' appears multiple times in "
                    "AvroFilterMiddleware configuration" % (role))
                # TODO only warn once per duplicate role
                continue

            self.dontstrip[role] = {}

            for ts in list_from_csv(v):
                ts = ts.strip()
                if len(ts) == 0:
                    continue

                # fields are listed using <datatype>:<fieldname> format, e.g.
                # "flowtuple:netacq_country"
                ts = ts.split(':')
                if len(ts) != 2:
                    self.logger.info(
                        "Invalid 'retain_keys' parameter format, should be "
                        "<data type>:<field name> (not %s)" % (ts))
                    continue

                if ts[0] not in self.dontstrip[role]:
                    self.dontstrip[role][ts[0]] = set()
                if ts[0] not in self.defaultstrip:
                    self.defaultstrip[ts[0]] = set()

                self.dontstrip[role][ts[0]].add(ts[1])
                self.defaultstrip[ts[0]].add(ts[1])
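
A hypothetical conf dict for the filter above (the role, datatype, and
field names are invented) and the structures __init__ derives from it:

conf = {
    'nostrip_roles': 'admin, operator',
    'swiftro_retain_keys': 'flowtuple:netacq_country, flowtuple:src_ip',
}
# After __init__:
#   nostrip_roles == {'admin', 'operator'}
#   dontstrip    == {'swiftro': {'flowtuple': {'netacq_country', 'src_ip'}}}
#   defaultstrip == {'flowtuple': {'netacq_country', 'src_ip'}}
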
Example #48
    def __call__(self, req):
        try:
            (version, account, container, obj) = \
                    split_path(req.path_info, 4, 4, True)
        except ValueError:
            return req.get_response(self.app)

        # Only worry about data fetches, not uploads.
        if not valid_api_version(version) or req.method not in ('GET', 'HEAD'):
            return req.get_response(self.app)

        # Get all roles that apply to the user making the request
        roles = set()
        if (req.environ.get('HTTP_X_IDENTITY_STATUS') == 'Confirmed'
                or req.environ.get('HTTP_X_SERVICE_IDENTITY_STATUS')
                in (None, "Confirmed")):
            roles = set(list_from_csv(req.environ.get('HTTP_X_ROLES', '')))

        # If we have one of the "nostrip" roles, then don't do any stripping
        if roles.intersection(self.nostrip_roles):
            return req.get_response(self.app)

        # Perform the request and grab a response object that we can work
        # with
        resp = req.get_response(self.app)

        # Check that the requested object is actually a CAIDA avro file
        conttype = resp.headers.get("Content-Type", None)

        if conttype is None:
            return resp

        if not conttype.startswith("application/vnd.caida."):
            return resp

        if not conttype.endswith(".avro"):
            return resp

        dtype = conttype.replace("application/vnd.caida.", "", 1)[:-5]

        if dtype not in self.defaultstrip:
            return resp

        # Start by planning to strip all fields for this datatype that
        # explicitly appear in the config file. Then, for each role that
        # the user has, remove any fields from the strip set that should be
        # retained for that role.
        tostrip = self.defaultstrip[dtype]

        for r in roles:
            if r not in self.dontstrip:
                # No specified config for this role, so leave strip set as is
                continue

            if dtype not in self.dontstrip[r]:
                continue

            tostrip = tostrip - self.dontstrip[r][dtype]

        # Remove the Etag because otherwise swift clients get very upset
        # about the md5sum of the response body not matching the md5sum
        # in the Etag header :/
        if 'Etag' in resp.headers:
            del resp.headers['Etag']

        # If we are going to be stripping fields, replace our response
        # iterable with one that will parse the received Avro and remove
        # the desired fields. The swift proxy should handle the rest.
        resp.app_iter = GenericStrippingAvroParser(resp.app_iter, resp.body,
                                                   tostrip)

        return resp
Example #49
    def __init__(self,
                 conf,
                 memcache=None,
                 logger=None,
                 account_ring=None,
                 container_ring=None,
                 object_ring=None):
        if conf is None:
            conf = {}
        if logger is None:
            self.logger = get_logger(conf, log_route='proxy-server')
        else:
            self.logger = logger

        swift_dir = conf.get('swift_dir', '/etc/swift')
        self.node_timeout = int(conf.get('node_timeout', 10))
        self.conn_timeout = float(conf.get('conn_timeout', 0.5))
        self.client_timeout = int(conf.get('client_timeout', 60))
        self.put_queue_depth = int(conf.get('put_queue_depth', 10))
        self.object_chunk_size = int(conf.get('object_chunk_size', 65536))
        self.client_chunk_size = int(conf.get('client_chunk_size', 65536))
        self.trans_id_suffix = conf.get('trans_id_suffix', '')
        self.post_quorum_timeout = float(conf.get('post_quorum_timeout', 0.5))
        self.error_suppression_interval = \
            int(conf.get('error_suppression_interval', 60))
        self.error_suppression_limit = \
            int(conf.get('error_suppression_limit', 10))
        self.recheck_container_existence = \
            int(conf.get('recheck_container_existence', 60))
        self.recheck_account_existence = \
            int(conf.get('recheck_account_existence', 60))
        self.allow_account_management = \
            config_true_value(conf.get('allow_account_management', 'no'))
        self.object_post_as_copy = \
            config_true_value(conf.get('object_post_as_copy', 'true'))
        self.object_ring = object_ring or Ring(swift_dir, ring_name='object')
        self.container_ring = container_ring or Ring(swift_dir,
                                                     ring_name='container')
        self.account_ring = account_ring or Ring(swift_dir,
                                                 ring_name='account')
        self.memcache = memcache
        mimetypes.init(mimetypes.knownfiles +
                       [os.path.join(swift_dir, 'mime.types')])
        self.account_autocreate = \
            config_true_value(conf.get('account_autocreate', 'no'))
        self.expiring_objects_account = \
            (conf.get('auto_create_account_prefix') or '.') + \
            'expiring_objects'
        self.expiring_objects_container_divisor = \
            int(conf.get('expiring_objects_container_divisor') or 86400)
        self.max_containers_per_account = \
            int(conf.get('max_containers_per_account') or 0)
        self.max_containers_whitelist = [
            a.strip()
            for a in conf.get('max_containers_whitelist', '').split(',')
            if a.strip()
        ]
        self.deny_host_headers = [
            host.strip()
            for host in conf.get('deny_host_headers', '').split(',')
            if host.strip()
        ]
        self.rate_limit_after_segment = \
            int(conf.get('rate_limit_after_segment', 10))
        self.rate_limit_segments_per_sec = \
            int(conf.get('rate_limit_segments_per_sec', 1))
        self.log_handoffs = config_true_value(conf.get('log_handoffs', 'true'))
        self.cors_allow_origin = [
            a.strip() for a in conf.get('cors_allow_origin', '').split(',')
            if a.strip()
        ]
        self.node_timings = {}
        self.timing_expiry = int(conf.get('timing_expiry', 300))
        self.sorting_method = conf.get('sorting_method', 'shuffle').lower()
        self.allow_static_large_object = config_true_value(
            conf.get('allow_static_large_object', 'true'))
        self.max_large_object_get_time = float(
            conf.get('max_large_object_get_time', '86400'))
        value = conf.get('request_node_count', '2 * replicas').lower().split()
        if len(value) == 1:
            value = int(value[0])
            self.request_node_count = lambda r: value
        elif len(value) == 3 and value[1] == '*' and value[2] == 'replicas':
            value = int(value[0])
            self.request_node_count = lambda r: value * r.replica_count
        else:
            raise ValueError('Invalid request_node_count value: %r' %
                             ''.join(value))
        try:
            read_affinity = conf.get('read_affinity', '')
            self.read_affinity_sort_key = affinity_key_function(read_affinity)
        except ValueError as err:
            # make the message a little more useful
            raise ValueError("Invalid read_affinity value: %r (%s)" %
                             (read_affinity, err.message))
        try:
            write_affinity = conf.get('write_affinity', '')
            self.write_affinity_is_local_fn \
                = affinity_locality_predicate(write_affinity)
        except ValueError as err:
            # make the message a little more useful
            raise ValueError("Invalid write_affinity value: %r (%s)" %
                             (write_affinity, err.message))
        value = conf.get('write_affinity_node_count',
                         '2 * replicas').lower().split()
        if len(value) == 1:
            value = int(value[0])
            self.write_affinity_node_count = lambda r: value
        elif len(value) == 3 and value[1] == '*' and value[2] == 'replicas':
            value = int(value[0])
            self.write_affinity_node_count = lambda r: value * r.replica_count
        else:
            raise ValueError('Invalid write_affinity_node_count value: %r' %
                             ''.join(value))
        swift_owner_headers = conf.get(
            'swift_owner_headers', 'x-container-read, x-container-write, '
            'x-container-sync-key, x-container-sync-to, '
            'x-account-meta-temp-url-key, x-account-meta-temp-url-key-2')
        self.swift_owner_headers = [
            name.strip() for name in swift_owner_headers.split(',')
            if name.strip()
        ]
        # Initialization was successful, so now apply the client chunk size
        # parameter as the default read / write buffer size for the network
        # sockets.
        #
        # NOTE WELL: This is a class setting, so until we can set this on a
        # per-connection basis, this affects reading and writing on ALL
        # sockets: those between the proxy servers and external clients, and
        # those between the proxy servers and the other internal servers.
        #
        # ** Because it affects the client as well, we currently use the
        # client chunk size as the governor, not the object chunk size.
        socket._fileobject.default_bufsize = self.client_chunk_size
        self.expose_info = config_true_value(conf.get('expose_info', 'yes'))
        self.disallowed_sections = list_from_csv(
            conf.get('disallowed_sections'))
        self.admin_key = conf.get('admin_key', None)
        register_swift_info(
            version=swift_version,
            max_file_size=constraints.MAX_FILE_SIZE,
            max_meta_name_length=constraints.MAX_META_NAME_LENGTH,
            max_meta_value_length=constraints.MAX_META_VALUE_LENGTH,
            max_meta_count=constraints.MAX_META_COUNT,
            account_listing_limit=constraints.ACCOUNT_LISTING_LIMIT,
            container_listing_limit=constraints.CONTAINER_LISTING_LIMIT,
            max_account_name_length=constraints.MAX_ACCOUNT_NAME_LENGTH,
            max_container_name_length=constraints.MAX_CONTAINER_NAME_LENGTH,
            max_object_name_length=constraints.MAX_OBJECT_NAME_LENGTH)
Example #50
    def __call__(self, env, start_response):
        req = Request(env)
        try:
            # account and container only
            version, acct, cont = req.split_path(2, 3)
        except ValueError:
            is_account_or_container_req = False
        else:
            is_account_or_container_req = True
        if not is_account_or_container_req:
            return self.app(env, start_response)

        if not valid_api_version(version) or req.method not in ('GET', 'HEAD'):
            return self.app(env, start_response)

        # OK, definitely have an account/container request.
        # Get the desired content-type, then force it to a JSON request.
        try:
            out_content_type = get_listing_content_type(req)
        except HTTPException as err:
            return err(env, start_response)

        params = req.params
        can_vary = 'format' not in params
        params['format'] = 'json'
        req.params = params

        # Give other middlewares a chance to be in charge
        env.setdefault('swift.format_listing', True)
        status, headers, resp_iter = req.call_application(self.app)
        if not env.get('swift.format_listing'):
            start_response(status, headers)
            return resp_iter

        header_to_index = {}
        resp_content_type = resp_length = None
        for i, (header, value) in enumerate(headers):
            header = header.lower()
            if header == 'content-type':
                header_to_index[header] = i
                resp_content_type = value.partition(';')[0]
            elif header == 'content-length':
                header_to_index[header] = i
                resp_length = int(value)
            elif header == 'vary':
                header_to_index[header] = i

        if not status.startswith(('200 ', '204 ')):
            start_response(status, headers)
            return resp_iter

        if can_vary:
            if 'vary' in header_to_index:
                value = headers[header_to_index['vary']][1]
                if 'accept' not in list_from_csv(value.lower()):
                    headers[header_to_index['vary']] = ('Vary',
                                                        value + ', Accept')
            else:
                headers.append(('Vary', 'Accept'))

        if resp_content_type != 'application/json':
            start_response(status, headers)
            return resp_iter

        if resp_length is None or \
                resp_length > MAX_CONTAINER_LISTING_CONTENT_LENGTH:
            start_response(status, headers)
            return resp_iter

        def set_header(header, value):
            if value is None:
                del headers[header_to_index[header]]
            else:
                headers[header_to_index[header]] = (
                    headers[header_to_index[header]][0], str(value))

        if req.method == 'HEAD':
            set_header('content-type', out_content_type + '; charset=utf-8')
            set_header('content-length', None)  # don't know, can't determine
            start_response(status, headers)
            return resp_iter

        body = b''.join(resp_iter)
        try:
            listing = json.loads(body)
            # Do a couple sanity checks
            if not isinstance(listing, list):
                raise ValueError
            if not all(isinstance(item, dict) for item in listing):
                raise ValueError
        except ValueError:
            # Static web listing that's returning invalid JSON?
            # Just pass it straight through; that's about all we *can* do.
            start_response(status, headers)
            return [body]

        if not req.allow_reserved_names:
            listing = self.filter_reserved(listing, acct, cont)

        try:
            if out_content_type.endswith('/xml'):
                if cont:
                    body = container_to_xml(
                        listing,
                        wsgi_to_bytes(cont).decode('utf-8'))
                else:
                    body = account_to_xml(listing,
                                          wsgi_to_bytes(acct).decode('utf-8'))
            elif out_content_type == 'text/plain':
                body = listing_to_text(listing)
            else:
                body = json.dumps(listing).encode('ascii')
        except KeyError:
            # listing was in a bad format -- funky static web listing??
            start_response(status, headers)
            return [body]

        if not body:
            status = '%s %s' % (HTTP_NO_CONTENT,
                                RESPONSE_REASONS[HTTP_NO_CONTENT][0])

        set_header('content-type', out_content_type + '; charset=utf-8')
        set_header('content-length', len(body))
        start_response(status, headers)
        return [body]
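
The Vary handling in Example #50 only appends 'Accept' when it is not
already present in the existing header; a toy trace with an invented value:

value = 'Origin, X-Auth-Token'
if 'accept' not in list_from_csv(value.lower()):
    value += ', Accept'
# value == 'Origin, X-Auth-Token, Accept'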