Ejemplo n.º 1
0
 def __init__(self, app, conf):
     """Wrap *app* and load the rings used to answer endpoint queries.

     Reads ``swift_dir`` (default ``/etc/swift``) and
     ``list_endpoints_path`` (default ``/endpoints/``) from *conf*, then
     loads the account and container rings from the swift directory.
     """
     self.app = app
     self.logger = get_logger(conf, log_route='endpoints')
     swift_dir = conf.get('swift_dir', '/etc/swift')
     self.swift_dir = swift_dir
     self.account_ring = Ring(swift_dir, ring_name='account')
     self.container_ring = Ring(swift_dir, ring_name='container')
     # The configured prefix is always stored with a trailing slash.
     prefix = conf.get('list_endpoints_path', '/endpoints/')
     self.endpoints_path = prefix if prefix.endswith('/') else prefix + '/'
Ejemplo n.º 2
0
 def __call__(self, env, start_response):
     """WSGI entry point: tag LXC deploy PUTs before passing them on.

     When the request is a PUT whose environ carries a truthy
     ``HTTP_X_OBJECT_META_LXC_DEPLOY`` value, the path is looked up in the
     ring loaded from ``self.object_ring_path`` and two request headers are
     set. Every request, tagged or not, is then handed to the wrapped app.
     """
     req = Request(env)
     is_put = env.get('REQUEST_METHOD') == "PUT"
     if is_put and env.get("HTTP_X_OBJECT_META_LXC_DEPLOY"):
         # NOTE(review): the ring is loaded again on every matching request.
         ring = Ring(self.object_ring_path)
         # NOTE(review): assumes RAW_PATH_INFO is present and has at least
         # five "/"-separated pieces -- confirm against the deployment.
         segments = env.get("RAW_PATH_INFO").split("/")
         placement = ring.get_nodes(segments[2], segments[3], segments[4])
         # Second element of the ring answer is the node list; the first
         # node's address becomes the deploy host.
         first_node = placement[1][0]
         req.headers["X-Object-Meta-LXC-HOST"] = first_node["ip"]
         req.headers["REMOTE_USER"] = segments[2]
     return self.app(env, start_response)
Ejemplo n.º 3
0
 def create_account(act):
     """Create account *act* on every node the test account ring assigns it.

     A PUT is sent straight to each node and each response is asserted to
     carry status 201.
     """
     stamp = utils.normalize_timestamp(time())
     part, primaries = Ring(_testdir, ring_name='account').get_nodes(act)
     for primary in primaries:
         # The object controller's http_connect helper is reused here simply
         # as a way to talk to the account server nodes.
         request_headers = {'X-Timestamp': stamp, 'x-trans-id': act}
         conn = swift.proxy.controllers.obj.http_connect(
             primary['ip'], primary['port'], primary['device'], part, 'PUT',
             '/' + act, request_headers)
         assert conn.getresponse().status == 201
Ejemplo n.º 4
0
 def __init__(self, app, conf):
     """Wrap *app*, load the rings and register the response formatters.

     Reads ``swift_dir`` (default ``/etc/swift``) and
     ``list_endpoints_path`` (default ``/endpoints/``) from *conf*. The
     response map ties each supported response version to the bound method
     that formats it; version 1.0 is the default.
     """
     self.app = app
     self.logger = get_logger(conf, log_route='endpoints')
     swift_dir = conf.get('swift_dir', '/etc/swift')
     self.swift_dir = swift_dir
     self.account_ring = Ring(swift_dir, ring_name='account')
     self.container_ring = Ring(swift_dir, ring_name='container')
     # The configured prefix is always stored with a trailing slash.
     prefix = conf.get('list_endpoints_path', '/endpoints/')
     self.endpoints_path = prefix if prefix.endswith('/') else prefix + '/'
     self.default_response_version = 1.0
     formatters = {}
     formatters[1.0] = self.v1_format_response
     formatters[2.0] = self.v2_format_response
     self.response_map = formatters
Ejemplo n.º 5
0
def is_valid_ring(ring_file):
    """Report whether *ring_file* loads as a usable ring.

    A ring counts as valid when:
        - it has at least one entry in ``devs``
        - ``get_part_nodes(1)`` returns a non-empty result

    Any exception raised while loading or querying the ring is treated as
    "not valid" rather than propagated.

    :returns: True if the ring is valid, False otherwise
    """
    try:
        candidate = Ring(ring_file)
        has_devices = len(candidate.devs) >= 1
        # Short-circuit: partition lookup is only attempted when the ring
        # lists at least one device.
        return has_devices and bool(candidate.get_part_nodes(1))
    except Exception:
        return False
Ejemplo n.º 6
0
class SwiftUsageInput(IDataSource):
    """Data source reporting per-tenant Swift account usage.

    Tenants are listed through Keystone; each tenant's account statistics
    are then read with a HEAD request sent directly to one of the account's
    ring nodes.
    """

    def __init__(self, project, username, password, auth_url, ring_location="/etc/swift/account.ring.gz"):
        """Store the Keystone credentials and the account ring location.

        :param project: tenant name used to authenticate with Keystone
        :param username: Keystone user name
        :param password: Keystone password
        :param auth_url: Keystone auth URL
        :param ring_location: path of the account ring file
        """
        self.__project = project
        self.__username = username
        self.__password = password
        self.__auth_url = auth_url
        self.__ring_location = ring_location
        # Loaded by get_data() before any fetch() call runs.
        self.__ring = None

    def get_data(self, **kwargs):
        """Collect usage statistics for every tenant Keystone lists.

        :param kwargs: accepted for interface compatibility; not used here
        :returns: the message built by ``init_message()`` with a ``"swift"``
                  dict mapping tenant id to the stats from :meth:`fetch`;
                  tenants without stats are left out
        """
        from keystoneclient.v2_0 import client
        from swift.common.ring import Ring

        keystone = client.Client(username=self.__username,
                                 password=self.__password,
                                 tenant_name=self.__project,
                                 auth_url=self.__auth_url)
        self.__ring = Ring(self.__ring_location)
        tenants = [tenant.id for tenant in keystone.tenants.list()]
        random.shuffle(tenants)
        data = init_message()
        data["swift"] = {}

        # Shut the pool down once the batch is done; otherwise every call
        # leaves a set of idle worker threads behind.
        pool = ThreadPool()
        try:
            all_stats = pool.map(self.fetch, tenants)
        finally:
            pool.close()
            pool.join()

        for tenant, stats in zip(tenants, all_stats):
            if stats is not None:
                data["swift"][tenant] = stats
        return data

    def fetch(self, tenant):
        """Read account statistics for *tenant* from one of its ring nodes.

        The account's nodes are tried in random order until one answers 204
        or 404.

        :param tenant: tenant id; the account name is ``AUTH_<tenant>``
        :returns: dict with ``containers``, ``objects``, ``bytes`` and
                  ``quota`` (``None`` when no quota header is present), or
                  ``None`` when the account does not exist or no node gave
                  a usable answer
        """
        account = "AUTH_%s" % tenant
        partition = self.__ring.get_part(account, None, None)
        nodes = self.__ring.get_part_nodes(partition)
        random.shuffle(nodes)
        for node in nodes:
            url = "http://%s:%s/%s/%s/%s" % (node["ip"], node["port"], node["device"], partition, account)
            try:
                response = requests.head(url, timeout=5)
                if response.status_code == 204:
                    headers = response.headers
                    quota = headers.get("x-account-meta-quota-bytes")
                    return {
                        "containers": int(headers["x-account-container-count"]),
                        "objects": int(headers["x-account-object-count"]),
                        "bytes": int(headers["x-account-bytes-used"]),
                        "quota": int(quota) if quota is not None else None,
                    }
                elif response.status_code == 404:
                    return None
                else:
                    log.warning("error fetching %s [HTTP %s]", url, response.status_code)
            except Exception:
                # Best effort: a failing node (network error, missing or
                # malformed header) is logged and the next node is tried.
                log.warning("error fetching %s", url, exc_info=True)
        log.error("failed to fetch info for tenant %s", tenant)
        return None
Ejemplo n.º 7
0
 def __init__(self, conf):
     """
     Read the replicator settings from *conf* and load the object ring.

     :param conf: configuration mapping; every option is read with
                  ``conf.get`` and falls back to the default shown below
     """
     def int_option(name, default):
         # Fetch an option and coerce it to int.
         return int(conf.get(name, default))

     self.conf = conf
     self.logger = get_logger(conf, log_route='object-replicator')
     self.devices_dir = conf.get('devices', '/srv/node')
     mount_check = conf.get('mount_check', 'true').lower()
     self.mount_check = mount_check in ('true', 't', '1', 'on', 'yes', 'y')
     vm_test_mode = conf.get('vm_test_mode', 'no').lower()
     self.vm_test_mode = vm_test_mode in ('yes', 'true', 'on', '1')
     self.swift_dir = conf.get('swift_dir', '/etc/swift')
     self.port = int_option('bind_port', 6000)
     self.concurrency = int_option('concurrency', 1)
     self.stats_interval = int_option('stats_interval', '300')
     self.object_ring = Ring(self.swift_dir, ring_name='object')
     check_interval = int_option('ring_check_interval', 15)
     self.ring_check_interval = check_interval
     self.next_check = time.time() + check_interval
     self.reclaim_age = int_option('reclaim_age', 86400 * 7)
     self.partition_times = []
     self.run_pause = int_option('run_pause', 30)
     self.rsync_timeout = int_option('rsync_timeout', 900)
     # Stored as the raw configured value, not converted to int.
     self.rsync_io_timeout = conf.get('rsync_io_timeout', '30')
     self.http_timeout = int_option('http_timeout', 60)
     self.lockup_timeout = int_option('lockup_timeout', 1800)
     recon_enable = conf.get('recon_enable', 'no').lower()
     self.recon_enable = recon_enable in TRUE_VALUES
     cache_path = conf.get('recon_cache_path', '/var/cache/swift')
     self.recon_cache_path = cache_path
     self.recon_object = os.path.join(cache_path, "object.recon")
Ejemplo n.º 8
0
 def __init__(self, conf):
     """
     Read the replicator settings from *conf* and load the object ring.

     :param conf: configuration mapping; every option is read with
                  ``conf.get`` and falls back to the default shown below
     """
     def int_option(name, default):
         # Fetch an option and coerce it to int.
         return int(conf.get(name, default))

     self.conf = conf
     self.logger = get_logger(conf, log_route='object-replicator')
     self.devices_dir = conf.get('devices', '/srv/node')
     mount_check = conf.get('mount_check', 'true')
     self.mount_check = config_true_value(mount_check)
     vm_test_mode = conf.get('vm_test_mode', 'no')
     self.vm_test_mode = config_true_value(vm_test_mode)
     self.swift_dir = conf.get('swift_dir', '/etc/swift')
     self.port = int_option('bind_port', 6000)
     self.concurrency = int_option('concurrency', 1)
     self.stats_interval = int_option('stats_interval', '300')
     self.object_ring = Ring(self.swift_dir, ring_name='object')
     check_interval = int_option('ring_check_interval', 15)
     self.ring_check_interval = check_interval
     self.next_check = time.time() + check_interval
     self.reclaim_age = int_option('reclaim_age', 86400 * 7)
     self.partition_times = []
     self.run_pause = int_option('run_pause', 30)
     self.rsync_timeout = int_option('rsync_timeout', 900)
     # These two rsync options are stored as the raw configured values.
     self.rsync_io_timeout = conf.get('rsync_io_timeout', '30')
     self.rsync_bwlimit = conf.get('rsync_bwlimit', '0')
     self.http_timeout = int_option('http_timeout', 60)
     self.lockup_timeout = int_option('lockup_timeout', 1800)
     cache_path = conf.get('recon_cache_path', '/var/cache/swift')
     self.recon_cache_path = cache_path
     self.rcache = os.path.join(cache_path, "object.recon")
     user_agent = 'obj-replicator %s' % os.getpid()
     self.headers = {'Content-Length': '0', 'user-agent': user_agent}
     self.rsync_error_log_line_length = int_option(
         'rsync_error_log_line_length', 0)
Ejemplo n.º 9
0
 def __init__(self, conf):
     """
     Read the replicator settings from *conf* and load the object ring.

     :param conf: configuration mapping; every option is read with
                  ``conf.get`` and falls back to the default shown below
     """
     def int_option(name, default):
         # Fetch an option and coerce it to int.
         return int(conf.get(name, default))

     self.conf = conf
     self.logger = get_logger(conf, log_route="object-replicator")
     self.devices_dir = conf.get("devices", "/srv/node")
     mount_check = conf.get("mount_check", "true").lower()
     self.mount_check = mount_check in ("true", "t", "1", "on", "yes", "y")
     vm_test_mode = conf.get("vm_test_mode", "no").lower()
     self.vm_test_mode = vm_test_mode in ("yes", "true", "on", "1")
     self.swift_dir = conf.get("swift_dir", "/etc/swift")
     self.port = int_option("bind_port", 6000)
     self.concurrency = int_option("concurrency", 1)
     self.stats_interval = int_option("stats_interval", "300")
     self.object_ring = Ring(self.swift_dir, ring_name="object")
     check_interval = int_option("ring_check_interval", 15)
     self.ring_check_interval = check_interval
     self.next_check = time.time() + check_interval
     self.reclaim_age = int_option("reclaim_age", 86400 * 7)
     self.partition_times = []
     self.run_pause = int_option("run_pause", 30)
     self.rsync_timeout = int_option("rsync_timeout", 900)
     # Stored as the raw configured value, not converted to int.
     self.rsync_io_timeout = conf.get("rsync_io_timeout", "30")
     self.http_timeout = int_option("http_timeout", 60)
     self.lockup_timeout = int_option("lockup_timeout", 1800)
     cache_path = conf.get("recon_cache_path", "/var/cache/swift")
     self.recon_cache_path = cache_path
     self.rcache = os.path.join(cache_path, "object.recon")
 def __init__(self, conf):
     """Set up the object restorer daemon from *conf*.

     Loads the container ring, configures logging, creates the Glacier
     handle and the internal Swift client, and reads the reporting and
     concurrency settings.

     :param conf: configuration mapping read with ``conf.get``
     :raises ValueError: if ``concurrency`` is configured below 1
     """
     self.conf = conf
     self.container_ring = Ring('/etc/swift', ring_name='container')
     logger = get_logger(conf, log_route='object-restorer')
     self.logger = logger
     logger.set_statsd_prefix('s3-object-restorer')
     # "or" (rather than a .get default) also replaces empty values.
     self.interval = int(conf.get('interval') or 300)

     # Fixed account, prefix and container names.
     self.restoring_object_account = '.s3_restoring_objects'
     self.expiring_restored_account = '.s3_expiring_restored_objects'
     self.glacier_account_prefix = '.glacier_'
     self.todo_container = 'todo'
     self.restoring_container = 'restoring'

     request_tries = int(conf.get('request_tries') or 3)
     self.glacier = self._init_glacier()
     self.glacier_tmpdir = conf.get('temp_path', '/var/cache/s3/')
     self.swift = InternalClient('/etc/swift/s3-object-restorer.conf',
                                 'Swift Object Restorer',
                                 request_tries)

     self.report_interval = int(conf.get('report_interval') or 300)
     started = time()
     self.report_first_time = started
     self.report_last_time = started
     self.report_objects = 0

     cache_path = conf.get('recon_cache_path', '/var/cache/swift')
     self.recon_cache_path = cache_path
     self.rcache = join(cache_path, 'object.recon')

     concurrency = int(conf.get('concurrency', 1))
     self.concurrency = concurrency
     if concurrency < 1:
         raise ValueError("concurrency must be set to at least 1")
     self.processes = int(conf.get('processes', 0))
     self.process = int(conf.get('process', 0))
     # NOTE(review): the option key is spelled 'sentry_sdn' -- possibly
     # meant to be 'sentry_dsn'; confirm against deployed configs.
     self.client = Client(conf.get('sentry_sdn', ''))
 def __init__(self, app, conf, *args, **kwargs):
     """Wrap *app* for the transition middleware.

     Keeps *conf*, sets up the ``transition`` logger, loads the container
     ring from ``/etc/swift`` and reads ``temp_path`` (default
     ``/var/cache/s3/``). Extra positional and keyword arguments are
     accepted and ignored.
     """
     self.app, self.conf = app, conf
     self.logger = get_logger(conf, log_route='transition')
     self.container_ring = Ring('/etc/swift', ring_name='container')
     self.glacier_account_prefix = '.glacier_'
     temp_path = conf.get('temp_path', '/var/cache/s3/')
     self.temp_path = temp_path
 def __init__(self, conf):
     """Set up the utilization aggregator daemon from *conf*.

     Creates the internal Swift client, loads the container ring and reads
     the interval, reporting, concurrency and sampling settings.

     :param conf: configuration mapping read with ``conf.get``
     :raises ValueError: if ``concurrency`` is configured below 1
     """
     self.conf = conf
     self.logger = get_logger(conf, log_route='utilization-aggregator')
     # "or" (rather than a .get default) also replaces empty values.
     self.interval = int(conf.get('interval') or 60)
     self.aggregate_account = '.utilization'
     self.sample_account = '.transfer_record'

     conf_path = conf.get('__file__')
     if not conf_path:
         conf_path = '/etc/swift/swift-utilization-aggregator.conf'
     request_tries = int(conf.get('request_tries') or 3)
     self.swift = InternalClient(conf_path,
                                 'Swift Utilization Aggregator',
                                 request_tries)

     self.report_interval = int(conf.get('report_interval') or 60)
     started = time()
     self.report_first_time = started
     self.report_last_time = started
     self.report_containers = 0
     self.report_objects = 0

     cache_path = conf.get('recon_cache_path', '/var/cache/swift')
     self.recon_cache_path = cache_path
     self.rcache = join(cache_path, 'object.recon')

     concurrency = int(conf.get('concurrency', 1))
     self.concurrency = concurrency
     if concurrency < 1:
         raise ValueError("concurrency must be set to at least 1")
     self.processes = int(conf.get('processes', 0))
     self.process = int(conf.get('process', 0))
     self.container_ring = Ring('/etc/swift', ring_name='container')
     self.sample_rate = int(conf.get('sample_rate', 600))
     service_start = conf.get('service_start')
     self.last_chk = iso8601_to_timestamp(service_start)
     self.kinx_api_url = conf.get('kinx_api_url')
 def __init__(self, app, conf, *args, **kwargs):
     """Wrap *app* for the utilization middleware.

     Keeps *conf*, sets up the ``utilization`` logger, loads the container
     ring from ``/etc/swift`` and reads ``sample_rate`` (default 600).
     Extra positional and keyword arguments are accepted and ignored.
     """
     self.app, self.conf = app, conf
     # Fixed account names.
     self.sample_account = '.transfer_record'
     self.aggregate_account = '.utilization'
     self.logger = get_logger(conf, log_route='utilization')
     self.container_ring = Ring('/etc/swift', ring_name='container')
     sample_rate = conf.get('sample_rate', 600)
     self.sample_rate = int(sample_rate)
Ejemplo n.º 14
0
 def create_account(act):
     """Create account *act* on every node the test account ring assigns it.

     A PUT is sent straight to each node and each response is asserted to
     carry status 201.
     """
     timestamp = utils.normalize_timestamp(time())
     ring = Ring(_testdir, ring_name="account")
     part, account_nodes = ring.get_nodes(act)
     account_path = "/" + act
     for target in account_nodes:
         # The object controller's http_connect helper is reused here simply
         # as a way to talk to the account server nodes.
         put_headers = {"X-Timestamp": timestamp, "x-trans-id": act}
         connection = swift.proxy.controllers.obj.http_connect(
             target["ip"], target["port"], target["device"],
             part, "PUT", account_path, put_headers)
         response = connection.getresponse()
         assert response.status == 201
def get_container_list(account):
    """Return the container names of *account* and a hash of the listing.

    The listing is requested directly from the first node the account ring
    returns for the account.

    :param account: account name, e.g. ``AUTH_ss``
    :returns: tuple ``(containers, container_list_hash)`` where
              ``containers`` holds the non-empty lines of the response body
              and ``container_list_hash`` is the MD5 hex digest of the raw
              body
    """
    # Only the account ring is needed to locate the account listing.
    account_ring = Ring(swift_dir, ring_name="account")
    part, nodes = account_ring.get_nodes(account)
    primary = nodes[0]

    url = "http://%s:%s/%s/%s/%s" % (primary['ip'], primary['port'],
                                     primary['device'], part, account)
    r = requests.get(url)
    if r.status_code == 404:
        # NOTE(review): the response body is still parsed and returned after
        # a 404 -- confirm whether callers expect an empty listing instead.
        logger.warning("Account not existing yet")
    content = str(r.text)
    container_list_hash = hashlib.md5(content).hexdigest()
    # Drop blank lines (such as the one after a trailing newline) without
    # failing when the body contains none.
    containers = [name for name in content.split("\n") if name]
    return containers, container_list_hash
Ejemplo n.º 16
0
 def get_data(self, **kwargs):
     """Collect usage statistics for every tenant Keystone lists.

     :param kwargs: accepted for interface compatibility; not used here
     :returns: the message built by ``init_message()`` with a ``"swift"``
               dict mapping tenant id to the stats returned by
               ``self.fetch``; tenants whose stats are ``None`` are left out
     """
     from keystoneclient.v2_0 import client
     from swift.common.ring import Ring

     keystone = client.Client(username=self.__username,
                              password=self.__password,
                              tenant_name=self.__project,
                              auth_url=self.__auth_url)
     self.__ring = Ring(self.__ring_location)
     tenants = [tenant.id for tenant in keystone.tenants.list()]
     random.shuffle(tenants)
     data = init_message()
     data["swift"] = {}

     # Shut the pool down once the batch is done; otherwise every call
     # leaves a set of idle worker threads behind.
     pool = ThreadPool()
     try:
         all_stats = pool.map(self.fetch, tenants)
     finally:
         pool.close()
         pool.join()

     for tenant, stats in zip(tenants, all_stats):
         if stats is not None:
             data["swift"][tenant] = stats
     return data
    def _delete_or_save_lifecycle(self, method, lifecycle=None):
        """Send *method* for this bucket's lifecycle object to each node.

        The object path is
        ``/.s3_bucket_lifecycle/<account>/<container>``. Each object node is
        paired with the container node at the same position so the request
        carries the matching container-update headers.

        :param method: HTTP method to send; the body is only sent for 'PUT'
        :param lifecycle: lifecycle document sent as the request body
        :returns: the response from the last node contacted
        """
        path = '/.s3_bucket_lifecycle/%s/%s' % (self.account, self.container)
        object_ring = Ring('/etc/swift', ring_name='object')
        container_ring = Ring('/etc/swift', ring_name='container')
        part, nodes = object_ring.get_nodes('.s3_bucket_lifecycle',
                                            self.account, self.container)
        cpart, cnodes = container_ring.get_nodes('.s3_bucket_lifecycle',
                                                 self.account)
        now_ts = normalize_timestamp(time.time())

        for position, node in enumerate(nodes):
            container_node = cnodes[position]
            headers = {
                'user-agent': 'lifecycle-uploader',
                'X-Timestamp': now_ts,
                'referer': 'lifecycle-uploader',
                'X-Container-Partition': cpart,
                'X-Container-Host': '%(ip)s:%(port)s' % container_node,
                'X-Container-Device': container_node['device'],
            }

            if lifecycle:
                headers['content-length'] = len(lifecycle)
                # NOTE(review): header name is 'etags', not 'etag' -- confirm
                # this is what the receiving server expects.
                headers['etags'] = self._compute_md5(lifecycle)
                headers['content-type'] = 'text/plain'

            conn = http_connect(node['ip'], node['port'], node['device'],
                                part, method, path, headers)
            if method == 'PUT':
                conn.send(lifecycle)
            response = conn.getresponse()
        return response
Ejemplo n.º 18
0
    def _get_db_info(self, account, container, number):
        """Return the container DB info held by container server *number*.

        The server's config file is read to find its devices root and bind
        port; the ring node whose port matches that bind port identifies
        the device holding the database.

        :param account: account name
        :param container: container name
        :param number: key into the ``container-server`` config mapping
        :returns: ``ContainerBroker.get_info()`` for the container, or
                  ``None`` when no ring node uses this server's port
        """
        server_type = 'container'
        config_path = self.configs['%s-server' % server_type][number]
        options = utils.readconf(config_path, 'app:container-server')
        root = options.get('devices')

        ring = Ring(options.get('swift_dir', '/etc/swift'),
                    ring_name=server_type)
        part, nodes = ring.get_nodes(account, container)
        # assumes one to one mapping between bind ports and ring nodes
        local_nodes = (candidate for candidate in nodes
                       if candidate['port'] == int(options.get('bind_port')))
        local_node = next(local_nodes, None)
        if local_node is None:
            return None
        device = local_node['device']

        path_hash = utils.hash_path(account, container)
        _dir = utils.storage_directory('%ss' % server_type, part, path_hash)
        db_file = os.path.join(root, device, _dir, '%s.db' % path_hash)
        return ContainerBroker(db_file).get_info()
class ObjectEndpoint(object):
    """WSGI middleware that lists the storage-node URLs of an object.

    A GET to ``/object_endpoint/<account>/<container>/<object>`` is answered
    with a JSON list containing one URL per node the object ring returns
    for that object. Requests for any other path go to the wrapped app.
    """

    def __init__(self, app, conf):
        """Wrap *app* and load the object ring.

        :param app: the next WSGI application in the pipeline
        :param conf: configuration mapping; ``swift_dir`` defaults to
                     ``/etc/swift``
        """
        self.app = app
        self.logger = get_logger(conf, log_route='object_endpoint')
        swift_dir = conf.get('swift_dir', '/etc/swift')
        self.object_ring = Ring(swift_dir, ring_name='object')

    def __call__(self, env, start_response):
        """Answer object endpoint queries; pass everything else through.

        Responds 405 to non-GET methods and 400 when the path does not name
        an account, a container and an object.
        """
        request = Request(env)

        url_prefix = '/object_endpoint/'
        if not request.path.startswith(url_prefix):
            return self.app(env, start_response)

        if request.method != 'GET':
            # Return the error as a WSGI response; raising it here would
            # escape the WSGI callable instead of producing a 405.
            return HTTPMethodNotAllowed()(env, start_response)

        try:
            # All three segments are required; the object name keeps any
            # further slashes.
            account, container, obj = split_path(
                request.path[len(url_prefix) - 1:], 3, 3, True)
        except ValueError:
            message = (b'Expected /object_endpoint/'
                       b'<account>/<container>/<object>\n')
            start_response('400 Bad Request',
                           [('Content-Type', 'text/plain'),
                            ('Content-Length', str(len(message)))])
            return [message]
        if obj.endswith('/'):
            obj = obj[:-1]

        object_partition, objects = self.object_ring.get_nodes(
            account, container, obj)

        endpoint_template = 'http://{ip}:{port}/{device}/{partition}/' + \
                            '{account}/{container}/{obj}'
        endpoints = [
            endpoint_template.format(ip=element['ip'],
                                     port=element['port'],
                                     device=element['device'],
                                     partition=object_partition,
                                     account=account,
                                     container=container,
                                     obj=obj)
            for element in objects
        ]

        body = json.dumps(endpoints)
        if not isinstance(body, bytes):
            body = body.encode('utf-8')
        # WSGI requires the headers as a list of tuples and the body as an
        # iterable of byte strings.
        start_response('200 OK',
                       [('Content-Type', 'application/json'),
                        ('Content-Length', str(len(body)))])
        return [body]
Ejemplo n.º 20
0
 def __init__(self, conf):
     """
     Read the replicator settings from *conf* and load the object ring.

     :param conf: configuration mapping; every option is read with
                  ``conf.get`` and falls back to the default shown below
     """
     def int_option(name, default):
         # Fetch an option and coerce it to int.
         return int(conf.get(name, default))

     def bool_option(name, default):
         # Fetch an option and interpret it as a boolean flag.
         return config_true_value(conf.get(name, default))

     self.conf = conf
     self.logger = get_logger(conf, log_route="object-replicator")
     self.devices_dir = conf.get("devices", "/srv/node")
     self.mount_check = bool_option("mount_check", "true")
     self.vm_test_mode = bool_option("vm_test_mode", "no")
     self.swift_dir = conf.get("swift_dir", "/etc/swift")
     self.port = int_option("bind_port", 6000)
     self.concurrency = int_option("concurrency", 1)
     self.stats_interval = int_option("stats_interval", "300")
     self.object_ring = Ring(self.swift_dir, ring_name="object")
     check_interval = int_option("ring_check_interval", 15)
     self.ring_check_interval = check_interval
     self.next_check = time.time() + check_interval
     self.reclaim_age = int_option("reclaim_age", 86400 * 7)
     self.partition_times = []
     self.run_pause = int_option("run_pause", 30)
     self.rsync_timeout = int_option("rsync_timeout", 900)
     # These two rsync options are stored as the raw configured values.
     self.rsync_io_timeout = conf.get("rsync_io_timeout", "30")
     self.rsync_bwlimit = conf.get("rsync_bwlimit", "0")
     self.http_timeout = int_option("http_timeout", 60)
     self.lockup_timeout = int_option("lockup_timeout", 1800)
     cache_path = conf.get("recon_cache_path", "/var/cache/swift")
     self.recon_cache_path = cache_path
     self.rcache = os.path.join(cache_path, "object.recon")
     self.conn_timeout = float(conf.get("conn_timeout", 0.5))
     self.node_timeout = float(conf.get("node_timeout", 10))
     # The sync method is looked up by name on this instance; an unset or
     # empty option selects "rsync".
     sync_method_name = conf.get("sync_method") or "rsync"
     self.sync_method = getattr(self, sync_method_name)
     self.network_chunk_size = int_option("network_chunk_size", 65536)
     self.disk_chunk_size = int_option("disk_chunk_size", 65536)
     user_agent = "obj-replicator %s" % os.getpid()
     self.headers = {"Content-Length": "0", "user-agent": user_agent}
     self.rsync_error_log_line_length = int_option(
         "rsync_error_log_line_length", 0)
     self.handoffs_first = bool_option("handoffs_first", False)
     handoff_delete = conf.get("handoff_delete", "auto")
     self.handoff_delete = config_auto_int_value(handoff_delete, 0)
     self._diskfile_mgr = DiskFileManager(conf, self.logger)
Ejemplo n.º 21
0
    def _test_ondisk_data_after_write_with_crypto(self, policy_name):
        """Write an object through the crypto app and check what is stored.

        Creates a container with the named policy, PUTs and POSTs the
        object through ``self.crypto_app``, then verifies that the etag in
        every container server listing is encrypted and that the on-disk
        object metadata on every object node (user metadata, crypto meta,
        etag, etag HMAC and the container-update override etag) holds the
        expected encrypted values. Finishes with ``_check_GET_and_HEAD``
        through the crypto app.

        :param policy_name: name of the storage policy for the container
        :returns: tuple ``(exp_enc_body, ondisk_data)``: the expected
                  encrypted body as computed for the last node visited, and
                  a list of ``(node, contents)`` pairs read from disk
        """
        policy = storage_policy.POLICIES.get_by_name(policy_name)
        self._create_container(self.proxy_app, policy_name=policy_name)
        self._put_object(self.crypto_app, self.plaintext)
        self._post_object(self.crypto_app)

        # Verify container listing etag is encrypted by direct GET to container
        # server. We can use any server for all nodes since they all share same
        # devices dir.
        cont_server = self._test_context['test_servers'][3]
        cont_ring = Ring(self._test_context['testdir'], ring_name='container')
        part, nodes = cont_ring.get_nodes('a', self.container_name)
        for node in nodes:
            req = Request.blank('/%s/%s/a/%s'
                                % (node['device'], part, self.container_name),
                                method='GET', query_string='format=json')
            resp = req.get_response(cont_server)
            listing = json.loads(resp.body)
            # sanity checks...
            self.assertEqual(1, len(listing))
            self.assertEqual('o', listing[0]['name'])
            self.assertEqual('application/test', listing[0]['content_type'])
            # verify encrypted etag value
            # the listing hash has the form "<encrypted etag>; swift_meta=..."
            parts = listing[0]['hash'].rsplit(';', 1)
            crypto_meta_param = parts[1].strip()
            crypto_meta = crypto_meta_param[len('swift_meta='):]
            listing_etag_iv = load_crypto_meta(crypto_meta)['iv']
            exp_enc_listing_etag = base64.b64encode(
                encrypt(self.plaintext_etag,
                        self.km.create_key('/a/%s' % self.container_name),
                        listing_etag_iv))
            self.assertEqual(exp_enc_listing_etag, parts[0])

        # Verify diskfile data and metadata is encrypted
        ring_object = self.proxy_app.get_object_ring(int(policy))
        partition, nodes = ring_object.get_nodes('a', self.container_name, 'o')
        conf = {'devices': self._test_context["testdir"],
                'mount_check': 'false'}
        df_mgr = diskfile.DiskFileRouter(conf, FakeLogger())[policy]
        ondisk_data = []
        exp_enc_body = None
        for node_index, node in enumerate(nodes):
            df = df_mgr.get_diskfile(node['device'], partition,
                                     'a', self.container_name, 'o',
                                     policy=policy)
            with df.open():
                meta = df.get_metadata()
                contents = ''.join(df.reader())
                # lower-case the keys so lookups below are case-insensitive
                metadata = dict((k.lower(), v) for k, v in meta.items())
                # verify on disk data - body
                body_iv = load_crypto_meta(
                    metadata['x-object-sysmeta-crypto-body-meta'])['iv']
                body_key_meta = load_crypto_meta(
                    metadata['x-object-sysmeta-crypto-body-meta'])['body_key']
                obj_key = self.km.create_key('/a/%s/o' % self.container_name)
                body_key = Crypto().unwrap_key(obj_key, body_key_meta)
                # the expected body is not asserted here; it is returned to
                # the caller together with the on-disk contents
                exp_enc_body = encrypt(self.plaintext, body_key, body_iv)
                ondisk_data.append((node, contents))

                # verify on disk user metadata
                enc_val, meta = metadata[
                    'x-object-transient-sysmeta-crypto-meta-fruit'].split(';')
                meta = meta.strip()[len('swift_meta='):]
                metadata_iv = load_crypto_meta(meta)['iv']
                exp_enc_meta = base64.b64encode(encrypt('Kiwi', obj_key,
                                                        metadata_iv))
                self.assertEqual(exp_enc_meta, enc_val)
                self.assertNotIn('x-object-meta-fruit', metadata)

                self.assertIn(
                    'x-object-transient-sysmeta-crypto-meta', metadata)
                meta = load_crypto_meta(
                    metadata['x-object-transient-sysmeta-crypto-meta'])
                self.assertIn('key_id', meta)
                self.assertIn('path', meta['key_id'])
                self.assertEqual(
                    '/a/%s/%s' % (self.container_name, self.object_name),
                    meta['key_id']['path'])
                self.assertIn('v', meta['key_id'])
                self.assertEqual('1', meta['key_id']['v'])
                self.assertIn('cipher', meta)
                self.assertEqual(Crypto.cipher, meta['cipher'])

                # verify etag
                actual_enc_etag, _junk, actual_etag_meta = metadata[
                    'x-object-sysmeta-crypto-etag'].partition('; swift_meta=')
                etag_iv = load_crypto_meta(actual_etag_meta)['iv']
                exp_enc_etag = base64.b64encode(encrypt(self.plaintext_etag,
                                                        obj_key, etag_iv))
                self.assertEqual(exp_enc_etag, actual_enc_etag)

                # verify etag hmac
                exp_etag_mac = hmac.new(
                    obj_key, self.plaintext_etag, digestmod=hashlib.sha256)
                exp_etag_mac = base64.b64encode(exp_etag_mac.digest())
                self.assertEqual(exp_etag_mac,
                                 metadata['x-object-sysmeta-crypto-etag-mac'])

                # verify etag override for container updates
                override = 'x-object-sysmeta-container-update-override-etag'
                parts = metadata[override].rsplit(';', 1)
                crypto_meta_param = parts[1].strip()
                crypto_meta = crypto_meta_param[len('swift_meta='):]
                listing_etag_iv = load_crypto_meta(crypto_meta)['iv']
                cont_key = self.km.create_key('/a/%s' % self.container_name)
                exp_enc_listing_etag = base64.b64encode(
                    encrypt(self.plaintext_etag, cont_key,
                            listing_etag_iv))
                self.assertEqual(exp_enc_listing_etag, parts[0])

        self._check_GET_and_HEAD(self.crypto_app)
        return exp_enc_body, ondisk_data
Ejemplo n.º 22
0
class ObjectReplicator(Daemon):
    """
    Replicate objects.

    Encapsulates most logic and data needed by the object replication process.
    Each call to .replicate() performs one replication pass.  It's up to the
    caller to do this in a loop.
    """

    def __init__(self, conf):
        """
        Read the replicator settings out of the configuration.

        :param conf: configuration object obtained from ConfigParser; options
                     are read with ``conf.get(key, default)``
        """
        option = conf.get

        def int_option(key, default):
            # Numeric options are coerced with int(), whatever type the
            # configuration hands back.
            return int(option(key, default))

        self.conf = conf
        self.logger = get_logger(conf, log_route='object-replicator')
        self.devices_dir = option('devices', '/srv/node')
        mount_check = option('mount_check', 'true').lower()
        self.mount_check = mount_check in ('true', 't', '1', 'on', 'yes', 'y')
        vm_test_mode = option('vm_test_mode', 'no').lower()
        self.vm_test_mode = vm_test_mode in ('yes', 'true', 'on', '1')
        self.swift_dir = option('swift_dir', '/etc/swift')
        self.port = int_option('bind_port', 6000)
        self.concurrency = int_option('concurrency', 1)
        self.stats_interval = int_option('stats_interval', '300')
        self.object_ring = Ring(self.swift_dir, ring_name='object')
        self.ring_check_interval = int_option('ring_check_interval', 15)
        self.next_check = time.time() + self.ring_check_interval
        self.reclaim_age = int_option('reclaim_age', 86400 * 7)
        self.partition_times = []
        self.run_pause = int_option('run_pause', 30)
        self.rsync_timeout = int_option('rsync_timeout', 900)
        # Kept as a string: it is only interpolated into rsync arguments.
        self.rsync_io_timeout = option('rsync_io_timeout', '30')
        self.http_timeout = int_option('http_timeout', 60)
        self.lockup_timeout = int_option('lockup_timeout', 1800)
        recon_enable = option('recon_enable', 'no').lower()
        self.recon_enable = recon_enable in TRUE_VALUES
        self.recon_cache_path = option('recon_cache_path', '/var/cache/swift')
        self.recon_object = os.path.join(self.recon_cache_path, "object.recon")

    def _rsync(self, args):
        """
        Execute the rsync binary to replicate a partition.

        :param args: argument list handed to subprocess.Popen; on success
                     the last two entries are logged as source and
                     destination
        :returns: return code of rsync process. 0 is successful. 1 is
                  returned when the run exceeds ``rsync_timeout``.
        """
        start_time = time.time()
        ret_val = None
        # Bound before the try block so the Timeout handler never hits an
        # unbound name if the timeout fires before Popen() has returned.
        proc = None
        try:
            with Timeout(self.rsync_timeout):
                proc = subprocess.Popen(args, stdout=subprocess.PIPE,
                                        stderr=subprocess.STDOUT)
                results = proc.stdout.read()
                ret_val = proc.wait()
        except Timeout:
            self.logger.error(_("Killing long-running rsync: %s"), str(args))
            if proc is not None:
                proc.kill()
            return 1  # failure response code
        total_time = time.time() - start_time
        # Output lines go to info on a zero exit code, to error otherwise.
        log_line = self.logger.error if ret_val else self.logger.info
        for result in results.split('\n'):
            # Blank lines and lines starting with 'cd+' are not logged.
            if result == '' or result.startswith('cd+'):
                continue
            log_line(result)
        if ret_val:
            self.logger.error(_('Bad rsync return code: %(args)s -> %(ret)d'),
                              {'args': str(args), 'ret': ret_val})
        else:
            # A run that produced output is logged at info, a silent one
            # only at debug; the message is the same.
            log_success = self.logger.info if results else self.logger.debug
            log_success(
                _("Successful rsync of %(src)s at %(dst)s (%(time).03f)"),
                {'src': args[-2], 'dst': args[-1], 'time': total_time})
        return ret_val

    def rsync(self, node, job, suffixes):
        """
        Synchronize local suffix directories from a partition with a remote
        node.

        :param node: the "dev" entry for the remote node to sync with
        :param job: information about the partition being synced
        :param suffixes: a list of suffixes which need to be pushed

        :returns: boolean indicating success or failure
        """
        if not os.path.exists(job['path']):
            return False
        args = [
            'rsync',
            '--recursive',
            '--whole-file',
            '--human-readable',
            '--xattrs',
            '--itemize-changes',
            '--ignore-existing',
            '--timeout=%s' % self.rsync_io_timeout,
            '--contimeout=%s' % self.rsync_io_timeout,
        ]
        if self.vm_test_mode:
            rsync_module = '[%s]::object%s' % (node['ip'], node['port'])
        else:
            rsync_module = '[%s]::object' % node['ip']
        had_any = False
        for suffix in suffixes:
            spath = join(job['path'], suffix)
            if os.path.exists(spath):
                args.append(spath)
                had_any = True
        if not had_any:
            return False
        args.append(join(rsync_module, node['device'],
                    'objects', job['partition']))
        return self._rsync(args) == 0

    def check_ring(self):
        """
        Check to see if the ring has been updated

        :returns: boolean indicating whether or not the ring has changed
        """
        if time.time() > self.next_check:
            self.next_check = time.time() + self.ring_check_interval
            if self.object_ring.has_changed():
                return False
        return True

    def update_deleted(self, job):
        """
        High-level method that replicates a single partition that doesn't
        belong on this node.

        Every suffix directory is pushed to each node in ``job['nodes']``
        and, on a successful push, a REPLICATE request is sent to that node.
        The local partition directory is removed when there were no suffixes
        or when every node was synced successfully. Errors are logged, not
        raised.

        :param job: a dict containing info about the partition to be replicated
        """

        def tpool_get_suffixes(path):
            # Suffix directories are the three-character sub-directories.
            found = []
            for entry in os.listdir(path):
                if len(entry) == 3 and isdir(join(path, entry)):
                    found.append(entry)
            return found

        self.replication_count += 1
        self.logger.increment('partition.delete.count.%s' % (job['device'],))
        begin = time.time()
        try:
            responses = []
            suffixes = tpool.execute(tpool_get_suffixes, job['path'])
            if suffixes:
                suffix_path = '/' + '-'.join(suffixes)
                for node in job['nodes']:
                    synced = self.rsync(node, job, suffixes)
                    if synced:
                        with Timeout(self.http_timeout):
                            conn = http_connect(
                                node['ip'], node['port'], node['device'],
                                job['partition'], 'REPLICATE', suffix_path,
                                headers={'Content-Length': '0'})
                            conn.getresponse().read()
                    responses.append(synced)
                removable = (len(responses) == len(job['nodes']) and
                             all(responses))
            else:
                # Nothing to push, so the partition can go straight away.
                removable = True
            if removable:
                self.logger.info(_("Removing partition: %s"), job['path'])
                tpool.execute(shutil.rmtree, job['path'], ignore_errors=True)
        except (Exception, Timeout):
            self.logger.exception(_("Error syncing handoff partition"))
        finally:
            self.partition_times.append(time.time() - begin)
            self.logger.timing_since('partition.delete.timing', begin)

    def update(self, job):
        """
        High-level method that replicates a single partition.

        The local suffix hashes are computed, then each candidate node is
        asked (REPLICATE request) for its hashes. Suffixes whose hashes
        differ are re-hashed locally, pushed with rsync, and a second
        REPLICATE request naming those suffixes is sent to the node.
        All errors are logged rather than raised.

        :param job: a dict containing info about the partition to be replicated
        """
        self.replication_count += 1
        self.logger.increment('partition.update.count.%s' % (job['device'],))
        begin = time.time()
        try:
            # do_listdir is requested on every 10th replicated partition.
            hashed, local_hash = tpool.execute(tpooled_get_hashes, job['path'],
                    do_listdir=(self.replication_count % 10) == 0,
                    reclaim_age=self.reclaim_age)
            # See tpooled_get_hashes "Hack".
            if isinstance(hashed, BaseException):
                raise hashed
            self.suffix_hash += hashed
            self.logger.update_stats('suffix.hashes', hashed)
            # One attempt per node in the job; further nodes from
            # get_more_nodes() are only reached when an attempt is refunded.
            attempts_left = len(job['nodes'])
            nodes = itertools.chain(job['nodes'],
                        self.object_ring.get_more_nodes(int(job['partition'])))
            while attempts_left > 0:
                # If this raises StopIteration it is caught by the outer
                # except clause and logged as "Error syncing partition".
                node = next(nodes)
                attempts_left -= 1
                try:
                    with Timeout(self.http_timeout):
                        resp = http_connect(node['ip'], node['port'],
                                node['device'], job['partition'], 'REPLICATE',
                            '', headers={'Content-Length': '0'}).getresponse()
                        if resp.status == HTTP_INSUFFICIENT_STORAGE:
                            self.logger.error(_('%(ip)s/%(device)s responded'
                                    ' as unmounted'), node)
                            # Refund the attempt so another node is tried.
                            attempts_left += 1
                            continue
                        if resp.status != HTTP_OK:
                            self.logger.error(_("Invalid response %(resp)s "
                                "from %(ip)s"),
                                {'resp': resp.status, 'ip': node['ip']})
                            continue
                        # NOTE(review): pickle.loads() on a body received
                        # over the network; unsafe unless the remote node is
                        # trusted -- confirm the deployment assumption.
                        remote_hash = pickle.loads(resp.read())
                        del resp
                    # Suffixes missing remotely get -1, which is presumably
                    # never a real hash value, so they compare as different.
                    suffixes = [suffix for suffix in local_hash if
                            local_hash[suffix] != remote_hash.get(suffix, -1)]
                    if not suffixes:
                        continue
                    # Re-hash only the differing suffixes before syncing.
                    hashed, recalc_hash = tpool.execute(tpooled_get_hashes,
                        job['path'], recalculate=suffixes,
                        reclaim_age=self.reclaim_age)
                    # See tpooled_get_hashes "Hack".
                    if isinstance(hashed, BaseException):
                        raise hashed
                    self.logger.update_stats('suffix.hashes', hashed)
                    local_hash = recalc_hash
                    # Compare again using the freshly recalculated hashes.
                    suffixes = [suffix for suffix in local_hash if
                            local_hash[suffix] != remote_hash.get(suffix, -1)]
                    # The rsync result is not checked here.
                    self.rsync(node, job, suffixes)
                    with Timeout(self.http_timeout):
                        conn = http_connect(node['ip'], node['port'],
                            node['device'], job['partition'], 'REPLICATE',
                            '/' + '-'.join(suffixes),
                            headers={'Content-Length': '0'})
                        conn.getresponse().read()
                    self.suffix_sync += len(suffixes)
                    self.logger.update_stats('suffix.syncs', len(suffixes))
                except (Exception, Timeout):
                    # A failure with one node does not stop the others.
                    self.logger.exception(_("Error syncing with node: %s") %
                                            node)
            self.suffix_count += len(local_hash)
        except (Exception, Timeout):
            self.logger.exception(_("Error syncing partition"))
        finally:
            # Timing is recorded whether or not the sync succeeded.
            self.partition_times.append(time.time() - begin)
            self.logger.timing_since('partition.update.timing', begin)

    def stats_line(self):
        """
        Log progress statistics for the replication pass currently running.

        With nothing replicated yet only the elapsed time is logged.
        Otherwise the partition progress is logged and, once suffixes have
        been counted, the suffix and partition-time statistics as well.
        ``partition_times`` is sorted in place as a side effect.
        """
        if not self.replication_count:
            self.logger.info(_("Nothing replicated for %s seconds."),
                (time.time() - self.start))
            return

        # Fall back to a tiny value so the rate never divides by zero.
        elapsed = (time.time() - self.start) or 0.000001
        rate = self.replication_count / elapsed
        progress = {
            'replicated': self.replication_count,
            'total': self.job_count,
            'percentage': self.replication_count * 100.0 / self.job_count,
            'time': time.time() - self.start,
            'rate': rate,
            'remaining': '%d%s' % compute_eta(
                self.start, self.replication_count, self.job_count),
        }
        self.logger.info(_("%(replicated)d/%(total)d (%(percentage).2f%%)"
            " partitions replicated in %(time).2fs (%(rate).2f/sec, "
            "%(remaining)s remaining)"), progress)

        if not self.suffix_count:
            return
        suffix_stats = {
            'checked': self.suffix_count,
            'hashed': (self.suffix_hash * 100.0) / self.suffix_count,
            'synced': (self.suffix_sync * 100.0) / self.suffix_count,
        }
        self.logger.info(_("%(checked)d suffixes checked - "
            "%(hashed).2f%% hashed, %(synced).2f%% synced"), suffix_stats)

        self.partition_times.sort()
        times = self.partition_times
        self.logger.info(_("Partition times: max %(max).4fs, "
            "min %(min).4fs, med %(med).4fs"),
            {'max': times[-1],
             'min': times[0],
             'med': times[len(times) // 2]})

    def kill_coros(self):
        """Kill every coroutine currently running in ``run_pool``."""
        # Iterate over a snapshot of the pool's running coroutines.
        running = list(self.run_pool.coroutines_running)
        for coro in running:
            try:
                coro.kill(GreenletExit)
            except GreenletExit:
                # Already exiting; nothing more to do for this one.
                pass

    def heartbeat(self):
        """
        Background loop for a replication pass: sleep ``stats_interval``
        seconds, log the current progress, repeat. Never returns on its own.
        """
        while True:
            pause = self.stats_interval
            eventlet.sleep(pause)
            self.stats_line()

    def detect_lockups(self):
        """
        Background watchdog for a replication pass.

        In testing, the pool.waitall() call very occasionally failed to
        return. Every ``lockup_timeout`` seconds this loop checks whether
        ``replication_count`` has moved since the previous check and, if it
        has not, kills the live coroutines so the pass can finish.
        """
        while True:
            eventlet.sleep(self.lockup_timeout)
            stalled = self.replication_count == self.last_replication_count
            if stalled:
                self.logger.error(_("Lockup detected.. killing live coros."))
                self.kill_coros()
            self.last_replication_count = self.replication_count

    def collect_jobs(self):
        """
        Build the list of jobs (dictionaries) that specify the partitions,
        nodes, etc to be rsynced, and record its length in ``job_count``.

        The list is shuffled and then ordered so that jobs flagged
        ``delete`` come first.

        :returns: list of job dicts with the keys path, device, nodes,
                  delete and partition
        """
        jobs = []
        ips = whataremyips()
        # Ring devices that match one of our IPs and our port.
        local_devs = [dev for dev in self.object_ring.devs
                      if dev and dev['ip'] in ips and dev['port'] == self.port]
        for local_dev in local_devs:
            device = local_dev['device']
            dev_path = join(self.devices_dir, device)
            obj_path = join(dev_path, 'objects')
            tmp_path = join(dev_path, 'tmp')
            if self.mount_check and not os.path.ismount(dev_path):
                self.logger.warn(_('%s is not mounted'), device)
                continue
            unlink_older_than(tmp_path, time.time() - self.reclaim_age)
            if not os.path.exists(obj_path):
                continue
            for partition in os.listdir(obj_path):
                try:
                    part_nodes = \
                        self.object_ring.get_part_nodes(int(partition))
                    others = [node for node in part_nodes
                              if node['id'] != local_dev['id']]
                    jobs.append({
                        'path': join(obj_path, partition),
                        'device': local_dev['device'],
                        'nodes': others,
                        # True when filtering out this device removed
                        # nothing from the partition's node list.
                        'delete': len(others) > len(part_nodes) - 1,
                        'partition': partition,
                    })
                except ValueError:
                    # Raised e.g. by int() for a non-numeric entry name.
                    continue
        random.shuffle(jobs)
        # Partitions that need to be deleted take priority
        jobs.sort(key=lambda job: not job['delete'])
        self.job_count = len(jobs)
        return jobs

    def replicate(self):
        """
        Run a replication pass.

        Resets the per-pass counters, starts the heartbeat and lockup
        detector, spawns one coroutine per collected job and waits for them.
        The pass is abandoned if check_ring() reports a ring change. The
        background greenthreads are always killed and a final stats line is
        always logged.
        """
        self.start = time.time()
        self.suffix_count = 0
        self.suffix_sync = 0
        self.suffix_hash = 0
        self.replication_count = 0
        self.last_replication_count = -1
        self.partition_times = []
        stats = eventlet.spawn(self.heartbeat)
        lockup_detector = eventlet.spawn(self.detect_lockups)
        eventlet.sleep()  # Give spawns a cycle
        try:
            self.run_pool = GreenPool(size=self.concurrency)
            for job in self.collect_jobs():
                if not self.check_ring():
                    self.logger.info(_("Ring change detected. Aborting "
                            "current replication pass."))
                    return
                # Jobs flagged for deletion go through update_deleted.
                handler = self.update_deleted if job['delete'] else self.update
                self.run_pool.spawn(handler, job)
            with Timeout(self.lockup_timeout):
                self.run_pool.waitall()
        except (Exception, Timeout):
            self.logger.exception(_("Exception in top-level replication loop"))
            self.kill_coros()
        finally:
            stats.kill()
            lockup_detector.kill()
            self.stats_line()

    def run_once(self, *args, **kwargs):
        """
        Run a single replication pass and log how long it took.

        The duration in minutes is also written to the recon cache when
        ``recon_enable`` is set; a failure to do so is logged, not raised.
        Positional and keyword arguments are accepted but not used.
        """
        start = time.time()
        self.logger.info(_("Running object replicator in script mode."))
        self.replicate()
        total = (time.time() - start) / 60
        self.logger.info(
            _("Object replication complete. (%.02f minutes)"), total)
        if not self.recon_enable:
            return
        try:
            dump_recon_cache(
                'object_replication_time', total, self.recon_object)
        except (Exception, Timeout):
            self.logger.exception(_('Exception dumping recon cache'))

    def run_forever(self, *args, **kwargs):
        """
        Run replication passes in an endless loop.

        After each pass the duration is logged, optionally written to the
        recon cache (when ``recon_enable`` is set), and the loop sleeps for
        ``run_pause`` seconds. Positional and keyword arguments are accepted
        but not used.
        """
        self.logger.info(_("Starting object replicator in daemon mode."))
        while True:
            start = time.time()
            self.logger.info(_("Starting object replication pass."))
            self.replicate()
            total = (time.time() - start) / 60
            self.logger.info(
                _("Object replication complete. (%.02f minutes)"), total)
            if self.recon_enable:
                try:
                    dump_recon_cache(
                        'object_replication_time', total, self.recon_object)
                except (Exception, Timeout):
                    # Recon reporting is best effort; keep replicating.
                    self.logger.exception(_('Exception dumping recon cache'))
            self.logger.debug(_('Replication sleeping for %s seconds.'),
                self.run_pause)
            sleep(self.run_pause)
Ejemplo n.º 23
0
    def __init__(self,
                 conf,
                 memcache=None,
                 logger=None,
                 account_ring=None,
                 container_ring=None,
                 object_ring=None):
        """
        Configure the instance from ``conf``.

        :param conf: mapping of configuration options, read with
                     ``conf.get()``; None is treated as an empty dict
        :param memcache: stored as ``self.memcache``
        :param logger: logger to use; when None one is obtained from
                       get_logger() with log route 'proxy-server'
        :param account_ring: account ring; when falsy a Ring is loaded from
                             swift_dir
        :param container_ring: container ring; when falsy a Ring is loaded
                               from swift_dir
        :param object_ring: object ring; when falsy a Ring is loaded from
                            swift_dir
        :raises ValueError: if request_node_count, read_affinity,
                            write_affinity or write_affinity_node_count
                            cannot be parsed
        """
        if conf is None:
            conf = {}
        if logger is None:
            self.logger = get_logger(conf, log_route='proxy-server')
        else:
            self.logger = logger

        def node_count_fn(option):
            """Build the callable for an 'N' or 'N * replicas' option."""
            # Each call has its own scope, so the returned lambda keeps its
            # own number. Sharing one local between both options made the
            # first lambda return the second option's value.
            parts = conf.get(option, '2 * replicas').lower().split()
            if len(parts) == 1:
                count = int(parts[0])
                return lambda r: count
            if len(parts) == 3 and parts[1] == '*' and parts[2] == 'replicas':
                factor = int(parts[0])
                return lambda r: factor * r.replica_count
            raise ValueError('Invalid %s value: %r' %
                             (option, ' '.join(parts)))

        swift_dir = conf.get('swift_dir', '/etc/swift')
        self.node_timeout = int(conf.get('node_timeout', 10))
        self.conn_timeout = float(conf.get('conn_timeout', 0.5))
        self.client_timeout = int(conf.get('client_timeout', 60))
        self.put_queue_depth = int(conf.get('put_queue_depth', 10))
        self.object_chunk_size = int(conf.get('object_chunk_size', 65536))
        self.client_chunk_size = int(conf.get('client_chunk_size', 65536))
        self.trans_id_suffix = conf.get('trans_id_suffix', '')
        self.error_suppression_interval = \
            int(conf.get('error_suppression_interval', 60))
        self.error_suppression_limit = \
            int(conf.get('error_suppression_limit', 10))
        self.recheck_container_existence = \
            int(conf.get('recheck_container_existence', 60))
        self.recheck_account_existence = \
            int(conf.get('recheck_account_existence', 60))
        self.allow_account_management = \
            config_true_value(conf.get('allow_account_management', 'no'))
        self.object_post_as_copy = \
            config_true_value(conf.get('object_post_as_copy', 'true'))
        self.object_ring = object_ring or Ring(swift_dir, ring_name='object')
        self.container_ring = container_ring or Ring(swift_dir,
                                                     ring_name='container')
        self.account_ring = account_ring or Ring(swift_dir,
                                                 ring_name='account')
        self.memcache = memcache
        # Add swift_dir/mime.types to the files mimetypes reads.
        mimetypes.init(mimetypes.knownfiles +
                       [os.path.join(swift_dir, 'mime.types')])
        self.account_autocreate = \
            config_true_value(conf.get('account_autocreate', 'no'))
        self.expiring_objects_account = \
            (conf.get('auto_create_account_prefix') or '.') + \
            'expiring_objects'
        self.expiring_objects_container_divisor = \
            int(conf.get('expiring_objects_container_divisor') or 86400)
        self.max_containers_per_account = \
            int(conf.get('max_containers_per_account') or 0)
        # Comma-separated options: surrounding whitespace is stripped and
        # empty entries are dropped.
        self.max_containers_whitelist = [
            a.strip()
            for a in conf.get('max_containers_whitelist', '').split(',')
            if a.strip()
        ]
        self.deny_host_headers = [
            host.strip()
            for host in conf.get('deny_host_headers', '').split(',')
            if host.strip()
        ]
        self.rate_limit_after_segment = \
            int(conf.get('rate_limit_after_segment', 10))
        self.rate_limit_segments_per_sec = \
            int(conf.get('rate_limit_segments_per_sec', 1))
        self.log_handoffs = config_true_value(conf.get('log_handoffs', 'true'))
        self.cors_allow_origin = [
            a.strip() for a in conf.get('cors_allow_origin', '').split(',')
            if a.strip()
        ]
        self.node_timings = {}
        self.timing_expiry = int(conf.get('timing_expiry', 300))
        self.sorting_method = conf.get('sorting_method', 'shuffle').lower()
        self.allow_static_large_object = config_true_value(
            conf.get('allow_static_large_object', 'true'))
        self.max_large_object_get_time = float(
            conf.get('max_large_object_get_time', '86400'))
        self.request_node_count = node_count_fn('request_node_count')
        try:
            read_affinity = conf.get('read_affinity', '')
            self.read_affinity_sort_key = affinity_key_function(read_affinity)
        except ValueError as err:
            # make the message a little more useful
            raise ValueError("Invalid read_affinity value: %r (%s)" %
                             (read_affinity, err))
        try:
            write_affinity = conf.get('write_affinity', '')
            self.write_affinity_is_local_fn \
                = affinity_locality_predicate(write_affinity)
        except ValueError as err:
            # make the message a little more useful
            raise ValueError("Invalid write_affinity value: %r (%s)" %
                             (write_affinity, err))
        self.write_affinity_node_count = node_count_fn(
            'write_affinity_node_count')
        swift_owner_headers = conf.get(
            'swift_owner_headers', 'x-container-read, x-container-write, '
            'x-container-sync-key, x-container-sync-to, '
            'x-account-meta-temp-url-key, x-account-meta-temp-url-key-2')
        self.swift_owner_headers = [
            name.strip() for name in swift_owner_headers.split(',')
            if name.strip()
        ]
Ejemplo n.º 24
0
 def __init__(self, options, *_args, **_kwargs):
     """
     Load the ring named by ``options.ring`` and keep the options.

     :param options: object providing ``ring`` and ``path`` attributes;
                     extra positional and keyword arguments are ignored
     """
     ring_file = options.ring
     self.ring = Ring(ring_file)
     self.path = options.path
     self.options = options
Ejemplo n.º 25
0
class ObjectReplicator(Daemon):
    """
    Replicate objects.

    Encapsulates most logic and data needed by the object replication process.
    Each call to .replicate() performs one replication pass.  It's up to the
    caller to do this in a loop.
    """

    def __init__(self, conf):
        """
        Read the replicator settings out of the configuration.

        :param conf: configuration object obtained from ConfigParser; options
                     are read with ``conf.get()``
        """
        option = conf.get

        def int_option(key, default):
            return int(option(key, default))

        def bool_option(key, default):
            return config_true_value(option(key, default))

        self.conf = conf
        self.logger = get_logger(conf, log_route='object-replicator')
        self.devices_dir = option('devices', '/srv/node')
        self.mount_check = bool_option('mount_check', 'true')
        self.vm_test_mode = bool_option('vm_test_mode', 'no')
        self.swift_dir = option('swift_dir', '/etc/swift')
        self.port = int_option('bind_port', 6000)
        self.concurrency = int_option('concurrency', 1)
        self.stats_interval = int_option('stats_interval', '300')
        self.object_ring = Ring(self.swift_dir, ring_name='object')
        self.ring_check_interval = int_option('ring_check_interval', 15)
        self.next_check = time.time() + self.ring_check_interval
        self.reclaim_age = int_option('reclaim_age', 86400 * 7)
        self.partition_times = []
        self.run_pause = int_option('run_pause', 30)
        self.rsync_timeout = int_option('rsync_timeout', 900)
        # These two stay strings: they are only interpolated into rsync
        # command-line arguments.
        self.rsync_io_timeout = option('rsync_io_timeout', '30')
        self.rsync_bwlimit = option('rsync_bwlimit', '0')
        self.http_timeout = int_option('http_timeout', 60)
        self.lockup_timeout = int_option('lockup_timeout', 1800)
        self.recon_cache_path = option('recon_cache_path',
                                       '/var/cache/swift')
        self.rcache = os.path.join(self.recon_cache_path, "object.recon")
        self.conn_timeout = float(option('conn_timeout', 0.5))
        self.node_timeout = float(option('node_timeout', 10))
        # The sync method is looked up by name on this object; 'rsync' is
        # used when the option is missing or empty.
        sync_method_name = option('sync_method') or 'rsync'
        self.sync_method = getattr(self, sync_method_name)
        self.network_chunk_size = int_option('network_chunk_size', 65536)
        self.disk_chunk_size = int_option('disk_chunk_size', 65536)
        self.headers = {
            'Content-Length': '0',
            'user-agent': 'obj-replicator %s' % os.getpid()}
        self.rsync_error_log_line_length = \
            int_option('rsync_error_log_line_length', 0)
        self.handoffs_first = bool_option('handoffs_first', False)
        self.handoff_delete = config_auto_int_value(
            option('handoff_delete', 'auto'), 0)
        self._diskfile_mgr = DiskFileManager(conf, self.logger)

    def sync(self, node, job, suffixes):  # Just exists for doc anchor point
        """
        Synchronize local suffix directories from a partition with a remote
        node.

        :param node: the "dev" entry for the remote node to sync with
        :param job: information about the partition being synced
        :param suffixes: a list of suffixes which need to be pushed

        :returns: boolean indicating success or failure
        """
        return self.sync_method(node, job, suffixes)

    def _rsync(self, args):
        """
        Execute the rsync binary to replicate a partition.

        :param args: argument list handed to subprocess.Popen; on success
                     the last two entries are logged as source and
                     destination
        :returns: return code of rsync process. 0 is successful. 1 is
                  returned when the run exceeds ``rsync_timeout``.
        """
        start_time = time.time()
        ret_val = None
        # Bound before the try block so the Timeout handler never hits an
        # unbound name if the timeout fires before Popen() has returned.
        proc = None
        try:
            with Timeout(self.rsync_timeout):
                proc = subprocess.Popen(args,
                                        stdout=subprocess.PIPE,
                                        stderr=subprocess.STDOUT)
                results = proc.stdout.read()
                ret_val = proc.wait()
        except Timeout:
            self.logger.error(_("Killing long-running rsync: %s"), str(args))
            if proc is not None:
                proc.kill()
            return 1  # failure response code
        total_time = time.time() - start_time
        # Output lines go to info on a zero exit code, to error otherwise.
        log_line = self.logger.error if ret_val else self.logger.info
        for result in results.split('\n'):
            # Blank lines and lines starting with 'cd+' are not logged.
            if result == '' or result.startswith('cd+'):
                continue
            log_line(result)
        if ret_val:
            error_line = _('Bad rsync return code: %(ret)d <- %(args)s') % \
                {'args': str(args), 'ret': ret_val}
            # A non-zero rsync_error_log_line_length caps the logged line.
            if self.rsync_error_log_line_length:
                error_line = error_line[:self.rsync_error_log_line_length]
            self.logger.error(error_line)
        else:
            # A run that produced output is logged at info, a silent one
            # only at debug; the message is the same.
            log_success = self.logger.info if results else self.logger.debug
            log_success(
                _("Successful rsync of %(src)s at %(dst)s (%(time).03f)"),
                {'src': args[-2], 'dst': args[-1], 'time': total_time})
        return ret_val

    def rsync(self, node, job, suffixes):
        """
        Uses rsync to implement the sync method. This was the first
        sync method in Swift.
        """
        if not os.path.exists(job['path']):
            return False
        args = [
            'rsync',
            '--recursive',
            '--whole-file',
            '--human-readable',
            '--xattrs',
            '--itemize-changes',
            '--ignore-existing',
            '--timeout=%s' % self.rsync_io_timeout,
            '--contimeout=%s' % self.rsync_io_timeout,
            '--bwlimit=%s' % self.rsync_bwlimit,
        ]
        node_ip = rsync_ip(node['replication_ip'])
        if self.vm_test_mode:
            rsync_module = '%s::object%s' % (node_ip, node['replication_port'])
        else:
            rsync_module = '%s::object' % node_ip
        had_any = False
        for suffix in suffixes:
            spath = join(job['path'], suffix)
            if os.path.exists(spath):
                args.append(spath)
                had_any = True
        if not had_any:
            return False
        args.append(join(rsync_module, node['device'],
                    'objects', job['partition']))
        return self._rsync(args) == 0

    def ssync(self, node, job, suffixes):
        """
        Implement the sync method with an ``ssync_sender.Sender``.

        :param node: the "dev" entry for the remote node to sync with
        :param job: information about the partition being synced
        :param suffixes: a list of suffixes which need to be pushed

        :returns: the result of calling the sender
        """
        sender = ssync_sender.Sender(self, node, job, suffixes)
        return sender()

    def check_ring(self):
        """
        Check to see if the ring has been updated

        :returns: boolean indicating whether or not the ring has changed
        """
        if time.time() > self.next_check:
            self.next_check = time.time() + self.ring_check_interval
            if self.object_ring.has_changed():
                return False
        return True

    def update_deleted(self, job):
        """
        High-level method that replicates a single partition that doesn't
        belong on this node.

        Every suffix directory is pushed to each node in ``job['nodes']``
        and, on a successful push, a REPLICATE request is sent to that node.
        The local partition directory is removed when there were no
        suffixes, when at least ``handoff_delete`` pushes succeeded (if that
        setting is non-zero), or when every node was synced successfully.
        Errors are logged, not raised.

        :param job: a dict containing info about the partition to be replicated
        """

        def tpool_get_suffixes(path):
            # Suffix directories are the three-character sub-directories.
            found = []
            for entry in os.listdir(path):
                if len(entry) == 3 and isdir(join(path, entry)):
                    found.append(entry)
            return found

        self.replication_count += 1
        self.logger.increment('partition.delete.count.%s' % (job['device'],))
        begin = time.time()
        try:
            responses = []
            suffixes = tpool.execute(tpool_get_suffixes, job['path'])
            if suffixes:
                suffix_path = '/' + '-'.join(suffixes)
                for node in job['nodes']:
                    synced = self.sync(node, job, suffixes)
                    if synced:
                        with Timeout(self.http_timeout):
                            conn = http_connect(
                                node['replication_ip'],
                                node['replication_port'],
                                node['device'], job['partition'], 'REPLICATE',
                                suffix_path, headers=self.headers)
                            conn.getresponse().read()
                    responses.append(synced)
            if self.handoff_delete:
                # delete handoff if we have had handoff_delete successes
                successes = sum(1 for ok in responses if ok)
                delete_handoff = successes >= self.handoff_delete
            else:
                # delete handoff if all syncs were successful
                delete_handoff = len(responses) == len(job['nodes']) and \
                    all(responses)
            if not suffixes or delete_handoff:
                self.logger.info(_("Removing partition: %s"), job['path'])
                tpool.execute(shutil.rmtree, job['path'], ignore_errors=True)
        except (Exception, Timeout):
            self.logger.exception(_("Error syncing handoff partition"))
        finally:
            self.partition_times.append(time.time() - begin)
            self.logger.timing_since('partition.delete.timing', begin)

    def update(self, job):
        """
        High-level method that replicates a single partition.

        LightSync variant: a partition-level hash is obtained through
        ``get_part_hash`` and sent to the remote node inside the REPLICATE
        path (``/_SHORTREP_-<hash>``).  If the unpickled response is the
        string "OK" nothing more is done; otherwise the response is used as
        the remote suffix-hash mapping, differing suffixes are synced, and a
        second REPLICATE naming those suffixes is sent.  The loop stops after
        the first node that is handled without error.

        :param job: a dict containing info about the partition to be replicated
        """
        self.replication_count += 1
        self.logger.increment('partition.update.count.%s' % (job['device'],))
        begin = time.time()
        try:
            # MODIFIED LightSync
            # local_hash (per-suffix hashes) is computed lazily: only when
            # no partition hash is available or a remote node asks for it.
            local_hash = None
            part_hash_local = tpool_reraise(get_part_hash, job['path'])  
            # No partition hash available -- presumably the partition has
            # been modified since it was last hashed (TODO confirm against
            # get_part_hash): rehash the suffixes, then read the hash again.
            if part_hash_local is None:
                hashed, local_hash = tpool_reraise(
                    get_hashes, job['path'],
                    do_listdir=(self.replication_count % 10) == 0,
                    reclaim_age=self.reclaim_age)
                self.suffix_hash += hashed
                self.logger.update_stats('suffix.hashes', hashed)  

                part_hash_local = tpool_reraise(get_part_hash, job['path'])               
            """hashed, local_hash = tpool_reraise(
                get_hashes, job['path'],
                do_listdir=(self.replication_count % 10) == 0,
                reclaim_age=self.reclaim_age)
            self.suffix_hash += hashed
            self.logger.update_stats('suffix.hashes', hashed)"""
            # attempts_left is a one-shot flag: more nodes from the ring are
            # added to the candidates only the first time a node answers 507.
            attempts_left = True
            nodes = itertools.chain(job['nodes'])
            while (True):
                # End of LightSync modification
                # next() raises StopIteration once the candidates run out.
                # It is outside the inner try, so it propagates to the outer
                # except at the end of this method.
                # NOTE(review): that handler logs it as "Error syncing
                # partition" and the suffix_count update is skipped.
                node = next(nodes)
                try:
                    # MODIFIED LightSync
                    # Short replication request: carry the local partition
                    # hash in the path; an empty path is used without one.
                    req_suff = '' if part_hash_local is None else '/_SHORTREP_-'\
                               +part_hash_local
                    with Timeout(self.http_timeout):
                        resp = http_connect(
                            node['replication_ip'], node['replication_port'],
                            node['device'], job['partition'], 'REPLICATE',
                            req_suff, headers=self.headers).getresponse()
                        if resp.status == HTTP_INSUFFICIENT_STORAGE:
                            self.logger.error(_('%(ip)s/%(device)s responded'
                                                ' as unmounted'), node)
                            if(attempts_left):
                                attempts_left = False
                                # TODO: start from the current node's
                                # hand-off (hash the node info to get the
                                # hand-off position).
                                # The extra nodes are tried before the
                                # remaining original candidates.
                                nodes = itertools.chain(
                                    self.object_ring.get_more_nodes(int(job['partition'])),
                                    nodes)
                            continue
                        if resp.status != HTTP_OK:
                            self.logger.error(_("Invalid response %(resp)s "
                                                "from %(ip)s"),
                                                {'resp': resp.status,
                                                'ip': node['replication_ip']})
                            continue
                        # NOTE(review): this unpickles a body received over
                        # the network; only safe if the peer is trusted.
                        part_hash_remote = pickle.loads(resp.read())
                        del resp
                    # The remote answers "OK" when no further work is needed
                    # for this partition.
                    if part_hash_remote == "OK":
                        break
                    # Anything else is used as the remote suffix -> hash
                    # mapping.
                    remote_hash = part_hash_remote
                    if local_hash is None:
                        hashed, local_hash = tpool_reraise(
                            get_hashes, job['path'],
                            do_listdir=(self.replication_count % 10) == 0,
                            reclaim_age=self.reclaim_age)
                        self.suffix_hash += hashed
                        self.logger.update_stats('suffix.hashes', hashed)
                    '''
                    with Timeout(self.http_timeout):
                        resp = http_connect(
                            node['replication_ip'], node['replication_port'],
                            node['device'], job['partition'], 'REPLICATE',
                            '', headers=self.headers).getresponse()
                        if resp.status == HTTP_INSUFFICIENT_STORAGE:
                            self.logger.error(_('%(ip)s/%(device)s responded'
                                                ' as unmounted'), node)
                            attempts_left += 1
                            continue
                        if resp.status != HTTP_OK:
                            self.logger.error(_("Invalid response %(resp)s "
                                                "from %(ip)s"),
                                              {'resp': resp.status,
                                               'ip': node['replication_ip']})
                            continue
                        remote_hash = pickle.loads(resp.read())
                        del resp
                    '''
                    # End of LightSync modification
                    # Suffixes whose local hash differs from (or is missing
                    # on) the remote side.
                    suffixes = [suffix for suffix in local_hash if
                                local_hash[suffix] !=
                                remote_hash.get(suffix, -1)]
                    if not suffixes:
                        # MODIFIED LightSync: stop instead of trying the
                        # next node.
                        break
                    # Recalculate the differing suffixes locally and compare
                    # again before syncing.
                    hashed, recalc_hash = tpool_reraise(
                        get_hashes,
                        job['path'], recalculate=suffixes,
                        reclaim_age=self.reclaim_age)
                    self.logger.update_stats('suffix.hashes', hashed)
                    local_hash = recalc_hash
                    suffixes = [suffix for suffix in local_hash if
                                local_hash[suffix] !=
                                remote_hash.get(suffix, -1)]
                    self.sync(node, job, suffixes)
                    # Second REPLICATE naming the synced suffixes; the
                    # response body is read and discarded.
                    with Timeout(self.http_timeout):
                        conn = http_connect(
                            node['replication_ip'], node['replication_port'],
                            node['device'], job['partition'], 'REPLICATE',
                            '/' + '-'.join(suffixes),
                            headers=self.headers)
                        conn.getresponse().read()
                    self.suffix_sync += len(suffixes)
                    self.logger.update_stats('suffix.syncs', len(suffixes))
                    # MODIFIED LightSync: one successfully handled node is
                    # enough.
                    break
                    # End of LightSync modification
                except (Exception, Timeout):
                    # A failure with one node is logged and the loop moves
                    # on to the next candidate.
                    self.logger.exception(_("Error syncing with node: %s") %
                                          node)
            # MODIFIED LightSync: local_hash may still be None when the
            # suffix hashes were never needed.
            self.suffix_count += len(local_hash) if local_hash is not None else 0
            # End of LightSync modification
        except (Exception, Timeout):
            self.logger.exception(_("Error syncing partition"))
        finally:
            self.partition_times.append(time.time() - begin)
            self.logger.timing_since('partition.update.timing', begin)

    def stats_line(self):
        """
        Log statistics for the replication pass that is currently running:
        overall progress, and -- once any suffixes have been counted --
        suffix hash/sync percentages and partition timing figures.
        """
        if not self.replication_count:
            self.logger.info(
                _("Nothing replicated for %s seconds."),
                (time.time() - self.start))
            return

        # guard against a zero elapsed time before dividing
        elapsed = (time.time() - self.start) or 0.000001
        rate = self.replication_count / elapsed
        progress = {
            'replicated': self.replication_count,
            'total': self.job_count,
            'percentage': self.replication_count * 100.0 / self.job_count,
            'time': time.time() - self.start,
            'rate': rate,
            'remaining': '%d%s' % compute_eta(self.start,
                                              self.replication_count,
                                              self.job_count),
        }
        self.logger.info(
            _("%(replicated)d/%(total)d (%(percentage).2f%%)"
              " partitions replicated in %(time).2fs (%(rate).2f/sec, "
              "%(remaining)s remaining)"),
            progress)

        if not self.suffix_count:
            return

        suffix_stats = {
            'checked': self.suffix_count,
            'hashed': (self.suffix_hash * 100.0) / self.suffix_count,
            'synced': (self.suffix_sync * 100.0) / self.suffix_count,
        }
        self.logger.info(
            _("%(checked)d suffixes checked - "
              "%(hashed).2f%% hashed, %(synced).2f%% synced"),
            suffix_stats)

        # sorted in place so that max/min/median can be read by position
        self.partition_times.sort()
        median_index = len(self.partition_times) // 2
        timing_stats = {
            'max': self.partition_times[-1],
            'min': self.partition_times[0],
            'med': self.partition_times[median_index],
        }
        self.logger.info(
            _("Partition times: max %(max).4fs, "
              "min %(min).4fs, med %(med).4fs"),
            timing_stats)

    def kill_coros(self):
        """Kill every coroutine that is currently running in the run pool."""
        # iterate over a snapshot of the pool's running coroutines
        running = list(self.run_pool.coroutines_running)
        for greenthread in running:
            try:
                greenthread.kill(GreenletExit)
            except GreenletExit:
                pass

    def heartbeat(self):
        """
        Background loop for a replication pass: sleep for ``stats_interval``
        seconds, log the current statistics, and repeat forever.
        """
        while True:
            pause = self.stats_interval
            eventlet.sleep(pause)
            self.stats_line()

    def detect_lockups(self):
        """
        Watchdog loop for a replication pass.

        In testing, the pool.waitall() call very occasionally failed to
        return.  Every ``lockup_timeout`` seconds this loop compares the
        replication count with the value seen on the previous wake-up; if
        it has not moved, all live coroutines are killed so the pass can
        finish.
        """
        while True:
            eventlet.sleep(self.lockup_timeout)
            stalled = self.replication_count == self.last_replication_count
            if stalled:
                self.logger.error(_("Lockup detected.. killing live coros."))
                self.kill_coros()
            self.last_replication_count = self.replication_count

    def collect_jobs(self):
        """
        Returns a sorted list of jobs (dictionaries) that specify the
        partitions, nodes, etc to be synced.

        A job is built for every partition directory found under the
        ``objects`` directory of each local ring device.  Each job dict has
        the keys ``path``, ``device``, ``nodes``, ``delete`` and
        ``partition``.

        When the local device is one of the partition's nodes, ``nodes``
        holds the other nodes, starting with the ones that follow the local
        device in ring order (LightSync).  When the local device is not
        among them, ``nodes`` holds all of the partition's nodes and
        ``delete`` is True, so the partition is handled by
        ``update_deleted``.

        :returns: list of job dicts, shuffled, with delete jobs moved to the
                  front when ``handoffs_first`` is set
        """
        jobs = []
        ips = whataremyips()
        for local_dev in [dev for dev in self.object_ring.devs
                          if dev and dev['replication_ip'] in ips and
                          dev['replication_port'] == self.port]:
            dev_path = join(self.devices_dir, local_dev['device'])
            obj_path = join(dev_path, 'objects')
            tmp_path = join(dev_path, 'tmp')
            if self.mount_check and not ismount(dev_path):
                self.logger.warn(_('%s is not mounted'), local_dev['device'])
                continue
            unlink_older_than(tmp_path, time.time() - self.reclaim_age)
            if not os.path.exists(obj_path):
                try:
                    mkdirs(obj_path)
                except Exception:
                    self.logger.exception('ERROR creating %s' % obj_path)
                # a freshly created (or uncreatable) directory has no
                # partitions to list
                continue
            for partition in os.listdir(obj_path):
                try:
                    job_path = join(obj_path, partition)
                    if isfile(job_path):
                        # Clean up any (probably zero-byte) files where a
                        # partition should be.
                        self.logger.warning('Removing partition directory '
                                            'which was a file: %s', job_path)
                        os.remove(job_path)
                        continue
                    part_nodes = \
                        self.object_ring.get_part_nodes(int(partition))
                    # MODIFIED LightSync: rotate the node list so that the
                    # nodes after the local device come first.
                    for mypos, part_node in enumerate(part_nodes):
                        if part_node['id'] == local_dev['id']:
                            nodes = part_nodes[mypos + 1:] + part_nodes[:mypos]
                            break
                    else:
                        # The local device does not hold this partition
                        # according to the ring: keep every node so the
                        # "delete" test below is true and no node is
                        # silently left out.
                        nodes = list(part_nodes)
                    jobs.append(
                        dict(path=job_path,
                             device=local_dev['device'],
                             nodes=nodes,
                             delete=len(nodes) > len(part_nodes) - 1,
                             partition=partition))
                except (ValueError, OSError):
                    # non-numeric directory names and filesystem errors are
                    # skipped
                    continue
        random.shuffle(jobs)
        if self.handoffs_first:
            # Move the handoff parts to the front of the list
            jobs.sort(key=lambda job: not job['delete'])
        self.job_count = len(jobs)
        return jobs

    def replicate(self, override_devices=None, override_partitions=None):
        """
        Run a replication pass.

        Resets the per-pass counters, starts the heartbeat and lockup
        detector green threads, then spawns one worker per collected job
        (``update_deleted`` for delete jobs, ``update`` otherwise).  The
        pass is abandoned as soon as ``check_ring`` reports a ring change.

        :param override_devices: when non-empty, only jobs on these devices
                                 are run
        :param override_partitions: when non-empty, only jobs for these
                                    partitions are run
        """
        self.start = time.time()
        self.suffix_count = 0
        self.suffix_sync = 0
        self.suffix_hash = 0
        self.replication_count = 0
        self.last_replication_count = -1
        self.partition_times = []

        only_devices = [] if override_devices is None else override_devices
        only_partitions = ([] if override_partitions is None
                           else override_partitions)

        heartbeat_thread = eventlet.spawn(self.heartbeat)
        watchdog_thread = eventlet.spawn(self.detect_lockups)
        eventlet.sleep()  # Give spawns a cycle

        try:
            self.run_pool = GreenPool(size=self.concurrency)
            for job in self.collect_jobs():
                if only_devices and job['device'] not in only_devices:
                    continue
                if only_partitions and \
                        job['partition'] not in only_partitions:
                    continue
                device_root = join(self.devices_dir, job['device'])
                if self.mount_check and not ismount(device_root):
                    self.logger.warn(_('%s is not mounted'), job['device'])
                    continue
                if not self.check_ring():
                    self.logger.info(_("Ring change detected. Aborting "
                                       "current replication pass."))
                    return
                worker = self.update_deleted if job['delete'] else self.update
                self.run_pool.spawn(worker, job)
            with Timeout(self.lockup_timeout):
                self.run_pool.waitall()
        except (Exception, Timeout):
            self.logger.exception(_("Exception in top-level replication loop"))
            self.kill_coros()
        finally:
            # runs on every exit path, including the early return above
            heartbeat_thread.kill()
            watchdog_thread.kill()
            self.stats_line()

    def run_once(self, *args, **kwargs):
        """
        Run a single replication pass in script mode.

        The ``devices`` and ``partitions`` keyword arguments (comma-separated
        strings) restrict the pass.  The recon cache is only updated when
        neither restriction is in effect.
        """
        began = time.time()
        self.logger.info(_("Running object replicator in script mode."))
        device_filter = list_from_csv(kwargs.get('devices'))
        partition_filter = list_from_csv(kwargs.get('partitions'))
        self.replicate(
            override_devices=device_filter,
            override_partitions=partition_filter)
        minutes = (time.time() - began) / 60
        self.logger.info(
            _("Object replication complete (once). (%.02f minutes)"), minutes)
        if partition_filter or device_filter:
            # a restricted pass does not update the recon cache
            return
        recon_data = {'object_replication_time': minutes,
                      'object_replication_last': time.time()}
        dump_recon_cache(recon_data, self.rcache, self.logger)

    def run_forever(self, *args, **kwargs):
        """
        Run replication passes continually in daemon mode, recording each
        pass in the recon cache and sleeping ``run_pause`` seconds between
        passes.
        """
        self.logger.info(_("Starting object replicator in daemon mode."))
        while True:
            began = time.time()
            self.logger.info(_("Starting object replication pass."))
            self.replicate()
            minutes = (time.time() - began) / 60
            self.logger.info(
                _("Object replication complete. (%.02f minutes)"), minutes)
            recon_data = {'object_replication_time': minutes,
                          'object_replication_last': time.time()}
            dump_recon_cache(recon_data, self.rcache, self.logger)
            self.logger.debug('Replication sleeping for %s seconds.',
                              self.run_pause)
            sleep(self.run_pause)
Ejemplo n.º 26
0
import botocore.exceptions
import hashlib
import json
import os
from s3_sync.utils import ACCOUNT_ACL_KEY
import subprocess
from swift.common.middleware.acl import format_acl
from swift.common.ring import Ring
import swiftclient
import time
import unittest
import urllib

import utils

# Module-level container ring, loaded from /etc/swift when this module is
# imported.
CONTAINER_RING = Ring('/etc/swift', ring_name='container')


class WaitTimedOut(RuntimeError):
    """Raised by wait_for_condition when its timeout expires."""


def wait_for_condition(timeout, checker, interval=0.1):
    """
    Poll ``checker`` until it returns a truthy value or ``timeout`` expires.

    :param timeout: maximum number of seconds to keep polling
    :param checker: zero-argument callable that is polled repeatedly
    :param interval: seconds to sleep between two polls (default 0.1,
                     the previously hard-coded value)
    :returns: the first truthy value returned by ``checker``
    :raises WaitTimedOut: if the timeout expires before ``checker`` returns
                          a truthy value
    """
    # the deadline does not change between polls, so compute it once
    deadline = time.time() + timeout
    while time.time() < deadline:
        result = checker()
        if result:
            return result
        time.sleep(interval)
    raise WaitTimedOut('Timeout (%s) expired' % timeout)
Ejemplo n.º 27
0
#!/usr/bin/python

from sys import exit
from smtplib import SMTP
from socket import gethostname
from swift.common.constraints import check_mount
from swift.common.utils import whataremyips
from swift.common.ring import Ring

# Load the object ring; if the ring file cannot be read, quit.
try:
    ring = Ring('/etc/swift/object.ring.gz')
except IOError:
    exit()

my_ips = whataremyips()
mounted = 0  # local, positively weighted drives that pass check_mount
drivecount = 0  # all local, positively weighted drives in the ring
drivelabels = []  # device names of the drives that fail check_mount

for dev in ring.devs:
    try:
        # only drives on this host with a weight above zero are counted
        if dev['ip'] in my_ips and float(dev['weight']) > 0:
            drivecount += 1
            if check_mount('/srv/node', dev['device']):
                mounted += 1
            else:
                drivelabels.append(dev['device'])
    except TypeError:
        # presumably an empty (None) slot in ring.devs, which cannot be
        # subscripted -- TODO confirm; such entries are skipped
        pass

unmounted = drivecount - mounted
Ejemplo n.º 28
0
 def test_account_container_reload(self):
     """
     For the account and container servers, pick a random node of
     partition 1 from the matching ring and run the reload check on it.
     """
     for server in ("account", "container"):
         part_nodes = Ring("/etc/swift", ring_name=server).get_part_nodes(1)
         chosen = random.choice(part_nodes)
         self._check_reload(server, chosen["ip"], chosen["port"])
Ejemplo n.º 29
0
class ContainerCrawler(object):
    """
    Feed rows from locally stored container databases to handler objects.

    For every configured container the crawler looks for a container-ring
    node that is local to this host, reads the rows added since the last
    saved position and passes them to an instance of ``handler_class`` --
    either all at once (bulk mode) or row by row through a pool of eventlet
    workers.
    """

    def __init__(self, conf, handler_class, logger=None):
        """
        :param conf: configuration dict; 'devices', 'status_dir' and
                     'items_chunk' are required, 'bulk_process',
                     'poll_interval', 'workers' and 'containers' are read
                     when present
        :param handler_class: called as ``handler_class(status_dir,
                              settings)`` for every container
        :param logger: optional logger; when falsy, log() does nothing
        """
        self.logger = logger
        self.conf = conf
        self.root = conf['devices']
        self.bulk = conf.get('bulk_process', False)
        self.interval = 10
        self.swift_dir = '/etc/swift'
        self.container_ring = Ring(self.swift_dir, ring_name='container')

        self.status_dir = conf['status_dir']
        self.myips = whataremyips('0.0.0.0')
        self.items_chunk = conf['items_chunk']
        self.poll_interval = conf.get('poll_interval', 5)
        self.handler_class = handler_class

        # the worker pool is only needed when rows are handled one by one
        if not self.bulk:
            self._init_workers(conf)

        self.log('debug', 'Created the Container Crawler instance')

    def _init_workers(self, conf):
        """Create the worker pool and its queues and start the workers."""
        self.workers = conf.get('workers', 10)
        self.pool = eventlet.GreenPool(self.workers)
        self.work_queue = eventlet.queue.Queue(self.workers * 2)

        # max_size=None means a Queue is infinite
        self.error_queue = eventlet.queue.Queue(maxsize=None)
        self.stats_queue = eventlet.queue.Queue(maxsize=None)
        for _ in range(0, self.workers):
            self.pool.spawn_n(self._worker)

    def _worker(self):
        """
        Worker loop: take (row, handler) pairs off the work queue until a
        falsy item is received.  Handler failures are recorded on the error
        queue instead of being raised.
        """
        while 1:
            work = self.work_queue.get()
            if not work:
                self.work_queue.task_done()
                break
            row, handler = work
            try:
                handler.handle(row)
            except Exception as e:
                self.error_queue.put((row, e))
            self.work_queue.task_done()

    def _stop(self):
        """Send one stop marker per worker and wait for the pool to drain."""
        for _ in range(0, self.workers):
            self.work_queue.put(None)
        self.pool.waitall()

    def _check_errors(self):
        """
        Log every queued worker error.

        :raises RuntimeError: if at least one error was queued
        """
        if self.error_queue.empty():
            return

        while not self.error_queue.empty():
            row, error = self.error_queue.get()
            self.log('error',
                     'Failed to handle row %s: %r' % (row['ROWID'], error))
        raise RuntimeError('Failed to process rows')

    def log(self, level, message):
        """Log ``message`` through the logger method named ``level``."""
        if not self.logger:
            return
        getattr(self.logger, level)(message)

    def get_broker(self, account, container, part, node):
        """
        Build a ContainerBroker for the container database of
        ``account/container`` in partition ``part`` on ``node``'s device.
        """
        db_hash = hash_path(account, container)
        db_dir = storage_directory(DATADIR, part, db_hash)
        db_path = os.path.join(self.root, node['device'], db_dir,
                               db_hash + '.db')
        return ContainerBroker(db_path, account=account, container=container)

    def submit_items(self, handler, rows):
        """
        Hand ``rows`` to ``handler``.

        In bulk mode the handler receives all rows in one call.  Otherwise
        each row is queued for the workers and this method blocks until all
        of them are processed.

        :raises RuntimeError: (non-bulk mode) if any row failed
        """
        if self.bulk:
            handler.handle(rows)
            return

        for row in rows:
            self.work_queue.put((row, handler))
        self.work_queue.join()
        self._check_errors()

    def process_items(self, handler, rows, nodes_count, node_id):
        """
        Submit ``rows`` in two batches: first the rows with
        ``ROWID % nodes_count == node_id``, then all remaining rows.
        """
        # Real lists are built (not filter()) so that a bulk handler
        # receives a sequence rather than a one-shot iterator on Python 3.
        owned_rows = [row for row in rows
                      if row['ROWID'] % nodes_count == node_id]
        self.submit_items(handler, owned_rows)

        verified_rows = [row for row in rows
                         if row['ROWID'] % nodes_count != node_id]
        self.submit_items(handler, verified_rows)

    def handle_container(self, settings):
        """
        Process new rows of one container.

        Only the first local node whose database can be read is used; the
        last processed row is saved through the handler afterwards.

        :param settings: dict with at least 'account' and 'container'
        """
        part, container_nodes = self.container_ring.get_nodes(
            settings['account'], settings['container'])
        nodes_count = len(container_nodes)
        handler = self.handler_class(self.status_dir, settings)

        for index, node in enumerate(container_nodes):
            if not is_local_device(self.myips, None, node['ip'], node['port']):
                continue
            broker = self.get_broker(settings['account'],
                                     settings['container'], part, node)
            broker_info = broker.get_info()
            last_row = handler.get_last_row(broker_info['id'])
            if not last_row:
                last_row = 0
            try:
                items = broker.get_items_since(last_row, self.items_chunk)
            except DatabaseConnectionError:
                # try the next local node, if there is one
                continue
            if items:
                self.process_items(handler, items, nodes_count, index)
                handler.save_last_row(items[-1]['ROWID'], broker_info['id'])
            return

    def run_always(self):
        """
        Call run_once() in a loop, starting passes at least
        ``poll_interval`` seconds apart.  Returns immediately when no
        containers are configured.
        """
        # Since we don't support reloading, the daemon should quit if there are
        # no containers configured
        if 'containers' not in self.conf or not self.conf['containers']:
            return
        self.log('debug', 'Entering the poll loop')
        while True:
            start = time.time()
            self.run_once()
            elapsed = time.time() - start
            if elapsed < self.poll_interval:
                time.sleep(self.poll_interval - elapsed)

    def run_once(self):
        """
        Process every configured container once.  A failure in one
        container is logged, with its traceback, and does not stop the
        others.
        """
        for container_settings in self.conf['containers']:
            try:
                self.handle_container(container_settings)
            except Exception as e:
                account = container_settings.get('account', 'N/A')
                container = container_settings.get('container', 'N/A')
                self.log(
                    'error', "Failed to process %s/%s with %s: %s" %
                    (account, container, self.handler_class, repr(e)))
                # format_exc() formats the exception being handled; its
                # first positional argument is a frame limit, so the
                # exception must not be passed in.
                self.log('error', traceback.format_exc())
Ejemplo n.º 30
0
 def get_container_ring(self):
     """Return the container ring, loading it from swift_dir on first use."""
     if self.container_ring:
         return self.container_ring
     self.container_ring = Ring(self.swift_dir, ring_name='container')
     return self.container_ring
Ejemplo n.º 31
0
def print_obj(datafile, check_etag=True, swift_dir='/etc/swift',
              policy_name='', drop_prefixes=False):
    """
    Display information about an object read from the datafile.
    Optionally verify the datafile content matches the ETag metadata.

    :param datafile: path on disk to object file
    :param check_etag: boolean, will read datafile content and verify
                       computed checksum matches value stored in
                       metadata.
    :param swift_dir: the path on disk to rings
    :param policy_name: optionally the name to use when finding the ring
    :param drop_prefixes: if True, strip "X-Object-Meta-", "X-Object-Sysmeta-",
                          and "X-Object-Transient-Sysmeta-" when displaying
                          User Metadata, System Metadata, and Transient
                          System Metadata entries
    :raises InfoSystemExit: if the datafile does not exist or its metadata
                            cannot be read
    """
    if not os.path.exists(datafile):
        print("Data file doesn't exist")
        raise InfoSystemExit()
    # make a bare relative path explicit
    if not datafile.startswith(('/', './')):
        datafile = './' + datafile

    policy_index = None
    ring = None
    datadir = DATADIR_BASE

    # try to extract policy index from datafile disk path
    fullpath = os.path.abspath(datafile)
    policy_index = int(extract_policy(fullpath) or POLICIES.legacy)

    try:
        if policy_index:
            # a non-zero policy index gets its own data dir and ring name
            datadir += '-' + str(policy_index)
            ring = Ring(swift_dir, ring_name='object-' + str(policy_index))
        elif policy_index == 0:
            ring = Ring(swift_dir, ring_name='object')
    except IOError:
        # no such ring
        pass

    if policy_name:
        policy = POLICIES.get_by_name(policy_name)
        if policy:
            policy_index_for_name = policy.idx
            # warn when the named policy disagrees with the index taken
            # from the path
            if (policy_index is not None and
               policy_index_for_name is not None and
               policy_index != policy_index_for_name):
                print('Warning: Ring does not match policy!')
                print('Double check your policy name!')
            # fall back to the named policy's ring when none was loaded
            # from the path-derived index
            if not ring and policy_index_for_name:
                ring = POLICIES.get_object_ring(policy_index_for_name,
                                                swift_dir)
                datadir = get_data_dir(policy_index_for_name)

    with open(datafile, 'rb') as fp:
        try:
            metadata = read_metadata(fp)
        except EOFError:
            print("Invalid metadata")
            raise InfoSystemExit()

        # ETag and Content-Length are removed from the metadata before it
        # is printed; they are reported separately below
        etag = metadata.pop('ETag', '')
        length = metadata.pop('Content-Length', '')
        path = metadata.get('name', '')
        print_obj_metadata(metadata, drop_prefixes)

        # Optional integrity check; it's useful, but slow.
        file_len = None
        if check_etag:
            # hash the file in 64 KiB chunks, counting the bytes read
            h = md5()
            file_len = 0
            while True:
                data = fp.read(64 * 1024)
                if not data:
                    break
                h.update(data)
                file_len += len(data)
            h = h.hexdigest()
            if etag:
                if h == etag:
                    print('ETag: %s (valid)' % etag)
                else:
                    print("ETag: %s doesn't match file hash of %s!" %
                          (etag, h))
            else:
                print('ETag: Not found in metadata')
        else:
            print('ETag: %s (not checked)' % etag)
            # without reading the content, take the size from the file
            # status instead
            file_len = os.fstat(fp.fileno()).st_size

        if length:
            if file_len == int(length):
                print('Content-Length: %s (valid)' % length)
            else:
                print("Content-Length: %s doesn't match file length of %s"
                      % (length, file_len))
        else:
            print('Content-Length: Not found in metadata')

        # NOTE(review): raises ValueError when 'name' is missing from the
        # metadata or is not of the form /account/container/object
        account, container, obj = path.split('/', 3)[1:]
        if ring:
            print_ring_locations(ring, datadir, account, container, obj,
                                 policy_index=policy_index)
Ejemplo n.º 32
0
 def setUp(self):
     """
     Call resetswift(), then record the (ip, port) pair of every device
     slot in the account ring that is not empty.
     """
     resetswift()
     account_ring = Ring('/etc/swift', ring_name='account')
     ip_ports = []
     for dev in account_ring.devs:
         if dev:
             ip_ports.append((dev['ip'], dev['port']))
     self.ip_ports = ip_ports
Ejemplo n.º 33
0
 def test_account_container_reload(self):
     """
     For the account and container servers, pick a random node of
     partition 1 from the matching ring and run the reload check on it.
     """
     for server in ('account', 'container'):
         part_nodes = Ring('/etc/swift', ring_name=server).get_part_nodes(1)
         chosen = random.choice(part_nodes)
         self._check_reload(server, chosen['ip'], chosen['port'])
Ejemplo n.º 34
0
    def __init__(self, conf, memcache=None, logger=None, account_ring=None,
                 container_ring=None):
        """
        Configure the application from a configuration mapping.

        :param conf: configuration dict; an empty dict is used when None
        :param memcache: stored on ``self.memcache`` as given
        :param logger: logger to use; when None, one is created with
                       ``get_logger`` and log_route 'proxy-server'
        :param account_ring: account ring to use; loaded from ``swift_dir``
                             when falsy
        :param container_ring: container ring to use; loaded from
                               ``swift_dir`` when falsy
        :raises ValueError: if ``request_node_count`` is neither "<int>" nor
                            "<int> * replicas" (or a numeric option cannot
                            be converted)
        """
        if conf is None:
            conf = {}
        if logger is None:
            self.logger = get_logger(conf, log_route='proxy-server')
        else:
            self.logger = logger
        self._override_options = self._load_per_policy_config(conf)
        # true when at least one per-policy configuration sorts by timing
        self.sorts_by_timing = any(pc.sorting_method == 'timing'
                                   for pc in self._override_options.values())

        self._error_limiting = {}

        swift_dir = conf.get('swift_dir', '/etc/swift')
        self.swift_dir = swift_dir
        # timeouts, queue depth and chunk sizes
        self.node_timeout = float(conf.get('node_timeout', 10))
        self.recoverable_node_timeout = float(
            conf.get('recoverable_node_timeout', self.node_timeout))
        self.conn_timeout = float(conf.get('conn_timeout', 0.5))
        self.client_timeout = int(conf.get('client_timeout', 60))
        self.put_queue_depth = int(conf.get('put_queue_depth', 10))
        self.object_chunk_size = int(conf.get('object_chunk_size', 65536))
        self.client_chunk_size = int(conf.get('client_chunk_size', 65536))
        self.trans_id_suffix = conf.get('trans_id_suffix', '')
        self.post_quorum_timeout = float(conf.get('post_quorum_timeout', 0.5))
        self.error_suppression_interval = \
            int(conf.get('error_suppression_interval', 60))
        self.error_suppression_limit = \
            int(conf.get('error_suppression_limit', 10))
        self.recheck_container_existence = \
            int(conf.get('recheck_container_existence',
                         DEFAULT_RECHECK_CONTAINER_EXISTENCE))
        self.recheck_account_existence = \
            int(conf.get('recheck_account_existence',
                         DEFAULT_RECHECK_ACCOUNT_EXISTENCE))
        self.allow_account_management = \
            config_true_value(conf.get('allow_account_management', 'no'))
        # rings passed in by the caller take precedence over the ones on disk
        self.container_ring = container_ring or Ring(swift_dir,
                                                     ring_name='container')
        self.account_ring = account_ring or Ring(swift_dir,
                                                 ring_name='account')
        # ensure rings are loaded for all configured storage policies
        for policy in POLICIES:
            policy.load_ring(swift_dir)
        self.obj_controller_router = ObjectControllerRouter()
        self.memcache = memcache
        # also read MIME types from a mime.types file in swift_dir
        mimetypes.init(mimetypes.knownfiles +
                       [os.path.join(swift_dir, 'mime.types')])
        self.account_autocreate = \
            config_true_value(conf.get('account_autocreate', 'no'))
        self.auto_create_account_prefix = (
            conf.get('auto_create_account_prefix') or '.')
        self.expiring_objects_account = self.auto_create_account_prefix + \
            (conf.get('expiring_objects_account_name') or 'expiring_objects')
        self.expiring_objects_container_divisor = \
            int(conf.get('expiring_objects_container_divisor') or 86400)
        self.max_containers_per_account = \
            int(conf.get('max_containers_per_account') or 0)
        # comma-separated options are split into lists of stripped,
        # non-empty entries
        self.max_containers_whitelist = [
            a.strip()
            for a in conf.get('max_containers_whitelist', '').split(',')
            if a.strip()]
        self.deny_host_headers = [
            host.strip() for host in
            conf.get('deny_host_headers', '').split(',') if host.strip()]
        self.log_handoffs = config_true_value(conf.get('log_handoffs', 'true'))
        self.cors_allow_origin = [
            a.strip()
            for a in conf.get('cors_allow_origin', '').split(',')
            if a.strip()]
        self.cors_expose_headers = [
            a.strip()
            for a in conf.get('cors_expose_headers', '').split(',')
            if a.strip()]
        self.strict_cors_mode = config_true_value(
            conf.get('strict_cors_mode', 't'))
        self.node_timings = {}
        self.timing_expiry = int(conf.get('timing_expiry', 300))
        self.concurrent_gets = \
            config_true_value(conf.get('concurrent_gets'))
        self.concurrency_timeout = float(conf.get('concurrency_timeout',
                                                  self.conn_timeout))
        # request_node_count is either a plain integer or
        # "<int> * replicas"; it is stored as a function of the replica count
        value = conf.get('request_node_count', '2 * replicas').lower().split()
        if len(value) == 1:
            rnc_value = int(value[0])
            self.request_node_count = lambda replicas: rnc_value
        elif len(value) == 3 and value[1] == '*' and value[2] == 'replicas':
            rnc_value = int(value[0])
            self.request_node_count = lambda replicas: rnc_value * replicas
        else:
            raise ValueError(
                'Invalid request_node_count value: %r' % ''.join(value))
        # swift_owner_headers are stripped by the account and container
        # controllers; we should extend header stripping to object controller
        # when a privileged object header is implemented.
        swift_owner_headers = conf.get(
            'swift_owner_headers',
            'x-container-read, x-container-write, '
            'x-container-sync-key, x-container-sync-to, '
            'x-account-meta-temp-url-key, x-account-meta-temp-url-key-2, '
            'x-container-meta-temp-url-key, x-container-meta-temp-url-key-2, '
            'x-account-access-control')
        self.swift_owner_headers = [
            name.strip().title()
            for name in swift_owner_headers.split(',') if name.strip()]
        # Initialization was successful, so now apply the client chunk size
        # parameter as the default read / write buffer size for the network
        # sockets.
        #
        # NOTE WELL: This is a class setting, so until we can set this on a
        # per-connection basis, this affects reading and writing on ALL
        # sockets, those between the proxy servers and external clients, and
        # those between the proxy servers and the other internal servers.
        #
        # ** Because it affects the client as well, currently, we use the
        # client chunk size as the governor and not the object chunk size.
        socket._fileobject.default_bufsize = self.client_chunk_size
        self.expose_info = config_true_value(
            conf.get('expose_info', 'yes'))
        self.disallowed_sections = list_from_csv(
            conf.get('disallowed_sections', 'swift.valid_api_versions'))
        self.admin_key = conf.get('admin_key', None)
        # publish this proxy's settings through register_swift_info
        register_swift_info(
            version=swift_version,
            strict_cors_mode=self.strict_cors_mode,
            policies=POLICIES.get_policy_info(),
            allow_account_management=self.allow_account_management,
            account_autocreate=self.account_autocreate,
            **constraints.EFFECTIVE_CONSTRAINTS)
class UtilizationMiddleware(object):
    """WSGI middleware that records transfer samples and serves usage data.

    Requests to ``GET /api/v1/metering`` are answered by :meth:`GET`.  Any
    other request that parses as a Swift path and carries a ``REMOTE_USER``
    not starting with ``.wsgi`` is passed through, and the bytes received and
    sent are written as a zero-size object row under ``self.sample_account``
    once the response body has been streamed.
    """

    def __init__(self, app, conf, *args, **kwargs):
        """
        :param app: the next WSGI application in the pipeline
        :param conf: dict-like middleware configuration; reads
                     ``swift_dir`` (default ``/etc/swift``) and
                     ``sample_rate`` (default 600)
        """
        self.app = app
        self.conf = conf
        self.sample_account = '.transfer_record'
        self.aggregate_account = '.utilization'
        self.logger = get_logger(self.conf, log_route='utilization')
        # Honor swift_dir rather than hard-coding the ring location; the
        # default keeps the previous behavior.
        swift_dir = self.conf.get('swift_dir', '/etc/swift')
        self.container_ring = Ring(swift_dir, ring_name='container')
        self.sample_rate = int(self.conf.get('sample_rate', 600))

    def swift_account(self, env, tenant_id):
        """Return the account name recorded for ``tenant_id``.

        Lists the ``account/`` prefix of the tenant's container in the
        aggregate account and returns the second path component of the first
        entry's name.

        :param env: WSGI environment used to build the pre-authed request
        :param tenant_id: container name inside the aggregate account
        :returns: the account name, or None when the listing request is not
                  successful or returns no entries
        """
        path = '/v1/%s/%s?format=json&prefix=account/' \
               % (self.aggregate_account, tenant_id)
        req = make_pre_authed_request(env, 'GET', path)
        req.environ['swift.proxy_access_log_made'] = True
        resp = req.get_response(self.app)
        if resp.status_int // 100 != 2:
            # Covers 404 as before, and keeps error bodies away from
            # json.loads().
            return None
        body = resp.body
        listing = json.loads(body) if body else []
        if not listing:
            # An existing but empty listing used to raise IndexError.
            return None
        return listing[0]['name'].split('/')[1]

    def check_api_call(self, env):
        """Return True when the request is ``GET /api/v1/metering``."""
        return (env['REQUEST_METHOD'] == 'GET' and
                env.get('RAW_PATH_INFO', None) == '/api/v1/metering')

    def get_account_info(self, env, account):
        """HEAD the account and return its usage counters.

        :returns: tuple ``(container_count, object_count, bytes_used)``;
                  ``(0, 0, 0)`` when the HEAD is not a 2xx response
        """
        req = make_pre_authed_request(env, 'HEAD', '/v1/%s' % account)
        req.environ['swift.proxy_access_log_made'] = True
        resp = req.get_response(self.app)
        if resp.status_int // 100 != 2:
            return (0, 0, 0)
        return (int(resp.headers.get('x-account-container-count', 0)),
                int(resp.headers.get('x-account-object-count', 0)),
                int(resp.headers.get('x-account-bytes-used', 0)))

    def record_usage_data(self, env, tenant_id, account, timestamp):
        """Write a ``usage/<timestamp>/...`` row unless one already exists.

        Nothing is written when the listing request fails or when the
        listing for ``usage/<timestamp>`` already has an entry.
        """
        path = '/v1/%s/%s?prefix=usage/%d&format=json' % (
            self.aggregate_account, tenant_id, timestamp)
        req = make_pre_authed_request(env, 'GET', path)
        req.environ['swift.proxy_access_log_made'] = True
        resp = req.get_response(self.app)
        if resp.status_int // 100 != 2:
            # 404 as before; other errors no longer reach json.loads().
            return
        body = resp.body
        if body and json.loads(body):
            return

        container_cnt, obj_cnt, bt_used = self.get_account_info(env, account)
        u_object = 'usage/%d/%d_%d_%d' % (timestamp, container_cnt,
                                          obj_cnt, bt_used)

        self.put_hidden_object(self.aggregate_account, tenant_id, u_object)

    def iter_objects(self, env, path, prefix, marker, end, count):
        """Yield object names from a container listing, page by page.

        :param env: WSGI environment used to build the pre-authed requests
        :param path: path of the container to list
        :param prefix: listing prefix
        :param marker: listing marker for the first page
        :param end: stop as soon as the integer in the second path component
                    of a name is greater than this value
        :param count: upper bound on the number of entries requested; pages
                      hold at most 1000 entries
        """
        path_with_params = '%s?format=json&prefix=%s' % (path, prefix)
        while count > 0:
            limit = min(count, 1000)
            count -= 1000
            rpath = '%s&marker=%s&limit=%d' % (path_with_params, marker,
                                               limit)
            req = make_pre_authed_request(env, 'GET', rpath)
            req.environ['swift.proxy_access_log_made'] = True
            resp = req.get_response(self.app)
            if resp.status_int // 100 != 2:
                # An error body is not a JSON listing; stop cleanly.
                return
            body = resp.body
            segments = json.loads(body) if body else []
            for seg in segments:
                name = seg['name']
                if int(name.split('/')[1]) > end:
                    return
                yield name

            if len(segments) != limit:
                # Short page: the listing is exhausted.
                return
            marker = segments[-1]['name']

    def retrieve_utilization_data(self, env, tenant_id, start, end, count):
        """Aggregate transfer and usage records for one tenant.

        :returns: dict with ``transfer`` (a list of per-bill-type totals)
                  and ``utilization`` (the counters of the last usage record
                  seen, or zeros when there is none).  All counters are
                  integers.
        """
        path = '/v1/%s/%s' % (self.aggregate_account, tenant_id)
        data = {
            'transfer': [],
            'utilization': {
                'container_count': 0,
                'object_count': 0,
                'bytes_used': 0,
            },
        }

        bytes_recvs = dict()
        bytes_sents = dict()
        req_counts = dict()
        marker = 'transfer/%d' % start
        for o in self.iter_objects(env, path, 'transfer/', marker, end, count):
            parts = o.split('/')
            bill_type = parts[2]
            bytes_recv, bytes_sent, req_cnt = parts[3].split('_')
            bytes_recvs[bill_type] = bytes_recvs.get(bill_type, 0) + int(
                bytes_recv)
            bytes_sents[bill_type] = bytes_sents.get(bill_type, 0) + int(
                bytes_sent)
            req_counts[bill_type] = req_counts.get(bill_type, 0) + int(req_cnt)

        for bill_type, bt_rv in bytes_recvs.items():
            data['transfer'].append({
                'bill_type': int(bill_type),
                'bytes_in': bt_rv,
                'bytes_out': bytes_sents[bill_type],
                'req_count': req_counts[bill_type],
            })

        last = None
        marker = 'usage/%d' % start
        for o in self.iter_objects(env, path, 'usage/', marker, end, count):
            last = o

        if last:
            container_cnt, obj_cnt, bytes_used = last.split('/')[2].split('_')
            # Convert to int so the values have the same type as the zero
            # defaults above.
            data['utilization']['container_count'] = int(container_cnt)
            data['utilization']['object_count'] = int(obj_cnt)
            data['utilization']['bytes_used'] = int(bytes_used)
        return data

    def GET(self, req):
        """Serve the metering API.

        Reads the ``start``, ``end`` and ``tenantid`` query parameters.
        Responds 403 unless ``keystone.identity`` lists the ``admin`` role,
        400 for missing or malformed parameters or an unknown tenant, and
        otherwise a JSON document built by
        :meth:`retrieve_utilization_data`.
        """
        start = req.params.get('start')
        tenant_id = req.params.get('tenantid')
        identity = req.environ.get('HTTP_X_IDENTITY_STATUS')
        roles = req.environ.get('keystone.identity', None)

        # A keystone.identity without a 'roles' entry is denied instead of
        # raising KeyError.
        role_names = (roles.get('roles') or ()) if roles else ()
        if identity == 'Invalid' or 'admin' not in role_names:
            return Response(request=req, status="403 Forbidden",
                            body="Access Denied",
                            content_type="text/plain")

        if not tenant_id:
            return Response(request=req, status="400 Bad Request",
                            body="tenant_id parameter doesn't exist",
                            content_type="text/plain")

        if not start:
            return Response(request=req, status="400 Bad Request",
                            body="start parameter doesn't exist",
                            content_type="text/plain")

        end = req.params.get('end')
        if end is None:
            end = datetime.utcfromtimestamp(int(time.time())).isoformat()

        # check if tenant_id's users utilization was recorded.
        account = self.swift_account(req.environ.copy(), tenant_id)
        if not account:
            return Response(status="400 Bad Request",
                            content_type="text/plain",
                            body="This tenant_id never used.")

        try:
            start_ts = iso8601_to_timestamp(start)
            end_ts = iso8601_to_timestamp(end)
        except ValueError:
            return Response(status="400 Bad Request",
                            content_type="text/plain",
                            body="start or end time is incorrect format."
                                 "please check start or end parameter")
        if start_ts > end_ts:
            return Response(status="400 Bad Request",
                            content_type="text/plain",
                            body="start time must be before the end time")

        # end time is rounded up and start time rounded down to the hour
        end_ts = (end_ts // 3600 + 1) * 3600
        start_ts = (start_ts // 3600) * 3600

        objsize = (end_ts - start_ts) / self.sample_rate

        content = self.retrieve_utilization_data(req.environ.copy(), tenant_id,
                                                 start_ts, end_ts, objsize)

        content['period_start'] = timestamp_to_iso8601(start_ts)
        content['period_end'] = timestamp_to_iso8601(end_ts)
        content['tenant_id'] = tenant_id
        content['swift_account'] = account
        return Response(request=req, body=json.dumps(content),
                        content_type="application/json")

    def _publish_sample_safely(self, env, account, bytes_received,
                               bytes_sent):
        """Call :meth:`publish_sample`, logging instead of raising."""
        try:
            self.publish_sample(env, account, bytes_received, bytes_sent)
        except Exception:
            self.logger.exception('Failed to publish samples')

    def __call__(self, env, start_response):
        """WSGI entry point.

        Metering API calls go to :meth:`GET`.  Requests whose path does not
        split into a Swift path, or whose ``REMOTE_USER`` is missing or
        starts with ``.wsgi``, are passed through untouched.  Everything
        else is passed through with byte counting, and a sample is published
        when the response iterator finishes or is closed.
        """
        self.logger.debug('Calling Utilization Middleware')

        req = Request(env)
        if self.check_api_call(env):
            return self.GET(req)(env, start_response)

        try:
            version, account, container, obj = req.split_path(2, 4, True)
        except ValueError:
            return self.app(env, start_response)

        remote_user = env.get('REMOTE_USER')
        if not remote_user or (isinstance(remote_user, basestring) and
                               remote_user.startswith('.wsgi')):
            self.logger.debug('### SKIP: REMOTE_USER is %s' % remote_user)
            return self.app(env, start_response)

        start_response_args = [None]
        input_proxy = InputProxy(env['wsgi.input'])
        env['wsgi.input'] = input_proxy

        def my_start_response(status, headers, exc_info=None):
            # Deferred: the real start_response is called from
            # iter_response once the first chunk is available.
            start_response_args[0] = (status, list(headers), exc_info)

        def iter_response(iterable):
            iterator = iter(iterable)
            try:
                chunk = next(iterator)
                while not chunk:
                    chunk = next(iterator)
            except StopIteration:
                chunk = ''

            if start_response_args[0]:
                start_response(*start_response_args[0])

            bytes_sent = 0
            try:
                while chunk:
                    bytes_sent += len(chunk)
                    yield chunk
                    # A default keeps StopIteration from escaping the
                    # generator (a RuntimeError under PEP 479); the loop
                    # still ends on exhaustion or on an empty chunk.
                    chunk = next(iterator, '')
            finally:
                try:
                    self._publish_sample_safely(env, account,
                                                input_proxy.bytes_received,
                                                bytes_sent)
                finally:
                    # Middleware must close the iterable it wraps.
                    close_method = getattr(iterable, 'close', None)
                    if callable(close_method):
                        close_method()

        try:
            iterable = self.app(env, my_start_response)
        except Exception:
            # A failure while publishing must not mask the application's
            # own exception, which is re-raised below.
            self._publish_sample_safely(env, account,
                                        input_proxy.bytes_received, 0)
            raise
        else:
            return iter_response(iterable)

    def publish_sample(self, env, account, bytes_received, bytes_sent):
        """Record one transfer sample for the request described by ``env``.

        Writes the tenant's ``account/<account>`` row if
        :meth:`swift_account` finds none, records usage data for the current
        sample period, then writes the sample row itself under
        ``self.sample_account``.
        """
        timestamp = normalize_timestamp(time.time())
        # End of the sample period the request falls into.
        sample_time = (float(
            timestamp) // self.sample_rate + 1) * self.sample_rate
        trans_id = env.get('swift.trans_id')
        tenant_id = env.get('HTTP_X_TENANT_ID')
        remote_addr = env.get('REMOTE_ADDR')

        # check whether the account information object already exists
        if not self.swift_account(env, tenant_id):
            obj = 'account/%s' % account
            self.put_hidden_object(self.aggregate_account, tenant_id, obj)

        # recording account's storage usage data
        self.record_usage_data(env, tenant_id, account, sample_time)

        container = '%s_%s_%s' % (sample_time, tenant_id, account)

        obj = '%s/%d/%d/%s/%s' % (timestamp, bytes_received, bytes_sent,
                                  trans_id, remote_addr)
        self.put_hidden_object(self.sample_account, container, obj)

    def put_hidden_object(self, account, container, obj):
        """PUT a zero-size object row to every node of a container.

        :param account: account that owns ``container``
        :param container: container the row is written to
        :param obj: object name of the row
        """
        hidden_path = '/%s/%s/%s' % (account, container, obj)
        self.logger.debug('put sample_path: %s', hidden_path)
        # Look the container up under the account it belongs to; using
        # self.sample_account here sent rows for other accounts to the
        # wrong nodes.
        part, nodes = self.container_ring.get_nodes(account, container)
        # One timestamp for all replicas so the rows agree.
        action_headers = {
            'user-agent': 'utilization',
            'X-Timestamp': normalize_timestamp(time.time()),
            'referer': 'utilization-middleware',
            'x-size': '0',
            'x-content-type': "text/plain",
            'x-etag': 'd41d8cd98f00b204e9800998ecf8427e',
        }
        for node in nodes:
            try:
                conn = http_connect(node['ip'], node['port'], node['device'],
                                    part, 'PUT', hidden_path,
                                    dict(action_headers))
                response = conn.getresponse()
                response.read()
            except Exception:
                # One unreachable node must not stop the other replicas
                # from being written.
                self.logger.exception(
                    'Failed to put %s on %s:%s/%s', hidden_path,
                    node['ip'], node['port'], node['device'])
Ejemplo n.º 36
0
 def get_container_ring(self):
     """Return the cluster's container ring, loading it on first use."""
     if self.container_ring:
         return self.container_ring
     # Nothing cached yet: load the ring from swift_dir and keep it.
     self.container_ring = Ring(self.swift_dir, ring_name='container')
     return self.container_ring
Ejemplo n.º 37
0
def get_ring(ring_name, required_replicas, required_devices,
             server=None, force_validate=None, ipport2server=None,
             config_paths=None):
    """Load a ring from /etc/swift and optionally validate the cluster.

    Validation only runs when ``VALIDATE_RSYNC`` or ``force_validate`` is
    true.  It checks the replica and device counts, that every ring device
    exists under the matching replicator's ``devices`` directory, and that
    the device is listed by the rsync export.

    :param ring_name: name of the ring to load
    :param required_replicas: replica count the ring must have
    :param required_devices: number of devices the ring must have
    :param server: server type; defaults to ``ring_name``
    :param force_validate: validate even when ``VALIDATE_RSYNC`` is false
    :param ipport2server: dict updated in place by
                          ``add_ring_devs_to_ipport2server``
    :param config_paths: mapping passed to ``store_config_paths``
    :returns: the loaded ring
    :raises SkipTest: when any validation step fails
    """
    if not server:
        server = ring_name
    ring = Ring('/etc/swift', ring_name=ring_name)
    if ipport2server is None:
        ipport2server = {}  # used internally, even if not passed in
    if config_paths is None:
        config_paths = defaultdict(dict)
    store_config_paths(server, config_paths)

    repl_name = '%s-replicator' % server
    repl_configs = {i: readconf(c, section_name=repl_name)
                    for i, c in config_paths[repl_name].items()}
    servers_per_port = any(int(c.get('servers_per_port', '0'))
                           for c in repl_configs.values())

    add_ring_devs_to_ipport2server(ring, server, ipport2server,
                                   servers_per_port=servers_per_port)
    if not VALIDATE_RSYNC and not force_validate:
        return ring
    # easy sanity checks
    if ring.replica_count != required_replicas:
        raise SkipTest('%s has %s replicas instead of %s' % (
            ring.serialized_path, ring.replica_count, required_replicas))

    devs = [dev for dev in ring.devs if dev is not None]
    if len(devs) != required_devices:
        raise SkipTest('%s has %s devices instead of %s' % (
            ring.serialized_path, len(devs), required_devices))
    for dev in devs:
        # verify server is exposing mounted device
        ipport = (dev['ip'], dev['port'])
        _, server_number = get_server_number(ipport, ipport2server)
        conf = repl_configs[server_number]
        device = dev['device']
        if device not in os.listdir(conf['devices']):
            raise SkipTest(
                "unable to find ring device %s under %s's devices (%s)" % (
                    device, server, conf['devices']))
        full_path = os.path.realpath(os.path.join(conf['devices'], device))
        if not os.path.exists(full_path):
            raise SkipTest(
                'device %s in %s was not found (%s)' %
                (device, conf['devices'], full_path))
        # verify server is exposing rsync device
        rsync_export = conf.get('rsync_module', '').rstrip('/')
        if not rsync_export:
            rsync_export = '{replication_ip}::%s' % server
            if config_true_value(conf.get('vm_test_mode', 'no')):
                rsync_export += '{replication_port}'
        rsync_target = rsync_module_interpolation(rsync_export, dev)
        # cmd is only used in messages; rsync is run from an argument list
        # so config-derived text never reaches a shell.
        cmd = "rsync %s" % rsync_target
        try:
            p = Popen(['rsync', rsync_target], stdout=PIPE)
        except OSError:
            # e.g. rsync is not installed; the shell used to report this
            # through a non-zero exit status.
            raise SkipTest('unable to connect to rsync '
                           'export %s (%s)' % (rsync_export, cmd))
        stdout, _stderr = p.communicate()
        if p.returncode:
            raise SkipTest('unable to connect to rsync '
                           'export %s (%s)' % (rsync_export, cmd))
        if not isinstance(stdout, str):
            # Python 3 pipes yield bytes; device names are str.
            stdout = stdout.decode('utf-8', 'replace')
        for line in stdout.splitlines():
            fields = line.rsplit(None, 1)
            # Blank lines have no fields and used to raise IndexError.
            if fields and fields[-1] == dev['device']:
                break
        else:
            raise SkipTest("unable to find ring device %s under rsync's "
                           "exported devices for %s (%s)" %
                           (dev['device'], rsync_export, cmd))
    return ring
Ejemplo n.º 38
0
    def __init__(self, conf, container_ring=None, logger=None):
        """
        :param conf: dict of configuration values from the [container-sync]
                     section of the container-server.conf
        :param container_ring: ring to use for locating containers; loaded
                               from swift_dir when not given
        :param logger: logger to use; one is created from conf when not
                       given
        """
        def split_csv(raw):
            # Comma-separated value -> list of stripped, non-empty items.
            return [item.strip() for item in raw.split(',') if item.strip()]

        #: The dict of configuration values from the [container-sync] section
        #: of the container-server.conf.
        self.conf = conf
        #: Logger to use for container-sync log lines.
        if logger:
            self.logger = logger
        else:
            self.logger = get_logger(conf, log_route='container-sync')
        #: Path to the local device mount points.
        self.devices = conf.get('devices', '/srv/node')
        #: Indicates whether mount points should be verified as actual mount
        #: points (normally true, false for tests and SAIO).
        self.mount_check = config_true_value(conf.get('mount_check', 'true'))
        #: Minimum time between full scans. This is to keep the daemon from
        #: running wild on near empty systems.
        self.interval = int(conf.get('interval', 300))
        #: Maximum amount of time to spend syncing a container before moving on
        #: to the next one. If a container sync hasn't finished in this time,
        #: it'll just be resumed next scan.
        self.container_time = int(conf.get('container_time', 60))
        #: ContainerSyncCluster instance for validating sync-to values.
        realms_conf_path = os.path.join(
            conf.get('swift_dir', '/etc/swift'), 'container-sync-realms.conf')
        self.realms_conf = ContainerSyncRealms(realms_conf_path, self.logger)
        #: The list of hosts we're allowed to send syncs to. This can be
        #: overridden by data in self.realms_conf
        self.allowed_sync_hosts = split_csv(
            conf.get('allowed_sync_hosts', '127.0.0.1'))
        self.http_proxies = split_csv(conf.get('sync_proxy', ''))
        #: ContainerSyncStore instance for iterating over synced containers
        self.sync_store = ContainerSyncStore(
            self.devices, self.logger, self.mount_check)
        #: Number of containers with sync turned on that were successfully
        #: synced.
        self.container_syncs = 0
        #: Number of successful DELETEs triggered.
        self.container_deletes = 0
        #: Number of successful PUTs triggered.
        self.container_puts = 0
        #: Number of containers whose sync has been turned off, but
        #: are not yet cleared from the sync store.
        self.container_skips = 0
        #: Number of containers that had a failure of some type.
        self.container_failures = 0
        #: Time of last stats report.
        self.reported = time()
        self.swift_dir = conf.get('swift_dir', '/etc/swift')
        #: swift.common.ring.Ring for locating containers.
        if container_ring:
            self.container_ring = container_ring
        else:
            self.container_ring = Ring(self.swift_dir, ring_name='container')
        self._myips = whataremyips(conf.get('bind_ip', '0.0.0.0'))
        self._myport = int(conf.get('bind_port', 6001))
        swift.common.db.DB_PREALLOCATION = config_true_value(
            conf.get('db_preallocation', 'f'))
        self.conn_timeout = float(conf.get('conn_timeout', 5))
        request_tries = int(conf.get('request_tries') or 3)

        internal_client_conf_path = conf.get('internal_client_conf_path')
        if internal_client_conf_path:
            internal_client_conf = internal_client_conf_path
        else:
            # No path configured: fall back to the built-in config body.
            self.logger.warning(
                _('Configuration option internal_client_conf_path not '
                  'defined. Using default configuration, See '
                  'internal-client.conf-sample for options'))
            internal_client_conf = ConfigString(ic_conf_body)
        try:
            self.swift = InternalClient(internal_client_conf,
                                        'Swift Container Sync', request_tries)
        except IOError as err:
            if err.errno == errno.ENOENT:
                # A missing config file ends the process with a message.
                raise SystemExit(
                    _('Unable to load internal client from config: %r (%s)') %
                    (internal_client_conf_path, err))
            raise
Ejemplo n.º 39
0
    def __init__(self,
                 conf,
                 memcache=None,
                 logger=None,
                 account_ring=None,
                 container_ring=None,
                 object_ring=None):
        """
        :param conf: dict of proxy configuration values; None is treated as
                     an empty dict
        :param memcache: stored as ``self.memcache``
        :param logger: logger to use; one is created from conf when None
        :param account_ring: account ring; loaded from swift_dir when not
                             given
        :param container_ring: container ring; loaded from swift_dir when
                               not given
        :param object_ring: object ring; loaded from swift_dir when not given
        """
        if conf is None:
            conf = {}

        def csv_option(key):
            # Comma-separated option -> list of stripped, non-empty entries.
            return [entry.strip() for entry in conf.get(key, '').split(',')
                    if entry.strip()]

        self.logger = (get_logger(conf, log_route='proxy-server')
                       if logger is None else logger)

        swift_dir = conf.get('swift_dir', '/etc/swift')
        self.node_timeout = int(conf.get('node_timeout', 10))
        self.conn_timeout = float(conf.get('conn_timeout', 0.5))
        self.client_timeout = int(conf.get('client_timeout', 60))
        self.put_queue_depth = int(conf.get('put_queue_depth', 10))
        self.object_chunk_size = int(conf.get('object_chunk_size', 65536))
        self.client_chunk_size = int(conf.get('client_chunk_size', 65536))
        self.error_suppression_interval = int(
            conf.get('error_suppression_interval', 60))
        self.error_suppression_limit = int(
            conf.get('error_suppression_limit', 10))
        self.recheck_container_existence = int(
            conf.get('recheck_container_existence', 60))
        self.recheck_account_existence = int(
            conf.get('recheck_account_existence', 60))
        self.allow_account_management = config_true_value(
            conf.get('allow_account_management', 'no'))
        self.object_post_as_copy = config_true_value(
            conf.get('object_post_as_copy', 'true'))
        self.resellers_conf = ConfigParser()
        self.resellers_conf.read(os.path.join(swift_dir, 'resellers.conf'))
        # Rings passed in by the caller win; otherwise load from swift_dir.
        if object_ring:
            self.object_ring = object_ring
        else:
            self.object_ring = Ring(swift_dir, ring_name='object')
        if container_ring:
            self.container_ring = container_ring
        else:
            self.container_ring = Ring(swift_dir, ring_name='container')
        if account_ring:
            self.account_ring = account_ring
        else:
            self.account_ring = Ring(swift_dir, ring_name='account')
        self.memcache = memcache
        extra_mime_types = os.path.join(swift_dir, 'mime.types')
        mimetypes.init(mimetypes.knownfiles + [extra_mime_types])
        self.account_autocreate = config_true_value(
            conf.get('account_autocreate', 'no'))
        account_prefix = conf.get('auto_create_account_prefix') or '.'
        self.expiring_objects_account = account_prefix + 'expiring_objects'
        self.expiring_objects_container_divisor = int(
            conf.get('expiring_objects_container_divisor') or 86400)
        self.max_containers_per_account = int(
            conf.get('max_containers_per_account') or 0)
        self.max_containers_whitelist = csv_option('max_containers_whitelist')
        self.deny_host_headers = csv_option('deny_host_headers')
        self.rate_limit_after_segment = int(
            conf.get('rate_limit_after_segment', 10))
        self.rate_limit_segments_per_sec = int(
            conf.get('rate_limit_segments_per_sec', 1))
        self.log_handoffs = config_true_value(conf.get('log_handoffs', 'true'))
        self.cors_allow_origin = csv_option('cors_allow_origin')
Ejemplo n.º 40
0
 def get_account_ring(self):
     """Return the account ring, loading it first if it is not set yet."""
     if self.account_ring:
         return self.account_ring
     # Nothing cached yet: load the ring from swift_dir and keep it.
     self.account_ring = Ring(self.swift_dir, ring_name='account')
     return self.account_ring
Ejemplo n.º 41
0
class FileMover(object):
    """Move account, container and object files to the partition directory
    that ``self.ring`` assigns to them."""

    def __init__(self, options, *_args, **_kwargs):
        """
        :param options: object providing ``ring`` and ``path`` plus the
                        ``move_object_files``, ``move_container_dbs`` and
                        ``move_account_dbs`` flags read by :meth:`start`
        """
        self.ring = Ring(options.ring)
        self.path = options.path
        self.options = options

    def _get_acc_cont_obj(self, filename):
        """ Returns account, container, object from XFS object metadata """
        chunks = []
        key = 0
        # The context manager closes the file even when something other
        # than IOError is raised.
        with open(filename) as obj_fd:
            try:
                # Metadata may span several xattrs: the base key, then the
                # base key suffixed with 1, 2, ...; an IOError ends the loop.
                while True:
                    chunks.append(xattr.getxattr(
                        obj_fd, '%s%s' % ("user.swift.metadata", (key or ''))))
                    key += 1
            except IOError:
                pass
        metadata = b''.join(chunks)
        # NOTE(review): pickle.loads executes whatever the xattr contains;
        # only run this on disks whose content is trusted.
        object_name = pickle.loads(metadata).get('name')
        name_parts = object_name.split('/')

        return {'account': name_parts[1],
                'container': name_parts[2],
                'object': '/'.join(name_parts[3:])}

    def start(self):
        """Walk ``self.path`` and move every file selected by the options.

        Directories whose path contains "quarantined" are skipped.
        """
        for root, _dirs, files in os.walk(self.path):
            if "quarantined" in root:
                continue
            for filename in files:
                fullname = os.path.join(root, filename)
                extension = fullname.split('.')[-1]
                if (self.options.move_object_files is True and
                        extension in ("data", "ts")):
                    self._move_file(fullname, "objects")

                if (self.options.move_container_dbs is True and
                        extension == "db" and
                        "containers" in fullname):
                    self._move_file(fullname, "containers")

                if (self.options.move_account_dbs is True and
                        extension == "db" and
                        "accounts" in fullname):
                    self._move_file(fullname, "accounts")

    def _move_file(self, filename, filetype):
        """Rename ``filename`` into the partition directory from the ring.

        :param filename: path containing a ``filetype`` component followed
                         by the partition component
        :param filetype: 'accounts', 'containers' or 'objects'
        :raises ValueError: for any other ``filetype``
        """
        if filetype == 'accounts':
            info = AccountBroker(filename).get_info()
        elif filetype == 'containers':
            info = ContainerBroker(filename).get_info()
        elif filetype == 'objects':
            info = self._get_acc_cont_obj(filename)
        else:
            # ValueError is still an Exception, so existing handlers work.
            raise ValueError('unknown file type: %r' % (filetype,))

        acc = info.get('account')
        cont = info.get('container')
        obj = info.get('object')

        partition, _nodes = self.ring.get_nodes(acc, cont, obj)

        # replace the old partition value with the new one
        # old name like '/a/b/objects/123/c/d'
        # new name like '/a/b/objects/456/c/d'
        filename_parts = filename.split('/')
        part_pos = filename_parts.index(filetype)
        filename_parts[part_pos + 1] = str(partition)
        newname = '/'.join(filename_parts)

        dst_dir = os.path.dirname(newname)
        if not os.path.isdir(dst_dir):
            # Skipping existing directories avoids a misleading
            # "mkdir failed" entry for the common case.
            try:
                os.makedirs(dst_dir)
                logging.info("mkdir %s", dst_dir)
            except OSError as ex:
                logging.info("mkdir %s failed: %s", dst_dir, ex)

        try:
            os.rename(filename, newname)
            logging.info("moved %s -> %s", filename, newname)
        except OSError as ex:
            # Include the reason; it used to be dropped.
            logging.warning("FAILED TO MOVE %s -> %s: %s",
                            filename, newname, ex)
Ejemplo n.º 42
0
    def _test_ondisk_data_after_write_with_crypto(self, policy_name):
        """Write an object through the crypto app and check what is stored.

        Creates a container with the given policy, PUTs and POSTs the object
        through ``self.crypto_app``, then inspects the container listing and
        each diskfile directly to verify the stored etag, user metadata and
        crypto metadata.

        :param policy_name: name of the storage policy to create the
                            container with
        :returns: tuple of (expected encrypted body, list of
                  ``(node, on-disk contents)`` pairs); the contents are
                  collected here, not compared
        """
        policy = storage_policy.POLICIES.get_by_name(policy_name)
        self._create_container(self.proxy_app, policy_name=policy_name)
        self._put_object(self.crypto_app, self.plaintext)
        self._post_object(self.crypto_app)

        # Verify container listing etag is encrypted by direct GET to container
        # server. We can use any server for all nodes since they all share same
        # devices dir.
        cont_server = self._test_context['test_servers'][3]
        cont_ring = Ring(self._test_context['testdir'], ring_name='container')
        part, nodes = cont_ring.get_nodes('a', self.container_name)
        for node in nodes:
            req = Request.blank('/%s/%s/a/%s'
                                % (node['device'], part, self.container_name),
                                method='GET', query_string='format=json')
            resp = req.get_response(cont_server)
            listing = json.loads(resp.body)
            # sanity checks...
            self.assertEqual(1, len(listing))
            self.assertEqual('o', listing[0]['name'])
            self.assertEqual('application/test', listing[0]['content_type'])
            # verify encrypted etag value
            # the listing hash is '<encrypted etag>; swift_meta=<crypto meta>'
            parts = listing[0]['hash'].rsplit(';', 1)
            crypto_meta_param = parts[1].strip()
            crypto_meta = crypto_meta_param[len('swift_meta='):]
            listing_etag_iv = load_crypto_meta(crypto_meta)['iv']
            # re-encrypt the plaintext etag with the container key and the
            # stored IV; it must match the stored value
            exp_enc_listing_etag = base64.b64encode(
                encrypt(self.plaintext_etag,
                        self.km.create_key('/a/%s' % self.container_name),
                        listing_etag_iv))
            self.assertEqual(exp_enc_listing_etag, parts[0])

        # Verify diskfile data and metadata is encrypted
        ring_object = self.proxy_app.get_object_ring(int(policy))
        partition, nodes = ring_object.get_nodes('a', self.container_name, 'o')
        conf = {'devices': self._test_context["testdir"],
                'mount_check': 'false'}
        df_mgr = diskfile.DiskFileRouter(conf, FakeLogger())[policy]
        ondisk_data = []
        exp_enc_body = None
        for node_index, node in enumerate(nodes):
            df = df_mgr.get_diskfile(node['device'], partition,
                                     'a', self.container_name, 'o',
                                     policy=policy)
            with df.open():
                meta = df.get_metadata()
                contents = ''.join(df.reader())
                # lower-case the keys so lookups below are case-insensitive
                metadata = dict((k.lower(), v) for k, v in meta.items())
                # verify on disk data - body
                body_iv = load_crypto_meta(
                    metadata['x-object-sysmeta-crypto-body-meta'])['iv']
                body_key_meta = load_crypto_meta(
                    metadata['x-object-sysmeta-crypto-body-meta'])['body_key']
                obj_key = self.km.create_key('/a/%s/o' % self.container_name)
                body_key = Crypto().unwrap_key(obj_key, body_key_meta)
                # the expected body and the raw contents are handed back to
                # the caller rather than compared here
                exp_enc_body = encrypt(self.plaintext, body_key, body_iv)
                ondisk_data.append((node, contents))

                # verify on disk user metadata
                # NOTE: 'meta' is reused from here on for crypto meta values
                enc_val, meta = metadata[
                    'x-object-transient-sysmeta-crypto-meta-fruit'].split(';')
                meta = meta.strip()[len('swift_meta='):]
                metadata_iv = load_crypto_meta(meta)['iv']
                exp_enc_meta = base64.b64encode(encrypt('Kiwi', obj_key,
                                                        metadata_iv))
                self.assertEqual(exp_enc_meta, enc_val)
                self.assertNotIn('x-object-meta-fruit', metadata)

                # verify the key id and cipher recorded with the metadata
                self.assertIn(
                    'x-object-transient-sysmeta-crypto-meta', metadata)
                meta = load_crypto_meta(
                    metadata['x-object-transient-sysmeta-crypto-meta'])
                self.assertIn('key_id', meta)
                self.assertIn('path', meta['key_id'])
                self.assertEqual(
                    '/a/%s/%s' % (self.container_name, self.object_name),
                    meta['key_id']['path'])
                self.assertIn('v', meta['key_id'])
                self.assertEqual('1', meta['key_id']['v'])
                self.assertIn('cipher', meta)
                self.assertEqual(Crypto.cipher, meta['cipher'])

                # verify etag
                actual_enc_etag, _junk, actual_etag_meta = metadata[
                    'x-object-sysmeta-crypto-etag'].partition('; swift_meta=')
                etag_iv = load_crypto_meta(actual_etag_meta)['iv']
                exp_enc_etag = base64.b64encode(encrypt(self.plaintext_etag,
                                                        obj_key, etag_iv))
                self.assertEqual(exp_enc_etag, actual_enc_etag)

                # verify etag hmac
                exp_etag_mac = hmac.new(
                    obj_key, self.plaintext_etag, digestmod=hashlib.sha256)
                exp_etag_mac = base64.b64encode(exp_etag_mac.digest())
                self.assertEqual(exp_etag_mac,
                                 metadata['x-object-sysmeta-crypto-etag-mac'])

                # verify etag override for container updates
                override = 'x-object-sysmeta-container-update-override-etag'
                parts = metadata[override].rsplit(';', 1)
                crypto_meta_param = parts[1].strip()
                crypto_meta = crypto_meta_param[len('swift_meta='):]
                listing_etag_iv = load_crypto_meta(crypto_meta)['iv']
                cont_key = self.km.create_key('/a/%s' % self.container_name)
                exp_enc_listing_etag = base64.b64encode(
                    encrypt(self.plaintext_etag, cont_key,
                            listing_etag_iv))
                self.assertEqual(exp_enc_listing_etag, parts[0])

        self._check_GET_and_HEAD(self.crypto_app)
        return exp_enc_body, ondisk_data
Ejemplo n.º 43
0
 def load_ring(self, swift_dir):
     """Load the ring named ``self.ring_name`` from ``swift_dir``.

     Does nothing when ``self.object_ring`` is already set.
     """
     if not self.object_ring:
         self.object_ring = Ring(swift_dir, ring_name=self.ring_name)
Ejemplo n.º 44
0
class ObjectReplicator(Daemon):
    """
    Replicate objects.

    Encapsulates most logic and data needed by the object replication process.
    Each call to .replicate() performs one replication pass.  It's up to the
    caller to do this in a loop.
    """
    def __init__(self, conf):
        """
        Read the replicator settings from ``conf`` and load the object ring.

        :param conf: configuration object obtained from ConfigParser; only
                     its ``get(key, default)`` method is used here
        """
        self.conf = conf
        self.logger = get_logger(conf, log_route='object-replicator')
        self.devices_dir = conf.get('devices', '/srv/node')
        self.mount_check = conf.get('mount_check', 'true').lower() in \
                              ('true', 't', '1', 'on', 'yes', 'y')
        self.vm_test_mode = conf.get('vm_test_mode',
                                     'no').lower() in ('yes', 'true', 'on',
                                                       '1')
        self.swift_dir = conf.get('swift_dir', '/etc/swift')
        self.port = int(conf.get('bind_port', 6000))
        self.concurrency = int(conf.get('concurrency', 1))
        self.stats_interval = int(conf.get('stats_interval', '300'))
        self.object_ring = Ring(self.swift_dir, ring_name='object')
        self.ring_check_interval = int(conf.get('ring_check_interval', 15))
        self.next_check = time.time() + self.ring_check_interval
        self.reclaim_age = int(conf.get('reclaim_age', 86400 * 7))
        self.partition_times = []
        self.run_pause = int(conf.get('run_pause', 30))
        self.rsync_timeout = int(conf.get('rsync_timeout', 900))
        # Kept as a string: it is only interpolated into rsync's
        # --timeout/--contimeout arguments.
        self.rsync_io_timeout = conf.get('rsync_io_timeout', '30')
        self.http_timeout = int(conf.get('http_timeout', 60))
        self.lockup_timeout = int(conf.get('lockup_timeout', 1800))
        self.recon_cache_path = conf.get('recon_cache_path',
                                         '/var/cache/swift')
        self.rcache = os.path.join(self.recon_cache_path, "object.recon")

    def _rsync(self, args):
        """
        Execute the rsync binary to replicate a partition.

        :param args: complete rsync command line as a list; the last two
                     entries are reported as source and destination in the
                     success log line
        :returns: return code of rsync process. 0 is successful; 1 is
                  returned when the run exceeds ``rsync_timeout`` and is
                  killed
        """
        start_time = time.time()
        ret_val = None
        # Bound before the try block so the Timeout handler can never hit an
        # unbound name if the timeout fires before Popen() has returned.
        proc = None
        try:
            with Timeout(self.rsync_timeout):
                proc = subprocess.Popen(args,
                                        stdout=subprocess.PIPE,
                                        stderr=subprocess.STDOUT)
                results = proc.stdout.read()
                ret_val = proc.wait()
        except Timeout:
            self.logger.error(_("Killing long-running rsync: %s"), str(args))
            if proc is not None:
                proc.kill()
            return 1  # failure response code
        total_time = time.time() - start_time
        for result in results.split('\n'):
            if result == '':
                continue
            # 'cd+' itemize lines are skipped to keep the log quiet.
            if result.startswith('cd+'):
                continue
            if not ret_val:
                self.logger.info(result)
            else:
                self.logger.error(result)
        if ret_val:
            self.logger.error(_('Bad rsync return code: %(args)s -> %(ret)d'),
                              {
                                  'args': str(args),
                                  'ret': ret_val
                              })
        else:
            # Same message either way: info level when rsync produced any
            # output, debug level when it produced none.
            log_success = self.logger.info if results else self.logger.debug
            log_success(
                _("Successful rsync of %(src)s at %(dst)s (%(time).03f)"), {
                    'src': args[-2],
                    'dst': args[-1],
                    'time': total_time
                })
        return ret_val

    def rsync(self, node, job, suffixes):
        """
        Synchronize local suffix directories from a partition with a remote
        node.

        :param node: the "dev" entry for the remote node to sync with
        :param job: information about the partition being synced
        :param suffixes: a list of suffixes which need to be pushed

        :returns: boolean indicating success or failure; False is also
                  returned when the partition path or every suffix
                  directory is missing locally
        """
        if not os.path.exists(job['path']):
            return False
        args = [
            'rsync',
            '--recursive',
            '--whole-file',
            '--human-readable',
            '--xattrs',
            '--itemize-changes',
            '--ignore-existing',
            '--timeout=%s' % self.rsync_io_timeout,
            '--contimeout=%s' % self.rsync_io_timeout,
        ]
        node_ip = rsync_ip(node['ip'])
        # In vm_test_mode the rsync module name carries the node's port.
        if self.vm_test_mode:
            rsync_module = '%s::object%s' % (node_ip, node['port'])
        else:
            rsync_module = '%s::object' % node_ip
        had_any = False
        # Only suffix directories that still exist locally are pushed.
        for suffix in suffixes:
            spath = join(job['path'], suffix)
            if os.path.exists(spath):
                args.append(spath)
                had_any = True
        if not had_any:
            return False
        args.append(
            join(rsync_module, node['device'], 'objects', job['partition']))
        return self._rsync(args) == 0

    def check_ring(self):
        """
        Check to see if the ring has been updated.

        The ring is only consulted once ``ring_check_interval`` seconds have
        passed since the previous check.

        :returns: False if the ring reports a change, True otherwise
                  (including when the check interval has not yet elapsed)
        """
        if time.time() > self.next_check:
            self.next_check = time.time() + self.ring_check_interval
            if self.object_ring.has_changed():
                return False
        return True

    def update_deleted(self, job):
        """
        High-level method that replicates a single partition that doesn't
        belong on this node.

        The partition directory is removed when it holds no suffix
        directories, or when rsync succeeded for every node in the job.

        :param job: a dict containing info about the partition to be replicated
        """
        def tpool_get_suffixes(path):
            # Suffix directories are the three-character subdirectories.
            return [
                suff for suff in os.listdir(path)
                if len(suff) == 3 and isdir(join(path, suff))
            ]

        self.replication_count += 1
        self.logger.increment('partition.delete.count.%s' % (job['device'], ))
        begin = time.time()
        try:
            responses = []
            suffixes = tpool.execute(tpool_get_suffixes, job['path'])
            if suffixes:
                for node in job['nodes']:
                    success = self.rsync(node, job, suffixes)
                    if success:
                        # Send REPLICATE for the pushed suffixes; the
                        # response body is read and discarded.
                        with Timeout(self.http_timeout):
                            http_connect(node['ip'],
                                         node['port'],
                                         node['device'],
                                         job['partition'],
                                         'REPLICATE',
                                         '/' + '-'.join(suffixes),
                                         headers={
                                             'Content-Length': '0'
                                         }).getresponse().read()
                    responses.append(success)
            if not suffixes or (len(responses) == \
                        len(job['nodes']) and all(responses)):
                self.logger.info(_("Removing partition: %s"), job['path'])
                tpool.execute(shutil.rmtree, job['path'], ignore_errors=True)
        except (Exception, Timeout):
            self.logger.exception(_("Error syncing handoff partition"))
        finally:
            self.partition_times.append(time.time() - begin)
            self.logger.timing_since('partition.delete.timing', begin)

    def update(self, job):
        """
        High-level method that replicates a single partition.

        Local suffix hashes are compared with each node's REPLICATE
        response; suffixes that differ are rehashed locally, rsynced to the
        node, and the node is then sent a REPLICATE request naming them.

        :param job: a dict containing info about the partition to be replicated
        """
        self.replication_count += 1
        self.logger.increment('partition.update.count.%s' % (job['device'], ))
        begin = time.time()
        try:
            # do_listdir is enabled on every tenth partition handled.
            hashed, local_hash = tpool.execute(
                tpooled_get_hashes,
                job['path'],
                do_listdir=(self.replication_count % 10) == 0,
                reclaim_age=self.reclaim_age)
            # See tpooled_get_hashes "Hack".
            if isinstance(hashed, BaseException):
                raise hashed
            self.suffix_hash += hashed
            self.logger.update_stats('suffix.hashes', hashed)
            attempts_left = len(job['nodes'])
            # The job's own nodes come first, followed by whatever the ring
            # yields from get_more_nodes().
            nodes = itertools.chain(
                job['nodes'],
                self.object_ring.get_more_nodes(int(job['partition'])))
            while attempts_left > 0:
                # If this raises StopIteration it is caught by the outer
                # except clause below
                node = next(nodes)
                attempts_left -= 1
                try:
                    with Timeout(self.http_timeout):
                        resp = http_connect(node['ip'],
                                            node['port'],
                                            node['device'],
                                            job['partition'],
                                            'REPLICATE',
                                            '',
                                            headers={
                                                'Content-Length': '0'
                                            }).getresponse()
                        if resp.status == HTTP_INSUFFICIENT_STORAGE:
                            self.logger.error(
                                _('%(ip)s/%(device)s responded'
                                  ' as unmounted'), node)
                            # An unmounted node does not use up an attempt.
                            attempts_left += 1
                            continue
                        if resp.status != HTTP_OK:
                            self.logger.error(
                                _("Invalid response %(resp)s "
                                  "from %(ip)s"), {
                                      'resp': resp.status,
                                      'ip': node['ip']
                                  })
                            continue
                        # NOTE(review): pickle.loads on data received from a
                        # remote node; only safe if every replication peer
                        # is trusted.
                        remote_hash = pickle.loads(resp.read())
                        del resp
                    suffixes = [
                        suffix for suffix in local_hash
                        if local_hash[suffix] != remote_hash.get(suffix, -1)
                    ]
                    if not suffixes:
                        continue
                    # Rehash the differing suffixes, then compare again so
                    # only suffixes that still differ are synced.
                    hashed, recalc_hash = tpool.execute(
                        tpooled_get_hashes,
                        job['path'],
                        recalculate=suffixes,
                        reclaim_age=self.reclaim_age)
                    # See tpooled_get_hashes "Hack".
                    if isinstance(hashed, BaseException):
                        raise hashed
                    self.logger.update_stats('suffix.hashes', hashed)
                    local_hash = recalc_hash
                    suffixes = [
                        suffix for suffix in local_hash
                        if local_hash[suffix] != remote_hash.get(suffix, -1)
                    ]
                    self.rsync(node, job, suffixes)
                    with Timeout(self.http_timeout):
                        conn = http_connect(node['ip'],
                                            node['port'],
                                            node['device'],
                                            job['partition'],
                                            'REPLICATE',
                                            '/' + '-'.join(suffixes),
                                            headers={'Content-Length': '0'})
                        conn.getresponse().read()
                    self.suffix_sync += len(suffixes)
                    self.logger.update_stats('suffix.syncs', len(suffixes))
                except (Exception, Timeout):
                    self.logger.exception(
                        _("Error syncing with node: %s") % node)
            self.suffix_count += len(local_hash)
        except (Exception, Timeout):
            self.logger.exception(_("Error syncing partition"))
        finally:
            self.partition_times.append(time.time() - begin)
            self.logger.timing_since('partition.update.timing', begin)

    def stats_line(self):
        """
        Logs various stats for the currently running replication pass.

        When nothing has been replicated yet, only the elapsed time is
        logged.  Suffix and partition-time statistics are logged only once
        at least one suffix has been counted.
        """
        if self.replication_count:
            # Fall back to a tiny value so the rate never divides by zero.
            elapsed = (time.time() - self.start) or 0.000001
            rate = self.replication_count / elapsed
            self.logger.info(
                _("%(replicated)d/%(total)d (%(percentage).2f%%)"
                  " partitions replicated in %(time).2fs (%(rate).2f/sec, "
                  "%(remaining)s remaining)"), {
                      'replicated':
                      self.replication_count,
                      'total':
                      self.job_count,
                      'percentage':
                      self.replication_count * 100.0 / self.job_count,
                      'time':
                      time.time() - self.start,
                      'rate':
                      rate,
                      'remaining':
                      '%d%s' % compute_eta(self.start, self.replication_count,
                                           self.job_count)
                  })
            if self.suffix_count:
                self.logger.info(
                    _("%(checked)d suffixes checked - "
                      "%(hashed).2f%% hashed, %(synced).2f%% synced"), {
                          'checked': self.suffix_count,
                          'hashed':
                          (self.suffix_hash * 100.0) / self.suffix_count,
                          'synced':
                          (self.suffix_sync * 100.0) / self.suffix_count
                      })
                # Sorted in place so max, min and median can be read by
                # index.
                self.partition_times.sort()
                self.logger.info(
                    _("Partition times: max %(max).4fs, "
                      "min %(min).4fs, med %(med).4fs"), {
                          'max': self.partition_times[-1],
                          'min': self.partition_times[0],
                          'med':
                          self.partition_times[len(self.partition_times) // 2]
                      })
        else:
            self.logger.info(_("Nothing replicated for %s seconds."),
                             (time.time() - self.start))

    def kill_coros(self):
        """Utility function that kills all coroutines currently running."""
        # Iterate over a copy of the pool's running set.
        for coro in list(self.run_pool.coroutines_running):
            try:
                coro.kill(GreenletExit)
            except GreenletExit:
                pass

    def heartbeat(self):
        """
        Loop that runs in the background during replication.  It periodically
        logs progress.

        Never returns on its own; replicate() kills it when the pass ends.
        """
        while True:
            eventlet.sleep(self.stats_interval)
            self.stats_line()

    def detect_lockups(self):
        """
        In testing, the pool.waitall() call very occasionally failed to return.
        This is an attempt to make sure the replicator finishes its replication
        pass in some eventuality.

        Every ``lockup_timeout`` seconds the replication counter is compared
        with its previous value; if it has not moved, all running coroutines
        are killed.
        """
        while True:
            eventlet.sleep(self.lockup_timeout)
            if self.replication_count == self.last_replication_count:
                self.logger.error(_("Lockup detected.. killing live coros."))
                self.kill_coros()
            self.last_replication_count = self.replication_count

    def collect_jobs(self):
        """
        Returns a list of jobs (dictionaries) that specify the
        partitions, nodes, etc to be rsynced.

        The list is shuffled, then ordered so jobs flagged ``delete`` come
        first.  ``self.job_count`` is set to the number of jobs as a side
        effect.
        """
        jobs = []
        ips = whataremyips()
        # Local devices: ring devices on one of this host's IPs that use
        # the configured bind port.
        for local_dev in [
                dev for dev in self.object_ring.devs
                if dev and dev['ip'] in ips and dev['port'] == self.port
        ]:
            dev_path = join(self.devices_dir, local_dev['device'])
            obj_path = join(dev_path, 'objects')
            tmp_path = join(dev_path, 'tmp')
            if self.mount_check and not os.path.ismount(dev_path):
                self.logger.warn(_('%s is not mounted'), local_dev['device'])
                continue
            unlink_older_than(tmp_path, time.time() - self.reclaim_age)
            if not os.path.exists(obj_path):
                continue
            for partition in os.listdir(obj_path):
                try:
                    part_nodes = \
                        self.object_ring.get_part_nodes(int(partition))
                    nodes = [
                        node for node in part_nodes
                        if node['id'] != local_dev['id']
                    ]
                    # 'delete' is true when the local device is not among
                    # the partition's nodes (nothing was filtered out).
                    jobs.append(
                        dict(path=join(obj_path, partition),
                             device=local_dev['device'],
                             nodes=nodes,
                             delete=len(nodes) > len(part_nodes) - 1,
                             partition=partition))
                except ValueError:
                    # Skips the entry, e.g. when its name is not an integer.
                    continue
        random.shuffle(jobs)
        # Partitions that need to be deleted take priority
        jobs.sort(key=lambda job: not job['delete'])
        self.job_count = len(jobs)
        return jobs

    def replicate(self):
        """
        Run a replication pass.

        Resets the per-pass counters, spawns the heartbeat and lockup
        detector, then runs one pooled job per collected partition.  The
        pass is abandoned early if check_ring() reports a ring change.
        """
        self.start = time.time()
        self.suffix_count = 0
        self.suffix_sync = 0
        self.suffix_hash = 0
        self.replication_count = 0
        self.last_replication_count = -1
        self.partition_times = []
        stats = eventlet.spawn(self.heartbeat)
        lockup_detector = eventlet.spawn(self.detect_lockups)
        eventlet.sleep()  # Give spawns a cycle
        try:
            self.run_pool = GreenPool(size=self.concurrency)
            jobs = self.collect_jobs()
            for job in jobs:
                if not self.check_ring():
                    self.logger.info(
                        _("Ring change detected. Aborting "
                          "current replication pass."))
                    return
                if job['delete']:
                    self.run_pool.spawn(self.update_deleted, job)
                else:
                    self.run_pool.spawn(self.update, job)
            with Timeout(self.lockup_timeout):
                self.run_pool.waitall()
        except (Exception, Timeout):
            self.logger.exception(_("Exception in top-level replication loop"))
            self.kill_coros()
        finally:
            # Always stop the background greenthreads and log final stats.
            stats.kill()
            lockup_detector.kill()
            self.stats_line()

    def run_once(self, *args, **kwargs):
        """
        Run a single replication pass, log how long it took in minutes and
        record that duration in the recon cache.
        """
        start = time.time()
        self.logger.info(_("Running object replicator in script mode."))
        self.replicate()
        total = (time.time() - start) / 60
        self.logger.info(_("Object replication complete. (%.02f minutes)"),
                         total)
        dump_recon_cache({'object_replication_time': total}, self.rcache,
                         self.logger)

    def run_forever(self, *args, **kwargs):
        """
        Run replication passes in an endless loop, sleeping ``run_pause``
        seconds between passes and recording each pass duration (in
        minutes) in the recon cache.
        """
        self.logger.info(_("Starting object replicator in daemon mode."))
        # Run the replicator continually
        while True:
            start = time.time()
            self.logger.info(_("Starting object replication pass."))
            # Run the replicator
            self.replicate()
            total = (time.time() - start) / 60
            self.logger.info(_("Object replication complete. (%.02f minutes)"),
                             total)
            dump_recon_cache({'object_replication_time': total}, self.rcache,
                             self.logger)
            self.logger.debug(_('Replication sleeping for %s seconds.'),
                              self.run_pause)
            sleep(self.run_pause)
Ejemplo n.º 45
0
from swift.common.ring import Ring

if __name__ == '__main__':
    # Example path for a sample object.
    # Update this to an existing account/container/object
    # in your environment.
    account = 'AUTH_9fbaa44c45ab4902a46110fd90629a79'
    container = 'testing'
    obj = 'testing.pem'

    # '.' is passed as the ring directory argument.
    ring = Ring('.', ring_name='object')
    part, nodes = ring.get_nodes(account, container, obj)
    # Single-argument print(...) calls parse and print the same text under
    # both Python 2 and Python 3.  The double space reproduces the separator
    # the old "print 'label ', value" statements emitted.
    print('nodes: ')
    for n in nodes:
        print('node:  %s' % (n,))

    print('part =  %s' % (part,))
    morenodes = ring.get_more_nodes(part)
    print('more nodes:')
    for n in morenodes:
        print('node:  %s' % (n,))
Ejemplo n.º 46
0
    def __init__(self, conf, memcache=None, logger=None, account_ring=None,
                 container_ring=None, object_ring=None):
        """
        Configure the proxy from ``conf`` and load any rings not supplied.

        :param conf: configuration dict; ``None`` is treated as an empty dict
        :param memcache: stored as ``self.memcache`` without further use here
        :param logger: logger to use; when ``None`` one is created with
                       ``get_logger`` and the 'proxy-server' log route
        :param account_ring: account ring to use instead of loading one
                             from ``swift_dir``
        :param container_ring: container ring to use instead of loading one
                               from ``swift_dir``
        :param object_ring: object ring to use instead of loading one from
                            ``swift_dir``
        :raises ValueError: if request_node_count, read_affinity,
                            write_affinity or write_affinity_node_count is
                            invalid
        """
        if conf is None:
            conf = {}
        if logger is None:
            self.logger = get_logger(conf, log_route='proxy-server')
        else:
            self.logger = logger

        swift_dir = conf.get('swift_dir', '/etc/swift')
        self.node_timeout = int(conf.get('node_timeout', 10))
        self.conn_timeout = float(conf.get('conn_timeout', 0.5))
        self.client_timeout = int(conf.get('client_timeout', 60))
        self.put_queue_depth = int(conf.get('put_queue_depth', 10))
        self.object_chunk_size = int(conf.get('object_chunk_size', 65536))
        self.client_chunk_size = int(conf.get('client_chunk_size', 65536))
        self.trans_id_suffix = conf.get('trans_id_suffix', '')
        self.post_quorum_timeout = float(conf.get('post_quorum_timeout', 0.5))
        self.error_suppression_interval = \
            int(conf.get('error_suppression_interval', 60))
        self.error_suppression_limit = \
            int(conf.get('error_suppression_limit', 10))
        self.recheck_container_existence = \
            int(conf.get('recheck_container_existence', 60))
        self.recheck_account_existence = \
            int(conf.get('recheck_account_existence', 60))
        self.allow_account_management = \
            config_true_value(conf.get('allow_account_management', 'no'))
        self.object_post_as_copy = \
            config_true_value(conf.get('object_post_as_copy', 'true'))
        self.object_ring = object_ring or Ring(swift_dir, ring_name='object')
        self.container_ring = container_ring or Ring(swift_dir,
                                                     ring_name='container')
        self.account_ring = account_ring or Ring(swift_dir,
                                                 ring_name='account')
        self.memcache = memcache
        mimetypes.init(mimetypes.knownfiles +
                       [os.path.join(swift_dir, 'mime.types')])
        self.account_autocreate = \
            config_true_value(conf.get('account_autocreate', 'no'))
        self.expiring_objects_account = \
            (conf.get('auto_create_account_prefix') or '.') + \
            'expiring_objects'
        self.expiring_objects_container_divisor = \
            int(conf.get('expiring_objects_container_divisor') or 86400)
        self.max_containers_per_account = \
            int(conf.get('max_containers_per_account') or 0)
        self.max_containers_whitelist = [
            a.strip()
            for a in conf.get('max_containers_whitelist', '').split(',')
            if a.strip()]
        self.deny_host_headers = [
            host.strip() for host in
            conf.get('deny_host_headers', '').split(',') if host.strip()]
        self.rate_limit_after_segment = \
            int(conf.get('rate_limit_after_segment', 10))
        self.rate_limit_segments_per_sec = \
            int(conf.get('rate_limit_segments_per_sec', 1))
        self.log_handoffs = config_true_value(conf.get('log_handoffs', 'true'))
        self.cors_allow_origin = [
            a.strip()
            for a in conf.get('cors_allow_origin', '').split(',')
            if a.strip()]
        self.node_timings = {}
        self.timing_expiry = int(conf.get('timing_expiry', 300))
        self.sorting_method = conf.get('sorting_method', 'shuffle').lower()
        self.allow_static_large_object = config_true_value(
            conf.get('allow_static_large_object', 'true'))
        self.max_large_object_get_time = float(
            conf.get('max_large_object_get_time', '86400'))
        # Accepts either "<n>" or "<n> * replicas".
        #
        # The number is kept in its own local (rnc_value) because the lambda
        # looks the name up when it is called, not when it is defined.
        # Sharing one ``value`` local with the write_affinity_node_count
        # parsing below made this callable return that later setting.
        value = conf.get('request_node_count', '2 * replicas').lower().split()
        if len(value) == 1:
            rnc_value = int(value[0])
            self.request_node_count = lambda r: rnc_value
        elif len(value) == 3 and value[1] == '*' and value[2] == 'replicas':
            rnc_value = int(value[0])
            self.request_node_count = lambda r: rnc_value * r.replica_count
        else:
            # Joined with spaces so the reported value stays readable.
            raise ValueError(
                'Invalid request_node_count value: %r' % ' '.join(value))
        try:
            read_affinity = conf.get('read_affinity', '')
            self.read_affinity_sort_key = affinity_key_function(read_affinity)
        except ValueError as err:
            # make the message a little more useful
            # (the exception itself is formatted; ``err.message`` only
            # exists on Python 2)
            raise ValueError("Invalid read_affinity value: %r (%s)" %
                             (read_affinity, err))
        try:
            write_affinity = conf.get('write_affinity', '')
            self.write_affinity_is_local_fn \
                = affinity_locality_predicate(write_affinity)
        except ValueError as err:
            # make the message a little more useful
            raise ValueError("Invalid write_affinity value: %r (%s)" %
                             (write_affinity, err))
        # Same "<n>" / "<n> * replicas" syntax as request_node_count, with
        # its own closure variable (wanc_value).
        value = conf.get('write_affinity_node_count',
                         '2 * replicas').lower().split()
        if len(value) == 1:
            wanc_value = int(value[0])
            self.write_affinity_node_count = lambda r: wanc_value
        elif len(value) == 3 and value[1] == '*' and value[2] == 'replicas':
            wanc_value = int(value[0])
            self.write_affinity_node_count = \
                lambda r: wanc_value * r.replica_count
        else:
            raise ValueError(
                'Invalid write_affinity_node_count value: %r' %
                ' '.join(value))
        swift_owner_headers = conf.get(
            'swift_owner_headers',
            'x-container-read, x-container-write, '
            'x-container-sync-key, x-container-sync-to, '
            'x-account-meta-temp-url-key, x-account-meta-temp-url-key-2')
        self.swift_owner_headers = [
            name.strip()
            for name in swift_owner_headers.split(',') if name.strip()]
        # Initialization was successful, so now apply the client chunk size
        # parameter as the default read / write buffer size for the network
        # sockets.
        #
        # NOTE WELL: This is a class setting, so until we can set this on a
        # per-connection basis, this affects reading and writing on ALL
        # sockets, those between the proxy servers and external clients, and
        # those between the proxy servers and the other internal servers.
        #
        # ** Because it affects the client as well, currently, we use the
        # client chunk size as the governor and not the object chunk size.
        socket._fileobject.default_bufsize = self.client_chunk_size
        self.expose_info = config_true_value(
            conf.get('expose_info', 'yes'))
        self.disallowed_sections = list_from_csv(
            conf.get('disallowed_sections'))
        self.admin_key = conf.get('admin_key', None)
        register_swift_info(version=swift_version)
 def __init__(self, app, conf):
     """
     Keep a reference to the wrapped app, create the logger and load the
     object ring.

     :param app: the next WSGI app or filter to call
     :param conf: configuration dict; ``swift_dir`` (default ``/etc/swift``)
                  is the directory the object ring is loaded from
     """
     self.app = app
     self.logger = get_logger(conf, log_route='object_endpoint')
     self.object_ring = Ring(conf.get('swift_dir', '/etc/swift'),
                             ring_name='object')
Ejemplo n.º 48
0
class ListEndpointsMiddleware(object):
    """
    WSGI middleware that lists the storage endpoints for an account,
    container or object.

    Rings are located through the ``swift_dir`` configuration parameter
    (default ``/etc/swift``).  The URL prefix handled here comes from
    ``list_endpoints_path`` (default ``/endpoints/``).

    :param app: The next WSGI filter or app in the paste.deploy
                chain.
    :param conf: The configuration dict for the middleware.
    """
    def __init__(self, app, conf):
        self.app = app
        self.logger = get_logger(conf, log_route='endpoints')
        self.swift_dir = conf.get('swift_dir', '/etc/swift')
        self.account_ring = Ring(self.swift_dir, ring_name='account')
        self.container_ring = Ring(self.swift_dir, ring_name='container')
        # The stored prefix always ends with a slash.
        prefix = conf.get('list_endpoints_path', '/endpoints/')
        self.endpoints_path = prefix if prefix.endswith('/') else prefix + '/'
        self.default_response_version = 1.0
        # Maps a response version to the method that renders it.
        self.response_map = {
            1.0: self.v1_format_response,
            2.0: self.v2_format_response,
        }

    def get_object_ring(self, policy_idx):
        """
        Look up the object ring for a storage policy.

        :policy_idx: policy index as defined in swift.conf
        :returns: appropriate ring object
        """
        ring = POLICIES.get_object_ring(policy_idx, self.swift_dir)
        return ring

    def _parse_version(self, raw_version):
        """
        Convert a version path segment such as ``v1`` into a float.

        :param raw_version: version string, optionally prefixed with 'v'
        :returns: the version as a float
        :raises: ValueError if the string is not numeric or the number is
                 not one of RESPONSE_VERSIONS
        """
        message = 'Unsupported version %r' % raw_version
        digits = raw_version.lstrip('v')
        try:
            parsed = float(digits)
        except ValueError:
            raise ValueError(message)
        for supported in RESPONSE_VERSIONS:
            if parsed == supported:
                return parsed
        raise ValueError(message)

    def _parse_path(self, request):
        """
        Split the request path into version, account, container and obj.

        Path parts that were not given come back as None.  The version is
        always a float; when the path carries no version the configured
        default response version is used.

        :param request: the swob request

        :returns: tuple of (version, account, container, obj)
        :raises: ValueError if path is invalid, message will say why.
        """
        # Keep the slash that ends the endpoints prefix so the remainder
        # still starts with '/'.
        tail = request.path[len(self.endpoints_path) - 1:]
        # First segment is either a version or the account.
        try:
            leading, remainder = split_path(tail, 1, 2, True)
        except ValueError:
            raise ValueError('No account specified')
        try:
            version = self._parse_version(leading)
        except ValueError:
            looks_like_version = (leading.startswith('v') and
                                  '_' not in leading)
            if looks_like_version:
                # more likely a bad version than an account name
                raise
            # Treat the first segment as the account.  A client that really
            # asked for /endpoints/v_3/account gets no error, only a
            # response for an account named "v_3".
            version = self.default_response_version
            account_path = tail
        else:
            account_path = '/' + remainder if remainder else '/'
        try:
            account, container, obj = split_path(account_path, 1, 3, True)
        except ValueError:
            raise ValueError('No account specified')
        return version, account, container, obj

    def v1_format_response(self, req, endpoints, **kwargs):
        """Render the endpoints as a bare JSON list."""
        body = json.dumps(endpoints)
        return Response(body, content_type='application/json')

    def v2_format_response(self, req, endpoints, storage_policy_index,
                           **kwargs):
        """
        Render a JSON object holding the endpoints and a headers dict; the
        policy index header is included only when an index is known.
        """
        headers = {}
        if storage_policy_index is not None:
            headers['X-Backend-Storage-Policy-Index'] = str(
                storage_policy_index)
        payload = {
            'endpoints': endpoints,
            'headers': headers,
        }
        return Response(json.dumps(payload), content_type='application/json')

    def __call__(self, env, start_response):
        """
        WSGI entry point.

        Requests outside the endpoints prefix go to the wrapped app.
        Non-GET requests get a 405 and unparsable paths a 400.
        """
        request = Request(env)
        if not request.path.startswith(self.endpoints_path):
            return self.app(env, start_response)

        if request.method != 'GET':
            not_allowed = HTTPMethodNotAllowed(req=request,
                                               headers={"Allow": "GET"})
            return not_allowed(env, start_response)

        try:
            version, account, container, obj = self._parse_path(request)
        except ValueError as err:
            return HTTPBadRequest(str(err))(env, start_response)

        # URL-decode whichever path parts were supplied.
        account, container, obj = [
            part if part is None else unquote(part)
            for part in (account, container, obj)]

        storage_policy_index = None
        node_prefix = 'http://{ip}:{port}/{device}/{partition}/'
        if obj is not None:
            # The container's storage policy selects the object ring.
            container_info = get_container_info(
                {'PATH_INFO': '/v1/%s/%s' % (account, container)},
                self.app,
                swift_source='LE')
            storage_policy_index = container_info['storage_policy']
            obj_ring = self.get_object_ring(storage_policy_index)
            partition, nodes = obj_ring.get_nodes(account, container, obj)
            endpoint_template = node_prefix + '{account}/{container}/{obj}'
        elif container is not None:
            partition, nodes = self.container_ring.get_nodes(
                account, container)
            endpoint_template = node_prefix + '{account}/{container}'
        else:
            partition, nodes = self.account_ring.get_nodes(account)
            endpoint_template = node_prefix + '{account}'

        endpoints = [
            endpoint_template.format(ip=node['ip'],
                                     port=node['port'],
                                     device=node['device'],
                                     partition=partition,
                                     account=quote(account),
                                     container=quote(container or ''),
                                     obj=quote(obj or ''))
            for node in nodes]

        render = self.response_map[version]
        resp = render(request,
                      endpoints=endpoints,
                      storage_policy_index=storage_policy_index)
        return resp(env, start_response)
Ejemplo n.º 49
0
class ListEndpointsMiddleware(object):
    """
    WSGI middleware that lists the backend endpoints of an account,
    container or object.

    GET requests whose path starts with the configured endpoints path are
    answered with a JSON document; every other path is handed to the
    wrapped app unchanged.

    Reads `swift_dir` (default `/etc/swift`) and `list_endpoints_path`
    (default `/endpoints/`) from the configuration.

    :param app: The next WSGI filter or app in the paste.deploy
                chain.
    :param conf: The configuration dict for the middleware.
    """

    def __init__(self, app, conf):
        self.app = app
        self.logger = get_logger(conf, log_route='endpoints')
        self.swift_dir = conf.get('swift_dir', '/etc/swift')
        self.account_ring = Ring(self.swift_dir, ring_name='account')
        self.container_ring = Ring(self.swift_dir, ring_name='container')
        prefix = conf.get('list_endpoints_path', '/endpoints/')
        # The prefix is always stored with a trailing slash.
        self.endpoints_path = prefix if prefix.endswith('/') else prefix + '/'
        self.default_response_version = 1.0
        self.response_map = {
            1.0: self.v1_format_response,
            2.0: self.v2_format_response,
        }

    def get_object_ring(self, policy_idx):
        """
        Look up the object ring that belongs to a storage policy.

        :policy_idx: policy index as defined in swift.conf
        :returns: appropriate ring object
        """
        return POLICIES.get_object_ring(policy_idx, self.swift_dir)

    def _parse_version(self, raw_version):
        """
        Turn a path segment such as ``v1`` or ``2.0`` into a float version.

        :param raw_version: the candidate version string
        :returns: the version as a float
        :raises: ValueError if the segment is not a number or the number
                 is not one of RESPONSE_VERSIONS
        """
        unsupported = 'Unsupported version %r' % raw_version
        try:
            parsed = float(raw_version.lstrip('v'))
        except ValueError:
            raise ValueError(unsupported)
        for known in RESPONSE_VERSIONS:
            if parsed == known:
                return parsed
        raise ValueError(unsupported)

    def _parse_path(self, request):
        """
        Split the request path into version, account, container and obj.

        Path parts that are absent come back as None.  The version is
        always a float; when the path carries no version the configured
        default response version is used.

        :param request: the swob request

        :returns: tuple of (version, account, container, obj)
        :raises: ValueError if path is invalid, message will say why.
        """
        tail = request.path[len(self.endpoints_path) - 1:]
        # First see whether the leading segment is a version.
        try:
            raw_version, remainder = split_path(tail, 1, 2, True)
        except ValueError:
            raise ValueError('No account specified')
        try:
            version = self._parse_version(raw_version)
        except ValueError:
            looks_like_account = ('_' in raw_version or
                                  not raw_version.startswith('v'))
            if not looks_like_account:
                # Reads like a version we do not support rather than
                # like an account name, so report it.
                raise
            # Most likely the client gave no version at all.  Someone
            # asking for /endpoints/v_3/account gets an unhelpful answer
            # instead of an error, which is accepted here.
            version = self.default_response_version
            remainder = tail
        else:
            if remainder:
                remainder = '/' + remainder
            else:
                remainder = '/'
        try:
            account, container, obj = split_path(remainder, 1, 3, True)
        except ValueError:
            raise ValueError('No account specified')
        return version, account, container, obj

    def v1_format_response(self, req, endpoints, **kwargs):
        """Version 1.0 body: the endpoint list serialized as JSON."""
        body = json.dumps(endpoints)
        return Response(body, content_type='application/json')

    def v2_format_response(self, req, endpoints, storage_policy_index,
                           **kwargs):
        """
        Version 2.0 body: a JSON object with ``endpoints`` and ``headers``.

        ``headers`` carries X-Backend-Storage-Policy-Index when a policy
        index is known.
        """
        headers = {}
        if storage_policy_index is not None:
            headers['X-Backend-Storage-Policy-Index'] = \
                str(storage_policy_index)
        payload = {
            'endpoints': endpoints,
            'headers': headers,
        }
        return Response(json.dumps(payload),
                        content_type='application/json')

    def __call__(self, env, start_response):
        request = Request(env)
        if not request.path.startswith(self.endpoints_path):
            return self.app(env, start_response)

        if request.method != 'GET':
            not_allowed = HTTPMethodNotAllowed(
                req=request, headers={"Allow": "GET"})
            return not_allowed(env, start_response)

        try:
            version, account, container, obj = self._parse_path(request)
        except ValueError as err:
            return HTTPBadRequest(str(err))(env, start_response)

        account, container, obj = [
            part if part is None else unquote(part)
            for part in (account, container, obj)]

        storage_policy_index = None
        base_template = 'http://{ip}:{port}/{device}/{partition}/'
        if obj is not None:
            # The container's storage policy decides which object ring
            # is consulted.
            container_info = get_container_info(
                {'PATH_INFO': '/v1/%s/%s' % (account, container)},
                self.app, swift_source='LE')
            storage_policy_index = container_info['storage_policy']
            obj_ring = self.get_object_ring(storage_policy_index)
            partition, nodes = obj_ring.get_nodes(account, container, obj)
            endpoint_template = base_template + '{account}/{container}/{obj}'
        elif container is not None:
            partition, nodes = self.container_ring.get_nodes(
                account, container)
            endpoint_template = base_template + '{account}/{container}'
        else:
            partition, nodes = self.account_ring.get_nodes(account)
            endpoint_template = base_template + '{account}'

        endpoints = [
            endpoint_template.format(
                ip=node['ip'],
                port=node['port'],
                device=node['device'],
                partition=partition,
                account=quote(account),
                container=quote(container or ''),
                obj=quote(obj or ''))
            for node in nodes]

        formatter = self.response_map[version]
        resp = formatter(request, endpoints=endpoints,
                         storage_policy_index=storage_policy_index)
        return resp(env, start_response)
Ejemplo n.º 50
0
def generate_report(conf, policy_name=None):
    """
    Run the container and/or object dispersion reports enabled in conf.

    Sets the module-level ``json_output`` flag from the ``dump_json``
    setting, authenticates through ``get_auth`` and passes the resulting
    account, rings and connection pool to the report functions.  Calls
    ``exit`` when the named policy is unknown or both reports are
    disabled.

    :param conf: configuration mapping; ``auth_url``, ``auth_user`` and
                 ``auth_key`` are required, everything else has a default
    :param policy_name: name of the storage policy to report on; the
                        default policy is used when None
    :returns: dict holding the ``container`` and/or ``object`` report
              results, keyed by report type
    """
    global json_output
    json_output = config_true_value(conf.get('dump_json', 'no'))
    if policy_name is not None:
        policy = POLICIES.get_by_name(policy_name)
        if policy is None:
            exit('Unable to find policy: %s' % policy_name)
    else:
        policy = POLICIES.default
    if not json_output:
        print('Using storage policy: %s ' % policy.name)

    swift_dir = conf.get('swift_dir', '/etc/swift')
    retries = int(conf.get('retries', 5))
    concurrency = int(conf.get('concurrency', 25))
    endpoint_type = str(conf.get('endpoint_type', 'publicURL'))
    region_name = str(conf.get('region_name', ''))
    container_report = config_true_value(conf.get('container_report', 'yes'))
    object_report = config_true_value(conf.get('object_report', 'yes'))
    if not container_report and not object_report:
        exit("Neither container or object report is set to run")

    # Optional auth settings are only forwarded when they are non-empty.
    os_options = {'endpoint_type': endpoint_type}
    for key in ('user_domain_name', 'project_domain_name', 'project_name'):
        setting = str(conf.get(key, ''))
        if setting:
            os_options[key] = setting
    if region_name:
        os_options['region_name'] = region_name
    insecure = config_true_value(conf.get('keystone_api_insecure', 'no'))

    coropool = GreenPool(size=concurrency)

    url, token = get_auth(conf['auth_url'],
                          conf['auth_user'],
                          conf['auth_key'],
                          auth_version=conf.get('auth_version', '1.0'),
                          os_options=os_options,
                          insecure=insecure)
    # The account is the last path segment of the storage URL.
    account = url.rsplit('/', 1)[1]
    connpool = Pool(max_size=concurrency)
    connpool.create = lambda: SimpleClient(
        url=url, token=token, retries=retries)

    container_ring = Ring(swift_dir, ring_name='container')
    object_ring = Ring(swift_dir, ring_name=policy.ring_name)

    output = {}
    if container_report:
        output['container'] = container_dispersion_report(
            coropool, connpool, account, container_ring, retries,
            conf.get('partitions'), policy)
    if object_report:
        output['object'] = object_dispersion_report(
            coropool, connpool, account, object_ring, retries,
            conf.get('partitions'), policy)

    return output
Ejemplo n.º 51
0
class ContainerCrawler(object):
    """
    Feeds rows from container databases to per-container handler objects.

    For every configured account/container pair the crawler looks the
    container up in the container ring, opens the database of each ring
    node that ``is_local_device`` reports as local and passes the rows
    added since the last saved row to a handler built from
    ``handler_class``.  Rows are either dispatched one at a time to a pool
    of green-thread workers or, when ``bulk_process`` is enabled, handed to
    the handler as one batch.
    """

    def __init__(self, conf, handler_class, logger=None):
        """
        :param conf: dict of settings. ``devices``, ``status_dir`` and
                     ``items_chunk`` are required; ``bulk_process``,
                     ``poll_interval``, ``workers`` and ``containers`` are
                     optional
        :param handler_class: callable invoked as
                              ``handler_class(status_dir, settings,
                              per_account=...)`` to build a handler
        :param logger: optional logger; without one ``log()`` does nothing
        :raises RuntimeError: if handler_class is falsy
        """
        if not handler_class:
            raise RuntimeError('Handler class must be defined')

        self.logger = logger
        self.conf = conf
        self.root = conf['devices']
        self.bulk = config_true_value(conf.get('bulk_process', False))
        self.interval = 10
        self.swift_dir = '/etc/swift'
        self.container_ring = Ring(self.swift_dir, ring_name='container')

        self.status_dir = conf['status_dir']
        self.myips = whataremyips('0.0.0.0')
        self.items_chunk = conf['items_chunk']
        self.poll_interval = conf.get('poll_interval', 5)
        self.handler_class = handler_class

        # Worker threads and their queues only exist in non-bulk mode.
        if not self.bulk:
            self._init_workers(conf)
        else:
            self.workers = 1
        self._init_ic_pool(conf)

        self.log('debug', 'Created the Container Crawler instance')

    def _init_workers(self, conf):
        """Create the work/error/stats queues and spawn the workers."""
        self.workers = conf.get('workers', 10)
        self.pool = eventlet.GreenPool(self.workers)
        self.work_queue = eventlet.queue.Queue(self.workers * 2)

        # max_size=None means a Queue is infinite
        self.error_queue = eventlet.queue.Queue(maxsize=None)
        self.stats_queue = eventlet.queue.Queue(maxsize=None)
        for _ in xrange(self.workers):
            self.pool.spawn_n(self._worker)

    def _init_ic_pool(self, conf):
        """Create the pool of internal clients, one per worker."""
        pool_size = self.workers
        self._swift_pool = eventlet.pools.Pool(
            create=lambda: create_internal_client(conf, self.swift_dir),
            min_size=pool_size,
            max_size=pool_size)

    def _worker(self):
        """
        Worker loop: take ``(row, handler)`` items off the work queue and
        let the handler process the row with a pooled client.

        A ``None`` item is the shutdown sentinel queued by ``_stop()`` and
        ends the loop.  A ``RetryError`` is recorded on the error queue as
        ``(row, None)``; any other failure as ``(row, traceback text)``.
        """
        while 1:
            try:
                work = self.work_queue.get()
            except:
                self.log(
                    'error', 'Failed to fetch items from the queue: %s' %
                    traceback.format_exc())
                time.sleep(100)
                continue

            if work is None:
                # Shutdown sentinel: acknowledge it and leave the loop so
                # that the pool.waitall() in _stop() can return.
                self.work_queue.task_done()
                return

            try:
                if work:
                    row, handler = work
                    with self._swift_pool.item() as swift_client:
                        handler.handle(row, swift_client)
            except RetryError:
                self.error_queue.put((row, None))
            except:
                self.error_queue.put((row, traceback.format_exc()))
            finally:
                # Always acknowledge the item, otherwise the join() in
                # submit_items() would block forever.
                self.work_queue.task_done()

    def _stop(self):
        """Ask every worker to exit and wait until all of them have."""
        if self.bulk:
            # Bulk mode never creates the worker pool or the work queue.
            return
        for _ in xrange(self.workers):
            self.work_queue.put(None)
        self.pool.waitall()

    def _check_errors(self):
        """
        Drain the error queue and raise if it held any entries.

        Entries carrying a traceback are logged.

        :raises RetryError: if at least one entry had no traceback, i.e.
                            was queued because of a RetryError
        :raises RuntimeError: if every entry carried a traceback
        """
        # When working in bulk, errors are propagated immediately
        if self.bulk or self.error_queue.empty():
            return

        retry_error = False

        while not self.error_queue.empty():
            row, error = self.error_queue.get()
            if error:
                self.log(
                    'error', u'Failed to handle row %s (%s): %r' %
                    (row['ROWID'], row['name'].decode('utf-8'), error))
            else:
                retry_error = True
        if not retry_error:
            raise RuntimeError('Failed to process rows')
        else:
            raise RetryError('Rows must be retried later')

    def log(self, level, message):
        """
        Log ``message`` through the logger method named ``level``.

        Does nothing when no logger was supplied.
        """
        if not self.logger:
            return
        getattr(self.logger, level)(message)

    def get_broker(self, account, container, part, node):
        """
        Build a ContainerBroker for the container database on ``node``.

        The database path is ``<devices>/<device>/<storage dir>/<hash>.db``
        where the hash comes from ``hash_path(account, container)``.

        :param account: account name (unicode; encoded as UTF-8 for hashing)
        :param container: container name (unicode; encoded as UTF-8)
        :param part: ring partition of the container
        :param node: ring node dict; its ``device`` entry is used
        :returns: a ContainerBroker for the computed path
        """
        db_hash = hash_path(account.encode('utf-8'), container.encode('utf-8'))
        db_dir = storage_directory(DATADIR, part, db_hash)
        db_path = os.path.join(self.root, node['device'], db_dir,
                               db_hash + '.db')
        return ContainerBroker(db_path, account=account, container=container)

    def submit_items(self, handler, rows):
        """
        Hand ``rows`` to ``handler``.

        In bulk mode the handler receives all rows in one call.  Otherwise
        each row is queued for the workers and the call blocks until the
        queue has been fully processed.
        """
        if self.bulk:
            with self._swift_pool.item() as swift_client:
                handler.handle(rows, swift_client)
            return

        for row in rows:
            self.work_queue.put((row, handler))
        self.work_queue.join()

    def process_items(self, handler, rows, nodes_count, node_id):
        """
        Submit the rows in two passes and check for worker errors.

        Rows whose ``ROWID % nodes_count`` equals ``node_id`` are submitted
        first, the remaining rows afterwards.

        :param handler: handler that processes the rows
        :param rows: rows read from the container database
        :param nodes_count: number of ring nodes for the container
        :param node_id: index of the current node among those nodes
        :returns: tuple of (rows in the first pass, rows in the second pass)
        :raises: whatever ``_check_errors`` raises for failed rows
        """
        owned_rows = filter(lambda row: row['ROWID'] % nodes_count == node_id,
                            rows)
        self.submit_items(handler, owned_rows)

        verified_rows = filter(
            lambda row: row['ROWID'] % nodes_count != node_id, rows)
        if verified_rows:
            self.submit_items(handler, verified_rows)
        self._check_errors()
        return len(owned_rows), len(verified_rows)

    def handle_container(self, handler):
        """
        Process the new rows of the handler's container on local nodes.

        For every ring node that ``is_local_device`` accepts, the rows
        after the handler's last saved row (at most ``items_chunk`` per
        call) are processed and the new last row is saved.  Nodes whose
        database raises DatabaseConnectionError are skipped.
        """
        part, container_nodes = self.container_ring.get_nodes(
            handler._account.encode('utf-8'),
            handler._container.encode('utf-8'))
        nodes_count = len(container_nodes)

        for index, node in enumerate(container_nodes):
            if not is_local_device(self.myips, None, node['ip'], node['port']):
                continue
            broker = self.get_broker(handler._account, handler._container,
                                     part, node)
            broker_info = broker.get_info()
            last_row = handler.get_last_row(broker_info['id'])
            if not last_row:
                last_row = 0
            try:
                items = broker.get_items_since(last_row, self.items_chunk)
            except DatabaseConnectionError:
                continue
            if items:
                self.log(
                    'info', 'Processing %d rows since row %d for %s/%s' %
                    (len(items), last_row, handler._account,
                     handler._container))
                owned_count, verified_count = self.process_items(
                    handler, items, nodes_count, index)
                # Only reached when process_items() did not raise.
                handler.save_last_row(items[-1]['ROWID'], broker_info['id'])
                self.log(
                    'info',
                    'Processed %d rows; verified %d rows; last row: %d' %
                    (owned_count, verified_count, items[-1]['ROWID']))

    def call_handle_container(self, settings, per_account=False):
        """ Thin wrapper around the handle_container() method for error
            handling.

            SkipContainer is logged at info level, RetryError is ignored
            and any other failure is logged with its traceback; nothing is
            re-raised.

            Arguments
            settings -- dictionary with the container settings; passed to
                        the handler class and expected to provide the
                        'account' and 'container' keys.
            per_account -- whether the whole account is crawled.
        """
        try:
            handler = self.handler_class(self.status_dir,
                                         settings,
                                         per_account=per_account)
            self.handle_container(handler)
        except SkipContainer:
            self.log('info', "Skipping %(account)s/%(container)s" % settings)
        except RetryError:
            pass
        except:
            account = settings['account']
            container = settings['container']
            self.log(
                'error', "Failed to process %s/%s with %s" %
                (account, container, self.handler_class.__name__))
            self.log('error', traceback.format_exc())

    def list_containers(self, account):
        """Return the names of all containers in ``account``."""
        # TODO: we should not have to retrieve all of the containers at once,
        # but it will require allocating a swift_client for this purpose from
        # the pool -- consider doing that at some point. However, as long as
        # there are fewer than a few million containers, getting all of them at
        # once should be cheap, paginating 10000 at a time.
        with self._swift_pool.item() as swift_client:
            return [c['name'] for c in swift_client.iter_containers(account)]

    def run_always(self):
        """
        Call ``run_once()`` in an endless loop, pausing so that passes
        start at least ``poll_interval`` seconds apart.

        Returns immediately when no containers are configured.
        """
        # Since we don't support reloading, the daemon should quit if there are
        # no containers configured
        if 'containers' not in self.conf or not self.conf['containers']:
            return
        self.log('debug', 'Entering the poll loop')
        while True:
            start = time.time()
            self.run_once()
            elapsed = time.time() - start
            if elapsed < self.poll_interval:
                time.sleep(self.poll_interval - elapsed)

    def run_once(self):
        """
        Make one pass over every entry in ``conf['containers']``.

        An entry whose container is ``/*`` expands to all containers of
        the account; afterwards status files of containers that are no
        longer listed are removed.  Entries lacking ``container`` or
        ``account`` are logged and skipped.
        """
        for container_settings in self.conf['containers']:
            # TODO: perform validation of the settings on startup
            if 'container' not in container_settings:
                self.log(
                    'error',
                    'Container name not specified in settings -- continue')
                continue
            if 'account' not in container_settings:
                self.log('error',
                         'Account not in specified in settings -- continue')
                continue

            if container_settings['container'] == '/*':
                all_containers = self.list_containers(
                    container_settings['account'])
                for container in all_containers:
                    settings_copy = container_settings.copy()
                    settings_copy['container'] = container.decode('utf-8')
                    self.call_handle_container(settings_copy, per_account=True)
                # After iterating over all of the containers, we prune any
                # entries from containers that may have been deleted (so as to
                # avoid missing data). There is still a chance where a
                # container is removed and created between the calls to
                # CloudSync, however there is nothing we can do about that.
                # TODO: keep track of container creation date to detect when
                # they are removed and then added.
                if not os.path.exists(
                        os.path.join(self.status_dir,
                                     container_settings['account'])):
                    continue
                tracked_containers = os.listdir(
                    os.path.join(self.status_dir,
                                 container_settings['account']))
                disappeared = set(tracked_containers) - set(all_containers)
                for container in disappeared:
                    try:
                        os.unlink(
                            os.path.join(self.status_dir,
                                         container_settings['account'],
                                         container))
                    except Exception as e:
                        self.log(
                            'warning',
                            'Failed to remove the status file for %s: %s' %
                            (os.path.join(container_settings['account'],
                                          container), repr(e)))
            else:
                self.call_handle_container(container_settings)
Ejemplo n.º 52
0
    def __init__(self,
                 conf,
                 memcache=None,
                 logger=None,
                 account_ring=None,
                 container_ring=None,
                 object_ring=None):
        """
        Configure the instance from ``conf`` and set up its rings.

        :param conf: configuration dict; None is treated as an empty dict
        :param memcache: stored unchanged on ``self.memcache``
        :param logger: logger to use; when None one is created with
                       ``get_logger(conf, log_route='proxy-server')``
        :param account_ring: ring used instead of loading the ``account``
                             ring from ``swift_dir``
        :param container_ring: ring used instead of loading the
                               ``container`` ring from ``swift_dir``
        :param object_ring: ring used instead of loading the ``object``
                            ring from ``swift_dir``
        :raises ValueError: if ``request_node_count`` is neither a single
                            integer nor of the form ``<int> * replicas``
                            (or any numeric setting fails to convert)
        """
        if conf is None:
            conf = {}
        if logger is None:
            self.logger = get_logger(conf, log_route='proxy-server')
        else:
            self.logger = logger

        swift_dir = conf.get('swift_dir', '/etc/swift')
        self.node_timeout = int(conf.get('node_timeout', 10))
        self.conn_timeout = float(conf.get('conn_timeout', 0.5))
        self.client_timeout = int(conf.get('client_timeout', 60))
        self.put_queue_depth = int(conf.get('put_queue_depth', 10))
        self.object_chunk_size = int(conf.get('object_chunk_size', 65536))
        self.client_chunk_size = int(conf.get('client_chunk_size', 65536))
        self.trans_id_suffix = conf.get('trans_id_suffix', '')
        self.error_suppression_interval = \
            int(conf.get('error_suppression_interval', 60))
        self.error_suppression_limit = \
            int(conf.get('error_suppression_limit', 10))
        self.recheck_container_existence = \
            int(conf.get('recheck_container_existence', 60))
        self.recheck_account_existence = \
            int(conf.get('recheck_account_existence', 60))
        self.allow_account_management = \
            config_true_value(conf.get('allow_account_management', 'no'))
        self.object_post_as_copy = \
            config_true_value(conf.get('object_post_as_copy', 'true'))
        self.resellers_conf = ConfigParser()
        self.resellers_conf.read(os.path.join(swift_dir, 'resellers.conf'))
        # Rings passed in by the caller win over the ones on disk.
        self.object_ring = object_ring or Ring(swift_dir, ring_name='object')
        self.container_ring = container_ring or Ring(swift_dir,
                                                     ring_name='container')
        self.account_ring = account_ring or Ring(swift_dir,
                                                 ring_name='account')
        self.memcache = memcache
        mimetypes.init(mimetypes.knownfiles +
                       [os.path.join(swift_dir, 'mime.types')])
        self.account_autocreate = \
            config_true_value(conf.get('account_autocreate', 'no'))
        # ``or`` (rather than a .get() default) also replaces empty values.
        self.expiring_objects_account = \
            (conf.get('auto_create_account_prefix') or '.') + \
            'expiring_objects'
        self.expiring_objects_container_divisor = \
            int(conf.get('expiring_objects_container_divisor') or 86400)
        self.max_containers_per_account = \
            int(conf.get('max_containers_per_account') or 0)
        # Comma separated settings become lists without blank entries.
        self.max_containers_whitelist = [
            a.strip()
            for a in conf.get('max_containers_whitelist', '').split(',')
            if a.strip()
        ]
        self.deny_host_headers = [
            host.strip()
            for host in conf.get('deny_host_headers', '').split(',')
            if host.strip()
        ]
        self.rate_limit_after_segment = \
            int(conf.get('rate_limit_after_segment', 10))
        self.rate_limit_segments_per_sec = \
            int(conf.get('rate_limit_segments_per_sec', 1))
        self.log_handoffs = config_true_value(conf.get('log_handoffs', 'true'))
        self.cors_allow_origin = [
            a.strip() for a in conf.get('cors_allow_origin', '').split(',')
            if a.strip()
        ]
        self.node_timings = {}
        self.timing_expiry = int(conf.get('timing_expiry', 300))
        self.sorting_method = conf.get('sorting_method', 'shuffle').lower()
        self.allow_static_large_object = config_true_value(
            conf.get('allow_static_large_object', 'true'))
        # request_node_count is either "<int>" or "<int> * replicas"; it is
        # exposed as a callable taking an object with a replica_count.
        value = conf.get('request_node_count', '2 * replicas').lower().split()
        if len(value) == 1:
            node_count = int(value[0])
            self.request_node_count = lambda r: node_count
        elif len(value) == 3 and value[1] == '*' and value[2] == 'replicas':
            multiplier = int(value[0])
            self.request_node_count = lambda r: multiplier * r.replica_count
        else:
            # Join with spaces so the message shows the tokens as they were
            # configured instead of running them together.
            raise ValueError('Invalid request_node_count value: %r' %
                             ' '.join(value))
Ejemplo n.º 53
0
def print_item_locations(ring, ring_name=None, account=None, container=None,
                         obj=None, **kwargs):
    """
    Display placement information for an item based on ring lookup.

    If a ring is provided it always takes precedence, but warnings will be
    emitted if it doesn't match other optional arguments like the policy_name
    or ring_name.

    If no ring is provided the ring_name and/or policy_name will be used to
    lookup the ring.

    :param ring: a ring instance
    :param ring_name: server type, or storage policy ring name if object ring
    :param account: account name
    :param container: container name
    :param obj: object name
    :param partition: part number for non path lookups
    :param policy_name: name of storage policy to use to lookup the ring
    :param all: include all handoff nodes. If false, only the N primary
                nodes and first N handoffs will be printed.
    :param swift_dir: directory the rings are loaded from
                      (default ``/etc/swift``)
    :raises InfoSystemExit: if the arguments do not identify a target or
                            no ring/policy can be determined
    """

    policy_name = kwargs.get('policy_name', None)
    part = kwargs.get('partition', None)
    all_nodes = kwargs.get('all', False)
    swift_dir = kwargs.get('swift_dir', '/etc/swift')

    # A ring given together with a policy name is only cross-checked.
    if ring and policy_name:
        policy = POLICIES.get_by_name(policy_name)
        if policy:
            if ring_name != policy.ring_name:
                print('Warning: mismatch between ring and policy name!')
        else:
            print('Warning: Policy %s is not valid' % policy_name)

    policy_index = None
    if ring is None and (obj or part):
        # Object and partition lookups without a ring need a policy to
        # pick the object ring.
        if not policy_name:
            print('Need a ring or policy')
            raise InfoSystemExit()
        policy = POLICIES.get_by_name(policy_name)
        if not policy:
            print('No policy named %r' % policy_name)
            raise InfoSystemExit()
        policy_index = int(policy)
        ring = POLICIES.get_object_ring(policy_index, swift_dir)
        ring_name = policy.ring_name

    if account is None and (container is not None or obj is not None):
        print('No account specified')
        raise InfoSystemExit()

    if container is None and obj is not None:
        print('No container specified')
        raise InfoSystemExit()

    if account is None and part is None:
        print('No target specified')
        raise InfoSystemExit()

    # ``loc`` is the location label handed to print_ring_locations().
    loc = '<type>'
    if part and ring_name:
        if '-' in ring_name and ring_name.startswith('object'):
            loc = 'objects-' + ring_name.split('-', 1)[1]
        else:
            loc = ring_name + 's'
    if account and container and obj:
        loc = 'objects'
        # ring_name may be None when the caller passed only a ring.
        if ring_name and '-' in ring_name and ring_name.startswith('object'):
            policy_index = int(ring_name.rsplit('-', 1)[1])
            loc = 'objects-%d' % policy_index
    if account and container and not obj:
        loc = 'containers'
        if not any([ring, ring_name]):
            ring = Ring(swift_dir, ring_name='container')
        else:
            if ring_name != 'container':
                print('Warning: account/container specified ' +
                      'but ring not named "container"')
    if account and not container and not obj:
        loc = 'accounts'
        if not any([ring, ring_name]):
            ring = Ring(swift_dir, ring_name='account')
        else:
            if ring_name != 'account':
                print('Warning: account specified ' +
                      'but ring not named "account"')

    print('\nAccount  \t%s' % account)
    print('Container\t%s' % container)
    print('Object   \t%s\n\n' % obj)
    print_ring_locations(ring, loc, account, container, obj, part, all_nodes,
                         policy_index=policy_index)
Ejemplo n.º 54
0
 def get_object_ring(self):
     """
     Return the cluster's object :class:`swift.common.ring.Ring`,
     loading it from ``swift_dir`` the first time it is needed.
     """
     if self.object_ring:
         return self.object_ring
     self.object_ring = Ring(self.swift_dir, ring_name='object')
     return self.object_ring
Ejemplo n.º 55
0
class ListEndpointsMiddleware(object):
    """
    WSGI middleware that lists the backend endpoints of an account,
    container or object as a JSON array.

    Only GET requests below the configured endpoints path are handled;
    all other paths go to the wrapped app unchanged.

    Reads `swift_dir` (default `/etc/swift`) and `list_endpoints_path`
    (default `/endpoints/`) from the configuration.

    :param app: The next WSGI filter or app in the paste.deploy
                chain.
    :param conf: The configuration dict for the middleware.
    """

    def __init__(self, app, conf):
        self.app = app
        self.logger = get_logger(conf, log_route='endpoints')
        swift_dir = conf.get('swift_dir', '/etc/swift')
        self.account_ring = Ring(swift_dir, ring_name='account')
        self.container_ring = Ring(swift_dir, ring_name='container')
        self.object_ring = Ring(swift_dir, ring_name='object')
        prefix = conf.get('list_endpoints_path', '/endpoints/')
        # Keep the prefix with a trailing slash.
        self.endpoints_path = prefix if prefix.endswith('/') else prefix + '/'

    def __call__(self, env, start_response):
        request = Request(env)

        if not request.path.startswith(self.endpoints_path):
            return self.app(env, start_response)

        if request.method != 'GET':
            not_allowed = HTTPMethodNotAllowed(
                req=request, headers={"Allow": "GET"})
            return not_allowed(env, start_response)

        try:
            # Keep the slash that ends the prefix so the rest still
            # starts with '/'.
            clean_path = request.path[len(self.endpoints_path) - 1:]
            account, container, obj = split_path(clean_path, 1, 3, True)
        except ValueError:
            return HTTPBadRequest('No account specified')(env, start_response)

        account, container, obj = [
            part if part is None else unquote(part)
            for part in (account, container, obj)]

        # Choose the ring, its lookup arguments and the path suffix by the
        # most specific path part that was given.
        if obj is not None:
            ring = self.object_ring
            lookup = (account, container, obj)
            suffix = '{account}/{container}/{obj}'
        elif container is not None:
            ring = self.container_ring
            lookup = (account, container)
            suffix = '{account}/{container}'
        else:
            ring = self.account_ring
            lookup = (account,)
            suffix = '{account}'
        partition, nodes = ring.get_nodes(*lookup)
        endpoint_template = 'http://{ip}:{port}/{device}/{partition}/' + suffix

        endpoints = []
        for node in nodes:
            fields = {
                'ip': node['ip'],
                'port': node['port'],
                'device': node['device'],
                'partition': partition,
                'account': quote(account),
                'container': quote(container or ''),
                'obj': quote(obj or ''),
            }
            endpoints.append(endpoint_template.format(**fields))

        resp = Response(json.dumps(endpoints),
                        content_type='application/json')
        return resp(env, start_response)
Ejemplo n.º 56
0
 def get_account_ring(self):
     """
     Return the cluster's account :class:`swift.common.ring.Ring`,
     loading it from ``swift_dir`` the first time it is needed.
     """
     if self.account_ring:
         return self.account_ring
     self.account_ring = Ring(self.swift_dir, ring_name='account')
     return self.account_ring