Example #1
    def setUp(self):
        self.testdir = os.path.join(mkdtemp(), 'tmp_test_object_auditor')
        self.devices = os.path.join(self.testdir, 'node')
        self.rcache = os.path.join(self.testdir, 'object.recon')
        self.logger = FakeLogger()
        rmtree(self.testdir, ignore_errors=1)
        mkdirs(os.path.join(self.devices, 'sda'))
        os.mkdir(os.path.join(self.devices, 'sdb'))

        # policy 0
        self.objects = os.path.join(self.devices, 'sda', get_data_dir(0))
        self.objects_2 = os.path.join(self.devices, 'sdb', get_data_dir(0))
        os.mkdir(self.objects)
        # policy 1
        self.objects_p1 = os.path.join(self.devices, 'sda', get_data_dir(1))
        self.objects_2_p1 = os.path.join(self.devices, 'sdb', get_data_dir(1))
        os.mkdir(self.objects_p1)

        self.parts = {}
        self.parts_p1 = {}
        for part in ['0', '1', '2', '3']:
            self.parts[part] = os.path.join(self.objects, part)
            self.parts_p1[part] = os.path.join(self.objects_p1, part)
            os.mkdir(os.path.join(self.objects, part))
            os.mkdir(os.path.join(self.objects_p1, part))

        self.conf = dict(
            devices=self.devices,
            mount_check='false',
            object_size_stats='10,100,1024,10240')
        self.df_mgr = DiskFileManager(self.conf, self.logger)

        # diskfiles for policy 0, 1
        self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'o', 0)
        self.disk_file_p1 = self.df_mgr.get_diskfile('sda', '0', 'a', 'c',
                                                     'o', 1)
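
A typical test then writes an object through one of these diskfiles and reads it back. A minimal sketch, assuming the same create()/put()/read_metadata() DiskFile API used elsewhere on this page and that normalize_timestamp, md5 and time are imported as in the other examples (payload and metadata values are illustrative):

    def test_metadata_round_trip(self):
        data = 'x' * 10
        with self.disk_file.create() as writer:
            writer.write(data)
            writer.put({
                'X-Timestamp': normalize_timestamp(time()),
                'Content-Length': str(len(data)),
                'ETag': md5(data).hexdigest(),
                'Content-Type': 'text/plain',
            })
        # metadata written via put() should round-trip through read_metadata()
        self.assertEqual(self.disk_file.read_metadata()['ETag'],
                         md5(data).hexdigest())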
Example #2
        def do_test(headers_out, expected):
            # write an async
            dfmanager = DiskFileManager(conf, daemon.logger)
            account, container, obj = 'a', 'c', 'o'
            op = 'PUT'
            data = {'op': op, 'account': account, 'container': container,
                    'obj': obj, 'headers': headers_out}
            dfmanager.pickle_async_update(self.sda1, account, container, obj,
                                          data, next(ts_iter), policies[0])

            request_log = []

            def capture(*args, **kwargs):
                request_log.append((args, kwargs))

            # run once
            fake_status_codes = [
                200,  # object update success
                200,  # object update success
                200,  # object update success
            ]
            with mocked_http_conn(*fake_status_codes, give_connect=capture):
                daemon.run_once()
            self.assertEqual(len(fake_status_codes), len(request_log))
            for request_args, request_kwargs in request_log:
                ip, part, method, path, headers, qs, ssl = request_args
                self.assertEqual(method, 'PUT')
                self.assertDictEqual(expected, headers)
            self.assertEqual(
                daemon.logger.get_increment_counts(),
                {'successes': 1, 'unlinks': 1, 'async_pendings': 1})
            self.assertFalse(os.listdir(async_dir))
            daemon.logger.clear()
Example #3
    def test_obj_put_legacy_updates(self):
        ts = (normalize_timestamp(t) for t in itertools.count(int(time())))
        policy = POLICIES.get_by_index(0)
        # setup updater
        conf = {
            'devices': self.devices_dir,
            'mount_check': 'false',
            'swift_dir': self.testdir,
        }
        async_dir = os.path.join(self.sda1, get_async_dir(policy))
        os.mkdir(async_dir)

        account, container, obj = 'a', 'c', 'o'
        # write an async
        for op in ('PUT', 'DELETE'):
            self.logger._clear()
            daemon = object_updater.ObjectUpdater(conf, logger=self.logger)
            dfmanager = DiskFileManager(conf, daemon.logger)
            # don't include storage-policy-index in headers_out pickle
            headers_out = HeaderKeyDict({
                'x-size': 0,
                'x-content-type': 'text/plain',
                'x-etag': 'd41d8cd98f00b204e9800998ecf8427e',
                'x-timestamp': next(ts),
            })
            data = {
                'op': op,
                'account': account,
                'container': container,
                'obj': obj,
                'headers': headers_out
            }
            dfmanager.pickle_async_update(self.sda1, account, container, obj,
                                          data, next(ts), policy)

            request_log = []

            def capture(*args, **kwargs):
                request_log.append((args, kwargs))

            # run once
            fake_status_codes = [200, 200, 200]
            with mocked_http_conn(*fake_status_codes, give_connect=capture):
                daemon.run_once()
            self.assertEqual(len(fake_status_codes), len(request_log))
            for request_args, request_kwargs in request_log:
                ip, part, method, path, headers, qs, ssl = request_args
                self.assertEqual(method, op)
                self.assertEqual(headers['X-Backend-Storage-Policy-Index'],
                                 str(int(policy)))
            self.assertEqual(daemon.logger.get_increment_counts(), {
                'successes': 1,
                'unlinks': 1,
                'async_pendings': 1
            })
Example #4
    def setup(self, conf):
        """
        Implementation specific setup. This method is called at the very end
        by the constructor to allow a specific implementation to modify
        existing attributes or add its own attributes.

        :param conf: WSGI configuration parameter
        """

        # Common on-disk hierarchy shared across account, container and object
        # servers.
        self._diskfile_mgr = DiskFileManager(conf, self.logger)
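
Since setup() is the designated extension point, an implementation can override it to swap in its own manager or add attributes. A minimal sketch (the subclass name and extra option are hypothetical):

    class CustomObjectController(ObjectController):
        def setup(self, conf):
            # keep the common on-disk hierarchy from the base class
            super(CustomObjectController, self).setup(conf)
            # hypothetical extra knob read from the same WSGI conf
            self.scrub_interval = int(conf.get('scrub_interval', 300))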
Example #5
    def test_obj_put_async_updates(self):
        ts = (normalize_timestamp(t) for t in
              itertools.count(int(time())))
        policy = random.choice(list(POLICIES))
        # setup updater
        conf = {
            'devices': self.devices_dir,
            'mount_check': 'false',
            'swift_dir': self.testdir,
        }
        daemon = object_updater.ObjectUpdater(conf, logger=self.logger)
        async_dir = os.path.join(self.sda1, get_async_dir(policy))
        os.mkdir(async_dir)

        # write an async
        dfmanager = DiskFileManager(conf, daemon.logger)
        account, container, obj = 'a', 'c', 'o'
        op = 'PUT'
        headers_out = swob.HeaderKeyDict({
            'x-size': 0,
            'x-content-type': 'text/plain',
            'x-etag': 'd41d8cd98f00b204e9800998ecf8427e',
            'x-timestamp': next(ts),
            'X-Backend-Storage-Policy-Index': int(policy),
        })
        data = {'op': op, 'account': account, 'container': container,
                'obj': obj, 'headers': headers_out}
        dfmanager.pickle_async_update(self.sda1, account, container, obj,
                                      data, next(ts), policy)

        request_log = []

        def capture(*args, **kwargs):
            request_log.append((args, kwargs))

        # run once
        fake_status_codes = [
            200,  # object update success
            200,  # object update success
            200,  # object update success
        ]
        with mocked_http_conn(*fake_status_codes, give_connect=capture):
            daemon.run_once()
        self.assertEqual(len(fake_status_codes), len(request_log))
        for request_args, request_kwargs in request_log:
            ip, part, method, path, headers, qs, ssl = request_args
            self.assertEqual(method, 'PUT')
            self.assertEqual(headers['X-Backend-Storage-Policy-Index'],
                             str(int(policy)))
        self.assertEqual(daemon.logger.get_increment_counts(),
                         {'successes': 1, 'unlinks': 1, 'async_pendings': 1})
Example #6
    def __init__(self, conf):
        """
        :param conf: configuration object obtained from ConfigParser
        :param logger: logging object
        """

        self.conf = conf
        self.logger = get_logger(conf, log_route='object-mover')
        self.devices_dir = conf.get('devices', '/srv/node')
        self.mount_check = config_true_value(conf.get('mount_check', 'true'))
        self.vm_test_mode = config_true_value(conf.get('vm_test_mode', 'no'))
        self.swift_dir = conf.get('swift_dir', '/etc/swift')
        self.bind_ip = conf.get('bind_ip', '0.0.0.0')
        self.servers_per_port = int(conf.get('servers_per_port', '0') or 0)
        self.port = None if self.servers_per_port else \
            int(conf.get('bind_port', 6000))
        self.concurrency = int(conf.get('concurrency', 1))
        self.reclaim_age = int(conf.get('reclaim_age', 86400 * 7))

        self.handoffs_first = config_true_value(
            conf.get('handoffs_first', False))

        self.data_moving_map_dump = (conf.get('data_moving_map_dump')
                                     or DEFAULT_DUMP_FILE)

        self._diskfile_mgr = DiskFileManager(conf, self.logger)

        self.mover_tmp_dir = (conf.get('mover_tmp_dir') or 'data_mover')
        self.retries = int(conf.get('retries', 3))
        self.test = bool(conf.get('test', False))

        self.retrie_list = []
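
Only the keys the constructor actually reads need to be present in conf; everything else falls back to the defaults above. A hedged sketch (the class name ObjectMover is an assumption based on the 'object-mover' log route):

    conf = {
        'devices': '/srv/node',
        'mount_check': 'false',
        'concurrency': '4',
    }
    mover = ObjectMover(conf)
    assert mover.retries == 3   # 'retries' absent, so the default applies
    assert mover.port == 6000   # servers_per_port is 0, so bind_port's default is used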
Example #7
    def __init__(self, conf):
        self.conf = conf
        self.logger = get_logger(conf, log_route='object-updater')
        self.devices = conf.get('devices', '/srv/node')
        self.ip = conf.get('md-server-ip', '127.0.0.1')
        self.port = conf.get('md-server-port', '6090')
        self.mount_check = config_true_value(conf.get('mount_check', 'true'))
        self.swift_dir = conf.get('swift_dir', '/etc/swift')
        self.interval = int(conf.get('interval', 30))
        # self.container_ring = None
        # self.concurrency = int(conf.get('concurrency', 1))
        # self.slowdown = float(conf.get('slowdown', 0.01))
        # self.node_timeout = int(conf.get('node_timeout', 10))
        # self.conn_timeout = float(conf.get('conn_timeout', .5))
        self.last_time_ran = 0
        self.diskfile_mgr = DiskFileManager(conf, self.logger)
Example #8
        def do_test(headers_out, expected, container_path=None):
            # write an async
            dfmanager = DiskFileManager(conf, daemon.logger)
            self._write_async_update(dfmanager,
                                     next(ts_iter),
                                     policies[0],
                                     headers=headers_out,
                                     container_path=container_path)
            request_log = []

            def capture(*args, **kwargs):
                request_log.append((args, kwargs))

            # run once
            fake_status_codes = [
                200,  # object update success
                200,  # object update success
                200,  # object update success
            ]
            with mocked_http_conn(*fake_status_codes, give_connect=capture):
                daemon.run_once()
            self.assertEqual(len(fake_status_codes), len(request_log))
            for request_args, request_kwargs in request_log:
                ip, part, method, path, headers, qs, ssl = request_args
                self.assertEqual(method, 'PUT')
                self.assertDictEqual(expected, headers)
            self.assertEqual(daemon.logger.get_increment_counts(), {
                'successes': 1,
                'unlinks': 1,
                'async_pendings': 1
            })
            self.assertFalse(os.listdir(async_dir))
            daemon.logger.clear()
Example #9
    def setUp(self):
        self.testdir = os.path.join(mkdtemp(), "tmp_test_object_auditor")
        self.devices = os.path.join(self.testdir, "node")
        self.rcache = os.path.join(self.testdir, "object.recon")
        self.logger = FakeLogger()
        rmtree(self.testdir, ignore_errors=1)
        mkdirs(os.path.join(self.devices, "sda"))
        os.mkdir(os.path.join(self.devices, "sdb"))

        # policy 0
        self.objects = os.path.join(self.devices, "sda", get_data_dir(POLICIES[0]))
        self.objects_2 = os.path.join(self.devices, "sdb", get_data_dir(POLICIES[0]))
        os.mkdir(self.objects)
        # policy 1
        self.objects_p1 = os.path.join(self.devices, "sda", get_data_dir(POLICIES[1]))
        self.objects_2_p1 = os.path.join(self.devices, "sdb", get_data_dir(POLICIES[1]))
        os.mkdir(self.objects_p1)

        self.parts = {}
        self.parts_p1 = {}
        for part in ["0", "1", "2", "3"]:
            self.parts[part] = os.path.join(self.objects, part)
            self.parts_p1[part] = os.path.join(self.objects_p1, part)
            os.mkdir(os.path.join(self.objects, part))
            os.mkdir(os.path.join(self.objects_p1, part))

        self.conf = dict(devices=self.devices, mount_check="false", object_size_stats="10,100,1024,10240")
        self.df_mgr = DiskFileManager(self.conf, self.logger)

        # diskfiles for policy 0, 1
        self.disk_file = self.df_mgr.get_diskfile("sda", "0", "a", "c", "o", policy=POLICIES[0])
        self.disk_file_p1 = self.df_mgr.get_diskfile("sda", "0", "a", "c", "o", policy=POLICIES[1])
Example #10
    def test_obj_put_async_root_update_redirected_previous_success(self):
        policies = list(POLICIES)
        random.shuffle(policies)
        # setup updater
        conf = {
            'devices': self.devices_dir,
            'mount_check': 'false',
            'swift_dir': self.testdir,
        }
        daemon = object_updater.ObjectUpdater(conf, logger=self.logger)
        async_dir = os.path.join(self.sda1, get_async_dir(policies[0]))
        os.mkdir(async_dir)
        dfmanager = DiskFileManager(conf, daemon.logger)

        ts_obj = next(self.ts_iter)
        self._write_async_update(dfmanager, ts_obj, policies[0])
        orig_async_path, orig_async_data = self._check_async_file(async_dir)

        # run once
        with mocked_http_conn(
                507, 200, 507) as conn:
            with mock.patch('swift.obj.updater.dump_recon_cache'):
                daemon.run_once()

        self._check_update_requests(conn.requests, ts_obj, policies[0])
        self.assertEqual(['/sda1/0/a/c/o'] * 3,
                         [req['path'] for req in conn.requests])
        self.assertEqual(
            {'failures': 1, 'async_pendings': 1},
            daemon.logger.get_increment_counts())
        async_path, async_data = self._check_async_file(async_dir)
        self.assertEqual(dict(orig_async_data, successes=[1]), async_data)

        # run again - expect 3 redirected updates despite previous success
        ts_redirect = next(self.ts_iter)
        resp_headers_1 = {'Location': '/.shards_a/c_shard_1/o',
                          'X-Backend-Redirect-Timestamp': ts_redirect.internal}
        fake_responses = (
            # 1st round of redirects, 2nd round of redirects
            [(301, resp_headers_1)] * 2 + [(200, {})] * 3)
        fake_status_codes, fake_headers = zip(*fake_responses)
        with mocked_http_conn(
                *fake_status_codes, headers=fake_headers) as conn:
            with mock.patch('swift.obj.updater.dump_recon_cache'):
                daemon.run_once()

        self._check_update_requests(conn.requests[:2], ts_obj, policies[0])
        self._check_update_requests(conn.requests[2:], ts_obj, policies[0])
        root_part = daemon.container_ring.get_part('a/c')
        shard_1_part = daemon.container_ring.get_part('.shards_a/c_shard_1')
        self.assertEqual(
            ['/sda1/%s/a/c/o' % root_part] * 2 +
            ['/sda1/%s/.shards_a/c_shard_1/o' % shard_1_part] * 3,
            [req['path'] for req in conn.requests])
        self.assertEqual(
            {'redirects': 1, 'successes': 1, 'failures': 1, 'unlinks': 1,
             'async_pendings': 1},
            daemon.logger.get_increment_counts())
        self.assertFalse(os.listdir(async_dir))  # no async file
Example #11
    def _get_object_info(self, account, container, obj, number):
        obj_conf = self.configs['object-server']
        config_path = obj_conf[number]
        options = utils.readconf(config_path, 'app:object-server')
        swift_dir = options.get('swift_dir', '/etc/swift')
        ring = POLICIES.get_object_ring(int(self.policy), swift_dir)
        part, nodes = ring.get_nodes(account, container, obj)
        for node in nodes:
            # assumes one to one mapping
            if node['port'] == int(options.get('bind_port')):
                device = node['device']
                break
        else:
            return None
        mgr = DiskFileManager(options, get_logger(options))
        disk_file = mgr.get_diskfile(device, part, account, container, obj,
                                     self.policy)
        info = disk_file.read_metadata()
        return info
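
The for/else only binds device when one of the partition's nodes is served by this server's bind_port; otherwise the helper returns None. A hedged usage sketch (account/container/object names are illustrative):

    info = self._get_object_info('AUTH_test', 'container1', 'obj1', 1)
    if info is None:
        self.fail('object server 1 holds no replica of this object')
    self.assertIn('X-Timestamp', info)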
Example #12
    def __init__(self, conf, logger=None):
        # location/directory of the metadata database (meta.db)
        self.location = conf.get('location', '/srv/node/sdb1/metadata/')
        # path to the actual file
        # self.db_file = os.path.join(self.location, 'meta.db')
        self.logger = logger or get_logger(conf, log_route='metadata-server')
        self.root = conf.get('devices', '/srv/node')
        # workaround for device listings
        self.node_count = conf.get('nodecount', '8')
        self.devicelist = []
        for x in range(0, int(self.node_count)):
            self.devicelist.append(conf.get('device' + str(x), ''))
        self.mount_check = config_true_value(conf.get('mount_check', 'true'))
        self.node_timeout = int(conf.get('node_timeout', 3))
        self.conn_timeout = float(conf.get('conn_timeout', 3))
        replication_server = conf.get('replication_server', None)
        if replication_server is not None:
            replication_server = config_true_value(replication_server)
        self.replication_server = replication_server
        self.allowed_sync_hosts = [
            h.strip()
            for h in conf.get('allowed_sync_hosts', '127.0.0.1').split(',')
            if h.strip()
        ]
        self.replicator_rpc = ReplicatorRpc(self.root,
                                            DATADIR,
                                            MetadataBroker,
                                            self.mount_check,
                                            logger=self.logger)
        self.diskfile_mgr = DiskFileManager(conf, self.logger)

        self.db_ip = conf.get('db_ip', '127.0.0.1')
        self.db_port = int(conf.get('db_port', 2424))
        self.db_user = conf.get('db_user', 'root')
        self.db_pw = conf.get('db_pw', 'root')

        if config_true_value(conf.get('allow_versions', 'f')):
            self.save_headers.append('x-versions-location')

        swift.common.db.DB_PREALLOCATION = config_true_value(
            conf.get('db_preallocation', 'f'))
        self.broker = self._get_metadata_broker()
        self.broker.initialize()
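
The device-listing workaround reads nodecount consecutively numbered deviceN keys. An illustrative conf fragment (values are made up):

    conf = {'nodecount': '2', 'device0': 'sdb1', 'device1': 'sdb2'}
    # the loop above would then set self.devicelist to ['sdb1', 'sdb2']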
Example #13
    def test_obj_put_async_root_update_redirected(self):
        policies = list(POLICIES)
        random.shuffle(policies)
        # setup updater
        conf = {
            'devices': self.devices_dir,
            'mount_check': 'false',
            'swift_dir': self.testdir,
        }
        daemon = object_updater.ObjectUpdater(conf, logger=self.logger)
        async_dir = os.path.join(self.sda1, get_async_dir(policies[0]))
        os.mkdir(async_dir)
        dfmanager = DiskFileManager(conf, daemon.logger)

        ts_obj = next(self.ts_iter)
        self._write_async_update(dfmanager, ts_obj, policies[0])

        # run once
        ts_redirect_1 = next(self.ts_iter)
        ts_redirect_2 = next(self.ts_iter)
        fake_responses = [
            # first round of update attempts, newest redirect should be chosen
            (200, {}),
            (301, {
                'Location': '/.shards_a/c_shard_new/o',
                'X-Backend-Redirect-Timestamp': ts_redirect_2.internal
            }),
            (301, {
                'Location': '/.shards_a/c_shard_old/o',
                'X-Backend-Redirect-Timestamp': ts_redirect_1.internal
            }),
            # second round of update attempts
            (200, {}),
            (200, {}),
            (200, {}),
        ]
        fake_status_codes, fake_headers = zip(*fake_responses)
        with mocked_http_conn(*fake_status_codes,
                              headers=fake_headers) as conn:
            with mock.patch('swift.obj.updater.dump_recon_cache'):
                daemon.run_once()

        self._check_update_requests(conn.requests[:3], ts_obj, policies[0])
        self._check_update_requests(conn.requests[3:], ts_obj, policies[0])
        self.assertEqual(['/sda1/0/a/c/o'] * 3 +
                         ['/sda1/0/.shards_a/c_shard_new/o'] * 3,
                         [req['path'] for req in conn.requests])
        self.assertEqual(
            {
                'redirects': 1,
                'successes': 1,
                'unlinks': 1,
                'async_pendings': 1
            }, daemon.logger.get_increment_counts())
        self.assertFalse(os.listdir(async_dir))  # no async file
Example #14
    def test_obj_put_legacy_updates(self):
        ts = (normalize_timestamp(t) for t in itertools.count(int(time())))
        policy = POLICIES.get_by_index(0)
        # setup updater
        conf = {"devices": self.devices_dir, "mount_check": "false", "swift_dir": self.testdir}
        async_dir = os.path.join(self.sda1, get_async_dir(policy.idx))
        os.mkdir(async_dir)

        account, container, obj = "a", "c", "o"
        # write an async
        for op in ("PUT", "DELETE"):
            self.logger._clear()
            daemon = object_updater.ObjectUpdater(conf, logger=self.logger)
            dfmanager = DiskFileManager(conf, daemon.logger)
            # don't include storage-policy-index in headers_out pickle
            headers_out = swob.HeaderKeyDict(
                {
                    "x-size": 0,
                    "x-content-type": "text/plain",
                    "x-etag": "d41d8cd98f00b204e9800998ecf8427e",
                    "x-timestamp": ts.next(),
                }
            )
            data = {"op": op, "account": account, "container": container, "obj": obj, "headers": headers_out}
            dfmanager.pickle_async_update(self.sda1, account, container, obj, data, next(ts), policy.idx)

            request_log = []

            def capture(*args, **kwargs):
                request_log.append((args, kwargs))

            # run once
            fake_status_codes = [200, 200, 200]
            with mocked_http_conn(*fake_status_codes, give_connect=capture):
                daemon.run_once()
            self.assertEqual(len(fake_status_codes), len(request_log))
            for request_args, request_kwargs in request_log:
                ip, part, method, path, headers, qs, ssl = request_args
                self.assertEqual(method, op)
                self.assertEqual(headers["X-Backend-Storage-Policy-Index"], str(policy.idx))
            self.assertEqual(daemon.logger.get_increment_counts(), {"successes": 1, "unlinks": 1, "async_pendings": 1})
Example #15
    def __init__(self, conf, logger=None):
        """
        :param conf: configuration object obtained from ConfigParser
        :param logger: logging object
        """
        self.conf = conf
        self.logger = logger or get_logger(conf, log_route='object-replicator')
        self.devices_dir = conf.get('devices', '/srv/node')
        self.mount_check = config_true_value(conf.get('mount_check', 'true'))
        self.vm_test_mode = config_true_value(conf.get('vm_test_mode', 'no'))
        self.swift_dir = conf.get('swift_dir', '/etc/swift')
        self.bind_ip = conf.get('bind_ip', '0.0.0.0')
        self.servers_per_port = int(conf.get('servers_per_port', '0') or 0)
        self.port = None if self.servers_per_port else \
            int(conf.get('bind_port', 6000))
        self.concurrency = int(conf.get('concurrency', 1))
        self.stats_interval = int(conf.get('stats_interval', '300'))
        self.ring_check_interval = int(conf.get('ring_check_interval', 15))
        self.next_check = time.time() + self.ring_check_interval
        self.reclaim_age = int(conf.get('reclaim_age', 86400 * 7))
        self.partition_times = []
        self.interval = int(
            conf.get('interval') or conf.get('run_pause') or 30)
        self.rsync_timeout = int(conf.get('rsync_timeout', 900))
        self.rsync_io_timeout = conf.get('rsync_io_timeout', '30')
        self.rsync_bwlimit = conf.get('rsync_bwlimit', '0')
        self.rsync_compress = config_true_value(
            conf.get('rsync_compress', 'no'))
        self.http_timeout = int(conf.get('http_timeout', 60))
        self.lockup_timeout = int(conf.get('lockup_timeout', 1800))
        self.recon_cache_path = conf.get('recon_cache_path',
                                         '/var/cache/swift')
        self.rcache = os.path.join(self.recon_cache_path, "object.recon")
        self.conn_timeout = float(conf.get('conn_timeout', 0.5))
        self.node_timeout = float(conf.get('node_timeout', 10))
        self.sync_method = getattr(self, conf.get('sync_method') or 'rsync')
        self.network_chunk_size = int(conf.get('network_chunk_size', 65536))
        self.default_headers = {
            'Content-Length': '0',
            'user-agent': 'object-replicator %s' % os.getpid()
        }
        self.rsync_error_log_line_length = \
            int(conf.get('rsync_error_log_line_length', 0))
        self.handoffs_first = config_true_value(
            conf.get('handoffs_first', False))
        self.handoff_delete = config_auto_int_value(
            conf.get('handoff_delete', 'auto'), 0)
        if any((self.handoff_delete, self.handoffs_first)):
            self.logger.warn('Handoff only mode is not intended for normal '
                             'operation, please disable handoffs_first and '
                             'handoff_delete before the next '
                             'normal rebalance')
        self._diskfile_mgr = DiskFileManager(conf, self.logger)
Example #16
    def setUp(self):
        self.testdir = os.path.join(mkdtemp(), 'tmp_test_object_auditor')
        self.devices = os.path.join(self.testdir, 'node')
        self.logger = FakeLogger()
        rmtree(self.testdir, ignore_errors=1)
        mkdirs(os.path.join(self.devices, 'sda'))
        self.objects = os.path.join(self.devices, 'sda', 'objects')

        os.mkdir(os.path.join(self.devices, 'sdb'))
        self.objects_2 = os.path.join(self.devices, 'sdb', 'objects')

        os.mkdir(self.objects)
        self.parts = {}
        for part in ['0', '1', '2', '3']:
            self.parts[part] = os.path.join(self.objects, part)
            os.mkdir(os.path.join(self.objects, part))

        self.conf = dict(devices=self.devices,
                         mount_check='false',
                         object_size_stats='10,100,1024,10240')
        self.df_mgr = DiskFileManager(self.conf, self.logger)
        self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'o')
Example #17
    def setup(self, conf):
        """
        Implementation specific setup. This method is called at the very end
        by the constructor to allow a specific implementation to modify
        existing attributes or add its own attributes.

        :param conf: WSGI configuration parameter
        """

        # Common on-disk hierarchy shared across account, container and object
        # servers.
        self._diskfile_mgr = DiskFileManager(conf, self.logger)
        # This is populated by global_conf_callback way below as the semaphore
        # is shared by all workers.
        if 'replication_semaphore' in conf:
            # The value was put in a list so it could get past paste
            self.replication_semaphore = conf['replication_semaphore'][0]
        else:
            self.replication_semaphore = None
        self.replication_failure_threshold = int(
            conf.get('replication_failure_threshold') or 100)
        self.replication_failure_ratio = float(
            conf.get('replication_failure_ratio') or 1.0)
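
The comment about replication_semaphore refers to a global_conf_callback that creates the semaphore once, before workers fork. A minimal sketch of that pattern, assuming the multiprocessing module and a replication_concurrency option (the list wrapper is what lets the value get past paste):

    import multiprocessing

    def global_conf_callback(preloaded_app_conf, global_conf):
        # wrapped in a list so it can pass through paste untouched
        global_conf['replication_semaphore'] = [
            multiprocessing.BoundedSemaphore(
                int(preloaded_app_conf.get('replication_concurrency') or 4))]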
Example #18
    def __init__(self, conf):
        """
        :param conf: configuration object obtained from ConfigParser
        """
        self.conf = conf
        self.logger = get_logger(conf, log_route='object-replicator')
        self.devices_dir = conf.get('devices', '/srv/node')
        self.mount_check = config_true_value(conf.get('mount_check', 'true'))
        self.vm_test_mode = config_true_value(conf.get('vm_test_mode', 'no'))
        self.swift_dir = conf.get('swift_dir', '/etc/swift')
        self.port = int(conf.get('bind_port', 6000))
        self.concurrency = int(conf.get('concurrency', 1))
        self.stats_interval = int(conf.get('stats_interval', '300'))
        self.object_ring = Ring(self.swift_dir, ring_name='object')
        self.ring_check_interval = int(conf.get('ring_check_interval', 15))
        self.next_check = time.time() + self.ring_check_interval
        self.reclaim_age = int(conf.get('reclaim_age', 86400 * 7))
        self.partition_times = []
        self.run_pause = int(conf.get('run_pause', 30))
        self.rsync_timeout = int(conf.get('rsync_timeout', 900))
        self.rsync_io_timeout = conf.get('rsync_io_timeout', '30')
        self.rsync_bwlimit = conf.get('rsync_bwlimit', '0')
        self.http_timeout = int(conf.get('http_timeout', 60))
        self.lockup_timeout = int(conf.get('lockup_timeout', 1800))
        self.recon_cache_path = conf.get('recon_cache_path',
                                         '/var/cache/swift')
        self.rcache = os.path.join(self.recon_cache_path, "object.recon")
        self.conn_timeout = float(conf.get('conn_timeout', 0.5))
        self.node_timeout = float(conf.get('node_timeout', 10))
        self.sync_method = getattr(self, conf.get('sync_method') or 'rsync')
        self.network_chunk_size = int(conf.get('network_chunk_size', 65536))
        self.disk_chunk_size = int(conf.get('disk_chunk_size', 65536))
        self.headers = {
            'Content-Length': '0',
            'user-agent': 'obj-replicator %s' % os.getpid()
        }
        self.rsync_error_log_line_length = \
            int(conf.get('rsync_error_log_line_length', 0))
        self.handoffs_first = config_true_value(
            conf.get('handoffs_first', False))
        self.handoff_delete = config_auto_int_value(
            conf.get('handoff_delete', 'auto'), 0)
        self._diskfile_mgr = DiskFileManager(conf, self.logger)
Example #19
    def _check_obj_put_async_update_bad_redirect_headers(self, headers):
        policies = list(POLICIES)
        random.shuffle(policies)
        # setup updater
        conf = {
            'devices': self.devices_dir,
            'mount_check': 'false',
            'swift_dir': self.testdir,
        }
        daemon = object_updater.ObjectUpdater(conf, logger=self.logger)
        async_dir = os.path.join(self.sda1, get_async_dir(policies[0]))
        os.mkdir(async_dir)
        dfmanager = DiskFileManager(conf, daemon.logger)

        ts_obj = next(self.ts_iter)
        self._write_async_update(dfmanager, ts_obj, policies[0])
        orig_async_path, orig_async_data = self._check_async_file(async_dir)

        fake_responses = [
            (301, headers),
            (301, headers),
            (301, headers),
        ]
        fake_status_codes, fake_headers = zip(*fake_responses)
        with mocked_http_conn(*fake_status_codes,
                              headers=fake_headers) as conn:
            with mock.patch('swift.obj.updater.dump_recon_cache'):
                daemon.run_once()

        self._check_update_requests(conn.requests, ts_obj, policies[0])
        self.assertEqual(['/sda1/0/a/c/o'] * 3,
                         [req['path'] for req in conn.requests])
        self.assertEqual({
            'failures': 1,
            'async_pendings': 1
        }, daemon.logger.get_increment_counts())
        # async file still intact
        async_path, async_data = self._check_async_file(async_dir)
        self.assertEqual(orig_async_path, async_path)
        self.assertEqual(orig_async_data, async_data)
        return daemon
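
A hedged sketch of how tests might drive this helper, each passing redirect headers that are malformed in a different way (header values are illustrative):

    def test_redirect_missing_timestamp(self):
        # a Location without X-Backend-Redirect-Timestamp should not be followed
        self._check_obj_put_async_update_bad_redirect_headers(
            {'Location': '/.shards_a/c_shard_1/o'})

    def test_redirect_bad_location(self):
        self._check_obj_put_async_update_bad_redirect_headers(
            {'Location': 'bad-location',
             'X-Backend-Redirect-Timestamp': next(self.ts_iter).internal})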
Example #20
class ObjectController(object):
    """Implements the WSGI application for the Swift Object Server."""

    def __init__(self, conf, logger=None):
        """
        Creates a new WSGI application for the Swift Object Server. An
        example configuration is given at
        <source-dir>/etc/object-server.conf-sample or
        /etc/swift/object-server.conf-sample.
        """
        self.logger = logger or get_logger(conf, log_route='object-server')
        self.node_timeout = int(conf.get('node_timeout', 3))
        self.conn_timeout = float(conf.get('conn_timeout', 0.5))
        self.client_timeout = int(conf.get('client_timeout', 60))
        self.disk_chunk_size = int(conf.get('disk_chunk_size', 65536))
        self.network_chunk_size = int(conf.get('network_chunk_size', 65536))
        self.log_requests = config_true_value(conf.get('log_requests', 'true'))
        self.max_upload_time = int(conf.get('max_upload_time', 86400))
        self.slow = int(conf.get('slow', 0))
        self.keep_cache_private = \
            config_true_value(conf.get('keep_cache_private', 'false'))
        replication_server = conf.get('replication_server', None)
        if replication_server is not None:
            replication_server = config_true_value(replication_server)
        self.replication_server = replication_server

        default_allowed_headers = '''
            content-disposition,
            content-encoding,
            x-delete-at,
            x-object-manifest,
            x-static-large-object,
        '''
        extra_allowed_headers = [
            header.strip().lower() for header in conf.get(
                'allowed_headers', default_allowed_headers).split(',')
            if header.strip()
        ]
        self.allowed_headers = set()
        for header in extra_allowed_headers:
            if header not in DATAFILE_SYSTEM_META:
                self.allowed_headers.add(header)
        self.expiring_objects_account = \
            (conf.get('auto_create_account_prefix') or '.') + \
            (conf.get('expiring_objects_account_name') or 'expiring_objects')
        self.expiring_objects_container_divisor = \
            int(conf.get('expiring_objects_container_divisor') or 86400)
        # Initialization was successful, so now apply the network chunk size
        # parameter as the default read / write buffer size for the network
        # sockets.
        #
        # NOTE WELL: This is a class setting, so until we get set this on a
        # per-connection basis, this affects reading and writing on ALL
        # sockets, those between the proxy servers and external clients, and
        # those between the proxy servers and the other internal servers.
        #
        # ** Because the primary motivation for this is to optimize how data
        # is written back to the proxy server, we could use the value from the
        # disk_chunk_size parameter. However, it affects all created sockets
        # using this class so we have chosen to tie it to the
        # network_chunk_size parameter value instead.
        socket._fileobject.default_bufsize = self.network_chunk_size

        # Provide further setup specific to an object server implementation.
        self.setup(conf)

    def setup(self, conf):
        """
        Implementation specific setup. This method is called at the very end
        by the constructor to allow a specific implementation to modify
        existing attributes or add its own attributes.

        :param conf: WSGI configuration parameter
        """

        # Common on-disk hierarchy shared across account, container and object
        # servers.
        self._diskfile_mgr = DiskFileManager(conf, self.logger)
        # This is populated by global_conf_callback way below as the semaphore
        # is shared by all workers.
        if 'replication_semaphore' in conf:
            # The value was put in a list so it could get past paste
            self.replication_semaphore = conf['replication_semaphore'][0]
        else:
            self.replication_semaphore = None
        self.replication_failure_threshold = int(
            conf.get('replication_failure_threshold') or 100)
        self.replication_failure_ratio = float(
            conf.get('replication_failure_ratio') or 1.0)

    def get_diskfile(self, device, partition, account, container, obj,
                     **kwargs):
        """
        Utility method for instantiating a DiskFile object supporting a given
        REST API.

        An implementation of the object server that wants to use a different
        DiskFile class would simply over-ride this method to provide that
        behavior.
        """
        return self._diskfile_mgr.get_diskfile(
            device, partition, account, container, obj, **kwargs)

    def async_update(self, op, account, container, obj, host, partition,
                     contdevice, headers_out, objdevice):
        """
        Sends or saves an async update.

        :param op: operation performed (ex: 'PUT', or 'DELETE')
        :param account: account name for the object
        :param container: container name for the object
        :param obj: object name
        :param host: host that the container is on
        :param partition: partition that the container is on
        :param contdevice: device name that the container is on
        :param headers_out: dictionary of headers to send in the container
                            request
        :param objdevice: device name that the object is in
        """
        headers_out['user-agent'] = 'obj-server %s' % os.getpid()
        full_path = '/%s/%s/%s' % (account, container, obj)
        if all([host, partition, contdevice]):
            try:
                with ConnectionTimeout(self.conn_timeout):
                    ip, port = host.rsplit(':', 1)
                    conn = http_connect(ip, port, contdevice, partition, op,
                                        full_path, headers_out)
                with Timeout(self.node_timeout):
                    response = conn.getresponse()
                    response.read()
                    if is_success(response.status):
                        return
                    else:
                        self.logger.error(_(
                            'ERROR Container update failed '
                            '(saving for async update later): %(status)d '
                            'response from %(ip)s:%(port)s/%(dev)s'),
                            {'status': response.status, 'ip': ip, 'port': port,
                             'dev': contdevice})
            except (Exception, Timeout):
                self.logger.exception(_(
                    'ERROR container update failed with '
                    '%(ip)s:%(port)s/%(dev)s (saving for async update later)'),
                    {'ip': ip, 'port': port, 'dev': contdevice})
        data = {'op': op, 'account': account, 'container': container,
                'obj': obj, 'headers': headers_out}
        timestamp = headers_out['x-timestamp']
        self._diskfile_mgr.pickle_async_update(objdevice, account, container,
                                               obj, data, timestamp)

    def container_update(self, op, account, container, obj, request,
                         headers_out, objdevice):
        """
        Update the container when objects are updated.

        :param op: operation performed (ex: 'PUT', or 'DELETE')
        :param account: account name for the object
        :param container: container name for the object
        :param obj: object name
        :param request: the original request object driving the update
        :param headers_out: dictionary of headers to send in the container
                            request(s)
        :param objdevice: device name that the object is in
        """
        headers_in = request.headers
        conthosts = [h.strip() for h in
                     headers_in.get('X-Container-Host', '').split(',')]
        contdevices = [d.strip() for d in
                       headers_in.get('X-Container-Device', '').split(',')]
        contpartition = headers_in.get('X-Container-Partition', '')

        if len(conthosts) != len(contdevices):
            # This shouldn't happen unless there's a bug in the proxy,
            # but if there is, we want to know about it.
            self.logger.error(_('ERROR Container update failed: different '
                                'numbers of hosts and devices in request: '
                                '"%s" vs "%s"') %
                               (headers_in.get('X-Container-Host', ''),
                                headers_in.get('X-Container-Device', '')))
            return

        if contpartition:
            updates = zip(conthosts, contdevices)
        else:
            updates = []

        headers_out['x-trans-id'] = headers_in.get('x-trans-id', '-')
        headers_out['referer'] = request.as_referer()
        for conthost, contdevice in updates:
            self.async_update(op, account, container, obj, conthost,
                              contpartition, contdevice, headers_out,
                              objdevice)

    def delete_at_update(self, op, delete_at, account, container, obj,
                         request, objdevice):
        """
        Update the expiring objects container when objects are updated.

        :param op: operation performed (ex: 'PUT', or 'DELETE')
        :param delete_at: scheduled delete in UNIX seconds, int
        :param account: account name for the object
        :param container: container name for the object
        :param obj: object name
        :param request: the original request driving the update
        :param objdevice: device name that the object is in
        """
        if config_true_value(
                request.headers.get('x-backend-replication', 'f')):
            return
        delete_at = normalize_delete_at_timestamp(delete_at)
        updates = [(None, None)]

        partition = None
        hosts = contdevices = [None]
        headers_in = request.headers
        headers_out = HeaderKeyDict({
            'x-timestamp': headers_in['x-timestamp'],
            'x-trans-id': headers_in.get('x-trans-id', '-'),
            'referer': request.as_referer()})
        if op != 'DELETE':
            delete_at_container = headers_in.get('X-Delete-At-Container', None)
            if not delete_at_container:
                self.logger.warning(
                    'X-Delete-At-Container header must be specified for '
                    'expiring objects background %s to work properly. Making '
                    'best guess as to the container name for now.' % op)
                # TODO(gholt): In a future release, change the above warning to
                # a raised exception and remove the guess code below.
                delete_at_container = (
                    int(delete_at) / self.expiring_objects_container_divisor *
                    self.expiring_objects_container_divisor)
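                # e.g. with the default divisor of 86400, a delete_at of
                # 1357013147 buckets to 1356998400, so every object expiring
                # in the same 24-hour window shares one expiring container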
            partition = headers_in.get('X-Delete-At-Partition', None)
            hosts = headers_in.get('X-Delete-At-Host', '')
            contdevices = headers_in.get('X-Delete-At-Device', '')
            updates = [upd for upd in
                       zip((h.strip() for h in hosts.split(',')),
                           (c.strip() for c in contdevices.split(',')))
                       if all(upd) and partition]
            if not updates:
                updates = [(None, None)]
            headers_out['x-size'] = '0'
            headers_out['x-content-type'] = 'text/plain'
            headers_out['x-etag'] = 'd41d8cd98f00b204e9800998ecf8427e'
        else:
            # DELETEs of old expiration data have no way of knowing what the
            # old X-Delete-At-Container was at the time of the initial setting
            # of the data, so a best guess is made here.
            # Worst case is a DELETE is issued now for something that doesn't
            # exist there and the original data is left where it is, where
            # it will be ignored when the expirer eventually tries to issue the
            # object DELETE later since the X-Delete-At value won't match up.
            delete_at_container = str(
                int(delete_at) / self.expiring_objects_container_divisor *
                self.expiring_objects_container_divisor)
        delete_at_container = normalize_delete_at_timestamp(
            delete_at_container)

        for host, contdevice in updates:
            self.async_update(
                op, self.expiring_objects_account, delete_at_container,
                '%s-%s/%s/%s' % (delete_at, account, container, obj),
                host, partition, contdevice, headers_out, objdevice)

    @public
    @timing_stats()
    def POST(self, request):
        """Handle HTTP POST requests for the Swift Object Server."""
        device, partition, account, container, obj = \
            split_and_validate_path(request, 5, 5, True)

        if 'x-timestamp' not in request.headers or \
                not check_float(request.headers['x-timestamp']):
            return HTTPBadRequest(body='Missing timestamp', request=request,
                                  content_type='text/plain')
        new_delete_at = int(request.headers.get('X-Delete-At') or 0)
        if new_delete_at and new_delete_at < time.time():
            return HTTPBadRequest(body='X-Delete-At in past', request=request,
                                  content_type='text/plain')
        try:
            disk_file = self.get_diskfile(
                device, partition, account, container, obj)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=device, request=request)
        try:
            orig_metadata = disk_file.read_metadata()
        except (DiskFileNotExist, DiskFileQuarantined):
            return HTTPNotFound(request=request)
        orig_timestamp = orig_metadata.get('X-Timestamp', '0')
        if orig_timestamp >= request.headers['x-timestamp']:
            return HTTPConflict(request=request)
        metadata = {'X-Timestamp': request.headers['x-timestamp']}
        metadata.update(val for val in request.headers.iteritems()
                        if is_user_meta('object', val[0]))
        for header_key in self.allowed_headers:
            if header_key in request.headers:
                header_caps = header_key.title()
                metadata[header_caps] = request.headers[header_key]
        orig_delete_at = int(orig_metadata.get('X-Delete-At') or 0)
        if orig_delete_at != new_delete_at:
            if new_delete_at:
                self.delete_at_update('PUT', new_delete_at, account, container,
                                      obj, request, device)
            if orig_delete_at:
                self.delete_at_update('DELETE', orig_delete_at, account,
                                      container, obj, request, device)
        disk_file.write_metadata(metadata)
        return HTTPAccepted(request=request)

    @public
    @timing_stats()
    def PUT(self, request):
        """Handle HTTP PUT requests for the Swift Object Server."""
        device, partition, account, container, obj = \
            split_and_validate_path(request, 5, 5, True)

	my_debug("local variables in PUT()",locals())
	my_debug("request.path", request.path)
        if 'x-timestamp' not in request.headers or \
                not check_float(request.headers['x-timestamp']):
            return HTTPBadRequest(body='Missing timestamp', request=request,
                                  content_type='text/plain')
        error_response = check_object_creation(request, obj)
        if error_response:
            return error_response
        new_delete_at = int(request.headers.get('X-Delete-At') or 0)
        if new_delete_at and new_delete_at < time.time():
            return HTTPBadRequest(body='X-Delete-At in past', request=request,
                                  content_type='text/plain')
        try:
            fsize = request.message_length()
        except ValueError as e:
            return HTTPBadRequest(body=str(e), request=request,
                                  content_type='text/plain')
        try:
            disk_file = self.get_diskfile(
                device, partition, account, container, obj)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=device, request=request)
        try:
            orig_metadata = disk_file.read_metadata()
        except (DiskFileNotExist, DiskFileQuarantined):
            orig_metadata = {}

        # Checks for If-None-Match
        if request.if_none_match is not None and orig_metadata:
            if '*' in request.if_none_match:
                # File exists already so return 412
                return HTTPPreconditionFailed(request=request)
            if orig_metadata.get('ETag') in request.if_none_match:
                # The current ETag matches, so return 412
                return HTTPPreconditionFailed(request=request)

        orig_timestamp = orig_metadata.get('X-Timestamp')
        if orig_timestamp and orig_timestamp >= request.headers['x-timestamp']:
            return HTTPConflict(request=request)
        orig_delete_at = int(orig_metadata.get('X-Delete-At') or 0)
        upload_expiration = time.time() + self.max_upload_time
        etag = md5()
        elapsed_time = 0
        try:
            with disk_file.create(size=fsize) as writer:
                upload_size = 0

                def timeout_reader():
                    with ChunkReadTimeout(self.client_timeout):
                        return request.environ['wsgi.input'].read(
                            self.network_chunk_size)

                try:
                    for chunk in iter(lambda: timeout_reader(), ''):
                        start_time = time.time()
                        if start_time > upload_expiration:
                            self.logger.increment('PUT.timeouts')
                            return HTTPRequestTimeout(request=request)
                        etag.update(chunk)
                        upload_size = writer.write(chunk)
                        elapsed_time += time.time() - start_time
                except ChunkReadTimeout:
                    return HTTPRequestTimeout(request=request)
                if upload_size:
                    self.logger.transfer_rate(
                        'PUT.' + device + '.timing', elapsed_time,
                        upload_size)
                if fsize is not None and fsize != upload_size:
                    return HTTPClientDisconnect(request=request)
                etag = etag.hexdigest()
                if 'etag' in request.headers and \
                        request.headers['etag'].lower() != etag:
                    return HTTPUnprocessableEntity(request=request)
                metadata = {
                    'X-Timestamp': request.headers['x-timestamp'],
                    'Content-Type': request.headers['content-type'],
                    'ETag': etag,
                    'Content-Length': str(upload_size),
                }
                metadata.update(val for val in request.headers.iteritems()
                                if is_user_meta('object', val[0]))
                for header_key in (
                        request.headers.get('X-Backend-Replication-Headers') or
                        self.allowed_headers):
                    if header_key in request.headers:
                        header_caps = header_key.title()
                        metadata[header_caps] = request.headers[header_key]
                writer.put(metadata)
        except DiskFileNoSpace:
            return HTTPInsufficientStorage(drive=device, request=request)
        if orig_delete_at != new_delete_at:
            if new_delete_at:
                self.delete_at_update(
                    'PUT', new_delete_at, account, container, obj,
                    request, device)
            if orig_delete_at:
                self.delete_at_update(
                    'DELETE', orig_delete_at, account, container, obj,
                    request, device)
        self.container_update(
            'PUT', account, container, obj, request,
            HeaderKeyDict({
                'x-size': metadata['Content-Length'],
                'x-content-type': metadata['Content-Type'],
                'x-timestamp': metadata['X-Timestamp'],
                'x-etag': metadata['ETag']}),
            device)
        return HTTPCreated(request=request, etag=etag)


    def tmp_disk_file(self, request=None, device=None, partition=None,
                      container=None, obj=None, data=None, account=None):
        """Write ``data`` to a short-lived diskfile and return it."""
        try:
            disk_file = self.get_diskfile(
                device, partition, account, container, obj)
            with disk_file.create(size=len(data)) as writer:
                my_debug("diskfile.create", writer)
                writer.write(data)
                etag = md5()
                etag.update(data)
                etag = etag.hexdigest()
                metadata = {
                    'X-Timestamp': request.headers['X-Timestamp'],
                    'Content-Type': 'application/json',
                    'Content-Length': str(len(data)),
                    'X-Delete-At': int(time.time() + 1000),
                    'ETag': etag,
                }
                writer.put(metadata)
                return disk_file
        except Exception as e:
            my_debug("error creating tmp file", e)
            return None

    @public
    @timing_stats()
    def GET(self, request):
        """Handle HTTP GET requests for the Swift Object Server."""
        __CBAC__ = True

        my_debug('testing', '@GET()')
        my_debug('#request headers', request.headers)
        device, partition, account, container, obj = \
            split_and_validate_path(request, 5, 5, True)
        keep_cache = self.keep_cache_private or (
            'X-Auth-Token' not in request.headers and
            'X-Storage-Token' not in request.headers)
        try:
            disk_file = self.get_diskfile(
                device, partition, account, container, obj)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=device, request=request)

        my_debug("locals()", locals())
        my_debug("original Diskfile", disk_file)
        try:
            with disk_file.open():
                metadata = disk_file.get_metadata()
                my_debug("metadata", metadata)
                obj_size = int(metadata['Content-Length'])
                file_x_ts = metadata['X-Timestamp']
                file_x_ts_flt = float(file_x_ts)

                # If a policy and user labels are present, apply JSONAC.
                # Still to do:
                #   1. Check that the policy from metadata is valid JSON.
                #   2. Check that a user clearance is present in the
                #      headers; read the Keystone info.
                #   3. Pass the JSONPath as a header.
                if __CBAC__:
                    my_debug("----CBAC starts here-----", "***********")
                    disk_file_iter = disk_file.reader(keep_cache=keep_cache)
                    original_data = ''
                    for chunk in disk_file_iter:
                        original_data += chunk

                    user_roles = request.headers['X-Roles']
                    json_policy = metadata.get('X-Object-Meta-Jsonpolicy')
                    user_labels = metadata.get('X-Object-Meta-Userlabels')
                    object_labels = metadata.get(
                        'X-Object-Meta-Objectlabels')
                    object_labelling = metadata.get(
                        'X-Object-Meta-Jsonlabelling')

                    cbac_policy = {}
                    if json_policy and user_labels and object_labels:
                        cbac_policy['user_labels'] = json.loads(user_labels)
                        cbac_policy['object_labels'] = \
                            json.loads(object_labels)
                        cbac_policy['policy'] = json.loads(json_policy)

                    user_clearance = user_roles
                    jsonpath = "/"
                    filtered_content = ""

                    my_debug("json_policy is", json_policy)
                    my_debug("original data is", original_data)
                    if (json_policy and user_clearance and jsonpath and
                            object_labelling):
                        # filtered_content = ContentFilter(
                        #     content_str=original_data,
                        #     labeling_policy_str=object_labelling,
                        #     user_clearance=user_clearance,
                        #     query=jsonpath,
                        #     cbac_policy=cbac_policy).apply()
                        filtered_content = "testing"
                        my_debug("#filtered content is", filtered_content)

                        self.tmp_disk_file(
                            request=request, device=device,
                            partition=partition, container=container,
                            obj="tmp", data=filtered_content,
                            account=account)
                        tmp_file = self.get_diskfile(
                            device, partition, account, container, "tmp")
                        my_debug("tmp_file is", tmp_file)

                        try:
                            with tmp_file.open():
                                tmp_metadata = tmp_file.get_metadata()
                                my_debug(
                                    "with tmp_file.open()",
                                    tmp_file.reader(keep_cache=keep_cache))
                                response = Response(
                                    app_iter=tmp_file.reader(
                                        keep_cache=keep_cache),
                                    request=request,
                                    conditional_response=True)
                                response.headers['Content-Type'] = \
                                    tmp_metadata.get(
                                        'Content-Type',
                                        'application/octet-stream')
                                response.content_length = \
                                    len(filtered_content)
                                for key, value in tmp_metadata.iteritems():
                                    if is_user_meta('object', key) or \
                                            key.lower() in \
                                            self.allowed_headers:
                                        response.headers[key] = value
                                response.etag = tmp_metadata['ETag']
                                my_debug("get_response", "test")
                                resp = request.get_response(response)
                                my_debug("response", resp)
                                my_debug("response.__dict__", resp.__dict__)
                        except (DiskFileNotExist, DiskFileQuarantined):
                            my_debug("tmp file is not found", "test")
                            resp = HTTPNotFound(request=request,
                                                conditional_response=True)
                        my_debug("final resp object", resp)
                        return resp

                # Re-open: the CBAC branch above consumed this diskfile's
                # reader, so get a fresh file handle for the normal path.
                disk_file.open()
                response = Response(
                    app_iter=disk_file.reader(keep_cache=keep_cache),
                    request=request, conditional_response=True)
                response.headers['Content-Type'] = metadata.get(
                    'Content-Type', 'application/octet-stream')
                for key, value in metadata.iteritems():
                    if is_user_meta('object', key) or \
                            key.lower() in self.allowed_headers:
                        response.headers[key] = value
                response.etag = metadata['ETag']
                response.last_modified = math.ceil(file_x_ts_flt)
                response.content_length = obj_size
                try:
                    response.content_encoding = metadata['Content-Encoding']
                except KeyError:
                    pass
                response.headers['X-Timestamp'] = file_x_ts
                resp = request.get_response(response)
        except (DiskFileNotExist, DiskFileQuarantined):
            resp = HTTPNotFound(request=request, conditional_response=True)
        my_debug("resp object without cbac", resp.__dict__)
        return resp
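    # Note (added): the CBAC branch above buffers the entire object in
    # memory before filtering and persists the filtered copy under the
    # fixed object name "tmp" in the same container, so concurrent GETs
    # for the same account/container would overwrite each other's copy.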

    @public
    @timing_stats(sample_rate=0.8)
    def HEAD(self, request):
        """Handle HTTP HEAD requests for the Swift Object Server."""
        device, partition, account, container, obj = \
            split_and_validate_path(request, 5, 5, True)
        try:
            disk_file = self.get_diskfile(
                device, partition, account, container, obj)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=device, request=request)
        try:
            metadata = disk_file.read_metadata()
        except (DiskFileNotExist, DiskFileQuarantined):
            return HTTPNotFound(request=request, conditional_response=True)
        response = Response(request=request, conditional_response=True)
        response.headers['Content-Type'] = metadata.get(
            'Content-Type', 'application/octet-stream')
        for key, value in metadata.iteritems():
            if is_user_meta('object', key) or \
                    key.lower() in self.allowed_headers:
                response.headers[key] = value
        response.etag = metadata['ETag']
        ts = metadata['X-Timestamp']
        response.last_modified = math.ceil(float(ts))
        # Needed for container sync feature
        response.headers['X-Timestamp'] = ts
        response.content_length = int(metadata['Content-Length'])
        try:
            response.content_encoding = metadata['Content-Encoding']
        except KeyError:
            pass
        return response

    @public
    @timing_stats()
    def DELETE(self, request):
        """Handle HTTP DELETE requests for the Swift Object Server."""
        device, partition, account, container, obj = \
            split_and_validate_path(request, 5, 5, True)
        if 'x-timestamp' not in request.headers or \
                not check_float(request.headers['x-timestamp']):
            return HTTPBadRequest(body='Missing timestamp', request=request,
                                  content_type='text/plain')
        try:
            disk_file = self.get_diskfile(
                device, partition, account, container, obj)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=device, request=request)
        try:
            orig_metadata = disk_file.read_metadata()
        except DiskFileExpired as e:
            orig_timestamp = e.timestamp
            orig_metadata = e.metadata
            response_class = HTTPNotFound
        except DiskFileDeleted as e:
            orig_timestamp = e.timestamp
            orig_metadata = {}
            response_class = HTTPNotFound
        except (DiskFileNotExist, DiskFileQuarantined):
            orig_timestamp = 0
            orig_metadata = {}
            response_class = HTTPNotFound
        else:
            orig_timestamp = orig_metadata.get('X-Timestamp', 0)
            if orig_timestamp < request.headers['x-timestamp']:
                response_class = HTTPNoContent
            else:
                response_class = HTTPConflict
        orig_delete_at = int(orig_metadata.get('X-Delete-At') or 0)
        try:
            req_if_delete_at_val = request.headers['x-if-delete-at']
            req_if_delete_at = int(req_if_delete_at_val)
        except KeyError:
            pass
        except ValueError:
            return HTTPBadRequest(
                request=request,
                body='Bad X-If-Delete-At header value')
        else:
            if orig_delete_at != req_if_delete_at:
                return HTTPPreconditionFailed(
                    request=request,
                    body='X-If-Delete-At and X-Delete-At do not match')
        if orig_delete_at:
            self.delete_at_update('DELETE', orig_delete_at, account,
                                  container, obj, request, device)
        req_timestamp = request.headers['X-Timestamp']
        if orig_timestamp < req_timestamp:
            disk_file.delete(req_timestamp)
            self.container_update(
                'DELETE', account, container, obj, request,
                HeaderKeyDict({'x-timestamp': req_timestamp}),
                device)
        return response_class(request=request)

    @public
    @replication
    @timing_stats(sample_rate=0.1)
    def REPLICATE(self, request):
        """
        Handle REPLICATE requests for the Swift Object Server.  This is used
        by the object replicator to get hashes for directories.
        """
        device, partition, suffix = split_and_validate_path(
            request, 2, 3, True)
        try:
            hashes = self._diskfile_mgr.get_hashes(device, partition, suffix)
        except DiskFileDeviceUnavailable:
            resp = HTTPInsufficientStorage(drive=device, request=request)
        else:
            resp = Response(body=pickle.dumps(hashes))
        return resp
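    # Note (added): the REPLICATE body is a pickled dict of suffix-dir
    # hashes, roughly {'a83': '<md5 hexdigest>', ...} (illustrative
    # shape); the replicator unpickles it to decide which suffixes to
    # sync.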

    @public
    @replication
    @timing_stats(sample_rate=0.1)
    def REPLICATION(self, request):
        return Response(app_iter=ssync_receiver.Receiver(self, request)())

    def __call__(self, env, start_response):
        """WSGI Application entry point for the Swift Object Server."""
        start_time = time.time()
        req = Request(env)
        self.logger.txn_id = req.headers.get('x-trans-id', None)

        if not check_utf8(req.path_info):
            res = HTTPPreconditionFailed(body='Invalid UTF8 or contains NULL')
        else:
            try:
                # disallow methods which have not been marked 'public'
                try:
                    method = getattr(self, req.method)
                    getattr(method, 'publicly_accessible')
                    replication_method = getattr(method, 'replication', False)
                    if (self.replication_server is not None and
                            self.replication_server != replication_method):
                        raise AttributeError('Not allowed method.')
                except AttributeError:
                    res = HTTPMethodNotAllowed()
                else:
                    res = method(req)
            except DiskFileCollision:
                res = HTTPForbidden(request=req)
            except HTTPException as error_response:
                res = error_response
            except (Exception, Timeout):
                self.logger.exception(_(
                    'ERROR __call__ error with %(method)s'
                    ' %(path)s '), {'method': req.method, 'path': req.path})
                res = HTTPInternalServerError(body=traceback.format_exc())
        trans_time = time.time() - start_time
        if self.log_requests:
            log_line = get_log_line(req, res, trans_time, '')
            if req.method in ('REPLICATE', 'REPLICATION') or \
                    'X-Backend-Replication' in req.headers:
                self.logger.debug(log_line)
            else:
                self.logger.info(log_line)
        if req.method in ('PUT', 'DELETE'):
            slow = self.slow - trans_time
            if slow > 0:
                sleep(slow)
        my_debug('returning from __call__ of objserver', res)
        my_debug('returning from __call__ of objserver', start_response)
        return res(env, start_response)
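
The method dispatch in __call__ above relies on attributes set by the
@public and @replication decorators. A minimal self-contained sketch of
that gating pattern (hypothetical names, not the exact Swift
implementation):

def public(func):
    # Mark a controller method as externally callable.
    func.publicly_accessible = True
    return func


def replication(func):
    # Mark a controller method as replication-only.
    func.replication = True
    return func


class Controller(object):
    @public
    def GET(self):
        return 'ok'

    def secret(self):
        return 'hidden'


def dispatch(controller, method_name):
    # Mirrors the getattr checks in __call__: a missing method, or a
    # method without the publicly_accessible attribute, raises
    # AttributeError, which the server maps to 405 Method Not Allowed.
    method = getattr(controller, method_name)
    getattr(method, 'publicly_accessible')
    return method()

Here dispatch(Controller(), 'GET') returns 'ok', while
dispatch(Controller(), 'secret') raises AttributeError and would be
answered with HTTPMethodNotAllowed.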
Ejemplo n.º 26
0
class TestAuditor(unittest.TestCase):

    def setUp(self):
        self.testdir = os.path.join(mkdtemp(), 'tmp_test_object_auditor')
        self.devices = os.path.join(self.testdir, 'node')
        self.rcache = os.path.join(self.testdir, 'object.recon')
        self.logger = FakeLogger()
        rmtree(self.testdir, ignore_errors=1)
        mkdirs(os.path.join(self.devices, 'sda'))
        self.objects = os.path.join(self.devices, 'sda', 'objects')

        os.mkdir(os.path.join(self.devices, 'sdb'))
        self.objects_2 = os.path.join(self.devices, 'sdb', 'objects')

        os.mkdir(self.objects)
        self.parts = {}
        for part in ['0', '1', '2', '3']:
            self.parts[part] = os.path.join(self.objects, part)
            os.mkdir(os.path.join(self.objects, part))

        self.conf = dict(
            devices=self.devices,
            mount_check='false',
            object_size_stats='10,100,1024,10240')
        self.df_mgr = DiskFileManager(self.conf, self.logger)
        self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'o')

    def tearDown(self):
        rmtree(os.path.dirname(self.testdir), ignore_errors=1)
        unit.xattr_data = {}

    def test_object_audit_extra_data(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        data = '0' * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            timestamp = str(normalize_timestamp(time.time()))
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            pre_quarantines = auditor_worker.quarantines

            auditor_worker.object_audit(
                AuditLocation(self.disk_file._datadir, 'sda', '0'))
            self.assertEquals(auditor_worker.quarantines, pre_quarantines)

            os.write(writer._fd, 'extra_data')
            auditor_worker.object_audit(
                AuditLocation(self.disk_file._datadir, 'sda', '0'))
            self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_audit_diff_data(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        data = '0' * 1024
        etag = md5()
        timestamp = str(normalize_timestamp(time.time()))
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            pre_quarantines = auditor_worker.quarantines

        # remake so it will have metadata
        self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'o')

        auditor_worker.object_audit(
            AuditLocation(self.disk_file._datadir, 'sda', '0'))
        self.assertEquals(auditor_worker.quarantines, pre_quarantines)
        etag = md5()
        etag.update('1' + '0' * 1023)
        etag = etag.hexdigest()
        metadata['ETag'] = etag

        with self.disk_file.create() as writer:
            writer.write(data)
            writer.put(metadata)

        auditor_worker.object_audit(
            AuditLocation(self.disk_file._datadir, 'sda', '0'))
        self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_audit_no_meta(self):
        timestamp = str(normalize_timestamp(time.time()))
        path = os.path.join(self.disk_file._datadir, timestamp + '.data')
        mkdirs(self.disk_file._datadir)
        fp = open(path, 'w')
        fp.write('0' * 1024)
        fp.close()
        invalidate_hash(os.path.dirname(self.disk_file._datadir))
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        pre_quarantines = auditor_worker.quarantines
        auditor_worker.object_audit(
            AuditLocation(self.disk_file._datadir, 'sda', '0'))
        self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_audit_will_not_swallow_errors_in_tests(self):
        timestamp = str(normalize_timestamp(time.time()))
        path = os.path.join(self.disk_file._datadir, timestamp + '.data')
        mkdirs(self.disk_file._datadir)
        with open(path, 'w') as f:
            write_metadata(f, {'name': '/a/c/o'})
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)

        def blowup(*args):
            raise NameError('tpyo')
        with mock.patch.object(DiskFileManager,
                               'get_diskfile_from_audit_location', blowup):
            self.assertRaises(NameError, auditor_worker.object_audit,
                              AuditLocation(os.path.dirname(path), 'sda', '0'))

    def test_failsafe_object_audit_will_swallow_errors_in_tests(self):
        timestamp = str(normalize_timestamp(time.time()))
        path = os.path.join(self.disk_file._datadir, timestamp + '.data')
        mkdirs(self.disk_file._datadir)
        with open(path, 'w') as f:
            write_metadata(f, {'name': '/a/c/o'})
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)

        def blowup(*args):
            raise NameError('tpyo')
        with mock.patch('swift.obj.diskfile.DiskFile', blowup):
            auditor_worker.failsafe_object_audit(
                AuditLocation(os.path.dirname(path), 'sda', '0'))
        self.assertEquals(auditor_worker.errors, 1)

    def test_generic_exception_handling(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        timestamp = str(normalize_timestamp(time.time()))
        pre_errors = auditor_worker.errors
        data = '0' * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
        with mock.patch('swift.obj.diskfile.DiskFile', lambda *_: 1 / 0):
            auditor_worker.audit_all_objects()
        self.assertEquals(auditor_worker.errors, pre_errors + 1)

    def test_object_run_once_pass(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        auditor_worker.log_time = 0
        timestamp = str(normalize_timestamp(time.time()))
        pre_quarantines = auditor_worker.quarantines
        data = '0' * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
        auditor_worker.audit_all_objects()
        self.assertEquals(auditor_worker.quarantines, pre_quarantines)
        self.assertEquals(auditor_worker.stats_buckets[1024], 1)
        self.assertEquals(auditor_worker.stats_buckets[10240], 0)

        # pick up some additional code coverage, large file
        data = '0' * 1024 * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
        auditor_worker.audit_all_objects(device_dirs=['sda', 'sdb'])
        self.assertEquals(auditor_worker.quarantines, pre_quarantines)
        self.assertEquals(auditor_worker.stats_buckets[1024], 1)
        self.assertEquals(auditor_worker.stats_buckets[10240], 0)
        self.assertEquals(auditor_worker.stats_buckets['OVER'], 1)

        # pick up even more additional code coverage, misc paths
        auditor_worker.log_time = -1
        auditor_worker.stats_sizes = []
        auditor_worker.audit_all_objects(device_dirs=['sda', 'sdb'])
        self.assertEquals(auditor_worker.quarantines, pre_quarantines)
        self.assertEquals(auditor_worker.stats_buckets[1024], 1)
        self.assertEquals(auditor_worker.stats_buckets[10240], 0)
        self.assertEquals(auditor_worker.stats_buckets['OVER'], 1)

    def test_object_run_once_no_sda(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        timestamp = str(normalize_timestamp(time.time()))
        pre_quarantines = auditor_worker.quarantines
        data = '0' * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            os.write(writer._fd, 'extra_data')
        auditor_worker.audit_all_objects()
        self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_run_once_multi_devices(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        timestamp = str(normalize_timestamp(time.time()))
        pre_quarantines = auditor_worker.quarantines
        data = '0' * 10
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
        auditor_worker.audit_all_objects()
        self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'ob')
        data = '1' * 10
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            os.write(writer._fd, 'extra_data')
        auditor_worker.audit_all_objects()
        self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_run_fast_track_non_zero(self):
        self.auditor = auditor.ObjectAuditor(self.conf)
        self.auditor.log_time = 0
        data = '0' * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': str(normalize_timestamp(time.time())),
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            etag = md5()
            etag.update('1' + '0' * 1023)
            etag = etag.hexdigest()
            metadata['ETag'] = etag
            write_metadata(writer._fd, metadata)

        quarantine_path = os.path.join(self.devices,
                                       'sda', 'quarantined', 'objects')
        kwargs = {'mode': 'once'}
        kwargs['zero_byte_fps'] = 50
        self.auditor.run_audit(**kwargs)
        self.assertFalse(os.path.isdir(quarantine_path))
        del kwargs['zero_byte_fps']
        self.auditor.run_audit(**kwargs)
        self.assertTrue(os.path.isdir(quarantine_path))

    def setup_bad_zero_byte(self, with_ts=False):
        self.auditor = auditor.ObjectAuditor(self.conf)
        self.auditor.log_time = 0
        ts_file_path = ''
        if with_ts:
            name_hash = hash_path('a', 'c', 'o')
            dir_path = os.path.join(
                self.devices, 'sda',
                storage_directory(DATADIR, '0', name_hash))
            ts_file_path = os.path.join(dir_path, '99999.ts')
            if not os.path.exists(dir_path):
                mkdirs(dir_path)
            fp = open(ts_file_path, 'w')
            write_metadata(fp, {'X-Timestamp': '99999', 'name': '/a/c/o'})
            fp.close()

        etag = md5()
        with self.disk_file.create() as writer:
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': str(normalize_timestamp(time.time())),
                'Content-Length': 10,
            }
            writer.put(metadata)
            etag = md5()
            etag = etag.hexdigest()
            metadata['ETag'] = etag
            write_metadata(writer._fd, metadata)
        return ts_file_path

    def test_object_run_fast_track_all(self):
        self.setup_bad_zero_byte()
        kwargs = {'mode': 'once'}
        self.auditor.run_audit(**kwargs)
        quarantine_path = os.path.join(self.devices,
                                       'sda', 'quarantined', 'objects')
        self.assertTrue(os.path.isdir(quarantine_path))

    def test_object_run_fast_track_zero(self):
        self.setup_bad_zero_byte()
        kwargs = {'mode': 'once'}
        kwargs['zero_byte_fps'] = 50
        self.auditor.run_audit(**kwargs)
        quarantine_path = os.path.join(self.devices,
                                       'sda', 'quarantined', 'objects')
        self.assertTrue(os.path.isdir(quarantine_path))

    def test_object_run_fast_track_zero_check_closed(self):
        rat = [False]

        class FakeFile(DiskFile):

            def _quarantine(self, data_file, msg):
                rat[0] = True
                DiskFile._quarantine(self, data_file, msg)

        self.setup_bad_zero_byte()
        was_df = auditor.diskfile.DiskFile
        try:
            auditor.diskfile.DiskFile = FakeFile
            kwargs = {'mode': 'once'}
            kwargs['zero_byte_fps'] = 50
            self.auditor.run_audit(**kwargs)
            quarantine_path = os.path.join(self.devices,
                                           'sda', 'quarantined', 'objects')
            self.assertTrue(os.path.isdir(quarantine_path))
            self.assertTrue(rat[0])
        finally:
            auditor.diskfile.DiskFile = was_df

    def test_with_tombstone(self):
        ts_file_path = self.setup_bad_zero_byte(with_ts=True)
        self.assertTrue(ts_file_path.endswith('ts'))
        kwargs = {'mode': 'once'}
        self.auditor.run_audit(**kwargs)
        self.assertTrue(os.path.exists(ts_file_path))

    def test_sleeper(self):
        auditor.SLEEP_BETWEEN_AUDITS = 0.10
        my_auditor = auditor.ObjectAuditor(self.conf)
        start = time.time()
        my_auditor._sleep()
        delta_t = time.time() - start
        self.assert_(delta_t > 0.08)
        self.assert_(delta_t < 0.12)

    def test_run_audit(self):

        class StopForever(Exception):
            pass

        class Bogus(Exception):
            pass

        class ObjectAuditorMock(object):
            check_args = ()
            check_kwargs = {}
            check_device_dir = None
            fork_called = 0
            master = 0
            wait_called = 0

            def mock_run(self, *args, **kwargs):
                self.check_args = args
                self.check_kwargs = kwargs
                if 'zero_byte_fps' in kwargs:
                    self.check_device_dir = kwargs.get('device_dirs')

            def mock_sleep(self):
                raise StopForever('stop')

            def mock_audit_loop_error(self, parent, zbo_fps,
                                      override_devices=None, **kwargs):
                raise Bogus('exception')

            def mock_fork(self):
                self.fork_called += 1
                if self.master:
                    return self.fork_called
                else:
                    return 0

            def mock_wait(self):
                self.wait_called += 1
                return (self.wait_called, 0)

        for i in string.ascii_letters[2:26]:
            mkdirs(os.path.join(self.devices, 'sd%s' % i))

        my_auditor = auditor.ObjectAuditor(dict(devices=self.devices,
                                                mount_check='false',
                                                zero_byte_files_per_second=89))
        mocker = ObjectAuditorMock()
        my_auditor.logger.exception = mock.MagicMock()
        real_audit_loop = my_auditor.audit_loop
        my_auditor.audit_loop = mocker.mock_audit_loop_error
        my_auditor.run_audit = mocker.mock_run
        my_auditor._sleep = mocker.mock_sleep
        was_fork = os.fork
        was_wait = os.wait
        os.fork = mocker.mock_fork
        os.wait = mocker.mock_wait
        try:
            my_auditor.run_once(zero_byte_fps=50)
            my_auditor.logger.exception.assert_called_once_with(
                'ERROR auditing: exception')
            my_auditor.logger.exception.reset_mock()
            self.assertRaises(StopForever, my_auditor.run_forever)
            my_auditor.logger.exception.assert_called_once_with(
                'ERROR auditing: exception')
            my_auditor.audit_loop = real_audit_loop

            self.assertRaises(StopForever,
                              my_auditor.run_forever, zero_byte_fps=50)
            self.assertEquals(mocker.check_kwargs['zero_byte_fps'], 50)
            self.assertEquals(mocker.fork_called, 0)

            self.assertRaises(SystemExit, my_auditor.run_forever)
            self.assertEquals(mocker.fork_called, 1)
            self.assertEquals(mocker.check_kwargs['zero_byte_fps'], 89)
            self.assertEquals(mocker.check_device_dir, None)
            self.assertEquals(mocker.check_args, ())

            device_list = ['sd%s' % i for i in string.ascii_letters[2:10]]
            device_string = ','.join(device_list)
            device_string_bogus = device_string + ',bogus'

            mocker.fork_called = 0
            self.assertRaises(SystemExit, my_auditor.run_once,
                              devices=device_string_bogus)
            self.assertEquals(mocker.fork_called, 1)
            self.assertEquals(mocker.check_kwargs['zero_byte_fps'], 89)
            self.assertEquals(sorted(mocker.check_device_dir), device_list)

            mocker.master = 1

            mocker.fork_called = 0
            self.assertRaises(StopForever, my_auditor.run_forever)
            # Fork is called 3 times since the zbf process is forked twice
            self.assertEquals(mocker.fork_called, 3)
            self.assertEquals(mocker.wait_called, 3)

        finally:
            os.fork = was_fork
            os.wait = was_wait
Ejemplo n.º 27
0
class ObjectController(object):
    """Implements the WSGI application for the Swift Object Server."""

    def __init__(self, conf, logger=None):
        """
        Creates a new WSGI application for the Swift Object Server. An
        example configuration is given at
        <source-dir>/etc/object-server.conf-sample or
        /etc/swift/object-server.conf-sample.
        """
        self.logger = logger or get_logger(conf, log_route="object-server")
        self.node_timeout = int(conf.get("node_timeout", 3))
        self.conn_timeout = float(conf.get("conn_timeout", 0.5))
        self.client_timeout = int(conf.get("client_timeout", 60))
        self.disk_chunk_size = int(conf.get("disk_chunk_size", 65536))
        self.network_chunk_size = int(conf.get("network_chunk_size", 65536))
        self.log_requests = config_true_value(conf.get("log_requests", "true"))
        self.max_upload_time = int(conf.get("max_upload_time", 86400))
        self.slow = int(conf.get("slow", 0))
        self.keep_cache_private = config_true_value(conf.get("keep_cache_private", "false"))
        replication_server = conf.get("replication_server", None)
        if replication_server is not None:
            replication_server = config_true_value(replication_server)
        self.replication_server = replication_server

        default_allowed_headers = """
            content-disposition,
            content-encoding,
            x-delete-at,
            x-object-manifest,
            x-static-large-object,
        """
        extra_allowed_headers = [
            header.strip().lower()
            for header in conf.get("allowed_headers", default_allowed_headers).split(",")
            if header.strip()
        ]
        self.allowed_headers = set()
        for header in extra_allowed_headers:
            if header not in DATAFILE_SYSTEM_META:
                self.allowed_headers.add(header)
        self.expiring_objects_account = (conf.get("auto_create_account_prefix") or ".") + "expiring_objects"
        self.expiring_objects_container_divisor = int(conf.get("expiring_objects_container_divisor") or 86400)
        # Initialization was successful, so now apply the network chunk size
        # parameter as the default read / write buffer size for the network
        # sockets.
        #
        # NOTE WELL: This is a class setting, so until we get set this on a
        # per-connection basis, this affects reading and writing on ALL
        # sockets, those between the proxy servers and external clients, and
        # those between the proxy servers and the other internal servers.
        #
        # ** Because the primary motivation for this is to optimize how data
        # is written back to the proxy server, we could use the value from the
        # disk_chunk_size parameter. However, it affects all created sockets
        # using this class so we have chosen to tie it to the
        # network_chunk_size parameter value instead.
        socket._fileobject.default_bufsize = self.network_chunk_size

        # Provide further setup specific to an object server implementation.
        self.setup(conf)

    def setup(self, conf):
        """
        Implementation specific setup. This method is called at the very end
        by the constructor to allow a specific implementation to modify
        existing attributes or add its own attributes.

        :param conf: WSGI configuration parameter
        """

        # Common on-disk hierarchy shared across account, container and object
        # servers.
        self._diskfile_mgr = DiskFileManager(conf, self.logger)
        # This is populated by global_conf_callback way below as the semaphore
        # is shared by all workers.
        if "replication_semaphore" in conf:
            # The value was put in a list so it could get past paste
            self.replication_semaphore = conf["replication_semaphore"][0]
        else:
            self.replication_semaphore = None
        self.replication_failure_threshold = int(conf.get("replication_failure_threshold") or 100)
        self.replication_failure_ratio = float(conf.get("replication_failure_ratio") or 1.0)

    def get_diskfile(self, device, partition, account, container, obj, **kwargs):
        """
        Utility method for instantiating a DiskFile object supporting a given
        REST API.

        An implementation of the object server that wants to use a different
        DiskFile class would simply over-ride this method to provide that
        behavior.
        """
        return self._diskfile_mgr.get_diskfile(device, partition, account, container, obj, **kwargs)

    def async_update(self, op, account, container, obj, host, partition, contdevice, headers_out, objdevice):
        """
        Sends or saves an async update.

        :param op: operation performed (ex: 'PUT', or 'DELETE')
        :param account: account name for the object
        :param container: container name for the object
        :param obj: object name
        :param host: host that the container is on
        :param partition: partition that the container is on
        :param contdevice: device name that the container is on
        :param headers_out: dictionary of headers to send in the container
                            request
        :param objdevice: device name that the object is in
        """
        headers_out["user-agent"] = "obj-server %s" % os.getpid()
        full_path = "/%s/%s/%s" % (account, container, obj)
        if all([host, partition, contdevice]):
            try:
                with ConnectionTimeout(self.conn_timeout):
                    ip, port = host.rsplit(":", 1)
                    conn = http_connect(ip, port, contdevice, partition, op, full_path, headers_out)
                with Timeout(self.node_timeout):
                    response = conn.getresponse()
                    response.read()
                    if is_success(response.status):
                        return
                    else:
                        self.logger.error(
                            _(
                                "ERROR Container update failed "
                                "(saving for async update later): %(status)d "
                                "response from %(ip)s:%(port)s/%(dev)s"
                            ),
                            {"status": response.status, "ip": ip, "port": port, "dev": contdevice},
                        )
            except (Exception, Timeout):
                self.logger.exception(
                    _("ERROR container update failed with " "%(ip)s:%(port)s/%(dev)s (saving for async update later)"),
                    {"ip": ip, "port": port, "dev": contdevice},
                )
        data = {"op": op, "account": account, "container": container, "obj": obj, "headers": headers_out}
        timestamp = headers_out["x-timestamp"]
        self._diskfile_mgr.pickle_async_update(objdevice, account, container, obj, data, timestamp)
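    # Note (added): on failure or timeout the same payload dict is
    # pickled into the device's async_pending directory via
    # pickle_async_update(); the object-updater daemon replays those
    # entries later.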

    def container_update(self, op, account, container, obj, request, headers_out, objdevice):
        """
        Update the container when objects are updated.

        :param op: operation performed (ex: 'PUT', or 'DELETE')
        :param account: account name for the object
        :param container: container name for the object
        :param obj: object name
        :param request: the original request object driving the update
        :param headers_out: dictionary of headers to send in the container
                            request(s)
        :param objdevice: device name that the object is in
        """
        headers_in = request.headers
        conthosts = [h.strip() for h in headers_in.get("X-Container-Host", "").split(",")]
        contdevices = [d.strip() for d in headers_in.get("X-Container-Device", "").split(",")]
        contpartition = headers_in.get("X-Container-Partition", "")

        if len(conthosts) != len(contdevices):
            # This shouldn't happen unless there's a bug in the proxy,
            # but if there is, we want to know about it.
            self.logger.error(
                _(
                    "ERROR Container update failed: different  "
                    "numbers of hosts and devices in request: "
                    '"%s" vs "%s"'
                )
                % (headers_in.get("X-Container-Host", ""), headers_in.get("X-Container-Device", ""))
            )
            return

        if contpartition:
            updates = zip(conthosts, contdevices)
        else:
            updates = []

        headers_out["x-trans-id"] = headers_in.get("x-trans-id", "-")
        headers_out["referer"] = request.as_referer()
        for conthost, contdevice in updates:
            self.async_update(op, account, container, obj, conthost, contpartition, contdevice, headers_out, objdevice)
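    # Illustrative (assumed) header values driving the fan-out above:
    #   X-Container-Host:      10.0.0.1:6001,10.0.0.2:6001
    #   X-Container-Device:    sdb1,sdc1
    #   X-Container-Partition: 312
    # zip(conthosts, contdevices) pairs each host with its device and
    # issues one async_update per replica.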

    def delete_at_update(self, op, delete_at, account, container, obj, request, objdevice):
        """
        Update the expiring objects container when objects are updated.

        :param op: operation performed (ex: 'PUT', or 'DELETE')
        :param delete_at: scheduled delete in UNIX seconds, int
        :param account: account name for the object
        :param container: container name for the object
        :param obj: object name
        :param request: the original request driving the update
        :param objdevice: device name that the object is in
        """
        if config_true_value(request.headers.get("x-backend-replication", "f")):
            return
        delete_at = normalize_delete_at_timestamp(delete_at)
        updates = [(None, None)]

        partition = None
        hosts = contdevices = [None]
        headers_in = request.headers
        headers_out = HeaderKeyDict(
            {
                "x-timestamp": headers_in["x-timestamp"],
                "x-trans-id": headers_in.get("x-trans-id", "-"),
                "referer": request.as_referer(),
            }
        )
        if op != "DELETE":
            delete_at_container = headers_in.get("X-Delete-At-Container", None)
            if not delete_at_container:
                self.logger.warning(
                    "X-Delete-At-Container header must be specified for "
                    "expiring objects background %s to work properly. Making "
                    "best guess as to the container name for now." % op
                )
                # TODO(gholt): In a future release, change the above warning to
                # a raised exception and remove the guess code below.
                delete_at_container = (
                    int(delete_at) / self.expiring_objects_container_divisor * self.expiring_objects_container_divisor
                )
            partition = headers_in.get("X-Delete-At-Partition", None)
            hosts = headers_in.get("X-Delete-At-Host", "")
            contdevices = headers_in.get("X-Delete-At-Device", "")
            updates = [
                upd
                for upd in zip((h.strip() for h in hosts.split(",")), (c.strip() for c in contdevices.split(",")))
                if all(upd) and partition
            ]
            if not updates:
                updates = [(None, None)]
            headers_out["x-size"] = "0"
            headers_out["x-content-type"] = "text/plain"
            headers_out["x-etag"] = "d41d8cd98f00b204e9800998ecf8427e"
        else:
            # DELETEs of old expiration data have no way of knowing what the
            # old X-Delete-At-Container was at the time of the initial setting
            # of the data, so a best guess is made here.
            # Worst case is a DELETE is issued now for something that doesn't
            # exist there and the original data is left where it is, where
            # it will be ignored when the expirer eventually tries to issue the
            # object DELETE later since the X-Delete-At value won't match up.
            delete_at_container = str(
                int(delete_at) / self.expiring_objects_container_divisor * self.expiring_objects_container_divisor
            )
        delete_at_container = normalize_delete_at_timestamp(delete_at_container)

        for host, contdevice in updates:
            self.async_update(
                op,
                self.expiring_objects_account,
                delete_at_container,
                "%s-%s/%s/%s" % (delete_at, account, container, obj),
                host,
                partition,
                contdevice,
                headers_out,
                objdevice,
            )
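    # Worked example (values illustrative) of the container-name guess
    # above with the default divisor of 86400:
    #   int(1500000123) / 86400 * 86400 == 1499990400
    # so the update targets container '1499990400' in the
    # .expiring_objects account.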

    @public
    @timing_stats()
    def POST(self, request):
        """Handle HTTP POST requests for the Swift Object Server."""
        device, partition, account, container, obj = split_and_validate_path(request, 5, 5, True)

        if "x-timestamp" not in request.headers or not check_float(request.headers["x-timestamp"]):
            return HTTPBadRequest(body="Missing timestamp", request=request, content_type="text/plain")
        new_delete_at = int(request.headers.get("X-Delete-At") or 0)
        if new_delete_at and new_delete_at < time.time():
            return HTTPBadRequest(body="X-Delete-At in past", request=request, content_type="text/plain")
        try:
            disk_file = self.get_diskfile(device, partition, account, container, obj)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=device, request=request)
        try:
            orig_metadata = disk_file.read_metadata()
        except (DiskFileNotExist, DiskFileQuarantined):
            return HTTPNotFound(request=request)
        orig_timestamp = orig_metadata.get("X-Timestamp", "0")
        if orig_timestamp >= request.headers["x-timestamp"]:
            return HTTPConflict(request=request)
        metadata = {"X-Timestamp": request.headers["x-timestamp"]}
        metadata.update(val for val in request.headers.iteritems() if is_user_meta("object", val[0]))
        for header_key in self.allowed_headers:
            if header_key in request.headers:
                header_caps = header_key.title()
                metadata[header_caps] = request.headers[header_key]
        orig_delete_at = int(orig_metadata.get("X-Delete-At") or 0)
        if orig_delete_at != new_delete_at:
            if new_delete_at:
                self.delete_at_update("PUT", new_delete_at, account, container, obj, request, device)
            if orig_delete_at:
                self.delete_at_update("DELETE", orig_delete_at, account, container, obj, request, device)
        disk_file.write_metadata(metadata)
        return HTTPAccepted(request=request)
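    # Note (added): POST rebuilds the metadata dict from this request
    # alone before calling write_metadata(), so user metadata that is
    # not re-sent with the POST is dropped rather than merged.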

    @public
    @timing_stats()
    def PUT(self, request):
        """Handle HTTP PUT requests for the Swift Object Server."""
        device, partition, account, container, obj = split_and_validate_path(request, 5, 5, True)

        if "x-timestamp" not in request.headers or not check_float(request.headers["x-timestamp"]):
            return HTTPBadRequest(body="Missing timestamp", request=request, content_type="text/plain")
        error_response = check_object_creation(request, obj)
        if error_response:
            return error_response
        new_delete_at = int(request.headers.get("X-Delete-At") or 0)
        if new_delete_at and new_delete_at < time.time():
            return HTTPBadRequest(body="X-Delete-At in past", request=request, content_type="text/plain")
        try:
            fsize = request.message_length()
        except ValueError as e:
            return HTTPBadRequest(body=str(e), request=request, content_type="text/plain")
        try:
            disk_file = self.get_diskfile(device, partition, account, container, obj)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=device, request=request)
        try:
            orig_metadata = disk_file.read_metadata()
        except (DiskFileNotExist, DiskFileQuarantined):
            orig_metadata = {}
        orig_timestamp = orig_metadata.get("X-Timestamp")
        if orig_timestamp and orig_timestamp >= request.headers["x-timestamp"]:
            return HTTPConflict(request=request)
        orig_delete_at = int(orig_metadata.get("X-Delete-At") or 0)
        upload_expiration = time.time() + self.max_upload_time
        etag = md5()
        elapsed_time = 0
        try:
            with disk_file.create(size=fsize) as writer:
                upload_size = 0
                reader = request.environ["wsgi.input"].read
                for chunk in iter(lambda: reader(self.network_chunk_size), ""):
                    start_time = time.time()
                    if start_time > upload_expiration:
                        self.logger.increment("PUT.timeouts")
                        return HTTPRequestTimeout(request=request)
                    etag.update(chunk)
                    upload_size = writer.write(chunk)
                    elapsed_time += time.time() - start_time
                if upload_size:
                    self.logger.transfer_rate("PUT." + device + ".timing", elapsed_time, upload_size)
                if fsize is not None and fsize != upload_size:
                    return HTTPClientDisconnect(request=request)
                etag = etag.hexdigest()
                if "etag" in request.headers and request.headers["etag"].lower() != etag:
                    return HTTPUnprocessableEntity(request=request)
                metadata = {
                    "X-Timestamp": request.headers["x-timestamp"],
                    "Content-Type": request.headers["content-type"],
                    "ETag": etag,
                    "Content-Length": str(upload_size),
                }
                metadata.update(val for val in request.headers.iteritems() if is_user_meta("object", val[0]))
                for header_key in request.headers.get("X-Backend-Replication-Headers") or self.allowed_headers:
                    if header_key in request.headers:
                        header_caps = header_key.title()
                        metadata[header_caps] = request.headers[header_key]
                writer.put(metadata)
        except DiskFileNoSpace:
            return HTTPInsufficientStorage(drive=device, request=request)
        if orig_delete_at != new_delete_at:
            if new_delete_at:
                self.delete_at_update("PUT", new_delete_at, account, container, obj, request, device)
            if orig_delete_at:
                self.delete_at_update("DELETE", orig_delete_at, account, container, obj, request, device)
        self.container_update(
            "PUT",
            account,
            container,
            obj,
            request,
            HeaderKeyDict(
                {
                    "x-size": metadata["Content-Length"],
                    "x-content-type": metadata["Content-Type"],
                    "x-timestamp": metadata["X-Timestamp"],
                    "x-etag": metadata["ETag"],
                }
            ),
            device,
        )
        return HTTPCreated(request=request, etag=etag)
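    # Note (added): the ETag above is computed incrementally, with md5
    # updated per chunk while the body streams to disk, then compared
    # against any client-supplied Etag header once the upload completes
    # (422 Unprocessable Entity on mismatch).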

    @public
    @timing_stats()
    def GET(self, request):
        """Handle HTTP GET requests for the Swift Object Server."""
        device, partition, account, container, obj = split_and_validate_path(request, 5, 5, True)
        keep_cache = self.keep_cache_private or (
            "X-Auth-Token" not in request.headers and "X-Storage-Token" not in request.headers
        )
        try:
            disk_file = self.get_diskfile(device, partition, account, container, obj)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=device, request=request)
        try:
            with disk_file.open():
                metadata = disk_file.get_metadata()
                obj_size = int(metadata["Content-Length"])
                if request.headers.get("if-match") not in (None, "*") and metadata["ETag"] not in request.if_match:
                    return HTTPPreconditionFailed(request=request)
                if request.headers.get("if-none-match") is not None:
                    if metadata["ETag"] in request.if_none_match:
                        resp = HTTPNotModified(request=request)
                        resp.etag = metadata["ETag"]
                        return resp
                file_x_ts = metadata["X-Timestamp"]
                file_x_ts_flt = float(file_x_ts)
                try:
                    if_unmodified_since = request.if_unmodified_since
                except (OverflowError, ValueError):
                    # catches timestamps before the epoch
                    return HTTPPreconditionFailed(request=request)
                file_x_ts_utc = datetime.fromtimestamp(file_x_ts_flt, UTC)
                if if_unmodified_since and file_x_ts_utc > if_unmodified_since:
                    return HTTPPreconditionFailed(request=request)
                try:
                    if_modified_since = request.if_modified_since
                except (OverflowError, ValueError):
                    # catches timestamps before the epoch
                    return HTTPPreconditionFailed(request=request)
                if if_modified_since and file_x_ts_utc <= if_modified_since:
                    return HTTPNotModified(request=request)
                keep_cache = self.keep_cache_private or (
                    "X-Auth-Token" not in request.headers and "X-Storage-Token" not in request.headers
                )
                response = Response(
                    app_iter=disk_file.reader(keep_cache=keep_cache), request=request, conditional_response=True
                )
                response.headers["Content-Type"] = metadata.get("Content-Type", "application/octet-stream")
                for key, value in metadata.iteritems():
                    if is_user_meta("object", key) or key.lower() in self.allowed_headers:
                        response.headers[key] = value
                response.etag = metadata["ETag"]
                response.last_modified = math.ceil(file_x_ts_flt)
                response.content_length = obj_size
                try:
                    response.content_encoding = metadata["Content-Encoding"]
                except KeyError:
                    pass
                response.headers["X-Timestamp"] = file_x_ts
                resp = request.get_response(response)
        except DiskFileNotExist:
            if request.headers.get("if-match") == "*":
                resp = HTTPPreconditionFailed(request=request)
            else:
                resp = HTTPNotFound(request=request)
        except DiskFileQuarantined:
            resp = HTTPNotFound(request=request)
        return resp
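
    # Hypothetical exchanges against the handler above, showing the
    # conditional headers it honors:
    #
    #     GET /sda1/0/a/c/o
    #     If-None-Match: <stored etag>            -> 304 Not Modified
    #
    #     GET /sda1/0/a/c/o
    #     If-Match: <some other etag>             -> 412 Precondition Failed
    #
    #     GET /sda1/0/a/c/o
    #     If-Modified-Since: <after X-Timestamp>  -> 304 Not Modified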

    @public
    @timing_stats(sample_rate=0.8)
    def HEAD(self, request):
        """Handle HTTP HEAD requests for the Swift Object Server."""
        device, partition, account, container, obj = split_and_validate_path(request, 5, 5, True)
        try:
            disk_file = self.get_diskfile(device, partition, account, container, obj)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=device, request=request)
        try:
            metadata = disk_file.read_metadata()
        except (DiskFileNotExist, DiskFileQuarantined):
            return HTTPNotFound(request=request)
        response = Response(request=request, conditional_response=True)
        response.headers["Content-Type"] = metadata.get("Content-Type", "application/octet-stream")
        for key, value in metadata.iteritems():
            if is_user_meta("object", key) or key.lower() in self.allowed_headers:
                response.headers[key] = value
        response.etag = metadata["ETag"]
        ts = metadata["X-Timestamp"]
        response.last_modified = math.ceil(float(ts))
        # Needed for container sync feature
        response.headers["X-Timestamp"] = ts
        response.content_length = int(metadata["Content-Length"])
        try:
            response.content_encoding = metadata["Content-Encoding"]
        except KeyError:
            pass
        return response

    @public
    @timing_stats()
    def DELETE(self, request):
        """Handle HTTP DELETE requests for the Swift Object Server."""
        device, partition, account, container, obj = split_and_validate_path(request, 5, 5, True)
        if "x-timestamp" not in request.headers or not check_float(request.headers["x-timestamp"]):
            return HTTPBadRequest(body="Missing timestamp", request=request, content_type="text/plain")
        try:
            disk_file = self.get_diskfile(device, partition, account, container, obj)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=device, request=request)
        try:
            orig_metadata = disk_file.read_metadata()
        except DiskFileExpired as e:
            orig_timestamp = e.timestamp
            orig_metadata = e.metadata
            response_class = HTTPNotFound
        except DiskFileDeleted as e:
            orig_timestamp = e.timestamp
            orig_metadata = {}
            response_class = HTTPNotFound
        except (DiskFileNotExist, DiskFileQuarantined):
            orig_timestamp = 0
            orig_metadata = {}
            response_class = HTTPNotFound
        else:
            orig_timestamp = orig_metadata.get("X-Timestamp", 0)
            if orig_timestamp < request.headers["x-timestamp"]:
                response_class = HTTPNoContent
            else:
                response_class = HTTPConflict
        orig_delete_at = int(orig_metadata.get("X-Delete-At") or 0)
        try:
            req_if_delete_at_val = request.headers["x-if-delete-at"]
            req_if_delete_at = int(req_if_delete_at_val)
        except KeyError:
            pass
        except ValueError:
            return HTTPBadRequest(request=request, body="Bad X-If-Delete-At header value")
        else:
            if orig_delete_at != req_if_delete_at:
                return HTTPPreconditionFailed(request=request, body="X-If-Delete-At and X-Delete-At do not match")
        if orig_delete_at:
            self.delete_at_update("DELETE", orig_delete_at, account, container, obj, request, device)
        req_timestamp = request.headers["X-Timestamp"]
        if orig_timestamp < req_timestamp:
            disk_file.delete(req_timestamp)
            self.container_update(
                "DELETE", account, container, obj, request, HeaderKeyDict({"x-timestamp": req_timestamp}), device
            )
        return response_class(request=request)
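
    # Note that orig_timestamp and the request's X-Timestamp are compared
    # as *strings*; this is only safe because Swift normalizes timestamps
    # to fixed-width, zero-padded values (a quick sketch, assuming the
    # '%016.05f' form produced by normalize_timestamp):
    #
    #     >>> '%016.5f' % 2 < '%016.5f' % 10   # padded: numeric order holds
    #     True
    #     >>> '2' < '10'                       # raw strings would not
    #     False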

    @public
    @replication
    @timing_stats(sample_rate=0.1)
    def REPLICATE(self, request):
        """
        Handle REPLICATE requests for the Swift Object Server.  This is used
        by the object replicator to get hashes for directories.
        """
        device, partition, suffix = split_and_validate_path(request, 2, 3, True)
        try:
            hashes = self._diskfile_mgr.get_hashes(device, partition, suffix)
        except DiskFileDeviceUnavailable:
            resp = HTTPInsufficientStorage(drive=device, request=request)
        else:
            resp = Response(body=pickle.dumps(hashes))
        return resp
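
    # The replicator consumes this response by unpickling the body and
    # diffing the suffix hashes against its local set (a sketch, under the
    # assumption that both ends hash suffix directories the same way):
    #
    #     remote_hashes = pickle.loads(resp.body)  # {'a83': <md5 hex>, ...}
    #     out_of_sync = [suffix for suffix, local_hash in local_hashes.items()
    #                    if remote_hashes.get(suffix) != local_hash]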

    @public
    @replication
    @timing_stats(sample_rate=0.1)
    def REPLICATION(self, request):
        return Response(app_iter=ssync_receiver.Receiver(self, request)())

    def __call__(self, env, start_response):
        """WSGI Application entry point for the Swift Object Server."""
        start_time = time.time()
        req = Request(env)
        self.logger.txn_id = req.headers.get("x-trans-id", None)

        if not check_utf8(req.path_info):
            res = HTTPPreconditionFailed(body="Invalid UTF8 or contains NULL")
        else:
            try:
                # disallow methods which have not been marked 'public'
                try:
                    method = getattr(self, req.method)
                    getattr(method, "publicly_accessible")
                    replication_method = getattr(method, "replication", False)
                    if self.replication_server is not None and self.replication_server != replication_method:
                        raise AttributeError("Not allowed method.")
                except AttributeError:
                    res = HTTPMethodNotAllowed()
                else:
                    res = method(req)
            except DiskFileCollision:
                res = HTTPForbidden(request=req)
            except HTTPException as error_response:
                res = error_response
            except (Exception, Timeout):
                self.logger.exception(
                    _("ERROR __call__ error with %(method)s" " %(path)s "), {"method": req.method, "path": req.path}
                )
                res = HTTPInternalServerError(body=traceback.format_exc())
        trans_time = time.time() - start_time
        if self.log_requests:
            log_line = '%s - - [%s] "%s %s" %s %s "%s" "%s" "%s" %.4f' % (
                req.remote_addr,
                time.strftime("%d/%b/%Y:%H:%M:%S +0000", time.gmtime()),
                req.method,
                req.path,
                res.status.split()[0],
                res.content_length or "-",
                req.referer or "-",
                req.headers.get("x-trans-id", "-"),
                req.user_agent or "-",
                trans_time,
            )
            if req.method in ("REPLICATE", "REPLICATION") or "X-Backend-Replication" in req.headers:
                self.logger.debug(log_line)
            else:
                self.logger.info(log_line)
        if req.method in ("PUT", "DELETE"):
            slow = self.slow - trans_time
            if slow > 0:
                sleep(slow)
        return res(env, start_response)
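
The __call__ dispatch above routes requests purely through attributes set by
decorators: @public marks a method as reachable from the WSGI entry point, and
@replication restricts it to replication servers. A minimal, runnable sketch
of that mechanism (not Swift's actual decorators, which live in
swift.common.utils):

def public(func):
    # mark the method as reachable through the WSGI entry point
    func.publicly_accessible = True
    return func

def replication(func):
    # mark the method as reserved for replication requests
    func.replication = True
    return func

class Demo(object):
    # False mimics a server configured to refuse replication verbs
    replication_server = False

    @public
    def GET(self):
        return '200 OK'

    def secret(self):
        return 'never routed'

    def dispatch(self, method_name):
        try:
            method = getattr(self, method_name)
            getattr(method, 'publicly_accessible')
            if (self.replication_server is not None and
                    self.replication_server !=
                    getattr(method, 'replication', False)):
                raise AttributeError('Not allowed method.')
        except AttributeError:
            return '405 Method Not Allowed'
        return method()

print(Demo().dispatch('GET'))     # 200 OK
print(Demo().dispatch('secret'))  # 405 Method Not Allowed
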
Ejemplo n.º 28
0
class MetadataController(object):
    """"
    WSGI Controller for metadata server
    """
    save_headers = [
        'x-metadata-read', 'x-metadata-write', 'x-metadata-sync-key',
        'x-metadata-sync-to'
    ]

    def __init__(self, conf, logger=None):
        # location/directory of the metadata database (meta.db)
        self.location = conf.get('location', '/srv/node/sdb1/metadata/')
        # path to the actual file
        #self.db_file = os.path.join(self.location, 'meta.db')
        self.logger = logger or get_logger(conf, log_route='metadata-server')
        self.root = conf.get('devices', '/srv/node')
        #workaround for device listings
        self.node_count = conf.get('nodecount', '8')
        self.devicelist = []
        for x in range(0, int(self.node_count)):
            self.devicelist.append(conf.get('device' + str(x), ''))
        self.mount_check = config_true_value(conf.get('mount_check', 'true'))
        self.node_timeout = int(conf.get('node_timeout', 3))
        self.conn_timeout = float(conf.get('conn_timeout', 3))
        replication_server = conf.get('replication_server', None)
        if replication_server is not None:
            replication_server = config_true_value(replication_server)
        self.replication_server = replication_server
        self.allowed_sync_hosts = [
            h.strip()
            for h in conf.get('allowed_sync_hosts', '127.0.0.1').split(',')
            if h.strip()
        ]
        self.replicator_rpc = ReplicatorRpc(self.root,
                                            DATADIR,
                                            MetadataBroker,
                                            self.mount_check,
                                            logger=self.logger)
        self.diskfile_mgr = DiskFileManager(conf, self.logger)

        self.db_ip = conf.get('db_ip', '127.0.0.1')
        self.db_port = int(conf.get('db_port', 2424))
        self.db_user = conf.get('db_user', 'root')
        self.db_pw = conf.get('db_pw', 'root')

        if config_true_value(conf.get('allow_versions', 'f')):
            self.save_headers.append('x-versions-location')

        swift.common.db.DB_PREALLOCATION = config_true_value(
            conf.get('db_preallocation', 'f'))
        self.broker = self._get_metadata_broker()
        self.broker.initialize()

    def _get_metadata_broker(self, **kwargs):
        """
        Returns an instance of the DB abstraction layer object (broker)
        """
        return MetadataBroker(self.db_ip, self.db_port, self.db_user,
                              self.db_pw)

    def check_attrs(self, attrs, acc, con, obj):
        """
        Verify that attributes are valid.
        Checks the attr list against the known system attributes and
        allows for custom metadata prefixes.

        returns: boolean, whether the attrs are valid
        """
        for attr in attrs.split(','):
            if attr.startswith('object_meta') or \
                    attr.startswith('container_meta') or \
                    attr.startswith('account_meta'):
                pass
            elif attr not in [
                    'object_uri', 'object_name', 'object_account_name',
                    'object_container_name', 'object_location',
                    'object_uri_create_time', 'object_last_modified_time',
                    'object_last_changed_time', 'object_delete_time',
                    'object_last_activity_time', 'object_etag_hash',
                    'object_content_type', 'object_content_length',
                    'object_content_encoding', 'object_content_disposition',
                    'object_content_language', 'object_cache_control',
                    'object_delete_at', 'object_manifest_type',
                    'object_manifest', 'object_access_control_allow_origin',
                    'object_access_control_allow_credentials',
                    'object_access_control_expose_headers',
                    'object_access_control_max_age',
                    'object_access_control_allow_methods',
                    'object_access_control_allow_headers', 'object_origin',
                    'object_access_control_request_method',
                    'object_access_control_request_headers', 'object_meta',
                    'container_uri', 'container_name',
                    'container_account_name', 'container_create_time',
                    'container_last_modified_time',
                    'container_last_changed_time', 'container_delete_time',
                    'container_last_activity_time',
                    'container_read_permissions',
                    'container_write_permissions', 'container_sync_to',
                    'container_sync_key', 'container_versions_location',
                    'container_object_count', 'container_bytes_used',
                    'container_meta', 'account_uri', 'account_name',
                    'account_tenant_id', 'account_first_use_time',
                    'account_last_modified_time', 'account_last_changed_time',
                    'account_delete_time', 'account_last_activity_time',
                    'account_container_count', 'account_object_count',
                    'account_bytes_used', 'account_meta', 'all_attrs',
                    'all_system_attrs', 'all_meta_attrs', 'all_account_attrs',
                    'all_account_system_attrs', 'all_account_meta_attrs',
                    'all_container_attrs', 'all_container_system_attrs',
                    'all_container_meta_attrs', 'all_object_attrs',
                    'all_object_system_attrs',
                    'all_object_meta_attrs'
            ]:
                return False
        return True
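
    # Hypothetical calls illustrating the rule above:
    #
    #     check_attrs('object_uri,object_meta_color', acc, con, obj)  -> True
    #     check_attrs('object_size', acc, con, obj)                   -> False
    #
    # 'object_meta_color' passes via the object_meta prefix, while
    # 'object_size' is neither a system attribute nor a custom prefix.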

    @public
    @timing_stats()
    def GET(self, req):
        """
        Handle HTTP GET requests
        Build SQL queries piece by piece and then execute
        Custom attributes need to be handled specially, since they exist
        in a separate table
        """
        #broker = self._get_metadata_broker()
        #broker.initialize()

        base_version, acc, con, obj = split_path(req.path, 1, 4, True)
        if 'sorted' in req.headers:
            sort_value_list = req.headers['sorted']
            if sort_value_list == '':
                sort_value_list = 'uri'
            toSort = True
        else:
            toSort = False
        if 'attributes' in req.headers:
            attrs = req.headers['attributes']
        # if no attribute list is passed in, include everything in scope;
        # with no attributes specified, results can come from multiple
        # levels of scope
        else:
            attrs = "object_uri,container_uri,account_uri"

        attrs, all_obj_meta, all_con_meta, all_acc_meta = \
            eval_superset(attrs.split(","))
        format = "text/plain"
        if self.check_attrs(attrs, acc, con, obj) or attrs == '':
            accAttrs, conAttrs, objAttrs, superAttrs, customAttrs = \
                split_attrs_by_scope(attrs)
            """
            If we have a thing from which we don't request any sys attrs
            Then we need to add its uri so that it appears in the list
            returned from the query. After we query for custom attrs,
            we need to delete any thing that is empty.
            """
            if all_obj_meta and objAttrs == "":
                objAttrs = "object_uri"
            if all_con_meta and conAttrs == "":
                conAttrs = "container_uri"
            if all_acc_meta and accAttrs == "":
                accAttrs = "account_uri"

            # Builds initial query containing the
            # split attributes for each item type
            accQuery = self.broker.get_attributes_query(
                acc, con, obj, accAttrs)
            conQuery = self.broker.get_attributes_query(
                acc, con, obj, conAttrs)
            objQuery = self.broker.get_attributes_query(
                acc, con, obj, objAttrs)

            # If there is a query in the request add it to the end
            # of the WHERE clause of the SQL
            if 'query' in req.headers:
                query = req.headers['query']
                accQuery = self.broker.get_uri_query(accQuery, query)
                conQuery = self.broker.get_uri_query(conQuery, query)
                objQuery = self.broker.get_uri_query(objQuery, query)

            # if successful query add the results to the end of the
            # accumulator list
            ret = []
            if not accQuery.startswith("BAD"):
                ret.extend(
                    self.broker.execute_query(
                        accQuery, acc, con, obj, 'account_uri'
                        in attrs.split(',')))
            if not conQuery.startswith("BAD"):
                ret.extend(
                    self.broker.execute_query(
                        conQuery, acc, con, obj, 'container_uri'
                        in attrs.split(',')))
            if not objQuery.startswith("BAD"):
                ret.extend(
                    self.broker.execute_query(objQuery, acc, con, obj,
                                              'object_uri'
                                              in attrs.split(',')))

            # query the custom table
            ret = self.broker.custom_attributes_query(customAttrs, ret,
                                                      all_obj_meta,
                                                      all_con_meta,
                                                      all_acc_meta)
            """
            Do the deletion thing mentioned above
            """
            ret = [x for x in ret if x[x.keys()[0]] != {}]

            if toSort:
                sorter = Sort_metadata()
                ret = sorter.sort_data(ret, sort_value_list.split(","))

            # default format is plain text
            # can choose between json/xml as well
            # no error handling is done right now; a misspelled format
            # value simply falls back to plain text
            if "format" in req.headers:
                if req.headers['format'] == "json":
                    format = "application/json"
                    out = output_json(ret)
                elif req.headers['format'] == "xml":
                    format = "application/xml"
                    out = output_xml(ret)
                else:
                    out = output_plain(ret)
            else:
                out = output_plain(ret)
            status = 200

        else:
            out = "One or more attributes not supported"
            status = 400
            format = "text/plain"

        # Returns the HTTP Response object with the result of the API request
        return Response(request=req,
                        body=out + "\n",
                        content_type=format,
                        status=status)
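
    # The handler reads its whole contract from request headers; e.g. a
    # hypothetical request carrying
    #
    #     attributes: container_uri,container_object_count
    #     query: container_object_count > 0
    #     sorted: container_uri
    #
    # returns only container rows, filtered by the extra WHERE clause,
    # sorted by uri, and rendered as plain text (no format header given).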

    @public
    #@timing_stats()
    #TODO: reorganize code to generalize repeated calls
    #TODO: abstract data/object type names for generic calls
    def PUT(self, req):
        version, acc, con, obj = split_path(req.path, 1, 4, True)
        stor_policy = req.headers['storage_policy']
        ring = POLICIES.get_object_ring(stor_policy, '/etc/swift')
        #broker = self._get_metadata_broker()
        #broker.initialize()

        #Handle Container PUT
        if not obj:
            hsh = hash_path(acc, con)
            part = ring.get_part(acc, con)
            db_dir = storage_directory(swift.container.backend.DATADIR, part,
                                       hsh)
            nodes = ring.get_part_nodes(part)
            for node in nodes:
                for item in self.devicelist:
                    if node['device'] in item:
                        try:
                            path = os.path.join(self.root + item, db_dir,
                                                hsh + '.db')
                            #TODO: move kwargs
                            kwargs = {
                                'account': acc,
                                'container': con,
                                'logger': self.logger
                            }
                            md_broker = swift.container.backend.ContainerBroker(
                                path, **kwargs)
                            md = md_broker.get_info()
                            md.update(
                                (key, value)
                                for key, (value, timestamp
                                          ) in md_broker.metadata.iteritems()
                                if value != ''
                                and is_sys_or_user_meta('container', key))
                            sys_md = format_con_metadata(md)
                            user_md = format_custom_metadata(md)
                            if 'X-Container-Read' in req.headers:
                                sys_md[
                                    'container_read_permissions'] = req.headers[
                                        'X-Container-Read']
                            if 'X-Container-Write' in req.headers:
                                sys_md[
                                    'container_write_permissions'] = req.headers[
                                        'X-Container-Write']
                            #TODO: insert container_last_activity_time
                            #TODO: split meta user/sys
                            #TODO: insert meta
                            self.broker.insert_container_md([sys_md])
                            return
                        except DatabaseConnectionError as e:
                            self.logger.warn("DatabaseConnectionError: " +
                                             e.path + "\n")
                            pass
                        except:
                            self.logger.warn("%s: %s\n" % (str(
                                sys.exc_info()[0]), str(sys.exc_info()[1])))
                            pass
        #handle object PUT
        else:
            part = ring.get_part(acc, con, obj)
            nodes = ring.get_part_nodes(part)
            for node in nodes:
                for item in self.devicelist:
                    if node['device'] in item:
                        try:
                            df = self.diskfile_mgr.get_diskfile(
                                item, part, acc, con, obj, stor_policy)
                            md = df.read_metadata()
                            sys_md = format_obj_metadata(md)
                            #df._data_file is a direct path to the objects data
                            sys_md['object_location'] = df._data_file
                            user_md = format_custom_metadata(md)
                            #TODO: insert user meta and sys meta
                            self.broker.insert_object_md([sys_md])
                        except:
                            self.logger.warn("%s: %s\n" % (str(
                                sys.exc_info()[0]), str(sys.exc_info()[1])))
                            pass
        return

    @public
    @timing_stats()
    def DELETE(self, req):
        version, acc, con, obj = split_path(req.path, 1, 4, True)
        timestamp = Timestamp(time.time()).isoformat()
        data_type = ''
        md = {}
        if not con and not obj:
            #do nothing. accounts cannot be deleted
            return
        elif not obj:
            md = format_con_metadata(md)
            md['container_delete_time'] = timestamp
            md['container_last_activity_time'] = timestamp
            data_type = 'container'
            for item in (data_type + '_uri', data_type + '_name'):
                if item in md:
                    del md[item]
            #TODO: overwrite container metadata
            #TODO: delete container custom metadata
        else:
            md = format_obj_metadata(md)
            md['object_delete_time'] = timestamp
            md['object_last_activity_time'] = timestamp
            data_type = 'object'
            for item in (data_type + '_uri', data_type + '_name'):
                if item in md:
                    del md[item]
            #TODO: overwrite object metadata
            #TODO: delete object user meta
        return

    #TODO: generalize strings used for repeat calls
    @public
    #@timing_stats()
    def POST(self, req):
        version, acc, con, obj = split_path(req.path, 1, 4, True)
        stor_policy = req.headers['storage_policy']
        ring = POLICIES.get_object_ring(stor_policy, '/etc/swift')
        if not con and not obj:
            meta_type = 'account'
            kwargs = {'account': acc, 'logger': self.logger}
            data_dir = swift.account.backend.DATADIR
            hsh = hash_path(acc)
            part = ring.get_part(acc)
            db_dir = storage_directory(data_dir, part, hsh)
            nodes = ring.get_part_nodes(part)
            for node in nodes:
                for item in self.devicelist:
                    if node['device'] in item:
                        try:
                            path = os.path.join(self.root + item, db_dir,
                                                hsh + '.db')
                            broker = swift.account.backend.AccountBroker(
                                path, **kwargs)
                            md = broker.get_info()
                            md.update((key, value) for key, (
                                value,
                                timestamp) in broker.metadata.iteritems()
                                      if value != ''
                                      and is_sys_or_user_meta(meta_type, key))
                            sys_md = format_acc_metadata(md)
                            user_md = format_custom_metadata(md)
                            #TODO: call overwrite_account_metadata
                            #TODO: call overwrite_custom_metadata
                            return
                        except:
                            self.logger.warn("%s: %s\n" % (str(
                                sys.exc_info()[0]), str(sys.exc_info()[1])))
                            pass
        #Handle Container POST
        elif not obj:
            meta_type = 'container'
            kwargs = {'account': acc, 'container': con, 'logger': self.logger}
            data_dir = swift.container.backend.DATADIR
            try:
                hsh = hash_path(acc, con)
                part = ring.get_part(acc, con)
                db_dir = storage_directory(data_dir, part, hsh)
                nodes = ring.get_part_nodes(part)
                for node in nodes:
                    for item in self.devicelist:
                        if node['device'] in item:
                            try:
                                path = os.path.join(self.root + item, db_dir,
                                                    hsh + '.db')
                                broker = swift.container.backend.ContainerBroker(
                                    path, **kwargs)
                                md = broker.get_info()
                                md.update(
                                    (key, value)
                                    for key, (value, timestamp
                                              ) in broker.metadata.iteritems()
                                    if value != ''
                                    and is_sys_or_user_meta('container', key))
                                sys_md = format_con_metadata(md)
                                user_md = format_custom_metadata(md)
                                if 'X-Container-Read' in req.headers:
                                    sys_md[
                                        'container_read_permissions'] = req.headers[
                                            'X-Container-Read']
                                if 'X-Container-Write' in req.headers:
                                    sys_md[
                                        'container_write_permissions'] = req.headers[
                                            'X-Container-Write']
                                #TODO: call overwrite_container_metadata
                                #TODO: call overwrite_custom_metadata
                                return
                            except DatabaseConnectionError as e:
                                self.logger.warn("DatabaseConnectionError: " +
                                                 e.path + "\n")
                                pass
            except:
                self.logger.warn(
                    "%s: %s\n" %
                    (str(sys.exc_info()[0]), str(sys.exc_info()[1])))
                pass
        else:
            part = ring.get_part(acc, con, obj)
            nodes = ring.get_part_nodes(part)
            for node in nodes:
                for item in self.devicelist:
                    if node['device'] in item:
                        try:
                            df = self.diskfile_mgr.get_diskfile(
                                item, part, acc, con, obj, stor_policy)
                            md = df.read_metadata()
                            sys_md = format_obj_metadata(md)
                            user_md = format_custom_metadata(md)
                            #TODO: call overwrite_object_metadata
                            #TODO: call overwrite_custom_metadata
                        except:
                            self.logger.warn("%s: %s\n" % (str(
                                sys.exc_info()[0]), str(sys.exc_info()[1])))
                            pass
        return

    @public
    @timing_stats()
    def COPY(self, req):
        version, acc, con, obj = split_path(req.path, 1, 4, True)

    def __call__(self, env, start_response):
        """
        Boilerplate code for how the server's code gets called
        upon receiving a request.
        Taken directly from other servers.
        """
        # start_time = time.time()
        req = Request(env)
        self.logger.txn_id = req.headers.get('x-trans-id', None)
        if not check_utf8(req.path_info):
            res = HTTPPreconditionFailed(body='Invalid UTF8 or contains NULL')
        else:
            try:
                # disallow methods which have not been marked 'public'
                try:
                    method = getattr(self, req.method)
                    getattr(method, 'publicly_accessible')
                    replication_method = getattr(method, 'replication', False)
                    if (self.replication_server is not None
                            and self.replication_server != replication_method):
                        raise AttributeError('Not allowed method.')
                except AttributeError:
                    res = HTTPMethodNotAllowed()
                else:
                    res = method(req)
            except HTTPException as error_response:
                res = error_response
            except (Exception, Timeout):
                self.logger.exception(
                    _('ERROR __call__ error with %(method)s %(path)s '), {
                        'method': req.method,
                        'path': req.path
                    })
                res = HTTPInternalServerError(body=traceback.format_exc())
        # trans_time = '%.4f' % (time.time() - start_time)
        # if self.log_requests:
        #     log_message = '%s - - [%s] "%s %s" %s %s "%s" "%s" "%s" %s' % (
        #         req.remote_addr,
        #         time.strftime('%d/%b/%Y:%H:%M:%S +0000',
        #                       time.gmtime()),
        #         req.method, req.path,
        #         res.status.split()[0], res.content_length or '-',
        #         req.headers.get('x-trans-id', '-'),
        #         req.referer or '-', req.user_agent or '-',
        #         trans_time)
        #     if req.method.upper() == 'REPLICATE':
        #         self.logger.debug(log_message)
        #     else:
        #         self.logger.info(log_message)
        return res(env, start_response)
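
A hypothetical client call against the GET handler above (host, port and path
are assumptions; the header names are exactly the ones the handler reads):

import httplib
import json

conn = httplib.HTTPConnection('127.0.0.1', 6090)
conn.request('GET', '/v1/AUTH_test/pics', headers={
    'attributes': 'object_uri,object_content_length',
    'query': 'object_content_length > 1024',
    'format': 'json',
    'sorted': 'object_uri',
})
resp = conn.getresponse()
if resp.status == 200:
    # one dict of requested attributes per matching item
    for row in json.loads(resp.read()):
        print(row)
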
Ejemplo n.º 29
0
class TestAuditor(unittest.TestCase):
    def setUp(self):
        self.testdir = os.path.join(mkdtemp(), "tmp_test_object_auditor")
        self.devices = os.path.join(self.testdir, "node")
        self.rcache = os.path.join(self.testdir, "object.recon")
        self.logger = FakeLogger()
        rmtree(self.testdir, ignore_errors=1)
        mkdirs(os.path.join(self.devices, "sda"))
        os.mkdir(os.path.join(self.devices, "sdb"))

        # policy 0
        self.objects = os.path.join(self.devices, "sda", get_data_dir(POLICIES[0]))
        self.objects_2 = os.path.join(self.devices, "sdb", get_data_dir(POLICIES[0]))
        os.mkdir(self.objects)
        # policy 1
        self.objects_p1 = os.path.join(self.devices, "sda", get_data_dir(POLICIES[1]))
        self.objects_2_p1 = os.path.join(self.devices, "sdb", get_data_dir(POLICIES[1]))
        os.mkdir(self.objects_p1)

        self.parts = self.parts_p1 = {}
        for part in ["0", "1", "2", "3"]:
            self.parts[part] = os.path.join(self.objects, part)
            self.parts_p1[part] = os.path.join(self.objects_p1, part)
            os.mkdir(os.path.join(self.objects, part))
            os.mkdir(os.path.join(self.objects_p1, part))

        self.conf = dict(devices=self.devices, mount_check="false", object_size_stats="10,100,1024,10240")
        self.df_mgr = DiskFileManager(self.conf, self.logger)

        # diskfiles for policy 0, 1
        self.disk_file = self.df_mgr.get_diskfile("sda", "0", "a", "c", "o", policy=POLICIES[0])
        self.disk_file_p1 = self.df_mgr.get_diskfile("sda", "0", "a", "c", "o", policy=POLICIES[1])

    def tearDown(self):
        rmtree(os.path.dirname(self.testdir), ignore_errors=1)
        unit.xattr_data = {}

    def test_worker_conf_parms(self):
        def check_common_defaults():
            self.assertEquals(auditor_worker.max_bytes_per_second, 10000000)
            self.assertEquals(auditor_worker.log_time, 3600)

        # test default values
        conf = dict(devices=self.devices, mount_check="false", object_size_stats="10,100,1024,10240")
        auditor_worker = auditor.AuditorWorker(conf, self.logger, self.rcache, self.devices)
        check_common_defaults()
        self.assertEquals(auditor_worker.diskfile_mgr.disk_chunk_size, 65536)
        self.assertEquals(auditor_worker.max_files_per_second, 20)
        self.assertEquals(auditor_worker.zero_byte_only_at_fps, 0)

        # test specified audit value overrides
        conf.update({"disk_chunk_size": 4096})
        auditor_worker = auditor.AuditorWorker(conf, self.logger, self.rcache, self.devices, zero_byte_only_at_fps=50)
        check_common_defaults()
        self.assertEquals(auditor_worker.diskfile_mgr.disk_chunk_size, 4096)
        self.assertEquals(auditor_worker.max_files_per_second, 50)
        self.assertEquals(auditor_worker.zero_byte_only_at_fps, 50)

    def test_object_audit_extra_data(self):
        def run_tests(disk_file):
            auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices)
            data = "0" * 1024
            etag = md5()
            with disk_file.create() as writer:
                writer.write(data)
                etag.update(data)
                etag = etag.hexdigest()
                timestamp = str(normalize_timestamp(time.time()))
                metadata = {"ETag": etag, "X-Timestamp": timestamp, "Content-Length": str(os.fstat(writer._fd).st_size)}
                writer.put(metadata)
                pre_quarantines = auditor_worker.quarantines

                auditor_worker.object_audit(AuditLocation(disk_file._datadir, "sda", "0", policy=POLICIES.legacy))
                self.assertEquals(auditor_worker.quarantines, pre_quarantines)

                os.write(writer._fd, "extra_data")

                auditor_worker.object_audit(AuditLocation(disk_file._datadir, "sda", "0", policy=POLICIES.legacy))
                self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)

        run_tests(self.disk_file)
        run_tests(self.disk_file_p1)

    def test_object_audit_diff_data(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices)
        data = "0" * 1024
        etag = md5()
        timestamp = str(normalize_timestamp(time.time()))
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {"ETag": etag, "X-Timestamp": timestamp, "Content-Length": str(os.fstat(writer._fd).st_size)}
            writer.put(metadata)
            pre_quarantines = auditor_worker.quarantines

        # remake so it will have metadata
        self.disk_file = self.df_mgr.get_diskfile("sda", "0", "a", "c", "o", policy=POLICIES.legacy)

        auditor_worker.object_audit(AuditLocation(self.disk_file._datadir, "sda", "0", policy=POLICIES.legacy))
        self.assertEquals(auditor_worker.quarantines, pre_quarantines)
        etag = md5()
        etag.update("1" + "0" * 1023)
        etag = etag.hexdigest()
        metadata["ETag"] = etag

        with self.disk_file.create() as writer:
            writer.write(data)
            writer.put(metadata)

        auditor_worker.object_audit(AuditLocation(self.disk_file._datadir, "sda", "0", policy=POLICIES.legacy))
        self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_audit_no_meta(self):
        timestamp = str(normalize_timestamp(time.time()))
        path = os.path.join(self.disk_file._datadir, timestamp + ".data")
        mkdirs(self.disk_file._datadir)
        fp = open(path, "w")
        fp.write("0" * 1024)
        fp.close()
        invalidate_hash(os.path.dirname(self.disk_file._datadir))
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices)
        pre_quarantines = auditor_worker.quarantines
        auditor_worker.object_audit(AuditLocation(self.disk_file._datadir, "sda", "0", policy=POLICIES.legacy))
        self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_audit_will_not_swallow_errors_in_tests(self):
        timestamp = str(normalize_timestamp(time.time()))
        path = os.path.join(self.disk_file._datadir, timestamp + ".data")
        mkdirs(self.disk_file._datadir)
        with open(path, "w") as f:
            write_metadata(f, {"name": "/a/c/o"})
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices)

        def blowup(*args):
            raise NameError("tpyo")

        with mock.patch.object(DiskFileManager, "get_diskfile_from_audit_location", blowup):
            self.assertRaises(
                NameError,
                auditor_worker.object_audit,
                AuditLocation(os.path.dirname(path), "sda", "0", policy=POLICIES.legacy),
            )

    def test_failsafe_object_audit_will_swallow_errors_in_tests(self):
        timestamp = str(normalize_timestamp(time.time()))
        path = os.path.join(self.disk_file._datadir, timestamp + ".data")
        mkdirs(self.disk_file._datadir)
        with open(path, "w") as f:
            write_metadata(f, {"name": "/a/c/o"})
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices)

        def blowup(*args):
            raise NameError("tpyo")

        with mock.patch("swift.obj.diskfile.DiskFileManager.diskfile_cls", blowup):
            auditor_worker.failsafe_object_audit(
                AuditLocation(os.path.dirname(path), "sda", "0", policy=POLICIES.legacy)
            )
        self.assertEquals(auditor_worker.errors, 1)

    def test_generic_exception_handling(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices)
        # pretend that we logged (and reset counters) just now
        auditor_worker.last_logged = time.time()
        timestamp = str(normalize_timestamp(time.time()))
        pre_errors = auditor_worker.errors
        data = "0" * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {"ETag": etag, "X-Timestamp": timestamp, "Content-Length": str(os.fstat(writer._fd).st_size)}
            writer.put(metadata)
        with mock.patch("swift.obj.diskfile.DiskFileManager.diskfile_cls", lambda *_: 1 / 0):
            auditor_worker.audit_all_objects()
        self.assertEquals(auditor_worker.errors, pre_errors + 1)

    def test_object_run_once_pass(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices)
        auditor_worker.log_time = 0
        timestamp = str(normalize_timestamp(time.time()))
        pre_quarantines = auditor_worker.quarantines
        data = "0" * 1024

        def write_file(df):
            etag = md5()
            with df.create() as writer:
                writer.write(data)
                etag.update(data)
                etag = etag.hexdigest()
                metadata = {"ETag": etag, "X-Timestamp": timestamp, "Content-Length": str(os.fstat(writer._fd).st_size)}
                writer.put(metadata)

        # policy 0
        write_file(self.disk_file)
        # policy 1
        write_file(self.disk_file_p1)

        auditor_worker.audit_all_objects()
        self.assertEquals(auditor_worker.quarantines, pre_quarantines)
        # 1 object per policy falls into 1024 bucket
        self.assertEquals(auditor_worker.stats_buckets[1024], 2)
        self.assertEquals(auditor_worker.stats_buckets[10240], 0)

        # pick up some additional code coverage, large file
        data = "0" * 1024 * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {"ETag": etag, "X-Timestamp": timestamp, "Content-Length": str(os.fstat(writer._fd).st_size)}
            writer.put(metadata)
        auditor_worker.audit_all_objects(device_dirs=["sda", "sdb"])
        self.assertEquals(auditor_worker.quarantines, pre_quarantines)
        # still have the 1024 byte object left in policy-1 (plus the
        # stats from the original 2)
        self.assertEquals(auditor_worker.stats_buckets[1024], 3)
        self.assertEquals(auditor_worker.stats_buckets[10240], 0)
        # and then policy-0 disk_file was re-written as a larger object
        self.assertEquals(auditor_worker.stats_buckets["OVER"], 1)

        # pick up even more additional code coverage, misc paths
        auditor_worker.log_time = -1
        auditor_worker.stats_sizes = []
        auditor_worker.audit_all_objects(device_dirs=["sda", "sdb"])
        self.assertEquals(auditor_worker.quarantines, pre_quarantines)
        self.assertEquals(auditor_worker.stats_buckets[1024], 3)
        self.assertEquals(auditor_worker.stats_buckets[10240], 0)
        self.assertEquals(auditor_worker.stats_buckets["OVER"], 1)

    def test_object_run_logging(self):
        logger = FakeLogger()
        auditor_worker = auditor.AuditorWorker(self.conf, logger, self.rcache, self.devices)
        auditor_worker.audit_all_objects(device_dirs=["sda"])
        log_lines = logger.get_lines_for_level("info")
        self.assertTrue(len(log_lines) > 0)
        self.assertTrue(log_lines[0].index("ALL - parallel, sda"))

        logger = FakeLogger()
        auditor_worker = auditor.AuditorWorker(self.conf, logger, self.rcache, self.devices, zero_byte_only_at_fps=50)
        auditor_worker.audit_all_objects(device_dirs=["sda"])
        log_lines = logger.get_lines_for_level("info")
        self.assertTrue(len(log_lines) > 0)
        self.assertTrue(log_lines[0].index("ZBF - sda"))

    def test_object_run_once_no_sda(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices)
        timestamp = str(normalize_timestamp(time.time()))
        pre_quarantines = auditor_worker.quarantines
        # pretend that we logged (and reset counters) just now
        auditor_worker.last_logged = time.time()
        data = "0" * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {"ETag": etag, "X-Timestamp": timestamp, "Content-Length": str(os.fstat(writer._fd).st_size)}
            writer.put(metadata)
            os.write(writer._fd, "extra_data")
        auditor_worker.audit_all_objects()
        self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_run_once_multi_devices(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices)
        # pretend that we logged (and reset counters) just now
        auditor_worker.last_logged = time.time()
        timestamp = str(normalize_timestamp(time.time()))
        pre_quarantines = auditor_worker.quarantines
        data = "0" * 10
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {"ETag": etag, "X-Timestamp": timestamp, "Content-Length": str(os.fstat(writer._fd).st_size)}
            writer.put(metadata)
        auditor_worker.audit_all_objects()
        self.disk_file = self.df_mgr.get_diskfile("sda", "0", "a", "c", "ob", policy=POLICIES.legacy)
        data = "1" * 10
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {"ETag": etag, "X-Timestamp": timestamp, "Content-Length": str(os.fstat(writer._fd).st_size)}
            writer.put(metadata)
            os.write(writer._fd, "extra_data")
        auditor_worker.audit_all_objects()
        self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_run_fast_track_non_zero(self):
        self.auditor = auditor.ObjectAuditor(self.conf)
        self.auditor.log_time = 0
        data = "0" * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                "ETag": etag,
                "X-Timestamp": str(normalize_timestamp(time.time())),
                "Content-Length": str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            etag = md5()
            etag.update("1" + "0" * 1023)
            etag = etag.hexdigest()
            metadata["ETag"] = etag
            write_metadata(writer._fd, metadata)

        quarantine_path = os.path.join(self.devices, "sda", "quarantined", "objects")
        kwargs = {"mode": "once"}
        kwargs["zero_byte_fps"] = 50
        self.auditor.run_audit(**kwargs)
        self.assertFalse(os.path.isdir(quarantine_path))
        del (kwargs["zero_byte_fps"])
        self.auditor.run_audit(**kwargs)
        self.assertTrue(os.path.isdir(quarantine_path))

    def setup_bad_zero_byte(self, timestamp=None):
        if timestamp is None:
            timestamp = Timestamp(time.time())
        self.auditor = auditor.ObjectAuditor(self.conf)
        self.auditor.log_time = 0
        etag = md5()
        with self.disk_file.create() as writer:
            etag = etag.hexdigest()
            metadata = {"ETag": etag, "X-Timestamp": timestamp.internal, "Content-Length": 10}
            writer.put(metadata)
            etag = md5()
            etag = etag.hexdigest()
            metadata["ETag"] = etag
            write_metadata(writer._fd, metadata)

    def test_object_run_fast_track_all(self):
        self.setup_bad_zero_byte()
        kwargs = {"mode": "once"}
        self.auditor.run_audit(**kwargs)
        quarantine_path = os.path.join(self.devices, "sda", "quarantined", "objects")
        self.assertTrue(os.path.isdir(quarantine_path))

    def test_object_run_fast_track_zero(self):
        self.setup_bad_zero_byte()
        kwargs = {"mode": "once"}
        kwargs["zero_byte_fps"] = 50
        self.auditor.run_audit(**kwargs)
        quarantine_path = os.path.join(self.devices, "sda", "quarantined", "objects")
        self.assertTrue(os.path.isdir(quarantine_path))

    def test_object_run_fast_track_zero_check_closed(self):
        rat = [False]

        class FakeFile(DiskFile):
            def _quarantine(self, data_file, msg):
                rat[0] = True
                DiskFile._quarantine(self, data_file, msg)

        self.setup_bad_zero_byte()
        with mock.patch("swift.obj.diskfile.DiskFileManager.diskfile_cls", FakeFile):
            kwargs = {"mode": "once"}
            kwargs["zero_byte_fps"] = 50
            self.auditor.run_audit(**kwargs)
            quarantine_path = os.path.join(self.devices, "sda", "quarantined", "objects")
            self.assertTrue(os.path.isdir(quarantine_path))
            self.assertTrue(rat[0])

    @mock.patch.object(auditor.ObjectAuditor, "run_audit")
    @mock.patch("os.fork", return_value=0)
    def test_with_inaccessible_object_location(self, mock_os_fork, mock_run_audit):
        # Need to ensure that any failures in run_audit do
        # not prevent sys.exit() from running.  Otherwise we get
        # zombie processes.
        e = OSError("permission denied")
        mock_run_audit.side_effect = e
        self.auditor = auditor.ObjectAuditor(self.conf)
        self.assertRaises(SystemExit, self.auditor.fork_child, self)

    def test_with_only_tombstone(self):
        # sanity check that auditor doesn't touch solitary tombstones
        ts_iter = make_timestamp_iter()
        self.setup_bad_zero_byte(timestamp=ts_iter.next())
        self.disk_file.delete(ts_iter.next())
        files = os.listdir(self.disk_file._datadir)
        self.assertEqual(1, len(files))
        self.assertTrue(files[0].endswith("ts"))
        kwargs = {"mode": "once"}
        self.auditor.run_audit(**kwargs)
        files_after = os.listdir(self.disk_file._datadir)
        self.assertEqual(files, files_after)

    def test_with_tombstone_and_data(self):
        # rsync replication could leave a tombstone and data file in object
        # dir - verify they are both removed during audit
        ts_iter = make_timestamp_iter()
        ts_tomb = ts_iter.next()
        ts_data = ts_iter.next()
        self.setup_bad_zero_byte(timestamp=ts_data)
        tomb_file_path = os.path.join(self.disk_file._datadir, "%s.ts" % ts_tomb.internal)
        with open(tomb_file_path, "wb") as fd:
            write_metadata(fd, {"X-Timestamp": ts_tomb.internal})
        files = os.listdir(self.disk_file._datadir)
        self.assertEqual(2, len(files))
        self.assertTrue(os.path.basename(tomb_file_path) in files, files)
        kwargs = {"mode": "once"}
        self.auditor.run_audit(**kwargs)
        self.assertFalse(os.path.exists(self.disk_file._datadir))

    def test_sleeper(self):
        with mock.patch("time.sleep", mock.MagicMock()) as mock_sleep:
            auditor.SLEEP_BETWEEN_AUDITS = 0.10
            my_auditor = auditor.ObjectAuditor(self.conf)
            my_auditor._sleep()
            mock_sleep.assert_called_with(auditor.SLEEP_BETWEEN_AUDITS)

    def test_run_parallel_audit(self):
        class StopForever(Exception):
            pass

        class Bogus(Exception):
            pass

        class ObjectAuditorMock(object):
            check_args = ()
            check_kwargs = {}
            check_device_dir = None
            fork_called = 0
            master = 0
            wait_called = 0

            def mock_run(self, *args, **kwargs):
                self.check_args = args
                self.check_kwargs = kwargs
                if "zero_byte_fps" in kwargs:
                    self.check_device_dir = kwargs.get("device_dirs")

            def mock_sleep_stop(self):
                raise StopForever("stop")

            def mock_sleep_continue(self):
                return

            def mock_audit_loop_error(self, parent, zbo_fps, override_devices=None, **kwargs):
                raise Bogus("exception")

            def mock_fork(self):
                self.fork_called += 1
                if self.master:
                    return self.fork_called
                else:
                    return 0

            def mock_wait(self):
                self.wait_called += 1
                return (self.wait_called, 0)

        for i in string.ascii_letters[2:26]:
            mkdirs(os.path.join(self.devices, "sd%s" % i))

        my_auditor = auditor.ObjectAuditor(
            dict(devices=self.devices, mount_check="false", zero_byte_files_per_second=89, concurrency=1)
        )

        mocker = ObjectAuditorMock()
        my_auditor.logger.exception = mock.MagicMock()
        real_audit_loop = my_auditor.audit_loop
        my_auditor.audit_loop = mocker.mock_audit_loop_error
        my_auditor.run_audit = mocker.mock_run
        was_fork = os.fork
        was_wait = os.wait
        os.fork = mocker.mock_fork
        os.wait = mocker.mock_wait
        try:
            my_auditor._sleep = mocker.mock_sleep_stop
            my_auditor.run_once(zero_byte_fps=50)
            my_auditor.logger.exception.assert_called_once_with("ERROR auditing: exception")
            my_auditor.logger.exception.reset_mock()
            self.assertRaises(StopForever, my_auditor.run_forever)
            my_auditor.logger.exception.assert_called_once_with("ERROR auditing: exception")
            my_auditor.audit_loop = real_audit_loop

            self.assertRaises(StopForever, my_auditor.run_forever, zero_byte_fps=50)
            self.assertEquals(mocker.check_kwargs["zero_byte_fps"], 50)
            self.assertEquals(mocker.fork_called, 0)

            self.assertRaises(SystemExit, my_auditor.run_once)
            self.assertEquals(mocker.fork_called, 1)
            self.assertEquals(mocker.check_kwargs["zero_byte_fps"], 89)
            self.assertEquals(mocker.check_device_dir, [])
            self.assertEquals(mocker.check_args, ())

            device_list = ["sd%s" % i for i in string.ascii_letters[2:10]]
            device_string = ",".join(device_list)
            device_string_bogus = device_string + ",bogus"

            mocker.fork_called = 0
            self.assertRaises(SystemExit, my_auditor.run_once, devices=device_string_bogus)
            self.assertEquals(mocker.fork_called, 1)
            self.assertEquals(mocker.check_kwargs["zero_byte_fps"], 89)
            self.assertEquals(sorted(mocker.check_device_dir), device_list)

            mocker.master = 1

            mocker.fork_called = 0
            self.assertRaises(StopForever, my_auditor.run_forever)
            # Fork is called 2 times since the zbf process is forked just
            # once before self._sleep() is called and StopForever is raised
            # Also wait is called just once before StopForever is raised
            self.assertEquals(mocker.fork_called, 2)
            self.assertEquals(mocker.wait_called, 1)

            my_auditor._sleep = mocker.mock_sleep_continue

            my_auditor.concurrency = 2
            mocker.fork_called = 0
            mocker.wait_called = 0
            my_auditor.run_once()
            # Fork is called no. of devices + (no. of devices)/2 + 1 times
            # since zbf process is forked (no.of devices)/2 + 1 times
            no_devices = len(os.listdir(self.devices))
            self.assertEquals(mocker.fork_called, no_devices + no_devices / 2 + 1)
            self.assertEquals(mocker.wait_called, no_devices + no_devices / 2 + 1)

        finally:
            os.fork = was_fork
            os.wait = was_wait
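
The create()/write()/put() sequence these tests repeat can be factored into a
single helper; a sketch assuming the same DiskFile API the tests already use:

from hashlib import md5
import os

def write_object(df, data, timestamp):
    # stream data into a DiskFile writer, then commit it with the
    # minimal metadata the auditor expects
    etag = md5()
    with df.create() as writer:
        writer.write(data)
        etag.update(data)
        metadata = {
            'ETag': etag.hexdigest(),
            'X-Timestamp': timestamp,
            'Content-Length': str(os.fstat(writer._fd).st_size),
        }
        writer.put(metadata)
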
Ejemplo n.º 30
0
class TruncateMiddleware(object):
    def __init__(self, app, conf, *args, **kwargs):
        self.app = app
        self.conf = conf
        self.logger = get_logger(self.conf, log_route='truncate')
        self._diskfile_mgr = DiskFileManager(conf, self.logger)

    def truncate(self, env):
        req = Request(env)
        try:
            disk_file = self.get_diskfile(self.device, self.partition,
                                          self.account, self.container,
                                          self.obj)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=self.device,
                                           request=Request(copy(env)))
        
        # In the object flow, once the temporary data has been removed and
        # put() is called on the DiskFileWriter, _finalize_put() runs: it
        # names the object file after the X-Timestamp set in the metadata and
        # swaps the temp file into place, so no explicit truncate is needed.

        ori_meta = disk_file.read_metadata()
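        # Build a zero-byte stub in place of the object: keep the original
        # timestamp and content type, preserve the real length and ETag under
        # S3-prefixed keys, and use the MD5 of an empty body as the stub ETag.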
        metadata = {
            'X-Timestamp': ori_meta['X-Timestamp'],
            'Content-Type': ori_meta['Content-Type'],
            'ETag': 'd41d8cd98f00b204e9800998ecf8427e',
            'Content-Length': 0,
            'X-Object-Meta-Glacier': True,
            'X-Object-Meta-S3-Content-Length': ori_meta['Content-Length'],
            'X-Object-Meta-S3-ETag': ori_meta['ETag']
        }
        # Also keep the original object's user metadata
        metadata.update(val for val in ori_meta.iteritems()
                        if is_user_meta('object', val[0]))

        # If object-restore info is present, remove it; this is the
        # restored-object-expiration case.
        if 'X-Object-Meta-S3-Restore' in metadata:
            del metadata['X-Object-Meta-S3-Restore']

        with disk_file.create(size=0) as writer:
            writer.put(metadata)

        return HTTPCreated(request=req, etag=ori_meta['ETag'])

    def get_diskfile(self, device, partition, account, container, obj,
                     **kwargs):
        return self._diskfile_mgr.get_diskfile(device, partition, account,
                                               container, obj, **kwargs)

    def __call__(self, env, start_response):
        req = Request(copy(env))
        method = req.method

        if (method == 'PUT' or method == 'POST') and \
           GLACIER_FLAG_META in req.headers:
            self.device, self.partition, self.account, self.container, \
                self.obj = split_and_validate_path(req, 5, 5, True)
            return self.truncate(env)(env, start_response)

        return self.app(env, start_response)
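
TruncateMiddleware takes the usual (app, conf) pair, so it can be exposed to
paste deploy with a standard filter factory. A minimal sketch, assuming the
class is importable from wherever this module lives:

def filter_factory(global_conf, **local_conf):
    conf = global_conf.copy()
    conf.update(local_conf)

    def truncate_filter(app):
        return TruncateMiddleware(app, conf)
    return truncate_filter

The filter would then sit in the object-server pipeline ahead of the object
server app, so PUT and POST requests carrying GLACIER_FLAG_META reach it
first.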
Ejemplo n.º 31
0
class ObjectController(BaseStorageServer):
    """Implements the WSGI application for the Swift Object Server."""

    server_type = 'object-server'

    def __init__(self, conf, logger=None):
        """
        Creates a new WSGI application for the Swift Object Server. An
        example configuration is given at
        <source-dir>/etc/object-server.conf-sample or
        /etc/swift/object-server.conf-sample.
        """
        super(ObjectController, self).__init__(conf)
        self.logger = logger or get_logger(conf, log_route='object-server')
        self.node_timeout = int(conf.get('node_timeout', 3))
        self.conn_timeout = float(conf.get('conn_timeout', 0.5))
        self.client_timeout = int(conf.get('client_timeout', 60))
        self.disk_chunk_size = int(conf.get('disk_chunk_size', 65536))
        self.network_chunk_size = int(conf.get('network_chunk_size', 65536))
        self.log_requests = config_true_value(conf.get('log_requests', 'true'))
        self.max_upload_time = int(conf.get('max_upload_time', 86400))
        self.slow = int(conf.get('slow', 0))
        self.keep_cache_private = \
            config_true_value(conf.get('keep_cache_private', 'false'))

        default_allowed_headers = '''
            content-disposition,
            content-encoding,
            x-delete-at,
            x-object-manifest,
            x-static-large-object,
        '''
        extra_allowed_headers = [
            header.strip().lower() for header in conf.get(
                'allowed_headers', default_allowed_headers).split(',')
            if header.strip()
        ]
        self.allowed_headers = set()
        for header in extra_allowed_headers:
            if header not in DATAFILE_SYSTEM_META:
                self.allowed_headers.add(header)
        self.auto_create_account_prefix = \
            conf.get('auto_create_account_prefix') or '.'
        self.expiring_objects_account = self.auto_create_account_prefix + \
            (conf.get('expiring_objects_account_name') or 'expiring_objects')
        self.expiring_objects_container_divisor = \
            int(conf.get('expiring_objects_container_divisor') or 86400)
        # Initialization was successful, so now apply the network chunk size
        # parameter as the default read / write buffer size for the network
        # sockets.
        #
        # NOTE WELL: This is a class setting, so until we can set this on a
        # per-connection basis, this affects reading and writing on ALL
        # sockets, those between the proxy servers and external clients, and
        # those between the proxy servers and the other internal servers.
        #
        # ** Because the primary motivation for this is to optimize how data
        # is written back to the proxy server, we could use the value from the
        # disk_chunk_size parameter. However, it affects all created sockets
        # using this class so we have chosen to tie it to the
        # network_chunk_size parameter value instead.
        socket._fileobject.default_bufsize = self.network_chunk_size

        # Provide further setup specific to an object server implementation.
        self.setup(conf)

    def setup(self, conf):
        """
        Implementation specific setup. This method is called at the very end
        by the constructor to allow a specific implementation to modify
        existing attributes or add its own attributes.

        :param conf: WSGI configuration parameter
        """

        # Common on-disk hierarchy shared across account, container and object
        # servers.
        self._diskfile_mgr = DiskFileManager(conf, self.logger)
        # This is populated by global_conf_callback way below as the semaphore
        # is shared by all workers.
        if 'replication_semaphore' in conf:
            # The value was put in a list so it could get past paste
            self.replication_semaphore = conf['replication_semaphore'][0]
        else:
            self.replication_semaphore = None
        self.replication_failure_threshold = int(
            conf.get('replication_failure_threshold') or 100)
        self.replication_failure_ratio = float(
            conf.get('replication_failure_ratio') or 1.0)

    def get_diskfile(self, device, partition, account, container, obj,
                     policy_idx, **kwargs):
        """
        Utility method for instantiating a DiskFile object supporting a given
        REST API.

        An implementation of the object server that wants to use a different
        DiskFile class would simply over-ride this method to provide that
        behavior.
        """
        return self._diskfile_mgr.get_diskfile(device, partition, account,
                                               container, obj, policy_idx,
                                               **kwargs)

    def async_update(self, op, account, container, obj, host, partition,
                     contdevice, headers_out, objdevice, policy_index):
        """
        Sends or saves an async update.

        :param op: operation performed (ex: 'PUT', or 'DELETE')
        :param account: account name for the object
        :param container: container name for the object
        :param obj: object name
        :param host: host that the container is on
        :param partition: partition that the container is on
        :param contdevice: device name that the container is on
        :param headers_out: dictionary of headers to send in the container
                            request
        :param objdevice: device name that the object is in
        :param policy_index: the associated storage policy index
        """
        headers_out['user-agent'] = 'object-server %s' % os.getpid()
        full_path = '/%s/%s/%s' % (account, container, obj)
        if all([host, partition, contdevice]):
            try:
                with ConnectionTimeout(self.conn_timeout):
                    ip, port = host.rsplit(':', 1)
                    conn = http_connect(ip, port, contdevice, partition, op,
                                        full_path, headers_out)
                with Timeout(self.node_timeout):
                    response = conn.getresponse()
                    response.read()
                    if is_success(response.status):
                        return
                    else:
                        self.logger.error(
                            _('ERROR Container update failed '
                              '(saving for async update later): %(status)d '
                              'response from %(ip)s:%(port)s/%(dev)s'), {
                                  'status': response.status,
                                  'ip': ip,
                                  'port': port,
                                  'dev': contdevice
                              })
            except (Exception, Timeout):
                self.logger.exception(
                    _('ERROR container update failed with '
                      '%(ip)s:%(port)s/%(dev)s (saving for async update later)'
                      ), {
                          'ip': ip,
                          'port': port,
                          'dev': contdevice
                      })
        data = {
            'op': op,
            'account': account,
            'container': container,
            'obj': obj,
            'headers': headers_out
        }
        timestamp = headers_out['x-timestamp']
        self._diskfile_mgr.pickle_async_update(objdevice, account, container,
                                               obj, data, timestamp,
                                               policy_index)

    def container_update(self, op, account, container, obj, request,
                         headers_out, objdevice, policy_idx):
        """
        Update the container when objects are updated.

        :param op: operation performed (ex: 'PUT', or 'DELETE')
        :param account: account name for the object
        :param container: container name for the object
        :param obj: object name
        :param request: the original request object driving the update
        :param headers_out: dictionary of headers to send in the container
                            request(s)
        :param objdevice: device name that the object is in
        :param policy_idx: the associated storage policy index
        """
        headers_in = request.headers
        conthosts = [
            h.strip()
            for h in headers_in.get('X-Container-Host', '').split(',')
        ]
        contdevices = [
            d.strip()
            for d in headers_in.get('X-Container-Device', '').split(',')
        ]
        contpartition = headers_in.get('X-Container-Partition', '')

        if len(conthosts) != len(contdevices):
            # This shouldn't happen unless there's a bug in the proxy,
            # but if there is, we want to know about it.
            self.logger.error(
                _('ERROR Container update failed: different '
                  'numbers of hosts and devices in request: '
                  '"%s" vs "%s"') % (headers_in.get('X-Container-Host', ''),
                                     headers_in.get('X-Container-Device', '')))
            return

        if contpartition:
            updates = zip(conthosts, contdevices)
        else:
            updates = []

        headers_out['x-trans-id'] = headers_in.get('x-trans-id', '-')
        headers_out['referer'] = request.as_referer()
        headers_out['X-Backend-Storage-Policy-Index'] = policy_idx
        for conthost, contdevice in updates:
            self.async_update(op, account, container, obj, conthost,
                              contpartition, contdevice, headers_out,
                              objdevice, policy_idx)

    def delete_at_update(self, op, delete_at, account, container, obj, request,
                         objdevice, policy_index):
        """
        Update the expiring objects container when objects are updated.

        :param op: operation performed (ex: 'PUT', or 'DELETE')
        :param delete_at: scheduled delete in UNIX seconds, int
        :param account: account name for the object
        :param container: container name for the object
        :param obj: object name
        :param request: the original request driving the update
        :param objdevice: device name that the object is in
        :param policy_index: the policy index to be used for tmp dir
        """
        if config_true_value(request.headers.get('x-backend-replication',
                                                 'f')):
            return
        delete_at = normalize_delete_at_timestamp(delete_at)
        updates = [(None, None)]

        partition = None
        hosts = contdevices = [None]
        headers_in = request.headers
        headers_out = HeaderKeyDict({
            # system accounts are always Policy-0
            'X-Backend-Storage-Policy-Index': 0,
            'x-timestamp': request.timestamp.internal,
            'x-trans-id': headers_in.get('x-trans-id', '-'),
            'referer': request.as_referer()})
        if op != 'DELETE':
            delete_at_container = headers_in.get('X-Delete-At-Container', None)
            if not delete_at_container:
                self.logger.warning(
                    'X-Delete-At-Container header must be specified for '
                    'expiring objects background %s to work properly. Making '
                    'best guess as to the container name for now.' % op)
                # TODO(gholt): In a future release, change the above warning to
                # a raised exception and remove the guess code below.
                delete_at_container = get_expirer_container(
                    delete_at, self.expiring_objects_container_divisor,
                    account, container, obj)
            partition = headers_in.get('X-Delete-At-Partition', None)
            hosts = headers_in.get('X-Delete-At-Host', '')
            contdevices = headers_in.get('X-Delete-At-Device', '')
            updates = [
                upd for upd in zip((h.strip() for h in hosts.split(',')), (
                    c.strip() for c in contdevices.split(',')))
                if all(upd) and partition
            ]
            if not updates:
                updates = [(None, None)]
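            # PUT entries in the expirer container are zero-byte markers;
            # the x-etag below is the MD5 of an empty body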
            headers_out['x-size'] = '0'
            headers_out['x-content-type'] = 'text/plain'
            headers_out['x-etag'] = 'd41d8cd98f00b204e9800998ecf8427e'
        else:
            # DELETEs of old expiration data have no way of knowing what the
            # old X-Delete-At-Container was at the time of the initial setting
            # of the data, so a best guess is made here.
            # Worst case is a DELETE is issued now for something that doesn't
            # exist there and the original data is left where it is, where
            # it will be ignored when the expirer eventually tries to issue the
            # object DELETE later since the X-Delete-At value won't match up.
            delete_at_container = get_expirer_container(
                delete_at, self.expiring_objects_container_divisor, account,
                container, obj)
        delete_at_container = normalize_delete_at_timestamp(
            delete_at_container)

        for host, contdevice in updates:
            self.async_update(
                op, self.expiring_objects_account, delete_at_container,
                '%s-%s/%s/%s' % (delete_at, account, container, obj), host,
                partition, contdevice, headers_out, objdevice, policy_index)

    @public
    @timing_stats()
    def POST(self, request):
        """Handle HTTP POST requests for the Swift Object Server."""
        device, partition, account, container, obj, policy_idx = \
            get_name_and_placement(request, 5, 5, True)
        req_timestamp = valid_timestamp(request)
        new_delete_at = int(request.headers.get('X-Delete-At') or 0)
        if new_delete_at and new_delete_at < time.time():
            return HTTPBadRequest(body='X-Delete-At in past',
                                  request=request,
                                  content_type='text/plain')
        try:
            disk_file = self.get_diskfile(device,
                                          partition,
                                          account,
                                          container,
                                          obj,
                                          policy_idx=policy_idx)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=device, request=request)
        try:
            orig_metadata = disk_file.read_metadata()
        except DiskFileXattrNotSupported:
            return HTTPInsufficientStorage(drive=device, request=request)
        except (DiskFileNotExist, DiskFileQuarantined):
            return HTTPNotFound(request=request)
        orig_timestamp = Timestamp(orig_metadata.get('X-Timestamp', 0))
        if orig_timestamp >= req_timestamp:
            return HTTPConflict(
                request=request,
                headers={'X-Backend-Timestamp': orig_timestamp.internal})
        metadata = {'X-Timestamp': req_timestamp.internal}
        metadata.update(val for val in request.headers.iteritems()
                        if is_user_meta('object', val[0]))
        for header_key in self.allowed_headers:
            if header_key in request.headers:
                header_caps = header_key.title()
                metadata[header_caps] = request.headers[header_key]
        orig_delete_at = int(orig_metadata.get('X-Delete-At') or 0)
        if orig_delete_at != new_delete_at:
            if new_delete_at:
                self.delete_at_update('PUT', new_delete_at, account, container,
                                      obj, request, device, policy_idx)
            if orig_delete_at:
                self.delete_at_update('DELETE', orig_delete_at, account,
                                      container, obj, request, device,
                                      policy_idx)
        try:
            disk_file.write_metadata(metadata)
        except (DiskFileXattrNotSupported, DiskFileNoSpace):
            return HTTPInsufficientStorage(drive=device, request=request)
        return HTTPAccepted(request=request)

    @public
    @timing_stats()
    def PUT(self, request):
        """Handle HTTP PUT requests for the Swift Object Server."""
        device, partition, account, container, obj, policy_idx = \
            get_name_and_placement(request, 5, 5, True)
        req_timestamp = valid_timestamp(request)
        error_response = check_object_creation(request, obj)
        if error_response:
            return error_response
        new_delete_at = int(request.headers.get('X-Delete-At') or 0)
        if new_delete_at and new_delete_at < time.time():
            return HTTPBadRequest(body='X-Delete-At in past',
                                  request=request,
                                  content_type='text/plain')
        try:
            fsize = request.message_length()
        except ValueError as e:
            return HTTPBadRequest(body=str(e),
                                  request=request,
                                  content_type='text/plain')
        try:
            disk_file = self.get_diskfile(device,
                                          partition,
                                          account,
                                          container,
                                          obj,
                                          policy_idx=policy_idx)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=device, request=request)
        try:
            orig_metadata = disk_file.read_metadata()
        except DiskFileXattrNotSupported:
            return HTTPInsufficientStorage(drive=device, request=request)
        except (DiskFileNotExist, DiskFileQuarantined):
            orig_metadata = {}

        # Checks for If-None-Match
        if request.if_none_match is not None and orig_metadata:
            if '*' in request.if_none_match:
                # File exists already so return 412
                return HTTPPreconditionFailed(request=request)
            if orig_metadata.get('ETag') in request.if_none_match:
                # The current ETag matches, so return 412
                return HTTPPreconditionFailed(request=request)

        orig_timestamp = Timestamp(orig_metadata.get('X-Timestamp', 0))
        if orig_timestamp >= req_timestamp:
            return HTTPConflict(
                request=request,
                headers={'X-Backend-Timestamp': orig_timestamp.internal})
        orig_delete_at = int(orig_metadata.get('X-Delete-At') or 0)
        upload_expiration = time.time() + self.max_upload_time
        etag = md5()
        elapsed_time = 0
        try:
            with disk_file.create(size=fsize) as writer:
                upload_size = 0

                def timeout_reader():
                    with ChunkReadTimeout(self.client_timeout):
                        return request.environ['wsgi.input'].read(
                            self.network_chunk_size)

                try:
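                    # iter() with sentinel '' ends the loop when read()
                    # returns an empty string at EOF; ChunkReadTimeout fires
                    # if the client stalls longer than client_timeout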
                    for chunk in iter(lambda: timeout_reader(), ''):
                        start_time = time.time()
                        if start_time > upload_expiration:
                            self.logger.increment('PUT.timeouts')
                            return HTTPRequestTimeout(request=request)
                        etag.update(chunk)
                        upload_size = writer.write(chunk)
                        elapsed_time += time.time() - start_time
                except ChunkReadTimeout:
                    return HTTPRequestTimeout(request=request)
                if upload_size:
                    self.logger.transfer_rate('PUT.' + device + '.timing',
                                              elapsed_time, upload_size)
                if fsize is not None and fsize != upload_size:
                    return HTTPClientDisconnect(request=request)
                etag = etag.hexdigest()
                if 'etag' in request.headers and \
                        request.headers['etag'].lower() != etag:
                    return HTTPUnprocessableEntity(request=request)
                metadata = {
                    'X-Timestamp': request.timestamp.internal,
                    'Content-Type': request.headers['content-type'],
                    'ETag': etag,
                    'Content-Length': str(upload_size),
                }
                metadata.update(val for val in request.headers.iteritems()
                                if is_sys_or_user_meta('object', val[0]))
                headers_to_copy = (request.headers.get(
                    'X-Backend-Replication-Headers', '').split() +
                                   list(self.allowed_headers))
                for header_key in headers_to_copy:
                    if header_key in request.headers:
                        header_caps = header_key.title()
                        metadata[header_caps] = request.headers[header_key]
                writer.put(metadata)
        except (DiskFileXattrNotSupported, DiskFileNoSpace):
            return HTTPInsufficientStorage(drive=device, request=request)
        if orig_delete_at != new_delete_at:
            if new_delete_at:
                self.delete_at_update('PUT', new_delete_at, account, container,
                                      obj, request, device, policy_idx)
            if orig_delete_at:
                self.delete_at_update('DELETE', orig_delete_at, account,
                                      container, obj, request, device,
                                      policy_idx)
        self.container_update(
            'PUT', account, container, obj, request,
            HeaderKeyDict({
                'x-size': metadata['Content-Length'],
                'x-content-type': metadata['Content-Type'],
                'x-timestamp': metadata['X-Timestamp'],
                'x-etag': metadata['ETag']
            }), device, policy_idx)
        return HTTPCreated(request=request, etag=etag)

    @public
    @timing_stats()
    def GET(self, request):
        """Handle HTTP GET requests for the Swift Object Server."""
        device, partition, account, container, obj, policy_idx = \
            get_name_and_placement(request, 5, 5, True)
        keep_cache = self.keep_cache_private or (
            'X-Auth-Token' not in request.headers
            and 'X-Storage-Token' not in request.headers)
        try:
            disk_file = self.get_diskfile(device,
                                          partition,
                                          account,
                                          container,
                                          obj,
                                          policy_idx=policy_idx)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=device, request=request)
        try:
            with disk_file.open():
                metadata = disk_file.get_metadata()
                obj_size = int(metadata['Content-Length'])
                file_x_ts = Timestamp(metadata['X-Timestamp'])
                keep_cache = (self.keep_cache_private or
                              ('X-Auth-Token' not in request.headers
                               and 'X-Storage-Token' not in request.headers))
                response = Response(
                    app_iter=disk_file.reader(keep_cache=keep_cache),
                    request=request,
                    conditional_response=True)
                response.headers['Content-Type'] = metadata.get(
                    'Content-Type', 'application/octet-stream')
                for key, value in metadata.iteritems():
                    if is_sys_or_user_meta('object', key) or \
                            key.lower() in self.allowed_headers:
                        response.headers[key] = value
                response.etag = metadata['ETag']
                response.last_modified = math.ceil(float(file_x_ts))
                response.content_length = obj_size
                try:
                    response.content_encoding = metadata['Content-Encoding']
                except KeyError:
                    pass
                response.headers['X-Timestamp'] = file_x_ts.normal
                response.headers['X-Backend-Timestamp'] = file_x_ts.internal
                resp = request.get_response(response)
        except DiskFileXattrNotSupported:
            return HTTPInsufficientStorage(drive=device, request=request)
        except (DiskFileNotExist, DiskFileQuarantined) as e:
            headers = {}
            if hasattr(e, 'timestamp'):
                headers['X-Backend-Timestamp'] = e.timestamp.internal
            resp = HTTPNotFound(request=request,
                                headers=headers,
                                conditional_response=True)
        return resp

    @public
    @timing_stats(sample_rate=0.8)
    def HEAD(self, request):
        """Handle HTTP HEAD requests for the Swift Object Server."""
        device, partition, account, container, obj, policy_idx = \
            get_name_and_placement(request, 5, 5, True)
        try:
            disk_file = self.get_diskfile(device,
                                          partition,
                                          account,
                                          container,
                                          obj,
                                          policy_idx=policy_idx)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=device, request=request)
        try:
            metadata = disk_file.read_metadata()
        except DiskFileXattrNotSupported:
            return HTTPInsufficientStorage(drive=device, request=request)
        except (DiskFileNotExist, DiskFileQuarantined) as e:
            headers = {}
            if hasattr(e, 'timestamp'):
                headers['X-Backend-Timestamp'] = e.timestamp.internal
            return HTTPNotFound(request=request,
                                headers=headers,
                                conditional_response=True)
        response = Response(request=request, conditional_response=True)
        response.headers['Content-Type'] = metadata.get(
            'Content-Type', 'application/octet-stream')
        for key, value in metadata.iteritems():
            if is_sys_or_user_meta('object', key) or \
                    key.lower() in self.allowed_headers:
                response.headers[key] = value
        response.etag = metadata['ETag']
        ts = Timestamp(metadata['X-Timestamp'])
        response.last_modified = math.ceil(float(ts))
        # Needed for container sync feature
        response.headers['X-Timestamp'] = ts.normal
        response.headers['X-Backend-Timestamp'] = ts.internal
        response.content_length = int(metadata['Content-Length'])
        try:
            response.content_encoding = metadata['Content-Encoding']
        except KeyError:
            pass
        return response

    @public
    @timing_stats()
    def DELETE(self, request):
        """Handle HTTP DELETE requests for the Swift Object Server."""
        device, partition, account, container, obj, policy_idx = \
            get_name_and_placement(request, 5, 5, True)
        req_timestamp = valid_timestamp(request)
        try:
            disk_file = self.get_diskfile(device,
                                          partition,
                                          account,
                                          container,
                                          obj,
                                          policy_idx=policy_idx)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=device, request=request)
        try:
            orig_metadata = disk_file.read_metadata()
        except DiskFileXattrNotSupported:
            return HTTPInsufficientStorage(drive=device, request=request)
        except DiskFileExpired as e:
            orig_timestamp = e.timestamp
            orig_metadata = e.metadata
            response_class = HTTPNotFound
        except DiskFileDeleted as e:
            orig_timestamp = e.timestamp
            orig_metadata = {}
            response_class = HTTPNotFound
        except (DiskFileNotExist, DiskFileQuarantined):
            orig_timestamp = 0
            orig_metadata = {}
            response_class = HTTPNotFound
        else:
            orig_timestamp = Timestamp(orig_metadata.get('X-Timestamp', 0))
            if orig_timestamp < req_timestamp:
                response_class = HTTPNoContent
            else:
                response_class = HTTPConflict
        response_timestamp = max(orig_timestamp, req_timestamp)
        orig_delete_at = int(orig_metadata.get('X-Delete-At') or 0)
        try:
            req_if_delete_at_val = request.headers['x-if-delete-at']
            req_if_delete_at = int(req_if_delete_at_val)
        except KeyError:
            pass
        except ValueError:
            return HTTPBadRequest(request=request,
                                  body='Bad X-If-Delete-At header value')
        else:
            # request includes x-if-delete-at; we must not place a tombstone
            # if we cannot verify the x-if-delete-at time
            if not orig_timestamp:
                # no object found at all
                return HTTPNotFound()
            if orig_delete_at != req_if_delete_at:
                return HTTPPreconditionFailed(
                    request=request,
                    body='X-If-Delete-At and X-Delete-At do not match')
            else:
                # differentiate success from no object at all
                response_class = HTTPNoContent
        if orig_delete_at:
            self.delete_at_update('DELETE', orig_delete_at, account, container,
                                  obj, request, device, policy_idx)
        if orig_timestamp < req_timestamp:
            disk_file.delete(req_timestamp)
            self.container_update(
                'DELETE', account, container, obj, request,
                HeaderKeyDict({'x-timestamp': req_timestamp.internal}), device,
                policy_idx)
        return response_class(
            request=request,
            headers={'X-Backend-Timestamp': response_timestamp.internal})

    @public
    @replication
    @timing_stats(sample_rate=0.1)
    def REPLICATE(self, request):
        """
        Handle REPLICATE requests for the Swift Object Server.  This is used
        by the object replicator to get hashes for directories.
        """
        device, partition, suffix, policy_idx = \
            get_name_and_placement(request, 2, 3, True)
        try:
            hashes = self._diskfile_mgr.get_hashes(device, partition, suffix,
                                                   policy_idx)
        except DiskFileDeviceUnavailable:
            resp = HTTPInsufficientStorage(drive=device, request=request)
        else:
            resp = Response(body=pickle.dumps(hashes))
        return resp

    @public
    @replication
    @timing_stats(sample_rate=0.1)
    def REPLICATION(self, request):
        return Response(app_iter=ssync_receiver.Receiver(self, request)())

    def __call__(self, env, start_response):
        """WSGI Application entry point for the Swift Object Server."""
        start_time = time.time()
        req = Request(env)
        self.logger.txn_id = req.headers.get('x-trans-id', None)

        if not check_utf8(req.path_info):
            res = HTTPPreconditionFailed(body='Invalid UTF8 or contains NULL')
        else:
            try:
                # disallow methods which have not been marked 'public'
                try:
                    if req.method not in self.allowed_methods:
                        raise AttributeError('Not allowed method.')
                except AttributeError:
                    res = HTTPMethodNotAllowed()
                else:
                    method = getattr(self, req.method)
                    res = method(req)
            except DiskFileCollision:
                res = HTTPForbidden(request=req)
            except HTTPException as error_response:
                res = error_response
            except (Exception, Timeout):
                self.logger.exception(
                    _('ERROR __call__ error with %(method)s'
                      ' %(path)s '), {
                          'method': req.method,
                          'path': req.path
                      })
                res = HTTPInternalServerError(body=traceback.format_exc())
        trans_time = time.time() - start_time
        if self.log_requests:
            log_line = get_log_line(req, res, trans_time, '')
            if req.method in ('REPLICATE', 'REPLICATION') or \
                    'X-Backend-Replication' in req.headers:
                self.logger.debug(log_line)
            else:
                self.logger.info(log_line)
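        # if a 'slow' threshold is configured, pad fast PUT/DELETE responses
        # so each takes at least that many seconds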
        if req.method in ('PUT', 'DELETE'):
            slow = self.slow - trans_time
            if slow > 0:
                sleep(slow)

        # To be able to zero-copy send the object, we need a few things.
        # First, we have to be responding successfully to a GET, or else we're
        # not sending the object. Second, we have to be able to extract the
        # socket file descriptor from the WSGI input object. Third, the
        # diskfile has to support zero-copy send.
        #
        # There's a good chance that this could work for 206 responses too,
        # but the common case is sending the whole object, so we'll start
        # there.
        if req.method == 'GET' and res.status_int == 200 and \
           isinstance(env['wsgi.input'], wsgi.Input):
            app_iter = getattr(res, 'app_iter', None)
            checker = getattr(app_iter, 'can_zero_copy_send', None)
            if checker and checker():
                # For any kind of zero-copy thing like sendfile or splice, we
                # need the file descriptor. Eventlet doesn't provide a clean
                # way of getting that, so we resort to this.
                wsock = env['wsgi.input'].get_socket()
                wsockfd = wsock.fileno()

                # Don't call zero_copy_send() until after we force the HTTP
                # headers out of Eventlet and into the socket.
                def zero_copy_iter():
                    # If possible, set TCP_CORK so that headers don't
                    # immediately go on the wire, but instead, wait for some
                    # response body to make the TCP frames as large as
                    # possible (and hence as few packets as possible).
                    #
                    # On non-Linux systems, we might consider TCP_NODELAY, but
                    # since the only known zero-copy-capable diskfile uses
                    # Linux-specific syscalls, we'll defer that work until
                    # someone needs it.
                    if hasattr(socket, 'TCP_CORK'):
                        wsock.setsockopt(socket.IPPROTO_TCP, socket.TCP_CORK,
                                         1)
                    yield EventletPlungerString()
                    try:
                        app_iter.zero_copy_send(wsockfd)
                    except Exception:
                        self.logger.exception("zero_copy_send() blew up")
                        raise
                    yield ''

                # Get headers ready to go out
                res(env, start_response)
                return zero_copy_iter()
            else:
                return res(env, start_response)
        else:
            return res(env, start_response)
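
container_update above pairs the comma-separated X-Container-Host and
X-Container-Device headers element by element and issues one async_update
per replica. The pairing is easy to see in isolation; a standalone sketch
with invented header values:

headers_in = {
    'X-Container-Host': '10.0.0.1:6001,10.0.0.2:6001,10.0.0.3:6001',
    'X-Container-Device': 'sdb,sdc,sdd',
    'X-Container-Partition': '312',
}

conthosts = [h.strip()
             for h in headers_in.get('X-Container-Host', '').split(',')]
contdevices = [d.strip()
               for d in headers_in.get('X-Container-Device', '').split(',')]

if len(conthosts) == len(contdevices) and \
        headers_in.get('X-Container-Partition'):
    for host, device in zip(conthosts, contdevices):
        # one container replica update per (host, device) pair
        print('%s/%s' % (host, device))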
Ejemplo n.º 32
0
class TestAuditor(unittest.TestCase):

    def setUp(self):
        self.testdir = os.path.join(mkdtemp(), 'tmp_test_object_auditor')
        self.devices = os.path.join(self.testdir, 'node')
        self.logger = FakeLogger()
        rmtree(self.testdir, ignore_errors=1)
        mkdirs(os.path.join(self.devices, 'sda'))
        self.objects = os.path.join(self.devices, 'sda', 'objects')

        os.mkdir(os.path.join(self.devices, 'sdb'))
        self.objects_2 = os.path.join(self.devices, 'sdb', 'objects')

        os.mkdir(self.objects)
        self.parts = {}
        for part in ['0', '1', '2', '3']:
            self.parts[part] = os.path.join(self.objects, part)
            os.mkdir(os.path.join(self.objects, part))

        self.conf = dict(
            devices=self.devices,
            mount_check='false',
            object_size_stats='10,100,1024,10240')
        self.df_mgr = DiskFileManager(self.conf, self.logger)
        self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'o')

    def tearDown(self):
        rmtree(os.path.dirname(self.testdir), ignore_errors=1)
        unit.xattr_data = {}

    def test_object_audit_extra_data(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger)
        data = '0' * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            timestamp = str(normalize_timestamp(time.time()))
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            pre_quarantines = auditor_worker.quarantines

            auditor_worker.object_audit(
                os.path.join(self.disk_file._datadir, timestamp + '.data'),
                'sda', '0')
            self.assertEquals(auditor_worker.quarantines, pre_quarantines)

            os.write(writer._fd, 'extra_data')
            auditor_worker.object_audit(
                os.path.join(self.disk_file._datadir, timestamp + '.data'),
                'sda', '0')
            self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_audit_diff_data(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger)
        data = '0' * 1024
        etag = md5()
        timestamp = str(normalize_timestamp(time.time()))
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            pre_quarantines = auditor_worker.quarantines

        # remake so it will have metadata
        self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'o')

        auditor_worker.object_audit(
            os.path.join(self.disk_file._datadir, timestamp + '.data'),
            'sda', '0')
        self.assertEquals(auditor_worker.quarantines, pre_quarantines)
        etag = md5()
        etag.update('1' + '0' * 1023)
        etag = etag.hexdigest()
        metadata['ETag'] = etag

        with self.disk_file.create() as writer:
            writer.write(data)
            writer.put(metadata)

        auditor_worker.object_audit(
            os.path.join(self.disk_file._datadir, timestamp + '.data'),
            'sda', '0')
        self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_audit_no_meta(self):
        timestamp = str(normalize_timestamp(time.time()))
        path = os.path.join(self.disk_file._datadir, timestamp + '.data')
        mkdirs(self.disk_file._datadir)
        fp = open(path, 'w')
        fp.write('0' * 1024)
        fp.close()
        invalidate_hash(os.path.dirname(self.disk_file._datadir))
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger)
        pre_quarantines = auditor_worker.quarantines
        auditor_worker.object_audit(
            os.path.join(self.disk_file._datadir, timestamp + '.data'),
            'sda', '0')
        self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_audit_will_not_swallow_errors_in_tests(self):
        timestamp = str(normalize_timestamp(time.time()))
        path = os.path.join(self.disk_file._datadir, timestamp + '.data')
        mkdirs(self.disk_file._datadir)
        with open(path, 'w') as f:
            write_metadata(f, {'name': '/a/c/o'})
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger)

        def blowup(*args):
            raise NameError('tpyo')
        with mock.patch('swift.obj.diskfile.DiskFile',
                        blowup):
            self.assertRaises(NameError, auditor_worker.object_audit,
                              path, 'sda', '0')

    def test_failsafe_object_audit_will_swallow_errors_in_tests(self):
        timestamp = str(normalize_timestamp(time.time()))
        path = os.path.join(self.disk_file._datadir, timestamp + '.data')
        mkdirs(self.disk_file._datadir)
        with open(path, 'w') as f:
            write_metadata(f, {'name': '/a/c/o'})
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger)

        def blowup(*args):
            raise NameError('tpyo')
        with mock.patch('swift.obj.diskfile.DiskFile',
                        blowup):
            auditor_worker.failsafe_object_audit(path, 'sda', '0')
        self.assertEquals(auditor_worker.errors, 1)

    def test_generic_exception_handling(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger)
        timestamp = str(normalize_timestamp(time.time()))
        pre_errors = auditor_worker.errors
        data = '0' * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
        with mock.patch('swift.obj.diskfile.DiskFile',
                        lambda *_: 1 / 0):
            auditor_worker.audit_all_objects()
        self.assertEquals(auditor_worker.errors, pre_errors + 1)

    def test_object_run_once_pass(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger)
        auditor_worker.log_time = 0
        timestamp = str(normalize_timestamp(time.time()))
        pre_quarantines = auditor_worker.quarantines
        data = '0' * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
        auditor_worker.audit_all_objects()
        self.assertEquals(auditor_worker.quarantines, pre_quarantines)
        self.assertEquals(auditor_worker.stats_buckets[1024], 1)
        self.assertEquals(auditor_worker.stats_buckets[10240], 0)

    def test_object_run_once_no_sda(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger)
        timestamp = str(normalize_timestamp(time.time()))
        pre_quarantines = auditor_worker.quarantines
        data = '0' * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            os.write(writer._fd, 'extra_data')
        auditor_worker.audit_all_objects()
        self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_run_once_multi_devices(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger)
        timestamp = str(normalize_timestamp(time.time()))
        pre_quarantines = auditor_worker.quarantines
        data = '0' * 10
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
        auditor_worker.audit_all_objects()
        self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'ob')
        data = '1' * 10
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            os.write(writer._fd, 'extra_data')
        auditor_worker.audit_all_objects()
        self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_run_fast_track_non_zero(self):
        self.auditor = auditor.ObjectAuditor(self.conf)
        self.auditor.log_time = 0
        data = '0' * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': str(normalize_timestamp(time.time())),
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            etag = md5()
            etag.update('1' + '0' * 1023)
            etag = etag.hexdigest()
            metadata['ETag'] = etag
            write_metadata(writer._fd, metadata)

        quarantine_path = os.path.join(self.devices,
                                       'sda', 'quarantined', 'objects')
        self.auditor.run_once(zero_byte_fps=50)
        self.assertFalse(os.path.isdir(quarantine_path))
        self.auditor.run_once()
        self.assertTrue(os.path.isdir(quarantine_path))

    def setup_bad_zero_byte(self, with_ts=False):
        self.auditor = auditor.ObjectAuditor(self.conf)
        self.auditor.log_time = 0
        ts_file_path = ''
        if with_ts:
            name_hash = hash_path('a', 'c', 'o')
            dir_path = os.path.join(
                self.devices, 'sda',
                storage_directory(DATADIR, '0', name_hash))
            ts_file_path = os.path.join(dir_path, '99999.ts')
            if not os.path.exists(dir_path):
                mkdirs(dir_path)
            fp = open(ts_file_path, 'w')
            fp.close()

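        # put metadata claiming Content-Length 10 over an empty body; the
        # resulting zero-byte/size mismatch is what the fast-track audit
        # quarantines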
        etag = md5()
        with self.disk_file.create() as writer:
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': str(normalize_timestamp(time.time())),
                'Content-Length': 10,
            }
            writer.put(metadata)
            etag = md5()
            etag = etag.hexdigest()
            metadata['ETag'] = etag
            write_metadata(writer._fd, metadata)
        return ts_file_path

    def test_object_run_fast_track_all(self):
        self.setup_bad_zero_byte()
        self.auditor.run_once()
        quarantine_path = os.path.join(self.devices,
                                       'sda', 'quarantined', 'objects')
        self.assertTrue(os.path.isdir(quarantine_path))

    def test_object_run_fast_track_zero(self):
        self.setup_bad_zero_byte()
        self.auditor.run_once(zero_byte_fps=50)
        quarantine_path = os.path.join(self.devices,
                                       'sda', 'quarantined', 'objects')
        self.assertTrue(os.path.isdir(quarantine_path))

    def test_object_run_fast_track_zero_check_closed(self):
        rat = [False]

        class FakeFile(DiskFile):

            def _quarantine(self, data_file, msg):
                rat[0] = True
                DiskFile._quarantine(self, data_file, msg)

        self.setup_bad_zero_byte()
        was_df = auditor.diskfile.DiskFile
        try:
            auditor.diskfile.DiskFile = FakeFile
            self.auditor.run_once(zero_byte_fps=50)
            quarantine_path = os.path.join(self.devices,
                                           'sda', 'quarantined', 'objects')
            self.assertTrue(os.path.isdir(quarantine_path))
            self.assertTrue(rat[0])
        finally:
            auditor.diskfile.DiskFile = was_df

    def test_with_tombstone(self):
        ts_file_path = self.setup_bad_zero_byte(with_ts=True)
        self.auditor.run_once()
        self.assertTrue(ts_file_path.endswith('ts'))
        self.assertTrue(os.path.exists(ts_file_path))

    def test_sleeper(self):
        auditor.SLEEP_BETWEEN_AUDITS = 0.10
        my_auditor = auditor.ObjectAuditor(self.conf)
        start = time.time()
        my_auditor._sleep()
        delta_t = time.time() - start
        self.assert_(delta_t > 0.08)
        self.assert_(delta_t < 0.12)

    def test_run_forever(self):

        class StopForever(Exception):
            pass

        class ObjectAuditorMock(object):
            check_args = ()
            check_kwargs = {}
            fork_called = 0
            fork_res = 0

            def mock_run(self, *args, **kwargs):
                self.check_args = args
                self.check_kwargs = kwargs

            def mock_sleep(self):
                raise StopForever('stop')

            def mock_fork(self):
                self.fork_called += 1
                return self.fork_res

        my_auditor = auditor.ObjectAuditor(dict(devices=self.devices,
                                                mount_check='false',
                                                zero_byte_files_per_second=89))
        mocker = ObjectAuditorMock()
        my_auditor.run_once = mocker.mock_run
        my_auditor._sleep = mocker.mock_sleep
        was_fork = os.fork
        try:
            os.fork = mocker.mock_fork
            self.assertRaises(StopForever,
                              my_auditor.run_forever, zero_byte_fps=50)
            self.assertEquals(mocker.check_kwargs['zero_byte_fps'], 50)
            self.assertEquals(mocker.fork_called, 0)

            self.assertRaises(StopForever, my_auditor.run_forever)
            self.assertEquals(mocker.fork_called, 1)
            self.assertEquals(mocker.check_args, ())

            mocker.fork_res = 1
            self.assertRaises(StopForever, my_auditor.run_forever)
            self.assertEquals(mocker.fork_called, 2)
            self.assertEquals(mocker.check_kwargs['zero_byte_fps'], 89)

        finally:
            os.fork = was_fork
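
Both run_forever tests swap os.fork by hand and restore it in a finally
block; the mock library these tests already import can express the same
save/restore more tersely. A sketch:

with mock.patch('os.fork', mocker.mock_fork):
    self.assertRaises(StopForever,
                      my_auditor.run_forever, zero_byte_fps=50)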
class RestoreMiddleware(object):
    def __init__(self, app, conf, *args, **kwargs):
        self.app = app
        self.conf = conf
        self.logger = get_logger(self.conf, log_route='restore')
        self._diskfile_mgr = DiskFileManager(conf, self.logger)

    def __call__(self, env, start_response):
        req = Request(env)

        if (req.method == 'PUT') or (req.method == 'POST'):
            if 'X-Object-Meta-S3-Restored' in req.headers:
                return self.save_object(env)(env, start_response)
            if 'X-Object-Meta-S3-Restore' in req.headers:
                return self.set_restoring(env)(env, start_response)
        return self.app(env, start_response)

    def _split_request_path(self, req):
        self.device, self.partition, self.account, self.container, \
            self.obj = split_and_validate_path(req, 5, 5, True)

    def save_object(self, env):
        # Called by the Restorer daemon
        req = Request(env)
        self._split_request_path(req)
        try:
            disk_file = self.get_diskfile(self.device, self.partition,
                                          self.account, self.container,
                                          self.obj)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=self.device,
                                           request=Request(env))
        ori_meta = disk_file.read_metadata()
        metadata = {}
        metadata.update(val for val in req.headers.iteritems()
                        if is_user_meta('object', val[0]))
        del metadata['X-Object-Meta-S3-Restored']
        # Keep the original timestamp value
        metadata['X-Timestamp'] = ori_meta['X-Timestamp']
        metadata['Content-Type'] = ori_meta['Content-Type']
        fsize = req.message_length()
        etag = md5()
        try:
            with disk_file.create(size=fsize) as writer:
                def timeout_reader():
                    with ChunkReadTimeout(60):
                        return req.environ['wsgi.input'].read(65536)

                try:
                    for chunk in iter(lambda: timeout_reader(), ''):
                        etag.update(chunk)
                        writer.write(chunk)
                except ChunkReadTimeout:
                    return HTTPRequestTimeout(request=req)

                etag = etag.hexdigest()
                metadata['ETag'] = etag
                metadata['Content-Length'] = str(fsize)

                writer.put(metadata)
        except DiskFileNoSpace:
            return HTTPInsufficientStorage(drive=self.device, request=req)

        return HTTPCreated(request=req, etag=etag)

    def set_restoring(self, env):
        # Called when the Lifecycle middleware marks an object as being
        # restored
        req = Request(env)
        self._split_request_path(req)
        try:
            disk_file = self.get_diskfile(self.device, self.partition,
                                          self.account, self.container,
                                          self.obj)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=self.device,
                                           request=Request(env))
        ori_meta = disk_file.read_metadata()
        metadata = ori_meta
        metadata.update(val for val in req.headers.iteritems()
                        if is_user_meta('object', val[0]))

        # Preserve the original timestamp value
        with disk_file.create(size=0) as writer:
            writer.put(metadata)
        return HTTPCreated(request=req, etag=ori_meta['ETag'])

    def get_diskfile(self, device, partition, account, container, obj,
                     **kwargs):
        return self._diskfile_mgr.get_diskfile(device, partition, account,
                                               container, obj, **kwargs)
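
The example never shows how RestoreMiddleware is attached to a WSGI pipeline. A minimal sketch, assuming the standard paste.deploy filter-factory convention that Swift middleware normally follows (the factory name and config handling below are assumptions, not part of the example):

def filter_factory(global_conf, **local_conf):
    """Paste.deploy entry point for RestoreMiddleware (assumed wiring)."""
    conf = global_conf.copy()
    conf.update(local_conf)

    def restore_filter(app):
        return RestoreMiddleware(app, conf)
    return restore_filter

The object-server paste config would then reference this factory in its pipeline ahead of the object server app.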
Ejemplo n.º 35
0
class ObjectController(BaseStorageServer):
    """Implements the WSGI application for the Swift Object Server."""

    def __init__(self, conf, logger=None):
        """
        Creates a new WSGI application for the Swift Object Server. An
        example configuration is given at
        <source-dir>/etc/object-server.conf-sample or
        /etc/swift/object-server.conf-sample.
        """
        super(ObjectController, self).__init__(conf)
        self.logger = logger or get_logger(conf, log_route="object-server")
        self.node_timeout = int(conf.get("node_timeout", 3))
        self.conn_timeout = float(conf.get("conn_timeout", 0.5))
        self.client_timeout = int(conf.get("client_timeout", 60))
        self.disk_chunk_size = int(conf.get("disk_chunk_size", 65536))
        self.network_chunk_size = int(conf.get("network_chunk_size", 65536))
        self.log_requests = config_true_value(conf.get("log_requests", "true"))
        self.max_upload_time = int(conf.get("max_upload_time", 86400))
        self.slow = int(conf.get("slow", 0))
        self.keep_cache_private = config_true_value(conf.get("keep_cache_private", "false"))

        default_allowed_headers = """
            content-disposition,
            content-encoding,
            x-delete-at,
            x-object-manifest,
            x-static-large-object,
        """
        extra_allowed_headers = [
            header.strip().lower()
            for header in conf.get("allowed_headers", default_allowed_headers).split(",")
            if header.strip()
        ]
        self.allowed_headers = set()
        for header in extra_allowed_headers:
            if header not in DATAFILE_SYSTEM_META:
                self.allowed_headers.add(header)
        self.auto_create_account_prefix = conf.get("auto_create_account_prefix") or "."
        self.expiring_objects_account = self.auto_create_account_prefix + (
            conf.get("expiring_objects_account_name") or "expiring_objects"
        )
        self.expiring_objects_container_divisor = int(conf.get("expiring_objects_container_divisor") or 86400)
        # Initialization was successful, so now apply the network chunk size
        # parameter as the default read / write buffer size for the network
        # sockets.
        #
        # NOTE WELL: This is a class setting, so until we can set this on a
        # per-connection basis, this affects reading and writing on ALL
        # sockets, those between the proxy servers and external clients, and
        # those between the proxy servers and the other internal servers.
        #
        # ** Because the primary motivation for this is to optimize how data
        # is written back to the proxy server, we could use the value from the
        # disk_chunk_size parameter. However, it affects all created sockets
        # using this class so we have chosen to tie it to the
        # network_chunk_size parameter value instead.
        socket._fileobject.default_bufsize = self.network_chunk_size

        # Provide further setup specific to an object server implementation.
        self.setup(conf)

    def setup(self, conf):
        """
        Implementation specific setup. This method is called at the very end
        by the constructor to allow a specific implementation to modify
        existing attributes or add its own attributes.

        :param conf: WSGI configuration parameter
        """

        # Common on-disk hierarchy shared across account, container and object
        # servers.
        self._diskfile_mgr = DiskFileManager(conf, self.logger)
        # This is populated by global_conf_callback way below as the semaphore
        # is shared by all workers.
        if "replication_semaphore" in conf:
            # The value was put in a list so it could get past paste
            self.replication_semaphore = conf["replication_semaphore"][0]
        else:
            self.replication_semaphore = None
        self.replication_failure_threshold = int(conf.get("replication_failure_threshold") or 100)
        self.replication_failure_ratio = float(conf.get("replication_failure_ratio") or 1.0)

    def get_diskfile(self, device, partition, account, container, obj, policy_idx, **kwargs):
        """
        Utility method for instantiating a DiskFile object supporting a given
        REST API.

        An implementation of the object server that wants to use a different
        DiskFile class would simply over-ride this method to provide that
        behavior.
        """
        return self._diskfile_mgr.get_diskfile(device, partition, account, container, obj, policy_idx, **kwargs)

    def async_update(
        self, op, account, container, obj, host, partition, contdevice, headers_out, objdevice, policy_index
    ):
        """
        Sends or saves an async update.

        :param op: operation performed (ex: 'PUT', or 'DELETE')
        :param account: account name for the object
        :param container: container name for the object
        :param obj: object name
        :param host: host that the container is on
        :param partition: partition that the container is on
        :param contdevice: device name that the container is on
        :param headers_out: dictionary of headers to send in the container
                            request
        :param objdevice: device name that the object is in
        :param policy_index: the associated storage policy index
        """
        headers_out["user-agent"] = "object-server %s" % os.getpid()
        full_path = "/%s/%s/%s" % (account, container, obj)
        if all([host, partition, contdevice]):
            try:
                with ConnectionTimeout(self.conn_timeout):
                    ip, port = host.rsplit(":", 1)
                    conn = http_connect(ip, port, contdevice, partition, op, full_path, headers_out)
                with Timeout(self.node_timeout):
                    response = conn.getresponse()
                    response.read()
                    if is_success(response.status):
                        return
                    else:
                        self.logger.error(
                            _(
                                "ERROR Container update failed "
                                "(saving for async update later): %(status)d "
                                "response from %(ip)s:%(port)s/%(dev)s"
                            ),
                            {"status": response.status, "ip": ip, "port": port, "dev": contdevice},
                        )
            except (Exception, Timeout):
                self.logger.exception(
                    _("ERROR container update failed with " "%(ip)s:%(port)s/%(dev)s (saving for async update later)"),
                    {"ip": ip, "port": port, "dev": contdevice},
                )
        data = {"op": op, "account": account, "container": container, "obj": obj, "headers": headers_out}
        timestamp = headers_out["x-timestamp"]
        self._diskfile_mgr.pickle_async_update(objdevice, account, container, obj, data, timestamp, policy_index)
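
        # A hedged sketch of what pickle_async_update() persists when the
        # synchronous container update above fails or is skipped: a plain
        # dict written to the device's async-pending area and replayed later
        # by the object updater daemon (field values are illustrative):
        #
        #   {'op': 'PUT',
        #    'account': 'a', 'container': 'c', 'obj': 'o',
        #    'headers': {'x-timestamp': '1400000000.00000',
        #                'user-agent': 'object-server 1234'}}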

    def container_update(self, op, account, container, obj, request, headers_out, objdevice, policy_idx):
        """
        Update the container when objects are updated.

        :param op: operation performed (ex: 'PUT', or 'DELETE')
        :param account: account name for the object
        :param container: container name for the object
        :param obj: object name
        :param request: the original request object driving the update
        :param headers_out: dictionary of headers to send in the container
                            request(s)
        :param objdevice: device name that the object is in
        :param policy_idx: the storage policy index for the update
        """
        headers_in = request.headers
        conthosts = [h.strip() for h in headers_in.get("X-Container-Host", "").split(",")]
        contdevices = [d.strip() for d in headers_in.get("X-Container-Device", "").split(",")]
        contpartition = headers_in.get("X-Container-Partition", "")

        if len(conthosts) != len(contdevices):
            # This shouldn't happen unless there's a bug in the proxy,
            # but if there is, we want to know about it.
            self.logger.error(
                _(
                    "ERROR Container update failed: different "
                    "numbers of hosts and devices in request: "
                    '"%s" vs "%s"'
                )
                % (headers_in.get("X-Container-Host", ""), headers_in.get("X-Container-Device", ""))
            )
            return

        if contpartition:
            updates = zip(conthosts, contdevices)
        else:
            updates = []

        headers_out["x-trans-id"] = headers_in.get("x-trans-id", "-")
        headers_out["referer"] = request.as_referer()
        headers_out["X-Backend-Storage-Policy-Index"] = policy_idx
        for conthost, contdevice in updates:
            self.async_update(
                op, account, container, obj, conthost, contpartition, contdevice, headers_out, objdevice, policy_idx
            )

    def delete_at_update(self, op, delete_at, account, container, obj, request, objdevice, policy_index):
        """
        Update the expiring objects container when objects are updated.

        :param op: operation performed (ex: 'PUT', or 'DELETE')
        :param delete_at: scheduled delete in UNIX seconds, int
        :param account: account name for the object
        :param container: container name for the object
        :param obj: object name
        :param request: the original request driving the update
        :param objdevice: device name that the object is in
        :param policy_index: the policy index to be used for tmp dir
        """
        if config_true_value(request.headers.get("x-backend-replication", "f")):
            return
        delete_at = normalize_delete_at_timestamp(delete_at)
        updates = [(None, None)]

        partition = None
        hosts = contdevices = [None]
        headers_in = request.headers
        headers_out = HeaderKeyDict(
            {
                # system accounts are always Policy-0
                "X-Backend-Storage-Policy-Index": 0,
                "x-timestamp": request.timestamp.internal,
                "x-trans-id": headers_in.get("x-trans-id", "-"),
                "referer": request.as_referer(),
            }
        )
        if op != "DELETE":
            delete_at_container = headers_in.get("X-Delete-At-Container", None)
            if not delete_at_container:
                self.logger.warning(
                    "X-Delete-At-Container header must be specified for "
                    "expiring objects background %s to work properly. Making "
                    "best guess as to the container name for now." % op
                )
                # TODO(gholt): In a future release, change the above warning to
                # a raised exception and remove the guess code below.
                delete_at_container = get_expirer_container(
                    delete_at, self.expiring_objects_container_divisor, account, container, obj
                )
            partition = headers_in.get("X-Delete-At-Partition", None)
            hosts = headers_in.get("X-Delete-At-Host", "")
            contdevices = headers_in.get("X-Delete-At-Device", "")
            updates = [
                upd
                for upd in zip((h.strip() for h in hosts.split(",")), (c.strip() for c in contdevices.split(",")))
                if all(upd) and partition
            ]
            if not updates:
                updates = [(None, None)]
            headers_out["x-size"] = "0"
            headers_out["x-content-type"] = "text/plain"
            headers_out["x-etag"] = "d41d8cd98f00b204e9800998ecf8427e"
        else:
            # DELETEs of old expiration data have no way of knowing what the
            # old X-Delete-At-Container was at the time of the initial setting
            # of the data, so a best guess is made here.
            # Worst case is a DELETE is issued now for something that doesn't
            # exist there and the original data is left where it is, where
            # it will be ignored when the expirer eventually tries to issue the
            # object DELETE later since the X-Delete-At value won't match up.
            delete_at_container = get_expirer_container(
                delete_at, self.expiring_objects_container_divisor, account, container, obj
            )
        delete_at_container = normalize_delete_at_timestamp(delete_at_container)

        for host, contdevice in updates:
            self.async_update(
                op,
                self.expiring_objects_account,
                delete_at_container,
                "%s-%s/%s/%s" % (delete_at, account, container, obj),
                host,
                partition,
                contdevice,
                headers_out,
                objdevice,
                policy_index,
            )
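
        # Hedged arithmetic sketch: assuming get_expirer_container() follows
        # the legacy rounding used by the older controller later in this
        # listing (str(delete_at / divisor * divisor)), expiry times are
        # binned to divisor boundaries. With the default divisor of 86400:
        #
        #   delete_at = 1434561000
        #   1434561000 // 86400 * 86400  ->  1434499200
        #
        # so every object expiring that day lands in the expirer container
        # named '1434499200'.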

    @public
    @timing_stats()
    def POST(self, request):
        """Handle HTTP POST requests for the Swift Object Server."""
        device, partition, account, container, obj, policy_idx = get_name_and_placement(request, 5, 5, True)
        req_timestamp = valid_timestamp(request)
        new_delete_at = int(request.headers.get("X-Delete-At") or 0)
        if new_delete_at and new_delete_at < time.time():
            return HTTPBadRequest(body="X-Delete-At in past", request=request, content_type="text/plain")
        try:
            disk_file = self.get_diskfile(device, partition, account, container, obj, policy_idx=policy_idx)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=device, request=request)
        try:
            orig_metadata = disk_file.read_metadata()
        except DiskFileXattrNotSupported:
            return HTTPInsufficientStorage(drive=device, request=request)
        except (DiskFileNotExist, DiskFileQuarantined):
            return HTTPNotFound(request=request)
        orig_timestamp = Timestamp(orig_metadata.get("X-Timestamp", 0))
        if orig_timestamp >= req_timestamp:
            return HTTPConflict(request=request, headers={"X-Backend-Timestamp": orig_timestamp.internal})
        metadata = {"X-Timestamp": req_timestamp.internal}
        metadata.update(val for val in request.headers.iteritems() if is_user_meta("object", val[0]))
        for header_key in self.allowed_headers:
            if header_key in request.headers:
                header_caps = header_key.title()
                metadata[header_caps] = request.headers[header_key]
        orig_delete_at = int(orig_metadata.get("X-Delete-At") or 0)
        if orig_delete_at != new_delete_at:
            if new_delete_at:
                self.delete_at_update("PUT", new_delete_at, account, container, obj, request, device, policy_idx)
            if orig_delete_at:
                self.delete_at_update("DELETE", orig_delete_at, account, container, obj, request, device, policy_idx)
        try:
            disk_file.write_metadata(metadata)
        except (DiskFileXattrNotSupported, DiskFileNoSpace):
            return HTTPInsufficientStorage(drive=device, request=request)
        return HTTPAccepted(request=request)

    @public
    @timing_stats()
    def PUT(self, request):
        """Handle HTTP PUT requests for the Swift Object Server."""
        device, partition, account, container, obj, policy_idx = get_name_and_placement(request, 5, 5, True)
        req_timestamp = valid_timestamp(request)
        error_response = check_object_creation(request, obj)
        if error_response:
            return error_response
        new_delete_at = int(request.headers.get("X-Delete-At") or 0)
        if new_delete_at and new_delete_at < time.time():
            return HTTPBadRequest(body="X-Delete-At in past", request=request, content_type="text/plain")
        try:
            fsize = request.message_length()
        except ValueError as e:
            return HTTPBadRequest(body=str(e), request=request, content_type="text/plain")
        try:
            disk_file = self.get_diskfile(device, partition, account, container, obj, policy_idx=policy_idx)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=device, request=request)
        try:
            orig_metadata = disk_file.read_metadata()
        except DiskFileXattrNotSupported:
            return HTTPInsufficientStorage(drive=device, request=request)
        except (DiskFileNotExist, DiskFileQuarantined):
            orig_metadata = {}

        # Checks for If-None-Match
        if request.if_none_match is not None and orig_metadata:
            if "*" in request.if_none_match:
                # File exists already so return 412
                return HTTPPreconditionFailed(request=request)
            if orig_metadata.get("ETag") in request.if_none_match:
                # The current ETag matches, so return 412
                return HTTPPreconditionFailed(request=request)

        orig_timestamp = Timestamp(orig_metadata.get("X-Timestamp", 0))
        if orig_timestamp >= req_timestamp:
            return HTTPConflict(request=request, headers={"X-Backend-Timestamp": orig_timestamp.internal})
        orig_delete_at = int(orig_metadata.get("X-Delete-At") or 0)
        upload_expiration = time.time() + self.max_upload_time
        etag = md5()
        elapsed_time = 0
        try:
            with disk_file.create(size=fsize) as writer:
                upload_size = 0

                def timeout_reader():
                    with ChunkReadTimeout(self.client_timeout):
                        return request.environ["wsgi.input"].read(self.network_chunk_size)

                try:
                    for chunk in iter(lambda: timeout_reader(), ""):
                        start_time = time.time()
                        if start_time > upload_expiration:
                            self.logger.increment("PUT.timeouts")
                            return HTTPRequestTimeout(request=request)
                        etag.update(chunk)
                        upload_size = writer.write(chunk)
                        elapsed_time += time.time() - start_time
                except ChunkReadTimeout:
                    return HTTPRequestTimeout(request=request)
                if upload_size:
                    self.logger.transfer_rate("PUT." + device + ".timing", elapsed_time, upload_size)
                if fsize is not None and fsize != upload_size:
                    return HTTPClientDisconnect(request=request)
                etag = etag.hexdigest()
                if "etag" in request.headers and request.headers["etag"].lower() != etag:
                    return HTTPUnprocessableEntity(request=request)
                metadata = {
                    "X-Timestamp": request.timestamp.internal,
                    "Content-Type": request.headers["content-type"],
                    "ETag": etag,
                    "Content-Length": str(upload_size),
                }
                metadata.update(val for val in request.headers.iteritems() if is_sys_or_user_meta("object", val[0]))
                headers_to_copy = request.headers.get("X-Backend-Replication-Headers", "").split() + list(
                    self.allowed_headers
                )
                for header_key in headers_to_copy:
                    if header_key in request.headers:
                        header_caps = header_key.title()
                        metadata[header_caps] = request.headers[header_key]
                writer.put(metadata)
        except (DiskFileXattrNotSupported, DiskFileNoSpace):
            return HTTPInsufficientStorage(drive=device, request=request)
        if orig_delete_at != new_delete_at:
            if new_delete_at:
                self.delete_at_update("PUT", new_delete_at, account, container, obj, request, device, policy_idx)
            if orig_delete_at:
                self.delete_at_update("DELETE", orig_delete_at, account, container, obj, request, device, policy_idx)
        self.container_update(
            "PUT",
            account,
            container,
            obj,
            request,
            HeaderKeyDict(
                {
                    "x-size": metadata["Content-Length"],
                    "x-content-type": metadata["Content-Type"],
                    "x-timestamp": metadata["X-Timestamp"],
                    "x-etag": metadata["ETag"],
                }
            ),
            device,
            policy_idx,
        )
        return HTTPCreated(request=request, etag=etag)
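
    # Hedged client-side sketch: the optional ETag request header must equal
    # the MD5 of the bytes the server actually received, or the PUT above
    # answers 422 Unprocessable Entity. Illustrative only:
    #
    #   import hashlib
    #   body = 'some object data'
    #   headers = {'X-Timestamp': Timestamp(time.time()).internal,
    #              'Content-Type': 'application/octet-stream',
    #              'ETag': hashlib.md5(body).hexdigest()}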

    @public
    @timing_stats()
    def GET(self, request):
        """Handle HTTP GET requests for the Swift Object Server."""
        device, partition, account, container, obj, policy_idx = get_name_and_placement(request, 5, 5, True)
        keep_cache = self.keep_cache_private or (
            "X-Auth-Token" not in request.headers and "X-Storage-Token" not in request.headers
        )
        try:
            disk_file = self.get_diskfile(device, partition, account, container, obj, policy_idx=policy_idx)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=device, request=request)
        try:
            with disk_file.open():
                metadata = disk_file.get_metadata()
                obj_size = int(metadata["Content-Length"])
                file_x_ts = Timestamp(metadata["X-Timestamp"])
                keep_cache = self.keep_cache_private or (
                    "X-Auth-Token" not in request.headers and "X-Storage-Token" not in request.headers
                )
                response = Response(
                    app_iter=disk_file.reader(keep_cache=keep_cache), request=request, conditional_response=True
                )
                response.headers["Content-Type"] = metadata.get("Content-Type", "application/octet-stream")
                for key, value in metadata.iteritems():
                    if is_sys_or_user_meta("object", key) or key.lower() in self.allowed_headers:
                        response.headers[key] = value
                response.etag = metadata["ETag"]
                response.last_modified = math.ceil(float(file_x_ts))
                response.content_length = obj_size
                try:
                    response.content_encoding = metadata["Content-Encoding"]
                except KeyError:
                    pass
                response.headers["X-Timestamp"] = file_x_ts.normal
                response.headers["X-Backend-Timestamp"] = file_x_ts.internal
                resp = request.get_response(response)
        except DiskFileXattrNotSupported:
            return HTTPInsufficientStorage(drive=device, request=request)
        except (DiskFileNotExist, DiskFileQuarantined) as e:
            headers = {}
            if hasattr(e, "timestamp"):
                headers["X-Backend-Timestamp"] = e.timestamp.internal
            resp = HTTPNotFound(request=request, headers=headers, conditional_response=True)
        return resp

    @public
    @timing_stats(sample_rate=0.8)
    def HEAD(self, request):
        """Handle HTTP HEAD requests for the Swift Object Server."""
        device, partition, account, container, obj, policy_idx = get_name_and_placement(request, 5, 5, True)
        try:
            disk_file = self.get_diskfile(device, partition, account, container, obj, policy_idx=policy_idx)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=device, request=request)
        try:
            metadata = disk_file.read_metadata()
        except DiskFileXattrNotSupported:
            return HTTPInsufficientStorage(drive=device, request=request)
        except (DiskFileNotExist, DiskFileQuarantined) as e:
            headers = {}
            if hasattr(e, "timestamp"):
                headers["X-Backend-Timestamp"] = e.timestamp.internal
            return HTTPNotFound(request=request, headers=headers, conditional_response=True)
        response = Response(request=request, conditional_response=True)
        response.headers["Content-Type"] = metadata.get("Content-Type", "application/octet-stream")
        for key, value in metadata.iteritems():
            if is_sys_or_user_meta("object", key) or key.lower() in self.allowed_headers:
                response.headers[key] = value
        response.etag = metadata["ETag"]
        ts = Timestamp(metadata["X-Timestamp"])
        response.last_modified = math.ceil(float(ts))
        # Needed for container sync feature
        response.headers["X-Timestamp"] = ts.normal
        response.headers["X-Backend-Timestamp"] = ts.internal
        response.content_length = int(metadata["Content-Length"])
        try:
            response.content_encoding = metadata["Content-Encoding"]
        except KeyError:
            pass
        return response

    @public
    @timing_stats()
    def DELETE(self, request):
        """Handle HTTP DELETE requests for the Swift Object Server."""
        device, partition, account, container, obj, policy_idx = get_name_and_placement(request, 5, 5, True)
        req_timestamp = valid_timestamp(request)
        try:
            disk_file = self.get_diskfile(device, partition, account, container, obj, policy_idx=policy_idx)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=device, request=request)
        try:
            orig_metadata = disk_file.read_metadata()
        except DiskFileXattrNotSupported:
            return HTTPInsufficientStorage(drive=device, request=request)
        except DiskFileExpired as e:
            orig_timestamp = e.timestamp
            orig_metadata = e.metadata
            response_class = HTTPNotFound
        except DiskFileDeleted as e:
            orig_timestamp = e.timestamp
            orig_metadata = {}
            response_class = HTTPNotFound
        except (DiskFileNotExist, DiskFileQuarantined):
            orig_timestamp = 0
            orig_metadata = {}
            response_class = HTTPNotFound
        else:
            orig_timestamp = Timestamp(orig_metadata.get("X-Timestamp", 0))
            if orig_timestamp < req_timestamp:
                response_class = HTTPNoContent
            else:
                response_class = HTTPConflict
        response_timestamp = max(orig_timestamp, req_timestamp)
        orig_delete_at = int(orig_metadata.get("X-Delete-At") or 0)
        try:
            req_if_delete_at_val = request.headers["x-if-delete-at"]
            req_if_delete_at = int(req_if_delete_at_val)
        except KeyError:
            pass
        except ValueError:
            return HTTPBadRequest(request=request, body="Bad X-If-Delete-At header value")
        else:
            # request includes x-if-delete-at; we must not place a tombstone
            # if we can not verify the x-if-delete-at time
            if not orig_timestamp:
                # no object found at all
                return HTTPNotFound()
            if orig_delete_at != req_if_delete_at:
                return HTTPPreconditionFailed(request=request, body="X-If-Delete-At and X-Delete-At do not match")
            else:
                # differentiate success from no object at all
                response_class = HTTPNoContent
        if orig_delete_at:
            self.delete_at_update("DELETE", orig_delete_at, account, container, obj, request, device, policy_idx)
        if orig_timestamp < req_timestamp:
            disk_file.delete(req_timestamp)
            self.container_update(
                "DELETE",
                account,
                container,
                obj,
                request,
                HeaderKeyDict({"x-timestamp": req_timestamp.internal}),
                device,
                policy_idx,
            )
        return response_class(request=request, headers={"X-Backend-Timestamp": response_timestamp.internal})
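
    # Hedged usage sketch of the X-If-Delete-At handling above -- the object
    # expirer is expected to send the expiry it acted on, so a rescheduled
    # object is not deleted by a stale task (values illustrative):
    #
    #   X-If-Delete-At equals stored X-Delete-At  -> 204 No Content
    #   X-If-Delete-At differs from stored value  -> 412 Precondition Failed
    #   X-If-Delete-At unparsable as an integer   -> 400 Bad Request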

    @public
    @replication
    @timing_stats(sample_rate=0.1)
    def REPLICATE(self, request):
        """
        Handle REPLICATE requests for the Swift Object Server.  This is used
        by the object replicator to get hashes for directories.
        """
        device, partition, suffix, policy_idx = get_name_and_placement(request, 2, 3, True)
        try:
            hashes = self._diskfile_mgr.get_hashes(device, partition, suffix, policy_idx)
        except DiskFileDeviceUnavailable:
            resp = HTTPInsufficientStorage(drive=device, request=request)
        else:
            resp = Response(body=pickle.dumps(hashes))
        return resp
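
    # Hedged client sketch: a replicator consumes the REPLICATE body by
    # unpickling it into a suffix -> hash mapping (names illustrative; the
    # response shape follows from the handler above):
    #
    #   conn = http_connect(ip, port, device, partition,
    #                       'REPLICATE', '/' + '-'.join(suffixes))
    #   remote_hashes = pickle.loads(conn.getresponse().read())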

    @public
    @replication
    @timing_stats(sample_rate=0.1)
    def REPLICATION(self, request):
        return Response(app_iter=ssync_receiver.Receiver(self, request)())

    def __call__(self, env, start_response):
        """WSGI Application entry point for the Swift Object Server."""
        start_time = time.time()
        req = Request(env)
        self.logger.txn_id = req.headers.get("x-trans-id", None)

        if not check_utf8(req.path_info):
            res = HTTPPreconditionFailed(body="Invalid UTF8 or contains NULL")
        else:
            try:
                # disallow methods which have not been marked 'public'
                try:
                    if req.method not in self.allowed_methods:
                        raise AttributeError("Not allowed method.")
                except AttributeError:
                    res = HTTPMethodNotAllowed()
                else:
                    method = getattr(self, req.method)
                    res = method(req)
            except DiskFileCollision:
                res = HTTPForbidden(request=req)
            except HTTPException as error_response:
                res = error_response
            except (Exception, Timeout):
                self.logger.exception(
                    _("ERROR __call__ error with %(method)s" " %(path)s "), {"method": req.method, "path": req.path}
                )
                res = HTTPInternalServerError(body=traceback.format_exc())
        trans_time = time.time() - start_time
        if self.log_requests:
            log_line = get_log_line(req, res, trans_time, "")
            if req.method in ("REPLICATE", "REPLICATION") or "X-Backend-Replication" in req.headers:
                self.logger.debug(log_line)
            else:
                self.logger.info(log_line)
        if req.method in ("PUT", "DELETE"):
            slow = self.slow - trans_time
            if slow > 0:
                sleep(slow)

        # To be able to zero-copy send the object, we need a few things.
        # First, we have to be responding successfully to a GET, or else we're
        # not sending the object. Second, we have to be able to extract the
        # socket file descriptor from the WSGI input object. Third, the
        # diskfile has to support zero-copy send.
        #
        # There's a good chance that this could work for 206 responses too,
        # but the common case is sending the whole object, so we'll start
        # there.
        if req.method == "GET" and res.status_int == 200 and isinstance(env["wsgi.input"], wsgi.Input):
            app_iter = getattr(res, "app_iter", None)
            checker = getattr(app_iter, "can_zero_copy_send", None)
            if checker and checker():
                # For any kind of zero-copy thing like sendfile or splice, we
                # need the file descriptor. Eventlet doesn't provide a clean
                # way of getting that, so we resort to this.
                wsock = env["wsgi.input"].get_socket()
                wsockfd = wsock.fileno()

                # Don't call zero_copy_send() until after we force the HTTP
                # headers out of Eventlet and into the socket.
                def zero_copy_iter():
                    # If possible, set TCP_CORK so that headers don't
                    # immediately go on the wire, but instead, wait for some
                    # response body to make the TCP frames as large as
                    # possible (and hence as few packets as possible).
                    #
                    # On non-Linux systems, we might consider TCP_NODELAY, but
                    # since the only known zero-copy-capable diskfile uses
                    # Linux-specific syscalls, we'll defer that work until
                    # someone needs it.
                    if hasattr(socket, "TCP_CORK"):
                        wsock.setsockopt(socket.IPPROTO_TCP, socket.TCP_CORK, 1)
                    yield EventletPlungerString()
                    try:
                        app_iter.zero_copy_send(wsockfd)
                    except Exception:
                        self.logger.exception("zero_copy_send() blew up")
                        raise
                    yield ""

                # Get headers ready to go out
                res(env, start_response)
                return zero_copy_iter()
            else:
                return res(env, start_response)
        else:
            return res(env, start_response)
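
ObjectController.__call__ implements the plain WSGI interface, so the class can be exercised directly for experimentation. A minimal sketch, assuming a throwaway conf dict and a devices tree at /srv/node (real deployments go through Swift's own wsgi runner, not wsgiref):

from wsgiref.simple_server import make_server

conf = {'devices': '/srv/node', 'mount_check': 'false'}  # assumed minimal conf
app = ObjectController(conf)
make_server('127.0.0.1', 6000, app).serve_forever()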
Ejemplo n.º 36
0
class ObjectController(object):
    """Implements the WSGI application for the Swift Object Server."""
    def __init__(self, conf):
        """
        Creates a new WSGI application for the Swift Object Server. An
        example configuration is given at
        <source-dir>/etc/object-server.conf-sample or
        /etc/swift/object-server.conf-sample.
        """
        self.logger = get_logger(conf, log_route='object-server')
        self.node_timeout = int(conf.get('node_timeout', 3))
        self.conn_timeout = float(conf.get('conn_timeout', 0.5))
        self.network_chunk_size = int(conf.get('network_chunk_size', 65536))
        self.log_requests = config_true_value(conf.get('log_requests', 'true'))
        self.max_upload_time = int(conf.get('max_upload_time', 86400))
        self.slow = int(conf.get('slow', 0))
        self.keep_cache_private = \
            config_true_value(conf.get('keep_cache_private', 'false'))
        replication_server = conf.get('replication_server', None)
        if replication_server is not None:
            replication_server = config_true_value(replication_server)
        self.replication_server = replication_server

        default_allowed_headers = '''
            content-disposition,
            content-encoding,
            x-delete-at,
            x-object-manifest,
            x-static-large-object,
        '''
        extra_allowed_headers = [
            header.strip().lower() for header in conf.get(
                'allowed_headers', default_allowed_headers).split(',')
            if header.strip()
        ]
        self.allowed_headers = set()
        for header in extra_allowed_headers:
            if header not in DATAFILE_SYSTEM_META:
                self.allowed_headers.add(header)
        self.expiring_objects_account = \
            (conf.get('auto_create_account_prefix') or '.') + \
            'expiring_objects'
        self.expiring_objects_container_divisor = \
            int(conf.get('expiring_objects_container_divisor') or 86400)
        # Initialization was successful, so now apply the network chunk size
        # parameter as the default read / write buffer size for the network
        # sockets.
        #
        # NOTE WELL: This is a class setting, so until we can set this on a
        # per-connection basis, this affects reading and writing on ALL
        # sockets, those between the proxy servers and external clients, and
        # those between the proxy servers and the other internal servers.
        #
        # ** Because the primary motivation for this is to optimize how data
        # is written back to the proxy server, we could use the value from the
        # disk_chunk_size parameter. However, it affects all created sockets
        # using this class so we have chosen to tie it to the
        # network_chunk_size parameter value instead.
        socket._fileobject.default_bufsize = self.network_chunk_size

        # Provide further setup specific to an object server implementation.
        self.setup(conf)

    def setup(self, conf):
        """
        Implementation specific setup. This method is called at the very end
        by the constructor to allow a specific implementation to modify
        existing attributes or add its own attributes.

        :param conf: WSGI configuration parameter
        """

        # Common on-disk hierarchy shared across account, container and object
        # servers.
        self._diskfile_mgr = DiskFileManager(conf, self.logger)

    def get_diskfile(self, device, partition, account, container, obj,
                     **kwargs):
        """
        Utility method for instantiating a DiskFile object supporting a given
        REST API.

        An implementation of the object server that wants to use a different
        DiskFile class would simply over-ride this method to provide that
        behavior.
        """
        return self._diskfile_mgr.get_diskfile(device, partition, account,
                                               container, obj, **kwargs)

    def async_update(self, op, account, container, obj, host, partition,
                     contdevice, headers_out, objdevice):
        """
        Sends or saves an async update.

        :param op: operation performed (ex: 'PUT', or 'DELETE')
        :param account: account name for the object
        :param container: container name for the object
        :param obj: object name
        :param host: host that the container is on
        :param partition: partition that the container is on
        :param contdevice: device name that the container is on
        :param headers_out: dictionary of headers to send in the container
                            request
        :param objdevice: device name that the object is in
        """
        headers_out['user-agent'] = 'obj-server %s' % os.getpid()
        full_path = '/%s/%s/%s' % (account, container, obj)
        if all([host, partition, contdevice]):
            try:
                with ConnectionTimeout(self.conn_timeout):
                    ip, port = host.rsplit(':', 1)
                    conn = http_connect(ip, port, contdevice, partition, op,
                                        full_path, headers_out)
                with Timeout(self.node_timeout):
                    response = conn.getresponse()
                    response.read()
                    if is_success(response.status):
                        return
                    else:
                        self.logger.error(
                            _('ERROR Container update failed '
                              '(saving for async update later): %(status)d '
                              'response from %(ip)s:%(port)s/%(dev)s'), {
                                  'status': response.status,
                                  'ip': ip,
                                  'port': port,
                                  'dev': contdevice
                              })
            except (Exception, Timeout):
                self.logger.exception(
                    _('ERROR container update failed with '
                      '%(ip)s:%(port)s/%(dev)s (saving for async update later)'
                      ), {
                          'ip': ip,
                          'port': port,
                          'dev': contdevice
                      })
        data = {
            'op': op,
            'account': account,
            'container': container,
            'obj': obj,
            'headers': headers_out
        }
        timestamp = headers_out['x-timestamp']
        self._diskfile_mgr.pickle_async_update(objdevice, account, container,
                                               obj, data, timestamp)

    def container_update(self, op, account, container, obj, request,
                         headers_out, objdevice):
        """
        Update the container when objects are updated.

        :param op: operation performed (ex: 'PUT', or 'DELETE')
        :param account: account name for the object
        :param container: container name for the object
        :param obj: object name
        :param request: the original request object driving the update
        :param headers_out: dictionary of headers to send in the container
                            request(s)
        :param objdevice: device name that the object is in
        """
        headers_in = request.headers
        conthosts = [
            h.strip()
            for h in headers_in.get('X-Container-Host', '').split(',')
        ]
        contdevices = [
            d.strip()
            for d in headers_in.get('X-Container-Device', '').split(',')
        ]
        contpartition = headers_in.get('X-Container-Partition', '')

        if len(conthosts) != len(contdevices):
            # This shouldn't happen unless there's a bug in the proxy,
            # but if there is, we want to know about it.
            self.logger.error(
                _('ERROR Container update failed: different '
                  'numbers of hosts and devices in request: '
                  '"%s" vs "%s"') % (headers_in.get('X-Container-Host', ''),
                                     headers_in.get('X-Container-Device', '')))
            return

        if contpartition:
            updates = zip(conthosts, contdevices)
        else:
            updates = []

        headers_out['x-trans-id'] = headers_in.get('x-trans-id', '-')
        headers_out['referer'] = request.as_referer()
        for conthost, contdevice in updates:
            self.async_update(op, account, container, obj, conthost,
                              contpartition, contdevice, headers_out,
                              objdevice)

    def delete_at_update(self, op, delete_at, account, container, obj, request,
                         objdevice):
        """
        Update the expiring objects container when objects are updated.

        :param op: operation performed (ex: 'PUT', or 'DELETE')
        :param delete_at: scheduled delete in UNIX seconds, int
        :param account: account name for the object
        :param container: container name for the object
        :param obj: object name
        :param request: the original request driving the update
        :param objdevice: device name that the object is in
        """
        # Quick cap that will work from now until Sat Nov 20 17:46:39 2286
        # At that time, Swift will be so popular and pervasive I will have
        # created income for thousands of future programmers.
        delete_at = max(min(delete_at, 9999999999), 0)
        updates = [(None, None)]

        partition = None
        hosts = contdevices = [None]
        headers_in = request.headers
        headers_out = HeaderKeyDict({
            'x-timestamp':
            headers_in['x-timestamp'],
            'x-trans-id':
            headers_in.get('x-trans-id', '-'),
            'referer':
            request.as_referer()
        })
        if op != 'DELETE':
            delete_at_container = headers_in.get('X-Delete-At-Container', None)
            if not delete_at_container:
                self.logger.warning(
                    'X-Delete-At-Container header must be specified for '
                    'expiring objects background %s to work properly. Making '
                    'best guess as to the container name for now.' % op)
                # TODO(gholt): In a future release, change the above warning to
                # a raised exception and remove the guess code below.
                delete_at_container = str(
                    delete_at / self.expiring_objects_container_divisor *
                    self.expiring_objects_container_divisor)
            partition = headers_in.get('X-Delete-At-Partition', None)
            hosts = headers_in.get('X-Delete-At-Host', '')
            contdevices = headers_in.get('X-Delete-At-Device', '')
            updates = [
                upd for upd in zip((h.strip() for h in hosts.split(',')), (
                    c.strip() for c in contdevices.split(',')))
                if all(upd) and partition
            ]
            if not updates:
                updates = [(None, None)]
            headers_out['x-size'] = '0'
            headers_out['x-content-type'] = 'text/plain'
            headers_out['x-etag'] = 'd41d8cd98f00b204e9800998ecf8427e'
        else:
            # DELETEs of old expiration data have no way of knowing what the
            # old X-Delete-At-Container was at the time of the initial setting
            # of the data, so a best guess is made here.
            # Worst case is a DELETE is issued now for something that doesn't
            # exist there and the original data is left where it is, where
            # it will be ignored when the expirer eventually tries to issue the
            # object DELETE later since the X-Delete-At value won't match up.
            delete_at_container = str(delete_at /
                                      self.expiring_objects_container_divisor *
                                      self.expiring_objects_container_divisor)

        for host, contdevice in updates:
            self.async_update(
                op, self.expiring_objects_account, delete_at_container,
                '%s-%s/%s/%s' % (delete_at, account, container, obj), host,
                partition, contdevice, headers_out, objdevice)

    @public
    @timing_stats()
    def POST(self, request):
        """Handle HTTP POST requests for the Swift Object Server."""
        device, partition, account, container, obj = \
            split_and_validate_path(request, 5, 5, True)

        if 'x-timestamp' not in request.headers or \
                not check_float(request.headers['x-timestamp']):
            return HTTPBadRequest(body='Missing timestamp',
                                  request=request,
                                  content_type='text/plain')
        new_delete_at = int(request.headers.get('X-Delete-At') or 0)
        if new_delete_at and new_delete_at < time.time():
            return HTTPBadRequest(body='X-Delete-At in past',
                                  request=request,
                                  content_type='text/plain')
        try:
            disk_file = self.get_diskfile(device, partition, account,
                                          container, obj)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=device, request=request)
        try:
            orig_metadata = disk_file.read_metadata()
        except (DiskFileNotExist, DiskFileQuarantined):
            return HTTPNotFound(request=request)
        orig_timestamp = orig_metadata.get('X-Timestamp', '0')
        if orig_timestamp >= request.headers['x-timestamp']:
            return HTTPConflict(request=request)
        metadata = {'X-Timestamp': request.headers['x-timestamp']}
        metadata.update(val for val in request.headers.iteritems()
                        if val[0].startswith('X-Object-Meta-'))
        for header_key in self.allowed_headers:
            if header_key in request.headers:
                header_caps = header_key.title()
                metadata[header_caps] = request.headers[header_key]
        orig_delete_at = int(orig_metadata.get('X-Delete-At') or 0)
        if orig_delete_at != new_delete_at:
            if new_delete_at:
                self.delete_at_update('PUT', new_delete_at, account, container,
                                      obj, request, device)
            if orig_delete_at:
                self.delete_at_update('DELETE', orig_delete_at, account,
                                      container, obj, request, device)
        try:
            disk_file.write_metadata(metadata)
        except (DiskFileNotExist, DiskFileQuarantined):
            return HTTPNotFound(request=request)
        else:
            return HTTPAccepted(request=request)

    @public
    @timing_stats()
    def PUT(self, request):
        """Handle HTTP PUT requests for the Swift Object Server."""
        device, partition, account, container, obj = \
            split_and_validate_path(request, 5, 5, True)

        if 'x-timestamp' not in request.headers or \
                not check_float(request.headers['x-timestamp']):
            return HTTPBadRequest(body='Missing timestamp',
                                  request=request,
                                  content_type='text/plain')
        error_response = check_object_creation(request, obj)
        if error_response:
            return error_response
        new_delete_at = int(request.headers.get('X-Delete-At') or 0)
        if new_delete_at and new_delete_at < time.time():
            return HTTPBadRequest(body='X-Delete-At in past',
                                  request=request,
                                  content_type='text/plain')
        try:
            fsize = request.message_length()
        except ValueError as e:
            return HTTPBadRequest(body=str(e),
                                  request=request,
                                  content_type='text/plain')
        try:
            disk_file = self.get_diskfile(device, partition, account,
                                          container, obj)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=device, request=request)
        try:
            orig_metadata = disk_file.read_metadata()
        except (DiskFileNotExist, DiskFileQuarantined):
            orig_metadata = {}
        orig_timestamp = orig_metadata.get('X-Timestamp')
        if orig_timestamp and orig_timestamp >= request.headers['x-timestamp']:
            return HTTPConflict(request=request)
        orig_delete_at = int(orig_metadata.get('X-Delete-At') or 0)
        upload_expiration = time.time() + self.max_upload_time
        etag = md5()
        elapsed_time = 0
        try:
            with disk_file.create(size=fsize) as writer:
                upload_size = 0
                reader = request.environ['wsgi.input'].read
                for chunk in iter(lambda: reader(self.network_chunk_size), ''):
                    start_time = time.time()
                    if start_time > upload_expiration:
                        self.logger.increment('PUT.timeouts')
                        return HTTPRequestTimeout(request=request)
                    etag.update(chunk)
                    upload_size = writer.write(chunk)
                    sleep()
                    elapsed_time += time.time() - start_time
                if upload_size:
                    self.logger.transfer_rate('PUT.' + device + '.timing',
                                              elapsed_time, upload_size)
                if fsize is not None and fsize != upload_size:
                    return HTTPClientDisconnect(request=request)
                etag = etag.hexdigest()
                if 'etag' in request.headers and \
                        request.headers['etag'].lower() != etag:
                    return HTTPUnprocessableEntity(request=request)
                metadata = {
                    'X-Timestamp': request.headers['x-timestamp'],
                    'Content-Type': request.headers['content-type'],
                    'ETag': etag,
                    'Content-Length': str(upload_size),
                }
                metadata.update(val for val in request.headers.iteritems()
                                if val[0].lower().startswith('x-object-meta-')
                                and len(val[0]) > 14)
                for header_key in self.allowed_headers:
                    if header_key in request.headers:
                        header_caps = header_key.title()
                        metadata[header_caps] = request.headers[header_key]
                writer.put(metadata)
        except DiskFileNoSpace:
            return HTTPInsufficientStorage(drive=device, request=request)
        if orig_delete_at != new_delete_at:
            if new_delete_at:
                self.delete_at_update('PUT', new_delete_at, account, container,
                                      obj, request, device)
            if orig_delete_at:
                self.delete_at_update('DELETE', orig_delete_at, account,
                                      container, obj, request, device)
        if not orig_timestamp or \
                orig_timestamp < request.headers['x-timestamp']:
            self.container_update(
                'PUT', account, container, obj, request,
                HeaderKeyDict({
                    'x-size': metadata['Content-Length'],
                    'x-content-type': metadata['Content-Type'],
                    'x-timestamp': metadata['X-Timestamp'],
                    'x-etag': metadata['ETag']
                }), device)
        return HTTPCreated(request=request, etag=etag)
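
    # Hedged note: sleep() in the read/write loop above is presumably
    # eventlet's cooperative sleep imported at module scope, so each chunk
    # boundary yields to other green threads and one large upload cannot
    # monopolize the hub. Illustrative equivalent:
    #
    #   import eventlet
    #   eventlet.sleep(0)  # yield control without delaying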

    @public
    @timing_stats()
    def GET(self, request):
        """Handle HTTP GET requests for the Swift Object Server."""
        device, partition, account, container, obj = \
            split_and_validate_path(request, 5, 5, True)
        keep_cache = self.keep_cache_private or (
            'X-Auth-Token' not in request.headers
            and 'X-Storage-Token' not in request.headers)
        try:
            disk_file = self.get_diskfile(device, partition, account,
                                          container, obj)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=device, request=request)
        try:
            with disk_file.open():
                metadata = disk_file.get_metadata()
                obj_size = int(metadata['Content-Length'])
                if request.headers.get('if-match') not in (None, '*') and \
                        metadata['ETag'] not in request.if_match:
                    return HTTPPreconditionFailed(request=request)
                if request.headers.get('if-none-match') is not None:
                    if metadata['ETag'] in request.if_none_match:
                        resp = HTTPNotModified(request=request)
                        resp.etag = metadata['ETag']
                        return resp
                file_x_ts = metadata['X-Timestamp']
                file_x_ts_flt = float(file_x_ts)
                try:
                    if_unmodified_since = request.if_unmodified_since
                except (OverflowError, ValueError):
                    # catches timestamps before the epoch
                    return HTTPPreconditionFailed(request=request)
                file_x_ts_utc = datetime.fromtimestamp(file_x_ts_flt, UTC)
                if if_unmodified_since and file_x_ts_utc > if_unmodified_since:
                    return HTTPPreconditionFailed(request=request)
                try:
                    if_modified_since = request.if_modified_since
                except (OverflowError, ValueError):
                    # catches timestamps before the epoch
                    return HTTPPreconditionFailed(request=request)
                if if_modified_since and file_x_ts_utc < if_modified_since:
                    return HTTPNotModified(request=request)
                keep_cache = (self.keep_cache_private or
                              ('X-Auth-Token' not in request.headers
                               and 'X-Storage-Token' not in request.headers))
                response = Response(app_iter=disk_file.reader(
                    iter_hook=sleep, keep_cache=keep_cache),
                                    request=request,
                                    conditional_response=True)
                response.headers['Content-Type'] = metadata.get(
                    'Content-Type', 'application/octet-stream')
                for key, value in metadata.iteritems():
                    if key.lower().startswith('x-object-meta-') or \
                            key.lower() in self.allowed_headers:
                        response.headers[key] = value
                response.etag = metadata['ETag']
                response.last_modified = file_x_ts_flt
                response.content_length = obj_size
                try:
                    response.content_encoding = metadata['Content-Encoding']
                except KeyError:
                    pass
                response.headers['X-Timestamp'] = file_x_ts
                resp = request.get_response(response)
        except DiskFileNotExist:
            if request.headers.get('if-match') == '*':
                resp = HTTPPreconditionFailed(request=request)
            else:
                resp = HTTPNotFound(request=request)
        except DiskFileQuarantined:
            resp = HTTPNotFound(request=request)
        return resp
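
# Hedged sketch (standalone, not part of the Swift source above): the
# ETag-based conditional checks in GET reduce to this mapping; the function
# name and list-valued arguments are illustrative only.
def conditional_status(obj_etag, if_match=None, if_none_match=None):
    if if_match is not None and '*' not in if_match and \
            obj_etag not in if_match:
        return 412  # HTTPPreconditionFailed
    if if_none_match is not None and obj_etag in if_none_match:
        return 304  # HTTPNotModified
    return 200

assert conditional_status('abc', if_match=['def']) == 412
assert conditional_status('abc', if_none_match=['abc']) == 304
assert conditional_status('abc') == 200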

    @public
    @timing_stats(sample_rate=0.8)
    def HEAD(self, request):
        """Handle HTTP HEAD requests for the Swift Object Server."""
        device, partition, account, container, obj = \
            split_and_validate_path(request, 5, 5, True)
        try:
            disk_file = self.get_diskfile(device, partition, account,
                                          container, obj)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=device, request=request)
        try:
            metadata = disk_file.read_metadata()
        except (DiskFileNotExist, DiskFileQuarantined):
            return HTTPNotFound(request=request)
        response = Response(request=request, conditional_response=True)
        response.headers['Content-Type'] = metadata.get(
            'Content-Type', 'application/octet-stream')
        for key, value in metadata.iteritems():
            if key.lower().startswith('x-object-meta-') or \
                    key.lower() in self.allowed_headers:
                response.headers[key] = value
        response.etag = metadata['ETag']
        ts = metadata['X-Timestamp']
        response.last_modified = float(ts)
        # Needed for container sync feature
        response.headers['X-Timestamp'] = ts
        response.content_length = int(metadata['Content-Length'])
        try:
            response.content_encoding = metadata['Content-Encoding']
        except KeyError:
            pass
        return response

    @public
    @timing_stats()
    def DELETE(self, request):
        """Handle HTTP DELETE requests for the Swift Object Server."""
        device, partition, account, container, obj = \
            split_and_validate_path(request, 5, 5, True)
        if 'x-timestamp' not in request.headers or \
                not check_float(request.headers['x-timestamp']):
            return HTTPBadRequest(body='Missing timestamp',
                                  request=request,
                                  content_type='text/plain')
        try:
            disk_file = self.get_diskfile(device, partition, account,
                                          container, obj)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=device, request=request)
        try:
            orig_metadata = disk_file.read_metadata()
        except DiskFileDeleted as e:
            orig_timestamp = e.timestamp
            orig_metadata = {}
            response_class = HTTPNotFound
        except (DiskFileNotExist, DiskFileQuarantined):
            orig_timestamp = 0
            orig_metadata = {}
            response_class = HTTPNotFound
        else:
            orig_timestamp = orig_metadata.get('X-Timestamp', 0)
            if orig_timestamp < request.headers['x-timestamp']:
                response_class = HTTPNoContent
            else:
                response_class = HTTPConflict
        if 'x-if-delete-at' in request.headers and \
                int(request.headers['x-if-delete-at']) != \
                int(orig_metadata.get('X-Delete-At') or 0):
            return HTTPPreconditionFailed(
                request=request,
                body='X-If-Delete-At and X-Delete-At do not match')
        orig_delete_at = int(orig_metadata.get('X-Delete-At') or 0)
        if orig_delete_at:
            self.delete_at_update('DELETE', orig_delete_at, account, container,
                                  obj, request, device)
        req_timestamp = request.headers['X-Timestamp']
        if orig_timestamp < req_timestamp:
            disk_file.delete(req_timestamp)
            self.container_update(
                'DELETE', account, container, obj, request,
                HeaderKeyDict({'x-timestamp': req_timestamp}), device)
        return response_class(request=request)

    @public
    @replication
    @timing_stats(sample_rate=0.1)
    def REPLICATE(self, request):
        """
        Handle REPLICATE requests for the Swift Object Server.  This is used
        by the object replicator to get hashes for directories.
        """
        device, partition, suffix = split_and_validate_path(
            request, 2, 3, True)
        try:
            hashes = self._diskfile_mgr.get_hashes(device, partition, suffix)
        except DiskFileDeviceUnavailable:
            resp = HTTPInsufficientStorage(drive=device, request=request)
        else:
            resp = Response(body=pickle.dumps(hashes))
        return resp
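
# Hedged sketch (standalone): a REPLICATE caller, e.g. the object
# replicator, would unpickle the response body to recover the
# suffix -> hash mapping; the suffix and hash values below are fake.
import pickle

body = pickle.dumps({'a83': 'd41d8cd98f00b204e9800998ecf8427e'})
suffix_hashes = pickle.loads(body)
assert suffix_hashes['a83'] == 'd41d8cd98f00b204e9800998ecf8427e'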

    def __call__(self, env, start_response):
        """WSGI Application entry point for the Swift Object Server."""
        start_time = time.time()
        req = Request(env)
        self.logger.txn_id = req.headers.get('x-trans-id', None)

        if not check_utf8(req.path_info):
            res = HTTPPreconditionFailed(body='Invalid UTF8 or contains NULL')
        else:
            try:
                # disallow methods which have not been marked 'public'
                try:
                    method = getattr(self, req.method)
                    getattr(method, 'publicly_accessible')
                    replication_method = getattr(method, 'replication', False)
                    if (self.replication_server is not None
                            and self.replication_server != replication_method):
                        raise AttributeError('Not allowed method.')
                except AttributeError:
                    res = HTTPMethodNotAllowed()
                else:
                    res = method(req)
            except DiskFileCollision:
                res = HTTPForbidden(request=req)
            except HTTPException as error_response:
                res = error_response
            except (Exception, Timeout):
                self.logger.exception(
                    _('ERROR __call__ error with %(method)s'
                      ' %(path)s '), {
                          'method': req.method,
                          'path': req.path
                      })
                res = HTTPInternalServerError(body=traceback.format_exc())
        trans_time = time.time() - start_time
        if self.log_requests:
            log_line = '%s - - [%s] "%s %s" %s %s "%s" "%s" "%s" %.4f' % (
                req.remote_addr,
                time.strftime('%d/%b/%Y:%H:%M:%S +0000', time.gmtime()),
                req.method, req.path, res.status.split()[0], res.content_length
                or '-', req.referer or '-', req.headers.get(
                    'x-trans-id', '-'), req.user_agent or '-', trans_time)
            if req.method == 'REPLICATE':
                self.logger.debug(log_line)
            else:
                self.logger.info(log_line)
        if req.method in ('PUT', 'DELETE'):
            slow = self.slow - trans_time
            if slow > 0:
                sleep(slow)
        return res(env, start_response)
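
# Hedged sketch (standalone): the replication_server gate in __call__ above
# reduces to this predicate (True = the request method is allowed to run):
#   replication_server is None  -> every public method allowed
#   replication_server is True  -> only @replication-decorated methods
#   replication_server is False -> only non-replication methods
def method_allowed(replication_server, is_replication_method):
    return (replication_server is None or
            replication_server == is_replication_method)

assert method_allowed(None, False) and method_allowed(None, True)
assert method_allowed(True, True) and not method_allowed(True, False)
assert method_allowed(False, False) and not method_allowed(False, True)
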
Example No. 37
class ObjectController(object):
    """Implements the WSGI application for the Swift Object Server."""

    def __init__(self, conf, logger=None):
        """
        Creates a new WSGI application for the Swift Object Server. An
        example configuration is given at
        <source-dir>/etc/object-server.conf-sample or
        /etc/swift/object-server.conf-sample.
        """
        self.logger = logger or get_logger(conf, log_route='object-server')
        self.node_timeout = int(conf.get('node_timeout', 3))
        self.conn_timeout = float(conf.get('conn_timeout', 0.5))
        self.client_timeout = int(conf.get('client_timeout', 60))
        self.disk_chunk_size = int(conf.get('disk_chunk_size', 65536))
        self.network_chunk_size = int(conf.get('network_chunk_size', 65536))
        self.log_requests = config_true_value(conf.get('log_requests', 'true'))
        self.max_upload_time = int(conf.get('max_upload_time', 86400))
        self.slow = int(conf.get('slow', 0))
        self.keep_cache_private = \
            config_true_value(conf.get('keep_cache_private', 'false'))
        replication_server = conf.get('replication_server', None)
        if replication_server is not None:
            replication_server = config_true_value(replication_server)
        self.replication_server = replication_server

        default_allowed_headers = '''
            content-disposition,
            content-encoding,
            x-delete-at,
            x-object-manifest,
            x-static-large-object,
        '''
        extra_allowed_headers = [
            header.strip().lower() for header in conf.get(
                'allowed_headers', default_allowed_headers).split(',')
            if header.strip()
        ]
        self.allowed_headers = set()
        for header in extra_allowed_headers:
            if header not in DATAFILE_SYSTEM_META:
                self.allowed_headers.add(header)
        self.expiring_objects_account = \
            (conf.get('auto_create_account_prefix') or '.') + \
            (conf.get('expiring_objects_account_name') or 'expiring_objects')
        self.expiring_objects_container_divisor = \
            int(conf.get('expiring_objects_container_divisor') or 86400)
        # Initialization was successful, so now apply the network chunk size
        # parameter as the default read / write buffer size for the network
        # sockets.
        #
        # NOTE WELL: This is a class setting, so until we can set this on a
        # per-connection basis, this affects reading and writing on ALL
        # sockets, those between the proxy servers and external clients, and
        # those between the proxy servers and the other internal servers.
        #
        # ** Because the primary motivation for this is to optimize how data
        # is written back to the proxy server, we could use the value from the
        # disk_chunk_size parameter. However, it affects all created sockets
        # using this class so we have chosen to tie it to the
        # network_chunk_size parameter value instead.
        socket._fileobject.default_bufsize = self.network_chunk_size

        # Provide further setup specific to an object server implementation.
        self.setup(conf)

    def setup(self, conf):
        """
        Implementation specific setup. This method is called at the very end
        by the constructor to allow a specific implementation to modify
        existing attributes or add its own attributes.

        :param conf: WSGI configuration parameter
        """

        # Common on-disk hierarchy shared across account, container and object
        # servers.
        self._diskfile_mgr = DiskFileManager(conf, self.logger)
        # This is populated by global_conf_callback way below as the semaphore
        # is shared by all workers.
        if 'replication_semaphore' in conf:
            # The value was put in a list so it could get past paste
            self.replication_semaphore = conf['replication_semaphore'][0]
        else:
            self.replication_semaphore = None
        self.replication_failure_threshold = int(
            conf.get('replication_failure_threshold') or 100)
        self.replication_failure_ratio = float(
            conf.get('replication_failure_ratio') or 1.0)

    def get_diskfile(self, device, partition, account, container, obj,
                     **kwargs):
        """
        Utility method for instantiating a DiskFile object supporting a given
        REST API.

        An implementation of the object server that wants to use a different
        DiskFile class would simply over-ride this method to provide that
        behavior.
        """
        return self._diskfile_mgr.get_diskfile(
            device, partition, account, container, obj, **kwargs)
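
# Hedged sketch (standalone): per the docstring above, an implementation
# wanting a different DiskFile backend overrides setup() or get_diskfile().
# MyDiskFileManager and MyObjectController are hypothetical names.
class MyDiskFileManager(DiskFileManager):
    pass  # stand-in for a manager returning a custom DiskFile class

class MyObjectController(ObjectController):
    def setup(self, conf):
        # swap the manager; every REST handler is inherited unchanged
        self._diskfile_mgr = MyDiskFileManager(conf, self.logger)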

    def async_update(self, op, account, container, obj, host, partition,
                     contdevice, headers_out, objdevice):
        """
        Sends or saves an async update.

        :param op: operation performed (ex: 'PUT', or 'DELETE')
        :param account: account name for the object
        :param container: container name for the object
        :param obj: object name
        :param host: host that the container is on
        :param partition: partition that the container is on
        :param contdevice: device name that the container is on
        :param headers_out: dictionary of headers to send in the container
                            request
        :param objdevice: device name that the object is in
        """
        headers_out['user-agent'] = 'obj-server %s' % os.getpid()
        full_path = '/%s/%s/%s' % (account, container, obj)
        if all([host, partition, contdevice]):
            try:
                with ConnectionTimeout(self.conn_timeout):
                    ip, port = host.rsplit(':', 1)
                    conn = http_connect(ip, port, contdevice, partition, op,
                                        full_path, headers_out)
                with Timeout(self.node_timeout):
                    response = conn.getresponse()
                    response.read()
                    if is_success(response.status):
                        return
                    else:
                        self.logger.error(_(
                            'ERROR Container update failed '
                            '(saving for async update later): %(status)d '
                            'response from %(ip)s:%(port)s/%(dev)s'),
                            {'status': response.status, 'ip': ip, 'port': port,
                             'dev': contdevice})
            except (Exception, Timeout):
                self.logger.exception(_(
                    'ERROR container update failed with '
                    '%(ip)s:%(port)s/%(dev)s (saving for async update later)'),
                    {'ip': ip, 'port': port, 'dev': contdevice})
        data = {'op': op, 'account': account, 'container': container,
                'obj': obj, 'headers': headers_out}
        timestamp = headers_out['x-timestamp']
        self._diskfile_mgr.pickle_async_update(objdevice, account, container,
                                               obj, data, timestamp)
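
# Hedged sketch (standalone): the payload pickled to the async-pending area
# by the fallback path above has this shape; all values are illustrative.
data = {
    'op': 'PUT',
    'account': 'a',
    'container': 'c',
    'obj': 'o',
    'headers': {'x-timestamp': '1400000000.00000',
                'x-trans-id': 'tx123',
                'user-agent': 'obj-server 1234'},
}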

    def container_update(self, op, account, container, obj, request,
                         headers_out, objdevice):
        """
        Update the container when objects are updated.

        :param op: operation performed (ex: 'PUT', or 'DELETE')
        :param account: account name for the object
        :param container: container name for the object
        :param obj: object name
        :param request: the original request object driving the update
        :param headers_out: dictionary of headers to send in the container
                            request(s)
        :param objdevice: device name that the object is in
        """
        headers_in = request.headers
        conthosts = [h.strip() for h in
                     headers_in.get('X-Container-Host', '').split(',')]
        contdevices = [d.strip() for d in
                       headers_in.get('X-Container-Device', '').split(',')]
        contpartition = headers_in.get('X-Container-Partition', '')

        if len(conthosts) != len(contdevices):
            # This shouldn't happen unless there's a bug in the proxy,
            # but if there is, we want to know about it.
            self.logger.error(_('ERROR Container update failed: different '
                                'numbers of hosts and devices in request: '
                                '"%s" vs "%s"') %
                               (headers_in.get('X-Container-Host', ''),
                                headers_in.get('X-Container-Device', '')))
            return

        if contpartition:
            updates = zip(conthosts, contdevices)
        else:
            updates = []

        headers_out['x-trans-id'] = headers_in.get('x-trans-id', '-')
        headers_out['referer'] = request.as_referer()
        for conthost, contdevice in updates:
            self.async_update(op, account, container, obj, conthost,
                              contpartition, contdevice, headers_out,
                              objdevice)
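
# Hedged sketch (standalone): the proxy supplies one container host and one
# device per container replica in comma-separated headers; container_update
# pairs them up and issues one async_update per pair. Values illustrative.
conthosts = ['10.0.0.1:6001', '10.0.0.2:6001', '10.0.0.3:6001']
contdevices = ['sdb', 'sdc', 'sdd']
updates = zip(conthosts, contdevices)  # a list on Python 2
# -> [('10.0.0.1:6001', 'sdb'), ('10.0.0.2:6001', 'sdc'),
#     ('10.0.0.3:6001', 'sdd')], all sharing the same container partition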

    def delete_at_update(self, op, delete_at, account, container, obj,
                         request, objdevice):
        """
        Update the expiring objects container when objects are updated.

        :param op: operation performed (ex: 'PUT', or 'DELETE')
        :param delete_at: scheduled delete in UNIX seconds, int
        :param account: account name for the object
        :param container: container name for the object
        :param obj: object name
        :param request: the original request driving the update
        :param objdevice: device name that the object is in
        """
        if config_true_value(
                request.headers.get('x-backend-replication', 'f')):
            return
        delete_at = normalize_delete_at_timestamp(delete_at)
        updates = [(None, None)]

        partition = None
        hosts = contdevices = [None]
        headers_in = request.headers
        headers_out = HeaderKeyDict({
            'x-timestamp': headers_in['x-timestamp'],
            'x-trans-id': headers_in.get('x-trans-id', '-'),
            'referer': request.as_referer()})
        if op != 'DELETE':
            delete_at_container = headers_in.get('X-Delete-At-Container', None)
            if not delete_at_container:
                self.logger.warning(
                    'X-Delete-At-Container header must be specified for '
                    'expiring objects background %s to work properly. Making '
                    'best guess as to the container name for now.' % op)
                # TODO(gholt): In a future release, change the above warning to
                # a raised exception and remove the guess code below.
                delete_at_container = (
                    int(delete_at) / self.expiring_objects_container_divisor *
                    self.expiring_objects_container_divisor)
            partition = headers_in.get('X-Delete-At-Partition', None)
            hosts = headers_in.get('X-Delete-At-Host', '')
            contdevices = headers_in.get('X-Delete-At-Device', '')
            updates = [upd for upd in
                       zip((h.strip() for h in hosts.split(',')),
                           (c.strip() for c in contdevices.split(',')))
                       if all(upd) and partition]
            if not updates:
                updates = [(None, None)]
            headers_out['x-size'] = '0'
            headers_out['x-content-type'] = 'text/plain'
            headers_out['x-etag'] = 'd41d8cd98f00b204e9800998ecf8427e'
        else:
            # DELETEs of old expiration data have no way of knowing what the
            # old X-Delete-At-Container was at the time of the initial setting
            # of the data, so a best guess is made here.
            # Worst case is a DELETE is issued now for something that doesn't
            # exist there and the original data is left where it is, where
            # it will be ignored when the expirer eventually tries to issue the
            # object DELETE later since the X-Delete-At value won't match up.
            delete_at_container = str(
                int(delete_at) / self.expiring_objects_container_divisor *
                self.expiring_objects_container_divisor)
        delete_at_container = normalize_delete_at_timestamp(
            delete_at_container)

        for host, contdevice in updates:
            self.async_update(
                op, self.expiring_objects_account, delete_at_container,
                '%s-%s/%s/%s' % (delete_at, account, container, obj),
                host, partition, contdevice, headers_out, objdevice)
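
# Hedged sketch (standalone): the container-name guess above rounds the
# delete-at time down to the divisor (default 86400, one day). The listing
# uses '/', which is integer division on Python 2; '//' is equivalent here.
delete_at = 1400012345
divisor = 86400
delete_at_container = int(delete_at) // divisor * divisor
assert delete_at_container == 1399939200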

    @public
    @timing_stats()
    def POST(self, request):
        """Handle HTTP POST requests for the Swift Object Server."""
        device, partition, account, container, obj = \
            split_and_validate_path(request, 5, 5, True)

        if 'x-timestamp' not in request.headers or \
                not check_float(request.headers['x-timestamp']):
            return HTTPBadRequest(body='Missing timestamp', request=request,
                                  content_type='text/plain')
        new_delete_at = int(request.headers.get('X-Delete-At') or 0)
        if new_delete_at and new_delete_at < time.time():
            return HTTPBadRequest(body='X-Delete-At in past', request=request,
                                  content_type='text/plain')
        try:
            disk_file = self.get_diskfile(
                device, partition, account, container, obj)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=device, request=request)
        try:
            orig_metadata = disk_file.read_metadata()
        except (DiskFileNotExist, DiskFileQuarantined):
            return HTTPNotFound(request=request)
        orig_timestamp = orig_metadata.get('X-Timestamp', '0')
        if orig_timestamp >= request.headers['x-timestamp']:
            return HTTPConflict(request=request)
        metadata = {'X-Timestamp': request.headers['x-timestamp']}
        metadata.update(val for val in request.headers.iteritems()
                        if is_user_meta('object', val[0]))
        for header_key in self.allowed_headers:
            if header_key in request.headers:
                header_caps = header_key.title()
                metadata[header_caps] = request.headers[header_key]
        orig_delete_at = int(orig_metadata.get('X-Delete-At') or 0)
        if orig_delete_at != new_delete_at:
            if new_delete_at:
                self.delete_at_update('PUT', new_delete_at, account, container,
                                      obj, request, device)
            if orig_delete_at:
                self.delete_at_update('DELETE', orig_delete_at, account,
                                      container, obj, request, device)
        disk_file.write_metadata(metadata)
        return HTTPAccepted(request=request)
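
# Hedged sketch (standalone): a simplified stand-in for the is_user_meta()
# check POST relies on (the real helper lives in swift.common.utils and is
# slightly stricter); only X-Object-Meta-* headers and the configured
# allowed_headers survive a POST.
def is_user_meta_sketch(server_type, key):
    return key.lower().startswith('x-%s-meta-' % server_type)

assert is_user_meta_sketch('object', 'X-Object-Meta-Color')
assert not is_user_meta_sketch('object', 'Content-Type')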

    @public
    @timing_stats()
    def PUT(self, request):
        """Handle HTTP PUT requests for the Swift Object Server."""
        device, partition, account, container, obj = \
            split_and_validate_path(request, 5, 5, True)

        if 'x-timestamp' not in request.headers or \
                not check_float(request.headers['x-timestamp']):
            return HTTPBadRequest(body='Missing timestamp', request=request,
                                  content_type='text/plain')
        error_response = check_object_creation(request, obj)
        if error_response:
            return error_response
        new_delete_at = int(request.headers.get('X-Delete-At') or 0)
        if new_delete_at and new_delete_at < time.time():
            return HTTPBadRequest(body='X-Delete-At in past', request=request,
                                  content_type='text/plain')
        try:
            fsize = request.message_length()
        except ValueError as e:
            return HTTPBadRequest(body=str(e), request=request,
                                  content_type='text/plain')
        try:
            disk_file = self.get_diskfile(
                device, partition, account, container, obj)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=device, request=request)
        try:
            orig_metadata = disk_file.read_metadata()
        except (DiskFileNotExist, DiskFileQuarantined):
            orig_metadata = {}

        # Checks for If-None-Match
        if request.if_none_match is not None and orig_metadata:
            if '*' in request.if_none_match:
                # File exists already so return 412
                return HTTPPreconditionFailed(request=request)
            if orig_metadata.get('ETag') in request.if_none_match:
                # The current ETag matches, so return 412
                return HTTPPreconditionFailed(request=request)

        orig_timestamp = orig_metadata.get('X-Timestamp')
        if orig_timestamp and orig_timestamp >= request.headers['x-timestamp']:
            return HTTPConflict(request=request)
        orig_delete_at = int(orig_metadata.get('X-Delete-At') or 0)
        upload_expiration = time.time() + self.max_upload_time
        etag = md5()
        elapsed_time = 0
        try:
            with disk_file.create(size=fsize) as writer:
                upload_size = 0

                def timeout_reader():
                    with ChunkReadTimeout(self.client_timeout):
                        return request.environ['wsgi.input'].read(
                            self.network_chunk_size)

                try:
                    for chunk in iter(lambda: timeout_reader(), ''):
                        start_time = time.time()
                        if start_time > upload_expiration:
                            self.logger.increment('PUT.timeouts')
                            return HTTPRequestTimeout(request=request)
                        etag.update(chunk)
                        upload_size = writer.write(chunk)
                        elapsed_time += time.time() - start_time
                except ChunkReadTimeout:
                    return HTTPRequestTimeout(request=request)
                if upload_size:
                    self.logger.transfer_rate(
                        'PUT.' + device + '.timing', elapsed_time,
                        upload_size)
                if fsize is not None and fsize != upload_size:
                    return HTTPClientDisconnect(request=request)
                etag = etag.hexdigest()
                if 'etag' in request.headers and \
                        request.headers['etag'].lower() != etag:
                    return HTTPUnprocessableEntity(request=request)
                metadata = {
                    'X-Timestamp': request.headers['x-timestamp'],
                    'Content-Type': request.headers['content-type'],
                    'ETag': etag,
                    'Content-Length': str(upload_size),
                }
                metadata.update(val for val in request.headers.iteritems()
                                if is_user_meta('object', val[0]))
                for header_key in (
                        request.headers.get('X-Backend-Replication-Headers') or
                        self.allowed_headers):
                    if header_key in request.headers:
                        header_caps = header_key.title()
                        metadata[header_caps] = request.headers[header_key]
                writer.put(metadata)
        except DiskFileNoSpace:
            return HTTPInsufficientStorage(drive=device, request=request)
        if orig_delete_at != new_delete_at:
            if new_delete_at:
                self.delete_at_update(
                    'PUT', new_delete_at, account, container, obj,
                    request, device)
            if orig_delete_at:
                self.delete_at_update(
                    'DELETE', orig_delete_at, account, container, obj,
                    request, device)
        self.container_update(
            'PUT', account, container, obj, request,
            HeaderKeyDict({
                'x-size': metadata['Content-Length'],
                'x-content-type': metadata['Content-Type'],
                'x-timestamp': metadata['X-Timestamp'],
                'x-etag': metadata['ETag']}),
            device)
        return HTTPCreated(request=request, etag=etag)
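
# Hedged sketch (standalone): the upload loop in PUT above relies on the
# two-argument iter(callable, sentinel) form; reading stops at the first
# empty read (EOF). The server code uses '' because wsgi.input yields str
# on Python 2; bytes are used here to keep the demo self-contained.
from io import BytesIO

stream = BytesIO(b'x' * 150000)
total = 0
for chunk in iter(lambda: stream.read(65536), b''):
    total += len(chunk)  # PUT hashes and writes each chunk at this point
assert total == 150000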

    @public
    @timing_stats()
    def GET(self, request):
        """Handle HTTP GET requests for the Swift Object Server."""
        device, partition, account, container, obj = \
            split_and_validate_path(request, 5, 5, True)
        keep_cache = self.keep_cache_private or (
            'X-Auth-Token' not in request.headers and
            'X-Storage-Token' not in request.headers)
        try:
            disk_file = self.get_diskfile(
                device, partition, account, container, obj)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=device, request=request)
        try:
            with disk_file.open():
                metadata = disk_file.get_metadata()
                obj_size = int(metadata['Content-Length'])
                file_x_ts = metadata['X-Timestamp']
                file_x_ts_flt = float(file_x_ts)
                keep_cache = (self.keep_cache_private or
                              ('X-Auth-Token' not in request.headers and
                               'X-Storage-Token' not in request.headers))
                response = Response(
                    app_iter=disk_file.reader(keep_cache=keep_cache),
                    request=request, conditional_response=True)
                response.headers['Content-Type'] = metadata.get(
                    'Content-Type', 'application/octet-stream')
                for key, value in metadata.iteritems():
                    if is_user_meta('object', key) or \
                            key.lower() in self.allowed_headers:
                        response.headers[key] = value
                response.etag = metadata['ETag']
                response.last_modified = math.ceil(file_x_ts_flt)
                response.content_length = obj_size
                try:
                    response.content_encoding = metadata[
                        'Content-Encoding']
                except KeyError:
                    pass
                response.headers['X-Timestamp'] = file_x_ts
                resp = request.get_response(response)
        except (DiskFileNotExist, DiskFileQuarantined):
            resp = HTTPNotFound(request=request, conditional_response=True)
        return resp

    @public
    @timing_stats(sample_rate=0.8)
    def HEAD(self, request):
        """Handle HTTP HEAD requests for the Swift Object Server."""
        device, partition, account, container, obj = \
            split_and_validate_path(request, 5, 5, True)
        try:
            disk_file = self.get_diskfile(
                device, partition, account, container, obj)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=device, request=request)
        try:
            metadata = disk_file.read_metadata()
        except (DiskFileNotExist, DiskFileQuarantined):
            return HTTPNotFound(request=request, conditional_response=True)
        response = Response(request=request, conditional_response=True)
        response.headers['Content-Type'] = metadata.get(
            'Content-Type', 'application/octet-stream')
        for key, value in metadata.iteritems():
            if is_user_meta('object', key) or \
                    key.lower() in self.allowed_headers:
                response.headers[key] = value
        response.etag = metadata['ETag']
        ts = metadata['X-Timestamp']
        response.last_modified = math.ceil(float(ts))
        # Needed for container sync feature
        response.headers['X-Timestamp'] = ts
        response.content_length = int(metadata['Content-Length'])
        try:
            response.content_encoding = metadata['Content-Encoding']
        except KeyError:
            pass
        return response

    @public
    @timing_stats()
    def DELETE(self, request):
        """Handle HTTP DELETE requests for the Swift Object Server."""
        device, partition, account, container, obj = \
            split_and_validate_path(request, 5, 5, True)
        if 'x-timestamp' not in request.headers or \
                not check_float(request.headers['x-timestamp']):
            return HTTPBadRequest(body='Missing timestamp', request=request,
                                  content_type='text/plain')
        try:
            disk_file = self.get_diskfile(
                device, partition, account, container, obj)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=device, request=request)
        try:
            orig_metadata = disk_file.read_metadata()
        except DiskFileExpired as e:
            orig_timestamp = e.timestamp
            orig_metadata = e.metadata
            response_class = HTTPNotFound
        except DiskFileDeleted as e:
            orig_timestamp = e.timestamp
            orig_metadata = {}
            response_class = HTTPNotFound
        except (DiskFileNotExist, DiskFileQuarantined):
            orig_timestamp = 0
            orig_metadata = {}
            response_class = HTTPNotFound
        else:
            orig_timestamp = orig_metadata.get('X-Timestamp', 0)
            if orig_timestamp < request.headers['x-timestamp']:
                response_class = HTTPNoContent
            else:
                response_class = HTTPConflict
        orig_delete_at = int(orig_metadata.get('X-Delete-At') or 0)
        try:
            req_if_delete_at_val = request.headers['x-if-delete-at']
            req_if_delete_at = int(req_if_delete_at_val)
        except KeyError:
            pass
        except ValueError:
            return HTTPBadRequest(
                request=request,
                body='Bad X-If-Delete-At header value')
        else:
            if orig_delete_at != req_if_delete_at:
                return HTTPPreconditionFailed(
                    request=request,
                    body='X-If-Delete-At and X-Delete-At do not match')
        if orig_delete_at:
            self.delete_at_update('DELETE', orig_delete_at, account,
                                  container, obj, request, device)
        req_timestamp = request.headers['X-Timestamp']
        if orig_timestamp < req_timestamp:
            disk_file.delete(req_timestamp)
            self.container_update(
                'DELETE', account, container, obj, request,
                HeaderKeyDict({'x-timestamp': req_timestamp}),
                device)
        return response_class(request=request)
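
# Hedged sketch (standalone): the X-If-Delete-At precondition in DELETE
# above, isolated into a function; the name and return convention are
# illustrative (None = no early response).
def if_delete_at_check(headers, orig_delete_at):
    try:
        req_if_delete_at = int(headers['x-if-delete-at'])
    except KeyError:
        return None   # header absent: no precondition to enforce
    except ValueError:
        return 400    # malformed header -> HTTPBadRequest
    if orig_delete_at != req_if_delete_at:
        return 412    # mismatch -> HTTPPreconditionFailed
    return None

assert if_delete_at_check({}, 0) is None
assert if_delete_at_check({'x-if-delete-at': 'nan'}, 0) == 400
assert if_delete_at_check({'x-if-delete-at': '5'}, 9) == 412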

    @public
    @replication
    @timing_stats(sample_rate=0.1)
    def REPLICATE(self, request):
        """
        Handle REPLICATE requests for the Swift Object Server.  This is used
        by the object replicator to get hashes for directories.
        """
        device, partition, suffix = split_and_validate_path(
            request, 2, 3, True)
        try:
            hashes = self._diskfile_mgr.get_hashes(device, partition, suffix)
        except DiskFileDeviceUnavailable:
            resp = HTTPInsufficientStorage(drive=device, request=request)
        else:
            resp = Response(body=pickle.dumps(hashes))
        return resp

    @public
    @replication
    @timing_stats(sample_rate=0.1)
    def REPLICATION(self, request):
        return Response(app_iter=ssync_receiver.Receiver(self, request)())

    def __call__(self, env, start_response):
        """WSGI Application entry point for the Swift Object Server."""
        start_time = time.time()
        req = Request(env)
        self.logger.txn_id = req.headers.get('x-trans-id', None)

        if not check_utf8(req.path_info):
            res = HTTPPreconditionFailed(body='Invalid UTF8 or contains NULL')
        else:
            try:
                # disallow methods which have not been marked 'public'
                try:
                    method = getattr(self, req.method)
                    getattr(method, 'publicly_accessible')
                    replication_method = getattr(method, 'replication', False)
                    if (self.replication_server is not None and
                            self.replication_server != replication_method):
                        raise AttributeError('Not allowed method.')
                except AttributeError:
                    res = HTTPMethodNotAllowed()
                else:
                    res = method(req)
            except DiskFileCollision:
                res = HTTPForbidden(request=req)
            except HTTPException as error_response:
                res = error_response
            except (Exception, Timeout):
                self.logger.exception(_(
                    'ERROR __call__ error with %(method)s'
                    ' %(path)s '), {'method': req.method, 'path': req.path})
                res = HTTPInternalServerError(body=traceback.format_exc())
        trans_time = time.time() - start_time
        if self.log_requests:
            log_line = get_log_line(req, res, trans_time, '')
            if req.method in ('REPLICATE', 'REPLICATION') or \
                    'X-Backend-Replication' in req.headers:
                self.logger.debug(log_line)
            else:
                self.logger.info(log_line)
        if req.method in ('PUT', 'DELETE'):
            slow = self.slow - trans_time
            if slow > 0:
                sleep(slow)
        return res(env, start_response)
Example No. 38
class TestAuditor(unittest.TestCase):
    def setUp(self):
        self.testdir = os.path.join(mkdtemp(), 'tmp_test_object_auditor')
        self.devices = os.path.join(self.testdir, 'node')
        self.rcache = os.path.join(self.testdir, 'object.recon')
        self.logger = FakeLogger()
        rmtree(self.testdir, ignore_errors=1)
        mkdirs(os.path.join(self.devices, 'sda'))
        self.objects = os.path.join(self.devices, 'sda', 'objects')

        os.mkdir(os.path.join(self.devices, 'sdb'))
        self.objects_2 = os.path.join(self.devices, 'sdb', 'objects')

        os.mkdir(self.objects)
        self.parts = {}
        for part in ['0', '1', '2', '3']:
            self.parts[part] = os.path.join(self.objects, part)
            os.mkdir(os.path.join(self.objects, part))

        self.conf = dict(devices=self.devices,
                         mount_check='false',
                         object_size_stats='10,100,1024,10240')
        self.df_mgr = DiskFileManager(self.conf, self.logger)
        self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'o')

    def tearDown(self):
        rmtree(os.path.dirname(self.testdir), ignore_errors=1)
        unit.xattr_data = {}

    def test_object_audit_extra_data(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        data = '0' * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            timestamp = str(normalize_timestamp(time.time()))
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            pre_quarantines = auditor_worker.quarantines

            auditor_worker.object_audit(
                AuditLocation(self.disk_file._datadir, 'sda', '0'))
            self.assertEquals(auditor_worker.quarantines, pre_quarantines)

            os.write(writer._fd, 'extra_data')
            auditor_worker.object_audit(
                AuditLocation(self.disk_file._datadir, 'sda', '0'))
            self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_audit_diff_data(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        data = '0' * 1024
        etag = md5()
        timestamp = str(normalize_timestamp(time.time()))
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            pre_quarantines = auditor_worker.quarantines

        # remake so it will have metadata
        self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'o')

        auditor_worker.object_audit(
            AuditLocation(self.disk_file._datadir, 'sda', '0'))
        self.assertEquals(auditor_worker.quarantines, pre_quarantines)
        etag = md5()
        etag.update('1' + '0' * 1023)
        etag = etag.hexdigest()
        metadata['ETag'] = etag

        with self.disk_file.create() as writer:
            writer.write(data)
            writer.put(metadata)

        auditor_worker.object_audit(
            AuditLocation(self.disk_file._datadir, 'sda', '0'))
        self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)
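
# Hedged sketch (standalone): the quarantine in the test above fires because
# the MD5 of the bytes on disk no longer matches the ETag recorded in the
# metadata. Plain str hashing matches the Python 2 code in this listing.
from hashlib import md5

stored_etag = md5('1' + '0' * 1023).hexdigest()  # what metadata now claims
actual_etag = md5('0' * 1024).hexdigest()        # what the .data file holds
assert stored_etag != actual_etag                # auditor -> quarantine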

    def test_object_audit_no_meta(self):
        timestamp = str(normalize_timestamp(time.time()))
        path = os.path.join(self.disk_file._datadir, timestamp + '.data')
        mkdirs(self.disk_file._datadir)
        fp = open(path, 'w')
        fp.write('0' * 1024)
        fp.close()
        invalidate_hash(os.path.dirname(self.disk_file._datadir))
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        pre_quarantines = auditor_worker.quarantines
        auditor_worker.object_audit(
            AuditLocation(self.disk_file._datadir, 'sda', '0'))
        self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_audit_will_not_swallow_errors_in_tests(self):
        timestamp = str(normalize_timestamp(time.time()))
        path = os.path.join(self.disk_file._datadir, timestamp + '.data')
        mkdirs(self.disk_file._datadir)
        with open(path, 'w') as f:
            write_metadata(f, {'name': '/a/c/o'})
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)

        def blowup(*args):
            raise NameError('tpyo')

        with mock.patch.object(DiskFileManager,
                               'get_diskfile_from_audit_location', blowup):
            self.assertRaises(NameError, auditor_worker.object_audit,
                              AuditLocation(os.path.dirname(path), 'sda', '0'))

    def test_failsafe_object_audit_will_swallow_errors_in_tests(self):
        timestamp = str(normalize_timestamp(time.time()))
        path = os.path.join(self.disk_file._datadir, timestamp + '.data')
        mkdirs(self.disk_file._datadir)
        with open(path, 'w') as f:
            write_metadata(f, {'name': '/a/c/o'})
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)

        def blowup(*args):
            raise NameError('tpyo')

        with mock.patch('swift.obj.diskfile.DiskFile', blowup):
            auditor_worker.failsafe_object_audit(
                AuditLocation(os.path.dirname(path), 'sda', '0'))
        self.assertEquals(auditor_worker.errors, 1)

    def test_generic_exception_handling(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        timestamp = str(normalize_timestamp(time.time()))
        pre_errors = auditor_worker.errors
        data = '0' * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
        with mock.patch('swift.obj.diskfile.DiskFile', lambda *_: 1 / 0):
            auditor_worker.audit_all_objects()
        self.assertEquals(auditor_worker.errors, pre_errors + 1)

    def test_object_run_once_pass(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        auditor_worker.log_time = 0
        timestamp = str(normalize_timestamp(time.time()))
        pre_quarantines = auditor_worker.quarantines
        data = '0' * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
        auditor_worker.audit_all_objects()
        self.assertEquals(auditor_worker.quarantines, pre_quarantines)
        self.assertEquals(auditor_worker.stats_buckets[1024], 1)
        self.assertEquals(auditor_worker.stats_buckets[10240], 0)

        # pick up some additional code coverage, large file
        data = '0' * 1024 * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
        auditor_worker.audit_all_objects(device_dirs=['sda', 'sdb'])
        self.assertEquals(auditor_worker.quarantines, pre_quarantines)
        self.assertEquals(auditor_worker.stats_buckets[1024], 1)
        self.assertEquals(auditor_worker.stats_buckets[10240], 0)
        self.assertEquals(auditor_worker.stats_buckets['OVER'], 1)

        # pick up even more additional code coverage, misc paths
        auditor_worker.log_time = -1
        auditor_worker.stats_sizes = []
        auditor_worker.audit_all_objects(device_dirs=['sda', 'sdb'])
        self.assertEquals(auditor_worker.quarantines, pre_quarantines)
        self.assertEquals(auditor_worker.stats_buckets[1024], 1)
        self.assertEquals(auditor_worker.stats_buckets[10240], 0)
        self.assertEquals(auditor_worker.stats_buckets['OVER'], 1)

    def test_object_run_once_no_sda(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        timestamp = str(normalize_timestamp(time.time()))
        pre_quarantines = auditor_worker.quarantines
        data = '0' * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            os.write(writer._fd, 'extra_data')
        auditor_worker.audit_all_objects()
        self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_run_once_multi_devices(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        timestamp = str(normalize_timestamp(time.time()))
        pre_quarantines = auditor_worker.quarantines
        data = '0' * 10
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
        auditor_worker.audit_all_objects()
        self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'ob')
        data = '1' * 10
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            os.write(writer._fd, 'extra_data')
        auditor_worker.audit_all_objects()
        self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_run_fast_track_non_zero(self):
        self.auditor = auditor.ObjectAuditor(self.conf)
        self.auditor.log_time = 0
        data = '0' * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': str(normalize_timestamp(time.time())),
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            etag = md5()
            etag.update('1' + '0' * 1023)
            etag = etag.hexdigest()
            metadata['ETag'] = etag
            write_metadata(writer._fd, metadata)

        quarantine_path = os.path.join(self.devices, 'sda', 'quarantined',
                                       'objects')
        kwargs = {'mode': 'once'}
        kwargs['zero_byte_fps'] = 50
        self.auditor.run_audit(**kwargs)
        self.assertFalse(os.path.isdir(quarantine_path))
        del (kwargs['zero_byte_fps'])
        self.auditor.run_audit(**kwargs)
        self.assertTrue(os.path.isdir(quarantine_path))

    def setup_bad_zero_byte(self, with_ts=False):
        self.auditor = auditor.ObjectAuditor(self.conf)
        self.auditor.log_time = 0
        ts_file_path = ''
        if with_ts:
            name_hash = hash_path('a', 'c', 'o')
            dir_path = os.path.join(self.devices, 'sda',
                                    storage_directory(DATADIR, '0', name_hash))
            ts_file_path = os.path.join(dir_path, '99999.ts')
            if not os.path.exists(dir_path):
                mkdirs(dir_path)
            fp = open(ts_file_path, 'w')
            write_metadata(fp, {'X-Timestamp': '99999', 'name': '/a/c/o'})
            fp.close()

        etag = md5()
        with self.disk_file.create() as writer:
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': str(normalize_timestamp(time.time())),
                'Content-Length': 10,
            }
            writer.put(metadata)
            etag = md5()
            etag = etag.hexdigest()
            metadata['ETag'] = etag
            write_metadata(writer._fd, metadata)
        return ts_file_path

    def test_object_run_fast_track_all(self):
        self.setup_bad_zero_byte()
        kwargs = {'mode': 'once'}
        self.auditor.run_audit(**kwargs)
        quarantine_path = os.path.join(self.devices, 'sda', 'quarantined',
                                       'objects')
        self.assertTrue(os.path.isdir(quarantine_path))

    def test_object_run_fast_track_zero(self):
        self.setup_bad_zero_byte()
        kwargs = {'mode': 'once'}
        kwargs['zero_byte_fps'] = 50
        self.auditor.run_audit(**kwargs)
        quarantine_path = os.path.join(self.devices, 'sda', 'quarantined',
                                       'objects')
        self.assertTrue(os.path.isdir(quarantine_path))

    def test_object_run_fast_track_zero_check_closed(self):
        rat = [False]

        class FakeFile(DiskFile):
            def _quarantine(self, data_file, msg):
                rat[0] = True
                DiskFile._quarantine(self, data_file, msg)

        self.setup_bad_zero_byte()
        was_df = auditor.diskfile.DiskFile
        try:
            auditor.diskfile.DiskFile = FakeFile
            kwargs = {'mode': 'once'}
            kwargs['zero_byte_fps'] = 50
            self.auditor.run_audit(**kwargs)
            quarantine_path = os.path.join(self.devices, 'sda', 'quarantined',
                                           'objects')
            self.assertTrue(os.path.isdir(quarantine_path))
            self.assertTrue(rat[0])
        finally:
            auditor.diskfile.DiskFile = was_df

    def test_with_tombstone(self):
        ts_file_path = self.setup_bad_zero_byte(with_ts=True)
        self.assertTrue(ts_file_path.endswith('ts'))
        kwargs = {'mode': 'once'}
        self.auditor.run_audit(**kwargs)
        self.assertTrue(os.path.exists(ts_file_path))

    def test_sleeper(self):
        auditor.SLEEP_BETWEEN_AUDITS = 0.10
        my_auditor = auditor.ObjectAuditor(self.conf)
        start = time.time()
        my_auditor._sleep()
        delta_t = time.time() - start
        self.assert_(delta_t > 0.08)
        self.assert_(delta_t < 0.12)

    def test_run_audit(self):
        class StopForever(Exception):
            pass

        class Bogus(Exception):
            pass

        class ObjectAuditorMock(object):
            check_args = ()
            check_kwargs = {}
            check_device_dir = None
            fork_called = 0
            master = 0
            wait_called = 0

            def mock_run(self, *args, **kwargs):
                self.check_args = args
                self.check_kwargs = kwargs
                if 'zero_byte_fps' in kwargs:
                    self.check_device_dir = kwargs.get('device_dirs')

            def mock_sleep(self):
                raise StopForever('stop')

            def mock_audit_loop_error(self,
                                      parent,
                                      zbo_fps,
                                      override_devices=None,
                                      **kwargs):
                raise Bogus('exception')

            def mock_fork(self):
                self.fork_called += 1
                if self.master:
                    return self.fork_called
                else:
                    return 0

            def mock_wait(self):
                self.wait_called += 1
                return (self.wait_called, 0)

        for i in string.ascii_letters[2:26]:
            mkdirs(os.path.join(self.devices, 'sd%s' % i))

        my_auditor = auditor.ObjectAuditor(
            dict(devices=self.devices,
                 mount_check='false',
                 zero_byte_files_per_second=89))
        mocker = ObjectAuditorMock()
        my_auditor.logger.exception = mock.MagicMock()
        real_audit_loop = my_auditor.audit_loop
        my_auditor.audit_loop = mocker.mock_audit_loop_error
        my_auditor.run_audit = mocker.mock_run
        my_auditor._sleep = mocker.mock_sleep
        was_fork = os.fork
        was_wait = os.wait
        os.fork = mocker.mock_fork
        os.wait = mocker.mock_wait
        try:
            my_auditor.run_once(zero_byte_fps=50)
            my_auditor.logger.exception.assert_called_once_with(
                'ERROR auditing: exception')
            my_auditor.logger.exception.reset_mock()
            self.assertRaises(StopForever, my_auditor.run_forever)
            my_auditor.logger.exception.assert_called_once_with(
                'ERROR auditing: exception')
            my_auditor.audit_loop = real_audit_loop

            self.assertRaises(StopForever,
                              my_auditor.run_forever,
                              zero_byte_fps=50)
            self.assertEquals(mocker.check_kwargs['zero_byte_fps'], 50)
            self.assertEquals(mocker.fork_called, 0)

            self.assertRaises(SystemExit, my_auditor.run_forever)
            self.assertEquals(mocker.fork_called, 1)
            self.assertEquals(mocker.check_kwargs['zero_byte_fps'], 89)
            self.assertEquals(mocker.check_device_dir, None)
            self.assertEquals(mocker.check_args, ())

            device_list = ['sd%s' % i for i in string.ascii_letters[2:10]]
            device_string = ','.join(device_list)
            device_string_bogus = device_string + ',bogus'

            mocker.fork_called = 0
            self.assertRaises(SystemExit,
                              my_auditor.run_once,
                              devices=device_string_bogus)
            self.assertEquals(mocker.fork_called, 1)
            self.assertEquals(mocker.check_kwargs['zero_byte_fps'], 89)
            self.assertEquals(sorted(mocker.check_device_dir), device_list)

            mocker.master = 1

            mocker.fork_called = 0
            self.assertRaises(StopForever, my_auditor.run_forever)
            # Fork is called 3 times since the zbf process is forked twice
            self.assertEquals(mocker.fork_called, 3)
            self.assertEquals(mocker.wait_called, 3)

        finally:
            os.fork = was_fork
            os.wait = was_wait
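
For reference, the fork/wait dance the mocks above emulate is the standard POSIX parent/child split: run_forever forks child processes for the audit passes (including a dedicated zero-byte-files pass) and the parent waits on each child. A minimal sketch of that pattern, independent of Swift's actual implementation:

import os

def run_forked(work):
    pid = os.fork()
    if pid == 0:
        # child: do one audit pass, then exit without running parent
        # cleanup handlers
        work()
        os._exit(0)
    # parent: block until the child finishes
    os.waitpid(pid, 0)

run_forked(lambda: None)  # e.g. pass in a real audit callable
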
Example No. 39
class ObjectController(BaseStorageServer):
    """Implements the WSGI application for the Swift Object Server."""

    server_type = 'object-server'

    def __init__(self, conf, logger=None):
        """
        Creates a new WSGI application for the Swift Object Server. An
        example configuration is given at
        <source-dir>/etc/object-server.conf-sample or
        /etc/swift/object-server.conf-sample.
        """
        super(ObjectController, self).__init__(conf)
        self.logger = logger or get_logger(conf, log_route='object-server')
        self.node_timeout = int(conf.get('node_timeout', 3))
        self.conn_timeout = float(conf.get('conn_timeout', 0.5))
        self.client_timeout = int(conf.get('client_timeout', 60))
        self.disk_chunk_size = int(conf.get('disk_chunk_size', 65536))
        self.network_chunk_size = int(conf.get('network_chunk_size', 65536))
        self.log_requests = config_true_value(conf.get('log_requests', 'true'))
        self.max_upload_time = int(conf.get('max_upload_time', 86400))
        self.slow = int(conf.get('slow', 0))
        self.keep_cache_private = \
            config_true_value(conf.get('keep_cache_private', 'false'))

        default_allowed_headers = '''
            content-disposition,
            content-encoding,
            x-delete-at,
            x-object-manifest,
            x-static-large-object,
        '''
        extra_allowed_headers = [
            header.strip().lower() for header in conf.get(
                'allowed_headers', default_allowed_headers).split(',')
            if header.strip()
        ]
        self.allowed_headers = set()
        for header in extra_allowed_headers:
            if header not in DATAFILE_SYSTEM_META:
                self.allowed_headers.add(header)
        self.auto_create_account_prefix = \
            conf.get('auto_create_account_prefix') or '.'
        self.expiring_objects_account = self.auto_create_account_prefix + \
            (conf.get('expiring_objects_account_name') or 'expiring_objects')
        self.expiring_objects_container_divisor = \
            int(conf.get('expiring_objects_container_divisor') or 86400)
        # Initialization was successful, so now apply the network chunk size
        # parameter as the default read / write buffer size for the network
        # sockets.
        #
        # NOTE WELL: This is a class setting, so until we can set this on a
        # per-connection basis, this affects reading and writing on ALL
        # sockets, those between the proxy servers and external clients, and
        # those between the proxy servers and the other internal servers.
        #
        # ** Because the primary motivation for this is to optimize how data
        # is written back to the proxy server, we could use the value from the
        # disk_chunk_size parameter. However, it affects all created sockets
        # using this class so we have chosen to tie it to the
        # network_chunk_size parameter value instead.
        socket._fileobject.default_bufsize = self.network_chunk_size

        # Provide further setup specific to an object server implementation.
        self.setup(conf)

    def setup(self, conf):
        """
        Implementation specific setup. This method is called at the very end
        by the constructor to allow a specific implementation to modify
        existing attributes or add its own attributes.

        :param conf: WSGI configuration parameter
        """

        # Common on-disk hierarchy shared across account, container and object
        # servers.
        self._diskfile_mgr = DiskFileManager(conf, self.logger)
        # This is populated by global_conf_callback way below as the semaphore
        # is shared by all workers.
        if 'replication_semaphore' in conf:
            # The value was put in a list so it could get past paste
            self.replication_semaphore = conf['replication_semaphore'][0]
        else:
            self.replication_semaphore = None
        self.replication_failure_threshold = int(
            conf.get('replication_failure_threshold') or 100)
        self.replication_failure_ratio = float(
            conf.get('replication_failure_ratio') or 1.0)

    def get_diskfile(self, device, partition, account, container, obj,
                     policy_idx, **kwargs):
        """
        Utility method for instantiating a DiskFile object supporting a given
        REST API.

        An implementation of the object server that wants to use a different
        DiskFile class would simply over-ride this method to provide that
        behavior.
        """
        return self._diskfile_mgr.get_diskfile(
            device, partition, account, container, obj, policy_idx, **kwargs)

    def async_update(self, op, account, container, obj, host, partition,
                     contdevice, headers_out, objdevice, policy_index):
        """
        Sends or saves an async update.

        :param op: operation performed (ex: 'PUT', or 'DELETE')
        :param account: account name for the object
        :param container: container name for the object
        :param obj: object name
        :param host: host that the container is on
        :param partition: partition that the container is on
        :param contdevice: device name that the container is on
        :param headers_out: dictionary of headers to send in the container
                            request
        :param objdevice: device name that the object is in
        :param policy_index: the associated storage policy index
        """
        headers_out['user-agent'] = 'object-server %s' % os.getpid()
        full_path = '/%s/%s/%s' % (account, container, obj)
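        # Attempt the container update synchronously only when the proxy
        # supplied a complete container location; otherwise, or on any
        # failure below, fall through to pickling an async_pending entry
        # for the object-updater daemon to retry later.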
        if all([host, partition, contdevice]):
            try:
                with ConnectionTimeout(self.conn_timeout):
                    ip, port = host.rsplit(':', 1)
                    conn = http_connect(ip, port, contdevice, partition, op,
                                        full_path, headers_out)
                with Timeout(self.node_timeout):
                    response = conn.getresponse()
                    response.read()
                    if is_success(response.status):
                        return
                    else:
                        self.logger.error(_(
                            'ERROR Container update failed '
                            '(saving for async update later): %(status)d '
                            'response from %(ip)s:%(port)s/%(dev)s'),
                            {'status': response.status, 'ip': ip, 'port': port,
                             'dev': contdevice})
            except (Exception, Timeout):
                self.logger.exception(_(
                    'ERROR container update failed with '
                    '%(ip)s:%(port)s/%(dev)s (saving for async update later)'),
                    {'ip': ip, 'port': port, 'dev': contdevice})
        data = {'op': op, 'account': account, 'container': container,
                'obj': obj, 'headers': headers_out}
        timestamp = headers_out['x-timestamp']
        self._diskfile_mgr.pickle_async_update(objdevice, account, container,
                                               obj, data, timestamp,
                                               policy_index)

    def container_update(self, op, account, container, obj, request,
                         headers_out, objdevice, policy_idx):
        """
        Update the container when objects are updated.

        :param op: operation performed (ex: 'PUT', or 'DELETE')
        :param account: account name for the object
        :param container: container name for the object
        :param obj: object name
        :param request: the original request object driving the update
        :param headers_out: dictionary of headers to send in the container
                            request(s)
        :param objdevice: device name that the object is in
        :param policy_idx: the associated storage policy index
        """
        headers_in = request.headers
        conthosts = [h.strip() for h in
                     headers_in.get('X-Container-Host', '').split(',')]
        contdevices = [d.strip() for d in
                       headers_in.get('X-Container-Device', '').split(',')]
        contpartition = headers_in.get('X-Container-Partition', '')

        if len(conthosts) != len(contdevices):
            # This shouldn't happen unless there's a bug in the proxy,
            # but if there is, we want to know about it.
            self.logger.error(_('ERROR Container update failed: different '
                                'numbers of hosts and devices in request: '
                                '"%s" vs "%s"') %
                               (headers_in.get('X-Container-Host', ''),
                                headers_in.get('X-Container-Device', '')))
            return

        if contpartition:
            updates = zip(conthosts, contdevices)
        else:
            updates = []

        headers_out['x-trans-id'] = headers_in.get('x-trans-id', '-')
        headers_out['referer'] = request.as_referer()
        headers_out['X-Backend-Storage-Policy-Index'] = policy_idx
        for conthost, contdevice in updates:
            self.async_update(op, account, container, obj, conthost,
                              contpartition, contdevice, headers_out,
                              objdevice, policy_idx)

    def delete_at_update(self, op, delete_at, account, container, obj,
                         request, objdevice, policy_index):
        """
        Update the expiring objects container when objects are updated.

        :param op: operation performed (ex: 'PUT', or 'DELETE')
        :param delete_at: scheduled delete in UNIX seconds, int
        :param account: account name for the object
        :param container: container name for the object
        :param obj: object name
        :param request: the original request driving the update
        :param objdevice: device name that the object is in
        :param policy_index: the policy index to be used for tmp dir
        """
        if config_true_value(
                request.headers.get('x-backend-replication', 'f')):
            return
        delete_at = normalize_delete_at_timestamp(delete_at)
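        # A lone (None, None) entry means the update loop below calls
        # async_update() with no host/partition/device, which skips the
        # HTTP attempt and just writes an async_pending entry.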
        updates = [(None, None)]

        partition = None
        hosts = contdevices = [None]
        headers_in = request.headers
        headers_out = HeaderKeyDict({
            # system accounts are always Policy-0
            'X-Backend-Storage-Policy-Index': 0,
            'x-timestamp': request.timestamp.internal,
            'x-trans-id': headers_in.get('x-trans-id', '-'),
            'referer': request.as_referer()})
        if op != 'DELETE':
            delete_at_container = headers_in.get('X-Delete-At-Container', None)
            if not delete_at_container:
                self.logger.warning(
                    'X-Delete-At-Container header must be specified for '
                    'expiring objects background %s to work properly. Making '
                    'best guess as to the container name for now.' % op)
                # TODO(gholt): In a future release, change the above warning to
                # a raised exception and remove the guess code below.
                delete_at_container = get_expirer_container(
                    delete_at, self.expiring_objects_container_divisor,
                    account, container, obj)
            partition = headers_in.get('X-Delete-At-Partition', None)
            hosts = headers_in.get('X-Delete-At-Host', '')
            contdevices = headers_in.get('X-Delete-At-Device', '')
            updates = [upd for upd in
                       zip((h.strip() for h in hosts.split(',')),
                           (c.strip() for c in contdevices.split(',')))
                       if all(upd) and partition]
            if not updates:
                updates = [(None, None)]
            headers_out['x-size'] = '0'
            headers_out['x-content-type'] = 'text/plain'
            headers_out['x-etag'] = 'd41d8cd98f00b204e9800998ecf8427e'
        else:
            # DELETEs of old expiration data have no way of knowing what the
            # old X-Delete-At-Container was at the time of the initial setting
            # of the data, so a best guess is made here.
            # Worst case is a DELETE is issued now for something that doesn't
            # exist there and the original data is left where it is, where
            # it will be ignored when the expirer eventually tries to issue the
            # object DELETE later since the X-Delete-At value won't match up.
            delete_at_container = get_expirer_container(
                delete_at, self.expiring_objects_container_divisor,
                account, container, obj)
        delete_at_container = normalize_delete_at_timestamp(
            delete_at_container)

        for host, contdevice in updates:
            self.async_update(
                op, self.expiring_objects_account, delete_at_container,
                '%s-%s/%s/%s' % (delete_at, account, container, obj),
                host, partition, contdevice, headers_out, objdevice,
                policy_index)

    @public
    @timing_stats()
    def POST(self, request):
        """Handle HTTP POST requests for the Swift Object Server."""
        device, partition, account, container, obj, policy_idx = \
            get_name_and_placement(request, 5, 5, True)
        req_timestamp = valid_timestamp(request)
        new_delete_at = int(request.headers.get('X-Delete-At') or 0)
        if new_delete_at and new_delete_at < time.time():
            return HTTPBadRequest(body='X-Delete-At in past', request=request,
                                  content_type='text/plain')
        try:
            disk_file = self.get_diskfile(
                device, partition, account, container, obj,
                policy_idx=policy_idx)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=device, request=request)
        try:
            orig_metadata = disk_file.read_metadata()
        except DiskFileXattrNotSupported:
            return HTTPInsufficientStorage(drive=device, request=request)
        except (DiskFileNotExist, DiskFileQuarantined):
            return HTTPNotFound(request=request)
        orig_timestamp = Timestamp(orig_metadata.get('X-Timestamp', 0))
        if orig_timestamp >= req_timestamp:
            return HTTPConflict(
                request=request,
                headers={'X-Backend-Timestamp': orig_timestamp.internal})
        metadata = {'X-Timestamp': req_timestamp.internal}
        metadata.update(val for val in request.headers.iteritems()
                        if is_user_meta('object', val[0]))
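        # Copy any extra allowed headers (e.g. Content-Disposition) into
        # the metadata, title-cased to match the on-disk convention.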
        for header_key in self.allowed_headers:
            if header_key in request.headers:
                header_caps = header_key.title()
                metadata[header_caps] = request.headers[header_key]
        orig_delete_at = int(orig_metadata.get('X-Delete-At') or 0)
        if orig_delete_at != new_delete_at:
            if new_delete_at:
                self.delete_at_update('PUT', new_delete_at, account, container,
                                      obj, request, device, policy_idx)
            if orig_delete_at:
                self.delete_at_update('DELETE', orig_delete_at, account,
                                      container, obj, request, device,
                                      policy_idx)
        try:
            disk_file.write_metadata(metadata)
        except (DiskFileXattrNotSupported, DiskFileNoSpace):
            return HTTPInsufficientStorage(drive=device, request=request)
        return HTTPAccepted(request=request)

    @public
    @timing_stats()
    def PUT(self, request):
        """Handle HTTP PUT requests for the Swift Object Server."""
        device, partition, account, container, obj, policy_idx = \
            get_name_and_placement(request, 5, 5, True)
        req_timestamp = valid_timestamp(request)
        error_response = check_object_creation(request, obj)
        if error_response:
            return error_response
        new_delete_at = int(request.headers.get('X-Delete-At') or 0)
        if new_delete_at and new_delete_at < time.time():
            return HTTPBadRequest(body='X-Delete-At in past', request=request,
                                  content_type='text/plain')
        try:
            fsize = request.message_length()
        except ValueError as e:
            return HTTPBadRequest(body=str(e), request=request,
                                  content_type='text/plain')
        try:
            disk_file = self.get_diskfile(
                device, partition, account, container, obj,
                policy_idx=policy_idx)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=device, request=request)
        try:
            orig_metadata = disk_file.read_metadata()
        except DiskFileXattrNotSupported:
            return HTTPInsufficientStorage(drive=device, request=request)
        except (DiskFileNotExist, DiskFileQuarantined):
            orig_metadata = {}

        # Checks for If-None-Match
        if request.if_none_match is not None and orig_metadata:
            if '*' in request.if_none_match:
                # File exists already so return 412
                return HTTPPreconditionFailed(request=request)
            if orig_metadata.get('ETag') in request.if_none_match:
                # The current ETag matches, so return 412
                return HTTPPreconditionFailed(request=request)

        orig_timestamp = Timestamp(orig_metadata.get('X-Timestamp', 0))
        if orig_timestamp >= req_timestamp:
            return HTTPConflict(
                request=request,
                headers={'X-Backend-Timestamp': orig_timestamp.internal})
        orig_delete_at = int(orig_metadata.get('X-Delete-At') or 0)
        upload_expiration = time.time() + self.max_upload_time
        etag = md5()
        elapsed_time = 0
        try:
            with disk_file.create(size=fsize) as writer:
                upload_size = 0

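                # Read the body one network_chunk_size piece at a time;
                # each read is bounded by client_timeout, and the ''
                # sentinel passed to iter() below ends the loop at EOF.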
                def timeout_reader():
                    with ChunkReadTimeout(self.client_timeout):
                        return request.environ['wsgi.input'].read(
                            self.network_chunk_size)

                try:
                    for chunk in iter(lambda: timeout_reader(), ''):
                        start_time = time.time()
                        if start_time > upload_expiration:
                            self.logger.increment('PUT.timeouts')
                            return HTTPRequestTimeout(request=request)
                        etag.update(chunk)
                        upload_size = writer.write(chunk)
                        elapsed_time += time.time() - start_time
                except ChunkReadTimeout:
                    return HTTPRequestTimeout(request=request)
                if upload_size:
                    self.logger.transfer_rate(
                        'PUT.' + device + '.timing', elapsed_time,
                        upload_size)
                if fsize is not None and fsize != upload_size:
                    return HTTPClientDisconnect(request=request)
                etag = etag.hexdigest()
                if 'etag' in request.headers and \
                        request.headers['etag'].lower() != etag:
                    return HTTPUnprocessableEntity(request=request)
                metadata = {
                    'X-Timestamp': request.timestamp.internal,
                    'Content-Type': request.headers['content-type'],
                    'ETag': etag,
                    'Content-Length': str(upload_size),
                }
                metadata.update(val for val in request.headers.iteritems()
                                if is_sys_or_user_meta('object', val[0]))
                headers_to_copy = (
                    request.headers.get(
                        'X-Backend-Replication-Headers', '').split() +
                    list(self.allowed_headers))
                for header_key in headers_to_copy:
                    if header_key in request.headers:
                        header_caps = header_key.title()
                        metadata[header_caps] = request.headers[header_key]
                writer.put(metadata)
        except (DiskFileXattrNotSupported, DiskFileNoSpace):
            return HTTPInsufficientStorage(drive=device, request=request)
        if orig_delete_at != new_delete_at:
            if new_delete_at:
                self.delete_at_update(
                    'PUT', new_delete_at, account, container, obj, request,
                    device, policy_idx)
            if orig_delete_at:
                self.delete_at_update(
                    'DELETE', orig_delete_at, account, container, obj,
                    request, device, policy_idx)
        self.container_update(
            'PUT', account, container, obj, request,
            HeaderKeyDict({
                'x-size': metadata['Content-Length'],
                'x-content-type': metadata['Content-Type'],
                'x-timestamp': metadata['X-Timestamp'],
                'x-etag': metadata['ETag']}),
            device, policy_idx)
        return HTTPCreated(request=request, etag=etag)

    @public
    @timing_stats()
    def GET(self, request):
        """Handle HTTP GET requests for the Swift Object Server."""
        device, partition, account, container, obj, policy_idx = \
            get_name_and_placement(request, 5, 5, True)
        keep_cache = self.keep_cache_private or (
            'X-Auth-Token' not in request.headers and
            'X-Storage-Token' not in request.headers)
        try:
            disk_file = self.get_diskfile(
                device, partition, account, container, obj,
                policy_idx=policy_idx)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=device, request=request)
        try:
            with disk_file.open():
                metadata = disk_file.get_metadata()
                obj_size = int(metadata['Content-Length'])
                file_x_ts = Timestamp(metadata['X-Timestamp'])
                keep_cache = (self.keep_cache_private or
                              ('X-Auth-Token' not in request.headers and
                               'X-Storage-Token' not in request.headers))
                response = Response(
                    app_iter=disk_file.reader(keep_cache=keep_cache),
                    request=request, conditional_response=True)
                response.headers['Content-Type'] = metadata.get(
                    'Content-Type', 'application/octet-stream')
                for key, value in metadata.iteritems():
                    if is_sys_or_user_meta('object', key) or \
                            key.lower() in self.allowed_headers:
                        response.headers[key] = value
                response.etag = metadata['ETag']
                response.last_modified = math.ceil(float(file_x_ts))
                response.content_length = obj_size
                try:
                    response.content_encoding = metadata[
                        'Content-Encoding']
                except KeyError:
                    pass
                response.headers['X-Timestamp'] = file_x_ts.normal
                response.headers['X-Backend-Timestamp'] = file_x_ts.internal
                resp = request.get_response(response)
        except DiskFileXattrNotSupported:
            return HTTPInsufficientStorage(drive=device, request=request)
        except (DiskFileNotExist, DiskFileQuarantined) as e:
            headers = {}
            if hasattr(e, 'timestamp'):
                headers['X-Backend-Timestamp'] = e.timestamp.internal
            resp = HTTPNotFound(request=request, headers=headers,
                                conditional_response=True)
        return resp

    @public
    @timing_stats(sample_rate=0.8)
    def HEAD(self, request):
        """Handle HTTP HEAD requests for the Swift Object Server."""
        device, partition, account, container, obj, policy_idx = \
            get_name_and_placement(request, 5, 5, True)
        try:
            disk_file = self.get_diskfile(
                device, partition, account, container, obj,
                policy_idx=policy_idx)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=device, request=request)
        try:
            metadata = disk_file.read_metadata()
        except DiskFileXattrNotSupported:
            return HTTPInsufficientStorage(drive=device, request=request)
        except (DiskFileNotExist, DiskFileQuarantined) as e:
            headers = {}
            if hasattr(e, 'timestamp'):
                headers['X-Backend-Timestamp'] = e.timestamp.internal
            return HTTPNotFound(request=request, headers=headers,
                                conditional_response=True)
        response = Response(request=request, conditional_response=True)
        response.headers['Content-Type'] = metadata.get(
            'Content-Type', 'application/octet-stream')
        for key, value in metadata.iteritems():
            if is_sys_or_user_meta('object', key) or \
                    key.lower() in self.allowed_headers:
                response.headers[key] = value
        response.etag = metadata['ETag']
        ts = Timestamp(metadata['X-Timestamp'])
        response.last_modified = math.ceil(float(ts))
        # Needed for container sync feature
        response.headers['X-Timestamp'] = ts.normal
        response.headers['X-Backend-Timestamp'] = ts.internal
        response.content_length = int(metadata['Content-Length'])
        try:
            response.content_encoding = metadata['Content-Encoding']
        except KeyError:
            pass
        return response

    @public
    @timing_stats()
    def DELETE(self, request):
        """Handle HTTP DELETE requests for the Swift Object Server."""
        device, partition, account, container, obj, policy_idx = \
            get_name_and_placement(request, 5, 5, True)
        req_timestamp = valid_timestamp(request)
        try:
            disk_file = self.get_diskfile(
                device, partition, account, container, obj,
                policy_idx=policy_idx)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=device, request=request)
        try:
            orig_metadata = disk_file.read_metadata()
        except DiskFileXattrNotSupported:
            return HTTPInsufficientStorage(drive=device, request=request)
        except DiskFileExpired as e:
            orig_timestamp = e.timestamp
            orig_metadata = e.metadata
            response_class = HTTPNotFound
        except DiskFileDeleted as e:
            orig_timestamp = e.timestamp
            orig_metadata = {}
            response_class = HTTPNotFound
        except (DiskFileNotExist, DiskFileQuarantined):
            orig_timestamp = 0
            orig_metadata = {}
            response_class = HTTPNotFound
        else:
            orig_timestamp = Timestamp(orig_metadata.get('X-Timestamp', 0))
            if orig_timestamp < req_timestamp:
                response_class = HTTPNoContent
            else:
                response_class = HTTPConflict
        response_timestamp = max(orig_timestamp, req_timestamp)
        orig_delete_at = int(orig_metadata.get('X-Delete-At') or 0)
        try:
            req_if_delete_at_val = request.headers['x-if-delete-at']
            req_if_delete_at = int(req_if_delete_at_val)
        except KeyError:
            pass
        except ValueError:
            return HTTPBadRequest(
                request=request,
                body='Bad X-If-Delete-At header value')
        else:
            # request includes x-if-delete-at; we must not place a tombstone
            # if we can not verify the x-if-delete-at time
            if not orig_timestamp:
                # no object found at all
                return HTTPNotFound()
            if orig_delete_at != req_if_delete_at:
                return HTTPPreconditionFailed(
                    request=request,
                    body='X-If-Delete-At and X-Delete-At do not match')
            else:
                # differentiate success from no object at all
                response_class = HTTPNoContent
        if orig_delete_at:
            self.delete_at_update('DELETE', orig_delete_at, account,
                                  container, obj, request, device,
                                  policy_idx)
        if orig_timestamp < req_timestamp:
            disk_file.delete(req_timestamp)
            self.container_update(
                'DELETE', account, container, obj, request,
                HeaderKeyDict({'x-timestamp': req_timestamp.internal}),
                device, policy_idx)
        return response_class(
            request=request,
            headers={'X-Backend-Timestamp': response_timestamp.internal})

    @public
    @replication
    @timing_stats(sample_rate=0.1)
    def REPLICATE(self, request):
        """
        Handle REPLICATE requests for the Swift Object Server.  This is used
        by the object replicator to get hashes for directories.
        """
        device, partition, suffix, policy_idx = \
            get_name_and_placement(request, 2, 3, True)
        try:
            hashes = self._diskfile_mgr.get_hashes(device, partition, suffix,
                                                   policy_idx)
        except DiskFileDeviceUnavailable:
            resp = HTTPInsufficientStorage(drive=device, request=request)
        else:
            resp = Response(body=pickle.dumps(hashes))
        return resp

    @public
    @replication
    @timing_stats(sample_rate=0.1)
    def REPLICATION(self, request):
        return Response(app_iter=ssync_receiver.Receiver(self, request)())

    def __call__(self, env, start_response):
        """WSGI Application entry point for the Swift Object Server."""
        start_time = time.time()
        req = Request(env)
        self.logger.txn_id = req.headers.get('x-trans-id', None)

        if not check_utf8(req.path_info):
            res = HTTPPreconditionFailed(body='Invalid UTF8 or contains NULL')
        else:
            try:
                # disallow methods which have not been marked 'public'
                try:
                    if req.method not in self.allowed_methods:
                        raise AttributeError('Not allowed method.')
                except AttributeError:
                    res = HTTPMethodNotAllowed()
                else:
                    method = getattr(self, req.method)
                    res = method(req)
            except DiskFileCollision:
                res = HTTPForbidden(request=req)
            except HTTPException as error_response:
                res = error_response
            except (Exception, Timeout):
                self.logger.exception(_(
                    'ERROR __call__ error with %(method)s'
                    ' %(path)s '), {'method': req.method, 'path': req.path})
                res = HTTPInternalServerError(body=traceback.format_exc())
        trans_time = time.time() - start_time
        if self.log_requests:
            log_line = get_log_line(req, res, trans_time, '')
            if req.method in ('REPLICATE', 'REPLICATION') or \
                    'X-Backend-Replication' in req.headers:
                self.logger.debug(log_line)
            else:
                self.logger.info(log_line)
        if req.method in ('PUT', 'DELETE'):
            slow = self.slow - trans_time
            if slow > 0:
                sleep(slow)

        # To be able to zero-copy send the object, we need a few things.
        # First, we have to be responding successfully to a GET, or else we're
        # not sending the object. Second, we have to be able to extract the
        # socket file descriptor from the WSGI input object. Third, the
        # diskfile has to support zero-copy send.
        #
        # There's a good chance that this could work for 206 responses too,
        # but the common case is sending the whole object, so we'll start
        # there.
        if req.method == 'GET' and res.status_int == 200 and \
           isinstance(env['wsgi.input'], wsgi.Input):
            app_iter = getattr(res, 'app_iter', None)
            checker = getattr(app_iter, 'can_zero_copy_send', None)
            if checker and checker():
                # For any kind of zero-copy thing like sendfile or splice, we
                # need the file descriptor. Eventlet doesn't provide a clean
                # way of getting that, so we resort to this.
                wsock = env['wsgi.input'].get_socket()
                wsockfd = wsock.fileno()

                # Don't call zero_copy_send() until after we force the HTTP
                # headers out of Eventlet and into the socket.
                def zero_copy_iter():
                    # If possible, set TCP_CORK so that headers don't
                    # immediately go on the wire, but instead, wait for some
                    # response body to make the TCP frames as large as
                    # possible (and hence as few packets as possible).
                    #
                    # On non-Linux systems, we might consider TCP_NODELAY, but
                    # since the only known zero-copy-capable diskfile uses
                    # Linux-specific syscalls, we'll defer that work until
                    # someone needs it.
                    if hasattr(socket, 'TCP_CORK'):
                        wsock.setsockopt(socket.IPPROTO_TCP,
                                         socket.TCP_CORK, 1)
                    yield EventletPlungerString()
                    try:
                        app_iter.zero_copy_send(wsockfd)
                    except Exception:
                        self.logger.exception("zero_copy_send() blew up")
                        raise
                    yield ''

                # Get headers ready to go out
                res(env, start_response)
                return zero_copy_iter()
            else:
                return res(env, start_response)
        else:
            return res(env, start_response)
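
Because __call__ implements the standard WSGI contract, the controller can be driven by any WSGI server. A minimal sketch using only the stdlib (the devices path is a hypothetical scratch directory; a real deployment runs this under eventlet via swift.common.wsgi with the sample paste config):

from wsgiref.simple_server import make_server

conf = {'devices': '/tmp/swift-devices',  # hypothetical scratch dir
        'mount_check': 'false'}
app = ObjectController(conf)
# wsgiref is single-threaded and shown for illustration only; 6200 is
# just the conventional object-server port.
make_server('127.0.0.1', 6200, app).serve_forever()
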
Example No. 40
    def test_obj_put_async_update_redirection_loop(self):
        policies = list(POLICIES)
        random.shuffle(policies)
        # setup updater
        conf = {
            'devices': self.devices_dir,
            'mount_check': 'false',
            'swift_dir': self.testdir,
        }
        daemon = object_updater.ObjectUpdater(conf, logger=self.logger)
        async_dir = os.path.join(self.sda1, get_async_dir(policies[0]))
        os.mkdir(async_dir)
        dfmanager = DiskFileManager(conf, daemon.logger)

        ts_obj = next(self.ts_iter)
        self._write_async_update(dfmanager, ts_obj, policies[0])
        orig_async_path, orig_async_data = self._check_async_file(async_dir)

        # run once
        ts_redirect = next(self.ts_iter)

        resp_headers_1 = {
            'Location': '/.shards_a/c_shard_1/o',
            'X-Backend-Redirect-Timestamp': ts_redirect.internal
        }
        resp_headers_2 = {
            'Location': '/.shards_a/c_shard_2/o',
            'X-Backend-Redirect-Timestamp': ts_redirect.internal
        }
        fake_responses = (
            # 1st round of redirects, 2nd round of redirects
            [(301, resp_headers_1)] * 3 + [(301, resp_headers_2)] * 3)
        fake_status_codes, fake_headers = zip(*fake_responses)
        with mocked_http_conn(*fake_status_codes,
                              headers=fake_headers) as conn:
            with mock.patch('swift.obj.updater.dump_recon_cache'):
                daemon.run_once()
        self._check_update_requests(conn.requests[:3], ts_obj, policies[0])
        self._check_update_requests(conn.requests[3:], ts_obj, policies[0])
        # only *one* set of redirected requests is attempted per cycle
        root_part = daemon.container_ring.get_part('a/c')
        shard_1_part = daemon.container_ring.get_part('.shards_a/c_shard_1')
        shard_2_part = daemon.container_ring.get_part('.shards_a/c_shard_2')
        shard_3_part = daemon.container_ring.get_part('.shards_a/c_shard_3')
        self.assertEqual(['/sda1/%s/a/c/o' % root_part] * 3 +
                         ['/sda1/%s/.shards_a/c_shard_1/o' % shard_1_part] * 3,
                         [req['path'] for req in conn.requests])
        self.assertEqual({
            'redirects': 2,
            'async_pendings': 1
        }, daemon.logger.get_increment_counts())
        # update failed; we still have a pending file with the most recent
        # redirect response Location header value added to the data
        async_path, async_data = self._check_async_file(async_dir)
        self.assertEqual(orig_async_path, async_path)
        self.assertEqual(
            dict(orig_async_data,
                 container_path='.shards_a/c_shard_2',
                 redirect_history=[
                     '.shards_a/c_shard_1', '.shards_a/c_shard_2'
                 ]), async_data)

        # next cycle, more redirects! first is to a previously visited location
        resp_headers_3 = {
            'Location': '/.shards_a/c_shard_3/o',
            'X-Backend-Redirect-Timestamp': ts_redirect.internal
        }
        fake_responses = (
            # 1st round of redirects, 2nd round of redirects
            [(301, resp_headers_1)] * 3 + [(301, resp_headers_3)] * 3)
        fake_status_codes, fake_headers = zip(*fake_responses)
        with mocked_http_conn(*fake_status_codes,
                              headers=fake_headers) as conn:
            with mock.patch('swift.obj.updater.dump_recon_cache'):
                daemon.run_once()
        self._check_update_requests(conn.requests[:3], ts_obj, policies[0])
        self._check_update_requests(conn.requests[3:], ts_obj, policies[0])
        # first try the previously persisted container path, response to that
        # creates a loop so ignore and send to root
        self.assertEqual(
            ['/sda1/%s/.shards_a/c_shard_2/o' % shard_2_part] * 3 +
            ['/sda1/%s/a/c/o' % root_part] * 3,
            [req['path'] for req in conn.requests])
        self.assertEqual({
            'redirects': 4,
            'async_pendings': 1
        }, daemon.logger.get_increment_counts())
        # update failed; we still have a pending file with the most recent
        # redirect response Location header value from root added to the
        # persisted data
        async_path, async_data = self._check_async_file(async_dir)
        self.assertEqual(orig_async_path, async_path)
        # note: redirect_history was reset when falling back to root
        self.assertEqual(
            dict(orig_async_data,
                 container_path='.shards_a/c_shard_3',
                 redirect_history=['.shards_a/c_shard_3']), async_data)

        # next cycle, more redirects! first is to a location visited previously
        # but not since last fall back to root, so that location IS tried;
        # second is to a location visited since last fall back to root so that
        # location is NOT tried
        fake_responses = (
            # 1st round of redirects, 2nd round of redirects
            [(301, resp_headers_1)] * 3 + [(301, resp_headers_3)] * 3)
        fake_status_codes, fake_headers = zip(*fake_responses)
        with mocked_http_conn(*fake_status_codes,
                              headers=fake_headers) as conn:
            with mock.patch('swift.obj.updater.dump_recon_cache'):
                daemon.run_once()
        self._check_update_requests(conn.requests, ts_obj, policies[0])
        self.assertEqual(
            ['/sda1/%s/.shards_a/c_shard_3/o' % shard_3_part] * 3 +
            ['/sda1/%s/.shards_a/c_shard_1/o' % shard_1_part] * 3,
            [req['path'] for req in conn.requests])
        self.assertEqual({
            'redirects': 6,
            'async_pendings': 1
        }, daemon.logger.get_increment_counts())
        # update failed; we still have a pending file, but container_path
        # is None because the most recent redirect location was a repeat
        async_path, async_data = self._check_async_file(async_dir)
        self.assertEqual(orig_async_path, async_path)
        self.assertEqual(
            dict(orig_async_data, container_path=None, redirect_history=[]),
            async_data)

        # next cycle, persisted container path is None so update should go to
        # root, this time it succeeds
        fake_responses = [(200, {})] * 3
        fake_status_codes, fake_headers = zip(*fake_responses)
        with mocked_http_conn(*fake_status_codes,
                              headers=fake_headers) as conn:
            with mock.patch('swift.obj.updater.dump_recon_cache'):
                daemon.run_once()
        self._check_update_requests(conn.requests, ts_obj, policies[0])
        self.assertEqual(['/sda1/%s/a/c/o' % root_part] * 3,
                         [req['path'] for req in conn.requests])
        self.assertEqual(
            {
                'redirects': 6,
                'successes': 1,
                'unlinks': 1,
                'async_pendings': 1
            }, daemon.logger.get_increment_counts())
        self.assertFalse(os.listdir(async_dir))  # no async file
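
The redirects exercised above depend on the container server responding 301 with a Location such as /.shards_a/c_shard_1/o plus an X-Backend-Redirect-Timestamp header; the updater persists the account/container portion as container_path. A minimal sketch of that extraction (the helper name is hypothetical, not Swift's API):

def shard_container_path(location):
    # '/.shards_a/c_shard_1/o' -> '.shards_a/c_shard_1'
    segments = location.strip('/').split('/')
    return '/'.join(segments[:2])

assert shard_container_path('/.shards_a/c_shard_1/o') == \
    '.shards_a/c_shard_1'
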
Example No. 41
class TestAuditor(unittest.TestCase):
    def setUp(self):
        self.testdir = os.path.join(mkdtemp(), "tmp_test_object_auditor")
        self.devices = os.path.join(self.testdir, "node")
        self.rcache = os.path.join(self.testdir, "object.recon")
        self.logger = FakeLogger()
        rmtree(self.testdir, ignore_errors=1)
        mkdirs(os.path.join(self.devices, "sda"))
        self.objects = os.path.join(self.devices, "sda", "objects")

        os.mkdir(os.path.join(self.devices, "sdb"))
        self.objects_2 = os.path.join(self.devices, "sdb", "objects")

        os.mkdir(self.objects)
        self.parts = {}
        for part in ["0", "1", "2", "3"]:
            self.parts[part] = os.path.join(self.objects, part)
            os.mkdir(os.path.join(self.objects, part))

        self.conf = dict(devices=self.devices, mount_check="false", object_size_stats="10,100,1024,10240")
        self.df_mgr = DiskFileManager(self.conf, self.logger)
        self.disk_file = self.df_mgr.get_diskfile("sda", "0", "a", "c", "o")

    def tearDown(self):
        rmtree(os.path.dirname(self.testdir), ignore_errors=1)
        unit.xattr_data = {}

    def test_object_audit_extra_data(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices)
        data = "0" * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            timestamp = str(normalize_timestamp(time.time()))
            metadata = {"ETag": etag, "X-Timestamp": timestamp, "Content-Length": str(os.fstat(writer._fd).st_size)}
            writer.put(metadata)
            pre_quarantines = auditor_worker.quarantines

            auditor_worker.object_audit(AuditLocation(self.disk_file._datadir, "sda", "0"))
            self.assertEquals(auditor_worker.quarantines, pre_quarantines)

            os.write(writer._fd, "extra_data")
            auditor_worker.object_audit(AuditLocation(self.disk_file._datadir, "sda", "0"))
            self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_audit_diff_data(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices)
        data = "0" * 1024
        etag = md5()
        timestamp = str(normalize_timestamp(time.time()))
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {"ETag": etag, "X-Timestamp": timestamp, "Content-Length": str(os.fstat(writer._fd).st_size)}
            writer.put(metadata)
            pre_quarantines = auditor_worker.quarantines

        # remake so it will have metadata
        self.disk_file = self.df_mgr.get_diskfile("sda", "0", "a", "c", "o")

        auditor_worker.object_audit(AuditLocation(self.disk_file._datadir, "sda", "0"))
        self.assertEquals(auditor_worker.quarantines, pre_quarantines)
        etag = md5()
        etag.update("1" + "0" * 1023)
        etag = etag.hexdigest()
        metadata["ETag"] = etag

        with self.disk_file.create() as writer:
            writer.write(data)
            writer.put(metadata)

        auditor_worker.object_audit(AuditLocation(self.disk_file._datadir, "sda", "0"))
        self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_audit_no_meta(self):
        timestamp = str(normalize_timestamp(time.time()))
        path = os.path.join(self.disk_file._datadir, timestamp + ".data")
        mkdirs(self.disk_file._datadir)
        fp = open(path, "w")
        fp.write("0" * 1024)
        fp.close()
        invalidate_hash(os.path.dirname(self.disk_file._datadir))
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices)
        pre_quarantines = auditor_worker.quarantines
        auditor_worker.object_audit(AuditLocation(self.disk_file._datadir, "sda", "0"))
        self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_audit_will_not_swallow_errors_in_tests(self):
        timestamp = str(normalize_timestamp(time.time()))
        path = os.path.join(self.disk_file._datadir, timestamp + ".data")
        mkdirs(self.disk_file._datadir)
        with open(path, "w") as f:
            write_metadata(f, {"name": "/a/c/o"})
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices)

        def blowup(*args):
            raise NameError("tpyo")

        with mock.patch.object(DiskFileManager, "get_diskfile_from_audit_location", blowup):
            self.assertRaises(NameError, auditor_worker.object_audit, AuditLocation(os.path.dirname(path), "sda", "0"))

    def test_failsafe_object_audit_will_swallow_errors_in_tests(self):
        timestamp = str(normalize_timestamp(time.time()))
        path = os.path.join(self.disk_file._datadir, timestamp + ".data")
        mkdirs(self.disk_file._datadir)
        with open(path, "w") as f:
            write_metadata(f, {"name": "/a/c/o"})
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices)

        def blowup(*args):
            raise NameError("tpyo")

        with mock.patch("swift.obj.diskfile.DiskFile", blowup):
            auditor_worker.failsafe_object_audit(AuditLocation(os.path.dirname(path), "sda", "0"))
        self.assertEquals(auditor_worker.errors, 1)

    def test_generic_exception_handling(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices)
        timestamp = str(normalize_timestamp(time.time()))
        pre_errors = auditor_worker.errors
        data = "0" * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {"ETag": etag, "X-Timestamp": timestamp, "Content-Length": str(os.fstat(writer._fd).st_size)}
            writer.put(metadata)
        with mock.patch("swift.obj.diskfile.DiskFile", lambda *_: 1 / 0):
            auditor_worker.audit_all_objects()
        self.assertEquals(auditor_worker.errors, pre_errors + 1)

    def test_object_run_once_pass(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices)
        auditor_worker.log_time = 0
        timestamp = str(normalize_timestamp(time.time()))
        pre_quarantines = auditor_worker.quarantines
        data = "0" * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {"ETag": etag, "X-Timestamp": timestamp, "Content-Length": str(os.fstat(writer._fd).st_size)}
            writer.put(metadata)
        auditor_worker.audit_all_objects()
        self.assertEquals(auditor_worker.quarantines, pre_quarantines)
        self.assertEquals(auditor_worker.stats_buckets[1024], 1)
        self.assertEquals(auditor_worker.stats_buckets[10240], 0)

    def test_object_run_once_no_sda(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices)
        timestamp = str(normalize_timestamp(time.time()))
        pre_quarantines = auditor_worker.quarantines
        data = "0" * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {"ETag": etag, "X-Timestamp": timestamp, "Content-Length": str(os.fstat(writer._fd).st_size)}
            writer.put(metadata)
            os.write(writer._fd, "extra_data")
        auditor_worker.audit_all_objects()
        self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_run_once_multi_devices(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices)
        timestamp = str(normalize_timestamp(time.time()))
        pre_quarantines = auditor_worker.quarantines
        data = "0" * 10
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {"ETag": etag, "X-Timestamp": timestamp, "Content-Length": str(os.fstat(writer._fd).st_size)}
            writer.put(metadata)
        auditor_worker.audit_all_objects()
        self.disk_file = self.df_mgr.get_diskfile("sda", "0", "a", "c", "ob")
        data = "1" * 10
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {"ETag": etag, "X-Timestamp": timestamp, "Content-Length": str(os.fstat(writer._fd).st_size)}
            writer.put(metadata)
            os.write(writer._fd, "extra_data")
        auditor_worker.audit_all_objects()
        self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_run_fast_track_non_zero(self):
        self.auditor = auditor.ObjectAuditor(self.conf)
        self.auditor.log_time = 0
        data = "0" * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                "ETag": etag,
                "X-Timestamp": str(normalize_timestamp(time.time())),
                "Content-Length": str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            etag = md5()
            etag.update("1" + "0" * 1023)
            etag = etag.hexdigest()
            metadata["ETag"] = etag
            write_metadata(writer._fd, metadata)

        quarantine_path = os.path.join(self.devices, "sda", "quarantined", "objects")
        kwargs = {"mode": "once"}
        kwargs["zero_byte_fps"] = 50
        self.auditor.run_audit(**kwargs)
        self.assertFalse(os.path.isdir(quarantine_path))
        del kwargs["zero_byte_fps"]
        self.auditor.run_audit(**kwargs)
        self.assertTrue(os.path.isdir(quarantine_path))

    def setup_bad_zero_byte(self, with_ts=False):
        self.auditor = auditor.ObjectAuditor(self.conf)
        self.auditor.log_time = 0
        ts_file_path = ""
        if with_ts:
            name_hash = hash_path("a", "c", "o")
            dir_path = os.path.join(self.devices, "sda", storage_directory(DATADIR, "0", name_hash))
            ts_file_path = os.path.join(dir_path, "99999.ts")
            if not os.path.exists(dir_path):
                mkdirs(dir_path)
            fp = open(ts_file_path, "w")
            write_metadata(fp, {"X-Timestamp": "99999", "name": "/a/c/o"})
            fp.close()

        etag = md5()
        with self.disk_file.create() as writer:
            etag = etag.hexdigest()
            metadata = {"ETag": etag, "X-Timestamp": str(normalize_timestamp(time.time())), "Content-Length": 10}
            writer.put(metadata)
            etag = md5()
            etag = etag.hexdigest()
            metadata["ETag"] = etag
            write_metadata(writer._fd, metadata)
        return ts_file_path

    def test_object_run_fast_track_all(self):
        self.setup_bad_zero_byte()
        kwargs = {"mode": "once"}
        self.auditor.run_audit(**kwargs)
        quarantine_path = os.path.join(self.devices, "sda", "quarantined", "objects")
        self.assertTrue(os.path.isdir(quarantine_path))

    def test_object_run_fast_track_zero(self):
        self.setup_bad_zero_byte()
        kwargs = {"mode": "once"}
        kwargs["zero_byte_fps"] = 50
        self.auditor.run_audit(**kwargs)
        quarantine_path = os.path.join(self.devices, "sda", "quarantined", "objects")
        self.assertTrue(os.path.isdir(quarantine_path))

    def test_object_run_fast_track_zero_check_closed(self):
        rat = [False]

        class FakeFile(DiskFile):
            def _quarantine(self, data_file, msg):
                rat[0] = True
                DiskFile._quarantine(self, data_file, msg)

        self.setup_bad_zero_byte()
        was_df = auditor.diskfile.DiskFile
        try:
            auditor.diskfile.DiskFile = FakeFile
            kwargs = {"mode": "once"}
            kwargs["zero_byte_fps"] = 50
            self.auditor.run_audit(**kwargs)
            quarantine_path = os.path.join(self.devices, "sda", "quarantined", "objects")
            self.assertTrue(os.path.isdir(quarantine_path))
            self.assertTrue(rat[0])
        finally:
            auditor.diskfile.DiskFile = was_df

    def test_with_tombstone(self):
        ts_file_path = self.setup_bad_zero_byte(with_ts=True)
        self.assertTrue(ts_file_path.endswith("ts"))
        kwargs = {"mode": "once"}
        self.auditor.run_audit(**kwargs)
        self.assertTrue(os.path.exists(ts_file_path))

    def test_sleeper(self):
        auditor.SLEEP_BETWEEN_AUDITS = 0.10
        my_auditor = auditor.ObjectAuditor(self.conf)
        start = time.time()
        my_auditor._sleep()
        delta_t = time.time() - start
        self.assertTrue(delta_t > 0.08)
        self.assertTrue(delta_t < 0.12)

    def test_run_audit(self):
        class StopForever(Exception):
            pass

        class ObjectAuditorMock(object):
            check_args = ()
            check_kwargs = {}
            check_device_dir = None
            fork_called = 0
            master = 0
            wait_called = 0

            def mock_run(self, *args, **kwargs):
                self.check_args = args
                self.check_kwargs = kwargs
                if "zero_byte_fps" in kwargs:
                    self.check_device_dir = kwargs.get("device_dirs")

            def mock_sleep(self):
                raise StopForever("stop")

            def mock_fork(self):
                self.fork_called += 1
                if self.master:
                    return self.fork_called
                else:
                    return 0

            def mock_wait(self):
                self.wait_called += 1
                return (self.wait_called, 0)

        for i in string.ascii_letters[2:26]:
            mkdirs(os.path.join(self.devices, "sd%s" % i))

        my_auditor = auditor.ObjectAuditor(
            dict(devices=self.devices, mount_check="false", zero_byte_files_per_second=89)
        )
        mocker = ObjectAuditorMock()
        my_auditor.run_audit = mocker.mock_run
        my_auditor._sleep = mocker.mock_sleep
        was_fork = os.fork
        was_wait = os.wait
        try:
            os.fork = mocker.mock_fork
            os.wait = mocker.mock_wait
            self.assertRaises(StopForever, my_auditor.run_forever, zero_byte_fps=50)
            self.assertEquals(mocker.check_kwargs["zero_byte_fps"], 50)
            self.assertEquals(mocker.fork_called, 0)

            self.assertRaises(SystemExit, my_auditor.run_forever)
            self.assertEquals(mocker.fork_called, 1)
            self.assertEquals(mocker.check_kwargs["zero_byte_fps"], 89)
            self.assertEquals(mocker.check_device_dir, None)
            self.assertEquals(mocker.check_args, ())

            device_list = ["sd%s" % i for i in string.ascii_letters[2:10]]
            device_string = ",".join(device_list)
            device_string_bogus = device_string + ",bogus"

            mocker.fork_called = 0
            self.assertRaises(SystemExit, my_auditor.run_once, devices=device_string_bogus)
            self.assertEquals(mocker.fork_called, 1)
            self.assertEquals(mocker.check_kwargs["zero_byte_fps"], 89)
            self.assertEquals(sorted(mocker.check_device_dir), device_list)

            mocker.master = 1

            mocker.fork_called = 0
            self.assertRaises(StopForever, my_auditor.run_forever)
            # Fork is called 3 times since the zbf process is forked twice
            self.assertEquals(mocker.fork_called, 3)
            self.assertEquals(mocker.wait_called, 3)

        finally:
            os.fork = was_fork
            os.wait = was_wait
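
The tests above repeat one write idiom over and over; condensed here as a
hedged helper sketch (the `disk_file` object and the writer's `_fd`
attribute come straight from the tests themselves; the helper name is mine):

import os
from hashlib import md5

def write_object(disk_file, data, timestamp):
    # DiskFile.create() yields a writer; put() persists the metadata
    # (stored as xattrs) alongside the data just written.
    with disk_file.create() as writer:
        writer.write(data)
        writer.put({
            'ETag': md5(data).hexdigest(),
            'X-Timestamp': timestamp,
            'Content-Length': str(os.fstat(writer._fd).st_size),
        })
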
Example #42
class TestAuditor(unittest.TestCase):
    def setUp(self):
        self.testdir = os.path.join(mkdtemp(), 'tmp_test_object_auditor')
        self.devices = os.path.join(self.testdir, 'node')
        self.logger = FakeLogger()
        rmtree(self.testdir, ignore_errors=1)
        mkdirs(os.path.join(self.devices, 'sda'))
        self.objects = os.path.join(self.devices, 'sda', 'objects')

        os.mkdir(os.path.join(self.devices, 'sdb'))
        self.objects_2 = os.path.join(self.devices, 'sdb', 'objects')

        os.mkdir(self.objects)
        self.parts = {}
        for part in ['0', '1', '2', '3']:
            self.parts[part] = os.path.join(self.objects, part)
            os.mkdir(os.path.join(self.objects, part))

        self.conf = dict(devices=self.devices,
                         mount_check='false',
                         object_size_stats='10,100,1024,10240')
        self.df_mgr = DiskFileManager(self.conf, self.logger)
        self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'o')

    def tearDown(self):
        rmtree(os.path.dirname(self.testdir), ignore_errors=1)
        unit.xattr_data = {}

    def test_object_audit_extra_data(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger)
        data = '0' * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            timestamp = str(normalize_timestamp(time.time()))
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            pre_quarantines = auditor_worker.quarantines

            auditor_worker.object_audit(
                os.path.join(self.disk_file._datadir, timestamp + '.data'),
                'sda', '0')
            self.assertEquals(auditor_worker.quarantines, pre_quarantines)

            os.write(writer._fd, 'extra_data')
            auditor_worker.object_audit(
                os.path.join(self.disk_file._datadir, timestamp + '.data'),
                'sda', '0')
            self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_audit_diff_data(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger)
        data = '0' * 1024
        etag = md5()
        timestamp = str(normalize_timestamp(time.time()))
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            pre_quarantines = auditor_worker.quarantines

        # remake so it will have metadata
        self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'o')

        auditor_worker.object_audit(
            os.path.join(self.disk_file._datadir, timestamp + '.data'), 'sda',
            '0')
        self.assertEquals(auditor_worker.quarantines, pre_quarantines)
        etag = md5()
        etag.update('1' + '0' * 1023)
        etag = etag.hexdigest()
        metadata['ETag'] = etag

        with self.disk_file.create() as writer:
            writer.write(data)
            writer.put(metadata)

        auditor_worker.object_audit(
            os.path.join(self.disk_file._datadir, timestamp + '.data'), 'sda',
            '0')
        self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_audit_no_meta(self):
        timestamp = str(normalize_timestamp(time.time()))
        path = os.path.join(self.disk_file._datadir, timestamp + '.data')
        mkdirs(self.disk_file._datadir)
        fp = open(path, 'w')
        fp.write('0' * 1024)
        fp.close()
        invalidate_hash(os.path.dirname(self.disk_file._datadir))
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger)
        pre_quarantines = auditor_worker.quarantines
        auditor_worker.object_audit(
            os.path.join(self.disk_file._datadir, timestamp + '.data'), 'sda',
            '0')
        self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_audit_will_not_swallow_errors_in_tests(self):
        timestamp = str(normalize_timestamp(time.time()))
        path = os.path.join(self.disk_file._datadir, timestamp + '.data')
        mkdirs(self.disk_file._datadir)
        with open(path, 'w') as f:
            write_metadata(f, {'name': '/a/c/o'})
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger)

        def blowup(*args):
            raise NameError('tpyo')

        with mock.patch('swift.obj.diskfile.DiskFile', blowup):
            self.assertRaises(NameError, auditor_worker.object_audit, path,
                              'sda', '0')

    def test_failsafe_object_audit_will_swallow_errors_in_tests(self):
        timestamp = str(normalize_timestamp(time.time()))
        path = os.path.join(self.disk_file._datadir, timestamp + '.data')
        mkdirs(self.disk_file._datadir)
        with open(path, 'w') as f:
            write_metadata(f, {'name': '/a/c/o'})
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger)

        def blowup(*args):
            raise NameError('tpyo')

        with mock.patch('swift.obj.diskfile.DiskFile', blowup):
            auditor_worker.failsafe_object_audit(path, 'sda', '0')
        self.assertEquals(auditor_worker.errors, 1)

    def test_generic_exception_handling(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger)
        timestamp = str(normalize_timestamp(time.time()))
        pre_errors = auditor_worker.errors
        data = '0' * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
        with mock.patch('swift.obj.diskfile.DiskFile', lambda *_: 1 / 0):
            auditor_worker.audit_all_objects()
        self.assertEquals(auditor_worker.errors, pre_errors + 1)

    def test_object_run_once_pass(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger)
        auditor_worker.log_time = 0
        timestamp = str(normalize_timestamp(time.time()))
        pre_quarantines = auditor_worker.quarantines
        data = '0' * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
        auditor_worker.audit_all_objects()
        self.assertEquals(auditor_worker.quarantines, pre_quarantines)
        self.assertEquals(auditor_worker.stats_buckets[1024], 1)
        self.assertEquals(auditor_worker.stats_buckets[10240], 0)

    def test_object_run_once_no_sda(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger)
        timestamp = str(normalize_timestamp(time.time()))
        pre_quarantines = auditor_worker.quarantines
        data = '0' * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            os.write(writer._fd, 'extra_data')
        auditor_worker.audit_all_objects()
        self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_run_once_multi_devices(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger)
        timestamp = str(normalize_timestamp(time.time()))
        pre_quarantines = auditor_worker.quarantines
        data = '0' * 10
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
        auditor_worker.audit_all_objects()
        self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'ob')
        data = '1' * 10
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            os.write(writer._fd, 'extra_data')
        auditor_worker.audit_all_objects()
        self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_run_fast_track_non_zero(self):
        self.auditor = auditor.ObjectAuditor(self.conf)
        self.auditor.log_time = 0
        data = '0' * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': str(normalize_timestamp(time.time())),
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            etag = md5()
            etag.update('1' + '0' * 1023)
            etag = etag.hexdigest()
            metadata['ETag'] = etag
            write_metadata(writer._fd, metadata)

        quarantine_path = os.path.join(self.devices, 'sda', 'quarantined',
                                       'objects')
        self.auditor.run_once(zero_byte_fps=50)
        self.assertFalse(os.path.isdir(quarantine_path))
        self.auditor.run_once()
        self.assertTrue(os.path.isdir(quarantine_path))

    def setup_bad_zero_byte(self, with_ts=False):
        self.auditor = auditor.ObjectAuditor(self.conf)
        self.auditor.log_time = 0
        ts_file_path = ''
        if with_ts:
            name_hash = hash_path('a', 'c', 'o')
            dir_path = os.path.join(self.devices, 'sda',
                                    storage_directory(DATADIR, '0', name_hash))
            ts_file_path = os.path.join(dir_path, '99999.ts')
            if not os.path.exists(dir_path):
                mkdirs(dir_path)
            fp = open(ts_file_path, 'w')
            fp.close()

        etag = md5()
        with self.disk_file.create() as writer:
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': str(normalize_timestamp(time.time())),
                'Content-Length': 10,
            }
            writer.put(metadata)
            etag = md5()
            etag = etag.hexdigest()
            metadata['ETag'] = etag
            write_metadata(writer._fd, metadata)
        return ts_file_path

    def test_object_run_fast_track_all(self):
        self.setup_bad_zero_byte()
        self.auditor.run_once()
        quarantine_path = os.path.join(self.devices, 'sda', 'quarantined',
                                       'objects')
        self.assertTrue(os.path.isdir(quarantine_path))

    def test_object_run_fast_track_zero(self):
        self.setup_bad_zero_byte()
        self.auditor.run_once(zero_byte_fps=50)
        quarantine_path = os.path.join(self.devices, 'sda', 'quarantined',
                                       'objects')
        self.assertTrue(os.path.isdir(quarantine_path))

    def test_object_run_fast_track_zero_check_closed(self):
        rat = [False]
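        # flipped by FakeFile._quarantine below, proving the ZBF pass
        # really detected (and quarantined) the bad zero-byte file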

        class FakeFile(DiskFile):
            def _quarantine(self, data_file, msg):
                rat[0] = True
                DiskFile._quarantine(self, data_file, msg)

        self.setup_bad_zero_byte()
        was_df = auditor.diskfile.DiskFile
        try:
            auditor.diskfile.DiskFile = FakeFile
            self.auditor.run_once(zero_byte_fps=50)
            quarantine_path = os.path.join(self.devices, 'sda', 'quarantined',
                                           'objects')
            self.assertTrue(os.path.isdir(quarantine_path))
            self.assertTrue(rat[0])
        finally:
            auditor.diskfile.DiskFile = was_df

    def test_with_tombstone(self):
        ts_file_path = self.setup_bad_zero_byte(with_ts=True)
        self.auditor.run_once()
        self.assertTrue(ts_file_path.endswith('ts'))
        self.assertTrue(os.path.exists(ts_file_path))

    def test_sleeper(self):
        auditor.SLEEP_BETWEEN_AUDITS = 0.10
        my_auditor = auditor.ObjectAuditor(self.conf)
        start = time.time()
        my_auditor._sleep()
        delta_t = time.time() - start
        self.assertTrue(delta_t > 0.08)
        self.assertTrue(delta_t < 0.12)

    def test_run_forever(self):
        class StopForever(Exception):
            pass

        class ObjectAuditorMock(object):
            check_args = ()
            check_kwargs = {}
            fork_called = 0
            fork_res = 0

            def mock_run(self, *args, **kwargs):
                self.check_args = args
                self.check_kwargs = kwargs

            def mock_sleep(self):
                raise StopForever('stop')

            def mock_fork(self):
                self.fork_called += 1
                return self.fork_res

        my_auditor = auditor.ObjectAuditor(
            dict(devices=self.devices,
                 mount_check='false',
                 zero_byte_files_per_second=89))
        mocker = ObjectAuditorMock()
        my_auditor.run_once = mocker.mock_run
        my_auditor._sleep = mocker.mock_sleep
        was_fork = os.fork
        try:
            os.fork = mocker.mock_fork
            self.assertRaises(StopForever,
                              my_auditor.run_forever,
                              zero_byte_fps=50)
            self.assertEquals(mocker.check_kwargs['zero_byte_fps'], 50)
            self.assertEquals(mocker.fork_called, 0)

            self.assertRaises(StopForever, my_auditor.run_forever)
            self.assertEquals(mocker.fork_called, 1)
            self.assertEquals(mocker.check_args, ())

            mocker.fork_res = 1
            self.assertRaises(StopForever, my_auditor.run_forever)
            self.assertEquals(mocker.fork_called, 2)
            self.assertEquals(mocker.check_kwargs['zero_byte_fps'], 89)

        finally:
            os.fork = was_fork
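
Before the next example, a minimal, self-contained sketch (not Swift's
actual implementation) of the fork/wait pattern this run_forever test
mocks out: the parent forks one audit worker per device, each child runs
its audit and exits, and the parent reaps children with os.wait().

import os

def run_workers(device_dirs, audit):
    pids = set()
    for device in device_dirs:
        pid = os.fork()
        if pid == 0:
            # child: audit a single device, then exit immediately
            audit(device)
            os._exit(0)
        pids.add(pid)  # parent: remember the child's pid
    while pids:
        pid, _status = os.wait()  # reap children as they finish
        pids.discard(pid)
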
Example #43
class TestAuditor(unittest.TestCase):

    def setUp(self):
        self.testdir = os.path.join(mkdtemp(), 'tmp_test_object_auditor')
        self.devices = os.path.join(self.testdir, 'node')
        self.rcache = os.path.join(self.testdir, 'object.recon')
        self.logger = FakeLogger()
        rmtree(self.testdir, ignore_errors=1)
        mkdirs(os.path.join(self.devices, 'sda'))
        os.mkdir(os.path.join(self.devices, 'sdb'))

        # policy 0
        self.objects = os.path.join(self.devices, 'sda',
                                    get_data_dir(POLICIES[0]))
        self.objects_2 = os.path.join(self.devices, 'sdb',
                                      get_data_dir(POLICIES[0]))
        os.mkdir(self.objects)
        # policy 1
        self.objects_p1 = os.path.join(self.devices, 'sda',
                                       get_data_dir(POLICIES[1]))
        self.objects_2_p1 = os.path.join(self.devices, 'sdb',
                                         get_data_dir(POLICIES[1]))
        os.mkdir(self.objects_p1)
        # policy 2
        self.objects_p2 = os.path.join(self.devices, 'sda',
                                       get_data_dir(POLICIES[2]))
        self.objects_2_p2 = os.path.join(self.devices, 'sdb',
                                         get_data_dir(POLICIES[2]))
        os.mkdir(self.objects_p2)

        self.parts = {}
        self.parts_p1 = {}
        self.parts_p2 = {}
        for part in ['0', '1', '2', '3']:
            self.parts[part] = os.path.join(self.objects, part)
            self.parts_p1[part] = os.path.join(self.objects_p1, part)
            self.parts_p2[part] = os.path.join(self.objects_p2, part)
            os.mkdir(os.path.join(self.objects, part))
            os.mkdir(os.path.join(self.objects_p1, part))
            os.mkdir(os.path.join(self.objects_p2, part))

        self.conf = dict(
            devices=self.devices,
            mount_check='false',
            object_size_stats='10,100,1024,10240')
        self.df_mgr = DiskFileManager(self.conf, self.logger)
        self.ec_df_mgr = ECDiskFileManager(self.conf, self.logger)

        # diskfiles for policy 0, 1, 2
        self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'o',
                                                  policy=POLICIES[0])
        self.disk_file_p1 = self.df_mgr.get_diskfile('sda', '0', 'a', 'c',
                                                     'o', policy=POLICIES[1])
        self.disk_file_ec = self.ec_df_mgr.get_diskfile(
            'sda', '0', 'a', 'c', 'o', policy=POLICIES[2], frag_index=1)
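        # POLICIES[2] is assumed to be an erasure-coding policy in this
        # fixture, hence the extra frag_index when building its diskfile.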

    def tearDown(self):
        rmtree(os.path.dirname(self.testdir), ignore_errors=1)
        unit.xattr_data = {}

    def test_worker_conf_parms(self):
        def check_common_defaults():
            self.assertEqual(auditor_worker.max_bytes_per_second, 10000000)
            self.assertEqual(auditor_worker.log_time, 3600)

        # test default values
        conf = dict(
            devices=self.devices,
            mount_check='false',
            object_size_stats='10,100,1024,10240')
        auditor_worker = auditor.AuditorWorker(conf, self.logger,
                                               self.rcache, self.devices)
        check_common_defaults()
        for policy in POLICIES:
            mgr = auditor_worker.diskfile_router[policy]
            self.assertEqual(mgr.disk_chunk_size, 65536)
        self.assertEqual(auditor_worker.max_files_per_second, 20)
        self.assertEqual(auditor_worker.zero_byte_only_at_fps, 0)

        # test specified audit value overrides
        conf.update({'disk_chunk_size': 4096})
        auditor_worker = auditor.AuditorWorker(conf, self.logger,
                                               self.rcache, self.devices,
                                               zero_byte_only_at_fps=50)
        check_common_defaults()
        for policy in POLICIES:
            mgr = auditor_worker.diskfile_router[policy]
            self.assertEqual(mgr.disk_chunk_size, 4096)
        self.assertEqual(auditor_worker.max_files_per_second, 50)
        self.assertEqual(auditor_worker.zero_byte_only_at_fps, 50)

    def test_object_audit_extra_data(self):
        def run_tests(disk_file):
            auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                                   self.rcache, self.devices)
            data = '0' * 1024
            etag = md5()
            with disk_file.create() as writer:
                writer.write(data)
                etag.update(data)
                etag = etag.hexdigest()
                timestamp = str(normalize_timestamp(time.time()))
                metadata = {
                    'ETag': etag,
                    'X-Timestamp': timestamp,
                    'Content-Length': str(os.fstat(writer._fd).st_size),
                }
                writer.put(metadata)
                writer.commit(Timestamp(timestamp))
                pre_quarantines = auditor_worker.quarantines

                auditor_worker.object_audit(
                    AuditLocation(disk_file._datadir, 'sda', '0',
                                  policy=disk_file.policy))
                self.assertEqual(auditor_worker.quarantines, pre_quarantines)

                os.write(writer._fd, 'extra_data')

                auditor_worker.object_audit(
                    AuditLocation(disk_file._datadir, 'sda', '0',
                                  policy=disk_file.policy))
                self.assertEqual(auditor_worker.quarantines,
                                 pre_quarantines + 1)
        run_tests(self.disk_file)
        run_tests(self.disk_file_p1)
        run_tests(self.disk_file_ec)

    def test_object_audit_diff_data(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        data = '0' * 1024
        etag = md5()
        timestamp = str(normalize_timestamp(time.time()))
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            writer.commit(Timestamp(timestamp))
            pre_quarantines = auditor_worker.quarantines

        # remake so it will have metadata
        self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'o',
                                                  policy=POLICIES.legacy)

        auditor_worker.object_audit(
            AuditLocation(self.disk_file._datadir, 'sda', '0',
                          policy=POLICIES.legacy))
        self.assertEqual(auditor_worker.quarantines, pre_quarantines)
        etag = md5()
        etag.update('1' + '0' * 1023)
        etag = etag.hexdigest()
        metadata['ETag'] = etag

        with self.disk_file.create() as writer:
            writer.write(data)
            writer.put(metadata)
            writer.commit(Timestamp(timestamp))

        auditor_worker.object_audit(
            AuditLocation(self.disk_file._datadir, 'sda', '0',
                          policy=POLICIES.legacy))
        self.assertEqual(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_audit_no_meta(self):
        timestamp = str(normalize_timestamp(time.time()))
        path = os.path.join(self.disk_file._datadir, timestamp + '.data')
        mkdirs(self.disk_file._datadir)
        fp = open(path, 'w')
        fp.write('0' * 1024)
        fp.close()
        invalidate_hash(os.path.dirname(self.disk_file._datadir))
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        pre_quarantines = auditor_worker.quarantines
        auditor_worker.object_audit(
            AuditLocation(self.disk_file._datadir, 'sda', '0',
                          policy=POLICIES.legacy))
        self.assertEqual(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_audit_will_not_swallow_errors_in_tests(self):
        timestamp = str(normalize_timestamp(time.time()))
        path = os.path.join(self.disk_file._datadir, timestamp + '.data')
        mkdirs(self.disk_file._datadir)
        with open(path, 'w') as f:
            write_metadata(f, {'name': '/a/c/o'})
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)

        def blowup(*args):
            raise NameError('tpyo')
        with mock.patch.object(DiskFileManager,
                               'get_diskfile_from_audit_location', blowup):
            self.assertRaises(NameError, auditor_worker.object_audit,
                              AuditLocation(os.path.dirname(path), 'sda', '0',
                                            policy=POLICIES.legacy))

    def test_failsafe_object_audit_will_swallow_errors_in_tests(self):
        timestamp = str(normalize_timestamp(time.time()))
        path = os.path.join(self.disk_file._datadir, timestamp + '.data')
        mkdirs(self.disk_file._datadir)
        with open(path, 'w') as f:
            write_metadata(f, {'name': '/a/c/o'})
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)

        def blowup(*args):
            raise NameError('tpyo')
        with mock.patch('swift.obj.diskfile.DiskFileManager.diskfile_cls',
                        blowup):
            auditor_worker.failsafe_object_audit(
                AuditLocation(os.path.dirname(path), 'sda', '0',
                              policy=POLICIES.legacy))
        self.assertEqual(auditor_worker.errors, 1)

    def test_generic_exception_handling(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        # pretend that we logged (and reset counters) just now
        auditor_worker.last_logged = time.time()
        timestamp = str(normalize_timestamp(time.time()))
        pre_errors = auditor_worker.errors
        data = '0' * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            writer.commit(Timestamp(timestamp))
        with mock.patch('swift.obj.diskfile.DiskFileManager.diskfile_cls',
                        lambda *_: 1 / 0):
            auditor_worker.audit_all_objects()
        self.assertEqual(auditor_worker.errors, pre_errors + 1)

    def test_object_run_once_pass(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        auditor_worker.log_time = 0
        timestamp = str(normalize_timestamp(time.time()))
        pre_quarantines = auditor_worker.quarantines
        data = '0' * 1024

        def write_file(df):
            with df.create() as writer:
                writer.write(data)
                metadata = {
                    'ETag': md5(data).hexdigest(),
                    'X-Timestamp': timestamp,
                    'Content-Length': str(os.fstat(writer._fd).st_size),
                }
                writer.put(metadata)
                writer.commit(Timestamp(timestamp))

        # policy 0
        write_file(self.disk_file)
        # policy 1
        write_file(self.disk_file_p1)
        # policy 2
        write_file(self.disk_file_ec)

        auditor_worker.audit_all_objects()
        self.assertEqual(auditor_worker.quarantines, pre_quarantines)
        # 1 object per policy falls into 1024 bucket
        self.assertEqual(auditor_worker.stats_buckets[1024], 3)
        self.assertEqual(auditor_worker.stats_buckets[10240], 0)

        # pick up some additional code coverage, large file
        data = '0' * 1024 * 1024
        for df in (self.disk_file, self.disk_file_ec):
            with df.create() as writer:
                writer.write(data)
                metadata = {
                    'ETag': md5(data).hexdigest(),
                    'X-Timestamp': timestamp,
                    'Content-Length': str(os.fstat(writer._fd).st_size),
                }
                writer.put(metadata)
                writer.commit(Timestamp(timestamp))
        auditor_worker.audit_all_objects(device_dirs=['sda', 'sdb'])
        self.assertEqual(auditor_worker.quarantines, pre_quarantines)
        # still have the 1024 byte object left in policy-1 (plus the
        # stats from the original 3)
        self.assertEqual(auditor_worker.stats_buckets[1024], 4)
        self.assertEqual(auditor_worker.stats_buckets[10240], 0)
        # and then policy-0 disk_file was re-written as a larger object
        self.assertEqual(auditor_worker.stats_buckets['OVER'], 2)

        # pick up even more additional code coverage, misc paths
        auditor_worker.log_time = -1
        auditor_worker.stats_sizes = []
        auditor_worker.audit_all_objects(device_dirs=['sda', 'sdb'])
        self.assertEqual(auditor_worker.quarantines, pre_quarantines)
        self.assertEqual(auditor_worker.stats_buckets[1024], 4)
        self.assertEqual(auditor_worker.stats_buckets[10240], 0)
        self.assertEqual(auditor_worker.stats_buckets['OVER'], 2)

    def test_object_run_logging(self):
        logger = FakeLogger()
        auditor_worker = auditor.AuditorWorker(self.conf, logger,
                                               self.rcache, self.devices)
        auditor_worker.audit_all_objects(device_dirs=['sda'])
        log_lines = logger.get_lines_for_level('info')
        self.assertTrue(len(log_lines) > 0)
        self.assertIn('ALL - parallel, sda', log_lines[0])

        logger = FakeLogger()
        auditor_worker = auditor.AuditorWorker(self.conf, logger,
                                               self.rcache, self.devices,
                                               zero_byte_only_at_fps=50)
        auditor_worker.audit_all_objects(device_dirs=['sda'])
        log_lines = logger.get_lines_for_level('info')
        self.assertTrue(len(log_lines) > 0)
        self.assertIn('ZBF - sda', log_lines[0])

    def test_object_run_once_no_sda(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        timestamp = str(normalize_timestamp(time.time()))
        pre_quarantines = auditor_worker.quarantines
        # pretend that we logged (and reset counters) just now
        auditor_worker.last_logged = time.time()
        data = '0' * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            os.write(writer._fd, 'extra_data')
            writer.commit(Timestamp(timestamp))
        auditor_worker.audit_all_objects()
        self.assertEqual(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_run_once_multi_devices(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        # pretend that we logged (and reset counters) just now
        auditor_worker.last_logged = time.time()
        timestamp = str(normalize_timestamp(time.time()))
        pre_quarantines = auditor_worker.quarantines
        data = '0' * 10
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            writer.commit(Timestamp(timestamp))
        auditor_worker.audit_all_objects()
        self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'ob',
                                                  policy=POLICIES.legacy)
        data = '1' * 10
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            writer.commit(Timestamp(timestamp))
            os.write(writer._fd, 'extra_data')
        auditor_worker.audit_all_objects()
        self.assertEqual(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_run_fast_track_non_zero(self):
        self.auditor = auditor.ObjectAuditor(self.conf)
        self.auditor.log_time = 0
        data = '0' * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            timestamp = str(normalize_timestamp(time.time()))
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            writer.commit(Timestamp(timestamp))
            etag = md5()
            etag.update('1' + '0' * 1023)
            etag = etag.hexdigest()
            metadata['ETag'] = etag
            write_metadata(writer._fd, metadata)

        quarantine_path = os.path.join(self.devices,
                                       'sda', 'quarantined', 'objects')
        kwargs = {'mode': 'once'}
        kwargs['zero_byte_fps'] = 50
        self.auditor.run_audit(**kwargs)
        self.assertFalse(os.path.isdir(quarantine_path))
        del kwargs['zero_byte_fps']
        clear_auditor_status(self.devices)
        self.auditor.run_audit(**kwargs)
        self.assertTrue(os.path.isdir(quarantine_path))

    def setup_bad_zero_byte(self, timestamp=None):
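        # Same bad fixture as in the earlier examples: an empty .data file
        # whose metadata claims Content-Length 10.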
        if timestamp is None:
            timestamp = Timestamp(time.time())
        self.auditor = auditor.ObjectAuditor(self.conf)
        self.auditor.log_time = 0
        etag = md5()
        with self.disk_file.create() as writer:
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp.internal,
                'Content-Length': 10,
            }
            writer.put(metadata)
            writer.commit(Timestamp(timestamp))
            etag = md5()
            etag = etag.hexdigest()
            metadata['ETag'] = etag
            write_metadata(writer._fd, metadata)

    def test_object_run_fast_track_all(self):
        self.setup_bad_zero_byte()
        kwargs = {'mode': 'once'}
        self.auditor.run_audit(**kwargs)
        quarantine_path = os.path.join(self.devices,
                                       'sda', 'quarantined', 'objects')
        self.assertTrue(os.path.isdir(quarantine_path))

    def test_object_run_fast_track_zero(self):
        self.setup_bad_zero_byte()
        kwargs = {'mode': 'once'}
        kwargs['zero_byte_fps'] = 50

        called_args = [0]

        def mock_get_auditor_status(path, logger, audit_type):
            called_args[0] = audit_type
            return get_auditor_status(path, logger, audit_type)

        with mock.patch('swift.obj.diskfile.get_auditor_status',
                        mock_get_auditor_status):
            self.auditor.run_audit(**kwargs)
        quarantine_path = os.path.join(self.devices,
                                       'sda', 'quarantined', 'objects')
        self.assertTrue(os.path.isdir(quarantine_path))
        self.assertEqual('ZBF', called_args[0])

    def test_object_run_fast_track_zero_check_closed(self):
        rat = [False]

        class FakeFile(DiskFile):

            def _quarantine(self, data_file, msg):
                rat[0] = True
                DiskFile._quarantine(self, data_file, msg)

        self.setup_bad_zero_byte()
        with mock.patch('swift.obj.diskfile.DiskFileManager.diskfile_cls',
                        FakeFile):
            kwargs = {'mode': 'once'}
            kwargs['zero_byte_fps'] = 50
            self.auditor.run_audit(**kwargs)
            quarantine_path = os.path.join(self.devices,
                                           'sda', 'quarantined', 'objects')
            self.assertTrue(os.path.isdir(quarantine_path))
            self.assertTrue(rat[0])

    @mock.patch.object(auditor.ObjectAuditor, 'run_audit')
    @mock.patch('os.fork', return_value=0)
    def test_with_inaccessible_object_location(self, mock_os_fork,
                                               mock_run_audit):
        # Need to ensure that any failures in run_audit do
        # not prevent sys.exit() from running.  Otherwise we get
        # zombie processes.
        e = OSError('permission denied')
        mock_run_audit.side_effect = e
        self.auditor = auditor.ObjectAuditor(self.conf)
        self.assertRaises(SystemExit, self.auditor.fork_child, self)

    def test_with_only_tombstone(self):
        # sanity check that auditor doesn't touch solitary tombstones
        ts_iter = make_timestamp_iter()
        self.setup_bad_zero_byte(timestamp=next(ts_iter))
        self.disk_file.delete(next(ts_iter))
        files = os.listdir(self.disk_file._datadir)
        self.assertEqual(1, len(files))
        self.assertTrue(files[0].endswith('ts'))
        kwargs = {'mode': 'once'}
        self.auditor.run_audit(**kwargs)
        files_after = os.listdir(self.disk_file._datadir)
        self.assertEqual(files, files_after)

    def test_with_tombstone_and_data(self):
        # rsync replication could leave a tombstone and data file in object
        # dir - verify they are both removed during audit
        ts_iter = make_timestamp_iter()
        ts_tomb = next(ts_iter)
        ts_data = next(ts_iter)
        self.setup_bad_zero_byte(timestamp=ts_data)
        tomb_file_path = os.path.join(self.disk_file._datadir,
                                      '%s.ts' % ts_tomb.internal)
        with open(tomb_file_path, 'wb') as fd:
            write_metadata(fd, {'X-Timestamp': ts_tomb.internal})
        files = os.listdir(self.disk_file._datadir)
        self.assertEqual(2, len(files))
        self.assertTrue(os.path.basename(tomb_file_path) in files, files)
        kwargs = {'mode': 'once'}
        self.auditor.run_audit(**kwargs)
        self.assertFalse(os.path.exists(self.disk_file._datadir))

    def test_sleeper(self):
        with mock.patch(
                'time.sleep', mock.MagicMock()) as mock_sleep:
            my_auditor = auditor.ObjectAuditor(self.conf)
            my_auditor._sleep()
            mock_sleep.assert_called_with(30)

            my_conf = dict(interval=2)
            my_conf.update(self.conf)
            my_auditor = auditor.ObjectAuditor(my_conf)
            my_auditor._sleep()
            mock_sleep.assert_called_with(2)

            my_auditor = auditor.ObjectAuditor(self.conf)
            my_auditor.interval = 2
            my_auditor._sleep()
            mock_sleep.assert_called_with(2)

    def test_run_parallel_audit(self):

        class StopForever(Exception):
            pass

        class Bogus(Exception):
            pass

        loop_error = Bogus('exception')

        class LetMeOut(BaseException):
            pass

        class ObjectAuditorMock(object):
            check_args = ()
            check_kwargs = {}
            check_device_dir = None
            fork_called = 0
            master = 0
            wait_called = 0

            def mock_run(self, *args, **kwargs):
                self.check_args = args
                self.check_kwargs = kwargs
                if 'zero_byte_fps' in kwargs:
                    self.check_device_dir = kwargs.get('device_dirs')

            def mock_sleep_stop(self):
                raise StopForever('stop')

            def mock_sleep_continue(self):
                return

            def mock_audit_loop_error(self, parent, zbo_fps,
                                      override_devices=None, **kwargs):
                raise loop_error

            def mock_fork(self):
                self.fork_called += 1
                if self.master:
                    return self.fork_called
                else:
                    return 0

            def mock_wait(self):
                self.wait_called += 1
                return (self.wait_called, 0)

        for i in string.ascii_letters[2:26]:
            mkdirs(os.path.join(self.devices, 'sd%s' % i))

        my_auditor = auditor.ObjectAuditor(dict(devices=self.devices,
                                                mount_check='false',
                                                zero_byte_files_per_second=89,
                                                concurrency=1))

        mocker = ObjectAuditorMock()
        my_auditor.logger.exception = mock.MagicMock()
        real_audit_loop = my_auditor.audit_loop
        my_auditor.audit_loop = mocker.mock_audit_loop_error
        my_auditor.run_audit = mocker.mock_run
        was_fork = os.fork
        was_wait = os.wait
        os.fork = mocker.mock_fork
        os.wait = mocker.mock_wait
        try:
            my_auditor._sleep = mocker.mock_sleep_stop
            my_auditor.run_once(zero_byte_fps=50)
            my_auditor.logger.exception.assert_called_once_with(
                'ERROR auditing: %s', loop_error)
            my_auditor.logger.exception.reset_mock()
            self.assertRaises(StopForever, my_auditor.run_forever)
            my_auditor.logger.exception.assert_called_once_with(
                'ERROR auditing: %s', loop_error)
            my_auditor.audit_loop = real_audit_loop

            self.assertRaises(StopForever,
                              my_auditor.run_forever, zero_byte_fps=50)
            self.assertEqual(mocker.check_kwargs['zero_byte_fps'], 50)
            self.assertEqual(mocker.fork_called, 0)

            self.assertRaises(SystemExit, my_auditor.run_once)
            self.assertEqual(mocker.fork_called, 1)
            self.assertEqual(mocker.check_kwargs['zero_byte_fps'], 89)
            self.assertEqual(mocker.check_device_dir, [])
            self.assertEqual(mocker.check_args, ())

            device_list = ['sd%s' % i for i in string.ascii_letters[2:10]]
            device_string = ','.join(device_list)
            device_string_bogus = device_string + ',bogus'

            mocker.fork_called = 0
            self.assertRaises(SystemExit, my_auditor.run_once,
                              devices=device_string_bogus)
            self.assertEqual(mocker.fork_called, 1)
            self.assertEqual(mocker.check_kwargs['zero_byte_fps'], 89)
            self.assertEqual(sorted(mocker.check_device_dir), device_list)

            mocker.master = 1

            mocker.fork_called = 0
            self.assertRaises(StopForever, my_auditor.run_forever)
            # Fork is called 2 times since the zbf process is forked just
            # once before self._sleep() is called and StopForever is raised
            # Also wait is called just once before StopForever is raised
            self.assertEqual(mocker.fork_called, 2)
            self.assertEqual(mocker.wait_called, 1)

            my_auditor._sleep = mocker.mock_sleep_continue
            my_auditor.audit_loop = works_only_once(my_auditor.audit_loop,
                                                    LetMeOut())
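            # works_only_once (a helper defined elsewhere in this test
            # module) presumably lets audit_loop succeed once and then
            # raises LetMeOut, so run_forever completes exactly one pass.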

            my_auditor.concurrency = 2
            mocker.fork_called = 0
            mocker.wait_called = 0
            self.assertRaises(LetMeOut, my_auditor.run_forever)
            # Fork is called no. of devices + (no. of devices)/2 + 1 times
            # since zbf process is forked (no.of devices)/2 + 1 times
            no_devices = len(os.listdir(self.devices))
            self.assertEqual(mocker.fork_called, no_devices + no_devices / 2
                             + 1)
            self.assertEqual(mocker.wait_called, no_devices + no_devices / 2
                             + 1)

        finally:
            os.fork = was_fork
            os.wait = was_wait

    def test_run_audit_once(self):
        my_auditor = auditor.ObjectAuditor(dict(devices=self.devices,
                                                mount_check='false',
                                                zero_byte_files_per_second=89,
                                                concurrency=1))

        forked_pids = []
        next_zbf_pid = [2]
        next_normal_pid = [1001]
        outstanding_pids = [[]]

        def fake_fork_child(**kwargs):
            if len(forked_pids) > 10:
                # something's gone horribly wrong
                raise BaseException("forking too much")

            # ZBF pids are all smaller than the normal-audit pids; this way
            # we can return them first.
            #
            # Also, ZBF pids are even and normal-audit pids are odd; this is
            # so humans seeing this test fail can better tell what's happening.
            if kwargs.get('zero_byte_fps'):
                pid = next_zbf_pid[0]
                next_zbf_pid[0] += 2
            else:
                pid = next_normal_pid[0]
                next_normal_pid[0] += 2
            outstanding_pids[0].append(pid)
            forked_pids.append(pid)
            return pid

        def fake_os_wait():
            # Smallest pid first; that's ZBF if we have one, else normal
            outstanding_pids[0].sort()
            pid = outstanding_pids[0].pop(0)
            return (pid, 0)   # (pid, status)

        with mock.patch("swift.obj.auditor.os.wait", fake_os_wait), \
                mock.patch.object(my_auditor, 'fork_child', fake_fork_child), \
                mock.patch.object(my_auditor, '_sleep', lambda *a: None):
            my_auditor.run_once()

        self.assertEqual(sorted(forked_pids), [2, 1001])

    def test_run_parallel_audit_once(self):
        my_auditor = auditor.ObjectAuditor(
            dict(devices=self.devices, mount_check='false',
                 zero_byte_files_per_second=89, concurrency=2))

        # ZBF pids are smaller than the normal-audit pids; this way we can
        # return them first from our mocked os.wait().
        #
        # Also, ZBF pids are even and normal-audit pids are odd; this is so
        # humans seeing this test fail can better tell what's happening.
        forked_pids = []
        next_zbf_pid = [2]
        next_normal_pid = [1001]
        outstanding_pids = [[]]

        def fake_fork_child(**kwargs):
            if len(forked_pids) > 10:
                # something's gone horribly wrong; try not to hang the test
                # run because of it
                raise BaseException("forking too much")

            if kwargs.get('zero_byte_fps'):
                pid = next_zbf_pid[0]
                next_zbf_pid[0] += 2
            else:
                pid = next_normal_pid[0]
                next_normal_pid[0] += 2
            outstanding_pids[0].append(pid)
            forked_pids.append(pid)
            return pid

        def fake_os_wait():
            if not outstanding_pids[0]:
                raise BaseException("nobody waiting")

            # ZBF auditor finishes first
            outstanding_pids[0].sort()
            pid = outstanding_pids[0].pop(0)
            return (pid, 0)   # (pid, status)

        # make sure we've got enough devs that the ZBF auditor can finish
        # before all the normal auditors have been started
        mkdirs(os.path.join(self.devices, 'sdc'))
        mkdirs(os.path.join(self.devices, 'sdd'))

        with mock.patch("swift.obj.auditor.os.wait", fake_os_wait), \
                mock.patch.object(my_auditor, 'fork_child', fake_fork_child), \
                mock.patch.object(my_auditor, '_sleep', lambda *a: None):
            my_auditor.run_once()

        self.assertEqual(sorted(forked_pids), [2, 1001, 1003, 1005, 1007])
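
For reference, the integrity check every one of these auditor tests
revolves around can be sketched in a few lines (a simplification: the
real DiskFile reader re-hashes the object incrementally while streaming
it, rather than holding it all in memory):

from hashlib import md5

def is_corrupt(data, metadata):
    # a size mismatch (e.g. 'extra_data' appended) or a digest mismatch
    # (stale ETag) are the two conditions that trigger quarantine
    if len(data) != int(metadata['Content-Length']):
        return True
    return md5(data).hexdigest() != metadata['ETag']
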
Example #44
class MetadataController(object):
    """"
    WSGI Controller for metadata server
    """
    save_headers = [
        'x-metadata-read',
        'x-metadata-write',
        'x-metadata-sync-key',
        'x-metadata-sync-to'
    ]

    def __init__(self, conf, logger=None):
        # location/directory of the metadata database (meta.db)
        self.location = conf.get('location', '/srv/node/sdb1/metadata/')
        # path to the actual file
        # self.db_file = os.path.join(self.location, 'meta.db')
        self.logger = logger or get_logger(conf, log_route='metadata-server')
        self.root = conf.get('devices', '/srv/node')
        # workaround for device listings
        self.node_count = conf.get('nodecount', '8')
        self.devicelist = []
        for x in range(int(self.node_count)):
            self.devicelist.append(conf.get('device' + str(x), ''))
        self.mount_check = config_true_value(conf.get('mount_check', 'true'))
        self.node_timeout = int(conf.get('node_timeout', 3))
        self.conn_timeout = float(conf.get('conn_timeout', 3))
        replication_server = conf.get('replication_server', None)
        if replication_server is not None:
            replication_server = config_true_value(replication_server)
        self.replication_server = replication_server
        self.allowed_sync_hosts = [
            h.strip()
            for h in conf.get('allowed_sync_hosts', '127.0.0.1').split(',')
            if h.strip()
        ]
        self.replicator_rpc = ReplicatorRpc(
            self.root,
            DATADIR,
            MetadataBroker,
            self.mount_check,
            logger=self.logger
        )
        self.diskfile_mgr = DiskFileManager(conf, self.logger)

        self.db_ip = conf.get('db_ip', '127.0.0.1')
        self.db_port = int(conf.get('db_port', 2424))
        self.db_user = conf.get('db_user', 'root')
        self.db_pw = conf.get('db_pw', 'root')
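        # NOTE: 2424 is OrientDB's default binary-protocol port, which is
        # presumably what MetadataBroker connects to here.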

        if config_true_value(conf.get('allow_versions', 'f')):
            self.save_headers.append('x-versions-location')

        swift.common.db.DB_PREALLOCATION = config_true_value(
            conf.get('db_preallocation', 'f'))
        self.broker = self._get_metadata_broker()
        self.broker.initialize()

    def _get_metadata_broker(self, **kwargs):
        """
        Returns an instance of the DB abstraction layer object (broker)
        """
        return MetadataBroker(self.db_ip, self.db_port, self.db_user,
                              self.db_pw)

    def check_attrs(self, attrs, acc, con, obj):
        """
        Verify that attributes are valid
        Checks the attr list against a list of system attributes
        Allows for custom metadata.

        returns: boolean whether the attrs are valid
        """
        for attr in attrs.split(','):
            if attr.startswith('object_meta') or \
                    attr.startswith('container_meta') or \
                    attr.startswith('account_meta'):
                pass
            elif attr not in [
                    'object_uri',
                    'object_name',
                    'object_account_name',
                    'object_container_name',
                    'object_location',
                    'object_uri_create_time',
                    'object_last_modified_time',
                    'object_last_changed_time',
                    'object_delete_time',
                    'object_last_activity_time',
                    'object_etag_hash',
                    'object_content_type',
                    'object_content_length',
                    'object_content_encoding',
                    'object_content_disposition',
                    'object_content_language',
                    'object_cache_control',
                    'object_delete_at',
                    'object_manifest_type',
                    'object_manifest',
                    'object_access_control_allow_origin',
                    'object_access_control_allow_credentials',
                    'object_access_control_expose_headers',
                    'object_access_control_max_age',
                    'object_access_control_allow_methods',
                    'object_access_control_allow_headers',
                    'object_origin',
                    'object_access_control_request_method',
                    'object_access_control_request_headers',
                    'object_meta',
                    'container_uri',
                    'container_name',
                    'container_account_name',
                    'container_create_time',
                    'container_last_modified_time',
                    'container_last_changed_time',
                    'container_delete_time',
                    'container_last_activity_time',
                    'container_read_permissions',
                    'container_write_permissions',
                    'container_sync_to',
                    'container_sync_key',
                    'container_versions_location',
                    'container_object_count',
                    'container_bytes_used',
                    'container_meta',
                    'account_uri',
                    'account_name',
                    'account_tenant_id',
                    'account_first_use_time',
                    'account_last_modified_time',
                    'account_last_changed_time',
                    'account_delete_time',
                    'account_last_activity_time',
                    'account_container_count',
                    'account_object_count',
                    'account_bytes_used',
                    'account_meta',
                    'all_attrs',
                    'all_system_attrs',
                    'all_meta_attrs',
                    'all_account_attrs',
                    'all_account_system_attrs',
                    'all_account_meta_attrs',
                    'all_container_attrs',
                    'all_container_system_attrs',
                    'all_container_meta_attrs',
                    'all_object_attrs',
                    'all_object_system_attrs',
                    'all_object_meta_attrs']:
                return False
        return True
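
    # Illustrative calls (a sketch; 'ctrl' is a MetadataController and the
    # attribute strings are hypothetical, not taken from the original code):
    #
    #   ctrl.check_attrs('object_uri,object_meta_color', 'a', 'c', 'o')
    #   -> True   (a known system attr plus a custom object_meta* attr)
    #   ctrl.check_attrs('object_bogus', 'a', 'c', 'o')
    #   -> False  (neither a system attr nor a recognized meta prefix)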

    @public
    @timing_stats()
    def GET(self, req):
        """
        Handle HTTP GET requests
        Build SQL queries piece by piece and then execute
        Custom attributes need to be handled specially, since they exist
        in a separate table
        """
        # broker = self._get_metadata_broker()
        # broker.initialize()

        base_version, acc, con, obj = split_path(req.path, 1, 4, True)
        if 'sorted' in req.headers:
            sort_value_list = req.headers['sorted']
            if sort_value_list == '':
                sort_value_list = 'uri'
            toSort = True
        else:
            toSort = False
        if 'attributes' in req.headers:
            attrs = req.headers['attributes']
        # If there is no attributes list, include everything in scope;
        # since no attributes were passed in, the results can span
        # multiple levels of scope.
        else:
            attrs = "object_uri,container_uri,account_uri"

        attrs, all_obj_meta, all_con_meta, all_acc_meta = \
            eval_superset(attrs.split(","))
        format = "text/plain"
        if self.check_attrs(attrs, acc, con, obj) or attrs == '':
            accAttrs, conAttrs, objAttrs, superAttrs, customAttrs = \
                split_attrs_by_scope(attrs)

            """
            If we have a thing from which we don't request any sys attrs
            Then we need to add its uri so that it appears in the list
            returned from the query. After we query for custom attrs,
            we need to delete any thing that is empty.
            """
            if all_obj_meta and objAttrs == "":
                objAttrs = "object_uri"
            if all_con_meta and conAttrs == "":
                conAttrs = "container_uri"
            if all_acc_meta and accAttrs == "":
                accAttrs = "account_uri"

            # Builds initial query containing the
            # split attributes for each item type
            accQuery = self.broker.get_attributes_query(acc, con, obj, accAttrs)
            conQuery = self.broker.get_attributes_query(acc, con, obj, conAttrs)
            objQuery = self.broker.get_attributes_query(acc, con, obj, objAttrs)

            # If there is a query in the request add it to the end
            # of the WHERE clause of the SQL
            if 'query' in req.headers:
                query = req.headers['query']
                accQuery = self.broker.get_uri_query(accQuery, query)
                conQuery = self.broker.get_uri_query(conQuery, query)
                objQuery = self.broker.get_uri_query(objQuery, query)

            # if successful query add the results to the end of the
            # accumulator list
            ret = []
            if not accQuery.startswith("BAD"):
                ret.extend(self.broker.execute_query(
                    accQuery, acc, con, obj,
                    'account_uri' in attrs.split(',')))
            if not conQuery.startswith("BAD"):
                ret.extend(self.broker.execute_query(
                    conQuery, acc, con, obj,
                    'container_uri' in attrs.split(',')))
            if not objQuery.startswith("BAD"):
                ret.extend(self.broker.execute_query(
                    objQuery, acc, con, obj,
                    'object_uri' in attrs.split(',')))

            # query the custom table
            ret = self.broker.custom_attributes_query(
                customAttrs, ret, all_obj_meta, all_con_meta, all_acc_meta)

            """
            Do the deletion thing mentioned above
            """
            ret = [x for x in ret if x[x.keys()[0]] != {}]

            if toSort:
                sorter = Sort_metadata()
                ret = sorter.sort_data(ret, sort_value_list.split(","))

            # default format is plain text
            # can choose between json/xml as well
            # no error handling done right now; anything unrecognized
            # simply defaults to plain text
            if "format" in req.headers:
                if req.headers['format'] == "json":
                    format = "application/json"
                    out = output_json(ret)
                elif req.headers['format'] == "xml":
                    format = "application/xml"
                    out = output_xml(ret)
                else:
                    out = output_plain(ret)
            else:
                out = output_plain(ret)
            status = 200

        else:
            out = "One or more attributes not supported"
            status = 400
            format = "text/plain"

        # Returns the HTTP Response object with the result of the API request
        return Response(
            request=req, body=out + "\n", content_type=format, status=status)
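
    # Example request against the GET handler above (a sketch; the account,
    # container and header values are made up):
    #
    #   GET /v1/AUTH_test/pics
    #   attributes: object_uri,object_content_length
    #   query: object_content_length > 1024
    #   format: json
    #   sorted: object_uri
    #
    # would return a JSON list of matching records; the exact query grammar
    # is whatever self.broker.get_uri_query() accepts.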

    @public
    #@timing_stats()
    #TODO: reorganize code to generalize repeated calls
    #TODO: abstract data/object type names for generic calls
    def PUT(self, req):
        version, acc, con, obj = split_path(req.path, 1, 4, True)
        stor_policy = req.headers['storage_policy']
        ring = POLICIES.get_object_ring(stor_policy, '/etc/swift')
        # broker = self._get_metadata_broker()
        # broker.initialize()

        # Handle container PUT
        if not obj:
            hsh = hash_path(acc, con)
            part = ring.get_part(acc, con)
            db_dir = storage_directory(swift.container.backend.DATADIR, part, hsh)
            nodes = ring.get_part_nodes(part)
            for node in nodes:
                for item in self.devicelist:
                    if node['device'] in item:
                        try:
                            path = os.path.join(self.root + item, db_dir, hsh + '.db')
                            # TODO: move kwargs
                            kwargs = {'account': acc, 'container': con,
                                      'logger': self.logger}
                            md_broker = swift.container.backend.ContainerBroker(
                                path, **kwargs)
                            md = md_broker.get_info()
                            md.update(
                                (key, value)
                                for key, (value, timestamp) in md_broker.metadata.iteritems()
                                if value != '' and is_sys_or_user_meta('container', key))
                            sys_md = format_con_metadata(md)
                            user_md = format_custom_metadata(md)
                            if 'X-Container-Read' in req.headers:
                                sys_md['container_read_permissions'] = req.headers['X-Container-Read']
                            if 'X-Container-Write' in req.headers:
                                sys_md['container_write_permissions'] = req.headers['X-Container-Write']
                            #TODO: insert container_last_activity_time
                            #TODO: split meta user/sys
                            #TODO: insert meta
                            self.broker.insert_container_md([sys_md])
                            return
                        except DatabaseConnectionError as e:
                            self.logger.warn(
                                "DatabaseConnectionError: " + e.path + "\n")
                        except Exception:
                            self.logger.warn(
                                "%s: %s\n" % (str(sys.exc_info()[0]),
                                              str(sys.exc_info()[1])))
        # handle object PUT
        else:
            part = ring.get_part(acc, con, obj)
            nodes = ring.get_part_nodes(part)
            for node in nodes:
                for item in self.devicelist:
                    if node['device'] in item:
                        try:
                            df = self.diskfile_mgr.get_diskfile(item, part, acc, con, obj, stor_policy)
                            md = df.read_metadata()
                            sys_md = format_obj_metadata(md)
                            # df._data_file is a direct path to the
                            # object's data
                            sys_md['object_location'] = df._data_file
                            user_md = format_custom_metadata(md)
                            #TODO: insert user meta and sys meta
                            self.broker.insert_object_md([sys_md])
                        except Exception:
                            self.logger.warn(
                                "%s: %s\n" % (str(sys.exc_info()[0]),
                                              str(sys.exc_info()[1])))
        return
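
    # Example: a container-level PUT against the handler above (a sketch;
    # the path and the storage_policy value are assumptions):
    #
    #   PUT /v1/AUTH_test/pics
    #   storage_policy: 0
    #
    # walks the ring for a locally stored replica of the container DB,
    # reads its info/metadata via ContainerBroker and inserts a formatted
    # row through self.broker.insert_container_md().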

    @public
    @timing_stats()
    def DELETE(self, req):
        version, acc, con, obj = split_path(req.path, 1, 4, True)
        timestamp = Timestamp(time.time()).isoformat()
        data_type = ''
        md = {}
        if not con and not obj:
            #do nothing. accounts cannot be deleted
            return
        elif not obj:
            md = format_con_metadata(md)
            md['container_delete_time'] = timestamp
            md['container_last_activity_time'] = timestamp
            data_type = 'container'
            for item in (data_type + '_uri', data_type + '_name'):
                if item in md:
                    del md[item]
            #TODO: overwrite container metadata
            #TODO: delete container custom metadata
        else:
            md = format_obj_metadata(md)
            md['object_delete_time'] = timestamp
            md['object_last_activity_time'] = timestamp
            data_type = 'object'
            for item in (data_type + '_uri', data_type + '_name'):
                if item in md:
                    del md[item]
            #TODO: overwrite object metadata
            #TODO: delete object user meta
        return

    # TODO: generalize strings used for repeat calls
    @public
    #@timing_stats()
    def POST(self, req):
        version, acc, con, obj = split_path(req.path, 1, 4, True)
        stor_policy = req.headers['storage_policy']
        ring = POLICIES.get_object_ring(stor_policy, '/etc/swift')
        if not con and not obj:
            meta_type = 'account'
            kwargs = {'account': acc, 'logger': self.logger}
            data_dir = swift.account.backend.DATADIR
            hsh = hash_path(acc)
            part = ring.get_part(acc)
            db_dir = storage_directory(data_dir, part, hsh)
            nodes = ring.get_part_nodes(part)
            for node in nodes:
                for item in self.devicelist:
                    if node['device'] in item:
                        try:
                            path = os.path.join(self.root + item, db_dir, hsh + '.db')
                            broker = swift.account.backend.AccountBroker(path, **kwargs)
                            md = broker.get_info()
                            md.update(
                                (key, value)
                                for key, (value, timestamp) in broker.metadata.iteritems()
                                if value != '' and is_sys_or_user_meta(meta_type, key))
                            sys_md = format_acc_metadata(md)
                            user_md = format_custom_metadata(md)
                            #TODO: call overwrite_account_metadata
                            #TODO: call overwrite_custom_metadata
                            return
                        except Exception:
                            self.logger.warn(
                                "%s: %s\n" % (str(sys.exc_info()[0]),
                                              str(sys.exc_info()[1])))
        # Handle container POST
        elif not obj:
            meta_type = 'container'
            kwargs = {'account': acc, 'container': con,
                      'logger': self.logger}
            data_dir = swift.container.backend.DATADIR
            try:
                hsh = hash_path(acc, con)
                part = ring.get_part(acc, con)
                db_dir = storage_directory(data_dir, part, hsh)
                nodes = ring.get_part_nodes(part)
                for node in nodes:
                    for item in self.devicelist:
                        if node['device'] in item:
                            try:
                                path = os.path.join(self.root + item, db_dir, hsh + '.db')
                                broker = swift.container.backend.ContainerBroker(path, **kwargs)
                                md = broker.get_info()
                                md.update(
                                    (key, value)
                                    for key, (value, timestamp) in broker.metadata.iteritems()
                                    if value != '' and is_sys_or_user_meta('container', key))
                                sys_md = format_con_metadata(md)
                                user_md = format_custom_metadata(md)
                                if 'X-Container-Read' in req.headers:
                                    sys_md['container_read_permissions'] = \
                                        req.headers['X-Container-Read']
                                if 'X-Container-Write' in req.headers:
                                    sys_md['container_write_permissions'] = \
                                        req.headers['X-Container-Write']
                                # TODO: call overwrite_container_metadata
                                # TODO: call overwrite_custom_metadata
                                return
                            except DatabaseConnectionError as e:
                                self.logger.warn(
                                    "DatabaseConnectionError: " +
                                    e.path + "\n")
            except Exception:
                self.logger.warn(
                    "%s: %s\n" % (str(sys.exc_info()[0]),
                                  str(sys.exc_info()[1])))
        else:
            part = ring.get_part(acc, con, obj)
            nodes = ring.get_part_nodes(part)
            for node in nodes:
                for item in self.devicelist:
                    if node['device'] in item:
                        try:
                            df = self.diskfile_mgr.get_diskfile(item, part, acc, con, obj, stor_policy)
                            md = df.read_metadata()
                            sys_md = format_obj_metadata(md)
                            user_md = format_custom_metadata(md)
                            #TODO: call overwrite_object_metadata
                            #TODO: call overwrite_custom_metadata
                        except Exception:
                            self.logger.warn(
                                "%s: %s\n" % (str(sys.exc_info()[0]),
                                              str(sys.exc_info()[1])))
        return


    @public
    @timing_stats()
    def COPY(self, req):
        version, acc, con, obj = split_path(req.path, 1, 4, True)
        # TODO: implement COPY


    def __call__(self, env, start_response):
        """
        Boilerplate code for how the server's code gets called
        upon receiving a request.
        Taken directly from other servers.
        """
        # start_time = time.time()
        req = Request(env)
        self.logger.txn_id = req.headers.get('x-trans-id', None)
        if not check_utf8(req.path_info):
            res = HTTPPreconditionFailed(body='Invalid UTF8 or contains NULL')
        else:
            try:
                # disallow methods which have not been marked 'public'
                try:
                    method = getattr(self, req.method)
                    getattr(method, 'publicly_accessible')
                    replication_method = getattr(method, 'replication', False)
                    if (self.replication_server is not None and
                            self.replication_server != replication_method):
                        raise AttributeError('Not allowed method.')
                except AttributeError:
                    res = HTTPMethodNotAllowed()
                else:
                    res = method(req)
            except HTTPException as error_response:
                res = error_response
            except (Exception, Timeout):
                self.logger.exception(_(
                    'ERROR __call__ error with %(method)s %(path)s '),
                    {'method': req.method, 'path': req.path})
                res = HTTPInternalServerError(body=traceback.format_exc())
        # trans_time = '%.4f' % (time.time() - start_time)
        # if self.log_requests:
        #     log_message = '%s - - [%s] "%s %s" %s %s "%s" "%s" "%s" %s' % (
        #         req.remote_addr,
        #         time.strftime('%d/%b/%Y:%H:%M:%S +0000',
        #                       time.gmtime()),
        #         req.method, req.path,
        #         res.status.split()[0], res.content_length or '-',
        #         req.headers.get('x-trans-id', '-'),
        #         req.referer or '-', req.user_agent or '-',
        #         trans_time)
        #     if req.method.upper() == 'REPLICATE':
        #         self.logger.debug(log_message)
        #     else:
        #         self.logger.info(log_message)
        return res(env, start_response)
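

def app_factory(global_conf, **local_conf):
    """
    paste.deploy app factory for the metadata server. A minimal sketch
    following the convention used by the other Swift WSGI servers; this
    factory is not part of the original snippet.
    """
    conf = global_conf.copy()
    conf.update(local_conf)
    return MetadataController(conf)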
Ejemplo n.º 45
0
    def test_obj_put_async_shard_update_redirected_twice(self):
        policies = list(POLICIES)
        random.shuffle(policies)
        # setup updater
        conf = {
            'devices': self.devices_dir,
            'mount_check': 'false',
            'swift_dir': self.testdir,
        }
        daemon = object_updater.ObjectUpdater(conf, logger=self.logger)
        async_dir = os.path.join(self.sda1, get_async_dir(policies[0]))
        os.mkdir(async_dir)
        dfmanager = DiskFileManager(conf, daemon.logger)

        ts_obj = next(self.ts_iter)
        self._write_async_update(dfmanager,
                                 ts_obj,
                                 policies[0],
                                 container_path='.shards_a/c_shard_older')
        orig_async_path, orig_async_data = self._check_async_file(async_dir)

        # run once
        ts_redirect_1 = next(self.ts_iter)
        ts_redirect_2 = next(self.ts_iter)
        ts_redirect_3 = next(self.ts_iter)
        fake_responses = [
            # 1st round of redirects, newest redirect should be chosen
            (301, {
                'Location': '/.shards_a/c_shard_old/o',
                'X-Backend-Redirect-Timestamp': ts_redirect_1.internal
            }),
            (301, {
                'Location': '/.shards_a/c_shard_new/o',
                'X-Backend-Redirect-Timestamp': ts_redirect_2.internal
            }),
            (301, {
                'Location': '/.shards_a/c_shard_old/o',
                'X-Backend-Redirect-Timestamp': ts_redirect_1.internal
            }),
            # 2nd round of redirects
            (301, {
                'Location': '/.shards_a/c_shard_newer/o',
                'X-Backend-Redirect-Timestamp': ts_redirect_3.internal
            }),
            (301, {
                'Location': '/.shards_a/c_shard_newer/o',
                'X-Backend-Redirect-Timestamp': ts_redirect_3.internal
            }),
            (301, {
                'Location': '/.shards_a/c_shard_newer/o',
                'X-Backend-Redirect-Timestamp': ts_redirect_3.internal
            }),
        ]
        fake_status_codes, fake_headers = zip(*fake_responses)
        with mocked_http_conn(*fake_status_codes,
                              headers=fake_headers) as conn:
            with mock.patch('swift.obj.updater.dump_recon_cache'):
                daemon.run_once()

        self._check_update_requests(conn.requests, ts_obj, policies[0])
        # only *one* set of redirected requests is attempted per cycle
        older_part = daemon.container_ring.get_part('.shards_a/c_shard_older')
        new_part = daemon.container_ring.get_part('.shards_a/c_shard_new')
        newer_part = daemon.container_ring.get_part('.shards_a/c_shard_newer')
        self.assertEqual(
            ['/sda1/%s/.shards_a/c_shard_older/o' % older_part] * 3 +
            ['/sda1/%s/.shards_a/c_shard_new/o' % new_part] * 3,
            [req['path'] for req in conn.requests])
        self.assertEqual({
            'redirects': 2,
            'async_pendings': 1
        }, daemon.logger.get_increment_counts())
        # update failed, we still have pending file with most recent redirect
        # response Location header value added to data
        async_path, async_data = self._check_async_file(async_dir)
        self.assertEqual(orig_async_path, async_path)
        self.assertEqual(
            dict(orig_async_data,
                 container_path='.shards_a/c_shard_newer',
                 redirect_history=[
                     '.shards_a/c_shard_new', '.shards_a/c_shard_newer'
                 ]), async_data)

        # next cycle, should get latest redirect from pickled async update
        fake_responses = [(200, {})] * 3
        fake_status_codes, fake_headers = zip(*fake_responses)
        with mocked_http_conn(*fake_status_codes,
                              headers=fake_headers) as conn:
            with mock.patch('swift.obj.updater.dump_recon_cache'):
                daemon.run_once()

        self._check_update_requests(conn.requests, ts_obj, policies[0])
        self.assertEqual(['/sda1/%s/.shards_a/c_shard_newer/o' % newer_part] *
                         3, [req['path'] for req in conn.requests])
        self.assertEqual(
            {
                'redirects': 2,
                'successes': 1,
                'unlinks': 1,
                'async_pendings': 1
            }, daemon.logger.get_increment_counts())
        self.assertFalse(os.listdir(async_dir))  # no async file
Ejemplo n.º 46
0
class TestAuditor(unittest.TestCase):

    def setUp(self):
        self.testdir = os.path.join(mkdtemp(), 'tmp_test_object_auditor')
        self.devices = os.path.join(self.testdir, 'node')
        self.rcache = os.path.join(self.testdir, 'object.recon')
        self.logger = FakeLogger()
        rmtree(self.testdir, ignore_errors=1)
        mkdirs(os.path.join(self.devices, 'sda'))
        os.mkdir(os.path.join(self.devices, 'sdb'))

        # policy 0
        self.objects = os.path.join(self.devices, 'sda', get_data_dir(0))
        self.objects_2 = os.path.join(self.devices, 'sdb', get_data_dir(0))
        os.mkdir(self.objects)
        # policy 1
        self.objects_p1 = os.path.join(self.devices, 'sda', get_data_dir(1))
        self.objects_2_p1 = os.path.join(self.devices, 'sdb', get_data_dir(1))
        os.mkdir(self.objects_p1)

        self.parts = {}
        self.parts_p1 = {}
        for part in ['0', '1', '2', '3']:
            self.parts[part] = os.path.join(self.objects, part)
            self.parts_p1[part] = os.path.join(self.objects_p1, part)
            os.mkdir(os.path.join(self.objects, part))
            os.mkdir(os.path.join(self.objects_p1, part))

        self.conf = dict(
            devices=self.devices,
            mount_check='false',
            object_size_stats='10,100,1024,10240')
        self.df_mgr = DiskFileManager(self.conf, self.logger)

        # diskfiles for policy 0, 1
        self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'o', 0)
        self.disk_file_p1 = self.df_mgr.get_diskfile('sda', '0', 'a', 'c',
                                                     'o', 1)

    def tearDown(self):
        rmtree(os.path.dirname(self.testdir), ignore_errors=1)
        unit.xattr_data = {}

    def test_worker_conf_parms(self):
        def check_common_defaults():
            self.assertEquals(auditor_worker.max_bytes_per_second, 10000000)
            self.assertEquals(auditor_worker.log_time, 3600)

        # test default values
        conf = dict(
            devices=self.devices,
            mount_check='false',
            object_size_stats='10,100,1024,10240')
        auditor_worker = auditor.AuditorWorker(conf, self.logger,
                                               self.rcache, self.devices)
        check_common_defaults()
        self.assertEquals(auditor_worker.diskfile_mgr.disk_chunk_size, 65536)
        self.assertEquals(auditor_worker.max_files_per_second, 20)
        self.assertEquals(auditor_worker.zero_byte_only_at_fps, 0)

        # test specified audit value overrides
        conf.update({'disk_chunk_size': 4096})
        auditor_worker = auditor.AuditorWorker(conf, self.logger,
                                               self.rcache, self.devices,
                                               zero_byte_only_at_fps=50)
        check_common_defaults()
        self.assertEquals(auditor_worker.diskfile_mgr.disk_chunk_size, 4096)
        self.assertEquals(auditor_worker.max_files_per_second, 50)
        self.assertEquals(auditor_worker.zero_byte_only_at_fps, 50)

    def test_object_audit_extra_data(self):
        def run_tests(disk_file):
            auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                                   self.rcache, self.devices)
            data = '0' * 1024
            etag = md5()
            with disk_file.create() as writer:
                writer.write(data)
                etag.update(data)
                etag = etag.hexdigest()
                timestamp = str(normalize_timestamp(time.time()))
                metadata = {
                    'ETag': etag,
                    'X-Timestamp': timestamp,
                    'Content-Length': str(os.fstat(writer._fd).st_size),
                }
                writer.put(metadata)
                pre_quarantines = auditor_worker.quarantines

                auditor_worker.object_audit(
                    AuditLocation(disk_file._datadir, 'sda', '0'))
                self.assertEquals(auditor_worker.quarantines, pre_quarantines)

                os.write(writer._fd, 'extra_data')

                auditor_worker.object_audit(
                    AuditLocation(disk_file._datadir, 'sda', '0'))
                self.assertEquals(auditor_worker.quarantines,
                                  pre_quarantines + 1)
        run_tests(self.disk_file)
        run_tests(self.disk_file_p1)

    def test_object_audit_diff_data(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        data = '0' * 1024
        etag = md5()
        timestamp = str(normalize_timestamp(time.time()))
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            pre_quarantines = auditor_worker.quarantines

        # remake so it will have metadata
        self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'o')

        auditor_worker.object_audit(
            AuditLocation(self.disk_file._datadir, 'sda', '0'))
        self.assertEquals(auditor_worker.quarantines, pre_quarantines)
        etag = md5()
        etag.update('1' + '0' * 1023)
        etag = etag.hexdigest()
        metadata['ETag'] = etag

        with self.disk_file.create() as writer:
            writer.write(data)
            writer.put(metadata)

        auditor_worker.object_audit(
            AuditLocation(self.disk_file._datadir, 'sda', '0'))
        self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_audit_no_meta(self):
        timestamp = str(normalize_timestamp(time.time()))
        path = os.path.join(self.disk_file._datadir, timestamp + '.data')
        mkdirs(self.disk_file._datadir)
        fp = open(path, 'w')
        fp.write('0' * 1024)
        fp.close()
        invalidate_hash(os.path.dirname(self.disk_file._datadir))
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        pre_quarantines = auditor_worker.quarantines
        auditor_worker.object_audit(
            AuditLocation(self.disk_file._datadir, 'sda', '0'))
        self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_audit_will_not_swallow_errors_in_tests(self):
        timestamp = str(normalize_timestamp(time.time()))
        path = os.path.join(self.disk_file._datadir, timestamp + '.data')
        mkdirs(self.disk_file._datadir)
        with open(path, 'w') as f:
            write_metadata(f, {'name': '/a/c/o'})
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)

        def blowup(*args):
            raise NameError('tpyo')
        with mock.patch.object(DiskFileManager,
                               'get_diskfile_from_audit_location', blowup):
            self.assertRaises(NameError, auditor_worker.object_audit,
                              AuditLocation(os.path.dirname(path), 'sda', '0'))

    def test_failsafe_object_audit_will_swallow_errors_in_tests(self):
        timestamp = str(normalize_timestamp(time.time()))
        path = os.path.join(self.disk_file._datadir, timestamp + '.data')
        mkdirs(self.disk_file._datadir)
        with open(path, 'w') as f:
            write_metadata(f, {'name': '/a/c/o'})
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)

        def blowup(*args):
            raise NameError('tpyo')
        with mock.patch('swift.obj.diskfile.DiskFile', blowup):
            auditor_worker.failsafe_object_audit(
                AuditLocation(os.path.dirname(path), 'sda', '0'))
        self.assertEquals(auditor_worker.errors, 1)

    def test_generic_exception_handling(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        # pretend that we logged (and reset counters) just now
        auditor_worker.last_logged = time.time()
        timestamp = str(normalize_timestamp(time.time()))
        pre_errors = auditor_worker.errors
        data = '0' * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
        with mock.patch('swift.obj.diskfile.DiskFile', lambda *_: 1 / 0):
            auditor_worker.audit_all_objects()
        self.assertEquals(auditor_worker.errors, pre_errors + 1)

    def test_object_run_once_pass(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        auditor_worker.log_time = 0
        timestamp = str(normalize_timestamp(time.time()))
        pre_quarantines = auditor_worker.quarantines
        data = '0' * 1024

        def write_file(df):
            etag = md5()
            with df.create() as writer:
                writer.write(data)
                etag.update(data)
                etag = etag.hexdigest()
                metadata = {
                    'ETag': etag,
                    'X-Timestamp': timestamp,
                    'Content-Length': str(os.fstat(writer._fd).st_size),
                }
                writer.put(metadata)

        # policy 0
        write_file(self.disk_file)
        # policy 1
        write_file(self.disk_file_p1)

        auditor_worker.audit_all_objects()
        self.assertEquals(auditor_worker.quarantines, pre_quarantines)
        # 1 object per policy falls into 1024 bucket
        self.assertEquals(auditor_worker.stats_buckets[1024], 2)
        self.assertEquals(auditor_worker.stats_buckets[10240], 0)

        # pick up some additional code coverage, large file
        data = '0' * 1024 * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
        auditor_worker.audit_all_objects(device_dirs=['sda', 'sdb'])
        self.assertEquals(auditor_worker.quarantines, pre_quarantines)
        # still have the 1024 byte object left in policy-1 (plus the
        # stats from the original 2)
        self.assertEquals(auditor_worker.stats_buckets[1024], 3)
        self.assertEquals(auditor_worker.stats_buckets[10240], 0)
        # and then policy-0 disk_file was re-written as a larger object
        self.assertEquals(auditor_worker.stats_buckets['OVER'], 1)

        # pick up even more additional code coverage, misc paths
        auditor_worker.log_time = -1
        auditor_worker.stats_sizes = []
        auditor_worker.audit_all_objects(device_dirs=['sda', 'sdb'])
        self.assertEquals(auditor_worker.quarantines, pre_quarantines)
        self.assertEquals(auditor_worker.stats_buckets[1024], 3)
        self.assertEquals(auditor_worker.stats_buckets[10240], 0)
        self.assertEquals(auditor_worker.stats_buckets['OVER'], 1)

    def test_object_run_logging(self):
        logger = FakeLogger()
        auditor_worker = auditor.AuditorWorker(self.conf, logger,
                                               self.rcache, self.devices)
        auditor_worker.audit_all_objects(device_dirs=['sda'])
        log_lines = logger.get_lines_for_level('info')
        self.assertTrue(len(log_lines) > 0)
        self.assertIn('ALL - parallel, sda', log_lines[0])

        logger = FakeLogger()
        auditor_worker = auditor.AuditorWorker(self.conf, logger,
                                               self.rcache, self.devices,
                                               zero_byte_only_at_fps=50)
        auditor_worker.audit_all_objects(device_dirs=['sda'])
        log_lines = logger.get_lines_for_level('info')
        self.assertTrue(len(log_lines) > 0)
        self.assertIn('ZBF - sda', log_lines[0])

    def test_object_run_once_no_sda(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        timestamp = str(normalize_timestamp(time.time()))
        pre_quarantines = auditor_worker.quarantines
        # pretend that we logged (and reset counters) just now
        auditor_worker.last_logged = time.time()
        data = '0' * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            os.write(writer._fd, 'extra_data')
        auditor_worker.audit_all_objects()
        self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_run_once_multi_devices(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        # pretend that we logged (and reset counters) just now
        auditor_worker.last_logged = time.time()
        timestamp = str(normalize_timestamp(time.time()))
        pre_quarantines = auditor_worker.quarantines
        data = '0' * 10
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
        auditor_worker.audit_all_objects()
        self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'ob')
        data = '1' * 10
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            os.write(writer._fd, 'extra_data')
        auditor_worker.audit_all_objects()
        self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_run_fast_track_non_zero(self):
        self.auditor = auditor.ObjectAuditor(self.conf)
        self.auditor.log_time = 0
        data = '0' * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': str(normalize_timestamp(time.time())),
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            etag = md5()
            etag.update('1' + '0' * 1023)
            etag = etag.hexdigest()
            metadata['ETag'] = etag
            write_metadata(writer._fd, metadata)

        quarantine_path = os.path.join(self.devices,
                                       'sda', 'quarantined', 'objects')
        kwargs = {'mode': 'once'}
        kwargs['zero_byte_fps'] = 50
        self.auditor.run_audit(**kwargs)
        self.assertFalse(os.path.isdir(quarantine_path))
        del kwargs['zero_byte_fps']
        self.auditor.run_audit(**kwargs)
        self.assertTrue(os.path.isdir(quarantine_path))

    def setup_bad_zero_byte(self, with_ts=False):
        self.auditor = auditor.ObjectAuditor(self.conf)
        self.auditor.log_time = 0
        ts_file_path = ''
        if with_ts:
            name_hash = hash_path('a', 'c', 'o')
            dir_path = os.path.join(
                self.devices, 'sda',
                storage_directory(get_data_dir(0), '0', name_hash))
            ts_file_path = os.path.join(dir_path, '99999.ts')
            if not os.path.exists(dir_path):
                mkdirs(dir_path)
            fp = open(ts_file_path, 'w')
            write_metadata(fp, {'X-Timestamp': '99999', 'name': '/a/c/o'})
            fp.close()

        etag = md5()
        with self.disk_file.create() as writer:
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': str(normalize_timestamp(time.time())),
                'Content-Length': 10,
            }
            writer.put(metadata)
            etag = md5()
            etag = etag.hexdigest()
            metadata['ETag'] = etag
            write_metadata(writer._fd, metadata)
        return ts_file_path

    def test_object_run_fast_track_all(self):
        self.setup_bad_zero_byte()
        kwargs = {'mode': 'once'}
        self.auditor.run_audit(**kwargs)
        quarantine_path = os.path.join(self.devices,
                                       'sda', 'quarantined', 'objects')
        self.assertTrue(os.path.isdir(quarantine_path))

    def test_object_run_fast_track_zero(self):
        self.setup_bad_zero_byte()
        kwargs = {'mode': 'once'}
        kwargs['zero_byte_fps'] = 50
        self.auditor.run_audit(**kwargs)
        quarantine_path = os.path.join(self.devices,
                                       'sda', 'quarantined', 'objects')
        self.assertTrue(os.path.isdir(quarantine_path))

    def test_object_run_fast_track_zero_check_closed(self):
        rat = [False]

        class FakeFile(DiskFile):

            def _quarantine(self, data_file, msg):
                rat[0] = True
                DiskFile._quarantine(self, data_file, msg)

        self.setup_bad_zero_byte()
        was_df = auditor.diskfile.DiskFile
        try:
            auditor.diskfile.DiskFile = FakeFile
            kwargs = {'mode': 'once'}
            kwargs['zero_byte_fps'] = 50
            self.auditor.run_audit(**kwargs)
            quarantine_path = os.path.join(self.devices,
                                           'sda', 'quarantined', 'objects')
            self.assertTrue(os.path.isdir(quarantine_path))
            self.assertTrue(rat[0])
        finally:
            auditor.diskfile.DiskFile = was_df

    def test_with_tombstone(self):
        ts_file_path = self.setup_bad_zero_byte(with_ts=True)
        self.assertTrue(ts_file_path.endswith('ts'))
        kwargs = {'mode': 'once'}
        self.auditor.run_audit(**kwargs)
        self.assertTrue(os.path.exists(ts_file_path))

    def test_sleeper(self):
        with mock.patch(
                'time.sleep', mock.MagicMock()) as mock_sleep:
            auditor.SLEEP_BETWEEN_AUDITS = 0.10
            my_auditor = auditor.ObjectAuditor(self.conf)
            my_auditor._sleep()
            mock_sleep.assert_called_with(auditor.SLEEP_BETWEEN_AUDITS)

    def test_run_parallel_audit(self):

        class StopForever(Exception):
            pass

        class Bogus(Exception):
            pass

        class ObjectAuditorMock(object):
            check_args = ()
            check_kwargs = {}
            check_device_dir = None
            fork_called = 0
            master = 0
            wait_called = 0

            def mock_run(self, *args, **kwargs):
                self.check_args = args
                self.check_kwargs = kwargs
                if 'zero_byte_fps' in kwargs:
                    self.check_device_dir = kwargs.get('device_dirs')

            def mock_sleep_stop(self):
                raise StopForever('stop')

            def mock_sleep_continue(self):
                return

            def mock_audit_loop_error(self, parent, zbo_fps,
                                      override_devices=None, **kwargs):
                raise Bogus('exception')

            def mock_fork(self):
                self.fork_called += 1
                if self.master:
                    return self.fork_called
                else:
                    return 0

            def mock_wait(self):
                self.wait_called += 1
                return (self.wait_called, 0)

        for i in string.ascii_letters[2:26]:
            mkdirs(os.path.join(self.devices, 'sd%s' % i))

        my_auditor = auditor.ObjectAuditor(dict(devices=self.devices,
                                                mount_check='false',
                                                zero_byte_files_per_second=89,
                                                concurrency=1))

        mocker = ObjectAuditorMock()
        my_auditor.logger.exception = mock.MagicMock()
        real_audit_loop = my_auditor.audit_loop
        my_auditor.audit_loop = mocker.mock_audit_loop_error
        my_auditor.run_audit = mocker.mock_run
        was_fork = os.fork
        was_wait = os.wait
        os.fork = mocker.mock_fork
        os.wait = mocker.mock_wait
        try:
            my_auditor._sleep = mocker.mock_sleep_stop
            my_auditor.run_once(zero_byte_fps=50)
            my_auditor.logger.exception.assert_called_once_with(
                'ERROR auditing: exception')
            my_auditor.logger.exception.reset_mock()
            self.assertRaises(StopForever, my_auditor.run_forever)
            my_auditor.logger.exception.assert_called_once_with(
                'ERROR auditing: exception')
            my_auditor.audit_loop = real_audit_loop

            self.assertRaises(StopForever,
                              my_auditor.run_forever, zero_byte_fps=50)
            self.assertEquals(mocker.check_kwargs['zero_byte_fps'], 50)
            self.assertEquals(mocker.fork_called, 0)

            self.assertRaises(SystemExit, my_auditor.run_once)
            self.assertEquals(mocker.fork_called, 1)
            self.assertEquals(mocker.check_kwargs['zero_byte_fps'], 89)
            self.assertEquals(mocker.check_device_dir, [])
            self.assertEquals(mocker.check_args, ())

            device_list = ['sd%s' % i for i in string.ascii_letters[2:10]]
            device_string = ','.join(device_list)
            device_string_bogus = device_string + ',bogus'

            mocker.fork_called = 0
            self.assertRaises(SystemExit, my_auditor.run_once,
                              devices=device_string_bogus)
            self.assertEquals(mocker.fork_called, 1)
            self.assertEquals(mocker.check_kwargs['zero_byte_fps'], 89)
            self.assertEquals(sorted(mocker.check_device_dir), device_list)

            mocker.master = 1

            mocker.fork_called = 0
            self.assertRaises(StopForever, my_auditor.run_forever)
            # Fork is called 2 times since the zbf process is forked just
            # once before self._sleep() is called and StopForever is raised
            # Also wait is called just once before StopForever is raised
            self.assertEquals(mocker.fork_called, 2)
            self.assertEquals(mocker.wait_called, 1)

            my_auditor._sleep = mocker.mock_sleep_continue

            my_auditor.concurrency = 2
            mocker.fork_called = 0
            mocker.wait_called = 0
            my_auditor.run_once()
            # Fork is called no. of devices + (no. of devices)/2 + 1 times
            # since zbf process is forked (no.of devices)/2 + 1 times
            no_devices = len(os.listdir(self.devices))
            self.assertEquals(mocker.fork_called, no_devices + no_devices / 2
                              + 1)
            self.assertEquals(mocker.wait_called, no_devices + no_devices / 2
                              + 1)
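            # e.g. with the 26 device dirs present in this test (sda and
            # sdb from setUp plus sdc..sdz created above):
            # 26 + 26 / 2 + 1 == 40 forks and 40 waits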

        finally:
            os.fork = was_fork
            os.wait = was_wait
Ejemplo n.º 47
0
class TestAuditor(unittest.TestCase):

    def setUp(self):
        self.testdir = os.path.join(mkdtemp(), 'tmp_test_object_auditor')
        self.devices = os.path.join(self.testdir, 'node')
        self.rcache = os.path.join(self.testdir, 'object.recon')
        self.logger = FakeLogger()
        rmtree(self.testdir, ignore_errors=1)
        mkdirs(os.path.join(self.devices, 'sda'))
        os.mkdir(os.path.join(self.devices, 'sdb'))

        # policy 0
        self.objects = os.path.join(self.devices, 'sda',
                                    get_data_dir(POLICIES[0]))
        self.objects_2 = os.path.join(self.devices, 'sdb',
                                      get_data_dir(POLICIES[0]))
        os.mkdir(self.objects)
        # policy 1
        self.objects_p1 = os.path.join(self.devices, 'sda',
                                       get_data_dir(POLICIES[1]))
        self.objects_2_p1 = os.path.join(self.devices, 'sdb',
                                         get_data_dir(POLICIES[1]))
        os.mkdir(self.objects_p1)
        # policy 2
        self.objects_p2 = os.path.join(self.devices, 'sda',
                                       get_data_dir(POLICIES[2]))
        self.objects_2_p2 = os.path.join(self.devices, 'sdb',
                                         get_data_dir(POLICIES[2]))
        os.mkdir(self.objects_p2)

        self.parts = {}
        self.parts_p1 = {}
        self.parts_p2 = {}
        for part in ['0', '1', '2', '3']:
            self.parts[part] = os.path.join(self.objects, part)
            self.parts_p1[part] = os.path.join(self.objects_p1, part)
            self.parts_p2[part] = os.path.join(self.objects_p2, part)
            os.mkdir(os.path.join(self.objects, part))
            os.mkdir(os.path.join(self.objects_p1, part))
            os.mkdir(os.path.join(self.objects_p2, part))

        self.conf = dict(
            devices=self.devices,
            mount_check='false',
            object_size_stats='10,100,1024,10240')
        self.df_mgr = DiskFileManager(self.conf, self.logger)
        self.ec_df_mgr = ECDiskFileManager(self.conf, self.logger)

        # diskfiles for policy 0, 1, 2
        self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'o',
                                                  policy=POLICIES[0])
        self.disk_file_p1 = self.df_mgr.get_diskfile('sda', '0', 'a', 'c',
                                                     'o', policy=POLICIES[1])
        self.disk_file_ec = self.ec_df_mgr.get_diskfile(
            'sda', '0', 'a', 'c', 'o', policy=POLICIES[2], frag_index=1)

    def tearDown(self):
        rmtree(os.path.dirname(self.testdir), ignore_errors=1)
        unit.xattr_data = {}

    def test_worker_conf_parms(self):
        def check_common_defaults():
            self.assertEqual(auditor_worker.max_bytes_per_second, 10000000)
            self.assertEqual(auditor_worker.log_time, 3600)

        # test default values
        conf = dict(
            devices=self.devices,
            mount_check='false',
            object_size_stats='10,100,1024,10240')
        auditor_worker = auditor.AuditorWorker(conf, self.logger,
                                               self.rcache, self.devices)
        check_common_defaults()
        for policy in POLICIES:
            mgr = auditor_worker.diskfile_router[policy]
            self.assertEqual(mgr.disk_chunk_size, 65536)
        self.assertEqual(auditor_worker.max_files_per_second, 20)
        self.assertEqual(auditor_worker.zero_byte_only_at_fps, 0)

        # test specified audit value overrides
        conf.update({'disk_chunk_size': 4096})
        auditor_worker = auditor.AuditorWorker(conf, self.logger,
                                               self.rcache, self.devices,
                                               zero_byte_only_at_fps=50)
        check_common_defaults()
        for policy in POLICIES:
            mgr = auditor_worker.diskfile_router[policy]
            self.assertEqual(mgr.disk_chunk_size, 4096)
        self.assertEqual(auditor_worker.max_files_per_second, 50)
        self.assertEqual(auditor_worker.zero_byte_only_at_fps, 50)

    def test_object_audit_extra_data(self):
        def run_tests(disk_file):
            auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                                   self.rcache, self.devices)
            data = b'0' * 1024
            etag = md5()
            with disk_file.create() as writer:
                writer.write(data)
                etag.update(data)
                etag = etag.hexdigest()
                timestamp = str(normalize_timestamp(time.time()))
                metadata = {
                    'ETag': etag,
                    'X-Timestamp': timestamp,
                    'Content-Length': str(os.fstat(writer._fd).st_size),
                }
                writer.put(metadata)
                writer.commit(Timestamp(timestamp))
                pre_quarantines = auditor_worker.quarantines

                auditor_worker.object_audit(
                    AuditLocation(disk_file._datadir, 'sda', '0',
                                  policy=disk_file.policy))
                self.assertEqual(auditor_worker.quarantines, pre_quarantines)

                os.write(writer._fd, b'extra_data')

                auditor_worker.object_audit(
                    AuditLocation(disk_file._datadir, 'sda', '0',
                                  policy=disk_file.policy))
                self.assertEqual(auditor_worker.quarantines,
                                 pre_quarantines + 1)
        run_tests(self.disk_file)
        run_tests(self.disk_file_p1)
        run_tests(self.disk_file_ec)

    def test_object_audit_diff_data(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        data = b'0' * 1024
        etag = md5()
        timestamp = str(normalize_timestamp(time.time()))
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            writer.commit(Timestamp(timestamp))
            pre_quarantines = auditor_worker.quarantines

        # remake so it will have metadata
        self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'o',
                                                  policy=POLICIES.legacy)

        auditor_worker.object_audit(
            AuditLocation(self.disk_file._datadir, 'sda', '0',
                          policy=POLICIES.legacy))
        self.assertEqual(auditor_worker.quarantines, pre_quarantines)
        etag = md5(b'1' + b'0' * 1023).hexdigest()
        metadata['ETag'] = etag

        with self.disk_file.create() as writer:
            writer.write(data)
            writer.put(metadata)
            writer.commit(Timestamp(timestamp))

        auditor_worker.object_audit(
            AuditLocation(self.disk_file._datadir, 'sda', '0',
                          policy=POLICIES.legacy))
        self.assertEqual(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_audit_no_meta(self):
        timestamp = str(normalize_timestamp(time.time()))
        path = os.path.join(self.disk_file._datadir, timestamp + '.data')
        mkdirs(self.disk_file._datadir)
        fp = open(path, 'wb')
        fp.write(b'0' * 1024)
        fp.close()
        invalidate_hash(os.path.dirname(self.disk_file._datadir))
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        pre_quarantines = auditor_worker.quarantines
        auditor_worker.object_audit(
            AuditLocation(self.disk_file._datadir, 'sda', '0',
                          policy=POLICIES.legacy))
        self.assertEqual(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_audit_will_not_swallow_errors_in_tests(self):
        timestamp = str(normalize_timestamp(time.time()))
        path = os.path.join(self.disk_file._datadir, timestamp + '.data')
        mkdirs(self.disk_file._datadir)
        with open(path, 'w') as f:
            write_metadata(f, {'name': '/a/c/o'})
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)

        def blowup(*args):
            raise NameError('tpyo')
        with mock.patch.object(DiskFileManager,
                               'get_diskfile_from_audit_location', blowup):
            self.assertRaises(NameError, auditor_worker.object_audit,
                              AuditLocation(os.path.dirname(path), 'sda', '0',
                                            policy=POLICIES.legacy))

    def test_failsafe_object_audit_will_swallow_errors_in_tests(self):
        timestamp = str(normalize_timestamp(time.time()))
        path = os.path.join(self.disk_file._datadir, timestamp + '.data')
        mkdirs(self.disk_file._datadir)
        with open(path, 'w') as f:
            write_metadata(f, {'name': '/a/c/o'})
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)

        def blowup(*args):
            raise NameError('tpyo')
        with mock.patch('swift.obj.diskfile.DiskFileManager.diskfile_cls',
                        blowup):
            auditor_worker.failsafe_object_audit(
                AuditLocation(os.path.dirname(path), 'sda', '0',
                              policy=POLICIES.legacy))
        self.assertEqual(auditor_worker.errors, 1)

    def test_audit_location_gets_quarantined(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)

        location = AuditLocation(self.disk_file._datadir, 'sda', '0',
                                 policy=self.disk_file.policy)

        # instead of a datadir, we'll make a file!
        mkdirs(os.path.dirname(self.disk_file._datadir))
        open(self.disk_file._datadir, 'w').close()

        # after we turn the crank ...
        auditor_worker.object_audit(location)

        # ... it should get quarantined
        self.assertFalse(os.path.exists(self.disk_file._datadir))
        self.assertEqual(1, auditor_worker.quarantines)
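
    # Minimal sketch, under the assumption that quarantining just means
    # "rename the offending path into the device's quarantined area"; the
    # helper name and the exact destination layout are illustrative, not
    # Swift's actual implementation.
    @staticmethod
    def _quarantine_path(path, device_dir):
        dest = os.path.join(device_dir, 'quarantined', 'objects',
                            os.path.basename(path))
        mkdirs(os.path.dirname(dest))
        os.rename(path, dest)
        return dest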

    def test_rsync_tempfile_timeout_auto_option(self):
        # if we don't have access to the replicator config section we'll use
        # our default
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        self.assertEqual(auditor_worker.rsync_tempfile_timeout, 86400)
        # if the rsync_tempfile_timeout option is set explicitly we use that
        self.conf['rsync_tempfile_timeout'] = '1800'
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        self.assertEqual(auditor_worker.rsync_tempfile_timeout, 1800)
        # if we have a real config we can be a little smarter
        config_path = os.path.join(self.testdir, 'objserver.conf')
        stub_config = """
        [object-auditor]
        rsync_tempfile_timeout = auto
        """
        with open(config_path, 'w') as f:
            f.write(textwrap.dedent(stub_config))
        # the Daemon loader will hand the object-auditor config to the
        # auditor who will build the workers from it
        conf = readconf(config_path, 'object-auditor')
        auditor_worker = auditor.AuditorWorker(conf, self.logger,
                                               self.rcache, self.devices)
        # if there is no object-replicator section we still have to fall back
        # to default because we can't parse the config for that section!
        self.assertEqual(auditor_worker.rsync_tempfile_timeout, 86400)
        stub_config = """
        [object-replicator]
        [object-auditor]
        rsync_tempfile_timeout = auto
        """
        with open(config_path, 'w') as f:
            f.write(textwrap.dedent(stub_config))
        conf = readconf(config_path, 'object-auditor')
        auditor_worker = auditor.AuditorWorker(conf, self.logger,
                                               self.rcache, self.devices)
        # if the object-replicator section parses but does not override
        # rsync_timeout we assume the default rsync_timeout value and
        # add 15 minutes
        self.assertEqual(auditor_worker.rsync_tempfile_timeout,
                         replicator.DEFAULT_RSYNC_TIMEOUT + 900)
        stub_config = """
        [DEFAULT]
        reclaim_age = 1209600
        [object-replicator]
        rsync_timeout = 3600
        [object-auditor]
        rsync_tempfile_timeout = auto
        """
        with open(config_path, 'w') as f:
            f.write(textwrap.dedent(stub_config))
        conf = readconf(config_path, 'object-auditor')
        auditor_worker = auditor.AuditorWorker(conf, self.logger,
                                               self.rcache, self.devices)
        # if there is an object-replicator section with a rsync_timeout
        # configured we'll use that value (3600) + 900
        self.assertEqual(auditor_worker.rsync_tempfile_timeout, 3600 + 900)
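
    # Hedged sketch of the 'auto' resolution walked through above: an
    # explicit value wins, a parseable object-replicator section yields its
    # rsync_timeout plus a 15 minute grace, and otherwise the one-day
    # default applies. `_resolve_tempfile_timeout` is a hypothetical
    # helper, not Swift's API.
    @staticmethod
    def _resolve_tempfile_timeout(value, rsync_timeout=None,
                                  default=86400, grace=900):
        if value != 'auto':
            return int(value)
        if rsync_timeout is None:
            # no object-replicator section we can parse: keep the default
            return default
        return rsync_timeout + grace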

    def test_inprogress_rsync_tempfiles_get_cleaned_up(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)

        location = AuditLocation(self.disk_file._datadir, 'sda', '0',
                                 policy=self.disk_file.policy)

        data = b'VERIFY'
        etag = md5()
        timestamp = str(normalize_timestamp(time.time()))
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            metadata = {
                'ETag': etag.hexdigest(),
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            writer.commit(Timestamp(timestamp))

        datafilename = None
        datadir_files = os.listdir(self.disk_file._datadir)
        for filename in datadir_files:
            if filename.endswith('.data'):
                datafilename = filename
                break
        else:
            self.fail('Did not find .data file in %r: %r' %
                      (self.disk_file._datadir, datadir_files))
        rsynctempfile_path = os.path.join(self.disk_file._datadir,
                                          '.%s.9ILVBL' % datafilename)
        open(rsynctempfile_path, 'w').close()
        # sanity check we have an extra file
        rsync_files = os.listdir(self.disk_file._datadir)
        self.assertEqual(len(datadir_files) + 1, len(rsync_files))

        # and after we turn the crank ...
        auditor_worker.object_audit(location)

        # ... we've still got the rsync file
        self.assertEqual(rsync_files, os.listdir(self.disk_file._datadir))

        # and we'll keep it - depending on the rsync_tempfile_timeout
        self.assertEqual(auditor_worker.rsync_tempfile_timeout, 86400)
        self.conf['rsync_tempfile_timeout'] = '3600'
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        self.assertEqual(auditor_worker.rsync_tempfile_timeout, 3600)
        now = time.time() + 1900
        with mock.patch('swift.obj.auditor.time.time',
                        return_value=now):
            auditor_worker.object_audit(location)
        self.assertEqual(rsync_files, os.listdir(self.disk_file._datadir))

        # but *tomorrow* when we run
        tomorrow = time.time() + 86400
        with mock.patch('swift.obj.auditor.time.time',
                        return_value=tomorrow):
            auditor_worker.object_audit(location)

        # ... we'll totally clean that stuff up!
        self.assertEqual(datadir_files, os.listdir(self.disk_file._datadir))

        # but if we have some random crazy file in there
        random_crazy_file_path = os.path.join(self.disk_file._datadir,
                                              '.random.crazy.file')
        open(random_crazy_file_path, 'w').close()

        tomorrow = time.time() + 86400
        with mock.patch('swift.obj.auditor.time.time',
                        return_value=tomorrow):
            auditor_worker.object_audit(location)

        # that's someone else's problem
        self.assertIn(os.path.basename(random_crazy_file_path),
                      os.listdir(self.disk_file._datadir))
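
    # Hedged sketch of the cleanup decision tested above: a candidate file
    # must both look like an rsync partial (".<datafile>.XXXXXX") and be
    # older than rsync_tempfile_timeout; anything else is left alone. The
    # regex is illustrative, not Swift's actual pattern.
    @staticmethod
    def _is_stale_rsync_tempfile(filename, age, timeout):
        import re  # local import so this sketch stays self-contained
        looks_like_partial = re.match(r'^\..+\.[0-9A-Za-z]{6}$', filename)
        return bool(looks_like_partial) and age > timeout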

    def test_generic_exception_handling(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        # pretend that we logged (and reset counters) just now
        auditor_worker.last_logged = time.time()
        timestamp = str(normalize_timestamp(time.time()))
        pre_errors = auditor_worker.errors
        data = b'0' * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            writer.commit(Timestamp(timestamp))
        with mock.patch('swift.obj.diskfile.DiskFileManager.diskfile_cls',
                        lambda *_: 1 / 0):
            auditor_worker.audit_all_objects()
        self.assertEqual(auditor_worker.errors, pre_errors + 1)

    def test_object_run_once_pass(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        auditor_worker.log_time = 0
        timestamp = str(normalize_timestamp(time.time()))
        pre_quarantines = auditor_worker.quarantines
        data = b'0' * 1024

        def write_file(df):
            with df.create() as writer:
                writer.write(data)
                metadata = {
                    'ETag': md5(data).hexdigest(),
                    'X-Timestamp': timestamp,
                    'Content-Length': str(os.fstat(writer._fd).st_size),
                }
                writer.put(metadata)
                writer.commit(Timestamp(timestamp))

        # policy 0
        write_file(self.disk_file)
        # policy 1
        write_file(self.disk_file_p1)
        # policy 2
        write_file(self.disk_file_ec)

        auditor_worker.audit_all_objects()
        self.assertEqual(auditor_worker.quarantines, pre_quarantines)
        # 1 object per policy falls into 1024 bucket
        self.assertEqual(auditor_worker.stats_buckets[1024], 3)
        self.assertEqual(auditor_worker.stats_buckets[10240], 0)

        # pick up some additional code coverage, large file
        data = b'0' * 1024 * 1024
        for df in (self.disk_file, self.disk_file_ec):
            with df.create() as writer:
                writer.write(data)
                metadata = {
                    'ETag': md5(data).hexdigest(),
                    'X-Timestamp': timestamp,
                    'Content-Length': str(os.fstat(writer._fd).st_size),
                }
                writer.put(metadata)
                writer.commit(Timestamp(timestamp))
        auditor_worker.audit_all_objects(device_dirs=['sda', 'sdb'])
        self.assertEqual(auditor_worker.quarantines, pre_quarantines)
        # still have the 1024 byte object left in policy-1 (plus the
        # stats from the original 3)
        self.assertEqual(auditor_worker.stats_buckets[1024], 4)
        self.assertEqual(auditor_worker.stats_buckets[10240], 0)
        # and then policy-0 disk_file was re-written as a larger object
        self.assertEqual(auditor_worker.stats_buckets['OVER'], 2)

        # pick up even more additional code coverage, misc paths
        auditor_worker.log_time = -1
        auditor_worker.stats_sizes = []
        auditor_worker.audit_all_objects(device_dirs=['sda', 'sdb'])
        self.assertEqual(auditor_worker.quarantines, pre_quarantines)
        self.assertEqual(auditor_worker.stats_buckets[1024], 4)
        self.assertEqual(auditor_worker.stats_buckets[10240], 0)
        self.assertEqual(auditor_worker.stats_buckets['OVER'], 2)
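
    # Illustrative sketch of the size-stats bucketing asserted above,
    # assuming each object lands in the smallest configured bucket that can
    # hold it and anything larger than the last bucket counts as 'OVER'.
    # The helper and its default buckets are hypothetical.
    @staticmethod
    def _bucket_for(size, buckets=(10, 100, 1024, 10240)):
        for limit in buckets:
            if size <= limit:
                return limit
        return 'OVER'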

    def test_object_run_logging(self):
        logger = FakeLogger()
        auditor_worker = auditor.AuditorWorker(self.conf, logger,
                                               self.rcache, self.devices)
        auditor_worker.audit_all_objects(device_dirs=['sda'])
        log_lines = logger.get_lines_for_level('info')
        self.assertTrue(len(log_lines) > 0)
        self.assertIn('ALL - parallel, sda', log_lines[0])

        logger = FakeLogger()
        auditor_worker = auditor.AuditorWorker(self.conf, logger,
                                               self.rcache, self.devices,
                                               zero_byte_only_at_fps=50)
        auditor_worker.audit_all_objects(device_dirs=['sda'])
        log_lines = logger.get_lines_for_level('info')
        self.assertTrue(len(log_lines) > 0)
        self.assertIn('ZBF - sda', log_lines[0])

    def test_object_run_once_no_sda(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        timestamp = str(normalize_timestamp(time.time()))
        pre_quarantines = auditor_worker.quarantines
        # pretend that we logged (and reset counters) just now
        auditor_worker.last_logged = time.time()
        data = b'0' * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            os.write(writer._fd, b'extra_data')
            writer.commit(Timestamp(timestamp))
        auditor_worker.audit_all_objects()
        self.assertEqual(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_run_once_multi_devices(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        # pretend that we logged (and reset counters) just now
        auditor_worker.last_logged = time.time()
        timestamp = str(normalize_timestamp(time.time()))
        pre_quarantines = auditor_worker.quarantines
        data = b'0' * 10
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            writer.commit(Timestamp(timestamp))
        auditor_worker.audit_all_objects()
        self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'ob',
                                                  policy=POLICIES.legacy)
        data = b'1' * 10
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            writer.commit(Timestamp(timestamp))
            os.write(writer._fd, b'extra_data')
        auditor_worker.audit_all_objects()
        self.assertEqual(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_run_fast_track_non_zero(self):
        self.auditor = auditor.ObjectAuditor(self.conf)
        self.auditor.log_time = 0
        data = b'0' * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            timestamp = str(normalize_timestamp(time.time()))
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            writer.commit(Timestamp(timestamp))
            etag = md5()
            etag.update(b'1' + b'0' * 1023)
            etag = etag.hexdigest()
            metadata['ETag'] = etag
            write_metadata(writer._fd, metadata)

        quarantine_path = os.path.join(self.devices,
                                       'sda', 'quarantined', 'objects')
        kwargs = {'mode': 'once'}
        kwargs['zero_byte_fps'] = 50
        self.auditor.run_audit(**kwargs)
        self.assertFalse(os.path.isdir(quarantine_path))
        del kwargs['zero_byte_fps']
        clear_auditor_status(self.devices)
        self.auditor.run_audit(**kwargs)
        self.assertTrue(os.path.isdir(quarantine_path))

    def setup_bad_zero_byte(self, timestamp=None):
        if timestamp is None:
            timestamp = Timestamp(time.time())
        self.auditor = auditor.ObjectAuditor(self.conf)
        self.auditor.log_time = 0
        etag = md5()
        with self.disk_file.create() as writer:
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp.internal,
                'Content-Length': 10,
            }
            writer.put(metadata)
            writer.commit(Timestamp(timestamp))
            etag = md5()
            etag = etag.hexdigest()
            metadata['ETag'] = etag
            write_metadata(writer._fd, metadata)

    def test_object_run_fast_track_all(self):
        self.setup_bad_zero_byte()
        kwargs = {'mode': 'once'}
        self.auditor.run_audit(**kwargs)
        quarantine_path = os.path.join(self.devices,
                                       'sda', 'quarantined', 'objects')
        self.assertTrue(os.path.isdir(quarantine_path))

    def test_object_run_fast_track_zero(self):
        self.setup_bad_zero_byte()
        kwargs = {'mode': 'once'}
        kwargs['zero_byte_fps'] = 50

        called_args = [0]

        def mock_get_auditor_status(path, logger, audit_type):
            called_args[0] = audit_type
            return get_auditor_status(path, logger, audit_type)

        with mock.patch('swift.obj.diskfile.get_auditor_status',
                        mock_get_auditor_status):
            self.auditor.run_audit(**kwargs)
        quarantine_path = os.path.join(self.devices,
                                       'sda', 'quarantined', 'objects')
        self.assertTrue(os.path.isdir(quarantine_path))
        self.assertEqual('ZBF', called_args[0])
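
    # Illustrative sketch of the mode selection this test observes: a
    # worker running with a zero_byte_fps budget records its audit type as
    # 'ZBF', otherwise as 'ALL'. The helper is hypothetical.
    @staticmethod
    def _audit_type(zero_byte_fps):
        return 'ZBF' if zero_byte_fps else 'ALL'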

    def test_object_run_fast_track_zero_check_closed(self):
        rat = [False]

        class FakeFile(DiskFile):

            def _quarantine(self, data_file, msg):
                rat[0] = True
                DiskFile._quarantine(self, data_file, msg)

        self.setup_bad_zero_byte()
        with mock.patch('swift.obj.diskfile.DiskFileManager.diskfile_cls',
                        FakeFile):
            kwargs = {'mode': 'once'}
            kwargs['zero_byte_fps'] = 50
            self.auditor.run_audit(**kwargs)
            quarantine_path = os.path.join(self.devices,
                                           'sda', 'quarantined', 'objects')
            self.assertTrue(os.path.isdir(quarantine_path))
            self.assertTrue(rat[0])

    @mock.patch.object(auditor.ObjectAuditor, 'run_audit')
    @mock.patch('os.fork', return_value=0)
    def test_with_inaccessible_object_location(self, mock_os_fork,
                                               mock_run_audit):
        # Need to ensure that any failures in run_audit do
        # not prevent sys.exit() from running.  Otherwise we get
        # zombie processes.
        e = OSError('permission denied')
        mock_run_audit.side_effect = e
        self.auditor = auditor.ObjectAuditor(self.conf)
        self.assertRaises(SystemExit, self.auditor.fork_child, self)

    def test_with_only_tombstone(self):
        # sanity check that auditor doesn't touch solitary tombstones
        ts_iter = make_timestamp_iter()
        self.setup_bad_zero_byte(timestamp=next(ts_iter))
        self.disk_file.delete(next(ts_iter))
        files = os.listdir(self.disk_file._datadir)
        self.assertEqual(1, len(files))
        self.assertTrue(files[0].endswith('.ts'))
        kwargs = {'mode': 'once'}
        self.auditor.run_audit(**kwargs)
        files_after = os.listdir(self.disk_file._datadir)
        self.assertEqual(files, files_after)

    def test_with_tombstone_and_data(self):
        # rsync replication could leave a tombstone and data file in object
        # dir - verify they are both removed during audit
        ts_iter = make_timestamp_iter()
        ts_tomb = next(ts_iter)
        ts_data = next(ts_iter)
        self.setup_bad_zero_byte(timestamp=ts_data)
        tomb_file_path = os.path.join(self.disk_file._datadir,
                                      '%s.ts' % ts_tomb.internal)
        with open(tomb_file_path, 'wb') as fd:
            write_metadata(fd, {'X-Timestamp': ts_tomb.internal})
        files = os.listdir(self.disk_file._datadir)
        self.assertEqual(2, len(files))
        self.assertIn(os.path.basename(tomb_file_path), files, files)
        kwargs = {'mode': 'once'}
        self.auditor.run_audit(**kwargs)
        self.assertFalse(os.path.exists(self.disk_file._datadir))

    def _audit_tombstone(self, conf, ts_tomb, zero_byte_fps=0):
        self.auditor = auditor.ObjectAuditor(conf)
        self.auditor.log_time = 0
        # create tombstone and hashes.pkl file, ensuring the tombstone is not
        # reclaimed by mocking time to be the tombstone time
        with mock.patch('time.time', return_value=float(ts_tomb)):
            self.disk_file.delete(ts_tomb)
            self.disk_file.manager.get_hashes(
                self.devices + '/sda', '0', [], self.disk_file.policy)
        suffix = basename(dirname(self.disk_file._datadir))
        part_dir = dirname(dirname(self.disk_file._datadir))
        # sanity checks...
        self.assertEqual(['%s.ts' % ts_tomb.internal],
                         os.listdir(self.disk_file._datadir))
        self.assertTrue(os.path.exists(os.path.join(part_dir, HASH_FILE)))
        self.assertFalse(os.path.exists(
            os.path.join(part_dir, HASH_INVALIDATIONS_FILE)))
        # Run auditor
        self.auditor.run_audit(mode='once', zero_byte_fps=zero_byte_fps)
        # sanity check - auditor should not remove tombstone file
        self.assertEqual(['%s.ts' % ts_tomb.internal],
                         os.listdir(self.disk_file._datadir))
        return part_dir, suffix

    def test_non_reclaimable_tombstone(self):
        # audit with a recent tombstone
        ts_tomb = Timestamp(time.time() - 55)
        part_dir, suffix = self._audit_tombstone(self.conf, ts_tomb)
        self.assertTrue(os.path.exists(os.path.join(part_dir, HASH_FILE)))
        self.assertFalse(os.path.exists(
            os.path.join(part_dir, HASH_INVALIDATIONS_FILE)))

    def test_reclaimable_tombstone(self):
        # audit with a reclaimable tombstone
        ts_tomb = Timestamp(time.time() - 604800)
        part_dir, suffix = self._audit_tombstone(self.conf, ts_tomb)
        self.assertTrue(os.path.exists(os.path.join(part_dir, HASH_FILE)))
        hash_invalid = os.path.join(part_dir, HASH_INVALIDATIONS_FILE)
        self.assertTrue(os.path.exists(hash_invalid))
        with open(hash_invalid) as fp:
            hash_val = fp.read()
        self.assertEqual(suffix, hash_val.strip('\n'))
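
    # Hedged sketch of the side effect asserted above: reclaiming a
    # tombstone appends the containing suffix to the partition's hash
    # invalidations file so the suffix gets rehashed. Reading that file
    # back is all this hypothetical helper does.
    @staticmethod
    def _read_invalidated_suffixes(part_dir):
        invalid_path = os.path.join(part_dir, HASH_INVALIDATIONS_FILE)
        with open(invalid_path) as fp:
            return [line.strip() for line in fp if line.strip()]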

    def test_non_reclaimable_tombstone_with_custom_reclaim_age(self):
        # audit with a tombstone newer than custom reclaim age
        ts_tomb = Timestamp(time.time() - 604800)
        conf = dict(self.conf)
        conf['reclaim_age'] = 2 * 604800
        part_dir, suffix = self._audit_tombstone(conf, ts_tomb)
        self.assertTrue(os.path.exists(os.path.join(part_dir, HASH_FILE)))
        self.assertFalse(os.path.exists(
            os.path.join(part_dir, HASH_INVALIDATIONS_FILE)))

    def test_reclaimable_tombstone_with_custom_reclaim_age(self):
        # audit with a tombstone older than custom reclaim age
        ts_tomb = Timestamp(time.time() - 55)
        conf = dict(self.conf)
        conf['reclaim_age'] = 10
        part_dir, suffix = self._audit_tombstone(conf, ts_tomb)
        self.assertTrue(os.path.exists(os.path.join(part_dir, HASH_FILE)))
        hash_invalid = os.path.join(part_dir, HASH_INVALIDATIONS_FILE)
        self.assertTrue(os.path.exists(hash_invalid))
        with open(hash_invalid) as fp:
            hash_val = fp.read()
        self.assertEqual(suffix, hash_val.strip('\n'))

    def test_reclaimable_tombstone_with_zero_byte_fps(self):
        # audit with a tombstone older than reclaim age by a zero_byte_fps
        # worker does not invalidate the hash
        ts_tomb = Timestamp(time.time() - 604800)
        part_dir, suffix = self._audit_tombstone(
            self.conf, ts_tomb, zero_byte_fps=50)
        self.assertTrue(os.path.exists(os.path.join(part_dir, HASH_FILE)))
        self.assertFalse(os.path.exists(
            os.path.join(part_dir, HASH_INVALIDATIONS_FILE)))

    def test_auditor_reclaim_age(self):
        # if we don't have access to the replicator config section we'll use
        # diskfile's default
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        router = auditor_worker.diskfile_router
        for policy in POLICIES:
            self.assertEqual(router[policy].reclaim_age, 86400 * 7)

        # if the reclaim_age option is set explicitly we use that
        self.conf['reclaim_age'] = '1800'
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        router = auditor_worker.diskfile_router
        for policy in POLICIES:
            self.assertEqual(router[policy].reclaim_age, 1800)

        # if we have a real config we can be a little smarter
        config_path = os.path.join(self.testdir, 'objserver.conf')

        # if there is no object-replicator section we still have to fall back
        # to default because we can't parse the config for that section!
        stub_config = """
        [object-auditor]
        """
        with open(config_path, 'w') as f:
            f.write(textwrap.dedent(stub_config))
        conf = readconf(config_path, 'object-auditor')
        auditor_worker = auditor.AuditorWorker(conf, self.logger,
                                               self.rcache, self.devices)
        router = auditor_worker.diskfile_router
        for policy in POLICIES:
            self.assertEqual(router[policy].reclaim_age, 86400 * 7)

        # verify reclaim_age is taken from the auditor config value
        stub_config = """
        [object-replicator]
        [object-auditor]
        reclaim_age = 60
        """
        with open(config_path, 'w') as f:
            f.write(textwrap.dedent(stub_config))
        conf = readconf(config_path, 'object-auditor')
        auditor_worker = auditor.AuditorWorker(conf, self.logger,
                                               self.rcache, self.devices)
        router = auditor_worker.diskfile_router
        for policy in POLICIES:
            self.assertEqual(router[policy].reclaim_age, 60)

        # verify reclaim_age falls back to replicator config value
        # if there is no auditor config value
        config_path = os.path.join(self.testdir, 'objserver.conf')
        stub_config = """
                [object-replicator]
                reclaim_age = 60
                [object-auditor]
                """
        with open(config_path, 'w') as f:
            f.write(textwrap.dedent(stub_config))
        conf = readconf(config_path, 'object-auditor')
        auditor_worker = auditor.AuditorWorker(conf, self.logger,
                                               self.rcache, self.devices)
        router = auditor_worker.diskfile_router
        for policy in POLICIES:
            self.assertEqual(router[policy].reclaim_age, 60)

        # we'll prefer our own DEFAULT section to the replicator though
        self.assertEqual(auditor_worker.rsync_tempfile_timeout,
                         replicator.DEFAULT_RSYNC_TIMEOUT + 900)
        stub_config = """
        [DEFAULT]
        reclaim_age = 1209600
        [object-replicator]
        reclaim_age = 1800
        [object-auditor]
        """
        with open(config_path, 'w') as f:
            f.write(textwrap.dedent(stub_config))
        conf = readconf(config_path, 'object-auditor')
        auditor_worker = auditor.AuditorWorker(conf, self.logger,
                                               self.rcache, self.devices)
        router = auditor_worker.diskfile_router
        for policy in POLICIES:
            self.assertEqual(router[policy].reclaim_age, 1209600)
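
    # Hedged sketch of the precedence this test pins down: an auditor
    # section value wins, then a DEFAULT section value (which readconf
    # merges into the auditor section), then the replicator's, and finally
    # diskfile's one-week default. The helper and argument names are
    # illustrative.
    @staticmethod
    def _effective_reclaim_age(auditor_val=None, default_val=None,
                               replicator_val=None, fallback=86400 * 7):
        for val in (auditor_val, default_val, replicator_val):
            if val is not None:
                return int(val)
        return fallback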

    def test_sleeper(self):
        with mock.patch(
                'time.sleep', mock.MagicMock()) as mock_sleep:
            my_auditor = auditor.ObjectAuditor(self.conf)
            my_auditor._sleep()
            mock_sleep.assert_called_with(30)

            my_conf = dict(interval=2)
            my_conf.update(self.conf)
            my_auditor = auditor.ObjectAuditor(my_conf)
            my_auditor._sleep()
            mock_sleep.assert_called_with(2)

            my_auditor = auditor.ObjectAuditor(self.conf)
            my_auditor.interval = 2
            my_auditor._sleep()
            mock_sleep.assert_called_with(2)

    def test_run_parallel_audit(self):

        class StopForever(Exception):
            pass

        class Bogus(Exception):
            pass

        loop_error = Bogus('exception')

        class LetMeOut(BaseException):
            pass

        class ObjectAuditorMock(object):
            check_args = ()
            check_kwargs = {}
            check_device_dir = None
            fork_called = 0
            master = 0
            wait_called = 0

            def mock_run(self, *args, **kwargs):
                self.check_args = args
                self.check_kwargs = kwargs
                if 'zero_byte_fps' in kwargs:
                    self.check_device_dir = kwargs.get('device_dirs')

            def mock_sleep_stop(self):
                raise StopForever('stop')

            def mock_sleep_continue(self):
                return

            def mock_audit_loop_error(self, parent, zbo_fps,
                                      override_devices=None, **kwargs):
                raise loop_error

            def mock_fork(self):
                self.fork_called += 1
                if self.master:
                    return self.fork_called
                else:
                    return 0

            def mock_wait(self):
                self.wait_called += 1
                return (self.wait_called, 0)

        for i in string.ascii_letters[2:26]:
            mkdirs(os.path.join(self.devices, 'sd%s' % i))

        my_auditor = auditor.ObjectAuditor(dict(devices=self.devices,
                                                mount_check='false',
                                                zero_byte_files_per_second=89,
                                                concurrency=1))

        mocker = ObjectAuditorMock()
        my_auditor.logger.exception = mock.MagicMock()
        real_audit_loop = my_auditor.audit_loop
        my_auditor.audit_loop = mocker.mock_audit_loop_error
        my_auditor.run_audit = mocker.mock_run
        was_fork = os.fork
        was_wait = os.wait
        os.fork = mocker.mock_fork
        os.wait = mocker.mock_wait
        try:
            my_auditor._sleep = mocker.mock_sleep_stop
            my_auditor.run_once(zero_byte_fps=50)
            my_auditor.logger.exception.assert_called_once_with(
                'ERROR auditing: %s', loop_error)
            my_auditor.logger.exception.reset_mock()
            self.assertRaises(StopForever, my_auditor.run_forever)
            my_auditor.logger.exception.assert_called_once_with(
                'ERROR auditing: %s', loop_error)
            my_auditor.audit_loop = real_audit_loop

            self.assertRaises(StopForever,
                              my_auditor.run_forever, zero_byte_fps=50)
            self.assertEqual(mocker.check_kwargs['zero_byte_fps'], 50)
            self.assertEqual(mocker.fork_called, 0)

            self.assertRaises(SystemExit, my_auditor.run_once)
            self.assertEqual(mocker.fork_called, 1)
            self.assertEqual(mocker.check_kwargs['zero_byte_fps'], 89)
            self.assertEqual(mocker.check_device_dir, [])
            self.assertEqual(mocker.check_args, ())

            device_list = ['sd%s' % i for i in string.ascii_letters[2:10]]
            device_string = ','.join(device_list)
            device_string_bogus = device_string + ',bogus'

            mocker.fork_called = 0
            self.assertRaises(SystemExit, my_auditor.run_once,
                              devices=device_string_bogus)
            self.assertEqual(mocker.fork_called, 1)
            self.assertEqual(mocker.check_kwargs['zero_byte_fps'], 89)
            self.assertEqual(sorted(mocker.check_device_dir), device_list)

            mocker.master = 1

            mocker.fork_called = 0
            self.assertRaises(StopForever, my_auditor.run_forever)
            # Fork is called 2 times since the zbf process is forked just
            # once before self._sleep() is called and StopForever is raised.
            # Also, wait is called just once before StopForever is raised.
            self.assertEqual(mocker.fork_called, 2)
            self.assertEqual(mocker.wait_called, 1)

            my_auditor._sleep = mocker.mock_sleep_continue
            my_auditor.audit_loop = works_only_once(my_auditor.audit_loop,
                                                    LetMeOut())

            my_auditor.concurrency = 2
            mocker.fork_called = 0
            mocker.wait_called = 0
            self.assertRaises(LetMeOut, my_auditor.run_forever)
            # Fork is called (no. of devices) + (no. of devices) // 2 + 1
            # times, since the zbf process is forked (no. of devices) // 2
            # + 1 times
            no_devices = len(os.listdir(self.devices))
            self.assertEqual(mocker.fork_called,
                             no_devices + no_devices // 2 + 1)
            self.assertEqual(mocker.wait_called,
                             no_devices + no_devices // 2 + 1)

        finally:
            os.fork = was_fork
            os.wait = was_wait

    def test_run_audit_once(self):
        my_auditor = auditor.ObjectAuditor(dict(devices=self.devices,
                                                mount_check='false',
                                                zero_byte_files_per_second=89,
                                                concurrency=1))

        forked_pids = []
        next_zbf_pid = [2]
        next_normal_pid = [1001]
        outstanding_pids = [[]]

        def fake_fork_child(**kwargs):
            if len(forked_pids) > 10:
                # something's gone horribly wrong
                raise BaseException("forking too much")

            # ZBF pids are all smaller than the normal-audit pids; this way
            # we can return them first.
            #
            # Also, ZBF pids are even and normal-audit pids are odd; this is
            # so humans seeing this test fail can better tell what's happening.
            if kwargs.get('zero_byte_fps'):
                pid = next_zbf_pid[0]
                next_zbf_pid[0] += 2
            else:
                pid = next_normal_pid[0]
                next_normal_pid[0] += 2
            outstanding_pids[0].append(pid)
            forked_pids.append(pid)
            return pid

        def fake_os_wait():
            # Smallest pid first; that's ZBF if we have one, else normal
            outstanding_pids[0].sort()
            pid = outstanding_pids[0].pop(0)
            return (pid, 0)   # (pid, status)

        with mock.patch("swift.obj.auditor.os.wait", fake_os_wait), \
                mock.patch.object(my_auditor, 'fork_child', fake_fork_child), \
                mock.patch.object(my_auditor, '_sleep', lambda *a: None):
            my_auditor.run_once()

        self.assertEqual(sorted(forked_pids), [2, 1001])

    def test_run_parallel_audit_once(self):
        my_auditor = auditor.ObjectAuditor(
            dict(devices=self.devices, mount_check='false',
                 zero_byte_files_per_second=89, concurrency=2))

        # ZBF pids are smaller than the normal-audit pids; this way we can
        # return them first from our mocked os.wait().
        #
        # Also, ZBF pids are even and normal-audit pids are odd; this is so
        # humans seeing this test fail can better tell what's happening.
        forked_pids = []
        next_zbf_pid = [2]
        next_normal_pid = [1001]
        outstanding_pids = [[]]

        def fake_fork_child(**kwargs):
            if len(forked_pids) > 10:
                # something's gone horribly wrong; try not to hang the test
                # run because of it
                raise BaseException("forking too much")

            if kwargs.get('zero_byte_fps'):
                pid = next_zbf_pid[0]
                next_zbf_pid[0] += 2
            else:
                pid = next_normal_pid[0]
                next_normal_pid[0] += 2
            outstanding_pids[0].append(pid)
            forked_pids.append(pid)
            return pid

        def fake_os_wait():
            if not outstanding_pids[0]:
                raise BaseException("nobody waiting")

            # ZBF auditor finishes first
            outstanding_pids[0].sort()
            pid = outstanding_pids[0].pop(0)
            return (pid, 0)   # (pid, status)

        # make sure we've got enough devs that the ZBF auditor can finish
        # before all the normal auditors have been started
        mkdirs(os.path.join(self.devices, 'sdc'))
        mkdirs(os.path.join(self.devices, 'sdd'))

        with mock.patch("swift.obj.auditor.os.wait", fake_os_wait), \
                mock.patch.object(my_auditor, 'fork_child', fake_fork_child), \
                mock.patch.object(my_auditor, '_sleep', lambda *a: None):
            my_auditor.run_once()

        self.assertEqual(sorted(forked_pids), [2, 1001, 1003, 1005, 1007])

    def test_run_parallel_audit_once_failed_fork(self):
        my_auditor = auditor.ObjectAuditor(
            dict(devices=self.devices, mount_check='false',
                 concurrency=2))

        start_pid = [1001]
        outstanding_pids = []
        failed_once = [False]

        def failing_fork(**kwargs):
            # this fork fails only on the 2nd call
            # it's enough to cause the growth of orphaned child processes
            if len(outstanding_pids) > 0 and not failed_once[0]:
                failed_once[0] = True
                raise OSError
            start_pid[0] += 2
            pid = start_pid[0]
            outstanding_pids.append(pid)
            return pid

        def fake_wait():
            return outstanding_pids.pop(0), 0

        with mock.patch("swift.obj.auditor.os.wait", fake_wait), \
                mock.patch.object(my_auditor, 'fork_child', failing_fork), \
                mock.patch.object(my_auditor, '_sleep', lambda *a: None):
            for i in range(3):
                my_auditor.run_once()

        self.assertEqual(len(outstanding_pids), 0,
                         "orphaned children left {0}, expected 0."
                         .format(outstanding_pids))
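
    # Minimal sketch, assuming only os.fork()/os.wait() semantics, of the
    # bounded fork-pool behaviour the three tests above pin down: never
    # exceed `concurrency` live children, always reap before forking more,
    # and do not leak a slot when a fork fails. Names are illustrative.
    @staticmethod
    def _run_pool(jobs, fork, wait, concurrency):
        live = 0
        for job in jobs:
            if live >= concurrency:
                wait()
                live -= 1
            try:
                fork(job)
            except OSError:
                continue  # failed fork: no child to account for
            live += 1
        while live:
            wait()
            live -= 1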
Ejemplo n.º 48
0
class TestAuditor(unittest.TestCase):
    def setUp(self):
        self.testdir = os.path.join(mkdtemp(), 'tmp_test_object_auditor')
        self.devices = os.path.join(self.testdir, 'node')
        self.rcache = os.path.join(self.testdir, 'object.recon')
        self.logger = FakeLogger()
        rmtree(self.testdir, ignore_errors=1)
        mkdirs(os.path.join(self.devices, 'sda'))
        os.mkdir(os.path.join(self.devices, 'sdb'))

        # policy 0
        self.objects = os.path.join(self.devices, 'sda',
                                    get_data_dir(POLICIES[0]))
        self.objects_2 = os.path.join(self.devices, 'sdb',
                                      get_data_dir(POLICIES[0]))
        os.mkdir(self.objects)
        # policy 1
        self.objects_p1 = os.path.join(self.devices, 'sda',
                                       get_data_dir(POLICIES[1]))
        self.objects_2_p1 = os.path.join(self.devices, 'sdb',
                                         get_data_dir(POLICIES[1]))
        os.mkdir(self.objects_p1)
        # policy 2
        self.objects_p2 = os.path.join(self.devices, 'sda',
                                       get_data_dir(POLICIES[2]))
        self.objects_2_p2 = os.path.join(self.devices, 'sdb',
                                         get_data_dir(POLICIES[2]))
        os.mkdir(self.objects_p2)

        self.parts = {}
        self.parts_p1 = {}
        self.parts_p2 = {}
        for part in ['0', '1', '2', '3']:
            self.parts[part] = os.path.join(self.objects, part)
            self.parts_p1[part] = os.path.join(self.objects_p1, part)
            self.parts_p2[part] = os.path.join(self.objects_p2, part)
            os.mkdir(os.path.join(self.objects, part))
            os.mkdir(os.path.join(self.objects_p1, part))
            os.mkdir(os.path.join(self.objects_p2, part))

        self.conf = dict(devices=self.devices,
                         mount_check='false',
                         object_size_stats='10,100,1024,10240')
        self.df_mgr = DiskFileManager(self.conf, self.logger)
        self.ec_df_mgr = ECDiskFileManager(self.conf, self.logger)

        # diskfiles for policy 0, 1, 2
        self.disk_file = self.df_mgr.get_diskfile('sda',
                                                  '0',
                                                  'a',
                                                  'c',
                                                  'o',
                                                  policy=POLICIES[0])
        self.disk_file_p1 = self.df_mgr.get_diskfile('sda',
                                                     '0',
                                                     'a',
                                                     'c',
                                                     'o',
                                                     policy=POLICIES[1])
        self.disk_file_ec = self.ec_df_mgr.get_diskfile('sda',
                                                        '0',
                                                        'a',
                                                        'c',
                                                        'o',
                                                        policy=POLICIES[2],
                                                        frag_index=1)

    def tearDown(self):
        rmtree(os.path.dirname(self.testdir), ignore_errors=1)
        unit.xattr_data = {}

    def test_worker_conf_parms(self):
        def check_common_defaults():
            self.assertEqual(auditor_worker.max_bytes_per_second, 10000000)
            self.assertEqual(auditor_worker.log_time, 3600)

        # test default values
        conf = dict(devices=self.devices,
                    mount_check='false',
                    object_size_stats='10,100,1024,10240')
        auditor_worker = auditor.AuditorWorker(conf, self.logger, self.rcache,
                                               self.devices)
        check_common_defaults()
        for policy in POLICIES:
            mgr = auditor_worker.diskfile_router[policy]
            self.assertEqual(mgr.disk_chunk_size, 65536)
        self.assertEqual(auditor_worker.max_files_per_second, 20)
        self.assertEqual(auditor_worker.zero_byte_only_at_fps, 0)

        # test specified audit value overrides
        conf.update({'disk_chunk_size': 4096})
        auditor_worker = auditor.AuditorWorker(conf,
                                               self.logger,
                                               self.rcache,
                                               self.devices,
                                               zero_byte_only_at_fps=50)
        check_common_defaults()
        for policy in POLICIES:
            mgr = auditor_worker.diskfile_router[policy]
            self.assertEqual(mgr.disk_chunk_size, 4096)
        self.assertEqual(auditor_worker.max_files_per_second, 50)
        self.assertEqual(auditor_worker.zero_byte_only_at_fps, 50)

    def test_object_audit_extra_data(self):
        def run_tests(disk_file):
            auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                                   self.rcache, self.devices)
            data = b'0' * 1024
            etag = md5()
            with disk_file.create() as writer:
                writer.write(data)
                etag.update(data)
                etag = etag.hexdigest()
                timestamp = str(normalize_timestamp(time.time()))
                metadata = {
                    'ETag': etag,
                    'X-Timestamp': timestamp,
                    'Content-Length': str(os.fstat(writer._fd).st_size),
                }
                writer.put(metadata)
                writer.commit(Timestamp(timestamp))
                pre_quarantines = auditor_worker.quarantines

                auditor_worker.object_audit(
                    AuditLocation(disk_file._datadir,
                                  'sda',
                                  '0',
                                  policy=disk_file.policy))
                self.assertEqual(auditor_worker.quarantines, pre_quarantines)

                os.write(writer._fd, b'extra_data')

                auditor_worker.object_audit(
                    AuditLocation(disk_file._datadir,
                                  'sda',
                                  '0',
                                  policy=disk_file.policy))
                self.assertEqual(auditor_worker.quarantines,
                                 pre_quarantines + 1)

        run_tests(self.disk_file)
        run_tests(self.disk_file_p1)
        run_tests(self.disk_file_ec)

    def test_object_audit_diff_data(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        data = b'0' * 1024
        etag = md5()
        timestamp = str(normalize_timestamp(time.time()))
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            writer.commit(Timestamp(timestamp))
            pre_quarantines = auditor_worker.quarantines

        # remake so it will have metadata
        self.disk_file = self.df_mgr.get_diskfile('sda',
                                                  '0',
                                                  'a',
                                                  'c',
                                                  'o',
                                                  policy=POLICIES.legacy)

        auditor_worker.object_audit(
            AuditLocation(self.disk_file._datadir,
                          'sda',
                          '0',
                          policy=POLICIES.legacy))
        self.assertEqual(auditor_worker.quarantines, pre_quarantines)
        etag = md5()
        etag.update(b'1' + b'0' * 1023)
        etag = etag.hexdigest()
        metadata['ETag'] = etag

        with self.disk_file.create() as writer:
            writer.write(data)
            writer.put(metadata)
            writer.commit(Timestamp(timestamp))

        auditor_worker.object_audit(
            AuditLocation(self.disk_file._datadir,
                          'sda',
                          '0',
                          policy=POLICIES.legacy))
        self.assertEqual(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_audit_no_meta(self):
        timestamp = str(normalize_timestamp(time.time()))
        path = os.path.join(self.disk_file._datadir, timestamp + '.data')
        mkdirs(self.disk_file._datadir)
        fp = open(path, 'wb')
        fp.write(b'0' * 1024)
        fp.close()
        invalidate_hash(os.path.dirname(self.disk_file._datadir))
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        pre_quarantines = auditor_worker.quarantines
        auditor_worker.object_audit(
            AuditLocation(self.disk_file._datadir,
                          'sda',
                          '0',
                          policy=POLICIES.legacy))
        self.assertEqual(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_audit_will_not_swallow_errors_in_tests(self):
        timestamp = str(normalize_timestamp(time.time()))
        path = os.path.join(self.disk_file._datadir, timestamp + '.data')
        mkdirs(self.disk_file._datadir)
        with open(path, 'w') as f:
            write_metadata(f, {'name': '/a/c/o'})
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)

        def blowup(*args):
            raise NameError('tpyo')

        with mock.patch.object(DiskFileManager,
                               'get_diskfile_from_audit_location', blowup):
            self.assertRaises(
                NameError, auditor_worker.object_audit,
                AuditLocation(os.path.dirname(path),
                              'sda',
                              '0',
                              policy=POLICIES.legacy))

    def test_failsafe_object_audit_will_swallow_errors_in_tests(self):
        timestamp = str(normalize_timestamp(time.time()))
        path = os.path.join(self.disk_file._datadir, timestamp + '.data')
        mkdirs(self.disk_file._datadir)
        with open(path, 'w') as f:
            write_metadata(f, {'name': '/a/c/o'})
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)

        def blowup(*args):
            raise NameError('tpyo')

        with mock.patch('swift.obj.diskfile.DiskFileManager.diskfile_cls',
                        blowup):
            auditor_worker.failsafe_object_audit(
                AuditLocation(os.path.dirname(path),
                              'sda',
                              '0',
                              policy=POLICIES.legacy))
        self.assertEqual(auditor_worker.errors, 1)

    def test_generic_exception_handling(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        # pretend that we logged (and reset counters) just now
        auditor_worker.last_logged = time.time()
        timestamp = str(normalize_timestamp(time.time()))
        pre_errors = auditor_worker.errors
        data = b'0' * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            writer.commit(Timestamp(timestamp))
        with mock.patch('swift.obj.diskfile.DiskFileManager.diskfile_cls',
                        lambda *_: 1 / 0):
            auditor_worker.audit_all_objects()
        self.assertEqual(auditor_worker.errors, pre_errors + 1)

    def test_object_run_once_pass(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        auditor_worker.log_time = 0
        timestamp = str(normalize_timestamp(time.time()))
        pre_quarantines = auditor_worker.quarantines
        data = b'0' * 1024

        def write_file(df):
            with df.create() as writer:
                writer.write(data)
                metadata = {
                    'ETag': md5(data).hexdigest(),
                    'X-Timestamp': timestamp,
                    'Content-Length': str(os.fstat(writer._fd).st_size),
                }
                writer.put(metadata)
                writer.commit(Timestamp(timestamp))

        # policy 0
        write_file(self.disk_file)
        # policy 1
        write_file(self.disk_file_p1)
        # policy 2
        write_file(self.disk_file_ec)

        auditor_worker.audit_all_objects()
        self.assertEqual(auditor_worker.quarantines, pre_quarantines)
        # 1 object per policy falls into 1024 bucket
        self.assertEqual(auditor_worker.stats_buckets[1024], 3)
        self.assertEqual(auditor_worker.stats_buckets[10240], 0)

        # pick up some additional code coverage, large file
        data = b'0' * 1024 * 1024
        for df in (self.disk_file, self.disk_file_ec):
            with df.create() as writer:
                writer.write(data)
                metadata = {
                    'ETag': md5(data).hexdigest(),
                    'X-Timestamp': timestamp,
                    'Content-Length': str(os.fstat(writer._fd).st_size),
                }
                writer.put(metadata)
                writer.commit(Timestamp(timestamp))
        auditor_worker.audit_all_objects(device_dirs=['sda', 'sdb'])
        self.assertEqual(auditor_worker.quarantines, pre_quarantines)
        # still have the 1024 byte object left in policy-1 (plus the
        # stats from the original 3)
        self.assertEqual(auditor_worker.stats_buckets[1024], 4)
        self.assertEqual(auditor_worker.stats_buckets[10240], 0)
        # and then policy-0 disk_file was re-written as a larger object
        self.assertEqual(auditor_worker.stats_buckets['OVER'], 2)

        # pick up even more additional code coverage, misc paths
        auditor_worker.log_time = -1
        auditor_worker.stats_sizes = []
        auditor_worker.audit_all_objects(device_dirs=['sda', 'sdb'])
        self.assertEqual(auditor_worker.quarantines, pre_quarantines)
        self.assertEqual(auditor_worker.stats_buckets[1024], 4)
        self.assertEqual(auditor_worker.stats_buckets[10240], 0)
        self.assertEqual(auditor_worker.stats_buckets['OVER'], 2)

    def test_object_run_logging(self):
        logger = FakeLogger()
        auditor_worker = auditor.AuditorWorker(self.conf, logger, self.rcache,
                                               self.devices)
        auditor_worker.audit_all_objects(device_dirs=['sda'])
        log_lines = logger.get_lines_for_level('info')
        self.assertTrue(len(log_lines) > 0)
        self.assertIn('ALL - parallel, sda', log_lines[0])

        logger = FakeLogger()
        auditor_worker = auditor.AuditorWorker(self.conf,
                                               logger,
                                               self.rcache,
                                               self.devices,
                                               zero_byte_only_at_fps=50)
        auditor_worker.audit_all_objects(device_dirs=['sda'])
        log_lines = logger.get_lines_for_level('info')
        self.assertTrue(len(log_lines) > 0)
        self.assertIn('ZBF - sda', log_lines[0])

    def test_object_run_once_no_sda(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        timestamp = str(normalize_timestamp(time.time()))
        pre_quarantines = auditor_worker.quarantines
        # pretend that we logged (and reset counters) just now
        auditor_worker.last_logged = time.time()
        data = '0' * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
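            # append garbage after put() so the bytes on disk no longer match
            # the Content-Length metadata; the auditor should quarantine this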
            os.write(writer._fd, 'extra_data')
            writer.commit(Timestamp(timestamp))
        auditor_worker.audit_all_objects()
        self.assertEqual(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_run_once_multi_devices(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        # pretend that we logged (and reset counters) just now
        auditor_worker.last_logged = time.time()
        timestamp = str(normalize_timestamp(time.time()))
        pre_quarantines = auditor_worker.quarantines
        data = '0' * 10
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            writer.commit(Timestamp(timestamp))
        auditor_worker.audit_all_objects()
        self.disk_file = self.df_mgr.get_diskfile('sda',
                                                  '0',
                                                  'a',
                                                  'c',
                                                  'ob',
                                                  policy=POLICIES.legacy)
        data = '1' * 10
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            writer.commit(Timestamp(timestamp))
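            # corrupt this object the same way: extra bytes past the recorded
            # Content-Length should get it quarantined on the next audit pass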
            os.write(writer._fd, 'extra_data')
        auditor_worker.audit_all_objects()
        self.assertEqual(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_run_fast_track_non_zero(self):
        self.auditor = auditor.ObjectAuditor(self.conf)
        self.auditor.log_time = 0
        data = '0' * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            timestamp = str(normalize_timestamp(time.time()))
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            writer.commit(Timestamp(timestamp))
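            # rewrite the stored ETag so it no longer matches the data on
            # disk; the ZBF pass skips non-zero-byte files, but the regular
            # audit should quarantine this object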
            etag = md5()
            etag.update('1' + '0' * 1023)
            etag = etag.hexdigest()
            metadata['ETag'] = etag
            write_metadata(writer._fd, metadata)

        quarantine_path = os.path.join(self.devices, 'sda', 'quarantined',
                                       'objects')
        kwargs = {'mode': 'once'}
        kwargs['zero_byte_fps'] = 50
        self.auditor.run_audit(**kwargs)
        self.assertFalse(os.path.isdir(quarantine_path))
        del kwargs['zero_byte_fps']
        clear_auditor_status(self.devices)
        self.auditor.run_audit(**kwargs)
        self.assertTrue(os.path.isdir(quarantine_path))

    def setup_bad_zero_byte(self, timestamp=None):
        if timestamp is None:
            timestamp = Timestamp(time.time())
        self.auditor = auditor.ObjectAuditor(self.conf)
        self.auditor.log_time = 0
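        # create a zero-byte .data file whose metadata claims 10 bytes;
        # both the ZBF and regular auditors should treat it as corrupt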
        etag = md5()
        with self.disk_file.create() as writer:
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp.internal,
                'Content-Length': 10,
            }
            writer.put(metadata)
            writer.commit(Timestamp(timestamp))
            etag = md5()
            etag = etag.hexdigest()
            metadata['ETag'] = etag
            write_metadata(writer._fd, metadata)

    def test_object_run_fast_track_all(self):
        self.setup_bad_zero_byte()
        kwargs = {'mode': 'once'}
        self.auditor.run_audit(**kwargs)
        quarantine_path = os.path.join(self.devices, 'sda', 'quarantined',
                                       'objects')
        self.assertTrue(os.path.isdir(quarantine_path))

    def test_object_run_fast_track_zero(self):
        self.setup_bad_zero_byte()
        kwargs = {'mode': 'once'}
        kwargs['zero_byte_fps'] = 50

        called_args = [0]

        def mock_get_auditor_status(path, logger, audit_type):
            called_args[0] = audit_type
            return get_auditor_status(path, logger, audit_type)

        with mock.patch('swift.obj.diskfile.get_auditor_status',
                        mock_get_auditor_status):
            self.auditor.run_audit(**kwargs)
        quarantine_path = os.path.join(self.devices, 'sda', 'quarantined',
                                       'objects')
        self.assertTrue(os.path.isdir(quarantine_path))
        self.assertEqual('ZBF', called_args[0])

    def test_object_run_fast_track_zero_check_closed(self):
        rat = [False]

        class FakeFile(DiskFile):
            def _quarantine(self, data_file, msg):
                rat[0] = True
                DiskFile._quarantine(self, data_file, msg)

        self.setup_bad_zero_byte()
        with mock.patch('swift.obj.diskfile.DiskFileManager.diskfile_cls',
                        FakeFile):
            kwargs = {'mode': 'once'}
            kwargs['zero_byte_fps'] = 50
            self.auditor.run_audit(**kwargs)
            quarantine_path = os.path.join(self.devices, 'sda', 'quarantined',
                                           'objects')
            self.assertTrue(os.path.isdir(quarantine_path))
            self.assertTrue(rat[0])

    @mock.patch.object(auditor.ObjectAuditor, 'run_audit')
    @mock.patch('os.fork', return_value=0)
    def test_with_inaccessible_object_location(self, mock_os_fork,
                                               mock_run_audit):
        # Need to ensure that any failures in run_audit do
        # not prevent sys.exit() from running.  Otherwise we get
        # zombie processes.
        e = OSError('permission denied')
        mock_run_audit.side_effect = e
        self.auditor = auditor.ObjectAuditor(self.conf)
        self.assertRaises(SystemExit, self.auditor.fork_child, self)

    def test_with_only_tombstone(self):
        # sanity check that auditor doesn't touch solitary tombstones
        ts_iter = make_timestamp_iter()
        self.setup_bad_zero_byte(timestamp=next(ts_iter))
        self.disk_file.delete(next(ts_iter))
        files = os.listdir(self.disk_file._datadir)
        self.assertEqual(1, len(files))
        self.assertTrue(files[0].endswith('ts'))
        kwargs = {'mode': 'once'}
        self.auditor.run_audit(**kwargs)
        files_after = os.listdir(self.disk_file._datadir)
        self.assertEqual(files, files_after)

    def test_with_tombstone_and_data(self):
        # rsync replication could leave a tombstone and data file in object
        # dir - verify they are both removed during audit
        ts_iter = make_timestamp_iter()
        ts_tomb = next(ts_iter)
        ts_data = next(ts_iter)
        self.setup_bad_zero_byte(timestamp=ts_data)
        tomb_file_path = os.path.join(self.disk_file._datadir,
                                      '%s.ts' % ts_tomb.internal)
        with open(tomb_file_path, 'wb') as fd:
            write_metadata(fd, {'X-Timestamp': ts_tomb.internal})
        files = os.listdir(self.disk_file._datadir)
        self.assertEqual(2, len(files))
        self.assertTrue(os.path.basename(tomb_file_path) in files, files)
        kwargs = {'mode': 'once'}
        self.auditor.run_audit(**kwargs)
        self.assertFalse(os.path.exists(self.disk_file._datadir))

    def test_sleeper(self):
        with mock.patch('time.sleep', mock.MagicMock()) as mock_sleep:
            my_auditor = auditor.ObjectAuditor(self.conf)
            my_auditor._sleep()
            mock_sleep.assert_called_with(30)

            my_conf = dict(interval=2)
            my_conf.update(self.conf)
            my_auditor = auditor.ObjectAuditor(my_conf)
            my_auditor._sleep()
            mock_sleep.assert_called_with(2)

            my_auditor = auditor.ObjectAuditor(self.conf)
            my_auditor.interval = 2
            my_auditor._sleep()
            mock_sleep.assert_called_with(2)

    def test_run_parallel_audit(self):
        class StopForever(Exception):
            pass

        class Bogus(Exception):
            pass

        loop_error = Bogus('exception')

        class LetMeOut(BaseException):
            pass

        class ObjectAuditorMock(object):
            check_args = ()
            check_kwargs = {}
            check_device_dir = None
            fork_called = 0
            master = 0
            wait_called = 0

            def mock_run(self, *args, **kwargs):
                self.check_args = args
                self.check_kwargs = kwargs
                if 'zero_byte_fps' in kwargs:
                    self.check_device_dir = kwargs.get('device_dirs')

            def mock_sleep_stop(self):
                raise StopForever('stop')

            def mock_sleep_continue(self):
                return

            def mock_audit_loop_error(self,
                                      parent,
                                      zbo_fps,
                                      override_devices=None,
                                      **kwargs):
                raise loop_error

            def mock_fork(self):
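                # mimic os.fork(): return a fake child pid in the parent
                # ("master") and 0 in the child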
                self.fork_called += 1
                if self.master:
                    return self.fork_called
                else:
                    return 0

            def mock_wait(self):
                self.wait_called += 1
                return (self.wait_called, 0)

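        # create a couple dozen extra device dirs (sdc through sdz) so the
        # parallel auditor has plenty of devices to fork over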
        for i in string.ascii_letters[2:26]:
            mkdirs(os.path.join(self.devices, 'sd%s' % i))

        my_auditor = auditor.ObjectAuditor(
            dict(devices=self.devices,
                 mount_check='false',
                 zero_byte_files_per_second=89,
                 concurrency=1))

        mocker = ObjectAuditorMock()
        my_auditor.logger.exception = mock.MagicMock()
        real_audit_loop = my_auditor.audit_loop
        my_auditor.audit_loop = mocker.mock_audit_loop_error
        my_auditor.run_audit = mocker.mock_run
        was_fork = os.fork
        was_wait = os.wait
        os.fork = mocker.mock_fork
        os.wait = mocker.mock_wait
        try:
            my_auditor._sleep = mocker.mock_sleep_stop
            my_auditor.run_once(zero_byte_fps=50)
            my_auditor.logger.exception.assert_called_once_with(
                'ERROR auditing: %s', loop_error)
            my_auditor.logger.exception.reset_mock()
            self.assertRaises(StopForever, my_auditor.run_forever)
            my_auditor.logger.exception.assert_called_once_with(
                'ERROR auditing: %s', loop_error)
            my_auditor.audit_loop = real_audit_loop

            self.assertRaises(StopForever,
                              my_auditor.run_forever,
                              zero_byte_fps=50)
            self.assertEqual(mocker.check_kwargs['zero_byte_fps'], 50)
            self.assertEqual(mocker.fork_called, 0)

            self.assertRaises(SystemExit, my_auditor.run_once)
            self.assertEqual(mocker.fork_called, 1)
            self.assertEqual(mocker.check_kwargs['zero_byte_fps'], 89)
            self.assertEqual(mocker.check_device_dir, [])
            self.assertEqual(mocker.check_args, ())

            device_list = ['sd%s' % i for i in string.ascii_letters[2:10]]
            device_string = ','.join(device_list)
            device_string_bogus = device_string + ',bogus'

            mocker.fork_called = 0
            self.assertRaises(SystemExit,
                              my_auditor.run_once,
                              devices=device_string_bogus)
            self.assertEqual(mocker.fork_called, 1)
            self.assertEqual(mocker.check_kwargs['zero_byte_fps'], 89)
            self.assertEqual(sorted(mocker.check_device_dir), device_list)

            mocker.master = 1

            mocker.fork_called = 0
            self.assertRaises(StopForever, my_auditor.run_forever)
            # Fork is called 2 times, since the ZBF process is forked just
            # once before self._sleep() is called and StopForever is raised.
            # wait() is likewise called just once before StopForever is
            # raised.
            self.assertEqual(mocker.fork_called, 2)
            self.assertEqual(mocker.wait_called, 1)

            my_auditor._sleep = mocker.mock_sleep_continue
            my_auditor.audit_loop = works_only_once(my_auditor.audit_loop,
                                                    LetMeOut())

            my_auditor.concurrency = 2
            mocker.fork_called = 0
            mocker.wait_called = 0
            self.assertRaises(LetMeOut, my_auditor.run_forever)
            # Fork is called (no. of devices) + (no. of devices)/2 + 1 times,
            # since the ZBF process is forked (no. of devices)/2 + 1 times
            no_devices = len(os.listdir(self.devices))
            self.assertEqual(mocker.fork_called,
                             no_devices + no_devices / 2 + 1)
            self.assertEqual(mocker.wait_called,
                             no_devices + no_devices / 2 + 1)

        finally:
            os.fork = was_fork
            os.wait = was_wait

    def test_run_audit_once(self):
        my_auditor = auditor.ObjectAuditor(
            dict(devices=self.devices,
                 mount_check='false',
                 zero_byte_files_per_second=89,
                 concurrency=1))

        forked_pids = []
        next_zbf_pid = [2]
        next_normal_pid = [1001]
        outstanding_pids = [[]]

        def fake_fork_child(**kwargs):
            if len(forked_pids) > 10:
                # something's gone horribly wrong
                raise BaseException("forking too much")

            # ZBF pids are all smaller than the normal-audit pids; this way
            # we can return them first.
            #
            # Also, ZBF pids are even and normal-audit pids are odd; this is
            # so humans seeing this test fail can better tell what's happening.
            if kwargs.get('zero_byte_fps'):
                pid = next_zbf_pid[0]
                next_zbf_pid[0] += 2
            else:
                pid = next_normal_pid[0]
                next_normal_pid[0] += 2
            outstanding_pids[0].append(pid)
            forked_pids.append(pid)
            return pid

        def fake_os_wait():
            # Smallest pid first; that's ZBF if we have one, else normal
            outstanding_pids[0].sort()
            pid = outstanding_pids[0].pop(0)
            return (pid, 0)  # (pid, status)

        with mock.patch("swift.obj.auditor.os.wait", fake_os_wait), \
                mock.patch.object(my_auditor, 'fork_child', fake_fork_child), \
                mock.patch.object(my_auditor, '_sleep', lambda *a: None):
            my_auditor.run_once()

        self.assertEqual(sorted(forked_pids), [2, 1001])

    def test_run_parallel_audit_once(self):
        my_auditor = auditor.ObjectAuditor(
            dict(devices=self.devices,
                 mount_check='false',
                 zero_byte_files_per_second=89,
                 concurrency=2))

        # ZBF pids are smaller than the normal-audit pids; this way we can
        # return them first from our mocked os.wait().
        #
        # Also, ZBF pids are even and normal-audit pids are odd; this is so
        # humans seeing this test fail can better tell what's happening.
        forked_pids = []
        next_zbf_pid = [2]
        next_normal_pid = [1001]
        outstanding_pids = [[]]

        def fake_fork_child(**kwargs):
            if len(forked_pids) > 10:
                # something's gone horribly wrong; try not to hang the test
                # run because of it
                raise BaseException("forking too much")

            if kwargs.get('zero_byte_fps'):
                pid = next_zbf_pid[0]
                next_zbf_pid[0] += 2
            else:
                pid = next_normal_pid[0]
                next_normal_pid[0] += 2
            outstanding_pids[0].append(pid)
            forked_pids.append(pid)
            return pid

        def fake_os_wait():
            if not outstanding_pids[0]:
                raise BaseException("nobody waiting")

            # ZBF auditor finishes first
            outstanding_pids[0].sort()
            pid = outstanding_pids[0].pop(0)
            return (pid, 0)  # (pid, status)

        # make sure we've got enough devs that the ZBF auditor can finish
        # before all the normal auditors have been started
        mkdirs(os.path.join(self.devices, 'sdc'))
        mkdirs(os.path.join(self.devices, 'sdd'))

        with mock.patch("swift.obj.auditor.os.wait", fake_os_wait), \
                mock.patch.object(my_auditor, 'fork_child', fake_fork_child), \
                mock.patch.object(my_auditor, '_sleep', lambda *a: None):
            my_auditor.run_once()

        self.assertEqual(sorted(forked_pids), [2, 1001, 1003, 1005, 1007])
Ejemplo n.º 49
0
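# Likely imports for this example (the original snippet omits them; the
# Sender class is specific to this crawler, so its module is a guess):
#   import time
#   from random import random
#   from eventlet import Timeout
#   from swift.common.daemon import Daemon
#   from swift.common.exceptions import DiskFileNotExist
#   from swift.common.utils import get_logger, config_true_value
#   from swift.obj.diskfile import DiskFileManager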
class ObjectCrawler(Daemon):
    """Update object information in container listings."""

    def __init__(self, conf):
        self.conf = conf
        self.logger = get_logger(conf, log_route='object-updater')
        self.devices = conf.get('devices', '/srv/node')
        self.ip = conf.get('md-server-ip', '127.0.0.1')
        self.port = conf.get('md-server-port', '6090')
        self.mount_check = config_true_value(conf.get('mount_check', 'true'))
        self.swift_dir = conf.get('swift_dir', '/etc/swift')
        self.interval = int(conf.get('interval', 30))
        self.last_time_ran = 0
        self.diskfile_mgr = DiskFileManager(conf, self.logger)

    def run_forever(self, *args, **kwargs):
        """Run the updater continuously."""
        time.sleep(random() * self.interval)
        while True:
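            # crude on-disk debug breadcrumb: mark the start of every sweep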
            with open("/opt/swift/OBJLOG.txt", "a+") as f:
                f.write("START\n")
            try:
                self.object_sweep()
                self.last_time_ran = time.time()
            except (Exception, Timeout):
                # log the failure instead of silently swallowing it; the
                # daemon keeps running either way
                self.logger.exception('Object sweep failed')
            time.sleep(self.interval)

    def run_once(self, *args, **kwargs):
        """Run the updater once."""
        self.object_sweep()

    def object_sweep(self):
        """
        Scan all local objects and send metadata dicts for those modified
        since the last sweep.
        """

        all_locs = self.diskfile_mgr.object_audit_location_generator()
        metaList = []
        for location in all_locs:
            try:
                metaDict = self.collect_object(location)

                metaDict = self.format_metadata(metaDict)
                if metaDict:
                    modtime = metaDict["object_last_modified_time"]
                    if modtime != 'NULL' and float(modtime) > self.last_time_ran:
                        metaList.append(metaDict)
            except Exception:
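                # skip objects we cannot read or format; move on to the next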
                pass
            
        if metaList:
            ObjectSender = Sender(self.conf)
            ObjectSender.sendData(
                metaList, 'object_crawler', self.ip, self.port)

    def collect_object(self, location):
        """
        Process the object metadata
        """
        metadata = {}
        try:
            df = self.diskfile_mgr.get_diskfile_from_audit_location(location)
            metadata = df.read_metadata()
        except DiskFileNotExist:
            pass
        return metadata

    def format_metadata(self, data):
        metadata = {}
        uri = data['name'].split("/")
        metadata['object_uri'] = data['name']
        metadata['object_name'] = ("/".join(uri[3:]))
        metadata['object_account_name'] = uri[1]
        metadata['object_container_name'] = uri[2]
        metadata['object_location'] = 'NULL'  # Not implemented yet
        metadata['object_uri_create_time'] = \
            data.setdefault('X-Timestamp', 'NULL')

        metadata['object_last_modified_time'] = \
            data.setdefault('X-Timestamp', 'NULL')

        metadata['object_last_changed_time'] = 'NULL'

        metadata['object_delete_time'] = 'NULL'

        metadata['object_last_activity_time'] = \
            data.setdefault('X-Timestamp', 'NULL')

        metadata['object_etag_hash'] = \
            data.setdefault('ETag', 'NULL')

        metadata['object_content_type'] = \
            data.setdefault('Content-Type', 'NULL')

        metadata['object_content_length'] = \
            data.setdefault('Content-Length', 'NULL')

        metadata['object_content_encoding'] = \
            data.setdefault('Content-Encoding', 'NULL')

        metadata['object_content_disposition'] = \
            data.setdefault('Content-Disposition', 'NULL')

        metadata['object_content_language'] = \
            data.setdefault('Content-Language', 'NULL')

        metadata['object_cache_control'] = 'NULL'

        metadata['object_delete_at'] = \
            data.setdefault('X-Delete-At', 'NULL')

        metadata['object_manifest_type'] = 'NULL'
        metadata['object_manifest'] = 'NULL'
        metadata['object_access_control_allow_origin'] = 'NULL'
        metadata['object_access_control_allow_credentials'] = 'NULL'
        metadata['object_access_control_expose_headers'] = 'NULL'
        metadata['object_access_control_max_age'] = 'NULL'
        metadata['object_access_control_allow_methods'] = 'NULL'
        metadata['object_access_control_allow_headers'] = 'NULL'
        metadata['object_origin'] = 'NULL'
        metadata['object_access_control_request_method'] = 'NULL'
        metadata['object_access_control_request_headers'] = 'NULL'

        # Insert all object custom metadata, normalizing the header name:
        # e.g. 'X-Object-Meta-Color' becomes 'object_meta_Color'
        for custom in data:
            if custom.startswith("X-Object-Meta"):
                sanitized_custom = custom[2:13].lower() + custom[13:]
                sanitized_custom = sanitized_custom.replace('-', '_')
                metadata[sanitized_custom] = data[custom]

        return metadata
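

# A standalone sketch (hypothetical helper, not part of the crawler) of how
# format_metadata() above decomposes the diskfile 'name' value into the
# account/container/object fields:
def _split_object_uri(name):
    # a diskfile 'name' looks like '/<account>/<container>/<object path...>'
    parts = name.split('/')
    return parts[1], parts[2], '/'.join(parts[3:])

# _split_object_uri('/AUTH_test/pics/2015/cat.jpg')
# -> ('AUTH_test', 'pics', '2015/cat.jpg')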