def _get_object_info(self, account, container, obj, number):
    obj_conf = self.configs['object-server']
    config_path = obj_conf[number]
    options = utils.readconf(config_path, 'app:object-server')
    swift_dir = options.get('swift_dir', '/etc/swift')
    ring = POLICIES.get_object_ring(int(self.policy), swift_dir)
    part, nodes = ring.get_nodes(account, container, obj)
    for node in nodes:
        # assumes a one-to-one mapping of object servers to ports
        if node['port'] == int(options.get('bind_port')):
            device = node['device']
            break
    else:
        # loop finished without a break: no primary node for this object is
        # served by this object server
        return None
    mgr = DiskFileManager(options, get_logger(options))
    disk_file = mgr.get_diskfile(device, part, account, container, obj,
                                 self.policy)
    info = disk_file.read_metadata()
    return info
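
A minimal usage sketch, not part of the snippet above: it assumes the enclosing class provides self.configs, self.policy and unittest-style assertions, and the account/container/object names and server number are placeholders; read_metadata() normally returns keys such as 'name', 'X-Timestamp', 'Content-Length' and 'ETag'.

    info = self._get_object_info('AUTH_test', 'c1', 'o1', number=1)
    if info is None:
        self.fail('object server #1 holds no primary copy of this object')
    else:
        self.assertEqual(info['name'], '/AUTH_test/c1/o1')
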
Example #2
class TestAuditor(unittest.TestCase):

    def setUp(self):
        self.testdir = os.path.join(mkdtemp(), 'tmp_test_object_auditor')
        self.devices = os.path.join(self.testdir, 'node')
        self.rcache = os.path.join(self.testdir, 'object.recon')
        self.logger = FakeLogger()
        rmtree(self.testdir, ignore_errors=1)
        mkdirs(os.path.join(self.devices, 'sda'))
        self.objects = os.path.join(self.devices, 'sda', 'objects')

        os.mkdir(os.path.join(self.devices, 'sdb'))
        self.objects_2 = os.path.join(self.devices, 'sdb', 'objects')

        os.mkdir(self.objects)
        self.parts = {}
        for part in ['0', '1', '2', '3']:
            self.parts[part] = os.path.join(self.objects, part)
            os.mkdir(os.path.join(self.objects, part))

        self.conf = dict(
            devices=self.devices,
            mount_check='false',
            object_size_stats='10,100,1024,10240')
        self.df_mgr = DiskFileManager(self.conf, self.logger)
        self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'o')

    def tearDown(self):
        rmtree(os.path.dirname(self.testdir), ignore_errors=1)
        unit.xattr_data = {}

    def test_object_audit_extra_data(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        data = '0' * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            timestamp = str(normalize_timestamp(time.time()))
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            pre_quarantines = auditor_worker.quarantines

            auditor_worker.object_audit(
                AuditLocation(self.disk_file._datadir, 'sda', '0'))
            self.assertEquals(auditor_worker.quarantines, pre_quarantines)

            os.write(writer._fd, 'extra_data')
            auditor_worker.object_audit(
                AuditLocation(self.disk_file._datadir, 'sda', '0'))
            self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_audit_diff_data(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        data = '0' * 1024
        etag = md5()
        timestamp = str(normalize_timestamp(time.time()))
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            pre_quarantines = auditor_worker.quarantines

        # re-create the diskfile so it picks up the metadata just written
        self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'o')

        auditor_worker.object_audit(
            AuditLocation(self.disk_file._datadir, 'sda', '0'))
        self.assertEquals(auditor_worker.quarantines, pre_quarantines)
        etag = md5()
        etag.update('1' + '0' * 1023)
        etag = etag.hexdigest()
        metadata['ETag'] = etag

        with self.disk_file.create() as writer:
            writer.write(data)
            writer.put(metadata)

        auditor_worker.object_audit(
            AuditLocation(self.disk_file._datadir, 'sda', '0'))
        self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_audit_no_meta(self):
        timestamp = str(normalize_timestamp(time.time()))
        path = os.path.join(self.disk_file._datadir, timestamp + '.data')
        mkdirs(self.disk_file._datadir)
        fp = open(path, 'w')
        fp.write('0' * 1024)
        fp.close()
        invalidate_hash(os.path.dirname(self.disk_file._datadir))
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        pre_quarantines = auditor_worker.quarantines
        auditor_worker.object_audit(
            AuditLocation(self.disk_file._datadir, 'sda', '0'))
        self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_audit_will_not_swallow_errors_in_tests(self):
        timestamp = str(normalize_timestamp(time.time()))
        path = os.path.join(self.disk_file._datadir, timestamp + '.data')
        mkdirs(self.disk_file._datadir)
        with open(path, 'w') as f:
            write_metadata(f, {'name': '/a/c/o'})
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)

        def blowup(*args):
            raise NameError('tpyo')
        with mock.patch.object(DiskFileManager,
                               'get_diskfile_from_audit_location', blowup):
            self.assertRaises(NameError, auditor_worker.object_audit,
                              AuditLocation(os.path.dirname(path), 'sda', '0'))

    def test_failsafe_object_audit_will_swallow_errors_in_tests(self):
        timestamp = str(normalize_timestamp(time.time()))
        path = os.path.join(self.disk_file._datadir, timestamp + '.data')
        mkdirs(self.disk_file._datadir)
        with open(path, 'w') as f:
            write_metadata(f, {'name': '/a/c/o'})
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)

        def blowup(*args):
            raise NameError('tpyo')
        with mock.patch('swift.obj.diskfile.DiskFile', blowup):
            auditor_worker.failsafe_object_audit(
                AuditLocation(os.path.dirname(path), 'sda', '0'))
        self.assertEquals(auditor_worker.errors, 1)

    def test_generic_exception_handling(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        timestamp = str(normalize_timestamp(time.time()))
        pre_errors = auditor_worker.errors
        data = '0' * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
        with mock.patch('swift.obj.diskfile.DiskFile', lambda *_: 1 / 0):
            auditor_worker.audit_all_objects()
        self.assertEquals(auditor_worker.errors, pre_errors + 1)

    def test_object_run_once_pass(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        auditor_worker.log_time = 0
        timestamp = str(normalize_timestamp(time.time()))
        pre_quarantines = auditor_worker.quarantines
        data = '0' * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
        auditor_worker.audit_all_objects()
        self.assertEquals(auditor_worker.quarantines, pre_quarantines)
        self.assertEquals(auditor_worker.stats_buckets[1024], 1)
        self.assertEquals(auditor_worker.stats_buckets[10240], 0)

        # pick up some additional code coverage, large file
        data = '0' * 1024 * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
        auditor_worker.audit_all_objects(device_dirs=['sda', 'sdb'])
        self.assertEquals(auditor_worker.quarantines, pre_quarantines)
        self.assertEquals(auditor_worker.stats_buckets[1024], 1)
        self.assertEquals(auditor_worker.stats_buckets[10240], 0)
        self.assertEquals(auditor_worker.stats_buckets['OVER'], 1)

        # pick up even more additional code coverage, misc paths
        auditor_worker.log_time = -1
        auditor_worker.stats_sizes = []
        auditor_worker.audit_all_objects(device_dirs=['sda', 'sdb'])
        self.assertEquals(auditor_worker.quarantines, pre_quarantines)
        self.assertEquals(auditor_worker.stats_buckets[1024], 1)
        self.assertEquals(auditor_worker.stats_buckets[10240], 0)
        self.assertEquals(auditor_worker.stats_buckets['OVER'], 1)

    def test_object_run_once_no_sda(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        timestamp = str(normalize_timestamp(time.time()))
        pre_quarantines = auditor_worker.quarantines
        data = '0' * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            os.write(writer._fd, 'extra_data')
        auditor_worker.audit_all_objects()
        self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_run_once_multi_devices(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        timestamp = str(normalize_timestamp(time.time()))
        pre_quarantines = auditor_worker.quarantines
        data = '0' * 10
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
        auditor_worker.audit_all_objects()
        self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'ob')
        data = '1' * 10
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            os.write(writer._fd, 'extra_data')
        auditor_worker.audit_all_objects()
        self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_run_fast_track_non_zero(self):
        self.auditor = auditor.ObjectAuditor(self.conf)
        self.auditor.log_time = 0
        data = '0' * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': str(normalize_timestamp(time.time())),
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            etag = md5()
            etag.update('1' + '0' * 1023)
            etag = etag.hexdigest()
            metadata['ETag'] = etag
            write_metadata(writer._fd, metadata)

        quarantine_path = os.path.join(self.devices,
                                       'sda', 'quarantined', 'objects')
        kwargs = {'mode': 'once'}
        kwargs['zero_byte_fps'] = 50
        self.auditor.run_audit(**kwargs)
        self.assertFalse(os.path.isdir(quarantine_path))
        del kwargs['zero_byte_fps']
        self.auditor.run_audit(**kwargs)
        self.assertTrue(os.path.isdir(quarantine_path))

    def setup_bad_zero_byte(self, with_ts=False):
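        # Write an object whose data file is zero bytes long but whose
        # metadata claims a non-zero Content-Length (with the ETag of empty
        # data), so the auditor should quarantine it; with_ts=True also drops
        # a tombstone file into the same hash dir.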
        self.auditor = auditor.ObjectAuditor(self.conf)
        self.auditor.log_time = 0
        ts_file_path = ''
        if with_ts:
            name_hash = hash_path('a', 'c', 'o')
            dir_path = os.path.join(
                self.devices, 'sda',
                storage_directory(DATADIR, '0', name_hash))
            ts_file_path = os.path.join(dir_path, '99999.ts')
            if not os.path.exists(dir_path):
                mkdirs(dir_path)
            fp = open(ts_file_path, 'w')
            write_metadata(fp, {'X-Timestamp': '99999', 'name': '/a/c/o'})
            fp.close()

        etag = md5()
        with self.disk_file.create() as writer:
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': str(normalize_timestamp(time.time())),
                'Content-Length': 10,
            }
            writer.put(metadata)
            etag = md5()
            etag = etag.hexdigest()
            metadata['ETag'] = etag
            write_metadata(writer._fd, metadata)
        return ts_file_path

    def test_object_run_fast_track_all(self):
        self.setup_bad_zero_byte()
        kwargs = {'mode': 'once'}
        self.auditor.run_audit(**kwargs)
        quarantine_path = os.path.join(self.devices,
                                       'sda', 'quarantined', 'objects')
        self.assertTrue(os.path.isdir(quarantine_path))

    def test_object_run_fast_track_zero(self):
        self.setup_bad_zero_byte()
        kwargs = {'mode': 'once'}
        kwargs['zero_byte_fps'] = 50
        self.auditor.run_audit(**kwargs)
        quarantine_path = os.path.join(self.devices,
                                       'sda', 'quarantined', 'objects')
        self.assertTrue(os.path.isdir(quarantine_path))

    def test_object_run_fast_track_zero_check_closed(self):
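        # mutable flag so the FakeFile subclass below can record that
        # _quarantine() was actually reached during the audit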
        rat = [False]

        class FakeFile(DiskFile):

            def _quarantine(self, data_file, msg):
                rat[0] = True
                DiskFile._quarantine(self, data_file, msg)

        self.setup_bad_zero_byte()
        was_df = auditor.diskfile.DiskFile
        try:
            auditor.diskfile.DiskFile = FakeFile
            kwargs = {'mode': 'once'}
            kwargs['zero_byte_fps'] = 50
            self.auditor.run_audit(**kwargs)
            quarantine_path = os.path.join(self.devices,
                                           'sda', 'quarantined', 'objects')
            self.assertTrue(os.path.isdir(quarantine_path))
            self.assertTrue(rat[0])
        finally:
            auditor.diskfile.DiskFile = was_df

    def test_with_tombstone(self):
        ts_file_path = self.setup_bad_zero_byte(with_ts=True)
        self.assertTrue(ts_file_path.endswith('ts'))
        kwargs = {'mode': 'once'}
        self.auditor.run_audit(**kwargs)
        self.assertTrue(os.path.exists(ts_file_path))

    def test_sleeper(self):
        auditor.SLEEP_BETWEEN_AUDITS = 0.10
        my_auditor = auditor.ObjectAuditor(self.conf)
        start = time.time()
        my_auditor._sleep()
        delta_t = time.time() - start
        self.assert_(delta_t > 0.08)
        self.assert_(delta_t < 0.12)

    def test_run_audit(self):

        class StopForever(Exception):
            pass

        class Bogus(Exception):
            pass

        class ObjectAuditorMock(object):
            check_args = ()
            check_kwargs = {}
            check_device_dir = None
            fork_called = 0
            master = 0
            wait_called = 0

            def mock_run(self, *args, **kwargs):
                self.check_args = args
                self.check_kwargs = kwargs
                if 'zero_byte_fps' in kwargs:
                    self.check_device_dir = kwargs.get('device_dirs')

            def mock_sleep(self):
                raise StopForever('stop')

            def mock_audit_loop_error(self, parent, zbo_fps,
                                      override_devices=None, **kwargs):
                raise Bogus('exception')

            def mock_fork(self):
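                # mirrors os.fork(): return 0 to act as the child, or a fake
                # nonzero "pid" when the master flag is set (acting as parent)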
                self.fork_called += 1
                if self.master:
                    return self.fork_called
                else:
                    return 0

            def mock_wait(self):
                self.wait_called += 1
                return (self.wait_called, 0)

        for i in string.ascii_letters[2:26]:
            mkdirs(os.path.join(self.devices, 'sd%s' % i))

        my_auditor = auditor.ObjectAuditor(dict(devices=self.devices,
                                                mount_check='false',
                                                zero_byte_files_per_second=89))
        mocker = ObjectAuditorMock()
        my_auditor.logger.exception = mock.MagicMock()
        real_audit_loop = my_auditor.audit_loop
        my_auditor.audit_loop = mocker.mock_audit_loop_error
        my_auditor.run_audit = mocker.mock_run
        my_auditor._sleep = mocker.mock_sleep
        was_fork = os.fork
        was_wait = os.wait
        os.fork = mocker.mock_fork
        os.wait = mocker.mock_wait
        try:
            my_auditor.run_once(zero_byte_fps=50)
            my_auditor.logger.exception.assert_called_once_with(
                'ERROR auditing: exception')
            my_auditor.logger.exception.reset_mock()
            self.assertRaises(StopForever, my_auditor.run_forever)
            my_auditor.logger.exception.assert_called_once_with(
                'ERROR auditing: exception')
            my_auditor.audit_loop = real_audit_loop

            self.assertRaises(StopForever,
                              my_auditor.run_forever, zero_byte_fps=50)
            self.assertEquals(mocker.check_kwargs['zero_byte_fps'], 50)
            self.assertEquals(mocker.fork_called, 0)

            self.assertRaises(SystemExit, my_auditor.run_forever)
            self.assertEquals(mocker.fork_called, 1)
            self.assertEquals(mocker.check_kwargs['zero_byte_fps'], 89)
            self.assertEquals(mocker.check_device_dir, None)
            self.assertEquals(mocker.check_args, ())

            device_list = ['sd%s' % i for i in string.ascii_letters[2:10]]
            device_string = ','.join(device_list)
            device_string_bogus = device_string + ',bogus'

            mocker.fork_called = 0
            self.assertRaises(SystemExit, my_auditor.run_once,
                              devices=device_string_bogus)
            self.assertEquals(mocker.fork_called, 1)
            self.assertEquals(mocker.check_kwargs['zero_byte_fps'], 89)
            self.assertEquals(sorted(mocker.check_device_dir), device_list)

            mocker.master = 1

            mocker.fork_called = 0
            self.assertRaises(StopForever, my_auditor.run_forever)
            # Fork is called 3 times since the zbf process is forked twice
            self.assertEquals(mocker.fork_called, 3)
            self.assertEquals(mocker.wait_called, 3)

        finally:
            os.fork = was_fork
            os.wait = was_wait
Example #3
class TestAuditor(unittest.TestCase):

    def setUp(self):
        self.testdir = os.path.join(mkdtemp(), 'tmp_test_object_auditor')
        self.devices = os.path.join(self.testdir, 'node')
        self.rcache = os.path.join(self.testdir, 'object.recon')
        self.logger = FakeLogger()
        rmtree(self.testdir, ignore_errors=1)
        mkdirs(os.path.join(self.devices, 'sda'))
        os.mkdir(os.path.join(self.devices, 'sdb'))

        # policy 0
        self.objects = os.path.join(self.devices, 'sda',
                                    get_data_dir(POLICIES[0]))
        self.objects_2 = os.path.join(self.devices, 'sdb',
                                      get_data_dir(POLICIES[0]))
        os.mkdir(self.objects)
        # policy 1
        self.objects_p1 = os.path.join(self.devices, 'sda',
                                       get_data_dir(POLICIES[1]))
        self.objects_2_p1 = os.path.join(self.devices, 'sdb',
                                         get_data_dir(POLICIES[1]))
        os.mkdir(self.objects_p1)
        # policy 2
        self.objects_p2 = os.path.join(self.devices, 'sda',
                                       get_data_dir(POLICIES[2]))
        self.objects_2_p2 = os.path.join(self.devices, 'sdb',
                                         get_data_dir(POLICIES[2]))
        os.mkdir(self.objects_p2)

        self.parts = {}
        self.parts_p1 = {}
        self.parts_p2 = {}
        for part in ['0', '1', '2', '3']:
            self.parts[part] = os.path.join(self.objects, part)
            self.parts_p1[part] = os.path.join(self.objects_p1, part)
            self.parts_p2[part] = os.path.join(self.objects_p2, part)
            os.mkdir(os.path.join(self.objects, part))
            os.mkdir(os.path.join(self.objects_p1, part))
            os.mkdir(os.path.join(self.objects_p2, part))

        self.conf = dict(
            devices=self.devices,
            mount_check='false',
            object_size_stats='10,100,1024,10240')
        self.df_mgr = DiskFileManager(self.conf, self.logger)
        self.ec_df_mgr = ECDiskFileManager(self.conf, self.logger)

        # diskfiles for policy 0, 1, 2
        self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'o',
                                                  policy=POLICIES[0])
        self.disk_file_p1 = self.df_mgr.get_diskfile('sda', '0', 'a', 'c',
                                                     'o', policy=POLICIES[1])
        self.disk_file_ec = self.ec_df_mgr.get_diskfile(
            'sda', '0', 'a', 'c', 'o', policy=POLICIES[2], frag_index=1)
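        # POLICIES[2] is assumed to be an erasure-coded policy in this test
        # environment, hence the separate ECDiskFileManager and frag_index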

    def tearDown(self):
        rmtree(os.path.dirname(self.testdir), ignore_errors=1)
        unit.xattr_data = {}

    def test_worker_conf_parms(self):
        def check_common_defaults():
            self.assertEqual(auditor_worker.max_bytes_per_second, 10000000)
            self.assertEqual(auditor_worker.log_time, 3600)

        # test default values
        conf = dict(
            devices=self.devices,
            mount_check='false',
            object_size_stats='10,100,1024,10240')
        auditor_worker = auditor.AuditorWorker(conf, self.logger,
                                               self.rcache, self.devices)
        check_common_defaults()
        for policy in POLICIES:
            mgr = auditor_worker.diskfile_router[policy]
            self.assertEqual(mgr.disk_chunk_size, 65536)
        self.assertEqual(auditor_worker.max_files_per_second, 20)
        self.assertEqual(auditor_worker.zero_byte_only_at_fps, 0)

        # test specified audit value overrides
        conf.update({'disk_chunk_size': 4096})
        auditor_worker = auditor.AuditorWorker(conf, self.logger,
                                               self.rcache, self.devices,
                                               zero_byte_only_at_fps=50)
        check_common_defaults()
        for policy in POLICIES:
            mgr = auditor_worker.diskfile_router[policy]
            self.assertEqual(mgr.disk_chunk_size, 4096)
        self.assertEqual(auditor_worker.max_files_per_second, 50)
        self.assertEqual(auditor_worker.zero_byte_only_at_fps, 50)

    def test_object_audit_extra_data(self):
        def run_tests(disk_file):
            auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                                   self.rcache, self.devices)
            data = '0' * 1024
            etag = md5()
            with disk_file.create() as writer:
                writer.write(data)
                etag.update(data)
                etag = etag.hexdigest()
                timestamp = str(normalize_timestamp(time.time()))
                metadata = {
                    'ETag': etag,
                    'X-Timestamp': timestamp,
                    'Content-Length': str(os.fstat(writer._fd).st_size),
                }
                writer.put(metadata)
                writer.commit(Timestamp(timestamp))
                pre_quarantines = auditor_worker.quarantines

                auditor_worker.object_audit(
                    AuditLocation(disk_file._datadir, 'sda', '0',
                                  policy=disk_file.policy))
                self.assertEqual(auditor_worker.quarantines, pre_quarantines)

                os.write(writer._fd, 'extra_data')

                auditor_worker.object_audit(
                    AuditLocation(disk_file._datadir, 'sda', '0',
                                  policy=disk_file.policy))
                self.assertEqual(auditor_worker.quarantines,
                                 pre_quarantines + 1)
        run_tests(self.disk_file)
        run_tests(self.disk_file_p1)
        run_tests(self.disk_file_ec)

    def test_object_audit_diff_data(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        data = '0' * 1024
        etag = md5()
        timestamp = str(normalize_timestamp(time.time()))
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            writer.commit(Timestamp(timestamp))
            pre_quarantines = auditor_worker.quarantines

        # re-create the diskfile so it picks up the metadata just written
        self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'o',
                                                  policy=POLICIES.legacy)

        auditor_worker.object_audit(
            AuditLocation(self.disk_file._datadir, 'sda', '0',
                          policy=POLICIES.legacy))
        self.assertEqual(auditor_worker.quarantines, pre_quarantines)
        etag = md5()
        etag.update('1' + '0' * 1023)
        etag = etag.hexdigest()
        metadata['ETag'] = etag

        with self.disk_file.create() as writer:
            writer.write(data)
            writer.put(metadata)
            writer.commit(Timestamp(timestamp))

        auditor_worker.object_audit(
            AuditLocation(self.disk_file._datadir, 'sda', '0',
                          policy=POLICIES.legacy))
        self.assertEqual(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_audit_no_meta(self):
        timestamp = str(normalize_timestamp(time.time()))
        path = os.path.join(self.disk_file._datadir, timestamp + '.data')
        mkdirs(self.disk_file._datadir)
        fp = open(path, 'w')
        fp.write('0' * 1024)
        fp.close()
        invalidate_hash(os.path.dirname(self.disk_file._datadir))
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        pre_quarantines = auditor_worker.quarantines
        auditor_worker.object_audit(
            AuditLocation(self.disk_file._datadir, 'sda', '0',
                          policy=POLICIES.legacy))
        self.assertEqual(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_audit_will_not_swallow_errors_in_tests(self):
        timestamp = str(normalize_timestamp(time.time()))
        path = os.path.join(self.disk_file._datadir, timestamp + '.data')
        mkdirs(self.disk_file._datadir)
        with open(path, 'w') as f:
            write_metadata(f, {'name': '/a/c/o'})
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)

        def blowup(*args):
            raise NameError('tpyo')
        with mock.patch.object(DiskFileManager,
                               'get_diskfile_from_audit_location', blowup):
            self.assertRaises(NameError, auditor_worker.object_audit,
                              AuditLocation(os.path.dirname(path), 'sda', '0',
                                            policy=POLICIES.legacy))

    def test_failsafe_object_audit_will_swallow_errors_in_tests(self):
        timestamp = str(normalize_timestamp(time.time()))
        path = os.path.join(self.disk_file._datadir, timestamp + '.data')
        mkdirs(self.disk_file._datadir)
        with open(path, 'w') as f:
            write_metadata(f, {'name': '/a/c/o'})
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)

        def blowup(*args):
            raise NameError('tpyo')
        with mock.patch('swift.obj.diskfile.DiskFileManager.diskfile_cls',
                        blowup):
            auditor_worker.failsafe_object_audit(
                AuditLocation(os.path.dirname(path), 'sda', '0',
                              policy=POLICIES.legacy))
        self.assertEqual(auditor_worker.errors, 1)

    def test_generic_exception_handling(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        # pretend that we logged (and reset counters) just now
        auditor_worker.last_logged = time.time()
        timestamp = str(normalize_timestamp(time.time()))
        pre_errors = auditor_worker.errors
        data = '0' * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            writer.commit(Timestamp(timestamp))
        with mock.patch('swift.obj.diskfile.DiskFileManager.diskfile_cls',
                        lambda *_: 1 / 0):
            auditor_worker.audit_all_objects()
        self.assertEqual(auditor_worker.errors, pre_errors + 1)

    def test_object_run_once_pass(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        auditor_worker.log_time = 0
        timestamp = str(normalize_timestamp(time.time()))
        pre_quarantines = auditor_worker.quarantines
        data = '0' * 1024

        def write_file(df):
            with df.create() as writer:
                writer.write(data)
                metadata = {
                    'ETag': md5(data).hexdigest(),
                    'X-Timestamp': timestamp,
                    'Content-Length': str(os.fstat(writer._fd).st_size),
                }
                writer.put(metadata)
                writer.commit(Timestamp(timestamp))

        # policy 0
        write_file(self.disk_file)
        # policy 1
        write_file(self.disk_file_p1)
        # policy 2
        write_file(self.disk_file_ec)

        auditor_worker.audit_all_objects()
        self.assertEqual(auditor_worker.quarantines, pre_quarantines)
        # 1 object per policy falls into 1024 bucket
        self.assertEqual(auditor_worker.stats_buckets[1024], 3)
        self.assertEqual(auditor_worker.stats_buckets[10240], 0)

        # pick up some additional code coverage, large file
        data = '0' * 1024 * 1024
        for df in (self.disk_file, self.disk_file_ec):
            with df.create() as writer:
                writer.write(data)
                metadata = {
                    'ETag': md5(data).hexdigest(),
                    'X-Timestamp': timestamp,
                    'Content-Length': str(os.fstat(writer._fd).st_size),
                }
                writer.put(metadata)
                writer.commit(Timestamp(timestamp))
        auditor_worker.audit_all_objects(device_dirs=['sda', 'sdb'])
        self.assertEqual(auditor_worker.quarantines, pre_quarantines)
        # still have the 1024 byte object left in policy-1 (plus the
        # stats from the original 3)
        self.assertEqual(auditor_worker.stats_buckets[1024], 4)
        self.assertEqual(auditor_worker.stats_buckets[10240], 0)
        # and the policy-0 and EC disk files were re-written as larger objects
        self.assertEqual(auditor_worker.stats_buckets['OVER'], 2)

        # pick up even more additional code coverage, misc paths
        auditor_worker.log_time = -1
        auditor_worker.stats_sizes = []
        auditor_worker.audit_all_objects(device_dirs=['sda', 'sdb'])
        self.assertEqual(auditor_worker.quarantines, pre_quarantines)
        self.assertEqual(auditor_worker.stats_buckets[1024], 4)
        self.assertEqual(auditor_worker.stats_buckets[10240], 0)
        self.assertEqual(auditor_worker.stats_buckets['OVER'], 2)

    def test_object_run_logging(self):
        logger = FakeLogger()
        auditor_worker = auditor.AuditorWorker(self.conf, logger,
                                               self.rcache, self.devices)
        auditor_worker.audit_all_objects(device_dirs=['sda'])
        log_lines = logger.get_lines_for_level('info')
        self.assertTrue(len(log_lines) > 0)
        self.assertTrue(log_lines[0].index('ALL - parallel, sda'))

        logger = FakeLogger()
        auditor_worker = auditor.AuditorWorker(self.conf, logger,
                                               self.rcache, self.devices,
                                               zero_byte_only_at_fps=50)
        auditor_worker.audit_all_objects(device_dirs=['sda'])
        log_lines = logger.get_lines_for_level('info')
        self.assertTrue(len(log_lines) > 0)
        self.assertTrue(log_lines[0].index('ZBF - sda'))

    def test_object_run_once_no_sda(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        timestamp = str(normalize_timestamp(time.time()))
        pre_quarantines = auditor_worker.quarantines
        # pretend that we logged (and reset counters) just now
        auditor_worker.last_logged = time.time()
        data = '0' * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            os.write(writer._fd, 'extra_data')
            writer.commit(Timestamp(timestamp))
        auditor_worker.audit_all_objects()
        self.assertEqual(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_run_once_multi_devices(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        # pretend that we logged (and reset counters) just now
        auditor_worker.last_logged = time.time()
        timestamp = str(normalize_timestamp(time.time()))
        pre_quarantines = auditor_worker.quarantines
        data = '0' * 10
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            writer.commit(Timestamp(timestamp))
        auditor_worker.audit_all_objects()
        self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'ob',
                                                  policy=POLICIES.legacy)
        data = '1' * 10
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            writer.commit(Timestamp(timestamp))
            os.write(writer._fd, 'extra_data')
        auditor_worker.audit_all_objects()
        self.assertEqual(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_run_fast_track_non_zero(self):
        self.auditor = auditor.ObjectAuditor(self.conf)
        self.auditor.log_time = 0
        data = '0' * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            timestamp = str(normalize_timestamp(time.time()))
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            writer.commit(Timestamp(timestamp))
            etag = md5()
            etag.update('1' + '0' * 1023)
            etag = etag.hexdigest()
            metadata['ETag'] = etag
            write_metadata(writer._fd, metadata)

        quarantine_path = os.path.join(self.devices,
                                       'sda', 'quarantined', 'objects')
        kwargs = {'mode': 'once'}
        kwargs['zero_byte_fps'] = 50
        self.auditor.run_audit(**kwargs)
        self.assertFalse(os.path.isdir(quarantine_path))
        del kwargs['zero_byte_fps']
        clear_auditor_status(self.devices)
        self.auditor.run_audit(**kwargs)
        self.assertTrue(os.path.isdir(quarantine_path))

    def setup_bad_zero_byte(self, timestamp=None):
        if timestamp is None:
            timestamp = Timestamp(time.time())
        self.auditor = auditor.ObjectAuditor(self.conf)
        self.auditor.log_time = 0
        etag = md5()
        with self.disk_file.create() as writer:
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp.internal,
                'Content-Length': 10,
            }
            writer.put(metadata)
            writer.commit(Timestamp(timestamp))
            etag = md5()
            etag = etag.hexdigest()
            metadata['ETag'] = etag
            write_metadata(writer._fd, metadata)

    def test_object_run_fast_track_all(self):
        self.setup_bad_zero_byte()
        kwargs = {'mode': 'once'}
        self.auditor.run_audit(**kwargs)
        quarantine_path = os.path.join(self.devices,
                                       'sda', 'quarantined', 'objects')
        self.assertTrue(os.path.isdir(quarantine_path))

    def test_object_run_fast_track_zero(self):
        self.setup_bad_zero_byte()
        kwargs = {'mode': 'once'}
        kwargs['zero_byte_fps'] = 50

        called_args = [0]

        def mock_get_auditor_status(path, logger, audit_type):
            called_args[0] = audit_type
            return get_auditor_status(path, logger, audit_type)

        with mock.patch('swift.obj.diskfile.get_auditor_status',
                        mock_get_auditor_status):
            self.auditor.run_audit(**kwargs)
        quarantine_path = os.path.join(self.devices,
                                       'sda', 'quarantined', 'objects')
        self.assertTrue(os.path.isdir(quarantine_path))
        self.assertEqual('ZBF', called_args[0])

    def test_object_run_fast_track_zero_check_closed(self):
        rat = [False]

        class FakeFile(DiskFile):

            def _quarantine(self, data_file, msg):
                rat[0] = True
                DiskFile._quarantine(self, data_file, msg)

        self.setup_bad_zero_byte()
        with mock.patch('swift.obj.diskfile.DiskFileManager.diskfile_cls',
                        FakeFile):
            kwargs = {'mode': 'once'}
            kwargs['zero_byte_fps'] = 50
            self.auditor.run_audit(**kwargs)
            quarantine_path = os.path.join(self.devices,
                                           'sda', 'quarantined', 'objects')
            self.assertTrue(os.path.isdir(quarantine_path))
            self.assertTrue(rat[0])

    @mock.patch.object(auditor.ObjectAuditor, 'run_audit')
    @mock.patch('os.fork', return_value=0)
    def test_with_inaccessible_object_location(self, mock_os_fork,
                                               mock_run_audit):
        # Need to ensure that any failures in run_audit do
        # not prevent sys.exit() from running.  Otherwise we get
        # zombie processes.
        e = OSError('permission denied')
        mock_run_audit.side_effect = e
        self.auditor = auditor.ObjectAuditor(self.conf)
        self.assertRaises(SystemExit, self.auditor.fork_child, self)

    def test_with_only_tombstone(self):
        # sanity check that auditor doesn't touch solitary tombstones
        ts_iter = make_timestamp_iter()
        self.setup_bad_zero_byte(timestamp=next(ts_iter))
        self.disk_file.delete(next(ts_iter))
        files = os.listdir(self.disk_file._datadir)
        self.assertEqual(1, len(files))
        self.assertTrue(files[0].endswith('ts'))
        kwargs = {'mode': 'once'}
        self.auditor.run_audit(**kwargs)
        files_after = os.listdir(self.disk_file._datadir)
        self.assertEqual(files, files_after)

    def test_with_tombstone_and_data(self):
        # rsync replication could leave a tombstone and data file in object
        # dir - verify they are both removed during audit
        ts_iter = make_timestamp_iter()
        ts_tomb = next(ts_iter)
        ts_data = next(ts_iter)
        self.setup_bad_zero_byte(timestamp=ts_data)
        tomb_file_path = os.path.join(self.disk_file._datadir,
                                      '%s.ts' % ts_tomb.internal)
        with open(tomb_file_path, 'wb') as fd:
            write_metadata(fd, {'X-Timestamp': ts_tomb.internal})
        files = os.listdir(self.disk_file._datadir)
        self.assertEqual(2, len(files))
        self.assertTrue(os.path.basename(tomb_file_path) in files, files)
        kwargs = {'mode': 'once'}
        self.auditor.run_audit(**kwargs)
        self.assertFalse(os.path.exists(self.disk_file._datadir))

    def test_sleeper(self):
        with mock.patch(
                'time.sleep', mock.MagicMock()) as mock_sleep:
            my_auditor = auditor.ObjectAuditor(self.conf)
            my_auditor._sleep()
            mock_sleep.assert_called_with(30)

            my_conf = dict(interval=2)
            my_conf.update(self.conf)
            my_auditor = auditor.ObjectAuditor(my_conf)
            my_auditor._sleep()
            mock_sleep.assert_called_with(2)

            my_auditor = auditor.ObjectAuditor(self.conf)
            my_auditor.interval = 2
            my_auditor._sleep()
            mock_sleep.assert_called_with(2)

    def test_run_parallel_audit(self):

        class StopForever(Exception):
            pass

        class Bogus(Exception):
            pass

        loop_error = Bogus('exception')

        class LetMeOut(BaseException):
            pass

        class ObjectAuditorMock(object):
            check_args = ()
            check_kwargs = {}
            check_device_dir = None
            fork_called = 0
            master = 0
            wait_called = 0

            def mock_run(self, *args, **kwargs):
                self.check_args = args
                self.check_kwargs = kwargs
                if 'zero_byte_fps' in kwargs:
                    self.check_device_dir = kwargs.get('device_dirs')

            def mock_sleep_stop(self):
                raise StopForever('stop')

            def mock_sleep_continue(self):
                return

            def mock_audit_loop_error(self, parent, zbo_fps,
                                      override_devices=None, **kwargs):
                raise loop_error

            def mock_fork(self):
                self.fork_called += 1
                if self.master:
                    return self.fork_called
                else:
                    return 0

            def mock_wait(self):
                self.wait_called += 1
                return (self.wait_called, 0)

        for i in string.ascii_letters[2:26]:
            mkdirs(os.path.join(self.devices, 'sd%s' % i))

        my_auditor = auditor.ObjectAuditor(dict(devices=self.devices,
                                                mount_check='false',
                                                zero_byte_files_per_second=89,
                                                concurrency=1))

        mocker = ObjectAuditorMock()
        my_auditor.logger.exception = mock.MagicMock()
        real_audit_loop = my_auditor.audit_loop
        my_auditor.audit_loop = mocker.mock_audit_loop_error
        my_auditor.run_audit = mocker.mock_run
        was_fork = os.fork
        was_wait = os.wait
        os.fork = mocker.mock_fork
        os.wait = mocker.mock_wait
        try:
            my_auditor._sleep = mocker.mock_sleep_stop
            my_auditor.run_once(zero_byte_fps=50)
            my_auditor.logger.exception.assert_called_once_with(
                'ERROR auditing: %s', loop_error)
            my_auditor.logger.exception.reset_mock()
            self.assertRaises(StopForever, my_auditor.run_forever)
            my_auditor.logger.exception.assert_called_once_with(
                'ERROR auditing: %s', loop_error)
            my_auditor.audit_loop = real_audit_loop

            self.assertRaises(StopForever,
                              my_auditor.run_forever, zero_byte_fps=50)
            self.assertEqual(mocker.check_kwargs['zero_byte_fps'], 50)
            self.assertEqual(mocker.fork_called, 0)

            self.assertRaises(SystemExit, my_auditor.run_once)
            self.assertEqual(mocker.fork_called, 1)
            self.assertEqual(mocker.check_kwargs['zero_byte_fps'], 89)
            self.assertEqual(mocker.check_device_dir, [])
            self.assertEqual(mocker.check_args, ())

            device_list = ['sd%s' % i for i in string.ascii_letters[2:10]]
            device_string = ','.join(device_list)
            device_string_bogus = device_string + ',bogus'

            mocker.fork_called = 0
            self.assertRaises(SystemExit, my_auditor.run_once,
                              devices=device_string_bogus)
            self.assertEqual(mocker.fork_called, 1)
            self.assertEqual(mocker.check_kwargs['zero_byte_fps'], 89)
            self.assertEqual(sorted(mocker.check_device_dir), device_list)

            mocker.master = 1

            mocker.fork_called = 0
            self.assertRaises(StopForever, my_auditor.run_forever)
            # Fork is called twice, since the ZBF process is forked just once
            # before self._sleep() is called and StopForever is raised.
            # wait() is likewise called just once before StopForever is raised.
            self.assertEqual(mocker.fork_called, 2)
            self.assertEqual(mocker.wait_called, 1)

            my_auditor._sleep = mocker.mock_sleep_continue
            my_auditor.audit_loop = works_only_once(my_auditor.audit_loop,
                                                    LetMeOut())

            my_auditor.concurrency = 2
            mocker.fork_called = 0
            mocker.wait_called = 0
            self.assertRaises(LetMeOut, my_auditor.run_forever)
            # Fork is called (number of devices) + (number of devices) / 2 + 1
            # times, since the ZBF process itself is forked
            # (number of devices) / 2 + 1 times
            no_devices = len(os.listdir(self.devices))
            self.assertEqual(mocker.fork_called, no_devices + no_devices / 2
                             + 1)
            self.assertEqual(mocker.wait_called, no_devices + no_devices / 2
                             + 1)
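            # (with sda and sdb from setUp plus sdc-sdz created above there
            # are 26 device dirs, so both counts work out to 26 + 26 / 2 + 1
            # == 40 under Python 2 integer division)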

        finally:
            os.fork = was_fork
            os.wait = was_wait

    def test_run_audit_once(self):
        my_auditor = auditor.ObjectAuditor(dict(devices=self.devices,
                                                mount_check='false',
                                                zero_byte_files_per_second=89,
                                                concurrency=1))

        forked_pids = []
        next_zbf_pid = [2]
        next_normal_pid = [1001]
        outstanding_pids = [[]]
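        # single-element lists let the nested fakes below mutate these
        # counters from the enclosing scope (Python 2 has no 'nonlocal')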

        def fake_fork_child(**kwargs):
            if len(forked_pids) > 10:
                # something's gone horribly wrong
                raise BaseException("forking too much")

            # ZBF pids are all smaller than the normal-audit pids; this way
            # we can return them first.
            #
            # Also, ZBF pids are even and normal-audit pids are odd; this is
            # so humans seeing this test fail can better tell what's happening.
            if kwargs.get('zero_byte_fps'):
                pid = next_zbf_pid[0]
                next_zbf_pid[0] += 2
            else:
                pid = next_normal_pid[0]
                next_normal_pid[0] += 2
            outstanding_pids[0].append(pid)
            forked_pids.append(pid)
            return pid

        def fake_os_wait():
            # Smallest pid first; that's ZBF if we have one, else normal
            outstanding_pids[0].sort()
            pid = outstanding_pids[0].pop(0)
            return (pid, 0)   # (pid, status)

        with mock.patch("swift.obj.auditor.os.wait", fake_os_wait), \
                mock.patch.object(my_auditor, 'fork_child', fake_fork_child), \
                mock.patch.object(my_auditor, '_sleep', lambda *a: None):
            my_auditor.run_once()

        self.assertEqual(sorted(forked_pids), [2, 1001])

    def test_run_parallel_audit_once(self):
        my_auditor = auditor.ObjectAuditor(
            dict(devices=self.devices, mount_check='false',
                 zero_byte_files_per_second=89, concurrency=2))

        # ZBF pids are smaller than the normal-audit pids; this way we can
        # return them first from our mocked os.wait().
        #
        # Also, ZBF pids are even and normal-audit pids are odd; this is so
        # humans seeing this test fail can better tell what's happening.
        forked_pids = []
        next_zbf_pid = [2]
        next_normal_pid = [1001]
        outstanding_pids = [[]]

        def fake_fork_child(**kwargs):
            if len(forked_pids) > 10:
                # something's gone horribly wrong; try not to hang the test
                # run because of it
                raise BaseException("forking too much")

            if kwargs.get('zero_byte_fps'):
                pid = next_zbf_pid[0]
                next_zbf_pid[0] += 2
            else:
                pid = next_normal_pid[0]
                next_normal_pid[0] += 2
            outstanding_pids[0].append(pid)
            forked_pids.append(pid)
            return pid

        def fake_os_wait():
            if not outstanding_pids[0]:
                raise BaseException("nobody waiting")

            # ZBF auditor finishes first
            outstanding_pids[0].sort()
            pid = outstanding_pids[0].pop(0)
            return (pid, 0)   # (pid, status)

        # make sure we've got enough devs that the ZBF auditor can finish
        # before all the normal auditors have been started
        mkdirs(os.path.join(self.devices, 'sdc'))
        mkdirs(os.path.join(self.devices, 'sdd'))

        with mock.patch("swift.obj.auditor.os.wait", fake_os_wait), \
                mock.patch.object(my_auditor, 'fork_child', fake_fork_child), \
                mock.patch.object(my_auditor, '_sleep', lambda *a: None):
            my_auditor.run_once()

        self.assertEqual(sorted(forked_pids), [2, 1001, 1003, 1005, 1007])
Example #4
class TestAuditor(unittest.TestCase):
    def setUp(self):
        self.testdir = os.path.join(mkdtemp(), "tmp_test_object_auditor")
        self.devices = os.path.join(self.testdir, "node")
        self.rcache = os.path.join(self.testdir, "object.recon")
        self.logger = FakeLogger()
        rmtree(self.testdir, ignore_errors=1)
        mkdirs(os.path.join(self.devices, "sda"))
        self.objects = os.path.join(self.devices, "sda", "objects")

        os.mkdir(os.path.join(self.devices, "sdb"))
        self.objects_2 = os.path.join(self.devices, "sdb", "objects")

        os.mkdir(self.objects)
        self.parts = {}
        for part in ["0", "1", "2", "3"]:
            self.parts[part] = os.path.join(self.objects, part)
            os.mkdir(os.path.join(self.objects, part))

        self.conf = dict(devices=self.devices, mount_check="false", object_size_stats="10,100,1024,10240")
        self.df_mgr = DiskFileManager(self.conf, self.logger)
        self.disk_file = self.df_mgr.get_diskfile("sda", "0", "a", "c", "o")

    def tearDown(self):
        rmtree(os.path.dirname(self.testdir), ignore_errors=1)
        unit.xattr_data = {}

    def test_object_audit_extra_data(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices)
        data = "0" * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            timestamp = str(normalize_timestamp(time.time()))
            metadata = {"ETag": etag, "X-Timestamp": timestamp, "Content-Length": str(os.fstat(writer._fd).st_size)}
            writer.put(metadata)
            pre_quarantines = auditor_worker.quarantines

            auditor_worker.object_audit(AuditLocation(self.disk_file._datadir, "sda", "0"))
            self.assertEquals(auditor_worker.quarantines, pre_quarantines)

            os.write(writer._fd, "extra_data")
            auditor_worker.object_audit(AuditLocation(self.disk_file._datadir, "sda", "0"))
            self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_audit_diff_data(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices)
        data = "0" * 1024
        etag = md5()
        timestamp = str(normalize_timestamp(time.time()))
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {"ETag": etag, "X-Timestamp": timestamp, "Content-Length": str(os.fstat(writer._fd).st_size)}
            writer.put(metadata)
            pre_quarantines = auditor_worker.quarantines

        # re-create the diskfile so it picks up the metadata just written
        self.disk_file = self.df_mgr.get_diskfile("sda", "0", "a", "c", "o")

        auditor_worker.object_audit(AuditLocation(self.disk_file._datadir, "sda", "0"))
        self.assertEquals(auditor_worker.quarantines, pre_quarantines)
        etag = md5()
        etag.update("1" + "0" * 1023)
        etag = etag.hexdigest()
        metadata["ETag"] = etag

        with self.disk_file.create() as writer:
            writer.write(data)
            writer.put(metadata)

        auditor_worker.object_audit(AuditLocation(self.disk_file._datadir, "sda", "0"))
        self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_audit_no_meta(self):
        timestamp = str(normalize_timestamp(time.time()))
        path = os.path.join(self.disk_file._datadir, timestamp + ".data")
        mkdirs(self.disk_file._datadir)
        fp = open(path, "w")
        fp.write("0" * 1024)
        fp.close()
        invalidate_hash(os.path.dirname(self.disk_file._datadir))
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices)
        pre_quarantines = auditor_worker.quarantines
        auditor_worker.object_audit(AuditLocation(self.disk_file._datadir, "sda", "0"))
        self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_audit_will_not_swallow_errors_in_tests(self):
        timestamp = str(normalize_timestamp(time.time()))
        path = os.path.join(self.disk_file._datadir, timestamp + ".data")
        mkdirs(self.disk_file._datadir)
        with open(path, "w") as f:
            write_metadata(f, {"name": "/a/c/o"})
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices)

        def blowup(*args):
            raise NameError("tpyo")

        with mock.patch.object(DiskFileManager, "get_diskfile_from_audit_location", blowup):
            self.assertRaises(NameError, auditor_worker.object_audit, AuditLocation(os.path.dirname(path), "sda", "0"))

    def test_failsafe_object_audit_will_swallow_errors_in_tests(self):
        timestamp = str(normalize_timestamp(time.time()))
        path = os.path.join(self.disk_file._datadir, timestamp + ".data")
        mkdirs(self.disk_file._datadir)
        with open(path, "w") as f:
            write_metadata(f, {"name": "/a/c/o"})
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices)

        def blowup(*args):
            raise NameError("tpyo")

        with mock.patch("swift.obj.diskfile.DiskFile", blowup):
            auditor_worker.failsafe_object_audit(AuditLocation(os.path.dirname(path), "sda", "0"))
        self.assertEquals(auditor_worker.errors, 1)

    def test_generic_exception_handling(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices)
        timestamp = str(normalize_timestamp(time.time()))
        pre_errors = auditor_worker.errors
        data = "0" * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {"ETag": etag, "X-Timestamp": timestamp, "Content-Length": str(os.fstat(writer._fd).st_size)}
            writer.put(metadata)
        with mock.patch("swift.obj.diskfile.DiskFile", lambda *_: 1 / 0):
            auditor_worker.audit_all_objects()
        self.assertEquals(auditor_worker.errors, pre_errors + 1)

    def test_object_run_once_pass(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices)
        auditor_worker.log_time = 0
        timestamp = str(normalize_timestamp(time.time()))
        pre_quarantines = auditor_worker.quarantines
        data = "0" * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {"ETag": etag, "X-Timestamp": timestamp, "Content-Length": str(os.fstat(writer._fd).st_size)}
            writer.put(metadata)
        auditor_worker.audit_all_objects()
        self.assertEquals(auditor_worker.quarantines, pre_quarantines)
        self.assertEquals(auditor_worker.stats_buckets[1024], 1)
        self.assertEquals(auditor_worker.stats_buckets[10240], 0)

    def test_object_run_once_no_sda(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices)
        timestamp = str(normalize_timestamp(time.time()))
        pre_quarantines = auditor_worker.quarantines
        data = "0" * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {"ETag": etag, "X-Timestamp": timestamp, "Content-Length": str(os.fstat(writer._fd).st_size)}
            writer.put(metadata)
            os.write(writer._fd, "extra_data")
        auditor_worker.audit_all_objects()
        self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_run_once_multi_devices(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices)
        timestamp = str(normalize_timestamp(time.time()))
        pre_quarantines = auditor_worker.quarantines
        data = "0" * 10
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {"ETag": etag, "X-Timestamp": timestamp, "Content-Length": str(os.fstat(writer._fd).st_size)}
            writer.put(metadata)
        auditor_worker.audit_all_objects()
        self.disk_file = self.df_mgr.get_diskfile("sda", "0", "a", "c", "ob")
        data = "1" * 10
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {"ETag": etag, "X-Timestamp": timestamp, "Content-Length": str(os.fstat(writer._fd).st_size)}
            writer.put(metadata)
            os.write(writer._fd, "extra_data")
        auditor_worker.audit_all_objects()
        self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_run_fast_track_non_zero(self):
        self.auditor = auditor.ObjectAuditor(self.conf)
        self.auditor.log_time = 0
        data = "0" * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                "ETag": etag,
                "X-Timestamp": str(normalize_timestamp(time.time())),
                "Content-Length": str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            etag = md5()
            etag.update("1" + "0" * 1023)
            etag = etag.hexdigest()
            metadata["ETag"] = etag
            write_metadata(writer._fd, metadata)

        quarantine_path = os.path.join(self.devices, "sda", "quarantined", "objects")
        kwargs = {"mode": "once"}
        kwargs["zero_byte_fps"] = 50
        self.auditor.run_audit(**kwargs)
        self.assertFalse(os.path.isdir(quarantine_path))
        del kwargs["zero_byte_fps"]
        self.auditor.run_audit(**kwargs)
        self.assertTrue(os.path.isdir(quarantine_path))

    def setup_bad_zero_byte(self, with_ts=False):
        self.auditor = auditor.ObjectAuditor(self.conf)
        self.auditor.log_time = 0
        ts_file_path = ""
        if with_ts:
            name_hash = hash_path("a", "c", "o")
            dir_path = os.path.join(self.devices, "sda", storage_directory(DATADIR, "0", name_hash))
            ts_file_path = os.path.join(dir_path, "99999.ts")
            if not os.path.exists(dir_path):
                mkdirs(dir_path)
            fp = open(ts_file_path, "w")
            write_metadata(fp, {"X-Timestamp": "99999", "name": "/a/c/o"})
            fp.close()

        etag = md5()
        with self.disk_file.create() as writer:
            etag = etag.hexdigest()
            metadata = {"ETag": etag, "X-Timestamp": str(normalize_timestamp(time.time())), "Content-Length": 10}
            writer.put(metadata)
            etag = md5()
            etag = etag.hexdigest()
            metadata["ETag"] = etag
            write_metadata(writer._fd, metadata)
        return ts_file_path

    def test_object_run_fast_track_all(self):
        self.setup_bad_zero_byte()
        kwargs = {"mode": "once"}
        self.auditor.run_audit(**kwargs)
        quarantine_path = os.path.join(self.devices, "sda", "quarantined", "objects")
        self.assertTrue(os.path.isdir(quarantine_path))

    def test_object_run_fast_track_zero(self):
        self.setup_bad_zero_byte()
        kwargs = {"mode": "once"}
        kwargs["zero_byte_fps"] = 50
        self.auditor.run_audit(**kwargs)
        quarantine_path = os.path.join(self.devices, "sda", "quarantined", "objects")
        self.assertTrue(os.path.isdir(quarantine_path))

    def test_object_run_fast_track_zero_check_closed(self):
        rat = [False]

        class FakeFile(DiskFile):
            def _quarantine(self, data_file, msg):
                rat[0] = True
                DiskFile._quarantine(self, data_file, msg)

        self.setup_bad_zero_byte()
        was_df = auditor.diskfile.DiskFile
        try:
            auditor.diskfile.DiskFile = FakeFile
            kwargs = {"mode": "once"}
            kwargs["zero_byte_fps"] = 50
            self.auditor.run_audit(**kwargs)
            quarantine_path = os.path.join(self.devices, "sda", "quarantined", "objects")
            self.assertTrue(os.path.isdir(quarantine_path))
            self.assertTrue(rat[0])
        finally:
            auditor.diskfile.DiskFile = was_df

    def test_with_tombstone(self):
        ts_file_path = self.setup_bad_zero_byte(with_ts=True)
        self.assertTrue(ts_file_path.endswith("ts"))
        kwargs = {"mode": "once"}
        self.auditor.run_audit(**kwargs)
        self.assertTrue(os.path.exists(ts_file_path))

    def test_sleeper(self):
        auditor.SLEEP_BETWEEN_AUDITS = 0.10
        my_auditor = auditor.ObjectAuditor(self.conf)
        start = time.time()
        my_auditor._sleep()
        delta_t = time.time() - start
        self.assert_(delta_t > 0.08)
        self.assert_(delta_t < 0.12)

    def test_run_audit(self):
        class StopForever(Exception):
            pass

        class ObjectAuditorMock(object):
            check_args = ()
            check_kwargs = {}
            check_device_dir = None
            fork_called = 0
            master = 0
            wait_called = 0

            def mock_run(self, *args, **kwargs):
                self.check_args = args
                self.check_kwargs = kwargs
                if "zero_byte_fps" in kwargs:
                    self.check_device_dir = kwargs.get("device_dirs")

            def mock_sleep(self):
                raise StopForever("stop")

            def mock_fork(self):
                self.fork_called += 1
                if self.master:
                    return self.fork_called
                else:
                    return 0

            def mock_wait(self):
                self.wait_called += 1
                return (self.wait_called, 0)

        for i in string.ascii_letters[2:26]:
            mkdirs(os.path.join(self.devices, "sd%s" % i))

        my_auditor = auditor.ObjectAuditor(
            dict(devices=self.devices, mount_check="false", zero_byte_files_per_second=89)
        )
        mocker = ObjectAuditorMock()
        my_auditor.run_audit = mocker.mock_run
        my_auditor._sleep = mocker.mock_sleep
        was_fork = os.fork
        was_wait = os.wait
        try:
            os.fork = mocker.mock_fork
            os.wait = mocker.mock_wait
            self.assertRaises(StopForever, my_auditor.run_forever, zero_byte_fps=50)
            self.assertEquals(mocker.check_kwargs["zero_byte_fps"], 50)
            self.assertEquals(mocker.fork_called, 0)

            self.assertRaises(SystemExit, my_auditor.run_forever)
            self.assertEquals(mocker.fork_called, 1)
            self.assertEquals(mocker.check_kwargs["zero_byte_fps"], 89)
            self.assertEquals(mocker.check_device_dir, None)
            self.assertEquals(mocker.check_args, ())

            device_list = ["sd%s" % i for i in string.ascii_letters[2:10]]
            device_string = ",".join(device_list)
            device_string_bogus = device_string + ",bogus"

            mocker.fork_called = 0
            self.assertRaises(SystemExit, my_auditor.run_once, devices=device_string_bogus)
            self.assertEquals(mocker.fork_called, 1)
            self.assertEquals(mocker.check_kwargs["zero_byte_fps"], 89)
            self.assertEquals(sorted(mocker.check_device_dir), device_list)

            mocker.master = 1

            mocker.fork_called = 0
            self.assertRaises(StopForever, my_auditor.run_forever)
            # Fork is called 3 times since the zbf process is forked twice
            self.assertEquals(mocker.fork_called, 3)
            self.assertEquals(mocker.wait_called, 3)

        finally:
            os.fork = was_fork
            os.wait = was_wait
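
The run_forever tests above work by swapping the module-level os.fork and os.wait for bound methods on a recorder object and restoring the originals in a finally block. A minimal standalone sketch of that monkey-patch-and-restore pattern (the ForkRecorder name is illustrative, not part of Swift):

import os

class ForkRecorder(object):
    fork_called = 0

    def mock_fork(self):
        # os.fork() returns 0 in the child and the child's pid in the
        # parent; always returning 0 makes the caller take the child path.
        self.fork_called += 1
        return 0

recorder = ForkRecorder()
was_fork = os.fork
try:
    os.fork = recorder.mock_fork
    assert os.fork() == 0
    assert recorder.fork_called == 1
finally:
    # Restore the real os.fork even if an assertion above fails.
    os.fork = was_fork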
Example #5
class TestAuditor(unittest.TestCase):

    def setUp(self):
        self.testdir = os.path.join(mkdtemp(), 'tmp_test_object_auditor')
        self.devices = os.path.join(self.testdir, 'node')
        self.logger = FakeLogger()
        rmtree(self.testdir, ignore_errors=1)
        mkdirs(os.path.join(self.devices, 'sda'))
        self.objects = os.path.join(self.devices, 'sda', 'objects')

        os.mkdir(os.path.join(self.devices, 'sdb'))
        self.objects_2 = os.path.join(self.devices, 'sdb', 'objects')

        os.mkdir(self.objects)
        self.parts = {}
        for part in ['0', '1', '2', '3']:
            self.parts[part] = os.path.join(self.objects, part)
            os.mkdir(os.path.join(self.objects, part))

        self.conf = dict(
            devices=self.devices,
            mount_check='false',
            object_size_stats='10,100,1024,10240')
        self.df_mgr = DiskFileManager(self.conf, self.logger)
        self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'o')

    def tearDown(self):
        rmtree(os.path.dirname(self.testdir), ignore_errors=1)
        unit.xattr_data = {}

    def test_object_audit_extra_data(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger)
        data = '0' * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            timestamp = str(normalize_timestamp(time.time()))
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            pre_quarantines = auditor_worker.quarantines

            auditor_worker.object_audit(
                os.path.join(self.disk_file._datadir, timestamp + '.data'),
                'sda', '0')
            self.assertEquals(auditor_worker.quarantines, pre_quarantines)

            os.write(writer._fd, 'extra_data')
            auditor_worker.object_audit(
                os.path.join(self.disk_file._datadir, timestamp + '.data'),
                'sda', '0')
            self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_audit_diff_data(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger)
        data = '0' * 1024
        etag = md5()
        timestamp = str(normalize_timestamp(time.time()))
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            pre_quarantines = auditor_worker.quarantines

        # remake so it will have metadata
        self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'o')

        auditor_worker.object_audit(
            os.path.join(self.disk_file._datadir, timestamp + '.data'),
            'sda', '0')
        self.assertEquals(auditor_worker.quarantines, pre_quarantines)
        etag = md5()
        etag.update('1' + '0' * 1023)
        etag = etag.hexdigest()
        metadata['ETag'] = etag

        with self.disk_file.create() as writer:
            writer.write(data)
            writer.put(metadata)

        auditor_worker.object_audit(
            os.path.join(self.disk_file._datadir, timestamp + '.data'),
            'sda', '0')
        self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_audit_no_meta(self):
        timestamp = str(normalize_timestamp(time.time()))
        path = os.path.join(self.disk_file._datadir, timestamp + '.data')
        mkdirs(self.disk_file._datadir)
        fp = open(path, 'w')
        fp.write('0' * 1024)
        fp.close()
        invalidate_hash(os.path.dirname(self.disk_file._datadir))
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger)
        pre_quarantines = auditor_worker.quarantines
        auditor_worker.object_audit(
            os.path.join(self.disk_file._datadir, timestamp + '.data'),
            'sda', '0')
        self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_audit_will_not_swallow_errors_in_tests(self):
        timestamp = str(normalize_timestamp(time.time()))
        path = os.path.join(self.disk_file._datadir, timestamp + '.data')
        mkdirs(self.disk_file._datadir)
        with open(path, 'w') as f:
            write_metadata(f, {'name': '/a/c/o'})
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger)

        def blowup(*args):
            raise NameError('tpyo')
        with mock.patch('swift.obj.diskfile.DiskFile',
                        blowup):
            self.assertRaises(NameError, auditor_worker.object_audit,
                              path, 'sda', '0')

    def test_failsafe_object_audit_will_swallow_errors_in_tests(self):
        timestamp = str(normalize_timestamp(time.time()))
        path = os.path.join(self.disk_file._datadir, timestamp + '.data')
        mkdirs(self.disk_file._datadir)
        with open(path, 'w') as f:
            write_metadata(f, {'name': '/a/c/o'})
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger)

        def blowup(*args):
            raise NameError('tpyo')
        with mock.patch('swift.obj.diskfile.DiskFile',
                        blowup):
            auditor_worker.failsafe_object_audit(path, 'sda', '0')
        self.assertEquals(auditor_worker.errors, 1)

    def test_generic_exception_handling(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger)
        timestamp = str(normalize_timestamp(time.time()))
        pre_errors = auditor_worker.errors
        data = '0' * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
        with mock.patch('swift.obj.diskfile.DiskFile',
                        lambda *_: 1 / 0):
            auditor_worker.audit_all_objects()
        self.assertEquals(auditor_worker.errors, pre_errors + 1)

    def test_object_run_once_pass(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger)
        auditor_worker.log_time = 0
        timestamp = str(normalize_timestamp(time.time()))
        pre_quarantines = auditor_worker.quarantines
        data = '0' * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
        auditor_worker.audit_all_objects()
        self.assertEquals(auditor_worker.quarantines, pre_quarantines)
        self.assertEquals(auditor_worker.stats_buckets[1024], 1)
        self.assertEquals(auditor_worker.stats_buckets[10240], 0)

    def test_object_run_once_no_sda(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger)
        timestamp = str(normalize_timestamp(time.time()))
        pre_quarantines = auditor_worker.quarantines
        data = '0' * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            os.write(writer._fd, 'extra_data')
        auditor_worker.audit_all_objects()
        self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_run_once_multi_devices(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger)
        timestamp = str(normalize_timestamp(time.time()))
        pre_quarantines = auditor_worker.quarantines
        data = '0' * 10
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
        auditor_worker.audit_all_objects()
        self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'ob')
        data = '1' * 10
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            os.write(writer._fd, 'extra_data')
        auditor_worker.audit_all_objects()
        self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_run_fast_track_non_zero(self):
        self.auditor = auditor.ObjectAuditor(self.conf)
        self.auditor.log_time = 0
        data = '0' * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': str(normalize_timestamp(time.time())),
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            etag = md5()
            etag.update('1' + '0' * 1023)
            etag = etag.hexdigest()
            metadata['ETag'] = etag
            write_metadata(writer._fd, metadata)

        quarantine_path = os.path.join(self.devices,
                                       'sda', 'quarantined', 'objects')
        self.auditor.run_once(zero_byte_fps=50)
        self.assertFalse(os.path.isdir(quarantine_path))
        self.auditor.run_once()
        self.assertTrue(os.path.isdir(quarantine_path))

    def setup_bad_zero_byte(self, with_ts=False):
        self.auditor = auditor.ObjectAuditor(self.conf)
        self.auditor.log_time = 0
        ts_file_path = ''
        if with_ts:
            name_hash = hash_path('a', 'c', 'o')
            dir_path = os.path.join(
                self.devices, 'sda',
                storage_directory(DATADIR, '0', name_hash))
            ts_file_path = os.path.join(dir_path, '99999.ts')
            if not os.path.exists(dir_path):
                mkdirs(dir_path)
            fp = open(ts_file_path, 'w')
            fp.close()

        etag = md5()
        with self.disk_file.create() as writer:
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': str(normalize_timestamp(time.time())),
                'Content-Length': 10,
            }
            writer.put(metadata)
            etag = md5()
            etag = etag.hexdigest()
            metadata['ETag'] = etag
            write_metadata(writer._fd, metadata)
        return ts_file_path

    def test_object_run_fast_track_all(self):
        self.setup_bad_zero_byte()
        self.auditor.run_once()
        quarantine_path = os.path.join(self.devices,
                                       'sda', 'quarantined', 'objects')
        self.assertTrue(os.path.isdir(quarantine_path))

    def test_object_run_fast_track_zero(self):
        self.setup_bad_zero_byte()
        self.auditor.run_once(zero_byte_fps=50)
        quarantine_path = os.path.join(self.devices,
                                       'sda', 'quarantined', 'objects')
        self.assertTrue(os.path.isdir(quarantine_path))

    def test_object_run_fast_track_zero_check_closed(self):
        rat = [False]

        class FakeFile(DiskFile):

            def _quarantine(self, data_file, msg):
                rat[0] = True
                DiskFile._quarantine(self, data_file, msg)

        self.setup_bad_zero_byte()
        was_df = auditor.diskfile.DiskFile
        try:
            auditor.diskfile.DiskFile = FakeFile
            self.auditor.run_once(zero_byte_fps=50)
            quarantine_path = os.path.join(self.devices,
                                           'sda', 'quarantined', 'objects')
            self.assertTrue(os.path.isdir(quarantine_path))
            self.assertTrue(rat[0])
        finally:
            auditor.diskfile.DiskFile = was_df

    def test_with_tombstone(self):
        ts_file_path = self.setup_bad_zero_byte(with_ts=True)
        self.auditor.run_once()
        self.assertTrue(ts_file_path.endswith('ts'))
        self.assertTrue(os.path.exists(ts_file_path))

    def test_sleeper(self):
        auditor.SLEEP_BETWEEN_AUDITS = 0.10
        my_auditor = auditor.ObjectAuditor(self.conf)
        start = time.time()
        my_auditor._sleep()
        delta_t = time.time() - start
        self.assert_(delta_t > 0.08)
        self.assert_(delta_t < 0.12)

    def test_run_forever(self):

        class StopForever(Exception):
            pass

        class ObjectAuditorMock(object):
            check_args = ()
            check_kwargs = {}
            fork_called = 0
            fork_res = 0

            def mock_run(self, *args, **kwargs):
                self.check_args = args
                self.check_kwargs = kwargs

            def mock_sleep(self):
                raise StopForever('stop')

            def mock_fork(self):
                self.fork_called += 1
                return self.fork_res

        my_auditor = auditor.ObjectAuditor(dict(devices=self.devices,
                                                mount_check='false',
                                                zero_byte_files_per_second=89))
        mocker = ObjectAuditorMock()
        my_auditor.run_once = mocker.mock_run
        my_auditor._sleep = mocker.mock_sleep
        was_fork = os.fork
        try:
            os.fork = mocker.mock_fork
            self.assertRaises(StopForever,
                              my_auditor.run_forever, zero_byte_fps=50)
            self.assertEquals(mocker.check_kwargs['zero_byte_fps'], 50)
            self.assertEquals(mocker.fork_called, 0)

            self.assertRaises(StopForever, my_auditor.run_forever)
            self.assertEquals(mocker.fork_called, 1)
            self.assertEquals(mocker.check_args, ())

            mocker.fork_res = 1
            self.assertRaises(StopForever, my_auditor.run_forever)
            self.assertEquals(mocker.fork_called, 2)
            self.assertEquals(mocker.check_kwargs['zero_byte_fps'], 89)

        finally:
            os.fork = was_fork
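
Both auditor test classes above drive the auditor the same way it is driven in production: build a conf dict, construct auditor.ObjectAuditor, and call run_once, optionally with zero_byte_fps to restrict the pass to zero-byte files. A condensed, illustrative version of that flow (the devices path is hypothetical):

from swift.obj import auditor

conf = dict(devices='/srv/node', mount_check='false',
            object_size_stats='10,100,1024,10240')
object_auditor = auditor.ObjectAuditor(conf)
# Fast-track pass over zero-byte files only, then a full pass.
object_auditor.run_once(zero_byte_fps=50)
object_auditor.run_once()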
Example #6
class ObjectController(BaseStorageServer):
    """Implements the WSGI application for the Swift Object Server."""

    def __init__(self, conf, logger=None):
        """
        Creates a new WSGI application for the Swift Object Server. An
        example configuration is given at
        <source-dir>/etc/object-server.conf-sample or
        /etc/swift/object-server.conf-sample.
        """
        super(ObjectController, self).__init__(conf)
        self.logger = logger or get_logger(conf, log_route="object-server")
        self.node_timeout = int(conf.get("node_timeout", 3))
        self.conn_timeout = float(conf.get("conn_timeout", 0.5))
        self.client_timeout = int(conf.get("client_timeout", 60))
        self.disk_chunk_size = int(conf.get("disk_chunk_size", 65536))
        self.network_chunk_size = int(conf.get("network_chunk_size", 65536))
        self.log_requests = config_true_value(conf.get("log_requests", "true"))
        self.max_upload_time = int(conf.get("max_upload_time", 86400))
        self.slow = int(conf.get("slow", 0))
        self.keep_cache_private = config_true_value(conf.get("keep_cache_private", "false"))

        default_allowed_headers = """
            content-disposition,
            content-encoding,
            x-delete-at,
            x-object-manifest,
            x-static-large-object,
        """
        extra_allowed_headers = [
            header.strip().lower()
            for header in conf.get("allowed_headers", default_allowed_headers).split(",")
            if header.strip()
        ]
        self.allowed_headers = set()
        for header in extra_allowed_headers:
            if header not in DATAFILE_SYSTEM_META:
                self.allowed_headers.add(header)
        self.auto_create_account_prefix = conf.get("auto_create_account_prefix") or "."
        self.expiring_objects_account = self.auto_create_account_prefix + (
            conf.get("expiring_objects_account_name") or "expiring_objects"
        )
        self.expiring_objects_container_divisor = int(conf.get("expiring_objects_container_divisor") or 86400)
        # Initialization was successful, so now apply the network chunk size
        # parameter as the default read / write buffer size for the network
        # sockets.
        #
        # NOTE WELL: This is a class setting, so until we can set this on a
        # per-connection basis, this affects reading and writing on ALL
        # sockets, those between the proxy servers and external clients, and
        # those between the proxy servers and the other internal servers.
        #
        # ** Because the primary motivation for this is to optimize how data
        # is written back to the proxy server, we could use the value from the
        # disk_chunk_size parameter. However, it affects all created sockets
        # using this class so we have chosen to tie it to the
        # network_chunk_size parameter value instead.
        socket._fileobject.default_bufsize = self.network_chunk_size

        # Provide further setup specific to an object server implementation.
        self.setup(conf)
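
    # The options consumed above come from the [app:object-server] section of
    # the object server's configuration. A minimal, purely illustrative
    # snippet restating the defaults read in __init__() (not a tuning
    # recommendation):
    #
    #   [app:object-server]
    #   node_timeout = 3
    #   conn_timeout = 0.5
    #   client_timeout = 60
    #   disk_chunk_size = 65536
    #   network_chunk_size = 65536
    #   log_requests = true
    #   max_upload_time = 86400
    #   slow = 0
    #   keep_cache_private = false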

    def setup(self, conf):
        """
        Implementation specific setup. This method is called at the very end
        by the constructor to allow a specific implementation to modify
        existing attributes or add its own attributes.

        :param conf: WSGI configuration parameter
        """

        # Common on-disk hierarchy shared across account, container and object
        # servers.
        self._diskfile_mgr = DiskFileManager(conf, self.logger)
        # This is populated by global_conf_callback way below as the semaphore
        # is shared by all workers.
        if "replication_semaphore" in conf:
            # The value was put in a list so it could get past paste
            self.replication_semaphore = conf["replication_semaphore"][0]
        else:
            self.replication_semaphore = None
        self.replication_failure_threshold = int(conf.get("replication_failure_threshold") or 100)
        self.replication_failure_ratio = float(conf.get("replication_failure_ratio") or 1.0)

    def get_diskfile(self, device, partition, account, container, obj, policy_idx, **kwargs):
        """
        Utility method for instantiating a DiskFile object supporting a given
        REST API.

        An implementation of the object server that wants to use a different
        DiskFile class would simply over-ride this method to provide that
        behavior.
        """
        return self._diskfile_mgr.get_diskfile(device, partition, account, container, obj, policy_idx, **kwargs)
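
    # As the docstring above suggests, an alternative DiskFile implementation
    # only needs to override this hook. A hypothetical sketch (the names
    # MyObjectController and MyDiskFileManager are illustrative, not part of
    # Swift):
    #
    #   class MyObjectController(ObjectController):
    #       def setup(self, conf):
    #           self._diskfile_mgr = MyDiskFileManager(conf, self.logger)
    #
    #       def get_diskfile(self, device, partition, account, container,
    #                        obj, policy_idx, **kwargs):
    #           return self._diskfile_mgr.get_diskfile(
    #               device, partition, account, container, obj, policy_idx,
    #               **kwargs)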

    def async_update(
        self, op, account, container, obj, host, partition, contdevice, headers_out, objdevice, policy_index
    ):
        """
        Sends or saves an async update.

        :param op: operation performed (ex: 'PUT', or 'DELETE')
        :param account: account name for the object
        :param container: container name for the object
        :param obj: object name
        :param host: host that the container is on
        :param partition: partition that the container is on
        :param contdevice: device name that the container is on
        :param headers_out: dictionary of headers to send in the container
                            request
        :param objdevice: device name that the object is in
        :param policy_index: the associated storage policy index
        """
        headers_out["user-agent"] = "object-server %s" % os.getpid()
        full_path = "/%s/%s/%s" % (account, container, obj)
        if all([host, partition, contdevice]):
            try:
                with ConnectionTimeout(self.conn_timeout):
                    ip, port = host.rsplit(":", 1)
                    conn = http_connect(ip, port, contdevice, partition, op, full_path, headers_out)
                with Timeout(self.node_timeout):
                    response = conn.getresponse()
                    response.read()
                    if is_success(response.status):
                        return
                    else:
                        self.logger.error(
                            _(
                                "ERROR Container update failed "
                                "(saving for async update later): %(status)d "
                                "response from %(ip)s:%(port)s/%(dev)s"
                            ),
                            {"status": response.status, "ip": ip, "port": port, "dev": contdevice},
                        )
            except (Exception, Timeout):
                self.logger.exception(
                    _("ERROR container update failed with " "%(ip)s:%(port)s/%(dev)s (saving for async update later)"),
                    {"ip": ip, "port": port, "dev": contdevice},
                )
        data = {"op": op, "account": account, "container": container, "obj": obj, "headers": headers_out}
        timestamp = headers_out["x-timestamp"]
        self._diskfile_mgr.pickle_async_update(objdevice, account, container, obj, data, timestamp, policy_index)

    def container_update(self, op, account, container, obj, request, headers_out, objdevice, policy_idx):
        """
        Update the container when objects are updated.

        :param op: operation performed (ex: 'PUT', or 'DELETE')
        :param account: account name for the object
        :param container: container name for the object
        :param obj: object name
        :param request: the original request object driving the update
        :param headers_out: dictionary of headers to send in the container
                            request(s)
        :param objdevice: device name that the object is in
        :param policy_idx: the associated storage policy index
        """
        headers_in = request.headers
        conthosts = [h.strip() for h in headers_in.get("X-Container-Host", "").split(",")]
        contdevices = [d.strip() for d in headers_in.get("X-Container-Device", "").split(",")]
        contpartition = headers_in.get("X-Container-Partition", "")

        if len(conthosts) != len(contdevices):
            # This shouldn't happen unless there's a bug in the proxy,
            # but if there is, we want to know about it.
            self.logger.error(
                _(
                    "ERROR Container update failed: different "
                    "numbers of hosts and devices in request: "
                    '"%s" vs "%s"'
                )
                % (headers_in.get("X-Container-Host", ""), headers_in.get("X-Container-Device", ""))
            )
            return

        if contpartition:
            updates = zip(conthosts, contdevices)
        else:
            updates = []

        headers_out["x-trans-id"] = headers_in.get("x-trans-id", "-")
        headers_out["referer"] = request.as_referer()
        headers_out["X-Backend-Storage-Policy-Index"] = policy_idx
        for conthost, contdevice in updates:
            self.async_update(
                op, account, container, obj, conthost, contpartition, contdevice, headers_out, objdevice, policy_idx
            )

    def delete_at_update(self, op, delete_at, account, container, obj, request, objdevice, policy_index):
        """
        Update the expiring objects container when objects are updated.

        :param op: operation performed (ex: 'PUT', or 'DELETE')
        :param delete_at: scheduled delete in UNIX seconds, int
        :param account: account name for the object
        :param container: container name for the object
        :param obj: object name
        :param request: the original request driving the update
        :param objdevice: device name that the object is in
        :param policy_index: the policy index to be used for tmp dir
        """
        if config_true_value(request.headers.get("x-backend-replication", "f")):
            return
        delete_at = normalize_delete_at_timestamp(delete_at)
        updates = [(None, None)]

        partition = None
        hosts = contdevices = [None]
        headers_in = request.headers
        headers_out = HeaderKeyDict(
            {
                # system accounts are always Policy-0
                "X-Backend-Storage-Policy-Index": 0,
                "x-timestamp": request.timestamp.internal,
                "x-trans-id": headers_in.get("x-trans-id", "-"),
                "referer": request.as_referer(),
            }
        )
        if op != "DELETE":
            delete_at_container = headers_in.get("X-Delete-At-Container", None)
            if not delete_at_container:
                self.logger.warning(
                    "X-Delete-At-Container header must be specified for "
                    "expiring objects background %s to work properly. Making "
                    "best guess as to the container name for now." % op
                )
                # TODO(gholt): In a future release, change the above warning to
                # a raised exception and remove the guess code below.
                delete_at_container = get_expirer_container(
                    delete_at, self.expiring_objects_container_divisor, account, container, obj
                )
            partition = headers_in.get("X-Delete-At-Partition", None)
            hosts = headers_in.get("X-Delete-At-Host", "")
            contdevices = headers_in.get("X-Delete-At-Device", "")
            updates = [
                upd
                for upd in zip((h.strip() for h in hosts.split(",")), (c.strip() for c in contdevices.split(",")))
                if all(upd) and partition
            ]
            if not updates:
                updates = [(None, None)]
            headers_out["x-size"] = "0"
            headers_out["x-content-type"] = "text/plain"
            headers_out["x-etag"] = "d41d8cd98f00b204e9800998ecf8427e"
        else:
            # DELETEs of old expiration data have no way of knowing what the
            # old X-Delete-At-Container was at the time of the initial setting
            # of the data, so a best guess is made here.
            # Worst case is a DELETE is issued now for something that doesn't
            # exist there and the original data is left where it is, where
            # it will be ignored when the expirer eventually tries to issue the
            # object DELETE later since the X-Delete-At value won't match up.
            delete_at_container = get_expirer_container(
                delete_at, self.expiring_objects_container_divisor, account, container, obj
            )
        delete_at_container = normalize_delete_at_timestamp(delete_at_container)

        for host, contdevice in updates:
            self.async_update(
                op,
                self.expiring_objects_account,
                delete_at_container,
                "%s-%s/%s/%s" % (delete_at, account, container, obj),
                host,
                partition,
                contdevice,
                headers_out,
                objdevice,
                policy_index,
            )
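
    # For reference, a PUT or POST that schedules expiration is expected to
    # arrive with headers shaped like the following (all values hypothetical);
    # they feed the X-Delete-At-* lookups in delete_at_update() above:
    #
    #   X-Delete-At: 1400000000
    #   X-Delete-At-Container: 1399968000
    #   X-Delete-At-Partition: 83
    #   X-Delete-At-Host: 10.0.0.1:6001
    #   X-Delete-At-Device: sdb1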

    @public
    @timing_stats()
    def POST(self, request):
        """Handle HTTP POST requests for the Swift Object Server."""
        device, partition, account, container, obj, policy_idx = get_name_and_placement(request, 5, 5, True)
        req_timestamp = valid_timestamp(request)
        new_delete_at = int(request.headers.get("X-Delete-At") or 0)
        if new_delete_at and new_delete_at < time.time():
            return HTTPBadRequest(body="X-Delete-At in past", request=request, content_type="text/plain")
        try:
            disk_file = self.get_diskfile(device, partition, account, container, obj, policy_idx=policy_idx)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=device, request=request)
        try:
            orig_metadata = disk_file.read_metadata()
        except DiskFileXattrNotSupported:
            return HTTPInsufficientStorage(drive=device, request=request)
        except (DiskFileNotExist, DiskFileQuarantined):
            return HTTPNotFound(request=request)
        orig_timestamp = Timestamp(orig_metadata.get("X-Timestamp", 0))
        if orig_timestamp >= req_timestamp:
            return HTTPConflict(request=request, headers={"X-Backend-Timestamp": orig_timestamp.internal})
        metadata = {"X-Timestamp": req_timestamp.internal}
        metadata.update(val for val in request.headers.iteritems() if is_user_meta("object", val[0]))
        for header_key in self.allowed_headers:
            if header_key in request.headers:
                header_caps = header_key.title()
                metadata[header_caps] = request.headers[header_key]
        orig_delete_at = int(orig_metadata.get("X-Delete-At") or 0)
        if orig_delete_at != new_delete_at:
            if new_delete_at:
                self.delete_at_update("PUT", new_delete_at, account, container, obj, request, device, policy_idx)
            if orig_delete_at:
                self.delete_at_update("DELETE", orig_delete_at, account, container, obj, request, device, policy_idx)
        try:
            disk_file.write_metadata(metadata)
        except (DiskFileXattrNotSupported, DiskFileNoSpace):
            return HTTPInsufficientStorage(drive=device, request=request)
        return HTTPAccepted(request=request)

    @public
    @timing_stats()
    def PUT(self, request):
        """Handle HTTP PUT requests for the Swift Object Server."""
        device, partition, account, container, obj, policy_idx = get_name_and_placement(request, 5, 5, True)
        req_timestamp = valid_timestamp(request)
        error_response = check_object_creation(request, obj)
        if error_response:
            return error_response
        new_delete_at = int(request.headers.get("X-Delete-At") or 0)
        if new_delete_at and new_delete_at < time.time():
            return HTTPBadRequest(body="X-Delete-At in past", request=request, content_type="text/plain")
        try:
            fsize = request.message_length()
        except ValueError as e:
            return HTTPBadRequest(body=str(e), request=request, content_type="text/plain")
        try:
            disk_file = self.get_diskfile(device, partition, account, container, obj, policy_idx=policy_idx)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=device, request=request)
        try:
            orig_metadata = disk_file.read_metadata()
        except DiskFileXattrNotSupported:
            return HTTPInsufficientStorage(drive=device, request=request)
        except (DiskFileNotExist, DiskFileQuarantined):
            orig_metadata = {}

        # Checks for If-None-Match
        if request.if_none_match is not None and orig_metadata:
            if "*" in request.if_none_match:
                # File exists already so return 412
                return HTTPPreconditionFailed(request=request)
            if orig_metadata.get("ETag") in request.if_none_match:
                # The current ETag matches, so return 412
                return HTTPPreconditionFailed(request=request)

        orig_timestamp = Timestamp(orig_metadata.get("X-Timestamp", 0))
        if orig_timestamp >= req_timestamp:
            return HTTPConflict(request=request, headers={"X-Backend-Timestamp": orig_timestamp.internal})
        orig_delete_at = int(orig_metadata.get("X-Delete-At") or 0)
        upload_expiration = time.time() + self.max_upload_time
        etag = md5()
        elapsed_time = 0
        try:
            with disk_file.create(size=fsize) as writer:
                upload_size = 0

                def timeout_reader():
                    with ChunkReadTimeout(self.client_timeout):
                        return request.environ["wsgi.input"].read(self.network_chunk_size)

                try:
                    for chunk in iter(lambda: timeout_reader(), ""):
                        start_time = time.time()
                        if start_time > upload_expiration:
                            self.logger.increment("PUT.timeouts")
                            return HTTPRequestTimeout(request=request)
                        etag.update(chunk)
                        upload_size = writer.write(chunk)
                        elapsed_time += time.time() - start_time
                except ChunkReadTimeout:
                    return HTTPRequestTimeout(request=request)
                if upload_size:
                    self.logger.transfer_rate("PUT." + device + ".timing", elapsed_time, upload_size)
                if fsize is not None and fsize != upload_size:
                    return HTTPClientDisconnect(request=request)
                etag = etag.hexdigest()
                if "etag" in request.headers and request.headers["etag"].lower() != etag:
                    return HTTPUnprocessableEntity(request=request)
                metadata = {
                    "X-Timestamp": request.timestamp.internal,
                    "Content-Type": request.headers["content-type"],
                    "ETag": etag,
                    "Content-Length": str(upload_size),
                }
                metadata.update(val for val in request.headers.iteritems() if is_sys_or_user_meta("object", val[0]))
                headers_to_copy = request.headers.get("X-Backend-Replication-Headers", "").split() + list(
                    self.allowed_headers
                )
                for header_key in headers_to_copy:
                    if header_key in request.headers:
                        header_caps = header_key.title()
                        metadata[header_caps] = request.headers[header_key]
                writer.put(metadata)
        except (DiskFileXattrNotSupported, DiskFileNoSpace):
            return HTTPInsufficientStorage(drive=device, request=request)
        if orig_delete_at != new_delete_at:
            if new_delete_at:
                self.delete_at_update("PUT", new_delete_at, account, container, obj, request, device, policy_idx)
            if orig_delete_at:
                self.delete_at_update("DELETE", orig_delete_at, account, container, obj, request, device, policy_idx)
        self.container_update(
            "PUT",
            account,
            container,
            obj,
            request,
            HeaderKeyDict(
                {
                    "x-size": metadata["Content-Length"],
                    "x-content-type": metadata["Content-Type"],
                    "x-timestamp": metadata["X-Timestamp"],
                    "x-etag": metadata["ETag"],
                }
            ),
            device,
            policy_idx,
        )
        return HTTPCreated(request=request, etag=etag)
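
    # Illustrative PUT request for the handler above (path and timestamp are
    # hypothetical). The ETag shown is the md5 of the body "hello"; if a
    # client-supplied ETag does not match the digest computed while streaming,
    # the handler returns 422 Unprocessable Entity:
    #
    #   PUT /sda1/83/a/c/o HTTP/1.1
    #   X-Timestamp: 1400000000.00000
    #   Content-Type: text/plain
    #   Content-Length: 5
    #   ETag: 5d41402abc4b2a76b9719d911017c592
    #
    #   hello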

    @public
    @timing_stats()
    def GET(self, request):
        """Handle HTTP GET requests for the Swift Object Server."""
        device, partition, account, container, obj, policy_idx = get_name_and_placement(request, 5, 5, True)
        keep_cache = self.keep_cache_private or (
            "X-Auth-Token" not in request.headers and "X-Storage-Token" not in request.headers
        )
        try:
            disk_file = self.get_diskfile(device, partition, account, container, obj, policy_idx=policy_idx)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=device, request=request)
        try:
            with disk_file.open():
                metadata = disk_file.get_metadata()
                obj_size = int(metadata["Content-Length"])
                file_x_ts = Timestamp(metadata["X-Timestamp"])
                response = Response(
                    app_iter=disk_file.reader(keep_cache=keep_cache), request=request, conditional_response=True
                )
                response.headers["Content-Type"] = metadata.get("Content-Type", "application/octet-stream")
                for key, value in metadata.iteritems():
                    if is_sys_or_user_meta("object", key) or key.lower() in self.allowed_headers:
                        response.headers[key] = value
                response.etag = metadata["ETag"]
                response.last_modified = math.ceil(float(file_x_ts))
                response.content_length = obj_size
                try:
                    response.content_encoding = metadata["Content-Encoding"]
                except KeyError:
                    pass
                response.headers["X-Timestamp"] = file_x_ts.normal
                response.headers["X-Backend-Timestamp"] = file_x_ts.internal
                resp = request.get_response(response)
        except DiskFileXattrNotSupported:
            return HTTPInsufficientStorage(drive=device, request=request)
        except (DiskFileNotExist, DiskFileQuarantined) as e:
            headers = {}
            if hasattr(e, "timestamp"):
                headers["X-Backend-Timestamp"] = e.timestamp.internal
            resp = HTTPNotFound(request=request, headers=headers, conditional_response=True)
        return resp

    @public
    @timing_stats(sample_rate=0.8)
    def HEAD(self, request):
        """Handle HTTP HEAD requests for the Swift Object Server."""
        device, partition, account, container, obj, policy_idx = get_name_and_placement(request, 5, 5, True)
        try:
            disk_file = self.get_diskfile(device, partition, account, container, obj, policy_idx=policy_idx)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=device, request=request)
        try:
            metadata = disk_file.read_metadata()
        except DiskFileXattrNotSupported:
            return HTTPInsufficientStorage(drive=device, request=request)
        except (DiskFileNotExist, DiskFileQuarantined) as e:
            headers = {}
            if hasattr(e, "timestamp"):
                headers["X-Backend-Timestamp"] = e.timestamp.internal
            return HTTPNotFound(request=request, headers=headers, conditional_response=True)
        response = Response(request=request, conditional_response=True)
        response.headers["Content-Type"] = metadata.get("Content-Type", "application/octet-stream")
        for key, value in metadata.iteritems():
            if is_sys_or_user_meta("object", key) or key.lower() in self.allowed_headers:
                response.headers[key] = value
        response.etag = metadata["ETag"]
        ts = Timestamp(metadata["X-Timestamp"])
        response.last_modified = math.ceil(float(ts))
        # Needed for container sync feature
        response.headers["X-Timestamp"] = ts.normal
        response.headers["X-Backend-Timestamp"] = ts.internal
        response.content_length = int(metadata["Content-Length"])
        try:
            response.content_encoding = metadata["Content-Encoding"]
        except KeyError:
            pass
        return response

    @public
    @timing_stats()
    def DELETE(self, request):
        """Handle HTTP DELETE requests for the Swift Object Server."""
        device, partition, account, container, obj, policy_idx = get_name_and_placement(request, 5, 5, True)
        req_timestamp = valid_timestamp(request)
        try:
            disk_file = self.get_diskfile(device, partition, account, container, obj, policy_idx=policy_idx)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=device, request=request)
        try:
            orig_metadata = disk_file.read_metadata()
        except DiskFileXattrNotSupported:
            return HTTPInsufficientStorage(drive=device, request=request)
        except DiskFileExpired as e:
            orig_timestamp = e.timestamp
            orig_metadata = e.metadata
            response_class = HTTPNotFound
        except DiskFileDeleted as e:
            orig_timestamp = e.timestamp
            orig_metadata = {}
            response_class = HTTPNotFound
        except (DiskFileNotExist, DiskFileQuarantined):
            orig_timestamp = 0
            orig_metadata = {}
            response_class = HTTPNotFound
        else:
            orig_timestamp = Timestamp(orig_metadata.get("X-Timestamp", 0))
            if orig_timestamp < req_timestamp:
                response_class = HTTPNoContent
            else:
                response_class = HTTPConflict
        response_timestamp = max(orig_timestamp, req_timestamp)
        orig_delete_at = int(orig_metadata.get("X-Delete-At") or 0)
        try:
            req_if_delete_at_val = request.headers["x-if-delete-at"]
            req_if_delete_at = int(req_if_delete_at_val)
        except KeyError:
            pass
        except ValueError:
            return HTTPBadRequest(request=request, body="Bad X-If-Delete-At header value")
        else:
            # request includes x-if-delete-at; we must not place a tombstone
            # if we cannot verify the x-if-delete-at time
            if not orig_timestamp:
                # no object found at all
                return HTTPNotFound()
            if orig_delete_at != req_if_delete_at:
                return HTTPPreconditionFailed(request=request, body="X-If-Delete-At and X-Delete-At do not match")
            else:
                # differentiate success from no object at all
                response_class = HTTPNoContent
        if orig_delete_at:
            self.delete_at_update("DELETE", orig_delete_at, account, container, obj, request, device, policy_idx)
        if orig_timestamp < req_timestamp:
            disk_file.delete(req_timestamp)
            self.container_update(
                "DELETE",
                account,
                container,
                obj,
                request,
                HeaderKeyDict({"x-timestamp": req_timestamp.internal}),
                device,
                policy_idx,
            )
        return response_class(request=request, headers={"X-Backend-Timestamp": response_timestamp.internal})
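
    # Illustrative DELETE issued by the object expirer (values hypothetical):
    # it carries X-If-Delete-At so the tombstone is only written if the
    # stored X-Delete-At still matches, per the precondition checks above:
    #
    #   DELETE /sda1/83/a/c/o HTTP/1.1
    #   X-Timestamp: 1400000001.00000
    #   X-If-Delete-At: 1400000000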

    @public
    @replication
    @timing_stats(sample_rate=0.1)
    def REPLICATE(self, request):
        """
        Handle REPLICATE requests for the Swift Object Server.  This is used
        by the object replicator to get hashes for directories.
        """
        device, partition, suffix, policy_idx = get_name_and_placement(request, 2, 3, True)
        try:
            hashes = self._diskfile_mgr.get_hashes(device, partition, suffix, policy_idx)
        except DiskFileDeviceUnavailable:
            resp = HTTPInsufficientStorage(drive=device, request=request)
        else:
            resp = Response(body=pickle.dumps(hashes))
        return resp

    @public
    @replication
    @timing_stats(sample_rate=0.1)
    def REPLICATION(self, request):
        return Response(app_iter=ssync_receiver.Receiver(self, request)())

    def __call__(self, env, start_response):
        """WSGI Application entry point for the Swift Object Server."""
        start_time = time.time()
        req = Request(env)
        self.logger.txn_id = req.headers.get("x-trans-id", None)

        if not check_utf8(req.path_info):
            res = HTTPPreconditionFailed(body="Invalid UTF8 or contains NULL")
        else:
            try:
                # disallow methods which have not been marked 'public'
                try:
                    if req.method not in self.allowed_methods:
                        raise AttributeError("Not allowed method.")
                except AttributeError:
                    res = HTTPMethodNotAllowed()
                else:
                    method = getattr(self, req.method)
                    res = method(req)
            except DiskFileCollision:
                res = HTTPForbidden(request=req)
            except HTTPException as error_response:
                res = error_response
            except (Exception, Timeout):
                self.logger.exception(
                    _("ERROR __call__ error with %(method)s" " %(path)s "), {"method": req.method, "path": req.path}
                )
                res = HTTPInternalServerError(body=traceback.format_exc())
        trans_time = time.time() - start_time
        if self.log_requests:
            log_line = get_log_line(req, res, trans_time, "")
            if req.method in ("REPLICATE", "REPLICATION") or "X-Backend-Replication" in req.headers:
                self.logger.debug(log_line)
            else:
                self.logger.info(log_line)
        if req.method in ("PUT", "DELETE"):
            slow = self.slow - trans_time
            if slow > 0:
                sleep(slow)

        # To be able to zero-copy send the object, we need a few things.
        # First, we have to be responding successfully to a GET, or else we're
        # not sending the object. Second, we have to be able to extract the
        # socket file descriptor from the WSGI input object. Third, the
        # diskfile has to support zero-copy send (a minimal sketch of that
        # duck-typed interface follows this method).
        #
        # There's a good chance that this could work for 206 responses too,
        # but the common case is sending the whole object, so we'll start
        # there.
        if req.method == "GET" and res.status_int == 200 and isinstance(env["wsgi.input"], wsgi.Input):
            app_iter = getattr(res, "app_iter", None)
            checker = getattr(app_iter, "can_zero_copy_send", None)
            if checker and checker():
                # For any kind of zero-copy thing like sendfile or splice, we
                # need the file descriptor. Eventlet doesn't provide a clean
                # way of getting that, so we resort to this.
                wsock = env["wsgi.input"].get_socket()
                wsockfd = wsock.fileno()

                # Don't call zero_copy_send() until after we force the HTTP
                # headers out of Eventlet and into the socket.
                def zero_copy_iter():
                    # If possible, set TCP_CORK so that headers don't
                    # immediately go on the wire, but instead, wait for some
                    # response body to make the TCP frames as large as
                    # possible (and hence as few packets as possible).
                    #
                    # On non-Linux systems, we might consider TCP_NODELAY, but
                    # since the only known zero-copy-capable diskfile uses
                    # Linux-specific syscalls, we'll defer that work until
                    # someone needs it.
                    if hasattr(socket, "TCP_CORK"):
                        wsock.setsockopt(socket.IPPROTO_TCP, socket.TCP_CORK, 1)
                    yield EventletPlungerString()
                    try:
                        app_iter.zero_copy_send(wsockfd)
                    except Exception:
                        self.logger.exception("zero_copy_send() blew up")
                        raise
                    yield ""

                # Get headers ready to go out
                res(env, start_response)
                return zero_copy_iter()
            else:
                return res(env, start_response)
        else:
            return res(env, start_response)
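
# A minimal sketch (not part of the server above) of the duck-typed interface
# that __call__ probes for on the response app_iter: the zero-copy path is
# only taken when can_zero_copy_send() returns True, after which the raw
# client socket fd is handed to zero_copy_send(). os.sendfile is used here as
# an illustrative stand-in (available on Python 3.3+); the real zero-copy
# diskfile uses Linux-specific splice() syscalls instead.
class SendfileReader(object):
    def __init__(self, data_fd, length):
        self.data_fd = data_fd
        self.length = length

    def can_zero_copy_send(self):
        return hasattr(os, 'sendfile')

    def zero_copy_send(self, wsockfd):
        # Push the object straight from the data file descriptor to the
        # client socket without copying through userspace.
        offset = 0
        while offset < self.length:
            offset += os.sendfile(wsockfd, self.data_fd, offset,
                                  self.length - offset)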

class RestoreMiddleware(object):
    def __init__(self, app, conf, *args, **kwargs):
        self.app = app
        self.conf = conf
        self.logger = get_logger(self.conf, log_route='restore')
        self._diskfile_mgr = DiskFileManager(conf, self.logger)

    def __call__(self, env, start_response):
        req = Request(env)

        if (req.method == 'PUT') or (req.method == 'POST'):
            if 'X-Object-Meta-S3-Restored' in req.headers:
                return self.save_object(env)(env, start_response)
            if 'X-Object-Meta-S3-Restore' in req.headers:
                return self.set_restoring(env)(env, start_response)
        return self.app(env, start_response)

    def _split_request_path(self, req):
        self.device, self.partition, self.account, self.container, \
            self.obj = split_and_validate_path(req, 5, 5, True)

    def save_object(self, env):
        # Called by the Restorer daemon.
        req = Request(env)
        self._split_request_path(req)
        try:
            disk_file = self.get_diskfile(self.device, self.partition,
                                          self.account, self.container,
                                          self.obj)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=self.device,
                                           request=Request(env))
        ori_meta = disk_file.read_metadata()
        metadata = {}
        metadata.update(val for val in req.headers.iteritems()
                        if is_user_meta('object', val[0]))
        del metadata['X-Object-Meta-S3-Restored']
        # Preserve the original X-Timestamp value.
        metadata['X-Timestamp'] = ori_meta['X-Timestamp']
        metadata['Content-Type'] = ori_meta['Content-Type']
        fsize = req.message_length()
        etag = md5()
        try:
            with disk_file.create(size=fsize) as writer:
                def timeout_reader():
                    with ChunkReadTimeout(60):
                        return req.environ['wsgi.input'].read(65536)

                try:
                    for chunk in iter(lambda: timeout_reader(), ''):
                        etag.update(chunk)
                        writer.write(chunk)
                except ChunkReadTimeout:
                    return HTTPRequestTimeout(request=req)

                etag = etag.hexdigest()
                metadata['ETag'] = etag
                metadata['Content-Length'] = str(fsize)

                writer.put(metadata)
        except DiskFileNoSpace:
            return HTTPInsufficientStorage(drive=self.device, request=req)

        return HTTPCreated(request=req, etag=etag)

    def set_restoring(self, env):
        # Called when the Lifecycle middleware marks an object as being restored.
        req = Request(env)
        self._split_request_path(req)
        try:
            disk_file = self.get_diskfile(self.device, self.partition,
                                          self.account, self.container,
                                          self.obj)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=self.device,
                                           request=Request(env))
        ori_meta = disk_file.read_metadata()
        metadata = ori_meta
        metadata.update(val for val in req.headers.iteritems()
                        if is_user_meta('object', val[0]))

        # The original X-Timestamp is preserved, carried over in ori_meta.
        with disk_file.create(size=0) as writer:
            writer.put(metadata)
        return HTTPCreated(request=req, etag=ori_meta['ETag'])

    def get_diskfile(self, device, partition, account, container, obj,
                     **kwargs):
        return self._diskfile_mgr.get_diskfile(device, partition, account,
                                               container, obj, **kwargs)
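
# Illustrative only: how the middleware above dispatches requests. This
# assumes the usual object-server path layout
# (/device/partition/account/container/object) and uses the same Request
# class this module already imports; the header names are the ones __call__
# actually checks.
restore_env = Request.blank(
    '/sda1/0/AUTH_test/cont/obj',
    environ={'REQUEST_METHOD': 'POST'},
    headers={'X-Object-Meta-S3-Restore': 'ongoing-request="true"'}).environ
# With X-Object-Meta-S3-Restore present, __call__ routes to set_restoring();
# with X-Object-Meta-S3-Restored, it routes to save_object(); any other
# request is passed straight through to self.app.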
Beispiel #8
0
class TestAuditor(unittest.TestCase):
    def setUp(self):
        self.testdir = os.path.join(mkdtemp(), 'tmp_test_object_auditor')
        self.devices = os.path.join(self.testdir, 'node')
        self.logger = FakeLogger()
        rmtree(self.testdir, ignore_errors=1)
        mkdirs(os.path.join(self.devices, 'sda'))
        self.objects = os.path.join(self.devices, 'sda', 'objects')

        os.mkdir(os.path.join(self.devices, 'sdb'))
        self.objects_2 = os.path.join(self.devices, 'sdb', 'objects')

        os.mkdir(self.objects)
        self.parts = {}
        for part in ['0', '1', '2', '3']:
            self.parts[part] = os.path.join(self.objects, part)
            os.mkdir(os.path.join(self.objects, part))

        self.conf = dict(devices=self.devices,
                         mount_check='false',
                         object_size_stats='10,100,1024,10240')
        self.df_mgr = DiskFileManager(self.conf, self.logger)
        self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'o')

    def tearDown(self):
        rmtree(os.path.dirname(self.testdir), ignore_errors=1)
        unit.xattr_data = {}

    def test_object_audit_extra_data(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger)
        data = '0' * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            timestamp = str(normalize_timestamp(time.time()))
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            pre_quarantines = auditor_worker.quarantines

            auditor_worker.object_audit(
                AuditLocation(self.disk_file._datadir, 'sda', '0'))
            self.assertEquals(auditor_worker.quarantines, pre_quarantines)

            os.write(writer._fd, 'extra_data')
            auditor_worker.object_audit(
                AuditLocation(self.disk_file._datadir, 'sda', '0'))
            self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_audit_diff_data(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger)
        data = '0' * 1024
        etag = md5()
        timestamp = str(normalize_timestamp(time.time()))
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            pre_quarantines = auditor_worker.quarantines

        # remake so it will have metadata
        self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'o')

        auditor_worker.object_audit(
            AuditLocation(self.disk_file._datadir, 'sda', '0'))
        self.assertEquals(auditor_worker.quarantines, pre_quarantines)
        etag = md5()
        etag.update('1' + '0' * 1023)
        etag = etag.hexdigest()
        metadata['ETag'] = etag

        with self.disk_file.create() as writer:
            writer.write(data)
            writer.put(metadata)

        auditor_worker.object_audit(
            AuditLocation(self.disk_file._datadir, 'sda', '0'))
        self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_audit_no_meta(self):
        timestamp = str(normalize_timestamp(time.time()))
        path = os.path.join(self.disk_file._datadir, timestamp + '.data')
        mkdirs(self.disk_file._datadir)
        fp = open(path, 'w')
        fp.write('0' * 1024)
        fp.close()
        invalidate_hash(os.path.dirname(self.disk_file._datadir))
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger)
        pre_quarantines = auditor_worker.quarantines
        auditor_worker.object_audit(
            AuditLocation(self.disk_file._datadir, 'sda', '0'))
        self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_audit_will_not_swallow_errors_in_tests(self):
        timestamp = str(normalize_timestamp(time.time()))
        path = os.path.join(self.disk_file._datadir, timestamp + '.data')
        mkdirs(self.disk_file._datadir)
        with open(path, 'w') as f:
            write_metadata(f, {'name': '/a/c/o'})
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger)

        def blowup(*args):
            raise NameError('tpyo')

        with mock.patch.object(DiskFileManager,
                               'get_diskfile_from_audit_location', blowup):
            self.assertRaises(NameError, auditor_worker.object_audit,
                              AuditLocation(os.path.dirname(path), 'sda', '0'))

    def test_failsafe_object_audit_will_swallow_errors_in_tests(self):
        timestamp = str(normalize_timestamp(time.time()))
        path = os.path.join(self.disk_file._datadir, timestamp + '.data')
        mkdirs(self.disk_file._datadir)
        with open(path, 'w') as f:
            write_metadata(f, {'name': '/a/c/o'})
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger)

        def blowup(*args):
            raise NameError('tpyo')

        with mock.patch('swift.obj.diskfile.DiskFile', blowup):
            auditor_worker.failsafe_object_audit(
                AuditLocation(os.path.dirname(path), 'sda', '0'))
        self.assertEquals(auditor_worker.errors, 1)

    def test_generic_exception_handling(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger)
        timestamp = str(normalize_timestamp(time.time()))
        pre_errors = auditor_worker.errors
        data = '0' * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
        with mock.patch('swift.obj.diskfile.DiskFile', lambda *_: 1 / 0):
            auditor_worker.audit_all_objects()
        self.assertEquals(auditor_worker.errors, pre_errors + 1)

    def test_object_run_once_pass(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger)
        auditor_worker.log_time = 0
        timestamp = str(normalize_timestamp(time.time()))
        pre_quarantines = auditor_worker.quarantines
        data = '0' * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
        auditor_worker.audit_all_objects()
        self.assertEquals(auditor_worker.quarantines, pre_quarantines)
        self.assertEquals(auditor_worker.stats_buckets[1024], 1)
        self.assertEquals(auditor_worker.stats_buckets[10240], 0)

    def test_object_run_once_no_sda(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger)
        timestamp = str(normalize_timestamp(time.time()))
        pre_quarantines = auditor_worker.quarantines
        data = '0' * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            os.write(writer._fd, 'extra_data')
        auditor_worker.audit_all_objects()
        self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_run_once_multi_devices(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger)
        timestamp = str(normalize_timestamp(time.time()))
        pre_quarantines = auditor_worker.quarantines
        data = '0' * 10
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
        auditor_worker.audit_all_objects()
        self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'ob')
        data = '1' * 10
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            os.write(writer._fd, 'extra_data')
        auditor_worker.audit_all_objects()
        self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_run_fast_track_non_zero(self):
        self.auditor = auditor.ObjectAuditor(self.conf)
        self.auditor.log_time = 0
        data = '0' * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': str(normalize_timestamp(time.time())),
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            etag = md5()
            etag.update('1' + '0' * 1023)
            etag = etag.hexdigest()
            metadata['ETag'] = etag
            write_metadata(writer._fd, metadata)

        quarantine_path = os.path.join(self.devices, 'sda', 'quarantined',
                                       'objects')
        self.auditor.run_once(zero_byte_fps=50)
        self.assertFalse(os.path.isdir(quarantine_path))
        self.auditor.run_once()
        self.assertTrue(os.path.isdir(quarantine_path))

    def setup_bad_zero_byte(self, with_ts=False):
        self.auditor = auditor.ObjectAuditor(self.conf)
        self.auditor.log_time = 0
        ts_file_path = ''
        if with_ts:
            name_hash = hash_path('a', 'c', 'o')
            dir_path = os.path.join(self.devices, 'sda',
                                    storage_directory(DATADIR, '0', name_hash))
            ts_file_path = os.path.join(dir_path, '99999.ts')
            if not os.path.exists(dir_path):
                mkdirs(dir_path)
            fp = open(ts_file_path, 'w')
            write_metadata(fp, {'X-Timestamp': '99999', 'name': '/a/c/o'})
            fp.close()

        etag = md5()
        with self.disk_file.create() as writer:
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': str(normalize_timestamp(time.time())),
                'Content-Length': 10,
            }
            writer.put(metadata)
            etag = md5()
            etag = etag.hexdigest()
            metadata['ETag'] = etag
            write_metadata(writer._fd, metadata)
        return ts_file_path

    def test_object_run_fast_track_all(self):
        self.setup_bad_zero_byte()
        self.auditor.run_once()
        quarantine_path = os.path.join(self.devices, 'sda', 'quarantined',
                                       'objects')
        self.assertTrue(os.path.isdir(quarantine_path))

    def test_object_run_fast_track_zero(self):
        self.setup_bad_zero_byte()
        self.auditor.run_once(zero_byte_fps=50)
        quarantine_path = os.path.join(self.devices, 'sda', 'quarantined',
                                       'objects')
        self.assertTrue(os.path.isdir(quarantine_path))

    def test_object_run_fast_track_zero_check_closed(self):
        rat = [False]

        class FakeFile(DiskFile):
            def _quarantine(self, data_file, msg):
                rat[0] = True
                DiskFile._quarantine(self, data_file, msg)

        self.setup_bad_zero_byte()
        was_df = auditor.diskfile.DiskFile
        try:
            auditor.diskfile.DiskFile = FakeFile
            self.auditor.run_once(zero_byte_fps=50)
            quarantine_path = os.path.join(self.devices, 'sda', 'quarantined',
                                           'objects')
            self.assertTrue(os.path.isdir(quarantine_path))
            self.assertTrue(rat[0])
        finally:
            auditor.diskfile.DiskFile = was_df

    def test_with_tombstone(self):
        ts_file_path = self.setup_bad_zero_byte(with_ts=True)
        self.assertTrue(ts_file_path.endswith('ts'))
        self.auditor.run_once()
        self.assertTrue(os.path.exists(ts_file_path))

    def test_sleeper(self):
        auditor.SLEEP_BETWEEN_AUDITS = 0.10
        my_auditor = auditor.ObjectAuditor(self.conf)
        start = time.time()
        my_auditor._sleep()
        delta_t = time.time() - start
        self.assert_(delta_t > 0.08)
        self.assert_(delta_t < 0.12)

    def test_run_forever(self):
        class StopForever(Exception):
            pass

        class ObjectAuditorMock(object):
            check_args = ()
            check_kwargs = {}
            fork_called = 0
            fork_res = 0

            def mock_run(self, *args, **kwargs):
                self.check_args = args
                self.check_kwargs = kwargs

            def mock_sleep(self):
                raise StopForever('stop')

            def mock_fork(self):
                self.fork_called += 1
                return self.fork_res

        my_auditor = auditor.ObjectAuditor(
            dict(devices=self.devices,
                 mount_check='false',
                 zero_byte_files_per_second=89))
        mocker = ObjectAuditorMock()
        my_auditor.run_once = mocker.mock_run
        my_auditor._sleep = mocker.mock_sleep
        was_fork = os.fork
        try:
            os.fork = mocker.mock_fork
            self.assertRaises(StopForever,
                              my_auditor.run_forever,
                              zero_byte_fps=50)
            self.assertEquals(mocker.check_kwargs['zero_byte_fps'], 50)
            self.assertEquals(mocker.fork_called, 0)

            self.assertRaises(StopForever, my_auditor.run_forever)
            self.assertEquals(mocker.fork_called, 1)
            self.assertEquals(mocker.check_args, ())

            mocker.fork_res = 1
            self.assertRaises(StopForever, my_auditor.run_forever)
            self.assertEquals(mocker.fork_called, 2)
            self.assertEquals(mocker.check_kwargs['zero_byte_fps'], 89)

        finally:
            os.fork = was_fork
Beispiel #9
0
class ObjectController(object):
    """Implements the WSGI application for the Swift Object Server."""

    def __init__(self, conf, logger=None):
        """
        Creates a new WSGI application for the Swift Object Server. An
        example configuration is given at
        <source-dir>/etc/object-server.conf-sample or
        /etc/swift/object-server.conf-sample.
        """
        self.logger = logger or get_logger(conf, log_route="object-server")
        self.node_timeout = int(conf.get("node_timeout", 3))
        self.conn_timeout = float(conf.get("conn_timeout", 0.5))
        self.client_timeout = int(conf.get("client_timeout", 60))
        self.disk_chunk_size = int(conf.get("disk_chunk_size", 65536))
        self.network_chunk_size = int(conf.get("network_chunk_size", 65536))
        self.log_requests = config_true_value(conf.get("log_requests", "true"))
        self.max_upload_time = int(conf.get("max_upload_time", 86400))
        self.slow = int(conf.get("slow", 0))
        self.keep_cache_private = config_true_value(conf.get("keep_cache_private", "false"))
        replication_server = conf.get("replication_server", None)
        if replication_server is not None:
            replication_server = config_true_value(replication_server)
        self.replication_server = replication_server

        default_allowed_headers = """
            content-disposition,
            content-encoding,
            x-delete-at,
            x-object-manifest,
            x-static-large-object,
        """
        extra_allowed_headers = [
            header.strip().lower()
            for header in conf.get("allowed_headers", default_allowed_headers).split(",")
            if header.strip()
        ]
        self.allowed_headers = set()
        for header in extra_allowed_headers:
            if header not in DATAFILE_SYSTEM_META:
                self.allowed_headers.add(header)
        self.expiring_objects_account = (conf.get("auto_create_account_prefix") or ".") + "expiring_objects"
        self.expiring_objects_container_divisor = int(conf.get("expiring_objects_container_divisor") or 86400)
        # Initialization was successful, so now apply the network chunk size
        # parameter as the default read / write buffer size for the network
        # sockets.
        #
        # NOTE WELL: This is a class setting, so until we can set this on a
        # per-connection basis, this affects reading and writing on ALL
        # sockets, those between the proxy servers and external clients, and
        # those between the proxy servers and the other internal servers.
        #
        # ** Because the primary motivation for this is to optimize how data
        # is written back to the proxy server, we could use the value from the
        # disk_chunk_size parameter. However, it affects all created sockets
        # using this class so we have chosen to tie it to the
        # network_chunk_size parameter value instead.
        socket._fileobject.default_bufsize = self.network_chunk_size

        # Provide further setup specific to an object server implementation.
        self.setup(conf)

    def setup(self, conf):
        """
        Implementation specific setup. This method is called at the very end
        by the constructor to allow a specific implementation to modify
        existing attributes or add its own attributes.

        :param conf: WSGI configuration parameter
        """

        # Common on-disk hierarchy shared across account, container and object
        # servers.
        self._diskfile_mgr = DiskFileManager(conf, self.logger)
        # This is populated by global_conf_callback way below as the semaphore
        # is shared by all workers.
        if "replication_semaphore" in conf:
            # The value was put in a list so it could get past paste
            self.replication_semaphore = conf["replication_semaphore"][0]
        else:
            self.replication_semaphore = None
        self.replication_failure_threshold = int(conf.get("replication_failure_threshold") or 100)
        self.replication_failure_ratio = float(conf.get("replication_failure_ratio") or 1.0)

    def get_diskfile(self, device, partition, account, container, obj, **kwargs):
        """
        Utility method for instantiating a DiskFile object supporting a given
        REST API.

        An implementation of the object server that wants to use a different
        DiskFile class would simply override this method to provide that
        behavior.
        """
        return self._diskfile_mgr.get_diskfile(device, partition, account, container, obj, **kwargs)
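
# Hedged sketch (standalone, not part of the class above) of what the
# docstring describes: an object-server variant that swaps in its own
# DiskFile implementation by overriding get_diskfile(). MyDiskFile is
# hypothetical; it only needs to expose the same interface the handlers
# rely on (read_metadata, create, delete, reader, ...).
class MyObjectController(ObjectController):
    def get_diskfile(self, device, partition, account, container, obj,
                     **kwargs):
        return MyDiskFile(device, partition, account, container, obj,
                          **kwargs)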

    def async_update(self, op, account, container, obj, host, partition, contdevice, headers_out, objdevice):
        """
        Sends or saves an async update.

        :param op: operation performed (ex: 'PUT', or 'DELETE')
        :param account: account name for the object
        :param container: container name for the object
        :param obj: object name
        :param host: host that the container is on
        :param partition: partition that the container is on
        :param contdevice: device name that the container is on
        :param headers_out: dictionary of headers to send in the container
                            request
        :param objdevice: device name that the object is in
        """
        headers_out["user-agent"] = "obj-server %s" % os.getpid()
        full_path = "/%s/%s/%s" % (account, container, obj)
        if all([host, partition, contdevice]):
            try:
                with ConnectionTimeout(self.conn_timeout):
                    ip, port = host.rsplit(":", 1)
                    conn = http_connect(ip, port, contdevice, partition, op, full_path, headers_out)
                with Timeout(self.node_timeout):
                    response = conn.getresponse()
                    response.read()
                    if is_success(response.status):
                        return
                    else:
                        self.logger.error(
                            _(
                                "ERROR Container update failed "
                                "(saving for async update later): %(status)d "
                                "response from %(ip)s:%(port)s/%(dev)s"
                            ),
                            {"status": response.status, "ip": ip, "port": port, "dev": contdevice},
                        )
            except (Exception, Timeout):
                self.logger.exception(
                    _("ERROR container update failed with " "%(ip)s:%(port)s/%(dev)s (saving for async update later)"),
                    {"ip": ip, "port": port, "dev": contdevice},
                )
        data = {"op": op, "account": account, "container": container, "obj": obj, "headers": headers_out}
        timestamp = headers_out["x-timestamp"]
        self._diskfile_mgr.pickle_async_update(objdevice, account, container, obj, data, timestamp)

    def container_update(self, op, account, container, obj, request, headers_out, objdevice):
        """
        Update the container when objects are updated.

        :param op: operation performed (ex: 'PUT', or 'DELETE')
        :param account: account name for the object
        :param container: container name for the object
        :param obj: object name
        :param request: the original request object driving the update
        :param headers_out: dictionary of headers to send in the container
                            request(s)
        :param objdevice: device name that the object is in
        """
        headers_in = request.headers
        conthosts = [h.strip() for h in headers_in.get("X-Container-Host", "").split(",")]
        contdevices = [d.strip() for d in headers_in.get("X-Container-Device", "").split(",")]
        contpartition = headers_in.get("X-Container-Partition", "")

        if len(conthosts) != len(contdevices):
            # This shouldn't happen unless there's a bug in the proxy,
            # but if there is, we want to know about it.
            self.logger.error(
                _(
                    "ERROR Container update failed: different  "
                    "numbers of hosts and devices in request: "
                    '"%s" vs "%s"'
                )
                % (headers_in.get("X-Container-Host", ""), headers_in.get("X-Container-Device", ""))
            )
            return

        if contpartition:
            updates = zip(conthosts, contdevices)
        else:
            updates = []

        headers_out["x-trans-id"] = headers_in.get("x-trans-id", "-")
        headers_out["referer"] = request.as_referer()
        for conthost, contdevice in updates:
            self.async_update(op, account, container, obj, conthost, contpartition, contdevice, headers_out, objdevice)
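
# Illustrative only (standalone snippet): the proxy passes the container
# locations for an object as comma-separated headers, and container_update()
# fans the update out to each (host, device) pair for the single container
# partition. The addresses below are made up for the example.
headers_in = {
    'X-Container-Host': '10.0.0.1:6001,10.0.0.2:6001',
    'X-Container-Device': 'sdb1,sdc1',
    'X-Container-Partition': '312',
}
updates = zip([h.strip() for h in headers_in['X-Container-Host'].split(',')],
              [d.strip() for d in headers_in['X-Container-Device'].split(',')])
# -> [('10.0.0.1:6001', 'sdb1'), ('10.0.0.2:6001', 'sdc1')]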

    def delete_at_update(self, op, delete_at, account, container, obj, request, objdevice):
        """
        Update the expiring objects container when objects are updated.

        :param op: operation performed (ex: 'PUT', or 'DELETE')
        :param delete_at: scheduled delete in UNIX seconds, int
        :param account: account name for the object
        :param container: container name for the object
        :param obj: object name
        :param request: the original request driving the update
        :param objdevice: device name that the object is in
        """
        if config_true_value(request.headers.get("x-backend-replication", "f")):
            return
        delete_at = normalize_delete_at_timestamp(delete_at)
        updates = [(None, None)]

        partition = None
        hosts = contdevices = [None]
        headers_in = request.headers
        headers_out = HeaderKeyDict(
            {
                "x-timestamp": headers_in["x-timestamp"],
                "x-trans-id": headers_in.get("x-trans-id", "-"),
                "referer": request.as_referer(),
            }
        )
        if op != "DELETE":
            delete_at_container = headers_in.get("X-Delete-At-Container", None)
            if not delete_at_container:
                self.logger.warning(
                    "X-Delete-At-Container header must be specified for "
                    "expiring objects background %s to work properly. Making "
                    "best guess as to the container name for now." % op
                )
                # TODO(gholt): In a future release, change the above warning to
                # a raised exception and remove the guess code below.
                delete_at_container = (
                    int(delete_at) / self.expiring_objects_container_divisor * self.expiring_objects_container_divisor
                )
            partition = headers_in.get("X-Delete-At-Partition", None)
            hosts = headers_in.get("X-Delete-At-Host", "")
            contdevices = headers_in.get("X-Delete-At-Device", "")
            updates = [
                upd
                for upd in zip((h.strip() for h in hosts.split(",")), (c.strip() for c in contdevices.split(",")))
                if all(upd) and partition
            ]
            if not updates:
                updates = [(None, None)]
            headers_out["x-size"] = "0"
            headers_out["x-content-type"] = "text/plain"
            headers_out["x-etag"] = "d41d8cd98f00b204e9800998ecf8427e"
        else:
            # DELETEs of old expiration data have no way of knowing what the
            # old X-Delete-At-Container was at the time of the initial setting
            # of the data, so a best guess is made here.
            # Worst case is a DELETE is issued now for something that doesn't
            # exist there and the original data is left where it is, where
            # it will be ignored when the expirer eventually tries to issue the
            # object DELETE later since the X-Delete-At value won't match up.
            delete_at_container = str(
                int(delete_at) / self.expiring_objects_container_divisor * self.expiring_objects_container_divisor
            )
        delete_at_container = normalize_delete_at_timestamp(delete_at_container)

        for host, contdevice in updates:
            self.async_update(
                op,
                self.expiring_objects_account,
                delete_at_container,
                "%s-%s/%s/%s" % (delete_at, account, container, obj),
                host,
                partition,
                contdevice,
                headers_out,
                objdevice,
            )
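
# Worked example (standalone) of the container-name guess used above: with
# the default divisor of 86400 seconds, every expiring-objects entry
# scheduled for the same UTC day lands in the same container (Python 2
# integer division).
delete_at = 1400000123
divisor = 86400
delete_at_container = int(delete_at) / divisor * divisor
# 1400000123 // 86400 == 16203; 16203 * 86400 == 1399939200
assert delete_at_container == 1399939200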

    @public
    @timing_stats()
    def POST(self, request):
        """Handle HTTP POST requests for the Swift Object Server."""
        device, partition, account, container, obj = split_and_validate_path(request, 5, 5, True)

        if "x-timestamp" not in request.headers or not check_float(request.headers["x-timestamp"]):
            return HTTPBadRequest(body="Missing timestamp", request=request, content_type="text/plain")
        new_delete_at = int(request.headers.get("X-Delete-At") or 0)
        if new_delete_at and new_delete_at < time.time():
            return HTTPBadRequest(body="X-Delete-At in past", request=request, content_type="text/plain")
        try:
            disk_file = self.get_diskfile(device, partition, account, container, obj)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=device, request=request)
        try:
            orig_metadata = disk_file.read_metadata()
        except (DiskFileNotExist, DiskFileQuarantined):
            return HTTPNotFound(request=request)
        orig_timestamp = orig_metadata.get("X-Timestamp", "0")
        if orig_timestamp >= request.headers["x-timestamp"]:
            return HTTPConflict(request=request)
        metadata = {"X-Timestamp": request.headers["x-timestamp"]}
        metadata.update(val for val in request.headers.iteritems() if is_user_meta("object", val[0]))
        for header_key in self.allowed_headers:
            if header_key in request.headers:
                header_caps = header_key.title()
                metadata[header_caps] = request.headers[header_key]
        orig_delete_at = int(orig_metadata.get("X-Delete-At") or 0)
        if orig_delete_at != new_delete_at:
            if new_delete_at:
                self.delete_at_update("PUT", new_delete_at, account, container, obj, request, device)
            if orig_delete_at:
                self.delete_at_update("DELETE", orig_delete_at, account, container, obj, request, device)
        disk_file.write_metadata(metadata)
        return HTTPAccepted(request=request)
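
# Illustrative only (uses the same is_user_meta helper this module already
# imports): a POST keeps X-Object-Meta-* user metadata plus the configured
# allowed_headers and drops everything else, so user metadata is replaced
# rather than merged with what was stored before.
is_user_meta('object', 'X-Object-Meta-Color')  # True  -> kept
is_user_meta('object', 'X-Delete-At')          # False -> kept via allowed_headers
is_user_meta('object', 'Content-Length')       # False -> dropped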

    @public
    @timing_stats()
    def PUT(self, request):
        """Handle HTTP PUT requests for the Swift Object Server."""
        device, partition, account, container, obj = split_and_validate_path(request, 5, 5, True)

        if "x-timestamp" not in request.headers or not check_float(request.headers["x-timestamp"]):
            return HTTPBadRequest(body="Missing timestamp", request=request, content_type="text/plain")
        error_response = check_object_creation(request, obj)
        if error_response:
            return error_response
        new_delete_at = int(request.headers.get("X-Delete-At") or 0)
        if new_delete_at and new_delete_at < time.time():
            return HTTPBadRequest(body="X-Delete-At in past", request=request, content_type="text/plain")
        try:
            fsize = request.message_length()
        except ValueError as e:
            return HTTPBadRequest(body=str(e), request=request, content_type="text/plain")
        try:
            disk_file = self.get_diskfile(device, partition, account, container, obj)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=device, request=request)
        try:
            orig_metadata = disk_file.read_metadata()
        except (DiskFileNotExist, DiskFileQuarantined):
            orig_metadata = {}
        orig_timestamp = orig_metadata.get("X-Timestamp")
        if orig_timestamp and orig_timestamp >= request.headers["x-timestamp"]:
            return HTTPConflict(request=request)
        orig_delete_at = int(orig_metadata.get("X-Delete-At") or 0)
        upload_expiration = time.time() + self.max_upload_time
        etag = md5()
        elapsed_time = 0
        try:
            with disk_file.create(size=fsize) as writer:
                upload_size = 0
                reader = request.environ["wsgi.input"].read
                for chunk in iter(lambda: reader(self.network_chunk_size), ""):
                    start_time = time.time()
                    if start_time > upload_expiration:
                        self.logger.increment("PUT.timeouts")
                        return HTTPRequestTimeout(request=request)
                    etag.update(chunk)
                    upload_size = writer.write(chunk)
                    elapsed_time += time.time() - start_time
                if upload_size:
                    self.logger.transfer_rate("PUT." + device + ".timing", elapsed_time, upload_size)
                if fsize is not None and fsize != upload_size:
                    return HTTPClientDisconnect(request=request)
                etag = etag.hexdigest()
                if "etag" in request.headers and request.headers["etag"].lower() != etag:
                    return HTTPUnprocessableEntity(request=request)
                metadata = {
                    "X-Timestamp": request.headers["x-timestamp"],
                    "Content-Type": request.headers["content-type"],
                    "ETag": etag,
                    "Content-Length": str(upload_size),
                }
                metadata.update(val for val in request.headers.iteritems() if is_user_meta("object", val[0]))
                for header_key in request.headers.get("X-Backend-Replication-Headers") or self.allowed_headers:
                    if header_key in request.headers:
                        header_caps = header_key.title()
                        metadata[header_caps] = request.headers[header_key]
                writer.put(metadata)
        except DiskFileNoSpace:
            return HTTPInsufficientStorage(drive=device, request=request)
        if orig_delete_at != new_delete_at:
            if new_delete_at:
                self.delete_at_update("PUT", new_delete_at, account, container, obj, request, device)
            if orig_delete_at:
                self.delete_at_update("DELETE", orig_delete_at, account, container, obj, request, device)
        self.container_update(
            "PUT",
            account,
            container,
            obj,
            request,
            HeaderKeyDict(
                {
                    "x-size": metadata["Content-Length"],
                    "x-content-type": metadata["Content-Type"],
                    "x-timestamp": metadata["X-Timestamp"],
                    "x-etag": metadata["ETag"],
                }
            ),
            device,
        )
        return HTTPCreated(request=request, etag=etag)

    @public
    @timing_stats()
    def GET(self, request):
        """Handle HTTP GET requests for the Swift Object Server."""
        device, partition, account, container, obj = split_and_validate_path(request, 5, 5, True)
        keep_cache = self.keep_cache_private or (
            "X-Auth-Token" not in request.headers and "X-Storage-Token" not in request.headers
        )
        try:
            disk_file = self.get_diskfile(device, partition, account, container, obj)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=device, request=request)
        try:
            with disk_file.open():
                metadata = disk_file.get_metadata()
                obj_size = int(metadata["Content-Length"])
                if request.headers.get("if-match") not in (None, "*") and metadata["ETag"] not in request.if_match:
                    return HTTPPreconditionFailed(request=request)
                if request.headers.get("if-none-match") is not None:
                    if metadata["ETag"] in request.if_none_match:
                        resp = HTTPNotModified(request=request)
                        resp.etag = metadata["ETag"]
                        return resp
                file_x_ts = metadata["X-Timestamp"]
                file_x_ts_flt = float(file_x_ts)
                try:
                    if_unmodified_since = request.if_unmodified_since
                except (OverflowError, ValueError):
                    # catches timestamps before the epoch
                    return HTTPPreconditionFailed(request=request)
                file_x_ts_utc = datetime.fromtimestamp(file_x_ts_flt, UTC)
                if if_unmodified_since and file_x_ts_utc > if_unmodified_since:
                    return HTTPPreconditionFailed(request=request)
                try:
                    if_modified_since = request.if_modified_since
                except (OverflowError, ValueError):
                    # catches timestamps before the epoch
                    return HTTPPreconditionFailed(request=request)
                if if_modified_since and file_x_ts_utc <= if_modified_since:
                    return HTTPNotModified(request=request)
                keep_cache = self.keep_cache_private or (
                    "X-Auth-Token" not in request.headers and "X-Storage-Token" not in request.headers
                )
                response = Response(
                    app_iter=disk_file.reader(keep_cache=keep_cache), request=request, conditional_response=True
                )
                response.headers["Content-Type"] = metadata.get("Content-Type", "application/octet-stream")
                for key, value in metadata.iteritems():
                    if is_user_meta("object", key) or key.lower() in self.allowed_headers:
                        response.headers[key] = value
                response.etag = metadata["ETag"]
                response.last_modified = math.ceil(file_x_ts_flt)
                response.content_length = obj_size
                try:
                    response.content_encoding = metadata["Content-Encoding"]
                except KeyError:
                    pass
                response.headers["X-Timestamp"] = file_x_ts
                resp = request.get_response(response)
        except DiskFileNotExist:
            if request.headers.get("if-match") == "*":
                resp = HTTPPreconditionFailed(request=request)
            else:
                resp = HTTPNotFound(request=request)
        except DiskFileQuarantined:
            resp = HTTPNotFound(request=request)
        return resp

    @public
    @timing_stats(sample_rate=0.8)
    def HEAD(self, request):
        """Handle HTTP HEAD requests for the Swift Object Server."""
        device, partition, account, container, obj = split_and_validate_path(request, 5, 5, True)
        try:
            disk_file = self.get_diskfile(device, partition, account, container, obj)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=device, request=request)
        try:
            metadata = disk_file.read_metadata()
        except (DiskFileNotExist, DiskFileQuarantined):
            return HTTPNotFound(request=request)
        response = Response(request=request, conditional_response=True)
        response.headers["Content-Type"] = metadata.get("Content-Type", "application/octet-stream")
        for key, value in metadata.iteritems():
            if is_user_meta("object", key) or key.lower() in self.allowed_headers:
                response.headers[key] = value
        response.etag = metadata["ETag"]
        ts = metadata["X-Timestamp"]
        response.last_modified = math.ceil(float(ts))
        # Needed for container sync feature
        response.headers["X-Timestamp"] = ts
        response.content_length = int(metadata["Content-Length"])
        try:
            response.content_encoding = metadata["Content-Encoding"]
        except KeyError:
            pass
        return response

    @public
    @timing_stats()
    def DELETE(self, request):
        """Handle HTTP DELETE requests for the Swift Object Server."""
        device, partition, account, container, obj = split_and_validate_path(request, 5, 5, True)
        if "x-timestamp" not in request.headers or not check_float(request.headers["x-timestamp"]):
            return HTTPBadRequest(body="Missing timestamp", request=request, content_type="text/plain")
        try:
            disk_file = self.get_diskfile(device, partition, account, container, obj)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=device, request=request)
        try:
            orig_metadata = disk_file.read_metadata()
        except DiskFileExpired as e:
            orig_timestamp = e.timestamp
            orig_metadata = e.metadata
            response_class = HTTPNotFound
        except DiskFileDeleted as e:
            orig_timestamp = e.timestamp
            orig_metadata = {}
            response_class = HTTPNotFound
        except (DiskFileNotExist, DiskFileQuarantined):
            orig_timestamp = 0
            orig_metadata = {}
            response_class = HTTPNotFound
        else:
            orig_timestamp = orig_metadata.get("X-Timestamp", 0)
            if orig_timestamp < request.headers["x-timestamp"]:
                response_class = HTTPNoContent
            else:
                response_class = HTTPConflict
        orig_delete_at = int(orig_metadata.get("X-Delete-At") or 0)
        try:
            req_if_delete_at_val = request.headers["x-if-delete-at"]
            req_if_delete_at = int(req_if_delete_at_val)
        except KeyError:
            pass
        except ValueError:
            return HTTPBadRequest(request=request, body="Bad X-If-Delete-At header value")
        else:
            if orig_delete_at != req_if_delete_at:
                return HTTPPreconditionFailed(request=request, body="X-If-Delete-At and X-Delete-At do not match")
        if orig_delete_at:
            self.delete_at_update("DELETE", orig_delete_at, account, container, obj, request, device)
        req_timestamp = request.headers["X-Timestamp"]
        if orig_timestamp < req_timestamp:
            disk_file.delete(req_timestamp)
            self.container_update(
                "DELETE", account, container, obj, request, HeaderKeyDict({"x-timestamp": req_timestamp}), device
            )
        return response_class(request=request)

    @public
    @replication
    @timing_stats(sample_rate=0.1)
    def REPLICATE(self, request):
        """
        Handle REPLICATE requests for the Swift Object Server.  This is used
        by the object replicator to get hashes for directories.
        """
        device, partition, suffix = split_and_validate_path(request, 2, 3, True)
        try:
            hashes = self._diskfile_mgr.get_hashes(device, partition, suffix)
        except DiskFileDeviceUnavailable:
            resp = HTTPInsufficientStorage(drive=device, request=request)
        else:
            resp = Response(body=pickle.dumps(hashes))
        return resp

    @public
    @replication
    @timing_stats(sample_rate=0.1)
    def REPLICATION(self, request):
        return Response(app_iter=ssync_receiver.Receiver(self, request)())

    def __call__(self, env, start_response):
        """WSGI Application entry point for the Swift Object Server."""
        start_time = time.time()
        req = Request(env)
        self.logger.txn_id = req.headers.get("x-trans-id", None)

        if not check_utf8(req.path_info):
            res = HTTPPreconditionFailed(body="Invalid UTF8 or contains NULL")
        else:
            try:
                # disallow methods which have not been marked 'public'
                try:
                    method = getattr(self, req.method)
                    getattr(method, "publicly_accessible")
                    replication_method = getattr(method, "replication", False)
                    if self.replication_server is not None and self.replication_server != replication_method:
                        raise AttributeError("Not allowed method.")
                except AttributeError:
                    res = HTTPMethodNotAllowed()
                else:
                    res = method(req)
            except DiskFileCollision:
                res = HTTPForbidden(request=req)
            except HTTPException as error_response:
                res = error_response
            except (Exception, Timeout):
                self.logger.exception(
                    _("ERROR __call__ error with %(method)s" " %(path)s "), {"method": req.method, "path": req.path}
                )
                res = HTTPInternalServerError(body=traceback.format_exc())
        trans_time = time.time() - start_time
        if self.log_requests:
            log_line = '%s - - [%s] "%s %s" %s %s "%s" "%s" "%s" %.4f' % (
                req.remote_addr,
                time.strftime("%d/%b/%Y:%H:%M:%S +0000", time.gmtime()),
                req.method,
                req.path,
                res.status.split()[0],
                res.content_length or "-",
                req.referer or "-",
                req.headers.get("x-trans-id", "-"),
                req.user_agent or "-",
                trans_time,
            )
            if req.method in ("REPLICATE", "REPLICATION") or "X-Backend-Replication" in req.headers:
                self.logger.debug(log_line)
            else:
                self.logger.info(log_line)
        if req.method in ("PUT", "DELETE"):
            slow = self.slow - trans_time
            if slow > 0:
                sleep(slow)
        return res(env, start_response)
Beispiel #10
0
class ObjectController(BaseStorageServer):
    """Implements the WSGI application for the Swift Object Server."""

    server_type = 'object-server'

    def __init__(self, conf, logger=None):
        """
        Creates a new WSGI application for the Swift Object Server. An
        example configuration is given at
        <source-dir>/etc/object-server.conf-sample or
        /etc/swift/object-server.conf-sample.
        """
        super(ObjectController, self).__init__(conf)
        self.logger = logger or get_logger(conf, log_route='object-server')
        self.node_timeout = int(conf.get('node_timeout', 3))
        self.conn_timeout = float(conf.get('conn_timeout', 0.5))
        self.client_timeout = int(conf.get('client_timeout', 60))
        self.disk_chunk_size = int(conf.get('disk_chunk_size', 65536))
        self.network_chunk_size = int(conf.get('network_chunk_size', 65536))
        self.log_requests = config_true_value(conf.get('log_requests', 'true'))
        self.max_upload_time = int(conf.get('max_upload_time', 86400))
        self.slow = int(conf.get('slow', 0))
        self.keep_cache_private = \
            config_true_value(conf.get('keep_cache_private', 'false'))

        default_allowed_headers = '''
            content-disposition,
            content-encoding,
            x-delete-at,
            x-object-manifest,
            x-static-large-object,
        '''
        extra_allowed_headers = [
            header.strip().lower() for header in conf.get(
                'allowed_headers', default_allowed_headers).split(',')
            if header.strip()
        ]
        self.allowed_headers = set()
        for header in extra_allowed_headers:
            if header not in DATAFILE_SYSTEM_META:
                self.allowed_headers.add(header)
        self.auto_create_account_prefix = \
            conf.get('auto_create_account_prefix') or '.'
        self.expiring_objects_account = self.auto_create_account_prefix + \
            (conf.get('expiring_objects_account_name') or 'expiring_objects')
        self.expiring_objects_container_divisor = \
            int(conf.get('expiring_objects_container_divisor') or 86400)
        # Initialization was successful, so now apply the network chunk size
        # parameter as the default read / write buffer size for the network
        # sockets.
        #
        # NOTE WELL: This is a class setting, so until we can set this on a
        # per-connection basis, this affects reading and writing on ALL
        # sockets, those between the proxy servers and external clients, and
        # those between the proxy servers and the other internal servers.
        #
        # ** Because the primary motivation for this is to optimize how data
        # is written back to the proxy server, we could use the value from the
        # disk_chunk_size parameter. However, it affects all created sockets
        # using this class so we have chosen to tie it to the
        # network_chunk_size parameter value instead.
        socket._fileobject.default_bufsize = self.network_chunk_size

        # Provide further setup specific to an object server implementation.
        self.setup(conf)

    def setup(self, conf):
        """
        Implementation specific setup. This method is called at the very end
        by the constructor to allow a specific implementation to modify
        existing attributes or add its own attributes.

        :param conf: WSGI configuration parameter
        """

        # Common on-disk hierarchy shared across account, container and object
        # servers.
        self._diskfile_mgr = DiskFileManager(conf, self.logger)
        # This is populated by global_conf_callback way below as the semaphore
        # is shared by all workers.
        if 'replication_semaphore' in conf:
            # The value was put in a list so it could get past paste
            self.replication_semaphore = conf['replication_semaphore'][0]
        else:
            self.replication_semaphore = None
        self.replication_failure_threshold = int(
            conf.get('replication_failure_threshold') or 100)
        self.replication_failure_ratio = float(
            conf.get('replication_failure_ratio') or 1.0)

    def get_diskfile(self, device, partition, account, container, obj,
                     policy_idx, **kwargs):
        """
        Utility method for instantiating a DiskFile object supporting a given
        REST API.

        An implementation of the object server that wants to use a different
        DiskFile class would simply over-ride this method to provide that
        behavior.
        """
        return self._diskfile_mgr.get_diskfile(
            device, partition, account, container, obj, policy_idx, **kwargs)

    def async_update(self, op, account, container, obj, host, partition,
                     contdevice, headers_out, objdevice, policy_index):
        """
        Sends or saves an async update.

        :param op: operation performed (ex: 'PUT', or 'DELETE')
        :param account: account name for the object
        :param container: container name for the object
        :param obj: object name
        :param host: host that the container is on
        :param partition: partition that the container is on
        :param contdevice: device name that the container is on
        :param headers_out: dictionary of headers to send in the container
                            request
        :param objdevice: device name that the object is in
        :param policy_index: the associated storage policy index
        """
        headers_out['user-agent'] = 'object-server %s' % os.getpid()
        full_path = '/%s/%s/%s' % (account, container, obj)
        if all([host, partition, contdevice]):
            try:
                with ConnectionTimeout(self.conn_timeout):
                    ip, port = host.rsplit(':', 1)
                    conn = http_connect(ip, port, contdevice, partition, op,
                                        full_path, headers_out)
                with Timeout(self.node_timeout):
                    response = conn.getresponse()
                    response.read()
                    if is_success(response.status):
                        return
                    else:
                        self.logger.error(_(
                            'ERROR Container update failed '
                            '(saving for async update later): %(status)d '
                            'response from %(ip)s:%(port)s/%(dev)s'),
                            {'status': response.status, 'ip': ip, 'port': port,
                             'dev': contdevice})
            except (Exception, Timeout):
                self.logger.exception(_(
                    'ERROR container update failed with '
                    '%(ip)s:%(port)s/%(dev)s (saving for async update later)'),
                    {'ip': ip, 'port': port, 'dev': contdevice})
        data = {'op': op, 'account': account, 'container': container,
                'obj': obj, 'headers': headers_out}
        timestamp = headers_out['x-timestamp']
        self._diskfile_mgr.pickle_async_update(objdevice, account, container,
                                               obj, data, timestamp,
                                               policy_index)

    def container_update(self, op, account, container, obj, request,
                         headers_out, objdevice, policy_idx):
        """
        Update the container when objects are updated.

        :param op: operation performed (ex: 'PUT', or 'DELETE')
        :param account: account name for the object
        :param container: container name for the object
        :param obj: object name
        :param request: the original request object driving the update
        :param headers_out: dictionary of headers to send in the container
                            request(s)
        :param objdevice: device name that the object is in
        :param policy_idx: the associated storage policy index
        """
        headers_in = request.headers
        conthosts = [h.strip() for h in
                     headers_in.get('X-Container-Host', '').split(',')]
        contdevices = [d.strip() for d in
                       headers_in.get('X-Container-Device', '').split(',')]
        contpartition = headers_in.get('X-Container-Partition', '')

        if len(conthosts) != len(contdevices):
            # This shouldn't happen unless there's a bug in the proxy,
            # but if there is, we want to know about it.
            self.logger.error(_('ERROR Container update failed: different '
                                'numbers of hosts and devices in request: '
                                '"%s" vs "%s"') %
                               (headers_in.get('X-Container-Host', ''),
                                headers_in.get('X-Container-Device', '')))
            return

        if contpartition:
            updates = zip(conthosts, contdevices)
        else:
            updates = []

        headers_out['x-trans-id'] = headers_in.get('x-trans-id', '-')
        headers_out['referer'] = request.as_referer()
        headers_out['X-Backend-Storage-Policy-Index'] = policy_idx
        for conthost, contdevice in updates:
            self.async_update(op, account, container, obj, conthost,
                              contpartition, contdevice, headers_out,
                              objdevice, policy_idx)

    def delete_at_update(self, op, delete_at, account, container, obj,
                         request, objdevice, policy_index):
        """
        Update the expiring objects container when objects are updated.

        :param op: operation performed (ex: 'PUT', or 'DELETE')
        :param delete_at: scheduled delete in UNIX seconds, int
        :param account: account name for the object
        :param container: container name for the object
        :param obj: object name
        :param request: the original request driving the update
        :param objdevice: device name that the object is in
        :param policy_index: the policy index to be used for tmp dir
        """
        if config_true_value(
                request.headers.get('x-backend-replication', 'f')):
            return
        delete_at = normalize_delete_at_timestamp(delete_at)
        updates = [(None, None)]

        partition = None
        hosts = contdevices = [None]
        headers_in = request.headers
        headers_out = HeaderKeyDict({
            # system accounts are always Policy-0
            'X-Backend-Storage-Policy-Index': 0,
            'x-timestamp': request.timestamp.internal,
            'x-trans-id': headers_in.get('x-trans-id', '-'),
            'referer': request.as_referer()})
        if op != 'DELETE':
            delete_at_container = headers_in.get('X-Delete-At-Container', None)
            if not delete_at_container:
                self.logger.warning(
                    'X-Delete-At-Container header must be specified for '
                    'expiring objects background %s to work properly. Making '
                    'best guess as to the container name for now.' % op)
                # TODO(gholt): In a future release, change the above warning to
                # a raised exception and remove the guess code below.
                delete_at_container = get_expirer_container(
                    delete_at, self.expiring_objects_container_divisor,
                    account, container, obj)
            partition = headers_in.get('X-Delete-At-Partition', None)
            hosts = headers_in.get('X-Delete-At-Host', '')
            contdevices = headers_in.get('X-Delete-At-Device', '')
            updates = [upd for upd in
                       zip((h.strip() for h in hosts.split(',')),
                           (c.strip() for c in contdevices.split(',')))
                       if all(upd) and partition]
            if not updates:
                updates = [(None, None)]
            headers_out['x-size'] = '0'
            headers_out['x-content-type'] = 'text/plain'
            headers_out['x-etag'] = 'd41d8cd98f00b204e9800998ecf8427e'
        else:
            # DELETEs of old expiration data have no way of knowing what the
            # old X-Delete-At-Container was at the time of the initial setting
            # of the data, so a best guess is made here.
            # Worst case is a DELETE is issued now for something that doesn't
            # exist there and the original data is left where it is, where
            # it will be ignored when the expirer eventually tries to issue the
            # object DELETE later since the X-Delete-At value won't match up.
            delete_at_container = get_expirer_container(
                delete_at, self.expiring_objects_container_divisor,
                account, container, obj)
        delete_at_container = normalize_delete_at_timestamp(
            delete_at_container)
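        # The loop below sends one async update per (host, device) pair into
        # the hidden expiring-objects account. As a rough illustration (names
        # invented), a PUT of AUTH_test/cont/obj with X-Delete-At: 1400000000
        # yields an update for an object named
        # '1400000000-AUTH_test/cont/obj' in a container whose name is
        # derived from the delete-at time and the configured divisor.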

        for host, contdevice in updates:
            self.async_update(
                op, self.expiring_objects_account, delete_at_container,
                '%s-%s/%s/%s' % (delete_at, account, container, obj),
                host, partition, contdevice, headers_out, objdevice,
                policy_index)

    @public
    @timing_stats()
    def POST(self, request):
        """Handle HTTP POST requests for the Swift Object Server."""
        device, partition, account, container, obj, policy_idx = \
            get_name_and_placement(request, 5, 5, True)
        req_timestamp = valid_timestamp(request)
        new_delete_at = int(request.headers.get('X-Delete-At') or 0)
        if new_delete_at and new_delete_at < time.time():
            return HTTPBadRequest(body='X-Delete-At in past', request=request,
                                  content_type='text/plain')
        try:
            disk_file = self.get_diskfile(
                device, partition, account, container, obj,
                policy_idx=policy_idx)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=device, request=request)
        try:
            orig_metadata = disk_file.read_metadata()
        except DiskFileXattrNotSupported:
            return HTTPInsufficientStorage(drive=device, request=request)
        except (DiskFileNotExist, DiskFileQuarantined):
            return HTTPNotFound(request=request)
        orig_timestamp = Timestamp(orig_metadata.get('X-Timestamp', 0))
        if orig_timestamp >= req_timestamp:
            return HTTPConflict(
                request=request,
                headers={'X-Backend-Timestamp': orig_timestamp.internal})
        metadata = {'X-Timestamp': req_timestamp.internal}
        metadata.update(val for val in request.headers.iteritems()
                        if is_user_meta('object', val[0]))
        for header_key in self.allowed_headers:
            if header_key in request.headers:
                header_caps = header_key.title()
                metadata[header_caps] = request.headers[header_key]
        orig_delete_at = int(orig_metadata.get('X-Delete-At') or 0)
        if orig_delete_at != new_delete_at:
            if new_delete_at:
                self.delete_at_update('PUT', new_delete_at, account, container,
                                      obj, request, device, policy_idx)
            if orig_delete_at:
                self.delete_at_update('DELETE', orig_delete_at, account,
                                      container, obj, request, device,
                                      policy_idx)
        try:
            disk_file.write_metadata(metadata)
        except (DiskFileXattrNotSupported, DiskFileNoSpace):
            return HTTPInsufficientStorage(drive=device, request=request)
        return HTTPAccepted(request=request)

    @public
    @timing_stats()
    def PUT(self, request):
        """Handle HTTP PUT requests for the Swift Object Server."""
        device, partition, account, container, obj, policy_idx = \
            get_name_and_placement(request, 5, 5, True)
        req_timestamp = valid_timestamp(request)
        error_response = check_object_creation(request, obj)
        if error_response:
            return error_response
        new_delete_at = int(request.headers.get('X-Delete-At') or 0)
        if new_delete_at and new_delete_at < time.time():
            return HTTPBadRequest(body='X-Delete-At in past', request=request,
                                  content_type='text/plain')
        try:
            fsize = request.message_length()
        except ValueError as e:
            return HTTPBadRequest(body=str(e), request=request,
                                  content_type='text/plain')
        try:
            disk_file = self.get_diskfile(
                device, partition, account, container, obj,
                policy_idx=policy_idx)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=device, request=request)
        try:
            orig_metadata = disk_file.read_metadata()
        except DiskFileXattrNotSupported:
            return HTTPInsufficientStorage(drive=device, request=request)
        except (DiskFileNotExist, DiskFileQuarantined):
            orig_metadata = {}

        # Checks for If-None-Match
        if request.if_none_match is not None and orig_metadata:
            if '*' in request.if_none_match:
                # File exists already so return 412
                return HTTPPreconditionFailed(request=request)
            if orig_metadata.get('ETag') in request.if_none_match:
                # The current ETag matches, so return 412
                return HTTPPreconditionFailed(request=request)

        orig_timestamp = Timestamp(orig_metadata.get('X-Timestamp', 0))
        if orig_timestamp >= req_timestamp:
            return HTTPConflict(
                request=request,
                headers={'X-Backend-Timestamp': orig_timestamp.internal})
        orig_delete_at = int(orig_metadata.get('X-Delete-At') or 0)
        upload_expiration = time.time() + self.max_upload_time
        etag = md5()
        elapsed_time = 0
        try:
            with disk_file.create(size=fsize) as writer:
                upload_size = 0

                def timeout_reader():
                    with ChunkReadTimeout(self.client_timeout):
                        return request.environ['wsgi.input'].read(
                            self.network_chunk_size)

                try:
                    for chunk in iter(lambda: timeout_reader(), ''):
                        start_time = time.time()
                        if start_time > upload_expiration:
                            self.logger.increment('PUT.timeouts')
                            return HTTPRequestTimeout(request=request)
                        etag.update(chunk)
                        upload_size = writer.write(chunk)
                        elapsed_time += time.time() - start_time
                except ChunkReadTimeout:
                    return HTTPRequestTimeout(request=request)
                if upload_size:
                    self.logger.transfer_rate(
                        'PUT.' + device + '.timing', elapsed_time,
                        upload_size)
                if fsize is not None and fsize != upload_size:
                    return HTTPClientDisconnect(request=request)
                etag = etag.hexdigest()
                if 'etag' in request.headers and \
                        request.headers['etag'].lower() != etag:
                    return HTTPUnprocessableEntity(request=request)
                metadata = {
                    'X-Timestamp': request.timestamp.internal,
                    'Content-Type': request.headers['content-type'],
                    'ETag': etag,
                    'Content-Length': str(upload_size),
                }
                metadata.update(val for val in request.headers.iteritems()
                                if is_sys_or_user_meta('object', val[0]))
                headers_to_copy = (
                    request.headers.get(
                        'X-Backend-Replication-Headers', '').split() +
                    list(self.allowed_headers))
                for header_key in headers_to_copy:
                    if header_key in request.headers:
                        header_caps = header_key.title()
                        metadata[header_caps] = request.headers[header_key]
                writer.put(metadata)
        except (DiskFileXattrNotSupported, DiskFileNoSpace):
            return HTTPInsufficientStorage(drive=device, request=request)
        if orig_delete_at != new_delete_at:
            if new_delete_at:
                self.delete_at_update(
                    'PUT', new_delete_at, account, container, obj, request,
                    device, policy_idx)
            if orig_delete_at:
                self.delete_at_update(
                    'DELETE', orig_delete_at, account, container, obj,
                    request, device, policy_idx)
        self.container_update(
            'PUT', account, container, obj, request,
            HeaderKeyDict({
                'x-size': metadata['Content-Length'],
                'x-content-type': metadata['Content-Type'],
                'x-timestamp': metadata['X-Timestamp'],
                'x-etag': metadata['ETag']}),
            device, policy_idx)
        return HTTPCreated(request=request, etag=etag)

    @public
    @timing_stats()
    def GET(self, request):
        """Handle HTTP GET requests for the Swift Object Server."""
        device, partition, account, container, obj, policy_idx = \
            get_name_and_placement(request, 5, 5, True)
        try:
            disk_file = self.get_diskfile(
                device, partition, account, container, obj,
                policy_idx=policy_idx)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=device, request=request)
        try:
            with disk_file.open():
                metadata = disk_file.get_metadata()
                obj_size = int(metadata['Content-Length'])
                file_x_ts = Timestamp(metadata['X-Timestamp'])
                keep_cache = (self.keep_cache_private or
                              ('X-Auth-Token' not in request.headers and
                               'X-Storage-Token' not in request.headers))
                response = Response(
                    app_iter=disk_file.reader(keep_cache=keep_cache),
                    request=request, conditional_response=True)
                response.headers['Content-Type'] = metadata.get(
                    'Content-Type', 'application/octet-stream')
                for key, value in metadata.iteritems():
                    if is_sys_or_user_meta('object', key) or \
                            key.lower() in self.allowed_headers:
                        response.headers[key] = value
                response.etag = metadata['ETag']
                response.last_modified = math.ceil(float(file_x_ts))
                response.content_length = obj_size
                try:
                    response.content_encoding = metadata[
                        'Content-Encoding']
                except KeyError:
                    pass
                response.headers['X-Timestamp'] = file_x_ts.normal
                response.headers['X-Backend-Timestamp'] = file_x_ts.internal
                resp = request.get_response(response)
        except DiskFileXattrNotSupported:
            return HTTPInsufficientStorage(drive=device, request=request)
        except (DiskFileNotExist, DiskFileQuarantined) as e:
            headers = {}
            if hasattr(e, 'timestamp'):
                headers['X-Backend-Timestamp'] = e.timestamp.internal
            resp = HTTPNotFound(request=request, headers=headers,
                                conditional_response=True)
        return resp

    @public
    @timing_stats(sample_rate=0.8)
    def HEAD(self, request):
        """Handle HTTP HEAD requests for the Swift Object Server."""
        device, partition, account, container, obj, policy_idx = \
            get_name_and_placement(request, 5, 5, True)
        try:
            disk_file = self.get_diskfile(
                device, partition, account, container, obj,
                policy_idx=policy_idx)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=device, request=request)
        try:
            metadata = disk_file.read_metadata()
        except DiskFileXattrNotSupported:
            return HTTPInsufficientStorage(drive=device, request=request)
        except (DiskFileNotExist, DiskFileQuarantined) as e:
            headers = {}
            if hasattr(e, 'timestamp'):
                headers['X-Backend-Timestamp'] = e.timestamp.internal
            return HTTPNotFound(request=request, headers=headers,
                                conditional_response=True)
        response = Response(request=request, conditional_response=True)
        response.headers['Content-Type'] = metadata.get(
            'Content-Type', 'application/octet-stream')
        for key, value in metadata.iteritems():
            if is_sys_or_user_meta('object', key) or \
                    key.lower() in self.allowed_headers:
                response.headers[key] = value
        response.etag = metadata['ETag']
        ts = Timestamp(metadata['X-Timestamp'])
        response.last_modified = math.ceil(float(ts))
        # Needed for container sync feature
        response.headers['X-Timestamp'] = ts.normal
        response.headers['X-Backend-Timestamp'] = ts.internal
        response.content_length = int(metadata['Content-Length'])
        try:
            response.content_encoding = metadata['Content-Encoding']
        except KeyError:
            pass
        return response

    @public
    @timing_stats()
    def DELETE(self, request):
        """Handle HTTP DELETE requests for the Swift Object Server."""
        device, partition, account, container, obj, policy_idx = \
            get_name_and_placement(request, 5, 5, True)
        req_timestamp = valid_timestamp(request)
        try:
            disk_file = self.get_diskfile(
                device, partition, account, container, obj,
                policy_idx=policy_idx)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=device, request=request)
        try:
            orig_metadata = disk_file.read_metadata()
        except DiskFileXattrNotSupported:
            return HTTPInsufficientStorage(drive=device, request=request)
        except DiskFileExpired as e:
            orig_timestamp = e.timestamp
            orig_metadata = e.metadata
            response_class = HTTPNotFound
        except DiskFileDeleted as e:
            orig_timestamp = e.timestamp
            orig_metadata = {}
            response_class = HTTPNotFound
        except (DiskFileNotExist, DiskFileQuarantined):
            orig_timestamp = 0
            orig_metadata = {}
            response_class = HTTPNotFound
        else:
            orig_timestamp = Timestamp(orig_metadata.get('X-Timestamp', 0))
            if orig_timestamp < req_timestamp:
                response_class = HTTPNoContent
            else:
                response_class = HTTPConflict
        response_timestamp = max(orig_timestamp, req_timestamp)
        orig_delete_at = int(orig_metadata.get('X-Delete-At') or 0)
        try:
            req_if_delete_at_val = request.headers['x-if-delete-at']
            req_if_delete_at = int(req_if_delete_at_val)
        except KeyError:
            pass
        except ValueError:
            return HTTPBadRequest(
                request=request,
                body='Bad X-If-Delete-At header value')
        else:
            # request includes x-if-delete-at; we must not place a tombstone
            # if we can not verify the x-if-delete-at time
            if not orig_timestamp:
                # no object found at all
                return HTTPNotFound()
            if orig_delete_at != req_if_delete_at:
                return HTTPPreconditionFailed(
                    request=request,
                    body='X-If-Delete-At and X-Delete-At do not match')
            else:
                # differentiate success from no object at all
                response_class = HTTPNoContent
        if orig_delete_at:
            self.delete_at_update('DELETE', orig_delete_at, account,
                                  container, obj, request, device,
                                  policy_idx)
        if orig_timestamp < req_timestamp:
            disk_file.delete(req_timestamp)
            self.container_update(
                'DELETE', account, container, obj, request,
                HeaderKeyDict({'x-timestamp': req_timestamp.internal}),
                device, policy_idx)
        return response_class(
            request=request,
            headers={'X-Backend-Timestamp': response_timestamp.internal})

    @public
    @replication
    @timing_stats(sample_rate=0.1)
    def REPLICATE(self, request):
        """
        Handle REPLICATE requests for the Swift Object Server.  This is used
        by the object replicator to get hashes for directories.
        """
        device, partition, suffix, policy_idx = \
            get_name_and_placement(request, 2, 3, True)
        try:
            hashes = self._diskfile_mgr.get_hashes(device, partition, suffix,
                                                   policy_idx)
        except DiskFileDeviceUnavailable:
            resp = HTTPInsufficientStorage(drive=device, request=request)
        else:
            resp = Response(body=pickle.dumps(hashes))
        return resp

    @public
    @replication
    @timing_stats(sample_rate=0.1)
    def REPLICATION(self, request):
        return Response(app_iter=ssync_receiver.Receiver(self, request)())

    def __call__(self, env, start_response):
        """WSGI Application entry point for the Swift Object Server."""
        start_time = time.time()
        req = Request(env)
        self.logger.txn_id = req.headers.get('x-trans-id', None)

        if not check_utf8(req.path_info):
            res = HTTPPreconditionFailed(body='Invalid UTF8 or contains NULL')
        else:
            try:
                # disallow methods which have not been marked 'public'
                try:
                    if req.method not in self.allowed_methods:
                        raise AttributeError('Not allowed method.')
                except AttributeError:
                    res = HTTPMethodNotAllowed()
                else:
                    method = getattr(self, req.method)
                    res = method(req)
            except DiskFileCollision:
                res = HTTPForbidden(request=req)
            except HTTPException as error_response:
                res = error_response
            except (Exception, Timeout):
                self.logger.exception(_(
                    'ERROR __call__ error with %(method)s'
                    ' %(path)s '), {'method': req.method, 'path': req.path})
                res = HTTPInternalServerError(body=traceback.format_exc())
        trans_time = time.time() - start_time
        if self.log_requests:
            log_line = get_log_line(req, res, trans_time, '')
            if req.method in ('REPLICATE', 'REPLICATION') or \
                    'X-Backend-Replication' in req.headers:
                self.logger.debug(log_line)
            else:
                self.logger.info(log_line)
        if req.method in ('PUT', 'DELETE'):
            slow = self.slow - trans_time
            if slow > 0:
                sleep(slow)

        # To be able to zero-copy send the object, we need a few things.
        # First, we have to be responding successfully to a GET, or else we're
        # not sending the object. Second, we have to be able to extract the
        # socket file descriptor from the WSGI input object. Third, the
        # diskfile has to support zero-copy send.
        #
        # There's a good chance that this could work for 206 responses too,
        # but the common case is sending the whole object, so we'll start
        # there.
        if req.method == 'GET' and res.status_int == 200 and \
           isinstance(env['wsgi.input'], wsgi.Input):
            app_iter = getattr(res, 'app_iter', None)
            checker = getattr(app_iter, 'can_zero_copy_send', None)
            if checker and checker():
                # For any kind of zero-copy thing like sendfile or splice, we
                # need the file descriptor. Eventlet doesn't provide a clean
                # way of getting that, so we resort to this.
                wsock = env['wsgi.input'].get_socket()
                wsockfd = wsock.fileno()

                # Don't call zero_copy_send() until after we force the HTTP
                # headers out of Eventlet and into the socket.
                def zero_copy_iter():
                    # If possible, set TCP_CORK so that headers don't
                    # immediately go on the wire, but instead, wait for some
                    # response body to make the TCP frames as large as
                    # possible (and hence as few packets as possible).
                    #
                    # On non-Linux systems, we might consider TCP_NODELAY, but
                    # since the only known zero-copy-capable diskfile uses
                    # Linux-specific syscalls, we'll defer that work until
                    # someone needs it.
                    if hasattr(socket, 'TCP_CORK'):
                        wsock.setsockopt(socket.IPPROTO_TCP,
                                         socket.TCP_CORK, 1)
                    yield EventletPlungerString()
                    try:
                        app_iter.zero_copy_send(wsockfd)
                    except Exception:
                        self.logger.exception("zero_copy_send() blew up")
                        raise
                    yield ''

                # Get headers ready to go out
                res(env, start_response)
                return zero_copy_iter()
            else:
                return res(env, start_response)
        else:
            return res(env, start_response)
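

# Upstream Swift's object server module ends with a paste.deploy app factory;
# the sketch below follows that pattern (it is not part of the example above)
# for building the WSGI app from a parsed config:
def app_factory(global_conf, **local_conf):
    """paste.deploy app factory for creating WSGI object server apps."""
    conf = global_conf.copy()
    conf.update(local_conf)
    return ObjectController(conf)
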
class TruncateMiddleware(object):
    def __init__(self, app, conf, *args, **kwargs):
        self.app = app
        self.conf = conf
        self.logger = get_logger(self.conf, log_route='truncate')
        self._diskfile_mgr = DiskFileManager(conf, self.logger)

    def truncate(self, env):
        req = Request(env)
        try:
            disk_file = self.get_diskfile(self.device, self.partition,
                                          self.account, self.container,
                                          self.obj)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=self.device,
                                           request=Request(copy(env)))
        
        # In the normal object flow, calling put() on the DiskFileWriter ends
        # up in _finalize_put, which names the object file after the
        # X-Timestamp in the metadata and moves the temporary file into
        # place, so no separate truncate is needed here.

        ori_meta = disk_file.read_metadata()
        metadata = {
            'X-Timestamp': ori_meta['X-Timestamp'],
            'Content-Type': ori_meta['Content-Type'],
            'ETag': 'd41d8cd98f00b204e9800998ecf8427e',
            'Content-Length': 0,
            'X-Object-Meta-Glacier': True,
            'X-Object-Meta-S3-Content-Length': ori_meta['Content-Length'],
            'X-Object-Meta-S3-ETag': ori_meta['ETag']
        }
        # Also preserve the original object's user metadata
        metadata.update(val for val in ori_meta.iteritems()
                        if is_user_meta('object', val[0]))

        # If object restore info is present, remove it; this is the
        # restored-object expiration case.
        if 'X-Object-Meta-S3-Restore' in metadata:
            del metadata['X-Object-Meta-S3-Restore']

        with disk_file.create(size=0) as writer:
            writer.put(metadata)

        return HTTPCreated(request=req, etag=ori_meta['ETag'])

    def get_diskfile(self, device, partition, account, container, obj,
                     **kwargs):
        return self._diskfile_mgr.get_diskfile(device, partition, account,
                                               container, obj, **kwargs)

    def __call__(self, env, start_response):
        req = Request(copy(env))
        method = req.method

        if (method == 'PUT' or method == 'POST') and \
           GLACIER_FLAG_META in req.headers:
            self.device, self.partition, self.account, self.container, \
                self.obj = split_and_validate_path(req, 5, 5, True)
            return self.truncate(env)(env, start_response)

        return self.app(env, start_response)
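
# A hedged sketch of how TruncateMiddleware would typically be wired into the
# object-server paste pipeline; filter_factory is not shown in the example
# above, so the wiring here is an assumption:
def filter_factory(global_conf, **local_conf):
    conf = global_conf.copy()
    conf.update(local_conf)

    def truncate_filter(app):
        return TruncateMiddleware(app, conf)
    return truncate_filter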
Beispiel #12
0
class TestAuditor(unittest.TestCase):
    def setUp(self):
        self.testdir = os.path.join(mkdtemp(), "tmp_test_object_auditor")
        self.devices = os.path.join(self.testdir, "node")
        self.rcache = os.path.join(self.testdir, "object.recon")
        self.logger = FakeLogger()
        rmtree(self.testdir, ignore_errors=1)
        mkdirs(os.path.join(self.devices, "sda"))
        os.mkdir(os.path.join(self.devices, "sdb"))

        # policy 0
        self.objects = os.path.join(self.devices, "sda", get_data_dir(POLICIES[0]))
        self.objects_2 = os.path.join(self.devices, "sdb", get_data_dir(POLICIES[0]))
        os.mkdir(self.objects)
        # policy 1
        self.objects_p1 = os.path.join(self.devices, "sda", get_data_dir(POLICIES[1]))
        self.objects_2_p1 = os.path.join(self.devices, "sdb", get_data_dir(POLICIES[1]))
        os.mkdir(self.objects_p1)

        self.parts = {}
        self.parts_p1 = {}
        for part in ["0", "1", "2", "3"]:
            self.parts[part] = os.path.join(self.objects, part)
            self.parts_p1[part] = os.path.join(self.objects_p1, part)
            os.mkdir(os.path.join(self.objects, part))
            os.mkdir(os.path.join(self.objects_p1, part))

        self.conf = dict(devices=self.devices, mount_check="false", object_size_stats="10,100,1024,10240")
        self.df_mgr = DiskFileManager(self.conf, self.logger)

        # diskfiles for policy 0, 1
        self.disk_file = self.df_mgr.get_diskfile("sda", "0", "a", "c", "o", policy=POLICIES[0])
        self.disk_file_p1 = self.df_mgr.get_diskfile("sda", "0", "a", "c", "o", policy=POLICIES[1])

    def tearDown(self):
        rmtree(os.path.dirname(self.testdir), ignore_errors=1)
        unit.xattr_data = {}

    def test_worker_conf_parms(self):
        def check_common_defaults():
            self.assertEquals(auditor_worker.max_bytes_per_second, 10000000)
            self.assertEquals(auditor_worker.log_time, 3600)

        # test default values
        conf = dict(devices=self.devices, mount_check="false", object_size_stats="10,100,1024,10240")
        auditor_worker = auditor.AuditorWorker(conf, self.logger, self.rcache, self.devices)
        check_common_defaults()
        self.assertEquals(auditor_worker.diskfile_mgr.disk_chunk_size, 65536)
        self.assertEquals(auditor_worker.max_files_per_second, 20)
        self.assertEquals(auditor_worker.zero_byte_only_at_fps, 0)

        # test specified audit value overrides
        conf.update({"disk_chunk_size": 4096})
        auditor_worker = auditor.AuditorWorker(conf, self.logger, self.rcache, self.devices, zero_byte_only_at_fps=50)
        check_common_defaults()
        self.assertEquals(auditor_worker.diskfile_mgr.disk_chunk_size, 4096)
        self.assertEquals(auditor_worker.max_files_per_second, 50)
        self.assertEquals(auditor_worker.zero_byte_only_at_fps, 50)

    def test_object_audit_extra_data(self):
        def run_tests(disk_file):
            auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices)
            data = "0" * 1024
            etag = md5()
            with disk_file.create() as writer:
                writer.write(data)
                etag.update(data)
                etag = etag.hexdigest()
                timestamp = str(normalize_timestamp(time.time()))
                metadata = {"ETag": etag, "X-Timestamp": timestamp, "Content-Length": str(os.fstat(writer._fd).st_size)}
                writer.put(metadata)
                pre_quarantines = auditor_worker.quarantines

                auditor_worker.object_audit(AuditLocation(disk_file._datadir, "sda", "0", policy=POLICIES.legacy))
                self.assertEquals(auditor_worker.quarantines, pre_quarantines)

                os.write(writer._fd, "extra_data")

                auditor_worker.object_audit(AuditLocation(disk_file._datadir, "sda", "0", policy=POLICIES.legacy))
                self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)

        run_tests(self.disk_file)
        run_tests(self.disk_file_p1)

    def test_object_audit_diff_data(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices)
        data = "0" * 1024
        etag = md5()
        timestamp = str(normalize_timestamp(time.time()))
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {"ETag": etag, "X-Timestamp": timestamp, "Content-Length": str(os.fstat(writer._fd).st_size)}
            writer.put(metadata)
            pre_quarantines = auditor_worker.quarantines

        # remake so it will have metadata
        self.disk_file = self.df_mgr.get_diskfile("sda", "0", "a", "c", "o", policy=POLICIES.legacy)

        auditor_worker.object_audit(AuditLocation(self.disk_file._datadir, "sda", "0", policy=POLICIES.legacy))
        self.assertEquals(auditor_worker.quarantines, pre_quarantines)
        etag = md5()
        etag.update("1" + "0" * 1023)
        etag = etag.hexdigest()
        metadata["ETag"] = etag

        with self.disk_file.create() as writer:
            writer.write(data)
            writer.put(metadata)

        auditor_worker.object_audit(AuditLocation(self.disk_file._datadir, "sda", "0", policy=POLICIES.legacy))
        self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_audit_no_meta(self):
        timestamp = str(normalize_timestamp(time.time()))
        path = os.path.join(self.disk_file._datadir, timestamp + ".data")
        mkdirs(self.disk_file._datadir)
        fp = open(path, "w")
        fp.write("0" * 1024)
        fp.close()
        invalidate_hash(os.path.dirname(self.disk_file._datadir))
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices)
        pre_quarantines = auditor_worker.quarantines
        auditor_worker.object_audit(AuditLocation(self.disk_file._datadir, "sda", "0", policy=POLICIES.legacy))
        self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_audit_will_not_swallow_errors_in_tests(self):
        timestamp = str(normalize_timestamp(time.time()))
        path = os.path.join(self.disk_file._datadir, timestamp + ".data")
        mkdirs(self.disk_file._datadir)
        with open(path, "w") as f:
            write_metadata(f, {"name": "/a/c/o"})
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices)

        def blowup(*args):
            raise NameError("tpyo")

        with mock.patch.object(DiskFileManager, "get_diskfile_from_audit_location", blowup):
            self.assertRaises(
                NameError,
                auditor_worker.object_audit,
                AuditLocation(os.path.dirname(path), "sda", "0", policy=POLICIES.legacy),
            )

    def test_failsafe_object_audit_will_swallow_errors_in_tests(self):
        timestamp = str(normalize_timestamp(time.time()))
        path = os.path.join(self.disk_file._datadir, timestamp + ".data")
        mkdirs(self.disk_file._datadir)
        with open(path, "w") as f:
            write_metadata(f, {"name": "/a/c/o"})
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices)

        def blowup(*args):
            raise NameError("tpyo")

        with mock.patch("swift.obj.diskfile.DiskFileManager.diskfile_cls", blowup):
            auditor_worker.failsafe_object_audit(
                AuditLocation(os.path.dirname(path), "sda", "0", policy=POLICIES.legacy)
            )
        self.assertEquals(auditor_worker.errors, 1)

    def test_generic_exception_handling(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices)
        # pretend that we logged (and reset counters) just now
        auditor_worker.last_logged = time.time()
        timestamp = str(normalize_timestamp(time.time()))
        pre_errors = auditor_worker.errors
        data = "0" * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {"ETag": etag, "X-Timestamp": timestamp, "Content-Length": str(os.fstat(writer._fd).st_size)}
            writer.put(metadata)
        with mock.patch("swift.obj.diskfile.DiskFileManager.diskfile_cls", lambda *_: 1 / 0):
            auditor_worker.audit_all_objects()
        self.assertEquals(auditor_worker.errors, pre_errors + 1)

    def test_object_run_once_pass(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices)
        auditor_worker.log_time = 0
        timestamp = str(normalize_timestamp(time.time()))
        pre_quarantines = auditor_worker.quarantines
        data = "0" * 1024

        def write_file(df):
            etag = md5()
            with df.create() as writer:
                writer.write(data)
                etag.update(data)
                etag = etag.hexdigest()
                metadata = {"ETag": etag, "X-Timestamp": timestamp, "Content-Length": str(os.fstat(writer._fd).st_size)}
                writer.put(metadata)

        # policy 0
        write_file(self.disk_file)
        # policy 1
        write_file(self.disk_file_p1)

        auditor_worker.audit_all_objects()
        self.assertEquals(auditor_worker.quarantines, pre_quarantines)
        # 1 object per policy falls into 1024 bucket
        self.assertEquals(auditor_worker.stats_buckets[1024], 2)
        self.assertEquals(auditor_worker.stats_buckets[10240], 0)

        # pick up some additional code coverage, large file
        data = "0" * 1024 * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {"ETag": etag, "X-Timestamp": timestamp, "Content-Length": str(os.fstat(writer._fd).st_size)}
            writer.put(metadata)
        auditor_worker.audit_all_objects(device_dirs=["sda", "sdb"])
        self.assertEquals(auditor_worker.quarantines, pre_quarantines)
        # still have the 1024 byte object left in policy-1 (plus the
        # stats from the original 2)
        self.assertEquals(auditor_worker.stats_buckets[1024], 3)
        self.assertEquals(auditor_worker.stats_buckets[10240], 0)
        # and then policy-0 disk_file was re-written as a larger object
        self.assertEquals(auditor_worker.stats_buckets["OVER"], 1)

        # pick up even more additional code coverage, misc paths
        auditor_worker.log_time = -1
        auditor_worker.stats_sizes = []
        auditor_worker.audit_all_objects(device_dirs=["sda", "sdb"])
        self.assertEquals(auditor_worker.quarantines, pre_quarantines)
        self.assertEquals(auditor_worker.stats_buckets[1024], 3)
        self.assertEquals(auditor_worker.stats_buckets[10240], 0)
        self.assertEquals(auditor_worker.stats_buckets["OVER"], 1)

    def test_object_run_logging(self):
        logger = FakeLogger()
        auditor_worker = auditor.AuditorWorker(self.conf, logger, self.rcache, self.devices)
        auditor_worker.audit_all_objects(device_dirs=["sda"])
        log_lines = logger.get_lines_for_level("info")
        self.assertTrue(len(log_lines) > 0)
        self.assertTrue(log_lines[0].index("ALL - parallel, sda"))

        logger = FakeLogger()
        auditor_worker = auditor.AuditorWorker(self.conf, logger, self.rcache, self.devices, zero_byte_only_at_fps=50)
        auditor_worker.audit_all_objects(device_dirs=["sda"])
        log_lines = logger.get_lines_for_level("info")
        self.assertTrue(len(log_lines) > 0)
        self.assertTrue(log_lines[0].index("ZBF - sda"))

    def test_object_run_once_no_sda(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices)
        timestamp = str(normalize_timestamp(time.time()))
        pre_quarantines = auditor_worker.quarantines
        # pretend that we logged (and reset counters) just now
        auditor_worker.last_logged = time.time()
        data = "0" * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {"ETag": etag, "X-Timestamp": timestamp, "Content-Length": str(os.fstat(writer._fd).st_size)}
            writer.put(metadata)
            os.write(writer._fd, "extra_data")
        auditor_worker.audit_all_objects()
        self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_run_once_multi_devices(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices)
        # pretend that we logged (and reset counters) just now
        auditor_worker.last_logged = time.time()
        timestamp = str(normalize_timestamp(time.time()))
        pre_quarantines = auditor_worker.quarantines
        data = "0" * 10
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {"ETag": etag, "X-Timestamp": timestamp, "Content-Length": str(os.fstat(writer._fd).st_size)}
            writer.put(metadata)
        auditor_worker.audit_all_objects()
        self.disk_file = self.df_mgr.get_diskfile("sda", "0", "a", "c", "ob", policy=POLICIES.legacy)
        data = "1" * 10
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {"ETag": etag, "X-Timestamp": timestamp, "Content-Length": str(os.fstat(writer._fd).st_size)}
            writer.put(metadata)
            os.write(writer._fd, "extra_data")
        auditor_worker.audit_all_objects()
        self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_run_fast_track_non_zero(self):
        self.auditor = auditor.ObjectAuditor(self.conf)
        self.auditor.log_time = 0
        data = "0" * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                "ETag": etag,
                "X-Timestamp": str(normalize_timestamp(time.time())),
                "Content-Length": str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            etag = md5()
            etag.update("1" + "0" * 1023)
            etag = etag.hexdigest()
            metadata["ETag"] = etag
            write_metadata(writer._fd, metadata)

        quarantine_path = os.path.join(self.devices, "sda", "quarantined", "objects")
        kwargs = {"mode": "once"}
        kwargs["zero_byte_fps"] = 50
        self.auditor.run_audit(**kwargs)
        self.assertFalse(os.path.isdir(quarantine_path))
        del kwargs["zero_byte_fps"]
        self.auditor.run_audit(**kwargs)
        self.assertTrue(os.path.isdir(quarantine_path))

    def setup_bad_zero_byte(self, timestamp=None):
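        # Write a zero-byte .data file whose metadata claims Content-Length
        # 10, so the auditor treats the file as corrupt and quarantines it.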
        if timestamp is None:
            timestamp = Timestamp(time.time())
        self.auditor = auditor.ObjectAuditor(self.conf)
        self.auditor.log_time = 0
        etag = md5()
        with self.disk_file.create() as writer:
            etag = etag.hexdigest()
            metadata = {"ETag": etag, "X-Timestamp": timestamp.internal, "Content-Length": 10}
            writer.put(metadata)
            etag = md5()
            etag = etag.hexdigest()
            metadata["ETag"] = etag
            write_metadata(writer._fd, metadata)

    def test_object_run_fast_track_all(self):
        self.setup_bad_zero_byte()
        kwargs = {"mode": "once"}
        self.auditor.run_audit(**kwargs)
        quarantine_path = os.path.join(self.devices, "sda", "quarantined", "objects")
        self.assertTrue(os.path.isdir(quarantine_path))

    def test_object_run_fast_track_zero(self):
        self.setup_bad_zero_byte()
        kwargs = {"mode": "once"}
        kwargs["zero_byte_fps"] = 50
        self.auditor.run_audit(**kwargs)
        quarantine_path = os.path.join(self.devices, "sda", "quarantined", "objects")
        self.assertTrue(os.path.isdir(quarantine_path))

    def test_object_run_fast_track_zero_check_closed(self):
        rat = [False]

        class FakeFile(DiskFile):
            def _quarantine(self, data_file, msg):
                rat[0] = True
                DiskFile._quarantine(self, data_file, msg)

        self.setup_bad_zero_byte()
        with mock.patch("swift.obj.diskfile.DiskFileManager.diskfile_cls", FakeFile):
            kwargs = {"mode": "once"}
            kwargs["zero_byte_fps"] = 50
            self.auditor.run_audit(**kwargs)
            quarantine_path = os.path.join(self.devices, "sda", "quarantined", "objects")
            self.assertTrue(os.path.isdir(quarantine_path))
            self.assertTrue(rat[0])

    @mock.patch.object(auditor.ObjectAuditor, "run_audit")
    @mock.patch("os.fork", return_value=0)
    def test_with_inaccessible_object_location(self, mock_os_fork, mock_run_audit):
        # Need to ensure that any failures in run_audit do
        # not prevent sys.exit() from running.  Otherwise we get
        # zombie processes.
        e = OSError("permission denied")
        mock_run_audit.side_effect = e
        self.auditor = auditor.ObjectAuditor(self.conf)
        self.assertRaises(SystemExit, self.auditor.fork_child, self)

    def test_with_only_tombstone(self):
        # sanity check that auditor doesn't touch solitary tombstones
        ts_iter = make_timestamp_iter()
        self.setup_bad_zero_byte(timestamp=ts_iter.next())
        self.disk_file.delete(ts_iter.next())
        files = os.listdir(self.disk_file._datadir)
        self.assertEqual(1, len(files))
        self.assertTrue(files[0].endswith("ts"))
        kwargs = {"mode": "once"}
        self.auditor.run_audit(**kwargs)
        files_after = os.listdir(self.disk_file._datadir)
        self.assertEqual(files, files_after)

    def test_with_tombstone_and_data(self):
        # rsync replication could leave a tombstone and data file in object
        # dir - verify they are both removed during audit
        ts_iter = make_timestamp_iter()
        ts_tomb = ts_iter.next()
        ts_data = ts_iter.next()
        self.setup_bad_zero_byte(timestamp=ts_data)
        tomb_file_path = os.path.join(self.disk_file._datadir, "%s.ts" % ts_tomb.internal)
        with open(tomb_file_path, "wb") as fd:
            write_metadata(fd, {"X-Timestamp": ts_tomb.internal})
        files = os.listdir(self.disk_file._datadir)
        self.assertEqual(2, len(files))
        self.assertTrue(os.path.basename(tomb_file_path) in files, files)
        kwargs = {"mode": "once"}
        self.auditor.run_audit(**kwargs)
        self.assertFalse(os.path.exists(self.disk_file._datadir))

    def test_sleeper(self):
        with mock.patch("time.sleep", mock.MagicMock()) as mock_sleep:
            auditor.SLEEP_BETWEEN_AUDITS = 0.10
            my_auditor = auditor.ObjectAuditor(self.conf)
            my_auditor._sleep()
            mock_sleep.assert_called_with(auditor.SLEEP_BETWEEN_AUDITS)

    def test_run_parallel_audit(self):
        class StopForever(Exception):
            pass

        class Bogus(Exception):
            pass

        class ObjectAuditorMock(object):
            check_args = ()
            check_kwargs = {}
            check_device_dir = None
            fork_called = 0
            master = 0
            wait_called = 0

            def mock_run(self, *args, **kwargs):
                self.check_args = args
                self.check_kwargs = kwargs
                if "zero_byte_fps" in kwargs:
                    self.check_device_dir = kwargs.get("device_dirs")

            def mock_sleep_stop(self):
                raise StopForever("stop")

            def mock_sleep_continue(self):
                return

            def mock_audit_loop_error(self, parent, zbo_fps, override_devices=None, **kwargs):
                raise Bogus("exception")

            def mock_fork(self):
                self.fork_called += 1
                if self.master:
                    return self.fork_called
                else:
                    return 0

            def mock_wait(self):
                self.wait_called += 1
                return (self.wait_called, 0)

        for i in string.ascii_letters[2:26]:
            mkdirs(os.path.join(self.devices, "sd%s" % i))

        my_auditor = auditor.ObjectAuditor(
            dict(devices=self.devices, mount_check="false", zero_byte_files_per_second=89, concurrency=1)
        )

        mocker = ObjectAuditorMock()
        my_auditor.logger.exception = mock.MagicMock()
        real_audit_loop = my_auditor.audit_loop
        my_auditor.audit_loop = mocker.mock_audit_loop_error
        my_auditor.run_audit = mocker.mock_run
        was_fork = os.fork
        was_wait = os.wait
        os.fork = mocker.mock_fork
        os.wait = mocker.mock_wait
        try:
            my_auditor._sleep = mocker.mock_sleep_stop
            my_auditor.run_once(zero_byte_fps=50)
            my_auditor.logger.exception.assert_called_once_with("ERROR auditing: exception")
            my_auditor.logger.exception.reset_mock()
            self.assertRaises(StopForever, my_auditor.run_forever)
            my_auditor.logger.exception.assert_called_once_with("ERROR auditing: exception")
            my_auditor.audit_loop = real_audit_loop

            self.assertRaises(StopForever, my_auditor.run_forever, zero_byte_fps=50)
            self.assertEquals(mocker.check_kwargs["zero_byte_fps"], 50)
            self.assertEquals(mocker.fork_called, 0)

            self.assertRaises(SystemExit, my_auditor.run_once)
            self.assertEquals(mocker.fork_called, 1)
            self.assertEquals(mocker.check_kwargs["zero_byte_fps"], 89)
            self.assertEquals(mocker.check_device_dir, [])
            self.assertEquals(mocker.check_args, ())

            device_list = ["sd%s" % i for i in string.ascii_letters[2:10]]
            device_string = ",".join(device_list)
            device_string_bogus = device_string + ",bogus"

            mocker.fork_called = 0
            self.assertRaises(SystemExit, my_auditor.run_once, devices=device_string_bogus)
            self.assertEquals(mocker.fork_called, 1)
            self.assertEquals(mocker.check_kwargs["zero_byte_fps"], 89)
            self.assertEquals(sorted(mocker.check_device_dir), device_list)

            mocker.master = 1

            mocker.fork_called = 0
            self.assertRaises(StopForever, my_auditor.run_forever)
            # Fork is called 2 times since the zbf process is forked just
            # once before self._sleep() is called and StopForever is raised.
            # Wait is also called just once before StopForever is raised.
            self.assertEquals(mocker.fork_called, 2)
            self.assertEquals(mocker.wait_called, 1)

            my_auditor._sleep = mocker.mock_sleep_continue

            my_auditor.concurrency = 2
            mocker.fork_called = 0
            mocker.wait_called = 0
            my_auditor.run_once()
            # Fork is called (no. of devices) + (no. of devices) / 2 + 1 times
            # since the zbf process is forked (no. of devices) / 2 + 1 times.
            no_devices = len(os.listdir(self.devices))
            self.assertEquals(mocker.fork_called, no_devices + no_devices / 2 + 1)
            self.assertEquals(mocker.wait_called, no_devices + no_devices / 2 + 1)
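            # (Sanity check on the arithmetic: e.g. with 26 device dirs under
            # self.devices this works out to 26 + 26 / 2 + 1 = 40 forks and
            # 40 waits.)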

        finally:
            os.fork = was_fork
            os.wait = was_wait
Example #13
class ObjectController(object):
    """Implements the WSGI application for the Swift Object Server."""

    def __init__(self, conf, logger=None):
        """
        Creates a new WSGI application for the Swift Object Server. An
        example configuration is given at
        <source-dir>/etc/object-server.conf-sample or
        /etc/swift/object-server.conf-sample.
        """
        self.logger = logger or get_logger(conf, log_route='object-server')
        self.node_timeout = int(conf.get('node_timeout', 3))
        self.conn_timeout = float(conf.get('conn_timeout', 0.5))
        self.client_timeout = int(conf.get('client_timeout', 60))
        self.disk_chunk_size = int(conf.get('disk_chunk_size', 65536))
        self.network_chunk_size = int(conf.get('network_chunk_size', 65536))
        self.log_requests = config_true_value(conf.get('log_requests', 'true'))
        self.max_upload_time = int(conf.get('max_upload_time', 86400))
        self.slow = int(conf.get('slow', 0))
        self.keep_cache_private = \
            config_true_value(conf.get('keep_cache_private', 'false'))
        replication_server = conf.get('replication_server', None)
        if replication_server is not None:
            replication_server = config_true_value(replication_server)
        self.replication_server = replication_server

        default_allowed_headers = '''
            content-disposition,
            content-encoding,
            x-delete-at,
            x-object-manifest,
            x-static-large-object,
        '''
        extra_allowed_headers = [
            header.strip().lower() for header in conf.get(
                'allowed_headers', default_allowed_headers).split(',')
            if header.strip()
        ]
        self.allowed_headers = set()
        for header in extra_allowed_headers:
            if header not in DATAFILE_SYSTEM_META:
                self.allowed_headers.add(header)
        self.auto_create_account_prefix = \
            conf.get('auto_create_account_prefix') or '.'
        self.expiring_objects_account = self.auto_create_account_prefix + \
            (conf.get('expiring_objects_account_name') or 'expiring_objects')
        self.expiring_objects_container_divisor = \
            int(conf.get('expiring_objects_container_divisor') or 86400)
        # Initialization was successful, so now apply the network chunk size
        # parameter as the default read / write buffer size for the network
        # sockets.
        #
        # NOTE WELL: This is a class setting, so until we can set this on a
        # per-connection basis, this affects reading and writing on ALL
        # sockets, those between the proxy servers and external clients, and
        # those between the proxy servers and the other internal servers.
        #
        # ** Because the primary motivation for this is to optimize how data
        # is written back to the proxy server, we could use the value from the
        # disk_chunk_size parameter. However, it affects all created sockets
        # using this class so we have chosen to tie it to the
        # network_chunk_size parameter value instead.
        socket._fileobject.default_bufsize = self.network_chunk_size

        # Provide further setup specific to an object server implementation.
        self.setup(conf)
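        # A minimal sketch of constructing this app directly (hypothetical
        # config values; a real deployment builds it from object-server.conf
        # via paste.deploy):
        #
        #   conf = {'devices': '/srv/node', 'mount_check': 'false'}
        #   app = ObjectController(conf)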

    def setup(self, conf):
        """
        Implementation specific setup. This method is called at the very end
        by the constructor to allow a specific implementation to modify
        existing attributes or add its own attributes.

        :param conf: WSGI configuration parameter
        """

        # Common on-disk hierarchy shared across account, container and object
        # servers.
        self._diskfile_mgr = DiskFileManager(conf, self.logger)
        # This is populated by global_conf_callback way below as the semaphore
        # is shared by all workers.
        if 'replication_semaphore' in conf:
            # The value was put in a list so it could get past paste
            self.replication_semaphore = conf['replication_semaphore'][0]
        else:
            self.replication_semaphore = None
        self.replication_failure_threshold = int(
            conf.get('replication_failure_threshold') or 100)
        self.replication_failure_ratio = float(
            conf.get('replication_failure_ratio') or 1.0)

    def get_diskfile(self, device, partition, account, container, obj,
                     policy_idx, **kwargs):
        """
        Utility method for instantiating a DiskFile object supporting a given
        REST API.

        An implementation of the object server that wants to use a different
        DiskFile class would simply over-ride this method to provide that
        behavior.
        """
        return self._diskfile_mgr.get_diskfile(
            device, partition, account, container, obj, policy_idx, **kwargs)
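        # A hypothetical subclass (illustrative names, not part of this
        # module) could swap in a different DiskFile backend by overriding
        # this hook, e.g.:
        #
        #   class InMemoryObjectController(ObjectController):
        #       def get_diskfile(self, device, partition, account, container,
        #                        obj, policy_idx, **kwargs):
        #           return self._mem_mgr.get_diskfile(
        #               device, partition, account, container, obj, **kwargs)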

    def async_update(self, op, account, container, obj, host, partition,
                     contdevice, headers_out, objdevice, policy_idx):
        """
        Sends or saves an async update.

        :param op: operation performed (ex: 'PUT', or 'DELETE')
        :param account: account name for the object
        :param container: container name for the object
        :param obj: object name
        :param host: host that the container is on
        :param partition: partition that the container is on
        :param contdevice: device name that the container is on
        :param headers_out: dictionary of headers to send in the container
                            request
        :param objdevice: device name that the object is in
        :param policy_idx: the associated storage policy index
        """
        headers_out['user-agent'] = 'obj-server %s' % os.getpid()
        full_path = '/%s/%s/%s' % (account, container, obj)
        if all([host, partition, contdevice]):
            try:
                with ConnectionTimeout(self.conn_timeout):
                    ip, port = host.rsplit(':', 1)
                    conn = http_connect(ip, port, contdevice, partition, op,
                                        full_path, headers_out)
                with Timeout(self.node_timeout):
                    response = conn.getresponse()
                    response.read()
                    if is_success(response.status):
                        return
                    else:
                        self.logger.error(_(
                            'ERROR Container update failed '
                            '(saving for async update later): %(status)d '
                            'response from %(ip)s:%(port)s/%(dev)s'),
                            {'status': response.status, 'ip': ip, 'port': port,
                             'dev': contdevice})
            except (Exception, Timeout):
                self.logger.exception(_(
                    'ERROR container update failed with '
                    '%(ip)s:%(port)s/%(dev)s (saving for async update later)'),
                    {'ip': ip, 'port': port, 'dev': contdevice})
        data = {'op': op, 'account': account, 'container': container,
                'obj': obj, 'headers': headers_out}
        timestamp = headers_out['x-timestamp']
        self._diskfile_mgr.pickle_async_update(objdevice, account, container,
                                               obj, data, timestamp,
                                               policy_idx)

    def container_update(self, op, account, container, obj, request,
                         headers_out, objdevice, policy_idx):
        """
        Update the container when objects are updated.

        :param op: operation performed (ex: 'PUT', or 'DELETE')
        :param account: account name for the object
        :param container: container name for the object
        :param obj: object name
        :param request: the original request object driving the update
        :param headers_out: dictionary of headers to send in the container
                            request(s)
        :param objdevice: device name that the object is in
        :param policy_idx: the associated storage policy index
        """
        headers_in = request.headers
        conthosts = [h.strip() for h in
                     headers_in.get('X-Container-Host', '').split(',')]
        contdevices = [d.strip() for d in
                       headers_in.get('X-Container-Device', '').split(',')]
        contpartition = headers_in.get('X-Container-Partition', '')

        if len(conthosts) != len(contdevices):
            # This shouldn't happen unless there's a bug in the proxy,
            # but if there is, we want to know about it.
            self.logger.error(_('ERROR Container update failed: different '
                                'numbers of hosts and devices in request: '
                                '"%s" vs "%s"') %
                               (headers_in.get('X-Container-Host', ''),
                                headers_in.get('X-Container-Device', '')))
            return

        if contpartition:
            updates = zip(conthosts, contdevices)
        else:
            updates = []
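        # e.g. X-Container-Host: "10.0.0.1:6001,10.0.0.2:6001" paired with
        # X-Container-Device: "sdb,sdc" yields
        # [("10.0.0.1:6001", "sdb"), ("10.0.0.2:6001", "sdc")], i.e. one
        # async update per container replica (host and port values are
        # illustrative).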

        headers_out['x-trans-id'] = headers_in.get('x-trans-id', '-')
        headers_out['referer'] = request.as_referer()
        headers_out[POLICY_INDEX] = str(policy_idx)
        for conthost, contdevice in updates:
            self.async_update(op, account, container, obj, conthost,
                              contpartition, contdevice, headers_out,
                              objdevice, policy_idx)

    def delete_at_update(self, op, delete_at, account, container, obj,
                         request, objdevice):
        """
        Update the expiring objects container when objects are updated.

        :param op: operation performed (ex: 'PUT', or 'DELETE')
        :param delete_at: scheduled delete in UNIX seconds, int
        :param account: account name for the object
        :param container: container name for the object
        :param obj: object name
        :param request: the original request driving the update
        :param objdevice: device name that the object is in
        """
        if config_true_value(
                request.headers.get('x-backend-replication', 'f')):
            return
        delete_at = normalize_delete_at_timestamp(delete_at)
        updates = [(None, None)]

        partition = None
        hosts = contdevices = [None]
        headers_in = request.headers
        headers_out = HeaderKeyDict({
            'x-timestamp': headers_in['x-timestamp'],
            'x-trans-id': headers_in.get('x-trans-id', '-'),
            'referer': request.as_referer()})
        if op != 'DELETE':
            delete_at_container = headers_in.get('X-Delete-At-Container', None)
            if not delete_at_container:
                self.logger.warning(
                    'X-Delete-At-Container header must be specified for '
                    'expiring objects background %s to work properly. Making '
                    'best guess as to the container name for now.' % op)
                # TODO(gholt): In a future release, change the above warning to
                # a raised exception and remove the guess code below.
                delete_at_container = (
                    int(delete_at) / self.expiring_objects_container_divisor *
                    self.expiring_objects_container_divisor)
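                # e.g. with the default divisor of 86400 (one day), a
                # delete_at of 1400000000 rounds down to container
                # 1399939200, the start of its UTC day.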
            partition = headers_in.get('X-Delete-At-Partition', None)
            hosts = headers_in.get('X-Delete-At-Host', '')
            contdevices = headers_in.get('X-Delete-At-Device', '')
            updates = [upd for upd in
                       zip((h.strip() for h in hosts.split(',')),
                           (c.strip() for c in contdevices.split(',')))
                       if all(upd) and partition]
            if not updates:
                updates = [(None, None)]
            headers_out['x-size'] = '0'
            headers_out['x-content-type'] = 'text/plain'
            headers_out['x-etag'] = 'd41d8cd98f00b204e9800998ecf8427e'
        else:
            # DELETEs of old expiration data have no way of knowing what the
            # old X-Delete-At-Container was at the time of the initial setting
            # of the data, so a best guess is made here.
            # Worst case is a DELETE is issued now for something that doesn't
            # exist there and the original data is left where it is, where
            # it will be ignored when the expirer eventually tries to issue the
            # object DELETE later since the X-Delete-At value won't match up.
            delete_at_container = str(
                int(delete_at) / self.expiring_objects_container_divisor *
                self.expiring_objects_container_divisor)
        delete_at_container = normalize_delete_at_timestamp(
            delete_at_container)

        for host, contdevice in updates:
            self.async_update(
                op, self.expiring_objects_account, delete_at_container,
                '%s-%s/%s/%s' % (delete_at, account, container, obj),
                host, partition, contdevice, headers_out, objdevice, 0)

    @public
    @timing_stats()
    def POST(self, request):
        """Handle HTTP POST requests for the Swift Object Server."""
        device, partition, account, container, obj, policy_idx = \
            get_name_and_placement(request, 5, 5, True)
        if 'x-timestamp' not in request.headers or \
                not check_float(request.headers['x-timestamp']):
            return HTTPBadRequest(body='Missing timestamp', request=request,
                                  content_type='text/plain')
        new_delete_at = int(request.headers.get('X-Delete-At') or 0)
        if new_delete_at and new_delete_at < time.time():
            return HTTPBadRequest(body='X-Delete-At in past', request=request,
                                  content_type='text/plain')
        try:
            disk_file = self.get_diskfile(
                device, partition, account, container, obj,
                policy_idx=policy_idx)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=device, request=request)
        try:
            orig_metadata = disk_file.read_metadata()
        except (DiskFileNotExist, DiskFileQuarantined):
            return HTTPNotFound(request=request)
        orig_timestamp = orig_metadata.get('X-Timestamp', '0')
        if orig_timestamp >= request.headers['x-timestamp']:
            return HTTPConflict(request=request)
        metadata = {'X-Timestamp': request.headers['x-timestamp']}
        metadata.update(val for val in request.headers.iteritems()
                        if is_user_meta('object', val[0]))
        for header_key in self.allowed_headers:
            if header_key in request.headers:
                header_caps = header_key.title()
                metadata[header_caps] = request.headers[header_key]
        orig_delete_at = int(orig_metadata.get('X-Delete-At') or 0)
        if orig_delete_at != new_delete_at:
            if new_delete_at:
                self.delete_at_update('PUT', new_delete_at, account, container,
                                      obj, request, device)
            if orig_delete_at:
                self.delete_at_update('DELETE', orig_delete_at, account,
                                      container, obj, request, device)
        disk_file.write_metadata(metadata)
        return HTTPAccepted(request=request)

    @public
    @timing_stats()
    def PUT(self, request):
        """Handle HTTP PUT requests for the Swift Object Server."""
        device, partition, account, container, obj, policy_idx = \
            get_name_and_placement(request, 5, 5, True)
        if 'x-timestamp' not in request.headers or \
                not check_float(request.headers['x-timestamp']):
            return HTTPBadRequest(body='Missing timestamp', request=request,
                                  content_type='text/plain')
        error_response = check_object_creation(request, obj)
        if error_response:
            return error_response
        new_delete_at = int(request.headers.get('X-Delete-At') or 0)
        if new_delete_at and new_delete_at < time.time():
            return HTTPBadRequest(body='X-Delete-At in past', request=request,
                                  content_type='text/plain')
        try:
            fsize = request.message_length()
        except ValueError as e:
            return HTTPBadRequest(body=str(e), request=request,
                                  content_type='text/plain')
        try:
            disk_file = self.get_diskfile(
                device, partition, account, container, obj,
                policy_idx=policy_idx)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=device, request=request)
        try:
            orig_metadata = disk_file.read_metadata()
        except (DiskFileNotExist, DiskFileQuarantined):
            orig_metadata = {}

        # Checks for If-None-Match
        if request.if_none_match is not None and orig_metadata:
            if '*' in request.if_none_match:
                # File exists already so return 412
                return HTTPPreconditionFailed(request=request)
            if orig_metadata.get('ETag') in request.if_none_match:
                # The current ETag matches, so return 412
                return HTTPPreconditionFailed(request=request)

        orig_timestamp = orig_metadata.get('X-Timestamp')
        if orig_timestamp and orig_timestamp >= request.headers['x-timestamp']:
            return HTTPConflict(request=request)
        orig_delete_at = int(orig_metadata.get('X-Delete-At') or 0)
        upload_expiration = time.time() + self.max_upload_time
        etag = md5()
        elapsed_time = 0
        try:
            with disk_file.create(size=fsize) as writer:
                upload_size = 0

                def timeout_reader():
                    with ChunkReadTimeout(self.client_timeout):
                        return request.environ['wsgi.input'].read(
                            self.network_chunk_size)

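                # iter() with an empty-string sentinel keeps pulling
                # network_chunk_size sized chunks until the client's input
                # is exhausted or ChunkReadTimeout fires.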
                try:
                    for chunk in iter(lambda: timeout_reader(), ''):
                        start_time = time.time()
                        if start_time > upload_expiration:
                            self.logger.increment('PUT.timeouts')
                            return HTTPRequestTimeout(request=request)
                        etag.update(chunk)
                        upload_size = writer.write(chunk)
                        elapsed_time += time.time() - start_time
                except ChunkReadTimeout:
                    return HTTPRequestTimeout(request=request)
                if upload_size:
                    self.logger.transfer_rate(
                        'PUT.' + device + '.timing', elapsed_time,
                        upload_size)
                if fsize is not None and fsize != upload_size:
                    return HTTPClientDisconnect(request=request)
                etag = etag.hexdigest()
                if 'etag' in request.headers and \
                        request.headers['etag'].lower() != etag:
                    return HTTPUnprocessableEntity(request=request)
                metadata = {
                    'X-Timestamp': request.headers['x-timestamp'],
                    'Content-Type': request.headers['content-type'],
                    'ETag': etag,
                    'Content-Length': str(upload_size),
                }
                metadata.update(val for val in request.headers.iteritems()
                                if is_user_meta('object', val[0]))
                for header_key in (
                        request.headers.get('X-Backend-Replication-Headers') or
                        self.allowed_headers):
                    if header_key in request.headers:
                        header_caps = header_key.title()
                        metadata[header_caps] = request.headers[header_key]
                writer.put(metadata)
        except DiskFileNoSpace:
            return HTTPInsufficientStorage(drive=device, request=request)
        if orig_delete_at != new_delete_at:
            if new_delete_at:
                self.delete_at_update(
                    'PUT', new_delete_at, account, container, obj, request,
                    device)
            if orig_delete_at:
                self.delete_at_update(
                    'DELETE', orig_delete_at, account, container, obj,
                    request, device)
        self.container_update(
            'PUT', account, container, obj, request,
            HeaderKeyDict({
                'x-size': metadata['Content-Length'],
                'x-content-type': metadata['Content-Type'],
                'x-timestamp': metadata['X-Timestamp'],
                'x-etag': metadata['ETag']}),
            device, policy_idx)
        return HTTPCreated(request=request, etag=etag)

    @public
    @timing_stats()
    def GET(self, request):
        """Handle HTTP GET requests for the Swift Object Server."""
        device, partition, account, container, obj, policy_idx = \
            get_name_and_placement(request, 5, 5, True)
        keep_cache = self.keep_cache_private or (
            'X-Auth-Token' not in request.headers and
            'X-Storage-Token' not in request.headers)
        try:
            disk_file = self.get_diskfile(
                device, partition, account, container, obj,
                policy_idx=policy_idx)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=device, request=request)
        try:
            with disk_file.open():
                metadata = disk_file.get_metadata()
                obj_size = int(metadata['Content-Length'])
                file_x_ts = metadata['X-Timestamp']
                file_x_ts_flt = float(file_x_ts)
                keep_cache = (self.keep_cache_private or
                              ('X-Auth-Token' not in request.headers and
                               'X-Storage-Token' not in request.headers))
                response = Response(
                    app_iter=disk_file.reader(keep_cache=keep_cache),
                    request=request, conditional_response=True)
                response.headers['Content-Type'] = metadata.get(
                    'Content-Type', 'application/octet-stream')
                for key, value in metadata.iteritems():
                    if is_user_meta('object', key) or \
                            key.lower() in self.allowed_headers:
                        response.headers[key] = value
                response.etag = metadata['ETag']
                response.last_modified = math.ceil(file_x_ts_flt)
                response.content_length = obj_size
                try:
                    response.content_encoding = metadata[
                        'Content-Encoding']
                except KeyError:
                    pass
                response.headers['X-Timestamp'] = file_x_ts
                resp = request.get_response(response)
        except (DiskFileNotExist, DiskFileQuarantined) as e:
            headers = {}
            if hasattr(e, 'timestamp'):
                headers['X-Timestamp'] = e.timestamp
            resp = HTTPNotFound(request=request, headers=headers,
                                conditional_response=True)
        return resp

    @public
    @timing_stats(sample_rate=0.8)
    def HEAD(self, request):
        """Handle HTTP HEAD requests for the Swift Object Server."""
        device, partition, account, container, obj, policy_idx = \
            get_name_and_placement(request, 5, 5, True)
        try:
            disk_file = self.get_diskfile(
                device, partition, account, container, obj,
                policy_idx=policy_idx)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=device, request=request)
        try:
            metadata = disk_file.read_metadata()
        except (DiskFileNotExist, DiskFileQuarantined) as e:
            headers = {}
            if hasattr(e, 'timestamp'):
                headers['X-Timestamp'] = e.timestamp
            return HTTPNotFound(request=request, headers=headers,
                                conditional_response=True)
        response = Response(request=request, conditional_response=True)
        response.headers['Content-Type'] = metadata.get(
            'Content-Type', 'application/octet-stream')
        for key, value in metadata.iteritems():
            if is_user_meta('object', key) or \
                    key.lower() in self.allowed_headers:
                response.headers[key] = value
        response.etag = metadata['ETag']
        ts = metadata['X-Timestamp']
        response.last_modified = math.ceil(float(ts))
        # Needed for container sync feature
        response.headers['X-Timestamp'] = ts
        response.content_length = int(metadata['Content-Length'])
        try:
            response.content_encoding = metadata['Content-Encoding']
        except KeyError:
            pass
        return response

    @public
    @timing_stats()
    def DELETE(self, request):
        """Handle HTTP DELETE requests for the Swift Object Server."""
        device, partition, account, container, obj, policy_idx = \
            get_name_and_placement(request, 5, 5, True)
        if 'x-timestamp' not in request.headers or \
                not check_float(request.headers['x-timestamp']):
            return HTTPBadRequest(body='Missing timestamp', request=request,
                                  content_type='text/plain')
        try:
            disk_file = self.get_diskfile(
                device, partition, account, container, obj,
                policy_idx=policy_idx)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=device, request=request)
        try:
            orig_metadata = disk_file.read_metadata()
        except DiskFileExpired as e:
            orig_timestamp = e.timestamp
            orig_metadata = e.metadata
            response_class = HTTPNotFound
        except DiskFileDeleted as e:
            orig_timestamp = e.timestamp
            orig_metadata = {}
            response_class = HTTPNotFound
        except (DiskFileNotExist, DiskFileQuarantined):
            orig_timestamp = 0
            orig_metadata = {}
            response_class = HTTPNotFound
        else:
            orig_timestamp = orig_metadata.get('X-Timestamp', 0)
            if orig_timestamp < request.headers['x-timestamp']:
                response_class = HTTPNoContent
            else:
                response_class = HTTPConflict
        orig_delete_at = int(orig_metadata.get('X-Delete-At') or 0)
        try:
            req_if_delete_at_val = request.headers['x-if-delete-at']
            req_if_delete_at = int(req_if_delete_at_val)
        except KeyError:
            pass
        except ValueError:
            return HTTPBadRequest(
                request=request,
                body='Bad X-If-Delete-At header value')
        else:
            if orig_delete_at != req_if_delete_at:
                return HTTPPreconditionFailed(
                    request=request,
                    body='X-If-Delete-At and X-Delete-At do not match')
        if orig_delete_at:
            self.delete_at_update('DELETE', orig_delete_at, account,
                                  container, obj, request, device)
        req_timestamp = request.headers['X-Timestamp']
        if orig_timestamp < req_timestamp:
            disk_file.delete(req_timestamp)
            self.container_update(
                'DELETE', account, container, obj, request,
                HeaderKeyDict({'x-timestamp': req_timestamp}),
                device, policy_idx)
        return response_class(request=request)

    @public
    @replication
    @timing_stats(sample_rate=0.1)
    def REPLICATE(self, request):
        """
        Handle REPLICATE requests for the Swift Object Server.  This is used
        by the object replicator to get hashes for directories.
        """
        device, partition, suffix, policy_idx = \
            get_name_and_placement(request, 2, 3, True)
        try:
            hashes = self._diskfile_mgr.get_hashes(device, partition, suffix,
                                                   policy_idx)
        except DiskFileDeviceUnavailable:
            resp = HTTPInsufficientStorage(drive=device, request=request)
        else:
            resp = Response(body=pickle.dumps(hashes))
        return resp

    @public
    @replication
    @timing_stats(sample_rate=0.1)
    def REPLICATION(self, request):
        return Response(app_iter=ssync_receiver.Receiver(self, request)())

    def __call__(self, env, start_response):
        """WSGI Application entry point for the Swift Object Server."""
        start_time = time.time()
        req = Request(env)
        self.logger.txn_id = req.headers.get('x-trans-id', None)

        if not check_utf8(req.path_info):
            res = HTTPPreconditionFailed(body='Invalid UTF8 or contains NULL')
        else:
            try:
                # disallow methods which have not been marked 'public'
                try:
                    method = getattr(self, req.method)
                    getattr(method, 'publicly_accessible')
                    replication_method = getattr(method, 'replication', False)
                    if (self.replication_server is not None and
                            self.replication_server != replication_method):
                        raise AttributeError('Not allowed method.')
                except AttributeError:
                    res = HTTPMethodNotAllowed()
                else:
                    res = method(req)
            except DiskFileCollision:
                res = HTTPForbidden(request=req)
            except HTTPException as error_response:
                res = error_response
            except (Exception, Timeout):
                self.logger.exception(_(
                    'ERROR __call__ error with %(method)s'
                    ' %(path)s '), {'method': req.method, 'path': req.path})
                res = HTTPInternalServerError(body=traceback.format_exc())
        trans_time = time.time() - start_time
        if self.log_requests:
            log_line = get_log_line(req, res, trans_time, '')
            if req.method in ('REPLICATE', 'REPLICATION') or \
                    'X-Backend-Replication' in req.headers:
                self.logger.debug(log_line)
            else:
                self.logger.info(log_line)
        if req.method in ('PUT', 'DELETE'):
            slow = self.slow - trans_time
            if slow > 0:
                sleep(slow)
        return res(env, start_response)
Example #14
class ObjectController(object):
    """Implements the WSGI application for the Swift Object Server."""

    def __init__(self, conf, logger=None):
        """
        Creates a new WSGI application for the Swift Object Server. An
        example configuration is given at
        <source-dir>/etc/object-server.conf-sample or
        /etc/swift/object-server.conf-sample.
        """
        self.logger = logger or get_logger(conf, log_route='object-server')
        self.node_timeout = int(conf.get('node_timeout', 3))
        self.conn_timeout = float(conf.get('conn_timeout', 0.5))
        self.client_timeout = int(conf.get('client_timeout', 60))
        self.disk_chunk_size = int(conf.get('disk_chunk_size', 65536))
        self.network_chunk_size = int(conf.get('network_chunk_size', 65536))
        self.log_requests = config_true_value(conf.get('log_requests', 'true'))
        self.max_upload_time = int(conf.get('max_upload_time', 86400))
        self.slow = int(conf.get('slow', 0))
        self.keep_cache_private = \
            config_true_value(conf.get('keep_cache_private', 'false'))
        replication_server = conf.get('replication_server', None)
        if replication_server is not None:
            replication_server = config_true_value(replication_server)
        self.replication_server = replication_server

        default_allowed_headers = '''
            content-disposition,
            content-encoding,
            x-delete-at,
            x-object-manifest,
            x-static-large-object,
        '''
        extra_allowed_headers = [
            header.strip().lower() for header in conf.get(
                'allowed_headers', default_allowed_headers).split(',')
            if header.strip()
        ]
        self.allowed_headers = set()
        for header in extra_allowed_headers:
            if header not in DATAFILE_SYSTEM_META:
                self.allowed_headers.add(header)
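        # e.g. with the defaults above, allowed_headers ends up as
        # {'content-disposition', 'content-encoding', 'x-delete-at',
        #  'x-object-manifest', 'x-static-large-object'}, minus anything
        # that also appears in DATAFILE_SYSTEM_META.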
        self.expiring_objects_account = \
            (conf.get('auto_create_account_prefix') or '.') + \
            (conf.get('expiring_objects_account_name') or 'expiring_objects')
        self.expiring_objects_container_divisor = \
            int(conf.get('expiring_objects_container_divisor') or 86400)
        # Initialization was successful, so now apply the network chunk size
        # parameter as the default read / write buffer size for the network
        # sockets.
        #
        # NOTE WELL: This is a class setting, so until we can set this on a
        # per-connection basis, this affects reading and writing on ALL
        # sockets, those between the proxy servers and external clients, and
        # those between the proxy servers and the other internal servers.
        #
        # ** Because the primary motivation for this is to optimize how data
        # is written back to the proxy server, we could use the value from the
        # disk_chunk_size parameter. However, it affects all created sockets
        # using this class so we have chosen to tie it to the
        # network_chunk_size parameter value instead.
        socket._fileobject.default_bufsize = self.network_chunk_size

        # Provide further setup specific to an object server implementation.
        self.setup(conf)

    def setup(self, conf):
        """
        Implementation specific setup. This method is called at the very end
        by the constructor to allow a specific implementation to modify
        existing attributes or add its own attributes.

        :param conf: WSGI configuration parameter
        """

        # Common on-disk hierarchy shared across account, container and object
        # servers.
        self._diskfile_mgr = DiskFileManager(conf, self.logger)
        # This is populated by global_conf_callback way below as the semaphore
        # is shared by all workers.
        if 'replication_semaphore' in conf:
            # The value was put in a list so it could get past paste
            self.replication_semaphore = conf['replication_semaphore'][0]
        else:
            self.replication_semaphore = None
        self.replication_failure_threshold = int(
            conf.get('replication_failure_threshold') or 100)
        self.replication_failure_ratio = float(
            conf.get('replication_failure_ratio') or 1.0)

    def get_diskfile(self, device, partition, account, container, obj,
                     **kwargs):
        """
        Utility method for instantiating a DiskFile object supporting a given
        REST API.

        An implementation of the object server that wants to use a different
        DiskFile class would simply over-ride this method to provide that
        behavior.
        """
        return self._diskfile_mgr.get_diskfile(
            device, partition, account, container, obj, **kwargs)

    def async_update(self, op, account, container, obj, host, partition,
                     contdevice, headers_out, objdevice):
        """
        Sends or saves an async update.

        :param op: operation performed (ex: 'PUT', or 'DELETE')
        :param account: account name for the object
        :param container: container name for the object
        :param obj: object name
        :param host: host that the container is on
        :param partition: partition that the container is on
        :param contdevice: device name that the container is on
        :param headers_out: dictionary of headers to send in the container
                            request
        :param objdevice: device name that the object is in
        """
        headers_out['user-agent'] = 'obj-server %s' % os.getpid()
        full_path = '/%s/%s/%s' % (account, container, obj)
        if all([host, partition, contdevice]):
            try:
                with ConnectionTimeout(self.conn_timeout):
                    ip, port = host.rsplit(':', 1)
                    conn = http_connect(ip, port, contdevice, partition, op,
                                        full_path, headers_out)
                with Timeout(self.node_timeout):
                    response = conn.getresponse()
                    response.read()
                    if is_success(response.status):
                        return
                    else:
                        self.logger.error(_(
                            'ERROR Container update failed '
                            '(saving for async update later): %(status)d '
                            'response from %(ip)s:%(port)s/%(dev)s'),
                            {'status': response.status, 'ip': ip, 'port': port,
                             'dev': contdevice})
            except (Exception, Timeout):
                self.logger.exception(_(
                    'ERROR container update failed with '
                    '%(ip)s:%(port)s/%(dev)s (saving for async update later)'),
                    {'ip': ip, 'port': port, 'dev': contdevice})
        data = {'op': op, 'account': account, 'container': container,
                'obj': obj, 'headers': headers_out}
        timestamp = headers_out['x-timestamp']
        self._diskfile_mgr.pickle_async_update(objdevice, account, container,
                                               obj, data, timestamp)

    def container_update(self, op, account, container, obj, request,
                         headers_out, objdevice):
        """
        Update the container when objects are updated.

        :param op: operation performed (ex: 'PUT', or 'DELETE')
        :param account: account name for the object
        :param container: container name for the object
        :param obj: object name
        :param request: the original request object driving the update
        :param headers_out: dictionary of headers to send in the container
                            request(s)
        :param objdevice: device name that the object is in
        """
        headers_in = request.headers
        conthosts = [h.strip() for h in
                     headers_in.get('X-Container-Host', '').split(',')]
        contdevices = [d.strip() for d in
                       headers_in.get('X-Container-Device', '').split(',')]
        contpartition = headers_in.get('X-Container-Partition', '')

        if len(conthosts) != len(contdevices):
            # This shouldn't happen unless there's a bug in the proxy,
            # but if there is, we want to know about it.
            self.logger.error(_('ERROR Container update failed: different '
                                'numbers of hosts and devices in request: '
                                '"%s" vs "%s"') %
                               (headers_in.get('X-Container-Host', ''),
                                headers_in.get('X-Container-Device', '')))
            return

        if contpartition:
            updates = zip(conthosts, contdevices)
        else:
            updates = []

        headers_out['x-trans-id'] = headers_in.get('x-trans-id', '-')
        headers_out['referer'] = request.as_referer()
        for conthost, contdevice in updates:
            self.async_update(op, account, container, obj, conthost,
                              contpartition, contdevice, headers_out,
                              objdevice)

    def delete_at_update(self, op, delete_at, account, container, obj,
                         request, objdevice):
        """
        Update the expiring objects container when objects are updated.

        :param op: operation performed (ex: 'PUT', or 'DELETE')
        :param delete_at: scheduled delete in UNIX seconds, int
        :param account: account name for the object
        :param container: container name for the object
        :param obj: object name
        :param request: the original request driving the update
        :param objdevice: device name that the object is in
        """
        if config_true_value(
                request.headers.get('x-backend-replication', 'f')):
            return
        delete_at = normalize_delete_at_timestamp(delete_at)
        updates = [(None, None)]

        partition = None
        hosts = contdevices = [None]
        headers_in = request.headers
        headers_out = HeaderKeyDict({
            'x-timestamp': headers_in['x-timestamp'],
            'x-trans-id': headers_in.get('x-trans-id', '-'),
            'referer': request.as_referer()})
        if op != 'DELETE':
            delete_at_container = headers_in.get('X-Delete-At-Container', None)
            if not delete_at_container:
                self.logger.warning(
                    'X-Delete-At-Container header must be specified for '
                    'expiring objects background %s to work properly. Making '
                    'best guess as to the container name for now.' % op)
                # TODO(gholt): In a future release, change the above warning to
                # a raised exception and remove the guess code below.
                delete_at_container = (
                    int(delete_at) / self.expiring_objects_container_divisor *
                    self.expiring_objects_container_divisor)
            partition = headers_in.get('X-Delete-At-Partition', None)
            hosts = headers_in.get('X-Delete-At-Host', '')
            contdevices = headers_in.get('X-Delete-At-Device', '')
            updates = [upd for upd in
                       zip((h.strip() for h in hosts.split(',')),
                           (c.strip() for c in contdevices.split(',')))
                       if all(upd) and partition]
            if not updates:
                updates = [(None, None)]
            headers_out['x-size'] = '0'
            headers_out['x-content-type'] = 'text/plain'
            headers_out['x-etag'] = 'd41d8cd98f00b204e9800998ecf8427e'
        else:
            # DELETEs of old expiration data have no way of knowing what the
            # old X-Delete-At-Container was at the time of the initial setting
            # of the data, so a best guess is made here.
            # Worst case is a DELETE is issued now for something that doesn't
            # exist there and the original data is left where it is, where
            # it will be ignored when the expirer eventually tries to issue the
            # object DELETE later since the X-Delete-At value won't match up.
            delete_at_container = str(
                int(delete_at) / self.expiring_objects_container_divisor *
                self.expiring_objects_container_divisor)
        delete_at_container = normalize_delete_at_timestamp(
            delete_at_container)

        for host, contdevice in updates:
            self.async_update(
                op, self.expiring_objects_account, delete_at_container,
                '%s-%s/%s/%s' % (delete_at, account, container, obj),
                host, partition, contdevice, headers_out, objdevice)

    @public
    @timing_stats()
    def POST(self, request):
        """Handle HTTP POST requests for the Swift Object Server."""
        device, partition, account, container, obj = \
            split_and_validate_path(request, 5, 5, True)

        if 'x-timestamp' not in request.headers or \
                not check_float(request.headers['x-timestamp']):
            return HTTPBadRequest(body='Missing timestamp', request=request,
                                  content_type='text/plain')
        new_delete_at = int(request.headers.get('X-Delete-At') or 0)
        if new_delete_at and new_delete_at < time.time():
            return HTTPBadRequest(body='X-Delete-At in past', request=request,
                                  content_type='text/plain')
        try:
            disk_file = self.get_diskfile(
                device, partition, account, container, obj)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=device, request=request)
        try:
            orig_metadata = disk_file.read_metadata()
        except (DiskFileNotExist, DiskFileQuarantined):
            return HTTPNotFound(request=request)
        orig_timestamp = orig_metadata.get('X-Timestamp', '0')
        if orig_timestamp >= request.headers['x-timestamp']:
            return HTTPConflict(request=request)
        metadata = {'X-Timestamp': request.headers['x-timestamp']}
        metadata.update(val for val in request.headers.iteritems()
                        if is_user_meta('object', val[0]))
        for header_key in self.allowed_headers:
            if header_key in request.headers:
                header_caps = header_key.title()
                metadata[header_caps] = request.headers[header_key]
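                # str.title() above restores canonical header casing, e.g.
                # 'x-delete-at' -> 'X-Delete-At', so the metadata key is
                # stored in its X-Header-Name form.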
        orig_delete_at = int(orig_metadata.get('X-Delete-At') or 0)
        if orig_delete_at != new_delete_at:
            if new_delete_at:
                self.delete_at_update('PUT', new_delete_at, account, container,
                                      obj, request, device)
            if orig_delete_at:
                self.delete_at_update('DELETE', orig_delete_at, account,
                                      container, obj, request, device)
        disk_file.write_metadata(metadata)
        return HTTPAccepted(request=request)

    @public
    @timing_stats()
    def PUT(self, request):
        """Handle HTTP PUT requests for the Swift Object Server."""
        device, partition, account, container, obj = \
            split_and_validate_path(request, 5, 5, True)

        if 'x-timestamp' not in request.headers or \
                not check_float(request.headers['x-timestamp']):
            return HTTPBadRequest(body='Missing timestamp', request=request,
                                  content_type='text/plain')
        error_response = check_object_creation(request, obj)
        if error_response:
            return error_response
        new_delete_at = int(request.headers.get('X-Delete-At') or 0)
        if new_delete_at and new_delete_at < time.time():
            return HTTPBadRequest(body='X-Delete-At in past', request=request,
                                  content_type='text/plain')
        try:
            fsize = request.message_length()
        except ValueError as e:
            return HTTPBadRequest(body=str(e), request=request,
                                  content_type='text/plain')
        try:
            disk_file = self.get_diskfile(
                device, partition, account, container, obj)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=device, request=request)
        try:
            orig_metadata = disk_file.read_metadata()
        except (DiskFileNotExist, DiskFileQuarantined):
            orig_metadata = {}

        # Checks for If-None-Match
        if request.if_none_match is not None and orig_metadata:
            if '*' in request.if_none_match:
                # File exists already so return 412
                return HTTPPreconditionFailed(request=request)
            if orig_metadata.get('ETag') in request.if_none_match:
                # The current ETag matches, so return 412
                return HTTPPreconditionFailed(request=request)

        orig_timestamp = orig_metadata.get('X-Timestamp')
        if orig_timestamp and orig_timestamp >= request.headers['x-timestamp']:
            return HTTPConflict(request=request)
        orig_delete_at = int(orig_metadata.get('X-Delete-At') or 0)
        upload_expiration = time.time() + self.max_upload_time
        etag = md5()
        elapsed_time = 0
        try:
            with disk_file.create(size=fsize) as writer:
                upload_size = 0

                def timeout_reader():
                    with ChunkReadTimeout(self.client_timeout):
                        return request.environ['wsgi.input'].read(
                            self.network_chunk_size)

                try:
                    for chunk in iter(lambda: timeout_reader(), ''):
                        start_time = time.time()
                        if start_time > upload_expiration:
                            self.logger.increment('PUT.timeouts')
                            return HTTPRequestTimeout(request=request)
                        etag.update(chunk)
                        upload_size = writer.write(chunk)
                        elapsed_time += time.time() - start_time
                except ChunkReadTimeout:
                    return HTTPRequestTimeout(request=request)
                if upload_size:
                    self.logger.transfer_rate(
                        'PUT.' + device + '.timing', elapsed_time,
                        upload_size)
                if fsize is not None and fsize != upload_size:
                    return HTTPClientDisconnect(request=request)
                etag = etag.hexdigest()
                if 'etag' in request.headers and \
                        request.headers['etag'].lower() != etag:
                    return HTTPUnprocessableEntity(request=request)
                metadata = {
                    'X-Timestamp': request.headers['x-timestamp'],
                    'Content-Type': request.headers['content-type'],
                    'ETag': etag,
                    'Content-Length': str(upload_size),
                }
                metadata.update(val for val in request.headers.iteritems()
                                if is_user_meta('object', val[0]))
                for header_key in (
                        request.headers.get('X-Backend-Replication-Headers') or
                        self.allowed_headers):
                    if header_key in request.headers:
                        header_caps = header_key.title()
                        metadata[header_caps] = request.headers[header_key]
                writer.put(metadata)
        except DiskFileNoSpace:
            return HTTPInsufficientStorage(drive=device, request=request)
        if orig_delete_at != new_delete_at:
            if new_delete_at:
                self.delete_at_update(
                    'PUT', new_delete_at, account, container, obj,
                    request, device)
            if orig_delete_at:
                self.delete_at_update(
                    'DELETE', orig_delete_at, account, container, obj,
                    request, device)
        self.container_update(
            'PUT', account, container, obj, request,
            HeaderKeyDict({
                'x-size': metadata['Content-Length'],
                'x-content-type': metadata['Content-Type'],
                'x-timestamp': metadata['X-Timestamp'],
                'x-etag': metadata['ETag']}),
            device)
        return HTTPCreated(request=request, etag=etag)

    @public
    @timing_stats()
    def GET(self, request):
        """Handle HTTP GET requests for the Swift Object Server."""
        device, partition, account, container, obj = \
            split_and_validate_path(request, 5, 5, True)
        keep_cache = self.keep_cache_private or (
            'X-Auth-Token' not in request.headers and
            'X-Storage-Token' not in request.headers)
        try:
            disk_file = self.get_diskfile(
                device, partition, account, container, obj)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=device, request=request)
        try:
            with disk_file.open():
                metadata = disk_file.get_metadata()
                obj_size = int(metadata['Content-Length'])
                file_x_ts = metadata['X-Timestamp']
                file_x_ts_flt = float(file_x_ts)
                keep_cache = (self.keep_cache_private or
                              ('X-Auth-Token' not in request.headers and
                               'X-Storage-Token' not in request.headers))
                response = Response(
                    app_iter=disk_file.reader(keep_cache=keep_cache),
                    request=request, conditional_response=True)
                response.headers['Content-Type'] = metadata.get(
                    'Content-Type', 'application/octet-stream')
                for key, value in metadata.iteritems():
                    if is_user_meta('object', key) or \
                            key.lower() in self.allowed_headers:
                        response.headers[key] = value
                response.etag = metadata['ETag']
                response.last_modified = math.ceil(file_x_ts_flt)
                response.content_length = obj_size
                try:
                    response.content_encoding = metadata[
                        'Content-Encoding']
                except KeyError:
                    pass
                response.headers['X-Timestamp'] = file_x_ts
                resp = request.get_response(response)
        except (DiskFileNotExist, DiskFileQuarantined):
            resp = HTTPNotFound(request=request, conditional_response=True)
        return resp
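
    # Hedged note: because the Response is created with
    # conditional_response=True and then passed back through
    # request.get_response(response), range and precondition handling
    # (Range, If-Match, If-None-Match, If-Modified-Since) is expected to be
    # done by the swob layer rather than by this method itself.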

    @public
    @timing_stats(sample_rate=0.8)
    def HEAD(self, request):
        """Handle HTTP HEAD requests for the Swift Object Server."""
        device, partition, account, container, obj = \
            split_and_validate_path(request, 5, 5, True)
        try:
            disk_file = self.get_diskfile(
                device, partition, account, container, obj)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=device, request=request)
        try:
            metadata = disk_file.read_metadata()
        except (DiskFileNotExist, DiskFileQuarantined):
            return HTTPNotFound(request=request, conditional_response=True)
        response = Response(request=request, conditional_response=True)
        response.headers['Content-Type'] = metadata.get(
            'Content-Type', 'application/octet-stream')
        for key, value in metadata.iteritems():
            if is_user_meta('object', key) or \
                    key.lower() in self.allowed_headers:
                response.headers[key] = value
        response.etag = metadata['ETag']
        ts = metadata['X-Timestamp']
        response.last_modified = math.ceil(float(ts))
        # Needed for container sync feature
        response.headers['X-Timestamp'] = ts
        response.content_length = int(metadata['Content-Length'])
        try:
            response.content_encoding = metadata['Content-Encoding']
        except KeyError:
            pass
        return response

    @public
    @timing_stats()
    def DELETE(self, request):
        """Handle HTTP DELETE requests for the Swift Object Server."""
        device, partition, account, container, obj = \
            split_and_validate_path(request, 5, 5, True)
        if 'x-timestamp' not in request.headers or \
                not check_float(request.headers['x-timestamp']):
            return HTTPBadRequest(body='Missing timestamp', request=request,
                                  content_type='text/plain')
        try:
            disk_file = self.get_diskfile(
                device, partition, account, container, obj)
        except DiskFileDeviceUnavailable:
            return HTTPInsufficientStorage(drive=device, request=request)
        try:
            orig_metadata = disk_file.read_metadata()
        except DiskFileExpired as e:
            orig_timestamp = e.timestamp
            orig_metadata = e.metadata
            response_class = HTTPNotFound
        except DiskFileDeleted as e:
            orig_timestamp = e.timestamp
            orig_metadata = {}
            response_class = HTTPNotFound
        except (DiskFileNotExist, DiskFileQuarantined):
            orig_timestamp = 0
            orig_metadata = {}
            response_class = HTTPNotFound
        else:
            orig_timestamp = orig_metadata.get('X-Timestamp', 0)
            if orig_timestamp < request.headers['x-timestamp']:
                response_class = HTTPNoContent
            else:
                response_class = HTTPConflict
        orig_delete_at = int(orig_metadata.get('X-Delete-At') or 0)
        try:
            req_if_delete_at_val = request.headers['x-if-delete-at']
            req_if_delete_at = int(req_if_delete_at_val)
        except KeyError:
            pass
        except ValueError:
            return HTTPBadRequest(
                request=request,
                body='Bad X-If-Delete-At header value')
        else:
            if orig_delete_at != req_if_delete_at:
                return HTTPPreconditionFailed(
                    request=request,
                    body='X-If-Delete-At and X-Delete-At do not match')
        if orig_delete_at:
            self.delete_at_update('DELETE', orig_delete_at, account,
                                  container, obj, request, device)
        req_timestamp = request.headers['X-Timestamp']
        if orig_timestamp < req_timestamp:
            disk_file.delete(req_timestamp)
            self.container_update(
                'DELETE', account, container, obj, request,
                HeaderKeyDict({'x-timestamp': req_timestamp}),
                device)
        return response_class(request=request)
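
    # Hedged note: the timestamp ordering above is a plain string comparison.
    # It is only safe because proxies send X-Timestamp normalized to a
    # fixed-width form (e.g. '1414203033.46093'), where lexicographic and
    # numeric ordering coincide; arbitrary float strings would not compare
    # reliably this way.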

    @public
    @replication
    @timing_stats(sample_rate=0.1)
    def REPLICATE(self, request):
        """
        Handle REPLICATE requests for the Swift Object Server.  This is used
        by the object replicator to get hashes for directories.
        """
        device, partition, suffix = split_and_validate_path(
            request, 2, 3, True)
        try:
            hashes = self._diskfile_mgr.get_hashes(device, partition, suffix)
        except DiskFileDeviceUnavailable:
            resp = HTTPInsufficientStorage(drive=device, request=request)
        else:
            resp = Response(body=pickle.dumps(hashes))
        return resp
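
    # Hedged note: the pickled body is, roughly, a mapping of suffix
    # directory to a hash of the object file names beneath it, e.g.
    #   pickle.dumps({'a83': '0db7b416c9808517a1bb2157af20b09b'})
    # so the replicator can compare suffix hashes instead of walking every
    # file (exact contents depend on the DiskFileManager implementation).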

    @public
    @replication
    @timing_stats(sample_rate=0.1)
    def REPLICATION(self, request):
        return Response(app_iter=ssync_receiver.Receiver(self, request)())

    def __call__(self, env, start_response):
        """WSGI Application entry point for the Swift Object Server."""
        start_time = time.time()
        req = Request(env)
        self.logger.txn_id = req.headers.get('x-trans-id', None)

        if not check_utf8(req.path_info):
            res = HTTPPreconditionFailed(body='Invalid UTF8 or contains NULL')
        else:
            try:
                # disallow methods which have not been marked 'public'
                try:
                    method = getattr(self, req.method)
                    getattr(method, 'publicly_accessible')
                    replication_method = getattr(method, 'replication', False)
                    if (self.replication_server is not None and
                            self.replication_server != replication_method):
                        raise AttributeError('Not allowed method.')
                except AttributeError:
                    res = HTTPMethodNotAllowed()
                else:
                    res = method(req)
            except DiskFileCollision:
                res = HTTPForbidden(request=req)
            except HTTPException as error_response:
                res = error_response
            except (Exception, Timeout):
                self.logger.exception(_(
                    'ERROR __call__ error with %(method)s'
                    ' %(path)s '), {'method': req.method, 'path': req.path})
                res = HTTPInternalServerError(body=traceback.format_exc())
        trans_time = time.time() - start_time
        if self.log_requests:
            log_line = get_log_line(req, res, trans_time, '')
            if req.method in ('REPLICATE', 'REPLICATION') or \
                    'X-Backend-Replication' in req.headers:
                self.logger.debug(log_line)
            else:
                self.logger.info(log_line)
        if req.method in ('PUT', 'DELETE'):
            slow = self.slow - trans_time
            if slow > 0:
                sleep(slow)
        return res(env, start_response)
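
# Usage sketch, illustrative only: assuming the class above is Swift's
# object-server ObjectController and swift.common.swob is importable, the
# WSGI entry point can be exercised directly in a test, roughly like:
#
#     from swift.common.swob import Request
#     app = ObjectController({'devices': '/srv/node', 'mount_check': 'false'})
#     req = Request.blank('/sda1/0/a/c/o', environ={'REQUEST_METHOD': 'HEAD'})
#     resp = req.get_response(app)
#     print resp.status
#
# The class name, configuration keys and path components here are assumptions
# based on upstream Swift, not part of this snippet.
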
Beispiel #15
0
class MetadataController(object):
    """"
    WSGI Controller for metadata server
    """
    save_headers = [
        'x-metadata-read',
        'x-metadata-write',
        'x-metadata-sync-key',
        'x-metadata-sync-to'
    ]

    def __init__(self, conf, logger=None):
        # location/directory of the metadata database (meta.db)
        self.location = conf.get('location', '/srv/node/sdb1/metadata/')
        # path to the actual database file
        #self.db_file = os.path.join(self.location, 'meta.db')
        self.logger = logger or get_logger(conf, log_route='metadata-server')
        self.root = conf.get('devices', '/srv/node')
        # workaround for device listings
        self.node_count = conf.get('nodecount', '8')
        self.devicelist = []
        for x in range(0, int(self.node_count)):
            self.devicelist.append(conf.get('device' + str(x), ''))
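        # Illustrative only: with the workaround above the known devices are
        # spelled out in the config, e.g.
        #   nodecount = 2
        #   device0 = sdb1
        #   device1 = sdb2
        # Names and values here are hypothetical; only the keys 'nodecount'
        # and 'device<N>' are actually read.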
        self.mount_check = config_true_value(conf.get('mount_check', 'true'))
        self.node_timeout = int(conf.get('node_timeout', 3))
        self.conn_timeout = float(conf.get('conn_timeout', 3))
        replication_server = conf.get('replication_server', None)
        if replication_server is not None:
            replication_server = config_true_value(replication_server)
        self.replication_server = replication_server
        self.allowed_sync_hosts = [
            h.strip()
            for h in conf.get('allowed_sync_hosts', '127.0.0.1').split(',')
            if h.strip()
        ]
        self.replicator_rpc = ReplicatorRpc(
            self.root,
            DATADIR,
            MetadataBroker,
            self.mount_check,
            logger=self.logger
        )
        self.diskfile_mgr = DiskFileManager(conf, self.logger)

        self.db_ip = conf.get('db_ip', '127.0.0.1')
        self.db_port = int(conf.get('db_port', 2424))
        self.db_user = conf.get('db_user', 'root')
        self.db_pw = conf.get('db_pw', 'root')

        if config_true_value(conf.get('allow_versions', 'f')):
            self.save_headers.append('x-versions-location')

        swift.common.db.DB_PREALLOCATION = config_true_value(
            conf.get('db_preallocation', 'f'))
        self.broker = self._get_metadata_broker()
        self.broker.initialize()

    def _get_metadata_broker(self, **kwargs):
        """
        Returns an instance of the DB abstraction layer object (broker)
        """
        return MetadataBroker(self.db_ip, self.db_port, self.db_user, self.db_pw)

    def check_attrs(self, attrs, acc, con, obj):
        """
        Verify that the requested attributes are valid.
        Checks the attr list against the list of known system attributes
        and allows custom metadata prefixes.

        returns: boolean, whether the attrs are valid
        """
        for attr in attrs.split(','):
            if attr.startswith('object_meta') or \
                    attr.startswith('container_meta') or \
                    attr.startswith('account_meta'):
                pass
            elif attr not in [
                    'object_uri',
                    'object_name',
                    'object_account_name',
                    'object_container_name',
                    'object_location',
                    'object_uri_create_time',
                    'object_last_modified_time',
                    'object_last_changed_time',
                    'object_delete_time',
                    'object_last_activity_time',
                    'object_etag_hash',
                    'object_content_type',
                    'object_content_length',
                    'object_content_encoding',
                    'object_content_disposition',
                    'object_content_language',
                    'object_cache_control',
                    'object_delete_at',
                    'object_manifest_type',
                    'object_manifest',
                    'object_access_control_allow_origin',
                    'object_access_control_allow_credentials',
                    'object_access_control_expose_headers',
                    'object_access_control_max_age',
                    'object_access_control_allow_methods',
                    'object_access_control_allow_headers',
                    'object_origin',
                    'object_access_control_request_method',
                    'object_access_control_request_headers',
                    'object_meta',
                    'container_uri',
                    'container_name',
                    'container_account_name',
                    'container_create_time',
                    'container_last_modified_time',
                    'container_last_changed_time',
                    'container_delete_time',
                    'container_last_activity_time',
                    'container_read_permissions',
                    'container_write_permissions',
                    'container_sync_to',
                    'container_sync_key',
                    'container_versions_location',
                    'container_object_count',
                    'container_bytes_used',
                    'container_meta',
                    'account_uri',
                    'account_name',
                    'account_tenant_id',
                    'account_first_use_time',
                    'account_last_modified_time',
                    'account_last_changed_time',
                    'account_delete_time',
                    'account_last_activity_time',
                    'account_container_count',
                    'account_object_count',
                    'account_bytes_used',
                    'account_meta',
                    'all_attrs',
                    'all_system_attrs',
                    'all_meta_attrs',
                    'all_account_attrs',
                    'all_account_system_attrs',
                    'all_account_meta_attrs',
                    'all_container_attrs',
                    'all_container_system_attrs',
                    'all_container_meta_attrs',
                    'all_object_attrs',
                    'all_object_system_attrs',
                    'all_object_meta_attrs']:
                return False
        return True
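
    # Illustrative only (hypothetical attribute strings; the acc/con/obj
    # arguments are not inspected by check_attrs):
    #   'object_name,object_etag_hash'            -> True  (known system attrs)
    #   'object_meta_color,container_meta_owner'  -> True  (custom meta prefixes)
    #   'object_name,not_an_attribute'            -> False (unknown attribute)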

    @public
    @timing_stats()
    def GET(self, req):
        """
        Handle HTTP GET requests.
        Build SQL queries piece by piece and then execute them.
        Custom attributes need to be handled specially, since they live
        in a separate table.
        """
        #broker = self._get_metadata_broker()
        #broker.initialize()

        base_version, acc, con, obj = split_path(req.path, 1, 4, True)
        if 'sorted' in req.headers:
            sort_value_list = req.headers['sorted']
            if sort_value_list == '':
                sort_value_list = 'uri'
            toSort = True
        else:
            toSort = False
        if 'attributes' in req.headers:
            attrs = req.headers['attributes']
        # if there is no attributes list, include everything in scope;
        # since no attributes were passed in, the results can contain
        # things from multiple levels of scope
        else:
            attrs = "object_uri,container_uri,account_uri"

        attrs, all_obj_meta, all_con_meta, all_acc_meta = \
            eval_superset(attrs.split(","))
        format = "text/plain"
        if self.check_attrs(attrs, acc, con, obj) or attrs == '':
            accAttrs, conAttrs, objAttrs, superAttrs, customAttrs = \
                split_attrs_by_scope(attrs)

            """
            If we have a thing from which we don't request any sys attrs
            Then we need to add its uri so that it appears in the list
            returned from the query. After we query for custom attrs,
            we need to delete any thing that is empty.
            """
            if all_obj_meta and objAttrs == "":
                objAttrs = "object_uri"
            if all_con_meta and conAttrs == "":
                conAttrs = "container_uri"
            if all_acc_meta and accAttrs == "":
                accAttrs = "account_uri"

            # Builds initial query containing the
            # split attributes for each item type
            accQuery = self.broker.get_attributes_query(acc, con, obj, accAttrs)
            conQuery = self.broker.get_attributes_query(acc, con, obj, conAttrs)
            objQuery = self.broker.get_attributes_query(acc, con, obj, objAttrs)

            # If there is a query in the request add it to the end
            # of the WHERE clause of the SQL
            if 'query' in req.headers:
                query = req.headers['query']
                accQuery = self.broker.get_uri_query(accQuery, query)
                conQuery = self.broker.get_uri_query(conQuery, query)
                objQuery = self.broker.get_uri_query(objQuery, query)

            # if successful query add the results to the end of the
            # accumulator list
            ret = []
            if not accQuery.startswith("BAD"):
                ret.extend(self.broker.execute_query(
                    accQuery, acc, con, obj,
                    'account_uri' in attrs.split(',')))
            if not conQuery.startswith("BAD"):
                ret.extend(self.broker.execute_query(
                    conQuery, acc, con, obj,
                    'container_uri' in attrs.split(',')))
            if not objQuery.startswith("BAD"):
                ret.extend(self.broker.execute_query(
                    objQuery, acc, con, obj,
                    'object_uri' in attrs.split(',')))

            # query the custom table
            ret = self.broker.custom_attributes_query(
                customAttrs, ret, all_obj_meta, all_con_meta, all_acc_meta)

            """
            Do the deletion thing mentioned above
            """
            ret = [x for x in ret if x[x.keys()[0]] != {}]

            if toSort:
                sorter = Sort_metadata()
                ret = sorter.sort_data(ret, sort_value_list.split(","))

            # default format is plain text;
            # json and xml can be requested as well.
            # no error handling is done right now: anything that is not
            # spelled exactly "json" or "xml" falls back to plain text
            if "format" in req.headers:
                if req.headers['format'] == "json":
                    format = "application/json"
                    out = output_json(ret)
                elif req.headers['format'] == "xml":
                    format = "application/xml"
                    out = output_xml(ret)
                else:
                    out = output_plain(ret)
            else:
                out = output_plain(ret)
            status = 200

        else:
            out = "One or more attributes not supported"
            status = 400
            format = "text/plain"

        # Returns the HTTP Response object with the result of the API request
        return Response(
            request=req, body=out + "\n", content_type=format, status=status)

    @public
    #@timing_stats()
    #TODO: reorganize code to generalize repeated calls
    #TODO: abstract data/object type names for generic calls
    def PUT(self, req):
        version, acc, con, obj = split_path(req.path, 1, 4, True)
        stor_policy = req.headers['storage_policy']
        ring = POLICIES.get_object_ring(stor_policy, '/etc/swift')
        #broker = self._get_metadata_broker()
        #broker.initialize()

        #Handle Container PUT
        if not obj:
            hsh = hash_path(acc, con)
            part = ring.get_part(acc, con)
            db_dir = storage_directory(swift.container.backend.DATADIR, part, hsh)
            nodes = ring.get_part_nodes(part)
            for node in nodes:
                for item in self.devicelist:
                    if node['device'] in item:
                        try:
                            path = os.path.join(self.root + item, db_dir, hsh + '.db')
                            #TODO: move kwargs
                            kwargs = {'account':acc, 'container':con, 'logger':self.logger}
                            md_broker = swift.container.backend.ContainerBroker(path, **kwargs)
                            md = md_broker.get_info()
                            md.update(
                                (key, value)
                                for key, (value, timestamp) in md_broker.metadata.iteritems()
                                if value != '' and is_sys_or_user_meta('container', key))
                            sys_md = format_con_metadata(md)
                            user_md = format_custom_metadata(md)
                            if 'X-Container-Read' in req.headers:
                                sys_md['container_read_permissions'] = req.headers['X-Container-Read']
                            if 'X-Container-Write' in req.headers:
                                sys_md['container_write_permissions'] = req.headers['X-Container-Write']
                            #TODO: insert container_last_activity_time
                            #TODO: split meta user/sys
                            #TODO: insert meta
                            self.broker.insert_container_md([sys_md])
                            return
                        except DatabaseConnectionError as e:
                            self.logger.warn("DatabaseConnectionError: " + e.path + "\n")
                        except Exception:
                            self.logger.warn("%s: %s\n" % (str(sys.exc_info()[0]), str(sys.exc_info()[1])))
        #handle object PUT
        else:
            part = ring.get_part(acc, con, obj)
            nodes = ring.get_part_nodes(part)
            for node in nodes:
                for item in self.devicelist:
                    if node['device'] in item:
                        try:
                            df = self.diskfile_mgr.get_diskfile(item, part, acc, con, obj, stor_policy)
                            md = df.read_metadata()
                            sys_md = format_obj_metadata(md)
                            #df._data_file is a direct path to the objects data
                            sys_md['object_location'] = df._data_file
                            user_md = format_custom_metadata(md)
                            #TODO: insert user meta and sys meta
                            self.broker.insert_object_md([sys_md])
                        except Exception:
                            self.logger.warn("%s: %s\n" % (str(sys.exc_info()[0]), str(sys.exc_info()[1])))
        return
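
    # Hedged note: for the container branch above, the backend database is
    # found by hashing the account/container path and mapping it into the
    # partition's storage directory, so the path built is roughly
    #   <root><device>/containers/<part>/<hash[-3:]>/<hash>/<hash>.db
    # e.g. (hypothetical)
    #   /srv/node/sdb1/containers/1234/27e/d41d8cd98f00b204e9800998ecf8427e/d41d8cd98f00b204e9800998ecf8427e.db
    # Also note that self.root + item is plain string concatenation, so the
    # configured device entries must supply any separator themselves.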

    @public
    @timing_stats()
    def DELETE(self, req):
        version, acc, con, obj = split_path(req.path, 1, 4, True)
        timestamp = Timestamp(time.time()).isoformat()
        data_type = ''
        md = {}
        if not con and not obj:
            #do nothing. accounts cannot be deleted
            return
        elif not obj:
            md = format_con_metadata(md)
            md['container_delete_time'] = timestamp
            md['container_last_activity_time'] = timestamp
            data_type = 'container'
            for item in \
                (data_type + '_uri', data_type + '_name'):
                if item in md:
                    del md[item]
            #TODO: overwrite container metadata
            #TODO: delete container custom metadata
        else:
            md = format_obj_metadata(md)
            md['object_delete_time'] = timestamp
            md['object_last_activity_time'] = timestamp
            data_type = 'object'
            for item in \
                (data_type + '_uri', data_type + '_name'):
                if item in md:
                    del md[item]
            #TODO: overwrite object metadata
            #TODO: delete object user meta
        return
    #TODO: generalize strings used for repeat calls
    @public
    #@timing_stats()
    def POST(self, req):
        version, acc, con, obj = split_path(req.path, 1, 4, True)
        stor_policy = req.headers['storage_policy']
        ring = POLICIES.get_object_ring(stor_policy, '/etc/swift')
        if not con and not obj:
            meta_type = 'account'
            kwargs = {'account':acc, 'logger':self.logger}
            data_dir = swift.account.backend.DATADIR
            hsh = hash_path(acc)
            part = ring.get_part(acc)
            db_dir = storage_directory(data_dir, part, hsh)
            nodes = ring.get_part_nodes(part)
            for node in nodes:
                for item in self.devicelist:
                    if node['device'] in item:
                        try:
                            path = os.path.join(self.root + item, db_dir, hsh + '.db')
                            broker = swift.account.backend.AccountBroker(path, **kwargs)
                            md = broker.get_info()
                            md.update(
                                (key, value)
                                for key, (value, timestamp) in broker.metadata.iteritems()
                                if value != '' and is_sys_or_user_meta(meta_type, key))
                            sys_md = format_acc_metadata(md)
                            user_md = format_custom_metadata(md)
                            #TODO: call overwrite_account_metadata
                            #TODO: call overwrite_custom_metadata
                            return
                        except Exception:
                            self.logger.warn("%s: %s\n" % (str(sys.exc_info()[0]), str(sys.exc_info()[1])))
        #Handle Container POST
        elif not obj:
            meta_type = 'container'
            kwargs = {'account':acc, 'container':con, 'logger':self.logger}
            data_dir = swift.container.backend.DATADIR
            try:
                hsh = hash_path(acc, con)
                part = ring.get_part(acc, con)
                db_dir = storage_directory(data_dir, part, hsh)
                nodes = ring.get_part_nodes(part)
                for node in nodes:
                    for item in self.devicelist:
                        if node['device'] in item:
                            try:
                                path = os.path.join(self.root + item, db_dir, hsh + '.db')
                                broker = swift.container.backend.ContainerBroker(path, **kwargs)
                                md = broker.get_info()
                                md.update(
                                    (key, value)
                                    for key, (value, timestamp) in broker.metadata.iteritems()
                                    if value != '' and is_sys_or_user_meta('container', key))
                                sys_md = format_con_metadata(md)
                                user_md = format_custom_metadata(md)
                                if 'X-Container-Read' in req.headers:
                                    sys_md['container_read_permissions'] = req.headers['X-Container-Read']
                                if 'X-Container-Write' in req.headers:
                                    sys_md['container_write_permissions'] = req.headers['X-Container-Write']
                                #TODO: call overwrite_container_metadata
                                #TODO: call overwrite_custom_metadata
                                return
                            except DatabaseConnectionError as e:
                                self.logger.warn("DatabaseConnectionError: " + e.path + "\n")
            except Exception:
                self.logger.warn("%s: %s\n" % (str(sys.exc_info()[0]), str(sys.exc_info()[1])))
        else:
            part = ring.get_part(acc, con, obj)
            nodes = ring.get_part_nodes(part)
            for node in nodes:
                for item in self.devicelist:
                    if node['device'] in item:
                        try:
                            df = self.diskfile_mgr.get_diskfile(item, part, acc, con, obj, stor_policy)
                            md = df.read_metadata()
                            sys_md = format_obj_metadata(md)
                            user_md = format_custom_metadata(md)
                            #TODO: call overwrite_object_metadata
                            #TODO: call overwrite_custom_metadata
                        except Exception:
                            self.logger.warn("%s: %s\n" % (str(sys.exc_info()[0]), str(sys.exc_info()[1])))
        return


    @public
    @timing_stats()
    def COPY(self, req):
        version, acc, con, obj = split_path(req.path, 1, 4, True)
    def __call__(self, env, start_response):
        """
        Boilerplate for how the server's code gets called
        when a request is received.
        Taken directly from the other servers.
        """
        # start_time = time.time()
        req = Request(env)
        self.logger.txn_id = req.headers.get('x-trans-id', None)
        if not check_utf8(req.path_info):
            res = HTTPPreconditionFailed(body='Invalid UTF8 or contains NULL')
        else:
            try:
                # disallow methods which have not been marked 'public'
                try:
                    method = getattr(self, req.method)
                    getattr(method, 'publicly_accessible')
                    replication_method = getattr(method, 'replication', False)
                    if (self.replication_server is not None and
                            self.replication_server != replication_method):
                        raise AttributeError('Not allowed method.')
                except AttributeError:
                    res = HTTPMethodNotAllowed()
                else:
                    res = method(req)
            except HTTPException as error_response:
                res = error_response
            except (Exception, Timeout):
                self.logger.exception(_(
                    'ERROR __call__ error with %(method)s %(path)s '),
                    {'method': req.method, 'path': req.path})
                res = HTTPInternalServerError(body=traceback.format_exc())
        # trans_time = '%.4f' % (time.time() - start_time)
        # if self.log_requests:
        #     log_message = '%s - - [%s] "%s %s" %s %s "%s" "%s" "%s" %s' % (
        #         req.remote_addr,
        #         time.strftime('%d/%b/%Y:%H:%M:%S +0000',
        #                       time.gmtime()),
        #         req.method, req.path,
        #         res.status.split()[0], res.content_length or '-',
        #         req.headers.get('x-trans-id', '-'),
        #         req.referer or '-', req.user_agent or '-',
        #         trans_time)
        #     if req.method.upper() == 'REPLICATE':
        #         self.logger.debug(log_message)
        #     else:
        #         self.logger.info(log_message)
        return res(env, start_response)
Beispiel #16
0
class TestAuditor(unittest.TestCase):

    def setUp(self):
        self.testdir = os.path.join(mkdtemp(), 'tmp_test_object_auditor')
        self.devices = os.path.join(self.testdir, 'node')
        self.rcache = os.path.join(self.testdir, 'object.recon')
        self.logger = FakeLogger()
        rmtree(self.testdir, ignore_errors=1)
        mkdirs(os.path.join(self.devices, 'sda'))
        os.mkdir(os.path.join(self.devices, 'sdb'))

        # policy 0
        self.objects = os.path.join(self.devices, 'sda', get_data_dir(0))
        self.objects_2 = os.path.join(self.devices, 'sdb', get_data_dir(0))
        os.mkdir(self.objects)
        # policy 1
        self.objects_p1 = os.path.join(self.devices, 'sda', get_data_dir(1))
        self.objects_2_p1 = os.path.join(self.devices, 'sdb', get_data_dir(1))
        os.mkdir(self.objects_p1)

        self.parts = self.parts_p1 = {}
        for part in ['0', '1', '2', '3']:
            self.parts[part] = os.path.join(self.objects, part)
            self.parts_p1[part] = os.path.join(self.objects_p1, part)
            os.mkdir(os.path.join(self.objects, part))
            os.mkdir(os.path.join(self.objects_p1, part))

        self.conf = dict(
            devices=self.devices,
            mount_check='false',
            object_size_stats='10,100,1024,10240')
        self.df_mgr = DiskFileManager(self.conf, self.logger)

        # diskfiles for policy 0, 1
        self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'o', 0)
        self.disk_file_p1 = self.df_mgr.get_diskfile('sda', '0', 'a', 'c',
                                                     'o', 1)

    def tearDown(self):
        rmtree(os.path.dirname(self.testdir), ignore_errors=1)
        unit.xattr_data = {}

    def test_worker_conf_parms(self):
        def check_common_defaults():
            self.assertEquals(auditor_worker.max_bytes_per_second, 10000000)
            self.assertEquals(auditor_worker.log_time, 3600)

        # test default values
        conf = dict(
            devices=self.devices,
            mount_check='false',
            object_size_stats='10,100,1024,10240')
        auditor_worker = auditor.AuditorWorker(conf, self.logger,
                                               self.rcache, self.devices)
        check_common_defaults()
        self.assertEquals(auditor_worker.diskfile_mgr.disk_chunk_size, 65536)
        self.assertEquals(auditor_worker.max_files_per_second, 20)
        self.assertEquals(auditor_worker.zero_byte_only_at_fps, 0)

        # test specified audit value overrides
        conf.update({'disk_chunk_size': 4096})
        auditor_worker = auditor.AuditorWorker(conf, self.logger,
                                               self.rcache, self.devices,
                                               zero_byte_only_at_fps=50)
        check_common_defaults()
        self.assertEquals(auditor_worker.diskfile_mgr.disk_chunk_size, 4096)
        self.assertEquals(auditor_worker.max_files_per_second, 50)
        self.assertEquals(auditor_worker.zero_byte_only_at_fps, 50)

    def test_object_audit_extra_data(self):
        def run_tests(disk_file):
            auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                                   self.rcache, self.devices)
            data = '0' * 1024
            etag = md5()
            with disk_file.create() as writer:
                writer.write(data)
                etag.update(data)
                etag = etag.hexdigest()
                timestamp = str(normalize_timestamp(time.time()))
                metadata = {
                    'ETag': etag,
                    'X-Timestamp': timestamp,
                    'Content-Length': str(os.fstat(writer._fd).st_size),
                }
                writer.put(metadata)
                pre_quarantines = auditor_worker.quarantines

                auditor_worker.object_audit(
                    AuditLocation(disk_file._datadir, 'sda', '0'))
                self.assertEquals(auditor_worker.quarantines, pre_quarantines)

                os.write(writer._fd, 'extra_data')

                auditor_worker.object_audit(
                    AuditLocation(disk_file._datadir, 'sda', '0'))
                self.assertEquals(auditor_worker.quarantines,
                                  pre_quarantines + 1)
        run_tests(self.disk_file)
        run_tests(self.disk_file_p1)

    def test_object_audit_diff_data(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        data = '0' * 1024
        etag = md5()
        timestamp = str(normalize_timestamp(time.time()))
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            pre_quarantines = auditor_worker.quarantines

        # remake so it will have metadata
        self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'o')

        auditor_worker.object_audit(
            AuditLocation(self.disk_file._datadir, 'sda', '0'))
        self.assertEquals(auditor_worker.quarantines, pre_quarantines)
        etag = md5()
        etag.update('1' + '0' * 1023)
        etag = etag.hexdigest()
        metadata['ETag'] = etag

        with self.disk_file.create() as writer:
            writer.write(data)
            writer.put(metadata)

        auditor_worker.object_audit(
            AuditLocation(self.disk_file._datadir, 'sda', '0'))
        self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_audit_no_meta(self):
        timestamp = str(normalize_timestamp(time.time()))
        path = os.path.join(self.disk_file._datadir, timestamp + '.data')
        mkdirs(self.disk_file._datadir)
        fp = open(path, 'w')
        fp.write('0' * 1024)
        fp.close()
        invalidate_hash(os.path.dirname(self.disk_file._datadir))
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        pre_quarantines = auditor_worker.quarantines
        auditor_worker.object_audit(
            AuditLocation(self.disk_file._datadir, 'sda', '0'))
        self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_audit_will_not_swallow_errors_in_tests(self):
        timestamp = str(normalize_timestamp(time.time()))
        path = os.path.join(self.disk_file._datadir, timestamp + '.data')
        mkdirs(self.disk_file._datadir)
        with open(path, 'w') as f:
            write_metadata(f, {'name': '/a/c/o'})
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)

        def blowup(*args):
            raise NameError('tpyo')
        with mock.patch.object(DiskFileManager,
                               'get_diskfile_from_audit_location', blowup):
            self.assertRaises(NameError, auditor_worker.object_audit,
                              AuditLocation(os.path.dirname(path), 'sda', '0'))

    def test_failsafe_object_audit_will_swallow_errors_in_tests(self):
        timestamp = str(normalize_timestamp(time.time()))
        path = os.path.join(self.disk_file._datadir, timestamp + '.data')
        mkdirs(self.disk_file._datadir)
        with open(path, 'w') as f:
            write_metadata(f, {'name': '/a/c/o'})
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)

        def blowup(*args):
            raise NameError('tpyo')
        with mock.patch('swift.obj.diskfile.DiskFile', blowup):
            auditor_worker.failsafe_object_audit(
                AuditLocation(os.path.dirname(path), 'sda', '0'))
        self.assertEquals(auditor_worker.errors, 1)

    def test_generic_exception_handling(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        # pretend that we logged (and reset counters) just now
        auditor_worker.last_logged = time.time()
        timestamp = str(normalize_timestamp(time.time()))
        pre_errors = auditor_worker.errors
        data = '0' * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
        with mock.patch('swift.obj.diskfile.DiskFile', lambda *_: 1 / 0):
            auditor_worker.audit_all_objects()
        self.assertEquals(auditor_worker.errors, pre_errors + 1)

    def test_object_run_once_pass(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        auditor_worker.log_time = 0
        timestamp = str(normalize_timestamp(time.time()))
        pre_quarantines = auditor_worker.quarantines
        data = '0' * 1024

        def write_file(df):
            etag = md5()
            with df.create() as writer:
                writer.write(data)
                etag.update(data)
                etag = etag.hexdigest()
                metadata = {
                    'ETag': etag,
                    'X-Timestamp': timestamp,
                    'Content-Length': str(os.fstat(writer._fd).st_size),
                }
                writer.put(metadata)

        # policy 0
        write_file(self.disk_file)
        # policy 1
        write_file(self.disk_file_p1)

        auditor_worker.audit_all_objects()
        self.assertEquals(auditor_worker.quarantines, pre_quarantines)
        # 1 object per policy falls into 1024 bucket
        self.assertEquals(auditor_worker.stats_buckets[1024], 2)
        self.assertEquals(auditor_worker.stats_buckets[10240], 0)

        # pick up some additional code coverage, large file
        data = '0' * 1024 * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
        auditor_worker.audit_all_objects(device_dirs=['sda', 'sdb'])
        self.assertEquals(auditor_worker.quarantines, pre_quarantines)
        # still have the 1024 byte object left in policy-1 (plus the
        # stats from the original 2)
        self.assertEquals(auditor_worker.stats_buckets[1024], 3)
        self.assertEquals(auditor_worker.stats_buckets[10240], 0)
        # and then policy-0 disk_file was re-written as a larger object
        self.assertEquals(auditor_worker.stats_buckets['OVER'], 1)

        # pick up even more additional code coverage, misc paths
        auditor_worker.log_time = -1
        auditor_worker.stats_sizes = []
        auditor_worker.audit_all_objects(device_dirs=['sda', 'sdb'])
        self.assertEquals(auditor_worker.quarantines, pre_quarantines)
        self.assertEquals(auditor_worker.stats_buckets[1024], 3)
        self.assertEquals(auditor_worker.stats_buckets[10240], 0)
        self.assertEquals(auditor_worker.stats_buckets['OVER'], 1)

    def test_object_run_logging(self):
        logger = FakeLogger()
        auditor_worker = auditor.AuditorWorker(self.conf, logger,
                                               self.rcache, self.devices)
        auditor_worker.audit_all_objects(device_dirs=['sda'])
        log_lines = logger.get_lines_for_level('info')
        self.assertTrue(len(log_lines) > 0)
        self.assertTrue(log_lines[0].index('ALL - parallel, sda'))

        logger = FakeLogger()
        auditor_worker = auditor.AuditorWorker(self.conf, logger,
                                               self.rcache, self.devices,
                                               zero_byte_only_at_fps=50)
        auditor_worker.audit_all_objects(device_dirs=['sda'])
        log_lines = logger.get_lines_for_level('info')
        self.assertTrue(len(log_lines) > 0)
        self.assertTrue(log_lines[0].index('ZBF - sda'))

    def test_object_run_once_no_sda(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        timestamp = str(normalize_timestamp(time.time()))
        pre_quarantines = auditor_worker.quarantines
        # pretend that we logged (and reset counters) just now
        auditor_worker.last_logged = time.time()
        data = '0' * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            os.write(writer._fd, 'extra_data')
        auditor_worker.audit_all_objects()
        self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_run_once_multi_devices(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        # pretend that we logged (and reset counters) just now
        auditor_worker.last_logged = time.time()
        timestamp = str(normalize_timestamp(time.time()))
        pre_quarantines = auditor_worker.quarantines
        data = '0' * 10
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
        auditor_worker.audit_all_objects()
        self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'ob')
        data = '1' * 10
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            os.write(writer._fd, 'extra_data')
        auditor_worker.audit_all_objects()
        self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_run_fast_track_non_zero(self):
        self.auditor = auditor.ObjectAuditor(self.conf)
        self.auditor.log_time = 0
        data = '0' * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': str(normalize_timestamp(time.time())),
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            etag = md5()
            etag.update('1' + '0' * 1023)
            etag = etag.hexdigest()
            metadata['ETag'] = etag
            write_metadata(writer._fd, metadata)

        quarantine_path = os.path.join(self.devices,
                                       'sda', 'quarantined', 'objects')
        kwargs = {'mode': 'once'}
        kwargs['zero_byte_fps'] = 50
        self.auditor.run_audit(**kwargs)
        self.assertFalse(os.path.isdir(quarantine_path))
        del kwargs['zero_byte_fps']
        self.auditor.run_audit(**kwargs)
        self.assertTrue(os.path.isdir(quarantine_path))

    def setup_bad_zero_byte(self, with_ts=False):
        self.auditor = auditor.ObjectAuditor(self.conf)
        self.auditor.log_time = 0
        ts_file_path = ''
        if with_ts:
            name_hash = hash_path('a', 'c', 'o')
            dir_path = os.path.join(
                self.devices, 'sda',
                storage_directory(get_data_dir(0), '0', name_hash))
            ts_file_path = os.path.join(dir_path, '99999.ts')
            if not os.path.exists(dir_path):
                mkdirs(dir_path)
            fp = open(ts_file_path, 'w')
            write_metadata(fp, {'X-Timestamp': '99999', 'name': '/a/c/o'})
            fp.close()

        etag = md5()
        with self.disk_file.create() as writer:
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': str(normalize_timestamp(time.time())),
                'Content-Length': 10,
            }
            writer.put(metadata)
            etag = md5()
            etag = etag.hexdigest()
            metadata['ETag'] = etag
            write_metadata(writer._fd, metadata)
        return ts_file_path

    def test_object_run_fast_track_all(self):
        self.setup_bad_zero_byte()
        kwargs = {'mode': 'once'}
        self.auditor.run_audit(**kwargs)
        quarantine_path = os.path.join(self.devices,
                                       'sda', 'quarantined', 'objects')
        self.assertTrue(os.path.isdir(quarantine_path))

    def test_object_run_fast_track_zero(self):
        self.setup_bad_zero_byte()
        kwargs = {'mode': 'once'}
        kwargs['zero_byte_fps'] = 50
        self.auditor.run_audit(**kwargs)
        quarantine_path = os.path.join(self.devices,
                                       'sda', 'quarantined', 'objects')
        self.assertTrue(os.path.isdir(quarantine_path))

    def test_object_run_fast_track_zero_check_closed(self):
        rat = [False]

        class FakeFile(DiskFile):

            def _quarantine(self, data_file, msg):
                rat[0] = True
                DiskFile._quarantine(self, data_file, msg)

        self.setup_bad_zero_byte()
        was_df = auditor.diskfile.DiskFile
        try:
            auditor.diskfile.DiskFile = FakeFile
            kwargs = {'mode': 'once'}
            kwargs['zero_byte_fps'] = 50
            self.auditor.run_audit(**kwargs)
            quarantine_path = os.path.join(self.devices,
                                           'sda', 'quarantined', 'objects')
            self.assertTrue(os.path.isdir(quarantine_path))
            self.assertTrue(rat[0])
        finally:
            auditor.diskfile.DiskFile = was_df

    def test_with_tombstone(self):
        ts_file_path = self.setup_bad_zero_byte(with_ts=True)
        self.assertTrue(ts_file_path.endswith('ts'))
        kwargs = {'mode': 'once'}
        self.auditor.run_audit(**kwargs)
        self.assertTrue(os.path.exists(ts_file_path))

    def test_sleeper(self):
        with mock.patch(
                'time.sleep', mock.MagicMock()) as mock_sleep:
            auditor.SLEEP_BETWEEN_AUDITS = 0.10
            my_auditor = auditor.ObjectAuditor(self.conf)
            my_auditor._sleep()
            mock_sleep.assert_called_with(auditor.SLEEP_BETWEEN_AUDITS)

    def test_run_parallel_audit(self):

        class StopForever(Exception):
            pass

        class Bogus(Exception):
            pass

        class ObjectAuditorMock(object):
            check_args = ()
            check_kwargs = {}
            check_device_dir = None
            fork_called = 0
            master = 0
            wait_called = 0

            def mock_run(self, *args, **kwargs):
                self.check_args = args
                self.check_kwargs = kwargs
                if 'zero_byte_fps' in kwargs:
                    self.check_device_dir = kwargs.get('device_dirs')

            def mock_sleep_stop(self):
                raise StopForever('stop')

            def mock_sleep_continue(self):
                return

            def mock_audit_loop_error(self, parent, zbo_fps,
                                      override_devices=None, **kwargs):
                raise Bogus('exception')

            def mock_fork(self):
                self.fork_called += 1
                if self.master:
                    return self.fork_called
                else:
                    return 0

            def mock_wait(self):
                self.wait_called += 1
                return (self.wait_called, 0)

        for i in string.ascii_letters[2:26]:
            mkdirs(os.path.join(self.devices, 'sd%s' % i))

        my_auditor = auditor.ObjectAuditor(dict(devices=self.devices,
                                                mount_check='false',
                                                zero_byte_files_per_second=89,
                                                concurrency=1))

        mocker = ObjectAuditorMock()
        my_auditor.logger.exception = mock.MagicMock()
        real_audit_loop = my_auditor.audit_loop
        my_auditor.audit_loop = mocker.mock_audit_loop_error
        my_auditor.run_audit = mocker.mock_run
        was_fork = os.fork
        was_wait = os.wait
        os.fork = mocker.mock_fork
        os.wait = mocker.mock_wait
        try:
            my_auditor._sleep = mocker.mock_sleep_stop
            my_auditor.run_once(zero_byte_fps=50)
            my_auditor.logger.exception.assert_called_once_with(
                'ERROR auditing: exception')
            my_auditor.logger.exception.reset_mock()
            self.assertRaises(StopForever, my_auditor.run_forever)
            my_auditor.logger.exception.assert_called_once_with(
                'ERROR auditing: exception')
            my_auditor.audit_loop = real_audit_loop

            self.assertRaises(StopForever,
                              my_auditor.run_forever, zero_byte_fps=50)
            self.assertEquals(mocker.check_kwargs['zero_byte_fps'], 50)
            self.assertEquals(mocker.fork_called, 0)

            self.assertRaises(SystemExit, my_auditor.run_once)
            self.assertEquals(mocker.fork_called, 1)
            self.assertEquals(mocker.check_kwargs['zero_byte_fps'], 89)
            self.assertEquals(mocker.check_device_dir, [])
            self.assertEquals(mocker.check_args, ())

            device_list = ['sd%s' % i for i in string.ascii_letters[2:10]]
            device_string = ','.join(device_list)
            device_string_bogus = device_string + ',bogus'

            mocker.fork_called = 0
            self.assertRaises(SystemExit, my_auditor.run_once,
                              devices=device_string_bogus)
            self.assertEquals(mocker.fork_called, 1)
            self.assertEquals(mocker.check_kwargs['zero_byte_fps'], 89)
            self.assertEquals(sorted(mocker.check_device_dir), device_list)

            mocker.master = 1

            mocker.fork_called = 0
            self.assertRaises(StopForever, my_auditor.run_forever)
            # Fork is called 2 times since the zbf process is forked just
            # once before self._sleep() is called and StopForever is raised.
            # Also, wait is called just once before StopForever is raised.
            self.assertEquals(mocker.fork_called, 2)
            self.assertEquals(mocker.wait_called, 1)

            my_auditor._sleep = mocker.mock_sleep_continue

            my_auditor.concurrency = 2
            mocker.fork_called = 0
            mocker.wait_called = 0
            my_auditor.run_once()
            # Fork is called no. of devices + (no. of devices) / 2 + 1 times
            # since the zbf process is forked (no. of devices) / 2 + 1 times
            no_devices = len(os.listdir(self.devices))
            self.assertEquals(mocker.fork_called, no_devices + no_devices / 2
                              + 1)
            self.assertEquals(mocker.wait_called, no_devices + no_devices / 2
                              + 1)
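            # Worked example, based on this test's setup: setUp creates sda
            # and sdb and the loop above adds sdc..sdz, so os.listdir() sees
            # 26 devices; with integer division that is 26 + 26 / 2 + 1 = 40
            # forks and 40 matching waits.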

        finally:
            os.fork = was_fork
            os.wait = was_wait
Beispiel #17
0
class TestAuditor(unittest.TestCase):

    def setUp(self):
        self.testdir = os.path.join(mkdtemp(), 'tmp_test_object_auditor')
        self.devices = os.path.join(self.testdir, 'node')
        self.rcache = os.path.join(self.testdir, 'object.recon')
        self.logger = FakeLogger()
        rmtree(self.testdir, ignore_errors=1)
        mkdirs(os.path.join(self.devices, 'sda'))
        os.mkdir(os.path.join(self.devices, 'sdb'))

        # policy 0
        self.objects = os.path.join(self.devices, 'sda',
                                    get_data_dir(POLICIES[0]))
        self.objects_2 = os.path.join(self.devices, 'sdb',
                                      get_data_dir(POLICIES[0]))
        os.mkdir(self.objects)
        # policy 1
        self.objects_p1 = os.path.join(self.devices, 'sda',
                                       get_data_dir(POLICIES[1]))
        self.objects_2_p1 = os.path.join(self.devices, 'sdb',
                                         get_data_dir(POLICIES[1]))
        os.mkdir(self.objects_p1)

        self.parts = self.parts_p1 = {}
        for part in ['0', '1', '2', '3']:
            self.parts[part] = os.path.join(self.objects, part)
            self.parts_p1[part] = os.path.join(self.objects_p1, part)
            os.mkdir(os.path.join(self.objects, part))
            os.mkdir(os.path.join(self.objects_p1, part))

        self.conf = dict(
            devices=self.devices,
            mount_check='false',
            object_size_stats='10,100,1024,10240')
        self.df_mgr = DiskFileManager(self.conf, self.logger)

        # diskfiles for policy 0, 1
        self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'o',
                                                  policy=POLICIES[0])
        self.disk_file_p1 = self.df_mgr.get_diskfile('sda', '0', 'a', 'c',
                                                     'o', policy=POLICIES[1])

    def tearDown(self):
        rmtree(os.path.dirname(self.testdir), ignore_errors=1)
        unit.xattr_data = {}

    def test_worker_conf_parms(self):
        def check_common_defaults():
            self.assertEqual(auditor_worker.max_bytes_per_second, 10000000)
            self.assertEqual(auditor_worker.log_time, 3600)

        # test default values
        conf = dict(
            devices=self.devices,
            mount_check='false',
            object_size_stats='10,100,1024,10240')
        auditor_worker = auditor.AuditorWorker(conf, self.logger,
                                               self.rcache, self.devices)
        check_common_defaults()
        self.assertEqual(auditor_worker.diskfile_mgr.disk_chunk_size, 65536)
        self.assertEqual(auditor_worker.max_files_per_second, 20)
        self.assertEqual(auditor_worker.zero_byte_only_at_fps, 0)

        # test specified audit value overrides
        conf.update({'disk_chunk_size': 4096})
        auditor_worker = auditor.AuditorWorker(conf, self.logger,
                                               self.rcache, self.devices,
                                               zero_byte_only_at_fps=50)
        check_common_defaults()
        self.assertEqual(auditor_worker.diskfile_mgr.disk_chunk_size, 4096)
        self.assertEqual(auditor_worker.max_files_per_second, 50)
        self.assertEqual(auditor_worker.zero_byte_only_at_fps, 50)

    def test_object_audit_extra_data(self):
        def run_tests(disk_file):
            auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                                   self.rcache, self.devices)
            data = '0' * 1024
            etag = md5()
            with disk_file.create() as writer:
                writer.write(data)
                etag.update(data)
                etag = etag.hexdigest()
                timestamp = str(normalize_timestamp(time.time()))
                metadata = {
                    'ETag': etag,
                    'X-Timestamp': timestamp,
                    'Content-Length': str(os.fstat(writer._fd).st_size),
                }
                writer.put(metadata)
                pre_quarantines = auditor_worker.quarantines

                auditor_worker.object_audit(
                    AuditLocation(disk_file._datadir, 'sda', '0',
                                  policy=POLICIES.legacy))
                self.assertEqual(auditor_worker.quarantines, pre_quarantines)

                os.write(writer._fd, 'extra_data')

                auditor_worker.object_audit(
                    AuditLocation(disk_file._datadir, 'sda', '0',
                                  policy=POLICIES.legacy))
                self.assertEqual(auditor_worker.quarantines,
                                 pre_quarantines + 1)
        run_tests(self.disk_file)
        run_tests(self.disk_file_p1)

    def test_object_audit_diff_data(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        data = '0' * 1024
        etag = md5()
        timestamp = str(normalize_timestamp(time.time()))
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            pre_quarantines = auditor_worker.quarantines

        # re-instantiate the diskfile so it picks up the metadata just
        # written above
        self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'o',
                                                  policy=POLICIES.legacy)

        auditor_worker.object_audit(
            AuditLocation(self.disk_file._datadir, 'sda', '0',
                          policy=POLICIES.legacy))
        self.assertEqual(auditor_worker.quarantines, pre_quarantines)
        etag = md5()
        etag.update('1' + '0' * 1023)
        etag = etag.hexdigest()
        metadata['ETag'] = etag

        with self.disk_file.create() as writer:
            writer.write(data)
            writer.put(metadata)

        auditor_worker.object_audit(
            AuditLocation(self.disk_file._datadir, 'sda', '0',
                          policy=POLICIES.legacy))
        self.assertEqual(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_audit_no_meta(self):
        timestamp = str(normalize_timestamp(time.time()))
        path = os.path.join(self.disk_file._datadir, timestamp + '.data')
        mkdirs(self.disk_file._datadir)
        with open(path, 'w') as fp:
            fp.write('0' * 1024)
        invalidate_hash(os.path.dirname(self.disk_file._datadir))
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        pre_quarantines = auditor_worker.quarantines
        auditor_worker.object_audit(
            AuditLocation(self.disk_file._datadir, 'sda', '0',
                          policy=POLICIES.legacy))
        self.assertEqual(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_audit_will_not_swallow_errors_in_tests(self):
        timestamp = str(normalize_timestamp(time.time()))
        path = os.path.join(self.disk_file._datadir, timestamp + '.data')
        mkdirs(self.disk_file._datadir)
        with open(path, 'w') as f:
            write_metadata(f, {'name': '/a/c/o'})
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)

        def blowup(*args):
            raise NameError('tpyo')
        with mock.patch.object(DiskFileManager,
                               'get_diskfile_from_audit_location', blowup):
            self.assertRaises(NameError, auditor_worker.object_audit,
                              AuditLocation(os.path.dirname(path), 'sda', '0',
                                            policy=POLICIES.legacy))

    def test_failsafe_object_audit_will_swallow_errors_in_tests(self):
        timestamp = str(normalize_timestamp(time.time()))
        path = os.path.join(self.disk_file._datadir, timestamp + '.data')
        mkdirs(self.disk_file._datadir)
        with open(path, 'w') as f:
            write_metadata(f, {'name': '/a/c/o'})
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)

        def blowup(*args):
            raise NameError('tpyo')
        with mock.patch('swift.obj.diskfile.DiskFileManager.diskfile_cls',
                        blowup):
            auditor_worker.failsafe_object_audit(
                AuditLocation(os.path.dirname(path), 'sda', '0',
                              policy=POLICIES.legacy))
        self.assertEqual(auditor_worker.errors, 1)

    def test_generic_exception_handling(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        # pretend that we logged (and reset counters) just now
        auditor_worker.last_logged = time.time()
        timestamp = str(normalize_timestamp(time.time()))
        pre_errors = auditor_worker.errors
        data = '0' * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
        with mock.patch('swift.obj.diskfile.DiskFileManager.diskfile_cls',
                        lambda *_: 1 / 0):
            auditor_worker.audit_all_objects()
        self.assertEqual(auditor_worker.errors, pre_errors + 1)

    def test_object_run_once_pass(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        auditor_worker.log_time = 0
        timestamp = str(normalize_timestamp(time.time()))
        pre_quarantines = auditor_worker.quarantines
        data = '0' * 1024

        def write_file(df):
            etag = md5()
            with df.create() as writer:
                writer.write(data)
                etag.update(data)
                etag = etag.hexdigest()
                metadata = {
                    'ETag': etag,
                    'X-Timestamp': timestamp,
                    'Content-Length': str(os.fstat(writer._fd).st_size),
                }
                writer.put(metadata)

        # policy 0
        write_file(self.disk_file)
        # policy 1
        write_file(self.disk_file_p1)

        auditor_worker.audit_all_objects()
        self.assertEqual(auditor_worker.quarantines, pre_quarantines)
        # 1 object per policy falls into 1024 bucket
        self.assertEqual(auditor_worker.stats_buckets[1024], 2)
        self.assertEqual(auditor_worker.stats_buckets[10240], 0)

        # pick up some additional code coverage, large file
        data = '0' * 1024 * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
        auditor_worker.audit_all_objects(device_dirs=['sda', 'sdb'])
        self.assertEqual(auditor_worker.quarantines, pre_quarantines)
        # the 1024 byte object in policy-1 is still there and gets counted
        # again; the stats buckets accumulate across runs, so the bucket now
        # holds 3 (the 2 from the first pass plus this one)
        self.assertEqual(auditor_worker.stats_buckets[1024], 3)
        self.assertEqual(auditor_worker.stats_buckets[10240], 0)
        # and then policy-0 disk_file was re-written as a larger object
        self.assertEqual(auditor_worker.stats_buckets['OVER'], 1)

        # pick up even more additional code coverage, misc paths
        auditor_worker.log_time = -1
        auditor_worker.stats_sizes = []
        auditor_worker.audit_all_objects(device_dirs=['sda', 'sdb'])
        self.assertEqual(auditor_worker.quarantines, pre_quarantines)
        self.assertEqual(auditor_worker.stats_buckets[1024], 3)
        self.assertEqual(auditor_worker.stats_buckets[10240], 0)
        self.assertEqual(auditor_worker.stats_buckets['OVER'], 1)

    def test_object_run_logging(self):
        logger = FakeLogger()
        auditor_worker = auditor.AuditorWorker(self.conf, logger,
                                               self.rcache, self.devices)
        auditor_worker.audit_all_objects(device_dirs=['sda'])
        log_lines = logger.get_lines_for_level('info')
        self.assertTrue(len(log_lines) > 0)
        self.assertIn('ALL - parallel, sda', log_lines[0])

        logger = FakeLogger()
        auditor_worker = auditor.AuditorWorker(self.conf, logger,
                                               self.rcache, self.devices,
                                               zero_byte_only_at_fps=50)
        auditor_worker.audit_all_objects(device_dirs=['sda'])
        log_lines = logger.get_lines_for_level('info')
        self.assertTrue(len(log_lines) > 0)
        self.assertIn('ZBF - sda', log_lines[0])

    def test_object_run_once_no_sda(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        timestamp = str(normalize_timestamp(time.time()))
        pre_quarantines = auditor_worker.quarantines
        # pretend that we logged (and reset counters) just now
        auditor_worker.last_logged = time.time()
        data = '0' * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            os.write(writer._fd, 'extra_data')
        auditor_worker.audit_all_objects()
        self.assertEqual(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_run_once_multi_devices(self):
        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                               self.rcache, self.devices)
        # pretend that we logged (and reset counters) just now
        auditor_worker.last_logged = time.time()
        timestamp = str(normalize_timestamp(time.time()))
        pre_quarantines = auditor_worker.quarantines
        data = '0' * 10
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
        auditor_worker.audit_all_objects()
        self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'ob',
                                                  policy=POLICIES.legacy)
        data = '1' * 10
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp,
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            os.write(writer._fd, 'extra_data')
        auditor_worker.audit_all_objects()
        self.assertEqual(auditor_worker.quarantines, pre_quarantines + 1)

    def test_object_run_fast_track_non_zero(self):
        self.auditor = auditor.ObjectAuditor(self.conf)
        self.auditor.log_time = 0
        data = '0' * 1024
        etag = md5()
        with self.disk_file.create() as writer:
            writer.write(data)
            etag.update(data)
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': str(normalize_timestamp(time.time())),
                'Content-Length': str(os.fstat(writer._fd).st_size),
            }
            writer.put(metadata)
            etag = md5()
            etag.update('1' + '0' * 1023)
            etag = etag.hexdigest()
            metadata['ETag'] = etag
            write_metadata(writer._fd, metadata)

        quarantine_path = os.path.join(self.devices,
                                       'sda', 'quarantined', 'objects')
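        # The zero-byte-only pass below (zero_byte_fps=50) skips this
        # 1024-byte file, so nothing is quarantined; the full audit that
        # follows spots the mismatched ETag and quarantines the object.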
        kwargs = {'mode': 'once'}
        kwargs['zero_byte_fps'] = 50
        self.auditor.run_audit(**kwargs)
        self.assertFalse(os.path.isdir(quarantine_path))
        del kwargs['zero_byte_fps']
        self.auditor.run_audit(**kwargs)
        self.assertTrue(os.path.isdir(quarantine_path))

    def setup_bad_zero_byte(self, timestamp=None):
        if timestamp is None:
            timestamp = Timestamp(time.time())
        self.auditor = auditor.ObjectAuditor(self.conf)
        self.auditor.log_time = 0
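        # write a zero-byte .data file whose metadata claims Content-Length
        # 10, the kind of corruption the zero-byte-only audit pass targets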
        etag = md5()
        with self.disk_file.create() as writer:
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp.internal,
                'Content-Length': 10,
            }
            writer.put(metadata)
            etag = md5()
            etag = etag.hexdigest()
            metadata['ETag'] = etag
            write_metadata(writer._fd, metadata)

    def test_object_run_fast_track_all(self):
        self.setup_bad_zero_byte()
        kwargs = {'mode': 'once'}
        self.auditor.run_audit(**kwargs)
        quarantine_path = os.path.join(self.devices,
                                       'sda', 'quarantined', 'objects')
        self.assertTrue(os.path.isdir(quarantine_path))

    def test_object_run_fast_track_zero(self):
        self.setup_bad_zero_byte()
        kwargs = {'mode': 'once'}
        kwargs['zero_byte_fps'] = 50
        self.auditor.run_audit(**kwargs)
        quarantine_path = os.path.join(self.devices,
                                       'sda', 'quarantined', 'objects')
        self.assertTrue(os.path.isdir(quarantine_path))

    def test_object_run_fast_track_zero_check_closed(self):
        rat = [False]

        class FakeFile(DiskFile):

            def _quarantine(self, data_file, msg):
                rat[0] = True
                DiskFile._quarantine(self, data_file, msg)

        self.setup_bad_zero_byte()
        with mock.patch('swift.obj.diskfile.DiskFileManager.diskfile_cls',
                        FakeFile):
            kwargs = {'mode': 'once'}
            kwargs['zero_byte_fps'] = 50
            self.auditor.run_audit(**kwargs)
            quarantine_path = os.path.join(self.devices,
                                           'sda', 'quarantined', 'objects')
            self.assertTrue(os.path.isdir(quarantine_path))
            self.assertTrue(rat[0])

    @mock.patch.object(auditor.ObjectAuditor, 'run_audit')
    @mock.patch('os.fork', return_value=0)
    def test_with_inaccessible_object_location(self, mock_os_fork,
                                               mock_run_audit):
        # Need to ensure that any failures in run_audit do
        # not prevent sys.exit() from running.  Otherwise we get
        # zombie processes.
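        # (presumably fork_child wraps run_audit in a try/finally that calls
        # sys.exit() in the forked child, which is why SystemExit is still
        # expected here even though run_audit raises OSError)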
        e = OSError('permission denied')
        mock_run_audit.side_effect = e
        self.auditor = auditor.ObjectAuditor(self.conf)
        self.assertRaises(SystemExit, self.auditor.fork_child, self)

    def test_with_only_tombstone(self):
        # sanity check that auditor doesn't touch solitary tombstones
        ts_iter = make_timestamp_iter()
        self.setup_bad_zero_byte(timestamp=ts_iter.next())
        self.disk_file.delete(ts_iter.next())
        files = os.listdir(self.disk_file._datadir)
        self.assertEqual(1, len(files))
        self.assertTrue(files[0].endswith('ts'))
        kwargs = {'mode': 'once'}
        self.auditor.run_audit(**kwargs)
        files_after = os.listdir(self.disk_file._datadir)
        self.assertEqual(files, files_after)

    def test_with_tombstone_and_data(self):
        # rsync replication could leave a tombstone and data file in object
        # dir - verify they are both removed during audit
        ts_iter = make_timestamp_iter()
        ts_tomb = ts_iter.next()
        ts_data = ts_iter.next()
        self.setup_bad_zero_byte(timestamp=ts_data)
        tomb_file_path = os.path.join(self.disk_file._datadir,
                                      '%s.ts' % ts_tomb.internal)
        with open(tomb_file_path, 'wb') as fd:
            write_metadata(fd, {'X-Timestamp': ts_tomb.internal})
        files = os.listdir(self.disk_file._datadir)
        self.assertEqual(2, len(files))
        self.assertTrue(os.path.basename(tomb_file_path) in files, files)
        kwargs = {'mode': 'once'}
        self.auditor.run_audit(**kwargs)
        self.assertFalse(os.path.exists(self.disk_file._datadir))

    def test_sleeper(self):
        with mock.patch(
                'time.sleep', mock.MagicMock()) as mock_sleep:
            auditor.SLEEP_BETWEEN_AUDITS = 0.10
            my_auditor = auditor.ObjectAuditor(self.conf)
            my_auditor._sleep()
            mock_sleep.assert_called_with(auditor.SLEEP_BETWEEN_AUDITS)

    def test_run_parallel_audit(self):

        class StopForever(Exception):
            pass

        class Bogus(Exception):
            pass

        class ObjectAuditorMock(object):
            check_args = ()
            check_kwargs = {}
            check_device_dir = None
            fork_called = 0
            master = 0
            wait_called = 0

            def mock_run(self, *args, **kwargs):
                self.check_args = args
                self.check_kwargs = kwargs
                if 'zero_byte_fps' in kwargs:
                    self.check_device_dir = kwargs.get('device_dirs')

            def mock_sleep_stop(self):
                raise StopForever('stop')

            def mock_sleep_continue(self):
                return

            def mock_audit_loop_error(self, parent, zbo_fps,
                                      override_devices=None, **kwargs):
                raise Bogus('exception')

            def mock_fork(self):
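                # os.fork() returns 0 in the child and the child's pid in
                # the parent; master == 0 makes every caller take the child
                # code path, master == 1 the parent path that waits on its
                # children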
                self.fork_called += 1
                if self.master:
                    return self.fork_called
                else:
                    return 0

            def mock_wait(self):
                self.wait_called += 1
                return (self.wait_called, 0)

        for i in string.ascii_letters[2:26]:
            mkdirs(os.path.join(self.devices, 'sd%s' % i))

        my_auditor = auditor.ObjectAuditor(dict(devices=self.devices,
                                                mount_check='false',
                                                zero_byte_files_per_second=89,
                                                concurrency=1))

        mocker = ObjectAuditorMock()
        my_auditor.logger.exception = mock.MagicMock()
        real_audit_loop = my_auditor.audit_loop
        my_auditor.audit_loop = mocker.mock_audit_loop_error
        my_auditor.run_audit = mocker.mock_run
        was_fork = os.fork
        was_wait = os.wait
        os.fork = mocker.mock_fork
        os.wait = mocker.mock_wait
        try:
            my_auditor._sleep = mocker.mock_sleep_stop
            my_auditor.run_once(zero_byte_fps=50)
            my_auditor.logger.exception.assert_called_once_with(
                'ERROR auditing: exception')
            my_auditor.logger.exception.reset_mock()
            self.assertRaises(StopForever, my_auditor.run_forever)
            my_auditor.logger.exception.assert_called_once_with(
                'ERROR auditing: exception')
            my_auditor.audit_loop = real_audit_loop

            self.assertRaises(StopForever,
                              my_auditor.run_forever, zero_byte_fps=50)
            self.assertEqual(mocker.check_kwargs['zero_byte_fps'], 50)
            self.assertEqual(mocker.fork_called, 0)
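
            # mock_fork returns 0 here (mocker.master is still 0), so
            # run_once takes the forked-child code path for the zbf process:
            # run_audit (the mock, which records zero_byte_fps=89) and then
            # sys.exit(), hence the SystemExit and the single recorded fork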

            self.assertRaises(SystemExit, my_auditor.run_once)
            self.assertEqual(mocker.fork_called, 1)
            self.assertEqual(mocker.check_kwargs['zero_byte_fps'], 89)
            self.assertEqual(mocker.check_device_dir, [])
            self.assertEqual(mocker.check_args, ())

            device_list = ['sd%s' % i for i in string.ascii_letters[2:10]]
            device_string = ','.join(device_list)
            device_string_bogus = device_string + ',bogus'

            mocker.fork_called = 0
            self.assertRaises(SystemExit, my_auditor.run_once,
                              devices=device_string_bogus)
            self.assertEqual(mocker.fork_called, 1)
            self.assertEqual(mocker.check_kwargs['zero_byte_fps'], 89)
            self.assertEqual(sorted(mocker.check_device_dir), device_list)

            mocker.master = 1

            mocker.fork_called = 0
            self.assertRaises(StopForever, my_auditor.run_forever)
            # Fork is called 2 times since the zbf process is forked just
            # once before self._sleep() is called and StopForever is raised.
            # Also, wait is called just once before StopForever is raised.
            self.assertEqual(mocker.fork_called, 2)
            self.assertEqual(mocker.wait_called, 1)

            my_auditor._sleep = mocker.mock_sleep_continue

            my_auditor.concurrency = 2
            mocker.fork_called = 0
            mocker.wait_called = 0
            my_auditor.run_once()
            # Fork is called (no. of devices) + (no. of devices)/2 + 1 times,
            # since the zbf process is forked (no. of devices)/2 + 1 times
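            # (here that is the 26 device dirs created for this test: sda
            # and sdb from setUp plus sdc..sdz above, so 26 + 26 / 2 + 1 = 40
            # forks and 40 waits, with integer division)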
            no_devices = len(os.listdir(self.devices))
            self.assertEqual(mocker.fork_called, no_devices + no_devices / 2
                             + 1)
            self.assertEqual(mocker.wait_called, no_devices + no_devices / 2
                             + 1)

        finally:
            os.fork = was_fork
            os.wait = was_wait