Example #1
class TestBlobMover(BaseTestCase):
    def setUp(self):
        super(TestBlobMover, self).setUp()
        self.container = random_str(16)
        self.cid = cid_from_name(self.account, self.container)
        self.path = random_str(16)
        self.api = ObjectStorageApi(self.ns)
        self.blob_client = BlobClient(self.conf)

        self.api.container_create(self.account, self.container)
        _, chunks = self.api.container.content_prepare(self.account,
                                                       self.container,
                                                       self.path,
                                                       size=1)
        services = self.conscience.all_services('rawx')
        if len(chunks) >= len([s for s in services if s['score'] > 0]):
            self.skipTest("need at least %d rawx to run" % (len(chunks) + 1))

        self.rawx_volumes = dict()
        for rawx in services:
            tags = rawx['tags']
            service_id = tags.get('tag.service_id', None)
            if service_id is None:
                service_id = rawx['addr']
            volume = tags.get('tag.vol', None)
            self.rawx_volumes[service_id] = volume

        self.api.object_create(self.account,
                               self.container,
                               obj_name=self.path,
                               data="chunk")
        meta, self.chunks = self.api.object_locate(self.account,
                                                   self.container, self.path)
        self.version = meta['version']
        self.content_id = meta['id']
        self.chunk_method = meta['chunk_method']

    def _chunk_path(self, chunk):
        url = chunk['url']
        volume_id = url.split('/', 3)[2]
        chunk_id = url.split('/', 3)[3]
        volume = self.rawx_volumes[volume_id]
        return volume + '/' + chunk_id[:3] + '/' + chunk_id

    def test_move_old_chunk(self):
        for chunk in self.chunks:
            convert_to_old_chunk(self._chunk_path(chunk), self.account,
                                 self.container, self.path, self.version,
                                 self.content_id)

        orig_chunk = random.choice(self.chunks)
        chunk_volume = orig_chunk['url'].split('/')[2]
        chunk_id = orig_chunk['url'].split('/')[3]
        chunk_headers, chunk_stream = self.blob_client.chunk_get(
            orig_chunk['url'], check_headers=False)
        chunks_kept = list(self.chunks)
        chunks_kept.remove(orig_chunk)

        mover = BlobMoverWorker(self.conf, None,
                                self.rawx_volumes[chunk_volume])
        mover.chunk_move(self._chunk_path(orig_chunk), chunk_id)

        _, new_chunks = self.api.object_locate(self.account, self.container,
                                               self.path)

        self.assertEqual(len(new_chunks), len(chunks_kept) + 1)
        url_kept = [c['url'] for c in chunks_kept]
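        # Exactly one chunk must be new: the one written by the mover.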
        new_chunk = None
        for chunk in new_chunks:
            if chunk['url'] not in url_kept:
                self.assertIsNone(new_chunk)
                new_chunk = chunk

        self.assertNotEqual(orig_chunk['real_url'], new_chunk['real_url'])
        self.assertNotEqual(orig_chunk['url'], new_chunk['url'])
        self.assertEqual(orig_chunk['pos'], new_chunk['pos'])
        self.assertEqual(orig_chunk['size'], new_chunk['size'])
        self.assertEqual(orig_chunk['hash'], new_chunk['hash'])

        new_chunk_headers, new_chunk_stream = self.blob_client.chunk_get(
            new_chunk['url'])
        chunk_data = b''.join(chunk_stream)
        new_chunk_data = b''.join(new_chunk_stream)
        self.assertEqual(chunk_data, new_chunk_data)
        fullpath = encode_fullpath(self.account, self.container, self.path,
                                   self.version, self.content_id)
        self.assertEqual(fullpath, new_chunk_headers['full_path'])
        del new_chunk_headers['full_path']
        self.assertNotEqual(chunk_headers['chunk_id'],
                            new_chunk_headers['chunk_id'])
        new_chunk_id = new_chunk['url'].split('/')[3]
        self.assertEqual(new_chunk_id, new_chunk_headers['chunk_id'])
        del chunk_headers['chunk_id']
        del new_chunk_headers['chunk_id']
        self.assertEqual(OIO_VERSION, new_chunk_headers['oio_version'])
        del chunk_headers['oio_version']
        del new_chunk_headers['oio_version']
        self.assertEqual(chunk_headers, new_chunk_headers)

    def test_move_with_wrong_size(self):
        if not self.chunk_method.startswith('ec'):
            self.skipTest('Only works with EC')

        orig_chunk = random.choice(self.chunks)
        chunk_volume = orig_chunk['url'].split('/')[2]
        chunk_id = orig_chunk['url'].split('/')[3]

        mover = BlobMoverWorker(self.conf, None,
                                self.rawx_volumes[chunk_volume])
        meta, stream = mover.blob_client.chunk_get(orig_chunk['url'])
        data = stream.read()
        stream.close()
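        # Truncate the data and drop the stored hash: the mover should
        # detect the size mismatch and raise.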
        data = data[:-1]
        del meta['chunk_hash']
        wrong_stream = GeneratorIO(data)
        mover.blob_client.chunk_get = Mock(return_value=(meta, wrong_stream))

        self.assertRaises(ChunkException, mover.chunk_move,
                          self._chunk_path(orig_chunk), chunk_id)
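
The _chunk_path helper above maps a chunk URL to its location on disk. Below
is a minimal standalone sketch of the same mapping, assuming chunk URLs of
the form http://<service-id>/<chunk-id> and the three-character fan-out
directory used by rawx volumes (both taken from the code above):

def chunk_path(url, rawx_volumes):
    # 'http://<service-id>/<chunk-id>' splits into
    # ['http:', '', '<service-id>', '<chunk-id>']
    volume_id = url.split('/', 3)[2]
    chunk_id = url.split('/', 3)[3]
    # Resolve the service ID to its local volume, then fan out on the
    # first three characters of the chunk ID.
    return '/'.join((rawx_volumes[volume_id], chunk_id[:3], chunk_id))

# Hypothetical usage:
# chunk_path('http://127.0.0.1:6201/0123ABCD', {'127.0.0.1:6201': '/mnt/rawx-1'})
# returns '/mnt/rawx-1/012/0123ABCD'
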
Example #2
class TestPerfectibleContent(BaseTestCase):
    def setUp(self):
        super(TestPerfectibleContent, self).setUp()
        self.api = ObjectStorageApi(self.ns,
                                    endpoint=self.uri,
                                    pool_manager=self.http_pool)
        # Ensure the tube is not clogged
        self.beanstalkd.drain_tube(DEFAULT_IMPROVER_TUBE, timeout=0.2)

    def tearDown(self):
        super(TestPerfectibleContent, self).tearDown()
        self.wait_for_score(('rawx', ), timeout=5.0, score_threshold=8)

    @classmethod
    def tearDownClass(cls):
        # Be kind with the next test suites
        cls._cls_reload_proxy()
        time.sleep(3)
        cls._cls_reload_meta()
        time.sleep(1)

    def _aggregate_services(self, type_, key):
        """
        Build a dictionary of lists of services indexed by `key`.

        :param type_: the type of services to index
        :param key: a function computing the grouping key from a service
        """
        all_svcs = self.conscience.all_services(type_)
        out = defaultdict(list)
        for svc in all_svcs:
            out[key(svc)].append(svc)
        return out

    def _aggregate_rawx_by_slot(self):
        by_slot = self._aggregate_services(
            'rawx',
            lambda x: x['tags'].get('tag.slots', 'rawx').rsplit(',', 2)[-1])
        if 'rawx-even' not in by_slot or 'rawx-odd' not in by_slot:
            self.skipTest('This test requires "rawx-even" and "rawx-odd" slots')
        return by_slot

    def _aggregate_rawx_by_place(self):
        by_place = self._aggregate_services(
            'rawx', lambda x: x['tags']['tag.loc'].rsplit('.', 1)[0])
        if len(by_place) < 3:
            self.skipTest('This test requires 3 different 2nd level locations')
        return by_place

    def _wait_for_event(self, timeout=REASONABLE_EVENT_DELAY):
        """
        Wait for an event in the oio-improve tube.
        """
        event = self.wait_for_event(DEFAULT_IMPROVER_TUBE, timeout=timeout)
        if event is None:
            self.fail("No event received in the last %s seconds" % timeout)
        return event

    # This test must be executed first
    def test_0_upload_ok(self):
        """Check that no event is emitted when everything is ok."""
        self.wait_for_score(('rawx', ))
        # Check we have enough service locations.
        self._aggregate_rawx_by_place()

        # Upload an object.
        container = self._random_user()
        reqid = request_id('perfectible-')
        self.api.object_create(self.account,
                               container,
                               obj_name='perfect',
                               data=b'whatever',
                               policy='THREECOPIES',
                               headers={REQID_HEADER: reqid})

        # Wait on the oio-improve beanstalk tube.
        self.beanstalkd.watch(DEFAULT_IMPROVER_TUBE)
        # Ensure we do not receive any event.
        self.assertRaises(ResponseError,
                          self.beanstalkd.reserve,
                          timeout=REASONABLE_EVENT_DELAY)

    @flaky(rerun_filter=is_event_delay_error)
    def test_upload_warn_dist(self):
        """
        Check that an event is emitted when the warning distance is reached.
        """
        self.wait_for_score(('rawx', ))
        # Check we have enough service locations.
        by_place = self._aggregate_rawx_by_place()

        # Lock all services of the 3rd location.
        banned_loc = list(by_place.keys())[2]
        self._lock_services('rawx', by_place[banned_loc])

        # Upload an object.
        container = self._random_user()
        reqid = request_id('perfectible-')
        self.api.object_create(self.account,
                               container,
                               obj_name='perfectible',
                               data=b'whatever',
                               policy='THREECOPIES',
                               headers={REQID_HEADER: reqid})

        # Wait on the oio-improve beanstalk tube.
        event = self._wait_for_event(timeout=REASONABLE_EVENT_DELAY * 2)

        # Check the content of the event.
        self.assertEqual('storage.content.perfectible', event.event_type)
        self.assertEqual(reqid, event.reqid)
        self.assertEqual(self.account, event.url['account'])
        self.assertEqual(container, event.url['user'])
        self.assertEqual('perfectible', event.url['path'])
        mc = event.data
        self.assertEqual(0, mc['pos'])  # only one metachunk in this test
        lowest_dist = 4
        warn_dist = 4
        for chunk in mc['chunks']:
            qual = chunk['quality']
            if qual['final_dist'] < lowest_dist:
                lowest_dist = qual['final_dist']
            if qual['warn_dist'] < warn_dist:
                warn_dist = qual['warn_dist']
            self.assertEqual(qual['expected_slot'], qual['final_slot'])
        self.assertLessEqual(lowest_dist, warn_dist)

    def test_upload_fallback(self):
        """
        Test that an event is emitted when a fallback service slot is used.
        """
        by_slot = self._aggregate_rawx_by_slot()
        if len(by_slot['rawx-odd']) < 3:
            self.skipTest('This test requires at least 3 services '
                          'in the "rawx-odd" slot')

        # Lock all services of the 'rawx-even' slot.
        banned_slot = 'rawx-even'
        self._lock_services('rawx', by_slot[banned_slot])

        # Upload an object.
        container = self._random_user()
        reqid = request_id('perfectible-')
        self.api.object_create(self.account,
                               container,
                               obj_name='perfectible',
                               data=b'whatever',
                               policy='THREECOPIES',
                               headers={REQID_HEADER: reqid})

        # Wait on the oio-improve beanstalk tube.
        event = self._wait_for_event(timeout=REASONABLE_EVENT_DELAY * 2)

        # Check the content of the event.
        self.assertEqual('storage.content.perfectible', event.event_type)
        self.assertEqual(reqid, event.reqid)
        self.assertEqual(self.account, event.url['account'])
        self.assertEqual(container, event.url['user'])
        self.assertEqual('perfectible', event.url['path'])
        mc = event.data
        self.assertEqual(0, mc['pos'])  # only one metachunk in this test
        slot_matches = list()
        for chunk in mc['chunks']:
            qual = chunk['quality']
            slot_matches.append(qual['final_slot'] == qual['expected_slot'])
            self.assertNotEqual(qual['final_slot'], banned_slot)
        self.assertIn(False, slot_matches)

    def _call_blob_improver_subprocess(self,
                                       run_time=3.0,
                                       stop_after_events=1,
                                       log_level='INFO'):
        # FIXME(FVE): find a way to call coverage on the subprocess
        blob_improver = subprocess.Popen([
            'oio-blob-improver', self.ns,
            '--beanstalkd=' + self.conf['queue_addr'], '--retry-delay=1',
            '--log-level=' + log_level,
            '--stop-after-events=%d' % stop_after_events
        ])
        if SUBPROCESS32:
            try:
                blob_improver.wait(run_time)
            except Exception:
                blob_improver.kill()
        else:
            time.sleep(run_time)
            blob_improver.kill()

    def test_blob_improver_threecopies(self):
        by_slot = self._aggregate_rawx_by_slot()
        if len(by_slot['rawx-odd']) < 3:
            self.skipTest('This test requires at least 3 services '
                          'in the "rawx-odd" slot')
        # Ensure the distance between services won't be a problem.
        self._aggregate_rawx_by_place()

        # Lock all services of the 'rawx-even' slot.
        banned_slot = 'rawx-even'
        self._lock_services('rawx', by_slot[banned_slot])

        # Upload an object.
        container = self._random_user()
        reqid = request_id('perfectible-')
        chunks, _, _ = self.api.object_create(self.account,
                                              container,
                                              obj_name='perfectible',
                                              data=b'whatever',
                                              policy='THREECOPIES',
                                              reqid=reqid)

        # Wait for the "perfectible" event to be emitted,
        # but do not consume it.
        job, data = self.beanstalkd.wait_for_ready_job(
            DEFAULT_IMPROVER_TUBE, timeout=REASONABLE_EVENT_DELAY)
        if job:
            logging.debug("Expected job data: %s", data)
        self.assertIsNotNone(job)
        # "Unlock" the services of the 'rawx-even' slot.
        self._lock_services('rawx', by_slot[banned_slot], score=100)

        self._call_blob_improver_subprocess()

        # Check some changes have been done on the object.
        _, new_chunks = self.api.object_locate(self.account, container,
                                               'perfectible')
        old_urls = sorted([x['url'] for x in chunks])
        new_urls = sorted([x['url'] for x in new_chunks])
        logging.debug('Old chunks: %s', old_urls)
        logging.debug('New chunks: %s', new_urls)
        self.assertNotEqual(old_urls, new_urls)

        # Ensure no new "perfectible" event is emitted.
        job, data = self.beanstalkd.wait_for_ready_job(
            DEFAULT_IMPROVER_TUBE, timeout=REASONABLE_EVENT_DELAY)
        if job:
            logging.debug("Unexpected job data: %s", data)
        self.assertIsNone(job)
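
The _aggregate_services helper used in this example is a plain group-by over
service descriptions. Below is a self-contained sketch of that pattern, with
a hypothetical service list and the same slot-extracting key function as
_aggregate_rawx_by_slot:

from collections import defaultdict

def aggregate_by_key(services, key):
    """Group service descriptions into lists indexed by key(service)."""
    out = defaultdict(list)
    for svc in services:
        out[key(svc)].append(svc)
    return out

# Hypothetical usage: group rawx services by the last slot of 'tag.slots'.
services = [
    {'addr': '127.0.0.1:6201', 'tags': {'tag.slots': 'rawx,rawx-odd'}},
    {'addr': '127.0.0.1:6202', 'tags': {'tag.slots': 'rawx,rawx-even'}},
]
by_slot = aggregate_by_key(
    services, lambda s: s['tags'].get('tag.slots', 'rawx').rsplit(',', 2)[-1])
assert sorted(by_slot) == ['rawx-even', 'rawx-odd']
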
Example #3
class TestBlobRebuilder(BaseTestCase):
    def setUp(self):
        super(TestBlobRebuilder, self).setUp()
        self.container = random_str(16)
        self.cid = cid_from_name(self.account, self.container)
        self.path = random_str(16)
        self.api = ObjectStorageApi(self.ns)
        self.blob_client = BlobClient(self.conf)

        self.api.container_create(self.account, self.container)
        _, chunks = self.api.container.content_prepare(self.account,
                                                       self.container,
                                                       self.path,
                                                       size=1)
        if len(chunks) < 2:
            self.skipTest("need at least 2 chunks to run")

        services = self.conscience.all_services('rawx')
        self.rawx_volumes = dict()
        for rawx in services:
            tags = rawx['tags']
            service_id = tags.get('tag.service_id', None)
            if service_id is None:
                service_id = rawx['addr']
            volume = tags.get('tag.vol', None)
            self.rawx_volumes[service_id] = volume

        self.api.object_create(self.account,
                               self.container,
                               obj_name=self.path,
                               data="chunk")
        meta, self.chunks = self.api.object_locate(self.account,
                                                   self.container, self.path)
        self.version = meta['version']
        self.content_id = meta['id']

    def _chunk_path(self, chunk):
        url = chunk['url']
        volume_id = url.split('/', 3)[2]
        chunk_id = url.split('/', 3)[3]
        volume = self.rawx_volumes[volume_id]
        return volume + '/' + chunk_id[:3] + '/' + chunk_id

    def test_rebuild_old_chunk(self):
        for c in self.chunks:
            convert_to_old_chunk(self._chunk_path(c), self.account,
                                 self.container, self.path, self.version,
                                 self.content_id)

        chunk = random.choice(self.chunks)
        chunk_volume = chunk['url'].split('/')[2]
        chunk_id = chunk['url'].split('/')[3]
        chunk_headers, chunk_stream = self.blob_client.chunk_get(
            chunk['url'], check_headers=False)
        os.remove(self._chunk_path(chunk))
        chunks_kept = list(self.chunks)
        chunks_kept.remove(chunk)

        conf = self.conf.copy()
        conf['allow_same_rawx'] = True
        rebuilder = BlobRebuilder(conf, service_id=chunk_volume)
        rebuilder_worker = rebuilder.create_worker(None, None)
        rebuilder_worker._process_item(
            (self.ns, self.cid, self.content_id, chunk_id))

        _, new_chunks = self.api.object_locate(self.account, self.container,
                                               self.path)

        self.assertEqual(len(new_chunks), len(chunks_kept) + 1)
        url_kept = [c['url'] for c in chunks_kept]
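        # Exactly one chunk is absent from the kept URLs: the rebuilt one
        # (it may reuse the original URL, since chunk IDs are predictable).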
        new_chunk = None
        for c in new_chunks:
            if c['url'] not in url_kept:
                self.assertIsNone(new_chunk)
                new_chunk = c

        # Cannot check if the URL is different: it may be the same since we
        # generate predictable chunk IDs.
        # self.assertNotEqual(chunk['real_url'], new_chunk['real_url'])
        # self.assertNotEqual(chunk['url'], new_chunk['url'])
        self.assertEqual(chunk['pos'], new_chunk['pos'])
        self.assertEqual(chunk['size'], new_chunk['size'])
        self.assertEqual(chunk['hash'], new_chunk['hash'])

        new_chunk_headers, new_chunk_stream = self.blob_client.chunk_get(
            new_chunk['url'])
        chunk_data = b''.join(chunk_stream)
        new_chunk_data = b''.join(new_chunk_stream)
        self.assertEqual(chunk_data, new_chunk_data)
        fullpath = encode_fullpath(self.account, self.container, self.path,
                                   self.version, self.content_id)
        self.assertEqual(fullpath, new_chunk_headers['full_path'])
        del new_chunk_headers['full_path']
        # Since we generate predictable chunk IDs, they can be equal
        # self.assertNotEqual(chunk_headers['chunk_id'],
        #                     new_chunk_headers['chunk_id'])
        # We could compare the modification time of the chunks,
        # but unfortunately they have a 1s resolution...
        # self.assertNotEqual(chunk_headers['chunk_mtime'],
        #                     new_chunk_headers['chunk_mtime'])
        new_chunk_id = new_chunk['url'].split('/')[3]
        self.assertEqual(new_chunk_id, new_chunk_headers['chunk_id'])
        del chunk_headers['chunk_id']
        del new_chunk_headers['chunk_id']
        self.assertEqual(OIO_VERSION, new_chunk_headers['oio_version'])
        del chunk_headers['oio_version']
        del new_chunk_headers['oio_version']
        del chunk_headers['chunk_mtime']
        del new_chunk_headers['chunk_mtime']
        self.assertEqual(chunk_headers, new_chunk_headers)
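
Both the mover and the rebuilder tests assert on the full_path xattr produced
by encode_fullpath. The sketch below shows the layout these assertions assume
(one URL-quoted component per object coordinate, joined with slashes); it is
an illustration, not the authoritative implementation:

try:
    from urllib.parse import quote  # Python 3
except ImportError:
    from urllib import quote  # Python 2

def encode_fullpath_sketch(account, container, path, version, content_id):
    # Assumed layout: <account>/<container>/<path>/<version>/<content_id>
    return '/'.join(quote(str(part), safe='')
                    for part in (account, container, path, version,
                                 content_id))
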
Example #4
class TestBlobConverter(BaseTestCase):

    def setUp(self):
        super(TestBlobConverter, self).setUp()
        self.container = random_str(16)
        self.path = random_str(16)
        self.api = ObjectStorageApi(self.ns)

        self.api.container_create(self.account, self.container)
        _, chunks = self.api.container.content_prepare(
            self.account, self.container, self.path, size=1)
        services = self.conscience.all_services('rawx')
        self.rawx_volumes = dict()
        for rawx in services:
            tags = rawx['tags']
            service_id = tags.get('tag.service_id', None)
            if service_id is None:
                service_id = rawx['addr']
            volume = tags.get('tag.vol', None)
            self.rawx_volumes[service_id] = volume

        self.api.object_create(
            self.account, self.container, obj_name=self.path, data="chunk")
        meta, self.chunks = self.api.object_locate(
            self.account, self.container, self.path)
        self.version = meta['version']
        self.content_id = meta['id']
        self.container_id = cid_from_name(self.account, self.container)

    def tearDown(self):
        try:
            self.api.object_delete(self.account, self.container, self.path)
        except Exception:
            pass
        super(TestBlobConverter, self).tearDown()

    def _chunk_path(self, chunk):
        url = chunk['url']
        chunk_id = url.split('/', 3)[3]
        volume = self.rawx_volumes[self._chunk_volume_id(chunk)]
        return volume + '/' + chunk_id[:3] + '/' + chunk_id

    def _chunk_volume_id(self, chunk):
        return chunk['url'].split('/', 3)[2]

    def _deindex_chunk(self, chunk):
        rdir = RdirClient(self.conf, pool_manager=self.conscience.pool_manager)
        url = chunk['url']
        volume_id = url.split('/', 3)[2]
        chunk_id = url.split('/', 3)[3]
        rdir.chunk_delete(volume_id, self.container_id,
                          self.content_id, chunk_id)

    def _convert_and_check(self, chunk_volume, chunk_path,
                           chunk_id_info, expected_raw_meta=None,
                           expected_errors=0):
        conf = self.conf
        conf['volume'] = self.rawx_volumes[chunk_volume]
        converter = BlobConverter(conf, logger=self.logger)
        converter.safe_convert_chunk(chunk_path)
        self.assertEqual(1, converter.total_chunks_processed)
        self.assertEqual(1, converter.passes)
        self.assertEqual(expected_errors, converter.errors)

        checker = Checker(self.ns)
        for chunk_id, info in chunk_id_info.items():
            account, container, path, version, content_id = info
            fullpath = encode_fullpath(
                account, container, path, version, content_id)
            cid = cid_from_name(account, container)
            meta, raw_meta = read_chunk_metadata(chunk_path, chunk_id)

            self.assertEqual(meta.get('chunk_id'), chunk_id)
            self.assertEqual(meta.get('container_id'), cid)
            self.assertEqual(meta.get('content_path'), path)
            self.assertEqual(meta.get('content_version'), version)
            self.assertEqual(meta.get('content_id'), content_id)
            self.assertEqual(meta.get('full_path'), fullpath)

            checker.check(Target(
                account, container=container, obj=path,
                chunk='http://' + converter.volume_id + '/' + chunk_id))
            for _ in checker.run():
                pass
            self.assertTrue(checker.report())

            if expected_raw_meta:
                self.assertDictEqual(expected_raw_meta, raw_meta)
                continue

            self.assertNotIn(chunk_xattr_keys['chunk_id'], raw_meta)
            self.assertNotIn(chunk_xattr_keys['container_id'], raw_meta)
            self.assertNotIn(chunk_xattr_keys['content_path'], raw_meta)
            self.assertNotIn(chunk_xattr_keys['content_version'], raw_meta)
            self.assertNotIn(chunk_xattr_keys['content_id'], raw_meta)
            self.assertIn(CHUNK_XATTR_CONTENT_FULLPATH_PREFIX + chunk_id,
                          raw_meta)
            for k in raw_meta.keys():
                if k.startswith('oio:'):
                    self.fail('old-style fullpath xattr still present: %s' % k)
            self.assertEqual(raw_meta[chunk_xattr_keys['oio_version']],
                             OIO_VERSION)

    def _test_converter_single_chunk(self, chunk, expected_errors=0):
        chunk_volume = chunk['url'].split('/')[2]
        chunk_id = chunk['url'].split('/')[3]
        chunk_path = self._chunk_path(chunk)

        self._convert_and_check(
            chunk_volume, chunk_path,
            {chunk_id: (self.account, self.container, self.path, self.version,
                        self.content_id)},
            expected_errors=expected_errors)

    def test_converter(self):
        chunk = random.choice(self.chunks)
        self._test_converter_single_chunk(chunk)

    def test_converter_old_chunk(self):
        for c in self.chunks:
            convert_to_old_chunk(
                self._chunk_path(c), self.account, self.container, self.path,
                self.version, self.content_id)

        chunk = random.choice(self.chunks)
        chunk_volume = chunk['url'].split('/')[2]
        chunk_id = chunk['url'].split('/')[3]
        chunk_path = self._chunk_path(chunk)

        self._convert_and_check(
            chunk_volume, chunk_path,
            {chunk_id: (self.account, self.container, self.path, self.version,
                        self.content_id)})

    def test_converter_old_chunk_with_wrong_path(self):
        for c in self.chunks:
            convert_to_old_chunk(
                self._chunk_path(c), self.account, self.container,
                self.path + '+', self.version, self.content_id)

        chunk = random.choice(self.chunks)
        chunk_volume = chunk['url'].split('/')[2]
        chunk_id = chunk['url'].split('/')[3]
        chunk_path = self._chunk_path(chunk)

        self._convert_and_check(
            chunk_volume, chunk_path,
            {chunk_id: (self.account, self.container, self.path, self.version,
                        self.content_id)})

    def test_converter_old_chunk_with_wrong_content_id(self):
        for c in self.chunks:
            convert_to_old_chunk(
                self._chunk_path(c), self.account, self.container, self.path,
                self.version, '0123456789ABCDEF0123456789ABCDEF')

        chunk = random.choice(self.chunks)
        chunk_volume = chunk['url'].split('/')[2]
        chunk_id = chunk['url'].split('/')[3]
        chunk_path = self._chunk_path(chunk)

        self._convert_and_check(
            chunk_volume, chunk_path,
            {chunk_id: (self.account, self.container, self.path, self.version,
                        self.content_id)})

    def test_converter_old_chunk_with_old_fullpath(self):
        for c in self.chunks:
            convert_to_old_chunk(
                self._chunk_path(c), self.account, self.container, self.path,
                self.version, self.content_id, add_old_fullpath=True)

        chunk = random.choice(self.chunks)
        chunk_volume = chunk['url'].split('/')[2]
        chunk_id = chunk['url'].split('/')[3]
        chunk_path = self._chunk_path(chunk)

        self._convert_and_check(
            chunk_volume, chunk_path,
            {chunk_id: (self.account, self.container, self.path, self.version,
                        self.content_id)})

    def test_converter_old_chunk_with_old_fullpath_and_wrong_path(self):
        for c in self.chunks:
            convert_to_old_chunk(
                self._chunk_path(c), self.account, self.container,
                self.path, self.version, self.content_id,
                add_old_fullpath=True)
            convert_to_old_chunk(
                self._chunk_path(c), self.account, self.container,
                self.path + '+', self.version, self.content_id)

        chunk = random.choice(self.chunks)
        chunk_volume = chunk['url'].split('/')[2]
        chunk_id = chunk['url'].split('/')[3]
        chunk_path = self._chunk_path(chunk)

        self._convert_and_check(
            chunk_volume, chunk_path,
            {chunk_id: (self.account, self.container, self.path, self.version,
                        self.content_id)})

    def test_converter_old_chunk_with_wrong_fullpath(self):
        for c in self.chunks:
            convert_to_old_chunk(
                self._chunk_path(c), self.account, self.container,
                self.path, 'None', '0123456789ABCDEF0123456789ABCDEF',
                add_old_fullpath=True)
            convert_to_old_chunk(
                self._chunk_path(c), self.account, self.container,
                self.path, self.version, self.content_id)

        chunk = random.choice(self.chunks)
        chunk_volume = chunk['url'].split('/')[2]
        chunk_id = chunk['url'].split('/')[3]
        chunk_path = self._chunk_path(chunk)

        self._convert_and_check(
            chunk_volume, chunk_path,
            {chunk_id: (self.account, self.container, self.path, self.version,
                        self.content_id)})

    def test_converter_linked_chunk(self):
        self.api.object_link(
            self.account, self.container, self.path,
            self.account, self.container, self.path + '.link')

        linked_meta, linked_chunks = self.api.object_locate(
            self.account, self.container, self.path + '.link')
        self.assertNotEqual(self.content_id, linked_meta['id'])

        chunk = random.choice(self.chunks)
        chunk_volume = chunk['url'].split('/')[2]
        chunk_id = chunk['url'].split('/')[3]
        chunk_path = self._chunk_path(chunk)
        for c in linked_chunks:
            if chunk_volume == c['url'].split('/')[2]:
                linked_chunk_id2 = c['url'].split('/')[3]
                break

        linked_chunk = random.choice(linked_chunks)
        linked_chunk_volume = linked_chunk['url'].split('/')[2]
        linked_chunk_id = linked_chunk['url'].split('/')[3]
        linked_chunk_path = self._chunk_path(linked_chunk)
        for c in self.chunks:
            if linked_chunk_volume == c['url'].split('/')[2]:
                chunk_id2 = c['url'].split('/')[3]
                break

        self._convert_and_check(
            chunk_volume, chunk_path,
            {chunk_id: (self.account, self.container, self.path, self.version,
                        self.content_id),
             linked_chunk_id2: (self.account, self.container,
                                self.path + '.link', linked_meta['version'],
                                linked_meta['id'])})

        self._convert_and_check(
            linked_chunk_volume, linked_chunk_path,
            {chunk_id2: (self.account, self.container, self.path, self.version,
                         self.content_id),
             linked_chunk_id: (self.account, self.container,
                               self.path + '.link', linked_meta['version'],
                               linked_meta['id'])})

    def test_converter_old_linked_chunk(self):
        self.api.object_link(
            self.account, self.container, self.path,
            self.account, self.container, self.path + '.link')

        linked_meta, linked_chunks = self.api.object_locate(
            self.account, self.container, self.path + '.link')
        self.assertNotEqual(self.content_id, linked_meta['id'])

        for c in linked_chunks:
            convert_to_old_chunk(
                self._chunk_path(c), self.account, self.container,
                self.path + '.link', 'None',
                '0123456789ABCDEF0123456789ABCDEF', add_old_fullpath=True)
        for c in self.chunks:
            convert_to_old_chunk(
                self._chunk_path(c), self.account, self.container, self.path,
                self.version, self.content_id)

        chunk = random.choice(self.chunks)
        chunk_volume = chunk['url'].split('/')[2]
        chunk_id = chunk['url'].split('/')[3]
        chunk_path = self._chunk_path(chunk)
        for c in linked_chunks:
            if chunk_volume == c['url'].split('/')[2]:
                linked_chunk_id2 = c['url'].split('/')[3]
                break

        linked_chunk = random.choice(linked_chunks)
        linked_chunk_volume = linked_chunk['url'].split('/')[2]
        linked_chunk_id = linked_chunk['url'].split('/')[3]
        linked_chunk_path = self._chunk_path(linked_chunk)
        for c in self.chunks:
            if linked_chunk_volume == c['url'].split('/')[2]:
                chunk_id2 = c['url'].split('/')[3]
                break

        self._convert_and_check(
            chunk_volume, chunk_path,
            {chunk_id: (self.account, self.container, self.path, self.version,
                        self.content_id),
             linked_chunk_id2: (self.account, self.container,
                                self.path + '.link', linked_meta['version'],
                                linked_meta['id'])})

        self._convert_and_check(
            linked_chunk_volume, linked_chunk_path,
            {chunk_id2: (self.account, self.container, self.path, self.version,
                         self.content_id),
             linked_chunk_id: (self.account, self.container,
                               self.path + '.link', linked_meta['version'],
                               linked_meta['id'])})

    def test_converter_old_chunk_with_link_on_same_object(self):
        for c in self.chunks:
            convert_to_old_chunk(
                self._chunk_path(c), self.account, self.container, self.path,
                self.version, self.content_id)

        self.api.object_link(
            self.account, self.container, self.path,
            self.account, self.container, self.path)

        linked_meta, linked_chunks = self.api.object_locate(
            self.account, self.container, self.path)
        self.assertNotEqual(self.content_id, linked_meta['id'])

        linked_chunk = random.choice(linked_chunks)
        linked_chunk_volume = linked_chunk['url'].split('/')[2]
        linked_chunk_id = linked_chunk['url'].split('/')[3]
        linked_chunk_path = self._chunk_path(linked_chunk)

        # old xattr not removed
        _, expected_raw_meta = read_chunk_metadata(linked_chunk_path,
                                                   linked_chunk_id)
        expected_raw_meta[chunk_xattr_keys['oio_version']] = OIO_VERSION

        self._convert_and_check(
            linked_chunk_volume, linked_chunk_path,
            {linked_chunk_id: (self.account, self.container,
                               self.path, linked_meta['version'],
                               linked_meta['id'])},
            expected_raw_meta=expected_raw_meta, expected_errors=1)

    def test_converter_old_linked_chunk_with_link_on_same_object(self):
        self.api.object_link(
            self.account, self.container, self.path,
            self.account, self.container, self.path + '.link')

        linked_meta, linked_chunks = self.api.object_locate(
            self.account, self.container, self.path + '.link')
        self.assertNotEqual(self.content_id, linked_meta['id'])

        for c in linked_chunks:
            convert_to_old_chunk(
                self._chunk_path(c), self.account, self.container,
                self.path + '.link', 'None',
                '0123456789ABCDEF0123456789ABCDEF', add_old_fullpath=True)
        for c in self.chunks:
            convert_to_old_chunk(
                self._chunk_path(c), self.account, self.container, self.path,
                self.version, self.content_id, add_old_fullpath=True)

        self.api.object_link(
            self.account, self.container, self.path + '.link',
            self.account, self.container, self.path + '.link')

        linked_meta, linked_chunks = self.api.object_locate(
            self.account, self.container, self.path + '.link')
        self.assertNotEqual(self.content_id, linked_meta['id'])

        chunk = random.choice(self.chunks)
        chunk_volume = chunk['url'].split('/')[2]
        chunk_id = chunk['url'].split('/')[3]
        chunk_path = self._chunk_path(chunk)
        for c in linked_chunks:
            if chunk_volume == c['url'].split('/')[2]:
                linked_chunk_id2 = c['url'].split('/')[3]
                break

        linked_chunk = random.choice(linked_chunks)
        linked_chunk_volume = linked_chunk['url'].split('/')[2]
        linked_chunk_id = linked_chunk['url'].split('/')[3]
        linked_chunk_path = self._chunk_path(linked_chunk)
        for c in self.chunks:
            if linked_chunk_volume == c['url'].split('/')[2]:
                chunk_id2 = c['url'].split('/')[3]
                break

        self._convert_and_check(
            chunk_volume, chunk_path,
            {chunk_id: (self.account, self.container, self.path, self.version,
                        self.content_id),
             linked_chunk_id2: (self.account, self.container,
                                self.path + '.link', linked_meta['version'],
                                linked_meta['id'])})

        self._convert_and_check(
            linked_chunk_volume, linked_chunk_path,
            {chunk_id2: (self.account, self.container, self.path, self.version,
                         self.content_id),
             linked_chunk_id: (self.account, self.container,
                               self.path + '.link', linked_meta['version'],
                               linked_meta['id'])})

    def test_converter_with_versioning(self):
        self.api.container_set_properties(
            self.account, self.container,
            system={'sys.m2.policy.version': '2'})
        self.api.object_create(
            self.account, self.container, obj_name=self.path, data='version')

        versioned_meta, versioned_chunks = self.api.object_locate(
            self.account, self.container, self.path)
        self.assertNotEqual(self.content_id, versioned_meta['id'])

        chunk = random.choice(self.chunks)
        chunk_volume = chunk['url'].split('/')[2]
        chunk_id = chunk['url'].split('/')[3]
        chunk_path = self._chunk_path(chunk)

        versioned_chunk = random.choice(versioned_chunks)
        versioned_chunk_volume = versioned_chunk['url'].split('/')[2]
        versioned_chunk_id = versioned_chunk['url'].split('/')[3]
        versioned_chunk_path = self._chunk_path(versioned_chunk)

        self._convert_and_check(
            chunk_volume, chunk_path,
            {chunk_id: (self.account, self.container, self.path, self.version,
                        self.content_id)})

        self._convert_and_check(
            versioned_chunk_volume, versioned_chunk_path,
            {versioned_chunk_id: (self.account, self.container, self.path,
                                  versioned_meta['version'],
                                  versioned_meta['id'])})

    def test_converter_old_chunk_with_versioning(self):
        for c in self.chunks:
            convert_to_old_chunk(
                self._chunk_path(c), self.account, self.container, self.path,
                self.version, self.content_id)

        self.api.container_set_properties(
            self.account, self.container,
            system={'sys.m2.policy.version': '2'})
        self.api.object_create(
            self.account, self.container, obj_name=self.path, data='version')

        versioned_meta, versioned_chunks = self.api.object_locate(
            self.account, self.container, self.path)
        self.assertNotEqual(self.content_id, versioned_meta['id'])
        for c in versioned_chunks:
            convert_to_old_chunk(
                self._chunk_path(c), self.account, self.container, self.path,
                versioned_meta['version'], versioned_meta['id'])

        chunk = random.choice(self.chunks)
        chunk_volume = chunk['url'].split('/')[2]
        chunk_id = chunk['url'].split('/')[3]
        chunk_path = self._chunk_path(chunk)

        versioned_chunk = random.choice(versioned_chunks)
        versioned_chunk_volume = versioned_chunk['url'].split('/')[2]
        versioned_chunk_id = versioned_chunk['url'].split('/')[3]
        versioned_chunk_path = self._chunk_path(versioned_chunk)

        self._convert_and_check(
            chunk_volume, chunk_path,
            {chunk_id: (self.account, self.container, self.path, self.version,
                        self.content_id)})

        self._convert_and_check(
            versioned_chunk_volume, versioned_chunk_path,
            {versioned_chunk_id: (self.account, self.container, self.path,
                                  versioned_meta['version'],
                                  versioned_meta['id'])})

    def test_converter_file_not_found(self):
        """
        Test what happens when the BlobConverter encounters a chunk
        file that no longer exists on the volume: the conversion fails,
        and no fullpath recovery is attempted.
        """
        victim = random.choice(self.chunks)
        path = self._chunk_path(victim)
        chunk_volume = victim['url'].split('/')[2]

        os.remove(path)
        with patch('oio.blob.converter.BlobConverter.recover_chunk_fullpath') \
                as recover:
            self._convert_and_check(chunk_volume, path, {}, expected_errors=1)
            recover.assert_not_called()

    def test_recover_missing_old_fullpath(self):
        for c in self.chunks:
            convert_to_old_chunk(
                self._chunk_path(c), self.account, self.container, self.path,
                self.version, self.content_id)

        victim = random.choice(self.chunks)
        self._test_converter_single_chunk(victim)

    def test_recover_missing_content_path(self):
        for c in self.chunks:
            convert_to_old_chunk(
                self._chunk_path(c), self.account, self.container, self.path,
                self.version, self.content_id, add_old_fullpath=True)

        victim = random.choice(self.chunks)
        path = self._chunk_path(victim)
        remove_xattr(path, chunk_xattr_keys['content_path'])
        self._test_converter_single_chunk(victim)

    def test_recover_missing_old_fullpath_and_content_path(self):
        for c in self.chunks:
            convert_to_old_chunk(
                self._chunk_path(c), self.account, self.container, self.path,
                self.version, self.content_id)

        victim = random.choice(self.chunks)
        path = self._chunk_path(victim)
        remove_xattr(path, chunk_xattr_keys['content_path'])
        self._test_converter_single_chunk(victim)

    def test_recover_missing_fullpath(self):
        """
        Test what happens when the BlobConverter encounters a chunk with
        neither a fullpath extended attribute nor any of the legacy
        attributes.
        """
        victim = random.choice(self.chunks)
        path = self._chunk_path(victim)
        remove_fullpath_xattr(path)
        self._test_converter_single_chunk(victim)

    def test_recover_missing_fullpath_not_indexed(self):
        """
        Test what happens when the BlobConverter encounters a chunk with
        neither a fullpath extended attribute nor any of the legacy
        attributes, and the chunk does not appear in rdir.
        """
        victim = random.choice(self.chunks)
        path = self._chunk_path(victim)
        remove_fullpath_xattr(path)
        self._deindex_chunk(victim)
        conf = dict(self.conf)
        conf['volume'] = self.rawx_volumes[self._chunk_volume_id(victim)]
        converter = BlobConverter(conf)
        self.assertRaises(KeyError, converter.recover_chunk_fullpath, path)

    def test_recover_missing_fullpath_orphan_chunk(self):
        """
        Test what happens when the BlobConverter encounters a chunk with
        neither a fullpath extended attribute nor any of the legacy
        attributes, and the chunk does not appear in object description.
        """
        victim = random.choice(self.chunks)
        path = self._chunk_path(victim)
        remove_fullpath_xattr(path)
        cbean = {
            'content': self.content_id,
            'hash': victim['hash'],
            'id': victim['url'],
            'size': victim['size'],
            'pos': victim['pos'],
            'type': 'chunk'
        }
        self.api.container.container_raw_delete(
            self.account, self.container, data=[cbean])
        conf = dict(self.conf)
        conf['volume'] = self.rawx_volumes[self._chunk_volume_id(victim)]
        converter = BlobConverter(conf)
        self.assertRaises(OrphanChunk, converter.recover_chunk_fullpath, path)
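
The converter tests above inspect chunk attributes through read_chunk_metadata
and chunk_xattr_keys. When debugging by hand, the raw extended attributes of a
chunk file can be dumped directly; below is a minimal Linux-only Python 3
sketch (the key names in the sample output are hypothetical and depend on the
deployment):

import os

def dump_chunk_xattrs(chunk_path):
    """Print every extended attribute stored on a chunk file."""
    for key in os.listxattr(chunk_path):
        print('%s=%r' % (key, os.getxattr(chunk_path, key)))

# Hypothetical output for a converted chunk:
# user.grid.content.fullpath:0123ABCD=b'account/container/obj/1/0123...'
# user.grid.oio.version=b'4.x'
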
Example #5
class Checker(object):
    def __init__(self,
                 namespace,
                 concurrency=50,
                 error_file=None,
                 rebuild_file=None,
                 check_xattr=True,
                 limit_listings=0,
                 request_attempts=1,
                 logger=None,
                 verbose=False,
                 check_hash=False,
                 min_time_in_error=0.0,
                 required_confirmations=0,
                 beanstalkd_addr=None,
                 beanstalkd_tube=BlobRebuilder.DEFAULT_BEANSTALKD_WORKER_TUBE,
                 cache_size=2**24,
                 **_kwargs):
        self.pool = GreenPool(concurrency)
        self.error_file = error_file
        self.error_sender = None
        self.check_xattr = bool(check_xattr)
        self.check_hash = bool(check_hash)
        self.logger = logger or get_logger(
            {'namespace': namespace}, name='integrity', verbose=verbose)
        # Optimisation for when we are only checking one object
        # or one container.
        # 0 -> do not limit
        # 1 -> limit account listings (list of containers)
        # 2 -> limit container listings (list of objects)
        self.limit_listings = limit_listings
        if self.error_file:
            outfile = open(self.error_file, 'a')
            self.error_writer = csv.writer(outfile, delimiter=' ')

        self.rebuild_file = rebuild_file
        if self.rebuild_file:
            self.fd = open(self.rebuild_file, 'a')
            self.rebuild_writer = csv.writer(self.fd, delimiter='|')

        if beanstalkd_addr:
            self.error_sender = BeanstalkdSender(beanstalkd_addr,
                                                 beanstalkd_tube, self.logger)

        self.api = ObjectStorageApi(namespace,
                                    logger=self.logger,
                                    max_retries=request_attempts - 1,
                                    request_attempts=request_attempts)
        self.rdir_client = RdirClient({"namespace": namespace},
                                      logger=self.logger)

        self.accounts_checked = 0
        self.containers_checked = 0
        self.objects_checked = 0
        self.chunks_checked = 0
        self.account_not_found = 0
        self.container_not_found = 0
        self.object_not_found = 0
        self.chunk_not_found = 0
        self.account_exceptions = 0
        self.container_exceptions = 0
        self.object_exceptions = 0
        self.chunk_exceptions = 0

        self.list_cache = CacheDict(cache_size)
        self.running_tasks = {}
        self.running_lock = Semaphore(1)
        self.result_queue = LightQueue(concurrency)

        self.running = True
        self.run_time = 0

        # Set of targets which must be checked again, to confirm
        # or deny the issues reported by previous passes.
        self.delayed_targets = dict()
        # Minimum time in error and number of confirmations of the error
        # before triggering a reconstruction action.
        self.min_time_in_error = min_time_in_error
        self.required_confirmations = required_confirmations

    def reset_stats(self):
        self.accounts_checked = 0
        self.containers_checked = 0
        self.objects_checked = 0
        self.chunks_checked = 0
        self.account_not_found = 0
        self.container_not_found = 0
        self.object_not_found = 0
        self.chunk_not_found = 0
        self.account_exceptions = 0
        self.container_exceptions = 0
        self.object_exceptions = 0
        self.chunk_exceptions = 0

    def _spawn(self, func, target, *args, **kwargs):
        """
        Spawn a task on the internal GreenPool.
        Discards the task if the pool is no longer running.
        """
        if self.running:
            return self.pool.spawn(func, target, *args, **kwargs)
        self.logger.info("Discarding %s", target)
        return None

    def _spawn_n(self, func, target, *args, **kwargs):
        """
        Spawn a task on the internal GreenPool, do not wait for the result.
        Discards the task if the pool is no longer running.
        """
        if self.running:
            return self.pool.spawn_n(func, target, *args, **kwargs)
        self.logger.info("Discarding %s", target)
        return None

    def complete_target_from_chunk_metadata(self, target, xattr_meta):
        """
        Complete a Target object from metadata found in chunk's extended
        attributes. In case the "fullpath" is not available, try to read
        legacy metadata, and maybe ask meta1 to resolve the CID into
        account and container names.
        """
        # pylint: disable=unbalanced-tuple-unpacking
        try:
            acct, ct, path, vers, content_id = \
                decode_fullpath(xattr_meta['full_path'])
            target.account = acct
            target.container = ct
            target.obj = path
            target.content_id = content_id
            target.version = vers
        except KeyError:
            # No fullpath header, try legacy headers
            if 'content_path' in xattr_meta:
                target.obj = xattr_meta['content_path']
            if 'content_id' in xattr_meta:
                target.content_id = xattr_meta['content_id']
            if 'content_version' in xattr_meta:
                target.version = xattr_meta['content_version']
            cid = xattr_meta.get('container_id')
            if cid:
                try:
                    md = self.api.directory.show(cid=cid)
                    acct = md.get('account')
                    ct = md.get('name')
                    if acct:
                        target.account = acct
                    if ct:
                        target.container = ct
                except Exception as err:
                    self.logger.warn(
                        "Failed to resolve CID %s into account "
                        "and container names: %s", cid, err)

    def recover_and_complete_object_meta(self, target, chunk):
        _, rawx_service, chunk_id = chunk.rsplit('/', 2)
        # 1. Fetch chunk list from rdir (could be cached).
        # Unfortunately we cannot seek for a chunk ID.
        entries = [
            x for x in self.rdir_client.chunk_fetch(rawx_service, limit=-1)
            if x[2] == chunk_id
        ]
        if not entries:
            self.logger.warn('Chunk %s not found in rdir', chunk_id)
            return
        elif len(entries) > 1:
            self.logger.info('Chunk %s appears in %d objects', chunk_id,
                             len(entries))
        # 2. Find content and container IDs
        target.cid, target.content_id = entries[0][0:2]
        meta = self.api.object_get_properties(None,
                                              None,
                                              None,
                                              cid=target.cid,
                                              content=target.content_id)
        target.obj = meta['name']
        target.version = meta['version']
        target.account, target.container = self.api.resolve_cid(target.cid)

    def send_result(self, target, errors=None, irreparable=False):
        """
        Put an item in the result queue.
        """
        # TODO(FVE): send to an external queue.
        target.append_result(ItemResult(errors, irreparable))
        self.result_queue.put(target)

    def send_chunk_job(self, target, irreparable=False):
        """
        Send a "content broken" event, to trigger the
        reconstruction of the chunk.
        """
        item = (self.api.namespace, target.cid, target.content_id,
                target.chunk)
        ev_dict = BlobRebuilder.task_event_from_item(item)
        if irreparable:
            ev_dict['data']['irreparable'] = irreparable
        job = json.dumps(ev_dict)
        self.error_sender.send_job(job)
        self.error_sender.job_done()  # Don't expect any response

    def write_error(self, target, irreparable=False):
        if not self.error_file:
            return
        error = list()
        if irreparable:
            error.append(IRREPARABLE_PREFIX)
        error.append(target.account)
        if target.container:
            error.append(target.container)
        if target.obj:
            error.append(target.obj)
        if target.chunk:
            error.append(target.chunk)
        self.error_writer.writerow(error)

    def write_rebuilder_input(self, target, irreparable=False):
        error = list()
        if irreparable:
            error.append(IRREPARABLE_PREFIX)
        error.append(target.cid)
        # FIXME(FVE): ensure we always resolve content_id,
        # or pass object version along with object name.
        error.append(target.content_id or target.obj)
        error.append(target.chunk)
        self.rebuild_writer.writerow(error)

    def write_chunk_error(self, target, chunk=None, irreparable=False):
        if chunk is not None:
            target = target.copy()
            target.chunk = chunk
        self.write_error(target, irreparable=irreparable)
        if self.rebuild_file:
            self.write_rebuilder_input(target, irreparable=irreparable)
        if self.error_sender:
            self.send_chunk_job(target, irreparable=irreparable)

    def _check_chunk_xattr(self, target, obj_meta, xattr_meta):
        """
        Check coherency of chunk extended attributes with object metadata.

        :returns: a list of errors
        """
        errors = list()
        # Composed position -> erasure coding
        attr_prefix = 'meta' if '.' in obj_meta['pos'] else ''

        attr_key = attr_prefix + 'chunk_size'
        if str(obj_meta['size']) != xattr_meta.get(attr_key):
            errors.append(
                "'%s' xattr (%s) differs from size in meta2 (%s)" %
                (attr_key, xattr_meta.get(attr_key), obj_meta['size']))

        attr_key = attr_prefix + 'chunk_hash'
        if obj_meta['hash'] != xattr_meta.get(attr_key):
            errors.append(
                "'%s' xattr (%s) differs from hash in meta2 (%s)" %
                (attr_key, xattr_meta.get(attr_key), obj_meta['hash']))
        return errors

    def _check_chunk(self, target):
        """
        Execute various checks on a chunk:
        - does it appear in object's chunk list?
        - is it reachable?
        - are its extended attributes coherent?

        :returns: the list of errors encountered,
            and the chunk's owner object metadata.
        """
        chunk = target.chunk
        errors = list()
        obj_meta = None
        xattr_meta = None

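        # Reuse the cached result if another task already checked this
        # chunk, otherwise take the per-chunk lock.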
        cached = self._get_cached_or_lock(chunk)
        if cached is not None:
            return cached + (True, )

        self.logger.debug('Checking chunk "%s"', target)
        try:
            xattr_meta = self.api.blob_client.chunk_head(
                chunk, xattr=self.check_xattr, check_hash=self.check_hash)
        except exc.NotFound as err:
            self.chunk_not_found += 1
            errors.append('Not found: %s' % (err, ))
        except exc.FaultyChunk as err:
            self.chunk_exceptions += 1
            errors.append('Faulty: %r' % (err, ))
        except Exception as err:
            self.chunk_exceptions += 1
            errors.append('Check failed: %s' % (err, ))

        if not target.obj:
            if xattr_meta:
                self.complete_target_from_chunk_metadata(target, xattr_meta)
            else:
                self.recover_and_complete_object_meta(target, chunk)

        if target.obj:
            obj_listing, obj_meta = self.check_obj(target.copy_object())
            if chunk not in obj_listing:
                errors.append('Missing from object listing')
                db_meta = dict()
            else:
                db_meta = obj_listing[chunk]

            if db_meta and xattr_meta and self.check_xattr:
                errors.extend(
                    self._check_chunk_xattr(target, db_meta, xattr_meta))

        self.list_cache[chunk] = errors, obj_meta
        self._unlock(chunk)

        # Do not send errors directly, let the caller do it.
        # Indeed, it may want to check if the chunks can be repaired or not.
        self.chunks_checked += 1
        return errors, obj_meta, False

    def check_chunk(self, target):
        errors, _obj_meta, from_cache = self._check_chunk(target)
        # If the result comes from the cache, we already reported it.
        if not from_cache:
            self.send_result(target, errors, target.irreparable)
        return errors

    def _check_metachunk(self, target, stg_met, pos, chunks, recurse=0):
        """
        Check that a metachunk has the right number of chunks.

        :returns: the list of errors
        """
        required = stg_met.expected_chunks
        errors = list()
        chunk_results = list()

        if len(chunks) < required:
            missing_chunks = required - len(chunks)
            if stg_met.ec:
                subs = {x['num'] for x in chunks}
                for sub in range(required):
                    if sub not in subs:
                        chkt = target.copy()
                        chkt.chunk = '%d.%d' % (pos, sub)
                        err = "Missing chunk at position %s" % chkt.chunk
                        chunk_results.append((chkt, [err], False))
                        errors.append(err)
            else:
                for _ in range(missing_chunks):
                    chkt = target.copy()
                    chkt.chunk = '%d' % pos
                    err = "Missing chunk at position %d" % pos
                    chunk_results.append((chkt, [err], False))
                    errors.append(err)

        if recurse > 0:
            for chunk in chunks:
                tcopy = target.copy()
                tcopy.chunk = chunk['url']
                chunk_errors, _, from_cache = self._check_chunk(tcopy)
                chunk_results.append((tcopy, chunk_errors, from_cache))
                if chunk_errors:
                    errors.append("Unusable chunk %s at position %s" %
                                  (chunk['url'], chunk['pos']))

        irreparable = required - len(errors) < stg_met.min_chunks_to_read
        if irreparable:
            errors.append(
                "Unavailable metachunk at position %s "
                "(%d/%d chunks available, %d/%d required)" %
                (pos, required - len(errors), stg_met.expected_chunks,
                 stg_met.min_chunks_to_read, stg_met.expected_chunks))
        for tgt, errs, from_cache in chunk_results:
            # If the result comes from the cache, we already reported it.
            if not from_cache:
                self.send_result(tgt, errs, irreparable)
        # Since the "metachunk" is not an official item type,
        # this method does not report errors itself. Errors will
        # be reported as object errors.
        return errors
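    # Worked example of the irreparability test above, assuming an EC 6+3
    # storage policy: stg_met.expected_chunks == 9 and
    # stg_met.min_chunks_to_read == 6, so with 4 missing or unusable
    # chunks, 9 - 4 = 5 < 6 and the metachunk is reported as unavailable.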

    def _check_obj_policy(self, target, obj_meta, chunks, recurse=0):
        """
        Check that the list of chunks of an object matches
        the object's storage policy.

        :returns: the list of errors encountered
        """
        stg_met = STORAGE_METHODS.load(obj_meta['chunk_method'])
        chunks_by_pos = _sort_chunks(chunks, stg_met.ec)
        tasks = list()
        for pos, pchunks in iteritems(chunks_by_pos):
            tasks.append((pos,
                          self._spawn(self._check_metachunk,
                                      target.copy(),
                                      stg_met,
                                      pos,
                                      pchunks,
                                      recurse=recurse)))
        errors = list()
        for pos, task in tasks:
            if not task and not self.running:
                errors.append("Pos %d skipped: checker is exiting" % pos)
                continue
            try:
                errors.extend(task.wait())
            except Exception as err:
                errors.append("Check failed: pos %d: %s" % (pos, err))
        return errors

    def check_obj_versions(self, target, versions, recurse=0):
        """
        Run checks of all versions of the targeted object in parallel.
        """
        tasks = list()
        for ov in versions:
            tcopy = target.copy_object()
            tcopy.content_id = ov['id']
            tcopy.version = str(ov['version'])
            tasks.append((tcopy.version,
                          self._spawn(self.check_obj, tcopy, recurse=recurse)))
        errors = list()
        for version, task in tasks:
            if not task and not self.running:
                errors.append("Version %s skipped: checker is exiting" %
                              version)
                continue
            try:
                task.wait()
            except Exception as err:
                errors.append("Check failed: version %s: %s" % (version, err))
        if errors:
            # Send a result with the target without version to tell
            # we were not able to check all versions of the object.
            self.send_result(target, errors)

    def _load_obj_meta(self, target, errors):
        """
        Load object metadata and chunks.

        :param target: which object to check.
        :param errors: list of errors that will be appended
            in case any error occurs.
        :returns: a tuple with object metadata and a list of chunks.
        """
        try:
            return self.api.object_locate(target.account,
                                          target.container,
                                          target.obj,
                                          version=target.version,
                                          properties=False)
        except exc.NoSuchObject as err:
            self.object_not_found += 1
            errors.append('Not found: %s' % (err, ))
        except Exception as err:
            self.object_exceptions += 1
            errors.append('Check failed: %s' % (err, ))
        return None, []

    def _get_cached_or_lock(self, lock_key):
        # If something is running, wait for it
        with self.running_lock:
            event = self.running_tasks.get(lock_key)
        if event:
            event.wait()
            event = None

        # Maybe get a cached result
        if lock_key in self.list_cache:
            return self.list_cache[lock_key]

        # No cached result, try to compute the thing ourselves
        while True:
            with self.running_lock:
                # Another check while locked
                if lock_key in self.list_cache:
                    return self.list_cache[lock_key]
                # Still nothing cached
                event = self.running_tasks.get(lock_key)
                if event is None:
                    self.running_tasks[lock_key] = Event()
                    return None
            event.wait()

    def _unlock(self, lock_key):
        with self.running_lock:
            event = self.running_tasks[lock_key]
            del self.running_tasks[lock_key]
            event.send(True)
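    # The two methods above implement a "compute once, share the result"
    # pattern. A minimal usage sketch ('key' and 'compute' are placeholders,
    # not names from this code):
    #
    #     cached = self._get_cached_or_lock(key)
    #     if cached is not None:
    #         return cached            # another greenthread did the work
    #     result = compute()           # we hold the lock, do the work
    #     self.list_cache[key] = result
    #     self._unlock(key)            # cache is filled, wake the waiters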

    def check_obj(self, target, recurse=0):
        """
        Check one object version.
        If no version is specified, all versions of the object will be checked.

        :returns: the result of the check of the most recent version,
            or the one that is explicitly targeted.
        """
        account = target.account
        container = target.container
        obj = target.obj
        vers = target.version  # can be None

        cached = self._get_cached_or_lock((account, container, obj, vers))
        if cached is not None:
            return cached

        self.logger.info('Checking object "%s"', target)
        container_listing, _ = self.check_container(target.copy_container())
        errors = list()
        if obj not in container_listing:
            errors.append('Missing from container listing')
            # checksum = None
        else:
            versions = container_listing[obj]
            if vers is None:
                if target.content_id is None:
                    # No version specified, check all versions
                    self.check_obj_versions(target.copy_object(),
                                            versions,
                                            recurse=recurse)
                    # Now return the cached result of the most recent version
                    target.content_id = versions[0]['id']
                    target.version = str(versions[0]['version'])
                    res = self.check_obj(target, recurse=0)
                    self._unlock((account, container, obj, vers))
                    return res
                else:
                    for ov in versions:
                        if ov['id'] == target.content_id:
                            vers = str(ov['version'])
                            target.version = vers
                            break
                    else:
                        errors.append('Missing from container listing')

            # TODO check checksum match
            # checksum = container_listing[obj]['hash']

        meta, chunks = self._load_obj_meta(target, errors)

        chunk_listing = {c['url']: c for c in chunks}
        if meta:
            if target.content_id is None:
                target.content_id = meta['id']
            if target.version is None:
                target.version = str(meta['version'])
            self.list_cache[(account, container, obj, vers)] = \
                (chunk_listing, meta)
        self.objects_checked += 1
        self._unlock((account, container, obj, vers))

        # Skip the check if we could not locate the object
        if meta:
            errors.extend(
                self._check_obj_policy(target, meta, chunks, recurse=recurse))

        self.send_result(target, errors)
        return chunk_listing, meta

    def check_container(self, target, recurse=0):
        account = target.account
        container = target.container

        cached = self._get_cached_or_lock((account, container))
        if cached is not None:
            return cached

        self.logger.info('Checking container "%s"', target)
        account_listing = self.check_account(target.copy_account())
        errors = list()
        if container not in account_listing:
            errors.append('Missing from account listing')

        marker = None
        results = []
        ct_meta = dict()
        extra_args = dict()
        if self.limit_listings > 1 and target.obj:
            # When we are explicitly checking one object, start the listing
            # where this object is supposed to be. Do not use a limit,
            # but an end marker, in order to fetch all versions of the object.
            extra_args['prefix'] = target.obj
            extra_args['end_marker'] = target.obj + '\x00'  # HACK
        while True:
            try:
                resp = self.api.object_list(account,
                                            container,
                                            marker=marker,
                                            versions=True,
                                            **extra_args)
            except exc.NoSuchContainer as err:
                self.container_not_found += 1
                errors.append('Not found: %s' % (err, ))
                break
            except Exception as err:
                self.container_exceptions += 1
                errors.append('Check failed: %s' % (err, ))
                break

            truncated = resp.get('truncated', False)
            if truncated:
                marker = resp['next_marker']

            if resp['objects']:
                # safeguard, probably useless
                if not marker:
                    marker = resp['objects'][-1]['name']
                results.extend(resp['objects'])
                if not truncated or self.limit_listings > 1:
                    break
            else:
                ct_meta = resp
                ct_meta.pop('objects')
                break

        container_listing = dict()
        # Save all object versions, with the most recent first
        for obj in results:
            container_listing.setdefault(obj['name'], list()).append(obj)
        for versions in container_listing.values():
            versions.sort(key=lambda o: o['version'], reverse=True)

        if self.limit_listings <= 1:
            # We just listed the whole container, keep the result in a cache
            self.containers_checked += 1
            self.list_cache[(account, container)] = container_listing, ct_meta
        self._unlock((account, container))

        if recurse > 0:
            for obj_vers in container_listing.values():
                for obj in obj_vers:
                    tcopy = target.copy_object()
                    tcopy.obj = obj['name']
                    tcopy.content_id = obj['id']
                    tcopy.version = str(obj['version'])
                    self._spawn_n(self.check_obj, tcopy, recurse - 1)
        self.send_result(target, errors)
        return container_listing, ct_meta
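    # Note on the prefix/end_marker trick used above: with prefix=obj and
    # end_marker=obj + '\x00', the listing returns every version of that
    # single object and nothing else, '\x00' being the lowest possible
    # suffix. For example, prefix='photo' with end_marker='photo\x00'
    # matches all versions of 'photo' but excludes 'photo1'.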

    def check_account(self, target, recurse=0):
        account = target.account

        cached = self._get_cached_or_lock(account)
        if cached is not None:
            return cached

        self.logger.info('Checking account "%s"', target)
        errors = list()
        marker = None
        results = []
        extra_args = dict()
        if self.limit_listings > 0 and target.container:
            # When we are explicitly checking one container, start the listing
            # where this container is supposed to be, and list only one
            # container.
            extra_args['prefix'] = target.container
            extra_args['limit'] = 1
        while True:
            try:
                resp = self.api.container_list(account,
                                               marker=marker,
                                               **extra_args)
            except Exception as err:
                self.account_exceptions += 1
                errors.append('Check failed: %s' % (err, ))
                break
            if resp:
                marker = resp[-1][0]
                results.extend(resp)
                if self.limit_listings > 0:
                    break
            else:
                break

        containers = dict()
        for container in results:
            # Name, number of objects, number of bytes
            containers[container[0]] = (container[1], container[2])

        if self.limit_listings <= 0:
            # We just listed the whole account, keep the result in a cache
            self.accounts_checked += 1
            self.list_cache[account] = containers
        self._unlock(account)

        if recurse > 0:
            for container in containers:
                tcopy = target.copy_account()
                tcopy.container = container
                self._spawn_n(self.check_container, tcopy, recurse - 1)

        self.send_result(target, errors)
        return containers

    def check(self, target, recurse=0):
        if target.type == 'chunk':
            self._spawn_n(self.check_chunk, target)
        elif target.type == 'object':
            self._spawn_n(self.check_obj, target, recurse)
        elif target.type == 'container':
            self._spawn_n(self.check_container, target, recurse)
        else:
            self._spawn_n(self.check_account, target, recurse)

    def check_all_accounts(self, recurse=0):
        all_accounts = self.api.account_list()
        for acct in all_accounts:
            self.check(Target(acct), recurse=recurse)

    def fetch_results(self, rate_limiter=None):
        while self.running and not self.result_queue.empty():
            res = self.result_queue.get(True)
            yield res
            # Rate limiting is done on the result queue for now.
            # Someday we could implement a submission queue instead of
            # letting each worker submit tasks to the pool, and do
            # the rate limiting on this queue.
            if rate_limiter is not None:
                self.run_time = rate_limiter(self.run_time)
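    # Sketch of a compatible rate_limiter callable: it takes the previous
    # run time and returns the new one. Assuming the usual
    # oio.common.green.ratelimit(run_time, max_rate) helper is available:
    #
    #     from oio.common.green import ratelimit
    #     for result in checker.fetch_results(
    #             lambda run_time: ratelimit(run_time, 100)):
    #         ...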

    def merge_with_delayed_target(self, target):
        """
        Merge the specified target with a delayed one.

        :returns: the delayed target, if there is one, with an error log
            including the errors of the new target. Return the new target
            otherwise.
        """
        tkey = repr(target)
        prev_target = self.delayed_targets.get(tkey, target)
        if prev_target is not target:
            errors = dict(prev_target.error_log)
            errors.update(target.error_log)
            prev_target.error_log = sorted(errors.items())
        return prev_target

    def log_result(self, target):
        """
        Log a check result, if it shows errors. Dispatch the errors to the
        appropriate destinations (log files, queues, etc.).
        """
        # The result may come from a new target, or from an old target
        # we checked another time, or both.
        target = self.merge_with_delayed_target(target)
        if target.has_errors:
            time_in_error, confirmations = target.time_in_error()
            if (time_in_error < self.min_time_in_error
                    or confirmations < self.required_confirmations):
                self.logger.info("Delaying check for %s, %d/%d confirmations",
                                 target, confirmations,
                                 self.required_confirmations)
                self.delayed_targets[repr(target)] = target
            else:
                if target.type == 'chunk':
                    self.logger.info(
                        "Writing error for %s, %d/%d confirmations", target,
                        confirmations, self.required_confirmations)
                    self.write_chunk_error(target,
                                           irreparable=target.irreparable)
                else:
                    self.write_error(target, irreparable=target.irreparable)
                self.delayed_targets.pop(repr(target), None)
            self.logger.warn(
                '%s:%s\n%s', target,
                ' irreparable' if target.irreparable else '',
                target.latest_error_result().errors_to_str(err_format='  %s'))

    def run(self, rate_limiter=None):
        """
        Fetch results and write logs until all jobs have finished.

        :returns: a generator yielding check results.
        """
        while self.running and (self.pool.running() + self.pool.waiting()):
            for result in self.fetch_results(rate_limiter):
                self.log_result(result)
                yield result
            sleep(0.1)
        if self.running:
            self.pool.waitall()
        # No rate limiting
        for result in self.fetch_results():
            self.log_result(result)
            yield result
        self.list_cache = CacheDict(self.list_cache.size)

    def stop(self):
        self.logger.info("Stopping")
        self.running = False

    def report(self):
        success = True

        def _report_stat(name, stat):
            print("{0:18}: {1}".format(name, stat))

        print()
        print('Report')
        _report_stat("Accounts checked", self.accounts_checked)
        if self.account_not_found:
            success = False
            _report_stat("Missing accounts", self.account_not_found)
        if self.account_exceptions:
            success = False
            _report_stat("Exceptions", self.account_exceptions)
        print()
        _report_stat("Containers checked", self.containers_checked)
        if self.container_not_found:
            success = False
            _report_stat("Missing containers", self.container_not_found)
        if self.container_exceptions:
            success = False
            _report_stat("Exceptions", self.container_exceptions)
        print()
        _report_stat("Objects checked", self.objects_checked)
        if self.object_not_found:
            success = False
            _report_stat("Missing objects", self.object_not_found)
        if self.object_exceptions:
            success = False
            _report_stat("Exceptions", self.object_exceptions)
        print()
        _report_stat("Chunks checked", self.chunks_checked)
        if self.chunk_not_found:
            success = False
            _report_stat("Missing chunks", self.chunk_not_found)
        if self.chunk_exceptions:
            success = False
            _report_stat("Exceptions", self.chunk_exceptions)
        return success
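# A minimal usage sketch for the checker above (hypothetical namespace and
# recursion depth; the constructor is assumed to take the namespace first,
# as in the Checker variant shown later in this page):
#
#     checker = Checker('OPENIO')
#     checker.check(Target('my_account'), recurse=4)
#     for _result in checker.run():
#         pass                     # results are logged by log_result()
#     success = checker.report()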
Example No. 6
class TestBlobAuditor(BaseTestCase):

    def setUp(self):
        super(TestBlobAuditor, self).setUp()
        self.container = random_str(16)
        self.cid = cid_from_name(self.account, self.container)
        self.path = random_str(16)
        self.api = ObjectStorageApi(self.ns)
        self.blob_client = BlobClient(self.conf)

        self.api.container_create(self.account, self.container)
        _, chunks = self.api.container.content_prepare(
            self.account, self.container, self.path, 1)
        services = self.conscience.all_services('rawx')
        self.rawx_volumes = dict()
        for rawx in services:
            tags = rawx['tags']
            service_id = tags.get('tag.service_id', None)
            if service_id is None:
                service_id = rawx['addr']
            volume = tags.get('tag.vol', None)
            self.rawx_volumes[service_id] = volume

        self.api.object_create(
            self.account, self.container, obj_name=self.path, data="chunk")
        meta, self.chunks = self.api.object_locate(
            self.account, self.container, self.path)
        self.version = meta['version']
        self.content_id = meta['id']

    def _chunk_path(self, chunk):
        url = chunk['url']
        volume_id = url.split('/', 3)[2]
        chunk_id = url.split('/', 3)[3]
        volume = self.rawx_volumes[volume_id]
        return volume + '/' + chunk_id[:3] + '/' + chunk_id

    def test_audit(self):
        chunk = random.choice(self.chunks)
        chunk_volume = chunk['url'].split('/')[2]
        chunk_id = chunk['url'].split('/')[3]

        auditor = BlobAuditorWorker(self.conf, None,
                                    self.rawx_volumes[chunk_volume])
        auditor.chunk_audit(self._chunk_path(chunk), chunk_id)

    def test_audit_old_chunk(self):
        for c in self.chunks:
            convert_to_old_chunk(
                self._chunk_path(c), self.account, self.container, self.path,
                self.version, self.content_id)

        chunk = random.choice(self.chunks)
        chunk_volume = chunk['url'].split('/')[2]
        chunk_id = chunk['url'].split('/')[3]

        auditor = BlobAuditorWorker(self.conf, None,
                                    self.rawx_volumes[chunk_volume])
        auditor.chunk_audit(self._chunk_path(chunk), chunk_id)

    def test_audit_linked_chunk(self):
        self.api.object_link(
            self.account, self.container, self.path,
            self.account, self.container, self.path + '.link')

        chunk = random.choice(self.chunks)
        chunk_volume = chunk['url'].split('/')[2]
        chunk_id = chunk['url'].split('/')[3]
        chunk_path = self._chunk_path(chunk)
        auditor = BlobAuditorWorker(self.conf, None,
                                    self.rawx_volumes[chunk_volume])
        auditor.chunk_audit(chunk_path, chunk_id)

        linked_meta, linked_chunks = self.api.object_locate(
            self.account, self.container, self.path + '.link')
        self.assertNotEqual(self.content_id, linked_meta['id'])
        linked_chunk = random.choice(linked_chunks)
        linked_chunk_id = linked_chunk['url'].split('/')[3]
        linked_chunk_path = self._chunk_path(linked_chunk)
        auditor.chunk_audit(linked_chunk_path, linked_chunk_id)
        auditor.chunk_audit(chunk_path, chunk_id)

        copy_chunk(chunk_path, chunk_path + '.copy')
        auditor.chunk_audit(chunk_path + '.copy', chunk_id)

        self.api.object_delete(
            self.account, self.container, self.path)
        auditor.chunk_audit(linked_chunk_path, linked_chunk_id)
        self.assertRaises(OrphanChunk, auditor.chunk_audit,
                          chunk_path + '.copy', chunk_id)

    def test_audit_with_versioning(self):
        self.api.container_set_properties(
            self.account, self.container,
            system={'sys.m2.policy.version': '2'})
        self.api.object_create(
            self.account, self.container, obj_name=self.path, data="version")

        chunk = random.choice(self.chunks)
        chunk_volume = chunk['url'].split('/')[2]
        chunk_id = chunk['url'].split('/')[3]
        chunk_path = self._chunk_path(chunk)
        auditor = BlobAuditorWorker(self.conf, None,
                                    self.rawx_volumes[chunk_volume])
        auditor.chunk_audit(chunk_path, chunk_id)

        versioned_meta, versioned_chunks = self.api.object_locate(
            self.account, self.container, self.path)
        self.assertNotEqual(self.content_id, versioned_meta['id'])
        versioned_chunk = random.choice(versioned_chunks)
        versioned_chunk_volume = versioned_chunk['url'].split('/')[2]
        versioned_chunk_id = versioned_chunk['url'].split('/')[3]
        versioned_chunk_path = self._chunk_path(versioned_chunk)
        versioned_auditor = BlobAuditorWorker(
            self.conf, None, self.rawx_volumes[versioned_chunk_volume])
        versioned_auditor.chunk_audit(versioned_chunk_path, versioned_chunk_id)
        auditor.chunk_audit(chunk_path, chunk_id)

        copy_chunk(chunk_path, chunk_path + '.copy')
        auditor.chunk_audit(chunk_path + '.copy', chunk_id)

        self.api.object_delete(
            self.account, self.container, self.path, version=self.version)
        versioned_auditor.chunk_audit(versioned_chunk_path, versioned_chunk_id)
        self.assertRaises(OrphanChunk, auditor.chunk_audit,
                          chunk_path + '.copy', chunk_id)
Example No. 7
class BaseServiceIdTest(BaseTestCase):
    def setUp(self):
        super(BaseServiceIdTest, self).setUp()

        if not self.conf['with_service_id']:
            self.skipTest("Service ID not enabled")

        self._cnt = random_str(10)
        self.http = urllib3.PoolManager()
        self.api = ObjectStorageApi(self.ns)

    def tearDown(self):
        super(BaseServiceIdTest, self).tearDown()

    def _update_apache(self, port):
        path = HTTPD_CONF % (self.ns, self.name)
        with open(path, "r") as fp:
            data = fp.read().split('\n')
        for idx in xrange(len(data)):
            if data[idx].startswith('Listen'):
                data[idx] = data[idx].split(':')[0] + ':' + str(port)
            elif data[idx].startswith('<VirtualHost'):
                data[idx] = data[idx].split(':')[0] + ':' + str(port) + '>'
        with open(path, "w") as fp:
            fp.write('\n'.join(data))
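    # Example of the rewrite performed above (hypothetical config lines),
    # with port=6201:
    #     'Listen 127.0.0.1:6200'        -> 'Listen 127.0.0.1:6201'
    #     '<VirtualHost 127.0.0.1:6200>' -> '<VirtualHost 127.0.0.1:6201>'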

    def _cache_flush(self):
        for item in ['local', 'low', 'high']:
            r = self.http.request(
                'POST',
                'http://%s/v3.0/cache/flush/%s' % (self.conf['proxy'], item))
            self.assertEqual(r.status, 204)

    def _create_data(self):
        self.api.object_create(self.account,
                               self._cnt,
                               obj_name="plop",
                               data="*" * 1024)
        ret = self.api.object_locate(self.account, self._cnt, "plop")
        return ret

    def _service_in_charge_is_up(self):
        """
        Tells if the service in charge of the mock object is up and running.
        """
        try:
            # cache may be empty for meta2 as well, catch exceptions here
            self.api.object_locate(self.account, self._cnt, "plop")[1]
            return True
        except ServiceBusy:
            return False

    def _wait_for_data_availability(self, timeout=10):
        """Wait for the mock object to become available."""
        while timeout > 0 and not self._service_in_charge_is_up():
            time.sleep(1)
            timeout -= 1
        self.assertGreater(timeout, 0)

    def _update_event_watch(self, name, port):
        conf = None
        path = WATCH_CONF % (self.ns, name)
        with open(path, "r") as fp:
            conf = yaml.safe_load(fp)

        conf['port'] = port

        with open(path, "w") as fp:
            yaml.dump(conf, stream=fp)

    def _change_rawx_addr(self, name, port):
        self._service(name, "stop")

        self._update_gridinit_rawx(port)
        self._update_event_watch(name, port)
        self._update_apache(port)

        self._service(self.name, "reload")
        self._service(self.name, "restart")
        self._service("conscience-agent", "restart")
        check_call(["openio", "cluster", "flush", "rawx"])
        check_call(["openio", "cluster", "unlockall"])
        self._cache_flush()
Example No. 8
class TestObjectStorageApiPerfdata(BaseTestCase):

    def setUp(self):
        super(TestObjectStorageApiPerfdata, self).setUp()
        self.api = ObjectStorageApi(self.ns, endpoint=self.uri)
        self.created = list()

    def tearDown(self):
        super(TestObjectStorageApiPerfdata, self).tearDown()
        for ct, name in self.created:
            try:
                self.api.object_delete(self.account, ct, name)
            except Exception:
                logging.exception("Failed to delete %s/%s/%s//%s",
                                  self.ns, self.account, ct, name)

    def test_object_create_perfdata(self):
        perfdata = dict()
        container = random_str(8)
        obj = random_str(8)
        self.api.object_create(self.account, container, obj_name=obj, data=obj,
                               perfdata=perfdata)
        meta, chunks = self.api.object_locate(self.account, container, obj)
        self.assertIn('proxy', perfdata)
        self.assertIn('resolve', perfdata['proxy'])
        self.assertIn('meta2', perfdata['proxy'])
        self.assertIn('overall', perfdata['proxy'])
        self.assertIn('rawx', perfdata)
        if meta['policy'] == 'EC':
            self.assertIn('ec', perfdata['rawx'])
        for chunk in chunks:
            self.assertIn(chunk['url'], perfdata['rawx'])
        self.assertIn('overall', perfdata['rawx'])

        perfdata.clear()
        self.api.object_delete(self.account, container, obj, perfdata=perfdata)
        self.assertIn('proxy', perfdata)
        self.assertIn('resolve', perfdata['proxy'])
        self.assertIn('meta2', perfdata['proxy'])
        self.assertIn('overall', perfdata['proxy'])

    def test_object_fetch_perfdata(self):
        perfdata = dict()
        container = random_str(8)
        obj = random_str(8)
        self.api.object_create(self.account, container, obj_name=obj, data=obj)
        meta, chunks = self.api.object_locate(self.account, container, obj)
        stg_method = STORAGE_METHODS.load(meta['chunk_method'])
        _, stream = self.api.object_fetch(self.account, container, obj,
                                          perfdata=perfdata)
        self.assertIn('proxy', perfdata)
        self.assertIn('resolve', perfdata['proxy'])
        self.assertIn('meta2', perfdata['proxy'])
        self.assertIn('overall', perfdata['proxy'])
        self.assertNotIn('ttfb', perfdata)
        self.assertNotIn('ttlb', perfdata)

        buf = b''.join(stream)
        self.assertEqual(obj, buf)
        self.assertIn('rawx', perfdata)
        if stg_method.ec:
            self.assertIn('ec', perfdata['rawx'])
        nb_chunks_to_read = 0
        for chunk in chunks:
            if chunk['url'] in perfdata['rawx']:
                nb_chunks_to_read += 1
        self.assertLessEqual(stg_method.min_chunks_to_read,
                             nb_chunks_to_read)
        self.assertIn('overall', perfdata['rawx'])
        self.assertIn('ttfb', perfdata)
        self.assertIn('ttlb', perfdata)

        self.api.object_delete(self.account, container, obj)
Example No. 9
class TestContentRebuildFilter(BaseTestCase):
    def setUp(self):
        super(TestContentRebuildFilter, self).setUp()
        self.namespace = self.conf['namespace']
        self.gridconf = {"namespace": self.namespace}
        self.container = "TestContentRebuildFilter%f" % time.time()
        self.ref = self.container
        self.container_client = ContainerClient(self.conf)
        self.container_client.container_create(self.account, self.container)
        syst = self.container_client.container_get_properties(
            self.account, self.container)['system']
        self.container_id = syst['sys.name'].split('.', 1)[0]
        self.object_storage_api = ObjectStorageApi(namespace=self.namespace)
        queue_addr = choice(self.conf['services']['beanstalkd'])['addr']
        self.queue_url = queue_addr
        self.conf['queue_url'] = 'beanstalk://' + self.queue_url
        self.conf['tube'] = DEFAULT_REBUILDER_TUBE
        self.notify_filter = NotifyFilter(app=_App, conf=self.conf)
        bt = Beanstalk.from_url(self.conf['queue_url'])
        bt.drain_tube(DEFAULT_REBUILDER_TUBE)
        bt.close()

    def _create_event(self, content_name, present_chunks, missing_chunks,
                      content_id):
        event = {}
        event["when"] = time.time()
        event["event"] = "storage.content.broken"
        event["data"] = {
            "present_chunks": present_chunks,
            "missing_chunks": missing_chunks
        }
        event["url"] = {
            "ns": self.namespace,
            "account": self.account,
            "user": self.container,
            "path": content_name,
            "id": self.container_id,
            "content": content_id
        }
        return event

    def _is_chunks_created(self, previous, after, pos_created):
        remain = list(after)
        for p in previous:
            for r in remain:
                if p["url"] == r["url"]:
                    remain.remove(r)
                    break
        if len(remain) != len(pos_created):
            return False
        # Iterate over a copy: 'remain' is mutated inside the loop.
        for r in list(remain):
            if r["pos"] in pos_created:
                remain.remove(r)
            else:
                return False
        return True
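    # Example of the helper above (hypothetical values): if 'after'
    # contains exactly the chunks of 'previous' plus one new chunk whose
    # 'pos' is "0", then _is_chunks_created(previous, after, ["0"])
    # returns True; any extra, missing or misplaced new chunk makes it
    # return False.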

    def _rebuild(self, event, job_id=0):
        self.blob_rebuilder = subprocess.Popen([
            'oio-blob-rebuilder', self.namespace,
            '--beanstalkd=' + self.queue_url
        ])
        time.sleep(3)
        self.blob_rebuilder.kill()

    def _remove_chunks(self, chunks, content_id):
        if not chunks:
            return
        for chunk in chunks:
            chunk['id'] = chunk['url']
            chunk['content'] = content_id
            chunk['type'] = 'chunk'
        self.container_client.container_raw_delete(self.account,
                                                   self.container,
                                                   data=chunks)

    def _check_rebuild(self,
                       content_name,
                       chunks,
                       missing_pos,
                       meta,
                       chunks_to_remove,
                       chunk_created=True):
        self._remove_chunks(chunks_to_remove, meta['id'])
        event = self._create_event(content_name, chunks, missing_pos,
                                   meta['id'])
        self.notify_filter.process(env=event, cb=None)
        self._rebuild(event)
        _, after = self.object_storage_api.object_locate(
            container=self.container, obj=content_name, account=self.account)
        self.assertIs(chunk_created,
                      self._is_chunks_created(chunks, after, missing_pos))

    def test_nothing_missing(self):
        content_name = "test_nothing_missing"
        self.object_storage_api.object_create(account=self.account,
                                              container=self.container,
                                              data="test",
                                              policy="THREECOPIES",
                                              obj_name=content_name)

        meta, chunks = self.object_storage_api.object_locate(
            container=self.container, obj=content_name, account=self.account)
        chunks_to_remove = []
        for chunk in chunks:
            chunk.pop('score', None)

        missing_pos = []
        self._check_rebuild(content_name,
                            chunks,
                            missing_pos,
                            meta,
                            chunks_to_remove,
                            chunk_created=True)

    def test_missing_1_chunk(self):
        content_name = "test_missing_1_chunk"
        self.object_storage_api.object_create(account=self.account,
                                              container=self.container,
                                              data="test",
                                              policy="THREECOPIES",
                                              obj_name=content_name)
        meta, chunks = self.object_storage_api.object_locate(
            container=self.container, obj=content_name, account=self.account)
        chunks_to_remove = []
        chunks_to_remove.append(chunks.pop(0))
        for chunk in chunks:
            chunk.pop('score', None)

        missing_pos = ["0"]
        self._check_rebuild(content_name, chunks, missing_pos, meta,
                            chunks_to_remove)

    def test_missing_last_chunk(self):
        content_name = "test_missing_last_chunk"
        data = random_str(1024 * 1024 * 4)
        self.object_storage_api.object_create(account=self.account,
                                              container=self.container,
                                              data=data,
                                              policy="THREECOPIES",
                                              obj_name=content_name)
        meta, chunks = self.object_storage_api.object_locate(
            container=self.container, obj=content_name, account=self.account)
        chunks_to_remove = []
        chunks_to_remove.append(chunks.pop())  # the last chunk, position "3"
        for chunk in chunks:
            chunk.pop('score', None)

        missing_pos = ["3"]
        self._check_rebuild(content_name, chunks, missing_pos, meta,
                            chunks_to_remove)

    def test_missing_2_chunks(self):
        content_name = "test_missing_2_chunks"
        self.object_storage_api.object_create(account=self.account,
                                              container=self.container,
                                              data="test",
                                              policy="THREECOPIES",
                                              obj_name=content_name)
        meta, chunks = self.object_storage_api.object_locate(
            container=self.container, obj=content_name, account=self.account)
        chunks_to_remove = []
        for i in range(0, 2):
            chunks_to_remove.append(chunks.pop(0))
        for chunk in chunks:
            chunk.pop('score', None)

        missing_pos = ["0", "0"]
        self._check_rebuild(content_name, chunks, missing_pos, meta,
                            chunks_to_remove)

    def test_missing_all_chunks(self):
        content_name = "test_missing_all_chunks"
        self.object_storage_api.object_create(account=self.account,
                                              container=self.container,
                                              data="test",
                                              policy="SINGLE",
                                              obj_name=content_name)
        meta, chunks = self.object_storage_api.object_locate(
            container=self.container, obj=content_name, account=self.account)
        chunks_to_remove = []
        chunks_to_remove.append(chunks.pop(0))
        for chunk in chunks:
            chunk.pop('score', None)

        missing_pos = ["0"]
        self._check_rebuild(content_name,
                            chunks,
                            missing_pos,
                            meta,
                            chunks_to_remove,
                            chunk_created=False)

    def test_missing_all_chunks_of_a_pos(self):
        content_name = "test_missing_2_chunks"
        self.object_storage_api.object_create(account=self.account,
                                              container=self.container,
                                              data="test",
                                              policy="THREECOPIES",
                                              obj_name=content_name)
        meta, chunks = self.object_storage_api.object_locate(
            container=self.container, obj=content_name, account=self.account)
        chunks_to_remove = []
        for i in range(0, 3):
            chunks_to_remove.append(chunks.pop(0))

        for chunk in chunks:
            chunk.pop('score', None)

        missing_pos = ["0"]
        self._check_rebuild(content_name,
                            chunks,
                            missing_pos,
                            meta,
                            chunks_to_remove,
                            chunk_created=False)

    def test_missing_multiple_chunks(self):
        content_name = "test_missing_multiple_chunks"
        data = random_str(1024 * 1024 * 4)
        self.object_storage_api.object_create(account=self.account,
                                              container=self.container,
                                              data=data,
                                              policy="THREECOPIES",
                                              obj_name=content_name)
        meta, chunks = self.object_storage_api.object_locate(
            container=self.container, obj=content_name, account=self.account)
        chunks_to_remove = []
        chunks_to_remove.append(chunks.pop(9))
        chunks_to_remove.append(chunks.pop(6))
        chunks_to_remove.append(chunks.pop(4))
        chunks_to_remove.append(chunks.pop(0))
        for chunk in chunks:
            chunk.pop('score', None)

        missing_pos = ["0", "1", "2", "3"]
        self._check_rebuild(content_name, chunks, missing_pos, meta,
                            chunks_to_remove)

    def test_missing_1_chunk_ec(self):
        if len(self.conf['services']['rawx']) < 9:
            self.skipTest("Not enough rawx. "
                          "EC tests needs at least 9 rawx to run")
        content_name = "test_missing_1_chunk_ec"
        self.object_storage_api.object_create(account=self.account,
                                              container=self.container,
                                              data="test",
                                              policy="EC",
                                              obj_name=content_name)
        meta, chunks = self.object_storage_api.object_locate(
            container=self.container, obj=content_name, account=self.account)
        chunks_to_remove = []
        chunks_to_remove.append(chunks.pop(0))
        for chunk in chunks:
            chunk.pop('score', None)

        missing_pos = ["0.1"]
        self._check_rebuild(content_name, chunks, missing_pos, meta,
                            chunks_to_remove)

    def test_missing_m_chunk_ec(self):
        if len(self.conf['services']['rawx']) < 9:
            self.skipTest("Not enough rawx. "
                          "EC tests needs at least 9 rawx to run")
        content_name = "test_missing_m_chunk_ec"
        self.object_storage_api.object_create(account=self.account,
                                              container=self.container,
                                              data="test",
                                              policy="EC",
                                              obj_name=content_name)
        meta, chunks = self.object_storage_api.object_locate(
            container=self.container, obj=content_name, account=self.account)
        chunks_to_remove = []
        for i in range(0, 3):
            chunks_to_remove.append(chunks.pop(0))
        for chunk in chunks:
            chunk.pop('score', None)

        missing_pos = ["0.1", "0.2", "0.3"]
        self._check_rebuild(content_name, chunks, missing_pos, meta,
                            chunks_to_remove)

    def test_missing_m_chunk_ec_2(self):
        if len(self.conf['services']['rawx']) < 9:
            self.skipTest("Not enough rawx. "
                          "EC tests needs at least 9 rawx to run")
        content_name = "test_missing_m_chunk_ec"
        self.object_storage_api.object_create(account=self.account,
                                              container=self.container,
                                              data="test",
                                              policy="EC",
                                              obj_name=content_name)
        meta, chunks = self.object_storage_api.object_locate(
            container=self.container, obj=content_name, account=self.account)
        chunks_to_remove = []
        chunks_to_remove.append(chunks.pop(0))
        chunks_to_remove.append(chunks.pop(3))
        chunks_to_remove.append(chunks.pop(5))
        for chunk in chunks:
            chunk.pop('score', None)

        missing_pos = ["0.1", "0.5", "0.8"]
        self._check_rebuild(content_name, chunks, missing_pos, meta,
                            chunks_to_remove)

    def test_missing_m1_chunk_ec(self):
        if len(self.conf['services']['rawx']) < 9:
            self.skipTest("Not enough rawx. "
                          "EC tests needs at least 9 rawx to run")
        content_name = "test_missing_m1_chunk_ec"
        self.object_storage_api.object_create(account=self.account,
                                              container=self.container,
                                              data="test",
                                              policy="EC",
                                              obj_name=content_name)
        meta, chunks = self.object_storage_api.object_locate(
            container=self.container, obj=content_name, account=self.account)
        chunks_to_remove = []
        chunks_to_remove.append(chunks.pop(0))
        chunks_to_remove.append(chunks.pop(0))
        chunks_to_remove.append(chunks.pop(0))
        chunks_to_remove.append(chunks.pop(0))
        for chunk in chunks:
            chunk.pop('score', None)

        missing_pos = ["0.1", "0.2", "0.3", "0.4"]
        self._check_rebuild(content_name,
                            chunks,
                            missing_pos,
                            meta,
                            chunks_to_remove,
                            chunk_created=False)
Example No. 10
class Checker(object):
    def __init__(self,
                 namespace,
                 concurrency=50,
                 error_file=None,
                 rebuild_file=None,
                 full=True,
                 limit_listings=0,
                 request_attempts=1,
                 logger=None,
                 verbose=False,
                 integrity=False):
        self.pool = GreenPool(concurrency)
        self.error_file = error_file
        self.full = bool(full)
        self.integrity = bool(integrity)
        # Optimisation for when we are only checking one object
        # or one container.
        # 0 -> do not limit
        # 1 -> limit account listings (list of containers)
        # 2 -> limit container listings (list of objects)
        self.limit_listings = limit_listings
        if self.error_file:
            outfile = open(self.error_file, 'a')
            self.error_writer = csv.writer(outfile, delimiter=' ')

        self.rebuild_file = rebuild_file
        if self.rebuild_file:
            fd = open(self.rebuild_file, 'a')
            self.rebuild_writer = csv.writer(fd, delimiter='|')

        self.logger = logger or get_logger(
            {'namespace': namespace}, name='integrity', verbose=verbose)
        self.api = ObjectStorageApi(namespace,
                                    logger=self.logger,
                                    max_retries=request_attempts - 1,
                                    request_attempts=request_attempts)

        self.accounts_checked = 0
        self.containers_checked = 0
        self.objects_checked = 0
        self.chunks_checked = 0
        self.account_not_found = 0
        self.container_not_found = 0
        self.object_not_found = 0
        self.chunk_not_found = 0
        self.account_exceptions = 0
        self.container_exceptions = 0
        self.object_exceptions = 0
        self.chunk_exceptions = 0

        self.list_cache = {}
        self.running = {}
        self.result_queue = Queue()

    def complete_target_from_chunk_metadata(self, target, xattr_meta):
        """
        Complete a Target object from metadata found in chunk's extended
        attributes. In case the "fullpath" is not available, try to read
        legacy metadata, and maybe ask meta1 to resolve the CID into
        account and container names.
        """
        # pylint: disable=unbalanced-tuple-unpacking
        try:
            acct, ct, path, vers, content_id = \
                decode_fullpath(xattr_meta['full_path'])
            target.account = acct
            target.container = ct
            target.obj = path
            target.content_id = content_id
            target.version = vers
        except KeyError:
            # No fullpath header, try legacy headers
            if 'content_path' in xattr_meta:
                target.obj = xattr_meta['content_path']
            if 'content_id' in xattr_meta:
                target.content_id = xattr_meta['content_id']
            if 'content_version' in xattr_meta:
                target.version = xattr_meta['content_version']
            cid = xattr_meta.get('container_id')
            if cid:
                try:
                    md = self.api.directory.show(cid=cid)
                    acct = md.get('account')
                    ct = md.get('name')
                    if acct:
                        target.account = acct
                    if ct:
                        target.container = ct
                except Exception as err:
                    self.logger.warn(
                        "Failed to resolve CID %s into account "
                        "and container names: %s", cid, err)

    def send_result(self, target, errors=None):
        """
        Put an item in the result queue.
        """
        # TODO(FVE): send to an external queue.
        self.result_queue.put(ItemResult(target, errors))

    def write_error(self, target, irreparable=False):
        if not self.error_file:
            return
        error = list()
        if irreparable:
            error.append('#IRREPARABLE')
        error.append(target.account)
        if target.container:
            error.append(target.container)
        if target.obj:
            error.append(target.obj)
        if target.chunk:
            error.append(target.chunk)
        self.error_writer.writerow(error)

    def write_rebuilder_input(self, target, irreparable=False):
        # FIXME(FVE): cid can be computed from account and container names
        ct_meta = self.list_cache[(target.account, target.container)][1]
        try:
            cid = ct_meta['system']['sys.name'].split('.', 1)[0]
        except KeyError:
            cid = ct_meta['properties']['sys.name'].split('.', 1)[0]
        error = list()
        if irreparable:
            error.append('#IRREPARABLE')
        error.append(cid)
        # FIXME(FVE): ensure we always resolve content_id,
        # or pass object version along with object name.
        error.append(target.content_id or target.obj)
        error.append(target.chunk)
        self.rebuild_writer.writerow(error)

    def write_chunk_error(self, target, chunk=None, irreparable=False):
        if chunk is not None:
            target = target.copy()
            target.chunk = chunk
        self.write_error(target, irreparable=irreparable)
        if self.rebuild_file:
            self.write_rebuilder_input(target, irreparable=irreparable)

    def _check_chunk_xattr(self, target, obj_meta, xattr_meta):
        """
        Check coherency of chunk extended attributes with object metadata.

        :returns: a list of errors
        """
        errors = list()
        # Composed position -> erasure coding
        attr_prefix = 'meta' if '.' in obj_meta['pos'] else ''

        attr_key = attr_prefix + 'chunk_size'
        if str(obj_meta['size']) != xattr_meta.get(attr_key):
            errors.append(
                "'%s' xattr (%s) differs from size in meta2 (%s)" %
                (attr_key, xattr_meta.get(attr_key), obj_meta['size']))

        attr_key = attr_prefix + 'chunk_hash'
        if obj_meta['hash'] != xattr_meta.get(attr_key):
            errors.append(
                "'%s' xattr (%s) differs from hash in meta2 (%s)" %
                (attr_key, xattr_meta.get(attr_key), obj_meta['hash']))
        return errors

    def _check_chunk(self, target):
        """
        Execute various checks on a chunk:
        - does it appear in object's chunk list?
        - is it reachable?
        - are its extended attributes coherent?

        :returns: the list of errors encountered,
            and the chunk's owner object metadata.
        """
        chunk = target.chunk
        errors = list()
        obj_meta = None
        xattr_meta = None

        try:
            xattr_meta = self.api.blob_client.chunk_head(
                chunk, xattr=self.full, check_hash=self.integrity)
        except exc.NotFound as err:
            self.chunk_not_found += 1
            errors.append('Not found: %s' % (err, ))
        except exc.FaultyChunk as err:
            self.chunk_exceptions += 1
            errors.append('Faulty: %r' % (err, ))
        except Exception as err:
            self.chunk_exceptions += 1
            errors.append('Check failed: %s' % (err, ))

        if not target.obj and xattr_meta:
            self.complete_target_from_chunk_metadata(target, xattr_meta)

        if target.obj:
            obj_listing, obj_meta = self.check_obj(target.copy_object())
            if chunk not in obj_listing:
                errors.append('Missing from object listing')
                db_meta = dict()
            else:
                db_meta = obj_listing[chunk]

            if db_meta and xattr_meta and self.full:
                errors.extend(
                    self._check_chunk_xattr(target, db_meta, xattr_meta))

        self.send_result(target, errors)
        self.chunks_checked += 1
        return errors, obj_meta

    def check_chunk(self, target):
        errors, _obj_meta = self._check_chunk(target)
        return errors

    def _check_metachunk(self, target, stg_met, pos, chunks, recurse=0):
        """
        Check that a metachunk has the right number of chunks.

        :returns: the list of errors
        """
        required = stg_met.expected_chunks
        errors = list()

        if len(chunks) < required:
            missing_chunks = required - len(chunks)
            if stg_met.ec:
                subs = {x['num'] for x in chunks}
                for sub in range(required):
                    if sub not in subs:
                        errors.append("Missing chunk at position %d.%d" %
                                      (pos, sub))
            else:
                for _ in range(missing_chunks):
                    errors.append("Missing chunk at position %d" % pos)

        if recurse > 0:
            for chunk in chunks:
                tcopy = target.copy()
                tcopy.chunk = chunk['url']
                chunk_errors, _ = self._check_chunk(tcopy)
                if chunk_errors:
                    # The errors have already been reported by _check_chunk,
                    # but we must count this chunk among the unusable chunks
                    # of the current metachunk.
                    errors.append("Unusable chunk %s at position %s" %
                                  (chunk['url'], chunk['pos']))

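        # The metachunk can no longer be read (hence cannot be repaired)
        # when fewer than min_chunks_to_read chunks remain usable.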
        irreparable = required - len(errors) < stg_met.min_chunks_to_read
        if irreparable:
            errors.append(
                "Unavailable metachunk at position %s (%d/%d chunks)" %
                (pos, required - len(errors), stg_met.expected_chunks))
        # Since the "metachunk" is not an official item type,
        # this method does not report errors itself. Errors will
        # be reported as object errors.
        return errors

    def _check_obj_policy(self, target, obj_meta, chunks, recurse=0):
        """
        Check that the list of chunks of an object matches
        the object's storage policy.

        :returns: the list of errors encountered
        """
        stg_met = STORAGE_METHODS.load(obj_meta['chunk_method'])
        chunks_by_pos = _sort_chunks(chunks, stg_met.ec)
        tasks = list()
        for pos, pos_chunks in chunks_by_pos.iteritems():
            tasks.append((pos,
                          self.pool.spawn(self._check_metachunk,
                                          target.copy(),
                                          stg_met,
                                          pos,
                                          pos_chunks,
                                          recurse=recurse)))
        errors = list()
        for pos, task in tasks:
            try:
                errors.extend(task.wait())
            except Exception as err:
                errors.append("Check failed: pos %d: %s" % (pos, err))
        return errors

    def check_obj_versions(self, target, versions, recurse=0):
        """
        Run checks of all versions of the targeted object in parallel.
        """
        tasks = list()
        for ov in versions:
            tcopy = target.copy_object()
            tcopy.content_id = ov['id']
            tcopy.version = str(ov['version'])
            tasks.append((tcopy.version,
                          self.pool.spawn(self.check_obj,
                                          tcopy,
                                          recurse=recurse)))
        errors = list()
        for version, task in tasks:
            try:
                task.wait()
            except Exception as err:
                errors.append("Check failed: version %s: %s" % (version, err))
        if errors:
            # Send a result with the version-less target to signal that
            # we were not able to check all versions of the object.
            self.send_result(target, errors)

    def _load_obj_meta(self, target, errors):
        """
        Load object metadata and chunks.

        :param target: which object to check.
        :param errors: list of errors that will be appended
            in case any error occurs.
        :returns: a tuple with object metadata and a list of chunks.
        """
        try:
            return self.api.object_locate(target.account,
                                          target.container,
                                          target.obj,
                                          version=target.version,
                                          properties=False)
        except exc.NoSuchObject as err:
            self.object_not_found += 1
            errors.append('Not found: %s' % (err, ))
        except Exception as err:
            self.object_exceptions += 1
            errors.append('Check failed: %s' % (err, ))
        return None, []

    def check_obj(self, target, recurse=0):
        """
        Check one object version.

        If no version is specified, all versions of the object
        will be checked.

        :returns: the result of the check of the most recent version,
            or the one that is explicitly targeted.
        """
        account = target.account
        container = target.container
        obj = target.obj
        vers = target.version  # can be None
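        # Serialize concurrent checks of the same object version: wait for
        # any check already running, and reuse its cached result.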

        if (account, container, obj, vers) in self.running:
            self.running[(account, container, obj, vers)].wait()
        if (account, container, obj, vers) in self.list_cache:
            return self.list_cache[(account, container, obj, vers)]
        self.running[(account, container, obj, vers)] = Event()
        self.logger.info('Checking object "%s"', target)
        container_listing, _ = self.check_container(target.copy_container())
        errors = list()
        if obj not in container_listing:
            errors.append('Missing from container listing')
            # checksum = None
        else:
            versions = container_listing[obj]
            if vers is None:
                if target.content_id is None:
                    # No version specified, check all versions
                    self.check_obj_versions(target.copy_object(),
                                            versions,
                                            recurse=recurse)
                    # Now return the cached result of the most recent version
                    target.content_id = versions[0]['id']
                    target.version = str(versions[0]['version'])
                    res = self.check_obj(target, recurse=0)
                    self.running[(account, container, obj, vers)].send(True)
                    del self.running[(account, container, obj, vers)]
                    return res
                else:
                    for ov in versions:
                        if ov['id'] == target.content_id:
                            vers = str(ov['version'])
                            target.version = vers
                            break
                    else:
                        errors.append('Missing from container listing')

            # TODO check checksum match
            # checksum = container_listing[obj]['hash']

        meta, chunks = self._load_obj_meta(target, errors)

        chunk_listing = {c['url']: c for c in chunks}
        if meta:
            self.list_cache[(account, container, obj, vers)] = \
                (chunk_listing, meta)
        self.objects_checked += 1
        self.running[(account, container, obj, vers)].send(True)
        del self.running[(account, container, obj, vers)]

        # Skip the check if we could not locate the object
        if meta:
            errors.extend(
                self._check_obj_policy(target, meta, chunks, recurse=recurse))

        self.send_result(target, errors)
        return chunk_listing, meta

    def check_container(self, target, recurse=0):
        account = target.account
        container = target.container

        if (account, container) in self.running:
            self.running[(account, container)].wait()
        if (account, container) in self.list_cache:
            return self.list_cache[(account, container)]
        self.running[(account, container)] = Event()
        self.logger.info('Checking container "%s"', target)
        account_listing = self.check_account(target.copy_account())
        errors = list()
        if container not in account_listing:
            errors.append('Missing from account listing')

        marker = None
        results = []
        ct_meta = dict()
        extra_args = dict()
        if self.limit_listings > 1 and target.obj:
            # When we are explicitly checking one object, start the listing
            # where this object is supposed to be. Do not use a limit,
            # but an end marker, in order to fetch all versions of the object.
            extra_args['prefix'] = target.obj
            extra_args['end_marker'] = target.obj + '\x00'  # HACK
        while True:
            try:
                resp = self.api.object_list(account,
                                            container,
                                            marker=marker,
                                            versions=True,
                                            **extra_args)
            except exc.NoSuchContainer as err:
                self.container_not_found += 1
                errors.append('Not found: %s' % (err, ))
                break
            except Exception as err:
                self.container_exceptions += 1
                errors.append('Check failed: %s' % (err, ))
                break

            if resp.get('truncated', False):
                marker = resp['next_marker']

            if resp['objects']:
                # safeguard, probably useless
                if not marker:
                    marker = resp['objects'][-1]['name']
                results.extend(resp['objects'])
                if self.limit_listings > 1:
                    break
            else:
                ct_meta = resp
                ct_meta.pop('objects')
                break

        container_listing = dict()
        # Save all object versions, with the most recent first
        for obj in results:
            container_listing.setdefault(obj['name'], list()).append(obj)
        for versions in container_listing.values():
            versions.sort(key=lambda o: o['version'], reverse=True)

        if self.limit_listings <= 1:
            # We just listed the whole container, keep the result in a cache
            self.containers_checked += 1
            self.list_cache[(account, container)] = container_listing, ct_meta
        self.running[(account, container)].send(True)
        del self.running[(account, container)]

        if recurse > 0:
            for obj_vers in container_listing.values():
                for obj in obj_vers:
                    tcopy = target.copy_object()
                    tcopy.obj = obj['name']
                    tcopy.content_id = obj['id']
                    tcopy.version = str(obj['version'])
                    self.pool.spawn_n(self.check_obj, tcopy, recurse - 1)
        self.send_result(target, errors)
        return container_listing, ct_meta

    def check_account(self, target, recurse=0):
        account = target.account

        if account in self.running:
            self.running[account].wait()
        if account in self.list_cache:
            return self.list_cache[account]
        self.running[account] = Event()
        self.logger.info('Checking account "%s"', target)
        errors = list()
        marker = None
        results = []
        extra_args = dict()
        if self.limit_listings > 0 and target.container:
            # When we are explicitly checking one container, start the listing
            # where this container is supposed to be, and list only one
            # container.
            extra_args['prefix'] = target.container
            extra_args['limit'] = 1
        while True:
            try:
                resp = self.api.container_list(account,
                                               marker=marker,
                                               **extra_args)
            except Exception as err:
                self.account_exceptions += 1
                errors.append('Check failed: %s' % (err, ))
                break
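            # container_list returns a flat list of entries
            # [name, object count, byte count, ...]; paginate using the
            # last container name as the next marker.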
            if resp:
                marker = resp[-1][0]
                results.extend(resp)
                if self.limit_listings > 0:
                    break
            else:
                break

        containers = dict()
        for container in results:
            # Name, number of objects, number of bytes
            containers[container[0]] = (container[1], container[2])

        if self.limit_listings <= 0:
            # We just listed the whole account, keep the result in a cache
            self.accounts_checked += 1
            self.list_cache[account] = containers
        self.running[account].send(True)
        del self.running[account]

        if recurse > 0:
            for container in containers:
                tcopy = target.copy_account()
                tcopy.container = container
                self.pool.spawn_n(self.check_container, tcopy, recurse - 1)

        self.send_result(target, errors)
        return containers

    def check(self, target, recurse=0):
        if target.type == 'chunk':
            self.pool.spawn_n(self.check_chunk, target)
        elif target.type == 'object':
            self.pool.spawn_n(self.check_obj, target, recurse)
        elif target.type == 'container':
            self.pool.spawn_n(self.check_container, target, recurse)
        else:
            self.pool.spawn_n(self.check_account, target, recurse)

    def fetch_results(self):
        while not self.result_queue.empty():
            res = self.result_queue.get(True)
            yield res

    def log_result(self, result):
        if result.errors:
            if result.target.type == 'chunk':
                # FIXME(FVE): check error severity
                # and set the irreparable flag.
                self.write_chunk_error(result.target)
            else:
                self.write_error(result.target)
            self.logger.warn('%s:\n%s', result.target,
                             result.errors_to_str(err_format='  %s'))

    def run(self):
        """
        Fetch results and write logs until all jobs have finished.

        :returns: a generator yielding check results.
        """
        while self.pool.running() + self.pool.waiting():
            for result in self.fetch_results():
                self.log_result(result)
                yield result
            sleep(0.1)
        self.pool.waitall()
        for result in self.fetch_results():
            self.log_result(result)
            yield result

    def report(self):
        success = True

        def _report_stat(name, stat):
            print("{0:18}: {1}".format(name, stat))

        print()
        print('Report')
        _report_stat("Accounts checked", self.accounts_checked)
        if self.account_not_found:
            success = False
            _report_stat("Missing accounts", self.account_not_found)
        if self.account_exceptions:
            success = False
            _report_stat("Exceptions", self.account_exceptions)
        print()
        _report_stat("Containers checked", self.containers_checked)
        if self.container_not_found:
            success = False
            _report_stat("Missing containers", self.container_not_found)
        if self.container_exceptions:
            success = False
            _report_stat("Exceptions", self.container_exceptions)
        print()
        _report_stat("Objects checked", self.objects_checked)
        if self.object_not_found:
            success = False
            _report_stat("Missing objects", self.object_not_found)
        if self.object_exceptions:
            success = False
            _report_stat("Exceptions", self.object_exceptions)
        print()
        _report_stat("Chunks checked", self.chunks_checked)
        if self.chunk_not_found:
            success = False
            _report_stat("Missing chunks", self.chunk_not_found)
        if self.chunk_exceptions:
            success = False
            _report_stat("Exceptions", self.chunk_exceptions)
        return success
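
For reference, a minimal, hypothetical driver for the Checker above. The namespace string, account and container names are placeholders, and Target is assumed to accept account/container keywords, as in the converter test of Example #13 below:

checker = Checker('OPENIO')  # placeholder namespace name
checker.check(Target('my_account', container='my_container'), recurse=2)
for result in checker.run():
    pass  # results are also logged by log_result()
success = checker.report()
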
Example #11
class TestBlobRebuilder(BaseTestCase):
    def setUp(self):
        super(TestBlobRebuilder, self).setUp()
        self.container = random_str(16)
        self.cid = cid_from_name(self.account, self.container)
        self.path = random_str(16)
        self.api = ObjectStorageApi(self.ns)
        self.blob_client = BlobClient(self.conf)

        self.api.container_create(self.account, self.container)
        _, chunks = self.api.container.content_prepare(self.account,
                                                       self.container,
                                                       self.path, 1)
        if len(chunks) < 2:
            self.skipTest("need at least 2 chunks to run")

        services = self.conscience.all_services('rawx')
        self.rawx_volumes = dict()
        for rawx in services:
            tags = rawx['tags']
            service_id = tags.get('tag.service_id', None)
            if service_id is None:
                service_id = rawx['addr']
            volume = tags.get('tag.vol', None)
            self.rawx_volumes[service_id] = volume

        self.api.object_create(self.account,
                               self.container,
                               obj_name=self.path,
                               data="chunk")
        meta, self.chunks = self.api.object_locate(self.account,
                                                   self.container, self.path)
        self.version = meta['version']
        self.content_id = meta['id']

    def _chunk_path(self, chunk):
        url = chunk['url']
        volume_id = url.split('/', 3)[2]
        chunk_id = url.split('/', 3)[3]
        volume = self.rawx_volumes[volume_id]
        return volume + '/' + chunk_id[:3] + '/' + chunk_id

    def test_rebuild_old_chunk(self):
        if self._cls_conf['go_rawx']:
            self.skipTest('Rawx V2 reads only the new fullpath')
        for c in self.chunks:
            convert_to_old_chunk(self._chunk_path(c), self.account,
                                 self.container, self.path, self.version,
                                 self.content_id)

        chunk = random.choice(self.chunks)
        chunk_volume = chunk['url'].split('/')[2]
        chunk_id = chunk['url'].split('/')[3]
        chunk_headers, chunk_stream = self.blob_client.chunk_get(chunk['url'])
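        # Simulate a lost chunk by removing it from the rawx volume.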
        os.remove(self._chunk_path(chunk))
        chunks_kept = list(self.chunks)
        chunks_kept.remove(chunk)

        conf = self.conf.copy()
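        # allow_same_rawx lets the rebuilt chunk land on a rawx already
        # holding a chunk of this object (handy on small test platforms).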
        conf['allow_same_rawx'] = True
        rebuilder = BlobRebuilder(conf, None, chunk_volume)
        rebuilder_worker = rebuilder._create_worker()
        rebuilder_worker.chunk_rebuild(self.cid, self.content_id, chunk_id)

        _, new_chunks = self.api.object_locate(self.account, self.container,
                                               self.path)

        self.assertEqual(len(new_chunks), len(chunks_kept) + 1)
        url_kept = [c['url'] for c in chunks_kept]
        new_chunk = None
        for c in new_chunks:
            if c['url'] not in url_kept:
                self.assertIsNone(new_chunk)
                new_chunk = c

        self.assertNotEqual(chunk['real_url'], new_chunk['real_url'])
        self.assertNotEqual(chunk['url'], new_chunk['url'])
        self.assertEqual(chunk['pos'], new_chunk['pos'])
        self.assertEqual(chunk['size'], new_chunk['size'])
        self.assertEqual(chunk['hash'], new_chunk['hash'])

        new_chunk_headers, new_chunk_stream = self.blob_client.chunk_get(
            new_chunk['url'])
        self.assertEqual(chunk_stream.read(), new_chunk_stream.read())
        fullpath = encode_fullpath(self.account, self.container, self.path,
                                   self.version, self.content_id)
        self.assertEqual(fullpath, new_chunk_headers['full_path'])
        del new_chunk_headers['full_path']
        self.assertNotEqual(chunk_headers['chunk_id'],
                            new_chunk_headers['chunk_id'])
        new_chunk_id = new_chunk['url'].split('/')[3]
        self.assertEqual(new_chunk_id, new_chunk_headers['chunk_id'])
        del chunk_headers['chunk_id']
        del new_chunk_headers['chunk_id']
        self.assertEqual(OIO_VERSION, new_chunk_headers['oio_version'])
        del chunk_headers['oio_version']
        del new_chunk_headers['oio_version']
        self.assertEqual(chunk_headers, new_chunk_headers)
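
The _chunk_path helper above mirrors the on-disk layout of a rawx volume. As a standalone sketch (rawx_volumes is assumed to be the service-id-to-volume mapping built in setUp):

# A chunk URL 'http://<service_id>/<chunk_id>' is stored in the volume of
# <service_id>, in a subdirectory named after the first 3 characters of
# the chunk id.
def chunk_path(rawx_volumes, chunk_url):
    volume_id = chunk_url.split('/', 3)[2]
    chunk_id = chunk_url.split('/', 3)[3]
    return rawx_volumes[volume_id] + '/' + chunk_id[:3] + '/' + chunk_id
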
Example #12
class TestObjectStorageAPI(BaseTestCase):
    def setUp(self):
        super(TestObjectStorageAPI, self).setUp()
        self.api = ObjectStorageApi(self.ns, endpoint=self.uri)
        self.created = list()

    def tearDown(self):
        super(TestObjectStorageAPI, self).tearDown()
        for ct, name in self.created:
            try:
                self.api.object_delete(self.account, ct, name)
            except Exception:
                logging.exception("Failed to delete %s/%s/%s//%s", self.ns,
                                  self.account, ct, name)

    def _create(self, name, metadata=None):
        return self.api.container_create(self.account,
                                         name,
                                         properties=metadata)

    def _delete(self, name):
        self.api.container_delete(self.account, name)

    def _clean(self, name, clear=False):
        if clear:
            # properties must be removed before deleting the container
            self.api.container_del_properties(self.account, name, [])
        self._delete(name)

    def _get_properties(self, name, properties=None):
        return self.api.container_get_properties(self.account,
                                                 name,
                                                 properties=properties)

    def _set_properties(self, name, properties=None):
        return self.api.container_set_properties(self.account,
                                                 name,
                                                 properties=properties)

    def test_container_show(self):
        # container_show on unknown container
        name = random_str(32)
        self.assertRaises(exc.NoSuchContainer, self.api.container_show,
                          self.account, name)

        self._create(name)
        # container_show on existing container
        res = self.api.container_show(self.account, name)
        self.assertIsNot(res['properties'], None)

        self._delete(name)
        # container_show on deleted container
        self.assertRaises(exc.NoSuchContainer, self.api.container_show,
                          self.account, name)

    def test_container_create(self):
        name = random_str(32)
        res = self._create(name)
        self.assertEqual(res, True)

        # second create
        res = self._create(name)
        self.assertEqual(res, False)

        # clean
        self._delete(name)

    def test_create_properties(self):
        name = random_str(32)

        metadata = {
            random_str(32): random_str(32),
            random_str(32): random_str(32),
        }
        res = self._create(name, metadata)
        self.assertEqual(res, True)

        data = self._get_properties(name)

        self.assertEqual(data['properties'], metadata)

        # clean
        self._clean(name, True)

    def test_container_delete(self):
        name = random_str(32)

        # container_delete on unknown container
        self.assertRaises(exc.NoSuchContainer, self.api.container_delete,
                          self.account, name)

        res = self._create(name)
        self.assertEqual(res, True)
        # container_delete on existing container
        self._delete(name)

        # verify deleted
        self.assertRaises(exc.NoSuchContainer, self.api.container_show,
                          self.account, name)

        # second delete
        self.assertRaises(exc.NoSuchContainer, self.api.container_delete,
                          self.account, name)

        # verify deleted
        self.assertRaises(exc.NoSuchContainer, self.api.container_show,
                          self.account, name)

    def test_container_get_properties(self):
        name = random_str(32)

        # container_get_properties on unknown container
        self.assertRaises(exc.NoSuchContainer,
                          self.api.container_get_properties, self.account,
                          name)

        res = self._create(name)
        self.assertEqual(res, True)

        # container_get_properties on existing container
        data = self.api.container_get_properties(self.account, name)
        self.assertEqual(data['properties'], {})
        self.assertIsNot(data['system'], None)
        self.assertIn("sys.user.name", data['system'])

        # container_get_properties
        metadata = {
            random_str(32): random_str(32),
            random_str(32): random_str(32),
        }
        self._set_properties(name, metadata)

        data = self.api.container_get_properties(self.account, name)
        self.assertEqual(data['properties'], metadata)

        # clean
        self._clean(name, True)

        # container_get_properties on deleted container
        self.assertRaises(exc.NoSuchContainer,
                          self.api.container_get_properties, self.account,
                          name)

    def test_container_get_properties_filtered(self):
        self.skipTest("Server side properties filtering not implemented")
        name = random_str(32)

        res = self._create(name)
        self.assertEqual(res, True)

        # container_get_properties on existing container
        data = self.api.container_get_properties(self.account, name)
        self.assertEqual(data['properties'], {})

        # container_get_properties
        metadata = {
            random_str(32): random_str(32),
            random_str(32): random_str(32),
        }
        self._set_properties(name, metadata)

        # container_get_properties with a specific key
        key = metadata.keys().pop(0)

        data = self.api.container_get_properties(self.account, name, [key])
        self.assertEqual({key: metadata[key]}, data['properties'])

        # clean
        self._clean(name, True)

    def test_container_set_properties(self):
        name = random_str(32)

        metadata = {
            random_str(32): random_str(32),
            random_str(32): random_str(32),
        }

        # container_set_properties on unknown container
        self.assertRaises(exc.NoSuchContainer,
                          self.api.container_set_properties, self.account,
                          name, metadata)

        res = self._create(name)
        self.assertEqual(res, True)

        # container_set_properties on existing container
        self.api.container_set_properties(self.account, name, metadata)
        data = self._get_properties(name)
        self.assertEqual(data['properties'], metadata)

        # container_set_properties
        key = random_str(32)
        value = random_str(32)
        metadata2 = {key: value}
        self._set_properties(name, metadata2)
        metadata.update(metadata2)

        data = self._get_properties(name)
        self.assertEqual(data['properties'], metadata)

        # container_set_properties overwriting an existing key
        key = metadata.keys().pop(0)
        value = random_str(32)
        metadata3 = {key: value}

        metadata.update(metadata3)
        self.api.container_set_properties(self.account, name, metadata3)
        data = self._get_properties(name)
        self.assertEqual(data['properties'], metadata)

        # clean
        self._clean(name, True)

        # container_set_properties on deleted container
        self.assertRaises(exc.NoSuchContainer,
                          self.api.container_set_properties, self.account,
                          name, metadata)

    def test_del_properties(self):
        name = random_str(32)

        metadata = {
            random_str(32): random_str(32),
            random_str(32): random_str(32),
        }

        # container_del_properties on unknown container
        self.assertRaises(exc.NoSuchContainer,
                          self.api.container_del_properties, self.account,
                          name, [])

        res = self._create(name, metadata)
        self.assertEqual(res, True)

        key = metadata.keys().pop()
        del metadata[key]

        # container_del_properties on existing container
        self.api.container_del_properties(self.account, name, [key])
        data = self._get_properties(name)
        self.assertNotIn(key, data['properties'])

        key = random_str(32)
        # We do not check if a property exists before deleting it
        # self.assertRaises(
        #     exc.NoSuchContainer, self.api.container_del_properties,
        #     self.account, name, [key])
        self.api.container_del_properties(self.account, name, [key])

        data = self._get_properties(name)
        self.assertEqual(data['properties'], metadata)

        # clean
        self._clean(name, True)

        # container_del_properties on deleted container
        self.assertRaises(exc.NoSuchContainer,
                          self.api.container_del_properties, self.account,
                          name, metadata.keys())

    def test_object_create_mime_type(self):
        name = random_str(32)
        self.api.object_create(self.account,
                               name,
                               data="data",
                               obj_name=name,
                               mime_type='text/custom')
        meta, _ = self.api.object_locate(self.account, name, name)
        self.assertEqual(meta['mime_type'], 'text/custom')

    def _upload_data(self, name):
        chunksize = int(self.conf["chunk_size"])
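        # Upload 12 metachunks' worth of data so that ranged reads can
        # span several metachunk boundaries.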
        size = chunksize * 12
        data = random_data(size)
        self.api.object_create(self.account, name, obj_name=name, data=data)
        self.created.append((name, name))
        _, chunks = self.api.object_locate(self.account, name, name)
        logging.debug("Chunks: %s", chunks)
        return sort_chunks(chunks, False), data

    def _fetch_range(self, name, range_):
        if not isinstance(range_[0], tuple):
            ranges = (range_, )
        else:
            ranges = range_
        stream = self.api.object_fetch(self.account, name, name,
                                       ranges=ranges)[1]
        data = ""
        for chunk in stream:
            data += chunk
        return data

    def test_object_fetch_range_start(self):
        """From 0 to somewhere"""
        name = random_str(16)
        _, data = self._upload_data(name)
        end = 666
        fdata = self._fetch_range(name, (0, end))
        self.assertEqual(len(fdata), end + 1)
        self.assertEqual(fdata, data[0:end + 1])

    def test_object_fetch_range_end(self):
        """From somewhere to end"""
        name = random_str(16)
        chunks, data = self._upload_data(name)
        start = 666
        last = max(chunks.keys())
        end = chunks[last][0]['offset'] + chunks[last][0]['size']
        fdata = self._fetch_range(name, (start, end))
        self.assertEqual(len(fdata), len(data) - start)
        self.assertEqual(fdata, data[start:])

    def test_object_fetch_range_metachunk_start(self):
        """From the start of the second metachunk to somewhere"""
        name = random_str(16)
        chunks, data = self._upload_data(name)
        start = chunks[1][0]['offset']
        end = start + 666
        fdata = self._fetch_range(name, (start, end))
        self.assertEqual(len(fdata), end - start + 1)
        self.assertEqual(fdata, data[start:end + 1])

    def test_object_fetch_range_metachunk_end(self):
        """From somewhere to end of the first metachunk"""
        name = random_str(16)
        chunks, data = self._upload_data(name)
        start = 666
        end = chunks[0][0]['size'] - 1
        fdata = self._fetch_range(name, (start, end))
        self.assertEqual(len(fdata), end - start + 1)
        self.assertEqual(fdata, data[start:end + 1])

    def test_object_fetch_range_2_metachunks(self):
        """
        From somewhere in the first metachunk
        to somewhere in the second metachunk
        """
        name = random_str(16)
        chunks, data = self._upload_data(name)
        start = 666
        end = start + chunks[0][0]['size'] - 1
        fdata = self._fetch_range(name, (start, end))
        self.assertEqual(len(fdata), end - start + 1)
        self.assertEqual(fdata, data[start:end + 1])

    def test_object_fetch_several_ranges(self):
        """
        Download several ranges at once.
        """
        name = random_str(16)
        chunks, data = self._upload_data(name)
        start = 666
        end = start + chunks[0][0]['size'] - 1
        fdata = self._fetch_range(name, ((start, end), (end + 1, end + 2)))
        self.assertEqual(len(fdata), end - start + 3)
        self.assertEqual(fdata, data[start:end + 3])

        # Notice that we download some bytes from the second metachunk
        # before some from the first.
        fdata = self._fetch_range(
            name, ((chunks[0][0]['size'], chunks[0][0]['size'] + 2), (0, 1),
                   (1, 2), (4, 6)))
        self.assertEqual(len(fdata), 10)
        self.assertEqual(
            fdata, data[chunks[0][0]['size']:chunks[0][0]['size'] + 3] +
            data[0:2] + data[1:3] + data[4:7])

    def test_object_create_then_append(self):
        """Create an object then append data"""
        name = random_str(16)
        self.api.object_create(self.account,
                               name,
                               data="1" * 128,
                               obj_name=name)
        _, size, _ = self.api.object_create(self.account,
                                            name,
                                            data="2" * 128,
                                            obj_name=name,
                                            append=True)
        self.assertEqual(size, 128)
        _, data = self.api.object_fetch(self.account, name, name)
        data = "".join(data)
        self.assertEqual(len(data), 256)
        self.assertEqual(data, "1" * 128 + "2" * 128)

    def test_object_create_from_append(self):
        """Create an object with append operation"""
        name = random_str(16)
        self.api.container_create(self.account, name)
        self.api.object_create(self.account,
                               name,
                               data="1" * 128,
                               obj_name=name,
                               append=True)
        _, data = self.api.object_fetch(self.account, name, name)
        data = "".join(data)
        self.assertEqual(len(data), 128)
        self.assertEqual(data, "1" * 128)

    def test_container_object_create_from_append(self):
        """Try to create container and object with append operation"""
        name = random_str(16)
        _chunks, size, checksum = self.api.object_create(self.account,
                                                         name,
                                                         data="1" * 128,
                                                         obj_name=name,
                                                         append=True)
        self.assertEqual(size, 128)

        meta = self.api.object_get_properties(self.account, name, name)
        self.assertEqual(meta.get('hash', "").lower(), checksum.lower())

    def test_container_refresh(self):
        account = random_str(32)
        # container_refresh on unknown container
        name = random_str(32)
        self.assertRaises(exc.NoSuchContainer, self.api.container_refresh,
                          account, name)

        self.api.container_create(account, name)
        time.sleep(0.5)  # ensure container events have been processed
        # container_refresh on existing container
        self.api.container_refresh(account, name)
        time.sleep(0.5)  # ensure container events have been processed
        res = self.api.container_list(account, prefix=name)
        name_container, nb_objects, nb_bytes, _ = res[0]
        self.assertEqual(name_container, name)
        self.assertEqual(nb_objects, 0)
        self.assertEqual(nb_bytes, 0)

        self.api.object_create(account, name, data="data", obj_name=name)
        time.sleep(0.5)  # ensure container events have been processed
        # container_refresh on existing container with data
        self.api.container_refresh(account, name)
        time.sleep(0.5)  # ensure container events have been processed
        res = self.api.container_list(account, prefix=name)
        name_container, nb_objects, nb_bytes, _ = res[0]
        self.assertEqual(name_container, name)
        self.assertEqual(nb_objects, 1)
        self.assertEqual(nb_bytes, 4)

        self.api.object_delete(account, name, name)
        time.sleep(0.5)  # ensure container events have been processed
        self.api.container_delete(account, name)
        time.sleep(0.5)  # ensure container events have been processed
        # container_refresh on deleted container
        self.assertRaises(exc.NoSuchContainer, self.api.container_refresh,
                          account, name)

        self.api.account_delete(account)

    def test_container_refresh_user_not_found(self):
        name = random_str(32)
        self.api.account.container_update(name, name, {"mtime": time.time()})
        self.api.container_refresh(name, name)
        containers = self.api.container_list(name)
        self.assertEqual(len(containers), 0)
        self.api.account_delete(name)

    def test_account_refresh(self):
        # account_refresh on unknown account
        account = random_str(32)
        self.assertRaises(exc.NoSuchAccount, self.api.account_refresh, account)

        # account_refresh on existing account
        self.api.account_create(account)
        self.api.account_refresh(account)
        time.sleep(0.5)  # ensure container events have been processed
        res = self.api.account_show(account)
        self.assertEqual(res["bytes"], 0)
        self.assertEqual(res["objects"], 0)
        self.assertEqual(res["containers"], 0)

        name = random_str(32)
        self.api.object_create(account, name, data="data", obj_name=name)
        time.sleep(0.5)  # ensure container events have been processed
        self.api.account_refresh(account)
        time.sleep(0.5)  # ensure container events have been processed
        res = self.api.account_show(account)
        self.assertEqual(res["bytes"], 4)
        self.assertEqual(res["objects"], 1)
        self.assertEqual(res["containers"], 1)

        self.api.object_delete(account, name, name)
        time.sleep(0.5)  # ensure container events have been processed
        self.api.container_delete(account, name)
        time.sleep(0.5)  # ensure container events have been processed
        self.api.account_delete(account)
        # account_refresh on deleted account
        self.assertRaises(exc.NoSuchAccount, self.api.account_refresh, account)

    def test_all_accounts_refresh(self):
        # clear accounts
        accounts = self.api.account_list()
        for account in accounts:
            try:
                self.api.account_flush(account)
                self.api.account_delete(account)
            except exc.NoSuchAccount:  # account removed in the meantime
                pass

        # all_accounts_refresh with 0 accounts
        self.api.all_accounts_refresh()

        # all_accounts_refresh with 2 accounts
        account1 = random_str(32)
        self.api.account_create(account1)
        account2 = random_str(32)
        self.api.account_create(account2)
        self.api.all_accounts_refresh()
        res = self.api.account_show(account1)
        self.assertEqual(res["bytes"], 0)
        self.assertEqual(res["objects"], 0)
        self.assertEqual(res["containers"], 0)
        res = self.api.account_show(account2)
        self.assertEqual(res["bytes"], 0)
        self.assertEqual(res["objects"], 0)
        self.assertEqual(res["containers"], 0)

        self.api.account_delete(account1)
        self.api.account_delete(account2)

    def test_account_flush(self):
        # account_flush on unknown account
        account = random_str(32)
        self.assertRaises(exc.NoSuchAccount, self.api.account_flush, account)

        # account_flush on existing account
        name1 = random_str(32)
        self.api.container_create(account, name1)
        name2 = random_str(32)
        self.api.container_create(account, name2)
        time.sleep(0.5)  # ensure container events have been processed
        self.api.account_flush(account)
        containers = self.api.container_list(account)
        self.assertEqual(len(containers), 0)
        res = self.api.account_show(account)
        self.assertEqual(res["bytes"], 0)
        self.assertEqual(res["objects"], 0)
        self.assertEqual(res["containers"], 0)

        self.api.container_delete(account, name1)
        self.api.container_delete(account, name2)
        time.sleep(0.5)  # ensure container events have been processed
        self.api.account_delete(account)

        # account_flush on deleted account
        self.assertRaises(exc.NoSuchAccount, self.api.account_flush, account)

    def test_object_create_then_truncate(self):
        """Create an object then truncate data"""
        name = random_str(16)
        self.api.object_create(self.account,
                               name,
                               data="1" * 128,
                               obj_name=name)
        self.api.object_truncate(self.account, name, name, size=64)
        _, data = self.api.object_fetch(self.account, name, name)
        data = "".join(data)
        self.assertEqual(len(data), 64)
        self.assertEqual(data, "1" * 64)

    def test_object_create_append_then_truncate(self):
        """Create an object, append data then truncate on chunk boundary"""
        name = random_str(16)
        self.api.object_create(self.account,
                               name,
                               data="1" * 128,
                               obj_name=name)
        _, size, _ = self.api.object_create(self.account,
                                            name,
                                            data="2" * 128,
                                            obj_name=name,
                                            append=True)
        self.assertEqual(size, 128)

        self.api.object_truncate(self.account, name, name, size=128)
        _, data = self.api.object_fetch(self.account, name, name)
        data = "".join(data)
        self.assertEqual(len(data), 128)
        self.assertEqual(data, "1" * 128)

        self.api.object_truncate(self.account, name, name, size=128)

    def test_object_create_then_invalid_truncate(self):
        """Create an object, append data then try to truncate outside object
           range"""
        name = random_str(16)
        self.api.object_create(self.account,
                               name,
                               data="1" * 128,
                               obj_name=name)
        self.assertRaises(exc.OioException,
                          self.api.object_truncate,
                          self.account,
                          name,
                          name,
                          size=-1)
        self.assertRaises(exc.OioException,
                          self.api.object_truncate,
                          self.account,
                          name,
                          name,
                          size=129)

    def test_container_snapshot(self):
        name = random_str(16)
        self.api.container_create(self.account, name)
        test_object = "test_object"
        self.api.object_create(self.account,
                               name,
                               data="0" * 128,
                               obj_name=test_object)
        # A snapshot cannot have the same account and container name
        # as its source
        self.assertRaises(exc.ClientException, self.api.container_snapshot,
                          self.account, name, self.account, name)
        snapshot_name = random_str(16)
        self.assertNotEqual(snapshot_name, name)
        # Snapshotting to a non-existing container should work
        self.api.container_snapshot(self.account, name, self.account,
                                    snapshot_name)
        # Reusing an already taken snapshot name should fail
        self.assertRaises(exc.ClientException, self.api.container_snapshot,
                          self.account, name, self.account, snapshot_name)
        # The snapshot container is frozen, so creating an object
        # in it should fail
        self.assertRaises(exc.ServiceBusy,
                          self.api.object_create,
                          self.account,
                          snapshot_name,
                          data="1" * 128,
                          obj_name="should_not_be_created")

        # fullpath is set on every chunk
        chunk_list = self.api.object_locate(self.account, name, test_object)[1]
        # check that every chunk is different from the target
        snapshot_list = self.api.object_locate(self.account, snapshot_name,
                                               test_object)[1]

        for c, t in zip(chunk_list, snapshot_list):
            self.assertNotEqual(c['url'], t['url'])
        # check the source container can still be written to
        self.api.object_create(self.account,
                               name,
                               data="0" * 128,
                               obj_name="should_be_created")
        # Create and send a copy of an object's chunks
        url_list = [c['url'] for c in chunk_list]
        copy_list = self.api._generate_copy(url_list)
        # every chunk should have the fullpath
        fullpath = self.api._generate_fullpath(self.account, snapshot_name,
                                               'copy', 12456)
        self.api._send_copy(url_list, copy_list, fullpath[0])
        # check that every copy exists
        pool_manager = get_pool_manager()
        for c in copy_list:
            r = pool_manager.request('HEAD', c)
            self.assertEqual(r.status, 200)
            self.assertIn(fullpath[0],
                          r.headers["X-oio-chunk-meta-full-path"].split(','))
        # Snapshot of a non-existing container should fail
        self.assertRaises(exc.NoSuchContainer, self.api.container_snapshot,
                          random_str(16), random_str(16), random_str(16),
                          random_str(16))
        # A snapshot needs an account
        self.assertRaises(exc.ClientException, self.api.container_snapshot,
                          self.account, name, None, random_str(16))
        # A snapshot needs a container name
        self.assertRaises(exc.ClientException, self.api.container_snapshot,
                          self.account, name, random_str(16), None)
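
To recap the ranged-read API exercised above, a minimal sketch (the namespace and object names are placeholders): ranges are inclusive (start, end) byte pairs, several may be requested in one call, and object_fetch returns the object metadata plus a generator of data:

api = ObjectStorageApi('OPENIO')  # placeholder namespace name
meta, stream = api.object_fetch('my_account', 'my_container', 'my_object',
                                ranges=((0, 127), (256, 511)))
data = ''.join(stream)
assert len(data) == 384  # two inclusive ranges of 128 and 256 bytes
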
Example #13
class TestBlobConverter(BaseTestCase):

    def setUp(self):
        super(TestBlobConverter, self).setUp()
        self.container = random_str(16)
        self.path = random_str(16)
        self.api = ObjectStorageApi(self.ns)

        self.api.container_create(self.account, self.container)
        _, chunks = self.api.container.content_prepare(
            self.account, self.container, self.path, 1)
        services = self.conscience.all_services('rawx')
        self.rawx_volumes = dict()
        for rawx in services:
            tags = rawx['tags']
            service_id = tags.get('tag.service_id', None)
            if service_id is None:
                service_id = rawx['addr']
            volume = tags.get('tag.vol', None)
            self.rawx_volumes[service_id] = volume

        self.api.object_create(
            self.account, self.container, obj_name=self.path, data="chunk")
        meta, self.chunks = self.api.object_locate(
            self.account, self.container, self.path)
        self.version = meta['version']
        self.content_id = meta['id']

    def _chunk_path(self, chunk):
        url = chunk['url']
        volume_id = url.split('/', 3)[2]
        chunk_id = url.split('/', 3)[3]
        volume = self.rawx_volumes[volume_id]
        return volume + '/' + chunk_id[:3] + '/' + chunk_id

    def _converter_and_check(self, chunk_volume, chunk_path,
                             chunk_id_info, expected_raw_meta=None,
                             expected_errors=0):
        conf = self.conf
        conf['volume'] = self.rawx_volumes[chunk_volume]
        converter = BlobConverter(conf)
        converter.safe_convert_chunk(chunk_path)
        self.assertEqual(1, converter.total_chunks_processed)
        self.assertEqual(1, converter.passes)
        self.assertEqual(expected_errors, converter.errors)

        checker = Checker(self.ns)
        for chunk_id, info in chunk_id_info.iteritems():
            account, container, path, version, content_id = info
            fullpath = encode_fullpath(
                account, container, path, version, content_id)
            cid = cid_from_name(account, container)
            meta, raw_meta = read_chunk_metadata(chunk_path, chunk_id)

            self.assertEqual(meta.get('chunk_id'), chunk_id)
            self.assertEqual(meta.get('container_id'), cid)
            self.assertEqual(meta.get('content_path'), path)
            self.assertEqual(meta.get('content_version'), version)
            self.assertEqual(meta.get('content_id'), content_id)
            self.assertEqual(meta.get('full_path'), fullpath)

            checker.check(Target(
                account, container=container, obj=path,
                chunk='http://' + converter.volume_id + '/' + chunk_id))
            checker.wait()
            self.assertTrue(checker.report())

            if expected_raw_meta:
                self.assertDictEqual(expected_raw_meta, raw_meta)
                continue

            self.assertNotIn(chunk_xattr_keys['chunk_id'], raw_meta)
            self.assertNotIn(chunk_xattr_keys['container_id'], raw_meta)
            self.assertNotIn(chunk_xattr_keys['content_path'], raw_meta)
            self.assertNotIn(chunk_xattr_keys['content_version'], raw_meta)
            self.assertNotIn(chunk_xattr_keys['content_id'], raw_meta)
            self.assertIn(CHUNK_XATTR_CONTENT_FULLPATH_PREFIX + chunk_id,
                          raw_meta)
            for k in raw_meta:
                if k.startswith('oio:'):
                    self.fail('old fullpath xattr "%s" still present' % k)
            self.assertEqual(raw_meta[chunk_xattr_keys['oio_version']],
                             OIO_VERSION)

    def test_converter(self):
        chunk = random.choice(self.chunks)
        chunk_volume = chunk['url'].split('/')[2]
        chunk_id = chunk['url'].split('/')[3]
        chunk_path = self._chunk_path(chunk)

        self._converter_and_check(
            chunk_volume, chunk_path,
            {chunk_id: (self.account, self.container, self.path, self.version,
                        self.content_id)})

    def test_converter_old_chunk(self):
        for c in self.chunks:
            convert_to_old_chunk(
                self._chunk_path(c), self.account, self.container, self.path,
                self.version, self.content_id)

        chunk = random.choice(self.chunks)
        chunk_volume = chunk['url'].split('/')[2]
        chunk_id = chunk['url'].split('/')[3]
        chunk_path = self._chunk_path(chunk)

        self._converter_and_check(
            chunk_volume, chunk_path,
            {chunk_id: (self.account, self.container, self.path, self.version,
                        self.content_id)})

    def test_converter_old_chunk_with_wrong_path(self):
        for c in self.chunks:
            convert_to_old_chunk(
                self._chunk_path(c), self.account, self.container,
                self.path + '+', self.version, self.content_id)

        chunk = random.choice(self.chunks)
        chunk_volume = chunk['url'].split('/')[2]
        chunk_id = chunk['url'].split('/')[3]
        chunk_path = self._chunk_path(chunk)

        self._converter_and_check(
            chunk_volume, chunk_path,
            {chunk_id: (self.account, self.container, self.path, self.version,
                        self.content_id)})

    def test_converter_old_chunk_with_wrong_content_id(self):
        for c in self.chunks:
            convert_to_old_chunk(
                self._chunk_path(c), self.account, self.container, self.path,
                self.version, '0123456789ABCDEF0123456789ABCDEF')

        chunk = random.choice(self.chunks)
        chunk_volume = chunk['url'].split('/')[2]
        chunk_id = chunk['url'].split('/')[3]
        chunk_path = self._chunk_path(chunk)

        self._converter_and_check(
            chunk_volume, chunk_path,
            {chunk_id: (self.account, self.container, self.path, self.version,
                        self.content_id)})

    def test_converter_old_chunk_with_old_fullpath(self):
        for c in self.chunks:
            convert_to_old_chunk(
                self._chunk_path(c), self.account, self.container, self.path,
                self.version, self.content_id, add_old_fullpath=True)

        chunk = random.choice(self.chunks)
        chunk_volume = chunk['url'].split('/')[2]
        chunk_id = chunk['url'].split('/')[3]
        chunk_path = self._chunk_path(chunk)

        self._converter_and_check(
            chunk_volume, chunk_path,
            {chunk_id: (self.account, self.container, self.path, self.version,
                        self.content_id)})

    def test_converter_old_chunk_with_old_fullpath_and_wrong_path(self):
        for c in self.chunks:
            convert_to_old_chunk(
                self._chunk_path(c), self.account, self.container,
                self.path, self.version, self.content_id,
                add_old_fullpath=True)
            convert_to_old_chunk(
                self._chunk_path(c), self.account, self.container,
                self.path + '+', self.version, self.content_id)

        chunk = random.choice(self.chunks)
        chunk_volume = chunk['url'].split('/')[2]
        chunk_id = chunk['url'].split('/')[3]
        chunk_path = self._chunk_path(chunk)

        self._converter_and_check(
            chunk_volume, chunk_path,
            {chunk_id: (self.account, self.container, self.path, self.version,
                        self.content_id)})

    def test_converter_old_chunk_with_wrong_fullpath(self):
        for c in self.chunks:
            convert_to_old_chunk(
                self._chunk_path(c), self.account, self.container,
                self.path, 'None', '0123456789ABCDEF0123456789ABCDEF',
                add_old_fullpath=True)
            convert_to_old_chunk(
                self._chunk_path(c), self.account, self.container,
                self.path, self.version, self.content_id)

        chunk = random.choice(self.chunks)
        chunk_volume = chunk['url'].split('/')[2]
        chunk_id = chunk['url'].split('/')[3]
        chunk_path = self._chunk_path(chunk)

        self._converter_and_check(
            chunk_volume, chunk_path,
            {chunk_id: (self.account, self.container, self.path, self.version,
                        self.content_id)})

    def test_converter_linked_chunk(self):
        self.api.object_link(
            self.account, self.container, self.path,
            self.account, self.container, self.path + '.link')

        linked_meta, linked_chunks = self.api.object_locate(
            self.account, self.container, self.path + '.link')
        self.assertNotEqual(self.content_id, linked_meta['id'])

        chunk = random.choice(self.chunks)
        chunk_volume = chunk['url'].split('/')[2]
        chunk_id = chunk['url'].split('/')[3]
        chunk_path = self._chunk_path(chunk)
        for c in linked_chunks:
            if chunk_volume == c['url'].split('/')[2]:
                linked_chunk_id2 = c['url'].split('/')[3]
                break

        linked_chunk = random.choice(linked_chunks)
        linked_chunk_volume = linked_chunk['url'].split('/')[2]
        linked_chunk_id = linked_chunk['url'].split('/')[3]
        linked_chunk_path = self._chunk_path(linked_chunk)
        for c in self.chunks:
            if linked_chunk_volume == c['url'].split('/')[2]:
                chunk_id2 = c['url'].split('/')[3]
                break

        self._converter_and_check(
            chunk_volume, chunk_path,
            {chunk_id: (self.account, self.container, self.path, self.version,
                        self.content_id),
             linked_chunk_id2: (self.account, self.container,
                                self.path + '.link', linked_meta['version'],
                                linked_meta['id'])})

        self._converter_and_check(
            linked_chunk_volume, linked_chunk_path,
            {chunk_id2: (self.account, self.container, self.path, self.version,
                         self.content_id),
             linked_chunk_id: (self.account, self.container,
                               self.path + '.link', linked_meta['version'],
                               linked_meta['id'])})

    def test_converter_old_linked_chunk(self):
        self.api.object_link(
            self.account, self.container, self.path,
            self.account, self.container, self.path + '.link')

        linked_meta, linked_chunks = self.api.object_locate(
            self.account, self.container, self.path + '.link')
        self.assertNotEqual(self.content_id, linked_meta['id'])

        for c in linked_chunks:
            convert_to_old_chunk(
                self._chunk_path(c), self.account, self.container,
                self.path + '.link', 'None',
                '0123456789ABCDEF0123456789ABCDEF', add_old_fullpath=True)
        for c in self.chunks:
            convert_to_old_chunk(
                self._chunk_path(c), self.account, self.container, self.path,
                self.version, self.content_id)

        chunk = random.choice(self.chunks)
        chunk_volume = chunk['url'].split('/')[2]
        chunk_id = chunk['url'].split('/')[3]
        chunk_path = self._chunk_path(chunk)
        for c in linked_chunks:
            if chunk_volume == c['url'].split('/')[2]:
                linked_chunk_id2 = c['url'].split('/')[3]
                break

        linked_chunk = random.choice(linked_chunks)
        linked_chunk_volume = linked_chunk['url'].split('/')[2]
        linked_chunk_id = linked_chunk['url'].split('/')[3]
        linked_chunk_path = self._chunk_path(linked_chunk)
        for c in self.chunks:
            if linked_chunk_volume == c['url'].split('/')[2]:
                chunk_id2 = c['url'].split('/')[3]
                break

        self._converter_and_check(
            chunk_volume, chunk_path,
            {chunk_id: (self.account, self.container, self.path, self.version,
                        self.content_id),
             linked_chunk_id2: (self.account, self.container,
                                self.path + '.link', linked_meta['version'],
                                linked_meta['id'])})

        self._converter_and_check(
            linked_chunk_volume, linked_chunk_path,
            {chunk_id2: (self.account, self.container, self.path, self.version,
                         self.content_id),
             linked_chunk_id: (self.account, self.container,
                               self.path + '.link', linked_meta['version'],
                               linked_meta['id'])})

    def test_converter_old_chunk_with_link_on_same_object(self):
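        # Link an object onto its own name after downgrading its chunks:
        # the converter should report one error and leave the raw xattrs
        # unchanged, except for the oio_version.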
        for c in self.chunks:
            convert_to_old_chunk(
                self._chunk_path(c), self.account, self.container, self.path,
                self.version, self.content_id)

        # Link the object onto its own name: the path now designates a new
        # content ID.
        self.api.object_link(
            self.account, self.container, self.path,
            self.account, self.container, self.path)

        linked_meta, linked_chunks = self.api.object_locate(
            self.account, self.container, self.path)
        self.assertNotEqual(self.content_id, linked_meta['id'])

        linked_chunk = random.choice(linked_chunks)
        linked_chunk_volume = linked_chunk['url'].split('/')[2]
        linked_chunk_id = linked_chunk['url'].split('/')[3]
        linked_chunk_path = self._chunk_path(linked_chunk)

        # The old xattrs are not removed: after conversion, the raw metadata
        # should be unchanged except for the oio_version xattr.
        _, expected_raw_meta = read_chunk_metadata(linked_chunk_path,
                                                   linked_chunk_id)
        expected_raw_meta[chunk_xattr_keys['oio_version']] = OIO_VERSION

        self._converter_and_check(
            linked_chunk_volume, linked_chunk_path,
            {linked_chunk_id: (self.account, self.container,
                               self.path, linked_meta['version'],
                               linked_meta['id'])},
            expected_raw_meta=expected_raw_meta, expected_errors=1)

    def test_converter_old_linked_chunk_with_link_on_same_object(self):
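        # Same as test_converter_old_linked_chunk, but the link is then
        # re-linked onto its own name, so the '.link' path designates a new
        # content ID before the conversion runs.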
        self.api.object_link(
            self.account, self.container, self.path,
            self.account, self.container, self.path + '.link')

        linked_meta, linked_chunks = self.api.object_locate(
            self.account, self.container, self.path + '.link')
        self.assertNotEqual(self.content_id, linked_meta['id'])

        for c in linked_chunks:
            convert_to_old_chunk(
                self._chunk_path(c), self.account, self.container,
                self.path + '.link', 'None',
                '0123456789ABCDEF0123456789ABCDEF', add_old_fullpath=True)
        for c in self.chunks:
            convert_to_old_chunk(
                self._chunk_path(c), self.account, self.container, self.path,
                self.version, self.content_id, add_old_fullpath=True)

        # Link the link onto its own name: the '.link' path now designates
        # yet another content ID.
        self.api.object_link(
            self.account, self.container, self.path + '.link',
            self.account, self.container, self.path + '.link')

        linked_meta, linked_chunks = self.api.object_locate(
            self.account, self.container, self.path + '.link')
        self.assertNotEqual(self.content_id, linked_meta['id'])

        chunk = random.choice(self.chunks)
        chunk_volume = chunk['url'].split('/')[2]
        chunk_id = chunk['url'].split('/')[3]
        chunk_path = self._chunk_path(chunk)
        for c in linked_chunks:
            if chunk_volume == c['url'].split('/')[2]:
                linked_chunk_id2 = c['url'].split('/')[3]
                break

        linked_chunk = random.choice(linked_chunks)
        linked_chunk_volume = linked_chunk['url'].split('/')[2]
        linked_chunk_id = linked_chunk['url'].split('/')[3]
        linked_chunk_path = self._chunk_path(linked_chunk)
        for c in self.chunks:
            if linked_chunk_volume == c['url'].split('/')[2]:
                chunk_id2 = c['url'].split('/')[3]
                break

        self._converter_and_check(
            chunk_volume, chunk_path,
            {chunk_id: (self.account, self.container, self.path, self.version,
                        self.content_id),
             linked_chunk_id2: (self.account, self.container,
                                self.path + '.link', linked_meta['version'],
                                linked_meta['id'])})

        self._converter_and_check(
            linked_chunk_volume, linked_chunk_path,
            {chunk_id2: (self.account, self.container, self.path, self.version,
                         self.content_id),
             linked_chunk_id: (self.account, self.container,
                               self.path + '.link', linked_meta['version'],
                               linked_meta['id'])})

    def test_converter_with_versioning(self):
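        # Enable versioning, overwrite the object, and check that a chunk of
        # each version is converted to its own fullpath.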
        self.api.container_set_properties(
            self.account, self.container,
            system={'sys.m2.policy.version': '2'})
        self.api.object_create(
            self.account, self.container, obj_name=self.path, data='version')

        versioned_meta, versioned_chunks = self.api.object_locate(
            self.account, self.container, self.path)
        self.assertNotEqual(self.content_id, versioned_meta['id'])

        chunk = random.choice(self.chunks)
        chunk_volume = chunk['url'].split('/')[2]
        chunk_id = chunk['url'].split('/')[3]
        chunk_path = self._chunk_path(chunk)

        versioned_chunk = random.choice(versioned_chunks)
        versioned_chunk_volume = versioned_chunk['url'].split('/')[2]
        versioned_chunk_id = versioned_chunk['url'].split('/')[3]
        versioned_chunk_path = self._chunk_path(versioned_chunk)

        self._converter_and_check(
            chunk_volume, chunk_path,
            {chunk_id: (self.account, self.container, self.path, self.version,
                        self.content_id)})

        self._converter_and_check(
            versioned_chunk_volume, versioned_chunk_path,
            {versioned_chunk_id: (self.account, self.container, self.path,
                                  versioned_meta['version'],
                                  versioned_meta['id'])})

    def test_converter_old_chunk_with_versioning(self):
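        # Same as test_converter_with_versioning, but the chunks of both
        # versions are downgraded to old-style xattrs first.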
        for c in self.chunks:
            convert_to_old_chunk(
                self._chunk_path(c), self.account, self.container, self.path,
                self.version, self.content_id)

        self.api.container_set_properties(
            self.account, self.container,
            system={'sys.m2.policy.version': '2'})
        self.api.object_create(
            self.account, self.container, obj_name=self.path, data='version')

        versioned_meta, versioned_chunks = self.api.object_locate(
            self.account, self.container, self.path)
        self.assertNotEqual(self.content_id, versioned_meta['id'])
        for c in versioned_chunks:
            convert_to_old_chunk(
                self._chunk_path(c), self.account, self.container, self.path,
                versioned_meta['version'], versioned_meta['id'])

        chunk = random.choice(self.chunks)
        chunk_volume = chunk['url'].split('/')[2]
        chunk_id = chunk['url'].split('/')[3]
        chunk_path = self._chunk_path(chunk)

        versioned_chunk = random.choice(versioned_chunks)
        versioned_chunk_volume = versioned_chunk['url'].split('/')[2]
        versioned_chunk_id = versioned_chunk['url'].split('/')[3]
        versioned_chunk_path = self._chunk_path(versioned_chunk)

        self._converter_and_check(
            chunk_volume, chunk_path,
            {chunk_id: (self.account, self.container, self.path, self.version,
                        self.content_id)})

        self._converter_and_check(
            versioned_chunk_volume, versioned_chunk_path,
            {versioned_chunk_id: (self.account, self.container, self.path,
                                  versioned_meta['version'],
                                  versioned_meta['id'])})