Example #1
class ChunkOperator(object):
    """
    Execute maintenance operations on chunks.
    """

    def __init__(self, conf, logger=None):
        self.conf = conf
        self.logger = logger or get_logger(conf)
        self.rdir_client = RdirClient(conf, logger=self.logger)
        self.content_factory = ContentFactory(conf, logger=self.logger)

    def rebuild(self, container_id, content_id, chunk_id_or_pos,
                rawx_id=None, try_chunk_delete=False, allow_same_rawx=True):
        """
        Try to find the chunk in the metadata of the specified object,
        then rebuild it.
        """
        try:
            content = self.content_factory.get(container_id, content_id)
        except ContentNotFound:
            raise OrphanChunk('Content not found: possible orphan chunk')

        chunk_size = 0
        chunk_pos = None
        if len(chunk_id_or_pos) < 32:
            chunk_pos = chunk_id_or_pos
            chunk_id = None
            metapos = int(chunk_pos.split('.', 1)[0])
            chunk_size = content.chunks.filter(metapos=metapos).all()[0].size
        else:
            if '/' in chunk_id_or_pos:
                chunk_id = chunk_id_or_pos.rsplit('/', 1)[-1]
            else:
                chunk_id = chunk_id_or_pos

            chunk = content.chunks.filter(id=chunk_id).one()
            if chunk is None:
                raise OrphanChunk(
                    'Chunk not found in content: possible orphan chunk')
            elif rawx_id and chunk.host != rawx_id:
                raise ValueError('Chunk does not belong to this rawx')
            chunk_size = chunk.size

        content.rebuild_chunk(
            chunk_id, allow_same_rawx=allow_same_rawx,
            chunk_pos=chunk_pos)

        if try_chunk_delete and chunk_id is not None:
            # 'chunk' is only bound when a chunk ID was given, not a position
            try:
                content.blob_client.chunk_delete(chunk.url)
                self.logger.info("Chunk %s deleted", chunk.url)
            except NotFound as exc:
                self.logger.debug("Chunk %s: %s", chunk.url, exc)

        # This call does not raise exception if chunk is not referenced
        if chunk_id is not None:
            self.rdir_client.chunk_delete(
                chunk.host, container_id, content_id, chunk_id)

        return chunk_size
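
A minimal usage sketch for the operator above (imports omitted, as in the
examples on this page; the namespace and every ID below are placeholders,
not values from a real cluster):

# Hedged usage sketch: 'OPENIO' and the hex IDs are placeholders.
conf = {'namespace': 'OPENIO'}
operator = ChunkOperator(conf)

# An argument of 32 or more characters is treated as a chunk ID, a shorter
# one as a position ("<metapos>" or "<metapos>.<subpos>", e.g. "0.1").
rebuilt_size = operator.rebuild(
    container_id='0123456789ABCDEF0123456789ABCDEF'
                 '0123456789ABCDEF0123456789ABCDEF',
    content_id='3FA2C4A1ED2605005335A276890EC458',
    chunk_id_or_pos='A0A0A0A0A0A0A0A0A0A0A0A0A0A0A0A0',
    try_chunk_delete=True)
operator.logger.info('rebuilt %d bytes', rebuilt_size)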
Example #2
class TestFilters(BaseTestCase):

    def setUp(self):
        with mock.patch('oio.container.client.gen_headers',
                        gen_headers_mock):
            super(TestFilters, self).setUp()
            self.account = self.conf['account']
            self.namespace = self.conf['namespace']
            self.chunk_size = self.conf['chunk_size']
            self.gridconf = {'namespace': self.namespace}
            self.content_factory = ContentFactory(self.gridconf)
            self.container_name = 'TestFilter%f' % time.time()
            self.blob_client = BlobClient()
            self.container_client = ContainerClient(self.gridconf)
            self.container_client.container_create(acct=self.account,
                                                   ref=self.container_name)
            self.container_id = cid_from_name(self.account,
                                              self.container_name).upper()
            self.stgpol = "SINGLE"

    def _new_content(self, data, path):
        old_content = self.content_factory.new(self.container_id, path,
                                               len(data), self.stgpol)
        old_content.create(StringIO.StringIO(data))
        return self.content_factory.get(self.container_id,
                                        old_content.content_id)

    def test_slave_and_admin(self):
        if not os.getenv("SLAVE"):
            self.skipTest("must be in slave mode")
        data = random_data(10)
        path = 'test_slave'
        try:
            self._new_content(data, path)
            self.fail('Content creation should have failed in slave mode')
        except ClientException as e:
            print str(e)
            self.assertIn('NS slave!', str(e))
        with mock.patch('oio.container.client.gen_headers', gen_headers_mock):
            content = self._new_content(data, path)
            content.delete()

    def test_worm_and_admin(self):
        if not os.getenv("WORM"):
            self.skipTest("must be in worm mode")
        data = random_data(10)
        path = 'test_worm'
        content = self._new_content(data, path)
        try:
            content.delete()
            self.fail('Deletion should have failed in worm mode')
        except ClientException as e:
            self.assertIn('NS wormed!', str(e))
        downloaded_data = ''.join(content.fetch())
        self.assertEqual(downloaded_data, data)
        with mock.patch('oio.container.client.gen_headers', gen_headers_mock):
            content.delete()
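
Both tests patch oio.container.client.gen_headers with gen_headers_mock so
that the setup and cleanup requests carry admin credentials and get through
the slave/worm filters. The helper itself is not shown on this page; a
plausible sketch, in which the header names and values are assumptions
rather than the documented oio API:

import os

def gen_headers_mock():
    # Hypothetical stand-in: the exact header names are assumptions. The
    # point is only that the patched generator marks requests as admin so
    # the namespace filters let them through.
    return {'x-oio-req-id': os.urandom(16).encode('hex'),
            'x-oio-admin': '1'}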
Example #3
class TestContentFactory(BaseTestCase):
    def setUp(self):
        super(TestContentFactory, self).setUp()
        self.namespace = self.conf['namespace']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_name = "TestContentFactory%f" % time.time()
        self.container_client = ContainerClient(self.gridconf)
        self.container_client.container_create(acct=self.account,
                                               ref=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()

    def tearDown(self):
        super(TestContentFactory, self).tearDown()

    def test_extract_datasec(self):
        self.content_factory.ns_info = {
            "data_security": {
                "DUPONETWO": "DUP:distance=1|nb_copy=2",
                "RAIN": "RAIN:k=6|m=2|algo=liber8tion"
            },
            "storage_policy": {
                "RAIN": "NONE:RAIN:NONE",
                "SINGLE": "NONE:NONE:NONE",
                "TWOCOPIES": "NONE:DUPONETWO:NONE"
            }
        }

        ds_type, ds_args = self.content_factory._extract_datasec("RAIN")
        self.assertEqual(ds_type, "RAIN")
        self.assertEqual(ds_args, {"k": "6", "m": "2", "algo": "liber8tion"})

        ds_type, ds_args = self.content_factory._extract_datasec("SINGLE")
        self.assertEqual(ds_type, "DUP")
        self.assertEqual(ds_args, {"nb_copy": "1", "distance": "0"})

        ds_type, ds_args = self.content_factory._extract_datasec("TWOCOPIES")
        self.assertEqual(ds_type, "DUP")
        self.assertEqual(ds_args, {"nb_copy": "2", "distance": "1"})

        self.assertRaises(InconsistentContent,
                          self.content_factory._extract_datasec, "UnKnOwN")

    def test_get_rain(self):
        meta = {
            "chunk-method": "plain/rain?algo=liber8tion&k=6&m=2",
            "ctime": "1450176946",
            "deleted": "False",
            "hash": "E952A419957A6E405BFC53EC65483F73",
            "hash-method": "md5",
            "id": "3FA2C4A1ED2605005335A276890EC458",
            "length": "658",
            "mime-type": "application/octet-stream",
            "name": "tox.ini",
            "policy": "RAIN",
            "version": "1450176946676289"
        }
        chunks = [{
            "url": "http://127.0.0.1:6012/A0A0",
            "pos": "0.p0",
            "size": 512,
            "hash": "E7D4E4AD460971CA2E3141F2102308D4"
        }, {
            "url": "http://127.0.0.1:6010/A01",
            "pos": "0.1",
            "size": 146,
            "hash": "760AB5DA7C51A3654F1CA622687CD6C3"
        }, {
            "url": "http://127.0.0.1:6011/A00",
            "pos": "0.0",
            "size": 512,
            "hash": "B1D08B86B8CAA90A2092CCA0DF9201DB"
        }, {
            "url": "http://127.0.0.1:6013/A0A1",
            "pos": "0.p1",
            "size": 512,
            "hash": "DA9D7F72AEEA5791565724424CE45C16"
        }]
        self.content_factory.container_client.content_show = Mock(
            return_value=(meta, chunks))
        c = self.content_factory.get("xxx_container_id", "xxx_content_id")
        self.assertEqual(type(c), RainContent)
        self.assertEqual(c.content_id, "3FA2C4A1ED2605005335A276890EC458")
        self.assertEqual(c.length, 658)
        self.assertEqual(c.path, "tox.ini")
        self.assertEqual(c.version, "1450176946676289")
        self.assertEqual(c.algo, "liber8tion")
        self.assertEqual(c.k, 6)
        self.assertEqual(c.m, 2)
        self.assertEqual(len(c.chunks), 4)
        self.assertEqual(c.chunks[0].raw(), chunks[2])
        self.assertEqual(c.chunks[1].raw(), chunks[1])
        self.assertEqual(c.chunks[2].raw(), chunks[0])
        self.assertEqual(c.chunks[3].raw(), chunks[3])

    def test_get_dup(self):
        meta = {
            "chunk-method": "plain/bytes",
            "ctime": "1450176946",
            "deleted": "False",
            "hash": "E952A419957A6E405BFC53EC65483F73",
            "hash-method": "md5",
            "id": "3FA2C4A1ED2605005335A276890EC458",
            "length": "658",
            "mime-type": "application/octet-stream",
            "name": "tox.ini",
            "policy": "TWOCOPIES",
            "version": "1450176946676289"
        }
        chunks = [{
            "url": "http://127.0.0.1:6010/A0",
            "pos": "0",
            "size": 658,
            "hash": "E952A419957A6E405BFC53EC65483F73"
        }, {
            "url": "http://127.0.0.1:6011/A1",
            "pos": "0",
            "size": 658,
            "hash": "E952A419957A6E405BFC53EC65483F73"
        }]
        self.content_factory.container_client.content_show = Mock(
            return_value=(meta, chunks))
        c = self.content_factory.get("xxx_container_id", "xxx_content_id")
        self.assertEqual(type(c), DupContent)
        self.assertEqual(c.content_id, "3FA2C4A1ED2605005335A276890EC458")
        self.assertEqual(c.length, 658)
        self.assertEqual(c.path, "tox.ini")
        self.assertEqual(c.version, "1450176946676289")
        self.assertEqual(c.nb_copy, 2)
        self.assertEqual(c.distance, 1)
        self.assertEqual(len(c.chunks), 2)
        self.assertEqual(c.chunks[0].raw(), chunks[0])
        self.assertEqual(c.chunks[1].raw(), chunks[1])

    def test_new_rain(self):
        meta = {
            "chunk-method": "plain/rain?algo=liber8tion&k=6&m=2",
            "ctime": "1450341162",
            "deleted": "False",
            "hash": "",
            "hash-method": "md5",
            "id": "F4B1C8DD132705007DE8B43D0709DAA2",
            "length": "1000",
            "mime-type": "application/octet-stream",
            "name": "titi",
            "policy": "RAIN",
            "version": "1450341162332663"
        }
        chunks = [{
            "url": "http://127.0.0.1:6010/0_p1",
            "pos": "0.p1",
            "size": 1048576,
            "hash": "00000000000000000000000000000000"
        }, {
            "url": "http://127.0.0.1:6011/0_p0",
            "pos": "0.p0",
            "size": 1048576,
            "hash": "00000000000000000000000000000000"
        }, {
            "url": "http://127.0.0.1:6016/0_1",
            "pos": "0.1",
            "size": 1048576,
            "hash": "00000000000000000000000000000000"
        }, {
            "url": "http://127.0.0.1:6017/0_0",
            "pos": "0.0",
            "size": 1048576,
            "hash": "00000000000000000000000000000000"
        }]
        self.content_factory.container_client.content_prepare = Mock(
            return_value=(meta, chunks))
        c = self.content_factory.new("xxx_container_id", "titi", 1000, "RAIN")
        self.assertEqual(type(c), RainContent)
        self.assertEqual(c.content_id, "F4B1C8DD132705007DE8B43D0709DAA2")
        self.assertEqual(c.length, 1000)
        self.assertEqual(c.path, "titi")
        self.assertEqual(c.version, "1450341162332663")
        self.assertEqual(c.algo, "liber8tion")
        self.assertEqual(c.k, 6)
        self.assertEqual(c.m, 2)
        self.assertEqual(len(c.chunks), 4)
        self.assertEqual(c.chunks[0].raw(), chunks[3])
        self.assertEqual(c.chunks[1].raw(), chunks[2])
        self.assertEqual(c.chunks[2].raw(), chunks[1])
        self.assertEqual(c.chunks[3].raw(), chunks[0])

    def _new_content(self, stgpol, data):
        old_content = self.content_factory.new(self.container_id, "titi",
                                               len(data), stgpol)
        old_content.upload(StringIO.StringIO(data))
        return self.content_factory.get(self.container_id,
                                        old_content.content_id)

    def _test_change_policy(self, data_size, old_policy, new_policy):
        if (old_policy == "RAIN" or new_policy == "RAIN") \
                and len(self.conf['rawx']) < 8:
            self.skipTest("RAIN: Need more than 8 rawx to run")

        data = random_data(data_size)
        obj_type = {
            "SINGLE": DupContent,
            "TWOCOPIES": DupContent,
            "THREECOPIES": DupContent,
            "RAIN": RainContent
        }
        old_content = self._new_content(old_policy, data)
        self.assertEqual(type(old_content), obj_type[old_policy])

        changed_content = self.content_factory.change_policy(
            old_content.container_id, old_content.content_id, new_policy)

        self.assertRaises(NotFound,
                          self.container_client.content_show,
                          self.account,
                          cid=old_content.container_id,
                          content=old_content.content_id)

        new_content = self.content_factory.get(self.container_id,
                                               changed_content.content_id)
        self.assertEqual(type(new_content), obj_type[new_policy])

        downloaded_data = "".join(new_content.download())

        self.assertEqual(downloaded_data, data)

    # TODO add tests with RAIN empty contents when supported

    def test_change_content_1_byte_policy_single_to_rain(self):
        self._test_change_policy(1, "SINGLE", "RAIN")

    def test_change_content_chunksize_bytes_policy_twocopies_to_rain(self):
        self._test_change_policy(self.chunk_size, "TWOCOPIES", "RAIN")

    def test_change_content_2xchunksize_bytes_policy_threecopies_to_rain(self):
        self._test_change_policy(self.chunk_size * 2, "THREECOPIES", "RAIN")

    def test_change_content_1_byte_policy_rain_to_threecopies(self):
        self._test_change_policy(1, "RAIN", "THREECOPIES")

    def test_change_content_chunksize_bytes_policy_rain_to_twocopies(self):
        self._test_change_policy(self.chunk_size, "RAIN", "TWOCOPIES")

    def test_change_content_2xchunksize_bytes_policy_rain_to_single(self):
        self._test_change_policy(self.chunk_size * 2, "RAIN", "SINGLE")

    def test_change_content_0_byte_policy_twocopies_to_threecopies(self):
        self._test_change_policy(0, "TWOCOPIES", "THREECOPIES")

    def test_change_content_chunksize_bytes_policy_single_to_twocopies(self):
        self._test_change_policy(self.chunk_size, "SINGLE", "TWOCOPIES")

    def test_change_content_2xchunksize_bytes_policy_3copies_to_single(self):
        self._test_change_policy(self.chunk_size * 2, "THREECOPIES", "SINGLE")

    def test_change_content_with_same_policy(self):
        data = random_data(10)
        old_content = self._new_content("TWOCOPIES", data)
        changed_content = self.content_factory.change_policy(
            old_content.container_id, old_content.content_id, "TWOCOPIES")
        self.assertEqual(old_content.content_id, changed_content.content_id)

    def test_change_policy_unknown_content(self):
        self.assertRaises(ContentNotFound, self.content_factory.change_policy,
                          self.container_id, "1234", "SINGLE")

    def test_change_policy_unknown_storage_policy(self):
        data = random_data(10)
        old_content = self._new_content("TWOCOPIES", data)
        self.assertRaises(ClientException, self.content_factory.change_policy,
                          self.container_id, old_content.content_id, "UnKnOwN")
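
A condensed sketch of the flow the change-policy tests exercise (account
and container names are placeholders; imports of ContentFactory and
cid_from_name are omitted, as in the examples on this page):

import StringIO

# Hedged sketch: namespace, account and container names are placeholders,
# and 'mycontainer' is assumed to have been created beforehand.
factory = ContentFactory({'namespace': 'OPENIO'})
container_id = cid_from_name('myaccount', 'mycontainer').upper()

data = 'x' * 128
content = factory.new(container_id, 'sample', len(data), 'TWOCOPIES')
content.upload(StringIO.StringIO(data))

# change_policy rewrites the data under the new policy; the returned
# content carries a new content ID and the old one stops resolving.
migrated = factory.change_policy(container_id, content.content_id, 'RAIN')
assert migrated.content_id != content.content_id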
Example #4
class TestDupContent(BaseTestCase):
    def setUp(self):
        super(TestDupContent, self).setUp()

        if len(self.conf['rawx']) < 3:
            self.skipTest("Not enough rawx. "
                          "Dup tests needs more than 2 rawx to run")

        self.namespace = self.conf['namespace']
        self.account = self.conf['account']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_client = ContainerClient(self.gridconf)
        self.blob_client = BlobClient()
        self.container_name = "TestDupContent%f" % time.time()
        self.container_client.container_create(acct=self.account,
                                               ref=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()

    def tearDown(self):
        super(TestDupContent, self).tearDown()

    def _test_upload(self, stgpol, data_size):
        data = random_data(data_size)
        content = self.content_factory.new(self.container_id, "titi",
                                           len(data), stgpol)
        self.assertEqual(type(content), DupContent)

        content.upload(StringIO.StringIO(data))

        meta, chunks = self.container_client.content_show(
            cid=self.container_id, content=content.content_id)
        chunks = ChunksHelper(chunks)
        self.assertEqual(meta['hash'], md5_data(data))
        self.assertEqual(meta['length'], str(len(data)))
        self.assertEqual(meta['policy'], stgpol)
        self.assertEqual(meta['name'], "titi")

        metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size))
        if metachunk_nb == 0:
            metachunk_nb = 1  # special case for empty content

        if stgpol == "THREECOPIES":
            nb_copy = 3
        elif stgpol == "TWOCOPIES":
            nb_copy = 2
        elif stgpol == "SINGLE":
            nb_copy = 1

        self.assertEqual(len(chunks), metachunk_nb * nb_copy)

        for pos in range(metachunk_nb):
            chunks_at_pos = chunks.filter(pos=pos)
            self.assertEqual(len(chunks_at_pos), nb_copy)

            data_begin = pos * self.chunk_size
            data_end = pos * self.chunk_size + self.chunk_size
            chunk_hash = md5_data(data[data_begin:data_end])

            for chunk in chunks_at_pos:
                meta, stream = self.blob_client.chunk_get(chunk.url)
                self.assertEqual(md5_stream(stream), chunk_hash)
                self.assertEqual(meta['content_size'], str(len(data)))
                self.assertEqual(meta['content_path'], "titi")
                self.assertEqual(meta['content_cid'], self.container_id)
                self.assertEqual(meta['content_id'], content.content_id)
                self.assertEqual(meta['chunk_id'], chunk.id)
                self.assertEqual(meta['chunk_pos'], str(pos))
                self.assertEqual(meta['chunk_hash'], chunk_hash)

    def test_twocopies_upload_0_byte(self):
        self._test_upload("TWOCOPIES", 0)

    def test_twocopies_upload_1_byte(self):
        self._test_upload("TWOCOPIES", 1)

    def test_twocopies_upload_chunksize_bytes(self):
        self._test_upload("TWOCOPIES", self.chunk_size)

    def test_twocopies_upload_chunksize_plus_1_bytes(self):
        self._test_upload("TWOCOPIES", self.chunk_size + 1)

    def test_single_upload_0_byte(self):
        self._test_upload("SINGLE", 0)

    def test_single_upload_chunksize_plus_1_bytes(self):
        self._test_upload("SINGLE", self.chunk_size + 1)

    def test_chunks_cleanup_when_upload_failed(self):
        data = random_data(2 * self.chunk_size)
        content = self.content_factory.new(self.container_id, "titi",
                                           len(data), "TWOCOPIES")
        self.assertEqual(type(content), DupContent)

        # set bad url for position 1
        for chunk in content.chunks.filter(pos=1):
            chunk.url = "http://127.0.0.1:9/DEADBEEF"

        self.assertRaises(Exception, content.upload, StringIO.StringIO(data))
        for chunk in content.chunks.exclude(pos=1):
            self.assertRaises(NotFound,
                              self.blob_client.chunk_head, chunk.url)

    def _new_content(self, stgpol, data, broken_pos_list):
        old_content = self.content_factory.new(self.container_id, "titi",
                                               len(data), stgpol)
        self.assertEqual(type(old_content), DupContent)

        old_content.upload(StringIO.StringIO(data))

        for pos, idx in broken_pos_list:
            c = old_content.chunks.filter(pos=pos)[idx]
            self.blob_client.chunk_delete(c.url)

        # get the new structure of the uploaded content
        return self.content_factory.get(self.container_id,
                                        old_content.content_id)

    def _test_download(self, stgpol, data_size, broken_pos_list):
        data = random_data(data_size)
        content = self._new_content(stgpol, data, broken_pos_list)

        downloaded_data = "".join(content.download())

        self.assertEqual(downloaded_data, data)

        for pos, idx in broken_pos_list:
            # check nothing has been rebuilt
            c = content.chunks.filter(pos=pos)[0]
            self.assertRaises(NotFound, self.blob_client.chunk_delete, c.url)

    def test_twocopies_download_content_0_byte_without_broken_chunks(self):
        self._test_download("TWOCOPIES", 0, [])

    def test_twocopies_download_content_0_byte_with_broken_0_0(self):
        self._test_download("TWOCOPIES", 0, [(0, 0)])

    def test_twocopies_download_content_1_byte_without_broken_chunks(self):
        self._test_download("TWOCOPIES", 1, [])

    def test_twocopies_download_content_1_byte_with_broken_0_0(self):
        self._test_download("TWOCOPIES", 1, [(0, 0)])

    def test_twocopies_download_chunksize_bytes_without_broken_chunks(self):
        self._test_download("TWOCOPIES", self.chunk_size, [])

    def test_twocopies_download_2xchunksize_bytes_with_broken_0_0_and_1_0(self):
        self._test_download("TWOCOPIES", self.chunk_size * 2, [(0, 0), (1, 0)])

    def test_twocopies_download_content_chunksize_bytes_2_broken_chunks(self):
        data = random_data(self.chunk_size)
        content = self._new_content("TWOCOPIES", data, [(0, 0), (0, 1)])
        gen = content.download()
        self.assertRaises(UnrecoverableContent, gen.next)

    def test_single_download_content_1_byte_without_broken_chunks(self):
        self._test_download("SINGLE", 1, [])

    def test_single_download_chunksize_bytes_plus_1_without_broken_chunk(self):
        self._test_download("SINGLE", self.chunk_size * 2, [])
class TestDupContent(BaseTestCase):
    def setUp(self):
        super(TestDupContent, self).setUp()

        if len(self.conf['rawx']) < 3:
            self.skipTest("Not enough rawx. "
                          "Dup tests needs more than 2 rawx to run")

        self.namespace = self.conf['namespace']
        self.account = self.conf['account']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_client = ContainerClient(self.gridconf)
        self.blob_client = BlobClient()
        self.container_name = "TestDupContent%f" % time.time()
        self.container_client.container_create(acct=self.account,
                                               ref=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()

    def tearDown(self):
        super(TestDupContent, self).tearDown()

    def _test_upload(self, stgpol, data_size):
        data = random_data(data_size)
        content = self.content_factory.new(self.container_id, "titi",
                                           len(data), stgpol)
        self.assertEqual(type(content), DupContent)

        content.upload(StringIO.StringIO(data))

        meta, chunks = self.container_client.content_show(
            cid=self.container_id, content=content.content_id)
        chunks = ChunksHelper(chunks)
        self.assertEqual(meta['hash'], md5_data(data))
        self.assertEqual(meta['length'], str(len(data)))
        self.assertEqual(meta['policy'], stgpol)
        self.assertEqual(meta['name'], "titi")

        metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size))
        if metachunk_nb == 0:
            metachunk_nb = 1  # special case for empty content

        if stgpol == "THREECOPIES":
            nb_copy = 3
        elif stgpol == "TWOCOPIES":
            nb_copy = 2
        elif stgpol == "SINGLE":
            nb_copy = 1

        self.assertEqual(len(chunks), metachunk_nb * nb_copy)

        for pos in range(metachunk_nb):
            chunks_at_pos = chunks.filter(pos=pos)
            self.assertEqual(len(chunks_at_pos), nb_copy)

            data_begin = pos * self.chunk_size
            data_end = pos * self.chunk_size + self.chunk_size
            chunk_hash = md5_data(data[data_begin:data_end])

            for chunk in chunks_at_pos:
                meta, stream = self.blob_client.chunk_get(chunk.url)
                self.assertEqual(md5_stream(stream), chunk_hash)
                self.assertEqual(meta['content_size'], str(len(data)))
                self.assertEqual(meta['content_path'], "titi")
                self.assertEqual(meta['content_cid'], self.container_id)
                self.assertEqual(meta['content_id'], content.content_id)
                self.assertEqual(meta['chunk_id'], chunk.id)
                self.assertEqual(meta['chunk_pos'], str(pos))
                self.assertEqual(meta['chunk_hash'], chunk_hash)

    def test_twocopies_upload_0_byte(self):
        self._test_upload("TWOCOPIES", 0)

    def test_twocopies_upload_1_byte(self):
        self._test_upload("TWOCOPIES", 1)

    def test_twocopies_upload_chunksize_bytes(self):
        self._test_upload("TWOCOPIES", self.chunk_size)

    def test_twocopies_upload_chunksize_plus_1_bytes(self):
        self._test_upload("TWOCOPIES", self.chunk_size + 1)

    def test_single_upload_0_byte(self):
        self._test_upload("SINGLE", 0)

    def test_single_upload_chunksize_plus_1_bytes(self):
        self._test_upload("SINGLE", self.chunk_size + 1)

    def test_chunks_cleanup_when_upload_failed(self):
        data = random_data(2 * self.chunk_size)
        content = self.content_factory.new(self.container_id, "titi",
                                           len(data), "TWOCOPIES")
        self.assertEqual(type(content), DupContent)

        # set bad url for position 1
        for chunk in content.chunks.filter(pos=1):
            chunk.url = "http://127.0.0.1:9/DEADBEEF"

        self.assertRaises(Exception, content.upload, StringIO.StringIO(data))
        for chunk in content.chunks.exclude(pos=1):
            self.assertRaises(NotFound,
                              self.blob_client.chunk_head, chunk.url)

    def _new_content(self, stgpol, data, broken_pos_list=None):
        # avoid the mutable-default-argument pitfall
        if broken_pos_list is None:
            broken_pos_list = []
        old_content = self.content_factory.new(self.container_id, "titi",
                                               len(data), stgpol)
        self.assertEqual(type(old_content), DupContent)

        old_content.upload(StringIO.StringIO(data))

        broken_chunks_info = {}
        for pos, idx in broken_pos_list:
            c = old_content.chunks.filter(pos=pos)[idx]
            meta, stream = self.blob_client.chunk_get(c.url)
            if pos not in broken_chunks_info:
                broken_chunks_info[pos] = {}
            broken_chunks_info[pos][idx] = {
                "url": c.url,
                "id": c.id,
                "hash": c.hash,
                "dl_meta": meta,
                "dl_hash": md5_stream(stream)
            }
            self.blob_client.chunk_delete(c.url)

        # get the new structure of the uploaded content
        return (self.content_factory.get(
            self.container_id, old_content.content_id), broken_chunks_info)

    def _test_rebuild(self, stgpol, data_size, broken_pos_list,
                      full_rebuild_pos):
        data = random_data(data_size)
        content, broken_chunks_info = self._new_content(stgpol,
                                                        data, broken_pos_list)

        rebuild_pos, rebuild_idx = full_rebuild_pos
        rebuild_chunk_info = broken_chunks_info[rebuild_pos][rebuild_idx]
        content.rebuild_chunk(rebuild_chunk_info["id"])

        # get the new structure of the content
        rebuilt_content = self.content_factory.get(self.container_id,
                                                   content.content_id)
        self.assertEqual(type(rebuilt_content), DupContent)

        # find the rebuilt chunk
        for c in rebuilt_content.chunks.filter(pos=rebuild_pos):
            if len(content.chunks.filter(id=c.id)) > 0:
                # not the rebuilt chunk
                # if this chunk is broken, it must not have been rebuilt
                for b_c_i in broken_chunks_info[rebuild_pos].values():
                    if c.id == b_c_i["id"]:
                        with ExpectedException(NotFound):
                            _, _ = self.blob_client.chunk_get(c.url)
                continue
            meta, stream = self.blob_client.chunk_get(c.url)
            self.assertEqual(meta["chunk_id"], c.id)
            self.assertEqual(md5_stream(stream),
                             rebuild_chunk_info["dl_hash"])
            self.assertEqual(c.hash, rebuild_chunk_info["hash"])
            self.assertThat(c.url, NotEquals(rebuild_chunk_info["url"]))
            del meta["chunk_id"]
            del rebuild_chunk_info["dl_meta"]["chunk_id"]
            self.assertEqual(meta, rebuild_chunk_info["dl_meta"])

    def test_2copies_content_0_byte_1broken_rebuild_pos_0_idx_0(self):
        self._test_rebuild("TWOCOPIES", 0, [(0, 0)], (0, 0))

    def test_2copies_content_1_byte_1broken_rebuild_pos_0_idx_1(self):
        self._test_rebuild("TWOCOPIES", 1, [(0, 1)], (0, 1))

    def test_3copies_content_chunksize_bytes_2broken_rebuild_pos_0_idx_1(self):
        if len(self.conf['rawx']) <= 3:
            self.skipTest("Need more than 3 rawx")
        self._test_rebuild("THREECOPIES", self.chunk_size,
                           [(0, 0), (0, 1)], (0, 1))

    def test_3copies_content_2xchksize_bytes_2broken_rebuild_pos_1_idx_2(self):
        if len(self.conf['rawx']) <= 3:
            self.skipTest("Need more than 3 rawx")
        self._test_rebuild("THREECOPIES", 2 * self.chunk_size,
                           [(1, 0), (1, 2)], (1, 2))

    def test_2copies_content_0_byte_2broken_rebuild_pos_0_idx_0(self):
        with ExpectedException(UnrecoverableContent):
            self._test_rebuild("TWOCOPIES", 0, [(0, 0), (0, 1)], (0, 0))

    def _test_download(self, stgpol, data_size, broken_pos_list):
        data = random_data(data_size)
        content, _ = self._new_content(stgpol, data, broken_pos_list)

        downloaded_data = "".join(content.download())

        self.assertEqual(downloaded_data, data)

        for pos, idx in broken_pos_list:
            # check nothing has been rebuilt
            c = content.chunks.filter(pos=pos)[0]
            self.assertRaises(NotFound, self.blob_client.chunk_delete, c.url)

    def test_twocopies_download_content_0_byte_without_broken_chunks(self):
        self._test_download("TWOCOPIES", 0, [])

    def test_twocopies_download_content_0_byte_with_broken_0_0(self):
        self._test_download("TWOCOPIES", 0, [(0, 0)])

    def test_twocopies_download_content_1_byte_without_broken_chunks(self):
        self._test_download("TWOCOPIES", 1, [])

    def test_twocopies_download_content_1_byte_with_broken_0_0(self):
        self._test_download("TWOCOPIES", 1, [(0, 0)])

    def test_twocopies_download_chunksize_bytes_without_broken_chunks(self):
        self._test_download("TWOCOPIES", self.chunk_size, [])

    def test_twocopies_download_2xchunksize_bytes_with_broken_0_0_and_1_0(self):
        self._test_download("TWOCOPIES", self.chunk_size * 2, [(0, 0), (1, 0)])

    def test_twocopies_download_content_chunksize_bytes_2_broken_chunks(self):
        data = random_data(self.chunk_size)
        content, _ = self._new_content("TWOCOPIES", data, [(0, 0), (0, 1)])
        gen = content.download()
        self.assertRaises(UnrecoverableContent, gen.next)

    def test_single_download_content_1_byte_without_broken_chunks(self):
        self._test_download("SINGLE", 1, [])

    def test_single_download_chunksize_bytes_plus_1_without_broken_chunk(self):
        self._test_download("SINGLE", self.chunk_size * 2, [])
Example #6
class BlobConverter(object):
    def __init__(self, conf, logger=None, **kwargs):
        self.conf = conf
        self.logger = logger or get_logger(conf)
        volume = conf.get('volume')
        if not volume:
            raise ConfigurationException('No volume specified for converter')
        self.volume = volume
        self.namespace, self.volume_id = check_volume(self.volume)
        # cache
        self.name_by_cid = CacheDict()
        self.content_id_by_name = CacheDict()
        # client
        self.container_client = ContainerClient(conf, **kwargs)
        self.content_factory = ContentFactory(conf,
                                              self.container_client,
                                              logger=self.logger)
        # stats/logs
        self.errors = 0
        self.passes = 0
        self.total_chunks_processed = 0
        self.start_time = 0
        self.last_reported = 0
        self.report_interval = int_value(conf.get('report_interval'), 3600)
        # speed
        self.chunks_run_time = 0
        self.max_chunks_per_second = int_value(conf.get('chunks_per_second'),
                                               30)
        # backup
        self.no_backup = true_value(conf.get('no_backup', False))
        self.backup_dir = conf.get('backup_dir') or tempfile.gettempdir()
        self.backup_name = 'backup_%s_%f' \
            % (self.volume_id, time.time())
        # dry run
        self.dry_run = true_value(conf.get('dry_run', False))

    def save_xattr(self, chunk_id, xattr):
        if self.no_backup:
            return
        dirname = self.backup_dir + '/' + self.backup_name + '/' + chunk_id[:3]
        try:
            os.makedirs(dirname)
        except OSError:
            if not os.path.isdir(dirname):
                raise
        with open(dirname + '/' + chunk_id, 'w') as backup_fd:
            # same format as getfattr
            backup_fd.write('# file: ' + self._get_path(chunk_id) + '\n')
            for k, v in xattr.iteritems():
                backup_fd.write('user.' + k + '="' + v + '"\n')

    def _save_container(self, cid, account, container):
        cid = cid.upper()
        self.name_by_cid[cid] = (account, container)
        return cid, account, container

    def _save_content(self, cid, path, version, content_id):
        cid = cid.upper()
        content_id = content_id.upper()
        self.content_id_by_name[(cid, path, version)] = content_id
        return cid, path, version, content_id

    def _get_path(self, chunk_id):
        return self.volume + '/' + chunk_id[:3] + '/' + chunk_id

    def cid_from_name(self, account, container):
        cid = cid_from_name(account, container)
        cid, account, container = self._save_container(cid, account, container)
        return cid

    def name_from_cid(self, cid):
        name = self.name_by_cid.get(cid)
        if name:
            return name

        properties = self.container_client.container_get_properties(cid=cid)
        account = properties['system']['sys.account']
        container = properties['system']['sys.user.name']
        cid, account, container = self._save_container(cid, account, container)
        return account, container

    def content_id_from_name(self, cid, path, version, search=False):
        content_id = self.content_id_by_name.get((cid, path, version))
        if content_id or not search:
            return content_id

        properties = self.container_client.content_get_properties(
            cid=cid, path=path, version=version)
        content_id = properties['id']
        cid, path, version, content_id = self._save_content(
            cid, path, version, content_id)
        return content_id

    def decode_fullpath(self, fullpath):
        account, container, path, version, content_id = decode_fullpath(
            fullpath)
        cid = self.cid_from_name(account, container)
        cid, path, version, content_id = self._save_content(
            cid, path, version, content_id)
        return account, container, cid, path, version, content_id

    def decode_old_fullpath(self, old_fullpath):
        account, container, path, version = decode_old_fullpath(old_fullpath)
        cid = self.cid_from_name(account, container)
        content_id = self.content_id_from_name(cid, path, version)
        return account, container, cid, path, version, content_id

    def encode_fullpath(self, chunk_inode, chunk_id, account, container, path,
                        version, content_id):
        # check if chunk exists and has the same inode
        if not is_hexa(chunk_id) or len(chunk_id) != STRLEN_CHUNKID:
            raise ValueError('chunk ID must be hexadecimal (%s)' %
                             STRLEN_CHUNKID)
        try:
            chunk_inode2 = os.stat(self._get_path(chunk_id)).st_ino
        except OSError:
            raise OrphanChunk('No such chunk: possible orphan chunk')
        if chunk_inode2 != chunk_inode:
            raise OrphanChunk('Not the same inode: possible orphan chunk')

        # check fullpath and chunk ID
        if isinstance(version, basestring):
            try:
                version = int(version)
            except ValueError:
                raise ValueError('version must be a number')
        if version <= 0:
            raise ValueError('version must be positive')
        if not is_hexa(content_id):
            raise ValueError('content ID must be hexadecimal')

        fullpath = encode_fullpath(account, container, path, version,
                                   content_id.upper())

        return chunk_id.upper(), fullpath

    def _get_chunk_id_and_fullpath(self,
                                   chunk_inode,
                                   chunk_pos,
                                   content,
                                   chunk_id=None):
        content.container_id, content.account, content.container_name = \
            self._save_container(content.container_id, content.account,
                                 content.container_name)
        content.container_id, content.path, content.version, \
            content.content_id = self._save_content(
                content.container_id, content.path, content.version,
                content.content_id)

        chunks = content.chunks.filter(host=self.volume_id)
        if chunk_id:
            chunks = chunks.filter(id=chunk_id)
        chunk = chunks.filter(pos=chunk_pos).one()
        if chunk is None:
            raise OrphanChunk('Chunk not found in content: '
                              'possible orphan chunk')

        chunk_id, new_fullpath = self.encode_fullpath(
            chunk_inode, chunk.id, content.account, content.container_name,
            content.path, content.version, content.content_id)
        return chunk_id, new_fullpath

    def get_chunk_id_and_fullpath(self,
                                  chunk_inode,
                                  chunk_pos,
                                  container_id,
                                  path,
                                  version,
                                  chunk_id=None,
                                  account=None,
                                  container=None,
                                  content_id=None):
        if account is None or container is None:
            account, container = self.name_from_cid(container_id)

        if content_id:
            try:
                content = self.content_factory.get(container_id,
                                                   content_id,
                                                   account=account,
                                                   container_name=container)
                return self._get_chunk_id_and_fullpath(chunk_inode,
                                                       chunk_pos,
                                                       content,
                                                       chunk_id=chunk_id)
            except Exception as exc:
                self.logger.warn(
                    'chunk_id=%s chunk_pos=%s object=%s/%s/%s/%s/%s/%s: %s',
                    chunk_id, chunk_pos, str(account), str(container),
                    container_id, path, str(version), str(content_id), exc)

        # version must be integer
        try:
            version = str(int(version))
        except Exception:
            version = None

        try:
            content = self.content_factory.get_by_path_and_version(
                container_id,
                path,
                version,
                account=account,
                container_name=container)
        except ContentNotFound:
            raise OrphanChunk('Content not found: possible orphan chunk')
        return self._get_chunk_id_and_fullpath(chunk_inode,
                                               chunk_pos,
                                               content,
                                               chunk_id=chunk_id)

    def convert_chunk(self, fd, chunk_id):
        meta, raw_meta = read_chunk_metadata(fd,
                                             chunk_id,
                                             check_chunk_id=False)

        links = meta.get('links', dict())
        for chunk_id2, fullpath2 in links.iteritems():
            self.decode_fullpath(fullpath2)

        fullpath = meta.get('full_path')
        if fullpath is not None:
            self.decode_fullpath(fullpath)
            if meta.get('oio_version') == OIO_VERSION:
                return True, meta

        chunk_inode = os.fstat(fd.fileno()).st_ino
        raw_chunk_id = None
        chunk_id = chunk_id.upper()
        chunk_pos = meta['chunk_pos']
        container_id = meta['container_id'].upper()
        path = meta['content_path']
        version = meta['content_version']
        content_id = meta['content_id'].upper()

        new_fullpaths = dict()
        xattr_to_remove = list()
        success = True

        for k, v in raw_meta.iteritems():
            # fetch raw chunk ID
            if k == XATTR_CHUNK_ID:
                raw_chunk_id = v.upper()

            # search old fullpaths
            if not k.startswith(XATTR_OLD_FULLPATH) \
                    or not is_hexa(k[4:], size=64):
                continue

            try:
                account2, container2, container_id2, path2, version2, \
                    content_id2 = self.decode_old_fullpath(v)

                if container_id == container_id2 and path == path2 \
                        and version == version2:
                    if content_id2 is None:
                        content_id2 = self.content_id_from_name(container_id2,
                                                                path2,
                                                                version2,
                                                                search=True)

                    chunk_id, new_fullpath = self.encode_fullpath(
                        chunk_inode, chunk_id, account2, container2, path2,
                        version2, content_id2)
                    new_fullpaths[chunk_id] = new_fullpath
                else:
                    chunk_id2, new_fullpath = self.get_chunk_id_and_fullpath(
                        chunk_inode,
                        chunk_pos,
                        container_id2,
                        path2,
                        version2,
                        account=account2,
                        container=container2,
                        content_id=content_id2)
                    new_fullpaths[chunk_id2] = new_fullpath

                xattr_to_remove.append(k)
            except Exception as exc:
                success = False
                self.logger.warn('chunk_id=%s old_fullpath=%s: %s', chunk_id,
                                 k, exc)

        # old xattr
        if raw_chunk_id is not None:
            try:
                if raw_chunk_id != chunk_id and raw_chunk_id not in links:
                    if raw_chunk_id not in new_fullpaths:
                        meta2, _ = read_chunk_metadata(fd, raw_chunk_id)
                        container_id2 = meta2['container_id'].upper()
                        path2 = meta2['content_path']
                        version2 = meta2['content_version']
                        content_id2 = meta2['content_id'].upper()

                        raw_chunk_id, new_fullpath = \
                            self.get_chunk_id_and_fullpath(
                                chunk_inode, chunk_pos, container_id2, path2,
                                version2, chunk_id=raw_chunk_id,
                                content_id=content_id2)
                        new_fullpaths[raw_chunk_id] = new_fullpath
                elif raw_chunk_id == chunk_id and fullpath is None:
                    if raw_chunk_id not in new_fullpaths:
                        raw_chunk_id, new_fullpath = \
                            self.get_chunk_id_and_fullpath(
                                chunk_inode, chunk_pos, container_id, path,
                                version, chunk_id=raw_chunk_id,
                                content_id=content_id)
                        new_fullpaths[raw_chunk_id] = new_fullpath
            except Exception as exc:
                success = False
                self.logger.warn('chunk_id=%s (old xattr): %s', raw_chunk_id,
                                 exc)

        self.save_xattr(chunk_id, raw_meta)

        if self.dry_run:
            self.logger.info(
                "[dryrun] Converting chunk %s: success=%s new_fullpaths=%s "
                "xattr_to_remove=%s", chunk_id, str(success),
                str(new_fullpaths), str(xattr_to_remove))
        else:
            # for security, if there is an error, we don't delete old xattr
            modify_xattr(fd, new_fullpaths, success, xattr_to_remove)
        return success, None

    def safe_convert_chunk(self, path, fd=None, chunk_id=None):
        if chunk_id is None:
            chunk_id = path.rsplit('/', 1)[-1]
            if len(chunk_id) != STRLEN_CHUNKID:
                self.logger.warn('Not a chunk %s' % path)
                return
            for c in chunk_id:
                if c not in hexdigits:
                    self.logger.warn('Not a chunk %s' % path)
                    return

        success = False
        self.total_chunks_processed += 1
        try:
            if fd is None:
                with open(path) as fd:
                    success, _ = self.convert_chunk(fd, chunk_id)
            else:
                success, _ = self.convert_chunk(fd, chunk_id)
        except Exception:
            self.logger.exception('ERROR while conversion %s', path)

        if not success:
            self.errors += 1
        else:
            self.logger.debug('Converted %s', path)
        self.passes += 1

    def _fetch_chunks_from_file(self, input_file):
        with open(input_file, 'r') as ifile:
            for line in ifile:
                chunk_id = line.strip()
                if chunk_id and not chunk_id.startswith('#'):
                    yield self._get_path(chunk_id)

    def paths_gen(self, input_file=None):
        if input_file:
            return self._fetch_chunks_from_file(input_file)
        else:
            return paths_gen(self.volume)

    def converter_pass(self, input_file=None):
        def report(tag, now=None):
            if now is None:
                now = time.time()
            total_time = now - self.start_time
            self.logger.info(
                '%(tag)s  %(volume)s '
                'started=%(start_time)s '
                'passes=%(passes)d '
                'errors=%(errors)d '
                'chunks=%(nb_chunks)d %(c_rate).2f/s '
                'total_time=%(total_time).2f '
                '(converter: %(success_rate).2f%%)' % {
                    'tag': tag,
                    'volume': self.volume_id,
                    'start_time': datetime.fromtimestamp(
                        int(self.start_time)).isoformat(),
                    'passes': self.passes,
                    'errors': self.errors,
                    'nb_chunks': self.total_chunks_processed,
                    'c_rate': self.total_chunks_processed / total_time,
                    'total_time': total_time,
                    'success_rate': 100 * (
                        (self.total_chunks_processed - self.errors) /
                        float(self.total_chunks_processed)),
                })
            self.passes = 0
            self.last_reported = now

        self.start_time = time.time()
        self.errors = 0
        self.passes = 0

        self.backup_name = 'backup_%s_%f' % (self.volume_id, self.start_time)

        paths = self.paths_gen(input_file=input_file)
        for path in paths:
            self.safe_convert_chunk(path)

            now = time.time()
            if now - self.last_reported >= self.report_interval:
                report('RUN', now=now)

            self.chunks_run_time = ratelimit(self.chunks_run_time,
                                             self.max_chunks_per_second)
        report('DONE')

        return self.errors == 0
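
A minimal sketch of how a conversion pass might be driven (the volume path
and tuning values are placeholders; the volume must be a real rawx
directory carrying the xattrs that check_volume expects):

# Hedged usage sketch: every value in 'conf' is a placeholder.
conf = {
    'volume': '/var/lib/oio/sds/OPENIO/rawx-1',
    'dry_run': True,            # log what would change, modify nothing
    'report_interval': 60,      # seconds between RUN reports
    'chunks_per_second': 30,    # rate limit applied by ratelimit()
}
converter = BlobConverter(conf)
all_converted = converter.converter_pass()
if not all_converted:
    converter.logger.warn('conversion finished with errors')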
Example #7
class BlobConverter(object):
    def __init__(self, conf, logger=None, **kwargs):
        self.conf = conf
        self.logger = logger or get_logger(conf)
        volume = conf.get('volume')
        if not volume:
            raise ConfigurationException('No volume specified for converter')
        self.volume = volume
        self.namespace, self.volume_id = check_volume(self.volume)
        # cache
        self.name_by_cid = CacheDict()
        self.content_id_by_name = CacheDict()
        # client
        self.container_client = ContainerClient(conf, **kwargs)
        self.content_factory = ContentFactory(conf,
                                              self.container_client,
                                              logger=self.logger)
        self._rdir = None  # we may never need it
        # stats/logs
        self.errors = 0
        self.passes = 0
        self.total_chunks_processed = 0
        self.start_time = 0
        self.last_reported = 0
        self.report_interval = int_value(conf.get('report_interval'), 3600)
        # speed
        self.chunks_run_time = 0
        self.max_chunks_per_second = int_value(conf.get('chunks_per_second'),
                                               30)
        # backup
        self.no_backup = true_value(conf.get('no_backup', False))
        self.backup_dir = conf.get('backup_dir') or tempfile.gettempdir()
        self.backup_name = 'backup_%s_%f' \
            % (self.volume_id, time.time())
        # dry run
        self.dry_run = true_value(conf.get('dry_run', False))

    @property
    def rdir(self):
        """Get an instance of `RdirClient`."""
        if self._rdir is None:
            self._rdir = RdirClient(
                self.conf, pool_manager=self.container_client.pool_manager)
        return self._rdir

    def save_xattr(self, fd, chunk_id, xattr):
        if self.no_backup:
            return
        dirname = self.backup_dir + '/' + self.backup_name + '/' + chunk_id[:3]
        try:
            os.makedirs(dirname)
        except OSError:
            if not os.path.isdir(dirname):
                raise
        with open(dirname + '/' + chunk_id, 'w') as backup_fd:
            # same format as getfattr
            backup_fd.write('# file: ' + self._get_path(fd, chunk_id) + '\n')
            for k, v in xattr.items():
                backup_fd.write('user.' + k + '="' + v + '"\n')

    def _save_container(self, cid, account, container):
        cid = cid.upper()
        self.name_by_cid[cid] = (account, container)
        return cid, account, container

    def _save_content(self, cid, path, version, content_id):
        cid = cid.upper()
        content_id = content_id.upper()
        self.content_id_by_name[(cid, path, version)] = content_id
        return cid, path, version, content_id

    def _get_path(self, fd, chunk_id):
        chunk_path = self.volume
        chunk_path_split = fd.name[len(self.volume):].split('/')
        start = 0
        for chunk_part in chunk_path_split[:-1]:
            end = start + len(chunk_part)
            chunk_path += '/' + chunk_id[start:end]
            start = end
        chunk_path += '/' + chunk_path_split[-1]
        return chunk_path

    def cid_from_name(self, account, container):
        cid = cid_from_name(account, container)
        cid, account, container = self._save_container(cid, account, container)
        return cid

    def name_from_cid(self, cid):
        name = self.name_by_cid.get(cid)
        if name:
            return name

        properties = self.container_client.container_get_properties(cid=cid)
        account = properties['system']['sys.account']
        container = properties['system']['sys.user.name']
        cid, account, container = self._save_container(cid, account, container)
        return account, container

    def content_id_from_name(self, cid, path, version, search=False):
        content_id = self.content_id_by_name.get((cid, path, version))
        if content_id or not search:
            return content_id

        properties = self.container_client.content_get_properties(
            cid=cid, path=path, version=version)
        content_id = properties['id']
        cid, path, version, content_id = self._save_content(
            cid, path, version, content_id)
        return content_id

    def decode_fullpath(self, fullpath):
        # pylint: disable=unbalanced-tuple-unpacking
        account, container, path, version, content_id = decode_fullpath(
            fullpath)
        cid = self.cid_from_name(account, container)
        cid, path, version, content_id = self._save_content(
            cid, path, version, content_id)
        return account, container, cid, path, version, content_id

    def decode_old_fullpath(self, old_fullpath):
        # pylint: disable=unbalanced-tuple-unpacking
        try:
            account, container, path, version = decode_old_fullpath(
                old_fullpath)
            cid = self.cid_from_name(account, container)
            content_id = self.content_id_from_name(cid, path, version)
        except ValueError:
            # We never know: try to decode the fullpath as if it were the
            # new format
            account, container, path, version, content_id = decode_fullpath(
                old_fullpath)
            cid = self.cid_from_name(account, container)
        return account, container, cid, path, version, content_id

    def encode_fullpath(self, fd, chunk_id, account, container, path, version,
                        content_id):
        # check if chunk exists and has the same inode
        if not is_hexa(chunk_id) or len(chunk_id) != STRLEN_CHUNKID:
            raise ValueError('chunk ID must be hexadecimal and %s characters '
                             'long' % STRLEN_CHUNKID)
        try:
            chunk_inode = os.fstat(fd.fileno()).st_ino
            chunk_inode2 = os.stat(self._get_path(fd, chunk_id)).st_ino
            if chunk_inode2 != chunk_inode:
                raise OrphanChunk('Not the same inode: possible orphan chunk')
        except OSError:
            raise OrphanChunk('No such chunk: possible orphan chunk')

        # check fullpath and chunk ID
        if isinstance(version, string_types):
            try:
                version = int(version)
            except ValueError:
                raise ValueError('version must be a number')
        if version <= 0:
            raise ValueError('version must be positive')
        if not is_hexa(content_id):
            raise ValueError('content ID must be hexadecimal')

        fullpath = encode_fullpath(account, container, path, version,
                                   content_id.upper())

        return chunk_id.upper(), fullpath

    def _get_chunk_id_and_fullpath(self,
                                   fd,
                                   chunk_pos,
                                   content,
                                   chunk_id=None):
        content.container_id, content.account, content.container_name = \
            self._save_container(content.container_id, content.account,
                                 content.container_name)
        content.container_id, content.path, content.version, \
            content.content_id = self._save_content(
                content.container_id, content.path, content.version,
                content.content_id)

        chunks = content.chunks.filter(host=self.volume_id)
        if chunk_id:
            chunks = chunks.filter(id=chunk_id)
        chunk = chunks.filter(pos=chunk_pos).one()
        if chunk is None:
            raise OrphanChunk(
                'Chunk not found in content: possible orphan chunk')

        chunk_id, new_fullpath = self.encode_fullpath(
            fd, chunk.id, content.account, content.container_name,
            content.path, content.version, content.content_id)
        return chunk_id, new_fullpath

    def get_chunk_id_and_fullpath(self,
                                  fd,
                                  chunk_pos,
                                  container_id,
                                  path,
                                  version,
                                  chunk_id=None,
                                  account=None,
                                  container=None,
                                  content_id=None):
        if account is None or container is None:
            account, container = self.name_from_cid(container_id)

        if content_id:
            try:
                content = self.content_factory.get(container_id,
                                                   content_id,
                                                   account=account,
                                                   container_name=container)
                return self._get_chunk_id_and_fullpath(fd,
                                                       chunk_pos,
                                                       content,
                                                       chunk_id=chunk_id)
            except Exception as exc:
                self.logger.warn(
                    'chunk_id=%s chunk_pos=%s object=%s/%s/%s/%s/%s/%s: %s',
                    chunk_id, chunk_pos, str(account), str(container),
                    container_id, path, str(version), str(content_id), exc)

        # version must be an integer
        try:
            version = str(int(version))
        except Exception:
            version = None

        try:
            content = self.content_factory.get_by_path_and_version(
                container_id,
                path,
                version,
                account=account,
                container_name=container)
        except ContentNotFound:
            raise OrphanChunk('Content not found: possible orphan chunk')
        return self._get_chunk_id_and_fullpath(fd,
                                               chunk_pos,
                                               content,
                                               chunk_id=chunk_id)

    def convert_chunk(self, fd, chunk_id):
        meta, raw_meta = read_chunk_metadata(fd, chunk_id, for_conversion=True)

        links = meta.get('links', dict())
        for chunk_id2, fullpath2 in links.items():
            self.decode_fullpath(fullpath2)

        fullpath = meta.get('full_path')
        if fullpath is not None:
            self.decode_fullpath(fullpath)
            if meta.get('oio_version') == OIO_VERSION:
                return True, meta

        raw_chunk_id = None
        chunk_id = chunk_id.upper()
        chunk_pos = meta['chunk_pos']
        container_id = meta['container_id'].upper()
        path = meta['content_path']
        version = meta['content_version']
        content_id = meta['content_id'].upper()

        new_fullpaths = dict()
        xattr_to_remove = list()
        success = True

        for k, v in raw_meta.items():
            # fetch raw chunk ID
            if k == XATTR_CHUNK_ID:
                raw_chunk_id = v.upper()

            # search old fullpaths
            if not k.startswith(XATTR_OLD_FULLPATH) \
                    or not is_hexa(k[4:], size=64):
                continue

            try:
                account2, container2, container_id2, path2, version2, \
                    content_id2 = self.decode_old_fullpath(v)

                if meta['chunk_id'] == chunk_id \
                        and container_id == container_id2 \
                        and path == path2 \
                        and version == version2:
                    if content_id2 is None:
                        content_id2 = self.content_id_from_name(container_id2,
                                                                path2,
                                                                version2,
                                                                search=True)

                    chunk_id2, new_fullpath = self.encode_fullpath(
                        fd, chunk_id, account2, container2, path2, version2,
                        content_id2)
                    new_fullpaths[chunk_id2] = new_fullpath
                else:
                    chunk_id2, new_fullpath = self.get_chunk_id_and_fullpath(
                        fd,
                        chunk_pos,
                        container_id2,
                        path2,
                        version2,
                        account=account2,
                        container=container2,
                        content_id=content_id2)
                    new_fullpaths[chunk_id2] = new_fullpath

                xattr_to_remove.append(k)
            except Exception as exc:
                success = False
                self.logger.warn('chunk_id=%s old_fullpath=%s: %s', chunk_id,
                                 k, exc)

        # old xattr
        if raw_chunk_id is not None:
            try:
                if raw_chunk_id != chunk_id and raw_chunk_id not in links:
                    if raw_chunk_id not in new_fullpaths:
                        meta2, _ = read_chunk_metadata(fd, raw_chunk_id)
                        container_id2 = meta2['container_id'].upper()
                        path2 = meta2['content_path']
                        version2 = meta2['content_version']
                        content_id2 = meta2['content_id'].upper()

                        raw_chunk_id2, new_fullpath = \
                            self.get_chunk_id_and_fullpath(
                                fd, chunk_pos, container_id2, path2,
                                version2, chunk_id=raw_chunk_id,
                                content_id=content_id2)
                        new_fullpaths[raw_chunk_id2] = new_fullpath
                elif raw_chunk_id == chunk_id and fullpath is None:
                    if raw_chunk_id not in new_fullpaths:
                        raw_chunk_id2, new_fullpath = \
                            self.get_chunk_id_and_fullpath(
                                fd, chunk_pos, container_id, path,
                                version, chunk_id=raw_chunk_id,
                                content_id=content_id)
                        new_fullpaths[raw_chunk_id2] = new_fullpath
            except Exception as exc:
                success = False
                self.logger.warn('chunk_id=%s (old xattr): %s', raw_chunk_id,
                                 exc)

        self.save_xattr(fd, chunk_id, raw_meta)

        if self.dry_run:
            self.logger.info(
                "[dryrun] Converting chunk %s: success=%s new_fullpaths=%s "
                "xattr_to_remove=%s", chunk_id, str(success),
                str(new_fullpaths), str(xattr_to_remove))
        else:
            # To be safe, if there was any error, do not delete the old xattr
            set_fullpath_xattr(fd, new_fullpaths, success, xattr_to_remove)
        return success, None

    def is_fullpath_error(self, err):
        if (isinstance(err, MissingAttribute) and
            (err.attribute.startswith(CHUNK_XATTR_CONTENT_FULLPATH_PREFIX)
             or err.attribute == CHUNK_XATTR_KEYS['content_path']
             or err.attribute.startswith(XATTR_OLD_FULLPATH))):
            return True
        elif isinstance(err, FaultyChunk):
            return any(self.is_fullpath_error(x) for x in err.args)
        return False

    def safe_convert_chunk(self, path, chunk_id=None):
        if chunk_id is None:
            chunk_id = path.rsplit('/', 1)[-1]
            if len(chunk_id) != STRLEN_CHUNKID:
                self.logger.warn('Not a chunk %s', path)
                return
            for char in chunk_id:
                if char not in hexdigits:
                    self.logger.warn('Not a chunk %s', path)
                    return

        success = False
        self.total_chunks_processed += 1
        try:
            with open(path) as fd:
                success, _ = self.convert_chunk(fd, chunk_id)
        except (FaultyChunk, MissingAttribute) as err:
            if self.is_fullpath_error(err):
                self.logger.warn(
                    "Cannot convert %s: %s, will try to recover 'fullpath'",
                    path, err)
                try:
                    success = self.recover_chunk_fullpath(path, chunk_id)
                except Exception as err2:
                    self.logger.error('Could not recover fullpath: %s', err2)
            else:
                self.logger.exception('ERROR while converting %s', path)
        except Exception:
            self.logger.exception('ERROR while converting %s', path)

        if not success:
            self.errors += 1
        else:
            self.logger.debug('Converted %s', path)
        self.passes += 1

    def recover_chunk_fullpath(self, path, chunk_id=None):
        if not chunk_id:
            chunk_id = path.rsplit('/', 1)[-1]
        # 1. Fetch the chunk list from the rdir (could be cached).
        # Unfortunately we cannot look up a single chunk ID directly.
        entries = [
            x for x in self.rdir.chunk_fetch(self.volume_id, limit=-1)
            if x[2] == chunk_id
        ]
        if not entries:
            raise KeyError('Chunk %s not found in rdir' % chunk_id)
        elif len(entries) > 1:
            self.logger.info('Chunk %s appears in %d objects', chunk_id,
                             len(entries))
        # 2. Find content and container IDs
        cid, content_id = entries[0][0:2]
        # 3a. Call ContainerClient.content_locate()
        #    with the container ID and content ID
        try:
            meta, chunks = self.container_client.content_locate(
                cid=cid, content=content_id)
        except NotFound as err:
            raise OrphanChunk('Cannot check whether %s is valid: %s' %
                              (path, err))
        # 3b. Resolve container ID into account and container names.
        # FIXME(FVE): get account and container names from meta1
        cmeta = self.container_client.container_get_properties(cid=cid)
        aname = cmeta['system']['sys.account']
        cname = cmeta['system']['sys.user.name']
        fullpath = encode_fullpath(aname, cname, meta['name'], meta['version'],
                                   content_id)
        # 4. Check if the chunk actually belongs to the object
        chunk_url = 'http://%s/%s' % (self.volume_id, chunk_id)
        if chunk_url not in [x['url'] for x in chunks]:
            raise OrphanChunk('Chunk %s not found in object %s' %
                              (chunk_url, fullpath))
        # 5. Regenerate the fullpath
        with open(path, 'w') as fd:
            set_fullpath_xattr(fd, {chunk_id: fullpath})
        return True

    def _fetch_chunks_from_file(self, input_file):
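        # One chunk path per line, relative to the volume root; empty
        # lines and lines starting with '#' are ignored.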
        with open(input_file, 'r') as ifile:
            for line in ifile:
                chunk_path = line.strip()
                if chunk_path and not chunk_path.startswith('#'):
                    yield self.volume + '/' + chunk_path

    def paths_gen(self, input_file=None):
        if input_file:
            return self._fetch_chunks_from_file(input_file)
        else:
            return paths_gen(self.volume)

    def converter_pass(self, input_file=None):
        def report(tag, now=None):
            if now is None:
                now = time.time()
            total_time = now - self.start_time
            self.logger.info(
                '%(tag)s  %(volume)s '
                'started=%(start_time)s '
                'passes=%(passes)d '
                'errors=%(errors)d '
                'chunks=%(nb_chunks)d %(c_rate).2f/s '
                'total_time=%(total_time).2f '
                '(converter: %(success_rate).2f%%)' % {
                    'tag': tag,
                    'volume': self.volume_id,
                    'start_time': datetime.fromtimestamp(
                        int(self.start_time)).isoformat(),
                    'passes': self.passes,
                    'errors': self.errors,
                    'nb_chunks': self.total_chunks_processed,
                    'c_rate': self.total_chunks_processed / total_time,
                    'total_time': total_time,
                    'success_rate': 100 * (
                        (self.total_chunks_processed - self.errors) /
                        (float(self.total_chunks_processed) or 1.0)),
                })
            self.passes = 0
            self.last_reported = now

        self.start_time = time.time()
        self.errors = 0
        self.passes = 0

        self.backup_name = 'backup_%s_%f' % (self.volume_id, self.start_time)

        paths = self.paths_gen(input_file=input_file)
        for path in paths:
            self.safe_convert_chunk(path)

            now = time.time()
            if now - self.last_reported >= self.report_interval:
                report('RUN', now=now)

            self.chunks_run_time = ratelimit(self.chunks_run_time,
                                             self.max_chunks_per_second)
        report('DONE')

        return self.errors == 0
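A minimal invocation sketch for the converter shown above. The class name
BlobConverter and the 'volume' configuration key are assumptions (the
constructor is not fully shown here); adapt them to the actual module.

conf = {
    'volume': '/mnt/rawx-1',       # hypothetical volume path
    'dry_run': 'true',             # only log what would be rewritten
    'no_backup': 'false',          # keep the getfattr-style xattr backups
    'report_interval': '60',       # report every minute instead of hourly
    'chunks_per_second': '10',     # rate limit the conversion
}
converter = BlobConverter(conf)    # hypothetical class name
ok = converter.converter_pass()
print('converted without errors' if ok else 'some chunks failed')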
Example #9
class BlobRebuilderWorker(object):
    def __init__(self,
                 conf,
                 logger,
                 volume,
                 input_file=None,
                 try_chunk_delete=False,
                 beanstalkd_addr=None):
        self.conf = conf
        self.logger = logger or get_logger(conf)
        self.volume = volume
        self.run_time = 0
        self.passes = 0
        self.errors = 0
        self.last_reported = 0
        self.chunks_run_time = 0
        self.bytes_running_time = 0
        self.bytes_processed = 0
        self.total_bytes_processed = 0
        self.total_chunks_processed = 0
        self.dry_run = true_value(conf.get('dry_run', False))
        self.report_interval = int_value(conf.get('report_interval'), 3600)
        self.max_chunks_per_second = int_value(conf.get('chunks_per_second'),
                                               30)
        self.max_bytes_per_second = int_value(conf.get('bytes_per_second'),
                                              10000000)
        self.rdir_fetch_limit = int_value(conf.get('rdir_fetch_limit'), 100)
        self.allow_same_rawx = true_value(conf.get('allow_same_rawx'))
        self.input_file = input_file
        self.rdir_client = RdirClient(conf, logger=self.logger)
        self.content_factory = ContentFactory(conf)
        self.try_chunk_delete = try_chunk_delete
        self.beanstalkd_addr = beanstalkd_addr
        self.beanstalkd_tube = conf.get('beanstalkd_tube', 'rebuild')
        self.beanstalk = None

    def _fetch_chunks_from_event(self, job_id, data):
        env = json.loads(data)
        for chunk_pos in env['data']['missing_chunks']:
            yield [
                env['url']['id'], env['url']['content'],
                str(chunk_pos), None
            ]

    def _connect_to_beanstalk(self):
        self.beanstalk = Beanstalk.from_url(self.beanstalkd_addr)
        self.beanstalk.use(self.beanstalkd_tube)
        self.beanstalk.watch(self.beanstalkd_tube)

    def _handle_beanstalk_event(self, conn_error):
        try:
            job_id, data = self.beanstalk.reserve()
            if conn_error:
                self.logger.warn("beanstalk reconnected")
        except ConnectionError:
            if not conn_error:
                self.logger.warn("beanstalk connection error")
            raise
        try:
            for chunk in self._fetch_chunks_from_event(job_id, data):
                yield chunk
            self.beanstalk.delete(job_id)
        except Exception:
            self.logger.exception("handling event %s (bury)", job_id)
            self.beanstalk.bury(job_id)

    def _fetch_chunks_from_beanstalk(self):
        conn_error = False
        while True:
            try:
                self._connect_to_beanstalk()
                for chunk in self._handle_beanstalk_event(conn_error):
                    conn_error = False
                    yield chunk
            except ConnectionError:
                conn_error = True
                time.sleep(1.0)

    def _fetch_chunks_from_file(self):
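        # Expected line format (IDs are illustrative):
        #   <container_id>|<content_id>|<chunk_id_or_pos>
        # Each line is parsed into [cid, content_id, chunk_id_or_pos, None]
        # to match the tuples yielded by the rdir client.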
        with open(self.input_file, 'r') as ifile:
            for line in ifile:
                stripped = line.strip()
                if stripped and not stripped.startswith('#'):
                    yield stripped.split('|', 3)[:3] + [None]

    def _fetch_chunks(self):
        if self.input_file:
            return self._fetch_chunks_from_file()
        elif self.beanstalkd_addr:
            return self._fetch_chunks_from_beanstalk()
        else:
            return self.rdir_client.chunk_fetch(self.volume,
                                                limit=self.rdir_fetch_limit,
                                                rebuild=True)

    def rebuilder_pass_with_lock(self):
        self.rdir_client.admin_lock(self.volume,
                                    "rebuilder on %s" % gethostname())
        try:
            self.rebuilder_pass()
        finally:
            self.rdir_client.admin_unlock(self.volume)

    def rebuilder_pass(self):
        start_time = report_time = time.time()

        rebuilder_time = 0

        chunks = self._fetch_chunks()
        for cid, content_id, chunk_id_or_pos, _ in chunks:
            loop_time = time.time()
            if self.dry_run:
                self.dryrun_chunk_rebuild(cid, content_id, chunk_id_or_pos)
            else:
                self.safe_chunk_rebuild(cid, content_id, chunk_id_or_pos)

            self.chunks_run_time = ratelimit(self.chunks_run_time,
                                             self.max_chunks_per_second)
            self.total_chunks_processed += 1
            now = time.time()

            if now - self.last_reported >= self.report_interval:
                self.logger.info(
                    'RUN  %(volume)s '
                    'started=%(start_time)s '
                    'passes=%(passes)d '
                    'errors=%(errors)d '
                    'chunks=%(nb_chunks)d %(c_rate).2f/s '
                    'bytes=%(nb_bytes)d %(b_rate).2fB/s '
                    'elapsed=%(total).2f '
                    '(rebuilder: %(success_rate).2f%%)' % {
                        'volume': self.volume,
                        'start_time': datetime.fromtimestamp(
                            int(report_time)).isoformat(),
                        'passes': self.passes,
                        'errors': self.errors,
                        'nb_chunks': self.total_chunks_processed,
                        'nb_bytes': self.total_bytes_processed,
                        'c_rate': self.passes / (now - report_time),
                        'b_rate': self.bytes_processed / (now - report_time),
                        'total': (now - start_time),
                        'rebuilder_time': rebuilder_time,
                        'success_rate': 100 * (
                            (self.total_chunks_processed - self.errors) /
                            float(self.total_chunks_processed)),
                    })
                report_time = now
                self.passes = 0
                self.bytes_processed = 0
                self.last_reported = now
            rebuilder_time += (now - loop_time)
        end_time = time.time()
        elapsed = (end_time - start_time) or 0.000001
        self.logger.info(
            'DONE %(volume)s '
            'started=%(start_time)s '
            'ended=%(end_time)s '
            'passes=%(passes)d '
            'elapsed=%(elapsed).02f '
            'errors=%(errors)d '
            'chunks=%(nb_chunks)d %(c_rate).2f/s '
            'bytes=%(nb_bytes)d %(b_rate).2fB/s '
            'rebuilder_time=%(rebuilder_time).2f '
            '(rebuilder: %(success_rate).2f%%)' % {
                'volume': self.volume,
                'start_time': datetime.fromtimestamp(
                    int(start_time)).isoformat(),
                'end_time': datetime.fromtimestamp(
                    int(end_time)).isoformat(),
                'passes': self.passes,
                'elapsed': elapsed,
                'errors': self.errors,
                'nb_chunks': self.total_chunks_processed,
                'nb_bytes': self.total_bytes_processed,
                'c_rate': self.total_chunks_processed / elapsed,
                'b_rate': self.total_bytes_processed / elapsed,
                'rebuilder_time': rebuilder_time,
                'success_rate': 100 * (
                    (self.total_chunks_processed - self.errors) /
                    float(self.total_chunks_processed or 1)),
            })

    def dryrun_chunk_rebuild(self, container_id, content_id, chunk_id_or_pos):
        self.logger.info(
            "[dryrun] Rebuilding "
            "container %s, content %s, chunk %s", container_id, content_id,
            chunk_id_or_pos)
        self.passes += 1

    def safe_chunk_rebuild(self, container_id, content_id, chunk_id_or_pos):
        try:
            self.chunk_rebuild(container_id, content_id, chunk_id_or_pos)
        except Exception as e:
            self.errors += 1
            self.logger.error('ERROR while rebuilding chunk %s|%s|%s: %s',
                              container_id, content_id, chunk_id_or_pos, e)

        self.passes += 1

    def chunk_rebuild(self, container_id, content_id, chunk_id_or_pos):
        self.logger.info('Rebuilding (container %s, content %s, chunk %s)',
                         container_id, content_id, chunk_id_or_pos)
        try:
            content = self.content_factory.get(container_id, content_id)
        except ContentNotFound:
            raise OrphanChunk('Content not found: possible orphan chunk')

        chunk_size = 0
        chunk_pos = None
        if len(chunk_id_or_pos) < 32:
            chunk_pos = chunk_id_or_pos
            chunk_id = None
            metapos = int(chunk_pos.split('.', 1)[0])
            chunk_size = content.chunks.filter(metapos=metapos).all()[0].size
        else:
            if '/' in chunk_id_or_pos:
                chunk_id = chunk_id_or_pos.rsplit('/', 1)[-1]
            else:
                chunk_id = chunk_id_or_pos

            chunk = content.chunks.filter(id=chunk_id).one()
            if chunk is None:
                raise OrphanChunk(("Chunk not found in content:"
                                   "possible orphan chunk"))
            elif self.volume and chunk.host != self.volume:
                raise ValueError("Chunk does not belong to this volume")
            chunk_size = chunk.size

        content.rebuild_chunk(chunk_id,
                              allow_same_rawx=self.allow_same_rawx,
                              chunk_pos=chunk_pos)

        if self.try_chunk_delete:
            try:
                content.blob_client.chunk_delete(chunk.url)
                self.logger.info("Chunk %s deleted", chunk.url)
            except NotFound as exc:
                self.logger.debug("Chunk %s: %s", chunk.url, exc)

        # This call does not raise exception if chunk is not referenced
        if chunk_id is not None:
            self.rdir_client.chunk_delete(chunk.host, container_id, content_id,
                                          chunk_id)

        self.bytes_processed += chunk_size
        self.total_bytes_processed += chunk_size
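For reference, a minimal sketch of how this worker might be driven. The
import path of get_logger and the volume service address are assumptions.

from oio.common.logger import get_logger  # assumed import path

conf = {'namespace': 'OPENIO', 'dry_run': 'true'}  # hypothetical conf
worker = BlobRebuilderWorker(conf, get_logger(conf),
                             '127.0.0.1:6201')  # rawx volume to rebuild
# Locks the volume in the rdir for the duration of the pass.
worker.rebuilder_pass_with_lock()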
Example #10
class TestFilters(BaseTestCase):
    def setUp(self):
        super(TestFilters, self).setUp()
        self.account = self.conf['account']
        self.namespace = self.conf['namespace']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {'namespace': self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_name = 'TestFilter%f' % time.time()
        self.container_client = ContainerClient(self.gridconf)
        self.container_client.container_create(account=self.account,
                                               reference=self.container_name,
                                               admin_mode=True)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()
        self.stgpol = "SINGLE"

    def _prepare_content(self, path, content_id, admin_mode):
        self.container_client.content_prepare(account=self.account,
                                              reference=self.container_name,
                                              path=path,
                                              content_id=content_id,
                                              size=1,
                                              stgpol=self.stgpol,
                                              autocreate=True,
                                              admin_mode=admin_mode)

    def _new_content(self, data, path, admin_mode):
        old_content = self.content_factory.new(self.container_id,
                                               path,
                                               len(data),
                                               self.stgpol,
                                               admin_mode=admin_mode)
        old_content.create(BytesIO(data), admin_mode=admin_mode)
        return self.content_factory.get(self.container_id,
                                        old_content.content_id)

    def test_slave_and_admin(self):
        if not os.getenv("SLAVE"):
            self.skipTest("must be in slave mode")
        data = random_data(10)
        path = 'test_slave'
        try:
            self._new_content(data, path, False)
        except ClientException as exc:
            self.assertIn('NS slave!', text_type(exc))
        else:
            self.fail("New content: no exception")
        content = self._new_content(data, path, True)
        content.delete(admin_mode=True)

    def test_worm_and_admin(self):
        if not os.getenv("WORM"):
            self.skipTest("must be in worm mode")
        data = random_data(10)
        path = 'test_worm'
        content = self._new_content(data, path, True)

        # Prepare without admin mode:
        # since the 'prepare' step is done by the proxy, there is no check
        # on the pre-existence of the content. The subsequent 'prepare'
        # calls MUST therefore succeed despite the presence of the content.
        self._prepare_content(path, None, False)
        self._prepare_content(path, content.content_id, False)
        self._prepare_content('test_worm_prepare', content.content_id, False)
        self._prepare_content(path, random_id(32), False)

        # Overwrite without admin mode
        data2 = random_data(11)
        self.assertRaises(Conflict, self._new_content, data2, path, False)

        # Prepare with admin mode
        self._prepare_content(path, None, True)
        self._prepare_content(path, content.content_id, True)
        self._prepare_content('test_worm_prepare', content.content_id, True)
        self._prepare_content(path, random_id(32), True)

        # Overwrite with admin mode
        content = self._new_content(data2, path, True)

        # Delete without admin mode
        try:
            content.delete()
        except ClientException as exc:
            self.assertIn('worm', str(exc))
        else:
            self.fail("Delete without admin mode: no exception")
        downloaded_data = ''.join(content.fetch())
        self.assertEqual(downloaded_data, data2)

        # Delete with admin mode
        content.delete(admin_mode=True)
Example #11
class RawxDecommissionTask(XcuteTask):
    def __init__(self, conf, job_params, logger=None):
        super(RawxDecommissionTask, self).__init__(conf,
                                                   job_params,
                                                   logger=logger)

        self.service_id = job_params['service_id']
        self.rawx_timeout = job_params['rawx_timeout']
        self.min_chunk_size = job_params['min_chunk_size']
        self.max_chunk_size = job_params['max_chunk_size']
        self.excluded_rawx = job_params['excluded_rawx']

        self.blob_client = BlobClient(self.conf, logger=self.logger)
        self.content_factory = ContentFactory(self.conf)
        self.conscience_client = ConscienceClient(self.conf,
                                                  logger=self.logger)

        self.fake_excluded_chunks = self._generate_fake_excluded_chunks(
            self.excluded_rawx)

    def _generate_fake_excluded_chunks(self, excluded_rawx):
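        # Build one placeholder chunk per excluded rawx so that placement
        # logic treats those services as already used. Hypothetical result
        # for an excluded service 'rawx-2' resolved to '192.168.1.2:6201':
        #   {'hash': '00...0', 'pos': '0', 'size': 1, 'score': 1,
        #    'url': 'http://rawx-2/' + '0' * 64,
        #    'real_url': 'http://192.168.1.2:6201/' + '0' * 64}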
        fake_excluded_chunks = list()
        fake_chunk_id = '0' * 64
        for service_id in excluded_rawx:
            service_addr = self.conscience_client.resolve_service_id(
                'rawx', service_id)
            chunk = dict()
            chunk['hash'] = '0000000000000000000000000000000000'
            chunk['pos'] = '0'
            chunk['size'] = 1
            chunk['score'] = 1
            chunk['url'] = 'http://{}/{}'.format(service_id, fake_chunk_id)
            chunk['real_url'] = 'http://{}/{}'.format(service_addr,
                                                      fake_chunk_id)
            fake_excluded_chunks.append(chunk)
        return fake_excluded_chunks

    def process(self, task_id, task_payload, reqid=None):
        container_id = task_payload['container_id']
        content_id = task_payload['content_id']
        chunk_id = task_payload['chunk_id']

        chunk_url = 'http://{}/{}'.format(self.service_id, chunk_id)
        try:
            meta = self.blob_client.chunk_head(chunk_url,
                                               timeout=self.rawx_timeout,
                                               reqid=reqid)
        except NotFound:
            # The chunk is still present in the rdir,
            # but the chunk no longer exists in the rawx.
            # We ignore it because there is nothing to move.
            return {'skipped_chunks_no_longer_exist': 1}
        if container_id != meta['container_id']:
            raise ValueError('Mismatched container ID: %s != %s' %
                             (container_id, meta['container_id']))
        if content_id != meta['content_id']:
            raise ValueError('Mismatched content ID: %s != %s' %
                             (content_id, meta['content_id']))
        chunk_size = int(meta['chunk_size'])

        # Maybe skip the chunk because it doesn't match the size constraint
        if chunk_size < self.min_chunk_size:
            self.logger.debug('[reqid=%s] SKIP %s too small', reqid, chunk_url)
            return {'skipped_chunks_too_small': 1}
        if self.max_chunk_size > 0 and chunk_size > self.max_chunk_size:
            self.logger.debug('[reqid=%s] SKIP %s too big', reqid, chunk_url)
            return {'skipped_chunks_too_big': 1}

        # Start moving the chunk
        try:
            content = self.content_factory.get(container_id,
                                               content_id,
                                               reqid=reqid)
            content.move_chunk(chunk_id,
                               fake_excluded_chunks=self.fake_excluded_chunks,
                               reqid=reqid)
        except (ContentNotFound, OrphanChunk):
            return {'orphan_chunks': 1}

        return {'moved_chunks': 1, 'moved_bytes': chunk_size}
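A hypothetical invocation sketch; the job parameter and payload keys are
inferred from the attribute and dictionary accesses above.

conf = {'namespace': 'OPENIO'}     # hypothetical configuration
job_params = {
    'service_id': 'rawx-1',        # rawx being decommissioned
    'rawx_timeout': 10.0,
    'min_chunk_size': 0,           # no lower bound on chunk size
    'max_chunk_size': 0,           # 0 disables the upper bound check
    'excluded_rawx': [],           # no placement exclusions
}
task = RawxDecommissionTask(conf, job_params)
result = task.process('task-1',
                      {'container_id': '0' * 64,
                       'content_id': '0' * 32,
                       'chunk_id': '0' * 64},
                      reqid='req-1')
# 'result' is a counter dict, e.g. {'moved_chunks': 1, 'moved_bytes': 1048576}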
Example #12
class TestFilters(BaseTestCase):
    def setUp(self):
        super(TestFilters, self).setUp()
        self.account = self.conf['account']
        self.namespace = self.conf['namespace']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {'namespace': self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_name = 'TestFilter%f' % time.time()
        self.container_client = ContainerClient(self.gridconf, admin_mode=True)
        self.container_client.container_create(account=self.account,
                                               reference=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()
        self.stgpol = "SINGLE"

    def _new_content(self, data, path, admin_mode=True):
        old_content = self.content_factory.new(self.container_id,
                                               path,
                                               len(data),
                                               self.stgpol,
                                               admin_mode=admin_mode)
        old_content.create(BytesIO(data), admin_mode=admin_mode)
        return self.content_factory.get(self.container_id,
                                        old_content.content_id)

    def test_slave_and_admin(self):
        if not os.getenv("SLAVE"):
            self.skipTest("must be in slave mode")
        data = random_data(10)
        path = 'test_slave'
        try:
            self._new_content(data, path)
        except ClientException as exc:
            self.assertIn('NS slave!', str(exc))
        else:
            self.fail("New content: no exception")
        content = self._new_content(data, path)
        content.delete(admin_mode=True)

    def test_worm_and_admin(self):
        if not os.getenv("WORM"):
            self.skipTest("must be in worm mode")
        data = random_data(10)
        path = 'test_worm'
        content = self._new_content(data, path)

        # Overwrite without admin mode
        data2 = random_data(11)
        self.assertRaises(Conflict, self._new_content,
                          data2, path, admin_mode=False)

        # Overwrite with admin mode
        content = self._new_content(data2, path)

        # Delete without admin mode
        try:
            content.delete()
        except ClientException as exc:
            self.assertIn('worm', str(exc).lower())
        else:
            self.fail("Delete without admin mode: no exception")
        downloaded_data = ''.join(content.fetch())
        self.assertEqual(downloaded_data, data2)

        # Delete with admin mode
        content.delete(admin_mode=True)
Example #13
class TestRainContent(BaseTestCase):
    def setUp(self):
        super(TestRainContent, self).setUp()

        if len(self.conf['rawx']) < 12:
            self.skipTest("Not enough rawx: "
                          "RAIN tests need at least 12 rawx to run")

        self.namespace = self.conf['namespace']
        self.account = self.conf['account']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_client = ContainerClient(self.gridconf)
        self.blob_client = BlobClient()
        self.container_name = "TestRainContent%f" % time.time()
        self.container_client.container_create(acct=self.account,
                                               ref=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()

    def tearDown(self):
        super(TestRainContent, self).tearDown()

    def _test_upload(self, data_size):
        data = random_data(data_size)
        content = self.content_factory.new(self.container_id, "titi",
                                           len(data), "RAIN")
        k = 6
        m = 2
        self.assertEqual(type(content), RainContent)

        content.upload(StringIO.StringIO(data))

        meta, chunks = self.container_client.content_show(
            cid=self.container_id, content=content.content_id)
        chunks = ChunksHelper(chunks)
        self.assertEqual(meta['hash'], md5_data(data))
        self.assertEqual(meta['length'], str(len(data)))
        self.assertEqual(meta['policy'], "RAIN")
        self.assertEqual(meta['name'], "titi")

        metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size))
        if metachunk_nb == 0:
            metachunk_nb = 1  # special case for empty content

        nb_chunks_min = metachunk_nb * (k + m) - (k - 1)
        nb_chunks_max = metachunk_nb * (k + m)
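        # Only the last metachunk may hold fewer than k data chunks (at
        # least one), hence the lower bound of metachunk_nb*(k+m) - (k-1).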
        self.assertGreaterEqual(len(chunks), nb_chunks_min)
        self.assertLessEqual(len(chunks), nb_chunks_max)

        for metapos in range(metachunk_nb):
            chunks_at_pos = content.chunks.filter(metapos=metapos)
            data_chunks_at_pos = chunks_at_pos.filter(is_parity=False)
            parity_chunks_at_pos = chunks_at_pos.filter(is_parity=True)

            if metapos < metachunk_nb - 1:
                self.assertEqual(len(data_chunks_at_pos), k)
            else:
                self.assertGreaterEqual(len(data_chunks_at_pos), 1)
                self.assertLessEqual(len(data_chunks_at_pos), k)
            self.assertEqual(len(parity_chunks_at_pos), m)

            for chunk in chunks_at_pos:
                meta, stream = self.blob_client.chunk_get(chunk.url)
                self.assertEqual(md5_stream(stream), chunk.hash)
                self.assertEqual(meta['content_size'], str(len(data)))
                self.assertEqual(meta['content_path'], "titi")
                self.assertEqual(meta['content_cid'], self.container_id)
                self.assertEqual(meta['content_id'], content.content_id)
                self.assertEqual(meta['chunk_id'], chunk.id)
                self.assertEqual(meta['chunk_pos'], chunk.pos)
                self.assertEqual(meta['chunk_hash'], chunk.hash)

            data_begin = metapos * self.chunk_size
            data_end = metapos * self.chunk_size + self.chunk_size
            target_metachunk_hash = md5_data(data[data_begin:data_end])

            metachunk_hash = hashlib.md5()
            for chunk in data_chunks_at_pos:
                meta, stream = self.blob_client.chunk_get(chunk.url)
                for d in stream:
                    metachunk_hash.update(d)
            self.assertEqual(metachunk_hash.hexdigest().upper(),
                             target_metachunk_hash)

    def test_upload_0_byte(self):
        self._test_upload(0)

    def test_upload_1_byte(self):
        self._test_upload(1)

    def test_upload_chunksize_bytes(self):
        self._test_upload(self.chunk_size)

    def test_upload_chunksize_plus_1_bytes(self):
        self._test_upload(self.chunk_size + 1)

    def test_chunks_cleanup_when_upload_failed(self):
        data = random_data(2 * self.chunk_size)
        content = self.content_factory.new(self.container_id, "titi",
                                           len(data), "RAIN")
        self.assertEqual(type(content), RainContent)

        # set bad url for position 1
        for chunk in content.chunks.filter(pos="1.p0"):
            chunk.url = "http://127.0.0.1:9/DEADBEEF"

        self.assertRaises(Exception, content.upload, StringIO.StringIO(data))
        for chunk in content.chunks.exclude(pos="1.p0"):
            self.assertRaises(NotFound, self.blob_client.chunk_head, chunk.url)

    def _test_rebuild(self, data_size, broken_pos_list):
        data = os.urandom(data_size)
        old_content = self.content_factory.new(self.container_id, "titi",
                                               len(data), "RAIN")
        self.assertEqual(type(old_content), RainContent)

        old_content.upload(StringIO.StringIO(data))

        # get the new structure of the uploaded content
        uploaded_content = self.content_factory.get(self.container_id,
                                                    old_content.content_id)

        old_info = {}
        for pos in broken_pos_list:
            old_info[pos] = {}
            c = uploaded_content.chunks.filter(pos=pos)[0]
            old_info[pos]["url"] = c.url
            old_info[pos]["id"] = c.id
            old_info[pos]["hash"] = c.hash
            chunk_id_to_rebuild = c.id
            meta, stream = self.blob_client.chunk_get(c.url)
            old_info[pos]["dl_meta"] = meta
            old_info[pos]["dl_hash"] = md5_stream(stream)
            # delete the chunk
            self.blob_client.chunk_delete(c.url)

        # rebuild the broken chunks
        uploaded_content.rebuild_chunk(chunk_id_to_rebuild)

        # get the new structure of the content
        rebuilt_content = self.content_factory.get(self.container_id,
                                                   uploaded_content.content_id)
        self.assertEqual(type(rebuilt_content), RainContent)

        for pos in broken_pos_list:
            c = rebuilt_content.chunks.filter(pos=pos)[0]
            rebuilt_meta, rebuilt_stream = self.blob_client.chunk_get(c.url)
            self.assertEqual(rebuilt_meta["chunk_id"], c.id)
            self.assertEqual(md5_stream(rebuilt_stream),
                             old_info[pos]["dl_hash"])
            self.assertEqual(c.hash, old_info[pos]["hash"])
            self.assertNotEqual(c.url, old_info[pos]["url"])
            del old_info[pos]["dl_meta"]["chunk_id"]
            del rebuilt_meta["chunk_id"]
            self.assertEqual(rebuilt_meta, old_info[pos]["dl_meta"])

    def test_content_0_byte_rebuild_pos_0_0(self):
        self._test_rebuild(0, ["0.0"])

    def test_content_0_byte_rebuild_pos_0_0_and_0_p0(self):
        self._test_rebuild(0, ["0.0", "0.p0"])

    def test_content_1_byte_rebuild_pos_0_0(self):
        self._test_rebuild(1, ["0.0"])

    def test_content_1_byte_rebuild_pos_0_p0(self):
        self._test_rebuild(1, ["0.p0"])

    def test_content_1_byte_rebuild_pos_0_0_and_0_p0(self):
        self._test_rebuild(1, ["0.0", "0.p0"])

    def test_content_chunksize_bytes_rebuild_pos_0_0(self):
        self._test_rebuild(self.conf["chunk_size"], ["0.0"])

    def test_content_chunksize_bytes_rebuild_pos_0_0_and_0_1(self):
        self._test_rebuild(self.conf["chunk_size"], ["0.0", "0.1"])

    def test_content_chunksize_bytes_rebuild_pos_0_0_and_0_p0(self):
        self._test_rebuild(self.conf["chunk_size"], ["0.0", "0.p0"])

    def test_content_chunksize_bytes_rebuild_pos_0_p0_and_0_p1(self):
        self._test_rebuild(self.conf["chunk_size"], ["0.p0", "0.p1"])

    def test_content_chunksize_bytes_rebuild_more_than_k_chunk(self):
        self.assertRaises(UnrecoverableContent, self._test_rebuild,
                          self.conf["chunk_size"], ["0.0", "0.1", "0.2"])

    def _new_content(self, data, broken_pos_list=[]):
        old_content = self.content_factory.new(self.container_id, "titi",
                                               len(data), "RAIN")
        self.assertEqual(type(old_content), RainContent)

        old_content.upload(StringIO.StringIO(data))

        for pos in broken_pos_list:
            c = old_content.chunks.filter(pos=pos)[0]
            self.blob_client.chunk_delete(c.url)

        # get the new structure of the uploaded content
        return self.content_factory.get(self.container_id,
                                        old_content.content_id)

    def test_orphan_chunk(self):
        content = self._new_content(random_data(10))

        self.assertRaises(OrphanChunk, content.rebuild_chunk, "uNkNoWnId")

    def test_rebuild_on_the_fly(self):
        data = random_data(self.conf["chunk_size"])
        content = self._new_content(data, ["0.0", "0.p0"])

        stream = content.rebuild_metachunk("0", on_the_fly=True)

        dl_data = "".join(stream)

        self.assertEqual(dl_data, data)

        del_chunk_0_0 = content.chunks.filter(pos="0.0")[0]
        del_chunk_0_p0 = content.chunks.filter(pos="0.p0")[0]

        self.assertRaises(NotFound, self.blob_client.chunk_get,
                          del_chunk_0_0.url)
        self.assertRaises(NotFound, self.blob_client.chunk_get,
                          del_chunk_0_p0.url)

    def _test_download(self, data_size, broken_pos_list):
        data = random_data(data_size)
        content = self._new_content(data, broken_pos_list)

        downloaded_data = "".join(content.download())

        self.assertEqual(downloaded_data, data)

        for pos in broken_pos_list:
            c = content.chunks.filter(pos=pos)[0]
            self.assertRaises(NotFound, self.blob_client.chunk_delete, c.url)

    def test_download_content_0_byte_without_broken_chunks(self):
        self._test_download(0, [])

    def test_download_content_1_byte_without_broken_chunks(self):
        self._test_download(1, [])

    def test_download_content_chunksize_bytes_without_broken_chunks(self):
        self._test_download(self.conf["chunk_size"], [])

    def test_download_content_chunksize_plus_1_without_broken_chunks(self):
        self._test_download(self.conf["chunk_size"] + 1, [])

    def test_download_content_0_byte_with_broken_0_0_and_0_p0(self):
        self._test_download(0, ["0.0", "0.p0"])

    def test_download_content_1_byte_with_broken_0_0_and_0_p0(self):
        self._test_download(1, ["0.0", "0.p0"])

    def test_download_content_2xchunksize_with_broken_0_2_and_1_0(self):
        self._test_download(2 * self.conf["chunk_size"], ["0.2", "1.0"])

    def test_download_content_chunksize_bytes_with_3_broken_chunks(self):
        data = random_data(self.conf["chunk_size"])
        content = self._new_content(data, ["0.0", "0.1", "0.2"])
        gen = content.download()
        self.assertRaises(UnrecoverableContent, gen.next)

    def test_download_interrupt_close(self):
        data = random_data(self.conf["chunk_size"])
        content = self._new_content(data, ["0.p0"])

        download_iter = content.download()

        self.assertEqual(download_iter.next(), data[0:READ_CHUNK_SIZE - 1])
        download_iter.close()
Example #14
class TestRainContent(BaseTestCase):
    def setUp(self):
        super(TestRainContent, self).setUp()

        if len(self.conf['rawx']) < 12:
            self.skipTest("Not enough rawx: "
                          "RAIN tests need at least 12 rawx to run")

        self.namespace = self.conf['namespace']
        self.account = self.conf['account']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_client = ContainerClient(self.gridconf)
        self.blob_client = BlobClient()
        self.container_name = "TestRainContent%f" % time.time()
        self.container_client.container_create(acct=self.account,
                                               ref=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()

    def tearDown(self):
        super(TestRainContent, self).tearDown()

    def _test_upload(self, data_size):
        data = random_data(data_size)
        content = self.content_factory.new(self.container_id, "titi",
                                           len(data), "RAIN")
        k = 6
        m = 2
        self.assertEqual(type(content), RainContent)

        content.upload(StringIO.StringIO(data))

        meta, chunks = self.container_client.content_show(
            cid=self.container_id, content=content.content_id)
        chunks = ChunksHelper(chunks)
        self.assertEqual(meta['hash'], md5_data(data))
        self.assertEqual(meta['length'], str(len(data)))
        self.assertEqual(meta['policy'], "RAIN")
        self.assertEqual(meta['name'], "titi")

        metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size))
        if metachunk_nb == 0:
            metachunk_nb = 1  # special case for empty content

        nb_chunks_min = metachunk_nb * (1 + m)
        nb_chunks_max = metachunk_nb * (k + m)
        self.assertGreaterEqual(len(chunks), nb_chunks_min)
        self.assertLessEqual(len(chunks), nb_chunks_max)

        for metapos in range(metachunk_nb):
            chunks_at_pos = content.chunks.filter(metapos=metapos)
            data_chunks_at_pos = chunks_at_pos.filter(is_parity=False)
            parity_chunks_at_pos = chunks_at_pos.filter(is_parity=True)

            self.assertGreaterEqual(len(data_chunks_at_pos), 1)
            self.assertLessEqual(len(data_chunks_at_pos), k)
            self.assertEqual(len(parity_chunks_at_pos), m)

            for chunk in chunks_at_pos:
                meta, stream = self.blob_client.chunk_get(chunk.url)
                self.assertEqual(md5_stream(stream), chunk.hash)
                self.assertEqual(meta['content_size'], str(len(data)))
                self.assertEqual(meta['content_path'], "titi")
                self.assertEqual(meta['content_cid'], self.container_id)
                self.assertEqual(meta['content_id'], content.content_id)
                self.assertEqual(meta['chunk_id'], chunk.id)
                self.assertEqual(meta['chunk_pos'], chunk.pos)
                self.assertEqual(meta['chunk_hash'], chunk.hash)

            data_begin = metapos * self.chunk_size
            data_end = metapos * self.chunk_size + self.chunk_size
            target_metachunk_hash = md5_data(data[data_begin:data_end])

            metachunk_hash = hashlib.md5()
            for chunk in data_chunks_at_pos:
                meta, stream = self.blob_client.chunk_get(chunk.url)
                for d in stream:
                    metachunk_hash.update(d)
            self.assertEqual(metachunk_hash.hexdigest().upper(),
                             target_metachunk_hash)

    def test_upload_0_byte(self):
        self._test_upload(0)

    def test_upload_1_byte(self):
        self._test_upload(1)

    def test_upload_chunksize_bytes(self):
        self._test_upload(self.chunk_size)

    def test_upload_chunksize_plus_1_bytes(self):
        self._test_upload(self.chunk_size + 1)

    def test_chunks_cleanup_when_upload_failed(self):
        data = random_data(2 * self.chunk_size)
        content = self.content_factory.new(self.container_id, "titi",
                                           len(data), "RAIN")
        self.assertEqual(type(content), RainContent)

        # set bad url for position 1
        for chunk in content.chunks.filter(pos="1.p0"):
            chunk.url = "http://127.0.0.1:9/DEADBEEF"

        self.assertRaises(Exception, content.upload, StringIO.StringIO(data))
        for chunk in content.chunks.exclude(pos="1.p0"):
            self.assertRaises(NotFound,
                              self.blob_client.chunk_head, chunk.url)

    def _test_rebuild(self, data_size, broken_pos_list):
        data = os.urandom(data_size)
        old_content = self.content_factory.new(self.container_id, "titi",
                                               len(data), "RAIN")
        self.assertEqual(type(old_content), RainContent)

        old_content.upload(StringIO.StringIO(data))

        # get the new structure of the uploaded content
        uploaded_content = self.content_factory.get(self.container_id,
                                                    old_content.content_id)

        old_info = {}
        for pos in broken_pos_list:
            old_info[pos] = {}
            c = uploaded_content.chunks.filter(pos=pos)[0]
            old_info[pos]["url"] = c.url
            old_info[pos]["id"] = c.id
            old_info[pos]["hash"] = c.hash
            chunk_id_to_rebuild = c.id
            meta, stream = self.blob_client.chunk_get(c.url)
            old_info[pos]["dl_meta"] = meta
            old_info[pos]["dl_hash"] = md5_stream(stream)
            # delete the chunk
            self.blob_client.chunk_delete(c.url)

        # rebuild the broken chunks
        uploaded_content.rebuild_chunk(chunk_id_to_rebuild)

        # get the new structure of the content
        rebuilt_content = self.content_factory.get(self.container_id,
                                                   uploaded_content.content_id)
        self.assertEqual(type(rebuilt_content), RainContent)

        for pos in broken_pos_list:
            c = rebuilt_content.chunks.filter(pos=pos)[0]
            rebuilt_meta, rebuilt_stream = self.blob_client.chunk_get(c.url)
            self.assertEqual(rebuilt_meta["chunk_id"], c.id)
            self.assertEqual(md5_stream(rebuilt_stream),
                             old_info[pos]["dl_hash"])
            self.assertEqual(c.hash, old_info[pos]["hash"])
            self.assertNotEqual(c.url, old_info[pos]["url"])
            del old_info[pos]["dl_meta"]["chunk_id"]
            del rebuilt_meta["chunk_id"]
            self.assertEqual(rebuilt_meta, old_info[pos]["dl_meta"])

    def test_content_0_byte_rebuild_pos_0_0(self):
        self._test_rebuild(0, ["0.0"])

    def test_content_0_byte_rebuild_pos_0_0_and_0_p0(self):
        self._test_rebuild(0, ["0.0", "0.p0"])

    def test_content_1_byte_rebuild_pos_0_0(self):
        self._test_rebuild(1, ["0.0"])

    def test_content_1_byte_rebuild_pos_0_p0(self):
        self._test_rebuild(1, ["0.p0"])

    def test_content_1_byte_rebuild_pos_0_0_and_0_p0(self):
        self._test_rebuild(1, ["0.0", "0.p0"])

    def test_content_chunksize_bytes_rebuild_pos_0_0(self):
        self._test_rebuild(self.conf["chunk_size"], ["0.0"])

    def test_content_chunksize_bytes_rebuild_pos_0_0_and_0_1(self):
        self._test_rebuild(self.conf["chunk_size"], ["0.0", "0.1"])

    def test_content_chunksize_bytes_rebuild_pos_0_0_and_0_p0(self):
        self._test_rebuild(self.conf["chunk_size"], ["0.0", "0.p0"])

    def test_content_chunksize_bytes_rebuild_pos_0_p0_and_0_p1(self):
        self._test_rebuild(self.conf["chunk_size"], ["0.p0", "0.p1"])

    def test_content_chunksize_bytes_rebuild_more_than_k_chunk(self):
        self.assertRaises(UnrecoverableContent, self._test_rebuild,
                          self.conf["chunk_size"], ["0.0", "0.1", "0.2"])

    def _new_content(self, data, broken_pos_list=()):
        old_content = self.content_factory.new(self.container_id, "titi",
                                               len(data), "RAIN")
        self.assertEqual(type(old_content), RainContent)

        old_content.upload(StringIO.StringIO(data))

        for pos in broken_pos_list:
            c = old_content.chunks.filter(pos=pos)[0]
            self.blob_client.chunk_delete(c.url)

        # get the new structure of the uploaded content
        return self.content_factory.get(self.container_id,
                                        old_content.content_id)

    def test_orphan_chunk(self):
        content = self._new_content(random_data(10))

        self.assertRaises(OrphanChunk, content.rebuild_chunk, "uNkNoWnId")

    def test_rebuild_on_the_fly(self):
        data = random_data(self.conf["chunk_size"])
        content = self._new_content(data, ["0.0", "0.p0"])

        stream = content.rebuild_metachunk("0", on_the_fly=True)

        dl_data = "".join(stream)

        self.assertEqual(dl_data, data)

        del_chunk_0_0 = content.chunks.filter(pos="0.0")[0]
        del_chunk_0_p0 = content.chunks.filter(pos="0.p0")[0]

        self.assertRaises(NotFound,
                          self.blob_client.chunk_get, del_chunk_0_0.url)
        self.assertRaises(NotFound,
                          self.blob_client.chunk_get, del_chunk_0_p0.url)

    def _test_download(self, data_size, broken_pos_list):
        data = random_data(data_size)
        content = self._new_content(data, broken_pos_list)

        downloaded_data = "".join(content.download())

        self.assertEqual(downloaded_data, data)

        for pos in broken_pos_list:
            c = content.chunks.filter(pos=pos)[0]
            self.assertRaises(NotFound, self.blob_client.chunk_delete, c.url)

    def test_download_content_0_byte_without_broken_chunks(self):
        self._test_download(0, [])

    def test_download_content_1_byte_without_broken_chunks(self):
        self._test_download(1, [])

    def test_download_content_chunksize_bytes_without_broken_chunks(self):
        self._test_download(self.conf["chunk_size"], [])

    def test_download_content_chunksize_plus_1_without_broken_chunks(self):
        self._test_download(self.conf["chunk_size"] + 1, [])

    def test_download_content_0_byte_with_broken_0_0_and_0_p0(self):
        self._test_download(0, ["0.0", "0.p0"])

    def test_download_content_1_byte_with_broken_0_0_and_0_p0(self):
        self._test_download(1, ["0.0", "0.p0"])

    def test_download_content_2xchunksize_with_broken_0_2_and_1_0(self):
        self._test_download(2 * self.conf["chunk_size"], ["0.2", "1.0"])

    def test_download_content_chunksize_bytes_with_3_broken_chunks(self):
        data = random_data(self.conf["chunk_size"])
        content = self._new_content(data, ["0.0", "0.1", "0.2"])
        gen = content.download()
        self.assertRaises(UnrecoverableContent, gen.next)

    def test_download_interrupt_close(self):
        data = random_data(self.conf["chunk_size"])
        content = self._new_content(data, ["0.p0"])

        download_iter = content.download()

        dl_data = ""
        for buf in download_iter:
            dl_data += buf
        self.assertEqual(len(dl_data), len(data))
        self.assertEqual(dl_data, data)
        download_iter.close()
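
Throughout these RAIN tests, chunk positions follow a "<metachunk>.<subposition>" scheme, with parity subpositions carrying a "p" prefix (e.g. "0.p0" is the first parity chunk of the first metachunk). A minimal sketch of that naming convention; the helper below is illustrative, not part of the tested API:

def parse_rain_position(pos):
    # "0.p1" -> (0, 1, True); "1.0" -> (1, 0, False)
    metapos, subpos = pos.split('.', 1)
    is_parity = subpos.startswith('p')
    return int(metapos), int(subpos.lstrip('p')), is_parity

assert parse_rain_position("0.p0") == (0, 0, True)
assert parse_rain_position("0.2") == (0, 2, False)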
Exemple #15
0
class BlobMoverWorker(object):
    def __init__(self, conf, logger, volume):
        self.conf = conf
        self.logger = logger or get_logger(conf)
        self.volume = volume
        self.namespace, self.address = check_volume(self.volume)
        self.running = False
        self.run_time = 0
        self.passes = 0
        self.errors = 0
        self.last_reported = 0
        self.last_usage_check = 0
        self.chunks_run_time = 0
        self.bytes_running_time = 0
        self.bytes_processed = 0
        self.total_bytes_processed = 0
        self.total_chunks_processed = 0
        self.concurrency = int_value(conf.get('concurrency'), 10)
        self.usage_target = int_value(
            conf.get('usage_target'), 0)
        self.usage_check_interval = int_value(
            conf.get('usage_check_interval'), 3600)
        self.report_interval = int_value(
            conf.get('report_interval'), 3600)
        self.max_chunks_per_second = int_value(
            conf.get('chunks_per_second'), 30)
        self.limit = int_value(conf.get('limit'), 0)
        self.allow_links = true_value(conf.get('allow_links', True))
        self.blob_client = BlobClient(conf)
        self.container_client = ContainerClient(conf, logger=self.logger)
        self.content_factory = ContentFactory(conf)

    def mover_pass(self, **kwargs):
        start_time = report_time = time.time()

        total_errors = 0
        mover_time = 0

        pool = GreenPool(self.concurrency)

        paths = paths_gen(self.volume)

        for path in paths:
            loop_time = time.time()

            now = time.time()
            if now - self.last_usage_check >= self.usage_check_interval:
                free_ratio = statfs(self.volume)
                usage = (1 - float(free_ratio)) * 100
                if usage <= self.usage_target:
                    self.logger.info(
                        'current usage %.2f%%: target reached (%.2f%%)', usage,
                        self.usage_target)
                    self.last_usage_check = now
                    break

            # Spawn a chunk move task.
            # The call will block if no green thread is available.
            pool.spawn_n(self.safe_chunk_move, path)

            self.chunks_run_time = ratelimit(
                self.chunks_run_time,
                self.max_chunks_per_second
            )
            self.total_chunks_processed += 1
            now = time.time()

            if now - self.last_reported >= self.report_interval:
                self.logger.info(
                    '%(start_time)s '
                    '%(passes)d '
                    '%(errors)d '
                    '%(c_rate).2f '
                    '%(b_rate).2f '
                    '%(total).2f '
                    '%(mover_time).2f '
                    '%(mover_rate).2f' % {
                        'start_time': time.ctime(report_time),
                        'passes': self.passes,
                        'errors': self.errors,
                        'c_rate': self.passes / (now - report_time),
                        'b_rate': self.bytes_processed / (now - report_time),
                        'total': (now - start_time),
                        'mover_time': mover_time,
                        'mover_rate': mover_time / (now - start_time)
                    }
                )
                report_time = now
                total_errors += self.errors
                self.passes = 0
                self.bytes_processed = 0
                self.last_reported = now
            mover_time += (now - loop_time)
            if self.limit != 0 and self.total_chunks_processed >= self.limit:
                break
        elapsed = (time.time() - start_time) or 0.000001
        self.logger.info(
            '%(elapsed).02f '
            '%(errors)d '
            '%(chunk_rate).2f '
            '%(bytes_rate).2f '
            '%(mover_time).2f '
            '%(mover_rate).2f' % {
                'elapsed': elapsed,
                'errors': total_errors + self.errors,
                'chunk_rate': self.total_chunks_processed / elapsed,
                'bytes_rate': self.total_bytes_processed / elapsed,
                'mover_time': mover_time,
                'mover_rate': mover_time / elapsed
            }
        )

    def safe_chunk_move(self, path):
        chunk_id = path.rsplit('/', 1)[-1]
        if len(chunk_id) != STRLEN_CHUNKID:
            self.logger.warn('Not a chunk %s', path)
            return
        for char in chunk_id:
            if char not in hexdigits:
                self.logger.warn('Not a chunk %s', path)
                return
        try:
            self.chunk_move(path, chunk_id)
        except Exception as err:
            self.errors += 1
            self.logger.error('ERROR while moving chunk %s: %s', path, err)
        self.passes += 1

    def load_chunk_metadata(self, path, chunk_id):
        with open(path) as file_:
            meta, _ = read_chunk_metadata(file_, chunk_id)
            return meta

    def chunk_move(self, path, chunk_id):
        meta = self.load_chunk_metadata(path, chunk_id)
        container_id = meta['container_id']
        content_id = meta['content_id']
        chunk_id = meta['chunk_id']

        try:
            content = self.content_factory.get(container_id, content_id)
        except ContentNotFound:
            raise exc.OrphanChunk('Content not found')

        new_chunk = content.move_chunk(chunk_id)

        self.logger.info(
            'moved chunk http://%s/%s to %s',
            self.address, chunk_id, new_chunk['url'])

        if self.allow_links:
            old_links = meta['links']
            for chunk_id, fullpath in old_links.iteritems():
                # pylint: disable=unbalanced-tuple-unpacking
                account, container, _, _, content_id = \
                    decode_fullpath(fullpath)
                container_id = cid_from_name(account, container)

                try:
                    content = self.content_factory.get(container_id,
                                                       content_id)
                except ContentNotFound:
                    raise exc.OrphanChunk('Content not found')

                new_linked_chunk = content.move_linked_chunk(
                    chunk_id, new_chunk['url'])

                self.logger.info(
                    'moved chunk http://%s/%s to %s',
                    self.address, chunk_id, new_linked_chunk['url'])
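
A hedged usage sketch for this mover worker: the namespace name and the volume path are assumptions, while the configuration keys are the ones read in __init__ above.

conf = {
    'namespace': 'OPENIO',       # assumed namespace name
    'concurrency': 10,           # green threads moving chunks in parallel
    'usage_target': 20,          # stop once volume usage falls to 20%
    'chunks_per_second': 30,     # rate limit between spawned moves
    'limit': 0,                  # 0 means no cap on processed chunks
}
worker = BlobMoverWorker(conf, None, '/var/lib/oio/sds/vol1/rawx-1')
worker.mover_pass()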
Exemple #16
0
class TestDupContent(BaseTestCase):
    def setUp(self):
        super(TestDupContent, self).setUp()

        if len(self.conf['rawx']) < 3:
            self.skipTest("Not enough rawx. "
                          "Dup tests needs more than 2 rawx to run")

        self.namespace = self.conf['namespace']
        self.account = self.conf['account']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_client = ContainerClient(self.gridconf)
        self.blob_client = BlobClient()
        self.container_name = "TestDupContent%f" % time.time()
        self.container_client.container_create(acct=self.account,
                                               ref=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()

    def tearDown(self):
        super(TestDupContent, self).tearDown()

    def _test_upload(self, stgpol, data_size):
        data = random_data(data_size)
        content = self.content_factory.new(self.container_id, "titi",
                                           len(data), stgpol)
        self.assertEqual(type(content), DupContent)

        content.upload(StringIO.StringIO(data))

        meta, chunks = self.container_client.content_show(
            cid=self.container_id, content=content.content_id)
        chunks = ChunksHelper(chunks)
        self.assertEqual(meta['hash'], md5_data(data))
        self.assertEqual(meta['length'], str(len(data)))
        self.assertEqual(meta['policy'], stgpol)
        self.assertEqual(meta['name'], "titi")

        metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size))
        if metachunk_nb == 0:
            metachunk_nb = 1  # special case for empty content

        if stgpol == "THREECOPIES":
            nb_copy = 3
        elif stgpol == "TWOCOPIES":
            nb_copy = 2
        elif stgpol == "SINGLE":
            nb_copy = 1

        self.assertEqual(len(chunks), metachunk_nb * nb_copy)

        for pos in range(metachunk_nb):
            chunks_at_pos = chunks.filter(pos=pos)
            self.assertEqual(len(chunks_at_pos), nb_copy)

            data_begin = pos * self.chunk_size
            data_end = pos * self.chunk_size + self.chunk_size
            chunk_hash = md5_data(data[data_begin:data_end])

            for chunk in chunks_at_pos:
                meta, stream = self.blob_client.chunk_get(chunk.url)
                self.assertEqual(md5_stream(stream), chunk_hash)
                self.assertEqual(meta['content_size'], str(len(data)))
                self.assertEqual(meta['content_path'], "titi")
                self.assertEqual(meta['content_cid'], self.container_id)
                self.assertEqual(meta['content_id'], content.content_id)
                self.assertEqual(meta['chunk_id'], chunk.id)
                self.assertEqual(meta['chunk_pos'], str(pos))
                self.assertEqual(meta['chunk_hash'], chunk_hash)

    def test_twocopies_upload_0_byte(self):
        self._test_upload("TWOCOPIES", 0)

    def test_twocopies_upload_1_byte(self):
        self._test_upload("TWOCOPIES", 1)

    def test_twocopies_upload_chunksize_bytes(self):
        self._test_upload("TWOCOPIES", self.chunk_size)

    def test_twocopies_upload_chunksize_plus_1_bytes(self):
        self._test_upload("TWOCOPIES", self.chunk_size + 1)

    def test_single_upload_0_byte(self):
        self._test_upload("SINGLE", 0)

    def test_single_upload_chunksize_plus_1_bytes(self):
        self._test_upload("SINGLE", self.chunk_size + 1)

    def test_chunks_cleanup_when_upload_failed(self):
        data = random_data(2 * self.chunk_size)
        content = self.content_factory.new(self.container_id, "titi",
                                           len(data), "TWOCOPIES")
        self.assertEqual(type(content), DupContent)

        # set bad url for position 1
        for chunk in content.chunks.filter(pos=1):
            chunk.url = "http://127.0.0.1:9/DEADBEEF"

        self.assertRaises(Exception, content.upload, StringIO.StringIO(data))
        for chunk in content.chunks.exclude(pos=1):
            self.assertRaises(NotFound, self.blob_client.chunk_head, chunk.url)

    def _new_content(self, stgpol, data, broken_pos_list=()):
        old_content = self.content_factory.new(self.container_id, "titi",
                                               len(data), stgpol)
        self.assertEqual(type(old_content), DupContent)

        old_content.upload(StringIO.StringIO(data))

        broken_chunks_info = {}
        for pos, idx in broken_pos_list:
            c = old_content.chunks.filter(pos=pos)[idx]
            meta, stream = self.blob_client.chunk_get(c.url)
            if pos not in broken_chunks_info:
                broken_chunks_info[pos] = {}
            broken_chunks_info[pos][idx] = {
                "url": c.url,
                "id": c.id,
                "hash": c.hash,
                "dl_meta": meta,
                "dl_hash": md5_stream(stream)
            }
            self.blob_client.chunk_delete(c.url)

        # get the new structure of the uploaded content
        return (self.content_factory.get(self.container_id,
                                         old_content.content_id),
                broken_chunks_info)

    def _test_rebuild(self, stgpol, data_size, broken_pos_list,
                      full_rebuild_pos):
        data = random_data(data_size)
        content, broken_chunks_info = self._new_content(
            stgpol, data, broken_pos_list)

        rebuild_pos, rebuild_idx = full_rebuild_pos
        rebuild_chunk_info = broken_chunks_info[rebuild_pos][rebuild_idx]
        content.rebuild_chunk(rebuild_chunk_info["id"])

        # get the new structure of the content
        rebuilt_content = self.content_factory.get(self.container_id,
                                                   content.content_id)
        self.assertEqual(type(rebuilt_content), DupContent)

        # find the rebuilt chunk
        for c in rebuilt_content.chunks.filter(pos=rebuild_pos):
            if len(content.chunks.filter(id=c.id)) > 0:
                # not the rebuilt chunk
                # if this chunk is broken, it must not have been rebuilt
                for b_c_i in broken_chunks_info[rebuild_pos].values():
                    if c.id == b_c_i["id"]:
                        with ExpectedException(NotFound):
                            _, _ = self.blob_client.chunk_get(c.url)
                continue
            meta, stream = self.blob_client.chunk_get(c.url)
            self.assertEqual(meta["chunk_id"], c.id)
            self.assertEqual(md5_stream(stream), rebuild_chunk_info["dl_hash"])
            self.assertEqual(c.hash, rebuild_chunk_info["hash"])
            self.assertThat(c.url, NotEquals(rebuild_chunk_info["url"]))
            del meta["chunk_id"]
            del rebuild_chunk_info["dl_meta"]["chunk_id"]
            self.assertEqual(meta, rebuild_chunk_info["dl_meta"])

    def test_2copies_content_0_byte_1broken_rebuild_pos_0_idx_0(self):
        self._test_rebuild("TWOCOPIES", 0, [(0, 0)], (0, 0))

    def test_2copies_content_1_byte_1broken_rebuild_pos_0_idx_1(self):
        self._test_rebuild("TWOCOPIES", 1, [(0, 1)], (0, 1))

    def test_3copies_content_chunksize_bytes_2broken_rebuild_pos_0_idx_1(self):
        if len(self.conf['rawx']) <= 3:
            self.skipTest("Need more than 3 rawx")
        self._test_rebuild("THREECOPIES", self.chunk_size, [(0, 0), (0, 1)],
                           (0, 1))

    def test_3copies_content_2xchksize_bytes_2broken_rebuild_pos_1_idx_2(self):
        if len(self.conf['rawx']) <= 3:
            self.skipTest("Need more than 3 rawx")
        self._test_rebuild("THREECOPIES", 2 * self.chunk_size,
                           [(1, 0), (1, 2)], (1, 2))

    def test_2copies_content_0_byte_2broken_rebuild_pos_0_idx_0(self):
        with ExpectedException(UnrecoverableContent):
            self._test_rebuild("TWOCOPIES", 0, [(0, 0), (0, 1)], (0, 0))

    def _test_download(self, stgpol, data_size, broken_pos_list):
        data = random_data(data_size)
        content, _ = self._new_content(stgpol, data, broken_pos_list)

        downloaded_data = "".join(content.download())

        self.assertEqual(downloaded_data, data)

        for pos, idx in broken_pos_list:
            # check nothing has been rebuilt
            c = content.chunks.filter(pos=pos)[idx]
            self.assertRaises(NotFound, self.blob_client.chunk_delete, c.url)

    def test_twocopies_download_content_0_byte_without_broken_chunks(self):
        self._test_download("TWOCOPIES", 0, [])

    def test_twocopies_download_content_0_byte_with_broken_0_0(self):
        self._test_download("TWOCOPIES", 0, [(0, 0)])

    def test_twocopies_download_content_1_byte_without_broken_chunks(self):
        self._test_download("TWOCOPIES", 1, [])

    def test_twocopies_download_content_1_byte_with_broken_0_0(self):
        self._test_download("TWOCOPIES", 1, [(0, 0)])

    def test_twocopies_download_chunksize_bytes_without_broken_chunks(self):
        self._test_download("TWOCOPIES", self.chunk_size, [])

    def test_twocopies_download_2xchunksize_bytes_with_broken_0_0_and_1_0(self):
        self._test_download("TWOCOPIES", self.chunk_size * 2, [(0, 0), (1, 0)])

    def test_twocopies_download_content_chunksize_bytes_2_broken_chunks(self):
        data = random_data(self.chunk_size)
        content, _ = self._new_content("TWOCOPIES", data, [(0, 0), (0, 1)])
        gen = content.download()
        self.assertRaises(UnrecoverableContent, gen.next)

    def test_single_download_content_1_byte_without_broken_chunks(self):
        self._test_download("SINGLE", 1, [])

    def test_single_download_chunksize_bytes_plus_1_without_broken_chunk(self):
        self._test_download("SINGLE", self.chunk_size * 2, [])
Exemple #17
0
class BlobRebuilderWorker(RebuilderWorker):
    def __init__(self, conf, logger, volume, try_chunk_delete=False, **kwargs):
        super(BlobRebuilderWorker, self).__init__(conf, logger, **kwargs)
        self.volume = volume
        self.bytes_processed = 0
        self.total_bytes_processed = 0
        self.dry_run = true_value(conf.get('dry_run', False))
        self.allow_same_rawx = true_value(conf.get('allow_same_rawx'))
        self.rdir_client = RdirClient(conf, logger=self.logger)
        self.content_factory = ContentFactory(conf, logger=self.logger)
        self.try_chunk_delete = try_chunk_delete

    def _rebuild_one(self, chunk, **kwargs):
        cid, content_id, chunk_id_or_pos, _ = chunk
        if self.dry_run:
            self.dryrun_chunk_rebuild(cid, content_id, chunk_id_or_pos)
        else:
            self.safe_chunk_rebuild(cid, content_id, chunk_id_or_pos)

    def _get_report(self, num, start_time, end_time, total_time, report_time,
                    **kwargs):
        return ('RUN  %(volume)s '
                'worker=%(num)d '
                'started=%(start_time)s '
                'passes=%(passes)d '
                'errors=%(errors)d '
                'chunks=%(nb_chunks)d %(c_rate).2f/s '
                'bytes=%(nb_bytes)d %(b_rate).2fB/s '
                'waiting_time=%(waiting_time).2f '
                'rebuilder_time=%(rebuilder_time).2f '
                'total_time=%(total_time).2f '
                '(rebuilder: %(success_rate).2f%%)' % {
                    'volume': self.volume,
                    'num': num,
                    'start_time':
                        datetime.fromtimestamp(int(report_time)).isoformat(),
                    'passes': self.passes,
                    'errors': self.errors,
                    'nb_chunks': self.total_items_processed,
                    'nb_bytes': self.total_bytes_processed,
                    'c_rate': self.passes / (end_time - report_time),
                    'b_rate': self.bytes_processed / (end_time - report_time),
                    'waiting_time': self.waiting_time,
                    'rebuilder_time': self.rebuilder_time,
                    'total_time': (end_time - start_time),
                    'success_rate':
                        100 * ((self.total_items_processed - self.errors) /
                               float(self.total_items_processed))
                })

    def dryrun_chunk_rebuild(self, container_id, content_id, chunk_id_or_pos):
        self.logger.info(
            "[dryrun] Rebuilding "
            "container %s, content %s, chunk %s", container_id, content_id,
            chunk_id_or_pos)
        self.passes += 1

    def safe_chunk_rebuild(self, container_id, content_id, chunk_id_or_pos):
        try:
            self.chunk_rebuild(container_id, content_id, chunk_id_or_pos)
        except Exception as e:
            self.errors += 1
            self.logger.error('ERROR while rebuilding chunk %s|%s|%s: %s',
                              container_id, content_id, chunk_id_or_pos, e)

        self.passes += 1

    def chunk_rebuild(self, container_id, content_id, chunk_id_or_pos):
        self.logger.info('Rebuilding (container %s, content %s, chunk %s)',
                         container_id, content_id, chunk_id_or_pos)
        try:
            content = self.content_factory.get(container_id, content_id)
        except ContentNotFound:
            raise OrphanChunk('Content not found: possible orphan chunk')

        chunk_size = 0
        chunk_pos = None
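        # shorter than 32 characters: cannot be a chunk id, treat it as a position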
        if len(chunk_id_or_pos) < 32:
            chunk_pos = chunk_id_or_pos
            chunk_id = None
            metapos = int(chunk_pos.split('.', 1)[0])
            chunk_size = content.chunks.filter(metapos=metapos).all()[0].size
        else:
            if '/' in chunk_id_or_pos:
                chunk_id = chunk_id_or_pos.rsplit('/', 1)[-1]
            else:
                chunk_id = chunk_id_or_pos

            chunk = content.chunks.filter(id=chunk_id).one()
            if chunk is None:
                raise OrphanChunk(("Chunk not found in content:"
                                   'possible orphan chunk'))
            elif self.volume and chunk.host != self.volume:
                raise ValueError("Chunk does not belong to this volume")
            chunk_size = chunk.size

        content.rebuild_chunk(chunk_id,
                              allow_same_rawx=self.allow_same_rawx,
                              chunk_pos=chunk_pos)

        if self.try_chunk_delete:
            try:
                content.blob_client.chunk_delete(chunk.url)
                self.logger.info("Chunk %s deleted", chunk.url)
            except NotFound as exc:
                self.logger.debug("Chunk %s: %s", chunk.url, exc)

        # This call does not raise exception if chunk is not referenced
        if chunk_id is not None:
            self.rdir_client.chunk_delete(chunk.host, container_id, content_id,
                                          chunk_id)

        self.bytes_processed += chunk_size
        self.total_bytes_processed += chunk_size
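
For reference, _rebuild_one above consumes 4-tuples; a sketch of the expected item shape, with assumed example values:

# (container id, content id, chunk id or position, extra payload)
item = ('3FA2C4A1ED2605005335A276890EC458',
        'F4B1C8DD132705007DE8B43D0709DAA2',
        '0.p0',
        None)
# worker._rebuild_one(item)  # dry_run decides between log-only and real rebuild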
Exemple #18
0
class TestContentFactory(BaseTestCase):
    def setUp(self):
        super(TestContentFactory, self).setUp()
        self.namespace = self.conf['namespace']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_name = "TestContentFactory%f" % time.time()
        self.container_client = ContainerClient(self.gridconf)
        self.container_client.container_create(acct=self.account,
                                               ref=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()

    def tearDown(self):
        super(TestContentFactory, self).tearDown()

    def test_extract_datasec(self):
        self.content_factory.ns_info = {
            "data_security": {
                "DUPONETWO": "DUP:distance=1|nb_copy=2",
                "RAIN": "RAIN:k=6|m=2|algo=liber8tion"
            },
            "storage_policy": {
                "RAIN": "NONE:RAIN:NONE",
                "SINGLE": "NONE:NONE:NONE",
                "TWOCOPIES": "NONE:DUPONETWO:NONE"
            }
        }

        ds_type, ds_args = self.content_factory._extract_datasec("RAIN")
        self.assertEqual(ds_type, "RAIN")
        self.assertEqual(ds_args, {
            "k": "6",
            "m": "2",
            "algo": "liber8tion"
        })

        ds_type, ds_args = self.content_factory._extract_datasec("SINGLE")
        self.assertEqual(ds_type, "DUP")
        self.assertEqual(ds_args, {
            "nb_copy": "1",
            "distance": "0"
        })

        ds_type, ds_args = self.content_factory._extract_datasec("TWOCOPIES")
        self.assertEqual(ds_type, "DUP")
        self.assertEqual(ds_args, {
            "nb_copy": "2",
            "distance": "1"
        })

        self.assertRaises(InconsistentContent,
                          self.content_factory._extract_datasec,
                          "UnKnOwN")

    def test_get_rain(self):
        meta = {
            "chunk-method": "plain/rain?algo=liber8tion&k=6&m=2",
            "ctime": "1450176946",
            "deleted": "False",
            "hash": "E952A419957A6E405BFC53EC65483F73",
            "hash-method": "md5",
            "id": "3FA2C4A1ED2605005335A276890EC458",
            "length": "658",
            "mime-type": "application/octet-stream",
            "name": "tox.ini",
            "policy": "RAIN",
            "version": "1450176946676289"
        }
        chunks = [
            {
                "url": "http://127.0.0.1:6012/A0A0",
                "pos": "0.p0", "size": 512,
                "hash": "E7D4E4AD460971CA2E3141F2102308D4"},
            {
                "url": "http://127.0.0.1:6010/A01",
                "pos": "0.1", "size": 146,
                "hash": "760AB5DA7C51A3654F1CA622687CD6C3"},
            {
                "url": "http://127.0.0.1:6011/A00",
                "pos": "0.0", "size": 512,
                "hash": "B1D08B86B8CAA90A2092CCA0DF9201DB"},
            {
                "url": "http://127.0.0.1:6013/A0A1",
                "pos": "0.p1", "size": 512,
                "hash": "DA9D7F72AEEA5791565724424CE45C16"}
        ]
        self.content_factory.container_client.content_show = Mock(
            return_value=(meta, chunks))
        c = self.content_factory.get("xxx_container_id", "xxx_content_id")
        self.assertEqual(type(c), RainContent)
        self.assertEqual(c.content_id, "3FA2C4A1ED2605005335A276890EC458")
        self.assertEqual(c.length, 658)
        self.assertEqual(c.path, "tox.ini")
        self.assertEqual(c.version, "1450176946676289")
        self.assertEqual(c.algo, "liber8tion")
        self.assertEqual(c.k, 6)
        self.assertEqual(c.m, 2)
        self.assertEqual(len(c.chunks), 4)
        self.assertEqual(c.chunks[0].raw(), chunks[2])
        self.assertEqual(c.chunks[1].raw(), chunks[1])
        self.assertEqual(c.chunks[2].raw(), chunks[0])
        self.assertEqual(c.chunks[3].raw(), chunks[3])

    def test_get_dup(self):
        meta = {
            "chunk-method": "plain/bytes",
            "ctime": "1450176946",
            "deleted": "False",
            "hash": "E952A419957A6E405BFC53EC65483F73",
            "hash-method": "md5",
            "id": "3FA2C4A1ED2605005335A276890EC458",
            "length": "658",
            "mime-type": "application/octet-stream",
            "name": "tox.ini",
            "policy": "TWOCOPIES",
            "version": "1450176946676289"
        }
        chunks = [
            {
                "url": "http://127.0.0.1:6010/A0",
                "pos": "0", "size": 658,
                "hash": "E952A419957A6E405BFC53EC65483F73"},
            {
                "url": "http://127.0.0.1:6011/A1",
                "pos": "0", "size": 658,
                "hash": "E952A419957A6E405BFC53EC65483F73"}
        ]
        self.content_factory.container_client.content_show = Mock(
            return_value=(meta, chunks))
        c = self.content_factory.get("xxx_container_id", "xxx_content_id")
        self.assertEqual(type(c), DupContent)
        self.assertEqual(c.content_id, "3FA2C4A1ED2605005335A276890EC458")
        self.assertEqual(c.length, 658)
        self.assertEqual(c.path, "tox.ini")
        self.assertEqual(c.version, "1450176946676289")
        self.assertEqual(c.nb_copy, 2)
        self.assertEqual(c.distance, 1)
        self.assertEqual(len(c.chunks), 2)
        self.assertEqual(c.chunks[0].raw(), chunks[0])
        self.assertEqual(c.chunks[1].raw(), chunks[1])

    def test_new_rain(self):
        meta = {
            "chunk-method": "plain/rain?algo=liber8tion&k=6&m=2",
            "ctime": "1450341162",
            "deleted": "False",
            "hash": "",
            "hash-method": "md5",
            "id": "F4B1C8DD132705007DE8B43D0709DAA2",
            "length": "1000",
            "mime-type": "application/octet-stream",
            "name": "titi",
            "policy": "RAIN",
            "version": "1450341162332663"
        }
        chunks = [
            {
                "url": "http://127.0.0.1:6010/0_p1",
                "pos": "0.p1", "size": 1048576,
                "hash": "00000000000000000000000000000000"},
            {
                "url": "http://127.0.0.1:6011/0_p0",
                "pos": "0.p0", "size": 1048576,
                "hash": "00000000000000000000000000000000"},
            {
                "url": "http://127.0.0.1:6016/0_1",
                "pos": "0.1", "size": 1048576,
                "hash": "00000000000000000000000000000000"},
            {
                "url": "http://127.0.0.1:6017/0_0",
                "pos": "0.0", "size": 1048576,
                "hash": "00000000000000000000000000000000"}
        ]
        self.content_factory.container_client.content_prepare = Mock(
            return_value=(meta, chunks))
        c = self.content_factory.new("xxx_container_id", "titi",
                                     1000, "RAIN")
        self.assertEqual(type(c), RainContent)
        self.assertEqual(c.content_id, "F4B1C8DD132705007DE8B43D0709DAA2")
        self.assertEqual(c.length, 1000)
        self.assertEqual(c.path, "titi")
        self.assertEqual(c.version, "1450341162332663")
        self.assertEqual(c.algo, "liber8tion")
        self.assertEqual(c.k, 6)
        self.assertEqual(c.m, 2)
        self.assertEqual(len(c.chunks), 4)
        self.assertEqual(c.chunks[0].raw(), chunks[3])
        self.assertEqual(c.chunks[1].raw(), chunks[2])
        self.assertEqual(c.chunks[2].raw(), chunks[1])
        self.assertEqual(c.chunks[3].raw(), chunks[0])

    def _new_content(self, stgpol, data):
        old_content = self.content_factory.new(self.container_id, "titi",
                                               len(data), stgpol)
        old_content.upload(StringIO.StringIO(data))
        return self.content_factory.get(self.container_id,
                                        old_content.content_id)

    def _test_change_policy(self, data_size, old_policy, new_policy):
        if (old_policy == "RAIN" or new_policy == "RAIN") \
                and len(self.conf['rawx']) < 8:
            self.skipTest("RAIN: Need more than 8 rawx to run")

        data = random_data(data_size)
        obj_type = {
            "SINGLE": DupContent,
            "TWOCOPIES": DupContent,
            "THREECOPIES": DupContent,
            "RAIN": RainContent
        }
        old_content = self._new_content(old_policy, data)
        self.assertEqual(type(old_content), obj_type[old_policy])

        changed_content = self.content_factory.change_policy(
            old_content.container_id, old_content.content_id, new_policy)

        self.assertRaises(NotFound, self.container_client.content_show,
                          self.account,
                          cid=old_content.container_id,
                          content=old_content.content_id)

        new_content = self.content_factory.get(self.container_id,
                                               changed_content.content_id)
        self.assertEqual(type(new_content), obj_type[new_policy])

        downloaded_data = "".join(new_content.download())

        self.assertEqual(downloaded_data, data)

    def test_change_content_0_byte_policy_single_to_rain(self):
        self._test_change_policy(0, "SINGLE", "RAIN")

    def test_change_content_0_byte_policy_rain_to_twocopies(self):
        self._test_change_policy(0, "RAIN", "TWOCOPIES")

    def test_change_content_1_byte_policy_single_to_rain(self):
        self._test_change_policy(1, "SINGLE", "RAIN")

    def test_change_content_chunksize_bytes_policy_twocopies_to_rain(self):
        self._test_change_policy(self.chunk_size, "TWOCOPIES", "RAIN")

    def test_change_content_2xchunksize_bytes_policy_threecopies_to_rain(self):
        self._test_change_policy(self.chunk_size * 2, "THREECOPIES", "RAIN")

    def test_change_content_1_byte_policy_rain_to_threecopies(self):
        self._test_change_policy(1, "RAIN", "THREECOPIES")

    def test_change_content_chunksize_bytes_policy_rain_to_twocopies(self):
        self._test_change_policy(self.chunk_size, "RAIN", "TWOCOPIES")

    def test_change_content_2xchunksize_bytes_policy_rain_to_single(self):
        self._test_change_policy(self.chunk_size * 2, "RAIN", "SINGLE")

    def test_change_content_0_byte_policy_twocopies_to_threecopies(self):
        self._test_change_policy(0, "TWOCOPIES", "THREECOPIES")

    def test_change_content_chunksize_bytes_policy_single_to_twocopies(self):
        self._test_change_policy(self.chunk_size, "SINGLE", "TWOCOPIES")

    def test_change_content_2xchunksize_bytes_policy_3copies_to_single(self):
        self._test_change_policy(self.chunk_size * 2, "THREECOPIES", "SINGLE")

    def test_change_content_with_same_policy(self):
        data = random_data(10)
        old_content = self._new_content("TWOCOPIES", data)
        changed_content = self.content_factory.change_policy(
            old_content.container_id, old_content.content_id, "TWOCOPIES")
        self.assertEqual(old_content.content_id, changed_content.content_id)

    def test_change_policy_unknown_content(self):
        self.assertRaises(ContentNotFound, self.content_factory.change_policy,
                          self.container_id, "1234", "SINGLE")

    def test_change_policy_unknown_storage_policy(self):
        data = random_data(10)
        old_content = self._new_content("TWOCOPIES", data)
        self.assertRaises(ClientException, self.content_factory.change_policy,
                          self.container_id, old_content.content_id, "UnKnOwN")
Exemple #19
0
class BlobRebuilderWorker(object):
    def __init__(self, conf, logger, volume):
        self.conf = conf
        self.logger = logger or get_logger(conf)
        self.volume = volume
        self.run_time = 0
        self.passes = 0
        self.errors = 0
        self.last_reported = 0
        self.chunks_run_time = 0
        self.bytes_running_time = 0
        self.bytes_processed = 0
        self.total_bytes_processed = 0
        self.total_chunks_processed = 0
        self.dry_run = true_value(
            conf.get('dry_run', False))
        self.report_interval = int_value(
            conf.get('report_interval'), 3600)
        self.max_chunks_per_second = int_value(
            conf.get('chunks_per_second'), 30)
        self.max_bytes_per_second = int_value(
            conf.get('bytes_per_second'), 10000000)
        self.rdir_fetch_limit = int_value(
            conf.get('rdir_fetch_limit'), 100)
        self.rdir_client = RdirClient(conf)
        self.content_factory = ContentFactory(conf)

    def rebuilder_pass_with_lock(self):
        self.rdir_client.admin_lock(self.volume,
                                    "rebuilder on %s" % gethostname())
        try:
            self.rebuilder_pass()
        finally:
            self.rdir_client.admin_unlock(self.volume)

    def rebuilder_pass(self):
        start_time = report_time = time.time()

        total_errors = 0
        rebuilder_time = 0

        chunks = self.rdir_client.chunk_fetch(self.volume,
                                              limit=self.rdir_fetch_limit,
                                              rebuild=True)
        for container_id, content_id, chunk_id, data in chunks:
            loop_time = time.time()

            if self.dry_run:
                self.dryrun_chunk_rebuild(container_id, content_id, chunk_id)
            else:
                self.safe_chunk_rebuild(container_id, content_id, chunk_id)

            self.chunks_run_time = ratelimit(
                self.chunks_run_time,
                self.max_chunks_per_second
            )
            self.total_chunks_processed += 1
            now = time.time()

            if now - self.last_reported >= self.report_interval:
                self.logger.info(
                    '%(start_time)s '
                    '%(passes)d '
                    '%(errors)d '
                    '%(c_rate).2f '
                    '%(b_rate).2f '
                    '%(total).2f '
                    '%(rebuilder_time).2f '
                    '%(rebuilder_rate).2f' % {
                        'start_time': time.ctime(report_time),
                        'passes': self.passes,
                        'errors': self.errors,
                        'c_rate': self.passes / (now - report_time),
                        'b_rate': self.bytes_processed / (now - report_time),
                        'total': (now - start_time),
                        'rebuilder_time': rebuilder_time,
                        'rebuilder_rate': rebuilder_time / (now - start_time)
                    }
                )
                report_time = now
                total_errors += self.errors
                self.passes = 0
                self.bytes_processed = 0
                self.last_reported = now
            rebuilder_time += (now - loop_time)
        elapsed = (time.time() - start_time) or 0.000001
        self.logger.info(
            '%(elapsed).02f '
            '%(errors)d '
            '%(chunk_rate).2f '
            '%(bytes_rate).2f '
            '%(rebuilder_time).2f '
            '%(rebuilder_rate).2f' % {
                'elapsed': elapsed,
                'errors': total_errors + self.errors,
                'chunk_rate': self.total_chunks_processed / elapsed,
                'bytes_rate': self.total_bytes_processed / elapsed,
                'rebuilder_time': rebuilder_time,
                'rebuilder_rate': rebuilder_time / elapsed
            }
        )

    def dryrun_chunk_rebuild(self, container_id, content_id, chunk_id):
        self.logger.info("[dryrun] Rebuilding "
                         "container %s, content %s, chunk %s",
                         container_id, content_id, chunk_id)
        self.passes += 1

    def safe_chunk_rebuild(self, container_id, content_id, chunk_id):
        try:
            self.chunk_rebuild(container_id, content_id, chunk_id)
        except Exception as e:
            self.errors += 1
            self.logger.error('ERROR while rebuilding chunk %s|%s|%s: %s',
                              container_id, content_id, chunk_id, e)

        self.passes += 1

    def chunk_rebuild(self, container_id, content_id, chunk_id):
        self.logger.info('Rebuilding (container %s, content %s, chunk %s)',
                         container_id, content_id, chunk_id)

        try:
            content = self.content_factory.get(container_id, content_id)
        except ContentNotFound:
            raise exc.OrphanChunk('Content not found')

        chunk = content.chunks.filter(id=chunk_id).one()
        if chunk is None:
            raise OrphanChunk("Chunk not found in content")
        chunk_size = chunk.size

        content.rebuild_chunk(chunk_id)

        self.rdir_client.chunk_push(self.volume, container_id, content_id,
                                    chunk_id, rtime=int(time.time()))

        self.bytes_processed += chunk_size
        self.total_bytes_processed += chunk_size
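
A hedged usage sketch for this rebuilder; the volume identifier and configuration values are assumptions:

conf = {'namespace': 'OPENIO', 'dry_run': True, 'rdir_fetch_limit': 100}
worker = BlobRebuilderWorker(conf, None, '127.0.0.1:6010')
# takes the rdir admin lock, runs one pass, then releases the lock
worker.rebuilder_pass_with_lock()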
Exemple #20
0
class BlobRebuilderWorker(RebuilderWorker):
    def __init__(self, rebuilder, try_chunk_delete=False, **kwargs):
        super(BlobRebuilderWorker, self).__init__(rebuilder, **kwargs)
        self.dry_run = true_value(self.rebuilder.conf.get('dry_run', False))
        self.allow_same_rawx = true_value(
            self.rebuilder.conf.get('allow_same_rawx'))
        self.try_chunk_delete = try_chunk_delete
        self.rdir_client = self.rebuilder.rdir_client
        self.content_factory = ContentFactory(self.rebuilder.conf,
                                              logger=self.logger)
        self.sender = None

    def _rebuild_one(self, chunk, **kwargs):
        container_id, content_id, chunk_id_or_pos, _ = chunk
        if self.dry_run:
            self.dryrun_chunk_rebuild(container_id, content_id,
                                      chunk_id_or_pos, **kwargs)
            return 0
        else:
            return self.chunk_rebuild(container_id, content_id,
                                      chunk_id_or_pos, **kwargs)

    def update_processed(self, chunk, bytes_processed, error=None, **kwargs):
        container_id, content_id, chunk_id_or_pos, more = chunk
        if more is not None:
            reply = more.get('reply', None)
            if reply is not None:
                event = {
                    'rebuilder_id': reply['rebuilder_id'],
                    'beanstalkd': self.rebuilder.beanstalkd_listener.addr,
                    'cid': container_id,
                    'content_id': content_id,
                    'chunk_id_or_pos': chunk_id_or_pos
                }
                if error is not None:
                    event['error'] = error
                if bytes_processed is not None:
                    event['bytes_processed'] = bytes_processed
                try:
                    if self.sender is None:
                        self.sender = BeanstalkdSender(reply['addr'],
                                                       reply['tube'],
                                                       self.logger, **kwargs)
                    elif self.sender.addr != reply['addr'] \
                            or self.sender.tube != reply['tube']:
                        self.sender.close()
                        self.sender = BeanstalkdSender(reply['addr'],
                                                       reply['tube'],
                                                       self.logger, **kwargs)

                    self.sender.send_event(json.dumps(event))
                except BeanstalkError as exc:
                    self.logger.warn(
                        'reply failed %s: %s',
                        self.rebuilder._item_to_string(chunk, **kwargs), exc)

        super(BlobRebuilderWorker, self).update_processed(chunk,
                                                          bytes_processed,
                                                          error=error,
                                                          **kwargs)

    def dryrun_chunk_rebuild(self, container_id, content_id, chunk_id_or_pos,
                             **kwargs):
        self.logger.info(
            "[dryrun] Rebuilding "
            "container %s, content %s, chunk %s", container_id, content_id,
            chunk_id_or_pos)

    def chunk_rebuild(self, container_id, content_id, chunk_id_or_pos,
                      **kwargs):
        self.logger.info('Rebuilding (container %s, content %s, chunk %s)',
                         container_id, content_id, chunk_id_or_pos)
        try:
            content = self.content_factory.get(container_id, content_id)
        except ContentNotFound:
            raise OrphanChunk('Content not found: possible orphan chunk')

        chunk_size = 0
        chunk_pos = None
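        # shorter than 32 characters: cannot be a chunk id, treat it as a position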
        if len(chunk_id_or_pos) < 32:
            chunk_pos = chunk_id_or_pos
            chunk_id = None
            metapos = int(chunk_pos.split('.', 1)[0])
            chunk_size = content.chunks.filter(metapos=metapos).all()[0].size
        else:
            if '/' in chunk_id_or_pos:
                chunk_id = chunk_id_or_pos.rsplit('/', 1)[-1]
            else:
                chunk_id = chunk_id_or_pos

            chunk = content.chunks.filter(id=chunk_id).one()
            if chunk is None:
                raise OrphanChunk(("Chunk not found in content:"
                                   'possible orphan chunk'))
            elif self.volume and chunk.host != self.volume:
                raise ValueError("Chunk does not belong to this volume")
            chunk_size = chunk.size

        content.rebuild_chunk(chunk_id,
                              allow_same_rawx=self.allow_same_rawx,
                              chunk_pos=chunk_pos)

        if self.try_chunk_delete:
            try:
                content.blob_client.chunk_delete(chunk.url, **kwargs)
                self.logger.info("Chunk %s deleted", chunk.url)
            except NotFound as exc:
                self.logger.debug("Chunk %s: %s", chunk.url, exc)

        # This call does not raise exception if chunk is not referenced
        if chunk_id is not None:
            self.rdir_client.chunk_delete(chunk.host, container_id, content_id,
                                          chunk_id, **kwargs)

        return chunk_size
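
When an incoming item carries a 'reply' payload, update_processed above sends a JSON event back over beanstalkd. The keys below are the ones built in the code; the values are assumed examples:

event = {
    'rebuilder_id': 'rebuilder-1',                 # echoed from the reply
    'beanstalkd': '127.0.0.1:11300',               # this worker's listener
    'cid': '3FA2C4A1ED2605005335A276890EC458',
    'content_id': 'F4B1C8DD132705007DE8B43D0709DAA2',
    'chunk_id_or_pos': '0.p0',
    'bytes_processed': 1048576,  # present when a size was reported
    # an 'error' key is added when the rebuild failed
}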
Exemple #21
0
class BlobMoverWorker(object):
    def __init__(self, conf, logger, volume):
        self.conf = conf
        self.logger = logger or get_logger(conf)
        self.volume = volume
        self.run_time = 0
        self.passes = 0
        self.errors = 0
        self.last_reported = 0
        self.last_usage_check = 0
        self.chunks_run_time = 0
        self.bytes_running_time = 0
        self.bytes_processed = 0
        self.total_bytes_processed = 0
        self.total_chunks_processed = 0
        self.usage_target = int_value(
            conf.get('usage_target'), 0)
        self.usage_check_interval = int_value(
            conf.get('usage_check_interval'), 3600)
        self.report_interval = int_value(
            conf.get('report_interval'), 3600)
        self.max_chunks_per_second = int_value(
            conf.get('chunks_per_second'), 30)
        self.max_bytes_per_second = int_value(
            conf.get('bytes_per_second'), 10000000)
        self.blob_client = BlobClient()
        self.container_client = ContainerClient(conf)
        self.content_factory = ContentFactory(conf)

    def mover_pass(self):
        self.namespace, self.address = check_volume(self.volume)

        start_time = report_time = time.time()

        total_errors = 0
        mover_time = 0

        paths = paths_gen(self.volume)

        for path in paths:
            loop_time = time.time()

            now = time.time()
            if now - self.last_usage_check >= self.usage_check_interval:
                used, total = statfs(self.volume)
                usage = (float(used) / total) * 100
                if usage <= self.usage_target:
                    self.logger.info(
                        'current usage %.2f%%: target reached (%.2f%%)', usage,
                        self.usage_target)
                    self.last_usage_check = now
                    break

            self.safe_chunk_move(path)
            self.chunks_run_time = ratelimit(
                self.chunks_run_time,
                self.max_chunks_per_second
            )
            self.total_chunks_processed += 1
            now = time.time()

            if now - self.last_reported >= self.report_interval:
                self.logger.info(
                    '%(start_time)s '
                    '%(passes)d '
                    '%(errors)d '
                    '%(c_rate).2f '
                    '%(b_rate).2f '
                    '%(total).2f '
                    '%(mover_time).2f '
                    '%(mover_rate).2f' % {
                        'start_time': time.ctime(report_time),
                        'passes': self.passes,
                        'errors': self.errors,
                        'c_rate': self.passes / (now - report_time),
                        'b_rate': self.bytes_processed / (now - report_time),
                        'total': (now - start_time),
                        'mover_time': mover_time,
                        'mover_rate': mover_time / (now - start_time)
                    }
                )
                report_time = now
                total_errors += self.errors
                self.passes = 0
                self.bytes_processed = 0
                self.last_reported = now
            mover_time += (now - loop_time)
        elapsed = (time.time() - start_time) or 0.000001
        self.logger.info(
            '%(elapsed).02f '
            '%(errors)d '
            '%(chunk_rate).2f '
            '%(bytes_rate).2f '
            '%(mover_time).2f '
            '%(mover_rate).2f' % {
                'elapsed': elapsed,
                'errors': total_errors + self.errors,
                'chunk_rate': self.total_chunks_processed / elapsed,
                'bytes_rate': self.total_bytes_processed / elapsed,
                'mover_time': mover_time,
                'mover_rate': mover_time / elapsed
            }
        )

    def safe_chunk_move(self, path):
        try:
            self.chunk_move(path)
        except Exception as e:
            self.errors += 1
            self.logger.error('ERROR while moving chunk %s: %s', path, e)
        self.passes += 1

    def load_chunk_metadata(self, path):
        with open(path) as f:
            return read_chunk_metadata(f)

    def chunk_move(self, path):
        meta = self.load_chunk_metadata(path)
        content_cid = meta['content_cid']
        content_id = meta['content_id']
        chunk_id = meta['chunk_id']
        chunk_url = 'http://%s/%s' % (self.address, meta['chunk_id'])

        try:
            content = self.content_factory.get(content_cid, content_id)
        except ContentNotFound:
            raise exc.OrphanChunk('Content not found')

        new_chunk = content.move_chunk(chunk_id)

        self.logger.info(
            'moved chunk %s to %s', chunk_url, new_chunk['url'])
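
A hedged usage sketch; the volume path is an assumption, and with the default usage_target of 0 a pass keeps moving chunks until the volume runs out of candidates:

worker = BlobMoverWorker({'namespace': 'OPENIO'}, None,
                         '/var/lib/oio/sds/rawx-1')
worker.mover_pass()  # walks paths_gen(volume) and moves chunks one by one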
Exemple #22
0
class BlobMoverWorker(object):
    def __init__(self, conf, logger, volume):
        self.conf = conf
        self.logger = logger or get_logger(conf)
        self.volume = volume
        self.run_time = 0
        self.passes = 0
        self.errors = 0
        self.last_reported = 0
        self.last_usage_check = 0
        self.chunks_run_time = 0
        self.bytes_running_time = 0
        self.bytes_processed = 0
        self.total_bytes_processed = 0
        self.total_chunks_processed = 0
        self.usage_target = int_value(conf.get("usage_target"), 0)
        self.usage_check_interval = int_value(conf.get("usage_check_interval"), 3600)
        self.report_interval = int_value(conf.get("report_interval"), 3600)
        self.max_chunks_per_second = int_value(conf.get("chunks_per_second"), 30)
        self.max_bytes_per_second = int_value(conf.get("bytes_per_second"), 10000000)
        self.blob_client = BlobClient()
        self.container_client = ContainerClient(conf)
        self.content_factory = ContentFactory(conf)

    def mover_pass(self):
        self.namespace, self.address = check_volume(self.volume)

        start_time = report_time = time.time()

        total_errors = 0
        mover_time = 0

        paths = paths_gen(self.volume)

        for path in paths:
            loop_time = time.time()

            now = time.time()
            if now - self.last_usage_check >= self.usage_check_interval:
                used, total = statfs(self.volume)
                usage = (float(used) / total) * 100
                if usage <= self.usage_target:
                    self.logger.info("current usage %.2f%%: target reached (%.2f%%)", usage, self.usage_target)
                    self.last_usage_check = now
                    break

            self.safe_chunk_move(path)
            self.chunks_run_time = ratelimit(self.chunks_run_time, self.max_chunks_per_second)
            self.total_chunks_processed += 1
            now = time.time()

            if now - self.last_reported >= self.report_interval:
                self.logger.info(
                    "%(start_time)s "
                    "%(passes)d "
                    "%(errors)d "
                    "%(c_rate).2f "
                    "%(b_rate).2f "
                    "%(total).2f "
                    "%(mover_time).2f"
                    "%(mover_rate).2f"
                    % {
                        "start_time": time.ctime(report_time),
                        "passes": self.passes,
                        "errors": self.errors,
                        "c_rate": self.passes / (now - report_time),
                        "b_rate": self.bytes_processed / (now - report_time),
                        "total": (now - start_time),
                        "mover_time": mover_time,
                        "mover_rate": mover_time / (now - start_time),
                    }
                )
                report_time = now
                total_errors += self.errors
                self.passes = 0
                self.bytes_processed = 0
                self.last_reported = now
            mover_time += now - loop_time
        elapsed = (time.time() - start_time) or 0.000001
        self.logger.info(
            "%(elapsed).02f "
            "%(errors)d "
            "%(chunk_rate).2f "
            "%(bytes_rate).2f "
            "%(mover_time).2f "
            "%(mover_rate).2f"
            % {
                "elapsed": elapsed,
                "errors": total_errors + self.errors,
                "chunk_rate": self.total_chunks_processed / elapsed,
                "bytes_rate": self.total_bytes_processed / elapsed,
                "mover_time": mover_time,
                "mover_rate": mover_time / elapsed,
            }
        )

    def safe_chunk_move(self, path):
        try:
            self.chunk_move(path)
        except Exception as e:
            self.errors += 1
            self.logger.error("ERROR while moving chunk %s: %s", path, e)
        self.passes += 1

    def load_chunk_metadata(self, path):
        with open(path) as f:
            return read_chunk_metadata(f)

    def chunk_move(self, path):
        meta = self.load_chunk_metadata(path)
        container_id = meta["container_id"]
        content_id = meta["content_id"]
        chunk_id = meta["chunk_id"]
        chunk_url = "http://%s/%s" % (self.address, meta["chunk_id"])

        try:
            content = self.content_factory.get(container_id, content_id)
        except ContentNotFound:
            raise exc.OrphanChunk("Content not found")

        new_chunk = content.move_chunk(chunk_id)

        self.logger.info("moved chunk %s to %s", chunk_url, new_chunk["url"])
Exemple #23
0
class BlobMoverWorker(object):
    def __init__(self, conf, logger, volume):
        self.conf = conf
        self.logger = logger or get_logger(conf)
        self.volume = volume
        self.namespace, self.address = check_volume(self.volume)
        self.running = False
        self.run_time = 0
        self.passes = 0
        self.errors = 0
        self.last_reported = 0
        self.last_usage_check = 0
        self.chunks_run_time = 0
        self.bytes_running_time = 0
        self.bytes_processed = 0
        self.total_bytes_processed = 0
        self.total_chunks_processed = 0
        self.concurrency = int_value(conf.get('concurrency'), 10)
        self.usage_target = int_value(conf.get('usage_target'), 0)
        self.usage_check_interval = int_value(conf.get('usage_check_interval'),
                                              60)
        self.report_interval = int_value(conf.get('report_interval'), 3600)
        self.max_chunks_per_second = int_value(conf.get('chunks_per_second'),
                                               30)
        self.limit = int_value(conf.get('limit'), 0)
        self.allow_links = true_value(conf.get('allow_links', True))
        self.blob_client = BlobClient(conf)
        self.container_client = ContainerClient(conf, logger=self.logger)
        self.content_factory = ContentFactory(
            conf,
            container_client=self.container_client,
            blob_client=self.blob_client)
        self.excluded_rawx = \
            [rawx for rawx in conf.get('excluded_rawx', '').split(',') if rawx]
        self.fake_excluded_chunks = self._generate_fake_excluded_chunks()

    def _generate_fake_excluded_chunks(self):
        conscience_client = ConscienceClient(self.conf, logger=self.logger)
        fake_excluded_chunks = list()
        fake_chunk_id = '0' * 64
        for service_id in self.excluded_rawx:
            service_addr = conscience_client.resolve_service_id(
                'rawx', service_id)
            chunk = dict()
            chunk['hash'] = '0000000000000000000000000000000000'
            chunk['pos'] = '0'
            chunk['size'] = 1
            chunk['score'] = 1
            chunk['url'] = 'http://' + service_id + '/' + fake_chunk_id
            chunk['real_url'] = 'http://' + service_addr + '/' + fake_chunk_id
            fake_excluded_chunks.append(chunk)
        return fake_excluded_chunks
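    # Note: only the 'url' and 'real_url' of these fake chunks matter: they
    # are handed to move_chunk() so that chunk placement avoids the excluded
    # rawx services. Hash, position, size and score are schema placeholders.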

    def mover_pass(self, **kwargs):
        start_time = report_time = time.time()

        total_errors = 0
        mover_time = 0

        pool = GreenPool(self.concurrency)

        paths = paths_gen(self.volume)

        for path in paths:
            loop_time = time.time()

            now = time.time()
            if now - self.last_usage_check >= self.usage_check_interval:
                free_ratio = statfs(self.volume)
                usage = (1 - float(free_ratio)) * 100
                if usage <= self.usage_target:
                    self.logger.info(
                        'current usage %.2f%%: target reached (%.2f%%)', usage,
                        self.usage_target)
                    break
                self.last_usage_check = now

            # Spawn a chunk move task.
            # The call will block if no green thread is available.
            pool.spawn_n(self.safe_chunk_move, path)

            self.chunks_run_time = ratelimit(self.chunks_run_time,
                                             self.max_chunks_per_second)
            self.total_chunks_processed += 1
            now = time.time()

            if now - self.last_reported >= self.report_interval:
                self.logger.info(
                    '%(start_time)s '
                    '%(passes)d '
                    '%(errors)d '
                    '%(c_rate).2f '
                    '%(b_rate).2f '
                    '%(total).2f '
                    '%(mover_time).2f '
                    '%(mover_rate).2f' % {
                        'start_time': time.ctime(report_time),
                        'passes': self.passes,
                        'errors': self.errors,
                        'c_rate': self.passes / (now - report_time),
                        'b_rate': self.bytes_processed / (now - report_time),
                        'total': (now - start_time),
                        'mover_time': mover_time,
                        'mover_rate': mover_time / (now - start_time)
                    })
                report_time = now
                total_errors += self.errors
                self.passes = 0
                self.bytes_processed = 0
                self.last_reported = now
            mover_time += (now - loop_time)
            if self.limit != 0 and self.total_chunks_processed >= self.limit:
                break
        pool.waitall()
        elapsed = (time.time() - start_time) or 0.000001
        self.logger.info(
            '%(elapsed).02f '
            '%(errors)d '
            '%(chunk_rate).2f '
            '%(bytes_rate).2f '
            '%(mover_time).2f '
            '%(mover_rate).2f' % {
                'elapsed': elapsed,
                'errors': total_errors + self.errors,
                'chunk_rate': self.total_chunks_processed / elapsed,
                'bytes_rate': self.total_bytes_processed / elapsed,
                'mover_time': mover_time,
                'mover_rate': mover_time / elapsed
            })

    def safe_chunk_move(self, path):
        chunk_id = path.rsplit('/', 1)[-1]
        if len(chunk_id) != STRLEN_CHUNKID:
            self.logger.warn('WARN Not a chunk %s' % path)
            return
        for char in chunk_id:
            if char not in hexdigits:
                self.logger.warn('WARN Not a chunk %s' % path)
                return
        try:
            self.chunk_move(path, chunk_id)
        except Exception as err:
            self.errors += 1
            self.logger.error('ERROR while moving chunk %s: %s', path, err)
        self.passes += 1

    def load_chunk_metadata(self, path, chunk_id):
        with open(path) as file_:
            meta, _ = read_chunk_metadata(file_, chunk_id)
            return meta

    def chunk_move(self, path, chunk_id):
        meta = self.load_chunk_metadata(path, chunk_id)
        container_id = meta['container_id']
        content_id = meta['content_id']
        chunk_id = meta['chunk_id']

        # Maybe skip the chunk because it doesn't match the size constraint
        chunk_size = int(meta['chunk_size'])
        min_chunk_size = int(self.conf.get('min_chunk_size', 0))
        max_chunk_size = int(self.conf.get('max_chunk_size', 0))
        if chunk_size < min_chunk_size:
            self.logger.debug("SKIP %s too small", path)
            return
        if max_chunk_size > 0 and chunk_size > max_chunk_size:
            self.logger.debug("SKIP %s too big", path)
            return

        # Start moving the chunk
        try:
            content = self.content_factory.get(container_id, content_id)
        except ContentNotFound:
            raise exc.OrphanChunk('Content not found')

        new_chunk = content.move_chunk(
            chunk_id, fake_excluded_chunks=self.fake_excluded_chunks)

        self.logger.info('moved chunk http://%s/%s to %s', self.address,
                         chunk_id, new_chunk['url'])

        if self.allow_links:
            old_links = meta['links']
            for chunk_id, fullpath in old_links.items():
                # pylint: disable=unbalanced-tuple-unpacking
                account, container, _, _, content_id = \
                    decode_fullpath(fullpath)
                container_id = cid_from_name(account, container)

                try:
                    content = self.content_factory.get(container_id,
                                                       content_id)
                except ContentNotFound:
                    raise exc.OrphanChunk('Content not found')

                new_linked_chunk = content.move_linked_chunk(
                    chunk_id, new_chunk['url'])

                self.logger.info('moved chunk http://%s/%s to %s',
                                 self.address, chunk_id,
                                 new_linked_chunk['url'])
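
A minimal sketch of using this variant to drain a volume while steering new copies away from two other services; the namespace, service IDs and volume path are hypothetical:

conf = {
    'namespace': 'OPENIO',          # hypothetical namespace name
    'concurrency': 10,
    'usage_target': 0,              # 0 means: keep moving until the volume is empty
    'excluded_rawx': 'rawx-2,rawx-3',
}
worker = BlobMoverWorker(conf, None, '/var/lib/oio/sds/rawx-1')
worker.mover_pass()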
Exemple #24
0
class TestContentFactory(BaseTestCase):
    def setUp(self):
        super(TestContentFactory, self).setUp()
        self.namespace = self.conf['namespace']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_name = "TestContentFactory%f" % time.time()
        self.blob_client = BlobClient()
        self.container_client = ContainerClient(self.gridconf)
        self.container_client.container_create(acct=self.account,
                                               ref=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()

    def tearDown(self):
        super(TestContentFactory, self).tearDown()

    def test_extract_datasec(self):
        self.content_factory.ns_info = {
            "data_security": {
                "DUPONETWO": "DUP:distance=1|nb_copy=2",
                "RAIN": "RAIN:k=6|m=2|algo=liber8tion"
            },
            "storage_policy": {
                "RAIN": "NONE:RAIN:NONE",
                "SINGLE": "NONE:NONE:NONE",
                "TWOCOPIES": "NONE:DUPONETWO:NONE"
            }
        }

        ds_type, ds_args = self.content_factory._extract_datasec("RAIN")
        self.assertEqual(ds_type, "RAIN")
        self.assertEqual(ds_args, {"k": "6", "m": "2", "algo": "liber8tion"})

        ds_type, ds_args = self.content_factory._extract_datasec("SINGLE")
        self.assertEqual(ds_type, "DUP")
        self.assertEqual(ds_args, {"nb_copy": "1", "distance": "0"})

        ds_type, ds_args = self.content_factory._extract_datasec("TWOCOPIES")
        self.assertEqual(ds_type, "DUP")
        self.assertEqual(ds_args, {"nb_copy": "2", "distance": "1"})

        self.assertRaises(InconsistentContent,
                          self.content_factory._extract_datasec, "UnKnOwN")

    def test_get_rain(self):
        meta = {
            "chunk-method": "plain/rain?algo=liber8tion&k=6&m=2",
            "ctime": "1450176946",
            "deleted": "False",
            "hash": "E952A419957A6E405BFC53EC65483F73",
            "hash-method": "md5",
            "id": "3FA2C4A1ED2605005335A276890EC458",
            "length": "658",
            "mime-type": "application/octet-stream",
            "name": "tox.ini",
            "policy": "RAIN",
            "version": "1450176946676289"
        }
        chunks = [{
            "url": "http://127.0.0.1:6012/A0A0",
            "pos": "0.p0",
            "size": 512,
            "hash": "E7D4E4AD460971CA2E3141F2102308D4"
        }, {
            "url": "http://127.0.0.1:6010/A01",
            "pos": "0.1",
            "size": 146,
            "hash": "760AB5DA7C51A3654F1CA622687CD6C3"
        }, {
            "url": "http://127.0.0.1:6011/A00",
            "pos": "0.0",
            "size": 512,
            "hash": "B1D08B86B8CAA90A2092CCA0DF9201DB"
        }, {
            "url": "http://127.0.0.1:6013/A0A1",
            "pos": "0.p1",
            "size": 512,
            "hash": "DA9D7F72AEEA5791565724424CE45C16"
        }]
        self.content_factory.container_client.content_show = Mock(
            return_value=(meta, chunks))
        c = self.content_factory.get("xxx_container_id", "xxx_content_id")
        self.assertEqual(type(c), RainContent)
        self.assertEqual(c.content_id, "3FA2C4A1ED2605005335A276890EC458")
        self.assertEqual(c.length, 658)
        self.assertEqual(c.path, "tox.ini")
        self.assertEqual(c.version, "1450176946676289")
        self.assertEqual(c.algo, "liber8tion")
        self.assertEqual(c.k, 6)
        self.assertEqual(c.m, 2)
        self.assertEqual(len(c.chunks), 4)
        self.assertEqual(c.chunks[0].raw(), chunks[2])
        self.assertEqual(c.chunks[1].raw(), chunks[1])
        self.assertEqual(c.chunks[2].raw(), chunks[0])
        self.assertEqual(c.chunks[3].raw(), chunks[3])

    def test_get_dup(self):
        meta = {
            "chunk-method": "plain/bytes",
            "ctime": "1450176946",
            "deleted": "False",
            "hash": "E952A419957A6E405BFC53EC65483F73",
            "hash-method": "md5",
            "id": "3FA2C4A1ED2605005335A276890EC458",
            "length": "658",
            "mime-type": "application/octet-stream",
            "name": "tox.ini",
            "policy": "TWOCOPIES",
            "version": "1450176946676289"
        }
        chunks = [{
            "url": "http://127.0.0.1:6010/A0",
            "pos": "0",
            "size": 658,
            "hash": "E952A419957A6E405BFC53EC65483F73"
        }, {
            "url": "http://127.0.0.1:6011/A1",
            "pos": "0",
            "size": 658,
            "hash": "E952A419957A6E405BFC53EC65483F73"
        }]
        self.content_factory.container_client.content_show = Mock(
            return_value=(meta, chunks))
        c = self.content_factory.get("xxx_container_id", "xxx_content_id")
        self.assertEqual(type(c), DupContent)
        self.assertEqual(c.content_id, "3FA2C4A1ED2605005335A276890EC458")
        self.assertEqual(c.length, 658)
        self.assertEqual(c.path, "tox.ini")
        self.assertEqual(c.version, "1450176946676289")
        self.assertEqual(c.nb_copy, 2)
        self.assertEqual(c.distance, 1)
        self.assertEqual(len(c.chunks), 2)
        self.assertEqual(c.chunks[0].raw(), chunks[0])
        self.assertEqual(c.chunks[1].raw(), chunks[1])

    def test_get_unknown_content(self):
        self.assertRaises(ContentNotFound, self.content_factory.get,
                          self.container_id, "1234")

    def test_new_rain(self):
        meta = {
            "chunk-method": "plain/rain?algo=liber8tion&k=6&m=2",
            "ctime": "1450341162",
            "deleted": "False",
            "hash": "",
            "hash-method": "md5",
            "id": "F4B1C8DD132705007DE8B43D0709DAA2",
            "length": "1000",
            "mime-type": "application/octet-stream",
            "name": "titi",
            "policy": "RAIN",
            "version": "1450341162332663"
        }
        chunks = [{
            "url": "http://127.0.0.1:6010/0_p1",
            "pos": "0.p1",
            "size": 1048576,
            "hash": "00000000000000000000000000000000"
        }, {
            "url": "http://127.0.0.1:6011/0_p0",
            "pos": "0.p0",
            "size": 1048576,
            "hash": "00000000000000000000000000000000"
        }, {
            "url": "http://127.0.0.1:6016/0_1",
            "pos": "0.1",
            "size": 1048576,
            "hash": "00000000000000000000000000000000"
        }, {
            "url": "http://127.0.0.1:6017/0_0",
            "pos": "0.0",
            "size": 1048576,
            "hash": "00000000000000000000000000000000"
        }]
        self.content_factory.container_client.content_prepare = Mock(
            return_value=(meta, chunks))
        c = self.content_factory.new("xxx_container_id", "titi", 1000, "RAIN")
        self.assertEqual(type(c), RainContent)
        self.assertEqual(c.content_id, "F4B1C8DD132705007DE8B43D0709DAA2")
        self.assertEqual(c.length, 1000)
        self.assertEqual(c.path, "titi")
        self.assertEqual(c.version, "1450341162332663")
        self.assertEqual(c.algo, "liber8tion")
        self.assertEqual(c.k, 6)
        self.assertEqual(c.m, 2)
        self.assertEqual(len(c.chunks), 4)
        self.assertEqual(c.chunks[0].raw(), chunks[3])
        self.assertEqual(c.chunks[1].raw(), chunks[2])
        self.assertEqual(c.chunks[2].raw(), chunks[1])
        self.assertEqual(c.chunks[3].raw(), chunks[0])

    def _new_content(self, stgpol, data, path="titi"):
        old_content = self.content_factory.new(self.container_id, path,
                                               len(data), stgpol)
        old_content.upload(StringIO.StringIO(data))
        return self.content_factory.get(self.container_id,
                                        old_content.content_id)

    def _test_change_policy(self, data_size, old_policy, new_policy):
        if (old_policy == "RAIN" or new_policy == "RAIN") \
                and len(self.conf['rawx']) < 8:
            self.skipTest("RAIN: Need more than 8 rawx to run")

        data = random_data(data_size)
        obj_type = {
            "SINGLE": DupContent,
            "TWOCOPIES": DupContent,
            "THREECOPIES": DupContent,
            "RAIN": RainContent
        }
        old_content = self._new_content(old_policy, data)
        self.assertEqual(type(old_content), obj_type[old_policy])

        changed_content = self.content_factory.change_policy(
            old_content.container_id, old_content.content_id, new_policy)

        self.assertRaises(NotFound,
                          self.container_client.content_show,
                          self.account,
                          cid=old_content.container_id,
                          content=old_content.content_id)

        new_content = self.content_factory.get(self.container_id,
                                               changed_content.content_id)
        self.assertEqual(type(new_content), obj_type[new_policy])

        downloaded_data = "".join(new_content.download())

        self.assertEqual(downloaded_data, data)

    def test_change_content_0_byte_policy_single_to_rain(self):
        self._test_change_policy(0, "SINGLE", "RAIN")

    def test_change_content_0_byte_policy_rain_to_twocopies(self):
        self._test_change_policy(0, "RAIN", "TWOCOPIES")

    def test_change_content_1_byte_policy_single_to_rain(self):
        self._test_change_policy(1, "SINGLE", "RAIN")

    def test_change_content_chunksize_bytes_policy_twocopies_to_rain(self):
        self._test_change_policy(self.chunk_size, "TWOCOPIES", "RAIN")

    def test_change_content_2xchunksize_bytes_policy_threecopies_to_rain(self):
        self._test_change_policy(self.chunk_size * 2, "THREECOPIES", "RAIN")

    def test_change_content_1_byte_policy_rain_to_threecopies(self):
        self._test_change_policy(1, "RAIN", "THREECOPIES")

    def test_change_content_chunksize_bytes_policy_rain_to_twocopies(self):
        self._test_change_policy(self.chunk_size, "RAIN", "TWOCOPIES")

    def test_change_content_2xchunksize_bytes_policy_rain_to_single(self):
        self._test_change_policy(self.chunk_size * 2, "RAIN", "SINGLE")

    def test_change_content_0_byte_policy_twocopies_to_threecopies(self):
        self._test_change_policy(0, "TWOCOPIES", "THREECOPIES")

    def test_change_content_chunksize_bytes_policy_single_to_twocopies(self):
        self._test_change_policy(self.chunk_size, "SINGLE", "TWOCOPIES")

    def test_change_content_2xchunksize_bytes_policy_3copies_to_single(self):
        self._test_change_policy(self.chunk_size * 2, "THREECOPIES", "SINGLE")

    def test_change_content_with_same_policy(self):
        data = random_data(10)
        old_content = self._new_content("TWOCOPIES", data)
        changed_content = self.content_factory.change_policy(
            old_content.container_id, old_content.content_id, "TWOCOPIES")
        self.assertEqual(old_content.content_id, changed_content.content_id)

    def test_change_policy_unknown_content(self):
        self.assertRaises(ContentNotFound, self.content_factory.change_policy,
                          self.container_id, "1234", "SINGLE")

    def test_change_policy_unknown_storage_policy(self):
        data = random_data(10)
        old_content = self._new_content("TWOCOPIES", data)
        self.assertRaises(ClientException, self.content_factory.change_policy,
                          self.container_id, old_content.content_id, "UnKnOwN")

    def _test_move_chunk(self, policy):
        data = random_data(self.chunk_size)
        content = self._new_content(policy, data)

        chunk_id = content.chunks.filter(metapos=0)[0].id
        chunk_url = content.chunks.filter(metapos=0)[0].url
        chunk_meta, chunk_stream = self.blob_client.chunk_get(chunk_url)
        chunk_hash = md5_stream(chunk_stream)
        new_chunk = content.move_chunk(chunk_id)

        content_updated = self.content_factory.get(self.container_id,
                                                   content.content_id)

        hosts = []
        for c in content_updated.chunks.filter(metapos=0):
            self.assertThat(hosts, Not(Contains(c.host)))
            self.assertNotEqual(c.id, chunk_id)
            hosts.append(c.host)

        new_chunk_meta, new_chunk_stream = self.blob_client.chunk_get(
            new_chunk["url"])
        new_chunk_hash = md5_stream(new_chunk_stream)

        self.assertEqual(new_chunk_hash, chunk_hash)

        del chunk_meta["chunk_id"]
        del new_chunk_meta["chunk_id"]
        self.assertEqual(new_chunk_meta, chunk_meta)

    def test_single_move_chunk(self):
        self._test_move_chunk("SINGLE")

    def test_twocopies_move_chunk(self):
        self._test_move_chunk("TWOCOPIES")

    def test_rain_move_chunk(self):
        if len(self.conf['rawx']) < 9:
            self.skipTest("Need more than 8 rawx")
        self._test_move_chunk("RAIN")

    def test_move_chunk_not_in_content(self):
        data = random_data(self.chunk_size)
        content = self._new_content("TWOCOPIES", data)
        with ExpectedException(OrphanChunk):
            content.move_chunk("1234")

    def test_strange_paths(self):
        for cname in (
                "Annual report.txt",
                "foo+bar=foobar.txt",
                "100%_bug_free.c",
                "forward/slash/allowed",
                "I\\put\\backslashes\\and$dollar$signs$in$file$names",
                "Je suis tombé sur la tête, mais ça va bien.",
                "%s%f%u%d%%",
                "carriage\rreturn",
                "line\nfeed",
                "ta\tbu\tla\ttion",
                "controlchars",
        ):
            content = self._new_content("SINGLE", "nobody cares", cname)
            try:
                self.assertEqual(cname, content.path)
            finally:
                pass  # TODO: delete the content
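
These tests compare payloads through an md5_stream helper; a minimal equivalent, assuming the stream yields byte chunks and that the digest is upper-cased like the other hashes in these examples:

import hashlib

def md5_stream(stream):
    # Consume the chunk stream and return the hex digest of its payload.
    checksum = hashlib.md5()
    for data in stream:
        checksum.update(data)
    return checksum.hexdigest().upper()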
Exemple #25
0
class TestContentFactory(BaseTestCase):
    def setUp(self):
        super(TestContentFactory, self).setUp()
        self.namespace = self.conf['namespace']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_name = "TestContentFactory%f" % time.time()
        self.blob_client = BlobClient()
        self.container_client = ContainerClient(self.gridconf)
        self.container_client.container_create(acct=self.account,
                                               ref=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()

    def tearDown(self):
        super(TestContentFactory, self).tearDown()

    def test_extract_datasec(self):
        self.content_factory.ns_info = {
            "data_security": {
                "DUPONETWO": "DUP:distance=1|nb_copy=2",
                "RAIN": "RAIN:k=6|m=2|algo=liber8tion"
            },
            "storage_policy": {
                "RAIN": "NONE:RAIN:NONE",
                "SINGLE": "NONE:NONE:NONE",
                "TWOCOPIES": "NONE:DUPONETWO:NONE"
            }
        }

        ds_type, ds_args = self.content_factory._extract_datasec("RAIN")
        self.assertEqual(ds_type, "RAIN")
        self.assertEqual(ds_args, {
            "k": "6",
            "m": "2",
            "algo": "liber8tion"
        })

        ds_type, ds_args = self.content_factory._extract_datasec("SINGLE")
        self.assertEqual(ds_type, "DUP")
        self.assertEqual(ds_args, {
            "nb_copy": "1",
            "distance": "0"
        })

        ds_type, ds_args = self.content_factory._extract_datasec("TWOCOPIES")
        self.assertEqual(ds_type, "DUP")
        self.assertEqual(ds_args, {
            "nb_copy": "2",
            "distance": "1"
        })

        self.assertRaises(InconsistentContent,
                          self.content_factory._extract_datasec,
                          "UnKnOwN")

    def test_get_rain(self):
        meta = {
            "chunk-method": "plain/rain?algo=liber8tion&k=6&m=2",
            "ctime": "1450176946",
            "deleted": "False",
            "hash": "E952A419957A6E405BFC53EC65483F73",
            "hash-method": "md5",
            "id": "3FA2C4A1ED2605005335A276890EC458",
            "length": "658",
            "mime-type": "application/octet-stream",
            "name": "tox.ini",
            "policy": "RAIN",
            "version": "1450176946676289"
        }
        chunks = [
            {
                "url": "http://127.0.0.1:6012/A0A0",
                "pos": "0.p0", "size": 512,
                "hash": "E7D4E4AD460971CA2E3141F2102308D4"},
            {
                "url": "http://127.0.0.1:6010/A01",
                "pos": "0.1", "size": 146,
                "hash": "760AB5DA7C51A3654F1CA622687CD6C3"},
            {
                "url": "http://127.0.0.1:6011/A00",
                "pos": "0.0", "size": 512,
                "hash": "B1D08B86B8CAA90A2092CCA0DF9201DB"},
            {
                "url": "http://127.0.0.1:6013/A0A1",
                "pos": "0.p1", "size": 512,
                "hash": "DA9D7F72AEEA5791565724424CE45C16"}
        ]
        self.content_factory.container_client.content_show = Mock(
            return_value=(meta, chunks))
        c = self.content_factory.get("xxx_container_id", "xxx_content_id")
        self.assertEqual(type(c), RainContent)
        self.assertEqual(c.content_id, "3FA2C4A1ED2605005335A276890EC458")
        self.assertEqual(c.length, 658)
        self.assertEqual(c.path, "tox.ini")
        self.assertEqual(c.version, "1450176946676289")
        self.assertEqual(c.algo, "liber8tion")
        self.assertEqual(c.k, 6)
        self.assertEqual(c.m, 2)
        self.assertEqual(len(c.chunks), 4)
        self.assertEqual(c.chunks[0].raw(), chunks[2])
        self.assertEqual(c.chunks[1].raw(), chunks[1])
        self.assertEqual(c.chunks[2].raw(), chunks[0])
        self.assertEqual(c.chunks[3].raw(), chunks[3])

    def test_get_dup(self):
        meta = {
            "chunk-method": "plain/bytes",
            "ctime": "1450176946",
            "deleted": "False",
            "hash": "E952A419957A6E405BFC53EC65483F73",
            "hash-method": "md5",
            "id": "3FA2C4A1ED2605005335A276890EC458",
            "length": "658",
            "mime-type": "application/octet-stream",
            "name": "tox.ini",
            "policy": "TWOCOPIES",
            "version": "1450176946676289"
        }
        chunks = [
            {
                "url": "http://127.0.0.1:6010/A0",
                "pos": "0", "size": 658,
                "hash": "E952A419957A6E405BFC53EC65483F73"},
            {
                "url": "http://127.0.0.1:6011/A1",
                "pos": "0", "size": 658,
                "hash": "E952A419957A6E405BFC53EC65483F73"}
        ]
        self.content_factory.container_client.content_show = Mock(
            return_value=(meta, chunks))
        c = self.content_factory.get("xxx_container_id", "xxx_content_id")
        self.assertEqual(type(c), DupContent)
        self.assertEqual(c.content_id, "3FA2C4A1ED2605005335A276890EC458")
        self.assertEqual(c.length, 658)
        self.assertEqual(c.path, "tox.ini")
        self.assertEqual(c.version, "1450176946676289")
        self.assertEqual(c.nb_copy, 2)
        self.assertEqual(c.distance, 1)
        self.assertEqual(len(c.chunks), 2)
        self.assertEqual(c.chunks[0].raw(), chunks[0])
        self.assertEqual(c.chunks[1].raw(), chunks[1])

    def test_get_unknown_content(self):
        self.assertRaises(ContentNotFound, self.content_factory.get,
                          self.container_id, "1234")

    def test_new_rain(self):
        meta = {
            "chunk-method": "plain/rain?algo=liber8tion&k=6&m=2",
            "ctime": "1450341162",
            "deleted": "False",
            "hash": "",
            "hash-method": "md5",
            "id": "F4B1C8DD132705007DE8B43D0709DAA2",
            "length": "1000",
            "mime-type": "application/octet-stream",
            "name": "titi",
            "policy": "RAIN",
            "version": "1450341162332663"
        }
        chunks = [
            {
                "url": "http://127.0.0.1:6010/0_p1",
                "pos": "0.p1", "size": 1048576,
                "hash": "00000000000000000000000000000000"},
            {
                "url": "http://127.0.0.1:6011/0_p0",
                "pos": "0.p0", "size": 1048576,
                "hash": "00000000000000000000000000000000"},
            {
                "url": "http://127.0.0.1:6016/0_1",
                "pos": "0.1", "size": 1048576,
                "hash": "00000000000000000000000000000000"},
            {
                "url": "http://127.0.0.1:6017/0_0",
                "pos": "0.0", "size": 1048576,
                "hash": "00000000000000000000000000000000"}
        ]
        self.content_factory.container_client.content_prepare = Mock(
            return_value=(meta, chunks))
        c = self.content_factory.new("xxx_container_id", "titi",
                                     1000, "RAIN")
        self.assertEqual(type(c), RainContent)
        self.assertEqual(c.content_id, "F4B1C8DD132705007DE8B43D0709DAA2")
        self.assertEqual(c.length, 1000)
        self.assertEqual(c.path, "titi")
        self.assertEqual(c.version, "1450341162332663")
        self.assertEqual(c.algo, "liber8tion")
        self.assertEqual(c.k, 6)
        self.assertEqual(c.m, 2)
        self.assertEqual(len(c.chunks), 4)
        self.assertEqual(c.chunks[0].raw(), chunks[3])
        self.assertEqual(c.chunks[1].raw(), chunks[2])
        self.assertEqual(c.chunks[2].raw(), chunks[1])
        self.assertEqual(c.chunks[3].raw(), chunks[0])

    def _new_content(self, stgpol, data, path="titi"):
        old_content = self.content_factory.new(self.container_id, path,
                                               len(data), stgpol)
        old_content.upload(StringIO.StringIO(data))
        return self.content_factory.get(self.container_id,
                                        old_content.content_id)

    def _test_change_policy(self, data_size, old_policy, new_policy):
        if (old_policy == "RAIN" or new_policy == "RAIN") \
                and len(self.conf['rawx']) < 8:
            self.skipTest("RAIN: Need more than 8 rawx to run")

        data = random_data(data_size)
        obj_type = {
            "SINGLE": DupContent,
            "TWOCOPIES": DupContent,
            "THREECOPIES": DupContent,
            "RAIN": RainContent
        }
        old_content = self._new_content(old_policy, data)
        self.assertEqual(type(old_content), obj_type[old_policy])

        changed_content = self.content_factory.change_policy(
            old_content.container_id, old_content.content_id, new_policy)

        self.assertRaises(NotFound, self.container_client.content_show,
                          self.account,
                          cid=old_content.container_id,
                          content=old_content.content_id)

        new_content = self.content_factory.get(self.container_id,
                                               changed_content.content_id)
        self.assertEqual(type(new_content), obj_type[new_policy])

        downloaded_data = "".join(new_content.download())

        self.assertEqual(downloaded_data, data)

    def test_change_content_0_byte_policy_single_to_rain(self):
        self._test_change_policy(0, "SINGLE", "RAIN")

    def test_change_content_0_byte_policy_rain_to_twocopies(self):
        self._test_change_policy(0, "RAIN", "TWOCOPIES")

    def test_change_content_1_byte_policy_single_to_rain(self):
        self._test_change_policy(1, "SINGLE", "RAIN")

    def test_change_content_chunksize_bytes_policy_twocopies_to_rain(self):
        self._test_change_policy(self.chunk_size, "TWOCOPIES", "RAIN")

    def test_change_content_2xchunksize_bytes_policy_threecopies_to_rain(self):
        self._test_change_policy(self.chunk_size * 2, "THREECOPIES", "RAIN")

    def test_change_content_1_byte_policy_rain_to_threecopies(self):
        self._test_change_policy(1, "RAIN", "THREECOPIES")

    def test_change_content_chunksize_bytes_policy_rain_to_twocopies(self):
        self._test_change_policy(self.chunk_size, "RAIN", "TWOCOPIES")

    def test_change_content_2xchunksize_bytes_policy_rain_to_single(self):
        self._test_change_policy(self.chunk_size * 2, "RAIN", "SINGLE")

    def test_change_content_0_byte_policy_twocopies_to_threecopies(self):
        self._test_change_policy(0, "TWOCOPIES", "THREECOPIES")

    def test_change_content_chunksize_bytes_policy_single_to_twocopies(self):
        self._test_change_policy(self.chunk_size, "SINGLE", "TWOCOPIES")

    def test_change_content_2xchunksize_bytes_policy_3copies_to_single(self):
        self._test_change_policy(self.chunk_size * 2, "THREECOPIES", "SINGLE")

    def test_change_content_with_same_policy(self):
        data = random_data(10)
        old_content = self._new_content("TWOCOPIES", data)
        changed_content = self.content_factory.change_policy(
            old_content.container_id, old_content.content_id, "TWOCOPIES")
        self.assertEqual(old_content.content_id, changed_content.content_id)

    def test_change_policy_unknown_content(self):
        self.assertRaises(ContentNotFound, self.content_factory.change_policy,
                          self.container_id, "1234", "SINGLE")

    def test_change_policy_unknown_storage_policy(self):
        data = random_data(10)
        old_content = self._new_content("TWOCOPIES", data)
        self.assertRaises(ClientException, self.content_factory.change_policy,
                          self.container_id, old_content.content_id, "UnKnOwN")

    def _test_move_chunk(self, policy):
        data = random_data(self.chunk_size)
        content = self._new_content(policy, data)

        chunk_id = content.chunks.filter(metapos=0)[0].id
        chunk_url = content.chunks.filter(metapos=0)[0].url
        chunk_meta, chunk_stream = self.blob_client.chunk_get(chunk_url)
        chunk_hash = md5_stream(chunk_stream)
        new_chunk = content.move_chunk(chunk_id)

        content_updated = self.content_factory.get(self.container_id,
                                                   content.content_id)

        hosts = []
        for c in content_updated.chunks.filter(metapos=0):
            self.assertThat(hosts, Not(Contains(c.host)))
            self.assertNotEqual(c.id, chunk_id)
            hosts.append(c.host)

        new_chunk_meta, new_chunk_stream = self.blob_client.chunk_get(
            new_chunk["url"])
        new_chunk_hash = md5_stream(new_chunk_stream)

        self.assertEqual(new_chunk_hash, chunk_hash)

        del chunk_meta["chunk_id"]
        del new_chunk_meta["chunk_id"]
        self.assertEqual(new_chunk_meta, chunk_meta)

    def test_single_move_chunk(self):
        self._test_move_chunk("SINGLE")

    def test_twocopies_move_chunk(self):
        self._test_move_chunk("TWOCOPIES")

    def test_rain_move_chunk(self):
        if len(self.conf['rawx']) < 9:
            self.skipTest("Need more than 8 rawx")
        self._test_move_chunk("RAIN")

    def test_move_chunk_not_in_content(self):
        data = random_data(self.chunk_size)
        content = self._new_content("TWOCOPIES", data)
        with ExpectedException(OrphanChunk):
            content.move_chunk("1234")

    def test_strange_paths(self):
        strange_paths = [
                "Annual report.txt",
                "foo+bar=foobar.txt",
                "100%_bug_free.c",
                "forward/slash/allowed",
                "I\\put\\backslashes\\and$dollar$signs$in$file$names",
                "Je suis tombé sur la tête, mais ça va bien.",
                "%s%f%u%d%%",
                "carriage\rreturn",
                "line\nfeed",
                "ta\tbu\tla\ttion",
                "controlchars",
                ]
        answers = dict()
        for cname in strange_paths:
            content = self._new_content("SINGLE", "nobody cares", cname)
            answers[cname] = content
        listing = self.container_client.container_list(self.account,
                                                       self.container_name)
        obj_set = {k["name"].encode("utf8", "ignore")
                   for k in listing["objects"]}
        try:
            # Ensure the saved path is the one we gave the object
            for cname in answers:
                self.assertEqual(cname, answers[cname].path)
            # Ensure all objects appear in listing
            for cname in strange_paths:
                self.assertIn(cname, obj_set)
        finally:
            # Cleanup
            for cname in answers:
                try:
                    answers[cname].delete()
                except Exception:
                    pass
Exemple #26
0
class ChunkOperator(object):
    """
    Execute maintenance operations on chunks.
    """
    def __init__(self, conf, logger=None):
        self.conf = conf
        self.logger = logger or get_logger(conf)
        self.rdir_client = RdirClient(conf, logger=self.logger)
        self.content_factory = ContentFactory(conf, logger=self.logger)

    def rebuild(self,
                container_id,
                content_id,
                chunk_id_or_pos,
                rawx_id=None,
                try_chunk_delete=False,
                allow_frozen_container=True,
                allow_same_rawx=True):
        """
        Try to find the chunk in the metadata of the specified object,
        then rebuild it.
        """
        try:
            content = self.content_factory.get(container_id, content_id)
        except ContentNotFound:
            raise OrphanChunk('Content not found: possible orphan chunk')

        chunk_pos = None
        if looks_like_chunk_position(chunk_id_or_pos):
            chunk_pos = chunk_id_or_pos
            chunk_id = None
        else:
            if '/' in chunk_id_or_pos:
                parsed = urlparse(chunk_id_or_pos)
                chunk_id = parsed.path.lstrip('/')
                rawx_id = parsed.netloc
            else:
                chunk_id = chunk_id_or_pos

            candidates = content.chunks.filter(id=chunk_id)
            # FIXME(FVE): if for some reason the chunks have been registered
            # with an IP address and port instead of an ID, this won't work.
            if rawx_id:
                candidates = candidates.filter(host=rawx_id)
            chunk = candidates.one()
            if chunk is None:
                raise OrphanChunk(
                    'Chunk not found in content: possible orphan chunk: ' +
                    '%s' % (candidates.all(), ))
            elif rawx_id and chunk.host != rawx_id:
                raise ValueError('Chunk does not belong to this rawx')

        rebuilt_bytes = content.rebuild_chunk(
            chunk_id,
            service_id=rawx_id,
            allow_frozen_container=allow_frozen_container,
            allow_same_rawx=allow_same_rawx,
            chunk_pos=chunk_pos)

        # 'chunk' is only bound when a chunk ID (not a position) was given
        if try_chunk_delete and chunk_id is not None:
            try:
                content.blob_client.chunk_delete(chunk.url)
                self.logger.info("Old chunk %s deleted", chunk.url)
            except Exception as exc:
                self.logger.warn('Failed to delete old chunk %s: %s',
                                 chunk.url, exc)

        # This call does not raise exception if chunk is not referenced
        if chunk_id is not None:
            try:
                self.rdir_client.chunk_delete(chunk.host, container_id,
                                              content_id, chunk_id)
            except Exception as exc:
                self.logger.warn(
                    'Failed to delete chunk entry (%s) from the rdir (%s): %s',
                    chunk_id, chunk.host, exc)

        return rebuilt_bytes
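
A minimal sketch of invoking this operator for one damaged chunk; all identifiers below are placeholders:

operator = ChunkOperator({'namespace': 'OPENIO'})
rebuilt_bytes = operator.rebuild(
    'A0B1' * 16,                             # hypothetical container ID
    '3FA2C4A1ED2605005335A276890EC458',      # hypothetical content ID
    'F00D' * 16,                             # 64 hex chars, parsed as a chunk ID
    try_chunk_delete=True)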
Exemple #27
0
class TestContentFactory(BaseTestCase):
    def setUp(self):
        super(TestContentFactory, self).setUp()

        self.wait_for_score(('meta2', ))
        self.namespace = self.conf['namespace']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_name = "TestContentFactory%f" % time.time()
        self.blob_client = BlobClient(conf=self.conf)
        self.container_client = ContainerClient(self.gridconf)
        self.container_client.container_create(account=self.account,
                                               reference=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()
        self.stgpol = "SINGLE"
        self.stgpol_twocopies = "TWOCOPIES"
        self.stgpol_threecopies = "THREECOPIES"
        self.stgpol_ec = "EC"

    def tearDown(self):
        super(TestContentFactory, self).tearDown()

    def test_get_ec(self):
        meta = {
            "chunk_method": "ec/algo=liberasurecode_rs_vand,k=6,m=2",
            "ctime": "1450176946",
            "deleted": "False",
            "hash": "E952A419957A6E405BFC53EC65483F73",
            "hash_method": "md5",
            "id": "3FA2C4A1ED2605005335A276890EC458",
            "length": "658",
            "mime_type": "application/octet-stream",
            "name": "tox.ini",
            "policy": self.stgpol_ec,
            "version": "1450176946676289",
            "oio_version": "4.2",
        }
        chunks = [{
            "url": "http://127.0.0.1:6012/A0A0",
            "pos": "0.0",
            "size": 512,
            "hash": "E7D4E4AD460971CA2E3141F2102308D4"
        }, {
            "url": "http://127.0.0.1:6010/A01",
            "pos": "0.1",
            "size": 146,
            "hash": "760AB5DA7C51A3654F1CA622687CD6C3"
        }, {
            "url": "http://127.0.0.1:6011/A00",
            "pos": "0.2",
            "size": 512,
            "hash": "B1D08B86B8CAA90A2092CCA0DF9201DB"
        }, {
            "url": "http://127.0.0.1:6013/A0A1",
            "pos": "0.3",
            "size": 512,
            "hash": "DA9D7F72AEEA5791565724424CE45C16"
        }]
        self.content_factory.container_client.content_locate = Mock(
            return_value=(meta, chunks))
        c = self.content_factory.get("xxx_container_id",
                                     "xxx_content_id",
                                     account=self.account,
                                     container_name=self.container_name)
        self.assertEqual(type(c), ECContent)
        self.assertEqual(c.content_id, "3FA2C4A1ED2605005335A276890EC458")
        self.assertEqual(c.length, 658)
        self.assertEqual(c.path, "tox.ini")
        self.assertEqual(
            c.full_path,
            encode_fullpath(self.account, self.container_name, "tox.ini",
                            meta['version'], meta['id']))
        self.assertEqual(c.version, "1450176946676289")
        # TODO test storage method
        self.assertEqual(len(c.chunks), 4)
        self.assertEqual(c.chunks[0].raw(), chunks[0])
        self.assertEqual(c.chunks[1].raw(), chunks[1])
        self.assertEqual(c.chunks[2].raw(), chunks[2])
        self.assertEqual(c.chunks[3].raw(), chunks[3])

    def test_get_plain(self):
        meta = {
            "chunk_method": "plain/nb_copy=2",
            "ctime": "1450176946",
            "deleted": "False",
            "hash": "E952A419957A6E405BFC53EC65483F73",
            "hash_method": "md5",
            "id": "3FA2C4A1ED2605005335A276890EC458",
            "length": "658",
            "mime_type": "application/octet-stream",
            "name": "tox.ini",
            "policy": self.stgpol_twocopies,
            "version": "1450176946676289",
            "oio_version": "4.2",
        }
        chunks = [{
            "url": "http://127.0.0.1:6010/A0",
            "pos": "0",
            "size": 658,
            "hash": "E952A419957A6E405BFC53EC65483F73"
        }, {
            "url": "http://127.0.0.1:6011/A1",
            "pos": "0",
            "size": 658,
            "hash": "E952A419957A6E405BFC53EC65483F73"
        }]
        self.content_factory.container_client.content_locate = Mock(
            return_value=(meta, chunks))
        c = self.content_factory.get("xxx_container_id",
                                     "xxx_content_id",
                                     account=self.account,
                                     container_name=self.container_name)
        self.assertEqual(type(c), PlainContent)
        self.assertEqual(c.content_id, "3FA2C4A1ED2605005335A276890EC458")
        self.assertEqual(c.length, 658)
        self.assertEqual(c.path, "tox.ini")
        self.assertEqual(c.version, "1450176946676289")
        self.assertEqual(
            c.full_path,
            encode_fullpath(self.account, self.container_name, "tox.ini",
                            meta['version'], meta['id']))
        # TODO test storage_method
        self.assertEqual(len(c.chunks), 2)
        self.assertEqual(c.chunks[0].raw(), chunks[0])
        self.assertEqual(c.chunks[1].raw(), chunks[1])

    def test_get_unknown_content(self):
        self.assertRaises(ContentNotFound, self.content_factory.get,
                          self.container_id, "1234")

    def test_new_ec(self):
        meta = {
            "chunk_method": "ec/algo=liberasurecode_rs_vand,k=6,m=2",
            "ctime": "1450341162",
            "deleted": "False",
            "hash": "",
            "hash_method": "md5",
            "id": "F4B1C8DD132705007DE8B43D0709DAA2",
            "length": "1000",
            "mime_type": "application/octet-stream",
            "name": "titi",
            "policy": self.stgpol_ec,
            "version": "1450341162332663",
            "oio_version": "4.2",
        }
        chunks = [{
            "url": "http://127.0.0.1:6010/0_p1",
            "pos": "0.3",
            "size": 1048576,
            "hash": "00000000000000000000000000000000"
        }, {
            "url": "http://127.0.0.1:6011/0_p0",
            "pos": "0.2",
            "size": 1048576,
            "hash": "00000000000000000000000000000000"
        }, {
            "url": "http://127.0.0.1:6016/0_1",
            "pos": "0.1",
            "size": 1048576,
            "hash": "00000000000000000000000000000000"
        }, {
            "url": "http://127.0.0.1:6017/0_0",
            "pos": "0.0",
            "size": 1048576,
            "hash": "00000000000000000000000000000000"
        }]
        self.content_factory.container_client.content_prepare = Mock(
            return_value=(meta, chunks))
        c = self.content_factory.new("xxx_container_id",
                                     "titi",
                                     1000,
                                     self.stgpol_ec,
                                     account=self.account,
                                     container_name=self.container_name)
        self.assertEqual(type(c), ECContent)
        self.assertEqual(c.content_id, "F4B1C8DD132705007DE8B43D0709DAA2")
        self.assertEqual(c.length, 1000)
        self.assertEqual(c.path, "titi")
        self.assertEqual(c.version, "1450341162332663")
        # TODO test storage_method
        self.assertEqual(len(c.chunks), 4)
        self.assertEqual(c.chunks[0].raw(), chunks[3])
        self.assertEqual(c.chunks[1].raw(), chunks[2])
        self.assertEqual(c.chunks[2].raw(), chunks[1])
        self.assertEqual(c.chunks[3].raw(), chunks[0])

    def _new_content(self,
                     stgpol,
                     data,
                     path="titi",
                     account=None,
                     container_name=None,
                     mime_type=None,
                     properties=None):
        old_content = self.content_factory.new(self.container_id,
                                               path,
                                               len(data),
                                               stgpol,
                                               account=account,
                                               container_name=container_name)
        if properties:
            old_content.properties = properties
        if mime_type:
            old_content.mime_type = mime_type
        old_content.create(BytesIO(data))
        return self.content_factory.get(self.container_id,
                                        old_content.content_id)

    def _test_move_chunk(self, policy):
        data = random_data(self.chunk_size)
        content = self._new_content(policy, data)

        mc = content.chunks.filter(metapos=0)
        chunk_id = mc[0].id
        chunk_url = mc[0].url
        chunk_host = mc[0].host
        chunk_meta, chunk_stream = self.blob_client.chunk_get(chunk_url)
        chunk_hash = md5_stream(chunk_stream)
        new_chunk = content.move_chunk(chunk_id, service_id=chunk_host)

        content_updated = self.content_factory.get(self.container_id,
                                                   content.content_id)

        hosts = []
        for c in content_updated.chunks.filter(metapos=0):
            self.assertThat(hosts, Not(Contains(c.host)))
            self.assertNotEqual(c.url, chunk_url)
            hosts.append(c.host)

        new_chunk_meta, new_chunk_stream = self.blob_client.chunk_get(
            new_chunk["url"])
        new_chunk_hash = md5_stream(new_chunk_stream)

        self.assertEqual(new_chunk_hash, chunk_hash)
        self.assertGreaterEqual(new_chunk_meta['chunk_mtime'],
                                chunk_meta['chunk_mtime'])

        del chunk_meta["chunk_id"]
        del new_chunk_meta["chunk_id"]
        del chunk_meta["chunk_mtime"]
        del new_chunk_meta["chunk_mtime"]
        self.assertEqual(new_chunk_meta, chunk_meta)

    def test_single_move_chunk(self):
        self._test_move_chunk(self.stgpol)

    def test_twocopies_move_chunk(self):
        self._test_move_chunk(self.stgpol_twocopies)

    @ec
    def test_ec_move_chunk(self):
        self._test_move_chunk(self.stgpol_ec)

    def test_move_chunk_not_in_content(self):
        data = random_data(self.chunk_size)
        content = self._new_content(self.stgpol_twocopies, data)
        with ExpectedException(OrphanChunk):
            content.move_chunk("1234")

    def test_strange_paths(self):
        answers = dict()
        for cname in strange_paths:
            content = self._new_content(self.stgpol, b"nobody cares", cname)
            answers[cname] = content

        _, listing = self.container_client.content_list(
            self.account, self.container_name)
        if PY2:
            obj_set = {k["name"].encode('utf-8') for k in listing["objects"]}
        else:
            obj_set = {k["name"] for k in listing["objects"]}
        try:
            # Ensure the saved path is the one we gave the object
            for cname in answers:
                self.assertEqual(cname, answers[cname].path)
                fullpath = encode_fullpath(self.account, self.container_name,
                                           cname, answers[cname].version,
                                           answers[cname].content_id)
                self.assertEqual(answers[cname].full_path, fullpath)
            # Ensure all objects appear in listing
            for cname in strange_paths:
                self.assertIn(cname, obj_set)

        finally:
            # Cleanup
            for cname in answers:
                try:
                    answers[cname].delete()
                except Exception:
                    pass
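
The fullpath assertions above rely on encode_fullpath; a plausible minimal implementation, assuming the name components are URL-quoted and joined with '/' (the exact quoting rules are an assumption):

from six.moves.urllib.parse import quote  # works on PY2 and PY3

def encode_fullpath(account, container, path, version, content_id):
    # Quote the name components so a '/' inside them stays unambiguous.
    quoted = [quote(part, '') for part in (account, container, path)]
    return '/'.join(quoted + [str(version), content_id])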
Exemple #28
0
class BlobRebuilderWorker(object):
    def __init__(self, conf, logger, volume):
        self.conf = conf
        self.logger = logger or get_logger(conf)
        self.volume = volume
        self.run_time = 0
        self.passes = 0
        self.errors = 0
        self.last_reported = 0
        self.chunks_run_time = 0
        self.bytes_running_time = 0
        self.bytes_processed = 0
        self.total_bytes_processed = 0
        self.total_chunks_processed = 0
        self.dry_run = true_value(conf.get('dry_run', False))
        self.report_interval = int_value(conf.get('report_interval'), 3600)
        self.max_chunks_per_second = int_value(conf.get('chunks_per_second'),
                                               30)
        self.max_bytes_per_second = int_value(conf.get('bytes_per_second'),
                                              10000000)
        self.rdir_fetch_limit = int_value(conf.get('rdir_fetch_limit'), 100)
        self.allow_same_rawx = true_value(conf.get('allow_same_rawx'))
        self.rdir_client = RdirClient(conf)
        self.content_factory = ContentFactory(conf)

    def rebuilder_pass_with_lock(self):
        self.rdir_client.admin_lock(self.volume,
                                    "rebuilder on %s" % gethostname())
        try:
            self.rebuilder_pass()
        finally:
            self.rdir_client.admin_unlock(self.volume)

    def rebuilder_pass(self):
        start_time = report_time = time.time()

        total_errors = 0
        rebuilder_time = 0

        chunks = self.rdir_client.chunk_fetch(self.volume,
                                              limit=self.rdir_fetch_limit,
                                              rebuild=True)
        for container_id, content_id, chunk_id, data in chunks:
            loop_time = time.time()

            if self.dry_run:
                self.dryrun_chunk_rebuild(container_id, content_id, chunk_id)
            else:
                self.safe_chunk_rebuild(container_id, content_id, chunk_id)

            self.chunks_run_time = ratelimit(self.chunks_run_time,
                                             self.max_chunks_per_second)
            self.total_chunks_processed += 1
            now = time.time()

            if now - self.last_reported >= self.report_interval:
                self.logger.info(
                    'RUN  %(volume)s '
                    'started=%(start_time)s '
                    'passes=%(passes)d '
                    'errors=%(errors)d '
                    'chunks=%(nb_chunks)d %(c_rate).2f/s '
                    'bytes=%(nb_bytes)d %(b_rate).2fB/s '
                    'elapsed=%(total).2f '
                    '(rebuilder: %(rebuilder_rate).2f%%)' % {
                        'volume': self.volume,
                        'start_time': datetime.fromtimestamp(
                            int(report_time)).isoformat(),
                        'passes': self.passes,
                        'errors': self.errors,
                        'nb_chunks': self.total_chunks_processed,
                        'nb_bytes': self.total_bytes_processed,
                        'c_rate': self.passes / (now - report_time),
                        'b_rate': self.bytes_processed / (now - report_time),
                        'total': (now - start_time),
                        'rebuilder_time': rebuilder_time,
                        'rebuilder_rate':
                            100.0 * rebuilder_time / float(now - start_time)
                    })
                report_time = now
                total_errors += self.errors
                self.errors = 0
                self.passes = 0
                self.bytes_processed = 0
                self.last_reported = now
            rebuilder_time += (now - loop_time)
        end_time = time.time()
        elapsed = (end_time - start_time) or 0.000001
        self.logger.info(
            'DONE %(volume)s '
            'started=%(start_time)s '
            'ended=%(end_time)s '
            'elapsed=%(elapsed).02f '
            'errors=%(errors)d '
            'chunks=%(nb_chunks)d %(c_rate).2f/s '
            'bytes=%(nb_bytes)d %(b_rate).2fB/s '
            'rebuilder_time=%(rebuilder_time).2f '
            '(rebuilder: %(rebuilder_rate).2f%%)' % {
                'volume': self.volume,
                'start_time': datetime.fromtimestamp(
                    int(start_time)).isoformat(),
                'end_time': datetime.fromtimestamp(int(end_time)).isoformat(),
                'elapsed': elapsed,
                'errors': total_errors + self.errors,
                'nb_chunks': self.total_chunks_processed,
                'nb_bytes': self.total_bytes_processed,
                'c_rate': self.total_chunks_processed / elapsed,
                'b_rate': self.total_bytes_processed / elapsed,
                'rebuilder_time': rebuilder_time,
                'rebuilder_rate': 100.0 * rebuilder_time / float(elapsed)
            })

    def dryrun_chunk_rebuild(self, container_id, content_id, chunk_id):
        self.logger.info(
            "[dryrun] Rebuilding "
            "container %s, content %s, chunk %s", container_id, content_id,
            chunk_id)
        self.passes += 1

    def safe_chunk_rebuild(self, container_id, content_id, chunk_id):
        try:
            self.chunk_rebuild(container_id, content_id, chunk_id)
        except Exception as e:
            self.errors += 1
            self.logger.error('ERROR while rebuilding chunk %s|%s|%s: %s',
                              container_id, content_id, chunk_id, e)

        self.passes += 1

    def chunk_rebuild(self, container_id, content_id, chunk_id):
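        """
        Load the content owning the chunk, rebuild the chunk (possibly on
        another rawx), then remove the old entry from this volume's rdir.
        Raises OrphanChunk when the content or the chunk cannot be found.
        """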
        self.logger.info('Rebuilding (container %s, content %s, chunk %s)',
                         container_id, content_id, chunk_id)

        try:
            content = self.content_factory.get(container_id, content_id)
        except ContentNotFound:
            raise OrphanChunk('Content not found')

        chunk = content.chunks.filter(id=chunk_id).one()
        if chunk is None:
            raise OrphanChunk("Chunk not found in content")
        chunk_size = chunk.size

        content.rebuild_chunk(chunk_id, allow_same_rawx=self.allow_same_rawx)

        self.rdir_client.chunk_delete(self.volume, container_id, content_id,
                                      chunk_id)

        self.bytes_processed += chunk_size
        self.total_bytes_processed += chunk_size
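
# A minimal usage sketch for BlobRebuilderWorker. The volume path and the
# conf values below are hypothetical; the keys are the ones actually read
# by __init__ above, and passing None as logger falls back to get_logger().
#
#     conf = {'namespace': 'OPENIO', 'dry_run': 'true',
#             'report_interval': '60', 'chunks_per_second': '10'}
#     worker = BlobRebuilderWorker(conf, None, '/var/lib/oio/sds/rawx-1')
#     worker.rebuilder_pass_with_lock()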
Example #29
class TestECContent(BaseTestCase):
    def setUp(self):
        super(TestECContent, self).setUp()

        if len(self.conf['services']['rawx']) < 12:
            self.skipTest("Not enough rawx. "
                          "EC tests needs at least 12 rawx to run")

        self.namespace = self.conf['namespace']
        self.account = self.conf['account']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_client = ContainerClient(self.gridconf)
        self.blob_client = BlobClient(self.conf)
        self.container_name = "TestECContent%f" % time.time()
        self.container_client.container_create(account=self.account,
                                               reference=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()
        self.content = "%s-%s" % (self.__class__.__name__, random_str(4))
        self.stgpol = "EC"
        self.size = 1024 * 1024 + 320
        self.k = 6
        self.m = 3

    def tearDown(self):
        super(TestECContent, self).tearDown()

    def random_chunks(self, nb):
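        """Pick `nb` distinct chunk positions among the k+m EC fragments."""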
        pos = random.sample(xrange(self.k + self.m), nb)
        return ["0.%s" % i for i in pos]

    def _test_create(self, data_size):
        # generate random test data
        data = random_data(data_size)
        # using factory create new EC content
        content = self.content_factory.new(self.container_id, self.content,
                                           len(data), self.stgpol)
        # verify the factory gave us an ECContent
        self.assertEqual(type(content), ECContent)

        # perform the content creation
        content.create(BytesIO(data))

        meta, chunks = self.container_client.content_locate(
            cid=self.container_id, content=content.content_id)
        # verify metadata
        chunks = ChunksHelper(chunks)
        self.assertEqual(meta['hash'], md5_data(data))
        self.assertEqual(meta['length'], str(len(data)))
        self.assertEqual(meta['policy'], self.stgpol)
        self.assertEqual(meta['name'], self.content)

        metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size)) \
            if len(data) != 0 else 1

        offset = 0
        # verify each metachunk
        for metapos in range(metachunk_nb):
            chunks_at_pos = content.chunks.filter(metapos=metapos)
            if len(chunks_at_pos) < 1:
                break
            metachunk_size = chunks_at_pos[0].size
            metachunk_hash = md5_data(data[offset:offset + metachunk_size])

            for chunk in chunks_at_pos:
                meta, stream = self.blob_client.chunk_get(chunk.url)
                self.assertEqual(meta['metachunk_size'], str(chunk.size))
                self.assertEqual(meta['metachunk_hash'], chunk.checksum)
                self.assertEqual(meta['content_path'], self.content)
                self.assertEqual(meta['container_id'], self.container_id)
                self.assertEqual(meta['content_id'], content.content_id)
                self.assertEqual(meta['chunk_id'], chunk.id)
                self.assertEqual(meta['chunk_pos'], chunk.pos)
                self.assertEqual(meta['chunk_hash'], md5_stream(stream))
                full_path = encode_fullpath(self.account, self.container_name,
                                            self.content,
                                            meta['content_version'],
                                            meta['content_id'])
                self.assertEqual(meta['full_path'], full_path)
                self.assertEqual(meta['oio_version'], '4.2')
                self.assertEqual(metachunk_hash, chunk.checksum)

            offset += metachunk_size

    def test_create_0_byte(self):
        self._test_create(0)

    def test_create_1_byte(self):
        self._test_create(1)

    def test_create(self):
        self._test_create(DAT_LEGIT_SIZE)

    def test_create_6294503_bytes(self):
        self._test_create(6294503)

    def _test_rebuild(self, data_size, broken_pos_list):
        # generate test data
        data = os.urandom(data_size)
        # create initial content
        old_content = self.content_factory.new(self.container_id, self.content,
                                               len(data), self.stgpol)
        # verify factory work as intended
        self.assertEqual(type(old_content), ECContent)

        # perform initial content creation
        old_content.create(BytesIO(data))

        uploaded_content = self.content_factory.get(self.container_id,
                                                    old_content.content_id)

        # break the content
        old_info = {}
        for pos in broken_pos_list:
            old_info[pos] = {}
            c = uploaded_content.chunks.filter(pos=pos)[0]
            old_info[pos]["url"] = c.url
            old_info[pos]["id"] = c.id
            old_info[pos]["hash"] = c.checksum
            chunk_id_to_rebuild = c.id
            meta, stream = self.blob_client.chunk_get(c.url)
            old_info[pos]["dl_meta"] = meta
            old_info[pos]["dl_hash"] = md5_stream(stream)
            # delete the chunk
            self.blob_client.chunk_delete(c.url)

            # rebuild the broken chunks
            uploaded_content.rebuild_chunk(chunk_id_to_rebuild)

        rebuilt_content = self.content_factory.get(self.container_id,
                                                   uploaded_content.content_id)
        # sanity check
        self.assertEqual(type(rebuilt_content), ECContent)

        # verify rebuild result
        for pos in broken_pos_list:
            c = rebuilt_content.chunks.filter(pos=pos)[0]
            rebuilt_meta, rebuilt_stream = self.blob_client.chunk_get(c.url)
            self.assertEqual(rebuilt_meta["chunk_id"], c.id)
            self.assertEqual(md5_stream(rebuilt_stream),
                             old_info[pos]["dl_hash"])
            self.assertEqual(c.checksum, old_info[pos]["hash"])
            self.assertNotEqual(c.url, old_info[pos]["url"])
            self.assertGreaterEqual(rebuilt_meta['chunk_mtime'],
                                    old_info[pos]['dl_meta']['chunk_mtime'])
            del old_info[pos]["dl_meta"]["chunk_mtime"]
            del rebuilt_meta["chunk_mtime"]
            del old_info[pos]["dl_meta"]["chunk_id"]
            del rebuilt_meta["chunk_id"]
            self.assertEqual(rebuilt_meta, old_info[pos]["dl_meta"])

    def test_content_0_byte_rebuild(self):
        self._test_rebuild(0, self.random_chunks(1))

    def test_content_0_byte_rebuild_advanced(self):
        self._test_rebuild(0, self.random_chunks(3))

    def test_content_1_byte_rebuild(self):
        self._test_rebuild(1, self.random_chunks(1))

    def test_content_1_byte_rebuild_advanced(self):
        self._test_rebuild(1, self.random_chunks(3))

    def test_content_rebuild(self):
        self._test_rebuild(DAT_LEGIT_SIZE, self.random_chunks(1))

    def test_content_rebuild_advanced(self):
        self._test_rebuild(DAT_LEGIT_SIZE, self.random_chunks(3))

    def test_content_rebuild_unrecoverable(self):
        self.assertRaises(UnrecoverableContent, self._test_rebuild,
                          DAT_LEGIT_SIZE, self.random_chunks(4))

    def _new_content(self, data, broken_pos_list=None):
        broken_pos_list = broken_pos_list or []
        old_content = self.content_factory.new(self.container_id, self.content,
                                               len(data), self.stgpol)
        self.assertEqual(type(old_content), ECContent)

        old_content.create(BytesIO(data))

        # break content
        for pos in broken_pos_list:
            c = old_content.chunks.filter(pos=pos)[0]
            self.blob_client.chunk_delete(c.url)

        # get the new structure of the uploaded content
        return self.content_factory.get(self.container_id,
                                        old_content.content_id)

    def test_orphan_chunk(self):
        content = self._new_content(random_data(10))
        self.assertRaises(OrphanChunk, content.rebuild_chunk, "invalid")

    def _test_fetch(self, data_size, broken_pos_list=None):
        broken_pos_list = broken_pos_list or []
        test_data = random_data(data_size)
        content = self._new_content(test_data, broken_pos_list)

        data = b''.join(content.fetch())

        self.assertEqual(len(data), len(test_data))
        self.assertEqual(md5_data(data), md5_data(test_data))

        # verify that chunks are broken
        for pos in broken_pos_list:
            chunk = content.chunks.filter(pos=pos)[0]
            self.assertRaises(NotFound, self.blob_client.chunk_delete,
                              chunk.url)

    def test_fetch_content_0_byte(self):
        self._test_fetch(0)

    def test_fetch_content_1_byte(self):
        self._test_fetch(1)

    def test_fetch_content(self):
        self._test_fetch(DAT_LEGIT_SIZE)

    def test_fetch_content_0_byte_broken(self):
        self._test_fetch(0, self.random_chunks(3))

    def test_fetch_content_1_byte_broken(self):
        self._test_fetch(1, self.random_chunks(3))

    def test_fetch_content_broken(self):
        self._test_fetch(DAT_LEGIT_SIZE, self.random_chunks(3))

    def test_fetch_content_unrecoverable(self):
        broken_chunks = self.random_chunks(4)
        self.assertRaises(OioException, self._test_fetch, DAT_LEGIT_SIZE,
                          broken_chunks)
class TestContentFactory(BaseTestCase):
    def setUp(self):
        super(TestContentFactory, self).setUp()

        self.namespace = self.conf['namespace']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_name = "TestContentFactory%f" % time.time()
        self.blob_client = BlobClient()
        self.container_client = ContainerClient(self.gridconf)
        self.container_client.container_create(account=self.account,
                                               reference=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()
        self.stgpol = "SINGLE"
        self.stgpol_twocopies = "TWOCOPIES"
        self.stgpol_threecopies = "THREECOPIES"
        self.stgpol_ec = "EC"

    def tearDown(self):
        super(TestContentFactory, self).tearDown()

    def test_get_ec(self):
        meta = {
            "chunk_method": "ec/algo=liberasurecode_rs_vand,k=6,m=2",
            "ctime": "1450176946",
            "deleted": "False",
            "hash": "E952A419957A6E405BFC53EC65483F73",
            "hash_method": "md5",
            "id": "3FA2C4A1ED2605005335A276890EC458",
            "length": "658",
            "mime_type": "application/octet-stream",
            "name": "tox.ini",
            "policy": self.stgpol_ec,
            "version": "1450176946676289"
        }
        chunks = [{
            "url": "http://127.0.0.1:6012/A0A0",
            "pos": "0.0",
            "size": 512,
            "hash": "E7D4E4AD460971CA2E3141F2102308D4"
        }, {
            "url": "http://127.0.0.1:6010/A01",
            "pos": "0.1",
            "size": 146,
            "hash": "760AB5DA7C51A3654F1CA622687CD6C3"
        }, {
            "url": "http://127.0.0.1:6011/A00",
            "pos": "0.2",
            "size": 512,
            "hash": "B1D08B86B8CAA90A2092CCA0DF9201DB"
        }, {
            "url": "http://127.0.0.1:6013/A0A1",
            "pos": "0.3",
            "size": 512,
            "hash": "DA9D7F72AEEA5791565724424CE45C16"
        }]
        self.content_factory.container_client.content_locate = Mock(
            return_value=(meta, chunks))
        c = self.content_factory.get("xxx_container_id", "xxx_content_id")
        self.assertEqual(type(c), ECContent)
        self.assertEqual(c.content_id, "3FA2C4A1ED2605005335A276890EC458")
        self.assertEqual(c.length, 658)
        self.assertEqual(c.path, "tox.ini")
        self.assertEqual(c.version, "1450176946676289")
        # TODO test storage method
        self.assertEqual(len(c.chunks), 4)
        self.assertEqual(c.chunks[0].raw(), chunks[0])
        self.assertEqual(c.chunks[1].raw(), chunks[1])
        self.assertEqual(c.chunks[2].raw(), chunks[2])
        self.assertEqual(c.chunks[3].raw(), chunks[3])

    def test_get_plain(self):
        meta = {
            "chunk_method": "plain/nb_copy=2",
            "ctime": "1450176946",
            "deleted": "False",
            "hash": "E952A419957A6E405BFC53EC65483F73",
            "hash_method": "md5",
            "id": "3FA2C4A1ED2605005335A276890EC458",
            "length": "658",
            "mime_type": "application/octet-stream",
            "name": "tox.ini",
            "policy": self.stgpol_twocopies,
            "version": "1450176946676289"
        }
        chunks = [{
            "url": "http://127.0.0.1:6010/A0",
            "pos": "0",
            "size": 658,
            "hash": "E952A419957A6E405BFC53EC65483F73"
        }, {
            "url": "http://127.0.0.1:6011/A1",
            "pos": "0",
            "size": 658,
            "hash": "E952A419957A6E405BFC53EC65483F73"
        }]
        self.content_factory.container_client.content_locate = Mock(
            return_value=(meta, chunks))
        c = self.content_factory.get("xxx_container_id", "xxx_content_id")
        self.assertEqual(type(c), PlainContent)
        self.assertEqual(c.content_id, "3FA2C4A1ED2605005335A276890EC458")
        self.assertEqual(c.length, 658)
        self.assertEqual(c.path, "tox.ini")
        self.assertEqual(c.version, "1450176946676289")
        # TODO test storage_method
        self.assertEqual(len(c.chunks), 2)
        self.assertEqual(c.chunks[0].raw(), chunks[0])
        self.assertEqual(c.chunks[1].raw(), chunks[1])

    def test_get_unknown_content(self):
        self.assertRaises(ContentNotFound, self.content_factory.get,
                          self.container_id, "1234")

    def test_new_ec(self):
        meta = {
            "chunk_method": "ec/algo=liberasurecode_rs_vand,k=6,m=2",
            "ctime": "1450341162",
            "deleted": "False",
            "hash": "",
            "hash_method": "md5",
            "id": "F4B1C8DD132705007DE8B43D0709DAA2",
            "length": "1000",
            "mime_type": "application/octet-stream",
            "name": "titi",
            "policy": self.stgpol_ec,
            "version": "1450341162332663"
        }
        chunks = [{
            "url": "http://127.0.0.1:6010/0_p1",
            "pos": "0.3",
            "size": 1048576,
            "hash": "00000000000000000000000000000000"
        }, {
            "url": "http://127.0.0.1:6011/0_p0",
            "pos": "0.2",
            "size": 1048576,
            "hash": "00000000000000000000000000000000"
        }, {
            "url": "http://127.0.0.1:6016/0_1",
            "pos": "0.1",
            "size": 1048576,
            "hash": "00000000000000000000000000000000"
        }, {
            "url": "http://127.0.0.1:6017/0_0",
            "pos": "0.0",
            "size": 1048576,
            "hash": "00000000000000000000000000000000"
        }]
        self.content_factory.container_client.content_prepare = Mock(
            return_value=(meta, chunks))
        c = self.content_factory.new("xxx_container_id", "titi", 1000,
                                     self.stgpol_ec)
        self.assertEqual(type(c), ECContent)
        self.assertEqual(c.content_id, "F4B1C8DD132705007DE8B43D0709DAA2")
        self.assertEqual(c.length, 1000)
        self.assertEqual(c.path, "titi")
        self.assertEqual(c.version, "1450341162332663")
        # TODO test storage_method
        self.assertEqual(len(c.chunks), 4)
        self.assertEqual(c.chunks[0].raw(), chunks[3])
        self.assertEqual(c.chunks[1].raw(), chunks[2])
        self.assertEqual(c.chunks[2].raw(), chunks[1])
        self.assertEqual(c.chunks[3].raw(), chunks[0])

    def _new_content(self, stgpol, data, path="titi"):
        old_content = self.content_factory.new(self.container_id, path,
                                               len(data), stgpol)
        old_content.create(BytesIO(data))
        return self.content_factory.get(self.container_id,
                                        old_content.content_id)

    def _test_change_policy(self, data_size, old_policy, new_policy):
        data = random_data(data_size)
        obj_type = {
            self.stgpol: PlainContent,
            self.stgpol_twocopies: PlainContent,
            self.stgpol_threecopies: PlainContent,
            self.stgpol_ec: ECContent
        }
        old_content = self._new_content(old_policy, data)
        self.assertEqual(type(old_content), obj_type[old_policy])

        changed_content = self.content_factory.change_policy(
            old_content.container_id, old_content.content_id, new_policy)

        self.assertRaises(NotFound,
                          self.container_client.content_show,
                          self.account,
                          cid=old_content.container_id,
                          content=old_content.content_id)

        new_content = self.content_factory.get(self.container_id,
                                               changed_content.content_id)
        self.assertEqual(type(new_content), obj_type[new_policy])

        downloaded_data = "".join(new_content.fetch())

        self.assertEqual(downloaded_data, data)

    @ec
    def test_change_content_0_byte_policy_single_to_ec(self):
        self._test_change_policy(0, self.stgpol, self.stgpol_ec)

    @ec
    def test_change_content_0_byte_policy_ec_to_twocopies(self):
        self._test_change_policy(0, self.stgpol_ec, self.stgpol_twocopies)

    @ec
    def test_change_content_1_byte_policy_single_to_ec(self):
        self._test_change_policy(1, self.stgpol, self.stgpol_ec)

    @ec
    def test_change_content_chunksize_bytes_policy_twocopies_to_ec(self):
        self._test_change_policy(self.chunk_size, self.stgpol_twocopies,
                                 self.stgpol_ec)

    @ec
    def test_change_content_2xchunksize_bytes_policy_threecopies_to_ec(self):
        self._test_change_policy(self.chunk_size * 2, self.stgpol_threecopies,
                                 self.stgpol_ec)

    @ec
    def test_change_content_1_byte_policy_ec_to_threecopies(self):
        self._test_change_policy(1, self.stgpol_ec, self.stgpol_threecopies)

    @ec
    def test_change_content_chunksize_bytes_policy_ec_to_twocopies(self):
        self._test_change_policy(self.chunk_size, self.stgpol_ec,
                                 self.stgpol_twocopies)

    @ec
    def test_change_content_2xchunksize_bytes_policy_ec_to_single(self):
        self._test_change_policy(self.chunk_size * 2, self.stgpol_ec,
                                 self.stgpol)

    def test_change_content_0_byte_policy_twocopies_to_threecopies(self):
        self._test_change_policy(0, self.stgpol_twocopies,
                                 self.stgpol_threecopies)

    def test_change_content_chunksize_bytes_policy_single_to_twocopies(self):
        self._test_change_policy(self.chunk_size, self.stgpol,
                                 self.stgpol_twocopies)

    def test_change_content_2xchunksize_bytes_policy_3copies_to_single(self):
        self._test_change_policy(self.chunk_size * 2, self.stgpol_threecopies,
                                 self.stgpol)

    def test_change_content_with_same_policy(self):
        data = random_data(10)
        old_content = self._new_content(self.stgpol_twocopies, data)
        changed_content = self.content_factory.change_policy(
            old_content.container_id, old_content.content_id,
            self.stgpol_twocopies)
        self.assertEqual(old_content.content_id, changed_content.content_id)

    def test_change_policy_unknown_content(self):
        self.assertRaises(ContentNotFound, self.content_factory.change_policy,
                          self.container_id, "1234", self.stgpol)

    def test_change_policy_unknown_storage_policy(self):
        data = random_data(10)
        old_content = self._new_content(self.stgpol_twocopies, data)
        self.assertRaises(ClientException, self.content_factory.change_policy,
                          self.container_id, old_content.content_id, "UnKnOwN")

    def _test_move_chunk(self, policy):
        data = random_data(self.chunk_size)
        content = self._new_content(policy, data)

        chunk_id = content.chunks.filter(metapos=0)[0].id
        chunk_url = content.chunks.filter(metapos=0)[0].url
        chunk_meta, chunk_stream = self.blob_client.chunk_get(chunk_url)
        chunk_hash = md5_stream(chunk_stream)
        new_chunk = content.move_chunk(chunk_id)

        content_updated = self.content_factory.get(self.container_id,
                                                   content.content_id)

        hosts = []
        for c in content_updated.chunks.filter(metapos=0):
            self.assertThat(hosts, Not(Contains(c.host)))
            self.assertNotEqual(c.id, chunk_id)
            hosts.append(c.host)

        new_chunk_meta, new_chunk_stream = self.blob_client.chunk_get(
            new_chunk["url"])
        new_chunk_hash = md5_stream(new_chunk_stream)

        self.assertEqual(new_chunk_hash, chunk_hash)

        del chunk_meta["chunk_id"]
        del new_chunk_meta["chunk_id"]
        self.assertEqual(new_chunk_meta, chunk_meta)

    def test_single_move_chunk(self):
        self._test_move_chunk(self.stgpol)

    def test_twocopies_move_chunk(self):
        self._test_move_chunk(self.stgpol_twocopies)

    @ec
    def test_ec_move_chunk(self):
        self._test_move_chunk(self.stgpol_ec)

    def test_move_chunk_not_in_content(self):
        data = random_data(self.chunk_size)
        content = self._new_content(self.stgpol_twocopies, data)
        with ExpectedException(OrphanChunk):
            content.move_chunk("1234")

    def test_strange_paths(self):
        strange_paths = [
            "Annual report.txt",
            "foo+bar=foobar.txt",
            "100%_bug_free.c",
            "forward/slash/allowed",
            "I\\put\\backslashes\\and$dollar$signs$in$file$names",
            "Je suis tombé sur la tête, mais ça va bien.",
            "%s%f%u%d%%",
            "carriage\rreturn",
            "line\nfeed",
            "ta\tbu\tla\ttion",
            "controlchars",
        ]
        answers = dict()
        for cname in strange_paths:
            content = self._new_content(self.stgpol, "nobody cares", cname)
            answers[cname] = content
        _, listing = self.container_client.content_list(
            self.account, self.container_name)
        obj_set = {
            k["name"].encode("utf8", "ignore")
            for k in listing["objects"]
        }
        try:
            # Ensure the saved path is the one we gave the object
            for cname in answers:
                self.assertEqual(cname, answers[cname].path)
            # Ensure all objects appear in listing
            for cname in strange_paths:
                self.assertIn(cname, obj_set)
        finally:
            # Cleanup
            for cname in answers:
                try:
                    answers[cname].delete()
                except Exception:
                    pass
Example #31
class TestPlainContent(BaseTestCase):
    def setUp(self):
        super(TestPlainContent, self).setUp()

        if len(self.conf['services']['rawx']) < 4:
            self.skipTest(
                "Plain tests need at least 4 rawx to run")

        self.namespace = self.conf['namespace']
        self.account = self.conf['account']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_client = ContainerClient(self.gridconf)
        self.blob_client = BlobClient(self.conf)
        self.container_name = "TestPlainContent-%f" % time.time()
        self.container_client.container_create(account=self.account,
                                               reference=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()
        self.content = random_str(64)
        self.stgpol = "SINGLE"
        self.stgpol_twocopies = "TWOCOPIES"
        self.stgpol_threecopies = "THREECOPIES"

    def _test_create(self, stgpol, data_size):
        data = random_data(data_size)
        content = self.content_factory.new(self.container_id, self.content,
                                           len(data), stgpol)

        content.create(BytesIO(data))

        meta, chunks = self.container_client.content_locate(
            cid=self.container_id, content=content.content_id)
        self.assertEqual(meta['hash'], md5_data(data))
        self.assertEqual(meta['length'], str(len(data)))
        self.assertEqual(meta['policy'], stgpol)
        self.assertEqual(meta['name'], self.content)

        metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size))
        if metachunk_nb == 0:
            metachunk_nb = 1  # special case for empty content

        chunks = ChunksHelper(chunks)

        # TODO: do not hardcode the copy count of each storage policy
        if stgpol == self.stgpol_threecopies:
            nb_copy = 3
        elif stgpol == self.stgpol_twocopies:
            nb_copy = 2
        elif stgpol == self.stgpol:
            nb_copy = 1

        self.assertEqual(len(chunks), metachunk_nb * nb_copy)

        for pos in range(metachunk_nb):
            chunks_at_pos = chunks.filter(pos=pos)
            self.assertEqual(len(chunks_at_pos), nb_copy)

            data_begin = pos * self.chunk_size
            data_end = pos * self.chunk_size + self.chunk_size
            chunk_hash = md5_data(data[data_begin:data_end])

            for chunk in chunks_at_pos:
                meta, stream = self.blob_client.chunk_get(chunk.url)
                self.assertEqual(md5_stream(stream), chunk_hash)
                self.assertEqual(meta['content_path'], self.content)
                self.assertEqual(meta['container_id'], self.container_id)
                self.assertEqual(meta['content_id'], content.content_id)
                self.assertEqual(meta['chunk_id'], chunk.id)
                self.assertEqual(meta['chunk_pos'], str(pos))
                # Check that chunk data matches chunk hash from xattr
                self.assertEqual(meta['chunk_hash'], chunk_hash)
                # Check that chunk data matches chunk hash from database
                self.assertEqual(chunk.checksum, chunk_hash)
                full_path = encode_fullpath(
                    self.account, self.container_name, self.content,
                    meta['content_version'], meta['content_id'])
                self.assertEqual(meta['full_path'], full_path)
                self.assertEqual(meta['oio_version'], '4.2')

    def test_twocopies_create_0_byte(self):
        self._test_create(self.stgpol_twocopies, 0)

    def test_twocopies_create_1_byte(self):
        self._test_create(self.stgpol_twocopies, 1)

    def test_twocopies_create_chunksize_bytes(self):
        self._test_create(self.stgpol_twocopies, self.chunk_size)

    def test_twocopies_create_chunksize_plus_1_bytes(self):
        self._test_create(self.stgpol_twocopies, self.chunk_size + 1)

    def test_twocopies_create_6294503_bytes(self):
        self._test_create(self.stgpol_twocopies, 6294503)

    def test_single_create_0_byte(self):
        self._test_create(self.stgpol, 0)

    def test_single_create_chunksize_plus_1_bytes(self):
        self._test_create(self.stgpol, self.chunk_size + 1)

    def _new_content(self, stgpol, data, broken_pos_list=None):
        broken_pos_list = broken_pos_list or []
        old_content = self.content_factory.new(
            self.container_id, self.content, len(data), stgpol)

        old_content.create(BytesIO(data))

        broken_chunks_info = {}
        for pos, idx in broken_pos_list:
            c = old_content.chunks.filter(pos=pos)[idx]
            meta, stream = self.blob_client.chunk_get(c.url)
            if pos not in broken_chunks_info:
                broken_chunks_info[pos] = {}
            broken_chunks_info[pos][idx] = {
                "url": c.url,
                "id": c.id,
                "hash": c.checksum,
                "dl_meta": meta,
                "dl_hash": md5_stream(stream)
            }
            self.blob_client.chunk_delete(c.url)

        # get the new structure of the uploaded content
        return (self.content_factory.get(
            self.container_id, old_content.content_id), broken_chunks_info)

    def _rebuild_and_check(self, content, broken_chunks_info, full_rebuild_pos,
                           allow_frozen_container=False):
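        """
        Rebuild one broken chunk and verify the result: the rebuilt copy
        must live at a new URL with the same payload, checksum and
        metadata, while any other still-broken copy at the same position
        must remain deleted.
        """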
        rebuild_pos, rebuild_idx = full_rebuild_pos
        rebuild_chunk_info = broken_chunks_info[rebuild_pos][rebuild_idx]
        content.rebuild_chunk(rebuild_chunk_info["id"],
                              allow_frozen_container=allow_frozen_container)

        # get the new structure of the content
        rebuilt_content = self.content_factory.get(self.container_id,
                                                   content.content_id)

        # find the rebuilt chunk
        for c in rebuilt_content.chunks.filter(pos=rebuild_pos):
            if len(content.chunks.filter(id=c.id)) > 0:
                # not the rebuilt chunk
                # if this chunk is broken, it must not have been rebuilt
                for b_c_i in broken_chunks_info[rebuild_pos].values():
                    if c.id == b_c_i["id"]:
                        with ExpectedException(NotFound):
                            _, _ = self.blob_client.chunk_get(c.url)
                continue
            meta, stream = self.blob_client.chunk_get(c.url)
            self.assertEqual(meta["chunk_id"], c.id)
            self.assertEqual(md5_stream(stream),
                             rebuild_chunk_info["dl_hash"])
            self.assertEqual(c.checksum, rebuild_chunk_info["hash"])
            self.assertThat(c.url, NotEquals(rebuild_chunk_info["url"]))
            del meta["chunk_id"]
            del rebuild_chunk_info["dl_meta"]["chunk_id"]
            self.assertEqual(meta, rebuild_chunk_info["dl_meta"])

    def _test_rebuild(self, stgpol, data_size, broken_pos_list,
                      full_rebuild_pos):
        data = random_data(data_size)
        content, broken_chunks_info = self._new_content(
            stgpol, data, broken_pos_list)

        self._rebuild_and_check(content, broken_chunks_info, full_rebuild_pos)

    def test_2copies_content_0_byte_1broken_rebuild_pos_0_idx_0(self):
        self._test_rebuild(self.stgpol_twocopies, 0, [(0, 0)], (0, 0))

    def test_2copies_content_1_byte_1broken_rebuild_pos_0_idx_1(self):
        self._test_rebuild(self.stgpol_twocopies, 1, [(0, 1)], (0, 1))

    def test_3copies_content_chunksize_bytes_2broken_rebuild_pos_0_idx_1(self):
        if len(self.conf['services']['rawx']) <= 3:
            self.skipTest("Need more than 3 rawx")
        self._test_rebuild(self.stgpol_threecopies, self.chunk_size,
                           [(0, 0), (0, 1)], (0, 1))

    def test_3copies_content_2xchksize_bytes_2broken_rebuild_pos_1_idx_2(self):
        self._test_rebuild(self.stgpol_threecopies, 2 * self.chunk_size,
                           [(1, 0), (1, 2)], (1, 2))

    def test_2copies_content_0_byte_2broken_rebuild_pos_0_idx_0(self):
        with ExpectedException(UnrecoverableContent):
            self._test_rebuild(
                self.stgpol_twocopies, 0, [(0, 0), (0, 1)], (0, 0))

    def test_rebuild_chunk_in_frozen_container(self):
        data = random_data(self.chunk_size)
        content, broken_chunks_info = self._new_content(
            self.stgpol_twocopies, data, [(0, 0)])
        system = {'sys.status': str(OIO_DB_FROZEN)}
        self.container_client.container_set_properties(
            self.account, self.container_name, None, system=system)

        try:
            full_rebuild_pos = (0, 0)
            rebuild_pos, rebuild_idx = full_rebuild_pos
            rebuild_chunk_info = broken_chunks_info[rebuild_pos][rebuild_idx]
            self.assertRaises(ServiceBusy,
                              content.rebuild_chunk, rebuild_chunk_info["id"])
        finally:
            system['sys.status'] = str(OIO_DB_ENABLED)
            self.container_client.container_set_properties(
                self.account, self.container_name, None, system=system)

        self._rebuild_and_check(content, broken_chunks_info, full_rebuild_pos,
                                allow_frozen_container=True)

    def _test_fetch(self, stgpol, data_size, broken_pos_list):
        data = random_data(data_size)
        content, _ = self._new_content(stgpol, data, broken_pos_list)

        fetched_data = "".join(content.fetch())

        self.assertEqual(fetched_data, data)

        for pos, idx in broken_pos_list:
            # check nothing has been rebuilt
            c = content.chunks.filter(pos=pos)[0]
            self.assertRaises(NotFound, self.blob_client.chunk_delete, c.url)

    def test_twocopies_fetch_content_0_byte_without_broken_chunks(self):
        self._test_fetch(self.stgpol_twocopies, 0, [])

    def test_twocopies_fetch_content_0_byte_with_broken_0_0(self):
        self._test_fetch(self.stgpol_twocopies, 0, [(0, 0)])

    def test_twocopies_fetch_content_1_byte_without_broken_chunks(self):
        self._test_fetch(self.stgpol_twocopies, 1, [])

    def test_twocopies_fetch_content_1_byte_with_broken_0_0(self):
        self._test_fetch(self.stgpol_twocopies, 1, [(0, 0)])

    def test_twocopies_fetch_chunksize_bytes_without_broken_chunks(self):
        self._test_fetch(self.stgpol_twocopies, self.chunk_size, [])

    def test_twocopies_fetch_2xchuksize_bytes_with_broken_0_0_and_1_0(self):
        self._test_fetch(
            self.stgpol_twocopies, self.chunk_size * 2, [(0, 0), (1, 0)])

    def test_twocopies_fetch_content_chunksize_bytes_2_broken_chunks(self):
        data = random_data(self.chunk_size)
        content, _ = self._new_content(
            self.stgpol_twocopies, data, [(0, 0), (0, 1)])
        gen = content.fetch()
        self.assertRaises(UnrecoverableContent, next, gen)

    def test_single_fetch_content_1_byte_without_broken_chunks(self):
        self._test_fetch(self.stgpol, 1, [])

    def test_single_fetch_chunksize_bytes_plus_1_without_broken_chunk(self):
        self._test_fetch(self.stgpol, self.chunk_size * 2, [])
Example #32
class TestECContent(BaseTestCase):
    def setUp(self):
        super(TestECContent, self).setUp()

        if len(self.conf['services']['rawx']) < 12:
            self.skipTest("Not enough rawx. "
                          "EC tests needs at least 12 rawx to run")

        self.namespace = self.conf['namespace']
        self.account = self.conf['account']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_client = ContainerClient(self.gridconf)
        self.blob_client = BlobClient()
        self.container_name = "TestECContent%f" % time.time()
        self.container_client.container_create(acct=self.account,
                                               ref=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()
        self.content = random_str(64)
        self.stgpol = "EC"
        self.size = 1024*1024 + 320
        self.k = 6
        self.m = 3

    def tearDown(self):
        super(TestECContent, self).tearDown()

    def random_chunks(self, nb):
        pos = random.sample(xrange(self.k + self.m), nb)
        return ["0.%s" % i for i in pos]

    def _test_create(self, data_size):
        # generate random test data
        data = random_data(data_size)
        # using factory create new EC content
        content = self.content_factory.new(
            self.container_id, self.content, len(data), self.stgpol)
        # verify the factory gave us an ECContent
        self.assertEqual(type(content), ECContent)

        # perform the content creation
        content.create(StringIO(data))

        meta, chunks = self.container_client.content_show(
            cid=self.container_id, content=content.content_id)
        # verify metadata
        chunks = ChunksHelper(chunks)
        self.assertEqual(meta['hash'], md5_data(data))
        self.assertEqual(meta['length'], str(len(data)))
        self.assertEqual(meta['policy'], self.stgpol)
        self.assertEqual(meta['name'], self.content)

        metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size)) \
            if len(data) != 0 else 1

        # verify each metachunk
        for metapos in range(metachunk_nb):
            chunks_at_pos = content.chunks.filter(metapos=metapos)

            for chunk in chunks_at_pos:
                meta, stream = self.blob_client.chunk_get(chunk.url)
                self.assertEqual(meta['metachunk_size'], str(chunk.size))
                self.assertEqual(meta['metachunk_hash'], chunk.checksum)
                self.assertEqual(meta['content_path'], self.content)
                self.assertEqual(meta['container_id'], self.container_id)
                self.assertEqual(meta['content_id'], content.content_id)
                self.assertEqual(meta['chunk_id'], chunk.id)
                self.assertEqual(meta['chunk_pos'], chunk.pos)
                self.assertEqual(meta['chunk_hash'], md5_stream(stream))

    def test_create_0_byte(self):
        self._test_create(0)

    def test_create_1_byte(self):
        self._test_create(1)

    def test_create(self):
        self._test_create(DAT_LEGIT_SIZE)

    def _test_rebuild(self, data_size, broken_pos_list):
        # generate test data
        data = os.urandom(data_size)
        # create initial content
        old_content = self.content_factory.new(
            self.container_id, self.content, len(data), self.stgpol)
        # verify factory work as intended
        self.assertEqual(type(old_content), ECContent)

        # perform initial content creation
        old_content.create(StringIO(data))

        uploaded_content = self.content_factory.get(self.container_id,
                                                    old_content.content_id)

        # break the content
        old_info = {}
        for pos in broken_pos_list:
            old_info[pos] = {}
            c = uploaded_content.chunks.filter(pos=pos)[0]
            old_info[pos]["url"] = c.url
            old_info[pos]["id"] = c.id
            old_info[pos]["hash"] = c.checksum
            chunk_id_to_rebuild = c.id
            meta, stream = self.blob_client.chunk_get(c.url)
            old_info[pos]["dl_meta"] = meta
            old_info[pos]["dl_hash"] = md5_stream(stream)
            # delete the chunk
            self.blob_client.chunk_delete(c.url)

            # rebuild the broken chunks
            uploaded_content.rebuild_chunk(chunk_id_to_rebuild)

        rebuilt_content = self.content_factory.get(self.container_id,
                                                   uploaded_content.content_id)
        # sanity check
        self.assertEqual(type(rebuilt_content), ECContent)

        # verify rebuild result
        for pos in broken_pos_list:
            c = rebuilt_content.chunks.filter(pos=pos)[0]
            rebuilt_meta, rebuilt_stream = self.blob_client.chunk_get(c.url)
            self.assertEqual(rebuilt_meta["chunk_id"], c.id)
            self.assertEqual(md5_stream(rebuilt_stream),
                             old_info[pos]["dl_hash"])
            self.assertEqual(c.checksum, old_info[pos]["hash"])
            self.assertNotEqual(c.url, old_info[pos]["url"])
            del old_info[pos]["dl_meta"]["chunk_id"]
            del rebuilt_meta["chunk_id"]
            self.assertEqual(rebuilt_meta, old_info[pos]["dl_meta"])

    def test_content_0_byte_rebuild(self):
        self._test_rebuild(0, self.random_chunks(1))

    def test_content_0_byte_rebuild_advanced(self):
        self._test_rebuild(0, self.random_chunks(3))

    def test_content_1_byte_rebuild(self):
        self._test_rebuild(1, self.random_chunks(1))

    def test_content_1_byte_rebuild_advanced(self):
        self._test_rebuild(1, self.random_chunks(3))

    def test_content_rebuild(self):
        self._test_rebuild(DAT_LEGIT_SIZE, self.random_chunks(1))

    def test_content_rebuild_advanced(self):
        self._test_rebuild(DAT_LEGIT_SIZE, self.random_chunks(3))

    def test_content_rebuild_unrecoverable(self):
        self.assertRaises(
            UnrecoverableContent, self._test_rebuild, DAT_LEGIT_SIZE,
            self.random_chunks(4))

    def _new_content(self, data, broken_pos_list=None):
        broken_pos_list = broken_pos_list or []
        old_content = self.content_factory.new(
            self.container_id, self.content, len(data), self.stgpol)
        self.assertEqual(type(old_content), ECContent)

        old_content.create(StringIO(data))

        # break content
        for pos in broken_pos_list:
            c = old_content.chunks.filter(pos=pos)[0]
            self.blob_client.chunk_delete(c.url)

        # get the new structure of the uploaded content
        return self.content_factory.get(self.container_id,
                                        old_content.content_id)

    def test_orphan_chunk(self):
        content = self._new_content(random_data(10))
        self.assertRaises(OrphanChunk, content.rebuild_chunk, "invalid")

    def _test_fetch(self, data_size, broken_pos_list=None):
        broken_pos_list = broken_pos_list or []
        test_data = random_data(data_size)
        content = self._new_content(test_data, broken_pos_list)

        data = "".join(content.fetch())

        self.assertEqual(len(data), len(test_data))
        self.assertEqual(md5_data(data), md5_data(test_data))

        # verify that chunks are broken
        for pos in broken_pos_list:
            chunk = content.chunks.filter(pos=pos)[0]
            self.assertRaises(
                NotFound, self.blob_client.chunk_delete, chunk.url)

    def test_fetch_content_0_byte(self):
        self._test_fetch(0)

    def test_fetch_content_1_byte(self):
        self._test_fetch(1)

    def test_fetch_content(self):
        self._test_fetch(DAT_LEGIT_SIZE)

    def test_fetch_content_0_byte_broken(self):
        self._test_fetch(0, self.random_chunks(3))

    def test_fetch_content_1_byte_broken(self):
        self._test_fetch(1, self.random_chunks(3))

    def test_fetch_content_broken(self):
        self._test_fetch(DAT_LEGIT_SIZE, self.random_chunks(3))

    def test_fetch_content_unrecoverable(self):
        broken_chunks = self.random_chunks(4)
        self.assertRaises(
            OioException, self._test_fetch, DAT_LEGIT_SIZE, broken_chunks)