Example #1
0
class TestBlobMover(BaseTestCase):
    """Tests for ``BlobMoverWorker.chunk_move`` on the rawx services
    listed by the conscience."""

    def setUp(self):
        """Create a container and a small object, and map rawx ids to volumes.

        The test is skipped when the number of rawx services with a positive
        score does not exceed the number of chunks prepared for the object.
        """
        super(TestBlobMover, self).setUp()
        self.container = random_str(16)
        self.cid = cid_from_name(self.account, self.container)
        self.path = random_str(16)
        self.api = ObjectStorageApi(self.ns)
        self.blob_client = BlobClient(self.conf)

        self.api.container_create(self.account, self.container)
        _, chunks = self.api.container.content_prepare(self.account,
                                                       self.container,
                                                       self.path,
                                                       size=1)
        services = self.conscience.all_services('rawx')
        # presumably one spare scored rawx is required so that the mover has
        # somewhere to put the chunk -- TODO confirm
        if len(chunks) >= len([s for s in services if s['score'] > 0]):
            self.skipTest("need at least %d rawx to run" % (len(chunks) + 1))

        # Map each rawx (service id when tagged, address otherwise) to the
        # volume announced in its tags.
        self.rawx_volumes = dict()
        for rawx in services:
            tags = rawx['tags']
            service_id = tags.get('tag.service_id', None)
            if service_id is None:
                service_id = rawx['addr']
            volume = tags.get('tag.vol', None)
            self.rawx_volumes[service_id] = volume

        self.api.object_create(self.account,
                               self.container,
                               obj_name=self.path,
                               data="chunk")
        meta, located = self.api.object_locate(self.account,
                                               self.container, self.path)
        # Materialise the chunk list: the tests iterate over it, index it
        # with random.choice() and copy it, which a one-shot iterable would
        # not survive.
        self.chunks = list(located)
        self.version = meta['version']
        self.content_id = meta['id']
        self.chunk_method = meta['chunk_method']

    def _chunk_path(self, chunk):
        """Build the on-volume path of `chunk` from its URL.

        The third and fourth '/'-separated fields of the URL are used as the
        volume id and the chunk id; the path is
        ``<volume>/<first 3 chars of chunk id>/<chunk id>``.
        """
        url_parts = chunk['url'].split('/', 3)
        volume_id = url_parts[2]
        chunk_id = url_parts[3]
        volume = self.rawx_volumes[volume_id]
        return volume + '/' + chunk_id[:3] + '/' + chunk_id

    def test_move_old_chunk(self):
        """Move one chunk after running convert_to_old_chunk on all of them,
        then compare the replacement chunk with the original one."""
        for chunk in self.chunks:
            convert_to_old_chunk(self._chunk_path(chunk), self.account,
                                 self.container, self.path, self.version,
                                 self.content_id)

        orig_chunk = random.choice(self.chunks)
        chunk_volume = orig_chunk['url'].split('/')[2]
        chunk_id = orig_chunk['url'].split('/')[3]
        chunk_headers, chunk_stream = self.blob_client.chunk_get(
            orig_chunk['url'], check_headers=False)
        chunks_kept = list(self.chunks)
        chunks_kept.remove(orig_chunk)

        mover = BlobMoverWorker(self.conf, None,
                                self.rawx_volumes[chunk_volume])
        mover.chunk_move(self._chunk_path(orig_chunk), chunk_id)

        _, new_chunks = self.api.object_locate(self.account, self.container,
                                               self.path)
        # Materialise before len() and the loop below (the original code
        # stored this list in `new_chunk` by mistake and then discarded it).
        new_chunks = list(new_chunks)

        self.assertEqual(len(new_chunks), len(chunks_kept) + 1)
        url_kept = [c['url'] for c in chunks_kept]
        new_chunk = None
        for chunk in new_chunks:
            if chunk['url'] not in url_kept:
                # Exactly one chunk may differ from the ones that were kept.
                self.assertIsNone(new_chunk)
                new_chunk = chunk
        # Fail with a clear message rather than a TypeError on None below.
        self.assertIsNotNone(new_chunk)

        self.assertNotEqual(orig_chunk['real_url'], new_chunk['real_url'])
        self.assertNotEqual(orig_chunk['url'], new_chunk['url'])
        self.assertEqual(orig_chunk['pos'], new_chunk['pos'])
        self.assertEqual(orig_chunk['size'], new_chunk['size'])
        self.assertEqual(orig_chunk['hash'], new_chunk['hash'])

        new_chunk_headers, new_chunk_stream = self.blob_client.chunk_get(
            new_chunk['url'])
        chunk_data = b''.join(chunk_stream)
        new_chunk_data = b''.join(new_chunk_stream)
        self.assertEqual(chunk_data, new_chunk_data)
        fullpath = encode_fullpath(self.account, self.container, self.path,
                                   self.version, self.content_id)
        self.assertEqual(fullpath, new_chunk_headers['full_path'])
        # Headers checked individually are removed from the new chunk (and
        # from the old one where it is deleted too) so that the remaining
        # headers can be compared as whole dictionaries at the end.
        del new_chunk_headers['full_path']
        self.assertNotEqual(chunk_headers['chunk_id'],
                            new_chunk_headers['chunk_id'])
        new_chunk_id = new_chunk['url'].split('/')[3]
        self.assertEqual(new_chunk_id, new_chunk_headers['chunk_id'])
        del chunk_headers['chunk_id']
        del new_chunk_headers['chunk_id']
        self.assertEqual(OIO_VERSION, new_chunk_headers['oio_version'])
        del chunk_headers['oio_version']
        del new_chunk_headers['oio_version']
        self.assertEqual(chunk_headers, new_chunk_headers)

    def test_move_with_wrong_size(self):
        """chunk_move must raise ChunkException when the data it reads is one
        byte shorter than the chunk (only run for 'ec' chunk methods)."""
        if not self.chunk_method.startswith('ec'):
            self.skipTest('Only works with EC')

        orig_chunk = random.choice(self.chunks)
        chunk_volume = orig_chunk['url'].split('/')[2]
        chunk_id = orig_chunk['url'].split('/')[3]

        mover = BlobMoverWorker(self.conf, None,
                                self.rawx_volumes[chunk_volume])
        meta, stream = mover.blob_client.chunk_get(orig_chunk['url'])
        data = stream.read()
        stream.close()
        # Drop the last byte and the recorded hash, then make the mover's
        # client return this truncated payload.
        data = data[:-1]
        del meta['chunk_hash']
        wrong_stream = GeneratorIO(data)
        mover.blob_client.chunk_get = Mock(return_value=(meta, wrong_stream))

        self.assertRaises(ChunkException, mover.chunk_move,
                          self._chunk_path(orig_chunk), chunk_id)
Example #2
0
class TestDupContent(BaseTestCase):
    """Upload and download tests for contents handled by ``DupContent``."""

    # Number of copies expected per metachunk for each policy used here.
    _COPIES_PER_POLICY = {
        "THREECOPIES": 3,
        "TWOCOPIES": 2,
        "SINGLE": 1,
    }

    def setUp(self):
        """Create the clients and a fresh container; skip below 3 rawx."""
        super(TestDupContent, self).setUp()

        if len(self.conf['rawx']) < 3:
            self.skipTest("Not enough rawx. "
                          "Dup tests needs more than 2 rawx to run")

        self.namespace = self.conf['namespace']
        self.account = self.conf['account']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_client = ContainerClient(self.gridconf)
        self.blob_client = BlobClient()
        # The timestamp gives each run its own container name.
        self.container_name = "TestDupContent%f" % time.time()
        self.container_client.container_create(acct=self.account,
                                               ref=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()

    def tearDown(self):
        """Delegate to the parent class tear-down."""
        super(TestDupContent, self).tearDown()

    def _test_upload(self, stgpol, data_size):
        """Upload `data_size` random bytes with policy `stgpol` and verify
        the content metadata, the chunk count and every chunk's metadata."""
        data = random_data(data_size)
        content = self.content_factory.new(self.container_id, "titi",
                                           len(data), stgpol)
        self.assertEqual(type(content), DupContent)

        content.upload(StringIO.StringIO(data))

        meta, chunks = self.container_client.content_show(
            cid=self.container_id, content=content.content_id)
        chunks = ChunksHelper(chunks)
        self.assertEqual(meta['hash'], md5_data(data))
        self.assertEqual(meta['length'], str(len(data)))
        self.assertEqual(meta['policy'], stgpol)
        self.assertEqual(meta['name'], "titi")

        # An empty content is still expected to have one metachunk.
        metachunk_nb = max(
            1, int(math.ceil(float(len(data)) / self.chunk_size)))

        # Fail with an explicit message on a policy this helper does not
        # know, instead of a NameError on an unbound variable.
        self.assertIn(stgpol, self._COPIES_PER_POLICY)
        nb_copy = self._COPIES_PER_POLICY[stgpol]

        self.assertEqual(len(chunks), metachunk_nb * nb_copy)

        for pos in range(metachunk_nb):
            chunks_at_pos = chunks.filter(pos=pos)
            self.assertEqual(len(chunks_at_pos), nb_copy)

            data_begin = pos * self.chunk_size
            data_end = data_begin + self.chunk_size
            chunk_hash = md5_data(data[data_begin:data_end])

            for chunk in chunks_at_pos:
                chunk_meta, stream = self.blob_client.chunk_get(chunk.url)
                self.assertEqual(md5_stream(stream), chunk_hash)
                self.assertEqual(chunk_meta['content_size'], str(len(data)))
                self.assertEqual(chunk_meta['content_path'], "titi")
                self.assertEqual(chunk_meta['content_cid'],
                                 self.container_id)
                # Compare with the uploaded content's id (the original
                # compared the chunk header with itself).
                self.assertEqual(chunk_meta['content_id'],
                                 content.content_id)
                self.assertEqual(chunk_meta['chunk_id'], chunk.id)
                self.assertEqual(chunk_meta['chunk_pos'], str(pos))
                self.assertEqual(chunk_meta['chunk_hash'], chunk_hash)

    def test_twocopies_upload_0_byte(self):
        self._test_upload("TWOCOPIES", 0)

    def test_twocopies_upload_1_byte(self):
        self._test_upload("TWOCOPIES", 1)

    def test_twocopies_upload_chunksize_bytes(self):
        self._test_upload("TWOCOPIES", self.chunk_size)

    def test_twocopies_upload_chunksize_plus_1_bytes(self):
        self._test_upload("TWOCOPIES", self.chunk_size + 1)

    def test_single_upload_0_byte(self):
        self._test_upload("SINGLE", 0)

    def test_single_upload_chunksize_plus_1_bytes(self):
        self._test_upload("SINGLE", self.chunk_size + 1)

    def test_chunks_cleanup_when_upload_failed(self):
        """A failed upload must leave no chunk behind at other positions."""
        data = random_data(2 * self.chunk_size)
        content = self.content_factory.new(self.container_id, "titi",
                                           len(data), "TWOCOPIES")
        self.assertEqual(type(content), DupContent)

        # set bad url for position 1
        for chunk in content.chunks.filter(pos=1):
            chunk.url = "http://127.0.0.1:9/DEADBEEF"

        self.assertRaises(Exception, content.upload, StringIO.StringIO(data))
        for chunk in content.chunks.exclude(pos=1):
            self.assertRaises(NotFound,
                              self.blob_client.chunk_head, chunk.url)

    def _new_content(self, stgpol, data, broken_pos_list):
        """Upload `data`, delete the chunks listed in `broken_pos_list` and
        return the content as reloaded through the factory.

        `broken_pos_list` holds (pos, idx) pairs: the idx-th chunk at
        position pos is deleted through the blob client.
        """
        old_content = self.content_factory.new(self.container_id, "titi",
                                               len(data), stgpol)
        self.assertEqual(type(old_content), DupContent)

        old_content.upload(StringIO.StringIO(data))

        for pos, idx in broken_pos_list:
            c = old_content.chunks.filter(pos=pos)[idx]
            self.blob_client.chunk_delete(c.url)

        # get the new structure of the uploaded content
        return self.content_factory.get(self.container_id,
                                        old_content.content_id)

    def _test_download(self, stgpol, data_size, broken_pos_list):
        """Download a content with some deleted chunks and check both the
        data and that the deleted chunks are still missing."""
        data = random_data(data_size)
        content = self._new_content(stgpol, data, broken_pos_list)

        downloaded_data = "".join(content.download())

        self.assertEqual(downloaded_data, data)

        # NOTE(review): idx is ignored and the first chunk at each position
        # is probed; every current caller passes idx 0 -- confirm whether
        # [idx] was intended.
        for pos, idx in broken_pos_list:
            # check nothing has been rebuilt
            c = content.chunks.filter(pos=pos)[0]
            self.assertRaises(NotFound, self.blob_client.chunk_delete, c.url)

    def test_twocopies_download_content_0_byte_without_broken_chunks(self):
        self._test_download("TWOCOPIES", 0, [])

    def test_twocopies_download_content_0_byte_with_broken_0_0(self):
        self._test_download("TWOCOPIES", 0, [(0, 0)])

    def test_twocopies_download_content_1_byte_without_broken_chunks(self):
        self._test_download("TWOCOPIES", 1, [])

    def test_twocopies_download_content_1_byte_with_broken_0_0(self):
        self._test_download("TWOCOPIES", 1, [(0, 0)])

    def test_twocopies_download_chunksize_bytes_without_broken_chunks(self):
        self._test_download("TWOCOPIES", self.chunk_size, [])

    def test_twocopies_download_2xchuksize_bytes_with_broken_0_0_and_1_0(self):
        self._test_download("TWOCOPIES", self.chunk_size * 2, [(0, 0), (1, 0)])

    def test_twocopies_download_content_chunksize_bytes_2_broken_chunks(self):
        """With both copies of position 0 deleted, download must fail."""
        data = random_data(self.chunk_size)
        content = self._new_content("TWOCOPIES", data, [(0, 0), (0, 1)])
        gen = content.download()
        # The builtin next() works on both Python 2 and 3 iterators.
        self.assertRaises(UnrecoverableContent, next, gen)

    def test_single_download_content_1_byte_without_broken_chunks(self):
        self._test_download("SINGLE", 1, [])

    def test_single_download_chunksize_bytes_plus_1_without_broken_chunk(self):
        self._test_download("SINGLE", self.chunk_size * 2, [])
Example #3
0
class TestContentFactory(BaseTestCase):
    """Tests for ``ContentFactory``: content lookup and creation, storage
    policy changes, chunk moves and unusual object names."""

    def setUp(self):
        """Create the factory, the clients and a fresh container."""
        super(TestContentFactory, self).setUp()
        self.namespace = self.conf['namespace']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        # The timestamp gives each run its own container name.
        self.container_name = "TestContentFactory%f" % time.time()
        self.blob_client = BlobClient()
        self.container_client = ContainerClient(self.gridconf)
        self.container_client.container_create(acct=self.account,
                                               ref=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()

    def tearDown(self):
        """Delegate to the parent class tear-down."""
        super(TestContentFactory, self).tearDown()

    def test_extract_datasec(self):
        """_extract_datasec must resolve a storage policy name into its data
        security type and arguments, and reject unknown policies."""
        self.content_factory.ns_info = {
            "data_security": {
                "DUPONETWO": "DUP:distance=1|nb_copy=2",
                "RAIN": "RAIN:k=6|m=2|algo=liber8tion"
            },
            "storage_policy": {
                "RAIN": "NONE:RAIN:NONE",
                "SINGLE": "NONE:NONE:NONE",
                "TWOCOPIES": "NONE:DUPONETWO:NONE"
            }
        }

        ds_type, ds_args = self.content_factory._extract_datasec("RAIN")
        self.assertEqual(ds_type, "RAIN")
        self.assertEqual(ds_args, {
            "k": "6",
            "m": "2",
            "algo": "liber8tion"
        })

        ds_type, ds_args = self.content_factory._extract_datasec("SINGLE")
        self.assertEqual(ds_type, "DUP")
        self.assertEqual(ds_args, {
            "nb_copy": "1",
            "distance": "0"
        })

        ds_type, ds_args = self.content_factory._extract_datasec("TWOCOPIES")
        self.assertEqual(ds_type, "DUP")
        self.assertEqual(ds_args, {
            "nb_copy": "2",
            "distance": "1"
        })

        self.assertRaises(InconsistentContent,
                          self.content_factory._extract_datasec,
                          "UnKnOwN")

    def test_get_rain(self):
        """get() must build a RainContent from mocked content_show output,
        with its chunks in the order asserted below."""
        meta = {
            "chunk-method": "plain/rain?algo=liber8tion&k=6&m=2",
            "ctime": "1450176946",
            "deleted": "False",
            "hash": "E952A419957A6E405BFC53EC65483F73",
            "hash-method": "md5",
            "id": "3FA2C4A1ED2605005335A276890EC458",
            "length": "658",
            "mime-type": "application/octet-stream",
            "name": "tox.ini",
            "policy": "RAIN",
            "version": "1450176946676289"
        }
        chunks = [
            {
                "url": "http://127.0.0.1:6012/A0A0",
                "pos": "0.p0", "size": 512,
                "hash": "E7D4E4AD460971CA2E3141F2102308D4"},
            {
                "url": "http://127.0.0.1:6010/A01",
                "pos": "0.1", "size": 146,
                "hash": "760AB5DA7C51A3654F1CA622687CD6C3"},
            {
                "url": "http://127.0.0.1:6011/A00",
                "pos": "0.0", "size": 512,
                "hash": "B1D08B86B8CAA90A2092CCA0DF9201DB"},
            {
                "url": "http://127.0.0.1:6013/A0A1",
                "pos": "0.p1", "size": 512,
                "hash": "DA9D7F72AEEA5791565724424CE45C16"}
        ]
        self.content_factory.container_client.content_show = Mock(
            return_value=(meta, chunks))
        c = self.content_factory.get("xxx_container_id", "xxx_content_id")
        self.assertEqual(type(c), RainContent)
        self.assertEqual(c.content_id, "3FA2C4A1ED2605005335A276890EC458")
        self.assertEqual(c.length, 658)
        self.assertEqual(c.path, "tox.ini")
        self.assertEqual(c.version, "1450176946676289")
        self.assertEqual(c.algo, "liber8tion")
        self.assertEqual(c.k, 6)
        self.assertEqual(c.m, 2)
        self.assertEqual(len(c.chunks), 4)
        # Expected order: 0.0, 0.1, 0.p0, 0.p1.
        self.assertEqual(c.chunks[0].raw(), chunks[2])
        self.assertEqual(c.chunks[1].raw(), chunks[1])
        self.assertEqual(c.chunks[2].raw(), chunks[0])
        self.assertEqual(c.chunks[3].raw(), chunks[3])

    def test_get_dup(self):
        """get() must build a DupContent from mocked content_show output."""
        meta = {
            "chunk-method": "plain/bytes",
            "ctime": "1450176946",
            "deleted": "False",
            "hash": "E952A419957A6E405BFC53EC65483F73",
            "hash-method": "md5",
            "id": "3FA2C4A1ED2605005335A276890EC458",
            "length": "658",
            "mime-type": "application/octet-stream",
            "name": "tox.ini",
            "policy": "TWOCOPIES",
            "version": "1450176946676289"
        }
        chunks = [
            {
                "url": "http://127.0.0.1:6010/A0",
                "pos": "0", "size": 658,
                "hash": "E952A419957A6E405BFC53EC65483F73"},
            {
                "url": "http://127.0.0.1:6011/A1",
                "pos": "0", "size": 658,
                "hash": "E952A419957A6E405BFC53EC65483F73"}
        ]
        self.content_factory.container_client.content_show = Mock(
            return_value=(meta, chunks))
        c = self.content_factory.get("xxx_container_id", "xxx_content_id")
        self.assertEqual(type(c), DupContent)
        self.assertEqual(c.content_id, "3FA2C4A1ED2605005335A276890EC458")
        self.assertEqual(c.length, 658)
        self.assertEqual(c.path, "tox.ini")
        self.assertEqual(c.version, "1450176946676289")
        self.assertEqual(c.nb_copy, 2)
        self.assertEqual(c.distance, 1)
        self.assertEqual(len(c.chunks), 2)
        self.assertEqual(c.chunks[0].raw(), chunks[0])
        self.assertEqual(c.chunks[1].raw(), chunks[1])

    def test_get_unknown_content(self):
        self.assertRaises(ContentNotFound, self.content_factory.get,
                          self.container_id, "1234")

    def test_new_rain(self):
        """new() must build a RainContent from mocked content_prepare
        output, with its chunks in the order asserted below."""
        meta = {
            "chunk-method": "plain/rain?algo=liber8tion&k=6&m=2",
            "ctime": "1450341162",
            "deleted": "False",
            "hash": "",
            "hash-method": "md5",
            "id": "F4B1C8DD132705007DE8B43D0709DAA2",
            "length": "1000",
            "mime-type": "application/octet-stream",
            "name": "titi",
            "policy": "RAIN",
            "version": "1450341162332663"
        }
        chunks = [
            {
                "url": "http://127.0.0.1:6010/0_p1",
                "pos": "0.p1", "size": 1048576,
                "hash": "00000000000000000000000000000000"},
            {
                "url": "http://127.0.0.1:6011/0_p0",
                "pos": "0.p0", "size": 1048576,
                "hash": "00000000000000000000000000000000"},
            {
                "url": "http://127.0.0.1:6016/0_1",
                "pos": "0.1", "size": 1048576,
                "hash": "00000000000000000000000000000000"},
            {
                "url": "http://127.0.0.1:6017/0_0",
                "pos": "0.0", "size": 1048576,
                "hash": "00000000000000000000000000000000"}
        ]
        self.content_factory.container_client.content_prepare = Mock(
            return_value=(meta, chunks))
        c = self.content_factory.new("xxx_container_id", "titi",
                                     1000, "RAIN")
        self.assertEqual(type(c), RainContent)
        self.assertEqual(c.content_id, "F4B1C8DD132705007DE8B43D0709DAA2")
        self.assertEqual(c.length, 1000)
        self.assertEqual(c.path, "titi")
        self.assertEqual(c.version, "1450341162332663")
        self.assertEqual(c.algo, "liber8tion")
        self.assertEqual(c.k, 6)
        self.assertEqual(c.m, 2)
        self.assertEqual(len(c.chunks), 4)
        # Expected order: 0.0, 0.1, 0.p0, 0.p1.
        self.assertEqual(c.chunks[0].raw(), chunks[3])
        self.assertEqual(c.chunks[1].raw(), chunks[2])
        self.assertEqual(c.chunks[2].raw(), chunks[1])
        self.assertEqual(c.chunks[3].raw(), chunks[0])

    def _new_content(self, stgpol, data, path="titi"):
        """Upload `data` as `path` with policy `stgpol` and return the
        content as reloaded through the factory."""
        old_content = self.content_factory.new(self.container_id, path,
                                               len(data), stgpol)
        old_content.upload(StringIO.StringIO(data))
        return self.content_factory.get(self.container_id,
                                        old_content.content_id)

    def _test_change_policy(self, data_size, old_policy, new_policy):
        """Upload with `old_policy`, switch to `new_policy`, then check that
        the old content is gone and the new one yields the same data."""
        uses_rain = "RAIN" in (old_policy, new_policy)
        if uses_rain and len(self.conf['rawx']) < 8:
            # The message now matches the `< 8` check above.
            self.skipTest("RAIN: Need at least 8 rawx to run")

        data = random_data(data_size)
        obj_type = {
            "SINGLE": DupContent,
            "TWOCOPIES": DupContent,
            "THREECOPIES": DupContent,
            "RAIN": RainContent
        }
        old_content = self._new_content(old_policy, data)
        self.assertEqual(type(old_content), obj_type[old_policy])

        changed_content = self.content_factory.change_policy(
            old_content.container_id, old_content.content_id, new_policy)

        self.assertRaises(NotFound, self.container_client.content_show,
                          self.account,
                          cid=old_content.container_id,
                          content=old_content.content_id)

        new_content = self.content_factory.get(self.container_id,
                                               changed_content.content_id)
        self.assertEqual(type(new_content), obj_type[new_policy])

        downloaded_data = "".join(new_content.download())

        self.assertEqual(downloaded_data, data)

    def test_change_content_0_byte_policy_single_to_rain(self):
        self._test_change_policy(0, "SINGLE", "RAIN")

    def test_change_content_0_byte_policy_rain_to_twocopies(self):
        self._test_change_policy(0, "RAIN", "TWOCOPIES")

    def test_change_content_1_byte_policy_single_to_rain(self):
        self._test_change_policy(1, "SINGLE", "RAIN")

    def test_change_content_chunksize_bytes_policy_twocopies_to_rain(self):
        self._test_change_policy(self.chunk_size, "TWOCOPIES", "RAIN")

    def test_change_content_2xchunksize_bytes_policy_threecopies_to_rain(self):
        self._test_change_policy(self.chunk_size * 2, "THREECOPIES", "RAIN")

    def test_change_content_1_byte_policy_rain_to_threecopies(self):
        self._test_change_policy(1, "RAIN", "THREECOPIES")

    def test_change_content_chunksize_bytes_policy_rain_to_twocopies(self):
        self._test_change_policy(self.chunk_size, "RAIN", "TWOCOPIES")

    def test_change_content_2xchunksize_bytes_policy_rain_to_single(self):
        self._test_change_policy(self.chunk_size * 2, "RAIN", "SINGLE")

    def test_change_content_0_byte_policy_twocopies_to_threecopies(self):
        self._test_change_policy(0, "TWOCOPIES", "THREECOPIES")

    def test_change_content_chunksize_bytes_policy_single_to_twocopies(self):
        self._test_change_policy(self.chunk_size, "SINGLE", "TWOCOPIES")

    def test_change_content_2xchunksize_bytes_policy_3copies_to_single(self):
        self._test_change_policy(self.chunk_size * 2, "THREECOPIES", "SINGLE")

    def test_change_content_with_same_policy(self):
        """Changing to the current policy must keep the same content id."""
        data = random_data(10)
        old_content = self._new_content("TWOCOPIES", data)
        changed_content = self.content_factory.change_policy(
            old_content.container_id, old_content.content_id, "TWOCOPIES")
        self.assertEqual(old_content.content_id, changed_content.content_id)

    def test_change_policy_unknown_content(self):
        self.assertRaises(ContentNotFound, self.content_factory.change_policy,
                          self.container_id, "1234", "SINGLE")

    def test_change_policy_unknown_storage_policy(self):
        data = random_data(10)
        old_content = self._new_content("TWOCOPIES", data)
        self.assertRaises(ClientException, self.content_factory.change_policy,
                          self.container_id, old_content.content_id, "UnKnOwN")

    def _test_move_chunk(self, policy):
        """Move the first chunk of metachunk 0 and check that the moved
        chunk keeps its data and metadata (except the chunk id), and that
        the chunks of metachunk 0 all sit on distinct hosts."""
        data = random_data(self.chunk_size)
        content = self._new_content(policy, data)

        moved = content.chunks.filter(metapos=0)[0]
        chunk_id = moved.id
        chunk_url = moved.url
        chunk_meta, chunk_stream = self.blob_client.chunk_get(chunk_url)
        chunk_hash = md5_stream(chunk_stream)
        new_chunk = content.move_chunk(chunk_id)

        content_updated = self.content_factory.get(self.container_id,
                                                   content.content_id)

        hosts = []
        for c in content_updated.chunks.filter(metapos=0):
            self.assertThat(hosts, Not(Contains(c.host)))
            self.assertNotEqual(c.id, chunk_id)
            hosts.append(c.host)

        new_chunk_meta, new_chunk_stream = self.blob_client.chunk_get(
            new_chunk["url"])
        new_chunk_hash = md5_stream(new_chunk_stream)

        self.assertEqual(new_chunk_hash, chunk_hash)

        # The chunk id is expected to differ; compare everything else.
        del chunk_meta["chunk_id"]
        del new_chunk_meta["chunk_id"]
        self.assertEqual(new_chunk_meta, chunk_meta)

    def test_single_move_chunk(self):
        self._test_move_chunk("SINGLE")

    def test_twocopies_move_chunk(self):
        self._test_move_chunk("TWOCOPIES")

    def test_rain_move_chunk(self):
        if len(self.conf['rawx']) < 9:
            self.skipTest("Need more than 8 rawx")
        self._test_move_chunk("RAIN")

    def test_move_chunk_not_in_content(self):
        data = random_data(self.chunk_size)
        content = self._new_content("TWOCOPIES", data)
        with ExpectedException(OrphanChunk):
            content.move_chunk("1234")

    def test_strange_paths(self):
        """Objects with unusual names must keep their exact path and appear
        in the container listing; every created object is deleted at the
        end."""
        strange_paths = [
            "Annual report.txt",
            "foo+bar=foobar.txt",
            "100%_bug_free.c",
            "forward/slash/allowed",
            "I\\put\\backslashes\\and$dollar$signs$in$file$names",
            "Je suis tombé sur la tête, mais ça va bien.",
            "%s%f%u%d%%",
            "carriage\rreturn",
            "line\nfeed",
            "ta\tbu\tla\ttion",
            "controlchars",
        ]
        answers = dict()
        try:
            # Creation happens inside the try block so that objects created
            # before a failure are still cleaned up.
            for cname in strange_paths:
                answers[cname] = self._new_content("SINGLE", "nobody cares",
                                                   cname)
            listing = self.container_client.container_list(
                self.account, self.container_name)
            obj_set = {k["name"].encode("utf8", "ignore")
                       for k in listing["objects"]}
            # Ensure the saved path is the one we gave the object
            for cname in answers:
                self.assertEqual(cname, answers[cname].path)
            # Ensure all objects appear in listing
            for cname in strange_paths:
                self.assertIn(cname, obj_set)
        finally:
            # Cleanup: delete every created object (the original deleted
            # only the last one, repeatedly). Deletion stays best-effort so
            # that a cleanup failure does not mask the test result.
            for created in answers.values():
                try:
                    created.delete()
                except Exception:
                    pass
Example #4
0
class TestContentFactory(BaseTestCase):
    """Tests for ``ContentFactory`` and for the content objects it returns.

    The ``test_get_ec``, ``test_get_plain`` and ``test_new_ec`` tests
    replace a container client method with a ``Mock``; the remaining tests
    work against the container created in ``setUp``.
    """

    def setUp(self):
        """Create the clients and a freshly named container for the test."""
        super(TestContentFactory, self).setUp()

        self.wait_for_score(('meta2', ))
        self.namespace = self.conf['namespace']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        # The timestamp suffix gives each test its own container name.
        self.container_name = "TestContentFactory%f" % time.time()
        self.blob_client = BlobClient(conf=self.conf)
        self.container_client = ContainerClient(self.gridconf)
        self.container_client.container_create(account=self.account,
                                               reference=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()
        # Storage policy names used by the tests below.
        self.stgpol = "SINGLE"
        self.stgpol_twocopies = "TWOCOPIES"
        self.stgpol_threecopies = "THREECOPIES"
        self.stgpol_ec = "EC"

    def tearDown(self):
        """Delegate to the parent class teardown."""
        super(TestContentFactory, self).tearDown()

    def test_get_ec(self):
        """``get`` builds an ``ECContent`` from mocked "ec/..." metadata."""
        meta = {
            "chunk_method": "ec/algo=liberasurecode_rs_vand,k=6,m=2",
            "ctime": "1450176946",
            "deleted": "False",
            "hash": "E952A419957A6E405BFC53EC65483F73",
            "hash_method": "md5",
            "id": "3FA2C4A1ED2605005335A276890EC458",
            "length": "658",
            "mime_type": "application/octet-stream",
            "name": "tox.ini",
            "policy": self.stgpol_ec,
            "version": "1450176946676289",
            "oio_version": "4.2",
        }
        chunks = [{
            "url": "http://127.0.0.1:6012/A0A0",
            "pos": "0.0",
            "size": 512,
            "hash": "E7D4E4AD460971CA2E3141F2102308D4"
        }, {
            "url": "http://127.0.0.1:6010/A01",
            "pos": "0.1",
            "size": 146,
            "hash": "760AB5DA7C51A3654F1CA622687CD6C3"
        }, {
            "url": "http://127.0.0.1:6011/A00",
            "pos": "0.2",
            "size": 512,
            "hash": "B1D08B86B8CAA90A2092CCA0DF9201DB"
        }, {
            "url": "http://127.0.0.1:6013/A0A1",
            "pos": "0.3",
            "size": 512,
            "hash": "DA9D7F72AEEA5791565724424CE45C16"
        }]
        # The factory gets its description of the content from this mock.
        self.content_factory.container_client.content_locate = Mock(
            return_value=(meta, chunks))
        c = self.content_factory.get("xxx_container_id",
                                     "xxx_content_id",
                                     account=self.account,
                                     container_name=self.container_name)
        self.assertEqual(type(c), ECContent)
        self.assertEqual(c.content_id, "3FA2C4A1ED2605005335A276890EC458")
        self.assertEqual(c.length, 658)
        self.assertEqual(c.path, "tox.ini")
        self.assertEqual(
            c.full_path,
            encode_fullpath(self.account, self.container_name, "tox.ini",
                            meta['version'], meta['id']))
        self.assertEqual(c.version, "1450176946676289")
        # TODO test storage method
        self.assertEqual(len(c.chunks), 4)
        self.assertEqual(c.chunks[0].raw(), chunks[0])
        self.assertEqual(c.chunks[1].raw(), chunks[1])
        self.assertEqual(c.chunks[2].raw(), chunks[2])
        self.assertEqual(c.chunks[3].raw(), chunks[3])

    def test_get_plain(self):
        """``get`` builds a ``PlainContent`` from mocked "plain/..." metadata."""
        meta = {
            "chunk_method": "plain/nb_copy=2",
            "ctime": "1450176946",
            "deleted": "False",
            "hash": "E952A419957A6E405BFC53EC65483F73",
            "hash_method": "md5",
            "id": "3FA2C4A1ED2605005335A276890EC458",
            "length": "658",
            "mime_type": "application/octet-stream",
            "name": "tox.ini",
            "policy": self.stgpol_twocopies,
            "version": "1450176946676289",
            "oio_version": "4.2",
        }
        chunks = [{
            "url": "http://127.0.0.1:6010/A0",
            "pos": "0",
            "size": 658,
            "hash": "E952A419957A6E405BFC53EC65483F73"
        }, {
            "url": "http://127.0.0.1:6011/A1",
            "pos": "0",
            "size": 658,
            "hash": "E952A419957A6E405BFC53EC65483F73"
        }]
        # The factory gets its description of the content from this mock.
        self.content_factory.container_client.content_locate = Mock(
            return_value=(meta, chunks))
        c = self.content_factory.get("xxx_container_id",
                                     "xxx_content_id",
                                     account=self.account,
                                     container_name=self.container_name)
        self.assertEqual(type(c), PlainContent)
        self.assertEqual(c.content_id, "3FA2C4A1ED2605005335A276890EC458")
        self.assertEqual(c.length, 658)
        self.assertEqual(c.path, "tox.ini")
        self.assertEqual(c.version, "1450176946676289")
        self.assertEqual(
            c.full_path,
            encode_fullpath(self.account, self.container_name, "tox.ini",
                            meta['version'], meta['id']))
        # TODO test storage_method
        self.assertEqual(len(c.chunks), 2)
        self.assertEqual(c.chunks[0].raw(), chunks[0])
        self.assertEqual(c.chunks[1].raw(), chunks[1])

    def test_get_unknown_content(self):
        """``get`` raises ``ContentNotFound`` for the content ID "1234"."""
        self.assertRaises(ContentNotFound, self.content_factory.get,
                          self.container_id, "1234")

    def test_new_ec(self):
        """``new`` builds an ``ECContent`` from a mocked ``content_prepare``."""
        meta = {
            "chunk_method": "ec/algo=liberasurecode_rs_vand,k=6,m=2",
            "ctime": "1450341162",
            "deleted": "False",
            "hash": "",
            "hash_method": "md5",
            "id": "F4B1C8DD132705007DE8B43D0709DAA2",
            "length": "1000",
            "mime_type": "application/octet-stream",
            "name": "titi",
            "policy": self.stgpol_ec,
            "version": "1450341162332663",
            "oio_version": "4.2",
        }
        # The chunks are listed in descending position order on purpose:
        # the assertions below expect them back in ascending order.
        chunks = [{
            "url": "http://127.0.0.1:6010/0_p1",
            "pos": "0.3",
            "size": 1048576,
            "hash": "00000000000000000000000000000000"
        }, {
            "url": "http://127.0.0.1:6011/0_p0",
            "pos": "0.2",
            "size": 1048576,
            "hash": "00000000000000000000000000000000"
        }, {
            "url": "http://127.0.0.1:6016/0_1",
            "pos": "0.1",
            "size": 1048576,
            "hash": "00000000000000000000000000000000"
        }, {
            "url": "http://127.0.0.1:6017/0_0",
            "pos": "0.0",
            "size": 1048576,
            "hash": "00000000000000000000000000000000"
        }]
        self.content_factory.container_client.content_prepare = Mock(
            return_value=(meta, chunks))
        c = self.content_factory.new("xxx_container_id",
                                     "titi",
                                     1000,
                                     self.stgpol_ec,
                                     account=self.account,
                                     container_name=self.container_name)
        self.assertEqual(type(c), ECContent)
        self.assertEqual(c.content_id, "F4B1C8DD132705007DE8B43D0709DAA2")
        self.assertEqual(c.length, 1000)
        self.assertEqual(c.path, "titi")
        self.assertEqual(c.version, "1450341162332663")
        # TODO test storage_method
        self.assertEqual(len(c.chunks), 4)
        self.assertEqual(c.chunks[0].raw(), chunks[3])
        self.assertEqual(c.chunks[1].raw(), chunks[2])
        self.assertEqual(c.chunks[2].raw(), chunks[1])
        self.assertEqual(c.chunks[3].raw(), chunks[0])

    def _new_content(self,
                     stgpol,
                     data,
                     path="titi",
                     account=None,
                     container_name=None,
                     mime_type=None,
                     properties=None):
        """Create an object in the test container and return it reloaded.

        :param stgpol: name of the storage policy passed to the factory.
        :param data: bytes to upload; ``len(data)`` is the declared size.
        :param path: name of the object.
        :param account: forwarded as-is to ``ContentFactory.new``.
        :param container_name: forwarded as-is to ``ContentFactory.new``.
        :param mime_type: if truthy, set on the content before creation.
        :param properties: if truthy, set on the content before creation.
        :returns: the content as loaded again by ``ContentFactory.get``.
        """
        old_content = self.content_factory.new(self.container_id,
                                               path,
                                               len(data),
                                               stgpol,
                                               account=account,
                                               container_name=container_name)
        if properties:
            old_content.properties = properties
        if mime_type:
            old_content.mime_type = mime_type
        old_content.create(BytesIO(data))
        return self.content_factory.get(self.container_id,
                                        old_content.content_id)

    def _test_move_chunk(self, policy):
        """Move one chunk at metachunk position 0 and check the result.

        The moved chunk must hold the same data, must not be older than
        the original, and must carry the same metadata apart from its ID
        and modification time.

        :param policy: name of the storage policy of the created object.
        """
        data = random_data(self.chunk_size)
        content = self._new_content(policy, data)

        mc = content.chunks.filter(metapos=0)
        chunk_id = mc[0].id
        chunk_url = mc[0].url
        chunk_host = mc[0].host
        chunk_meta, chunk_stream = self.blob_client.chunk_get(chunk_url)
        chunk_hash = md5_stream(chunk_stream)
        new_chunk = content.move_chunk(chunk_id, service_id=chunk_host)

        content_updated = self.content_factory.get(self.container_id,
                                                   content.content_id)

        # After the move, no two chunks of the metachunk may share a host,
        # and the original chunk URL must be gone.
        hosts = []
        for c in content_updated.chunks.filter(metapos=0):
            self.assertThat(hosts, Not(Contains(c.host)))
            self.assertNotEqual(c.url, chunk_url)
            hosts.append(c.host)

        new_chunk_meta, new_chunk_stream = self.blob_client.chunk_get(
            new_chunk["url"])
        new_chunk_hash = md5_stream(new_chunk_stream)

        self.assertEqual(new_chunk_hash, chunk_hash)
        self.assertGreaterEqual(new_chunk_meta['chunk_mtime'],
                                chunk_meta['chunk_mtime'])

        # The ID and modification time are allowed to differ; everything
        # else is compared.
        del chunk_meta["chunk_id"]
        del new_chunk_meta["chunk_id"]
        del chunk_meta["chunk_mtime"]
        del new_chunk_meta["chunk_mtime"]
        self.assertEqual(new_chunk_meta, chunk_meta)

    def test_single_move_chunk(self):
        """Run the move-chunk scenario with the "SINGLE" policy."""
        self._test_move_chunk(self.stgpol)

    def test_twocopies_move_chunk(self):
        """Run the move-chunk scenario with the "TWOCOPIES" policy."""
        self._test_move_chunk(self.stgpol_twocopies)

    @ec
    def test_ec_move_chunk(self):
        """Run the move-chunk scenario with the "EC" policy."""
        self._test_move_chunk(self.stgpol_ec)

    def test_move_chunk_not_in_content(self):
        """Moving a chunk ID the content does not have raises OrphanChunk."""
        data = random_data(self.chunk_size)
        content = self._new_content(self.stgpol_twocopies, data)
        with ExpectedException(OrphanChunk):
            content.move_chunk("1234")

    def test_strange_paths(self):
        """Create objects with unusual names and check the names survive.

        For every name in ``strange_paths`` the test checks the ``path``
        and ``full_path`` of the created content and the presence of the
        name in the container listing.  Every object that was created is
        deleted afterwards, whatever the outcome of the checks.
        """
        answers = dict()
        try:
            # Creation happens inside the try block so that objects created
            # before a failure are still cleaned up.
            for cname in strange_paths:
                answers[cname] = self._new_content(
                    self.stgpol, b"nobody cares", cname)

            _, listing = self.container_client.content_list(
                self.account, self.container_name)
            if PY2:
                obj_set = {k["name"].encode('utf-8')
                           for k in listing["objects"]}
            else:
                obj_set = {k["name"] for k in listing["objects"]}

            # Ensure the saved path is the one we gave the object
            for cname in answers:
                self.assertEqual(cname, answers[cname].path)
                fullpath = encode_fullpath(self.account, self.container_name,
                                           cname, answers[cname].version,
                                           answers[cname].content_id)
                self.assertEqual(answers[cname].full_path, fullpath)
            # Ensure all objects appear in listing
            for cname in strange_paths:
                self.assertIn(cname, obj_set)

        finally:
            # Cleanup: delete every created object, not only the last one.
            for created in answers.values():
                try:
                    created.delete()
                except Exception:
                    # Best effort: a failed deletion must not hide the
                    # result of the assertions above.
                    pass
Example #5
0
class TestBlobRebuilder(BaseTestCase):
    """Test of ``BlobRebuilder`` on chunks passed to convert_to_old_chunk."""

    def setUp(self):
        """Create a container and one object, and map rawx IDs to volumes.

        The test is skipped when ``content_prepare`` returns fewer than
        2 chunks for a 1-byte object.
        """
        super(TestBlobRebuilder, self).setUp()
        self.container = random_str(16)
        self.cid = cid_from_name(self.account, self.container)
        self.path = random_str(16)
        self.api = ObjectStorageApi(self.ns)
        self.blob_client = BlobClient(self.conf)

        self.api.container_create(self.account, self.container)
        _, chunks = self.api.container.content_prepare(self.account,
                                                       self.container,
                                                       self.path,
                                                       size=1)
        if len(chunks) < 2:
            self.skipTest("need at least 2 chunks to run")

        services = self.conscience.all_services('rawx')
        # Maps a rawx identifier (its 'tag.service_id' tag, or its address
        # when the tag is absent) to the value of its 'tag.vol' tag.
        self.rawx_volumes = dict()
        for rawx in services:
            tags = rawx['tags']
            service_id = tags.get('tag.service_id', None)
            if service_id is None:
                service_id = rawx['addr']
            volume = tags.get('tag.vol', None)
            self.rawx_volumes[service_id] = volume

        self.api.object_create(self.account,
                               self.container,
                               obj_name=self.path,
                               data="chunk")
        meta, self.chunks = self.api.object_locate(self.account,
                                                   self.container, self.path)
        self.version = meta['version']
        self.content_id = meta['id']

    def _chunk_path(self, chunk):
        """Return the local path built from the URL of ``chunk``.

        The URL is split on '/': the third field selects the volume in
        ``self.rawx_volumes`` and the rest is the chunk ID.  The result is
        ``<volume>/<first 3 characters of the chunk ID>/<chunk ID>``.
        """
        url_fields = chunk['url'].split('/', 3)
        volume_id = url_fields[2]
        chunk_id = url_fields[3]
        volume = self.rawx_volumes[volume_id]
        return volume + '/' + chunk_id[:3] + '/' + chunk_id

    def test_rebuild_old_chunk(self):
        """Remove one chunk file and check the rebuilder restores it.

        Every chunk is first passed through ``convert_to_old_chunk``.  One
        chunk is then picked at random, its file removed, and rebuilt; the
        rebuilt chunk must match the original in position, size, hash,
        data and headers.
        """
        for c in self.chunks:
            convert_to_old_chunk(self._chunk_path(c), self.account,
                                 self.container, self.path, self.version,
                                 self.content_id)

        chunk = random.choice(self.chunks)
        chunk_volume = chunk['url'].split('/')[2]
        chunk_id = chunk['url'].split('/')[3]
        # Fetch the original before its file is removed, so that it can be
        # compared with the rebuilt chunk later.
        chunk_headers, chunk_stream = self.blob_client.chunk_get(
            chunk['url'], check_headers=False)
        os.remove(self._chunk_path(chunk))
        chunks_kept = list(self.chunks)
        chunks_kept.remove(chunk)

        conf = self.conf.copy()
        conf['allow_same_rawx'] = True
        rebuilder = BlobRebuilder(conf, service_id=chunk_volume)
        rebuilder_worker = rebuilder.create_worker(None, None)
        rebuilder_worker._process_item(
            (self.ns, self.cid, self.content_id, chunk_id))

        _, new_chunks = self.api.object_locate(self.account, self.container,
                                               self.path)
        # Materialize the result: it is measured with len() and iterated.
        new_chunks = list(new_chunks)

        self.assertEqual(len(new_chunks), len(chunks_kept) + 1)
        url_kept = [c['url'] for c in chunks_kept]
        new_chunk = None
        for c in new_chunks:
            if c['url'] not in url_kept:
                # There must be exactly one chunk that was not kept.
                self.assertIsNone(new_chunk)
                new_chunk = c
        # Fail with a clear message rather than a TypeError below.
        self.assertIsNotNone(new_chunk)

        # Cannot check if the URL is different: it may be the same since we
        # generate predictable chunk IDs.
        # self.assertNotEqual(chunk['real_url'], new_chunk['real_url'])
        # self.assertNotEqual(chunk['url'], new_chunk['url'])
        self.assertEqual(chunk['pos'], new_chunk['pos'])
        self.assertEqual(chunk['size'], new_chunk['size'])
        self.assertEqual(chunk['hash'], new_chunk['hash'])

        new_chunk_headers, new_chunk_stream = self.blob_client.chunk_get(
            new_chunk['url'])
        chunk_data = b''.join(chunk_stream)
        new_chunk_data = b''.join(new_chunk_stream)
        self.assertEqual(chunk_data, new_chunk_data)
        fullpath = encode_fullpath(self.account, self.container, self.path,
                                   self.version, self.content_id)
        self.assertEqual(fullpath, new_chunk_headers['full_path'])
        # 'full_path' is removed from the new headers only; the original
        # headers were fetched with check_headers=False.
        del new_chunk_headers['full_path']
        # Since we generate predictable chunk IDs, they can be equal
        # self.assertNotEqual(chunk_headers['chunk_id'],
        #                     new_chunk_headers['chunk_id'])
        # We could compare the modification time of the chunks,
        # but unfortunately they have a 1s resolution...
        # self.assertNotEqual(chunk_headers['chunk_mtime'],
        #                     new_chunk_headers['chunk_mtime'])
        new_chunk_id = new_chunk['url'].split('/')[3]
        self.assertEqual(new_chunk_id, new_chunk_headers['chunk_id'])
        del chunk_headers['chunk_id']
        del new_chunk_headers['chunk_id']
        self.assertEqual(OIO_VERSION, new_chunk_headers['oio_version'])
        del chunk_headers['oio_version']
        del new_chunk_headers['oio_version']
        del chunk_headers['chunk_mtime']
        del new_chunk_headers['chunk_mtime']
        # Whatever remains must be identical in both sets of headers.
        self.assertEqual(chunk_headers, new_chunk_headers)
Example #6
0
class TestDupContent(BaseTestCase):
    """Upload, rebuild and download tests for ``DupContent`` objects.

    In the helpers below, a broken chunk is designated by a ``(pos, idx)``
    pair: ``pos`` is the chunk position and ``idx`` the index of the chunk
    among those returned by ``chunks.filter(pos=pos)``.
    """

    def setUp(self):
        """Create the clients and a freshly named container for the test.

        The test is skipped when fewer than 3 rawx entries are configured.
        """
        super(TestDupContent, self).setUp()

        if len(self.conf['rawx']) < 3:
            self.skipTest("Not enough rawx. "
                          "Dup tests needs more than 2 rawx to run")

        self.namespace = self.conf['namespace']
        self.account = self.conf['account']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_client = ContainerClient(self.gridconf)
        self.blob_client = BlobClient()
        # The timestamp suffix gives each test its own container name.
        self.container_name = "TestDupContent%f" % time.time()
        self.container_client.container_create(acct=self.account,
                                               ref=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()

    def tearDown(self):
        """Delegate to the parent class teardown."""
        super(TestDupContent, self).tearDown()

    def _test_upload(self, stgpol, data_size):
        """Upload random data and check the content and chunk metadata.

        :param stgpol: "SINGLE", "TWOCOPIES" or "THREECOPIES".
        :param data_size: number of random bytes to upload.
        """
        data = random_data(data_size)
        content = self.content_factory.new(self.container_id, "titi",
                                           len(data), stgpol)
        self.assertEqual(type(content), DupContent)

        content.upload(StringIO.StringIO(data))

        meta, chunks = self.container_client.content_show(
            cid=self.container_id, content=content.content_id)
        chunks = ChunksHelper(chunks)
        self.assertEqual(meta['hash'], md5_data(data))
        self.assertEqual(meta['length'], str(len(data)))
        self.assertEqual(meta['policy'], stgpol)
        self.assertEqual(meta['name'], "titi")

        metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size))
        if metachunk_nb == 0:
            metachunk_nb = 1  # special case for empty content

        if stgpol == "THREECOPIES":
            nb_copy = 3
        elif stgpol == "TWOCOPIES":
            nb_copy = 2
        elif stgpol == "SINGLE":
            nb_copy = 1
        else:
            # Report an unsupported policy explicitly instead of failing
            # later on an undefined variable.
            self.fail("unsupported storage policy: %s" % stgpol)

        self.assertEqual(len(chunks), metachunk_nb * nb_copy)

        for pos in range(metachunk_nb):
            chunks_at_pos = chunks.filter(pos=pos)
            self.assertEqual(len(chunks_at_pos), nb_copy)

            data_begin = pos * self.chunk_size
            data_end = pos * self.chunk_size + self.chunk_size
            chunk_hash = md5_data(data[data_begin:data_end])

            for chunk in chunks_at_pos:
                # A separate name keeps the content metadata fetched above
                # from being shadowed by the chunk metadata.
                chunk_meta, stream = self.blob_client.chunk_get(chunk.url)
                self.assertEqual(md5_stream(stream), chunk_hash)
                self.assertEqual(chunk_meta['content_size'], str(len(data)))
                self.assertEqual(chunk_meta['content_path'], "titi")
                self.assertEqual(chunk_meta['content_cid'],
                                 self.container_id)
                # Compare with the ID of the uploaded content (the previous
                # check compared the value with itself).
                self.assertEqual(chunk_meta['content_id'],
                                 content.content_id)
                self.assertEqual(chunk_meta['chunk_id'], chunk.id)
                self.assertEqual(chunk_meta['chunk_pos'], str(pos))
                self.assertEqual(chunk_meta['chunk_hash'], chunk_hash)

    def test_twocopies_upload_0_byte(self):
        """Upload an empty object with "TWOCOPIES"."""
        self._test_upload("TWOCOPIES", 0)

    def test_twocopies_upload_1_byte(self):
        """Upload a 1-byte object with "TWOCOPIES"."""
        self._test_upload("TWOCOPIES", 1)

    def test_twocopies_upload_chunksize_bytes(self):
        """Upload exactly ``chunk_size`` bytes with "TWOCOPIES"."""
        self._test_upload("TWOCOPIES", self.chunk_size)

    def test_twocopies_upload_chunksize_plus_1_bytes(self):
        """Upload ``chunk_size + 1`` bytes with "TWOCOPIES"."""
        self._test_upload("TWOCOPIES", self.chunk_size + 1)

    def test_single_upload_0_byte(self):
        """Upload an empty object with "SINGLE"."""
        self._test_upload("SINGLE", 0)

    def test_single_upload_chunksize_plus_1_bytes(self):
        """Upload ``chunk_size + 1`` bytes with "SINGLE"."""
        self._test_upload("SINGLE", self.chunk_size + 1)

    def test_chunks_cleanup_when_upload_failed(self):
        """A failed upload must leave no chunk behind.

        The chunks at position 1 are given an unusable URL so that the
        upload fails; the chunks at the other positions must then be
        reported as not found.
        """
        data = random_data(2 * self.chunk_size)
        content = self.content_factory.new(self.container_id, "titi",
                                           len(data), "TWOCOPIES")
        self.assertEqual(type(content), DupContent)

        # set bad url for position 1
        for chunk in content.chunks.filter(pos=1):
            chunk.url = "http://127.0.0.1:9/DEADBEEF"

        self.assertRaises(Exception, content.upload, StringIO.StringIO(data))
        for chunk in content.chunks.exclude(pos=1):
            self.assertRaises(NotFound,
                              self.blob_client.chunk_head, chunk.url)

    def _new_content(self, stgpol, data, broken_pos_list=None):
        """Upload an object, then delete the chunks listed as broken.

        :param stgpol: name of the storage policy of the object.
        :param data: data to upload.
        :param broken_pos_list: iterable of ``(pos, idx)`` pairs naming
            the chunks to delete; nothing is deleted when omitted.
        :returns: a tuple ``(content, broken_chunks_info)`` where
            ``content`` is the object as loaded again by the factory and
            ``broken_chunks_info[pos][idx]`` is a dict with the keys
            "url", "id", "hash", "dl_meta" and "dl_hash" describing each
            deleted chunk as it was before deletion.
        """
        if broken_pos_list is None:
            broken_pos_list = []

        old_content = self.content_factory.new(self.container_id, "titi",
                                               len(data), stgpol)
        self.assertEqual(type(old_content), DupContent)

        old_content.upload(StringIO.StringIO(data))

        broken_chunks_info = {}
        for pos, idx in broken_pos_list:
            c = old_content.chunks.filter(pos=pos)[idx]
            # Download the chunk before deleting it, to keep a reference
            # for later comparisons.
            meta, stream = self.blob_client.chunk_get(c.url)
            if pos not in broken_chunks_info:
                broken_chunks_info[pos] = {}
            broken_chunks_info[pos][idx] = {
                "url": c.url,
                "id": c.id,
                "hash": c.hash,
                "dl_meta": meta,
                "dl_hash": md5_stream(stream)
            }
            self.blob_client.chunk_delete(c.url)

        # get the new structure of the uploaded content
        return (self.content_factory.get(
            self.container_id, old_content.content_id), broken_chunks_info)

    def _test_rebuild(self, stgpol, data_size, broken_pos_list,
                      full_rebuild_pos):
        """Break some chunks, rebuild one of them and check the result.

        :param stgpol: name of the storage policy of the object.
        :param data_size: number of random bytes to upload.
        :param broken_pos_list: ``(pos, idx)`` pairs of chunks to delete.
        :param full_rebuild_pos: ``(pos, idx)`` pair of the chunk to
            rebuild; it must be one of ``broken_pos_list``.
        """
        data = random_data(data_size)
        content, broken_chunks_info = self._new_content(stgpol,
                                                        data, broken_pos_list)

        rebuild_pos, rebuild_idx = full_rebuild_pos
        rebuild_chunk_info = broken_chunks_info[rebuild_pos][rebuild_idx]
        content.rebuild_chunk(rebuild_chunk_info["id"])

        # get the new structure of the content
        rebuilt_content = self.content_factory.get(self.container_id,
                                                   content.content_id)
        self.assertEqual(type(rebuilt_content), DupContent)

        # find the rebuilt chunk
        for c in rebuilt_content.chunks.filter(pos=rebuild_pos):
            if len(content.chunks.filter(id=c.id)) > 0:
                # not the rebuilt chunk
                # if this chunk is broken, it must not have been rebuilt
                for b_c_i in broken_chunks_info[rebuild_pos].values():
                    if c.id == b_c_i["id"]:
                        with ExpectedException(NotFound):
                            _, _ = self.blob_client.chunk_get(c.url)
                continue
            meta, stream = self.blob_client.chunk_get(c.url)
            self.assertEqual(meta["chunk_id"], c.id)
            self.assertEqual(md5_stream(stream),
                             rebuild_chunk_info["dl_hash"])
            self.assertEqual(c.hash, rebuild_chunk_info["hash"])
            self.assertThat(c.url, NotEquals(rebuild_chunk_info["url"]))
            # The chunk ID is expected to differ; compare everything else.
            del meta["chunk_id"]
            del rebuild_chunk_info["dl_meta"]["chunk_id"]
            self.assertEqual(meta, rebuild_chunk_info["dl_meta"])

    def test_2copies_content_0_byte_1broken_rebuild_pos_0_idx_0(self):
        """Rebuild chunk (0, 0) of an empty "TWOCOPIES" object."""
        self._test_rebuild("TWOCOPIES", 0, [(0, 0)], (0, 0))

    def test_2copies_content_1_byte_1broken_rebuild_pos_0_idx_1(self):
        """Rebuild chunk (0, 1) of a 1-byte "TWOCOPIES" object."""
        self._test_rebuild("TWOCOPIES", 1, [(0, 1)], (0, 1))

    def test_3copies_content_chunksize_bytes_2broken_rebuild_pos_0_idx_1(self):
        """Rebuild chunk (0, 1) of "THREECOPIES" with two broken chunks."""
        if len(self.conf['rawx']) <= 3:
            self.skipTest("Need more than 3 rawx")
        self._test_rebuild("THREECOPIES", self.chunk_size,
                           [(0, 0), (0, 1)], (0, 1))

    def test_3copies_content_2xchksize_bytes_2broken_rebuild_pos_1_idx_2(self):
        """Rebuild chunk (1, 2) of "THREECOPIES" with two broken chunks."""
        if len(self.conf['rawx']) <= 3:
            self.skipTest("Need more than 3 rawx")
        self._test_rebuild("THREECOPIES", 2 * self.chunk_size,
                           [(1, 0), (1, 2)], (1, 2))

    def test_2copies_content_0_byte_2broken_rebuild_pos_0_idx_0(self):
        """Rebuilding fails when both "TWOCOPIES" chunks are broken."""
        with ExpectedException(UnrecoverableContent):
            self._test_rebuild("TWOCOPIES", 0, [(0, 0), (0, 1)], (0, 0))

    def _test_download(self, stgpol, data_size, broken_pos_list):
        """Download an object with broken chunks and check its data.

        :param stgpol: name of the storage policy of the object.
        :param data_size: number of random bytes to upload.
        :param broken_pos_list: ``(pos, idx)`` pairs of chunks to delete
            before the download.
        """
        data = random_data(data_size)
        content, _ = self._new_content(stgpol, data, broken_pos_list)

        downloaded_data = "".join(content.download())

        self.assertEqual(downloaded_data, data)

        for pos, idx in broken_pos_list:
            # check nothing has been rebuilt
            # (look at the chunk that was broken, not always the first)
            c = content.chunks.filter(pos=pos)[idx]
            self.assertRaises(NotFound, self.blob_client.chunk_delete, c.url)

    def test_twocopies_download_content_0_byte_without_broken_chunks(self):
        """Download an empty "TWOCOPIES" object, no broken chunk."""
        self._test_download("TWOCOPIES", 0, [])

    def test_twocopies_download_content_0_byte_with_broken_0_0(self):
        """Download an empty "TWOCOPIES" object with chunk (0, 0) broken."""
        self._test_download("TWOCOPIES", 0, [(0, 0)])

    def test_twocopies_download_content_1_byte_without_broken_chunks(self):
        """Download a 1-byte "TWOCOPIES" object, no broken chunk."""
        self._test_download("TWOCOPIES", 1, [])

    def test_twocopies_download_content_1_byte_with_broken_0_0(self):
        """Download a 1-byte "TWOCOPIES" object with chunk (0, 0) broken."""
        self._test_download("TWOCOPIES", 1, [(0, 0)])

    def test_twocopies_download_chunksize_bytes_without_broken_chunks(self):
        """Download ``chunk_size`` bytes with "TWOCOPIES", no broken chunk."""
        self._test_download("TWOCOPIES", self.chunk_size, [])

    def test_twocopies_download_2xchuksize_bytes_with_broken_0_0_and_1_0(self):
        """Download 2 metachunks with chunks (0, 0) and (1, 0) broken."""
        self._test_download("TWOCOPIES", self.chunk_size * 2, [(0, 0), (1, 0)])

    def test_twocopies_download_content_chunksize_bytes_2_broken_chunks(self):
        """Downloading fails when both copies of a chunk are broken."""
        data = random_data(self.chunk_size)
        content, _ = self._new_content("TWOCOPIES", data, [(0, 0), (0, 1)])
        gen = content.download()
        # The next() built-in works on both Python 2 and Python 3.
        self.assertRaises(UnrecoverableContent, next, gen)

    def test_single_download_content_1_byte_without_broken_chunks(self):
        """Download a 1-byte "SINGLE" object, no broken chunk."""
        self._test_download("SINGLE", 1, [])

    def test_single_download_chunksize_bytes_plus_1_without_broken_chunk(self):
        """Download a "SINGLE" object spanning two chunks, no broken chunk.

        NOTE: despite the method name, the object is ``2 * chunk_size``
        bytes long, not ``chunk_size + 1``.
        """
        self._test_download("SINGLE", self.chunk_size * 2, [])
class TestContentFactory(BaseTestCase):
    """Functional tests for ``ContentFactory``.

    Covers ``get``/``new`` with mocked container answers, storage policy
    changes, chunk moves and objects with unusual names. ``setUp`` creates a
    fresh container named after the current time for every test.
    """

    def setUp(self):
        """Create the clients and the container used by the tests."""
        super(TestContentFactory, self).setUp()

        self.namespace = self.conf['namespace']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_name = "TestContentFactory%f" % time.time()
        self.blob_client = BlobClient()
        self.container_client = ContainerClient(self.gridconf)
        self.container_client.container_create(account=self.account,
                                               reference=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()
        self.stgpol = "SINGLE"
        self.stgpol_twocopies = "TWOCOPIES"
        self.stgpol_threecopies = "THREECOPIES"
        self.stgpol_ec = "EC"

    def tearDown(self):
        """Only run the base class tearDown."""
        super(TestContentFactory, self).tearDown()

    def test_get_ec(self):
        # get() on a mocked "ec/..." chunk method must yield an ECContent
        # exposing the metadata and the four chunks returned by the mock.
        meta = {
            "chunk_method": "ec/algo=liberasurecode_rs_vand,k=6,m=2",
            "ctime": "1450176946",
            "deleted": "False",
            "hash": "E952A419957A6E405BFC53EC65483F73",
            "hash_method": "md5",
            "id": "3FA2C4A1ED2605005335A276890EC458",
            "length": "658",
            "mime_type": "application/octet-stream",
            "name": "tox.ini",
            "policy": self.stgpol_ec,
            "version": "1450176946676289"
        }
        chunks = [{
            "url": "http://127.0.0.1:6012/A0A0",
            "pos": "0.0",
            "size": 512,
            "hash": "E7D4E4AD460971CA2E3141F2102308D4"
        }, {
            "url": "http://127.0.0.1:6010/A01",
            "pos": "0.1",
            "size": 146,
            "hash": "760AB5DA7C51A3654F1CA622687CD6C3"
        }, {
            "url": "http://127.0.0.1:6011/A00",
            "pos": "0.2",
            "size": 512,
            "hash": "B1D08B86B8CAA90A2092CCA0DF9201DB"
        }, {
            "url": "http://127.0.0.1:6013/A0A1",
            "pos": "0.3",
            "size": 512,
            "hash": "DA9D7F72AEEA5791565724424CE45C16"
        }]
        self.content_factory.container_client.content_locate = Mock(
            return_value=(meta, chunks))
        c = self.content_factory.get("xxx_container_id", "xxx_content_id")
        self.assertEqual(type(c), ECContent)
        self.assertEqual(c.content_id, "3FA2C4A1ED2605005335A276890EC458")
        self.assertEqual(c.length, 658)
        self.assertEqual(c.path, "tox.ini")
        self.assertEqual(c.version, "1450176946676289")
        # TODO test storage method
        self.assertEqual(len(c.chunks), 4)
        self.assertEqual(c.chunks[0].raw(), chunks[0])
        self.assertEqual(c.chunks[1].raw(), chunks[1])
        self.assertEqual(c.chunks[2].raw(), chunks[2])
        self.assertEqual(c.chunks[3].raw(), chunks[3])

    def test_get_plain(self):
        # get() on a mocked "plain/..." chunk method must yield a
        # PlainContent exposing the metadata and both chunk copies.
        meta = {
            "chunk_method": "plain/nb_copy=2",
            "ctime": "1450176946",
            "deleted": "False",
            "hash": "E952A419957A6E405BFC53EC65483F73",
            "hash_method": "md5",
            "id": "3FA2C4A1ED2605005335A276890EC458",
            "length": "658",
            "mime_type": "application/octet-stream",
            "name": "tox.ini",
            "policy": self.stgpol_twocopies,
            "version": "1450176946676289"
        }
        chunks = [{
            "url": "http://127.0.0.1:6010/A0",
            "pos": "0",
            "size": 658,
            "hash": "E952A419957A6E405BFC53EC65483F73"
        }, {
            "url": "http://127.0.0.1:6011/A1",
            "pos": "0",
            "size": 658,
            "hash": "E952A419957A6E405BFC53EC65483F73"
        }]
        self.content_factory.container_client.content_locate = Mock(
            return_value=(meta, chunks))
        c = self.content_factory.get("xxx_container_id", "xxx_content_id")
        self.assertEqual(type(c), PlainContent)
        self.assertEqual(c.content_id, "3FA2C4A1ED2605005335A276890EC458")
        self.assertEqual(c.length, 658)
        self.assertEqual(c.path, "tox.ini")
        self.assertEqual(c.version, "1450176946676289")
        # TODO test storage_method
        self.assertEqual(len(c.chunks), 2)
        self.assertEqual(c.chunks[0].raw(), chunks[0])
        self.assertEqual(c.chunks[1].raw(), chunks[1])

    def test_get_unknown_content(self):
        # "1234" is not a content of the freshly created container.
        self.assertRaises(ContentNotFound, self.content_factory.get,
                          self.container_id, "1234")

    def test_new_ec(self):
        # new() with a mocked content_prepare() answer must yield an
        # ECContent whose chunks come back in the order asserted below
        # (the mock lists them from pos "0.3" down to "0.0").
        meta = {
            "chunk_method": "ec/algo=liberasurecode_rs_vand,k=6,m=2",
            "ctime": "1450341162",
            "deleted": "False",
            "hash": "",
            "hash_method": "md5",
            "id": "F4B1C8DD132705007DE8B43D0709DAA2",
            "length": "1000",
            "mime_type": "application/octet-stream",
            "name": "titi",
            "policy": self.stgpol_ec,
            "version": "1450341162332663"
        }
        chunks = [{
            "url": "http://127.0.0.1:6010/0_p1",
            "pos": "0.3",
            "size": 1048576,
            "hash": "00000000000000000000000000000000"
        }, {
            "url": "http://127.0.0.1:6011/0_p0",
            "pos": "0.2",
            "size": 1048576,
            "hash": "00000000000000000000000000000000"
        }, {
            "url": "http://127.0.0.1:6016/0_1",
            "pos": "0.1",
            "size": 1048576,
            "hash": "00000000000000000000000000000000"
        }, {
            "url": "http://127.0.0.1:6017/0_0",
            "pos": "0.0",
            "size": 1048576,
            "hash": "00000000000000000000000000000000"
        }]
        self.content_factory.container_client.content_prepare = Mock(
            return_value=(meta, chunks))
        c = self.content_factory.new("xxx_container_id", "titi", 1000,
                                     self.stgpol_ec)
        self.assertEqual(type(c), ECContent)
        self.assertEqual(c.content_id, "F4B1C8DD132705007DE8B43D0709DAA2")
        self.assertEqual(c.length, 1000)
        self.assertEqual(c.path, "titi")
        self.assertEqual(c.version, "1450341162332663")
        # TODO test storage_method
        self.assertEqual(len(c.chunks), 4)
        self.assertEqual(c.chunks[0].raw(), chunks[3])
        self.assertEqual(c.chunks[1].raw(), chunks[2])
        self.assertEqual(c.chunks[2].raw(), chunks[1])
        self.assertEqual(c.chunks[3].raw(), chunks[0])

    def _new_content(self, stgpol, data, path="titi"):
        """Create a content holding ``data`` and return it freshly loaded.

        :param stgpol: storage policy name passed to the factory.
        :param data: payload, wrapped in a ``BytesIO`` for the upload.
        :param path: object name inside the test container.
        :returns: the content as returned by ``content_factory.get``.
        """
        old_content = self.content_factory.new(self.container_id, path,
                                               len(data), stgpol)
        old_content.create(BytesIO(data))
        return self.content_factory.get(self.container_id,
                                        old_content.content_id)

    def _test_change_policy(self, data_size, old_policy, new_policy):
        """Upload with ``old_policy``, switch to ``new_policy``, then verify.

        Checks that the old content can no longer be shown, that the new
        content has the class expected for ``new_policy`` and that the
        downloaded data equals the uploaded data.
        """
        data = random_data(data_size)
        obj_type = {
            self.stgpol: PlainContent,
            self.stgpol_twocopies: PlainContent,
            self.stgpol_threecopies: PlainContent,
            self.stgpol_ec: ECContent
        }
        old_content = self._new_content(old_policy, data)
        self.assertEqual(type(old_content), obj_type[old_policy])

        changed_content = self.content_factory.change_policy(
            old_content.container_id, old_content.content_id, new_policy)

        self.assertRaises(NotFound,
                          self.container_client.content_show,
                          self.account,
                          cid=old_content.container_id,
                          content=old_content.content_id)

        new_content = self.content_factory.get(self.container_id,
                                               changed_content.content_id)
        self.assertEqual(type(new_content), obj_type[new_policy])

        downloaded_data = "".join(new_content.fetch())

        self.assertEqual(downloaded_data, data)

    @ec
    def test_change_content_0_byte_policy_single_to_ec(self):
        self._test_change_policy(0, self.stgpol, self.stgpol_ec)

    @ec
    def test_change_content_0_byte_policy_ec_to_twocopies(self):
        self._test_change_policy(0, self.stgpol_ec, self.stgpol_twocopies)

    @ec
    def test_change_content_1_byte_policy_single_to_ec(self):
        self._test_change_policy(1, self.stgpol, self.stgpol_ec)

    @ec
    def test_change_content_chunksize_bytes_policy_twocopies_to_ec(self):
        self._test_change_policy(self.chunk_size, self.stgpol_twocopies,
                                 self.stgpol_ec)

    @ec
    def test_change_content_2xchunksize_bytes_policy_threecopies_to_ec(self):
        self._test_change_policy(self.chunk_size * 2, self.stgpol_threecopies,
                                 self.stgpol_ec)

    @ec
    def test_change_content_1_byte_policy_ec_to_threecopies(self):
        self._test_change_policy(1, self.stgpol_ec, self.stgpol_threecopies)

    @ec
    def test_change_content_chunksize_bytes_policy_ec_to_twocopies(self):
        self._test_change_policy(self.chunk_size, self.stgpol_ec,
                                 self.stgpol_twocopies)

    @ec
    def test_change_content_2xchunksize_bytes_policy_ec_to_single(self):
        self._test_change_policy(self.chunk_size * 2, self.stgpol_ec,
                                 self.stgpol)

    def test_change_content_0_byte_policy_twocopies_to_threecopies(self):
        self._test_change_policy(0, self.stgpol_twocopies,
                                 self.stgpol_threecopies)

    def test_change_content_chunksize_bytes_policy_single_to_twocopies(self):
        self._test_change_policy(self.chunk_size, self.stgpol,
                                 self.stgpol_twocopies)

    def test_change_content_2xchunksize_bytes_policy_3copies_to_single(self):
        self._test_change_policy(self.chunk_size * 2, self.stgpol_threecopies,
                                 self.stgpol)

    def test_change_content_with_same_policy(self):
        # Asking for the policy the content already has must hand back a
        # content with the same id.
        data = random_data(10)
        old_content = self._new_content(self.stgpol_twocopies, data)
        changed_content = self.content_factory.change_policy(
            old_content.container_id, old_content.content_id,
            self.stgpol_twocopies)
        self.assertEqual(old_content.content_id, changed_content.content_id)

    def test_change_policy_unknown_content(self):
        self.assertRaises(ContentNotFound, self.content_factory.change_policy,
                          self.container_id, "1234", self.stgpol)

    def test_change_policy_unknown_storage_policy(self):
        data = random_data(10)
        old_content = self._new_content(self.stgpol_twocopies, data)
        self.assertRaises(ClientException, self.content_factory.change_policy,
                          self.container_id, old_content.content_id, "UnKnOwN")

    def _test_move_chunk(self, policy):
        """Move the first chunk of metachunk 0 and verify the result.

        After the move, the chunks at metapos 0 must all be on distinct
        hosts, none may keep the moved chunk id, and the new chunk must
        carry the same data hash and metadata (chunk id aside) as the old.
        """
        data = random_data(self.chunk_size)
        content = self._new_content(policy, data)

        # Look the chunk up once; both its id and its url are needed.
        moved_chunk = content.chunks.filter(metapos=0)[0]
        chunk_id = moved_chunk.id
        chunk_url = moved_chunk.url
        chunk_meta, chunk_stream = self.blob_client.chunk_get(chunk_url)
        chunk_hash = md5_stream(chunk_stream)
        new_chunk = content.move_chunk(chunk_id)

        content_updated = self.content_factory.get(self.container_id,
                                                   content.content_id)

        hosts = []
        for c in content_updated.chunks.filter(metapos=0):
            self.assertThat(hosts, Not(Contains(c.host)))
            self.assertNotEqual(c.id, chunk_id)
            hosts.append(c.host)

        new_chunk_meta, new_chunk_stream = self.blob_client.chunk_get(
            new_chunk["url"])
        new_chunk_hash = md5_stream(new_chunk_stream)

        self.assertEqual(new_chunk_hash, chunk_hash)

        # The chunk id is the only metadata field compared separately above,
        # so drop it before comparing the remaining metadata.
        del chunk_meta["chunk_id"]
        del new_chunk_meta["chunk_id"]
        self.assertEqual(new_chunk_meta, chunk_meta)

    def test_single_move_chunk(self):
        self._test_move_chunk(self.stgpol)

    def test_twocopies_move_chunk(self):
        self._test_move_chunk(self.stgpol_twocopies)

    @ec
    def test_ec_move_chunk(self):
        self._test_move_chunk(self.stgpol_ec)

    def test_move_chunk_not_in_content(self):
        data = random_data(self.chunk_size)
        content = self._new_content(self.stgpol_twocopies, data)
        with ExpectedException(OrphanChunk):
            content.move_chunk("1234")

    def test_strange_paths(self):
        # Objects with unusual names must keep their exact path and must
        # all show up in the container listing.
        strange_paths = [
            "Annual report.txt",
            "foo+bar=foobar.txt",
            "100%_bug_free.c",
            "forward/slash/allowed",
            "I\\put\\backslashes\\and$dollar$signs$in$file$names",
            "Je suis tombé sur la tête, mais ça va bien.",
            "%s%f%u%d%%",
            "carriage\rreturn",
            "line\nfeed",
            "ta\tbu\tla\ttion",
            "controlchars",
        ]
        answers = dict()
        for cname in strange_paths:
            content = self._new_content(self.stgpol, "nobody cares", cname)
            answers[cname] = content
        _, listing = self.container_client.content_list(
            self.account, self.container_name)
        obj_set = {
            k["name"].encode("utf8", "ignore")
            for k in listing["objects"]
        }
        try:
            # Ensure the saved path is the one we gave the object
            for cname in answers:
                self.assertEqual(cname, answers[cname].path)
            # Ensure all objects appear in listing
            for cname in strange_paths:
                self.assertIn(cname, obj_set)
        finally:
            # Best-effort cleanup. Delete every object created above (the
            # loop used to delete only the last created one, repeatedly)
            # and keep going when a single deletion fails.
            for created in answers.values():
                try:
                    created.delete()
                except Exception:
                    pass
Example #8
0
class TestRainContent(BaseTestCase):
    """Functional tests for contents stored with the "RAIN" policy.

    Covers upload, cleanup after a failed upload, chunk rebuild and
    download with or without broken chunks. Every test is skipped when the
    configuration lists fewer than 12 rawx services.
    """

    def setUp(self):
        """Create the clients and a fresh container, or skip the test."""
        super(TestRainContent, self).setUp()

        if len(self.conf['rawx']) < 12:
            # The guard rejects anything below 12, so 12 itself is enough.
            self.skipTest("Not enough rawx. "
                          "Rain tests need at least 12 rawx to run")

        self.namespace = self.conf['namespace']
        self.account = self.conf['account']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_client = ContainerClient(self.gridconf)
        self.blob_client = BlobClient()
        self.container_name = "TestRainContent%f" % time.time()
        self.container_client.container_create(acct=self.account,
                                               ref=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()

    def tearDown(self):
        """Only run the base class tearDown."""
        super(TestRainContent, self).tearDown()

    def _test_upload(self, data_size):
        """Upload ``data_size`` random bytes and verify content and chunks.

        Checks the content metadata returned by ``content_show``, the number
        of chunks, the metadata stored with every chunk and, for each
        metachunk, that the concatenated data chunks hash to the matching
        slice of the uploaded data.
        """
        data = random_data(data_size)
        content = self.content_factory.new(self.container_id, "titi",
                                           len(data), "RAIN")
        # Numbers of data (k) and parity (m) chunks the assertions below
        # are written for.
        k = 6
        m = 2
        self.assertEqual(type(content), RainContent)

        content.upload(StringIO.StringIO(data))

        meta, chunks = self.container_client.content_show(
            cid=self.container_id, content=content.content_id)
        chunks = ChunksHelper(chunks)
        self.assertEqual(meta['hash'], md5_data(data))
        self.assertEqual(meta['length'], str(len(data)))
        self.assertEqual(meta['policy'], "RAIN")
        self.assertEqual(meta['name'], "titi")

        metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size))
        if metachunk_nb == 0:
            metachunk_nb = 1  # special case for empty content

        nb_chunks_min = metachunk_nb * (1 + m)
        nb_chunks_max = metachunk_nb * (k + m)
        self.assertGreaterEqual(len(chunks), nb_chunks_min)
        self.assertLessEqual(len(chunks), nb_chunks_max)

        for metapos in range(metachunk_nb):
            chunks_at_pos = content.chunks.filter(metapos=metapos)
            data_chunks_at_pos = chunks_at_pos.filter(is_parity=False)
            parity_chunks_at_pos = chunks_at_pos.filter(is_parity=True)

            # Dedicated assertions report the actual count on failure.
            self.assertGreaterEqual(len(data_chunks_at_pos), 1)
            self.assertLessEqual(len(data_chunks_at_pos), k)
            self.assertEqual(len(parity_chunks_at_pos), m)

            for chunk in chunks_at_pos:
                # Use a distinct name so the content metadata is not
                # shadowed by the chunk metadata.
                chunk_meta, stream = self.blob_client.chunk_get(chunk.url)
                self.assertEqual(md5_stream(stream), chunk.hash)
                self.assertEqual(chunk_meta['content_size'], str(len(data)))
                self.assertEqual(chunk_meta['content_path'], "titi")
                self.assertEqual(chunk_meta['content_cid'],
                                 self.container_id)
                # This used to compare the value with itself; compare it
                # with the id of the uploaded content instead.
                self.assertEqual(chunk_meta['content_id'],
                                 content.content_id)
                self.assertEqual(chunk_meta['chunk_id'], chunk.id)
                self.assertEqual(chunk_meta['chunk_pos'], chunk.pos)
                self.assertEqual(chunk_meta['chunk_hash'], chunk.hash)

            data_begin = metapos * self.chunk_size
            data_end = metapos * self.chunk_size + self.chunk_size
            target_metachunk_hash = md5_data(data[data_begin:data_end])

            metachunk_hash = hashlib.md5()
            for chunk in data_chunks_at_pos:
                _, stream = self.blob_client.chunk_get(chunk.url)
                for d in stream:
                    metachunk_hash.update(d)
            self.assertEqual(metachunk_hash.hexdigest().upper(),
                             target_metachunk_hash)

    def test_upload_0_byte(self):
        self._test_upload(0)

    def test_upload_1_byte(self):
        self._test_upload(1)

    def test_upload_chunksize_bytes(self):
        self._test_upload(self.chunk_size)

    def test_upload_chunksize_plus_1_bytes(self):
        self._test_upload(self.chunk_size + 1)

    def test_chunks_cleanup_when_upload_failed(self):
        # When the upload fails, none of the other chunks may remain.
        data = random_data(2 * self.chunk_size)
        content = self.content_factory.new(self.container_id, "titi",
                                           len(data), "RAIN")
        self.assertEqual(type(content), RainContent)

        # set bad url for position 1
        for chunk in content.chunks.filter(pos="1.p0"):
            chunk.url = "http://127.0.0.1:9/DEADBEEF"

        self.assertRaises(Exception, content.upload, StringIO.StringIO(data))
        for chunk in content.chunks.exclude(pos="1.p0"):
            self.assertRaises(NotFound,
                              self.blob_client.chunk_head, chunk.url)

    def _test_rebuild(self, data_size, broken_pos_list):
        """Upload a content, delete some chunks, rebuild, then verify.

        :param data_size: number of random bytes to upload.
        :param broken_pos_list: chunk positions (e.g. ``"0.0"``, ``"0.p0"``)
            whose chunk is deleted before the rebuild.
        """
        data = os.urandom(data_size)
        old_content = self.content_factory.new(self.container_id, "titi",
                                               len(data), "RAIN")
        self.assertEqual(type(old_content), RainContent)

        old_content.upload(StringIO.StringIO(data))

        # get the new structure of the uploaded content
        uploaded_content = self.content_factory.get(self.container_id,
                                                    old_content.content_id)

        old_info = {}
        for pos in broken_pos_list:
            old_info[pos] = {}
            c = uploaded_content.chunks.filter(pos=pos)[0]
            old_info[pos]["url"] = c.url
            old_info[pos]["id"] = c.id
            old_info[pos]["hash"] = c.hash
            chunk_id_to_rebuild = c.id
            meta, stream = self.blob_client.chunk_get(c.url)
            old_info[pos]["dl_meta"] = meta
            old_info[pos]["dl_hash"] = md5_stream(stream)
            # delete the chunk
            self.blob_client.chunk_delete(c.url)

        # rebuild the broken chunks
        # NOTE(review): only the id of the last deleted chunk is passed,
        # yet every position is checked below -- presumably rebuild_chunk
        # repairs the whole metachunk; confirm.
        uploaded_content.rebuild_chunk(chunk_id_to_rebuild)

        # get the new structure of the content
        rebuilt_content = self.content_factory.get(self.container_id,
                                                   uploaded_content.content_id)
        self.assertEqual(type(rebuilt_content), RainContent)

        for pos in broken_pos_list:
            c = rebuilt_content.chunks.filter(pos=pos)[0]
            rebuilt_meta, rebuilt_stream = self.blob_client.chunk_get(c.url)
            self.assertEqual(rebuilt_meta["chunk_id"], c.id)
            self.assertEqual(md5_stream(rebuilt_stream),
                             old_info[pos]["dl_hash"])
            self.assertEqual(c.hash, old_info[pos]["hash"])
            self.assertThat(c.url, NotEquals(old_info[pos]["url"]))
            # The chunk id was checked above; drop it before comparing the
            # remaining metadata of the old and the rebuilt chunk.
            del old_info[pos]["dl_meta"]["chunk_id"]
            del rebuilt_meta["chunk_id"]
            self.assertEqual(rebuilt_meta, old_info[pos]["dl_meta"])

    def test_content_0_byte_rebuild_pos_0_0(self):
        self._test_rebuild(0, ["0.0"])

    def test_content_0_byte_rebuild_pos_0_0_and_0_p0(self):
        self._test_rebuild(0, ["0.0", "0.p0"])

    def test_content_1_byte_rebuild_pos_0_0(self):
        self._test_rebuild(1, ["0.0"])

    def test_content_1_byte_rebuild_pos_0_p0(self):
        self._test_rebuild(1, ["0.p0"])

    def test_content_1_byte_rebuild_pos_0_0_and_0_p0(self):
        self._test_rebuild(1, ["0.0", "0.p0"])

    def test_content_chunksize_bytes_rebuild_pos_0_0(self):
        self._test_rebuild(self.conf["chunk_size"], ["0.0"])

    def test_content_chunksize_bytes_rebuild_pos_0_0_and_0_1(self):
        self._test_rebuild(self.conf["chunk_size"], ["0.0", "0.1"])

    def test_content_chunksize_bytes_rebuild_pos_0_0_and_0_p0(self):
        self._test_rebuild(self.conf["chunk_size"], ["0.0", "0.p0"])

    def test_content_chunksize_bytes_rebuild_pos_0_p0_and_0_p1(self):
        self._test_rebuild(self.conf["chunk_size"], ["0.p0", "0.p1"])

    def test_content_chunksize_bytes_rebuild_more_than_k_chunk(self):
        # Three deleted chunks in one metachunk must make the rebuild fail.
        self.assertRaises(UnrecoverableContent, self._test_rebuild,
                          self.conf["chunk_size"], ["0.0", "0.1", "0.2"])

    def _new_content(self, data, broken_pos_list=None):
        """Upload ``data`` as a RAIN content and delete the listed chunks.

        :param data: payload to upload.
        :param broken_pos_list: optional positions whose first chunk is
            deleted after the upload; nothing is deleted when omitted.
        :returns: the content as returned by ``content_factory.get``.
        """
        old_content = self.content_factory.new(self.container_id, "titi",
                                               len(data), "RAIN")
        self.assertEqual(type(old_content), RainContent)

        old_content.upload(StringIO.StringIO(data))

        # ``None`` replaces the former shared mutable ``[]`` default.
        for pos in broken_pos_list or ():
            c = old_content.chunks.filter(pos=pos)[0]
            self.blob_client.chunk_delete(c.url)

        # get the new structure of the uploaded content
        return self.content_factory.get(self.container_id,
                                        old_content.content_id)

    def test_orphan_chunk(self):
        content = self._new_content(random_data(10))

        self.assertRaises(OrphanChunk, content.rebuild_chunk, "uNkNoWnId")

    def test_rebuild_on_the_fly(self):
        # An on-the-fly rebuild must stream the original data without
        # putting the deleted chunks back.
        data = random_data(self.conf["chunk_size"])
        content = self._new_content(data, ["0.0", "0.p0"])

        stream = content.rebuild_metachunk("0", on_the_fly=True)

        dl_data = "".join(stream)

        self.assertEqual(dl_data, data)

        del_chunk_0_0 = content.chunks.filter(pos="0.0")[0]
        del_chunk_0_p0 = content.chunks.filter(pos="0.p0")[0]

        self.assertRaises(NotFound,
                          self.blob_client.chunk_get, del_chunk_0_0.url)
        self.assertRaises(NotFound,
                          self.blob_client.chunk_get, del_chunk_0_p0.url)

    def _test_download(self, data_size, broken_pos_list):
        """Download a content with some chunks deleted and compare data.

        Also checks that the deleted chunks are still missing afterwards
        (deleting them again must raise ``NotFound``).
        """
        data = random_data(data_size)
        content = self._new_content(data, broken_pos_list)

        downloaded_data = "".join(content.download())

        self.assertEqual(downloaded_data, data)

        for pos in broken_pos_list:
            c = content.chunks.filter(pos=pos)[0]
            self.assertRaises(NotFound, self.blob_client.chunk_delete, c.url)

    def test_download_content_0_byte_without_broken_chunks(self):
        self._test_download(0, [])

    def test_download_content_1_byte_without_broken_chunks(self):
        self._test_download(1, [])

    def test_download_content_chunksize_bytes_without_broken_chunks(self):
        self._test_download(self.conf["chunk_size"], [])

    def test_download_content_chunksize_plus_1_without_broken_chunks(self):
        self._test_download(self.conf["chunk_size"] + 1, [])

    def test_download_content_0_byte_with_broken_0_0_and_0_p0(self):
        self._test_download(0, ["0.0", "0.p0"])

    def test_download_content_1_byte_with_broken_0_0_and_0_p0(self):
        self._test_download(1, ["0.0", "0.p0"])

    def test_download_content_2xchunksize_with_broken_0_2_and_1_0(self):
        self._test_download(2 * self.conf["chunk_size"], ["0.2", "1.0"])

    def test_download_content_chunksize_bytes_with_3_broken_chunks(self):
        # Three deleted chunks: the first read of the download must fail.
        data = random_data(self.conf["chunk_size"])
        content = self._new_content(data, ["0.0", "0.1", "0.2"])
        gen = content.download()
        # The ``next`` builtin works wherever ``gen.next`` did.
        self.assertRaises(UnrecoverableContent, next, gen)

    def test_download_interrupt_close(self):
        # Consume the whole download, then close the iterator explicitly.
        data = random_data(self.conf["chunk_size"])
        content = self._new_content(data, ["0.p0"])

        download_iter = content.download()

        dl_data = "".join(download_iter)
        self.assertEqual(len(dl_data), len(data))
        self.assertEqual(dl_data, data)
        download_iter.close()
Example #9
0
class TestContentFactory(BaseTestCase):
    def setUp(self):
        """Create the clients and a fresh container for each test."""
        super(TestContentFactory, self).setUp()
        conf = self.conf
        self.namespace = conf['namespace']
        self.chunk_size = conf['chunk_size']
        gridconf = {"namespace": self.namespace}
        self.gridconf = gridconf
        self.content_factory = ContentFactory(gridconf)
        # The timestamp suffix gives every test its own container name.
        self.container_name = "TestContentFactory%f" % time.time()
        self.blob_client = BlobClient()
        self.container_client = ContainerClient(gridconf)
        self.container_client.container_create(acct=self.account,
                                               ref=self.container_name)
        container_id = cid_from_name(self.account, self.container_name)
        self.container_id = container_id.upper()

    def tearDown(self):
        """Only run the base class tearDown."""
        super(TestContentFactory, self).tearDown()

    def test_extract_datasec(self):
        # _extract_datasec() must translate a storage policy name into a
        # (data security type, arguments) pair using the namespace info
        # injected here, and reject an unknown policy name.
        data_security = {
            "DUPONETWO": "DUP:distance=1|nb_copy=2",
            "RAIN": "RAIN:k=6|m=2|algo=liber8tion"
        }
        storage_policy = {
            "RAIN": "NONE:RAIN:NONE",
            "SINGLE": "NONE:NONE:NONE",
            "TWOCOPIES": "NONE:DUPONETWO:NONE"
        }
        self.content_factory.ns_info = {
            "data_security": data_security,
            "storage_policy": storage_policy
        }

        # (policy name, expected type, expected arguments)
        expectations = (
            ("RAIN", "RAIN", {"k": "6", "m": "2", "algo": "liber8tion"}),
            ("SINGLE", "DUP", {"nb_copy": "1", "distance": "0"}),
            ("TWOCOPIES", "DUP", {"nb_copy": "2", "distance": "1"}),
        )
        for policy, expected_type, expected_args in expectations:
            ds_type, ds_args = self.content_factory._extract_datasec(policy)
            self.assertEqual(ds_type, expected_type)
            self.assertEqual(ds_args, expected_args)

        self.assertRaises(InconsistentContent,
                          self.content_factory._extract_datasec, "UnKnOwN")

    def test_get_rain(self):
        # get() on a mocked content_show() answer with a "plain/rain"
        # chunk method must yield a RainContent carrying the metadata, the
        # RAIN parameters and the chunks in the order asserted below.
        content_id = "3FA2C4A1ED2605005335A276890EC458"
        version = "1450176946676289"
        meta = {
            "chunk-method": "plain/rain?algo=liber8tion&k=6&m=2",
            "ctime": "1450176946",
            "deleted": "False",
            "hash": "E952A419957A6E405BFC53EC65483F73",
            "hash-method": "md5",
            "id": content_id,
            "length": "658",
            "mime-type": "application/octet-stream",
            "name": "tox.ini",
            "policy": "RAIN",
            "version": version
        }
        chunk_fields = ("url", "pos", "size", "hash")
        chunks = [dict(zip(chunk_fields, values)) for values in (
            ("http://127.0.0.1:6012/A0A0", "0.p0", 512,
             "E7D4E4AD460971CA2E3141F2102308D4"),
            ("http://127.0.0.1:6010/A01", "0.1", 146,
             "760AB5DA7C51A3654F1CA622687CD6C3"),
            ("http://127.0.0.1:6011/A00", "0.0", 512,
             "B1D08B86B8CAA90A2092CCA0DF9201DB"),
            ("http://127.0.0.1:6013/A0A1", "0.p1", 512,
             "DA9D7F72AEEA5791565724424CE45C16"),
        )]
        self.content_factory.container_client.content_show = Mock(
            return_value=(meta, chunks))

        c = self.content_factory.get("xxx_container_id", "xxx_content_id")

        self.assertEqual(type(c), RainContent)
        self.assertEqual(c.content_id, content_id)
        self.assertEqual(c.length, 658)
        self.assertEqual(c.path, "tox.ini")
        self.assertEqual(c.version, version)
        self.assertEqual(c.algo, "liber8tion")
        self.assertEqual(c.k, 6)
        self.assertEqual(c.m, 2)
        self.assertEqual(len(c.chunks), 4)
        # Index in c.chunks -> index in the mocked list.
        for position, raw_index in enumerate((2, 1, 0, 3)):
            self.assertEqual(c.chunks[position].raw(), chunks[raw_index])

    def test_get_dup(self):
        # get() on a mocked content_show() answer with a "plain/bytes"
        # chunk method and the TWOCOPIES policy must yield a DupContent
        # carrying the metadata and both chunk copies.
        content_id = "3FA2C4A1ED2605005335A276890EC458"
        content_hash = "E952A419957A6E405BFC53EC65483F73"
        version = "1450176946676289"
        meta = {
            "chunk-method": "plain/bytes",
            "ctime": "1450176946",
            "deleted": "False",
            "hash": content_hash,
            "hash-method": "md5",
            "id": content_id,
            "length": "658",
            "mime-type": "application/octet-stream",
            "name": "tox.ini",
            "policy": "TWOCOPIES",
            "version": version
        }
        # Both copies share position, size and hash; only the url differs.
        chunks = [
            {"url": url, "pos": "0", "size": 658, "hash": content_hash}
            for url in ("http://127.0.0.1:6010/A0",
                        "http://127.0.0.1:6011/A1")
        ]
        self.content_factory.container_client.content_show = Mock(
            return_value=(meta, chunks))

        c = self.content_factory.get("xxx_container_id", "xxx_content_id")

        self.assertEqual(type(c), DupContent)
        self.assertEqual(c.content_id, content_id)
        self.assertEqual(c.length, 658)
        self.assertEqual(c.path, "tox.ini")
        self.assertEqual(c.version, version)
        self.assertEqual(c.nb_copy, 2)
        self.assertEqual(c.distance, 1)
        self.assertEqual(len(c.chunks), 2)
        for position in (0, 1):
            self.assertEqual(c.chunks[position].raw(), chunks[position])

    def test_get_unknown_content(self):
        # No content with this id was created in the test container.
        unknown_content_id = "1234"
        self.assertRaises(ContentNotFound, self.content_factory.get,
                          self.container_id, unknown_content_id)

    def test_new_rain(self):
        # new() on a mocked content_prepare() answer must yield a
        # RainContent carrying the metadata, the RAIN parameters and the
        # chunks in the reverse of the mocked order.
        content_id = "F4B1C8DD132705007DE8B43D0709DAA2"
        version = "1450341162332663"
        meta = {
            "chunk-method": "plain/rain?algo=liber8tion&k=6&m=2",
            "ctime": "1450341162",
            "deleted": "False",
            "hash": "",
            "hash-method": "md5",
            "id": content_id,
            "length": "1000",
            "mime-type": "application/octet-stream",
            "name": "titi",
            "policy": "RAIN",
            "version": version
        }
        # Every prepared chunk has the same size and an all-zero hash.
        chunks = [
            {
                "url": url,
                "pos": pos,
                "size": 1048576,
                "hash": "00000000000000000000000000000000"
            }
            for url, pos in (
                ("http://127.0.0.1:6010/0_p1", "0.p1"),
                ("http://127.0.0.1:6011/0_p0", "0.p0"),
                ("http://127.0.0.1:6016/0_1", "0.1"),
                ("http://127.0.0.1:6017/0_0", "0.0"),
            )
        ]
        self.content_factory.container_client.content_prepare = Mock(
            return_value=(meta, chunks))

        c = self.content_factory.new("xxx_container_id", "titi", 1000, "RAIN")

        self.assertEqual(type(c), RainContent)
        self.assertEqual(c.content_id, content_id)
        self.assertEqual(c.length, 1000)
        self.assertEqual(c.path, "titi")
        self.assertEqual(c.version, version)
        self.assertEqual(c.algo, "liber8tion")
        self.assertEqual(c.k, 6)
        self.assertEqual(c.m, 2)
        self.assertEqual(len(c.chunks), 4)
        # Index in c.chunks -> index in the mocked list.
        for position, raw_index in enumerate((3, 2, 1, 0)):
            self.assertEqual(c.chunks[position].raw(), chunks[raw_index])

    def _new_content(self, stgpol, data, path="titi"):
        old_content = self.content_factory.new(self.container_id, path,
                                               len(data), stgpol)
        old_content.upload(StringIO.StringIO(data))
        return self.content_factory.get(self.container_id,
                                        old_content.content_id)

    def _test_change_policy(self, data_size, old_policy, new_policy):
        """Upload random data, change its storage policy and verify the result.

        The scenario is skipped when RAIN is involved and fewer than 8 rawx
        are configured. After the change, the old content id must no longer
        be found by ``content_show``, the new content must have the class
        matching ``new_policy`` and must download to the original data.
        """
        involves_rain = "RAIN" in (old_policy, new_policy)
        if involves_rain and len(self.conf['rawx']) < 8:
            self.skipTest("RAIN: Need more than 8 rawx to run")

        data = random_data(data_size)
        # content class expected for every storage policy used by the tests
        class_by_policy = dict.fromkeys(
            ("SINGLE", "TWOCOPIES", "THREECOPIES"), DupContent)
        class_by_policy["RAIN"] = RainContent

        source = self._new_content(old_policy, data)
        self.assertEqual(type(source), class_by_policy[old_policy])

        changed = self.content_factory.change_policy(
            source.container_id, source.content_id, new_policy)

        # the previous content id must have disappeared from the container
        self.assertRaises(NotFound,
                          self.container_client.content_show,
                          self.account,
                          cid=source.container_id,
                          content=source.content_id)

        target = self.content_factory.get(self.container_id,
                                          changed.content_id)
        self.assertEqual(type(target), class_by_policy[new_policy])

        self.assertEqual("".join(target.download()), data)

    def test_change_content_0_byte_policy_single_to_rain(self):
        self._test_change_policy(0, "SINGLE", "RAIN")

    def test_change_content_0_byte_policy_rain_to_twocopies(self):
        self._test_change_policy(0, "RAIN", "TWOCOPIES")

    def test_change_content_1_byte_policy_single_to_rain(self):
        self._test_change_policy(1, "SINGLE", "RAIN")

    def test_change_content_chunksize_bytes_policy_twocopies_to_rain(self):
        self._test_change_policy(self.chunk_size, "TWOCOPIES", "RAIN")

    def test_change_content_2xchunksize_bytes_policy_threecopies_to_rain(self):
        self._test_change_policy(self.chunk_size * 2, "THREECOPIES", "RAIN")

    def test_change_content_1_byte_policy_rain_to_threecopies(self):
        self._test_change_policy(1, "RAIN", "THREECOPIES")

    def test_change_content_chunksize_bytes_policy_rain_to_twocopies(self):
        self._test_change_policy(self.chunk_size, "RAIN", "TWOCOPIES")

    def test_change_content_2xchunksize_bytes_policy_rain_to_single(self):
        self._test_change_policy(self.chunk_size * 2, "RAIN", "SINGLE")

    def test_change_content_0_byte_policy_twocopies_to_threecopies(self):
        self._test_change_policy(0, "TWOCOPIES", "THREECOPIES")

    def test_change_content_chunksize_bytes_policy_single_to_twocopies(self):
        self._test_change_policy(self.chunk_size, "SINGLE", "TWOCOPIES")

    def test_change_content_2xchunksize_bytes_policy_3copies_to_single(self):
        self._test_change_policy(self.chunk_size * 2, "THREECOPIES", "SINGLE")

    def test_change_content_with_same_policy(self):
        """Changing to the policy already in use keeps the same content id."""
        policy = "TWOCOPIES"
        original = self._new_content(policy, random_data(10))
        result = self.content_factory.change_policy(
            original.container_id, original.content_id, policy)
        self.assertEqual(original.content_id, result.content_id)

    def test_change_policy_unknown_content(self):
        """Changing the policy of an unknown content raises ContentNotFound."""
        missing_content_id = "1234"
        self.assertRaises(ContentNotFound, self.content_factory.change_policy,
                          self.container_id, missing_content_id, "SINGLE")

    def test_change_policy_unknown_storage_policy(self):
        """Changing to a storage policy named "UnKnOwN" raises ClientException."""
        existing = self._new_content("TWOCOPIES", random_data(10))
        unknown_policy = "UnKnOwN"
        self.assertRaises(ClientException, self.content_factory.change_policy,
                          self.container_id, existing.content_id,
                          unknown_policy)

    def _test_move_chunk(self, policy):
        """Move one chunk of a freshly uploaded content and verify the result.

        A content of ``self.chunk_size`` random bytes is uploaded with
        ``policy``; the first chunk found at metaposition 0 is moved with
        ``content.move_chunk``. The test then checks that:

        * no chunk at metaposition 0 keeps the moved chunk id,
        * all chunks at metaposition 0 live on distinct hosts,
        * the new chunk has the same data hash and the same metadata
          (``chunk_id`` excepted) as the chunk that was moved.
        """
        data = random_data(self.chunk_size)
        content = self._new_content(policy, data)

        # Look the chunk up once instead of filtering twice for id and url.
        moved_chunk = content.chunks.filter(metapos=0)[0]
        chunk_id = moved_chunk.id
        chunk_url = moved_chunk.url
        chunk_meta, chunk_stream = self.blob_client.chunk_get(chunk_url)
        chunk_hash = md5_stream(chunk_stream)
        new_chunk = content.move_chunk(chunk_id)

        content_updated = self.content_factory.get(self.container_id,
                                                   content.content_id)

        hosts = []
        for c in content_updated.chunks.filter(metapos=0):
            self.assertThat(hosts, Not(Contains(c.host)))
            # assertNotEquals is a deprecated alias of assertNotEqual
            self.assertNotEqual(c.id, chunk_id)
            hosts.append(c.host)

        new_chunk_meta, new_chunk_stream = self.blob_client.chunk_get(
            new_chunk["url"])
        new_chunk_hash = md5_stream(new_chunk_stream)

        self.assertEqual(new_chunk_hash, chunk_hash)

        # chunk ids necessarily differ: compare everything else
        del chunk_meta["chunk_id"]
        del new_chunk_meta["chunk_id"]
        self.assertEqual(new_chunk_meta, chunk_meta)

    def test_single_move_chunk(self):
        self._test_move_chunk("SINGLE")

    def test_twocopies_move_chunk(self):
        self._test_move_chunk("TWOCOPIES")

    def test_rain_move_chunk(self):
        if len(self.conf['rawx']) < 9:
            self.skipTest("Need more than 8 rawx")
        self._test_move_chunk("RAIN")

    def test_move_chunk_not_in_content(self):
        """Moving a chunk id the content does not hold raises OrphanChunk."""
        content = self._new_content("TWOCOPIES", random_data(self.chunk_size))
        unknown_chunk_id = "1234"
        with ExpectedException(OrphanChunk):
            content.move_chunk(unknown_chunk_id)

    def test_strange_paths(self):
        for cname in (
                "Annual report.txt",
                "foo+bar=foobar.txt",
                "100%_bug_free.c",
                "forward/slash/allowed",
                "I\\put\\backslashes\\and$dollar$signs$in$file$names",
                "Je suis tombé sur la tête, mais ça va bien.",
                "%s%f%u%d%%",
                "carriage\rreturn",
                "line\nfeed",
                "ta\tbu\tla\ttion",
                "controlchars",
        ):
            content = self._new_content("SINGLE", "nobody cares", cname)
            try:
                self.assertEqual(cname, content.path)
            finally:
                pass  # TODO: delete the content
class TestRebuilderCrawler(BaseTestCase):
    """Functional tests for ``BlobRebuilderWorker.chunk_rebuild``.

    Every test works in a container created by ``setUp`` and uses a worker
    bound to the address of the first rawx of the test configuration.
    """

    def setUp(self):
        """Create the clients and the container used by the tests."""
        super(TestRebuilderCrawler, self).setUp()

        self.namespace = self.conf['namespace']
        self.account = self.conf['account']

        self.gridconf = {"namespace": self.namespace}
        self.container_client = ContainerClient(self.gridconf)
        self.blob_client = BlobClient()

        self.container_name = "TestRebuilderCrawler%d" % int(time.time())
        self.container_client.container_create(acct=self.account,
                                               ref=self.container_name)

    def _push_content(self, content):
        """Upload every chunk of ``content``, then declare it in the container."""
        for c in content.chunks:
            self.blob_client.chunk_put(c.url, c.get_create_xattr(), c.data)

        self.container_client.content_create(acct=content.account,
                                             ref=content.container_name,
                                             path=content.content_name,
                                             size=content.size,
                                             checksum=content.hash,
                                             content_id=content.content_id,
                                             stgpol=content.stgpol,
                                             data=content.get_create_meta2())

    def _push_new_content(self, stgpol, nb_copies):
        """Push "mycontent" made of ``nb_copies`` copies of one chunk.

        Copy number ``i`` is added with ``rawx=i``; all copies sit at
        position '0' and hold the data "azerty".
        """
        content = TestContent(self.conf, self.account, self.container_name,
                              "mycontent", stgpol)
        data = "azerty"
        for rawx_index in range(nb_copies):
            content.add_chunk(data, pos='0', rawx=rawx_index)

        self._push_content(content)
        return content

    def _new_rebuilder(self):
        """Return a worker bound to the first rawx of the configuration."""
        return BlobRebuilderWorker(self.gridconf, None,
                                   self.conf['rawx'][0]['addr'])

    def tearDown(self):
        super(TestRebuilderCrawler, self).tearDown()

    def test_rebuild_chunk(self):
        """Rebuild one copy and check meta2, the new chunk and the rdir."""
        # push a new content
        content = self._push_new_content("TWOCOPIES", 2)

        # rebuild the first rawx
        rebuilder = self._new_rebuilder()

        rebuilder.chunk_rebuild(content.container_id, content.content_id,
                                content.chunks[0].id)

        # check meta2 information
        _, res = self.container_client.content_show(acct=content.account,
                                                    ref=content.container_name,
                                                    content=content.content_id)

        known_urls = (content.chunks[0].url, content.chunks[1].url)
        new_chunk_info = None
        for c in res:
            if c['url'] not in known_urls:
                new_chunk_info = c

        # fail cleanly instead of subscripting None when nothing was rebuilt
        self.assertIsNotNone(new_chunk_info)
        new_chunk_id = new_chunk_info['url'].split('/')[-1]

        self.assertEqual(new_chunk_info['hash'], content.chunks[0].hash)
        self.assertEqual(new_chunk_info['pos'], content.chunks[0].pos)
        self.assertEqual(new_chunk_info['size'], content.chunks[0].size)

        # check chunk information
        meta, stream = self.blob_client.chunk_get(new_chunk_info['url'])

        self.assertEqual(meta['content_size'], str(content.chunks[0].size))
        self.assertEqual(meta['content_path'], content.content_name)
        self.assertEqual(meta['content_cid'], content.container_id)
        self.assertEqual(meta['content_id'], content.content_id)
        self.assertEqual(meta['chunk_id'], new_chunk_id)
        self.assertEqual(meta['chunk_pos'], content.chunks[0].pos)
        self.assertEqual(meta['content_version'], content.version)
        self.assertEqual(meta['chunk_hash'], content.chunks[0].hash)

        self.assertEqual(stream.next(), content.chunks[0].data)

        # check rtime flag in rdir
        rdir_client = RdirClient(self.gridconf)
        res = rdir_client.chunk_fetch(self.conf['rawx'][0]['addr'])
        key = (content.container_id, content.content_id, content.chunks[0].id)
        # initialised so that a missing entry is a test failure, not a
        # NameError
        check_value = None
        for i_container, i_content, i_chunk, i_value in res:
            if (i_container, i_content, i_chunk) == key:
                check_value = i_value

        self.assertIsNotNone(check_value)
        self.assertIsNotNone(check_value.get('rtime'))

    @unittest.skipIf(
        len(get_config()['rawx']) != 3, "The number of rawx must be 3")
    def test_rebuild_no_spare(self):
        """With three copies on three rawx, rebuilding finds no spare chunk."""
        # push a new content
        content = self._push_new_content("THREECOPIES", 3)

        # rebuild the first rawx
        rebuilder = self._new_rebuilder()

        self.assertRaises(SpareChunkException, rebuilder.chunk_rebuild,
                          content.container_id, content.content_id,
                          content.chunks[0].id)

    def test_rebuild_upload_failed(self):
        """A failing chunk copy is reported as UnrecoverableContent."""
        # push a new content
        content = self._push_new_content("TWOCOPIES", 2)

        # rebuild the first rawx
        rebuilder = self._new_rebuilder()

        # Force upload to raise an exception
        with patch('oio.content.content.BlobClient') as MockClass:
            instance = MockClass.return_value
            instance.chunk_copy.side_effect = Exception("xx")
            self.assertRaises(UnrecoverableContent, rebuilder.chunk_rebuild,
                              content.container_id, content.content_id,
                              content.chunks[0].id)

    def test_rebuild_nonexistent_chunk(self):
        """Rebuilding with all-zero identifiers raises OrphanChunk."""
        rebuilder = self._new_rebuilder()

        # try to rebuild a nonexistent chunk
        self.assertRaises(OrphanChunk, rebuilder.chunk_rebuild, 64 * '0',
                          32 * '0', 64 * '0')

    def test_rebuild_orphan_chunk(self):
        """An unknown chunk id inside an existing content raises OrphanChunk."""
        # push a new content
        content = self._push_new_content("TWOCOPIES", 2)

        # rebuild the first rawx
        rebuilder = self._new_rebuilder()

        # try to rebuild a nonexistent chunk
        self.assertRaises(OrphanChunk, rebuilder.chunk_rebuild,
                          content.container_id, content.content_id, 64 * '0')

    def test_rebuild_with_no_copy(self):
        """A SINGLE content has no other copy: UnrecoverableContent."""
        # push a new content
        content = self._push_new_content("SINGLE", 1)

        # rebuild the first rawx
        rebuilder = self._new_rebuilder()

        # try to rebuild chunk without copy
        self.assertRaises(UnrecoverableContent, rebuilder.chunk_rebuild,
                          content.container_id, content.content_id,
                          content.chunks[0].id)
Example #11
0
class TestBlobRebuilder(BaseTestCase):
    """Functional test rebuilding a chunk stored in the old chunk format."""

    def setUp(self):
        """Create an object and record where each rawx stores its chunks.

        The test is skipped when a content prepared in the container gets
        fewer than 2 chunks.
        """
        super(TestBlobRebuilder, self).setUp()
        self.container = random_str(16)
        self.cid = cid_from_name(self.account, self.container)
        self.path = random_str(16)
        self.api = ObjectStorageApi(self.ns)
        self.blob_client = BlobClient(self.conf)

        self.api.container_create(self.account, self.container)
        _, chunks = self.api.container.content_prepare(self.account,
                                                       self.container,
                                                       self.path, 1)
        if len(chunks) < 2:
            self.skipTest("need at least 2 chunks to run")

        services = self.conscience.all_services('rawx')
        # service id (or address when the service has no id) -> volume
        self.rawx_volumes = dict()
        for rawx in services:
            tags = rawx['tags']
            service_id = tags.get('tag.service_id', None)
            if service_id is None:
                service_id = rawx['addr']
            volume = tags.get('tag.vol', None)
            self.rawx_volumes[service_id] = volume

        self.api.object_create(self.account,
                               self.container,
                               obj_name=self.path,
                               data="chunk")
        meta, self.chunks = self.api.object_locate(self.account,
                                                   self.container, self.path)
        self.version = meta['version']
        self.content_id = meta['id']

    def _chunk_path(self, chunk):
        """Return the local file path of ``chunk`` from its URL.

        The third '/'-separated field of the URL selects the volume, the
        rest is the chunk id; the file sits in a sub-directory named after
        the first three characters of the chunk id.
        """
        url_fields = chunk['url'].split('/', 3)
        volume_id = url_fields[2]
        chunk_id = url_fields[3]
        volume = self.rawx_volumes[volume_id]
        return volume + '/' + chunk_id[:3] + '/' + chunk_id

    def test_rebuild_old_chunk(self):
        """Remove a chunk converted to the old format and rebuild it."""
        if self._cls_conf['go_rawx']:
            self.skipTest('Rawx V2 read only new fullpath')
        for c in self.chunks:
            convert_to_old_chunk(self._chunk_path(c), self.account,
                                 self.container, self.path, self.version,
                                 self.content_id)

        chunk = random.choice(self.chunks)
        chunk_volume = chunk['url'].split('/')[2]
        chunk_id = chunk['url'].split('/')[3]
        chunk_headers, chunk_stream = self.blob_client.chunk_get(chunk['url'])
        os.remove(self._chunk_path(chunk))
        chunks_kept = list(self.chunks)
        chunks_kept.remove(chunk)

        conf = self.conf.copy()
        conf['allow_same_rawx'] = True
        rebuilder = BlobRebuilder(conf, None, chunk_volume)
        rebuilder_worker = rebuilder._create_worker()
        rebuilder_worker.chunk_rebuild(self.cid, self.content_id, chunk_id)

        _, new_chunks = self.api.object_locate(self.account, self.container,
                                               self.path)

        self.assertEqual(len(new_chunks), len(chunks_kept) + 1)
        url_kept = [c['url'] for c in chunks_kept]
        new_chunk = None
        for c in new_chunks:
            if c['url'] not in url_kept:
                # exactly one chunk may be new
                self.assertIsNone(new_chunk)
                new_chunk = c

        # fail cleanly instead of subscripting None when nothing was rebuilt
        self.assertIsNotNone(new_chunk)
        self.assertNotEqual(chunk['real_url'], new_chunk['real_url'])
        self.assertNotEqual(chunk['url'], new_chunk['url'])
        self.assertEqual(chunk['pos'], new_chunk['pos'])
        self.assertEqual(chunk['size'], new_chunk['size'])
        self.assertEqual(chunk['hash'], new_chunk['hash'])

        new_chunk_headers, new_chunk_stream = self.blob_client.chunk_get(
            new_chunk['url'])
        self.assertEqual(chunk_stream.read(), new_chunk_stream.read())
        fullpath = encode_fullpath(self.account, self.container, self.path,
                                   self.version, self.content_id)
        self.assertEqual(fullpath, new_chunk_headers['full_path'])
        # 'full_path' is removed from the new headers only, before the
        # final comparison with the old headers
        del new_chunk_headers['full_path']
        self.assertNotEqual(chunk_headers['chunk_id'],
                            new_chunk_headers['chunk_id'])
        new_chunk_id = new_chunk['url'].split('/')[3]
        self.assertEqual(new_chunk_id, new_chunk_headers['chunk_id'])
        del chunk_headers['chunk_id']
        del new_chunk_headers['chunk_id']
        self.assertEqual(OIO_VERSION, new_chunk_headers['oio_version'])
        del chunk_headers['oio_version']
        del new_chunk_headers['oio_version']
        self.assertEqual(chunk_headers, new_chunk_headers)
Example #12
0
class TestDupContent(BaseTestCase):
    """Functional tests of upload, rebuild and download for DupContent."""

    # number of copies expected for each duplication policy under test
    _COPIES_BY_POLICY = {"SINGLE": 1, "TWOCOPIES": 2, "THREECOPIES": 3}

    def setUp(self):
        """Skip with fewer than 3 rawx, else create clients and a container."""
        super(TestDupContent, self).setUp()

        if len(self.conf['rawx']) < 3:
            self.skipTest("Not enough rawx. "
                          "Dup tests needs more than 2 rawx to run")

        self.namespace = self.conf['namespace']
        self.account = self.conf['account']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_client = ContainerClient(self.gridconf)
        self.blob_client = BlobClient()
        self.container_name = "TestDupContent%f" % time.time()
        self.container_client.container_create(acct=self.account,
                                               ref=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()

    def tearDown(self):
        super(TestDupContent, self).tearDown()

    def _test_upload(self, stgpol, data_size):
        """Upload ``data_size`` random bytes and check meta2 and every chunk.

        :param stgpol: one of the keys of ``_COPIES_BY_POLICY``
        :param data_size: number of random bytes to upload
        :raises KeyError: when ``stgpol`` is not a known duplication policy
        """
        # Resolved first so that an unsupported policy fails immediately
        # instead of with an unbound local after the upload.
        nb_copy = self._COPIES_BY_POLICY[stgpol]

        data = random_data(data_size)
        content = self.content_factory.new(self.container_id, "titi",
                                           len(data), stgpol)
        self.assertEqual(type(content), DupContent)

        content.upload(StringIO.StringIO(data))

        meta, chunks = self.container_client.content_show(
            cid=self.container_id, content=content.content_id)
        chunks = ChunksHelper(chunks)
        self.assertEqual(meta['hash'], md5_data(data))
        self.assertEqual(meta['length'], str(len(data)))
        self.assertEqual(meta['policy'], stgpol)
        self.assertEqual(meta['name'], "titi")

        metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size))
        if metachunk_nb == 0:
            metachunk_nb = 1  # special case for empty content

        self.assertEqual(len(chunks), metachunk_nb * nb_copy)

        for pos in range(metachunk_nb):
            chunks_at_pos = chunks.filter(pos=pos)
            self.assertEqual(len(chunks_at_pos), nb_copy)

            data_begin = pos * self.chunk_size
            data_end = data_begin + self.chunk_size
            chunk_hash = md5_data(data[data_begin:data_end])

            for chunk in chunks_at_pos:
                chunk_meta, stream = self.blob_client.chunk_get(chunk.url)
                self.assertEqual(md5_stream(stream), chunk_hash)
                self.assertEqual(chunk_meta['content_size'], str(len(data)))
                self.assertEqual(chunk_meta['content_path'], "titi")
                self.assertEqual(chunk_meta['content_cid'], self.container_id)
                # compare with the uploaded content, not with itself
                self.assertEqual(chunk_meta['content_id'], content.content_id)
                self.assertEqual(chunk_meta['chunk_id'], chunk.id)
                self.assertEqual(chunk_meta['chunk_pos'], str(pos))
                self.assertEqual(chunk_meta['chunk_hash'], chunk_hash)

    def test_twocopies_upload_0_byte(self):
        self._test_upload("TWOCOPIES", 0)

    def test_twocopies_upload_1_byte(self):
        self._test_upload("TWOCOPIES", 1)

    def test_twocopies_upload_chunksize_bytes(self):
        self._test_upload("TWOCOPIES", self.chunk_size)

    def test_twocopies_upload_chunksize_plus_1_bytes(self):
        self._test_upload("TWOCOPIES", self.chunk_size + 1)

    def test_single_upload_0_byte(self):
        self._test_upload("SINGLE", 0)

    def test_single_upload_chunksize_plus_1_bytes(self):
        self._test_upload("SINGLE", self.chunk_size + 1)

    def test_chunks_cleanup_when_upload_failed(self):
        """A failed upload must leave no chunk behind at the other positions."""
        data = random_data(2 * self.chunk_size)
        content = self.content_factory.new(self.container_id, "titi",
                                           len(data), "TWOCOPIES")
        self.assertEqual(type(content), DupContent)

        # set bad url for position 1
        for chunk in content.chunks.filter(pos=1):
            chunk.url = "http://127.0.0.1:9/DEADBEEF"

        self.assertRaises(Exception, content.upload, StringIO.StringIO(data))
        for chunk in content.chunks.exclude(pos=1):
            self.assertRaises(NotFound, self.blob_client.chunk_head, chunk.url)

    def _new_content(self, stgpol, data, broken_pos_list=None):
        """Upload ``data`` as "titi", then delete the requested chunks.

        :param stgpol: storage policy of the new content
        :param data: payload to upload
        :param broken_pos_list: iterable of ``(pos, idx)`` pairs, each one
            designating the ``idx``-th chunk at position ``pos`` to delete;
            ``None`` (the default) deletes nothing
        :returns: ``(content, broken_chunks_info)`` where ``content`` is
            re-read through the factory and ``broken_chunks_info[pos][idx]``
            holds the url, id, hash, downloaded metadata and downloaded
            data hash of each deleted chunk
        """
        old_content = self.content_factory.new(self.container_id, "titi",
                                               len(data), stgpol)
        self.assertEqual(type(old_content), DupContent)

        old_content.upload(StringIO.StringIO(data))

        broken_chunks_info = {}
        for pos, idx in broken_pos_list or ():
            c = old_content.chunks.filter(pos=pos)[idx]
            # keep a copy of what the chunk looked like before deleting it
            meta, stream = self.blob_client.chunk_get(c.url)
            broken_chunks_info.setdefault(pos, {})[idx] = {
                "url": c.url,
                "id": c.id,
                "hash": c.hash,
                "dl_meta": meta,
                "dl_hash": md5_stream(stream)
            }
            self.blob_client.chunk_delete(c.url)

        # get the new structure of the uploaded content
        return (self.content_factory.get(self.container_id,
                                         old_content.content_id),
                broken_chunks_info)

    def _test_rebuild(self, stgpol, data_size, broken_pos_list,
                      full_rebuild_pos):
        """Break chunks, rebuild one of them and check the rebuilt chunk.

        :param broken_pos_list: ``(pos, idx)`` pairs of the chunks to delete
        :param full_rebuild_pos: ``(pos, idx)`` of the deleted chunk to
            rebuild; must be part of ``broken_pos_list``
        """
        data = random_data(data_size)
        content, broken_chunks_info = self._new_content(
            stgpol, data, broken_pos_list)

        rebuild_pos, rebuild_idx = full_rebuild_pos
        rebuild_chunk_info = broken_chunks_info[rebuild_pos][rebuild_idx]
        content.rebuild_chunk(rebuild_chunk_info["id"])

        # get the new structure of the content
        rebuilt_content = self.content_factory.get(self.container_id,
                                                   content.content_id)
        self.assertEqual(type(rebuilt_content), DupContent)

        # find the rebuilt chunk
        for c in rebuilt_content.chunks.filter(pos=rebuild_pos):
            if len(content.chunks.filter(id=c.id)) > 0:
                # not the rebuilt chunk
                # if this chunk is broken, it must not have been rebuilt
                for b_c_i in broken_chunks_info[rebuild_pos].values():
                    if c.id == b_c_i["id"]:
                        with ExpectedException(NotFound):
                            _, _ = self.blob_client.chunk_get(c.url)
                continue
            meta, stream = self.blob_client.chunk_get(c.url)
            self.assertEqual(meta["chunk_id"], c.id)
            self.assertEqual(md5_stream(stream), rebuild_chunk_info["dl_hash"])
            self.assertEqual(c.hash, rebuild_chunk_info["hash"])
            self.assertThat(c.url, NotEquals(rebuild_chunk_info["url"]))
            # chunk ids necessarily differ: compare everything else
            del meta["chunk_id"]
            del rebuild_chunk_info["dl_meta"]["chunk_id"]
            self.assertEqual(meta, rebuild_chunk_info["dl_meta"])

    def test_2copies_content_0_byte_1broken_rebuild_pos_0_idx_0(self):
        self._test_rebuild("TWOCOPIES", 0, [(0, 0)], (0, 0))

    def test_2copies_content_1_byte_1broken_rebuild_pos_0_idx_1(self):
        self._test_rebuild("TWOCOPIES", 1, [(0, 1)], (0, 1))

    def test_3copies_content_chunksize_bytes_2broken_rebuild_pos_0_idx_1(self):
        if len(self.conf['rawx']) <= 3:
            self.skipTest("Need more than 3 rawx")
        self._test_rebuild("THREECOPIES", self.chunk_size, [(0, 0), (0, 1)],
                           (0, 1))

    def test_3copies_content_2xchksize_bytes_2broken_rebuild_pos_1_idx_2(self):
        if len(self.conf['rawx']) <= 3:
            self.skipTest("Need more than 3 rawx")
        self._test_rebuild("THREECOPIES", 2 * self.chunk_size,
                           [(1, 0), (1, 2)], (1, 2))

    def test_2copies_content_0_byte_2broken_rebuild_pos_0_idx_0(self):
        with ExpectedException(UnrecoverableContent):
            self._test_rebuild("TWOCOPIES", 0, [(0, 0), (0, 1)], (0, 0))

    def _test_download(self, stgpol, data_size, broken_pos_list):
        """Download a content with deleted chunks and check nothing is rebuilt.

        :param broken_pos_list: ``(pos, idx)`` pairs of the chunks deleted
            before the download
        """
        data = random_data(data_size)
        content, broken_chunks_info = self._new_content(
            stgpol, data, broken_pos_list)

        downloaded_data = "".join(content.download())

        self.assertEqual(downloaded_data, data)

        for pos, idx in broken_pos_list:
            # check nothing has been rebuilt: the URL of the very chunk
            # that was deleted (not merely the first one at that position)
            # must still be missing
            broken_url = broken_chunks_info[pos][idx]["url"]
            self.assertRaises(NotFound, self.blob_client.chunk_delete,
                              broken_url)

    def test_twocopies_download_content_0_byte_without_broken_chunks(self):
        self._test_download("TWOCOPIES", 0, [])

    def test_twocopies_download_content_0_byte_with_broken_0_0(self):
        self._test_download("TWOCOPIES", 0, [(0, 0)])

    def test_twocopies_download_content_1_byte_without_broken_chunks(self):
        self._test_download("TWOCOPIES", 1, [])

    def test_twocopies_download_content_1_byte_with_broken_0_0(self):
        self._test_download("TWOCOPIES", 1, [(0, 0)])

    def test_twocopies_download_chunksize_bytes_without_broken_chunks(self):
        self._test_download("TWOCOPIES", self.chunk_size, [])

    def test_twocopies_download_2xchuksize_bytes_with_broken_0_0_and_1_0(self):
        self._test_download("TWOCOPIES", self.chunk_size * 2, [(0, 0), (1, 0)])

    def test_twocopies_download_content_chunksize_bytes_2_broken_chunks(self):
        """With both copies deleted, the download generator fails at once."""
        data = random_data(self.chunk_size)
        content, _ = self._new_content("TWOCOPIES", data, [(0, 0), (0, 1)])
        gen = content.download()
        self.assertRaises(UnrecoverableContent, gen.next)

    def test_single_download_content_1_byte_without_broken_chunks(self):
        self._test_download("SINGLE", 1, [])

    def test_single_download_chunksize_bytes_plus_1_without_broken_chunk(self):
        # NOTE(review): the name says "chunksize plus 1" but the size used
        # is two chunk sizes -- confirm which one is intended
        self._test_download("SINGLE", self.chunk_size * 2, [])
Example #13
0
class TestRainContent(BaseTestCase):
    """Functional tests for contents created with the "RAIN" policy.

    Contents are created through ``ContentFactory`` and their chunks are
    inspected, deleted and re-read through ``BlobClient``. Every test is
    skipped when fewer than 12 rawx are listed in the configuration.
    """

    def setUp(self):
        """Build the clients and create a dedicated container."""
        super(TestRainContent, self).setUp()

        # The check is "fewer than 12", so exactly 12 rawx are enough.
        if len(self.conf['rawx']) < 12:
            self.skipTest("Not enough rawx. "
                          "Rain tests need at least 12 rawx to run")

        self.namespace = self.conf['namespace']
        self.account = self.conf['account']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_client = ContainerClient(self.gridconf)
        self.blob_client = BlobClient()
        self.container_name = "TestRainContent%f" % time.time()
        self.container_client.container_create(acct=self.account,
                                               ref=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()

    def tearDown(self):
        super(TestRainContent, self).tearDown()

    def _test_upload(self, data_size):
        """Upload ``data_size`` random bytes and verify what was stored.

        Checks, in order: the content metadata returned by
        ``content_show``, the total number of chunks, the number of data
        and parity chunks of each metachunk, the metadata of every chunk,
        and that the data chunks of each metachunk hash to the matching
        slice of the uploaded data.
        """
        data = random_data(data_size)
        content = self.content_factory.new(self.container_id, "titi",
                                           len(data), "RAIN")
        # Expected number of data (k) and parity (m) chunks per metachunk.
        k = 6
        m = 2
        self.assertEqual(type(content), RainContent)

        content.upload(StringIO.StringIO(data))

        meta, chunks = self.container_client.content_show(
            cid=self.container_id, content=content.content_id)
        chunks = ChunksHelper(chunks)
        self.assertEqual(meta['hash'], md5_data(data))
        self.assertEqual(meta['length'], str(len(data)))
        self.assertEqual(meta['policy'], "RAIN")
        self.assertEqual(meta['name'], "titi")

        metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size))
        if metachunk_nb == 0:
            metachunk_nb = 1  # special case for empty content

        # The last metachunk may hold between 1 and k data chunks.
        nb_chunks_min = metachunk_nb * (k + m) - (k - 1)
        nb_chunks_max = metachunk_nb * (k + m)
        # Dedicated assertions report both operands on failure.
        self.assertGreaterEqual(len(chunks), nb_chunks_min)
        self.assertLessEqual(len(chunks), nb_chunks_max)

        for metapos in range(metachunk_nb):
            chunks_at_pos = content.chunks.filter(metapos=metapos)
            data_chunks_at_pos = chunks_at_pos.filter(is_parity=False)
            parity_chunks_at_pos = chunks_at_pos.filter(is_parity=True)

            if metapos < metachunk_nb - 1:
                self.assertEqual(len(data_chunks_at_pos), k)
            else:
                self.assertGreaterEqual(len(data_chunks_at_pos), 1)
                self.assertLessEqual(len(data_chunks_at_pos), k)
            self.assertEqual(len(parity_chunks_at_pos), m)

            for chunk in chunks_at_pos:
                chunk_meta, stream = self.blob_client.chunk_get(chunk.url)
                self.assertEqual(md5_stream(stream), chunk.hash)
                self.assertEqual(chunk_meta['content_size'], str(len(data)))
                self.assertEqual(chunk_meta['content_path'], "titi")
                self.assertEqual(chunk_meta['content_cid'],
                                 self.container_id)
                # Compare with the uploaded content: comparing the value
                # with itself could never fail.
                self.assertEqual(chunk_meta['content_id'],
                                 content.content_id)
                self.assertEqual(chunk_meta['chunk_id'], chunk.id)
                self.assertEqual(chunk_meta['chunk_pos'], chunk.pos)
                self.assertEqual(chunk_meta['chunk_hash'], chunk.hash)

            data_begin = metapos * self.chunk_size
            data_end = metapos * self.chunk_size + self.chunk_size
            target_metachunk_hash = md5_data(data[data_begin:data_end])

            metachunk_hash = hashlib.md5()
            for chunk in data_chunks_at_pos:
                _, stream = self.blob_client.chunk_get(chunk.url)
                for d in stream:
                    metachunk_hash.update(d)
            self.assertEqual(metachunk_hash.hexdigest().upper(),
                             target_metachunk_hash)

    def test_upload_0_byte(self):
        self._test_upload(0)

    def test_upload_1_byte(self):
        self._test_upload(1)

    def test_upload_chunksize_bytes(self):
        self._test_upload(self.chunk_size)

    def test_upload_chunksize_plus_1_bytes(self):
        self._test_upload(self.chunk_size + 1)

    def test_chunks_cleanup_when_upload_failed(self):
        """A failed upload must not leave any chunk behind.

        The chunk at position "1.p0" is given an unusable URL so that the
        upload raises; every other chunk must then be absent.
        """
        data = random_data(2 * self.chunk_size)
        content = self.content_factory.new(self.container_id, "titi",
                                           len(data), "RAIN")
        self.assertEqual(type(content), RainContent)

        # set bad url for position 1
        for chunk in content.chunks.filter(pos="1.p0"):
            chunk.url = "http://127.0.0.1:9/DEADBEEF"

        self.assertRaises(Exception, content.upload, StringIO.StringIO(data))
        for chunk in content.chunks.exclude(pos="1.p0"):
            self.assertRaises(NotFound, self.blob_client.chunk_head, chunk.url)

    def _test_rebuild(self, data_size, broken_pos_list):
        """Delete the chunks at ``broken_pos_list`` and rebuild them.

        :param data_size: number of random bytes to upload.
        :param broken_pos_list: non-empty list of chunk positions (such as
            "0.0" or "0.p0") to delete.

        ``rebuild_chunk`` is called once, with the id of the last deleted
        chunk; the checks below then expect every listed position to have
        been rebuilt at a new URL with the same data and metadata
        (``chunk_id`` excepted).
        """
        data = os.urandom(data_size)
        old_content = self.content_factory.new(self.container_id, "titi",
                                               len(data), "RAIN")
        self.assertEqual(type(old_content), RainContent)

        old_content.upload(StringIO.StringIO(data))

        # get the new structure of the uploaded content
        uploaded_content = self.content_factory.get(self.container_id,
                                                    old_content.content_id)

        old_info = {}
        for pos in broken_pos_list:
            old_info[pos] = {}
            c = uploaded_content.chunks.filter(pos=pos)[0]
            old_info[pos]["url"] = c.url
            old_info[pos]["id"] = c.id
            old_info[pos]["hash"] = c.hash
            chunk_id_to_rebuild = c.id
            meta, stream = self.blob_client.chunk_get(c.url)
            old_info[pos]["dl_meta"] = meta
            old_info[pos]["dl_hash"] = md5_stream(stream)
            # delete the chunk
            self.blob_client.chunk_delete(c.url)

        # rebuild the broken chunks
        uploaded_content.rebuild_chunk(chunk_id_to_rebuild)

        # get the new structure of the content
        rebuilt_content = self.content_factory.get(self.container_id,
                                                   uploaded_content.content_id)
        self.assertEqual(type(rebuilt_content), RainContent)

        for pos in broken_pos_list:
            c = rebuilt_content.chunks.filter(pos=pos)[0]
            rebuilt_meta, rebuilt_stream = self.blob_client.chunk_get(c.url)
            self.assertEqual(rebuilt_meta["chunk_id"], c.id)
            self.assertEqual(md5_stream(rebuilt_stream),
                             old_info[pos]["dl_hash"])
            self.assertEqual(c.hash, old_info[pos]["hash"])
            self.assertThat(c.url, NotEquals(old_info[pos]["url"]))
            # The chunk id is the only metadata allowed to differ.
            del old_info[pos]["dl_meta"]["chunk_id"]
            del rebuilt_meta["chunk_id"]
            self.assertEqual(rebuilt_meta, old_info[pos]["dl_meta"])

    def test_content_0_byte_rebuild_pos_0_0(self):
        self._test_rebuild(0, ["0.0"])

    def test_content_0_byte_rebuild_pos_0_0_and_0_p0(self):
        self._test_rebuild(0, ["0.0", "0.p0"])

    def test_content_1_byte_rebuild_pos_0_0(self):
        self._test_rebuild(1, ["0.0"])

    def test_content_1_byte_rebuild_pos_0_p0(self):
        self._test_rebuild(1, ["0.p0"])

    def test_content_1_byte_rebuild_pos_0_0_and_0_p0(self):
        self._test_rebuild(1, ["0.0", "0.p0"])

    def test_content_chunksize_bytes_rebuild_pos_0_0(self):
        self._test_rebuild(self.conf["chunk_size"], ["0.0"])

    def test_content_chunksize_bytes_rebuild_pos_0_0_and_0_1(self):
        self._test_rebuild(self.conf["chunk_size"], ["0.0", "0.1"])

    def test_content_chunksize_bytes_rebuild_pos_0_0_and_0_p0(self):
        self._test_rebuild(self.conf["chunk_size"], ["0.0", "0.p0"])

    def test_content_chunksize_bytes_rebuild_pos_0_p0_and_0_p1(self):
        self._test_rebuild(self.conf["chunk_size"], ["0.p0", "0.p1"])

    def test_content_chunksize_bytes_rebuild_more_than_k_chunk(self):
        """Three broken chunks are expected to be unrecoverable."""
        self.assertRaises(UnrecoverableContent, self._test_rebuild,
                          self.conf["chunk_size"], ["0.0", "0.1", "0.2"])

    def _new_content(self, data, broken_pos_list=None):
        """Upload ``data`` as a RAIN content, then delete some chunks.

        :param data: bytes to upload.
        :param broken_pos_list: optional iterable of chunk positions whose
            chunk is deleted after the upload; nothing is deleted when it
            is omitted.
        :returns: the content, freshly loaded from the content factory.
        """
        old_content = self.content_factory.new(self.container_id, "titi",
                                               len(data), "RAIN")
        self.assertEqual(type(old_content), RainContent)

        old_content.upload(StringIO.StringIO(data))

        # ``None`` replaces the former mutable ``[]`` default.
        for pos in broken_pos_list or ():
            c = old_content.chunks.filter(pos=pos)[0]
            self.blob_client.chunk_delete(c.url)

        # get the new structure of the uploaded content
        return self.content_factory.get(self.container_id,
                                        old_content.content_id)

    def test_orphan_chunk(self):
        """Rebuilding an unknown chunk id must raise ``OrphanChunk``."""
        content = self._new_content(random_data(10))

        self.assertRaises(OrphanChunk, content.rebuild_chunk, "uNkNoWnId")

    def test_rebuild_on_the_fly(self):
        """``rebuild_metachunk(on_the_fly=True)`` must stream the original
        data and leave the deleted chunks missing."""
        data = random_data(self.conf["chunk_size"])
        content = self._new_content(data, ["0.0", "0.p0"])

        stream = content.rebuild_metachunk("0", on_the_fly=True)

        dl_data = "".join(stream)

        self.assertEqual(dl_data, data)

        del_chunk_0_0 = content.chunks.filter(pos="0.0")[0]
        del_chunk_0_p0 = content.chunks.filter(pos="0.p0")[0]

        self.assertRaises(NotFound, self.blob_client.chunk_get,
                          del_chunk_0_0.url)
        self.assertRaises(NotFound, self.blob_client.chunk_get,
                          del_chunk_0_p0.url)

    def _test_download(self, data_size, broken_pos_list):
        """Download a content whose chunks at ``broken_pos_list`` are gone.

        The downloaded bytes must equal the uploaded data, and deleting
        each broken chunk must still raise ``NotFound`` afterwards.
        """
        data = random_data(data_size)
        content = self._new_content(data, broken_pos_list)

        downloaded_data = "".join(content.download())

        self.assertEqual(downloaded_data, data)

        for pos in broken_pos_list:
            c = content.chunks.filter(pos=pos)[0]
            self.assertRaises(NotFound, self.blob_client.chunk_delete, c.url)

    def test_download_content_0_byte_without_broken_chunks(self):
        self._test_download(0, [])

    def test_download_content_1_byte_without_broken_chunks(self):
        self._test_download(1, [])

    def test_download_content_chunksize_bytes_without_broken_chunks(self):
        self._test_download(self.conf["chunk_size"], [])

    def test_download_content_chunksize_plus_1_without_broken_chunks(self):
        self._test_download(self.conf["chunk_size"] + 1, [])

    def test_download_content_0_byte_with_broken_0_0_and_0_p0(self):
        self._test_download(0, ["0.0", "0.p0"])

    def test_download_content_1_byte_with_broken_0_0_and_0_p0(self):
        self._test_download(1, ["0.0", "0.p0"])

    def test_download_content_2xchunksize_with_broken_0_2_and_1_0(self):
        self._test_download(2 * self.conf["chunk_size"], ["0.2", "1.0"])

    def test_download_content_chunksize_bytes_with_3_broken_chunks(self):
        """The first item of the download must raise when three chunks of
        the metachunk are missing."""
        data = random_data(self.conf["chunk_size"])
        content = self._new_content(data, ["0.0", "0.1", "0.2"])
        gen = content.download()
        # builtin next() instead of the Python 2-only ``.next`` method
        self.assertRaises(UnrecoverableContent, next, gen)

    def test_download_interrupt_close(self):
        """Read a single block from the download, then close it."""
        data = random_data(self.conf["chunk_size"])
        content = self._new_content(data, ["0.p0"])

        download_iter = content.download()

        # NOTE(review): this slice is READ_CHUNK_SIZE - 1 bytes long;
        # confirm it matches the size of the first block that is yielded.
        self.assertEqual(next(download_iter), data[0:READ_CHUNK_SIZE - 1])
        download_iter.close()
Example #14
0
class TestECContent(BaseTestCase):
    """Functional tests for contents created with the "EC" policy.

    Contents are created through ``ContentFactory`` and their chunks are
    inspected, deleted and re-read through ``BlobClient``. Every test is
    skipped when fewer than 12 rawx are listed in the configuration.
    """

    def setUp(self):
        """Build the clients and create a dedicated container."""
        super(TestECContent, self).setUp()

        if len(self.conf['services']['rawx']) < 12:
            self.skipTest("Not enough rawx. "
                          "EC tests needs at least 12 rawx to run")

        self.namespace = self.conf['namespace']
        self.account = self.conf['account']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_client = ContainerClient(self.gridconf)
        self.blob_client = BlobClient(self.conf)
        self.container_name = "TestECContent%f" % time.time()
        self.container_client.container_create(account=self.account,
                                               reference=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()
        self.content = "%s-%s" % (self.__class__.__name__, random_str(4))
        self.stgpol = "EC"
        self.size = 1024 * 1024 + 320
        self.k = 6
        self.m = 3

    def tearDown(self):
        super(TestECContent, self).tearDown()

    def random_chunks(self, nb):
        """Return ``nb`` distinct random positions "0.<i>".

        ``i`` is drawn without replacement from ``0 .. k + m - 1``.
        """
        # range() is accepted by random.sample on Python 2 and Python 3.
        pos = random.sample(range(self.k + self.m), nb)
        return ["0.%s" % i for i in pos]

    def _test_create(self, data_size):
        """Create a content of ``data_size`` random bytes and verify it.

        Checks the content metadata returned by ``content_locate``, then
        the metadata and checksum of every chunk of every metachunk.
        """
        # generate random test data
        data = random_data(data_size)
        # using factory create new EC content
        content = self.content_factory.new(self.container_id, self.content,
                                           len(data), self.stgpol)
        # verify the factory gave us an ECContent
        self.assertEqual(type(content), ECContent)

        # perform the content creation
        content.create(BytesIO(data))

        meta, chunks = self.container_client.content_locate(
            cid=self.container_id, content=content.content_id)
        # verify metadata
        chunks = ChunksHelper(chunks)
        self.assertEqual(meta['hash'], md5_data(data))
        self.assertEqual(meta['length'], str(len(data)))
        self.assertEqual(meta['policy'], self.stgpol)
        self.assertEqual(meta['name'], self.content)

        metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size)) \
            if len(data) != 0 else 1

        offset = 0
        # verify each metachunk
        for metapos in range(metachunk_nb):
            chunks_at_pos = content.chunks.filter(metapos=metapos)
            if len(chunks_at_pos) < 1:
                break
            metachunk_size = chunks_at_pos[0].size
            metachunk_hash = md5_data(data[offset:offset + metachunk_size])

            for chunk in chunks_at_pos:
                chunk_meta, stream = self.blob_client.chunk_get(chunk.url)
                self.assertEqual(chunk_meta['metachunk_size'],
                                 str(chunk.size))
                self.assertEqual(chunk_meta['metachunk_hash'],
                                 chunk.checksum)
                self.assertEqual(chunk_meta['content_path'], self.content)
                self.assertEqual(chunk_meta['container_id'],
                                 self.container_id)
                # Compare with the created content: comparing the value
                # with itself could never fail.
                self.assertEqual(chunk_meta['content_id'],
                                 content.content_id)
                self.assertEqual(chunk_meta['chunk_id'], chunk.id)
                self.assertEqual(chunk_meta['chunk_pos'], chunk.pos)
                self.assertEqual(chunk_meta['chunk_hash'],
                                 md5_stream(stream))
                full_path = encode_fullpath(self.account, self.container_name,
                                            self.content,
                                            chunk_meta['content_version'],
                                            chunk_meta['content_id'])
                self.assertEqual(chunk_meta['full_path'], full_path)
                self.assertEqual(chunk_meta['oio_version'], '4.2')
                self.assertEqual(metachunk_hash, chunk.checksum)

            offset += metachunk_size

    def test_create_0_byte(self):
        self._test_create(0)

    def test_create_1_byte(self):
        self._test_create(1)

    def test_create(self):
        self._test_create(DAT_LEGIT_SIZE)

    def test_create_6294503_bytes(self):
        self._test_create(6294503)

    def _test_rebuild(self, data_size, broken_pos_list):
        """Delete the chunks at ``broken_pos_list`` and rebuild each one.

        :param data_size: number of random bytes to store.
        :param broken_pos_list: list of chunk positions (such as "0.3") to
            delete.

        All the listed chunks are deleted before the first rebuild starts,
        so that every rebuild really runs with that many chunks missing.
        Each rebuilt chunk must then live at a new URL with the same data
        and metadata (``chunk_id`` and ``chunk_mtime`` excepted).
        """
        # generate test data
        data = os.urandom(data_size)
        # create initial content
        old_content = self.content_factory.new(self.container_id, self.content,
                                               len(data), self.stgpol)
        # verify factory work as intended
        self.assertEqual(type(old_content), ECContent)

        # perform initial content creation
        old_content.create(BytesIO(data))

        uploaded_content = self.content_factory.get(self.container_id,
                                                    old_content.content_id)

        # break the content
        old_info = {}
        for pos in broken_pos_list:
            old_info[pos] = {}
            c = uploaded_content.chunks.filter(pos=pos)[0]
            old_info[pos]["url"] = c.url
            old_info[pos]["id"] = c.id
            old_info[pos]["hash"] = c.checksum
            meta, stream = self.blob_client.chunk_get(c.url)
            old_info[pos]["dl_meta"] = meta
            old_info[pos]["dl_hash"] = md5_stream(stream)
            # delete the chunk
            self.blob_client.chunk_delete(c.url)

        # Rebuild only once every chunk is broken: rebuilding right after
        # each deletion would never leave more than one chunk missing.
        for pos in broken_pos_list:
            uploaded_content.rebuild_chunk(old_info[pos]["id"])

        rebuilt_content = self.content_factory.get(self.container_id,
                                                   uploaded_content.content_id)
        # sanity check
        self.assertEqual(type(rebuilt_content), ECContent)

        # verify rebuild result
        for pos in broken_pos_list:
            c = rebuilt_content.chunks.filter(pos=pos)[0]
            rebuilt_meta, rebuilt_stream = self.blob_client.chunk_get(c.url)
            self.assertEqual(rebuilt_meta["chunk_id"], c.id)
            self.assertEqual(md5_stream(rebuilt_stream),
                             old_info[pos]["dl_hash"])
            self.assertEqual(c.checksum, old_info[pos]["hash"])
            self.assertNotEqual(c.url, old_info[pos]["url"])
            self.assertGreaterEqual(rebuilt_meta['chunk_mtime'],
                                    old_info[pos]['dl_meta']['chunk_mtime'])
            # mtime and id are the only metadata allowed to differ
            del old_info[pos]["dl_meta"]["chunk_mtime"]
            del rebuilt_meta["chunk_mtime"]
            del old_info[pos]["dl_meta"]["chunk_id"]
            del rebuilt_meta["chunk_id"]
            self.assertEqual(rebuilt_meta, old_info[pos]["dl_meta"])

    def test_content_0_byte_rebuild(self):
        self._test_rebuild(0, self.random_chunks(1))

    def test_content_0_byte_rebuild_advanced(self):
        self._test_rebuild(0, self.random_chunks(3))

    def test_content_1_byte_rebuild(self):
        self._test_rebuild(1, self.random_chunks(1))

    def test_content_1_byte_rebuild_advanced(self):
        self._test_rebuild(1, self.random_chunks(3))

    def test_content_rebuild(self):
        self._test_rebuild(DAT_LEGIT_SIZE, self.random_chunks(1))

    def test_content_rebuild_advanced(self):
        self._test_rebuild(DAT_LEGIT_SIZE, self.random_chunks(3))

    def test_content_rebuild_unrecoverable(self):
        """Four broken chunks are expected to be unrecoverable."""
        self.assertRaises(UnrecoverableContent, self._test_rebuild,
                          DAT_LEGIT_SIZE, self.random_chunks(4))

    def _new_content(self, data, broken_pos_list=None):
        """Create a content holding ``data``, then delete some chunks.

        :param data: bytes to store.
        :param broken_pos_list: optional iterable of chunk positions whose
            chunk is deleted after the creation; nothing is deleted when
            it is omitted.
        :returns: the content, freshly loaded from the content factory.
        """
        old_content = self.content_factory.new(self.container_id, self.content,
                                               len(data), self.stgpol)
        self.assertEqual(type(old_content), ECContent)

        old_content.create(BytesIO(data))

        # break content (``None`` replaces the former mutable ``[]``
        # default)
        for pos in broken_pos_list or ():
            c = old_content.chunks.filter(pos=pos)[0]
            self.blob_client.chunk_delete(c.url)

        # get the new structure of the uploaded content
        return self.content_factory.get(self.container_id,
                                        old_content.content_id)

    def test_orphan_chunk(self):
        """Rebuilding an unknown chunk id must raise ``OrphanChunk``."""
        content = self._new_content(random_data(10))
        self.assertRaises(OrphanChunk, content.rebuild_chunk, "invalid")

    def _test_fetch(self, data_size, broken_pos_list=None):
        """Fetch a content whose chunks at ``broken_pos_list`` are gone.

        The fetched bytes must have the length and MD5 of the stored data,
        and deleting each broken chunk must still raise ``NotFound``.
        """
        broken_pos_list = broken_pos_list or []
        test_data = random_data(data_size)
        content = self._new_content(test_data, broken_pos_list)

        data = b''.join(content.fetch())

        self.assertEqual(len(data), len(test_data))
        self.assertEqual(md5_data(data), md5_data(test_data))

        # verify that chunks are broken
        for pos in broken_pos_list:
            chunk = content.chunks.filter(pos=pos)[0]
            self.assertRaises(NotFound, self.blob_client.chunk_delete,
                              chunk.url)

    def test_fetch_content_0_byte(self):
        self._test_fetch(0)

    def test_fetch_content_1_byte(self):
        self._test_fetch(1)

    def test_fetch_content(self):
        self._test_fetch(DAT_LEGIT_SIZE)

    def test_fetch_content_0_byte_broken(self):
        self._test_fetch(0, self.random_chunks(3))

    def test_fetch_content_1_byte_broken(self):
        self._test_fetch(1, self.random_chunks(3))

    def test_fetch_content_broken(self):
        self._test_fetch(DAT_LEGIT_SIZE, self.random_chunks(3))

    def test_fetch_content_unrecoverable(self):
        """Fetching with four broken chunks must raise ``OioException``."""
        broken_chunks = self.random_chunks(4)
        self.assertRaises(OioException, self._test_fetch, DAT_LEGIT_SIZE,
                          broken_chunks)
class TestRebuilderCrawler(BaseTestCase):
    """Functional tests for ``BlobRebuilderWorker.chunk_rebuild``.

    Each test pushes a hand-built ``TestContent`` (chunks first, then the
    content entry) and asks a rebuilder bound to the first rawx to rebuild
    one chunk.
    """

    def setUp(self):
        """Build the clients and create a dedicated container."""
        super(TestRebuilderCrawler, self).setUp()

        self.namespace = self.conf['namespace']
        self.account = self.conf['account']

        self.gridconf = {"namespace": self.namespace}
        self.container_client = ContainerClient(self.gridconf)
        self.blob_client = BlobClient()

        # Sub-second resolution: every test pushes a content named
        # "mycontent", so two tests started within the same second must
        # not end up in the same container.
        self.container_name = "TestRebuilderCrawler%f" % time.time()
        self.container_client.container_create(acct=self.account,
                                               ref=self.container_name)

    def _push_content(self, content):
        """Upload every chunk of ``content``, then create its entry in the
        container."""
        for c in content.chunks:
            self.blob_client.chunk_put(c.url, c.get_create_xattr(), c.data)

        self.container_client.content_create(acct=content.account,
                                             ref=content.container_name,
                                             path=content.content_name,
                                             size=content.size,
                                             checksum=content.hash,
                                             content_id=content.content_id,
                                             stgpol=content.stgpol,
                                             data=content.get_create_meta2())

    def tearDown(self):
        super(TestRebuilderCrawler, self).tearDown()

    def test_rebuild_chunk(self):
        """Rebuild one copy and check the container, the new chunk and the
        rdir entry."""
        # push a new content
        content = TestContent(self.conf, self.account,
                              self.container_name, "mycontent", "TWOCOPIES")
        data = "azerty"
        content.add_chunk(data, pos='0', rawx=0)
        content.add_chunk(data, pos='0', rawx=1)

        self._push_content(content)

        # rebuild the first rawx
        rebuilder = BlobRebuilderWorker(self.gridconf, None,
                                        self.conf['rawx'][0]['addr'])

        rebuilder.chunk_rebuild(content.container_id, content.content_id,
                                content.chunks[0].id)

        # check meta2 information
        _, res = self.container_client.content_show(acct=content.account,
                                                    ref=content.container_name,
                                                    content=content.content_id)

        # the rebuilt chunk is the one whose URL is none of the original
        # two
        new_chunk_info = None
        for c in res:
            if (c['url'] != content.chunks[0].url and
                    c['url'] != content.chunks[1].url):
                new_chunk_info = c

        # Fail with an assertion (not a TypeError) when no chunk is new.
        self.assertIsNotNone(new_chunk_info)
        new_chunk_id = new_chunk_info['url'].split('/')[-1]

        self.assertEqual(new_chunk_info['hash'], content.chunks[0].hash)
        self.assertEqual(new_chunk_info['pos'], content.chunks[0].pos)
        self.assertEqual(new_chunk_info['size'], content.chunks[0].size)

        # check chunk information
        meta, stream = self.blob_client.chunk_get(new_chunk_info['url'])

        self.assertEqual(meta['content_size'], str(content.chunks[0].size))
        self.assertEqual(meta['content_path'], content.content_name)
        self.assertEqual(meta['content_cid'], content.container_id)
        self.assertEqual(meta['content_id'], content.content_id)
        self.assertEqual(meta['chunk_id'], new_chunk_id)
        self.assertEqual(meta['chunk_pos'], content.chunks[0].pos)
        self.assertEqual(meta['content_version'], content.version)
        self.assertEqual(meta['chunk_hash'], content.chunks[0].hash)

        # builtin next() instead of the Python 2-only ``.next`` method
        self.assertEqual(next(stream), content.chunks[0].data)

        # check rtime flag in rdir
        rdir_client = RdirClient(self.gridconf)
        res = rdir_client.chunk_fetch(self.conf['rawx'][0]['addr'])
        key = (content.container_id, content.content_id, content.chunks[0].id)
        # Stays None when rdir has no entry for the chunk, so that the
        # test fails with an assertion instead of a NameError.
        check_value = None
        for i_container, i_content, i_chunk, i_value in res:
            if (i_container, i_content, i_chunk) == key:
                check_value = i_value

        self.assertIsNotNone(check_value)
        self.assertIsNotNone(check_value.get('rtime'))

    @unittest.skipIf(len(get_config()['rawx']) != 3,
                     "The number of rawx must be 3")
    def test_rebuild_no_spare(self):
        """With three copies on three rawx, the rebuild must raise
        ``SpareChunkException``."""
        # push a new content
        content = TestContent(self.conf, self.account,
                              self.container_name, "mycontent", "THREECOPIES")
        data = "azerty"
        content.add_chunk(data, pos='0', rawx=0)
        content.add_chunk(data, pos='0', rawx=1)
        content.add_chunk(data, pos='0', rawx=2)

        self._push_content(content)

        # rebuild the first rawx
        rebuilder = BlobRebuilderWorker(self.gridconf, None,
                                        self.conf['rawx'][0]['addr'])

        self.assertRaises(SpareChunkException, rebuilder.chunk_rebuild,
                          content.container_id, content.content_id,
                          content.chunks[0].id)

    def test_rebuild_upload_failed(self):
        """A failing ``chunk_copy`` must surface as
        ``UnrecoverableContent``."""
        # push a new content
        content = TestContent(self.conf, self.account,
                              self.container_name, "mycontent", "TWOCOPIES")
        data = "azerty"
        content.add_chunk(data, pos='0', rawx=0)
        content.add_chunk(data, pos='0', rawx=1)

        self._push_content(content)

        # rebuild the first rawx
        rebuilder = BlobRebuilderWorker(self.gridconf, None,
                                        self.conf['rawx'][0]['addr'])

        # Force upload to raise an exception
        with patch('oio.content.content.BlobClient') as MockClass:
            instance = MockClass.return_value
            instance.chunk_copy.side_effect = Exception("xx")
            self.assertRaises(UnrecoverableContent, rebuilder.chunk_rebuild,
                              content.container_id, content.content_id,
                              content.chunks[0].id)

    def test_rebuild_nonexistent_chunk(self):
        """Unknown container, content and chunk ids must raise
        ``OrphanChunk``."""
        rebuilder = BlobRebuilderWorker(self.gridconf, None,
                                        self.conf['rawx'][0]['addr'])

        # try to rebuild a nonexistent chunk
        self.assertRaises(OrphanChunk, rebuilder.chunk_rebuild,
                          64 * '0', 32 * '0', 64 * '0')

    def test_rebuild_orphan_chunk(self):
        """An unknown chunk id of an existing content must raise
        ``OrphanChunk``."""
        # push a new content
        content = TestContent(self.conf, self.account,
                              self.container_name, "mycontent", "TWOCOPIES")
        data = "azerty"
        content.add_chunk(data, pos='0', rawx=0)
        content.add_chunk(data, pos='0', rawx=1)

        self._push_content(content)

        # rebuild the first rawx
        rebuilder = BlobRebuilderWorker(self.gridconf, None,
                                        self.conf['rawx'][0]['addr'])

        # try to rebuild a nonexistent chunk
        self.assertRaises(OrphanChunk, rebuilder.chunk_rebuild,
                          content.container_id, content.content_id, 64 * '0')

    def test_rebuild_with_no_copy(self):
        """The only chunk of a SINGLE content cannot be rebuilt."""
        # push a new content
        content = TestContent(self.conf, self.account,
                              self.container_name, "mycontent", "SINGLE")
        data = "azerty"
        content.add_chunk(data, pos='0', rawx=0)

        self._push_content(content)

        # rebuild the first rawx
        rebuilder = BlobRebuilderWorker(self.gridconf, None,
                                        self.conf['rawx'][0]['addr'])

        # try to rebuild chunk without copy
        self.assertRaises(UnrecoverableContent, rebuilder.chunk_rebuild,
                          content.container_id, content.content_id,
                          content.chunks[0].id)
Example #16
0
class TestPlainContent(BaseTestCase):
    """Functional tests for contents using the plain storage policies.

    Exercises creation, chunk rebuild and fetch through ``ContentFactory``
    with the SINGLE, TWOCOPIES and THREECOPIES storage policies.
    """

    def setUp(self):
        """Create the clients and a fresh container used by every test."""
        super(TestPlainContent, self).setUp()

        if len(self.conf['services']['rawx']) < 4:
            self.skipTest(
                "Plain tests needs more than 3 rawx to run")

        self.namespace = self.conf['namespace']
        self.account = self.conf['account']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_client = ContainerClient(self.gridconf)
        self.blob_client = BlobClient(self.conf)
        # The current timestamp is part of the container name.
        self.container_name = "TestPlainContent-%f" % time.time()
        self.container_client.container_create(account=self.account,
                                               reference=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()
        self.content = random_str(64)
        self.stgpol = "SINGLE"
        self.stgpol_twocopies = "TWOCOPIES"
        self.stgpol_threecopies = "THREECOPIES"

    def _test_create(self, stgpol, data_size):
        """Create a content and check its metadata and every chunk.

        :param stgpol: storage policy name; must be one of ``self.stgpol``,
            ``self.stgpol_twocopies`` or ``self.stgpol_threecopies``.
        :param data_size: number of random bytes to upload.
        """
        data = random_data(data_size)
        content = self.content_factory.new(self.container_id, self.content,
                                           len(data), stgpol)

        content.create(BytesIO(data))

        meta, chunks = self.container_client.content_locate(
            cid=self.container_id, content=content.content_id)
        self.assertEqual(meta['hash'], md5_data(data))
        self.assertEqual(meta['length'], str(len(data)))
        self.assertEqual(meta['policy'], stgpol)
        self.assertEqual(meta['name'], self.content)

        metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size))
        if metachunk_nb == 0:
            metachunk_nb = 1  # special case for empty content

        chunks = ChunksHelper(chunks)

        # Expected number of copies per metachunk for each known policy.
        copies_by_policy = {
            self.stgpol: 1,
            self.stgpol_twocopies: 2,
            self.stgpol_threecopies: 3,
        }
        if stgpol not in copies_by_policy:
            # Fail explicitly instead of hitting an unbound local below.
            self.fail("unsupported storage policy: %r" % (stgpol,))
        nb_copy = copies_by_policy[stgpol]

        self.assertEqual(len(chunks), metachunk_nb * nb_copy)

        for pos in range(metachunk_nb):
            chunks_at_pos = chunks.filter(pos=pos)
            self.assertEqual(len(chunks_at_pos), nb_copy)

            data_begin = pos * self.chunk_size
            data_end = pos * self.chunk_size + self.chunk_size
            chunk_hash = md5_data(data[data_begin:data_end])

            for chunk in chunks_at_pos:
                # Named chunk_meta so it does not shadow the content-level
                # `meta` returned by content_locate above.
                chunk_meta, stream = self.blob_client.chunk_get(chunk.url)
                self.assertEqual(md5_stream(stream), chunk_hash)
                self.assertEqual(chunk_meta['content_path'], self.content)
                self.assertEqual(chunk_meta['container_id'],
                                 self.container_id)
                # Compare against the content object: comparing the value
                # with itself would always pass.
                self.assertEqual(chunk_meta['content_id'],
                                 content.content_id)
                self.assertEqual(chunk_meta['chunk_id'], chunk.id)
                self.assertEqual(chunk_meta['chunk_pos'], str(pos))
                # Check that chunk data matches chunk hash from xattr
                self.assertEqual(chunk_meta['chunk_hash'], chunk_hash)
                # Check that chunk data matches chunk hash from database
                self.assertEqual(chunk.checksum, chunk_hash)
                full_path = encode_fullpath(
                    self.account, self.container_name, self.content,
                    chunk_meta['content_version'], chunk_meta['content_id'])
                self.assertEqual(chunk_meta['full_path'], full_path)
                self.assertEqual(chunk_meta['oio_version'], '4.2')

    def test_twocopies_create_0_byte(self):
        self._test_create(self.stgpol_twocopies, 0)

    def test_twocopies_create_1_byte(self):
        self._test_create(self.stgpol_twocopies, 1)

    def test_twocopies_create_chunksize_bytes(self):
        self._test_create(self.stgpol_twocopies, self.chunk_size)

    def test_twocopies_create_chunksize_plus_1_bytes(self):
        self._test_create(self.stgpol_twocopies, self.chunk_size + 1)

    def test_twocopies_create_6294503_bytes(self):
        self._test_create(self.stgpol_twocopies, 6294503)

    def test_single_create_0_byte(self):
        self._test_create(self.stgpol, 0)

    def test_single_create_chunksize_plus_1_bytes(self):
        self._test_create(self.stgpol, self.chunk_size + 1)

    def _new_content(self, stgpol, data, broken_pos_list=None):
        """Upload `data`, then delete the chunks listed as broken.

        :param stgpol: storage policy name used for the upload.
        :param data: payload to upload.
        :param broken_pos_list: iterable of ``(pos, idx)`` pairs; for each,
            the chunk at index `idx` among the chunks at position `pos` is
            downloaded (to record its metadata and hash) and then deleted.
            Defaults to no broken chunk.
        :returns: a tuple ``(content, broken_chunks_info)`` where `content`
            is reloaded through the factory and `broken_chunks_info` maps
            ``pos -> idx -> {"url", "id", "hash", "dl_meta", "dl_hash"}``.
        """
        # None instead of a mutable default shared between calls.
        broken_pos_list = broken_pos_list or []

        old_content = self.content_factory.new(
            self.container_id, self.content, len(data), stgpol)

        old_content.create(BytesIO(data))

        broken_chunks_info = {}
        for pos, idx in broken_pos_list:
            c = old_content.chunks.filter(pos=pos)[idx]
            # Keep what was stored before the chunk disappears, so that a
            # later rebuild can be compared against it.
            meta, stream = self.blob_client.chunk_get(c.url)
            broken_chunks_info.setdefault(pos, {})[idx] = {
                "url": c.url,
                "id": c.id,
                "hash": c.checksum,
                "dl_meta": meta,
                "dl_hash": md5_stream(stream)
            }
            self.blob_client.chunk_delete(c.url)

        # get the new structure of the uploaded content
        return (self.content_factory.get(
            self.container_id, old_content.content_id), broken_chunks_info)

    def _rebuild_and_check(self, content, broken_chunks_info, full_rebuild_pos,
                           allow_frozen_container=False):
        """Rebuild one broken chunk and compare it with the original.

        :param content: content object as returned by `_new_content`.
        :param broken_chunks_info: mapping returned by `_new_content`.
        :param full_rebuild_pos: ``(pos, idx)`` pair selecting, within
            `broken_chunks_info`, the chunk to rebuild.
        :param allow_frozen_container: forwarded to ``rebuild_chunk``.
        """
        rebuild_pos, rebuild_idx = full_rebuild_pos
        rebuild_chunk_info = broken_chunks_info[rebuild_pos][rebuild_idx]
        content.rebuild_chunk(rebuild_chunk_info["id"],
                              allow_frozen_container=allow_frozen_container)

        # get the new structure of the content
        rebuilt_content = self.content_factory.get(self.container_id,
                                                   content.content_id)

        # find the rebuilt chunk
        for c in rebuilt_content.chunks.filter(pos=rebuild_pos):
            if len(content.chunks.filter(id=c.id)) > 0:
                # This chunk id was already known before the rebuild:
                # it is not the rebuilt chunk.
                # if this chunk is broken, it must not have been rebuilt
                for b_c_i in broken_chunks_info[rebuild_pos].values():
                    if c.id == b_c_i["id"]:
                        with ExpectedException(NotFound):
                            _, _ = self.blob_client.chunk_get(c.url)
                continue
            meta, stream = self.blob_client.chunk_get(c.url)
            self.assertEqual(meta["chunk_id"], c.id)
            self.assertEqual(md5_stream(stream),
                             rebuild_chunk_info["dl_hash"])
            self.assertEqual(c.checksum, rebuild_chunk_info["hash"])
            self.assertThat(c.url, NotEquals(rebuild_chunk_info["url"]))
            # The chunk id is checked above; drop it from both sides so
            # that the remaining metadata can be compared as a whole.
            del meta["chunk_id"]
            del rebuild_chunk_info["dl_meta"]["chunk_id"]
            self.assertEqual(meta, rebuild_chunk_info["dl_meta"])

    def _test_rebuild(self, stgpol, data_size, broken_pos_list,
                      full_rebuild_pos):
        """Upload random data, break chunks, then rebuild and check one.

        :param stgpol: storage policy name used for the upload.
        :param data_size: number of random bytes to upload.
        :param broken_pos_list: ``(pos, idx)`` pairs of chunks to delete.
        :param full_rebuild_pos: ``(pos, idx)`` pair of the chunk to rebuild.
        """
        data = random_data(data_size)
        content, broken_chunks_info = self._new_content(
            stgpol, data, broken_pos_list)

        self._rebuild_and_check(content, broken_chunks_info, full_rebuild_pos)

    def test_2copies_content_0_byte_1broken_rebuild_pos_0_idx_0(self):
        self._test_rebuild(self.stgpol_twocopies, 0, [(0, 0)], (0, 0))

    def test_2copies_content_1_byte_1broken_rebuild_pos_0_idx_1(self):
        self._test_rebuild(self.stgpol_twocopies, 1, [(0, 1)], (0, 1))

    def test_3copies_content_chunksize_bytes_2broken_rebuild_pos_0_idx_1(self):
        if len(self.conf['services']['rawx']) <= 3:
            self.skipTest("Need more than 3 rawx")
        self._test_rebuild(self.stgpol_threecopies, self.chunk_size,
                           [(0, 0), (0, 1)], (0, 1))

    def test_3copies_content_2xchksize_bytes_2broken_rebuild_pos_1_idx_2(self):
        self._test_rebuild(self.stgpol_threecopies, 2 * self.chunk_size,
                           [(1, 0), (1, 2)], (1, 2))

    def test_2copies_content_0_byte_2broken_rebuild_pos_0_idx_0(self):
        # Both copies are deleted, so the rebuild is expected to fail.
        with ExpectedException(UnrecoverableContent):
            self._test_rebuild(
                self.stgpol_twocopies, 0, [(0, 0), (0, 1)], (0, 0))

    def test_rebuild_chunk_in_frozen_container(self):
        data = random_data(self.chunk_size)
        content, broken_chunks_info = self._new_content(
            self.stgpol_twocopies, data, [(0, 0)])
        full_rebuild_pos = (0, 0)

        # Set the container status to OIO_DB_FROZEN.
        system = dict()
        system['sys.status'] = str(OIO_DB_FROZEN)
        self.container_client.container_set_properties(
            self.account, self.container_name, None, system=system)

        try:
            rebuild_pos, rebuild_idx = full_rebuild_pos
            rebuild_chunk_info = broken_chunks_info[rebuild_pos][rebuild_idx]
            # Without allow_frozen_container, ServiceBusy is expected.
            self.assertRaises(ServiceBusy,
                              content.rebuild_chunk, rebuild_chunk_info["id"])
        finally:
            # Always set the status back to OIO_DB_ENABLED, even when the
            # assertion above fails.
            system['sys.status'] = str(OIO_DB_ENABLED)
            self.container_client.container_set_properties(
                self.account, self.container_name, None, system=system)

        self._rebuild_and_check(content, broken_chunks_info, full_rebuild_pos,
                                allow_frozen_container=True)

    def _test_fetch(self, stgpol, data_size, broken_pos_list):
        """Upload random data, break chunks, then fetch and compare.

        :param stgpol: storage policy name used for the upload.
        :param data_size: number of random bytes to upload.
        :param broken_pos_list: ``(pos, idx)`` pairs of chunks to delete
            before fetching.
        """
        data = random_data(data_size)
        content, _ = self._new_content(stgpol, data, broken_pos_list)

        fetched_data = "".join(content.fetch())

        self.assertEqual(fetched_data, data)

        for pos, idx in broken_pos_list:
            # check nothing has been rebuilt
            # Look at the copy that was actually deleted (index `idx`),
            # not always the first one at this position.
            c = content.chunks.filter(pos=pos)[idx]
            self.assertRaises(NotFound, self.blob_client.chunk_delete, c.url)

    def test_twocopies_fetch_content_0_byte_without_broken_chunks(self):
        self._test_fetch(self.stgpol_twocopies, 0, [])

    def test_twocopies_fetch_content_0_byte_with_broken_0_0(self):
        self._test_fetch(self.stgpol_twocopies, 0, [(0, 0)])

    def test_twocopies_fetch_content_1_byte_without_broken_chunks(self):
        self._test_fetch(self.stgpol_twocopies, 1, [])

    def test_twocopies_fetch_content_1_byte_with_broken_0_0(self):
        self._test_fetch(self.stgpol_twocopies, 1, [(0, 0)])

    def test_twocopies_fetch_chunksize_bytes_without_broken_chunks(self):
        self._test_fetch(self.stgpol_twocopies, self.chunk_size, [])

    def test_twocopies_fetch_2xchuksize_bytes_with_broken_0_0_and_1_0(self):
        self._test_fetch(
            self.stgpol_twocopies, self.chunk_size * 2, [(0, 0), (1, 0)])

    def test_twocopies_fetch_content_chunksize_bytes_2_broken_chunks(self):
        data = random_data(self.chunk_size)
        content, _ = self._new_content(
            self.stgpol_twocopies, data, [(0, 0), (0, 1)])
        gen = content.fetch()
        # The error is expected when the first item is requested from the
        # generator, hence the bound `next` method is what gets called.
        self.assertRaises(UnrecoverableContent, gen.next)

    def test_single_fetch_content_1_byte_without_broken_chunks(self):
        self._test_fetch(self.stgpol, 1, [])

    def test_single_fetch_chunksize_bytes_plus_1_without_broken_chunk(self):
        # NOTE(review): the name says "chunksize plus 1" but the size used
        # is twice the chunk size -- confirm which one is intended.
        self._test_fetch(self.stgpol, self.chunk_size * 2, [])
Example #17
0
class TestECContent(BaseTestCase):
    """Functional tests for contents using the "EC" storage policy.

    Exercises creation, chunk rebuild and fetch through ``ContentFactory``,
    including cases where several chunks have been deleted.
    """

    def setUp(self):
        """Create the clients and a fresh container used by every test."""
        super(TestECContent, self).setUp()

        if len(self.conf['services']['rawx']) < 12:
            self.skipTest("Not enough rawx. "
                          "EC tests needs at least 12 rawx to run")

        self.namespace = self.conf['namespace']
        self.account = self.conf['account']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_client = ContainerClient(self.gridconf)
        self.blob_client = BlobClient()
        # The current timestamp is part of the container name.
        self.container_name = "TestECContent%f" % time.time()
        self.container_client.container_create(acct=self.account,
                                               ref=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()
        self.content = random_str(64)
        self.stgpol = "EC"
        self.size = 1024*1024 + 320
        # random_chunks() draws sub-positions from range(k + m).
        # presumably k data + m parity chunks -- TODO confirm
        self.k = 6
        self.m = 3

    def tearDown(self):
        super(TestECContent, self).tearDown()

    def random_chunks(self, nb):
        """Return `nb` distinct random chunk positions.

        Each position is a string ``"0.<i>"`` where ``i`` is drawn without
        replacement from ``range(self.k + self.m)``.
        """
        sub_positions = random.sample(xrange(self.k + self.m), nb)
        return ["0.%s" % i for i in sub_positions]

    def _test_create(self, data_size):
        """Create an EC content and check its metadata and every chunk.

        :param data_size: number of random bytes to upload.
        """
        # generate random test data
        data = random_data(data_size)
        # using factory create new EC content
        content = self.content_factory.new(
            self.container_id, self.content, len(data), self.stgpol)
        # verify the factory gave us an ECContent
        self.assertEqual(type(content), ECContent)

        # perform the content creation
        content.create(StringIO(data))

        meta, chunks = self.container_client.content_show(
            cid=self.container_id, content=content.content_id)
        # verify metadata
        chunks = ChunksHelper(chunks)
        self.assertEqual(meta['hash'], md5_data(data))
        self.assertEqual(meta['length'], str(len(data)))
        self.assertEqual(meta['policy'], self.stgpol)
        self.assertEqual(meta['name'], self.content)

        metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size)) \
            if len(data) != 0 else 1

        # verify each metachunk
        for metapos in range(metachunk_nb):
            chunks_at_pos = content.chunks.filter(metapos=metapos)

            for chunk in chunks_at_pos:
                # Named chunk_meta so it does not shadow the content-level
                # `meta` returned by content_show above.
                chunk_meta, stream = self.blob_client.chunk_get(chunk.url)
                self.assertEqual(chunk_meta['metachunk_size'],
                                 str(chunk.size))
                self.assertEqual(chunk_meta['metachunk_hash'],
                                 chunk.checksum)
                self.assertEqual(chunk_meta['content_path'], self.content)
                self.assertEqual(chunk_meta['container_id'],
                                 self.container_id)
                # Compare against the content object: comparing the value
                # with itself would always pass.
                self.assertEqual(chunk_meta['content_id'],
                                 content.content_id)
                self.assertEqual(chunk_meta['chunk_id'], chunk.id)
                self.assertEqual(chunk_meta['chunk_pos'], chunk.pos)
                self.assertEqual(chunk_meta['chunk_hash'],
                                 md5_stream(stream))

    def test_create_0_byte(self):
        self._test_create(0)

    def test_create_1_byte(self):
        self._test_create(1)

    def test_create(self):
        self._test_create(DAT_LEGIT_SIZE)

    def _test_rebuild(self, data_size, broken_pos_list):
        """Upload random data, break and rebuild chunks, then verify them.

        :param data_size: number of random bytes to upload.
        :param broken_pos_list: chunk positions (as produced by
            `random_chunks`) to delete and rebuild.
        """
        # generate test data
        data = os.urandom(data_size)
        # create initial content
        old_content = self.content_factory.new(
            self.container_id, self.content, len(data), self.stgpol)
        # verify factory work as intended
        self.assertEqual(type(old_content), ECContent)

        # perform initial content creation
        old_content.create(StringIO(data))

        uploaded_content = self.content_factory.get(self.container_id,
                                                    old_content.content_id)

        # break the content
        # NOTE(review): each chunk is rebuilt right after being deleted,
        # inside this loop, and `uploaded_content` is not reloaded between
        # iterations -- confirm this is what
        # test_content_rebuild_unrecoverable relies on.
        old_info = {}
        for pos in broken_pos_list:
            c = uploaded_content.chunks.filter(pos=pos)[0]
            # Record what was stored before the chunk is deleted.
            meta, stream = self.blob_client.chunk_get(c.url)
            old_info[pos] = {
                "url": c.url,
                "id": c.id,
                "hash": c.checksum,
                "dl_meta": meta,
                "dl_hash": md5_stream(stream),
            }
            # delete the chunk
            self.blob_client.chunk_delete(c.url)

            # rebuild the broken chunks
            uploaded_content.rebuild_chunk(c.id)

        rebuilt_content = self.content_factory.get(self.container_id,
                                                   uploaded_content.content_id)
        # sanity check
        self.assertEqual(type(rebuilt_content), ECContent)

        # verify rebuild result
        for pos in broken_pos_list:
            c = rebuilt_content.chunks.filter(pos=pos)[0]
            rebuilt_meta, rebuilt_stream = self.blob_client.chunk_get(c.url)
            self.assertEqual(rebuilt_meta["chunk_id"], c.id)
            self.assertEqual(md5_stream(rebuilt_stream),
                             old_info[pos]["dl_hash"])
            self.assertEqual(c.checksum, old_info[pos]["hash"])
            self.assertNotEqual(c.url, old_info[pos]["url"])
            # The chunk id is checked above; drop it from both sides so
            # that the remaining metadata can be compared as a whole.
            del old_info[pos]["dl_meta"]["chunk_id"]
            del rebuilt_meta["chunk_id"]
            self.assertEqual(rebuilt_meta, old_info[pos]["dl_meta"])

    def test_content_0_byte_rebuild(self):
        self._test_rebuild(0, self.random_chunks(1))

    def test_content_0_byte_rebuild_advanced(self):
        self._test_rebuild(0, self.random_chunks(3))

    def test_content_1_byte_rebuild(self):
        self._test_rebuild(1, self.random_chunks(1))

    def test_content_1_byte_rebuild_advanced(self):
        self._test_rebuild(1, self.random_chunks(3))

    def test_content_rebuild(self):
        self._test_rebuild(DAT_LEGIT_SIZE, self.random_chunks(1))

    def test_content_rebuild_advanced(self):
        self._test_rebuild(DAT_LEGIT_SIZE, self.random_chunks(3))

    def test_content_rebuild_unrecoverable(self):
        self.assertRaises(
            UnrecoverableContent, self._test_rebuild, DAT_LEGIT_SIZE,
            self.random_chunks(4))

    def _new_content(self, data, broken_pos_list=None):
        """Upload `data` as an EC content, then delete the listed chunks.

        :param data: payload to upload.
        :param broken_pos_list: chunk positions whose chunk is deleted
            after the upload. Defaults to no broken chunk.
        :returns: the content, reloaded through the factory.
        """
        # None instead of a mutable default shared between calls.
        broken_pos_list = broken_pos_list or []

        old_content = self.content_factory.new(
            self.container_id, self.content, len(data), self.stgpol)
        self.assertEqual(type(old_content), ECContent)

        old_content.create(StringIO(data))

        # break content
        for pos in broken_pos_list:
            c = old_content.chunks.filter(pos=pos)[0]
            self.blob_client.chunk_delete(c.url)

        # get the new structure of the uploaded content
        return self.content_factory.get(self.container_id,
                                        old_content.content_id)

    def test_orphan_chunk(self):
        content = self._new_content(random_data(10))
        self.assertRaises(OrphanChunk, content.rebuild_chunk, "invalid")

    def _test_fetch(self, data_size, broken_pos_list=None):
        """Upload random data, break chunks, then fetch and compare.

        :param data_size: number of random bytes to upload.
        :param broken_pos_list: chunk positions to delete before fetching.
            Defaults to no broken chunk.
        """
        broken_pos_list = broken_pos_list or []
        test_data = random_data(data_size)
        content = self._new_content(test_data, broken_pos_list)

        data = "".join(content.fetch())

        self.assertEqual(len(data), len(test_data))
        self.assertEqual(md5_data(data), md5_data(test_data))

        # verify that chunks are broken
        for pos in broken_pos_list:
            chunk = content.chunks.filter(pos=pos)[0]
            self.assertRaises(
                NotFound, self.blob_client.chunk_delete, chunk.url)

    def test_fetch_content_0_byte(self):
        self._test_fetch(0)

    def test_fetch_content_1_byte(self):
        self._test_fetch(1)

    def test_fetch_content(self):
        self._test_fetch(DAT_LEGIT_SIZE)

    def test_fetch_content_0_byte_broken(self):
        self._test_fetch(0, self.random_chunks(3))

    def test_fetch_content_1_byte_broken(self):
        self._test_fetch(1, self.random_chunks(3))

    def test_fetch_content_broken(self):
        self._test_fetch(DAT_LEGIT_SIZE, self.random_chunks(3))

    def test_fetch_content_unrecoverable(self):
        broken_chunks = self.random_chunks(4)
        self.assertRaises(
            OioException, self._test_fetch, DAT_LEGIT_SIZE, broken_chunks)