Example #1
class TestPlainContent(BaseTestCase):
    def setUp(self):
        super(TestPlainContent, self).setUp()

        if len(self.conf['services']['rawx']) < 4:
            self.skipTest(
                "Plain tests need at least 4 rawx to run")

        self.namespace = self.conf['namespace']
        self.account = self.conf['account']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_client = ContainerClient(self.gridconf)
        self.blob_client = BlobClient(self.conf)
        self.container_name = "TestPlainContent-%f" % time.time()
        self.container_client.container_create(account=self.account,
                                               reference=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()
        self.content = random_str(64)
        self.stgpol = "SINGLE"
        self.stgpol_twocopies = "TWOCOPIES"
        self.stgpol_threecopies = "THREECOPIES"

    def _test_create(self, stgpol, data_size):
        data = random_data(data_size)
        content = self.content_factory.new(self.container_id, self.content,
                                           len(data), stgpol)

        content.create(BytesIO(data))

        meta, chunks = self.container_client.content_locate(
            cid=self.container_id, content=content.content_id)
        self.assertEqual(meta['hash'], md5_data(data))
        self.assertEqual(meta['length'], str(len(data)))
        self.assertEqual(meta['policy'], stgpol)
        self.assertEqual(meta['name'], self.content)

        metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size))
        if metachunk_nb == 0:
            metachunk_nb = 1  # special case for empty content
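        # worked example: with chunk_size = 1 MiB, a 0-byte content still
        # counts as 1 metachunk, 1 MiB fits in 1, and 1 MiB + 1 byte needs 2
        # (hence the "chunksize_plus_1" test cases below)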

        chunks = ChunksHelper(chunks)

        # derive the expected number of copies per metachunk from the policy
        if stgpol == self.stgpol_threecopies:
            nb_copy = 3
        elif stgpol == self.stgpol_twocopies:
            nb_copy = 2
        elif stgpol == self.stgpol:
            nb_copy = 1
        else:
            self.fail("Unexpected storage policy: %s" % stgpol)

        self.assertEqual(len(chunks), metachunk_nb * nb_copy)

        for pos in range(metachunk_nb):
            chunks_at_pos = chunks.filter(pos=pos)
            self.assertEqual(len(chunks_at_pos), nb_copy)

            data_begin = pos * self.chunk_size
            data_end = pos * self.chunk_size + self.chunk_size
            chunk_hash = md5_data(data[data_begin:data_end])

            for chunk in chunks_at_pos:
                meta, stream = self.blob_client.chunk_get(chunk.url)
                self.assertEqual(md5_stream(stream), chunk_hash)
                self.assertEqual(meta['content_path'], self.content)
                self.assertEqual(meta['container_id'], self.container_id)
                self.assertEqual(meta['content_id'], content.content_id)
                self.assertEqual(meta['chunk_id'], chunk.id)
                self.assertEqual(meta['chunk_pos'], str(pos))
                # Check that chunk data matches chunk hash from xattr
                self.assertEqual(meta['chunk_hash'], chunk_hash)
                # Check that chunk data matches chunk hash from database
                self.assertEqual(chunk.checksum, chunk_hash)
                full_path = encode_fullpath(
                    self.account, self.container_name, self.content,
                    meta['content_version'], meta['content_id'])
                self.assertEqual(meta['full_path'], full_path)
                self.assertEqual(meta['oio_version'], '4.2')

    def test_twocopies_create_0_byte(self):
        self._test_create(self.stgpol_twocopies, 0)

    def test_twocopies_create_1_byte(self):
        self._test_create(self.stgpol_twocopies, 1)

    def test_twocopies_create_chunksize_bytes(self):
        self._test_create(self.stgpol_twocopies, self.chunk_size)

    def test_twocopies_create_chunksize_plus_1_bytes(self):
        self._test_create(self.stgpol_twocopies, self.chunk_size + 1)

    def test_twocopies_create_6294503_bytes(self):
        self._test_create(self.stgpol_twocopies, 6294503)

    def test_single_create_0_byte(self):
        self._test_create(self.stgpol, 0)

    def test_single_create_chunksize_plus_1_bytes(self):
        self._test_create(self.stgpol, self.chunk_size + 1)

    def _new_content(self, stgpol, data, broken_pos_list=None):
        # avoid a shared mutable default argument
        broken_pos_list = broken_pos_list or []
        old_content = self.content_factory.new(
            self.container_id, self.content, len(data), stgpol)

        old_content.create(BytesIO(data))

        broken_chunks_info = {}
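        # record each deleted chunk's location, checksum and downloaded
        # payload hash, keyed by [pos][idx], so _rebuild_and_check() can
        # compare the rebuilt copy against the original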
        for pos, idx in broken_pos_list:
            c = old_content.chunks.filter(pos=pos)[idx]
            meta, stream = self.blob_client.chunk_get(c.url)
            if pos not in broken_chunks_info:
                broken_chunks_info[pos] = {}
            broken_chunks_info[pos][idx] = {
                "url": c.url,
                "id": c.id,
                "hash": c.checksum,
                "dl_meta": meta,
                "dl_hash": md5_stream(stream)
            }
            self.blob_client.chunk_delete(c.url)

        # get the new structure of the uploaded content
        return (self.content_factory.get(
            self.container_id, old_content.content_id), broken_chunks_info)

    def _rebuild_and_check(self, content, broken_chunks_info, full_rebuild_pos,
                           allow_frozen_container=False):
        rebuild_pos, rebuild_idx = full_rebuild_pos
        rebuild_chunk_info = broken_chunks_info[rebuild_pos][rebuild_idx]
        content.rebuild_chunk(rebuild_chunk_info["id"],
                              allow_frozen_container=allow_frozen_container)

        # get the new structure of the content
        rebuilt_content = self.content_factory.get(self.container_id,
                                                   content.content_id)

        # find the rebuilt chunk
        for c in rebuilt_content.chunks.filter(pos=rebuild_pos):
            if len(content.chunks.filter(id=c.id)) > 0:
                # not the rebuilt chunk
                # if this chunk is broken, it must not have been rebuilt
                for b_c_i in broken_chunks_info[rebuild_pos].values():
                    if c.id == b_c_i["id"]:
                        with ExpectedException(NotFound):
                            _, _ = self.blob_client.chunk_get(c.url)
                continue
            meta, stream = self.blob_client.chunk_get(c.url)
            self.assertEqual(meta["chunk_id"], c.id)
            self.assertEqual(md5_stream(stream),
                             rebuild_chunk_info["dl_hash"])
            self.assertEqual(c.checksum, rebuild_chunk_info["hash"])
            self.assertThat(c.url, NotEquals(rebuild_chunk_info["url"]))
            del meta["chunk_id"]
            del rebuild_chunk_info["dl_meta"]["chunk_id"]
            self.assertEqual(meta, rebuild_chunk_info["dl_meta"])

    def _test_rebuild(self, stgpol, data_size, broken_pos_list,
                      full_rebuild_pos):
        data = random_data(data_size)
        content, broken_chunks_info = self._new_content(
            stgpol, data, broken_pos_list)

        self._rebuild_and_check(content, broken_chunks_info, full_rebuild_pos)

    def test_2copies_content_0_byte_1broken_rebuild_pos_0_idx_0(self):
        self._test_rebuild(self.stgpol_twocopies, 0, [(0, 0)], (0, 0))

    def test_2copies_content_1_byte_1broken_rebuild_pos_0_idx_1(self):
        self._test_rebuild(self.stgpol_twocopies, 1, [(0, 1)], (0, 1))

    def test_3copies_content_chunksize_bytes_2broken_rebuild_pos_0_idx_1(self):
        if len(self.conf['services']['rawx']) <= 3:
            self.skipTest("Need more than 3 rawx")
        self._test_rebuild(self.stgpol_threecopies, self.chunk_size,
                           [(0, 0), (0, 1)], (0, 1))

    def test_3copies_content_2xchksize_bytes_2broken_rebuild_pos_1_idx_2(self):
        self._test_rebuild(self.stgpol_threecopies, 2 * self.chunk_size,
                           [(1, 0), (1, 2)], (1, 2))

    def test_2copies_content_0_byte_2broken_rebuild_pos_0_idx_0(self):
        with ExpectedException(UnrecoverableContent):
            self._test_rebuild(
                self.stgpol_twocopies, 0, [(0, 0), (0, 1)], (0, 0))

    def test_rebuild_chunk_in_frozen_container(self):
        data = random_data(self.chunk_size)
        content, broken_chunks_info = self._new_content(
            self.stgpol_twocopies, data, [(0, 0)])
        system = dict()
        system['sys.status'] = str(OIO_DB_FROZEN)
        self.container_client.container_set_properties(
            self.account, self.container_name, None, system=system)

        try:
            full_rebuild_pos = (0, 0)
            rebuild_pos, rebuild_idx = full_rebuild_pos
            rebuild_chunk_info = broken_chunks_info[rebuild_pos][rebuild_idx]
            self.assertRaises(ServiceBusy,
                              content.rebuild_chunk, rebuild_chunk_info["id"])
        finally:
            system['sys.status'] = str(OIO_DB_ENABLED)
            self.container_client.container_set_properties(
                self.account, self.container_name, None, system=system)

        self._rebuild_and_check(content, broken_chunks_info, full_rebuild_pos,
                                allow_frozen_container=True)

    def _test_fetch(self, stgpol, data_size, broken_pos_list):
        data = random_data(data_size)
        content, _ = self._new_content(stgpol, data, broken_pos_list)

        fetched_data = "".join(content.fetch())

        self.assertEqual(fetched_data, data)

        for pos, idx in broken_pos_list:
            # check nothing has been rebuilt
            c = content.chunks.filter(pos=pos)[0]
            self.assertRaises(NotFound, self.blob_client.chunk_delete, c.url)

    def test_twocopies_fetch_content_0_byte_without_broken_chunks(self):
        self._test_fetch(self.stgpol_twocopies, 0, [])

    def test_twocopies_fetch_content_0_byte_with_broken_0_0(self):
        self._test_fetch(self.stgpol_twocopies, 0, [(0, 0)])

    def test_twocopies_fetch_content_1_byte_without_broken_chunks(self):
        self._test_fetch(self.stgpol_twocopies, 1, [])

    def test_twocopies_fetch_content_1_byte_with_broken_0_0(self):
        self._test_fetch(self.stgpol_twocopies, 1, [(0, 0)])

    def test_twocopies_fetch_chunksize_bytes_without_broken_chunks(self):
        self._test_fetch(self.stgpol_twocopies, self.chunk_size, [])

    def test_twocopies_fetch_2xchuksize_bytes_with_broken_0_0_and_1_0(self):
        self._test_fetch(
            self.stgpol_twocopies, self.chunk_size * 2, [(0, 0), (1, 0)])

    def test_twocopies_fetch_content_chunksize_bytes_2_broken_chunks(self):
        data = random_data(self.chunk_size)
        content, _ = self._new_content(
            self.stgpol_twocopies, data, [(0, 0), (0, 1)])
        gen = content.fetch()
        self.assertRaises(UnrecoverableContent, gen.next)

    def test_single_fetch_content_1_byte_without_broken_chunks(self):
        self._test_fetch(self.stgpol, 1, [])

    def test_single_fetch_chunksize_bytes_plus_1_without_broken_chunk(self):
        self._test_fetch(self.stgpol, self.chunk_size + 1, [])
Example #2
class TestECContent(BaseTestCase):
    def setUp(self):
        super(TestECContent, self).setUp()

        if len(self.conf['services']['rawx']) < 12:
            self.skipTest("Not enough rawx. "
                          "EC tests needs at least 12 rawx to run")

        self.namespace = self.conf['namespace']
        self.account = self.conf['account']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_client = ContainerClient(self.gridconf)
        self.blob_client = BlobClient(self.conf)
        self.container_name = "TestECContent%f" % time.time()
        self.container_client.container_create(account=self.account,
                                               reference=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()
        self.content = "%s-%s" % (self.__class__.__name__, random_str(4))
        self.stgpol = "EC"
        self.size = 1024 * 1024 + 320
        self.k = 6  # number of EC data fragments
        self.m = 3  # number of EC parity fragments

    def tearDown(self):
        super(TestECContent, self).tearDown()

    def random_chunks(self, nb):
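        # pick `nb` distinct EC sub-positions in the first metachunk,
        # rendered in the "0.<subpos>" chunk position format used by EC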
        pos = random.sample(xrange(self.k + self.m), nb)
        return ["0.%s" % i for i in pos]

    def _test_create(self, data_size):
        # generate random test data
        data = random_data(data_size)
        # using factory create new EC content
        content = self.content_factory.new(self.container_id, self.content,
                                           len(data), self.stgpol)
        # verify the factory gave us an ECContent
        self.assertEqual(type(content), ECContent)

        # perform the content creation
        content.create(BytesIO(data))

        meta, chunks = self.container_client.content_locate(
            cid=self.container_id, content=content.content_id)
        # verify metadata
        chunks = ChunksHelper(chunks)
        self.assertEqual(meta['hash'], md5_data(data))
        self.assertEqual(meta['length'], str(len(data)))
        self.assertEqual(meta['policy'], self.stgpol)
        self.assertEqual(meta['name'], self.content)

        metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size)) \
            if len(data) != 0 else 1

        offset = 0
        # verify each metachunk
        for metapos in range(metachunk_nb):
            chunks_at_pos = content.chunks.filter(metapos=metapos)
            if len(chunks_at_pos) < 1:
                break
            metachunk_size = chunks_at_pos[0].size
            metachunk_hash = md5_data(data[offset:offset + metachunk_size])

            for chunk in chunks_at_pos:
                meta, stream = self.blob_client.chunk_get(chunk.url)
                self.assertEqual(meta['metachunk_size'], str(chunk.size))
                self.assertEqual(meta['metachunk_hash'], chunk.checksum)
                self.assertEqual(meta['content_path'], self.content)
                self.assertEqual(meta['container_id'], self.container_id)
                self.assertEqual(meta['content_id'], content.content_id)
                self.assertEqual(meta['chunk_id'], chunk.id)
                self.assertEqual(meta['chunk_pos'], chunk.pos)
                self.assertEqual(meta['chunk_hash'], md5_stream(stream))
                full_path = encode_fullpath(self.account, self.container_name,
                                            self.content,
                                            meta['content_version'],
                                            meta['content_id'])
                self.assertEqual(meta['full_path'], full_path)
                self.assertEqual(meta['oio_version'], '4.2')
                self.assertEqual(metachunk_hash, chunk.checksum)

            offset += metachunk_size

    def test_create_0_byte(self):
        self._test_create(0)

    def test_create_1_byte(self):
        self._test_create(1)

    def test_create(self):
        self._test_create(DAT_LEGIT_SIZE)

    def test_create_6294503_bytes(self):
        self._test_create(6294503)

    def _test_rebuild(self, data_size, broken_pos_list):
        # generate test data
        data = os.urandom(data_size)
        # create initial content
        old_content = self.content_factory.new(self.container_id, self.content,
                                               len(data), self.stgpol)
        # verify factory work as intended
        self.assertEqual(type(old_content), ECContent)

        # perform initial content creation
        old_content.create(BytesIO(data))

        uploaded_content = self.content_factory.get(self.container_id,
                                                    old_content.content_id)

        # break the content
        old_info = {}
        for pos in broken_pos_list:
            old_info[pos] = {}
            c = uploaded_content.chunks.filter(pos=pos)[0]
            old_info[pos]["url"] = c.url
            old_info[pos]["id"] = c.id
            old_info[pos]["hash"] = c.checksum
            chunk_id_to_rebuild = c.id
            meta, stream = self.blob_client.chunk_get(c.url)
            old_info[pos]["dl_meta"] = meta
            old_info[pos]["dl_hash"] = md5_stream(stream)
            # delete the chunk
            self.blob_client.chunk_delete(c.url)

            # rebuild the chunk we just deleted
            uploaded_content.rebuild_chunk(chunk_id_to_rebuild)

        rebuilt_content = self.content_factory.get(self.container_id,
                                                   uploaded_content.content_id)
        # sanity check
        self.assertEqual(type(rebuilt_content), ECContent)

        # verify rebuild result
        for pos in broken_pos_list:
            c = rebuilt_content.chunks.filter(pos=pos)[0]
            rebuilt_meta, rebuilt_stream = self.blob_client.chunk_get(c.url)
            self.assertEqual(rebuilt_meta["chunk_id"], c.id)
            self.assertEqual(md5_stream(rebuilt_stream),
                             old_info[pos]["dl_hash"])
            self.assertEqual(c.checksum, old_info[pos]["hash"])
            self.assertNotEqual(c.url, old_info[pos]["url"])
            self.assertGreaterEqual(rebuilt_meta['chunk_mtime'],
                                    old_info[pos]['dl_meta']['chunk_mtime'])
            del old_info[pos]["dl_meta"]["chunk_mtime"]
            del rebuilt_meta["chunk_mtime"]
            del old_info[pos]["dl_meta"]["chunk_id"]
            del rebuilt_meta["chunk_id"]
            self.assertEqual(rebuilt_meta, old_info[pos]["dl_meta"])

    def test_content_0_byte_rebuild(self):
        self._test_rebuild(0, self.random_chunks(1))

    def test_content_0_byte_rebuild_advanced(self):
        self._test_rebuild(0, self.random_chunks(3))

    def test_content_1_byte_rebuild(self):
        self._test_rebuild(1, self.random_chunks(1))

    def test_content_1_byte_rebuild_advanced(self):
        self._test_rebuild(1, self.random_chunks(3))

    def test_content_rebuild(self):
        self._test_rebuild(DAT_LEGIT_SIZE, self.random_chunks(1))

    def test_content_rebuild_advanced(self):
        self._test_rebuild(DAT_LEGIT_SIZE, self.random_chunks(3))

    def test_content_rebuild_unrecoverable(self):
        self.assertRaises(UnrecoverableContent, self._test_rebuild,
                          DAT_LEGIT_SIZE, self.random_chunks(4))

    def _new_content(self, data, broken_pos_list=None):
        # avoid a shared mutable default argument
        broken_pos_list = broken_pos_list or []
        old_content = self.content_factory.new(self.container_id, self.content,
                                               len(data), self.stgpol)
        self.assertEqual(type(old_content), ECContent)

        old_content.create(BytesIO(data))

        # break content
        for pos in broken_pos_list:
            c = old_content.chunks.filter(pos=pos)[0]
            self.blob_client.chunk_delete(c.url)

        # get the new structure of the uploaded content
        return self.content_factory.get(self.container_id,
                                        old_content.content_id)

    def test_orphan_chunk(self):
        content = self._new_content(random_data(10))
        self.assertRaises(OrphanChunk, content.rebuild_chunk, "invalid")

    def _test_fetch(self, data_size, broken_pos_list=None):
        broken_pos_list = broken_pos_list or []
        test_data = random_data(data_size)
        content = self._new_content(test_data, broken_pos_list)

        data = b''.join(content.fetch())

        self.assertEqual(len(data), len(test_data))
        self.assertEqual(md5_data(data), md5_data(test_data))

        # verify that chunks are broken
        for pos in broken_pos_list:
            chunk = content.chunks.filter(pos=pos)[0]
            self.assertRaises(NotFound, self.blob_client.chunk_delete,
                              chunk.url)

    def test_fetch_content_0_byte(self):
        self._test_fetch(0)

    def test_fetch_content_1_byte(self):
        self._test_fetch(1)

    def test_fetch_content(self):
        self._test_fetch(DAT_LEGIT_SIZE)

    def test_fetch_content_0_byte_broken(self):
        self._test_fetch(0, self.random_chunks(3))

    def test_fetch_content_1_byte_broken(self):
        self._test_fetch(1, self.random_chunks(3))

    def test_fetch_content_broken(self):
        self._test_fetch(DAT_LEGIT_SIZE, self.random_chunks(3))

    def test_fetch_content_unrecoverable(self):
        broken_chunks = self.random_chunks(4)
        self.assertRaises(OioException, self._test_fetch, DAT_LEGIT_SIZE,
                          broken_chunks)
Example #3
class TestBlobIndexer(BaseTestCase):
    def setUp(self):
        super(TestBlobIndexer, self).setUp()
        self.rdir_client = RdirClient(self.conf)
        self.blob_client = BlobClient(self.conf)
        _, self.rawx_path, rawx_addr, _ = \
            self.get_service_url('rawx')
        services = self.conscience.all_services('rawx')
        self.rawx_id = None
        for rawx in services:
            if rawx_addr == rawx['addr']:
                self.rawx_id = rawx['tags'].get('tag.service_id', None)
        if self.rawx_id is None:
            self.rawx_id = rawx_addr
        conf = self.conf.copy()
        conf['volume'] = self.rawx_path
        self.blob_indexer = BlobIndexer(conf)
        # clear rawx/rdir
        chunk_files = paths_gen(self.rawx_path)
        for chunk_file in chunk_files:
            os.remove(chunk_file)
        self.rdir_client.admin_clear(self.rawx_id, clear_all=True)

    def _put_chunk(self):
        account = random_str(16)
        container = random_str(16)
        cid = cid_from_name(account, container)
        content_path = random_str(16)
        content_version = 1234567890
        content_id = random_id(32)
        fullpath = encode_fullpath(account, container, content_path,
                                   content_version, content_id)
        chunk_id = random_chunk_id()
        data = random_buffer(string.printable, 100)
        meta = {
            'full_path': fullpath,
            'container_id': cid,
            'content_path': content_path,
            'version': content_version,
            'id': content_id,
            'chunk_method': 'ec/algo=liberasurecode_rs_vand,k=6,m=3',
            'policy': 'TESTPOLICY',
            'chunk_hash': md5(data).hexdigest().upper(),
            'oio_version': OIO_VERSION,
            'chunk_pos': 0,
            'metachunk_hash': md5().hexdigest(),
            'metachunk_size': 1024
        }
        self.blob_client.chunk_put('http://' + self.rawx_id + '/' + chunk_id,
                                   meta, data)
        sleep(1)  # ensure the chunk event has been processed
        return account, container, cid, content_path, content_version, \
            content_id, chunk_id

    def _delete_chunk(self, chunk_id):
        self.blob_client.chunk_delete('http://' + self.rawx_id + '/' +
                                      chunk_id)
        sleep(1)  # ensure the chunk event has been processed

    def _link_chunk(self, target_chunk_id):
        account = random_str(16)
        container = random_str(16)
        cid = cid_from_name(account, container)
        content_path = random_str(16)
        content_version = 1234567890
        content_id = random_id(32)
        fullpath = encode_fullpath(account, container, content_path,
                                   content_version, content_id)
        _, link = self.blob_client.chunk_link(
            'http://' + self.rawx_id + '/' + target_chunk_id, None, fullpath)
        chunk_id = link.split('/')[-1]
        sleep(1)  # ensure the chunk event has been processed
        return account, container, cid, content_path, content_version, \
            content_id, chunk_id

    def _chunk_path(self, chunk_id):
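        # rawx stores a chunk in a subdirectory named after the first
        # 3 hex digits of its chunk id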
        return self.rawx_path + '/' + chunk_id[:3] + '/' + chunk_id

    def test_blob_indexer(self):
        _, _, expected_cid, _, _, expected_content_id, expected_chunk_id = \
            self._put_chunk()

        chunks = list(self.rdir_client.chunk_fetch(self.rawx_id))
        self.assertEqual(1, len(chunks))
        cid, content_id, chunk_id, _ = chunks[0]
        self.assertEqual(expected_cid, cid)
        self.assertEqual(expected_content_id, content_id)
        self.assertEqual(expected_chunk_id, chunk_id)

        self.rdir_client.admin_clear(self.rawx_id, clear_all=True)
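        # the index was just wiped: one pass of the indexer must re-register
        # the chunk by crawling the rawx volume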
        self.blob_indexer.index_pass()
        self.assertEqual(1, self.blob_indexer.successes)
        self.assertEqual(0, self.blob_indexer.errors)

        chunks = self.rdir_client.chunk_fetch(self.rawx_id)
        chunks = list(chunks)
        self.assertEqual(1, len(chunks))
        cid, content_id, chunk_id, _ = chunks[0]
        self.assertEqual(expected_cid, cid)
        self.assertEqual(expected_content_id, content_id)
        self.assertEqual(expected_chunk_id, chunk_id)

        self._delete_chunk(expected_chunk_id)
        chunks = self.rdir_client.chunk_fetch(self.rawx_id)
        chunks = list(chunks)
        self.assertEqual(0, len(chunks))

    def test_blob_indexer_with_old_chunk(self):
        expected_account, expected_container, expected_cid, \
            expected_content_path, expected_content_version, \
            expected_content_id, expected_chunk_id = self._put_chunk()

        chunks = list(self.rdir_client.chunk_fetch(self.rawx_id))
        self.assertEqual(1, len(chunks))
        cid, content_id, chunk_id, _ = chunks[0]
        self.assertEqual(expected_cid, cid)
        self.assertEqual(expected_content_id, content_id)
        self.assertEqual(expected_chunk_id, chunk_id)

        convert_to_old_chunk(self._chunk_path(chunk_id), expected_account,
                             expected_container, expected_content_path,
                             expected_content_version, expected_content_id)

        self.rdir_client.admin_clear(self.rawx_id, clear_all=True)
        self.blob_indexer.index_pass()
        self.assertEqual(1, self.blob_indexer.successes)
        self.assertEqual(0, self.blob_indexer.errors)

        chunks = self.rdir_client.chunk_fetch(self.rawx_id)
        chunks = list(chunks)
        self.assertEqual(1, len(chunks))
        cid, content_id, chunk_id, _ = chunks[0]
        self.assertEqual(expected_cid, cid)
        self.assertEqual(expected_content_id, content_id)
        self.assertEqual(expected_chunk_id, chunk_id)

        self._delete_chunk(expected_chunk_id)
        chunks = self.rdir_client.chunk_fetch(self.rawx_id)
        chunks = list(chunks)
        self.assertEqual(0, len(chunks))

    def test_blob_indexer_with_linked_chunk(self):
        _, _, expected_cid, _, _, expected_content_id, expected_chunk_id = \
            self._put_chunk()

        chunks = self.rdir_client.chunk_fetch(self.rawx_id)
        chunks = list(chunks)
        self.assertEqual(1, len(chunks))
        cid, content_id, chunk_id, _ = chunks[0]
        self.assertEqual(expected_cid, cid)
        self.assertEqual(expected_content_id, content_id)
        self.assertEqual(expected_chunk_id, chunk_id)

        self.rdir_client.admin_clear(self.rawx_id, clear_all=True)
        self.blob_indexer.index_pass()
        self.assertEqual(1, self.blob_indexer.successes)
        self.assertEqual(0, self.blob_indexer.errors)

        chunks = self.rdir_client.chunk_fetch(self.rawx_id)
        chunks = list(chunks)
        self.assertEqual(1, len(chunks))
        cid, content_id, chunk_id, _ = chunks[0]
        self.assertEqual(expected_cid, cid)
        self.assertEqual(expected_content_id, content_id)
        self.assertEqual(expected_chunk_id, chunk_id)

        _, _, linked_cid, _, _, linked_content_id, linked_chunk_id = \
            self._link_chunk(expected_chunk_id)

        self.rdir_client.admin_clear(self.rawx_id, clear_all=True)
        self.blob_indexer.index_pass()
        self.assertEqual(2, self.blob_indexer.successes)
        self.assertEqual(0, self.blob_indexer.errors)

        chunks = self.rdir_client.chunk_fetch(self.rawx_id)
        chunks = list(chunks)
        self.assertEqual(2, len(chunks))
        self.assertNotEqual(chunks[0][2], chunks[1][2])
        for chunk in chunks:
            cid, content_id, chunk_id, _ = chunk
            if chunk_id == expected_chunk_id:
                self.assertEqual(expected_cid, cid)
                self.assertEqual(expected_content_id, content_id)
            else:
                self.assertEqual(linked_cid, cid)
                self.assertEqual(linked_content_id, content_id)
                self.assertEqual(linked_chunk_id, chunk_id)

        self._delete_chunk(expected_chunk_id)
        chunks = self.rdir_client.chunk_fetch(self.rawx_id)
        chunks = list(chunks)
        self.assertEqual(1, len(chunks))
        cid, content_id, chunk_id, _ = chunks[0]
        self.assertEqual(linked_cid, cid)
        self.assertEqual(linked_content_id, content_id)
        self.assertEqual(linked_chunk_id, chunk_id)

        self._delete_chunk(linked_chunk_id)
        chunks = self.rdir_client.chunk_fetch(self.rawx_id)
        chunks = list(chunks)
        self.assertEqual(0, len(chunks))
Example #4
class Content(object):
    def __init__(self, conf, container_id, metadata, chunks, storage_method):
        self.conf = conf
        self.container_id = container_id
        self.metadata = metadata
        self.chunks = ChunksHelper(chunks)
        self.storage_method = storage_method
        self.logger = get_logger(self.conf)
        self.cs_client = ConscienceClient(conf)
        self.blob_client = BlobClient()
        self.container_client = ContainerClient(self.conf)
        self.content_id = self.metadata["id"]
        self.stgpol = self.metadata["policy"]
        self.path = self.metadata["name"]
        self.length = int(self.metadata["length"])
        self.version = self.metadata["version"]
        self.checksum = self.metadata["hash"]
        self.mime_type = self.metadata["mime_type"]
        self.chunk_method = self.metadata["chunk_method"]

    def _get_spare_chunk(self, chunks_notin, chunks_broken):
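        # ask meta2 for spare chunk locations matching the storage policy,
        # avoiding the services already used by `chunks_notin` and the
        # failed locations listed in `chunks_broken`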
        spare_data = {
            "notin": ChunksHelper(chunks_notin, False).raw(),
            "broken": ChunksHelper(chunks_broken, False).raw()
        }
        try:
            spare_resp = self.container_client.content_spare(
                cid=self.container_id,
                content=self.content_id,
                data=spare_data,
                stgpol=self.stgpol)
        except ClientException as e:
            raise exc.SpareChunkException("No spare chunk (%s)" % e.message)

        url_list = []
        for c in spare_resp["chunks"]:
            url_list.append(c["id"])

        return url_list

    def _update_spare_chunk(self, current_chunk, new_url):
        old = [{
            'type': 'chunk',
            'id': current_chunk.url,
            'hash': current_chunk.checksum,
            'size': current_chunk.size,
            'pos': current_chunk.pos,
            'content': self.content_id
        }]
        new = [{
            'type': 'chunk',
            'id': new_url,
            'hash': current_chunk.checksum,
            'size': current_chunk.size,
            'pos': current_chunk.pos,
            'content': self.content_id
        }]
        update_data = {'old': old, 'new': new}

        self.container_client.container_raw_update(cid=self.container_id,
                                                   data=update_data)

    def _create_object(self):
        self.container_client.content_create(cid=self.container_id,
                                             path=self.path,
                                             content_id=self.content_id,
                                             stgpol=self.stgpol,
                                             size=self.length,
                                             checksum=self.checksum,
                                             version=self.version,
                                             chunk_method=self.chunk_method,
                                             mime_type=self.mime_type,
                                             data=self.chunks.raw())

    def rebuild_chunk(self, chunk_id, allow_same_rawx=False):
        raise NotImplementedError()

    def create(self, stream):
        raise NotImplementedError()

    def fetch(self):
        raise NotImplementedError()

    def delete(self):
        self.container_client.content_delete(cid=self.container_id,
                                             path=self.path)

    def move_chunk(self, chunk_id):
        current_chunk = self.chunks.filter(id=chunk_id).one()
        if current_chunk is None:
            raise OrphanChunk("Chunk not found in content")

        other_chunks = self.chunks.filter(
            metapos=current_chunk.metapos).exclude(id=chunk_id).all()

        spare_urls = self._get_spare_chunk(other_chunks, [current_chunk])

        self.logger.debug("copy chunk from %s to %s", current_chunk.url,
                          spare_urls[0])
        self.blob_client.chunk_copy(current_chunk.url, spare_urls[0])

        self._update_spare_chunk(current_chunk, spare_urls[0])

        try:
            self.blob_client.chunk_delete(current_chunk.url)
        except Exception:
            self.logger.warn("Failed to delete chunk %s", current_chunk.url)

        current_chunk.url = spare_urls[0]

        return current_chunk.raw()
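
# Minimal usage sketch for move_chunk() (illustrative, not part of the class
# above; assumes a ContentFactory wired to the same namespace):
#
#   factory = ContentFactory({"namespace": namespace})
#   content = factory.get(container_id, content_id)
#   chunk = content.chunks.filter(pos=0)[0]
#   content.move_chunk(chunk.id)  # copy to a spare rawx, delete the old copy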
Example #5
class TestDupContent(BaseTestCase):
    def setUp(self):
        super(TestDupContent, self).setUp()

        if len(self.conf['rawx']) < 3:
            self.skipTest("Not enough rawx. "
                          "Dup tests needs more than 2 rawx to run")

        self.namespace = self.conf['namespace']
        self.account = self.conf['account']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_client = ContainerClient(self.gridconf)
        self.blob_client = BlobClient()
        self.container_name = "TestDupContent%f" % time.time()
        self.container_client.container_create(acct=self.account,
                                               ref=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()

    def tearDown(self):
        super(TestDupContent, self).tearDown()

    def _test_upload(self, stgpol, data_size):
        data = random_data(data_size)
        content = self.content_factory.new(self.container_id, "titi",
                                           len(data), stgpol)
        self.assertEqual(type(content), DupContent)

        content.upload(StringIO.StringIO(data))

        meta, chunks = self.container_client.content_show(
            cid=self.container_id, content=content.content_id)
        chunks = ChunksHelper(chunks)
        self.assertEqual(meta['hash'], md5_data(data))
        self.assertEqual(meta['length'], str(len(data)))
        self.assertEqual(meta['policy'], stgpol)
        self.assertEqual(meta['name'], "titi")

        metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size))
        if metachunk_nb == 0:
            metachunk_nb = 1  # special case for empty content

        if stgpol == "THREECOPIES":
            nb_copy = 3
        elif stgpol == "TWOCOPIES":
            nb_copy = 2
        elif stgpol == "SINGLE":
            nb_copy = 1

        self.assertEqual(len(chunks), metachunk_nb * nb_copy)

        for pos in range(metachunk_nb):
            chunks_at_pos = chunks.filter(pos=pos)
            self.assertEqual(len(chunks_at_pos), nb_copy)

            data_begin = pos * self.chunk_size
            data_end = pos * self.chunk_size + self.chunk_size
            chunk_hash = md5_data(data[data_begin:data_end])

            for chunk in chunks_at_pos:
                meta, stream = self.blob_client.chunk_get(chunk.url)
                self.assertEqual(md5_stream(stream), chunk_hash)
                self.assertEqual(meta['content_size'], str(len(data)))
                self.assertEqual(meta['content_path'], "titi")
                self.assertEqual(meta['content_cid'], self.container_id)
                self.assertEqual(meta['content_id'], content.content_id)
                self.assertEqual(meta['chunk_id'], chunk.id)
                self.assertEqual(meta['chunk_pos'], str(pos))
                self.assertEqual(meta['chunk_hash'], chunk_hash)

    def test_twocopies_upload_0_byte(self):
        self._test_upload("TWOCOPIES", 0)

    def test_twocopies_upload_1_byte(self):
        self._test_upload("TWOCOPIES", 1)

    def test_twocopies_upload_chunksize_bytes(self):
        self._test_upload("TWOCOPIES", self.chunk_size)

    def test_twocopies_upload_chunksize_plus_1_bytes(self):
        self._test_upload("TWOCOPIES", self.chunk_size + 1)

    def test_single_upload_0_byte(self):
        self._test_upload("SINGLE", 0)

    def test_single_upload_chunksize_plus_1_bytes(self):
        self._test_upload("SINGLE", self.chunk_size + 1)

    def test_chunks_cleanup_when_upload_failed(self):
        data = random_data(2 * self.chunk_size)
        content = self.content_factory.new(self.container_id, "titi",
                                           len(data), "TWOCOPIES")
        self.assertEqual(type(content), DupContent)

        # set bad url for position 1
        for chunk in content.chunks.filter(pos=1):
            chunk.url = "http://127.0.0.1:9/DEADBEEF"

        self.assertRaises(Exception, content.upload, StringIO.StringIO(data))
        for chunk in content.chunks.exclude(pos=1):
            self.assertRaises(NotFound,
                              self.blob_client.chunk_head, chunk.url)

    def _new_content(self, stgpol, data, broken_pos_list):
        old_content = self.content_factory.new(self.container_id, "titi",
                                               len(data), stgpol)
        self.assertEqual(type(old_content), DupContent)

        old_content.upload(StringIO.StringIO(data))

        for pos, idx in broken_pos_list:
            c = old_content.chunks.filter(pos=pos)[idx]
            self.blob_client.chunk_delete(c.url)

        # get the new structure of the uploaded content
        return self.content_factory.get(self.container_id,
                                        old_content.content_id)

    def _test_download(self, stgpol, data_size, broken_pos_list):
        data = random_data(data_size)
        content = self._new_content(stgpol, data, broken_pos_list)

        downloaded_data = "".join(content.download())

        self.assertEqual(downloaded_data, data)

        for pos, idx in broken_pos_list:
            # check nothing has been rebuilt
            c = content.chunks.filter(pos=pos)[0]
            self.assertRaises(NotFound, self.blob_client.chunk_delete, c.url)

    def test_twocopies_download_content_0_byte_without_broken_chunks(self):
        self._test_download("TWOCOPIES", 0, [])

    def test_twocopies_download_content_0_byte_with_broken_0_0(self):
        self._test_download("TWOCOPIES", 0, [(0, 0)])

    def test_twocopies_download_content_1_byte_without_broken_chunks(self):
        self._test_download("TWOCOPIES", 1, [])

    def test_twocopies_download_content_1_byte_with_broken_0_0(self):
        self._test_download("TWOCOPIES", 1, [(0, 0)])

    def test_twocopies_download_chunksize_bytes_without_broken_chunks(self):
        self._test_download("TWOCOPIES", self.chunk_size, [])

    def test_twocopies_download_2xchuksize_bytes_with_broken_0_0_and_1_0(self):
        self._test_download("TWOCOPIES", self.chunk_size * 2, [(0, 0), (1, 0)])

    def test_twocopies_download_content_chunksize_bytes_2_broken_chunks(self):
        data = random_data(self.chunk_size)
        content = self._new_content("TWOCOPIES", data, [(0, 0), (0, 1)])
        gen = content.download()
        self.assertRaises(UnrecoverableContent, gen.next)

    def test_single_download_content_1_byte_without_broken_chunks(self):
        self._test_download("SINGLE", 1, [])

    def test_single_download_chunksize_bytes_plus_1_without_broken_chunk(self):
        self._test_download("SINGLE", self.chunk_size * 2, [])
Example #6
class BlobMoverWorker(object):
    def __init__(self, conf, logger, volume):
        self.conf = conf
        self.logger = logger or get_logger(conf)
        self.volume = volume
        self.run_time = 0
        self.passes = 0
        self.errors = 0
        self.last_reported = 0
        self.last_usage_check = 0
        self.chunks_run_time = 0
        self.bytes_running_time = 0
        self.bytes_processed = 0
        self.total_bytes_processed = 0
        self.total_chunks_processed = 0
        self.usage_target = int_value(conf.get('usage_target'), 0)
        self.usage_check_interval = int_value(conf.get('usage_check_interval'),
                                              3600)
        self.report_interval = int_value(conf.get('report_interval'), 3600)
        self.max_chunks_per_second = int_value(conf.get('chunks_per_second'),
                                               30)
        self.max_bytes_per_second = int_value(conf.get('bytes_per_second'),
                                              10000000)
        self.blob_client = BlobClient()
        self.container_client = ContainerClient(conf)

    def mover_pass(self):
        self.namespace, self.address = check_volume(self.volume)

        start_time = report_time = time.time()

        total_errors = 0
        mover_time = 0

        paths = paths_gen(self.volume)

        for path in paths:
            loop_time = time.time()

            now = time.time()
            if now - self.last_usage_check >= self.usage_check_interval:
                used, total = statfs(self.volume)
                usage = (float(used) / total) * 100
                if usage <= self.usage_target:
                    self.logger.info(
                        'current usage %.2f%%: target reached (%.2f%%)', usage,
                        self.usage_target)
                    self.last_usage_check = now
                    break

            self.safe_chunk_move(path)
            self.chunks_run_time = ratelimit(self.chunks_run_time,
                                             self.max_chunks_per_second)
            self.total_chunks_processed += 1
            now = time.time()

            if now - self.last_reported >= self.report_interval:
                self.logger.info(
                    '%(start_time)s '
                    '%(passes)d '
                    '%(errors)d '
                    '%(c_rate).2f '
                    '%(b_rate).2f '
                    '%(total).2f '
                    '%(mover_time).2f '
                    '%(mover_rate).2f' % {
                        'start_time': time.ctime(report_time),
                        'passes': self.passes,
                        'errors': self.errors,
                        'c_rate': self.passes / (now - report_time),
                        'b_rate': self.bytes_processed / (now - report_time),
                        'total': (now - start_time),
                        'mover_time': mover_time,
                        'mover_rate': mover_time / (now - start_time)
                    })
                report_time = now
                total_errors += self.errors
                self.passes = 0
                self.bytes_processed = 0
                self.last_reported = now
            mover_time += (now - loop_time)
        elapsed = (time.time() - start_time) or 0.000001
        self.logger.info(
            '%(elapsed).02f '
            '%(errors)d '
            '%(chunk_rate).2f '
            '%(bytes_rate).2f '
            '%(mover_time).2f '
            '%(mover_rate).2f' % {
                'elapsed': elapsed,
                'errors': total_errors + self.errors,
                'chunk_rate': self.total_chunks_processed / elapsed,
                'bytes_rate': self.total_bytes_processed / elapsed,
                'mover_time': mover_time,
                'mover_rate': mover_time / elapsed
            })

    def safe_chunk_move(self, path):
        try:
            self.chunk_move(path)
        except Exception as e:
            self.errors += 1
            self.logger.error('ERROR while moving chunk %s: %s', path, e)
        self.passes += 1

    def load_chunk_metadata(self, path):
        with open(path) as f:
            return read_chunk_metadata(f)

    def chunk_move(self, path):
        meta = self.load_chunk_metadata(path)
        content_cid = meta['content_cid']
        content_path = meta['content_path']

        chunk_url = 'http://%s/%s' % \
            (self.address, meta['chunk_id'])

        try:
            _, data = self.container_client.content_show(cid=content_cid,
                                                         path=content_path)
        except exc.NotFound:
            raise exc.OrphanChunk('Content not found')
        current_chunk = None
        notin = []
        # split the chunks at the same position into the current chunk and
        # the others ("notin"), without mutating a list while iterating it
        for c in data:
            if c['pos'] != meta['chunk_pos']:
                continue
            if c['url'] == chunk_url:
                current_chunk = c
            else:
                notin.append(c)
        if not current_chunk:
            raise exc.OrphanChunk('Chunk not found in content')
        spare_data = {'notin': notin, 'broken': [current_chunk], 'size': 0}
        spare_resp = self.container_client.content_spare(cid=content_cid,
                                                         path=content_path,
                                                         data=spare_data)

        new_chunk = spare_resp['chunks'][0]
        self.blob_client.chunk_copy(current_chunk['url'], new_chunk['id'])

        old = [{
            'type': 'chunk',
            'id': current_chunk['url'],
            'hash': meta['chunk_hash'],
            'size': int(meta['chunk_size'])
        }]
        new = [{
            'type': 'chunk',
            'id': new_chunk['id'],
            'hash': meta['chunk_hash'],
            'size': int(meta['chunk_size'])
        }]
        update_data = {'old': old, 'new': new}

        self.container_client.container_raw_update(cid=content_cid,
                                                   data=update_data)

        self.blob_client.chunk_delete(current_chunk['url'])

        self.logger.info('moved chunk %s to %s', current_chunk['url'],
                         new_chunk['id'])
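
# Minimal usage sketch (illustrative): run a single pass of the mover over one
# rawx volume. The conf keys match those read in __init__ above; the volume
# path is hypothetical.
#
#   worker = BlobMoverWorker({'usage_target': 80}, None, '/var/lib/oio/rawx-1')
#   worker.mover_pass()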
Example #7
class TestDupContent(BaseTestCase):
    def setUp(self):
        super(TestDupContent, self).setUp()

        if len(self.conf['rawx']) < 3:
            self.skipTest("Not enough rawx. "
                          "Dup tests needs more than 2 rawx to run")

        self.namespace = self.conf['namespace']
        self.account = self.conf['account']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_client = ContainerClient(self.gridconf)
        self.blob_client = BlobClient()
        self.container_name = "TestDupContent%f" % time.time()
        self.container_client.container_create(acct=self.account,
                                               ref=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()

    def tearDown(self):
        super(TestDupContent, self).tearDown()

    def _test_upload(self, stgpol, data_size):
        data = random_data(data_size)
        content = self.content_factory.new(self.container_id, "titi",
                                           len(data), stgpol)
        self.assertEqual(type(content), DupContent)

        content.upload(StringIO.StringIO(data))

        meta, chunks = self.container_client.content_show(
            cid=self.container_id, content=content.content_id)
        chunks = ChunksHelper(chunks)
        self.assertEqual(meta['hash'], md5_data(data))
        self.assertEqual(meta['length'], str(len(data)))
        self.assertEqual(meta['policy'], stgpol)
        self.assertEqual(meta['name'], "titi")

        metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size))
        if metachunk_nb == 0:
            metachunk_nb = 1  # special case for empty content

        if stgpol == "THREECOPIES":
            nb_copy = 3
        elif stgpol == "TWOCOPIES":
            nb_copy = 2
        elif stgpol == "SINGLE":
            nb_copy = 1

        self.assertEqual(len(chunks), metachunk_nb * nb_copy)

        for pos in range(metachunk_nb):
            chunks_at_pos = chunks.filter(pos=pos)
            self.assertEqual(len(chunks_at_pos), nb_copy)

            data_begin = pos * self.chunk_size
            data_end = pos * self.chunk_size + self.chunk_size
            chunk_hash = md5_data(data[data_begin:data_end])

            for chunk in chunks_at_pos:
                meta, stream = self.blob_client.chunk_get(chunk.url)
                self.assertEqual(md5_stream(stream), chunk_hash)
                self.assertEqual(meta['content_size'], str(len(data)))
                self.assertEqual(meta['content_path'], "titi")
                self.assertEqual(meta['content_cid'], self.container_id)
                self.assertEqual(meta['content_id'], content.content_id)
                self.assertEqual(meta['chunk_id'], chunk.id)
                self.assertEqual(meta['chunk_pos'], str(pos))
                self.assertEqual(meta['chunk_hash'], chunk_hash)

    def test_twocopies_upload_0_byte(self):
        self._test_upload("TWOCOPIES", 0)

    def test_twocopies_upload_1_byte(self):
        self._test_upload("TWOCOPIES", 1)

    def test_twocopies_upload_chunksize_bytes(self):
        self._test_upload("TWOCOPIES", self.chunk_size)

    def test_twocopies_upload_chunksize_plus_1_bytes(self):
        self._test_upload("TWOCOPIES", self.chunk_size + 1)

    def test_single_upload_0_byte(self):
        self._test_upload("SINGLE", 0)

    def test_single_upload_chunksize_plus_1_bytes(self):
        self._test_upload("SINGLE", self.chunk_size + 1)

    def test_chunks_cleanup_when_upload_failed(self):
        data = random_data(2 * self.chunk_size)
        content = self.content_factory.new(self.container_id, "titi",
                                           len(data), "TWOCOPIES")
        self.assertEqual(type(content), DupContent)

        # set bad url for position 1
        for chunk in content.chunks.filter(pos=1):
            chunk.url = "http://127.0.0.1:9/DEADBEEF"

        self.assertRaises(Exception, content.upload, StringIO.StringIO(data))
        for chunk in content.chunks.exclude(pos=1):
            self.assertRaises(NotFound,
                              self.blob_client.chunk_head, chunk.url)

    def _new_content(self, stgpol, data, broken_pos_list=None):
        # avoid a shared mutable default argument
        broken_pos_list = broken_pos_list or []
        old_content = self.content_factory.new(self.container_id, "titi",
                                               len(data), stgpol)
        self.assertEqual(type(old_content), DupContent)

        old_content.upload(StringIO.StringIO(data))

        broken_chunks_info = {}
        for pos, idx in broken_pos_list:
            c = old_content.chunks.filter(pos=pos)[idx]
            meta, stream = self.blob_client.chunk_get(c.url)
            if pos not in broken_chunks_info:
                broken_chunks_info[pos] = {}
            broken_chunks_info[pos][idx] = {
                "url": c.url,
                "id": c.id,
                "hash": c.hash,
                "dl_meta": meta,
                "dl_hash": md5_stream(stream)
            }
            self.blob_client.chunk_delete(c.url)

        # get the new structure of the uploaded content
        return (self.content_factory.get(
            self.container_id, old_content.content_id), broken_chunks_info)

    def _test_rebuild(self, stgpol, data_size, broken_pos_list,
                      full_rebuild_pos):
        data = random_data(data_size)
        content, broken_chunks_info = self._new_content(stgpol,
                                                        data, broken_pos_list)

        rebuild_pos, rebuild_idx = full_rebuild_pos
        rebuild_chunk_info = broken_chunks_info[rebuild_pos][rebuild_idx]
        content.rebuild_chunk(rebuild_chunk_info["id"])

        # get the new structure of the content
        rebuilt_content = self.content_factory.get(self.container_id,
                                                   content.content_id)
        self.assertEqual(type(rebuilt_content), DupContent)

        # find the rebuilt chunk
        for c in rebuilt_content.chunks.filter(pos=rebuild_pos):
            if len(content.chunks.filter(id=c.id)) > 0:
                # not the rebuilt chunk
                # if this chunk is broken, it must not have been rebuilt
                for b_c_i in broken_chunks_info[rebuild_pos].values():
                    if c.id == b_c_i["id"]:
                        with ExpectedException(NotFound):
                            _, _ = self.blob_client.chunk_get(c.url)
                continue
            meta, stream = self.blob_client.chunk_get(c.url)
            self.assertEqual(meta["chunk_id"], c.id)
            self.assertEqual(md5_stream(stream),
                             rebuild_chunk_info["dl_hash"])
            self.assertEqual(c.hash, rebuild_chunk_info["hash"])
            self.assertThat(c.url, NotEquals(rebuild_chunk_info["url"]))
            del meta["chunk_id"]
            del rebuild_chunk_info["dl_meta"]["chunk_id"]
            self.assertEqual(meta, rebuild_chunk_info["dl_meta"])

    def test_2copies_content_0_byte_1broken_rebuild_pos_0_idx_0(self):
        self._test_rebuild("TWOCOPIES", 0, [(0, 0)], (0, 0))

    def test_2copies_content_1_byte_1broken_rebuild_pos_0_idx_1(self):
        self._test_rebuild("TWOCOPIES", 1, [(0, 1)], (0, 1))

    def test_3copies_content_chunksize_bytes_2broken_rebuild_pos_0_idx_1(self):
        if len(self.conf['rawx']) <= 3:
            self.skipTest("Need more than 3 rawx")
        self._test_rebuild("THREECOPIES", self.chunk_size,
                           [(0, 0), (0, 1)], (0, 1))

    def test_3copies_content_2xchksize_bytes_2broken_rebuild_pos_1_idx_2(self):
        if len(self.conf['rawx']) <= 3:
            self.skipTest("Need more than 3 rawx")
        self._test_rebuild("THREECOPIES", 2 * self.chunk_size,
                           [(1, 0), (1, 2)], (1, 2))

    def test_2copies_content_0_byte_2broken_rebuild_pos_0_idx_0(self):
        with ExpectedException(UnrecoverableContent):
            self._test_rebuild("TWOCOPIES", 0, [(0, 0), (0, 1)], (0, 0))

    def _test_download(self, stgpol, data_size, broken_pos_list):
        data = random_data(data_size)
        content, _ = self._new_content(stgpol, data, broken_pos_list)

        downloaded_data = "".join(content.download())

        self.assertEqual(downloaded_data, data)

        for pos, idx in broken_pos_list:
            # check nothing has been rebuilt
            c = content.chunks.filter(pos=pos)[0]
            self.assertRaises(NotFound, self.blob_client.chunk_delete, c.url)

    def test_twocopies_download_content_0_byte_without_broken_chunks(self):
        self._test_download("TWOCOPIES", 0, [])

    def test_twocopies_download_content_0_byte_with_broken_0_0(self):
        self._test_download("TWOCOPIES", 0, [(0, 0)])

    def test_twocopies_download_content_1_byte_without_broken_chunks(self):
        self._test_download("TWOCOPIES", 1, [])

    def test_twocopies_download_content_1_byte_with_broken_0_0(self):
        self._test_download("TWOCOPIES", 1, [(0, 0)])

    def test_twocopies_download_chunksize_bytes_without_broken_chunks(self):
        self._test_download("TWOCOPIES", self.chunk_size, [])

    def test_twocopies_download_2xchuksize_bytes_with_broken_0_0_and_1_0(self):
        self._test_download("TWOCOPIES", self.chunk_size * 2, [(0, 0), (1, 0)])

    def test_twocopies_download_content_chunksize_bytes_2_broken_chunks(self):
        data = random_data(self.chunk_size)
        content, _ = self._new_content("TWOCOPIES", data, [(0, 0), (0, 1)])
        gen = content.download()
        self.assertRaises(UnrecoverableContent, gen.next)

    def test_single_download_content_1_byte_without_broken_chunks(self):
        self._test_download("SINGLE", 1, [])

    def test_single_download_chunksize_bytes_plus_1_without_broken_chunk(self):
        self._test_download("SINGLE", self.chunk_size * 2, [])
Example #8
class Content(object):
    def __init__(self, conf, container_id, metadata, chunks, stgpol_args):
        self.conf = conf
        self.container_id = container_id
        self.metadata = metadata
        self.chunks = ChunksHelper(chunks)
        self.stgpol_args = stgpol_args
        self.logger = get_logger(self.conf)
        self.cs_client = ConscienceClient(conf)
        self.container_client = ContainerClient(self.conf)
        self.blob_client = BlobClient()
        self.session = requests.Session()
        self.content_id = self.metadata["id"]
        self.stgpol_name = self.metadata["policy"]
        self.path = self.metadata["name"]
        self.length = int(self.metadata["length"])
        self.version = self.metadata["version"]
        self.hash = self.metadata["hash"]
        self.mime_type = self.metadata["mime-type"]
        self.chunk_method = self.metadata["chunk-method"]

    def _meta2_get_spare_chunk(self, chunks_notin, chunks_broken):
        spare_data = {
            "notin": ChunksHelper(chunks_notin, False).raw(),
            "broken": ChunksHelper(chunks_broken, False).raw()
        }
        try:
            spare_resp = self.container_client.content_spare(
                cid=self.container_id, content=self.content_id,
                data=spare_data, stgpol=self.stgpol_name)
        except ClientException as e:
            raise exc.SpareChunkException("No spare chunk (%s)" % e.message)

        url_list = []
        for c in spare_resp["chunks"]:
            url_list.append(c["id"])

        return url_list

    def _meta2_update_spare_chunk(self, current_chunk, new_url):
        old = [{'type': 'chunk',
                'id': current_chunk.url,
                'hash': current_chunk.hash,
                'size': current_chunk.size,
                'pos': current_chunk.pos,
                'content': self.content_id}]
        new = [{'type': 'chunk',
                'id': new_url,
                'hash': current_chunk.hash,
                'size': current_chunk.size,
                'pos': current_chunk.pos,
                'content': self.content_id}]
        update_data = {'old': old, 'new': new}

        self.container_client.container_raw_update(
            cid=self.container_id, data=update_data)

    def _meta2_create_object(self):
        self.container_client.content_create(cid=self.container_id,
                                             path=self.path,
                                             content_id=self.content_id,
                                             stgpol=self.stgpol_name,
                                             size=self.length,
                                             checksum=self.hash,
                                             version=self.version,
                                             chunk_method=self.chunk_method,
                                             mime_type=self.mime_type,
                                             data=self.chunks.raw())

    def rebuild_chunk(self, chunk_id):
        raise NotImplementedError()

    def upload(self, stream):
        try:
            self._upload(stream)
        except Exception as e:
            for chunk in self.chunks:
                try:
                    self.blob_client.chunk_delete(chunk.url)
                except:
                    # best-effort cleanup: ignore chunks we cannot delete
                    pass
            raise e

    def _upload(self, stream):
        raise NotImplementedError()

    def download(self):
        raise NotImplementedError()
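
The _upload()/download() hooks above are the points a concrete storage policy
overrides. Below is a minimal, hypothetical subclass sketch, not the library's
actual implementation: it assumes a rawx chunk URL accepts plain HTTP PUT/GET
through the requests session that Content already holds (the real protocol
also carries chunk metadata in headers, omitted here).

class PlainCopyContent(Content):
    def _upload(self, stream):
        data = stream.read()
        for chunk in self.chunks:
            # push the same payload to every copy registered for this content
            resp = self.session.put(chunk.url, data=data)
            resp.raise_for_status()
        # commit the chunk list to the container service (meta2)
        self._meta2_create_object()

    def download(self):
        # naive reader: stream back the first copy; a real implementation
        # would fail over to another copy when a rawx is unreachable
        resp = self.session.get(self.chunks[0].url, stream=True)
        resp.raise_for_status()
        for buf in resp.iter_content(65536):
            yield buf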
Example #9
class BlobMoverWorker(object):
    def __init__(self, conf, logger, volume):
        self.conf = conf
        self.logger = logger or get_logger(conf)
        self.volume = volume
        self.run_time = 0
        self.passes = 0
        self.errors = 0
        self.last_reported = 0
        self.last_usage_check = 0
        self.chunks_run_time = 0
        self.bytes_running_time = 0
        self.bytes_processed = 0
        self.total_bytes_processed = 0
        self.total_chunks_processed = 0
        self.usage_target = int_value(
            conf.get('usage_target'), 0)
        self.usage_check_interval = int_value(
            conf.get('usage_check_interval'), 3600)
        self.report_interval = int_value(
            conf.get('report_interval'), 3600)
        self.max_chunks_per_second = int_value(
            conf.get('chunks_per_second'), 30)
        self.max_bytes_per_second = int_value(
            conf.get('bytes_per_second'), 10000000)
        self.blob_client = BlobClient()
        self.container_client = ContainerClient(conf)

    def mover_pass(self):
        self.namespace, self.address = check_volume(self.volume)

        start_time = report_time = time.time()

        total_errors = 0
        mover_time = 0

        paths = paths_gen(self.volume)

        for path in paths:
            loop_time = time.time()

            now = time.time()
            if now - self.last_usage_check >= self.usage_check_interval:
                used, total = statfs(self.volume)
                usage = (float(used) / total) * 100
                if usage <= self.usage_target:
                    self.logger.info(
                        'current usage %.2f%%: target reached (%.2f%%)', usage,
                        self.usage_target)
                    self.last_usage_check = now
                    break

            self.safe_chunk_move(path)
            self.chunks_run_time = ratelimit(
                self.chunks_run_time,
                self.max_chunks_per_second
            )
            self.total_chunks_processed += 1
            now = time.time()

            if now - self.last_reported >= self.report_interval:
                self.logger.info(
                    '%(start_time)s '
                    '%(passes)d '
                    '%(errors)d '
                    '%(c_rate).2f '
                    '%(b_rate).2f '
                    '%(total).2f '
                    '%(mover_time).2f '
                    '%(mover_rate).2f' % {
                        'start_time': time.ctime(report_time),
                        'passes': self.passes,
                        'errors': self.errors,
                        'c_rate': self.passes / (now - report_time),
                        'b_rate': self.bytes_processed / (now - report_time),
                        'total': (now - start_time),
                        'mover_time': mover_time,
                        'mover_rate': mover_time / (now - start_time)
                    }
                )
                report_time = now
                total_errors += self.errors
                self.passes = 0
                self.bytes_processed = 0
                self.last_reported = now
            mover_time += (now - loop_time)
        elapsed = (time.time() - start_time) or 0.000001
        self.logger.info(
            '%(elapsed).02f '
            '%(errors)d '
            '%(chunk_rate).2f '
            '%(bytes_rate).2f '
            '%(mover_time).2f '
            '%(mover_rate).2f' % {
                'elapsed': elapsed,
                'errors': total_errors + self.errors,
                'chunk_rate': self.total_chunks_processed / elapsed,
                'bytes_rate': self.total_bytes_processed / elapsed,
                'mover_time': mover_time,
                'mover_rate': mover_time / elapsed
            }
        )

    def safe_chunk_move(self, path):
        try:
            self.chunk_move(path)
        except Exception as e:
            self.errors += 1
            self.logger.error('ERROR while moving chunk %s: %s', path, e)
        self.passes += 1

    def load_chunk_metadata(self, path):
        with open(path) as f:
            return read_chunk_metadata(f)

    def chunk_move(self, path):
        meta = self.load_chunk_metadata(path)
        content_cid = meta['content_cid']
        content_path = meta['content_path']

        chunk_url = 'http://%s/%s' % \
            (self.address, meta['chunk_id'])

        try:
            data = self.container_client.content_show(
                cid=content_cid, path=content_path)
        except exc.NotFound:
            raise exc.OrphanChunk('Content not found')
        current_chunk = None
        notin = []
        for c in data:
            if c['pos'] == meta['chunk_pos']:
                notin.append(c)
        for c in notin:
            if c['url'] == chunk_url:
                current_chunk = c
                notin.remove(c)
                break
        if not current_chunk:
            raise exc.OrphanChunk('Chunk not found in content')
        spare_data = {'notin': notin, 'broken': [current_chunk], 'size': 0}
        spare_resp = self.container_client.content_spare(
            cid=content_cid, path=content_path, data=spare_data)

        new_chunk = spare_resp['chunks'][0]
        self.blob_client.chunk_copy(
            current_chunk['url'], new_chunk['id'])

        old = [{'type': 'chunk',
                'id': current_chunk['url'],
                'hash': meta['chunk_hash'],
                'size': int(meta['chunk_size'])}]
        new = [{'type': 'chunk',
                'id': new_chunk['id'],
                'hash': meta['chunk_hash'],
                'size': int(meta['chunk_size'])}]
        update_data = {'old': old, 'new': new}

        self.container_client.container_raw_update(
            cid=content_cid, data=update_data)

        self.blob_client.chunk_delete(current_chunk['url'])

        self.logger.info(
            'moved chunk %s to %s', current_chunk['url'], new_chunk['id'])
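
A sketch of how a mover daemon might drive this worker. The configuration keys
match the ones read in __init__ above; the volume path and the outer loop are
illustrative assumptions, not the actual oio-blob-mover entry point.

conf = {
    'usage_target': 60,             # stop once volume usage drops to 60%
    'usage_check_interval': 3600,
    'chunks_per_second': 30,
    'bytes_per_second': 10000000,
    'report_interval': 3600,
}
worker = BlobMoverWorker(conf, get_logger(conf), '/var/lib/oio/sds/rawx-1')
while True:
    worker.mover_pass()             # moves chunks until the target is reached
    time.sleep(conf['usage_check_interval'])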
Example #10
class TestDupContent(BaseTestCase):
    def setUp(self):
        super(TestDupContent, self).setUp()

        if len(self.conf['rawx']) < 3:
            self.skipTest("Not enough rawx. "
                          "Dup tests needs more than 2 rawx to run")

        self.namespace = self.conf['namespace']
        self.account = self.conf['account']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_client = ContainerClient(self.gridconf)
        self.blob_client = BlobClient()
        self.container_name = "TestDupContent%f" % time.time()
        self.container_client.container_create(acct=self.account,
                                               ref=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()

    def tearDown(self):
        super(TestDupContent, self).tearDown()

    def _test_upload(self, stgpol, data_size):
        data = random_data(data_size)
        content = self.content_factory.new(self.container_id, "titi",
                                           len(data), stgpol)
        self.assertEqual(type(content), DupContent)

        content.upload(StringIO.StringIO(data))

        meta, chunks = self.container_client.content_show(
            cid=self.container_id, content=content.content_id)
        chunks = ChunksHelper(chunks)
        self.assertEqual(meta['hash'], md5_data(data))
        self.assertEqual(meta['length'], str(len(data)))
        self.assertEqual(meta['policy'], stgpol)
        self.assertEqual(meta['name'], "titi")

        metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size))
        if metachunk_nb == 0:
            metachunk_nb = 1  # special case for empty content

        if stgpol == "THREECOPIES":
            nb_copy = 3
        elif stgpol == "TWOCOPIES":
            nb_copy = 2
        elif stgpol == "SINGLE":
            nb_copy = 1

        self.assertEqual(len(chunks), metachunk_nb * nb_copy)

        for pos in range(metachunk_nb):
            chunks_at_pos = chunks.filter(pos=pos)
            self.assertEqual(len(chunks_at_pos), nb_copy)

            data_begin = pos * self.chunk_size
            data_end = pos * self.chunk_size + self.chunk_size
            chunk_hash = md5_data(data[data_begin:data_end])

            for chunk in chunks_at_pos:
                meta, stream = self.blob_client.chunk_get(chunk.url)
                self.assertEqual(md5_stream(stream), chunk_hash)
                self.assertEqual(meta['content_size'], str(len(data)))
                self.assertEqual(meta['content_path'], "titi")
                self.assertEqual(meta['content_cid'], self.container_id)
                self.assertEqual(meta['content_id'], content.content_id)
                self.assertEqual(meta['chunk_id'], chunk.id)
                self.assertEqual(meta['chunk_pos'], str(pos))
                self.assertEqual(meta['chunk_hash'], chunk_hash)

    def test_twocopies_upload_0_byte(self):
        self._test_upload("TWOCOPIES", 0)

    def test_twocopies_upload_1_byte(self):
        self._test_upload("TWOCOPIES", 1)

    def test_twocopies_upload_chunksize_bytes(self):
        self._test_upload("TWOCOPIES", self.chunk_size)

    def test_twocopies_upload_chunksize_plus_1_bytes(self):
        self._test_upload("TWOCOPIES", self.chunk_size + 1)

    def test_single_upload_0_byte(self):
        self._test_upload("SINGLE", 0)

    def test_single_upload_chunksize_plus_1_bytes(self):
        self._test_upload("SINGLE", self.chunk_size + 1)

    def test_chunks_cleanup_when_upload_failed(self):
        data = random_data(2 * self.chunk_size)
        content = self.content_factory.new(self.container_id, "titi",
                                           len(data), "TWOCOPIES")
        self.assertEqual(type(content), DupContent)

        # set bad url for position 1
        for chunk in content.chunks.filter(pos=1):
            chunk.url = "http://127.0.0.1:9/DEADBEEF"

        self.assertRaises(Exception, content.upload, StringIO.StringIO(data))
        for chunk in content.chunks.exclude(pos=1):
            self.assertRaises(NotFound, self.blob_client.chunk_head, chunk.url)

    def _new_content(self, stgpol, data, broken_pos_list=[]):
        old_content = self.content_factory.new(self.container_id, "titi",
                                               len(data), stgpol)
        self.assertEqual(type(old_content), DupContent)

        old_content.upload(StringIO.StringIO(data))

        broken_chunks_info = {}
        for pos, idx in broken_pos_list:
            c = old_content.chunks.filter(pos=pos)[idx]
            meta, stream = self.blob_client.chunk_get(c.url)
            if pos not in broken_chunks_info:
                broken_chunks_info[pos] = {}
            broken_chunks_info[pos][idx] = {
                "url": c.url,
                "id": c.id,
                "hash": c.hash,
                "dl_meta": meta,
                "dl_hash": md5_stream(stream)
            }
            self.blob_client.chunk_delete(c.url)

        # get the new structure of the uploaded content
        return (self.content_factory.get(self.container_id,
                                         old_content.content_id),
                broken_chunks_info)

    def _test_rebuild(self, stgpol, data_size, broken_pos_list,
                      full_rebuild_pos):
        data = random_data(data_size)
        content, broken_chunks_info = self._new_content(
            stgpol, data, broken_pos_list)

        rebuild_pos, rebuild_idx = full_rebuild_pos
        rebuild_chunk_info = broken_chunks_info[rebuild_pos][rebuild_idx]
        content.rebuild_chunk(rebuild_chunk_info["id"])

        # get the new structure of the content
        rebuilt_content = self.content_factory.get(self.container_id,
                                                   content.content_id)
        self.assertEqual(type(rebuilt_content), DupContent)

        # find the rebuilt chunk
        for c in rebuilt_content.chunks.filter(pos=rebuild_pos):
            if len(content.chunks.filter(id=c.id)) > 0:
                # not the rebuilt chunk
                # if this chunk is broken, it must not have been rebuilt
                for b_c_i in broken_chunks_info[rebuild_pos].values():
                    if c.id == b_c_i["id"]:
                        with ExpectedException(NotFound):
                            _, _ = self.blob_client.chunk_get(c.url)
                continue
            meta, stream = self.blob_client.chunk_get(c.url)
            self.assertEqual(meta["chunk_id"], c.id)
            self.assertEqual(md5_stream(stream), rebuild_chunk_info["dl_hash"])
            self.assertEqual(c.hash, rebuild_chunk_info["hash"])
            self.assertThat(c.url, NotEquals(rebuild_chunk_info["url"]))
            del meta["chunk_id"]
            del rebuild_chunk_info["dl_meta"]["chunk_id"]
            self.assertEqual(meta, rebuild_chunk_info["dl_meta"])

    def test_2copies_content_0_byte_1broken_rebuild_pos_0_idx_0(self):
        self._test_rebuild("TWOCOPIES", 0, [(0, 0)], (0, 0))

    def test_2copies_content_1_byte_1broken_rebuild_pos_0_idx_1(self):
        self._test_rebuild("TWOCOPIES", 1, [(0, 1)], (0, 1))

    def test_3copies_content_chunksize_bytes_2broken_rebuild_pos_0_idx_1(self):
        if len(self.conf['rawx']) <= 3:
            self.skipTest("Need more than 3 rawx")
        self._test_rebuild("THREECOPIES", self.chunk_size, [(0, 0), (0, 1)],
                           (0, 1))

    def test_3copies_content_2xchksize_bytes_2broken_rebuild_pos_1_idx_2(self):
        if len(self.conf['rawx']) <= 3:
            self.skipTest("Need more than 3 rawx")
        self._test_rebuild("THREECOPIES", 2 * self.chunk_size,
                           [(1, 0), (1, 2)], (1, 2))

    def test_2copies_content_0_byte_2broken_rebuild_pos_0_idx_0(self):
        with ExpectedException(UnrecoverableContent):
            self._test_rebuild("TWOCOPIES", 0, [(0, 0), (0, 1)], (0, 0))

    def _test_download(self, stgpol, data_size, broken_pos_list):
        data = random_data(data_size)
        content, _ = self._new_content(stgpol, data, broken_pos_list)

        downloaded_data = "".join(content.download())

        self.assertEqual(downloaded_data, data)

        for pos, idx in broken_pos_list:
            # check nothing has been rebuilt
            c = content.chunks.filter(pos=pos)[idx]
            self.assertRaises(NotFound, self.blob_client.chunk_delete, c.url)

    def test_twocopies_download_content_0_byte_without_broken_chunks(self):
        self._test_download("TWOCOPIES", 0, [])

    def test_twocopies_download_content_0_byte_with_broken_0_0(self):
        self._test_download("TWOCOPIES", 0, [(0, 0)])

    def test_twocopies_download_content_1_byte_without_broken_chunks(self):
        self._test_download("TWOCOPIES", 1, [])

    def test_twocopies_download_content_1_byte_with_broken_0_0(self):
        self._test_download("TWOCOPIES", 1, [(0, 0)])

    def test_twocopies_download_chunksize_bytes_without_broken_chunks(self):
        self._test_download("TWOCOPIES", self.chunk_size, [])

    def test_twocopies_download_2xchunksize_bytes_with_broken_0_0_and_1_0(self):
        self._test_download("TWOCOPIES", self.chunk_size * 2, [(0, 0), (1, 0)])

    def test_twocopies_download_content_chunksize_bytes_2_broken_chunks(self):
        data = random_data(self.chunk_size)
        content, _ = self._new_content("TWOCOPIES", data, [(0, 0), (0, 1)])
        gen = content.download()
        self.assertRaises(UnrecoverableContent, gen.next)

    def test_single_download_content_1_byte_without_broken_chunks(self):
        self._test_download("SINGLE", 1, [])

    def test_single_download_chunksize_bytes_plus_1_without_broken_chunk(self):
        self._test_download("SINGLE", self.chunk_size * 2, [])
Example #11
class Content(object):
    def __init__(self, conf, container_id, metadata, chunks, stgpol_args):
        self.conf = conf
        self.container_id = container_id
        self.metadata = metadata
        self.chunks = ChunksHelper(chunks)
        self.stgpol_args = stgpol_args
        self.logger = get_logger(self.conf)
        self.cs_client = ConscienceClient(conf)
        self.container_client = ContainerClient(self.conf)
        self.blob_client = BlobClient()
        self.session = requests.Session()
        self.content_id = self.metadata["id"]
        self.stgpol_name = self.metadata["policy"]
        self.path = self.metadata["name"]
        self.length = int(self.metadata["length"])
        self.version = self.metadata["version"]
        self.hash = self.metadata["hash"]
        self.mime_type = self.metadata["mime-type"]
        self.chunk_method = self.metadata["chunk-method"]

    def _meta2_get_spare_chunk(self, chunks_notin, chunks_broken):
        spare_data = {
            "notin": ChunksHelper(chunks_notin, False).raw(),
            "broken": ChunksHelper(chunks_broken, False).raw()
        }
        try:
            spare_resp = self.container_client.content_spare(
                cid=self.container_id, content=self.content_id,
                data=spare_data, stgpol=self.stgpol_name)
        except ClientException as e:
            raise exc.SpareChunkException("No spare chunk (%s)" % e.message)

        url_list = []
        for c in spare_resp["chunks"]:
            url_list.append(c["id"])

        return url_list

    def _meta2_update_spare_chunk(self, current_chunk, new_url):
        old = [{'type': 'chunk',
                'id': current_chunk.url,
                'hash': current_chunk.hash,
                'size': current_chunk.size,
                'pos': current_chunk.pos,
                'content': self.content_id}]
        new = [{'type': 'chunk',
                'id': new_url,
                'hash': current_chunk.hash,
                'size': current_chunk.size,
                'pos': current_chunk.pos,
                'content': self.content_id}]
        update_data = {'old': old, 'new': new}

        self.container_client.container_raw_update(
            cid=self.container_id, data=update_data)

    def _meta2_create_object(self):
        self.container_client.content_create(cid=self.container_id,
                                             path=self.path,
                                             content_id=self.content_id,
                                             stgpol=self.stgpol_name,
                                             size=self.length,
                                             checksum=self.hash,
                                             version=self.version,
                                             chunk_method=self.chunk_method,
                                             mime_type=self.mime_type,
                                             data=self.chunks.raw())

    def rebuild_chunk(self, chunk_id):
        raise NotImplementedError()

    def upload(self, stream):
        try:
            self._upload(stream)
        except:
            # Keep the stack trace
            exc_info = sys.exc_info()
            for chunk in self.chunks:
                try:
                    self.blob_client.chunk_delete(chunk.url)
                except:
                    self.logger.warn("Failed to delete %s", chunk.url)
            # Raise with the original stack trace
            raise exc_info[0], exc_info[1], exc_info[2]

    def _upload(self, stream):
        raise NotImplementedError()

    def download(self):
        raise NotImplementedError()

    def delete(self):
        self.container_client.content_delete(cid=self.container_id,
                                             path=self.path)

    def move_chunk(self, chunk_id):
        current_chunk = self.chunks.filter(id=chunk_id).one()
        if current_chunk is None:
            raise OrphanChunk("Chunk not found in content")

        other_chunks = self.chunks.filter(
            metapos=current_chunk.metapos).exclude(id=chunk_id).all()

        spare_urls = self._meta2_get_spare_chunk(other_chunks,
                                                 [current_chunk])

        self.logger.debug("copy chunk from %s to %s",
                          current_chunk.url, spare_urls[0])
        self.blob_client.chunk_copy(current_chunk.url, spare_urls[0])

        self._meta2_update_spare_chunk(current_chunk, spare_urls[0])

        try:
            self.blob_client.chunk_delete(current_chunk.url)
        except Exception:
            self.logger.warn("Failed to delete chunk %s", current_chunk.url)

        current_chunk.url = spare_urls[0]

        return current_chunk.raw()
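
A short usage sketch for move_chunk() above, e.g. to relocate every copy of
the first metachunk; the content object is assumed to come from a
ContentFactory.get() call, as in the test examples.

content = content_factory.get(container_id, content_id)
for chunk in content.chunks.filter(metapos=0):
    # copies the chunk to a spare rawx, updates meta2, deletes the old copy
    content.move_chunk(chunk.id)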
Example #12
class Content(object):
    def __init__(self, conf, container_id, metadata, chunks, stgpol_args):
        self.conf = conf
        self.container_id = container_id
        self.metadata = metadata
        self.chunks = ChunksHelper(chunks)
        self.stgpol_args = stgpol_args
        self.logger = get_logger(self.conf)
        self.cs_client = ConscienceClient(conf)
        self.container_client = ContainerClient(self.conf)
        self.blob_client = BlobClient()
        self.session = requests.Session()
        self.content_id = self.metadata["id"]
        self.stgpol_name = self.metadata["policy"]
        self.path = self.metadata["name"]
        self.length = int(self.metadata["length"])
        self.version = self.metadata["version"]
        self.hash = self.metadata["hash"]
        self.mime_type = self.metadata["mime-type"]
        self.chunk_method = self.metadata["chunk-method"]

    def _meta2_get_spare_chunk(self, chunks_notin, chunks_broken):
        spare_data = {
            "notin": ChunksHelper(chunks_notin, False).raw(),
            "broken": ChunksHelper(chunks_broken, False).raw()
        }
        try:
            spare_resp = self.container_client.content_spare(
                cid=self.container_id,
                content=self.content_id,
                data=spare_data,
                stgpol=self.stgpol_name)
        except ClientException as e:
            raise exc.SpareChunkException("No spare chunk (%s)" % e.message)

        url_list = []
        for c in spare_resp["chunks"]:
            url_list.append(c["id"])

        return url_list

    def _meta2_update_spare_chunk(self, current_chunk, new_url):
        old = [{
            'type': 'chunk',
            'id': current_chunk.url,
            'hash': current_chunk.hash,
            'size': current_chunk.size,
            'pos': current_chunk.pos,
            'content': self.content_id
        }]
        new = [{
            'type': 'chunk',
            'id': new_url,
            'hash': current_chunk.hash,
            'size': current_chunk.size,
            'pos': current_chunk.pos,
            'content': self.content_id
        }]
        update_data = {'old': old, 'new': new}

        self.container_client.container_raw_update(cid=self.container_id,
                                                   data=update_data)

    def _meta2_create_object(self):
        self.container_client.content_create(cid=self.container_id,
                                             path=self.path,
                                             content_id=self.content_id,
                                             stgpol=self.stgpol_name,
                                             size=self.length,
                                             checksum=self.hash,
                                             version=self.version,
                                             chunk_method=self.chunk_method,
                                             mime_type=self.mime_type,
                                             data=self.chunks.raw())

    def rebuild_chunk(self, chunk_id):
        raise NotImplementedError()

    def upload(self, stream):
        try:
            self._upload(stream)
        except Exception as e:
            for chunk in self.chunks:
                try:
                    self.blob_client.chunk_delete(chunk.url)
                except:
                    # best-effort cleanup: ignore chunks we cannot delete
                    pass
            raise e

    def _upload(self, stream):
        raise NotImplementedError()

    def download(self):
        raise NotImplementedError()
Example #13
class Content(object):
    def __init__(self,
                 conf,
                 container_id,
                 metadata,
                 chunks,
                 storage_method,
                 account,
                 container_name,
                 container_client=None):
        self.conf = conf
        self.container_id = container_id
        self.metadata = metadata
        self.chunks = ChunksHelper(chunks)
        self.storage_method = storage_method
        self.logger = get_logger(self.conf)
        self.blob_client = BlobClient()
        self.container_client = (container_client
                                 or ContainerClient(self.conf,
                                                    logger=self.logger))

        # FIXME: all these may be properties
        self.content_id = self.metadata["id"]
        self.path = self.metadata["name"]
        self.length = int(self.metadata["length"])
        self.version = self.metadata["version"]
        self.checksum = self.metadata["hash"]
        self.chunk_method = self.metadata["chunk_method"]
        self.account = account
        self.container_name = container_name
        if 'full_path' in self.metadata:
            self.full_path = metadata['full_path']
        else:
            self.full_path = [
                '{0}/{1}/{2}/{3}'.format(quote_plus(self.account),
                                         quote_plus(self.container_name),
                                         quote_plus(self.path), self.version)
            ]

    @property
    def mime_type(self):
        return self.metadata["mime_type"]

    @mime_type.setter
    def mime_type(self, value):
        self.metadata["mime_type"] = value

    @property
    def policy(self):
        return self.metadata["policy"]

    @policy.setter
    def policy(self, value):
        self.metadata["policy"] = value

    @property
    def properties(self):
        return self.metadata.get('properties')

    @properties.setter
    def properties(self, value):
        if not isinstance(value, dict):
            raise ValueError("'value' must be a dict")
        self.metadata['properties'] = value

    def _get_spare_chunk(self, chunks_notin, chunks_broken):
        spare_data = {
            "notin": ChunksHelper(chunks_notin, False).raw(),
            "broken": ChunksHelper(chunks_broken, False).raw()
        }
        try:
            spare_resp = self.container_client.content_spare(
                cid=self.container_id,
                path=self.content_id,
                data=spare_data,
                stgpol=self.policy)
        except ClientException as e:
            raise exc.SpareChunkException("No spare chunk (%s)" % e.message)

        url_list = []
        for c in spare_resp["chunks"]:
            url_list.append(c["id"])

        return url_list

    def _add_raw_chunk(self, current_chunk, url):
        data = {
            'type': 'chunk',
            'id': url,
            'hash': current_chunk.checksum,
            'size': current_chunk.size,
            'pos': current_chunk.pos,
            'content': self.content_id
        }

        self.container_client.container_raw_insert(data, cid=self.container_id)

    def _update_spare_chunk(self, current_chunk, new_url):
        old = {
            'type': 'chunk',
            'id': current_chunk.url,
            'hash': current_chunk.checksum,
            'size': current_chunk.size,
            'pos': current_chunk.pos,
            'content': self.content_id
        }
        new = {
            'type': 'chunk',
            'id': new_url,
            'hash': current_chunk.checksum,
            'size': current_chunk.size,
            'pos': current_chunk.pos,
            'content': self.content_id
        }
        self.container_client.container_raw_update(old,
                                                   new,
                                                   cid=self.container_id)

    def _generate_sysmeta(self):
        sysmeta = dict()
        sysmeta['id'] = self.content_id
        sysmeta['version'] = self.version
        sysmeta['policy'] = self.policy
        sysmeta['mime_type'] = self.mime_type
        sysmeta['chunk_method'] = self.chunk_method
        sysmeta['chunk_size'] = self.metadata['chunk_size']
        sysmeta['oio_version'] = OIO_VERSION
        sysmeta['full_path'] = self.full_path
        sysmeta['content_path'] = self.path
        sysmeta['container_id'] = self.container_id
        return sysmeta

    def _create_object(self, **kwargs):
        data = {'chunks': self.chunks.raw(), 'properties': self.properties}
        self.container_client.content_create(cid=self.container_id,
                                             path=self.path,
                                             content_id=self.content_id,
                                             stgpol=self.policy,
                                             size=self.length,
                                             checksum=self.checksum,
                                             version=self.version,
                                             chunk_method=self.chunk_method,
                                             mime_type=self.mime_type,
                                             data=data,
                                             **kwargs)

    def rebuild_chunk(self, chunk_id, allow_same_rawx=False, chunk_pos=None):
        raise NotImplementedError()

    def create(self, stream, **kwargs):
        raise NotImplementedError()

    def fetch(self):
        raise NotImplementedError()

    def delete(self, **kwargs):
        self.container_client.content_delete(cid=self.container_id,
                                             path=self.path,
                                             **kwargs)

    def move_chunk(self, chunk_id):
        current_chunk = self.chunks.filter(id=chunk_id).one()
        if current_chunk is None:
            raise OrphanChunk("Chunk not found in content")

        other_chunks = self.chunks.filter(
            metapos=current_chunk.metapos).exclude(id=chunk_id).all()

        spare_urls = self._get_spare_chunk(other_chunks, [current_chunk])

        self.logger.debug("copy chunk from %s to %s", current_chunk.url,
                          spare_urls[0])
        self.blob_client.chunk_copy(current_chunk.url, spare_urls[0])

        self._update_spare_chunk(current_chunk, spare_urls[0])

        try:
            self.blob_client.chunk_delete(current_chunk.url)
        except Exception:
            self.logger.warn("Failed to delete chunk %s", current_chunk.url)

        current_chunk.url = spare_urls[0]

        return current_chunk.raw()
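
A worked example of the full_path fallback built in __init__ above: each
component is percent-encoded with quote_plus, then joined with the content
version (the Python 2 import and the sample values are illustrative).

from urllib import quote_plus

account, container, path, version = 'ACCT', 'my container', 'a/b.txt', 1
full_path = '{0}/{1}/{2}/{3}'.format(quote_plus(account),
                                     quote_plus(container),
                                     quote_plus(path), version)
# full_path == 'ACCT/my+container/a%2Fb.txt/1'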
Example #14
class TestRainContent(BaseTestCase):
    def setUp(self):
        super(TestRainContent, self).setUp()

        if len(self.conf['rawx']) < 12:
            self.skipTest("Not enough rawx. "
                          "Rain tests needs more than 12 rawx to run")

        self.namespace = self.conf['namespace']
        self.account = self.conf['account']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_client = ContainerClient(self.gridconf)
        self.blob_client = BlobClient()
        self.container_name = "TestRainContent%f" % time.time()
        self.container_client.container_create(acct=self.account,
                                               ref=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()

    def tearDown(self):
        super(TestRainContent, self).tearDown()

    def _test_upload(self, data_size):
        data = random_data(data_size)
        content = self.content_factory.new(self.container_id, "titi",
                                           len(data), "RAIN")
        k = 6
        m = 2
        self.assertEqual(type(content), RainContent)

        content.upload(StringIO.StringIO(data))

        meta, chunks = self.container_client.content_show(
            cid=self.container_id, content=content.content_id)
        chunks = ChunksHelper(chunks)
        self.assertEqual(meta['hash'], md5_data(data))
        self.assertEqual(meta['length'], str(len(data)))
        self.assertEqual(meta['policy'], "RAIN")
        self.assertEqual(meta['name'], "titi")

        metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size))
        if metachunk_nb == 0:
            metachunk_nb = 1  # special case for empty content

        nb_chunks_min = metachunk_nb * (k + m) - (k - 1)
        nb_chunks_max = metachunk_nb * (k + m)
        self.assertGreaterEqual(len(chunks), nb_chunks_min)
        self.assertLessEqual(len(chunks), nb_chunks_max)

        for metapos in range(metachunk_nb):
            chunks_at_pos = content.chunks.filter(metapos=metapos)
            data_chunks_at_pos = chunks_at_pos.filter(is_parity=False)
            parity_chunks_at_pos = chunks_at_pos.filter(is_parity=True)

            if metapos < metachunk_nb - 1:
                self.assertEqual(len(data_chunks_at_pos), k)
            else:
                self.assertGreaterEqual(len(data_chunks_at_pos), 1)
                self.assertLessEqual(len(data_chunks_at_pos), k)
            self.assertEqual(len(parity_chunks_at_pos), m)

            for chunk in chunks_at_pos:
                meta, stream = self.blob_client.chunk_get(chunk.url)
                self.assertEqual(md5_stream(stream), chunk.hash)
                self.assertEqual(meta['content_size'], str(len(data)))
                self.assertEqual(meta['content_path'], "titi")
                self.assertEqual(meta['content_cid'], self.container_id)
                self.assertEqual(meta['content_id'], content.content_id)
                self.assertEqual(meta['chunk_id'], chunk.id)
                self.assertEqual(meta['chunk_pos'], chunk.pos)
                self.assertEqual(meta['chunk_hash'], chunk.hash)

            data_begin = metapos * self.chunk_size
            data_end = metapos * self.chunk_size + self.chunk_size
            target_metachunk_hash = md5_data(data[data_begin:data_end])

            metachunk_hash = hashlib.md5()
            for chunk in data_chunks_at_pos:
                meta, stream = self.blob_client.chunk_get(chunk.url)
                for d in stream:
                    metachunk_hash.update(d)
            self.assertEqual(metachunk_hash.hexdigest().upper(),
                             target_metachunk_hash)

    def test_upload_0_byte(self):
        self._test_upload(0)

    def test_upload_1_byte(self):
        self._test_upload(1)

    def test_upload_chunksize_bytes(self):
        self._test_upload(self.chunk_size)

    def test_upload_chunksize_plus_1_bytes(self):
        self._test_upload(self.chunk_size + 1)

    def test_chunks_cleanup_when_upload_failed(self):
        data = random_data(2 * self.chunk_size)
        content = self.content_factory.new(self.container_id, "titi",
                                           len(data), "RAIN")
        self.assertEqual(type(content), RainContent)

        # set bad url for position 1
        for chunk in content.chunks.filter(pos="1.p0"):
            chunk.url = "http://127.0.0.1:9/DEADBEEF"

        self.assertRaises(Exception, content.upload, StringIO.StringIO(data))
        for chunk in content.chunks.exclude(pos="1.p0"):
            self.assertRaises(NotFound, self.blob_client.chunk_head, chunk.url)

    def _test_rebuild(self, data_size, broken_pos_list):
        data = os.urandom(data_size)
        old_content = self.content_factory.new(self.container_id, "titi",
                                               len(data), "RAIN")
        self.assertEqual(type(old_content), RainContent)

        old_content.upload(StringIO.StringIO(data))

        # get the new structure of the uploaded content
        uploaded_content = self.content_factory.get(self.container_id,
                                                    old_content.content_id)

        old_info = {}
        for pos in broken_pos_list:
            old_info[pos] = {}
            c = uploaded_content.chunks.filter(pos=pos)[0]
            old_info[pos]["url"] = c.url
            old_info[pos]["id"] = c.id
            old_info[pos]["hash"] = c.hash
            chunk_id_to_rebuild = c.id
            meta, stream = self.blob_client.chunk_get(c.url)
            old_info[pos]["dl_meta"] = meta
            old_info[pos]["dl_hash"] = md5_stream(stream)
            # delete the chunk
            self.blob_client.chunk_delete(c.url)

        # rebuild the broken chunks
        uploaded_content.rebuild_chunk(chunk_id_to_rebuild)

        # get the new structure of the content
        rebuilt_content = self.content_factory.get(self.container_id,
                                                   uploaded_content.content_id)
        self.assertEqual(type(rebuilt_content), RainContent)

        for pos in broken_pos_list:
            c = rebuilt_content.chunks.filter(pos=pos)[0]
            rebuilt_meta, rebuilt_stream = self.blob_client.chunk_get(c.url)
            self.assertEqual(rebuilt_meta["chunk_id"], c.id)
            self.assertEqual(md5_stream(rebuilt_stream),
                             old_info[pos]["dl_hash"])
            self.assertEqual(c.hash, old_info[pos]["hash"])
            self.assertThat(c.url, NotEquals(old_info[pos]["url"]))
            del old_info[pos]["dl_meta"]["chunk_id"]
            del rebuilt_meta["chunk_id"]
            self.assertEqual(rebuilt_meta, old_info[pos]["dl_meta"])

    def test_content_0_byte_rebuild_pos_0_0(self):
        self._test_rebuild(0, ["0.0"])

    def test_content_0_byte_rebuild_pos_0_0_and_0_p0(self):
        self._test_rebuild(0, ["0.0", "0.p0"])

    def test_content_1_byte_rebuild_pos_0_0(self):
        self._test_rebuild(1, ["0.0"])

    def test_content_1_byte_rebuild_pos_0_p0(self):
        self._test_rebuild(1, ["0.p0"])

    def test_content_1_byte_rebuild_pos_0_0_and_0_p0(self):
        self._test_rebuild(1, ["0.0", "0.p0"])

    def test_content_chunksize_bytes_rebuild_pos_0_0(self):
        self._test_rebuild(self.conf["chunk_size"], ["0.0"])

    def test_content_chunksize_bytes_rebuild_pos_0_0_and_0_1(self):
        self._test_rebuild(self.conf["chunk_size"], ["0.0", "0.1"])

    def test_content_chunksize_bytes_rebuild_pos_0_0_and_0_p0(self):
        self._test_rebuild(self.conf["chunk_size"], ["0.0", "0.p0"])

    def test_content_chunksize_bytes_rebuild_pos_0_p0_and_0_p1(self):
        self._test_rebuild(self.conf["chunk_size"], ["0.p0", "0.p1"])

    def test_content_chunksize_bytes_rebuild_more_than_k_chunk(self):
        self.assertRaises(UnrecoverableContent, self._test_rebuild,
                          self.conf["chunk_size"], ["0.0", "0.1", "0.2"])

    def _new_content(self, data, broken_pos_list=[]):
        old_content = self.content_factory.new(self.container_id, "titi",
                                               len(data), "RAIN")
        self.assertEqual(type(old_content), RainContent)

        old_content.upload(StringIO.StringIO(data))

        for pos in broken_pos_list:
            c = old_content.chunks.filter(pos=pos)[0]
            self.blob_client.chunk_delete(c.url)

        # get the new structure of the uploaded content
        return self.content_factory.get(self.container_id,
                                        old_content.content_id)

    def test_orphan_chunk(self):
        content = self._new_content(random_data(10))

        self.assertRaises(OrphanChunk, content.rebuild_chunk, "uNkNoWnId")

    def test_rebuild_on_the_fly(self):
        data = random_data(self.conf["chunk_size"])
        content = self._new_content(data, ["0.0", "0.p0"])

        stream = content.rebuild_metachunk("0", on_the_fly=True)

        dl_data = "".join(stream)

        self.assertEqual(dl_data, data)

        del_chunk_0_0 = content.chunks.filter(pos="0.0")[0]
        del_chunk_0_p0 = content.chunks.filter(pos="0.p0")[0]

        self.assertRaises(NotFound, self.blob_client.chunk_get,
                          del_chunk_0_0.url)
        self.assertRaises(NotFound, self.blob_client.chunk_get,
                          del_chunk_0_p0.url)

    def _test_download(self, data_size, broken_pos_list):
        data = random_data(data_size)
        content = self._new_content(data, broken_pos_list)

        downloaded_data = "".join(content.download())

        self.assertEqual(downloaded_data, data)

        for pos in broken_pos_list:
            c = content.chunks.filter(pos=pos)[0]
            self.assertRaises(NotFound, self.blob_client.chunk_delete, c.url)

    def test_download_content_0_byte_without_broken_chunks(self):
        self._test_download(0, [])

    def test_download_content_1_byte_without_broken_chunks(self):
        self._test_download(1, [])

    def test_download_content_chunksize_bytes_without_broken_chunks(self):
        self._test_download(self.conf["chunk_size"], [])

    def test_download_content_chunksize_plus_1_without_broken_chunks(self):
        self._test_download(self.conf["chunk_size"] + 1, [])

    def test_download_content_0_byte_with_broken_0_0_and_0_p0(self):
        self._test_download(0, ["0.0", "0.p0"])

    def test_download_content_1_byte_with_broken_0_0_and_0_p0(self):
        self._test_download(1, ["0.0", "0.p0"])

    def test_download_content_2xchunksize_with_broken_0_2_and_1_0(self):
        self._test_download(2 * self.conf["chunk_size"], ["0.2", "1.0"])

    def test_download_content_chunksize_bytes_with_3_broken_chunks(self):
        data = random_data(self.conf["chunk_size"])
        content = self._new_content(data, ["0.0", "0.1", "0.2"])
        gen = content.download()
        self.assertRaises(UnrecoverableContent, gen.next)

    def test_download_interrupt_close(self):
        data = random_data(self.conf["chunk_size"])
        content = self._new_content(data, ["0.p0"])

        download_iter = content.download()

        self.assertEqual(download_iter.next(), data[0:READ_CHUNK_SIZE - 1])
        download_iter.close()
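
The chunk-count bounds asserted in _test_upload above follow from the last
metachunk being allowed to hold fewer than k data chunks. A worked example
with the k=6, m=2 used in these tests:

k, m = 6, 2
metachunk_nb = 3
nb_chunks_max = metachunk_nb * (k + m)            # 24: every metachunk full
nb_chunks_min = metachunk_nb * (k + m) - (k - 1)  # 19: last one holds a single data chunk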
Example #15
class TestRainContent(BaseTestCase):
    def setUp(self):
        super(TestRainContent, self).setUp()

        if len(self.conf['rawx']) < 12:
            self.skipTest("Not enough rawx. "
                          "Rain tests needs more than 12 rawx to run")

        self.namespace = self.conf['namespace']
        self.account = self.conf['account']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_client = ContainerClient(self.gridconf)
        self.blob_client = BlobClient()
        self.container_name = "TestRainContent%f" % time.time()
        self.container_client.container_create(acct=self.account,
                                               ref=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()

    def tearDown(self):
        super(TestRainContent, self).tearDown()

    def _test_upload(self, data_size):
        data = random_data(data_size)
        content = self.content_factory.new(self.container_id, "titi",
                                           len(data), "RAIN")
        k = 6
        m = 2
        self.assertEqual(type(content), RainContent)

        content.upload(StringIO.StringIO(data))

        meta, chunks = self.container_client.content_show(
            cid=self.container_id, content=content.content_id)
        chunks = ChunksHelper(chunks)
        self.assertEqual(meta['hash'], md5_data(data))
        self.assertEqual(meta['length'], str(len(data)))
        self.assertEqual(meta['policy'], "RAIN")
        self.assertEqual(meta['name'], "titi")

        metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size))
        if metachunk_nb == 0:
            metachunk_nb = 1  # special case for empty content

        nb_chunks_min = metachunk_nb * (1 + m)
        nb_chunks_max = metachunk_nb * (k + m)
        self.assertGreaterEqual(len(chunks), nb_chunks_min)
        self.assertLessEqual(len(chunks), nb_chunks_max)

        for metapos in range(metachunk_nb):
            chunks_at_pos = content.chunks.filter(metapos=metapos)
            data_chunks_at_pos = chunks_at_pos.filter(is_parity=False)
            parity_chunks_at_pos = chunks_at_pos.filter(is_parity=True)

            self.assertGreaterEqual(len(data_chunks_at_pos), 1)
            self.assertLessEqual(len(data_chunks_at_pos), k)
            self.assertEqual(len(parity_chunks_at_pos), m)

            for chunk in chunks_at_pos:
                meta, stream = self.blob_client.chunk_get(chunk.url)
                self.assertEqual(md5_stream(stream), chunk.hash)
                self.assertEqual(meta['content_size'], str(len(data)))
                self.assertEqual(meta['content_path'], "titi")
                self.assertEqual(meta['content_cid'], self.container_id)
                self.assertEqual(meta['content_id'], content.content_id)
                self.assertEqual(meta['chunk_id'], chunk.id)
                self.assertEqual(meta['chunk_pos'], chunk.pos)
                self.assertEqual(meta['chunk_hash'], chunk.hash)

            data_begin = metapos * self.chunk_size
            data_end = metapos * self.chunk_size + self.chunk_size
            target_metachunk_hash = md5_data(data[data_begin:data_end])

            metachunk_hash = hashlib.md5()
            for chunk in data_chunks_at_pos:
                meta, stream = self.blob_client.chunk_get(chunk.url)
                for d in stream:
                    metachunk_hash.update(d)
            self.assertEqual(metachunk_hash.hexdigest().upper(),
                             target_metachunk_hash)

    def test_upload_0_byte(self):
        self._test_upload(0)

    def test_upload_1_byte(self):
        self._test_upload(1)

    def test_upload_chunksize_bytes(self):
        self._test_upload(self.chunk_size)

    def test_upload_chunksize_plus_1_bytes(self):
        self._test_upload(self.chunk_size + 1)

    def test_chunks_cleanup_when_upload_failed(self):
        data = random_data(2 * self.chunk_size)
        content = self.content_factory.new(self.container_id, "titi",
                                           len(data), "RAIN")
        self.assertEqual(type(content), RainContent)

        # set bad url for position 1
        for chunk in content.chunks.filter(pos="1.p0"):
            chunk.url = "http://127.0.0.1:9/DEADBEEF"

        self.assertRaises(Exception, content.upload, StringIO.StringIO(data))
        for chunk in content.chunks.exclude(pos="1.p0"):
            self.assertRaises(NotFound,
                              self.blob_client.chunk_head, chunk.url)

    def _test_rebuild(self, data_size, broken_pos_list):
        data = os.urandom(data_size)
        old_content = self.content_factory.new(self.container_id, "titi",
                                               len(data), "RAIN")
        self.assertEqual(type(old_content), RainContent)

        old_content.upload(StringIO.StringIO(data))

        # get the new structure of the uploaded content
        uploaded_content = self.content_factory.get(self.container_id,
                                                    old_content.content_id)

        old_info = {}
        for pos in broken_pos_list:
            old_info[pos] = {}
            c = uploaded_content.chunks.filter(pos=pos)[0]
            old_info[pos]["url"] = c.url
            old_info[pos]["id"] = c.id
            old_info[pos]["hash"] = c.hash
            chunk_id_to_rebuild = c.id
            meta, stream = self.blob_client.chunk_get(c.url)
            old_info[pos]["dl_meta"] = meta
            old_info[pos]["dl_hash"] = md5_stream(stream)
            # delete the chunk
            self.blob_client.chunk_delete(c.url)

        # rebuild the broken chunks
        uploaded_content.rebuild_chunk(chunk_id_to_rebuild)

        # get the new structure of the content
        rebuilt_content = self.content_factory.get(self.container_id,
                                                   uploaded_content.content_id)
        self.assertEqual(type(rebuilt_content), RainContent)

        for pos in broken_pos_list:
            c = rebuilt_content.chunks.filter(pos=pos)[0]
            rebuilt_meta, rebuilt_stream = self.blob_client.chunk_get(c.url)
            self.assertEqual(rebuilt_meta["chunk_id"], c.id)
            self.assertEqual(md5_stream(rebuilt_stream),
                             old_info[pos]["dl_hash"])
            self.assertEqual(c.hash, old_info[pos]["hash"])
            self.assertThat(c.url, NotEquals(old_info[pos]["url"]))
            del old_info[pos]["dl_meta"]["chunk_id"]
            del rebuilt_meta["chunk_id"]
            self.assertEqual(rebuilt_meta, old_info[pos]["dl_meta"])

    def test_content_0_byte_rebuild_pos_0_0(self):
        self._test_rebuild(0, ["0.0"])

    def test_content_0_byte_rebuild_pos_0_0_and_0_p0(self):
        self._test_rebuild(0, ["0.0", "0.p0"])

    def test_content_1_byte_rebuild_pos_0_0(self):
        self._test_rebuild(1, ["0.0"])

    def test_content_1_byte_rebuild_pos_0_p0(self):
        self._test_rebuild(1, ["0.p0"])

    def test_content_1_byte_rebuild_pos_0_0_and_0_p0(self):
        self._test_rebuild(1, ["0.0", "0.p0"])

    def test_content_chunksize_bytes_rebuild_pos_0_0(self):
        self._test_rebuild(self.conf["chunk_size"], ["0.0"])

    def test_content_chunksize_bytes_rebuild_pos_0_0_and_0_1(self):
        self._test_rebuild(self.conf["chunk_size"], ["0.0", "0.1"])

    def test_content_chunksize_bytes_rebuild_pos_0_0_and_0_p0(self):
        self._test_rebuild(self.conf["chunk_size"], ["0.0", "0.p0"])

    def test_content_chunksize_bytes_rebuild_pos_0_p0_and_0_p1(self):
        self._test_rebuild(self.conf["chunk_size"], ["0.p0", "0.p1"])

    def test_content_chunksize_bytes_rebuild_more_than_k_chunk(self):
        self.assertRaises(UnrecoverableContent, self._test_rebuild,
                          self.conf["chunk_size"], ["0.0", "0.1", "0.2"])

    def _new_content(self, data, broken_pos_list=[]):
        old_content = self.content_factory.new(self.container_id, "titi",
                                               len(data), "RAIN")
        self.assertEqual(type(old_content), RainContent)

        old_content.upload(StringIO.StringIO(data))

        for pos in broken_pos_list:
            c = old_content.chunks.filter(pos=pos)[0]
            self.blob_client.chunk_delete(c.url)

        # get the new structure of the uploaded content
        return self.content_factory.get(self.container_id,
                                        old_content.content_id)

    def test_orphan_chunk(self):
        content = self._new_content(random_data(10))

        self.assertRaises(OrphanChunk, content.rebuild_chunk, "uNkNoWnId")

    def test_rebuild_on_the_fly(self):
        data = random_data(self.conf["chunk_size"])
        content = self._new_content(data, ["0.0", "0.p0"])

        stream = content.rebuild_metachunk("0", on_the_fly=True)

        dl_data = "".join(stream)

        self.assertEqual(dl_data, data)

        del_chunk_0_0 = content.chunks.filter(pos="0.0")[0]
        del_chunk_0_p0 = content.chunks.filter(pos="0.p0")[0]

        self.assertRaises(NotFound,
                          self.blob_client.chunk_get, del_chunk_0_0.url)
        self.assertRaises(NotFound,
                          self.blob_client.chunk_get, del_chunk_0_p0.url)

    def _test_download(self, data_size, broken_pos_list):
        data = random_data(data_size)
        content = self._new_content(data, broken_pos_list)

        downloaded_data = "".join(content.download())

        self.assertEqual(downloaded_data, data)

        for pos in broken_pos_list:
            c = content.chunks.filter(pos=pos)[0]
            self.assertRaises(NotFound, self.blob_client.chunk_delete, c.url)

    def test_download_content_0_byte_without_broken_chunks(self):
        self._test_download(0, [])

    def test_download_content_1_byte_without_broken_chunks(self):
        self._test_download(1, [])

    def test_download_content_chunksize_bytes_without_broken_chunks(self):
        self._test_download(self.conf["chunk_size"], [])

    def test_download_content_chunksize_plus_1_without_broken_chunks(self):
        self._test_download(self.conf["chunk_size"] + 1, [])

    def test_download_content_0_byte_with_broken_0_0_and_0_p0(self):
        self._test_download(0, ["0.0", "0.p0"])

    def test_download_content_1_byte_with_broken_0_0_and_0_p0(self):
        self._test_download(1, ["0.0", "0.p0"])

    def test_download_content_2xchunksize_with_broken_0_2_and_1_0(self):
        self._test_download(2 * self.conf["chunk_size"], ["0.2", "1.0"])

    def test_download_content_chunksize_bytes_with_3_broken_chunks(self):
        data = random_data(self.conf["chunk_size"])
        content = self._new_content(data, ["0.0", "0.1", "0.2"])
        gen = content.download()
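        # the failure is raised lazily, on the first read of the generator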
        self.assertRaises(UnrecoverableContent, gen.next)

    def test_download_interrupt_close(self):
        data = random_data(self.conf["chunk_size"])
        content = self._new_content(data, ["0.p0"])

        download_iter = content.download()

        dl_data = ""
        for buf in download_iter:
            dl_data += buf
        self.assertEqual(len(dl_data), len(data))
        self.assertEqual(dl_data, data)
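        # closing the exhausted generator must not raise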
        download_iter.close()
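
A note on the position strings used above: "0.1" designates data chunk 1 of
metachunk 0, while "0.p0" designates parity chunk 0 of the same metachunk (the
EC example below numbers all k + m fragments uniformly, without the "p"
prefix). A minimal parsing sketch of this convention; the helper name
parse_chunk_pos is hypothetical, not part of the oio API:

def parse_chunk_pos(pos):
    # split "metapos.subpos"; a leading "p" on the subpos marks a
    # parity chunk (hypothetical helper, for illustration only)
    metapos, subpos = pos.split(".")
    is_parity = subpos.startswith("p")
    return int(metapos), int(subpos.lstrip("p")), is_parity

assert parse_chunk_pos("0.p0") == (0, 0, True)
assert parse_chunk_pos("1.2") == (1, 2, False)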
Example #16
class TestECContent(BaseTestCase):
    def setUp(self):
        super(TestECContent, self).setUp()

        if len(self.conf['services']['rawx']) < 12:
            self.skipTest("Not enough rawx. "
                          "EC tests need at least 12 rawx to run")

        self.namespace = self.conf['namespace']
        self.account = self.conf['account']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_client = ContainerClient(self.gridconf)
        self.blob_client = BlobClient()
        self.container_name = "TestECContent%f" % time.time()
        self.container_client.container_create(acct=self.account,
                                               ref=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()
        self.content = random_str(64)
        self.stgpol = "EC"
        self.size = 1024*1024 + 320
        self.k = 6
        self.m = 3
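        # with k=6 data and m=3 parity fragments per metachunk, up to
        # 3 lost chunks are recoverable (see the rebuild tests below)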

    def tearDown(self):
        super(TestECContent, self).tearDown()

    def random_chunks(self, nb):
        # pick nb distinct positions among the k+m fragments of metachunk 0
        pos_list = random.sample(xrange(self.k + self.m), nb)
        return ["0.%s" % i for i in pos_list]

    def _test_create(self, data_size):
        # generate random test data
        data = random_data(data_size)
        # use the factory to create a new EC content
        content = self.content_factory.new(
            self.container_id, self.content, len(data), self.stgpol)
        # verify the factory gave us an ECContent
        self.assertEqual(type(content), ECContent)

        # perform the content creation
        content.create(StringIO(data))

        meta, chunks = self.container_client.content_show(
            cid=self.container_id, content=content.content_id)
        # verify metadata
        chunks = ChunksHelper(chunks)
        self.assertEqual(meta['hash'], md5_data(data))
        self.assertEqual(meta['length'], str(len(data)))
        self.assertEqual(meta['policy'], self.stgpol)
        self.assertEqual(meta['name'], self.content)

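        # an empty content still occupies one (empty) metachunk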
        metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size)) \
            if len(data) != 0 else 1

        # verify each metachunk
        for metapos in range(metachunk_nb):
            chunks_at_pos = content.chunks.filter(metapos=metapos)

            for chunk in chunks_at_pos:
                meta, stream = self.blob_client.chunk_get(chunk.url)
                self.assertEqual(meta['metachunk_size'], str(chunk.size))
                self.assertEqual(meta['metachunk_hash'], chunk.checksum)
                self.assertEqual(meta['content_path'], self.content)
                self.assertEqual(meta['container_id'], self.container_id)
                self.assertEqual(meta['content_id'], content.content_id)
                self.assertEqual(meta['chunk_id'], chunk.id)
                self.assertEqual(meta['chunk_pos'], chunk.pos)
                self.assertEqual(meta['chunk_hash'], md5_stream(stream))

    def test_create_0_byte(self):
        self._test_create(0)

    def test_create_1_byte(self):
        self._test_create(1)

    def test_create(self):
        self._test_create(DAT_LEGIT_SIZE)

    def _test_rebuild(self, data_size, broken_pos_list):
        # generate test data
        data = os.urandom(data_size)
        # create initial content
        old_content = self.content_factory.new(
            self.container_id, self.content, len(data), self.stgpol)
        # verify the factory works as intended
        self.assertEqual(type(old_content), ECContent)

        # perform initial content creation
        old_content.create(StringIO(data))

        uploaded_content = self.content_factory.get(self.container_id,
                                                    old_content.content_id)

        # break the content
        old_info = {}
        for pos in broken_pos_list:
            old_info[pos] = {}
            c = uploaded_content.chunks.filter(pos=pos)[0]
            old_info[pos]["url"] = c.url
            old_info[pos]["id"] = c.id
            old_info[pos]["hash"] = c.checksum
            chunk_id_to_rebuild = c.id
            meta, stream = self.blob_client.chunk_get(c.url)
            old_info[pos]["dl_meta"] = meta
            old_info[pos]["dl_hash"] = md5_stream(stream)
            # delete the chunk
            self.blob_client.chunk_delete(c.url)

            # rebuild the chunk we just deleted
            uploaded_content.rebuild_chunk(chunk_id_to_rebuild)

        rebuilt_content = self.content_factory.get(self.container_id,
                                                   uploaded_content.content_id)
        # sanity check
        self.assertEqual(type(rebuilt_content), ECContent)

        # verify rebuild result
        for pos in broken_pos_list:
            c = rebuilt_content.chunks.filter(pos=pos)[0]
            rebuilt_meta, rebuilt_stream = self.blob_client.chunk_get(c.url)
            self.assertEqual(rebuilt_meta["chunk_id"], c.id)
            self.assertEqual(md5_stream(rebuilt_stream),
                             old_info[pos]["dl_hash"])
            self.assertEqual(c.checksum, old_info[pos]["hash"])
            self.assertNotEqual(c.url, old_info[pos]["url"])
            del old_info[pos]["dl_meta"]["chunk_id"]
            del rebuilt_meta["chunk_id"]
            self.assertEqual(rebuilt_meta, old_info[pos]["dl_meta"])

    def test_content_0_byte_rebuild(self):
        self._test_rebuild(0, self.random_chunks(1))

    def test_content_0_byte_rebuild_advanced(self):
        self._test_rebuild(0, self.random_chunks(3))

    def test_content_1_byte_rebuild(self):
        self._test_rebuild(1, self.random_chunks(1))

    def test_content_1_byte_rebuild_advanced(self):
        self._test_rebuild(1, self.random_chunks(3))

    def test_content_rebuild(self):
        self._test_rebuild(DAT_LEGIT_SIZE, self.random_chunks(1))

    def test_content_rebuild_advanced(self):
        self._test_rebuild(DAT_LEGIT_SIZE, self.random_chunks(3))

    def test_content_rebuild_unrecoverable(self):
        self.assertRaises(
            UnrecoverableContent, self._test_rebuild, DAT_LEGIT_SIZE,
            self.random_chunks(4))

    def _new_content(self, data, broken_pos_list=None):
        # avoid the shared-mutable-default pitfall
        broken_pos_list = broken_pos_list or []
        old_content = self.content_factory.new(
            self.container_id, self.content, len(data), self.stgpol)
        self.assertEqual(type(old_content), ECContent)

        old_content.create(StringIO(data))

        # break content
        for pos in broken_pos_list:
            c = old_content.chunks.filter(pos=pos)[0]
            self.blob_client.chunk_delete(c.url)

        # get the new structure of the uploaded content
        return self.content_factory.get(self.container_id,
                                        old_content.content_id)

    def test_orphan_chunk(self):
        content = self._new_content(random_data(10))
        self.assertRaises(OrphanChunk, content.rebuild_chunk, "invalid")

    def _test_fetch(self, data_size, broken_pos_list=None):
        broken_pos_list = broken_pos_list or []
        test_data = random_data(data_size)
        content = self._new_content(test_data, broken_pos_list)

        data = "".join(content.fetch())

        self.assertEqual(len(data), len(test_data))
        self.assertEqual(md5_data(data), md5_data(test_data))

        # verify the broken chunks are still absent (delete raises NotFound)
        for pos in broken_pos_list:
            chunk = content.chunks.filter(pos=pos)[0]
            self.assertRaises(
                NotFound, self.blob_client.chunk_delete, chunk.url)

    def test_fetch_content_0_byte(self):
        self._test_fetch(0)

    def test_fetch_content_1_byte(self):
        self._test_fetch(1)

    def test_fetch_content(self):
        self._test_fetch(DAT_LEGIT_SIZE)

    def test_fetch_content_0_byte_broken(self):
        self._test_fetch(0, self.random_chunks(3))

    def test_fetch_content_1_byte_broken(self):
        self._test_fetch(1, self.random_chunks(3))

    def test_fetch_content_broken(self):
        self._test_fetch(DAT_LEGIT_SIZE, self.random_chunks(3))

    def test_fetch_content_unrecoverable(self):
        broken_chunks = self.random_chunks(4)
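        # four missing fragments leave fewer than k available; the fetch
        # path reports this as a generic OioException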
        self.assertRaises(
            OioException, self._test_fetch, DAT_LEGIT_SIZE, broken_chunks)
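
Why four losses are fatal in the EC tests above: with k = 6 data fragments and
m = 3 parity fragments per metachunk, reconstruction needs any k of the
k + m = 9 fragments (assuming an MDS code such as Reed-Solomon, which the
recoverable/unrecoverable boundary in these tests is consistent with), so
m = 3 losses are the tolerable maximum. A quick standalone check of that
arithmetic:

k, m = 6, 3
total = k + m  # 9 fragments per metachunk
for lost in range(total + 1):
    # reconstruction needs at least k surviving fragments
    status = "recoverable" if (total - lost) >= k else "unrecoverable"
    print("%d lost -> %s" % (lost, status))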