Example 1
    def setUp(self):
        super(TestChunksHelper, self).setUp()

        self.dup_c1_1 = {
            "url": "http://127.0.0.1:6011/C1C1",
            "pos": "0", "size": 1048576,
            "hash": "2E47D13C3E2C47E0C537028AD637CCBF"}
        self.dup_c1_2 = {
            "url": "http://127.0.0.1:6010/C1C2",
            "pos": "0", "size": 1048576,
            "hash": "2E47D13C3E2C47E0C537028AD637CCBF"}
        self.dup_c2_1 = {
            "url": "http://127.0.0.1:6012/C2C1",
            "pos": "1", "size": 1048576,
            "hash": "045B70673D8271767D4D21BCDB040F6C"}
        self.dup_c2_2 = {
            "url": "http://127.0.0.1:6011/C2C2",
            "pos": "1", "size": 1048576,
            "hash": "045B70673D8271767D4D21BCDB040F6C"
        }
        self.dup_chunks_raw = [self.dup_c1_1, self.dup_c1_2,
                               self.dup_c2_1, self.dup_c2_2]
        self.dup_chunks = ChunksHelper(self.dup_chunks_raw)

        self.ec_c0_0 = {
            "url": "http://127.0.0.1:6017/C0_0",
            "pos": "0.0", "size": 1048576,
            "hash": "00000000000000000000000000000000"}
        self.ec_c0_1 = {
            "url": "http://127.0.0.1:6016/C0_1",
            "pos": "0.1", "size": 1048576,
            "hash": "00000000000000000000000000000000"}
        self.ec_c0_2 = {
            "url": "http://127.0.0.1:6011/C0_P",
            "pos": "0.2", "size": 1048576,
            "hash": "00000000000000000000000000000000"}
        self.ec_c1_0 = {
            "url": "http://127.0.0.1:6017/C1_0",
            "pos": "1.0", "size": 1048576,
            "hash": "00000000000000000000000000000000"}
        self.ec_c1_1 = {
            "url": "http://127.0.0.1:6016/C1_1",
            "pos": "1.1", "size": 1048576,
            "hash": "00000000000000000000000000000000"}
        self.ec_c1_2 = {
            "url": "http://127.0.0.1:6011/C1_P",
            "pos": "1.2", "size": 1048576,
            "hash": "00000000000000000000000000000000"}
        self.ec_chunks_raw = [self.ec_c0_0, self.ec_c0_1, self.ec_c0_2,
                              self.ec_c1_0, self.ec_c1_1, self.ec_c1_2]
        self.ec_chunks = ChunksHelper(self.ec_chunks_raw)
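The two fixture sets above differ only in how "pos" is encoded: replicated chunks use a plain position ("0", "1"), while EC chunks use "metapos.subpos" ("0.0" to "1.2"). The standalone sketch below (not part of the test suite; the helper name is made up) groups either raw list by metachunk position using only that convention.

from collections import defaultdict

def group_by_metapos(raw_chunks):
    # Only the part of "pos" before the optional dot identifies the
    # metachunk: "0" and "0.2" both belong to metachunk 0.
    groups = defaultdict(list)
    for chunk in raw_chunks:
        groups[int(chunk["pos"].split(".", 1)[0])].append(chunk)
    return dict(groups)

# group_by_metapos(self.ec_chunks_raw)
#   -> {0: [C0_0, C0_1, C0_P], 1: [C1_0, C1_1, C1_P]}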
Example 2
    def _test_upload(self, data_size):
        data = random_data(data_size)
        content = self.content_factory.new(self.container_id, "titi",
                                           len(data), "RAIN")
        k = 6
        m = 2
        self.assertEqual(type(content), RainContent)

        content.upload(StringIO.StringIO(data))

        meta, chunks = self.container_client.content_show(
            cid=self.container_id, content=content.content_id)
        chunks = ChunksHelper(chunks)
        self.assertEqual(meta['hash'], md5_data(data))
        self.assertEqual(meta['length'], str(len(data)))
        self.assertEqual(meta['policy'], "RAIN")
        self.assertEqual(meta['name'], "titi")

        metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size))
        if metachunk_nb == 0:
            metachunk_nb = 1  # special case for empty content

        nb_chunks_min = metachunk_nb * (1 + m)
        nb_chunks_max = metachunk_nb * (k + m)
        self.assertGreaterEqual(len(chunks), nb_chunks_min)
        self.assertLessEqual(len(chunks), nb_chunks_max)

        for metapos in range(metachunk_nb):
            chunks_at_pos = content.chunks.filter(metapos=metapos)
            data_chunks_at_pos = chunks_at_pos.filter(is_parity=False)
            parity_chunks_at_pos = chunks_at_pos.filter(is_parity=True)

            self.assertGreaterEqual(len(data_chunks_at_pos), 1)
            self.assertLessEqual(len(data_chunks_at_pos), k)
            self.assertEqual(len(parity_chunks_at_pos), m)

            for chunk in chunks_at_pos:
                meta, stream = self.blob_client.chunk_get(chunk.url)
                self.assertEqual(md5_stream(stream), chunk.hash)
                self.assertEqual(meta['content_size'], str(len(data)))
                self.assertEqual(meta['content_path'], "titi")
                self.assertEqual(meta['content_cid'], self.container_id)
                self.assertEqual(meta['content_id'], content.content_id)
                self.assertEqual(meta['chunk_id'], chunk.id)
                self.assertEqual(meta['chunk_pos'], chunk.pos)
                self.assertEqual(meta['chunk_hash'], chunk.hash)

            data_begin = metapos * self.chunk_size
            data_end = metapos * self.chunk_size + self.chunk_size
            target_metachunk_hash = md5_data(data[data_begin:data_end])

            metachunk_hash = hashlib.md5()
            for chunk in data_chunks_at_pos:
                meta, stream = self.blob_client.chunk_get(chunk.url)
                for d in stream:
                    metachunk_hash.update(d)
            self.assertEqual(metachunk_hash.hexdigest().upper(),
                             target_metachunk_hash)
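The chunk-count assertions above bound len(chunks) between metachunk_nb * (1 + m) and metachunk_nb * (k + m), because each metachunk stores at least one and at most k data chunks plus exactly m parity chunks. A hypothetical helper (not from the suite) reproducing that arithmetic with the k=6, m=2 values hard-coded in the test:

import math

def rain_chunk_bounds(data_size, chunk_size, k=6, m=2):
    # Empty content is still stored as one metachunk, hence the "or 1".
    metachunk_nb = int(math.ceil(float(data_size) / chunk_size)) or 1
    return metachunk_nb * (1 + m), metachunk_nb * (k + m)

# rain_chunk_bounds(3 * 1048576, 1048576) == (9, 24)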
Example 3
 def test_sort_ec(self):
     ec_chunks = ChunksHelper([
         self.ec_c1_2, self.ec_c1_1, self.ec_c1_0,
         self.ec_c0_2, self.ec_c0_1, self.ec_c0_0
     ])
     self.assertEqual(ec_chunks.raw(), [
         self.ec_c0_0, self.ec_c0_1, self.ec_c0_2,
         self.ec_c1_0, self.ec_c1_1, self.ec_c1_2
     ])
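One way to obtain the ordering asserted here is to sort on a numeric (metapos, subpos) key; this is only a sketch of the expected behaviour, not ChunksHelper's actual comparison code.

def position_key(chunk):
    # "0.2" -> (0, 2); a bare "1" (replicated chunk) sorts as (1, -1).
    parts = chunk["pos"].split(".")
    return (int(parts[0]), int(parts[1]) if len(parts) > 1 else -1)

def sort_raw_chunks(raw_chunks):
    return sorted(raw_chunks, key=position_key)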
Example 4
 def test_sort_dup(self):
     chunks = ChunksHelper([
         self.dup_c2_2, self.dup_c2_1,
         self.dup_c1_2, self.dup_c1_1
     ])
     self.assertEqual(chunks.raw(), [
         self.dup_c1_1, self.dup_c1_2,
         self.dup_c2_1, self.dup_c2_2
     ])
Example 5
 def test_sort_rain(self):
     rain_chunks = ChunksHelper([
         self.rain_c1_p, self.rain_c1_1, self.rain_c1_0, self.rain_c0_p,
         self.rain_c0_1, self.rain_c0_0
     ])
     self.assertEqual(rain_chunks.raw(), [
         self.rain_c0_0, self.rain_c0_1, self.rain_c0_p, self.rain_c1_0,
         self.rain_c1_1, self.rain_c1_p
     ])
Example 6
    def _test_create(self, stgpol, data_size):
        data = random_data(data_size)
        content = self.content_factory.new(self.container_id, self.content,
                                           len(data), stgpol)

        content.create(BytesIO(data))

        meta, chunks = self.container_client.content_locate(
            cid=self.container_id, content=content.content_id)
        self.assertEqual(meta['hash'], md5_data(data))
        self.assertEqual(meta['length'], str(len(data)))
        self.assertEqual(meta['policy'], stgpol)
        self.assertEqual(meta['name'], self.content)

        metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size))
        if metachunk_nb == 0:
            metachunk_nb = 1  # special case for empty content

        chunks = ChunksHelper(chunks)

        # TODO: derive the copy count from the storage policy definition
        # instead of hard-coding it here
        if stgpol == self.stgpol_threecopies:
            nb_copy = 3
        elif stgpol == self.stgpol_twocopies:
            nb_copy = 2
        elif stgpol == self.stgpol:
            nb_copy = 1

        self.assertEqual(len(chunks), metachunk_nb * nb_copy)

        for pos in range(metachunk_nb):
            chunks_at_pos = chunks.filter(pos=pos)
            self.assertEqual(len(chunks_at_pos), nb_copy)

            data_begin = pos * self.chunk_size
            data_end = pos * self.chunk_size + self.chunk_size
            chunk_hash = md5_data(data[data_begin:data_end])

            for chunk in chunks_at_pos:
                meta, stream = self.blob_client.chunk_get(chunk.url)
                self.assertEqual(md5_stream(stream), chunk_hash)
                self.assertEqual(meta['content_path'], self.content)
                self.assertEqual(meta['container_id'], self.container_id)
                self.assertEqual(meta['content_id'], content.content_id)
                self.assertEqual(meta['chunk_id'], chunk.id)
                self.assertEqual(meta['chunk_pos'], str(pos))
                # Check that chunk data matches chunk hash from xattr
                self.assertEqual(meta['chunk_hash'], chunk_hash)
                # Check that chunk data matches chunk hash from database
                self.assertEqual(chunk.checksum, chunk_hash)
                full_path = encode_fullpath(self.account, self.container_name,
                                            self.content,
                                            meta['content_version'],
                                            meta['content_id'])
                self.assertEqual(meta['full_path'], full_path)
                self.assertEqual(meta['oio_version'], '4.2')
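With a replicated policy every chunk at a given position carries the same payload, so the expected chunk hash is simply the MD5 of the matching slice of data (the tests compare it against md5_data, which appears to return an uppercase hex digest). Below is a standalone sketch of the slicing done above; the generator name is illustrative.

import math

def iter_metachunk_slices(data, chunk_size):
    # One payload slice per metachunk position, mirroring the
    # data_begin/data_end arithmetic above; empty content still yields a
    # single empty metachunk.
    metachunk_nb = int(math.ceil(float(len(data)) / chunk_size)) or 1
    for pos in range(metachunk_nb):
        yield pos, data[pos * chunk_size:(pos + 1) * chunk_size]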
Example 7
    def _test_create(self, data_size):
        # generate random test data
        data = random_data(data_size)
        # using factory create new EC content
        content = self.content_factory.new(self.container_id, self.content,
                                           len(data), self.stgpol)
        # verify the factory gave us an ECContent
        self.assertEqual(type(content), ECContent)

        # perform the content creation
        content.create(BytesIO(data))

        meta, chunks = self.container_client.content_locate(
            cid=self.container_id, content=content.content_id)
        # verify metadata
        chunks = ChunksHelper(chunks)
        self.assertEqual(meta['hash'], md5_data(data))
        self.assertEqual(meta['length'], str(len(data)))
        self.assertEqual(meta['policy'], self.stgpol)
        self.assertEqual(meta['name'], self.content)

        metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size)) \
            if len(data) != 0 else 1

        offset = 0
        # verify each metachunk
        for metapos in range(metachunk_nb):
            chunks_at_pos = content.chunks.filter(metapos=metapos)
            if len(chunks_at_pos) < 1:
                break
            metachunk_size = chunks_at_pos[0].size
            metachunk_hash = md5_data(data[offset:offset + metachunk_size])

            for chunk in chunks_at_pos:
                meta, stream = self.blob_client.chunk_get(chunk.url)
                self.assertEqual(meta['metachunk_size'], str(chunk.size))
                self.assertEqual(meta['metachunk_hash'], chunk.checksum)
                self.assertEqual(meta['content_path'], self.content)
                self.assertEqual(meta['container_id'], self.container_id)
                self.assertEqual(meta['content_id'], content.content_id)
                self.assertEqual(meta['chunk_id'], chunk.id)
                self.assertEqual(meta['chunk_pos'], chunk.pos)
                self.assertEqual(meta['chunk_hash'], md5_stream(stream))
                full_path = encode_fullpath(self.account, self.container_name,
                                            self.content,
                                            meta['content_version'],
                                            meta['content_id'])
                self.assertEqual(meta['full_path'], full_path)
                self.assertEqual(meta['oio_version'], '4.2')
                self.assertEqual(metachunk_hash, chunk.checksum)

            offset += metachunk_size
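Unlike the replicated tests, this loop advances through the payload by the metachunk size recorded on the chunks rather than by a fixed chunk_size. The sketch below reproduces that walk under the assumption (suggested by Example 2) that md5_data is an uppercase hex MD5; the function name is made up.

import hashlib

def expected_metachunk_hashes(data, metachunk_sizes):
    hashes, offset = [], 0
    for size in metachunk_sizes:
        # Hash the slice covered by this metachunk, then move the offset on.
        hashes.append(
            hashlib.md5(data[offset:offset + size]).hexdigest().upper())
        offset += size
    return hashes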
Example 8
    def _test_upload(self, stgpol, data_size):
        data = random_data(data_size)
        content = self.content_factory.new(self.container_id, "titi",
                                           len(data), stgpol)
        self.assertEqual(type(content), DupContent)

        content.upload(StringIO.StringIO(data))

        meta, chunks = self.container_client.content_show(
            cid=self.container_id, content=content.content_id)
        chunks = ChunksHelper(chunks)
        self.assertEqual(meta['hash'], md5_data(data))
        self.assertEqual(meta['length'], str(len(data)))
        self.assertEqual(meta['policy'], stgpol)
        self.assertEqual(meta['name'], "titi")

        metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size))
        if metachunk_nb == 0:
            metachunk_nb = 1  # special case for empty content

        if stgpol == "THREECOPIES":
            nb_copy = 3
        elif stgpol == "TWOCOPIES":
            nb_copy = 2
        elif stgpol == "SINGLE":
            nb_copy = 1

        self.assertEqual(len(chunks), metachunk_nb * nb_copy)

        for pos in range(metachunk_nb):
            chunks_at_pos = chunks.filter(pos=pos)
            self.assertEqual(len(chunks_at_pos), nb_copy)

            data_begin = pos * self.chunk_size
            data_end = pos * self.chunk_size + self.chunk_size
            chunk_hash = md5_data(data[data_begin:data_end])

            for chunk in chunks_at_pos:
                meta, stream = self.blob_client.chunk_get(chunk.url)
                self.assertEqual(md5_stream(stream), chunk_hash)
                self.assertEqual(meta['content_size'], str(len(data)))
                self.assertEqual(meta['content_path'], "titi")
                self.assertEqual(meta['content_cid'], self.container_id)
                self.assertEqual(meta['content_id'], content.content_id)
                self.assertEqual(meta['chunk_id'], chunk.id)
                self.assertEqual(meta['chunk_pos'], str(pos))
                self.assertEqual(meta['chunk_hash'], chunk_hash)
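The if/elif ladder above maps the literal policy names used by this test to a replica count, and silently leaves nb_copy undefined for any other value. A table-driven sketch of the same mapping (names are illustrative, not project API):

NB_COPY_BY_POLICY = {"THREECOPIES": 3, "TWOCOPIES": 2, "SINGLE": 1}

def copies_for_policy(stgpol):
    try:
        return NB_COPY_BY_POLICY[stgpol]
    except KeyError:
        raise ValueError("unexpected storage policy: %s" % stgpol)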
Example 9
    def _test_create(self, stgpol, data_size):
        data = random_data(data_size)
        content = self.content_factory.new(self.container_id, self.content,
                                           len(data), stgpol)

        content.create(StringIO(data))

        meta, chunks = self.container_client.content_show(
            cid=self.container_id, content=content.content_id)
        self.assertEqual(meta['hash'], md5_data(data))
        self.assertEqual(meta['length'], str(len(data)))
        self.assertEqual(meta['policy'], stgpol)
        self.assertEqual(meta['name'], self.content)

        metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size))
        if metachunk_nb == 0:
            metachunk_nb = 1  # special case for empty content

        chunks = ChunksHelper(chunks)

        # TODO: derive the copy count from the storage policy definition
        # instead of hard-coding it here
        if stgpol == self.stgpol_threecopies:
            nb_copy = 3
        elif stgpol == self.stgpol_twocopies:
            nb_copy = 2
        elif stgpol == self.stgpol:
            nb_copy = 1

        self.assertEqual(len(chunks), metachunk_nb * nb_copy)

        for pos in range(metachunk_nb):
            chunks_at_pos = chunks.filter(pos=pos)
            self.assertEqual(len(chunks_at_pos), nb_copy)

            data_begin = pos * self.chunk_size
            data_end = pos * self.chunk_size + self.chunk_size
            chunk_hash = md5_data(data[data_begin:data_end])

            for chunk in chunks_at_pos:
                meta, stream = self.blob_client.chunk_get(chunk.url)
                self.assertEqual(md5_stream(stream), chunk_hash)
                self.assertEqual(meta['content_path'], self.content)
                self.assertEqual(meta['container_id'], self.container_id)
                self.assertEqual(meta['content_id'], content.content_id)
                self.assertEqual(meta['chunk_id'], chunk.id)
                self.assertEqual(meta['chunk_pos'], str(pos))
                self.assertEqual(meta['chunk_hash'], chunk_hash)
Example 10
    def _test_create(self, data_size):
        # generate random test data
        data = random_data(data_size)
        # using factory create new EC content
        content = self.content_factory.new(
            self.container_id, self.content, len(data), self.stgpol)
        # verify the factory gave us an ECContent
        self.assertEqual(type(content), ECContent)

        # perform the content creation
        content.create(StringIO(data))

        meta, chunks = self.container_client.content_show(
            cid=self.container_id, content=content.content_id)
        # verify metadata
        chunks = ChunksHelper(chunks)
        self.assertEqual(meta['hash'], md5_data(data))
        self.assertEqual(meta['length'], str(len(data)))
        self.assertEqual(meta['policy'], self.stgpol)
        self.assertEqual(meta['name'], self.content)

        metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size)) \
            if len(data) != 0 else 1

        # verify each metachunk
        for metapos in range(metachunk_nb):
            chunks_at_pos = content.chunks.filter(metapos=metapos)

            for chunk in chunks_at_pos:
                meta, stream = self.blob_client.chunk_get(chunk.url)
                self.assertEqual(meta['metachunk_size'], str(chunk.size))
                self.assertEqual(meta['metachunk_hash'], chunk.checksum)
                self.assertEqual(meta['content_path'], self.content)
                self.assertEqual(meta['container_id'], self.container_id)
                self.assertEqual(meta['content_id'], content.content_id)
                self.assertEqual(meta['chunk_id'], chunk.id)
                self.assertEqual(meta['chunk_pos'], chunk.pos)
                self.assertEqual(meta['chunk_hash'], md5_stream(stream))
Example 11
    def _upload(self, stream):
        global_checksum = hashlib.md5()
        total_bytes_transferred = 0
        content_chunks = []

        def _limit_stream(stream, size):
            # Yield at most `size` bytes from `stream`, in pieces of at most
            # WRITE_CHUNK_SIZE, feeding the whole-content checksum as we go.
            read_size = 0
            while read_size < size:
                to_read = size - read_size
                if to_read > WRITE_CHUNK_SIZE:
                    to_read = WRITE_CHUNK_SIZE
                data = stream.read(to_read)
                global_checksum.update(data)
                read_size += to_read
                yield data

        def _decode_chunklist(chunklist):
            res = []
            for c in chunklist.split(';'):
                pos, url, size, hash = c.split('|')
                res.append({
                    "url": "http://%s" % url,
                    "pos": pos,
                    "size": int(size),
                    "hash": hash
                })
            return res

        for pos in xrange(self._get_metachunk_nb()):
            chunks_at_pos = self.chunks.filter(metapos=pos)

            chunk_size = self.chunks[0].size
            remaining_bytes = self.length - total_bytes_transferred
            if chunk_size > remaining_bytes:
                chunk_size = remaining_bytes

            headers = {}
            headers["X-oio-chunk-meta-content-storage-policy"] = \
                self.stgpol_name
            headers["X-oio-chunk-meta-rawxlist"] = \
                self._encode_rawxlist(chunks_at_pos)
            headers[chunk_headers["content_id"]] = self.content_id
            headers[chunk_headers["content_version"]] = self.version
            headers[chunk_headers["content_path"]] = self.path
            headers[chunk_headers["content_size"]] = self.length
            headers[chunk_headers["content_chunksnb"]] = \
                self._get_metachunk_nb()
            headers[chunk_headers["content_cid"]] = self.container_id
            headers[chunk_headers["chunk_pos"]] = pos
            headers["X-oio-chunk-meta-chunk-size"] = chunk_size
            headers[chunk_headers["content_mimetype"]] = self.mime_type
            headers[chunk_headers["content_chunkmethod"]] = self.chunk_method

            resp = self.session.put(self._get_rain_addr(),
                                    data=_limit_stream(stream, chunk_size),
                                    headers=headers)
            resp.raise_for_status()

            content_chunks.extend(_decode_chunklist(resp.headers['chunklist']))

            total_bytes_transferred += chunk_size

        self.chunks = ChunksHelper(content_chunks)
        self.hash = global_checksum.hexdigest().upper()

        self._meta2_create_object()
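The rawx response is consumed through its 'chunklist' header, whose wire format can be read off _decode_chunklist above: ';'-separated entries, each 'pos|url|size|hash' with a scheme-less URL. Below is a standalone copy of that parsing with a made-up sample value, kept only to document the format.

def decode_chunklist(chunklist):
    chunks = []
    for entry in chunklist.split(';'):
        pos, url, size, checksum = entry.split('|')
        chunks.append({"url": "http://%s" % url, "pos": pos,
                       "size": int(size), "hash": checksum})
    return chunks

# Made-up sample header value:
sample = ("0.0|127.0.0.1:6017/C0_0|1048576|00000000000000000000000000000000;"
          "0.1|127.0.0.1:6016/C0_1|1048576|00000000000000000000000000000000")
assert decode_chunklist(sample)[1]["url"] == "http://127.0.0.1:6016/C0_1"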