def _test_upload(self, data_size):
    """Upload random data with the "RAIN" policy and verify what was stored.

    Checks the content metadata returned by the container client, the
    number of chunks per metachunk (between 1 and ``k`` data chunks and
    exactly ``m`` parity chunks), the metadata of every chunk as returned
    by the blob client, and that the concatenated data chunks of each
    metachunk hash to the same value as the matching slice of input.

    :param data_size: number of random bytes to upload (0 is allowed).
    """
    data = random_data(data_size)
    content = self.content_factory.new(self.container_id, "titi",
                                       len(data), "RAIN")
    # Layout expected by this test: up to k data chunks and m parity
    # chunks per metachunk.
    k = 6
    m = 2
    self.assertEqual(type(content), RainContent)

    content.upload(StringIO.StringIO(data))

    meta, chunks = self.container_client.content_show(
        cid=self.container_id, content=content.content_id)
    chunks = ChunksHelper(chunks)
    self.assertEqual(meta['hash'], md5_data(data))
    self.assertEqual(meta['length'], str(len(data)))
    self.assertEqual(meta['policy'], "RAIN")
    self.assertEqual(meta['name'], "titi")

    metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size))
    if metachunk_nb == 0:
        metachunk_nb = 1  # special case for empty content

    nb_chunks_min = metachunk_nb * (1 + m)
    nb_chunks_max = metachunk_nb * (k + m)
    self.assertGreaterEqual(len(chunks), nb_chunks_min)
    self.assertLessEqual(len(chunks), nb_chunks_max)

    for metapos in range(metachunk_nb):
        chunks_at_pos = content.chunks.filter(metapos=metapos)
        data_chunks_at_pos = chunks_at_pos.filter(is_parity=False)
        parity_chunks_at_pos = chunks_at_pos.filter(is_parity=True)

        # Dedicated assertions report the actual count on failure.
        self.assertGreaterEqual(len(data_chunks_at_pos), 1)
        self.assertLessEqual(len(data_chunks_at_pos), k)
        self.assertEqual(len(parity_chunks_at_pos), m)

        for chunk in chunks_at_pos:
            chunk_meta, stream = self.blob_client.chunk_get(chunk.url)
            self.assertEqual(md5_stream(stream), chunk.hash)
            self.assertEqual(chunk_meta['content_size'], str(len(data)))
            self.assertEqual(chunk_meta['content_path'], "titi")
            self.assertEqual(chunk_meta['content_cid'], self.container_id)
            # Compare against the content object: comparing the value
            # with itself would always pass.
            self.assertEqual(chunk_meta['content_id'], content.content_id)
            self.assertEqual(chunk_meta['chunk_id'], chunk.id)
            self.assertEqual(chunk_meta['chunk_pos'], chunk.pos)
            self.assertEqual(chunk_meta['chunk_hash'], chunk.hash)

        data_begin = metapos * self.chunk_size
        data_end = data_begin + self.chunk_size
        target_metachunk_hash = md5_data(data[data_begin:data_end])

        # Hash the data chunks of this metachunk in iteration order.
        metachunk_hash = hashlib.md5()
        for chunk in data_chunks_at_pos:
            _, stream = self.blob_client.chunk_get(chunk.url)
            for d in stream:
                metachunk_hash.update(d)
        self.assertEqual(metachunk_hash.hexdigest().upper(),
                         target_metachunk_hash)
def _test_create(self, stgpol, data_size):
    """Create a content with random data and verify metadata and chunks.

    Checks the content metadata returned by ``content_locate``, the
    number of chunks per position for the given policy, and for each
    chunk the data hash and the metadata returned by the blob client
    (including ``full_path`` and ``oio_version``).

    :param stgpol: storage policy; must be one of ``self.stgpol``,
        ``self.stgpol_twocopies`` or ``self.stgpol_threecopies``.
    :param data_size: number of random bytes to store (0 is allowed).
    """
    data = random_data(data_size)
    content = self.content_factory.new(self.container_id, self.content,
                                       len(data), stgpol)

    content.create(BytesIO(data))

    meta, chunks = self.container_client.content_locate(
        cid=self.container_id, content=content.content_id)
    self.assertEqual(meta['hash'], md5_data(data))
    self.assertEqual(meta['length'], str(len(data)))
    self.assertEqual(meta['policy'], stgpol)
    self.assertEqual(meta['name'], self.content)

    metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size))
    if metachunk_nb == 0:
        metachunk_nb = 1  # special case for empty content

    chunks = ChunksHelper(chunks)

    # TODO NO NO NO
    if stgpol == self.stgpol_threecopies:
        nb_copy = 3
    elif stgpol == self.stgpol_twocopies:
        nb_copy = 2
    elif stgpol == self.stgpol:
        nb_copy = 1
    else:
        # Fail with a clear message instead of an UnboundLocalError below.
        self.fail("unexpected storage policy: %r" % (stgpol,))

    self.assertEqual(len(chunks), metachunk_nb * nb_copy)

    for pos in range(metachunk_nb):
        chunks_at_pos = chunks.filter(pos=pos)
        self.assertEqual(len(chunks_at_pos), nb_copy)

        data_begin = pos * self.chunk_size
        data_end = data_begin + self.chunk_size
        chunk_hash = md5_data(data[data_begin:data_end])

        for chunk in chunks_at_pos:
            chunk_meta, stream = self.blob_client.chunk_get(chunk.url)
            self.assertEqual(md5_stream(stream), chunk_hash)
            self.assertEqual(chunk_meta['content_path'], self.content)
            self.assertEqual(chunk_meta['container_id'], self.container_id)
            # Compare against the content object: comparing the value
            # with itself would always pass.
            self.assertEqual(chunk_meta['content_id'], content.content_id)
            self.assertEqual(chunk_meta['chunk_id'], chunk.id)
            self.assertEqual(chunk_meta['chunk_pos'], str(pos))
            # Check that chunk data matches chunk hash from xattr
            self.assertEqual(chunk_meta['chunk_hash'], chunk_hash)
            # Check that chunk data matches chunk hash from database
            self.assertEqual(chunk.checksum, chunk_hash)
            full_path = encode_fullpath(self.account,
                                        self.container_name,
                                        self.content,
                                        chunk_meta['content_version'],
                                        chunk_meta['content_id'])
            self.assertEqual(chunk_meta['full_path'], full_path)
            self.assertEqual(chunk_meta['oio_version'], '4.2')
def _test_create(self, data_size):
    """Create an EC content with random data and verify what was stored.

    Checks that the factory returns an ``ECContent``, the content
    metadata returned by ``content_locate``, and for every chunk of every
    metachunk the metadata returned by the blob client. The hash of each
    input slice is compared with the checksum recorded on the chunks.

    :param data_size: number of random bytes to store (0 is allowed).
    """
    # generate random test data
    data = random_data(data_size)
    # using factory create new EC content
    content = self.content_factory.new(self.container_id, self.content,
                                       len(data), self.stgpol)
    # verify the factory gave us an ECContent
    self.assertEqual(type(content), ECContent)

    # perform the content creation
    content.create(BytesIO(data))

    meta, chunks = self.container_client.content_locate(
        cid=self.container_id, content=content.content_id)
    # verify metadata
    chunks = ChunksHelper(chunks)
    self.assertEqual(meta['hash'], md5_data(data))
    self.assertEqual(meta['length'], str(len(data)))
    self.assertEqual(meta['policy'], self.stgpol)
    self.assertEqual(meta['name'], self.content)

    if len(data) != 0:
        metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size))
    else:
        metachunk_nb = 1  # special case for empty content

    offset = 0
    # verify each metachunk
    for metapos in range(metachunk_nb):
        chunks_at_pos = content.chunks.filter(metapos=metapos)
        if len(chunks_at_pos) < 1:
            break
        # The size recorded on the first chunk is used as the length of
        # the input slice covered by this metachunk.
        metachunk_size = chunks_at_pos[0].size
        metachunk_hash = md5_data(data[offset:offset + metachunk_size])

        for chunk in chunks_at_pos:
            chunk_meta, stream = self.blob_client.chunk_get(chunk.url)
            self.assertEqual(chunk_meta['metachunk_size'], str(chunk.size))
            self.assertEqual(chunk_meta['metachunk_hash'], chunk.checksum)
            self.assertEqual(chunk_meta['content_path'], self.content)
            self.assertEqual(chunk_meta['container_id'], self.container_id)
            # Compare against the content object: comparing the value
            # with itself would always pass.
            self.assertEqual(chunk_meta['content_id'], content.content_id)
            self.assertEqual(chunk_meta['chunk_id'], chunk.id)
            self.assertEqual(chunk_meta['chunk_pos'], chunk.pos)
            self.assertEqual(chunk_meta['chunk_hash'], md5_stream(stream))
            full_path = encode_fullpath(self.account,
                                        self.container_name,
                                        self.content,
                                        chunk_meta['content_version'],
                                        chunk_meta['content_id'])
            self.assertEqual(chunk_meta['full_path'], full_path)
            self.assertEqual(chunk_meta['oio_version'], '4.2')
            self.assertEqual(metachunk_hash, chunk.checksum)

        offset += metachunk_size
def _test_fetch(self, data_size, broken_pos_list=None):
    """Fetch a freshly created content and compare it with its source data.

    The content is built by ``self._new_content`` from random data and the
    list of positions to break. After the fetch, deleting the chunk at
    each listed position is expected to raise ``NotFound``.

    :param data_size: number of random bytes in the content.
    :param broken_pos_list: chunk positions passed to ``_new_content``;
        ``None`` or empty means no position.
    """
    if not broken_pos_list:
        broken_pos_list = []
    expected = random_data(data_size)
    content = self._new_content(expected, broken_pos_list)

    fetched = b''.join(content.fetch())

    self.assertEqual(len(fetched), len(expected))
    self.assertEqual(md5_data(fetched), md5_data(expected))

    # verify that chunks are broken
    for broken_pos in broken_pos_list:
        broken_chunk = content.chunks.filter(pos=broken_pos)[0]
        broken_url = broken_chunk.url
        with self.assertRaises(NotFound):
            self.blob_client.chunk_delete(broken_url)
def _test_fetch(self, data_size, broken_pos_list=None):
    """Fetch a freshly created content and compare it with its source data.

    The content is built by ``self._new_content`` from random data and the
    list of positions to break. After the fetch, deleting the chunk at
    each listed position is expected to raise ``NotFound``.

    :param data_size: number of random bytes in the content.
    :param broken_pos_list: chunk positions passed to ``_new_content``;
        ``None`` or empty means no position.
    """
    broken_pos_list = broken_pos_list or []
    test_data = random_data(data_size)
    content = self._new_content(test_data, broken_pos_list)

    # Join with a bytes separator: identical to "" on Python 2, and the
    # only form that can join binary chunks on Python 3.
    data = b"".join(content.fetch())

    self.assertEqual(len(data), len(test_data))
    self.assertEqual(md5_data(data), md5_data(test_data))

    # verify that chunks are broken
    for pos in broken_pos_list:
        chunk = content.chunks.filter(pos=pos)[0]
        self.assertRaises(
            NotFound, self.blob_client.chunk_delete, chunk.url)
def _test_upload(self, stgpol, data_size):
    """Upload random data with a duplication policy and verify the result.

    Checks that the factory returns a ``DupContent``, the content
    metadata returned by ``content_show``, the number of chunks per
    position for the given policy, and for each chunk the data hash and
    the metadata returned by the blob client.

    :param stgpol: one of "SINGLE", "TWOCOPIES" or "THREECOPIES".
    :param data_size: number of random bytes to upload (0 is allowed).
    """
    data = random_data(data_size)
    content = self.content_factory.new(self.container_id, "titi",
                                       len(data), stgpol)
    self.assertEqual(type(content), DupContent)

    content.upload(StringIO.StringIO(data))

    meta, chunks = self.container_client.content_show(
        cid=self.container_id, content=content.content_id)
    chunks = ChunksHelper(chunks)
    self.assertEqual(meta['hash'], md5_data(data))
    self.assertEqual(meta['length'], str(len(data)))
    self.assertEqual(meta['policy'], stgpol)
    self.assertEqual(meta['name'], "titi")

    metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size))
    if metachunk_nb == 0:
        metachunk_nb = 1  # special case for empty content

    if stgpol == "THREECOPIES":
        nb_copy = 3
    elif stgpol == "TWOCOPIES":
        nb_copy = 2
    elif stgpol == "SINGLE":
        nb_copy = 1
    else:
        # Fail with a clear message instead of an UnboundLocalError below.
        self.fail("unexpected storage policy: %r" % (stgpol,))

    self.assertEqual(len(chunks), metachunk_nb * nb_copy)

    for pos in range(metachunk_nb):
        chunks_at_pos = chunks.filter(pos=pos)
        self.assertEqual(len(chunks_at_pos), nb_copy)

        data_begin = pos * self.chunk_size
        data_end = data_begin + self.chunk_size
        chunk_hash = md5_data(data[data_begin:data_end])

        for chunk in chunks_at_pos:
            chunk_meta, stream = self.blob_client.chunk_get(chunk.url)
            self.assertEqual(md5_stream(stream), chunk_hash)
            self.assertEqual(chunk_meta['content_size'], str(len(data)))
            self.assertEqual(chunk_meta['content_path'], "titi")
            self.assertEqual(chunk_meta['content_cid'], self.container_id)
            # Compare against the content object: comparing the value
            # with itself would always pass.
            self.assertEqual(chunk_meta['content_id'], content.content_id)
            self.assertEqual(chunk_meta['chunk_id'], chunk.id)
            self.assertEqual(chunk_meta['chunk_pos'], str(pos))
            self.assertEqual(chunk_meta['chunk_hash'], chunk_hash)
def _test_create(self, stgpol, data_size):
    """Create a content with random data and verify metadata and chunks.

    Checks the content metadata returned by ``content_show``, the number
    of chunks per position for the given policy, and for each chunk the
    data hash and the metadata returned by the blob client.

    :param stgpol: storage policy; must be one of ``self.stgpol``,
        ``self.stgpol_twocopies`` or ``self.stgpol_threecopies``.
    :param data_size: number of random bytes to store (0 is allowed).
    """
    data = random_data(data_size)
    content = self.content_factory.new(self.container_id, self.content,
                                       len(data), stgpol)

    content.create(StringIO(data))

    meta, chunks = self.container_client.content_show(
        cid=self.container_id, content=content.content_id)
    self.assertEqual(meta['hash'], md5_data(data))
    self.assertEqual(meta['length'], str(len(data)))
    self.assertEqual(meta['policy'], stgpol)
    self.assertEqual(meta['name'], self.content)

    metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size))
    if metachunk_nb == 0:
        metachunk_nb = 1  # special case for empty content

    chunks = ChunksHelper(chunks)

    # TODO NO NO NO
    if stgpol == self.stgpol_threecopies:
        nb_copy = 3
    elif stgpol == self.stgpol_twocopies:
        nb_copy = 2
    elif stgpol == self.stgpol:
        nb_copy = 1
    else:
        # Fail with a clear message instead of an UnboundLocalError below.
        self.fail("unexpected storage policy: %r" % (stgpol,))

    self.assertEqual(len(chunks), metachunk_nb * nb_copy)

    for pos in range(metachunk_nb):
        chunks_at_pos = chunks.filter(pos=pos)
        self.assertEqual(len(chunks_at_pos), nb_copy)

        data_begin = pos * self.chunk_size
        data_end = data_begin + self.chunk_size
        chunk_hash = md5_data(data[data_begin:data_end])

        for chunk in chunks_at_pos:
            chunk_meta, stream = self.blob_client.chunk_get(chunk.url)
            self.assertEqual(md5_stream(stream), chunk_hash)
            self.assertEqual(chunk_meta['content_path'], self.content)
            self.assertEqual(chunk_meta['container_id'], self.container_id)
            # Compare against the content object: comparing the value
            # with itself would always pass.
            self.assertEqual(chunk_meta['content_id'], content.content_id)
            self.assertEqual(chunk_meta['chunk_id'], chunk.id)
            self.assertEqual(chunk_meta['chunk_pos'], str(pos))
            self.assertEqual(chunk_meta['chunk_hash'], chunk_hash)
def _test_create(self, data_size):
    """Create an EC content with random data and verify what was stored.

    Checks that the factory returns an ``ECContent``, the content
    metadata returned by ``content_show``, and for every chunk of every
    metachunk the metadata returned by the blob client.

    :param data_size: number of random bytes to store (0 is allowed).
    """
    # generate random test data
    data = random_data(data_size)
    # using factory create new EC content
    content = self.content_factory.new(
        self.container_id, self.content, len(data), self.stgpol)
    # verify the factory gave us an ECContent
    self.assertEqual(type(content), ECContent)

    # perform the content creation
    content.create(StringIO(data))

    meta, chunks = self.container_client.content_show(
        cid=self.container_id, content=content.content_id)
    # verify metadata
    chunks = ChunksHelper(chunks)
    self.assertEqual(meta['hash'], md5_data(data))
    self.assertEqual(meta['length'], str(len(data)))
    self.assertEqual(meta['policy'], self.stgpol)
    self.assertEqual(meta['name'], self.content)

    if len(data) != 0:
        metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size))
    else:
        metachunk_nb = 1  # special case for empty content

    # verify each metachunk
    for metapos in range(metachunk_nb):
        chunks_at_pos = content.chunks.filter(metapos=metapos)

        for chunk in chunks_at_pos:
            chunk_meta, stream = self.blob_client.chunk_get(chunk.url)
            self.assertEqual(chunk_meta['metachunk_size'], str(chunk.size))
            self.assertEqual(chunk_meta['metachunk_hash'], chunk.checksum)
            self.assertEqual(chunk_meta['content_path'], self.content)
            self.assertEqual(chunk_meta['container_id'], self.container_id)
            # Compare against the content object: comparing the value
            # with itself would always pass.
            self.assertEqual(chunk_meta['content_id'], content.content_id)
            self.assertEqual(chunk_meta['chunk_id'], chunk.id)
            self.assertEqual(chunk_meta['chunk_pos'], chunk.pos)
            self.assertEqual(chunk_meta['chunk_hash'], md5_stream(stream))