def test_read_1_by_1_from_iterable_class(self):
    """GeneratorIO must read one byte at a time from a custom iterable."""

    class ByteSource(object):
        """Yield the bytes of b"abcd" one at a time."""

        def __init__(self):
            self.data = b"abcd"
            self.pos = 0

        def __iter__(self):
            return self

        def next(self):  # Python 2
            if self.pos >= len(self.data):
                raise StopIteration()
            self.pos += 1
            return self.data[self.pos - 1]

        def __next__(self):  # Python 3, yield bytes, not int
            return bytes((self.next(), ))

    gen = GeneratorIO(ByteSource())
    for expected in (b"a", b"b", b"c", b"d", b""):
        self.assertEqual(gen.read(1), expected)
def test_read_1_by_1_byte_variable_input(self):
    """Single-byte reads must work across unevenly-sized input chunks."""
    gen = GeneratorIO([b"a", b"bc", b"d"])
    for expected in (b"a", b"b", b"c", b"d", b""):
        self.assertEqual(gen.read(1), expected)
def test_read_1_by_1_byte_from_list(self):
    """Single-byte reads must work when the source is a list of bytes."""
    gen = GeneratorIO([b"a", b"b", b"c", b"d"])
    for expected in (b"a", b"b", b"c", b"d", b""):
        self.assertEqual(gen.read(1), expected)
def test_read_1_by_1_from_tuple(self):
    """Single-byte reads must work when the source is a tuple of bytes."""
    gen = GeneratorIO((b"a", b"bc", b"d"))
    for expected in (b"a", b"b", b"c", b"d", b""):
        self.assertEqual(gen.read(1), expected)
def test_read_1_by_1_byte(self):
    """Single-character reads must work from an iterator of strings."""
    pieces = ["a", "bc", "d"]
    gen = GeneratorIO(iter(pieces))
    for expected in ("a", "b", "c", "d", ""):
        self.assertEqual(gen.read(1), expected)
def test_read_1_by_1_from_generator(self):
    """Single-byte reads must work when the source is a generator."""

    def produce():
        for piece in (b'a', b'bc', b'd'):
            yield piece

    gen = GeneratorIO(produce())
    for expected in (b"a", b"b", b"c", b"d", b""):
        self.assertEqual(gen.read(1), expected)
def rebuild_chunk(self, chunk_id, allow_same_rawx=False, chunk_pos=None):
    """
    Rebuild one EC chunk of this content and upload it to a spare rawx.

    :param chunk_id: ID of the chunk to rebuild (may be None if only
        `chunk_pos` is known)
    :param allow_same_rawx: if True, do not blacklist the current chunk's
        rawx when looking for a spare location
    :param chunk_pos: position of the chunk to rebuild, used when the
        chunk is no longer registered in the content's metadata
    :raises OrphanChunk: when neither `chunk_id` nor `chunk_pos`
        designates a known chunk
    """
    current_chunk = self.chunks.filter(id=chunk_id).one()
    if current_chunk is None and chunk_pos is None:
        raise OrphanChunk("Chunk not found in content")
    elif current_chunk is None:
        # The chunk is not registered: build a placeholder from its position
        chunk = {"pos": chunk_pos, "url": ""}
        current_chunk = Chunk(chunk)
    # All the other chunks of the same metachunk are rebuild sources
    chunks = self.chunks.filter(metapos=current_chunk.metapos)\
        .exclude(id=chunk_id)
    if chunk_id is None:
        # Placeholder chunk: copy size and checksum from a sibling
        current_chunk.size = chunks[0].size
        current_chunk.checksum = chunks[0].checksum
    broken_list = list()
    if not allow_same_rawx:
        # Blacklist the broken chunk's rawx for the spare lookup
        broken_list.append(current_chunk)
    spare_url = self._get_spare_chunk(chunks.all(), broken_list)
    # Regenerate the lost chunk's data from the surviving chunks
    handler = ECRebuildHandler(chunks.raw(), current_chunk.subpos,
                               self.storage_method)
    new_chunk = {'pos': current_chunk.pos, 'url': spare_url[0]}
    new_chunk = Chunk(new_chunk)
    stream = handler.rebuild()
    meta = {}
    meta['chunk_id'] = new_chunk.id
    meta['chunk_pos'] = current_chunk.pos
    meta['container_id'] = self.container_id
    # FIXME: should be 'content_chunkmethod' everywhere
    # but sadly it isn't
    meta['chunk_method'] = self.chunk_method
    # FIXME: should be 'content_id' everywhere
    # but sadly it isn't
    meta['id'] = self.content_id
    meta['content_path'] = self.path
    # FIXME: should be 'content_policy' everywhere
    # but sadly it isn't
    meta['policy'] = self.policy
    # FIXME: should be 'content_version' everywhere
    # but sadly it isn't
    meta['version'] = self.version
    meta['metachunk_hash'] = current_chunk.checksum
    meta['metachunk_size'] = current_chunk.size
    meta['full_path'] = self.full_path
    meta['oio_version'] = OIO_VERSION
    # Upload the rebuilt data to the spare location
    self.blob_client.chunk_put(spare_url[0], meta, GeneratorIO(stream))
    if chunk_id is None:
        # The chunk was unknown: register it from scratch
        self._add_raw_chunk(current_chunk, spare_url[0])
    else:
        # The chunk was known: point its metadata to the new location
        self._update_spare_chunk(current_chunk, spare_url[0])
def test_read_1_by_1_from_iterable_class(self):
    """GeneratorIO must read one character at a time from an iterable."""

    class CharSource(object):
        """Yield the characters of "abcd" one at a time (Python 2)."""

        def __init__(self):
            self.data = "abcd"
            self.pos = 0

        def __iter__(self):
            return self

        def next(self):
            if self.pos >= len(self.data):
                raise StopIteration()
            self.pos += 1
            return self.data[self.pos - 1]

    gen = GeneratorIO(CharSource())
    for expected in ("a", "b", "c", "d", ""):
        self.assertEqual(gen.read(1), expected)
def stream(self):
    """
    Get a file-like object over the chunk data.

    Calling this right now makes the `headers` field available
    before the caller starts reading the stream.
    """
    parts_iter = self.get_iter()

    def _iter():
        for part in parts_iter:
            for data in part['iter']:
                yield data
        # Do NOT `raise StopIteration` here: since PEP 479 (enforced
        # from Python 3.7), a StopIteration raised inside a generator
        # is converted into a RuntimeError. Falling off the end of the
        # generator stops the iteration cleanly.

    return GeneratorIO(_iter())
def change_policy(self, container_id, content_id, new_policy):
    """
    Make a copy of a content with another storage policy,
    and return the new content.
    """
    old_content = self.get(container_id, content_id)
    if old_content.policy == new_policy:
        # Already stored with the requested policy, nothing to do
        return old_content

    new_content = self.copy(old_content, policy=new_policy)
    data_stream = old_content.fetch()
    new_content.create(GeneratorIO(data_stream))
    # The old content is automatically deleted because the new content
    # has the same name (but not the same id).
    return new_content
def test_read_empty_data(self):
    """Reading from an empty source must return an empty byte string."""
    for source in ([], ["", "", ""]):
        gen = GeneratorIO(source)
        self.assertEqual(gen.read(10), b'')
def stream(self):
    """
    Get a generator over chunk data.
    After calling this method, the `headers` field will be available
    (even if no data is read from the generator).
    """
    parts_iter = self.get_iter()

    def _iter():
        for part in parts_iter:
            for data in part['iter']:
                yield data
        # Do NOT `raise StopIteration` here: since PEP 479 (enforced
        # from Python 3.7), a StopIteration raised inside a generator
        # is converted into a RuntimeError. Falling off the end of the
        # generator stops the iteration cleanly.

    return GeneratorIO(_iter())
def test_move_with_wrong_size(self):
    """chunk_move must raise ChunkException on truncated source data."""
    if not self.chunk_method.startswith('ec'):
        self.skipTest('Only works with EC')

    victim = random.choice(self.chunks)
    chunk_volume = victim['url'].split('/')[2]
    chunk_id = victim['url'].split('/')[3]
    mover = BlobMoverWorker(self.conf, None,
                            self.rawx_volumes[chunk_volume])
    meta, stream = mover.blob_client.chunk_get(victim['url'])
    data = stream.read()
    stream.close()
    # Truncate the data by one byte and drop the hash from the metadata
    truncated = data[:-1]
    del meta['chunk_hash']
    mover.blob_client.chunk_get = Mock(
        return_value=(meta, GeneratorIO(truncated)))
    self.assertRaises(ChunkException, mover.chunk_move,
                      self._chunk_path(victim), chunk_id)
def test_read_more_than_data_size(self):
    """An oversized read must drain the source, then return b""."""
    pieces = [b"a", b"bc", b"d"]
    gen = GeneratorIO(pieces, True)
    # First read consumes everything, second one hits end-of-stream
    self.assertEqual(gen.read(10), b"abcd")
    self.assertEqual(gen.read(10), b"")
def test_read_more_than_data_size(self):
    """An oversized read must drain the iterator, then return ""."""
    pieces = ["a", "bc", "d"]
    gen = GeneratorIO(iter(pieces))
    # First read consumes everything, second one hits end-of-stream
    self.assertEqual(gen.read(10), "abcd")
    self.assertEqual(gen.read(10), "")
def object_create(self, account, container, file_or_path=None, data=None,
                  etag=None, obj_name=None, mime_type=None,
                  metadata=None, policy=None, key_file=None,
                  append=False, properties=None, **kwargs):
    """
    Create an object or append data to object in *container* of *account*
    with data taken from either *data* (`str` or `generator`) or
    *file_or_path* (path to a file or file-like object).
    The object will be named after *obj_name* if specified, or after
    the base name of *file_or_path*.

    :param account: name of the account where to create the object
    :type account: `str`
    :param container: name of the container where to create the object
    :type container: `str`
    :param file_or_path: file-like object or path to a file from which
        to read object data
    :type file_or_path: `str` or file-like object
    :param data: object data (if `file_or_path` is not set)
    :type data: `str` or `generator`
    :keyword etag: entity tag of the object
    :type etag: `str`
    :keyword obj_name: name of the object to create. If not set, will use
        the base name of `file_or_path`.
    :keyword mime_type: MIME type of the object
    :type mime_type: `str`
    :keyword properties: a dictionary of properties
    :type properties: `dict`
    :keyword metadata: deprecated alias for `properties`
        (emits a DeprecationWarning)
    :keyword policy: name of the storage policy
    :type policy: `str`
    :keyword key_file:
    :param append: if set, data will be append to existing object (or
        object will be created if unset)
    :type append: `bool`
    :raises exc.MissingData: when neither `data` nor `file_or_path` is set
    :raises exc.FileNotFound: when `file_or_path` is a path that
        does not exist
    :raises exc.MissingName: when no object name could be determined
    :returns: `list` of chunks, size and hash of the what has been uploaded
    """
    if (data, file_or_path) == (None, None):
        raise exc.MissingData()
    # `data` takes precedence over `file_or_path`
    src = data if data is not None else file_or_path
    if src is file_or_path:
        if isinstance(file_or_path, basestring):
            # A path: check existence and derive a default object name
            if not os.path.exists(file_or_path):
                raise exc.FileNotFound("File '%s' not found."
                                       % file_or_path)
            file_name = os.path.basename(file_or_path)
        else:
            # A file-like object: its `name` attribute is optional
            try:
                file_name = os.path.basename(file_or_path.name)
            except AttributeError:
                file_name = None
        obj_name = obj_name or file_name
    elif isgenerator(src):
        # Wrap generators so they expose a file-like `read` interface
        file_or_path = GeneratorIO(src)
        src = file_or_path
    if not obj_name:
        raise exc.MissingName(
            "No name for the object has been specified"
        )
    sysmeta = {'mime_type': mime_type, 'etag': etag}
    if metadata:
        warnings.warn(
            "You'd better use 'properties' instead of 'metadata'",
            DeprecationWarning, stacklevel=4)
        # Merge deprecated `metadata` into `properties`,
        # `properties` entries winning on conflict
        if not properties:
            properties = metadata
        else:
            properties.update(metadata)
    if src is data:
        # In-memory data: upload from a BytesIO wrapper
        return self._object_create(
            account, container, obj_name, BytesIO(data), sysmeta,
            properties=properties, policy=policy,
            key_file=key_file, append=append, **kwargs)
    elif hasattr(file_or_path, "read"):
        # File-like object (or wrapped generator): upload directly
        return self._object_create(
            account, container, obj_name, src, sysmeta,
            properties=properties, policy=policy, key_file=key_file,
            append=append, **kwargs)
    else:
        # Path on the local filesystem: open it for the upload
        with open(file_or_path, "rb") as f:
            return self._object_create(
                account, container, obj_name, f, sysmeta,
                properties=properties, policy=policy,
                key_file=key_file, append=append, **kwargs)
def rebuild_chunk(self, chunk_id, allow_same_rawx=False, chunk_pos=None,
                  allow_frozen_container=False):
    """
    Rebuild one EC chunk of this content and upload it to a spare rawx.

    :param chunk_id: ID of the chunk to rebuild (may be None if only
        `chunk_pos` is known)
    :param allow_same_rawx: if True, do not blacklist the current chunk's
        rawx when looking for a spare location
    :param chunk_pos: position of the chunk to rebuild, used when the
        chunk ID is unknown
    :param allow_frozen_container: allow the metadata update even if the
        container is frozen
    :raises OrphanChunk: when neither `chunk_id` nor `chunk_pos`
        designates a known chunk
    """
    # Identify the chunk to rebuild
    current_chunk = self.chunks.filter(id=chunk_id).one()
    if current_chunk is None and chunk_pos is None:
        raise OrphanChunk("Chunk not found in content")
    elif current_chunk is None:
        # Lookup by position since the ID is unknown
        current_chunk = self.chunks.filter(pos=chunk_pos).one()
        if current_chunk is None:
            # Not registered at all: build a placeholder from the position
            chunk = {'pos': chunk_pos, 'url': ''}
            current_chunk = Chunk(chunk)
        else:
            chunk_id = current_chunk.id
            self.logger.debug('Chunk at pos %s has id %s',
                              chunk_pos, chunk_id)
    # All the other chunks of the same metachunk are rebuild sources
    chunks = self.chunks.filter(metapos=current_chunk.metapos)\
        .exclude(id=chunk_id, pos=chunk_pos)
    if chunk_id is None:
        # Placeholder chunk: copy size and checksum from a sibling
        current_chunk.size = chunks[0].size
        current_chunk.checksum = chunks[0].checksum
    # Find a spare chunk address
    broken_list = list()
    if not allow_same_rawx and chunk_id is not None:
        broken_list.append(current_chunk)
    spare_url, _quals = self._get_spare_chunk(chunks.all(), broken_list)
    new_chunk = Chunk({'pos': current_chunk.pos, 'url': spare_url[0]})
    # Regenerate the lost chunk's data, from existing chunks
    handler = ECRebuildHandler(chunks.raw(), current_chunk.subpos,
                               self.storage_method)
    stream = handler.rebuild()
    # Actually create the spare chunk
    meta = {}
    meta['chunk_id'] = new_chunk.id
    meta['chunk_pos'] = current_chunk.pos
    meta['container_id'] = self.container_id
    # FIXME: should be 'content_chunkmethod' everywhere
    # but sadly it isn't
    meta['chunk_method'] = self.chunk_method
    # FIXME: should be 'content_id' everywhere
    # but sadly it isn't
    meta['id'] = self.content_id
    meta['content_path'] = self.path
    # FIXME: should be 'content_policy' everywhere
    # but sadly it isn't
    meta['policy'] = self.policy
    # FIXME: should be 'content_version' everywhere
    # but sadly it isn't
    meta['version'] = self.version
    meta['metachunk_hash'] = current_chunk.checksum
    meta['metachunk_size'] = current_chunk.size
    meta['full_path'] = self.full_path
    meta['oio_version'] = OIO_VERSION
    self.blob_client.chunk_put(spare_url[0], meta,
                               GeneratorIO(stream))
    # Register the spare chunk in object's metadata
    if chunk_id is None:
        self._add_raw_chunk(current_chunk, spare_url[0],
                            frozen=allow_frozen_container)
    else:
        self._update_spare_chunk(current_chunk, spare_url[0],
                                 frozen=allow_frozen_container)
    self.logger.debug('Chunk %s repaired in %s',
                      chunk_id or chunk_pos, spare_url[0])
def rebuild_chunk(self, chunk_id, service_id=None, allow_same_rawx=False,
                  chunk_pos=None, allow_frozen_container=False):
    """
    Rebuild one EC chunk of this content and upload it to a spare rawx.

    :param chunk_id: ID of the chunk to rebuild (may be None if only
        `chunk_pos` is known)
    :param service_id: if set, restrict the lookup to chunks hosted by
        this service
    :param allow_same_rawx: if True, do not blacklist the current chunk's
        rawx when looking for a spare location
    :param chunk_pos: position of the chunk to rebuild, used when the
        chunk ID is unknown
    :param allow_frozen_container: allow the metadata update even if the
        container is frozen
    :raises OrphanChunk: when neither `chunk_id` nor `chunk_pos`
        designates a known chunk
    :raises ChunkException: when the rebuilt chunk does not have the
        expected size (the spare chunk is deleted, best-effort)
    :returns: the number of bytes transferred to the spare chunk
    """
    # Identify the chunk to rebuild
    candidates = self.chunks.filter(id=chunk_id)
    if service_id is not None:
        candidates = candidates.filter(host=service_id)
    current_chunk = candidates.one()
    if current_chunk is None and chunk_pos is None:
        raise OrphanChunk("Chunk not found in content")
    if current_chunk is None:
        # Lookup by position since the ID is unknown
        current_chunk = self.chunks.filter(pos=chunk_pos).one()
        if current_chunk is None:
            # Not registered at all: build a placeholder from the position
            chunk = {'pos': chunk_pos, 'url': ''}
            current_chunk = Chunk(chunk)
        else:
            chunk_id = current_chunk.id
            self.logger.debug('Chunk at pos %s has id %s',
                              chunk_pos, chunk_id)
    # Sort chunks by score to try to rebuild with higher score.
    # When scores are close together (e.g. [95, 94, 94, 93, 50]),
    # don't always start with the highest element.
    chunks = self.chunks \
        .filter(metapos=current_chunk.metapos) \
        .exclude(id=chunk_id, pos=chunk_pos) \
        .sort(key=lambda chunk: _get_weighted_random_score(chunk.raw()),
              reverse=True)
    if chunk_id is None:
        # Placeholder chunk: copy size and checksum from a sibling
        current_chunk.size = chunks[0].size
        current_chunk.checksum = chunks[0].checksum
    # Find a spare chunk address
    broken_list = list()
    if not allow_same_rawx and chunk_id is not None:
        broken_list.append(current_chunk)
    spare_url, _quals = self._get_spare_chunk(chunks.all(), broken_list,
                                              position=current_chunk.pos)
    new_chunk = Chunk({'pos': current_chunk.pos, 'url': spare_url[0]})
    # Regenerate the lost chunk's data, from existing chunks
    handler = ECRebuildHandler(chunks.raw(), current_chunk.subpos,
                               self.storage_method)
    expected_chunk_size, stream = handler.rebuild()
    # Actually create the spare chunk
    meta = {}
    meta['chunk_id'] = new_chunk.id
    meta['chunk_pos'] = current_chunk.pos
    meta['container_id'] = self.container_id
    # FIXME: should be 'content_chunkmethod' everywhere
    # but sadly it isn't
    meta['chunk_method'] = self.chunk_method
    # FIXME: should be 'content_id' everywhere
    # but sadly it isn't
    meta['id'] = self.content_id
    meta['content_path'] = self.path
    # FIXME: should be 'content_policy' everywhere
    # but sadly it isn't
    meta['policy'] = self.policy
    # FIXME: should be 'content_version' everywhere
    # but sadly it isn't
    meta['version'] = self.version
    meta['metachunk_hash'] = current_chunk.checksum
    meta['metachunk_size'] = current_chunk.size
    meta['full_path'] = self.full_path
    meta['oio_version'] = OIO_VERSION
    bytes_transferred, _ = self.blob_client.chunk_put(
        spare_url[0], meta, GeneratorIO(stream, sub_generator=PY2))
    if expected_chunk_size is not None \
            and bytes_transferred != expected_chunk_size:
        # Size mismatch: best-effort rollback of the spare chunk upload
        try:
            self.blob_client.chunk_delete(spare_url[0])
        # NOTE(review): `exc` here may shadow a module imported as `exc`
        # elsewhere in this file — confirm and consider renaming to `err`
        except Exception as exc:
            self.logger.warning(
                'Failed to rollback the rebuild of the chunk: %s', exc)
        raise ChunkException('The rebuilt chunk is not the correct size')
    # Register the spare chunk in object's metadata
    if chunk_id is None:
        self._add_raw_chunk(current_chunk, spare_url[0],
                            frozen=allow_frozen_container)
    else:
        self._update_spare_chunk(current_chunk, spare_url[0],
                                 frozen=allow_frozen_container)
    self.logger.debug('Chunk %s repaired in %s',
                      chunk_id or chunk_pos, spare_url[0])
    return bytes_transferred