Ejemplo n.º 1
0
    def test_read_1_by_1_from_iterable_class(self):
        """Read one byte at a time from a hand-written iterator object."""
        class DataGen(object):
            """Iterator handing out the items of `data` one by one."""

            def __init__(self):
                self.data = b"abcd"
                self.pos = 0

            def __iter__(self):
                return self

            def next(self):
                # Python 2 iterator protocol
                idx = self.pos
                if idx >= len(self.data):
                    raise StopIteration()
                self.pos = idx + 1
                return self.data[idx]

            def __next__(self):
                # Python 3 iterator protocol: wrap the value returned
                # by next() so that bytes are yielded, not int
                return bytes((self.next(), ))

        gen = GeneratorIO(DataGen())
        # The last read happens after exhaustion and must return nothing
        for expected in (b"a", b"b", b"c", b"d", b""):
            self.assertEqual(gen.read(1), expected)
Ejemplo n.º 2
0
 def test_read_1_by_1_byte_variable_input(self):
     """Read one byte at a time from a list of variable-size pieces."""
     reader = GeneratorIO([b"a", b"bc", b"d"])
     # The middle piece holds two bytes and must be split across reads;
     # the last read happens after exhaustion.
     for expected in (b"a", b"b", b"c", b"d", b""):
         self.assertEqual(reader.read(1), expected)
Ejemplo n.º 3
0
 def test_read_1_by_1_byte_from_list(self):
     """Read one byte at a time from a list of one-byte pieces."""
     reader = GeneratorIO([b"a", b"b", b"c", b"d"])
     # The last read happens after exhaustion and must return nothing
     for expected in (b"a", b"b", b"c", b"d", b""):
         self.assertEqual(reader.read(1), expected)
Ejemplo n.º 4
0
 def test_read_1_by_1_from_tuple(self):
     """Read one byte at a time from a tuple of variable-size pieces."""
     reader = GeneratorIO((b"a", b"bc", b"d"))
     # The last read happens after exhaustion and must return nothing
     for expected in (b"a", b"b", b"c", b"d", b""):
         self.assertEqual(reader.read(1), expected)
Ejemplo n.º 5
0
 def test_read_1_by_1_byte(self):
     """Read one character at a time from an iterator over strings."""
     pieces = ["a", "bc", "d"]
     reader = GeneratorIO(iter(pieces))
     # The last read happens after exhaustion and must return nothing
     for expected in ("a", "b", "c", "d", ""):
         self.assertEqual(reader.read(1), expected)
Ejemplo n.º 6
0
 def test_read_1_by_1_from_generator(self):
     """Read one byte at a time from a generator of variable-size pieces."""
     def gen_data():
         for piece in (b'a', b'bc', b'd'):
             yield piece

     reader = GeneratorIO(gen_data())
     # The last read happens after exhaustion and must return nothing
     for expected in (b"a", b"b", b"c", b"d", b""):
         self.assertEqual(reader.read(1), expected)
Ejemplo n.º 7
0
    def rebuild_chunk(self, chunk_id, allow_same_rawx=False, chunk_pos=None):
        """
        Rebuild a chunk of this content at a spare location.

        The data is regenerated by an `ECRebuildHandler` fed with the other
        chunks of the same metachunk, uploaded to a spare URL, and the new
        location is then registered.

        :param chunk_id: ID of the chunk to rebuild
        :param allow_same_rawx: when false, the chunk being rebuilt is
            reported as broken when asking for a spare chunk
        :param chunk_pos: position to use when no chunk has `chunk_id`
        :raises OrphanChunk: if no chunk matches `chunk_id` and `chunk_pos`
            is not set
        """
        current_chunk = self.chunks.filter(id=chunk_id).one()

        if current_chunk is None:
            if chunk_pos is None:
                raise OrphanChunk("Chunk not found in content")
            # Unknown chunk: work on a placeholder at the requested position
            current_chunk = Chunk({"pos": chunk_pos, "url": ""})

        # Every other chunk of the same metachunk
        chunks = self.chunks.filter(
            metapos=current_chunk.metapos).exclude(id=chunk_id)

        if chunk_id is None:
            # Borrow size and checksum from the first sibling chunk
            current_chunk.size = chunks[0].size
            current_chunk.checksum = chunks[0].checksum

        broken_list = [] if allow_same_rawx else [current_chunk]
        spare_url = self._get_spare_chunk(chunks.all(), broken_list)

        handler = ECRebuildHandler(chunks.raw(), current_chunk.subpos,
                                   self.storage_method)

        new_chunk = Chunk({'pos': current_chunk.pos, 'url': spare_url[0]})
        stream = handler.rebuild()

        meta = {
            'chunk_id': new_chunk.id,
            'chunk_pos': current_chunk.pos,
            'container_id': self.container_id,
            # FIXME: should be 'content_chunkmethod' everywhere
            # but sadly it isn't
            'chunk_method': self.chunk_method,
            # FIXME: should be 'content_id' everywhere
            # but sadly it isn't
            'id': self.content_id,
            'content_path': self.path,
            # FIXME: should be 'content_policy' everywhere
            # but sadly it isn't
            'policy': self.policy,
            # FIXME: should be 'content_version' everywhere
            # but sadly it isn't
            'version': self.version,
            'metachunk_hash': current_chunk.checksum,
            'metachunk_size': current_chunk.size,
            'full_path': self.full_path,
            'oio_version': OIO_VERSION,
        }
        self.blob_client.chunk_put(spare_url[0], meta, GeneratorIO(stream))

        # A chunk without ID is added, a known chunk is replaced
        if chunk_id is None:
            register = self._add_raw_chunk
        else:
            register = self._update_spare_chunk
        register(current_chunk, spare_url[0])
Ejemplo n.º 8
0
    def test_read_1_by_1_from_iterable_class(self):
        """Read one character at a time from a hand-written iterator."""
        class DataGen(object):
            """Iterator handing out the characters of `data` one by one."""

            def __init__(self):
                self.data = "abcd"
                self.pos = 0

            def __iter__(self):
                return self

            def next(self):
                # Python 2 iterator protocol
                if self.pos >= len(self.data):
                    raise StopIteration()
                self.pos += 1
                return self.data[self.pos - 1]

            # Python 3 looks for __next__ instead of next(): without this
            # alias the object returned by __iter__ is not a valid iterator.
            __next__ = next

        gen = GeneratorIO(DataGen())
        self.assertEqual(gen.read(1), "a")
        self.assertEqual(gen.read(1), "b")
        self.assertEqual(gen.read(1), "c")
        self.assertEqual(gen.read(1), "d")
        self.assertEqual(gen.read(1), "")
Ejemplo n.º 9
0
    def stream(self):
        """
        Get the data of all parts, wrapped in a `GeneratorIO`.

        :returns: a `GeneratorIO` built over a generator which yields,
            in order, every piece of data of every part returned
            by `get_iter()`
        """
        # Calling that right now will make `headers` field available
        # before the caller starts reading the stream
        parts_iter = self.get_iter()

        def _iter():
            for part in parts_iter:
                for data in part['iter']:
                    yield data
            # Just fall off the end to finish the generator: an explicit
            # `raise StopIteration` here is turned into a RuntimeError
            # by Python 3.7+ (PEP 479).

        return GeneratorIO(_iter())
Ejemplo n.º 10
0
    def change_policy(self, container_id, content_id, new_policy):
        """
        Re-create a content with another storage policy.

        :returns: the content itself if it already has `new_policy`,
            otherwise the newly created copy
        """
        source = self.get(container_id, content_id)
        if source.policy == new_policy:
            # Nothing to do
            return source

        target = self.copy(source, policy=new_policy)

        # Feed the new content with the data of the old one
        data_stream = source.fetch()
        target.create(GeneratorIO(data_stream))
        # the old content is automatically deleted because the new content has
        # the same name (but not the same id)
        return target
Ejemplo n.º 11
0
    def test_read_empty_data(self):
        """Reading from no piece, or from empty pieces only, gives b''."""
        for data in ([], ["", "", ""]):
            gen = GeneratorIO(data)
            self.assertEqual(gen.read(10), b'')
Ejemplo n.º 12
0
    def stream(self):
        """
        Get a generator over chunk data.
        After calling this method, the `headers` field will be available
        (even if no data is read from the generator).

        :returns: a `GeneratorIO` built over a generator which yields,
            in order, every piece of data of every part returned
            by `get_iter()`
        """
        parts_iter = self.get_iter()

        def _iter():
            for part in parts_iter:
                for data in part['iter']:
                    yield data
            # Just fall off the end to finish the generator: an explicit
            # `raise StopIteration` here is turned into a RuntimeError
            # by Python 3.7+ (PEP 479).

        return GeneratorIO(_iter())
Ejemplo n.º 13
0
    def test_move_with_wrong_size(self):
        """Moving a chunk whose data lacks one byte must fail (EC only)."""
        if not self.chunk_method.startswith('ec'):
            self.skipTest('Only works with EC')

        orig_chunk = random.choice(self.chunks)
        # Volume and chunk ID are the 3rd and 4th '/'-separated fields
        url_parts = orig_chunk['url'].split('/')
        chunk_volume, chunk_id = url_parts[2], url_parts[3]

        mover = BlobMoverWorker(self.conf, None,
                                self.rawx_volumes[chunk_volume])
        meta, stream = mover.blob_client.chunk_get(orig_chunk['url'])
        payload = stream.read()
        stream.close()
        # Serve the same chunk with its last byte missing
        truncated = payload[:-1]
        # NOTE(review): presumably removed so that the failure comes from
        # the size rather than from the hash -- confirm
        del meta['chunk_hash']
        mover.blob_client.chunk_get = Mock(
            return_value=(meta, GeneratorIO(truncated)))

        chunk_path = self._chunk_path(orig_chunk)
        self.assertRaises(ChunkException, mover.chunk_move,
                          chunk_path, chunk_id)
Ejemplo n.º 14
0
 def test_read_more_than_data_size(self):
     """Asking for more than available returns everything, then nothing."""
     pieces = [b"a", b"bc", b"d"]
     reader = GeneratorIO(pieces, True)
     for expected in (b"abcd", b""):
         self.assertEqual(reader.read(10), expected)
Ejemplo n.º 15
0
 def test_read_more_than_data_size(self):
     """Asking for more than available returns everything, then nothing."""
     pieces = ["a", "bc", "d"]
     reader = GeneratorIO(iter(pieces))
     for expected in ("abcd", ""):
         self.assertEqual(reader.read(10), expected)
Ejemplo n.º 16
0
    def object_create(self, account, container, file_or_path=None, data=None,
                      etag=None, obj_name=None, mime_type=None,
                      metadata=None, policy=None, key_file=None,
                      append=False, properties=None, **kwargs):
        """
        Create an object or append data to an object in *container* of
        *account*, with data taken from either *data* (`str` or `generator`)
        or *file_or_path* (path to a file or file-like object).
        When both are set, *data* is used.
        The object will be named after *obj_name* if specified, or after
        the base name of *file_or_path*.

        :param account: name of the account where to create the object
        :type account: `str`
        :param container: name of the container where to create the object
        :type container: `str`
        :param file_or_path: file-like object or path to a file from which
            to read object data
        :type file_or_path: `str` or file-like object
        :param data: object data (if `file_or_path` is not set)
        :type data: `str` or `generator`
        :keyword etag: entity tag of the object
        :type etag: `str`
        :keyword obj_name: name of the object to create. If not set, will use
            the base name of `file_or_path`.
        :keyword mime_type: MIME type of the object
        :type mime_type: `str`
        :keyword metadata: deprecated, use `properties` instead (a
            `DeprecationWarning` is emitted). Used as `properties` when
            `properties` is empty, otherwise merged into `properties`
            (the caller's `properties` dictionary is updated in place).
        :keyword properties: a dictionary of properties
        :type properties: `dict`
        :keyword policy: name of the storage policy
        :type policy: `str`
        :keyword key_file: forwarded unchanged to `_object_create`
        :param append: if set, data will be appended to the existing object
            (or the object will be created if unset)
        :type append: `bool`
        :keyword kwargs: extra keyword arguments, forwarded unchanged
            to `_object_create`

        :returns: `list` of chunks, size and hash of what has been uploaded
        :raises exc.MissingData: if both `data` and `file_or_path` are `None`
        :raises exc.FileNotFound: if `file_or_path` is a string and no file
            exists at this path
        :raises exc.MissingName: if no object name was given and none could
            be deduced from `file_or_path`
        """
        if (data, file_or_path) == (None, None):
            raise exc.MissingData()
        # `data` takes precedence over `file_or_path`
        src = data if data is not None else file_or_path
        if src is file_or_path:
            # Reading from a file: try to deduce a default object name
            if isinstance(file_or_path, basestring):
                if not os.path.exists(file_or_path):
                    raise exc.FileNotFound("File '%s' not found." %
                                           file_or_path)
                file_name = os.path.basename(file_or_path)
            else:
                try:
                    file_name = os.path.basename(file_or_path.name)
                except AttributeError:
                    # File-like object without a name
                    file_name = None
            obj_name = obj_name or file_name
        elif isgenerator(src):
            # From now on `src` is not `data` anymore: the wrapped generator
            # is expected to be handled by the `read` branch below
            file_or_path = GeneratorIO(src)
            src = file_or_path
        if not obj_name:
            raise exc.MissingName(
                "No name for the object has been specified"
            )

        sysmeta = {'mime_type': mime_type,
                   'etag': etag}
        if metadata:
            warnings.warn(
                "You'd better use 'properties' instead of 'metadata'",
                DeprecationWarning, stacklevel=4)
            if not properties:
                properties = metadata
            else:
                properties.update(metadata)

        if src is data:
            # In-memory data which is not a generator
            return self._object_create(
                account, container, obj_name, BytesIO(data), sysmeta,
                properties=properties, policy=policy,
                key_file=key_file, append=append, **kwargs)
        elif hasattr(file_or_path, "read"):
            # File-like object (or wrapped generator)
            return self._object_create(
                account, container, obj_name, src, sysmeta,
                properties=properties, policy=policy, key_file=key_file,
                append=append, **kwargs)
        else:
            # Path to a file: keep it open for the duration of the upload
            with open(file_or_path, "rb") as f:
                return self._object_create(
                    account, container, obj_name, f, sysmeta,
                    properties=properties, policy=policy,
                    key_file=key_file, append=append, **kwargs)
Ejemplo n.º 17
0
    def rebuild_chunk(self,
                      chunk_id,
                      allow_same_rawx=False,
                      chunk_pos=None,
                      allow_frozen_container=False):
        """
        Rebuild a chunk of this content at a spare location.

        :param chunk_id: ID of the chunk to rebuild
        :param allow_same_rawx: when false (and the chunk has an ID), the
            chunk being rebuilt is reported as broken when asking for
            a spare chunk
        :param chunk_pos: position of the chunk, used to find it when
            no chunk has `chunk_id`
        :param allow_frozen_container: forwarded as `frozen` when
            registering the spare chunk
        :raises OrphanChunk: if no chunk matches `chunk_id` and `chunk_pos`
            is not set
        """
        # Identify the chunk to rebuild
        current_chunk = self.chunks.filter(id=chunk_id).one()

        if current_chunk is None:
            if chunk_pos is None:
                raise OrphanChunk("Chunk not found in content")
            # Not found by ID, try by position
            current_chunk = self.chunks.filter(pos=chunk_pos).one()
            if current_chunk is not None:
                chunk_id = current_chunk.id
                self.logger.debug('Chunk at pos %s has id %s', chunk_pos,
                                  chunk_id)
            else:
                # Nothing registered there: work on a placeholder
                current_chunk = Chunk({'pos': chunk_pos, 'url': ''})

        # Every other chunk of the same metachunk
        chunks = self.chunks.filter(
            metapos=current_chunk.metapos).exclude(
                id=chunk_id, pos=chunk_pos)

        if chunk_id is None:
            # Borrow size and checksum from the first sibling chunk
            current_chunk.size = chunks[0].size
            current_chunk.checksum = chunks[0].checksum

        # Find a spare chunk address
        if allow_same_rawx or chunk_id is None:
            broken_list = []
        else:
            broken_list = [current_chunk]
        spare_url, _quals = self._get_spare_chunk(chunks.all(), broken_list)
        new_chunk = Chunk({'pos': current_chunk.pos, 'url': spare_url[0]})

        # Regenerate the lost chunk's data, from existing chunks
        handler = ECRebuildHandler(chunks.raw(), current_chunk.subpos,
                                   self.storage_method)
        stream = handler.rebuild()

        # Actually create the spare chunk
        meta = {
            'chunk_id': new_chunk.id,
            'chunk_pos': current_chunk.pos,
            'container_id': self.container_id,
            # FIXME: should be 'content_chunkmethod' everywhere
            # but sadly it isn't
            'chunk_method': self.chunk_method,
            # FIXME: should be 'content_id' everywhere
            # but sadly it isn't
            'id': self.content_id,
            'content_path': self.path,
            # FIXME: should be 'content_policy' everywhere
            # but sadly it isn't
            'policy': self.policy,
            # FIXME: should be 'content_version' everywhere
            # but sadly it isn't
            'version': self.version,
            'metachunk_hash': current_chunk.checksum,
            'metachunk_size': current_chunk.size,
            'full_path': self.full_path,
            'oio_version': OIO_VERSION,
        }
        self.blob_client.chunk_put(spare_url[0], meta, GeneratorIO(stream))

        # Register the spare chunk in object's metadata:
        # a chunk without ID is added, a known chunk is replaced
        if chunk_id is None:
            register = self._add_raw_chunk
        else:
            register = self._update_spare_chunk
        register(current_chunk, spare_url[0], frozen=allow_frozen_container)
        self.logger.debug('Chunk %s repaired in %s', chunk_id or chunk_pos,
                          spare_url[0])
Ejemplo n.º 18
0
    def rebuild_chunk(self,
                      chunk_id,
                      service_id=None,
                      allow_same_rawx=False,
                      chunk_pos=None,
                      allow_frozen_container=False):
        """
        Rebuild a chunk of this content at a spare location.

        The data is regenerated by an `ECRebuildHandler` fed with the other
        chunks of the same metachunk, uploaded to a spare URL, and the new
        location is then registered in the object's metadata.

        :param chunk_id: ID of the chunk to rebuild
        :param service_id: if set, only consider chunks whose `host` is
            this value when looking for `chunk_id`
        :param allow_same_rawx: when false (and the chunk has an ID), the
            chunk being rebuilt is reported as broken when asking for
            a spare chunk
        :param chunk_pos: position of the chunk, used to find it when
            no chunk matches `chunk_id`
        :param allow_frozen_container: forwarded as `frozen` when
            registering the spare chunk
        :returns: the number of bytes transferred, as reported
            by `chunk_put`
        :raises OrphanChunk: if no chunk matches `chunk_id` and `chunk_pos`
            is not set
        :raises ChunkException: if the rebuild handler announced a chunk
            size and the number of bytes transferred is different
        """
        # Identify the chunk to rebuild
        candidates = self.chunks.filter(id=chunk_id)
        if service_id is not None:
            candidates = candidates.filter(host=service_id)
        current_chunk = candidates.one()

        if current_chunk is None and chunk_pos is None:
            raise OrphanChunk("Chunk not found in content")
        if current_chunk is None:
            # Not found by ID, try by position
            current_chunk = self.chunks.filter(pos=chunk_pos).one()
            if current_chunk is None:
                # Nothing registered there: work on a placeholder
                chunk = {'pos': chunk_pos, 'url': ''}
                current_chunk = Chunk(chunk)
            else:
                chunk_id = current_chunk.id
                self.logger.debug('Chunk at pos %s has id %s', chunk_pos,
                                  chunk_id)

        # Sort chunks by score to try to rebuild with higher score.
        # When scores are close together (e.g. [95, 94, 94, 93, 50]),
        # don't always start with the highest element.
        chunks = self.chunks \
            .filter(metapos=current_chunk.metapos) \
            .exclude(id=chunk_id, pos=chunk_pos) \
            .sort(key=lambda chunk: _get_weighted_random_score(chunk.raw()),
                  reverse=True)

        if chunk_id is None:
            # Borrow size and checksum from the first sibling chunk
            current_chunk.size = chunks[0].size
            current_chunk.checksum = chunks[0].checksum

        # Find a spare chunk address
        broken_list = list()
        if not allow_same_rawx and chunk_id is not None:
            broken_list.append(current_chunk)
        spare_url, _quals = self._get_spare_chunk(chunks.all(),
                                                  broken_list,
                                                  position=current_chunk.pos)
        new_chunk = Chunk({'pos': current_chunk.pos, 'url': spare_url[0]})

        # Regenerate the lost chunk's data, from existing chunks
        handler = ECRebuildHandler(chunks.raw(), current_chunk.subpos,
                                   self.storage_method)
        expected_chunk_size, stream = handler.rebuild()

        # Actually create the spare chunk
        meta = {}
        meta['chunk_id'] = new_chunk.id
        meta['chunk_pos'] = current_chunk.pos
        meta['container_id'] = self.container_id

        # FIXME: should be 'content_chunkmethod' everywhere
        # but sadly it isn't
        meta['chunk_method'] = self.chunk_method

        # FIXME: should be 'content_id' everywhere
        # but sadly it isn't
        meta['id'] = self.content_id

        meta['content_path'] = self.path

        # FIXME: should be 'content_policy' everywhere
        # but sadly it isn't
        meta['policy'] = self.policy

        # FIXME: should be 'content_version' everywhere
        # but sadly it isn't
        meta['version'] = self.version

        meta['metachunk_hash'] = current_chunk.checksum
        meta['metachunk_size'] = current_chunk.size
        meta['full_path'] = self.full_path
        meta['oio_version'] = OIO_VERSION
        bytes_transferred, _ = self.blob_client.chunk_put(
            spare_url[0], meta, GeneratorIO(stream, sub_generator=PY2))
        # When the expected size is known, check the upload against it and
        # remove the spare chunk on mismatch (before it gets registered)
        if expected_chunk_size is not None \
                and bytes_transferred != expected_chunk_size:
            try:
                self.blob_client.chunk_delete(spare_url[0])
            except Exception as exc:
                # Best-effort rollback: a failure here is only logged, the
                # size mismatch is reported below in any case
                self.logger.warning(
                    'Failed to rollback the rebuild of the chunk: %s', exc)
            raise ChunkException('The rebuilt chunk is not the correct size')

        # Register the spare chunk in object's metadata
        if chunk_id is None:
            self._add_raw_chunk(current_chunk,
                                spare_url[0],
                                frozen=allow_frozen_container)
        else:
            self._update_spare_chunk(current_chunk,
                                     spare_url[0],
                                     frozen=allow_frozen_container)
        self.logger.debug('Chunk %s repaired in %s', chunk_id or chunk_pos,
                          spare_url[0])

        return bytes_transferred