Example #1
0
    def rebuild_chunk(self, chunk_id, allow_same_rawx=False, chunk_pos=None):
        """Rebuild one chunk of this content and upload it to a spare location.

        The chunk is looked up by ``chunk_id``. If it is unknown but
        ``chunk_pos`` is given, a placeholder chunk with an empty URL is
        used for that position instead.

        :param chunk_id: ID of the chunk to rebuild; may be None.
        :param allow_same_rawx: when false, the chunk being rebuilt is handed
            to ``_get_spare_chunk`` in the list of broken chunks.
        :param chunk_pos: position of the chunk, only used when the lookup
            by ID yields nothing.
        :raises OrphanChunk: if the chunk is unknown and no position is given.
        """
        target = self.chunks.filter(id=chunk_id).one()
        if target is None:
            if chunk_pos is None:
                raise OrphanChunk("Chunk not found in content")
            # Unknown chunk at a known position: work on a placeholder.
            target = Chunk({"pos": chunk_pos, "url": ""})

        # Every other chunk sharing the same metachunk position.
        siblings = self.chunks.filter(metapos=target.metapos)
        siblings = siblings.exclude(id=chunk_id)

        if chunk_id is None:
            # Without an ID, size and checksum are copied from the first
            # sibling chunk.
            target.size = siblings[0].size
            target.checksum = siblings[0].checksum

        broken = [] if allow_same_rawx else [target]
        spare = self._get_spare_chunk(siblings.all(), broken)

        rebuilder = ECRebuildHandler(siblings.raw(), target.subpos,
                                     self.storage_method)

        spare_chunk = Chunk({'pos': target.pos, 'url': spare[0]})
        data_stream = rebuilder.rebuild()

        meta = {
            'chunk_id': spare_chunk.id,
            'chunk_pos': target.pos,
            'container_id': self.container_id,
            # FIXME: should be 'content_chunkmethod' everywhere
            # but sadly it isn't
            'chunk_method': self.chunk_method,
            # FIXME: should be 'content_id' everywhere
            # but sadly it isn't
            'id': self.content_id,
            'content_path': self.path,
            # FIXME: should be 'content_policy' everywhere
            # but sadly it isn't
            'policy': self.policy,
            # FIXME: should be 'content_version' everywhere
            # but sadly it isn't
            'version': self.version,
            'metachunk_hash': target.checksum,
            'metachunk_size': target.size,
            'full_path': self.full_path,
            'oio_version': OIO_VERSION,
        }
        self.blob_client.chunk_put(spare[0], meta, GeneratorIO(data_stream))

        # A chunk without ID is registered as new, otherwise the existing
        # entry is updated with the spare location.
        if chunk_id is not None:
            self._update_spare_chunk(target, spare[0])
        else:
            self._add_raw_chunk(target, spare[0])
Example #2
0
File: ec.py Project: bhyvex/oio-sds
    def rebuild_chunk(self, chunk_id):
        """Rebuild the chunk ``chunk_id`` and upload it to a spare location.

        The data comes from an ``ECRebuildHandler`` fed with the other
        chunks at the same metachunk position.

        :param chunk_id: ID of the chunk to rebuild.
        :raises OrphanChunk: if this content has no chunk with that ID.
        """
        broken = self.chunks.filter(id=chunk_id).one()
        if broken is None:
            raise OrphanChunk("Chunk not found in content")

        # Every other chunk sharing the same metachunk position.
        siblings = self.chunks.filter(metapos=broken.metapos)
        siblings = siblings.exclude(id=chunk_id)

        spare = self._get_spare_chunk(siblings.all(), [broken])

        rebuilder = ECRebuildHandler(
            siblings.raw(), broken.subpos, self.storage_method)

        spare_chunk = Chunk({'pos': broken.pos, 'url': spare[0]})
        data_stream = rebuilder.rebuild()

        meta = {
            'chunk_id': spare_chunk.id,
            'chunk_pos': broken.pos,
            'container_id': self.container_id,
            'content_chunkmethod': self.chunk_method,
            'content_id': self.content_id,
            'content_path': self.path,
            'content_policy': self.stgpol,
            'content_version': self.version,
            'metachunk_hash': broken.checksum,
            'metachunk_size': broken.size,
        }
        self.blob_client.chunk_put(spare[0], meta, data_stream)
        self._update_spare_chunk(broken, spare[0])
Example #3
0
    def rebuild_chunk(self, chunk_id):
        """Regenerate a chunk from its siblings and store it on a spare URL.

        :param chunk_id: ID of the chunk to rebuild.
        :raises OrphanChunk: if no chunk of this content has that ID.
        """
        lost = self.chunks.filter(id=chunk_id).one()
        if lost is None:
            raise OrphanChunk("Chunk not found in content")

        # Remaining chunks of the same metachunk, the lost one excluded.
        same_metapos = self.chunks.filter(metapos=lost.metapos)
        remaining = same_metapos.exclude(id=chunk_id)

        spare_url = self._get_spare_chunk(remaining.all(), [lost])

        handler = ECRebuildHandler(
            remaining.raw(), lost.subpos, self.storage_method)

        replacement = Chunk({'pos': lost.pos, 'url': spare_url[0]})
        rebuilt = handler.rebuild()

        # Metadata sent along with the rebuilt chunk.
        meta = {
            'chunk_id': replacement.id,
            'chunk_pos': lost.pos,
            'container_id': self.container_id,
            'content_chunkmethod': self.chunk_method,
            'content_id': self.content_id,
            'content_path': self.path,
            'content_policy': self.stgpol,
            'content_version': self.version,
            'metachunk_hash': lost.checksum,
            'metachunk_size': lost.size,
        }
        self.blob_client.chunk_put(spare_url[0], meta, rebuilt)
        self._update_spare_chunk(lost, spare_url[0])
Example #4
0
    def test_rebuild_with_wrong_chunk_size(self):
        """Rebuild a chunk while some sources advertise a wrong size.

        The first ``ec_nb_parity - 1`` responses carry a randomly shortened
        body together with a matching ``x-oio-chunk-meta-chunk-size``
        header; the rebuilt data must still match the missing chunk.
        """
        payload = (b'1234' * self.storage_method.ec_segment_size)[:-777]
        ec_chunks = self._make_ec_chunks(payload)
        expected_body = ec_chunks.pop(1)
        meta_chunk = self.meta_chunk()
        missing = meta_chunk.pop(1)['num']

        nb_altered = self.storage_method.ec_nb_parity - 1
        responses = []
        for index, body in enumerate(ec_chunks):
            size = len(body)
            if index < nb_altered:
                # Change the chunk size for the first chunks
                size = random.randrange(size)
            size_header = {'x-oio-chunk-meta-chunk-size': size}
            responses.append(FakeResponse(200, body[:size], size_header))

        def get_response(req):
            if responses:
                return responses.pop(0)
            return FakeResponse(404)

        nb = self.storage_method.ec_nb_data + self.storage_method.ec_nb_parity

        with set_http_requests(get_response) as conn_record:
            handler = ECRebuildHandler(meta_chunk, missing,
                                       self.storage_method)
            expected_chunk_size, stream = handler.rebuild()
            if expected_chunk_size is not None:
                self.assertEqual(expected_chunk_size, len(expected_body))
            rebuilt = b''.join(stream)
            self.assertEqual(len(rebuilt), len(expected_body))
            self.assertEqual(
                self.checksum(rebuilt).hexdigest(),
                self.checksum(expected_body).hexdigest())
            self.assertEqual(len(conn_record), nb - 1)
Example #5
0
    def test_rebuild_failure(self):
        """Rebuilding must raise when every source chunk is in error.

        Each of the ``nb - 1`` remaining chunks answers with a randomly
        chosen error (timeout, 404 or generic exception).
        """
        meta_chunk = self.meta_chunk()
        missing = meta_chunk.pop(1)['num']

        nb = self.storage_method.ec_nb_data +\
            self.storage_method.ec_nb_parity

        # add errors on other chunks
        errors = [Timeout(), 404, Exception('failure')]
        responses = []
        for _ in range(nb - 1):
            responses.append(FakeResponse(random.choice(errors), b'', {}))

        def get_response(req):
            if responses:
                return responses.pop(0)
            return FakeResponse(404)

        with set_http_requests(get_response) as conn_record:
            handler = ECRebuildHandler(meta_chunk, missing,
                                       self.storage_method)
            # TODO use specialized exception
            self.assertRaises(exc.OioException, handler.rebuild)
            self.assertEqual(len(conn_record), nb - 1)
Example #6
0
    def rebuild_chunk(self, chunk_id):
        """Rebuild the chunk ``chunk_id`` and upload it to a spare location.

        The rebuilt stream is wrapped in a ``GeneratorReader`` before being
        handed to ``blob_client.chunk_put``.

        :param chunk_id: ID of the chunk to rebuild.
        :raises OrphanChunk: if this content has no chunk with that ID.
        """
        broken = self.chunks.filter(id=chunk_id).one()
        if broken is None:
            raise OrphanChunk("Chunk not found in content")

        # Every other chunk sharing the same metachunk position.
        same_metapos = self.chunks.filter(metapos=broken.metapos)
        siblings = same_metapos.exclude(id=chunk_id)

        spare = self._get_spare_chunk(siblings.all(), [broken])

        rebuilder = ECRebuildHandler(
            siblings.raw(), broken.subpos, self.storage_method
        )

        spare_chunk = Chunk({"pos": broken.pos, "url": spare[0]})
        data_stream = rebuilder.rebuild()

        meta = {
            "chunk_id": spare_chunk.id,
            "chunk_pos": broken.pos,
            "container_id": self.container_id,
            # FIXME: should be 'content_chunkmethod' everywhere
            # but sadly it isn't
            "chunk_method": self.chunk_method,
            # FIXME: should be 'content_id' everywhere
            # but sadly it isn't
            "id": self.content_id,
            "content_path": self.path,
            # FIXME: should be 'content_policy' everywhere
            # but sadly it isn't
            "policy": self.stgpol,
            # FIXME: should be 'content_version' everywhere
            # but sadly it isn't
            "version": self.version,
            "metachunk_hash": broken.checksum,
            "metachunk_size": broken.size,
        }
        self.blob_client.chunk_put(spare[0], meta, GeneratorReader(data_stream))
        self._update_spare_chunk(broken, spare[0])
Example #7
0
    def test_rebuild_parity_errors(self):
        """Rebuild the last chunk while one other chunk is in error.

        For each kind of error (timeout, 404, generic exception), one
        randomly chosen source response is replaced by that error and the
        rebuilt data is compared with the expected chunk body.
        """
        payload = (b'1234' * self.storage_method.ec_segment_size)[:-777]
        ec_chunks = self._make_ec_chunks(payload)

        # break one parity chunk
        expected_body = ec_chunks.pop(-1)
        meta_chunk = self.meta_chunk()
        missing = meta_chunk.pop(-1)['num']

        nb = self.storage_method.ec_nb_data +\
            self.storage_method.ec_nb_parity

        # add also error on another chunk
        for error in (Timeout(), 404, Exception('failure')):
            headers = {}
            responses = [FakeResponse(200, body, headers)
                         for body in ec_chunks]
            failing = random.randint(0, len(responses) - 1)
            responses[failing] = FakeResponse(error, b'', {})

            def get_response(req):
                if responses:
                    return responses.pop(0)
                return FakeResponse(404)

            with set_http_requests(get_response) as conn_record:
                handler = ECRebuildHandler(meta_chunk, missing,
                                           self.storage_method)
                expected_chunk_size, stream = handler.rebuild()
                if expected_chunk_size is not None:
                    self.assertEqual(expected_chunk_size,
                                     len(expected_body))
                rebuilt = b''.join(stream)
                self.assertEqual(len(rebuilt), len(expected_body))
                self.assertEqual(
                    self.checksum(rebuilt).hexdigest(),
                    self.checksum(expected_body).hexdigest())
                self.assertEqual(len(conn_record), nb - 1)
Example #8
0
    def test_rebuild(self):
        """Rebuild a missing chunk when all other chunks are readable.

        The chunk at index 1 is removed, every remaining chunk is served
        with a 200 response, and the rebuilt data is compared (length and
        checksum) with the removed chunk body.
        """
        test_data = (b'1234' * self.storage_method.ec_segment_size)[:-777]

        ec_chunks = self._make_ec_chunks(test_data)

        missing_chunk_body = ec_chunks.pop(1)

        meta_chunk = self.meta_chunk()

        missing_chunk = meta_chunk.pop(1)

        # One successful response per remaining chunk, instead of a fixed
        # list of seven entries which only fits one data/parity layout.
        headers = {}
        responses = [FakeResponse(200, ec_chunk, headers)
                     for ec_chunk in ec_chunks]

        def get_response(req):
            # Serve the prepared responses in order, then only 404.
            return responses.pop(0) if responses else FakeResponse(404)

        missing = missing_chunk['num']
        nb = self.storage_method.ec_nb_data + self.storage_method.ec_nb_parity

        with set_http_requests(get_response) as conn_record:
            handler = ECRebuildHandler(meta_chunk, missing,
                                       self.storage_method)
            expected_chunk_size, stream = handler.rebuild()
            if expected_chunk_size is not None:
                self.assertEqual(expected_chunk_size, len(missing_chunk_body))
            result = b''.join(stream)
            self.assertEqual(len(result), len(missing_chunk_body))
            self.assertEqual(
                self.checksum(result).hexdigest(),
                self.checksum(missing_chunk_body).hexdigest())
            # Every chunk but the missing one must have been requested.
            self.assertEqual(len(conn_record), nb - 1)
Example #9
0
    def test_rebuild_parity_errors(self):
        """Rebuild the last chunk while one other chunk is in error.

        For each kind of error (timeout, 404, generic exception), one
        randomly chosen source response is replaced by that error and the
        rebuilt data is compared with the expected chunk body.

        Chunk payloads are handled as bytes: bytes literals are identical
        to plain string literals on Python 2 and are required for joining
        and hashing binary data on Python 3.
        """
        test_data = (b'1234' * self.storage_method.ec_segment_size)[:-777]

        ec_chunks = self._make_ec_chunks(test_data)

        # break one parity chunk
        missing_chunk_body = ec_chunks.pop(-1)

        meta_chunk = self.meta_chunk()

        missing_chunk = meta_chunk.pop(-1)

        # These do not depend on the injected error: compute them once.
        missing = missing_chunk['num']
        nb = self.storage_method.ec_nb_data +\
            self.storage_method.ec_nb_parity

        # add also error on another chunk
        for error in (Timeout(), 404, Exception('failure')):
            headers = {}
            responses = [FakeResponse(200, ec_chunk, headers)
                         for ec_chunk in ec_chunks]
            error_idx = random.randint(0, len(responses) - 1)
            responses[error_idx] = FakeResponse(error, b'', {})

            def get_response(req):
                # Serve the prepared responses in order, then only 404.
                return responses.pop(0) if responses else FakeResponse(404)

            with set_http_requests(get_response) as conn_record:
                handler = ECRebuildHandler(
                    meta_chunk, missing, self.storage_method)
                stream = handler.rebuild()
                result = b''.join(stream)
                self.assertEqual(len(result), len(missing_chunk_body))
                self.assertEqual(self.checksum(result).hexdigest(),
                                 self.checksum(missing_chunk_body).hexdigest())
                # Every chunk but the missing one must have been requested.
                self.assertEqual(len(conn_record), nb - 1)
Example #10
0
    def test_rebuild(self):
        """Rebuild a missing chunk when all other chunks are readable.

        The chunk at index 1 is removed, every remaining chunk is served
        with a 200 response, and the rebuilt data is compared (length and
        checksum) with the removed chunk body.

        Chunk payloads are handled as bytes: bytes literals are identical
        to plain string literals on Python 2 and are required for joining
        and hashing binary data on Python 3.
        """
        test_data = (b'1234' * self.storage_method.ec_segment_size)[:-777]

        ec_chunks = self._make_ec_chunks(test_data)

        missing_chunk_body = ec_chunks.pop(1)

        meta_chunk = self.meta_chunk()

        missing_chunk = meta_chunk.pop(1)

        # One successful response per remaining chunk, instead of a fixed
        # list of seven entries which only fits one data/parity layout.
        headers = {}
        responses = [FakeResponse(200, ec_chunk, headers)
                     for ec_chunk in ec_chunks]

        def get_response(req):
            # Serve the prepared responses in order, then only 404.
            return responses.pop(0) if responses else FakeResponse(404)

        missing = missing_chunk['num']
        nb = self.storage_method.ec_nb_data + self.storage_method.ec_nb_parity

        with set_http_requests(get_response) as conn_record:
            handler = ECRebuildHandler(
                meta_chunk, missing, self.storage_method)
            stream = handler.rebuild()
            result = b''.join(stream)
            self.assertEqual(len(result), len(missing_chunk_body))
            self.assertEqual(self.checksum(result).hexdigest(),
                             self.checksum(missing_chunk_body).hexdigest())
            # Every chunk but the missing one must have been requested.
            self.assertEqual(len(conn_record), nb - 1)
Example #11
0
    def rebuild_chunk(self,
                      chunk_id,
                      allow_same_rawx=False,
                      chunk_pos=None,
                      allow_frozen_container=False):
        """Rebuild a chunk, upload it to a spare location and register it.

        The chunk is looked up by ``chunk_id`` first, then by ``chunk_pos``.
        If neither lookup finds it but a position is given, a placeholder
        chunk with an empty URL is used for that position.

        :param chunk_id: ID of the chunk to rebuild; may be None.
        :param allow_same_rawx: when false (and the chunk ID is known), the
            chunk is handed to ``_get_spare_chunk`` as broken.
        :param chunk_pos: position of the chunk, used when the lookup by ID
            yields nothing.
        :param allow_frozen_container: forwarded as ``frozen`` to the
            helper registering the spare chunk.
        :raises OrphanChunk: if the chunk is unknown and no position is given.
        """
        # Identify the chunk to rebuild
        target = self.chunks.filter(id=chunk_id).one()
        if target is None:
            if chunk_pos is None:
                raise OrphanChunk("Chunk not found in content")
            target = self.chunks.filter(pos=chunk_pos).one()
            if target is not None:
                chunk_id = target.id
                self.logger.debug('Chunk at pos %s has id %s', chunk_pos,
                                  chunk_id)
            else:
                target = Chunk({'pos': chunk_pos, 'url': ''})

        same_metapos = self.chunks.filter(metapos=target.metapos)
        siblings = same_metapos.exclude(id=chunk_id, pos=chunk_pos)

        if chunk_id is None:
            # Without an ID, size and checksum are copied from the first
            # sibling chunk.
            target.size = siblings[0].size
            target.checksum = siblings[0].checksum

        # Find a spare chunk address
        if allow_same_rawx or chunk_id is None:
            broken = []
        else:
            broken = [target]
        spare_url, _quals = self._get_spare_chunk(siblings.all(), broken)
        spare_chunk = Chunk({'pos': target.pos, 'url': spare_url[0]})

        # Regenerate the lost chunk's data, from existing chunks
        rebuilder = ECRebuildHandler(siblings.raw(), target.subpos,
                                     self.storage_method)
        data_stream = rebuilder.rebuild()

        # Actually create the spare chunk
        meta = {
            'chunk_id': spare_chunk.id,
            'chunk_pos': target.pos,
            'container_id': self.container_id,
            # FIXME: should be 'content_chunkmethod' everywhere
            # but sadly it isn't
            'chunk_method': self.chunk_method,
            # FIXME: should be 'content_id' everywhere
            # but sadly it isn't
            'id': self.content_id,
            'content_path': self.path,
            # FIXME: should be 'content_policy' everywhere
            # but sadly it isn't
            'policy': self.policy,
            # FIXME: should be 'content_version' everywhere
            # but sadly it isn't
            'version': self.version,
            'metachunk_hash': target.checksum,
            'metachunk_size': target.size,
            'full_path': self.full_path,
            'oio_version': OIO_VERSION,
        }
        self.blob_client.chunk_put(spare_url[0], meta,
                                   GeneratorIO(data_stream))

        # Register the spare chunk in object's metadata
        if chunk_id is not None:
            self._update_spare_chunk(target,
                                     spare_url[0],
                                     frozen=allow_frozen_container)
        else:
            self._add_raw_chunk(target,
                                spare_url[0],
                                frozen=allow_frozen_container)
        self.logger.debug('Chunk %s repaired in %s', chunk_id or chunk_pos,
                          spare_url[0])
Example #12
0
    def rebuild_chunk(self,
                      chunk_id,
                      service_id=None,
                      allow_same_rawx=False,
                      chunk_pos=None,
                      allow_frozen_container=False):
        """Rebuild a chunk, upload it to a spare location and register it.

        The chunk is looked up by ``chunk_id`` (optionally restricted to
        ``service_id``), then by ``chunk_pos``. If neither lookup finds it
        but a position is given, a placeholder chunk with an empty URL is
        used for that position.

        :param chunk_id: ID of the chunk to rebuild; may be None.
        :param service_id: when not None, the lookup by ID only keeps
            chunks whose ``host`` matches this value.
        :param allow_same_rawx: when false (and the chunk ID is known), the
            chunk is handed to ``_get_spare_chunk`` as broken.
        :param chunk_pos: position of the chunk, used when the lookup by ID
            yields nothing.
        :param allow_frozen_container: forwarded as ``frozen`` to the
            helper registering the spare chunk.
        :returns: the first value returned by ``blob_client.chunk_put``
            (the transferred byte count).
        :raises OrphanChunk: if the chunk is unknown and no position is given.
        :raises ChunkException: if the handler announced an expected size
            and the upload transferred a different number of bytes.
        """
        # Identify the chunk to rebuild
        matching = self.chunks.filter(id=chunk_id)
        if service_id is not None:
            matching = matching.filter(host=service_id)
        target = matching.one()

        if target is None:
            if chunk_pos is None:
                raise OrphanChunk("Chunk not found in content")
            target = self.chunks.filter(pos=chunk_pos).one()
            if target is not None:
                chunk_id = target.id
                self.logger.debug('Chunk at pos %s has id %s', chunk_pos,
                                  chunk_id)
            else:
                target = Chunk({'pos': chunk_pos, 'url': ''})

        def weighted_score(chunk):
            return _get_weighted_random_score(chunk.raw())

        # Sort chunks by score to try to rebuild with higher score.
        # When scores are close together (e.g. [95, 94, 94, 93, 50]),
        # don't always start with the highest element.
        siblings = self.chunks.filter(metapos=target.metapos)
        siblings = siblings.exclude(id=chunk_id, pos=chunk_pos)
        siblings = siblings.sort(key=weighted_score, reverse=True)

        if chunk_id is None:
            # Without an ID, size and checksum are copied from the first
            # sibling chunk.
            target.size = siblings[0].size
            target.checksum = siblings[0].checksum

        # Find a spare chunk address
        if allow_same_rawx or chunk_id is None:
            broken = []
        else:
            broken = [target]
        spare_url, _quals = self._get_spare_chunk(siblings.all(),
                                                  broken,
                                                  position=target.pos)
        spare_chunk = Chunk({'pos': target.pos, 'url': spare_url[0]})

        # Regenerate the lost chunk's data, from existing chunks
        rebuilder = ECRebuildHandler(siblings.raw(), target.subpos,
                                     self.storage_method)
        expected_chunk_size, data_stream = rebuilder.rebuild()

        # Actually create the spare chunk
        meta = {
            'chunk_id': spare_chunk.id,
            'chunk_pos': target.pos,
            'container_id': self.container_id,
            # FIXME: should be 'content_chunkmethod' everywhere
            # but sadly it isn't
            'chunk_method': self.chunk_method,
            # FIXME: should be 'content_id' everywhere
            # but sadly it isn't
            'id': self.content_id,
            'content_path': self.path,
            # FIXME: should be 'content_policy' everywhere
            # but sadly it isn't
            'policy': self.policy,
            # FIXME: should be 'content_version' everywhere
            # but sadly it isn't
            'version': self.version,
            'metachunk_hash': target.checksum,
            'metachunk_size': target.size,
            'full_path': self.full_path,
            'oio_version': OIO_VERSION,
        }
        bytes_transferred, _ = self.blob_client.chunk_put(
            spare_url[0], meta, GeneratorIO(data_stream, sub_generator=PY2))

        size_mismatch = (expected_chunk_size is not None
                         and bytes_transferred != expected_chunk_size)
        if size_mismatch:
            # Best-effort rollback: a failed deletion is only logged.
            try:
                self.blob_client.chunk_delete(spare_url[0])
            except Exception as exc:
                self.logger.warning(
                    'Failed to rollback the rebuild of the chunk: %s', exc)
            raise ChunkException('The rebuilt chunk is not the correct size')

        # Register the spare chunk in object's metadata
        if chunk_id is not None:
            self._update_spare_chunk(target,
                                     spare_url[0],
                                     frozen=allow_frozen_container)
        else:
            self._add_raw_chunk(target,
                                spare_url[0],
                                frozen=allow_frozen_container)
        self.logger.debug('Chunk %s repaired in %s', chunk_id or chunk_pos,
                          spare_url[0])

        return bytes_transferred