Example #1
class Content(object):
    def __init__(self, conf, container_id, metadata, chunks, storage_method):
        self.conf = conf
        self.container_id = container_id
        self.metadata = metadata
        self.chunks = ChunksHelper(chunks)
        self.storage_method = storage_method
        self.logger = get_logger(self.conf)
        self.cs_client = ConscienceClient(conf)
        self.blob_client = BlobClient()
        self.container_client = ContainerClient(self.conf)
        self.content_id = self.metadata["id"]
        self.stgpol = self.metadata["policy"]
        self.path = self.metadata["name"]
        self.length = int(self.metadata["length"])
        self.version = self.metadata["version"]
        self.checksum = self.metadata["hash"]
        self.mime_type = self.metadata["mime_type"]
        self.chunk_method = self.metadata["chunk_method"]

    def _get_spare_chunk(self, chunks_notin, chunks_broken):
        spare_data = {
            "notin": ChunksHelper(chunks_notin, False).raw(),
            "broken": ChunksHelper(chunks_broken, False).raw()
        }
        try:
            spare_resp = self.container_client.content_spare(
                cid=self.container_id,
                content=self.content_id,
                data=spare_data,
                stgpol=self.stgpol)
        except ClientException as e:
            raise exc.SpareChunkException("No spare chunk (%s)" % e.message)

        url_list = []
        for c in spare_resp["chunks"]:
            url_list.append(c["id"])

        return url_list

    def _update_spare_chunk(self, current_chunk, new_url):
        old = [{
            'type': 'chunk',
            'id': current_chunk.url,
            'hash': current_chunk.checksum,
            'size': current_chunk.size,
            'pos': current_chunk.pos,
            'content': self.content_id
        }]
        new = [{
            'type': 'chunk',
            'id': new_url,
            'hash': current_chunk.checksum,
            'size': current_chunk.size,
            'pos': current_chunk.pos,
            'content': self.content_id
        }]
        update_data = {'old': old, 'new': new}

        self.container_client.container_raw_update(cid=self.container_id,
                                                   data=update_data)

    def _create_object(self):
        self.container_client.content_create(cid=self.container_id,
                                             path=self.path,
                                             content_id=self.content_id,
                                             stgpol=self.stgpol,
                                             size=self.length,
                                             checksum=self.checksum,
                                             version=self.version,
                                             chunk_method=self.chunk_method,
                                             mime_type=self.mime_type,
                                             data=self.chunks.raw())

    def rebuild_chunk(self, chunk_id, allow_same_rawx=False):
        raise NotImplementedError()

    def create(self, stream):
        raise NotImplementedError()

    def fetch(self):
        raise NotImplementedError()

    def delete(self):
        self.container_client.content_delete(cid=self.container_id,
                                             path=self.path)

    def move_chunk(self, chunk_id):
        current_chunk = self.chunks.filter(id=chunk_id).one()
        if current_chunk is None:
            raise OrphanChunk("Chunk not found in content")

        other_chunks = self.chunks.filter(
            metapos=current_chunk.metapos).exclude(id=chunk_id).all()

        spare_urls = self._get_spare_chunk(other_chunks, [current_chunk])

        self.logger.debug("copy chunk from %s to %s", current_chunk.url,
                          spare_urls[0])
        self.blob_client.chunk_copy(current_chunk.url, spare_urls[0])

        self._update_spare_chunk(current_chunk, spare_urls[0])

        try:
            self.blob_client.chunk_delete(current_chunk.url)
        except Exception:
            self.logger.warn("Failed to delete chunk %s", current_chunk.url)

        current_chunk.url = spare_urls[0]

        return current_chunk.raw()
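
A minimal sketch of how move_chunk could be driven to drain one rawx service; the drain_rawx helper, the `content` instance and the `rawx_url` prefix are assumptions for illustration, not part of the class above.

# Hypothetical driver: move every chunk of `content` hosted on one rawx.
# `content` is a Content instance as defined above; `rawx_url` is the
# assumed base URL ('http://host:port') of the service being drained.
def drain_rawx(content, rawx_url):
    for chunk in list(content.chunks):
        if chunk.url.startswith(rawx_url):
            # move_chunk picks a spare location, copies the data,
            # updates the chunk reference, then deletes the old copy
            content.move_chunk(chunk.id)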
Example #2
class BlobMoverWorker(object):
    def __init__(self, conf, logger, volume):
        self.conf = conf
        self.logger = logger or get_logger(conf)
        self.volume = volume
        self.run_time = 0
        self.passes = 0
        self.errors = 0
        self.last_reported = 0
        self.last_usage_check = 0
        self.chunks_run_time = 0
        self.bytes_running_time = 0
        self.bytes_processed = 0
        self.total_bytes_processed = 0
        self.total_chunks_processed = 0
        self.usage_target = int_value(
            conf.get('usage_target'), 0)
        self.usage_check_interval = int_value(
            conf.get('usage_check_interval'), 3600)
        self.report_interval = int_value(
            conf.get('report_interval'), 3600)
        self.max_chunks_per_second = int_value(
            conf.get('chunks_per_second'), 30)
        self.max_bytes_per_second = int_value(
            conf.get('bytes_per_second'), 10000000)
        self.blob_client = BlobClient()
        self.container_client = ContainerClient(conf)

    def mover_pass(self):
        self.namespace, self.address = check_volume(self.volume)

        start_time = report_time = time.time()

        total_errors = 0
        mover_time = 0

        paths = paths_gen(self.volume)

        for path in paths:
            loop_time = time.time()

            now = time.time()
            if now - self.last_usage_check >= self.usage_check_interval:
                used, total = statfs(self.volume)
                usage = (float(used) / total) * 100
                if usage <= self.usage_target:
                    self.logger.info(
                        'current usage %.2f%%: target reached (%.2f%%)', usage,
                        self.usage_target)
                    self.last_usage_check = now
                    break

            self.safe_chunk_move(path)
            self.chunks_run_time = ratelimit(
                self.chunks_run_time,
                self.max_chunks_per_second
            )
            self.total_chunks_processed += 1
            now = time.time()

            if now - self.last_reported >= self.report_interval:
                self.logger.info(
                    '%(start_time)s '
                    '%(passes)d '
                    '%(errors)d '
                    '%(c_rate).2f '
                    '%(b_rate).2f '
                    '%(total).2f '
                    '%(mover_time).2f '
                    '%(mover_rate).2f' % {
                        'start_time': time.ctime(report_time),
                        'passes': self.passes,
                        'errors': self.errors,
                        'c_rate': self.passes / (now - report_time),
                        'b_rate': self.bytes_processed / (now - report_time),
                        'total': (now - start_time),
                        'mover_time': mover_time,
                        'mover_rate': mover_time / (now - start_time)
                    }
                )
                report_time = now
                total_errors += self.errors
                self.passes = 0
                self.bytes_processed = 0
                self.last_reported = now
            mover_time += (now - loop_time)
        elapsed = (time.time() - start_time) or 0.000001
        self.logger.info(
            '%(elapsed).02f '
            '%(errors)d '
            '%(chunk_rate).2f '
            '%(bytes_rate).2f '
            '%(mover_time).2f '
            '%(mover_rate).2f' % {
                'elapsed': elapsed,
                'errors': total_errors + self.errors,
                'chunk_rate': self.total_chunks_processed / elapsed,
                'bytes_rate': self.total_bytes_processed / elapsed,
                'mover_time': mover_time,
                'mover_rate': mover_time / elapsed
            }
        )

    def safe_chunk_move(self, path):
        try:
            self.chunk_move(path)
        except Exception as e:
            self.errors += 1
            self.logger.error('ERROR while moving chunk %s: %s', path, e)
        self.passes += 1

    def load_chunk_metadata(self, path):
        with open(path) as f:
            return read_chunk_metadata(f)

    def chunk_move(self, path):
        meta = self.load_chunk_metadata(path)
        content_cid = meta['content_cid']
        content_path = meta['content_path']

        chunk_url = 'http://%s/%s' % (self.address, meta['chunk_id'])

        try:
            data = self.container_client.content_show(
                cid=content_cid, path=content_path)
        except exc.NotFound:
            raise exc.OrphanChunk('Content not found')
        current_chunk = None
        notin = []
        for c in data:
            if c['pos'] != meta['chunk_pos']:
                continue
            if c['url'] == chunk_url:
                current_chunk = c
            else:
                notin.append(c)
        if not current_chunk:
            raise exc.OrphanChunk('Chunk not found in content')
        spare_data = {'notin': notin, 'broken': [current_chunk], 'size': 0}
        spare_resp = self.container_client.content_spare(
            cid=content_cid, path=content_path, data=spare_data)

        new_chunk = spare_resp['chunks'][0]
        self.blob_client.chunk_copy(
            current_chunk['url'], new_chunk['id'])

        old = [{'type': 'chunk',
                'id': current_chunk['url'],
                'hash': meta['chunk_hash'],
                'size': int(meta['chunk_size'])}]
        new = [{'type': 'chunk',
                'id': new_chunk['id'],
                'hash': meta['chunk_hash'],
                'size': int(meta['chunk_size'])}]
        update_data = {'old': old, 'new': new}

        self.container_client.container_raw_update(
            cid=content_cid, data=update_data)

        self.blob_client.chunk_delete(current_chunk['url'])

        self.logger.info(
            'moved chunk %s to %s', current_chunk['url'], new_chunk['id'])
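
The ratelimit helper called from mover_pass is not shown here. Below is a self-contained sketch of its presumed contract: pace a loop at max_rate operations per second by sleeping when called too early, returning the timestamp to feed back on the next call. This is an assumption about the semantics, not the actual oio implementation.

import time

def ratelimit(run_time, max_rate):
    # `run_time` is the earliest time the next operation may start,
    # as returned by the previous call (0 on the first call).
    if max_rate <= 0:
        return run_time
    time_per_op = 1.0 / max_rate
    now = time.time()
    if run_time > now:
        # Called too early: sleep until the slot opens.
        time.sleep(run_time - now)
    return max(run_time, now) + time_per_op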
Example #3
class BlobMoverWorker(object):
    def __init__(self, conf, logger, volume):
        self.conf = conf
        self.logger = logger or get_logger(conf)
        self.volume = volume
        self.run_time = 0
        self.passes = 0
        self.errors = 0
        self.last_reported = 0
        self.last_usage_check = 0
        self.chunks_run_time = 0
        self.bytes_running_time = 0
        self.bytes_processed = 0
        self.total_bytes_processed = 0
        self.total_chunks_processed = 0
        self.usage_target = int_value(conf.get('usage_target'), 0)
        self.usage_check_interval = int_value(conf.get('usage_check_interval'),
                                              3600)
        self.report_interval = int_value(conf.get('report_interval'), 3600)
        self.max_chunks_per_second = int_value(conf.get('chunks_per_second'),
                                               30)
        self.max_bytes_per_second = int_value(conf.get('bytes_per_second'),
                                              10000000)
        self.blob_client = BlobClient()
        self.container_client = ContainerClient(conf)

    def mover_pass(self):
        self.namespace, self.address = check_volume(self.volume)

        start_time = report_time = time.time()

        total_errors = 0
        mover_time = 0

        paths = paths_gen(self.volume)

        for path in paths:
            loop_time = time.time()

            now = time.time()
            if now - self.last_usage_check >= self.usage_check_interval:
                used, total = statfs(self.volume)
                usage = (float(used) / total) * 100
                if usage <= self.usage_target:
                    self.logger.info(
                        'current usage %.2f%%: target reached (%.2f%%)', usage,
                        self.usage_target)
                    self.last_usage_check = now
                    break

            self.safe_chunk_move(path)
            self.chunks_run_time = ratelimit(self.chunks_run_time,
                                             self.max_chunks_per_second)
            self.total_chunks_processed += 1
            now = time.time()

            if now - self.last_reported >= self.report_interval:
                self.logger.info(
                    '%(start_time)s '
                    '%(passes)d '
                    '%(errors)d '
                    '%(c_rate).2f '
                    '%(b_rate).2f '
                    '%(total).2f '
                    '%(mover_time).2f '
                    '%(mover_rate).2f' % {
                        'start_time': time.ctime(report_time),
                        'passes': self.passes,
                        'errors': self.errors,
                        'c_rate': self.passes / (now - report_time),
                        'b_rate': self.bytes_processed / (now - report_time),
                        'total': (now - start_time),
                        'mover_time': mover_time,
                        'mover_rate': mover_time / (now - start_time)
                    })
                report_time = now
                total_errors += self.errors
                self.passes = 0
                self.bytes_processed = 0
                self.last_reported = now
            mover_time += (now - loop_time)
        elapsed = (time.time() - start_time) or 0.000001
        self.logger.info(
            '%(elapsed).02f '
            '%(errors)d '
            '%(chunk_rate).2f '
            '%(bytes_rate).2f '
            '%(mover_time).2f '
            '%(mover_rate).2f' % {
                'elapsed': elapsed,
                'errors': total_errors + self.errors,
                'chunk_rate': self.total_chunks_processed / elapsed,
                'bytes_rate': self.total_bytes_processed / elapsed,
                'mover_time': mover_time,
                'mover_rate': mover_time / elapsed
            })

    def safe_chunk_move(self, path):
        try:
            self.chunk_move(path)
        except Exception as e:
            self.errors += 1
            self.logger.error('ERROR while moving chunk %s: %s', path, e)
        self.passes += 1

    def load_chunk_metadata(self, path):
        with open(path) as f:
            return read_chunk_metadata(f)

    def chunk_move(self, path):
        meta = self.load_chunk_metadata(path)
        content_cid = meta['content_cid']
        content_path = meta['content_path']

        chunk_url = 'http://%s/%s' % (self.address, meta['chunk_id'])

        try:
            _, data = self.container_client.content_show(cid=content_cid,
                                                         path=content_path)
        except exc.NotFound:
            raise exc.OrphanChunk('Content not found')
        current_chunk = None
        notin = []
        for c in data:
            if c['pos'] != meta['chunk_pos']:
                continue
            if c['url'] == chunk_url:
                current_chunk = c
            else:
                notin.append(c)
        if not current_chunk:
            raise exc.OrphanChunk('Chunk not found in content')
        spare_data = {'notin': notin, 'broken': [current_chunk], 'size': 0}
        spare_resp = self.container_client.content_spare(cid=content_cid,
                                                         path=content_path,
                                                         data=spare_data)

        new_chunk = spare_resp['chunks'][0]
        self.blob_client.chunk_copy(current_chunk['url'], new_chunk['id'])

        old = [{
            'type': 'chunk',
            'id': current_chunk['url'],
            'hash': meta['chunk_hash'],
            'size': int(meta['chunk_size'])
        }]
        new = [{
            'type': 'chunk',
            'id': new_chunk['id'],
            'hash': meta['chunk_hash'],
            'size': int(meta['chunk_size'])
        }]
        update_data = {'old': old, 'new': new}

        self.container_client.container_raw_update(cid=content_cid,
                                                   data=update_data)

        self.blob_client.chunk_delete(current_chunk['url'])

        self.logger.info('moved chunk %s to %s', current_chunk['url'],
                         new_chunk['id'])
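
For reference, a hedged sketch of constructing and running the mover. The conf keys are exactly the ones read in __init__; the volume path and the None logger (which makes the worker fall back to get_logger) are placeholders.

# Hypothetical invocation; '/srv/node/rawx-1' is a placeholder volume path.
conf = {
    'usage_target': 70,           # stop once volume usage drops to 70%
    'usage_check_interval': 600,  # re-check usage every 10 minutes
    'report_interval': 300,       # log a progress line every 5 minutes
    'chunks_per_second': 30,
    'bytes_per_second': 10000000,
}
worker = BlobMoverWorker(conf, None, '/srv/node/rawx-1')
worker.mover_pass()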
Example #4
class TestPlainContent(BaseTestCase):
    def setUp(self):
        super(TestPlainContent, self).setUp()

        if len(self.conf['services']['rawx']) < 4:
            self.skipTest("Plain tests need at least 4 rawx to run")

        self.namespace = self.conf['namespace']
        self.account = self.conf['account']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(
            self.gridconf, logger=self.logger)
        self.container_client = ContainerClient(
            self.gridconf, logger=self.logger)
        self.blob_client = BlobClient(self.conf, logger=self.logger)
        self.container_name = "TestPlainContent-%f" % time.time()
        self.container_client.container_create(account=self.account,
                                               reference=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()
        self.content = "%s-%s" % (self.__class__.__name__, random_str(4))
        self.stgpol = "SINGLE"
        self.stgpol_twocopies = "TWOCOPIES"
        self.stgpol_threecopies = "THREECOPIES"

    def _test_create(self, stgpol, data_size):
        data = random_data(data_size)
        content = self.content_factory.new(self.container_id, self.content,
                                           len(data), stgpol)

        content.create(BytesIO(data))

        meta, chunks = self.container_client.content_locate(
            cid=self.container_id, content=content.content_id)
        self.assertEqual(meta['hash'], md5_data(data))
        self.assertEqual(meta['length'], str(len(data)))
        self.assertEqual(meta['policy'], stgpol)
        self.assertEqual(meta['name'], self.content)

        metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size))
        if metachunk_nb == 0:
            metachunk_nb = 1  # special case for empty content

        chunks = ChunksHelper(chunks)

        # TODO: derive nb_copy from the storage policy itself instead of
        # hardcoding the mapping for the policies used by this test
        if stgpol == self.stgpol_threecopies:
            nb_copy = 3
        elif stgpol == self.stgpol_twocopies:
            nb_copy = 2
        else:
            nb_copy = 1

        self.assertEqual(len(chunks), metachunk_nb * nb_copy)

        for pos in range(metachunk_nb):
            chunks_at_pos = chunks.filter(pos=pos)
            self.assertEqual(len(chunks_at_pos), nb_copy)

            data_begin = pos * self.chunk_size
            data_end = pos * self.chunk_size + self.chunk_size
            chunk_hash = md5_data(data[data_begin:data_end])

            for chunk in chunks_at_pos:
                meta, stream = self.blob_client.chunk_get(chunk.url)
                self.assertEqual(md5_stream(stream), chunk_hash)
                self.assertEqual(meta['content_path'], self.content)
                self.assertEqual(meta['container_id'], self.container_id)
                self.assertEqual(meta['content_id'], content.content_id)
                self.assertEqual(meta['chunk_id'], chunk.id)
                self.assertEqual(meta['chunk_pos'], str(pos))
                # Check that chunk data matches chunk hash from xattr
                self.assertEqual(meta['chunk_hash'], chunk_hash)
                # Check that chunk data matches chunk hash from database
                self.assertEqual(chunk.checksum, chunk_hash)
                full_path = encode_fullpath(
                    self.account, self.container_name, self.content,
                    meta['content_version'], meta['content_id'])
                self.assertEqual(meta['full_path'], full_path)
                self.assertEqual(meta['oio_version'], '4.2')

    def test_twocopies_create_0_byte(self):
        self._test_create(self.stgpol_twocopies, 0)

    def test_twocopies_create_1_byte(self):
        self._test_create(self.stgpol_twocopies, 1)

    def test_twocopies_create_chunksize_bytes(self):
        self._test_create(self.stgpol_twocopies, self.chunk_size)

    def test_twocopies_create_chunksize_plus_1_bytes(self):
        self._test_create(self.stgpol_twocopies, self.chunk_size + 1)

    def test_twocopies_create_6294503_bytes(self):
        self._test_create(self.stgpol_twocopies, 6294503)

    def test_single_create_0_byte(self):
        self._test_create(self.stgpol, 0)

    def test_single_create_chunksize_plus_1_bytes(self):
        self._test_create(self.stgpol, self.chunk_size + 1)

    def _new_content(self, stgpol, data, broken_pos_list=None):
        if broken_pos_list is None:
            broken_pos_list = []
        old_content = self.content_factory.new(
            self.container_id, self.content, len(data), stgpol)

        old_content.create(BytesIO(data))

        broken_chunks_info = {}
        for pos, idx in broken_pos_list:
            c = old_content.chunks.filter(pos=pos)[idx]
            meta, stream = self.blob_client.chunk_get(c.url)
            if pos not in broken_chunks_info:
                broken_chunks_info[pos] = {}
            broken_chunks_info[pos][idx] = {
                "url": c.url,
                "id": c.id,
                "hash": c.checksum,
                "dl_meta": meta,
                "dl_hash": md5_stream(stream)
            }
            self.blob_client.chunk_delete(c.url)

        # get the new structure of the uploaded content
        return (self.content_factory.get(
            self.container_id, old_content.content_id), broken_chunks_info)

    def _rebuild_and_check(self, content, broken_chunks_info, full_rebuild_pos,
                           allow_frozen_container=False):
        rebuild_pos, rebuild_idx = full_rebuild_pos
        rebuild_chunk_info = broken_chunks_info[rebuild_pos][rebuild_idx]
        service_id = urlparse(rebuild_chunk_info['url']).netloc
        content.rebuild_chunk(rebuild_chunk_info["id"], service_id=service_id,
                              allow_frozen_container=allow_frozen_container)

        # get the new structure of the content
        rebuilt_content = self.content_factory.get(self.container_id,
                                                   content.content_id)

        # find the rebuilt chunk
        for c in rebuilt_content.chunks.filter(pos=rebuild_pos):
            if len(content.chunks.filter(id=c.id)) > 0:
                # not the rebuilt chunk
                # if this chunk is broken, it must not have been rebuilt
                for b_c_i in broken_chunks_info[rebuild_pos].values():
                    if c.url == b_c_i['url']:
                        with ExpectedException(NotFound):
                            _, _ = self.blob_client.chunk_get(c.url)
                continue
            meta, stream = self.blob_client.chunk_get(c.url)
            self.assertEqual(meta["chunk_id"], c.id)
            self.assertEqual(md5_stream(stream),
                             rebuild_chunk_info["dl_hash"])
            self.assertEqual(c.checksum, rebuild_chunk_info["hash"])
            self.assertThat(c.url, NotEquals(rebuild_chunk_info["url"]))
            self.assertGreaterEqual(
                meta['chunk_mtime'],
                rebuild_chunk_info['dl_meta']['chunk_mtime'])
            del meta["chunk_mtime"]
            del rebuild_chunk_info["dl_meta"]["chunk_mtime"]
            del meta["chunk_id"]
            del rebuild_chunk_info["dl_meta"]["chunk_id"]
            self.assertEqual(meta, rebuild_chunk_info["dl_meta"])

    def _test_rebuild(self, stgpol, data_size, broken_pos_list,
                      full_rebuild_pos):
        data = random_data(data_size)
        content, broken_chunks_info = self._new_content(
            stgpol, data, broken_pos_list)

        self._rebuild_and_check(content, broken_chunks_info, full_rebuild_pos)

    def test_2copies_content_0_byte_1broken_rebuild_pos_0_idx_0(self):
        self._test_rebuild(self.stgpol_twocopies, 0, [(0, 0)], (0, 0))

    def test_2copies_content_1_byte_1broken_rebuild_pos_0_idx_1(self):
        self._test_rebuild(self.stgpol_twocopies, 1, [(0, 1)], (0, 1))

    def test_3copies_content_chunksize_bytes_2broken_rebuild_pos_0_idx_1(self):
        if len(self.conf['services']['rawx']) <= 3:
            self.skipTest("Need more than 3 rawx")
        self._test_rebuild(self.stgpol_threecopies, self.chunk_size,
                           [(0, 0), (0, 1)], (0, 1))

    def test_3copies_content_2xchksize_bytes_2broken_rebuild_pos_1_idx_2(self):
        self._test_rebuild(self.stgpol_threecopies, 2 * self.chunk_size,
                           [(1, 0), (1, 2)], (1, 2))

    def test_2copies_content_0_byte_2broken_rebuild_pos_0_idx_0(self):
        with ExpectedException(UnrecoverableContent):
            self._test_rebuild(
                self.stgpol_twocopies, 0, [(0, 0), (0, 1)], (0, 0))

    def test_rebuild_chunk_in_frozen_container(self):
        data = random_data(self.chunk_size)
        content, broken_chunks_info = self._new_content(
            self.stgpol_twocopies, data, [(0, 0)])
        system = {'sys.status': str(OIO_DB_FROZEN)}
        self.container_client.container_set_properties(
            self.account, self.container_name, None, system=system)

        try:
            full_rebuild_pos = (0, 0)
            rebuild_pos, rebuild_idx = full_rebuild_pos
            rebuild_chunk_info = broken_chunks_info[rebuild_pos][rebuild_idx]
            service_id = urlparse(rebuild_chunk_info['url']).netloc
            self.assertRaises(ServiceBusy,
                              content.rebuild_chunk,
                              rebuild_chunk_info["id"], service_id=service_id)
        finally:
            system['sys.status'] = str(OIO_DB_ENABLED)
            self.container_client.container_set_properties(
                self.account, self.container_name, None, system=system)

        self._rebuild_and_check(content, broken_chunks_info, full_rebuild_pos,
                                allow_frozen_container=True)

    def _test_fetch(self, stgpol, data_size, broken_pos_list):
        data = random_data(data_size)
        content, _ = self._new_content(stgpol, data, broken_pos_list)

        fetched_data = b''.join(content.fetch())

        self.assertEqual(fetched_data, data)

        for pos, idx in broken_pos_list:
            # check that the broken chunk has not been rebuilt
            c = content.chunks.filter(pos=pos)[idx]
            self.assertRaises(NotFound, self.blob_client.chunk_delete, c.url)

    def test_twocopies_fetch_content_0_byte_without_broken_chunks(self):
        self._test_fetch(self.stgpol_twocopies, 0, [])

    def test_twocopies_fetch_content_0_byte_with_broken_0_0(self):
        self._test_fetch(self.stgpol_twocopies, 0, [(0, 0)])

    def test_twocopies_fetch_content_1_byte_without_broken_chunks(self):
        self._test_fetch(self.stgpol_twocopies, 1, [])

    def test_twocopies_fetch_content_1_byte_with_broken_0_0(self):
        self._test_fetch(self.stgpol_twocopies, 1, [(0, 0)])

    def test_twocopies_fetch_chunksize_bytes_without_broken_chunks(self):
        self._test_fetch(self.stgpol_twocopies, self.chunk_size, [])

    def test_twocopies_fetch_2xchunksize_bytes_with_broken_0_0_and_1_0(self):
        self._test_fetch(
            self.stgpol_twocopies, self.chunk_size * 2, [(0, 0), (1, 0)])

    def test_twocopies_fetch_content_chunksize_bytes_2_broken_chunks(self):
        data = random_data(self.chunk_size)
        content, _ = self._new_content(
            self.stgpol_twocopies, data, [(0, 0), (0, 1)])
        gen = content.fetch()
        self.assertRaises(UnrecoverableContent, lambda: next(gen))

    def test_single_fetch_content_1_byte_without_broken_chunks(self):
        self._test_fetch(self.stgpol, 1, [])

    def test_single_fetch_chunksize_bytes_plus_1_without_broken_chunk(self):
        self._test_fetch(self.stgpol, self.chunk_size + 1, [])

    def test_content_spare_with_too_many_locations_known(self):
        for stgpol in (self.stgpol, self.stgpol_twocopies,
                       self.stgpol_threecopies):
            data = random_data(self.chunk_size)
            content, _ = self._new_content(stgpol, data)
            _meta, chunks = self.container_client.content_locate(
                cid=self.container_id, content=content.content_id)
            for _ in range(2):
                spare_data = {
                    "notin": chunks,
                    "broken": []
                }
                try:
                    self.container_client.content_spare(
                        cid=self.container_id, content=content.content_id,
                        data=spare_data, stgpol=stgpol,
                        position=chunks[0]['pos'])
                except BadRequest as err:
                    self.assertIn("too many locations already known",
                                  err.message)
                else:
                    self.fail("content_spare should have raised BadRequest")
                chunks += chunks
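
The TODO in _test_create suggests deriving nb_copy from the policy rather than an if/elif chain. One hedged way to express that for the three policies this test uses is a lookup table; the names match the stgpol* attributes set in setUp.

# Sketch: map the storage policies used by this test to their copy count,
# so _test_create could do nb_copy = NB_COPY_BY_POLICY[stgpol].
NB_COPY_BY_POLICY = {
    'SINGLE': 1,
    'TWOCOPIES': 2,
    'THREECOPIES': 3,
}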
Example #5
class Content(object):
    def __init__(self, conf, container_id, metadata, chunks, stgpol_args):
        self.conf = conf
        self.container_id = container_id
        self.metadata = metadata
        self.chunks = ChunksHelper(chunks)
        self.stgpol_args = stgpol_args
        self.logger = get_logger(self.conf)
        self.cs_client = ConscienceClient(conf)
        self.container_client = ContainerClient(self.conf)
        self.blob_client = BlobClient()
        self.session = requests.Session()
        self.content_id = self.metadata["id"]
        self.stgpol_name = self.metadata["policy"]
        self.path = self.metadata["name"]
        self.length = int(self.metadata["length"])
        self.version = self.metadata["version"]
        self.hash = self.metadata["hash"]
        self.mime_type = self.metadata["mime-type"]
        self.chunk_method = self.metadata["chunk-method"]

    def _meta2_get_spare_chunk(self, chunks_notin, chunks_broken):
        spare_data = {
            "notin": ChunksHelper(chunks_notin, False).raw(),
            "broken": ChunksHelper(chunks_broken, False).raw()
        }
        try:
            spare_resp = self.container_client.content_spare(
                cid=self.container_id, content=self.content_id,
                data=spare_data, stgpol=self.stgpol_name)
        except ClientException as e:
            raise exc.SpareChunkException("No spare chunk (%s)" % e.message)

        url_list = []
        for c in spare_resp["chunks"]:
            url_list.append(c["id"])

        return url_list

    def _meta2_update_spare_chunk(self, current_chunk, new_url):
        old = [{'type': 'chunk',
                'id': current_chunk.url,
                'hash': current_chunk.hash,
                'size': current_chunk.size,
                'pos': current_chunk.pos,
                'content': self.content_id}]
        new = [{'type': 'chunk',
                'id': new_url,
                'hash': current_chunk.hash,
                'size': current_chunk.size,
                'pos': current_chunk.pos,
                'content': self.content_id}]
        update_data = {'old': old, 'new': new}

        self.container_client.container_raw_update(
            cid=self.container_id, data=update_data)

    def _meta2_create_object(self):
        self.container_client.content_create(cid=self.container_id,
                                             path=self.path,
                                             content_id=self.content_id,
                                             stgpol=self.stgpol_name,
                                             size=self.length,
                                             checksum=self.hash,
                                             version=self.version,
                                             chunk_method=self.chunk_method,
                                             mime_type=self.mime_type,
                                             data=self.chunks.raw())

    def rebuild_chunk(self, chunk_id):
        raise NotImplementedError()

    def upload(self, stream):
        try:
            self._upload(stream)
        except Exception as e:
            for chunk in self.chunks:
                try:
                    self.blob_client.chunk_delete(chunk.url)
                except Exception:
                    pass
            raise e

    def _upload(self, stream):
        raise NotImplementedError()

    def download(self):
        raise NotImplementedError()
Example #6
class Content(object):
    def __init__(self, conf, container_id, metadata, chunks, stgpol_args):
        self.conf = conf
        self.container_id = container_id
        self.metadata = metadata
        self.chunks = ChunksHelper(chunks)
        self.stgpol_args = stgpol_args
        self.logger = get_logger(self.conf)
        self.cs_client = ConscienceClient(conf)
        self.container_client = ContainerClient(self.conf)
        self.blob_client = BlobClient()
        self.session = requests.Session()
        self.content_id = self.metadata["id"]
        self.stgpol_name = self.metadata["policy"]
        self.path = self.metadata["name"]
        self.length = int(self.metadata["length"])
        self.version = self.metadata["version"]
        self.hash = self.metadata["hash"]
        self.mime_type = self.metadata["mime-type"]
        self.chunk_method = self.metadata["chunk-method"]

    def _meta2_get_spare_chunk(self, chunks_notin, chunks_broken):
        spare_data = {
            "notin": ChunksHelper(chunks_notin, False).raw(),
            "broken": ChunksHelper(chunks_broken, False).raw()
        }
        try:
            spare_resp = self.container_client.content_spare(
                cid=self.container_id, content=self.content_id,
                data=spare_data, stgpol=self.stgpol_name)
        except ClientException as e:
            raise exc.SpareChunkException("No spare chunk (%s)" % e.message)

        url_list = []
        for c in spare_resp["chunks"]:
            url_list.append(c["id"])

        return url_list

    def _meta2_update_spare_chunk(self, current_chunk, new_url):
        old = [{'type': 'chunk',
                'id': current_chunk.url,
                'hash': current_chunk.hash,
                'size': current_chunk.size,
                'pos': current_chunk.pos,
                'content': self.content_id}]
        new = [{'type': 'chunk',
                'id': new_url,
                'hash': current_chunk.hash,
                'size': current_chunk.size,
                'pos': current_chunk.pos,
                'content': self.content_id}]
        update_data = {'old': old, 'new': new}

        self.container_client.container_raw_update(
            cid=self.container_id, data=update_data)

    def _meta2_create_object(self):
        self.container_client.content_create(cid=self.container_id,
                                             path=self.path,
                                             content_id=self.content_id,
                                             stgpol=self.stgpol_name,
                                             size=self.length,
                                             checksum=self.hash,
                                             version=self.version,
                                             chunk_method=self.chunk_method,
                                             mime_type=self.mime_type,
                                             data=self.chunks.raw())

    def rebuild_chunk(self, chunk_id):
        raise NotImplementedError()

    def upload(self, stream):
        try:
            self._upload(stream)
        except:
            # Keep the stack trace
            exc_info = sys.exc_info()
            for chunk in self.chunks:
                try:
                    self.blob_client.chunk_delete(chunk.url)
                except Exception:
                    self.logger.warn("Failed to delete %s", chunk.url)
            # Raise with the original stack trace
            raise exc_info[0], exc_info[1], exc_info[2]

    def _upload(self, stream):
        raise NotImplementedError()

    def download(self):
        raise NotImplementedError()

    def delete(self):
        self.container_client.content_delete(cid=self.container_id,
                                             path=self.path)

    def move_chunk(self, chunk_id):
        current_chunk = self.chunks.filter(id=chunk_id).one()
        if current_chunk is None:
            raise OrphanChunk("Chunk not found in content")

        other_chunks = self.chunks.filter(
            metapos=current_chunk.metapos).exclude(id=chunk_id).all()

        spare_urls = self._meta2_get_spare_chunk(other_chunks,
                                                 [current_chunk])

        self.logger.debug("copy chunk from %s to %s",
                          current_chunk.url, spare_urls[0])
        self.blob_client.chunk_copy(current_chunk.url, spare_urls[0])

        self._meta2_update_spare_chunk(current_chunk, spare_urls[0])

        try:
            self.blob_client.chunk_delete(current_chunk.url)
        except Exception:
            self.logger.warn("Failed to delete chunk %s", current_chunk.url)

        current_chunk.url = spare_urls[0]

        return current_chunk.raw()
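
Note that `raise exc_info[0], exc_info[1], exc_info[2]` in upload() is Python 2 only syntax. In code that must also run on Python 3, the same "clean up, then re-raise with the original traceback" pattern is usually written with six.reraise. A sketch under that assumption, with do_upload and cleanup as stand-ins for self._upload(stream) and the chunk deletion loop:

import sys
import six


def do_upload():
    # Stands in for self._upload(stream) in the class above
    raise IOError("upload failed")


def cleanup():
    # Stands in for the chunk_delete loop in upload()
    pass


try:
    do_upload()
except Exception:
    exc_info = sys.exc_info()
    cleanup()
    # Re-raise the original exception with its original traceback,
    # portably across Python 2 and 3
    six.reraise(*exc_info)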
Example #7
class Content(object):
    def __init__(self, conf, container_id, metadata, chunks, stgpol_args):
        self.conf = conf
        self.container_id = container_id
        self.metadata = metadata
        self.chunks = ChunksHelper(chunks)
        self.stgpol_args = stgpol_args
        self.logger = get_logger(self.conf)
        self.cs_client = ConscienceClient(conf)
        self.container_client = ContainerClient(self.conf)
        self.blob_client = BlobClient()
        self.session = requests.Session()
        self.content_id = self.metadata["id"]
        self.stgpol_name = self.metadata["policy"]
        self.path = self.metadata["name"]
        self.length = int(self.metadata["length"])
        self.version = self.metadata["version"]
        self.hash = self.metadata["hash"]
        self.mime_type = self.metadata["mime-type"]
        self.chunk_method = self.metadata["chunk-method"]

    def _meta2_get_spare_chunk(self, chunks_notin, chunks_broken):
        spare_data = {
            "notin": ChunksHelper(chunks_notin, False).raw(),
            "broken": ChunksHelper(chunks_broken, False).raw()
        }
        try:
            spare_resp = self.container_client.content_spare(
                cid=self.container_id,
                content=self.content_id,
                data=spare_data,
                stgpol=self.stgpol_name)
        except ClientException as e:
            raise exc.SpareChunkException("No spare chunk (%s)" % e.message)

        url_list = []
        for c in spare_resp["chunks"]:
            url_list.append(c["id"])

        return url_list

    def _meta2_update_spare_chunk(self, current_chunk, new_url):
        old = [{
            'type': 'chunk',
            'id': current_chunk.url,
            'hash': current_chunk.hash,
            'size': current_chunk.size,
            'pos': current_chunk.pos,
            'content': self.content_id
        }]
        new = [{
            'type': 'chunk',
            'id': new_url,
            'hash': current_chunk.hash,
            'size': current_chunk.size,
            'pos': current_chunk.pos,
            'content': self.content_id
        }]
        update_data = {'old': old, 'new': new}

        self.container_client.container_raw_update(cid=self.container_id,
                                                   data=update_data)

    def _meta2_create_object(self):
        self.container_client.content_create(cid=self.container_id,
                                             path=self.path,
                                             content_id=self.content_id,
                                             stgpol=self.stgpol_name,
                                             size=self.length,
                                             checksum=self.hash,
                                             version=self.version,
                                             chunk_method=self.chunk_method,
                                             mime_type=self.mime_type,
                                             data=self.chunks.raw())

    def rebuild_chunk(self, chunk_id):
        raise NotImplementedError()

    def upload(self, stream):
        try:
            self._upload(stream)
        except Exception as e:
            for chunk in self.chunks:
                try:
                    self.blob_client.chunk_delete(chunk.url)
                except Exception:
                    pass
            raise e

    def _upload(self, stream):
        raise NotImplementedError()

    def download(self):
        raise NotImplementedError()
Example #8
class BlobRebuilderWorker(object):
    def __init__(self, conf, logger, volume):
        self.conf = conf
        self.logger = logger or get_logger(conf)
        self.volume = volume
        self.run_time = 0
        self.passes = 0
        self.errors = 0
        self.last_reported = 0
        self.chunks_run_time = 0
        self.bytes_running_time = 0
        self.bytes_processed = 0
        self.total_bytes_processed = 0
        self.total_chunks_processed = 0
        self.dry_run = true_value(
            conf.get('dry_run', False))
        self.report_interval = int_value(
            conf.get('report_interval'), 3600)
        self.max_chunks_per_second = int_value(
            conf.get('chunks_per_second'), 30)
        self.max_bytes_per_second = int_value(
            conf.get('bytes_per_second'), 10000000)
        self.rdir_fetch_limit = int_value(
            conf.get('rdir_fetch_limit'), 100)
        self.blob_client = BlobClient()
        self.container_client = ContainerClient(conf)
        self.rdir_client = RdirClient(conf)

    def rebuilder_pass_with_lock(self):
        self.rdir_client.admin_lock(self.volume,
                                    "rebuilder on %s" % gethostname())
        try:
            self.rebuilder_pass()
        finally:
            self.rdir_client.admin_unlock(self.volume)

    def rebuilder_pass(self):
        start_time = report_time = time.time()

        total_errors = 0
        rebuilder_time = 0

        chunks = self.rdir_client.chunk_fetch(self.volume,
                                              limit=self.rdir_fetch_limit,
                                              rebuild=True)
        for container_id, content_id, chunk_id, data in chunks:
            loop_time = time.time()

            if self.dry_run:
                self.dryrun_chunk_rebuild(container_id, content_id, chunk_id)
            else:
                self.safe_chunk_rebuild(container_id, content_id, chunk_id)

            self.chunks_run_time = ratelimit(
                self.chunks_run_time,
                self.max_chunks_per_second
            )
            self.total_chunks_processed += 1
            now = time.time()

            if now - self.last_reported >= self.report_interval:
                self.logger.info(
                    '%(start_time)s '
                    '%(passes)d '
                    '%(errors)d '
                    '%(c_rate).2f '
                    '%(b_rate).2f '
                    '%(total).2f '
                    '%(rebuilder_time).2f '
                    '%(rebuilder_rate).2f' % {
                        'start_time': time.ctime(report_time),
                        'passes': self.passes,
                        'errors': self.errors,
                        'c_rate': self.passes / (now - report_time),
                        'b_rate': self.bytes_processed / (now - report_time),
                        'total': (now - start_time),
                        'rebuilder_time': rebuilder_time,
                        'rebuilder_rate': rebuilder_time / (now - start_time)
                    }
                )
                report_time = now
                total_errors += self.errors
                self.passes = 0
                self.bytes_processed = 0
                self.last_reported = now
            rebuilder_time += (now - loop_time)
        elapsed = (time.time() - start_time) or 0.000001
        self.logger.info(
            '%(elapsed).02f '
            '%(errors)d '
            '%(chunk_rate).2f '
            '%(bytes_rate).2f '
            '%(rebuilder_time).2f '
            '%(rebuilder_rate).2f' % {
                'elapsed': elapsed,
                'errors': total_errors + self.errors,
                'chunk_rate': self.total_chunks_processed / elapsed,
                'bytes_rate': self.total_bytes_processed / elapsed,
                'rebuilder_time': rebuilder_time,
                'rebuilder_rate': rebuilder_time / elapsed
            }
        )

    def dryrun_chunk_rebuild(self, container_id, content_id, chunk_id):
        self.logger.info("[dryrun] Rebuilding "
                         "container %s, content %s, chunk %s"
                         % (container_id, content_id, chunk_id))
        self.passes += 1

    def safe_chunk_rebuild(self, container_id, content_id, chunk_id):
        self.logger.info('Rebuilding (container %s, content %s, chunk %s)',
                         container_id, content_id, chunk_id)
        try:
            self.chunk_rebuild(container_id, content_id, chunk_id)
        except Exception as e:
            self.errors += 1
            self.logger.error('ERROR while rebuilding chunk %s|%s|%s: %s',
                              container_id, content_id, chunk_id, e)

        self.passes += 1

    def _meta2_get_chunks_at_pos(self, container_id, content_id, chunk_id):
        current_chunk_url = 'http://%s/%s' % (self.volume, chunk_id)

        try:
            data = self.container_client.content_show(
                cid=container_id, content=content_id)
        except exc.NotFound:
            raise exc.OrphanChunk('Content not found')

        current_chunk = None
        for c in data:
            if c['url'] == current_chunk_url:
                current_chunk = c
                break
        if not current_chunk:
            raise exc.OrphanChunk('Chunk not found in content')

        duplicate_chunks = []
        for c in data:
            if (c['pos'] == current_chunk['pos']
                    and c['url'] != current_chunk['url']):
                duplicate_chunks.append(c)
        if len(duplicate_chunks) == 0:
            raise exc.UnrecoverableContent('No copy of missing chunk')

        return current_chunk, duplicate_chunks

    def _meta2_get_spare_chunk(self, container_id, content_id, notin, broken):
        spare_data = {'notin': notin,
                      'broken': [broken],
                      'size': 0}
        try:
            spare_resp = self.container_client.content_spare(
                cid=container_id, content=content_id, data=spare_data)
        except ClientException as e:
            raise exc.SpareChunkException('No spare chunk (%s)' % e.message)

        return spare_resp['chunks'][0]

    def _meta2_replace_chunk(self, container_id, content_id,
                             current_chunk, new_chunk):
        old = [{'type': 'chunk',
                'id': current_chunk['url'],
                'hash': current_chunk['hash'],
                'size': current_chunk['size'],
                'pos': current_chunk['pos'],
                'content': content_id}]
        new = [{'type': 'chunk',
                'id': new_chunk['id'],
                'hash': current_chunk['hash'],
                'size': current_chunk['size'],
                'pos': current_chunk['pos'],
                'content': content_id}]
        update_data = {'old': old, 'new': new}

        self.container_client.container_raw_update(
            cid=container_id, data=update_data)

    # TODO rain support
    def chunk_rebuild(self, container_id, content_id, chunk_id):
        current_chunk, duplicate_chunks = self._meta2_get_chunks_at_pos(
            container_id, content_id, chunk_id)

        spare_chunk = self._meta2_get_spare_chunk(
            container_id, content_id, duplicate_chunks, current_chunk)

        uploaded = False
        for src in duplicate_chunks:
            try:
                self.blob_client.chunk_copy(src['url'], spare_chunk['id'])
                self.logger.debug('copy chunk from %s to %s',
                                  src['url'], spare_chunk['id'])
                uploaded = True
                break
            except Exception as e:
                self.logger.debug('Failed to copy chunk from %s to %s: %s',
                                  src['url'], spare_chunk['id'], type(e))
        if not uploaded:
            raise exc.UnrecoverableContent('No copy available '
                                           'of missing chunk')

        self._meta2_replace_chunk(container_id, content_id,
                                  current_chunk, spare_chunk)

        self.rdir_client.chunk_push(self.volume, container_id, content_id,
                                    chunk_id, rtime=int(time.time()))

        self.bytes_processed += current_chunk['size']
        self.total_bytes_processed += current_chunk['size']
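
Finally, a hedged sketch of driving the rebuilder in dry-run mode. The conf keys are the ones read in __init__; the volume identifier (an address, as used both to build chunk URLs and to lock the rdir) is a placeholder.

# Hypothetical invocation; '127.0.0.1:6010' is a placeholder volume address.
conf = {
    'dry_run': True,          # only log what would be rebuilt
    'rdir_fetch_limit': 100,  # chunks fetched from the rdir per request
    'report_interval': 300,
    'chunks_per_second': 30,
}
worker = BlobRebuilderWorker(conf, None, '127.0.0.1:6010')
worker.rebuilder_pass_with_lock()  # locks the volume in the rdir first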