Example #1
class BlobRebuilder(Tool):
    """
    Rebuild chunks.
    """

    DEFAULT_BEANSTALKD_WORKER_TUBE = 'oio-rebuild'
    DEFAULT_DISTRIBUTED_BEANSTALKD_WORKER_TUBE = 'oio-rebuild'
    DEFAULT_RDIR_FETCH_LIMIT = 100
    DEFAULT_RDIR_TIMEOUT = 60.0
    DEFAULT_ALLOW_FROZEN_CT = False
    DEFAULT_ALLOW_SAME_RAWX = True
    DEFAULT_TRY_CHUNK_DELETE = False
    DEFAULT_DRY_RUN = False

    def __init__(self, conf, input_file=None, service_id=None, **kwargs):
        super(BlobRebuilder, self).__init__(conf, **kwargs)

        # counters
        self.bytes_processed = 0
        self.total_bytes_processed = 0

        # input
        self.input_file = input_file
        self.rawx_id = service_id

        # rawx/rdir
        self.rdir_client = RdirClient(self.conf, logger=self.logger)
        self.rdir_fetch_limit = int_value(self.conf.get('rdir_fetch_limit'),
                                          self.DEFAULT_RDIR_FETCH_LIMIT)
        self.rdir_shuffle_chunks = true_value(conf.get('rdir_shuffle_chunks'))
        self.rdir_timeout = float_value(conf.get('rdir_timeout'),
                                        self.DEFAULT_RDIR_TIMEOUT)

    @staticmethod
    def items_from_task_event(task_event):
        namespace = task_event['url']['ns']
        container_id = task_event['url']['id']
        content_id = task_event['url']['content']
        for chunk_id_or_pos in task_event['data']['missing_chunks']:
            yield namespace, container_id, content_id, str(chunk_id_or_pos)

    @staticmethod
    def task_event_from_item(item):
        namespace, container_id, content_id, chunk_id_or_pos = item
        return {
            'when': time.time(),
            'event': EventTypes.CONTENT_BROKEN,
            'url': {
                'ns': namespace,
                'id': container_id,
                'content': content_id
            },
            'data': {
                'missing_chunks': [
                    chunk_id_or_pos
                ]
            }
        }

    @staticmethod
    def tasks_res_from_res_event(res_event):
        namespace = res_event['url']['ns']
        container_id = res_event['url']['id']
        content_id = res_event['url']['content']
        for chunk_rebuilt in res_event['data']['chunks_rebuilt']:
            yield (namespace, container_id, content_id,
                   str(chunk_rebuilt['chunk_id_or_pos'])), \
                chunk_rebuilt['bytes_processed'], chunk_rebuilt['error']

    @staticmethod
    def res_event_from_task_res(task_res):
        item, bytes_processed, error = task_res
        namespace, container_id, content_id, chunk_id_or_pos = item
        return {
            'when': time.time(),
            'event': EventTypes.CONTENT_REBUILT,
            'url': {
                'ns': namespace,
                'id': container_id,
                'content': content_id
            },
            'data': {
                'chunks_rebuilt': [{
                    'chunk_id_or_pos': chunk_id_or_pos,
                    'bytes_processed': bytes_processed,
                    'error': error
                }]
            }
        }

    @staticmethod
    def string_from_item(item):
        namespace, container_id, content_id, chunk_id_or_pos = item
        return '%s|%s|%s|%s' % (namespace, container_id, content_id,
                                chunk_id_or_pos)

    def _fetch_items_from_input_file(self):
        with open(self.input_file, 'r') as ifile:
            for line in ifile:
                stripped = line.strip()
                if not stripped or stripped.startswith('#'):
                    continue

                container_id, content_id, chunk_id_or_pos = \
                    stripped.split('|', 3)[:3]
                yield self.namespace, container_id, content_id, \
                    chunk_id_or_pos

    def _fetch_items_from_rawx_id(self):
        lost_chunks = self.rdir_client.chunk_fetch(
            self.rawx_id,
            limit=self.rdir_fetch_limit,
            rebuild=True,
            shuffle=self.rdir_shuffle_chunks,
            timeout=self.rdir_timeout)
        for container_id, content_id, chunk_id, _ in lost_chunks:
            yield self.namespace, container_id, content_id, chunk_id

    def _fetch_items(self):
        if self.input_file:
            return self._fetch_items_from_input_file()
        if self.rawx_id:
            return self._fetch_items_from_rawx_id()

        # Neither an input file nor a rawx ID was given: nothing to rebuild.
        def _empty_generator():
            return
            yield  # pylint: disable=unreachable

        return _empty_generator()

    def update_counters(self, task_res):
        super(BlobRebuilder, self).update_counters(task_res)
        _, bytes_processed, _ = task_res
        if bytes_processed is not None:
            self.bytes_processed += bytes_processed

    def _update_total_counters(self):
        chunks_processed, total_chunks_processed, errors, total_errors = \
            super(BlobRebuilder, self)._update_total_counters()
        bytes_processed = self.bytes_processed
        self.bytes_processed = 0
        self.total_bytes_processed += bytes_processed
        return chunks_processed, total_chunks_processed, \
            bytes_processed, self.total_bytes_processed, \
            errors, total_errors

    def _get_report(self, status, end_time, counters):
        chunks_processed, total_chunks_processed, \
            bytes_processed, total_bytes_processed, \
            errors, total_errors = counters
        time_since_last_report = (end_time - self.last_report) or 0.00001
        total_time = (end_time - self.start_time) or 0.00001
        report = (
            '%(status)s '
            'last_report=%(last_report)s %(time_since_last_report).2fs '
            'chunks=%(chunks)d %(chunks_rate).2f/s '
            'bytes=%(bytes)d %(bytes_rate).2fB/s '
            'errors=%(errors)d %(errors_rate).2f%% '
            'start_time=%(start_time)s %(total_time).2fs '
            'total_chunks=%(total_chunks)d %(total_chunks_rate).2f/s '
            'total_bytes=%(total_bytes)d %(total_bytes_rate).2fB/s '
            'total_errors=%(total_errors)d %(total_errors_rate).2f%%' % {
                'status':
                status,
                'last_report':
                datetime.fromtimestamp(int(self.last_report)).isoformat(),
                'time_since_last_report':
                time_since_last_report,
                'chunks':
                chunks_processed,
                'chunks_rate':
                chunks_processed / time_since_last_report,
                'bytes':
                bytes_processed,
                'bytes_rate':
                bytes_processed / time_since_last_report,
                'errors':
                errors,
                'errors_rate':
                100 * errors / float(chunks_processed or 1),
                'start_time':
                datetime.fromtimestamp(int(self.start_time)).isoformat(),
                'total_time':
                total_time,
                'total_chunks':
                total_chunks_processed,
                'total_chunks_rate':
                total_chunks_processed / total_time,
                'total_bytes':
                total_bytes_processed,
                'total_bytes_rate':
                total_bytes_processed / total_time,
                'total_errors':
                total_errors,
                'total_errors_rate':
                100 * total_errors / float(total_chunks_processed or 1)
            })
        if self.total_expected_items is not None:
            progress = 100 * total_chunks_processed / \
                float(self.total_expected_items or 1)
            report += ' progress=%d/%d %.2f%%' % \
                (total_chunks_processed, self.total_expected_items, progress)
        return report

    def create_worker(self, queue_workers, queue_reply):
        return BlobRebuilderWorker(self, queue_workers, queue_reply)

    def _load_total_expected_items(self):
        if self.rawx_id:
            try:
                info = self.rdir_client.status(self.rawx_id,
                                               read_timeout=self.rdir_timeout)
                self.total_expected_items = info.get('chunk', dict()).get(
                    'to_rebuild', None)
            except Exception as exc:
                self.logger.warning(
                    'Failed to fetch the total chunks to rebuild: %s', exc)

    def run(self):
        if self.rawx_id:
            self.rdir_client.admin_lock(self.rawx_id,
                                        "rebuilder on %s" % gethostname(),
                                        timeout=self.rdir_timeout)
        success = super(BlobRebuilder, self).run()
        if self.rawx_id:
            self.rdir_client.admin_unlock(self.rawx_id,
                                          timeout=self.rdir_timeout)
        return success
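
A minimal usage sketch for the class above, assuming it is importable as
oio.blob.rebuilder.BlobRebuilder and that the Tool base class (not shown)
reads the namespace from conf['namespace']; both are assumptions. The
input-file format follows _fetch_items_from_input_file() above.

# A minimal sketch, not the project's CLI. The import path and the
# 'namespace' configuration key are assumptions; everything else comes
# from the class above.
from oio.blob.rebuilder import BlobRebuilder

conf = {
    'namespace': 'OPENIO',       # assumed to be read by the Tool base class
    'rdir_fetch_limit': 200,     # overrides DEFAULT_RDIR_FETCH_LIMIT (100)
    'rdir_timeout': 60.0,        # matches DEFAULT_RDIR_TIMEOUT
}

# chunks.txt: one 'container_id|content_id|chunk_id_or_pos' entry per line;
# blank lines and lines starting with '#' are ignored.
rebuilder = BlobRebuilder(conf, input_file='chunks.txt')
success = rebuilder.run()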
Example #2
class TestRdirClient(BaseTestCase):
    @classmethod
    def setUpClass(cls):
        super(TestRdirClient, cls).setUpClass()
        cls._service('@indexer', 'stop')

    @classmethod
    def tearDownClass(cls):
        super(TestRdirClient, cls).tearDownClass()
        cls._service('@indexer', 'start')

    def _push_chunks(self):
        max_mtime = 16
        self.incident_date = random.randrange(2, max_mtime - 1)

        expected_entries = list()
        for _ in range(4):
            cid = random_id(64)
            for _ in range(random.randrange(2, 5)):
                content_id = random_id(32)
                for _ in range(random.randrange(2, 5)):
                    chunk_id = random_id(63)
                    mtime = random.randrange(0, max_mtime + 1)
                    if mtime <= self.incident_date:
                        chunk_id += '0'
                    else:
                        chunk_id += '1'
                    self.rdir.chunk_push(self.rawx_id,
                                         cid,
                                         content_id,
                                         chunk_id,
                                         mtime=mtime)
                    entry = (cid, content_id, chunk_id, {'mtime': mtime})
                    expected_entries.append(entry)
        self.expected_entries = sorted(expected_entries)

    def setUp(self):
        super(TestRdirClient, self).setUp()
        self.rawx_conf = random.choice(self.conf['services']['rawx'])
        self.rawx_id = self.rawx_conf.get('service_id', self.rawx_conf['addr'])
        self.rdir = RdirClient(self.conf)
        self.rdir.admin_clear(self.rawx_id, clear_all=True)

        self._push_chunks()
        self.rdir._direct_request = Mock(side_effect=self.rdir._direct_request)

    def _assert_chunk_fetch(self, expected_entries, entries, limit=0):
        self.assertListEqual(expected_entries, list(entries))
        nb_requests = 1
        if limit > 0 and len(expected_entries) > 0:
            nb_requests = int(math.ceil(len(expected_entries) / float(limit)))
        self.assertEqual(nb_requests, self.rdir._direct_request.call_count)
        self.rdir._direct_request.reset_mock()

    def test_chunk_fetch(self):
        entries = self.rdir.chunk_fetch(self.rawx_id)
        self._assert_chunk_fetch(self.expected_entries, entries)

    def test_chunk_fetch_with_limit(self):
        entries = self.rdir.chunk_fetch(self.rawx_id, limit=2)
        self._assert_chunk_fetch(self.expected_entries, entries, limit=2)

    def test_chunk_fetch_with_container_id(self):
        cid = random.choice(self.expected_entries)[0]
        expected_entries_cid = [
            entry for entry in self.expected_entries if entry[0] == cid
        ]
        entries = self.rdir.chunk_fetch(self.rawx_id, container_id=cid)
        self._assert_chunk_fetch(expected_entries_cid, entries)

    def test_chunk_fetch_with_container_id_limit(self):
        cid = random.choice(self.expected_entries)[0]
        expected_entries_cid = [
            entry for entry in self.expected_entries if entry[0] == cid
        ]
        entries = self.rdir.chunk_fetch(self.rawx_id,
                                        container_id=cid,
                                        limit=2)
        self._assert_chunk_fetch(expected_entries_cid, entries, limit=2)

    def test_chunk_fetch_with_start_after(self):
        start_after_index = random.randrange(0, len(self.expected_entries))
        start_after = '|'.join(self.expected_entries[start_after_index][:3])
        entries = self.rdir.chunk_fetch(self.rawx_id, start_after=start_after)
        self._assert_chunk_fetch(self.expected_entries[start_after_index + 1:],
                                 entries)

    def test_chunk_fetch_with_start_after_limit(self):
        start_after_index = random.randrange(0, len(self.expected_entries))
        start_after = '|'.join(self.expected_entries[start_after_index][:3])
        entries = self.rdir.chunk_fetch(self.rawx_id,
                                        start_after=start_after,
                                        limit=2)
        self._assert_chunk_fetch(self.expected_entries[start_after_index + 1:],
                                 entries,
                                 limit=2)

    def test_chunk_fetch_with_start_after_container_id(self):
        cid = random.choice(self.expected_entries)[0]
        expected_entries_cid = [
            entry for entry in self.expected_entries if entry[0] == cid
        ]
        start_after_index = random.randrange(0, len(expected_entries_cid))
        start_after = '|'.join(expected_entries_cid[start_after_index][:3])
        entries = self.rdir.chunk_fetch(self.rawx_id,
                                        start_after=start_after,
                                        container_id=cid)
        self._assert_chunk_fetch(expected_entries_cid[start_after_index + 1:],
                                 entries)

    def test_chunk_fetch_with_start_after_container_id_limit(self):
        cid = random.choice(self.expected_entries)[0]
        expected_entries_cid = [
            entry for entry in self.expected_entries if entry[0] == cid
        ]
        start_after_index = random.randrange(0, len(expected_entries_cid))
        start_after = '|'.join(expected_entries_cid[start_after_index][:3])
        entries = self.rdir.chunk_fetch(self.rawx_id,
                                        start_after=start_after,
                                        container_id=cid,
                                        limit=2)
        self._assert_chunk_fetch(expected_entries_cid[start_after_index + 1:],
                                 entries,
                                 limit=2)

    def test_chunk_fetch_with_rebuild_no_incident(self):
        entries = self.rdir.chunk_fetch(self.rawx_id, rebuild=True)
        self._assert_chunk_fetch(list(), entries)

    def test_chunk_fetch_with_rebuild(self):
        self.rdir.admin_incident_set(self.rawx_id, self.incident_date)
        self.rdir._direct_request.reset_mock()
        expected_entries_rebuild = [
            entry for entry in self.expected_entries if entry[2][-1] == '0'
        ]
        entries = self.rdir.chunk_fetch(self.rawx_id, rebuild=True)
        self._assert_chunk_fetch(expected_entries_rebuild, entries)

    def test_chunk_fetch_with_rebuild_limit(self):
        self.rdir.admin_incident_set(self.rawx_id, self.incident_date)
        self.rdir._direct_request.reset_mock()
        expected_entries_rebuild = [
            entry for entry in self.expected_entries if entry[2][-1] == '0'
        ]
        entries = self.rdir.chunk_fetch(self.rawx_id, rebuild=True, limit=2)
        self._assert_chunk_fetch(expected_entries_rebuild, entries, limit=2)

    def test_chunk_fetch_with_rebuild_container_id(self):
        self.rdir.admin_incident_set(self.rawx_id, self.incident_date)
        self.rdir._direct_request.reset_mock()
        expected_entries_rebuild = [
            entry for entry in self.expected_entries if entry[2][-1] == '0'
        ]
        cid = random.choice(self.expected_entries)[0]
        expected_entries_rebuild_cid = \
            [entry for entry in expected_entries_rebuild
             if entry[0] == cid]
        entries = self.rdir.chunk_fetch(self.rawx_id,
                                        rebuild=True,
                                        container_id=cid)
        self._assert_chunk_fetch(expected_entries_rebuild_cid, entries)

    def test_chunk_fetch_with_rebuild_start_after(self):
        self.rdir.admin_incident_set(self.rawx_id, self.incident_date)
        self.rdir._direct_request.reset_mock()
        expected_entries_rebuild = [
            entry for entry in self.expected_entries if entry[2][-1] == '0'
        ]
        if expected_entries_rebuild:
            start_after_index = random.randrange(0,
                                                 len(expected_entries_rebuild))
            start_after = '|'.join(
                expected_entries_rebuild[start_after_index][:3])
        else:
            start_after_index = 0
            start_after = '|'.join(
                (random_id(64), random_id(32), random_id(64)))
        entries = self.rdir.chunk_fetch(self.rawx_id,
                                        rebuild=True,
                                        start_after=start_after)
        self._assert_chunk_fetch(
            expected_entries_rebuild[start_after_index + 1:], entries)

    def test_chunk_fetch_with_rebuild_container_id_limit(self):
        self.rdir.admin_incident_set(self.rawx_id, self.incident_date)
        self.rdir._direct_request.reset_mock()
        expected_entries_rebuild = [
            entry for entry in self.expected_entries if entry[2][-1] == '0'
        ]
        cid = random.choice(self.expected_entries)[0]
        expected_entries_rebuild_cid = \
            [entry for entry in expected_entries_rebuild
             if entry[0] == cid]
        entries = self.rdir.chunk_fetch(self.rawx_id,
                                        rebuild=True,
                                        container_id=cid,
                                        limit=2)
        self._assert_chunk_fetch(expected_entries_rebuild_cid,
                                 entries,
                                 limit=2)

    def test_chunk_fetch_with_rebuild_container_id_start_after(self):
        self.rdir.admin_incident_set(self.rawx_id, self.incident_date)
        self.rdir._direct_request.reset_mock()
        expected_entries_rebuild = [
            entry for entry in self.expected_entries if entry[2][-1] == '0'
        ]
        cid = random.choice(self.expected_entries)[0]
        expected_entries_rebuild_cid = \
            [entry for entry in expected_entries_rebuild
             if entry[0] == cid]
        if expected_entries_rebuild_cid:
            start_after_index = random.randrange(
                0, len(expected_entries_rebuild_cid))
            start_after = '|'.join(
                expected_entries_rebuild_cid[start_after_index][:3])
        else:
            start_after_index = 0
            start_after = '|'.join(
                (random_id(64), random_id(32), random_id(64)))
        entries = self.rdir.chunk_fetch(self.rawx_id,
                                        rebuild=True,
                                        container_id=cid,
                                        start_after=start_after)
        self._assert_chunk_fetch(
            expected_entries_rebuild_cid[start_after_index + 1:], entries)

    def test_chunk_fetch_with_rebuild_start_after_limit(self):
        self.rdir.admin_incident_set(self.rawx_id, self.incident_date)
        self.rdir._direct_request.reset_mock()
        expected_entries_rebuild = [
            entry for entry in self.expected_entries if entry[2][-1] == '0'
        ]
        if expected_entries_rebuild:
            start_after_index = random.randrange(0,
                                                 len(expected_entries_rebuild))
            start_after = '|'.join(
                expected_entries_rebuild[start_after_index][:3])
        else:
            start_after_index = 0
            start_after = '|'.join(
                (random_id(64), random_id(32), random_id(64)))
        entries = self.rdir.chunk_fetch(self.rawx_id,
                                        rebuild=True,
                                        start_after=start_after,
                                        limit=2)
        self._assert_chunk_fetch(expected_entries_rebuild[start_after_index +
                                                          1:],
                                 entries,
                                 limit=2)

    def test_chunk_fetch_with_rebuild_container_id_start_after_limit(self):
        self.rdir.admin_incident_set(self.rawx_id, self.incident_date)
        self.rdir._direct_request.reset_mock()
        expected_entries_rebuild = [
            entry for entry in self.expected_entries if entry[2][-1] == '0'
        ]
        cid = random.choice(self.expected_entries)[0]
        expected_entries_rebuild_cid = \
            [entry for entry in expected_entries_rebuild
             if entry[0] == cid]
        if expected_entries_rebuild_cid:
            start_after_index = random.randrange(
                0, len(expected_entries_rebuild_cid))
            start_after = '|'.join(
                expected_entries_rebuild_cid[start_after_index][:3])
        else:
            start_after_index = 0
            start_after = '|'.join(
                (random_id(64), random_id(32), random_id(64)))
        entries = self.rdir.chunk_fetch(self.rawx_id,
                                        rebuild=True,
                                        container_id=cid,
                                        start_after=start_after,
                                        limit=2)
        self._assert_chunk_fetch(
            expected_entries_rebuild_cid[start_after_index + 1:],
            entries,
            limit=2)

    def _assert_chunk_status(self,
                             expected_entries,
                             status,
                             max=0,
                             incident=False):
        expected_status = dict()
        expected_status['chunk'] = {'total': len(expected_entries)}
        expected_status['container'] = dict()
        for entry in expected_entries:
            container = expected_status['container'].setdefault(
                entry[0], dict())
            container['total'] = container.get('total', 0) + 1
        if incident:
            expected_entries_rebuild = [
                entry for entry in expected_entries if entry[2][-1] == '0'
            ]
            expected_status['chunk']['to_rebuild'] = \
                len(expected_entries_rebuild)
            for entry in expected_entries_rebuild:
                container = expected_status['container'][entry[0]]
                container['to_rebuild'] = container.get('to_rebuild', 0) + 1
        self.assertDictEqual(expected_status, status)
        nb_requests = 1
        if max > 0 and len(expected_entries) > 0:
            nb_requests = int(math.ceil(len(expected_entries) / float(max)))
        self.assertEqual(nb_requests, self.rdir._direct_request.call_count)
        self.rdir._direct_request.reset_mock()

    def test_chunk_status(self):
        status = self.rdir.status(self.rawx_id)
        self._assert_chunk_status(self.expected_entries, status)

    def test_chunk_status_with_max(self):
        status = self.rdir.status(self.rawx_id, max=2)
        self._assert_chunk_status(self.expected_entries, status, max=2)

    def test_chunk_status_with_prefix(self):
        cid = random.choice(self.expected_entries)[0]
        expected_entries_cid = [
            entry for entry in self.expected_entries if entry[0] == cid
        ]
        status = self.rdir.status(self.rawx_id, prefix=cid)
        self._assert_chunk_status(expected_entries_cid, status)

    def test_chunk_status_with_prefix_max(self):
        cid = random.choice(self.expected_entries)[0]
        expected_entries_cid = [
            entry for entry in self.expected_entries if entry[0] == cid
        ]
        status = self.rdir.status(self.rawx_id, prefix=cid, max=2)
        self._assert_chunk_status(expected_entries_cid, status, max=2)

    def test_chunk_status_with_marker(self):
        marker_index = random.randrange(0, len(self.expected_entries))
        marker = '|'.join(self.expected_entries[marker_index][:3])
        status = self.rdir.status(self.rawx_id, marker=marker)
        self._assert_chunk_status(self.expected_entries[marker_index + 1:],
                                  status)

    def test_chunk_status_with_marker_max(self):
        marker_index = random.randrange(0, len(self.expected_entries))
        marker = '|'.join(self.expected_entries[marker_index][:3])
        status = self.rdir.status(self.rawx_id, marker=marker, max=2)
        self._assert_chunk_status(self.expected_entries[marker_index + 1:],
                                  status,
                                  max=2)

    def test_chunk_status_marker_prefix(self):
        cid = random.choice(self.expected_entries)[0]
        expected_entries_cid = [
            entry for entry in self.expected_entries if entry[0] == cid
        ]
        marker_index = random.randrange(0, len(expected_entries_cid))
        marker = '|'.join(expected_entries_cid[marker_index][:3])
        status = self.rdir.status(self.rawx_id, marker=marker, prefix=cid)
        self._assert_chunk_status(expected_entries_cid[marker_index + 1:],
                                  status)

    def test_chunk_status_with_marker_prefix_max(self):
        cid = random.choice(self.expected_entries)[0]
        expected_entries_cid = [
            entry for entry in self.expected_entries if entry[0] == cid
        ]
        marker_index = random.randrange(0, len(expected_entries_cid))
        marker = '|'.join(expected_entries_cid[marker_index][:3])
        status = self.rdir.status(self.rawx_id,
                                  marker=marker,
                                  prefix=cid,
                                  max=2)
        self._assert_chunk_status(expected_entries_cid[marker_index + 1:],
                                  status,
                                  max=2)

    def test_chunk_status_with_incident(self):
        self.rdir.admin_incident_set(self.rawx_id, self.incident_date)
        self.rdir._direct_request.reset_mock()
        status = self.rdir.status(self.rawx_id)
        self._assert_chunk_status(self.expected_entries, status, incident=True)

    def test_chunk_status_with_incident_max(self):
        self.rdir.admin_incident_set(self.rawx_id, self.incident_date)
        self.rdir._direct_request.reset_mock()
        status = self.rdir.status(self.rawx_id, max=2)
        self._assert_chunk_status(self.expected_entries,
                                  status,
                                  incident=True,
                                  max=2)

    def test_chunk_status_with_incident_prefix(self):
        self.rdir.admin_incident_set(self.rawx_id, self.incident_date)
        self.rdir._direct_request.reset_mock()
        cid = random.choice(self.expected_entries)[0]
        expected_entries_cid = [
            entry for entry in self.expected_entries if entry[0] == cid
        ]
        status = self.rdir.status(self.rawx_id, prefix=cid)
        self._assert_chunk_status(expected_entries_cid, status, incident=True)

    def test_chunk_status_with_incident_prefix_max(self):
        self.rdir.admin_incident_set(self.rawx_id, self.incident_date)
        self.rdir._direct_request.reset_mock()
        cid = random.choice(self.expected_entries)[0]
        expected_entries_cid = [
            entry for entry in self.expected_entries if entry[0] == cid
        ]
        status = self.rdir.status(self.rawx_id, prefix=cid, max=2)
        self._assert_chunk_status(expected_entries_cid,
                                  status,
                                  incident=True,
                                  max=2)

    def test_chunk_status_with_incident_marker(self):
        self.rdir.admin_incident_set(self.rawx_id, self.incident_date)
        self.rdir._direct_request.reset_mock()
        marker_index = random.randrange(0, len(self.expected_entries))
        marker = '|'.join(self.expected_entries[marker_index][:3])
        status = self.rdir.status(self.rawx_id, marker=marker)
        self._assert_chunk_status(self.expected_entries[marker_index + 1:],
                                  status,
                                  incident=True)

    def test_chunk_status_with_incident_marker_max(self):
        self.rdir.admin_incident_set(self.rawx_id, self.incident_date)
        self.rdir._direct_request.reset_mock()
        marker_index = random.randrange(0, len(self.expected_entries))
        marker = '|'.join(self.expected_entries[marker_index][:3])
        status = self.rdir.status(self.rawx_id, marker=marker, max=2)
        self._assert_chunk_status(self.expected_entries[marker_index + 1:],
                                  status,
                                  incident=True,
                                  max=2)

    def test_chunk_status_with_incident_marker_prefix(self):
        self.rdir.admin_incident_set(self.rawx_id, self.incident_date)
        self.rdir._direct_request.reset_mock()
        cid = random.choice(self.expected_entries)[0]
        expected_entries_cid = [
            entry for entry in self.expected_entries if entry[0] == cid
        ]
        marker_index = random.randrange(0, len(expected_entries_cid))
        marker = '|'.join(expected_entries_cid[marker_index][:3])
        status = self.rdir.status(self.rawx_id, marker=marker, prefix=cid)
        self._assert_chunk_status(expected_entries_cid[marker_index + 1:],
                                  status,
                                  incident=True)

    def test_chunk_status_with_incident_marker_prefix_max(self):
        self.rdir.admin_incident_set(self.rawx_id, self.incident_date)
        self.rdir._direct_request.reset_mock()
        cid = random.choice(self.expected_entries)[0]
        expected_entries_cid = [
            entry for entry in self.expected_entries if entry[0] == cid
        ]
        marker_index = random.randrange(0, len(expected_entries_cid))
        marker = '|'.join(expected_entries_cid[marker_index][:3])
        status = self.rdir.status(self.rawx_id,
                                  marker=marker,
                                  prefix=cid,
                                  max=2)
        self._assert_chunk_status(expected_entries_cid[marker_index + 1:],
                                  status,
                                  incident=True,
                                  max=2)
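
The request accounting in _assert_chunk_fetch and _assert_chunk_status
encodes the paging contract these tests rely on: with a positive limit (or
max), fetching n entries costs ceil(n / limit) round trips, while anything
else, including an empty result, costs exactly one request. A standalone
sketch of that arithmetic (the helper name is illustrative):

import math

def expected_requests(n_entries, limit=0):
    # Mirrors the accounting in _assert_chunk_fetch: one request by
    # default; ceil(n / limit) requests when paging with a positive limit.
    if limit > 0 and n_entries > 0:
        return int(math.ceil(n_entries / float(limit)))
    return 1

assert expected_requests(0) == 1            # an empty fetch is one request
assert expected_requests(6, limit=2) == 3   # three full pages of 2
assert expected_requests(7, limit=2) == 4   # 2 + 2 + 2 + 1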
Example #3
class BlobRebuilder(Rebuilder):
    def __init__(self,
                 conf,
                 logger,
                 volume,
                 try_chunk_delete=False,
                 beanstalkd_addr=None,
                 **kwargs):
        super(BlobRebuilder, self).__init__(conf, logger, volume, **kwargs)
        # rdir
        self.rdir_client = RdirClient(conf, logger=self.logger)
        self.rdir_fetch_limit = int_value(conf.get('rdir_fetch_limit'), 100)
        # rawx
        self.try_chunk_delete = try_chunk_delete
        # beanstalk
        if beanstalkd_addr:
            self.beanstalkd_listener = BeanstalkdListener(
                beanstalkd_addr,
                conf.get('beanstalkd_tube', DEFAULT_REBUILDER_TUBE),
                self.logger, **kwargs)
        else:
            self.beanstalkd_listener = None
        # counters
        self.bytes_processed = 0
        self.total_bytes_processed = 0
        self.total_expected_chunks = None
        # distributed
        self.distributed = False

    def _create_worker(self, **kwargs):
        return BlobRebuilderWorker(self,
                                   try_chunk_delete=self.try_chunk_delete,
                                   **kwargs)

    def _fill_queue(self, queue, **kwargs):
        chunks = self._fetch_chunks(**kwargs)
        for chunk in chunks:
            queue.put(chunk)

    def _item_to_string(self, chunk, **kwargs):
        cid, content_id, chunk_id_or_pos, _ = chunk
        return 'chunk %s|%s|%s' % (cid, content_id, chunk_id_or_pos)

    def _get_report(self, status, end_time, counters, **kwargs):
        chunks_processed, bytes_processed, errors, total_chunks_processed, \
            total_bytes_processed, total_errors = counters
        time_since_last_report = (end_time - self.last_report) or 0.00001
        total_time = (end_time - self.start_time) or 0.00001
        report = (
            '%(status)s volume=%(volume)s '
            'last_report=%(last_report)s %(time_since_last_report).2fs '
            'chunks=%(chunks)d %(chunks_rate).2f/s '
            'bytes=%(bytes)d %(bytes_rate).2fB/s '
            'errors=%(errors)d %(errors_rate).2f%% '
            'start_time=%(start_time)s %(total_time).2fs '
            'total_chunks=%(total_chunks)d %(total_chunks_rate).2f/s '
            'total_bytes=%(total_bytes)d %(total_bytes_rate).2fB/s '
            'total_errors=%(total_errors)d %(total_errors_rate).2f%%' % {
                'status':
                status,
                'volume':
                self.volume,
                'last_report':
                datetime.fromtimestamp(int(self.last_report)).isoformat(),
                'time_since_last_report':
                time_since_last_report,
                'chunks':
                chunks_processed,
                'chunks_rate':
                chunks_processed / time_since_last_report,
                'bytes':
                bytes_processed,
                'bytes_rate':
                bytes_processed / time_since_last_report,
                'errors':
                errors,
                'errors_rate':
                100 * errors / float(chunks_processed or 1),
                'start_time':
                datetime.fromtimestamp(int(self.start_time)).isoformat(),
                'total_time':
                total_time,
                'total_chunks':
                total_chunks_processed,
                'total_chunks_rate':
                total_chunks_processed / total_time,
                'total_bytes':
                total_bytes_processed,
                'total_bytes_rate':
                total_bytes_processed / total_time,
                'total_errors':
                total_errors,
                'total_errors_rate':
                100 * total_errors / float(total_chunks_processed or 1)
            })
        if self.total_expected_chunks is not None:
            progress = 100 * total_chunks_processed / \
                float(self.total_expected_chunks or 1)
            report += ' progress=%d/%d %.2f%%' % \
                (total_chunks_processed, self.total_expected_chunks, progress)
        return report

    def _update_processed_without_lock(self,
                                       bytes_processed,
                                       error=None,
                                       **kwargs):
        super(BlobRebuilder, self)._update_processed_without_lock(None,
                                                                  error=error,
                                                                  **kwargs)
        if bytes_processed is not None:
            self.bytes_processed += bytes_processed

    def _update_totals_without_lock(self, **kwargs):
        chunks_processed, errors, total_chunks_processed, total_errors = \
            super(BlobRebuilder, self)._update_totals_without_lock(**kwargs)
        bytes_processed = self.bytes_processed
        self.bytes_processed = 0
        self.total_bytes_processed += bytes_processed
        return chunks_processed, bytes_processed, errors, \
            total_chunks_processed, self.total_bytes_processed, total_errors

    def _rebuilder_pass(self, **kwargs):
        return super(BlobRebuilder, self).rebuilder_pass(**kwargs)

    def rebuilder_pass(self, **kwargs):
        success = False
        if self.volume:
            self.rdir_client.admin_lock(self.volume,
                                        "rebuilder on %s" % gethostname())
            info = self.rdir_client.status(self.volume)
            self.total_expected_chunks = info.get('chunk', dict()).get(
                'to_rebuild', None)
        try:
            success = self._rebuilder_pass(**kwargs)
        finally:
            if self.volume:
                self.rdir_client.admin_unlock(self.volume)
        return success

    def _event_from_broken_chunk(self, chunk, reply, **kwargs):
        cid, content_id, chunk_id_or_pos, _ = chunk
        event = {}
        event['when'] = time.time()
        event['event'] = 'storage.content.broken'
        event['data'] = {'missing_chunks': [chunk_id_or_pos]}
        event['url'] = {'ns': self.namespace, 'id': cid, 'content': content_id}
        event['reply'] = reply
        return json.dumps(event)

    def _chunks_from_event(self, job_id, data, **kwargs):
        decoded = json.loads(data)
        container_id = decoded['url']['id']
        content_id = decoded['url']['content']
        more = None
        reply = decoded.get('reply', None)
        if reply:
            more = {'reply': reply}
        for chunk_id_or_pos in decoded['data']['missing_chunks']:
            yield [container_id, content_id, str(chunk_id_or_pos), more]

    def _fetch_events_from_beanstalk(self, **kwargs):
        return self.beanstalkd_listener.fetch_events(self._chunks_from_event,
                                                     **kwargs)

    def _fetch_chunks_from_file(self, **kwargs):
        with open(self.input_file, 'r') as ifile:
            for line in ifile:
                stripped = line.strip()
                if stripped and not stripped.startswith('#'):
                    yield stripped.split('|', 3)[:3] + [None]

    def _fetch_chunks(self, **kwargs):
        if self.input_file:
            return self._fetch_chunks_from_file(**kwargs)
        if self.beanstalkd_listener and not self.distributed:
            return self._fetch_events_from_beanstalk(**kwargs)
        if self.volume:
            return self.rdir_client.chunk_fetch(self.volume,
                                                limit=self.rdir_fetch_limit,
                                                rebuild=True,
                                                **kwargs)
        raise ConfigurationException('No source to fetch chunks from')
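
For reference, the beanstalkd round trip implemented by
_event_from_broken_chunk and _chunks_from_event can be exercised
standalone. The field layout below is taken directly from those two
methods; the sample identifiers and the shape of the reply payload are
made up for illustration (the rebuilder treats the reply as opaque and
merely passes it through):

import json
import time

# Build the event exactly as _event_from_broken_chunk does (sample IDs
# and the reply payload are illustrative).
event = json.dumps({
    'when': time.time(),
    'event': 'storage.content.broken',
    'url': {'ns': 'OPENIO', 'id': 'CID0123', 'content': 'CONTENT0123'},
    'data': {'missing_chunks': ['chunk0123', 2]},
    'reply': {'tube': 'oio-rebuild'},
})

# Decode it back into work items, mirroring _chunks_from_event.
decoded = json.loads(event)
reply = decoded.get('reply', None)
more = {'reply': reply} if reply else None
for chunk_id_or_pos in decoded['data']['missing_chunks']:
    # One item per missing chunk: [container_id, content_id,
    # chunk_id_or_pos, more], with positions stringified.
    print([decoded['url']['id'], decoded['url']['content'],
           str(chunk_id_or_pos), more])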