class BlobRebuilder(Tool):
    """
    Rebuild chunks whose entries are marked as broken, reading the list of
    broken chunks either from an input file, from an rdir service, or from
    beanstalkd events (handled by the Tool base class).
    """

    DEFAULT_BEANSTALKD_WORKER_TUBE = 'oio-rebuild'
    DEFAULT_DISTRIBUTED_BEANSTALKD_WORKER_TUBE = 'oio-rebuild'
    DEFAULT_RDIR_FETCH_LIMIT = 100
    DEFAULT_RDIR_TIMEOUT = 60.0
    DEFAULT_ALLOW_FROZEN_CT = False
    DEFAULT_ALLOW_SAME_RAWX = True
    DEFAULT_TRY_CHUNK_DELETE = False
    DEFAULT_DRY_RUN = False

    def __init__(self, conf, input_file=None, service_id=None, **kwargs):
        """
        :param conf: configuration dictionary
        :param input_file: optional path to a file listing chunks to rebuild
            (one "container|content|chunk" entry per line)
        :param service_id: optional rawx service ID whose lost chunks will be
            fetched from the rdir services
        """
        super(BlobRebuilder, self).__init__(conf, **kwargs)

        # counters
        self.bytes_processed = 0
        self.total_bytes_processed = 0

        # input
        self.input_file = input_file
        self.rawx_id = service_id

        # rawx/rdir
        self.rdir_client = RdirClient(self.conf, logger=self.logger)
        self.rdir_fetch_limit = int_value(self.conf.get('rdir_fetch_limit'),
                                          self.DEFAULT_RDIR_FETCH_LIMIT)
        self.rdir_shuffle_chunks = true_value(conf.get('rdir_shuffle_chunks'))
        self.rdir_timeout = float_value(conf.get('rdir_timeout'),
                                        self.DEFAULT_RDIR_TIMEOUT)

    @staticmethod
    def items_from_task_event(task_event):
        """Yield one work item per missing chunk described in the event."""
        namespace = task_event['url']['ns']
        container_id = task_event['url']['id']
        content_id = task_event['url']['content']
        for chunk_id_or_pos in task_event['data']['missing_chunks']:
            yield namespace, container_id, content_id, str(chunk_id_or_pos)

    @staticmethod
    def task_event_from_item(item):
        """Build a CONTENT_BROKEN event describing a single work item."""
        namespace, container_id, content_id, chunk_id_or_pos = item
        return \
            {
                'when': time.time(),
                'event': EventTypes.CONTENT_BROKEN,
                'url': {
                    'ns': namespace,
                    'id': container_id,
                    'content': content_id
                },
                'data': {
                    'missing_chunks': [
                        chunk_id_or_pos
                    ]
                }
            }

    @staticmethod
    def tasks_res_from_res_event(res_event):
        """
        Yield (item, bytes_processed, error) tuples for each rebuilt chunk
        reported by a CONTENT_REBUILT event.
        """
        namespace = res_event['url']['ns']
        container_id = res_event['url']['id']
        content_id = res_event['url']['content']
        for chunk_rebuilt in res_event['data']['chunks_rebuilt']:
            yield (namespace, container_id, content_id,
                   str(chunk_rebuilt['chunk_id_or_pos'])), \
                chunk_rebuilt['bytes_processed'], chunk_rebuilt['error']

    @staticmethod
    def res_event_from_task_res(task_res):
        """Build a CONTENT_REBUILT event from a task result tuple."""
        item, bytes_processed, error = task_res
        namespace, container_id, content_id, chunk_id_or_pos = item
        return \
            {
                'when': time.time(),
                'event': EventTypes.CONTENT_REBUILT,
                'url': {
                    'ns': namespace,
                    'id': container_id,
                    'content': content_id
                },
                'data': {
                    'chunks_rebuilt': [{
                        'chunk_id_or_pos': chunk_id_or_pos,
                        'bytes_processed': bytes_processed,
                        'error': error
                    }]
                }
            }

    @staticmethod
    def string_from_item(item):
        """Return the 'ns|container|content|chunk' representation of an item."""
        namespace, container_id, content_id, chunk_id_or_pos = item
        return '%s|%s|%s|%s' % (namespace, container_id, content_id,
                                chunk_id_or_pos)

    def _fetch_items_from_input_file(self):
        """
        Yield items parsed from the input file.
        Lines are expected as 'container|content|chunk' (first three fields
        are used); blank lines and '#' comments are skipped. The namespace
        is taken from self.namespace.
        """
        with open(self.input_file, 'r') as ifile:
            for line in ifile:
                stripped = line.strip()
                if not stripped or stripped.startswith('#'):
                    continue
                container_id, content_id, chunk_id_or_pos = \
                    stripped.split('|', 3)[:3]
                yield self.namespace, container_id, content_id, \
                    chunk_id_or_pos

    def _fetch_items_from_rawx_id(self):
        """Yield items for the chunks the rdir marked as needing a rebuild."""
        lost_chunks = self.rdir_client.chunk_fetch(
            self.rawx_id, limit=self.rdir_fetch_limit, rebuild=True,
            shuffle=self.rdir_shuffle_chunks, timeout=self.rdir_timeout)
        for container_id, content_id, chunk_id, _ in lost_chunks:
            yield self.namespace, container_id, content_id, chunk_id

    def _fetch_items(self):
        """
        Return a generator of items, from the input file if there is one,
        otherwise from the rdir of the rawx service, otherwise empty.
        """
        if self.input_file:
            return self._fetch_items_from_input_file()
        if self.rawx_id:
            return self._fetch_items_from_rawx_id()

        def _empty_generator():
            return
            yield  # pylint: disable=unreachable
        return _empty_generator()

    def update_counters(self, task_res):
        """Accumulate the number of bytes processed by the last task."""
        super(BlobRebuilder, self).update_counters(task_res)
        _, bytes_processed, _ = task_res
        if bytes_processed is not None:
            self.bytes_processed += bytes_processed

    def _update_total_counters(self):
        """Roll the per-report byte counter into the global total."""
        chunks_processed, total_chunks_processed, errors, total_errors = \
            super(BlobRebuilder, self)._update_total_counters()
        bytes_processed = self.bytes_processed
        self.bytes_processed = 0
        self.total_bytes_processed += bytes_processed
        return chunks_processed, total_chunks_processed, \
            bytes_processed, self.total_bytes_processed, \
            errors, total_errors

    def _get_report(self, status, end_time, counters):
        """Format a one-line progress report from the counters tuple."""
        chunks_processed, total_chunks_processed, \
            bytes_processed, total_bytes_processed, \
            errors, total_errors = counters
        # Avoid divisions by zero when the elapsed time is (almost) null
        time_since_last_report = (end_time - self.last_report) or 0.00001
        total_time = (end_time - self.start_time) or 0.00001
        report = (
            '%(status)s '
            'last_report=%(last_report)s %(time_since_last_report).2fs '
            'chunks=%(chunks)d %(chunks_rate).2f/s '
            'bytes=%(bytes)d %(bytes_rate).2fB/s '
            'errors=%(errors)d %(errors_rate).2f%% '
            'start_time=%(start_time)s %(total_time).2fs '
            'total_chunks=%(total_chunks)d %(total_chunks_rate).2f/s '
            'total_bytes=%(total_bytes)d %(total_bytes_rate).2fB/s '
            'total_errors=%(total_errors)d %(total_errors_rate).2f%%' % {
                'status': status,
                'last_report': datetime.fromtimestamp(
                    int(self.last_report)).isoformat(),
                'time_since_last_report': time_since_last_report,
                'chunks': chunks_processed,
                'chunks_rate': chunks_processed / time_since_last_report,
                'bytes': bytes_processed,
                'bytes_rate': bytes_processed / time_since_last_report,
                'errors': errors,
                'errors_rate': 100 * errors / float(chunks_processed or 1),
                'start_time': datetime.fromtimestamp(
                    int(self.start_time)).isoformat(),
                'total_time': total_time,
                'total_chunks': total_chunks_processed,
                'total_chunks_rate': total_chunks_processed / total_time,
                'total_bytes': total_bytes_processed,
                'total_bytes_rate': total_bytes_processed / total_time,
                'total_errors': total_errors,
                'total_errors_rate':
                    100 * total_errors / float(total_chunks_processed or 1)
            })
        if self.total_expected_items is not None:
            progress = 100 * total_chunks_processed / \
                float(self.total_expected_items or 1)
            report += ' progress=%d/%d %.2f%%' % \
                (total_chunks_processed, self.total_expected_items, progress)
        return report

    def create_worker(self, queue_workers, queue_reply):
        """Create a worker consuming items from queue_workers."""
        return BlobRebuilderWorker(self, queue_workers, queue_reply)

    def _load_total_expected_items(self):
        """
        Ask the rdir how many chunks are to be rebuilt, to be able to
        display a progress percentage. Best-effort: failures are logged
        and ignored.
        """
        if self.rawx_id:
            try:
                info = self.rdir_client.status(self.rawx_id,
                                               read_timeout=self.rdir_timeout)
                self.total_expected_items = info.get('chunk', dict()).get(
                    'to_rebuild', None)
            except Exception as exc:
                # Logger.warn is a deprecated alias of Logger.warning
                self.logger.warning(
                    'Failed to fetch the total chunks to rebuild: %s', exc)

    def run(self):
        """
        Take the rdir admin lock on the rawx service (when rebuilding from
        an rdir), run the rebuild, then release the lock. The lock is
        released even if the rebuild raises.
        """
        if self.rawx_id:
            self.rdir_client.admin_lock(self.rawx_id,
                                        "rebuilder on %s" % gethostname(),
                                        timeout=self.rdir_timeout)
        try:
            success = super(BlobRebuilder, self).run()
        finally:
            # Always release the admin lock, even on failure,
            # otherwise the volume would stay locked forever.
            if self.rawx_id:
                self.rdir_client.admin_unlock(self.rawx_id,
                                              timeout=self.rdir_timeout)
        return success
class TestRdirClient(BaseTestCase):
    """
    Functional tests for RdirClient.chunk_fetch() and RdirClient.status(),
    with every combination of limit/marker/prefix/rebuild options.

    The indexer services are stopped for the whole test class so that only
    the test itself modifies the rdir databases.
    """

    @classmethod
    def setUpClass(cls):
        super(TestRdirClient, cls).setUpClass()
        cls._service('@indexer', 'stop')

    @classmethod
    def tearDownClass(cls):
        super(TestRdirClient, cls).tearDownClass()
        cls._service('@indexer', 'start')

    def _push_chunks(self):
        """
        Push a random set of chunk entries to the rdir.
        The last character of each chunk ID encodes whether the chunk was
        modified before ('0') or after ('1') the incident date, so the
        rebuild-related tests can filter expected entries easily.
        """
        max_mtime = 16
        self.incident_date = random.randrange(2, max_mtime - 1)
        expected_entries = list()
        for _ in range(4):
            cid = random_id(64)
            for _ in range(random.randrange(2, 5)):
                content_id = random_id(32)
                for _ in range(random.randrange(2, 5)):
                    chunk_id = random_id(63)
                    mtime = random.randrange(0, max_mtime + 1)
                    if mtime <= self.incident_date:
                        chunk_id += '0'
                    else:
                        chunk_id += '1'
                    self.rdir.chunk_push(self.rawx_id, cid, content_id,
                                         chunk_id, mtime=mtime)
                    entry = (cid, content_id, chunk_id, {'mtime': mtime})
                    expected_entries.append(entry)
        self.expected_entries = sorted(expected_entries)

    def setUp(self):
        super(TestRdirClient, self).setUp()
        self.rawx_conf = random.choice(self.conf['services']['rawx'])
        self.rawx_id = self.rawx_conf.get('service_id',
                                          self.rawx_conf['addr'])
        self.rdir = RdirClient(self.conf)
        # Start from an empty rdir database
        self.rdir.admin_clear(self.rawx_id, clear_all=True)
        self._push_chunks()
        # Spy on _direct_request to count the requests actually sent
        self.rdir._direct_request = Mock(
            side_effect=self.rdir._direct_request)

    def _assert_chunk_fetch(self, expected_entries, entries, limit=0):
        """
        Check the fetched entries and the number of requests implied
        by the pagination limit.
        """
        self.assertListEqual(expected_entries, list(entries))
        nb_requests = 1
        if limit > 0 and len(expected_entries) > 0:
            nb_requests = int(math.ceil(len(expected_entries) / float(limit)))
        self.assertEqual(nb_requests, self.rdir._direct_request.call_count)
        self.rdir._direct_request.reset_mock()

    def test_chunk_fetch(self):
        entries = self.rdir.chunk_fetch(self.rawx_id)
        self._assert_chunk_fetch(self.expected_entries, entries)

    def test_chunk_fetch_with_limit(self):
        entries = self.rdir.chunk_fetch(self.rawx_id, limit=2)
        self._assert_chunk_fetch(self.expected_entries, entries, limit=2)

    def test_chunk_fetch_with_container_id(self):
        cid = random.choice(self.expected_entries)[0]
        expected_entries_cid = [
            entry for entry in self.expected_entries if entry[0] == cid
        ]
        entries = self.rdir.chunk_fetch(self.rawx_id, container_id=cid)
        self._assert_chunk_fetch(expected_entries_cid, entries)

    def test_chunk_fetch_with_container_id_limit(self):
        cid = random.choice(self.expected_entries)[0]
        expected_entries_cid = [
            entry for entry in self.expected_entries if entry[0] == cid
        ]
        entries = self.rdir.chunk_fetch(self.rawx_id, container_id=cid,
                                        limit=2)
        self._assert_chunk_fetch(expected_entries_cid, entries, limit=2)

    def test_chunk_fetch_with_start_after(self):
        start_after_index = random.randrange(0, len(self.expected_entries))
        start_after = '|'.join(self.expected_entries[start_after_index][:3])
        entries = self.rdir.chunk_fetch(self.rawx_id, start_after=start_after)
        self._assert_chunk_fetch(self.expected_entries[start_after_index + 1:],
                                 entries)

    def test_chunk_fetch_with_start_after_limit(self):
        start_after_index = random.randrange(0, len(self.expected_entries))
        start_after = '|'.join(self.expected_entries[start_after_index][:3])
        entries = self.rdir.chunk_fetch(self.rawx_id, start_after=start_after,
                                        limit=2)
        self._assert_chunk_fetch(self.expected_entries[start_after_index + 1:],
                                 entries, limit=2)

    def test_chunk_fetch_with_start_after_container_id(self):
        cid = random.choice(self.expected_entries)[0]
        expected_entries_cid = [
            entry for entry in self.expected_entries if entry[0] == cid
        ]
        start_after_index = random.randrange(0, len(expected_entries_cid))
        start_after = '|'.join(expected_entries_cid[start_after_index][:3])
        entries = self.rdir.chunk_fetch(self.rawx_id, start_after=start_after,
                                        container_id=cid)
        self._assert_chunk_fetch(expected_entries_cid[start_after_index + 1:],
                                 entries)

    def test_chunk_fetch_with_start_after_container_id_limit(self):
        cid = random.choice(self.expected_entries)[0]
        expected_entries_cid = [
            entry for entry in self.expected_entries if entry[0] == cid
        ]
        start_after_index = random.randrange(0, len(expected_entries_cid))
        start_after = '|'.join(expected_entries_cid[start_after_index][:3])
        entries = self.rdir.chunk_fetch(self.rawx_id, start_after=start_after,
                                        container_id=cid, limit=2)
        self._assert_chunk_fetch(expected_entries_cid[start_after_index + 1:],
                                 entries, limit=2)

    def test_chunk_fetch_with_rebuild_no_incident(self):
        # Without a declared incident, nothing is to be rebuilt
        entries = self.rdir.chunk_fetch(self.rawx_id, rebuild=True)
        self._assert_chunk_fetch(list(), entries)

    def test_chunk_fetch_with_rebuild(self):
        self.rdir.admin_incident_set(self.rawx_id, self.incident_date)
        self.rdir._direct_request.reset_mock()
        expected_entries_rebuild = [
            entry for entry in self.expected_entries if entry[2][-1] == '0'
        ]
        entries = self.rdir.chunk_fetch(self.rawx_id, rebuild=True)
        self._assert_chunk_fetch(expected_entries_rebuild, entries)

    def test_chunk_fetch_with_rebuild_limit(self):
        self.rdir.admin_incident_set(self.rawx_id, self.incident_date)
        self.rdir._direct_request.reset_mock()
        expected_entries_rebuild = [
            entry for entry in self.expected_entries if entry[2][-1] == '0'
        ]
        entries = self.rdir.chunk_fetch(self.rawx_id, rebuild=True, limit=2)
        self._assert_chunk_fetch(expected_entries_rebuild, entries, limit=2)

    def test_chunk_fetch_with_rebuild_container_id(self):
        self.rdir.admin_incident_set(self.rawx_id, self.incident_date)
        self.rdir._direct_request.reset_mock()
        expected_entries_rebuild = [
            entry for entry in self.expected_entries if entry[2][-1] == '0'
        ]
        cid = random.choice(self.expected_entries)[0]
        expected_entries_rebuild_cid = \
            [entry for entry in expected_entries_rebuild if entry[0] == cid]
        entries = self.rdir.chunk_fetch(self.rawx_id, rebuild=True,
                                        container_id=cid)
        self._assert_chunk_fetch(expected_entries_rebuild_cid, entries)

    def test_chunk_fetch_with_rebuild_start_after(self):
        self.rdir.admin_incident_set(self.rawx_id, self.incident_date)
        self.rdir._direct_request.reset_mock()
        expected_entries_rebuild = [
            entry for entry in self.expected_entries if entry[2][-1] == '0'
        ]
        if expected_entries_rebuild:
            start_after_index = random.randrange(0,
                                                 len(expected_entries_rebuild))
            start_after = '|'.join(
                expected_entries_rebuild[start_after_index][:3])
        else:
            start_after_index = 0
            start_after = '|'.join(
                (random_id(64), random_id(32), random_id(64)))
        entries = self.rdir.chunk_fetch(self.rawx_id, rebuild=True,
                                        start_after=start_after)
        self._assert_chunk_fetch(
            expected_entries_rebuild[start_after_index + 1:], entries)

    def test_chunk_fetch_with_rebuild_container_id_limit(self):
        self.rdir.admin_incident_set(self.rawx_id, self.incident_date)
        self.rdir._direct_request.reset_mock()
        expected_entries_rebuild = [
            entry for entry in self.expected_entries if entry[2][-1] == '0'
        ]
        cid = random.choice(self.expected_entries)[0]
        expected_entries_rebuild_cid = \
            [entry for entry in expected_entries_rebuild if entry[0] == cid]
        entries = self.rdir.chunk_fetch(self.rawx_id, rebuild=True,
                                        container_id=cid, limit=2)
        self._assert_chunk_fetch(expected_entries_rebuild_cid, entries,
                                 limit=2)

    def test_chunk_fetch_with_rebuild_container_id_start_after(self):
        self.rdir.admin_incident_set(self.rawx_id, self.incident_date)
        self.rdir._direct_request.reset_mock()
        expected_entries_rebuild = [
            entry for entry in self.expected_entries if entry[2][-1] == '0'
        ]
        cid = random.choice(self.expected_entries)[0]
        expected_entries_rebuild_cid = \
            [entry for entry in expected_entries_rebuild if entry[0] == cid]
        if expected_entries_rebuild_cid:
            start_after_index = random.randrange(
                0, len(expected_entries_rebuild_cid))
            start_after = '|'.join(
                expected_entries_rebuild_cid[start_after_index][:3])
        else:
            start_after_index = 0
            start_after = '|'.join(
                (random_id(64), random_id(32), random_id(64)))
        entries = self.rdir.chunk_fetch(self.rawx_id, rebuild=True,
                                        container_id=cid,
                                        start_after=start_after)
        self._assert_chunk_fetch(
            expected_entries_rebuild_cid[start_after_index + 1:], entries)

    def test_chunk_fetch_with_rebuild_start_after_limit(self):
        self.rdir.admin_incident_set(self.rawx_id, self.incident_date)
        self.rdir._direct_request.reset_mock()
        expected_entries_rebuild = [
            entry for entry in self.expected_entries if entry[2][-1] == '0'
        ]
        if expected_entries_rebuild:
            start_after_index = random.randrange(0,
                                                 len(expected_entries_rebuild))
            start_after = '|'.join(
                expected_entries_rebuild[start_after_index][:3])
        else:
            start_after_index = 0
            start_after = '|'.join(
                (random_id(64), random_id(32), random_id(64)))
        entries = self.rdir.chunk_fetch(self.rawx_id, rebuild=True,
                                        start_after=start_after, limit=2)
        self._assert_chunk_fetch(expected_entries_rebuild[start_after_index
                                                          + 1:],
                                 entries, limit=2)

    def test_chunk_fetch_with_rebuild_container_id_start_after_limit(self):
        self.rdir.admin_incident_set(self.rawx_id, self.incident_date)
        self.rdir._direct_request.reset_mock()
        expected_entries_rebuild = [
            entry for entry in self.expected_entries if entry[2][-1] == '0'
        ]
        cid = random.choice(self.expected_entries)[0]
        expected_entries_rebuild_cid = \
            [entry for entry in expected_entries_rebuild if entry[0] == cid]
        if expected_entries_rebuild_cid:
            start_after_index = random.randrange(
                0, len(expected_entries_rebuild_cid))
            start_after = '|'.join(
                expected_entries_rebuild_cid[start_after_index][:3])
        else:
            start_after_index = 0
            start_after = '|'.join(
                (random_id(64), random_id(32), random_id(64)))
        entries = self.rdir.chunk_fetch(self.rawx_id, rebuild=True,
                                        container_id=cid,
                                        start_after=start_after, limit=2)
        self._assert_chunk_fetch(
            expected_entries_rebuild_cid[start_after_index + 1:], entries,
            limit=2)

    def _assert_chunk_status(self, expected_entries, status, max=0,
                             incident=False):
        """
        Check the status dictionary against the expected entries, and the
        number of requests implied by the 'max' pagination parameter.
        """
        expected_status = dict()
        expected_status['chunk'] = {'total': len(expected_entries)}
        expected_status['container'] = dict()
        for entry in expected_entries:
            expected_status['container'][entry[0]]['total'] = \
                expected_status['container'].setdefault(
                    entry[0], dict()).get('total', 0) + 1
        if incident:
            expected_entries_rebuild = [
                entry for entry in expected_entries if entry[2][-1] == '0'
            ]
            expected_status['chunk']['to_rebuild'] = \
                len(expected_entries_rebuild)
            for entry in expected_entries_rebuild:
                expected_status['container'][entry[0]]['to_rebuild'] = \
                    expected_status['container'][entry[0]].get(
                        'to_rebuild', 0) + 1
        self.assertDictEqual(expected_status, status)
        nb_requests = 1
        if max > 0 and len(expected_entries) > 0:
            nb_requests = int(math.ceil(len(expected_entries) / float(max)))
        self.assertEqual(nb_requests, self.rdir._direct_request.call_count)
        self.rdir._direct_request.reset_mock()

    def test_chunk_status(self):
        status = self.rdir.status(self.rawx_id)
        self._assert_chunk_status(self.expected_entries, status)

    def test_chunk_status_with_max(self):
        status = self.rdir.status(self.rawx_id, max=2)
        self._assert_chunk_status(self.expected_entries, status, max=2)

    def test_chunk_status_with_prefix(self):
        cid = random.choice(self.expected_entries)[0]
        expected_entries_cid = [
            entry for entry in self.expected_entries if entry[0] == cid
        ]
        status = self.rdir.status(self.rawx_id, prefix=cid)
        self._assert_chunk_status(expected_entries_cid, status)

    def test_chunk_status_with_prefix_max(self):
        cid = random.choice(self.expected_entries)[0]
        expected_entries_cid = [
            entry for entry in self.expected_entries if entry[0] == cid
        ]
        status = self.rdir.status(self.rawx_id, prefix=cid, max=2)
        self._assert_chunk_status(expected_entries_cid, status, max=2)

    def test_chunk_status_with_marker(self):
        marker_index = random.randrange(0, len(self.expected_entries))
        marker = '|'.join(self.expected_entries[marker_index][:3])
        status = self.rdir.status(self.rawx_id, marker=marker)
        self._assert_chunk_status(self.expected_entries[marker_index + 1:],
                                  status)

    def test_chunk_status_with_marker_max(self):
        marker_index = random.randrange(0, len(self.expected_entries))
        marker = '|'.join(self.expected_entries[marker_index][:3])
        status = self.rdir.status(self.rawx_id, marker=marker, max=2)
        self._assert_chunk_status(self.expected_entries[marker_index + 1:],
                                  status, max=2)

    def test_chunk_status_with_marker_prefix(self):
        cid = random.choice(self.expected_entries)[0]
        expected_entries_cid = [
            entry for entry in self.expected_entries if entry[0] == cid
        ]
        marker_index = random.randrange(0, len(expected_entries_cid))
        marker = '|'.join(expected_entries_cid[marker_index][:3])
        status = self.rdir.status(self.rawx_id, marker=marker, prefix=cid)
        self._assert_chunk_status(expected_entries_cid[marker_index + 1:],
                                  status)

    def test_chunk_status_with_marker_prefix_max(self):
        cid = random.choice(self.expected_entries)[0]
        expected_entries_cid = [
            entry for entry in self.expected_entries if entry[0] == cid
        ]
        marker_index = random.randrange(0, len(expected_entries_cid))
        marker = '|'.join(expected_entries_cid[marker_index][:3])
        status = self.rdir.status(self.rawx_id, marker=marker, prefix=cid,
                                  max=2)
        self._assert_chunk_status(expected_entries_cid[marker_index + 1:],
                                  status, max=2)

    def test_chunk_status_with_incident(self):
        self.rdir.admin_incident_set(self.rawx_id, self.incident_date)
        self.rdir._direct_request.reset_mock()
        status = self.rdir.status(self.rawx_id)
        self._assert_chunk_status(self.expected_entries, status,
                                  incident=True)

    def test_chunk_status_with_incident_max(self):
        self.rdir.admin_incident_set(self.rawx_id, self.incident_date)
        self.rdir._direct_request.reset_mock()
        status = self.rdir.status(self.rawx_id, max=2)
        self._assert_chunk_status(self.expected_entries, status,
                                  incident=True, max=2)

    def test_chunk_status_with_incident_prefix(self):
        self.rdir.admin_incident_set(self.rawx_id, self.incident_date)
        self.rdir._direct_request.reset_mock()
        cid = random.choice(self.expected_entries)[0]
        expected_entries_cid = [
            entry for entry in self.expected_entries if entry[0] == cid
        ]
        status = self.rdir.status(self.rawx_id, prefix=cid)
        self._assert_chunk_status(expected_entries_cid, status,
                                  incident=True)

    def test_chunk_status_with_incident_prefix_max(self):
        self.rdir.admin_incident_set(self.rawx_id, self.incident_date)
        self.rdir._direct_request.reset_mock()
        cid = random.choice(self.expected_entries)[0]
        expected_entries_cid = [
            entry for entry in self.expected_entries if entry[0] == cid
        ]
        status = self.rdir.status(self.rawx_id, prefix=cid, max=2)
        self._assert_chunk_status(expected_entries_cid, status,
                                  incident=True, max=2)

    def test_chunk_status_with_incident_marker(self):
        self.rdir.admin_incident_set(self.rawx_id, self.incident_date)
        self.rdir._direct_request.reset_mock()
        marker_index = random.randrange(0, len(self.expected_entries))
        marker = '|'.join(self.expected_entries[marker_index][:3])
        status = self.rdir.status(self.rawx_id, marker=marker)
        self._assert_chunk_status(self.expected_entries[marker_index + 1:],
                                  status, incident=True)

    def test_chunk_status_with_incident_marker_max(self):
        self.rdir.admin_incident_set(self.rawx_id, self.incident_date)
        self.rdir._direct_request.reset_mock()
        marker_index = random.randrange(0, len(self.expected_entries))
        marker = '|'.join(self.expected_entries[marker_index][:3])
        status = self.rdir.status(self.rawx_id, marker=marker, max=2)
        self._assert_chunk_status(self.expected_entries[marker_index + 1:],
                                  status, incident=True, max=2)

    def test_chunk_status_with_incident_marker_prefix(self):
        self.rdir.admin_incident_set(self.rawx_id, self.incident_date)
        self.rdir._direct_request.reset_mock()
        cid = random.choice(self.expected_entries)[0]
        expected_entries_cid = [
            entry for entry in self.expected_entries if entry[0] == cid
        ]
        marker_index = random.randrange(0, len(expected_entries_cid))
        marker = '|'.join(expected_entries_cid[marker_index][:3])
        status = self.rdir.status(self.rawx_id, marker=marker, prefix=cid)
        self._assert_chunk_status(expected_entries_cid[marker_index + 1:],
                                  status, incident=True)

    def test_chunk_status_with_incident_marker_prefix_max(self):
        self.rdir.admin_incident_set(self.rawx_id, self.incident_date)
        self.rdir._direct_request.reset_mock()
        cid = random.choice(self.expected_entries)[0]
        expected_entries_cid = [
            entry for entry in self.expected_entries if entry[0] == cid
        ]
        marker_index = random.randrange(0, len(expected_entries_cid))
        marker = '|'.join(expected_entries_cid[marker_index][:3])
        status = self.rdir.status(self.rawx_id, marker=marker, prefix=cid,
                                  max=2)
        self._assert_chunk_status(expected_entries_cid[marker_index + 1:],
                                  status, incident=True, max=2)
class BlobRebuilder(Rebuilder):
    """
    Legacy chunk rebuilder. Fetches the chunks to rebuild from an input
    file, from beanstalkd events, or from the rdir of a rawx volume.
    """

    def __init__(self, conf, logger, volume, try_chunk_delete=False,
                 beanstalkd_addr=None, **kwargs):
        """
        :param conf: configuration dictionary
        :param logger: logger used by the rebuilder and its clients
        :param volume: rawx volume to rebuild (may be None when reading
            from a file or from beanstalkd)
        :param try_chunk_delete: ask workers to delete the broken chunk
            after a successful rebuild
        :param beanstalkd_addr: optional beanstalkd address to fetch
            rebuild events from
        """
        super(BlobRebuilder, self).__init__(conf, logger, volume, **kwargs)
        # rdir
        self.rdir_client = RdirClient(conf, logger=self.logger)
        self.rdir_fetch_limit = int_value(conf.get('rdir_fetch_limit'), 100)
        # rawx
        self.try_chunk_delete = try_chunk_delete
        # beanstalk
        if beanstalkd_addr:
            self.beanstalkd_listener = BeanstalkdListener(
                beanstalkd_addr,
                conf.get('beanstalkd_tube', DEFAULT_REBUILDER_TUBE),
                self.logger, **kwargs)
        else:
            self.beanstalkd_listener = None
        # counters
        self.bytes_processed = 0
        self.total_bytes_processed = 0
        self.total_expected_chunks = None
        # distributed
        self.distributed = False

    def _create_worker(self, **kwargs):
        """Create a worker rebuilding one chunk at a time."""
        return BlobRebuilderWorker(
            self, try_chunk_delete=self.try_chunk_delete, **kwargs)

    def _fill_queue(self, queue, **kwargs):
        """Feed the worker queue with the chunks to rebuild."""
        chunks = self._fetch_chunks(**kwargs)
        for chunk in chunks:
            queue.put(chunk)

    def _item_to_string(self, chunk, **kwargs):
        cid, content_id, chunk_id_or_pos, _ = chunk
        return 'chunk %s|%s|%s' % (cid, content_id, chunk_id_or_pos)

    def _get_report(self, status, end_time, counters, **kwargs):
        """Format a one-line progress report from the counters tuple."""
        chunks_processed, bytes_processed, errors, total_chunks_processed, \
            total_bytes_processed, total_errors = counters
        # Avoid divisions by zero when the elapsed time is (almost) null
        time_since_last_report = (end_time - self.last_report) or 0.00001
        total_time = (end_time - self.start_time) or 0.00001
        report = (
            '%(status)s volume=%(volume)s '
            'last_report=%(last_report)s %(time_since_last_report).2fs '
            'chunks=%(chunks)d %(chunks_rate).2f/s '
            'bytes=%(bytes)d %(bytes_rate).2fB/s '
            'errors=%(errors)d %(errors_rate).2f%% '
            'start_time=%(start_time)s %(total_time).2fs '
            'total_chunks=%(total_chunks)d %(total_chunks_rate).2f/s '
            'total_bytes=%(total_bytes)d %(total_bytes_rate).2fB/s '
            'total_errors=%(total_errors)d %(total_errors_rate).2f%%' % {
                'status': status,
                'volume': self.volume,
                'last_report': datetime.fromtimestamp(
                    int(self.last_report)).isoformat(),
                'time_since_last_report': time_since_last_report,
                'chunks': chunks_processed,
                'chunks_rate': chunks_processed / time_since_last_report,
                'bytes': bytes_processed,
                'bytes_rate': bytes_processed / time_since_last_report,
                'errors': errors,
                'errors_rate': 100 * errors / float(chunks_processed or 1),
                'start_time': datetime.fromtimestamp(
                    int(self.start_time)).isoformat(),
                'total_time': total_time,
                'total_chunks': total_chunks_processed,
                'total_chunks_rate': total_chunks_processed / total_time,
                'total_bytes': total_bytes_processed,
                'total_bytes_rate': total_bytes_processed / total_time,
                'total_errors': total_errors,
                'total_errors_rate':
                    100 * total_errors / float(total_chunks_processed or 1)
            })
        if self.total_expected_chunks is not None:
            progress = 100 * total_chunks_processed / \
                float(self.total_expected_chunks or 1)
            report += ' progress=%d/%d %.2f%%' % \
                (total_chunks_processed, self.total_expected_chunks, progress)
        return report

    def _update_processed_without_lock(self, bytes_processed, error=None,
                                       **kwargs):
        """Accumulate the number of bytes processed by the last rebuild."""
        super(BlobRebuilder, self)._update_processed_without_lock(
            None, error=error, **kwargs)
        if bytes_processed is not None:
            self.bytes_processed += bytes_processed

    def _update_totals_without_lock(self, **kwargs):
        """Roll the per-report byte counter into the global total."""
        chunks_processed, errors, total_chunks_processed, total_errors = \
            super(BlobRebuilder, self)._update_totals_without_lock(**kwargs)
        bytes_processed = self.bytes_processed
        self.bytes_processed = 0
        self.total_bytes_processed += bytes_processed
        return chunks_processed, bytes_processed, errors, \
            total_chunks_processed, self.total_bytes_processed, total_errors

    def _rebuilder_pass(self, **kwargs):
        return super(BlobRebuilder, self).rebuilder_pass(**kwargs)

    def rebuilder_pass(self, **kwargs):
        """
        Take the rdir admin lock on the volume (if any), run one rebuild
        pass, then release the lock. The lock is released even if fetching
        the rdir status or the rebuild itself raises.
        """
        success = False
        if self.volume:
            self.rdir_client.admin_lock(self.volume,
                                        "rebuilder on %s" % gethostname())
        try:
            if self.volume:
                # Fetched inside the try block so a failure here
                # does not leave the volume locked forever
                info = self.rdir_client.status(self.volume)
                self.total_expected_chunks = info.get('chunk', dict()).get(
                    'to_rebuild', None)
            success = self._rebuilder_pass(**kwargs)
        finally:
            if self.volume:
                self.rdir_client.admin_unlock(self.volume)
        return success

    def _event_from_broken_chunk(self, chunk, reply, **kwargs):
        """Serialize a broken chunk as a storage.content.broken event."""
        cid, content_id, chunk_id_or_pos, _ = chunk
        event = {}
        event['when'] = time.time()
        event['event'] = 'storage.content.broken'
        event['data'] = {'missing_chunks': [chunk_id_or_pos]}
        event['url'] = {'ns': self.namespace,
                        'id': cid, 'content': content_id}
        event['reply'] = reply
        return json.dumps(event)

    def _chunks_from_event(self, job_id, data, **kwargs):
        """Yield one chunk description per missing chunk in the event."""
        decoded = json.loads(data)
        container_id = decoded['url']['id']
        content_id = decoded['url']['content']
        more = None
        reply = decoded.get('reply', None)
        if reply:
            more = {'reply': reply}
        for chunk_id_or_pos in decoded['data']['missing_chunks']:
            yield [container_id, content_id, str(chunk_id_or_pos), more]

    def _fetch_events_from_beanstalk(self, **kwargs):
        return self.beanstalkd_listener.fetch_events(
            self._chunks_from_event, **kwargs)

    def _fetch_chunks_from_file(self, **kwargs):
        """
        Yield chunk descriptions parsed from the input file.
        Lines are expected as 'container|content|chunk'; blank lines and
        '#' comments are skipped.
        """
        with open(self.input_file, 'r') as ifile:
            for line in ifile:
                stripped = line.strip()
                if stripped and not stripped.startswith('#'):
                    yield stripped.split('|', 3)[:3] + [None]

    def _fetch_chunks(self, **kwargs):
        """
        Return a generator of chunks to rebuild, from the input file if
        there is one, otherwise from beanstalkd, otherwise from the rdir
        of the volume.

        :raises ConfigurationException: when no source is configured
        """
        if self.input_file:
            return self._fetch_chunks_from_file(**kwargs)
        if self.beanstalkd_listener and not self.distributed:
            return self._fetch_events_from_beanstalk(**kwargs)
        if self.volume:
            return self.rdir_client.chunk_fetch(
                self.volume, limit=self.rdir_fetch_limit, rebuild=True,
                **kwargs)
        raise ConfigurationException('No source to fetch chunks from')