class Harasser(object):

    def __init__(self, ns, max_containers=256, max_contents=256):
        conf = {'namespace': ns}
        self.cs = ConscienceClient(conf)
        self.rdir = RdirClient(conf)
        self.rawx_list = [x['addr'] for x in self.cs.all_services('rawx')]
        self.sent = set()
        self.max_containers = max_containers
        self.max_contents = max_contents

    def harass_put(self, loops=None):
        if loops is None:
            loops = random.randint(1000, 2000)
        print "Pushing %d fake chunks" % loops
        loop = loops
        count_start_container = random.randrange(2**20)
        count_start_content = random.randrange(2**20)
        start = time.time()
        nb_rawx = len(self.rawx_list)
        while loop > 0:
            args = {'mtime': int(start)}
            # vol_id = random.choice(self.rawx_list)
            # container_id = "%064X" % (random.randrange(self.max_containers))
            # content_id = "%032X" % (random.randrange(self.max_contents))
            vol_id = self.rawx_list[loop % nb_rawx]
            container_id = "%064X" % (loop + count_start_container)
            content_id = "%032X" % (loop + count_start_content)
            chunk_id = "http://%s/%064X" \
                % (vol_id, random.randrange(2**128))
            self.rdir.chunk_push(
                vol_id, container_id, content_id, chunk_id, **args)
            self.sent.add((vol_id, container_id, content_id, chunk_id))
            loop -= 1
        end = time.time()
        print "%d pushed in %.3fs, %d req/s" \
            % (loops, end - start, loops / (end - start))

    def harass_del(self, min_loops=0):
        min_loops = min(min_loops, len(self.sent))
        loops = random.randint(min_loops, len(self.sent))
        print "Removing %d fake chunks" % loops
        loop = loops
        start = time.time()
        while loop > 0:
            args = self.sent.pop()
            self.rdir.chunk_delete(*args)
            loop -= 1
        end = time.time()
        print "%d removed in %.3fs, %d req/s" \
            % (loops, end - start, loops / (end - start))

    def __call__(self):
        try:
            while True:
                self.harass_put()
                self.harass_del()
        except KeyboardInterrupt:
            print "Cleaning..."
            self.harass_del(len(self.sent))
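# A minimal usage sketch, not part of the original source: it assumes the
# Harasser above is importable with its dependencies (ConscienceClient,
# RdirClient, random, time) and that a namespace named 'OPENIO' exists with
# at least one rawx service registered; the namespace name is made up.
if __name__ == '__main__':
    harasser = Harasser('OPENIO', max_containers=128, max_contents=128)
    # Runs put/delete cycles until interrupted, then deletes what was sent.
    harasser()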
class BlobIndexer(Daemon):
    def __init__(self, conf, **kwargs):
        super(BlobIndexer, self).__init__(conf)
        self.logger = get_logger(conf)
        volume = conf.get('volume')
        if not volume:
            raise exc.ConfigurationException('No volume specified for indexer')
        self.volume = volume
        self.passes = 0
        self.errors = 0
        self.last_reported = 0
        self.chunks_run_time = 0
        self.total_chunks_processed = 0
        self.interval = int_value(
            conf.get('interval'), 300)
        self.report_interval = int_value(
            conf.get('report_interval'), 3600)
        self.max_chunks_per_second = int_value(
            conf.get('chunks_per_second'), 30)
        self.index_client = RdirClient(conf)
        self.namespace, self.volume_id = check_volume(self.volume)

    def index_pass(self):
        start_time = report_time = time.time()
        total_errors = 0
        paths = paths_gen(self.volume)
        for path in paths:
            self.safe_update_index(path)
            self.chunks_run_time = ratelimit(
                self.chunks_run_time,
                self.max_chunks_per_second
            )
            self.total_chunks_processed += 1
            now = time.time()
            if now - self.last_reported >= self.report_interval:
                self.logger.info(
                    '%(start_time)s '
                    '%(passes)d '
                    '%(errors)d '
                    '%(c_rate).2f '
                    '%(total).2f ' % {
                        'start_time': time.ctime(report_time),
                        'passes': self.passes,
                        'errors': self.errors,
                        'c_rate': self.passes / (now - report_time),
                        'total': (now - start_time)
                    }
                )
                report_time = now
                total_errors += self.errors
                self.passes = 0
                self.errors = 0
                self.last_reported = now
        elapsed = (time.time() - start_time) or 0.000001
        self.logger.info(
            '%(elapsed).02f '
            '%(errors)d '
            '%(chunk_rate).2f ' % {
                'elapsed': elapsed,
                'errors': total_errors + self.errors,
                'chunk_rate': self.total_chunks_processed / elapsed
            }
        )
        if elapsed < self.interval:
            time.sleep(self.interval - elapsed)

    def safe_update_index(self, path):
        try:
            self.logger.debug('Updating index: %s' % path)
            self.update_index(path)
        except Exception:
            self.errors += 1
            self.logger.exception('ERROR while updating index for chunk %s',
                                  path)
        self.passes += 1

    def update_index(self, path):
        with open(path) as f:
            try:
                meta = read_chunk_metadata(f)
            except exc.MissingAttribute as e:
                raise exc.FaultyChunk(
                    'Missing extended attribute %s' % e)
            data = {
                'content_version': meta['content_version'],
                'content_nbchunks': meta['content_chunksnb'],
                'content_path': meta['content_path'],
                'content_size': meta['content_size'],
                'chunk_hash': meta['chunk_hash'],
                'chunk_position': meta['chunk_pos'],
                'chunk_size': meta['chunk_size'],
                'mtime': int(time.time())
            }
            self.index_client.chunk_push(self.volume_id,
                                         meta['content_cid'],
                                         meta['content_id'],
                                         meta['chunk_id'],
                                         **data)

    def run(self, *args, **kwargs):
        time.sleep(random() * self.interval)
        while True:
            try:
                self.index_pass()
            except Exception as e:
                self.logger.exception('ERROR during indexing: %s' % e)
class BlobIndexer(Daemon):
    def __init__(self, conf, **kwargs):
        super(BlobIndexer, self).__init__(conf)
        self.logger = get_logger(conf)
        volume = conf.get('volume')
        if not volume:
            raise exc.ConfigurationException('No volume specified for indexer')
        self.volume = volume
        self.passes = 0
        self.errors = 0
        self.successes = 0
        self.last_reported = 0
        self.chunks_run_time = 0
        self.interval = int_value(conf.get('interval'), 300)
        self.report_interval = int_value(conf.get('report_interval'), 3600)
        self.max_chunks_per_second = int_value(
            conf.get('chunks_per_second'), 30)
        self.index_client = RdirClient(conf, logger=self.logger)
        self.namespace, self.volume_id = check_volume(self.volume)

    def index_pass(self):

        def safe_update_index(path):
            try:
                self.update_index(path)
                self.successes += 1
                self.logger.debug('Updated %s', path)
            except Exception:
                self.errors += 1
                self.logger.exception('ERROR while updating %s', path)

        def report(tag):
            total = self.errors + self.successes
            now = time.time()
            elapsed = (now - start_time) or 0.000001
            self.logger.info(
                '%(tag)s=%(current_time)s '
                'elapsed=%(elapsed).02f '
                'pass=%(pass)d '
                'errors=%(errors)d '
                'chunks=%(nb_chunks)d %(c_rate).2f/s' % {
                    'tag': tag,
                    'current_time': datetime.fromtimestamp(
                        int(now)).isoformat(),
                    'pass': self.passes,
                    'errors': self.errors,
                    'nb_chunks': total,
                    'c_rate': total / (now - self.last_reported),
                    'elapsed': elapsed
                })
            self.last_reported = now

        start_time = time.time()
        self.last_reported = start_time
        self.errors = 0
        self.successes = 0
        paths = paths_gen(self.volume)
        report('started')
        for path in paths:
            safe_update_index(path)
            self.chunks_run_time = ratelimit(self.chunks_run_time,
                                             self.max_chunks_per_second)
            now = time.time()
            if now - self.last_reported >= self.report_interval:
                report('running')
        report('ended')

    def update_index(self, path):
        with open(path) as f:
            try:
                meta = read_chunk_metadata(f)
            except exc.MissingAttribute as e:
                raise exc.FaultyChunk('Missing extended attribute %s' % e)
            data = {'mtime': int(time.time())}
            self.index_client.chunk_push(self.volume_id,
                                         meta['container_id'],
                                         meta['content_id'],
                                         meta['chunk_id'],
                                         **data)

    def run(self, *args, **kwargs):
        time.sleep(random() * self.interval)
        while True:
            pre = time.time()
            try:
                self.index_pass()
            except Exception as e:
                self.logger.exception('ERROR during indexing: %s' % e)
            else:
                self.passes += 1
            elapsed = (time.time() - pre) or 0.000001
            if elapsed < self.interval:
                time.sleep(self.interval - elapsed)
class BlobIndexer(Daemon):
    def __init__(self, conf, **kwargs):
        super(BlobIndexer, self).__init__(conf)
        self.logger = get_logger(conf)
        volume = conf.get('volume')
        if not volume:
            raise exc.ConfigurationException('No volume specified for indexer')
        self.volume = volume
        self.passes = 0
        self.errors = 0
        self.successes = 0
        self.last_reported = 0
        self.total_since_last_reported = 0
        self.chunks_run_time = 0
        self.interval = int_value(
            conf.get('interval'), 300)
        self.report_interval = int_value(
            conf.get('report_interval'), 3600)
        self.max_chunks_per_second = int_value(
            conf.get('chunks_per_second'), 30)
        pm = get_pool_manager(pool_connections=10)
        self.index_client = RdirClient(conf, logger=self.logger,
                                       pool_manager=pm)
        self.namespace, self.volume_id = check_volume(self.volume)
        self.convert_chunks = true_value(conf.get('convert_chunks'))
        if self.convert_chunks:
            converter_conf = self.conf.copy()
            converter_conf['no_backup'] = True
            self.converter = BlobConverter(converter_conf,
                                           logger=self.logger,
                                           pool_manager=pm)
        else:
            self.converter = None

    def safe_recover_fullpath(self, path):
        try:
            return self.converter.recover_chunk_fullpath(path)
        except Exception as err:
            self.logger.error('Could not recover fullpath xattr of %s: %s',
                              path, err)
            return False

    def safe_update_index(self, path):
        chunk_id = path.rsplit('/', 1)[-1]
        if len(chunk_id) != STRLEN_CHUNKID:
            if chunk_id.endswith(CHUNK_SUFFIX_PENDING):
                self.logger.info('Skipping pending chunk %s', path)
            else:
                self.logger.warn('WARN Not a chunk %s', path)
            return
        for char in chunk_id:
            if char not in hexdigits:
                self.logger.warn('WARN Not a chunk %s', path)
                return
        try:
            self.update_index(path, chunk_id)
            self.successes += 1
            self.logger.debug('Updated %s', path)
        except exc.OioNetworkException as err:
            self.errors += 1
            self.logger.warn('ERROR while updating %s: %s', path, err)
        except exc.VolumeException as err:
            self.errors += 1
            self.logger.error('Cannot index %s: %s', path, err)
            # All chunks of this volume are indexed in the same service,
            # no need to try another chunk, it will generate the same
            # error. Let the upper level retry later.
            raise
        except (exc.ChunkException, exc.MissingAttribute) as err:
            if (self.convert_chunks and self.converter
                    and self.converter.is_fullpath_error(err)):
                self.logger.warn(
                    'Could not update %s: %s, will try to recover',
                    path, err)
                if self.safe_recover_fullpath(path):
                    self.successes += 1
                    self.logger.info(
                        'Fullpath xattr of %s was recovered', path)
                else:
                    self.errors += 1
                    # Logging already done by safe_recover_fullpath
            else:
                self.errors += 1
                self.logger.error('ERROR while updating %s: %s', path, err)
        except Exception as err:
            # We cannot compare errno in the 'except' line.
            # pylint: disable=no-member
            if isinstance(err, IOError) and err.errno == errno.ENOENT:
                self.logger.debug('Chunk %s disappeared before indexing',
                                  path)
                # Neither an error nor a success, do not touch counters.
            else:
                self.errors += 1
                self.logger.exception('ERROR while updating %s', path)
        self.total_since_last_reported += 1

    def report(self, tag, start_time):
        total = self.errors + self.successes
        now = time.time()
        elapsed = (now - start_time) or 0.000001
        self.logger.info(
            '%(tag)s=%(current_time)s '
            'elapsed=%(elapsed).02f '
            'pass=%(pass)d '
            'errors=%(errors)d '
            'chunks=%(nb_chunks)d %(c_rate).2f/s' % {
                'tag': tag,
                'current_time': datetime.fromtimestamp(
                    int(now)).isoformat(),
                'pass': self.passes,
                'errors': self.errors,
                'nb_chunks': total,
                'c_rate': self.total_since_last_reported /
                (now - self.last_reported),
                'elapsed': elapsed
            }
        )
        self.last_reported = now
        self.total_since_last_reported = 0

    def index_pass(self):
        start_time = time.time()
        self.last_reported = start_time
        self.errors = 0
        self.successes = 0
        paths = paths_gen(self.volume)
        self.report('started', start_time)
        for path in paths:
            self.safe_update_index(path)
            self.chunks_run_time = ratelimit(
                self.chunks_run_time,
                self.max_chunks_per_second
            )
            now = time.time()
            if now - self.last_reported >= self.report_interval:
                self.report('running', start_time)
        self.report('ended', start_time)

    def update_index(self, path, chunk_id):
        with open(path) as file_:
            try:
                meta = None
                if self.convert_chunks and self.converter:
                    _, meta = self.converter.convert_chunk(file_, chunk_id)
                if meta is None:
                    meta, _ = read_chunk_metadata(file_, chunk_id)
            except exc.MissingAttribute as err:
                raise exc.FaultyChunk(err)
            data = {'mtime': int(time.time())}
            headers = {REQID_HEADER: request_id('blob-indexer-')}
            self.index_client.chunk_push(self.volume_id,
                                         meta['container_id'],
                                         meta['content_id'],
                                         meta['chunk_id'],
                                         headers=headers, **data)

    def run(self, *args, **kwargs):
        time.sleep(random() * self.interval)
        while True:
            pre = time.time()
            try:
                self.index_pass()
            except exc.VolumeException as err:
                self.logger.error('Cannot index chunks, will retry later: %s',
                                  err)
            except Exception as err:
                self.logger.exception('ERROR during indexing: %s', err)
            else:
                self.passes += 1
            elapsed = (time.time() - pre) or 0.000001
            if elapsed < self.interval:
                time.sleep(self.interval - elapsed)
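# A minimal launch sketch, not part of the original source: it assumes the
# BlobIndexer above is importable with its dependencies, and that
# '/var/lib/oio/sds/OPENIO/rawx-1' is a valid rawx volume path for a
# namespace named 'OPENIO'; both values are made up for illustration.
if __name__ == '__main__':
    conf = {
        'namespace': 'OPENIO',
        'volume': '/var/lib/oio/sds/OPENIO/rawx-1',
        'interval': 300,            # seconds between two index passes
        'report_interval': 3600,    # seconds between two progress reports
        'chunks_per_second': 30,    # rate limit applied through ratelimit()
        'convert_chunks': True,     # also repair/upgrade legacy chunk xattrs
    }
    BlobIndexer(conf).run()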
class BlobRebuilderWorker(object):
    def __init__(self, conf, logger, volume):
        self.conf = conf
        self.logger = logger or get_logger(conf)
        self.volume = volume
        self.run_time = 0
        self.passes = 0
        self.errors = 0
        self.last_reported = 0
        self.chunks_run_time = 0
        self.bytes_running_time = 0
        self.bytes_processed = 0
        self.total_bytes_processed = 0
        self.total_chunks_processed = 0
        self.dry_run = true_value(
            conf.get('dry_run', False))
        self.report_interval = int_value(
            conf.get('report_interval'), 3600)
        self.max_chunks_per_second = int_value(
            conf.get('chunks_per_second'), 30)
        self.max_bytes_per_second = int_value(
            conf.get('bytes_per_second'), 10000000)
        self.rdir_fetch_limit = int_value(
            conf.get('rdir_fetch_limit'), 100)
        self.rdir_client = RdirClient(conf)
        self.content_factory = ContentFactory(conf)

    def rebuilder_pass_with_lock(self):
        self.rdir_client.admin_lock(self.volume,
                                    "rebuilder on %s" % gethostname())
        try:
            self.rebuilder_pass()
        finally:
            self.rdir_client.admin_unlock(self.volume)

    def rebuilder_pass(self):
        start_time = report_time = time.time()
        total_errors = 0
        rebuilder_time = 0
        chunks = self.rdir_client.chunk_fetch(self.volume,
                                              limit=self.rdir_fetch_limit,
                                              rebuild=True)
        for container_id, content_id, chunk_id, data in chunks:
            loop_time = time.time()
            if self.dry_run:
                self.dryrun_chunk_rebuild(container_id, content_id, chunk_id)
            else:
                self.safe_chunk_rebuild(container_id, content_id, chunk_id)
            self.chunks_run_time = ratelimit(
                self.chunks_run_time,
                self.max_chunks_per_second
            )
            self.total_chunks_processed += 1
            now = time.time()
            if now - self.last_reported >= self.report_interval:
                self.logger.info(
                    '%(start_time)s '
                    '%(passes)d '
                    '%(errors)d '
                    '%(c_rate).2f '
                    '%(b_rate).2f '
                    '%(total).2f '
                    '%(rebuilder_time).2f '
                    '%(rebuilder_rate).2f' % {
                        'start_time': time.ctime(report_time),
                        'passes': self.passes,
                        'errors': self.errors,
                        'c_rate': self.passes / (now - report_time),
                        'b_rate': self.bytes_processed / (now - report_time),
                        'total': (now - start_time),
                        'rebuilder_time': rebuilder_time,
                        'rebuilder_rate': rebuilder_time / (now - start_time)
                    }
                )
                report_time = now
                total_errors += self.errors
                self.passes = 0
                self.bytes_processed = 0
                self.last_reported = now
            rebuilder_time += (now - loop_time)
        elapsed = (time.time() - start_time) or 0.000001
        self.logger.info(
            '%(elapsed).02f '
            '%(errors)d '
            '%(chunk_rate).2f '
            '%(bytes_rate).2f '
            '%(rebuilder_time).2f '
            '%(rebuilder_rate).2f' % {
                'elapsed': elapsed,
                'errors': total_errors + self.errors,
                'chunk_rate': self.total_chunks_processed / elapsed,
                'bytes_rate': self.total_bytes_processed / elapsed,
                'rebuilder_time': rebuilder_time,
                'rebuilder_rate': rebuilder_time / elapsed
            }
        )

    def dryrun_chunk_rebuild(self, container_id, content_id, chunk_id):
        self.logger.info("[dryrun] Rebuilding "
                         "container %s, content %s, chunk %s",
                         container_id, content_id, chunk_id)
        self.passes += 1

    def safe_chunk_rebuild(self, container_id, content_id, chunk_id):
        try:
            self.chunk_rebuild(container_id, content_id, chunk_id)
        except Exception as e:
            self.errors += 1
            self.logger.error('ERROR while rebuilding chunk %s|%s|%s: %s',
                              container_id, content_id, chunk_id, e)
        self.passes += 1

    def chunk_rebuild(self, container_id, content_id, chunk_id):
        self.logger.info('Rebuilding (container %s, content %s, chunk %s)',
                         container_id, content_id, chunk_id)
        try:
            content = self.content_factory.get(container_id, content_id)
        except ContentNotFound:
            raise exc.OrphanChunk('Content not found')
        chunk = content.chunks.filter(id=chunk_id).one()
        if chunk is None:
            raise exc.OrphanChunk('Chunk not found in content')
        chunk_size = chunk.size
        content.rebuild_chunk(chunk_id)
        self.rdir_client.chunk_push(self.volume, container_id, content_id,
                                    chunk_id, rtime=int(time.time()))
        self.bytes_processed += chunk_size
        self.total_bytes_processed += chunk_size
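# A minimal usage sketch, not part of the original source: the namespace
# name and the volume address '127.0.0.1:6201' are made up for illustration.
# Taking the rdir admin lock prevents two rebuilders from processing the
# same volume concurrently; dry_run only logs what would be rebuilt.
if __name__ == '__main__':
    conf = {'namespace': 'OPENIO', 'dry_run': True, 'rdir_fetch_limit': 100}
    worker = BlobRebuilderWorker(conf, None, '127.0.0.1:6201')
    worker.rebuilder_pass_with_lock()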
class EventWorker(object):
    def __init__(self, conf, name, context, **kwargs):
        self.conf = conf
        self.name = name
        verbose = kwargs.pop('verbose', False)
        self.logger = get_logger(self.conf, verbose=verbose)
        self.init_zmq(context)
        self.cs = ConscienceClient(self.conf)
        self.rdir = RdirClient(self.conf)
        self._acct_addr = None
        self.acct_update = 0
        self.acct_refresh_interval = int_value(
            conf.get('acct_refresh_interval'), 60
        )
        self.acct_update = true_value(
            conf.get('acct_update', True))
        self.session = requests.Session()
        self.failed = False

    def start(self):
        self.logger.info('worker "%s" starting', self.name)
        self.running = True
        self.run()

    def stop(self):
        self.logger.info('worker "%s" stopping', self.name)
        self.running = False

    def init_zmq(self, context):
        socket = context.socket(zmq.REP)
        socket.connect('inproc://event-front')
        self.socket = socket

    def safe_ack(self, msg):
        try:
            self.socket.send_multipart(msg)
        except Exception:
            self.logger.warn('Unable to ack event')

    def run(self):
        try:
            while self.running:
                msg = self.socket.recv_multipart()
                self.logger.debug("msg received: %s" % msg)
                event = decode_msg(msg)
                success = self.process_event(event)
                f = "0" if success else ""
                self.safe_ack([msg[0], f])
        except Exception as e:
            self.logger.warn('ERROR in worker "%s"', e)
            self.failed = True
            raise e
        finally:
            self.logger.info('worker "%s" stopped', self.name)

    def process_event(self, event):
        handler = self.get_handler(event)
        if not handler:
            self.logger.warn("No handler found")
            # mark as success
            return True
        success = True
        try:
            handler(event)
        except Exception:
            success = False
        finally:
            return success

    def get_handler(self, event):
        event_type = event.get('event')
        if not event_type:
            return None
        if event_type == EventType.CONTAINER_PUT:
            return self.handle_container_put
        elif event_type == EventType.CONTAINER_DESTROY:
            return self.handle_container_destroy
        elif event_type == EventType.CONTAINER_UPDATE:
            return self.handle_container_update
        elif event_type == EventType.OBJECT_PUT:
            return self.handle_object_put
        elif event_type == EventType.OBJECT_DELETE:
            return self.handle_object_delete
        elif event_type == EventType.REFERENCE_UPDATE:
            return self.handle_reference_update
        elif event_type == EventType.CHUNK_PUT:
            return self.handle_chunk_put
        elif event_type == EventType.CHUNK_DELETE:
            return self.handle_chunk_delete
        elif event_type == EventType.PING:
            return self.handle_ping
        else:
            return None

    @property
    def acct_addr(self):
        if not self._acct_addr or self.acct_refresh():
            try:
                acct_instance = self.cs.next_instance(ACCOUNT_SERVICE)
                self._acct_addr = acct_instance.get('addr')
                self.acct_update = time.time()
            except Exception:
                self.logger.warn('Unable to find account instance')
        return self._acct_addr

    def acct_refresh(self):
        return (time.time() - self.acct_update) > self.acct_refresh_interval

    def handle_container_put(self, event):
        """
        Handle container creation.
        :param event:
        """
        self.logger.debug('worker "%s" handle container put', self.name)
        if not self.acct_update:
            return
        uri = 'http://%s/v1.0/account/container/update' % self.acct_addr
        mtime = event.get('when')
        data = event.get('data')
        name = data.get('url').get('user')
        account = data.get('url').get('account')
        event = {'mtime': mtime, 'name': name}
        self.session.post(uri, params={'id': account},
                          data=json.dumps(event))

    def handle_container_update(self, event):
        """
        Handle container update.
        :param event:
        """
        self.logger.debug('worker "%s" handle container update', self.name)
        if not self.acct_update:
            return
        uri = 'http://%s/v1.0/account/container/update' % self.acct_addr
        mtime = event.get('when')
        data = event.get('data')
        name = event.get('url').get('user')
        account = event.get('url').get('account')
        bytes_count = data.get('bytes-count', 0)
        object_count = data.get('object-count', 0)
        event = {
            'mtime': mtime,
            'name': name,
            'bytes': bytes_count,
            'objects': object_count
        }
        self.session.post(uri, params={'id': account},
                          data=json.dumps(event))

    def handle_container_destroy(self, event):
        """
        Handle container destroy.
        :param event:
        """
        self.logger.debug('worker "%s" handle container destroy', self.name)
        if not self.acct_update:
            return
        uri = 'http://%s/v1.0/account/container/update' % self.acct_addr
        dtime = event.get('when')
        data = event.get('data')
        name = data.get('url').get('user')
        account = data.get('url').get('account')
        event = {'dtime': dtime, 'name': name}
        self.session.post(uri, params={'id': account},
                          data=json.dumps(event))

    def handle_object_delete(self, event):
        """
        Handle object deletion.
        Delete the chunks of the object.
        :param event:
        """
        self.logger.debug('worker "%s" handle object delete', self.name)
        pile = GreenPile(PARALLEL_CHUNKS_DELETE)
        chunks = []
        for item in event.get('data'):
            if item.get('type') == 'chunks':
                chunks.append(item)
        if not len(chunks):
            self.logger.warn('No chunks found in event data')
            return

        def delete_chunk(chunk):
            resp = None
            try:
                with Timeout(CHUNK_TIMEOUT):
                    resp = self.session.delete(chunk['id'])
            except (Exception, Timeout) as e:
                self.logger.exception(e)
            return resp

        for chunk in chunks:
            pile.spawn(delete_chunk, chunk)
        resps = [resp for resp in pile if resp]
        for resp in resps:
            if resp.status_code == 204:
                self.logger.info('deleted chunk %s' % resp.url)
            else:
                self.logger.warn('failed to delete chunk %s' % resp.url)

    def handle_object_put(self, event):
        """
        Handle object creation.
        TODO
        :param event:
        """
        self.logger.debug('worker "%s" handle object put', self.name)

    def handle_reference_update(self, event):
        """
        Handle reference update.
        TODO
        :param event
        """
        self.logger.debug('worker "%s" handle reference update', self.name)

    def handle_chunk_put(self, event):
        """
        Handle chunk creation.
        :param event
        """
        self.logger.debug('worker "%s" handle chunk creation', self.name)
        when = event.get('when')
        data = event.get('data')
        volume_id = data.get('volume_id')
        del data['volume_id']
        container_id = data.get('container_id')
        del data['container_id']
        content_id = data.get('content_id')
        del data['content_id']
        chunk_id = data.get('chunk_id')
        del data['chunk_id']
        data['mtime'] = when
        self.rdir.chunk_push(volume_id, container_id, content_id, chunk_id,
                             **data)

    def handle_chunk_delete(self, event):
        """
        Handle chunk deletion.
        :param event
        """
        self.logger.debug('worker "%s" handle chunk deletion', self.name)
        data = event.get('data')
        volume_id = data.get('volume_id')
        container_id = data.get('container_id')
        content_id = data.get('content_id')
        chunk_id = data.get('chunk_id')
        self.rdir.chunk_delete(volume_id, container_id, content_id, chunk_id)

    def handle_ping(self, event):
        """
        Handle ping
        :param event
        """
        self.logger.debug('worker "%s" handle ping', self.name)
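# A minimal dispatch sketch, not part of the original source: the payload
# shape mirrors what handle_chunk_put reads, but every identifier below is
# made up for illustration, and a real worker receives such events through
# its ZMQ socket rather than by direct call.
sample_event = {
    'event': EventType.CHUNK_PUT,
    'when': 1461518400,
    'data': {
        'volume_id': '127.0.0.1:6004',
        'container_id': '0' * 64,
        'content_id': '0' * 32,
        'chunk_id': 'http://127.0.0.1:6004/' + '0' * 64,
    },
}
# worker.process_event(sample_event)  # returns True on success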
class BlobIndexer(Daemon):
    def __init__(self, conf, **kwargs):
        super(BlobIndexer, self).__init__(conf)
        self.logger = get_logger(conf)
        volume = conf.get('volume')
        if not volume:
            raise exc.ConfigurationException('No volume specified for indexer')
        self.volume = volume
        self.passes = 0
        self.errors = 0
        self.last_reported = 0
        self.chunks_run_time = 0
        self.total_chunks_processed = 0
        self.interval = int_value(conf.get('interval'), 300)
        self.report_interval = int_value(conf.get('report_interval'), 3600)
        self.max_chunks_per_second = int_value(
            conf.get('chunks_per_second'), 30)
        self.index_client = RdirClient(conf)
        self.namespace, self.volume_id = check_volume(self.volume)

    def index_pass(self):
        start_time = report_time = time.time()
        total_errors = 0
        paths = paths_gen(self.volume)
        for path in paths:
            self.safe_update_index(path)
            self.chunks_run_time = ratelimit(self.chunks_run_time,
                                             self.max_chunks_per_second)
            self.total_chunks_processed += 1
            now = time.time()
            if now - self.last_reported >= self.report_interval:
                self.logger.info(
                    '%(start_time)s '
                    '%(passes)d '
                    '%(errors)d '
                    '%(c_rate).2f '
                    '%(total).2f ' % {
                        'start_time': time.ctime(report_time),
                        'passes': self.passes,
                        'errors': self.errors,
                        'c_rate': self.passes / (now - report_time),
                        'total': (now - start_time)
                    })
                report_time = now
                total_errors += self.errors
                self.passes = 0
                self.errors = 0
                self.last_reported = now
        elapsed = (time.time() - start_time) or 0.000001
        self.logger.info(
            '%(elapsed).02f '
            '%(errors)d '
            '%(chunk_rate).2f ' % {
                'elapsed': elapsed,
                'errors': total_errors + self.errors,
                'chunk_rate': self.total_chunks_processed / elapsed
            })
        if elapsed < self.interval:
            time.sleep(self.interval - elapsed)

    def safe_update_index(self, path):
        try:
            self.logger.debug('Updating index: %s', path)
            self.update_index(path)
        except Exception:
            self.errors += 1
            self.logger.exception('ERROR while updating index for chunk %s',
                                  path)
        self.passes += 1

    def update_index(self, path):
        with open(path) as f:
            try:
                meta = read_chunk_metadata(f)
            except exc.MissingAttribute as e:
                raise exc.FaultyChunk('Missing extended attribute %s' % e)
            data = {'mtime': int(time.time())}
            self.index_client.chunk_push(self.volume_id,
                                         meta['container_id'],
                                         meta['content_id'],
                                         meta['chunk_id'],
                                         **data)

    def run(self, *args, **kwargs):
        time.sleep(random() * self.interval)
        while True:
            try:
                self.index_pass()
            except Exception as e:
                self.logger.exception('ERROR during indexing: %s' % e)
class Harasser(object):

    def __init__(self, ns, max_containers=256, max_contents=256):
        conf = {'namespace': ns}
        self.cs = ConscienceClient(conf)
        self.rdir = RdirClient(conf)
        self.rawx_list = [x['addr'] for x in self.cs.all_services('rawx')]
        self.sent = set()
        self.max_containers = max_containers
        self.max_contents = max_contents
        self.pushed_count = 0
        self.pushed_time = 0
        self.removed_count = 0
        self.removed_time = 0

    def harass_put(self, loops=None):
        if loops is None:
            loops = random.randint(1000, 2000)
        print("Pushing %d fake chunks" % loops)
        loop = loops
        count_start_container = random.randrange(2**20)
        count_start_content = random.randrange(2**20)
        start = time.time()
        nb_rawx = len(self.rawx_list)
        while loop > 0:
            args = {'mtime': int(start)}
            # vol_id = random.choice(self.rawx_list)
            # container_id = "%064X" % (random.randrange(self.max_containers))
            # content_id = "%032X" % (random.randrange(self.max_contents))
            vol_id = self.rawx_list[loop % nb_rawx]
            container_id = "%064X" % (loop + count_start_container)
            content_id = "%032X" % (loop + count_start_content)
            chunk_id = "http://%s/%064X" \
                % (vol_id, random.randrange(2**128))
            self.rdir.chunk_push(vol_id, container_id, content_id,
                                 chunk_id, **args)
            self.sent.add((vol_id, container_id, content_id, chunk_id))
            loop -= 1
        end = time.time()
        self.pushed_count += loops
        self.pushed_time += end - start
        print("%d pushed in %.3fs, %d req/s" % (
            loops, end - start, loops / (end - start)))

    def harass_del(self, min_loops=0):
        min_loops = min(min_loops, len(self.sent))
        loops = random.randint(min_loops, len(self.sent))
        print("Removing %d fake chunks" % loops)
        loop = loops
        start = time.time()
        while loop > 0:
            args = self.sent.pop()
            self.rdir.chunk_delete(*args)
            loop -= 1
        end = time.time()
        self.removed_count += loops
        self.removed_time += end - start
        print("%d removed in %.3fs, %d req/s" % (
            loops, end - start, loops / (end - start)))

    def __call__(self):
        try:
            while True:
                self.harass_put()
                self.harass_del()
        except KeyboardInterrupt:
            print("Cleaning...")
            self.harass_del(len(self.sent))
            print("Stats:")
            print("Pushed %d in %.3fs, %d req/s" % (
                self.pushed_count, self.pushed_time,
                self.pushed_count / self.pushed_time))
            print("Removed %d in %.3fs, %d req/s" % (
                self.removed_count, self.removed_time,
                self.removed_count / self.removed_time))
class EventWorker(Worker):
    def init(self):
        eventlet.monkey_patch(os=False)
        self.session = requests.Session()
        self.cs = ConscienceClient(self.conf)
        self.rdir = RdirClient(self.conf)
        self._acct_addr = None
        self.acct_update = 0
        self.graceful_timeout = 1
        self.acct_refresh_interval = int_value(
            self.conf.get('acct_refresh_interval'), 60)
        self.concurrency = int_value(self.conf.get('concurrency'), 1000)
        self.acct_update = true_value(self.conf.get('acct_update', True))
        self.rdir_update = true_value(self.conf.get('rdir_update', True))
        super(EventWorker, self).init()

    def notify(self):
        """TODO"""
        pass

    def safe_decode_job(self, job):
        try:
            return json.loads(job)
        except Exception as e:
            self.logger.warn('ERROR decoding job "%s"', str(e.message))
            return None

    def run(self):
        queue_url = self.conf.get('queue_url', 'tcp://127.0.0.1:11300')
        self.beanstalk = Beanstalk.from_url(queue_url)
        gt = eventlet.spawn(self.handle)
        while self.alive:
            self.notify()
            try:
                eventlet.sleep(1.0)
            except AssertionError:
                self.alive = False
                break
        self.notify()
        try:
            with Timeout(self.graceful_timeout) as t:
                gt.kill(StopServe())
                gt.wait()
        except Timeout as te:
            if te != t:
                raise
            gt.kill()

    def handle(self):
        try:
            while True:
                job_id, data = self.beanstalk.reserve()
                try:
                    event = self.safe_decode_job(data)
                    if event:
                        self.process_event(event)
                    self.beanstalk.delete(job_id)
                except Exception:
                    self.logger.exception("ERROR handling event %s", job_id)
        except StopServe:
            self.logger.info('Stopping event handler')

    def process_event(self, event):
        handler = self.get_handler(event)
        if not handler:
            self.logger.warn("ERROR no handler found for event")
            # mark as success
            return True
        success = True
        try:
            handler(event)
        except Exception:
            success = False
        finally:
            return success

    def get_handler(self, event):
        event_type = event.get('event')
        if not event_type:
            return None
        if event_type == EventType.CONTAINER_PUT:
            return self.handle_container_put
        elif event_type == EventType.CONTAINER_DESTROY:
            return self.handle_container_destroy
        elif event_type == EventType.CONTAINER_UPDATE:
            return self.handle_container_update
        elif event_type == EventType.OBJECT_PUT:
            return self.handle_object_put
        elif event_type == EventType.OBJECT_DELETE:
            return self.handle_object_delete
        elif event_type == EventType.REFERENCE_UPDATE:
            return self.handle_reference_update
        elif event_type == EventType.CHUNK_PUT:
            return self.handle_chunk_put
        elif event_type == EventType.CHUNK_DELETE:
            return self.handle_chunk_delete
        elif event_type == EventType.PING:
            return self.handle_ping
        else:
            return None

    @property
    def acct_addr(self):
        if not self._acct_addr or self.acct_refresh():
            try:
                acct_instance = self.cs.next_instance(ACCOUNT_SERVICE)
                self._acct_addr = acct_instance.get('addr')
                self.acct_update = time.time()
            except Exception:
                self.logger.warn('Unable to find account instance')
        return self._acct_addr

    def acct_refresh(self):
        return (time.time() - self.acct_update) > self.acct_refresh_interval

    def handle_container_put(self, event):
        """
        Handle container creation.
        :param event:
        """
        self.logger.debug('worker handle container put')
        if not self.acct_update:
            return
        uri = 'http://%s/v1.0/account/container/update' % self.acct_addr
        mtime = event.get('when')
        data = event.get('data')
        name = data.get('url').get('user')
        account = data.get('url').get('account')
        event = {'mtime': mtime, 'name': name}
        self.session.post(uri, params={'id': account}, json=event)

    def handle_container_update(self, event):
        """
        Handle container update.
        :param event:
        """
        self.logger.debug('worker handle container update')
        if not self.acct_update:
            return
        uri = 'http://%s/v1.0/account/container/update' % self.acct_addr
        mtime = event.get('when')
        data = event.get('data')
        name = event.get('url').get('user')
        account = event.get('url').get('account')
        bytes_count = data.get('bytes-count', 0)
        object_count = data.get('object-count', 0)
        event = {
            'mtime': mtime,
            'name': name,
            'bytes': bytes_count,
            'objects': object_count
        }
        self.session.post(uri, params={'id': account}, json=event)

    def handle_container_destroy(self, event):
        """
        Handle container destroy.
        :param event:
        """
        self.logger.debug('worker handle container destroy')
        if not self.acct_update:
            return
        uri = 'http://%s/v1.0/account/container/update' % self.acct_addr
        dtime = event.get('when')
        data = event.get('data')
        name = data.get('url').get('user')
        account = data.get('url').get('account')
        event = {'dtime': dtime, 'name': name}
        self.session.post(uri, params={'id': account},
                          data=json.dumps(event))

    def handle_object_delete(self, event):
        """
        Handle object deletion.
        Delete the chunks of the object.
        :param event:
        """
        self.logger.debug('worker handle object delete')
        pile = GreenPile(PARALLEL_CHUNKS_DELETE)
        chunks = []
        for item in event.get('data'):
            if item.get('type') == 'chunks':
                chunks.append(item)
        if not len(chunks):
            self.logger.warn('No chunks found in event data')
            return

        def delete_chunk(chunk):
            resp = None
            try:
                with Timeout(CHUNK_TIMEOUT):
                    resp = self.session.delete(chunk['id'])
            except (Exception, Timeout) as e:
                self.logger.warn('error while deleting chunk %s "%s"',
                                 chunk['id'], str(e.message))
            return resp

        for chunk in chunks:
            pile.spawn(delete_chunk, chunk)
        resps = [resp for resp in pile if resp]
        for resp in resps:
            if resp.status_code == 204:
                self.logger.debug('deleted chunk %s' % resp.url)
            else:
                self.logger.warn('failed to delete chunk %s' % resp.url)

    def handle_object_put(self, event):
        """
        Handle object creation.
        TODO
        :param event:
        """
        self.logger.debug('worker handle object put')

    def handle_reference_update(self, event):
        """
        Handle reference update.
        TODO
        :param event
        """
        self.logger.debug('worker handle reference update')

    def handle_chunk_put(self, event):
        """
        Handle chunk creation.
        :param event
        """
        if not self.rdir_update:
            self.logger.debug('worker skip chunk creation')
            return
        self.logger.debug('worker handle chunk creation')
        when = event.get('when')
        data = event.get('data')
        volume_id = data.get('volume_id')
        del data['volume_id']
        container_id = data.get('container_id')
        del data['container_id']
        content_id = data.get('content_id')
        del data['content_id']
        chunk_id = data.get('chunk_id')
        del data['chunk_id']
        data['mtime'] = when
        self.rdir.chunk_push(volume_id, container_id, content_id, chunk_id,
                             **data)

    def handle_chunk_delete(self, event):
        """
        Handle chunk deletion.
        :param event
        """
        if not self.rdir_update:
            self.logger.debug('worker skip chunk deletion')
            return
        self.logger.debug('worker handle chunk deletion')
        data = event.get('data')
        volume_id = data.get('volume_id')
        container_id = data.get('container_id')
        content_id = data.get('content_id')
        chunk_id = data.get('chunk_id')
        self.rdir.chunk_delete(volume_id, container_id, content_id, chunk_id)

    def handle_ping(self, event):
        """
        Handle ping
        :param event
        """
        self.logger.debug('worker handle ping')
class BlobRebuilderWorker(object):
    def __init__(self, conf, logger, volume):
        self.conf = conf
        self.logger = logger or get_logger(conf)
        self.volume = volume
        self.run_time = 0
        self.passes = 0
        self.errors = 0
        self.last_reported = 0
        self.chunks_run_time = 0
        self.bytes_running_time = 0
        self.bytes_processed = 0
        self.total_bytes_processed = 0
        self.total_chunks_processed = 0
        self.dry_run = true_value(conf.get('dry_run', False))
        self.report_interval = int_value(conf.get('report_interval'), 3600)
        self.max_chunks_per_second = int_value(
            conf.get('chunks_per_second'), 30)
        self.max_bytes_per_second = int_value(
            conf.get('bytes_per_second'), 10000000)
        self.rdir_fetch_limit = int_value(conf.get('rdir_fetch_limit'), 100)
        self.rdir_client = RdirClient(conf)
        self.content_factory = ContentFactory(conf)

    def rebuilder_pass_with_lock(self):
        self.rdir_client.admin_lock(self.volume,
                                    "rebuilder on %s" % gethostname())
        try:
            self.rebuilder_pass()
        finally:
            self.rdir_client.admin_unlock(self.volume)

    def rebuilder_pass(self):
        start_time = report_time = time.time()
        total_errors = 0
        rebuilder_time = 0
        chunks = self.rdir_client.chunk_fetch(self.volume,
                                              limit=self.rdir_fetch_limit,
                                              rebuild=True)
        for container_id, content_id, chunk_id, data in chunks:
            loop_time = time.time()
            if self.dry_run:
                self.dryrun_chunk_rebuild(container_id, content_id, chunk_id)
            else:
                self.safe_chunk_rebuild(container_id, content_id, chunk_id)
            self.chunks_run_time = ratelimit(self.chunks_run_time,
                                             self.max_chunks_per_second)
            self.total_chunks_processed += 1
            now = time.time()
            if now - self.last_reported >= self.report_interval:
                self.logger.info(
                    '%(start_time)s '
                    '%(passes)d '
                    '%(errors)d '
                    '%(c_rate).2f '
                    '%(b_rate).2f '
                    '%(total).2f '
                    '%(rebuilder_time).2f '
                    '%(rebuilder_rate).2f' % {
                        'start_time': time.ctime(report_time),
                        'passes': self.passes,
                        'errors': self.errors,
                        'c_rate': self.passes / (now - report_time),
                        'b_rate': self.bytes_processed / (now - report_time),
                        'total': (now - start_time),
                        'rebuilder_time': rebuilder_time,
                        'rebuilder_rate': rebuilder_time / (now - start_time)
                    })
                report_time = now
                total_errors += self.errors
                self.passes = 0
                self.bytes_processed = 0
                self.last_reported = now
            rebuilder_time += (now - loop_time)
        elapsed = (time.time() - start_time) or 0.000001
        self.logger.info(
            '%(elapsed).02f '
            '%(errors)d '
            '%(chunk_rate).2f '
            '%(bytes_rate).2f '
            '%(rebuilder_time).2f '
            '%(rebuilder_rate).2f' % {
                'elapsed': elapsed,
                'errors': total_errors + self.errors,
                'chunk_rate': self.total_chunks_processed / elapsed,
                'bytes_rate': self.total_bytes_processed / elapsed,
                'rebuilder_time': rebuilder_time,
                'rebuilder_rate': rebuilder_time / elapsed
            })

    def dryrun_chunk_rebuild(self, container_id, content_id, chunk_id):
        self.logger.info("[dryrun] Rebuilding "
                         "container %s, content %s, chunk %s"
                         % (container_id, content_id, chunk_id))
        self.passes += 1

    def safe_chunk_rebuild(self, container_id, content_id, chunk_id):
        try:
            self.chunk_rebuild(container_id, content_id, chunk_id)
        except Exception as e:
            self.errors += 1
            self.logger.error('ERROR while rebuilding chunk %s|%s|%s: %s',
                              container_id, content_id, chunk_id, e)
        self.passes += 1

    def chunk_rebuild(self, container_id, content_id, chunk_id):
        self.logger.info('Rebuilding (container %s, content %s, chunk %s)'
                         % (container_id, content_id, chunk_id))
        try:
            content = self.content_factory.get(container_id, content_id)
        except ContentNotFound:
            raise exc.OrphanChunk('Content not found')
        chunk = content.chunks.filter(id=chunk_id).one()
        if chunk is None:
            raise exc.OrphanChunk('Chunk not found in content')
        chunk_size = chunk.size
        content.rebuild_chunk(chunk_id)
        self.rdir_client.chunk_push(self.volume, container_id, content_id,
                                    chunk_id, rtime=int(time.time()))
        self.bytes_processed += chunk_size
        self.total_bytes_processed += chunk_size
class EventWorker(object):
    def __init__(self, conf, name, context, **kwargs):
        self.conf = conf
        self.name = name
        verbose = kwargs.pop('verbose', False)
        self.logger = get_logger(self.conf, verbose=verbose)
        self.init_zmq(context)
        self.cs = ConscienceClient(self.conf)
        self.rdir = RdirClient(self.conf)
        self._acct_addr = None
        self.acct_update = 0
        self.acct_refresh_interval = int_value(
            conf.get('acct_refresh_interval'), 60)
        self.acct_update = true_value(conf.get('acct_update', True))
        self.rdir_update = true_value(conf.get('rdir_update', True))
        self.session = requests.Session()
        self.failed = False

    def start(self):
        self.logger.info('worker "%s" starting', self.name)
        self.running = True
        self.run()

    def stop(self):
        self.logger.info('worker "%s" stopping', self.name)
        self.running = False

    def init_zmq(self, context):
        socket = context.socket(zmq.REP)
        socket.connect('inproc://event-front')
        self.socket = socket

    def safe_ack(self, msg):
        try:
            self.socket.send_multipart(msg)
        except Exception:
            self.logger.warn('Unable to ack event')

    def run(self):
        try:
            while self.running:
                msg = self.socket.recv_multipart()
                self.logger.debug("msg received: %s" % msg)
                event = decode_msg(msg)
                success = self.process_event(event)
                f = "0" if success else ""
                self.safe_ack([msg[0], f])
        except Exception as e:
            self.logger.warn('ERROR in worker "%s"', e)
            self.failed = True
            raise e
        finally:
            self.logger.info('worker "%s" stopped', self.name)

    def process_event(self, event):
        handler = self.get_handler(event)
        if not handler:
            self.logger.warn("No handler found")
            # mark as success
            return True
        success = True
        try:
            handler(event)
        except Exception:
            success = False
        finally:
            return success

    def get_handler(self, event):
        event_type = event.get('event')
        if not event_type:
            return None
        if event_type == EventType.CONTAINER_PUT:
            return self.handle_container_put
        elif event_type == EventType.CONTAINER_DESTROY:
            return self.handle_container_destroy
        elif event_type == EventType.CONTAINER_UPDATE:
            return self.handle_container_update
        elif event_type == EventType.OBJECT_PUT:
            return self.handle_object_put
        elif event_type == EventType.OBJECT_DELETE:
            return self.handle_object_delete
        elif event_type == EventType.REFERENCE_UPDATE:
            return self.handle_reference_update
        elif event_type == EventType.CHUNK_PUT:
            return self.handle_chunk_put
        elif event_type == EventType.CHUNK_DELETE:
            return self.handle_chunk_delete
        elif event_type == EventType.PING:
            return self.handle_ping
        else:
            return None

    @property
    def acct_addr(self):
        if not self._acct_addr or self.acct_refresh():
            try:
                acct_instance = self.cs.next_instance(ACCOUNT_SERVICE)
                self._acct_addr = acct_instance.get('addr')
                self.acct_update = time.time()
            except Exception:
                self.logger.warn('Unable to find account instance')
        return self._acct_addr

    def acct_refresh(self):
        return (time.time() - self.acct_update) > self.acct_refresh_interval

    def handle_container_put(self, event):
        """
        Handle container creation.
        :param event:
        """
        self.logger.debug('worker "%s" handle container put', self.name)
        if not self.acct_update:
            return
        uri = 'http://%s/v1.0/account/container/update' % self.acct_addr
        mtime = event.get('when')
        data = event.get('data')
        name = data.get('url').get('user')
        account = data.get('url').get('account')
        event = {'mtime': mtime, 'name': name}
        self.session.post(uri, params={'id': account},
                          data=json.dumps(event))

    def handle_container_update(self, event):
        """
        Handle container update.
        :param event:
        """
        self.logger.debug('worker "%s" handle container update', self.name)
        if not self.acct_update:
            return
        uri = 'http://%s/v1.0/account/container/update' % self.acct_addr
        mtime = event.get('when')
        data = event.get('data')
        name = event.get('url').get('user')
        account = event.get('url').get('account')
        bytes_count = data.get('bytes-count', 0)
        object_count = data.get('object-count', 0)
        event = {
            'mtime': mtime,
            'name': name,
            'bytes': bytes_count,
            'objects': object_count
        }
        self.session.post(uri, params={'id': account},
                          data=json.dumps(event))

    def handle_container_destroy(self, event):
        """
        Handle container destroy.
        :param event:
        """
        self.logger.debug('worker "%s" handle container destroy', self.name)
        if not self.acct_update:
            return
        uri = 'http://%s/v1.0/account/container/update' % self.acct_addr
        dtime = event.get('when')
        data = event.get('data')
        name = data.get('url').get('user')
        account = data.get('url').get('account')
        event = {'dtime': dtime, 'name': name}
        self.session.post(uri, params={'id': account},
                          data=json.dumps(event))

    def handle_object_delete(self, event):
        """
        Handle object deletion.
        Delete the chunks of the object.
        :param event:
        """
        self.logger.debug('worker "%s" handle object delete', self.name)
        pile = GreenPile(PARALLEL_CHUNKS_DELETE)
        chunks = []
        for item in event.get('data'):
            if item.get('type') == 'chunks':
                chunks.append(item)
        if not len(chunks):
            self.logger.warn('No chunks found in event data')
            return

        def delete_chunk(chunk):
            resp = None
            try:
                with Timeout(CHUNK_TIMEOUT):
                    resp = self.session.delete(chunk['id'])
            except (Exception, Timeout) as e:
                self.logger.exception(e)
            return resp

        for chunk in chunks:
            pile.spawn(delete_chunk, chunk)
        resps = [resp for resp in pile if resp]
        for resp in resps:
            if resp.status_code == 204:
                self.logger.info('deleted chunk %s' % resp.url)
            else:
                self.logger.warn('failed to delete chunk %s' % resp.url)

    def handle_object_put(self, event):
        """
        Handle object creation.
        TODO
        :param event:
        """
        self.logger.debug('worker "%s" handle object put', self.name)

    def handle_reference_update(self, event):
        """
        Handle reference update.
        TODO
        :param event
        """
        self.logger.debug('worker "%s" handle reference update', self.name)

    def handle_chunk_put(self, event):
        """
        Handle chunk creation.
        :param event
        """
        if not self.rdir_update:
            self.logger.debug('worker "%s" skip chunk creation', self.name)
            return
        self.logger.debug('worker "%s" handle chunk creation', self.name)
        when = event.get('when')
        data = event.get('data')
        volume_id = data.get('volume_id')
        del data['volume_id']
        container_id = data.get('container_id')
        del data['container_id']
        content_id = data.get('content_id')
        del data['content_id']
        chunk_id = data.get('chunk_id')
        del data['chunk_id']
        data['mtime'] = when
        self.rdir.chunk_push(volume_id, container_id, content_id, chunk_id,
                             **data)

    def handle_chunk_delete(self, event):
        """
        Handle chunk deletion.
        :param event
        """
        if not self.rdir_update:
            self.logger.debug('worker "%s" skip chunk deletion', self.name)
            return
        self.logger.debug('worker "%s" handle chunk deletion', self.name)
        data = event.get('data')
        volume_id = data.get('volume_id')
        container_id = data.get('container_id')
        content_id = data.get('content_id')
        chunk_id = data.get('chunk_id')
        self.rdir.chunk_delete(volume_id, container_id, content_id, chunk_id)

    def handle_ping(self, event):
        """
        Handle ping
        :param event
        """
        self.logger.debug('worker "%s" handle ping', self.name)
class BlobRebuilderWorker(object):
    def __init__(self, conf, logger, volume):
        self.conf = conf
        self.logger = logger or get_logger(conf)
        self.volume = volume
        self.run_time = 0
        self.passes = 0
        self.errors = 0
        self.last_reported = 0
        self.chunks_run_time = 0
        self.bytes_running_time = 0
        self.bytes_processed = 0
        self.total_bytes_processed = 0
        self.total_chunks_processed = 0
        self.dry_run = true_value(
            conf.get('dry_run', False))
        self.report_interval = int_value(
            conf.get('report_interval'), 3600)
        self.max_chunks_per_second = int_value(
            conf.get('chunks_per_second'), 30)
        self.max_bytes_per_second = int_value(
            conf.get('bytes_per_second'), 10000000)
        self.rdir_fetch_limit = int_value(
            conf.get('rdir_fetch_limit'), 100)
        self.blob_client = BlobClient()
        self.container_client = ContainerClient(conf)
        self.rdir_client = RdirClient(conf)

    def rebuilder_pass_with_lock(self):
        self.rdir_client.admin_lock(self.volume,
                                    "rebuilder on %s" % gethostname())
        try:
            self.rebuilder_pass()
        finally:
            self.rdir_client.admin_unlock(self.volume)

    def rebuilder_pass(self):
        start_time = report_time = time.time()
        total_errors = 0
        rebuilder_time = 0
        chunks = self.rdir_client.chunk_fetch(self.volume,
                                              limit=self.rdir_fetch_limit,
                                              rebuild=True)
        for container_id, content_id, chunk_id, data in chunks:
            loop_time = time.time()
            if self.dry_run:
                self.dryrun_chunk_rebuild(container_id, content_id, chunk_id)
            else:
                self.safe_chunk_rebuild(container_id, content_id, chunk_id)
            self.chunks_run_time = ratelimit(
                self.chunks_run_time,
                self.max_chunks_per_second
            )
            self.total_chunks_processed += 1
            now = time.time()
            if now - self.last_reported >= self.report_interval:
                self.logger.info(
                    '%(start_time)s '
                    '%(passes)d '
                    '%(errors)d '
                    '%(c_rate).2f '
                    '%(b_rate).2f '
                    '%(total).2f '
                    '%(rebuilder_time).2f '
                    '%(rebuilder_rate).2f' % {
                        'start_time': time.ctime(report_time),
                        'passes': self.passes,
                        'errors': self.errors,
                        'c_rate': self.passes / (now - report_time),
                        'b_rate': self.bytes_processed / (now - report_time),
                        'total': (now - start_time),
                        'rebuilder_time': rebuilder_time,
                        'rebuilder_rate': rebuilder_time / (now - start_time)
                    }
                )
                report_time = now
                total_errors += self.errors
                self.passes = 0
                self.bytes_processed = 0
                self.last_reported = now
            rebuilder_time += (now - loop_time)
        elapsed = (time.time() - start_time) or 0.000001
        self.logger.info(
            '%(elapsed).02f '
            '%(errors)d '
            '%(chunk_rate).2f '
            '%(bytes_rate).2f '
            '%(rebuilder_time).2f '
            '%(rebuilder_rate).2f' % {
                'elapsed': elapsed,
                'errors': total_errors + self.errors,
                'chunk_rate': self.total_chunks_processed / elapsed,
                'bytes_rate': self.total_bytes_processed / elapsed,
                'rebuilder_time': rebuilder_time,
                'rebuilder_rate': rebuilder_time / elapsed
            }
        )

    def dryrun_chunk_rebuild(self, container_id, content_id, chunk_id):
        self.logger.info("[dryrun] Rebuilding "
                         "container %s, content %s, chunk %s"
                         % (container_id, content_id, chunk_id))
        self.passes += 1

    def safe_chunk_rebuild(self, container_id, content_id, chunk_id):
        self.logger.info('Rebuilding (container %s, content %s, chunk %s)'
                         % (container_id, content_id, chunk_id))
        try:
            self.chunk_rebuild(container_id, content_id, chunk_id)
        except Exception as e:
            self.errors += 1
            self.logger.error('ERROR while rebuilding chunk %s|%s|%s: %s',
                              container_id, content_id, chunk_id, e)
        self.passes += 1

    def _meta2_get_chunks_at_pos(self, container_id, content_id, chunk_id):
        current_chunk_url = 'http://%s/%s' % (self.volume, chunk_id)
        try:
            data = self.container_client.content_show(
                cid=container_id, content=content_id)
        except exc.NotFound:
            raise exc.OrphanChunk('Content not found')
        current_chunk = None
        for c in data:
            if c['url'] == current_chunk_url:
                current_chunk = c
                break
        if not current_chunk:
            raise exc.OrphanChunk('Chunk not found in content')
        duplicate_chunks = []
        for c in data:
            if c['pos'] == current_chunk['pos'] \
                    and c['url'] != current_chunk['url']:
                duplicate_chunks.append(c)
        if len(duplicate_chunks) == 0:
            raise exc.UnrecoverableContent('No copy of missing chunk')
        return current_chunk, duplicate_chunks

    def _meta2_get_spare_chunk(self, container_id, content_id, notin, broken):
        spare_data = {'notin': notin, 'broken': [broken], 'size': 0}
        try:
            spare_resp = self.container_client.content_spare(
                cid=container_id, content=content_id, data=spare_data)
        except ClientException as e:
            raise exc.SpareChunkException('No spare chunk (%s)' % e.message)
        return spare_resp['chunks'][0]

    def _meta2_replace_chunk(self, container_id, content_id,
                             current_chunk, new_chunk):
        old = [{'type': 'chunk',
                'id': current_chunk['url'],
                'hash': current_chunk['hash'],
                'size': current_chunk['size'],
                'pos': current_chunk['pos'],
                'content': content_id}]
        new = [{'type': 'chunk',
                'id': new_chunk['id'],
                'hash': current_chunk['hash'],
                'size': current_chunk['size'],
                'pos': current_chunk['pos'],
                'content': content_id}]
        update_data = {'old': old, 'new': new}
        self.container_client.container_raw_update(
            cid=container_id, data=update_data)

    # TODO rain support
    def chunk_rebuild(self, container_id, content_id, chunk_id):
        current_chunk, duplicate_chunks = self._meta2_get_chunks_at_pos(
            container_id, content_id, chunk_id)
        spare_chunk = self._meta2_get_spare_chunk(
            container_id, content_id, duplicate_chunks, current_chunk)
        uploaded = False
        for src in duplicate_chunks:
            try:
                self.blob_client.chunk_copy(src['url'], spare_chunk['id'])
                self.logger.debug('copy chunk from %s to %s',
                                  src['url'], spare_chunk['id'])
                uploaded = True
                break
            except Exception as e:
                self.logger.debug('Failed to copy chunk from %s to %s: %s',
                                  src['url'], spare_chunk['id'], type(e))
        if not uploaded:
            raise exc.UnrecoverableContent('No copy available '
                                           'of missing chunk')
        self._meta2_replace_chunk(container_id, content_id,
                                  current_chunk, spare_chunk)
        self.rdir_client.chunk_push(self.volume, container_id, content_id,
                                    chunk_id, rtime=int(time.time()))
        self.bytes_processed += current_chunk['size']
        self.total_bytes_processed += current_chunk['size']
class BlobIndexer(Daemon):
    def __init__(self, conf, **kwargs):
        super(BlobIndexer, self).__init__(conf)
        self.logger = get_logger(conf)
        volume = conf.get('volume')
        if not volume:
            raise exc.ConfigurationException('No volume specified for indexer')
        self.volume = volume
        self.passes = 0
        self.errors = 0
        self.successes = 0
        self.last_reported = 0
        self.total_since_last_reported = 0
        self.chunks_run_time = 0
        self.interval = int_value(conf.get('interval'), 300)
        self.report_interval = int_value(conf.get('report_interval'), 3600)
        self.max_chunks_per_second = int_value(
            conf.get('chunks_per_second'), 30)
        pm = get_pool_manager(pool_connections=10)
        self.index_client = RdirClient(conf, logger=self.logger,
                                       pool_manager=pm)
        self.namespace, self.volume_id = check_volume(self.volume)
        self.convert_chunks = true_value(conf.get('convert_chunks'))
        if self.convert_chunks:
            self.converter = BlobConverter(self.conf, logger=self.logger,
                                           pool_manager=pm)
        else:
            self.converter = None

    def index_pass(self):

        def safe_update_index(path):
            chunk_id = path.rsplit('/', 1)[-1]
            if len(chunk_id) != STRLEN_CHUNKID:
                self.logger.warn('WARN Not a chunk %s' % path)
                return
            for c in chunk_id:
                if c not in hexdigits:
                    self.logger.warn('WARN Not a chunk %s' % path)
                    return
            try:
                self.update_index(path, chunk_id)
                self.successes += 1
                self.logger.debug('Updated %s', path)
            except OioNetworkException as exc:
                self.errors += 1
                self.logger.warn('ERROR while updating %s: %s', path, exc)
            except VolumeException as exc:
                self.errors += 1
                self.logger.error('Cannot index %s: %s', path, exc)
                # All chunks of this volume are indexed in the same service,
                # no need to try another chunk, it will generate the same
                # error. Let the upper level retry later.
                raise
            except Exception:
                self.errors += 1
                self.logger.exception('ERROR while updating %s', path)
            self.total_since_last_reported += 1

        def report(tag):
            total = self.errors + self.successes
            now = time.time()
            elapsed = (now - start_time) or 0.000001
            self.logger.info(
                '%(tag)s=%(current_time)s '
                'elapsed=%(elapsed).02f '
                'pass=%(pass)d '
                'errors=%(errors)d '
                'chunks=%(nb_chunks)d %(c_rate).2f/s' % {
                    'tag': tag,
                    'current_time': datetime.fromtimestamp(
                        int(now)).isoformat(),
                    'pass': self.passes,
                    'errors': self.errors,
                    'nb_chunks': total,
                    'c_rate': self.total_since_last_reported /
                    (now - self.last_reported),
                    'elapsed': elapsed
                })
            self.last_reported = now
            self.total_since_last_reported = 0

        start_time = time.time()
        self.last_reported = start_time
        self.errors = 0
        self.successes = 0
        paths = paths_gen(self.volume)
        report('started')
        for path in paths:
            safe_update_index(path)
            self.chunks_run_time = ratelimit(self.chunks_run_time,
                                             self.max_chunks_per_second)
            now = time.time()
            if now - self.last_reported >= self.report_interval:
                report('running')
        report('ended')

    def update_index(self, path, chunk_id):
        with open(path) as f:
            try:
                meta = None
                if self.convert_chunks and self.converter:
                    _, meta = self.converter.convert_chunk(f, chunk_id)
                if meta is None:
                    meta, _ = read_chunk_metadata(f, chunk_id)
            except exc.MissingAttribute as e:
                raise exc.FaultyChunk('Missing extended attribute %s' % e)
            data = {'mtime': int(time.time())}
            headers = {'X-oio-req-id': 'blob-indexer-' + request_id()[:-13]}
            self.index_client.chunk_push(self.volume_id,
                                         meta['container_id'],
                                         meta['content_id'],
                                         meta['chunk_id'],
                                         headers=headers, **data)

    def run(self, *args, **kwargs):
        time.sleep(random() * self.interval)
        while True:
            pre = time.time()
            try:
                self.index_pass()
            except VolumeException as exc:
                self.logger.error('Cannot index chunks, will retry later: %s',
                                  exc)
            except Exception as exc:
                self.logger.exception('ERROR during indexing: %s', exc)
            else:
                self.passes += 1
            elapsed = (time.time() - pre) or 0.000001
            if elapsed < self.interval:
                time.sleep(self.interval - elapsed)
class BlobIndexerWorker(object):
    def __init__(self, conf, logger, volume):
        self.conf = conf
        self.logger = logger
        self.volume = volume
        self.passes = 0
        self.errors = 0
        self.last_reported = 0
        self.chunks_run_time = 0
        self.total_chunks_processed = 0
        self.report_interval = int_value(
            conf.get('report_interval'), 3600)
        self.max_chunks_per_second = int_value(
            conf.get('chunks_per_second'), 30)
        self.index_client = RdirClient(conf)
        self.namespace, self.volume_id = check_volume(self.volume)

    def index_pass(self):
        start_time = report_time = time.time()
        total_errors = 0
        paths = paths_gen(self.volume)
        for path in paths:
            self.safe_update_index(path)
            self.chunks_run_time = ratelimit(
                self.chunks_run_time,
                self.max_chunks_per_second
            )
            self.total_chunks_processed += 1
            now = time.time()
            if now - self.last_reported >= self.report_interval:
                self.logger.info(
                    '%(start_time)s '
                    '%(passes)d '
                    '%(errors)d '
                    '%(c_rate).2f '
                    '%(total).2f ' % {
                        'start_time': time.ctime(report_time),
                        'passes': self.passes,
                        'errors': self.errors,
                        'c_rate': self.passes / (now - report_time),
                        'total': (now - start_time)
                    }
                )
                report_time = now
                total_errors += self.errors
                self.passes = 0
                self.errors = 0
                self.last_reported = now
        elapsed = (time.time() - start_time) or 0.000001
        self.logger.info(
            '%(elapsed).02f '
            '%(errors)d '
            '%(chunk_rate).2f ' % {
                'elapsed': elapsed,
                'errors': total_errors + self.errors,
                'chunk_rate': self.total_chunks_processed / elapsed
            }
        )

    def safe_update_index(self, path):
        try:
            self.logger.debug('Updating index: %s' % path)
            self.update_index(path)
        except Exception:
            self.errors += 1
            self.logger.exception('ERROR while updating index for chunk %s',
                                  path)
        self.passes += 1

    def update_index(self, path):
        with open(path) as f:
            try:
                meta = read_chunk_metadata(f)
            except exc.MissingAttribute as e:
                raise exc.FaultyChunk(
                    'Missing extended attribute %s' % e)
            data = {
                'content_version': meta['content_version'],
                'content_nbchunks': meta['content_chunksnb'],
                'content_path': meta['content_path'],
                'content_size': meta['content_size'],
                'chunk_hash': meta['chunk_hash'],
                'chunk_position': meta['chunk_pos'],
                'chunk_size': meta['chunk_size'],
                'mtime': int(time.time())
            }
            self.index_client.chunk_push(self.volume_id,
                                         meta['content_cid'],
                                         meta['content_id'],
                                         meta['chunk_id'],
                                         **data)