class BlobRegistratorWorker(object):
    """Apply one registration action to every chunk file of a volume.

    The action is selected by ``conf["action"]`` (``"update"``, ``"insert"``
    or ``"check"``; ``"check"`` when absent) and is run against each path
    yielded by ``paths_gen(volume)``.
    """

    def __init__(self, conf, logger, volume):
        """Store the configuration and build the container client.

        :param conf: dict-like configuration; ``"namespace"`` is mandatory,
            ``"report_period"``, ``"action"`` and ``"first"`` are optional.
        :param logger: logger exposing ``info`` and ``warn``.
        :param volume: volume passed to ``check_volume``, ``lock_volume``
            and ``paths_gen``.
        :raises KeyError: if ``"namespace"`` is missing or ``"action"`` is
            not one of the known actions.
        """
        self.conf = conf
        self.logger = logger
        self.volume = volume
        self.namespace = self.conf["namespace"]
        self.volume_ns, self.volume_id = check_volume(self.volume)
        c = dict()
        c['namespace'] = self.namespace
        self.client = ContainerClient(c, logger=self.logger)
        self.report_interval = conf.get(
            "report_period", default_report_interval)

        # The actions are stored as plain class attributes (not bound
        # methods), hence the explicit `self` when `self.action` is called.
        actions = {
            'update': BlobRegistratorWorker._update_chunk,
            'insert': BlobRegistratorWorker._insert_chunk,
            'check': BlobRegistratorWorker._check_chunk,
        }
        self.action = actions[conf.get("action", "check")]

    def pass_with_lock(self):
        """Run `pass_without_lock` inside ``lock_volume(self.volume)``."""
        with lock_volume(self.volume):
            return self.pass_without_lock()

    def pass_without_lock(self):
        """Walk the volume and apply the configured action to each chunk.

        Failures are counted and logged, never propagated: ``NotFound`` is
        logged as "ORPHAN", ``Conflict`` as "ALREADY" and anything else as
        "ERROR". A "STEP" line is logged each time more than
        ``self.report_interval`` has elapsed since the previous one, and a
        "FINAL" line closes the pass.
        """
        last_report = now()
        count, success, fail = 0, 0, 0
        if self.namespace != self.volume_ns:
            self.logger.warn("Forcing the NS to [%s] (previously [%s])",
                             self.namespace, self.volume_ns)

        self.logger.info("START %s", self.volume)
        for path in paths_gen(self.volume):
            # Reset for each file so the handlers below never read the
            # metadata of a previous chunk (or an unbound name) when the
            # file cannot be opened or its metadata cannot be read.
            meta = None
            try:
                with open(path) as f:
                    # NOTE(review): BlobRegistrator unpacks a 2-tuple from
                    # read_chunk_metadata(f, chunk_id) while the result is
                    # used as a mapping here -- confirm which signature
                    # applies.
                    meta = read_chunk_metadata(f)
                    self.action(self, path, f, meta)
                success += 1
            except NotFound as e:
                fail += 1
                self.logger.info("ORPHAN %s/%s in %s/%s %s",
                                 *self._failure_log_args(path, meta, e))
            except Conflict as e:
                fail += 1
                self.logger.info("ALREADY %s/%s in %s/%s %s",
                                 *self._failure_log_args(path, meta, e))
            except Exception as e:
                fail += 1
                self.logger.warn("ERROR %s/%s in %s/%s %s",
                                 *self._failure_log_args(path, meta, e))
            count += 1

            # TODO(jfs): do the throttling

            # periodical reporting
            t = now()
            if t - last_report > self.report_interval:
                self.logger.info("STEP %d ok %d ko %d",
                                 count, success, fail)
                # Restart the interval, otherwise every following chunk
                # would trigger a report.
                last_report = t

        self.logger.info("FINAL %s %d ok %d ko %d",
                         self.volume, count, success, fail)

    @staticmethod
    def _failure_log_args(path, meta, exc):
        """Return the five arguments of the ORPHAN/ALREADY/ERROR log lines.

        When `meta` is None (metadata never loaded) the chunk is identified
        by its path and the unknown fields are rendered as ``'?'``.
        """
        if meta is None:
            return ('?', path, '?', '?', str(exc))
        return (meta['content_id'], meta['chunk_id'],
                meta['container_id'], meta['content_path'], str(exc))

    def _check_chunk(self, path, f, meta):
        """Placeholder for the "check" action.

        :raises NotImplementedError: always.
        """
        raise NotImplementedError("CHECK not yet implemented")

    def _insert_chunk(self, path, f, meta):
        """Insert the bean built from `meta` into the chunk's container."""
        cid = meta['container_id']
        chunkid = basename(path)
        bean = meta2bean(self.volume_id, meta)
        self.client.container_raw_insert(bean, cid=cid)
        self.logger.info("inserted %s/%s in %s/%s",
                         meta['content_id'], chunkid, cid,
                         meta['content_path'])

    def _update_chunk(self, path, f, meta):
        """Update the bean built from `meta` in the chunk's container.

        Chunks whose position starts with ``'0'`` are skipped unless the
        ``"first"`` configuration entry is truthy; a missing entry is
        treated as false.
        """
        cid = meta['container_id']
        chunkid = basename(path)
        is_first = str(meta['chunk_pos']).startswith('0')
        if is_first and not self.conf.get('first'):
            self.logger.info("skip %s/%s from %s/%s",
                             meta['content_id'], chunkid, cid,
                             meta['content_path'])
            return
        # The same bean is sent as both the old and the new value.
        pre = meta2bean(self.volume_id, meta)
        post = meta2bean(self.volume_id, meta)
        self.client.container_raw_update(pre, post, cid=cid)
        self.logger.info("updated %s/%s in %s/%s",
                         meta['content_id'], chunkid, cid,
                         meta['content_path'])
class BlobRegistrator(object):
    """Register the beans (alias, header, chunk) of a volume's chunk files.

    Every chunk file yielded by ``paths_gen(volume)`` is turned into three
    beans, and the configured action (``insert``, ``update`` or ``check``)
    is applied to each bean through a ``ContainerClient``. Per-bean-type
    counters are kept and reported periodically through the logger.
    """

    DEFAULT_CHUNKS_PER_SECOND = 30
    DEFAULT_REPORT_INTERVAL = 3600
    BEAN_TYPES = ('alias', 'header', 'chunk')

    def __init__(self, conf, logger, volume, container_ids):
        """Validate the configuration and reset all counters.

        :param conf: dict-like configuration; ``'namespace'`` and
            ``'action'`` are mandatory, ``'chunks_per_second'`` and
            ``'report_interval'`` are optional.
        :param logger: logger exposing ``debug``, ``info``, ``warn`` and
            ``error``.
        :param volume: volume passed to ``check_volume`` and ``paths_gen``.
        :param container_ids: iterable of container IDs (upper-cased here),
            or a falsy value for none.
        :raises ValueError: if the configured namespace differs from the
            volume namespace, or if the action is unknown.
        """
        self.conf = conf
        self.logger = logger
        self.volume = volume
        self.volume_ns, self.volume_id = check_volume(self.volume)
        self.container_ids = [
            container_id.upper() for container_id in container_ids or ()]

        self.namespace = self.conf['namespace']
        if self.namespace != self.volume_ns:
            # Interpolate explicitly: ValueError does not apply
            # printf-style arguments itself.
            raise ValueError(
                'Namespace (%s) mismatch with volume namespace (%s)'
                % (self.namespace, self.volume_ns))

        # action
        actions = {
            'insert': self._insert_bean,
            'update': self._update_bean,
            'check': self._check_bean,
        }
        self.action_name = self.conf['action'].lower()
        if self.action_name not in actions:
            raise ValueError('Unknown action (%s)' % self.action_name)
        self.action = actions[self.action_name]

        # speed
        self.chunks_run_time = 0
        self.max_chunks_per_second = int_value(
            self.conf.get('chunks_per_second'),
            self.DEFAULT_CHUNKS_PER_SECOND)

        # counters
        self.chunks_processed = 0
        self.chunk_errors = 0
        self.beans_processed = dict.fromkeys(self.BEAN_TYPES, 0)
        self.bean_successes = dict.fromkeys(self.BEAN_TYPES, 0)
        self.bean_already_exists = dict.fromkeys(self.BEAN_TYPES, 0)
        self.bean_orphans = dict.fromkeys(self.BEAN_TYPES, 0)
        self.bean_errors = dict.fromkeys(self.BEAN_TYPES, 0)

        # report
        self.start_time = 0
        self.last_report = 0
        self.report_interval = int_value(conf.get('report_interval'),
                                         self.DEFAULT_REPORT_INTERVAL)

        self.client = ContainerClient({'namespace': self.namespace},
                                      logger=self.logger)
        # Single timestamp shared by every bean created by this instance.
        self.ctime = int(time.time())

    @staticmethod
    def _chunk_log_args(meta):
        """Return the six identifying fields logged for a chunk.

        Missing keys are rendered as None so that logging from an error
        handler cannot itself raise ``KeyError``.
        """
        return tuple(meta.get(key) for key in (
            'container_id', 'content_path', 'content_version',
            'content_id', 'chunk_id', 'chunk_pos'))

    def _beans_from_meta(self, meta):
        """Build the alias, header and chunk beans described by `meta`.

        :param meta: mapping of chunk metadata (string values).
        :returns: list of three dicts, in ``BEAN_TYPES`` order.
        """
        alias = {
            'type': 'alias',
            'name': meta['content_path'],
            'version': int(meta['content_version']),
            'ctime': self.ctime,
            'mtime': self.ctime,
            'deleted': False,
            'header': meta['content_id'],
        }
        header = {
            'type': 'header',
            'id': meta['content_id'],
            'size': 0,
            'ctime': self.ctime,
            'mtime': self.ctime,
            'policy': meta['content_policy'],
            'chunk-method': meta['content_chunkmethod'],
            'mime-type': 'application/octet-stream',
        }
        chunk = {
            'type': 'chunk',
            'id': 'http://' + self.volume_id + '/' + meta['chunk_id'],
            # The metachunk hash is preferred when present and non-empty.
            'hash': meta.get('metachunk_hash') or meta['chunk_hash'],
            'size': int(meta['chunk_size']),
            'ctime': self.ctime,
            'pos': meta['chunk_pos'],
            'content': meta['content_id'],
        }
        return [alias, header, chunk]

    def _check_bean(self, meta, bean):
        """Placeholder for the "check" action.

        :raises NotImplementedError: always.
        """
        raise NotImplementedError("CHECK not yet implemented")

    def _insert_bean(self, meta, bean):
        """Insert `bean` into the container named by `meta`."""
        self.client.container_raw_insert(bean, cid=meta['container_id'])

    def _update_bean(self, meta, bean):
        """Update `bean` in the container named by `meta`.

        The same bean is sent as both the old and the new value.
        """
        self.client.container_raw_update([bean], [bean],
                                         cid=meta['container_id'])

    def _get_report(self, status, end_time):
        """Format the one-line progress report.

        :param status: label placed at the start of the line.
        :param end_time: timestamp (seconds) the rates are computed against.
        :returns: the report string: chunk counters followed by the
            counters of each bean type.
        """
        # A zero elapsed time is replaced by a tiny value to keep the rate
        # divisions below well-defined.
        time_since_last_report = (end_time - self.last_report) or 0.00001
        total_time = (end_time - self.start_time) or 0.00001

        def percent(part, whole):
            # `whole or 1` avoids dividing by zero before anything ran.
            return 100 * part / float(whole or 1)

        header = (
            '%(status)s volume=%(volume)s '
            'start_time=%(start_time)s %(total_time).2fs '
            'last_report=%(last_report)s %(time_since_last_report).2fs '
            'chunks_processed=%(chunks_processed)d '
            '%(chunks_processed_rate).2f/s '
            'chunk_errors=%(chunk_errors)d '
            '%(chunk_errors_rate).2f%% ' % {
                'status': status,
                'volume': self.volume_id,
                'start_time':
                    datetime.fromtimestamp(int(self.start_time)).isoformat(),
                'total_time': total_time,
                'last_report':
                    datetime.fromtimestamp(int(self.last_report)).isoformat(),
                'time_since_last_report': time_since_last_report,
                'chunks_processed': self.chunks_processed,
                'chunks_processed_rate': self.chunks_processed / total_time,
                'chunk_errors': self.chunk_errors,
                'chunk_errors_rate':
                    percent(self.chunk_errors, self.chunks_processed),
            })

        sections = [header]
        for bean_type in self.BEAN_TYPES:
            processed = self.beans_processed[bean_type]
            sections.append(
                'bean_%(bean_type)s_processed=%(beans_processed)d '
                '%(beans_processed_rate).2f/s '
                'bean_%(bean_type)s_successes=%(bean_successes)d '
                '%(bean_successes_rate).2f%% '
                'bean_%(bean_type)s_already_exists=%(bean_already_exists)d '
                '%(bean_already_exists_rate).2f%% '
                'bean_%(bean_type)s_orphans=%(bean_orphans)d '
                '%(bean_orphans_rate).2f%% '
                'bean_%(bean_type)s_errors=%(bean_errors)d '
                '%(bean_errors_rate).2f%%' % {
                    'bean_type': bean_type,
                    'beans_processed': processed,
                    'beans_processed_rate': processed / total_time,
                    'bean_successes': self.bean_successes[bean_type],
                    'bean_successes_rate': percent(
                        self.bean_successes[bean_type], processed),
                    'bean_already_exists':
                        self.bean_already_exists[bean_type],
                    'bean_already_exists_rate': percent(
                        self.bean_already_exists[bean_type], processed),
                    'bean_orphans': self.bean_orphans[bean_type],
                    'bean_orphans_rate': percent(
                        self.bean_orphans[bean_type], processed),
                    'bean_errors': self.bean_errors[bean_type],
                    'bean_errors_rate': percent(
                        self.bean_errors[bean_type], processed),
                })
        # The header already ends with a space, so the first bean section
        # is preceded by two spaces; kept as is for log compatibility.
        return ' '.join(sections)

    def log_report(self, status, force=False):
        """Log a report if forced or if the report interval has elapsed."""
        end_time = time.time()
        if force or (end_time - self.last_report >= self.report_interval):
            self.logger.info(self._get_report(status, end_time))
            self.last_report = end_time

    def pass_volume(self):
        """Process every chunk file of the volume.

        :returns: True when no chunk error and no bean error was counted.
        """
        self.start_time = self.last_report = time.time()
        self.log_report('START', force=True)

        for path in paths_gen(self.volume):
            try:
                self.pass_chunk_file(path)
                self.chunks_processed += 1
            except Exception as exc:
                self.logger.error(
                    'Failed to pass chunk file (chunk_file=%s): %s',
                    path, exc)
                self.chunk_errors += 1

            self.log_report('RUN')
            # presumably throttles to max_chunks_per_second -- see ratelimit
            self.chunks_run_time = ratelimit(self.chunks_run_time,
                                             self.max_chunks_per_second)

        self.log_report('DONE', force=True)
        return self.chunk_errors == 0 \
            and all(errors == 0 for errors in self.bean_errors.values())

    def pass_chunk_file(self, path):
        """Apply the configured action to the three beans of one chunk file.

        Files whose name is not a chunk ID (wrong length or non-hexadecimal
        characters) are logged and ignored.

        :param path: path of the chunk file; the chunk ID is its last
            ``/``-separated component.
        """
        chunk_id = path.rsplit('/', 1)[-1]
        if len(chunk_id) != STRLEN_CHUNKID:
            if chunk_id.endswith(CHUNK_SUFFIX_PENDING):
                self.logger.info('Skipping pending chunk %s', path)
            else:
                self.logger.warn('WARN Not a chunk %s', path)
            return
        if not all(char in hexdigits for char in chunk_id):
            self.logger.warn('WARN Not a chunk %s', path)
            return

        with open(path) as f:
            meta, _ = read_chunk_metadata(f, chunk_id)
            # NOTE(review): chunks whose container IS listed are skipped;
            # confirm container_ids is meant as an exclusion list and not
            # as a selection filter.
            if self.container_ids \
                    and meta['container_id'] in self.container_ids:
                self.logger.debug(
                    'Skipping chunk file (container_id=%s content_path=%s '
                    'content_version=%s content_id=%s chunk_id=%s '
                    'chunk_pos=%s)',
                    *self._chunk_log_args(meta))
                return

            for bean in self._beans_from_meta(meta):
                try:
                    self.pass_bean(meta, bean)
                except Exception as exc:
                    self.logger.error(
                        'Failed to pass chunk file (container_id=%s '
                        'content_path=%s content_version=%s content_id=%s '
                        'chunk_id=%s chunk_pos=%s): %s',
                        *(self._chunk_log_args(meta) + (exc,)))
                    self.bean_errors[bean['type']] += 1

    def pass_bean(self, meta, bean):
        """Apply the configured action to one bean and update the counters.

        ``Conflict`` is counted as "already exists", ``NotFound`` as
        "orphan" and any other exception as an error; none is propagated.
        """
        bean_type = bean['type']
        ident = self._chunk_log_args(meta)
        try:
            self.beans_processed[bean_type] += 1
            self.action(meta, bean)
            self.logger.debug(
                'Passed %s (container_id=%s content_path=%s '
                'content_version=%s content_id=%s chunk_id=%s chunk_pos=%s)',
                bean_type, *ident)
            self.bean_successes[bean_type] += 1
        except Conflict as exc:
            self.logger.info(
                'Already exists %s (container_id=%s content_path=%s '
                'content_version=%s content_id=%s chunk_id=%s chunk_pos=%s): '
                '%s',
                bean_type, *(ident + (exc,)))
            self.bean_already_exists[bean_type] += 1
        except NotFound as exc:
            self.logger.info(
                'Orphan %s (container_id=%s content_path=%s '
                'content_version=%s content_id=%s chunk_id=%s chunk_pos=%s): '
                '%s',
                bean_type, *(ident + (exc,)))
            self.bean_orphans[bean_type] += 1
        except Exception as exc:
            self.logger.error(
                'Failed to pass %s (container_id=%s content_path=%s '
                'content_version=%s content_id=%s chunk_id=%s chunk_pos=%s): '
                '%s',
                bean_type, *(ident + (exc,)))
            self.bean_errors[bean_type] += 1