def _distribute_events(self, reply_loc=None):
    next_worker = 0
    items_run_time = 0

    try:
        tasks_events = self._fetch_tasks_events_to_send()
        items_run_time = ratelimit(items_run_time,
                                   self.max_items_per_second)
        next_worker = self._send_task_event(next(tasks_events), reply_loc,
                                            next_worker)
        self.sending = True
        for task_event in tasks_events:
            items_run_time = ratelimit(items_run_time,
                                       self.max_items_per_second)
            next_worker = self._send_task_event(task_event, reply_loc,
                                                next_worker)
            if not self.tool.running:
                break
    except Exception as exc:
        if not isinstance(exc, StopIteration) and self.tool.running:
            self.logger.error("Failed to distribute events: %s", exc)
            self.tool.success = False
    finally:
        self.sending = False
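All of the passes in this listing share one call shape: carry a running timestamp, pass it to ratelimit() together with a maximum rate (and optionally an increment), and store the returned value for the next iteration. The helper below is a hedged, standalone sketch of that contract, not the project's actual implementation; the name ratelimit_sketch, the use of time.sleep() (rather than a green-thread sleep), and the exact pacing strategy are assumptions made for illustration only.

# Hedged sketch of a sleep-based rate limiter with the same call shape as the
# ratelimit() helper used throughout these passes. Assumption: it returns an
# updated reference time that the caller feeds back into the next call.
import time


def ratelimit_sketch(run_time, max_rate, increment=1):
    """Sleep so the caller never exceeds max_rate units per second.

    run_time  -- value returned by the previous call (0 on the first call)
    max_rate  -- maximum number of units per second (<= 0 disables limiting)
    increment -- how many units this iteration accounts for (e.g. bytes read)
    """
    if max_rate <= 0:
        return run_time
    time_per_unit = 1.0 / max_rate
    now = time.time()
    # Next allowed slot: either the slot booked by the previous call, or now.
    next_allowed = max(run_time, now)
    wait = next_allowed - now
    if wait > 0:
        time.sleep(wait)
    # Book the next slot proportionally to how much work this call represents.
    return next_allowed + time_per_unit * increment


# Usage mirroring the loops in this listing: the return value is carried
# across iterations, capping the loop at roughly 100 iterations per second.
items_run_time = 0
for _item in range(5):
    items_run_time = ratelimit_sketch(items_run_time, max_rate=100)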
def process(self, beanstalkd_job):
    job_id = beanstalkd_job['job_id']
    job_config = beanstalkd_job['job_config']

    task = self.tasks.get(job_id)
    if task is None:
        job_type = beanstalkd_job['job_type']
        task_class = JOB_TYPES[job_type].TASK_CLASS
        job_params = job_config['params']
        task = task_class(self.conf, job_params, logger=self.logger)
        self.tasks[job_id] = task

    tasks_per_second = job_config['tasks_per_second']
    tasks = beanstalkd_job['tasks']

    task_errors = Counter()
    task_results = Counter()

    tasks_run_time = 0
    for task_id, task_payload in iteritems(tasks):
        tasks_run_time = ratelimit(tasks_run_time, tasks_per_second)

        reqid = job_id + request_id('-')
        reqid = reqid[:STRLEN_REQID]
        try:
            task_result = task.process(task_id, task_payload, reqid=reqid)
            task_results.update(task_result)
        except Exception as exc:
            self.logger.warn('[job_id=%s] Failed to process task %s: %s',
                             job_id, task_id, exc)
            task_errors[type(exc).__name__] += 1

    return job_id, list(tasks.keys()), task_results, task_errors, \
        beanstalkd_job['beanstalkd_reply']
def rebuilder_pass(self, num, queue, **kwargs):
    start_time = report_time = time.time()

    while True:
        item = queue.get()
        begin_time = time.time()
        self._rebuild_one(item, **kwargs)
        end_time = time.time()
        self.rebuilder_time += (end_time - begin_time)
        total_time = end_time - start_time
        self.waiting_time = total_time - self.rebuilder_time
        self.total_items_processed += 1
        queue.task_done()

        if end_time - self.last_reported >= self.report_interval:
            self.logger.info(
                self._get_report(num, start_time, end_time, total_time,
                                 report_time, **kwargs))
            report_time = end_time
            self.last_reported = end_time
            self.passes = 0

        self.items_run_time = ratelimit(self.items_run_time,
                                        self.max_items_per_second)
def rebuilder_pass(self, num, queue, retry_queue=None, **kwargs):
    while True:
        info = None
        err = None
        item = queue.get()
        try:
            info = self._rebuild_one(item, **kwargs)
        except exceptions.RetryLater as exc:
            if retry_queue:
                self.logger.warn("Putting an item in the retry queue: %s",
                                 exc.args[1])
                retry_queue.put(exc.args[0])
            else:
                err = str(exc)
        except Exception as exc:
            err = str(exc)
        queue.task_done()
        self.update_processed(item, info, error=err, **kwargs)
        self.log_report(**kwargs)
        self.items_run_time = ratelimit(self.items_run_time,
                                        self.max_items_per_second)
        if self.random_wait:
            eventlet.sleep(random.randint(0, self.random_wait) / 1.0e6)
def _distribute_events(self, reply_loc=None):
    next_worker = 0
    items_run_time = 0

    tasks_events = self._fetch_tasks_events_to_send()
    try:
        items_run_time = ratelimit(items_run_time,
                                   self.max_items_per_second)
        next_worker = self._send_task_event(next(tasks_events), reply_loc,
                                            next_worker)
        self.sending = True
    except StopIteration:
        self.sending = False
        return
    for task_event in tasks_events:
        items_run_time = ratelimit(items_run_time,
                                   self.max_items_per_second)
        next_worker = self._send_task_event(task_event, reply_loc,
                                            next_worker)
    self.sending = False
def converter_pass(self, input_file=None):

    def report(tag, now=None):
        if now is None:
            now = time.time()
        total_time = now - self.start_time
        self.logger.info(
            '%(tag)s %(volume)s '
            'started=%(start_time)s '
            'passes=%(passes)d '
            'errors=%(errors)d '
            'chunks=%(nb_chunks)d %(c_rate).2f/s '
            'total_time=%(total_time).2f '
            '(converter: %(success_rate).2f%%)' % {
                'tag': tag,
                'volume': self.volume_id,
                'start_time': datetime.fromtimestamp(
                    int(self.start_time)).isoformat(),
                'passes': self.passes,
                'errors': self.errors,
                'nb_chunks': self.total_chunks_processed,
                'c_rate': self.total_chunks_processed / total_time,
                'total_time': total_time,
                'success_rate':
                    100 * ((self.total_chunks_processed - self.errors) /
                           float(self.total_chunks_processed))
            })
        self.passes = 0
        self.last_reported = now

    self.start_time = time.time()
    self.errors = 0
    self.passes = 0

    self.backup_name = 'backup_%s_%f' % (self.volume_id, self.start_time)

    paths = self.paths_gen(input_file=input_file)
    for path in paths:
        self.safe_convert_chunk(path)

        now = time.time()
        if now - self.last_reported >= self.report_interval:
            report('RUN', now=now)

        self.chunks_run_time = ratelimit(self.chunks_run_time,
                                         self.max_chunks_per_second)
    report('DONE')
    return self.errors == 0
def index_pass(self):
    start_time = report_time = time.time()
    total_errors = 0

    paths = paths_gen(self.volume)

    for path in paths:
        self.safe_update_index(path)
        self.chunks_run_time = ratelimit(self.chunks_run_time,
                                         self.max_chunks_per_second)
        self.total_chunks_processed += 1
        now = time.time()
        if now - self.last_reported >= self.report_interval:
            self.logger.info(
                'started=%(start_time)s '
                'passes=%(passes)d '
                'errors=%(errors)d '
                'chunks=%(nb_chunks)d %(c_rate).2f/s '
                'total=%(total).2f ' % {
                    'start_time': datetime.fromtimestamp(
                        int(report_time)).isoformat(),
                    'passes': self.passes,
                    'errors': self.errors,
                    'nb_chunks': self.total_chunks_processed,
                    'c_rate': self.passes / (now - report_time),
                    'total': (now - start_time)
                })
            report_time = now
            total_errors += self.errors
            self.passes = 0
            self.errors = 0
            self.last_reported = now

    end_time = time.time()
    elapsed = (end_time - start_time) or 0.000001
    self.logger.info(
        'started=%(start_time)s '
        'ended=%(end_time)s '
        'elapsed=%(elapsed).02f '
        'errors=%(errors)d '
        'chunks=%(nb_chunks)d %(c_rate).2f/s ' % {
            'start_time': datetime.fromtimestamp(
                int(start_time)).isoformat(),
            'end_time': datetime.fromtimestamp(int(end_time)).isoformat(),
            'elapsed': elapsed,
            'errors': total_errors + self.errors,
            'nb_chunks': self.total_chunks_processed,
            'c_rate': self.total_chunks_processed / elapsed
        })
    if elapsed < self.interval:
        time.sleep(self.interval - elapsed)
def index_pass(self):

    def safe_update_index(path):
        chunk_id = path.rsplit('/', 1)[-1]
        if len(chunk_id) != STRLEN_CHUNKID:
            return
        for c in chunk_id:
            if c not in hexdigits:
                return
        try:
            self.update_index(path)
            self.successes += 1
            self.logger.debug('Updated %s', path)
        except OioNetworkException as exc:
            self.errors += 1
            self.logger.warn('ERROR while updating %s: %s', path, exc)
        except Exception:
            self.errors += 1
            self.logger.exception('ERROR while updating %s', path)

    def report(tag):
        total = self.errors + self.successes
        now = time.time()
        elapsed = (now - start_time) or 0.000001
        self.logger.info(
            '%(tag)s=%(current_time)s '
            'elapsed=%(elapsed).02f '
            'pass=%(pass)d '
            'errors=%(errors)d '
            'chunks=%(nb_chunks)d %(c_rate).2f/s' % {
                'tag': tag,
                'current_time': datetime.fromtimestamp(
                    int(now)).isoformat(),
                'pass': self.passes,
                'errors': self.errors,
                'nb_chunks': total,
                'c_rate': total / (now - self.last_reported),
                'elapsed': elapsed
            })
        self.last_reported = now

    start_time = time.time()
    self.last_reported = start_time
    self.errors = 0
    self.successes = 0

    paths = paths_gen(self.volume)
    report('started')
    for path in paths:
        safe_update_index(path)
        self.chunks_run_time = ratelimit(self.chunks_run_time,
                                         self.max_chunks_per_second)
        now = time.time()
        if now - self.last_reported >= self.report_interval:
            report('running')
    report('ended')
def _fill_queue(self):
    """
    Fill the queue.
    """
    items_run_time = 0

    items_with_beanstalkd_reply = \
        self.tool.fetch_items_with_beanstalkd_reply()
    for item_with_beanstalkd_reply in items_with_beanstalkd_reply:
        items_run_time = ratelimit(items_run_time,
                                   self.max_items_per_second)
        self.queue_workers.put(item_with_beanstalkd_reply)
def chunk_file_audit(self, chunk_file, chunk_id):
    try:
        meta, _ = read_chunk_metadata(chunk_file, chunk_id)
    except exc.MissingAttribute as err:
        raise exc.FaultyChunk(err)
    size = int(meta['chunk_size'])
    md5_checksum = meta['chunk_hash'].lower()
    reader = ChunkReader(chunk_file, size, md5_checksum,
                         compression=meta.get("compression", ""))
    with closing(reader):
        for buf in reader:
            buf_len = len(buf)
            self.bytes_running_time = ratelimit(
                self.bytes_running_time,
                self.max_bytes_per_second,
                increment=buf_len)
            self.bytes_processed += buf_len
            self.total_bytes_processed += buf_len

    try:
        container_id = meta['container_id']
        content_id = meta['content_id']
        _obj_meta, data = self.container_client.content_locate(
            cid=container_id, content=content_id, properties=False)

        # Check chunk data
        chunk_data = None
        metachunks = set()
        for c in data:
            if c['url'].endswith(meta['chunk_id']):
                metachunks.add(c['pos'].split('.', 2)[0])
                chunk_data = c
        if not chunk_data:
            raise exc.OrphanChunk('Not found in content')

        metachunk_size = meta.get('metachunk_size')
        if metachunk_size is not None \
                and chunk_data['size'] != int(metachunk_size):
            raise exc.FaultyChunk('Invalid metachunk size found')

        metachunk_hash = meta.get('metachunk_hash')
        if metachunk_hash is not None \
                and chunk_data['hash'] != meta['metachunk_hash']:
            raise exc.FaultyChunk('Invalid metachunk hash found')

        if chunk_data['pos'] != meta['chunk_pos']:
            raise exc.FaultyChunk('Invalid chunk position found')

    except exc.NotFound:
        raise exc.OrphanChunk('Chunk not found in container')
def rebuilder_pass(self, num, queue, **kwargs):
    while True:
        info = None
        err = None
        item = queue.get()
        try:
            info = self._rebuild_one(item, **kwargs)
        except Exception as exc:
            err = str(exc)
        queue.task_done()
        self.update_processed(item, info, error=err, **kwargs)
        self.log_report(**kwargs)
        self.items_run_time = ratelimit(self.items_run_time,
                                        self.max_items_per_second)
def chunk_audit(self, path):
    with open(path) as f:
        try:
            meta = read_chunk_metadata(f)
        except exc.MissingAttribute as e:
            raise exc.FaultyChunk('Missing extended attribute %s' % e)
        size = int(meta['chunk_size'])
        md5_checksum = meta['chunk_hash'].lower()
        reader = ChunkReader(f, size, md5_checksum)
        with closing(reader):
            for buf in reader:
                buf_len = len(buf)
                self.bytes_running_time = ratelimit(
                    self.bytes_running_time,
                    self.max_bytes_per_second,
                    increment=buf_len)
                self.bytes_processed += buf_len
                self.total_bytes_processed += buf_len

        try:
            container_id = meta['container_id']
            content_path = meta['content_path']
            content_attr, data = self.container_client.content_locate(
                cid=container_id, path=content_path)

            # Check chunk data
            chunk_data = None
            metachunks = set()
            for c in data:
                if c['url'].endswith(meta['chunk_id']):
                    metachunks.add(c['pos'].split('.', 2)[0])
                    chunk_data = c
            if not chunk_data:
                raise exc.OrphanChunk('Not found in content')

            if chunk_data['size'] != int(meta['chunk_size']):
                raise exc.FaultyChunk('Invalid chunk size found')

            if chunk_data['hash'] != meta['chunk_hash']:
                raise exc.FaultyChunk('Invalid chunk hash found')

            if chunk_data['pos'] != meta['chunk_pos']:
                raise exc.FaultyChunk('Invalid chunk position found')
        except exc.NotFound:
            raise exc.OrphanChunk('Chunk not found in container')
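The two audit loops above throttle by throughput rather than by iteration count: each call passes the number of bytes just read as increment, so max_bytes_per_second bounds bandwidth. The snippet below is a hedged usage sketch of that pattern; it reuses the hypothetical ratelimit_sketch helper from the earlier sketch (not the project's real helper), and copy_throttled is an illustrative name, not an existing function.

# Hedged sketch: throttling a copy loop by bytes per second, mirroring the
# increment=buf_len calls in the audit loops above.
import io


def copy_throttled(src, dst, max_bytes_per_second, bufsize=65536):
    bytes_running_time = 0
    while True:
        buf = src.read(bufsize)
        if not buf:
            break
        # Charge the limiter for the bytes actually read this iteration.
        bytes_running_time = ratelimit_sketch(bytes_running_time,
                                              max_bytes_per_second,
                                              increment=len(buf))
        dst.write(buf)


# Example: copy 1 MiB of in-memory data at roughly 256 KiB/s.
copy_throttled(io.BytesIO(b'\0' * 1024 * 1024), io.BytesIO(), 256 * 1024)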
def index_pass(self):
    start_time = time.time()
    self.last_reported = start_time
    self.errors = 0
    self.successes = 0

    paths = paths_gen(self.volume)
    self.report('started', start_time)
    for path in paths:
        self.safe_update_index(path)
        self.chunks_run_time = ratelimit(
            self.chunks_run_time,
            self.max_chunks_per_second
        )
        now = time.time()
        if now - self.last_reported >= self.report_interval:
            self.report('running', start_time)
    self.report('ended', start_time)
def run(self):
    start_time = report_time = time.time()
    total_errors = 0

    for (container_id, content_id) in self._list_contents():
        self.safe_change_policy(container_id, content_id)
        self.contents_run_time = ratelimit(
            self.contents_run_time,
            self.max_contents_per_second
        )
        self.total_contents_processed += 1
        now = time.time()

        if now - self.last_reported >= self.report_interval:
            self.logger.info(
                '%(start_time)s '
                '%(passes)d '
                '%(errors)d '
                '%(c_rate).2f '
                '%(total).2f ' % {
                    'start_time': time.ctime(report_time),
                    'passes': self.passes,
                    'errors': self.errors,
                    'c_rate': self.passes / (now - report_time),
                    'total': (now - start_time)
                }
            )
            report_time = now
            total_errors += self.errors
            self.passes = 0
            self.errors = 0
            self.last_reported = now

    elapsed = (time.time() - start_time) or 0.000001
    self.logger.info(
        '%(elapsed).02f '
        '%(errors)d '
        '%(content_rate).2f ' % {
            'elapsed': elapsed,
            'errors': total_errors + self.errors,
            'content_rate': self.total_contents_processed / elapsed
        }
    )
def _fill_queue(self):
    """
    Fill the queue.
    """
    items_run_time = 0

    try:
        items_with_beanstalkd_reply = \
            self.tool.fetch_items_with_beanstalkd_reply()
        for item_with_beanstalkd_reply in items_with_beanstalkd_reply:
            items_run_time = ratelimit(items_run_time,
                                       self.max_items_per_second)
            self.queue_workers.put(item_with_beanstalkd_reply)
            if not self.tool.running:
                break
    except Exception as exc:
        if self.tool.running:
            self.logger.error("Failed to fill queue: %s", exc)
            self.tool.success = False
def pass_volume(self):
    self.start_time = self.last_report = time.time()
    self.log_report('START', force=True)

    paths = paths_gen(self.volume)
    for path in paths:
        try:
            self.pass_chunk_file(path)
            self.chunks_processed += 1
        except Exception as exc:
            self.logger.error(
                'Failed to pass chunk file (chunk_file=%s): %s', path, exc)
            self.chunk_errors += 1

        self.log_report('RUN')
        self.chunks_run_time = ratelimit(self.chunks_run_time,
                                         self.max_chunks_per_second)

    self.log_report('DONE', force=True)
    return self.chunk_errors == 0 \
        and all(errors == 0 for errors in self.bean_errors.values())
def crawl_volume(self):
    """
    Crawl the volume assigned to this worker, and index every database.
    """
    paths = paths_gen(self.volume)
    self.full_scan_nb += 1
    self.success_nb = 0
    self.failed_nb = 0
    now = time.time()
    self.last_report_time = now

    self.report("starting")

    for db_path in paths:

        # Graceful exit, hopefully
        if self._stop:
            break

        db_id = db_path.rsplit("/")[-1].rsplit(".")

        if len(db_id) != 3:
            self.warn("Malformed db file name !", db_path)
            continue

        db_id = ".".join(db_id[:2])
        self.index_meta2_database(db_id)

        self.last_index_time = ratelimit(
            self.last_index_time,
            self.max_indexed_per_second
        )

        now = time.time()
        if now - self.last_report_time >= self.report_interval:
            self.report("running")

    self.report("ended")
def audit_pass(self):
    self.namespace, self.address = check_volume(self.volume)

    start_time = report_time = time.time()

    total_errors = 0
    total_corrupted = 0
    total_orphans = 0
    total_faulty = 0
    audit_time = 0

    paths = paths_gen(self.volume)

    for path in paths:
        loop_time = time.time()
        self.safe_chunk_audit(path)
        self.chunks_run_time = ratelimit(self.chunks_run_time,
                                         self.max_chunks_per_second)
        self.total_chunks_processed += 1
        now = time.time()
        if now - self.last_reported >= self.report_interval:
            self.logger.info(
                '%(start_time)s '
                '%(passes)d '
                '%(corrupted)d '
                '%(faulty)d '
                '%(orphans)d '
                '%(errors)d '
                '%(c_rate).2f '
                '%(b_rate).2f '
                '%(total).2f '
                '%(audit_time).2f '
                '%(audit_rate).2f' % {
                    'start_time': time.ctime(report_time),
                    'passes': self.passes,
                    'corrupted': self.corrupted_chunks,
                    'faulty': self.faulty_chunks,
                    'orphans': self.orphan_chunks,
                    'errors': self.errors,
                    'c_rate': self.passes / (now - report_time),
                    'b_rate': self.bytes_processed / (now - report_time),
                    'total': (now - start_time),
                    'audit_time': audit_time,
                    'audit_rate': audit_time / (now - start_time)
                })
            report_time = now
            total_corrupted += self.corrupted_chunks
            total_orphans += self.orphan_chunks
            total_faulty += self.faulty_chunks
            total_errors += self.errors
            self.passes = 0
            self.corrupted_chunks = 0
            self.orphan_chunks = 0
            self.faulty_chunks = 0
            self.errors = 0
            self.bytes_processed = 0
            self.last_reported = now
        audit_time += (now - loop_time)

    elapsed = (time.time() - start_time) or 0.000001
    self.logger.info(
        '%(elapsed).02f '
        '%(corrupted)d '
        '%(faulty)d '
        '%(orphans)d '
        '%(errors)d '
        '%(chunk_rate).2f '
        '%(bytes_rate).2f '
        '%(audit_time).2f '
        '%(audit_rate).2f' % {
            'elapsed': elapsed,
            'corrupted': total_corrupted + self.corrupted_chunks,
            'faulty': total_faulty + self.faulty_chunks,
            'orphans': total_orphans + self.orphan_chunks,
            'errors': total_errors + self.errors,
            'chunk_rate': self.total_chunks_processed / elapsed,
            'bytes_rate': self.total_bytes_processed / elapsed,
            'audit_time': audit_time,
            'audit_rate': audit_time / elapsed
        })
def index_pass(self):

    def safe_update_index(path):
        chunk_id = path.rsplit('/', 1)[-1]
        if len(chunk_id) != STRLEN_CHUNKID:
            self.logger.warn('WARN Not a chunk %s' % path)
            return
        for c in chunk_id:
            if c not in hexdigits:
                self.logger.warn('WARN Not a chunk %s' % path)
                return
        try:
            self.update_index(path, chunk_id)
            self.successes += 1
            self.logger.debug('Updated %s', path)
        except OioNetworkException as exc:
            self.errors += 1
            self.logger.warn('ERROR while updating %s: %s', path, exc)
        except VolumeException as exc:
            self.errors += 1
            self.logger.error('Cannot index %s: %s', path, exc)
            # All chunks of this volume are indexed in the same service,
            # no need to try another chunk, it will generate the same
            # error. Let the upper level retry later.
            raise
        except Exception:
            self.errors += 1
            self.logger.exception('ERROR while updating %s', path)
        self.total_since_last_reported += 1

    def report(tag):
        total = self.errors + self.successes
        now = time.time()
        elapsed = (now - start_time) or 0.000001
        self.logger.info(
            '%(tag)s=%(current_time)s '
            'elapsed=%(elapsed).02f '
            'pass=%(pass)d '
            'errors=%(errors)d '
            'chunks=%(nb_chunks)d %(c_rate).2f/s' % {
                'tag': tag,
                'current_time': datetime.fromtimestamp(
                    int(now)).isoformat(),
                'pass': self.passes,
                'errors': self.errors,
                'nb_chunks': total,
                'c_rate': self.total_since_last_reported /
                          (now - self.last_reported),
                'elapsed': elapsed
            })
        self.last_reported = now
        self.total_since_last_reported = 0

    start_time = time.time()
    self.last_reported = start_time
    self.errors = 0
    self.successes = 0

    paths = paths_gen(self.volume)
    report('started')
    for path in paths:
        safe_update_index(path)
        self.chunks_run_time = ratelimit(self.chunks_run_time,
                                         self.max_chunks_per_second)
        now = time.time()
        if now - self.last_reported >= self.report_interval:
            report('running')
    report('ended')
def mover_pass(self):
    self.namespace, self.address = check_volume(self.volume)

    start_time = report_time = time.time()

    total_errors = 0
    mover_time = 0

    paths = paths_gen(self.volume)

    for path in paths:
        loop_time = time.time()

        now = time.time()
        if now - self.last_usage_check >= self.usage_check_interval:
            used, total = statfs(self.volume)
            usage = (float(used) / total) * 100
            if usage <= self.usage_target:
                self.logger.info(
                    'current usage %.2f%%: target reached (%.2f%%)',
                    usage, self.usage_target)
                self.last_usage_check = now
                break

        self.safe_chunk_move(path)
        self.chunks_run_time = ratelimit(self.chunks_run_time,
                                         self.max_chunks_per_second)
        self.total_chunks_processed += 1
        now = time.time()
        if now - self.last_reported >= self.report_interval:
            self.logger.info(
                '%(start_time)s '
                '%(passes)d '
                '%(errors)d '
                '%(c_rate).2f '
                '%(b_rate).2f '
                '%(total).2f '
                '%(mover_time).2f '
                '%(mover_rate).2f' % {
                    'start_time': time.ctime(report_time),
                    'passes': self.passes,
                    'errors': self.errors,
                    'c_rate': self.passes / (now - report_time),
                    'b_rate': self.bytes_processed / (now - report_time),
                    'total': (now - start_time),
                    'mover_time': mover_time,
                    'mover_rate': mover_time / (now - start_time)
                })
            report_time = now
            total_errors += self.errors
            self.passes = 0
            self.bytes_processed = 0
            self.last_reported = now
        mover_time += (now - loop_time)

    elapsed = (time.time() - start_time) or 0.000001
    self.logger.info(
        '%(elapsed).02f '
        '%(errors)d '
        '%(chunk_rate).2f '
        '%(bytes_rate).2f '
        '%(mover_time).2f '
        '%(mover_rate).2f' % {
            'elapsed': elapsed,
            'errors': total_errors + self.errors,
            'chunk_rate': self.total_chunks_processed / elapsed,
            'bytes_rate': self.total_bytes_processed / elapsed,
            'mover_time': mover_time,
            'mover_rate': mover_time / elapsed
        })
def dispatch_tasks(self, job_id, job_type, job_info, job):
    job_config = job_info['config']
    job_params = job_config['params']
    tasks_per_second = job_config['tasks_per_second']
    tasks_batch_size = job_config['tasks_batch_size']
    last_task_id = job_info['tasks']['last_sent']

    job_tasks = job.get_tasks(job_params, marker=last_task_id)
    beanstalkd_workers = self.get_beanstalkd_workers()

    tasks_run_time = 0
    batch_per_second = tasks_per_second / float(tasks_batch_size)
    # The backend must have the tasks in order
    # to know the last task sent
    tasks = OrderedDict()
    for task_id, task_payload in job_tasks:
        if not self.running:
            break

        tasks[task_id] = task_payload
        if len(tasks) < tasks_batch_size:
            continue

        tasks_run_time = ratelimit(tasks_run_time, batch_per_second)

        sent = self.dispatch_tasks_batch(
            beanstalkd_workers, job_id, job_type, job_config, tasks)
        if sent:
            job_status, exc = self.handle_backend_errors(
                self.backend.update_tasks_sent, job_id, tasks.keys())
            tasks.clear()
            if exc is not None:
                self.logger.warn(
                    '[job_id=%s] Job has not been updated '
                    'with the sent tasks: %s', job_id, exc)
                break
            if job_status == 'PAUSED':
                self.logger.info('Job %s is paused', job_id)
                return

        if not self.running:
            break
    else:
        sent = True
        if tasks:
            sent = self.dispatch_tasks_batch(
                beanstalkd_workers, job_id, job_type, job_config, tasks)
        if sent:
            job_status, exc = self.handle_backend_errors(
                self.backend.update_tasks_sent, job_id, tasks.keys(),
                all_tasks_sent=True)
            if exc is None:
                if job_status == 'FINISHED':
                    self.logger.info('Job %s is finished', job_id)

                self.logger.info(
                    'Finished dispatching job (job_id=%s)', job_id)
                return
            else:
                self.logger.warn(
                    '[job_id=%s] Job has not been updated '
                    'with the last sent tasks: %s', job_id, exc)

    _, exc = self.handle_backend_errors(self.backend.free, job_id)
    if exc is not None:
        self.logger.warn(
            '[job_id=%s] Job has not been freed: %s', job_id, exc)
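The dispatchers above throttle at batch granularity: ratelimit() is called once per batch at tasks_per_second / tasks_batch_size, which keeps the overall task rate unchanged while paying one sleep per batch instead of one per task. The snippet below is only a hedged illustration of that arithmetic; the numbers are examples, not values from the project.

# Hedged illustration of the batch-level throttle used by dispatch_tasks().
tasks_per_second = 30
tasks_batch_size = 10
batch_per_second = tasks_per_second / float(tasks_batch_size)  # 3 batches/s
# Limiting batches at this rate preserves the intended per-task rate.
assert batch_per_second * tasks_batch_size == tasks_per_second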
def rebuilder_pass(self):
    start_time = report_time = time.time()

    rebuilder_time = 0
    chunks = self._fetch_chunks()
    for cid, content_id, chunk_id_or_pos, _ in chunks:
        loop_time = time.time()
        if self.dry_run:
            self.dryrun_chunk_rebuild(cid, content_id, chunk_id_or_pos)
        else:
            self.safe_chunk_rebuild(cid, content_id, chunk_id_or_pos)

        self.chunks_run_time = ratelimit(self.chunks_run_time,
                                         self.max_chunks_per_second)
        self.total_chunks_processed += 1
        now = time.time()

        if now - self.last_reported >= self.report_interval:
            self.logger.info(
                'RUN %(volume)s '
                'started=%(start_time)s '
                'passes=%(passes)d '
                'errors=%(errors)d '
                'chunks=%(nb_chunks)d %(c_rate).2f/s '
                'bytes=%(nb_bytes)d %(b_rate).2fB/s '
                'elapsed=%(total).2f '
                '(rebuilder: %(success_rate).2f%%)' % {
                    'volume': self.volume,
                    'start_time': datetime.fromtimestamp(
                        int(report_time)).isoformat(),
                    'passes': self.passes,
                    'errors': self.errors,
                    'nb_chunks': self.total_chunks_processed,
                    'nb_bytes': self.total_bytes_processed,
                    'c_rate': self.passes / (now - report_time),
                    'b_rate': self.bytes_processed / (now - report_time),
                    'total': (now - start_time),
                    'rebuilder_time': rebuilder_time,
                    'success_rate':
                        100 * ((self.total_chunks_processed - self.errors) /
                               float(self.total_chunks_processed))
                })
            report_time = now
            self.passes = 0
            self.bytes_processed = 0
            self.last_reported = now
        rebuilder_time += (now - loop_time)

    end_time = time.time()
    elapsed = (end_time - start_time) or 0.000001
    self.logger.info(
        'DONE %(volume)s '
        'started=%(start_time)s '
        'ended=%(end_time)s '
        'passes=%(passes)d '
        'elapsed=%(elapsed).02f '
        'errors=%(errors)d '
        'chunks=%(nb_chunks)d %(c_rate).2f/s '
        'bytes=%(nb_bytes)d %(b_rate).2fB/s '
        'elapsed=%(rebuilder_time).2f '
        '(rebuilder: %(success_rate).2f%%)' % {
            'volume': self.volume,
            'start_time': datetime.fromtimestamp(
                int(start_time)).isoformat(),
            'end_time': datetime.fromtimestamp(int(end_time)).isoformat(),
            'passes': self.passes,
            'elapsed': elapsed,
            'errors': self.errors,
            'nb_chunks': self.total_chunks_processed,
            'nb_bytes': self.total_bytes_processed,
            'c_rate': self.total_chunks_processed / elapsed,
            'b_rate': self.total_bytes_processed / elapsed,
            'rebuilder_time': rebuilder_time,
            'success_rate':
                100 * ((self.total_chunks_processed - self.errors) /
                       float(self.total_chunks_processed or 1))
        })
def dispatch_tasks(self, job_id, job_type, job_info, job):
    job_config = job_info['config']
    job_params = job_config['params']
    tasks_per_second = job_config['tasks_per_second']
    tasks_batch_size = job_config['tasks_batch_size']
    last_task_id = job_info['tasks']['last_sent']

    job_tasks = job.get_tasks(job_params, marker=last_task_id)
    beanstalkd_workers = self.get_beanstalkd_workers()

    tasks_run_time = 0
    batch_per_second = tasks_per_second / float(tasks_batch_size)
    # The backend must have the tasks in order
    # to know the last task sent
    tasks = OrderedDict()
    for task_id, task_payload in job_tasks:
        if not self.running:
            break

        tasks[task_id] = task_payload
        if len(tasks) < tasks_batch_size:
            continue

        tasks_run_time = ratelimit(tasks_run_time, batch_per_second)

        # Make sure that the sent tasks will be saved
        # before being processed
        exc = None
        sent = False
        while not sent:
            (job_status, old_last_sent), exc = self.handle_backend_errors(
                self.backend.update_tasks_sent, job_id, tasks.keys())
            if exc is not None:
                self.logger.warn(
                    '[job_id=%s] Job could not update '
                    'the sent tasks: %s', job_id, exc)
                break
            sent = self.dispatch_tasks_batch(
                beanstalkd_workers, job_id, job_type, job_config, tasks)
            if not sent:
                self.logger.warn(
                    '[job_id=%s] Job aborting the last sent tasks', job_id)
                job_status, exc = self.handle_backend_errors(
                    self.backend.abort_tasks_sent, job_id, tasks.keys(),
                    old_last_sent)
                if exc is not None:
                    self.logger.warn(
                        '[job_id=%s] Job could not abort '
                        'the last sent tasks: %s', job_id, exc)
                    break
            if job_status == XcuteJobStatus.PAUSED:
                self.logger.info('Job %s is paused', job_id)
                return

            if not self.running:
                break
            sleep(1)

        if exc is not None and not self.running:
            break
        tasks.clear()
    else:
        # Make sure that the sent tasks will be saved
        # before being processed
        sent = False
        while not sent:
            (job_status, old_last_sent), exc = self.handle_backend_errors(
                self.backend.update_tasks_sent, job_id, tasks.keys(),
                all_tasks_sent=True)
            if exc is not None:
                self.logger.warn(
                    '[job_id=%s] Job could not update '
                    'the sent tasks: %s', job_id, exc)
                break
            if tasks:
                sent = self.dispatch_tasks_batch(
                    beanstalkd_workers, job_id, job_type, job_config, tasks)
            else:
                sent = True
            if not sent:
                self.logger.warn(
                    '[job_id=%s] Job aborting the last sent tasks', job_id)
                job_status, exc = self.handle_backend_errors(
                    self.backend.abort_tasks_sent, job_id, tasks.keys(),
                    old_last_sent)
                if exc is not None:
                    self.logger.warn(
                        '[job_id=%s] Job could not abort '
                        'the last sent tasks: %s', job_id, exc)
                    break
            else:
                if job_status == XcuteJobStatus.FINISHED:
                    self.logger.info('Job %s is finished', job_id)

                self.logger.info(
                    'Finished dispatching job (job_id=%s)', job_id)
                return
            if job_status == XcuteJobStatus.PAUSED:
                self.logger.info('Job %s is paused', job_id)
                return

            if not self.running:
                break
            sleep(1)

    self.logger.warn(
        '[job_id=%s] Job was stopped before it was finished', job_id)

    _, exc = self.handle_backend_errors(self.backend.free, job_id)
    if exc is not None:
        self.logger.warn(
            '[job_id=%s] Job has not been freed: %s', job_id, exc)
def mover_pass(self, **kwargs):
    start_time = report_time = time.time()

    total_errors = 0
    mover_time = 0

    pool = GreenPool(self.concurrency)

    paths = paths_gen(self.volume)

    for path in paths:
        loop_time = time.time()

        now = time.time()
        if now - self.last_usage_check >= self.usage_check_interval:
            free_ratio = statfs(self.volume)
            usage = (1 - float(free_ratio)) * 100
            if usage <= self.usage_target:
                self.logger.info(
                    'current usage %.2f%%: target reached (%.2f%%)',
                    usage, self.usage_target)
                break
            self.last_usage_check = now

        # Spawn a chunk move task.
        # The call will block if no green thread is available.
        pool.spawn_n(self.safe_chunk_move, path)

        self.chunks_run_time = ratelimit(self.chunks_run_time,
                                         self.max_chunks_per_second)
        self.total_chunks_processed += 1
        now = time.time()
        if now - self.last_reported >= self.report_interval:
            self.logger.info(
                '%(start_time)s '
                '%(passes)d '
                '%(errors)d '
                '%(c_rate).2f '
                '%(b_rate).2f '
                '%(total).2f '
                '%(mover_time).2f '
                '%(mover_rate).2f' % {
                    'start_time': time.ctime(report_time),
                    'passes': self.passes,
                    'errors': self.errors,
                    'c_rate': self.passes / (now - report_time),
                    'b_rate': self.bytes_processed / (now - report_time),
                    'total': (now - start_time),
                    'mover_time': mover_time,
                    'mover_rate': mover_time / (now - start_time)
                })
            report_time = now
            total_errors += self.errors
            self.passes = 0
            self.bytes_processed = 0
            self.last_reported = now
        mover_time += (now - loop_time)
        if self.limit != 0 and self.total_chunks_processed >= self.limit:
            break

    pool.waitall()
    elapsed = (time.time() - start_time) or 0.000001
    self.logger.info(
        '%(elapsed).02f '
        '%(errors)d '
        '%(chunk_rate).2f '
        '%(bytes_rate).2f '
        '%(mover_time).2f '
        '%(mover_rate).2f' % {
            'elapsed': elapsed,
            'errors': total_errors + self.errors,
            'chunk_rate': self.total_chunks_processed / elapsed,
            'bytes_rate': self.total_bytes_processed / elapsed,
            'mover_time': mover_time,
            'mover_rate': mover_time / elapsed
        })