def wait_for_score(self, types, timeout=20.0, score_threshold=35):
    """Wait for services to have a score greater than the threshold."""
    deadline = time.time() + timeout
    while time.time() < deadline:
        wait = False
        for type_ in types:
            try:
                all_svcs = self.conscience.all_services(type_)
                for service in all_svcs:
                    if int(service['score']) < score_threshold:
                        wait = True
                        break
                else:
                    # No service registered yet, must wait.
                    if not all_svcs:
                        wait = True
            except Exception as err:
                logging.warn('Could not check service score: %s', err)
                wait = True
            if wait:
                # No need to check other types, we have to wait anyway
                break
        if not wait:
            return
        time.sleep(1)
    logging.info('Service(s) fails to reach %d score (timeout %d)',
                 score_threshold, timeout)

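# A hedged usage sketch for wait_for_score(): in test setup it can block
# until the services of interest are scored before running operations that
# need them. The service types and values below are examples only.
self.wait_for_score(('rawx', 'meta2'), timeout=30.0, score_threshold=10)
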
def converter_pass(self, input_file=None):
    def report(tag, now=None):
        if now is None:
            now = time.time()
        total_time = now - self.start_time
        self.logger.info(
            '%(tag)s %(volume)s '
            'started=%(start_time)s '
            'passes=%(passes)d '
            'errors=%(errors)d '
            'chunks=%(nb_chunks)d %(c_rate).2f/s '
            'total_time=%(total_time).2f '
            '(converter: %(success_rate).2f%%)' % {
                'tag': tag,
                'volume': self.volume_id,
                'start_time': datetime.fromtimestamp(
                    int(self.start_time)).isoformat(),
                'passes': self.passes,
                'errors': self.errors,
                'nb_chunks': self.total_chunks_processed,
                'c_rate': self.total_chunks_processed / total_time,
                'total_time': total_time,
                'success_rate': 100 * (
                    (self.total_chunks_processed - self.errors) /
                    (float(self.total_chunks_processed) or 1.0))
            })
        self.passes = 0
        self.last_reported = now

    self.start_time = time.time()
    self.errors = 0
    self.passes = 0
    self.backup_name = 'backup_%s_%f' % (self.volume_id, self.start_time)

    paths = self.paths_gen(input_file=input_file)
    for path in paths:
        self.safe_convert_chunk(path)

        now = time.time()
        if now - self.last_reported >= self.report_interval:
            report('RUN', now=now)

        self.chunks_run_time = ratelimit(self.chunks_run_time,
                                         self.max_chunks_per_second)
    report('DONE')
    return self.errors == 0

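# converter_pass() above paces itself with ratelimit(self.chunks_run_time,
# self.max_chunks_per_second). As a hedged illustration only (not the
# project's own implementation), a helper with that calling convention could
# look like the sketch below: it sleeps just long enough to keep the caller
# under max_rate iterations per second and returns the timestamp to carry
# into the next iteration.
import time


def simple_ratelimit(run_time, max_rate):
    """Throttle the calling loop to at most max_rate iterations per second."""
    if max_rate <= 0:
        return run_time
    min_interval = 1.0 / max_rate
    elapsed = time.time() - run_time
    if elapsed < min_interval:
        time.sleep(min_interval - elapsed)
    return time.time()
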
def wait_for_ready_job(self, tube, timeout=float('inf'),
                       poll_interval=0.2):
    """
    Wait until the specified tube has a ready job,
    or the timeout expires.
    """
    self.use(tube)
    job_id, data = self.peek_ready()
    deadline = time.time() + timeout
    while job_id is None and time.time() < deadline:
        time.sleep(poll_interval)
        job_id, data = self.peek_ready()
    return job_id, data

def run(self):
    """
    Main worker loop
    """
    self.start_time = time.time()
    while not self._stop:
        try:
            self.crawl_volume()
            self.last_scan_time = time.time()
            time.sleep(self.scans_interval)
        except exc.OioException as exception:
            self.logger.exception("ERROR during indexing meta2: %s",
                                  exception)

def run(self):
    self.tool.start_time = self.tool.last_report = time.time()
    self.tool.log_report('START', force=True)

    reply_loc = {
        'addr': self.beanstalkd_reply.addr,
        'tube': self.beanstalkd_reply.tube
    }
    # pylint: disable=no-member
    thread = threading.Thread(target=self._distribute_events,
                              args=[reply_loc])
    thread.start()

    # Wait until the thread is started sending events
    while self.sending is None:
        sleep(0.1)

    # Retrieve responses until all events are processed
    try:
        while not self._all_events_are_processed():
            tasks_res = self.beanstalkd_reply.fetch_job(
                self._tasks_res_from_res_event,
                timeout=DISTRIBUTED_DISPATCHER_TIMEOUT)
            for task_res in tasks_res:
                self.tool.update_counters(task_res)
                yield task_res
            self.tool.log_report('RUN')
    except OioTimeout:
        self.logger.error('No response for %d seconds',
                          DISTRIBUTED_DISPATCHER_TIMEOUT)
        self.tool.success = False
    except Exception:  # pylint: disable=broad-except
        self.logger.exception('ERROR in distributed dispatcher')
        self.tool.success = False

    self.tool.log_report('DONE', force=True)

def report(self, tag):
    """
    Log the status of the indexer.

    :param tag: One of three: starting, running, ended.
    """
    total = self.success_nb + self.failed_nb
    now = time.time()
    elapsed = (now - self.start_time) or 0.00001
    since_last_rprt = (now - self.last_report_time) or 0.00001
    self.logger.info(
        'volume_id=%(volume_id)s %(tag)s=%(current_time)s '
        'elapsed=%(elapsed).02f '
        'pass=%(pass)d '
        'errors=%(errors)d '
        'containers_indexed=%(total_indexed)d %(index_rate).2f/s',
        {
            'volume_id': self.volume_id,
            'tag': tag,
            'current_time': datetime.fromtimestamp(int(now)).isoformat(),
            'pass': self.full_scan_nb,
            'errors': self.failed_nb,
            'total_indexed': total,
            'index_rate': self.indexed_since_last_report / since_last_rprt,
            'elapsed': elapsed
        }
    )
    self.last_report_time = now
    self.indexed_since_last_report = 0

def run(self, *args, **kwargs):
    time.sleep(random() * self.interval)
    while True:
        pre = time.time()
        try:
            self.index_pass()
        except exc.VolumeException as err:
            self.logger.error('Cannot index chunks, will retry later: %s',
                              err)
        except Exception as err:
            self.logger.exception('ERROR during indexing: %s', err)
        else:
            self.passes += 1
        elapsed = (time.time() - pre) or 0.000001
        if elapsed < self.interval:
            time.sleep(self.interval - elapsed)

def __init__(self, conf, logger=None, **kwargs):
    self.conf = conf
    self.logger = logger or get_logger(conf)
    volume = conf.get('volume')
    if not volume:
        raise ConfigurationException('No volume specified for converter')
    self.volume = volume
    self.namespace, self.volume_id = check_volume(self.volume)
    # cache
    self.name_by_cid = CacheDict()
    self.content_id_by_name = CacheDict()
    # client
    self.container_client = ContainerClient(conf, **kwargs)
    self.content_factory = ContentFactory(conf, self.container_client,
                                          logger=self.logger)
    self._rdir = None  # we may never need it
    # stats/logs
    self.errors = 0
    self.passes = 0
    self.total_chunks_processed = 0
    self.start_time = 0
    self.last_reported = 0
    self.report_interval = int_value(conf.get('report_interval'), 3600)
    # speed
    self.chunks_run_time = 0
    self.max_chunks_per_second = int_value(conf.get('chunks_per_second'),
                                           30)
    # backup
    self.no_backup = true_value(conf.get('no_backup', False))
    self.backup_dir = conf.get('backup_dir') or tempfile.gettempdir()
    self.backup_name = 'backup_%s_%f' \
        % (self.volume_id, time.time())
    # dry run
    self.dry_run = true_value(conf.get('dry_run', False))

def wait_until_empty(self, tube, timeout=float('inf'), poll_interval=0.2,
                     initial_delay=0.0):
    """
    Wait until the specified tube is empty, or the timeout expires.
    """
    # TODO(FVE): check tube stats to ensure some jobs have passed through
    # and then get rid of the initial_delay
    # peek-ready requires "use", not "watch"
    self.use(tube)
    if initial_delay > 0.0:
        time.sleep(initial_delay)
    job_id, _ = self.peek_ready()
    deadline = time.time() + timeout
    while job_id is not None and time.time() < deadline:
        time.sleep(poll_interval)
        job_id, _ = self.peek_ready()

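# A hedged usage sketch for wait_until_empty(): in a functional test it can
# make sure the event agent has drained a tube before asserting on side
# effects. The instance name and tube name below are assumptions for
# illustration only.
beanstalkd.wait_until_empty('oio', timeout=30.0, poll_interval=0.5,
                            initial_delay=1.0)
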
def report(tag, now=None):
    if now is None:
        now = time.time()
    total_time = now - self.start_time
    self.logger.info(
        '%(tag)s %(volume)s '
        'started=%(start_time)s '
        'passes=%(passes)d '
        'errors=%(errors)d '
        'chunks=%(nb_chunks)d %(c_rate).2f/s '
        'total_time=%(total_time).2f '
        '(converter: %(success_rate).2f%%)' % {
            'tag': tag,
            'volume': self.volume_id,
            'start_time': datetime.fromtimestamp(
                int(self.start_time)).isoformat(),
            'passes': self.passes,
            'errors': self.errors,
            'nb_chunks': self.total_chunks_processed,
            'c_rate': self.total_chunks_processed / total_time,
            'total_time': total_time,
            'success_rate': 100 * (
                (self.total_chunks_processed - self.errors) /
                (float(self.total_chunks_processed) or 1.0))
        })
    self.passes = 0
    self.last_reported = now

def run(self):
    self.tool.start_time = self.tool.last_report = time.time()
    self.tool.log_report('START', force=True)

    try:
        with ContextPool(len(self.workers) + 1) as pool:
            # spawn workers
            for worker in self.workers:
                pool.spawn(worker.run)

            # spawn one worker to fill the queue
            pool.spawn(self._fill_queue_and_wait_all_items)

            # with the main thread
            while True:
                task_res = self.queue_reply.get()
                if task_res is None:  # end signal
                    break
                self.tool.update_counters(task_res)
                yield task_res
                self.tool.log_report('RUN')
    except Exception:  # pylint: disable=broad-except
        self.logger.exception('ERROR in local dispatcher')
        self.tool.success = False

    self.tool.log_report('DONE', force=True)

def rebuilder_pass(self, **kwargs):
    self.start_time = self.last_report = time.time()
    self.log_report('START', force=True)

    workers = list()
    with ContextPool(self.nworkers + 1) as pool:
        # spawn one worker for the retry queue
        rqueue = eventlet.Queue(self.nworkers)
        pool.spawn(self._read_retry_queue, rqueue, **kwargs)

        # spawn workers to rebuild
        queue = eventlet.Queue(self.nworkers * 10)
        for i in range(self.nworkers):
            worker = self._create_worker(**kwargs)
            workers.append(worker)
            pool.spawn(worker.rebuilder_pass, i, queue,
                       retry_queue=rqueue, **kwargs)

        # fill the queue (with the main thread)
        self._fill_queue(queue, **kwargs)

        # block until all items are rebuilt
        queue.join()
        # block until the retry queue is empty
        rqueue.join()

    self.log_report('DONE', force=True)
    return self.total_errors == 0

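# A hedged sketch of the work-queue contract assumed by rebuilder_pass():
# each worker loops on queue.get(), processes the item, and calls
# queue.task_done(), which is what lets queue.join() above return once every
# item has been consumed. The names below (worker_loop, process_item) are
# illustrative only, not taken from the source.
def process_item(item):
    pass  # hypothetical per-item rebuild work


def worker_loop(queue):
    while True:
        item = queue.get()
        try:
            process_item(item)
        finally:
            queue.task_done()
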
def log_report(self, status, force=False):
    """
    Log a report with a fixed interval.
    """
    end_time = time.time()
    if force or (end_time - self.last_report >= self.report_interval):
        counters = self._update_total_counters()
        self.logger.info(self._get_report(status, end_time, counters))
        self.last_report = end_time

def access_log_wrapper(self, req, *args, **kwargs):
    code = -1
    pre = time.time()
    try:
        rc = func(self, req, *args, **kwargs)
        code = rc._status_code
        return rc
    except HTTPException as exc:
        code = exc.code
        raise
    finally:
        post = time.time()
        # remote method code time size user reqid uri
        self.logger.info('%s %s %d %d %s %s %s %s',
                         req.environ['HTTP_HOST'],
                         req.environ['REQUEST_METHOD'],
                         code,
                         int((post - pre) * 1000000),
                         '-',
                         '-',
                         '-',
                         req.environ['RAW_URI'])

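# access_log_wrapper() above reads like the inner function of an access-log
# decorator: it times the wrapped handler, captures the status code (also
# when an HTTPException is raised), and always emits one log line in the
# finally block. A hedged sketch of how such a decorator could be assembled
# around it (the name access_log is an assumption, not taken from the source):
from functools import wraps


def access_log(func):
    @wraps(func)
    def access_log_wrapper(self, req, *args, **kwargs):
        # ... same body as above, ultimately calling and returning
        return func(self, req, *args, **kwargs)
    return access_log_wrapper
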
def index_pass(self):
    start_time = time.time()
    self.last_reported = start_time
    self.errors = 0
    self.successes = 0

    paths = paths_gen(self.volume)

    self.report('started', start_time)
    for path in paths:
        self.safe_update_index(path)
        self.chunks_run_time = ratelimit(
            self.chunks_run_time,
            self.max_chunks_per_second
        )
        now = time.time()
        if now - self.last_reported >= self.report_interval:
            self.report('running', start_time)
    self.report('ended', start_time)

def wait_for_event(self, tube, reqid=None, types=None, fields=None,
                   timeout=30.0):
    """
    Wait for an event in the specified tube.
    If reqid, types and/or fields are specified,
    drain events until the specified event is found.

    :param fields: dict of fields to look for in the event's URL
    :param types: list of types of events the method should look for
    """
    self.beanstalkd0.wait_for_ready_job(tube, timeout=timeout)
    self.beanstalkd0.watch(tube)
    now = time.time()
    deadline = now + timeout
    try:
        job_id = True
        while now < deadline:
            to = max(0.0, deadline - now)
            job_id, data = self.beanstalkd0.reserve(timeout=to)
            event = Event(jsonlib.loads(data))
            self.beanstalkd0.delete(job_id)
            now = time.time()
            if types and event.event_type not in types:
                logging.debug("ignore event %s (event mismatch)", data)
                continue
            if reqid and event.reqid != reqid:
                logging.info("ignore event %s (request_id mismatch)", data)
                continue
            if fields and any(fields[k] != event.url.get(k)
                              for k in fields):
                logging.info("ignore event %s (filter mismatch)", data)
                continue
            logging.info("event %s", data)
            return event
        logging.warn(
            ('wait_for_event(reqid=%s, types=%s, fields=%s, timeout=%s) '
             'reached its timeout'),
            reqid, types, fields, timeout)
    except ResponseError as err:
        logging.warn('%s', err)
    return None

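# A hedged usage sketch for wait_for_event(): trigger an operation tagged
# with a known request ID, then block until the matching notification shows
# up on the tube. The tube name, request ID and event type below are
# assumptions for illustration only.
reqid = 'test-req-0001'
event = self.wait_for_event('oio-preserved', reqid=reqid,
                            types=(EventTypes.CONTENT_NEW,))
self.assertIsNotNone(event)
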
def __init__(self, conf, logger, volume, container_ids):
    self.conf = conf
    self.logger = logger
    self.volume = volume
    self.volume_ns, self.volume_id = check_volume(self.volume)
    self.container_ids = container_ids or list()
    self.container_ids = [container_id.upper()
                          for container_id in self.container_ids]

    self.namespace = self.conf['namespace']
    if self.namespace != self.volume_ns:
        raise ValueError(
            'Namespace (%s) mismatch with volume namespace (%s)',
            self.namespace, self.volume_ns)

    # action
    self.action_name = self.conf['action'].lower()
    if self.action_name == 'insert':
        self.action = self._insert_bean
    elif self.action_name == 'update':
        self.action = self._update_bean
    elif self.action_name == 'check':
        self.action = self._check_bean
    else:
        raise ValueError('Unknown action (%s)', self.action_name)

    # speed
    self.chunks_run_time = 0
    self.max_chunks_per_second = int_value(
        self.conf.get('chunks_per_second'),
        self.DEFAULT_CHUNKS_PER_SECOND)

    # counters
    self.chunks_processed = 0
    self.chunk_errors = 0
    self.beans_processed = dict()
    self.bean_successes = dict()
    self.bean_already_exists = dict()
    self.bean_orphans = dict()
    self.bean_errors = dict()
    for bean_type in self.BEAN_TYPES:
        self.beans_processed[bean_type] = 0
        self.bean_successes[bean_type] = 0
        self.bean_already_exists[bean_type] = 0
        self.bean_orphans[bean_type] = 0
        self.bean_errors[bean_type] = 0

    # report
    self.start_time = 0
    self.last_report = 0
    self.report_interval = int_value(conf.get('report_interval'),
                                     self.DEFAULT_REPORT_INTERVAL)

    self.client = ContainerClient({'namespace': self.namespace},
                                  logger=self.logger)
    self.ctime = int(time.time())

def wait_for_event(self, tube, reqid=None, type_=None, fields=None,
                   timeout=30.0):
    """
    Wait for an event in the specified tube.
    If reqid, type_ and/or fields are specified,
    drain events until the specified event is found.
    """
    self.beanstalkd0.wait_for_ready_job(tube, timeout=timeout)
    self.beanstalkd0.watch(tube)
    now = time.time()
    deadline = now + timeout
    try:
        job_id = True
        while now < deadline:
            to = max(0.0, deadline - now)
            job_id, data = self.beanstalkd0.reserve(timeout=to)
            edata = jsonlib.loads(data)
            self.beanstalkd0.delete(job_id)
            now = time.time()
            if type_ and edata['event'] != type_:
                logging.debug("ignore event %s (event mismatch)", data)
                continue
            if reqid and edata.get('request_id') != reqid:
                logging.info("ignore event %s (request_id mismatch)", data)
                continue
            if fields and any(fields[k] != edata.get('url', {}).get(k)
                              for k in fields):
                logging.info("ignore event %s (filter mismatch)", data)
                continue
            logging.info("event %s", data)
            return edata
        logging.warn(
            ('wait_for_event(reqid=%s, type_=%s, fields=%s, timeout=%s) '
             'reached its timeout'),
            reqid, type_, fields, timeout)
    except ResponseError as err:
        logging.info('%s', err)
    return None

def log_report(self, status, force=False, **kwargs):
    end_time = time.time()
    # Take the report lock (blocking when forced, non-blocking otherwise)
    # so that concurrent workers do not emit duplicate reports.
    if (force and self.lock_report.acquire()) \
            or (end_time - self.last_report >= self.report_interval
                and self.lock_report.acquire(False)):
        try:
            counters = self.update_totals()
            self.logger.info(
                self._get_report(status, end_time, counters, **kwargs))
            self.last_report = end_time
        finally:
            self.lock_report.release()

def resolve_service_id(self, service_type, service_id, check_format=True):
    """
    :returns: Service address corresponding to the service ID
    """
    if check_format:
        url = "http://" + service_id
        parsed = urlparse(url)
        if parsed.port is not None:
            return service_id
    cached_service_id = self._service_ids.get(service_id)
    if cached_service_id \
            and (time.time() - cached_service_id['mtime']
                 < self._service_id_max_age):
        return cached_service_id['addr']
    result = self.resolve(srv_type=service_type, service_id=service_id)
    service_addr = result['addr']
    self._service_ids[service_id] = {
        'addr': service_addr,
        'mtime': time.time()
    }
    return service_addr

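# A hedged usage sketch for resolve_service_id(): a service ID that already
# carries a port is returned unchanged, anything else goes through resolve()
# and the result is cached for _service_id_max_age seconds. The variable
# names and IDs below are invented for illustration.
addr = resolver.resolve_service_id('rawx', '192.168.1.2:6201')  # returned as-is
addr = resolver.resolve_service_id('rawx', 'rawx-1')  # resolved, then cached
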
def crawl_volume(self):
    """
    Crawl the volume assigned to this worker, and index every database.
    """
    paths = paths_gen(self.volume)
    self.full_scan_nb += 1
    self.success_nb = 0
    self.failed_nb = 0
    now = time.time()
    self.last_report_time = now

    self.report("starting")

    for db_path in paths:
        # Graceful exit, hopefully
        if self._stop:
            break

        db_id = db_path.rsplit("/")[-1].rsplit(".")

        if len(db_id) != 3:
            self.warn("Malformed db file name !", db_path)
            continue

        db_id = ".".join(db_id[:2])
        self.index_meta2_database(db_id)

        self.last_index_time = ratelimit(
            self.last_index_time,
            self.max_indexed_per_second
        )

        now = time.time()
        if now - self.last_report_time >= self.report_interval:
            self.report("running")

    self.report("ended")

def get_tasks(self, job_params, marker=None):
    service_id = job_params['service_id']
    usage_target = job_params['usage_target']
    usage_check_interval = job_params['usage_check_interval']

    if usage_target > 0:
        now = time.time()
        current_usage = self.get_usage(service_id)
        if current_usage <= usage_target:
            self.logger.info(
                'current usage %.2f%%: target already reached (%.2f%%)',
                current_usage, usage_target)
            return
        last_usage_check = now

    chunk_infos = self.get_chunk_infos(job_params, marker=marker)

    for container_id, content_id, chunk_id, _ in chunk_infos:
        task_id = '|'.join((container_id, content_id, chunk_id))
        yield task_id, {
            'container_id': container_id,
            'content_id': content_id,
            'chunk_id': chunk_id
        }

        if usage_target <= 0:
            continue
        now = time.time()
        if now - last_usage_check < usage_check_interval:
            continue
        current_usage = self.get_usage(service_id)
        if current_usage > usage_target:
            last_usage_check = now
            continue
        self.logger.info('current usage %.2f%%: target reached (%.2f%%)',
                         current_usage, usage_target)
        return

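# A hedged usage sketch for get_tasks(): it yields (task_id, task_payload)
# pairs and, when a usage target is set, stops early once the volume usage
# drops back under that target. The job_params values and the `mover`
# instance name are invented for illustration.
job_params = {'service_id': 'rawx-1', 'usage_target': 0,
              'usage_check_interval': 60.0}
for task_id, task_payload in mover.get_tasks(job_params):
    print(task_id, task_payload['chunk_id'])
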
def wait_for_event(self, tube, reqid=None, type_=None, timeout=30.0):
    """
    Wait for an event in the specified tube.
    If reqid and/or type_ are specified,
    drain events until the specified event is found.
    """
    self.beanstalkd0.wait_for_ready_job(tube, timeout=timeout)
    self.beanstalkd0.watch(tube)
    now = time.time()
    deadline = now + timeout
    try:
        job_id = True
        while now < deadline:
            to = max(0.0, deadline - now)
            job_id, data = self.beanstalkd0.reserve(timeout=to)
            edata = jsonlib.loads(data)
            self.beanstalkd0.delete(job_id)
            if not type_ or edata['event'] == type_:
                if not reqid or edata.get('request_id') == reqid:
                    return edata
            now = time.time()
    except ResponseError as err:
        logging.info('%s', err)
    return None

def index_meta2_database(self, db_id):
    """
    Add a meta2 database to the rdir index. Fails if the database isn't
    handled by the current volume.

    :param db_id: The ContentID representing the reference to the database.
    """
    if len(db_id) < STRLEN_REFERENCEID:
        self.warn('Not a valid container ID', db_id)
        return
    try:
        srvcs = self.dir_client.list(cid=db_id)
        account, container = srvcs['account'], srvcs['name']
        is_peer = self.volume_id in [x['host'] for x in srvcs['srv']
                                     if x['type'] == 'meta2']

        container_id = db_id.rsplit(".")[0]

        if six.PY2:
            if isinstance(account, six.text_type):
                account = account.encode('utf-8')
            if isinstance(container, six.text_type):
                container = container.encode('utf-8')
        cont_url = "{0}/{1}/{2}".format(self.namespace, account, container)

        if not is_peer:
            self.warn("Trying to index a container that isn't handled by "
                      "this volume", db_id)
            if self.attempt_bad_index_removal:
                self._attempt_index_removal(cont_url, container_id)
            return

        self.index_client.meta2_index_push(volume_id=self.volume_id,
                                           container_url=cont_url,
                                           mtime=time.time(),
                                           container_id=container_id)

        self.success_nb += 1
    except exc.OioException as exception:
        self.failed_nb += 1
        self.warn("Unable to index container: %s" % str(exception), db_id)

    self.indexed_since_last_report += 1

def update_index(self, path, chunk_id):
    with open(path) as file_:
        try:
            meta = None
            if meta is None:
                meta, _ = read_chunk_metadata(file_, chunk_id)
        except exc.MissingAttribute as err:
            raise exc.FaultyChunk(err)

    data = {'mtime': int(time.time())}
    headers = {REQID_HEADER: request_id('blob-indexer-')}
    self.index_client.chunk_push(self.volume_id,
                                 meta['container_id'],
                                 meta['content_id'],
                                 meta['chunk_id'],
                                 headers=headers,
                                 **data)

def task_event_from_item(item):
    namespace, container_id, content_id, chunk_id_or_pos = item
    return {
        'when': time.time(),
        'event': EventTypes.CONTENT_BROKEN,
        'url': {
            'ns': namespace,
            'id': container_id,
            'content': content_id
        },
        'data': {
            'missing_chunks': [
                chunk_id_or_pos
            ]
        }
    }

def res_event_from_task_res(task_res):
    item, bytes_processed, error = task_res
    namespace, container_id, content_id, chunk_id_or_pos = item
    return {
        'when': time.time(),
        'event': EventTypes.CONTENT_REBUILT,
        'url': {
            'ns': namespace,
            'id': container_id,
            'content': content_id
        },
        'data': {
            'chunks_rebuilt': [{
                'chunk_id_or_pos': chunk_id_or_pos,
                'bytes_processed': bytes_processed,
                'error': error
            }]
        }
    }

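# A hedged usage sketch for the two event builders above: items are
# (namespace, container_id, content_id, chunk_id_or_pos) tuples, and a task
# result is (item, bytes_processed, error). All values below are invented
# for illustration.
item = ('OPENIO', 'A0B1C2D3', 'FEDCBA98', '0')
broken_event = task_event_from_item(item)
rebuilt_event = res_event_from_task_res((item, 1048576, None))
assert broken_event['url']['content'] == rebuilt_event['url']['content']
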
def pass_volume(self):
    self.start_time = self.last_report = time.time()
    self.log_report('START', force=True)

    paths = paths_gen(self.volume)
    for path in paths:
        try:
            self.pass_chunk_file(path)
            self.chunks_processed += 1
        except Exception as exc:
            self.logger.error(
                'Failed to pass chunk file (chunk_file=%s): %s',
                path, exc)
            self.chunk_errors += 1

        self.log_report('RUN')
        self.chunks_run_time = ratelimit(self.chunks_run_time,
                                         self.max_chunks_per_second)

    self.log_report('DONE', force=True)
    return self.chunk_errors == 0 \
        and all(errors == 0 for errors in self.bean_errors.values())

def report(self, tag, start_time):
    total = self.errors + self.successes
    now = time.time()
    elapsed = (now - start_time) or 0.000001
    self.logger.info(
        '%(tag)s=%(current_time)s '
        'elapsed=%(elapsed).02f '
        'pass=%(pass)d '
        'errors=%(errors)d '
        'chunks=%(nb_chunks)d %(c_rate).2f/s' % {
            'tag': tag,
            'current_time': datetime.fromtimestamp(int(now)).isoformat(),
            'pass': self.passes,
            'errors': self.errors,
            'nb_chunks': total,
            'c_rate': self.total_since_last_reported /
                (now - self.last_reported),
            'elapsed': elapsed
        }
    )
    self.last_reported = now
    self.total_since_last_reported = 0

def rebuilder_pass(self, **kwargs):
    self.start_time = self.last_report = time.time()
    self.log_report('START', force=True)

    workers = list()
    with ContextPool(self.concurrency + 1) as pool:
        # spawn one worker for the retry queue
        rqueue = eventlet.Queue(self.concurrency)
        pool.spawn(self._read_retry_queue, rqueue, **kwargs)

        # spawn workers to rebuild
        queue = eventlet.Queue(self.concurrency * 10)
        for i in range(self.concurrency):
            worker = self._create_worker(**kwargs)
            workers.append(worker)
            pool.spawn(worker.rebuilder_pass, i, queue,
                       retry_queue=rqueue, **kwargs)

        # fill the queue (with the main thread)
        try:
            self._fill_queue(queue, **kwargs)
        except Exception as exc:
            if self.running:
                self.logger.error("Failed to fill queue: %s", exc)
                self.success = False

        # block until all items are rebuilt
        queue.join()
        # block until the retry queue is empty
        rqueue.join()

    self.log_report('DONE', force=True)
    return self.success and self.total_errors == 0