def list(self):
    """
    Yield the tiles read from the Redis stream.

    Each iteration reads at most one new message for the consumer group,
    blocking for up to ``round(self._timeout_ms)``. When nothing new comes
    back, ``self._claim_olds()`` is asked for old pending messages to process
    instead.

    The generator only stops when ``self._claim_olds()`` returns ``None``
    (nothing pending at all) and ``self._stop_if_empty`` is true.

    Messages that fail to decode are logged, counted in the ``decode_error``
    counter and skipped.
    """
    count = 0
    while True:
        queues = self._redis.xreadgroup(groupname=STREAM_GROUP, consumername=CONSUMER_NAME,
                                        streams={self._name: '>'}, count=1,
                                        block=round(self._timeout_ms))

        if not queues:
            # Nothing new: fall back on the messages that have been pending for too long.
            queues = self._claim_olds()
            if queues is None:
                # None means nothing is pending at all. An empty list means some
                # messages are still pending, so the gauge must not be forced to
                # 0 in that case.
                stats.set_gauge(['redis', self._name_str, 'nb_messages'], 0)
                if self._stop_if_empty:
                    break
        if queues:
            for redis_message in queues:
                queue_name, queue_messages = redis_message
                assert queue_name == self._name
                for message in queue_messages:
                    id_, body = message
                    try:
                        tile = decode_message(body[b'message'], from_redis=True, sqs_message=id_)
                        yield tile
                    except Exception:
                        logger.warning('Failed decoding the Redis message', exc_info=True)
                        stats.increment_counter(['redis', self._name_str, 'decode_error'])
                    count += 1

            # Refresh the gauge whenever the processed count is a multiple of 100.
            if count % 100 == 0:
                stats.set_gauge(['redis', self._name_str, 'nb_messages'],
                                self._redis.xlen(name=self._name))
def _claim_olds(self):
    """
    Take over the messages that have been pending for too long.

    Looks at up to 10 pending entries of the consumer group. An entry whose
    ``time_since_delivered`` is at least ``self._pending_timeout_ms`` is either:

    * stolen (claimed for this consumer) when it has been delivered fewer than
      ``self._max_retries`` times;
    * dropped otherwise: claimed, acknowledged, deleted from the stream and
      recorded (by tile coordinate) in the ``self._errors_name`` stream.

    Returns:
        ``None`` when nothing is pending at all, ``[]`` when entries are
        pending but none was stolen, otherwise a list holding one
        ``[self._name, messages]`` pair with the claimed messages.
    """
    pendings = self._master.xpending_range(
        name=self._name, groupname=STREAM_GROUP, min="-", max="+", count=10
    )
    if not pendings:
        # None means there is nothing pending at all
        return None

    to_steal = []
    to_drop = []
    for pending in pendings:
        if int(pending["time_since_delivered"]) >= self._pending_timeout_ms:
            id_ = pending["message_id"]
            nb_retries = int(pending["times_delivered"])
            if nb_retries < self._max_retries:
                logger.info(
                    "A message has been pending for too long. Stealing it (retry #%d): %s",
                    nb_retries,
                    id_,
                )
                to_steal.append(id_)
            else:
                logger.warning(
                    "A message has been pending for too long and retried too many times. Dropping it: %s",
                    id_,
                )
                to_drop.append(id_)

    if to_drop:
        drop_messages = self._master.xclaim(
            name=self._name,
            groupname=STREAM_GROUP,
            consumername=CONSUMER_NAME,
            min_idle_time=self._pending_timeout_ms,
            message_ids=to_drop,
        )
        drop_ids = [drop_message[0] for drop_message in drop_messages]
        # XCLAIM only returns the entries it actually claimed, so the result can
        # be empty (e.g. another consumer claimed them in the meantime). XACK and
        # XDEL need at least one ID, so skip them in that case.
        if drop_ids:
            self._master.xack(self._name, STREAM_GROUP, *drop_ids)
            self._master.xdel(self._name, *drop_ids)
        for _, drop_message in drop_messages:
            tile = decode_message(drop_message[b"message"])
            self._master.xadd(
                name=self._errors_name,
                fields=dict(tilecoord=str(tile.tilecoord)),
                maxlen=self._max_errors_nb,
            )
        stats.increment_counter(["redis", self._name_str, "dropped"], len(to_drop))

    if to_steal:
        messages = self._master.xclaim(
            name=self._name,
            groupname=STREAM_GROUP,
            consumername=CONSUMER_NAME,
            min_idle_time=self._pending_timeout_ms,
            message_ids=to_steal,
        )
        stats.increment_counter(["redis", self._name_str, "stolen"], len(to_steal))
        return [[self._name, messages]]
    else:
        # Empty means there are pending jobs, but they are not old enough to be stolen
        return []
def init(config: pyramid.config.Configurator) -> None:
    """
    Install the ``/versions.json`` service when a versions file is present.

    Does nothing if ``VERSIONS_PATH`` is not a file. Otherwise the versions are
    read once, exposed through a GET route under the application's base path,
    and the running version (git tag when known, plus git hash) is logged and
    reported through the ``version`` counter.
    """
    if not os.path.isfile(VERSIONS_PATH):
        return

    versions = _read_versions()
    route_path = _utils.get_base_path(config) + r"/versions.json"
    config.add_route("c2c_versions", route_path, request_method="GET")
    # The view always serves the versions that were read at startup.
    config.add_view(lambda request: versions, route_name="c2c_versions", renderer="fast_json", http_cache=0)
    LOG.info("Installed the /versions.json service")

    git_hash = versions['main']['git_hash']
    if 'git_tag' not in versions['main']:
        LOG.warning("Starting version %s", git_hash)
    else:
        LOG.warning("Starting version %s (%s)", versions['main']['git_tag'], git_hash)

    if not stats.USE_TAGS:
        stats.increment_counter(['version', git_hash], 1)
    else:
        stats.increment_counter(['version'], 1, tags={'version': git_hash})
def list(self):
    """
    Yield the tiles read from the Redis stream.

    Each iteration reads at most one new message for the consumer group,
    blocking for up to ``round(self._timeout_ms)``. When nothing new comes
    back, ``self._claim_olds()`` is asked for old pending messages to process
    instead.

    The generator only stops when ``self._claim_olds()`` returns ``None``
    (nothing pending at all) and ``self._stop_if_empty`` is true.

    Messages that fail to decode are logged, counted in the ``decode_error``
    counter and skipped.
    """
    count = 0
    while True:
        queues = self._redis.xreadgroup(groupname=STREAM_GROUP,
                                        consumername=CONSUMER_NAME,
                                        streams={self._name: '>'},
                                        count=1,
                                        block=round(self._timeout_ms))

        if not queues:
            # Nothing new: fall back on the messages that have been pending for too long.
            queues = self._claim_olds()
            if queues is None:
                # None means nothing is pending at all. An empty list means some
                # messages are still pending, so the gauge must not be forced to
                # 0 in that case.
                stats.set_gauge(['redis', self._name_str, 'nb_messages'], 0)
                if self._stop_if_empty:
                    break
        if queues:
            for redis_message in queues:
                queue_name, queue_messages = redis_message
                assert queue_name == self._name
                for message in queue_messages:
                    id_, body = message
                    try:
                        tile = decode_message(body[b'message'],
                                              from_redis=True,
                                              sqs_message=id_)
                        yield tile
                    except Exception:
                        logger.warning('Failed decoding the Redis message',
                                       exc_info=True)
                        stats.increment_counter(
                            ['redis', self._name_str, 'decode_error'])
                    count += 1

            # Refresh the gauge whenever the processed count is a multiple of 100.
            if count % 100 == 0:
                stats.set_gauge(['redis', self._name_str, 'nb_messages'],
                                self._redis.xlen(name=self._name))
def __call__(self, request: pyramid.request.Request) -> str:
    """
    Check that the alembic version stored in the database is the expected one.

    For each binding returned by ``_get_bindings`` for ``READ_AND_WRITE``, the
    ``version_num`` value is read from the version table (timed through the
    stats timer) and reported in the ``alembic_version`` counter.

    Returns:
        The expected version (``version_``).

    Raises:
        Exception: if a database holds a version different from ``version_``.
    """
    for binding in _get_bindings(self.session, EngineType.READ_AND_WRITE):
        with binding as session:
            key = ["sql", "manual", "health_check", "alembic"]
            tags: Optional[Dict[str, str]]
            if stats.USE_TAGS:
                tags = {"conf": alembic_ini_path, "con": binding.name()}
            else:
                # Without tag support, the details become part of the key itself.
                key += [alembic_ini_path, binding.name()]
                tags = None

            with stats.timer_context(key, tags):
                quote = session.bind.dialect.identifier_preparer.quote
                # Identifiers are quoted by the dialect, hence the nosec marker.
                query = (
                    "SELECT version_num FROM "  # nosec
                    f"{quote(version_schema)}.{quote(version_table)}"
                )
                (actual_version,) = session.execute(query).fetchone()

                if not stats.USE_TAGS:
                    stats.increment_counter(["alembic_version", name, actual_version], 1)
                else:
                    stats.increment_counter(
                        ["alembic_version"], 1, tags={"version": actual_version, "name": name}
                    )

                if actual_version != version_:
                    raise Exception(
                        f"Invalid alembic version (db: {actual_version}, code: {version_})"
                    )
    return version_
def evaluate(self, root_dir: str, files: List[str]):
    """
    Evaluate the template files of a directory into its destination directory.

    Args:
        root_dir: directory containing the files.
        files: paths of the files to handle, relative to ``root_dir``.

    Files ending with ``"." + self._extension`` are evaluated with
    ``self._evaluate_file`` into a destination path stripped of that suffix;
    a failure is logged and counted but does not stop the loop. Any other
    path that is not a directory and has no existing destination is
    hard-linked to its destination, unless source and destination are the same path.
    """
    # Length of the ".<extension>" suffix to strip from template names.
    suffix_len = len(self._extension) + 1
    dest_dir = self._get_dest_dir(root_dir)
    LOG.info(
        "Evaluating templates %s -> %s with data keys: %s",
        root_dir,
        dest_dir,
        ", ".join(self._data.keys()),
    )

    for sub_path in files:
        src_path = os.path.join(root_dir, sub_path)
        dest_path = os.path.join(dest_dir, sub_path)
        os.makedirs(os.path.dirname(dest_path), exist_ok=True)

        if not src_path.endswith("." + self._extension):
            # Not a template: just make the file visible in the destination.
            needs_link = (
                src_path != dest_path
                and not os.path.isdir(src_path)
                and not os.path.exists(dest_path)
            )
            if needs_link:
                os.link(src_path, dest_path)
            continue

        dest_path = dest_path[:-suffix_len]
        LOG.debug("Evaluating template: %s -> %s", src_path, dest_path)
        try:
            self._evaluate_file(src_path, dest_path)
        except Exception:
            LOG.warning(
                "Failed applying the %s template: %s", self._config["type"], src_path, exc_info=True
            )
            stats.increment_counter(["source", self._source_id, self.get_type(), "error"])
def hello_get(_):
    """
    Return the value of the first Hello row.

    The original note says this read will use the slave database. The query is
    timed, then a test counter and a test gauge are updated.
    """
    with timer_context(["sql", "read_hello"]):
        first_row = models.DBSession.query(models.Hello).first()

    increment_counter(["test", "counter"])
    gauge_tags = {"value": 24, "toto": "tutu"}
    set_gauge(["test", "gauge/s"], 42, tags=gauge_tags)

    return {"value": first_row.value}
def hello_get(request):
    """
    Return the value of the first Hello row.

    The original note says this read will use the slave database. The query is
    timed, then a test counter and a test gauge are updated.
    """
    with timer_context(['sql', 'read_hello']):
        first_row = models.DBSession.query(models.Hello).first()

    increment_counter(['test', 'counter'])
    gauge_tags = {'value': 24, 'toto': 'tutu'}
    set_gauge(['test', 'gauge/s'], 42, tags=gauge_tags)

    return {'value': first_row.value}
def fetch(self):
    """
    Fetch the source, then evaluate its templates.

    ``self._is_loaded`` is set to False while the work is in progress and is
    always set back to True at the end, even on failure. Any exception is
    logged, counted in the source's ``error`` counter and re-raised.
    """
    try:
        self._is_loaded = False
        timer_key = ["source", self.get_id(), "fetch"]
        with stats.timer_context(timer_key):
            self._do_fetch()
        self._eval_templates()
    except Exception:
        LOG.exception("Error with source %s", self.get_id())
        error_key = ["source", self.get_id(), "error"]
        stats.increment_counter(error_key)
        raise
    finally:
        self._is_loaded = True
def refresh(self):
    """
    Refresh the source, then evaluate its templates.

    ``self._is_loaded`` is set to False while the work is in progress and is
    always set back to True at the end, even on failure. Any exception is
    logged, counted in the source's ``error`` counter and re-raised.
    """
    LOG.info("Doing a refresh of %s", self.get_id())
    try:
        self._is_loaded = False
        timer_key = ["source", self.get_id(), "refresh"]
        with stats.timer_context(timer_key):
            self._do_refresh()
        self._eval_templates()
    except Exception:
        LOG.exception("Error with source %s", self.get_id())
        error_key = ["source", self.get_id(), "error"]
        stats.increment_counter(error_key)
        raise
    finally:
        self._is_loaded = True
def _run_one(
    check: Callable[[pyramid.request.Request], Any],
    is_auth: bool,
    level: int,
    name: str,
    request: pyramid.request.Request,
    results: Dict[str, Dict[str, Any]],
) -> None:
    """
    Run one health check and record its outcome in ``results``.

    Args:
        check: the check to call with ``request``.
        is_auth: whether the stack trace may be included in a failure.
        level: level of the check, copied into the recorded entry.
        name: key under which the outcome is stored.
        request: the current request.
        results: mapping with ``"successes"`` and ``"failures"`` sub-mappings,
            updated in place.

    A success stores the timing, the level and the result when it is not
    None. Any exception is turned into a failure entry with the message,
    timing and level, plus the JSON data of a ``JsonCheckException`` and, for
    authenticated callers or when ``DEVELOPMENT`` is not ``"0"``, the stack trace.
    """

    def _count(outcome: str) -> None:
        # Report the outcome, with tags when the stats backend supports them.
        if stats.USE_TAGS:
            stats.increment_counter(["health_check"], 1, tags={"name": name, "outcome": outcome})
        else:
            stats.increment_counter(["health_check", name, outcome], 1)

    start = time.monotonic()
    try:
        result = check(request)
        success = {"timing": time.monotonic() - start, "level": level}
        results["successes"][name] = success
        if result is not None:
            success["result"] = result
        _count("success")
    except Exception as e:  # pylint: disable=broad-except
        _count("failure")
        LOG.warning("Health check %s failed", name, exc_info=True)
        failure = {"message": str(e), "timing": time.monotonic() - start, "level": level}
        if isinstance(e, JsonCheckException) and e.json_data() is not None:
            failure["result"] = e.json_data()
        show_stacktrace = is_auth or os.environ.get("DEVELOPMENT", "0") != "0"
        if show_stacktrace:
            failure["stacktrace"] = traceback.format_exc()
        results["failures"][name] = failure
def check(request: pyramid.request.Request) -> Dict[str, Any]:
    """
    Check that all the versions returned by ``_get_all_versions`` are identical.

    Entries that are None are ignored. Each distinct version is reported in
    the ``version`` counter with the number of times it was seen.

    Returns:
        A dict with the common ``version`` and the ``count`` of non-None entries.

    Raises:
        AssertionError: if no version is available or if the versions differ.
    """
    all_versions = _get_all_versions()
    assert all_versions
    versions = [e for e in all_versions if e is not None]
    # Every entry can be None: fail with a clear message instead of letting
    # versions[0] raise an IndexError below.
    assert versions, "No version available"

    # Output the versions we see on the monitoring
    v: Optional[str]
    for v, count in Counter(versions).items():
        if stats.USE_TAGS:
            stats.increment_counter(["version"], count, tags=dict(version=v))
        else:
            stats.increment_counter(["version", v], count)

    ref = versions[0]
    assert all(v == ref for v in versions), "Non identical versions: " + ", ".join(versions)
    return dict(version=ref, count=len(versions))
def _view(self, request: pyramid.request.Request) -> Mapping[str, Any]:
    """
    Run the registered health checks and return their outcomes.

    Request parameters:
        max_level: highest check level to run (defaults to ``1``).
        checks: optional comma separated list of check names to run; when
            missing or empty, no filtering by name is done.

    Returns:
        A mapping with the ``failures`` and ``successes`` keyed by check
        name. The response status is set to 500 when at least one check failed.
    """
    max_level = int(request.params.get('max_level', '1'))
    is_auth = auth.is_auth(request)
    results: Dict[str, Dict[str, Any]] = {'failures': {}, 'successes': {}}

    checks = None
    if 'checks' in request.params and request.params['checks'] != '':
        checks = request.params['checks'].split(',')

    for name, check, level in self._checks:
        selected = level <= max_level and (checks is None or name in checks)
        if not selected:
            continue

        start = time.monotonic()
        try:
            result = check(request)
            success = {'timing': time.monotonic() - start, 'level': level}
            results['successes'][name] = success
            if result is not None:
                success['result'] = result
            if stats.USE_TAGS:
                stats.increment_counter(['health_check'], 1, tags={'name': name, 'outcome': 'success'})
            else:
                stats.increment_counter(['health_check', name, 'success'], 1)
        except Exception as e:
            if stats.USE_TAGS:
                stats.increment_counter(['health_check'], 1, tags={'name': name, 'outcome': 'failure'})
            else:
                stats.increment_counter(['health_check', name, 'failure'], 1)
            LOG.warning("Health check %s failed", name, exc_info=True)
            failure = {'message': str(e), 'timing': time.monotonic() - start, 'level': level}
            if isinstance(e, JsonCheckException) and e.json_data() is not None:
                failure['result'] = e.json_data()
            # The stack trace is only given to authenticated callers or in development.
            show_stacktrace = is_auth or os.environ.get('DEVELOPMENT', '0') != '0'
            if show_stacktrace:
                failure['stacktrace'] = traceback.format_exc()
            results['failures'][name] = failure

    if results['failures']:
        request.response.status = 500

    return results
def list(self) -> Iterator[Tile]:
    """
    Yield the tiles read from the Redis stream.

    Each iteration reads at most one new message for the consumer group from
    the master. When nothing new comes back, ``self._claim_olds()`` is asked
    for old pending messages to process instead. The generator only stops
    when ``self._claim_olds()`` returns ``None`` and ``self._stop_if_empty``
    is true.

    Messages that fail to decode are logged, counted in the ``decode_error``
    counter and skipped. The ``nb_messages`` and ``pending`` gauges are
    refreshed from the slave whenever the processed count is a multiple of 10.
    """
    count = 0
    while True:
        queues = self._master.xreadgroup(
            groupname=STREAM_GROUP,
            consumername=CONSUMER_NAME,
            streams={self._name: ">"},
            count=1,
            block=round(self._timeout_ms),
        )
        logger.debug("Get %d new elements", len(queues))

        if not queues:
            queues = self._claim_olds()
            if queues is None:
                # Nothing new and nothing pending at all.
                stats.set_gauge(["redis", self._name_str, "nb_messages"], 0)
                stats.set_gauge(["redis", self._name_str, "pending"], 0)
                if self._stop_if_empty:
                    break

        if not queues:
            continue

        for queue_name, queue_messages in queues:
            assert queue_name == self._name
            for id_, body in queue_messages:
                try:
                    yield decode_message(body[b"message"], from_redis=True, sqs_message=id_)
                except Exception:
                    logger.warning("Failed decoding the Redis message", exc_info=True)
                    stats.increment_counter(["redis", self._name_str, "decode_error"])
                count += 1

        if count % 10 != 0:
            continue

        stats.set_gauge(
            ["redis", self._name_str, "nb_messages"],
            self._slave.xlen(name=self._name),
        )
        pending_summary = self._slave.xpending(self._name, STREAM_GROUP)  # type: ignore
        stats.set_gauge(["redis", self._name_str, "pending"], pending_summary["pending"])
def check(request: pyramid.request.Request) -> Any:
    """
    Check that all the versions returned by ``_get_all_versions`` are identical.

    Entries that are None are ignored. Each distinct version is reported in
    the ``version`` counter with the number of times it was seen.

    Returns:
        A dict with the common ``version`` and the ``count`` of versions seen.

    Raises:
        AssertionError: if no version is available or if the versions differ.
    """
    versions = [version for version in _get_all_versions() if version is not None]
    assert versions

    # Output the versions we see on the monitoring
    for version, occurrences in Counter(versions).items():
        if not stats.USE_TAGS:
            stats.increment_counter(['version', version], occurrences)
        else:
            stats.increment_counter(['version'], occurrences, tags={'version': version})

    ref = versions[0]
    assert all(v == ref for v in versions), "Non identical versions: " + ", ".join(versions)
    return {'version': ref, 'count': len(versions)}
def check(_request: Any) -> str:
    """
    Check that the alembic version stored in the database is the expected one.

    For each binding returned by ``_get_bindings(session)``, the session is
    temporarily bound to it, the ``version_num`` value is read from the
    version table and reported in the ``alembic_version`` counter.

    Returns:
        The expected version (``version_``).

    Raises:
        Exception: if a database holds a version different from ``version_``.
    """
    for binding in _get_bindings(session):
        # Remember the current bind: it is restored in the finally clause,
        # whatever happens while this binding is checked.
        prev_bind = session.bind
        try:
            session.bind = binding
            if stats.USE_TAGS:
                key = ['sql', 'manual', 'health_check', 'alembic']
                tags: Optional[Dict[str, str]] = dict(conf=alembic_ini_path, con=binding.c2c_name)
            else:
                # Without tags, the details become part of the key itself.
                key = [
                    'sql', 'manual', 'health_check', 'alembic', alembic_ini_path, binding.c2c_name
                ]
                tags = None
            with stats.timer_context(key, tags):
                # Schema and table names are quoted by the dialect before being
                # put in the query.
                quote = session.bind.dialect.identifier_preparer.quote
                actual_version, = session.execute(
                    "SELECT version_num FROM {schema}.{table}".  # nosec
                    format(schema=quote(version_schema), table=quote(version_table))).fetchone()
                if stats.USE_TAGS:
                    stats.increment_counter(['alembic_version'], 1, tags=dict(
                        version=actual_version, name=name))
                else:
                    stats.increment_counter(
                        ['alembic_version', name, actual_version], 1)
                if actual_version != version_:
                    raise Exception(
                        "Invalid alembic version: %s != %s" % (actual_version, version_))
        finally:
            session.bind = prev_bind
    return version_
def _claim_olds(self):
    """
    Take over the messages that have been pending for too long.

    Looks at up to 10 pending entries of the consumer group. An entry whose
    ``time_since_delivered`` is at least ``self._pending_timeout_ms`` is either:

    * stolen (claimed for this consumer) when it has been delivered fewer than
      ``self._max_retries`` times;
    * dropped otherwise: claimed, acknowledged, deleted from the stream and
      recorded (by tile coordinate) in the ``self._errors_name`` stream.

    Returns:
        ``None`` when nothing is pending at all, ``[]`` when entries are
        pending but none was stolen, otherwise a list holding one
        ``[self._name, messages]`` pair with the claimed messages.
    """
    pendings = self._redis.xpending_range(name=self._name, groupname=STREAM_GROUP,
                                          min='-', max='+', count=10)
    if not pendings:
        # None means there is nothing pending at all
        return None

    to_steal = []
    to_drop = []
    for pending in pendings:
        if int(pending['time_since_delivered']) >= self._pending_timeout_ms:
            id_ = pending['message_id']
            nb_retries = int(pending['times_delivered'])
            if nb_retries < self._max_retries:
                logger.info('A message has been pending for too long. Stealing it (retry #%d): %s',
                            nb_retries, id_)
                to_steal.append(id_)
            else:
                logger.warning(
                    'A message has been pending for too long and retried too many times. Dropping it: %s',
                    id_)
                to_drop.append(id_)

    if to_drop:
        drop_messages = self._redis.xclaim(name=self._name, groupname=STREAM_GROUP,
                                           consumername=CONSUMER_NAME,
                                           min_idle_time=self._pending_timeout_ms,
                                           message_ids=to_drop)
        drop_ids = [drop_message[0] for drop_message in drop_messages]
        # XCLAIM only returns the entries it actually claimed, so the result can
        # be empty (e.g. another consumer claimed them in the meantime). XACK and
        # XDEL need at least one ID, so skip them in that case.
        if drop_ids:
            self._redis.xack(self._name, STREAM_GROUP, *drop_ids)
            self._redis.xdel(self._name, *drop_ids)
        for _, drop_message in drop_messages:
            tile = decode_message(drop_message[b'message'])
            self._redis.xadd(name=self._errors_name, fields=dict(tilecoord=str(tile.tilecoord)),
                             maxlen=self._max_errors_nb)
        stats.increment_counter(['redis', self._name_str, 'dropped'], len(to_drop))

    if to_steal:
        messages = self._redis.xclaim(name=self._name, groupname=STREAM_GROUP,
                                      consumername=CONSUMER_NAME,
                                      min_idle_time=self._pending_timeout_ms,
                                      message_ids=to_steal)
        stats.increment_counter(['redis', self._name_str, 'stolen'], len(to_steal))
        return [[self._name, messages]]
    else:
        # Empty means there are pending jobs, but they are not old enough to be stolen
        return []
def _do_fetch(self):
    """
    Download the source archive and extract it in the source's directory.

    The archive is streamed from the URL given by ``mode.get_fetch_url`` into
    a ``tar`` process that extracts it in ``self.get_path()``. That directory
    is removed and created again before each extraction.

    Any failure (HTTP error, extraction error, ...) is counted in the
    ``fetch_error`` counter and logged, then the whole operation is tried
    again after one second. The method only returns once a fetch succeeded.
    """
    path = self.get_path()
    url = mode.get_fetch_url(self.get_id(), self._config["key"])
    while True:
        try:
            LOG.info("Doing a fetch of %s", self.get_id())
            # The body is streamed, so the connection stays open until the
            # response is closed: the context manager releases it even when
            # something fails and the fetch is retried.
            # NOTE(review): no timeout is given, a stalled server blocks here.
            with requests.get(url, stream=True) as response:
                response.raise_for_status()
                if os.path.exists(path):
                    shutil.rmtree(path)
                os.makedirs(path, exist_ok=True)
                with subprocess.Popen(
                    [
                        "tar",
                        "--extract",
                        "--gzip",
                        "--no-same-owner",
                        "--no-same-permissions",
                        "--touch",
                        "--no-overwrite-dir",
                    ],
                    cwd=path,
                    stdin=subprocess.PIPE,
                ) as tar:
                    shutil.copyfileobj(response.raw, tar.stdin)  # type: ignore
                    # Closing stdin tells tar that the archive is complete.
                    tar.stdin.close()  # type: ignore
                    return_code = tar.wait()
                # Explicit check: an assert would be skipped with -O and would
                # leave an empty message in the retry log.
                if return_code != 0:
                    raise RuntimeError(f"tar exited with status {return_code}")
            return
        except Exception as exception:
            stats.increment_counter(["source", self.get_id(), "fetch_error"])
            LOG.warning(
                "Error fetching the source %s from the master (will retry in 1s): %s",
                self.get_id(),
                str(exception),
            )
            time.sleep(1)
def add_stats(tile):
    """
    Count a tile in error and return the tile unchanged.

    When the tile is truthy and has a truthy ``error``, the ``error`` counter
    of its layer is incremented. The layer comes from the tile metadata, with
    the ``'None'`` string as fallback.
    """
    if not tile or not tile.error:
        return tile

    layer = tile.metadata.get('layer', 'None')
    stats.increment_counter(['error', layer])
    return tile
def __call__(self, tile: Tile) -> Tile:
    """Increment the ``tiles`` counter for a truthy tile and return the tile unchanged."""
    if not tile:
        return tile

    stats.increment_counter(["tiles"])
    return tile
def __call__(self, tile):
    """Increment the ``errors`` counter for a tile in error and return the tile unchanged."""
    if not tile or not tile.error:
        return tile

    stats.increment_counter(['errors'])
    return tile
def add_stats(tile):
    """
    Count a tile in error and return the tile unchanged.

    When the tile is truthy and has a truthy ``error``, the ``error`` counter
    of its layer is incremented. The layer comes from the tile metadata, with
    the ``'None'`` string as fallback.
    """
    if not tile or not tile.error:
        return tile

    layer = tile.metadata.get('layer', 'None')
    stats.increment_counter(['error', layer])
    return tile
def __call__(self, tile: Tile) -> Tile:
    """Increment the ``errors`` counter for a tile in error and return the tile unchanged."""
    if not tile or not tile.error:
        return tile

    stats.increment_counter(["errors"])
    return tile
def __call__(self, tile):
    """Increment the ``tiles`` counter for a truthy tile and return the tile unchanged."""
    if not tile:
        return tile

    stats.increment_counter(['tiles'])
    return tile