Example #1
0
    def list(self):
        """
        Yield the tiles decoded from the messages of the Redis stream.

        New messages are read one at a time through the consumer group. When nothing new
        comes in, the old pending messages are claimed instead. The iteration stops when
        `_claim_olds()` returns None (nothing pending at all) and `self._stop_if_empty` is set.
        Messages that fail to decode are logged and counted, then skipped.
        """
        nb_seen = 0
        while True:
            batches = self._redis.xreadgroup(
                groupname=STREAM_GROUP,
                consumername=CONSUMER_NAME,
                streams={self._name: '>'},
                count=1,
                block=round(self._timeout_ms),
            )

            if not batches:
                # Nothing new: fall back on the messages pending for too long
                batches = self._claim_olds()
                stats.set_gauge(['redis', self._name_str, 'nb_messages'], 0)
                if batches is None and self._stop_if_empty:
                    return
            if not batches:
                continue

            for stream_name, entries in batches:
                assert stream_name == self._name
                for entry_id, fields in entries:
                    try:
                        yield decode_message(fields[b'message'], from_redis=True, sqs_message=entry_id)
                    except Exception:
                        logger.warning('Failed decoding the Redis message', exc_info=True)
                        stats.increment_counter(['redis', self._name_str, 'decode_error'])
                    nb_seen += 1

            # Refresh the queue length gauge only from time to time
            if nb_seen % 100 == 0:
                stats.set_gauge(['redis', self._name_str, 'nb_messages'], self._redis.xlen(name=self._name))
Example #2
0
    def _claim_olds(self):
        """
        Take over the messages that have been pending for too long.

        Looks at up to 10 pending entries of the consumer group. The ones delivered at least
        `self._pending_timeout_ms` ago are either claimed for a new attempt (delivered fewer
        than `self._max_retries` times) or dropped: acknowledged, deleted from the stream and
        recorded (tile coordinates only) in the `self._errors_name` stream.

        Returns:
            None when nothing is pending at all, an empty list when there are pending
            messages but none was old enough to be stolen, `[[self._name, messages]]` otherwise.
        """
        pendings = self._master.xpending_range(
            name=self._name, groupname=STREAM_GROUP, min="-", max="+", count=10
        )
        if not pendings:
            # None means there is nothing pending at all
            return None
        to_steal = []
        to_drop = []
        for pending in pendings:
            if int(pending["time_since_delivered"]) >= self._pending_timeout_ms:
                id_ = pending["message_id"]
                nb_retries = int(pending["times_delivered"])
                if nb_retries < self._max_retries:
                    logger.info(
                        "A message has been pending for too long. Stealing it (retry #%d): %s",
                        nb_retries,
                        id_,
                    )
                    to_steal.append(id_)
                else:
                    logger.warning(
                        "A message has been pending for too long and retried too many times. Dropping it: %s",
                        id_,
                    )
                    to_drop.append(id_)

        if to_drop:
            drop_messages = self._master.xclaim(
                name=self._name,
                groupname=STREAM_GROUP,
                consumername=CONSUMER_NAME,
                min_idle_time=self._pending_timeout_ms,
                message_ids=to_drop,
            )
            drop_ids = [drop_message[0] for drop_message in drop_messages]
            # The claim may return fewer messages than requested (for example when another
            # consumer claimed them in the meantime). XACK and XDEL are rejected by Redis
            # when called without any ID, so skip them when nothing was actually claimed.
            if drop_ids:
                self._master.xack(self._name, STREAM_GROUP, *drop_ids)
                self._master.xdel(self._name, *drop_ids)
            for _, drop_message in drop_messages:
                tile = decode_message(drop_message[b"message"])
                self._master.xadd(
                    name=self._errors_name,
                    fields=dict(tilecoord=str(tile.tilecoord)),
                    maxlen=self._max_errors_nb,
                )
            stats.increment_counter(["redis", self._name_str, "dropped"], len(to_drop))

        if to_steal:
            messages = self._master.xclaim(
                name=self._name,
                groupname=STREAM_GROUP,
                consumername=CONSUMER_NAME,
                min_idle_time=self._pending_timeout_ms,
                message_ids=to_steal,
            )
            stats.increment_counter(["redis", self._name_str, "stolen"], len(to_steal))
            return [[self._name, messages]]
        else:
            # Empty means there are pending jobs, but they are not old enough to be stolen
            return []
Example #3
0
def init(config: pyramid.config.Configurator) -> None:
    """
    Install the versions.json service and publish the running version.

    Does nothing when the VERSIONS_PATH file does not exist. Otherwise the versions are
    read once, served as JSON, logged and sent to the stats as a "version" counter.
    """
    if not os.path.isfile(VERSIONS_PATH):
        return

    versions = _read_versions()
    route_path = _utils.get_base_path(config) + r"/versions.json"
    config.add_route("c2c_versions", route_path, request_method="GET")
    config.add_view(lambda request: versions, route_name="c2c_versions", renderer="fast_json", http_cache=0)
    LOG.info("Installed the /versions.json service")

    main_version = versions['main']
    git_hash = main_version['git_hash']
    if 'git_tag' in main_version:
        LOG.warning("Starting version %s (%s)", main_version['git_tag'], git_hash)
    else:
        LOG.warning("Starting version %s", git_hash)

    # The version goes in the tags when the stats backend uses them, in the key otherwise
    if stats.USE_TAGS:
        stats.increment_counter(['version'], 1, tags=dict(version=git_hash))
    else:
        stats.increment_counter(['version', git_hash], 1)
Example #4
0
    def list(self):
        """
        Iterate over the tiles stored as messages in the Redis stream.

        Each pass reads at most one new message for the consumer group. If none is
        available, `_claim_olds()` is asked for old pending messages; a None answer together
        with `self._stop_if_empty` ends the generator. Decoding errors are logged and
        counted without interrupting the iteration.
        """
        processed = 0
        while True:
            found = self._redis.xreadgroup(
                groupname=STREAM_GROUP,
                consumername=CONSUMER_NAME,
                streams={self._name: '>'},
                count=1,
                block=round(self._timeout_ms),
            )

            if not found:
                found = self._claim_olds()
                stats.set_gauge(['redis', self._name_str, 'nb_messages'], 0)
                if found is None and self._stop_if_empty:
                    return

            if not found:
                # Still nothing to process: wait for new messages again
                continue

            for name, messages in found:
                assert name == self._name
                for message_id, payload in messages:
                    try:
                        yield decode_message(payload[b'message'], from_redis=True, sqs_message=message_id)
                    except Exception:
                        logger.warning('Failed decoding the Redis message', exc_info=True)
                        stats.increment_counter(['redis', self._name_str, 'decode_error'])
                    processed += 1

            if processed % 100 == 0:
                queue_length = self._redis.xlen(name=self._name)
                stats.set_gauge(['redis', self._name_str, 'nb_messages'], queue_length)
Example #5
0
 def __call__(self, request: pyramid.request.Request) -> str:
     """
     Check that every read/write binding is at the expected alembic version.

     For each binding, the version stored in the alembic version table is read (timed
     through the stats), published as an "alembic_version" counter and compared with
     `version_`.

     Returns:
         The expected version (`version_`).

     Raises:
         Exception: when a database reports a version different from `version_`.
     """
     for binding in _get_bindings(self.session, EngineType.READ_AND_WRITE):
         with binding as session:
             key = ["sql", "manual", "health_check", "alembic"]
             tags: Optional[Dict[str, str]] = None
             if stats.USE_TAGS:
                 tags = dict(conf=alembic_ini_path, con=binding.name())
             else:
                 # Without tags support, the details are appended to the key
                 key += [alembic_ini_path, binding.name()]

             with stats.timer_context(key, tags):
                 quote = session.bind.dialect.identifier_preparer.quote
                 table_ref = f"{quote(version_schema)}.{quote(version_table)}"
                 row = session.execute("SELECT version_num FROM " + table_ref).fetchone()  # nosec
                 (actual_version,) = row

                 if stats.USE_TAGS:
                     version_tags = dict(version=actual_version, name=name)
                     stats.increment_counter(["alembic_version"], 1, tags=version_tags)
                 else:
                     stats.increment_counter(["alembic_version", name, actual_version], 1)

                 if actual_version != version_:
                     raise Exception(
                         f"Invalid alembic version (db: {actual_version}, code: {version_})"
                     )
     return version_
Example #6
0
    def evaluate(self, root_dir: str, files: List[str]):
        """
        Evaluate the template files and hard-link the others into the destination directory.

        Arguments:
            root_dir: the directory the paths of `files` are relative to
            files: the relative paths of the files to handle

        A file ending with "." + `self._extension` is evaluated into a destination file of
        the same name without that suffix; a failure is logged and counted, not raised.
        Any other non-directory file is hard-linked, unless the destination already exists.
        """
        suffix_len = 1 + len(self._extension)
        dest_dir = self._get_dest_dir(root_dir)
        data_keys = ", ".join(self._data.keys())
        LOG.info("Evaluating templates %s -> %s with data keys: %s", root_dir, dest_dir, data_keys)

        for sub_path in files:
            src_path = os.path.join(root_dir, sub_path)
            dest_path = os.path.join(dest_dir, sub_path)
            os.makedirs(os.path.dirname(dest_path), exist_ok=True)

            if not src_path.endswith("." + self._extension):
                # Not a template: just make it available at the destination
                if src_path != dest_path and not os.path.isdir(src_path) and not os.path.exists(dest_path):
                    os.link(src_path, dest_path)
                continue

            # Strip the template extension (and its dot) from the destination name
            dest_path = dest_path[:-suffix_len]
            LOG.debug("Evaluating template: %s -> %s", src_path, dest_path)
            try:
                self._evaluate_file(src_path, dest_path)
            except Exception:
                LOG.warning(
                    "Failed applying the %s template: %s", self._config["type"], src_path, exc_info=True
                )
                stats.increment_counter(["source", self._source_id, self.get_type(), "error"])
Example #7
0
def hello_get(_):
    """
    Return the value of the first Hello row and emit the test metrics.

    Will use the slave.
    """
    timer_key = ["sql", "read_hello"]
    with timer_context(timer_key):
        hello = models.DBSession.query(models.Hello).first()

    increment_counter(["test", "counter"])
    gauge_tags = {"value": 24, "toto": "tutu"}
    set_gauge(["test", "gauge/s"], 42, tags=gauge_tags)
    return {"value": hello.value}
Example #8
0
def hello_get(request):
    """
    Read the first Hello row (will use the slave), emit the test metrics and return its value
    """
    with timer_context(['sql', 'read_hello']):
        query = models.DBSession.query(models.Hello)
        hello = query.first()

    increment_counter(['test', 'counter'])
    set_gauge(
        ['test', 'gauge/s'],
        42,
        tags={'value': 24, 'toto': 'tutu'},
    )
    response = {'value': hello.value}
    return response
Example #9
0
 def fetch(self):
     """
     Fetch the source and evaluate its templates.

     The fetch itself is timed through the stats. Any error is logged, counted and
     re-raised. The source is flagged as not loaded while working and as loaded again
     at the end, whatever the outcome.
     """
     try:
         self._is_loaded = False
         timer_key = ["source", self.get_id(), "fetch"]
         with stats.timer_context(timer_key):
             self._do_fetch()
         self._eval_templates()
     except Exception:
         LOG.exception("Error with source %s", self.get_id())
         error_key = ["source", self.get_id(), "error"]
         stats.increment_counter(error_key)
         raise
     finally:
         self._is_loaded = True
Example #10
0
 def refresh(self):
     """
     Refresh the source and re-evaluate its templates.

     The refresh itself is timed through the stats. Any error is logged, counted and
     re-raised. The source is flagged as not loaded while working and as loaded again
     at the end, whatever the outcome.
     """
     LOG.info("Doing a refresh of %s", self.get_id())
     try:
         self._is_loaded = False
         timer_key = ["source", self.get_id(), "refresh"]
         with stats.timer_context(timer_key):
             self._do_refresh()
         self._eval_templates()
     except Exception:
         LOG.exception("Error with source %s", self.get_id())
         error_key = ["source", self.get_id(), "error"]
         stats.increment_counter(error_key)
         raise
     finally:
         self._is_loaded = True
Example #11
0
 def _run_one(
     check: Callable[[pyramid.request.Request], Any],
     is_auth: bool,
     level: int,
     name: str,
     request: pyramid.request.Request,
     results: Dict[str, Dict[str, Any]],
 ) -> None:
     """
     Run one health check and store its outcome in `results`.

     Arguments:
         check: the callable implementing the check; it receives the request
         is_auth: when true, the stack trace of a failure is added to its result
         level: the level of the check, copied into the result
         name: the name of the check, used as key in `results` and in the stats
         request: the request passed to the check
         results: filled under "successes" or "failures" with the timing, the level and
             the optional result of the check

     Exceptions raised by the check are never propagated: they are logged and reported
     as a failure.
     """

     def count_outcome(outcome: str) -> None:
         # The name and outcome go in the tags when supported, in the key otherwise
         if stats.USE_TAGS:
             stats.increment_counter(["health_check"], 1, tags=dict(name=name, outcome=outcome))
         else:
             stats.increment_counter(["health_check", name, outcome], 1)

     start = time.monotonic()
     try:
         result = check(request)
         success = {"timing": time.monotonic() - start, "level": level}
         results["successes"][name] = success
         if result is not None:
             success["result"] = result
         count_outcome("success")
     except Exception as e:  # pylint: disable=broad-except
         count_outcome("failure")
         LOG.warning("Health check %s failed", name, exc_info=True)
         failure = {"message": str(e), "timing": time.monotonic() - start, "level": level}
         if isinstance(e, JsonCheckException) and e.json_data() is not None:
             failure["result"] = e.json_data()
         # The stack trace is only given to authenticated users or in development mode
         show_stacktrace = is_auth or os.environ.get("DEVELOPMENT", "0") != "0"
         if show_stacktrace:
             failure["stacktrace"] = traceback.format_exc()
         results["failures"][name] = failure
Example #12
0
        def check(request: pyramid.request.Request) -> Dict[str, Any]:
            """
            Check that all the versions returned by `_get_all_versions()` are identical.

            The None entries are ignored. Every distinct version is published as a "version"
            counter, weighted by the number of times it was seen.

            Returns:
                A dict with the common version and the number of versions compared.

            Raises:
                AssertionError: when no version is available or when the versions differ.
            """
            all_versions = _get_all_versions()
            assert all_versions
            versions = [e for e in all_versions if e is not None]
            # If every entry is None there is nothing to compare: fail explicitly instead of
            # hitting an unassigned reference version further down
            assert versions, "No version available"
            ref = versions[0]
            # Output the versions we see on the monitoring
            v: Optional[str]
            for v, count in Counter(versions).items():
                if stats.USE_TAGS:
                    stats.increment_counter(["version"], count, tags=dict(version=v))
                else:
                    stats.increment_counter(["version", v], count)

            assert all(v == ref for v in versions), "Non identical versions: " + ", ".join(versions)
            return dict(version=ref, count=len(versions))
Example #13
0
    def _view(self, request: pyramid.request.Request) -> Mapping[str, Any]:
        """
        Run the selected health checks and return their results.

        Request parameters:
            max_level: only the checks with a level up to this value are run (default: 1)
            checks: optional comma separated list of check names to run; all when absent
                or empty

        Returns:
            A dict with the "failures" and the "successes", indexed by check name. The
            response status is set to 500 when at least one check failed.
        """

        def count_outcome(check_name: str, outcome: str) -> None:
            # The name and outcome go in the tags when supported, in the key otherwise
            if stats.USE_TAGS:
                stats.increment_counter(['health_check'], 1, tags=dict(name=check_name, outcome=outcome))
            else:
                stats.increment_counter(['health_check', check_name, outcome], 1)

        max_level = int(request.params.get('max_level', '1'))
        is_auth = auth.is_auth(request)
        failures: Dict[str, Any] = {}
        successes: Dict[str, Any] = {}
        results: Dict[str, Dict[str, Any]] = {
            'failures': failures,
            'successes': successes,
        }

        checks = None
        if 'checks' in request.params and request.params['checks'] != '':
            checks = request.params['checks'].split(',')

        for name, check, level in self._checks:
            if level > max_level or (checks is not None and name not in checks):
                continue

            start = time.monotonic()
            try:
                result = check(request)
                success = {'timing': time.monotonic() - start, 'level': level}
                successes[name] = success
                if result is not None:
                    success['result'] = result
                count_outcome(name, 'success')
            except Exception as e:
                count_outcome(name, 'failure')
                LOG.warning("Health check %s failed", name, exc_info=True)
                failure = {'message': str(e), 'timing': time.monotonic() - start, 'level': level}
                if isinstance(e, JsonCheckException) and e.json_data() is not None:
                    failure['result'] = e.json_data()
                # The stack trace is only given to authenticated users or in development mode
                if is_auth or os.environ.get('DEVELOPMENT', '0') != '0':
                    failure['stacktrace'] = traceback.format_exc()
                failures[name] = failure

        if failures:
            request.response.status = 500

        return results
Example #14
0
    def list(self) -> Iterator[Tile]:
        """
        Yield the tiles decoded from the messages of the Redis stream.

        New messages are read one at a time from `self._master` through the consumer group.
        When nothing new comes in, the old pending messages are claimed instead. The
        iteration stops when `_claim_olds()` returns None (nothing pending at all) and
        `self._stop_if_empty` is set. Messages that fail to decode are logged and counted,
        then skipped. The "nb_messages" and "pending" gauges are refreshed from
        `self._slave` from time to time.
        """
        nb_seen = 0
        while True:
            batches = self._master.xreadgroup(
                groupname=STREAM_GROUP,
                consumername=CONSUMER_NAME,
                streams={self._name: ">"},
                count=1,
                block=round(self._timeout_ms),
            )
            logger.debug("Get %d new elements", len(batches))

            if not batches:
                # Nothing new: fall back on the messages pending for too long
                batches = self._claim_olds()
                if batches is None:
                    stats.set_gauge(["redis", self._name_str, "nb_messages"], 0)
                    stats.set_gauge(["redis", self._name_str, "pending"], 0)
                    if self._stop_if_empty:
                        return
            if not batches:
                continue

            for stream_name, entries in batches:
                assert stream_name == self._name
                for entry_id, fields in entries:
                    try:
                        yield decode_message(fields[b"message"], from_redis=True, sqs_message=entry_id)
                    except Exception:
                        logger.warning("Failed decoding the Redis message", exc_info=True)
                        stats.increment_counter(["redis", self._name_str, "decode_error"])
                    nb_seen += 1

            if nb_seen % 10 != 0:
                continue
            stats.set_gauge(["redis", self._name_str, "nb_messages"], self._slave.xlen(name=self._name))
            pending = self._slave.xpending(self._name, STREAM_GROUP)  # type: ignore
            stats.set_gauge(["redis", self._name_str, "pending"], pending["pending"])
0
        def check(request: pyramid.request.Request) -> Any:
            """
            Check that all the versions returned by `_get_all_versions()` are identical.

            The None entries are ignored. Every distinct version is published as a "version"
            counter, weighted by the number of times it was seen.

            Returns:
                A dict with the common version and the number of versions compared.

            Raises:
                AssertionError: when no version is available or when the versions differ.
            """
            versions = [version for version in _get_all_versions() if version is not None]
            assert len(versions) > 0
            # The reference does not depend on the loop below: pick it once
            ref = versions[0]
            # output the versions we see on the monitoring
            for v, count in Counter(versions).items():
                if stats.USE_TAGS:
                    stats.increment_counter(['version'], count, tags=dict(version=v))
                else:
                    stats.increment_counter(['version', v], count)

            assert all(v == ref for v in versions), "Non identical versions: " + ", ".join(versions)
            return dict(version=ref, count=len(versions))
Example #16
0
 def check(_request: Any) -> str:
     """
     Check that every binding of the session is at the expected alembic version.

     The session is temporarily bound to each binding in turn (the previous bind is
     always restored). The version stored in the alembic version table is read (timed
     through the stats), published as an "alembic_version" counter and compared with
     `version_`.

     Returns:
         The expected version (`version_`).

     Raises:
         Exception: when a database reports a version different from `version_`.
     """
     for binding in _get_bindings(session):
         prev_bind = session.bind
         try:
             session.bind = binding

             key = ['sql', 'manual', 'health_check', 'alembic']
             tags: Optional[Dict[str, str]] = None
             if stats.USE_TAGS:
                 tags = dict(conf=alembic_ini_path, con=binding.c2c_name)
             else:
                 # Without tags support, the details are appended to the key
                 key += [alembic_ini_path, binding.c2c_name]

             with stats.timer_context(key, tags):
                 quote = session.bind.dialect.identifier_preparer.quote
                 query = "SELECT version_num FROM {schema}.{table}".format(  # nosec
                     schema=quote(version_schema), table=quote(version_table)
                 )
                 row = session.execute(query).fetchone()
                 actual_version, = row

                 if stats.USE_TAGS:
                     version_tags = dict(version=actual_version, name=name)
                     stats.increment_counter(['alembic_version'], 1, tags=version_tags)
                 else:
                     stats.increment_counter(['alembic_version', name, actual_version], 1)

                 if actual_version != version_:
                     raise Exception("Invalid alembic version: %s != %s" % (actual_version, version_))
         finally:
             session.bind = prev_bind
     return version_
Example #17
0
    def _claim_olds(self):
        """
        Take over the messages that have been pending for too long.

        Looks at up to 10 pending entries of the consumer group. The ones delivered at least
        `self._pending_timeout_ms` ago are either claimed for a new attempt (delivered fewer
        than `self._max_retries` times) or dropped: acknowledged, deleted from the stream and
        recorded (tile coordinates only) in the `self._errors_name` stream.

        Returns:
            None when nothing is pending at all, an empty list when there are pending
            messages but none was old enough to be stolen, `[[self._name, messages]]` otherwise.
        """
        pendings = self._redis.xpending_range(name=self._name, groupname=STREAM_GROUP, min='-', max='+', count=10)
        if not pendings:
            # None means there is nothing pending at all
            return None
        to_steal = []
        to_drop = []
        for pending in pendings:
            if int(pending['time_since_delivered']) >= self._pending_timeout_ms:
                id_ = pending['message_id']
                nb_retries = int(pending['times_delivered'])
                if nb_retries < self._max_retries:
                    logger.info('A message has been pending for too long. Stealing it (retry #%d): %s', nb_retries, id_)
                    to_steal.append(id_)
                else:
                    logger.warning(
                        'A message has been pending for too long and retried too many times. Dropping it: %s', id_)
                    to_drop.append(id_)

        if to_drop:
            drop_messages = self._redis.xclaim(name=self._name, groupname=STREAM_GROUP, consumername=CONSUMER_NAME,
                                               min_idle_time=self._pending_timeout_ms, message_ids=to_drop)
            drop_ids = [drop_message[0] for drop_message in drop_messages]
            # The claim may return fewer messages than requested (for example when another
            # consumer claimed them in the meantime). XACK and XDEL are rejected by Redis
            # when called without any ID, so skip them when nothing was actually claimed.
            if drop_ids:
                self._redis.xack(self._name, STREAM_GROUP, *drop_ids)
                self._redis.xdel(self._name, *drop_ids)
            for _, drop_message in drop_messages:
                tile = decode_message(drop_message[b'message'])
                self._redis.xadd(name=self._errors_name, fields=dict(tilecoord=str(tile.tilecoord)),
                                 maxlen=self._max_errors_nb)
            stats.increment_counter(['redis', self._name_str, 'dropped'], len(to_drop))

        if to_steal:
            messages = self._redis.xclaim(name=self._name, groupname=STREAM_GROUP, consumername=CONSUMER_NAME,
                                          min_idle_time=self._pending_timeout_ms, message_ids=to_steal)
            stats.increment_counter(['redis', self._name_str, 'stolen'], len(to_steal))
            return [[self._name, messages]]
        else:
            # Empty means there are pending jobs, but they are not old enough to be stolen
            return []
Example #18
0
 def _do_fetch(self):
     """
     Download the archive of the source and extract it into the source path.

     The archive is streamed from the URL given by `mode.get_fetch_url()` into a `tar`
     process that extracts it in a freshly emptied directory. Any failure (HTTP error,
     extraction error, ...) is counted and logged, then the whole operation is retried
     after one second, forever.
     """
     path = self.get_path()
     url = mode.get_fetch_url(self.get_id(), self._config["key"])
     tar_cmd = [
         "tar",
         "--extract",
         "--gzip",
         "--no-same-owner",
         "--no-same-permissions",
         "--touch",
         "--no-overwrite-dir",
     ]
     while True:
         try:
             LOG.info("Doing a fetch of %s", self.get_id())
             # NOTE(review): no timeout is given here, a stalled server could block forever
             response = requests.get(url, stream=True)
             try:
                 response.raise_for_status()
                 if os.path.exists(path):
                     shutil.rmtree(path)
                 os.makedirs(path, exist_ok=True)
                 with subprocess.Popen(tar_cmd, cwd=path, stdin=subprocess.PIPE) as tar:
                     shutil.copyfileobj(response.raw, tar.stdin)  # type: ignore
                     tar.stdin.close()  # type: ignore
                     returncode = tar.wait()
                 # An explicit error (instead of an assert) is still raised when Python runs
                 # with -O and carries the exit code into the warning logged below
                 if returncode != 0:
                     raise subprocess.CalledProcessError(returncode, tar_cmd)
             finally:
                 # A streamed response keeps its connection until it is closed
                 response.close()
             return
         except Exception as exception:
             stats.increment_counter(
                 ["source", self.get_id(), "fetch_error"])
             LOG.warning(
                 "Error fetching the source %s from the master (will retry in 1s): %s",
                 self.get_id(),
                 str(exception),
             )
             time.sleep(1)
Example #19
0
 def add_stats(tile):
     """Count the tile in the per layer "error" counter when it is in error; return it unchanged."""
     in_error = tile and tile.error
     if in_error:
         # Tiles without a layer in their metadata are counted under 'None'
         layer = tile.metadata.get('layer', 'None')
         stats.increment_counter(['error', layer])
     return tile
Example #20
0
 def __call__(self, tile: Tile) -> Tile:
     """Increment the "tiles" counter for every truthy tile and return the tile unchanged."""
     if not tile:
         return tile
     stats.increment_counter(["tiles"])
     return tile
Example #21
0
 def __call__(self, tile):
     """Increment the 'errors' counter when the tile is in error and return the tile unchanged."""
     in_error = tile and tile.error
     if in_error:
         stats.increment_counter(['errors'])
     return tile
Example #22
0
 def add_stats(tile):
     """Increment the 'error' counter of the tile's layer if the tile is in error, then return it."""
     if not (tile and tile.error):
         return tile
     # Tiles without a layer in their metadata are counted under 'None'
     counter_key = ['error', tile.metadata.get('layer', 'None')]
     stats.increment_counter(counter_key)
     return tile
Example #23
0
 def __call__(self, tile: Tile) -> Tile:
     """Increment the "errors" counter when the tile is in error and return the tile unchanged."""
     if not (tile and tile.error):
         return tile
     stats.increment_counter(["errors"])
     return tile
Example #24
0
 def __call__(self, tile):
     """Increment the 'tiles' counter for every truthy tile and return the tile unchanged."""
     counted = bool(tile)
     if counted:
         stats.increment_counter(['tiles'])
     return tile