Example #1
def get_rates(bucket: str, rollup: int = 60) -> Sequence[Any]:
    now = int(time.time())
    bucket = "{}{}".format(ratelimit_prefix, bucket)
    pipe = rds.pipeline(transaction=False)
    rate_history_s = get_config("rate_history_sec", 3600)
    for i in reversed(range(now - rollup, now - rate_history_s, -rollup)):
        pipe.zcount(bucket, i, "({:f}".format(i + rollup))
    return [c / float(rollup) for c in pipe.execute()]
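Examples #1 and #2 enumerate rollup-sized windows backwards over the rate history and issue one ZCOUNT per window; each count divided by rollup is that window's per-second rate. A minimal, self-contained sketch of the same window arithmetic (pure Python, no Redis; the constants mirror the defaults above):

import time

rollup = 60            # seconds per window (the default argument above)
rate_history_s = 3600  # default for the "rate_history_sec" config above
now = int(time.time())

# Same enumeration as the pipeline loop: reversed() puts the oldest window first.
windows = list(reversed(range(now - rollup, now - rate_history_s, -rollup)))

# Each ZCOUNT above covers the half-open interval [i, i + rollup); dividing the
# per-window count by rollup turns it into a per-second rate.
for i in windows[:3]:
    print("count members with score in [{}, {})".format(i, i + rollup))
print("{} windows covering the last ~{} seconds".format(len(windows), rate_history_s))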
Example #2
def get_rates(bucket, rollup=60):
    now = int(time.time())
    bucket = '{}{}'.format(ratelimit_prefix, bucket)
    pipe = rds.pipeline(transaction=False)
    rate_history_s = get_config('rate_history_sec', 3600)
    for i in reversed(range(now - rollup, now - rate_history_s, -rollup)):
        pipe.zcount(bucket, i, '({:f}'.format(i + rollup))
    return [c / float(rollup) for c in pipe.execute()]
Example #3
def rate_limit(bucket, per_second_limit=None, concurrent_limit=None):
    """
    A context manager for rate limiting that allows for limiting based on
    a rolling-window per-second rate as well as the number of requests
    concurrently running.

    Uses a single redis sorted set per rate-limiting bucket to track both the
    concurrency and rate; the score is the query timestamp. Queries are thrown
    ahead in time when they start so we can count them as concurrent, and
    thrown back to their start time once they finish so we can count them
    towards the historical rate.

               time >>----->
    +-----------------------------+--------------------------------+
    | historical query window     | currently executing queries    |
    +-----------------------------+--------------------------------+
                                  ^
                                 now
    """
    bucket = '{}{}'.format(ratelimit_prefix, bucket)
    query_id = uuid.uuid4()
    now = time.time()
    bypass_rate_limit, rate_history_s = get_configs([
        ('bypass_rate_limit', 0),
        ('rate_history_sec', 3600)
    ])

    if bypass_rate_limit == 1:
        yield (True, 0, 0)
        return

    pipe = rds.pipeline(transaction=False)
    pipe.zremrangebyscore(bucket, '-inf', '({:f}'.format(now - rate_history_s))  # cleanup
    pipe.zadd(bucket, now + max_query_duration_s, query_id)  # add query
    pipe.zcount(bucket, now - rate_lookback_s, now)  # get rate
    pipe.zcount(bucket, '({:f}'.format(now), '+inf')  # get concurrent
    try:
        _, _, rate, concurrent = pipe.execute()
    except Exception as ex:
        logger.exception(ex)
        yield (True, 0, 0)  # fail open if redis is having issues
        return

    per_second = rate / float(rate_lookback_s)
    allowed = (per_second_limit is None or per_second <= per_second_limit) and\
        (concurrent_limit is None or concurrent <= concurrent_limit)
    try:
        yield (allowed, per_second, concurrent)
    finally:
        try:
            if allowed:
                # return the query to its start time
                rds.zincrby(bucket, query_id, -float(max_query_duration_s))
            else:
                rds.zrem(bucket, query_id)  # not allowed / not counted
        except Exception as ex:
            logger.exception(ex)
            pass
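The docstring calls this a context manager, so presumably the generator is wrapped with contextlib.contextmanager where it is defined. A minimal usage sketch under that assumption; the bucket name, limits, and run_query below are illustrative and not part of the original:

from contextlib import contextmanager

# Assumption: wrap the rate_limit generator above so it can drive a `with` block.
rate_limit_cm = contextmanager(rate_limit)

with rate_limit_cm("events", per_second_limit=10, concurrent_limit=5) as limits:
    allowed, per_second, concurrent = limits
    if allowed:
        result = run_query()  # hypothetical query function
    else:
        raise Exception(
            "rate limited: {:.2f} qps, {} concurrent".format(per_second, concurrent)
        )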
Example #4
def record_query(data: Mapping[str, Optional[Any]]) -> None:
    global kfk
    max_redis_queries = 200
    try:
        data = safe_dumps(data)
        rds.pipeline(transaction=False).lpush(queries_list, data).ltrim(
            queries_list, 0, max_redis_queries - 1).execute()

        if kfk is None:
            kfk = Producer(
                {"bootstrap.servers": ",".join(settings.DEFAULT_BROKERS)})

        kfk.produce(
            settings.QUERIES_TOPIC,
            data.encode("utf-8"),
        )
    except Exception as ex:
        logger.exception("Could not record query due to error: %r", ex)
Example #5
def _add_to_redis(self, num_hits: int) -> None:
    """
    Increments the current-time entry in Redis by the number of hits
    """
    now = int(time())
    p = redis_client.pipeline()
    p.hincrby(self.__name, str(now), num_hits)
    p.hkeys(self.__name)
    self._prune(now, p.execute()[1])
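Example #5 keeps one Redis hash per limiter, with the current Unix second as the field name; _prune (not shown) presumably drops fields that have fallen out of the window. A pure-Python model of that bookkeeping, with an assumed window length, just to make the data shape concrete:

from time import time

WINDOW_S = 10  # assumed window; the real cutoff lives in _prune, which is not shown

counts: dict[str, int] = {}  # models the hash fields: {unix_second: hits}

def add_hits(num_hits: int) -> None:
    now = int(time())
    counts[str(now)] = counts.get(str(now), 0) + num_hits  # HINCRBY equivalent
    # what _prune would do against Redis: drop seconds older than the window
    for key in [k for k in counts if int(k) < now - WINDOW_S]:
        del counts[key]

add_hits(3)
print(sum(counts.values()), "hits in the last", WINDOW_S, "seconds")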
Example #6
def record_query(query_metadata: Mapping[str, Any]) -> None:
    global kfk
    max_redis_queries = 200
    try:
        data = safe_dumps(query_metadata)
        rds.pipeline(transaction=False).lpush(queries_list, data).ltrim(
            queries_list, 0, max_redis_queries - 1).execute()

        if kfk is None:
            kfk = Producer(build_default_kafka_producer_configuration())

        kfk.poll(0)  # trigger queued delivery callbacks
        kfk.produce(
            settings.QUERIES_TOPIC,
            data.encode("utf-8"),
            on_delivery=_record_query_delivery_callback,
        )
    except Exception as ex:
        logger.exception("Could not record query due to error: %r", ex)
Example #7
def record_query(data):
    global kfk
    max_redis_queries = 200
    try:
        data = safe_dumps(data)
        rds.pipeline(transaction=False)\
            .lpush(queries_list, data)\
            .ltrim(queries_list, 0, max_redis_queries - 1)\
            .execute()

        if kfk is None:
            kfk = Producer(
                {'bootstrap.servers': ','.join(settings.DEFAULT_BROKERS)})

        kfk.produce(
            settings.QUERIES_TOPIC,
            data.encode('utf-8'),
        )
    except Exception as ex:
        logger.exception('Could not record query due to error: %r', ex)
Example #8
def set_project_needs_final(
    project_id: int,
    state_name: Optional[ReplacerState],
    replacement_type: ReplacementType,
) -> None:
    key, type_key = ProjectsQueryFlags._build_project_needs_final_key_and_type_key(
        project_id, state_name
    )
    p = redis_client.pipeline()
    p.set(key, time.time(), ex=settings.REPLACER_KEY_TTL)
    p.set(type_key, replacement_type, ex=settings.REPLACER_KEY_TTL)
    p.execute()
Example #9
File: replacer.py Project: Appva/snuba
def set_project_exclude_groups(project_id, group_ids):
    """Add {group_id: now, ...} to the ZSET for each `group_id` to exclude,
    remove outdated entries based on `settings.REPLACER_KEY_TTL`, and expire
    the entire ZSET in case it's rarely touched."""

    now = time.time()
    key = get_project_exclude_groups_key(project_id)
    p = redis_client.pipeline()

    p.zadd(key, **{str(group_id): now for group_id in group_ids})
    p.zremrangebyscore(key, -1, now - settings.REPLACER_KEY_TTL)
    p.expire(key, int(settings.REPLACER_KEY_TTL))

    p.execute()
Example #10
def record_query(data):
    global kfk
    max_redis_queries = 200
    data = json.dumps(data, for_json=True)
    try:
        rds.pipeline(transaction=False)\
            .lpush(queries_list, data)\
            .ltrim(queries_list, 0, max_redis_queries - 1)\
            .execute()

        if settings.RECORD_QUERIES:
            if kfk is None:
                kfk = Producer({
                    'bootstrap.servers': ','.join(settings.DEFAULT_BROKERS)
                })

            kfk.produce(
                settings.QUERIES_TOPIC,
                data.encode('utf-8'),
            )
    except Exception as ex:
        logger.exception(ex)
        pass
Example #11
def set_project_exclude_groups(
    project_id: int, group_ids: Sequence[int], state_name: Optional[ReplacerState]
) -> None:
    """Add {group_id: now, ...} to the ZSET for each `group_id` to exclude,
    remove outdated entries based on `settings.REPLACER_KEY_TTL`, and expire
    the entire ZSET in case it's rarely touched."""

    now = time.time()
    key = get_project_exclude_groups_key(project_id, state_name)
    p = redis_client.pipeline()

    group_id_data: Mapping[str, float] = {str(group_id): now for group_id in group_ids}
    p.zadd(key, **group_id_data)
    p.zremrangebyscore(key, -1, now - settings.REPLACER_KEY_TTL)
    p.expire(key, int(settings.REPLACER_KEY_TTL))

    p.execute()
Example #12
def get_projects_query_flags(
    project_ids: Sequence[int], state_name: Optional[ReplacerState]
) -> Tuple[bool, Sequence[int]]:
    """\
    1. Fetch `needs_final` for each Project
    2. Fetch groups to exclude for each Project
    3. Trim groups to exclude ZSET for each Project

    Returns (needs_final, group_ids_to_exclude)
    """

    s_project_ids = set(project_ids)
    now = time.time()
    p = redis_client.pipeline()

    needs_final_keys = [
        get_project_needs_final_key(project_id, state_name)
        for project_id in s_project_ids
    ]
    for needs_final_key in needs_final_keys:
        p.get(needs_final_key)

    exclude_groups_keys = [
        get_project_exclude_groups_key(project_id, state_name)
        for project_id in s_project_ids
    ]
    for exclude_groups_key in exclude_groups_keys:
        p.zremrangebyscore(
            exclude_groups_key, float("-inf"), now - settings.REPLACER_KEY_TTL
        )
        p.zrevrangebyscore(
            exclude_groups_key, float("inf"), now - settings.REPLACER_KEY_TTL
        )

    results = p.execute()

    needs_final = any(results[: len(s_project_ids)])
    exclude_groups = sorted(
        {int(group_id) for group_id in sum(results[(len(s_project_ids) + 1) :: 2], [])}
    )

    return (needs_final, exclude_groups)
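The result slicing at the end is easy to misread: the pipeline returns len(project_ids) GET replies followed by alternating ZREMRANGEBYSCORE counts and ZREVRANGEBYSCORE member lists, so results[(len(ids) + 1)::2] picks out exactly the member lists and sum(..., []) flattens them. A self-contained illustration with fake replies for two projects:

# Fake pipeline output for two projects: two GET replies, then one
# (ZREMRANGEBYSCORE count, ZREVRANGEBYSCORE member list) pair per project.
results = [
    None, b"1652345.0",        # needs_final GETs
    3, [b"11", b"12"],         # project A: trimmed count, excluded group ids
    0, [b"12", b"13"],         # project B: trimmed count, excluded group ids
]
n = 2  # len(s_project_ids)

needs_final = any(results[:n])
exclude_groups = sorted({int(g) for g in sum(results[(n + 1)::2], [])})

print(needs_final, exclude_groups)  # True [11, 12, 13]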
Example #13
def set_project_exclude_groups(
    project_id: int,
    group_ids: Sequence[int],
    state_name: Optional[ReplacerState],
    #  replacement type is just for metrics, not necessary for functionality
    replacement_type: ReplacementType,
) -> None:
    """
    This method is called when a replacement comes in. For a specific project, record
    the group ids which were deleted as a result of this replacement

    Add {group_id: now, ...} to the ZSET for each `group_id` to exclude,
    remove outdated entries based on `settings.REPLACER_KEY_TTL`, and expire
    the entire ZSET in case it's rarely touched.

    Add replacement type for this replacement.
    """
    now = time.time()
    key, type_key = ProjectsQueryFlags._build_project_exclude_groups_key_and_type_key(
        project_id, state_name
    )
    p = redis_client.pipeline()

    group_id_data: Mapping[str | bytes, bytes | float | int | str] = {
        str(group_id): now for group_id in group_ids
    }
    p.zadd(key, group_id_data)
    # remove group id deletions that should have been merged by now
    p.zremrangebyscore(key, -1, now - settings.REPLACER_KEY_TTL)
    p.expire(key, int(settings.REPLACER_KEY_TTL))

    # store the replacement type data
    replacement_type_data: Mapping[str | bytes, bytes | float | int | str] = {
        replacement_type: now
    }
    p.zadd(type_key, replacement_type_data)
    p.zremrangebyscore(type_key, -1, now - settings.REPLACER_KEY_TTL)
    p.expire(type_key, int(settings.REPLACER_KEY_TTL))

    p.execute()
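The second ZSET written above (type_key) maps replacement type names to the time they were last seen, with the same TTL-based trimming. A hypothetical read-side sketch, not from the source, showing how recent types could be fetched back; it reuses the key-builder helper, redis_client, and settings from the snippet:

import time
from typing import Optional, Sequence

def get_recent_replacement_types(
    project_id: int, state_name: Optional[ReplacerState]
) -> Sequence[str]:
    _, type_key = ProjectsQueryFlags._build_project_exclude_groups_key_and_type_key(
        project_id, state_name
    )
    cutoff = time.time() - settings.REPLACER_KEY_TTL
    # members whose score (last-seen time) is newer than the TTL cutoff, newest first
    return [
        t.decode("utf-8")
        for t in redis_client.zrevrangebyscore(type_key, "+inf", cutoff)
    ]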
Example #14
File: replacer.py Project: Appva/snuba
def get_projects_query_flags(project_ids):
    """\
    1. Fetch `needs_final` for each Project
    2. Fetch groups to exclude for each Project
    3. Trim groups to exclude ZSET for each Project

    Returns (needs_final, group_ids_to_exclude)
    """

    project_ids = set(project_ids)
    now = time.time()
    p = redis_client.pipeline()

    needs_final_keys = [
        get_project_needs_final_key(project_id) for project_id in project_ids
    ]
    for needs_final_key in needs_final_keys:
        p.get(needs_final_key)

    exclude_groups_keys = [
        get_project_exclude_groups_key(project_id)
        for project_id in project_ids
    ]
    for exclude_groups_key in exclude_groups_keys:
        p.zremrangebyscore(exclude_groups_key, float('-inf'),
                           now - settings.REPLACER_KEY_TTL)
        p.zrevrangebyscore(exclude_groups_key, float('inf'),
                           now - settings.REPLACER_KEY_TTL)

    results = p.execute()

    needs_final = any(results[:len(project_ids)])
    exclude_groups = sorted({
        int(group_id)
        for group_id in sum(results[(len(project_ids) + 1)::2], [])
    })

    return (needs_final, exclude_groups)
Example #15
def rate_limit(
    rate_limit_params: RateLimitParameters,
) -> Iterator[Optional[RateLimitStats]]:
    """
    A context manager for rate limiting that allows for limiting based on
    a rolling-window per-second rate as well as the number of requests
    concurrently running.

    Uses a single redis sorted set per rate-limiting bucket to track both the
    concurrency and rate; the score is the query timestamp. Queries are thrown
    ahead in time when they start so we can count them as concurrent, and
    thrown back to their start time once they finish so we can count them
    towards the historical rate.

               time >>----->
    +-----------------------------+--------------------------------+
    | historical query window     | currently executing queries    |
    +-----------------------------+--------------------------------+
                                  ^
                                 now
    """

    bucket = "{}{}".format(state.ratelimit_prefix, rate_limit_params.bucket)
    query_id = uuid.uuid4()

    now = time.time()
    bypass_rate_limit, rate_history_s = state.get_configs([
        ("bypass_rate_limit", 0), ("rate_history_sec", 3600)
    ])
    assert isinstance(rate_history_s, (int, float))

    if bypass_rate_limit == 1:
        yield None
        return

    pipe = rds.pipeline(transaction=False)
    pipe.zremrangebyscore(bucket, "-inf",
                          "({:f}".format(now - rate_history_s))  # cleanup
    pipe.zadd(bucket, now + state.max_query_duration_s,
              query_id)  # type: ignore
    if rate_limit_params.per_second_limit is None:
        pipe.exists("nosuchkey")  # no-op if we don't need per-second
    else:
        pipe.zcount(bucket, now - state.rate_lookback_s, now)  # get historical
    if rate_limit_params.concurrent_limit is None:
        pipe.exists("nosuchkey")  # no-op if we don't need concurrent
    else:
        pipe.zcount(bucket, "({:f}".format(now), "+inf")  # get concurrent

    try:
        _, _, historical, concurrent = pipe.execute()
        historical = int(historical)
        concurrent = int(concurrent)
    except Exception as ex:
        logger.exception(ex)
        yield None  # fail open if redis is having issues
        return

    per_second = historical / float(state.rate_lookback_s)

    stats = RateLimitStats(rate=per_second, concurrent=concurrent)

    rate_limit_name = rate_limit_params.rate_limit_name

    Reason = namedtuple("Reason", "scope name val limit")
    reasons = [
        Reason(
            rate_limit_name,
            "concurrent",
            concurrent,
            rate_limit_params.concurrent_limit,
        ),
        Reason(
            rate_limit_name,
            "per-second",
            per_second,
            rate_limit_params.per_second_limit,
        ),
    ]

    reason = next(
        (r for r in reasons if r.limit is not None and r.val > r.limit), None)

    if reason:
        try:
            rds.zrem(bucket, query_id)  # not allowed / not counted
        except Exception as ex:
            logger.exception(ex)

        raise RateLimitExceeded(
            "{r.scope} {r.name} of {r.val:.0f} exceeds limit of {r.limit:.0f}".
            format(r=reason))

    try:
        yield stats
    finally:
        try:
            # return the query to its start time
            rds.zincrby(bucket, query_id, -float(state.max_query_duration_s))
        except Exception as ex:
            logger.exception(ex)
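In examples #15 and #16 the limits travel in a RateLimitParameters object and a violation raises RateLimitExceeded instead of yielding an "allowed" flag. A hedged usage sketch: it assumes RateLimitParameters accepts the fields the snippet reads as constructor keywords and that rate_limit is exposed as a context manager; the values are illustrative.

params = RateLimitParameters(
    rate_limit_name="project",   # illustrative values, not from the source
    bucket="project_1",
    per_second_limit=10,
    concurrent_limit=5,
)

try:
    with rate_limit(params) as stats:
        # stats is None when the limiter is bypassed or Redis failed open
        if stats is not None:
            print("rate={:.2f}/s concurrent={}".format(stats.rate, stats.concurrent))
        ...  # run the query here
except RateLimitExceeded as err:
    print("rejected:", err)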
Example #16
def rate_limit(
    rate_limit_params: RateLimitParameters,
) -> Iterator[Optional[RateLimitStats]]:
    """
    A context manager for rate limiting that allows for limiting based on:
        * a rolling-window per-second rate
        * the number of queries concurrently running.

    It uses one redis sorted set to keep track of both of these limits.
    The following mapping is kept in redis:

        bucket: SortedSet([(timestamp1, query_id1), (timestamp2, query_id2) ...])


    Queries are thrown ahead in time when they start so we can count them
    as concurrent, and thrown back to their start time once they finish so
    we can count them towards the historical rate. See the comments for
    an example.

               time >>----->
    +-----------------------------+--------------------------------+
    | historical query window     | currently executing queries    |
    +-----------------------------+--------------------------------+
                                  ^
                                 now
    """

    bucket = "{}{}".format(state.ratelimit_prefix, rate_limit_params.bucket)
    query_id = str(uuid.uuid4())

    now = time.time()
    bypass_rate_limit, rate_history_s = state.get_configs(
        [("bypass_rate_limit", 0), ("rate_history_sec", 3600)]
        #                               ^ number of seconds the timestamps are kept
    )
    assert isinstance(rate_history_s, (int, float))

    if bypass_rate_limit == 1:
        yield None
        return

    pipe = rds.pipeline(transaction=False)
    # cleanup old query timestamps past our retention window; the number of
    # stale entries removed is only available once the pipeline executes below
    pipe.zremrangebyscore(bucket, "-inf",
                          "({:f}".format(now - rate_history_s))

    # Now for the tricky bit:
    # ======================
    # The query's *deadline* is added to the sorted set of timestamps, therefore
    # labeling its execution as in the future.

    # All queries with timestamps in the future are considered to be executing *right now*
    # Example:

    # now = 100
    # max_query_duration_s = 30
    # rate_lookback_s = 10
    # sorted_set (timestamps only for clarity) = [91, 94, 97, 103, 105, 130]

    # EXPLANATION:
    # ===========

    # queries that have finished running
    # (in this example there are 3 queries in the last 10 seconds
    #  thus the per second rate is 3/10 = 0.3)
    #      |
    #      v
    #  -----------              v--- the current query, vaulted into the future
    #  [91, 94, 97, 103, 105, 130]
    #               -------------- < - queries currently running
    #                                (how many queries are
    #                                   running concurrently; in this case 3)
    #              ^
    #              | current time
    pipe.zadd(bucket, {query_id: now + state.max_query_duration_s})
    if rate_limit_params.per_second_limit is None:
        pipe.exists("nosuchkey")  # no-op if we don't need per-second
    else:
        # count queries that have finished for the per-second rate
        pipe.zcount(bucket, now - state.rate_lookback_s, now)
    if rate_limit_params.concurrent_limit is None:
        pipe.exists("nosuchkey")  # no-op if we don't need concurrent
    else:
        # count the queries in the "future", which tells us the number
        # of concurrent queries
        pipe.zcount(bucket, "({:f}".format(now), "+inf")

    try:
        stale_queries, _, historical, concurrent = pipe.execute()
        metrics.increment("rate_limit.stale",
                          stale_queries,
                          tags={"bucket": bucket})
        historical = int(historical)
        concurrent = int(concurrent)
    except Exception as ex:
        logger.exception(ex)
        yield None  # fail open if redis is having issues
        return

    per_second = historical / float(state.rate_lookback_s)

    stats = RateLimitStats(rate=per_second, concurrent=concurrent)

    rate_limit_name = rate_limit_params.rate_limit_name

    Reason = namedtuple("Reason", "scope name val limit")
    reasons = [
        Reason(
            rate_limit_name,
            "concurrent",
            concurrent,
            rate_limit_params.concurrent_limit,
        ),
        Reason(
            rate_limit_name,
            "per-second",
            per_second,
            rate_limit_params.per_second_limit,
        ),
    ]
    reason = next(
        (r for r in reasons if r.limit is not None and r.val > r.limit), None)
    if reason:
        try:
            # Remove the query from the sorted set
            # because we rate limited it. It shouldn't count towards
            # rate limiting future queries in this bucket.
            rds.zrem(bucket, query_id)
        except Exception as ex:
            logger.exception(ex)

        raise RateLimitExceeded(
            "{r.scope} {r.name} of {r.val:.0f} exceeds limit of {r.limit:.0f}".
            format(r=reason),
            scope=reason.scope,
            name=reason.name,
        )

    rate_limited = False
    try:
        yield stats
        _, err, _ = sys.exc_info()
        if isinstance(err, RateLimitExceeded):
            # If another rate limiter throws an exception, it won't be propagated
            # through this context. So check for the exception explicitly.
            # If another rate limit was hit, we don't want to count this query
            # against this limit.
            try:
                rds.zrem(bucket, query_id)  # not allowed / not counted
                rate_limited = True
            except Exception as ex:
                logger.exception(ex)
    finally:
        try:
            # return the query to its start time, if the query_id was actually added.
            if not rate_limited:
                rds.zincrby(bucket, -float(state.max_query_duration_s),
                            query_id)
        except Exception as ex:
            logger.exception(ex)
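To check the arithmetic in the long comment above, here is a self-contained, pure-Python model of the same timeline (now = 100, lookback 10 s): scores at or before now are finished queries, scores after now are the ones "thrown into the future" and therefore still running.

now = 100.0
rate_lookback_s = 10
scores = [91, 94, 97, 103, 105, 130]  # the sorted-set timestamps from the comment

# ZCOUNT(bucket, now - rate_lookback_s, now): queries finished inside the lookback window
historical = sum(1 for s in scores if now - rate_lookback_s <= s <= now)

# ZCOUNT(bucket, "(now", "+inf"): queries still "in the future", i.e. running right now
concurrent = sum(1 for s in scores if s > now)

per_second = historical / float(rate_lookback_s)
print(historical, concurrent, per_second)  # 3 3 0.3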
Example #17
    def test_latest_replacement_time_by_projects(self) -> None:
        project_ids = [1, 2, 3]
        p = redis_client.pipeline()

        exclude_groups_keys = [
            errors_replacer.ProjectsQueryFlags.
            _build_project_exclude_groups_key_and_type_key(
                project_id, ReplacerState.ERRORS) for project_id in project_ids
        ]

        project_needs_final_keys = [
            errors_replacer.ProjectsQueryFlags.
            _build_project_needs_final_key_and_type_key(
                project_id, ReplacerState.ERRORS) for project_id in project_ids
        ]

        now = datetime.now()

        # No replacements or needs final
        flags = ProjectsQueryFlags.load_from_redis(project_ids,
                                                   ReplacerState.ERRORS)
        assert flags.latest_replacement_time is None

        # All projects need final
        time_offset = 0
        for project_needs_final_key, _ in project_needs_final_keys:
            p.set(project_needs_final_key, now.timestamp() + time_offset)
            time_offset += 10
        p.execute()
        flags = ProjectsQueryFlags.load_from_redis(project_ids,
                                                   ReplacerState.ERRORS)
        expected_time = now + timedelta(seconds=20)
        assert (flags.latest_replacement_time is not None and abs(
            (flags.latest_replacement_time - expected_time).total_seconds()) <
                1)
        redis_client.flushdb()

        # Some projects need final
        time_offset = 0
        for project_needs_final_key, _ in project_needs_final_keys[1:]:
            p.set(project_needs_final_key, now.timestamp() + time_offset)
            time_offset += 10
        p.execute()
        flags = ProjectsQueryFlags.load_from_redis(project_ids,
                                                   ReplacerState.ERRORS)
        expected_time = now + timedelta(seconds=10)
        assert (flags.latest_replacement_time is not None and abs(
            (flags.latest_replacement_time - expected_time).total_seconds()) <
                1)
        redis_client.flushdb()

        # One exclude group per project
        group_id_data_asc: MutableMapping[str, float] = {"1": now.timestamp()}
        for exclude_groups_key, _ in exclude_groups_keys:
            group_id_data_asc["1"] += 10
            to_insert: Mapping[str | bytes, bytes | int | float | str] = {
                "1": group_id_data_asc["1"],
            }  # typing error fix
            p.zadd(exclude_groups_key, to_insert)
        p.execute()
        expected_time = now + timedelta(seconds=30)
        flags = ProjectsQueryFlags.load_from_redis(project_ids,
                                                   ReplacerState.ERRORS)
        assert (flags.latest_replacement_time is not None and abs(
            (flags.latest_replacement_time - expected_time).total_seconds()) <
                1)
        redis_client.flushdb()

        # Multiple exclude groups per project
        group_id_data_multiple: MutableMapping[str, float] = {
            "1": (now + timedelta(seconds=10)).timestamp(),
            "2": now.timestamp(),
        }
        for exclude_groups_key, _ in exclude_groups_keys:
            group_id_data_multiple["1"] -= 10
            group_id_data_multiple["2"] -= 10
            to_insert = {
                "1": group_id_data_multiple["1"],
                "2": group_id_data_multiple["2"],
            }  # typing error fix
            p.zadd(exclude_groups_key, to_insert)
        p.execute()
        expected_time = now
        flags = ProjectsQueryFlags.load_from_redis(project_ids,
                                                   ReplacerState.ERRORS)
        assert (flags.latest_replacement_time is not None and abs(
            (flags.latest_replacement_time - expected_time).total_seconds()) <
                1)
        redis_client.flushdb()