Example #1
0
def update_cached_items() -> None:
    """Queue cache-refresh Celery tasks for insights that are worth refreshing.

    Selects insights belonging to shared dashboards or dashboards accessed in
    the last 7 days, skipping deleted/refreshing/filterless items and items
    that already failed more than twice. Stale items (oldest `last_refresh`
    first, NULLs first) are refreshed in batches of PARALLEL_INSIGHT_CACHE.
    """

    tasks = []
    items = (Insight.objects.filter(
        Q(
            Q(dashboard__is_shared=True)
            | Q(dashboard__last_accessed_at__gt=timezone.now() -
                relativedelta(days=7)))).exclude(
                    dashboard__deleted=True).exclude(refreshing=True).exclude(
                        deleted=True).exclude(refresh_attempt__gt=2).exclude(
                            filters={}).order_by(
                                F("last_refresh").asc(nulls_first=True)))

    # Only refresh a bounded slice per run so one pass can't flood the workers.
    for item in items[0:PARALLEL_INSIGHT_CACHE]:
        try:
            cache_key, cache_type, payload = dashboard_item_update_task_params(
                item)
            if item.filters_hash != cache_key:
                item.save(
                )  # force update if the saved key is different from the cache key
            tasks.append(
                update_cache_item_task.s(cache_key, cache_type, payload))
        except Exception as e:
            # Count the failure so repeatedly-failing items drop out of the
            # queryset (refresh_attempt__gt=2 above), and report it.
            item.refresh_attempt = (item.refresh_attempt or 0) + 1
            item.save()
            capture_exception(e)

    logger.info("Found {} items to refresh".format(len(tasks)))
    # Dispatch all refresh tasks in parallel as a single Celery group.
    taskset = group(tasks)
    taskset.apply_async()
    # NOTE: items.count() re-runs the query; it reports the full backlog,
    # not just the slice refreshed this run.
    statsd.gauge("update_cache_queue_depth", items.count())
Example #2
0
def redis_celery_queue_depth():
    """Report the length of the Redis-backed "celery" broker queue to statsd.

    Best-effort: any failure (Redis or statsd unavailable) is swallowed,
    because not every installation runs these services.
    """
    from statshog.defaults.django import statsd

    try:
        # "celery" is the default Celery broker list key in Redis.
        llen = get_client().llen("celery")
        # Fixed: dropped the pointless f-prefix on a placeholder-free string.
        statsd.gauge("posthog_celery_queue_depth", llen)
    except Exception:
        # Fixed: narrowed the bare `except:` (which also caught SystemExit /
        # KeyboardInterrupt) to `except Exception`, preserving the deliberate
        # best-effort behavior.
        # if we can't connect to statsd don't complain about it.
        # not every installation will have statsd available
        return
Example #3
0
 def sync_execute(query, args=None, settings=None):
     """Execute a ClickHouse query synchronously and return its result rows.

     Takes a client from the shared pool, annotates the query with tags for
     metrics, and always reports execution time to statsd — even when the
     query raises (via the `finally` block). Optionally echoes the SQL and
     timing (SHELL_PLUS_PRINT_SQL) and saves the query for request debugging.
     """
     with ch_pool.get_client() as client:
         start_time = time()
         tags = {}  # default so the finally-block gauge works even if annotation fails
         try:
             sql, tags = _annotate_tagged_query(query, args)
             result = client.execute(sql, args, settings=settings)
         finally:
             execution_time = time() - start_time
             statsd.gauge("clickhouse_sync_execution_time", execution_time * 1000.0, tags=tags)
             if app_settings.SHELL_PLUS_PRINT_SQL:
                 print(format_sql(query, args))
                 print("Execution time: %.6fs" % (execution_time,))
             # "save" flag is set per-request elsewhere; records the query for debugging.
             if _request_information is not None and _request_information.get("save", False):
                 save_query(query, args, execution_time)
     # Only reached on success; on failure the exception propagates from the try.
     return result
Example #4
0
def clickhouse_row_count():
    """Emit a per-table row-count gauge for each monitored ClickHouse table.

    No-op unless the EE (ClickHouse) build is enabled. Each table is handled
    best-effort: a failure on one table must not abort the others.
    """
    if not (is_ee_enabled() and settings.EE_AVAILABLE):
        return  # Fixed: replaced the redundant `else: pass` with an early return.

    from statshog.defaults.django import statsd

    from ee.clickhouse.client import sync_execute

    # Fixed: hoisted the loop-invariant query template out of the loop.
    QUERY = """select count(1) freq from {table};"""
    for table in CLICKHOUSE_TABLES:
        try:
            rows = sync_execute(QUERY.format(table=table))[0][0]
            # Fixed: dropped the pointless f-prefix on a placeholder-free string.
            statsd.gauge("posthog_celery_clickhouse_table_row_count",
                         rows,
                         tags={"table": table})
        except Exception:
            # Fixed: narrowed the bare `except:`; still best-effort per table.
            pass
Example #5
0
def clickhouse_part_count():
    """Emit a gauge of ClickHouse data-part counts per table (from system.parts).

    A high part count signals merge pressure. No-op unless the EE (ClickHouse)
    build is enabled.
    """
    if not (is_ee_enabled() and settings.EE_AVAILABLE):
        return  # Fixed: replaced the redundant `else: pass` with an early return.

    from statshog.defaults.django import statsd

    from ee.clickhouse.client import sync_execute

    QUERY = """
        select table, count(1) freq
        from system.parts
        group by table
        order by freq desc;
    """
    for table, parts in sync_execute(QUERY):
        # Fixed: dropped the pointless f-prefix on a placeholder-free string.
        statsd.gauge("posthog_celery_clickhouse_table_parts_count",
                     parts,
                     tags={"table": table})
Example #6
0
def clickhouse_lag():
    """Emit per-table ingestion lag (seconds since the newest _timestamp).

    No-op unless the EE (ClickHouse) build is enabled. Each table is handled
    best-effort: a failure on one table must not abort the others.
    """
    if not (is_ee_enabled() and settings.EE_AVAILABLE):
        return  # Fixed: replaced the redundant `else: pass` with an early return.

    from statshog.defaults.django import statsd

    from ee.clickhouse.client import sync_execute

    # Fixed: hoisted the loop-invariant query template out of the loop.
    QUERY = (
        """select max(_timestamp) observed_ts, now() now_ts, now() - max(_timestamp) as lag from {table};"""
    )
    for table in CLICKHOUSE_TABLES:
        try:
            # Column index 2 is the computed `lag` value.
            lag = sync_execute(QUERY.format(table=table))[0][2]
            # NOTE(review): the double underscore in "clickhouse__table" looks
            # like a typo, but the metric name is kept byte-identical because
            # dashboards/alerts may already reference it.
            statsd.gauge("posthog_celery_clickhouse__table_lag_seconds",
                         lag,
                         tags={"table": table})
        except Exception:
            # Fixed: narrowed the bare `except:`; still best-effort per table.
            pass
Example #7
0
def clickhouse_mutation_count():
    """Emit a gauge of pending ClickHouse mutations per table (system.mutations).

    No-op unless the EE (ClickHouse) build is enabled.
    """
    if not (is_ee_enabled() and settings.EE_AVAILABLE):
        return  # Fixed: replaced the redundant `else: pass` with an early return.

    from statshog.defaults.django import statsd

    from ee.clickhouse.client import sync_execute

    QUERY = """
        SELECT
            table,
            count(1) AS freq
        FROM system.mutations
        GROUP BY table
        ORDER BY freq DESC
    """
    for table, muts in sync_execute(QUERY):
        # Fixed: dropped the pointless f-prefix on a placeholder-free string.
        statsd.gauge("posthog_celery_clickhouse_table_mutations_count",
                     muts,
                     tags={"table": table})
Example #8
0
def gauge(metric_name: str, value: Union[int, float], tags: Tags = None) -> None:
    """Record `value` as a statsd gauge and mirror it via `_capture`.

    Thin wrapper so callers report a metric to both sinks with one call.
    `Tags` is a project-declared alias — presumably an optional dict of
    tag name/value pairs; confirm against its definition.
    """
    statsd.gauge(metric_name, value, tags=tags)
    _capture(metric_name, value, tags)