def update_cached_items() -> None:
    """Queue background cache-refresh tasks for insights that are worth keeping warm.

    Eligible insights are those on a shared dashboard, or on a dashboard viewed in
    the last 7 days, excluding deleted/currently-refreshing items, items that have
    already failed more than 2 refresh attempts, and items with empty filters.
    Oldest-refreshed items go first (never-refreshed ones at the very front).
    """
    tasks = []
    items = (Insight.objects.filter(
        Q(
            Q(dashboard__is_shared=True)
            | Q(dashboard__last_accessed_at__gt=timezone.now() - relativedelta(days=7))))
        .exclude(dashboard__deleted=True)
        .exclude(refreshing=True)
        .exclude(deleted=True)
        .exclude(refresh_attempt__gt=2)  # give up after repeated failures
        .exclude(filters={})
        # nulls_first so insights that were never refreshed are prioritised
        .order_by(F("last_refresh").asc(nulls_first=True)))
    # Only refresh a bounded batch per run to cap parallel load.
    for item in items[0:PARALLEL_INSIGHT_CACHE]:
        try:
            cache_key, cache_type, payload = dashboard_item_update_task_params(item)
            if item.filters_hash != cache_key:
                item.save()  # force update if the saved key is different from the cache key
            tasks.append(update_cache_item_task.s(cache_key, cache_type, payload))
        except Exception as e:
            # Count the failure so the refresh_attempt__gt=2 filter eventually
            # drops this item, and report it instead of aborting the batch.
            item.refresh_attempt = (item.refresh_attempt or 0) + 1
            item.save()
            capture_exception(e)
    logger.info("Found {} items to refresh".format(len(tasks)))
    taskset = group(tasks)
    taskset.apply_async()
    # Gauge the full eligible backlog, not just the batch queued this run.
    statsd.gauge("update_cache_queue_depth", items.count())
def redis_celery_queue_depth():
    """Report the length of the default Celery queue (the Redis list "celery") to statsd.

    Best-effort: any failure (Redis or statsd unavailable) is swallowed so this
    periodic task never raises.
    """
    from statshog.defaults.django import statsd

    try:
        llen = get_client().llen("celery")
        statsd.gauge("posthog_celery_queue_depth", llen)
    # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt still propagate.
    except Exception:
        # If we can't connect to Redis/statsd, don't complain about it;
        # not every installation will have statsd available.
        return
def sync_execute(query, args=None, settings=None):
    """Execute a ClickHouse query synchronously on a pooled client.

    Always records the wall-clock execution time (in ms) to statsd — even when
    the query raises — tagged with whatever annotation tags were extracted.
    Optionally echoes the SQL (shell_plus debugging) and persists the query for
    the current request when the request context asks for it.
    """
    with ch_pool.get_client() as client:
        start_time = time()
        # Default to empty tags so the `finally` gauge still works if
        # _annotate_tagged_query itself raises.
        tags = {}
        try:
            sql, tags = _annotate_tagged_query(query, args)
            result = client.execute(sql, args, settings=settings)
        finally:
            execution_time = time() - start_time
            statsd.gauge("clickhouse_sync_execution_time", execution_time * 1000.0, tags=tags)
            if app_settings.SHELL_PLUS_PRINT_SQL:
                print(format_sql(query, args))
                print("Execution time: %.6fs" % (execution_time,))
            # NOTE(review): presumably _request_information is request-scoped
            # (e.g. thread-local) state — confirm where it is populated.
            if _request_information is not None and _request_information.get("save", False):
                save_query(query, args, execution_time)
    return result
def clickhouse_row_count():
    """Emit a per-table row-count gauge for each known ClickHouse table.

    No-op on installations without EE/ClickHouse. Each table is counted
    best-effort: a failure on one table does not abort the others.
    """
    # Guard clause replaces the original `if ...: ... else: pass` shape.
    if not (is_ee_enabled() and settings.EE_AVAILABLE):
        return

    from statshog.defaults.django import statsd

    from ee.clickhouse.client import sync_execute

    for table in CLICKHOUSE_TABLES:
        try:
            # `table` comes from the fixed CLICKHOUSE_TABLES constant, so
            # string formatting here is not an injection risk.
            query = """select count(1) freq from {table};""".format(table=table)
            rows = sync_execute(query)[0][0]
            statsd.gauge("posthog_celery_clickhouse_table_row_count", rows, tags={"table": table})
        # Narrowed from a bare `except:`; still deliberately best-effort.
        except Exception:
            pass
def clickhouse_part_count():
    """Emit a gauge with the number of ClickHouse parts per table.

    A high part count indicates merges falling behind. No-op on installations
    without EE/ClickHouse.
    """
    # Guard clause replaces the original `if ...: ... else: pass` shape.
    if not (is_ee_enabled() and settings.EE_AVAILABLE):
        return

    from statshog.defaults.django import statsd

    from ee.clickhouse.client import sync_execute

    QUERY = """
        select table, count(1) freq
        from system.parts
        group by table
        order by freq desc;
    """
    rows = sync_execute(QUERY)
    for table, parts in rows:
        statsd.gauge("posthog_celery_clickhouse_table_parts_count", parts, tags={"table": table})
def clickhouse_lag():
    """Emit an ingestion-lag gauge (now() - max(_timestamp), in seconds) per table.

    No-op on installations without EE/ClickHouse; each table is measured
    best-effort so one failing table does not abort the rest.
    """
    # Guard clause replaces the original `if ...: ... else: pass` shape.
    if not (is_ee_enabled() and settings.EE_AVAILABLE):
        return

    from statshog.defaults.django import statsd

    from ee.clickhouse.client import sync_execute

    for table in CLICKHOUSE_TABLES:
        try:
            # `table` comes from the fixed CLICKHOUSE_TABLES constant, so
            # string formatting here is not an injection risk.
            query = """select max(_timestamp) observed_ts, now() now_ts, now() - max(_timestamp) as lag from {table};""".format(
                table=table
            )
            lag = sync_execute(query)[0][2]  # third column: the lag itself
            # NOTE(review): metric name contains a double underscore
            # ("clickhouse__table") — likely a typo, but kept byte-identical
            # since dashboards/alerts may already reference it. Confirm before renaming.
            statsd.gauge("posthog_celery_clickhouse__table_lag_seconds", lag, tags={"table": table})
        # Narrowed from a bare `except:`; still deliberately best-effort.
        except Exception:
            pass
def clickhouse_mutation_count():
    """Emit a gauge with the number of ClickHouse mutations per table.

    A growing mutation count suggests ALTER/DELETE mutations are piling up.
    No-op on installations without EE/ClickHouse.
    """
    # Guard clause replaces the original `if ...: ... else: pass` shape.
    if not (is_ee_enabled() and settings.EE_AVAILABLE):
        return

    from statshog.defaults.django import statsd

    from ee.clickhouse.client import sync_execute

    QUERY = """
        SELECT table, count(1) AS freq
        FROM system.mutations
        GROUP BY table
        ORDER BY freq DESC
    """
    rows = sync_execute(QUERY)
    for table, muts in rows:
        statsd.gauge("posthog_celery_clickhouse_table_mutations_count", muts, tags={"table": table})
def gauge(metric_name: str, value: Union[int, float], tags: Tags = None):
    """Record a gauge measurement, fanning it out to both backends.

    The reading is forwarded to statsd and mirrored into the internal
    capture pipeline with the same metric name, value, and tags.
    """
    statsd.gauge(metric_name, value, tags=tags)
    _capture(metric_name, value, tags)