Example #1
def insert_actors_into_cohort_by_query(cohort: Cohort, query: str,
                                       params: Dict[str, Any]):
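    # Copy the people matched by the query into the static cohort table, then mark the cohort as calculated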
    try:
        sync_execute(
            INSERT_COHORT_ALL_PEOPLE_THROUGH_PERSON_ID.format(
                cohort_table=PERSON_STATIC_COHORT_TABLE, query=query),
            {
                "cohort_id": cohort.pk,
                "_timestamp": datetime.now(),
                "team_id": cohort.team.pk,
                **params
            },
        )

        cohort.is_calculating = False
        cohort.last_calculation = timezone.now()
        cohort.errors_calculating = 0
        cohort.save()
    except Exception as err:
        if settings.DEBUG:
            raise err
        cohort.is_calculating = False
        cohort.errors_calculating = F("errors_calculating") + 1
        cohort.save()
        capture_exception(err)
Example #2
def get_active_feature_flags(
    team_id: int,
    distinct_id: str,
    groups: Dict[GroupTypeName, str] = {},
) -> Dict[str, Union[bool, str, None]]:
    cache = FlagsMatcherCache(team_id)
    flags_enabled: Dict[str, Union[bool, str, None]] = {}
    feature_flags = FeatureFlag.objects.filter(team_id=team_id,
                                               active=True,
                                               deleted=False).only(
                                                   "id",
                                                   "team_id",
                                                   "filters",
                                                   "key",
                                                   "rollout_percentage",
                                               )

    for feature_flag in feature_flags:
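        # Evaluate each flag independently so one failing flag doesn't break the whole response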
        try:
            match = feature_flag.matches(distinct_id, groups, cache)
            if match:
                flags_enabled[feature_flag.key] = match.variant or True
        except Exception as err:
            capture_exception(err)
    return flags_enabled
Example #3
def log_event_to_dead_letter_queue(
    raw_payload: Dict,
    event_name: str,
    event: Dict,
    error_message: str,
    error_location: str,
    topic: str = KAFKA_DEAD_LETTER_QUEUE,
):
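    # Build the DLQ row: copy the event and annotate it with error metadata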
    data = event.copy()

    data["error_timestamp"] = datetime.now().isoformat()
    data["error_location"] = error_location
    data["error"] = error_message
    data["elements_chain"] = ""
    data["id"] = str(UUIDT())
    data["event"] = event_name
    data["raw_payload"] = json.dumps(raw_payload)
    data["now"] = datetime.fromisoformat(data["now"]).replace(
        tzinfo=None).isoformat() if data["now"] else None
    data["tags"] = ["django_server"]
    data["event_uuid"] = event["uuid"]
    del data["uuid"]

    try:
        KafkaProducer().produce(topic=topic, data=data)
        statsd.incr(settings.EVENTS_DEAD_LETTER_QUEUE_STATSD_METRIC)
    except Exception as e:
        capture_exception(e)
        statsd.incr("events_dead_letter_queue_produce_error")

        if settings.DEBUG:
            print("Failed to produce to events dead letter queue with error:",
                  e)
Example #4
def get_active_feature_flags(
        team: Team, distinct_id: str) -> Dict[str, Union[bool, str, None]]:
    flags_enabled: Dict[str, Union[bool, str, None]] = {}
    feature_flags = FeatureFlag.objects.filter(team=team,
                                               active=True,
                                               deleted=False).only(
                                                   "id",
                                                   "team_id",
                                                   "filters",
                                                   "key",
                                                   "rollout_percentage",
                                               )

    for feature_flag in feature_flags:
        try:
            if not feature_flag.distinct_id_matches(distinct_id):
                continue
            if len(feature_flag.variants) > 0:
                variant = feature_flag.get_variant_for_distinct_id(distinct_id)
                if variant is not None:
                    flags_enabled[feature_flag.key] = variant
            else:
                flags_enabled[feature_flag.key] = True
        except Exception as err:
            capture_exception(err)
    return flags_enabled
Example #5
def insert_stickiness_people_into_cohort(cohort: Cohort, target_entity: Entity,
                                         filter: StickinessFilter) -> None:
    content_sql, params = ClickhouseStickinessActors(
        entity=target_entity, filter=filter, team=cohort.team).actor_query()

    try:
        sync_execute(
            INSERT_COHORT_ALL_PEOPLE_SQL.format(
                content_sql=content_sql,
                latest_person_sql=GET_LATEST_PERSON_SQL.format(query=""),
                cohort_table=PERSON_STATIC_COHORT_TABLE,
                GET_TEAM_PERSON_DISTINCT_IDS=get_team_distinct_ids_query(
                    cohort.team_id),
            ),
            {
                "cohort_id": cohort.pk,
                "_timestamp": datetime.now(),
                **params
            },
        )
        cohort.is_calculating = False
        cohort.last_calculation = timezone.now()
        cohort.errors_calculating = 0
        cohort.save()
    except Exception as err:
        if settings.DEBUG:
            raise err
        cohort.is_calculating = False
        cohort.errors_calculating = F("errors_calculating") + 1
        cohort.save()
        capture_exception(err)
Example #6
def get_flamegraphs(query_id: str) -> Dict:
    try:
        with tempfile.TemporaryDirectory() as tmpdirname:
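            # Run clickhouse-flamegraph for the given query ID; it writes SVG flamegraphs into the temp directory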
            subprocess.run(
                [
                    CLICKHOUSE_FLAMEGRAPH_EXECUTABLE,
                    "--query-id",
                    query_id,
                    "--clickhouse-dsn",
                    f"http://{CLICKHOUSE_USER}:{CLICKHOUSE_PASSWORD}@{CLICKHOUSE_STABLE_HOST}:8123/",
                    "--console",
                    "--flamegraph-script",
                    FLAMEGRAPH_PL,
                    "--date-from",
                    "2021-01-01",
                    "--width",
                    "1900",
                ],
                cwd=tmpdirname,
                check=True,
            )

            flamegraphs = {}
            for file_path in glob.glob(join(tmpdirname, "*/*/global*.svg")):
                with open(file_path) as file:
                    flamegraphs[basename(file_path)] = file.read()

            return flamegraphs
    except Exception as err:
        capture_exception(err)
        return {}
Example #7
def get_internal_metrics_team_id() -> Optional[int]:
    from posthog.models.organization import Organization
    from posthog.models.team import Team

    if not settings.CAPTURE_INTERNAL_METRICS:
        return None

    try:
        with transaction.atomic():
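            # Reuse the existing internal-metrics team, creating the organization and team on first use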
            team = Team.objects.filter(
                organization__for_internal_metrics=True).first()

            if team is None:
                organization = Organization.objects.create(
                    name=NAME, for_internal_metrics=True)
                team = Team.objects.create(
                    name=NAME,
                    organization=organization,
                    ingested_event=True,
                    completed_snippet_onboarding=True,
                    is_demo=True,
                )

        return team.pk
    except Exception:
        # Ignore errors during team finding/creation.
        capture_exception()

        return None
Example #8
def is_unchunked_snapshot(event: Dict) -> bool:
    try:
        is_snapshot = event["event"] == "$snapshot"
    except KeyError:
        raise ValueError('All events must have the event name field "event"!')
    try:
        return is_snapshot and "chunk_id" not in event["properties"]["$snapshot_data"]
    except KeyError:
        capture_exception()
        raise ValueError('$snapshot events must contain property "$snapshot_data"!')
Example #9
def insert_cohort_actors_into_ch(cohort: Cohort, filter_data: Dict):
    insight_type = filter_data.get("insight")
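    # Each supported insight type gets its own actor query builder below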
    query_builder: ActorBaseQuery

    if insight_type == INSIGHT_TRENDS:
        filter = Filter(data=filter_data, team=cohort.team)
        entity = get_target_entity(filter)
        query_builder = ClickhouseTrendsActors(cohort.team, entity, filter)
    elif insight_type == INSIGHT_STICKINESS:
        stickiness_filter = StickinessFilter(data=filter_data,
                                             team=cohort.team)
        entity = get_target_entity(stickiness_filter)
        query_builder = ClickhouseStickinessActors(cohort.team, entity,
                                                   stickiness_filter)
    elif insight_type == INSIGHT_FUNNELS:
        funnel_filter = Filter(data=filter_data, team=cohort.team)
        if funnel_filter.correlation_person_entity:
            query_builder = FunnelCorrelationActors(filter=funnel_filter,
                                                    team=cohort.team)
        else:
            funnel_actor_class = get_funnel_actor_class(funnel_filter)
            query_builder = funnel_actor_class(filter=funnel_filter,
                                               team=cohort.team)
    elif insight_type == INSIGHT_PATHS:
        path_filter = PathFilter(data=filter_data, team=cohort.team)
        query_builder = ClickhousePathsActors(path_filter,
                                              cohort.team,
                                              funnel_filter=None)
    else:
        if settings.DEBUG:
            raise ValueError(
                f"Insight type: {insight_type} not supported for cohort creation"
            )
        else:
            capture_exception(
                Exception(
                    f"Insight type: {insight_type} not supported for cohort creation"
                ))
            # query_builder was never assigned, so bail out instead of raising an UnboundLocalError below
            return

    if query_builder.is_aggregating_by_groups:
        if settings.DEBUG:
            raise ValueError(
                f"Query type: Group based queries are not supported for cohort creation"
            )
        else:
            capture_exception(
                Exception(
                    f"Query type: Group based queries are not supported for cohort creation"
                ))
            # query and params were never assigned for group-based queries, so stop here
            return
    else:
        query, params = query_builder.actor_query(limit_actors=False)

    insert_actors_into_cohort_by_query(cohort, query, params)
Example #10
def save_query(sql: str, params: Dict, execution_time: float) -> None:
    """
    Save query for debugging purposes
    """

    try:
        key = "save_query_{}".format(_save_query_user_id)
        queries = json.loads(get_safe_cache(key) or "[]")

        queries.insert(
            0, {"timestamp": now().isoformat(), "query": format_sql(sql, params), "execution_time": execution_time}
        )
        cache.set(key, json.dumps(queries), timeout=120)
    except Exception as e:
        capture_exception(e)
Example #11
def get_active_feature_flags(team: Team, distinct_id: str) -> List[str]:
    flags_enabled = []
    feature_flags = FeatureFlag.objects.filter(team=team,
                                               active=True,
                                               deleted=False).only(
                                                   "id", "team_id", "filters",
                                                   "key", "rollout_percentage")
    for feature_flag in feature_flags:
        try:
            # distinct_id will always be a string, but data can have non-string values ("Any")
            if feature_flag.distinct_id_matches(distinct_id):
                flags_enabled.append(feature_flag.key)
        except Exception as err:
            capture_exception(err)
    return flags_enabled
Example #12
    def process_people_in_period(
        self, filter: RetentionFilter, vals, people_dict: Dict[str, ReturnDict]
    ) -> List[Dict[str, Any]]:
        marker_length = filter.total_intervals
        result = []
        for val in vals:
            # NOTE: This try/except shouldn't be necessary but there do seem to be a handful of missing persons that can't be looked up
            try:
                result.append(
                    {"person": people_dict[val[0]], "appearances": appearance_to_markers(sorted(val[2]), marker_length)}
                )
            except Exception as e:
                capture_exception(e)
                continue

        return result
Example #13
    def _run_query(self, filter: Filter, entity: Entity, team_id: int) -> List[Dict[str, Any]]:
        sql, params, parse_function = self._get_sql_for_entity(filter, entity, team_id)
        try:
            result = sync_execute(sql, params)
        except Exception as e:
            capture_exception(e)
            if settings.TEST:
                raise e
            result = []
        result = parse_function(result)
        serialized_data = self._format_serialized(entity, result)

        if filter.display == TRENDS_CUMULATIVE:
            serialized_data = self._handle_cumulative(serialized_data)

        return serialized_data
Example #14
    def _handle_static(self, cohort: Cohort, request: Request):
        if request.FILES.get("csv"):
            self._calculate_static_by_csv(request.FILES["csv"], cohort)
        else:
            try:
                filter = Filter(request=request)
                team = request.user.team
                target_entity = get_target_entity(request)
                if filter.shown_as == TRENDS_STICKINESS:
                    stickiness_filter = StickinessFilter(
                        request=request, team=team, get_earliest_timestamp=self.earliest_timestamp_func
                    )
                    self._handle_stickiness_people(target_entity, cohort, stickiness_filter)
                else:
                    self._handle_trend_people(target_entity, cohort, filter)
            except Exception as e:
                capture_exception(e)
                raise ValueError("This cohort has no conditions")
Example #15
    def get_count(self, action: Action) -> Optional[int]:
        if self.context.get("view") and self.context["view"].action != "list":
            query, params = format_action_filter(action)
            if query == "":
                return None
            try:
                return sync_execute(
                    "SELECT count(1) FROM events WHERE team_id = %(team_id)s AND {}"
                    .format(query),
                    {
                        "team_id": action.team_id,
                        **params
                    },
                )[0][0]
            except Exception as e:
                capture_exception(e)
                return None
        return None
Example #16
    def process_people_in_period(
        self, filter: RetentionFilter, people_appearances: List[AppearanceRow], people_dict: Dict[str, ReturnDict]
    ) -> List[Dict[Literal["person", "appearances"], Any]]:
        marker_length = filter.total_intervals
        result: List[Dict[Literal["person", "appearances"], Any]] = []
        for person in people_appearances:
            # NOTE: This try/except shouldn't be necessary but there do seem to be a handful of missing persons that can't be looked up
            try:
                result.append(
                    {
                        "person": people_dict[person.person_id],
                        "appearances": appearance_to_markers(sorted(person.appearances), marker_length),
                    }
                )
            except Exception as e:
                capture_exception(e)
                continue

        return result
Example #17
def _capture(metric_name: str, value: Any, tags: Tags):
    from posthog.api.capture import capture_internal

    try:
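        # Internal metrics are recorded as ordinary events captured against the dedicated internal-metrics team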
        team_id = get_internal_metrics_team_id()
        if team_id is not None:
            now = timezone.now()
            distinct_id = utils.get_machine_id()
            event = {
                "event": f"$${metric_name}",
                "properties": {
                    "value": value,
                    **(tags or {})
                }
            }
            capture_internal(event, distinct_id, None, None, now, now, team_id)
    except Exception as err:
        # Ignore errors, this is not important enough to fail API on
        capture_exception(err)
Example #18
    def send_message(self,
                     message: str,
                     channel: Optional[str] = None,
                     thread_id: Optional[str] = None) -> Optional[str]:
        '''Sends a message to the given Slack channel and returns the thread ID. Posts as a reply if `thread_id` is passed.'''

        # Fall back on the default channel if a channel name isn't immediately passed
        if not channel:
            channel = self.default_channel

        # Ensure the channel name starts with '#'
        if channel[0] != '#':
            channel = f'#{channel}'

        try:
            # Send the message to the channel
            response = self._client.api_call(
                api_method='chat.postMessage',
                json={
                    'channel': channel,
                    'text': message,
                    **({} if not thread_id else {
                           'thread_ts': thread_id
                       })
                },
            )

            # Log errors
            if not response or 'error' in response:
                capture_exception(
                    Exception(f"Slack logging error: {response['error']}"))
                return None

            # Return the thread ID
            return response.get('ts')

        # Catch and log any exceptions thrown when logging
        except Exception as e:
            capture_exception(e)
Example #19
def save_query(sql: str, execution_time: float) -> None:
    """
    Save query for debugging purposes
    """
    if _request_information is None:
        return

    try:
        key = "save_query_{}".format(_request_information["user_id"])
        queries = json.loads(get_safe_cache(key) or "[]")

        queries.insert(
            0,
            {
                "timestamp": now().isoformat(),
                "query": format_sql(sql, colorize=False),
                "execution_time": execution_time,
            },
        )
        cache.set(key, json.dumps(queries), timeout=120)
    except Exception as e:
        capture_exception(e)
Example #20
    def handle(self, span: Span) -> None:
        if span.get_error() is not None:
            capture_exception(span.get_error())
Example #21
def setup_periodic_tasks(sender: Celery, **kwargs):
    if not settings.DEBUG:
        sender.add_periodic_task(1.0,
                                 redis_celery_queue_depth.s(),
                                 name="1 sec queue probe",
                                 priority=0)
    # Heartbeat every 10sec to make sure the worker is alive
    sender.add_periodic_task(10.0,
                             redis_heartbeat.s(),
                             name="10 sec heartbeat",
                             priority=0)

    # Update events table partitions twice a week
    sender.add_periodic_task(
        crontab(day_of_week="mon,fri", hour=0, minute=0),
        update_event_partitions.s(),  # check twice a week
    )

    # Send weekly status report on self-hosted instances
    if not getattr(settings, "MULTI_TENANCY", False):
        sender.add_periodic_task(crontab(day_of_week="mon", hour=0, minute=0),
                                 status_report.s())

    # Cloud (posthog-cloud) cron jobs
    if getattr(settings, "MULTI_TENANCY", False):
        sender.add_periodic_task(
            crontab(hour=0, minute=0),
            calculate_billing_daily_usage.s())  # every day midnight UTC

    sender.add_periodic_task(crontab(day_of_week="fri", hour=0, minute=0),
                             clean_stale_partials.s())

    # delete old plugin logs every 4 hours
    sender.add_periodic_task(crontab(minute=0, hour="*/4"),
                             delete_old_plugin_logs.s())

    # sync all Organization.available_features every hour
    sender.add_periodic_task(crontab(minute=30, hour="*"),
                             sync_all_organization_available_features.s())

    sender.add_periodic_task(UPDATE_CACHED_DASHBOARD_ITEMS_INTERVAL_SECONDS,
                             check_cached_items.s(),
                             name="check dashboard items")

    sender.add_periodic_task(crontab(minute="*/15"),
                             check_async_migration_health.s())

    sender.add_periodic_task(
        crontab(
            hour=0, minute=randrange(0, 40)
        ),  # every day at a random minute past midnight. Sends data from the preceding whole day.
        send_org_usage_report.s(),
        name="send event usage report",
    )

    sender.add_periodic_task(120,
                             clickhouse_lag.s(),
                             name="clickhouse table lag")
    sender.add_periodic_task(120,
                             clickhouse_row_count.s(),
                             name="clickhouse events table row count")
    sender.add_periodic_task(120,
                             clickhouse_part_count.s(),
                             name="clickhouse table parts count")
    sender.add_periodic_task(120,
                             clickhouse_mutation_count.s(),
                             name="clickhouse table mutations count")

    sender.add_periodic_task(crontab(minute=0, hour="*"),
                             calculate_cohort_ids_in_feature_flags_task.s())

    sender.add_periodic_task(
        crontab(hour=0, minute=randrange(0, 40)),
        clickhouse_send_license_usage.s()
    )  # every day at a random minute past midnight. Randomize to avoid overloading license.posthog.com
    try:
        from ee.settings import MATERIALIZE_COLUMNS_SCHEDULE_CRON

        minute, hour, day_of_month, month_of_year, day_of_week = MATERIALIZE_COLUMNS_SCHEDULE_CRON.strip(
        ).split(" ")

        sender.add_periodic_task(
            crontab(
                minute=minute,
                hour=hour,
                day_of_month=day_of_month,
                month_of_year=month_of_year,
                day_of_week=day_of_week,
            ),
            clickhouse_materialize_columns.s(),
            name="clickhouse materialize columns",
        )

        sender.add_periodic_task(
            crontab(hour="*/4", minute=0),
            clickhouse_mark_all_materialized.s(),
            name="clickhouse mark all columns as materialized",
        )
    except Exception as err:
        capture_exception(err)
        print(f"Scheduling materialized column task failed: {err}")

    sender.add_periodic_task(120,
                             calculate_cohort.s(),
                             name="recalculate cohorts")

    if settings.ASYNC_EVENT_PROPERTY_USAGE:
        sender.add_periodic_task(
            EVENT_PROPERTY_USAGE_INTERVAL_SECONDS,
            calculate_event_property_usage.s(),
            name="calculate event property usage",
        )
Example #22
    def validate_filters(self, value):
        # :KLUDGE: Debug code to track down the cause of blank dashboards
        if len(value) == 0 or ("from_dashboard" in value and len(value) == 1):
            capture_exception(
                Exception("Saving dashboard_item with blank filters"))
        return value
Example #23
def get_event(request):
    timer = statsd.timer("posthog_cloud_event_endpoint").start()
    now = timezone.now()

    data, error_response = get_data(request)

    if error_response:
        return error_response

    sent_at = _get_sent_at(data, request)

    token = get_token(data, request)

    if not token:
        return cors_response(
            request,
            generate_exception_response(
                "capture",
                "API key not provided. You can find your project API key in PostHog project settings.",
                type="authentication_error",
                code="missing_api_key",
                status_code=status.HTTP_401_UNAUTHORIZED,
            ),
        )

    ingestion_context, db_error, error_response = get_event_ingestion_context(
        request, data, token)

    if error_response:
        return error_response

    send_events_to_dead_letter_queue = False
    if db_error:
        send_events_to_dead_letter_queue = True

    if isinstance(data, dict):
        if data.get("batch"):  # posthog-python and posthog-ruby
            data = data["batch"]
            assert data is not None
        elif "engage" in request.path_info:  # JS identify call
            data["event"] = "$identify"  # make sure it has an event name

    if isinstance(data, list):
        events = data
    else:
        events = [data]

    try:
        events = preprocess_session_recording_events(events)
    except ValueError as e:
        return cors_response(
            request,
            generate_exception_response("capture",
                                        f"Invalid payload: {e}",
                                        code="invalid_payload"))

    site_url = request.build_absolute_uri("/")[:-1]

    ip = (None if not ingestion_context or ingestion_context.anonymize_ips
          else get_ip_address(request))
    for event in events:
        event_uuid = UUIDT()
        distinct_id = get_distinct_id(event)
        if not distinct_id:
            continue

        payload_uuid = event.get("uuid", None)
        if payload_uuid:
            if UUIDT.is_valid_uuid(payload_uuid):
                event_uuid = UUIDT(uuid_str=payload_uuid)
            else:
                statsd.incr("invalid_event_uuid")

        event = parse_event(event, distinct_id, ingestion_context)
        if not event:
            continue

        if send_events_to_dead_letter_queue:
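            # The team lookup failed earlier, so park this event in the dead letter queue instead of ingesting it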
            kafka_event = parse_kafka_event_data(
                distinct_id=distinct_id,
                ip=None,
                site_url=site_url,
                team_id=None,
                now=now,
                event_uuid=event_uuid,
                data=event,
                sent_at=sent_at,
            )

            log_event_to_dead_letter_queue(
                data,
                event["event"],
                kafka_event,
                f"Unable to fetch team from Postgres. Error: {db_error}",
                "django_server_capture_endpoint",
            )
            continue

        try:
            capture_internal(event, distinct_id, ip, site_url, now, sent_at,
                             ingestion_context.team_id,
                             event_uuid)  # type: ignore
        except Exception as e:
            timer.stop()
            capture_exception(e, {"data": data})
            statsd.incr(
                "posthog_cloud_raw_endpoint_failure",
                tags={
                    "endpoint": "capture",
                },
            )
            return cors_response(
                request,
                generate_exception_response(
                    "capture",
                    "Unable to store event. Please try again. If you are the owner of this app you can check the logs for further details.",
                    code="server_error",
                    type="server_error",
                    status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
                ),
            )

    timer.stop()
    statsd.incr(
        "posthog_cloud_raw_endpoint_success",
        tags={
            "endpoint": "capture",
        },
    )
    return cors_response(request, JsonResponse({"status": 1}))