def track_trends_person_property_breakdown(self):
    """Run a Trends query for ``$pageview`` broken down by the ``$browser`` person property.

    The filter is built first; only the query itself executes inside
    ``no_materialized_columns()``.
    """
    breakdown_query = {
        "events": [{"id": "$pageview"}],
        "breakdown": "$browser",
        "breakdown_type": "person",
        **DATE_RANGE,
    }
    trends_filter = Filter(data=breakdown_query)

    with no_materialized_columns():
        Trends().run(trends_filter, self.team)
def filter_persons(team_id: int, request: request.Request, queryset: QuerySet) -> QuerySet:
    """Narrow ``queryset`` according to the GET parameters of ``request``.

    Parameters that are present and non-empty are applied in this order:

    * ``id`` / ``uuid`` -- comma-separated values matched with ``id__in`` /
      ``uuid__in``.
    * ``search`` -- space-separated terms. A term containing ``:`` is split on
      its first colon into ``matcher:key``; ``has:<key>`` requires the property
      key to exist, and a term with any other matcher is ignored. The remaining
      terms are re-joined with spaces and matched case-insensitively against
      the properties or a distinct id, de-duplicated on ``id``.
    * ``cohort`` -- matched against ``cohort__id``.
    * ``properties`` -- a JSON document wrapped in a ``Filter`` and applied
      through ``properties_to_Q`` for ``team_id``.

    The returned queryset always prefetches ``persondistinctid_set`` into
    ``distinct_ids_cache``.
    """
    params = request.GET

    if params.get("id"):
        queryset = queryset.filter(id__in=params["id"].split(","))

    if params.get("uuid"):
        queryset = queryset.filter(uuid__in=params["uuid"].split(","))

    if params.get("search"):
        free_text_terms = []
        for term in params["search"].split(" "):
            if ":" not in term:
                free_text_terms.append(term)
                continue
            # partition() splits on the first colon only, so a term holding
            # several colons (e.g. a pasted URL) no longer raises ValueError
            # when unpacked.
            matcher, _, key = term.partition(":")
            if matcher == "has":
                # Matches for example has:email or has:name
                queryset = queryset.filter(properties__has_key=key)
        needle = " ".join(free_text_terms)
        queryset = queryset.filter(
            Q(properties__icontains=needle) | Q(persondistinctid__distinct_id__icontains=needle)
        ).distinct("id")

    if params.get("cohort"):
        queryset = queryset.filter(cohort__id=params["cohort"])

    if params.get("properties"):
        # Named property_filter so the builtin ``filter`` is not shadowed.
        property_filter = Filter(data={"properties": json.loads(params["properties"])})
        queryset = queryset.filter(properties_to_Q(property_filter.properties, team_id=team_id))

    queryset = queryset.prefetch_related(Prefetch("persondistinctid_set", to_attr="distinct_ids_cache"))
    return queryset
def get_filter(team, data: Optional[dict] = None, request: Optional[HttpRequest] = None):
    """Build the filter object that matches the requested insight type.

    The insight is read from ``data["insight"]`` and, when that is missing or
    empty and a request was given, from the request's ``insight`` GET parameter.

    Args:
        team: Passed on to ``StickinessFilter`` only.
        data: Raw filter data. Omitting it (or passing ``None``) behaves like an
            empty dict.
        request: Optional request forwarded to the filter constructor.

    Returns:
        A ``RetentionFilter``, ``SessionsFilter``, ``StickinessFilter`` or
        ``PathFilter`` for the matching insight, otherwise a plain ``Filter``.
    """
    # Imported at call time rather than at module level.
    # ``PathFilter`` and the ``INSIGHT_*`` constants are not imported here and
    # must come from the enclosing module.
    from posthog.models.event import Event
    from posthog.models.filters.filter import Filter
    from posthog.models.filters.retention_filter import RetentionFilter
    from posthog.models.filters.sessions_filter import SessionsFilter
    from posthog.models.filters.stickiness_filter import StickinessFilter

    # A ``{}`` default would be a single dict shared by every call and handed
    # straight to the filter constructors below; create a fresh one per call.
    data = {} if data is None else data

    insight = data.get("insight")
    if not insight and request:
        insight = request.GET.get("insight")

    if insight == INSIGHT_RETENTION:
        return RetentionFilter(data={**data, "insight": INSIGHT_RETENTION}, request=request)
    elif insight == INSIGHT_SESSIONS:
        return SessionsFilter(data={**data, "insight": INSIGHT_SESSIONS}, request=request)
    elif insight == INSIGHT_TRENDS and data.get("shown_as") == "Stickiness":

        def earliest_timestamp_func(team_id):
            return Event.objects.earliest_timestamp(team_id)

        return StickinessFilter(
            data=data, request=request, team=team, get_earliest_timestamp=earliest_timestamp_func
        )
    elif insight == INSIGHT_PATHS:
        return PathFilter(data={**data, "insight": INSIGHT_PATHS}, request=request)
    return Filter(data=data, request=request)
def __init__(
    self,
    filter: Filter,
    team: Team,
    feature_flag: FeatureFlag,
    experiment_start_date: datetime,
    experiment_end_date: Optional[datetime] = None,
):
    """Derive the experiment query filter from ``filter`` and store it with ``team``.

    The derived filter covers the experiment dates and breaks down by the
    ``$feature/<flag key>`` event property, restricted to the flag's variant
    keys. When its insight is ``INSIGHT_TRENDS`` the display is additionally
    switched to ``TRENDS_CUMULATIVE``.
    """
    breakdown_key = f"$feature/{feature_flag.key}"
    variant_keys = [flag_variant["key"] for flag_variant in feature_flag.variants]

    # :TRICKY: We don't use properties set on filters, instead using experiment variant options
    variant_condition = {"key": breakdown_key, "value": variant_keys, "operator": "exact", "type": "event"}
    experiment_overrides = {
        "date_from": experiment_start_date,
        "date_to": experiment_end_date,
        "breakdown": breakdown_key,
        "breakdown_type": "event",
        "properties": [variant_condition],
    }
    experiment_filter = filter.with_data(experiment_overrides)

    self.team = team

    if experiment_filter.insight == INSIGHT_TRENDS:
        experiment_filter = experiment_filter.with_data({"display": TRENDS_CUMULATIVE})
    self.query_filter = experiment_filter
def track_correlations_by_events(self):
    """Run a FunnelCorrelation over the "user signed up" -> "insight analyzed" events."""
    funnel_steps = [{"id": "user signed up"}, {"id": "insight analyzed"}]
    correlation_query = {"events": funnel_steps, **SHORT_DATE_RANGE}
    correlation_filter = Filter(data=correlation_query, team=self.team)

    FunnelCorrelation(correlation_filter, self.team).run()
def __init__(
    self,
    filter: Filter,
    team: Team,
    feature_flag: FeatureFlag,
    experiment_start_date: datetime,
    experiment_end_date: Optional[datetime] = None,
    funnel_class: Type[ClickhouseFunnel] = ClickhouseFunnel,
):
    """Create the funnel used for the experiment and store it on ``self.funnel``.

    ``filter`` is extended with the experiment dates and a breakdown on the
    ``$feature/<flag key>`` event property, restricted to the flag's variant
    keys. ``funnel_class`` is then instantiated with that filter and ``team``.
    """
    breakdown_key = f"$feature/{feature_flag.key}"
    variant_keys = [flag_variant["key"] for flag_variant in feature_flag.variants]

    # :TRICKY: We don't use properties set on filters, instead using experiment variant options
    variant_condition = {"key": breakdown_key, "value": variant_keys, "operator": "exact", "type": "event"}
    experiment_overrides = {
        "date_from": experiment_start_date,
        "date_to": experiment_end_date,
        "breakdown": breakdown_key,
        "breakdown_type": "event",
        "properties": [variant_condition],
    }
    experiment_filter = filter.with_data(experiment_overrides)

    self.funnel = funnel_class(experiment_filter, team)
def track_lifecycle_event_property_filter_materialized(self):
    """Run a weekly lifecycle Trends query that excludes a list of localhost / 127.0.0.1 hosts."""
    excluded_hosts = [
        "localhost:8000",
        "localhost:5000",
        "127.0.0.1:8000",
        "127.0.0.1:3000",
        "localhost:3000",
    ]
    host_condition = {"key": "$host", "operator": "is_not", "value": excluded_hosts}
    lifecycle_query = {
        "insight": "LIFECYCLE",
        "events": [{"id": "$pageview", "type": "events"}],
        "interval": "week",
        "shown_as": "Lifecycle",
        "date_from": "-14d",
        "properties": [host_condition],
        # Unpacked last: any key DATE_RANGE defines replaces the literal above.
        **DATE_RANGE,
    }
    lifecycle_filter = Filter(data=lifecycle_query, team=self.team)

    Trends().run(lifecycle_filter, self.team)
def track_lifecycle_person_property_filter_materialized(self):
    """Run a weekly lifecycle ClickhouseTrends query for persons whose email contains ".com"."""
    email_condition = {"key": "email", "operator": "icontains", "value": ".com", "type": "person"}
    lifecycle_query = {
        "insight": "LIFECYCLE",
        "events": [{"id": "$pageview", "type": "events"}],
        "interval": "week",
        "shown_as": "Lifecycle",
        "date_from": "-14d",
        "properties": [email_condition],
        # Unpacked last: any key DATE_RANGE defines replaces the literal above.
        **DATE_RANGE,
    }
    lifecycle_filter = Filter(data=lifecycle_query, team=self.team)

    ClickhouseTrends().run(lifecycle_filter, self.team)
def insert_cohort_from_query(
    cohort_id: int, insight_type: str, filter_data: Dict[str, Any], entity_data: Dict[str, Any]
) -> None:
    """Insert the people matched by an insight query into the cohort ``cohort_id``.

    Does nothing unless ``is_ee_enabled()`` is true. Stickiness insights are
    handled by ``insert_stickiness_people_into_cohort`` with a
    ``StickinessFilter``; every other insight type goes through
    ``insert_entity_people_into_cohort`` with a plain ``Filter``. In both cases
    ``insert_cohort_people_into_pg`` is called afterwards.
    """
    if not is_ee_enabled():
        return

    # These imports are only executed once the EE check has passed.
    from ee.clickhouse.queries.clickhouse_stickiness import insert_stickiness_people_into_cohort
    from ee.clickhouse.queries.util import get_earliest_timestamp
    from ee.clickhouse.views.actions import insert_entity_people_into_cohort
    from ee.clickhouse.views.cohort import insert_cohort_people_into_pg
    from posthog.models.entity import Entity
    from posthog.models.filters.filter import Filter
    from posthog.models.filters.stickiness_filter import StickinessFilter

    cohort = Cohort.objects.get(pk=cohort_id)
    target_entity = Entity(data=entity_data)

    if insight_type == INSIGHT_STICKINESS:
        stickiness_filter = StickinessFilter(
            data=filter_data, team=cohort.team, get_earliest_timestamp=get_earliest_timestamp
        )
        insert_stickiness_people_into_cohort(cohort, target_entity, stickiness_filter)
    else:
        insert_entity_people_into_cohort(cohort, target_entity, Filter(data=filter_data))

    insert_cohort_people_into_pg(cohort=cohort)
def secondary_results(self, request: Request, *args: Any, **kwargs: Any) -> Response:
    """Return the results for one secondary metric of the current experiment.

    The metric is selected by the ``id`` query parameter, a zero-based index
    into ``experiment.secondary_metrics``.

    Raises:
        ValidationError: if the experiment has no secondary metrics, or the id
            is missing, not an integer, or outside ``0 .. len(metrics) - 1``.
    """
    experiment: Experiment = self.get_object()

    if not experiment.secondary_metrics:
        raise ValidationError("Experiment has no secondary metrics")

    metric_id = request.query_params.get("id")

    if not metric_id:
        raise ValidationError("Secondary metric id is required")

    try:
        parsed_id = int(metric_id)
    except ValueError:
        raise ValidationError("Secondary metric id must be an integer")

    # Valid indices are 0 .. len - 1. The previous ``>`` comparison let
    # ``parsed_id == len`` through to an IndexError below, and allowed negative
    # ids to index from the end of the list.
    if not 0 <= parsed_id < len(experiment.secondary_metrics):
        raise ValidationError("Invalid metric ID")

    filter = Filter(experiment.secondary_metrics[parsed_id]["filters"])

    result = ClickhouseSecondaryExperimentResult(
        filter,
        self.team,
        experiment.feature_flag,
        experiment.start_date,
        experiment.end_date,
    ).get_results()

    return Response(result)
def determine_compared_filter(filter):
    """Return a new ``Filter`` covering the comparison period of ``filter``.

    The comparison dates come from ``get_compare_period_dates`` and replace
    ``date_from`` / ``date_to`` in a copy of ``filter._data``, formatted as ISO
    dates.

    Raises:
        ValueError: if ``filter`` lacks a ``date_from`` or a ``date_to``.
    """
    if not (filter.date_to and filter.date_from):
        raise ValueError("You need date_from and date_to to compare")

    compare_start, compare_end = get_compare_period_dates(filter.date_from, filter.date_to)
    compare_window = {
        "date_from": compare_start.date().isoformat(),
        "date_to": compare_end.date().isoformat(),
    }
    return Filter(data={**filter._data, **compare_window})
def track_trends_filter_by_action_current_url_materialized(self):
    """Create a "docs view" action (``$pageview`` whose URL contains "docs") and run trends on it."""
    docs_action = Action.objects.create(team=self.team, name="docs view")
    ActionStep.objects.create(action=docs_action, event="$pageview", url="docs", url_matching="contains")

    action_query = {"actions": [{"id": docs_action.id}], **DATE_RANGE}
    trends_filter = Filter(data=action_query, team=self.team)

    ClickhouseTrends().run(trends_filter, self.team)
def track_trends_dau(self):
    """Run a ClickhouseTrends query for ``$pageview`` using ``dau`` math."""
    dau_series = {"id": "$pageview", "math": "dau"}
    trends_filter = Filter(data={"events": [dau_series], **DATE_RANGE})

    ClickhouseTrends().run(trends_filter, self.team)
def track_trends_filter_by_action_with_person_filters_materialized(self):
    """Create an action whose step filters on a person email containing ".com" and run trends on it."""
    email_condition = {"key": "email", "operator": "icontains", "value": ".com", "type": "person"}
    com_users_action = Action.objects.create(team=self.team, name=".com-users page views")
    ActionStep.objects.create(action=com_users_action, event="$pageview", properties=[email_condition])

    action_query = {"actions": [{"id": com_users_action.id}], **DATE_RANGE}
    trends_filter = Filter(data=action_query, team=self.team)

    ClickhouseTrends().run(trends_filter, self.team)
def get_filter(team, data: Optional[dict] = None, request: Optional[Request] = None):
    """Build the filter object that matches the requested insight type.

    The insight is read from ``data["insight"]`` and, when that is missing or
    empty and a request was given, from the request's ``insight`` GET parameter
    or request body.

    Args:
        team: Forwarded to every filter constructor.
        data: Raw filter data. Omitting it (or passing ``None``) behaves like an
            empty dict.
        request: Optional request forwarded to the filter constructor. For
            funnels its ``data`` is also merged over ``data``.

    Returns:
        A ``RetentionFilter``, ``SessionsFilter``, ``StickinessFilter`` or
        ``PathFilter`` for the matching insight, otherwise a plain ``Filter``.
    """
    # Imported at call time rather than at module level.
    # ``PathFilter``, ``earliest_timestamp_func`` and the ``INSIGHT_*``
    # constants are not imported here and must come from the enclosing module.
    from posthog.models.filters.filter import Filter
    from posthog.models.filters.retention_filter import RetentionFilter
    from posthog.models.filters.sessions_filter import SessionsFilter
    from posthog.models.filters.stickiness_filter import StickinessFilter

    # A ``{}`` default would be a single dict shared by every call and handed
    # straight to the filter constructors below; create a fresh one per call.
    data = {} if data is None else data

    insight = data.get("insight")
    if not insight and request:
        insight = request.GET.get("insight") or request.data.get("insight")

    if insight == INSIGHT_RETENTION:
        return RetentionFilter(data={**data, "insight": INSIGHT_RETENTION}, request=request, team=team)
    elif insight == INSIGHT_SESSIONS:
        return SessionsFilter(data={**data, "insight": INSIGHT_SESSIONS}, request=request, team=team)
    elif insight == INSIGHT_STICKINESS or (insight == INSIGHT_TRENDS and data.get("shown_as") == "Stickiness"):
        return StickinessFilter(
            data=data, request=request, team=team, get_earliest_timestamp=earliest_timestamp_func
        )
    elif insight == INSIGHT_PATHS:
        return PathFilter(data={**data, "insight": INSIGHT_PATHS}, request=request, team=team)
    elif insight == INSIGHT_FUNNELS:
        # The request body overrides ``data``; the insight key is forced last.
        return Filter(
            data={**data, **(request.data if request else {}), "insight": INSIGHT_FUNNELS},
            request=request,
            team=team,
        )
    return Filter(data=data, request=request, team=team)
def track_trends_person_property_filter_materialized(self):
    """Run a ClickhouseTrends ``$pageview`` query for persons whose email contains ".com"."""
    email_condition = {"key": "email", "operator": "icontains", "value": ".com", "type": "person"}
    trends_query = {
        "events": [{"id": "$pageview"}],
        "properties": [email_condition],
        **DATE_RANGE,
    }
    trends_filter = Filter(data=trends_query)

    ClickhouseTrends().run(trends_filter, self.team)
def setUpTestData(cls):
    """Create the class-level dashboard and insight fixtures.

    The insight's filters are ``cls.insight_filter_dict`` passed through
    ``Filter(...).to_dict()``.
    """
    super().setUpTestData()

    cls.dashboard = Dashboard.objects.create(team=cls.team, name="example dashboard", created_by=cls.user)

    insight_filters = Filter(data=cls.insight_filter_dict).to_dict()
    cls.insight = Insight.objects.create(filters=insight_filters, team=cls.team, created_by=cls.user)
def track_funnel_normal(self):
    """Run a two-step ClickhouseFunnel: "user signed up" followed by "insight analyzed"."""
    funnel_steps = [
        {"id": "user signed up", "order": 0},
        {"id": "insight analyzed", "order": 1},
    ]
    funnel_query = {"insight": "FUNNELS", "events": funnel_steps, **DATE_RANGE}
    funnel_filter = Filter(data=funnel_query, team=self.team)

    ClickhouseFunnel(funnel_filter, self.team).run()
def track_trends_event_property_breakdown_materialized(self):
    """Run a ClickhouseTrends query for ``$pageview`` broken down by ``$host``."""
    breakdown_query = {
        "events": [{"id": "$pageview"}],
        "breakdown": "$host",
        **DATE_RANGE,
    }
    trends_filter = Filter(data=breakdown_query)

    ClickhouseTrends().run(trends_filter, self.team)
def test_get_target_entity(self):
    """Check that ``get_target_entity`` picks the entity named by the filter's ``entity_*`` keys.

    The first case has no ``entity_math`` and expects ``math`` to be ``None``.
    The second lists two ``$pageview`` events and expects the one whose math is
    ``unique_group`` to be selected.
    """
    # The unused ``request`` lambda that used to be defined here was never
    # called, so it has been dropped.
    filter = Filter(
        data={
            "entity_id": "$pageview",
            "entity_type": "events",
            "events": [{"id": "$pageview", "type": "events"}],
        }
    )
    entity = get_target_entity(filter)

    assert entity.id == "$pageview"
    assert entity.type == "events"
    assert entity.math is None

    filter = Filter(
        data={
            "entity_id": "$pageview",
            "entity_type": "events",
            "entity_math": "unique_group",
            "events": [
                {"id": "$pageview", "type": "events", "math": "unique_group"},
                {"id": "$pageview", "type": "events"},
            ],
        }
    )
    entity = get_target_entity(filter)

    assert entity.id == "$pageview"
    assert entity.type == "events"
    assert entity.math == "unique_group"
def track_trends_dau_person_property_filter(self):
    """Run a ``dau`` Trends query for ``$pageview``, limited to persons whose email contains ".com".

    The filter is built first; only the query itself executes inside
    ``no_materialized_columns()``.
    """
    email_condition = {"key": "email", "operator": "icontains", "value": ".com", "type": "person"}
    dau_query = {
        "events": [{"id": "$pageview", "math": "dau"}],
        "properties": [email_condition],
        **DATE_RANGE,
    }
    trends_filter = Filter(data=dau_query)

    with no_materialized_columns():
        Trends().run(trends_filter, self.team)
def __init__(self, filter: Filter, team: Team, base_uri: str = "/") -> None:
    """Set up the wrapped ``FunnelCorrelation`` and initialise the base class.

    The correlation receives a copy of ``filter`` whose properties are the
    filter's property groups AND-combined with its
    ``correlation_property_values`` (an empty list when unset). The base class
    is initialised with the original, unmodified ``filter``.
    """
    # Filtering on persons / groups properties can be pushed down to funnel_actors CTE
    combined_properties = filter.property_groups.combine_properties(
        PropertyOperatorType.AND, filter.correlation_property_values or []
    )
    correlation_overrides = {"properties": combined_properties.to_dict()}
    new_correlation_filter = filter.with_data(correlation_overrides)

    self._funnel_correlation = FunnelCorrelation(new_correlation_filter, team, base_uri=base_uri)
    super().__init__(team, filter)
def track_correlations_by_event_properties_materialized(self):
    """Run an EVENT_WITH_PROPERTIES funnel correlation limited to the ``$autocapture`` event."""
    funnel_steps = [{"id": "user signed up"}, {"id": "insight analyzed"}]
    correlation_query = {
        "events": funnel_steps,
        **SHORT_DATE_RANGE,
        "funnel_correlation_type": FunnelCorrelationType.EVENT_WITH_PROPERTIES,
        "funnel_correlation_event_names": ["$autocapture"],
    }
    correlation_filter = Filter(data=correlation_query, team=self.team)

    FunnelCorrelation(correlation_filter, self.team).run()
def track_correlations_by_properties(self):
    """Run a PROPERTIES funnel correlation on ``$browser``.

    The filter is built first; only the correlation itself executes inside
    ``no_materialized_columns()``.
    """
    funnel_steps = [{"id": "user signed up"}, {"id": "insight analyzed"}]
    correlation_query = {
        "events": funnel_steps,
        **SHORT_DATE_RANGE,
        "funnel_correlation_type": FunnelCorrelationType.PROPERTIES,
        "funnel_correlation_names": ["$browser"],
    }
    correlation_filter = Filter(data=correlation_query, team=self.team)

    with no_materialized_columns():
        FunnelCorrelation(correlation_filter, self.team).run()
def test_test_account_filters_with_groups(self):
    """Lifecycle results with a group-typed test-account filter enabled.

    The team's test-account filter is a group property condition
    (``key == "value"`` on group type index 0). Two groups are created: "in"
    carries that property value and "out" carries a different one. person1 only
    has an event tagged with "out"; person2 has three events tagged with "in".
    The lifecycle query is run with ``FILTER_TEST_ACCOUNTS`` switched on and
    compared against the expected per-status series.
    """
    self.team.test_account_filters = [
        {"key": "key", "type": "group", "value": "value", "group_type_index": 0},
    ]
    self.team.save()

    GroupTypeMapping.objects.create(team=self.team, group_type="organization", group_type_index=0)
    for group_key, property_value in (("in", "value"), ("out", "othervalue")):
        create_group(self.team.pk, group_type_index=0, group_key=group_key, properties={"key": property_value})

    # Each person is created while the clock is frozen at its own timestamp.
    for created_at, distinct_id in (
        ("2020-01-11T12:00:00Z", "person1"),
        ("2020-01-09T12:00:00Z", "person2"),
    ):
        with freeze_time(created_at):
            Person.objects.create(distinct_ids=[distinct_id], team_id=self.team.pk)

    events_by_person = {
        "person1": [
            {"event": "$pageview", "timestamp": datetime(2020, 1, 11, 12), "properties": {"$group_0": "out"}},
        ],
        "person2": [
            {"event": "$pageview", "timestamp": datetime(2020, 1, day, 12), "properties": {"$group_0": "in"}}
            for day in (9, 12, 15)
        ],
    }
    journeys_for(events_by_person, self.team)

    lifecycle_query = {
        "date_from": "2020-01-12T00:00:00Z",
        "date_to": "2020-01-19T00:00:00Z",
        "events": [{"id": "$pageview", "type": "events", "order": 0}],
        "shown_as": TRENDS_LIFECYCLE,
        FILTER_TEST_ACCOUNTS: True,
    }
    result = ClickhouseTrends().run(Filter(data=lifecycle_query, team=self.team), self.team)

    expected_series = [
        {"status": "dormant", "data": [0, -1, 0, 0, -1, 0, 0, 0]},
        {"status": "new", "data": [0] * 8},
        {"status": "resurrecting", "data": [1, 0, 0, 1, 0, 0, 0, 0]},
        {"status": "returning", "data": [0] * 8},
    ]
    self.assertLifecycleResults(result, expected_series)
def track_trends_filter_by_cohort_precalculated(self):
    """Run a cohort-filtered ClickhouseTrends query after stamping the cohort's ``last_calculation``.

    ``self.cohort.last_calculation`` is set to ``now()`` and saved before the
    query is built.
    """
    self.cohort.last_calculation = now()
    self.cohort.save()

    cohort_condition = {"key": "id", "value": self.cohort.pk, "type": "cohort"}
    trends_query = {
        "events": [{"id": "$pageview"}],
        "properties": [cohort_condition],
        **DATE_RANGE,
    }
    trends_filter = Filter(data=trends_query, team=self.team)

    ClickhouseTrends().run(trends_filter, self.team)
def track_lifecycle(self):
    """Run a weekly lifecycle Trends query for ``$pageview``."""
    lifecycle_query = {
        "insight": "LIFECYCLE",
        "events": [{"id": "$pageview", "type": "events"}],
        "interval": "week",
        "shown_as": "Lifecycle",
        "date_from": "-14d",
        # Unpacked last: any key DATE_RANGE defines replaces the literal above.
        **DATE_RANGE,
    }
    lifecycle_filter = Filter(data=lifecycle_query, team=self.team)

    Trends().run(lifecycle_filter, self.team)
def track_trends_filter_by_cohort(self):
    """Run a cohort-filtered ClickhouseTrends query after clearing the cohort's ``last_calculation``.

    ``self.cohort.last_calculation`` is reset to ``None`` and saved before the
    query is built; the query itself executes inside
    ``no_materialized_columns()``.
    """
    self.cohort.last_calculation = None
    self.cohort.save()

    cohort_condition = {"key": "id", "value": self.cohort.pk, "type": "cohort"}
    trends_query = {
        "events": [{"id": "$pageview"}],
        "properties": [cohort_condition],
        **DATE_RANGE,
    }
    trends_filter = Filter(data=trends_query, team=self.team)

    with no_materialized_columns():
        ClickhouseTrends().run(trends_filter, self.team)
def _run(self, extra: Optional[Dict] = None, run_at: Optional[str] = None):
    """Run the ``A + B`` formula trends query and return its response.

    Series A is the sum and series B the average of the ``session duration``
    property on ``session start`` events.

    Args:
        extra: Additional filter data merged over the base query. Omitting it
            (or passing ``None``) adds nothing.
        run_at: Timestamp to freeze the clock at while the query runs; defaults
            to ``2020-01-04T13:01:01Z``.
    """
    # ``None`` replaces the former ``{}`` default so that no dict instance is
    # shared between calls.
    extra_data = {} if extra is None else extra

    with freeze_time(run_at or "2020-01-04T13:01:01Z"):
        action_response = ClickhouseTrends().run(
            Filter(
                data={
                    "events": [
                        {"id": "session start", "math": "sum", "math_property": "session duration"},
                        {"id": "session start", "math": "avg", "math_property": "session duration"},
                    ],
                    "formula": "A + B",
                    **extra_data,
                }
            ),
            self.team,
        )
    return action_response
def _handle_static(self, cohort: Cohort, request: Request):
    """Populate a static cohort from an uploaded CSV or from the request's filter.

    When the request carries a ``csv`` file it is handed to
    ``_calculate_static_by_csv``. Otherwise the people are taken from the
    request's filter: stickiness filters go through ``_handle_stickiness_people``
    and everything else through ``_handle_trend_people``.

    Raises:
        ValueError: if anything fails on the filter path. The underlying
            exception is reported through ``capture_exception`` first.
    """
    if request.FILES.get("csv"):
        self._calculate_static_by_csv(request.FILES["csv"], cohort)
        return

    try:
        request_filter = Filter(request=request)
        team = request.user.team
        target_entity = get_target_entity(request)

        if request_filter.shown_as != TRENDS_STICKINESS:
            self._handle_trend_people(target_entity, cohort, request_filter)
        else:
            stickiness_filter = StickinessFilter(
                request=request, team=team, get_earliest_timestamp=self.earliest_timestamp_func
            )
            self._handle_stickiness_people(target_entity, cohort, stickiness_filter)
    except Exception as error:
        # Every failure on this path is reported, then surfaced with the same message.
        capture_exception(error)
        raise ValueError("This cohort has no conditions")