Example #1
0
    def calculate_list(self, filter: Filter, team: Team, limit: int, offset: int):
        """Run the paginated sessions query for ``team`` and return the parsed rows.

        A missing start date is set on ``filter`` itself to midnight of the
        current day, and a missing end date to one day after the start.  The
        parsed rows are handed to ``_add_person_properties`` and
        ``add_session_recording_ids`` before being returned.
        """
        prop_clauses, prop_params = parse_prop_clauses("uuid", filter.properties, team)

        # NOTE: the defaults below are written onto the filter that was passed in.
        if not filter._date_from:
            midnight_today = timezone.now().replace(hour=0, minute=0, second=0, microsecond=0)
            filter._date_from = midnight_today
        if not filter._date_to and filter.date_from:
            filter._date_to = filter.date_from + relativedelta(days=1)

        date_from, date_to = parse_timestamps(filter)

        query_params = dict(prop_params)
        query_params.update(
            team_id=team.pk,
            limit=limit,
            offset=offset,
            distinct_id_limit=limit + offset,
        )

        # Property clauses are only spliced in when the filter carries properties.
        filters_sql = ""
        if filter.properties:
            filters_sql = "{}".format(prop_clauses)

        query = SESSION_SQL.format(
            date_from=date_from,
            date_to=date_to,
            filters=filters_sql,
            sessions_limit="LIMIT %(offset)s, %(limit)s",
        )
        sessions = self._parse_list_results(sync_execute(query, query_params))

        self._add_person_properties(team, sessions)
        add_session_recording_ids(team, sessions)

        return sessions
Example #2
0
 def test_old_style_properties(self):
     """Dict-style properties are parsed into key, operator, value and type."""
     filter = Filter(data={'properties': {'$browser__is_not': 'IE7', '$OS': 'Mac'}})

     # (position in filter.properties, attribute name, expected value)
     expectations = [
         (0, 'key', '$browser'),
         (0, 'operator', 'is_not'),
         (0, 'value', 'IE7'),
         (0, 'type', 'event'),
         (1, 'key', '$OS'),
         (1, 'operator', None),
         (1, 'value', 'Mac'),
     ]
     for position, attribute, expected in expectations:
         self.assertEqual(getattr(filter.properties[position], attribute), expected)
Example #3
0
    def test_retention_action_start_point(self):
        """Seven-day retention when the filter's entity is the signup action."""
        Person.objects.create(team=self.team, distinct_ids=["person1", "alias1"])
        Person.objects.create(team=self.team, distinct_ids=["person2"])

        signup_events = [
            ("person1", self._date(0)),
            ("person1", self._date(1)),
            ("person1", self._date(2)),
            ("person1", self._date(5)),
            ("alias1", self._date(5, 9)),
            ("person1", self._date(6)),
            ("person2", self._date(1)),
            ("person2", self._date(2)),
            ("person2", self._date(3)),
            ("person2", self._date(6)),
        ]
        action = self._create_signup_actions(signup_events)

        signup_entity = Entity({"id": action.pk, "type": TREND_FILTER_TYPE_ACTIONS})
        retention_filter = Filter(data={
            "date_from": self._date(0, hour=0),
            "entities": [signup_entity],
        })
        result = Retention().run(retention_filter, self.team, total_days=7)

        self.assertEqual(len(result), 7)
        self.assertEqual(
            self.pluck(result, "label"),
            ["Day {}".format(day) for day in range(7)],
        )
        self.assertEqual(result[0]["date"], "Wed. 10 June")

        # One row per cohort day; each row is one entry shorter than the previous.
        expected_counts = [
            [1, 1, 1, 0, 0, 1, 1],
            [2, 2, 1, 0, 1, 2],
            [2, 1, 0, 1, 2],
            [1, 0, 0, 1],
            [0, 0, 0],
            [1, 1],
            [2],
        ]
        self.assertEqual(self.pluck(result, "values", "count"), expected_counts)
Example #4
0
    def test_trends_compare(self):
        """Trends with ``compare`` on returns current/previous series; off returns one."""
        self._create_events()

        def run_trends(compare):
            # The filter is built and run under the same frozen clock.
            with freeze_time("2020-01-04T13:00:01Z"):
                return Trends().run(
                    Filter(data={"compare": compare, "events": [{"id": "sign up"}]}),
                    self.team,
                )

        def check_series(series, label, points):
            self.assertEqual(series["label"], label)
            for position, day_label, value in points:
                self.assertEqual(series["labels"][position], day_label)
                self.assertEqual(series["data"][position], value)

        response = run_trends("true")
        check_series(response[0], "sign up - current", [(4, "day 4", 3.0), (5, "day 5", 1.0)])
        check_series(response[1], "sign up - previous", [(4, "day 4", 1.0), (5, "day 5", 0.0)])

        no_compare_response = run_trends("false")
        check_series(
            no_compare_response[0],
            "sign up",
            [(4, "Wed. 1 January", 3.0), (5, "Thu. 2 January", 1.0)],
        )
Example #5
0
    def test_retention_with_properties(self):
        """Seven-day retention restricted by a person ``email`` property filter."""
        Person.objects.create(
            team=self.team,
            distinct_ids=["person1", "alias1"],
            properties={"email": "*****@*****.**"},
        )
        Person.objects.create(
            team=self.team,
            distinct_ids=["person2"],
            properties={"email": "*****@*****.**"},
        )

        pageviews = [
            ("person1", self._date(0)),
            ("person1", self._date(1)),
            ("person1", self._date(2)),
            ("person1", self._date(5)),
            ("alias1", self._date(5, 9)),
            ("person1", self._date(6)),
            ("person2", self._date(1)),
            ("person2", self._date(2)),
            ("person2", self._date(3)),
            ("person2", self._date(6)),
        ]
        self._create_pageviews(pageviews)

        email_property = {"key": "email", "value": "*****@*****.**", "type": "person"}
        retention_filter = Filter(data={
            "properties": [email_property],
            "date_from": self._date(0, hour=0),
        })
        result = Retention().run(retention_filter, self.team, total_days=7)

        self.assertEqual(len(result), 7)
        self.assertEqual(
            self.pluck(result, "label"),
            ["Day {}".format(day) for day in range(7)],
        )
        self.assertEqual(result[0]["date"], "Wed. 10 June")

        expected_counts = [
            [1, 1, 1, 0, 0, 1, 1],
            [1, 1, 0, 0, 1, 1],
            [1, 0, 0, 1, 1],
            [0, 0, 0, 0],
            [0, 0, 0],
            [1, 1],
            [1],
        ]
        self.assertEqual(self.pluck(result, "values", "count"), expected_counts)
Example #6
0
def format_action_filter(action: Action,
                         prepend: str = "action",
                         index=0,
                         use_loop: bool = False) -> Tuple[str, Dict]:
    """Build the SQL condition matching any step of ``action``, plus its parameters.

    Each step contributes its element conditions (autocapture steps only), its
    event conditions and its property conditions, joined with ``AND``.  The
    per-step clauses are then combined with ``OR``: as nested ``uuid IN (...)``
    sub-selects when ``use_loop`` is true, otherwise as a parenthesised
    expression.

    The ``index`` argument is accepted but never read; steps are numbered by
    their position in ``action.steps.all()``.

    Returns a ``(query, params)`` tuple; ``("1=2", {})`` when the action has no
    steps.
    """
    query_params = {"team_id": action.team.pk}
    action_steps = action.steps.all()
    if len(action_steps) == 0:
        # An action without steps must never match anything.
        return "1=2", {}

    step_clauses = []
    for step_number, step in enumerate(action_steps):
        param_prefix = "{}{}".format(step_number, prepend)
        conditions: List[str] = []

        # Element conditions only apply to autocapture steps.
        if step.event == AUTOCAPTURE_EVENT:
            el_conditions, element_params = filter_element(step, param_prefix)
            query_params.update(element_params)
            conditions.extend(el_conditions)

        # Event-level conditions (e.g. URL).
        event_conditions, event_params = filter_event(step, param_prefix, step_number)
        query_params.update(event_params)
        conditions.extend(event_conditions)

        if step.properties:
            from ee.clickhouse.models.property import parse_prop_clauses

            step_properties = Filter(data={"properties": step.properties}).properties
            prop_query, prop_params = parse_prop_clauses(
                step_properties,
                action.team.pk,
                prepend="action_props_{}".format(step_number),
            )
            # Drop the first "AND" so the clause can be joined like the others.
            conditions.append(prop_query.replace("AND", "", 1))
            query_params.update(prop_params)

        if len(conditions) > 0:
            step_clauses.append(" AND ".join(conditions))

    if not use_loop:
        return "(({}))".format(") OR (".join(step_clauses)), query_params

    loop_separator = ") OR uuid IN (SELECT uuid FROM events WHERE team_id = %(team_id)s AND "
    formatted_query = "SELECT uuid FROM events WHERE {} AND team_id = %(team_id)s".format(
        loop_separator.join(step_clauses))
    return formatted_query, query_params
Example #7
0
def get_actions(queryset: QuerySet, params: dict, team_id: int) -> QuerySet:
    """Narrow ``queryset`` to one team's actions, newest id first.

    If ``params`` carries an actions entry, only the ids parsed from the
    JSON-encoded ``"actions"`` value are kept.  If ``"include_count"`` is set,
    a ``count`` annotation is added.  Steps are prefetched ordered by id.
    """
    if params.get(TREND_FILTER_TYPE_ACTIONS):
        requested_actions = json.loads(params.get("actions", "[]"))
        requested_ids = [
            entity.id for entity in Filter({"actions": requested_actions}).actions
        ]
        queryset = queryset.filter(pk__in=requested_ids)

    if params.get("include_count"):
        queryset = queryset.annotate(count=Count(TREND_FILTER_TYPE_EVENTS))

    ordered_steps = Prefetch("steps", queryset=ActionStep.objects.order_by("id"))
    queryset = queryset.prefetch_related(ordered_steps)
    return queryset.filter(team_id=team_id).order_by("-id")
Example #8
0
        def test_regex(self):
            """Check the ``regex`` and ``not_regex`` operators on ``$current_url``.

            ``event1`` has no ``$current_url``; ``event2`` has one ending in
            ``.com``.  The ``regex`` filter must return ``event2`` first, and the
            ``not_regex`` filter (ordered by timestamp) must return both events.
            """
            event1 = event_factory(team=self.team,
                                   distinct_id="test",
                                   event="$pageview")
            event2 = event_factory(
                team=self.team,
                event="$pageview",
                distinct_id="test",
                properties={"$current_url": "https://whatever.com"},
            )
            # Raw strings: "\." is not a valid Python escape, so a plain literal
            # triggers an invalid-escape warning.  The pattern value is unchanged.
            filter = Filter(
                data={"properties": {
                    "$current_url__regex": r"\.com$"
                }})
            events = filter_events(filter, self.team)
            self.assertEqual(events[0]["id"], event2.pk)

            filter = Filter(
                data={"properties": {
                    "$current_url__not_regex": r"\.eee$"
                }})
            events = filter_events(filter, self.team, order_by="timestamp")
            self.assertEqual(events[0]["id"], event1.pk)
            self.assertEqual(events[1]["id"], event2.pk)
Example #9
0
    def calculate_trends(self, request: request.Request) -> List[Dict[str, Any]]:
        """Compute the trends (or stickiness) result for ``request``.

        Stickiness is used when the filter's ``shown_as`` equals
        ``TRENDS_STICKINESS``; otherwise the regular trends query runs.  The
        dashboard is refreshed via ``_refresh_dashboard`` before returning.
        """
        team = self.team
        filter = Filter(request=request)

        if filter.shown_as != TRENDS_STICKINESS:
            result = trends.Trends().run(filter, team)
        else:
            def earliest_timestamp_func(team_id):
                return Event.objects.earliest_timestamp(team_id)

            stickiness_filter = StickinessFilter(
                request=request,
                team=team,
                get_earliest_timestamp=earliest_timestamp_func,
            )
            result = stickiness.Stickiness().run(stickiness_filter, team)

        self._refresh_dashboard(request=request)

        return result
Example #10
0
        def test_basic_results(self):
            """
            Listing insights with the `basic=true` query parameter returns both created
            items, and the first result exposes exactly the expected keys, in order.
            """
            filter_dict = {"events": [{"id": "$pageview"}]}

            DashboardItem.objects.create(
                filters=Filter(data=filter_dict).to_dict(), team=self.team, short_id="12345678",
            )
            DashboardItem.objects.create(
                filters=Filter(data=filter_dict).to_dict(), team=self.team, saved=True,
            )

            response = self.client.get("/api/insight/?basic=true")
            self.assertEqual(response.status_code, status.HTTP_200_OK)

            results = response.json()["results"]
            self.assertEqual(len(results), 2)

            expected_keys = [
                "id",
                "short_id",
                "name",
                "filters",
                "dashboard",
                "color",
                "last_refresh",
                "refreshing",
                "saved",
            ]
            self.assertEqual(list(results[0].keys()), expected_keys)
Example #11
0
def get_action_tables_and_properties(
        action: Action) -> Counter[PropertyIdentifier]:
    """Count the property identifiers referenced by the steps of ``action``.

    Every step with a URL adds one count for ``("$current_url", "event", None)``;
    the identifiers extracted from the step's properties are added on top.
    """
    from ee.clickhouse.models.property import extract_tables_and_properties

    current_url_identifier = ("$current_url", "event", None)
    counts: Counter[PropertyIdentifier] = Counter()

    for step in action.steps.all():
        if step.url:
            counts[current_url_identifier] += 1
        step_filter = Filter(data={"properties": step.properties or []})
        counts += extract_tables_and_properties(step_filter.properties)

    return counts
Example #12
0
    def _calculate_trends(self,
                          request: request.Request) -> List[Dict[str, Any]]:
        """Run stickiness or trends for the requesting user's team.

        Stickiness is chosen when the filter's ``shown_as`` is ``"Stickiness"``.
        If the request carries ``from_dashboard``, the dashboard item with that
        primary key gets its ``last_refresh`` set to the current time.
        """
        team = request.user.team
        filter = Filter(request=request)

        if filter.shown_as == "Stickiness":
            query_runner = stickiness.Stickiness()
        else:
            query_runner = trends.Trends()
        result = query_runner.run(filter, team)

        dashboard_id = request.GET.get("from_dashboard")
        if dashboard_id:
            refreshed_items = DashboardItem.objects.filter(pk=dashboard_id)
            refreshed_items.update(last_refresh=now())

        return result
Example #13
0
    def trend(self, request: request.Request, *args: Any, **kwargs: Any) -> Response:
        """Return the trends result for ``request`` plus a ``next`` pagination URL.

        Serializer validation is best-effort: any failure is reported through
        ``capture_exception`` and the request is still processed.  ``next`` is
        only set when the result holds at least ``BREAKDOWN_VALUES_LIMIT`` entries.
        """
        try:
            TrendSerializer(request=request).is_valid(raise_exception=True)
        except Exception as e:
            # Deliberately non-fatal: report and carry on.
            capture_exception(e)

        result = self.calculate_trends(request)
        trend_filter = Filter(request=request, team=self.team)

        next_url = None
        if len(result["result"]) >= BREAKDOWN_VALUES_LIMIT:
            next_url = format_paginated_url(request, trend_filter.offset, BREAKDOWN_VALUES_LIMIT)

        return Response({**result, "next": next_url})
Example #14
0
        def test_is_not_persons(self):
            """An ``is_not`` person filter on ``url`` returns only the non-matching person."""
            excluded_url = "https://whatever.com"
            person_factory(team_id=self.team.pk, distinct_ids=["p1"], properties={"url": excluded_url})
            p2_uuid = person_factory(
                team_id=self.team.pk, distinct_ids=["p2"], properties={"url": "https://example.com"}
            )

            url_is_not = {"type": "person", "key": "url", "value": excluded_url, "operator": "is_not"}
            filter = Filter(data={"properties": [url_is_not]})

            results = filter_persons(filter, self.team)
            self.assertCountEqual(results, [p2_uuid])
Example #15
0
 def test_property_filtering(self):
     """Trends filtered on ``$some_property`` report the expected daily values."""
     self._create_events()
     property_filter = {"key": "$some_property", "value": "value"}
     with freeze_time("2020-01-04"):
         response = Trends().run(Filter(data={"properties": [property_filter]}), self.team)

     first_series = response[0]
     for position, day_label, value in [(4, "Wed. 1 January", 1.0), (5, "Thu. 2 January", 0)]:
         self.assertEqual(first_series["labels"][position], day_label)
         self.assertEqual(first_series["data"][position], value)
     self.assertEqual(response[1]["count"], 0)
Example #16
0
    def get_queryset(self):
        """Return the requesting user's team's actions, newest id first.

        Deleted rows are hidden for the ``list`` action.  The ``actions`` query
        parameter restricts the ids, ``include_count`` adds a ``count``
        annotation, and steps are prefetched ordered by id.
        """
        queryset = super().get_queryset()
        query_args = self.request.GET

        if self.action == 'list':  # type: ignore
            queryset = queryset.filter(deleted=False)

        if query_args.get(TREND_FILTER_TYPE_ACTIONS):
            requested_actions = json.loads(query_args['actions'])
            requested_ids = [action.id for action in Filter({'actions': requested_actions}).actions]
            queryset = queryset.filter(pk__in=requested_ids)

        if query_args.get('include_count'):
            queryset = queryset.annotate(count=Count(TREND_FILTER_TYPE_EVENTS))

        ordered_steps = Prefetch('steps', queryset=ActionStep.objects.order_by('id'))
        queryset = queryset.prefetch_related(ordered_steps)
        queryset = queryset.filter(team=self.request.user.team_set.get())
        return queryset.order_by('-id')
Example #17
0
 def test_filter_by_all(self):
     """With ``date_from`` set to ``"all"``, ``date_filter_Q`` is an empty ``Q``."""
     name_property = {
         "key": "name",
         "value": json.dumps({"first_name": "Mary", "last_name": "Smith"}),
         "type": "person",
     }
     filter = Filter(data={"properties": [name_property], "date_from": "all"})

     self.assertEqual(filter.date_filter_Q, Q())
Example #18
0
 def test_default_filter_by_date_from(self):
     """Without ``date_from``, ``date_filter_Q`` starts at midnight seven days ago."""
     name_property = {
         "key": "name",
         "value": json.dumps({"first_name": "Mary", "last_name": "Smith"}),
         "type": "person",
     }
     filter = Filter(data={"properties": [name_property]})

     midnight_today = timezone.now().replace(hour=0, minute=0, second=0, microsecond=0)
     one_week_ago = midnight_today - relativedelta(days=7)

     self.assertEqual(filter.date_filter_Q, Q(timestamp__gte=one_week_ago))
Example #19
0
 def test_json_object(self):
     """A person property holding a JSON object can be matched by its JSON-encoded value."""
     full_name = {"first_name": "Mary", "last_name": "Smith"}
     Person.objects.create(
         team=self.team, distinct_ids=["person1"], properties={"name": full_name},
     )
     event1 = Event.objects.create(
         team=self.team,
         distinct_id="person1",
         event="$pageview",
         properties={"$current_url": "https://something.com"},
     )

     name_property = {"key": "name", "value": json.dumps(full_name), "type": "person"}
     filter = Filter(data={"properties": [name_property]})

     property_q = filter.properties_to_Q(team_id=self.team.pk)
     events = Event.objects.add_person_id(self.team.pk).filter(property_q)
     self.assertEqual(events[0], event1)
     self.assertEqual(len(events), 1)
Example #20
0
    def create(self, validated_data: Dict, *args: Any,
               **kwargs: Any) -> DashboardItem:
        """Create a ``DashboardItem`` for the requesting user's team.

        Without a dashboard, the item is created straight from the validated
        data with ``created_by`` set to the requesting user.  With a dashboard
        belonging to the same team, the filters are passed through
        ``Filter(...).to_dict()`` and ``last_refresh`` is set to now.

        Raises:
            serializers.ValidationError: if the dashboard belongs to another team.
        """
        request = self.context["request"]
        team = request.user.team

        # last_refresh sometimes gets sent if dashboard_item is duplicated
        validated_data.pop("last_refresh", None)

        dashboard = validated_data.get("dashboard")
        if not dashboard:
            return DashboardItem.objects.create(
                team=team, created_by=request.user, **validated_data)

        if dashboard.team == team:
            filter_data = validated_data.pop("filters", None)
            normalized_filters = Filter(data=filter_data).to_dict() if filter_data else {}
            return DashboardItem.objects.create(
                team=team,
                last_refresh=now(),
                filters=normalized_filters,
                **validated_data)

        raise serializers.ValidationError("Dashboard not found")
Example #21
0
    def calculate_trends(self, request: request.Request) -> Dict[str, Any]:
        """Run ClickHouse trends or stickiness for ``request``; return ``{"result": ...}``.

        Stickiness is used when the filter's ``insight`` is ``INSIGHT_STICKINESS``
        or its ``shown_as`` is ``TRENDS_STICKINESS``.  The dashboard is refreshed
        via ``_refresh_dashboard`` before returning.
        """
        team = self.team
        filter = Filter(request=request, team=self.team)

        wants_stickiness = filter.insight == INSIGHT_STICKINESS or filter.shown_as == TRENDS_STICKINESS
        if not wants_stickiness:
            result = ClickhouseTrends().run(filter, team)
        else:
            stickiness_filter = StickinessFilter(
                request=request, team=team, get_earliest_timestamp=get_earliest_timestamp
            )
            result = ClickhouseStickiness().run(stickiness_filter, team)

        self._refresh_dashboard(request=request)
        return {"result": result}
Example #22
0
        def test_lifecycle_trend(self):
            """Lifecycle trends return one series per status with the expected data."""
            p1_timestamps = [
                "2020-01-11T12:00:00Z",
                "2020-01-12T12:00:00Z",
                "2020-01-13T12:00:00Z",
                "2020-01-15T12:00:00Z",
                "2020-01-17T12:00:00Z",
                "2020-01-19T12:00:00Z",
            ]
            self._create_events(data=[
                ("p1", p1_timestamps),
                ("p2", ["2020-01-09T12:00:00Z", "2020-01-12T12:00:00Z"]),
                ("p3", ["2020-01-12T12:00:00Z"]),
                ("p4", ["2020-01-15T12:00:00Z"]),
            ])

            lifecycle_filter = Filter(data={
                "date_from": "2020-01-12T00:00:00Z",
                "date_to": "2020-01-19T00:00:00Z",
                "events": [{"id": "$pageview", "type": "events", "order": 0}],
                "shown_as": TRENDS_LIFECYCLE,
            })
            result = trends().run(lifecycle_filter, self.team)

            self.assertEqual(len(result), 4)
            self.assertEqual(sorted([res["status"] for res in result]),
                             ["dormant", "new", "resurrecting", "returning"])

            # The assertion above guarantees every status below is a key here.
            expected_data = {
                "dormant": [0, -2, -1, 0, -2, 0, -1, 0],
                "returning": [1, 1, 0, 0, 0, 0, 0, 0],
                "resurrecting": [1, 0, 0, 1, 0, 1, 0, 1],
                "new": [1, 0, 0, 1, 0, 0, 0, 0],
            }
            for res in result:
                self.assertEqual(res["data"], expected_data[res["status"]])
Example #23
0
    def test_retention(self):
        """Retention run without ``total_days`` yields eleven cohort rows."""
        Person.objects.create(team=self.team, distinct_ids=["person1", "alias1"])
        Person.objects.create(team=self.team, distinct_ids=["person2"])

        pageviews = [
            ("person1", self._date(0)),
            ("person1", self._date(1)),
            ("person1", self._date(2)),
            ("person1", self._date(5)),
            ("alias1", self._date(5, 9)),
            ("person1", self._date(6)),
            ("person2", self._date(1)),
            ("person2", self._date(2)),
            ("person2", self._date(3)),
            ("person2", self._date(6)),
        ]
        self._create_pageviews(pageviews)

        retention_filter = Filter(data={"date_from": self._date(0, hour=0)})
        result = Retention().run(retention_filter, self.team)

        self.assertEqual(len(result), 11)
        self.assertEqual(
            self.pluck(result, "label"),
            ["Day {}".format(day) for day in range(11)],
        )
        self.assertEqual(result[0]["date"], "Wed. 10 June")

        # One row per cohort day; each row is one entry shorter than the previous.
        expected_counts = [
            [1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0],
            [2, 2, 1, 0, 1, 2, 0, 0, 0, 0],
            [2, 1, 0, 1, 2, 0, 0, 0, 0],
            [1, 0, 0, 1, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0],
            [1, 1, 0, 0, 0, 0],
            [2, 0, 0, 0, 0],
            [0, 0, 0, 0],
            [0, 0, 0],
            [0, 0],
            [0],
        ]
        self.assertEqual(self.pluck(result, "values", "count"), expected_counts)
Example #24
0
    def calculate_sessions(self, events: QuerySet, filter: Filter, team: Team) -> List[Dict[str, Any]]:
        """Compute the session average or, for any other ``filter.session``, the distribution.

        For the average, a filter without ``date_from`` is replaced by a copy
        whose start is midnight of the day of the team's earliest event.
        """
        all_sessions, sessions_sql_params = self.build_all_sessions_query(events)

        if filter.session != SESSION_AVG:  # SESSION_DIST
            return self._session_dist(all_sessions, sessions_sql_params)

        if not filter.date_from:
            earliest_event = Event.objects.filter(team=team).order_by("timestamp")[0]
            first_day = earliest_event.timestamp.replace(hour=0, minute=0, second=0, microsecond=0)
            filter = filter.with_data({"date_from": first_day.isoformat()})

        return self._session_avg(all_sessions, sessions_sql_params, filter)
Example #25
0
def update_cache_item(key: str, cache_type: str, payload: dict) -> None:
    """Recompute a trends or funnel result and store it in the cache under ``key``.

    The filter is rebuilt from the JSON in ``payload["filter"]``.  Nothing is
    cached when ``cache_type`` is neither trends nor funnel, or when the
    computed result is falsy.  Entries are stored with a 25-minute timeout.
    """
    filter = Filter(data=json.loads(payload["filter"]))

    result: Optional[Union[List, Dict]] = None
    if cache_type == CacheType.TRENDS:
        result = _calculate_trends(filter, int(payload["team_id"]))
    elif cache_type == CacheType.FUNNEL:
        result = _calculate_funnel(filter, int(payload["team_id"]))

    if not result:
        return

    cache_entry = {"result": result, "details": payload, "type": cache_type}
    cache.set(key, cache_entry, 25 * 60)
Example #26
0
def forwards_func(apps, schema_editor):
    """Give every dashboard a share token and normalise stored dashboard item filters.

    Items whose filters are null or an empty dict are left alone.  For the
    rest, a truthy ``funnel_id`` inside the filters is copied onto the item,
    and the filters are rewritten as ``Filter(data=...).to_dict()``.
    """
    Dashboard = apps.get_model("posthog", "Dashboard")
    DashboardItem = apps.get_model("posthog", "DashboardItem")

    for dashboard in Dashboard.objects.all():
        dashboard.share_token = secrets.token_urlsafe(22)
        dashboard.save()

    for item in DashboardItem.objects.filter(filters__isnull=False):
        if item.filters != {}:
            funnel_id = item.filters.get("funnel_id")
            if funnel_id:
                item.funnel_id = funnel_id
            item.filters = Filter(data=item.filters).to_dict()
            item.save()
Example #27
0
 def test_contains(self):
     """An ``icontains`` filter on ``$current_url`` returns the matching event first."""
     event_factory(team=self.team, distinct_id="test", event="$pageview")
     event2 = event_factory(
         team=self.team,
         event="$pageview",
         distinct_id="test",
         properties={"$current_url": "https://whatever.com"},
     )

     url_contains = {"$current_url__icontains": "whatever"}
     filter = Filter(data={"properties": url_contains})

     events = filter_events(filter, self.team)
     self.assertEqual(events[0]["id"], event2.pk)
Example #28
0
    def test_person_cohort_properties(self):
        """A cohort property filter matches a person who was added to that cohort."""
        person1_distinct_id = "person1"
        person1 = Person.objects.create(
            team=self.team, distinct_ids=[person1_distinct_id], properties={"$some_prop": 1}
        )
        cohort1 = Cohort.objects.create(team=self.team, groups=[{"properties": {"$some_prop": 1}}], name="cohort1")
        cohort1.people.add(person1)

        cohort_property = {"key": "id", "value": cohort1.pk, "type": "cohort"}
        filter = Filter(data={"properties": [cohort_property]})

        team_person = Person.objects.filter(
            team_id=self.team.pk, persondistinctid__distinct_id=person1_distinct_id
        )
        cohort_q = properties_to_Q(filter.property_groups.flat, team_id=self.team.pk, is_direct_query=True)
        matched_person = team_person.filter(cohort_q).exists()

        self.assertTrue(matched_person)
Example #29
0
 def test_simple(self):
     """An exact ``$current_url`` filter returns exactly one of the two events."""
     target_url = "https://whatever.com"
     event_factory(team=self.team, distinct_id="test", event="$pageview")
     event_factory(
         team=self.team,
         event="$pageview",
         distinct_id="test",
         properties={"$current_url": target_url},
     )

     filter = Filter(data={"properties": {"$current_url": target_url}})

     events = filter_events(filter, self.team)
     self.assertEqual(len(events), 1)
Example #30
0
    def _calculate_trends(self, request: request.Request) -> List[Dict[str, Any]]:
        """Run stickiness or trends for ``self.team`` based on the request's filter.

        Stickiness is chosen when the filter's ``shown_as`` equals
        ``TRENDS_STICKINESS``.  If the request carries ``from_dashboard``, the
        dashboard item with that primary key gets ``last_refresh`` set to now.
        """
        team = self.team
        base_filter = Filter(request=request)

        if base_filter.shown_as != TRENDS_STICKINESS:
            result = trends.Trends().run(base_filter, team)
        else:
            stickiness_filter = StickinessFilter(
                request=request, team=team, get_earliest_timestamp=Event.objects.earliest_timestamp
            )
            result = stickiness.Stickiness().run(stickiness_filter, team)

        dashboard_id = request.GET.get("from_dashboard")
        if dashboard_id:
            DashboardItem.objects.filter(pk=dashboard_id).update(last_refresh=now())

        return result