Example #1
0
        def test_retention_people_first_time(self):
            """Check the actors returned by first-time retention for interval 1 at two end dates."""
            _, _, p3, _ = self._create_first_time_retention_events()
            signed_up = json.dumps({"id": "$user_signed_up", "type": TREND_FILTER_TYPE_EVENTS})

            def first_time_filter(day):
                # even if set to hour 6 it should default to beginning of day and include all pageviews above
                return RetentionFilter(
                    data={
                        "date_to": self._date(day, hour=6),
                        RETENTION_TYPE: RETENTION_FIRST_TIME,
                        "target_entity": signed_up,
                        "returning_entity": {"id": "$pageview", "type": "events"},
                        "selected_interval": 1,
                    }
                )

            # Ending on day 10: exactly one actor, identified by p3's pk or uuid.
            actors = retention().actors(first_time_filter(10), self.team)
            self.assertEqual(len(actors), 1)
            self.assertIn(actors[0]["id"], [p3.pk, p3.uuid])

            # Ending on day 14: nobody is returned.
            actors = retention().actors(first_time_filter(14), self.team)
            self.assertEqual(len(actors), 0)
Example #2
0
 def calculate_retention(self,
                         request: request.Request) -> List[Dict[str, Any]]:
     """Build a RetentionFilter from the request and run the retention query for this object's team.

     When the filter's ``date_from`` is falsy, it is set to ``"-11d"`` before running.
     """
     retention_filter = RetentionFilter(request=request)
     if not retention_filter.date_from:
         # No start date on the filter: fall back to the "-11d" default.
         retention_filter._date_from = "-11d"
     return retention.Retention().run(retention_filter, self.team)
Example #3
0
    def _retrieve_people(self, filter: RetentionFilter, team: Team):
        """Return serialized people matching the retention filter for the given team.

        A person qualifies when they have an event that passes the filter's
        recurring date window, its property filters and the selected entity
        condition, and they also have at least one "reference" event (see
        ``inner_events`` below). At most 100 person ids are taken, starting at
        ``filter.offset``, and the matching ``Person`` rows are serialized with
        ``PersonSerializer``.
        """
        period = filter.period
        # NOTE(review): `fields` is not used anywhere in this method.
        trunc, fields = self._get_trunc_func("timestamp", period)
        is_first_time_retention = filter.retention_type == RETENTION_FIRST_TIME
        entity_condition, _ = self.get_entity_condition(
            filter.target_entity, "events")
        returning_condition, _ = self.get_entity_condition(
            filter.returning_entity, "first_event_date")
        # Interval 0 is matched against the target entity; any later interval is
        # matched against the returning entity.
        _entity_condition = returning_condition if filter.selected_interval > 0 else entity_condition

        events = Event.objects.filter(team_id=team.pk).add_person_id(team.pk)

        filtered_events = events.filter(
            filter.recurring_date_filter_Q()).filter(
                properties_to_Q(filter.properties, team_id=team.pk))

        # Correlated subquery (per outer person, via OuterRef("id")) of reference events:
        #  * first-time retention: group the person's target-entity events, take the
        #    minimum of `trunc` as `first_date`, and require that `first_date` to pass
        #    filter.reference_date_filter_Q("first_date");
        #  * otherwise: any target-entity event passing filter.reference_date_filter_Q().
        inner_events = (Event.objects.filter(team_id=team.pk).filter(
            properties_to_Q(filter.properties, team_id=team.pk)).add_person_id(
                team.pk).filter(**{
                    "person_id": OuterRef("id")
                }).filter(entity_condition).values("person_id").annotate(
                    first_date=Min(trunc)).filter(
                        filter.reference_date_filter_Q("first_date")).distinct(
                        ) if is_first_time_retention else Event.objects.filter(
                            team_id=team.pk).filter(
                                filter.reference_date_filter_Q()).filter(
                                    properties_to_Q(
                                        filter.properties,
                                        team_id=team.pk)).add_person_id(
                                            team.pk).filter(
                                                **{
                                                    "person_id": OuterRef("id")
                                                }).filter(entity_condition))

        # Keep only events whose person has a reference event, then reduce to
        # distinct person ids.
        filtered_events = (filtered_events.filter(_entity_condition).filter(
            Exists(
                Person.objects.filter(**{
                    "id": OuterRef("person_id"),
                }).filter(Exists(inner_events)).only("id"))).values(
                    "person_id").distinct()).all()

        # Page size is hard-coded to 100 ids starting at filter.offset.
        people = Person.objects.filter(
            team=team,
            id__in=[
                p["person_id"]
                for p in filtered_events[filter.offset:filter.offset + 100]
            ],
        )

        people = people.prefetch_related(
            Prefetch("persondistinctid_set", to_attr="distinct_ids_cache"))

        # Imported at call time rather than at module level
        # (presumably to avoid a circular import — TODO confirm).
        from posthog.api.person import PersonSerializer

        return PersonSerializer(people, many=True).data
Example #4
0
        def test_first_time_retention(self):
            """Run first-time retention over 7 intervals and compare the labels and per-cohort counts."""
            self._create_first_time_retention_events()

            signed_up = json.dumps({"id": "$user_signed_up", "type": TREND_FILTER_TYPE_EVENTS})
            runner = retention()
            first_time_filter = RetentionFilter(
                data={
                    "date_to": self._date(5, hour=6),
                    RETENTION_TYPE: RETENTION_FIRST_TIME,
                    "target_entity": signed_up,
                    "returning_entity": {"id": "$pageview", "type": "events"},
                    "total_intervals": 7,
                }
            )
            result = runner.run(first_time_filter, self.team)

            self.assertEqual(len(result), 7)
            self.assertEqual(self.pluck(result, "label"), [f"Day {index}" for index in range(7)])

            # One row per cohort; each row is one entry shorter than the previous one.
            expected_counts = [
                [2, 1, 2, 2, 1, 0, 1],
                [1, 1, 0, 1, 1, 1],
                [0, 0, 0, 0, 0],
                [1, 1, 0, 1],
                [0, 0, 0],
                [0, 0],
                [0],
            ]
            self.assertEqual(self.pluck(result, "values", "count"), expected_counts)
Example #5
0
        def test_retention_people_in_period(self):
            """people_in_period for interval 2 lists person2 first, then person1, with their appearances."""
            person1 = person_factory(team_id=self.team.pk, distinct_ids=["person1", "alias1"])
            person2 = person_factory(team_id=self.team.pk, distinct_ids=["person2"])

            person1_events = [
                ("person1", self._date(0)),
                ("person1", self._date(1)),
                ("person1", self._date(2)),
                ("person1", self._date(5)),
                ("alias1", self._date(5, 9)),
                ("person1", self._date(6)),
            ]
            person2_events = [("person2", self._date(day)) for day in (1, 2, 3, 6, 7)]
            self._create_events(person1_events + person2_events)

            # even if set to hour 6 it should default to beginning of day and include all pageviews above
            runner = retention()
            period_filter = RetentionFilter(data={"date_to": self._date(10, hour=6), "selected_interval": 2})
            result = runner.people_in_period(period_filter, self.team)

            expected_rows = [
                (person2, [1, 1, 0, 0, 1, 1, 0, 0, 0]),
                (person1, [1, 0, 0, 1, 1, 0, 0, 0, 0]),
            ]
            for position, (person, appearances) in enumerate(expected_rows):
                row = result[position]
                self.assertEqual(row["person"]["id"], person.pk)
                self.assertEqual(row["appearances"], appearances)
Example #6
0
        def test_first_time_retention_people(self):
            """First-time people_in_period for interval 0 returns only p3 with the expected appearances."""
            _, _, p3, _ = self._create_first_time_retention_events()

            signed_up = json.dumps({"id": "$user_signed_up", "type": TREND_FILTER_TYPE_EVENTS})
            runner = retention()
            first_time_filter = RetentionFilter(
                data={
                    "date_to": self._date(6, hour=6),
                    RETENTION_TYPE: RETENTION_FIRST_TIME,
                    "target_entity": signed_up,
                    "returning_entity": {"id": "$pageview", "type": "events"},
                    "total_intervals": 7,
                    "selected_interval": 0,
                }
            )
            result = runner.people_in_period(first_time_filter, self.team)

            # The payload keeps its rows under the "detail" key.
            self.assertEqual(len(result["detail"]), 1)
            only_row = result["detail"][0]
            self.assertEqual(only_row["person"]["id"], p3.pk)
            self.assertEqual(result["detail"][0]["appearances"], [1, 1, 0, 1, 1, 1, 0])
Example #7
0
        def test_retention_people_basic(self):
            """Actors for interval 2 of the default retention query must be exactly person1.

            The returned id may be either the person's primary key or their uuid,
            so both are accepted.
            """
            person1 = person_factory(team_id=self.team.pk, distinct_ids=["person1", "alias1"])
            # person2 only needs to exist; the object itself is never inspected.
            person_factory(team_id=self.team.pk, distinct_ids=["person2"])

            self._create_events(
                [
                    ("person1", self._date(0)),
                    ("person1", self._date(1)),
                    ("person1", self._date(2)),
                    ("person1", self._date(5)),
                    ("alias1", self._date(5, 9)),
                    ("person1", self._date(6)),
                    ("person2", self._date(1)),
                    ("person2", self._date(2)),
                    ("person2", self._date(3)),
                    ("person2", self._date(6)),
                ]
            )

            # even if set to hour 6 it should default to beginning of day and include all pageviews above
            result = retention().actors(
                RetentionFilter(data={"date_to": self._date(10, hour=6), "selected_interval": 2}), self.team
            )
            self.assertEqual(len(result), 1)
            # assertTrue(id == pk or id, uuid) passed for any truthy id because the uuid was
            # taken as the failure message; assertIn really compares against both identifiers.
            self.assertIn(result[0]["id"], [person1.pk, person1.uuid])
Example #8
0
        def test_retention_people_in_perieod_first_time(self):
            """First-time actors_in_period for interval 0 returns only p3 with the expected appearances."""
            _, _, p3, _ = self._create_first_time_retention_events()
            signed_up = json.dumps({"id": "$user_signed_up", "type": TREND_FILTER_TYPE_EVENTS})

            # even if set to hour 6 it should default to beginning of day and include all pageviews above
            runner = retention()
            first_time_filter = RetentionFilter(
                data={
                    "date_to": self._date(10, hour=6),
                    RETENTION_TYPE: RETENTION_FIRST_TIME,
                    "target_entity": signed_up,
                    "returning_entity": {"id": "$pageview", "type": "events"},
                    "selected_interval": 0,
                }
            )
            result1 = runner.actors_in_period(first_time_filter, self.team)

            self.assertEqual(len(result1), 1)
            # The id may be reported either as the primary key or as the uuid.
            is_p3 = result1[0]["person"]["id"] == p3.pk or result1[0]["person"]["id"] == p3.uuid
            self.assertTrue(is_p3)
            self.assertEqual(result1[0]["appearances"], [1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0])
Example #9
0
        def test_day_interval(self):
            """Default (daily) retention ending on day 10 yields 11 cohorts with the expected counts."""
            person_factory(team_id=self.team.pk, distinct_ids=["person1", "alias1"])
            person_factory(team_id=self.team.pk, distinct_ids=["person2"])

            person1_events = [
                ("person1", self._date(0)),
                ("person1", self._date(1)),
                ("person1", self._date(2)),
                ("person1", self._date(5)),
                ("alias1", self._date(5, 9)),
                ("person1", self._date(6)),
            ]
            person2_events = [("person2", self._date(day)) for day in (1, 2, 3, 6)]
            self._create_events(person1_events + person2_events)

            # even if set to hour 6 it should default to beginning of day and include all pageviews above
            runner = retention()
            result = runner.run(RetentionFilter(data={"date_to": self._date(10, hour=6)}), self.team)

            self.assertEqual(len(result), 11)
            self.assertEqual(self.pluck(result, "label"), [f"Day {index}" for index in range(11)])
            self.assertEqual(result[0]["date"], datetime(2020, 6, 10, 0, tzinfo=pytz.UTC))

            expected_counts = [
                [1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0],
                [2, 2, 1, 0, 1, 2, 0, 0, 0, 0],
                [2, 1, 0, 1, 2, 0, 0, 0, 0],
                [1, 0, 0, 1, 0, 0, 0, 0],
                [0, 0, 0, 0, 0, 0, 0],
                [1, 1, 0, 0, 0, 0],
                [2, 0, 0, 0, 0],
                [0, 0, 0, 0],
                [0, 0, 0],
                [0, 0],
                [0],
            ]
            self.assertEqual(self.pluck(result, "values", "count"), expected_counts)
Example #10
0
 def calculate_retention(self, request: request.Request) -> Dict[str, Any]:
     """Run the retention query for this object's team and wrap the outcome under ``"result"``.

     If the request's GET parameters carry no (truthy) ``date_from``, the
     filter is given ``"-11d"`` as its start date.
     """
     team = self.team
     defaults = {} if request.GET.get("date_from") else {"date_from": "-11d"}
     retention_filter = RetentionFilter(data=defaults, request=request)
     return {"result": retention.Retention().run(retention_filter, team)}
Example #11
0
        def test_retention_graph(self):
            """With display=TRENDS_LINEAR the first series exposes count, labels, days and percentage data."""
            person_factory(team_id=self.team.pk, distinct_ids=["person1", "alias1"])
            person_factory(team_id=self.team.pk, distinct_ids=["person2"])

            person1_events = [
                ("person1", self._date(0)),
                ("person1", self._date(1)),
                ("person1", self._date(2)),
                ("person1", self._date(5)),
                ("alias1", self._date(5, 9)),
                ("person1", self._date(6)),
            ]
            person2_events = [("person2", self._date(day)) for day in (0, 1, 2, 3, 6)]
            self._create_events(person1_events + person2_events)

            runner = retention()
            graph_filter = RetentionFilter(data={"date_to": self._date(10, hour=6), "display": TRENDS_LINEAR})
            result = runner.run(graph_filter, self.team)

            self.assertEqual(result[0]["count"], 2)
            self.assertEqual(result[0]["labels"], [f"Day {index}" for index in range(11)])
            # Eleven consecutive days: 2020-06-10 through 2020-06-20.
            self.assertEqual(result[0]["days"], [f"2020-06-{day}" for day in range(10, 21)])
            self.assertEqual(
                result[0]["data"],
                [100.0, 100.0, 100.0, 50.0, 0.0, 50.0, 100.0, 0.0, 0.0, 0.0, 0.0],
            )
Example #12
0
 def calculate_retention(self, request: request.Request) -> Dict[str, Any]:
     """Run ClickhouseRetention for this object's team and wrap the outcome under ``"result"``.

     If the request's GET parameters carry no (truthy) ``date_from``, the
     filter is given ``"-11d"`` as its start date. The retention runner is
     constructed with the request's absolute root URI as ``base_uri``.
     """
     team = self.team
     defaults = {} if request.GET.get("date_from") else {"date_from": "-11d"}
     retention_filter = RetentionFilter(data=defaults, request=request, team=self.team)
     runner = ClickhouseRetention(base_uri=request.build_absolute_uri("/"))
     return {"result": runner.run(retention_filter, team)}
Example #13
0
 def _construct_people_url_for_trend_breakdown_interval(
     self,
     filter: RetentionFilter,
     selected_interval: int,
     breakdown_values: BreakdownValues,
 ):
     """Build the ``api/person/retention/`` URL for one breakdown value and interval.

     The query string is the given filter's data with ``breakdown_values`` and
     ``selected_interval`` overridden, serialized via ``to_params``.
     """
     overrides = {
         "breakdown_values": breakdown_values,
         "selected_interval": selected_interval,
     }
     people_filter = RetentionFilter({**filter._data, **overrides})
     query_string = urlencode(people_filter.to_params())
     return f"{self._base_uri}api/person/retention/?{query_string}"
Example #14
0
        def test_interval_rounding(self):
            """Weekly retention over 7 intervals: check the labels, the counts and each cohort's start date."""
            for distinct_ids in (["person1", "alias1"], ["person2"]):
                Person.objects.create(
                    team=self.team, distinct_ids=distinct_ids, properties={"email": "*****@*****.**"},
                )

            events = [
                ("person1", self._date(0)),
                ("person2", self._date(0)),
                ("person1", self._date(1)),
                ("person2", self._date(1)),
                ("person1", self._date(7)),
                ("person2", self._date(7)),
                ("person1", self._date(14)),
                ("person1", self._date(month=1, day=-6)),
                ("person2", self._date(month=1, day=-6)),
                ("person2", self._date(month=1, day=1)),
                ("person1", self._date(month=1, day=1)),
                ("person2", self._date(month=1, day=15)),
            ]
            self._create_events(events)

            runner = retention()
            weekly_filter = RetentionFilter(
                data={"date_to": self._date(14, month=1, hour=0), "period": "Week", "total_intervals": 7}
            )
            result = runner.run(weekly_filter, self.team)

            self.assertEqual(self.pluck(result, "label"), [f"Week {index}" for index in range(7)])

            expected_counts = [
                [2, 2, 1, 2, 2, 0, 1],
                [2, 1, 2, 2, 0, 1],
                [1, 1, 1, 0, 0],
                [2, 2, 0, 1],
                [2, 0, 1],
                [0, 0],
                [1],
            ]
            self.assertEqual(self.pluck(result, "values", "count"), expected_counts)

            # Cohort dates are one week apart, starting on 2020-06-07.
            expected_dates = [
                datetime(2020, month, day, 0, tzinfo=pytz.UTC)
                for month, day in ((6, 7), (6, 14), (6, 21), (6, 28), (7, 5), (7, 12), (7, 19))
            ]
            self.assertEqual(self.pluck(result, "date"), expected_dates)
Example #15
0
        def test_retention_with_properties(self):
            """Retention filtered on an event property ($some_property == "value") yields the expected counts."""
            person_factory(team_id=self.team.pk, distinct_ids=["person1", "alias1"])
            person_factory(team_id=self.team.pk, distinct_ids=["person2"])

            person1_events = [
                ("person1", self._date(0)),
                ("person1", self._date(1)),
                ("person1", self._date(2)),
                ("person1", self._date(5)),
                ("alias1", self._date(5, 9)),
                ("person1", self._date(6)),
            ]
            person2_events = [("person2", self._date(day)) for day in (1, 2, 3, 6)]
            self._create_events(person1_events + person2_events)

            runner = retention()
            property_filter = RetentionFilter(
                data={
                    "properties": [{"key": "$some_property", "value": "value"}],
                    "date_to": self._date(10, hour=0),
                }
            )
            result = runner.run(property_filter, self.team)

            self.assertEqual(len(result), 11)
            self.assertEqual(self.pluck(result, "label"), [f"Day {index}" for index in range(11)])
            self.assertEqual(result[0]["date"], datetime(2020, 6, 10, 0, tzinfo=pytz.UTC))

            expected_counts = [
                [1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0],
                [1, 0, 1, 0, 0, 0, 0, 0, 0, 0],
                [1, 0, 0, 1, 0, 0, 0, 0, 0],
                [1, 0, 0, 0, 0, 0, 0, 0],
                [0, 0, 0, 0, 0, 0, 0],
                [1, 0, 0, 0, 0, 0],
                [0, 0, 0, 0, 0],
                [0, 0, 0, 0],
                [0, 0, 0],
                [0, 0],
                [0],
            ]
            self.assertEqual(self.pluck(result, "values", "count"), expected_counts)
Example #16
0
def build_returning_event_query(filter: RetentionFilter, team: Team):
    """Return the returning-events query with its parameters substituted in.

    The query is produced by ``RetentionEventsQuery`` in ``RETURNING`` mode for
    the team's id, using a copy of the filter whose ``breakdowns`` are emptied.
    """
    # Avoid pulling in breakdown values from returning event query
    filter_without_breakdowns = filter.with_data({"breakdowns": []})
    events_query = RetentionEventsQuery(
        filter=filter_without_breakdowns,
        team_id=team.pk,
        event_query_type=RetentionQueryType.RETURNING,
    )
    templated_sql, sql_params = events_query.get_query()
    return substitute_params(templated_sql, sql_params)
Example #17
0
        def test_filter_test_accounts(self):
            """Retention with FILTER_TEST_ACCOUNTS enabled yields the expected labels, date and counts."""
            person_factory(
                team_id=self.team.pk, distinct_ids=["person1", "alias1"], properties={"email": "*****@*****.**"}
            )
            person_factory(team_id=self.team.pk, distinct_ids=["person2"])

            person1_events = [
                ("person1", self._date(0)),
                ("person1", self._date(1)),
                ("person1", self._date(2)),
                ("person1", self._date(5)),
                ("alias1", self._date(5, 9)),
                ("person1", self._date(6)),
            ]
            person2_events = [("person2", self._date(day)) for day in (1, 2, 3, 6)]
            self._create_events(person1_events + person2_events)

            # even if set to hour 6 it should default to beginning of day and include all pageviews above
            runner = retention()
            accounts_filter = RetentionFilter(
                data={"date_to": self._date(10, hour=6), FILTER_TEST_ACCOUNTS: True}, team=self.team
            )
            result = runner.run(accounts_filter, self.team)

            self.assertEqual(len(result), 11)
            self.assertEqual(self.pluck(result, "label"), [f"Day {index}" for index in range(11)])
            self.assertEqual(result[0]["date"], datetime(2020, 6, 10, 0, tzinfo=pytz.UTC))

            expected_counts = [
                [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                [1, 1, 1, 0, 0, 1, 0, 0, 0, 0],
                [1, 1, 0, 0, 1, 0, 0, 0, 0],
                [1, 0, 0, 1, 0, 0, 0, 0],
                [0, 0, 0, 0, 0, 0, 0],
                [0, 0, 0, 0, 0, 0],
                [1, 0, 0, 0, 0],
                [0, 0, 0, 0],
                [0, 0, 0],
                [0, 0],
                [0],
            ]
            self.assertEqual(self.pluck(result, "values", "count"), expected_counts)
Example #18
0
    def retention(self, request: request.Request) -> response.Response:
        """Respond with the retention people for the request plus the ``next`` URL.

        Returns a 400 response when the requesting user has no team.
        """
        team = request.user.team
        if not team:
            error_body = {
                "message": "Could not retrieve team",
                "detail": "Could not validate team associated with user",
            }
            return response.Response(error_body, status=400)

        retention_filter = RetentionFilter(request=request)
        people = self.retention_class().people(retention_filter, team)
        payload = {
            "result": people,
            "next": paginated_result(people, request, retention_filter.offset),
        }
        return response.Response(payload)
Example #19
0
    def _retrieve_people_in_period(self, filter: RetentionFilter, team: Team):
        """Return people and their per-interval appearances for the selected interval.

        A derived filter is built whose ``total_intervals`` is reduced by
        ``selected_interval``. The raw SQL below then returns up to 100 rows of
        ``(person_id, appearance_count, appearances)`` starting at the filter's
        offset, ordered by appearance count descending. Matching ``Person`` rows
        of the team are serialized and handed, together with the raw rows, to
        ``process_people_in_period``, whose result is returned.
        """
        # Build a fresh dict instead of updating filter._data in place, so the
        # caller's filter object keeps its original total_intervals.
        filter = RetentionFilter(
            data={
                **filter._data,
                "total_intervals": filter.total_intervals - filter.selected_interval,
            }
        )

        format_fields, params = self._determine_query_params(filter, team)

        # NOTE(review): the SQL fragments are interpolated with str.format; they come from
        # _determine_query_params (presumably internally generated, not user text — confirm).
        # Values are passed separately through `params`.
        final_query = """
            SELECT person_id, count(person_id) appearance_count, array_agg(date) appearances FROM (
                SELECT DISTINCT
                    {fields}
                    "events"."person_id"
                FROM ({event_query}) events
                LEFT JOIN ({reference_event_query}) first_event_date
                ON (events.person_id = first_event_date.person_id)
                WHERE event_date >= first_date
                AND {target_condition} AND {return_condition}
                OR ({target_condition} AND event_date = first_date)
            ) person_appearances
            WHERE first_date = 0
            GROUP BY person_id
            ORDER BY appearance_count DESC
            LIMIT %s OFFSET %s
        """.format(**format_fields)

        from posthog.api.person import PersonSerializer

        with connection.cursor() as cursor:
            # The trailing two parameters fill LIMIT (fixed page size of 100) and OFFSET.
            cursor.execute(
                final_query,
                params + (100, filter.offset),
            )
            raw_results = cursor.fetchall()
            # Index serialized people by primary key for lookup while processing rows.
            people_dict = {
                person.pk: PersonSerializer(person).data
                for person in Person.objects.filter(
                    team_id=team.pk, id__in=[val[0] for val in raw_results])
            }

            result = self.process_people_in_period(filter, raw_results,
                                                   people_dict)

        return result
Example #20
0
        def test_retention_multiple_events(self):
            """Retention with a custom target event and $pageview as returning event yields the expected counts."""
            for distinct_ids in (["person1", "alias1"], ["person2"], ["person3"], ["person4"]):
                person_factory(team_id=self.team.pk, distinct_ids=distinct_ids)

            first_event = "$some_event"
            # person1 and person2 fire the target event on days 0-3, person3 on day 5.
            target_events = [
                (distinct_id, self._date(day)) for distinct_id in ("person1", "person2") for day in range(4)
            ]
            target_events.append(("person3", self._date(5)))
            self._create_events(target_events, first_event)

            pageviews = [("person1", self._date(5)), ("person1", self._date(6)), ("person2", self._date(5))]
            self._create_events(pageviews, "$pageview")

            target_entity = json.dumps({"id": first_event, "type": TREND_FILTER_TYPE_EVENTS})
            runner = retention()
            events_filter = RetentionFilter(
                data={
                    "date_to": self._date(6, hour=6),
                    "target_entity": target_entity,
                    "returning_entity": {"id": "$pageview", "type": "events"},
                    "total_intervals": 7,
                }
            )
            result = runner.run(events_filter, self.team)

            self.assertEqual(len(result), 7)
            self.assertEqual(self.pluck(result, "label"), [f"Day {index}" for index in range(7)])

            expected_counts = [
                [2, 0, 0, 0, 0, 2, 1],
                [2, 0, 0, 0, 2, 1],
                [2, 0, 0, 2, 1],
                [2, 0, 2, 1],
                [0, 0, 0],
                [1, 0],
                [0],
            ]
            self.assertEqual(self.pluck(result, "values", "count"), expected_counts)
Example #21
0
    def process_table_result(
        self, resultset: Dict[Tuple[int, int], Dict[str, Any]], filter: RetentionFilter,
    ):
        """Turn the ``(first_day, day)``-keyed result set into one table row per cohort.

        Each row carries its ``values`` (one cell per remaining interval, with an
        empty-count placeholder where the result set has no entry), a ``label``
        of the form "<period> <first_day>", and the cohort's ``date``.
        """
        rows = []
        for first_day in range(filter.total_intervals):
            # Later cohorts have fewer intervals left, so their rows get shorter.
            cells = []
            for day in range(filter.total_intervals - first_day):
                cells.append(resultset.get((first_day, day), {"count": 0, "people": []}))

            cohort_offset = RetentionFilter.determine_time_delta(first_day, filter.period)[0]
            rows.append(
                {
                    "values": cells,
                    "label": "{} {}".format(filter.period, first_day),
                    "date": (filter.date_from + cohort_offset),
                }
            )

        return rows
Example #22
0
        def test_retention_event_action(self):
            """Retention with an action as target and a custom event as returning entity yields the expected counts."""
            person_factory(team=self.team, distinct_ids=["person1", "alias1"])
            person_factory(team=self.team, distinct_ids=["person2"])

            # Both people trigger the signup action on days 0-3.
            signups = [(distinct_id, self._date(day)) for distinct_id in ("person1", "person2") for day in range(4)]
            action = self._create_signup_actions(signups)

            some_event = "$some_event"
            returning_events = [("person1", self._date(3)), ("person2", self._date(5))]
            self._create_events(returning_events, some_event)

            start_entity = json.dumps({"id": action.pk, "type": TREND_FILTER_TYPE_ACTIONS})
            runner = retention()
            action_filter = RetentionFilter(
                data={
                    "date_to": self._date(6, hour=0),
                    "target_entity": start_entity,
                    "returning_entity": {"id": some_event, "type": TREND_FILTER_TYPE_EVENTS},
                    "total_intervals": 7,
                }
            )
            result = runner.run(action_filter, self.team)

            self.assertEqual(len(result), 7)
            self.assertEqual(self.pluck(result, "label"), [f"Day {index}" for index in range(7)])
            self.assertEqual(result[0]["date"], datetime(2020, 6, 10, 0, tzinfo=pytz.UTC))

            expected_counts = [
                [2, 0, 0, 1, 0, 1, 0],
                [2, 0, 1, 0, 1, 0],
                [2, 1, 0, 1, 0],
                [2, 0, 1, 0],
                [0, 0, 0],
                [0, 0],
                [0],
            ]
            self.assertEqual(self.pluck(result, "values", "count"), expected_counts)
Example #23
0
        def test_retention_with_user_properties(self):
            """Retention filtered on a person property (email) yields the expected labels, date and counts."""
            person_factory(
                team_id=self.team.pk, distinct_ids=["person1", "alias1"], properties={"email": "*****@*****.**"},
            )
            person_factory(
                team_id=self.team.pk, distinct_ids=["person2"], properties={"email": "*****@*****.**"},
            )

            person1_events = [
                ("person1", self._date(0)),
                ("person1", self._date(1)),
                ("person1", self._date(2)),
                ("person1", self._date(5)),
                ("alias1", self._date(5, 9)),
                ("person1", self._date(6)),
            ]
            person2_events = [("person2", self._date(day)) for day in (1, 2, 3, 6)]
            self._create_events(person1_events + person2_events)

            runner = retention()
            person_filter = RetentionFilter(
                data={
                    "properties": [{"key": "email", "value": "*****@*****.**", "type": "person"}],
                    "date_to": self._date(6, hour=0),
                    "total_intervals": 7,
                }
            )
            result = runner.run(person_filter, self.team)

            self.assertEqual(len(result), 7)
            self.assertEqual(self.pluck(result, "label"), [f"Day {index}" for index in range(7)])
            self.assertEqual(result[0]["date"], datetime(2020, 6, 10, 0, tzinfo=pytz.UTC))

            expected_counts = [
                [1, 1, 1, 0, 0, 1, 1],
                [1, 1, 0, 0, 1, 1],
                [1, 0, 0, 1, 1],
                [0, 0, 0, 0],
                [0, 0, 0],
                [1, 1],
                [1],
            ]
            self.assertEqual(self.pluck(result, "values", "count"), expected_counts)
Example #24
0
    def retention(self, request: request.Request) -> response.Response:
        """Return the people behind a retention query together with a pagination link.

        When the ``display`` query parameter equals ``TRENDS_TABLE`` the people
        are fetched with ``people_in_period``; otherwise ``people`` is used.
        Responds with HTTP 400 when the requesting user has no team.
        """
        display_mode = request.GET.get("display")
        team = cast(User, request.user).team
        if not team:
            error_body = {
                "message": "Could not retrieve team",
                "detail": "Could not validate team associated with user",
            }
            return response.Response(error_body, status=400)

        retention_filter = RetentionFilter(request=request)

        wants_table = display_mode == TRENDS_TABLE
        retention_query = self.retention_class()
        fetch_people = retention_query.people_in_period if wants_table else retention_query.people
        people = fetch_people(retention_filter, team)

        return response.Response(
            {"result": people, "next": paginated_result(people, request, retention_filter.offset)}
        )
Example #25
0
        def test_retention_default(self):
            """Run retention with a filter carrying only a dummy key and expect an all-zero matrix.

            The expected result has 11 cohort rows whose lengths shrink from
            11 down to 1, every cell being zero.
            """
            # Only the distinct ids are used below, so the created people are not bound.
            person_factory(team_id=self.team.pk, distinct_ids=["person1", "alias1"])
            person_factory(team_id=self.team.pk, distinct_ids=["person2"])

            # (distinct_id, positional args for self._date), in creation order.
            schedule = [
                ("person1", (0,)),
                ("person1", (1,)),
                ("person1", (2,)),
                ("person1", (5,)),
                ("alias1", (5, 9)),
                ("person1", (6,)),
                ("person2", (1,)),
                ("person2", (2,)),
                ("person2", (3,)),
                ("person2", (6,)),
            ]
            self._create_events([(distinct_id, self._date(*date_args)) for distinct_id, date_args in schedule])

            query = retention()
            result = query.run(RetentionFilter(data={"dummy": "dummy"}), self.team)

            # Triangle of zeros: row lengths 11, 10, ..., 1.
            all_zero_triangle = [[0] * width for width in range(11, 0, -1)]
            self.assertEqual(self.pluck(result, "values", "count"), all_zero_triangle)
Example #26
0
    def process_table_result(
        self,
        resultset: Dict[CohortKey, Dict[str, Any]],
        filter: RetentionFilter,
    ):
        """
        Build the REST API payload for a retention query without a breakdown.

        The non-breakdown case is handled separately from the breakdown case
        to stay compatible with the response shape from before breakdowns
        existed. The key difference is that cohorts are "zero filled": every
        cohort index in ``range(filter.total_intervals)`` gets a row, and any
        cell missing from ``resultset`` is reported with a count of 0 and no
        people.

        Returns a list with one dict per cohort, holding the keys ``values``,
        ``label``, ``date`` and ``people_url``.
        """

        def people_url_for(cohort_index):
            # Same filter data, switched to the table display and narrowed to one cohort.
            overrides = {"display": "ActionsTable", "breakdown_values": [cohort_index]}
            query_params = RetentionFilter({**filter._data, **overrides}).to_params()
            return "/api/person/retention/?" + urlencode(query_params)

        rows = []
        for first_day in range(filter.total_intervals):
            row = {}
            # Later cohorts have fewer elapsed intervals, hence the shrinking range.
            row["values"] = [
                resultset.get(CohortKey((first_day,), offset), {"count": 0, "people": []})
                for offset in range(filter.total_intervals - first_day)
            ]
            row["label"] = "{} {}".format(filter.period, first_day)
            row["date"] = filter.date_from + RetentionFilter.determine_time_delta(first_day, filter.period)[0]
            row["people_url"] = people_url_for(first_day)
            rows.append(row)

        return rows
Example #27
0
        def test_month_interval(self):
            """Run retention with ``period`` set to "Month" over 11 intervals.

            Checks the cohort labels, the count matrix and the cohort dates
            returned for two people with events spread across several months.
            """
            Person.objects.create(
                team=self.team, distinct_ids=["person1", "alias1"], properties={"email": "*****@*****.**"},
            )
            Person.objects.create(
                team=self.team, distinct_ids=["person2"], properties={"email": "*****@*****.**"},
            )

            # (distinct_id, month offset passed to self._date), in creation order.
            monthly_activity = [
                ("person1", -5),
                ("person2", -5),
                ("person1", -4),
                ("person2", -4),
                ("person1", -3),
                ("person2", -3),
                ("person1", -1),
                ("person1", 0),
                ("person2", 0),
                ("person2", 1),
                ("person1", 3),
                ("person2", 5),
            ]
            self._create_events(
                [(distinct_id, self._date(day=0, month=offset)) for distinct_id, offset in monthly_activity]
            )

            retention_filter = RetentionFilter(data={"date_to": self._date(0, month=5, hour=0), "period": "Month"})
            cohorts = retention().run(retention_filter, self.team, total_intervals=11)

            self.assertEqual(self.pluck(cohorts, "label"), [f"Month {index}" for index in range(11)])

            expected_counts = [
                [2, 2, 2, 0, 1, 2, 1, 0, 1, 0, 1],
                [2, 2, 0, 1, 2, 1, 0, 1, 0, 1],
                [2, 0, 1, 2, 1, 0, 1, 0, 1],
                [0, 0, 0, 0, 0, 0, 0, 0],
                [1, 1, 0, 0, 1, 0, 0],
                [2, 1, 0, 1, 0, 1],
                [1, 0, 0, 0, 1],
                [0, 0, 0, 0],
                [1, 0, 0],
                [0, 0],
                [1],
            ]
            self.assertEqual(self.pluck(cohorts, "values", "count"), expected_counts)

            # The 10th of each month from January through November 2020.
            expected_dates = [datetime(2020, month, 10, 0, tzinfo=pytz.UTC) for month in range(1, 12)]
            self.assertEqual(self.pluck(cohorts, "date"), expected_dates)
    def _retrieve_people_in_period(self, filter: RetentionFilter, team: Team):
        """Run the people-per-period retention SQL and return the processed rows.

        The query window starts ``filter.selected_interval`` periods after
        ``filter.date_from`` and ends at ``filter.date_to``. The filter is then
        rebuilt with ``total_intervals`` reduced by the selected interval
        before the rows are handed to ``process_people_in_period`` together
        with the serialized people keyed by uuid.
        """
        period = filter.period
        first_time_only = filter.retention_type == RETENTION_FIRST_TIME
        trunc_func = get_trunc_func_ch(period)
        prop_filters, prop_filter_params = parse_prop_clauses(filter.properties, team.pk)

        target_condition, target_params = self._get_condition(filter.target_entity, table="e")
        target_clause = f"AND {target_condition}"
        returning_condition, return_params = self._get_condition(
            filter.returning_entity, table="e", prepend="returning"
        )
        returning_clause = f"AND {returning_condition}"

        # Pick the reference-event templates matching the retention type.
        if first_time_only:
            reference_template = REFERENCE_EVENT_UNIQUE_PEOPLE_PER_PERIOD_SQL
            default_template = DEFAULT_REFERENCE_EVENT_UNIQUE_PEOPLE_PER_PERIOD_SQL
        else:
            reference_template = REFERENCE_EVENT_PEOPLE_PER_PERIOD_SQL
            default_template = DEFAULT_REFERENCE_EVENT_PEOPLE_PER_PERIOD_SQL

        reference_kwargs = {
            "target_query": target_clause,
            "filters": prop_filters,
            "trunc_func": trunc_func,
        }
        first_event_sql = reference_template.format(**reference_kwargs)
        default_event_sql = default_template.format(**reference_kwargs)

        # The window bounds are taken from the incoming filter, before it is rebuilt below.
        window_start = filter.date_from + filter.selected_interval * filter.period_increment
        window_end = filter.date_to

        remaining_intervals = filter.total_intervals - filter.selected_interval
        filter = filter.with_data({"total_intervals": remaining_intervals})

        sql = RETENTION_PEOPLE_PER_PERIOD_SQL.format(
            returning_query=returning_clause,
            filters=prop_filters,
            first_event_sql=first_event_sql,
            first_event_default_sql=default_event_sql,
            trunc_func=trunc_func,
        )

        # Hourly periods keep the time of day; every other period is pinned to midnight.
        timestamp_format = "%Y-%m-%d" + (" %H:%M:%S" if filter.period == "Hour" else " 00:00:00")
        params = {
            "team_id": team.pk,
            "start_date": window_start.strftime(timestamp_format),
            "end_date": window_end.strftime(timestamp_format),
            "offset": filter.offset,
            "limit": 100,
            "period": period,
            **target_params,
            **return_params,
            **prop_filter_params,
        }
        rows = sync_execute(sql, params)

        from posthog.api.person import PersonSerializer

        # The first column of each row is used as the person uuid.
        persons = get_persons_by_uuids(team_id=team.pk, uuids=[row[0] for row in rows])
        persons = persons.prefetch_related(Prefetch("persondistinctid_set", to_attr="distinct_ids_cache"))
        serialized_by_uuid = {str(person.uuid): PersonSerializer(person).data for person in persons}

        return self.process_people_in_period(filter, rows, serialized_by_uuid)
Example #29
0
    def _determine_query_params(self, filter: RetentionFilter, team: Team):
        """Assemble the SQL fragments and positional parameters for a retention query.

        Returns a 2-tuple:

        * a dict with the keys ``event_query``, ``reference_event_query``,
          ``fields``, ``return_condition`` and ``target_condition``;
        * a tuple of parameters concatenated in the order: start-date
          params, event-query params, reference-query params, entity ids.

        For first-time retention the reference query takes the minimum
        truncated timestamp per person and restricts it with
        ``filter.custom_date_filter_Q``; otherwise the date filter is applied
        to the events up front and every truncated timestamp is kept.
        """
        period = filter.period
        first_time_only = filter.retention_type == RETENTION_FIRST_TIME

        target_condition, target_condition_sql = self.get_entity_condition(filter.target_entity, "first_event_date")
        returning_condition, returning_condition_sql = self.get_entity_condition(filter.returning_entity, "events")

        events: QuerySet = (
            Event.objects.filter(team_id=team.pk).add_person_id(team.pk).annotate(event_date=F("timestamp"))
        )

        trunc, fields = self._get_trunc_func("timestamp", period)

        if first_time_only:
            scoped_events = events.filter(properties_to_Q(filter.properties, team_id=team.pk))
            first_date = (
                scoped_events.filter(target_condition)
                .values("person_id", "event", "action")
                .annotate(first_date=Min(trunc))
                .filter(filter.custom_date_filter_Q("first_date"))
                .distinct()
            )
            # Here the date filter is applied to the returning events only.
            returning_events = scoped_events.filter(filter.date_filter_Q).filter(returning_condition)
        else:
            scoped_events = events.filter(filter.date_filter_Q).filter(
                properties_to_Q(filter.properties, team_id=team.pk)
            )
            first_date = (
                scoped_events.filter(target_condition)
                .annotate(first_date=trunc)
                .values("first_date", "person_id", "event", "action")
                .distinct()
            )
            returning_events = scoped_events.filter(returning_condition)

        final_query = returning_events.values_list("person_id", "event_date", "event", "action").union(
            first_date.values_list("first_date", "person_id", "event", "action")
        )

        # Month and Hour periods pass the start date twice; other periods pass it once.
        if period in ("Month", "Hour"):
            start_params = (filter.date_from, filter.date_from)
        else:
            start_params = (filter.date_from,)

        event_query, events_query_params = final_query.query.sql_with_params()
        reference_event_query, first_date_params = first_date.query.sql_with_params()

        entity_ids = (filter.target_entity.id, filter.returning_entity.id, filter.target_entity.id)

        sql_parts = {
            "event_query": event_query,
            "reference_event_query": reference_event_query,
            "fields": fields,
            "return_condition": returning_condition_sql,
            "target_condition": target_condition_sql,
        }
        positional_params = start_params + events_query_params + first_date_params + entity_ids
        return sql_parts, positional_params
Example #30
0
        def test_hour_interval(self):
            """Run retention with ``period`` set to "Hour" over 11 intervals.

            Checks the cohort labels, the count matrix and the cohort dates
            returned for two people with events spread across one day.
            """
            Person.objects.create(
                team=self.team, distinct_ids=["person1", "alias1"], properties={"email": "*****@*****.**"},
            )
            Person.objects.create(
                team=self.team, distinct_ids=["person2"], properties={"email": "*****@*****.**"},
            )

            # (distinct_id, hour of day passed to self._date), in creation order.
            hourly_activity = [
                ("person1", 6),
                ("person2", 6),
                ("person1", 7),
                ("person2", 7),
                ("person1", 8),
                ("person2", 8),
                ("person1", 10),
                ("person1", 11),
                ("person2", 11),
                ("person2", 12),
                ("person1", 14),
                ("person2", 16),
            ]
            self._create_events(
                [(distinct_id, self._date(day=0, hour=hour)) for distinct_id, hour in hourly_activity]
            )

            filter = RetentionFilter(data={"date_to": self._date(0, hour=16), "period": "Hour"})
            result = retention().run(filter, self.team, total_intervals=11)

            self.assertEqual(self.pluck(result, "label"), [f"Hour {index}" for index in range(11)])

            expected_counts = [
                [2, 2, 2, 0, 1, 2, 1, 0, 1, 0, 1],
                [2, 2, 0, 1, 2, 1, 0, 1, 0, 1],
                [2, 0, 1, 2, 1, 0, 1, 0, 1],
                [0, 0, 0, 0, 0, 0, 0, 0],
                [1, 1, 0, 0, 1, 0, 0],
                [2, 1, 0, 1, 0, 1],
                [1, 0, 0, 0, 1],
                [0, 0, 0, 0],
                [1, 0, 0],
                [0, 0],
                [1],
            ]
            self.assertEqual(self.pluck(result, "values", "count"), expected_counts)

            # Every hour from 06:00 through 16:00 on 2020-06-10.
            expected_dates = [datetime(2020, 6, 10, hour, tzinfo=pytz.UTC) for hour in range(6, 17)]
            self.assertEqual(self.pluck(result, "date"), expected_dates)