def build_returning_event_query(filter: RetentionFilter, team: Team):
    """Build the query for the "returning" side of a retention calculation.

    A ``RetentionEventsQuery`` of type ``RetentionQueryType.RETURNING`` is
    created for ``team.pk`` and its templated query is combined with its
    parameters through ``substitute_params``.

    Args:
        filter: Retention filter to build the query from. A copy with
            ``breakdowns`` emptied is what actually reaches the events query.
        team: Team whose ``pk`` scopes the query.

    Returns:
        Whatever ``substitute_params`` returns for the templated query and its
        parameters (presumably the SQL with parameters inlined -- confirm
        against ``substitute_params``).
    """
    # Avoid pulling in breakdown values from the returning event query.
    filter_without_breakdowns = filter.with_data({"breakdowns": []})

    events_query = RetentionEventsQuery(
        filter=filter_without_breakdowns,
        team_id=team.pk,
        event_query_type=RetentionQueryType.RETURNING,
    )
    sql_template, sql_params = events_query.get_query()
    return substitute_params(sql_template, sql_params)
def _retrieve_people_in_period(self, filter: RetentionFilter, team: Team):
    """Fetch one page of people with their appearance dates for the selected interval.

    The filter is first copied with ``total_intervals`` reduced by
    ``selected_interval``. The SQL fragments and positional parameters come
    from ``self._determine_query_params``; the query is run through
    ``connection.cursor()`` with a fixed page size of 100 and an offset of
    ``filter.offset``.

    Args:
        filter: Retention filter describing the request.
        team: Team whose ``pk`` scopes the ``Person`` lookup.

    Returns:
        The result of ``self.process_people_in_period`` called with the
        adjusted filter, the appearance rows and a dict mapping each
        ``Person.pk`` to its serialized data.
    """
    filter = filter.with_data({"total_intervals": filter.total_intervals - filter.selected_interval})
    sql_fragments, query_params = self._determine_query_params(filter, team)

    # Adjacent literals concatenate into exactly the same text as the original
    # single query string (including its leading and trailing space).
    # NOTE(review): `first_date` and `date` are presumably produced by the
    # `{fields}` fragment -- confirm in `_determine_query_params`.
    query_template = (
        ' SELECT person_id, count(person_id) appearance_count, array_agg(date) appearances'
        ' FROM ('
        ' SELECT DISTINCT {fields} "events"."person_id"'
        ' FROM ({event_query}) events'
        ' LEFT JOIN ({reference_event_query}) first_event_date'
        ' ON (events.person_id = first_event_date.person_id)'
        ' WHERE event_date >= first_date'
        ' AND {target_condition} AND {return_condition}'
        ' OR ({target_condition} AND event_date = first_date)'
        ' ) person_appearances'
        ' WHERE first_date = 0'
        ' GROUP BY person_id'
        ' ORDER BY appearance_count DESC'
        ' LIMIT %s OFFSET %s '
    )
    final_query = query_template.format(**sql_fragments)

    # Imported inside the function, as in the original (presumably to avoid a
    # circular import -- confirm).
    from posthog.api.person import PersonSerializer

    page_size = 100
    with connection.cursor() as cursor:
        cursor.execute(final_query, query_params + (page_size, filter.offset))

        appearance_rows = [
            AppearanceRow(person_id=row[0], appearance_count=row[1], appearances=row[2])
            for row in cursor.fetchall()
        ]

        matching_person_ids = [row.person_id for row in appearance_rows]
        serialized_people = {}
        for person in Person.objects.filter(team_id=team.pk, id__in=matching_person_ids):
            serialized_people[person.pk] = PersonSerializer(person).data

        return self.process_people_in_period(filter, appearance_rows, serialized_people)
def _retrieve_people_in_period(self, filter: RetentionFilter, team: Team):
    """Fetch one page of people for the selected retention interval via ``sync_execute``.

    Builds the reference-event SQL (the "unique" templates when
    ``filter.retention_type`` equals ``RETENTION_FIRST_TIME``, the regular ones
    otherwise), runs ``RETENTION_PEOPLE_PER_PERIOD_SQL`` with a fixed limit of
    100 and an offset of ``filter.offset``, then serializes the matching
    persons.

    Args:
        filter: Retention filter describing the request.
        team: Team whose ``pk`` scopes the property clauses, the query and the
            person lookup.

    Returns:
        The result of ``self.process_people_in_period`` called with the
        adjusted filter, the raw query rows and a dict mapping
        ``str(person.uuid)`` to serialized person data.
    """
    period = filter.period
    trunc_func = get_trunc_func_ch(period)
    prop_filters, prop_filter_params = parse_prop_clauses(filter.properties, team.pk)

    target_condition, target_params = self._get_condition(filter.target_entity, table="e")
    returning_condition, return_params = self._get_condition(
        filter.returning_entity, table="e", prepend="returning"
    )
    target_clause = f"AND {target_condition}"
    returning_clause = f"AND {returning_condition}"

    if filter.retention_type == RETENTION_FIRST_TIME:
        reference_template = REFERENCE_EVENT_UNIQUE_PEOPLE_PER_PERIOD_SQL
        default_reference_template = DEFAULT_REFERENCE_EVENT_UNIQUE_PEOPLE_PER_PERIOD_SQL
    else:
        reference_template = REFERENCE_EVENT_PEOPLE_PER_PERIOD_SQL
        default_reference_template = DEFAULT_REFERENCE_EVENT_PEOPLE_PER_PERIOD_SQL

    # Both reference templates are filled with the same three placeholders.
    first_event_sql = reference_template.format(
        target_query=target_clause, filters=prop_filters, trunc_func=trunc_func
    )
    default_event_sql = default_reference_template.format(
        target_query=target_clause, filters=prop_filters, trunc_func=trunc_func
    )

    # The date window is read from the incoming filter before it is rebound:
    # the start is shifted forward by `selected_interval` period increments.
    window_start = filter.date_from + filter.selected_interval * filter.period_increment
    window_end = filter.date_to
    filter = filter.with_data({"total_intervals": filter.total_intervals - filter.selected_interval})

    # "Hour" keeps the time of day; every other period is pinned to midnight.
    time_suffix = " %H:%M:%S" if filter.period == "Hour" else " 00:00:00"
    timestamp_format = "%Y-%m-%d{}".format(time_suffix)

    people_sql = RETENTION_PEOPLE_PER_PERIOD_SQL.format(
        returning_query=returning_clause,
        filters=prop_filters,
        first_event_sql=first_event_sql,
        first_event_default_sql=default_event_sql,
        trunc_func=trunc_func,
    )
    # Spread order matters: later mappings override earlier keys.
    query_args = {
        "team_id": team.pk,
        "start_date": window_start.strftime(timestamp_format),
        "end_date": window_end.strftime(timestamp_format),
        "offset": filter.offset,
        "limit": 100,
        "period": period,
        **target_params,
        **return_params,
        **prop_filter_params,
    }
    query_result = sync_execute(people_sql, query_args)

    # Imported inside the function, as in the original (presumably to avoid a
    # circular import -- confirm).
    from posthog.api.person import PersonSerializer

    # The first column of each row is used as the person uuid for the lookup.
    person_uuids = [row[0] for row in query_result]
    people = get_persons_by_uuids(team_id=team.pk, uuids=person_uuids).prefetch_related(
        Prefetch("persondistinctid_set", to_attr="distinct_ids_cache")
    )
    people_dict = {str(person.uuid): PersonSerializer(person).data for person in people}

    return self.process_people_in_period(filter, query_result, people_dict)