def _get_date_filter(self) -> Tuple[str, Dict]:
    date_filter = ""
    date_params: Dict[str, Any] = {}
    interval_annotation = get_trunc_func_ch(self._filter.interval)
    _, _, round_interval = get_time_diff(
        self._filter.interval or "day", self._filter.date_from, self._filter.date_to, team_id=self._team_id
    )
    _, parsed_date_to, date_params = parse_timestamps(filter=self._filter, team_id=self._team_id)
    parsed_date_from = date_from_clause(interval_annotation, round_interval)

    self.parsed_date_from = parsed_date_from
    self.parsed_date_to = parsed_date_to

    if self._entity.math in [WEEKLY_ACTIVE, MONTHLY_ACTIVE]:
        date_filter = "{parsed_date_from_prev_range} {parsed_date_to}"
        format_params = get_active_user_params(self._filter, self._entity, self._team_id)
        self.active_user_params = format_params
        date_filter = date_filter.format(**format_params, parsed_date_to=parsed_date_to)
    else:
        date_filter = "{parsed_date_from} {parsed_date_to}".format(
            parsed_date_from=parsed_date_from, parsed_date_to=parsed_date_to
        )

    return date_filter, date_params

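# For reference, a minimal sketch of what get_trunc_func_ch is assumed to do
# throughout this module: map the filter's interval string to the name of the
# matching ClickHouse date-truncation function, which the SQL templates then
# interpolate. This is an illustration, not the canonical implementation.
def _trunc_func_sketch(interval: str = "day") -> str:
    ch_trunc_funcs = {
        "minute": "toStartOfMinute",
        "hour": "toStartOfHour",
        "day": "toStartOfDay",
        "week": "toStartOfWeek",
        "month": "toStartOfMonth",
    }
    return ch_trunc_funcs[interval.lower()]
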
def stickiness(self, entity: Entity, filter: StickinessFilter, team_id: int) -> Dict[str, Any]:
    parsed_date_from, parsed_date_to, _ = parse_timestamps(filter=filter, team_id=team_id)
    prop_filters, prop_filter_params = parse_prop_clauses(filter.properties, team_id)
    trunc_func = get_trunc_func_ch(filter.interval)

    params: Dict = {"team_id": team_id}
    params = {**params, **prop_filter_params, "num_intervals": filter.num_intervals}

    if entity.type == TREND_FILTER_TYPE_ACTIONS:
        action = Action.objects.get(pk=entity.id)
        action_query, action_params = format_action_filter(action)
        if action_query == "":
            return {}

        params = {**params, **action_params}
        content_sql = STICKINESS_ACTIONS_SQL.format(
            team_id=team_id,
            actions_query=action_query,
            parsed_date_from=parsed_date_from,
            parsed_date_to=parsed_date_to,
            filters=prop_filters,
            trunc_func=trunc_func,
        )
    else:
        content_sql = STICKINESS_SQL.format(
            team_id=team_id,
            event=entity.id,
            parsed_date_from=parsed_date_from,
            parsed_date_to=parsed_date_to,
            filters=prop_filters,
            trunc_func=trunc_func,
        )

    counts = sync_execute(content_sql, params)
    return self.process_result(counts, filter)

def _format_lifecycle_query(self, entity: Entity, filter: Filter, team_id: int) -> Tuple[str, Dict, Callable]:
    date_from = filter.date_from
    if not date_from:
        date_from = get_earliest_timestamp(team_id)

    interval = filter.interval or "day"
    num_intervals, seconds_in_interval, _ = get_time_diff(interval, filter.date_from, filter.date_to, team_id)
    interval_increment, interval_string, sub_interval_string = self.get_interval(interval)
    trunc_func = get_trunc_func_ch(interval)
    event_query = ""
    event_params: Dict[str, Any] = {}

    props_to_filter = [*filter.properties, *entity.properties]
    prop_filters, prop_filter_params = parse_prop_clauses(
        props_to_filter, team_id, filter_test_accounts=filter.filter_test_accounts
    )

    _, _, date_params = parse_timestamps(filter=filter, team_id=team_id)

    if entity.type == TREND_FILTER_TYPE_ACTIONS:
        try:
            action = entity.get_action()
            event_query, event_params = format_action_filter(action)
        except:
            return "", {}, self._parse_result(filter, entity)
    else:
        event_query = "event = %(event)s"
        event_params = {"event": entity.id}

    return (
        LIFECYCLE_SQL.format(
            interval=interval_string,
            trunc_func=trunc_func,
            event_query=event_query,
            filters=prop_filters,
            sub_interval=sub_interval_string,
            GET_TEAM_PERSON_DISTINCT_IDS=GET_TEAM_PERSON_DISTINCT_IDS,
        ),
        {
            "team_id": team_id,
            "prev_date_from": (date_from - interval_increment).strftime(
                "%Y-%m-%d{}".format(
                    " %H:%M:%S" if filter.interval == "hour" or filter.interval == "minute" else " 00:00:00"
                )
            ),
            "num_intervals": num_intervals,
            "seconds_in_interval": seconds_in_interval,
            **event_params,
            **date_params,
            **prop_filter_params,
        },
        self._parse_result(filter, entity),
    )

def _process_content_sql(target_entity: Entity, filter: StickinessFilter, team: Team) -> Tuple[str, Dict[str, Any]]:
    parsed_date_from, parsed_date_to, _ = parse_timestamps(filter=filter, team_id=team.pk)
    prop_filters, prop_filter_params = parse_prop_clauses(
        filter.properties, team.pk, filter_test_accounts=filter.filter_test_accounts
    )
    entity_sql, entity_params = _format_entity_filter(entity=target_entity)
    trunc_func = get_trunc_func_ch(filter.interval)

    params: Dict = {
        "team_id": team.pk,
        **prop_filter_params,
        "stickiness_day": filter.selected_interval,
        **entity_params,
        "offset": filter.offset,
    }

    content_sql = STICKINESS_PEOPLE_SQL.format(
        entity_filter=entity_sql,
        parsed_date_from=parsed_date_from,
        parsed_date_to=parsed_date_to,
        filters=prop_filters,
        trunc_func=trunc_func,
        GET_TEAM_PERSON_DISTINCT_IDS=GET_TEAM_PERSON_DISTINCT_IDS,
    )
    return content_sql, params

def _retrieve_people(self, target_entity: Entity, filter: StickinessFilter, team: Team) -> ReturnDict:
    parsed_date_from, parsed_date_to, _ = parse_timestamps(filter=filter, team_id=team.pk)
    prop_filters, prop_filter_params = parse_prop_clauses(filter.properties, team.pk)
    entity_sql, entity_params = self._format_entity_filter(entity=target_entity)
    trunc_func = get_trunc_func_ch(filter.interval)

    params: Dict = {
        "team_id": team.pk,
        **prop_filter_params,
        "stickiness_day": filter.selected_interval,
        **entity_params,
        "offset": filter.offset,
    }

    content_sql = STICKINESS_PEOPLE_SQL.format(
        entity_filter=entity_sql,
        parsed_date_from=parsed_date_from,
        parsed_date_to=parsed_date_to,
        filters=prop_filters,
        trunc_func=trunc_func,
    )

    people = sync_execute(
        PEOPLE_SQL.format(
            content_sql=content_sql, query="", latest_person_sql=GET_LATEST_PERSON_SQL.format(query="")
        ),
        params,
    )
    return ClickhousePersonSerializer(people, many=True).data

def _retrieve_people(self, filter: RetentionFilter, team: Team):
    period = filter.period
    is_first_time_retention = filter.retention_type == RETENTION_FIRST_TIME
    trunc_func = get_trunc_func_ch(period)
    prop_filters, prop_filter_params = parse_prop_clauses(filter.properties, team.pk)

    returning_entity = filter.returning_entity if filter.selected_interval > 0 else filter.target_entity
    target_query, target_params = self._get_condition(filter.target_entity, table="e")
    target_query_formatted = "AND {target_query}".format(target_query=target_query)
    return_query, return_params = self._get_condition(returning_entity, table="e", prepend="returning")
    return_query_formatted = "AND {return_query}".format(return_query=return_query)

    reference_event_query = (REFERENCE_EVENT_UNIQUE_SQL if is_first_time_retention else REFERENCE_EVENT_SQL).format(
        target_query=target_query_formatted,
        filters=prop_filters,
        trunc_func=trunc_func,
    )
    reference_date_from = filter.date_from
    reference_date_to = filter.date_from + filter.period_increment
    date_from = filter.date_from + filter.selected_interval * filter.period_increment
    date_to = date_from + filter.period_increment

    result = sync_execute(
        RETENTION_PEOPLE_SQL.format(
            reference_event_query=reference_event_query,
            target_query=return_query_formatted,
            filters=prop_filters,
        ),
        {
            "team_id": team.pk,
            "start_date": date_from.strftime(
                "%Y-%m-%d{}".format(" %H:%M:%S" if filter.period == "Hour" else " 00:00:00")
            ),
            "end_date": date_to.strftime(
                "%Y-%m-%d{}".format(" %H:%M:%S" if filter.period == "Hour" else " 00:00:00")
            ),
            "reference_start_date": reference_date_from.strftime(
                "%Y-%m-%d{}".format(" %H:%M:%S" if filter.period == "Hour" else " 00:00:00")
            ),
            "reference_end_date": reference_date_to.strftime(
                "%Y-%m-%d{}".format(" %H:%M:%S" if filter.period == "Hour" else " 00:00:00")
            ),
            "offset": filter.offset,
            **target_params,
            **return_params,
            **prop_filter_params,
        },
    )
    serialized = ClickhousePersonSerializer(result, many=True).data
    return serialized

def _total_volume_query(self, entity: Entity, filter: Filter, team_id: int) -> Tuple[str, Dict, Callable]:
    interval_annotation = get_trunc_func_ch(filter.interval)
    num_intervals, seconds_in_interval, _ = get_time_diff(
        filter.interval or "day", filter.date_from, filter.date_to, team_id=team_id
    )
    aggregate_operation, join_condition, math_params = process_math(entity)

    trend_event_query = TrendsEventQuery(
        filter=filter,
        entity=entity,
        team_id=team_id,
        should_join_distinct_ids=True
        if join_condition != "" or entity.math in [WEEKLY_ACTIVE, MONTHLY_ACTIVE]
        else False,
    )
    event_query, event_query_params = trend_event_query.get_query()

    content_sql_params = {
        "aggregate_operation": aggregate_operation,
        "timestamp": "e.timestamp",
        "interval": interval_annotation,
    }
    params: Dict = {"team_id": team_id}
    params = {**params, **math_params, **event_query_params}

    if filter.display in TRENDS_DISPLAY_BY_VALUE:
        content_sql = VOLUME_TOTAL_AGGREGATE_SQL.format(event_query=event_query, **content_sql_params)
        time_range = enumerate_time_range(filter, seconds_in_interval)

        return (
            content_sql,
            params,
            lambda result: [
                {"aggregated_value": result[0][0] if result and len(result) else 0, "days": time_range}
            ],
        )
    else:
        if entity.math in [WEEKLY_ACTIVE, MONTHLY_ACTIVE]:
            content_sql = ACTIVE_USER_SQL.format(
                event_query=event_query,
                **content_sql_params,
                parsed_date_to=trend_event_query.parsed_date_to,
                parsed_date_from=trend_event_query.parsed_date_from,
                **trend_event_query.active_user_params,
            )
        else:
            content_sql = VOLUME_SQL.format(event_query=event_query, **content_sql_params)

        null_sql = NULL_SQL.format(
            interval=interval_annotation,
            seconds_in_interval=seconds_in_interval,
            num_intervals=num_intervals,
            date_to=filter.date_to.strftime("%Y-%m-%d %H:%M:%S"),
        )
        final_query = AGGREGATE_SQL.format(null_sql=null_sql, content_sql=content_sql)
        return final_query, params, self._parse_total_volume_result(filter)

def calculate_avg(self, filter: Filter, team: Team):
    parsed_date_from, parsed_date_to, _ = parse_timestamps(filter, team.pk)

    filters, params = parse_prop_clauses(
        filter.properties, team.pk, filter_test_accounts=filter.filter_test_accounts
    )

    interval_notation = get_trunc_func_ch(filter.interval)
    num_intervals, seconds_in_interval, _ = get_time_diff(
        filter.interval or "day", filter.date_from, filter.date_to, team.pk
    )

    entity_conditions, entity_params = entity_query_conditions(filter, team)
    if not entity_conditions:
        entity_conditions = ["event != '$feature_flag_called'"]  # default condition

    params = {**params, **entity_params}
    entity_query = " OR ".join(entity_conditions)

    avg_query = SESSIONS_NO_EVENTS_SQL.format(
        team_id=team.pk,
        date_from=parsed_date_from,
        date_to=parsed_date_to,
        filters=filters,
        sessions_limit="",
        entity_filter=f"AND ({entity_query})",
    )
    per_period_query = AVERAGE_PER_PERIOD_SQL.format(sessions=avg_query, interval=interval_notation)

    null_sql = NULL_SQL.format(
        date_to=filter.date_to.strftime("%Y-%m-%d 00:00:00"),
        interval=interval_notation,
        num_intervals=num_intervals,
        seconds_in_interval=seconds_in_interval,
    )

    final_query = AVERAGE_SQL.format(sessions=per_period_query, null_sql=null_sql)
    params = {**params, "team_id": team.pk}
    response = sync_execute(final_query, params)
    values = self.clean_values(filter, response)
    time_series_data = append_data(values, interval=filter.interval, math=None)
    scaled_data, _ = scale_time_series(time_series_data["data"])
    time_series_data.update({"data": scaled_data})

    # calculate average
    total = sum(val[1] for val in values)
    if total == 0:
        return []

    valid_days = sum(1 if val[1] else 0 for val in values)
    overall_average = (total / valid_days) if valid_days else 0
    result = self._format_avg(overall_average)
    time_series_data.update(result)

    return [time_series_data]

def get_query(self) -> str:
    # Expects multiple rows for the same person: first event time, steps taken.
    step_counts = self.get_step_counts_without_aggregation_query()
    self.params.update(self.funnel_order.params)
    reached_from_step_count_condition, reached_to_step_count_condition, _ = self.get_steps_reached_conditions()
    trunc_func = get_trunc_func_ch(self._filter.interval)
    interval_func = get_interval_func_ch(self._filter.interval)

    if self._filter.date_from is None:
        _date_from = get_earliest_timestamp(self._team.pk)
    else:
        _date_from = self._filter.date_from

    breakdown_clause = self._get_breakdown_prop()
    formatted_date_from = format_ch_timestamp(_date_from, self._filter)
    formatted_date_to = format_ch_timestamp(self._filter.date_to, self._filter)

    self.params.update(
        {
            "formatted_date_from": formatted_date_from,
            "formatted_date_to": formatted_date_to,
            "interval": self._filter.interval,
        }
    )

    query = f"""
        SELECT
            entrance_period_start,
            reached_from_step_count,
            reached_to_step_count,
            if(reached_from_step_count > 0, round(reached_to_step_count / reached_from_step_count * 100, 2), 0) AS conversion_rate
            {breakdown_clause}
        FROM (
            SELECT
                entrance_period_start,
                countIf({reached_from_step_count_condition}) AS reached_from_step_count,
                countIf({reached_to_step_count_condition}) AS reached_to_step_count
                {breakdown_clause}
            FROM (
                {step_counts}
            ) GROUP BY entrance_period_start {breakdown_clause}
        ) data
        RIGHT OUTER JOIN (
            SELECT
                {trunc_func}(toDateTime(%(formatted_date_from)s) + {interval_func}(number)) AS entrance_period_start
                {', breakdown_value as prop' if breakdown_clause else ''}
            FROM numbers(dateDiff(%(interval)s, toDateTime(%(formatted_date_from)s), toDateTime(%(formatted_date_to)s)) + 1) AS period_offsets
            {'ARRAY JOIN (%(breakdown_values)s) AS breakdown_value' if breakdown_clause else ''}
        ) fill
        USING (entrance_period_start {breakdown_clause})
        ORDER BY entrance_period_start ASC
        SETTINGS allow_experimental_window_functions = 1"""

    return query

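# The RIGHT OUTER JOIN above joins the real data onto a generated "date spine",
# so that periods with no funnel entrants still appear with zero counts. A
# rough, self-contained Python equivalent of the spine for the "day" interval
# (illustrative only; in practice ClickHouse builds it via numbers() and
# dateDiff()):
from datetime import datetime, timedelta
from typing import List


def _enumerate_period_starts_sketch(date_from: datetime, date_to: datetime) -> List[datetime]:
    # Mirrors: SELECT toStartOfDay(toDateTime(from) + toIntervalDay(number))
    #          FROM numbers(dateDiff('day', from, to) + 1)
    start = date_from.replace(hour=0, minute=0, second=0, microsecond=0)
    days = (date_to.date() - date_from.date()).days
    return [start + timedelta(days=n) for n in range(days + 1)]
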
def _total_volume_query(self, entity: Entity, filter: Filter, team_id: int) -> Tuple[str, Dict, Callable]:
    trunc_func = get_trunc_func_ch(filter.interval)
    interval_func = get_interval_func_ch(filter.interval)
    aggregate_operation, join_condition, math_params = process_math(entity)

    trend_event_query = TrendsEventQuery(
        filter=filter,
        entity=entity,
        team_id=team_id,
        should_join_distinct_ids=True
        if join_condition != "" or entity.math in [WEEKLY_ACTIVE, MONTHLY_ACTIVE]
        else False,
    )
    event_query, event_query_params = trend_event_query.get_query()

    content_sql_params = {
        "aggregate_operation": aggregate_operation,
        "timestamp": "e.timestamp",
        "interval": trunc_func,
    }
    params: Dict = {"team_id": team_id}
    params = {**params, **math_params, **event_query_params}

    if filter.display in TRENDS_DISPLAY_BY_VALUE:
        content_sql = VOLUME_TOTAL_AGGREGATE_SQL.format(event_query=event_query, **content_sql_params)
        return (content_sql, params, self._parse_aggregate_volume_result(filter, entity, team_id))
    else:
        if entity.math in [WEEKLY_ACTIVE, MONTHLY_ACTIVE]:
            content_sql = ACTIVE_USER_SQL.format(
                event_query=event_query,
                **content_sql_params,
                parsed_date_to=trend_event_query.parsed_date_to,
                parsed_date_from=trend_event_query.parsed_date_from,
                **trend_event_query.active_user_params,
            )
        elif filter.display == TRENDS_CUMULATIVE and entity.math == "dau":
            cumulative_sql = CUMULATIVE_SQL.format(event_query=event_query)
            content_sql = VOLUME_SQL.format(event_query=cumulative_sql, **content_sql_params)
        else:
            content_sql = VOLUME_SQL.format(event_query=event_query, **content_sql_params)

        null_sql = NULL_SQL.format(trunc_func=trunc_func, interval_func=interval_func)
        params["interval"] = filter.interval
        final_query = AGGREGATE_SQL.format(null_sql=null_sql, content_sql=content_sql)
        return final_query, params, self._parse_total_volume_result(filter, entity, team_id)

def _get_funnel_trend_null_sql(self):
    interval_annotation = get_trunc_func_ch(self._filter.interval)
    num_intervals, seconds_in_interval, round_interval = get_time_diff(
        self._filter.interval or "day", self._filter.date_from, self._filter.date_to, team_id=self._team.id
    )
    funnel_trend_null_sql = NULL_SQL_FUNNEL_TRENDS.format(
        interval=interval_annotation,
        seconds_in_interval=seconds_in_interval,
        num_intervals=num_intervals,
        date_to=self._filter.date_to.strftime("%Y-%m-%d %H:%M:%S"),
    )
    return funnel_trend_null_sql

def get_query(self) -> str:
    # Expects multiple rows for the same person: first event time, steps taken.
    steps_per_person_query = self.funnel_order.get_step_counts_without_aggregation_query()
    self.params.update(self.funnel_order.params)
    num_intervals, seconds_in_interval, _ = get_time_diff(
        self._filter.interval or "day", self._filter.date_from, self._filter.date_to, team_id=self._team.pk
    )
    interval_method = get_trunc_func_ch(self._filter.interval)

    # How many steps must have been completed to count for the denominator
    from_step = self._filter.funnel_from_step or 1
    # How many steps must have been completed to count for the numerator
    to_step = self._filter.funnel_to_step or len(self._filter.entities)

    reached_from_step_count_condition = f"steps_completed >= {from_step}"
    reached_to_step_count_condition = f"steps_completed >= {to_step}"

    query = f"""
        SELECT
            entrance_period_start,
            reached_from_step_count,
            reached_to_step_count,
            if(reached_from_step_count > 0, round(reached_to_step_count / reached_from_step_count * 100, 2), 0) AS conversion_rate
        FROM (
            SELECT
                entrance_period_start,
                countIf({reached_from_step_count_condition}) AS reached_from_step_count,
                countIf({reached_to_step_count_condition}) AS reached_to_step_count
            FROM (
                SELECT
                    person_id,
                    {interval_method}(timestamp) AS entrance_period_start,
                    max(steps) AS steps_completed
                FROM (
                    {steps_per_person_query}
                ) GROUP BY person_id, entrance_period_start
            ) GROUP BY entrance_period_start
        ) data
        RIGHT JOIN (
            SELECT
                {interval_method}(toDateTime('{self._filter.date_from.strftime(TIMESTAMP_FORMAT)}') + number * {seconds_in_interval}) AS entrance_period_start
            FROM numbers({num_intervals}) AS period_offsets
        ) fill
        USING (entrance_period_start)
        ORDER BY entrance_period_start ASC
        SETTINGS allow_experimental_window_functions = 1"""

    return query

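# Note on the defaults above: with funnel_from_step/funnel_to_step unset, the
# denominator counts everyone who completed at least step 1 (entered the
# funnel) and the numerator everyone who completed every step. A subtlety of
# the `or` fallback: an explicit funnel_from_step of 0 would also coerce to 1.
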
def _normal_query(self, entity: Entity, filter: Filter, team_id: int) -> Tuple[str, Dict, Callable]:
    interval_annotation = get_trunc_func_ch(filter.interval)
    num_intervals, seconds_in_interval, round_interval = get_time_diff(
        filter.interval or "day", filter.date_from, filter.date_to, team_id=team_id
    )
    _, parsed_date_to, date_params = parse_timestamps(filter=filter, team_id=team_id)

    props_to_filter = [*filter.properties, *entity.properties]
    prop_filters, prop_filter_params = parse_prop_clauses(props_to_filter, team_id)
    aggregate_operation, join_condition, math_params = process_math(entity)

    params: Dict = {"team_id": team_id}
    params = {**params, **prop_filter_params, **math_params, **date_params}
    content_sql_params = {
        "interval": interval_annotation,
        "parsed_date_from": date_from_clause(interval_annotation, round_interval),
        "parsed_date_to": parsed_date_to,
        "timestamp": "timestamp",
        "team_id": team_id,
        "filters": prop_filters,
        "event_join": join_condition,
        "aggregate_operation": aggregate_operation,
    }
    entity_params, entity_format_params = self._populate_entity_params(entity)
    params = {**params, **entity_params}
    content_sql_params = {**content_sql_params, **entity_format_params}

    if filter.display in TRENDS_DISPLAY_BY_VALUE:
        agg_query = self._determine_single_aggregate_query(filter, entity)
        content_sql = agg_query.format(**content_sql_params)

        return (
            content_sql,
            params,
            lambda result: [{"aggregated_value": result[0][0] if result and len(result) else 0}],
        )
    else:
        content_sql = self._determine_trend_aggregate_query(filter, entity)
        content_sql = content_sql.format(**content_sql_params)

        null_sql = NULL_SQL.format(
            interval=interval_annotation,
            seconds_in_interval=seconds_in_interval,
            num_intervals=num_intervals,
            date_to=filter.date_to.strftime("%Y-%m-%d %H:%M:%S"),
        )
        final_query = AGGREGATE_SQL.format(null_sql=null_sql, content_sql=content_sql)
        return final_query, params, self._parse_normal_result(filter)

def calculate_avg(self, filter: Filter, team: Team):
    parsed_date_from, parsed_date_to, _ = parse_timestamps(filter, team.pk)
    filters, params = parse_prop_clauses(filter.properties, team.pk)
    interval_notation = get_trunc_func_ch(filter.interval)
    # get_time_diff returns (num_intervals, seconds_in_interval, round_interval);
    # the third value is unused here.
    num_intervals, seconds_in_interval, _ = get_time_diff(
        filter.interval or "day", filter.date_from, filter.date_to, team.pk
    )

    avg_query = SESSIONS_NO_EVENTS_SQL.format(
        team_id=team.pk,
        date_from=parsed_date_from,
        date_to=parsed_date_to,
        filters=filters,
        sessions_limit="",
    )
    per_period_query = AVERAGE_PER_PERIOD_SQL.format(sessions=avg_query, interval=interval_notation)

    null_sql = NULL_SQL.format(
        date_to=filter.date_to.strftime("%Y-%m-%d 00:00:00"),
        interval=interval_notation,
        num_intervals=num_intervals,
        seconds_in_interval=seconds_in_interval,
    )

    final_query = AVERAGE_SQL.format(sessions=per_period_query, null_sql=null_sql)
    params = {**params, "team_id": team.pk}
    response = sync_execute(final_query, params)
    values = self.clean_values(filter, response)
    time_series_data = append_data(values, interval=filter.interval, math=None)

    # calculate average
    total = sum(val[1] for val in values)
    if total == 0:
        return []

    valid_days = sum(1 if val[1] else 0 for val in values)
    overall_average = (total / valid_days) if valid_days else 0
    result = self._format_avg(overall_average)
    time_series_data.update(result)

    return [time_series_data]

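# Worked example of the averaging step above, with invented values. clean_values()
# yields (period, average) pairs; periods whose value is zero are excluded from
# the denominator, so the overall average is taken over "valid" periods only.
values = [("2021-05-01", 120.0), ("2021-05-02", 0.0), ("2021-05-03", 60.0)]
total = sum(val[1] for val in values)                        # 180.0
valid_days = sum(1 if val[1] else 0 for val in values)       # 2 (the zero day is skipped)
overall_average = (total / valid_days) if valid_days else 0  # 90.0
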
def get_step_counts_without_aggregation_query(
    self, *, specific_entrance_period_start: Optional[datetime] = None
) -> str:
    steps_per_person_query = self.funnel_order.get_step_counts_without_aggregation_query()
    interval_method = get_trunc_func_ch(self._filter.interval)

    # This is used by funnel trends when we only need data for one period, e.g. person per data point
    if specific_entrance_period_start:
        self.params["entrance_period_start"] = specific_entrance_period_start.strftime(TIMESTAMP_FORMAT)

    return f"""
def get_query(self) -> str:
    # Expects multiple rows for the same person: first event time, steps taken.
    step_counts = self.get_step_counts_without_aggregation_query()
    self.params.update(self.funnel_order.params)
    reached_from_step_count_condition, reached_to_step_count_condition, _ = self.get_steps_reached_conditions()
    interval_method = get_trunc_func_ch(self._filter.interval)
    num_intervals, seconds_in_interval, _ = get_time_diff(
        self._filter.interval or "day", self._filter.date_from, self._filter.date_to, team_id=self._team.pk
    )
    breakdown_clause = self._get_breakdown_prop()

    query = f"""
        SELECT
            entrance_period_start,
            reached_from_step_count,
            reached_to_step_count,
            if(reached_from_step_count > 0, round(reached_to_step_count / reached_from_step_count * 100, 2), 0) AS conversion_rate
            {breakdown_clause}
        FROM (
            SELECT
                entrance_period_start,
                countIf({reached_from_step_count_condition}) AS reached_from_step_count,
                countIf({reached_to_step_count_condition}) AS reached_to_step_count
                {breakdown_clause}
            FROM (
                {step_counts}
            ) GROUP BY entrance_period_start {breakdown_clause}
        ) data
        RIGHT OUTER JOIN (
            SELECT
                {interval_method}(toDateTime('{self._filter.date_from.strftime(TIMESTAMP_FORMAT)}') + number * {seconds_in_interval}) AS entrance_period_start
                {', breakdown_value as prop' if breakdown_clause else ''}
            FROM numbers({num_intervals}) AS period_offsets
            {'ARRAY JOIN (%(breakdown_values)s) AS breakdown_value' if breakdown_clause else ''}
        ) fill
        USING (entrance_period_start {breakdown_clause})
        ORDER BY entrance_period_start ASC
        SETTINGS allow_experimental_window_functions = 1"""

    return query

def _configure_sql(self):
    funnel_trend_null_sql = self._get_funnel_trend_null_sql()
    parsed_date_from, parsed_date_to, _ = self._get_dates()
    prop_filters, _ = self._get_filters()
    steps = self._get_steps()
    step_count = len(steps)
    interval_method = get_trunc_func_ch(self._filter.interval)

    sql = FUNNEL_TREND_SQL.format(
        team_id=self._team.pk,
        steps=", ".join(steps),
        step_count=step_count,
        filters=prop_filters.replace("uuid IN", "events.uuid IN", 1),
        parsed_date_from=parsed_date_from,
        parsed_date_to=parsed_date_to,
        within_time=self._filter.milliseconds_from_days(self._filter.funnel_window_days),
        latest_distinct_id_sql=GET_LATEST_PERSON_DISTINCT_ID_SQL,
        funnel_trend_null_sql=funnel_trend_null_sql,
        interval_method=interval_method,
    )
    return sql

def get_step_counts_without_aggregation_query(
    self, *, specific_entrance_period_start: Optional[datetime] = None
) -> str:
    steps_per_person_query = self.funnel_order.get_step_counts_without_aggregation_query()
    trunc_func = get_trunc_func_ch(self._filter.interval)

    # This is used by funnel trends when we only need data for one period, e.g. person per data point
    if specific_entrance_period_start:
        self.params["entrance_period_start"] = specific_entrance_period_start.strftime(TIMESTAMP_FORMAT)

    event_select_clause = ""
    if self._filter.include_recordings:
        max_steps = len(self._filter.entities)
        event_select_clause = self._get_matching_event_arrays(max_steps)

    breakdown_clause = self._get_breakdown_prop()

    return f"""
def get_people(
    self,
    filter: Filter,
    team_id: int,
    target_date: datetime,
    lifecycle_type: str,
    request: Request,
    limit: int = 100,
):
    entity = filter.entities[0]
    date_from = filter.date_from
    if not date_from:
        date_from = get_earliest_timestamp(team_id)

    interval = filter.interval or "day"
    num_intervals, seconds_in_interval, _ = get_time_diff(
        interval, filter.date_from, filter.date_to, team_id=team_id
    )
    interval_increment, interval_string, sub_interval_string = self.get_interval(interval)
    trunc_func = get_trunc_func_ch(interval)
    event_query = ""
    event_params: Dict[str, Any] = {}

    _, _, date_params = parse_timestamps(filter=filter, team_id=team_id)

    if entity.type == TREND_FILTER_TYPE_ACTIONS:
        try:
            action = entity.get_action()
            event_query, event_params = format_action_filter(action)
        except:
            return []
    else:
        event_query = "event = %(event)s"
        event_params = {"event": entity.id}

    props_to_filter = [*filter.properties, *entity.properties]
    prop_filters, prop_filter_params = parse_prop_clauses(
        props_to_filter, team_id, filter_test_accounts=filter.filter_test_accounts
    )

    result = sync_execute(
        LIFECYCLE_PEOPLE_SQL.format(
            interval=interval_string,
            trunc_func=trunc_func,
            event_query=event_query,
            filters=prop_filters,
            sub_interval=sub_interval_string,
            GET_TEAM_PERSON_DISTINCT_IDS=GET_TEAM_PERSON_DISTINCT_IDS,
        ),
        {
            "team_id": team_id,
            "prev_date_from": (date_from - interval_increment).strftime(
                "%Y-%m-%d{}".format(
                    " %H:%M:%S" if filter.interval == "hour" or filter.interval == "minute" else " 00:00:00"
                )
            ),
            "num_intervals": num_intervals,
            "seconds_in_interval": seconds_in_interval,
            **event_params,
            **date_params,
            **prop_filter_params,
            "status": lifecycle_type,
            "target_date": target_date.strftime(
                "%Y-%m-%d{}".format(
                    " %H:%M:%S" if filter.interval == "hour" or filter.interval == "minute" else " 00:00:00"
                )
            ),
            "offset": filter.offset,
            "limit": limit,
        },
    )
    people = get_persons_by_uuids(team_id=team_id, uuids=[p[0] for p in result])
    people = people.prefetch_related(Prefetch("persondistinctid_set", to_attr="distinct_ids_cache"))

    from posthog.api.person import PersonSerializer

    return PersonSerializer(people, many=True).data

def _execute_sql(
    self,
    filter: RetentionFilter,
    team: Team,
) -> Dict[Tuple[int, int], Dict[str, Any]]:
    period = filter.period
    prop_filters, prop_filter_params = parse_prop_clauses(
        filter.properties, team.pk, filter_test_accounts=filter.filter_test_accounts
    )
    target_entity = filter.target_entity
    returning_entity = filter.returning_entity
    is_first_time_retention = filter.retention_type == RETENTION_FIRST_TIME
    date_from = filter.date_from
    date_to = filter.date_to

    target_query = ""
    target_params: Dict = {}
    trunc_func = get_trunc_func_ch(period)

    target_query, target_params = self._get_condition(target_entity, table="e")
    returning_query, returning_params = self._get_condition(returning_entity, table="e", prepend="returning")

    target_query_formatted = "AND {target_query}".format(target_query=target_query)
    returning_query_formatted = "AND {returning_query}".format(returning_query=returning_query)

    reference_event_sql = (REFERENCE_EVENT_UNIQUE_SQL if is_first_time_retention else REFERENCE_EVENT_SQL).format(
        target_query=target_query_formatted,
        filters=prop_filters,
        trunc_func=trunc_func,
    )

    target_condition, _ = self._get_condition(target_entity, table="reference_event")
    if is_first_time_retention:
        target_condition = target_condition.replace("reference_event.uuid", "reference_event.min_uuid")
        target_condition = target_condition.replace("reference_event.event", "reference_event.min_event")
    returning_condition, _ = self._get_condition(returning_entity, table="event", prepend="returning")

    result = sync_execute(
        RETENTION_SQL.format(
            target_query=target_query_formatted,
            returning_query=returning_query_formatted,
            filters=prop_filters,
            trunc_func=trunc_func,
            extra_union="UNION ALL {} ".format(reference_event_sql),
            reference_event_sql=reference_event_sql,
            target_condition=target_condition,
            returning_condition=returning_condition,
        ),
        {
            "team_id": team.pk,
            "start_date": date_from.strftime(
                "%Y-%m-%d{}".format(" %H:%M:%S" if filter.period == "Hour" else " 00:00:00")
            ),
            "end_date": date_to.strftime(
                "%Y-%m-%d{}".format(" %H:%M:%S" if filter.period == "Hour" else " 00:00:00")
            ),
            "reference_start_date": date_from.strftime(
                "%Y-%m-%d{}".format(" %H:%M:%S" if filter.period == "Hour" else " 00:00:00")
            ),
            "reference_end_date": (
                (date_from + filter.period_increment) if filter.display == TRENDS_LINEAR else date_to
            ).strftime("%Y-%m-%d{}".format(" %H:%M:%S" if filter.period == "Hour" else " 00:00:00")),
            **prop_filter_params,
            **target_params,
            **returning_params,
            "period": period,
        },
    )

    initial_interval_result = sync_execute(
        INITIAL_INTERVAL_SQL.format(
            reference_event_sql=reference_event_sql,
            trunc_func=trunc_func,
        ),
        {
            "team_id": team.pk,
            "start_date": date_from.strftime(
                "%Y-%m-%d{}".format(" %H:%M:%S" if filter.period == "Hour" else " 00:00:00")
            ),
            "end_date": date_to.strftime(
                "%Y-%m-%d{}".format(" %H:%M:%S" if filter.period == "Hour" else " 00:00:00")
            ),
            "reference_start_date": date_from.strftime(
                "%Y-%m-%d{}".format(" %H:%M:%S" if filter.period == "Hour" else " 00:00:00")
            ),
            "reference_end_date": (
                (date_from + filter.period_increment) if filter.display == TRENDS_LINEAR else date_to
            ).strftime("%Y-%m-%d{}".format(" %H:%M:%S" if filter.period == "Hour" else " 00:00:00")),
            **prop_filter_params,
            **target_params,
            **returning_params,
            "period": period,
        },
    )

    result_dict = {}
    for initial_res in initial_interval_result:
        result_dict.update({(initial_res[0], 0): {"count": initial_res[1], "people": []}})

    for res in result:
        result_dict.update({(res[0], res[1]): {"count": res[2], "people": []}})

    return result_dict

def _retrieve_people_in_period(self, filter: RetentionFilter, team: Team):
    period = filter.period
    is_first_time_retention = filter.retention_type == RETENTION_FIRST_TIME
    trunc_func = get_trunc_func_ch(period)
    prop_filters, prop_filter_params = parse_prop_clauses(filter.properties, team.pk)

    target_query, target_params = self._get_condition(filter.target_entity, table="e")
    target_query_formatted = "AND {target_query}".format(target_query=target_query)
    return_query, return_params = self._get_condition(filter.returning_entity, table="e", prepend="returning")
    return_query_formatted = "AND {return_query}".format(return_query=return_query)

    first_event_sql = (
        REFERENCE_EVENT_UNIQUE_PEOPLE_PER_PERIOD_SQL
        if is_first_time_retention
        else REFERENCE_EVENT_PEOPLE_PER_PERIOD_SQL
    ).format(
        target_query=target_query_formatted,
        filters=prop_filters,
        trunc_func=trunc_func,
    )
    default_event_query = (
        DEFAULT_REFERENCE_EVENT_UNIQUE_PEOPLE_PER_PERIOD_SQL
        if is_first_time_retention
        else DEFAULT_REFERENCE_EVENT_PEOPLE_PER_PERIOD_SQL
    ).format(
        target_query=target_query_formatted,
        filters=prop_filters,
        trunc_func=trunc_func,
    )

    date_from = filter.date_from + filter.selected_interval * filter.period_increment
    date_to = filter.date_to

    filter = filter.with_data({"total_intervals": filter.total_intervals - filter.selected_interval})

    query_result = sync_execute(
        RETENTION_PEOPLE_PER_PERIOD_SQL.format(
            returning_query=return_query_formatted,
            filters=prop_filters,
            first_event_sql=first_event_sql,
            first_event_default_sql=default_event_query,
            trunc_func=trunc_func,
        ),
        {
            "team_id": team.pk,
            "start_date": date_from.strftime(
                "%Y-%m-%d{}".format(" %H:%M:%S" if filter.period == "Hour" else " 00:00:00")
            ),
            "end_date": date_to.strftime(
                "%Y-%m-%d{}".format(" %H:%M:%S" if filter.period == "Hour" else " 00:00:00")
            ),
            "offset": filter.offset,
            "limit": 100,
            "period": period,
            **target_params,
            **return_params,
            **prop_filter_params,
        },
    )
    people_dict = {}

    from posthog.api.person import PersonSerializer

    people = get_persons_by_uuids(team_id=team.pk, uuids=[val[0] for val in query_result])
    people = people.prefetch_related(Prefetch("persondistinctid_set", to_attr="distinct_ids_cache"))
    for person in people:
        people_dict.update({str(person.uuid): PersonSerializer(person).data})

    result = self.process_people_in_period(filter, query_result, people_dict)
    return result

def _execute_sql(
    self,
    filter: RetentionFilter,
    team: Team,
) -> Dict[Tuple[int, int], Dict[str, Any]]:
    period = filter.period
    is_first_time_retention = filter.retention_type == RETENTION_FIRST_TIME
    date_from = filter.date_from
    trunc_func = get_trunc_func_ch(period)

    returning_event_query, returning_event_params = RetentionEventsQuery(
        filter=filter, team_id=team.pk, event_query_type=RetentionQueryType.RETURNING
    ).get_query()
    target_event_query, target_event_params = RetentionEventsQuery(
        filter=filter,
        team_id=team.pk,
        event_query_type=RetentionQueryType.TARGET_FIRST_TIME
        if is_first_time_retention
        else RetentionQueryType.TARGET,
    ).get_query()

    all_params = {
        "team_id": team.pk,
        "start_date": date_from.strftime(
            "%Y-%m-%d{}".format(" %H:%M:%S" if filter.period == "Hour" else " 00:00:00")
        ),
        **returning_event_params,
        **target_event_params,
        "period": period,
    }

    result = sync_execute(
        RETENTION_SQL.format(
            returning_event_query=returning_event_query,
            trunc_func=trunc_func,
            target_event_query=target_event_query,
        ),
        all_params,
    )

    initial_interval_result = sync_execute(
        INITIAL_INTERVAL_SQL.format(
            reference_event_sql=target_event_query,
            trunc_func=trunc_func,
        ),
        all_params,
    )

    result_dict = {}
    for initial_res in initial_interval_result:
        result_dict.update({(initial_res[0], 0): {"count": initial_res[1], "people": []}})

    for res in result:
        result_dict.update({(res[0], res[1]): {"count": res[2], "people": []}})

    return result_dict

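# Illustration of the shape _execute_sql returns (counts invented): keys are
# (first_interval_index, intervals_since_first) pairs, i.e. one cell of the
# retention matrix each. The initial-interval query fills the (n, 0) diagonal;
# the main query fills the remaining cells.
example_result_dict = {
    (0, 0): {"count": 100, "people": []},  # cohort entering in period 0
    (0, 1): {"count": 40, "people": []},   # of whom 40 returned one period later
    (1, 0): {"count": 80, "people": []},   # cohort entering in period 1
}
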
def _format_breakdown_query(self, entity: Entity, filter: Filter, team_id: int) -> Tuple[str, Dict, Callable]:
    # process params
    params: Dict[str, Any] = {"team_id": team_id}
    interval_annotation = get_trunc_func_ch(filter.interval)
    num_intervals, seconds_in_interval, round_interval = get_time_diff(
        filter.interval or "day", filter.date_from, filter.date_to, team_id
    )
    _, parsed_date_to, date_params = parse_timestamps(filter=filter, team_id=team_id)

    props_to_filter = [*filter.properties, *entity.properties]
    prop_filters, prop_filter_params = parse_prop_clauses(
        props_to_filter, team_id, table_name="e", filter_test_accounts=filter.filter_test_accounts
    )
    aggregate_operation, _, math_params = process_math(entity)

    if entity.math == "dau" or filter.breakdown_type == "person":
        join_condition = EVENT_JOIN_PERSON_SQL
    else:
        join_condition = ""

    action_query = ""
    action_params: Dict = {}
    if entity.type == TREND_FILTER_TYPE_ACTIONS:
        action = Action.objects.get(pk=entity.id)
        action_query, action_params = format_action_filter(action)

    null_sql = NULL_BREAKDOWN_SQL.format(
        interval=interval_annotation,
        seconds_in_interval=seconds_in_interval,
        num_intervals=num_intervals,
        date_to=(filter.date_to).strftime("%Y-%m-%d %H:%M:%S"),
    )

    params = {
        **params,
        **math_params,
        **prop_filter_params,
        **action_params,
        "event": entity.id,
        "key": filter.breakdown,
        **date_params,
    }

    breakdown_filter_params = {
        "parsed_date_from": date_from_clause(interval_annotation, round_interval),
        "parsed_date_to": parsed_date_to,
        "actions_query": "AND {}".format(action_query) if action_query else "",
        "event_filter": "AND event = %(event)s" if not action_query else "",
        "filters": prop_filters if props_to_filter else "",
    }

    breakdown_query = self._get_breakdown_query(filter)

    _params, _breakdown_filter_params = {}, {}

    if filter.breakdown_type == "cohort":
        _params, breakdown_filter, _breakdown_filter_params, breakdown_value = self._breakdown_cohort_params(
            team_id, filter, entity
        )
    elif filter.breakdown_type == "person":
        _params, breakdown_filter, _breakdown_filter_params, breakdown_value = self._breakdown_person_params(
            filter, team_id
        )
    else:
        _params, breakdown_filter, _breakdown_filter_params, breakdown_value = self._breakdown_prop_params(
            filter, team_id
        )

    if len(_params["values"]) == 0:
        return "SELECT 1", {}, lambda _: []

    params = {**params, **_params}
    breakdown_filter_params = {**breakdown_filter_params, **_breakdown_filter_params}

    if filter.display in TRENDS_DISPLAY_BY_VALUE:
        breakdown_filter = breakdown_filter.format(**breakdown_filter_params)
        content_sql = breakdown_query.format(
            breakdown_filter=breakdown_filter,
            event_join=join_condition,
            aggregate_operation=aggregate_operation,
            breakdown_value=breakdown_value,
        )
        return content_sql, params, self._parse_single_aggregate_result(filter, entity)
    else:
        null_sql = null_sql.format(
            interval=interval_annotation,
            seconds_in_interval=seconds_in_interval,
            num_intervals=num_intervals,
            date_to=(filter.date_to).strftime("%Y-%m-%d %H:%M:%S"),
        )
        breakdown_filter = breakdown_filter.format(**breakdown_filter_params)
        breakdown_query = breakdown_query.format(
            null_sql=null_sql,
            breakdown_filter=breakdown_filter,
            event_join=join_condition,
            aggregate_operation=aggregate_operation,
            interval_annotation=interval_annotation,
            breakdown_value=breakdown_value,
        )
        return breakdown_query, params, self._parse_trend_result(filter, entity)

def get_query(self) -> Tuple[str, Dict, Callable]:
    interval_annotation = get_trunc_func_ch(self.filter.interval)
    num_intervals, seconds_in_interval, round_interval = get_time_diff(
        self.filter.interval, self.filter.date_from, self.filter.date_to, self.team_id
    )
    _, parsed_date_to, date_params = parse_timestamps(filter=self.filter, team_id=self.team_id)

    props_to_filter = self.filter.property_groups.combine_property_group(
        PropertyOperatorType.AND, self.entity.property_groups
    )
    outer_properties = self.column_optimizer.property_optimizer.parse_property_groups(props_to_filter).outer
    prop_filters, prop_filter_params = parse_prop_grouped_clauses(
        team_id=self.team_id,
        property_group=outer_properties,
        table_name="e",
        person_properties_mode=PersonPropertiesMode.USING_PERSON_PROPERTIES_COLUMN,
    )
    aggregate_operation, _, math_params = process_math(self.entity)

    action_query = ""
    action_params: Dict = {}
    if self.entity.type == TREND_FILTER_TYPE_ACTIONS:
        action = self.entity.get_action()
        action_query, action_params = format_action_filter(team_id=self.team_id, action=action, table_name="e")

    self.params = {
        **self.params,
        **math_params,
        **prop_filter_params,
        **action_params,
        "event": self.entity.id,
        "key": self.filter.breakdown,
        **date_params,
    }

    breakdown_filter_params = {
        "parsed_date_from": date_from_clause(interval_annotation, round_interval),
        "parsed_date_to": parsed_date_to,
        "actions_query": "AND {}".format(action_query) if action_query else "",
        "event_filter": "AND event = %(event)s" if not action_query else "",
        "filters": prop_filters if props_to_filter.values else "",
    }

    _params, _breakdown_filter_params = {}, {}

    if self.filter.breakdown_type == "cohort":
        _params, breakdown_filter, _breakdown_filter_params, breakdown_value = self._breakdown_cohort_params()
    else:
        _params, breakdown_filter, _breakdown_filter_params, breakdown_value = self._breakdown_prop_params(
            "count(*)" if self.entity.math == "dau" else aggregate_operation,
            math_params,
        )

    if len(_params["values"]) == 0:
        # If there are no breakdown values, we are sure that there are no relevant events,
        # so instead of adjusting a "real" SELECT for this, we only include the below dummy SELECT.
        # It's a drop-in replacement for a "real" one, simply always returning 0 rows.
        # See https://github.com/PostHog/posthog/pull/5674 for context.
        return (
            "SELECT [now()] AS date, [0] AS data, '' AS breakdown_value LIMIT 0",
            {},
            lambda _: [],
        )

    person_join_condition, person_join_params = self._person_join_condition()
    groups_join_condition, groups_join_params = GroupsJoinQuery(
        self.filter, self.team_id, self.column_optimizer
    ).get_join_query()
    self.params = {**self.params, **_params, **person_join_params, **groups_join_params}
    breakdown_filter_params = {**breakdown_filter_params, **_breakdown_filter_params}

    if self.filter.display in TRENDS_DISPLAY_BY_VALUE:
        breakdown_filter = breakdown_filter.format(**breakdown_filter_params)
        content_sql = BREAKDOWN_AGGREGATE_QUERY_SQL.format(
            breakdown_filter=breakdown_filter,
            person_join=person_join_condition,
            groups_join=groups_join_condition,
            aggregate_operation=aggregate_operation,
            breakdown_value=breakdown_value,
        )
        time_range = enumerate_time_range(self.filter, seconds_in_interval)

        return (
            content_sql,
            self.params,
            self._parse_single_aggregate_result(self.filter, self.entity, {"days": time_range}),
        )
    else:
        breakdown_filter = breakdown_filter.format(**breakdown_filter_params)

        if self.entity.math in [WEEKLY_ACTIVE, MONTHLY_ACTIVE]:
            active_user_params = get_active_user_params(self.filter, self.entity, self.team_id)
            conditions = BREAKDOWN_ACTIVE_USER_CONDITIONS_SQL.format(
                **breakdown_filter_params, **active_user_params
            )
            inner_sql = BREAKDOWN_ACTIVE_USER_INNER_SQL.format(
                breakdown_filter=breakdown_filter,
                person_join=person_join_condition,
                groups_join=groups_join_condition,
                aggregate_operation=aggregate_operation,
                interval_annotation=interval_annotation,
                breakdown_value=breakdown_value,
                conditions=conditions,
                GET_TEAM_PERSON_DISTINCT_IDS=get_team_distinct_ids_query(self.team_id),
                **active_user_params,
                **breakdown_filter_params,
            )
        elif self.filter.display == TRENDS_CUMULATIVE and self.entity.math == "dau":
            inner_sql = BREAKDOWN_CUMULATIVE_INNER_SQL.format(
                breakdown_filter=breakdown_filter,
                person_join=person_join_condition,
                groups_join=groups_join_condition,
                aggregate_operation=aggregate_operation,
                interval_annotation=interval_annotation,
                breakdown_value=breakdown_value,
                **breakdown_filter_params,
            )
        else:
            inner_sql = BREAKDOWN_INNER_SQL.format(
                breakdown_filter=breakdown_filter,
                person_join=person_join_condition,
                groups_join=groups_join_condition,
                aggregate_operation=aggregate_operation,
                interval_annotation=interval_annotation,
                breakdown_value=breakdown_value,
            )

        breakdown_query = BREAKDOWN_QUERY_SQL.format(
            interval=interval_annotation,
            num_intervals=num_intervals,
            inner_sql=inner_sql,
        )
        self.params.update(
            {
                "seconds_in_interval": seconds_in_interval,
                "num_intervals": num_intervals,
            }
        )

        return breakdown_query, self.params, self._parse_trend_result(self.filter, self.entity)

def _get_trends(self) -> List[Dict[str, Any]]:
    serialized: Dict[str, Any] = {"count": 0, "data": [], "days": [], "labels": []}
    prop_filters, prop_filter_params = parse_prop_clauses(
        self._filter.properties, self._team.pk, prepend="global", allow_denormalized_props=True
    )
    parsed_date_from, parsed_date_to, _ = parse_timestamps(
        filter=self._filter, table="events.", team_id=self._team.pk
    )
    self.params.update(prop_filter_params)
    steps = [self._build_steps_query(entity, index) for index, entity in enumerate(self._filter.entities)]
    funnel_query = FUNNEL_SQL.format(
        team_id=self._team.id,
        steps=", ".join(steps),
        filters=prop_filters.replace("uuid IN", "events.uuid IN", 1),
        parsed_date_from=parsed_date_from,
        parsed_date_to=parsed_date_to,
        top_level_groupby=", date",
        extra_select="{}(timestamp) as date,".format(get_trunc_func_ch(self._filter.interval)),
        extra_groupby=",{}(timestamp)".format(get_trunc_func_ch(self._filter.interval)),
        within_time="86400000000",
    )
    results = sync_execute(funnel_query, self.params)
    parsed_results = []

    for result in results:
        temp = [item for item in result]
        temp[1] = datetime(
            result[1].year,
            result[1].month,
            result[1].day,
            getattr(result[1], "hour", 0),
            getattr(result[1], "minute", 0),
            getattr(result[1], "second", 0),
            tzinfo=pytz.utc,
        )
        parsed_results.append(temp)

    date_range = get_daterange(
        self._filter.date_from or parsed_results[0][1], self._filter.date_to, frequency=self._filter.interval
    )

    # Rejig the data from a row for each date and step to one row per date
    data_dict: Dict[datetime, Dict] = {}
    for item in parsed_results:
        if not data_dict.get(item[1]):
            data_dict[item[1]] = {"date": item[1], "total_people": item[2], "count": 0}
        else:
            # the query gives people who made it to that step,
            # so we need to count all the people from each step
            data_dict[item[1]]["total_people"] += item[2]
            data_dict[item[1]]["count"] = round(item[2] / data_dict[item[1]]["total_people"] * 100)
    data_array = [value for _, value in data_dict.items()]

    if self._filter.interval == "week":
        for df in data_array:
            df["date"] -= timedelta(days=df["date"].weekday() + 1)
    elif self._filter.interval == "month":
        for df in data_array:
            df["date"] = df["date"].replace(day=1)
    for df in data_array:
        df["date"] = df["date"].isoformat()

    datewise_data = {d["date"]: d["count"] for d in data_array}
    values = [(key, datewise_data.get(key.isoformat(), 0)) for key in date_range]

    for data_item in values:
        serialized["days"].append(data_item[0])
        serialized["data"].append(data_item[1])
        serialized["labels"].append(format_label_date(data_item[0], self._filter.interval))

    return [serialized]

def _format_breakdown_query(self, entity: Entity, filter: Filter, team_id: int) -> Tuple[str, Dict, Callable]:
    # process params
    params: Dict[str, Any] = {"team_id": team_id}
    interval_annotation = get_trunc_func_ch(filter.interval)
    num_intervals, seconds_in_interval, round_interval = get_time_diff(
        filter.interval or "day", filter.date_from, filter.date_to, team_id
    )
    _, parsed_date_to, date_params = parse_timestamps(filter=filter, team_id=team_id)

    props_to_filter = [*filter.properties, *entity.properties]
    prop_filters, prop_filter_params = parse_prop_clauses(
        props_to_filter, team_id, table_name="e", filter_test_accounts=filter.filter_test_accounts
    )
    aggregate_operation, _, math_params = process_math(entity)

    if entity.math == "dau" or filter.breakdown_type == "person":
        join_condition = EVENT_JOIN_PERSON_SQL
    else:
        join_condition = ""

    action_query = ""
    action_params: Dict = {}
    if entity.type == TREND_FILTER_TYPE_ACTIONS:
        action = entity.get_action()
        action_query, action_params = format_action_filter(action, table_name="e")

    params = {
        **params,
        **math_params,
        **prop_filter_params,
        **action_params,
        "event": entity.id,
        "key": filter.breakdown,
        **date_params,
    }

    breakdown_filter_params = {
        "parsed_date_from": date_from_clause(interval_annotation, round_interval),
        "parsed_date_to": parsed_date_to,
        "actions_query": "AND {}".format(action_query) if action_query else "",
        "event_filter": "AND event = %(event)s" if not action_query else "",
        "filters": prop_filters if props_to_filter else "",
    }

    _params, _breakdown_filter_params = {}, {}

    if filter.breakdown_type == "cohort":
        _params, breakdown_filter, _breakdown_filter_params, breakdown_value = self._breakdown_cohort_params(
            team_id, filter, entity
        )
    elif filter.breakdown_type == "person":
        (
            _params,
            breakdown_filter,
            _breakdown_filter_params,
            breakdown_value,
        ) = self._breakdown_person_params(
            "count(*)" if entity.math == "dau" else aggregate_operation, entity, filter, team_id
        )
    else:
        (
            _params,
            breakdown_filter,
            _breakdown_filter_params,
            breakdown_value,
        ) = self._breakdown_prop_params(
            "count(*)" if entity.math == "dau" else aggregate_operation, entity, filter, team_id
        )

    if len(_params["values"]) == 0:
        return "SELECT 1", {}, lambda _: []

    params = {**params, **_params}
    breakdown_filter_params = {**breakdown_filter_params, **_breakdown_filter_params}

    if filter.display in TRENDS_DISPLAY_BY_VALUE:
        breakdown_filter = breakdown_filter.format(**breakdown_filter_params)
        content_sql = BREAKDOWN_AGGREGATE_QUERY_SQL.format(
            breakdown_filter=breakdown_filter,
            event_join=join_condition,
            aggregate_operation=aggregate_operation,
            breakdown_value=breakdown_value,
        )
        time_range = enumerate_time_range(filter, seconds_in_interval)
        return content_sql, params, self._parse_single_aggregate_result(filter, entity, {"days": time_range})
    else:
        breakdown_filter = breakdown_filter.format(**breakdown_filter_params)

        if entity.math in [WEEKLY_ACTIVE, MONTHLY_ACTIVE]:
            active_user_params = get_active_user_params(filter, entity, team_id)
            conditions = BREAKDOWN_ACTIVE_USER_CONDITIONS_SQL.format(
                **breakdown_filter_params, **active_user_params
            )
            inner_sql = BREAKDOWN_ACTIVE_USER_INNER_SQL.format(
                breakdown_filter=breakdown_filter,
                event_join=join_condition,
                aggregate_operation=aggregate_operation,
                interval_annotation=interval_annotation,
                breakdown_value=breakdown_value,
                conditions=conditions,
                GET_TEAM_PERSON_DISTINCT_IDS=GET_TEAM_PERSON_DISTINCT_IDS,
                **active_user_params,
                **breakdown_filter_params,
            )
        else:
            inner_sql = BREAKDOWN_INNER_SQL.format(
                breakdown_filter=breakdown_filter,
                event_join=join_condition,
                aggregate_operation=aggregate_operation,
                interval_annotation=interval_annotation,
                breakdown_value=breakdown_value,
            )

        breakdown_query = BREAKDOWN_QUERY_SQL.format(
            interval=interval_annotation,
            num_intervals=num_intervals,
            inner_sql=inner_sql,
        )
        params.update(
            {
                "date_to": filter.date_to.strftime("%Y-%m-%d %H:%M:%S"),
                "seconds_in_interval": seconds_in_interval,
                "num_intervals": num_intervals,
            }
        )

        return breakdown_query, params, self._parse_trend_result(filter, entity)

def _retrieve_people(self, filter: RetentionFilter, team: Team):
    period = filter.period
    is_first_time_retention = filter.retention_type == RETENTION_FIRST_TIME
    trunc_func = get_trunc_func_ch(period)
    prop_filters, prop_filter_params = parse_prop_clauses(
        filter.properties, team.pk, filter_test_accounts=filter.filter_test_accounts
    )

    returning_entity = filter.returning_entity if filter.selected_interval > 0 else filter.target_entity
    target_query, target_params = self._get_condition(filter.target_entity, table="e")
    target_query_formatted = "AND {target_query}".format(target_query=target_query)
    return_query, return_params = self._get_condition(returning_entity, table="e", prepend="returning")
    return_query_formatted = "AND {return_query}".format(return_query=return_query)

    reference_event_query = (REFERENCE_EVENT_UNIQUE_SQL if is_first_time_retention else REFERENCE_EVENT_SQL).format(
        target_query=target_query_formatted,
        filters=prop_filters,
        trunc_func=trunc_func,
        GET_TEAM_PERSON_DISTINCT_IDS=GET_TEAM_PERSON_DISTINCT_IDS,
    )
    reference_date_from = filter.date_from
    reference_date_to = filter.date_from + filter.period_increment
    date_from = filter.date_from + filter.selected_interval * filter.period_increment
    date_to = date_from + filter.period_increment

    result = sync_execute(
        RETENTION_PEOPLE_SQL.format(
            reference_event_query=reference_event_query,
            target_query=return_query_formatted,
            filters=prop_filters,
            GET_TEAM_PERSON_DISTINCT_IDS=GET_TEAM_PERSON_DISTINCT_IDS,
        ),
        {
            "team_id": team.pk,
            "start_date": date_from.strftime(
                "%Y-%m-%d{}".format(" %H:%M:%S" if filter.period == "Hour" else " 00:00:00")
            ),
            "end_date": date_to.strftime(
                "%Y-%m-%d{}".format(" %H:%M:%S" if filter.period == "Hour" else " 00:00:00")
            ),
            "reference_start_date": reference_date_from.strftime(
                "%Y-%m-%d{}".format(" %H:%M:%S" if filter.period == "Hour" else " 00:00:00")
            ),
            "reference_end_date": reference_date_to.strftime(
                "%Y-%m-%d{}".format(" %H:%M:%S" if filter.period == "Hour" else " 00:00:00")
            ),
            "offset": filter.offset,
            **target_params,
            **return_params,
            **prop_filter_params,
        },
    )
    people = Person.objects.filter(team_id=team.pk, uuid__in=[val[0] for val in result])

    from posthog.api.person import PersonSerializer

    return PersonSerializer(people, many=True).data

def _serialize_lifecycle(self, entity: Entity, filter: Filter, team_id: int) -> List[Dict[str, Any]]:
    date_from = filter.date_from
    if not date_from:
        date_from = get_earliest_timestamp(team_id)

    interval = filter.interval or "day"
    num_intervals, seconds_in_interval, _ = get_time_diff(interval, filter.date_from, filter.date_to, team_id)
    interval_increment, interval_string, sub_interval_string = self.get_interval(interval)
    trunc_func = get_trunc_func_ch(interval)
    event_query = ""
    event_params: Dict[str, Any] = {}

    props_to_filter = [*filter.properties, *entity.properties]
    prop_filters, prop_filter_params = parse_prop_clauses(props_to_filter, team_id)

    _, _, date_params = parse_timestamps(filter=filter, team_id=team_id)

    if entity.type == TREND_FILTER_TYPE_ACTIONS:
        try:
            action = Action.objects.get(pk=entity.id)
            event_query, event_params = format_action_filter(action)
        except:
            return []
    else:
        event_query = "event = %(event)s"
        event_params = {"event": entity.id}

    result = sync_execute(
        LIFECYCLE_SQL.format(
            interval=interval_string,
            trunc_func=trunc_func,
            event_query=event_query,
            filters=prop_filters,
            sub_interval=sub_interval_string,
        ),
        {
            "team_id": team_id,
            "prev_date_from": (date_from - interval_increment).strftime(
                "%Y-%m-%d{}".format(
                    " %H:%M:%S" if filter.interval == "hour" or filter.interval == "minute" else " 00:00:00"
                )
            ),
            "num_intervals": num_intervals,
            "seconds_in_interval": seconds_in_interval,
            **event_params,
            **date_params,
            **prop_filter_params,
        },
    )

    res = []
    for val in result:
        label = "{} - {}".format(entity.name, val[2])
        additional_values = {"label": label, "status": val[2]}
        parsed_result = parse_response(val, filter, additional_values)
        res.append(parsed_result)

    return res

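# Context for the loop above: val[2] carries the lifecycle status computed by
# LIFECYCLE_SQL; in PostHog these are "new", "returning", "resurrecting" and
# "dormant", so one labelled series is emitted per status, e.g. "$pageview - new".
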
def _normal_query(self, entity: Entity, filter: Filter, team_id: int) -> Tuple[str, Dict, Callable]:
    interval_annotation = get_trunc_func_ch(filter.interval)
    num_intervals, seconds_in_interval, round_interval = get_time_diff(
        filter.interval or "day", filter.date_from, filter.date_to, team_id=team_id
    )
    _, parsed_date_to, date_params = parse_timestamps(filter=filter, team_id=team_id)

    props_to_filter = [*filter.properties, *entity.properties]
    prop_filters, prop_filter_params = parse_prop_clauses(
        props_to_filter, team_id, filter_test_accounts=filter.filter_test_accounts
    )
    aggregate_operation, join_condition, math_params = process_math(entity)

    params: Dict = {"team_id": team_id}
    params = {**params, **prop_filter_params, **math_params, **date_params}
    content_sql_params = {
        "interval": interval_annotation,
        "parsed_date_from": date_from_clause(interval_annotation, round_interval),
        "parsed_date_to": parsed_date_to,
        "timestamp": "timestamp",
        "filters": prop_filters,
        "event_join": join_condition,
        "aggregate_operation": aggregate_operation,
        "entity_query": "AND {actions_query}"
        if entity.type == TREND_FILTER_TYPE_ACTIONS
        else "AND event = %(event)s",
    }
    entity_params, entity_format_params = self._populate_entity_params(entity)
    params = {**params, **entity_params}

    if filter.display in TRENDS_DISPLAY_BY_VALUE:
        content_sql = VOLUME_TOTAL_AGGREGATE_SQL.format(**content_sql_params).format(**entity_format_params)
        time_range = self._enumerate_time_range(filter, seconds_in_interval)

        return (
            content_sql,
            params,
            lambda result: [
                {"aggregated_value": result[0][0] if result and len(result) else 0, "days": time_range}
            ],
        )
    else:
        if entity.math in [WEEKLY_ACTIVE, MONTHLY_ACTIVE]:
            sql_params = get_active_user_params(filter, entity, team_id)
            content_sql = ACTIVE_USER_SQL.format(**content_sql_params, **sql_params).format(
                **entity_format_params
            )
        else:
            # entity_format_params depends on format clause from content_sql_params
            content_sql = VOLUME_SQL.format(**content_sql_params).format(**entity_format_params)

        null_sql = NULL_SQL.format(
            interval=interval_annotation,
            seconds_in_interval=seconds_in_interval,
            num_intervals=num_intervals,
            date_to=filter.date_to.strftime("%Y-%m-%d %H:%M:%S"),
        )
        final_query = AGGREGATE_SQL.format(null_sql=null_sql, content_sql=content_sql)
        return final_query, params, self._parse_normal_result(filter)

def _format_breakdown_query(
    self, entity: Entity, filter: Filter, breakdown: List, team_id: int
) -> List[Dict[str, Any]]:
    # process params
    params: Dict[str, Any] = {"team_id": team_id}
    interval_annotation = get_trunc_func_ch(filter.interval)
    num_intervals, seconds_in_interval, round_interval = get_time_diff(
        filter.interval or "day", filter.date_from, filter.date_to, team_id
    )
    _, parsed_date_to, date_params = parse_timestamps(filter=filter, team_id=team_id)

    props_to_filter = [*filter.properties, *entity.properties]
    prop_filters, prop_filter_params = parse_prop_clauses(props_to_filter, team_id, table_name="e")
    aggregate_operation, _, math_params = process_math(entity)

    if entity.math == "dau" or filter.breakdown_type == "person":
        join_condition = EVENT_JOIN_PERSON_SQL
    else:
        join_condition = ""

    action_query = ""
    action_params: Dict = {}
    if entity.type == TREND_FILTER_TYPE_ACTIONS:
        action = Action.objects.get(pk=entity.id)
        action_query, action_params = format_action_filter(action)

    null_sql = NULL_BREAKDOWN_SQL.format(
        interval=interval_annotation,
        seconds_in_interval=seconds_in_interval,
        num_intervals=num_intervals,
        date_to=(filter.date_to).strftime("%Y-%m-%d %H:%M:%S"),
    )

    params = {
        **params,
        **math_params,
        **prop_filter_params,
        **action_params,
        "event": entity.id,
        "key": filter.breakdown,
        **date_params,
    }

    breakdown_filter_params = {
        "parsed_date_from": date_from_clause(interval_annotation, round_interval),
        "parsed_date_to": parsed_date_to,
        "actions_query": "AND {}".format(action_query) if action_query else "",
        "event_filter": "AND event = %(event)s" if not action_query else "",
        "filters": prop_filters if props_to_filter else "",
    }

    breakdown_query = self._get_breakdown_query(filter, breakdown)

    _params, _breakdown_filter_params = {}, {}

    if filter.breakdown_type == "cohort":
        if "all" in breakdown:
            null_sql = NULL_SQL
            breakdown_filter = BREAKDOWN_CONDITIONS_SQL
            breakdown_value = ""
        else:
            _params, breakdown_filter, _breakdown_filter_params, breakdown_value = self._breakdown_cohort_params(
                breakdown, team_id
            )
    elif filter.breakdown_type == "person":
        _params, breakdown_filter, _breakdown_filter_params, breakdown_value = self._breakdown_person_params(
            filter, team_id
        )
    else:
        _params, breakdown_filter, _breakdown_filter_params, breakdown_value = self._breakdown_prop_params(
            filter, team_id
        )

    params = {**params, **_params}
    breakdown_filter_params = {**breakdown_filter_params, **_breakdown_filter_params}

    if filter.display == TRENDS_TABLE or filter.display == TRENDS_PIE:
        breakdown_filter = breakdown_filter.format(**breakdown_filter_params)
        content_sql = breakdown_query.format(
            breakdown_filter=breakdown_filter,
            event_join=join_condition,
            aggregate_operation=aggregate_operation,
            breakdown_value=breakdown_value,
        )
        result = sync_execute(content_sql, params)
        parsed_results = self._parse_single_aggregate_result(result, filter, entity, breakdown)
        return parsed_results
    else:
        null_sql = null_sql.format(
            interval=interval_annotation,
            seconds_in_interval=seconds_in_interval,
            num_intervals=num_intervals,
            date_to=(filter.date_to).strftime("%Y-%m-%d %H:%M:%S"),
        )
        breakdown_filter = breakdown_filter.format(**breakdown_filter_params)
        breakdown_query = breakdown_query.format(
            null_sql=null_sql,
            breakdown_filter=breakdown_filter,
            event_join=join_condition,
            aggregate_operation=aggregate_operation,
            interval_annotation=interval_annotation,
            breakdown_value=breakdown_value,
        )

        try:
            result = sync_execute(breakdown_query, params)
        except:
            result = []

        parsed_results = self._parse_trend_result(result, filter, entity, breakdown)
        return parsed_results