def _breakdown_person_params(self, aggregate_operation: str, filter: Filter, team_id: int):
    """Build query params, JOIN clause, and value expression for a person-property breakdown."""
    date_from, date_to, _ = parse_timestamps(filter=filter, team_id=team_id)
    props_sql, props_params = parse_prop_clauses(
        filter.properties, team_id, table_name="e", filter_test_accounts=filter.filter_test_accounts
    )
    latest_person = GET_LATEST_PERSON_SQL.format(query="")
    # Rank the person-property values so the breakdown only covers the top ones.
    top_values_query = TOP_PERSON_PROPS_ARRAY_OF_KEY_SQL.format(
        parsed_date_from=date_from,
        parsed_date_to=date_to,
        latest_person_sql=latest_person,
        prop_filters=props_sql,
        aggregate_operation=aggregate_operation,
    )
    top_values = self._get_top_elements(top_values_query, filter, team_id, params=props_params)
    return (
        {"values": top_values},
        BREAKDOWN_PERSON_PROP_JOIN_SQL,
        {"latest_person_sql": latest_person},
        "value",
    )
def _retrieve_people(self, target_entity: Entity, filter: StickinessFilter, team: Team) -> ReturnDict:
    """Fetch and serialize the people active on the selected stickiness interval."""
    date_from, date_to, _ = parse_timestamps(filter=filter, team_id=team.pk)
    props_sql, props_params = parse_prop_clauses(filter.properties, team.pk)
    entity_sql, entity_params = self._format_entity_filter(entity=target_entity)
    interval_func = get_trunc_func_ch(filter.interval)

    params: Dict = {
        "team_id": team.pk,
        **props_params,
        "stickiness_day": filter.selected_interval,
        **entity_params,
        "offset": filter.offset,
    }
    inner_sql = STICKINESS_PEOPLE_SQL.format(
        entity_filter=entity_sql,
        parsed_date_from=date_from,
        parsed_date_to=date_to,
        filters=props_sql,
        trunc_func=interval_func,
    )
    rows = sync_execute(
        PEOPLE_SQL.format(
            content_sql=inner_sql, query="", latest_person_sql=GET_LATEST_PERSON_SQL.format(query="")
        ),
        params,
    )
    return ClickhousePersonSerializer(rows, many=True).data
def _exec_query(self) -> List[Tuple]: prop_filters, prop_filter_params = parse_prop_clauses( "uuid", self._filter.properties, self._team, prepend="global") # format default dates if not self._filter._date_from: self._filter._date_from = relative_date_parse("-7d") if not self._filter._date_to: self._filter._date_to = timezone.now() parsed_date_from, parsed_date_to = parse_timestamps( filter=self._filter) self.params: Dict = {"team_id": self._team.pk, **prop_filter_params} steps = [ self._build_steps_query(entity, index) for index, entity in enumerate(self._filter.entities) ] query = FUNNEL_SQL.format( select_steps=",".join([ "step_{}".format(index) for index, _ in enumerate(self._filter.entities) ]), team_id=self._team.id, steps=", ".join(steps), filters=prop_filters.replace("uuid IN", "events.uuid IN", 1), parsed_date_from=parsed_date_from, parsed_date_to=parsed_date_to, ) return sync_execute(query, self.params)
def _calculate_entity_people(self, team: Team, entity: Entity, filter: Filter):
    """Return serialized people matching the given entity over the filter range."""
    date_from, date_to = parse_timestamps(filter=filter)
    props_sql, props_params = parse_prop_clauses("uuid", filter.properties, team)
    entity_sql, entity_params = self._format_entity_filter(entity=entity)

    params: Dict = {
        "team_id": team.pk,
        **props_params,
        **entity_params,
        "offset": filter.offset,
    }
    trend_sql = PERSON_TREND_SQL.format(
        entity_filter=entity_sql,
        parsed_date_from=(date_from or ""),
        parsed_date_to=(date_to or ""),
        filters="{filters}".format(filters=props_sql) if filter.properties else "",
        breakdown_filter="",
    )
    rows = sync_execute(PEOPLE_THROUGH_DISTINCT_SQL.format(content_sql=trend_sql), params)
    return ClickhousePersonSerializer(rows, many=True).data
def _breakdown_prop_params(self, aggregate_operation: str, filter: Filter, team_id: int):
    """Build query params, JOIN clause, and value expression for an event-property breakdown."""
    date_from, date_to, _ = parse_timestamps(filter=filter, team_id=team_id)
    props_sql, props_params = parse_prop_clauses(
        filter.properties, team_id, table_name="e", filter_test_accounts=filter.filter_test_accounts
    )
    top_values_query = TOP_ELEMENTS_ARRAY_OF_KEY_SQL.format(
        parsed_date_from=date_from,
        parsed_date_to=date_to,
        prop_filters=props_sql,
        aggregate_operation=aggregate_operation,
    )
    top_values = self._get_top_elements(top_values_query, filter, team_id, params=props_params)
    # The breakdown value is extracted straight from the event's JSON properties.
    return {"values": top_values}, BREAKDOWN_PROP_JOIN_SQL, {}, "JSONExtractRaw(properties, %(key)s)"
def stats(self, request: request.Request, **kwargs) -> response.Response:
    """Return element-chain counts for the team, honouring date and property filters."""
    filter = Filter(request=request, team=self.team)
    date_from, date_to, date_params = parse_timestamps(filter, team_id=self.team.pk)
    props_sql, props_params = parse_prop_grouped_clauses(
        team_id=self.team.pk, property_group=filter.property_groups
    )
    rows = sync_execute(
        GET_ELEMENTS.format(date_from=date_from, date_to=date_to, query=props_sql),
        {"team_id": self.team.pk, **props_params, **date_params},
    )
    serialized = []
    for row in rows:
        serialized.append({
            "count": row[1],
            "hash": None,
            "elements": [
                ElementSerializer(element).data for element in chain_to_elements(row[0])
            ],
        })
    return response.Response(serialized)
def calculate_dist(self, filter: Filter, team: Team):
    """Return session-length distribution counts, one entry per duration bucket."""
    date_from, date_to = parse_timestamps(filter)
    props_sql, params = parse_prop_clauses("uuid", filter.properties, team)
    query = DIST_SQL.format(
        team_id=team.pk,
        date_from=date_from,
        date_to=date_to,
        filters="{}".format(props_sql) if filter.properties else "",
        sessions_limit="",
    )
    rows = sync_execute(query, {**params, "team_id": team.pk})
    # Labels line up positionally with the columns of the single result row.
    labels = [
        "0 seconds (1 event)",
        "0-3 seconds",
        "3-10 seconds",
        "10-30 seconds",
        "30-60 seconds",
        "1-3 minutes",
        "3-10 minutes",
        "10-30 minutes",
        "30-60 minutes",
        "1+ hours",
    ]
    return [{"label": label, "count": rows[0][i]} for i, label in enumerate(labels)]
def _exec_query(self) -> List[Tuple]: prop_filters, prop_filter_params = parse_prop_clauses( self._filter.properties, self._team.pk, prepend="global") # format default dates data = {} if not self._filter._date_from: data.update({"date_from": relative_date_parse("-7d")}) if not self._filter._date_to: data.update({"date_to": timezone.now()}) self._filter = Filter(data={**self._filter._data, **data}) parsed_date_from, parsed_date_to, _ = parse_timestamps( filter=self._filter, table="events.", team_id=self._team.pk) self.params: Dict = { "team_id": self._team.pk, "events": [], # purely a speed optimization, don't need this for filtering **prop_filter_params, } steps = [ self._build_steps_query(entity, index) for index, entity in enumerate(self._filter.entities) ] query = FUNNEL_SQL.format( team_id=self._team.id, steps=", ".join(steps), filters=prop_filters.replace("uuid IN", "events.uuid IN", 1), parsed_date_from=parsed_date_from, parsed_date_to=parsed_date_to, ) return sync_execute(query, self.params)
def stickiness(self, entity: Entity, filter: StickinessFilter, team_id: int) -> Dict[str, Any]:
    """Run the stickiness query for one entity and post-process the counts.

    Returns {} early when the entity is an action whose filter renders to "".
    """
    parsed_date_from, parsed_date_to, _ = parse_timestamps(filter=filter, team_id=team_id)
    prop_filters, prop_filter_params = parse_prop_clauses(filter.properties, team_id)
    trunc_func = get_trunc_func_ch(filter.interval)
    params: Dict = {"team_id": team_id}
    params = {**params, **prop_filter_params, "num_intervals": filter.num_intervals}
    if entity.type == TREND_FILTER_TYPE_ACTIONS:
        action = Action.objects.get(pk=entity.id)
        action_query, action_params = format_action_filter(action)
        if action_query == "":
            # Action has no steps that translate to SQL — nothing to count.
            return {}
        params = {**params, **action_params}
        content_sql = STICKINESS_ACTIONS_SQL.format(
            team_id=team_id,
            actions_query=action_query,
            parsed_date_from=parsed_date_from,
            parsed_date_to=parsed_date_to,
            filters=prop_filters,
            trunc_func=trunc_func,
        )
    else:
        content_sql = STICKINESS_SQL.format(
            team_id=team_id,
            event=entity.id,
            parsed_date_from=parsed_date_from,
            parsed_date_to=parsed_date_to,
            filters=prop_filters,
            trunc_func=trunc_func,
        )
    counts = sync_execute(content_sql, params)
    return self.process_result(counts, filter)
def _format_lifecycle_query(self, entity: Entity, filter: Filter, team_id: int) -> Tuple[str, Dict, Callable]:
    """Assemble the lifecycle SQL, its bound params, and a result-parsing callable.

    Returns ("", {}, parser) when the entity is an action that cannot be
    resolved, so callers fall through to an empty result instead of erroring.
    """
    date_from = filter.date_from
    if not date_from:
        date_from = get_earliest_timestamp(team_id)
    interval = filter.interval or "day"
    num_intervals, seconds_in_interval, _ = get_time_diff(
        interval, filter.date_from, filter.date_to, team_id)
    interval_increment, interval_string, sub_interval_string = self.get_interval(interval)
    trunc_func = get_trunc_func_ch(interval)
    event_query = ""
    event_params: Dict[str, Any] = {}

    # Entity-level properties are filtered alongside the global ones.
    props_to_filter = [*filter.properties, *entity.properties]
    prop_filters, prop_filter_params = parse_prop_clauses(
        props_to_filter, team_id, filter_test_accounts=filter.filter_test_accounts)

    _, _, date_params = parse_timestamps(filter=filter, team_id=team_id)

    if entity.type == TREND_FILTER_TYPE_ACTIONS:
        try:
            action = entity.get_action()
            event_query, event_params = format_action_filter(action)
        # FIX: was a bare `except:` — that also swallows SystemExit and
        # KeyboardInterrupt. Exception keeps the intended "missing/unbuildable
        # action -> empty result" behavior without hiding interpreter exits.
        except Exception:
            return "", {}, self._parse_result(filter, entity)
    else:
        event_query = "event = %(event)s"
        event_params = {"event": entity.id}

    return (
        LIFECYCLE_SQL.format(
            interval=interval_string,
            trunc_func=trunc_func,
            event_query=event_query,
            filters=prop_filters,
            sub_interval=sub_interval_string,
            GET_TEAM_PERSON_DISTINCT_IDS=GET_TEAM_PERSON_DISTINCT_IDS,
        ),
        {
            "team_id": team_id,
            # Start one interval before date_from so the first period has a
            # "previous" bucket to diff against.
            "prev_date_from": (date_from - interval_increment).strftime(
                "%Y-%m-%d{}".format(
                    " %H:%M:%S" if filter.interval == "hour" or filter.interval == "minute" else " 00:00:00"
                )
            ),
            "num_intervals": num_intervals,
            "seconds_in_interval": seconds_in_interval,
            **event_params,
            **date_params,
            **prop_filter_params,
        },
        self._parse_result(filter, entity),
    )
def _process_content_sql(team: Team, entity: Entity, filter: Filter):
    """Build the person-trend content SQL plus bound params for one entity.

    Cohort breakdowns become an extra distinct_id sub-select; person-property
    breakdowns are folded into the regular property filters.
    """
    filter = _handle_date_interval(filter)
    parsed_date_from, parsed_date_to, _ = parse_timestamps(filter=filter, team_id=team.pk)
    entity_sql, entity_params = format_entity_filter(entity=entity)
    person_filter = ""
    person_filter_params: Dict[str, Any] = {}
    if filter.breakdown_type == "cohort" and filter.breakdown_value != "all":
        cohort = Cohort.objects.get(pk=filter.breakdown_value)
        person_filter, person_filter_params = format_filter_query(cohort)
        person_filter = "AND distinct_id IN ({})".format(person_filter)
    elif (
        filter.breakdown_type == "person"
        and isinstance(filter.breakdown, str)
        and isinstance(filter.breakdown_value, str)
    ):
        # Treat the selected breakdown value as one more person property filter.
        person_prop = Property(**{"key": filter.breakdown, "value": filter.breakdown_value, "type": "person"})
        filter.properties.append(person_prop)
    prop_filters, prop_filter_params = parse_prop_clauses(filter.properties, team.pk)
    params: Dict = {"team_id": team.pk, **prop_filter_params, **entity_params, "offset": filter.offset}
    content_sql = PERSON_TREND_SQL.format(
        entity_filter=f"AND {entity_sql}",
        parsed_date_from=parsed_date_from,
        parsed_date_to=parsed_date_to,
        filters=prop_filters,
        breakdown_filter="",
        person_filter=person_filter,
    )
    return content_sql, {**params, **person_filter_params}
def get_breakdown_event_prop_values(filter: Filter, entity: Entity, aggregate_operation: str, team_id: int, limit: int = 25):
    """Return the top event-property values to break a trend down by."""
    date_from, date_to, _ = parse_timestamps(filter=filter, team_id=team_id)
    props_sql, props_params = parse_prop_clauses(
        filter.properties,
        team_id,
        table_name="e",
        filter_test_accounts=filter.filter_test_accounts,
    )
    entity_params, entity_format_params = populate_entity_params(entity)
    top_values_query = TOP_ELEMENTS_ARRAY_OF_KEY_SQL.format(
        parsed_date_from=date_from,
        parsed_date_to=date_to,
        prop_filters=props_sql,
        aggregate_operation=aggregate_operation,
        **entity_format_params,
    )
    merged_params = {**props_params, **entity_params}
    return _get_top_elements(
        filter=filter,
        team_id=team_id,
        query=top_values_query,
        params=merged_params,
        limit=limit,
    )
def _exec_query(self) -> List[Tuple]: prop_filters, prop_filter_params = parse_prop_clauses( self._filter.properties, self._team.pk, prepend="global", allow_denormalized_props=True) # format default dates data = {} if not self._filter._date_from: data.update({"date_from": relative_date_parse("-7d")}) if not self._filter._date_to: data.update({"date_to": timezone.now()}) self._filter = self._filter.with_data(data) parsed_date_from, parsed_date_to, _ = parse_timestamps( filter=self._filter, table="events.", team_id=self._team.pk) self.params.update(prop_filter_params) steps = [ self._build_steps_query(entity, index) for index, entity in enumerate(self._filter.entities) ] query = FUNNEL_SQL.format( team_id=self._team.id, steps=", ".join(steps), filters=prop_filters.replace("uuid IN", "events.uuid IN", 1), parsed_date_from=parsed_date_from, parsed_date_to=parsed_date_to, top_level_groupby="", extra_select="", extra_groupby="", within_time="6048000000000000", ) return sync_execute(query, self.params)
def run(self, filter: SessionsFilter, team: Team, *args, **kwargs) -> List[Dict[str, Any]]:
    """List sessions for the team, paginated, enriched with person data and recordings."""
    limit = kwargs.get("limit", SESSIONS_LIST_DEFAULT_LIMIT)
    offset = kwargs.get("offset", 0)
    filter = set_default_dates(filter)

    props_sql, params = parse_prop_clauses(filter.properties, team.pk)
    action_ts_sql, action_params = format_action_filter_aggregate(filter, team.pk)
    date_from, date_to, _ = parse_timestamps(filter, team.pk)

    params = {
        **params,
        **action_params,
        "team_id": team.pk,
        "limit": limit,
        "offset": offset,
        "distinct_id_limit": limit + offset,
    }
    query = SESSION_SQL.format(
        date_from=date_from,
        date_to=date_to,
        filters=props_sql,
        action_filter_timestamp=action_ts_sql,
        sessions_limit="LIMIT %(offset)s, %(limit)s",
    )
    sessions = self._parse_list_results(sync_execute(query, params))
    self._add_person_properties(team, sessions)
    return filter_sessions_by_recordings(team, sessions, filter)
def _format_all_query(team_id: int, filter: Filter, **kwargs) -> Tuple[str, Dict]:
    """Build a query selecting every distinct_id with any event in range (value 0)."""
    entity = kwargs.pop("entity", None)
    date_from, date_to, date_params = parse_timestamps(
        filter=filter, team_id=team_id, table="all_events."
    )
    props = [*filter.properties]
    if entity and isinstance(entity, Entity):
        # Entity-level properties narrow the match further.
        props = [*props, *entity.properties]
    props_sql, props_params = parse_prop_clauses(
        props, team_id, prepend="all_cohort_", table_name="all_events"
    )
    query = f"""
    SELECT DISTINCT distinct_id, 0 as value
    FROM events all_events
    WHERE team_id = {team_id} {date_from} {date_to} {props_sql}
    """
    return query, {**date_params, **props_params}
def _get_date_filter(self) -> Tuple[str, Dict]:
    """Build the date-range WHERE fragment, handling active-user math specially."""
    date_params: Dict[str, Any] = {}
    interval_annotation = get_trunc_func_ch(self._filter.interval)
    _, _, round_interval = get_time_diff(
        self._filter.interval or "day",
        self._filter.date_from,
        self._filter.date_to,
        team_id=self._team_id,
    )
    _, parsed_date_to, date_params = parse_timestamps(filter=self._filter, team_id=self._team_id)
    parsed_date_from = date_from_clause(interval_annotation, round_interval)
    # Stash the parsed clauses on self for reuse by other query builders.
    self.parsed_date_from = parsed_date_from
    self.parsed_date_to = parsed_date_to

    if self._entity.math in [WEEKLY_ACTIVE, MONTHLY_ACTIVE]:
        # Active-user math widens the range to include the previous period.
        format_params = get_active_user_params(self._filter, self._entity, self._team_id)
        self.active_user_params = format_params
        date_filter = "{parsed_date_from_prev_range} {parsed_date_to}".format(
            **format_params, parsed_date_to=parsed_date_to
        )
    else:
        date_filter = "{parsed_date_from} {parsed_date_to}".format(
            parsed_date_from=parsed_date_from, parsed_date_to=parsed_date_to
        )
    return date_filter, date_params
def stats(self, request: request.Request) -> response.Response:
    """Return element-chain counts for the requesting user's team."""
    filter = Filter(request=request)
    team = request.user.team
    assert team is not None
    date_from, date_to = parse_timestamps(filter)
    props_sql, props_params = parse_prop_clauses(filter.properties, team.pk)
    rows = sync_execute(
        GET_ELEMENTS.format(date_from=date_from, date_to=date_to, query=props_sql),
        {"team_id": team.id, **props_params},
    )
    serialized = []
    for row in rows:
        serialized.append({
            "count": row[1],
            "hash": None,
            "elements": [
                ElementSerializer(element).data for element in chain_to_elements(row[0])
            ],
        })
    return response.Response(serialized)
def _process_content_sql(target_entity: Entity, filter: StickinessFilter, team: Team) -> Tuple[str, Dict[str, Any]]:
    """Build the stickiness-people SQL and params for the selected interval."""
    date_from, date_to, _ = parse_timestamps(filter=filter, team_id=team.pk)
    props_sql, props_params = parse_prop_clauses(
        filter.properties, team.pk, filter_test_accounts=filter.filter_test_accounts
    )
    entity_sql, entity_params = _format_entity_filter(entity=target_entity)
    interval_func = get_trunc_func_ch(filter.interval)

    params: Dict = {
        "team_id": team.pk,
        **props_params,
        "stickiness_day": filter.selected_interval,
        **entity_params,
        "offset": filter.offset,
    }
    content_sql = STICKINESS_PEOPLE_SQL.format(
        entity_filter=entity_sql,
        parsed_date_from=date_from,
        parsed_date_to=date_to,
        filters=props_sql,
        trunc_func=interval_func,
        GET_TEAM_PERSON_DISTINCT_IDS=GET_TEAM_PERSON_DISTINCT_IDS,
    )
    return content_sql, params
def stickiness(self, entity: Entity, filter: Filter, team_id: int) -> Dict[str, Any]:
    """Run the stickiness query for one entity over an explicit date range.

    Raises ValueError when the filter lacks date_from/date_to; returns {}
    when the entity is an action whose filter renders to "".
    """
    if not filter.date_to or not filter.date_from:
        raise ValueError("_stickiness needs date_to and date_from set")
    # +2: presumably covers the inclusive end day plus 1-based day buckets —
    # TODO confirm against process_result.
    range_days = (filter.date_to - filter.date_from).days + 2
    parsed_date_from, parsed_date_to = parse_timestamps(filter=filter)
    prop_filters, prop_filter_params = parse_prop_clauses(filter.properties, team_id)
    params: Dict = {"team_id": team_id}
    params = {**params, **prop_filter_params}
    if entity.type == TREND_FILTER_TYPE_ACTIONS:
        action = Action.objects.get(pk=entity.id)
        action_query, action_params = format_action_filter(action)
        if action_query == "":
            return {}
        params = {**params, **action_params}
        content_sql = STICKINESS_ACTIONS_SQL.format(
            team_id=team_id,
            actions_query=action_query,
            parsed_date_from=parsed_date_from,
            parsed_date_to=parsed_date_to,
            filters=prop_filters,
        )
    else:
        content_sql = STICKINESS_SQL.format(
            team_id=team_id,
            event=entity.id,
            parsed_date_from=parsed_date_from,
            parsed_date_to=parsed_date_to,
            filters=prop_filters,
        )
    counts = sync_execute(content_sql, params)
    return self.process_result(counts, range_days)
def _format_all_query(team_id: int, filter: Filter, **kwargs) -> Tuple[str, Dict]:
    """Build a query selecting every distinct_id with any event in range,
    tagged with the ALL_USERS cohort id."""
    entity = kwargs.pop("entity", None)
    date_from, date_to, date_params = parse_timestamps(
        filter=filter, team_id=team_id, table="all_events."
    )
    props = filter.property_groups
    if entity and isinstance(entity, Entity):
        # Entity-level property groups are AND-ed onto the filter's groups.
        props = props.combine_property_group(PropertyOperatorType.AND, entity.property_groups)
    props_sql, props_params = parse_prop_grouped_clauses(
        team_id=team_id,
        property_group=props,
        prepend="all_cohort_",
        table_name="all_events",
    )
    query = f"""
    SELECT DISTINCT distinct_id, {ALL_USERS_COHORT_ID} as value
    FROM events all_events
    WHERE team_id = {team_id} {date_from} {date_to} {props_sql}
    """
    return query, {**date_params, **props_params}
def _calculate_stickiness_entity_people(self, team: Team, entity: Entity, filter: Filter, stickiness_day: int):
    """Return serialized people who were active on exactly `stickiness_day` intervals."""
    date_from, date_to = parse_timestamps(filter=filter)
    props_sql, props_params = parse_prop_clauses(filter.properties, team.pk)
    entity_sql, entity_params = self._format_entity_filter(entity=entity)

    params: Dict = {
        "team_id": team.pk,
        **props_params,
        "stickiness_day": stickiness_day,
        **entity_params,
        "offset": filter.offset,
    }
    inner_sql = STICKINESS_PEOPLE_SQL.format(
        entity_filter=entity_sql,
        parsed_date_from=(date_from or ""),
        parsed_date_to=(date_to or ""),
        filters="{filters}".format(filters=props_sql) if filter.properties else "",
    )
    rows = sync_execute(
        PEOPLE_SQL.format(
            content_sql=inner_sql, query="", latest_person_sql=GET_LATEST_PERSON_SQL.format(query="")
        ),
        params,
    )
    return ClickhousePersonSerializer(rows, many=True).data
def calculate_list(self, filter: Filter, team: Team, limit: int, offset: int):
    """List sessions for the team, defaulting the range to "today" when unset.

    NOTE(review): mutates the private _date_from/_date_to fields on the
    passed-in filter as a side effect.
    """
    filters, params = parse_prop_clauses("uuid", filter.properties, team)
    if not filter._date_from:
        filter._date_from = timezone.now().replace(hour=0, minute=0, second=0, microsecond=0)
    if not filter._date_to and filter.date_from:
        filter._date_to = filter.date_from + relativedelta(days=1)
    date_from, date_to = parse_timestamps(filter)
    params = {
        **params,
        "team_id": team.pk,
        "limit": limit,
        "offset": offset
    }
    query = SESSION_SQL.format(
        date_from=date_from,
        date_to=date_to,
        filters="{}".format(filters) if filter.properties else "",
        sessions_limit="LIMIT %(offset)s, %(limit)s",
    )
    query_result = sync_execute(query, params)
    result = self._parse_list_results(query_result)
    # Attach person properties to each parsed session row before returning.
    self._add_person_properties(team, result)
    return result
def calculate_paths(self, filter: Filter, team: Team):
    """Compute source->target path traversal counts between consecutive events.

    Returns rows sorted by descending traversal count. Defaults the range to
    -7d..now by mutating the filter's private date fields.
    """
    # format default dates
    if not filter._date_from:
        filter._date_from = relative_date_parse("-7d")
    if not filter._date_to:
        filter._date_to = timezone.now()
    parsed_date_from, parsed_date_to = parse_timestamps(filter=filter)
    event, path_type, start_comparator = self._determine_path_type(filter.path_type if filter else None)
    prop_filters, prop_filter_params = parse_prop_clauses("uuid", filter.properties, team)
    # Step 0. Event culling subexpression for step 1.
    # Make an expression that removes events in a session that are definitely unused.
    # For example the 4th, 5th, etc row after a "new_session = 1" or "marked_session_start = 1" row gets removed
    excess_row_filter = "("
    for i in range(4):
        if i > 0:
            excess_row_filter += " or "
        # neighbor(..., -i, 0): look i rows back; 0 is the out-of-range default.
        excess_row_filter += "neighbor(new_session, {}, 0) = 1".format(-i)
        if filter and filter.start_point:
            excess_row_filter += " or neighbor(marked_session_start, {}, 0) = 1".format(-i)
    excess_row_filter += ")"
    paths_query = PATHS_QUERY_FINAL.format(
        # No explicit event means "custom events": exclude the built-ins.
        event_query="event = %(event)s"
        if event
        else "event NOT IN ('$autocapture', '$pageview', '$identify', '$pageleave', '$screen')",
        path_type=path_type,
        parsed_date_from=parsed_date_from,
        parsed_date_to=parsed_date_to,
        filters=prop_filters if filter.properties else "",
        marked_session_start="{} = %(start_point)s".format(start_comparator)
        if filter and filter.start_point
        else "new_session",
        excess_row_filter=excess_row_filter,
        # Autocapture paths need the element chain to label nodes.
        select_elements_chain=", events.elements_chain as elements_chain" if event == AUTOCAPTURE_EVENT else "",
        group_by_elements_chain=", events.elements_chain" if event == AUTOCAPTURE_EVENT else "",
    )
    params: Dict = {
        "team_id": team.pk,
        "property": "$current_url",
        "event": event,
        "start_point": filter.start_point,
    }
    params = {**params, **prop_filter_params}
    rows = sync_execute(paths_query, params)
    resp: List[Dict[str, str]] = []
    for row in rows:
        # NOTE(review): assumes row layout (source, source_id, target,
        # target_id, value) — confirm against PATHS_QUERY_FINAL's SELECT.
        resp.append(
            {"source": row[0], "source_id": row[1], "target": row[2], "target_id": row[3],
             "value": row[4],}
        )
    resp = sorted(resp, key=lambda x: x["value"], reverse=True)
    return resp
def calculate_avg(self, filter: Filter, team: Team):
    """Compute the average session length per interval, plus the overall average.

    Returns [] when there were no sessions at all in the range.
    """
    parsed_date_from, parsed_date_to, _ = parse_timestamps(filter, team.pk)
    filters, params = parse_prop_clauses(
        filter.properties, team.pk, filter_test_accounts=filter.filter_test_accounts
    )
    interval_notation = get_trunc_func_ch(filter.interval)
    num_intervals, seconds_in_interval, _ = get_time_diff(
        filter.interval or "day", filter.date_from, filter.date_to, team.pk
    )
    entity_conditions, entity_params = entity_query_conditions(filter, team)
    if not entity_conditions:
        entity_conditions = ["event != '$feature_flag_called'"]  # default condition
    params = {**params, **entity_params}
    entity_query = " OR ".join(entity_conditions)
    avg_query = SESSIONS_NO_EVENTS_SQL.format(
        team_id=team.pk,
        date_from=parsed_date_from,
        date_to=parsed_date_to,
        filters=filters,
        sessions_limit="",
        entity_filter=f"AND ({entity_query})",
    )
    per_period_query = AVERAGE_PER_PERIOD_SQL.format(sessions=avg_query, interval=interval_notation)
    # Zero-filled series so intervals without sessions still appear in the graph.
    null_sql = NULL_SQL.format(
        date_to=filter.date_to.strftime("%Y-%m-%d 00:00:00"),
        interval=interval_notation,
        num_intervals=num_intervals,
        seconds_in_interval=seconds_in_interval,
    )
    final_query = AVERAGE_SQL.format(sessions=per_period_query, null_sql=null_sql)
    params = {**params, "team_id": team.pk}
    response = sync_execute(final_query, params)
    values = self.clean_values(filter, response)
    time_series_data = append_data(values, interval=filter.interval, math=None)
    # presumably rescales values for display — verify scale_time_series semantics
    scaled_data, _ = scale_time_series(time_series_data["data"])
    time_series_data.update({"data": scaled_data})
    # calculate average
    total = sum(val[1] for val in values)
    if total == 0:
        return []
    valid_days = sum(1 if val[1] else 0 for val in values)
    # Average only over intervals that actually had sessions.
    overall_average = (total / valid_days) if valid_days else 0
    result = self._format_avg(overall_average)
    time_series_data.update(result)
    return [time_series_data]
def _format_stickiness_query(self, entity: Entity, filter: Filter, team: Team) -> Optional[Dict[str, Any]]:
    """Run the stickiness query for one entity and shape the labelled day series.

    Raises ValueError unless both date_from and date_to are set; returns
    None when the entity is an action whose filter renders to "".
    """
    if not filter.date_to or not filter.date_from:
        raise ValueError("_stickiness needs date_to and date_from set")
    # +2: presumably inclusive end day plus 1-based day buckets — TODO confirm
    range_days = (filter.date_to - filter.date_from).days + 2
    parsed_date_from, parsed_date_to = parse_timestamps(filter=filter)
    prop_filters, prop_filter_params = parse_prop_clauses(filter.properties, team)
    params: Dict = {"team_id": team.pk}
    params = {**params, **prop_filter_params}
    if entity.type == TREND_FILTER_TYPE_ACTIONS:
        action = Action.objects.get(pk=entity.id)
        action_query, action_params = format_action_filter(action)
        if action_query == "":
            return None
        params = {**params, **action_params}
        content_sql = STICKINESS_ACTIONS_SQL.format(
            team_id=team.pk,
            actions_query=action_query,
            parsed_date_from=(parsed_date_from or ""),
            parsed_date_to=(parsed_date_to or ""),
            filters="{filters}".format(filters=prop_filters) if filter.properties else "",
        )
    else:
        content_sql = STICKINESS_SQL.format(
            team_id=team.pk,
            event=entity.id,
            parsed_date_from=(parsed_date_from or ""),
            parsed_date_to=(parsed_date_to or ""),
            filters="{filters}".format(filters=prop_filters) if filter.properties else "",
        )
    aggregated_counts = sync_execute(content_sql, params)
    # NOTE(review): assumes rows are (person_count, day_bucket) — confirm
    # against the stickiness SQL's SELECT ordering.
    response: Dict[int, int] = {}
    for result in aggregated_counts:
        response[result[1]] = result[0]
    labels = []
    data = []
    for day in range(1, range_days):
        label = "{} day{}".format(day, "s" if day > 1 else "")
        labels.append(label)
        # Days with no people default to 0 so the series is dense.
        data.append(response[day] if day in response else 0)
    return {
        "labels": labels,
        "days": [day for day in range(1, range_days)],
        "data": data,
        "count": sum(data),
    }
def run(self, filter: SessionEventsFilter, team: Team, *args, **kwargs) -> List[Dict[str, Any]]:
    """Fetch and serialize the events for one distinct_id within the date range."""
    date_from, date_to, _ = parse_timestamps(filter, team.pk)
    query = SESSION_EVENTS.format(date_from=date_from, date_to=date_to)
    rows = sync_execute(query, {"team_id": team.pk, "distinct_id": filter.distinct_id})
    return self._serialize(rows, filter.distinct_id, team.pk)
def _process_content_sql(team: Team, entity: Entity, filter: Filter):
    """Build the people content SQL and params for one entity/breakdown combo.

    Cohort breakdowns become an extra distinct_id sub-select; other
    breakdown types are folded into the property filters. WAU/MAU math
    routes to the active-user SQL variant.
    """
    filter = _handle_date_interval(filter)
    parsed_date_from, parsed_date_to, _ = parse_timestamps(filter=filter, team_id=team.pk)
    entity_sql, entity_params = format_entity_filter(entity=entity)
    person_filter = ""
    person_filter_params: Dict[str, Any] = {}
    if filter.breakdown_type == "cohort" and filter.breakdown_value != "all":
        cohort = Cohort.objects.get(pk=filter.breakdown_value)
        person_filter, person_filter_params = format_filter_query(cohort)
        person_filter = "AND distinct_id IN ({})".format(person_filter)
    elif filter.breakdown_type and isinstance(
            filter.breakdown, str) and isinstance(filter.breakdown_value, str):
        # Narrow to people matching the selected breakdown value by treating
        # it as one more property filter.
        breakdown_prop = Property(
            **{
                "key": filter.breakdown,
                "value": filter.breakdown_value,
                "type": filter.breakdown_type
            })
        filter.properties.append(breakdown_prop)
    prop_filters, prop_filter_params = parse_prop_clauses(
        filter.properties, team.pk, filter_test_accounts=filter.filter_test_accounts)
    params: Dict = {
        "team_id": team.pk,
        **prop_filter_params,
        **entity_params,
        "offset": filter.offset
    }
    if entity.math in [WEEKLY_ACTIVE, MONTHLY_ACTIVE]:
        active_user_params = get_active_user_params(filter, entity, team.pk)
        content_sql = PERSONS_ACTIVE_USER_SQL.format(
            entity_query=f"AND {entity_sql}",
            parsed_date_from=parsed_date_from,
            parsed_date_to=parsed_date_to,
            filters=prop_filters,
            breakdown_filter="",
            person_filter=person_filter,
            GET_TEAM_PERSON_DISTINCT_IDS=GET_TEAM_PERSON_DISTINCT_IDS,
            **active_user_params,
        )
    else:
        content_sql = PERSON_TREND_SQL.format(
            entity_filter=f"AND {entity_sql}",
            parsed_date_from=parsed_date_from,
            parsed_date_to=parsed_date_to,
            filters=prop_filters,
            breakdown_filter="",
            person_filter=person_filter,
        )
    return content_sql, {**params, **person_filter_params}
def calculate_avg(self, filter: Filter, team: Team):
    """Compute the average session length per interval, plus the overall average.

    Defaults the range to -7d..now (mutating the filter's private date
    fields); returns [] when no sessions exist in the range.
    """
    # format default dates
    if not filter._date_from:
        filter._date_from = relative_date_parse("-7d")
    if not filter._date_to:
        filter._date_to = timezone.now()
    parsed_date_from, parsed_date_to = parse_timestamps(filter)
    filters, params = parse_prop_clauses("uuid", filter.properties, team)
    interval_notation = get_interval_annotation_ch(filter.interval)
    num_intervals, seconds_in_interval = get_time_diff(
        filter.interval or "day", filter.date_from, filter.date_to)
    avg_query = SESSIONS_NO_EVENTS_SQL.format(
        team_id=team.pk,
        date_from=parsed_date_from,
        date_to=parsed_date_to,
        filters="{}".format(filters) if filter.properties else "",
        sessions_limit="",
    )
    per_period_query = AVERAGE_PER_PERIOD_SQL.format(
        sessions=avg_query, interval=interval_notation)
    # Zero-filled series so intervals without sessions still appear.
    null_sql = NULL_SQL.format(
        date_to=(filter.date_to or timezone.now()).strftime("%Y-%m-%d 00:00:00"),
        interval=interval_notation,
        num_intervals=num_intervals,
        seconds_in_interval=seconds_in_interval,
    )
    final_query = AVERAGE_SQL.format(sessions=per_period_query, null_sql=null_sql)
    params = {**params, "team_id": team.pk}
    response = sync_execute(final_query, params)
    values = self.clean_values(filter, response)
    time_series_data = append_data(values, interval=filter.interval, math=None)
    # calculate average
    total = sum(val[1] for val in values)
    if total == 0:
        return []
    valid_days = sum(1 if val[1] else 0 for val in values)
    # Average only over intervals that actually had sessions.
    overall_average = (total / valid_days) if valid_days else 0
    result = self._format_avg(overall_average)
    time_series_data.update(result)
    return [time_series_data]
def _get_date_filter(self) -> Tuple[str, Dict]:
    """Return the combined date-range SQL fragment and its bound params."""
    from_clause, to_clause, date_params = parse_timestamps(filter=self._filter, team_id=self._team_id)
    fragment = f"""
    {from_clause}
    {to_clause}
    """
    return fragment, date_params
def _get_date_filter(self):
    """Return a date filter widened by one interval on each side, plus params.

    :TRICKY: We fetch all data even for the period before the graph starts
    up until the end of the last period.
    """
    _, _, date_params = parse_timestamps(filter=self._filter, team_id=self._team_id)
    interval = self._filter.interval
    clause = f"""
    AND timestamp >= toDateTime(dateTrunc(%(interval)s, toDateTime(%(date_from)s))) - INTERVAL 1 {interval}
    AND timestamp < toDateTime(dateTrunc(%(interval)s, toDateTime(%(date_to)s))) + INTERVAL 1 {interval}
    """
    return clause, {**date_params, "interval": interval}