Exemple #1
0
    def calculate_paths(self, filter: PathFilter, team: Team):

        parsed_date_from, parsed_date_to, _ = parse_timestamps(filter=filter,
                                                               team_id=team.pk)
        event, path_type, start_comparator = self._determine_path_type(
            filter.path_type if filter else None)

        prop_filters, prop_filter_params = parse_prop_clauses(
            filter.properties,
            team.pk,
            filter_test_accounts=filter.filter_test_accounts)

        # Step 0. Event culling subexpression for step 1.
        # Make an expression that removes events in a session that are definitely unused.
        # For example the 4th, 5th, etc row after a "new_session = 1" or "marked_session_start = 1" row gets removed
        excess_row_filter = "("
        for i in range(4):
            if i > 0:
                excess_row_filter += " or "
            excess_row_filter += "neighbor(new_session, {}, 0) = 1".format(-i)
            if filter and filter.start_point:
                excess_row_filter += " or neighbor(marked_session_start, {}, 0) = 1".format(
                    -i)
        excess_row_filter += ")"

        paths_query = PATHS_QUERY_FINAL.format(
            event_query="event = %(event)s" if event else
            "event NOT IN ('$autocapture', '$pageview', '$identify', '$pageleave', '$screen')",
            path_type=path_type,
            parsed_date_from=parsed_date_from,
            parsed_date_to=parsed_date_to,
            filters=prop_filters,
            marked_session_start="{} = %(start_point)s".format(
                start_comparator)
            if filter and filter.start_point else "new_session",
            excess_row_filter=excess_row_filter,
            select_elements_chain=", events.elements_chain as elements_chain"
            if event == AUTOCAPTURE_EVENT else "",
            group_by_elements_chain=", events.elements_chain"
            if event == AUTOCAPTURE_EVENT else "",
        )

        params: Dict = {
            "team_id": team.pk,
            "property": "$current_url",
            "event": event,
            "start_point": filter.start_point,
        }
        params = {**params, **prop_filter_params}

        rows = sync_execute(paths_query, params)

        resp: List[Dict[str, str]] = []
        for row in rows:
            resp.append({
                "source": row[0],
                "source_id": row[1],
                "target": row[2],
                "target_id": row[3],
                "value": row[4],
            })

        resp = sorted(resp, key=lambda x: x["value"], reverse=True)
        return resp
Exemple #2
0
    def list(self, request: Request, *args: Any, **kwargs: Any) -> Response:
        team = self.team
        filter = Filter(request=request)
        if request.GET.get("after"):
            filter._date_from = request.GET["after"]
        if request.GET.get("before"):
            filter._date_to = request.GET["before"]
        limit = "LIMIT 101"
        conditions, condition_params = determine_event_conditions(
            request.GET.dict())
        prop_filters, prop_filter_params = parse_prop_clauses(
            filter.properties, team.pk)
        if request.GET.get("action_id"):
            action = Action.objects.get(pk=request.GET["action_id"])
            if action.steps.count() == 0:
                return Response({"next": False, "results": []})
            action_query, params = format_action_filter(action)
            prop_filters += " AND {}".format(action_query)
            prop_filter_params = {**prop_filter_params, **params}

        if prop_filters != "":
            query_result = sync_execute(
                SELECT_EVENT_WITH_PROP_SQL.format(conditions=conditions,
                                                  limit=limit,
                                                  filters=prop_filters),
                {
                    "team_id": team.pk,
                    **condition_params,
                    **prop_filter_params
                },
            )
        else:
            query_result = sync_execute(
                SELECT_EVENT_WITH_ARRAY_PROPS_SQL.format(conditions=conditions,
                                                         limit=limit),
                {
                    "team_id": team.pk,
                    **condition_params
                },
            )

        result = ClickhouseEventSerializer(
            query_result[0:100],
            many=True,
            context={
                "people": self._get_people(query_result, team),
            },
        ).data

        if len(query_result) > 100:
            path = request.get_full_path()
            reverse = request.GET.get("orderBy", "-timestamp") != "-timestamp"
            next_url: Optional[str] = request.build_absolute_uri(
                "{}{}{}={}".format(
                    path,
                    "&" if "?" in path else "?",
                    "after" if reverse else "before",
                    query_result[99][3].strftime("%Y-%m-%dT%H:%M:%S.%fZ"),
                ))
        else:
            next_url = None

        return Response({"next": next_url, "results": result})
Exemple #3
0
    def _serialize_lifecycle(self, entity: Entity, filter: Filter,
                             team_id: int) -> List[Dict[str, Any]]:

        date_from = filter.date_from

        if not date_from:
            date_from = get_earliest_timestamp(team_id)

        interval = filter.interval or "day"
        num_intervals, seconds_in_interval = get_time_diff(
            interval, filter.date_from, filter.date_to, team_id)
        interval_increment, interval_string, sub_interval_string = self.get_interval(
            interval)
        trunc_func = get_trunc_func_ch(interval)
        event_query = ""
        event_params: Dict[str, Any] = {}

        props_to_filter = [*filter.properties, *entity.properties]
        prop_filters, prop_filter_params = parse_prop_clauses(
            props_to_filter, team_id)

        _, _, date_params = parse_timestamps(filter=filter, team_id=team_id)

        if entity.type == TREND_FILTER_TYPE_ACTIONS:
            try:
                action = Action.objects.get(pk=entity.id)
                event_query, event_params = format_action_filter(action)
            except:
                return []
        else:
            event_query = "event = %(event)s"
            event_params = {"event": entity.id}

        result = sync_execute(
            LIFECYCLE_SQL.format(
                interval=interval_string,
                trunc_func=trunc_func,
                event_query=event_query,
                filters=prop_filters,
                sub_interval=sub_interval_string,
            ),
            {
                "team_id":
                team_id,
                "prev_date_from":
                (date_from - interval_increment).strftime("%Y-%m-%d{}".format(
                    " %H:%M:%S" if filter.interval == "hour"
                    or filter.interval == "minute" else " 00:00:00")),
                "num_intervals":
                num_intervals,
                "seconds_in_interval":
                seconds_in_interval,
                **event_params,
                **date_params,
                **prop_filter_params,
            },
        )

        res = []
        for val in result:
            label = "{} - {}".format(entity.name, val[2])
            additional_values = {"label": label, "status": val[2]}
            parsed_result = parse_response(val, filter, additional_values)
            res.append(parsed_result)

        return res
Exemple #4
0
    def _retrieve_people_in_period(self, filter: RetentionFilter, team: Team):
        period = filter.period
        is_first_time_retention = filter.retention_type == RETENTION_FIRST_TIME
        trunc_func = get_trunc_func_ch(period)
        prop_filters, prop_filter_params = parse_prop_clauses(
            filter.properties, team.pk)

        returning_entity = filter.returning_entity if filter.selected_interval > 0 else filter.target_entity
        target_query, target_params = self._get_condition(filter.target_entity,
                                                          table="e")
        target_query_formatted = "AND {target_query}".format(
            target_query=target_query)
        return_query, return_params = self._get_condition(returning_entity,
                                                          table="e",
                                                          prepend="returning")
        return_query_formatted = "AND {return_query}".format(
            return_query=return_query)

        first_event_sql = (REFERENCE_EVENT_UNIQUE_PEOPLE_PER_PERIOD_SQL
                           if is_first_time_retention else
                           REFERENCE_EVENT_PEOPLE_PER_PERIOD_SQL).format(
                               target_query=target_query_formatted,
                               filters=prop_filters,
                               trunc_func=trunc_func,
                           )
        default_event_query = (
            DEFAULT_REFERENCE_EVENT_UNIQUE_PEOPLE_PER_PERIOD_SQL
            if is_first_time_retention else
            DEFAULT_REFERENCE_EVENT_PEOPLE_PER_PERIOD_SQL).format(
                target_query=target_query_formatted,
                filters=prop_filters,
                trunc_func=trunc_func,
            )

        date_from = filter.date_from + filter.selected_interval * filter.period_increment
        date_to = filter.date_to

        new_data = filter._data
        new_data.update({
            "total_intervals":
            filter.total_intervals - filter.selected_interval
        })
        filter = RetentionFilter(data=new_data)

        query_result = sync_execute(
            RETENTION_PEOPLE_PER_PERIOD_SQL.format(
                returning_query=return_query_formatted,
                filters=prop_filters,
                first_event_sql=first_event_sql,
                first_event_default_sql=default_event_query,
                trunc_func=trunc_func,
            ),
            {
                "team_id":
                team.pk,
                "start_date":
                date_from.strftime("%Y-%m-%d{}".format(
                    " %H:%M:%S" if filter.period == "Hour" else " 00:00:00")),
                "end_date":
                date_to.strftime("%Y-%m-%d{}".format(
                    " %H:%M:%S" if filter.period == "Hour" else " 00:00:00")),
                "offset":
                filter.offset,
                "limit":
                100,
                "period":
                period,
                **target_params,
                **return_params,
                **prop_filter_params,
            },
        )
        people_dict = {}

        from posthog.api.person import PersonSerializer

        for person in Person.objects.filter(
                team_id=team.pk, uuid__in=[val[0] for val in query_result]):
            people_dict.update(
                {str(person.uuid): PersonSerializer(person).data})

        result = self.process_people_in_period(filter, query_result,
                                               people_dict)
        return result
def execute_query(query: str) -> Any:
    from ee.clickhouse.client import sync_execute

    return sync_execute(query)
Exemple #6
0
def get_property_values_for_key(key: str, team: Team, value: Optional[str] = None):
    if value:
        return sync_execute(
            SELECT_PROP_VALUES_SQL_WITH_FILTER, {"team_id": team.pk, "key": key, "value": "%{}%".format(value)}
        )
    return sync_execute(SELECT_PROP_VALUES_SQL, {"team_id": team.pk, "key": key})
Exemple #7
0
 def run(self, *args, **kwargs) -> SessionRecordingQueryResult:
     query, query_params = self.get_query()
     query_results = sync_execute(query, query_params)
     session_recordings = self._data_to_return(query_results)
     return self._paginate_results(session_recordings)
Exemple #8
0
def get_persons(team_id: int):
    result = sync_execute(GET_PERSON_SQL, {"team_id": team_id})
    return ClickhousePersonSerializer(result, many=True).data
Exemple #9
0
def get_person_distinct_ids(team_id: int):
    result = sync_execute(GET_DISTINCT_IDS_SQL, {"team_id": team_id})
    return ClickhousePersonDistinctIdSerializer(result, many=True).data
Exemple #10
0
 def _exec_query(self) -> List[Tuple]:
     query = self.get_query()
     return sync_execute(query, self.params)
Exemple #11
0
def distinct_ids_exist(team_id: int, ids: List[str]) -> bool:
    return bool(
        sync_execute(
            PERSON_DISTINCT_ID_EXISTS_SQL.format([str(id) for id in ids]),
            {"team_id": team_id})[0][0])
 def _team_ids(self):
     return list(
         sorted(row[0] for row in sync_execute(
             "SELECT DISTINCT team_id FROM person_distinct_id")))
def _get_people():
    return [Person(p) for p in sync_execute("select * from person")]
def _get_events():
    return sync_execute("select * from events")
Exemple #15
0
 def perform_query(self):
     sql = self._configure_sql()
     results = sync_execute(sql, self.params)
     summary = self._summarize_data(results)
     return summary
Exemple #16
0
def update_person_properties(team_id: int, id: str, properties: Dict) -> None:
    sync_execute(UPDATE_PERSON_PROPERTIES, {
        "team_id": team_id,
        "id": id,
        "properties": json.dumps(properties)
    })
Exemple #17
0
    def _run_formula_query(self, filter: Filter, team_id: int):
        letters = [chr(65 + i) for i in range(0, len(filter.entities))]
        queries = []
        params: Dict[str, Any] = {}
        for idx, entity in enumerate(filter.entities):
            sql, entity_params, _ = self._get_sql_for_entity(
                filter, entity, team_id)  # type: ignore
            sql = sql.replace("%(", "%({}_".format(idx))
            entity_params = {
                "{}_{}".format(idx, key): value
                for key, value in entity_params.items()
            }
            queries.append(sql)
            params = {**params, **entity_params}

        breakdown_value = (", sub_A.breakdown_value"
                           if filter.breakdown_type == "cohort" else
                           ", trim(BOTH '\"' FROM sub_A.breakdown_value)")
        is_aggregate = filter.display in [TRENDS_TABLE, TRENDS_PIE]

        sql = """SELECT
            {date_select}
            arrayMap(({letters_select}) -> {formula}, {selects})
            {breakdown_value}
            FROM ({first_query}) as sub_A
            {queries}
        """.format(
            date_select="'' as date," if is_aggregate else "sub_A.date,",
            letters_select=", ".join(letters),
            formula=filter.
            formula,  # formula is properly escaped in the filter
            # Need to wrap aggregates in arrays so we can still use arrayMap
            selects=", ".join([
                ("[sub_{}.data]" if is_aggregate else "sub_{}.data").format(
                    letters[i]) for i in range(0, len(filter.entities))
            ]),
            breakdown_value=breakdown_value if filter.breakdown else "",
            first_query=queries[0],
            queries="".join([
                "FULL OUTER JOIN ({query}) as sub_{letter} ON sub_A.breakdown_value = sub_{letter}.breakdown_value "
                .format(query=query, letter=letters[i + 1])
                for i, query in enumerate(queries[1:])
            ]) if filter.breakdown else "".join([
                " CROSS JOIN ({}) as sub_{}".format(query, letters[i + 1])
                for i, query in enumerate(queries[1:])
            ]),
        )
        result = sync_execute(sql, params)
        response = []
        for item in result:
            additional_values: Dict[str, Any] = {
                "label": self._label(filter, item, team_id),
            }
            if is_aggregate:
                additional_values["data"] = []
                additional_values["aggregated_value"] = item[1][0]
            else:
                additional_values["data"] = [
                    round(number, 2) if not math.isnan(number)
                    and not math.isinf(number) else 0.0 for number in item[1]
                ]
                if filter.display == TRENDS_CUMULATIVE:
                    additional_values["data"] = list(
                        accumulate(additional_values["data"]))
            additional_values["count"] = float(sum(additional_values["data"]))
            response.append(parse_response(item, filter, additional_values))
        return response
Exemple #18
0
def get_events():
    events = sync_execute(GET_EVENTS_SQL)
    return ClickhouseEventSerializer(events, many=True, context={"elements": None, "people": None}).data
Exemple #19
0
    def test_person_properties_filter(self):
        filter = Filter(
            data={
                "date_from":
                "2021-05-01 00:00:00",
                "date_to":
                "2021-05-07 00:00:00",
                "events": [
                    {
                        "id": "viewed",
                        "order": 0
                    },
                ],
                "properties": [
                    {
                        "key": "email",
                        "value": "@posthog.com",
                        "operator": "not_icontains",
                        "type": "person"
                    },
                    {
                        "key": "key",
                        "value": "val"
                    },
                ],
            })

        entity = Entity({"id": "viewed", "type": "events"})

        global_prop_query, global_prop_query_params = TrendsEventQuery(
            filter=filter, entity=entity, team_id=self.team.pk).get_query()
        sync_execute(global_prop_query, global_prop_query_params)

        filter = Filter(
            data={
                "date_from": "2021-05-01 00:00:00",
                "date_to": "2021-05-07 00:00:00",
                "events": [
                    {
                        "id": "viewed",
                        "order": 0
                    },
                ],
            })

        entity = Entity({
            "id":
            "viewed",
            "type":
            "events",
            "properties": [
                {
                    "key": "email",
                    "value": "@posthog.com",
                    "operator": "not_icontains",
                    "type": "person"
                },
                {
                    "key": "key",
                    "value": "val"
                },
            ],
        })

        entity_prop_query, entity_prop_query_params = TrendsEventQuery(
            filter=filter, entity=entity, team_id=self.team.pk).get_query()

        # global queries and enttiy queries should be the same
        self.assertEqual(sqlparse.format(global_prop_query, reindent=True),
                         sqlparse.format(entity_prop_query, reindent=True))
        sync_execute(entity_prop_query, entity_prop_query_params)
Exemple #20
0
def get_events_by_team(team_id: Union[str, int]):
    events = sync_execute(GET_EVENTS_BY_TEAM_SQL, {"team_id": str(team_id)})
    return ClickhouseEventSerializer(events, many=True, context={"elements": None, "people": None}).data
Exemple #21
0
    def _retrieve_people(self, filter: RetentionFilter, team: Team):
        period = filter.period
        is_first_time_retention = filter.retention_type == RETENTION_FIRST_TIME
        trunc_func = get_trunc_func_ch(period)
        prop_filters, prop_filter_params = parse_prop_clauses(
            filter.properties, team.pk)

        returning_entity = filter.returning_entity if filter.selected_interval > 0 else filter.target_entity
        target_query, target_params = self._get_condition(filter.target_entity,
                                                          table="e")
        target_query_formatted = "AND {target_query}".format(
            target_query=target_query)
        return_query, return_params = self._get_condition(returning_entity,
                                                          table="e",
                                                          prepend="returning")
        return_query_formatted = "AND {return_query}".format(
            return_query=return_query)

        reference_event_query = (REFERENCE_EVENT_UNIQUE_SQL
                                 if is_first_time_retention else
                                 REFERENCE_EVENT_SQL).format(
                                     target_query=target_query_formatted,
                                     filters=prop_filters,
                                     trunc_func=trunc_func,
                                 )
        reference_date_from = filter.date_from
        reference_date_to = filter.date_from + filter.period_increment
        date_from = filter.date_from + filter.selected_interval * filter.period_increment
        date_to = date_from + filter.period_increment

        result = sync_execute(
            RETENTION_PEOPLE_SQL.format(
                reference_event_query=reference_event_query,
                target_query=return_query_formatted,
                filters=prop_filters),
            {
                "team_id":
                team.pk,
                "start_date":
                date_from.strftime("%Y-%m-%d{}".format(
                    " %H:%M:%S" if filter.period == "Hour" else " 00:00:00")),
                "end_date":
                date_to.strftime("%Y-%m-%d{}".format(
                    " %H:%M:%S" if filter.period == "Hour" else " 00:00:00")),
                "reference_start_date":
                reference_date_from.strftime("%Y-%m-%d{}".format(
                    " %H:%M:%S" if filter.period == "Hour" else " 00:00:00")),
                "reference_end_date":
                reference_date_to.strftime("%Y-%m-%d{}".format(
                    " %H:%M:%S" if filter.period == "Hour" else " 00:00:00")),
                "offset":
                filter.offset,
                **target_params,
                **return_params,
                **prop_filter_params,
            },
        )
        people = Person.objects.filter(team_id=team.pk,
                                       uuid__in=[val[0] for val in result])

        from posthog.api.person import PersonSerializer

        return PersonSerializer(people, many=True).data
Exemple #22
0
    def _format_breakdown_query(self, entity: Entity, filter: Filter,
                                team: Team) -> List[Dict[str, Any]]:
        params = {"team_id": team.pk}
        interval_annotation = get_interval_annotation_ch(filter.interval)
        num_intervals, seconds_in_interval = get_time_diff(
            filter.interval or "day", filter.date_from, filter.date_to)
        parsed_date_from, parsed_date_to = parse_timestamps(filter=filter)

        props_to_filter = [*filter.properties, *entity.properties]
        prop_filters, prop_filter_params = parse_prop_clauses(
            "uuid", props_to_filter, team)

        aggregate_operation, join_condition, math_params = self._process_math(
            entity)

        action_query = ""
        action_params: Dict = {}
        if entity.type == TREND_FILTER_TYPE_ACTIONS:
            action = Action.objects.get(pk=entity.id)
            action_query, action_params = format_action_filter(action)

        null_sql = NULL_BREAKDOWN_SQL.format(
            interval=interval_annotation,
            seconds_in_interval=seconds_in_interval,
            num_intervals=num_intervals,
            date_to=((filter.date_to
                      or timezone.now())).strftime("%Y-%m-%d %H:%M:%S"),
        )

        params = {**params, **math_params, **prop_filter_params}
        top_elements_array = []

        if filter.breakdown_type == "cohort":
            breakdown = filter.breakdown if filter.breakdown and isinstance(
                filter.breakdown, list) else []
            if "all" in breakdown:
                params = {**params, "event": entity.id, **action_params}
                null_sql = NULL_SQL.format(
                    interval=interval_annotation,
                    seconds_in_interval=seconds_in_interval,
                    num_intervals=num_intervals,
                    date_to=((filter.date_to or
                              timezone.now())).strftime("%Y-%m-%d %H:%M:%S"),
                )
                conditions = BREAKDOWN_CONDITIONS_SQL.format(
                    parsed_date_from=parsed_date_from,
                    parsed_date_to=parsed_date_to,
                    actions_query="AND uuid IN ({})".format(action_query)
                    if action_query else "",
                    event_filter="AND event = %(event)s"
                    if not action_query else "",
                    filters="{filters}".format(
                        filters=prop_filters) if props_to_filter else "",
                )
                breakdown_query = BREAKDOWN_DEFAULT_SQL.format(
                    null_sql=null_sql,
                    conditions=conditions,
                    event_join=join_condition,
                    aggregate_operation=aggregate_operation,
                    interval_annotation=interval_annotation,
                )
            else:
                cohort_queries, cohort_ids, cohort_params = self._format_breakdown_cohort_join_query(
                    breakdown, team)
                params = {
                    **params, "values": cohort_ids,
                    "event": entity.id,
                    **action_params,
                    **cohort_params
                }
                breakdown_filter = BREAKDOWN_COHORT_JOIN_SQL.format(
                    cohort_queries=cohort_queries,
                    parsed_date_from=parsed_date_from,
                    parsed_date_to=parsed_date_to,
                    actions_query="AND uuid IN ({})".format(action_query)
                    if action_query else "",
                    event_filter="AND event = %(event)s"
                    if not action_query else "",
                    filters="{filters}".format(
                        filters=prop_filters) if props_to_filter else "",
                )
                breakdown_query = BREAKDOWN_QUERY_SQL.format(
                    null_sql=null_sql,
                    breakdown_filter=breakdown_filter,
                    event_join=join_condition,
                    aggregate_operation=aggregate_operation,
                    interval_annotation=interval_annotation,
                )
        elif filter.breakdown_type == "person":
            top_elements_array = self._get_top_elements(
                TOP_PERSON_PROPS_ARRAY_OF_KEY_SQL, filter, parsed_date_from,
                parsed_date_to, team)
            params = {
                **params,
                "values": top_elements_array,
                "key": filter.breakdown,
                "event": entity.id,
                **action_params,
            }
            breakdown_filter = BREAKDOWN_PERSON_PROP_JOIN_SQL.format(
                parsed_date_from=parsed_date_from,
                parsed_date_to=parsed_date_to,
                actions_query="AND uuid IN ({})".format(action_query)
                if action_query else "",
                event_filter="AND event = %(event)s"
                if not action_query else "",
            )
            breakdown_query = BREAKDOWN_QUERY_SQL.format(
                null_sql=null_sql,
                breakdown_filter=breakdown_filter,
                event_join=join_condition,
                aggregate_operation=aggregate_operation,
                interval_annotation=interval_annotation,
            )
        else:

            top_elements_array = self._get_top_elements(
                TOP_ELEMENTS_ARRAY_OF_KEY_SQL, filter, parsed_date_from,
                parsed_date_to, team)

            params = {
                **params,
                "values": top_elements_array,
                "key": filter.breakdown,
                "event": entity.id,
                **action_params,
            }
            breakdown_filter = BREAKDOWN_PROP_JOIN_SQL.format(
                parsed_date_from=parsed_date_from,
                parsed_date_to=parsed_date_to,
                actions_query="AND uuid IN ({})".format(action_query)
                if action_query else "",
                event_filter="AND event = %(event)s"
                if not action_query else "",
                filters="{filters}".format(
                    filters=prop_filters) if props_to_filter else "",
            )
            breakdown_query = BREAKDOWN_QUERY_SQL.format(
                null_sql=null_sql,
                breakdown_filter=breakdown_filter,
                event_join=join_condition,
                aggregate_operation=aggregate_operation,
                interval_annotation=interval_annotation,
            )
        try:
            result = sync_execute(breakdown_query, params)
        except:
            result = []

        parsed_results = []

        for idx, stats in enumerate(result):

            breakdown_value = stats[
                2] if not filter.breakdown_type == "cohort" else ""
            stripped_value = breakdown_value.strip('"') if isinstance(
                breakdown_value, str) else breakdown_value

            extra_label = self._determine_breakdown_label(
                idx, filter.breakdown_type, filter.breakdown, stripped_value)
            label = "{} - {}".format(entity.name, extra_label)
            additional_values = {
                "label":
                label,
                "breakdown_value":
                filter.breakdown[idx]
                if isinstance(filter.breakdown, list) else filter.breakdown
                if filter.breakdown_type == "cohort" else stripped_value,
            }
            parsed_result = self._parse_response(stats, filter,
                                                 additional_values)
            parsed_results.append(parsed_result)

        return parsed_results
Exemple #23
0
    def _execute_sql(
        self,
        filter: RetentionFilter,
        team: Team,
    ) -> Dict[Tuple[int, int], Dict[str, Any]]:
        period = filter.period
        prop_filters, prop_filter_params = parse_prop_clauses(
            filter.properties, team.pk)
        target_entity = filter.target_entity
        returning_entity = filter.returning_entity
        is_first_time_retention = filter.retention_type == RETENTION_FIRST_TIME
        date_from = filter.date_from
        date_to = filter.date_to

        target_query = ""
        target_params: Dict = {}
        trunc_func = get_trunc_func_ch(period)

        target_query, target_params = self._get_condition(target_entity,
                                                          table="e")
        returning_query, returning_params = self._get_condition(
            returning_entity, table="e", prepend="returning")

        target_query_formatted = "AND {target_query}".format(
            target_query=target_query)
        returning_query_formatted = "AND {returning_query}".format(
            returning_query=returning_query)

        reference_event_sql = (REFERENCE_EVENT_UNIQUE_SQL
                               if is_first_time_retention else
                               REFERENCE_EVENT_SQL).format(
                                   target_query=target_query_formatted,
                                   filters=prop_filters,
                                   trunc_func=trunc_func,
                               )

        target_condition, _ = self._get_condition(target_entity,
                                                  table="reference_event")
        if is_first_time_retention:
            target_condition = target_condition.replace(
                "reference_event.uuid", "reference_event.min_uuid")
            target_condition = target_condition.replace(
                "reference_event.event", "reference_event.min_event")
        returning_condition, _ = self._get_condition(returning_entity,
                                                     table="event",
                                                     prepend="returning")
        result = sync_execute(
            RETENTION_SQL.format(
                target_query=target_query_formatted,
                returning_query=returning_query_formatted,
                filters=prop_filters,
                trunc_func=trunc_func,
                extra_union="UNION ALL {} ".format(reference_event_sql),
                reference_event_sql=reference_event_sql,
                target_condition=target_condition,
                returning_condition=returning_condition,
            ),
            {
                "team_id":
                team.pk,
                "start_date":
                date_from.strftime("%Y-%m-%d{}".format(
                    " %H:%M:%S" if filter.period == "Hour" else " 00:00:00")),
                "end_date":
                date_to.strftime("%Y-%m-%d{}".format(
                    " %H:%M:%S" if filter.period == "Hour" else " 00:00:00")),
                "reference_start_date":
                date_from.strftime("%Y-%m-%d{}".format(
                    " %H:%M:%S" if filter.period == "Hour" else " 00:00:00")),
                "reference_end_date":
                ((date_from + filter.period_increment) if filter.display
                 == TRENDS_LINEAR else date_to).strftime("%Y-%m-%d{}".format(
                     " %H:%M:%S" if filter.period == "Hour" else " 00:00:00")),
                **prop_filter_params,
                **target_params,
                **returning_params,
                "period":
                period,
            },
        )

        initial_interval_result = sync_execute(
            INITIAL_INTERVAL_SQL.format(
                reference_event_sql=reference_event_sql),
            {
                "team_id":
                team.pk,
                "start_date":
                date_from.strftime("%Y-%m-%d{}".format(
                    " %H:%M:%S" if filter.period == "Hour" else " 00:00:00")),
                "end_date":
                date_to.strftime("%Y-%m-%d{}".format(
                    " %H:%M:%S" if filter.period == "Hour" else " 00:00:00")),
                "reference_start_date":
                date_from.strftime("%Y-%m-%d{}".format(
                    " %H:%M:%S" if filter.period == "Hour" else " 00:00:00")),
                "reference_end_date":
                ((date_from + filter.period_increment) if filter.display
                 == TRENDS_LINEAR else date_to).strftime("%Y-%m-%d{}".format(
                     " %H:%M:%S" if filter.period == "Hour" else " 00:00:00")),
                **prop_filter_params,
                **target_params,
                **returning_params,
                "period":
                period,
            },
        )

        result_dict = {}
        for initial_res in initial_interval_result:
            result_dict.update({
                (initial_res[0], 0): {
                    "count": initial_res[1],
                    "people": []
                }
            })

        for res in result:
            result_dict.update({
                (res[0], res[1]): {
                    "count": res[2],
                    "people": []
                }
            })

        return result_dict
Exemple #24
0
    def _format_normal_query(self, entity: Entity, filter: Filter,
                             team: Team) -> List[Dict[str, Any]]:

        interval_annotation = get_interval_annotation_ch(filter.interval)
        num_intervals, seconds_in_interval = get_time_diff(
            filter.interval or "day", filter.date_from, filter.date_to)
        parsed_date_from, parsed_date_to = parse_timestamps(filter=filter)

        props_to_filter = [*filter.properties, *entity.properties]
        prop_filters, prop_filter_params = parse_prop_clauses(
            "uuid", props_to_filter, team)

        aggregate_operation, join_condition, math_params = self._process_math(
            entity)

        params: Dict = {"team_id": team.pk}
        params = {**params, **prop_filter_params, **math_params}

        if entity.type == TREND_FILTER_TYPE_ACTIONS:
            try:
                action = Action.objects.get(pk=entity.id)
                action_query, action_params = format_action_filter(action)
                params = {**params, **action_params}
                content_sql = VOLUME_ACTIONS_SQL.format(
                    interval=interval_annotation,
                    timestamp="timestamp",
                    team_id=team.pk,
                    actions_query=action_query,
                    parsed_date_from=(parsed_date_from or ""),
                    parsed_date_to=(parsed_date_to or ""),
                    filters="{filters}".format(
                        filters=prop_filters) if props_to_filter else "",
                    event_join=join_condition,
                    aggregate_operation=aggregate_operation,
                )
            except:
                return []
        else:
            content_sql = VOLUME_SQL.format(
                interval=interval_annotation,
                timestamp="timestamp",
                team_id=team.pk,
                parsed_date_from=(parsed_date_from or ""),
                parsed_date_to=(parsed_date_to or ""),
                filters="{filters}".format(
                    filters=prop_filters) if props_to_filter else "",
                event_join=join_condition,
                aggregate_operation=aggregate_operation,
            )
            params = {**params, "event": entity.id}
        null_sql = NULL_SQL.format(
            interval=interval_annotation,
            seconds_in_interval=seconds_in_interval,
            num_intervals=num_intervals,
            date_to=((filter.date_to
                      or timezone.now())).strftime("%Y-%m-%d %H:%M:%S"),
        )

        final_query = AGGREGATE_SQL.format(null_sql=null_sql,
                                           content_sql=content_sql)

        try:
            result = sync_execute(final_query, params)

        except:
            result = []

        parsed_results = []
        for _, stats in enumerate(result):
            parsed_result = self._parse_response(stats, filter)
            parsed_results.append(parsed_result)

        return parsed_results
Exemple #25
0
    def _format_normal_query(self, entity: Entity, filter: Filter,
                             team_id: int) -> List[Dict[str, Any]]:

        interval_annotation = get_trunc_func_ch(filter.interval)
        num_intervals, seconds_in_interval = get_time_diff(filter.interval
                                                           or "day",
                                                           filter.date_from,
                                                           filter.date_to,
                                                           team_id=team_id)
        parsed_date_from, parsed_date_to, _ = parse_timestamps(filter=filter,
                                                               team_id=team_id)

        props_to_filter = [*filter.properties, *entity.properties]
        prop_filters, prop_filter_params = parse_prop_clauses(
            props_to_filter, team_id)

        aggregate_operation, join_condition, math_params = process_math(entity)

        params: Dict = {"team_id": team_id}
        params = {**params, **prop_filter_params, **math_params}
        content_sql_params = {
            "interval": interval_annotation,
            "timestamp": "timestamp",
            "team_id": team_id,
            "parsed_date_from": parsed_date_from,
            "parsed_date_to": parsed_date_to,
            "filters": prop_filters,
            "event_join": join_condition,
            "aggregate_operation": aggregate_operation,
        }

        entity_params, entity_format_params = self._populate_entity_params(
            entity)
        params = {**params, **entity_params}
        content_sql_params = {**content_sql_params, **entity_format_params}

        if filter.display == TRENDS_TABLE or filter.display == TRENDS_PIE:
            agg_query = self._determine_single_aggregate_query(filter, entity)
            content_sql = agg_query.format(**content_sql_params)

            try:
                result = sync_execute(content_sql, params)
            except:
                result = []

            return [{
                "aggregated_value":
                result[0][0] if result and len(result) else 0
            }]
        else:
            content_sql = self._determine_trend_aggregate_query(filter, entity)
            content_sql = content_sql.format(**content_sql_params)

            null_sql = NULL_SQL.format(
                interval=interval_annotation,
                seconds_in_interval=seconds_in_interval,
                num_intervals=num_intervals,
                date_to=filter.date_to.strftime("%Y-%m-%d %H:%M:%S"),
            )
            final_query = AGGREGATE_SQL.format(null_sql=null_sql,
                                               content_sql=content_sql)
            try:
                result = sync_execute(final_query, params)
            except:
                result = []

            parsed_results = []
            for _, stats in enumerate(result):
                parsed_result = parse_response(stats, filter)
                parsed_results.append(parsed_result)

            return parsed_results
Exemple #26
0
 def get_properties(self, request: Request):
     rows = sync_execute(GET_PERSON_PROPERTIES_COUNT,
                         {"team_id": self.team.pk})
     return [{"name": name, "count": count} for name, count in rows]
Exemple #27
0
    def get_people(
        self,
        filter: Filter,
        team_id: int,
        target_date: datetime,
        lifecycle_type: str,
        limit: int = 100,
    ):
        entity = filter.entities[0]
        date_from = filter.date_from

        if not date_from:
            date_from = get_earliest_timestamp(team_id)

        interval = filter.interval or "day"
        num_intervals, seconds_in_interval = get_time_diff(interval,
                                                           filter.date_from,
                                                           filter.date_to,
                                                           team_id=team_id)
        interval_increment, interval_string, sub_interval_string = self.get_interval(
            interval)
        trunc_func = get_trunc_func_ch(interval)
        event_query = ""
        event_params: Dict[str, Any] = {}

        _, _, date_params = parse_timestamps(filter=filter, team_id=team_id)

        if entity.type == TREND_FILTER_TYPE_ACTIONS:
            try:
                action = Action.objects.get(pk=entity.id)
                event_query, event_params = format_action_filter(action)
            except:
                return []
        else:
            event_query = "event = %(event)s"
            event_params = {"event": entity.id}

        props_to_filter = [*filter.properties, *entity.properties]
        prop_filters, prop_filter_params = parse_prop_clauses(
            props_to_filter, team_id)

        result = sync_execute(
            LIFECYCLE_PEOPLE_SQL.format(
                interval=interval_string,
                trunc_func=trunc_func,
                event_query=event_query,
                filters=prop_filters,
                sub_interval=sub_interval_string,
            ),
            {
                "team_id":
                team_id,
                "prev_date_from":
                (date_from - interval_increment).strftime("%Y-%m-%d{}".format(
                    " %H:%M:%S" if filter.interval == "hour"
                    or filter.interval == "minute" else " 00:00:00")),
                "num_intervals":
                num_intervals,
                "seconds_in_interval":
                seconds_in_interval,
                **event_params,
                **date_params,
                **prop_filter_params,
                "status":
                lifecycle_type,
                "target_date":
                target_date.strftime("%Y-%m-%d{}".format(
                    " %H:%M:%S" if filter.interval == "hour"
                    or filter.interval == "minute" else " 00:00:00")),
                "offset":
                filter.offset,
                "limit":
                limit,
            },
        )
        people = get_persons_by_uuids(team_id=team_id,
                                      uuids=[p[0] for p in result])
        people = people.prefetch_related(
            Prefetch("persondistinctid_set", to_attr="distinct_ids_cache"))

        from posthog.api.person import PersonSerializer

        return PersonSerializer(people, many=True).data
    def test_create_trends_cohort(self, _insert_cohort_from_query):
        _create_person(team_id=self.team.pk, distinct_ids=["blabla"])
        with freeze_time("2021-01-01 00:06:34"):
            _create_event(
                team=self.team,
                event="$pageview",
                distinct_id="blabla",
                properties={"$math_prop": 1},
                timestamp="2021-01-01T12:00:00Z",
            )

        with freeze_time("2021-01-02 00:06:34"):
            _create_event(
                team=self.team,
                event="$pageview",
                distinct_id="blabla",
                properties={"$math_prop": 4},
                timestamp="2021-01-01T12:00:00Z",
            )

        response = self.client.post(
            f"/api/projects/{self.team.id}/cohorts/?interval=day&display=ActionsLineGraph&events=%5B%7B%22id%22%3A%22%24pageview%22%2C%22name%22%3A%22%24pageview%22%2C%22type%22%3A%22events%22%2C%22order%22%3A0%7D%5D&properties=%5B%5D&entity_id=%24pageview&entity_type=events&date_from=2021-01-01&date_to=2021-01-01&label=%24pageview",
            {"name": "test", "is_static": True},
        ).json()
        cohort_id = response["id"]
        _insert_cohort_from_query.assert_called_once_with(
            cohort_id,
            "TRENDS",
            {
                "date_from": "2021-01-01",
                "date_to": "2021-01-01",
                "display": "ActionsLineGraph",
                "events": [
                    {
                        "id": "$pageview",
                        "type": "events",
                        "order": 0,
                        "name": "$pageview",
                        "custom_name": None,
                        "math": None,
                        "math_property": None,
                        "math_group_type_index": None,
                        "properties": [],
                    }
                ],
                "entity_id": "$pageview",
                "entity_type": "events",
                "insight": "TRENDS",
                "interval": "day",
            },
            entity_data={
                "id": "$pageview",
                "type": "events",
                "order": None,
                "name": "$pageview",
                "custom_name": None,
                "math": None,
                "math_property": None,
                "math_group_type_index": None,
                "properties": [],
            },
        )
        insert_cohort_from_query(
            cohort_id,
            "TRENDS",
            {
                "date_from": "2021-01-01",
                "date_to": "2021-01-01",
                "display": "ActionsLineGraph",
                "events": [
                    {
                        "id": "$pageview",
                        "type": "events",
                        "order": 0,
                        "name": "$pageview",
                        "math": None,
                        "math_property": None,
                        "math_group_type_index": None,
                        "properties": [],
                    }
                ],
                "entity_id": "$pageview",
                "entity_type": "events",
                "insight": "TRENDS",
                "interval": "day",
            },
            entity_data={
                "id": "$pageview",
                "type": "events",
                "order": 0,
                "name": "$pageview",
                "math": None,
                "math_property": None,
                "math_group_type_index": None,
                "properties": [],
            },
        )
        cohort = Cohort.objects.get(pk=cohort_id)
        people = Person.objects.filter(cohort__id=cohort.pk)
        self.assertEqual(cohort.errors_calculating, 0)
        self.assertEqual(
            len(people),
            1,
            {
                "a": sync_execute(
                    "select person_id from person_static_cohort where team_id = {} and cohort_id = {} ".format(
                        self.team.id, cohort.pk
                    )
                ),
                "b": sync_execute(
                    "select person_id from person_static_cohort FINAL where team_id = {} and cohort_id = {} ".format(
                        self.team.id, cohort.pk
                    )
                ),
            },
        )
Exemple #29
0
    def test_prop_cohort_basic_event_days(self):

        _create_person(distinct_ids=["some_other_id"],
                       team_id=self.team.pk,
                       properties={"$some_prop": "something"})

        _create_person(
            distinct_ids=["some_id"],
            team_id=self.team.pk,
            properties={
                "$some_prop": "something",
                "$another_prop": "something"
            },
        )

        _create_event(
            event="$pageview",
            team=self.team,
            distinct_id="some_id",
            properties={"attr": "some_val"},
            timestamp=datetime(2020, 1, 9, 12, 0, 1),
        )

        _create_event(
            event="$pageview",
            team=self.team,
            distinct_id="some_other_id",
            properties={"attr": "some_val"},
            timestamp=datetime(2020, 1, 5, 12, 0, 1),
        )

        with freeze_time("2020-01-10"):
            cohort1 = Cohort.objects.create(
                team=self.team,
                groups=[{
                    "event_id": "$pageview",
                    "days": 1
                }],
                name="cohort1",
            )

            filter = Filter(
                data={
                    "properties": [{
                        "key": "id",
                        "value": cohort1.pk,
                        "type": "cohort"
                    }],
                })
            query, params = parse_prop_clauses(filter.properties, self.team.pk)
            final_query = "SELECT uuid FROM events WHERE team_id = %(team_id)s {}".format(
                query)
            result = sync_execute(final_query, {
                **params, "team_id": self.team.pk
            })
            self.assertEqual(len(result), 1)

            cohort2 = Cohort.objects.create(
                team=self.team,
                groups=[{
                    "event_id": "$pageview",
                    "days": 7
                }],
                name="cohort2",
            )

            filter = Filter(
                data={
                    "properties": [{
                        "key": "id",
                        "value": cohort2.pk,
                        "type": "cohort"
                    }],
                })
            query, params = parse_prop_clauses(filter.properties, self.team.pk)
            final_query = "SELECT uuid FROM events WHERE team_id = %(team_id)s {}".format(
                query)
            result = sync_execute(final_query, {
                **params, "team_id": self.team.pk
            })
            self.assertEqual(len(result), 2)
Exemple #30
0
def get_elements(event_id: Union[int, UUID]) -> List[Element]:
    return chain_to_elements(
        sync_execute("select elements_chain from events where uuid = %(id)s", {"id": event_id})[0][0]
    )