Exemple #1
0
    def operations(self):
        """Build the ordered list of operations that make up this migration.

        The sequence is bracketed: merges are stopped and materialized-column
        computation is switched off first, then the per-table operations and
        the per-table finalize operations run, and finally the two opening
        steps are undone in reverse order. Every bracket step carries a
        rollback that is the inverse of its forward action.

        Returns:
            A list holding the bracket operations with everything yielded by
            ``replicated_table_operations`` and ``finalize_table_operations``
            (for each table from ``tables_to_migrate``) spliced in between.
        """
        table_migration_operations = list(
            flatten([
                list(self.replicated_table_operations(table))
                for table in self.tables_to_migrate()
            ]))
        re_enable_ingestion_operations = list(
            flatten([
                list(self.finalize_table_operations(table))
                for table in self.tables_to_migrate()
            ]))

        return [
            AsyncMigrationOperationSQL(sql="SYSTEM STOP MERGES",
                                       rollback="SYSTEM START MERGES"),
            AsyncMigrationOperation(
                fn=lambda _: setattr(
                    config, "COMPUTE_MATERIALIZED_COLUMNS_ENABLED", False),
                rollback_fn=lambda _: setattr(
                    config, "COMPUTE_MATERIALIZED_COLUMNS_ENABLED", True),
            ),
            *table_migration_operations,
            *re_enable_ingestion_operations,
            # Closing half of the bracket: turn materialized-column computation
            # back ON (and off again on rollback). This must be the inverse of
            # the opening step above, mirroring the STOP/START MERGES pair;
            # otherwise the flag stays disabled after a successful migration.
            AsyncMigrationOperation(
                fn=lambda _: setattr(
                    config, "COMPUTE_MATERIALIZED_COLUMNS_ENABLED", True),
                rollback_fn=lambda _: setattr(
                    config, "COMPUTE_MATERIALIZED_COLUMNS_ENABLED", False),
            ),
            AsyncMigrationOperationSQL(
                sql="SYSTEM START MERGES",
                rollback="SYSTEM STOP MERGES",
            ),
        ]
Exemple #2
0
    def _parse_list_results(self, results: List[Tuple]):
        """Turn raw session rows into a list of session dictionaries.

        Rows are read positionally: columns 0-3 and 9 hold scalar session
        fields, columns 4-8 hold parallel per-event sequences (uuid, event,
        properties, timestamp, elements_chain), and every column from 10
        onwards is flattened, de-duplicated and sorted into
        ``matching_events``.
        """
        sessions = []
        for row in results:
            uuids = row[4]
            serialized_events = [
                ClickhouseEventSerializer(
                    [
                        uuid,  # uuid
                        row[5][idx],  # event
                        row[6][idx],  # properties
                        row[7][idx],  # timestamp
                        None,  # team_id
                        row[0],  # distinct_id
                        row[8][idx],  # elements_chain
                        None,  # properties keys
                        None,  # properties values
                    ],
                    many=False,
                ).data
                for idx, uuid in enumerate(uuids)
            ]

            session = {
                "distinct_id": row[0],
                "global_session_id": row[1],
                "length": row[2],
                "start_time": row[3],
                "end_time": row[9],
                "event_count": len(uuids),
                "events": serialized_events,
                "properties": {},
                "matching_events": sorted(set(flatten(row[10:]))),
            }
            sessions.append(session)

        return sessions
Exemple #3
0
 def _parse_list_results(self, results: List[Tuple]):
     """Turn raw session rows into a list of session dictionaries.

     Columns 0-4 are copied through as-is, columns 5 and 6 are passed
     through ``_process_url``, and every column from 7 onwards is flattened,
     de-duplicated and sorted into ``matching_events``.
     """
     sessions = []
     for row in results:
         sessions.append({
             "distinct_id": row[0],
             "global_session_id": row[1],
             "length": row[2],
             "start_time": row[3],
             "end_time": row[4],
             "start_url": _process_url(row[5]),
             "end_url": _process_url(row[6]),
             "matching_events": sorted(set(flatten(row[7:]))),
         })
     return sessions
Exemple #4
0
    def get_values(self, request: request.Request) -> List[Dict[str, Any]]:
        """Return the most frequent values recorded for an event property.

        Reads ``key`` (and optionally ``value``) from the query string.
        For the special key ``custom_event`` it instead returns the distinct
        event names of the team, excluding a fixed set of built-in events.

        Otherwise it samples up to 10000 events of the team that carry the
        property within the queried time window, counts each value in that
        sample, and returns the 50 most common ones. When ``value`` is given,
        only property values matching ``LIKE %value%`` are considered.

        Returns:
            A list of ``{"name": ...}`` dictionaries.
        """
        key = request.GET.get("key")

        if key == "custom_event":
            built_in_events = ["$autocapture", "$pageview", "$identify", "$pageleave", "$screen"]
            custom_events = (
                Event.objects.filter(team_id=self.team_id)
                .filter(~Q(event__in=built_in_events))
                .values("event")
                .distinct()
            )
            return [{"name": row["event"]} for row in custom_events]

        # Placeholders are positional, so the order of ``params`` must follow
        # the order of the %s markers in the final SQL text.
        params: List[Optional[Union[str, int]]] = [key, key]
        search = request.GET.get("value")
        if search:
            value_clause = " AND properties ->> %s LIKE %s"
            params += [key, f"%{search}%"]
        else:
            value_clause = ""

        params += [
            self.team_id,
            relative_date_parse("-7d").strftime("%Y-%m-%d 00:00:00"),
            timezone.now().strftime("%Y-%m-%d 23:59:59"),
        ]

        # Sampling a bounded number of events and ranking values inside that
        # sample is much quicker than aggregating over the entire table.
        query = """
            SELECT
                value, COUNT(1) as id
            FROM (
                SELECT
                    ("posthog_event"."properties" -> %s) as "value"
                FROM
                    "posthog_event"
                WHERE
                    ("posthog_event"."properties" -> %s) IS NOT NULL {} AND
                    ("posthog_event"."team_id" = %s) AND
                    ("posthog_event"."timestamp" >= %s) AND
                    ("posthog_event"."timestamp" <= %s)
                LIMIT 10000
            ) as "value"
            GROUP BY value
            ORDER BY id DESC
            LIMIT 50;
        """.format(value_clause)
        rows = Event.objects.raw(query, params)

        return [
            {"name": convert_property_value(item)}
            for item in flatten([row.value for row in rows])
        ]
Exemple #5
0
 def values(self, request: Request, **kwargs) -> Response:
     """Respond with the known values for the property named by ``key``.

     When the ``key`` query parameter is present, values are fetched with
     ``get_property_values_for_key`` (optionally narrowed by the ``value``
     query parameter). The first column of each row is JSON-decoded when
     possible and kept verbatim otherwise. The decoded items are flattened
     and returned as a list of ``{"name": ...}`` entries. Without a ``key``
     the response is built from an empty list.
     """
     key = request.GET.get("key")
     team = self.team
     decoded = []
     if key:
         rows = get_property_values_for_key(key, team, value=request.GET.get("value"))
         for row in rows:
             raw = row[0]
             try:
                 # Dicts and arrays arrive JSON-encoded; decode those.
                 item = json.loads(raw)
             except json.decoder.JSONDecodeError:
                 # Not valid JSON: keep the raw value.
                 item = raw
             decoded.append(item)
     names = [{"name": convert_property_value(item)} for item in flatten(decoded)]
     return Response(names)
Exemple #6
0
 def _session_end(self, distinct_id: str):
     """Close the running session of ``distinct_id`` and record it if it qualifies.

     The session counter is always incremented and the running session is
     always removed. The session is appended to ``self._sessions`` only when
     no action filters are active, or when every entry of its
     ``matching_events`` is non-empty. Recorded sessions get a flattened,
     de-duplicated, sorted ``matching_events`` list, a ``global_session_id``
     and a ``length`` in whole seconds.

     Raises:
         KeyError: if there is no running session for ``distinct_id``.
     """
     self.sessions_count += 1
     session = self.running_sessions[distinct_id]
     # :TRICKY: Remove sessions where some filtered actions did not occur _after_ limiting to avoid running into pagination issues
     if self.action_filter_count == 0 or all(
             len(ids) > 0 for ids in session["matching_events"]):
         duration = session["end_time"] - session["start_time"]
         self._sessions.append({
             **session,
             "matching_events":
             list(sorted(set(flatten(session["matching_events"])))),
             "global_session_id":
             f"{distinct_id}-{session['start_time']}",
             # ``timedelta.seconds`` is only the sub-day component, so it
             # wraps around for sessions lasting a day or more. Use the full
             # duration, truncated to an int to keep the value's type.
             "length":
             int(duration.total_seconds()),
         })
     del self.running_sessions[distinct_id]
Exemple #7
0
 def values(self, request: Request, **kwargs) -> Response:
     """Respond with the known values for the property named by ``key``.

     The special key ``custom_event`` returns the first column of each row
     produced by the ``GET_CUSTOM_EVENTS`` query for the team. Any other
     non-empty key is looked up with ``get_property_values_for_key``
     (optionally narrowed by the ``value`` query parameter). The first
     column of each row is JSON-decoded when possible and kept verbatim
     otherwise, then the items are flattened. Both paths return a list of
     ``{"name": ...}`` entries. Without a ``key`` the response is built
     from an empty list.
     """
     key = request.GET.get("key")
     team = self.team

     if key == "custom_event":
         custom_events = sync_execute(GET_CUSTOM_EVENTS, {"team_id": team.pk})
         return Response([{"name": row[0]} for row in custom_events])

     decoded = []
     if key:
         rows = get_property_values_for_key(key, team, value=request.GET.get("value"))
         for row in rows:
             raw = row[0]
             try:
                 # Dicts and arrays arrive JSON-encoded; decode those.
                 item = json.loads(raw)
             except json.decoder.JSONDecodeError:
                 # Not valid JSON: keep the raw value.
                 item = raw
             decoded.append(item)
     names = [{"name": convert_property_value(item)} for item in flatten(decoded)]
     return Response(names)
Exemple #8
0
    def get_values(self, request: request.Request) -> List[Dict[str, Any]]:
        """Return the most frequent values recorded for an event property.

        Reads ``key`` (and optionally ``value``) from the query string,
        samples up to 10000 events of the team that carry the property,
        counts each value in that sample, and returns the 50 most common
        ones. When ``value`` is given, only property values matching
        ``LIKE %value%`` are considered.

        Returns:
            A list of ``{"name": ...}`` dictionaries.
        """
        key = request.GET.get("key")

        # Placeholders are positional, so the order of ``params`` must follow
        # the order of the %s markers in the final SQL text.
        params: List[Optional[Union[str, int]]] = [key, key]
        search = request.GET.get("value")
        if search:
            value_clause = " AND properties ->> %s LIKE %s"
            params += [key, f"%{search}%"]
        else:
            value_clause = ""
        params.append(self.team_id)

        # Sampling a bounded number of events and ranking values inside that
        # sample is much quicker than aggregating over the entire table.
        query = """
            SELECT
                value, COUNT(1) as id
            FROM (
                SELECT
                    ("posthog_event"."properties" -> %s) as "value"
                FROM
                    "posthog_event"
                WHERE
                    ("posthog_event"."properties" -> %s) IS NOT NULL {} AND
                    ("posthog_event"."team_id" = %s)
                LIMIT 10000
            ) as "value"
            GROUP BY value
            ORDER BY id DESC
            LIMIT 50;
        """.format(value_clause)
        rows = Event.objects.raw(query, params)

        return [
            {"name": convert_property_value(item)}
            for item in flatten([row.value for row in rows])
        ]