def operations(self):
    """Build the ordered operation list for the replicated-schema migration.

    Sequence: stop merges, pause materialized-column computation, migrate each
    table to its replicated engine, re-enable per-table ingestion, resume
    materialized-column computation, and finally restart merges. Rollbacks
    mirror each step in reverse.
    """
    # Iterate tables once and reuse the list for both operation groups.
    tables = list(self.tables_to_migrate())

    TABLE_MIGRATION_OPERATIONS = list(
        flatten([list(self.replicated_table_operations(table)) for table in tables])
    )
    RE_ENABLE_INGESTION_OPERATIONS = list(
        flatten([list(self.finalize_table_operations(table)) for table in tables])
    )

    return [
        AsyncMigrationOperationSQL(sql="SYSTEM STOP MERGES", rollback="SYSTEM START MERGES"),
        # Pause materialized-column computation while tables are being swapped.
        AsyncMigrationOperation(
            fn=lambda _: setattr(config, "COMPUTE_MATERIALIZED_COLUMNS_ENABLED", False),
            rollback_fn=lambda _: setattr(config, "COMPUTE_MATERIALIZED_COLUMNS_ENABLED", True),
        ),
        *TABLE_MIGRATION_OPERATIONS,
        *RE_ENABLE_INGESTION_OPERATIONS,
        # BUG FIX: the closing operation must RE-ENABLE materialized-column
        # computation. The original duplicated the disabling operation above
        # (fn=False / rollback=True), which would leave the setting off after
        # a successful migration.
        AsyncMigrationOperation(
            fn=lambda _: setattr(config, "COMPUTE_MATERIALIZED_COLUMNS_ENABLED", True),
            rollback_fn=lambda _: setattr(config, "COMPUTE_MATERIALIZED_COLUMNS_ENABLED", False),
        ),
        AsyncMigrationOperationSQL(
            sql="SYSTEM START MERGES",
            rollback="SYSTEM STOP MERGES",
        ),
    ]
def _parse_list_results(self, results: List[Tuple]):
    """Convert raw session rows into serialized session dictionaries.

    Columns 4-8 of each row hold parallel per-event arrays (uuid, event,
    properties, timestamp, elements_chain); they are zipped back together
    into one serializer input per event.
    """
    parsed = []
    for row in results:
        distinct_id = row[0]
        serialized_events = [
            ClickhouseEventSerializer(
                [
                    uuid,         # uuid
                    name,         # event
                    props,        # properties
                    ts,           # timestamp
                    None,         # team_id
                    distinct_id,  # distinct_id
                    chain,        # elements_chain
                    None,         # properties keys
                    None,         # properties values
                ],
                many=False,
            ).data
            for uuid, name, props, ts, chain in zip(row[4], row[5], row[6], row[7], row[8])
        ]
        parsed.append(
            {
                "distinct_id": distinct_id,
                "global_session_id": row[1],
                "length": row[2],
                "start_time": row[3],
                "end_time": row[9],
                "event_count": len(row[4]),
                "events": serialized_events,
                "properties": {},
                "matching_events": list(sorted(set(flatten(row[10:])))),
            }
        )
    return parsed
def _parse_list_results(self, results: List[Tuple]):
    """Map raw session rows onto response dictionaries.

    URLs in columns 5 and 6 are normalized via `_process_url`; trailing
    columns (7+) hold per-filter matching-event collections that are
    deduplicated and sorted.
    """
    sessions = []
    for row in results:
        session = {
            "distinct_id": row[0],
            "global_session_id": row[1],
            "length": row[2],
            "start_time": row[3],
            "end_time": row[4],
            "start_url": _process_url(row[5]),
            "end_url": _process_url(row[6]),
            "matching_events": list(sorted(set(flatten(row[7:])))),
        }
        sessions.append(session)
    return sessions
def get_values(self, request: request.Request) -> List[Dict[str, Any]]:
    """Return up to 50 sample values for a property key over the last 7 days.

    The pseudo-key "custom_event" instead returns the team's non-$ event
    names. Optional ?value= narrows results with a LIKE filter.
    """
    key = request.GET.get("key")

    # Custom events bypass the property sampling entirely.
    if key == "custom_event":
        event_names = (
            Event.objects.filter(team_id=self.team_id)
            .filter(~Q(event__in=["$autocapture", "$pageview", "$identify", "$pageleave", "$screen"]))
            .values("event")
            .distinct()
        )
        return [{"name": row["event"]} for row in event_names]

    params: List[Optional[Union[str, int]]] = [key, key]
    search = request.GET.get("value")
    if search:
        where = " AND properties ->> %s LIKE %s"
        params += [key, "%{}%".format(request.GET["value"])]
    else:
        where = ""
    params += [
        self.team_id,
        relative_date_parse("-7d").strftime("%Y-%m-%d 00:00:00"),
        timezone.now().strftime("%Y-%m-%d 23:59:59"),
    ]

    # This samples a bunch of events with that property, and then orders them
    # by most popular in that sample. Much quicker than scanning the whole table.
    rows = Event.objects.raw(
        """
        SELECT value, COUNT(1) as id
        FROM (
            SELECT ("posthog_event"."properties" -> %s) as "value"
            FROM "posthog_event"
            WHERE ("posthog_event"."properties" -> %s) IS NOT NULL {}
            AND ("posthog_event"."team_id" = %s)
            AND ("posthog_event"."timestamp" >= %s)
            AND ("posthog_event"."timestamp" <= %s)
            LIMIT 10000
        ) as "value"
        GROUP BY value
        ORDER BY id DESC
        LIMIT 50;
        """.format(
            where
        ),
        params,
    )
    return [{"name": convert_property_value(v)} for v in flatten([row.value for row in rows])]
def values(self, request: Request, **kwargs) -> Response:
    """Return distinct values for the ?key= property, JSON-decoding where possible.

    Values stored as JSON text (dicts/arrays) are parsed before flattening;
    anything that fails to parse is kept verbatim.
    """
    key = request.GET.get("key")
    team = self.team
    decoded: list = []
    if key:
        for row in get_property_values_for_key(key, team, value=request.GET.get("value")):
            raw = row[0]
            try:
                # Try loading as json for dicts or arrays
                decoded.append(json.loads(raw))
            except json.decoder.JSONDecodeError:
                decoded.append(raw)
    return Response([{"name": convert_property_value(v)} for v in flatten(decoded)])
def _session_end(self, distinct_id: str): self.sessions_count += 1 session = self.running_sessions[distinct_id] # :TRICKY: Remove sessions where some filtered actions did not occur _after_ limiting to avoid running into pagination issues if self.action_filter_count == 0 or all( len(ids) > 0 for ids in session["matching_events"]): self._sessions.append({ **session, "matching_events": list(sorted(set(flatten(session["matching_events"])))), "global_session_id": f"{distinct_id}-{session['start_time']}", "length": (session["end_time"] - session["start_time"]).seconds, }) del self.running_sessions[distinct_id]
def values(self, request: Request, **kwargs) -> Response:
    """Return values for a property key (or custom event names) via ClickHouse.

    The pseudo-key "custom_event" lists the team's custom event names;
    otherwise values are fetched for the key, JSON-decoding dict/array text
    where possible before flattening.
    """
    key = request.GET.get("key")
    team = self.team

    if key == "custom_event":
        events = sync_execute(GET_CUSTOM_EVENTS, {"team_id": team.pk})
        return Response([{"name": event[0]} for event in events])

    decoded: list = []
    if key:
        for row in get_property_values_for_key(key, team, value=request.GET.get("value")):
            raw = row[0]
            try:
                # Try loading as json for dicts or arrays
                decoded.append(json.loads(raw))
            except json.decoder.JSONDecodeError:
                decoded.append(raw)
    return Response([{"name": convert_property_value(v)} for v in flatten(decoded)])
def get_values(self, request: request.Request) -> List[Dict[str, Any]]:
    """Sample events and return the 50 most common values for a property key.

    Optional ?value= narrows the sample with a LIKE filter on the
    stringified property value.
    """
    key = request.GET.get("key")
    params: List[Optional[Union[str, int]]] = [key, key]
    search = request.GET.get("value")
    if search:
        where = " AND properties ->> %s LIKE %s"
        params += [key, "%{}%".format(request.GET["value"])]
    else:
        where = ""
    params.append(self.team_id)

    # This samples a bunch of events with that property, and then orders them
    # by most popular in that sample. Much quicker than scanning the whole table.
    rows = Event.objects.raw(
        """
        SELECT value, COUNT(1) as id
        FROM (
            SELECT ("posthog_event"."properties" -> %s) as "value"
            FROM "posthog_event"
            WHERE ("posthog_event"."properties" -> %s) IS NOT NULL {}
            AND ("posthog_event"."team_id" = %s)
            LIMIT 10000
        ) as "value"
        GROUP BY value
        ORDER BY id DESC
        LIMIT 50;
        """.format(where),
        params,
    )
    return [{"name": convert_property_value(v)} for v in flatten([row.value for row in rows])]