def test_status_report_multiple_ids_per_person(self) -> None:
    person_id1 = str(UUIDT())
    person_id2 = str(UUIDT())

    create_person_distinct_id(self.team.id, "id1", person_id1)
    create_person_distinct_id(self.team.id, "id2", person_id1)
    create_person_distinct_id(self.team.id, "id3", person_id1)
    create_person_distinct_id(self.team.id, "id4", person_id1)
    create_person_distinct_id(self.team.id, "id5", person_id1)

    create_person_distinct_id(self.team.id, "id6", person_id2)
    create_person_distinct_id(self.team.id, "id7", person_id2)
    create_person_distinct_id(self.team.id, "id8", person_id2)

    report = status_report(dry_run=True).get("teams")[self.team.id]  # type: ignore
    multiple_ids_report = report["multiple_ids_per_person"]

    expected_result = {
        "total_persons_with_more_than_2_ids": 2,
        "max_distinct_ids_for_one_person": 5,
    }

    self.assertEqual(multiple_ids_report, expected_result)
def test_old_logs_are_deleted_while_newer_ones_kept(self) -> None:
    plugin_server_instance_id = str(UUIDT())
    now = timezone.now()

    some_plugin: Plugin = Plugin.objects.create(organization=self.organization)
    some_plugin_config: PluginConfig = PluginConfig.objects.create(plugin=some_plugin, order=1)

    for days_before in [0, 2, 6, 9, 31]:
        PluginLogEntry.objects.create(
            team_id=self.team.pk,
            plugin_id=some_plugin.pk,
            plugin_config_id=some_plugin_config.pk,
            type=PluginLogEntry.Type.INFO,
            message="Test",
            instance_id=plugin_server_instance_id,
            timestamp=now - timezone.timedelta(days_before),
        )

    self.assertEqual(PluginLogEntry.objects.count(), 5)

    delete_old_plugin_logs()

    self.assertEqual(PluginLogEntry.objects.count(), 3)
def attempt_migration_rollback(migration_instance: AsyncMigration):
    """
    Cycle through the operations in reverse order starting from the last completed op
    and run the specified rollback statements.
    """
    migration_instance.refresh_from_db()
    ops = get_async_migration_definition(migration_instance.name).operations

    # if the migration was completed the index is set 1 after, normally we should try rollback for current op
    current_index = min(migration_instance.current_operation_index, len(ops) - 1)

    for op_index in range(current_index, -1, -1):
        try:
            op = ops[op_index]
            execute_op(op, str(UUIDT()), rollback=True)
        except Exception as e:
            error = f"At operation {op_index} rollback failed with error:{str(e)}"
            process_error(
                migration_instance=migration_instance,
                error=error,
                rollback=False,
                alert=True,
                current_operation_index=op_index,
            )
            return

    update_async_migration(
        migration_instance=migration_instance, status=MigrationStatus.RolledBack, progress=0
    )
def log_event_to_dead_letter_queue(
    raw_payload: Dict,
    event_name: str,
    event: Dict,
    error_message: str,
    error_location: str,
    topic: str = KAFKA_DEAD_LETTER_QUEUE,
):
    data = event.copy()

    data["error_timestamp"] = datetime.now().isoformat()
    data["error_location"] = error_location
    data["error"] = error_message
    data["elements_chain"] = ""
    data["id"] = str(UUIDT())
    data["event"] = event_name
    data["raw_payload"] = json.dumps(raw_payload)
    data["now"] = datetime.fromisoformat(data["now"]).replace(tzinfo=None).isoformat() if data["now"] else None
    data["tags"] = ["django_server"]
    data["event_uuid"] = event["uuid"]
    del data["uuid"]

    try:
        KafkaProducer().produce(topic=topic, data=data)
        statsd.incr(settings.EVENTS_DEAD_LETTER_QUEUE_STATSD_METRIC)
    except Exception as e:
        capture_exception(e)
        statsd.incr("events_dead_letter_queue_produce_error")

        if settings.DEBUG:
            print("Failed to produce to events dead letter queue with error:", e)
def capture_internal(event, distinct_id, ip, site_url, now, sent_at, team_id):
    event_uuid = UUIDT()

    if is_clickhouse_enabled():
        log_event(
            distinct_id=distinct_id,
            ip=ip,
            site_url=site_url,
            data=event,
            team_id=team_id,
            now=now,
            sent_at=sent_at,
            event_uuid=event_uuid,
        )
    else:
        task_name = "posthog.tasks.process_event.process_event_with_plugins"
        celery_queue = settings.PLUGINS_CELERY_QUEUE
        celery_app.send_task(
            name=task_name,
            queue=celery_queue,
            args=[
                distinct_id,
                ip,
                site_url,
                event,
                team_id,
                now.isoformat(),
                sent_at,
            ],
        )
def _generate_psql_data(self, team, n_events, n_days):
    distinct_ids = []
    for i in range(0, n_events):
        distinct_id = str(UUIDT())
        distinct_ids.append(distinct_id)
        Person.objects.create(team=team, distinct_ids=[distinct_id], properties={"is_demo": True})

    Event.objects.bulk_create(
        Event(
            event="$purchase",
            team=team,
            distinct_id=distinct_ids[i],
            properties={
                "plan": PRICING_TIERS[_deterministic_random_value(distinct_ids[i])][0],
                "purchase_value": PRICING_TIERS[_deterministic_random_value(distinct_ids[i])][1],
            },
            timestamp=now() - relativedelta(days=random.randint(0, n_days)),
        )
        for i in range(0, n_events)
    )
def test_simple_log_is_fetched(self):
    plugin_server_instance_id = str(UUIDT())

    some_plugin: Plugin = Plugin.objects.create(organization=self.organization)
    some_plugin_config: PluginConfig = PluginConfig.objects.create(plugin=some_plugin, order=1)

    plugin_log_entry_factory(
        team_id=self.team.pk,
        plugin_id=some_plugin.pk,
        plugin_config_id=some_plugin_config.pk,
        source=PluginLogEntry.Source.CONSOLE,
        type=PluginLogEntry.Type.INFO,
        message="Something happened!",
        instance_id=plugin_server_instance_id,
    )

    results = fetch_plugin_log_entries(
        plugin_config_id=some_plugin_config.pk,
        after=timezone.datetime.min,
        before=timezone.now() + timezone.timedelta(seconds=5),
    )

    self.assertEqual(len(results), 1)
    self.assertEqual(results[0].message, "Something happened!")
def test_run_migration_in_full(self):
    self.migration.sec.reset_count()
    migration_successful = start_async_migration("test")
    sm = AsyncMigration.objects.get(name="test")

    with connection.cursor() as cursor:
        cursor.execute("SELECT * FROM test_async_migration")
        res = cursor.fetchone()

    self.assertEqual(res, ("a", "c"))

    self.assertTrue(migration_successful)
    self.assertEqual(sm.name, "test")
    self.assertEqual(sm.description, self.TEST_MIGRATION_DESCRIPTION)
    self.assertEqual(sm.status, MigrationStatus.CompletedSuccessfully)
    self.assertEqual(sm.progress, 100)

    errors = AsyncMigrationError.objects.filter(async_migration=sm)
    self.assertEqual(errors.count(), 0)

    self.assertTrue(UUIDT.is_valid_uuid(sm.current_query_id))
    self.assertEqual(sm.current_operation_index, 7)
    self.assertEqual(sm.posthog_min_version, "1.0.0")
    self.assertEqual(sm.posthog_max_version, "100000.0.0")
    self.assertEqual(sm.finished_at.day, datetime.today().day)
    self.assertEqual(self.migration.sec.side_effect_count, 3)
    self.assertEqual(self.migration.sec.side_effect_rollback_count, 0)
def emit_omni_person(
    event_uuid: UUID,
    team_id: int,
    distinct_id: str,
    uuid: Optional[UUID] = None,
    properties: Optional[Dict] = {},
    sync: bool = False,
    is_identified: bool = False,
    timestamp: Optional[datetime.datetime] = None,
) -> UUID:
    if not uuid:
        uuid = UUIDT()
    if not timestamp:
        timestamp = now()

    data = {
        "event_uuid": str(event_uuid),
        "uuid": str(uuid),
        "distinct_id": distinct_id,
        "team_id": team_id,
        "properties": json.dumps(properties),
        "is_identified": int(is_identified),
        "ts": timestamp.strftime("%Y-%m-%d %H:%M:%S.%f"),
    }
    p = KafkaProducer()
    p.produce(topic=KAFKA_OMNI_PERSON, data=data)
    return uuid
def create_people(self):
    self.people = [self.make_person(i) for i in range(self.n_people)]
    self.distinct_ids = [str(UUIDT()) for _ in self.people]
    self.people = Person.objects.bulk_create(self.people)

    pids = [
        PersonDistinctId(team=self.team, person=person, distinct_id=distinct_id)
        for person, distinct_id in zip(self.people, self.distinct_ids)
    ]
    PersonDistinctId.objects.bulk_create(pids)

    from ee.clickhouse.models.person import create_person, create_person_distinct_id

    for person in self.people:
        create_person(
            uuid=str(person.uuid),
            team_id=person.team.pk,
            properties=person.properties,
            is_identified=person.is_identified,
        )
    for pid in pids:
        create_person_distinct_id(pid.team.pk, pid.distinct_id, str(pid.person.uuid))  # use dummy number for id
def create_person(
    team_id: int,
    uuid: Optional[str] = None,
    properties: Optional[Dict] = {},
    sync: bool = False,
    is_identified: bool = False,
    timestamp: Optional[datetime.datetime] = None,
) -> str:
    if uuid:
        uuid = str(uuid)
    else:
        uuid = str(UUIDT())
    if not timestamp:
        timestamp = now()

    data = {
        "id": str(uuid),
        "team_id": team_id,
        "properties": json.dumps(properties),
        "is_identified": int(is_identified),
        "timestamp": timestamp.strftime("%Y-%m-%d %H:%M:%S"),
    }
    p = ClickhouseProducer()
    p.produce(topic=KAFKA_PERSON, sql=INSERT_PERSON_SQL, data=data, sync=sync)
    return uuid
def create_element(
    element: Element,
    team: Team,
    event_uuid: UUID,
    elements_hash: str,
    timestamp: Optional[datetime.datetime] = None,
) -> None:
    if not timestamp:
        timestamp = now()

    data = {
        "uuid": str(UUIDT()),
        "event_uuid": str(event_uuid),
        "created_at": timestamp.strftime("%Y-%m-%d %H:%M:%S.%f"),
        "text": element.text or "",
        "tag_name": element.tag_name or "",
        "href": element.href or "",
        "attr_id": element.attr_id or "",
        "attr_class": element.attr_class or [],
        "nth_child": element.nth_child or 0,
        "nth_of_type": element.nth_of_type or 0,
        "attributes": json.dumps(element.attributes or {}),
        "order": element.order or 0,
        "team_id": team.pk,
        "elements_hash": elements_hash,
    }
    p = ClickhouseProducer()
    p.produce(topic=KAFKA_ELEMENTS, sql=INSERT_ELEMENTS_SQL, data=data)
def test_log_limit_works(self):
    plugin_server_instance_id = str(UUIDT())

    some_plugin: Plugin = Plugin.objects.create(organization=self.organization)
    some_plugin_config: PluginConfig = PluginConfig.objects.create(plugin=some_plugin, order=1)

    plugin_log_entry_factory(
        team_id=self.team.pk,
        plugin_id=some_plugin.pk,
        plugin_config_id=some_plugin_config.pk,
        source=PluginLogEntry.Source.CONSOLE,
        type=PluginLogEntry.Type.INFO,
        message="Something happened!",
        instance_id=plugin_server_instance_id,
    )
    plugin_log_entry_factory(
        team_id=self.team.pk,
        plugin_id=some_plugin.pk,
        plugin_config_id=some_plugin_config.pk,
        source=PluginLogEntry.Source.CONSOLE,
        type=PluginLogEntry.Type.ERROR,
        message="Random error",
        instance_id=plugin_server_instance_id,
    )

    results = fetch_plugin_log_entries(plugin_config_id=some_plugin_config.pk, limit=1)

    self.assertEqual(len(results), 1)
    self.assertEqual(results[0].message, "Random error")
def run_query(fn, *args):
    uuid = str(UUIDT())
    client._request_information = {"kind": "benchmark", "id": f"{uuid}::${fn.__name__}"}

    try:
        fn(*args)
        return get_clickhouse_query_stats(uuid)
    finally:
        client._request_information = None
def test_create_cache(self) -> None:
    self.assertEqual(len(get_all_elements()), 0)

    create_elements(
        event_uuid=UUIDT(),
        team=self.team,
        elements=[
            Element(tag_name="a", href="/a-url", nth_child=1, nth_of_type=0),
            Element(tag_name="button", nth_child=0, nth_of_type=0),
            Element(tag_name="div", nth_child=0, nth_of_type=0),
            Element(tag_name="div", nth_child=0, nth_of_type=0, attr_id="nested"),
        ],
        use_cache=True,
    )
    self.assertEqual(len(get_all_elements()), 4)

    create_elements(
        event_uuid=UUIDT(),
        team=self.team,
        elements=[
            Element(tag_name="a", href="/a-url", nth_child=1, nth_of_type=0),
            Element(tag_name="button", nth_child=0, nth_of_type=0),
            Element(tag_name="div", nth_child=0, nth_of_type=0),
            Element(tag_name="div", nth_child=0, nth_of_type=0, attr_id="nested"),
        ],
        use_cache=True,
    )
    self.assertEqual(len(get_all_elements()), 4)
def retrieve(self, request: Request, pk: Optional[Union[int, str]] = None, *args: Any, **kwargs: Any) -> Response:
    if not isinstance(pk, str) or not UUIDT.is_valid_uuid(pk):
        return Response(
            {"detail": "Invalid UUID", "code": "invalid", "type": "validation_error"},
            status=400,
        )
    query_result = sync_execute(
        SELECT_ONE_EVENT_SQL, {"team_id": self.team.pk, "event_id": pk.replace("-", "")}
    )
    if len(query_result) == 0:
        raise NotFound(detail=f"No events exist for event UUID {pk}")
    res = ClickhouseEventSerializer(query_result[0], many=False).data
    return Response(res)
def process_event_ee(
    distinct_id: str,
    ip: str,
    site_url: str,
    data: dict,
    team_id: int,
    now: datetime.datetime,
    sent_at: Optional[datetime.datetime],
) -> None:
    timer = statsd.Timer("%s_posthog_cloud" % (settings.STATSD_PREFIX,))
    timer.start()
    properties = data.get("properties", {})
    if data.get("$set"):
        properties["$set"] = data["$set"]

    person_uuid = UUIDT()
    event_uuid = UUIDT()
    ts = handle_timestamp(data, now, sent_at)
    handle_identify_or_alias(data["event"], properties, distinct_id, team_id)

    if data["event"] == "$snapshot":
        create_session_recording_event(
            uuid=event_uuid,
            team_id=team_id,
            distinct_id=distinct_id,
            session_id=properties["$session_id"],
            snapshot_data=properties["$snapshot_data"],
            timestamp=ts,
        )
        return

    _capture_ee(
        event_uuid=event_uuid,
        person_uuid=person_uuid,
        ip=ip,
        site_url=site_url,
        team_id=team_id,
        event=data["event"],
        distinct_id=distinct_id,
        properties=properties,
        timestamp=ts,
    )
    timer.stop("process_event_ee")
def _create_person(**kwargs) -> Person:
    if kwargs.get("uuid"):
        uuid = str(kwargs.pop("uuid"))
    else:
        uuid = str(UUIDT())
    distinct_ids = kwargs.pop("distinct_ids")
    person = create_person(uuid=uuid, **kwargs)
    for id in distinct_ids:
        create_person_distinct_id(0, kwargs["team_id"], id, str(person))
    return Person(id=person, uuid=person)
def process_event_ee(
    distinct_id: str,
    ip: str,
    site_url: str,
    data: dict,
    team_id: int,
    now: str,
    sent_at: Optional[str],
) -> None:
    properties = data.get("properties", data.get("$set", {}))
    person_uuid = UUIDT()
    event_uuid = UUIDT()
    ts = handle_timestamp(data, now, sent_at)

    _capture_ee(
        event_uuid=event_uuid,
        person_uuid=person_uuid,
        ip=ip,
        site_url=site_url,
        team_id=team_id,
        event=data["event"],
        distinct_id=distinct_id,
        properties=properties,
        timestamp=ts,
    )
def create_people(self):
    self.people = [self.make_person(i) for i in range(self.n_people)]
    self.distinct_ids = [str(UUIDT()) for _ in self.people]
    Person.objects.bulk_create(self.people)
    PersonDistinctId.objects.bulk_create(
        [
            PersonDistinctId(team=self.team, person=person, distinct_id=distinct_id)
            for person, distinct_id in zip(self.people, self.distinct_ids)
        ]
    )
def run_async_migration_next_op(migration_name: str, migration_instance: Optional[AsyncMigration] = None):
    """
    Runs the next operation specified by the currently running migration
    We run the next operation of the migration which needs attention

    Returns (run_next, success)
    Terminology:
    - migration_instance: The migration object as stored in the DB
    - migration_definition: The actual migration class outlining the operations (e.g. async_migrations/examples/example.py)
    """
    if not migration_instance:
        try:
            migration_instance = AsyncMigration.objects.get(name=migration_name, status=MigrationStatus.Running)
        except AsyncMigration.DoesNotExist:
            return (False, False)
    else:
        migration_instance.refresh_from_db()

    assert migration_instance is not None

    migration_definition = get_async_migration_definition(migration_name)
    if migration_instance.current_operation_index > len(migration_definition.operations) - 1:
        complete_migration(migration_instance)
        return (False, True)

    error = None
    current_query_id = str(UUIDT())

    try:
        op = migration_definition.operations[migration_instance.current_operation_index]
        execute_op(op, current_query_id)
        update_async_migration(
            migration_instance=migration_instance,
            current_query_id=current_query_id,
            current_operation_index=migration_instance.current_operation_index + 1,
        )
    except Exception as e:
        error = f"Exception was thrown while running operation {migration_instance.current_operation_index} : {str(e)}"
        process_error(migration_instance, error, alert=True)

    if error:
        return (False, False)

    update_migration_progress(migration_instance)
    return (True, False)
def process_event_ee(
    distinct_id: str,
    ip: str,
    site_url: str,
    data: dict,
    team_id: int,
    now: str,
    sent_at: Optional[str],
) -> None:
    properties = data.get("properties", {})
    if data.get("$set"):
        properties["$set"] = data["$set"]

    person_uuid = UUIDT()
    event_uuid = UUIDT()
    ts = handle_timestamp(data, now, sent_at)
    handle_identify_or_alias(data["event"], properties, distinct_id, team_id)

    if data["event"] == "$snapshot":
        create_session_recording_event(
            uuid=event_uuid,
            team_id=team_id,
            distinct_id=distinct_id,
            session_id=properties["$session_id"],
            snapshot_data=properties["$snapshot_data"],
            timestamp=ts,
        )
        return

    _capture_ee(
        event_uuid=event_uuid,
        person_uuid=person_uuid,
        ip=ip,
        site_url=site_url,
        team_id=team_id,
        event=data["event"],
        distinct_id=distinct_id,
        properties=properties,
        timestamp=ts,
    )
def test_can_not_save_if_there_is_neither_a_team_id_nor_an_organisation_id(self):
    # even when there are logs with team id or org id saved
    ActivityLog.objects.create(team_id=3)
    ActivityLog.objects.create(organization_id=UUIDT())

    # we cannot save a new log if it has neither team nor org id
    with self.assertRaises(IntegrityError) as error:
        ActivityLog.objects.create()

    self.assertIn(
        'new row for relation "posthog_activitylog" violates check constraint "must_have_team_or_organization_id',
        error.exception.args[0],
    )
def test_status_report_duplicate_distinct_ids(self) -> None:
    create_person_distinct_id(self.team.id, "duplicate_id1", str(UUIDT()))
    create_person_distinct_id(self.team.id, "duplicate_id1", str(UUIDT()))
    create_person_distinct_id(self.team.id, "duplicate_id2", str(UUIDT()))
    create_person_distinct_id(self.team.id, "duplicate_id2", str(UUIDT()))
    create_person_distinct_id(self.team.id, "duplicate_id2", str(UUIDT()))

    for index in range(0, 2):
        sync_execute(
            "INSERT INTO person_distinct_id SELECT %(distinct_id)s, %(person_id)s, %(team_id)s, 1, %(timestamp)s, 0 VALUES",
            {
                "distinct_id": "duplicate_id_old",
                "person_id": str(UUIDT()),
                "team_id": self.team.id,
                "timestamp": "2020-01-01 12:01:0%s" % index,
            },
        )

    report = status_report(dry_run=True).get("teams")[self.team.id]  # type: ignore
    duplicate_ids_report = report["duplicate_distinct_ids"]

    expected_result = {
        "prev_total_ids_with_duplicates": 1,
        "prev_total_extra_distinct_id_rows": 1,
        "new_total_ids_with_duplicates": 2,
        "new_total_extra_distinct_id_rows": 4,
    }

    self.assertEqual(duplicate_ids_report, expected_result)
def populate_session_recording(self, person: Person, distinct_id: str, index: int):
    if index != 0:
        return

    date = now()
    start_time = self.demo_recording["result"]["snapshots"][0]["timestamp"]
    session_id = str(UUIDT())
    window_id = str(UUIDT())

    for snapshot in self.demo_recording["result"]["snapshots"]:
        self.snapshots.append(
            {
                "session_id": session_id,
                "window_id": window_id,
                "distinct_id": distinct_id,
                "timestamp": date + timedelta(milliseconds=snapshot["timestamp"] - start_time),
                "snapshot_data": snapshot,
            }
        )
def _generate_ch_data(self, team, n_events, n_days):
    distinct_ids = []
    for i in range(0, n_events):
        distinct_id = str(UUIDT())
        distinct_ids.append(distinct_id)
        Person.objects.create(team=team, distinct_ids=[distinct_id], properties={"is_demo": True})

    for i in range(0, n_events):
        event_uuid = uuid4()
        plan = random.choice(PRICING_TIERS)
        create_event(
            event="$purchase",
            team=team,
            distinct_id=distinct_ids[i],
            properties={"plan": plan[0], "purchase_value": plan[1]},
            timestamp=now() - relativedelta(days=random.randint(0, n_days)),
            event_uuid=event_uuid,
        )
def plugin_log_factory_ch(
    *,
    team_id: int,
    plugin_id: int,
    plugin_config_id: int,
    source: PluginLogEntry.Source,
    type: PluginLogEntry.Type,
    message: str,
    instance_id: str,
):
    sync_execute(
        INSERT_PLUGIN_LOG_ENTRY_SQL,
        {
            "id": UUIDT(),
            "team_id": team_id,
            "plugin_id": plugin_id,
            "plugin_config_id": plugin_config_id,
            "source": source,
            "type": type,
            "instance_id": instance_id,
            "message": message,
            "timestamp": timezone.now().strftime("%Y-%m-%dT%H:%M:%S.%f"),
        },
    )
def _process_event_ee(
    distinct_id: str,
    ip: str,
    site_url: str,
    data: dict,
    team_id: int,
    now: str,
    sent_at: Optional[str],
) -> None:
    return process_event_ee(
        distinct_id=distinct_id,
        ip=ip,
        site_url=site_url,
        data=data,
        team_id=team_id,
        now=parser.isoparse(now),
        sent_at=parser.isoparse(sent_at) if sent_at else None,
        event_uuid=UUIDT(),
    )
def capture_internal(event, distinct_id, ip, site_url, now, sent_at, team_id, event_uuid=UUIDT()) -> None:
    parsed_event = parse_kafka_event_data(
        distinct_id=distinct_id,
        ip=ip,
        site_url=site_url,
        data=event,
        team_id=team_id,
        now=now,
        sent_at=sent_at,
        event_uuid=event_uuid,
    )
    # key the message by team and distinct ID so events for the same person land on the same Kafka partition
    partition_key = hashlib.sha256(f"{team_id}:{distinct_id}".encode()).hexdigest()
    log_event(parsed_event, event["event"], partition_key=partition_key)
def test_does_not_throw_if_cannot_log_activity(self):
    with self.assertLogs(level="WARN") as log:
        try:
            log_activity(
                organization_id=UUIDT(),
                team_id=1,
                # will cause logging to raise exception because user is unsaved
                # avoids needing to mock anything to force the exception
                user=User(first_name="testy", email="*****@*****.**"),
                item_id="12345",
                scope="testing throwing exceptions on create",
                activity="does not explode",
                detail=Detail(),
            )
        except Exception as e:
            raise pytest.fail(f"Should not have raised exception: {e}")

    logged_warning = log.records[0].__dict__
    self.assertEqual(logged_warning["levelname"], "WARNING")
    self.assertEqual(logged_warning["msg"]["event"], "failed to write activity log")
    self.assertEqual(logged_warning["msg"]["scope"], "testing throwing exceptions on create")
    self.assertEqual(logged_warning["msg"]["team"], 1)
    self.assertEqual(logged_warning["msg"]["activity"], "does not explode")
    self.assertIsInstance(logged_warning["msg"]["exception"], ValueError)