def test_ingest_session_event_abnormal(self):
    timestamp = datetime.now(timezone.utc)
    started = timestamp - timedelta(hours=1)

    payload = {
        "device_family": "iPhone12,3",
        "distinct_id": "b3ef3211-58a4-4b36-a9a1-5a55df0d9aaf",
        "duration": 1947.49,
        "environment": "production",
        "org_id": 1,
        "os": "iOS",
        "os_version": "13.3.1",
        "project_id": 42,
        "release": "[email protected]",
        "retention_days": 90,
        "seq": 42,
        "errors": 0,
        "session_id": "8333339f-5675-4f89-a9a0-1c935255ab58",
        "started": started.timestamp(),
        "status": "abnormal",
        "received": timestamp.timestamp(),
    }

    meta = KafkaMessageMetadata(
        offset=1, partition=2, timestamp=datetime(1970, 1, 1)
    )
    assert SessionsProcessor().process_message(payload, meta) == InsertBatch(
        [
            {
                "distinct_id": "b3ef3211-58a4-4b36-a9a1-5a55df0d9aaf",
                "quantity": 1,
                "duration": 1947490,
                "environment": "production",
                "org_id": 1,
                "project_id": 42,
                "release": "[email protected]",
                "retention_days": 90,
                "seq": 42,
                # abnormal counts as at least one error
                "errors": 1,
                "session_id": "8333339f-5675-4f89-a9a0-1c935255ab58",
                "started": started.replace(tzinfo=None),
                "status": 3,
                "received": timestamp.replace(tzinfo=None),
            }
        ]
    )
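
# The assertion above pins down three normalizations performed by
# SessionsProcessor: float-second "duration" becomes integer milliseconds,
# the "status" string becomes a small integer code, and crashed/abnormal
# sessions are counted as having at least one error. A minimal sketch of that
# logic, with a mapping inferred from the expected rows (illustrative, not
# Snuba's actual internals):
def test_status_normalization_sketch(self):
    status_mapping = {
        "ok": 0, "exited": 1, "crashed": 2, "abnormal": 3, "errored": 4,
    }

    def normalize(status: str, errors: int, duration: float) -> tuple:
        # Crashed and abnormal sessions carry at least one error, which is
        # why the abnormal test above expects "errors": 1.
        if status in ("crashed", "abnormal"):
            errors = max(errors, 1)
        return status_mapping[status], errors, int(duration * 1000)

    assert normalize("abnormal", 0, 1947.49) == (3, 1, 1947490)
    assert normalize("exited", 0, 1947.49) == (1, 0, 1947490)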

def test_ingest_session_event_max_sample_rate(self):
    timestamp = datetime.now(timezone.utc)
    started = timestamp - timedelta(hours=1)

    payload = {
        "device_family": "iPhone12,3",
        "distinct_id": "b3ef3211-58a4-4b36-a9a1-5a55df0d9aaf",
        "duration": 1947.49,
        "environment": "production",
        "org_id": 1,
        "os": "iOS",
        "os_version": "13.3.1",
        "project_id": 42,
        "release": "[email protected]",
        "retention_days": 90,
        "seq": 42,
        "errors": 0,
        "session_id": "8333339f-5675-4f89-a9a0-1c935255ab58",
        "started": started.timestamp(),
        "status": "exited",
        "received": timestamp.timestamp(),
    }

    meta = KafkaMessageMetadata(
        offset=1, partition=2, timestamp=datetime(1970, 1, 1)
    )
    assert SessionsProcessor().process_message(payload, meta) == InsertBatch(
        [
            {
                "distinct_id": "b3ef3211-58a4-4b36-a9a1-5a55df0d9aaf",
                "quantity": 1,
                "duration": 1947490,
                "environment": "production",
                "org_id": 1,
                "project_id": 42,
                "release": "[email protected]",
                "retention_days": 90,
                "seq": 42,
                "errors": 0,
                "session_id": "8333339f-5675-4f89-a9a0-1c935255ab58",
                "started": started.replace(tzinfo=None),
                "status": 1,
                "received": timestamp.replace(tzinfo=None),
            }
        ]
    )
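
# Both payloads above send "started"/"received" as UNIX epoch floats, while
# the expected rows hold timezone-naive UTC datetimes. A hedged sketch of that
# conversion (helper and test names are ours, not Snuba's):
def test_timestamp_conversion_sketch(self):
    def to_naive_utc(epoch_seconds: float) -> datetime:
        return datetime.fromtimestamp(epoch_seconds, tz=timezone.utc).replace(
            tzinfo=None
        )

    timestamp = datetime.now(timezone.utc)
    # The epoch float round-trips to the same naive datetime the expected
    # rows use for "received".
    assert to_naive_utc(timestamp.timestamp()) == timestamp.replace(tzinfo=None)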

    dist_table_name=READ_DIST_TABLE_NAME,
    storage_set_key=StorageSetKey.SESSIONS,
)
materialized_view_schema = TableSchema(
    local_table_name=READ_LOCAL_MV_NAME,
    dist_table_name=READ_DIST_MV_NAME,
    storage_set_key=StorageSetKey.SESSIONS,
    columns=read_columns,
)

# The raw table we write onto, and that potentially we could
# query.
raw_storage = WritableTableStorage(
    storage_key=StorageKey.SESSIONS_RAW,
    storage_set_key=StorageSetKey.SESSIONS,
    schema=raw_schema,
    query_processors=[],
    stream_loader=build_kafka_stream_loader_from_settings(
        StorageKey.SESSIONS_RAW,
        processor=SessionsProcessor(),
        default_topic_name="ingest-sessions",
    ),
)

# The materialized view we query aggregate data from.
materialized_storage = ReadableTableStorage(
    storage_key=StorageKey.SESSIONS_HOURLY,
    storage_set_key=StorageSetKey.SESSIONS,
    schema=read_schema,
    query_processors=[PrewhereProcessor(["project_id", "org_id"])],
)
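
# A hedged sketch of feeding the raw storage above: its stream loader consumes
# JSON session payloads from the "ingest-sessions" topic, so a producer only
# needs to publish JSON there. The broker address below is an assumed
# local-development default, and confluent_kafka is the Kafka client library
# Snuba itself builds on.
def _produce_example_session(payload: dict) -> None:
    import json

    from confluent_kafka import Producer

    producer = Producer({"bootstrap.servers": "127.0.0.1:9092"})  # assumed broker
    producer.produce("ingest-sessions", json.dumps(payload).encode("utf-8"))
    producer.flush()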

def process_query(self, query: Query, request_settings: RequestSettings) -> None:
    # NOTE: the product side is restricted to a 6h window, however it rounds
    # outwards, which extends the window to 7h.
    from_date, to_date = get_time_range(query, "started")
    if not from_date or not to_date or (to_date - from_date) > timedelta(hours=7):
        raise ValidationException(
            "Minute-resolution queries are restricted to a 7-hour time window."
        )


# The raw table we write onto, and that potentially we could
# query.
raw_storage = WritableTableStorage(
    storage_key=StorageKey.SESSIONS_RAW,
    storage_set_key=StorageSetKey.SESSIONS,
    schema=raw_schema,
    query_processors=[MinuteResolutionProcessor()],
    mandatory_condition_checkers=[OrgIdEnforcer(), ProjectIdEnforcer()],
    stream_loader=build_kafka_stream_loader_from_settings(
        processor=SessionsProcessor(),
        default_topic=Topic.SESSIONS,
    ),
)

# The materialized view we query aggregate data from.
materialized_storage = ReadableTableStorage(
    storage_key=StorageKey.SESSIONS_HOURLY,
    storage_set_key=StorageSetKey.SESSIONS,
    schema=read_schema,
    query_processors=[PrewhereProcessor(["project_id", "org_id"])],
    mandatory_condition_checkers=[OrgIdEnforcer(), ProjectIdEnforcer()],
)
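
# Why process_query above allows seven hours: the product's six-hour window is
# rounded outward to hour boundaries, which stretches it by the start's distance
# back to the previous hour plus the end's distance forward to the next one;
# for an exact six-hour window those sum to at most one hour (e.g. 09:30-15:30
# becomes 09:00-16:00). A small illustration of that bound (helper name is
# ours, not Snuba's):
def _rounded_window(start: datetime, end: datetime) -> timedelta:
    # Floor the start and ceil the end to the enclosing hour boundaries.
    floor = start.replace(minute=0, second=0, microsecond=0)
    ceil = end.replace(minute=0, second=0, microsecond=0)
    if ceil < end:
        ceil += timedelta(hours=1)
    return ceil - floor


# _rounded_window(datetime(2021, 1, 1, 9, 30), datetime(2021, 1, 1, 15, 30))
# evaluates to timedelta(hours=7), exactly the cap the validator enforces.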