def write(*, dataset: Dataset) -> RespTuple:
    from snuba.processor import InsertBatch

    rows: MutableSequence[WriterTableRow] = []
    offset_base = int(round(time.time() * 1000))
    for index, message in enumerate(json.loads(http_request.data)):
        offset = offset_base + index
        processed_message = (
            enforce_table_writer(dataset)
            .get_stream_loader()
            .get_processor()
            .process_message(
                message,
                KafkaMessageMetadata(
                    offset=offset, partition=0, timestamp=datetime.utcnow()
                ),
            )
        )
        if processed_message:
            assert isinstance(processed_message, InsertBatch)
            rows.extend(processed_message.rows)

    BatchWriterEncoderWrapper(
        enforce_table_writer(dataset).get_batch_writer(metrics),
        JSONRowEncoder(),
    ).write(rows)

    return ("ok", 200, {"Content-Type": "text/plain"})
def generate_outcomes(
    self,
    org_id: int,
    project_id: int,
    num_outcomes: int,
    outcome: int,
    time_since_base: timedelta,
) -> None:
    outcomes = []
    for _ in range(num_outcomes):
        processed = (
            self.storage.get_table_writer()
            .get_stream_loader()
            .get_processor()
            .process_message(
                {
                    "project_id": project_id,
                    "event_id": uuid.uuid4().hex,
                    "timestamp": (self.base_time + time_since_base).strftime(
                        "%Y-%m-%dT%H:%M:%S.%fZ"
                    ),
                    "org_id": org_id,
                    "reason": None,
                    "key_id": 1,
                    "outcome": outcome,
                },
                KafkaMessageMetadata(0, 0, self.base_time),
            )
        )
        if processed:
            outcomes.append(processed)

    write_processed_messages(self.storage, outcomes)
def generate_sets(self) -> None:
    events = []
    processor = self.storage.get_table_writer().get_stream_loader().get_processor()
    for n in range(self.seconds):
        for p in self.project_ids:
            msg = {
                "org_id": self.org_id,
                "project_id": p,
                "type": METRICS_SET_TYPE,
                "value": [n % self.unique_set_values],
                "timestamp": self.base_time.timestamp() + n,
                "tags": self.default_tags,
                "metric_id": self.metric_id,
                "retention_days": RETENTION_DAYS,
            }
            processed = processor.process_message(
                msg,
                KafkaMessageMetadata(0, 0, self.base_time),
            )
            if processed:
                events.append(processed)

    write_processed_messages(self.storage, events)
def generate_transactions() -> None:
    from datetime import datetime

    table_writer = get_writable_storage(StorageKey.TRANSACTIONS).get_table_writer()

    rows = []
    for i in range(5):
        raw_transaction = get_raw_transaction()
        # Older versions of this table did not have measurements
        del raw_transaction["data"]["measurements"]

        processed = (
            table_writer.get_stream_loader()
            .get_processor()
            .process_message(
                (2, "insert", raw_transaction),
                KafkaMessageMetadata(0, 0, datetime.utcnow()),
            )
        )
        rows.extend(processed.rows)

    BatchWriterEncoderWrapper(
        table_writer.get_batch_writer(metrics=DummyMetricsBackend(strict=True)),
        JSONRowEncoder(),
    ).write(rows)
def test_metrics_processor(
    message: Mapping[str, Any],
    expected_set: Optional[Sequence[Mapping[str, Any]]],
    expected_counter: Optional[Sequence[Mapping[str, Any]]],
    expected_distributions: Optional[Sequence[Mapping[str, Any]]],
) -> None:
    settings.DISABLED_DATASETS = set()

    meta = KafkaMessageMetadata(offset=100, partition=1, timestamp=datetime(1970, 1, 1))

    expected_set_result = (
        InsertBatch(expected_set, None) if expected_set is not None else None
    )
    assert SetsMetricsProcessor().process_message(message, meta) == expected_set_result

    expected_counter_result = (
        InsertBatch(expected_counter, None) if expected_counter is not None else None
    )
    assert (
        CounterMetricsProcessor().process_message(message, meta)
        == expected_counter_result
    )

    expected_distributions_result = (
        InsertBatch(expected_distributions, None)
        if expected_distributions is not None
        else None
    )
    assert (
        DistributionsMetricsProcessor().process_message(message, meta)
        == expected_distributions_result
    )
def process_message(
    processor: MessageProcessor, consumer_group: str, message: Message[KafkaPayload]
) -> Union[None, BytesInsertBatch, ReplacementBatch]:
    if skip_kafka_message(message):
        logger.warning(
            f"A consumer for {message.partition.topic.name} skipped a message!",
            extra=__message_to_dict(message),
        )
        return None

    try:
        result = processor.process_message(
            rapidjson.loads(message.payload.value),
            KafkaMessageMetadata(
                message.offset, message.partition.index, message.timestamp
            ),
        )
    except Exception as err:
        logger.error(err, exc_info=True)
        raise InvalidMessages(
            [__invalid_kafka_message(message, consumer_group, err)]
        ) from err

    if isinstance(result, InsertBatch):
        return BytesInsertBatch(
            [json_row_encoder.encode(row) for row in result.rows],
            result.origin_timestamp,
        )
    else:
        return result
def test_missing_trace_context(self) -> None:
    start, finish = self.__get_timestamps()
    message = TransactionEvent(
        event_id="e5e062bf2e1d4afd96fd2f90b6770431",
        trace_id="7400045b25c443b885914600aa83ad04",
        span_id="8841662216cc598b",
        transaction_name="/organizations/:orgId/issues/",
        status="cancelled",
        op="navigation",
        timestamp=finish,
        start_timestamp=start,
        platform="python",
        dist="",
        user_name="me",
        user_id="myself",
        user_email="*****@*****.**",
        ipv4="127.0.0.1",
        ipv6=None,
        environment="prod",
        release="34a554c14b68285d8a8eb6c5c4c56dfc1db9a83a",
        sdk_name="sentry.python",
        sdk_version="0.9.0",
        http_method="POST",
        http_referer="tagstore.something",
        geo={"country_code": "XY", "region": "fake_region", "city": "fake_city"},
    )
    payload = message.serialize()
    # Force an invalid event
    del payload[2]["data"]["contexts"]

    meta = KafkaMessageMetadata(offset=1, partition=2, timestamp=datetime(1970, 1, 1))
    processor = TransactionsMessageProcessor()
    assert processor.process_message(payload, meta) is None
def test_process_message(self) -> None:
    meta = KafkaMessageMetadata(offset=0, partition=0, timestamp=datetime(1970, 1, 1))
    message = ReplayEvent(
        replay_id="e5e062bf2e1d4afd96fd2f90b6770431",
        title="/organizations/:orgId/issues/",
        trace_ids=[
            "36e980a9-c602-4cde-9f5d-089f15b83b5f",
            "8bea4461-d8b9-44f3-93c1-5a3cb1c4169a",
        ],
        sequence_id=0,
        timestamp=datetime.now(tz=timezone.utc).timestamp(),
        platform="python",
        dist="",
        user_name="me",
        user_id="232",
        user_email="*****@*****.**",
        ipv4="127.0.0.1",
        ipv6=None,
        environment="prod",
        release="34a554c14b68285d8a8eb6c5c4c56dfc1db9a83a",
        sdk_name="sentry.python",
        sdk_version="0.9.0",
    )

    assert ReplaysProcessor().process_message(
        message.serialize(), meta
    ) == InsertBatch([message.build_result(meta)], None)
def process_message_multistorage(
    message: Message[MultistorageKafkaPayload],
) -> Sequence[Tuple[StorageKey, Union[None, JSONRowInsertBatch, ReplacementBatch]]]:
    # XXX: Avoid circular import on KafkaMessageMetadata, remove when that type
    # is itself removed.
    from snuba.datasets.storages.factory import get_writable_storage

    value = rapidjson.loads(message.payload.payload.value)
    metadata = KafkaMessageMetadata(
        message.offset, message.partition.index, message.timestamp
    )

    results: MutableSequence[
        Tuple[StorageKey, Union[None, JSONRowInsertBatch, ReplacementBatch]]
    ] = []

    for storage_key in message.payload.storage_keys:
        result = (
            get_writable_storage(storage_key)
            .get_table_writer()
            .get_stream_loader()
            .get_processor()
            .process_message(value, metadata)
        )
        if isinstance(result, InsertBatch):
            results.append(
                (
                    storage_key,
                    JSONRowInsertBatch(
                        [json_row_encoder.encode(row) for row in result.rows],
                        result.origin_timestamp,
                    ),
                )
            )
        else:
            results.append((storage_key, result))

    return results
def generate_uniform_distributions(self) -> None:
    events = []
    processor = self.storage.get_table_writer().get_stream_loader().get_processor()
    value_array = list(range(self.d_range_min, self.d_range_max))
    for n in range(self.seconds):
        for p in self.project_ids:
            msg = {
                "org_id": self.org_id,
                "project_id": p,
                "type": METRICS_DISTRIBUTIONS_TYPE,
                "value": value_array,
                "timestamp": self.base_time.timestamp() + n,
                "tags": self.default_tags,
                "metric_id": self.metric_id,
                "retention_days": RETENTION_DAYS,
            }
            processed = processor.process_message(
                msg,
                KafkaMessageMetadata(0, 0, self.base_time),
            )
            if processed:
                events.append(processed)

    write_processed_messages(self.storage, events)
def setup_method(self, test_method):
    self.metadata = KafkaMessageMetadata(0, 0, datetime.now())
    self.event = get_raw_event()
    self.processor = (
        get_writable_storage(StorageKey.EVENTS)
        .get_table_writer()
        .get_stream_loader()
        .get_processor()
    )
def test_messages(self) -> None:
    processor = GroupedMessageProcessor("sentry_groupedmessage")
    metadata = KafkaMessageMetadata(
        offset=42, partition=0, timestamp=datetime(1970, 1, 1)
    )

    ret = processor.process_message(self.INSERT_MSG, metadata)
    assert ret == InsertBatch(
        [self.PROCESSED],
        datetime(2019, 9, 19, 0, 17, 21, 447870, tzinfo=pytz.UTC),
    )
    write_processed_messages(self.storage, [ret])
    ret = (
        get_cluster(StorageSetKey.EVENTS)
        .get_query_connection(ClickhouseClientSettings.INSERT)
        .execute("SELECT * FROM groupedmessage_local;")
    )
    assert ret[0] == (
        42,  # offset
        0,  # deleted
        2,  # project_id
        74,  # id
        0,  # status
        datetime(2019, 6, 19, 6, 46, 28),
        datetime(2019, 6, 19, 6, 45, 32),
        datetime(2019, 6, 19, 6, 45, 32),
        None,
    )

    ret = processor.process_message(self.UPDATE_MSG, metadata)
    assert ret == InsertBatch(
        [self.PROCESSED],
        datetime(2019, 9, 19, 0, 17, 21, 447870, tzinfo=pytz.UTC),
    )

    ret = processor.process_message(self.DELETE_MSG, metadata)
    assert ret == InsertBatch(
        [self.DELETED],
        datetime(2019, 9, 19, 0, 17, 21, 447870, tzinfo=pytz.UTC),
    )
def test_processors_of_multistorage_consumer_are_idempotent(
    message: Tuple[int, str, InsertEvent], processor: MessageProcessor
) -> None:
    """
    Test that when the same message is provided to the processors, the result
    is the same. That is, the process-message operation is idempotent.
    """
    metadata = KafkaMessageMetadata(1000, 1, datetime.now())
    result1 = processor.process_message(message, metadata)
    result2 = processor.process_message(message, metadata)
    assert result1 == result2
def generate_session_events(self, org_id: int, project_id: int) -> None:
    processor = self.storage.get_table_writer().get_stream_loader().get_processor()
    meta = KafkaMessageMetadata(offset=1, partition=2, timestamp=datetime(1970, 1, 1))
    distinct_id = uuid4().hex
    template = {
        "session_id": uuid4().hex,
        "distinct_id": distinct_id,
        "duration": None,
        "environment": "production",
        "org_id": org_id,
        "project_id": project_id,
        "release": "[email protected]",
        "retention_days": settings.DEFAULT_RETENTION_DAYS,
        "seq": 0,
        "errors": 0,
        "received": datetime.utcnow().timestamp(),
        "started": self.started.timestamp(),
    }
    events = [
        processor.process_message(
            {
                **template,
                "status": "exited",
                "duration": 1947.49,
                "session_id": uuid4().hex,
                "started": (self.started + timedelta(minutes=13)).timestamp(),
            },
            meta,
        ),
        processor.process_message(
            {**template, "status": "exited", "quantity": 5},
            meta,
        ),
        processor.process_message(
            {**template, "status": "errored", "errors": 1, "quantity": 2},
            meta,
        ),
        processor.process_message(
            {
                **template,
                "distinct_id": distinct_id,
                "status": "errored",
                "errors": 1,
                "quantity": 2,
                "started": (self.started + timedelta(minutes=24)).timestamp(),
            },
            meta,
        ),
    ]
    filtered = [e for e in events if e]
    write_processed_messages(self.storage, filtered)
def write_unprocessed_events(
    storage: WritableStorage, events: Sequence[InsertEvent]
) -> None:
    processor = storage.get_table_writer().get_stream_loader().get_processor()

    processed_messages = []
    for i, event in enumerate(events):
        processed_message = processor.process_message(
            (2, "insert", event, {}), KafkaMessageMetadata(i, 0, datetime.now())
        )
        assert processed_message is not None
        processed_messages.append(processed_message)

    write_processed_messages(storage, processed_messages)
def process_message(
    processor: MessageProcessor, message: Message[KafkaPayload]
) -> Union[None, JSONRowInsertBatch, ReplacementBatch]:
    result = processor.process_message(
        rapidjson.loads(message.payload.value),
        KafkaMessageMetadata(
            message.offset, message.partition.index, message.timestamp
        ),
    )

    if isinstance(result, InsertBatch):
        return JSONRowInsertBatch(
            [json_row_encoder.encode(row) for row in result.rows],
            result.origin_timestamp,
        )
    else:
        return result
def test_ingest_session_event_abnormal(self) -> None:
    timestamp = datetime.now(timezone.utc)
    started = timestamp - timedelta(hours=1)

    payload = {
        "device_family": "iPhone12,3",
        "distinct_id": "b3ef3211-58a4-4b36-a9a1-5a55df0d9aaf",
        "duration": 1947.49,
        "environment": "production",
        "org_id": 1,
        "os": "iOS",
        "os_version": "13.3.1",
        "project_id": 42,
        "release": "[email protected]",
        "retention_days": 90,
        "seq": 42,
        "errors": 0,
        "session_id": "8333339f-5675-4f89-a9a0-1c935255ab58",
        "started": started.timestamp(),
        "status": "abnormal",
        "received": timestamp.timestamp(),
    }

    meta = KafkaMessageMetadata(offset=1, partition=2, timestamp=datetime(1970, 1, 1))
    assert SessionsProcessor().process_message(payload, meta) == InsertBatch(
        [
            {
                "distinct_id": "b3ef3211-58a4-4b36-a9a1-5a55df0d9aaf",
                "quantity": 1,
                "duration": 1947490,
                "environment": "production",
                "org_id": 1,
                "project_id": 42,
                "release": "[email protected]",
                "retention_days": 90,
                "seq": 42,
                # abnormal counts as at least one error
                "errors": 1,
                "session_id": "8333339f-5675-4f89-a9a0-1c935255ab58",
                "started": started.replace(tzinfo=None),
                "status": 3,
                "received": timestamp.replace(tzinfo=None),
            }
        ],
        None,
    )
def process_message_multistorage(
    message: Message[MultistorageKafkaPayload],
) -> MultistorageProcessedMessage:
    value = rapidjson.loads(message.payload.payload.value)
    metadata = KafkaMessageMetadata(
        message.offset, message.partition.index, message.timestamp
    )

    results: MutableSequence[
        Tuple[StorageKey, Union[None, BytesInsertBatch, ReplacementBatch]]
    ] = []

    for index, storage_key in enumerate(message.payload.storage_keys):
        result = _process_message_multistorage_work(
            metadata=metadata, storage_key=storage_key, storage_message=value
        )
        results.append((storage_key, result))

    return results
def generate_outcomes(
    self,
    org_id: int,
    project_id: int,
    num_outcomes: int,
    outcome: int,
    time_since_base: timedelta,
    category: Optional[int],
    quantity: Optional[int] = None,
) -> None:
    outcomes = []
    for _ in range(num_outcomes):
        message = {
            "project_id": project_id,
            "event_id": uuid.uuid4().hex,
            "timestamp": (self.base_time + time_since_base).strftime(
                "%Y-%m-%dT%H:%M:%S.%fZ"
            ),
            "org_id": org_id,
            "reason": None,
            "key_id": 1,
            "outcome": outcome,
            "category": category,
            "quantity": quantity,
        }
        if message["category"] is None:
            del message["category"]  # for testing None category case
        if message["quantity"] is None:
            del message["quantity"]  # for testing None quantity case

        processed = (
            self.storage.get_table_writer()
            .get_stream_loader()
            .get_processor()
            .process_message(
                message,
                KafkaMessageMetadata(0, 0, self.base_time),
            )
        )
        if processed:
            outcomes.append(processed)

    write_processed_messages(self.storage, outcomes)
def test_messages(self) -> None:
    processor = GroupAssigneeProcessor("sentry_groupasignee")

    metadata = KafkaMessageMetadata(
        offset=42, partition=0, timestamp=datetime(1970, 1, 1)
    )

    ret = processor.process_message(self.INSERT_MSG, metadata)
    assert ret == InsertBatch(
        [self.PROCESSED],
        datetime(2019, 9, 19, 0, 17, 55, 32443, tzinfo=pytz.UTC),
    )
    write_processed_messages(self.storage, [ret])
    ret = (
        self.storage.get_cluster()
        .get_query_connection(ClickhouseClientSettings.QUERY)
        .execute("SELECT * FROM groupassignee_local;")
        .results
    )
    assert ret[0] == (
        42,  # offset
        0,  # deleted
        2,  # project_id
        1359,  # group_id
        datetime(2019, 9, 19, 0, 17, 55),
        1,  # user_id
        None,  # team_id
    )

    ret = processor.process_message(self.UPDATE_MSG_NO_KEY_CHANGE, metadata)
    assert ret == InsertBatch(
        [self.PROCESSED],
        datetime(2019, 9, 19, 0, 6, 56, 376853, tzinfo=pytz.UTC),
    )

    # Tests an update with a key change, which becomes two inserts:
    # one deletion and the insertion of the new row.
    ret = processor.process_message(self.UPDATE_MSG_WITH_KEY_CHANGE, metadata)
    assert ret == InsertBatch(
        [self.DELETED, self.PROCESSED_UPDATE],
        datetime(2019, 9, 19, 0, 6, 56, 376853, tzinfo=pytz.UTC),
    )

    ret = processor.process_message(self.DELETE_MSG, metadata)
    assert ret == InsertBatch(
        [self.DELETED],
        datetime(2019, 9, 19, 0, 17, 21, 447870, tzinfo=pytz.UTC),
    )
def test_metrics_polymorphic_processor(
    message: Mapping[str, Any],
    expected_output: Optional[Sequence[Mapping[str, Any]]],
) -> None:
    settings.DISABLED_DATASETS = set()

    meta = KafkaMessageMetadata(offset=100, partition=1, timestamp=datetime(1970, 1, 1))

    # test_time_bucketing tests the bucket function; parameterizing the output
    # times here would require repeating the code in the class we're testing.
    with patch(
        "snuba.datasets.metrics_aggregate_processor.timestamp_to_bucket",
        lambda _, __: MOCK_TIME_BUCKET,
    ):
        expected_polymorphic_result = (
            AggregateInsertBatch(expected_output, None)
            if expected_output is not None
            else None
        )
        assert (
            PolymorphicMetricsProcessor().process_message(message, meta)
            == expected_polymorphic_result
        )
def test_send_message(xid: int, expected: Optional[ProcessedMessage]) -> None:
    processor = (
        get_writable_storage(StorageKey.GROUPEDMESSAGES)
        .get_table_writer()
        .get_stream_loader()
        .get_processor()
    )

    worker = SnapshotProcessor(
        processor=processor,
        snapshot_id=SnapshotId(str(uuid1())),
        transaction_data=TransactionData(
            xmin=Xid(100), xmax=Xid(200), xip_list=[Xid(120), Xid(130)]
        ),
    )

    ret = worker.process_message(
        get_insert_event(xid),
        KafkaMessageMetadata(offset=1, partition=0, timestamp=datetime.now()),
    )
    assert ret == expected
def test_span_process() -> None:
    timestamp = datetime.now(tz=timezone.utc) - timedelta(seconds=5)
    start_timestamp = timestamp - timedelta(seconds=4)
    message = SpanEvent(
        event_id="e5e062bf2e1d4afd96fd2f90b6770431",
        trace_id="7400045b25c443b885914600aa83ad04",
        span_id="8841662216cc598b",
        parent_span_id="b76a8ca0b0908a15",
        transaction_name="/organizations/:orgId/issues/",
        op="navigation",
        timestamp=timestamp.timestamp(),
        start_timestamp=start_timestamp.timestamp(),
        spans=[
            SpanData(
                trace_id="7400045b25c443b885914600aa83ad04",
                span_id="b95eff64930fef25",
                parent_span_id="8841662216cc598b",
                op="db",
                start_timestamp=(start_timestamp + timedelta(seconds=1)).timestamp(),
                timestamp=(start_timestamp + timedelta(seconds=2)).timestamp(),
            ),
            SpanData(
                trace_id="7400045b25c443b885914600aa83ad04",
                span_id="9f8e7bbe7bf22e09",
                parent_span_id="b95eff64930fef25",
                op="web",
                start_timestamp=(start_timestamp + timedelta(seconds=2)).timestamp(),
                timestamp=(start_timestamp + timedelta(seconds=3)).timestamp(),
            ),
        ],
    )
    meta = KafkaMessageMetadata(offset=1, partition=2, timestamp=datetime(1970, 1, 1))

    processed = SpansMessageProcessor().process_message(message.serialize(), meta)
    assert isinstance(processed, InsertBatch)
    expected_rows = message.build_result(meta)
    for span, expected in zip(processed.rows, expected_rows):
        assert span == expected
def process_message_multistorage_identical_storages(
    message: Message[MultistorageKafkaPayload],
) -> MultistorageProcessedMessage:
    """
    This method is similar to process_message_multistorage except for a minor
    difference. It performs an optimization where it avoids processing a
    message multiple times if it finds that the storages to which the data
    needs to be written are identical.

    This is a performance optimization since we remove the message processing
    time completely for all identical storages like errors and errors_v2.

    It is possible that the storage keys in the message could be a mix of
    identical and non-identical storages. This method takes that scenario
    into account as well.

    The reason why this method has been created rather than modifying the
    existing process_message_multistorage is to avoid doing a check for every
    message in cases where there are no identical storages, like metrics.
    """
    value = rapidjson.loads(message.payload.payload.value)
    metadata = KafkaMessageMetadata(
        message.offset, message.partition.index, message.timestamp
    )

    intermediate_results: MutableMapping[
        StorageKey, Union[None, BytesInsertBatch, ReplacementBatch]
    ] = {}

    for index, storage_key in enumerate(message.payload.storage_keys):
        result = None
        for other_storage_key, insert_batch in intermediate_results.items():
            if are_writes_identical(storage_key, other_storage_key):
                result = insert_batch
                break

        if result is None:
            result = _process_message_multistorage_work(
                metadata=metadata,
                storage_key=storage_key,
                storage_message=value,
            )
        intermediate_results[storage_key] = result

    return list(intermediate_results.items())
def generate_counters(self) -> None:
    events = []
    for n in range(self.seconds):
        for p in self.project_ids:
            processed = (
                self.storage.get_table_writer()
                .get_stream_loader()
                .get_processor()
                .process_message(
                    {
                        "org_id": self.org_id,
                        "project_id": p,
                        "unit": "ms",
                        "type": METRICS_COUNTERS_TYPE,
                        "value": 1.0,
                        "timestamp": self.base_time.timestamp() + n,
                        "tags": self.default_tags,
                        "metric_id": self.metric_id,
                        "retention_days": RETENTION_DAYS,
                    },
                    KafkaMessageMetadata(0, 0, self.base_time),
                )
            )
            if processed:
                events.append(processed)

    write_processed_messages(self.storage, events)
def test_base_process(self) -> None:
    old_skip_context = settings.TRANSACT_SKIP_CONTEXT_STORE
    settings.TRANSACT_SKIP_CONTEXT_STORE = {1: {"experiments"}}

    start, finish = self.__get_timestamps()
    message = TransactionEvent(
        event_id="e5e062bf2e1d4afd96fd2f90b6770431",
        trace_id="7400045b25c443b885914600aa83ad04",
        span_id="8841662216cc598b",
        transaction_name="/organizations/:orgId/issues/",
        status="cancelled",
        op="navigation",
        timestamp=finish,
        start_timestamp=start,
        platform="python",
        dist="",
        user_name="me",
        user_id="myself",
        user_email="*****@*****.**",
        ipv4="127.0.0.1",
        ipv6=None,
        environment="prod",
        release="34a554c14b68285d8a8eb6c5c4c56dfc1db9a83a",
        sdk_name="sentry.python",
        sdk_version="0.9.0",
        http_method="POST",
        http_referer="tagstore.something",
        geo={"country_code": "XY", "region": "fake_region", "city": "fake_city"},
    )
    meta = KafkaMessageMetadata(offset=1, partition=2, timestamp=datetime(1970, 1, 1))

    assert TransactionsMessageProcessor().process_message(
        message.serialize(), meta
    ) == InsertBatch([message.build_result(meta)], None)
    settings.TRANSACT_SKIP_CONTEXT_STORE = old_skip_context
def test_metrics_aggregate_processor(
    message: Mapping[str, Any],
    expected_set: Optional[Sequence[Mapping[str, Any]]],
    expected_counter: Optional[Sequence[Mapping[str, Any]]],
    expected_distributions: Optional[Sequence[Mapping[str, Any]]],
) -> None:
    settings.DISABLED_DATASETS = set()
    settings.WRITE_METRICS_AGG_DIRECTLY = True

    meta = KafkaMessageMetadata(offset=100, partition=1, timestamp=datetime(1970, 1, 1))

    expected_set_result = (
        AggregateInsertBatch(expected_set, None) if expected_set is not None else None
    )

    # test_time_bucketing tests the bucket function; parameterizing the output
    # times here would require repeating the code in the class we're testing.
    with patch(
        "snuba.datasets.metrics_aggregate_processor.timestamp_to_bucket",
        lambda _, __: MOCK_TIME_BUCKET,
    ):
        assert (
            SetsAggregateProcessor().process_message(message, meta)
            == expected_set_result
        )

        expected_counter_result = (
            AggregateInsertBatch(expected_counter, None)
            if expected_counter is not None
            else None
        )
        assert (
            CounterAggregateProcessor().process_message(message, meta)
            == expected_counter_result
        )

        expected_distributions_result = (
            AggregateInsertBatch(expected_distributions, None)
            if expected_distributions is not None
            else None
        )
        assert (
            DistributionsAggregateProcessor().process_message(message, meta)
            == expected_distributions_result
        )

    settings.WRITE_METRICS_AGG_DIRECTLY = False
def generate_fizzbuzz_events(self) -> None:
    """
    Generate a deterministic set of events across a time range.
    """
    events = []
    for tick in range(self.minutes):
        tock = tick + 1
        for p in self.project_ids:
            # project N sends an event every Nth minute
            if tock % p == 0:
                trace_id = "7400045b25c443b885914600aa83ad04"
                span_id = "8841662216cc598b"
                processed = (
                    self.storage.get_table_writer()
                    .get_stream_loader()
                    .get_processor()
                    .process_message(
                        (
                            2,
                            "insert",
                            {
                                "project_id": p,
                                "event_id": uuid.uuid4().hex,
                                "deleted": 0,
                                "datetime": (
                                    self.base_time + timedelta(minutes=tick)
                                ).isoformat(),
                                "platform": self.platforms[
                                    (tock * p) % len(self.platforms)
                                ],
                                "retention_days": settings.DEFAULT_RETENTION_DAYS,
                                "data": {
                                    # Project N sends every Nth (mod len(hashes)) hash (and platform)
                                    "received": calendar.timegm(
                                        (self.base_time + timedelta(minutes=tick)).timetuple()
                                    ),
                                    "type": "transaction",
                                    "transaction": "/api/do_things",
                                    "start_timestamp": datetime.timestamp(
                                        (self.base_time + timedelta(minutes=tick))
                                    ),
                                    "timestamp": datetime.timestamp(
                                        (self.base_time + timedelta(minutes=tick, seconds=1))
                                    ),
                                    "tags": {
                                        # Sentry
                                        "environment": self.environments[
                                            (tock * p) % len(self.environments)
                                        ],
                                        "sentry:release": str(tick),
                                        "sentry:dist": "dist1",
                                        # User
                                        "foo": "baz",
                                        "foo.bar": "qux",
                                        "os_name": "linux",
                                    },
                                    "user": {
                                        "email": "*****@*****.**",
                                        "ip_address": "8.8.8.8",
                                    },
                                    "contexts": {
                                        "trace": {
                                            "trace_id": trace_id,
                                            "span_id": span_id,
                                            "op": "http",
                                            "status": "0",
                                        },
                                    },
                                    "measurements": {
                                        "lcp": {"value": 32.129},
                                        "lcp.elementSize": {"value": 4242},
                                    },
                                    "breakdowns": {
                                        "span_ops": {
                                            "ops.db": {"value": 62.512},
                                            "ops.http": {"value": 109.774},
                                            "total.time": {"value": 172.286},
                                        }
                                    },
                                    "spans": [
                                        {
                                            "op": "db",
                                            "trace_id": trace_id,
                                            "span_id": span_id + "1",
                                            "parent_span_id": None,
                                            "same_process_as_parent": True,
                                            "description": "SELECT * FROM users",
                                            "data": {},
                                            "timestamp": calendar.timegm(
                                                (self.base_time + timedelta(minutes=tick)).timetuple()
                                            ),
                                        }
                                    ],
                                },
                            },
                        ),
                        KafkaMessageMetadata(0, 0, self.base_time),
                    )
                )
                if processed:
                    events.append(processed)

    write_processed_messages(self.storage, events)
def test_error_processor() -> None:
    received_timestamp = datetime.now() - timedelta(minutes=1)
    error_timestamp = received_timestamp - timedelta(minutes=1)
    trace_id = str(uuid.uuid4())
    span_id = "deadbeef"
    error = (
        2,
        "insert",
        InsertEvent(
            {
                "organization_id": 1,
                "retention_days": 58,
                "event_id": "dcb9d002cac548c795d1c9adbfc68040",
                "group_id": 100,
                "project_id": 300688,
                "platform": "python",
                "message": "",
                "datetime": error_timestamp.strftime(PAYLOAD_DATETIME_FORMAT),
                "primary_hash": "04233d08ac90cf6fc015b1be5932e7e2",
                "data": {
                    "event_id": "dcb9d002cac548c795d1c9adbfc68040",
                    "project_id": 300688,
                    "release": None,
                    "dist": None,
                    "platform": "python",
                    "message": "",
                    "datetime": error_timestamp.strftime(PAYLOAD_DATETIME_FORMAT),
                    "tags": [
                        ["handled", "no"],
                        ["level", "error"],
                        ["mechanism", "excepthook"],
                        ["runtime", "CPython 3.7.6"],
                        ["runtime.name", "CPython"],
                        ["server_name", "snuba"],
                        ["environment", "dev"],
                        ["sentry:user", "this_is_me"],
                        ["sentry:release", "4d23338017cdee67daf25f2c"],
                    ],
                    "user": {
                        "username": "******",
                        "ip_address": "127.0.0.1",
                        "id": "still_me",
                        "email": "*****@*****.**",
                        "geo": {
                            "country_code": "XY",
                            "region": "fake_region",
                            "city": "fake_city",
                        },
                    },
                    "request": {
                        "url": "http://127.0.0.1:/query",
                        "headers": [
                            ["Accept-Encoding", "identity"],
                            ["Content-Length", "398"],
                            ["Host", "127.0.0.1:"],
                            ["Referer", "tagstore.something"],
                            ["Trace", "8fa73032d-1"],
                        ],
                        "data": "",
                        "method": "POST",
                        "env": {"SERVER_PORT": "1010", "SERVER_NAME": "snuba"},
                    },
                    "_relay_processed": True,
                    "breadcrumbs": {
                        "values": [
                            {
                                "category": "snuba.utils.streams.batching",
                                "level": "info",
                                "timestamp": error_timestamp.timestamp(),
                                "data": {
                                    "asctime": error_timestamp.strftime(
                                        PAYLOAD_DATETIME_FORMAT
                                    )
                                },
                                "message": "New partitions assigned: {}",
                                "type": "default",
                            },
                            {
                                "category": "snuba.utils.streams.batching",
                                "level": "info",
                                "timestamp": error_timestamp.timestamp(),
                                "data": {
                                    "asctime": error_timestamp.strftime(
                                        PAYLOAD_DATETIME_FORMAT
                                    )
                                },
                                "message": "Flushing ",
                                "type": "default",
                            },
                            {
                                "category": "httplib",
                                "timestamp": error_timestamp.timestamp(),
                                "type": "http",
                                "data": {
                                    "url": "http://127.0.0.1:8123/",
                                    "status_code": 500,
                                    "reason": "Internal Server Error",
                                    "method": "POST",
                                },
                                "level": "info",
                            },
                        ]
                    },
                    "contexts": {
                        "runtime": {
                            "version": "3.7.6",
                            "type": "runtime",
                            "name": "CPython",
                            "build": "3.7.6",
                        },
                        "trace": {"trace_id": trace_id, "span_id": span_id},
                    },
                    "culprit": "snuba.clickhouse.http in write",
                    "exception": {
                        "values": [
                            {
                                "stacktrace": {
                                    "frames": [
                                        {
                                            "function": "<module>",
                                            "abs_path": "/usr/local/bin/snuba",
                                            "pre_context": [
                                                "from pkg_resources import load_entry_point",
                                                "",
                                                "if __name__ == '__main__':",
                                                " sys.argv[0] = re.sub(r'(-script\\.pyw?|\\.exe)?$', '', sys.argv[0])",
                                                " sys.exit(",
                                            ],
                                            "post_context": [" )"],
                                            "vars": {
                                                "__spec__": "None",
                                                "__builtins__": "<module 'builtins' (built-in)>",
                                                "__annotations__": {},
                                                "__file__": "'/usr/local/bin/snuba'",
                                                "__loader__": "<_frozen_importlib_external.SourceFileLoader object at 0x7fbbc3a36ed0>",
                                                "__requires__": "'snuba'",
                                                "__cached__": "None",
                                                "__name__": "'__main__'",
                                                "__package__": "None",
                                                "__doc__": "None",
                                            },
                                            "module": "__main__",
                                            "filename": "snuba",
                                            "lineno": 11,
                                            "in_app": False,
                                            "data": {"orig_in_app": 1},
                                            "context_line": " load_entry_point('snuba', 'console_scripts', 'snuba')()",
                                        },
                                    ]
                                },
                                "type": "ClickHouseError",
                                "module": "snuba.clickhouse.http",
                                "value": "[171] DB::Exception: Block structure mismatch",
                                "mechanism": {"type": "excepthook", "handled": False},
                            }
                        ]
                    },
                    "extra": {
                        "sys.argv": [
                            "/usr/local/bin/snuba",
                            "consumer",
                            "--dataset",
                            "transactions",
                        ]
                    },
                    "fingerprint": ["{{ default }}"],
                    "hashes": ["c8b21c571231e989060b9110a2ade7d3"],
                    "hierarchical_hashes": [
                        "04233d08ac90cf6fc015b1be5932e7e3",
                        "04233d08ac90cf6fc015b1be5932e7e4",
                    ],
                    "key_id": "537125",
                    "level": "error",
                    "location": "snuba/clickhouse/http.py",
                    "logger": "",
                    "metadata": {
                        "function": "write",
                        "type": "ClickHouseError",
                        "value": "[171] DB::Exception: Block structure mismatch",
                        "filename": "snuba/something.py",
                    },
                    "modules": {
                        "cffi": "1.13.2",
                        "ipython-genutils": "0.2.0",
                        "isodate": "0.6.0",
                    },
                    "received": received_timestamp.timestamp(),
                    "sdk": {
                        "version": "0.0.0.0.1",
                        "name": "sentry.python",
                        "packages": [
                            {"version": "0.0.0.0.1", "name": "pypi:sentry-sdk"}
                        ],
                        "integrations": [
                            "argv",
                            "atexit",
                            "dedupe",
                            "excepthook",
                            "logging",
                            "modules",
                            "stdlib",
                            "threading",
                        ],
                    },
                    "timestamp": error_timestamp.timestamp(),
                    "title": "ClickHouseError: [171] DB::Exception: Block structure mismatch",
                    "type": "error",
                    "version": "7",
                },
            }
        ),
        None,
    )

    expected_result = {
        "project_id": 300688,
        "timestamp": error_timestamp,
        "event_id": str(UUID("dcb9d002cac548c795d1c9adbfc68040")),
        "platform": "python",
        "dist": None,
        "environment": "dev",
        "release": "4d23338017cdee67daf25f2c",
        "ip_address_v4": "127.0.0.1",
        "user": "******",
        "user_name": "me",
        "user_id": "still_me",
        "user_email": "*****@*****.**",
        "sdk_name": "sentry.python",
        "sdk_version": "0.0.0.0.1",
        "http_method": "POST",
        "http_referer": "tagstore.something",
        "trace_id": trace_id,
        "span_id": int(span_id, 16),
        "tags.key": [
            "environment",
            "handled",
            "level",
            "mechanism",
            "runtime",
            "runtime.name",
            "sentry:release",
            "sentry:user",
            "server_name",
        ],
        "tags.value": [
            "dev",
            "no",
            "error",
            "excepthook",
            "CPython 3.7.6",
            "CPython",
            "4d23338017cdee67daf25f2c",
            "this_is_me",
            "snuba",
        ],
        "contexts.key": [
            "runtime.version",
            "runtime.name",
            "runtime.build",
            "trace.trace_id",
            "trace.span_id",
            "geo.country_code",
            "geo.region",
            "geo.city",
        ],
        "contexts.value": [
            "3.7.6",
            "CPython",
            "3.7.6",
            trace_id,
            span_id,
            "XY",
            "fake_region",
            "fake_city",
        ],
        "partition": 1,
        "offset": 2,
        "message_timestamp": datetime(1970, 1, 1),
        "retention_days": 90,
        "deleted": 0,
        "group_id": 100,
        "primary_hash": str(UUID("04233d08ac90cf6fc015b1be5932e7e2")),
        "hierarchical_hashes": [
            str(UUID("04233d08ac90cf6fc015b1be5932e7e3")),
            str(UUID("04233d08ac90cf6fc015b1be5932e7e4")),
        ],
        "received": received_timestamp.astimezone(pytz.utc).replace(
            tzinfo=None, microsecond=0
        ),
        "message": "",
        "title": "ClickHouseError: [171] DB::Exception: Block structure mismatch",
        "culprit": "snuba.clickhouse.http in write",
        "level": "error",
        "location": "snuba/clickhouse/http.py",
        "version": "7",
        "type": "error",
        "exception_stacks.type": ["ClickHouseError"],
        "exception_stacks.value": ["[171] DB::Exception: Block structure mismatch"],
        "exception_stacks.mechanism_type": ["excepthook"],
        "exception_stacks.mechanism_handled": [False],
        "exception_frames.abs_path": ["/usr/local/bin/snuba"],
        "exception_frames.colno": [None],
        "exception_frames.filename": ["snuba"],
        "exception_frames.lineno": [11],
        "exception_frames.in_app": [False],
        "exception_frames.package": [None],
        "exception_frames.module": ["__main__"],
        "exception_frames.function": ["<module>"],
        "exception_frames.stack_level": [0],
        "sdk_integrations": [
            "argv",
            "atexit",
            "dedupe",
            "excepthook",
            "logging",
            "modules",
            "stdlib",
            "threading",
        ],
        "modules.name": ["cffi", "ipython-genutils", "isodate"],
        "modules.version": ["1.13.2", "0.2.0", "0.6.0"],
        "transaction_name": "",
    }

    meta = KafkaMessageMetadata(offset=2, partition=1, timestamp=datetime(1970, 1, 1))
    processor = ErrorsProcessor(
        {
            "environment": "environment",
            "sentry:release": "release",
            "sentry:dist": "dist",
            "sentry:user": "******",
            "transaction": "transaction_name",
            "level": "level",
        }
    )

    processed_message = processor.process_message(error, meta)
    expected_message = InsertBatch([expected_result], None)
    # assert on the rows first so we get a nice diff from pytest
    assert processed_message.rows[0] == expected_message.rows[0]
    assert processed_message == expected_message
def test_simple() -> None:
    request_body = {
        "selected_columns": ["event_id"],
        "orderby": "event_id",
        "sample": 0.1,
        "limit": 100,
        "offset": 50,
        "project": 1,
    }

    query = Query(
        Entity(EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model())
    )

    request = Request(
        id=uuid.UUID("a" * 32).hex,
        original_body=request_body,
        query=query,
        snql_anonymized="",
        query_settings=HTTPQuerySettings(referrer="search"),
        attribution_info=AttributionInfo(
            get_app_id("default"), "search", None, None, None
        ),
    )

    time = TestingClock()
    timer = Timer("test", clock=time)
    time.sleep(0.01)

    message = SnubaQueryMetadata(
        request=request,
        start_timestamp=datetime.utcnow() - timedelta(days=3),
        end_timestamp=datetime.utcnow(),
        dataset="events",
        timer=timer,
        query_list=[
            ClickhouseQueryMetadata(
                sql="select event_id from sentry_dist sample 0.1 prewhere project_id in (1) limit 50, 100",
                sql_anonymized="select event_id from sentry_dist sample 0.1 prewhere project_id in ($I) limit 50, 100",
                start_timestamp=datetime.utcnow() - timedelta(days=3),
                end_timestamp=datetime.utcnow(),
                stats={"sample": 10, "error_code": 386},
                status=QueryStatus.SUCCESS,
                profile=ClickhouseQueryProfile(
                    time_range=10,
                    table="events",
                    all_columns={"timestamp", "tags"},
                    multi_level_condition=False,
                    where_profile=FilterProfile(
                        columns={"timestamp"},
                        mapping_cols={"tags"},
                    ),
                    groupby_cols=set(),
                    array_join_cols=set(),
                ),
                trace_id="b" * 32,
            )
        ],
        projects={2},
        snql_anonymized=request.snql_anonymized,
        entity=EntityKey.EVENTS.value,
    ).to_dict()

    processor = (
        get_writable_storage(StorageKey.QUERYLOG)
        .get_table_writer()
        .get_stream_loader()
        .get_processor()
    )

    assert processor.process_message(
        message, KafkaMessageMetadata(0, 0, datetime.now())
    ) == InsertBatch(
        [
            {
                "request_id": str(uuid.UUID("a" * 32)),
                "request_body": '{"limit": 100, "offset": 50, "orderby": "event_id", "project": 1, "sample": 0.1, "selected_columns": ["event_id"]}',
                "referrer": "search",
                "dataset": "events",
                "projects": [2],
                "organization": None,
                "timestamp": timer.for_json()["timestamp"],
                "duration_ms": 10,
                "status": "success",
                "clickhouse_queries.sql": [
                    "select event_id from sentry_dist sample 0.1 prewhere project_id in (1) limit 50, 100"
                ],
                "clickhouse_queries.status": ["success"],
                "clickhouse_queries.trace_id": [str(uuid.UUID("b" * 32))],
                "clickhouse_queries.duration_ms": [0],
                "clickhouse_queries.stats": ['{"error_code": 386, "sample": 10}'],
                "clickhouse_queries.final": [0],
                "clickhouse_queries.cache_hit": [0],
                "clickhouse_queries.sample": [10.0],
                "clickhouse_queries.max_threads": [0],
                "clickhouse_queries.num_days": [10],
                "clickhouse_queries.clickhouse_table": [""],
                "clickhouse_queries.query_id": [""],
                "clickhouse_queries.is_duplicate": [0],
                "clickhouse_queries.consistent": [0],
                "clickhouse_queries.all_columns": [["tags", "timestamp"]],
                "clickhouse_queries.or_conditions": [False],
                "clickhouse_queries.where_columns": [["timestamp"]],
                "clickhouse_queries.where_mapping_columns": [["tags"]],
                "clickhouse_queries.groupby_columns": [[]],
                "clickhouse_queries.array_join_columns": [[]],
            }
        ],
        None,
    )