Example #1
    def write(*, dataset: Dataset) -> RespTuple:
        from snuba.processor import InsertBatch

        rows: MutableSequence[WriterTableRow] = []
        offset_base = int(round(time.time() * 1000))
        for index, message in enumerate(json.loads(http_request.data)):
            offset = offset_base + index
            processed_message = (
                enforce_table_writer(dataset)
                .get_stream_loader()
                .get_processor()
                .process_message(
                    message,
                    KafkaMessageMetadata(
                        offset=offset, partition=0, timestamp=datetime.utcnow()
                    ),
                )
            )
            if processed_message:
                assert isinstance(processed_message, InsertBatch)
                rows.extend(processed_message.rows)

        BatchWriterEncoderWrapper(
            enforce_table_writer(dataset).get_batch_writer(metrics), JSONRowEncoder(),
        ).write(rows)

        return ("ok", 200, {"Content-Type": "text/plain"})
Example #2
    def generate_outcomes(
        self,
        org_id: int,
        project_id: int,
        num_outcomes: int,
        outcome: int,
        time_since_base: timedelta,
    ) -> None:
        outcomes = []
        for _ in range(num_outcomes):
            processed = (
                self.storage.get_table_writer()
                .get_stream_loader()
                .get_processor()
                .process_message(
                    {
                        "project_id": project_id,
                        "event_id": uuid.uuid4().hex,
                        "timestamp": (self.base_time + time_since_base).strftime(
                            "%Y-%m-%dT%H:%M:%S.%fZ"
                        ),
                        "org_id": org_id,
                        "reason": None,
                        "key_id": 1,
                        "outcome": outcome,
                    },
                    KafkaMessageMetadata(0, 0, self.base_time),
                )
            )
            if processed:
                outcomes.append(processed)

        write_processed_messages(self.storage, outcomes)
Example #3
    def generate_sets(self) -> None:
        events = []
        processor = self.storage.get_table_writer().get_stream_loader(
        ).get_processor()

        for n in range(self.seconds):
            for p in self.project_ids:
                msg = {
                    "org_id": self.org_id,
                    "project_id": p,
                    "type": METRICS_SET_TYPE,
                    "value": [n % self.unique_set_values],
                    "timestamp": self.base_time.timestamp() + n,
                    "tags": self.default_tags,
                    "metric_id": self.metric_id,
                    "retention_days": RETENTION_DAYS,
                }

                processed = processor.process_message(
                    msg,
                    KafkaMessageMetadata(0, 0, self.base_time),
                )
                if processed:
                    events.append(processed)
        write_processed_messages(self.storage, events)
Example #4
def generate_transactions() -> None:
    from datetime import datetime

    table_writer = get_writable_storage(
        StorageKey.TRANSACTIONS).get_table_writer()

    rows = []

    for i in range(5):
        raw_transaction = get_raw_transaction()
        # Older versions of this table did not have measurements
        del raw_transaction["data"]["measurements"]

        processed = (
            table_writer.get_stream_loader().get_processor().process_message(
                (2, "insert", raw_transaction),
                KafkaMessageMetadata(0, 0, datetime.utcnow()),
            ))
        rows.extend(processed.rows)

    BatchWriterEncoderWrapper(
        table_writer.get_batch_writer(metrics=DummyMetricsBackend(
            strict=True)),
        JSONRowEncoder(),
    ).write(rows)
Example #5
def test_metrics_processor(
    message: Mapping[str, Any],
    expected_set: Optional[Sequence[Mapping[str, Any]]],
    expected_counter: Optional[Sequence[Mapping[str, Any]]],
    expected_distributions: Optional[Sequence[Mapping[str, Any]]],
) -> None:
    settings.DISABLED_DATASETS = set()

    meta = KafkaMessageMetadata(offset=100,
                                partition=1,
                                timestamp=datetime(1970, 1, 1))

    expected_set_result = (InsertBatch(expected_set, None)
                           if expected_set is not None else None)
    assert SetsMetricsProcessor().process_message(message,
                                                  meta) == expected_set_result

    expected_counter_result = (InsertBatch(expected_counter, None)
                               if expected_counter is not None else None)
    assert (CounterMetricsProcessor().process_message(
        message, meta) == expected_counter_result)

    expected_distributions_result = (InsertBatch(expected_distributions, None)
                                     if expected_distributions is not None else
                                     None)
    assert (DistributionsMetricsProcessor().process_message(
        message, meta) == expected_distributions_result)
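The @pytest.mark.parametrize decorator that feeds message/expected tuples into test_metrics_processor is not shown above. A minimal sketch of how such cases are typically wired up with pytest, using obviously fake placeholder values (the names and payloads below are illustrative, not the project's real fixtures):

import pytest

METRICS_CASES = [
    pytest.param(
        {"type": "s", "value": [1, 2]},  # placeholder raw message, not a real payload
        [{"metric_id": 0}],              # placeholder expected set rows
        None,                            # no expected counter rows
        None,                            # no expected distribution rows
        id="set-only",
    ),
]

@pytest.mark.parametrize(
    "message, expected_set, expected_counter, expected_distributions",
    METRICS_CASES,
)
def test_metrics_processor_shape(message, expected_set, expected_counter, expected_distributions):
    ...  # body as in the example above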
Example #6
def process_message(
    processor: MessageProcessor, consumer_group: str,
    message: Message[KafkaPayload]
) -> Union[None, BytesInsertBatch, ReplacementBatch]:

    if skip_kafka_message(message):
        logger.warning(
            f"A consumer for {message.partition.topic.name} skipped a message!",
            extra=__message_to_dict(message),
        )
        return None

    try:
        result = processor.process_message(
            rapidjson.loads(message.payload.value),
            KafkaMessageMetadata(message.offset, message.partition.index,
                                 message.timestamp),
        )
    except Exception as err:
        logger.error(err, exc_info=True)
        raise InvalidMessages(
            [__invalid_kafka_message(message, consumer_group, err)]) from err

    if isinstance(result, InsertBatch):
        return BytesInsertBatch(
            [json_row_encoder.encode(row) for row in result.rows],
            result.origin_timestamp,
        )
    else:
        return result
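skip_kafka_message is referenced above but not defined in this snippet. A minimal sketch of what such a predicate could look like, assuming a deny list of (topic, partition, offset) positions; the actual configuration mechanism in snuba may differ:

from typing import Set, Tuple

# Hypothetical deny list of poisonous message positions.
SKIPPED_MESSAGE_POSITIONS: Set[Tuple[str, int, int]] = set()

def skip_kafka_message_sketch(message) -> bool:
    # Skip a message only when its exact (topic, partition, offset) was flagged.
    return (
        message.partition.topic.name,
        message.partition.index,
        message.offset,
    ) in SKIPPED_MESSAGE_POSITIONS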
Example #7
    def test_missing_trace_context(self) -> None:
        start, finish = self.__get_timestamps()
        message = TransactionEvent(
            event_id="e5e062bf2e1d4afd96fd2f90b6770431",
            trace_id="7400045b25c443b885914600aa83ad04",
            span_id="8841662216cc598b",
            transaction_name="/organizations/:orgId/issues/",
            status="cancelled",
            op="navigation",
            timestamp=finish,
            start_timestamp=start,
            platform="python",
            dist="",
            user_name="me",
            user_id="myself",
            user_email="*****@*****.**",
            ipv4="127.0.0.1",
            ipv6=None,
            environment="prod",
            release="34a554c14b68285d8a8eb6c5c4c56dfc1db9a83a",
            sdk_name="sentry.python",
            sdk_version="0.9.0",
            http_method="POST",
            http_referer="tagstore.something",
            geo={"country_code": "XY", "region": "fake_region", "city": "fake_city"},
        )
        payload = message.serialize()
        # Force an invalid event
        del payload[2]["data"]["contexts"]

        meta = KafkaMessageMetadata(
            offset=1, partition=2, timestamp=datetime(1970, 1, 1)
        )
        processor = TransactionsMessageProcessor()
        assert processor.process_message(payload, meta) is None
Example #8
    def test_process_message(self) -> None:
        meta = KafkaMessageMetadata(offset=0,
                                    partition=0,
                                    timestamp=datetime(1970, 1, 1))

        message = ReplayEvent(
            replay_id="e5e062bf2e1d4afd96fd2f90b6770431",
            title="/organizations/:orgId/issues/",
            trace_ids=[
                "36e980a9-c602-4cde-9f5d-089f15b83b5f",
                "8bea4461-d8b9-44f3-93c1-5a3cb1c4169a",
            ],
            sequence_id=0,
            timestamp=datetime.now(tz=timezone.utc).timestamp(),
            platform="python",
            dist="",
            user_name="me",
            user_id="232",
            user_email="*****@*****.**",
            ipv4="127.0.0.1",
            ipv6=None,
            environment="prod",
            release="34a554c14b68285d8a8eb6c5c4c56dfc1db9a83a",
            sdk_name="sentry.python",
            sdk_version="0.9.0",
        )

        assert ReplaysProcessor().process_message(
            message.serialize(),
            meta) == InsertBatch([message.build_result(meta)], None)
Example #9
def process_message_multistorage(
    message: Message[MultistorageKafkaPayload],
) -> Sequence[Tuple[StorageKey, Union[None, JSONRowInsertBatch,
                                      ReplacementBatch]]]:
    # XXX: Avoid circular import on KafkaMessageMetadata, remove when that type
    # is itself removed.
    from snuba.datasets.storages.factory import get_writable_storage

    value = rapidjson.loads(message.payload.payload.value)
    metadata = KafkaMessageMetadata(message.offset, message.partition.index,
                                    message.timestamp)

    results: MutableSequence[Tuple[StorageKey, Union[None, JSONRowInsertBatch,
                                                     ReplacementBatch]]] = []

    for storage_key in message.payload.storage_keys:
        result = (get_writable_storage(storage_key).get_table_writer(
        ).get_stream_loader().get_processor().process_message(value, metadata))
        if isinstance(result, InsertBatch):
            results.append((
                storage_key,
                JSONRowInsertBatch(
                    [json_row_encoder.encode(row) for row in result.rows],
                    result.origin_timestamp,
                ),
            ))
        else:
            results.append((storage_key, result))

    return results
Example #10
    def generate_uniform_distributions(self) -> None:
        events = []
        processor = self.storage.get_table_writer().get_stream_loader(
        ).get_processor()
        value_array = list(range(self.d_range_min, self.d_range_max))

        for n in range(self.seconds):
            for p in self.project_ids:
                msg = {
                    "org_id": self.org_id,
                    "project_id": p,
                    "type": METRICS_DISTRIBUTIONS_TYPE,
                    "value": value_array,
                    "timestamp": self.base_time.timestamp() + n,
                    "tags": self.default_tags,
                    "metric_id": self.metric_id,
                    "retention_days": RETENTION_DAYS,
                }

                processed = processor.process_message(
                    msg,
                    KafkaMessageMetadata(0, 0, self.base_time),
                )
                if processed:
                    events.append(processed)
        write_processed_messages(self.storage, events)
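generate_sets (Example #3) and generate_uniform_distributions (Example #10) differ only in the payload's "type" and "value". A hypothetical consolidation of the shared loop, sketched as a helper method (the name and signature are invented for illustration; everything else reuses the calls shown in those examples):

from typing import Any, Callable

def _generate_metric_messages_sketch(
    self, metric_type: str, value_for_tick: Callable[[int], Any]
) -> None:
    events = []
    processor = self.storage.get_table_writer().get_stream_loader().get_processor()
    for n in range(self.seconds):
        for p in self.project_ids:
            msg = {
                "org_id": self.org_id,
                "project_id": p,
                "type": metric_type,
                "value": value_for_tick(n),
                "timestamp": self.base_time.timestamp() + n,
                "tags": self.default_tags,
                "metric_id": self.metric_id,
                "retention_days": RETENTION_DAYS,
            }
            processed = processor.process_message(
                msg, KafkaMessageMetadata(0, 0, self.base_time)
            )
            if processed:
                events.append(processed)
    write_processed_messages(self.storage, events)

# Usage, e.g.:
# self._generate_metric_messages_sketch(METRICS_SET_TYPE, lambda n: [n % self.unique_set_values])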
Example #11
    def setup_method(self, test_method):
        self.metadata = KafkaMessageMetadata(0, 0, datetime.now())
        self.event = get_raw_event()

        self.processor = (
            get_writable_storage(StorageKey.EVENTS)
            .get_table_writer()
            .get_stream_loader()
            .get_processor()
        )
Example #12
    def test_messages(self) -> None:
        processor = GroupedMessageProcessor("sentry_groupedmessage")

        metadata = KafkaMessageMetadata(offset=42,
                                        partition=0,
                                        timestamp=datetime(1970, 1, 1))

        ret = processor.process_message(self.INSERT_MSG, metadata)
        assert ret == InsertBatch(
            [self.PROCESSED], datetime(2019, 9, 19, 0, 17, 21, 447870, tzinfo=pytz.UTC)
        )
        write_processed_messages(self.storage, [ret])
        ret = (
            get_cluster(StorageSetKey.EVENTS)
            .get_query_connection(ClickhouseClientSettings.INSERT)
            .execute("SELECT * FROM groupedmessage_local;")
        )
        assert ret[0] == (
            42,  # offset
            0,  # deleted
            2,  # project_id
            74,  # id
            0,  # status
            datetime(2019, 6, 19, 6, 46, 28),
            datetime(2019, 6, 19, 6, 45, 32),
            datetime(2019, 6, 19, 6, 45, 32),
            None,
        )

        ret = processor.process_message(self.UPDATE_MSG, metadata)
        assert ret == InsertBatch(
            [self.PROCESSED], datetime(2019, 9, 19, 0, 17, 21, 447870, tzinfo=pytz.UTC)
        )

        ret = processor.process_message(self.DELETE_MSG, metadata)
        assert ret == InsertBatch(
            [self.DELETED], datetime(2019, 9, 19, 0, 17, 21, 447870, tzinfo=pytz.UTC)
        )
Example #13
def test_processors_of_multistorage_consumer_are_idempotent(
        message: Tuple[int, str, InsertEvent],
        processor: MessageProcessor) -> None:
    """
    Test that when the same message is provided to the processors, the result would be the same. That is the process
    message operation is idempotent.
    """
    metadata = KafkaMessageMetadata(1000, 1, datetime.now())

    result1 = processor.process_message(message, metadata)
    result2 = processor.process_message(message, metadata)

    assert result1 == result2
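A related check one might add next to the idempotency assertion above is that process_message does not mutate its input; a small sketch using copy.deepcopy (the helper name is illustrative):

import copy

def assert_processor_does_not_mutate_input_sketch(processor, message, metadata) -> None:
    # Illustrative extension of the idempotency test: the processor should also
    # leave the raw message untouched between calls.
    original = copy.deepcopy(message)
    processor.process_message(message, metadata)
    assert message == original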
Example #14
 def generate_session_events(self, org_id, project_id: int) -> None:
     processor = self.storage.get_table_writer().get_stream_loader().get_processor()
     meta = KafkaMessageMetadata(
         offset=1, partition=2, timestamp=datetime(1970, 1, 1)
     )
     distinct_id = uuid4().hex
     template = {
         "session_id": uuid4().hex,
         "distinct_id": distinct_id,
         "duration": None,
         "environment": "production",
         "org_id": org_id,
         "project_id": project_id,
         "release": "[email protected]",
         "retention_days": settings.DEFAULT_RETENTION_DAYS,
         "seq": 0,
         "errors": 0,
         "received": datetime.utcnow().timestamp(),
         "started": self.started.timestamp(),
     }
     events = [
         processor.process_message(
             {
                 **template,
                 "status": "exited",
                 "duration": 1947.49,
                 "session_id": uuid4().hex,
                 "started": (self.started + timedelta(minutes=13)).timestamp(),
             },
             meta,
         ),
         processor.process_message(
             {**template, "status": "exited", "quantity": 5}, meta,
         ),
         processor.process_message(
             {**template, "status": "errored", "errors": 1, "quantity": 2}, meta,
         ),
         processor.process_message(
             {
                 **template,
                 "distinct_id": distinct_id,
                 "status": "errored",
                 "errors": 1,
                 "quantity": 2,
                 "started": (self.started + timedelta(minutes=24)).timestamp(),
             },
             meta,
         ),
     ]
     filtered = [e for e in events if e]
     write_processed_messages(self.storage, filtered)
Example #15
def write_unprocessed_events(storage: WritableStorage,
                             events: Sequence[InsertEvent]) -> None:

    processor = storage.get_table_writer().get_stream_loader().get_processor()

    processed_messages = []
    for i, event in enumerate(events):
        processed_message = processor.process_message(
            (2, "insert", event, {}),
            KafkaMessageMetadata(i, 0, datetime.now()))
        assert processed_message is not None
        processed_messages.append(processed_message)

    write_processed_messages(storage, processed_messages)
Example #16
def process_message(
    processor: MessageProcessor, message: Message[KafkaPayload]
) -> Union[None, JSONRowInsertBatch, ReplacementBatch]:
    result = processor.process_message(
        rapidjson.loads(message.payload.value),
        KafkaMessageMetadata(message.offset, message.partition.index,
                             message.timestamp),
    )

    if isinstance(result, InsertBatch):
        return JSONRowInsertBatch(
            [json_row_encoder.encode(row) for row in result.rows],
            result.origin_timestamp,
        )
    else:
        return result
Example #17
    def test_ingest_session_event_abnormal(self) -> None:
        timestamp = datetime.now(timezone.utc)
        started = timestamp - timedelta(hours=1)

        payload = {
            "device_family": "iPhone12,3",
            "distinct_id": "b3ef3211-58a4-4b36-a9a1-5a55df0d9aaf",
            "duration": 1947.49,
            "environment": "production",
            "org_id": 1,
            "os": "iOS",
            "os_version": "13.3.1",
            "project_id": 42,
            "release": "[email protected]",
            "retention_days": 90,
            "seq": 42,
            "errors": 0,
            "session_id": "8333339f-5675-4f89-a9a0-1c935255ab58",
            "started": started.timestamp(),
            "status": "abnormal",
            "received": timestamp.timestamp(),
        }

        meta = KafkaMessageMetadata(offset=1,
                                    partition=2,
                                    timestamp=datetime(1970, 1, 1))
        assert SessionsProcessor().process_message(
            payload, meta) == InsertBatch(
                [{
                    "distinct_id": "b3ef3211-58a4-4b36-a9a1-5a55df0d9aaf",
                    "quantity": 1,
                    "duration": 1947490,
                    "environment": "production",
                    "org_id": 1,
                    "project_id": 42,
                    "release": "[email protected]",
                    "retention_days": 90,
                    "seq": 42,
                    # abnormal counts as at least one error
                    "errors": 1,
                    "session_id": "8333339f-5675-4f89-a9a0-1c935255ab58",
                    "started": started.replace(tzinfo=None),
                    "status": 3,
                    "received": timestamp.replace(tzinfo=None),
                }],
                None,
            )
Example #18
def process_message_multistorage(
    message: Message[MultistorageKafkaPayload],
) -> MultistorageProcessedMessage:
    value = rapidjson.loads(message.payload.payload.value)
    metadata = KafkaMessageMetadata(message.offset, message.partition.index,
                                    message.timestamp)

    results: MutableSequence[Tuple[StorageKey, Union[None, BytesInsertBatch,
                                                     ReplacementBatch]]] = []

    for index, storage_key in enumerate(message.payload.storage_keys):
        result = _process_message_multistorage_work(metadata=metadata,
                                                    storage_key=storage_key,
                                                    storage_message=value)
        results.append((storage_key, result))

    return results
Example #19
    def generate_outcomes(
        self,
        org_id: int,
        project_id: int,
        num_outcomes: int,
        outcome: int,
        time_since_base: timedelta,
        category: Optional[int],
        quantity: Optional[int] = None,
    ) -> None:
        outcomes = []
        for _ in range(num_outcomes):
            message = {
                "project_id": project_id,
                "event_id": uuid.uuid4().hex,
                "timestamp": (self.base_time + time_since_base).strftime(
                    "%Y-%m-%dT%H:%M:%S.%fZ"
                ),
                "org_id": org_id,
                "reason": None,
                "key_id": 1,
                "outcome": outcome,
                "category": category,
                "quantity": quantity,
            }
            if message["category"] is None:
                del message["category"]  # for testing None category case
            if message["quantity"] is None:
                del message["quantity"]  # for testing None quantity case
            processed = (
                self.storage.get_table_writer()
                .get_stream_loader()
                .get_processor()
                .process_message(
                    message,
                    KafkaMessageMetadata(0, 0, self.base_time),
                )
            )
            if processed:
                outcomes.append(processed)

        write_processed_messages(self.storage, outcomes)
Example #20
    def test_messages(self) -> None:
        processor = GroupAssigneeProcessor("sentry_groupasignee")

        metadata = KafkaMessageMetadata(
            offset=42, partition=0, timestamp=datetime(1970, 1, 1)
        )

        ret = processor.process_message(self.INSERT_MSG, metadata)
        assert ret == InsertBatch(
            [self.PROCESSED], datetime(2019, 9, 19, 0, 17, 55, 32443, tzinfo=pytz.UTC)
        )
        write_processed_messages(self.storage, [ret])
        ret = (
            self.storage.get_cluster()
            .get_query_connection(ClickhouseClientSettings.QUERY)
            .execute("SELECT * FROM groupassignee_local;")
            .results
        )
        assert ret[0] == (
            42,  # offset
            0,  # deleted
            2,  # project_id
            1359,  # group_id
            datetime(2019, 9, 19, 0, 17, 55),
            1,  # user_id
            None,  # team_id
        )

        ret = processor.process_message(self.UPDATE_MSG_NO_KEY_CHANGE, metadata)
        assert ret == InsertBatch(
            [self.PROCESSED], datetime(2019, 9, 19, 0, 6, 56, 376853, tzinfo=pytz.UTC)
        )

        # Tests an update with key change which becomes a two inserts:
        # one deletion and the insertion of the new row.
        ret = processor.process_message(self.UPDATE_MSG_WITH_KEY_CHANGE, metadata)
        assert ret == InsertBatch(
            [self.DELETED, self.PROCESSED_UPDATE],
            datetime(2019, 9, 19, 0, 6, 56, 376853, tzinfo=pytz.UTC),
        )

        ret = processor.process_message(self.DELETE_MSG, metadata)
        assert ret == InsertBatch(
            [self.DELETED], datetime(2019, 9, 19, 0, 17, 21, 447870, tzinfo=pytz.UTC)
        )
Example #21
def test_metrics_polymorphic_processor(
    message: Mapping[str, Any],
    expected_output: Optional[Sequence[Mapping[str, Any]]],
) -> None:
    settings.DISABLED_DATASETS = set()

    meta = KafkaMessageMetadata(offset=100,
                                partition=1,
                                timestamp=datetime(1970, 1, 1))
    # test_time_bucketing tests the bucket function; parameterizing the output times
    # here would require repeating the code in the class we're testing.
    with patch(
            "snuba.datasets.metrics_aggregate_processor.timestamp_to_bucket",
            lambda _, __: MOCK_TIME_BUCKET,
    ):
        expected_polymorphic_result = (AggregateInsertBatch(
            expected_output, None) if expected_output is not None else None)
        assert (PolymorphicMetricsProcessor().process_message(
            message, meta) == expected_polymorphic_result)
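The test patches snuba.datasets.metrics_aggregate_processor.timestamp_to_bucket to a fixed MOCK_TIME_BUCKET. As a rough mental model, a bucketing function of that shape floors a timestamp to a granularity; the sketch below is an assumption for illustration, not the real implementation:

from datetime import datetime, timezone

def timestamp_to_bucket_sketch(timestamp: datetime, granularity_secs: int) -> datetime:
    # Assumption: floor the timestamp to the nearest multiple of the granularity.
    epoch_secs = int(timestamp.timestamp())
    floored = epoch_secs - (epoch_secs % granularity_secs)
    return datetime.fromtimestamp(floored, tz=timezone.utc)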
Example #22
def test_send_message(xid: int, expected: Optional[ProcessedMessage]) -> None:
    processor = (
        get_writable_storage(StorageKey.GROUPEDMESSAGES)
        .get_table_writer()
        .get_stream_loader()
        .get_processor()
    )

    worker = SnapshotProcessor(
        processor=processor,
        snapshot_id=SnapshotId(str(uuid1())),
        transaction_data=TransactionData(xmin=Xid(100),
                                         xmax=Xid(200),
                                         xip_list=[Xid(120),
                                                   Xid(130)]),
    )

    ret = worker.process_message(
        get_insert_event(xid),
        KafkaMessageMetadata(offset=1, partition=0, timestamp=datetime.now()),
    )

    assert ret == expected
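The xid parametrization matters because PostgreSQL snapshot semantics decide whether a row written by transaction xid is visible given xmin=100, xmax=200 and xip_list=[120, 130]. A sketch of the standard visibility rule (the actual check lives inside SnapshotProcessor):

from typing import Sequence

def xid_visible_in_snapshot_sketch(xid: int, xmin: int, xmax: int, xip_list: Sequence[int]) -> bool:
    if xid < xmin:
        return True          # committed before the snapshot started
    if xid >= xmax:
        return False         # started after the snapshot was taken
    return xid not in xip_list  # transactions still in progress are not visible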
Example #23
def test_span_process() -> None:
    timestamp = datetime.now(tz=timezone.utc) - timedelta(seconds=5)
    start_timestamp = timestamp - timedelta(seconds=4)
    message = SpanEvent(
        event_id="e5e062bf2e1d4afd96fd2f90b6770431",
        trace_id="7400045b25c443b885914600aa83ad04",
        span_id="8841662216cc598b",
        parent_span_id="b76a8ca0b0908a15",
        transaction_name="/organizations/:orgId/issues/",
        op="navigation",
        timestamp=timestamp.timestamp(),
        start_timestamp=start_timestamp.timestamp(),
        spans=[
            SpanData(
                trace_id="7400045b25c443b885914600aa83ad04",
                span_id="b95eff64930fef25",
                parent_span_id="8841662216cc598b",
                op="db",
                start_timestamp=(start_timestamp +
                                 timedelta(seconds=1)).timestamp(),
                timestamp=(start_timestamp + timedelta(seconds=2)).timestamp(),
            ),
            SpanData(
                trace_id="7400045b25c443b885914600aa83ad04",
                span_id="9f8e7bbe7bf22e09",
                parent_span_id="b95eff64930fef25",
                op="web",
                start_timestamp=(start_timestamp +
                                 timedelta(seconds=2)).timestamp(),
                timestamp=(start_timestamp + timedelta(seconds=3)).timestamp(),
            ),
        ],
    )
    meta = KafkaMessageMetadata(offset=1,
                                partition=2,
                                timestamp=datetime(1970, 1, 1))
    processed = SpansMessageProcessor().process_message(
        message.serialize(), meta)
    assert isinstance(processed, InsertBatch)
    expected_rows = message.build_result(meta)

    for span, expected in zip(processed.rows, expected_rows):
        assert span == expected
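Since zip stops at the shorter of the two sequences, the final loop passes vacuously if the processor emits fewer rows than expected. One possible tightening is a length assertion before the loop:

    assert len(processed.rows) == len(expected_rows)
    for span, expected in zip(processed.rows, expected_rows):
        assert span == expected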
Example #24
def process_message_multistorage_identical_storages(
    message: Message[MultistorageKafkaPayload],
) -> MultistorageProcessedMessage:
    """
    This method is similar to process_message_multistorage except for a minor difference.
    It performs an optimization where it avoids processing a message multiple times if the
    it finds that the storages on which data needs to be written are identical. This is a
    performance optimization since we remove the message processing time completely for all
    identical storages like errors and errors_v2.

    It is possible that the storage keys in the message could be a mix of identical and
    non-identical storages. This method takes into account that scenario as well.

    The reason why this method has been created rather than modifying the existing
    process_message_multistorage is to avoid doing a check for every message in cases
    where there are no identical storages like metrics.
    """
    value = rapidjson.loads(message.payload.payload.value)
    metadata = KafkaMessageMetadata(message.offset, message.partition.index,
                                    message.timestamp)

    intermediate_results: MutableMapping[StorageKey,
                                         Union[None, BytesInsertBatch,
                                               ReplacementBatch]] = {}

    for index, storage_key in enumerate(message.payload.storage_keys):
        result = None
        for other_storage_key, insert_batch in intermediate_results.items():
            if are_writes_identical(storage_key, other_storage_key):
                result = insert_batch
                break

        if result is None:
            result = _process_message_multistorage_work(
                metadata=metadata,
                storage_key=storage_key,
                storage_message=value,
            )

        intermediate_results[storage_key] = result

    return list(intermediate_results.items())
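The optimization above amounts to computing each result once per equivalence class of storages. A generic sketch of that idea, with are_writes_identical abstracted into a predicate (the names are illustrative):

from typing import Callable, Dict, List, Tuple, TypeVar

K = TypeVar("K")
V = TypeVar("V")

def process_once_per_equivalence_class_sketch(
    keys: List[K],
    process: Callable[[K], V],
    is_identical: Callable[[K, K], bool],
) -> List[Tuple[K, V]]:
    done: Dict[K, V] = {}
    for key in keys:
        # Reuse a previously computed (non-None) result for an identical key,
        # mirroring the behavior of the function above.
        reused = next((v for k, v in done.items() if is_identical(key, k)), None)
        done[key] = reused if reused is not None else process(key)
    return list(done.items())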
Example #25
 def generate_counters(self) -> None:
     events = []
     for n in range(self.seconds):
         for p in self.project_ids:
             processed = (
                 self.storage.get_table_writer()
                 .get_stream_loader()
                 .get_processor()
                 .process_message(
                     {
                         "org_id": self.org_id,
                         "project_id": p,
                         "unit": "ms",
                         "type": METRICS_COUNTERS_TYPE,
                         "value": 1.0,
                         "timestamp": self.base_time.timestamp() + n,
                         "tags": self.default_tags,
                         "metric_id": self.metric_id,
                         "retention_days": RETENTION_DAYS,
                     },
                     KafkaMessageMetadata(0, 0, self.base_time),
                 )
             )
             if processed:
                 events.append(processed)
     write_processed_messages(self.storage, events)
Example #26
    def test_base_process(self) -> None:
        old_skip_context = settings.TRANSACT_SKIP_CONTEXT_STORE
        settings.TRANSACT_SKIP_CONTEXT_STORE = {1: {"experiments"}}

        start, finish = self.__get_timestamps()
        message = TransactionEvent(
            event_id="e5e062bf2e1d4afd96fd2f90b6770431",
            trace_id="7400045b25c443b885914600aa83ad04",
            span_id="8841662216cc598b",
            transaction_name="/organizations/:orgId/issues/",
            status="cancelled",
            op="navigation",
            timestamp=finish,
            start_timestamp=start,
            platform="python",
            dist="",
            user_name="me",
            user_id="myself",
            user_email="*****@*****.**",
            ipv4="127.0.0.1",
            ipv6=None,
            environment="prod",
            release="34a554c14b68285d8a8eb6c5c4c56dfc1db9a83a",
            sdk_name="sentry.python",
            sdk_version="0.9.0",
            http_method="POST",
            http_referer="tagstore.something",
            geo={"country_code": "XY", "region": "fake_region", "city": "fake_city"},
        )
        meta = KafkaMessageMetadata(
            offset=1, partition=2, timestamp=datetime(1970, 1, 1)
        )
        assert TransactionsMessageProcessor().process_message(
            message.serialize(), meta
        ) == InsertBatch([message.build_result(meta)], None)
        settings.TRANSACT_SKIP_CONTEXT_STORE = old_skip_context
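The manual save/restore of settings.TRANSACT_SKIP_CONTEXT_STORE above leaks the modified value if the assertion fails. With pytest, the monkeypatch fixture restores the attribute automatically at teardown; a minimal sketch of that alternative:

    def test_base_process_sketch(self, monkeypatch) -> None:
        monkeypatch.setattr(settings, "TRANSACT_SKIP_CONTEXT_STORE", {1: {"experiments"}})
        ...  # build the TransactionEvent and assert as in the example above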
Example #27
def test_metrics_aggregate_processor(
    message: Mapping[str, Any],
    expected_set: Optional[Sequence[Mapping[str, Any]]],
    expected_counter: Optional[Sequence[Mapping[str, Any]]],
    expected_distributions: Optional[Sequence[Mapping[str, Any]]],
) -> None:
    settings.DISABLED_DATASETS = set()
    settings.WRITE_METRICS_AGG_DIRECTLY = True

    meta = KafkaMessageMetadata(offset=100,
                                partition=1,
                                timestamp=datetime(1970, 1, 1))

    expected_set_result = (AggregateInsertBatch(expected_set, None)
                           if expected_set is not None else None)
    # test_time_bucketing tests the bucket function; parameterizing the output times
    # here would require repeating the code in the class we're testing.
    with patch(
            "snuba.datasets.metrics_aggregate_processor.timestamp_to_bucket",
            lambda _, __: MOCK_TIME_BUCKET,
    ):
        assert (SetsAggregateProcessor().process_message(
            message, meta) == expected_set_result)

        expected_counter_result = (AggregateInsertBatch(
            expected_counter, None) if expected_counter is not None else None)
        assert (CounterAggregateProcessor().process_message(
            message, meta) == expected_counter_result)

        expected_distributions_result = (AggregateInsertBatch(
            expected_distributions, None) if expected_distributions is not None
                                         else None)
        assert (DistributionsAggregateProcessor().process_message(
            message, meta) == expected_distributions_result)

        settings.WRITE_METRICS_AGG_DIRECTLY = False
Example #28
 def generate_fizzbuzz_events(self) -> None:
     """
     Generate a deterministic set of events across a time range.
     """
     events = []
     for tick in range(self.minutes):
         tock = tick + 1
         for p in self.project_ids:
             # project N sends an event every Nth minute
             if tock % p == 0:
                 trace_id = "7400045b25c443b885914600aa83ad04"
                 span_id = "8841662216cc598b"
                 processed = (
                     self.storage.get_table_writer()
                     .get_stream_loader()
                     .get_processor()
                     .process_message(
                         (
                             2,
                             "insert",
                             {
                                 "project_id": p,
                                 "event_id": uuid.uuid4().hex,
                                 "deleted": 0,
                                 "datetime": (
                                     self.base_time + timedelta(minutes=tick)
                                 ).isoformat(),
                                 "platform": self.platforms[
                                     (tock * p) % len(self.platforms)
                                 ],
                                 "retention_days": settings.DEFAULT_RETENTION_DAYS,
                                 "data": {
                                     # Project N sends every Nth (mod len(hashes)) hash (and platform)
                                     "received": calendar.timegm(
                                         (self.base_time + timedelta(minutes=tick)).timetuple()
                                     ),
                                     "type": "transaction",
                                     "transaction": "/api/do_things",
                                     "start_timestamp": datetime.timestamp(
                                         self.base_time + timedelta(minutes=tick)
                                     ),
                                     "timestamp": datetime.timestamp(
                                         self.base_time + timedelta(minutes=tick, seconds=1)
                                     ),
                                     "tags": {
                                         # Sentry
                                         "environment": self.environments[
                                             (tock * p) % len(self.environments)
                                         ],
                                         "sentry:release": str(tick),
                                         "sentry:dist": "dist1",
                                         # User
                                         "foo": "baz",
                                         "foo.bar": "qux",
                                         "os_name": "linux",
                                     },
                                     "user": {
                                         "email": "*****@*****.**",
                                         "ip_address": "8.8.8.8",
                                     },
                                     "contexts": {
                                         "trace": {
                                             "trace_id": trace_id,
                                             "span_id": span_id,
                                             "op": "http",
                                             "status": "0",
                                         },
                                     },
                                     "measurements": {
                                         "lcp": {"value": 32.129},
                                         "lcp.elementSize": {"value": 4242},
                                     },
                                     "breakdowns": {
                                         "span_ops": {
                                             "ops.db": {"value": 62.512},
                                             "ops.http": {"value": 109.774},
                                             "total.time": {"value": 172.286},
                                         }
                                     },
                                     "spans": [
                                         {
                                             "op": "db",
                                             "trace_id": trace_id,
                                             "span_id": span_id + "1",
                                             "parent_span_id": None,
                                             "same_process_as_parent": True,
                                             "description": "SELECT * FROM users",
                                             "data": {},
                                             "timestamp": calendar.timegm(
                                                 (self.base_time + timedelta(minutes=tick)).timetuple()
                                             ),
                                         }
                                     ],
                                 },
                             },
                         ),
                         KafkaMessageMetadata(0, 0, self.base_time),
                     )
                 )
                 if processed:
                     events.append(processed)
     write_processed_messages(self.storage, events)
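The "project N sends an event every Nth minute" schedule above can be checked in isolation. A small worked example, assuming minutes=4 and project_ids=[1, 2, 3]:

minutes = 4
project_ids = [1, 2, 3]
emitted = [
    (tick, p)
    for tick in range(minutes)
    for p in project_ids
    if (tick + 1) % p == 0  # tock % p == 0 in the method above
]
# Project 1 fires every minute, project 2 on ticks 1 and 3, project 3 on tick 2.
assert emitted == [(0, 1), (1, 1), (1, 2), (2, 1), (2, 3), (3, 1), (3, 2)]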
Example #29
def test_error_processor() -> None:
    received_timestamp = datetime.now() - timedelta(minutes=1)
    error_timestamp = received_timestamp - timedelta(minutes=1)
    trace_id = str(uuid.uuid4())
    span_id = "deadbeef"
    error = (
        2,
        "insert",
        InsertEvent({
            "organization_id":
            1,
            "retention_days":
            58,
            "event_id":
            "dcb9d002cac548c795d1c9adbfc68040",
            "group_id":
            100,
            "project_id":
            300688,
            "platform":
            "python",
            "message":
            "",
            "datetime":
            error_timestamp.strftime(PAYLOAD_DATETIME_FORMAT),
            "primary_hash":
            "04233d08ac90cf6fc015b1be5932e7e2",
            "data": {
                "event_id":
                "dcb9d002cac548c795d1c9adbfc68040",
                "project_id":
                300688,
                "release":
                None,
                "dist":
                None,
                "platform":
                "python",
                "message":
                "",
                "datetime":
                error_timestamp.strftime(PAYLOAD_DATETIME_FORMAT),
                "tags": [
                    ["handled", "no"],
                    ["level", "error"],
                    ["mechanism", "excepthook"],
                    ["runtime", "CPython 3.7.6"],
                    ["runtime.name", "CPython"],
                    ["server_name", "snuba"],
                    ["environment", "dev"],
                    ["sentry:user", "this_is_me"],
                    ["sentry:release", "4d23338017cdee67daf25f2c"],
                ],
                "user": {
                    "username": "******",
                    "ip_address": "127.0.0.1",
                    "id": "still_me",
                    "email": "*****@*****.**",
                    "geo": {
                        "country_code": "XY",
                        "region": "fake_region",
                        "city": "fake_city",
                    },
                },
                "request": {
                    "url":
                    "http://127.0.0.1:/query",
                    "headers": [
                        ["Accept-Encoding", "identity"],
                        ["Content-Length", "398"],
                        ["Host", "127.0.0.1:"],
                        ["Referer", "tagstore.something"],
                        ["Trace", "8fa73032d-1"],
                    ],
                    "data":
                    "",
                    "method":
                    "POST",
                    "env": {
                        "SERVER_PORT": "1010",
                        "SERVER_NAME": "snuba"
                    },
                },
                "_relay_processed":
                True,
                "breadcrumbs": {
                    "values": [
                        {
                            "category": "snuba.utils.streams.batching",
                            "level": "info",
                            "timestamp": error_timestamp.timestamp(),
                            "data": {
                                "asctime":
                                error_timestamp.strftime(
                                    PAYLOAD_DATETIME_FORMAT)
                            },
                            "message": "New partitions assigned: {}",
                            "type": "default",
                        },
                        {
                            "category": "snuba.utils.streams.batching",
                            "level": "info",
                            "timestamp": error_timestamp.timestamp(),
                            "data": {
                                "asctime":
                                error_timestamp.strftime(
                                    PAYLOAD_DATETIME_FORMAT)
                            },
                            "message": "Flushing ",
                            "type": "default",
                        },
                        {
                            "category": "httplib",
                            "timestamp": error_timestamp.timestamp(),
                            "type": "http",
                            "data": {
                                "url": "http://127.0.0.1:8123/",
                                "status_code": 500,
                                "reason": "Internal Server Error",
                                "method": "POST",
                            },
                            "level": "info",
                        },
                    ]
                },
                "contexts": {
                    "runtime": {
                        "version": "3.7.6",
                        "type": "runtime",
                        "name": "CPython",
                        "build": "3.7.6",
                    },
                    "trace": {
                        "trace_id": trace_id,
                        "span_id": span_id
                    },
                },
                "culprit":
                "snuba.clickhouse.http in write",
                "exception": {
                    "values": [{
                        "stacktrace": {
                            "frames": [
                                {
                                    "function":
                                    "<module>",
                                    "abs_path":
                                    "/usr/local/bin/snuba",
                                    "pre_context": [
                                        "from pkg_resources import load_entry_point",
                                        "",
                                        "if __name__ == '__main__':",
                                        "    sys.argv[0] = re.sub(r'(-script\\.pyw?|\\.exe)?$', '', sys.argv[0])",
                                        "    sys.exit(",
                                    ],
                                    "post_context": ["    )"],
                                    "vars": {
                                        "__spec__": "None",
                                        "__builtins__":
                                        "<module 'builtins' (built-in)>",
                                        "__annotations__": {},
                                        "__file__": "'/usr/local/bin/snuba'",
                                        "__loader__":
                                        "<_frozen_importlib_external.SourceFileLoader object at 0x7fbbc3a36ed0>",
                                        "__requires__": "'snuba'",
                                        "__cached__": "None",
                                        "__name__": "'__main__'",
                                        "__package__": "None",
                                        "__doc__": "None",
                                    },
                                    "module":
                                    "__main__",
                                    "filename":
                                    "snuba",
                                    "lineno":
                                    11,
                                    "in_app":
                                    False,
                                    "data": {
                                        "orig_in_app": 1
                                    },
                                    "context_line":
                                    "        load_entry_point('snuba', 'console_scripts', 'snuba')()",
                                },
                            ]
                        },
                        "type": "ClickHouseError",
                        "module": "snuba.clickhouse.http",
                        "value":
                        "[171] DB::Exception: Block structure mismatch",
                        "mechanism": {
                            "type": "excepthook",
                            "handled": False
                        },
                    }]
                },
                "extra": {
                    "sys.argv": [
                        "/usr/local/bin/snuba",
                        "consumer",
                        "--dataset",
                        "transactions",
                    ]
                },
                "fingerprint": ["{{ default }}"],
                "hashes": ["c8b21c571231e989060b9110a2ade7d3"],
                "hierarchical_hashes": [
                    "04233d08ac90cf6fc015b1be5932e7e3",
                    "04233d08ac90cf6fc015b1be5932e7e4",
                ],
                "key_id":
                "537125",
                "level":
                "error",
                "location":
                "snuba/clickhouse/http.py",
                "logger":
                "",
                "metadata": {
                    "function": "write",
                    "type": "ClickHouseError",
                    "value": "[171] DB::Exception: Block structure mismatch",
                    "filename": "snuba/something.py",
                },
                "modules": {
                    "cffi": "1.13.2",
                    "ipython-genutils": "0.2.0",
                    "isodate": "0.6.0",
                },
                "received":
                received_timestamp.timestamp(),
                "sdk": {
                    "version":
                    "0.0.0.0.1",
                    "name":
                    "sentry.python",
                    "packages": [{
                        "version": "0.0.0.0.1",
                        "name": "pypi:sentry-sdk"
                    }],
                    "integrations": [
                        "argv",
                        "atexit",
                        "dedupe",
                        "excepthook",
                        "logging",
                        "modules",
                        "stdlib",
                        "threading",
                    ],
                },
                "timestamp":
                error_timestamp.timestamp(),
                "title":
                "ClickHouseError: [171] DB::Exception: Block structure mismatch",
                "type":
                "error",
                "version":
                "7",
            },
        }),
        None,
    )

    expected_result = {
        "project_id": 300688,
        "timestamp": error_timestamp,
        "event_id": str(UUID("dcb9d002cac548c795d1c9adbfc68040")),
        "platform": "python",
        "dist": None,
        "environment": "dev",
        "release": "4d23338017cdee67daf25f2c",
        "ip_address_v4": "127.0.0.1",
        "user": "******",
        "user_name": "me",
        "user_id": "still_me",
        "user_email": "*****@*****.**",
        "sdk_name": "sentry.python",
        "sdk_version": "0.0.0.0.1",
        "http_method": "POST",
        "http_referer": "tagstore.something",
        "trace_id": trace_id,
        "span_id": int(span_id, 16),
        "tags.key": [
            "environment",
            "handled",
            "level",
            "mechanism",
            "runtime",
            "runtime.name",
            "sentry:release",
            "sentry:user",
            "server_name",
        ],
        "tags.value": [
            "dev",
            "no",
            "error",
            "excepthook",
            "CPython 3.7.6",
            "CPython",
            "4d23338017cdee67daf25f2c",
            "this_is_me",
            "snuba",
        ],
        "contexts.key": [
            "runtime.version",
            "runtime.name",
            "runtime.build",
            "trace.trace_id",
            "trace.span_id",
            "geo.country_code",
            "geo.region",
            "geo.city",
        ],
        "contexts.value": [
            "3.7.6",
            "CPython",
            "3.7.6",
            trace_id,
            span_id,
            "XY",
            "fake_region",
            "fake_city",
        ],
        "partition": 1,
        "offset": 2,
        "message_timestamp": datetime(1970, 1, 1),
        "retention_days": 90,
        "deleted": 0,
        "group_id": 100,
        "primary_hash": str(UUID("04233d08ac90cf6fc015b1be5932e7e2")),
        "hierarchical_hashes": [
            str(UUID("04233d08ac90cf6fc015b1be5932e7e3")),
            str(UUID("04233d08ac90cf6fc015b1be5932e7e4")),
        ],
        "received": received_timestamp.astimezone(pytz.utc).replace(
            tzinfo=None, microsecond=0
        ),
        "message": "",
        "title": "ClickHouseError: [171] DB::Exception: Block structure mismatch",
        "culprit": "snuba.clickhouse.http in write",
        "level": "error",
        "location": "snuba/clickhouse/http.py",
        "version": "7",
        "type": "error",
        "exception_stacks.type": ["ClickHouseError"],
        "exception_stacks.value": ["[171] DB::Exception: Block structure mismatch"],
        "exception_stacks.mechanism_type": ["excepthook"],
        "exception_stacks.mechanism_handled": [False],
        "exception_frames.abs_path": ["/usr/local/bin/snuba"],
        "exception_frames.colno": [None],
        "exception_frames.filename": ["snuba"],
        "exception_frames.lineno": [11],
        "exception_frames.in_app": [False],
        "exception_frames.package": [None],
        "exception_frames.module": ["__main__"],
        "exception_frames.function": ["<module>"],
        "exception_frames.stack_level": [0],
        "sdk_integrations": [
            "argv",
            "atexit",
            "dedupe",
            "excepthook",
            "logging",
            "modules",
            "stdlib",
            "threading",
        ],
        "modules.name": ["cffi", "ipython-genutils", "isodate"],
        "modules.version": ["1.13.2", "0.2.0", "0.6.0"],
        "transaction_name": "",
    }

    meta = KafkaMessageMetadata(offset=2,
                                partition=1,
                                timestamp=datetime(1970, 1, 1))
    processor = ErrorsProcessor({
        "environment": "environment",
        "sentry:release": "release",
        "sentry:dist": "dist",
        "sentry:user": "******",
        "transaction": "transaction_name",
        "level": "level",
    })

    processed_message = processor.process_message(error, meta)
    expected_message = InsertBatch([expected_result], None)
    # assert on the rows first so we get a nice diff from pytest
    assert processed_message.rows[0] == expected_message.rows[0]
    assert processed_message == expected_message
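One detail worth noting in the expected row above: span_id is stored as an integer parsed from the hex string in the trace context.

# "deadbeef" as it appears in the span_id column:
assert int("deadbeef", 16) == 3735928559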
Example #30
def test_simple() -> None:
    request_body = {
        "selected_columns": ["event_id"],
        "orderby": "event_id",
        "sample": 0.1,
        "limit": 100,
        "offset": 50,
        "project": 1,
    }

    query = Query(
        Entity(EntityKey.EVENTS,
               get_entity(EntityKey.EVENTS).get_data_model()))

    request = Request(
        id=uuid.UUID("a" * 32).hex,
        original_body=request_body,
        query=query,
        snql_anonymized="",
        query_settings=HTTPQuerySettings(referrer="search"),
        attribution_info=AttributionInfo(get_app_id("default"), "search", None,
                                         None, None),
    )

    time = TestingClock()

    timer = Timer("test", clock=time)
    time.sleep(0.01)

    message = SnubaQueryMetadata(
        request=request,
        start_timestamp=datetime.utcnow() - timedelta(days=3),
        end_timestamp=datetime.utcnow(),
        dataset="events",
        timer=timer,
        query_list=[
            ClickhouseQueryMetadata(
                sql=
                "select event_id from sentry_dist sample 0.1 prewhere project_id in (1) limit 50, 100",
                sql_anonymized=
                "select event_id from sentry_dist sample 0.1 prewhere project_id in ($I) limit 50, 100",
                start_timestamp=datetime.utcnow() - timedelta(days=3),
                end_timestamp=datetime.utcnow(),
                stats={
                    "sample": 10,
                    "error_code": 386
                },
                status=QueryStatus.SUCCESS,
                profile=ClickhouseQueryProfile(
                    time_range=10,
                    table="events",
                    all_columns={"timestamp", "tags"},
                    multi_level_condition=False,
                    where_profile=FilterProfile(
                        columns={"timestamp"},
                        mapping_cols={"tags"},
                    ),
                    groupby_cols=set(),
                    array_join_cols=set(),
                ),
                trace_id="b" * 32,
            )
        ],
        projects={2},
        snql_anonymized=request.snql_anonymized,
        entity=EntityKey.EVENTS.value,
    ).to_dict()

    processor = (
        get_writable_storage(StorageKey.QUERYLOG)
        .get_table_writer()
        .get_stream_loader()
        .get_processor()
    )

    assert processor.process_message(
        message, KafkaMessageMetadata(0, 0, datetime.now())
    ) == InsertBatch(
        [
            {
                "request_id": str(uuid.UUID("a" * 32)),
                "request_body": '{"limit": 100, "offset": 50, "orderby": "event_id", "project": 1, "sample": 0.1, "selected_columns": ["event_id"]}',
                "referrer": "search",
                "dataset": "events",
                "projects": [2],
                "organization": None,
                "timestamp": timer.for_json()["timestamp"],
                "duration_ms": 10,
                "status": "success",
                "clickhouse_queries.sql": [
                    "select event_id from sentry_dist sample 0.1 prewhere project_id in (1) limit 50, 100"
                ],
                "clickhouse_queries.status": ["success"],
                "clickhouse_queries.trace_id": [str(uuid.UUID("b" * 32))],
                "clickhouse_queries.duration_ms": [0],
                "clickhouse_queries.stats": ['{"error_code": 386, "sample": 10}'],
                "clickhouse_queries.final": [0],
                "clickhouse_queries.cache_hit": [0],
                "clickhouse_queries.sample": [10.0],
                "clickhouse_queries.max_threads": [0],
                "clickhouse_queries.num_days": [10],
                "clickhouse_queries.clickhouse_table": [""],
                "clickhouse_queries.query_id": [""],
                "clickhouse_queries.is_duplicate": [0],
                "clickhouse_queries.consistent": [0],
                "clickhouse_queries.all_columns": [["tags", "timestamp"]],
                "clickhouse_queries.or_conditions": [False],
                "clickhouse_queries.where_columns": [["timestamp"]],
                "clickhouse_queries.where_mapping_columns": [["tags"]],
                "clickhouse_queries.groupby_columns": [[]],
                "clickhouse_queries.array_join_columns": [[]],
            }
        ],
        None,
    )