Example #1
    def test_tags_hash_map(self) -> None:
        """
        Adds an event and ensures the tags_hash_map is properly populated
        including escaping.
        """
        self.event = get_raw_event()
        self.event["data"]["tags"].append(["test_tag1", "value1"])
        self.event["data"]["tags"].append(["test_tag=2",
                                           "value2"])  # Requires escaping
        storage = get_writable_storage(StorageKey.EVENTS)
        write_unprocessed_events(storage, [self.event])

        clickhouse = storage.get_cluster().get_query_connection(
            ClickhouseClientSettings.QUERY)

        hashed = clickhouse.execute(
            "SELECT cityHash64('test_tag1=value1'), cityHash64('test_tag\\\\=2=value2')"
        )
        tag1, tag2 = hashed[0]

        event = clickhouse.execute((
            f"SELECT event_id FROM sentry_local WHERE has(_tags_hash_map, {tag1}) "
            f"AND has(_tags_hash_map, {tag2})"))
        assert len(event) == 1
        assert event[0][0] == self.event["data"]["id"]
Example #2
    def test_tags_hash_map(self) -> None:
        """
        Adds an event and ensures the tags_hash_map is properly populated
        including escaping.
        """
        self.event = get_raw_event()
        self.event["data"]["tags"].append(["test_tag1", "value1"])
        self.event["data"]["tags"].append(["test_tag=2",
                                           "value2"])  # Requires escaping
        storage = get_writable_storage(StorageKey.ERRORS)
        schema = storage.get_schema()
        assert isinstance(schema, TableSchema)
        table_name = schema.get_table_name()
        write_unprocessed_events(storage, [self.event])

        clickhouse = storage.get_cluster().get_query_connection(
            ClickhouseClientSettings.QUERY)

        hashed = clickhouse.execute(
            "SELECT cityHash64('test_tag1=value1'), cityHash64('test_tag\\\\=2=value2')"
        )
        tag1, tag2 = hashed[0]

        event = clickhouse.execute((
            f"SELECT replaceAll(toString(event_id), '-', '') FROM {table_name} WHERE has(_tags_hash_map, {tag1}) "
            f"AND has(_tags_hash_map, {tag2})"))
        assert len(event) == 1
        assert event[0][0] == self.event["data"]["id"]
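The escaped literal in the SELECT above reflects how these tests expect the tags hash map to be built: each tag is hashed as cityHash64('key=value'), with '=' and '\' characters in the key escaped by a backslash, so the key test_tag=2 becomes test_tag\=2 before hashing. A minimal sketch of that escaping, using a hypothetical escape_tag_key helper that is not part of the code above:

import re


def escape_tag_key(key: str) -> str:
    # Escape "=" and "\" in the tag key so "key=value" stays unambiguous.
    return re.sub(r"([=\\])", r"\\\1", key)


# "test_tag=2" paired with "value2" hashes as cityHash64('test_tag\=2=value2'),
# matching the escaped literal queried in the examples above.
assert escape_tag_key("test_tag1") + "=value1" == "test_tag1=value1"
assert escape_tag_key("test_tag=2") + "=value2" == "test_tag\\=2=value2"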
Example #3
    def test_delete_tag_promoted_insert(self) -> None:
        self.event["project_id"] = self.project_id
        self.event["group_id"] = 1
        self.event["data"]["tags"].append(["browser.name", "foo"])
        self.event["data"]["tags"].append(["notbrowser", "foo"])
        write_unprocessed_events(self.storage, [self.event])

        project_id = self.project_id

        def _issue_count(total: bool = False) -> Sequence[Mapping[str, Any]]:
            clickhouse = self.storage.get_cluster().get_query_connection(
                ClickhouseClientSettings.QUERY)

            total_cond = (
                "AND has(_tags_hash_map, cityHash64('browser.name=foo'))"
                if not total else "")

            data = clickhouse.execute(f"""
                SELECT group_id, count()
                FROM errors_local
                FINAL
                WHERE deleted = 0
                AND project_id = {project_id}
                {total_cond}
                GROUP BY group_id
                """).results

            return [{"group_id": row[0], "count": row[1]} for row in data]

        assert _issue_count() == [{"count": 1, "group_id": 1}]
        assert _issue_count(total=True) == [{"count": 1, "group_id": 1}]

        timestamp = datetime.now(tz=pytz.utc)

        message: Message[KafkaPayload] = Message(
            Partition(Topic("replacements"), 1),
            42,
            KafkaPayload(
                None,
                json.dumps((
                    2,
                    ReplacementType.END_DELETE_TAG,
                    {
                        "project_id": project_id,
                        "tag": "browser.name",
                        "datetime":
                        timestamp.strftime(PAYLOAD_DATETIME_FORMAT),
                    },
                )).encode("utf-8"),
                [],
            ),
            datetime.now(),
        )

        processed = self.replacer.process_message(message)
        assert processed is not None
        self.replacer.flush_batch([processed])

        assert _issue_count() == []
        assert _issue_count(total=True) == [{"count": 1, "group_id": 1}]
Example #4
 def setup_method(self, test_method):
     super().setup_method(test_method)
     self.app.post = partial(self.app.post, headers={"referer": "test"})
     self.trace_id = uuid.UUID("7400045b-25c4-43b8-8591-4600aa83ad04")
     self.event = get_raw_event()
     self.project_id = self.event["project_id"]
     write_unprocessed_events(get_writable_storage(StorageKey.EVENTS),
                              [self.event])
     write_unprocessed_events(
         get_writable_storage(StorageKey.TRANSACTIONS),
         [get_raw_transaction()],
     )
Example #5
    def test_delete_tag_promoted_insert(self):
        self.event["project_id"] = self.project_id
        self.event["group_id"] = 1
        self.event["data"]["tags"].append(["browser.name", "foo"])
        self.event["data"]["tags"].append(["notbrowser", "foo"])
        write_unprocessed_events(self.storage, [self.event])

        project_id = self.project_id

        def _issue_count(total=False):
            return json.loads(
                self.app.post(
                    "/query",
                    data=json.dumps({
                        "project": [project_id],
                        "aggregations": [["count()", "", "count"]],
                        "conditions": [["tags[browser.name]", "=", "foo"]]
                        if not total else [],
                        "groupby": ["group_id"],
                    }),
                ).data)["data"]

        assert _issue_count() == [{"count": 1, "group_id": 1}]
        assert _issue_count(total=True) == [{"count": 1, "group_id": 1}]

        timestamp = datetime.now(tz=pytz.utc)

        message: Message[KafkaPayload] = Message(
            Partition(Topic("replacements"), 1),
            42,
            KafkaPayload(
                None,
                json.dumps((
                    2,
                    "end_delete_tag",
                    {
                        "project_id": project_id,
                        "tag": "browser.name",
                        "datetime":
                        timestamp.strftime(PAYLOAD_DATETIME_FORMAT),
                    },
                )).encode("utf-8"),
                [],
            ),
            datetime.now(),
        )

        processed = self.replacer.process_message(message)
        self.replacer.flush_batch([processed])

        assert _issue_count() == []
        assert _issue_count(total=True) == [{"count": 1, "group_id": 1}]
Example #6
    def test_multiple_partitions(self) -> None:
        """
        Different partitions should have independent offset checks.
        """
        set_config("skip_seen_offsets", True)
        self.event["project_id"] = self.project_id
        self.event["group_id"] = 1
        self.event["primary_hash"] = "a" * 32
        write_unprocessed_events(self.storage, [self.event])

        payload = KafkaPayload(
            None,
            json.dumps((
                2,
                ReplacementType.END_UNMERGE,
                {
                    "project_id":
                    self.project_id,
                    "previous_group_id":
                    1,
                    "new_group_id":
                    2,
                    "hashes": ["a" * 32],
                    "datetime":
                    datetime.utcnow().strftime(PAYLOAD_DATETIME_FORMAT),
                },
            )).encode("utf-8"),
            [],
        )
        offset = 42
        timestamp = datetime.now()

        partition_one: Message[KafkaPayload] = Message(
            Partition(Topic("replacements"), 1),
            offset,
            payload,
            timestamp,
        )
        partition_two: Message[KafkaPayload] = Message(
            Partition(Topic("replacements"), 2),
            offset,
            payload,
            timestamp,
        )

        processed = self.replacer.process_message(partition_one)
        self.replacer.flush_batch([processed])
        # A different partition should be unaffected even at the same offset.
        assert self.replacer.process_message(partition_two) is not None
Example #7
 def setup_method(self, test_method):
     super().setup_method(test_method)
     self.app.post = partial(self.app.post, headers={"referer": "test"})
     self.trace_id = uuid.UUID("7400045b-25c4-43b8-8591-4600aa83ad04")
     self.event = get_raw_event()
     self.project_id = self.event["project_id"]
     self.skew = timedelta(minutes=180)
     self.base_time = datetime.utcnow().replace(
         minute=0, second=0, microsecond=0,
         tzinfo=pytz.utc) - timedelta(minutes=180)
     write_unprocessed_events(get_writable_storage(StorageKey.EVENTS),
                              [self.event])
     write_unprocessed_events(
         get_writable_storage(StorageKey.TRANSACTIONS),
         [get_raw_transaction()],
     )
Example #8
    def test_unmerge_hierarchical_insert(self) -> None:
        self.event["project_id"] = self.project_id
        self.event["group_id"] = 1
        self.event["primary_hash"] = "b" * 32
        self.event["data"]["hierarchical_hashes"] = ["a" * 32]
        write_unprocessed_events(self.storage, [self.event])

        assert self._issue_count(self.project_id) == [{
            "count": 1,
            "group_id": 1
        }]

        timestamp = datetime.now(tz=pytz.utc)

        project_id = self.project_id

        message: Message[KafkaPayload] = Message(
            Partition(Topic("replacements"), 1),
            42,
            KafkaPayload(
                None,
                json.dumps((
                    2,
                    ReplacementType.END_UNMERGE_HIERARCHICAL,
                    {
                        "project_id": project_id,
                        "previous_group_id": 1,
                        "new_group_id": 2,
                        "hierarchical_hash": "a" * 32,
                        "primary_hash": "b" * 32,
                        "datetime":
                        timestamp.strftime(PAYLOAD_DATETIME_FORMAT),
                    },
                )).encode("utf-8"),
                [],
            ),
            datetime.now(),
        )

        processed = self.replacer.process_message(message)
        assert processed is not None
        self.replacer.flush_batch([processed])

        assert self._issue_count(self.project_id) == [{
            "count": 1,
            "group_id": 2
        }]
Example #9
def test_backfill_errors() -> None:

    backfill_migration_id = "0014_backfill_errors"
    runner = Runner()
    runner.run_migration(MigrationKey(MigrationGroup.SYSTEM,
                                      "0001_migrations"))

    run_prior_migrations(MigrationGroup.EVENTS, backfill_migration_id, runner)

    errors_storage = get_writable_storage(StorageKey.ERRORS)
    clickhouse = errors_storage.get_cluster().get_query_connection(
        ClickhouseClientSettings.QUERY)
    errors_table_name = (
        errors_storage.get_table_writer().get_schema().get_table_name())

    raw_events = []
    for i in range(10):
        event = get_raw_event()
        raw_events.append(event)

    events_storage = get_writable_storage(StorageKey.EVENTS)

    write_unprocessed_events(events_storage, raw_events)

    assert get_count_from_storage(errors_table_name, clickhouse) == 0

    # Run 0014_backfill_errors
    runner.run_migration(MigrationKey(MigrationGroup.EVENTS,
                                      backfill_migration_id),
                         force=True)

    assert get_count_from_storage(errors_table_name, clickhouse) == 10

    outcome = perform_select_query(["contexts.key", "contexts.value"],
                                   errors_table_name, None, str(1), clickhouse)

    assert outcome[0] == (
        [
            "device.model_id",
            "geo.city",
            "geo.country_code",
            "geo.region",
            "os.kernel_version",
        ],
        ["Galaxy", "San Francisco", "US", "CA", "1.1.1"],
    )
Example #10
    def test_offset_already_processed(self) -> None:
        """
        Don't process an offset that already exists in Redis.
        """
        set_config("skip_seen_offsets", True)
        self.event["project_id"] = self.project_id
        self.event["group_id"] = 1
        self.event["primary_hash"] = "a" * 32
        write_unprocessed_events(self.storage, [self.event])

        key = f"replacement:{CONSUMER_GROUP}:errors:1"
        redis_client.set(key, 42)

        old_offset: Message[KafkaPayload] = Message(
            Partition(Topic("replacements"), 1),
            41,
            KafkaPayload(
                None,
                json.dumps((
                    2,
                    ReplacementType.END_UNMERGE,
                    {},
                )).encode("utf-8"),
                [],
            ),
            datetime.now(),
        )

        same_offset: Message[KafkaPayload] = Message(
            Partition(Topic("replacements"), 1),
            42,
            KafkaPayload(
                None,
                json.dumps((
                    2,
                    ReplacementType.END_UNMERGE,
                    {},
                )).encode("utf-8"),
                [],
            ),
            datetime.now(),
        )

        assert self.replacer.process_message(old_offset) is None
        assert self.replacer.process_message(same_offset) is None
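The skip behaviour exercised here (and in the other offset tests in this set) amounts to a per-partition high-water mark kept in Redis under the key replacement:{consumer_group}:errors:{partition}. A minimal sketch of that check, using a hypothetical should_skip helper rather than the replacer's actual internals:

def should_skip(redis_client, consumer_group: str, partition: int, offset: int) -> bool:
    # An offset at or below the stored high-water mark for this consumer group
    # and partition counts as already processed.
    stored = redis_client.get(f"replacement:{consumer_group}:errors:{partition}")
    return stored is not None and offset <= int(stored)

In the test above the stored value is 42, so both offset 41 and offset 42 are treated as already seen and process_message returns None for both messages.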
Example #11
 def setup_method(self, test_method: Callable[..., Any]) -> None:
     super().setup_method(test_method)
     self.trace_id = uuid.UUID("7400045b-25c4-43b8-8591-4600aa83ad04")
     self.event = get_raw_event()
     self.project_id = self.event["project_id"]
     self.org_id = self.event["organization_id"]
     self.skew = timedelta(minutes=180)
     self.base_time = datetime.utcnow().replace(
         minute=0, second=0, microsecond=0) - timedelta(minutes=180)
     events_storage = get_entity(EntityKey.EVENTS).get_writable_storage()
     assert events_storage is not None
     write_unprocessed_events(events_storage, [self.event])
     self.next_time = datetime.utcnow().replace(
         minute=0, second=0, microsecond=0) + timedelta(minutes=180)
     write_unprocessed_events(
         get_writable_storage(StorageKey.TRANSACTIONS),
         [get_raw_transaction()],
     )
Example #12
    def test_unmerge_insert(self):
        self.event["project_id"] = self.project_id
        self.event["group_id"] = 1
        self.event["primary_hash"] = "a" * 32
        write_unprocessed_events(self.storage, [self.event])

        assert self._issue_count(self.project_id) == [{
            "count": 1,
            "group_id": 1
        }]

        timestamp = datetime.now(tz=pytz.utc)

        project_id = self.project_id

        message: Message[KafkaPayload] = Message(
            Partition(Topic("replacements"), 1),
            42,
            KafkaPayload(
                None,
                json.dumps((
                    2,
                    "end_unmerge",
                    {
                        "project_id": project_id,
                        "previous_group_id": 1,
                        "new_group_id": 2,
                        "hashes": ["a" * 32],
                        "datetime":
                        timestamp.strftime(PAYLOAD_DATETIME_FORMAT),
                    },
                )).encode("utf-8"),
                [],
            ),
            datetime.now(),
        )

        processed = self.replacer.process_message(message)
        self.replacer.flush_batch([processed])

        assert self._issue_count(self.project_id) == [{
            "count": 1,
            "group_id": 2
        }]
Example #13
    def setup_method(self) -> None:
        self.project_id = 1
        self.platforms = ["a", "b"]
        self.minutes = 20
        self.dataset = get_dataset("events")
        self.entity_key = ENTITY_NAME_LOOKUP[self.dataset.get_default_entity()]

        self.base_time = datetime.utcnow().replace(
            minute=0, second=0,
            microsecond=0) - timedelta(minutes=self.minutes)

        events_storage = get_writable_storage(StorageKey.ERRORS)

        write_unprocessed_events(
            events_storage,
            [
                InsertEvent({
                    "event_id":
                    uuid.uuid4().hex,
                    "group_id":
                    tick,
                    "primary_hash":
                    uuid.uuid4().hex,
                    "project_id":
                    self.project_id,
                    "message":
                    "a message",
                    "platform":
                    self.platforms[tick % len(self.platforms)],
                    "datetime":
                    (self.base_time + timedelta(minutes=tick)).strftime(
                        settings.PAYLOAD_DATETIME_FORMAT),
                    "data": {
                        "received":
                        calendar.timegm((self.base_time +
                                         timedelta(minutes=tick)).timetuple()),
                    },
                    "organization_id":
                    1,
                    "retention_days":
                    settings.DEFAULT_RETENTION_DAYS,
                }) for tick in range(self.minutes)
            ],
        )
Example #14
    def setup_method(self, test_method):
        super().setup_method(test_method)
        self.app.post = partial(self.app.post, headers={"referer": "test"})
        self.event = get_raw_event()
        self.project_id = self.event["project_id"]
        self.base_time = datetime.utcnow().replace(
            second=0, microsecond=0, tzinfo=pytz.utc
        ) - timedelta(minutes=90)
        self.next_time = self.base_time + timedelta(minutes=95)

        self.events_storage = get_entity(EntityKey.EVENTS).get_writable_storage()
        write_unprocessed_events(self.events_storage, [self.event])

        groups = [
            {
                "offset": 0,
                "project_id": self.project_id,
                "id": self.event["group_id"],
                "record_deleted": 0,
                "status": 0,
            }
        ]

        groups_storage = get_entity(EntityKey.GROUPEDMESSAGES).get_writable_storage()
        groups_storage.get_table_writer().get_batch_writer(
            metrics=DummyMetricsBackend(strict=True)
        ).write([json.dumps(group).encode("utf-8") for group in groups])

        assignees = [
            {
                "offset": 0,
                "project_id": self.project_id,
                "group_id": self.event["group_id"],
                "record_deleted": 0,
                "user_id": 100,
            }
        ]

        assignees_storage = get_entity(EntityKey.GROUPASSIGNEE).get_writable_storage()
        assignees_storage.get_table_writer().get_batch_writer(
            metrics=DummyMetricsBackend(strict=True)
        ).write([json.dumps(assignee).encode("utf-8") for assignee in assignees])
Example #15
    def test_delete_groups_insert(self) -> None:
        self.event["project_id"] = self.project_id
        self.event["group_id"] = 1
        write_unprocessed_events(self.storage, [self.event])

        assert self._issue_count(self.project_id) == [{
            "count": 1,
            "group_id": 1
        }]

        timestamp = datetime.utcnow()

        project_id = self.project_id

        message: Message[KafkaPayload] = Message(
            Partition(Topic("replacements"), 1),
            42,
            KafkaPayload(
                None,
                json.dumps((
                    2,
                    ReplacementType.END_DELETE_GROUPS,
                    {
                        "project_id": project_id,
                        "group_ids": [1],
                        "datetime":
                        timestamp.strftime(PAYLOAD_DATETIME_FORMAT),
                    },
                )).encode("utf-8"),
                [],
            ),
            datetime.now(),
        )

        processed = self.replacer.process_message(message)
        self.replacer.flush_batch([processed])

        assert self._issue_count(self.project_id) == []

        # Count is still zero after Redis is flushed and parts are merged.
        self._clear_redis_and_force_merge()
        assert self._issue_count(self.project_id) == []
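Every replacement message in these tests is built by hand as a JSON-encoded (version, type, data) tuple wrapped in a KafkaPayload. A hypothetical helper capturing that shape (named here for illustration only, not part of the test suite) would look like:

import json


def make_replacement_payload(replacement_type: str, data: dict) -> bytes:
    # Version-2 replacement messages are a JSON-encoded (version, type, data)
    # tuple, matching what the tests above pass to KafkaPayload.
    return json.dumps((2, replacement_type, data)).encode("utf-8")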
Example #16
    def test_merge_insert(self) -> None:
        self.event["project_id"] = self.project_id
        self.event["group_id"] = 1
        write_unprocessed_events(self.storage, [self.event])

        assert self._issue_count(self.project_id) == [{
            "count": 1,
            "group_id": 1
        }]

        timestamp = datetime.utcnow()

        project_id = self.project_id

        message: Message[KafkaPayload] = Message(
            Partition(Topic("replacements"), 1),
            42,
            KafkaPayload(
                None,
                json.dumps((
                    2,
                    ReplacementType.END_MERGE,
                    {
                        "project_id": project_id,
                        "new_group_id": 2,
                        "previous_group_ids": [1],
                        "datetime":
                        timestamp.strftime(PAYLOAD_DATETIME_FORMAT),
                    },
                )).encode("utf-8"),
                [],
            ),
            datetime.now(),
        )

        processed = self.replacer.process_message(message)
        self.replacer.flush_batch([processed])

        assert self._issue_count(1) == [{"count": 1, "group_id": 2}]
Example #17
def test_backfill_errors() -> None:
    errors_storage = get_writable_storage(StorageKey.ERRORS)
    clickhouse = errors_storage.get_cluster().get_query_connection(
        ClickhouseClientSettings.QUERY)
    errors_table_name = (
        errors_storage.get_table_writer().get_schema().get_table_name())

    def get_errors_count() -> int:
        return clickhouse.execute(
            f"SELECT count() from {errors_table_name}")[0][0]

    raw_events = []
    for i in range(10):
        event = get_raw_event()
        raw_events.append(event)

    events_storage = get_writable_storage(StorageKey.EVENTS)

    write_unprocessed_events(events_storage, raw_events)

    assert get_errors_count() == 0

    backfill_errors()

    assert get_errors_count() == 10

    assert clickhouse.execute(
        f"SELECT contexts.key, contexts.value from {errors_table_name} LIMIT 1;"
    )[0] == (
        (
            "device.model_id",
            "geo.city",
            "geo.country_code",
            "geo.region",
            "os.kernel_version",
        ),
        ("Galaxy", "San Francisco", "US", "CA", "1.1.1"),
    )
Example #18
    def test_reset_consumer_group_offset_check(self) -> None:
        set_config("skip_seen_offsets", True)
        self.event["project_id"] = self.project_id
        self.event["group_id"] = 1
        self.event["primary_hash"] = "a" * 32
        write_unprocessed_events(self.storage, [self.event])

        message: Message[KafkaPayload] = Message(
            Partition(Topic("replacements"), 1),
            42,
            KafkaPayload(
                None,
                json.dumps((
                    2,
                    ReplacementType.END_UNMERGE,
                    {
                        "project_id":
                        self.project_id,
                        "previous_group_id":
                        1,
                        "new_group_id":
                        2,
                        "hashes": ["a" * 32],
                        "datetime":
                        datetime.utcnow().strftime(PAYLOAD_DATETIME_FORMAT),
                    },
                )).encode("utf-8"),
                [],
            ),
            datetime.now(),
        )

        self.replacer.flush_batch([self.replacer.process_message(message)])

        set_config(replacer.RESET_CHECK_CONFIG, f"[{CONSUMER_GROUP}]")

        # The offset to check against has been reset, so this message should not be skipped.
        assert self.replacer.process_message(message) is not None
Example #19
    def test_process_offset_twice(self) -> None:
        set_config("skip_seen_offsets", True)
        self.event["project_id"] = self.project_id
        self.event["group_id"] = 1
        self.event["primary_hash"] = "a" * 32
        write_unprocessed_events(self.storage, [self.event])

        message: Message[KafkaPayload] = Message(
            Partition(Topic("replacements"), 1),
            42,
            KafkaPayload(
                None,
                json.dumps((
                    2,
                    ReplacementType.END_UNMERGE,
                    {
                        "project_id":
                        self.project_id,
                        "previous_group_id":
                        1,
                        "new_group_id":
                        2,
                        "hashes": ["a" * 32],
                        "datetime":
                        datetime.utcnow().strftime(PAYLOAD_DATETIME_FORMAT),
                    },
                )).encode("utf-8"),
                [],
            ),
            datetime.now(),
        )

        processed = self.replacer.process_message(message)
        self.replacer.flush_batch([processed])

        # Should be None: the offset is already recorded in Redis, so the message is skipped.
        assert self.replacer.process_message(message) is None
Example #20
def test_transform_column_names() -> None:
    """
    Runs a simple query containing selected expression names that
    do not match the aliases of the expressions themselves.
    It verifies that the names of the columns in the result correspond
    to the SelectedExpression names and not to the expression aliases
    (which are supposed to be internal).
    """
    events_storage = get_entity(EntityKey.EVENTS).get_writable_storage()

    event_id = uuid.uuid4().hex

    event_date = datetime.utcnow()
    write_unprocessed_events(
        events_storage,
        [
            InsertEvent(
                {
                    "event_id": event_id,
                    "group_id": 10,
                    "primary_hash": uuid.uuid4().hex,
                    "project_id": 1,
                    "message": "a message",
                    "platform": "python",
                    "datetime": event_date.strftime(settings.PAYLOAD_DATETIME_FORMAT),
                    "data": {"received": time.time()},
                    "organization_id": 1,
                    "retention_days": settings.DEFAULT_RETENTION_DAYS,
                }
            )
        ],
    )

    query = Query(
        Entity(EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model()),
        selected_columns=[
            # The selected expression names are those provided by the
            # user in the query and those the user expects in the response.
            # Aliases will be internal names to prevent shadowing.
            SelectedExpression("event_id", Column("_snuba_event_id", None, "event_id")),
            SelectedExpression(
                "message",
                FunctionCall(
                    "_snuba_message",
                    "ifNull",
                    (Column(None, None, "message"), Literal(None, "")),
                ),
            ),
        ],
    )
    query_settings = HTTPRequestSettings()
    apply_query_extensions(
        query,
        {
            "timeseries": {
                "from_date": (event_date - timedelta(minutes=5)).strftime(
                    settings.PAYLOAD_DATETIME_FORMAT
                ),
                "to_date": (event_date + timedelta(minutes=1)).strftime(
                    settings.PAYLOAD_DATETIME_FORMAT
                ),
                "granularity": 3600,
            },
            "project": {"project": [1]},
        },
        query_settings,
    )

    dataset = get_dataset("events")
    timer = Timer("test")

    result = parse_and_run_query(
        dataset,
        Request(
            id="asd", body={}, query=query, settings=query_settings, referrer="asd",
        ),
        timer,
    )

    data = result.result["data"]
    assert data == [{"event_id": event_id, "message": "a message"}]
    meta = result.result["meta"]

    assert meta == [
        MetaColumn(name="event_id", type="String"),
        MetaColumn(name="message", type="String"),
    ]
Example #21
    def test_reprocessing_flow_insert(self) -> None:
        # We have a group that contains two events, 1 and 2.
        self.event["project_id"] = self.project_id
        self.event["group_id"] = 1
        self.event["event_id"] = event_id = "00e24a150d7f4ee4b142b61b4d893b6d"
        write_unprocessed_events(self.storage, [self.event])
        self.event["event_id"] = event_id2 = "00e24a150d7f4ee4b142b61b4d893b6e"
        write_unprocessed_events(self.storage, [self.event])

        assert self._issue_count(self.project_id) == [{
            "count": 2,
            "group_id": 1
        }]

        project_id = self.project_id

        message: Message[KafkaPayload] = Message(
            Partition(Topic("replacements"), 1),
            42,
            KafkaPayload(
                None,
                json.dumps((
                    2,
                    "tombstone_events",
                    {
                        "project_id": project_id,
                        "event_ids": [event_id]
                    },
                )).encode("utf-8"),
                [],
            ),
            datetime.now(),
        )

        # The user chooses to reprocess a subset of the group and throw away
        # the other events. Event 1 gets manually tombstoned by Sentry while
        # Event 2 prevails.
        processed = self.replacer.process_message(message)
        self.replacer.flush_batch([processed])

        # At this point the count doesn't make any sense but we don't care.
        assert self._issue_count(self.project_id) == [{
            "count": 2,
            "group_id": 1
        }]

        # The reprocessed event is inserted with a guaranteed-new group ID but
        # the *same* event ID (this is why we need to skip tombstoning this
        # event ID)
        self.event["group_id"] = 2
        write_unprocessed_events(self.storage, [self.event])

        message: Message[KafkaPayload] = Message(
            Partition(Topic("replacements"), 1),
            42,
            KafkaPayload(
                None,
                json.dumps((
                    2,
                    "exclude_groups",
                    {
                        "project_id": project_id,
                        "group_ids": [1]
                    },
                )).encode("utf-8"),
                [],
            ),
            datetime.now(),
        )

        # Group 1 is excluded from queries. At this point we have almost a
        # regular group deletion, except only a subset of events have been
        # tombstoned (the ones that will *not* be reprocessed).
        processed = self.replacer.process_message(message)
        self.replacer.flush_batch([processed])

        # Group 2 should contain the one event that the user chose to
        # reprocess, and Group 1 should be gone. (Note: In the product Group 2
        # looks identical to Group 1, including short ID).
        assert self._issue_count(self.project_id) == [{
            "count": 1,
            "group_id": 2
        }]
        assert self._get_group_id(project_id, event_id2) == 2
        assert not self._get_group_id(project_id, event_id)