Example #1
 def test_bulk_load(self) -> None:
     row = GroupedMessageRow.from_bulk(
         {
             "project_id": "2",
             "id": "10",
             "status": "0",
             "last_seen": "2019-06-28 17:57:32+00",
             "first_seen": "2019-06-28 06:40:17+00",
             "active_at": "2019-06-28 06:40:17+00",
             "first_release_id": "26",
         }
     )
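     # from_bulk converts the string snapshot values into typed columns (asserted below).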
     write_processed_messages(self.storage, [InsertBatch([row.to_clickhouse()])])
     ret = (
         get_cluster(StorageSetKey.EVENTS)
         .get_query_connection(ClickhouseClientSettings.QUERY)
         .execute("SELECT * FROM groupedmessage_local;")
     )
     assert ret[0] == (
         0,  # offset
         0,  # deleted
         2,  # project_id
         10,  # id
         0,  # status
         datetime(2019, 6, 28, 17, 57, 32),
         datetime(2019, 6, 28, 6, 40, 17),
         datetime(2019, 6, 28, 6, 40, 17),
         26,
     )
Example #2
 def test_bulk_load(self) -> None:
     row = GroupAssigneeRow.from_bulk(
         {
             "project_id": "2",
             "group_id": "1359",
             "date_added": "2019-09-19 00:17:55+00",
             "user_id": "1",
             "team_id": "",
         }
     )
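     # The empty team_id above is stored as NULL (see the assertion below).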
     write_processed_messages(
         self.storage, [InsertBatch([row.to_clickhouse()], None)]
     )
     ret = (
         self.storage.get_cluster()
         .get_query_connection(ClickhouseClientSettings.QUERY)
         .execute("SELECT * FROM groupassignee_local;")
         .results
     )
     assert ret[0] == (
         0,  # offset
         0,  # deleted
         2,  # project_id
         1359,  # group_id
         datetime(2019, 9, 19, 0, 17, 55),
         1,  # user_id
         None,  # team_id
     )
Example #3
    def test_messages(self) -> None:
        processor = GroupedMessageProcessor("sentry_groupedmessage")

        metadata = KafkaMessageMetadata(
            offset=42, partition=0, timestamp=datetime(1970, 1, 1)
        )

        ret = processor.process_message(self.INSERT_MSG, metadata)
        assert ret == InsertBatch([self.PROCESSED])
        write_processed_messages(self.storage, [ret])
        ret = (
            get_cluster(StorageSetKey.EVENTS)
            .get_query_connection(ClickhouseClientSettings.INSERT)
            .execute("SELECT * FROM groupedmessage_local;")
        )
        assert ret[0] == (
            42,  # offset
            0,  # deleted
            2,  # project_id
            74,  # id
            0,  # status
            datetime(2019, 6, 19, 6, 46, 28),
            datetime(2019, 6, 19, 6, 45, 32),
            datetime(2019, 6, 19, 6, 45, 32),
            None,
        )

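        # An update message is processed into the same replacement row.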
        ret = processor.process_message(self.UPDATE_MSG, metadata)
        assert ret == InsertBatch([self.PROCESSED])

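        # A delete message is processed into the precomputed deletion row (self.DELETED).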
        ret = processor.process_message(self.DELETE_MSG, metadata)
        assert ret == InsertBatch([self.DELETED])
Example #4
    def generate_outcomes(
        self,
        org_id: int,
        project_id: int,
        num_outcomes: int,
        outcome: int,
        time_since_base: timedelta,
    ) -> None:
        outcomes = []
        for _ in range(num_outcomes):
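            # Run each synthetic outcome through the storage's stream processor; only non-None results are written.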
            processed = (
                self.storage.get_table_writer()
                .get_stream_loader()
                .get_processor()
                .process_message(
                    {
                        "project_id": project_id,
                        "event_id": uuid.uuid4().hex,
                        "timestamp": (self.base_time + time_since_base).strftime(
                            "%Y-%m-%dT%H:%M:%S.%fZ"
                        ),
                        "org_id": org_id,
                        "reason": None,
                        "key_id": 1,
                        "outcome": outcome,
                    },
                    KafkaMessageMetadata(0, 0, self.base_time),
                )
            )
            if processed:
                outcomes.append(processed)

        write_processed_messages(self.storage, outcomes)
Example #5
    def test_messages(self) -> None:
        processor = GroupAssigneeProcessor("sentry_groupasignee")

        metadata = KafkaMessageMetadata(
            offset=42, partition=0, timestamp=datetime(1970, 1, 1)
        )

        ret = processor.process_message(self.INSERT_MSG, metadata)
        assert ret == InsertBatch([self.PROCESSED])
        write_processed_messages(self.storage, [ret])
        ret = (
            self.storage.get_cluster()
            .get_query_connection(ClickhouseClientSettings.QUERY)
            .execute("SELECT * FROM groupassignee_local;")
        )
        assert ret[0] == (
            42,  # offset
            0,  # deleted
            2,  # project_id
            1359,  # group_id
            datetime(2019, 9, 19, 0, 17, 55),
            1,  # user_id
            None,  # team_id
        )

        ret = processor.process_message(self.UPDATE_MSG_NO_KEY_CHANGE, metadata)
        assert ret == InsertBatch([self.PROCESSED])

        # Tests an update with a key change, which becomes two inserts:
        # the deletion of the old row and the insertion of the new one.
        ret = processor.process_message(self.UPDATE_MSG_WITH_KEY_CHANGE, metadata)
        assert ret == InsertBatch([self.DELETED, self.PROCESSED_UPDATE])

        ret = processor.process_message(self.DELETE_MSG, metadata)
        assert ret == InsertBatch([self.DELETED])
Example #6
    def generate_uniform_distributions(self) -> None:
        events = []
        processor = (
            self.storage.get_table_writer().get_stream_loader().get_processor()
        )
        value_array = list(range(self.d_range_min, self.d_range_max))

        for n in range(self.seconds):
            for p in self.project_ids:
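                # One distribution message per project per second; value_array, tags, and metric_id are reused across all of them.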
                msg = {
                    "org_id": self.org_id,
                    "project_id": p,
                    "type": METRICS_DISTRIBUTIONS_TYPE,
                    "value": value_array,
                    "timestamp": self.base_time.timestamp() + n,
                    "tags": self.default_tags,
                    "metric_id": self.metric_id,
                    "retention_days": RETENTION_DAYS,
                }

                processed = processor.process_message(
                    msg,
                    KafkaMessageMetadata(0, 0, self.base_time),
                )
                if processed:
                    events.append(processed)
        write_processed_messages(self.storage, events)
Example #7
    def generate_sets(self) -> None:
        events = []
        processor = (
            self.storage.get_table_writer().get_stream_loader().get_processor()
        )

        for n in range(self.seconds):
            for p in self.project_ids:
                msg = {
                    "org_id": self.org_id,
                    "project_id": p,
                    "type": METRICS_SET_TYPE,
                    "value": [n % self.unique_set_values],
                    "timestamp": self.base_time.timestamp() + n,
                    "tags": self.default_tags,
                    "metric_id": self.metric_id,
                    "retention_days": RETENTION_DAYS,
                }

                processed = processor.process_message(
                    msg,
                    KafkaMessageMetadata(0, 0, self.base_time),
                )
                if processed:
                    events.append(processed)
        write_processed_messages(self.storage, events)
Example #8
 def generate_session_events(self):
     processor = (
         self.storage.get_table_writer().get_stream_loader().get_processor()
     )
     meta = KafkaMessageMetadata(
         offset=1, partition=2, timestamp=datetime(1970, 1, 1)
     )
     template = {
         "session_id": "00000000-0000-0000-0000-000000000000",
         "distinct_id": "b3ef3211-58a4-4b36-a9a1-5a55df0d9aaf",
         "duration": None,
         "environment": "production",
         "org_id": 1,
         "project_id": 2,
         "release": "[email protected]",
         "retention_days": settings.DEFAULT_RETENTION_DAYS,
         "seq": 0,
         "errors": 0,
         "received": datetime.utcnow().timestamp(),
         "started": self.started.timestamp(),
     }
     events = [
         processor.process_message(
             {
                 **template,
                 "status": "exited",
                 "duration": 1947.49,
                 "session_id": "8333339f-5675-4f89-a9a0-1c935255ab58",
             },
             meta,
         ),
         processor.process_message(
              {**template, "status": "exited", "quantity": 5},
             meta,
         ),
         processor.process_message(
              {**template, "status": "errored", "errors": 1, "quantity": 2},
             meta,
         ),
         processor.process_message(
             {
                 **template,
                 "distinct_id": "b3ef3211-58a4-4b36-a9a1-5a55df0d9aaf",
                 "status": "errored",
                 "errors": 1,
                 "quantity": 2,
             },
             meta,
         ),
     ]
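     # process_message may return None; this variant writes the list as-is, while the variant below filters out None entries before writing.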
     write_processed_messages(self.storage, events)
Example #9
 def generate_session_events(self, org_id: int, project_id: int) -> None:
     processor = self.storage.get_table_writer().get_stream_loader().get_processor()
     meta = KafkaMessageMetadata(
         offset=1, partition=2, timestamp=datetime(1970, 1, 1)
     )
     distinct_id = uuid4().hex
     template = {
         "session_id": uuid4().hex,
         "distinct_id": distinct_id,
         "duration": None,
         "environment": "production",
         "org_id": org_id,
         "project_id": project_id,
         "release": "[email protected]",
         "retention_days": settings.DEFAULT_RETENTION_DAYS,
         "seq": 0,
         "errors": 0,
         "received": datetime.utcnow().timestamp(),
         "started": self.started.timestamp(),
     }
     events = [
         processor.process_message(
             {
                 **template,
                 "status": "exited",
                 "duration": 1947.49,
                 "session_id": uuid4().hex,
                 "started": (self.started + timedelta(minutes=13)).timestamp(),
             },
             meta,
         ),
         processor.process_message(
             {**template, "status": "exited", "quantity": 5}, meta,
         ),
         processor.process_message(
             {**template, "status": "errored", "errors": 1, "quantity": 2}, meta,
         ),
         processor.process_message(
             {
                 **template,
                 "distinct_id": distinct_id,
                 "status": "errored",
                 "errors": 1,
                 "quantity": 2,
                 "started": (self.started + timedelta(minutes=24)).timestamp(),
             },
             meta,
         ),
     ]
     filtered = [e for e in events if e]
     write_processed_messages(self.storage, filtered)
Example #10
    def generate_outcomes(
        self,
        org_id: int,
        project_id: int,
        num_outcomes: int,
        outcome: int,
        time_since_base: timedelta,
        category: Optional[int],
        quantity: Optional[int] = None,
    ) -> None:
        outcomes = []
        for _ in range(num_outcomes):
            message = {
                "project_id": project_id,
                "event_id": uuid.uuid4().hex,
                "timestamp": (self.base_time + time_since_base).strftime(
                    "%Y-%m-%dT%H:%M:%S.%fZ"
                ),
                "org_id": org_id,
                "reason": None,
                "key_id": 1,
                "outcome": outcome,
                "category": category,
                "quantity": quantity,
            }
            if message["category"] is None:
                del message["category"]  # for testing None category case
            if message["quantity"] is None:
                del message["quantity"]  # for testing None quantity case
            processed = (
                self.storage.get_table_writer()
                .get_stream_loader()
                .get_processor()
                .process_message(
                    message,
                    KafkaMessageMetadata(0, 0, self.base_time),
                )
            )
            if processed:
                outcomes.append(processed)

        write_processed_messages(self.storage, outcomes)
Example #11
    def test_midnight_error_case(
        self,
        current_time: MagicMock,
        storage_key: StorageKey,
        create_event_row_for_date: Callable[[datetime, Optional[int]],
                                            InsertBatch],
    ) -> None:
        """
        This test is simulating a failure case that happened in production, where when the script ran,
        it attempted to delete a part whose last day was within the retention period. The script was
        using datetimes not aligned to midnight, and so it was removing a part on the same day, but
        technically outside of the window because of the script's extra time.

        """
        def to_monday(d: datetime) -> datetime:
            rounded = d - timedelta(days=d.weekday())
            return datetime(rounded.year, rounded.month, rounded.day)

        storage = get_writable_storage(storage_key)
        clickhouse = storage.get_cluster().get_query_connection(
            ClickhouseClientSettings.CLEANUP)

        table = storage.get_table_writer().get_schema().get_local_table_name()
        database = storage.get_cluster().get_database()

        parts = cleanup.get_active_partitions(clickhouse, storage, database,
                                              table)
        assert parts == []

        # Pick a time a few minutes after midnight
        base = datetime(2022, 1, 29, 0, 4, 37)
        current_time.return_value = base

        # Insert an event that is outside retention, but its last day is just inside retention
        # Note that without rounding the base time to midnight, base - retention > last_day(timestamp)
        timestamp = datetime(2021, 10, 25)
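        # 2021-10-25 is a Monday; its partition week ends on Sunday 2021-10-31, which
        # is exactly 90 days before the midnight-aligned base date, so the part must
        # not be reported as stale.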
        write_processed_messages(storage,
                                 [create_event_row_for_date(timestamp, 90)])
        parts = cleanup.get_active_partitions(clickhouse, storage, database,
                                              table)

        assert [(p.date, p.retention_days)
                for p in parts] == [(to_monday(timestamp), 90)]
        stale = cleanup.filter_stale_partitions(parts)
        assert stale == []
Example #12
 def generate_counters(self) -> None:
     events = []
     for n in range(self.seconds):
         for p in self.project_ids:
             processed = (
                 self.storage.get_table_writer()
                 .get_stream_loader()
                 .get_processor()
                 .process_message(
                     {
                         "org_id": self.org_id,
                         "project_id": p,
                         "unit": "ms",
                         "type": METRICS_COUNTERS_TYPE,
                         "value": 1.0,
                         "timestamp": self.base_time.timestamp() + n,
                         "tags": self.default_tags,
                         "metric_id": self.metric_id,
                         "retention_days": RETENTION_DAYS,
                     },
                     KafkaMessageMetadata(0, 0, self.base_time),
                 )
             )
             if processed:
                 events.append(processed)
     write_processed_messages(self.storage, events)
Example #13
    def test_optimize(
        self,
        storage_key: StorageKey,
        create_event_row_for_date: Callable[[datetime], InsertBatch],
    ) -> None:
        storage = get_writable_storage(storage_key)
        cluster = storage.get_cluster()
        clickhouse = cluster.get_query_connection(ClickhouseClientSettings.OPTIMIZE)
        table = storage.get_table_writer().get_schema().get_local_table_name()
        database = cluster.get_database()

        # no data, 0 partitions to optimize
        parts = optimize.get_partitions_to_optimize(
            clickhouse, storage, database, table
        )
        assert parts == []

        base = datetime(1999, 12, 26)  # a sunday
        base_monday = base - timedelta(days=base.weekday())

        # 1 event, 0 unoptimized parts
        write_processed_messages(storage, [create_event_row_for_date(base)])
        parts = optimize.get_partitions_to_optimize(
            clickhouse, storage, database, table
        )
        assert parts == []

        # 2 events in the same part, 1 unoptimized part
        write_processed_messages(storage, [create_event_row_for_date(base)])
        parts = optimize.get_partitions_to_optimize(
            clickhouse, storage, database, table
        )
        assert [(p.date, p.retention_days) for p in parts] == [(base_monday, 90)]

        # 3 events in the same part, 1 unoptimized part
        write_processed_messages(storage, [create_event_row_for_date(base)])
        parts = optimize.get_partitions_to_optimize(
            clickhouse, storage, database, table
        )
        assert [(p.date, p.retention_days) for p in parts] == [(base_monday, 90)]

        # 3 events in one part, 2 in another, 2 unoptimized parts
        a_month_earlier = base_monday - timedelta(days=31)
        a_month_earlier_monday = a_month_earlier - timedelta(
            days=a_month_earlier.weekday()
        )
        write_processed_messages(
            storage, [create_event_row_for_date(a_month_earlier_monday)]
        )
        write_processed_messages(
            storage, [create_event_row_for_date(a_month_earlier_monday)]
        )
        parts = optimize.get_partitions_to_optimize(
            clickhouse, storage, database, table
        )
        assert [(p.date, p.retention_days) for p in parts] == [
            (base_monday, 90),
            (a_month_earlier_monday, 90),
        ]

        # respects before (base is properly excluded)
        assert [
            (p.date, p.retention_days)
            for p in list(
                optimize.get_partitions_to_optimize(
                    clickhouse, storage, database, table, before=base
                )
            )
        ] == [(a_month_earlier_monday, 90)]

        optimize.optimize_partitions(clickhouse, database, table, parts)

        # all parts should be optimized
        parts = optimize.get_partitions_to_optimize(
            clickhouse, storage, database, table
        )
        assert parts == []
Example #14
    def test(
        self,
        storage_key: StorageKey,
        create_event_row_for_date: Callable[[datetime, Optional[int]],
                                            InsertBatch],
    ) -> None:
        def to_monday(d: datetime) -> datetime:
            return d - timedelta(days=d.weekday())

        base = datetime(1999, 12, 26)  # a sunday

        storage = get_writable_storage(storage_key)
        clickhouse = storage.get_cluster().get_query_connection(
            ClickhouseClientSettings.CLEANUP)

        table = storage.get_table_writer().get_schema().get_table_name()
        database = storage.get_cluster().get_database()

        parts = cleanup.get_active_partitions(clickhouse, storage, database,
                                              table)
        assert parts == []

        # base, 90 retention
        write_processed_messages(storage,
                                 [create_event_row_for_date(base, None)])
        parts = cleanup.get_active_partitions(clickhouse, storage, database,
                                              table)

        assert [(p.date, p.retention_days)
                for p in parts] == [(to_monday(base), 90)]
        stale = cleanup.filter_stale_partitions(parts, as_of=base)
        assert stale == []

        # -3 weeks (21 days), 90 retention
        three_weeks_ago = base - timedelta(days=7 * 3)
        write_processed_messages(
            storage, [create_event_row_for_date(three_weeks_ago, None)])
        parts = cleanup.get_active_partitions(clickhouse, storage, database,
                                              table)
        assert [(p.date, p.retention_days) for p in parts] == [
            (to_monday(three_weeks_ago), 90),
            (to_monday(base), 90),
        ]
        stale = cleanup.filter_stale_partitions(parts, as_of=base)
        assert stale == []

        # -13 weeks (91 days), 90 retention
        thirteen_weeks_ago = base - timedelta(days=7 * 13)
        write_processed_messages(
            storage, [create_event_row_for_date(thirteen_weeks_ago, None)])
        parts = cleanup.get_active_partitions(clickhouse, storage, database,
                                              table)
        assert [(p.date, p.retention_days) for p in parts] == [
            (to_monday(thirteen_weeks_ago), 90),
            (to_monday(three_weeks_ago), 90),
            (to_monday(base), 90),
        ]
        stale = cleanup.filter_stale_partitions(parts, as_of=base)
        assert [(p.date, p.retention_days)
                for p in stale] == [(to_monday(thirteen_weeks_ago), 90)]

        # -1 week, 30 retention
        one_week_ago = base - timedelta(days=7)
        write_processed_messages(storage,
                                 [create_event_row_for_date(one_week_ago, 30)])
        parts = cleanup.get_active_partitions(clickhouse, storage, database,
                                              table)

        assert {(p.date, p.retention_days)
                for p in parts} == set([
                    (to_monday(thirteen_weeks_ago), 90),
                    (to_monday(three_weeks_ago), 90),
                    (to_monday(one_week_ago), 30),
                    (to_monday(base), 90),
                ])
        stale = cleanup.filter_stale_partitions(parts, as_of=base)
        assert [(p.date, p.retention_days)
                for p in stale] == [(to_monday(thirteen_weeks_ago), 90)]

        # -5 weeks, 30 retention
        five_weeks_ago = base - timedelta(days=7 * 5)
        write_processed_messages(
            storage, [create_event_row_for_date(five_weeks_ago, 30)])
        parts = cleanup.get_active_partitions(clickhouse, storage, database,
                                              table)
        assert {(p.date, p.retention_days)
                for p in parts} == set([
                    (to_monday(thirteen_weeks_ago), 90),
                    (to_monday(five_weeks_ago), 30),
                    (to_monday(three_weeks_ago), 90),
                    (to_monday(one_week_ago), 30),
                    (to_monday(base), 90),
                ])
        stale = cleanup.filter_stale_partitions(parts, as_of=base)
        assert {(p.date, p.retention_days)
                for p in stale} == set([(to_monday(thirteen_weeks_ago), 90),
                                        (to_monday(five_weeks_ago), 30)])

        cleanup.drop_partitions(clickhouse,
                                database,
                                table,
                                stale,
                                dry_run=False)

        parts = cleanup.get_active_partitions(clickhouse, storage, database,
                                              table)
        assert {(p.date, p.retention_days)
                for p in parts} == set([
                    (to_monday(three_weeks_ago), 90),
                    (to_monday(one_week_ago), 30),
                    (to_monday(base), 90),
                ])
Example #15
 def generate_fizzbuzz_events(self) -> None:
     """
     Generate a deterministic set of events across a time range.
     """
     events = []
     for tick in range(self.minutes):
         tock = tick + 1
         for p in self.project_ids:
             # project N sends an event every Nth minute
             if tock % p == 0:
                 trace_id = "7400045b25c443b885914600aa83ad04"
                 span_id = "8841662216cc598b"
                 processed = (
                     self.storage.get_table_writer()
                     .get_stream_loader()
                     .get_processor()
                     .process_message(
                         (
                             2,
                             "insert",
                             {
                                 "project_id": p,
                                 "event_id": uuid.uuid4().hex,
                                 "deleted": 0,
                                 "datetime": (
                                     self.base_time + timedelta(minutes=tick)
                                 ).isoformat(),
                                 "platform": self.platforms[
                                     (tock * p) % len(self.platforms)
                                 ],
                                 "retention_days": settings.DEFAULT_RETENTION_DAYS,
                                 "data": {
                                     # Project N sends every Nth (mod len(hashes)) hash (and platform)
                                     "received": calendar.timegm(
                                         (self.base_time + timedelta(minutes=tick)).timetuple()
                                     ),
                                     "type": "transaction",
                                     "transaction": "/api/do_things",
                                     "start_timestamp": datetime.timestamp(
                                         self.base_time + timedelta(minutes=tick)
                                     ),
                                     "timestamp": datetime.timestamp(
                                         self.base_time + timedelta(minutes=tick, seconds=1)
                                     ),
                                     "tags": {
                                         # Sentry
                                         "environment": self.environments[
                                             (tock * p) % len(self.environments)
                                         ],
                                         "sentry:release": str(tick),
                                         "sentry:dist": "dist1",
                                         # User
                                         "foo": "baz",
                                         "foo.bar": "qux",
                                         "os_name": "linux",
                                     },
                                     "user": {
                                         "email": "*****@*****.**",
                                         "ip_address": "8.8.8.8",
                                     },
                                     "contexts": {
                                         "trace": {
                                             "trace_id": trace_id,
                                             "span_id": span_id,
                                             "op": "http",
                                             "status": "0",
                                         },
                                     },
                                     "measurements": {
                                         "lcp": {"value": 32.129},
                                         "lcp.elementSize": {"value": 4242},
                                     },
                                     "breakdowns": {
                                         "span_ops": {
                                             "ops.db": {"value": 62.512},
                                             "ops.http": {"value": 109.774},
                                             "total.time": {"value": 172.286},
                                         }
                                     },
                                     "spans": [
                                         {
                                             "op": "db",
                                             "trace_id": trace_id,
                                             "span_id": span_id + "1",
                                             "parent_span_id": None,
                                             "same_process_as_parent": True,
                                             "description": "SELECT * FROM users",
                                             "data": {},
                                             "timestamp": calendar.timegm(
                                                 (self.base_time + timedelta(minutes=tick)).timetuple()
                                             ),
                                         }
                                     ],
                                 },
                             },
                         ),
                         KafkaMessageMetadata(0, 0, self.base_time),
                     )
                 )
                 if processed:
                     events.append(processed)
     write_processed_messages(self.storage, events)
Example #16
    def test(self) -> None:
        def to_monday(d: datetime) -> datetime:
            return d - timedelta(days=d.weekday())

        base = datetime(1999, 12, 26)  # a sunday

        storage = get_writable_storage(StorageKey.EVENTS)
        clickhouse = storage.get_cluster().get_query_connection(
            ClickhouseClientSettings.CLEANUP)

        table = storage.get_table_writer().get_schema().get_table_name()
        database = storage.get_cluster().get_database()

        parts = cleanup.get_active_partitions(clickhouse, database, table)
        assert parts == []

        # base, 90 retention
        write_processed_messages(storage,
                                 [self.create_event_row_for_date(base)])
        parts = cleanup.get_active_partitions(clickhouse, database, table)
        assert parts == [(to_monday(base), 90)]
        stale = cleanup.filter_stale_partitions(parts, as_of=base)
        assert stale == []

        # -3 weeks (21 days), 90 retention
        three_weeks_ago = base - timedelta(days=7 * 3)
        write_processed_messages(
            storage, [self.create_event_row_for_date(three_weeks_ago)])
        parts = cleanup.get_active_partitions(clickhouse, database, table)
        assert parts == [(to_monday(three_weeks_ago), 90),
                         (to_monday(base), 90)]
        stale = cleanup.filter_stale_partitions(parts, as_of=base)
        assert stale == []

        # -13 weeks (91 days), 90 retention
        thirteen_weeks_ago = base - timedelta(days=7 * 13)
        write_processed_messages(
            storage, [self.create_event_row_for_date(thirteen_weeks_ago)])
        parts = cleanup.get_active_partitions(clickhouse, database, table)
        assert parts == [
            (to_monday(thirteen_weeks_ago), 90),
            (to_monday(three_weeks_ago), 90),
            (to_monday(base), 90),
        ]
        stale = cleanup.filter_stale_partitions(parts, as_of=base)
        assert stale == [(to_monday(thirteen_weeks_ago), 90)]

        # -1 week, 30 retention
        one_week_ago = base - timedelta(days=7)
        write_processed_messages(
            storage, [self.create_event_row_for_date(one_week_ago, 30)])
        parts = cleanup.get_active_partitions(clickhouse, database, table)
        assert parts == [
            (to_monday(thirteen_weeks_ago), 90),
            (to_monday(three_weeks_ago), 90),
            (to_monday(one_week_ago), 30),
            (to_monday(base), 90),
        ]
        stale = cleanup.filter_stale_partitions(parts, as_of=base)
        assert stale == [(to_monday(thirteen_weeks_ago), 90)]

        # -5 weeks, 30 retention
        five_weeks_ago = base - timedelta(days=7 * 5)
        write_processed_messages(
            storage, [self.create_event_row_for_date(five_weeks_ago, 30)])
        parts = cleanup.get_active_partitions(clickhouse, database, table)
        assert parts == [
            (to_monday(thirteen_weeks_ago), 90),
            (to_monday(five_weeks_ago), 30),
            (to_monday(three_weeks_ago), 90),
            (to_monday(one_week_ago), 30),
            (to_monday(base), 90),
        ]
        stale = cleanup.filter_stale_partitions(parts, as_of=base)
        assert stale == [
            (to_monday(thirteen_weeks_ago), 90),
            (to_monday(five_weeks_ago), 30),
        ]

        cleanup.drop_partitions(clickhouse,
                                database,
                                table,
                                stale,
                                dry_run=False)

        parts = cleanup.get_active_partitions(clickhouse, database, table)
        assert parts == [
            (to_monday(three_weeks_ago), 90),
            (to_monday(one_week_ago), 30),
            (to_monday(base), 90),
        ]