def test(self):
    """Walk the cleanup flow end to end: list partitions, pick stale ones, drop them.

    One event is written per step. After every write the active partitions
    and the subset that ``cleanup.filter_stale_partitions`` reports as of
    ``base`` are compared against the expected ``(week Monday, retention)``
    pairs. The stale partitions from the final step are then dropped with
    ``dry_run=False`` and the surviving partitions are checked.
    """

    def week_start(day):
        # Monday of the week that contains `day`.
        return day - timedelta(days=day.weekday())

    def active_partitions():
        return cleanup.get_active_partitions(
            self.clickhouse, self.database, self.table
        )

    def stale_as_of_base(partitions):
        return cleanup.filter_stale_partitions(partitions, as_of=base)

    def ingest(*event_args):
        # Arguments are forwarded untouched so the retention is only passed
        # to the event factory when a step actually supplies one.
        self.write_processed_records(self.create_event_for_date(*event_args))

    base = datetime(1999, 12, 26)  # a sunday
    one_week_ago = base - timedelta(weeks=1)
    three_weeks_ago = base - timedelta(weeks=3)
    five_weeks_ago = base - timedelta(weeks=5)
    thirteen_weeks_ago = base - timedelta(weeks=13)

    # Expected (partition date, retention days) pairs, one per written event.
    wk_base = (week_start(base), 90)
    wk_minus_1 = (week_start(one_week_ago), 30)
    wk_minus_3 = (week_start(three_weeks_ago), 90)
    wk_minus_5 = (week_start(five_weeks_ago), 30)
    wk_minus_13 = (week_start(thirteen_weeks_ago), 90)

    # Nothing has been written yet.
    assert active_partitions() == []

    # base, 90 retention
    ingest(base)
    found = active_partitions()
    assert found == [wk_base]
    assert stale_as_of_base(found) == []

    # -3 weeks (21 days), 90 retention: expected to be kept
    ingest(three_weeks_ago)
    found = active_partitions()
    assert found == [wk_minus_3, wk_base]
    assert stale_as_of_base(found) == []

    # -13 weeks (91 days), 90 retention: expected to be stale
    ingest(thirteen_weeks_ago)
    found = active_partitions()
    assert found == [wk_minus_13, wk_minus_3, wk_base]
    assert stale_as_of_base(found) == [wk_minus_13]

    # -1 week, 30 retention: expected to be kept
    ingest(one_week_ago, 30)
    found = active_partitions()
    assert found == [wk_minus_13, wk_minus_3, wk_minus_1, wk_base]
    assert stale_as_of_base(found) == [wk_minus_13]

    # -5 weeks (35 days), 30 retention: expected to be stale
    ingest(five_weeks_ago, 30)
    found = active_partitions()
    assert found == [wk_minus_13, wk_minus_5, wk_minus_3, wk_minus_1, wk_base]
    expired = stale_as_of_base(found)
    assert expired == [wk_minus_13, wk_minus_5]

    # Drop for real and confirm only the non-stale partitions remain.
    cleanup.drop_partitions(
        self.clickhouse, self.database, self.table, expired, dry_run=False
    )
    assert active_partitions() == [wk_minus_3, wk_minus_1, wk_base]
def test(self) -> None:
    """Check partition listing, stale filtering and dropping on the events storage.

    The connection, database and table are resolved from the writable
    ``StorageKey.EVENTS`` storage. One event row is written per step; after
    each write the active partitions and the ones
    ``cleanup.filter_stale_partitions`` reports as of ``base`` are compared
    against the expected ``(week Monday, retention)`` pairs. Finally the
    stale partitions are dropped with ``dry_run=False`` and the survivors
    are checked.
    """

    def week_start(day: datetime) -> datetime:
        # Monday of the week that contains `day`.
        return day - timedelta(days=day.weekday())

    base = datetime(1999, 12, 26)  # a sunday
    one_week_ago = base - timedelta(weeks=1)
    three_weeks_ago = base - timedelta(weeks=3)
    five_weeks_ago = base - timedelta(weeks=5)
    thirteen_weeks_ago = base - timedelta(weeks=13)

    # Expected (partition date, retention days) pairs, one per written row.
    wk_base = (week_start(base), 90)
    wk_minus_1 = (week_start(one_week_ago), 30)
    wk_minus_3 = (week_start(three_weeks_ago), 90)
    wk_minus_5 = (week_start(five_weeks_ago), 30)
    wk_minus_13 = (week_start(thirteen_weeks_ago), 90)

    storage = get_writable_storage(StorageKey.EVENTS)
    connection = storage.get_cluster().get_query_connection(
        ClickhouseClientSettings.CLEANUP
    )
    table_name = storage.get_table_writer().get_schema().get_table_name()
    db_name = storage.get_cluster().get_database()

    # Nothing has been written yet.
    assert cleanup.get_active_partitions(connection, db_name, table_name) == []

    # base, 90 retention
    row = self.create_event_row_for_date(base)
    self.write_processed_messages([row])
    found = cleanup.get_active_partitions(connection, db_name, table_name)
    assert found == [wk_base]
    assert cleanup.filter_stale_partitions(found, as_of=base) == []

    # -3 weeks (21 days), 90 retention: expected to be kept
    row = self.create_event_row_for_date(three_weeks_ago)
    self.write_processed_messages([row])
    found = cleanup.get_active_partitions(connection, db_name, table_name)
    assert found == [wk_minus_3, wk_base]
    assert cleanup.filter_stale_partitions(found, as_of=base) == []

    # -13 weeks (91 days), 90 retention: expected to be stale
    row = self.create_event_row_for_date(thirteen_weeks_ago)
    self.write_processed_messages([row])
    found = cleanup.get_active_partitions(connection, db_name, table_name)
    assert found == [wk_minus_13, wk_minus_3, wk_base]
    assert cleanup.filter_stale_partitions(found, as_of=base) == [wk_minus_13]

    # -1 week, 30 retention: expected to be kept
    row = self.create_event_row_for_date(one_week_ago, 30)
    self.write_processed_messages([row])
    found = cleanup.get_active_partitions(connection, db_name, table_name)
    assert found == [wk_minus_13, wk_minus_3, wk_minus_1, wk_base]
    assert cleanup.filter_stale_partitions(found, as_of=base) == [wk_minus_13]

    # -5 weeks (35 days), 30 retention: expected to be stale
    row = self.create_event_row_for_date(five_weeks_ago, 30)
    self.write_processed_messages([row])
    found = cleanup.get_active_partitions(connection, db_name, table_name)
    assert found == [wk_minus_13, wk_minus_5, wk_minus_3, wk_minus_1, wk_base]
    expired = cleanup.filter_stale_partitions(found, as_of=base)
    assert expired == [wk_minus_13, wk_minus_5]

    # Drop for real and confirm only the non-stale partitions remain.
    cleanup.drop_partitions(connection, db_name, table_name, expired, dry_run=False)
    found = cleanup.get_active_partitions(connection, db_name, table_name)
    assert found == [wk_minus_3, wk_minus_1, wk_base]
def test(
    self,
    storage_key: StorageKey,
    create_event_row_for_date: Callable[[datetime, Optional[int]], InsertBatch],
) -> None:
    """Check partition listing, stale filtering and dropping for one storage.

    Args:
        storage_key: Key of the writable storage to run the cleanup against.
        create_event_row_for_date: Factory that builds the batch to insert
            for a given timestamp and an optional retention in days
            (``None`` is passed for the steps that expect 90).

    One batch is written per step. After each write the active partitions
    and the ones ``cleanup.filter_stale_partitions`` reports as of ``base``
    are projected to ``(date, retention_days)`` pairs and compared with the
    expected values. The stale partitions from the last step are then
    dropped with ``dry_run=False`` and the survivors are checked.
    """

    def to_monday(d: datetime) -> datetime:
        # Monday of the week that contains `d`.
        return d - timedelta(days=d.weekday())

    base = datetime(1999, 12, 26)  # a sunday
    one_week_ago = base - timedelta(days=7)
    three_weeks_ago = base - timedelta(days=7 * 3)
    five_weeks_ago = base - timedelta(days=7 * 5)
    thirteen_weeks_ago = base - timedelta(days=7 * 13)

    # Expected (partition date, retention days) pairs, one per written batch.
    wk_base = (to_monday(base), 90)
    wk_minus_1 = (to_monday(one_week_ago), 30)
    wk_minus_3 = (to_monday(three_weeks_ago), 90)
    wk_minus_5 = (to_monday(five_weeks_ago), 30)
    wk_minus_13 = (to_monday(thirteen_weeks_ago), 90)

    storage = get_writable_storage(storage_key)
    clickhouse = storage.get_cluster().get_query_connection(
        ClickhouseClientSettings.CLEANUP
    )
    table = storage.get_table_writer().get_schema().get_table_name()
    database = storage.get_cluster().get_database()

    # Nothing has been written yet.
    parts = cleanup.get_active_partitions(clickhouse, storage, database, table)
    assert parts == []

    # base, default retention (expected to surface as 90)
    write_processed_messages(storage, [create_event_row_for_date(base, None)])
    parts = cleanup.get_active_partitions(clickhouse, storage, database, table)
    assert [(p.date, p.retention_days) for p in parts] == [wk_base]
    stale = cleanup.filter_stale_partitions(parts, as_of=base)
    assert stale == []

    # -3 weeks (21 days), 90 retention: expected to be kept
    write_processed_messages(
        storage, [create_event_row_for_date(three_weeks_ago, None)]
    )
    parts = cleanup.get_active_partitions(clickhouse, storage, database, table)
    assert [(p.date, p.retention_days) for p in parts] == [wk_minus_3, wk_base]
    stale = cleanup.filter_stale_partitions(parts, as_of=base)
    assert stale == []

    # -13 weeks (91 days), 90 retention: expected to be stale
    write_processed_messages(
        storage, [create_event_row_for_date(thirteen_weeks_ago, None)]
    )
    parts = cleanup.get_active_partitions(clickhouse, storage, database, table)
    assert [(p.date, p.retention_days) for p in parts] == [
        wk_minus_13,
        wk_minus_3,
        wk_base,
    ]
    stale = cleanup.filter_stale_partitions(parts, as_of=base)
    assert [(p.date, p.retention_days) for p in stale] == [wk_minus_13]

    # From here on partitions with different retentions coexist and the
    # listing is compared as a set rather than a list.
    # NOTE(review): presumably the returned order is not date-sorted across
    # retention values -- confirm before tightening these to list checks.

    # -1 week, 30 retention: expected to be kept
    write_processed_messages(storage, [create_event_row_for_date(one_week_ago, 30)])
    parts = cleanup.get_active_partitions(clickhouse, storage, database, table)
    assert {(p.date, p.retention_days) for p in parts} == {
        wk_minus_13,
        wk_minus_3,
        wk_minus_1,
        wk_base,
    }
    stale = cleanup.filter_stale_partitions(parts, as_of=base)
    assert [(p.date, p.retention_days) for p in stale] == [wk_minus_13]

    # -5 weeks (35 days), 30 retention: expected to be stale
    write_processed_messages(
        storage, [create_event_row_for_date(five_weeks_ago, 30)]
    )
    parts = cleanup.get_active_partitions(clickhouse, storage, database, table)
    assert {(p.date, p.retention_days) for p in parts} == {
        wk_minus_13,
        wk_minus_5,
        wk_minus_3,
        wk_minus_1,
        wk_base,
    }
    stale = cleanup.filter_stale_partitions(parts, as_of=base)
    assert {(p.date, p.retention_days) for p in stale} == {wk_minus_13, wk_minus_5}

    # Drop for real and confirm only the non-stale partitions remain.
    cleanup.drop_partitions(clickhouse, database, table, stale, dry_run=False)
    parts = cleanup.get_active_partitions(clickhouse, storage, database, table)
    assert {(p.date, p.retention_days) for p in parts} == {
        wk_minus_3,
        wk_minus_1,
        wk_base,
    }