def test(self):
    """Drive the table from empty to fully optimized, checking pending partitions.

    After each write the test asks ``optimize.get_partitions_to_optimize``
    what is still pending and compares the answer with the expected
    ``(week_monday, 90)`` pairs. It then runs ``optimize.optimize_partitions``
    on the pending partitions and expects nothing to remain.
    """

    def pending(**filters):
        # Attributes are looked up on self at call time, not captured once.
        return optimize.get_partitions_to_optimize(
            self.clickhouse, self.database, self.table, **filters
        )

    def insert_event(day):
        self.write_processed_records(self.create_event_for_date(day))

    # Empty table: nothing is pending.
    assert pending() == []

    sunday = datetime(1999, 12, 26)  # a sunday
    week_start = sunday - timedelta(days=sunday.weekday())

    # A single write leaves nothing to optimize.
    insert_event(sunday)
    assert pending() == []

    # A second write for the same date makes that week's partition pending.
    # NOTE(review): 90 is presumably the retention period in days — confirm.
    insert_event(sunday)
    assert pending() == [(week_start, 90)]

    # A third write for the same date still reports one pending partition.
    insert_event(sunday)
    assert pending() == [(week_start, 90)]

    # Two writes roughly a month earlier add a second pending partition.
    month_back = week_start - timedelta(days=31)
    earlier_week_start = month_back - timedelta(days=month_back.weekday())
    insert_event(earlier_week_start)
    insert_event(earlier_week_start)
    to_optimize = pending()
    assert to_optimize == [(week_start, 90), (earlier_week_start, 90)]

    # `before` is honoured: the partition for the base week is excluded.
    assert list(pending(before=sunday)) == [(earlier_week_start, 90)]

    optimize.optimize_partitions(
        self.clickhouse, self.database, self.table, to_optimize
    )

    # Once optimized, nothing is pending any more.
    assert pending() == []
def test(self) -> None:
    """Check pending-partition detection and optimization on the events storage.

    Resolves the writable ``StorageKey.EVENTS`` storage, then alternates
    writes with calls to ``optimize.get_partitions_to_optimize`` and compares
    each answer with the expected ``(week_monday, 90)`` pairs. Finally it
    runs ``optimize.optimize_partitions`` and expects nothing left pending.
    """
    events_storage = get_writable_storage(StorageKey.EVENTS)
    events_cluster = events_storage.get_cluster()
    connection = events_cluster.get_query_connection(
        ClickhouseClientSettings.OPTIMIZE
    )
    table_name = events_storage.get_table_writer().get_schema().get_table_name()
    database_name = events_cluster.get_database()

    def pending(**filters):
        return optimize.get_partitions_to_optimize(
            connection, database_name, table_name, **filters
        )

    def insert_event(day):
        # Each write is a batch containing exactly one event row.
        self.write_processed_messages([self.create_event_row_for_date(day)])

    # Empty table: nothing is pending.
    assert pending() == []

    sunday = datetime(1999, 12, 26)  # a sunday
    week_start = sunday - timedelta(days=sunday.weekday())

    # A single write leaves nothing to optimize.
    insert_event(sunday)
    assert pending() == []

    # A second write for the same date makes that week's partition pending.
    # NOTE(review): 90 is presumably the retention period in days — confirm.
    insert_event(sunday)
    assert pending() == [(week_start, 90)]

    # A third write for the same date still reports one pending partition.
    insert_event(sunday)
    assert pending() == [(week_start, 90)]

    # Two writes roughly a month earlier add a second pending partition.
    month_back = week_start - timedelta(days=31)
    earlier_week_start = month_back - timedelta(days=month_back.weekday())
    insert_event(earlier_week_start)
    insert_event(earlier_week_start)
    to_optimize = pending()
    assert to_optimize == [(week_start, 90), (earlier_week_start, 90)]

    # `before` is honoured: the partition for the base week is excluded.
    assert list(pending(before=sunday)) == [(earlier_week_start, 90)]

    optimize.optimize_partitions(
        connection, database_name, table_name, to_optimize
    )

    # Once optimized, nothing is pending any more.
    assert pending() == []
def test_optimize(
    self,
    storage_key: StorageKey,
    create_event_row_for_date: Callable[[datetime], InsertBatch],
) -> None:
    """Check pending-partition detection and optimization for one storage.

    Args:
        storage_key: Key of the writable storage under test.
        create_event_row_for_date: Factory returning an insert batch for the
            given datetime; each call's result is written as one batch.

    The test alternates writes with calls to
    ``optimize.get_partitions_to_optimize`` and compares the ``date`` and
    ``retention_days`` of every returned partition with the expected values.
    It then runs ``optimize.optimize_partitions`` on the pending partitions
    and expects nothing to remain.
    """
    storage = get_writable_storage(storage_key)
    cluster = storage.get_cluster()
    clickhouse = cluster.get_query_connection(ClickhouseClientSettings.OPTIMIZE)
    table = storage.get_table_writer().get_schema().get_local_table_name()
    database = cluster.get_database()

    def pending(**filters):
        return optimize.get_partitions_to_optimize(
            clickhouse, storage, database, table, **filters
        )

    def summarize(partitions):
        # Compare only the two fields the test cares about, so the returned
        # partition objects need no equality semantics of their own.
        return [(p.date, p.retention_days) for p in partitions]

    def insert_event(day: datetime) -> None:
        write_processed_messages(storage, [create_event_row_for_date(day)])

    # no data, 0 partitions to optimize
    assert pending() == []

    base = datetime(1999, 12, 26)  # a sunday
    base_monday = base - timedelta(days=base.weekday())

    # 1 event, 0 unoptimized parts
    insert_event(base)
    assert pending() == []

    # 2 events in the same part, 1 unoptimized part
    insert_event(base)
    assert summarize(pending()) == [(base_monday, 90)]

    # 3 events in the same part, 1 unoptimized part
    insert_event(base)
    assert summarize(pending()) == [(base_monday, 90)]

    # 3 events in one part, 2 in another, 2 unoptimized parts
    a_month_earlier = base_monday - timedelta(days=31)
    a_month_earlier_monday = a_month_earlier - timedelta(
        days=a_month_earlier.weekday()
    )
    insert_event(a_month_earlier_monday)
    insert_event(a_month_earlier_monday)
    parts = pending()
    assert summarize(parts) == [
        (base_monday, 90),
        (a_month_earlier_monday, 90),
    ]

    # respects before (base is properly excluded); the result is iterated
    # directly instead of being copied into a throwaway list first
    assert summarize(pending(before=base)) == [(a_month_earlier_monday, 90)]

    optimize.optimize_partitions(clickhouse, database, table, parts)

    # all parts should be optimized
    assert pending() == []