def test_failure(self) -> None:
    with pytest.raises(AssertionError):
        GroupedMessageRow.from_bulk(
            {
                "project_id": "2",
                "id": "1",
                "status": "0",
                # Non-UTC date with fractional seconds
                "last_seen": "2019-07-01 18:03:07.984+05",
                # UTC date without fractional seconds
                "first_seen": "2019-07-01 18:03:07+00",
                # Another UTC date with lower precision
                "active_at": "2019-06-25 22:15:57.6+00",
                "first_release_id": "0",
            }
        )
def test_bulk_load(self) -> None:
    row = GroupedMessageRow.from_bulk(
        {
            "project_id": "2",
            "id": "10",
            "status": "0",
            "last_seen": "2019-06-28 17:57:32+00",
            "first_seen": "2019-06-28 06:40:17+00",
            "active_at": "2019-06-28 06:40:17+00",
            "first_release_id": "26",
        }
    )
    write_processed_messages(self.storage, [InsertBatch([row.to_clickhouse()])])
    ret = (
        get_cluster(StorageSetKey.EVENTS)
        .get_query_connection(ClickhouseClientSettings.QUERY)
        .execute("SELECT * FROM groupedmessage_local;")
    )
    assert ret[0] == (
        0,  # offset
        0,  # deleted
        2,  # project_id
        10,  # id
        0,  # status
        datetime(2019, 6, 28, 17, 57, 32),  # last_seen
        datetime(2019, 6, 28, 6, 40, 17),  # first_seen
        datetime(2019, 6, 28, 6, 40, 17),  # active_at
        26,  # first_release_id
    )
def test_basic_date(self) -> None:
    message = GroupedMessageRow.from_bulk(
        {
            "project_id": "2",
            "id": "1",
            "status": "0",
            # UTC date with fractional seconds
            "last_seen": "2019-07-01 18:03:07.984123+00",
            # UTC date without fractional seconds
            "first_seen": "2019-07-01 18:03:07+00",
            # Empty date, expected to map to None
            "active_at": "",
            "first_release_id": "0",
        }
    )
    assert message.to_clickhouse() == {
        "offset": 0,
        "project_id": 2,
        "id": 1,
        "record_deleted": 0,
        "status": 0,
        "last_seen": "2019-07-01 18:03:07",
        "first_seen": "2019-07-01 18:03:07",
        "active_at": None,
        "first_release_id": 0,
    }
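# The two tests above pin down the date handling in GroupedMessageRow.from_bulk:
# UTC ("+00") timestamps with optional fractional seconds are truncated to whole
# seconds, an empty string becomes None, and a non-UTC offset raises. The helper
# below is an illustrative sketch of that behaviour only; parse_bulk_timestamp is
# a hypothetical name, not the actual Snuba implementation.
from datetime import datetime
from typing import Optional


def parse_bulk_timestamp(value: str) -> Optional[str]:
    # Empty strings map to None (see "active_at": "" in test_basic_date).
    if not value:
        return None
    # Only UTC offsets are accepted (the "+05" timestamp in test_failure fails).
    assert value.endswith("+00"), f"expected a UTC (+00) timestamp, got {value!r}"
    value = value[: -len("+00")]
    # Fractional seconds are dropped, keeping second precision.
    value = value.split(".", 1)[0]
    # Round-trip through datetime to validate, then return the ClickHouse format.
    return datetime.strptime(value, "%Y-%m-%d %H:%M:%S").strftime("%Y-%m-%d %H:%M:%S")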
def get_bulk_loader(self, source, dest_table):
    return SingleTableBulkLoader(
        source=source,
        source_table=self.POSTGRES_TABLE,
        dest_table=dest_table,
        row_processor=lambda row: GroupedMessageRow.from_bulk(row).to_clickhouse(),
    )
def test_bulk_load(self):
    row = GroupedMessageRow.from_bulk(
        {
            "project_id": "2",
            "id": "10",
            "status": "0",
            "last_seen": "2019-06-28 17:57:32+00",
            "first_seen": "2019-06-28 06:40:17+00",
            "active_at": "2019-06-28 06:40:17+00",
            "first_release_id": "26",
        }
    )
    self.write_processed_records(row.to_clickhouse())
    ret = clickhouse_ro.execute("SELECT * FROM test_groupedmessage_local;")
    assert ret[0] == (
        0,  # offset
        0,  # deleted
        2,  # project_id
        10,  # id
        0,  # status
        datetime(2019, 6, 28, 17, 57, 32),  # last_seen
        datetime(2019, 6, 28, 6, 40, 17),  # first_seen
        datetime(2019, 6, 28, 6, 40, 17),  # active_at
        26,  # first_release_id
    )
def test_bulk_load(self):
    row = GroupedMessageRow.from_bulk({
        'project_id': '2',
        'id': '10',
        'status': '0',
        'last_seen': '2019-06-28 17:57:32+00',
        'first_seen': '2019-06-28 06:40:17+00',
        'active_at': '2019-06-28 06:40:17+00',
        'first_release_id': '26',
    })
    self.write_processed_records(row.to_clickhouse())
    cp = ClickhousePool()
    ret = cp.execute("SELECT * FROM test_groupedmessage_local;")
    assert ret[0] == (
        0,  # offset
        0,  # deleted
        2,  # project_id
        10,  # id
        0,  # status
        datetime(2019, 6, 28, 17, 57, 32),  # last_seen
        datetime(2019, 6, 28, 6, 40, 17),  # first_seen
        datetime(2019, 6, 28, 6, 40, 17),  # active_at
        26,  # first_release_id
    )
    local_table_name="groupedmessage_local",
    dist_table_name="groupedmessage_dist",
    storage_set_key=StorageSetKey.EVENTS,
    mandatory_conditions=[
        binary_condition(
            ConditionFunctions.EQ,
            Column(None, None, "record_deleted"),
            Literal(None, 0),
        ),
    ],
    prewhere_candidates=["project_id", "id"],
)

POSTGRES_TABLE = "sentry_groupedmessage"

storage = CdcStorage(
    storage_key=StorageKey.GROUPEDMESSAGES,
    storage_set_key=StorageSetKey.EVENTS,
    schema=schema,
    query_processors=[],
    stream_loader=build_kafka_stream_loader_from_settings(
        StorageKey.GROUPEDMESSAGES,
        processor=GroupedMessageProcessor(POSTGRES_TABLE),
        default_topic_name="cdc",
        pre_filter=CdcTableNameMessageFilter(POSTGRES_TABLE),
    ),
    default_control_topic="cdc_control",
    postgres_table=POSTGRES_TABLE,
    row_processor=lambda row: GroupedMessageRow.from_bulk(row).to_clickhouse(),
)
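# For reference, the row_processor defined above is the same transformation the
# bulk-load tests exercise: GroupedMessageRow.from_bulk(row).to_clickhouse().
# A minimal usage sketch follows; sample_row is a hypothetical bulk row shaped
# like the dicts used in the tests.
sample_row = {
    "project_id": "2",
    "id": "10",
    "status": "0",
    "last_seen": "2019-06-28 17:57:32+00",
    "first_seen": "2019-06-28 06:40:17+00",
    "active_at": "2019-06-28 06:40:17+00",
    "first_release_id": "26",
}
clickhouse_row = GroupedMessageRow.from_bulk(sample_row).to_clickhouse()
# clickhouse_row is the flat dict written to groupedmessage_local during a bulk load.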