Code example #1
File: test_groupassignee.py    Project: alexef/snuba
    def test_messages(self) -> None:
        processor = GroupAssigneeProcessor("sentry_groupasignee")

        metadata = KafkaMessageMetadata(offset=42,
                                        partition=0,
                                        timestamp=datetime(1970, 1, 1))

        ret = processor.process_message(self.INSERT_MSG, metadata)
        assert ret == InsertBatch([self.PROCESSED])
        self.write_processed_messages([ret])
        ret = (
            get_cluster(StorageSetKey.EVENTS)
            .get_query_connection(ClickhouseClientSettings.QUERY)
            .execute("SELECT * FROM groupassignee_local;")
        )
        assert ret[0] == (
            42,  # offset
            0,  # deleted
            2,  # project_id
            1359,  # group_id
            datetime(2019, 9, 19, 0, 17, 55),
            1,  # user_id
            None,  # team_id
        )

        ret = processor.process_message(self.UPDATE_MSG_NO_KEY_CHANGE,
                                        metadata)
        assert ret == InsertBatch([self.PROCESSED])

        # Tests an update with a key change, which becomes two inserts:
        # one deletion and the insertion of the new row.
        ret = processor.process_message(self.UPDATE_MSG_WITH_KEY_CHANGE,
                                        metadata)
        assert ret == InsertBatch([self.DELETED, self.PROCESSED_UPDATE])

        ret = processor.process_message(self.DELETE_MSG, metadata)
        assert ret == InsertBatch([self.DELETED])
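
The fixtures referenced above (self.INSERT_MSG, self.PROCESSED, and so on) are not shown on this page. Purely as an illustration, a wal2json-style insert payload consistent with the ClickHouse row asserted above could look like the sketch below; the actual fixture in test_groupassignee.py may be shaped differently.

import json

# Hypothetical CDC payload for the asserted row (project_id=2, group_id=1359,
# user_id=1, team_id=None). Column names, their order, and the timezone suffix
# are assumptions, not copied from the snuba test fixtures.
EXAMPLE_INSERT_MSG = json.dumps({
    "event": "change",
    "kind": "insert",
    "schema": "public",
    "table": "sentry_groupasignee",
    "columnnames": ["project_id", "group_id", "date_added", "user_id", "team_id"],
    "columnvalues": [2, 1359, "2019-09-19 00:17:55+00", 1, None],
})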
Code example #2
    def test_messages(self):
        processor = GroupAssigneeProcessor('sentry_groupasignee')

        metadata = KafkaMessageMetadata(
            offset=42,
            partition=0,
        )

        begin_msg = json.loads(self.BEGIN_MSG)
        ret = processor.process_message(begin_msg, metadata)
        assert ret is None

        commit_msg = json.loads(self.COMMIT_MSG)
        ret = processor.process_message(commit_msg, metadata)
        assert ret is None

        insert_msg = json.loads(self.INSERT_MSG)
        ret = processor.process_message(insert_msg, metadata)
        assert ret.data == [self.PROCESSED]
        self.write_processed_records(ret.data)
        cp = ClickhousePool()
        ret = cp.execute("SELECT * FROM test_groupassignee_local;")
        assert ret[0] == (
            42,  # offset
            0,  # deleted
            2,  # project_id
            1359,  # group_id
            datetime(2019, 9, 19, 0, 17, 55),
            1,  # user_id
            None,  # team_id
        )

        update_msg = json.loads(self.UPDATE_MSG_NO_KEY_CHANGE)
        ret = processor.process_message(update_msg, metadata)
        assert ret.data == [self.PROCESSED]

        # Tests an update with a key change, which becomes two inserts:
        # one deletion and the insertion of the new row.
        update_msg = json.loads(self.UPDATE_MSG_WITH_KEY_CHANGE)
        ret = processor.process_message(update_msg, metadata)
        assert ret.data == [self.DELETED, self.PROCESSED_UPDATE]

        delete_msg = json.loads(self.DELETE_MSG)
        ret = processor.process_message(delete_msg, metadata)
        assert ret.data == [self.DELETED]
Code example #3
    def __init__(self) -> None:
        columns = ColumnSet([
            # columns to maintain the dataset
            # Kafka topic offset
            ("offset", UInt(64)),
            ("record_deleted", UInt(8)),
            # PG columns
            ("project_id", UInt(64)),
            ("group_id", UInt(64)),
            ("date_added", Nullable(DateTime())),
            ("user_id", Nullable(UInt(64))),
            ("team_id", Nullable(UInt(64))),
        ])

        schema = ReplacingMergeTreeSchema(
            columns=columns,
            local_table_name='groupassignee_local',
            dist_table_name='groupassignee_dist',
            order_by='(project_id, group_id)',
            partition_by=None,
            version_column='offset',
        )

        dataset_schemas = DatasetSchemas(
            read_schema=schema,
            write_schema=schema,
        )

        super().__init__(
            dataset_schemas=dataset_schemas,
            table_writer=GroupAssigneeTableWriter(
                write_schema=schema,
                stream_loader=KafkaStreamLoader(
                    processor=GroupAssigneeProcessor(self.POSTGRES_TABLE),
                    default_topic="cdc",
                ),
                postgres_table=self.POSTGRES_TABLE,
            ),
            default_control_topic="cdc_control",
            postgres_table=self.POSTGRES_TABLE,
        )
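
The schema above is a ReplacingMergeTreeSchema with version_column='offset' and order_by='(project_id, group_id)': for a given assignment key, ClickHouse eventually keeps only the row with the highest offset, and record_deleted marks the tombstone rows written for deletions. A minimal Python sketch of that "latest offset wins" behaviour (illustrative only; this is not how ClickHouse actually merges parts):

# Illustrative sketch of ReplacingMergeTree semantics with version_column="offset":
# per (project_id, group_id) key, keep the row with the highest offset; rows
# flagged record_deleted are then filtered out, which is how a deletion ends up
# hiding the key.
def collapse(rows):
    latest = {}
    for row in rows:
        key = (row["project_id"], row["group_id"])
        if key not in latest or row["offset"] > latest[key]["offset"]:
            latest[key] = row
    return [row for row in latest.values() if not row["record_deleted"]]


rows = [
    {"offset": 40, "record_deleted": 0, "project_id": 2, "group_id": 1359},
    {"offset": 42, "record_deleted": 1, "project_id": 2, "group_id": 1359},
]
assert collapse(rows) == []  # the later tombstone wins over the earlier insert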
Code example #4
    ("user_id", UInt(64, Modifiers(nullable=True))),
    ("team_id", UInt(64, Modifiers(nullable=True))),
])

schema = WritableTableSchema(
    columns=columns,
    local_table_name="groupassignee_local",
    dist_table_name="groupassignee_dist",
    storage_set_key=StorageSetKey.CDC,
)

POSTGRES_TABLE = "sentry_groupasignee"

storage = CdcStorage(
    storage_key=StorageKey.GROUPASSIGNEES,
    storage_set_key=StorageSetKey.CDC,
    schema=schema,
    query_processors=[
        PrewhereProcessor(["project_id"]),
        ConsistencyEnforcerProcessor(),
    ],
    stream_loader=build_kafka_stream_loader_from_settings(
        processor=GroupAssigneeProcessor(POSTGRES_TABLE),
        default_topic=Topic.CDC,
        pre_filter=CdcTableNameMessageFilter(POSTGRES_TABLE),
    ),
    default_control_topic="cdc_control",
    postgres_table=POSTGRES_TABLE,
    row_processor=lambda row: GroupAssigneeRow.from_bulk(row).to_clickhouse(),
)
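
Code example #4 wires a CdcTableNameMessageFilter into the stream loader as pre_filter, and the tests in examples #5 and #6 below exercise it directly. As a rough sketch only, and assuming the filter simply looks for a "table" header matching the configured Postgres table (the real snuba filter operates on the full Kafka message and may differ), its behaviour in example #5 can be pictured like this:

# Hypothetical stand-in for CdcTableNameMessageFilter: drop any message whose
# "table" header is missing or names a different Postgres table. Transactional
# BEGIN/COMMIT messages carry no table header, so they are dropped before the
# processor ever sees them (matching the assertions in code example #5).
class TableNameFilterSketch:
    def __init__(self, postgres_table: str) -> None:
        self.__table = postgres_table.encode()

    def should_drop(self, headers) -> bool:
        return dict(headers).get("table") != self.__table


flt = TableNameFilterSketch("sentry_groupasignee")
assert flt.should_drop([])                                       # BEGIN/COMMIT
assert not flt.should_drop([("table", b"sentry_groupasignee")])  # row change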
Code example #5
    def test_messages(self):
        processor = GroupAssigneeProcessor("sentry_groupasignee")
        message_filter = CdcTableNameMessageFilter(
            postgres_table=POSTGRES_TABLE)

        metadata = KafkaMessageMetadata(offset=42,
                                        partition=0,
                                        timestamp=datetime(1970, 1, 1))

        assert message_filter.should_drop(
            self.__make_msg(0, 42, self.BEGIN_MSG, []))

        assert message_filter.should_drop(
            self.__make_msg(0, 42, self.COMMIT_MSG, []))

        assert not message_filter.should_drop(
            self.__make_msg(0, 42, self.INSERT_MSG,
                            [("table", POSTGRES_TABLE.encode())]))
        insert_msg = json.loads(self.INSERT_MSG)
        ret = processor.process_message(insert_msg, metadata)
        assert ret == InsertBatch([self.PROCESSED])
        self.write_processed_messages([ret])
        ret = (
            get_cluster(StorageSetKey.EVENTS)
            .get_query_connection(ClickhouseClientSettings.QUERY)
            .execute("SELECT * FROM groupassignee_local;")
        )
        assert ret[0] == (
            42,  # offset
            0,  # deleted
            2,  # project_id
            1359,  # group_id
            datetime(2019, 9, 19, 0, 17, 55),
            1,  # user_id
            None,  # team_id
        )

        assert not message_filter.should_drop(
            self.__make_msg(
                0,
                42,
                self.UPDATE_MSG_NO_KEY_CHANGE,
                [("table", POSTGRES_TABLE.encode())],
            ))
        update_msg = json.loads(self.UPDATE_MSG_NO_KEY_CHANGE)
        ret = processor.process_message(update_msg, metadata)
        assert ret == InsertBatch([self.PROCESSED])

        # Tests an update with a key change, which becomes two inserts:
        # one deletion and the insertion of the new row.
        assert not message_filter.should_drop(
            self.__make_msg(
                0,
                42,
                self.UPDATE_MSG_WITH_KEY_CHANGE,
                [("table", POSTGRES_TABLE.encode())],
            ))
        update_msg = json.loads(self.UPDATE_MSG_WITH_KEY_CHANGE)
        ret = processor.process_message(update_msg, metadata)
        assert ret == InsertBatch([self.DELETED, self.PROCESSED_UPDATE])

        assert not message_filter.should_drop(
            self.__make_msg(
                0,
                42,
                self.DELETE_MSG,
                [("table", POSTGRES_TABLE.encode())],
            ))
        delete_msg = json.loads(self.DELETE_MSG)
        ret = processor.process_message(delete_msg, metadata)
        assert ret == InsertBatch([self.DELETED])
Code example #6
    def test_messages(self):
        processor = GroupAssigneeProcessor("sentry_groupasignee")
        message_filter = CdcTableNameMessageFilter(
            postgres_table=POSTGRES_TABLE)

        metadata = KafkaMessageMetadata(
            offset=42,
            partition=0,
        )

        assert not message_filter.should_drop(
            self.__make_msg(0, 42, self.BEGIN_MSG, []))
        begin_msg = json.loads(self.BEGIN_MSG)
        ret = processor.process_message(begin_msg, metadata)
        assert ret is None

        assert not message_filter.should_drop(
            self.__make_msg(0, 42, self.COMMIT_MSG, []))
        commit_msg = json.loads(self.COMMIT_MSG)
        ret = processor.process_message(commit_msg, metadata)
        assert ret is None

        assert not message_filter.should_drop(
            self.__make_msg(0, 42, self.INSERT_MSG,
                            [("table", POSTGRES_TABLE.encode())]))
        insert_msg = json.loads(self.INSERT_MSG)
        ret = processor.process_message(insert_msg, metadata)
        assert ret.data == [self.PROCESSED]
        self.write_processed_records(ret.data)
        ret = clickhouse_ro.execute("SELECT * FROM test_groupassignee_local;")
        assert ret[0] == (
            42,  # offset
            0,  # deleted
            2,  # project_id
            1359,  # group_id
            datetime(2019, 9, 19, 0, 17, 55),
            1,  # user_id
            None,  # team_id
        )

        assert not message_filter.should_drop(
            self.__make_msg(
                0,
                42,
                self.UPDATE_MSG_NO_KEY_CHANGE,
                [("table", POSTGRES_TABLE.encode())],
            ))
        update_msg = json.loads(self.UPDATE_MSG_NO_KEY_CHANGE)
        ret = processor.process_message(update_msg, metadata)
        assert ret.data == [self.PROCESSED]

        # Tests an update with a key change, which becomes two inserts:
        # one deletion and the insertion of the new row.
        assert not message_filter.should_drop(
            self.__make_msg(
                0,
                42,
                self.UPDATE_MSG_WITH_KEY_CHANGE,
                [("table", POSTGRES_TABLE.encode())],
            ))
        update_msg = json.loads(self.UPDATE_MSG_WITH_KEY_CHANGE)
        ret = processor.process_message(update_msg, metadata)
        assert ret.data == [self.DELETED, self.PROCESSED_UPDATE]

        assert not message_filter.should_drop(
            self.__make_msg(0, 42, self.DELETE_MSG, []))
        delete_msg = json.loads(self.DELETE_MSG)
        ret = processor.process_message(delete_msg, metadata)
        assert ret.data == [self.DELETED]