Example #1
def test_local_executor(
    nodes: Mapping[int, Sequence[Tuple[ClickhouseNode, bool]]],
    backup_connection: ClickhousePool,
    expected_queries: Mapping[str, Sequence[str]],
) -> None:
    queries: MutableMapping[str, List[str]] = defaultdict(list)

    def run_query(
        connection: ClickhousePool,
        query: str,
        records_count: int,
        metrics: MetricsBackend,
    ) -> None:
        connection.execute_robust(query)
        queries[connection.host].append(query)

    all_nodes: List[Tuple[ClickhouseNode, bool]] = []
    for shard_nodes in nodes.values():
        all_nodes.extend(shard_nodes)

    cluster = FakeClickhouseCluster(
        host="query_node",
        port=9000,
        user="******",
        password="",
        database="default",
        http_port=8123,
        storage_sets={"events"},
        single_node=False,
        cluster_name="my_cluster",
        distributed_cluster_name="my_distributed_cluster",
        nodes=all_nodes,
    )
    insert_executor = ShardedExecutor(
        cluster=cluster,
        runner=run_query,
        thread_pool=ThreadPoolExecutor(),
        main_connection_pool=RoundRobinConnectionPool(cluster),
        local_table_name="errors_local",
        backup_executor=QueryNodeExecutor(
            runner=run_query,
            connection=backup_connection,
            table="errors_dist",
            metrics=DummyMetricsBackend(),
        ),
        metrics=DummyMetricsBackend(),
    )

    insert_executor.execute(
        replacement=LegacyReplacement(
            COUNT_QUERY_TEMPLATE,
            INSERT_QUERY_TEMPLATE,
            FINAL_QUERY_TEMPLATE,
            (NEEDS_FINAL, 1),
        ),
        records_count=1,
    )

    assert queries == expected_queries
Example #2
    def test(self):
        executor = SubscriptionExecutor(self.dataset, ThreadPoolExecutor(),
                                        DummyMetricsBackend(strict=True))

        subscription = Subscription(
            SubscriptionIdentifier(PartitionId(0), uuid1()),
            SubscriptionData(
                project_id=self.project_id,
                conditions=[["platform", "IN", ["a"]]],
                aggregations=[["count()", "", "count"]],
                time_window=timedelta(minutes=500),
                resolution=timedelta(minutes=1),
            ),
        )

        now = datetime.utcnow()
        tick = Tick(
            offsets=Interval(1, 2),
            timestamps=Interval(now - timedelta(minutes=1), now),
        )

        result = executor.execute(ScheduledTask(now, subscription),
                                  tick).result()
        assert result["data"][0]["count"] == 10

        result = executor.execute(
            ScheduledTask(
                now + timedelta(minutes=self.minutes) +
                subscription.data.time_window,
                subscription,
            ),
            tick,
        ).result()

        assert result["data"][0]["count"] == 0
Example #3
    def eventstream(*, dataset: Dataset):
        ensure_table_exists(dataset)
        record = json.loads(http_request.data)

        version = record[0]
        if version != 2:
            raise RuntimeError("Unsupported protocol version: %s" % record)

        message: Message[KafkaPayload] = Message(
            Partition(Topic("topic"), 0),
            0,
            KafkaPayload(None, http_request.data),
            datetime.now(),
        )

        type_ = record[1]
        metrics = DummyMetricsBackend()
        if type_ == "insert":
            from snuba.consumer import ConsumerWorker

            worker = ConsumerWorker(dataset, metrics=metrics)
        else:
            from snuba.replacer import ReplacerWorker

            worker = ReplacerWorker(clickhouse_rw, dataset, metrics=metrics)

        processed = worker.process_message(message)
        if processed is not None:
            batch = [processed]
            worker.flush_batch(batch)

        return ("ok", 200, {"Content-Type": "text/plain"})
Example #4
    def eventstream(dataset_name):
        dataset = get_dataset(dataset_name)
        ensure_table_exists(dataset)
        record = json.loads(http_request.data)

        version = record[0]
        if version != 2:
            raise RuntimeError("Unsupported protocol version: %s" % record)

        message = KafkaMessage(
            TopicPartition('topic', 0),
            0,
            http_request.data,
        )

        type_ = record[1]
        metrics = DummyMetricsBackend()
        if type_ == 'insert':
            from snuba.consumer import ConsumerWorker
            worker = ConsumerWorker(dataset,
                                    producer=None,
                                    replacements_topic=None,
                                    metrics=metrics)
        else:
            from snuba.replacer import ReplacerWorker
            worker = ReplacerWorker(clickhouse_rw, dataset, metrics=metrics)

        processed = worker.process_message(message)
        if processed is not None:
            batch = [processed]
            worker.flush_batch(batch)

        return ('ok', 200, {'Content-Type': 'text/plain'})
Example #5
def test_load_balancing(
        override_cluster: Callable[[bool], FakeClickhouseCluster]) -> None:
    """
    Test running two replacements in a row and verify the queries
    are properly load balanced on different nodes.
    """
    set_config("write_node_replacements_projects", "[1]")
    cluster = override_cluster(True)

    replacer = ReplacerWorker(get_writable_storage(StorageKey.ERRORS),
                              DummyMetricsBackend())
    replacement = LegacyReplacement(
        COUNT_QUERY_TEMPLATE,
        INSERT_QUERY_TEMPLATE,
        FINAL_QUERY_TEMPLATE,
        (NEEDS_FINAL, 1),
    )
    replacer.flush_batch([replacement, replacement])

    assert cluster.get_queries() == {
        "query_node": [
            "SELECT count() FROM errors_dist FINAL WHERE event_id = '6f0ccc03-6efb-4f7c-8005-d0c992106b31'",
            "SELECT count() FROM errors_dist FINAL WHERE event_id = '6f0ccc03-6efb-4f7c-8005-d0c992106b31'",
        ],
        "storage-0-0": [LOCAL_QUERY],
        "storage-0-1": [LOCAL_QUERY],
        "storage-1-0": [LOCAL_QUERY],
        "storage-1-1": [LOCAL_QUERY],
        "storage-2-0": [LOCAL_QUERY],
        "storage-2-1": [LOCAL_QUERY],
    }
Example #6
def test_gzip_load() -> None:
    content = gzip.compress(DATA.encode("utf-8"))

    dataset = get_dataset("groupedmessage")
    metrics = DummyMetricsBackend(strict=True)
    writer = enforce_table_writer(dataset).get_bulk_writer(
        metrics,
        "gzip",
        [
            "project_id",
            "id",
            "status",
            "last_seen",
            "first_seen",
            "active_at",
            "first_release_id",
        ],
        options=None,
        table_name="groupedmessage_local",
    )

    writer.write([content])

    cluster = dataset.get_default_entity().get_all_storages()[0].get_cluster()
    reader = cluster.get_reader()

    ret = reader.execute(FakeQuery([]))
    assert ret["data"][0] == {"count()": 2}
Example #7
    def run_test(
        self,
        subscriptions: Collection[Subscription],
        start: timedelta,
        end: timedelta,
        expected: Collection[ScheduledTask[Subscription]],
        sort_key=None,
    ) -> None:
        store = RedisSubscriptionDataStore(
            redis_client,
            self.dataset,
            self.partition_id,
        )
        for subscription in subscriptions:
            store.create(subscription.identifier.uuid, subscription.data)

        scheduler = SubscriptionScheduler(
            store,
            self.partition_id,
            timedelta(minutes=1),
            DummyMetricsBackend(strict=True),
        )

        result = list(scheduler.find(self.build_interval(start, end)))
        if sort_key:
            result.sort(key=sort_key)

        assert result == expected
Example #8
class TestHTTPBatchWriter:
    dataset = get_dataset("events")
    metrics = DummyMetricsBackend(strict=True)

    def test_empty_batch(self) -> None:
        enforce_table_writer(
            self.dataset).get_batch_writer(metrics=self.metrics).write([])

    def test_error_handling(self) -> None:
        table_writer = enforce_table_writer(self.dataset)

        with pytest.raises(ClickhouseWriterError) as error:
            table_writer.get_batch_writer(table_name="invalid",
                                          metrics=self.metrics).write([
                                              rapidjson.dumps({
                                                  "x": "y"
                                              }).encode("utf-8")
                                          ])

        assert error.value.code == 60

        with pytest.raises(ClickhouseWriterError) as error:
            table_writer.get_batch_writer(metrics=self.metrics).write([
                b"{}",
                rapidjson.dumps({
                    "timestamp": "invalid"
                }).encode("utf-8")
            ])

        assert error.value.code == 41
        assert error.value.row == 2
Example #9
    def test_send_message(
        self,
        value: str,
        expected: Optional[ProcessedMessage],
    ) -> None:
        storage = get_storage("groupedmessages")
        snapshot_id = uuid1()
        transact_data = TransactionData(xmin=100,
                                        xmax=200,
                                        xip_list=[120, 130])

        worker = SnapshotAwareWorker(
            storage=storage,
            producer=FakeConfluentKafkaProducer(),
            snapshot_id=str(snapshot_id),
            transaction_data=transact_data,
            replacements_topic=None,
            metrics=DummyMetricsBackend(strict=True),
        )

        message: Message[KafkaPayload] = Message(
            Partition(Topic("topic"), 0),
            1,
            KafkaPayload(
                None,
                value.encode("utf-8"),
                [("table", "sentry_groupedmessage".encode())],
            ),
            datetime.now(),
        )

        ret = worker.process_message(message)
        assert ret == expected
Example #10
    def run_test(
        self,
        subscriptions: Collection[Subscription],
        start: timedelta,
        end: timedelta,
        expected: Collection[ScheduledSubscriptionTask],
        sort_key: Optional[Callable[[ScheduledSubscriptionTask],
                                    Tuple[datetime, uuid.UUID]]] = None,
    ) -> None:
        tick = self.build_tick(start, end)

        store = RedisSubscriptionDataStore(
            redis_client,
            self.entity_key,
            self.partition_id,
        )
        for subscription in subscriptions:
            store.create(subscription.identifier.uuid, subscription.data)

        scheduler = SubscriptionScheduler(
            EntityKey.EVENTS,
            store,
            self.partition_id,
            timedelta(minutes=1),
            DummyMetricsBackend(strict=True),
        )

        result = list(scheduler.find(tick))
        if sort_key:
            result.sort(key=sort_key)

        assert result == expected
Example #11
def create_metrics(prefix: str, tags: Optional[Tags] = None) -> MetricsBackend:
    """Create a DogStatsd object if DOGSTATSD_HOST and DOGSTATSD_PORT are defined,
    with the specified prefix and tags. Return a DummyMetricsBackend otherwise.
    Prefixes must start with `snuba.<category>`, for example: `snuba.processor`.
    """
    host = settings.DOGSTATSD_HOST
    port = settings.DOGSTATSD_PORT

    if host is None and port is None:
        from snuba.utils.metrics.backends.dummy import DummyMetricsBackend

        return DummyMetricsBackend()
    elif host is None or port is None:
        raise ValueError(
            f"DOGSTATSD_HOST and DOGSTATSD_PORT should both be None or not None. Found DOGSTATSD_HOST: {host}, DOGSTATSD_PORT: {port} instead."
        )

    from datadog import DogStatsd
    from snuba.utils.metrics.backends.datadog import DatadogMetricsBackend

    return DatadogMetricsBackend(
        partial(
            DogStatsd,
            host=host,
            port=port,
            namespace=prefix,
            constant_tags=[f"{key}:{value}" for key, value in tags.items()]
            if tags is not None
            else None,
        ),
    )
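The factory above returns a DummyMetricsBackend only when neither DOGSTATSD_HOST nor DOGSTATSD_PORT is set, raises a ValueError when exactly one of them is set, and otherwise builds a DatadogMetricsBackend. A minimal usage sketch of the fallback branch, assuming only the import path and settings names visible in the example (not part of the original listing):

# Hypothetical usage sketch for create_metrics above.
# Import path taken from the example; assumes DOGSTATSD_HOST and
# DOGSTATSD_PORT are both left unset, so the DummyMetricsBackend
# branch is taken. Prefixes follow the `snuba.<category>` convention
# described in the docstring.
from snuba.utils.metrics.backends.dummy import DummyMetricsBackend

metrics = create_metrics("snuba.processor", tags={"env": "test"})
assert isinstance(metrics, DummyMetricsBackend)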
Example #12
def test_failing_query(
        override_cluster: Callable[[bool], FakeClickhouseCluster]) -> None:
    """
    Test the execution of replacement queries on single node
    when the query fails.
    """
    set_config("write_node_replacements_projects", "[1]")
    override_cluster(False)

    replacer = ReplacerWorker(
        get_writable_storage(StorageKey.ERRORS),
        "consumer_group",
        DummyMetricsBackend(),
    )

    with pytest.raises(ServerExplodedException):
        replacer.flush_batch([
            LegacyReplacement(
                COUNT_QUERY_TEMPLATE,
                INSERT_QUERY_TEMPLATE,
                FINAL_QUERY_TEMPLATE,
                (NEEDS_FINAL, 1),
                REPLACEMENT_TYPE,
                REPLACEMENT_MESSAGE_METADATA,
            )
        ])
Example #13
def generate_transactions() -> None:
    from datetime import datetime

    table_writer = get_writable_storage(
        StorageKey.TRANSACTIONS).get_table_writer()

    rows = []

    for i in range(5):
        raw_transaction = get_raw_transaction()
        # Older versions of this table did not have measurements
        del raw_transaction["data"]["measurements"]

        processed = (
            table_writer.get_stream_loader().get_processor().process_message(
                (2, "insert", raw_transaction),
                KafkaMessageMetadata(0, 0, datetime.utcnow()),
            ))
        rows.extend(processed.rows)

    BatchWriterEncoderWrapper(
        table_writer.get_batch_writer(metrics=DummyMetricsBackend(
            strict=True)),
        JSONRowEncoder(),
    ).write(rows)
Example #14
def test_write_each_node(
    override_fixture: Callable[[bool], FakeClickhouseCluster],
    write_node_replacements_projects: str,
    expected_queries: Mapping[str, Sequence[str]],
    request: Any,
) -> None:
    """
    Test the execution of replacement queries on both storage nodes and
    query nodes.
    """
    set_config("write_node_replacements_projects",
               write_node_replacements_projects)
    override_func = request.getfixturevalue(override_fixture)
    test_cluster = override_func(True)

    replacer = ReplacerWorker(
        get_writable_storage(StorageKey.ERRORS),
        "consumer_group",
        DummyMetricsBackend(),
    )

    replacer.flush_batch([
        LegacyReplacement(
            COUNT_QUERY_TEMPLATE,
            INSERT_QUERY_TEMPLATE,
            FINAL_QUERY_TEMPLATE,
            (NEEDS_FINAL, 1),
            REPLACEMENT_TYPE,
            REPLACEMENT_MESSAGE_METADATA,
        )
    ])

    queries = test_cluster.get_queries()
    assert queries == expected_queries
Example #15
    def test_send_message(
        self,
        message: str,
        expected: Optional[ProcessedMessage],
    ) -> None:
        dataset = get_dataset("groupedmessage")
        snapshot_id = uuid1()
        transact_data = TransactionData(xmin=100,
                                        xmax=200,
                                        xip_list=[120, 130])

        worker = SnapshotAwareWorker(
            dataset=dataset,
            producer=FakeConfluentKafkaProducer(),
            snapshot_id=str(snapshot_id),
            transaction_data=transact_data,
            replacements_topic=None,
            metrics=DummyMetricsBackend(strict=True),
        )

        ret = worker.process_message(
            KafkaMessage(
                TopicPartition('topic', 0),
                1,
                message.encode('utf-8'),
            ))
        assert ret == expected
Example #16
    def test_batch_size(self, broker: Broker[int]) -> None:
        topic = Topic("topic")
        broker.create_topic(topic, partitions=1)
        producer = broker.get_producer()
        for i in [1, 2, 3]:
            producer.produce(topic, i).result()

        consumer = broker.get_consumer("group")

        worker = FakeWorker()
        batching_consumer = StreamProcessor(
            consumer,
            topic,
            BatchProcessingStrategyFactory(
                worker=worker,
                max_batch_size=2,
                max_batch_time=100,
                metrics=DummyMetricsBackend(strict=True),
            ),
        )

        for _ in range(3):
            batching_consumer._run_once()

        batching_consumer._shutdown()

        assert worker.processed == [1, 2, 3]
        assert worker.flushed == [[1, 2]]
        assert consumer.commit_offsets_calls == 1
        assert consumer.close_calls == 1
Example #17
    def test_batch_time(self, mock_time: Any) -> None:
        consumer = FakeKafkaConsumer()
        worker = FakeWorker()
        batching_consumer = BatchingKafkaConsumer(
            consumer,
            'topic',
            worker=worker,
            max_batch_size=100,
            max_batch_time=2000,
            metrics=DummyMetricsBackend(strict=True),
        )

        mock_time.return_value = time.mktime(datetime(2018, 1, 1, 0, 0, 0).timetuple())
        consumer.items = [KafkaMessage(TopicPartition('topic', 0), i, f'{i}'.encode('utf-8')) for i in [1, 2, 3]]
        for x in range(len(consumer.items)):
            batching_consumer._run_once()

        mock_time.return_value = time.mktime(datetime(2018, 1, 1, 0, 0, 1).timetuple())
        consumer.items = [KafkaMessage(TopicPartition('topic', 0), i, f'{i}'.encode('utf-8')) for i in [4, 5, 6]]
        for x in range(len(consumer.items)):
            batching_consumer._run_once()

        mock_time.return_value = time.mktime(datetime(2018, 1, 1, 0, 0, 5).timetuple())
        consumer.items = [KafkaMessage(TopicPartition('topic', 0), i, f'{i}'.encode('utf-8')) for i in [7, 8, 9]]
        for x in range(len(consumer.items)):
            batching_consumer._run_once()

        batching_consumer._shutdown()

        assert worker.processed == [b'1', b'2', b'3', b'4', b'5', b'6', b'7', b'8', b'9']
        assert worker.flushed == [[b'1', b'2', b'3', b'4', b'5', b'6']]
        assert consumer.commit_calls == 1
        assert consumer.close_calls == 1
Example #18
    def setup_method(self, test_method):
        super().setup_method(test_method)
        self.app.post = partial(self.app.post, headers={"referer": "test"})
        self.event = get_raw_event()
        self.project_id = self.event["project_id"]
        self.base_time = datetime.utcnow().replace(
            second=0, microsecond=0, tzinfo=pytz.utc
        ) - timedelta(minutes=90)
        self.next_time = self.base_time + timedelta(minutes=95)

        self.events_storage = get_entity(EntityKey.EVENTS).get_writable_storage()
        write_unprocessed_events(self.events_storage, [self.event])

        groups = [
            {
                "offset": 0,
                "project_id": self.project_id,
                "id": self.event["group_id"],
                "record_deleted": 0,
                "status": 0,
            }
        ]

        groups_storage = get_entity(EntityKey.GROUPEDMESSAGES).get_writable_storage()
        groups_storage.get_table_writer().get_batch_writer(
            metrics=DummyMetricsBackend(strict=True)
        ).write([json.dumps(group).encode("utf-8") for group in groups])

        assignees = [
            {
                "offset": 0,
                "project_id": self.project_id,
                "group_id": self.event["group_id"],
                "record_deleted": 0,
                "user_id": 100,
            }
        ]

        assignees_storage = get_entity(EntityKey.GROUPASSIGNEE).get_writable_storage()
        assignees_storage.get_table_writer().get_batch_writer(
            metrics=DummyMetricsBackend(strict=True)
        ).write([json.dumps(assignee).encode("utf-8") for assignee in assignees])
Example #19
    def setup_method(self, test_method):
        super(TestReplacer, self).setup_method(test_method)

        from snuba.views import application
        assert application.testing is True

        self.app = application.test_client()
        self.app.post = partial(self.app.post, headers={'referer': 'test'})
        self.replacer = replacer.ReplacerWorker(
            self.clickhouse, self.dataset, DummyMetricsBackend(strict=True))

        self.project_id = 1
Example #20
def write_processed_messages(storage: WritableStorage,
                             messages: Sequence[ProcessedMessage]) -> None:
    rows: MutableSequence[WriterTableRow] = []
    for message in messages:
        assert isinstance(message, InsertBatch)
        rows.extend(message.rows)

    BatchWriterEncoderWrapper(
        storage.get_table_writer().get_batch_writer(
            metrics=DummyMetricsBackend(strict=True)),
        JSONRowEncoder(),
    ).write(rows)
Example #21
    def test_batch_time(self, mock_time: Any, broker: Broker[int]) -> None:
        topic = Topic("topic")
        broker.create_topic(topic, partitions=1)
        producer = broker.get_producer()
        consumer = broker.get_consumer("group")

        worker = FakeWorker()
        metrics = DummyMetricsBackend(strict=True)
        batching_consumer = StreamProcessor(
            consumer,
            topic,
            BatchProcessingStrategyFactory(
                worker=worker,
                max_batch_size=100,
                max_batch_time=2000,
                metrics=metrics,
            ),
            metrics=metrics,
        )

        mock_time.return_value = time.mktime(
            datetime(2018, 1, 1, 0, 0, 0).timetuple())

        for i in [1, 2, 3]:
            producer.produce(topic, i).result()

        for _ in range(3):
            batching_consumer._run_once()

        mock_time.return_value = time.mktime(
            datetime(2018, 1, 1, 0, 0, 1).timetuple())

        for i in [4, 5, 6]:
            producer.produce(topic, i).result()

        for _ in range(3):
            batching_consumer._run_once()

        mock_time.return_value = time.mktime(
            datetime(2018, 1, 1, 0, 0, 5).timetuple())

        for i in [7, 8, 9]:
            producer.produce(topic, i).result()

        for _ in range(3):
            batching_consumer._run_once()

        batching_consumer._shutdown()

        assert worker.processed == [1, 2, 3, 4, 5, 6, 7, 8, 9]
        assert worker.flushed == [[1, 2, 3, 4, 5, 6]]
        assert consumer.commit_offsets_calls == 1
        assert consumer.close_calls == 1
Example #22
def test_subscription_worker_consistent(
        subscription_data: SubscriptionData) -> None:
    state.set_config("event_subscription_non_consistent_sample_rate", 1)
    broker: Broker[SubscriptionTaskResult] = Broker(MemoryMessageStorage(),
                                                    TestingClock())

    result_topic = Topic("subscription-results")

    broker.create_topic(result_topic, partitions=1)

    frequency = timedelta(minutes=1)
    evaluations = 1

    subscription = Subscription(
        SubscriptionIdentifier(PartitionId(0), uuid1()),
        subscription_data,
    )

    store = DummySubscriptionDataStore()
    store.create(subscription.identifier.uuid, subscription.data)

    metrics = TestingMetricsBackend()

    dataset = get_dataset("events")
    worker = SubscriptionWorker(
        dataset,
        ThreadPoolExecutor(),
        {
            0:
            SubscriptionScheduler(store, PartitionId(0), timedelta(),
                                  DummyMetricsBackend(strict=True))
        },
        broker.get_producer(),
        result_topic,
        metrics,
    )

    now = datetime(2000, 1, 1)

    tick = Tick(
        offsets=Interval(0, 1),
        timestamps=Interval(now - (frequency * evaluations), now),
    )

    worker.process_message(Message(Partition(Topic("events"), 0), 0, tick,
                                   now))

    time.sleep(0.1)

    assert (len([
        m for m in metrics.calls
        if isinstance(m, Increment) and m.name == "consistent"
    ]) == 1)
Example #23
    def setup_method(self):
        from snuba.web.views import application

        assert application.testing is True

        self.app = application.test_client()
        self.app.post = partial(self.app.post, headers={"referer": "test"})
        self.storage = get_storage(StorageKey.EVENTS)

        self.replacer = replacer.ReplacerWorker(
            self.storage, DummyMetricsBackend(strict=True))

        self.project_id = 1
        self.event = get_raw_event()
Example #24
    def setup_method(self, test_method):
        super(TestReplacer, self).setup_method(test_method, "events_migration")

        from snuba.web.views import application

        assert application.testing is True

        self.app = application.test_client()
        self.app.post = partial(self.app.post, headers={"referer": "test"})

        self.replacer = replacer.ReplacerWorker(
            self.dataset.get_writable_storage(),
            DummyMetricsBackend(strict=True))

        self.project_id = 1
Example #25
    def setup_method(self):
        from snuba.web.views import application

        assert application.testing is True

        self.app = application.test_client()
        self.app.post = partial(self.app.post, headers={"referer": "test"})

        self.storage = get_writable_storage(StorageKey.ERRORS)
        self.replacer = replacer.ReplacerWorker(
            self.storage, DummyMetricsBackend(strict=True))

        self.project_id = 1
        self.event = get_raw_event()
        settings.ERRORS_ROLLOUT_ALL = True
        settings.ERRORS_ROLLOUT_WRITABLE_STORAGE = True
Example #26
    def setup_method(self) -> None:
        from snuba.web.views import application

        assert application.testing is True

        self.app = application.test_client()
        self.storage = get_writable_storage(StorageKey.ERRORS)

        self.replacer = replacer.ReplacerWorker(
            self.storage,
            CONSUMER_GROUP,
            DummyMetricsBackend(strict=True),
        )

        self.project_id = 1
        self.event = get_raw_event()
Example #27
    def test_error_handling(self):
        table_writer = enforce_table_writer(self.dataset)
        metrics = DummyMetricsBackend(strict=True)

        with pytest.raises(ClickhouseWriterError) as error:
            table_writer.get_batch_writer(table_name="invalid", metrics=metrics).write(
                [rapidjson.dumps({"x": "y"}).encode("utf-8")]
            )

        assert error.value.code == 60

        with pytest.raises(ClickhouseWriterError) as error:
            table_writer.get_batch_writer(metrics=metrics).write(
                [b"{}", rapidjson.dumps({"timestamp": "invalid"}).encode("utf-8")]
            )

        assert error.value.code == 41
        assert error.value.row == 2
Example #28
    def test_batch_size(self) -> None:
        consumer = FakeKafkaConsumer()
        worker = FakeWorker()
        batching_consumer = BatchingKafkaConsumer(
            consumer,
            'topic',
            worker=worker,
            max_batch_size=2,
            max_batch_time=100,
            metrics=DummyMetricsBackend(strict=True),
        )

        consumer.items = [KafkaMessage(TopicPartition('topic', 0), i, f'{i}'.encode('utf-8')) for i in [1, 2, 3]]
        for x in range(len(consumer.items)):
            batching_consumer._run_once()
        batching_consumer._shutdown()

        assert worker.processed == [b'1', b'2', b'3']
        assert worker.flushed == [[b'1', b'2']]
        assert consumer.commit_calls == 1
        assert consumer.close_calls == 1
Example #29
    def setup_method(self) -> None:
        from snuba.web.views import application

        assert application.testing is True

        self.app = application.test_client()

        self.storage = get_writable_storage(StorageKey.ERRORS)
        self.replacer = replacer.ReplacerWorker(
            self.storage,
            CONSUMER_GROUP,
            DummyMetricsBackend(strict=True),
        )

        # Total query time range is 24h before to 24h after now to account
        # for local machine time zones
        self.from_time = datetime.now().replace(
            minute=0, second=0, microsecond=0) - timedelta(days=1)

        self.to_time = self.from_time + timedelta(days=2)

        self.project_id = 1
        self.event = get_raw_event()
Example #30
class TestConsumer(BaseEventsTest):

    metrics = DummyMetricsBackend()

    def test_offsets(self):
        event = self.event

        message: Message[KafkaPayload] = Message(
            Partition(Topic("events"), 456),
            123,
            KafkaPayload(
                None, json.dumps((0, "insert", event)).encode("utf-8")
            ),  # event doesn't really matter
            datetime.now(),
        )

        test_worker = ConsumerWorker(
            self.dataset,
            producer=FakeConfluentKafkaProducer(),
            replacements_topic=Topic(
                enforce_table_writer(self.dataset)
                .get_stream_loader()
                .get_replacement_topic_spec()
                .topic_name
            ),
            metrics=self.metrics,
        )
        batch = [test_worker.process_message(message)]
        test_worker.flush_batch(batch)

        assert self.clickhouse.execute(
            "SELECT project_id, event_id, offset, partition FROM %s" % self.table
        ) == [(self.event["project_id"], self.event["event_id"], 123, 456)]

    def test_skip_too_old(self):
        test_worker = ConsumerWorker(
            self.dataset,
            producer=FakeConfluentKafkaProducer(),
            replacements_topic=Topic(
                enforce_table_writer(self.dataset)
                .get_stream_loader()
                .get_replacement_topic_spec()
                .topic_name
            ),
            metrics=self.metrics,
        )

        event = self.event
        old_timestamp = datetime.utcnow() - timedelta(days=300)
        old_timestamp_str = old_timestamp.strftime("%Y-%m-%dT%H:%M:%S.%fZ")
        event["datetime"] = old_timestamp_str
        event["data"]["datetime"] = old_timestamp_str
        event["data"]["received"] = int(calendar.timegm(old_timestamp.timetuple()))

        message: Message[KafkaPayload] = Message(
            Partition(Topic("events"), 1),
            42,
            KafkaPayload(None, json.dumps((0, "insert", event)).encode("utf-8")),
            datetime.now(),
        )

        assert test_worker.process_message(message) is None

    def test_produce_replacement_messages(self):
        producer = FakeConfluentKafkaProducer()
        test_worker = ConsumerWorker(
            self.dataset,
            producer=producer,
            replacements_topic=Topic(
                enforce_table_writer(self.dataset)
                .get_stream_loader()
                .get_replacement_topic_spec()
                .topic_name
            ),
            metrics=self.metrics,
        )

        test_worker.flush_batch(
            [
                ProcessedMessage(
                    action=ProcessorAction.REPLACE, data=[("1", {"project_id": 1})],
                ),
                ProcessedMessage(
                    action=ProcessorAction.REPLACE, data=[("2", {"project_id": 2})],
                ),
            ]
        )

        assert [(m._topic, m._key, m._value) for m in producer.messages] == [
            ("event-replacements", b"1", b'{"project_id": 1}'),
            ("event-replacements", b"2", b'{"project_id": 2}'),
        ]
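Every example in this listing follows the same pattern: a DummyMetricsBackend, often constructed with strict=True, is injected wherever a MetricsBackend is expected so that tests exercise the code path without emitting real metrics. A minimal sketch of that injection, assuming only the import path from Example #11 and the constructor arguments visible above:

# Hypothetical consolidation of the pattern used throughout this listing.
# The import path is taken from Example #11; strict=True mirrors most of
# the test examples above (its exact semantics are defined by the backend).
from snuba.utils.metrics.backends.dummy import DummyMetricsBackend

metrics = DummyMetricsBackend(strict=True)
# Pass `metrics` to any component that accepts a MetricsBackend, such as a
# batch writer, replacer, or consumer worker as shown in the examples above.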