コード例 #1
0
    def test_encode(self):
        """Encode a ``SubscriptionResult`` and verify the decoded JSON envelope.

        The encoded message must carry version ``1`` and a payload holding the
        subscription id, the result values and the ISO-formatted task
        timestamp.
        """
        scheduled_at = datetime.now()
        subscription = Subscription(
            SubscriptionIdentifier(PartitionId(1), uuid.uuid1()),
            SubscriptionData(
                1,
                [],
                [["count()", "", "count"]],
                timedelta(minutes=1),
                timedelta(minutes=1),
            ),
        )
        expected = SubscriptionResult(
            ScheduledTask(scheduled_at, subscription),
            {"data": {"count": 100}},
        )

        encoded = SubscriptionResultCodec().encode(expected)
        envelope = json.loads(encoded.value.decode("utf-8"))
        assert envelope["version"] == 1

        body = envelope["payload"]
        assert body["subscription_id"] == str(expected.task.task.identifier)
        assert body["values"] == expected.result
        assert body["timestamp"] == expected.task.timestamp.isoformat()
コード例 #2
0
    def test(self):
        """Execute one subscription twice and compare the returned counts.

        The first run is scheduled at ``now`` and must report a count of 10.
        The second run is scheduled ``self.minutes`` plus one full time window
        later and must report a count of 0.
        """
        pool = ThreadPoolExecutor()
        metrics = DummyMetricsBackend(strict=True)
        executor = SubscriptionExecutor(self.dataset, pool, metrics)

        identifier = SubscriptionIdentifier(PartitionId(0), uuid1())
        data = SubscriptionData(
            project_id=self.project_id,
            conditions=[["platform", "IN", ["a"]]],
            aggregations=[["count()", "", "count"]],
            time_window=timedelta(minutes=500),
            resolution=timedelta(minutes=1),
        )
        subscription = Subscription(identifier, data)

        now = datetime.utcnow()
        one_minute_ago = now - timedelta(minutes=1)
        tick = Tick(
            offsets=Interval(1, 2),
            timestamps=Interval(one_minute_ago, now),
        )

        first = executor.execute(ScheduledTask(now, subscription), tick).result()
        assert first["data"][0]["count"] == 10

        # Move the schedule time past the inserted data by a whole time window.
        later = (now + timedelta(minutes=self.minutes) +
                 subscription.data.time_window)
        second = executor.execute(ScheduledTask(later, subscription),
                                  tick).result()
        assert second["data"][0]["count"] == 0
コード例 #3
0
ファイル: scheduler.py プロジェクト: pombredanne/snuba
    def __get_subscriptions(self) -> List[Subscription]:
        """Return the cached subscriptions, reloading them when stale.

        The cache is rebuilt from the store when it has never been loaded or
        when more than the cache TTL has elapsed since the last refresh. The
        schedule size is reported on every rebuild, and the cache staleness
        (in milliseconds) is reported on every call.
        """
        now = datetime.now()
        refreshed_at = self.__last_refresh

        needs_refresh = (
            refreshed_at is None or (now - refreshed_at) > self.__cache_ttl
        )
        if needs_refresh:
            reloaded = []
            for subscription_uuid, data in self.__store.all():
                identifier = SubscriptionIdentifier(
                    self.__partition_id, subscription_uuid
                )
                reloaded.append(Subscription(identifier, data))

            self.__subscriptions = reloaded
            self.__last_refresh = now
            self.__metrics.gauge(
                "schedule.size",
                len(self.__subscriptions),
                tags={"partition": str(self.__partition_id)},
            )

        # After a refresh this is zero; otherwise it is the age of the cache.
        staleness_ms = (now - self.__last_refresh).total_seconds() * 1000.0
        self.__metrics.timing(
            "schedule.staleness",
            staleness_ms,
            tags={"partition": str(self.__partition_id)},
        )

        return self.__subscriptions
コード例 #4
0
def build_subscription(resolution: timedelta, sequence: int) -> Subscription:
    """Build a SnQL ``count()`` subscription on partition 1.

    ``sequence`` picks the subscription UUID out of ``UUIDS``; ``resolution``
    is passed through unchanged and the time window is fixed at five minutes.
    """
    identifier = SubscriptionIdentifier(PartitionId(1), UUIDS[sequence])
    data = SnQLSubscriptionData(
        project_id=1,
        time_window=timedelta(minutes=5),
        resolution=resolution,
        query="MATCH events SELECT count()",
    )
    return Subscription(identifier, data)
コード例 #5
0
ファイル: scheduler.py プロジェクト: jiankunking/snuba
 def __get_subscriptions(self,
                         current_time: datetime) -> List[Subscription]:
     """Return the cached subscriptions, reloading them from the store if stale.

     The cache counts as stale when it was never loaded or when more than
     the cache TTL separates ``current_time`` from the last refresh.
     """
     refreshed_at = self.__last_refresh
     is_stale = (refreshed_at is None
                 or (current_time - refreshed_at) > self.__cache_ttl)
     if not is_stale:
         return self.__subscriptions

     reloaded = []
     for subscription_uuid, data in self.__store.all():
         identifier = SubscriptionIdentifier(self.__partition_id,
                                             subscription_uuid)
         reloaded.append(Subscription(identifier, data))

     self.__subscriptions = reloaded
     self.__last_refresh = current_time
     return self.__subscriptions
コード例 #6
0
 def build_subscription(self, resolution: timedelta) -> Subscription:
     """Build a legacy ``count()`` subscription with a fresh ``uuid4``.

     The subscription lives on ``self.partition_id``, uses a one-minute time
     window and the given ``resolution``.
     """
     identifier = SubscriptionIdentifier(self.partition_id, uuid.uuid4())
     data = LegacySubscriptionData(
         project_id=1,
         conditions=[],
         aggregations=[["count()", "", "count"]],
         time_window=timedelta(minutes=1),
         resolution=resolution,
     )
     return Subscription(identifier, data)
コード例 #7
0
 def build_subscription(self, resolution: timedelta) -> Subscription:
     """Build a SnQL ``count()`` subscription with a fresh ``uuid4``.

     The subscription lives on ``self.partition_id`` with a 60-second time
     window; ``resolution`` is converted to whole seconds.
     """
     identifier = SubscriptionIdentifier(self.partition_id, uuid.uuid4())
     resolution_seconds = int(resolution.total_seconds())
     data = SubscriptionData(
         project_id=1,
         query="MATCH (events) SELECT count() AS count",
         time_window_sec=60,
         resolution_sec=resolution_seconds,
         entity_subscription=create_entity_subscription(),
     )
     return Subscription(identifier, data)
コード例 #8
0
def build_subscription(resolution: timedelta, sequence: int) -> Subscription:
    """Build an events ``count()`` subscription on partition 1.

    ``sequence`` picks the subscription UUID out of ``UUIDS``. The time window
    is five minutes and ``resolution`` is converted to whole seconds.
    """
    entity_subscription = EventsSubscription(data_dict={})
    identifier = SubscriptionIdentifier(PartitionId(1), UUIDS[sequence])
    five_minutes_sec = int(timedelta(minutes=5).total_seconds())
    data = SubscriptionData(
        project_id=1,
        time_window_sec=five_minutes_sec,
        resolution_sec=int(resolution.total_seconds()),
        query="MATCH events SELECT count()",
        entity_subscription=entity_subscription,
    )
    return Subscription(identifier, data)
コード例 #9
0
def test_subscription_worker_consistent(
        subscription_data: SubscriptionData) -> None:
    """Check that processing one tick records exactly one ``consistent`` increment.

    The ``event_subscription_non_consistent_sample_rate`` config is set to 1
    before a worker with a single scheduler processes a one-minute tick.
    """
    state.set_config("event_subscription_non_consistent_sample_rate", 1)
    broker: Broker[SubscriptionTaskResult] = Broker(MemoryMessageStorage(),
                                                    TestingClock())

    result_topic = Topic("subscription-results")
    broker.create_topic(result_topic, partitions=1)

    frequency = timedelta(minutes=1)
    evaluations = 1

    identifier = SubscriptionIdentifier(PartitionId(0), uuid1())
    subscription = Subscription(identifier, subscription_data)

    store = DummySubscriptionDataStore()
    store.create(subscription.identifier.uuid, subscription.data)

    metrics = TestingMetricsBackend()

    dataset = get_dataset("events")
    schedulers = {
        0: SubscriptionScheduler(store, PartitionId(0), timedelta(),
                                 DummyMetricsBackend(strict=True)),
    }
    worker = SubscriptionWorker(
        dataset,
        ThreadPoolExecutor(),
        schedulers,
        broker.get_producer(),
        result_topic,
        metrics,
    )

    now = datetime(2000, 1, 1)
    window_start = now - (frequency * evaluations)
    tick = Tick(
        offsets=Interval(0, 1),
        timestamps=Interval(window_start, now),
    )

    tick_message = Message(Partition(Topic("events"), 0), 0, tick, now)
    worker.process_message(tick_message)

    # Brief pause before inspecting the recorded metric calls.
    time.sleep(0.1)

    consistent_increments = [
        call for call in metrics.calls
        if isinstance(call, Increment) and call.name == "consistent"
    ]
    assert len(consistent_increments) == 1
コード例 #10
0
def test_subscription_task_result_encoder() -> None:
    """Encode an events task result and verify the version-3 JSON payload."""
    codec = SubscriptionTaskResultEncoder()

    timestamp = datetime.now()

    subscription_data = SubscriptionData(
        project_id=1,
        query="MATCH (events) SELECT count() AS count",
        time_window_sec=60,
        resolution_sec=60,
        entity_subscription=EventsSubscription(data_dict={}),
    )

    # XXX: This seems way too coupled to the dataset.
    request = subscription_data.build_request(get_dataset("events"), timestamp,
                                              None, Timer("timer"))
    result: Result = {
        "meta": [{"type": "UInt64", "name": "count"}],
        "data": [{"count": 1}],
    }

    subscription = Subscription(
        SubscriptionIdentifier(PartitionId(1), uuid.uuid1()),
        subscription_data,
    )
    scheduled_task = ScheduledSubscriptionTask(
        timestamp,
        SubscriptionWithMetadata(EntityKey.EVENTS, subscription, 5),
    )
    task_result = SubscriptionTaskResult(scheduled_task, (request, result))

    encoded = codec.encode(task_result)
    envelope = json.loads(encoded.value.decode("utf-8"))
    assert envelope["version"] == 3

    body = envelope["payload"]
    expected_id = str(task_result.task.task.subscription.identifier)
    assert body["subscription_id"] == expected_id
    assert body["request"] == request.original_body
    assert body["result"] == result
    assert body["timestamp"] == task_result.task.timestamp.isoformat()
    assert body["entity"] == EntityKey.EVENTS.value
コード例 #11
0
ファイル: subscription.py プロジェクト: getsentry/snuba
    def create(self, data: SubscriptionData, timer: Timer) -> SubscriptionIdentifier:
        """Validate and persist a new subscription, returning its identifier.

        The data is validated and passed through ``_test_request`` before it
        is stored. The identifier combines the partition chosen by the
        partitioner with a new ``uuid1``.
        """
        data.validate()
        self._test_request(data, timer)

        partition = self.__partitioner.build_partition_id(data)
        identifier = SubscriptionIdentifier(partition, uuid1())

        store = RedisSubscriptionDataStore(
            redis_client, self.entity_key, identifier.partition
        )
        store.create(identifier.uuid, data)
        return identifier
コード例 #12
0
ファイル: subscription.py プロジェクト: ruezetle/snuba
 def create(self, data: SubscriptionData,
            timer: Timer) -> SubscriptionIdentifier:
     """Run the subscription query once, then persist it and return its id.

     The identifier combines the partition chosen by the partitioner with a
     new ``uuid1``.
     """
     # Running the query up front ensures it is valid before anything is stored.
     started_at = datetime.utcnow()
     request = data.build_request(self.dataset, started_at, None, timer)
     parse_and_run_query(self.dataset, request, timer)

     partition = self.__partitioner.build_partition_id(data)
     identifier = SubscriptionIdentifier(partition, uuid1())

     store = RedisSubscriptionDataStore(redis_client, self.dataset,
                                        identifier.partition)
     store.create(identifier.uuid, data)
     return identifier
コード例 #13
0
def test_execute_and_produce_result() -> None:
    """Submit one scheduled task and poll until its result is produced.

    The produced message must be keyed by the subscription identifier and the
    commit callback must have been called exactly once.
    """
    state.set_config("subscription_mode_events", "new")
    dataset = get_dataset("events")
    metrics = TestingMetricsBackend()

    scheduled_topic = Topic("scheduled-subscriptions-events")
    result_topic = Topic("events-subscriptions-results")
    broker_storage: MemoryMessageStorage[KafkaPayload] = MemoryMessageStorage()
    broker: Broker[KafkaPayload] = Broker(broker_storage, TestingClock())
    for topic in (scheduled_topic, result_topic):
        broker.create_topic(topic, partitions=1)
    producer = broker.get_producer()

    commit = mock.Mock()

    next_step = ProduceResult(producer, result_topic.name, commit)
    strategy = ExecuteQuery(
        dataset,
        ["events"],  # entity_names
        2,  # max_concurrent_queries
        2,  # total_concurrent_queries
        None,
        metrics,
        next_step,
        commit,
    )

    subscription_identifier = SubscriptionIdentifier(PartitionId(0),
                                                     uuid.uuid1())

    messages = generate_message(EntityKey.EVENTS, subscription_identifier)
    strategy.submit(next(messages))

    result_partition = Partition(result_topic, 0)

    # Eventually a message should be produced and offsets committed
    while (broker_storage.consume(result_partition, 0) is None
           or commit.call_count == 0):
        strategy.poll()

    produced_message = broker_storage.consume(result_partition, 0)
    assert produced_message is not None
    expected_key = str(subscription_identifier).encode("utf-8")
    assert produced_message.payload.key == expected_key
    assert commit.call_count == 1
コード例 #14
0
def test_subscription_task_result_encoder() -> None:
    """Encode a legacy task result and verify the version-2 JSON payload."""
    codec = SubscriptionTaskResultEncoder()

    timestamp = datetime.now()

    subscription_data = LegacySubscriptionData(
        project_id=1,
        conditions=[],
        aggregations=[["count()", "", "count"]],
        time_window=timedelta(minutes=1),
        resolution=timedelta(minutes=1),
    )

    # XXX: This seems way too coupled to the dataset.
    request = subscription_data.build_request(get_dataset("events"), timestamp,
                                              None, Timer("timer"))
    result: Result = {
        "meta": [{"type": "UInt64", "name": "count"}],
        "data": [{"count": 1}],
    }

    subscription = Subscription(
        SubscriptionIdentifier(PartitionId(1), uuid.uuid1()),
        subscription_data,
    )
    task_result = SubscriptionTaskResult(
        ScheduledTask(timestamp, subscription),
        (request, result),
    )

    encoded = codec.encode(task_result)
    envelope = json.loads(encoded.value.decode("utf-8"))
    assert envelope["version"] == 2

    body = envelope["payload"]
    assert body["subscription_id"] == str(task_result.task.task.identifier)
    assert body["request"] == request.body
    assert body["result"] == result
    assert body["timestamp"] == task_result.task.timestamp.isoformat()
コード例 #15
0
ファイル: subscription.py プロジェクト: pombredanne/snuba
    def create(self, data: SubscriptionData, timer: Timer) -> SubscriptionIdentifier:
        """Test the subscription's query, persist it and return its identifier.

        A delegate subscription is tested in both its SnQL and legacy forms;
        any other subscription is tested as given. The identifier combines
        the partition chosen by the partitioner with a new ``uuid1``.
        """
        # We want to test the query out here to make sure it's valid and can run
        if not isinstance(data, DelegateSubscriptionData):
            self._test_request(data, timer)
        else:
            # Convert lazily so each form is built only right before its test.
            for convert in (data.to_snql, data.to_legacy):
                self._test_request(convert(), timer)

        partition = self.__partitioner.build_partition_id(data)
        identifier = SubscriptionIdentifier(partition, uuid1())

        store = RedisSubscriptionDataStore(
            redis_client, self.dataset, identifier.partition
        )
        store.create(identifier.uuid, data)
        return identifier
コード例 #16
0
def test_subscription_task_encoder() -> None:
    """Round-trip a scheduled task through the encoder.

    Checks the exact key and JSON value bytes produced by ``encode`` and that
    ``decode`` gives back an equal task.
    """
    encoder = SubscriptionScheduledTaskEncoder()

    subscription_data = SubscriptionData(
        project_id=1,
        query="MATCH events SELECT count()",
        time_window_sec=60,
        resolution_sec=60,
        entity_subscription=EventsSubscription(data_dict={}),
    )

    subscription_id = uuid.UUID("91b46cb6224f11ecb2ddacde48001122")
    epoch = datetime(1970, 1, 1)
    tick_upper_offset = 5

    subscription = Subscription(
        SubscriptionIdentifier(PartitionId(1), subscription_id),
        subscription_data,
    )
    task = ScheduledSubscriptionTask(
        timestamp=epoch,
        task=SubscriptionWithMetadata(
            EntityKey.EVENTS,
            subscription,
            tick_upper_offset,
        ),
    )

    encoded = encoder.encode(task)

    expected_key = b"1/91b46cb6224f11ecb2ddacde48001122"
    expected_value = (
        b"{"
        b'"timestamp":"1970-01-01T00:00:00",'
        b'"entity":"events",'
        b'"task":{'
        b'"data":{"project_id":1,"time_window":60,"resolution":60,"query":"MATCH events SELECT count()"}},'
        b'"tick_upper_offset":5'
        b"}")

    assert encoded.key == expected_key
    assert encoded.value == expected_value

    assert encoder.decode(encoded) == task
コード例 #17
0
def test_skip_stale_message() -> None:
    """Submit a task with a 60-second stale threshold and expect it skipped.

    Nothing may be produced to the result topic, and a ``skipped_execution``
    increment tagged with the ``events`` entity must be recorded.
    """
    dataset = get_dataset("events")
    metrics = TestingMetricsBackend()

    scheduled_topic = Topic("scheduled-subscriptions-events")
    result_topic = Topic("events-subscriptions-results")
    broker_storage: MemoryMessageStorage[KafkaPayload] = MemoryMessageStorage()
    broker: Broker[KafkaPayload] = Broker(broker_storage, TestingClock())
    for topic in (scheduled_topic, result_topic):
        broker.create_topic(topic, partitions=1)
    producer = broker.get_producer()

    commit = mock.Mock()

    stale_threshold_seconds = 60

    next_step = ProduceResult(producer, result_topic.name, commit)
    strategy = ExecuteQuery(
        dataset,
        ["events"],  # entity_names
        2,  # max_concurrent_queries
        2,  # total_concurrent_queries
        stale_threshold_seconds,
        metrics,
        next_step,
        commit,
    )

    subscription_identifier = SubscriptionIdentifier(PartitionId(0),
                                                     uuid.uuid1())

    messages = generate_message(EntityKey.EVENTS, subscription_identifier)
    strategy.submit(next(messages))

    # No message will be produced
    strategy.poll()
    assert broker_storage.consume(Partition(result_topic, 0), 0) is None

    expected_metric = Increment("skipped_execution", 1, {"entity": "events"})
    assert expected_metric in metrics.calls
コード例 #18
0
def generate_message(
    entity_key: EntityKey,
    subscription_identifier: Optional[SubscriptionIdentifier] = None,
) -> Iterator[Message[KafkaPayload]]:
    """Yield an endless stream of encoded scheduled-subscription messages.

    The n-th message (counting from 0) has offset ``n``, a task timestamp of
    the epoch plus ``n`` minutes and a tick upper offset of ``n + 1``. The
    message timestamp is always the epoch. When no identifier is given, one
    is created on partition 1 with a new ``uuid1``.
    """
    encoder = SubscriptionScheduledTaskEncoder()
    epoch = datetime(1970, 1, 1)

    if subscription_identifier is None:
        subscription_identifier = SubscriptionIdentifier(
            PartitionId(1), uuid.uuid1())

    # The two metrics entities get an organization entry in their data dict.
    is_metrics_entity = entity_key in (EntityKey.METRICS_SETS,
                                       EntityKey.METRICS_COUNTERS)
    data_dict = {"organization": 1} if is_metrics_entity else {}

    subscription_factory = ENTITY_KEY_TO_SUBSCRIPTION_MAPPER[entity_key]
    entity_subscription = subscription_factory(data_dict=data_dict)

    offset = 0
    while True:
        subscription = Subscription(
            subscription_identifier,
            SubscriptionData(
                project_id=1,
                time_window_sec=60,
                resolution_sec=60,
                query=f"MATCH ({entity_key.value}) SELECT count()",
                entity_subscription=entity_subscription,
            ),
        )
        task = ScheduledSubscriptionTask(
            epoch + timedelta(minutes=offset),
            SubscriptionWithMetadata(entity_key, subscription, offset + 1),
        )
        payload = encoder.encode(task)

        yield Message(Partition(Topic("test"), 0), offset, payload, epoch)
        offset += 1
コード例 #19
0
    def decode(self, value: KafkaPayload) -> ScheduledSubscriptionTask:
        """Rebuild a ``ScheduledSubscriptionTask`` from a Kafka payload.

        The payload key holds the subscription identifier string. The payload
        value is UTF-8 JSON with ``timestamp``, ``entity``, ``task`` and
        ``tick_upper_offset`` fields.

        Raises:
            AssertionError: if the payload has no key.
        """
        raw_body = value.value

        assert value.key is not None
        identifier_text = value.key.decode("utf-8")

        fields = rapidjson.loads(raw_body.decode("utf-8"))
        entity_key = EntityKey(fields["entity"])

        timestamp = datetime.fromisoformat(fields["timestamp"])
        subscription = Subscription(
            SubscriptionIdentifier.from_string(identifier_text),
            SubscriptionData.from_dict(fields["task"]["data"], entity_key),
        )
        with_metadata = SubscriptionWithMetadata(
            entity_key,
            subscription,
            fields["tick_upper_offset"],
        )
        return ScheduledSubscriptionTask(timestamp, with_metadata)
コード例 #20
0
def test_subscription_worker(broker: Broker[SubscriptionTaskResult], ) -> None:
    """Process a three-minute tick and verify every published task result.

    One result per evaluation must be published to the result topic, with
    the expected task timestamp, a request body covering the subscription's
    time window, and a zero count.
    """
    result_topic = Topic("subscription-results")
    broker.create_topic(result_topic, partitions=1)

    frequency = timedelta(minutes=1)
    evaluations = 3

    identifier = SubscriptionIdentifier(PartitionId(0), uuid1())
    subscription = Subscription(
        identifier,
        SubscriptionData(
            project_id=1,
            conditions=[],
            aggregations=[["count()", "", "count"]],
            time_window=timedelta(minutes=60),
            resolution=frequency,
        ),
    )

    store = DummySubscriptionDataStore()
    store.create(subscription.identifier.uuid, subscription.data)

    metrics = DummyMetricsBackend(strict=True)

    dataset = get_dataset("events")
    schedulers = {
        0: SubscriptionScheduler(store, PartitionId(0), timedelta(), metrics),
    }
    worker = SubscriptionWorker(
        dataset,
        ThreadPoolExecutor(),
        schedulers,
        broker.get_producer(),
        result_topic,
        metrics,
    )

    now = datetime(2000, 1, 1)
    window_start = now - (frequency * evaluations)
    tick = Tick(
        offsets=Interval(0, 1),
        timestamps=Interval(window_start, now),
    )

    tick_message = Message(Partition(Topic("events"), 0), 0, tick, now)
    result_futures = worker.process_message(tick_message)

    assert result_futures is not None and len(result_futures) == evaluations

    # Publish the results.
    worker.flush_batch([result_futures])

    # Check to make sure the results were published.
    # NOTE: This does not cover the ``SubscriptionTaskResultCodec``!
    consumer = broker.get_consumer("group")
    consumer.subscribe([result_topic])

    expected_result = {
        "meta": [{"name": "count", "type": "UInt64"}],
        "data": [{"count": 0}],
    }

    for index, (task, future) in enumerate(result_futures):
        timestamp = now - frequency * (evaluations - index)

        message = consumer.poll()
        assert message is not None
        assert message.partition.topic == result_topic

        future_result = future.result()
        request, result = future_result
        assert message.payload.task.timestamp == timestamp
        assert message.payload == SubscriptionTaskResult(task, future_result)

        # NOTE: The time series extension is folded back into the request
        # body, ideally this would reference the timeseries options in
        # isolation.
        expected_range = {
            "from_date":
            (timestamp - subscription.data.time_window).isoformat(),
            "to_date": timestamp.isoformat(),
        }
        assert request.body.items() > expected_range.items()

        assert result == expected_result
コード例 #21
0
def test_metrics_subscription_task_result_encoder(
        subscription_cls: Type[EntitySubscription], aggregate: str,
        entity_key: EntityKey) -> None:
    """Encode a metrics task result and verify the version-3 JSON payload.

    The query is built from the given entity and aggregate function, and the
    entity subscription from ``subscription_cls`` with organization 1.
    """
    codec = SubscriptionTaskResultEncoder()

    timestamp = datetime.now()

    entity_subscription = subscription_cls(data_dict={"organization": 1})
    query = f"""
            MATCH ({entity_key.value}) SELECT {aggregate}(value) AS value BY project_id, tags[3]
            WHERE org_id = 1 AND project_id IN array(1) AND metric_id = 7 AND tags[3] IN array(1,2)
            """
    subscription_data = SubscriptionData(
        project_id=1,
        query=query,
        time_window_sec=60,
        resolution_sec=60,
        entity_subscription=entity_subscription,
    )

    # XXX: This seems way too coupled to the dataset.
    request = subscription_data.build_request(get_dataset("metrics"),
                                              timestamp, None, Timer("timer"))
    result: Result = {
        "data": [
            {"project_id": 1, "tags[3]": 13, "value": 8},
            {"project_id": 1, "tags[3]": 4, "value": 46},
        ],
        "meta": [
            {"name": "project_id", "type": "UInt64"},
            {"name": "tags[3]", "type": "UInt64"},
            {"name": "value", "type": "Float64"},
        ],
    }

    subscription = Subscription(
        SubscriptionIdentifier(PartitionId(1), uuid.uuid1()),
        subscription_data,
    )
    scheduled_task = ScheduledSubscriptionTask(
        timestamp,
        SubscriptionWithMetadata(entity_key, subscription, 5),
    )
    task_result = SubscriptionTaskResult(scheduled_task, (request, result))

    encoded = codec.encode(task_result)
    envelope = json.loads(encoded.value.decode("utf-8"))
    assert envelope["version"] == 3

    body = envelope["payload"]
    expected_id = str(task_result.task.task.subscription.identifier)
    assert body["subscription_id"] == expected_id
    assert body["request"] == request.original_body
    assert body["result"] == result
    assert body["timestamp"] == task_result.task.timestamp.isoformat()
    assert body["entity"] == entity_key.value
コード例 #22
0
def test_produce_result() -> None:
    """Submit task results to ``ProduceResult`` and track the commit calls.

    Checks that the produced message is keyed by the subscription identifier,
    that the first ``poll`` commits, that a second immediate submit-and-poll
    does not commit again, and that ``join`` triggers one more commit.
    """
    state.set_config("subscription_mode_events", "new")
    epoch = datetime(1970, 1, 1)
    scheduled_topic = Topic("scheduled-subscriptions-events")
    result_topic = Topic("events-subscriptions-results")
    broker_storage: MemoryMessageStorage[KafkaPayload] = MemoryMessageStorage()
    broker: Broker[KafkaPayload] = Broker(broker_storage, TestingClock())
    for topic in (scheduled_topic, result_topic):
        broker.create_topic(topic, partitions=1)

    producer = broker.get_producer()
    commit = mock.Mock()

    strategy = ProduceResult(producer, result_topic.name, commit)

    subscription_data = SubscriptionData(
        project_id=1,
        query="MATCH (events) SELECT count() AS count",
        time_window_sec=60,
        resolution_sec=60,
        entity_subscription=EventsSubscription(data_dict={}),
    )

    identifier = SubscriptionIdentifier(PartitionId(0), uuid.uuid1())
    subscription = Subscription(identifier, subscription_data)

    request = subscription_data.build_request(get_dataset("events"), epoch,
                                              None, Timer("timer"))
    result: Result = {
        "meta": [{"type": "UInt64", "name": "count"}],
        "data": [{"count": 1}],
    }

    scheduled_task = ScheduledSubscriptionTask(
        epoch,
        SubscriptionWithMetadata(EntityKey.EVENTS, subscription, 1),
    )
    task_result = SubscriptionTaskResult(scheduled_task, (request, result))
    message = Message(Partition(scheduled_topic, 0), 1, task_result, epoch)

    strategy.submit(message)

    result_partition = Partition(result_topic, 0)
    produced_message = broker_storage.consume(result_partition, 0)
    assert produced_message is not None
    expected_key = str(subscription.identifier).encode("utf-8")
    assert produced_message.payload.key == expected_key
    assert broker_storage.consume(result_partition, 1) is None
    assert commit.call_count == 0
    strategy.poll()
    assert commit.call_count == 1

    # Commit is throttled so if we immediately submit another message, the commit count will not change
    strategy.submit(message)
    strategy.poll()
    assert commit.call_count == 1

    # Commit count immediately increases once we call join()
    strategy.join()
    assert commit.call_count == 2
コード例 #23
0
ファイル: test_scheduler.py プロジェクト: ruezetle/snuba
 def build_subscription(self, resolution: timedelta) -> Subscription:
     """Build a subscription on ``self.partition_id`` with a fresh ``uuid4``.

     The subscription data is created positionally from ``1``, two empty
     lists, a one-minute timedelta and the given ``resolution``.
     """
     identifier = SubscriptionIdentifier(self.partition_id, uuid.uuid4())
     data = SubscriptionData(1, [], [], timedelta(minutes=1), resolution)
     return Subscription(identifier, data)
コード例 #24
0
def test_executor_consumer() -> None:
    """
    End to end integration test

    Creates the scheduled and result topics, subscribes a result consumer,
    builds the executor consumer, produces one scheduled subscription task
    and asserts that a result message carrying the expected subscription id
    is received on the result topic.
    """
    state.set_config("subscription_mode_events", "new")
    admin_client = AdminClient(get_default_kafka_configuration())
    create_topics(admin_client, [SnubaTopic.SUBSCRIPTION_SCHEDULED_EVENTS])
    create_topics(admin_client, [SnubaTopic.SUBSCRIPTION_RESULTS_EVENTS])

    dataset_name = "events"
    entity_name = "events"
    entity_key = EntityKey(entity_name)
    entity = get_entity(entity_key)
    storage = entity.get_writable_storage()
    assert storage is not None
    stream_loader = storage.get_table_writer().get_stream_loader()

    scheduled_result_topic_spec = stream_loader.get_subscription_result_topic_spec(
    )
    assert scheduled_result_topic_spec is not None
    result_producer = KafkaProducer(
        build_kafka_producer_configuration(scheduled_result_topic_spec.topic))

    # Each run uses a fresh consumer group name derived from a new uuid1.
    result_consumer = KafkaConsumer(
        build_kafka_consumer_configuration(
            scheduled_result_topic_spec.topic,
            str(uuid.uuid1().hex),
            auto_offset_reset="latest",
            strict_offset_reset=False,
        ))
    assigned = False

    def on_partitions_assigned(partitions: Mapping[Partition, int]) -> None:
        # Record that the result consumer has received its assignment.
        nonlocal assigned
        assigned = True

    result_consumer.subscribe(
        [Topic(scheduled_result_topic_spec.topic_name)],
        on_assign=on_partitions_assigned,
    )

    attempts = 10
    while attempts > 0 and not assigned:
        result_consumer.poll(1.0)
        attempts -= 1

    # We need to wait for the consumer to receive partitions otherwise,
    # when we try to consume messages, we will not find anything.
    # Subscription is an async process.
    assert assigned, "Did not receive assignment within 10 attempts"

    consumer_group = str(uuid.uuid1().hex)
    auto_offset_reset = "latest"
    strict_offset_reset = False
    executor = build_executor_consumer(
        dataset_name,
        [entity_name],
        consumer_group,
        result_producer,
        2,
        2,
        auto_offset_reset,
        strict_offset_reset,
        TestingMetricsBackend(),
        None,
    )
    for _ in range(4):
        # Give time to the executor to subscribe
        time.sleep(1)
        executor._run_once()

    # Produce a scheduled task to the scheduled subscriptions topic
    subscription_data = SubscriptionData(
        project_id=1,
        query="MATCH (events) SELECT count()",
        time_window_sec=60,
        resolution_sec=60,
        entity_subscription=EventsSubscription(data_dict={}),
    )

    task = ScheduledSubscriptionTask(
        timestamp=datetime(1970, 1, 1),
        task=SubscriptionWithMetadata(
            entity_key,
            Subscription(
                SubscriptionIdentifier(
                    PartitionId(1),
                    uuid.UUID("91b46cb6224f11ecb2ddacde48001122")),
                subscription_data,
            ),
            1,
        ),
    )

    encoder = SubscriptionScheduledTaskEncoder()
    encoded_task = encoder.encode(task)

    scheduled_topic_spec = stream_loader.get_subscription_scheduled_topic_spec(
    )
    assert scheduled_topic_spec is not None
    tasks_producer = KafkaProducer(
        build_kafka_producer_configuration(scheduled_topic_spec.topic))

    scheduled_topic = Topic(scheduled_topic_spec.topic_name)
    # Wait for the produce call to finish before closing the producer.
    tasks_producer.produce(scheduled_topic, payload=encoded_task).result()
    tasks_producer.close()

    executor._run_once()
    executor.signal_shutdown()
    # Call run here so that the executor shuts down itself cleanly.
    executor.run()
    result = result_consumer.poll(5)
    assert result is not None, "Did not receive a result message"
    data = json.loads(result.payload.value)
    assert (data["payload"]["subscription_id"] ==
            "1/91b46cb6224f11ecb2ddacde48001122"), "Invalid subscription id"

    result_producer.close()
コード例 #25
0
def test_subscription_worker(subscription_data: SubscriptionData) -> None:
    """Process a three-minute tick and verify every published task result.

    One result per evaluation must be published to the result topic, with
    the expected task timestamp, query conditions bounding ``timestamp`` to
    the subscription's time window, and a zero count.
    """
    broker: Broker[SubscriptionTaskResult] = Broker(MemoryMessageStorage(),
                                                    TestingClock())

    result_topic = Topic("subscription-results")
    broker.create_topic(result_topic, partitions=1)

    frequency = timedelta(minutes=1)
    evaluations = 3

    identifier = SubscriptionIdentifier(PartitionId(0), uuid1())
    subscription = Subscription(identifier, subscription_data)

    store = DummySubscriptionDataStore()
    store.create(subscription.identifier.uuid, subscription.data)

    metrics = DummyMetricsBackend(strict=True)

    dataset = get_dataset("events")
    schedulers = {
        0: SubscriptionScheduler(store, PartitionId(0), timedelta(), metrics),
    }
    worker = SubscriptionWorker(
        dataset,
        ThreadPoolExecutor(),
        schedulers,
        broker.get_producer(),
        result_topic,
        metrics,
    )

    now = datetime(2000, 1, 1)
    window_start = now - (frequency * evaluations)
    tick = Tick(
        offsets=Interval(0, 1),
        timestamps=Interval(window_start, now),
    )

    tick_message = Message(Partition(Topic("events"), 0), 0, tick, now)
    result_futures = worker.process_message(tick_message)

    assert result_futures is not None and len(result_futures) == evaluations

    # Publish the results.
    worker.flush_batch([result_futures])

    # Check to make sure the results were published.
    # NOTE: This does not cover the ``SubscriptionTaskResultCodec``!
    consumer = broker.get_consumer("group")
    consumer.subscribe([result_topic])

    expected_result = {
        "meta": [{"name": "count", "type": "UInt64"}],
        "data": [{"count": 0}],
    }

    for index, (task, future) in enumerate(result_futures):
        timestamp = now - frequency * (evaluations - index)

        message = consumer.poll()
        assert message is not None
        assert message.partition.topic == result_topic

        future_result = future.result()
        request, result = future_result
        assert message.payload.task.timestamp == timestamp
        assert message.payload == SubscriptionTaskResult(task, future_result)

        # NOTE: The time series extension is folded back into the request
        # body, ideally this would reference the timeseries options in
        # isolation.
        timestamp_column = Column(None, String("timestamp"))
        from_pattern = FunctionCall(
            String(ConditionFunctions.GTE),
            (
                timestamp_column,
                Literal(Datetime(timestamp - subscription.data.time_window)),
            ),
        )
        to_pattern = FunctionCall(
            String(ConditionFunctions.LT),
            (Column(None, String("timestamp")), Literal(Datetime(timestamp))),
        )

        condition = request.query.get_condition()
        assert condition is not None

        conditions = get_first_level_and_conditions(condition)

        from_matches = [from_pattern.match(expr) for expr in conditions]
        to_matches = [to_pattern.match(expr) for expr in conditions]
        assert any(from_matches)
        assert any(to_matches)

        assert result == expected_result