Ejemplo n.º 1
0
    def test_multiple_subscriptions(self) -> None:
        """Schedule a 1-minute and a 2-minute subscription over one window.

        The window covers the ten minutes before ``self.now``. The 1-minute
        subscription is expected on each of those minutes and the 2-minute
        subscription on every second one; both the expectation and the
        comparison use ``self.sort_key`` for ordering.
        """
        every_minute = self.build_subscription(timedelta(minutes=1))
        every_other_minute = self.build_subscription(timedelta(minutes=2))
        window_start = timedelta(minutes=-10)
        window_end = timedelta(minutes=0)

        # Each (subscription, step) pair walks the minute offsets -10..-1
        # with its own stride; tasks of the first subscription come first.
        expected_tasks = sorted(
            [
                ScheduledSubscriptionTask(
                    self.now + timedelta(minutes=minute),
                    SubscriptionWithMetadata(
                        EntityKey.EVENTS,
                        scheduled,
                        self.build_tick(window_start, window_end).offsets.upper,
                    ),
                )
                for scheduled, step in (
                    (every_minute, 1),
                    (every_other_minute, 2),
                )
                for minute in range(-10, 0, step)
            ],
            key=self.sort_key,
        )

        self.run_test(
            [every_minute, every_other_minute],
            start=window_start,
            end=window_end,
            expected=expected_tasks,
            sort_key=self.sort_key,
        )
Ejemplo n.º 2
0
    def get_task(self, subscription_with_metadata: SubscriptionWithMetadata,
                 timestamp: int) -> Optional[ScheduledSubscriptionTask]:
        """Build a task when ``timestamp`` is a multiple of the resolution.

        Returns a ``ScheduledSubscriptionTask`` stamped with
        ``datetime.fromtimestamp(timestamp)`` and bumps the builder's task
        counter, or returns ``None`` (leaving the counter untouched) when
        ``timestamp`` is not a multiple of the subscription's
        ``resolution_sec``.
        """
        step = subscription_with_metadata.subscription.data.resolution_sec

        # Off-resolution timestamps never produce a task.
        if timestamp % step != 0:
            return None

        self.__count += 1
        return ScheduledSubscriptionTask(
            datetime.fromtimestamp(timestamp), subscription_with_metadata)
Ejemplo n.º 3
0
def test_subscription_task_result_encoder() -> None:
    """Encode an events task result and inspect the resulting JSON.

    Asserts the envelope version is 3 and that the payload carries the
    subscription id, the original request body, the query result, the ISO
    formatted task timestamp and the entity name.
    """
    encoder = SubscriptionTaskResultEncoder()
    now = datetime.now()

    subscription_data = SubscriptionData(
        project_id=1,
        query="MATCH (events) SELECT count() AS count",
        time_window_sec=60,
        resolution_sec=60,
        entity_subscription=EventsSubscription(data_dict={}),
    )

    # XXX: This seems way too coupled to the dataset.
    request = subscription_data.build_request(
        get_dataset("events"), now, None, Timer("timer"))
    result: Result = {
        "meta": [{"type": "UInt64", "name": "count"}],
        "data": [{"count": 1}],
    }

    subscription = Subscription(
        SubscriptionIdentifier(PartitionId(1), uuid.uuid1()),
        subscription_data,
    )
    scheduled_task = ScheduledSubscriptionTask(
        now,
        SubscriptionWithMetadata(EntityKey.EVENTS, subscription, 5),
    )
    task_result = SubscriptionTaskResult(scheduled_task, (request, result))

    encoded = encoder.encode(task_result)
    envelope = json.loads(encoded.value.decode("utf-8"))
    assert envelope["version"] == 3

    payload = envelope["payload"]
    expected_id = str(task_result.task.task.subscription.identifier)
    assert payload["subscription_id"] == expected_id
    assert payload["request"] == request.original_body
    assert payload["result"] == result
    assert payload["timestamp"] == task_result.task.timestamp.isoformat()
    assert payload["entity"] == EntityKey.EVENTS.value
Ejemplo n.º 4
0
    def get_task(self, subscription_with_metadata: SubscriptionWithMetadata,
                 timestamp: int) -> Optional[ScheduledSubscriptionTask]:
        """Build a task for ``timestamp``, applying per-subscription jitter.

        Resolutions above ``settings.MAX_RESOLUTION_FOR_JITTER`` are not
        jittered: a task is produced only when ``timestamp`` is a multiple
        of the resolution, and both counters are incremented.

        Otherwise the jitter is the subscription UUID's integer value modulo
        the resolution. A task is produced only when ``timestamp`` modulo
        the resolution equals that jitter, and the task is stamped with
        ``timestamp - jitter``. Returns ``None`` when no task is due.
        """
        sub = subscription_with_metadata.subscription
        step = sub.data.resolution_sec

        if step > settings.MAX_RESOLUTION_FOR_JITTER:
            # Too coarse to jitter: fire exactly on resolution boundaries.
            if timestamp % step != 0:
                return None
            self.__count += 1
            self.__count_max_resolution += 1
            return ScheduledSubscriptionTask(
                datetime.fromtimestamp(timestamp),
                subscription_with_metadata)

        offset = sub.identifier.uuid.int % step
        if timestamp % step != offset:
            return None

        self.__count += 1
        # Subtract the jitter so the task carries the un-jittered timestamp.
        return ScheduledSubscriptionTask(
            datetime.fromtimestamp(timestamp - offset),
            subscription_with_metadata)
Ejemplo n.º 5
0
def test_subscription_task_encoder() -> None:
    """Round-trip a scheduled task through the scheduled-task encoder.

    Verifies the exact key and value bytes produced by ``encode`` and that
    ``decode`` of that output equals the original task.
    """
    codec = SubscriptionScheduledTaskEncoder()

    data = SubscriptionData(
        project_id=1,
        query="MATCH events SELECT count()",
        time_window_sec=60,
        resolution_sec=60,
        entity_subscription=EventsSubscription(data_dict={}),
    )
    identifier = SubscriptionIdentifier(
        PartitionId(1), uuid.UUID("91b46cb6224f11ecb2ddacde48001122"))
    upper_offset = 5

    original_task = ScheduledSubscriptionTask(
        timestamp=datetime(1970, 1, 1),
        task=SubscriptionWithMetadata(
            EntityKey.EVENTS,
            Subscription(identifier, data),
            upper_offset,
        ),
    )

    wire = codec.encode(original_task)

    # The key is "<partition>/<uuid hex>".
    assert wire.key == b"1/91b46cb6224f11ecb2ddacde48001122"

    expected_value = (
        b"{"
        b'"timestamp":"1970-01-01T00:00:00",'
        b'"entity":"events",'
        b'"task":{'
        b'"data":{"project_id":1,"time_window":60,"resolution":60,"query":"MATCH events SELECT count()"}},'
        b'"tick_upper_offset":5'
        b"}")
    assert wire.value == expected_value

    assert codec.decode(wire) == original_task
Ejemplo n.º 6
0
 def test_subscription_resolution_larger_than_interval(self) -> None:
     """A 3-minute subscription over a 2-minute window around ``self.now``.

     The window runs from one minute before to one minute after
     ``self.now``; exactly one task, stamped ``self.now``, is expected.
     """
     three_minute_sub = self.build_subscription(timedelta(minutes=3))
     window_start = timedelta(minutes=-1)
     window_end = timedelta(minutes=1)

     only_task = ScheduledSubscriptionTask(
         self.now,
         SubscriptionWithMetadata(
             EntityKey.EVENTS,
             three_minute_sub,
             self.build_tick(window_start, window_end).offsets.upper,
         ),
     )

     self.run_test(
         [three_minute_sub],
         start=window_start,
         end=window_end,
         expected=[only_task],
     )
Ejemplo n.º 7
0
 def test_simple(self) -> None:
     """A 1-minute subscription with the "immediate" primary task builder.

     Over the ten minutes before ``self.now`` one task per minute is
     expected, at offsets -10 through -1 minutes.
     """
     state.set_config("subscription_primary_task_builder", "immediate")
     minute_sub = self.build_subscription(timedelta(minutes=1))
     window_start = timedelta(minutes=-10)
     window_end = timedelta(minutes=0)

     expected_tasks = [
         ScheduledSubscriptionTask(
             self.now + timedelta(minutes=minute),
             SubscriptionWithMetadata(
                 EntityKey.EVENTS,
                 minute_sub,
                 self.build_tick(window_start, window_end).offsets.upper,
             ),
         ) for minute in range(-10, 0)
     ]

     self.run_test(
         [minute_sub],
         start=window_start,
         end=window_end,
         expected=expected_tasks,
     )
Ejemplo n.º 8
0
 def test_subscription_resolution_larger_than_tiny_interval(self) -> None:
     """A 1-minute subscription over a 2-second window around ``self.now``.

     Runs with the "immediate" primary task builder; a single task stamped
     ``self.now`` is expected.
     """
     state.set_config("subscription_primary_task_builder", "immediate")
     minute_sub = self.build_subscription(timedelta(minutes=1))
     window_start = timedelta(seconds=-1)
     window_end = timedelta(seconds=1)

     only_task = ScheduledSubscriptionTask(
         self.now,
         SubscriptionWithMetadata(
             EntityKey.EVENTS,
             minute_sub,
             self.build_tick(window_start, window_end).offsets.upper,
         ),
     )

     self.run_test(
         [minute_sub],
         start=window_start,
         end=window_end,
         expected=[only_task],
     )
Ejemplo n.º 9
0
    def test_simple_jittered(self) -> None:
        """A 1-minute subscription over the ten minutes before ``self.now``.

        One task per minute is expected, at offsets -10 through -1 minutes.
        """
        minute_sub = self.build_subscription(timedelta(minutes=1))
        window_start = timedelta(minutes=-10)
        window_end = timedelta(minutes=0)

        expected_tasks = [
            ScheduledSubscriptionTask(
                self.now + timedelta(minutes=minute),
                SubscriptionWithMetadata(
                    EntityKey.EVENTS,
                    minute_sub,
                    self.build_tick(window_start, window_end).offsets.upper,
                ),
            ) for minute in range(-10, 0)
        ]

        self.run_test(
            [minute_sub],
            start=window_start,
            end=window_end,
            expected=expected_tasks,
        )
Ejemplo n.º 10
0
def generate_message(
    entity_key: EntityKey,
    subscription_identifier: Optional[SubscriptionIdentifier] = None,
) -> Iterator[Message[KafkaPayload]]:
    """Yield an endless sequence of encoded scheduled-task messages.

    The n-th message (counting from 0) sits at offset ``n`` of partition 0
    of topic ``"test"``, wraps a task stamped ``n`` minutes after the Unix
    epoch and carries ``n + 1`` as its tick upper offset. The message
    timestamp is always the epoch itself.

    Args:
        entity_key: Entity the subscription targets; also selects the
            entity subscription class and is interpolated into the query.
        subscription_identifier: Identifier shared by every generated task.
            When omitted, one is created on partition 1 with a fresh UUID.
    """
    encoder = SubscriptionScheduledTaskEncoder()
    epoch = datetime(1970, 1, 1)

    if subscription_identifier is None:
        subscription_identifier = SubscriptionIdentifier(
            PartitionId(1), uuid.uuid1())

    # Metrics subscriptions are given an organization; others get no data.
    needs_org = entity_key in (EntityKey.METRICS_SETS,
                               EntityKey.METRICS_COUNTERS)
    data_dict = {"organization": 1} if needs_org else {}

    subscription_cls = ENTITY_KEY_TO_SUBSCRIPTION_MAPPER[entity_key]
    entity_subscription = subscription_cls(data_dict=data_dict)

    offset = 0
    while True:
        subscription = Subscription(
            subscription_identifier,
            SubscriptionData(
                project_id=1,
                time_window_sec=60,
                resolution_sec=60,
                query=f"MATCH ({entity_key.value}) SELECT count()",
                entity_subscription=entity_subscription,
            ),
        )
        scheduled_task = ScheduledSubscriptionTask(
            epoch + timedelta(minutes=offset),
            SubscriptionWithMetadata(entity_key, subscription, offset + 1),
        )
        encoded = encoder.encode(scheduled_task)

        yield Message(Partition(Topic("test"), 0), offset, encoded, epoch)
        offset += 1
Ejemplo n.º 11
0
    def decode(self, value: KafkaPayload) -> ScheduledSubscriptionTask:
        """Rebuild a ``ScheduledSubscriptionTask`` from a Kafka payload.

        The payload key is the UTF-8 subscription identifier string and the
        payload value is a UTF-8 JSON document with the fields
        ``timestamp`` (ISO format), ``entity``, ``task.data`` and
        ``tick_upper_offset``.

        Raises:
            AssertionError: if the payload has no key.
        """
        body_bytes = value.value

        assert value.key is not None
        identifier_str = value.key.decode("utf-8")

        body = rapidjson.loads(body_bytes.decode("utf-8"))
        entity_key = EntityKey(body["entity"])

        # Fields are read in the same order the task is assembled.
        timestamp = datetime.fromisoformat(body["timestamp"])
        subscription = Subscription(
            SubscriptionIdentifier.from_string(identifier_str),
            SubscriptionData.from_dict(body["task"]["data"], entity_key),
        )
        metadata = SubscriptionWithMetadata(
            entity_key, subscription, body["tick_upper_offset"])

        return ScheduledSubscriptionTask(timestamp, metadata)
Ejemplo n.º 12
0
def test_metrics_subscription_task_result_encoder(
        subscription_cls: Type[EntitySubscription], aggregate: str,
        entity_key: EntityKey) -> None:
    """Encode a metrics task result and inspect the resulting JSON.

    Args:
        subscription_cls: Entity subscription class to instantiate (with
            ``organization`` set to 1).
        aggregate: Aggregate function name interpolated into the query.
        entity_key: Entity the query matches and the task is tagged with.

    Asserts the envelope version is 3 and that the payload carries the
    subscription id, the original request body, the query result, the ISO
    formatted task timestamp and the entity name.
    """
    encoder = SubscriptionTaskResultEncoder()
    now = datetime.now()

    subscription_data = SubscriptionData(
        project_id=1,
        query=(f"""
            MATCH ({entity_key.value}) SELECT {aggregate}(value) AS value BY project_id, tags[3]
            WHERE org_id = 1 AND project_id IN array(1) AND metric_id = 7 AND tags[3] IN array(1,2)
            """),
        time_window_sec=60,
        resolution_sec=60,
        entity_subscription=subscription_cls(data_dict={"organization": 1}),
    )

    # XXX: This seems way too coupled to the dataset.
    request = subscription_data.build_request(
        get_dataset("metrics"), now, None, Timer("timer"))
    result: Result = {
        "data": [
            {"project_id": 1, "tags[3]": 13, "value": 8},
            {"project_id": 1, "tags[3]": 4, "value": 46},
        ],
        "meta": [
            {"name": "project_id", "type": "UInt64"},
            {"name": "tags[3]", "type": "UInt64"},
            {"name": "value", "type": "Float64"},
        ],
    }

    subscription = Subscription(
        SubscriptionIdentifier(PartitionId(1), uuid.uuid1()),
        subscription_data,
    )
    scheduled_task = ScheduledSubscriptionTask(
        now,
        SubscriptionWithMetadata(entity_key, subscription, 5),
    )
    task_result = SubscriptionTaskResult(scheduled_task, (request, result))

    encoded = encoder.encode(task_result)
    envelope = json.loads(encoded.value.decode("utf-8"))
    assert envelope["version"] == 3

    payload = envelope["payload"]
    expected_id = str(task_result.task.task.subscription.identifier)
    assert payload["subscription_id"] == expected_id
    assert payload["request"] == request.original_body
    assert payload["result"] == result
    assert payload["timestamp"] == task_result.task.timestamp.isoformat()
    assert payload["entity"] == entity_key.value
Ejemplo n.º 13
0
def test_executor_consumer() -> None:
    """
    End to end integration test.

    Produces one scheduled subscription task for the ``events`` entity,
    runs the executor consumer over it and checks that a result message
    with the same subscription id is received from the subscription
    results topic. The result consumer and producer are closed even when
    an assertion fails.
    """
    state.set_config("subscription_mode_events", "new")
    admin_client = AdminClient(get_default_kafka_configuration())
    create_topics(admin_client, [SnubaTopic.SUBSCRIPTION_SCHEDULED_EVENTS])
    create_topics(admin_client, [SnubaTopic.SUBSCRIPTION_RESULTS_EVENTS])

    dataset_name = "events"
    entity_name = "events"
    entity_key = EntityKey(entity_name)
    entity = get_entity(entity_key)
    storage = entity.get_writable_storage()
    assert storage is not None
    stream_loader = storage.get_table_writer().get_stream_loader()

    scheduled_result_topic_spec = (
        stream_loader.get_subscription_result_topic_spec())
    assert scheduled_result_topic_spec is not None
    result_producer = KafkaProducer(
        build_kafka_producer_configuration(scheduled_result_topic_spec.topic))

    result_consumer = KafkaConsumer(
        build_kafka_consumer_configuration(
            scheduled_result_topic_spec.topic,
            str(uuid.uuid1().hex),
            auto_offset_reset="latest",
            strict_offset_reset=False,
        ))

    try:
        assigned = False

        def on_partitions_assigned(
                partitions: Mapping[Partition, int]) -> None:
            nonlocal assigned
            assigned = True

        result_consumer.subscribe(
            [Topic(scheduled_result_topic_spec.topic_name)],
            on_assign=on_partitions_assigned,
        )

        attempts = 10
        while attempts > 0 and not assigned:
            result_consumer.poll(1.0)
            attempts -= 1

        # We need to wait for the consumer to receive partitions otherwise,
        # when we try to consume messages, we will not find anything.
        # Subscription is an async process.
        assert assigned, "Did not receive assignment within 10 attempts"

        consumer_group = str(uuid.uuid1().hex)
        auto_offset_reset = "latest"
        strict_offset_reset = False
        executor = build_executor_consumer(
            dataset_name,
            [entity_name],
            consumer_group,
            result_producer,
            2,
            2,
            auto_offset_reset,
            strict_offset_reset,
            TestingMetricsBackend(),
            None,
        )
        for _ in range(4):
            # Give time to the executor to subscribe
            time.sleep(1)
            executor._run_once()

        # Produce a scheduled task to the scheduled subscriptions topic
        subscription_data = SubscriptionData(
            project_id=1,
            query="MATCH (events) SELECT count()",
            time_window_sec=60,
            resolution_sec=60,
            entity_subscription=EventsSubscription(data_dict={}),
        )

        task = ScheduledSubscriptionTask(
            timestamp=datetime(1970, 1, 1),
            task=SubscriptionWithMetadata(
                entity_key,
                Subscription(
                    SubscriptionIdentifier(
                        PartitionId(1),
                        uuid.UUID("91b46cb6224f11ecb2ddacde48001122")),
                    subscription_data,
                ),
                1,
            ),
        )

        encoder = SubscriptionScheduledTaskEncoder()
        encoded_task = encoder.encode(task)

        scheduled_topic_spec = (
            stream_loader.get_subscription_scheduled_topic_spec())
        assert scheduled_topic_spec is not None
        tasks_producer = KafkaProducer(
            build_kafka_producer_configuration(scheduled_topic_spec.topic))

        scheduled_topic = Topic(scheduled_topic_spec.topic_name)
        tasks_producer.produce(scheduled_topic, payload=encoded_task).result()
        tasks_producer.close()

        executor._run_once()
        executor.signal_shutdown()
        # Call run here so that the executor shuts down itself cleanly.
        executor.run()
        result = result_consumer.poll(5)
        assert result is not None, "Did not receive a result message"
        data = json.loads(result.payload.value)
        assert (data["payload"]["subscription_id"] ==
                "1/91b46cb6224f11ecb2ddacde48001122"), "Invalid subscription id"
    finally:
        # Release the Kafka clients whether or not the assertions passed;
        # previously the consumer was never closed and the producer was
        # closed only on the success path.
        result_consumer.close()
        result_producer.close()
Ejemplo n.º 14
0
def test_produce_result() -> None:
    """Exercise ``ProduceResult`` against an in-memory broker.

    Submits a task result, checks that exactly one message keyed by the
    subscription identifier lands on the results topic, and then checks
    the commit call counts after ``poll`` and ``join``.
    """
    state.set_config("subscription_mode_events", "new")
    epoch = datetime(1970, 1, 1)
    scheduled_topic = Topic("scheduled-subscriptions-events")
    result_topic = Topic("events-subscriptions-results")
    clock = TestingClock()
    storage: MemoryMessageStorage[KafkaPayload] = MemoryMessageStorage()
    broker: Broker[KafkaPayload] = Broker(storage, clock)
    for topic in (scheduled_topic, result_topic):
        broker.create_topic(topic, partitions=1)

    producer = broker.get_producer()
    commit = mock.Mock()
    strategy = ProduceResult(producer, result_topic.name, commit)

    subscription_data = SubscriptionData(
        project_id=1,
        query="MATCH (events) SELECT count() AS count",
        time_window_sec=60,
        resolution_sec=60,
        entity_subscription=EventsSubscription(data_dict={}),
    )
    subscription = Subscription(
        SubscriptionIdentifier(PartitionId(0), uuid.uuid1()),
        subscription_data)

    request = subscription_data.build_request(
        get_dataset("events"), epoch, None, Timer("timer"))
    result: Result = {
        "meta": [{"type": "UInt64", "name": "count"}],
        "data": [{"count": 1}],
    }

    task_result = SubscriptionTaskResult(
        ScheduledSubscriptionTask(
            epoch,
            SubscriptionWithMetadata(EntityKey.EVENTS, subscription, 1),
        ),
        (request, result),
    )
    message = Message(Partition(scheduled_topic, 0), 1, task_result, epoch)

    strategy.submit(message)

    result_partition = Partition(result_topic, 0)
    produced = storage.consume(result_partition, 0)
    assert produced is not None
    expected_key = str(subscription.identifier).encode("utf-8")
    assert produced.payload.key == expected_key
    # Only one message was produced, so offset 1 is still empty.
    assert storage.consume(result_partition, 1) is None
    assert commit.call_count == 0
    strategy.poll()
    assert commit.call_count == 1

    # Commit is throttled so if we immediately submit another message, the commit count will not change
    strategy.submit(message)
    strategy.poll()
    assert commit.call_count == 1

    # Commit count immediately increases once we call join()
    strategy.join()
    assert commit.call_count == 2
Ejemplo n.º 15
0
)
from tests.subscriptions.subscriptions_utils import UUIDS, build_subscription

# Unix timestamp in seconds that is an exact multiple of 60
# (1625518080 == 27091968 * 60), used as the base timestamp of the test
# cases below.
ALIGNED_TIMESTAMP = 1625518080  # Aligned to start of a minute

TEST_CASES = [
    pytest.param(
        ImmediateTaskBuilder(),
        "jittered",
        [(ALIGNED_TIMESTAMP, build_subscription(timedelta(minutes=1), 0))],
        [(
            ALIGNED_TIMESTAMP,
            ScheduledSubscriptionTask(
                datetime.fromtimestamp(ALIGNED_TIMESTAMP),
                SubscriptionWithMetadata(
                    EntityKey.EVENTS,
                    build_subscription(timedelta(minutes=1), 0),
                    1,
                ),
            ),
        )],
        [("tasks.built", 1, {})],
        id="One subscription immediately scheduled",
    ),
    pytest.param(
        ImmediateTaskBuilder(),
        "jittered",
        [(ALIGNED_TIMESTAMP + 1, build_subscription(timedelta(minutes=1), 0))],
        [],
        [("tasks.built", 0, {})],
        id="One subscription not aligned with resolution",
    ),