Example #1
0
 def test_offsets_invalid_metadata(self):
     """offsets() with a failed, retriable, invalid-metadata future must
     still go through _send_offset_request.

     A future whose error reports ``invalid_metadata`` simulates stale
     cluster metadata; we only verify the offset request was issued.
     """
     fetcher = OffsetsFetcherAsync(group_id='test', topic='test')
     # Failed-but-retriable future carrying an invalid_metadata error.
     # NOTE(review): the attribute is spelled `exceptions` here — confirm
     # against the production code path that reads it.
     future = Mock(succeeded=lambda: False,
                   retriable=lambda: True,
                   exceptions=Mock(invalid_metadata=True))
     with patch.object(fetcher,
                       '_send_offset_request',
                       return_value=[future]) as _send_offset_request:
         fetcher.offsets([0], -1)
         # BUG FIX: Mock has no `was_called()` method. Accessing it
         # auto-creates a child Mock, and calling that returns another
         # (truthy) Mock, so the old `assert ...was_called()` could never
         # fail. `.called` is the real Mock attribute.
         assert _send_offset_request.called
Example #2
0
 def __init__(self, messagebus):
     """Capture spider-feed settings from *messagebus* and build the
     async offsets fetcher used for partition-lag queries."""
     mb = messagebus
     self._location = mb.kafka_location
     self._general_group = mb.spider_feed_group
     self._topic = mb.topic_todo
     self._max_next_requests = mb.max_next_requests
     self._hostname_partitioning = mb.hostname_partitioning
     self._enable_ssl = mb.enable_ssl
     self._cert_path = mb.cert_path
     # SSL connection options are merged in only when SSL is enabled.
     fetcher_kwargs = dict(bootstrap_servers=self._location,
                           topic=self._topic,
                           group_id=self._general_group)
     if self._enable_ssl:
         fetcher_kwargs.update(_prepare_kafka_ssl_kwargs(self._cert_path))
     self._offset_fetcher = OffsetsFetcherAsync(**fetcher_kwargs)
     self._codec = mb.codec
     self._partitions = mb.spider_feed_partitions
Example #3
0
class SpiderFeedStream(BaseSpiderFeedStream):
    """Kafka-backed spider feed stream.

    Hands out per-partition consumers and a keyed producer for the todo
    topic, and reports which partitions are under the configured lag
    threshold.
    """

    def __init__(self, messagebus):
        mb = messagebus
        self._location = mb.kafka_location
        self._general_group = mb.spider_feed_group
        self._topic = mb.topic_todo
        self._max_next_requests = mb.max_next_requests
        self._hostname_partitioning = mb.hostname_partitioning
        self._enable_ssl = mb.enable_ssl
        self._cert_path = mb.cert_path
        # SSL options are merged in only when SSL is enabled.
        fetcher_kwargs = dict(bootstrap_servers=self._location,
                              topic=self._topic,
                              group_id=self._general_group)
        if self._enable_ssl:
            fetcher_kwargs.update(_prepare_kafka_ssl_kwargs(self._cert_path))
        self._offset_fetcher = OffsetsFetcherAsync(**fetcher_kwargs)
        self._codec = mb.codec
        self._partitions = mb.spider_feed_partitions

    def consumer(self, partition_id):
        """Return a Consumer bound to *partition_id* of the todo topic,
        verifying the broker's partition count matches the config."""
        consumer = Consumer(self._location, self._enable_ssl, self._cert_path,
                            self._topic, self._general_group, partition_id)
        broker_partitions = consumer._consumer.partitions_for_topic(self._topic)
        assert len(broker_partitions) == self._partitions, \
            "Number of kafka topic partitions doesn't match value in config for spider feed"
        return consumer

    def available_partitions(self):
        """Return the partitions whose consumer lag is below the
        configured max_next_requests threshold."""
        lags = self._offset_fetcher.get()
        return [partition for partition, lag in six.iteritems(lags)
                if lag < self._max_next_requests]

    def producer(self):
        """Return a KeyedProducer for the todo topic, partitioned by
        hostname CRC32 or by fingerprint depending on configuration."""
        if self._hostname_partitioning:
            partitioner = Crc32NamePartitioner(self._partitions)
        else:
            partitioner = FingerprintPartitioner(self._partitions)
        return KeyedProducer(self._location,
                             self._enable_ssl,
                             self._cert_path,
                             self._topic,
                             partitioner,
                             self._codec,
                             batch_size=DEFAULT_BATCH_SIZE,
                             buffer_memory=DEFAULT_BUFFER_MEMORY)
Example #4
0
 def __init__(self, messagebus):
     """Read spider-feed configuration from *messagebus* and build an
     OffsetsFetcherAsync for partition-lag monitoring.

     :param messagebus: configuration object exposing the kafka location,
         group/topic names, partitioning flag and SSL settings.
     """
     self._location = messagebus.kafka_location
     self._general_group = messagebus.spider_feed_group
     self._topic = messagebus.topic_todo
     self._max_next_requests = messagebus.max_next_requests
     self._hostname_partitioning = messagebus.hostname_partitioning
     self._enable_ssl = messagebus.enable_ssl
     self._cert_path = messagebus.cert_path
     # Base kwargs for the offsets fetcher; SSL connection options are
     # merged in only when SSL is enabled.
     kwargs = {
         'bootstrap_servers': self._location,
         'topic': self._topic,
         'group_id': self._general_group,
     }
     if self._enable_ssl:
         kwargs.update(_prepare_kafka_ssl_kwargs(self._cert_path))
     self._offset_fetcher = OffsetsFetcherAsync(**kwargs)
     self._codec = messagebus.codec
     self._partitions = messagebus.spider_feed_partitions
Example #5
0
class SpiderFeedStream(BaseSpiderFeedStream):
    """Kafka-backed spider feed stream: per-partition consumers, a keyed
    producer for the todo topic, and a view of which partitions are under
    the configured lag threshold."""

    def __init__(self, messagebus):
        """Read spider-feed configuration from *messagebus* and build an
        OffsetsFetcherAsync for partition-lag monitoring."""
        self._location = messagebus.kafka_location
        self._general_group = messagebus.spider_feed_group
        self._topic = messagebus.topic_todo
        self._max_next_requests = messagebus.max_next_requests
        self._hostname_partitioning = messagebus.hostname_partitioning
        self._enable_ssl = messagebus.enable_ssl
        self._cert_path = messagebus.cert_path
        # SSL connection options are merged in only when SSL is enabled.
        kwargs = {
            'bootstrap_servers': self._location,
            'topic': self._topic,
            'group_id': self._general_group,
        }
        if self._enable_ssl:
            kwargs.update(_prepare_kafka_ssl_kwargs(self._cert_path))
        self._offset_fetcher = OffsetsFetcherAsync(**kwargs)
        self._codec = messagebus.codec
        self._partitions = messagebus.spider_feed_partitions

    def consumer(self, partition_id):
        """Return a Consumer bound to *partition_id* of the todo topic,
        verifying the broker's partition count matches the config."""
        c = Consumer(self._location, self._enable_ssl, self._cert_path,
                     self._topic, self._general_group, partition_id)
        # Consistency fix: the other copy of this class in the codebase
        # attaches an explanatory message to this check; a bare assert
        # produces an opaque AssertionError on mismatch.
        # NOTE(review): asserts are stripped under `python -O`; raise an
        # explicit exception if this validation must always run.
        assert len(c._consumer.partitions_for_topic(self._topic)) == self._partitions, \
            "Number of kafka topic partitions doesn't match value in config for spider feed"
        return c

    def available_partitions(self):
        """Return the partitions whose consumer lag is below the
        configured max_next_requests threshold."""
        lags = self._offset_fetcher.get()
        return [partition for partition, lag in six.iteritems(lags)
                if lag < self._max_next_requests]

    def producer(self):
        """Return a KeyedProducer for the todo topic, partitioned by
        hostname CRC32 or by fingerprint depending on configuration."""
        partitioner = Crc32NamePartitioner(self._partitions) if self._hostname_partitioning \
            else FingerprintPartitioner(self._partitions)
        return KeyedProducer(self._location, self._enable_ssl, self._cert_path,
                             self._topic, partitioner, self._codec,
                             batch_size=DEFAULT_BATCH_SIZE,
                             buffer_memory=DEFAULT_BUFFER_MEMORY)