Example #1
0
 def __init__(self, messagebus):
     """Copy the spider feed settings from ``messagebus`` and create the offsets fetcher.

     When ``messagebus.enable_ssl`` is truthy, the fetcher is additionally
     given SSL options whose file paths are built from ``messagebus.cert_path``.

     :param messagebus: object exposing the settings read below as attributes.
     """
     self._location = messagebus.kafka_location
     self._general_group = messagebus.spider_feed_group
     self._topic = messagebus.topic_todo
     self._max_next_requests = messagebus.max_next_requests
     self._hostname_partitioning = messagebus.hostname_partitioning
     self._enable_ssl = messagebus.enable_ssl
     self._cert_path = messagebus.cert_path

     # Options always handed to the offsets fetcher.
     fetcher_options = dict(
         bootstrap_servers=self._location,
         topic=self._topic,
         group_id=self._general_group,
     )
     if self._enable_ssl:
         fetcher_options['security_protocol'] = 'SSL'
         # Each SSL option points at a fixed file name under the cert path.
         ssl_files = (
             ('ssl_cafile', 'ca-cert.pem'),
             ('ssl_certfile', 'client-cert.pem'),
             ('ssl_keyfile', 'client-key.pem'),
         )
         for option, file_name in ssl_files:
             fetcher_options[option] = os_path_join(self._cert_path, file_name)

     self._offset_fetcher = OffsetsFetcherAsync(**fetcher_options)
     self._codec = messagebus.codec
     self._partitions = messagebus.spider_feed_partitions
Example #2
0
 def __init__(self, messagebus):
     """Copy the spider feed settings from ``messagebus`` and create the offsets fetcher.

     :param messagebus: object exposing the settings read below as attributes.
     """
     self._location = messagebus.kafka_location
     self._general_group = messagebus.spider_feed_group
     self._topic = messagebus.topic_todo
     self._max_next_requests = messagebus.max_next_requests

     # Keyword arguments for the offsets fetcher, named here for readability.
     fetcher_options = {
         'bootstrap_servers': self._location,
         'topic': self._topic,
         'group_id': self._general_group,
     }
     self._offset_fetcher = OffsetsFetcherAsync(**fetcher_options)

     self._codec = messagebus.codec
     self._partitioner = messagebus.spider_feed_partitioner
Example #3
0
class SpiderFeedStream(BaseSpiderFeedStream):
    """Spider feed stream configured from a message bus object.

    Provides a consumer per partition, a keyed producer, and the list of
    partitions whose reported lag is below the configured request limit.
    """

    def __init__(self, messagebus):
        """Copy the spider feed settings from ``messagebus`` and create the offsets fetcher.

        :param messagebus: object exposing the settings read below as attributes.
        """
        self._location = messagebus.kafka_location
        self._general_group = messagebus.spider_feed_group
        self._topic = messagebus.topic_todo
        self._max_next_requests = messagebus.max_next_requests
        self._hostname_partitioning = messagebus.hostname_partitioning
        self._offset_fetcher = OffsetsFetcherAsync(
            bootstrap_servers=self._location,
            topic=self._topic,
            group_id=self._general_group)
        self._codec = messagebus.codec
        self._partitions = messagebus.spider_feed_partitions

    def consumer(self, partition_id):
        """Create a ``Consumer`` for ``partition_id`` after checking the partition count.

        :param partition_id: partition identifier passed through to ``Consumer``.
        :returns: the newly created ``Consumer``.
        :raises AssertionError: if the number of partitions reported for the
            topic differs from the configured ``spider_feed_partitions``.
        """
        c = Consumer(self._location, self._topic, self._general_group,
                     partition_id)
        actual_partitions = len(c._consumer.partitions_for_topic(self._topic))
        if actual_partitions != self._partitions:
            # Raised explicitly (not via ``assert``) so the check is not
            # stripped when Python runs with -O; the exception type is kept
            # for callers that already handle AssertionError.
            raise AssertionError(
                "Topic %r has %s partitions, but %s are configured" %
                (self._topic, actual_partitions, self._partitions))
        return c

    def available_partitions(self):
        """Return the partitions whose lag is below ``max_next_requests``.

        Lags are taken from the offsets fetcher's ``get()`` result, which is
        iterated as a mapping of partition to lag.
        """
        lags = self._offset_fetcher.get()
        return [partition for partition, lag in six.iteritems(lags)
                if lag < self._max_next_requests]

    def producer(self):
        """Create a ``KeyedProducer`` for the topic.

        The partitioner is ``Crc32NamePartitioner`` when hostname
        partitioning is enabled, otherwise ``FingerprintPartitioner``.
        """
        if self._hostname_partitioning:
            partitioner = Crc32NamePartitioner(self._partitions)
        else:
            partitioner = FingerprintPartitioner(self._partitions)
        return KeyedProducer(self._location, self._topic, partitioner,
                             self._codec)
Example #4
0
class SpiderFeedStream(BaseSpiderFeedStream):
    """Spider feed stream configured from a message bus object.

    Provides a consumer per partition, a keyed producer, and the list of
    partitions whose reported lag is below the configured request limit.
    """

    def __init__(self, messagebus):
        """Copy the spider feed settings from ``messagebus`` and create the offsets fetcher.

        :param messagebus: object exposing the settings read below as attributes.
        """
        self._location = messagebus.kafka_location
        self._general_group = messagebus.spider_feed_group
        self._topic = messagebus.topic_todo
        self._max_next_requests = messagebus.max_next_requests
        self._hostname_partitioning = messagebus.hostname_partitioning
        self._offset_fetcher = OffsetsFetcherAsync(bootstrap_servers=self._location, topic=self._topic,
                                                   group_id=self._general_group)
        self._codec = messagebus.codec
        self._partitions = messagebus.spider_feed_partitions

    def consumer(self, partition_id):
        """Create a ``Consumer`` for ``partition_id`` after checking the partition count.

        :param partition_id: partition identifier passed through to ``Consumer``.
        :returns: the newly created ``Consumer``.
        :raises AssertionError: if the number of partitions reported for the
            topic differs from the configured ``spider_feed_partitions``.
        """
        c = Consumer(self._location, self._topic, self._general_group, partition_id)
        actual_partitions = len(c._consumer.partitions_for_topic(self._topic))
        if actual_partitions != self._partitions:
            # Raised explicitly (not via ``assert``) so the check is not
            # stripped when Python runs with -O; the exception type is kept
            # for callers that already handle AssertionError.
            raise AssertionError(
                "Topic %r has %s partitions, but %s are configured" %
                (self._topic, actual_partitions, self._partitions))
        return c

    def available_partitions(self):
        """Return the partitions whose lag is below ``max_next_requests``.

        Lags are taken from the offsets fetcher's ``get()`` result, which is
        iterated as a mapping of partition to lag.
        """
        lags = self._offset_fetcher.get()
        return [partition for partition, lag in six.iteritems(lags)
                if lag < self._max_next_requests]

    def producer(self):
        """Create a ``KeyedProducer`` for the topic.

        The partitioner is ``Crc32NamePartitioner`` when hostname
        partitioning is enabled, otherwise ``FingerprintPartitioner``.
        """
        if self._hostname_partitioning:
            partitioner = Crc32NamePartitioner(self._partitions)
        else:
            partitioner = FingerprintPartitioner(self._partitions)
        return KeyedProducer(self._location, self._topic, partitioner, self._codec)
Example #5
0
 def __init__(self, messagebus):
     """Copy the spider feed settings from ``messagebus`` and create the offsets fetcher.

     When ``messagebus.enable_ssl`` is truthy, the options returned by
     ``_prepare_kafka_ssl_kwargs`` for the certificate path are merged into
     the fetcher's keyword arguments.

     :param messagebus: object exposing the settings read below as attributes.
     """
     self._location = messagebus.kafka_location
     self._general_group = messagebus.spider_feed_group
     self._topic = messagebus.topic_todo
     self._max_next_requests = messagebus.max_next_requests
     self._hostname_partitioning = messagebus.hostname_partitioning
     self._enable_ssl = messagebus.enable_ssl
     self._cert_path = messagebus.cert_path

     # Options always handed to the offsets fetcher.
     fetcher_options = dict(
         bootstrap_servers=self._location,
         topic=self._topic,
         group_id=self._general_group,
     )
     if self._enable_ssl:
         ssl_options = _prepare_kafka_ssl_kwargs(self._cert_path)
         fetcher_options.update(ssl_options)

     self._offset_fetcher = OffsetsFetcherAsync(**fetcher_options)
     self._codec = messagebus.codec
     self._partitions = messagebus.spider_feed_partitions
Example #6
0
 def __init__(self, messagebus):
     """Copy the spider feed settings from ``messagebus`` and create the offsets fetcher.

     :param messagebus: object exposing the settings read below as attributes.
     """
     self._location = messagebus.kafka_location
     self._general_group = messagebus.spider_feed_group
     self._topic = messagebus.topic_todo
     self._max_next_requests = messagebus.max_next_requests
     self._hostname_partitioning = messagebus.hostname_partitioning

     # Keyword arguments for the offsets fetcher, named here for readability.
     fetcher_options = {
         'bootstrap_servers': self._location,
         'topic': self._topic,
         'group_id': self._general_group,
     }
     self._offset_fetcher = OffsetsFetcherAsync(**fetcher_options)

     self._codec = messagebus.codec
     self._partitions = messagebus.spider_feed_partitions
Example #7
0
class SpiderFeedStream(BaseSpiderFeedStream):
    """Spider feed stream configured from a message bus object, with optional SSL.

    Provides a consumer per partition, a keyed producer, and the list of
    partitions whose reported lag is below the configured request limit.
    """

    def __init__(self, messagebus):
        """Copy the spider feed settings from ``messagebus`` and create the offsets fetcher.

        When ``messagebus.enable_ssl`` is truthy, the fetcher is additionally
        given SSL options whose file paths are built from ``messagebus.cert_path``.

        :param messagebus: object exposing the settings read below as attributes.
        """
        self._location = messagebus.kafka_location
        self._general_group = messagebus.spider_feed_group
        self._topic = messagebus.topic_todo
        self._max_next_requests = messagebus.max_next_requests
        self._hostname_partitioning = messagebus.hostname_partitioning
        self._enable_ssl = messagebus.enable_ssl
        self._cert_path = messagebus.cert_path
        kwargs = {
            'bootstrap_servers': self._location,
            'topic': self._topic,
            'group_id': self._general_group,
        }
        if self._enable_ssl:
            # Each SSL option points at a fixed file name under the cert path.
            kwargs.update({
                'security_protocol': 'SSL',
                'ssl_cafile': os_path_join(self._cert_path, 'ca-cert.pem'),
                'ssl_certfile': os_path_join(self._cert_path, 'client-cert.pem'),
                'ssl_keyfile': os_path_join(self._cert_path, 'client-key.pem'),
            })
        self._offset_fetcher = OffsetsFetcherAsync(**kwargs)
        self._codec = messagebus.codec
        self._partitions = messagebus.spider_feed_partitions

    def consumer(self, partition_id):
        """Create a ``Consumer`` for ``partition_id`` after checking the partition count.

        :param partition_id: partition identifier passed through to ``Consumer``.
        :returns: the newly created ``Consumer``.
        :raises AssertionError: if the number of partitions reported for the
            topic differs from the configured ``spider_feed_partitions``.
        """
        c = Consumer(self._location, self._enable_ssl, self._cert_path,
                     self._topic, self._general_group, partition_id)
        actual_partitions = len(c._consumer.partitions_for_topic(self._topic))
        if actual_partitions != self._partitions:
            # Raised explicitly (not via ``assert``) so the check is not
            # stripped when Python runs with -O; the exception type is kept
            # for callers that already handle AssertionError.
            raise AssertionError(
                "Topic %r has %s partitions, but %s are configured" %
                (self._topic, actual_partitions, self._partitions))
        return c

    def available_partitions(self):
        """Return the partitions whose lag is below ``max_next_requests``.

        Lags are taken from the offsets fetcher's ``get()`` result, which is
        iterated as a mapping of partition to lag.
        """
        lags = self._offset_fetcher.get()
        return [partition for partition, lag in six.iteritems(lags)
                if lag < self._max_next_requests]

    def producer(self):
        """Create a ``KeyedProducer`` for the topic.

        The partitioner is ``Crc32NamePartitioner`` when hostname
        partitioning is enabled, otherwise ``FingerprintPartitioner``.
        """
        if self._hostname_partitioning:
            partitioner = Crc32NamePartitioner(self._partitions)
        else:
            partitioner = FingerprintPartitioner(self._partitions)
        return KeyedProducer(self._location, self._enable_ssl, self._cert_path,
                             self._topic, partitioner, self._codec)
Example #8
0
class SpiderFeedStream(BaseSpiderFeedStream):
    """Spider feed stream configured from a message bus object, with optional SSL.

    Provides a consumer per partition, a keyed producer, and the list of
    partitions whose reported lag is below the configured request limit.
    """

    def __init__(self, messagebus):
        """Copy the spider feed settings from ``messagebus`` and create the offsets fetcher.

        When ``messagebus.enable_ssl`` is truthy, the options returned by
        ``_prepare_kafka_ssl_kwargs`` for the certificate path are merged into
        the fetcher's keyword arguments.

        :param messagebus: object exposing the settings read below as attributes.
        """
        self._location = messagebus.kafka_location
        self._general_group = messagebus.spider_feed_group
        self._topic = messagebus.topic_todo
        self._max_next_requests = messagebus.max_next_requests
        self._hostname_partitioning = messagebus.hostname_partitioning
        self._enable_ssl = messagebus.enable_ssl
        self._cert_path = messagebus.cert_path
        kwargs = {
            'bootstrap_servers': self._location,
            'topic': self._topic,
            'group_id': self._general_group,
        }
        if self._enable_ssl:
            kwargs.update(_prepare_kafka_ssl_kwargs(self._cert_path))
        self._offset_fetcher = OffsetsFetcherAsync(**kwargs)
        self._codec = messagebus.codec
        self._partitions = messagebus.spider_feed_partitions

    def consumer(self, partition_id):
        """Create a ``Consumer`` for ``partition_id`` after checking the partition count.

        :param partition_id: partition identifier passed through to ``Consumer``.
        :returns: the newly created ``Consumer``.
        :raises AssertionError: if the number of partitions reported for the
            topic differs from the configured ``spider_feed_partitions``.
        """
        c = Consumer(self._location, self._enable_ssl, self._cert_path,
                     self._topic, self._general_group, partition_id)
        actual_partitions = len(c._consumer.partitions_for_topic(self._topic))
        if actual_partitions != self._partitions:
            # Raised explicitly (not via ``assert``) so the check is not
            # stripped when Python runs with -O; the exception type is kept
            # for callers that already handle AssertionError.
            raise AssertionError(
                "Topic %r has %s partitions, but %s are configured" %
                (self._topic, actual_partitions, self._partitions))
        return c

    def available_partitions(self):
        """Return the partitions whose lag is below ``max_next_requests``.

        Lags are taken from the offsets fetcher's ``get()`` result, which is
        iterated as a mapping of partition to lag.
        """
        lags = self._offset_fetcher.get()
        return [partition for partition, lag in six.iteritems(lags)
                if lag < self._max_next_requests]

    def producer(self):
        """Create a ``KeyedProducer`` for the topic with the default batch settings.

        The partitioner is ``Crc32NamePartitioner`` when hostname
        partitioning is enabled, otherwise ``FingerprintPartitioner``.
        """
        if self._hostname_partitioning:
            partitioner = Crc32NamePartitioner(self._partitions)
        else:
            partitioner = FingerprintPartitioner(self._partitions)
        return KeyedProducer(self._location,
                             self._enable_ssl,
                             self._cert_path,
                             self._topic,
                             partitioner,
                             self._codec,
                             batch_size=DEFAULT_BATCH_SIZE,
                             buffer_memory=DEFAULT_BUFFER_MEMORY)