def __init__(self, messagebus):
    """Capture Kafka/feed settings from *messagebus* and build the offsets fetcher.

    When SSL is enabled, certificate files are loaded from ``cert_path``
    (ca-cert.pem, client-cert.pem, client-key.pem).
    """
    self._location = messagebus.kafka_location
    self._general_group = messagebus.spider_feed_group
    self._topic = messagebus.topic_todo
    self._max_next_requests = messagebus.max_next_requests
    self._hostname_partitioning = messagebus.hostname_partitioning
    self._enable_ssl = messagebus.enable_ssl
    self._cert_path = messagebus.cert_path
    fetcher_kwargs = dict(
        bootstrap_servers=self._location,
        topic=self._topic,
        group_id=self._general_group,
    )
    if self._enable_ssl:
        # All PEM files are expected to live together under cert_path.
        fetcher_kwargs['security_protocol'] = 'SSL'
        fetcher_kwargs['ssl_cafile'] = os_path_join(self._cert_path, 'ca-cert.pem')
        fetcher_kwargs['ssl_certfile'] = os_path_join(self._cert_path, 'client-cert.pem')
        fetcher_kwargs['ssl_keyfile'] = os_path_join(self._cert_path, 'client-key.pem')
    self._offset_fetcher = OffsetsFetcherAsync(**fetcher_kwargs)
    self._codec = messagebus.codec
    self._partitions = messagebus.spider_feed_partitions
def __init__(self, messagebus):
    """Read spider-feed configuration off *messagebus* and set up the offsets fetcher."""
    location = messagebus.kafka_location
    group = messagebus.spider_feed_group
    topic = messagebus.topic_todo
    self._location = location
    self._general_group = group
    self._topic = topic
    self._max_next_requests = messagebus.max_next_requests
    self._offset_fetcher = OffsetsFetcherAsync(
        bootstrap_servers=location,
        topic=topic,
        group_id=group,
    )
    self._codec = messagebus.codec
    self._partitioner = messagebus.spider_feed_partitioner
class SpiderFeedStream(BaseSpiderFeedStream):
    """Kafka-backed spider feed stream for the "todo" topic.

    Tracks per-partition consumer lag so that overloaded partitions can be
    excluded from scheduling, and builds keyed producers using the configured
    partitioning scheme.
    """

    def __init__(self, messagebus):
        self._location = messagebus.kafka_location
        self._general_group = messagebus.spider_feed_group
        self._topic = messagebus.topic_todo
        # Lag threshold: a partition with at least this much backlog is
        # considered unavailable (see available_partitions()).
        self._max_next_requests = messagebus.max_next_requests
        self._hostname_partitioning = messagebus.hostname_partitioning
        self._offset_fetcher = OffsetsFetcherAsync(
            bootstrap_servers=self._location,
            topic=self._topic,
            group_id=self._general_group)
        self._codec = messagebus.codec
        self._partitions = messagebus.spider_feed_partitions

    def consumer(self, partition_id):
        """Return a Consumer bound to *partition_id*.

        NOTE(review): the assert sanity-checks that the broker-reported
        partition count matches configuration; it reaches into the private
        ``_consumer`` attribute and is stripped under ``python -O``.
        """
        c = Consumer(self._location, self._topic, self._general_group,
                     partition_id)
        assert len(c._consumer.partitions_for_topic(
            self._topic)) == self._partitions
        return c

    def available_partitions(self):
        """Return ids of partitions whose consumer lag is under the limit."""
        lags = self._offset_fetcher.get()
        # Idiomatic filtering comprehension instead of a manual append loop.
        return [partition for partition, lag in six.iteritems(lags)
                if lag < self._max_next_requests]

    def producer(self):
        """Return a KeyedProducer partitioned by hostname CRC32 or fingerprint."""
        if self._hostname_partitioning:
            partitioner = Crc32NamePartitioner(self._partitions)
        else:
            partitioner = FingerprintPartitioner(self._partitions)
        return KeyedProducer(self._location, self._topic, partitioner,
                             self._codec)
class SpiderFeedStream(BaseSpiderFeedStream):
    """Spider feed stream implemented on top of Kafka's "todo" topic."""

    def __init__(self, messagebus):
        # Connection / group / topic settings come straight off the bus object.
        self._location = messagebus.kafka_location
        self._general_group = messagebus.spider_feed_group
        self._topic = messagebus.topic_todo
        self._max_next_requests = messagebus.max_next_requests
        self._hostname_partitioning = messagebus.hostname_partitioning
        self._offset_fetcher = OffsetsFetcherAsync(
            bootstrap_servers=self._location,
            topic=self._topic,
            group_id=self._general_group)
        self._codec = messagebus.codec
        self._partitions = messagebus.spider_feed_partitions

    def consumer(self, partition_id):
        """Create a Consumer for one partition, verifying the partition count."""
        result = Consumer(self._location, self._topic, self._general_group,
                          partition_id)
        broker_partitions = result._consumer.partitions_for_topic(self._topic)
        assert len(broker_partitions) == self._partitions
        return result

    def available_partitions(self):
        """List partition ids whose lag is below the max_next_requests limit."""
        usable = []
        for pid, lag in six.iteritems(self._offset_fetcher.get()):
            if lag < self._max_next_requests:
                usable.append(pid)
        return usable

    def producer(self):
        """Build a KeyedProducer with the configured partitioner."""
        if self._hostname_partitioning:
            chosen = Crc32NamePartitioner(self._partitions)
        else:
            chosen = FingerprintPartitioner(self._partitions)
        return KeyedProducer(self._location, self._topic, chosen, self._codec)
def __init__(self, messagebus):
    """Store feed configuration and create the async offsets fetcher.

    SSL options, when enabled, are produced by the shared
    ``_prepare_kafka_ssl_kwargs`` helper from ``cert_path``.
    """
    self._location = messagebus.kafka_location
    self._general_group = messagebus.spider_feed_group
    self._topic = messagebus.topic_todo
    self._max_next_requests = messagebus.max_next_requests
    self._hostname_partitioning = messagebus.hostname_partitioning
    self._enable_ssl = messagebus.enable_ssl
    self._cert_path = messagebus.cert_path
    fetcher_kwargs = dict(
        bootstrap_servers=self._location,
        topic=self._topic,
        group_id=self._general_group,
    )
    if self._enable_ssl:
        fetcher_kwargs.update(_prepare_kafka_ssl_kwargs(self._cert_path))
    self._offset_fetcher = OffsetsFetcherAsync(**fetcher_kwargs)
    self._codec = messagebus.codec
    self._partitions = messagebus.spider_feed_partitions
def __init__(self, messagebus):
    """Pull spider-feed settings from *messagebus* and build the offsets fetcher."""
    location = messagebus.kafka_location
    topic = messagebus.topic_todo
    group = messagebus.spider_feed_group
    self._location = location
    self._general_group = group
    self._topic = topic
    self._max_next_requests = messagebus.max_next_requests
    self._hostname_partitioning = messagebus.hostname_partitioning
    self._offset_fetcher = OffsetsFetcherAsync(
        bootstrap_servers=location, topic=topic, group_id=group)
    self._codec = messagebus.codec
    self._partitions = messagebus.spider_feed_partitions
class SpiderFeedStream(BaseSpiderFeedStream):
    """Kafka-backed spider feed stream with optional SSL transport.

    Consumers, producers and the offsets fetcher all receive the SSL
    settings; per-partition lag from the offsets fetcher decides which
    partitions are available for new requests.
    """

    def __init__(self, messagebus):
        self._location = messagebus.kafka_location
        self._general_group = messagebus.spider_feed_group
        self._topic = messagebus.topic_todo
        # Lag threshold used by available_partitions().
        self._max_next_requests = messagebus.max_next_requests
        self._hostname_partitioning = messagebus.hostname_partitioning
        self._enable_ssl = messagebus.enable_ssl
        self._cert_path = messagebus.cert_path
        kwargs = {
            'bootstrap_servers': self._location,
            'topic': self._topic,
            'group_id': self._general_group,
        }
        if self._enable_ssl:
            # PEM files are expected to live together under cert_path.
            kwargs.update({
                'security_protocol': 'SSL',
                'ssl_cafile': os_path_join(self._cert_path, 'ca-cert.pem'),
                'ssl_certfile': os_path_join(self._cert_path, 'client-cert.pem'),
                'ssl_keyfile': os_path_join(self._cert_path, 'client-key.pem')
            })
        self._offset_fetcher = OffsetsFetcherAsync(**kwargs)
        self._codec = messagebus.codec
        self._partitions = messagebus.spider_feed_partitions

    def consumer(self, partition_id):
        """Return a Consumer bound to *partition_id*.

        NOTE(review): the assert validates the broker-side partition count
        against configuration via the private ``_consumer`` attribute; it is
        skipped under ``python -O``.
        """
        c = Consumer(self._location, self._enable_ssl, self._cert_path,
                     self._topic, self._general_group, partition_id)
        assert len(c._consumer.partitions_for_topic(
            self._topic)) == self._partitions
        return c

    def available_partitions(self):
        """Return ids of partitions whose consumer lag is under the limit."""
        lags = self._offset_fetcher.get()
        # Idiomatic filtering comprehension instead of a manual append loop.
        return [partition for partition, lag in six.iteritems(lags)
                if lag < self._max_next_requests]

    def producer(self):
        """Return a KeyedProducer partitioned by hostname CRC32 or fingerprint."""
        if self._hostname_partitioning:
            partitioner = Crc32NamePartitioner(self._partitions)
        else:
            partitioner = FingerprintPartitioner(self._partitions)
        return KeyedProducer(self._location, self._enable_ssl, self._cert_path,
                             self._topic, partitioner, self._codec)
class SpiderFeedStream(BaseSpiderFeedStream):
    """Spider feed stream over Kafka with SSL support and tuned producer buffers."""

    def __init__(self, messagebus):
        self._location = messagebus.kafka_location
        self._general_group = messagebus.spider_feed_group
        self._topic = messagebus.topic_todo
        self._max_next_requests = messagebus.max_next_requests
        self._hostname_partitioning = messagebus.hostname_partitioning
        self._enable_ssl = messagebus.enable_ssl
        self._cert_path = messagebus.cert_path
        fetcher_kwargs = {
            'bootstrap_servers': self._location,
            'topic': self._topic,
            'group_id': self._general_group,
        }
        if self._enable_ssl:
            # Shared helper builds the SSL keyword set from cert_path.
            fetcher_kwargs.update(_prepare_kafka_ssl_kwargs(self._cert_path))
        self._offset_fetcher = OffsetsFetcherAsync(**fetcher_kwargs)
        self._codec = messagebus.codec
        self._partitions = messagebus.spider_feed_partitions

    def consumer(self, partition_id):
        """Create a Consumer for one partition, verifying the partition count."""
        result = Consumer(self._location, self._enable_ssl, self._cert_path,
                          self._topic, self._general_group, partition_id)
        broker_partitions = result._consumer.partitions_for_topic(self._topic)
        assert len(broker_partitions) == self._partitions
        return result

    def available_partitions(self):
        """List partition ids whose lag is below the max_next_requests limit."""
        usable = []
        for pid, lag in six.iteritems(self._offset_fetcher.get()):
            if lag < self._max_next_requests:
                usable.append(pid)
        return usable

    def producer(self):
        """Build a KeyedProducer with the configured partitioner and buffer sizes."""
        if self._hostname_partitioning:
            chosen = Crc32NamePartitioner(self._partitions)
        else:
            chosen = FingerprintPartitioner(self._partitions)
        return KeyedProducer(self._location, self._enable_ssl, self._cert_path,
                             self._topic, chosen, self._codec,
                             batch_size=DEFAULT_BATCH_SIZE,
                             buffer_memory=DEFAULT_BUFFER_MEMORY)