def iter_changes(self, since, forever):
    """
    Since must be a dictionary of topic partition offsets.
    """
    timeout = float('inf') if forever else MIN_TIMEOUT
    start_from_latest = since is None
    reset = 'largest' if start_from_latest else 'smallest'
    self._init_consumer(timeout, auto_offset_reset=reset)

    since = self._filter_offsets(since)
    # a special value of since=None will start from the end of the change stream
    if since is not None and (not isinstance(since, dict) or not since):
        raise ValueError("'since' must be None or a topic offset dictionary")

    if not start_from_latest:
        if self.strict:
            validate_offsets(since)

        checkpoint_topics = {tp[0] for tp in since}
        extra_topics = checkpoint_topics - set(self._topics)
        if extra_topics:
            raise ValueError("'since' contains extra topics: {}".format(list(extra_topics)))

        self._processed_topic_offsets = copy(since)

        # Tell the consumer to start from offsets that were passed in
        for topic_partition, offset in since.items():
            self.consumer.seek(TopicPartition(topic_partition[0], topic_partition[1]), int(offset))

    try:
        for message in self.consumer:
            self._processed_topic_offsets[(message.topic, message.partition)] = message.offset
            yield change_from_kafka_message(message)
    except StopIteration:
        assert not forever, 'Kafka pillow should not timeout when waiting forever!'
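# Minimal usage sketch (not part of the original module): `feed` is assumed to be an
# instance of the change feed class that defines iter_changes() above, and the topic
# names and offsets below are hypothetical. Keys of `since` are (topic, partition)
# tuples, matching how the code above indexes them with tp[0] and tp[1].
last_offsets = {('case', 0): 1053, ('form', 0): 980}  # hypothetical saved checkpoint
for change in feed.iter_changes(since=last_offsets, forever=False):
    print(change)  # replay changes from the checkpoint until the consumer times out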
def validate_checkpoints(print_only):
    for pillow in get_all_pillow_instances():
        if isinstance(pillow.get_change_feed(), KafkaChangeFeed):
            checkpoint_dict = _get_checkpoint_dict(pillow)
            try:
                validate_offsets(checkpoint_dict)
            except UnavailableKafkaOffset as e:
                message = u'Problem with checkpoint for {}: {}'.format(pillow.pillow_id, e)
                if print_only:
                    print message
                else:
                    raise Exception(message)
def validate_checkpoints(print_only):
    for pillow in get_all_pillow_instances():
        if (pillow.pillow_id in getattr(settings, 'ACTIVE_PILLOW_NAMES', [pillow.pillow_id])
                and isinstance(pillow.get_change_feed(), KafkaChangeFeed)):
            checkpoint_dict = _get_checkpoint_dict(pillow)
            try:
                validate_offsets(checkpoint_dict)
            except UnavailableKafkaOffset as e:
                message = 'Problem with checkpoint for {}: {}'.format(pillow.pillow_id, e)
                if print_only:
                    print(message)
                else:
                    raise Exception(message)
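# Hedged call-site sketch (these invocations are illustrative, not from the original code):
# report every bad checkpoint without stopping, e.g. from a read-only audit command
validate_checkpoints(print_only=True)

# fail fast on the first bad checkpoint, e.g. as a pre-deploy sanity check
validate_checkpoints(print_only=False)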
def iter_changes(
    self,
    since: Optional[Dict[TopicPartition, int]],
    forever: bool,
) -> Iterator[Change]:
    """
    ``since`` must be a dictionary of topic partition offsets, or None
    """
    timeout = MAX_TIMEOUT if forever else MIN_TIMEOUT
    start_from_latest = since is None
    reset = 'largest' if start_from_latest else 'smallest'
    self._init_consumer(timeout, auto_offset_reset=reset)

    since = self._filter_offsets(since)
    # a special value of since=None will start from the end of the change stream
    if not isinstance(since, (dict, type(None))):
        raise ValueError(
            f"Expected None or a topic offset dictionary. Got {since!r}")
    if since == {}:
        raise ValueError('Topic partition offsets not found')

    if not start_from_latest:
        if self.strict:
            validate_offsets(since)

        checkpoint_topics = {tp[0] for tp in since}
        extra_topics = checkpoint_topics - set(self._topics)
        if extra_topics:
            raise ValueError("'since' contains extra topics: {}".format(list(extra_topics)))

        self._processed_topic_offsets = copy(since)

        # Tell the consumer to start from offsets that were passed in
        for topic_partition, offset in since.items():
            self.consumer.seek(
                TopicPartition(topic_partition[0], topic_partition[1]), int(offset))

    try:
        for message in self.consumer:
            self._processed_topic_offsets[(message.topic, message.partition)] = message.offset
            yield change_from_kafka_message(message)
    except StopIteration:
        # no need to do anything since this is just telling us we've reached the end of the feed
        pass
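# Sketch of a long-running consumer loop (assumed names: `feed` is the change feed
# instance, `handle` and `save_checkpoint` are hypothetical helpers). With forever=True
# the consumer is initialized with MAX_TIMEOUT, so the loop keeps waiting for new
# messages; feed._processed_topic_offsets holds the offset of every message yielded so far.
for change in feed.iter_changes(since=None, forever=True):  # since=None starts from the latest offsets
    handle(change)                                          # process the yielded Change
    save_checkpoint(dict(feed._processed_topic_offsets))    # persist (topic, partition) -> offset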
def iter_changes(self, since, forever):
    """
    Since must be a dictionary of topic partition offsets.
    """
    # a special value of since=None will start from the end of the change stream
    if since is not None and (not isinstance(since, dict) or not since):
        raise ValueError("'since' must be None or a topic offset dictionary")

    # in milliseconds, -1 means wait forever for changes
    timeout = -1 if forever else MIN_TIMEOUT
    start_from_latest = since is None
    reset = 'largest' if start_from_latest else 'smallest'
    consumer = self._get_consumer(timeout, auto_offset_reset=reset)

    if not start_from_latest:
        if self.strict:
            validate_offsets(since)

        checkpoint_topics = {tp[0] for tp in since}
        extra_topics = checkpoint_topics - set(self._topics)
        if extra_topics:
            raise ValueError("'since' contains extra topics: {}".format(list(extra_topics)))

        self._processed_topic_offsets = copy(since)
        offsets = [copy(self._processed_topic_offsets)]
        topics_missing = set(self._topics) - checkpoint_topics
        for topic in topics_missing:
            offsets.append(topic)  # consume all available partitions

        # this is how you tell the consumer to start from a certain point in the sequence
        consumer.set_topic_partitions(*offsets)

    try:
        for message in consumer:
            self._processed_topic_offsets[(message.topic, message.partition)] = message.offset
            yield change_from_kafka_message(message)
    except ConsumerTimeout:
        assert not forever, 'Kafka pillow should not timeout when waiting forever!'
def _validate_offsets(self, offsets):
    expected_values = {offset[0]: offset[2] for offset in offsets if len(offset) > 2}
    validate_offsets(expected_values)