def migrate_kafka_sequence(change_feed, checkpoint):
    """Re-key a stored checkpoint sequence by ``(topic, partition)``.

    Every offset found in the old checkpoint is assigned to partition ``0``
    of its topic and the resulting dict is passed to ``kafka_seq_to_str``.

    :param change_feed: object exposing ``topics`` (indexed and iterated here)
    :param checkpoint: object exposing ``sequence`` and ``sequence_format``
    :returns: the output of ``kafka_seq_to_str``; or the decoded (empty) JSON
        value itself when a ``'json'`` checkpoint holds nothing
    :raises ValueError: if the format is neither ``'text'`` nor ``'json'`` and
        the sequence does not parse as an integer
    :raises AssertionError: if a numeric/text checkpoint is paired with a feed
        that does not have exactly one topic, or if a JSON checkpoint is
        missing one of the feed's topics
    """
    try:
        # optimistically try convert to int
        numeric_offset = int(checkpoint.sequence)
    except ValueError:
        numeric_offset = None

    fmt = checkpoint.sequence_format

    # A sequence that parses as an int is handled as a single-topic offset
    # regardless of the declared format.
    if fmt == 'text' or numeric_offset is not None:
        feed_topics = change_feed.topics
        assert len(feed_topics) == 1, feed_topics
        only_topic = feed_topics[0]
        return kafka_seq_to_str({(only_topic, 0): numeric_offset})

    if fmt != 'json':
        raise ValueError("Unknown checkpoint format: {}".format(fmt))

    legacy_offsets = json.loads(checkpoint.sequence)
    if not legacy_offsets:
        # if sequence is an empty dict just return it
        return legacy_offsets

    wanted_topics = set(change_feed.topics)
    assert wanted_topics <= set(legacy_offsets)

    # Keep only the topics this feed listens to; all go to partition 0.
    migrated = {}
    for topic, offset in legacy_offsets.items():
        if topic in wanted_topics:
            migrated[(topic, 0)] = offset
    return kafka_seq_to_str(migrated)
def update_to(self, seq):
    """Record ``seq`` as the new position of this checkpoint.

    :param seq: a dict keyed by ``(topic, partition)`` with offsets as
        values (only accepted when ``self.sequence_format`` is ``'json'``),
        an ``int`` (stored as its decimal string), or any other value, which
        is stored unchanged.

    For a non-empty dict, one ``KafkaCheckpoint`` row per key is updated or
    created. The wrapped checkpoint is then saved with the new sequence and a
    fresh ``datetime.utcnow()`` timestamp, all inside ``transaction.atomic()``,
    and cached on ``self._last_checkpoint``.
    """
    partition_offsets = None
    if isinstance(seq, dict):
        assert self.sequence_format == 'json'
        partition_offsets = seq
        seq = kafka_seq_to_str(partition_offsets)
    elif isinstance(seq, int):
        seq = str(seq)

    message = "(%s) setting checkpoint: %s" % (self.checkpoint_id, seq)
    pillow_logging.info(message)

    with transaction.atomic():
        if partition_offsets:
            for topic_partition, offset in partition_offsets.items():
                topic_name = topic_partition[0]
                partition_no = topic_partition[1]
                KafkaCheckpoint.objects.update_or_create(
                    checkpoint_id=self.checkpoint_id,
                    topic=topic_name,
                    partition=partition_no,
                    defaults={'offset': offset},
                )

        stored = self.get_or_create_wrapped(verify_unchanged=True)
        stored.sequence = seq
        stored.timestamp = datetime.utcnow()
        stored.save()

    self._last_checkpoint = stored
def update_to(self, seq, change=None):
    """Record ``seq`` as the new position of this checkpoint.

    :param seq: a dict keyed by ``(topic, partition)`` with offsets as
        values (only accepted when ``self.sequence_format`` is ``'json'``),
        an ``int`` (stored as its decimal string), or any other value, which
        is stored unchanged.
    :param change: optional object; when truthy, its
        ``metadata.publish_timestamp`` is written to each ``KafkaCheckpoint``
        row as ``doc_modification_time``. Otherwise ``None`` is written.

    For a non-empty dict, one ``KafkaCheckpoint`` row per key is updated or
    created. The wrapped checkpoint is then saved with the new sequence and a
    fresh ``datetime.utcnow()`` timestamp, all inside ``transaction.atomic()``,
    and cached on ``self._last_checkpoint``.
    """
    partition_offsets = None
    if isinstance(seq, dict):
        assert self.sequence_format == 'json'
        partition_offsets = seq
        seq = kafka_seq_to_str(partition_offsets)
    elif isinstance(seq, int):
        seq = str(seq)

    message = "(%s) setting checkpoint: %s" % (self.checkpoint_id, seq)
    pillow_logging.info(message)

    if change:
        doc_modification_time = change.metadata.publish_timestamp
    else:
        doc_modification_time = None

    with transaction.atomic():
        if partition_offsets:
            for topic_partition, offset in partition_offsets.items():
                row_values = {
                    'offset': offset,
                    'doc_modification_time': doc_modification_time,
                }
                KafkaCheckpoint.objects.update_or_create(
                    checkpoint_id=self.checkpoint_id,
                    topic=topic_partition[0],
                    partition=topic_partition[1],
                    defaults=row_values,
                )

        stored = self.get_or_create_wrapped(verify_unchanged=True)
        stored.sequence = seq
        stored.timestamp = datetime.utcnow()
        stored.save()

    self._last_checkpoint = stored
def update_to(self, seq):
    """Store the offsets in ``seq`` as ``KafkaCheckpoint`` rows.

    :param seq: mapping whose keys are indexable as ``(topic, partition)``
        and whose values are offsets.

    The value produced by ``kafka_seq_to_str`` is used only for the log line.
    Rows are updated or created inside ``transaction.atomic()``; an empty
    (falsy) ``seq`` writes nothing.
    """
    partition_offsets = seq
    serialized = kafka_seq_to_str(partition_offsets)

    message = "(%s) setting checkpoint: %s" % (self.checkpoint_id, serialized)
    pillow_logging.info(message)

    with transaction.atomic():
        if partition_offsets:
            for topic_partition, offset in partition_offsets.items():
                topic_name = topic_partition[0]
                partition_no = topic_partition[1]
                KafkaCheckpoint.objects.update_or_create(
                    checkpoint_id=self.checkpoint_id,
                    topic=topic_name,
                    partition=partition_no,
                    defaults={'offset': offset},
                )
def handle(self, topic, num_partitions, **options):
    """Add missing partitions of ``topic`` to every JSON-format checkpoint.

    The operator is first asked to confirm that pillows are stopped and that
    the partitions were already added on the Kafka side. If either answer is
    not ``y``/``yes`` the command now aborts without touching any checkpoint
    (previously it printed a reminder and carried on regardless).

    For each ``DjangoPillowCheckpoint`` with ``sequence_format='json'`` whose
    sequence parses and already mentions ``topic``, every partition in
    ``range(num_partitions)`` that is absent gets offset ``0``. A changed
    checkpoint keeps its previous value in ``old_sequence``, is saved, and
    its offsets are mirrored into ``KafkaCheckpoint`` rows.

    :param topic: name of the Kafka topic that gained partitions
    :param num_partitions: new total partition count (passed to ``range``)
    :param options: remaining command options; unused here
    """
    # ``raw_input`` only exists on Python 2; Python 3 renamed it to ``input``.
    try:
        prompt = raw_input
    except NameError:
        prompt = input

    stop_pillows = prompt("did you stop pillows? [y/n]")
    if stop_pillows not in ['y', 'yes']:
        print("then stop them")
        return

    kafka_command = (
        "./kafka-topics.sh --alter --zookeeper <zk IP>:2181 --partitions={} --topic={}"
        .format(num_partitions, topic)
    )
    added_partition = prompt("have you run {} ? [y/n]".format(kafka_command))
    if added_partition not in ['y', 'yes']:
        print("then run it")
        return

    for checkpoint in DjangoPillowCheckpoint.objects.filter(sequence_format='json'):
        try:
            kafka_seq = str_to_kafka_seq(checkpoint.sequence)
        except ValueError:
            # The placeholder was never filled in before; format it properly.
            print("unable to parse {}".format(checkpoint.checkpoint_id))
            continue

        # Only checkpoints that already track this topic are extended.
        if not any(tp.topic == topic for tp in kafka_seq):
            print("topic does not exist in {}".format(checkpoint.checkpoint_id))
            continue

        changed = False
        for partition in range(num_partitions):
            tp = TopicAndPartition(topic, partition)
            if tp not in kafka_seq:
                # New partitions start from the beginning.
                kafka_seq[tp] = 0
                changed = True

        if changed:
            checkpoint.old_sequence = checkpoint.sequence
            checkpoint.sequence = kafka_seq_to_str(kafka_seq)
            checkpoint.save()

            # NOTE(review): the per-partition rows are refreshed only for
            # checkpoints that changed - confirm this matches the intended
            # nesting of the original loop.
            for topic_partition, offset in kafka_seq.items():
                KafkaCheckpoint.objects.update_or_create(
                    checkpoint_id=checkpoint.checkpoint_id,
                    topic=topic_partition.topic,
                    partition=topic_partition.partition,
                    defaults={'offset': offset},
                )

    print("please restart the pillows")
def update_to(self, seq, change=None):
    """Store the offsets in ``seq`` as ``KafkaCheckpoint`` rows.

    :param seq: either a string (per ``six.string_types``), which is parsed
        with ``str_to_kafka_seq``, or a mapping whose keys are indexable as
        ``(topic, partition)`` and whose values are offsets.
    :param change: optional object; when truthy, its
        ``metadata.publish_timestamp`` is written to each row as
        ``doc_modification_time``. Otherwise ``None`` is written.

    Rows are updated or created inside ``transaction.atomic()``; an empty
    (falsy) set of offsets writes nothing.
    """
    if isinstance(seq, six.string_types):
        partition_offsets = str_to_kafka_seq(seq)
        serialized = seq
    else:
        partition_offsets = seq
        serialized = kafka_seq_to_str(seq)

    message = "(%s) setting checkpoint: %s" % (self.checkpoint_id, serialized)
    pillow_logging.info(message)

    if change:
        doc_modification_time = change.metadata.publish_timestamp
    else:
        doc_modification_time = None

    with transaction.atomic():
        if partition_offsets:
            for topic_partition, offset in partition_offsets.items():
                row_values = {
                    'offset': offset,
                    'doc_modification_time': doc_modification_time,
                }
                KafkaCheckpoint.objects.update_or_create(
                    checkpoint_id=self.checkpoint_id,
                    topic=topic_partition[0],
                    partition=topic_partition[1],
                    defaults=row_values,
                )
def update_to(self, seq, change=None):
    """Store the offsets in ``seq`` as ``KafkaCheckpoint`` rows.

    :param seq: either a ``str``, which is parsed with ``str_to_kafka_seq``,
        or a mapping whose keys are indexable as ``(topic, partition)`` and
        whose values are offsets.
    :param change: optional object; when truthy, its
        ``metadata.publish_timestamp`` is written to each row as
        ``doc_modification_time``. Otherwise ``None`` is written.

    Rows are updated or created inside ``transaction.atomic()``; an empty
    (falsy) set of offsets writes nothing.
    """
    if isinstance(seq, str):
        partition_offsets = str_to_kafka_seq(seq)
        serialized = seq
    else:
        partition_offsets = seq
        serialized = kafka_seq_to_str(seq)

    message = "(%s) setting checkpoint: %s" % (self.checkpoint_id, serialized)
    pillow_logging.info(message)

    if change:
        doc_modification_time = change.metadata.publish_timestamp
    else:
        doc_modification_time = None

    with transaction.atomic():
        if partition_offsets:
            for topic_partition, offset in partition_offsets.items():
                row_values = {
                    'offset': offset,
                    'doc_modification_time': doc_modification_time,
                }
                KafkaCheckpoint.objects.update_or_create(
                    checkpoint_id=self.checkpoint_id,
                    topic=topic_partition[0],
                    partition=topic_partition[1],
                    defaults=row_values,
                )
def get_latest_offsets_json(self):
    """Return ``self.get_latest_offsets()`` as JSON-decoded data.

    The offsets are first passed through ``kafka_seq_to_str`` and the
    resulting string is then parsed with ``json.loads``.
    """
    latest_offsets = self.get_latest_offsets()
    serialized = kafka_seq_to_str(latest_offsets)
    return json.loads(serialized)
def _seq_to_int(checkpoint, seq):
    """Convert ``seq`` according to the kind of ``checkpoint``.

    :param checkpoint: passed to ``_is_kafka`` to choose the conversion
    :param seq: the sequence value to convert
    :returns: for a Kafka checkpoint, the JSON-decoded output of
        ``kafka_seq_to_str(seq)``; otherwise ``force_seq_int(seq)``
    """
    # Imported at call time, before the branch, exactly as the original did.
    from pillowtop.models import kafka_seq_to_str

    if not _is_kafka(checkpoint):
        return force_seq_int(seq)

    serialized = kafka_seq_to_str(seq)
    return json.loads(serialized)
def get_current_sequence_id(self):
    """Return the current sequence, converted by ``kafka_seq_to_str``.

    The value comes from ``self.get_current_sequence_as_dict()``.
    """
    current_sequence = self.get_current_sequence_as_dict()
    return kafka_seq_to_str(current_sequence)
def _seq_to_int(checkpoint, seq):
    """Convert ``seq`` according to ``checkpoint.sequence_format``.

    :param checkpoint: object exposing ``sequence_format``
    :param seq: the sequence value to convert
    :returns: for a ``'json'`` checkpoint, the JSON-decoded output of
        ``kafka_seq_to_str(seq)``; otherwise ``force_seq_int(seq)``
    """
    # Imported at call time, before the branch, exactly as the original did.
    from pillowtop.models import kafka_seq_to_str

    if checkpoint.sequence_format != 'json':
        return force_seq_int(seq)

    serialized = kafka_seq_to_str(seq)
    return json.loads(serialized)