def migrate_kafka_sequence(change_feed, checkpoint):
    """Convert a stored checkpoint sequence to the ``kafka_seq_to_str`` form.

    A sequence that parses as an integer, or any sequence whose format is
    ``'text'``, is mapped onto partition 0 of the feed's single topic (the
    offset is ``None`` when a ``'text'`` sequence is not an integer).  A
    ``'json'`` sequence is decoded as a topic -> offset mapping, and each
    topic that the feed consumes is mapped onto partition 0.

    :param change_feed: object exposing ``topics``.
    :param checkpoint: object exposing ``sequence`` and ``sequence_format``.
    :returns: the ``kafka_seq_to_str`` result for the migrated mapping, or the
        decoded JSON value itself when that value is falsy (e.g. ``{}``).
    :raises ValueError: if the sequence is not an integer and the format is
        neither ``'text'`` nor ``'json'``.
    """
    # Optimistically treat the stored value as a plain integer offset.
    try:
        numeric_offset = int(checkpoint.sequence)
    except ValueError:
        numeric_offset = None

    fmt = checkpoint.sequence_format

    if numeric_offset is not None or fmt == 'text':
        feed_topics = change_feed.topics
        assert len(feed_topics) == 1, feed_topics
        return kafka_seq_to_str({(feed_topics[0], 0): numeric_offset})

    if fmt != 'json':
        raise ValueError("Unknown checkpoint format: {}".format(fmt))

    stored = json.loads(checkpoint.sequence)
    if not stored:
        # Nothing to migrate: hand the empty value straight back.
        return stored

    wanted_topics = set(change_feed.topics)
    assert wanted_topics <= set(stored)

    migrated = {}
    for topic, offset in stored.items():
        if topic in wanted_topics:
            migrated[(topic, 0)] = offset
    return kafka_seq_to_str(migrated)
예제 #2
0
    def update_to(self, seq):
        """Store ``seq`` as this checkpoint's current sequence.

        A dict is treated as a kafka sequence: it is serialised with
        ``kafka_seq_to_str`` and, when non-empty, each key/offset pair is
        also written to ``KafkaCheckpoint`` (key index 0 as the topic, index
        1 as the partition).  An int is stored as its string form; any other
        value is stored unchanged.  The saved checkpoint is remembered on
        ``self._last_checkpoint``.
        """
        offsets_by_partition = None
        if isinstance(seq, dict):
            assert self.sequence_format == 'json'
            offsets_by_partition = seq
            seq = kafka_seq_to_str(offsets_by_partition)
        elif isinstance(seq, int):
            seq = str(seq)

        message = "(%s) setting checkpoint: %s" % (self.checkpoint_id, seq)
        pillow_logging.info(message)

        with transaction.atomic():
            if offsets_by_partition:
                for key, offset in offsets_by_partition.items():
                    KafkaCheckpoint.objects.update_or_create(
                        checkpoint_id=self.checkpoint_id,
                        topic=key[0],
                        partition=key[1],
                        defaults={'offset': offset},
                    )
            saved = self.get_or_create_wrapped(verify_unchanged=True)
            saved.sequence = seq
            saved.timestamp = datetime.utcnow()
            saved.save()
        self._last_checkpoint = saved
예제 #3
0
    def update_to(self, seq, change=None):
        """Store ``seq`` as this checkpoint's current sequence.

        A dict is treated as a kafka sequence: it is serialised with
        ``kafka_seq_to_str`` and, when non-empty, each key/offset pair is
        also written to ``KafkaCheckpoint`` (key index 0 as the topic, index
        1 as the partition) together with ``doc_modification_time``.  An int
        is stored as its string form; any other value is stored unchanged.

        :param seq: dict, int, or already-serialised sequence.
        :param change: optional object; when truthy its
            ``metadata.publish_timestamp`` is recorded as
            ``doc_modification_time``, otherwise ``None`` is recorded.
        """
        offsets_by_partition = None
        if isinstance(seq, dict):
            assert self.sequence_format == 'json'
            offsets_by_partition = seq
            seq = kafka_seq_to_str(offsets_by_partition)
        elif isinstance(seq, int):
            seq = str(seq)

        message = "(%s) setting checkpoint: %s" % (self.checkpoint_id, seq)
        pillow_logging.info(message)

        if change:
            doc_modification_time = change.metadata.publish_timestamp
        else:
            doc_modification_time = None

        with transaction.atomic():
            if offsets_by_partition:
                for key, offset in offsets_by_partition.items():
                    row_values = {
                        'offset': offset,
                        'doc_modification_time': doc_modification_time,
                    }
                    KafkaCheckpoint.objects.update_or_create(
                        checkpoint_id=self.checkpoint_id,
                        topic=key[0],
                        partition=key[1],
                        defaults=row_values,
                    )
            saved = self.get_or_create_wrapped(verify_unchanged=True)
            saved.sequence = seq
            saved.timestamp = datetime.utcnow()
            saved.save()
        self._last_checkpoint = saved
예제 #4
0
 def update_to(self, seq):
     """Write each entry of the kafka sequence ``seq`` to ``KafkaCheckpoint``.

     ``seq`` is serialised with ``kafka_seq_to_str`` for the log line only.
     When ``seq`` is non-empty, every key/offset pair is upserted inside one
     transaction, using key index 0 as the topic and index 1 as the
     partition.
     """
     serialized = kafka_seq_to_str(seq)
     message = "(%s) setting checkpoint: %s" % (self.checkpoint_id, serialized)
     pillow_logging.info(message)

     with transaction.atomic():
         if seq:
             for key, offset in seq.items():
                 KafkaCheckpoint.objects.update_or_create(
                     checkpoint_id=self.checkpoint_id,
                     topic=key[0],
                     partition=key[1],
                     defaults={'offset': offset},
                 )
예제 #5
0
    def handle(self, topic, num_partitions, **options):
        """Add offset-0 entries for missing partitions of ``topic`` to checkpoints.

        Prompts the operator to confirm that pillows are stopped and that the
        kafka partition change has been run.  Then, for every
        ``DjangoPillowCheckpoint`` with ``sequence_format='json'`` whose parsed
        sequence already contains ``topic``, adds an offset of 0 for each
        partition in ``range(num_partitions)`` that is not yet present.
        Changed checkpoints keep their previous sequence in ``old_sequence``,
        and all of their entries are upserted into ``KafkaCheckpoint``.

        :param topic: name of the kafka topic that gained partitions.
        :param num_partitions: total partition count, passed to ``range``.
        :param options: remaining command options (unused here).
        """
        # NOTE(review): raw_input only exists on Python 2 -- confirm the
        # interpreter this command targets before porting.
        stop_pillows = raw_input("did you stop pillows? [y/n]")
        if stop_pillows not in ['y', 'yes']:
            # NOTE(review): a negative answer only prints a reminder; the
            # command still carries on.  Confirm whether it should abort.
            print("then stop them")

        kafka_command = (
            "./kafka-topics.sh --alter --zookeeper <zk IP>:2181 --partitions={} --topic={}"
            .format(num_partitions, topic))
        added_partition = raw_input(
            "have you run {} ? [y/n]".format(kafka_command))
        if added_partition not in ['y', 'yes']:
            print("then run it")

        for checkpoint in DjangoPillowCheckpoint.objects.filter(
                sequence_format='json'):
            try:
                kafka_seq = str_to_kafka_seq(checkpoint.sequence)
            except ValueError:
                # Fixed: the placeholder was never filled in because the id
                # was passed to print() instead of str.format().
                print("unable to parse {}".format(checkpoint.checkpoint_id))
                continue

            known_topics = {tp.topic for tp in kafka_seq}
            if topic not in known_topics:
                print("topic does not exist in {}".format(
                    checkpoint.checkpoint_id))
                continue

            changed = False
            for partition in range(num_partitions):
                tp = TopicAndPartition(topic, partition)
                if tp not in kafka_seq:
                    # New partitions start from the beginning.
                    kafka_seq[tp] = 0
                    changed = True

            if changed:
                checkpoint.old_sequence = checkpoint.sequence
                checkpoint.sequence = kafka_seq_to_str(kafka_seq)
                checkpoint.save()

                for topic_partition, offset in kafka_seq.items():
                    KafkaCheckpoint.objects.update_or_create(
                        checkpoint_id=checkpoint.checkpoint_id,
                        topic=topic_partition.topic,
                        partition=topic_partition.partition,
                        defaults={'offset': offset})

        print("please restart the pillows")
예제 #6
0
    def update_to(self, seq, change=None):
        """Write each entry of a kafka sequence to ``KafkaCheckpoint``.

        ``seq`` may be a string (parsed with ``str_to_kafka_seq``) or an
        already-parsed sequence (serialised with ``kafka_seq_to_str`` for the
        log line).  When the parsed sequence is non-empty, every key/offset
        pair is upserted inside one transaction, using key index 0 as the
        topic and index 1 as the partition.

        :param change: optional object; when truthy its
            ``metadata.publish_timestamp`` is recorded as
            ``doc_modification_time``, otherwise ``None`` is recorded.
        """
        if not isinstance(seq, six.string_types):
            offsets_by_partition = seq
            seq = kafka_seq_to_str(offsets_by_partition)
        else:
            offsets_by_partition = str_to_kafka_seq(seq)

        message = "(%s) setting checkpoint: %s" % (self.checkpoint_id, seq)
        pillow_logging.info(message)

        if change:
            doc_modification_time = change.metadata.publish_timestamp
        else:
            doc_modification_time = None

        with transaction.atomic():
            if offsets_by_partition:
                for key, offset in offsets_by_partition.items():
                    row_values = {
                        'offset': offset,
                        'doc_modification_time': doc_modification_time,
                    }
                    KafkaCheckpoint.objects.update_or_create(
                        checkpoint_id=self.checkpoint_id,
                        topic=key[0],
                        partition=key[1],
                        defaults=row_values,
                    )
예제 #7
0
    def update_to(self, seq, change=None):
        """Write each entry of a kafka sequence to ``KafkaCheckpoint``.

        ``seq`` may be a ``str`` (parsed with ``str_to_kafka_seq``) or an
        already-parsed sequence (serialised with ``kafka_seq_to_str`` for the
        log line).  When the parsed sequence is non-empty, every key/offset
        pair is upserted inside one transaction, using key index 0 as the
        topic and index 1 as the partition.

        :param change: optional object; when truthy its
            ``metadata.publish_timestamp`` is recorded as
            ``doc_modification_time``, otherwise ``None`` is recorded.
        """
        if not isinstance(seq, str):
            offsets_by_partition = seq
            seq = kafka_seq_to_str(offsets_by_partition)
        else:
            offsets_by_partition = str_to_kafka_seq(seq)

        message = "(%s) setting checkpoint: %s" % (self.checkpoint_id, seq)
        pillow_logging.info(message)

        if change:
            doc_modification_time = change.metadata.publish_timestamp
        else:
            doc_modification_time = None

        with transaction.atomic():
            if offsets_by_partition:
                for key, offset in offsets_by_partition.items():
                    row_values = {
                        'offset': offset,
                        'doc_modification_time': doc_modification_time,
                    }
                    KafkaCheckpoint.objects.update_or_create(
                        checkpoint_id=self.checkpoint_id,
                        topic=key[0],
                        partition=key[1],
                        defaults=row_values,
                    )
예제 #8
0
파일: feed.py 프로젝트: twymer/commcare-hq
 def get_latest_offsets_json(self):
     """Return the latest offsets, serialised by ``kafka_seq_to_str`` and JSON-decoded."""
     serialized = kafka_seq_to_str(self.get_latest_offsets())
     return json.loads(serialized)
예제 #9
0
 def _seq_to_int(checkpoint, seq):
     """Normalise ``seq`` according to the kind of ``checkpoint``.

     When ``_is_kafka(checkpoint)`` is true, ``seq`` is serialised with
     ``kafka_seq_to_str`` and JSON-decoded; otherwise it is passed through
     ``force_seq_int``.
     """
     from pillowtop.models import kafka_seq_to_str

     if not _is_kafka(checkpoint):
         return force_seq_int(seq)
     serialized = kafka_seq_to_str(seq)
     return json.loads(serialized)
예제 #10
0
 def get_current_sequence_id(self):
     """Return the current sequence dict serialised by ``kafka_seq_to_str``."""
     current = self.get_current_sequence_as_dict()
     return kafka_seq_to_str(current)
예제 #11
0
 def get_current_sequence_id(self):
     """Serialise the result of ``get_current_sequence_as_dict`` with ``kafka_seq_to_str``."""
     sequence_dict = self.get_current_sequence_as_dict()
     sequence_id = kafka_seq_to_str(sequence_dict)
     return sequence_id
예제 #12
0
 def _seq_to_int(checkpoint, seq):
     """Normalise ``seq`` according to the checkpoint's sequence format.

     For a ``'json'`` checkpoint, ``seq`` is serialised with
     ``kafka_seq_to_str`` and JSON-decoded; for any other format it is passed
     through ``force_seq_int``.
     """
     from pillowtop.models import kafka_seq_to_str

     if checkpoint.sequence_format != 'json':
         return force_seq_int(seq)
     serialized = kafka_seq_to_str(seq)
     return json.loads(serialized)
예제 #13
0
파일: feed.py 프로젝트: dimagi/commcare-hq
 def get_latest_offsets_json(self):
     """JSON-decode the ``kafka_seq_to_str`` form of ``get_latest_offsets()``."""
     latest_offsets = self.get_latest_offsets()
     as_text = kafka_seq_to_str(latest_offsets)
     return json.loads(as_text)
예제 #14
0
 def _seq_to_int(checkpoint, seq):
     """Return ``seq`` in the form matching the kind of ``checkpoint``.

     A checkpoint for which ``_is_kafka`` is true gets the JSON-decoded
     ``kafka_seq_to_str`` serialisation of ``seq``; any other checkpoint gets
     ``force_seq_int(seq)``.
     """
     from pillowtop.models import kafka_seq_to_str

     kafka_backed = _is_kafka(checkpoint)
     if kafka_backed:
         result = json.loads(kafka_seq_to_str(seq))
     else:
         result = force_seq_int(seq)
     return result