    def handle(self, **options):
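        # Rewind the checkpoint of each selected pillow to a historical
        # checkpoint recorded before the given date, prompting before each update.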
        date = options['date']
        pillow_args = set(options['pillows'] or [])

        if not pillow_args and not confirm('Reset checkpoints for ALL pillows?'):
            raise CommandError('Abort')

        def _pillow_match(pillow_id):
            return (
                pillow_id in pillow_args
                or any(re.match(arg, pillow_id, re.IGNORECASE) for arg in pillow_args)
            )

        all_pillows = get_all_pillow_instances()
        if not pillow_args:
            pillows = all_pillows
        else:
            pillows = [
                pillow for pillow in all_pillows
                if _pillow_match(pillow.pillow_id)
            ]

            if not pillows:
                raise CommandError('No pillows match: {}'.format(options['pillows']))

            if not confirm('Update checkpoints for {}?'.format('\n  '.join(p.pillow_id for p in pillows))):
                raise CommandError('abort')

        for pillow in pillows:
            checkpoint = pillow.checkpoint
            historical_checkpoint = HistoricalPillowCheckpoint.objects.filter(
                checkpoint_id=checkpoint.checkpoint_id,
                date_updated__lt=date).first()

            if not historical_checkpoint:
                print(self.style.ERROR('No historical checkpoints for {} before {}'.format(
                    checkpoint.checkpoint_id, date))
                )
                continue

            old_seq = pillow.get_last_checkpoint_sequence()
            new_seq = historical_checkpoint.seq
            if checkpoint.sequence_format == 'json' and isinstance(old_seq, dict):
                new_seq = str_to_kafka_seq(new_seq)
                diff = ('\n'.join(difflib.ndiff(
                    pprint.pformat(old_seq).splitlines(),
                    pprint.pformat(new_seq).splitlines())))
            else:
                diff = 'from: {}\nto  : {}'.format(old_seq, new_seq)

            pillow_id = pillow.pillow_id
            if old_seq == new_seq:
                print('Sequences for {} are identical, moving on.'.format(pillow_id))
                continue

            if confirm("\nReset checkpoint for '{}' pillow to sequence from  {}:\n\n{}\n".format(
                    pillow_id, historical_checkpoint.date_updated, diff
            )):
                pillow.checkpoint.update_to(new_seq)
                print(self.style.SUCCESS("Checkpoint for {} updated\n".format(pillow_id)))
    def rewind_pillows(date):
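        # Restore every pillow to the checkpoint recorded exactly on the given
        # date; if none exists, fall back to an empty checkpoint sequence.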
        for pillow in get_all_pillow_instances():
            checkpoint = pillow.checkpoint
            try:
                checkpoint = HistoricalPillowCheckpoint.objects.get(checkpoint_id=checkpoint.checkpoint_id,
                                                                    date_updated=date)
                if pillow.checkpoint.sequence_format == 'json':
                    seq = str_to_kafka_seq(checkpoint.seq)
                else:
                    seq = checkpoint.seq
            except HistoricalPillowCheckpoint.DoesNotExist:
                seq = DEFAULT_EMPTY_CHECKPOINT_SEQUENCE_FOR_RESTORE[pillow.checkpoint.sequence_format]

            pillow.checkpoint.update_to(seq)
Example #3
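    # Migrate each JSON-format DjangoPillowCheckpoint into one KafkaCheckpoint
    # row per topic/partition.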
    def handle(self, **options):
        for checkpoint in DjangoPillowCheckpoint.objects.filter(
                sequence_format='json'):
            try:
                kafka_seq = str_to_kafka_seq(checkpoint.sequence)
            except ValueError:
                print("unable to migrate {}".format(checkpoint.checkpoint_id))
            else:
                for topic_partition, offset in kafka_seq.items():
                    KafkaCheckpoint.objects.update_or_create(
                        checkpoint_id=checkpoint.checkpoint_id,
                        topic=topic_partition.topic,
                        partition=topic_partition.partition,
                        defaults={'offset': offset})
Example #6
    def handle(self, topic, num_partitions, **options):
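        # Guide the operator through adding partitions to a Kafka topic, then
        # add an offset of 0 for each new partition to every JSON checkpoint.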
        stop_pillows = raw_input("did you stop pillows? [y/n]")
        if stop_pillows not in ['y', 'yes']:
            print("then stop them")

        kafka_command = (
            "./kafka-topics.sh --alter --zookeeper <zk IP>:2181 --partitions={} --topic={}"
            .format(num_partitions, topic))
        added_partition = raw_input(
            "have you run {} ? [y/n]".format(kafka_command))
        if added_partition not in ['y', 'yes']:
            print("then run it")

        for checkpoint in DjangoPillowCheckpoint.objects.filter(
                sequence_format='json'):
            try:
                kafka_seq = str_to_kafka_seq(checkpoint.sequence)
            except ValueError:
                print("unable to parse {}", checkpoint.checkpoint_id)
                continue

            topics = [tp.topic for tp in kafka_seq]
            if topic not in topics:
                print("topic does not exist in {}", checkpoint.checkpoint_id)
                continue

            changed = False
            for partition in range(num_partitions):
                tp = TopicAndPartition(topic, partition)
                if tp in kafka_seq:
                    continue
                else:
                    changed = True
                    kafka_seq[tp] = 0

            if changed:
                checkpoint.old_sequence = checkpoint.sequence
                checkpoint.sequence = kafka_seq_to_str(kafka_seq)
                checkpoint.save()

                for topic_partition, offset in kafka_seq.items():
                    KafkaCheckpoint.objects.update_or_create(
                        checkpoint_id=checkpoint.checkpoint_id,
                        topic=topic_partition.topic,
                        partition=topic_partition.partition,
                        defaults={'offset': offset})

        print("please restart the pillows")
Example #7
    def update_to(self, seq):
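        # Persist the sequence as one KafkaCheckpoint row per topic/partition.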
        if isinstance(seq, six.string_types):
            kafka_seq = str_to_kafka_seq(seq)
        else:
            kafka_seq = seq
            seq = kafka_seq_to_str(seq)

        pillow_logging.info("(%s) setting checkpoint: %s" %
                            (self.checkpoint_id, seq))
        with transaction.atomic():
            if kafka_seq:
                for topic_partition, offset in kafka_seq.items():
                    KafkaCheckpoint.objects.update_or_create(
                        checkpoint_id=self.checkpoint_id,
                        topic=topic_partition[0],
                        partition=topic_partition[1],
                        defaults={'offset': offset})
    def handle(self, doc_type, since, **kwargs):
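        # Replay the change feed from a historical checkpoint and re-process
        # deletions through the pillow's Elasticsearch processors.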
        since = datetime.strptime(since, '%Y-%m-%d')
        if doc_type == 'form':
            pillows = get_form_es_pillows()
        elif doc_type == 'case':
            pillows = get_case_es_pillows()
        else:
            print("Unknown doc type {}. Specify form or case doc-type".format(
                doc_type))
            return

        for pillow in pillows:
            print("Processing for pillow {}".format(pillow.pillow_id))
            try:
                checkpoint = HistoricalPillowCheckpoint.objects.get(
                    date_updated=since,
                    checkpoint_id=pillow.checkpoint.checkpoint_id)
            except HistoricalPillowCheckpoint.DoesNotExist:
                print(
                    "No HistoricalPillowCheckpoint data available for pillow {}\n"
                    .format(pillow.pillow_id))
                continue
            total_changes = 0
            deleted_changes = 0
            seq = str_to_kafka_seq(checkpoint.seq)
            es_processors = [
                p for p in pillow.processors
                if isinstance(p, ElasticProcessor)
            ]
            for change in pillow.get_change_feed().iter_changes(since=seq,
                                                                forever=False):
                total_changes += 1
                if change.deleted and change.id:
                    deleted_changes += 1
                    for processor in es_processors:
                        processor.process_change(change)
                if total_changes % 100 == 0:
                    print(
                        "Processed {} deletes out of total {} changes for pillow {}\n"
                        .format(deleted_changes, total_changes,
                                pillow.pillow_id))
        print("Finished processing all deletes sucessfully!")
Example #9
    def update_to(self, seq, change=None):
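        # Persist the sequence per topic/partition and, when a change is
        # supplied, store its publish timestamp alongside each offset.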
        if isinstance(seq, six.string_types):
            kafka_seq = str_to_kafka_seq(seq)
        else:
            kafka_seq = seq
            seq = kafka_seq_to_str(seq)

        pillow_logging.info(
            "(%s) setting checkpoint: %s" % (self.checkpoint_id, seq)
        )
        doc_modification_time = change.metadata.publish_timestamp if change else None

        with transaction.atomic():
            if kafka_seq:
                for topic_partition, offset in kafka_seq.items():
                    KafkaCheckpoint.objects.update_or_create(
                        checkpoint_id=self.checkpoint_id,
                        topic=topic_partition[0],
                        partition=topic_partition[1],
                        defaults={'offset': offset, 'doc_modification_time': doc_modification_time}
                    )
Example #10
    def update_to(self, seq, change=None):
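        # Python 3 variant of the previous example: isinstance() checks str
        # rather than six.string_types.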
        if isinstance(seq, str):
            kafka_seq = str_to_kafka_seq(seq)
        else:
            kafka_seq = seq
            seq = kafka_seq_to_str(seq)

        pillow_logging.info(
            "(%s) setting checkpoint: %s" % (self.checkpoint_id, seq)
        )
        doc_modification_time = change.metadata.publish_timestamp if change else None

        with transaction.atomic():
            if kafka_seq:
                for topic_partition, offset in kafka_seq.items():
                    KafkaCheckpoint.objects.update_or_create(
                        checkpoint_id=self.checkpoint_id,
                        topic=topic_partition[0],
                        partition=topic_partition[1],
                        defaults={'offset': offset, 'doc_modification_time': doc_modification_time}
                    )