Example no. 1
 def test_non_expired_checkpoint_iteration_strict(self):
     feed = KafkaChangeFeed(topics=[topics.FORM, topics.CASE], group_id='test-kafka-feed', strict=True)
     first_available_offsets = get_multi_topic_first_available_offsets([topics.FORM, topics.CASE])
     since = {
         topic: first_available for topic, first_available in first_available_offsets.items()
     }
     next(feed.iter_changes(since=since, forever=False))
Example no. 2
    def test_multiple_topics(self):
        feed = KafkaChangeFeed(topics=[topics.FORM, topics.CASE], group_id="test-kafka-feed")
        self.assertEqual(0, len(list(feed.iter_changes(since=None, forever=False))))
        producer = KeyedProducer(get_kafka_client_or_none())
        offsets = feed.get_current_offsets()
        send_to_kafka(
            producer, topics.FORM, ChangeMeta(document_id="1", data_source_type="form", data_source_name="form")
        )
        send_to_kafka(
            producer, topics.CASE, ChangeMeta(document_id="2", data_source_type="case", data_source_name="case")
        )
        send_to_kafka(
            producer,
            topics.FORM_SQL,
            ChangeMeta(document_id="3", data_source_type="form-sql", data_source_name="form-sql"),
        )
        send_to_kafka(
            producer,
            topics.CASE_SQL,
            ChangeMeta(document_id="4", data_source_type="case-sql", data_source_name="case-sql"),
        )

        changes = list(feed.iter_changes(since=offsets, forever=False))
        self.assertEqual(2, len(changes))
        self.assertEqual(set(["1", "2"]), set([change.id for change in changes]))
Example no. 4
 def test_expired_checkpoint_iteration_strict(self):
     feed = KafkaChangeFeed(topics=[topics.FORM, topics.CASE], group_id='test-kafka-feed', strict=True)
     first_available_offsets = get_multi_topic_first_available_offsets([topics.FORM, topics.CASE])
     since = {
         topic_partition: offset - 1
         for topic_partition, offset in first_available_offsets.items()
     }
     with self.assertRaises(UnavailableKafkaOffset):
         next(feed.iter_changes(since=since, forever=False))
Example no. 5
 def test_multiple_topics_with_partial_checkpoint(self):
     feed = KafkaChangeFeed(topics=[topics.FORM, topics.CASE], group_id='test-kafka-feed')
     self.assertEqual(0, len(list(feed.iter_changes(since=None, forever=False))))
     offsets = {'form': feed.get_current_offsets()['form']}
     expected_metas = [publish_stub_change(topics.FORM), publish_stub_change(topics.CASE)]
     changes = list(feed.iter_changes(since=offsets, forever=False))
     # should include at least the form and the case (may have more than one case since not
     # specifying a checkpoint rewinds it to the beginning of the feed)
     self.assertTrue(len(changes) > 1)
     found_change_ids = set([change.id for change in changes])
     for expected_id in set([meta.document_id for meta in expected_metas]):
         self.assertTrue(expected_id in found_change_ids)
Example no. 6
 def test_multiple_topics_with_partial_checkpoint(self):
     feed = KafkaChangeFeed(topics=[topics.FORM, topics.CASE], group_id='test-kafka-feed')
     self.assertEqual(0, len(list(feed.iter_changes(since=None, forever=False))))
     offsets = {'form': feed.get_latest_offsets()['form']}
     expected_metas = [publish_stub_change(topics.FORM), publish_stub_change(topics.CASE)]
     changes = list(feed.iter_changes(since=offsets, forever=False))
     # should include at least the form and the case (may have more than one case since not
     # specifying a checkpoint rewinds it to the beginning of the feed)
     self.assertTrue(len(changes) > 1)
     found_change_ids = set([change.id for change in changes])
     for expected_id in set([meta.document_id for meta in expected_metas]):
         self.assertTrue(expected_id in found_change_ids)
Example no. 7
 def test_multiple_topics(self):
     feed = KafkaChangeFeed(topics=[topics.FORM, topics.CASE], group_id='test-kafka-feed')
     self.assertEqual(0, len(list(feed.iter_changes(since=None, forever=False))))
     offsets = feed.get_latest_offsets()
     expected_metas = [publish_stub_change(topics.FORM), publish_stub_change(topics.CASE)]
     unexpected_metas = [publish_stub_change(topics.FORM_SQL), publish_stub_change(topics.CASE_SQL)]
     changes = list(feed.iter_changes(since=offsets, forever=False))
     self.assertEqual(2, len(changes))
     found_change_ids = set([change.id for change in changes])
     self.assertEqual(set([meta.document_id for meta in expected_metas]), found_change_ids)
     for unexpected in unexpected_metas:
         self.assertTrue(unexpected.document_id not in found_change_ids)
Example no. 8
 def test_multiple_topics(self):
     feed = KafkaChangeFeed(topics=[topics.FORM, topics.CASE], group_id='test-kafka-feed')
     self.assertEqual(0, len(list(feed.iter_changes(since=None, forever=False))))
     offsets = feed.get_current_offsets()
     expected_metas = [publish_stub_change(topics.FORM), publish_stub_change(topics.CASE)]
     unexpected_metas = [publish_stub_change(topics.FORM_SQL), publish_stub_change(topics.CASE_SQL)]
     changes = list(feed.iter_changes(since=offsets, forever=False))
     self.assertEqual(2, len(changes))
     found_change_ids = set([change.id for change in changes])
     self.assertEqual(set([meta.document_id for meta in expected_metas]), found_change_ids)
     for unexpected in unexpected_metas:
         self.assertTrue(unexpected.document_id not in found_change_ids)
Example no. 9
    def handle(self, **options):
        if options['print_kafka_offsets']:
            start, end = self.get_min_max_offsets()
            print("\n\nKakfa topic offset range: {} - {}".format(start, end))
            return

        start_offset = options['offset_start']
        end_offset = options['offset_end']

        start, end = self.get_min_max_offsets()
        if start_offset < start:
            start_offset = start
        if end_offset < 0 or end_offset > end:
            end_offset = end

        if start_offset > end_offset:
            raise CommandError("Start greater than end: {} > {}".format(start_offset, end_offset))

        print('Using kafka offset range: {} - {}'.format(start_offset, end_offset))

        if options['find_start_offset']:
            find_first_match = FindFirstMatch(start_offset, end_offset, check_user_at_offset)
            first_matching_offset = find_first_match.search()
            if first_matching_offset is None:
                raise CommandError("Unable to find first matching offset. "
                                   "Try a different search range.")
            else:
                print("\nFirst matching offset = {}".format(first_matching_offset))
            return

        check = options['check']

        seen_ids = set()
        change_feed = KafkaChangeFeed(topics=[COMMCARE_USER], group_id='user-repair')
        for change in change_feed.iter_changes(since=start_offset, forever=False):
            if change.sequence_id > end_offset:
                return

            if change.id in seen_ids:
                continue

            seen_ids.add(change.id)

            if change.deleted:
                continue

            try:
                user = change.get_document()
            except ResourceNotFound:
                continue

            user = CommCareUser.wrap(user)

            if user_looks_ok(user):
                continue

            restore_domain_membership(user, check=check)

            if change.sequence_id % 100 == 0:
                print("Processed up to offset: {}".format(change.sequence_id))
Example no. 10
 def test_non_expired_checkpoint_iteration_strict(self):
     feed = KafkaChangeFeed(topics=[topics.FORM, topics.CASE],
                            client_id='test-kafka-feed',
                            strict=True)
     first_available_offsets = get_multi_topic_first_available_offsets(
         [topics.FORM, topics.CASE])
     next(feed.iter_changes(since=first_available_offsets, forever=False))
Example no. 11
 def test_expired_checkpoint_iteration_strict(self):
     feed = KafkaChangeFeed(topics=[topics.FORM, topics.CASE], client_id='test-kafka-feed', strict=True)
     first_available_offsets = get_multi_topic_first_available_offsets([topics.FORM, topics.CASE])
     since = {
         topic_partition: offset - 1
         for topic_partition, offset in first_available_offsets.items()
     }
     with self.assertRaises(UnavailableKafkaOffset):
         next(feed.iter_changes(since=since, forever=False))
Example no. 12
    def handle(self, *args, **options):
        since = options['from']
        sleep = float(options['sleep'] or '.01')
        last_domain = None
        change_feed = KafkaChangeFeed(topic=topics.FORM, group_id='form-feed')
        for change in change_feed.iter_changes(since=since, forever=True):
            if not change.deleted:
                # this is just helpful for demos to find domain transitions
                if change.metadata.domain != last_domain:
                    last_domain = change.metadata.domain
                    print(change.sequence_id, last_domain)

                metadata = change.metadata.to_json()
                if not options['compact']:
                    metadata['country'] = _get_country(change.metadata.domain)
                message = RedisMessage(json.dumps(metadata))
                RedisPublisher(facility='form-feed', broadcast=True).publish_message(message)
                time.sleep(sleep)
Example no. 13
class capture_kafka_changes_context(object):
    def __init__(self, *topics):
        self.topics = topics
        self.change_feed = KafkaChangeFeed(
            topics=topics,
            client_id='test-{}'.format(uuid.uuid4().hex),
        )
        self.changes = None

    def __enter__(self):
        self.kafka_seq = get_multi_topic_offset(self.topics)
        self.changes = []
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        for change in self.change_feed.iter_changes(since=self.kafka_seq, forever=False):
            if change:
                self.changes.append(change)
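
The context manager above is not exercised in these snippets. A minimal usage sketch, assuming the publish_stub_change helper and topics constants that appear in the test examples earlier (reused here purely for illustration):

with capture_kafka_changes_context(topics.FORM, topics.CASE) as context:
    # anything published inside the block is collected when __exit__ runs
    meta = publish_stub_change(topics.FORM)

# context.changes now holds every change seen since __enter__ captured the offsets
assert meta.document_id in {change.id for change in context.changes}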
Example no. 14
    def handle(self, **options):
        since = options['from']
        sleep = float(options['sleep'] or '.01')
        last_domain = None
        change_feed = KafkaChangeFeed(topics=[topics.FORM], group_id='form-feed')
        for change in change_feed.iter_changes(since=since, forever=True):
            if not change.deleted:
                # this is just helpful for demos to find domain transitions
                if change.metadata.domain != last_domain:
                    last_domain = change.metadata.domain
                    print(change.sequence_id, last_domain)

                metadata = change.metadata.to_json()
                if not options['compact']:
                    metadata['country'] = _get_country(change.metadata.domain)
                message = RedisMessage(json.dumps(metadata))
                RedisPublisher(facility='form-feed', broadcast=True).publish_message(message)
                time.sleep(sleep)
Example no. 15
class capture_kafka_changes_context(object):
    def __init__(self, *topics):
        self.topics = topics
        self.change_feed = KafkaChangeFeed(
            topics=topics,
            group_id='test-{}'.format(uuid.uuid4().hex),
        )
        self.changes = None

    def __enter__(self):
        self.kafka_seq = get_multi_topic_offset(self.topics)
        self.changes = []
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        for change in self.change_feed.iter_changes(since=self.kafka_seq, forever=False):
            if change:
                self.changes.append(change)
Example no. 16
def check_user_at_offset(offset):
    change_feed = KafkaChangeFeed(topics=[COMMCARE_USER], group_id='user-repair')
    change = None
    try:
        change = next(change_feed.iter_changes(since=offset, forever=False))
    except StopIteration:
        pass

    if not change:
        raise CommandError("No change at offset: {}".format(offset))

    if change.deleted:
        return False

    try:
        user = change.get_document()
    except ResourceNotFound:
        return False

    return 'commcare_project' in user['user_data']
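
check_user_at_offset is the predicate handed to FindFirstMatch in the management command above. FindFirstMatch itself is not shown in these snippets; the class below is a hypothetical sketch of such a search, assuming the predicate is False below some offset and stays True from that offset onward. Only the interface (a constructor taking start, end and a predicate, and a search() method returning the first matching offset or None) is taken from the call site above.

class FindFirstMatch(object):
    """Hypothetical sketch, not the project's implementation: binary search
    for the first offset in [start, end] whose predicate returns True."""

    def __init__(self, start, end, predicate):
        self.start = start
        self.end = end
        self.predicate = predicate

    def search(self):
        lo, hi = self.start, self.end
        first_match = None
        while lo <= hi:
            mid = (lo + hi) // 2
            if self.predicate(mid):
                first_match = mid
                hi = mid - 1  # found a match; keep looking for an earlier one
            else:
                lo = mid + 1
        return first_match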
Example no. 17
 def test_non_expired_checkpoint_iteration_strict(self):
     feed = KafkaChangeFeed(topics=[topics.FORM, topics.CASE], client_id='test-kafka-feed', strict=True)
     first_available_offsets = get_multi_topic_first_available_offsets([topics.FORM, topics.CASE])
     next(feed.iter_changes(since=first_available_offsets, forever=False))
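
Taken together, the examples share one pattern: record where the feed currently ends, publish, then iterate with since= those offsets and forever=False to read only what was published afterwards. A minimal sketch of that flow, assuming the newer client_id constructor, get_latest_offsets and the publish_stub_change test helper shown above:

feed = KafkaChangeFeed(topics=[topics.FORM, topics.CASE], client_id='demo-feed')

# remember the current end of the feed so the loop only sees new changes
offsets = feed.get_latest_offsets()
meta = publish_stub_change(topics.FORM)

for change in feed.iter_changes(since=offsets, forever=False):
    if change.id == meta.document_id:
        print('saw published change:', change.id)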