def test_non_expired_checkpoint_iteration_strict(self):
    """A strict feed should iterate cleanly when the checkpoint offsets
    are still retained by the Kafka brokers (no UnavailableKafkaOffset).
    """
    feed = KafkaChangeFeed(topics=[topics.FORM, topics.CASE], group_id='test-kafka-feed', strict=True)
    # Start exactly at the first offsets Kafka still retains; the identity
    # dict comprehension of the original added nothing, so pass it through.
    # (Also fixes the 'first_avaliable' typo.)
    since = get_multi_topic_first_available_offsets([topics.FORM, topics.CASE])
    # Python 3: generators have no .next() method — use the next() builtin.
    next(feed.iter_changes(since=since, forever=False))
def test_multiple_topics(self):
    """Changes published to the feed's topics are returned; documents
    published to other (SQL) topics are not.
    """
    feed = KafkaChangeFeed(topics=[topics.FORM, topics.CASE], group_id="test-kafka-feed")
    # with no checkpoint there is nothing to consume yet
    self.assertEqual(0, len(list(feed.iter_changes(since=None, forever=False))))
    producer = KeyedProducer(get_kafka_client_or_none())
    offsets = feed.get_current_offsets()
    # publish one stub document per topic: first two are watched, last two are not
    published = [
        (topics.FORM, "1", "form"),
        (topics.CASE, "2", "case"),
        (topics.FORM_SQL, "3", "form-sql"),
        (topics.CASE_SQL, "4", "case-sql"),
    ]
    for topic, doc_id, source in published:
        send_to_kafka(
            producer,
            topic,
            ChangeMeta(document_id=doc_id, data_source_type=source, data_source_name=source),
        )
    changes = list(feed.iter_changes(since=offsets, forever=False))
    # only the FORM and CASE documents come back
    self.assertEqual(2, len(changes))
    self.assertEqual({"1", "2"}, {change.id for change in changes})
def test_non_expired_checkpoint_iteration_strict(self):
    """A strict feed should iterate cleanly when the checkpoint offsets
    are still retained by the Kafka brokers (no UnavailableKafkaOffset).
    """
    feed = KafkaChangeFeed(topics=[topics.FORM, topics.CASE], group_id='test-kafka-feed', strict=True)
    # Start exactly at the first offsets Kafka still retains; the identity
    # dict comprehension of the original added nothing, so pass it through.
    # (Also fixes the 'first_avaliable' typo.)
    since = get_multi_topic_first_available_offsets([topics.FORM, topics.CASE])
    # Python 3: generators have no .next() method — use the next() builtin.
    next(feed.iter_changes(since=since, forever=False))
def test_expired_checkpoint_iteration_strict(self):
    """A strict feed must raise UnavailableKafkaOffset when the checkpoint
    points before the earliest offset Kafka still retains.
    """
    feed = KafkaChangeFeed(topics=[topics.FORM, topics.CASE], group_id='test-kafka-feed', strict=True)
    first_available_offsets = get_multi_topic_first_available_offsets([topics.FORM, topics.CASE])
    # rewind one offset before the earliest retained message to simulate a
    # checkpoint that has aged out of Kafka's retention window
    since = {
        topic_partition: offset - 1
        for topic_partition, offset in first_available_offsets.items()
    }
    with self.assertRaises(UnavailableKafkaOffset):
        # Python 3: generators have no .next() method — use the next() builtin.
        next(feed.iter_changes(since=since, forever=False))
def test_multiple_topics_with_partial_checkpoint(self):
    """A checkpoint that covers only one topic still surfaces changes
    from every subscribed topic.
    """
    feed = KafkaChangeFeed(topics=[topics.FORM, topics.CASE], group_id='test-kafka-feed')
    self.assertEqual(0, len(list(feed.iter_changes(since=None, forever=False))))
    # checkpoint only the 'form' topic; 'case' has no saved offset
    offsets = {'form': feed.get_current_offsets()['form']}
    expected_metas = [publish_stub_change(topics.FORM), publish_stub_change(topics.CASE)]
    changes = list(feed.iter_changes(since=offsets, forever=False))
    # should include at least the form and the case (may have more than one case since not
    # specifying a checkpoint rewinds it to the beginning of the feed)
    self.assertTrue(len(changes) > 1)
    found_change_ids = {change.id for change in changes}
    expected_ids = {meta.document_id for meta in expected_metas}
    for expected_id in expected_ids:
        self.assertTrue(expected_id in found_change_ids)
def test_multiple_topics_with_partial_checkpoint(self):
    """Checkpointing just one topic must not hide changes on the others."""
    feed = KafkaChangeFeed(topics=[topics.FORM, topics.CASE], group_id='test-kafka-feed')
    self.assertEqual(0, len(list(feed.iter_changes(since=None, forever=False))))
    # only the 'form' topic gets a checkpoint entry
    partial_checkpoint = {'form': feed.get_latest_offsets()['form']}
    published = [publish_stub_change(topics.FORM), publish_stub_change(topics.CASE)]
    changes = list(feed.iter_changes(since=partial_checkpoint, forever=False))
    # should include at least the form and the case (may have more than one case since not
    # specifying a checkpoint rewinds it to the beginning of the feed)
    self.assertTrue(len(changes) > 1)
    seen_ids = {change.id for change in changes}
    for doc_id in {meta.document_id for meta in published}:
        self.assertTrue(doc_id in seen_ids)
def test_multiple_topics(self):
    """Changes on subscribed topics come through; SQL topics are ignored."""
    feed = KafkaChangeFeed(topics=[topics.FORM, topics.CASE], group_id='test-kafka-feed')
    # nothing to consume before any changes are published
    self.assertEqual(0, len(list(feed.iter_changes(since=None, forever=False))))
    checkpoint = feed.get_latest_offsets()
    wanted = [publish_stub_change(topics.FORM), publish_stub_change(topics.CASE)]
    unwanted = [publish_stub_change(topics.FORM_SQL), publish_stub_change(topics.CASE_SQL)]
    changes = list(feed.iter_changes(since=checkpoint, forever=False))
    self.assertEqual(2, len(changes))
    seen_ids = {change.id for change in changes}
    self.assertEqual({meta.document_id for meta in wanted}, seen_ids)
    for meta in unwanted:
        self.assertTrue(meta.document_id not in seen_ids)
def test_multiple_topics(self):
    """Documents published to unsubscribed (SQL) topics never appear in the feed."""
    feed = KafkaChangeFeed(topics=[topics.FORM, topics.CASE], group_id='test-kafka-feed')
    self.assertEqual(0, len(list(feed.iter_changes(since=None, forever=False))))
    starting_offsets = feed.get_current_offsets()
    # two changes on watched topics, two on unwatched ones
    expected = [publish_stub_change(topics.FORM), publish_stub_change(topics.CASE)]
    not_expected = [publish_stub_change(topics.FORM_SQL), publish_stub_change(topics.CASE_SQL)]
    changes = list(feed.iter_changes(since=starting_offsets, forever=False))
    self.assertEqual(2, len(changes))
    ids_in_feed = {change.id for change in changes}
    self.assertEqual({meta.document_id for meta in expected}, ids_in_feed)
    for meta in not_expected:
        self.assertTrue(meta.document_id not in ids_in_feed)
def handle(self, **options):
    """Management-command entry point for repairing user domain membership.

    Modes, selected by ``options``:
      * ``print_kafka_offsets`` — print the topic's offset range and exit.
      * ``find_start_offset``  — binary-search for the first offset whose
        user matches ``check_user_at_offset`` and exit.
      * default                — scan changes between the (clamped) start and
        end offsets and repair any user that fails ``user_looks_ok``.
    """
    if options['print_kafka_offsets']:
        start, end = self.get_min_max_offsets()
        # NOTE(review): "Kakfa" typo is in the user-facing output string.
        print("\n\nKakfa topic offset range: {} - {}".format(start, end))
        return
    start_offset = options['offset_start']
    end_offset = options['offset_end']
    start, end = self.get_min_max_offsets()
    # clamp the requested range to what Kafka actually retains;
    # a negative end means "to the end of the topic"
    if start_offset < start:
        start_offset = start
    if end_offset < 0 or end_offset > end:
        end_offset = end
    if start_offset > end_offset:
        raise CommandError("Start greater than end: {} > {}".format(start_offset, end_offset))
    print('Using kafka offset range: {} - {}'.format(start_offset, end_offset))
    if options['find_start_offset']:
        # search the clamped range for the first offset with a matching user
        find_first_match = FindFirstMatch(start_offset, end_offset, check_user_at_offset)
        first_matching_offset = find_first_match.search()
        if first_matching_offset is None:
            raise CommandError("Unable to find first matching offset. "
                               "Try a different search range.")
        else:
            print("\nFirst matching offset = {}".format(first_matching_offset))
        return
    check = options['check']
    seen_ids = set()  # process each user document at most once
    change_feed = KafkaChangeFeed(topics=[COMMCARE_USER], group_id='user-repair')
    for change in change_feed.iter_changes(since=start_offset, forever=False):
        # stop once we pass the end of the requested range
        if change.sequence_id > end_offset:
            return
        if change.id in seen_ids:
            continue
        seen_ids.add(change.id)
        if change.deleted:
            continue
        try:
            user = change.get_document()
        except ResourceNotFound:
            # document no longer exists; nothing to repair
            continue
        user = CommCareUser.wrap(user)
        if user_looks_ok(user):
            continue
        # broken domain membership — attempt the repair (or dry-run when check)
        restore_domain_membership(user, check=check)
        if change.sequence_id % 100 == 0:
            print("Processed up to offset: {}".format(change.sequence_id))
def test_non_expired_checkpoint_iteration_strict(self):
    """Strict iteration succeeds when starting from offsets Kafka still retains."""
    watched = [topics.FORM, topics.CASE]
    feed = KafkaChangeFeed(topics=watched, client_id='test-kafka-feed', strict=True)
    since = get_multi_topic_first_available_offsets(watched)
    # must not raise UnavailableKafkaOffset
    next(feed.iter_changes(since=since, forever=False))
def test_expired_checkpoint_iteration_strict(self):
    """Strict iteration raises when the checkpoint precedes retained offsets."""
    watched = [topics.FORM, topics.CASE]
    feed = KafkaChangeFeed(topics=watched, client_id='test-kafka-feed', strict=True)
    earliest = get_multi_topic_first_available_offsets(watched)
    # one offset earlier than anything Kafka still has -> expired checkpoint
    expired = {partition: offset - 1 for partition, offset in earliest.items()}
    with self.assertRaises(UnavailableKafkaOffset):
        next(feed.iter_changes(since=expired, forever=False))
def handle(self, *args, **options):
    """Tail the form change feed forever, publishing each change's
    metadata to the 'form-feed' Redis channel.

    ``options['from']`` is the starting offset; ``options['sleep']`` throttles
    publishing (default .01s); ``options['compact']`` skips the country lookup.
    """
    since = options['from']
    sleep = float(options['sleep'] or '.01')
    last_domain = None
    change_feed = KafkaChangeFeed(topic=topics.FORM, group_id='form-feed')
    for change in change_feed.iter_changes(since=since, forever=True):
        if not change.deleted:
            # this is just helpful for demos to find domain transitions
            if change.metadata.domain != last_domain:
                last_domain = change.metadata.domain
                # fixed: Python 2 `print a, b` statement is a syntax error on
                # Python 3 — use the print() function instead
                print(change.sequence_id, last_domain)
            metadata = change.metadata.to_json()
            if not options['compact']:
                metadata['country'] = _get_country(change.metadata.domain)
            message = RedisMessage(json.dumps(metadata))
            RedisPublisher(facility='form-feed', broadcast=True).publish_message(message)
            time.sleep(sleep)
class capture_kafka_changes_context(object):
    """Context manager that captures Kafka changes published while the
    ``with`` block runs; the collected changes are available as ``.changes``.
    """

    def __init__(self, *topics):
        self.topics = topics
        # unique client id so concurrent consumers don't collide
        self.change_feed = KafkaChangeFeed(
            topics=topics,
            client_id='test-{}'.format(uuid.uuid4().hex),
        )
        self.changes = None

    def __enter__(self):
        # remember where each topic currently ends
        self.kafka_seq = get_multi_topic_offset(self.topics)
        self.changes = []
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # drain everything published since __enter__, dropping falsy entries
        self.changes = [
            change
            for change in self.change_feed.iter_changes(since=self.kafka_seq, forever=False)
            if change
        ]
def handle(self, **options):
    """Follow the form change feed forever, pushing each change's
    metadata to the 'form-feed' Redis channel, throttled by a sleep.
    """
    since = options['from']
    sleep = float(options['sleep'] or '.01')
    last_domain = None
    change_feed = KafkaChangeFeed(topics=[topics.FORM], group_id='form-feed')
    for change in change_feed.iter_changes(since=since, forever=True):
        # deleted documents carry nothing worth publishing
        if change.deleted:
            continue
        # this is just helpful for demos to find domain transitions
        if change.metadata.domain != last_domain:
            last_domain = change.metadata.domain
            print(change.sequence_id, last_domain)
        metadata = change.metadata.to_json()
        if not options['compact']:
            metadata['country'] = _get_country(change.metadata.domain)
        RedisPublisher(facility='form-feed', broadcast=True).publish_message(
            RedisMessage(json.dumps(metadata))
        )
        time.sleep(sleep)
class capture_kafka_changes_context(object):
    """Context manager that records the Kafka changes published during the
    ``with`` block; afterwards they can be read from ``.changes``.
    """

    def __init__(self, *topics):
        self.topics = topics
        # randomized group id keeps this consumer isolated from others
        self.change_feed = KafkaChangeFeed(
            topics=topics,
            group_id='test-{}'.format(uuid.uuid4().hex),
        )
        self.changes = None

    def __enter__(self):
        # snapshot the current end offset of every watched topic
        self.kafka_seq = get_multi_topic_offset(self.topics)
        self.changes = []
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # collect everything published since __enter__, ignoring falsy entries
        pending = self.change_feed.iter_changes(since=self.kafka_seq, forever=False)
        self.changes = [change for change in pending if change]
def check_user_at_offset(offset):
    """Return True when the user changed at ``offset`` already has
    'commcare_project' in its user_data (i.e. looks repaired).

    Returns False for deleted changes or missing documents; raises
    CommandError when the feed has no change at that offset.
    """
    change_feed = KafkaChangeFeed(topics=[COMMCARE_USER], group_id='user-repair')
    # next(..., None) absorbs StopIteration from an exhausted feed
    change = next(change_feed.iter_changes(since=offset, forever=False), None)
    if not change:
        raise CommandError("No change at offset: {}".format(offset))
    if change.deleted:
        return False
    try:
        user = change.get_document()
    except ResourceNotFound:
        return False
    return 'commcare_project' in user['user_data']
def test_non_expired_checkpoint_iteration_strict(self):
    """A strict feed iterates without error from still-available offsets."""
    topic_list = [topics.FORM, topics.CASE]
    feed = KafkaChangeFeed(topics=topic_list, client_id='test-kafka-feed', strict=True)
    # starting at the earliest retained offsets must not raise
    next(feed.iter_changes(
        since=get_multi_topic_first_available_offsets(topic_list),
        forever=False,
    ))