def test_set_attr_document(self):
    """A document set via ``set_document`` replaces the initial one.

    The new document must be visible through both the ``document``
    attribute and the ``'doc'`` entry of ``to_dict()``.
    """
    change = Change(id='id', sequence_id='', document={})
    self.assertEqual({}, change.document)

    replacement = {'foo': 'bar'}
    change.set_document(replacement)

    self.assertEqual(replacement, change.document)
    serialized = change.to_dict()
    self.assertEqual(replacement, serialized['doc'])
def test_id(self):
    """The error built from a change keeps the change's id and sequence id."""
    doc_id = '12345'
    source_change = Change(id=doc_id, sequence_id=54321)

    error = create_error(source_change)

    self.assertEqual(error.doc_id, doc_id)
    self.assertEqual(error.pillow, 'FakePillow')
    # The change object attached to the error mirrors the original change.
    attached = error.change_object
    self.assertEqual(attached.id, doc_id)
    self.assertEqual(attached.sequence_id, 54321)
def change_from_couch_row(couch_change, document_store=None):
    """Build a ``Change`` from a couch change row.

    :param couch_change: mapping that must contain ``'id'``; the optional
        keys ``'seq'``, ``'doc'`` and ``'deleted'`` are read with defaults
        of ``None``, ``None`` and ``False`` respectively.
    :param document_store: passed through unchanged to ``Change``.
    :raises KeyError: if ``couch_change`` has no ``'id'`` key. The message
        includes the offending row to make the failure diagnosable.
    """
    try:
        change_id = couch_change['id']
    except KeyError as err:
        # Same exception type as a bare lookup, but with the row attached.
        raise KeyError(f"'id' not found in {couch_change!r}") from err
    return Change(
        id=change_id,
        sequence_id=couch_change.get('seq', None),
        document=couch_change.get('doc', None),
        deleted=couch_change.get('deleted', False),
        document_store=document_store,
    )
def _sql_case_to_change(case):
    """Wrap ``case`` in a ``Change``.

    The change id is ``case.case_id``, the document is ``case.to_json()``
    and the metadata comes from ``change_meta_from_sql_case``.
    """
    case_id = case.case_id
    case_json = case.to_json()
    meta = change_meta_from_sql_case(case)
    return Change(
        id=case_id,
        sequence_id=None,
        document=case_json,
        deleted=False,
        metadata=meta,
        document_store=None,
    )
def _ledger_v1_to_change(stock_state):
    """Wrap ``stock_state`` in a ``Change``.

    The change id is ``stock_state.pk``, the document is
    ``stock_state.to_json()`` and the metadata comes from
    ``change_meta_from_ledger_v1``.
    """
    primary_key = stock_state.pk
    ledger_json = stock_state.to_json()
    meta = change_meta_from_ledger_v1(stock_state)
    return Change(
        id=primary_key,
        sequence_id=None,
        document=ledger_json,
        deleted=False,
        metadata=meta,
        document_store=None,
    )
def _sql_sms_to_change(sms):
    """Wrap ``sms`` in a ``Change``.

    The change id is ``sms.couch_id``, the document is ``sms.to_json()``
    and the metadata comes from ``change_meta_from_sms``.
    """
    couch_id = sms.couch_id
    sms_json = sms.to_json()
    meta = change_meta_from_sms(sms)
    return Change(
        id=couch_id,
        sequence_id=None,
        document=sms_json,
        deleted=False,
        metadata=meta,
        document_store=None,
    )
def test_no_domain(self):
    """A document whose domain is ``None`` publishes metadata with that domain."""
    document = {
        'doc_type': 'CommCareCase',
        'type': 'mother',
        'domain': None,
    }
    change = Change(id='test-id', sequence_id='3', document=document)
    self.pillow.process_change(change)

    # Read back what the pillow published and decode its metadata.
    published = next(self.consumer)
    change_meta = change_meta_from_kafka_message(published.value)
    self.assertEqual(document['domain'], change_meta.domain)
def test_publish_timestamp(self):
    """The published metadata's timestamp is not later than the current UTC time."""
    document = {
        'doc_type': 'CommCareCase',
        'type': 'mother',
        'domain': None,
    }
    change = Change(id='test-id', sequence_id='3', document=document)
    self.pillow.process_change(change)

    published = next(self.consumer)
    change_meta = change_meta_from_kafka_message(published.value)
    self.assertLessEqual(change_meta.publish_timestamp, datetime.utcnow())
def _changes_from_ids(self, case_ids):
    """Return one ``Change`` per id in ``case_ids``.

    Each change carries no document; it gets a fresh ``CaseDocumentStore``
    for ``self.domain`` and metadata describing the case as coming from
    ``SOURCE_COUCH`` / ``'commcarehq'``.
    """
    changes = []
    for case_id in case_ids:
        store = CaseDocumentStore(self.domain)
        meta = ChangeMeta(
            document_id=case_id,
            domain=self.domain,
            data_source_type=SOURCE_COUCH,
            data_source_name='commcarehq',
        )
        changes.append(Change(
            id=case_id,
            sequence_id=None,
            document_store=store,
            metadata=meta,
        ))
    return changes
def _changes_from_ids(self, case_ids):
    """Return one ``Change`` per id in ``case_ids``.

    Each change carries no document; it gets a fresh ``CaseDocumentStore``
    for the literal domain ``'domain'`` and metadata with data source type
    ``'sql'`` and data source name ``'case-sql'``.
    """
    changes = []
    for case_id in case_ids:
        store = CaseDocumentStore('domain')
        meta = ChangeMeta(
            document_id=case_id,
            domain='domain',
            data_source_type='sql',
            data_source_name='case-sql',
        )
        changes.append(Change(
            id=case_id,
            sequence_id=None,
            document_store=store,
            metadata=meta,
        ))
    return changes
def _doc_to_changes(self, doc):
    """Return changes for up to 10 synclogs of the user ``doc['_id']``.

    Creates a change object for the last 10 synclogs of the given user,
    for the synclog pillow to process. This means we won't have to iterate
    through all synclogs when reindexing.
    """
    changes = []
    for row in get_synclogs_for_user(doc['_id'], limit=10):
        synclog_doc = row['doc']
        changes.append(
            Change(id=synclog_doc['_id'], sequence_id=None, document=row['doc'])
        )
    return changes
def change_from_couch_row(couch_change, document_store=None):
    """Build a ``Change`` from a couch change row.

    :param couch_change: mapping that must contain ``'id'``; ``'seq'``,
        ``'doc'`` and ``'deleted'`` are optional and default to ``None``,
        ``None`` and ``False``.
    :param document_store: passed through unchanged to ``Change``.
    :raises KeyError: if ``'id'`` is missing; the message shows the row.
    """
    try:
        row_id = couch_change['id']
    except KeyError as missing:
        # Re-raise with the whole row so the failure can be diagnosed.
        raise KeyError(f"'id' not found in {couch_change!r}") from missing

    sequence = couch_change.get('seq', None)
    document = couch_change.get('doc', None)
    is_deleted = couch_change.get('deleted', False)
    return Change(
        id=row_id,
        sequence_id=sequence,
        document=document,
        deleted=is_deleted,
        document_store=document_store,
    )
def _doc_to_changes(self, doc):
    """Return changes for up to 10 ``SyncLogSQL`` rows of user ``doc['_id']``.

    Creates a change object for the last 10 synclogs of the given user,
    for the synclog pillow to process. This means we won't have to iterate
    through all synclogs when reindexing.
    """
    # NOTE(review): ordering by ascending 'date' and slicing [:10] selects
    # the ten earliest rows, while the description above says "last 10".
    # Confirm whether '-date' was intended.
    user_synclogs = SyncLogSQL.objects.filter(user_id=doc['_id'])
    first_ten = user_synclogs.order_by('date')[:10]

    changes = []
    for synclog in first_ten:
        changes.append(
            Change(id=synclog.doc['_id'], sequence_id=None, document=synclog.doc)
        )
    return changes
def test_set_document_overrides_doc_store(self):
    """After ``set_document``, ``get_document`` no longer returns the stored doc."""
    change = Change(id=self.doc_id, sequence_id='', document_store=self.dao)
    # Before any explicit document is set, the store supplies it.
    self.assertEqual(self.doc, change.get_document())

    override = {'something': 'else'}
    change.set_document(override)
    self.assertNotEqual(self.doc, change.get_document())
def doc_to_change(doc):
    """Wrap ``doc`` in a ``Change`` with couch-sourced metadata.

    The metadata is filled from the document's ``'_id'``, ``'doc_type'``,
    optional ``'type'`` and ``'domain'`` entries; the data source name is
    the ``dbname`` of ``CommCareCase.get_db()``.
    """
    doc_id = doc['_id']
    meta = ChangeMeta(
        document_id=doc_id,
        data_source_type=data_sources.COUCH,
        data_source_name=CommCareCase.get_db().dbname,
        document_type=doc['doc_type'],
        document_subtype=doc.get('type'),
        domain=doc['domain'],
        is_deletion=False,
    )
    return Change(id=doc_id, sequence_id='0', document=doc, metadata=meta)
def doc_to_change(doc):
    """Wrap ``doc`` in a ``Change`` with metadata built from the document.

    The metadata is filled from the document's ``'_id'``, ``'doc_type'``,
    optional ``'type'`` and ``'domain'`` entries.
    """
    doc_id = doc['_id']
    # NOTE(review): a couch source type is paired with the CASE_SQL source
    # name here — confirm this combination is intentional.
    meta = ChangeMeta(
        document_id=doc_id,
        data_source_type=data_sources.SOURCE_COUCH,
        data_source_name=data_sources.CASE_SQL,
        document_type=doc['doc_type'],
        document_subtype=doc.get('type'),
        domain=doc['domain'],
        is_deletion=False,
    )
    return Change(id=doc_id, sequence_id='0', document=doc, metadata=meta)
def _test_error_logging_for_pillow(self, pillow_config):
    """Check that processing a change with the mocked pillow records one error.

    ``pillow_config`` is passed as the assertion message so a failure
    identifies which pillow configuration was being checked.
    """
    pillow = _pillow_instance_from_config_with_mock_process_change(pillow_config)
    doc = self._get_random_doc()
    change = Change(id=doc['id'], sequence_id='3', document=doc)
    pillow.process_with_error_handling(change)

    errors = PillowError.objects.filter(pillow=pillow.pillow_id).all()
    self.assertEqual(1, len(errors), pillow_config)

    logged = errors[0]
    self.assertEqual(logged.doc_id, doc['id'], pillow_config)
    self.assertEqual('exceptions.Exception', logged.error_type)
    self.assertIn(pillow.pillow_id, logged.error_traceback)
def iter_all_changes(self, start_from=None):
    """Yield a ``Change`` for every form found in the form-id chunks.

    Yields nothing when ``self.domains`` is empty. ``start_from`` is
    accepted but not used by this implementation.
    """
    if self.domains:
        for id_chunk in self._iter_form_id_chunks():
            for sql_form in FormAccessorSQL.get_forms(id_chunk):
                yield Change(
                    id=sql_form.form_id,
                    sequence_id=None,
                    document=sql_form.to_json(),
                    deleted=False,
                    metadata=change_meta_from_sql_form(sql_form),
                    document_store=None,
                )
def iter_all_changes(self, start_from=None):
    """Yield a ``Change`` for every row of the configured couch view.

    :param start_from: when not ``None``, passed to the view as ``startkey``.
    """
    # Work on a copy so the kwargs stored on the instance are not mutated.
    params = copy(self._view_kwargs)
    params['reduce'] = False  # required to paginate a view
    if start_from is not None:
        # todo: should we abstract out how the keys work inside this class?
        params['startkey'] = start_from

    rows = paginate_view(
        self._couch_db, self._view_name, self._chunk_size, **params
    )
    for row in rows:
        # todo: if include_docs isn't specified then this will make one request to couch per row
        # to get the documents. In the future we will likely need to add chunking
        yield Change(
            id=row['id'],
            sequence_id=None,
            document=row.get('doc'),
            deleted=False,
            document_store=CouchDocumentStore(self._couch_db),
        )
def _get_change(self, topic='case', doc_type='CommCareCase', doc_subtype='person'):
    """Return a ``Change`` with a random hex id and matching metadata.

    :param topic: passed to ``Change`` as ``topic``.
    :param doc_type: stored as the metadata's ``document_type``.
    :param doc_subtype: stored as the metadata's ``document_subtype``.
    """
    doc_id = uuid.uuid4().hex
    meta = ChangeMeta(
        data_source_type='couch',
        data_source_name='test_commcarehq',
        document_id=doc_id,
        document_type=doc_type,
        document_subtype=doc_subtype,
        is_deletion=False,
    )
    return Change(doc_id, 'seq', topic=topic, metadata=meta)
def _doc_to_change(doc, data_source_type, data_source_name):
    """Wrap ``doc`` in a ``Change`` whose metadata is derived from the document.

    The change's id and deleted flag are taken from the derived metadata
    (``document_id`` and ``is_deletion``).
    """
    meta = change_meta_from_doc_meta_and_document(
        doc_meta=get_doc_meta_object_from_document(doc),
        document=doc,
        data_source_type=data_source_type,
        data_source_name=data_source_name,
    )
    change_fields = {
        'id': meta.document_id,
        'sequence_id': None,
        'document': doc,
        'deleted': meta.is_deletion,
        'metadata': meta,
        'document_store': None,
    }
    return Change(**change_fields)
def change_from_kafka_message(message):
    """Build a ``Change`` from a kafka message.

    The metadata is decoded from ``message.value`` and the sequence id is
    ``message.offset``. If no document store is known for the metadata's
    data source, the change is created with ``document_store=None``.
    """
    meta = change_meta_from_kafka_message(message.value)
    try:
        store = get_document_store(
            data_source_type=meta.data_source_type,
            data_source_name=meta.data_source_name,
            domain=meta.domain,
        )
    except UnknownDocumentStore:
        # Unknown sources are tolerated: the change simply has no store.
        store = None

    return Change(
        id=meta.document_id,
        sequence_id=message.offset,
        document=None,
        deleted=meta.is_deletion,
        metadata=meta,
        document_store=store,
    )
def test(self):
    # Exercises the retry flow: a failing change records an error, a retry
    # that still fails updates it, and a successful retry clears it and
    # publishes the change metadata.
    document = {
        'doc_type': 'CommCareCase',
        'type': 'mother',
        'domain': 'kafka-test-domain',
    }
    change = Change(id='test-id', sequence_id='3', document=document)
    populate_change_metadata(change, SOURCE_COUCH, self._fake_couch.dbname)
    # Make the retry API resolve pillow names to this test's pillow.
    with patch('pillow_retry.api.get_pillow_by_name', return_value=self.pillow):
        # first change creates error
        message = 'test retry 1'
        self.pillow.process_change = MagicMock(
            side_effect=TestException(message))
        self.pillow.process_with_error_handling(
            change, PillowRuntimeContext(changes_seen=0))
        errors = self._check_errors(1, message)
        # second attempt updates error
        process_pillow_retry(errors[0])
        errors = self._check_errors(2)
        # third attempt successful
        self.pillow.process_change = self.original_process_change
        process_pillow_retry(errors[0])
        errors = list(
            PillowError.objects.filter(pillow=self.pillow.pillow_id).all())
        self.assertEqual(0, len(errors))
        # `message` is rebound here from the error text to the consumed
        # kafka message.
        message = next(self.consumer)
        change_meta = change_meta_from_kafka_message(message.value)
        self.assertEqual(SOURCE_COUCH, change_meta.data_source_type)
        self.assertEqual(self._fake_couch.dbname, change_meta.data_source_name)
        self.assertEqual('test-id', change_meta.document_id)
        self.assertEqual(document['doc_type'], change_meta.document_type)
        self.assertEqual(document['type'], change_meta.document_subtype)
        self.assertEqual(document['domain'], change_meta.domain)
        self.assertEqual(False, change_meta.is_deletion)
def test_deduplicate_changes(self):
    """For ids that repeat, only the last change for that id is kept."""
    id_seq_pairs = [
        (1, 'a'),
        (2, 'a'),
        (3, 'a'),
        (2, 'b'),
        (4, 'a'),
        (1, 'b'),
    ]
    changes = [Change(change_id, seq) for change_id, seq in id_seq_pairs]

    deduped = PillowBase._deduplicate_changes(changes)

    actual = [(change.id, change.sequence_id) for change in deduped]
    self.assertEqual(actual, [(3, 'a'), (2, 'b'), (4, 'a'), (1, 'b')])
def _test_error_logging_for_pillow(self, pillow_config):
    """Check that a failing ``process_change`` records exactly one error.

    The exception raised by the mocked ``process_change`` depends on
    ``pillow.retry_errors``: a plain ``Exception`` when it is truthy,
    otherwise ``DocumentMissingError``. ``pillow_config`` is used as the
    assertion message to identify the configuration under test.
    """
    pillow = _pillow_instance_from_config_with_mock_process_change(pillow_config)

    exc_class, exc_class_string = (
        (Exception, 'exceptions.Exception')
        if pillow.retry_errors
        else (DocumentMissingError, 'pillowtop.dao.exceptions.DocumentMissingError')
    )
    # The exception message is the pillow id so it shows up in the traceback.
    pillow.process_change = MagicMock(side_effect=exc_class(pillow.pillow_id))

    doc = self._get_random_doc()
    failing_change = Change(id=doc['id'], sequence_id='3', document=doc)
    pillow.process_with_error_handling(failing_change)

    errors = PillowError.objects.filter(pillow=pillow.pillow_id).all()
    self.assertEqual(1, len(errors), pillow_config)

    logged = errors[0]
    self.assertEqual(logged.doc_id, doc['id'], pillow_config)
    self.assertEqual(exc_class_string, logged.error_type)
    self.assertIn(pillow.pillow_id, logged.error_traceback)
def _change_from_meta(change_meta):
    """Build a document-less ``Change`` from ``change_meta``.

    The document store is looked up from the metadata's data source type,
    data source name and domain, with ``load_source="change_feed"``.
    """
    # Imported inside the function, as in the original implementation.
    from corehq.apps.change_feed.data_sources import get_document_store
    from pillowtop.feed.interface import Change

    store = get_document_store(
        data_source_type=change_meta.data_source_type,
        data_source_name=change_meta.data_source_name,
        domain=change_meta.domain,
        load_source="change_feed",
    )
    change_fields = dict(
        id=change_meta.document_id,
        sequence_id=None,
        document=None,
        deleted=change_meta.is_deletion,
        metadata=change_meta,
        document_store=store,
        topic=None,
        partition=None,
    )
    return Change(**change_fields)
def change_from_kafka_message(message):
    """Build a ``Change`` from a kafka message.

    The metadata is decoded from ``message.value``; the sequence id and
    topic come from ``message.offset`` and ``message.topic``. When the
    data source has no known document store, the error is reported via
    ``notify_error`` and the change is created with ``document_store=None``.
    """
    meta = change_meta_from_kafka_message(message.value)
    try:
        store = get_document_store(
            data_source_type=meta.data_source_type,
            data_source_name=meta.data_source_name,
            domain=meta.domain
        )
    except UnknownDocumentStore:
        store = None
        notify_error("Unknown document store: {}".format(meta.data_source_type))

    return Change(
        id=meta.document_id,
        sequence_id=message.offset,
        document=None,
        deleted=meta.is_deletion,
        metadata=meta,
        document_store=store,
        topic=message.topic,
    )
def test_process_change(self):
    """Processing one change publishes exactly one message with matching metadata."""
    document = {
        'doc_type': 'CommCareCase',
        'type': 'mother',
        'domain': 'kafka-test-domain',
    }
    change = Change(id='test-id', sequence_id='3', document=document)
    self.pillow.process_change(change)

    published = next(self.consumer)
    meta = change_meta_from_kafka_message(published.value)
    self.assertEqual(SOURCE_COUCH, meta.data_source_type)
    self.assertEqual(self._fake_couch.dbname, meta.data_source_name)
    self.assertEqual('test-id', meta.document_id)
    self.assertEqual(document['doc_type'], meta.document_type)
    self.assertEqual(document['type'], meta.document_subtype)
    self.assertEqual(document['domain'], meta.domain)
    self.assertEqual(False, meta.is_deletion)

    # Nothing else should have been published.
    with self.assertRaises(StopIteration):
        next(self.consumer)
def test(self):
    # Exercises the retry flow: a failing change records an error, a retry
    # that still fails updates it, and a successful retry clears it and is
    # counted once by the processor.
    document = {
        '_id': 'test-id',
        'doc_type': 'CommCareCase',
        'type': 'mother',
        'domain': 'kafka-test-domain',
    }
    change = Change(id='test-id', sequence_id='3', document=document)
    populate_change_metadata(change, SOURCE_COUCH, 'test_commcarehq')
    # Make the retry API resolve pillow names to this test's pillow.
    with patch('pillow_retry.api.get_pillow_by_name', return_value=self.pillow):
        # first change creates error
        message = 'test retry 1'
        self.pillow.process_change = MagicMock(
            side_effect=TestException(message))
        self.pillow.process_with_error_handling(
            change, PillowRuntimeContext(changes_seen=0))
        errors = self._check_errors(1, message)
        # second attempt updates error
        with process_pillow_changes(self.pillow):
            process_pillow_retry(errors[0])
        errors = self._check_errors(2)
        # third attempt successful
        self.pillow.process_change = self.original_process_change
        with process_pillow_changes(self.pillow):
            process_pillow_retry(errors[0])
        errors = list(
            PillowError.objects.filter(pillow=self.pillow.pillow_id).all())
        self.assertEqual(0, len(errors))
        # Only the final, successful attempt should have reached the processor.
        self.assertEqual(1, self.processor.count)
def _doc_to_changes(self, doc):
    """Return one ``Change`` per form from ``get_last_forms_by_app(doc['_id'])``.

    Creates a change object for the last form submission for the user to
    each of their apps. This allows us to reindex for the app status
    report without reindexing all forms.
    """
    def form_to_change(form):
        # Metadata is derived from the form document itself.
        meta = change_meta_from_doc_meta_and_document(
            doc_meta=get_doc_meta_object_from_document(form),
            document=form,
            data_source_type='elasticsearch',
            data_source_name='hqforms',
        )
        return Change(
            id=meta.document_id,
            sequence_id=None,
            document=form,
            deleted=meta.is_deletion,
            metadata=meta,
            document_store=None,
        )

    return [form_to_change(form) for form in get_last_forms_by_app(doc['_id'])]
def test_set_attr_deleted(self):
    """Assigning ``deleted`` updates the attribute and ``to_dict()['deleted']``."""
    change = Change(id='id', sequence_id='', deleted=True)
    self.assertTrue(change.deleted)

    change.deleted = False

    self.assertFalse(change.deleted)
    serialized = change.to_dict()
    self.assertFalse(serialized['deleted'])
def test_set_attr_seq(self):
    """Assigning ``sequence_id`` updates the attribute and ``to_dict()['seq']``."""
    change = Change(id='id', sequence_id='seq')
    self.assertEqual('seq', change.sequence_id)

    change.sequence_id = 'seq-2'

    self.assertEqual('seq-2', change.sequence_id)
    serialized = change.to_dict()
    self.assertEqual('seq-2', serialized['seq'])
def _change(id):
    """Return a ``Change`` with the given id and no sequence id."""
    change = Change(sequence_id=None, id=id)
    return change
def test_get_set_document(self):
    """``get_document`` returns ``None`` until a document is set."""
    change = Change(id='id', sequence_id='')
    self.assertEqual(None, change.get_document())

    payload = {'test': '123'}
    change.set_document(payload)
    self.assertEqual(payload, change.get_document())
def test_get_document_from_doc_store(self):
    """Without an explicit document, ``get_document`` returns the stored doc."""
    change = Change(
        id=self.doc_id,
        sequence_id='',
        document_store=self.dao,
    )
    fetched = change.get_document()
    self.assertEqual(self.doc, fetched)
def test_initial_document_overrides_doc_store(self):
    """A document passed at construction wins over the document store."""
    initial = {'not': 'expected'}
    change = Change(
        id=self.doc_id,
        sequence_id='',
        document=initial,
        document_store=self.dao,
    )
    self.assertNotEqual(self.doc, change.get_document())
def test_get_document_not_found(self):
    """``get_document`` returns ``None`` for a freshly generated random id."""
    unknown_id = uuid.uuid4().hex
    change = Change(id=unknown_id, sequence_id='', document_store=self.dao)
    self.assertEqual(None, change.get_document())
def test_set_attr_id(self):
    """Assigning ``id`` updates the attribute and ``to_dict()['id']``."""
    change = Change(id='first-id', sequence_id='')
    self.assertEqual('first-id', change.id)

    change.id = 'new-id'

    self.assertEqual('new-id', change.id)
    serialized = change.to_dict()
    self.assertEqual('new-id', serialized['id'])