def test_partitioned_by_owner_id(self):
    # three docs: two sharing an owner_id, one with a different owner_id
    sample_doc1, _ = get_sample_doc_and_indicators()
    sample_doc1['owner_id'] = "abcdefghijklmnop"
    sample_doc2, _ = get_sample_doc_and_indicators()
    sample_doc2['owner_id'] = "abcdefghijklmnop"
    # drop g
    sample_doc3, _ = get_sample_doc_and_indicators()
    sample_doc3['owner_id'] = "abcdefhijklmnop"

    self._process_docs([sample_doc1, sample_doc2, sample_doc3])
    self.assertEqual(3, self.adapter.get_query_object().count())

    # ensure the docs landed in separate child tables
    result = self.adapter.engine.execute(
        'SELECT COUNT(*) FROM "{}ghijklmnop";'.format(EXPECTED_UCR_CHILD_TABLE_PREFIX))
    result = result.fetchone()[0]
    self.assertEqual(2, result)
    result = self.adapter.engine.execute(
        'SELECT COUNT(*) FROM "{}fhijklmnop";'.format(EXPECTED_UCR_CHILD_TABLE_PREFIX))
    result = result.fetchone()[0]
    self.assertEqual(1, result)

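# A minimal sketch (an inference from the fixtures above, not production
# code) of how the child-table suffix in the SQL assertions could be
# derived from owner_id: the test data is consistent with taking the last
# ten characters of the owner_id.
def _illustrative_owner_id_suffix(owner_id):
    return owner_id[-10:]

assert _illustrative_owner_id_suffix("abcdefghijklmnop") == "ghijklmnop"
assert _illustrative_owner_id_suffix("abcdefhijklmnop") == "fhijklmnop"
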
def test_rebuild_indicators(self, datetime_mock):
    datetime_mock.utcnow.return_value = self.fake_time_now
    self.config.save()
    sample_doc, _ = get_sample_doc_and_indicators(self.fake_time_now)
    CommCareCase.get_db().save_doc(sample_doc)
    rebuild_indicators(self.config._id)
    self._check_sample_doc_state()

def test_multiple_validations(self):
    self.config.validations = self.config.validations + [
        Validation.wrap({
            "name": "a_second_validation",
            "error_message": "another starred validation",
            "expression": {
                "type": "boolean_expression",
                "expression": {
                    "type": "property_name",
                    "property_name": "is_starred"
                },
                "operator": "in",
                "property_value": ["yes", "no"]
            }
        })
    ]
    sample_doc, expected_indicators = get_sample_doc_and_indicators()
    self.assertIsNone(self.config.validate_document(sample_doc))

    sample_doc['is_starred'] = 'what is a star?'
    try:
        self.config.validate_document(sample_doc)
    except ValidationError as e:
        self.assertEqual(len(e.errors), 2)
    else:
        self.fail("There were no validation errors returned")

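# Hedged sketch of the filter semantics the validations above rely on: a
# "boolean_expression" evaluates its inner expression against the document
# and compares the result to property_value with the named operator.
# Assuming "in" means membership, the second validation passes only when
# is_starred is "yes" or "no".
def _illustrative_is_starred_check(doc):
    return doc.get('is_starred') in ["yes", "no"]

assert _illustrative_is_starred_check({'is_starred': 'yes'})
assert not _illustrative_is_starred_check({'is_starred': 'what is a star?'})
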
def test_rebuild_indicators(self, datetime_mock):
    datetime_mock.utcnow.return_value = self.fake_time_now
    sample_doc, expected_indicators = get_sample_doc_and_indicators(self.fake_time_now)
    CommCareCase.get_db().save_doc(sample_doc)
    self.addCleanup(lambda id: CommCareCase.get_db().delete_doc(id), sample_doc['_id'])
    rebuild_indicators(self.config._id)
    self._check_sample_doc_state(expected_indicators)

def test_report_data_source(self):
    # bootstrap report data sources against indicator data sources
    report_config_template = get_sample_report_config()
    report_config_1 = ReportConfiguration.wrap(report_config_template.to_json())
    report_config_1.config_id = self.ds_1._id
    report_config_2 = ReportConfiguration.wrap(report_config_template.to_json())
    report_config_2.config_id = self.ds_2._id

    # save a few docs to ds 1
    sample_doc, _ = get_sample_doc_and_indicators()
    num_docs = 3
    for i in range(num_docs):
        sample_doc['_id'] = uuid.uuid4().hex
        self.ds1_adapter.save(sample_doc)

    # ds 1 should have data, ds 2 should not
    ds1_rows = ReportFactory.from_spec(report_config_1).get_data()
    self.assertEqual(1, len(ds1_rows))
    self.assertEqual(num_docs, ds1_rows[0]['count'])
    ds2_rows = ReportFactory.from_spec(report_config_2).get_data()
    self.assertEqual(0, len(ds2_rows))

    # save one doc to ds 2
    sample_doc['_id'] = uuid.uuid4().hex
    self.ds2_adapter.save(sample_doc)

    # ds 1 should still have the same data, ds 2 should now have one row
    ds1_rows = ReportFactory.from_spec(report_config_1).get_data()
    self.assertEqual(1, len(ds1_rows))
    self.assertEqual(num_docs, ds1_rows[0]['count'])
    ds2_rows = ReportFactory.from_spec(report_config_2).get_data()
    self.assertEqual(1, len(ds2_rows))
    self.assertEqual(1, ds2_rows[0]['count'])

def test_pillow_save_to_multiple_databases(self):
    self.assertNotEqual(self.ds1_adapter.engine.url, self.ds2_adapter.engine.url)
    pillow = get_case_pillow(ucr_configs=[self.ds_1, self.ds_2])
    self.assertNotEqual(self.ds1_adapter.engine.url, self.ds2_adapter.engine.url)
    sample_doc, _ = get_sample_doc_and_indicators()
    pillow.process_change(doc_to_change(sample_doc))
    self.assertNotEqual(self.ds1_adapter.engine.url, self.ds2_adapter.engine.url)
    self.assertEqual(1, self.ds1_adapter.get_query_object().count())
    self.assertEqual(1, self.ds2_adapter.get_query_object().count())

def test_is_starred_validation(self):
    sample_doc, expected_indicators = get_sample_doc_and_indicators()
    self.assertIsNone(self.config.validate_document(sample_doc))

    sample_doc['is_starred'] = 'what is a star?'
    with self.assertRaisesRegex(ValidationError, "is_starred has unexpected value"):
        self.config.validate_document(sample_doc)

def test_check_if_doc_exist(self, datetime_mock):
    datetime_mock.utcnow.return_value = self.fake_time_now
    sample_doc, expected_indicators = get_sample_doc_and_indicators(self.fake_time_now)
    self.assertFalse(self.adapter.doc_exists(sample_doc))
    self.pillow.process_change(doc_to_change(sample_doc))
    self.assertIs(self.adapter.doc_exists(sample_doc), True)

def _create_cases(self, datetime_mock, docs=None):
    # default to None rather than a mutable default argument
    datetime_mock.utcnow.return_value = self.fake_time_now
    docs = docs or [
        get_sample_doc_and_indicators(self.fake_time_now)[0]
        for i in range(10)
    ]
    # save cases to the DB - this should also publish to kafka
    cases = [_save_sql_case(doc) for doc in docs]
    return cases

def test_pillow_save_to_multiple_databases(self):
    self.assertNotEqual(self.ds1_adapter.engine.url, self.ds2_adapter.engine.url)
    pillow = ConfigurableIndicatorPillow()
    pillow.bootstrap(configs=[self.ds_1, self.ds_2])
    self.assertNotEqual(self.ds1_adapter.engine.url, self.ds2_adapter.engine.url)
    sample_doc, _ = get_sample_doc_and_indicators()
    pillow.change_transport(sample_doc)
    self.assertNotEqual(self.ds1_adapter.engine.url, self.ds2_adapter.engine.url)
    self.assertEqual(1, self.ds1_adapter.get_query_object().count())
    self.assertEqual(1, self.ds2_adapter.get_query_object().count())

def test_partitioned_by_date(self):
    # two docs from separate days
    sample_doc1, _ = get_sample_doc_and_indicators()
    sample_doc1['opened_on'] = datetime(2018, 1, 1)
    sample_doc2, _ = get_sample_doc_and_indicators()
    sample_doc2['opened_on'] = datetime(2018, 1, 2)

    self._process_docs([sample_doc1, sample_doc2])
    self.assertEqual(2, self.adapter.get_query_object().count())

    # ensure the docs landed in separate child tables
    result = self.adapter.engine.execute(
        'SELECT COUNT(*) FROM "{}y2018d001";'.format(EXPECTED_UCR_CHILD_TABLE_PREFIX))
    result = result.fetchone()[0]
    self.assertEqual(1, result)
    result = self.adapter.engine.execute(
        'SELECT COUNT(*) FROM "{}y2018d002";'.format(EXPECTED_UCR_CHILD_TABLE_PREFIX))
    result = result.fetchone()[0]
    self.assertEqual(1, result)

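# A minimal sketch (an assumption inferred from the table names asserted
# above, not production code) of how an opened_on date could map to the
# "y<year>d<day-of-year>" child-table suffix:
def _illustrative_date_suffix(opened_on):
    # zero-padded day of year: 2018-01-02 -> "y2018d002"
    return "y{:04d}d{:03d}".format(opened_on.year, opened_on.timetuple().tm_yday)

assert _illustrative_date_suffix(datetime(2018, 1, 1)) == "y2018d001"
assert _illustrative_date_suffix(datetime(2018, 1, 2)) == "y2018d002"
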
def _check_sample_doc_state(self, datetime_mock):
    datetime_mock.utcnow.return_value = self.fake_time_now
    _, expected_indicators = get_sample_doc_and_indicators(self.fake_time_now)
    self.assertEqual(1, self.adapter.get_query_object().count())
    row = self.adapter.get_query_object()[0]
    for k in row.keys():
        v = getattr(row, k)
        if isinstance(expected_indicators[k], decimal.Decimal):
            self.assertAlmostEqual(expected_indicators[k], v)
        else:
            self.assertEqual(expected_indicators[k], v)

def test_indicators(self, datetime_mock):
    fake_time_now = datetime.datetime(2015, 4, 24, 12, 30, 8, 24886)
    datetime_mock.utcnow.return_value = fake_time_now
    # indicators
    sample_doc, expected_indicators = get_sample_doc_and_indicators(fake_time_now)
    expected_indicators["commcare_project"] = sample_doc["domain"]
    [results] = self.config.get_all_values(sample_doc)
    for result in results:
        self.assertEqual(expected_indicators[result.column.id], result.value)

def test_partial_fallback_calls(self, iter_docs_patch, process_change_patch):
    docs = [
        get_sample_doc_and_indicators(self.fake_time_now)[0]
        for i in range(10)
    ]
    # this is equivalent to failing on last 4 docs, since they are missing in docstore
    iter_docs_patch.return_value = docs[0:6]
    cases = self._create_and_process_changes(docs)
    # since chunked processing failed, normal processing should get called
    process_change_patch.assert_has_calls([mock.call(mock.ANY)] * 4)

def test_process_filter_no_longer_pass(self, datetime_mock):
    datetime_mock.utcnow.return_value = self.fake_time_now
    sample_doc, expected_indicators = get_sample_doc_and_indicators(self.fake_time_now)
    self.pillow.process_change(doc_to_change(sample_doc))
    self._check_sample_doc_state(expected_indicators)

    sample_doc['type'] = 'wrong_type'
    self.pillow.process_change(doc_to_change(sample_doc))
    self.assertEqual(0, self.adapter.get_query_object().count())

def test_indicators(self, datetime_mock):
    fake_time_now = datetime.datetime(2015, 4, 24, 12, 30, 8, 24886)
    datetime_mock.utcnow.return_value = fake_time_now
    # indicators
    sample_doc, expected_indicators = get_sample_doc_and_indicators(fake_time_now)
    [results] = self.config.get_all_values(sample_doc)
    for result in results:
        try:
            self.assertEqual(expected_indicators[result.column.id], result.value)
        except AssertionError:
            # todo: this is a hack due to the fact that type conversion currently
            # happens in the database layer. this should eventually be fixed.
            self.assertEqual(str(expected_indicators[result.column.id]), result.value)

def test_get_docs(self):
    docs = [
        get_sample_doc_and_indicators(self.fake_time_now)[0]
        for i in range(10)
    ]
    feed = self.pillow.get_change_feed()
    since = feed.get_latest_offsets()
    cases = self._create_cases(docs=docs)
    changes = list(feed.iter_changes(since, forever=False))
    bad_changes, result_docs = ConfigurableReportPillowProcessor.get_docs_for_changes(
        changes, docs[1]['domain'])
    self.assertEqual(
        set([c.id for c in changes]),
        set([doc['_id'] for doc in result_docs])
    )

def test_partial_fallback_data(self, iter_docs_patch):
    docs = [
        get_sample_doc_and_indicators(self.fake_time_now)[0]
        for i in range(10)
    ]
    # this is equivalent to failing on last 5 docs, since they are missing in docstore
    iter_docs_patch.return_value = docs[0:5]
    cases = self._create_and_process_changes(docs=docs)
    query = self.adapter.get_query_object()
    # first five docs should be processed in bulk, last five serially
    self.assertEqual(query.count(), 10)
    self.assertEqual(
        set([case.case_id for case in cases]),
        set([row.doc_id for row in query.all()])
    )

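# A generic sketch (illustrative names and callables, not the real
# processor API) of the bulk-with-serial-fallback pattern the two
# partial-fallback tests exercise: everything the bulk doc fetch returned
# is processed in one pass, and each change whose document was missing
# from the bulk result is retried one at a time.
def _illustrative_process_with_fallback(changes, fetched_docs, process_bulk, process_one):
    # bulk path handles all the docs the store returned
    process_bulk(fetched_docs)
    # every change whose doc was missing falls back to serial processing
    fetched_ids = {doc['_id'] for doc in fetched_docs}
    for change in changes:
        if change.id not in fetched_ids:
            process_one(change)
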
def _test_process_doc_from_sql(self, datetime_mock):
    datetime_mock.utcnow.return_value = self.fake_time_now
    sample_doc, expected_indicators = get_sample_doc_and_indicators(self.fake_time_now)

    since = self.pillow.get_change_feed().get_latest_offsets()

    # save case to DB - should also publish to kafka
    case = _save_sql_case(sample_doc)

    # run pillow and check changes
    self.pillow.process_changes(since=since, forever=False)
    self._check_sample_doc_state(expected_indicators)

    CaseAccessorSQL.hard_delete_cases(case.domain, [case.case_id])

def test_process_doc_from_sql(self, datetime_mock):
    datetime_mock.utcnow.return_value = self.fake_time_now
    sample_doc, expected_indicators = get_sample_doc_and_indicators(self.fake_time_now)

    since = self.pillow.get_change_feed().get_latest_offsets()

    # save case to DB - should also publish to kafka
    case = _save_sql_case(sample_doc)

    # run pillow and check changes
    self.pillow.process_changes(since=since, forever=False)
    self._check_sample_doc_state(expected_indicators)

    CaseAccessorSQL.hard_delete_cases(case.domain, [case.case_id])

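# The SQL pillow tests here share one choreography, summarised in this
# schematic sketch (illustrative callables, not the real feed API):
def _illustrative_offsets_then_replay(feed, do_write, process_changes):
    since = feed.get_latest_offsets()           # 1. remember where the feed ends now
    do_write()                                  # 2. write; this publishes a change
    process_changes(since=since, forever=False) # 3. replay only the new changes
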
def _test_process_doc_from_couch(self, datetime_mock, pillow):
    datetime_mock.utcnow.return_value = self.fake_time_now
    sample_doc, expected_indicators = get_sample_doc_and_indicators(self.fake_time_now)

    # make sure case is in DB
    case = CommCareCase.wrap(sample_doc)
    with drop_connected_signals(case_post_save):
        case.save()

    # send to kafka
    since = self.pillow.get_change_feed().get_latest_offsets()
    producer.send_change(topics.CASE, doc_to_change(sample_doc).metadata)

    # run pillow and check changes
    pillow.process_changes(since=since, forever=False)
    self._check_sample_doc_state(expected_indicators)

    case.delete()

def test_pillow_save_to_one_database_at_a_time(self):
    pillow = ConfigurableIndicatorPillow()
    pillow.bootstrap(configs=[self.ds_1])
    sample_doc, _ = get_sample_doc_and_indicators()
    pillow.change_transport(sample_doc)
    self.assertEqual(1, self.ds1_adapter.get_query_object().count())
    self.assertEqual(0, self.ds2_adapter.get_query_object().count())

    # save to the other
    pillow.bootstrap(configs=[self.ds_2])
    sample_doc['_id'] = uuid.uuid4().hex
    pillow.change_transport(sample_doc)
    self.assertEqual(1, self.ds1_adapter.get_query_object().count())
    self.assertEqual(1, self.ds2_adapter.get_query_object().count())
    self.assertEqual(1, self.ds1_adapter.get_query_object().filter_by(doc_id='some-doc-id').count())
    self.assertEqual(1, self.ds2_adapter.get_query_object().filter_by(doc_id=sample_doc['_id']).count())

def test_pillow_save_to_one_database_at_a_time(self):
    pillow = get_case_pillow(ucr_configs=[self.ds_1])
    sample_doc, _ = get_sample_doc_and_indicators()
    pillow.process_change(doc_to_change(sample_doc))
    self.assertEqual(1, self.ds1_adapter.get_query_object().count())
    self.assertEqual(0, self.ds2_adapter.get_query_object().count())

    # save to the other
    pillow = get_case_pillow(ucr_configs=[self.ds_2])
    orig_id = sample_doc['_id']
    sample_doc['_id'] = uuid.uuid4().hex
    pillow.process_change(doc_to_change(sample_doc))
    self.assertEqual(1, self.ds1_adapter.get_query_object().count())
    self.assertEqual(1, self.ds2_adapter.get_query_object().count())
    self.assertEqual(1, self.ds1_adapter.get_query_object().filter_by(doc_id=orig_id).count())
    self.assertEqual(1, self.ds2_adapter.get_query_object().filter_by(doc_id=sample_doc['_id']).count())

def test_mirroring(self): ds3 = DataSourceConfiguration.wrap(get_sample_data_source().to_json()) ds3.engine_id = "default" ds3.mirrored_engine_ids = ['engine-2'] adapter = get_indicator_adapter(ds3) self.assertEqual(type(adapter.adapter), MultiDBSqlAdapter) self.assertEqual(len(adapter.all_adapters), 2) for db_adapter in adapter.all_adapters: with db_adapter.session_context() as session: self.assertEqual(0, session.query(db_adapter.get_table()).count()) with patch('pillowtop.models.KafkaCheckpoint.get_or_create_for_checkpoint_id'): pillow = get_case_pillow(ucr_configs=[ds3]) sample_doc, _ = get_sample_doc_and_indicators() pillow.process_change(doc_to_change(sample_doc)) for db_adapter in adapter.all_adapters: with db_adapter.session_context() as session: self.assertEqual(1, session.query(db_adapter.get_table()).count())
def test_mirroring(self):
    ds3 = DataSourceConfiguration.wrap(get_sample_data_source().to_json())
    ds3.engine_id = DEFAULT_ENGINE_ID
    ds3.mirrored_engine_ids = ['engine-2']
    adapter = get_indicator_adapter(ds3)
    self.assertEqual(type(adapter.adapter), MultiDBSqlAdapter)
    self.assertEqual(len(adapter.all_adapters), 2)
    for db_adapter in adapter.all_adapters:
        with db_adapter.session_context() as session:
            self.assertEqual(0, session.query(db_adapter.get_table()).count())

    with patch('pillowtop.models.KafkaCheckpoint.get_or_create_for_checkpoint_id'):
        pillow = get_case_pillow(ucr_configs=[ds3])
    sample_doc, _ = get_sample_doc_and_indicators()
    pillow.process_change(doc_to_change(sample_doc))

    for db_adapter in adapter.all_adapters:
        with db_adapter.session_context() as session:
            self.assertEqual(1, session.query(db_adapter.get_table()).count())

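# A minimal fan-out sketch (an assumption, not the real MultiDBSqlAdapter)
# of the behaviour test_mirroring asserts: one logical save is written
# through every mirrored engine's adapter, so each backing table ends up
# with the same row count.
class _IllustrativeFanOutAdapter:
    def __init__(self, adapters):
        self.all_adapters = list(adapters)

    def save(self, doc):
        for adapter in self.all_adapters:
            adapter.save(doc)
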
def _test_process_deleted_doc_from_sql(self, datetime_mock):
    datetime_mock.utcnow.return_value = self.fake_time_now
    sample_doc, expected_indicators = get_sample_doc_and_indicators(self.fake_time_now)

    since = self.pillow.get_change_feed().get_latest_offsets()

    # save case to DB - should also publish to kafka
    case = _save_sql_case(sample_doc)

    # run pillow and check changes
    self.pillow.process_changes(since=since, forever=False)
    self._check_sample_doc_state(expected_indicators)

    # delete the case and verify it's removed
    since = self.pillow.get_change_feed().get_latest_offsets()
    CaseAccessorSQL.soft_delete_cases(case.domain, [case.case_id])
    self.pillow.process_changes(since=since, forever=False)
    self.assertEqual(0, self.adapter.get_query_object().count())

    CaseAccessorSQL.hard_delete_cases(case.domain, [case.case_id])

def test_report_data_source(self):
    # bootstrap report data sources against indicator data sources
    report_config_template = get_sample_report_config()
    report_config_1 = ReportConfiguration.wrap(report_config_template.to_json())
    report_config_1.config_id = self.ds_1._id
    report_config_2 = ReportConfiguration.wrap(report_config_template.to_json())
    report_config_2.config_id = self.ds_2._id

    # save a few docs to ds 1
    sample_doc, _ = get_sample_doc_and_indicators()
    num_docs = 3
    for i in range(num_docs):
        sample_doc['_id'] = uuid.uuid4().hex
        self.ds1_adapter.save(sample_doc)

    # ds 1 should have data, ds 2 should not
    ds1_rows = ConfigurableReportDataSource.from_spec(report_config_1).get_data()
    self.assertEqual(1, len(ds1_rows))
    self.assertEqual(num_docs, ds1_rows[0]['count'])
    ds2_rows = ConfigurableReportDataSource.from_spec(report_config_2).get_data()
    self.assertEqual(0, len(ds2_rows))

    # save one doc to ds 2
    sample_doc['_id'] = uuid.uuid4().hex
    self.ds2_adapter.save(sample_doc)

    # ds 1 should still have the same data, ds 2 should now have one row
    ds1_rows = ConfigurableReportDataSource.from_spec(report_config_1).get_data()
    self.assertEqual(1, len(ds1_rows))
    self.assertEqual(num_docs, ds1_rows[0]['count'])
    ds2_rows = ConfigurableReportDataSource.from_spec(report_config_2).get_data()
    self.assertEqual(1, len(ds2_rows))
    self.assertEqual(1, ds2_rows[0]['count'])

def test_basic_doc_processing(self, datetime_mock):
    datetime_mock.utcnow.return_value = self.fake_time_now
    sample_doc, expected_indicators = get_sample_doc_and_indicators(self.fake_time_now)
    self.pillow.process_change(doc_to_change(sample_doc))
    self._check_sample_doc_state(expected_indicators)

def test_change_transport(self, datetime_mock):
    datetime_mock.utcnow.return_value = self.fake_time_now
    sample_doc, _ = get_sample_doc_and_indicators(self.fake_time_now)
    self.pillow.change_transport(sample_doc)
    self._check_sample_doc_state()

def test_not_relevant_to_domain(self, datetime_mock):
    datetime_mock.utcnow.return_value = self.fake_time_now
    sample_doc, expected_indicators = get_sample_doc_and_indicators(self.fake_time_now)
    sample_doc['domain'] = 'not-this-domain'
    self.pillow.process_change(doc_to_change(sample_doc))
    self.assertEqual(0, self.adapter.get_query_object().count())