def test_subqueries(self):
    """Check that subqueries are only generated for HPO tables that exist.

    The test runs in two phases against the ``measurement`` table:

    1. Only the ``pitt`` table has been created, so both the mapping and
       the union subquery lists must hold exactly one entry, and that
       entry must reference the pitt table.
    2. After the ``chs`` table is created as well, both lists must hold
       two entries, one referencing each table.
    """
    hpo_ids = ['chs', 'pitt']
    project_id = bq_utils.app_identity.get_application_id()
    dataset_id = bq_utils.get_dataset_id()
    table = 'measurement'
    mapping_msg = 'Expected mapping subquery count %s but got %s'
    union_msg = 'Expected union subquery count %s but got %s'

    # Phase 1: no chs table exists yet, so chs must not contribute a
    # subquery; only pitt is expected.
    pitt_table_id = self._create_hpo_table('pitt', table, dataset_id)
    expected_count = 1

    subqueries = ehr_union._mapping_subqueries(table, hpo_ids, dataset_id,
                                               project_id)
    actual_count = len(subqueries)
    self.assertEqual(expected_count, actual_count,
                     mapping_msg % (expected_count, actual_count))
    self.assertIn(pitt_table_id, subqueries[0])

    subqueries = ehr_union._union_subqueries(table, hpo_ids, dataset_id,
                                             self.output_dataset_id)
    # Recompute the count for the union subqueries; reusing the value
    # from the mapping check above would make this assertion vacuous.
    actual_count = len(subqueries)
    self.assertEqual(expected_count, actual_count,
                     union_msg % (expected_count, actual_count))
    self.assertIn(pitt_table_id, subqueries[0])

    # Phase 2: after adding the measurement table for chs, subqueries
    # should be generated for both sites.
    chs_table_id = self._create_hpo_table('chs', table, dataset_id)
    expected_count = 2
    expected_table_ids = (pitt_table_id, chs_table_id)

    subqueries = ehr_union._mapping_subqueries(table, hpo_ids, dataset_id,
                                               project_id)
    actual_count = len(subqueries)
    self.assertEqual(expected_count, actual_count,
                     mapping_msg % (expected_count, actual_count))
    for table_id in expected_table_ids:
        self.assertTrue(any(table_id in sq for sq in subqueries))

    subqueries = ehr_union._union_subqueries(table, hpo_ids, dataset_id,
                                             self.output_dataset_id)
    actual_count = len(subqueries)
    self.assertEqual(expected_count, actual_count,
                     union_msg % (expected_count, actual_count))
    for table_id in expected_table_ids:
        self.assertTrue(any(table_id in sq for sq in subqueries))
def test_mapping_subqueries(self, mock_list_all_table_ids):
    """
    Verify the query for loading mapping tables.

    A constant value should be added to destination key fields in all
    tables except for person, where the values in the src_person_id and
    person_id fields should be equal.

    :param mock_list_all_table_ids: simulate tables being returned
    """
    # patch list_all_table_ids so that
    # for FAKE_SITE_1 and FAKE_SITE_2
    #   it returns both of their person, visit_occurrence and pii_name
    #   tables
    # for FAKE_SITE_1 only
    #   it returns the condition_occurrence table
    tables = ['person', 'visit_occurrence', 'pii_name']
    fake_table_ids = [
        bq_utils.get_table_id(hpo_id, table)
        for hpo_id in self.hpo_ids
        for table in tables
    ]
    fake_table_ids.append(
        bq_utils.get_table_id(self.FAKE_SITE_1, 'condition_occurrence'))
    mock_list_all_table_ids.return_value = fake_table_ids
    hpo_count = len(self.hpo_ids)

    # offset is added to the visit_occurrence destination key field
    actual = eu._mapping_subqueries('visit_occurrence', self.hpo_ids,
                                    'fake_dataset', 'fake_project')
    hpo_unique_identifiers = eu.get_hpo_offsets(self.hpo_ids)
    self.assertEqual(hpo_count, len(actual))
    # The length check above guarantees zip pairs every site with the
    # subquery at the same position.
    for hpo_id, subquery in zip(self.hpo_ids, actual):
        hpo_table = bq_utils.get_table_id(hpo_id, 'visit_occurrence')
        hpo_offset = hpo_unique_identifiers[hpo_id]
        self.assertIn(f"'{hpo_table}' AS src_table_id", subquery)
        self.assertIn('visit_occurrence_id AS src_visit_occurrence_id',
                      subquery)
        self.assertIn(
            f'visit_occurrence_id + {hpo_offset} AS visit_occurrence_id',
            subquery)

    # src_person_id and person_id fields both use participant ID value
    # (offset is NOT added to the value)
    actual = eu._mapping_subqueries('person', self.hpo_ids, 'fake_dataset',
                                    'fake_project')
    self.assertEqual(hpo_count, len(actual))
    for hpo_id, subquery in zip(self.hpo_ids, actual):
        hpo_table = bq_utils.get_table_id(hpo_id, 'person')
        self.assertIn(f"'{hpo_table}' AS src_table_id", subquery)
        self.assertIn('person_id AS src_person_id', subquery)
        self.assertIn('person_id AS person_id', subquery)

    # only return queries for tables that exist
    actual = eu._mapping_subqueries('condition_occurrence', self.hpo_ids,
                                    'fake_dataset', 'fake_project')
    self.assertEqual(1, len(actual))
    subquery = actual[0]
    hpo_table = bq_utils.get_table_id(self.FAKE_SITE_1,
                                      'condition_occurrence')
    hpo_offset = hpo_unique_identifiers[self.FAKE_SITE_1]
    self.assertIn(f"'{hpo_table}' AS src_table_id", subquery)
    self.assertIn('condition_occurrence_id AS src_condition_occurrence_id',
                  subquery)
    self.assertIn(
        f'condition_occurrence_id + {hpo_offset} AS condition_occurrence_id',
        subquery)