def test_copy_rdr_tables(self):
    """Verify each RDR table is copied into the combined dataset.

    For every table in ``RDR_TABLES_TO_COPY`` the test asserts that the
    table does not yet exist in the combined dataset, calls
    ``copy_rdr_table``, asserts that the table now exists, and then
    compares the row count of the RDR table with the combined copy.
    """
    for table in RDR_TABLES_TO_COPY:
        # Sanity check: the table must not exist before the copy runs,
        # otherwise the existence assertion below proves nothing.
        self.assertFalse(
            bq_utils.table_exists(table, self.combined_dataset_id),
            msg='RDR table {table} should not exist before copy'.format(
                table=table))

        copy_rdr_table(table)

        actual = bq_utils.table_exists(table, self.combined_dataset_id)
        self.assertTrue(
            actual,
            msg='RDR table {table} should be copied'.format(table=table))

        # Check that row count in combined is same as rdr
        query = (
            'WITH rdr AS '
            ' (SELECT COUNT(1) n FROM `{rdr_dataset_id}.{table}`), '
            'combined AS '
            ' (SELECT COUNT(1) n FROM `{combined_dataset_id}.{table}`) '
            'SELECT '
            'rdr.n AS rdr_count, '
            'combined.n AS combined_count '
            'FROM rdr, combined ').format(
                rdr_dataset_id=self.rdr_dataset_id,
                combined_dataset_id=self.combined_dataset_id,
                table=table)
        response = bq_utils.query(query)
        rows = bq_utils.response2rows(response)

        # Sanity check: the query joins two single-row aggregates, so
        # exactly one row is expected. assertEqual reports the actual
        # length on failure, unlike assertTrue(len(rows) == 1).
        self.assertEqual(
            len(rows), 1,
            msg='Row count query for {table} should return one row'.format(
                table=table))

        row = rows[0]
        rdr_count, combined_count = row['rdr_count'], row['combined_count']
        msg_fmt = 'Table {table} has {rdr_count} in rdr and {combined_count} in combined (expected to be equal)'
        self.assertEqual(
            rdr_count, combined_count,
            msg_fmt.format(table=table,
                           rdr_count=rdr_count,
                           combined_count=combined_count))
def test_ehr_person_to_observation(self):
    """Check that EHR person rows are converted to observation records.

    The test creates the CDM tables, copies the RDR ``person`` table and
    runs ``move_ehr_person_to_observation``. It then reads the EHR
    ``person`` table and the combined ``observation`` table and asserts
    that, for each of gender, race, date of birth and ethnicity, the
    observation rows match the values derived from the person rows.
    Finally it asserts that the observation table holds one row per
    person per demographic check.
    """
    create_cdm_tables()
    copy_rdr_table('person')
    move_ehr_person_to_observation()

    # person table query
    q_person = '''
        SELECT (person_id,
                gender_concept_id,
                gender_source_value,
                race_concept_id,
                race_source_value,
                CAST(birth_datetime as STRING),
                ethnicity_concept_id,
                ethnicity_source_value,
                EXTRACT(DATE FROM birth_datetime))
        FROM {ehr_dataset_id}.person
    '''.format(ehr_dataset_id=self.ehr_dataset_id)
    person_payload = query_result_to_payload(bq_utils.query(q_person))
    response_ehr_person = [[item['v'] for item in row['f']]
                           for row in person_payload['F0_']]

    # observation table query; this reads from the combined dataset, so
    # the placeholder is named accordingly. Each `--` SQL comment must
    # stay on its own line or it would comment out the rest of the query.
    q_obs = '''
        SELECT (person_id,
                observation_concept_id,
                value_as_concept_id,
                value_as_string,
                observation_source_value,
                observation_date)
        FROM {combined_dataset_id}.observation obs
        WHERE   obs.observation_concept_id=4013886 -- Race - 4013886
            OR  obs.observation_concept_id=4271761 -- Ethnic group - 4271761
            OR  obs.observation_concept_id=4135376 -- Gender - 4135376
            OR  obs.observation_concept_id=4083587 -- DOB - 4083587
    '''.format(combined_dataset_id=self.combined_dataset_id)
    obs_payload = query_result_to_payload(bq_utils.query(q_obs))
    response_obs = [[item['v'] for item in row['f']]
                    for row in obs_payload['F0_']]

    # Each check is (observation concept id, index of the person column
    # holding the expected value, index of the observation column holding
    # the actual value, failure message). Gender, race and ethnicity are
    # compared against value_as_concept_id (index 2); date of birth is
    # compared against value_as_string (index 3).
    demographic_checks = (
        ('4135376', 1, 2, 'gender check fails'),
        ('4013886', 3, 2, 'race check fails'),
        ('4083587', 5, 3, 'dob check fails'),
        ('4271761', 6, 2, 'ethnicity check fails'),
    )

    for concept_id, person_idx, obs_idx, failure_msg in demographic_checks:
        # person row index 8 is the birth date, which is compared with
        # observation_date (observation row index 5).
        expected = [(row[0], concept_id, row[person_idx], row[8])
                    for row in response_ehr_person]
        actual = [(row[0], row[1], row[obs_idx], row[5])
                  for row in response_obs
                  if row[1] == concept_id]
        self.assertListEqual(sorted(expected), sorted(actual), failure_msg)

    person_ehr_row_count = int(
        bq_utils.get_table_info('person', self.ehr_dataset_id)['numRows'])
    obs_row_count = int(
        bq_utils.get_table_info('observation',
                                self.combined_dataset_id)['numRows'])
    # One observation row is expected per person for every demographic
    # check, so the multiplier follows the number of checks.
    self.assertEqual(person_ehr_row_count * len(demographic_checks),
                     obs_row_count)