def _compare_cities(
    project, validation_dataset, rdr_dataset, pii_dataset, hpo, concept_id, pii_field
):
    """
    Compare city values from the hpo PII address table with RDR match values.

    :param project: project to search for the datasets
    :param validation_dataset: the auto generated match validation dataset
        created in this module.  queried to get the location value to
        identify a location field
    :param rdr_dataset: contains datasets from the rdr group
    :param pii_dataset: dataset created from submitted hpo sites.  the pii
        tables
    :param hpo: string identifier of hpo
    :param concept_id: integer value of concept id for concept in the
        rdr_dataset
    :param pii_field: string value of field name with data matching the
        concept_id.  used to extract the correct values from the pii tables

    :return: a match_value dictionary keyed by person_id.  each value is one
        of consts.MATCH, consts.MISMATCH or consts.MISSING.
    """
    rdr_lookup = readers.get_rdr_match_values(
        project, validation_dataset, consts.ID_MATCH_TABLE, concept_id
    )
    pii_rows = readers.get_location_pii(
        project, rdr_dataset, pii_dataset, hpo, consts.PII_ADDRESS_TABLE,
        pii_field
    )

    results = {}
    for person_id, pii_value in pii_rows:
        rdr_value = rdr_lookup.get(person_id)

        if rdr_value is not None and pii_value is not None:
            # Both sides are present: compare their normalized forms.
            same_city = (
                normalizer.normalize_city_name(rdr_value)
                == normalizer.normalize_city_name(pii_value)
            )
            if same_city:
                results[person_id] = consts.MATCH
            else:
                results[person_id] = consts.MISMATCH
        else:
            # Either side being absent makes the comparison impossible.
            results[person_id] = consts.MISSING

    return results
def _compare_cities(project, validation_dataset, rdr_dataset, pii_dataset, hpo,
                    concept_id, pii_field, pii_tables):
    """
    Compare city information from hpo PII table and OMOP observation table.

    :param project: project to search for the datasets
    :param validation_dataset: the auto generated match validation dataset
        created in this module.  queried to get the location value to
        identify a location field
    :param rdr_dataset: contains datasets from the rdr group
    :param pii_dataset: dataset created from submitted hpo sites.  the pii
        tables
    :param hpo: string identifier of hpo
    :param concept_id: integer value of concept id for concept in the
        rdr_dataset
    :param pii_field: string value of field name with data matching the
        concept_id.  used to extract the correct values from the pii tables
    :param pii_tables: container tested for membership of the name
        ``hpo + consts.PII_ADDRESS_TABLE``

    :return: a match_value dictionary keyed by person_id.  each value is one
        of consts.MATCH, consts.MISMATCH or consts.MISSING.

    :raises RuntimeError: if the hpo address table name is not in pii_tables
    :raises oauth2client.client.HttpAccessTokenRefreshError: re-raised after
        logging when the PII read fails
    :raises googleapiclient.errors.HttpError: re-raised after logging when
        the PII read fails
    """
    address_table = hpo + consts.PII_ADDRESS_TABLE

    # Bail out before issuing any reads when the address table is not listed.
    if address_table not in pii_tables:
        raise RuntimeError('Table {} doesnt exist.'.format(address_table))

    rdr_lookup = readers.get_rdr_match_values(
        project, validation_dataset, consts.ID_MATCH_TABLE, concept_id)

    try:
        pii_rows = readers.get_location_pii(
            project, rdr_dataset, pii_dataset, hpo, consts.PII_ADDRESS_TABLE,
            pii_field)
    except (oauth2client.client.HttpAccessTokenRefreshError,
            googleapiclient.errors.HttpError):
        # Record which site and field failed, then let the caller handle it.
        LOGGER.exception(
            f"Unable to read PII for: {hpo}\tdata field:\t{pii_field}")
        raise

    results = {}
    for person_id, pii_value in pii_rows:
        rdr_value = rdr_lookup.get(person_id)

        if rdr_value is not None and pii_value is not None:
            # Both sides are present: compare their normalized forms.
            same_city = (
                normalizer.normalize_city_name(rdr_value)
                == normalizer.normalize_city_name(pii_value)
            )
            if same_city:
                results[person_id] = consts.MATCH
            else:
                results[person_id] = consts.MISMATCH
        else:
            # Either side being absent makes the comparison impossible.
            results[person_id] = consts.MISSING

    return results
def test_nomalize_city_with_unknown_abbreviation(self):
    # An abbreviation-like token ('L8t.') should come back lower-cased with
    # its punctuation removed.
    # test
    result = normalizer.normalize_city_name('L8t. Made Up Place')

    # post conditions
    self.assertEqual(result, 'l8t made up place')
def test_normalize_city_name_mixed_case(self):
    # A mixed-case city name should come back entirely lower-cased.
    # test
    result = normalizer.normalize_city_name('bIrMiNgHaM')

    # post conditions
    self.assertEqual(result, 'birmingham')
def test_normalize_city_with_punctuation_and_spaces(self):
    # 'St.' is expected to become 'saint' and the apostrophe to be dropped,
    # with the words still separated by single spaces.
    # test
    result = normalizer.normalize_city_name('St. Paul\'s Place')

    # post conditions
    self.assertEqual(result, 'saint pauls place')
def test_normalize_non_string_city(self):
    # A float input is expected to come back as its digits only, with the
    # decimal point removed.
    # test
    result = normalizer.normalize_city_name(88.321)

    # post conditions
    self.assertEqual(result, '88321')
def test_normalize_None_city(self):
    # None is expected to normalize to the empty string.
    # test
    result = normalizer.normalize_city_name(None)

    # post conditions
    self.assertEqual(result, '')