def test_get_pii_values(self, mock_query, mock_response):
    """
    Check that get_pii_values turns response rows into (person_id, value) tuples.

    Also checks that the response mock is used exactly once and that the
    query mock is called exactly once with the PII_VALUES template formatted
    from the arguments given to get_pii_values.

    :param mock_query: mock whose call arguments are inspected.  Presumably
        injected by a patch decorator that is outside this block.
    :param mock_response: mock whose return value supplies the row
        dictionaries.  Presumably injected by a patch decorator as well.
    """
    # pre conditions
    mock_query.return_value = {}
    mock_response.return_value = [
        {
            consts.PERSON_ID_FIELD: 1,
            12345: 'saLLy',
        },
        {
            consts.PERSON_ID_FIELD: 2,
            12345: 'Rudy',
        },
        {
            consts.PERSON_ID_FIELD: 3,
            12345: 'MaTiLdA',
        },
    ]

    # test
    actual = reader.get_pii_values('project-foo', 'pii-bar', 'zeta', '_sea', 12345)

    # postconditions
    expected = [(1, 'saLLy'), (2, 'Rudy'), (3, 'MaTiLdA')]
    self.assertEqual(actual, expected)
    self.assertEqual(mock_response.call_count, 1)
    # assert_called_once_with raises AssertionError on its own when the call
    # count is not one or the arguments differ, so it needs no assertEqual
    # wrapper and no separate call_count check.
    mock_query.assert_called_once_with(
        consts.PII_VALUES.format(
            project='project-foo',
            dataset='pii-bar',
            hpo_site_str='zeta',
            table_suffix='_sea',
            field=12345,
        )
    )
def _compare_phone_numbers(
        project,
        rdr_dataset,
        pii_dataset,
        hpo,
        concept_id,
        pii_field,
        pii_tables
):
    """
    Classify each person's PII phone number against the RDR phone number.

    Both numbers are passed through normalizer.normalize_phone before they
    are tested for equality.

    :param project: project to search for the datasets
    :param rdr_dataset: contains datasets from the rdr group
    :param pii_dataset: dataset created from submitted hpo sites.  the pii tables
    :param hpo: string identifier of hpo
    :param concept_id: integer value of concept id for concept in the rdr_dataset
    :param pii_field: string value of field name with data matching the
        concept_id.  used to extract the correct values from the pii tables
    :param pii_tables: collection of table names that is checked for the
        presence of hpo + consts.PII_PHONE_TABLE

    :return: a dictionary keyed by person_id whose values are consts.MATCH,
        consts.MISMATCH, or consts.MISSING.
    :raises RuntimeError: if the site's phone table name is not in pii_tables
    :raises oauth2client.client.HttpAccessTokenRefreshError,
        googleapiclient.errors.HttpError: re-raised after being logged when
        the PII values cannot be read
    """
    table_name = hpo + consts.PII_PHONE_TABLE

    # Bail out before issuing any query when the site has no phone table.
    if table_name not in pii_tables:
        raise RuntimeError('Table {} doesnt exist.'.format(table_name))

    rdr_numbers = readers.get_rdr_match_values(
        project, rdr_dataset, consts.ID_MATCH_TABLE, concept_id
    )

    try:
        site_numbers = readers.get_pii_values(
            project, pii_dataset, hpo, consts.PII_PHONE_TABLE, pii_field
        )
    except (oauth2client.client.HttpAccessTokenRefreshError,
            googleapiclient.errors.HttpError):
        LOGGER.exception(
            "Unable to read PII for: %s\tdata field:\t%s", hpo, pii_field
        )
        raise

    outcomes = {}
    for person_id, site_number in site_numbers:
        rdr_number = rdr_numbers.get(person_id)

        if rdr_number is None or site_number is None:
            outcome = consts.MISSING
        elif (normalizer.normalize_phone(rdr_number)
              == normalizer.normalize_phone(site_number)):
            outcome = consts.MATCH
        else:
            outcome = consts.MISMATCH

        outcomes[person_id] = outcome

    return outcomes
def _compare_name_fields(project, rdr_dataset, pii_dataset, hpo, concept_id,
                         pii_field, pii_tables):
    """
    For an hpo, compare all first, middle, and last name fields to omop settings.

    This compares a site's name field values found in their uploaded PII
    tables with the values in the OMOP observation table.  Both values are
    passed through normalizer.normalize_name before they are compared.

    :param project: project to search for the datasets
    :param rdr_dataset: contains datasets from the rdr group
    :param pii_dataset: dataset created from submitted hpo sites.  the pii tables
    :param hpo: string identifier of hpo
    :param concept_id: integer value of concept id for concept in the rdr_dataset
    :param pii_field: string value of field name with data matching the
        concept_id.  used to extract the correct values from the pii tables
    :param pii_tables: collection of table names that is checked for the
        presence of hpo + consts.PII_NAME_TABLE

    :return: a match_values dictionary keyed by person_id whose values are
        consts.MATCH, consts.MISMATCH, or consts.MISSING.
    :raises RuntimeError: if the site's name table is not in pii_tables
    :raises oauth2client.client.HttpAccessTokenRefreshError,
        googleapiclient.errors.HttpError: re-raised after being logged when
        the PII values cannot be read
    """
    table_name = hpo + consts.PII_NAME_TABLE

    # Nothing can be compared without the site's name table.
    if table_name not in pii_tables:
        raise RuntimeError('Table {} doesnt exist.'.format(table_name))

    rdr_names = readers.get_rdr_match_values(project, rdr_dataset,
                                             consts.ID_MATCH_TABLE, concept_id)

    try:
        pii_names = readers.get_pii_values(project, pii_dataset, hpo,
                                           consts.PII_NAME_TABLE, pii_field)
    except (oauth2client.client.HttpAccessTokenRefreshError,
            googleapiclient.errors.HttpError):
        # Lazy %-style arguments, as used by the phone number comparison;
        # the rendered message is the same as the previous f-string.
        LOGGER.exception(
            "Unable to read PII for: %s\tdata field:\t%s", hpo, pii_field
        )
        raise

    match_values = {}
    for person_id, pii_name in pii_names:
        rdr_name = rdr_names.get(person_id)

        if rdr_name is None or pii_name is None:
            match_str = consts.MISSING
        else:
            pii_name = normalizer.normalize_name(pii_name)
            rdr_name = normalizer.normalize_name(rdr_name)
            match_str = consts.MATCH if rdr_name == pii_name else consts.MISMATCH

        match_values[person_id] = match_str

    return match_values
def _compare_name_fields(
        project,
        rdr_dataset,
        pii_dataset,
        hpo,
        concept_id,
        pii_field
):
    """
    Classify a site's PII name values against the RDR name values.

    Each name read from the site's PII name table is looked up by person_id
    in the RDR match values.  Both names are passed through
    normalizer.normalize_name before they are tested for equality.

    :param project: project to search for the datasets
    :param rdr_dataset: contains datasets from the rdr group
    :param pii_dataset: dataset created from submitted hpo sites.  the pii tables
    :param hpo: string identifier of hpo
    :param concept_id: integer value of concept id for concept in the rdr_dataset
    :param pii_field: string value of field name with data matching the
        concept_id.  used to extract the correct values from the pii tables

    :return: a dictionary keyed by person_id whose values are consts.MATCH,
        consts.MISMATCH, or consts.MISSING.
    """
    def _classify(rdr_value, site_value):
        # A value absent on either side cannot be compared.
        if rdr_value is None or site_value is None:
            return consts.MISSING

        normalized_site = normalizer.normalize_name(site_value)
        normalized_rdr = normalizer.normalize_name(rdr_value)
        if normalized_rdr == normalized_site:
            return consts.MATCH
        return consts.MISMATCH

    known_names = readers.get_rdr_match_values(
        project, rdr_dataset, consts.ID_MATCH_TABLE, concept_id
    )
    site_names = readers.get_pii_values(
        project, pii_dataset, hpo, consts.PII_NAME_TABLE, pii_field
    )

    return {
        person_id: _classify(known_names.get(person_id), site_name)
        for person_id, site_name in site_names
    }
def _compare_email_addresses(project, rdr_dataset, pii_dataset, hpo,
                             concept_id, pii_field, pii_tables):
    """
    Compare email addresses from hpo PII table and OMOP observation table.

    Both addresses are passed through normalizer.normalize_email before they
    are compared.

    :param project: project to search for the datasets
    :param rdr_dataset: contains datasets from the rdr group
    :param pii_dataset: dataset created from submitted hpo sites.  the pii tables
    :param hpo: string identifier of hpo
    :param concept_id: integer value of concept id for concept in the rdr_dataset
    :param pii_field: string value of field name with data matching the
        concept_id.  used to extract the correct values from the pii tables
    :param pii_tables: collection of table names that is checked for the
        presence of hpo + consts.PII_EMAIL_TABLE

    :return: a match_value dictionary keyed by person_id whose values are
        consts.MATCH, consts.MISMATCH, or consts.MISSING.
    :raises RuntimeError: if the site's email table is not in pii_tables
    :raises oauth2client.client.HttpAccessTokenRefreshError,
        googleapiclient.errors.HttpError: re-raised after being logged when
        the PII values cannot be read
    """
    table_name = hpo + consts.PII_EMAIL_TABLE

    # Nothing can be compared without the site's email table.
    if table_name not in pii_tables:
        raise RuntimeError('Table {} doesnt exist.'.format(table_name))

    email_addresses = readers.get_rdr_match_values(project, rdr_dataset,
                                                   consts.ID_MATCH_TABLE,
                                                   concept_id)

    try:
        pii_emails = readers.get_pii_values(project, pii_dataset, hpo,
                                            consts.PII_EMAIL_TABLE, pii_field)
    except (oauth2client.client.HttpAccessTokenRefreshError,
            googleapiclient.errors.HttpError):
        # Lazy %-style arguments, as used by the phone number comparison;
        # the rendered message is the same as the previous f-string.
        LOGGER.exception(
            "Unable to read PII for: %s\tdata field:\t%s", hpo, pii_field
        )
        raise

    match_values = {}
    for person_id, pii_email in pii_emails:
        rdr_email = email_addresses.get(person_id)

        if rdr_email is None or pii_email is None:
            match_str = consts.MISSING
        else:
            rdr_email = normalizer.normalize_email(rdr_email)
            pii_email = normalizer.normalize_email(pii_email)
            match_str = consts.MATCH if rdr_email == pii_email else consts.MISMATCH

        match_values[person_id] = match_str

    return match_values
def _compare_email_addresses(
        project,
        rdr_dataset,
        pii_dataset,
        hpo,
        concept_id,
        pii_field
):
    """
    Classify a site's PII email addresses against the RDR email addresses.

    Each address read from the site's PII email table is looked up by
    person_id in the RDR match values.  Both addresses are passed through
    normalizer.normalize_email before they are tested for equality.

    :param project: project to search for the datasets
    :param rdr_dataset: contains datasets from the rdr group
    :param pii_dataset: dataset created from submitted hpo sites.  the pii tables
    :param hpo: string identifier of hpo
    :param concept_id: integer value of concept id for concept in the rdr_dataset
    :param pii_field: string value of field name with data matching the
        concept_id.  used to extract the correct values from the pii tables

    :return: a dictionary keyed by person_id whose values are consts.MATCH,
        consts.MISMATCH, or consts.MISSING.
    """
    rdr_emails = readers.get_rdr_match_values(
        project, rdr_dataset, consts.ID_MATCH_TABLE, concept_id
    )
    site_emails = readers.get_pii_values(
        project, pii_dataset, hpo, consts.PII_EMAIL_TABLE, pii_field
    )

    outcomes = {}
    for person_id, site_email in site_emails:
        rdr_email = rdr_emails.get(person_id)

        # A value absent on either side cannot be compared.
        if rdr_email is None or site_email is None:
            outcomes[person_id] = consts.MISSING
            continue

        same_address = (normalizer.normalize_email(rdr_email)
                        == normalizer.normalize_email(site_email))
        outcomes[person_id] = consts.MATCH if same_address else consts.MISMATCH

    return outcomes
def _compare_phone_numbers(
        project,
        rdr_dataset,
        pii_dataset,
        hpo,
        concept_id,
        pii_field
):
    """
    Classify a site's PII phone numbers against the RDR phone numbers.

    Each number read from the site's PII phone table is looked up by
    person_id in the RDR match values.  Both numbers are passed through
    normalizer.normalize_phone before they are tested for equality.

    :param project: project to search for the datasets
    :param rdr_dataset: contains datasets from the rdr group
    :param pii_dataset: dataset created from submitted hpo sites.  the pii tables
    :param hpo: string identifier of hpo
    :param concept_id: integer value of concept id for concept in the rdr_dataset
    :param pii_field: string value of field name with data matching the
        concept_id.  used to extract the correct values from the pii tables

    :return: a dictionary keyed by person_id whose values are consts.MATCH,
        consts.MISMATCH, or consts.MISSING.
    """
    rdr_numbers = readers.get_rdr_match_values(
        project, rdr_dataset, consts.ID_MATCH_TABLE, concept_id
    )
    site_numbers = readers.get_pii_values(
        project, pii_dataset, hpo, consts.PII_PHONE_TABLE, pii_field
    )

    outcomes = {}
    for person_id, site_number in site_numbers:
        rdr_number = rdr_numbers.get(person_id)

        if rdr_number is None or site_number is None:
            outcome = consts.MISSING
        elif (normalizer.normalize_phone(rdr_number)
              == normalizer.normalize_phone(site_number)):
            outcome = consts.MATCH
        else:
            outcome = consts.MISMATCH

        outcomes[person_id] = outcome

    return outcomes