Beispiel #1
0
    def test_get_location_pii(self, mock_query, mock_response, mock_fields):
        """
        Verify get_location_pii pairs each person id with its location value.

        The response mock yields two result sets in order: rows linking
        person ids to location ids, then rows linking those location ids to
        the value stored under the requested field (12345).  The test checks
        the returned (person_id, value) tuples, that two queries were issued,
        and that the last query is the PII_LOCATION_VALUES statement built
        from the location ids of the first result set.

        NOTE(review): the three mock arguments are injected by patch
        decorators that are not part of this method -- confirm their order
        against the decorators when editing.
        """
        # pre conditions
        mock_query.return_value = {}

        # first result set:  person id -> location id
        person_locations = [
            {
                consts.PERSON_ID_FIELD: 1,
                consts.LOCATION_ID_FIELD: 85,
            },
            {
                consts.PERSON_ID_FIELD: 2,
                consts.LOCATION_ID_FIELD: 90,
            },
            {
                consts.PERSON_ID_FIELD: 3,
                consts.LOCATION_ID_FIELD: 115,
            },
        ]

        # second result set:  location id -> value of the requested field
        location_values = [
            {
                consts.LOCATION_ID_FIELD: 85,
                12345: 'Elm Str.'
            },
            {
                consts.LOCATION_ID_FIELD: 90,
                12345: '11 Ocean Ave.'
            },
            {
                consts.LOCATION_ID_FIELD: 115,
                12345: '1822 RR 25'
            },
        ]

        mock_response.side_effect = [person_locations, location_values]

        mock_fields.return_value = [{
            'name': consts.LOCATION_ID_FIELD,
            'type': consts.INTEGER_TYPE
        }]

        # test
        actual = reader.get_location_pii('project-foo', 'rdr-bar', 'pii-baz',
                                         'chi', '_sky', 12345)

        # post-conditions
        expected = [(1, 'Elm Str.'), (2, '11 Ocean Ave.'), (3, '1822 RR 25')]
        self.assertEqual(actual, expected)
        self.assertEqual(mock_query.call_count, 2)
        self.assertEqual(mock_response.call_count, 2)

        # assert_called_with raises AssertionError itself on a mismatch and
        # returns None otherwise, so it needs no assertEqual wrapper.
        mock_query.assert_called_with(
            consts.PII_LOCATION_VALUES.format(project='project-foo',
                                              dataset='rdr-bar',
                                              field=12345,
                                              id_list='85, 90, 115'))
def _compare_zip_codes(project, validation_dataset, rdr_dataset, pii_dataset,
                       hpo, concept_id, pii_field):
    """
    Compare zip codes from the hpo PII address table with the RDR values.

    :param project:  project to search for the datasets
    :param validation_dataset:  dataset holding the id match table that is
        queried for the RDR values of concept_id
    :param rdr_dataset:  contains datasets from the rdr group
    :param pii_dataset:  dataset created from submitted hpo sites.  holds the
        pii tables
    :param hpo: string identifier of hpo
    :param concept_id:  integer value of concept id for concept in the
        rdr_dataset
    :param pii_field:  string value of field name with data matching the
        concept_id.  used to extract the correct values from the pii tables

    :return: a dictionary with one entry per person id returned by the PII
        read.  each value is consts.MISSING when either side has no value,
        otherwise consts.MATCH or consts.MISMATCH depending on whether the
        normalized zip codes are equal.
    """
    results = {}

    rdr_by_person = readers.get_rdr_match_values(
        project, validation_dataset, consts.ID_MATCH_TABLE, concept_id)

    pii_rows = readers.get_location_pii(project, rdr_dataset, pii_dataset,
                                        hpo, consts.PII_ADDRESS_TABLE,
                                        pii_field)

    for person_id, pii_value in pii_rows:
        rdr_value = rdr_by_person.get(person_id)

        if rdr_value is not None and pii_value is not None:
            # only normalize when both sides actually carry a value
            normalized_rdr = normalizer.normalize_zip(rdr_value)
            normalized_pii = normalizer.normalize_zip(pii_value)
            if normalized_rdr == normalized_pii:
                outcome = consts.MATCH
            else:
                outcome = consts.MISMATCH
        else:
            outcome = consts.MISSING

        results[person_id] = outcome

    return results
Beispiel #3
0
def _compare_zip_codes(project, validation_dataset, rdr_dataset, pii_dataset,
                       hpo, concept_id, pii_field, pii_tables):
    """
    Compare zip codes from the hpo PII address table with the RDR values.

    :param project:  project to search for the datasets
    :param validation_dataset:  dataset holding the id match table that is
        queried for the RDR values of concept_id
    :param rdr_dataset:  contains datasets from the rdr group
    :param pii_dataset:  dataset created from submitted hpo sites.  holds the
        pii tables
    :param hpo: string identifier of hpo
    :param concept_id:  integer value of concept id for concept in the
        rdr_dataset
    :param pii_field:  string value of field name with data matching the
        concept_id.  used to extract the correct values from the pii tables
    :param pii_tables:  collection of existing pii table names.  the hpo's
        address table name must be contained in it

    :return: a dictionary with one entry per person id returned by the PII
        read, valued consts.MISSING, consts.MATCH or consts.MISMATCH.
    :raises RuntimeError: if the hpo's address table is not in pii_tables
    :raises oauth2client.client.HttpAccessTokenRefreshError,
        googleapiclient.errors.HttpError: logged and re-raised when the PII
        read fails
    """
    results = {}
    table_name = hpo + consts.PII_ADDRESS_TABLE

    # nothing to compare when the site never submitted an address table
    if table_name not in pii_tables:
        raise RuntimeError('Table {} doesnt exist.'.format(table_name))

    rdr_by_person = readers.get_rdr_match_values(project, validation_dataset,
                                                 consts.ID_MATCH_TABLE,
                                                 concept_id)

    try:
        pii_rows = readers.get_location_pii(project, rdr_dataset, pii_dataset,
                                            hpo, consts.PII_ADDRESS_TABLE,
                                            pii_field)
    except (oauth2client.client.HttpAccessTokenRefreshError,
            googleapiclient.errors.HttpError):
        LOGGER.exception(
            f"Unable to read PII for: {hpo}\tdata field:\t{pii_field}")
        raise

    for person_id, pii_value in pii_rows:
        rdr_value = rdr_by_person.get(person_id)

        if rdr_value is not None and pii_value is not None:
            # only normalize when both sides actually carry a value
            normalized_rdr = normalizer.normalize_zip(rdr_value)
            normalized_pii = normalizer.normalize_zip(pii_value)
            if normalized_rdr == normalized_pii:
                outcome = consts.MATCH
            else:
                outcome = consts.MISMATCH
        else:
            outcome = consts.MISSING

        results[person_id] = outcome

    return results
Beispiel #4
0
def _compare_states(project, validation_dataset, rdr_dataset, pii_dataset, hpo,
                    concept_id, pii_field):
    """
    Compare states from the hpo PII address table with the RDR values.

    :param project:  project to search for the datasets
    :param validation_dataset:  dataset holding the id match table that is
        queried for the RDR values of concept_id
    :param rdr_dataset:  contains datasets from the rdr group
    :param pii_dataset:  dataset created from submitted hpo sites.  holds the
        pii tables
    :param hpo: string identifier of hpo
    :param concept_id:  integer value of concept id for concept in the
        rdr_dataset
    :param pii_field:  string value of field name with data matching the
        concept_id.  used to extract the correct values from the pii tables

    :return: a tuple of (match_values, exception).  match_values is a
        dictionary with one entry per person id returned by the PII read,
        valued consts.MISSING, consts.MATCH or consts.MISMATCH.  exception is
        None on success.  when the PII read fails, the error is logged and
        the tuple holds an empty dictionary and the caught exception.
    """
    results = {}

    rdr_by_person = readers.get_rdr_match_values(project, validation_dataset,
                                                 consts.ID_MATCH_TABLE,
                                                 concept_id)

    try:
        pii_rows = readers.get_location_pii(project, rdr_dataset, pii_dataset,
                                            hpo, consts.PII_ADDRESS_TABLE,
                                            pii_field)
    except (oauth2client.client.HttpAccessTokenRefreshError,
            googleapiclient.errors.HttpError) as exception:
        LOGGER.exception("Unable to read PII for: %s\tdata field:\t%s", hpo,
                         pii_field)
        # hand the failure back to the caller instead of raising
        return results, exception

    for person_id, pii_value in pii_rows:
        rdr_value = rdr_by_person.get(person_id)

        if rdr_value is not None and pii_value is not None:
            # only normalize when both sides actually carry a value
            normalized_rdr = normalizer.normalize_state(rdr_value)
            normalized_pii = normalizer.normalize_state(pii_value)
            if normalized_rdr == normalized_pii:
                outcome = consts.MATCH
            else:
                outcome = consts.MISMATCH
        else:
            outcome = consts.MISSING

        results[person_id] = outcome

    return results, None
Beispiel #5
0
def _compare_street_addresses(project, validation_dataset, rdr_dataset,
                              pii_dataset, hpo, concept_id_one, concept_id_two,
                              field_one, field_two, pii_tables):
    """
    Compare the two street address lines of the standard address field.

    Compares address one and address two as distinct fields and, if they do
    not both match, combines the two lines into a single field per source and
    compares the whitespace-separated tokens of the combined fields.  Both
    lines always receive the same result, either match or mismatch.

    :param project:  project to search for the datasets
    :param validation_dataset:  dataset holding the id match table that is
        queried for the RDR values of both concept ids
    :param rdr_dataset:  contains datasets from the rdr group
    :param pii_dataset:  dataset created from submitted hpo sites.  holds the
        pii tables
    :param hpo:  string identifier of hpo.  also used to build the name of
        the site's pii address table
    :param concept_id_one:  integer value of concept id for address line one
        in the rdr_dataset
    :param concept_id_two:  integer value of concept id for address line two
        in the rdr_dataset
    :param field_one:  string value of field name with data matching
        concept_id_one.  used to extract the values from the pii tables
    :param field_two:  string value of field name with data matching
        concept_id_two.  used to extract the values from the pii tables
    :param pii_tables:  collection of existing pii table names.  the hpo's
        address table name must be contained in it

    :return: a tuple of two match_values dictionaries, the first for address
        line one and the second for address line two.  each is keyed by
        person id and valued consts.MATCH or consts.MISMATCH.
    :raises RuntimeError: if the hpo's address table is not in pii_tables
    :raises oauth2client.client.HttpAccessTokenRefreshError,
        googleapiclient.errors.HttpError: logged and re-raised when a PII
        read fails
    """
    address_one_match_values = {}
    address_two_match_values = {}
    table_name = hpo + consts.PII_ADDRESS_TABLE

    # nothing to compare when the site never submitted an address table
    if table_name not in pii_tables:
        raise RuntimeError('Table {} doesnt exist.'.format(table_name))

    rdr_address_ones = readers.get_rdr_match_values(
        project, validation_dataset, consts.ID_MATCH_TABLE, concept_id_one)

    rdr_address_twos = readers.get_rdr_match_values(
        project, validation_dataset, consts.ID_MATCH_TABLE, concept_id_two)

    try:
        pii_street_ones = readers.get_location_pii(
            project, rdr_dataset, pii_dataset, hpo,
            consts.PII_ADDRESS_TABLE, field_one)
    except (oauth2client.client.HttpAccessTokenRefreshError,
            googleapiclient.errors.HttpError):
        LOGGER.exception(
            f"Unable to read PII for: {hpo}\tdata field:\t{field_one}")
        raise
    try:
        pii_street_twos = readers.get_location_pii(
            project, rdr_dataset, pii_dataset, hpo,
            consts.PII_ADDRESS_TABLE, field_two)
    except (oauth2client.client.HttpAccessTokenRefreshError,
            googleapiclient.errors.HttpError):
        LOGGER.exception(
            f"Unable to read PII for: {hpo}\tdata field:\t{field_two}")
        raise

    # person_id -> [person_id, street one, street two, ...]
    pii_street_addresses = {}
    for person_id, street in pii_street_ones:
        pii_street_addresses[person_id] = [person_id, street]

    for person_id, street in pii_street_twos:
        current_value = pii_street_addresses.get(person_id)

        if current_value is None:
            # no address-one row for this person: pad line one with ''
            pii_street_addresses[person_id] = [person_id, '', street]
        else:
            current_value.append(street)

    for person_id, addresses in pii_street_addresses.items():

        pii_addr_one = addresses[1]
        # a person with an address-one row but no address-two row has only
        # two list items; pad line two with '' (mirroring the padding of a
        # missing line one above) instead of raising IndexError
        pii_addr_two = addresses[2] if len(addresses) > 2 else ''

        rdr_addr_one = normalizer.normalize_street(
            rdr_address_ones.get(person_id))
        pii_addr_one = normalizer.normalize_street(pii_addr_one)
        rdr_addr_two = normalizer.normalize_street(
            rdr_address_twos.get(person_id))
        pii_addr_two = normalizer.normalize_street(pii_addr_two)

        # easy case, fields 1 and 2 from both sources match exactly
        if rdr_addr_one == pii_addr_one and rdr_addr_two == pii_addr_two:
            match_str = consts.MATCH
        else:
            # convert two fields to one field and store as a list of strings
            full_rdr_street = rdr_addr_one + ' ' + rdr_addr_two
            full_pii_street = pii_addr_one + ' ' + pii_addr_two
            full_rdr_street_list = full_rdr_street.split()
            full_pii_street_list = full_pii_street.split()

            # check to see if each item in one list is in the other list and
            # set match results from that
            # NOTE(review): _compare_address_lists is defined elsewhere;
            # presumably it returns a count of unmatched items -- confirm
            missing_rdr = _compare_address_lists(full_rdr_street_list,
                                                 full_pii_street_list)
            missing_pii = _compare_address_lists(full_pii_street_list,
                                                 full_rdr_street_list)

            if (missing_rdr + missing_pii) > 0:
                match_str = consts.MISMATCH
            else:
                match_str = consts.MATCH

        # both address lines always share the same result
        address_one_match_values[person_id] = match_str
        address_two_match_values[person_id] = match_str

    return address_one_match_values, address_two_match_values