Ejemplo n.º 1
0
    def test_normalize_street_with_alpha_numerics(self):
        """Check that 'Apt. 50a Bldg. 2' normalizes to 'apartment 50 a building 2'."""
        # test and post condition: abbreviations are expanded and the
        # alphanumeric token '50a' is separated into '50 a'
        self.assertEqual(
            normalizer.normalize_street('Apt. 50a Bldg. 2'),
            'apartment 50 a building 2',
        )
Ejemplo n.º 2
0
    def test_normalize_None_street(self):
        """Check that a None street normalizes to the empty string."""
        # test
        normalized = normalizer.normalize_street(None)

        # post condition: no exception, and an empty string comes back
        self.assertEqual(normalized, '')
Ejemplo n.º 3
0
    def test_normalize_street_with_numeric_endings(self):
        """Check that the ordinal suffix is dropped: '71st Street' -> '71 street'."""
        # test and post condition
        self.assertEqual(
            normalizer.normalize_street('71st Street'),
            '71 street',
        )
Ejemplo n.º 4
0
    def test_normalize_street_with_puntcuations(self):
        """Check that punctuation is removed and abbreviations are expanded."""
        # test
        normalized = normalizer.normalize_street('El-lm Str. Blvd.')

        # post condition: the hyphen splits the word, the periods vanish,
        # and 'Str' / 'Blvd' are spelled out
        self.assertEqual(normalized, 'el lm street boulevard')
Ejemplo n.º 5
0
    def test_normalize_street_abbreviations(self):
        """Check that mixed-case abbreviations are expanded to lowercase words."""
        # test and post condition: 'St', 'BTM', 'BND' and 'ALy' are spelled out
        self.assertEqual(
            normalizer.normalize_street('Elm St BTM BND ALy'),
            'elm street bottom bend alley',
        )
Ejemplo n.º 6
0
    def test_normalize_street(self):
        """Check that a plain street name is lowercased: 'Elm Street' -> 'elm street'."""
        # test
        normalized = normalizer.normalize_street('Elm Street')

        # post condition
        self.assertEqual(normalized, 'elm street')
Ejemplo n.º 7
0
    def test_normalize_non_string_street(self):
        """Check that the float 1492.0 normalizes to the string '1492 0'."""
        # test and post condition: a non-string input is accepted and the
        # decimal point does not survive normalization
        self.assertEqual(
            normalizer.normalize_street(1492.0),
            '1492 0',
        )
Ejemplo n.º 8
0
def _compare_street_addresses(project, validation_dataset, rdr_dataset,
                              pii_dataset, hpo, concept_id_one, concept_id_two,
                              field_one, field_two, pii_tables):
    """
    Compare the two street address lines of the rdr and pii records.

    Address one and address two are first compared as distinct, normalized
    fields.  If they do not both match, the two lines of each source are
    combined and compared as a bag of words, so the same address split
    differently across the two lines still counts as a match.  Both fields
    always receive the same result for a given person.

    Only person ids returned by the pii reads are evaluated.  A person with
    a value for only one of the two pii fields is compared with the missing
    line treated as an empty string.

    :param project:  project to search for the datasets
    :param validation_dataset:  the auto generated match validation dataset
        created in this module.  queried (consts.ID_MATCH_TABLE) to get the
        rdr address values
    :param rdr_dataset:  contains datasets from the rdr group
    :param pii_dataset:  dataset holding the pii tables submitted by hpo sites
    :param hpo:  string identifier of the hpo site.  used to build the name
        of the site's pii address table and to read the site's pii
    :param concept_id_one:  integer value of concept id for address line one
        in the rdr_dataset
    :param concept_id_two:  integer value of concept id for address line two
        in the rdr_dataset
    :param field_one:  string value of field name with data matching
        concept_id_one.  used to extract the correct values from the pii tables
    :param field_two:  string value of field name with data matching
        concept_id_two.  used to extract the correct values from the pii tables
    :param pii_tables:  collection of existing pii table names.  must contain
        the site's pii address table name

    :return: a tuple of two dictionaries,
        (address_one_match_values, address_two_match_values), each mapping
        person_id to consts.MATCH or consts.MISMATCH
    :raises RuntimeError:  if the site's pii address table is not listed in
        pii_tables
    """
    table_name = hpo + consts.PII_ADDRESS_TABLE

    # fail fast, before issuing any query, when the site has no address table
    if table_name not in pii_tables:
        raise RuntimeError('Table {} doesnt exist.'.format(table_name))

    address_one_match_values = {}
    address_two_match_values = {}

    rdr_address_ones = readers.get_rdr_match_values(
        project, validation_dataset, consts.ID_MATCH_TABLE, concept_id_one)

    rdr_address_twos = readers.get_rdr_match_values(
        project, validation_dataset, consts.ID_MATCH_TABLE, concept_id_two)

    try:
        pii_street_ones = readers.get_location_pii(
            project, rdr_dataset, pii_dataset, hpo,
            consts.PII_ADDRESS_TABLE, field_one)
    except (oauth2client.client.HttpAccessTokenRefreshError,
            googleapiclient.errors.HttpError):
        LOGGER.exception(
            f"Unable to read PII for: {hpo}\tdata field:\t{field_one}")
        raise
    try:
        pii_street_twos = readers.get_location_pii(
            project, rdr_dataset, pii_dataset, hpo,
            consts.PII_ADDRESS_TABLE, field_two)
    except (oauth2client.client.HttpAccessTokenRefreshError,
            googleapiclient.errors.HttpError):
        LOGGER.exception(
            f"Unable to read PII for: {hpo}\tdata field:\t{field_two}")
        raise

    # person_id -> [address line one, address line two, ...]
    pii_street_addresses = {}
    for person_id, street in pii_street_ones:
        pii_street_addresses[person_id] = [street]

    for person_id, street in pii_street_twos:
        # a person with only a second address line gets an empty first line
        pii_street_addresses.setdefault(person_id, ['']).append(street)

    for person_id, streets in pii_street_addresses.items():
        raw_pii_one = streets[0]
        # a person with no row for the second field has no second entry;
        # treat it as empty instead of failing with an IndexError
        raw_pii_two = streets[1] if len(streets) > 1 else ''

        rdr_addr_one = normalizer.normalize_street(
            rdr_address_ones.get(person_id))
        pii_addr_one = normalizer.normalize_street(raw_pii_one)
        rdr_addr_two = normalizer.normalize_street(
            rdr_address_twos.get(person_id))
        pii_addr_two = normalizer.normalize_street(raw_pii_two)

        # easy case, fields 1 and 2 from both sources match exactly
        if rdr_addr_one == pii_addr_one and rdr_addr_two == pii_addr_two:
            result = consts.MATCH
        else:
            # convert two fields to one field and store as a list of strings
            full_rdr_street_list = (rdr_addr_one + ' ' + rdr_addr_two).split()
            full_pii_street_list = (pii_addr_one + ' ' + pii_addr_two).split()

            # check to see if each item in one list is in the other list and
            # set match results from that
            missing_rdr = _compare_address_lists(full_rdr_street_list,
                                                 full_pii_street_list)
            missing_pii = _compare_address_lists(full_pii_street_list,
                                                 full_rdr_street_list)

            if (missing_rdr + missing_pii) > 0:
                result = consts.MISMATCH
            else:
                result = consts.MATCH

        # both address lines always share the same outcome
        address_one_match_values[person_id] = result
        address_two_match_values[person_id] = result

    return address_one_match_values, address_two_match_values