Ejemplo n.º 1
0
def format_result_in_ap_style(address, db_alias=None, street_custom_styles=None, additional_street_styles=None):
    """
    Given a string representing a street address, parses that location
    and converts it to Associated Press style. Eventually this will be
    joined by a format_result_in_usps_style function as well.

    Takes one required and three optional arguments:
        *   address: a string representing the address to be properly
                formatted. Must have either city and state or ZIP specified.
        -   db_alias: the name given to the geocoder's database in your
                settings.py file. Defaults to None, in which case
                settings.GEOCODER_DB_ALIAS is used (or, failing that, the
                string 'geocoder').
        -   street_custom_styles: A dict of dicts, with first-level
                keys specifying a city and second-level keys specifying
                the names of streets in this city that should be changed
                before they are returned from the formatter. The second-
                level keys will be replaced with the respective values.
                Both levels of keys are looked up in lowercase.
                Optional, and defaults to None.
        -   additional_street_styles: A dict of dicts, with first-level
                keys specifying city and second-level keys specifying
                the names of streets in this city that should be changed
                before they are passed into the geocoder. The second-
                level keys will be replaced with the respective values.
                Handed unchanged to normalize_address.
                Optional, and defaults to None.

    Returns a list of strings, one per line of the formatted address: the
    street line, then the unit line (only when a recognized unit kind was
    parsed), then the city/state/ZIP line. Returns None when
    normalize_address yields no result.
    """
    # Unless otherwise specified, the database alias will be that which has
    # been specified in settings.GEOCODER_DB_ALIAS (or, failing that, the
    # string 'geocoder').
    if not db_alias:
        db_alias = getattr(settings, 'GEOCODER_DB_ALIAS', 'geocoder')

    # Remove all periods and commas from the address.
    address = address.replace(',', '').replace('.', '')

    # If the first part of the address matches the bizarre out-state
    # Wisconsin address formula (exempli gratia, N109W1711 Ava Circle),
    # remove that address and substitute in a bogus number instead.
    # We'll format the actual address and substitute it in later.
    address_first_part = address.split(' ')[0]
    # The token is uppercased before matching, so the captured direction
    # letters are already uppercase. The character classes deliberately
    # contain only letters (a '|' inside [...] is a literal pipe).
    address_first_part_match = re.search(
        r'([NEWS])(\d+)([NEWS])(\d+)',
        address_first_part.upper()
    )
    address_first_part_formatted = None
    if address_first_part_match:
        address_first_part_formatted = (
            "%s%s-%s%s" % address_first_part_match.groups()
        )
        # Slice off the leading token rather than splitting on it, so a
        # later repeat of the same text cannot truncate the address.
        address = '171717' + address[len(address_first_part):]

    # First, normalize this address using PostGIS.
    # The result is indexed below as: 0 street number, 1 predirection,
    # 2 street name, 3 street suffix/type, 4 postdirection, 5 unit, 6 city,
    # 7 state, 8 ZIP code.
    result_list = normalize_address(
        address,
        db_alias,
        additional_street_styles
    )

    if not result_list:
        return None

    # Swap the bogus number back out for the real grid-style number.
    if address_first_part_match and result_list[0] == '171717':
        result_list[0] = address_first_part_formatted

    formatted_address = []
    formatted_first_line = []

    # For our additional processing, check if there were non-numeric characters
    # in the street address. If so, replace their normalized value with the raw
    # street number. Else use the normalized value.
    if result_list[0] != '':
        raw_number = address.split(' ')[0]
        if raw_number.isdigit():
            formatted_first_line.append(result_list[0])
        else:
            formatted_first_line.append(raw_number.upper())

    # Now check to see if the street has a predirection. If so convert its
    # value to Associated Press style.
    if result_list[1] != '':
        formatted_first_line.append(
            ap_style.PREDIRECTIONS_TO_AP_STYLE[result_list[1].lower()].upper()
        )

    # Now remove any quotes that may have been added to the street name, and
    # see if it matches one of the streets for which we have a custom style. If
    # so, return this custom street name in lieu of the one from the geocoder.
    # Else return the geocoded result in titlecase. Highways are skipped here
    # because their name (number) is emitted together with the highway type
    # further down.
    if result_list[2] != '' and \
            result_list[3].lower() not in HIGHWAYS_TO_STYLE:
        street_name = result_list[2].strip('"')
        city_styles = {}
        if street_custom_styles:
            city_styles = street_custom_styles.get(
                result_list[6].lower(), {}
            )
        if street_name.lower() in city_styles:
            formatted_first_line.append(city_styles[street_name.lower()])
        else:
            formatted_first_line.append(titlecase(street_name))

    # Next match roads to their correct abbreviations (or non-abbreviations)
    # according to Associated Press style.
    # formatted_highway stays None unless a state-prefixed highway was found;
    # in that case the first line carries a placeholder that is resolved once
    # the state has been processed below.
    formatted_highway = None
    highway_placeholder = "%(state_full)s %(hwy_fmt)s"
    if result_list[3] != '':
        suffix_key = result_list[3].lower()
        if suffix_key in street_suffixes and \
                street_suffixes[suffix_key] in ap_style.SUFFIXES_TO_AP_STYLE:
            formatted_first_line.append(
                ap_style.SUFFIXES_TO_AP_STYLE[
                    street_suffixes[suffix_key]
                ].capitalize()
            )
        elif suffix_key in HIGHWAYS_TO_STYLE:
            highway_style = HIGHWAYS_TO_STYLE[suffix_key]
            # Empty when the normalizer returned no street name.
            highway_number = result_list[2].strip('"').upper()
            if suffix_key in HIGHWAYS_STATE_APPEND:
                if result_list[2] != '':
                    # Drop the style's first word; the state's full name is
                    # substituted in front of the remainder later.
                    formatted_highway = " ".join(
                        highway_style.split(' ')[1:]
                    )
                else:
                    formatted_highway = highway_style
                formatted_first_line.append(
                    highway_placeholder + highway_number
                )
            else:
                formatted_first_line.append(
                    '%s%s' % (highway_style, highway_number)
                )

    # Now append the post-directional suffix, if one exists. Not sure about the
    # style guidelines on this, so I'll defer doing anything too fancy until I
    # know for sure. ~AJV
    if result_list[4] != '':
        formatted_first_line.append(result_list[4])

    # Now append the entire first line of the address to an array of the whole
    # address by line.
    formatted_address.append(" ".join(formatted_first_line))

    # Next process the unit number (if one was given). Units whose kind is
    # not in SECONDARY_UNITS_WITH_NUMBERS are left out of the output.
    if result_list[5] != '':
        unit_raw = result_list[5]
        unit_tokens = unit_raw.split(' ')
        unit_kind = unit_tokens[0].lower()
        unit_key = unit_kind.replace('.', '')
        if unit_key in SECONDARY_UNITS_WITH_NUMBERS:
            unit_type = SECONDARY_UNITS_WITH_NUMBERS[unit_key]
            unit_remainder = unit_raw[len(unit_kind):].upper()
            # Drop a run of three zeros directly after the separator.
            if unit_remainder[1:4] == '000':
                unit_remainder = ' %s' % unit_remainder[4:]
            # When the unit is just the same word twice, replace the
            # repeated word with the literal ' unit'.
            if len(unit_tokens) == 2 and \
                    unit_tokens[0].upper() == unit_tokens[1].upper():
                unit_remainder = ' unit'
            formatted_address.append(unit_type.capitalize() + unit_remainder)

    # Now format the last line of the address (we won't make a fourth line for
    # country), complete with the city, state and ZIP code (if given).
    formatted_city_state_line = []

    # If the address has a city name listed, titlecase it and follow it with
    # a comma.
    if result_list[6] != '':
        formatted_city_state_line.append(titlecase(result_list[6]) + ",")

    # If the address has a state listed, look for it first in the abbreviations
    # to state names in Associated Press style crosswalk. If the abbreviation
    # is not there, uppercase it as given and return it instead.
    # state_full defaults to empty so the highway substitution below cannot
    # hit an unbound name when the normalizer returned no state.
    state_full = ''
    if result_list[7] != '':
        state_key = result_list[7].lower()
        if state_key in CROSSWALK:
            # Look up with the same lowercased key used for the membership
            # test above.
            state_match = CROSSWALK[state_key]
            formatted_city_state_line.append(state_match['ap'])
            state_full = state_match['name']
        else:
            state_full = result_list[7].upper()
            formatted_city_state_line.append(state_full)

    # If the address has a ZIP code, add that to the formatted line.
    if result_list[8] != '':
        formatted_city_state_line.append(result_list[8])

    # Concatenate the last line of the address and append it to the
    # formatted_address array.
    formatted_address.append(" ".join(formatted_city_state_line))

    # Resolve the highway placeholder now that the state is known. A plain
    # replace is used (instead of %-formatting the whole line) so a stray '%'
    # elsewhere in the line cannot break the substitution.
    if formatted_highway is not None:
        state_prefix = "%s " % state_full if state_full else ""
        formatted_address[0] = formatted_address[0].replace(
            highway_placeholder,
            state_prefix + formatted_highway
        )

    return formatted_address
Ejemplo n.º 2
0
def normalize_intersection(intersection_raw, db_alias=None):
    """
    Given a database alias (as set forth in Django's settings) and an
    intersection (as a string) to normalize, connects to that database,
    fires normalization queries and returns the resultant normalized
    address components.

    Takes one required and one optional argument:
        *   intersection_raw: the intersection to be normalized, e.g. two
                road names joined by a union symbol and optionally followed
                by city, state and/or ZIP code.
        -   db_alias: the name given to the geocoder's database in your
                settings.py file. Defaults to None, in which case
                settings.GEOCODER_DB_ALIAS is used (or, failing that, the
                string 'geocoder').

    Returns a dict with the keys 'city', 'state', 'zip', 'first_road' and
    'second_road'. 'city' and 'zip' are None when not supplied; 'state'
    falls back to settings.DEFAULT_GEOCODER_STATE (or 'Wisconsin').

    Raises IntersectionInputError when no intersection separator is found,
    or when either road cannot be normalized.
    """
    # Unless otherwise specified, the database alias will be that which has
    # been specified in settings.GEOCODER_DB_ALIAS (or, failing that, the
    # string 'geocoder').
    if not db_alias:
        db_alias = getattr(settings, 'GEOCODER_DB_ALIAS', 'geocoder')

    # Find the match. A symbol that always denotes an intersection wins;
    # otherwise fall back to the symbols that only sometimes do.
    # NOTE(review): match offsets come from the uppercased string but are
    # applied to the raw one; this assumes upper() preserves length --
    # confirm for non-ASCII input.
    intersection_upper = intersection_raw.upper()
    separator_match = (
        ALWAYS_DENOTES_INTERSECTION_RE.search(intersection_upper) or
        SOMETIMES_DENOTES_INTERSECTION_RE.search(intersection_upper)
    )

    # If there wasn't any match at all raise an input error.
    if separator_match is None:
        raise IntersectionInputError("Missing second address to be parsed.")

    # Split the first road from the rest of the address around the match.
    first_road = intersection_raw[:separator_match.start()].strip(' ')
    remainder = intersection_raw[separator_match.end():].strip(' ')

    # Now we have to separate the second street from city, state and ZIP code
    # information (if any of this was supplied at all). The best way to do this
    # is to attach a fake street address number and run this through PostGIS'
    # normalize_address function.
    parsable = normalize_address(" ".join(["1217", remainder]), db_alias)
    if not parsable:
        raise IntersectionInputError(
            'Invalid second road or city/state/ZIP value.'
        )

    # Before processing the parsed result, remove all quotes from each
    # address component.
    parsable = [item.replace('"', '') for item in parsable]

    address_components = {
        'city': None,
        'state': None,
        'zip': None
    }

    # Everything hinges on the state. So if there was a state specified to the
    # normalizer (and if it's a legitimate American state), use it. If not,
    # fall back to the default state.
    # NOTE(review): the fallback is the full name 'Wisconsin' while parsed
    # states are compared against item[0] of US_STATES -- confirm which form
    # callers expect.
    if parsable[7] and any(parsable[7] == item[0] for item in US_STATES):
        address_components['state'] = parsable[7]
    else:
        address_components['state'] = getattr(
            settings,
            'DEFAULT_GEOCODER_STATE',
            'Wisconsin'
        )

    # If there was a ZIP code specified, add that to the address components.
    if parsable[8]:
        address_components['zip'] = parsable[8]

    # Finally, if there was a city specified add that as well.
    if parsable[6]:
        address_components['city'] = parsable[6]

    # The road is the normalized predirection plus street name (items 1-2).
    second_road = " ".join([part for part in parsable[1:3] if part != ''])

    # Now we'll go back and normalize the first street name the same way.
    spoofed_first = " ".join([
        "1217",
        first_road,
        'Milwaukee',  # Note: it doesn't matter the city & state we use.
        'WI'  # The function just needs something in these spots.
    ])
    first_road_parsable = normalize_address(spoofed_first, db_alias)
    if not first_road_parsable:
        raise IntersectionInputError('Invalid first road value.')

    # Strip quotes here too, so both roads are returned in the same form.
    first_road_parts = [
        part.replace('"', '') for part in first_road_parsable[1:3]
    ]
    first_road = " ".join([part for part in first_road_parts if part != ''])
    if first_road == '':
        raise IntersectionInputError('Invalid first road value.')

    address_components['first_road'] = first_road
    address_components['second_road'] = second_road

    return address_components