def format_result_in_ap_style(address, db_alias=None, street_custom_styles=None,
                              additional_street_styles=None):
    """
    Given a string representing a street address, parses that location and
    converts it to Associated Press style.

    Takes one required and three optional arguments:

        * address: a string representing the address to be properly
            formatted. Must have either city and state or ZIP specified.
        - db_alias: the name given to the geocoder's database in your
            settings.py file. When falsy, settings.GEOCODER_DB_ALIAS is used
            (or, failing that, the string 'geocoder').
        - street_custom_styles: A dict of dicts, with first-level keys
            specifying a city and second-level keys specifying the names of
            streets in this city that should be changed before they are
            returned from the formatter. Both levels are looked up with
            lowercased keys. The second-level keys will be replaced with the
            respective values. Optional, and defaults to None.
        - additional_street_styles: passed through unchanged as the third
            argument to normalize_address (street replacements applied
            before geocoding). Optional, and defaults to None.

    Returns a list of strings, one per address line (street line, optional
    unit line, city/state/ZIP line), or None if normalize_address produced
    no result.
    """
    # Unless otherwise specified, the database alias will be that which has
    # been specified in settings.GEOCODER_DB_ALIAS (or, failing that, the
    # string 'geocoder').
    if not db_alias:
        db_alias = getattr(settings, 'GEOCODER_DB_ALIAS', 'geocoder')

    # Remove all periods and commas from the address.
    address = address.replace(',', '').replace('.', '')

    # If the first part of the address matches the out-state Wisconsin
    # address formula (exempli gratia, N109W1711 Ava Circle), swap in a bogus
    # house number for normalization; the real one is restored afterward.
    address_first_part = address.split(' ')[0]
    # NOTE: the character class is [NEWS]; the previous [N|E|W|S] also
    # accepted a literal pipe character.
    address_first_part_match = re.search(
        r'([NEWS])(\d+)([NEWS])(\d+)',
        address_first_part.upper()
    )
    address_first_part_formatted = None
    if address_first_part_match:
        address_first_part_formatted = (
            "%s%s-%s%s" % address_first_part_match.groups()
        )
        # Slice the prefix off rather than splitting on it, so a repeat of
        # the same token later in the address cannot truncate the remainder.
        address = '171717' + address[len(address_first_part):]

    # First, normalize this address.
    result_list = normalize_address(
        address,
        db_alias,
        additional_street_styles
    )
    if not result_list:
        return None

    # Components are read by position: 0 house number, 1 predirection,
    # 2 street name, 3 street type, 4 postdirection, 5 unit, 6 city,
    # 7 state, 8 ZIP.
    if address_first_part_match and result_list[0] == '171717':
        result_list[0] = address_first_part_formatted

    formatted_first_line = []

    # If the raw street number had non-numeric characters, keep the raw
    # value (uppercased) instead of the normalized one.
    if result_list[0] != '':
        raw_number = address.split(' ')[0]
        if raw_number.isdigit():
            formatted_first_line.append(result_list[0])
        else:
            formatted_first_line.append(raw_number.upper())

    # Convert any predirection to Associated Press style.
    if result_list[1] != '':
        formatted_first_line.append(
            ap_style.PREDIRECTIONS_TO_AP_STYLE[result_list[1].lower()].upper()
        )

    street_name = result_list[2].strip('"')
    street_type = result_list[3].lower()
    is_highway = street_type in HIGHWAYS_TO_STYLE

    # For non-highway streets, prefer a caller-supplied custom style for
    # this city/street pair; otherwise titlecase the normalized name.
    if result_list[2] != '' and not is_highway:
        city_styles = {}
        if street_custom_styles:
            city_styles = street_custom_styles.get(
                result_list[6].lower(), {}
            )
        if street_name.lower() in city_styles:
            formatted_first_line.append(city_styles[street_name.lower()])
        else:
            formatted_first_line.append(titlecase(street_name))

    # Next match roads to their correct abbreviations (or non-abbreviations)
    # according to Associated Press style. Highways that need the state name
    # get placeholders here that are filled in once the state is known.
    highway_template = "%(state_full)s %(hwy_fmt)s"
    formatted_highway = None
    if result_list[3] != '':
        if street_type in street_suffixes and \
                street_suffixes[street_type] in ap_style.SUFFIXES_TO_AP_STYLE:
            formatted_first_line.append(
                ap_style.SUFFIXES_TO_AP_STYLE[
                    street_suffixes[street_type]
                ].capitalize()
            )
        elif is_highway:
            highway_style = HIGHWAYS_TO_STYLE[street_type]
            if street_type in HIGHWAYS_STATE_APPEND:
                if result_list[2] != '':
                    formatted_first_line.append("%s%s" % (
                        highway_template,
                        street_name.upper()
                    ))
                    # Drop the style's first word; the state name takes its
                    # place via the template.
                    formatted_highway = " ".join(
                        highway_style.split(' ')[1:]
                    )
                else:
                    formatted_first_line.append(highway_template)
                    formatted_highway = highway_style
            elif result_list[2] != '':
                formatted_first_line.append('%s%s' % (
                    highway_style,
                    street_name.upper()
                ))
            else:
                formatted_first_line.append(highway_style)

    # Append the post-directional suffix, if one exists, unmodified.
    if result_list[4] != '':
        formatted_first_line.append(result_list[4])

    formatted_address = [" ".join(formatted_first_line)]

    # Next process the unit number (if one was given). Units whose kind is
    # not in SECONDARY_UNITS_WITH_NUMBERS are dropped.
    unit_raw = result_list[5]
    if unit_raw != '':
        unit_parts = unit_raw.split(' ')
        unit_kind = unit_parts[0].lower()
        unit_key = unit_kind.replace('.', '')
        if unit_key in SECONDARY_UNITS_WITH_NUMBERS:
            unit_type = SECONDARY_UNITS_WITH_NUMBERS[unit_key]
            unit_remainder = unit_raw[len(unit_kind):].upper()

            # Strip a leading '000' pad from the unit identifier.
            if unit_remainder[1:4] == '000':
                unit_remainder = ' %s' % unit_remainder[4:]

            # A unit whose two words are identical (ignoring case) is
            # rendered as "<Type> unit".
            if len(unit_parts) == 2 and \
                    unit_parts[0].upper() == unit_parts[1].upper():
                unit_remainder = ' unit'

            formatted_address.append(unit_type.capitalize() + unit_remainder)

    # Now format the last line of the address (we won't make a fourth line
    # for country), complete with the city, state and ZIP code (if given).
    formatted_city_state_line = []

    if result_list[6] != '':
        formatted_city_state_line.append(titlecase(result_list[6]) + ",")

    # Look the state up in the AP-style crosswalk; if it is not there,
    # uppercase it as given. state_full defaults to '' so that highway
    # substitution below cannot hit an unassigned name.
    state_full = ''
    state_raw = result_list[7]
    if state_raw != '':
        if state_raw.lower() in CROSSWALK:
            # Membership is tested on the lowercased value, so fall back to
            # the lowercased key when the raw value is not itself a key.
            state_key = state_raw if state_raw in CROSSWALK \
                else state_raw.lower()
            state_match = CROSSWALK[state_key]
            formatted_city_state_line.append(state_match['ap'])
            state_full = state_match['name']
        else:
            formatted_city_state_line.append(state_raw.upper())
            state_full = state_raw.upper()

    # If the address has a ZIP code, add that to the formatted line.
    if result_list[8] != '':
        formatted_city_state_line.append(result_list[8])

    formatted_address.append(" ".join(formatted_city_state_line))

    # Fill in the highway placeholders. Test against None (not truthiness):
    # placeholders were inserted whenever formatted_highway was assigned,
    # even if the assigned text is empty.
    if formatted_highway is not None:
        first_line = formatted_address[0] % {
            'hwy_fmt': formatted_highway,
            'state_full': state_full,
        }
        if not state_full:
            # No state to prepend: collapse the gap the empty name leaves.
            first_line = " ".join(first_line.split())
        formatted_address[0] = first_line

    return formatted_address
def normalize_intersection(intersection_raw, db_alias=None):
    """
    Break an intersection string into its two road names plus whatever
    city, state and ZIP information accompanied it.

    Takes one required and one optional argument:

        * intersection_raw: the intersection (as a string) to be normalized.
        - db_alias: the name given to the geocoder's database in your
            settings.py file. When falsy, settings.GEOCODER_DB_ALIAS is used
            (or, failing that, the string 'geocoder').

    Returns a dict with the keys 'city', 'state', 'zip', 'first_road' and
    'second_road'. 'city' and 'zip' are None when not supplied.

    Raises IntersectionInputError when no intersection separator is found,
    or when either side of the intersection fails to normalize.
    """
    db_alias = db_alias or getattr(settings, 'GEOCODER_DB_ALIAS', 'geocoder')

    # Look for the separator between the two roads. A hit on the
    # always-an-intersection pattern wins; otherwise fall back to the
    # sometimes-an-intersection pattern.
    searchable = intersection_raw.upper()
    certain_match = ALWAYS_DENOTES_INTERSECTION_RE.search(searchable)
    possible_match = SOMETIMES_DENOTES_INTERSECTION_RE.search(searchable)
    separator = certain_match or possible_match
    if not separator:
        raise IntersectionInputError("Missing second address to be parsed.")

    first_road = intersection_raw[:separator.start()].strip(' ')
    remainder = intersection_raw[separator.end():].strip(' ')

    # The text after the separator holds the second road and, possibly,
    # city/state/ZIP. Prefix a fake house number so normalize_address can
    # pull those pieces apart.
    parsable = normalize_address(" ".join(("1217", remainder)), db_alias)
    if not parsable:
        raise IntersectionInputError(
            'Invalid second road or city/state/ZIP ' 'value.'
        )

    # Strip quotes from every parsed component before reading them.
    parsable = [component.replace('"', '') for component in parsable]

    # Keep the parsed state only if it appears in US_STATES; otherwise use
    # the configured default state.
    state = parsable[7]
    if not (state and state in (entry[0] for entry in US_STATES)):
        state = getattr(settings, 'DEFAULT_GEOCODER_STATE', 'Wisconsin')

    zip_code = parsable[8] or None
    city = parsable[6] or None
    second_road = " ".join(piece for piece in parsable[1:3] if piece != '')

    # Normalize the first road the same way. The city and state used here
    # are placeholders; the normalizer just needs something in those spots.
    spoofed_first = " ".join(("1217", first_road, 'Milwaukee', 'WI'))
    first_road_parsable = normalize_address(spoofed_first, db_alias)
    if not first_road_parsable:
        raise IntersectionInputError('Invalid first road value.')

    first_road = " ".join(
        piece for piece in first_road_parsable[1:3] if piece != ''
    )
    if first_road == '':
        raise IntersectionInputError('Invalid first road value.')

    return {
        'city': city,
        'state': state,
        'zip': zip_code,
        'first_road': first_road,
        'second_road': second_road,
    }