def test_verify_zip_both(fake_requests_get):
    """A nine-digit ZIP without a dash is sent as separate Zip5 and Zip4."""
    expected_url = """http://production.shippingapis.com/ShippingAPI.dll?API=Verify&XML=%3CAddressValidateRequest+USERID%3D%22foo_id%22%3E%3CAddress+ID%3D%220%22%3E%3CAddress1+%2F%3E%3CAddress2%3E6406+Ivy+Lane%3C%2FAddress2%3E%3CCity%3EGreenbelt%3C%2FCity%3E%3CState%3EMD%3C%2FState%3E%3CZip5%3E20770%3C%2FZip5%3E%3CZip4%3E1441%3C%2FZip4%3E%3C%2FAddress%3E%3C%2FAddressValidateRequest%3E"""
    response_xml = """<?xml version="1.0"?> <AddressValidateResponse><Address ID="0"><Address2>6406 IVY LN</Address2><City>GREENBELT</City><State>MD</State><Zip5>20770</Zip5><Zip4>1441</Zip4></Address></AddressValidateResponse>"""

    # The fake only accepts a GET for exactly this URL and answers with an
    # object exposing the canned XML as ``content``.
    expected_call = fake_requests_get.expects_call().with_args(expected_url)
    expected_call.returns(fudge.Fake('Response').has_attr(content=response_xml))

    address = OrderedDict()
    address['address'] = '6406 Ivy Lane'
    address['city'] = 'Greenbelt'
    address['state'] = 'MD'
    address['zip_code'] = '207701441'

    actual = verify('foo_id', address)

    expected = OrderedDict((
        ('address', '6406 IVY LN'),
        ('city', 'GREENBELT'),
        ('state', 'MD'),
        ('zip5', '20770'),
        ('zip4', '1441'),
    ))
    eq(actual, expected)
def test_verify_zip_dash(fake_urlopen):
    """A dashed ZIP+4 ('20770-1441') is sent as separate Zip5 and Zip4."""
    expected_url = """http://production.shippingapis.com/ShippingAPI.dll?API=Verify&XML=%3CAddressValidateRequest+USERID%3D%22foo_id%22%3E%3CAddress+ID%3D%220%22%3E%3CAddress1%2F%3E%3CAddress2%3E6406+Ivy+Lane%3C%2FAddress2%3E%3CCity%3EGreenbelt%3C%2FCity%3E%3CState%3EMD%3C%2FState%3E%3CZip5%3E20770%3C%2FZip5%3E%3CZip4%3E1441%3C%2FZip4%3E%3C%2FAddress%3E%3C%2FAddressValidateRequest%3E"""

    # Expect exactly one urlopen call for the URL above, answered with a
    # file-like object holding the canned XML.
    expected_call = fake_urlopen.expects_call()
    expected_call = expected_call.with_args(expected_url)
    response = StringIO(u"""<?xml version="1.0"?> <AddressValidateResponse><Address ID="0"><Address2>6406 IVY LN</Address2><City>GREENBELT</City><State>MD</State><Zip5>20770</Zip5><Zip4>1441</Zip4></Address></AddressValidateResponse>""")
    expected_call.returns(response)

    address = OrderedDict()
    address['address'] = '6406 Ivy Lane'
    address['city'] = 'Greenbelt'
    address['state'] = 'MD'
    address['zip_code'] = '20770-1441'

    actual = verify('foo_id', address)

    expected = OrderedDict((
        ('address', '6406 IVY LN'),
        ('city', 'GREENBELT'),
        ('state', 'MD'),
        ('zip5', '20770'),
        ('zip4', '1441'),
    ))
    eq(actual, expected)
def test_verify_zip_both(fake_urlopen):
    """A nine-digit ZIP without a dash is sent as separate Zip5 and Zip4."""
    expected_url = """http://production.shippingapis.com/ShippingAPI.dll?API=Verify&XML=%3CAddressValidateRequest+USERID%3D%22foo_id%22%3E%3CAddress+ID%3D%220%22%3E%3CAddress1%2F%3E%3CAddress2%3E6406+Ivy+Lane%3C%2FAddress2%3E%3CCity%3EGreenbelt%3C%2FCity%3E%3CState%3EMD%3C%2FState%3E%3CZip5%3E20770%3C%2FZip5%3E%3CZip4%3E1441%3C%2FZip4%3E%3C%2FAddress%3E%3C%2FAddressValidateRequest%3E"""

    # Expect exactly one urlopen call for the URL above, answered with a
    # file-like object holding the canned XML.
    expected_call = fake_urlopen.expects_call()
    expected_call = expected_call.with_args(expected_url)
    response = StringIO("""<?xml version="1.0"?> <AddressValidateResponse><Address ID="0"><Address2>6406 IVY LN</Address2><City>GREENBELT</City><State>MD</State><Zip5>20770</Zip5><Zip4>1441</Zip4></Address></AddressValidateResponse>""")
    expected_call.returns(response)

    address = OrderedDict()
    address['address'] = '6406 Ivy Lane'
    address['city'] = 'Greenbelt'
    address['state'] = 'MD'
    address['zip_code'] = '207701441'

    actual = verify('foo_id', address)

    expected = OrderedDict((
        ('address', '6406 IVY LN'),
        ('city', 'GREENBELT'),
        ('state', 'MD'),
        ('zip5', '20770'),
        ('zip4', '1441'),
    ))
    eq(actual, expected)
def usps_validate(self):
    """Check the mapped address against the USPS API and record the outcome.

    Sends the two entries of ``self.mapped_lines`` to
    ``address_information.verify`` (as ``address`` and ``city``) and reports
    the result through exactly one of ``self.failed``,
    ``self.matched_partial`` or ``self.matched``.

    Returns:
        None.
    """
    line_1, line_2 = self.mapped_lines
    key = settings.USPS_API_KEY
    data = {'address': line_1, 'city': line_2}

    try:
        result = address_information.verify(key, data)
    except ValueError as e:
        # str(e) instead of e.message: exceptions have no ``message``
        # attribute on Python 3, so the old handler raised AttributeError
        # instead of reporting the failure.
        self.failed(str(e))
        return

    return_text = result.get('returntext')
    if return_text:
        # USPS attached a remark to the match; surface it to the caller.
        self.matched_partial(result, "(USPS): " + return_text)
    elif self.status == MAPPED_PARTIAL:
        # Good USPS match, partial Google match
        self.matched_partial(result, self.message)
    else:
        assert (self.status == MAPPED)  # Belt and suspenders
        self.matched(result, "Address is fully matched and is deliverable.")
def usps_validate(self):
    """Check the mapped address against the USPS API and record the outcome.

    Sends the two entries of ``self.mapped_lines`` to
    ``address_information.verify`` (as ``address`` and ``city``) and reports
    the result through exactly one of ``self.failed``,
    ``self.matched_partial`` or ``self.matched``.

    Returns:
        None.
    """
    line_1, line_2 = self.mapped_lines
    key = settings.USPS_API_KEY
    data = {
        'address': line_1,
        'city': line_2,
    }

    try:
        result = address_information.verify(key, data)
    except ValueError as e:
        # str(e) instead of e.message: exceptions have no ``message``
        # attribute on Python 3, so the old handler raised AttributeError
        # instead of reporting the failure.
        self.failed(str(e))
        return

    return_text = result.get('returntext')
    if return_text:
        # USPS attached a remark to the match; surface it to the caller.
        self.matched_partial(result, "(USPS): " + return_text)
    elif self.status == MAPPED_PARTIAL:
        # Good USPS match, partial Google match
        self.matched_partial(result, self.message)
    else:
        assert (self.status == MAPPED)  # Belt and suspenders
        self.matched(result, "Address is fully matched and is deliverable.")
def normalize_address(address):
    """Normalize a domestic (US) address through the USPS verify API.

    Args:
        address: mapping with ``country``, ``postal_code``, ``state``,
            ``city`` and ``line1`` keys; ``line2`` is optional.

    Returns:
        A ``(normalized, changed)`` tuple:

        * non-US address: ``(address, False)``, the input is returned as is;
        * USPS rejected the address (``ValueError``): ``(None, True)``;
        * otherwise a new dict keyed by the values of ``USPS_ADDRESS_KEYS``
          plus ``country='US'``, and ``changed`` set when any field differs
          from the input, ignoring case.
    """
    if address['country'] != 'US':
        return address, False

    addr = {
        'zip_code': address['postal_code'],
        'state': address['state'],
        'city': address['city'],
        'address': address['line1'],
    }
    line2 = address.get('line2')
    if line2:
        addr['address_extended'] = line2

    # Only the API call is expected to raise ValueError, so keep the try
    # narrow and let programming errors in the comparison below surface.
    try:
        usps_addr = address_information.verify(
            current_app.config['USPS_USER_ID'], addr)
    except ValueError:
        return None, True

    norm_addr = {}
    changed = False
    for k_frm, k_to in USPS_ADDRESS_KEYS.items():
        norm_addr[k_to] = usps_addr.get(k_frm)
        # .get(): optional input fields (e.g. line2) may be absent, which
        # must compare as empty rather than raise KeyError.
        if (norm_addr[k_to] or '').lower() != (address.get(k_to) or '').lower():
            changed = True
    norm_addr['country'] = 'US'
    return norm_addr, changed
def _save_venues( events, events_coll, usps_id, now, ): # Don't waste a call to the USPS API if not events: return venues = [event['facebook']['venue'] for event in events] usps_venues = [ OrderedDict([ ('address', venue['street']), ('city', venue['city']), ('state', venue['state']), ]) for venue in venues ] matches = address_information.verify( usps_id, *usps_venues ) # TODO fugly if len(usps_venues) == 1: matches = [matches] for (event,match) in zip(events,matches): if isinstance(match, ValueError): _mark_as_failed( events_coll=events_coll, event_id=event['_id'], now=now, field='normalization_failed', reason=str(match), ) continue match['country'] = 'US' save = OrderedDict([ ('normalized', match), ('ubernear.normalization_completed', now), ('ubernear.normalization_source', 'usps'), ]) log.debug( 'Storing normalized venue for {event_id}'.format( event_id=event['_id'], ) ) mongo.save_no_replace( events_coll, _id=event['_id'], save=save, )
def gen_standardize_address(addr1, addr2, key, results, usps_key):
    """Store a USPS-standardized New York address in ``results[key]``.

    ``addr1`` is sent as the street address and ``addr2`` as the city, with
    the state fixed to ``'NY'``. On success the stored value is
    ``"<address>, <city> <state> <zip5>[-<zip4>]"``. On any failure the
    stored value falls back to ``"<addr1>, <addr2>"``.
    """
    query = {'address': addr1, 'city': addr2, 'state': 'NY'}
    try:
        match = address_information.verify(usps_key, query)
        # The "-zip4" suffix is only added when USPS supplied a non-empty one.
        if ('zip4' in match) and match['zip4']:
            plus4 = "-{}".format(match['zip4'])
        else:
            plus4 = ''
        results[key] = "{}, {} {} {}{}".format(
            match['address'],
            match['city'],
            match['state'],
            match['zip5'],
            plus4,
        )
    except Exception:
        # Best effort: keep the caller's original text when lookup fails.
        results[key] = "{}, {}".format(addr1, addr2)
def address_lookup(batch, usps_key):
    """Replace donor addresses in ``batch`` with USPS-standardized ones.

    Args:
        batch: iterable of row dicts holding ``ContributorAddr1`` (street)
            and ``ContributorAddr2`` (city followed by other text), or
            ``None``. ``None`` entries are skipped.
        usps_key: USPS user id passed to ``address_information.verify``.

    Returns:
        A list with a copy of every non-``None`` row. Rows for which USPS
        returned a usable match have ``ContributorAddr1`` and
        ``ContributorAddr2`` overwritten; all other rows are returned
        unchanged. ``[]`` when ``batch`` is ``None``.
    """
    logging.info("address_lookup processing batch: {}".format(str(batch)))
    if batch is None:
        return []

    # Form donor addresses. Rows that cannot be turned into a query are not
    # submitted, so remember which query belongs to which batch position;
    # indexing the results by batch position would pair rows with the
    # wrong addresses.
    post_data = []
    post_index = {}
    for i, row in enumerate(batch):
        try:
            addr1 = row['ContributorAddr1']
            addr2 = row['ContributorAddr2']
            separator = ',' if ',' in addr2 else ' '
            city = addr2.split(separator)[0]
        except Exception as e:
            logging.error(
                "Could not append to post_data in address_lookup: {}".format(
                    e))
            continue
        post_index[i] = len(post_data)
        post_data.append({'address': addr1, 'city': city, 'state': 'NY'})

    # Submit batch to API
    recv_data = []
    if post_data:
        try:
            recv_data = address_information.verify(usps_key, *post_data)
        except Exception as e:
            # e.g. there was only one entry in the batch, and it failed
            logging.error(
                "Caught exception posting to address_information.verify: {}".
                format(e))
        else:
            # A single-address request returns the match itself, not a list.
            if not isinstance(recv_data, list):
                recv_data = [recv_data]

    # Match
    output = []
    for i, row in enumerate(batch):
        if row is None:
            continue
        out_dct = row.copy()
        idx = post_index.get(i)
        if idx is not None and idx < len(recv_data):
            match = recv_data[idx]
            # Try and use formatted address
            try:
                out_dct['ContributorAddr1'] = match['address']
                zip5 = match['zip5']
                if isinstance(zip5, int):  # So defensive
                    zip5 = "{:0.0f}".format(zip5)
                out_dct['ContributorAddr2'] = "{}, {} {}".format(
                    match['city'], match['state'], zip5)
            except Exception as e:
                # Output from pyusps is Exception not dict, etc.
                logging.error(
                    "Caught exception building out_dct in address_lookup: {}".
                    format(e))
        output.append(out_dct)
    return output
def test_verify_multiple(fake_requests_get):
    """Two addresses in one request come back as a list in request order."""
    expected_url = """http://production.shippingapis.com/ShippingAPI.dll?API=Verify&XML=%3CAddressValidateRequest+USERID%3D%22foo_id%22%3E%3CAddress+ID%3D%220%22%3E%3CAddress1+%2F%3E%3CAddress2%3E6406+Ivy+Lane%3C%2FAddress2%3E%3CCity%3EGreenbelt%3C%2FCity%3E%3CState%3EMD%3C%2FState%3E%3CZip5+%2F%3E%3CZip4+%2F%3E%3C%2FAddress%3E%3CAddress+ID%3D%221%22%3E%3CAddress1+%2F%3E%3CAddress2%3E8+Wildwood+Drive%3C%2FAddress2%3E%3CCity%3EOld+Lyme%3C%2FCity%3E%3CState%3ECT%3C%2FState%3E%3CZip5+%2F%3E%3CZip4+%2F%3E%3C%2FAddress%3E%3C%2FAddressValidateRequest%3E"""
    response_xml = """<?xml version="1.0"?> <AddressValidateResponse><Address ID="0"><Address2>6406 IVY LN</Address2><City>GREENBELT</City><State>MD</State><Zip5>20770</Zip5><Zip4>1441</Zip4></Address><Address ID="1"><Address2>8 WILDWOOD DR</Address2><City>OLD LYME</City><State>CT</State><Zip5>06371</Zip5><Zip4>1844</Zip4></Address></AddressValidateResponse>"""

    # The fake only accepts a GET for exactly this URL and answers with an
    # object exposing the canned XML as ``content``.
    expected_call = fake_requests_get.expects_call().with_args(expected_url)
    expected_call.returns(fudge.Fake('Response').has_attr(content=response_xml))

    greenbelt = OrderedDict((
        ('address', '6406 Ivy Lane'),
        ('city', 'Greenbelt'),
        ('state', 'MD'),
    ))
    old_lyme = OrderedDict((
        ('address', '8 Wildwood Drive'),
        ('city', 'Old Lyme'),
        ('state', 'CT'),
    ))

    actual = verify('foo_id', greenbelt, old_lyme)

    expected = [
        OrderedDict((
            ('address', '6406 IVY LN'),
            ('city', 'GREENBELT'),
            ('state', 'MD'),
            ('zip5', '20770'),
            ('zip4', '1441'),
        )),
        OrderedDict((
            ('address', '8 WILDWOOD DR'),
            ('city', 'OLD LYME'),
            ('state', 'CT'),
            ('zip5', '06371'),
            ('zip4', '1844'),
        )),
    ]
    eq(actual, expected)
def test_verify_api_address_error_multiple(fake_requests_get):
    """In a multi-address request a per-address error is returned in place.

    The first address verifies normally. The second gets an ``<Error>``
    element, which is expected to show up as a ``ValueError`` object at
    index 1 of the result instead of being raised.
    """
    expected_url = """http://production.shippingapis.com/ShippingAPI.dll?API=Verify&XML=%3CAddressValidateRequest+USERID%3D%22foo_id%22%3E%3CAddress+ID%3D%220%22%3E%3CAddress1+%2F%3E%3CAddress2%3E6406+Ivy+Lane%3C%2FAddress2%3E%3CCity%3EGreenbelt%3C%2FCity%3E%3CState%3EMD%3C%2FState%3E%3CZip5+%2F%3E%3CZip4+%2F%3E%3C%2FAddress%3E%3CAddress+ID%3D%221%22%3E%3CAddress1+%2F%3E%3CAddress2%3E8+Wildwood+Drive%3C%2FAddress2%3E%3CCity%3EOld+Lyme%3C%2FCity%3E%3CState%3ENJ%3C%2FState%3E%3CZip5+%2F%3E%3CZip4+%2F%3E%3C%2FAddress%3E%3C%2FAddressValidateRequest%3E"""
    response_xml = """<?xml version="1.0"?> <AddressValidateResponse><Address ID="0"><Address2>6406 IVY LN</Address2><City>GREENBELT</City><State>MD</State><Zip5>20770</Zip5><Zip4>1441</Zip4></Address><Address ID="1"><Error><Number>-2147219400</Number><Source>API_AddressCleancAddressClean.CleanAddress2;SOLServer.CallAddressDll</Source><Description>Invalid City.</Description><HelpFile></HelpFile><HelpContext>1000440</HelpContext></Error></Address></AddressValidateResponse>"""

    # The fake only accepts a GET for exactly this URL and answers with an
    # object exposing the canned XML as ``content``.
    expected_call = fake_requests_get.expects_call().with_args(expected_url)
    expected_call.returns(fudge.Fake('Response').has_attr(content=response_xml))

    valid_address = OrderedDict((
        ('address', '6406 Ivy Lane'),
        ('city', 'Greenbelt'),
        ('state', 'MD'),
    ))
    invalid_address = OrderedDict((
        ('address', '8 Wildwood Drive'),
        ('city', 'Old Lyme'),
        ('state', 'NJ'),
    ))

    actual = verify('foo_id', valid_address, invalid_address)

    # eq does not work with exceptions. Process each item manually.
    eq(len(actual), 2)
    expected_first = OrderedDict((
        ('address', '6406 IVY LN'),
        ('city', 'GREENBELT'),
        ('state', 'MD'),
        ('zip5', '20770'),
        ('zip4', '1441'),
    ))
    eq(actual[0], expected_first)
    assert_errors_equal(
        actual[1],
        ValueError('-2147219400: Invalid City.'),
    )
def test_verify_multiple(fake_urlopen):
    """Two addresses in one request come back as a list in request order."""
    expected_url = """https://production.shippingapis.com/ShippingAPI.dll?API=Verify&XML=%3CAddressValidateRequest+USERID%3D%22foo_id%22%3E%3CAddress+ID%3D%220%22%3E%3CAddress1%2F%3E%3CAddress2%3E6406+Ivy+Lane%3C%2FAddress2%3E%3CCity%3EGreenbelt%3C%2FCity%3E%3CState%3EMD%3C%2FState%3E%3CZip5%2F%3E%3CZip4%2F%3E%3C%2FAddress%3E%3CAddress+ID%3D%221%22%3E%3CAddress1%2F%3E%3CAddress2%3E8+Wildwood+Drive%3C%2FAddress2%3E%3CCity%3EOld+Lyme%3C%2FCity%3E%3CState%3ECT%3C%2FState%3E%3CZip5%2F%3E%3CZip4%2F%3E%3C%2FAddress%3E%3C%2FAddressValidateRequest%3E"""

    # Expect exactly one urlopen call for the URL above, answered with a
    # file-like object holding the canned XML.
    expected_call = fake_urlopen.expects_call()
    expected_call = expected_call.with_args(expected_url)
    response = StringIO(u"""<?xml version="1.0"?> <AddressValidateResponse><Address ID="0"><Address2>6406 IVY LN</Address2><City>GREENBELT</City><State>MD</State><Zip5>20770</Zip5><Zip4>1441</Zip4></Address><Address ID="1"><Address2>8 WILDWOOD DR</Address2><City>OLD LYME</City><State>CT</State><Zip5>06371</Zip5><Zip4>1844</Zip4></Address></AddressValidateResponse>""")
    expected_call.returns(response)

    greenbelt = OrderedDict((
        ('address', '6406 Ivy Lane'),
        ('city', 'Greenbelt'),
        ('state', 'MD'),
    ))
    old_lyme = OrderedDict((
        ('address', '8 Wildwood Drive'),
        ('city', 'Old Lyme'),
        ('state', 'CT'),
    ))

    actual = verify('foo_id', greenbelt, old_lyme)

    expected = [
        OrderedDict((
            ('address', '6406 IVY LN'),
            ('city', 'GREENBELT'),
            ('state', 'MD'),
            ('zip5', '20770'),
            ('zip4', '1441'),
        )),
        OrderedDict((
            ('address', '8 WILDWOOD DR'),
            ('city', 'OLD LYME'),
            ('state', 'CT'),
            ('zip5', '06371'),
            ('zip4', '1844'),
        )),
    ]
    eq(actual, expected)
def test_verify_api_address_error_multiple(fake_urlopen):
    """In a multi-address request a per-address error is returned in place.

    The first address verifies normally. The second gets an ``<Error>``
    element, which is expected to show up as a ``ValueError`` object at
    index 1 of the result instead of being raised.
    """
    expected_url = """https://production.shippingapis.com/ShippingAPI.dll?API=Verify&XML=%3CAddressValidateRequest+USERID%3D%22foo_id%22%3E%3CAddress+ID%3D%220%22%3E%3CAddress1%2F%3E%3CAddress2%3E6406+Ivy+Lane%3C%2FAddress2%3E%3CCity%3EGreenbelt%3C%2FCity%3E%3CState%3EMD%3C%2FState%3E%3CZip5%2F%3E%3CZip4%2F%3E%3C%2FAddress%3E%3CAddress+ID%3D%221%22%3E%3CAddress1%2F%3E%3CAddress2%3E8+Wildwood+Drive%3C%2FAddress2%3E%3CCity%3EOld+Lyme%3C%2FCity%3E%3CState%3ENJ%3C%2FState%3E%3CZip5%2F%3E%3CZip4%2F%3E%3C%2FAddress%3E%3C%2FAddressValidateRequest%3E"""

    # Expect exactly one urlopen call for the URL above, answered with a
    # file-like object holding the canned XML.
    expected_call = fake_urlopen.expects_call()
    expected_call = expected_call.with_args(expected_url)
    response = StringIO(u"""<?xml version="1.0"?> <AddressValidateResponse><Address ID="0"><Address2>6406 IVY LN</Address2><City>GREENBELT</City><State>MD</State><Zip5>20770</Zip5><Zip4>1441</Zip4></Address><Address ID="1"><Error><Number>-2147219400</Number><Source>API_AddressCleancAddressClean.CleanAddress2;SOLServer.CallAddressDll</Source><Description>Invalid City.</Description><HelpFile></HelpFile><HelpContext>1000440</HelpContext></Error></Address></AddressValidateResponse>""")
    expected_call.returns(response)

    valid_address = OrderedDict((
        ('address', '6406 Ivy Lane'),
        ('city', 'Greenbelt'),
        ('state', 'MD'),
    ))
    invalid_address = OrderedDict((
        ('address', '8 Wildwood Drive'),
        ('city', 'Old Lyme'),
        ('state', 'NJ'),
    ))

    actual = verify('foo_id', valid_address, invalid_address)

    # eq does not work with exceptions. Process each item manually.
    eq(len(actual), 2)
    expected_first = OrderedDict((
        ('address', '6406 IVY LN'),
        ('city', 'GREENBELT'),
        ('state', 'MD'),
        ('zip5', '20770'),
        ('zip4', '1441'),
    ))
    eq(actual[0], expected_first)
    assert_errors_equal(
        actual[1],
        ValueError('-2147219400: Invalid City.'),
    )
def vf_standardize_address(row, results, usps_key):
    """Used for the NY State Voter File only.

    Builds a street address from the voter-file columns of ``row``
    (``RADDNUMBER``, ``RHALFCODE``, ``RPREDIRECTION``, ``RSTREETNAME``,
    ``RPOSTDIRECTION``, ``RAPARTMENT``), asks USPS to standardize it, and
    stores the outcome in ``results[row['SBOEID']]``.

    On success the stored value is
    ``"<address>, <city> <state> <zip5>[-<zip4>]"``. On any failure it is
    the upper-cased address assembled from the row.

    Args:
        row: mapping (e.g. a pandas row) with the columns named above plus
            ``RCITY`` and ``SBOEID``; null cells are treated as empty.
        results: dict updated in place.
        usps_key: USPS user id passed to ``address_information.verify``.
    """
    def blank_if_null(column):
        value = row[column]
        return '' if pd.isnull(value) else value

    rhalfcode = blank_if_null('RHALFCODE')
    raddnumber = blank_if_null('RADDNUMBER')
    rpredirection = blank_if_null('RPREDIRECTION')
    rstreetname = blank_if_null('RSTREETNAME')
    rpostdirection = blank_if_null('RPOSTDIRECTION')
    rapartment = blank_if_null('RAPARTMENT')

    # Decide on the null-normalized value: testing the raw cell let a null
    # apartment fall through to the "APT" template and produced a dangling
    # "APT " suffix.
    apartment_text = str(rapartment).upper()
    if rapartment == '' or 'APT' in apartment_text or 'UNIT' in apartment_text:
        template = "{} {} {} {} {} {}"
    else:
        # Bare apartment designator: label it so USPS can parse it.
        template = "{} {} {} {} {} APT {}"
    address = template.format(
        raddnumber, rhalfcode, rpredirection, rstreetname, rpostdirection,
        rapartment)

    try:
        address = address.upper()
        addr = {'address': address, 'city': row['RCITY'], 'state': 'NY'}
        result = address_information.verify(usps_key, addr)
        # .get(): a match without a zip4 must not be thrown away.
        zip4 = "-{}".format(result['zip4']) if result.get('zip4') else ''
        results[row['SBOEID']] = "{}, {} {} {}{}".format(
            result['address'], result['city'], result['state'],
            result['zip5'], zip4)
    except Exception:
        # Best effort: keep the locally assembled address when lookup fails.
        results[row['SBOEID']] = address
# Verify every address listed in addresses.txt (tab-separated, header row,
# columns: city, state, address, zip code) against the USPS API and write
# the verified addresses to output.txt.
from pyusps import address_information
from csv import reader

addresses = list()

# NOTE(review): the header announces City/State/Address/Zip Code, but the
# rows written below are ordered address, city, state, zip. Confirm which
# order consumers of output.txt expect before changing either.
with open('output.txt', 'w') as f:
    f.write('City\tState\tAddress\tZip Code\n')

with open('addresses.txt', 'r') as f:
    r = reader(f, delimiter='\t')
    # Skip the first row.
    next(r)
    for row in r:
        city, state, addr, zip_code = row
        address = {
            'address': addr,
            'city': city,
            'state': state,
            'zip_code': zip_code,
        }
        try:
            addresses.append(
                address_information.verify("033NONE01173", address))
        except Exception:
            # ``Exception`` rather than a bare except, so Ctrl-C still
            # stops the run.
            print("Check address " + address['address'])
            print(address)

# Open the output once for all rows; opening it per row without closing
# leaked a file handle for every address.
with open('output.txt', 'a') as out:
    for address in addresses:
        try:
            addr = "\t".join([
                address['address'],
                address['city'],
                address['state'],
                address['zip5'] + "-" + address['zip4'],
            ])
            print(addr)
            out.write(addr + '\n')
        except Exception:
            print("There was an error")
def verify_with_usps(self, addresses):
    """Verify ``addresses`` with USPS, returning ``False`` on any failure.

    Args:
        addresses: iterable of address dicts, each passed as a separate
            positional argument to ``address_information.verify``.

    Returns:
        Whatever ``address_information.verify`` returns, or ``False`` when
        the call raises.
    """
    # Needs to be in a try block for the usps verify method to not raise
    # an error. ``Exception`` rather than a bare except, so that
    # KeyboardInterrupt and SystemExit are not swallowed.
    try:
        return address_information.verify(self.usps_id, *addresses)
    except Exception:
        return False
def standardize_address(batch, usps_key):
    """Attach USPS-standardized voter addresses and a match string to rows.

    Args:
        batch: iterable of voter-file row dicts (``RADDNUMBER``,
            ``RSTREETNAME``, ``RAPARTMENT``, ``RCITY``, ``RZIP5``,
            ``LASTNAME``, ``FIRSTNAME``), or ``None``. ``None`` entries are
            skipped.
        usps_key: USPS user id passed to ``address_information.verify``.

    Returns:
        A list with a copy of every non-``None`` row, extended with:

        * ``voter_addr1`` / ``voter_addr2``: the USPS match when one is
          available, otherwise the locally assembled street address (or
          ``None`` if it could not be assembled) and
          ``"<RCITY>, NY <RZIP5>"``;
        * ``match_string``: ``"<LASTNAME> <FIRSTNAME>, <addr1>, <addr2>"``.

        ``[]`` when ``batch`` is ``None``.
    """
    if batch is None:
        return []

    # Form voter addresses. Only rows that produced a query are submitted;
    # post_index maps a batch position to its position in post_data so the
    # results can be paired up again afterwards.
    post_data = []
    post_index = {}
    for i, row in enumerate(batch):
        try:
            raddnumber = row['RADDNUMBER'].strip() if row['RADDNUMBER'] else ""
            rstreetname = (
                row['RSTREETNAME'].strip() if row['RSTREETNAME'] else "")
            rapartment = row['RAPARTMENT'] or ""
            if rapartment and not any(
                    tag in rapartment for tag in ('APT', 'UNIT', 'PH')):
                # Bare apartment designator: label it for USPS.
                rapartment = "APT {}".format(rapartment)
            formed = {
                'address': u" ".join([raddnumber, rstreetname, rapartment]),
                'city': row['RCITY'],
                'state': 'NY',
            }
        except Exception as e:
            # logging.info, not logging.Info: the latter does not exist, so
            # a single malformed row used to abort the whole batch.
            logging.info(
                "Could not form address in standardize_address, error: {}".
                format(e))
            continue
        post_index[i] = len(post_data)
        post_data.append(formed)

    # Submit batch to API
    recv_data = []
    if post_data:
        try:
            recv_data = address_information.verify(usps_key, *post_data)
        except Exception as e:
            logging.error(
                "Caught exception posting to standardize_address: {}".format(
                    e))
        else:
            # A single-address request returns the match itself, not a list.
            if not isinstance(recv_data, list):
                recv_data = [recv_data]

    # Match
    output = []
    for i, row in enumerate(batch):
        if row is None:
            continue
        out_dct = row.copy()
        idx = post_index.get(i)
        has_match = idx is not None and idx < len(recv_data)
        match = recv_data[idx] if has_match else None

        # Try and use formatted address
        try:
            out_dct['voter_addr1'] = match['address']
            zip5 = match['zip5']
            if isinstance(zip5, int):  # So defensive
                zip5 = "{:0.0f}".format(zip5)
            out_dct['voter_addr2'] = "{}, {} {}".format(
                match['city'], match['state'], zip5)
        except Exception:
            # No match, or output from pyusps is Exception not dict, etc.;
            # fall back on constructed string
            if idx is not None:
                out_dct['voter_addr1'] = post_data[idx]['address']
            else:
                out_dct['voter_addr1'] = None
            out_dct['voter_addr2'] = "{}, NY {}".format(
                row['RCITY'], row['RZIP5'])

        # Form match string, whatever happens
        out_dct['match_string'] = "{} {}, {}, {}".format(
            out_dct['LASTNAME'].strip(), out_dct['FIRSTNAME'].strip(),
            out_dct['voter_addr1'], out_dct['voter_addr2'])
        output.append(out_dct)
    return output
def main():
    """Import places from a CSV file into the places collection.

    Command-line driven: ``--csv``, ``--config`` and ``--db-config`` are
    required, ``-v/--verbose`` switches logging to DEBUG. For every CSV
    row the non-empty fields are stored under ``info``, valid coordinates
    are stored as ``ubernear.location`` in ``[lng, lat]`` order, and the
    address is normalized through the USPS API when possible. Rows without
    an address are skipped. Indices on the collection are created once all
    rows have been processed.
    """
    parser = optparse.OptionParser(
        usage='%prog [OPTS]',
    )
    parser.add_option(
        '-v', '--verbose',
        help='Verbose mode [default %default]',
        action="store_true", dest="verbose"
    )
    parser.add_option(
        '--csv',
        help='Path to the CSV file containing the places to import',
        metavar='PATH',
    )
    parser.add_option(
        '--config',
        help=('Path to the config file with information on how to '
              'import places'
              ),
        metavar='PATH',
    )
    parser.add_option(
        '--db-config',
        help=('Path the to file with information on how to '
              'retrieve and store data in the database'
              ),
        metavar='PATH',
    )
    parser.set_defaults(
        verbose=False,
    )

    options, args = parser.parse_args()
    if args:
        parser.error('Wrong number of arguments.')

    if options.csv is None:
        parser.error('Missing option --csv=.')
    if options.config is None:
        parser.error('Missing option --config=.')
    if options.db_config is None:
        parser.error('Missing option --db-config=.')

    logging.basicConfig(
        level=logging.DEBUG if options.verbose else logging.INFO,
        format='%(asctime)s.%(msecs)03d %(name)s: %(levelname)s: %(message)s',
        datefmt='%Y-%m-%dT%H:%M:%S',
    )

    places_csv = absolute_path(options.csv)
    config = config_parser(options.config)
    coll = collections(options.db_config)
    places_coll = coll['places-collection']

    usps_id = config.get('usps', 'user_id')
    delimiter = config.get('csv', 'delimiter')
    # The config stores the delimiter escaped (e.g. "\t"); unescape it.
    delimiter = delimiter.decode('string-escape')

    fieldnames = [
        'id',
        'name',
        'address',
        'address_extended',
        'po_box',
        'locality',
        'region',
        'country',
        'postcode',
        'tel',
        'fax',
        'category',
        'website',
        'email',
        'latitude',
        'longitude',
        'status',
    ]

    # USPS query key -> CSV field it is filled from.
    usps_fields = (
        ('city', 'locality'),
        ('state', 'region'),
        ('zip_code', 'postcode'),
        ('address_extended', 'address_extended'),
    )

    log.info('Start...')

    with open(places_csv, 'rb') as places_fp:
        places = csv.DictReader(
            places_fp,
            delimiter=delimiter,
            fieldnames=fieldnames,
        )

        for place in places:
            # Don't store empty fields
            save = defaultdict(dict)
            info = save['info']
            for k, v in place.iteritems():
                if v != '':
                    info[k] = v

            try:
                lat = float(info['latitude'])
                lng = float(info['longitude'])
            except (KeyError, ValueError):
                log.debug(
                    'Did not find a valid latitude and longitude for place '
                    '{_id}'.format(
                        _id=info['id'],
                    )
                )
            else:
                info['latitude'] = lat
                info['longitude'] = lng
                # Coordinates are always stored in the form [lng,lat],
                # in that order. Anything else might result in incorrect
                # MongoDB Geospatial queries.
                save['ubernear.location'] = [lng, lat]

                error_msg = ('Bad coordinates (lng,lat) {coord} for id '
                             '{_id}'
                             )
                error_msg = error_msg.format(
                    coord=(lng, lat),
                    _id=info['id']
                )
                if (lng < -180 or lng >= 180) or (lat < -90 or lat > 90):
                    log.error(error_msg)
                    del info['latitude']
                    del info['longitude']
                    del save['ubernear.location']

            if 'address' not in info:
                log.error(
                    'Found place {_id} with no address information. '
                    'Skipping'.format(
                        _id=info['id'],
                    )
                )
                continue

            # Empty CSV fields were dropped above, so only pass on the
            # parts that are present instead of raising KeyError. The ZIP
            # is sent as 'zip_code'; it was previously sent as 'zipcode'.
            match = {'address': info['address']}
            for usps_field, csv_field in usps_fields:
                if csv_field in info:
                    match[usps_field] = info[csv_field]

            try:
                norm = address_information.verify(usps_id, match)
            except Exception:
                # ``Exception`` rather than a bare except, so Ctrl-C still
                # stops the import.
                log.error(
                    'The USPS API could not find an address for place '
                    '{_id}'.format(
                        _id=info['id'],
                    )
                )
            else:
                if 'name' in info:
                    norm['name'] = info['name'].upper()
                norm['country'] = 'US'
                save['normalized'] = norm
                save['ubernear.normalization_source'] = 'usps'

            save['ubernear.source'] = 'factual'

            mongo.save_no_replace(
                places_coll,
                _id=info['id'],
                save=save,
            )

    indices = [
        {'ubernear.location': pymongo.GEO2D},
        {'ubernear.last_checked': pymongo.ASCENDING},
    ]
    mongo.create_indices(
        collection=places_coll,
        indices=indices,
    )

    log.info('End')