def x_test_errors(self):
    """Dump the parser output for every address in the 'errors' partition.

    Loads the bundle module from a hard-coded path, finds the partition with
    ``table='businesses'`` and ``grain='errors'``, parses the ``address``
    column of each row and writes the result to
    ``../support/business_addresses.out.csv`` (relative to this file) using
    ``self.header`` as the CSV columns.

    NOTE(review): the bundle path is specific to one developer's machine, and
    the ``x_`` name prefix presumably keeps the test runner from collecting
    this method -- confirm before re-enabling.
    """
    from ambry.geo.address import Parser
    import imp
    import os
    import csv

    address_parser = Parser()

    bundle_module = imp.load_source(
        'bundle',
        '/Users/eric/proj/Bundles/src/civicdata/sandiego.gov/sandiego.gov-businesses-orig/bundle.py')
    errors_partition = bundle_module.Bundle().partitions.find(table='businesses', grain='errors')

    here = os.path.dirname(__file__)
    out_path = os.path.join(here, '../support', 'business_addresses.out.csv')

    with open(out_path, 'w') as out:
        writer = csv.DictWriter(out, self.header)
        writer.writeheader()

        for row in errors_partition.query("SELECT * FROM businesses"):
            parsed = address_parser.parse(row['address'])

            # One CSV row per address: the parsed fields plus the raw input
            # and the parser's string rendering for side-by-side comparison.
            record = parsed.as_dict()
            record['input'] = row['address'].strip()
            record['output'] = str(parsed)
            writer.writerow(record)
def test_address_files(self):
    """Parse every line of ``../support/crime_addresses.txt`` and report results.

    Every line that parses to a truthy result is written as a row to
    ``../support/crime_addresses.out.csv`` (columns from ``self.header``),
    with the stripped line in ``input`` and ``str()`` of the parse result in
    ``output``.  A line counts as a failure when parsing raises, when the
    result is falsy, or when the result has no city.  A summary line with the
    failure percentage is printed at the end; it raises ``ZeroDivisionError``
    if the input file contains no lines.

    Prints use the parenthesised single-argument form, which produces the
    same output as a Python 2 print statement and as a Python 3 call.
    """
    import os
    from ambry.geo.address import Parser
    import csv

    address_parser = Parser()

    n_ok = 0
    n_failed = 0
    n_total = 0

    filename = "crime_addresses"
    support_dir = os.path.join(os.path.dirname(__file__), '../support')
    f_input = os.path.join(support_dir, filename + '.txt')
    f_output = os.path.join(support_dir, filename + '.out.csv')

    with open(f_output, 'w') as out:
        writer = csv.DictWriter(out, self.header)
        writer.writeheader()

        with open(f_input) as f:
            for line in f:
                n_total += 1
                stripped = line.strip()

                print('----')
                print(stripped)

                try:
                    # The raw line (including its newline) goes to the parser.
                    parsed = address_parser.parse(line)
                    if not parsed:
                        n_failed += 1
                        continue
                except Exception as e:
                    print("ERROR" + " " + str(e))
                    n_failed += 1
                    continue

                record = parsed.as_dict()
                record['input'] = stripped
                record['output'] = str(parsed)
                writer.writerow(record)

                if parsed.city:
                    n_ok += 1
                else:
                    # Parsed, but without a city: still written to the CSV
                    # above, and echoed here for inspection.
                    n_failed += 1
                    print(record)
                    print(parsed)
                    print('')

    failure_pct = round(float(n_failed) / float(n_total) * 100, 3)

    print('')
    print("total={} success={} failure={} rate={}".format(n_total, n_ok, n_failed, failure_pct))
def __init__(self,partition):
    """Set up the partition handle, an empty address cache and a parser.

    ``partition`` is stored as ``self.p``; ``self.address_cache`` starts as an
    empty dict and ``self.parser`` is a fresh ``ambry.geo.address.Parser``.

    NOTE(review): this definition sits outside any class and has the same
    body as ``Geocoder.__init__`` -- confirm whether this copy is intentional.
    """
    from ambry.geo.address import Parser

    self.parser = Parser()
    self.address_cache = dict()
    self.p = partition
class Geocoder(object):
    """Resolve address strings to ``address_id`` values from a geocoder table.

    Addresses are parsed with ``ambry.geo.address.Parser`` and looked up in
    the ``geocoder`` table of the partition given at construction.  Lookup
    results, including misses, are cached in ``address_cache`` keyed by the
    parsed address's ``hash``.
    """

    def __init__(self,partition):
        """Store ``partition`` and create an empty cache and a parser."""
        from ambry.geo.address import Parser

        self.p = partition
        self.address_cache = {}
        self.parser = Parser()

    def parse_and_code(self, addrstr, city=None, state=None, zip=None):
        """Parse ``addrstr`` and look up its address id.

        Returns a tuple ``(address_id, adr)`` where ``adr`` is the parser's
        result and ``address_id`` is the ``address_id`` of the matching
        geocoder record, or ``None`` when no record matched.  Both hits and
        misses are cached, so each distinct ``adr.hash`` is queried once.
        """
        adr = self.parser.parse(addrstr, city=city, state=state, zip=zip)

        if adr.hash not in self.address_cache:
            r = self.geocode(**adr.args)
            # Misses are stored as None so they are not queried again.
            self.address_cache[adr.hash] = r['address_id'] if r else None

        return self.address_cache[adr.hash], adr

    def geocode(self, number, name, direction=None, suffix=None,
                city=None, state=None, zip=None):
        '''Return a record from the geocoder table.

        This function expects a partition, p, that holds a table named
        'geocoder', of the same structure as used in
        clarinova.com-geocode-casnd

        The candidate must match ``name`` and ``direction`` exactly, lie
        within 100 of ``number``, and match at least two of city, zip and
        suffix (10 points each, score >= 20).  The closest house number wins.
        Missing ``direction``, ``suffix``, ``city`` and ``state`` are replaced
        by ``'-'``; a missing or non-numeric ``zip`` becomes ``-1``.

        Returns whatever ``.first()`` yields on the query result.
        '''
        direction = direction or '-'
        suffix = suffix or '-'
        city = city or '-'
        # state defaults to None, so it needs a placeholder before .upper()
        # below.  The query does not reference :state; '-' simply mirrors the
        # placeholders used for the other optional fields.
        state = state or '-'

        try:
            zip = int(zip) if zip else -1
        except (TypeError, ValueError, OverflowError):
            # Not convertible to an integer: treat as "no zip".
            zip = -1

        q = """
        SELECT
            *,
            (
                CASE WHEN city = :city THEN 10 ELSE 0 END +
                CASE WHEN zip = :zip THEN 10 ELSE 0 END +
                CASE WHEN suffix = :suffix THEN 10 ELSE 0 END
            ) AS score
        FROM geocoder
        WHERE name = :name AND direction = :direction AND score >= 20
            AND number BETWEEN (:number-100) AND (:number+100)
        ORDER BY ABS(number - :number)
        LIMIT 1;
        """

        return self.p.query(q, number=number, name=name, direction=direction,
                            suffix=suffix, city=city.title(),
                            state=state.upper(), zip=zip).first()