def test_basic(self):
    """Geocode each address listed in ``support/good_segments.txt`` and print the result.

    The input file is read from the ``support`` directory next to this
    module, one address per line.  For every line the stripped address and
    the value returned by ``Geocoder.geocode_address`` are printed; when
    that value is truthy its ``'coded_address'`` entry is printed as well.

    The method makes no assertions; it only exercises the geocoder and
    shows its output.
    """
    from databundles.geo.geocoder import Geocoder

    geocoder = Geocoder(self.bundle.library)

    basename = "good_segments"
    input_path = os.path.join(os.path.dirname(__file__), 'support', basename + '.txt')

    with open(input_path) as source:
        for line in source:
            address = line.strip()
            result = geocoder.geocode_address(address)

            # Single-argument, parenthesised prints produce the same text as
            # the old ``print a, b`` statements under Python 2 and still
            # parse under Python 3.  The one-element tuples keep ``%``
            # formatting safe if a value is itself a tuple.
            print("== %s" % (address,))
            print("-> %s" % (result,))
            if result:
                print("  %s" % (result['coded_address'],))
def x_test_crime(self):
    """Geocode crime incidents by semi-block and record the ones that fail.

    Up to 100000 rows are read from the ``incidents`` table of the
    ``'crime'`` dependency.  Each row's ``blockaddress`` and ``city`` are
    passed to ``Geocoder.geocode_semiblock`` with state ``'CA'`` and the
    outcome is classified:

    * no candidates: counted as ``no_response``, error code ``'norsp'``;
    * more than one candidate: counted as ``multi_cities``, error code
      ``'mcities'``;
    * exactly one candidate whose value has more than 3 entries: counted
      as ``multi_addr``, error code ``'maddr'``.

    Error rows are written through ``self.write_error_row`` to
    ``errors.csv`` under the bundle filesystem.  Running totals and
    percentages are passed to the logger returned by
    ``self.bundle.init_log_rate(1000)``.

    NOTE(review): the ``x_`` prefix presumably keeps test runners from
    collecting this method -- confirm before renaming.
    """
    from databundles.geo.address import Parser
    from databundles.geo.geocoder import Geocoder
    import csv

    geocoder = Geocoder(self.bundle.library, addresses_ds='geoaddresses')
    _, incidents = self.bundle.library.dep('crime')
    log_rate = self.bundle.init_log_rate(1000)
    parser = Parser()

    header = ['code', 'arg', 'block_address', 'city', 'number', 'dir', 'street', 'type']
    progress = "{} cities={}, {}% addr={}, {}% nrp={}, {}%"

    # 'wb' is the mode the Python 2 csv module expects for its output file.
    with open(self.bundle.filesystem.path('errors.csv'), 'wb') as out:
        writer = csv.writer(out)
        writer.writerow(header)

        def report(code, arg, record):
            # Write one error row for ``record`` to errors.csv.
            self.write_error_row(code, arg, parser, writer,
                                 record['blockaddress'], record['city'])

        def percent(count, total):
            # Whole-number percentage of ``count`` out of ``total``.
            return int(count / total * 100)

        # The counters are floats so the percentage division is never
        # integer division; they are also logged as floats.
        multi_cities = 0.0
        multi_addr = 0.0
        no_response = 0.0

        rows = incidents.query("SELECT * FROM incidents limit 100000")
        for i, incident in enumerate(rows):
            record = dict(incident)
            candidates = geocoder.geocode_semiblock(
                record['blockaddress'], record['city'], 'CA')
            found = len(candidates)

            # Anything other than exactly one candidate is an error row and
            # skips the progress log below.
            if found != 1:
                if found == 0:
                    no_response += 1
                    report('norsp', 0, record)
                else:
                    multi_cities += 1
                    report('mcities', found, record)
                continue

            # Exactly one candidate: take its value and check how many
            # entries it holds.
            matches = candidates.popitem()[1]
            if len(matches) > 3:
                report('maddr', len(matches), record)
                multi_addr += 1

            # The zero-based index is the denominator, so the first row is
            # skipped to avoid dividing by zero.
            if i > 0:
                log_rate(progress.format(
                    i,
                    multi_cities, percent(multi_cities, i),
                    multi_addr, percent(multi_addr, i),
                    no_response, percent(no_response, i)))
def test_geo(self):
    """Geocode every row of the ``businesses`` partition and print a running tally.

    Each row's ``address`` and ``city`` are passed to
    ``Geocoder.geocode_address`` with state ``'CA'``.  A row counts as
    ``ok`` when exactly one candidate comes back and as an error
    otherwise.  After each row the candidate count, both counters and the
    integer success percentage are printed, and the row is inserted into
    the ``businesses`` partition with grain ``'errors'``.

    The method makes no assertions.
    """
    from databundles.geo.geocoder import Geocoder

    geocoder = Geocoder(self.library, addresses_ds='geoaddresses')

    businesses = self.partitions.find(table='businesses')
    error_partition = self.partitions.find_or_new(table='businesses', grain='errors')

    ok = 0
    errors = 0

    with error_partition.database.inserter() as ins:
        for row in businesses.query('SELECT * FROM businesses'):
            candidates = geocoder.geocode_address(row['address'], row['city'], 'CA')

            if len(candidates) != 1:
                errors += 1
            else:
                ok += 1

            # ok + errors is at least 1 here, so the division is safe.
            success_pct = int(float(ok) / (ok + errors) * 100)

            # Single-argument, parenthesised print: same output as the old
            # ``print a, b, c, d`` statement under Python 2, and it also
            # parses under Python 3.
            print("%s %s %s %s" % (len(candidates), ok, errors, success_pct))

            # NOTE(review): every row is inserted into the 'errors' grain
            # partition, not only the rows that failed to geocode --
            # confirm this is intended.
            ins.insert(row)