예제 #1
0
 def test_basic(self):
     from pprint import pprint
     from databundles.geo.geocoder import Geocoder               
     g = Geocoder(self.bundle.library)               
                  
     filename = "good_segments"
     f_input =  os.path.join(os.path.dirname(__file__),'support',filename + '.txt')
     f_output =  os.path.join(os.path.dirname(__file__),'support',filename + '.out.csv')
                
     with open(f_input) as f:
         for line in f:
             addr = line.strip()
             r  = g.geocode_address(addr)
             print "==", addr
             print "->",r
             if r:
                 print "  ", r['coded_address']
예제 #2
0
    def x_test_crime(self):
        """Geocode crime incident block addresses and tally the failure modes.

        Queries up to 100000 rows from the ``incidents`` table of the
        ``crime`` dependency and passes each row's ``blockaddress`` and
        ``city`` (state fixed to ``'CA'``) to ``Geocoder.geocode_semiblock``.
        Problem rows are reported through ``self.write_error_row`` into
        ``errors.csv`` with one of these codes:

        * ``norsp``   -- the geocoder returned no candidates.
        * ``mcities`` -- the geocoder returned a candidate count other than 1.
        * ``maddr``   -- the single candidate's value has more than 3 entries.

        Running totals and percentages are handed to the bundle's rate
        logger. Rows reported as ``norsp`` or ``mcities`` skip the logging
        call, so progress is only logged from rows that reach the end of
        the loop body.
        """
        from databundles.geo.address import Parser
        from databundles.geo.geocoder import Geocoder
        import csv

        g = Geocoder(self.bundle.library, addresses_ds='geoaddresses')
        _, incidents = self.bundle.library.dep('crime')

        log_rate = self.bundle.init_log_rate(1000)

        p = Parser()

        def percent(count, total):
            # The counters are floats, so this is true division on Python 2.
            return int(count / total * 100)

        # Binary mode is what the Python 2 csv module expects for its output file.
        with open(self.bundle.filesystem.path('errors.csv'), 'wb') as f:
            writer = csv.writer(f)

            writer.writerow(['code', 'arg', 'block_address', 'city', 'number', 'dir', 'street', 'type'])

            multi_cities = 0.0
            multi_addr = 0.0
            no_response = 0.0

            # Number the rows from 1 so that ``i`` is the count of incidents
            # processed so far; using the zero-based index as the denominator
            # made every percentage off by one row.
            query = "SELECT * FROM incidents limit 100000"
            for i, inct in enumerate(incidents.query(query), 1):
                row = dict(inct)

                candidates = g.geocode_semiblock(row['blockaddress'], row['city'], 'CA')
                n_candidates = len(candidates)

                if n_candidates == 0:
                    no_response += 1
                    self.write_error_row('norsp', 0, p, writer, row['blockaddress'], row['city'])
                    continue
                elif n_candidates != 1:
                    multi_cities += 1
                    self.write_error_row('mcities', n_candidates, p, writer, row['blockaddress'], row['city'])
                    continue

                # Exactly one candidate: take its value (the key is not used).
                s = candidates.popitem()[1]

                # NOTE(review): the threshold of 3 is taken as-is from the
                # original code; its rationale is not visible here.
                if len(s) > 3:
                    self.write_error_row('maddr', len(s), p, writer, row['blockaddress'], row['city'])
                    multi_addr += 1

                log_rate("{}  cities={}, {}% addr={}, {}%  nrp={}, {}%".format(
                    i,
                    multi_cities, percent(multi_cities, i),
                    multi_addr, percent(multi_addr, i),
                    no_response, percent(no_response, i)))
예제 #3
0
파일: bundle.py 프로젝트: hsd315/civicdata
 def test_geo(self):
     from databundles.geo.geocoder import Geocoder
     
     g = Geocoder(self.library, addresses_ds='geoaddresses')
     
     p = self.partitions.find(table='businesses')
     
     errorp = self.partitions.find_or_new(table='businesses', grain='errors')
     
     ok = 0
     errors = 0
     with errorp.database.inserter() as ins:
         for row in p.query('SELECT * FROM businesses'):
             candidates = g.geocode_address(row['address'], row['city'], 'CA')
     
             if len(candidates) != 1 :
                 #print "('{0}', (None, '{0}','gln')),".format(row['address'])
                 errors += 1
             else:
                 ok += 1
             
             print len(candidates), ok, errors, int(float(ok)/(ok+errors) * 100)
             ins.insert(row)