class Test(TestBase): def setUp(self): self.copy_or_build_bundle() self.bundle = Bundle() def tearDown(self): pass def test_basic(self): from ambry.geo.geocoder import Geocoder g = Geocoder(self.bundle.library) filename = "good_segments" f_input = os.path.join(os.path.dirname(__file__), '../support',filename + '.txt') f_output = os.path.join(os.path.dirname(__file__), '../support',filename + '.out.csv') with open(f_input) as f: for line in f: addr = line.strip() r = g.geocode_address(addr) print "==", addr print "->",r if r: print " ", r['codedaddress'] def write_error_row(self, code, arg, p, w, address, city): try: ps = p.parse(address) except: ps = False if not ps: row = [code, arg, address, city] else: row = [code, arg, address, city, ps.number, ps.street_direction, ps.street_name, ps.street_type] w.writerow(row) def x_test_crime(self): from ambry.geo.address import Parser from ambry.geo.geocoder import Geocoder import csv g = Geocoder(self.bundle.library, addresses_ds='geoaddresses') _,incidents = self.bundle.library.dep('crime') log_rate = self.bundle.init_log_rate(1000) p = Parser() with open(self.bundle.filesystem.path('errors.csv'), 'wb') as f: writer = csv.writer(f) writer.writerow(['code','arg','block_address','city','number','dir','street','type']) multi_cities = 0.0 multi_addr = 0.0 no_response = 0.0 for i, inct in enumerate(incidents.query("SELECT * FROM incidents limit 100000")): row = dict(inct) candidates = g.geocode_semiblock(row['blockaddress'], row['city'], 'CA') if len(candidates) == 0: no_response += 1 self.write_error_row('norsp',0, p,writer,row['blockaddress'], row['city']) continue elif len(candidates) != 1: multi_cities += 1 self.write_error_row('mcities',len(candidates), p,writer,row['blockaddress'], row['city']) continue s = candidates.popitem()[1] if len(s) > 3: self.write_error_row('maddr',len(s), p,writer,row['blockaddress'], row['city']) multi_addr +=1 if i > 0: log_rate("{} cities={}, {}% addr={}, {}% nrp={}, {}%".format(i, multi_cities, int(multi_cities/i * 100), multi_addr, int(multi_addr/i * 100), no_response, int(no_response/i * 100) )) def test_place_coder(self): from ambry.geo.geocoder import PlaceCoder pc = PlaceCoder(self.bundle.library) places = self.bundle.library.dep('places').partition for place in places.rows: try: in_places = [ x['name'] for x in pc.lookup_wgs(place['lat'], place['lon'])] except ValueError: continue # Some of the centroids aren't in the regions, since there are complicated region # shapes, and some cities hold parcels in the east county. if not place['name'] in in_places: print place['type'], place['name'], in_places
class Test(TestBase): def setUp(self): import testbundle.bundle self.bundle_dir = os.path.dirname(testbundle.bundle.__file__) self.rc = get_runconfig((os.path.join(self.bundle_dir,'warehouse-test-config.yaml'), os.path.join(self.bundle_dir,'bundle.yaml'))) self.copy_or_build_bundle() self.bundle = Bundle() print "Deleting: {}".format(self.rc.group('filesystem').root_dir) databundles.util.rm_rf(self.rc.group('filesystem').root_dir) def tearDown(self): pass def resolver(self,name): if name == self.bundle.identity.name or name == self.bundle.identity.vname: return self.bundle else: return False class Resolver(object): def get(self,name): if name == self.bundle.identity.name or name == self.bundle.identity.vname: return self.bundle else: return False def get_ref(self,name): pass def progress_cb(self, lr, type_,name,n): if n: lr("{} {}: {}".format(type, name, n)) else: self.bundle.log("{} {}".format(type_, name)) def test_create(self): from databundles.warehouse import new_warehouse w = new_warehouse(self.rc.warehouse('postgres')) print "Re-create database" w.database.enable_delete = True w.resolver = lambda name: self.resolver(name) lr = self.bundle.init_log_rate(10000) w.progress_cb = lambda type_,name,n: self.progress_cb(lr, type_,name,n) try: w.drop() except: pass w.create() w.library.create() w.install(self.bundle) w.create_table(self.bundle.dataset.vid, "ttwo") def x_test_install(self): def resolver(name): if name == self.bundle.identity.name or name == self.bundle.identity.vname: return self.bundle else: return False def progress_cb(lr, type,name,n): if n: lr("{} {}: {}".format(type, name, n)) else: self.bundle.log("{} {}".format(type, name)) from databundles.warehouse import new_warehouse from functools import partial print "Getting warehouse" w = new_warehouse(self.rc.warehouse('postgres')) print "Re-create database" w.database.enable_delete = True w.resolver = resolver w.progress_cb = progress_cb try: w.drop() except: pass w.create() ps = self.bundle.partitions.all print "{} partitions".format(len(ps)) for p in self.bundle.partitions: lr = self.bundle.init_log_rate(10000) w.install(p, progress_cb = partial(progress_cb, lr) ) self.assertTrue(w.has(self.bundle.identity.vname)) for p in self.bundle.partitions: self.assertTrue(w.has(p.identity.vname)) for p in self.bundle.partitions: w.remove(p.identity.vname) print w.get(self.bundle.identity.name) print w.get(self.bundle.identity.vname) print w.get(self.bundle.identity.id_) w.install(self.bundle) print w.get(self.bundle.identity.name) print w.get(self.bundle.identity.vname) print w.get(self.bundle.identity.id_) for p in self.bundle.partitions: lr = self.bundle.init_log_rate(10000) w.install(p, progress_cb = partial(progress_cb, lr))
class Test(TestBase): def setUp(self): import testbundle.bundle from ambry.run import RunConfig self.bundle_dir = os.path.dirname(testbundle.bundle.__file__) self.rc = get_runconfig((os.path.join(self.bundle_dir,'warehouse-test-config.yaml'), os.path.join(self.bundle_dir,'bundle.yaml'), RunConfig.USER_ACCOUNTS)) self.copy_or_build_bundle() self.bundle = Bundle() print "Deleting: {}".format(self.rc.group('filesystem').root_dir) ambry.util.rm_rf(self.rc.group('filesystem').root_dir) def tearDown(self): pass def resolver(self,name): if name == self.bundle.identity.name or name == self.bundle.identity.vname: return self.bundle else: return False def get_library(self, name='default'): """Clear out the database before the test run""" from ambry.library import new_library config = self.rc.library(name) l = new_library(config, reset=True) l.database.enable_delete = True l.database.drop() l.database.create() return l def get_warehouse(self, l, name): from ambry.util import get_logger from ambry.warehouse import new_warehouse w = new_warehouse(self.rc.warehouse(name), l) w.logger = get_logger('unit_test') lr = self.bundle.init_log_rate(10000) w.logger = TestLogger(lr) w.database.enable_delete = True w.database.delete() w.create() return w def _test_local_install(self, name): l = self.get_library('local') l.put_bundle(self.bundle) w = self.get_warehouse(l, name) print "Warehouse: ", w.database.dsn print "Library: ", l.database.dsn w.install("source-dataset-subset-variation-tone-0.0.1") w.install("source-dataset-subset-variation-tthree-0.0.1") w.install("source-dataset-subset-variation-geot1-geo-0.0.1") w = self.get_warehouse(l, 'spatialite') print "WAREHOUSE: ", w.database.dsn w.install("source-dataset-subset-variation-tone-0.0.1") w.install("source-dataset-subset-variation-tthree-0.0.1") w.install("source-dataset-subset-variation-geot1-geo-0.0.1") def test_local_sqlite_install(self): self._test_local_install('sqlite') def test_local_postgres_install(self): self._test_local_install('postgres1') def _test_remote_install(self, name): self.start_server(self.rc.library('server')) l = self.get_library('client') l.put_bundle(self.bundle) w = self.get_warehouse(l, name) print "WAREHOUSE: ", w.database.dsn w.install("source-dataset-subset-variation-tone-0.0.1") w.install("source-dataset-subset-variation-tthree-0.0.1") w.install("source-dataset-subset-variation-geot1-geo-0.0.1") w = self.get_warehouse(l, 'spatialite') print "WAREHOUSE: ", w.database.dsn w.install("source-dataset-subset-variation-tone-0.0.1") w.install("source-dataset-subset-variation-tthree-0.0.1") w.install("source-dataset-subset-variation-geot1-geo-0.0.1") def test_remote_sqlite_install(self): self._test_remote_install('sqlite') def test_remote_postgres_install(self): self._test_remote_install('postgres1') def test_manifest(self): from ambry.warehouse.manifest import Manifest m = Manifest(""" First Line of documentation partitions: part1 # Comment part2 # Comment views: create view foobar1 as one two three; create view foobar2 as one two three; documentation: Foo Doc views: create view foobar3 as one two three; doc: More Documentation sql:driver1|driver2 one two three sql:driver1 four five sql:driver2 seven eight """) for view in m.views: print "view", view for partition in m.partitions: print 'partition', partition print 'doc', m.documentation print '----' print m.sql def x_test_install(self): def resolver(name): if name == self.bundle.identity.name or name == self.bundle.identity.vname: return self.bundle else: return False def progress_cb(lr, type,name,n): if n: lr("{} {}: {}".format(type, name, n)) else: self.bundle.log("{} {}".format(type, name)) from ambry.warehouse import new_warehouse from functools import partial print "Getting warehouse" w = new_warehouse(self.rc.warehouse('postgres')) print "Re-create database" w.database.enable_delete = True w.resolver = resolver w.progress_cb = progress_cb try: w.drop() except: pass w.create() ps = self.bundle.partitions.all print "{} partitions".format(len(ps)) for p in self.bundle.partitions: lr = self.bundle.init_log_rate(10000) w.install(p, progress_cb = partial(progress_cb, lr) ) self.assertTrue(w.has(self.bundle.identity.vname)) for p in self.bundle.partitions: self.assertTrue(w.has(p.identity.vname)) for p in self.bundle.partitions: w.remove(p.identity.vname) print w.get(self.bundle.identity.name) print w.get(self.bundle.identity.vname) print w.get(self.bundle.identity.id_) w.install(self.bundle) print w.get(self.bundle.identity.name) print w.get(self.bundle.identity.vname) print w.get(self.bundle.identity.id_) for p in self.bundle.partitions: lr = self.bundle.init_log_rate(10000) w.install(p, progress_cb = partial(progress_cb, lr))
class Test(TestBase): def setUp(self): self.copy_or_build_bundle() self.bundle = Bundle() def tearDown(self): pass def test_basic(self): from pprint import pprint from databundles.geo.geocoder import Geocoder g = Geocoder(self.bundle.library) filename = "good_segments" f_input = os.path.join(os.path.dirname(__file__),'support',filename + '.txt') f_output = os.path.join(os.path.dirname(__file__),'support',filename + '.out.csv') with open(f_input) as f: for line in f: addr = line.strip() r = g.geocode_address(addr) print "==", addr print "->",r if r: print " ", r['coded_address'] def write_error_row(self, code, arg, p, w, address, city): try: ps = p.parse(address) except: ps = False if not ps: row = [code, arg, address, city] else: row = [code, arg, address, city, ps.number, ps.street_direction, ps.street_name, ps.street_type] w.writerow(row) def x_test_crime(self): from databundles.geo.address import Parser from databundles.geo.geocoder import Geocoder import csv g = Geocoder(self.bundle.library, addresses_ds='geoaddresses') _,incidents = self.bundle.library.dep('crime') log_rate = self.bundle.init_log_rate(1000) p = Parser() with open(self.bundle.filesystem.path('errors.csv'), 'wb') as f: writer = csv.writer(f) writer.writerow(['code','arg','block_address','city','number','dir','street','type']) multi_cities = 0.0 multi_addr = 0.0 no_response = 0.0 for i, inct in enumerate(incidents.query("SELECT * FROM incidents limit 100000")): row = dict(inct) candidates = g.geocode_semiblock(row['blockaddress'], row['city'], 'CA') if len(candidates) == 0: no_response += 1 self.write_error_row('norsp',0, p,writer,row['blockaddress'], row['city']) continue elif len(candidates) != 1: multi_cities += 1 self.write_error_row('mcities',len(candidates), p,writer,row['blockaddress'], row['city']) continue s = candidates.popitem()[1] if len(s) > 3: self.write_error_row('maddr',len(s), p,writer,row['blockaddress'], row['city']) multi_addr +=1 if i > 0: log_rate("{} cities={}, {}% addr={}, {}% nrp={}, {}%".format(i, multi_cities, int(multi_cities/i * 100), multi_addr, int(multi_addr/i * 100), no_response, int(no_response/i * 100) ))