def add_geos(table):
    """Geocode every row of ``table`` and append the results as a new column.

    For each row, every string cell is looked up in the terms database
    (together with its canonical form); all terms found in the row are
    collected into a single ``GeoEntity``, which is then geocoded.

    Args:
        table: a pandas ``DataFrame`` whose string cells may contain
            geographic terms.

    Returns:
        A new ``DataFrame``: ``table`` plus a ``"geoentity"`` column holding
        one ``GeoEntity`` per row, aligned with the rows of ``table``.
    """
    terms_db = TermsDB()
    table_coords = []
    total = len(table)

    # The context manager guarantees the geocoder cache is closed even when
    # geocoding a row raises.
    with Geocoder() as geocoder:
        for index, row in table.iterrows():
            row_terms = []
            for cell in row:
                if not isinstance(cell, str):
                    continue
                terms = terms_db.search(cell, canonical_form(cell))
                if terms:
                    row_terms.extend(terms)

            gt = GeoEntity(row_terms)
            gt.geocode(geocoder)
            table_coords.append(gt)

            # Progress indicator, rewritten in place on a single line.
            sys.stdout.write("\r %s / %s" % (str(index), total))
            sys.stdout.flush()
        print("")

    # Reuse the table's own index: pd.concat(axis=1) aligns on index labels,
    # so a default 0..n-1 index would misalign (or add NaN rows to) any table
    # that is filtered, re-sorted or otherwise not indexed 0..n-1.
    geoentities = pd.Series(table_coords, index=table.index, name="geoentity")
    return pd.concat([table, geoentities], axis=1)
def __init__(self, data_folder="data", terms_folder="terms"):
    """Open the cache, load the terms database and set up the geoservers.

    Args:
        data_folder: folder holding ``cache.db`` and
            ``subprefeituras.geojson``.
        terms_folder: folder handed to ``TermsDB``.
    """
    cache_path = os.path.join(data_folder, "cache.db")
    self.cache = shelve.open(cache_path)
    self.terms_db = TermsDB(terms_folder)

    # Coordinate limits for geolocation; also given to Nominatim as its
    # view box.
    bounding_box = (-47, -24.05, -46.30, -23.35)
    self.limits = bounding_box
    self.regions = None

    self.osm = geopy.Nominatim(view_box=bounding_box)
    self.gm = geopy.GoogleV3()
    # Maps a server name to the method that queries that server.
    self.server_options = dict(osm=self.geocode_osm, gm=self.geocode_gm)

    regions_path = os.path.join(data_folder, "subprefeituras.geojson")
    self.shapefy_regions(regions_path)
class Geocoder(object):
    """Organize geoservers and geocode terms, caching the results on disk.

    Instances hold an open ``shelve`` cache; call ``close()`` when done, or
    use the instance as a context manager.
    """

    def __init__(self, data_folder="data", terms_folder="terms"):
        """Open the cache, load the terms database and set up the geoservers.

        Args:
            data_folder: folder holding ``cache.db`` and
                ``subprefeituras.geojson``.
            terms_folder: folder handed to ``TermsDB``.
        """
        self.cache = shelve.open(os.path.join(data_folder, "cache.db"))
        self.terms_db = TermsDB(terms_folder)
        # Coordinate limits for geolocation, in the order inside_limits()
        # reads them: (min longitude, min latitude,
        #              max longitude, max latitude).
        self.limits = (-47, -24.05, -46.30, -23.35)
        self.regions = None
        self.osm = geopy.Nominatim(view_box=self.limits)
        self.gm = geopy.GoogleV3()
        # Maps a server name to the method that queries that server.
        self.server_options = {
            "osm": self.geocode_osm,
            "gm": self.geocode_gm,
        }
        self.shapefy_regions(
            os.path.join(data_folder, "subprefeituras.geojson"))

    def shapefy_regions(self, path_geojson):
        """Load region polygons from a GeoJSON file into ``self.regions``.

        Each feature's ``properties.name`` becomes a key of ``self.regions``
        and its geometry, converted with ``shape``, becomes the value.
        """
        # TODO: allow this to be configured...
        # Local import so this block does not depend on the module header.
        import io

        # Read explicitly as UTF-8 instead of the locale's default encoding:
        # region names contain non-ASCII characters.
        with io.open(path_geojson, "r", encoding="utf-8") as f:
            self.regions = {}
            j = json.load(f)
            for region in j['features']:
                name = region['properties']['name']
                poly = shape(region['geometry'])
                self.regions[name] = poly

    def inside_limits(self, point):
        """Check whether ``point`` is inside the limits or a known region.

        Args:
            point: object with ``latitude`` and ``longitude`` attributes.

        Returns:
            When no regions are loaded: ``True`` or ``False`` depending on
            whether the point lies strictly inside the ``self.limits``
            rectangle. Otherwise: the name of the first region intersecting
            the point, or ``False`` when there is none.
        """
        if not self.regions:
            # Use rectangle check
            min_lon, min_lat, max_lon, max_lat = self.limits
            # bool() keeps the result a genuine True/False, which geocode()
            # relies on when it tests ``region is True``.
            return bool(min_lon < point.longitude < max_lon
                        and min_lat < point.latitude < max_lat)

        # Check inside all possible regions
        p = Point((point.longitude, point.latitude))
        for name, poly in self.regions.items():
            if p.intersects(poly):
                return name
        return False

    @staticmethod
    def _cache_key(s):
        """Return ``s`` as the native ``str`` type, as required by shelve.

        Python 3 shelves only accept ``str`` keys (``bytes`` keys fail),
        while Python 2 shelves need byte strings, so unicode text is encoded
        there.
        """
        if isinstance(s, str):
            return s
        if isinstance(s, bytes):
            # Python 3 bytes (on Python 2 ``bytes is str``, handled above).
            return s.decode("utf-8")
        # Python 2 unicode.
        return s.encode("utf-8")

    def geocode(self, term):
        """Geocode a term in all available geoservers.

        Args:
            term: text to geocode; only its first 60 characters are used.

        Returns:
            A dict mapping each server name to a list of dicts with the keys
            ``address``, ``latitude``, ``longitude`` and ``region``, one per
            returned point that passes ``inside_limits``. Results are read
            from and stored in the cache.

        Raises:
            geopy.exc.GeocoderQuotaExceeded: re-raised when a server reports
                that its quota is exhausted.
        """
        # TODO: allow this to be configured...
        # limit string size
        s = term[:60]
        print(s)
        # check cache
        cache_key = self._cache_key(s)
        term_geo = self.cache.get(cache_key)
        if not term_geo:
            term_geo = {}
            # query all servers
            for server_name, func in self.server_options.items():
                try:
                    points = func(s)
                except geopy.exc.GeocoderQuotaExceeded:
                    print("Quota Exceeded!")
                    raise
                except (geopy.exc.GeocoderTimedOut,
                        geopy.exc.GeocoderUnavailable):
                    print("Timed out or unable to contact server!")
                    print("Trying again...")
                    # Single retry; a second failure propagates.
                    points = func(s)
                term_geo[server_name] = []
                for point in points:
                    region = self.inside_limits(point)
                    if region:
                        # The rectangle check yields True, not a region name.
                        if region is True:
                            region = "???"
                        term_geo[server_name].append({
                            "address": point.address,
                            "latitude": point.latitude,
                            "longitude": point.longitude,
                            "region": region
                        })
            self.cache[cache_key] = term_geo
        return term_geo

    def geocode_osm(self, s):
        """Query Nominatim for ``s``; return a list with at most one point."""
        # TODO: allow this to be configured
        s += ", São Paulo, São Paulo"
        r = self.osm.geocode(s, timeout=10, exactly_one=True)
        return [r] if r else []

    def geocode_gm(self, s):
        """Query GoogleV3 for ``s``; return a list with at most one point.

        A result whose address is just the city itself is discarded.
        """
        # TODO: allow this to be configured
        s += ", São Paulo, São Paulo"
        r = self.gm.geocode(s, timeout=10, exactly_one=True)
        # TODO: allow this to be configured
        if not r or r.address == "São Paulo - State of São Paulo, Brazil":
            return []
        return [r]

    def geocode_list(self, strings):
        """Create a GeoEntity with the strings geocoded.

        Every string is searched in the terms database together with its
        canonical form; all terms found are geocoded as one ``GeoEntity``.
        """
        all_terms = []
        for string in strings:
            canonical = canonical_form(string)
            terms = self.terms_db.search(string, canonical)
            if terms:
                all_terms += terms
        gt = GeoEntity(all_terms)
        gt.geocode(self)
        return gt

    def close(self):
        """Close the cache."""
        self.cache.close()

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        self.close()