import pickle

from geoindex import GeoGridIndex, GeoPoint


class QuietPlacesData:
    def __init__(self, datafile, debug):
        self.debug = debug
        # Load the pickled lodging records (binary mode is required for pickle).
        with open(datafile, 'rb') as file_object:
            self.data = pickle.load(file_object)
        # print('loaded ' + str(len(self.data)) + ' records, initializing index')
        self.__load_geo_index()

    def __load_geo_index(self):
        self.geo_index = GeoGridIndex()
        for lodging in self.data:
            # if self.debug: print('loading geo for: ' + lodging["name"])
            lat = float(lodging["lat"])
            lng = float(lodging["lng"])
            self.geo_index.add_point(GeoPoint(lat, lng, ref=lodging))

    def geo_search(self, lat, lng, range):
        center_point = GeoPoint(lat, lng)
        lodgings = []
        for geo_point, distance in self.geo_index.get_nearest_points(
                center_point, range, 'km'):
            # if self.debug: print("We found {0} in {1} km".format(geo_point.ref["name"], distance))
            lodgings.append(geo_point.ref)
        return lodgings
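# Hypothetical usage sketch (not part of the original source): assumes 'lodgings.pickle'
# holds a pickled list of dicts with "name", "lat" and "lng" keys.
qpd = QuietPlacesData('lodgings.pickle', debug=True)
nearby = qpd.geo_search(52.370, 4.895, 2)  # lodgings within 2 km of central Amsterdam
print([lodging["name"] for lodging in nearby])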
def build_geo_index_from_coord_index(coord_index, precision=5):
    geo_index = GeoGridIndex(precision=precision)
    for (lat, lng) in coord_index:
        geo_index.add_point(GeoPoint(lat, lng))
    return geo_index
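# Hypothetical usage sketch (not part of the original source, assumes the usual
# `from geoindex import GeoGridIndex, GeoPoint` imports): build an index from a few
# (lat, lng) tuples and query points within 2 km of a centre coordinate.
coords = [(48.8600, 2.2900), (48.8530, 2.3499)]  # near the Eiffel Tower / Notre-Dame
geo_index = build_geo_index_from_coord_index(coords, precision=5)
centre = GeoPoint(48.8584, 2.2945)  # Eiffel Tower
for point, distance in geo_index.get_nearest_points(centre, 2, 'km'):
    print("{0} is {1:.2f} km from the centre".format(point, distance))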
def test_plot_grid(self):
    grid = GeoGridIndex(precision=4)
    for i in range(0, 10):
        lat = random.random() * 180 - 90
        lng = random.random() * 360 - 180
        grid.add_point(GeoPoint(lat, lng))
    self.assertEqual(len(grid.data), 10)
def build_geo_index_from_point_index(index, precision=5):
    geo_index = GeoGridIndex(precision=precision)
    for id_, point_info in index.items():
        lat = point_info.get('latitude', point_info.get('lat'))
        lon = point_info.get('longitude', point_info.get('lon'))
        geo_index.add_point(GeoPoint(lat, lon, ref=point_info['id']))
    return geo_index
def match_stops_to_nodes(gtfs, walk_network):
    """
    Parameters
    ----------
    gtfs : a GTFS object
    walk_network : networkx.Graph

    Returns
    -------
    stop_I_to_node : dict
        maps stop_I to the closest walk_network node
    stop_I_to_dist : dict
        maps stop_I to the distance (in meters) to the closest walk_network node
    """
    network_nodes = walk_network.nodes(data=True)
    stop_Is = set(gtfs.get_straight_line_transfer_distances()['from_stop_I'])
    stops_df = gtfs.stops()
    geo_index = GeoGridIndex(precision=6)
    for net_node, data in network_nodes:
        geo_index.add_point(GeoPoint(data['lat'], data['lon'], ref=net_node))
    stop_I_to_node = {}
    stop_I_to_dist = {}
    for stop_I in stop_Is:
        stop_lat = float(stops_df[stops_df.stop_I == stop_I].lat)
        stop_lon = float(stops_df[stops_df.stop_I == stop_I].lon)
        geo_point = GeoPoint(stop_lat, stop_lon)
        min_dist = float('inf')
        min_dist_node = None
        # Search radii in km (100 m first, then 500 m if nothing was found).
        search_distances_km = [0.100, 0.500]
        for search_distance_km in search_distances_km:
            for point, distance_km in geo_index.get_nearest_points(
                    geo_point, search_distance_km, "km"):
                # Track the closest node; min_dist is kept in meters.
                distance_m = distance_km * 1000
                if distance_m < min_dist:
                    min_dist = distance_m
                    min_dist_node = point.ref
            if min_dist_node is not None:
                break
        if min_dist_node is None:
            warn("No OSM node found for stop: " + str(stops_df[stops_df.stop_I == stop_I]))
        stop_I_to_node[stop_I] = min_dist_node
        stop_I_to_dist[stop_I] = min_dist
    return stop_I_to_node, stop_I_to_dist
def calc_transfers(conn, threshold_meters=1000):
    geohash_precision = _get_geo_hash_precision(threshold_meters / 1000.)
    geo_index = GeoGridIndex(precision=geohash_precision)
    g = GTFS(conn)
    stops = g.get_table("stops")
    stop_geopoints = []
    cursor = conn.cursor()

    for stop in stops.itertuples():
        stop_geopoint = GeoPoint(stop.lat, stop.lon, ref=stop.stop_I)
        geo_index.add_point(stop_geopoint)
        stop_geopoints.append(stop_geopoint)

    for stop_geopoint in stop_geopoints:
        nearby_stop_geopoints = geo_index.get_nearest_points_dirty(
            stop_geopoint, threshold_meters / 1000.0, "km")
        from_stop_I = int(stop_geopoint.ref)
        from_lat = stop_geopoint.latitude
        from_lon = stop_geopoint.longitude

        to_stop_Is = []
        distances = []
        for nearby_stop_geopoint in nearby_stop_geopoints:
            to_stop_I = int(nearby_stop_geopoint.ref)
            if to_stop_I == from_stop_I:
                continue
            to_lat = nearby_stop_geopoint.latitude
            to_lon = nearby_stop_geopoint.longitude
            distance = math.ceil(wgs84_distance(from_lat, from_lon, to_lat, to_lon))
            if distance <= threshold_meters:
                to_stop_Is.append(to_stop_I)
                distances.append(distance)

        n_pairs = len(to_stop_Is)
        from_stop_Is = [from_stop_I] * n_pairs
        cursor.executemany(
            'INSERT OR REPLACE INTO stop_distances VALUES (?, ?, ?, ?, ?, ?);',
            zip(from_stop_Is, to_stop_Is, distances,
                [None] * n_pairs, [None] * n_pairs, [None] * n_pairs))

    cursor.execute(
        'CREATE INDEX IF NOT EXISTS idx_sd_fsid ON stop_distances (from_stop_I);')
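# Hypothetical usage sketch (not from the original module): assumes 'gtfs.sqlite' is an
# existing gtfspy-format SQLite database with a populated stops table, and that the
# module-level helpers (_get_geo_hash_precision, wgs84_distance, GTFS) are importable.
import sqlite3
conn = sqlite3.connect('gtfs.sqlite')
calc_transfers(conn, threshold_meters=500)  # record stop pairs within 500 m
conn.commit()
conn.close()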
def initialize_matching(overwrite=False):
    # 1. fetch all unmatched Amadeus hotels
    print("Loading Amadeus hotels")
    amdh = load_amadeus_from_db()
    namdh = len(amdh)
    print("Loaded %d hotels" % namdh)

    # 2. fetch all unmatched Booking.com hotels
    print("Loading Booking hotels")
    bkgh = load_booking()
    # bkgh = load_booking_from_mysql()
    print("Loaded %d hotels" % len(bkgh))

    # 3. load existing matches
    if not overwrite:
        print("Loading previous matches")
        matches = load_matches()
    else:
        matches = {}
    matched_amdids = matches.keys()
    matched_bkgids = set(matches.values())

    # 4. Exclude already matched
    amdh = amdh[~amdh.amd_id.isin(matched_amdids)]
    bkgh = bkgh[~bkgh.bkg_id.isin(matched_bkgids)]
    print("%d Amadeus hotels left to match to %d Booking hotels" % (len(amdh), len(bkgh)))

    # 5. Build geo index
    print("Building Geo Index")
    geo_index = GeoGridIndex()
    for i, hb in bkgh.iterrows():
        if hb['lat'] == 90:
            hb['lat'] = -90.0
        geo_index.add_point(GeoPoint(hb['lat'], hb['lng'], ref=hb))

    return amdh, bkgh, matches, geo_index, namdh
class DistanceCalculator(object):
    def __init__(self):
        self.geo_index = GeoGridIndex(precision=3)
        self.conn = sqlite3.connect('rockstar_02.db', isolation_level="DEFERRED")
        self.conn.text_factory = str
        self.cursor = self.conn.cursor()
        self.debug = False

    def load_index(self, input=None):
        """
        Load all of the geolocated Virginia hospitals into memory,
        inside of our geo_index variable
        """
        print 'Loading locations of interest into internal spatial index.'
        input_counter = 0
        for line in open(input, 'rU'):
            line = line.strip()
            parts = line.split('\t')
            #print parts
            #if len(parts) < 20 or len(parts) < 2:
            #    continue
            admin1, lat, lon, tag = parts[10], float(parts[4]), float(parts[5]), parts[7]
            if admin1 == 'VA' and tag == 'HSP':
                #print lat
                self.geo_index.add_point(GeoPoint(lat, lon))
                input_counter += 1
        print 'Done loading index of hospitals (added %s values)' % (input_counter)

    def enumerate_all_distances(self, admin1=None):
        """
        Walk the geohash5 centroids, calculate the distance to the nearest
        hospital for each one, and write the distance value to the database.
        """
        # Walk the geohash5 centroids
        c = self.cursor
        c.execute(
            'SELECT geohash, centroid_lat, centroid_lon from boxes where admin1=?',
            (admin1, ))
        geohashes_plus_coords = []
        for row in c.fetchall():
            geo5_item, lat, lon = row
            geohashes_plus_coords.append([geo5_item, lat, lon])
            #print geo5_item
        #print 'Those are the geohashes'

        progress_counter = 0
        for geo5, lat, lon in geohashes_plus_coords:
            progress_counter += 1
            if progress_counter % 50 == 0:
                print 'Processed %s records.' % (progress_counter)
            if self.debug:
                print '--------'
                print 'geohash of interest:', geo5, lat, lon
            # Calculate the distance to the nearest hospital for this centroid.
            temp_geo_point = GeoPoint(lat, lon)
            values = self.geo_index.get_nearest_points(temp_geo_point, 50.0, 'km')
            #print values
            minimum_distance = MINIMUM_DISTANCE
            for value in values:
                the_point, the_distance = value
                if the_distance < minimum_distance:
                    minimum_distance = the_distance
            # Write the distance value back to the database.
            c.execute('UPDATE boxes set hospital_distance=? where geohash=?',
                      (minimum_distance, geo5))
        self.conn.commit()
        print 'Finished updating distance from geo5 centroids to input data'
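# Hypothetical usage sketch (not part of the original source): 'gazetteer.tsv' stands in
# for the tab-separated input file, with lat/lon in columns 4-5, a feature tag in column 7
# and the state code in column 10, as the parsing above assumes.
calculator = DistanceCalculator()
calculator.load_index(input='gazetteer.tsv')
calculator.enumerate_all_distances(admin1='VA')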
    skiprows=1)
amdh = convert_amd_df_to_matching_format(amdh_full)
namdh = len(amdh)
print("Loaded %d hotels" % namdh)

print("Loading Booking hotels")
bkgh = load_booking()
# bkgh = load_booking_from_mysql()
print("Loaded %d hotels" % len(bkgh))

print("Building Geo Index")
geo_index = GeoGridIndex()
for i, hb in bkgh.iterrows():
    if hb['lat'] == 90:
        hb['lat'] = -90.0
    geo_index.add_point(GeoPoint(hb['lat'], hb['lng'], ref=hb))

matches = {}
print("1st pass")
match_in_neighborhood(amdh, geo_index, 1, 0.6, matches, namdh)
print("2nd pass")
match_in_neighborhood(amdh, geo_index, 2, 0.75, matches, namdh)
print("3rd pass")
match_in_neighborhood(amdh, geo_index, 4, 0.8, matches, namdh)
print("4th pass")
match_in_neighborhood(amdh, geo_index, 6, 0.86, matches, namdh)
class DistanceCalculator(object):
    def __init__(self):
        self.geo_index = GeoGridIndex(precision=3)
        self.conn = sqlite3.connect('rockstar_02.db', isolation_level="DEFERRED")
        self.conn.text_factory = str
        self.cursor = self.conn.cursor()
        self.debug = False

    def load_index(self, input=None):
        """
        Load the secondary_link road points from the OSM shapefile into memory,
        inside of our geo_index variable
        """
        print 'Loading locations of interest into internal spatial index.'
        input_counter = 0
        sf = shapefile.Reader(
            '/Users/chrisholloway/Downloads/virginia-latest-free.shp/gis.osm.roads_free_1.shp'
        )
        shaperec = sf.shapeRecords()
        motorway_hash = []
        for rec in range(len(shaperec)):
            if 'secondary_link' in shaperec[rec].record[2]:
                lat = shaperec[rec].shape.points[0][1]
                lon = shaperec[rec].shape.points[0][0]
                self.geo_index.add_point(GeoPoint(lat, lon))
                input_counter += 1
        print 'Done loading index of secondary_links (added %s values)' % (input_counter)

    def enumerate_all_distances(self, admin1=None):
        """
        Walk the geohash5 centroids, calculate the distance to the nearest
        secondary_link point for each one, and write the distance value to the database.
        """
        # Walk the geohash5 centroids
        c = self.cursor
        c.execute(
            'SELECT geohash, centroid_lat, centroid_lon from boxes where admin1=?',
            (admin1, ))
        geohashes_plus_coords = []
        for row in c.fetchall():
            geo5_item, lat, lon = row
            geohashes_plus_coords.append([geo5_item, lat, lon])
            #print geo5_item
        #print 'Those are the geohashes'

        progress_counter = 0
        for geo5, lat, lon in geohashes_plus_coords:
            progress_counter += 1
            if progress_counter % 50 == 0:
                print 'Processed %s records.' % (progress_counter)
            if self.debug:
                print '--------'
                print 'geohash of interest:', geo5, lat, lon
            # Calculate the distance to the nearest secondary_link point.
            temp_geo_point = GeoPoint(lat, lon)
            values = self.geo_index.get_nearest_points(temp_geo_point, 50.0, 'km')
            #print values
            minimum_distance = MINIMUM_DISTANCE
            for value in values:
                the_point, the_distance = value
                if the_distance < minimum_distance:
                    minimum_distance = the_distance
            # Write the distance value back to the database.
            c.execute(
                'UPDATE boxes set secondary_link_distance=? where geohash=?',
                (minimum_distance, geo5))
        self.conn.commit()
        print 'Finished updating distance from geo5 centroids to input data'
import dask.dataframe as dd
from dask.multiprocessing import get

import pandas as pd
from geoindex import GeoGridIndex, GeoPoint

# Load Swaziland Population File
swaz_pop = pd.read_csv("swaz_pop_data.csv")

# Load Water Data
water_data = pd.read_csv("Water_Point_Data_Exchange_Complete_Dataset.csv")

# Create df with only Swaziland water
swaz_water = water_data[water_data['#country_name'] == 'Swaziland']

# Create Geo Index of Swaziland Population Data
geo_index = GeoGridIndex()
for index, row in swaz_pop.iterrows():
    geo_index.add_point(GeoPoint(row['y'], row['x'], ref=row['value']))


# Calculate the population within x km of a well
def calculate_population_within_x_km(row, index, km):
    center_point = GeoPoint(row['#lat_deg'], row['#lon_deg'])
    total_population = 0
    try:
        for point, distance in index.get_nearest_points(center_point, km, 'km'):
            total_population += point.ref
    except:
        print("Invalid data - Record skipped")
    # print("Total population within", km, "kilometers:", int(total_population))
    return total_population
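# Hypothetical usage sketch (not in the original script): apply the helper to every
# Swaziland water point; the 5 km radius and the 'pop_within_5km' column name are
# illustrative choices, not taken from the source.
swaz_water = swaz_water.copy()
swaz_water['pop_within_5km'] = swaz_water.apply(
    calculate_population_within_x_km, axis=1, args=(geo_index, 5))
print(swaz_water[['#lat_deg', '#lon_deg', 'pop_within_5km']].head())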
myfile = opener.open(myurl)
js = json.load(myfile)

# build index of tract representative points
index = GeoGridIndex()
for feature in js['features']:
    # get feature properties and unique tract identifier
    properties = feature.get('properties')
    BoroCT2010 = properties.get('BoroCT2010')
    # geometry of tract
    geometry = feature.get('geometry')
    polygon = shape(geometry)
    # get a representative point from each tract
    lon, lat = polygon.representative_point().coords[0]
    # add representative point to index
    index.add_point(GeoPoint(lat, lon, ref=BoroCT2010))


# function returning the Census tract of a point
def geocoder(lat, lon, rad=.5):
    taxi_point = GeoPoint(lat, lon)
    # iterate through the nearest tracts to the point
    for point, distance in index.get_nearest_points(taxi_point, rad, unit='km'):
        for feature in js['features']:
            properties = feature.get('properties')
            BoroCT2010 = properties.get('BoroCT2010')
            # check if the point belongs to one of the nearest tracts to it
            if point.ref == BoroCT2010:
                geometry = feature.get('geometry')
                polygon = shape(geometry)
                if polygon.contains(Point(lon, lat)):
                    return BoroCT2010
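# Hypothetical usage sketch (not in the original notebook): the coordinates below are an
# arbitrary Manhattan location used purely for illustration.
tract = geocoder(40.7580, -73.9855)
print("Census tract:", tract)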
from geoindex import GeoGridIndex, GeoPoint
import random

index = GeoGridIndex()
for _ in range(10000):
    lat = random.random() * 180 - 90
    lng = random.random() * 360 - 180
    index.add_point(GeoPoint(lat, lng))

center_point = GeoPoint(37.7772448, -122.3955118)
for point, distance in index.get_nearest_points(center_point, 10, 'km'):
    print("We found {0} in {1} km".format(point, distance))

#index = GeoGridIndex()
for airport in get_all_airports():
    index.add_point(GeoPoint(airport.lat, airport.lng, ref=airport))
center_point = GeoPoint(37.7772448, -122.3955118)
""" print(head) if 'sid' in form: c1,c2 = (-31.36023,-64.26264) z = 13 banda = '025' lat = 0 prop = int(form['sid'].value) geo_index = GeoGridIndex() cursor.execute ("select origen, destino, frecuencia, calidad from link where propietario='%s'" %(prop)) datapoint = cursor.fetchall () for p in datapoint: lat,lon=geohash.decode(p[0]) geo_index.add_point(GeoPoint(lat,lon)) lat,lon=geohash.decode(p[1]) geo_index.add_point(GeoPoint(lat,lon)) cursor.execute ("select point, name, ip, numeroenlaces from nodo where propietario='%s'" %(prop)) datanodo = cursor.fetchall () numero = [] nodos = {} for p in datanodo: nodos[p[0]] = p[3] numero.append(p[2]) if 'center' in form: c1,c2 = (form['center'].value)[7:].rstrip(')').split(',') z = form['zoom'].value