def test_point_distance(self):
    """Check ``GeoPoint.distance_to`` between the first two fixture points.

    The distance from ``TEST_POINTS[0]`` to ``TEST_POINTS[1]`` is compared
    against a fixed expected value for the 'mi' and the 'km' unit.
    """
    origin = GeoPoint(*TEST_POINTS[0])
    target = GeoPoint(*TEST_POINTS[1])
    # (unit, expected distance) pairs, checked in this order.
    expectations = (
        ('mi', .7046874859635269),
        ('km', 1.1340845774104864),
    )
    for unit, expected in expectations:
        self.assertAlmostEqual(origin.distance_to(target, unit), expected)
def test_point_distance(self):
    """Verify the 'mi' and 'km' distances between the two fixture points."""
    start = GeoPoint(*TEST_POINTS[0])
    end = GeoPoint(*TEST_POINTS[1])

    in_miles = start.distance_to(end, 'mi')
    self.assertAlmostEqual(in_miles, .7046874859635269)

    in_kilometres = start.distance_to(end, 'km')
    self.assertAlmostEqual(in_kilometres, 1.1340845774104864)
def __load_geo_index(self):
    """Rebuild ``self.geo_index`` from the records in ``self.data``.

    Each record's "lat" and "lng" entries are converted to ``float`` and the
    record itself is attached to the indexed point as its ``ref``.
    """
    self.geo_index = GeoGridIndex()
    for record in self.data:
        latitude = float(record["lat"])
        longitude = float(record["lng"])
        point = GeoPoint(latitude, longitude, ref=record)
        self.geo_index.add_point(point)
def build_geo_index_from_coord_index(coord_index, precision=5):
    """Return a ``GeoGridIndex`` holding one point per coordinate pair.

    ``coord_index`` is iterated and every item is unpacked as ``(lat, lng)``;
    ``precision`` is forwarded to the ``GeoGridIndex`` constructor.
    """
    grid = GeoGridIndex(precision=precision)
    for coords in coord_index:
        latitude, longitude = coords
        grid.add_point(GeoPoint(latitude, longitude))
    return grid
def get_interactive_candidates(amdh, geo_index, radius=6, nsim_threshold=0.5):
    """
    Gets candidates for interactive matching for yet unmatched Amadeus hotels

    For every row of ``amdh`` the points of ``geo_index`` within ``radius``
    km are fetched and scored with ``get_name_sim`` (``swap_words=True``).
    Candidates scoring above ``nsim_threshold`` are stored, best score
    first, as ``(bkg_id, name_sim, distance)`` tuples under the row's
    ``amd_id``.  Rows whose geo lookup raises are skipped.

    Returns a ``defaultdict(list)`` keyed by ``amd_id``.
    """
    result = defaultdict(list)
    for _, hotel in amdh.iterrows():
        centre = GeoPoint(hotel['lat'], hotel['lng'], ref=hotel)
        try:
            neighbours = list(
                geo_index.get_nearest_points(centre, radius, 'km'))
        except Exception:
            continue

        scores = [
            get_name_sim(point.ref['name'], hotel['name'], swap_words=True)
            for point, _dist in neighbours
        ]
        keep = [
            pos for pos, score in enumerate(scores)
            if score > nsim_threshold
        ]
        # Highest similarity first; ties keep their original order.
        keep.sort(key=lambda pos: -scores[pos])

        for pos in keep:
            point, dist = neighbours[pos]
            result[hotel['amd_id']].append(
                (point.ref['bkg_id'], scores[pos], dist))
    return result
def test_bounds(self):
    """Check neighbour counts for points sitting on grid-cell boundaries.

    Five points on the same latitude are indexed at precision 3 and a
    57 km query is issued around each of them; the expected number of
    hits is 3 for the four boundary points and 5 for the midpoint.
    """
    def count(iterable):
        # get_nearest_points is consumed via list() so its items can be counted.
        return len(list(iterable))

    # ezv block
    point1 = GeoPoint(43.59375, -4.21875)  # ezv
    point2 = GeoPoint(43.59375, -4.218750001)  # ezu
    point3 = GeoPoint(43.59375, -2.812500001)  # ezv
    point4 = GeoPoint(43.59375, -2.8125)  # ezy
    point5 = GeoPoint(43.59375, (-4.21875 + -2.8125) / 2)
    points = [point1, point2, point3, point4, point5]

    index = GeoGridIndex(precision=3)
    # Explicit loop instead of ``map(index.add_point, points)``: on Python 3
    # ``map`` is lazy, so the points would never actually be added.
    for point in points:
        index.add_point(point)

    # assertEqual replaces the deprecated (and since removed) assertEquals.
    self.assertEqual(count(index.get_nearest_points(point1, 57)), 3)
    self.assertEqual(count(index.get_nearest_points(point2, 57)), 3)
    self.assertEqual(count(index.get_nearest_points(point3, 57)), 3)
    self.assertEqual(count(index.get_nearest_points(point4, 57)), 3)
    self.assertEqual(count(index.get_nearest_points(point5, 57)), 5)
def match_stops_to_nodes(gtfs, walk_network):
    """
    Match every stop to its closest walk network node.

    Nodes are looked up within 0.1 km of the stop first and, only if none
    is found there, within 0.5 km.

    Parameters
    ----------
    gtfs : a GTFS object
    walk_network : networkx.Graph
        each node is expected to carry 'lat' and 'lon' attributes

    Returns
    -------
    stop_I_to_node: dict
        maps stop_I to closest walk_network node
        (None when no node lies within the largest search radius)
    stop_I_to_dist: dict
        maps stop_I to the distance, in meters, to the closest
        walk_network node (``inf`` when no node was found)
    """
    # data=True (the boolean) yields (node, attribute-dict) pairs.  The
    # string "true" only happened to work on networkx 1.x; on 2.x a string
    # is treated as the *name* of a single attribute to return.
    network_nodes = walk_network.nodes(data=True)
    stop_Is = set(gtfs.get_straight_line_transfer_distances()['from_stop_I'])
    stops_df = gtfs.stops()

    geo_index = GeoGridIndex(precision=6)
    for net_node, data in network_nodes:
        geo_index.add_point(GeoPoint(data['lat'], data['lon'], ref=net_node))

    # Radii are passed to the index with unit "km".
    search_radii_km = [0.100, 0.500]
    stop_I_to_node = {}
    stop_I_to_dist = {}
    for stop_I in stop_Is:
        # Filter the stops table once per stop instead of once per use.
        stop_rows = stops_df[stops_df.stop_I == stop_I]
        stop_lat = float(stop_rows.lat)
        stop_lon = float(stop_rows.lon)
        geo_point = GeoPoint(stop_lat, stop_lon)

        min_dist = float('inf')  # meters
        min_dist_node = None
        for radius_km in search_radii_km:
            for point, distance_km in geo_index.get_nearest_points(
                    geo_point, radius_km, "km"):
                # Convert before comparing: min_dist is kept in meters, so
                # comparing the raw km value against it would let every
                # later candidate overwrite a closer earlier one.
                distance_m = distance_km * 1000
                if distance_m < min_dist:
                    min_dist = distance_m
                    min_dist_node = point.ref
            if min_dist_node is not None:
                break

        if min_dist_node is None:
            warn("No OSM node found for stop: " + str(stop_rows))
        stop_I_to_node[stop_I] = min_dist_node
        stop_I_to_dist[stop_I] = min_dist
    return stop_I_to_node, stop_I_to_dist
def geo_search(self, lat, lng, range):
    """Return the ``ref`` of every indexed point within ``range`` km.

    The search is centred on ``(lat, lng)`` and run against
    ``self.geo_index``; results keep the order the index yields them in.
    """
    origin = GeoPoint(lat, lng)
    hits = self.geo_index.get_nearest_points(origin, range, 'km')
    return [found.ref for found, _distance in hits]
def get_nearest_point(geo_index, lat, lng, r):
    """Return ``(ref, distance)`` for the indexed point closest to a location.

    ``r`` is the search radius; it is divided by 1000 before being handed to
    ``geo_index.get_nearest_points`` and the distance that comes back is
    multiplied by 1000 again, so ``r`` and the returned distance share one
    unit (meters, assuming the index works in its default km unit).

    Returns ``(None, None)`` when the index yields no point, or when the
    closest one is farther away than ``r``.
    """
    origin = GeoPoint(lat, lng)
    # 1000.0 forces true division: with an integer ``r`` on Python 2,
    # ``r / 1000`` would floor (e.g. 500 -> 0) and shrink the radius to zero.
    hits = list(geo_index.get_nearest_points(origin, r / 1000.0))
    if hits:
        nearest_point, dist = min(hits, key=lambda hit: hit[1])
        dist *= 1000
        if dist <= r:
            return nearest_point.ref, dist
    return None, None
def build_geo_index_from_point_index(index, precision=5):
    """Return a ``GeoGridIndex`` built from a mapping of point records.

    ``index`` is a dict-like object whose values are mappings providing the
    latitude under 'latitude' (or, failing that, 'lat'), the longitude under
    'longitude' (or 'lon') and an 'id' entry, which is passed to
    ``GeoPoint`` as its third positional argument.  ``precision`` is
    forwarded to the ``GeoGridIndex`` constructor.
    """
    geo_index = GeoGridIndex(precision=precision)
    # ``iteritems`` only exists on Python 2 mappings; fall back to ``items``
    # so the function also runs on Python 3.
    if hasattr(index, 'iteritems'):
        pairs = index.iteritems()
    else:
        pairs = index.items()
    for _key, point_info in pairs:
        lat = point_info.get('latitude', point_info.get('lat'))
        lon = point_info.get('longitude', point_info.get('lon'))
        geo_index.add_point(GeoPoint(lat, lon, point_info['id']))
    return geo_index
def calculate_population_within_x_km(row, index, km):
    """Sum the ``ref`` values of all indexed points within ``km`` km of a row.

    The search centre is taken from ``row['#lat_deg']`` and
    ``row['#lon_deg']``.  If the lookup or the summation raises, a message
    is printed and whatever had been accumulated up to that point is
    returned (0 if nothing had been added yet).
    """
    center_point = GeoPoint(row['#lat_deg'], row['#lon_deg'])
    total_population = 0
    try:
        for point, _distance in index.get_nearest_points(
                center_point, km, 'km'):
            total_population += point.ref
    except Exception:
        # Deliberately best-effort, but ``Exception`` (rather than a bare
        # ``except``) lets KeyboardInterrupt / SystemExit propagate.
        print("Invalid data - Record skipped")
    return total_population
def generate_comparison_vectors(amdh, geo_index, radius=6, nsim_threshold=0.5):
    """
    Creation of comparison vectors for training a matching classifier

    amdh: data frame containing Amadeus Hotels
    geo_index: index of Booking hotels, easy to query by geo radius
    radius: distance to fetch candidates from
    nsim_threshold: minimum name similarity to be considered a candidate

    Returns a DataFrame indexed by (amd_id, bkg_id) with the columns
    'nsim1' (name similarity without word swapping), 'nsim2' (with word
    swapping) and 'dist'.  A pair is kept when its 'nsim2' exceeds
    ``nsim_threshold``; rows whose geo lookup raises are skipped.
    Progress is printed every 500 rows.
    """
    pair_ids = []
    rows = []
    for seen, (_, hotel) in enumerate(amdh.iterrows(), 1):
        if seen % 500 == 0:
            print("%.2f %%" % (seen * 100.0 / len(amdh)))

        centre = GeoPoint(hotel['lat'], hotel['lng'], ref=hotel)
        try:
            neighbours = list(
                geo_index.get_nearest_points(centre, radius, 'km'))
        except Exception:
            continue

        plain = [
            get_name_sim(point.ref['name'], hotel['name'], swap_words=False)
            for point, _dist in neighbours
        ]
        swapped = [
            get_name_sim(point.ref['name'], hotel['name'], swap_words=True)
            for point, _dist in neighbours
        ]

        for pos, (point, dist) in enumerate(neighbours):
            if swapped[pos] > nsim_threshold:
                pair_ids.append((hotel['amd_id'], point.ref['bkg_id']))
                rows.append((plain[pos], swapped[pos], dist))

    pair_index = pd.MultiIndex.from_tuples(pair_ids,
                                           names=['amd_id', 'bkg_id'])
    return pd.DataFrame(rows,
                        index=pair_index,
                        columns=['nsim1', 'nsim2', 'dist'])
def test_point(self):
    """Exercise GeoPoint coordinates, its cached radians and equality."""
    first = TEST_POINTS[0]
    second = TEST_POINTS[1]

    point = GeoPoint(*first)
    self.assertEqual(point.latitude, first[0])
    self.assertEqual(point.longitude, first[1])

    # The private radian attributes are still unset at this point ...
    self.assertIsNone(point._rad_latitude)
    self.assertIsNone(point._rad_longitude)
    # ... reading the public ones yields the converted coordinates ...
    self.assertEqual(point.rad_latitude, math.radians(first[0]))
    self.assertEqual(point.rad_longitude, math.radians(first[1]))
    # ... after which the private attributes hold the same values.
    self.assertIsNotNone(point._rad_latitude)
    self.assertIsNotNone(point._rad_longitude)
    self.assertEqual(point.rad_latitude, point._rad_latitude)
    self.assertEqual(point.rad_longitude, point._rad_longitude)

    # Same coordinates compare equal.
    twin = GeoPoint(first[0], first[1])
    self.assertEqual(point, twin)
    self.assertTrue(point == twin)

    # Different coordinates do not.
    stranger = GeoPoint(second[0], second[1])
    self.assertNotEqual(point, stranger)
    self.assertFalse(point == stranger)

    # Neither does the raw fixture entry, which is not a GeoPoint.
    self.assertNotEqual(point, first)
    self.assertFalse(point == first)
def geocoder(lat, lon, rad=.5):
    """Return the ``BoroCT2010`` of the tract containing ``(lat, lon)``.

    The module-level ``index`` supplies the points within ``rad`` km of the
    location; for each of them the features in ``js`` carrying the same
    ``BoroCT2010`` are tested for containment of the location.  The string
    "notfound" is returned when no tract matches.
    """
    origin = GeoPoint(lat, lon)
    # walk the tracts whose indexed point is near the location
    for candidate, _distance in index.get_nearest_points(origin, rad,
                                                         unit='km'):
        for feature in js['features']:
            tract_id = feature.get('properties').get('BoroCT2010')
            # only build and test the polygon of the tract the candidate
            # point belongs to
            if candidate.ref == tract_id and shape(
                    feature.get('geometry')).contains(Point(lon, lat)):
                return tract_id
    # no tract contained the location
    return "notfound"
def load_index(self, input=None):
    """
    Load the locations of all university records into ``self.geo_index``.

    The shapefile at the hard-coded path below is read; for every record
    that contains the value 'university', the first point of its shape is
    added to the index.  ``input`` is accepted but not used by this
    implementation.
    """
    print('Loading locations of interest into internal spatial index.')
    input_counter = 0
    sf = shapefile.Reader('/Users/chrisholloway/Downloads/virginia-latest-free.shp/gis.osm.pois_a_free_1.shp')
    for shape_record in sf.shapeRecords():
        if 'university' in shape_record.record:
            # The point's second component is used as latitude and its
            # first as longitude.
            first_point = shape_record.shape.points[0]
            lat = first_point[1]
            lon = first_point[0]
            self.geo_index.add_point(GeoPoint(lat, lon))
            input_counter += 1
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('Done loading index of university (added %s values)' % (input_counter))
def load_index(self, input=None):
    """
    Load all of the geolocated cemeteries into memory, inside of our
    geo_index variable

    ``input`` is the path of a tab-separated text file.  For every line,
    field 10 is read as admin1, fields 4 and 5 as latitude and longitude
    (converted to float) and field 7 as the tag; lines with admin1 'VA'
    and tag 'CMTY' are added to ``self.geo_index``.

    NOTE: a line with fewer than 11 fields, or with non-numeric
    coordinates, raises (IndexError / ValueError) exactly as before.
    """
    print('Loading locations of interest into internal spatial index.')
    input_counter = 0
    try:
        handle = open(input, 'rU')
    except ValueError:
        # Python 3.11+ rejects the 'U' flag; plain text mode already
        # applies universal newlines there.
        handle = open(input)
    # The context manager closes the file even if a line fails to parse.
    with handle:
        for line in handle:
            parts = line.strip().split('\t')
            admin1, lat, lon, tag = parts[10], float(parts[4]), float(parts[5]), parts[7]
            if admin1 == 'VA' and tag == 'CMTY':
                print(lat)
                self.geo_index.add_point(GeoPoint(lat, lon))
                input_counter += 1
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('Done loading index of cemetery (added %s values)' % (input_counter))
def calc_transfers(conn, threshold_meters=1000):
    """Fill ``stop_distances`` with all stop pairs closer than a threshold.

    Every stop of the GTFS "stops" table is put into a geo grid index.  For
    each stop the index's ``get_nearest_points_dirty`` supplies candidate
    neighbours; each candidate is re-measured with ``wgs84_distance``
    (rounded up) and kept when within ``threshold_meters``.  Rows are
    written as (from_stop_I, to_stop_I, distance, None, None, None) and an
    index on ``from_stop_I`` is created afterwards.  No commit is issued
    here.
    """
    threshold_km = threshold_meters / 1000.
    geo_index = GeoGridIndex(precision=_get_geo_hash_precision(threshold_km))
    stops = GTFS(conn).get_table("stops")
    cursor = conn.cursor()

    stop_geopoints = []
    for stop in stops.itertuples():
        geopoint = GeoPoint(stop.lat, stop.lon, ref=stop.stop_I)
        geo_index.add_point(geopoint)
        stop_geopoints.append(geopoint)

    insert_sql = (
        'INSERT OR REPLACE INTO stop_distances VALUES (?, ?, ?, ?, ?, ?);')
    for origin in stop_geopoints:
        neighbours = geo_index.get_nearest_points_dirty(
            origin, threshold_km, "km")
        from_stop_I = int(origin.ref)

        rows = []
        for neighbour in neighbours:
            to_stop_I = int(neighbour.ref)
            if to_stop_I == from_stop_I:
                # a stop is not its own transfer target
                continue
            distance = math.ceil(
                wgs84_distance(origin.latitude, origin.longitude,
                               neighbour.latitude, neighbour.longitude))
            if distance <= threshold_meters:
                rows.append(
                    (from_stop_I, to_stop_I, distance, None, None, None))
        cursor.executemany(insert_sql, rows)

    cursor.execute(
        'CREATE INDEX IF NOT EXISTS idx_sd_fsid ON stop_distances (from_stop_I);'
    )
def enumerate_all_distances(self, admin1=None):
    """
    Walk the geohash5 centroids, calculate the distance to the nearest
    indexed point for each one, and write the distance value to the
    database.

    Rows (geohash, centroid_lat, centroid_lon) are read from ``boxes`` for
    the given ``admin1``.  For each centroid ``self.geo_index`` is queried
    within 50 km and the smallest distance found below
    ``MINIMUM_DISTANCE`` (or ``MINIMUM_DISTANCE`` itself when there is
    none) is stored in the ``bus_station_distance`` column.  The
    transaction is committed once, after all rows were updated.
    """
    c = self.cursor
    c.execute(
        'SELECT geohash, centroid_lat, centroid_lon from boxes where admin1=?',
        (admin1, ))
    # fetchall() materialises every row up front, so the same cursor can be
    # reused for the UPDATE statements inside the loop.
    geohashes_plus_coords = c.fetchall()

    progress_counter = 0
    for geo5, lat, lon in geohashes_plus_coords:
        progress_counter += 1
        # print(...) with a single pre-formatted string gives the same
        # output on Python 2 and Python 3.
        if progress_counter % 50 == 0:
            print('Processed %s records.' % (progress_counter))
        if self.debug == True:
            print('--------')
            print('geohash of interest: %s %s %s' % (geo5, lat, lon))

        # calculate the distance to the nearest indexed point
        temp_geo_point = GeoPoint(lat, lon)
        values = self.geo_index.get_nearest_points(temp_geo_point, 50.0,
                                                   'km')
        minimum_distance = MINIMUM_DISTANCE
        for _the_point, the_distance in values:
            if the_distance < minimum_distance:
                minimum_distance = the_distance

        # and write the distance value to the database
        c.execute(
            'UPDATE boxes set bus_station_distance=? where geohash=?',
            (minimum_distance, geo5))
    self.conn.commit()
    print('Finished updating distance from geo5 centroids to input data')
def test_geo_point_distance(self):
    """Check that ``distance_to`` gives the same result in both directions.

    The 'mi' and 'km' distances between the first two fixture points are
    compared against fixed expected values, first from x to y and then
    from y to x.
    """
    location_x = GeoPoint(*TEST_POINTS[0])
    location_y = GeoPoint(*TEST_POINTS[1])
    expected_by_unit = (
        ('mi', .7046874859635269),
        ('km', 1.1340845774104864),
    )
    directions = ((location_x, location_y), (location_y, location_x))
    for source, destination in directions:
        for unit, expected in expected_by_unit:
            self.assertAlmostEqual(
                source.distance_to(destination, unit), expected)
def initialize_matching(overwrite=False):
    """Load both hotel sets, drop what is already matched, index the rest.

    overwrite: when true, previously stored matches are ignored and
        matching starts from an empty dict.

    Returns a tuple ``(amdh, bkgh, matches, geo_index, namdh)``: the still
    unmatched Amadeus and Booking hotels, the matches loaded (amd_id ->
    bkg_id), a ``GeoGridIndex`` over the remaining Booking hotels, and the
    number of Amadeus hotels before filtering.
    """
    # 1. fetch all unmatched Amadeus hotels
    print("Loading Amadeus hotels")
    amdh = load_amadeus_from_db()
    namdh = len(amdh)
    print("Loaded %d hotels" % namdh)

    # 2. fetch all unmatched Booking.com hotels
    print("Loading Booking hotels")
    bkgh = load_booking()
    # bkgh = load_booking_from_mysql()
    print("Loaded %d hotels" % len(bkgh))

    # 3. load existing matches
    if not overwrite:
        print("Loading previous matches")
        matches = load_matches()
    else:
        matches = {}
    matched_amdids = list(matches.keys())
    matched_bkgids = set(matches.values())

    # 4. Exclude already matched
    amdh = amdh[~amdh.amd_id.isin(matched_amdids)]
    bkgh = bkgh[~bkgh.bkg_id.isin(matched_bkgids)]
    # Function-call form like every other print in this function; the
    # statement form is a SyntaxError on Python 3.
    print("%d Amadeus hotels left to match to %d Booking hotels" %
          (len(amdh), len(bkgh)))

    # 5. Build geo index
    print("Building Geo Index")
    geo_index = GeoGridIndex()
    for i, hb in bkgh.iterrows():
        # NOTE(review): a latitude of exactly 90 is replaced by -90.0 before
        # indexing - presumably a workaround for the index; confirm.
        if hb['lat'] == 90:
            hb['lat'] = -90.0
        geo_index.add_point(GeoPoint(hb['lat'], hb['lng'], ref=hb))
    return amdh, bkgh, matches, geo_index, namdh
if 'sid' in form: c1, c2 = (-31.36023, -64.26264) z = 13 banda = '025' lat = 0 prop = int(form['sid'].value) geo_index = GeoGridIndex() cursor.execute( "select origen, destino, frecuencia, calidad from link where propietario='%s'" % (prop)) datapoint = cursor.fetchall() for p in datapoint: lat, lon = geohash.decode(p[0]) geo_index.add_point(GeoPoint(lat, lon)) lat, lon = geohash.decode(p[1]) geo_index.add_point(GeoPoint(lat, lon)) cursor.execute( "select point, name, ip, numeroenlaces from nodo where propietario='%s'" % (prop)) datanodo = cursor.fetchall() numero = [] nodos = {} for p in datanodo: nodos[p[0]] = p[3] numero.append(p[2]) if 'center' in form:
def get_nearest_points(geo_index, lat, lng, r):
    """Yield ``(ref, distance)`` for every indexed point within ``r``.

    ``r`` is divided by 1000 before being handed to
    ``geo_index.get_nearest_points`` and each distance that comes back is
    multiplied by 1000 again, so ``r`` and the yielded distances share one
    unit (meters, assuming the index works in its default km unit).
    """
    origin = GeoPoint(lat, lng)
    # 1000.0 forces true division: with an integer ``r`` on Python 2,
    # ``r / 1000`` would floor (e.g. 500 -> 0) and shrink the radius to zero.
    for point, dist in geo_index.get_nearest_points(origin, r / 1000.0):
        dist *= 1000
        if dist <= r:
            yield point.ref, dist
def match_in_neighborhood(amdh,
                          geo_index,
                          radius,
                          nsim_threshold,
                          matches,
                          namdh,
                          save=True,
                          unique=False,
                          swap_words=False,
                          return_cands=True):
    """Match hotels of ``amdh`` to nearby hotels of ``geo_index`` by name.

    Hotels whose ``amd_id`` is already a key of ``matches`` are skipped.
    For each remaining hotel the not-yet-matched hotels within ``radius``
    km are scored with ``get_name_sim``; those above ``nsim_threshold``
    are ranked best first.  The best one is recorded in ``matches`` (which
    is mutated in place) unless ``unique`` is set and more than one
    candidate qualifies.  In that ambiguous case, and only if
    ``return_cands`` is set, the candidates are collected instead.

    The share of matched hotels relative to ``namdh`` is printed, and
    ``matches`` is written to 'matches_excel.json' when ``save`` is true.

    Returns the dict of ambiguous candidates keyed by ``amd_id`` when
    ``return_cands`` is true, otherwise None.
    """
    amdh = amdh[~amdh.amd_id.isin(matches.keys())]
    cands_by_hotel = {} if return_cands else None

    for seen, (_, hotel) in enumerate(amdh.iterrows(), 1):
        if seen % 1000 == 0:
            print("%.2f %%" % (seen * 100.0 / len(amdh)))

        centre = GeoPoint(hotel['lat'], hotel['lng'], ref=hotel)
        try:
            neighbours = list(
                geo_index.get_nearest_points(centre, radius, 'km'))
        except Exception:
            continue

        # Keep only the referenced hotels that nobody has been matched to.
        cands = [
            point.ref for point, _dist in neighbours
            if point.ref['bkg_id'] not in matches.values()
        ]
        if not cands:
            continue

        nsims = [
            get_name_sim(cand['name'], hotel['name'], swap_words)
            for cand in cands
        ]
        # Positions of the qualifying candidates, best similarity first.
        ranked = [
            pos for pos in np.argsort(nsims)[::-1]
            if nsims[pos] > nsim_threshold
        ]

        if ranked and (not unique or len(ranked) == 1):
            matches[hotel['amd_id']] = cands[ranked[0]]['bkg_id']
            continue

        if return_cands and len(ranked) > 1:
            cands_by_hotel[hotel['amd_id']] = [
                {'candidate': cands[pos], 'name_sim': nsims[pos]}
                for pos in ranked
            ]

    print("%.1f%% matched" % (len(matches) * 100.0 / namdh))

    # Save Matches
    if save:
        with open('matches_excel.json', 'w') as f:
            json.dump(matches, f)

    return cands_by_hotel
# NOTE(review): the ``array(...)`` expression below looks like pasted
# interpreter output; ``array`` is not defined in the visible code - confirm
# whether it belongs here at all.
array([[0., 1.], [0., 1.], [0., 1.41421356], [0., 1.], [0., 1.],
       [0., 1.41421356]])

from geoindex import GeoGridIndex, GeoPoint
import random

# Fill an index with 10000 random points spread over the whole globe.
index = GeoGridIndex()
for _ in range(10000):
    lat = random.random() * 180 - 90
    lng = random.random() * 360 - 180
    index.add_point(GeoPoint(lat, lng))

center_point = GeoPoint(37.7772448, -122.3955118)
# get_nearest_points yields (point, distance) pairs, in that order; unpacking
# them the other way round swapped the two values in the message.
for point, distance in index.get_nearest_points(center_point, 10, 'km'):
    print("We found {0} in {1} km".format(point, distance))

#index = GeoGridIndex()
for airport in get_all_airports():
    # NOTE(review): ``lat`` / ``lng`` are leftovers from the random loop
    # above, so every airport is indexed at the same coordinates; they
    # should presumably come from ``airport`` - its fields are not visible
    # here, so this is left unchanged.
    index.add_point(GeoPoint(lat, lng, ref=airport))

center_point = GeoPoint(37.7772448, -122.3955118)
def match_in_neighborhood(amdh,
                          geo_index,
                          radius,
                          nsim_threshold,
                          matches,
                          namdh,
                          save=True,
                          unique=False,
                          swap_words=False,
                          return_cands=True):
    """Match hotels of ``amdh`` to nearby hotels of ``geo_index`` by name.

    Hotels whose ``lvr_id`` is already a key of ``matches`` are skipped.
    For each remaining hotel the not-yet-matched hotels within ``radius``
    km are fetched, their distance is stored on them under 'dist', and
    they are scored with ``get_name_sim`` using the stop-words returned by
    ``extract_stopwords``.  Candidates above ``nsim_threshold`` are ranked
    best first, using the word-swapping similarity when ``swap_words`` is
    set and the plain one otherwise.

    * ``return_cands`` false: the best candidate is recorded in ``matches``
      (mutated in place) unless ``unique`` is set and several qualify.
    * ``return_cands`` true: nothing is recorded; every qualifying
      candidate is collected as a dict with both similarity scores and the
      distance.

    The share of matched hotels relative to ``namdh`` is printed, and
    ``matches`` is written to 'matches.json' when ``save`` is true.

    Returns the dict of candidate lists keyed by ``lvr_id`` when
    ``return_cands`` is true, otherwise None.
    """
    amdh = amdh[~amdh.lvr_id.isin(matches.keys())]
    candidates = {} if return_cands else None
    count = 0
    for _, h in amdh.iterrows():
        count += 1
        if count % 1000 == 0:
            progress = count * 100.0 / len(amdh)
            print("%.2f %%" % progress)

        center_point = GeoPoint(h['lat'], h['lng'], ref=h)
        try:
            geo_cands = list(
                geo_index.get_nearest_points(center_point, radius, 'km'))
        except Exception:
            continue

        # Build the candidate list from the geo hits.  The hits need their
        # own name: the list was previously emptied before being iterated,
        # so no hotel ever had a candidate.  The referenced hotel (not the
        # GeoPoint wrapper) is what gets collected.
        cands = []
        for (hb, d) in geo_cands:
            hb_ = hb.ref
            hb_['dist'] = d
            cands.append(hb_)
        cands = [hb for hb in cands if hb['bkg_id'] not in matches.values()]
        if not cands:
            continue

        sw = extract_stopwords([hb['name'] for hb in cands])
        nsims_plain = [
            get_name_sim(hb['name'], h['name'], False, sw) for hb in cands
        ]
        # The swapped scores are reported for every candidate, so they are
        # needed whenever candidates are returned, not only for ranking.
        if swap_words or return_cands:
            nsims_swap = [
                get_name_sim(hb['name'], h['name'], True, sw) for hb in cands
            ]
        nsims = nsims_swap if swap_words else nsims_plain

        inds = [
            i for i in reversed(np.argsort(nsims))
            if nsims[i] > nsim_threshold
        ]

        if inds and (not unique or len(inds) == 1) and (not return_cands):
            matches[h['lvr_id']] = cands[inds[0]]['bkg_id']

        if return_cands:
            candsh = []
            for i in inds:
                hb = cands[i]
                candsh.append({
                    'lvr_id': h['lvr_id'],
                    'bkg_id': hb['bkg_id'],
                    'name': h['name'],
                    'chain': h['chain'],
                    'name_bkg': hb['name'],
                    'chain_bkg': hb['chain'],
                    'name_sim': nsims_plain[i],
                    'name_sim_sw': nsims_swap[i],
                    # stored above under 'dist' (there is no 'd' entry)
                    'dist': hb['dist'],
                })
            candidates[h['lvr_id']] = candsh

    perc_matched = len(matches) * 100.0 / namdh
    print("%.1f%% matched" % perc_matched)

    # Save Matches
    if save:
        with open('matches.json', 'w') as f:
            json.dump(matches, f)

    if return_cands:
        return candidates
skiprows=1) amdh = convert_amd_df_to_matching_format(amdh_full) namdh = len(amdh) print("Loaded %d hotels" % namdh) print("Loading Booking hotels") bkgh = load_booking() # bkgh = load_booking_from_mysql() print("Loaded %d hotels" % len(bkgh)) print("Building Geo Index") geo_index = GeoGridIndex() for i, hb in bkgh.iterrows(): if hb['lat'] == 90: hb['lat'] = -90.0 geo_index.add_point(GeoPoint(hb['lat'], hb['lng'], ref=hb)) matches = {} print("1st pass") match_in_neighborhood(amdh, geo_index, 1, 0.6, matches, namdh) print("2nd pass") match_in_neighborhood(amdh, geo_index, 2, 0.75, matches, namdh) print("3rd pass") match_in_neighborhood(amdh, geo_index, 4, 0.8, matches, namdh) print("4th pass") match_in_neighborhood(amdh, geo_index, 6, 0.86, matches, namdh)
def extract_candidates(amdh, geo_index, radius, nsim_threshold, namdh):
    """Collect, per hotel of ``amdh``, the nearby hotels with similar names.

    For each row the hotels of ``geo_index`` within ``radius`` km are
    fetched and their distance is stored on them under 'dist'.  They are
    scored with ``get_name_sim`` (plain and word-swapping variant, both
    using the stop-words from ``extract_stopwords``); those whose swapped
    score exceeds ``nsim_threshold`` are reported, best swapped score
    first.  Rows whose geo lookup raises, or that have no neighbour at
    all, get no entry.  Progress is printed every 1000 rows.

    Returns a dict mapping ``lvr_id`` to a list of candidate dicts.
    ``namdh`` is accepted but not used.
    """
    candidates = {}
    for seen, (_, hotel) in enumerate(amdh.iterrows(), 1):
        if seen % 1000 == 0:
            print("%.2f %%" % (seen * 100.0 / len(amdh)))

        centre = GeoPoint(hotel['lat'], hotel['lng'], ref=hotel)
        try:
            hits = list(geo_index.get_nearest_points(centre, radius, 'km'))
        except Exception:
            continue

        nearby = []
        for point, dist in hits:
            booking = point.ref
            # the distance travels with the referenced hotel itself
            booking['dist'] = dist
            nearby.append(booking)
        if not nearby:
            continue

        stopwords = extract_stopwords([b['name'] for b in nearby])
        plain_scores = [
            get_name_sim(b['name'], hotel['name'], False, stopwords)
            for b in nearby
        ]
        swap_scores = [
            get_name_sim(b['name'], hotel['name'], True, stopwords)
            for b in nearby
        ]

        # Ranking and threshold both use the word-swapping score.
        ranked = [
            pos for pos in reversed(np.argsort(swap_scores))
            if swap_scores[pos] > nsim_threshold
        ]
        candidates[hotel['lvr_id']] = [
            {
                'lvr_id': hotel['lvr_id'],
                'bkg_id': nearby[pos]['bkg_id'],
                'name': hotel['name'],
                'chain': hotel['chain'],
                'name_bkg': nearby[pos]['name'],
                'chain_bkg': nearby[pos]['chain'],
                'name_sim': plain_scores[pos],
                'name_sim_sw': swap_scores[pos],
                'dist': nearby[pos]['dist'],
            }
            for pos in ranked
        ]
    return candidates
def __init__(self, lat, lon, radius, unit='km'):
    """Set up a precision-3 grid index centred on ``(lat, lon)``.

    ``lat`` and ``lon`` are converted to ``float`` for the centre point;
    ``radius`` and ``unit`` are stored unchanged and ``_config`` starts
    out as None.
    """
    GeoGridIndex.__init__(self, precision=3)
    latitude = float(lat)
    longitude = float(lon)
    self.center_point = GeoPoint(latitude=latitude, longitude=longitude)
    self.radius, self.unit = radius, unit
    self._config = None
myfile = opener.open(myurl) js = json.load(myfile) # build index of tract representative points index = GeoGridIndex() for feature in js['features']: # get feature properties and unique tract identifier properties = feature.get('properties') BoroCT2010 = properties.get('BoroCT2010') # geometry of tract geometry = feature.get('geometry') polygon = shape(geometry) # get a representative point from each tract lon, lat = polygon.representative_point().coords[0] # add representative point to index index.add_point(GeoPoint(lat, lon, ref=BoroCT2010)) # function returning the Census tract of a point def geocoder(lat, lon, rad=.5): taxi_point = GeoPoint(lat, lon) # iterate throug the nearest tracts to the point for point, distance in index.get_nearest_points(taxi_point, rad, unit='km'): for feature in js['features']: properties = feature.get('properties') BoroCT2010 = properties.get('BoroCT2010') # check if the point belongs to one of the nearest tracts to it if point.ref == BoroCT2010: geometry = feature.get('geometry') polygon = shape(geometry)
class TestIndexAccurate(TestCase):
    """Accuracy tests for ``GeoGridIndex.get_nearest_points``.

    The class-level fixture points are shared by most tests; the distance
    expectations below were recorded for exactly these coordinates.
    """

    point_1bluxome = GeoPoint(37.7772448, -122.3955118)
    point_market_street = GeoPoint(37.785275, -122.4062836)
    point_oakland = GeoPoint(37.7919585, -122.2287941)
    point_walnut_creek = GeoPoint(37.8942235, -122.040223)
    point_freemont = GeoPoint(37.5293865, -121.9992648)
    point_la = GeoPoint(34.0204989, -118.4117325)

    points = [
        point_1bluxome, point_market_street, point_oakland,
        point_walnut_creek, point_freemont, point_la
    ]

    @staticmethod
    def _count(iterable):
        """Return the number of items produced by ``iterable``."""
        return len(list(iterable))

    @staticmethod
    def _build_index(points, precision):
        """Return a ``GeoGridIndex`` of ``precision`` filled with ``points``.

        An explicit loop is used on purpose: ``map(index.add_point, ...)``
        is lazy on Python 3 and would leave the index empty.
        """
        index = GeoGridIndex(precision=precision)
        for point in points:
            index.add_point(point)
        return index

    def test_bounds(self):
        """Check neighbour counts for points on grid-cell boundaries."""
        # ezv block
        point1 = GeoPoint(43.59375, -4.21875)  # ezv
        point2 = GeoPoint(43.59375, -4.218750001)  # ezu
        point3 = GeoPoint(43.59375, -2.812500001)  # ezv
        point4 = GeoPoint(43.59375, -2.8125)  # ezy
        point5 = GeoPoint(43.59375, (-4.21875 + -2.8125) / 2)
        points = [point1, point2, point3, point4, point5]
        index = self._build_index(points, precision=3)

        count = self._count
        self.assertEqual(count(index.get_nearest_points(point1, 57)), 3)
        self.assertEqual(count(index.get_nearest_points(point2, 57)), 3)
        self.assertEqual(count(index.get_nearest_points(point3, 57)), 3)
        self.assertEqual(count(index.get_nearest_points(point4, 57)), 3)
        self.assertEqual(count(index.get_nearest_points(point5, 57)), 5)

    def test_big_distance(self):
        """A 600 radius around the LA point finds every fixture point."""
        index = self._build_index(self.points, precision=2)
        ls = list(index.get_nearest_points(self.point_la, 600))
        self.assertEqual(len(ls), len(self.points))

    def test_simple_accurate(self, precision=3):
        """Check hit counts around the first fixture point at ``precision``."""
        index = self._build_index(self.points, precision)
        ls = list(index.get_nearest_points(self.point_1bluxome, 10))
        self.assertEqual(len(ls), 2)

        # A real list, so it can be searched more than once (a ``map``
        # iterator would be consumed by the first assertIn on Python 3).
        found_points = [pair[0] for pair in ls]
        self.assertIn(self.point_1bluxome, found_points)
        self.assertIn(self.point_market_street, found_points)

        count = self._count
        self.assertEqual(
            count(index.get_nearest_points(self.point_1bluxome, 15)), 3)
        self.assertEqual(
            count(index.get_nearest_points(self.point_1bluxome, 34)), 4)

    def test_distance_km(self, precision=3):
        """Check the distances reported with the default unit."""
        index = self._build_index(self.points, precision)
        for pt, distance in index.get_nearest_points(self.point_1bluxome,
                                                     10):
            if pt == self.point_1bluxome:
                self.assertEqual(distance, 0)
            if pt == self.point_market_street:
                self.assertEqual(distance, 1.301272755220718)

    def test_distance_mi(self, precision=3):
        """Check the distances reported with the 'mi' unit."""
        index = self._build_index(self.points, precision)
        for pt, distance in index.get_nearest_points(self.point_1bluxome,
                                                     10, 'mi'):
            if pt == self.point_1bluxome:
                self.assertEqual(distance, 0)
            if pt == self.point_market_street:
                self.assertEqual(distance, 0.808573403337458)

    def test_different_precision(self):
        """Re-run the simple accuracy test for precisions 1, 2 and 3."""
        for precision in [1, 2, 3]:
            self.test_simple_accurate(precision)

    def test_wrong_precision(self):
        """A precision-4 index must refuse a 100 radius query.

        The raised exception's message is expected to mention
        'precision=2'.
        """
        index = GeoGridIndex(precision=4)
        # assertRaises + assertIn works on Python 2.7 and every Python 3,
        # unlike assertRaisesRegexp (removed) / assertRaisesRegex (3.2+).
        with self.assertRaises(Exception) as caught:
            list(index.get_nearest_points(self.point_market_street, 100))
        self.assertIn('precision=2', str(caught.exception))
import dask.dataframe as dd
from dask.multiprocessing import get

#Load Swaziland Population File
swaz_pop = pd.read_csv("swaz_pop_data.csv")

#Load Water Data
water_data = pd.read_csv("Water_Point_Data_Exchange_Complete_Dataset.csv")

#Create df with only Swaziland water
swaz_water = water_data[water_data['#country_name'] == 'Swaziland']

#Create Geo Index of Swaziland Population Data
# Each population row becomes a point at (row['y'], row['x']) carrying
# row['value'] as its ref.
geo_index = GeoGridIndex()
for index, row in swaz_pop.iterrows():
    geo_index.add_point(GeoPoint(row['y'], row['x'], ref=row['value']))


#Calculate population with x distance of random well
def calculate_population_within_x_km(row, index, km):
    """Sum the ``ref`` values of all indexed points within ``km`` km of a row.

    The search centre is taken from ``row['#lat_deg']`` and
    ``row['#lon_deg']``.  If the lookup or the summation raises, a message
    is printed and whatever had been accumulated up to that point is
    returned (0 if nothing had been added yet).
    """
    center_point = GeoPoint(row['#lat_deg'], row['#lon_deg'])
    total_population = 0
    try:
        for point, _distance in index.get_nearest_points(
                center_point, km, 'km'):
            total_population += point.ref
    except Exception:
        # Deliberately best-effort, but ``Exception`` (rather than a bare
        # ``except``) lets KeyboardInterrupt / SystemExit propagate.
        print("Invalid data - Record skipped")
    #print("Total population within", km, "kilometers:", int(total_population))
    return total_population