Beispiel #1
0
    def test_point_distance(self):
        """Check the distance between the first two fixture points per unit."""
        origin = GeoPoint(*TEST_POINTS[0])
        target = GeoPoint(*TEST_POINTS[1])

        # (unit, expected distance) pairs, checked in this order.
        expected_by_unit = (
            ('mi', .7046874859635269),
            ('km', 1.1340845774104864),
        )
        for unit, expected in expected_by_unit:
            self.assertAlmostEqual(origin.distance_to(target, unit), expected)
    def test_point_distance(self):
        """Verify ``distance_to`` for the first two fixture points."""
        start = GeoPoint(*TEST_POINTS[0])
        end = GeoPoint(*TEST_POINTS[1])

        miles = start.distance_to(end, 'mi')
        self.assertAlmostEqual(miles, .7046874859635269)

        kilometres = start.distance_to(end, 'km')
        self.assertAlmostEqual(kilometres, 1.1340845774104864)
Beispiel #3
0
 def __load_geo_index(self):
     """Rebuild ``self.geo_index`` with one point per record in ``self.data``.

     Each record's "lat" and "lng" values are converted with ``float`` and
     the record itself is attached to the point as ``ref``.
     """
     grid = GeoGridIndex()
     self.geo_index = grid
     for entry in self.data:
         position = GeoPoint(
             float(entry["lat"]), float(entry["lng"]), ref=entry)
         grid.add_point(position)
def build_geo_index_from_coord_index(coord_index, precision=5):
    """Return a GeoGridIndex holding one GeoPoint per (lat, lng) pair.

    ``coord_index`` is iterated once; every item must unpack into exactly
    two values, which are passed to GeoPoint as latitude and longitude.
    """
    grid = GeoGridIndex(precision=precision)
    for coords in coord_index:
        latitude, longitude = coords
        grid.add_point(GeoPoint(latitude, longitude))
    return grid
def get_interactive_candidates(amdh, geo_index, radius=6, nsim_threshold=0.5):
    """
        Collect candidates for interactive matching of Amadeus hotels.

        For every row of ``amdh`` the points within ``radius`` km are fetched
        from ``geo_index``; those whose swapped-word name similarity exceeds
        ``nsim_threshold`` are kept, most similar first.

        Returns a defaultdict mapping ``amd_id`` to a list of
        ``(bkg_id, name_similarity, distance)`` tuples. Hotels without any
        qualifying candidate get no entry.
    """
    cands_by_hotel = defaultdict(list)

    for _, hotel in amdh.iterrows():
        center = GeoPoint(hotel['lat'], hotel['lng'], ref=hotel)
        try:
            nearby = list(geo_index.get_nearest_points(center, radius, 'km'))
        except Exception:
            # Best effort: a hotel the index cannot be queried for is skipped.
            continue

        similarities = [
            get_name_sim(point.ref['name'], hotel['name'], swap_words=True)
            for point, _ in nearby
        ]
        shortlisted = [
            (sim, point, dist)
            for sim, (point, dist) in zip(similarities, nearby)
            if sim > nsim_threshold
        ]
        # Stable sort: equally similar candidates keep their index order.
        shortlisted.sort(key=lambda item: item[0], reverse=True)

        for sim, point, dist in shortlisted:
            cands_by_hotel[hotel['amd_id']].append(
                (point.ref['bkg_id'], sim, dist))

    return cands_by_hotel
 def test_bounds(self):
     """Points on either side of a geohash cell border must still be found.

     Five points on one latitude are placed at, and just beyond, the
     longitude borders of a precision-3 cell, plus one in the middle.
     """
     def count(results):
         # get_nearest_points is consumed as an iterable; count its items.
         return len(list(results))

     # ezv block
     point1 = GeoPoint(43.59375, -4.21875)  # ezv
     point2 = GeoPoint(43.59375, -4.218750001)  # ezu
     point3 = GeoPoint(43.59375, -2.812500001)  # ezv
     point4 = GeoPoint(43.59375, -2.8125)  # ezy
     point5 = GeoPoint(43.59375, (-4.21875 + -2.8125) / 2)
     points = [point1, point2, point3, point4, point5]
     index = GeoGridIndex(precision=3)
     # An explicit loop is required: on Python 3 ``map`` is lazy, so
     # ``map(index.add_point, points)`` would never add anything.
     for point in points:
         index.add_point(point)
     self.assertEqual(count(index.get_nearest_points(point1, 57)), 3)
     self.assertEqual(count(index.get_nearest_points(point2, 57)), 3)
     self.assertEqual(count(index.get_nearest_points(point3, 57)), 3)
     self.assertEqual(count(index.get_nearest_points(point4, 57)), 3)
     self.assertEqual(count(index.get_nearest_points(point5, 57)), 5)
Beispiel #7
0
def match_stops_to_nodes(gtfs, walk_network):
    """
    Match every transfer-relevant GTFS stop to its closest walk-network node.

    Nodes are searched within 100 m first and, only if nothing is found,
    within 500 m.

    Parameters
    ----------
    gtfs : a GTFS object
    walk_network : networkx.Graph
        every node must carry 'lat' and 'lon' attributes

    Returns
    -------
    stop_I_to_node: dict
        maps stop_I to closest walk_network node
        (None when no node lies within the widest search radius)
    stop_I_to_dist: dict
        maps stop_I to the distance in meters to the closest walk_network
        node (inf when no node was found)
    """
    # The boolean True yields (node, attribute-dict) pairs. The former string
    # "true" is read as an attribute *name* by newer networkx releases.
    network_nodes = walk_network.nodes(data=True)

    stop_Is = set(gtfs.get_straight_line_transfer_distances()['from_stop_I'])
    stops_df = gtfs.stops()

    geo_index = GeoGridIndex(precision=6)
    for net_node, data in network_nodes:
        geo_index.add_point(GeoPoint(data['lat'], data['lon'], ref=net_node))

    # Search radii in kilometers, tried from narrowest to widest.
    search_radii_km = [0.100, 0.500]
    stop_I_to_node = {}
    stop_I_to_dist = {}
    for stop_I in stop_Is:
        stop_row = stops_df[stops_df.stop_I == stop_I]
        geo_point = GeoPoint(float(stop_row.lat), float(stop_row.lon))
        min_dist = float('inf')
        min_dist_node = None
        for radius_km in search_radii_km:
            for point, distance_km in geo_index.get_nearest_points(
                    geo_point, radius_km, "km"):
                # Compare in meters, the unit min_dist is stored in;
                # comparing raw kilometers let later points overwrite
                # a closer one.
                distance_m = distance_km * 1000
                if distance_m < min_dist:
                    min_dist = distance_m
                    min_dist_node = point.ref
            if min_dist_node is not None:
                break
        if min_dist_node is None:
            warn("No OSM node found for stop: " + str(stop_row))
        stop_I_to_node[stop_I] = min_dist_node
        stop_I_to_dist[stop_I] = min_dist
    return stop_I_to_node, stop_I_to_dist
Beispiel #8
0
    def geo_search(self, lat, lng, range):
        """Return the ``ref`` of every indexed point near (lat, lng).

        ``range`` is the search radius handed to ``get_nearest_points``
        together with the unit 'km'. Results keep the order in which the
        index yields them.
        """
        origin = GeoPoint(lat, lng)
        hits = self.geo_index.get_nearest_points(origin, range, 'km')
        return [hit.ref for hit, _distance in hits]
def get_nearest_point(geo_index, lat, lng, r):
    """Return ``(ref, distance)`` of the indexed point closest to (lat, lng).

    ``r`` is divided by 1000 before being handed to the index and the
    returned distances are multiplied by 1000 again, so ``r`` and the
    returned distance share one unit (presumably meters, with the index
    working in kilometers -- TODO confirm the index's default unit).

    Returns ``(None, None)`` when the index yields no point or the closest
    one is farther away than ``r``.
    """
    # Divide by a float: with an integer ``r`` Python 2 would truncate
    # ``r / 1000`` to 0 and the search radius would collapse.
    radius = r / 1000.0
    nearest_points_dists = list(
        geo_index.get_nearest_points(GeoPoint(lat, lng), radius))
    if not nearest_points_dists:
        return None, None

    nearest_point, dist = min(nearest_points_dists, key=lambda x: x[1])
    dist *= 1000
    if dist <= r:
        return nearest_point.ref, dist
    return None, None
def build_geo_index_from_point_index(index, precision=5):
    """Return a GeoGridIndex with one point per value of ``index``.

    Each value is read through ``.get``: latitude comes from 'latitude'
    (falling back to 'lat'), longitude from 'longitude' (falling back to
    'lon'). The value's 'id' is passed as GeoPoint's third positional
    argument.
    """
    grid = GeoGridIndex(precision=precision)

    for _key, info in index.iteritems():
        latitude = info.get('latitude', info.get('lat'))
        longitude = info.get('longitude', info.get('lon'))
        grid.add_point(GeoPoint(latitude, longitude, info['id']))

    return grid
Beispiel #11
0
def calculate_population_within_x_km(row, index, km):
    """Sum the ``ref`` values of all indexed points within ``km`` of ``row``.

    The search centre is built from ``row['#lat_deg']`` and
    ``row['#lon_deg']``. If the lookup or the summation fails, a message is
    printed and whatever was summed up to that point is returned (0 when
    nothing was).
    """
    center_point = GeoPoint(row['#lat_deg'], row['#lon_deg'])
    total_population = 0
    try:
        for point, _distance in index.get_nearest_points(
                center_point, km, 'km'):
            total_population += point.ref
    except Exception:
        # Deliberately best effort, but no longer a bare ``except`` so that
        # KeyboardInterrupt and SystemExit still propagate.
        print("Invalid data - Record skipped")
    return total_population
def generate_comparison_vectors(amdh, geo_index, radius=6, nsim_threshold=0.5):
    """
        Build comparison vectors for training a matching classifier.

        amdh: data frame containing Amadeus Hotels
        geo_index: index of Booking hotels, easy to query by geo radius
        radius: distance (km) to fetch candidates from
        nsim_threshold: minimum swapped-word name similarity for a pair to
            be kept

        Returns a DataFrame indexed by (amd_id, bkg_id) with the columns
        nsim1 (plain similarity), nsim2 (swapped-word similarity) and dist.
    """
    pair_ids = []
    vectors = []

    for seen, (_, hotel) in enumerate(amdh.iterrows(), 1):
        # Progress report every 500 hotels.
        if seen % 500 == 0:
            print("%.2f %%" % (seen * 100.0 / len(amdh)))

        center = GeoPoint(hotel['lat'], hotel['lng'], ref=hotel)
        try:
            nearby = list(geo_index.get_nearest_points(center, radius, 'km'))
        except Exception:
            # Best effort: hotels the index cannot be queried for are skipped.
            continue

        plain_sims = [
            get_name_sim(point.ref['name'], hotel['name'], swap_words=False)
            for point, _ in nearby
        ]
        swapped_sims = [
            get_name_sim(point.ref['name'], hotel['name'], swap_words=True)
            for point, _ in nearby
        ]

        for (point, dist), plain, swapped in zip(nearby, plain_sims,
                                                 swapped_sims):
            if swapped > nsim_threshold:
                pair_ids.append((hotel['amd_id'], point.ref['bkg_id']))
                vectors.append((plain, swapped, dist))

    pair_index = pd.MultiIndex.from_tuples(pair_ids,
                                           names=['amd_id', 'bkg_id'])
    return pd.DataFrame(vectors,
                        index=pair_index,
                        columns=['nsim1', 'nsim2', 'dist'])
Beispiel #13
0
    def test_point(self):
        """Exercise GeoPoint coordinates, lazy radian caching and equality."""
        raw = TEST_POINTS[0]
        expected_lat, expected_lng = raw[0], raw[1]

        point = GeoPoint(*raw)
        self.assertEqual(point.latitude, expected_lat)
        self.assertEqual(point.longitude, expected_lng)
        # The cached radian values are unset until first accessed.
        self.assertIsNone(point._rad_latitude)
        self.assertIsNone(point._rad_longitude)

        self.assertEqual(point.rad_latitude, math.radians(expected_lat))
        self.assertEqual(point.rad_longitude, math.radians(expected_lng))
        # Reading the public properties fills the private cache.
        self.assertIsNotNone(point._rad_latitude)
        self.assertIsNotNone(point._rad_longitude)
        self.assertEqual(point.rad_latitude, point._rad_latitude)
        self.assertEqual(point.rad_longitude, point._rad_longitude)

        twin = GeoPoint(expected_lat, expected_lng)
        self.assertEqual(point, twin)
        self.assertTrue(point == twin)

        different = GeoPoint(TEST_POINTS[1][0], TEST_POINTS[1][1])
        self.assertNotEqual(point, different)
        self.assertFalse(point == different)

        # A GeoPoint never equals the raw fixture entry it was built from.
        self.assertNotEqual(point, raw)
        self.assertFalse(point == raw)
Beispiel #14
0
def geocoder(lat, lon, rad=.5):
    """Return the BoroCT2010 id of the tract containing (lat, lon).

    Only tracts whose indexed point lies within ``rad`` km are tested.
    Returns the string "notfound" when none of them contains the location.
    """
    origin = GeoPoint(lat, lon)
    nearby = index.get_nearest_points(origin, rad, unit='km')
    for candidate, _distance in nearby:
        for feature in js['features']:
            tract_id = feature.get('properties').get('BoroCT2010')
            # Skip every feature that is not the candidate's own tract.
            if not (candidate.ref == tract_id):
                continue
            outline = shape(feature.get('geometry'))
            if outline.contains(Point(lon, lat)):
                return tract_id
    # if no tract is found, return an invalid string
    return "notfound"
Beispiel #15
0
	def load_index(self, input=None):
		"""
		Load university locations into memory, inside of our geo_index
		variable.

		Every shape record whose attribute record contains the value
		'university' contributes its first vertex as a point.

		NOTE(review): ``input`` is accepted but not used here; the shapefile
		path is hard-coded.
		"""
		print('Loading locations of interest into internal spatial index.')
		input_counter = 0
		sf = shapefile.Reader('/Users/chrisholloway/Downloads/virginia-latest-free.shp/gis.osm.pois_a_free_1.shp')
		for shape_record in sf.shapeRecords():
			if 'university' in shape_record.record:
				# Vertices are read as (lon, lat): index 1 is passed as latitude.
				first_vertex = shape_record.shape.points[0]
				lat = first_vertex[1]
				lon = first_vertex[0]
				self.geo_index.add_point(GeoPoint(lat, lon))
				input_counter += 1
		print('Done loading index of university (added %s values)' % (input_counter))
	def load_index(self, input=None):
		"""
		Load all of the geolocated cemeteries into memory,
		inside of our geo_index variable.

		``input`` is the path of a tab-separated file. Column 4 is read as
		latitude, column 5 as longitude, column 7 as the feature tag and
		column 10 as the admin1 code. Only rows with admin1 'VA' and tag
		'CMTY' are indexed.
		"""
		print('Loading locations of interest into internal spatial index.')
		input_counter = 0
		# The context manager closes the file even if a row fails to parse.
		with open(input, 'rU') as source:
			for line in source:
				parts = line.strip().split('\t')
				admin1, lat, lon, tag = parts[10], float(parts[4]), float(parts[5]), parts[7]
				if admin1 == 'VA' and tag == 'CMTY':
					print(lat)
					self.geo_index.add_point(GeoPoint(lat, lon))
					input_counter += 1
		print('Done loading index of cemetery (added %s values)' % (input_counter))
Beispiel #17
0
def calc_transfers(conn, threshold_meters=1000):
    """Fill ``stop_distances`` with all stop pairs closer than the threshold.

    Every stop from the GTFS "stops" table is put into a geo grid index.
    For each stop the index proposes nearby stops, the exact distance is
    computed with ``wgs84_distance`` and rounded up, and pairs within
    ``threshold_meters`` are written as
    ``(from_stop_I, to_stop_I, distance, NULL, NULL, NULL)``.

    An index on ``from_stop_I`` is created once after all rows are written.
    This function does not commit.
    """
    threshold_km = threshold_meters / 1000.0
    geohash_precision = _get_geo_hash_precision(threshold_km)
    geo_index = GeoGridIndex(precision=geohash_precision)
    g = GTFS(conn)
    stops = g.get_table("stops")
    stop_geopoints = []
    cursor = conn.cursor()

    for stop in stops.itertuples():
        stop_geopoint = GeoPoint(stop.lat, stop.lon, ref=stop.stop_I)
        geo_index.add_point(stop_geopoint)
        stop_geopoints.append(stop_geopoint)

    for stop_geopoint in stop_geopoints:
        # The "dirty" lookup yields bare points, so distances are computed
        # and filtered below.
        nearby_stop_geopoints = geo_index.get_nearest_points_dirty(
            stop_geopoint, threshold_km, "km")
        from_stop_I = int(stop_geopoint.ref)
        from_lat = stop_geopoint.latitude
        from_lon = stop_geopoint.longitude

        rows = []
        for nearby_stop_geopoint in nearby_stop_geopoints:
            to_stop_I = int(nearby_stop_geopoint.ref)
            if to_stop_I == from_stop_I:
                continue
            distance = math.ceil(
                wgs84_distance(from_lat, from_lon,
                               nearby_stop_geopoint.latitude,
                               nearby_stop_geopoint.longitude))
            if distance <= threshold_meters:
                rows.append(
                    (from_stop_I, to_stop_I, distance, None, None, None))

        cursor.executemany(
            'INSERT OR REPLACE INTO stop_distances VALUES (?, ?, ?, ?, ?, ?);',
            rows)

    # Created once here instead of once per stop inside the loop.
    cursor.execute(
        'CREATE INDEX IF NOT EXISTS idx_sd_fsid ON stop_distances (from_stop_I);'
    )
Beispiel #18
0
    def enumerate_all_distances(self, admin1=None):
        """
        Walk the geohash centroids stored in ``boxes`` for ``admin1``,
        find the distance to the nearest indexed point within 50 km of each
        one, and write it to the ``bus_station_distance`` column.

        Centroids without any indexed point in range get MINIMUM_DISTANCE.
        All updates are committed once at the end.
        """
        c = self.cursor
        c.execute(
            'SELECT geohash, centroid_lat, centroid_lon from boxes where admin1=?',
            (admin1, ))
        # Materialise the rows first: the same cursor runs the UPDATEs below.
        geohashes_plus_coords = c.fetchall()

        progress_counter = 0
        for geo5, lat, lon in geohashes_plus_coords:
            progress_counter += 1
            if progress_counter % 50 == 0:
                print('Processed %s records.' % (progress_counter))
            if self.debug:
                print('--------')
                print('geohash of interest: %s %s %s' % (geo5, lat, lon))

            # Distance to the nearest indexed point, capped by the default.
            centroid = GeoPoint(lat, lon)
            minimum_distance = MINIMUM_DISTANCE
            for _point, the_distance in self.geo_index.get_nearest_points(
                    centroid, 50.0, 'km'):
                if the_distance < minimum_distance:
                    minimum_distance = the_distance

            c.execute(
                'UPDATE boxes set bus_station_distance=? where geohash=?',
                (minimum_distance, geo5))
        self.conn.commit()
        print('Finished updating distance from geo5 centroids to input data')
Beispiel #19
0
    def test_geo_point_distance(self):
        """Distance between the two fixture points is equal in both directions."""
        location_x = GeoPoint(*TEST_POINTS[0])
        location_y = GeoPoint(*TEST_POINTS[1])

        expected_by_unit = (
            ('mi', .7046874859635269),
            ('km', 1.1340845774104864),
        )
        directions = ((location_x, location_y), (location_y, location_x))
        for source, target in directions:
            for unit, expected in expected_by_unit:
                self.assertAlmostEqual(
                    source.distance_to(target, unit), expected
                )
def initialize_matching(overwrite=False):
    """Load both hotel sets, drop already matched hotels and index the rest.

    overwrite: when true, previously stored matches are ignored and
        matching starts from an empty dict.

    Returns ``(amdh, bkgh, matches, geo_index, namdh)``: the unmatched
    Amadeus hotels, the unmatched Booking hotels, the match dict
    (amd_id -> bkg_id), a geo index over the Booking hotels, and the number
    of Amadeus hotels before filtering.
    """
    # 1. fetch all unmatched Amadeus hotels
    print("Loading Amadeus hotels")
    amdh = load_amadeus_from_db()
    namdh = len(amdh)
    print("Loaded %d hotels" % namdh)

    # 2. fetch all unmatched Booking.com hotels
    print("Loading Booking hotels")
    bkgh = load_booking()
    # bkgh = load_booking_from_mysql()
    print("Loaded %d hotels" % len(bkgh))

    # 3. load existing matches
    if not overwrite:
        print("Loading previous matches")
        matches = load_matches()
    else:
        matches = {}

    matched_amdids = list(matches)
    matched_bkgids = set(matches.values())

    # 4. Exclude already matched
    amdh = amdh[~amdh.amd_id.isin(matched_amdids)]
    bkgh = bkgh[~bkgh.bkg_id.isin(matched_bkgids)]
    # Function-call form: the former print statement was a SyntaxError on
    # Python 3, unlike every other print in this function.
    print("%d Amadeus hotels left to match to %d Booking hotels" %
          (len(amdh), len(bkgh)))

    # 5. Build geo index
    print("Building Geo Index")
    geo_index = GeoGridIndex()
    for _, hb in bkgh.iterrows():
        # NOTE(review): latitude 90 is replaced by -90.0 before indexing;
        # presumably a workaround for the index rejecting the pole -- confirm.
        if hb['lat'] == 90:
            hb['lat'] = -90.0
        geo_index.add_point(GeoPoint(hb['lat'], hb['lng'], ref=hb))

    return amdh, bkgh, matches, geo_index, namdh
Beispiel #21
0
# Handle a request that carries a 'sid' field: load that owner's links and
# nodes from the database.
if 'sid' in form:
    # Not used in the visible portion of this fragment; presumably a default
    # map centre, zoom level and band for code further down -- TODO confirm.
    c1, c2 = (-31.36023, -64.26264)
    z = 13
    banda = '025'
    lat = 0
    # int() rejects non-numeric input before it reaches the SQL text below.
    prop = int(form['sid'].value)
    geo_index = GeoGridIndex()
    # NOTE(review): both queries are built with % string formatting; the
    # driver's parameter binding would be the safer form.
    cursor.execute(
        "select origen, destino, frecuencia, calidad from link where propietario='%s'"
        % (prop))
    datapoint = cursor.fetchall()

    # Each link row holds two geohashes (origen, destino); both end points
    # are decoded to (lat, lon) and added to the index.
    for p in datapoint:
        lat, lon = geohash.decode(p[0])
        geo_index.add_point(GeoPoint(lat, lon))
        lat, lon = geohash.decode(p[1])
        geo_index.add_point(GeoPoint(lat, lon))

    cursor.execute(
        "select point, name, ip, numeroenlaces from nodo where propietario='%s'"
        % (prop))
    datanodo = cursor.fetchall()
    numero = []
    nodos = {}

    # nodos maps each node's point column to its numeroenlaces column;
    # numero collects the ip column of every node.
    for p in datanodo:
        nodos[p[0]] = p[3]
        numero.append(p[2])

    if 'center' in form:
def get_nearest_points(geo_index, lat, lng, r):
    """Yield ``(ref, distance)`` for every indexed point within ``r``.

    ``r`` is divided by 1000 before being handed to the index and each
    returned distance is multiplied by 1000 again, so ``r`` and the yielded
    distances share one unit (presumably meters, with the index working in
    kilometers -- TODO confirm the index's default unit).
    """
    # Divide by a float: with an integer ``r`` Python 2 would truncate
    # ``r / 1000`` to 0 and the search radius would collapse.
    radius = r / 1000.0
    for point, dist in geo_index.get_nearest_points(GeoPoint(lat, lng),
                                                    radius):
        dist *= 1000
        if dist <= r:
            yield point.ref, dist
def match_in_neighborhood(amdh,
                          geo_index,
                          radius,
                          nsim_threshold,
                          matches,
                          namdh,
                          save=True,
                          unique=False,
                          swap_words=False,
                          return_cands=True):
    """Match hotels to nearby indexed hotels by name similarity.

    For every not yet matched row of ``amdh``, indexed hotels within
    ``radius`` km that are not already used in ``matches`` are scored with
    ``get_name_sim``. The best one above ``nsim_threshold`` is recorded in
    ``matches`` (amd_id -> bkg_id, updated in place); with ``unique`` set
    this only happens when exactly one candidate passes the threshold.

    Prints progress and the matched percentage relative to ``namdh``, and
    writes ``matches`` to 'matches_excel.json' when ``save`` is true.

    Returns, when ``return_cands`` is true, a dict mapping amd_id to the
    ranked candidates of hotels that had several candidates but were not
    matched; otherwise None.
    """
    pending = amdh[~amdh.amd_id.isin(matches.keys())]
    cands_by_hotel = {}

    for seen, (_, hotel) in enumerate(pending.iterrows(), 1):
        if seen % 1000 == 0:
            print("%.2f %%" % (seen * 100.0 / len(pending)))

        center = GeoPoint(hotel['lat'], hotel['lng'], ref=hotel)
        try:
            nearby = list(geo_index.get_nearest_points(center, radius, 'km'))
        except Exception:
            # Best effort: hotels the index cannot be queried for are skipped.
            continue

        # Keep only records that no earlier match has claimed.
        records = [point.ref for point, _ in nearby]
        free = [
            rec for rec in records if rec['bkg_id'] not in matches.values()
        ]
        if not free:
            continue

        scores = [
            get_name_sim(rec['name'], hotel['name'], swap_words)
            for rec in free
        ]
        # Indices of the passing candidates, best score first.
        ranked = [
            k for k in np.argsort(scores)[::-1] if scores[k] > nsim_threshold
        ]

        if ranked and (not unique or len(ranked) == 1):
            matches[hotel['amd_id']] = free[ranked[0]]['bkg_id']
            continue

        if return_cands and len(ranked) > 1:
            cands_by_hotel[hotel['amd_id']] = [
                {'candidate': free[k], 'name_sim': scores[k]} for k in ranked
            ]

    print("%.1f%% matched" % (len(matches) * 100.0 / namdh))

    # Save Matches
    if save:
        with open('matches_excel.json', 'w') as f:
            json.dump(matches, f)

    if return_cands:
        return cands_by_hotel
Beispiel #24
0
array([[0.        , 1.        ],
       [0.        , 1.        ],
       [0.        , 1.41421356],
       [0.        , 1.        ],
       [0.        , 1.        ],
       [0.        , 1.41421356]])


from geoindex import GeoGridIndex, GeoPoint
import random
index = GeoGridIndex()

# Fill the index with 10,000 random coordinates.
for _ in range(10000):
    lat = random.random()*180 - 90
    lng = random.random()*360 - 180
    index.add_point(GeoPoint(lat, lng))




center_point = GeoPoint(37.7772448, -122.3955118)
# get_nearest_points yields (point, distance) pairs, in that order; the
# names were swapped before, so the message printed them the wrong way round.
for point, distance in index.get_nearest_points(center_point, 10, 'km'):
    print("We found {0} in {1} km".format(point, distance))



#index = GeoGridIndex()
for airport in get_all_airports():
    # NOTE(review): lat/lng still hold the last random pair from the loop
    # above; presumably each airport's own coordinates are meant -- confirm.
    index.add_point(GeoPoint(lat, lng, ref=airport))

center_point = GeoPoint(37.7772448, -122.3955118)
def match_in_neighborhood(amdh,
                          geo_index,
                          radius,
                          nsim_threshold,
                          matches,
                          namdh,
                          save=True,
                          unique=False,
                          swap_words=False,
                          return_cands=True):
    """Match hotels to nearby indexed hotels, or list the candidates.

    For every row of ``amdh`` whose ``lvr_id`` is not yet in ``matches``,
    indexed hotels within ``radius`` km that are not already used in
    ``matches`` are scored with ``get_name_sim`` (plain and swapped-word
    variants, using stopwords extracted from the candidate names).
    ``swap_words`` selects which score is compared against
    ``nsim_threshold``.

    With ``return_cands`` false, the best candidate is stored in ``matches``
    (lvr_id -> bkg_id, updated in place); with ``unique`` set this only
    happens when exactly one candidate passes. With ``return_cands`` true,
    nothing is matched and the ranked candidates are collected instead.

    Prints progress and the matched percentage relative to ``namdh``, and
    writes ``matches`` to 'matches.json' when ``save`` is true.

    Returns, when ``return_cands`` is true, a dict mapping lvr_id to a list
    of candidate dicts (possibly empty); otherwise None.
    """
    count = 0
    amdh = amdh[~amdh.lvr_id.isin(matches.keys())]

    candidates = {}
    for _, h in amdh.iterrows():
        count += 1
        if count % 1000 == 0:
            progress = count * 100.0 / len(amdh)
            print("%.2f %%" % progress)

        center_point = GeoPoint(h['lat'], h['lng'], ref=h)
        try:
            geo_cands = list(
                geo_index.get_nearest_points(center_point, radius, 'km'))
        except Exception:
            # Best effort: hotels the index cannot be queried for are skipped.
            continue

        # Attach the distance to every candidate record. The query result
        # is kept under its own name; previously the list was emptied before
        # being iterated, so no candidate ever survived.
        cands = []
        for (hb, d) in geo_cands:
            hbd = hb.ref
            hbd['dist'] = d
            cands.append(hbd)

        cands = [hb for hb in cands if hb['bkg_id'] not in matches.values()]
        if not cands:
            continue

        sw = extract_stopwords([hb['name'] for hb in cands])

        # Both scores are always computed: the candidate dicts below report
        # both, whichever one is used for ranking.
        nsims_plain = [
            get_name_sim(hb['name'], h['name'], False, sw) for hb in cands
        ]
        nsims_swap = [
            get_name_sim(hb['name'], h['name'], True, sw) for hb in cands
        ]
        nsims = nsims_swap if swap_words else nsims_plain

        # Indices of the passing candidates, best score first.
        inds = [
            i for i in reversed(np.argsort(nsims)) if nsims[i] > nsim_threshold
        ]

        if inds and (not unique or len(inds) == 1) and (not return_cands):
            matches[h['lvr_id']] = cands[inds[0]]['bkg_id']

        if return_cands:
            candsh = []
            for i in inds:
                hb = cands[i]
                candsh.append({
                    'lvr_id': h['lvr_id'],
                    'bkg_id': hb['bkg_id'],
                    'name': h['name'],
                    'chain': h['chain'],
                    'name_bkg': hb['name'],
                    'chain_bkg': hb['chain'],
                    'name_sim': nsims_plain[i],
                    'name_sim_sw': nsims_swap[i],
                    # Read the key that was written above ('dist', not 'd').
                    'dist': hb['dist']
                })
            candidates[h['lvr_id']] = candsh

    perc_matched = len(matches) * 100.0 / namdh
    print("%.1f%% matched" % perc_matched)

    # Save Matches
    if save:
        with open('matches.json', 'w') as f:
            json.dump(matches, f)

    if return_cands:
        return candidates
        skiprows=1)
    amdh = convert_amd_df_to_matching_format(amdh_full)
    namdh = len(amdh)
    print("Loaded %d hotels" % namdh)

    print("Loading Booking hotels")
    bkgh = load_booking()
    # bkgh = load_booking_from_mysql()
    print("Loaded %d hotels" % len(bkgh))

    print("Building Geo Index")
    geo_index = GeoGridIndex()
    for i, hb in bkgh.iterrows():
        if hb['lat'] == 90:
            hb['lat'] = -90.0
        geo_index.add_point(GeoPoint(hb['lat'], hb['lng'], ref=hb))

    matches = {}

    print("1st pass")
    match_in_neighborhood(amdh, geo_index, 1, 0.6, matches, namdh)

    print("2nd pass")
    match_in_neighborhood(amdh, geo_index, 2, 0.75, matches, namdh)

    print("3rd pass")
    match_in_neighborhood(amdh, geo_index, 4, 0.8, matches, namdh)

    print("4th pass")
    match_in_neighborhood(amdh, geo_index, 6, 0.86, matches, namdh)
def extract_candidates(amdh, geo_index, radius, nsim_threshold, namdh):
    """Collect ranked match candidates for every hotel in ``amdh``.

    Indexed hotels within ``radius`` km are scored with ``get_name_sim``
    in its plain and swapped-word variants, using stopwords extracted from
    the candidate names. Candidates whose swapped-word score exceeds
    ``nsim_threshold`` are kept, best first.

    Returns a dict mapping lvr_id to a list of candidate dicts (possibly
    empty). Hotels without any nearby indexed hotel get no entry.
    ``namdh`` is accepted but not used.
    """
    candidates = {}

    for seen, (_, hotel) in enumerate(amdh.iterrows(), 1):
        if seen % 1000 == 0:
            print("%.2f %%" % (seen * 100.0 / len(amdh)))

        center = GeoPoint(hotel['lat'], hotel['lng'], ref=hotel)
        try:
            hits = list(geo_index.get_nearest_points(center, radius, 'km'))
        except Exception:
            # Best effort: hotels the index cannot be queried for are skipped.
            continue

        # Store the distance on each candidate record.
        nearby = []
        for point, dist in hits:
            record = point.ref
            record['dist'] = dist
            nearby.append(record)

        if not nearby:
            continue

        stopwords = extract_stopwords([rec['name'] for rec in nearby])
        plain = [
            get_name_sim(rec['name'], hotel['name'], False, stopwords)
            for rec in nearby
        ]
        swapped = [
            get_name_sim(rec['name'], hotel['name'], True, stopwords)
            for rec in nearby
        ]

        # Indices of the passing candidates, best swapped-word score first.
        ranked = [
            k for k in np.argsort(swapped)[::-1]
            if swapped[k] > nsim_threshold
        ]

        candidates[hotel['lvr_id']] = [
            {
                'lvr_id': hotel['lvr_id'],
                'bkg_id': nearby[k]['bkg_id'],
                'name': hotel['name'],
                'chain': hotel['chain'],
                'name_bkg': nearby[k]['name'],
                'chain_bkg': nearby[k]['chain'],
                'name_sim': plain[k],
                'name_sim_sw': swapped[k],
                'dist': nearby[k]['dist']
            }
            for k in ranked
        ]

    return candidates
Beispiel #28
0
 def __init__(self, lat, lon, radius, unit='km'):
     """Set up a precision-3 grid index centred on (lat, lon).

     ``lat`` and ``lon`` are converted with ``float``; ``radius`` and
     ``unit`` are stored unchanged.
     """
     GeoGridIndex.__init__(self, precision=3)
     center = GeoPoint(latitude=float(lat), longitude=float(lon))
     self.center_point = center
     self.radius, self.unit = radius, unit
     self._config = None
Beispiel #29
0
# Fetch the tract GeoJSON and parse it; `js` is read again by geocoder().
myfile = opener.open(myurl)
js = json.load(myfile)

# Build an index with one representative point per tract, tagged with the
# tract's BoroCT2010 identifier.
index = GeoGridIndex()
for feature in js['features']:
    # get feature properties and unique tract identifier
    properties = feature.get('properties')
    BoroCT2010 = properties.get('BoroCT2010')
    # geometry of tract
    geometry = feature.get('geometry')
    polygon = shape(geometry)
    # get a representative point from each tract; the coordinate pair is
    # unpacked as (lon, lat)
    lon, lat = polygon.representative_point().coords[0]
    # add representative point to index (GeoPoint takes latitude first)
    index.add_point(GeoPoint(lat, lon, ref=BoroCT2010))


# function returning the Census tract of a point
def geocoder(lat, lon, rad=.5):
    """Return the BoroCT2010 id of the tract containing (lat, lon).

    Only tracts whose representative point lies within ``rad`` km are
    tested. Returns the string "notfound" when none of them contains the
    location.
    """
    taxi_point = GeoPoint(lat, lon)
    # The location as a geometry, built with the same ``shape`` helper used
    # for the tracts. GeoJSON coordinates are ordered (lon, lat).
    # (assumes ``shape`` is shapely.geometry.shape -- TODO confirm)
    location = shape({'type': 'Point', 'coordinates': (lon, lat)})
    # iterate through the nearest tracts to the point
    for point, distance in index.get_nearest_points(taxi_point, rad,
                                                    unit='km'):
        for feature in js['features']:
            properties = feature.get('properties')
            BoroCT2010 = properties.get('BoroCT2010')
            # check if the point belongs to one of the nearest tracts to it
            if point.ref == BoroCT2010:
                geometry = feature.get('geometry')
                polygon = shape(geometry)
                # The polygon used to be built and then discarded, so the
                # function always returned None.
                if polygon.contains(location):
                    return BoroCT2010
    # if no tract is found, return an invalid string
    return "notfound"
class TestIndexAccurate(TestCase):
    """Accuracy tests for ``GeoGridIndex.get_nearest_points``."""

    # Fixture points shared by the tests below.
    point_1bluxome = GeoPoint(37.7772448, -122.3955118)
    point_market_street = GeoPoint(37.785275, -122.4062836)
    point_oakland = GeoPoint(37.7919585, -122.2287941)
    point_walnut_creek = GeoPoint(37.8942235, -122.040223)
    point_freemont = GeoPoint(37.5293865, -121.9992648)
    point_la = GeoPoint(34.0204989, -118.4117325)
    points = [
        point_1bluxome, point_market_street, point_oakland, point_walnut_creek,
        point_freemont, point_la
    ]

    @staticmethod
    def _build_index(points, precision):
        """Return a GeoGridIndex of ``precision`` holding all ``points``.

        An explicit loop is used because ``map`` is lazy on Python 3 and
        would leave the index empty.
        """
        index = GeoGridIndex(precision=precision)
        for point in points:
            index.add_point(point)
        return index

    @staticmethod
    def _count(results):
        """Return the number of items produced by an iterable of results."""
        return len(list(results))

    def test_bounds(self):
        """Points on either side of a geohash cell border are still found."""
        # ezv block
        point1 = GeoPoint(43.59375, -4.21875)  # ezv
        point2 = GeoPoint(43.59375, -4.218750001)  # ezu
        point3 = GeoPoint(43.59375, -2.812500001)  # ezv
        point4 = GeoPoint(43.59375, -2.8125)  # ezy
        point5 = GeoPoint(43.59375, (-4.21875 + -2.8125) / 2)
        points = [point1, point2, point3, point4, point5]
        index = self._build_index(points, precision=3)
        count = self._count
        self.assertEqual(count(index.get_nearest_points(point1, 57)), 3)
        self.assertEqual(count(index.get_nearest_points(point2, 57)), 3)
        self.assertEqual(count(index.get_nearest_points(point3, 57)), 3)
        self.assertEqual(count(index.get_nearest_points(point4, 57)), 3)
        self.assertEqual(count(index.get_nearest_points(point5, 57)), 5)

    def test_big_distance(self):
        """A 600-unit radius around LA covers every fixture point."""
        index = self._build_index(self.points, precision=2)
        ls = list(index.get_nearest_points(self.point_la, 600))
        self.assertEqual(len(ls), len(self.points))

    def test_simple_accurate(self, precision=3):
        """Growing radii around 1 Bluxome return 2, 3 and 4 points."""
        index = self._build_index(self.points, precision)
        count = self._count

        ls = list(index.get_nearest_points(self.point_1bluxome, 10))
        eq_(count(ls), 2)
        # A list, not a map iterator: it is searched twice below.
        points = list(map(itemgetter(0), ls))
        self.assertIn(self.point_1bluxome, points)
        self.assertIn(self.point_market_street, points)

        eq_(count(index.get_nearest_points(self.point_1bluxome, 15)), 3)
        eq_(count(index.get_nearest_points(self.point_1bluxome, 34)), 4)

    def test_distance_km(self, precision=3):
        """Distances reported without an explicit unit."""
        index = self._build_index(self.points, precision)
        for pt, distance in index.get_nearest_points(self.point_1bluxome, 10):
            if pt == self.point_1bluxome:
                self.assertEqual(distance, 0)
            if pt == self.point_market_street:
                self.assertAlmostEqual(distance, 1.301272755220718)

    def test_distance_mi(self, precision=3):
        """Distances reported when the unit 'mi' is requested."""
        index = self._build_index(self.points, precision)
        for pt, distance in index.get_nearest_points(self.point_1bluxome, 10,
                                                     'mi'):
            if pt == self.point_1bluxome:
                self.assertEqual(distance, 0)
            if pt == self.point_market_street:
                self.assertAlmostEqual(distance, 0.808573403337458)

    def test_different_precision(self):
        """The simple accuracy checks hold for precisions 1, 2 and 3."""
        for precision in [1, 2, 3]:
            self.test_simple_accurate(precision)

    def test_wrong_precision(self):
        """A radius too large for the precision raises, naming precision=2."""
        index = GeoGridIndex(precision=4)
        # The context-manager form works on Python 2.7 and 3, unlike the
        # removed assertRaisesRegexp alias.
        with self.assertRaises(Exception) as caught:
            list(index.get_nearest_points(self.point_market_street, 100))
        self.assertIn('precision=2', str(caught.exception))
Beispiel #31
0
import dask.dataframe as dd
from dask.multiprocessing import get

# Load the Swaziland population file.
# NOTE(review): `pd` is not imported in the visible part of this script --
# presumably `import pandas as pd` appears above; confirm.
swaz_pop = pd.read_csv("swaz_pop_data.csv")

# Load the water point data.
water_data = pd.read_csv("Water_Point_Data_Exchange_Complete_Dataset.csv")

# Keep only the water points whose country name is Swaziland.
swaz_water = water_data[water_data['#country_name'] == 'Swaziland']

# Build a geo index of the population data: column 'y' is passed as
# latitude, column 'x' as longitude, and 'value' is attached as `ref`.
geo_index = GeoGridIndex()
for index, row in swaz_pop.iterrows():
    geo_index.add_point(GeoPoint(row['y'], row['x'], ref=row['value']))


#Calculate population with x distance of random well
def calculate_population_within_x_km(row, index, km):
    """Return the summed ``ref`` of all indexed points within ``km`` of ``row``.

    The search centre is taken from ``row['#lat_deg']`` and
    ``row['#lon_deg']``. When the lookup or the summation raises, a message
    is printed and the partial sum accumulated so far (0 if none) is
    returned.
    """
    center_point = GeoPoint(row['#lat_deg'], row['#lon_deg'])
    total_population = 0
    try:
        nearby = index.get_nearest_points(center_point, km, 'km')
        for point, _distance in nearby:
            total_population += point.ref
    except Exception:
        # Still best effort, but a bare ``except`` would also have swallowed
        # KeyboardInterrupt and SystemExit.
        print("Invalid data - Record skipped")
    return total_population