def test():
    """A Unit member and its raw string value must give identical distances."""
    want_lyon_paris = EXPECTED_LYON_PARIS[unit]
    want_lyon_new_york = EXPECTED_LYON_NEW_YORK[unit]
    want_paris_new_york = EXPECTED_PARIS_NEW_YORK[unit]
    # Passing the enum member...
    assert haversine_vector(LYON, PARIS, unit=unit) == want_lyon_paris
    # ...and passing its plain string value behave the same.
    assert isinstance(unit.value, str)
    assert haversine_vector(LYON, PARIS, unit=unit.value) == want_lyon_paris
def makeDistMat(self, df):
    """Build the full pairwise great-circle distance matrix for the rows of *df*.

    :param df: DataFrame with 'latitude' and 'longitude' columns.
    :returns: (n, n) ndarray where entry [i, j] is the haversine distance
        between row i and row j, in kilometers (haversine_vector's default
        unit). Also stored on ``self.distmat``.
    """
    lat_array = df['latitude'].to_numpy()
    lon_array = df['longitude'].to_numpy()
    n = lat_array.size
    # (n, 2) array of (lat, lon) rows — the order haversine_vector expects.
    # FIX: the original also called ``np.array(pointarray, dtype=np.float32)``
    # and discarded the result (a no-op); removed. The large block of
    # commented-out geopy code was dead and is removed as well.
    pointarray = np.stack([lat_array, lon_array], axis=1)
    # Pair every point with every other point: istack repeats each row n
    # times in place, jstack tiles the whole set n times, so flat index k
    # corresponds to the pair (k // n, k % n).
    istack = np.repeat(pointarray, repeats=n, axis=0)
    jstack = np.tile(pointarray, (n, 1))
    distmat = haversine_vector(istack, jstack).reshape(n, n)
    self.distmat = distmat
    return distmat
def test_haversine_vector_comb():
    """All-pairs (comb=True) distances match the precomputed expectations."""
    want = [[392.21725956, 343.37455271],
            [6163.43638211, 5586.48447423]]
    got = haversine_vector([LYON, LONDON], [PARIS, NEW_YORK],
                           Unit.KILOMETERS, comb=True)
    # See https://numpy.org/doc/stable/reference/generated/numpy.testing.assert_allclose.html#numpy.testing.assert_allclose
    assert_allclose(got, want)
def get_closest_sat(
    con, coord: Tuple[float, float], datetime: pd.Timestamp = None
) -> Tuple[str, float]:
    """Find the satellite closest to *coord* at *datetime*.

    :param con: SQL connection
    :param coord: coordinates in (lat, long) format
    :param datetime: time when you are looking for the satellite; defaults
        to the current time, evaluated per call
    :returns: (sat_id, distance) of the closest satellite
    :raises KeyError: if no information is available for the datetime
    """
    if datetime is None:
        # BUG FIX: the old default ``pd.Timestamp.now()`` was evaluated once
        # at import time, silently freezing "now" for the process lifetime.
        datetime = pd.Timestamp.now()
    df = pd.read_sql(
        # Parameterized query instead of an f-string — avoids SQL injection
        # and quoting/formatting issues with the timestamp value.
        "SELECT * FROM space WHERE spaceTrack == ?",
        con=con,
        params=(datetime.timestamp(),),
        index_col="spaceTrack",
        parse_dates="spaceTrack",
    )
    sats = df.loc[datetime].set_index("sat_id")
    if not len(sats):
        raise KeyError("No information available for specified datetime")
    sats = sats[["latitude", "longitude"]]  # reorder cols
    dists = pd.Series(
        haversine_vector([coord] * len(sats), sats.values), index=sats.index
    )
    return dists.idxmin(), dists.min()
def relevant_departure_node_id(G, search_request):
    """Return labels of departure nodes reachable on foot from the origin.

    A node qualifies when it is within ``config.max_walking_distance`` of
    the requested departure point, can be reached by walking before its
    departure time, and departs within the configured window after the
    earliest requested departure.
    """
    origin = [search_request.departure_lat, search_request.departure_lon]
    earliest = search_request.earliest_departure
    nodes = list(G.nodes(data=True))
    labels = [label for label, _ in nodes]
    types = [attrs['node_type'] for _, attrs in nodes]
    dep_times = [attrs['time'] for _, attrs in nodes]
    stops = np.array([[attrs['stop_lat'], attrs['stop_lon']]
                      for _, attrs in nodes])
    distances = haversine_vector(origin, stops)
    candidates = np.where(distances <= config.max_walking_distance)[0].tolist()
    result = []
    for idx in candidates:
        if types[idx] != "departure":
            continue
        # walking time: distance / speed gives hours, convert to seconds
        walk_seconds = (distances[idx] / config.walking_speed) * 3600
        reachable = earliest + timedelta(seconds=walk_seconds) <= dep_times[idx]
        within_window = (dep_times[idx] - earliest).seconds \
            <= config.max_time_from_earliest_departure
        if reachable and within_window:
            result.append(labels[idx])
    return result
def compute_geo_distance(true_locations: np.ndarray, predicted_locations: np.ndarray):
    """
    Pairwise distances in kilometers between true and predicted coordinates.

    :param true_locations: np.ndarray of true coordinates as (lon, lat)
    :param predicted_locations: np.ndarray of predicted coordinates as (lon, lat)
    :return: np.ndarray of distances between corresponding rows, in km
    """
    # haversine_vector expects (lat, lon), so swap the two columns first.
    true_latlon = true_locations[:, [1, 0]]
    pred_latlon = predicted_locations[:, [1, 0]]
    return haversine_vector(true_latlon, pred_latlon, Unit.KILOMETERS)
def process_one_user(df_one_user):
    """Sort one user's rows by timestamp and add a per-step 'dist' column.

    Each row's 'dist' is the haversine distance from the previous row's
    position; the first row keeps 0.

    NOTE(review): coordinates are taken as ("long", "lat") while haversine
    expects (lat, lon) pairs — confirm this ordering is intentional.
    """
    df_one_user = df_one_user.sort_values("timestamp")
    df_one_user["dist"] = 0
    coords = df_one_user[["long", "lat"]].values
    dist_col = df_one_user.columns.get_loc("dist")
    # distance between each consecutive pair of positions
    df_one_user.iloc[1:, dist_col] = haversine_vector(coords[:-1], coords[1:])
    return df_one_user
def create_vectorized_haversine_li(
    origin: Tuple[float],
    dest_lons: List[float],
    dest_lats: List[float],
    unit: str = "mi",
):
    """Distances from a single origin to many destinations.

    :param origin: (lat, lon) of the origin
    :param dest_lons: destination longitudes
    :param dest_lats: destination latitudes (same length as dest_lons)
    :param unit: haversine unit string, e.g. "mi" or "km"
    :return: array of distances in *unit*
    """
    origin_pairs = [(origin[0], origin[1])] * len(dest_lats)
    dest_pairs = list(zip(dest_lats, dest_lons))
    # BUG FIX: the ``unit`` parameter was previously ignored and miles were
    # always returned. Unit("mi") == Unit.MILES, so the default behavior is
    # unchanged.
    return haversine_vector(origin_pairs, dest_pairs, unit=Unit(unit))
def add_distance(network: SpatioTemporalNetwork) -> SpatioTemporalNetwork:
    """Add distance in km between area centroids."""
    centroid = network.nodes.centroid
    lon = centroid.x
    lon.name = 'long'
    lat = centroid.y
    lat.name = 'lat'
    coords = pd.concat([lon, lat], axis=1)
    # Attach origin centroid coordinates, then destination ones, renaming
    # so the four columns don't collide.
    edges = network.edges \
        .join(coords, on=network._origin) \
        .rename(columns={'long': 'long_from', 'lat': 'lat_from'}) \
        .join(coords, on=network._destination) \
        .rename(columns={'long': 'long_to', 'lat': 'lat_to'})
    starts = list(zip(edges.lat_from, edges.long_from))
    ends = list(zip(edges.lat_to, edges.long_to))
    edges['distance'] = haversine_vector(starts, ends, Unit.KILOMETERS)
    # The helper coordinate columns are no longer needed.
    edges.drop(['long_from', 'lat_from', 'long_to', 'lat_to'],
               axis=1, inplace=True)
    return SpatioTemporalNetwork(nodes=network.nodes, edges=edges)
def distance(gauged: pd.DataFrame, ungauged: pd.Series):
    """Return geographic distance [km] between ungauged and database of gauged catchments.

    Parameters
    ----------
    gauged : pd.DataFrame
        Table containing columns for longitude and latitude of catchment's centroid.
    ungauged : pd.Series
        Coordinates of the ungauged catchment.
    """
    coords = np.column_stack([gauged.latitude.values, gauged.longitude.values])
    target = np.array([ungauged.latitude, ungauged.longitude])
    # comb=True yields an all-combinations matrix; with a single target
    # point we only need its first row.
    dists = haversine_vector(coords, target, comb=True)[0]
    return pd.Series(data=dists, index=gauged.index)
def create_vectorized_haversine_li(
    olat: float,
    olon: float,
    dlats: List[float],
    dlons: List[float],
    dist_factor: float = 1.17,
) -> List[float]:
    """Adjusted great-circle distances (miles) from one origin to many destinations.

    :param olat: origin latitude
    :param olon: origin longitude
    :param dlats: destination latitudes (same length as dlons)
    :param dlons: destination longitudes
    :param dist_factor: multiplier approximating real travel distance from
        the straight-line haversine distance
    :return: distances in miles scaled by *dist_factor* (a numpy array in
        practice, despite the historical List[float] annotation)
    """
    assert len(dlats) == len(dlons)
    # FIX: the original rebound ``ds`` from the destination-pair list to the
    # result array with two conflicting type annotations; distinct names
    # make the flow (and the annotations) honest.
    origins: List[Tuple[float, float]] = [(olat, olon)] * len(dlats)
    dests: List[Tuple[float, float]] = list(zip(dlats, dlons))
    dists = haversine_vector(origins, dests, unit=Unit.MILES)
    # distance factor adjusts haversine for theoretical travel difference
    return dists * dist_factor
def haversine_worker(inqueue: mp.Queue, readarray: mp.RawArray, readarrayshape: tuple, readarraydtype: type, writearray: mp.RawArray, writearraydtype: type) -> None:
    """
    Worker loop: consume ``(i, compare_samples, distmat_indices)`` messages
    from *inqueue* and write haversine distances between sample ``i`` and the
    listed comparison samples into the shared result buffer.

    The reading array is reshaped once if it arrives as a shared ctypes
    buffer; if it is already an ndarray/memmap (on disk or copied into each
    worker) it is used as-is. Each message's ``distmat_indices`` addresses a
    non-overlapping slice of the shared (flattened triangular) distance
    array, so no locking is needed. A message with ``i == 'STOP'`` ends the
    loop.
    """
    # Flat view over the shared output buffer; workers write disjoint indices.
    DIST_np = np.frombuffer(writearray, dtype = writearraydtype)
    logging.info('this process has given itself access to a shared writing array')
    if not isinstance(readarray, (np.ndarray, np.memmap)):
        # Make the thing numpy accessible in this process
        readarray_np = np.frombuffer(readarray, dtype = readarraydtype).reshape(readarrayshape)
        logging.info('this process has given itself access to a shared reading array')
    else:
        readarray_np = readarray
    while True:
        i, compare_samples, distmat_indices = inqueue.get()
        if i == 'STOP':
            logging.info('this process is shutting down, after STOP')
            break
        else:
            # Column i vs every column in compare_samples.
            # NOTE(review): this assumes columns are samples and axis 0 holds
            # the (lat, lon) coordinate pair — confirm the producer's layout.
            DIST_np[distmat_indices] = haversine_vector(array1 = readarray_np[:,i], array2 = readarray_np[:,compare_samples].T)
def relevant_arrival_node_id(G, search_request, departure_nodes):
    """Return labels of arrival nodes that could plausibly end this journey.

    A node qualifies when it is within walking distance of the requested
    arrival point and its arrival time lies between the earliest physically
    possible arrival (straight-line flight at ``config.max_possible_speed``)
    and the configured latest acceptable arrival.

    NOTE(review): ``departure_nodes`` is accepted but never used here.
    """
    relevant_nodes_id = []
    # Straight-line distance of the whole request; bounds how soon any
    # arrival could physically happen.
    request_flying_distance = haversine(
        (search_request.departure_lat, search_request.departure_lon),
        (search_request.arrival_lat, search_request.arrival_lon))
    # distance / speed gives hours; convert to seconds
    request_flying_time = (request_flying_distance /
                           config.max_possible_speed) * 3600
    source_coor = [search_request.arrival_lat, search_request.arrival_lon]
    earliest_departure = search_request.earliest_departure
    node_label_list = [u[0] for u in G.nodes(data=True)]
    node_type_list = [u[1]['node_type'] for u in G.nodes(data=True)]
    arrival_time_list = [u[1]['time'] for u in G.nodes(data=True)]
    sink_array = np.array([[v[1]['stop_lat'], v[1]['stop_lon']]
                           for v in G.nodes(data=True)])
    # Distance from the requested arrival point to every node's stop.
    distance_array = haversine_vector(source_coor, sink_array)
    valid_indexes = np.where(
        distance_array <= config.max_walking_distance)[0].tolist()
    for t in valid_indexes:
        # Keep only arrival nodes whose time is neither impossibly early
        # nor past the configured latest acceptable arrival.
        if node_type_list[t] == "arrival" and \
                (arrival_time_list[t] >= earliest_departure +
                 timedelta(seconds=request_flying_time)) \
                and (arrival_time_list[t] <= earliest_departure + timedelta(
                    seconds=config.max_arrival_time_from_earliest_departure)):
            relevant_nodes_id.append(node_label_list[t])
    return relevant_nodes_id
def create_transfer_edges(G):
    """Add walking-transfer edges between arrival and departure nodes of *G*.

    For every arrival node, connect it to every departure node of a
    *different* trip that is within walking distance and whose departure
    time leaves enough time to walk there. Edges get weight 0 and a
    resource cost equal to the full transfer wait in seconds. Returns the
    mutated graph.

    NOTE(review): O(n^2) in the number of nodes — one haversine_vector call
    per node against all nodes.
    """
    source_coordinates = [[u[1]['stop_lat'], u[1]['stop_lon']]
                          for u in G.nodes(data=True)]
    sink_coordinates = [[v[1]['stop_lat'], v[1]['stop_lon']]
                        for v in G.nodes(data=True)]
    node_label_list = [u[0] for u in G.nodes(data=True)]
    node_type_list = [u[1]['node_type'] for u in G.nodes(data=True)]
    node_trip_id_list = [u[1]['trip_id'] for u in G.nodes(data=True)]
    # Both lists read the same 'time' attribute; whether it acts as an
    # arrival or departure time depends on node_type at the index used.
    arrival_time_list = [u[1]['time'] for u in G.nodes(data=True)]
    depature_time_list = [u[1]['time'] for u in G.nodes(data=True)]
    source_array = np.array(source_coordinates)
    sink_array = np.array(sink_coordinates)
    for index, source_coor in enumerate(source_array):
        # Distances from this node's stop to every node's stop.
        distance_array = haversine_vector(source_coor, sink_array)
        valid_indexes = np.where(
            distance_array <= config.max_walking_distance)[0].tolist()
        for t in valid_indexes:
            # Only arrival -> departure transfers between different trips.
            if node_type_list[index] == "arrival" \
                    and node_type_list[t] == "departure" \
                    and node_trip_id_list[index] != node_trip_id_list[t]:
                # distance / speed gives hours; convert to seconds
                travel_time_in_seconds = (
                    distance_array[t]/config.walking_speed)*3600
                total_transfer_time_in_second = (
                    depature_time_list[t] - arrival_time_list[index]).seconds
                # Feasible only if walking gets you there before departure.
                if arrival_time_list[index] + \
                        timedelta(seconds=travel_time_in_seconds) \
                        <= depature_time_list[t] and \
                        node_label_list[index] != node_label_list[t]:
                    G.add_edge(node_label_list[index],
                               node_label_list[t],
                               res_cost=np.array(
                                   [total_transfer_time_in_second]),
                               weight=0,
                               transfer=1)
    return G
def add_dist_features(df):
    """Adds distance features based on lat/long to df:
       n_win_30/60/90: Number of facilities within 30/60/90 miles
       nearest: distance in miles to nearest facility
       (note latlongs based on city/county so this is not exact and has some noise)"""
    # Work on an explicit copy so the column assignments below don't hit
    # SettingWithCopyWarning / silently write into a view of df.
    latlongs = df.loc[~df.lat_long.isna(), ['ccn', 'lat_long']].copy()
    points = list(latlongs['lat_long'])
    # PERF FIX: one vectorized all-pairs call (comb=True) replaces the
    # original Python loop of n separate haversine_vector calls; the
    # resulting matrix is identical (pairwise distances are symmetric).
    dist_matrix = haversine_vector(points, points, Unit.MILES, comb=True)
    # Fill diagonal with huge number so a facility never counts itself,
    # then count facilities within each radius.
    np.fill_diagonal(dist_matrix, 10000)
    n_win_30 = np.array(dist_matrix < 30).sum(axis=1)
    n_win_60 = np.array(dist_matrix < 60).sum(axis=1)
    n_win_90 = np.array(dist_matrix < 90).sum(axis=1)
    # Fill diagonal with missing, then get nearest facility distance.
    np.fill_diagonal(dist_matrix, np.nan)
    nearest = np.nanmin(dist_matrix, axis=1)
    # Assign features to the latlong frame.
    latlongs['n_win_30'] = n_win_30
    latlongs['n_win_60'] = n_win_60
    latlongs['n_win_90'] = n_win_90
    latlongs['nearest'] = nearest
    # Join the features back onto the full frame by facility id (ccn).
    df = df.join(latlongs.drop(columns='lat_long').set_index('ccn'), on='ccn')
    return df
def main(file1, file2, location1, location2, dist):
    """Compare chain vs non-chain restaurant density near two locations.

    Reads OSM amenity data (*file1*) and chain-restaurant qids (*file2*),
    counts chain/non-chain restaurants within *dist* km of location1 and
    location2, runs a chi-squared test on the resulting 2x2 contingency
    table, and writes map.html / heat_map.html visualizations.
    """
    osm_data = pd.read_json(file1)
    # chain restaurant data using qid
    chain_qids = pd.read_json(file2)
    # merge osm_data and chain_qid to identify chain restaurants
    osm_data = osm_data.merge(chain_qids, how='left', on='qid')
    osm_data['is_chain_restaurant'] = osm_data.is_chain_restaurant.fillna(0)
    # filter restaurants from non restaurant in osm_data
    amenity = osm_data['amenity']
    restaurant = list(
        dict.fromkeys([i for i in amenity if i not in nonRestaurestaurant()]))
    #print(restaurant)
    osm_data = osm_data[osm_data['amenity'].isin(restaurant)]
    size = osm_data.lat.size
    #convert locations list to matrix for haversine_vector(), distance calculation
    # NOTE(review): these two matrices are never used below — the list
    # comprehensions passed to haversine_vector rebuild the same thing.
    # Dead code? Confirm before deleting.
    location1_matrix = make_matrix(location1, size)
    location2_matrix = make_matrix(location2, size)
    #distance between location 1 and osm_data locations
    osm_data['dist1'] = haversine_vector(
        osm_data[['lat', 'lon']].values.tolist(),
        [location1 for i in range(size)], Unit.KILOMETERS)
    #distance between location 2 and osm_data locations
    osm_data['dist2'] = haversine_vector(
        osm_data[['lat', 'lon']].values.tolist(),
        [location2 for i in range(size)], Unit.KILOMETERS)
    # number of chain restaurants with chosen distance of location 1
    dist1_and_chain = osm_data[(osm_data.dist1 < dist)
                               & (osm_data.is_chain_restaurant == 1)].shape[0]
    # number of chain restaurants with chosen distance of location 2
    dist2_and_chain = osm_data[(osm_data.dist2 < dist)
                               & (osm_data.is_chain_restaurant == 1)].shape[0]
    # number of non chain restaurants with chosen distance of location 1
    dist1_and_nonchain = osm_data[(osm_data.dist1 < dist)
                                  & (osm_data.is_chain_restaurant == 0)].shape[0]
    # number of non chain restaurants with chosen distance of location 2
    dist2_and_nonchain = osm_data[(osm_data.dist2 < dist)
                                  & (osm_data.is_chain_restaurant == 0)].shape[0]
    # perform chi-squared with filtered distances to compare
    # chain restaurant densities in locations 1 and 2 - Derek
    restaurant_contingency = \
        np.array([[dist1_and_nonchain, dist1_and_chain],
                  [dist2_and_nonchain, dist2_and_chain]])
    print('Restaurant and Chain Restaurant Counts')
    print('Location 1:', restaurant_contingency[0, :])
    print('Location 2:', restaurant_contingency[1, :])
    p_value = chi2_contingency(restaurant_contingency)[1]
    print(f'Chi-squared p-value: {p_value}')
    # for map visualization
    # put a marker on location 1 and 2 on map
    m3 = folium.Map(location=location2, zoom_start=100)
    folium.Marker(location1, popup='<b>Location 1</b>').add_to(m3)
    folium.Marker(location2, popup='<b>Location 2</b>').add_to(m3)
    # select only restauarnts with distance of your chosen distance
    within_distance = osm_data[(osm_data.dist2 < dist)
                               | (osm_data.dist1 < dist)]
    chain_restaurant = within_distance[
        within_distance.is_chain_restaurant == 1][["lat", "lon"]].values
    non_chain_restaurant = within_distance[
        within_distance.is_chain_restaurant == 0][["lat", "lon"]].values
    # blue for restaurants within chosen distance of location 1
    for i in range(len(chain_restaurant)):
        folium.CircleMarker(chain_restaurant[i], radius=8, color='blue',
                            fill=True).add_to(m3)
    # red for restaurants within chosen distance of location 2
    for i in range(len(non_chain_restaurant)):
        folium.CircleMarker(non_chain_restaurant[i], radius=2, color='red',
                            s=25, fill=True).add_to(m3)
    m3.save('map.html')
    # For heat map visualization - with similar procedure
    m = folium.Map(location=location2, zoom_start=100)
    folium.Marker(location1, popup='<b>SFU Burnaby</b>').add_to(m)
    folium.Marker(location2, popup='<b>SFU Vancouver</b>').add_to(m)
    latlons = osm_data[["lat", "lon"]].values
    HeatMap(latlons).add_to(m)
    m.save('heat_map.html')
def make_relative_meters(batch):
    """Convert trajectories into signed x/y haversine offsets relative to
    each track's final position, after imputing missing samples (marked by
    a -1 latitude).

    NOTE(review): assumes ``batch`` has shape (timesteps, tracks, features)
    with latitude at feature index 1 and longitude at index 2 — confirm
    with the caller. Despite the function name, haversine_vector's default
    unit is kilometers, not meters — verify intent.

    Returns (rel_x, rel_y) arrays of per-step signed distances.
    """
    end = []
    rel_x = []
    rel_y = []
    # find missing values:
    # make sure that last value is not -1
    batch = np.array(batch)
    for i in range(batch.shape[1]):
        if batch[-1, i, 1] == -1:
            # Walk backwards to the last valid sample of track i...
            for j in reversed(range(batch.shape[0])):
                if batch[j, i, 1] != -1:
                    j1 = j + 1
                    if j1 == 0:
                        # no valid sample at all: drop the whole track
                        batch = np.delete(batch, i, 1)
                    else:
                        # ...then extrapolate linearly to the end of the track.
                        for j in range(j1, batch.shape[0]):
                            if batch[j - 2, i, 1] != -1:
                                batch[j, i, 1:3] = batch[j - 1, i, 1:3] + (
                                    batch[j - 1, i, 1:3] - batch[j - 2, i, 1:3])
                            else:
                                batch[j, i, 1:3] = batch[j - 1, i, 1:3]
                    break
    # for j in reversed(range(batch.shape[0])):
    #     if batch[j, i, 1] != -1:
    #         batch[j:, i, :] = batch[j, i, :]
    #         break
    # find remainding values
    mask = np.where(batch[:, :, 1] == -1)
    # set to next value
    for i, j in zip(reversed(mask[0]), reversed(mask[1])):
        if i + 2 < batch.shape[0]:
            # back-extrapolate from the two following samples
            batch[i, j, 1:3] = batch[i + 1, j, 1:3] + (batch[i + 1, j, 1:3] -
                                                       batch[i + 2, j, 1:3])
        else:
            batch[i, j, 1:3] = batch[i + 1, j, 1:3]
        # batch[i, j, :] = batch[i + 1, j, :]
    # get last entry for each element in batch
    for l in batch[-1]:
        end.append((l[1], l[2]))
    for step in batch:
        step_x = []
        step_y = []
        # sign flags: flipped to -1 where the step lies "before" the endpoint
        # in that axis
        rev = np.ones((2, batch.shape[1]))
        for idx, l in enumerate(step):
            # project onto one axis at a time by pairing mixed coordinates
            step_x.append((l[1], end[idx][1]))
            step_y.append((end[idx][0], l[2]))
            if l[1] < end[idx][0]:
                rev[0, idx] *= -1
            if l[2] < end[idx][1]:
                rev[1, idx] *= -1
        rel_x.append(haversine_vector(step_x, end) * rev[0, :])
        rel_y.append(haversine_vector(step_y, end) * rev[1, :])
    return np.array(rel_x), np.array(rel_y)
def make_relative_meters_batch(batch, n_max=5):
    """For each track, compute signed x/y haversine offsets of every other
    track relative to it at each timestep, keeping only the ``n_max``
    nearest neighbors (ranked by distance at the final timestep). Missing
    samples (latitude == -1) are imputed first, as in make_relative_meters.

    NOTE(review): assumes ``batch`` has shape (timesteps, tracks, features)
    with latitude at index 1 and longitude at index 2 — confirm upstream.

    Returns (out_x, out_y), each of shape (tracks, timesteps, n_max).
    """
    end = []
    rel_x_batch = []
    rel_y_batch = []
    batch = np.array(batch)
    out_x = np.zeros((batch.shape[1], batch.shape[0], n_max))
    out_y = np.zeros((batch.shape[1], batch.shape[0], n_max))
    # make sure that last value is not -1
    for i in range(batch.shape[1]):
        if batch[-1, i, 1] == -1:
            # Walk backwards to the last valid sample of track i...
            for j in reversed(range(batch.shape[0])):
                if batch[j, i, 1] != -1:
                    j1 = j + 1
                    if j1 == 0:
                        # no valid sample at all: drop the whole track
                        batch = np.delete(batch, i, 1)
                    else:
                        # ...then extrapolate linearly to the end of the track.
                        for j in range(j1, batch.shape[0]):
                            if batch[j - 2, i, 1] != -1:
                                batch[j, i, 1:3] = batch[j - 1, i, 1:3] + (
                                    batch[j - 1, i, 1:3] - batch[j - 2, i, 1:3])
                            else:
                                batch[j, i, 1:3] = batch[j - 1, i, 1:3]
                    break
    # for j in reversed(range(batch.shape[0])):
    #     if batch[j, i, 1] != -1:
    #         batch[j:, i, :] = batch[j, i, :]
    #         break
    # find remainding values
    mask = np.where(batch[:, :, 1] == -1)
    # set to next value
    for i, j in zip(reversed(mask[0]), reversed(mask[1])):
        if i + 2 < batch.shape[0]:
            # back-extrapolate from the two following samples
            batch[i, j, 1:3] = batch[i + 1, j, 1:3] + (batch[i + 1, j, 1:3] -
                                                       batch[i + 2, j, 1:3])
        else:
            batch[i, j, 1:3] = batch[i + 1, j, 1:3]
        # batch[i, j, :] = batch[i + 1, j, :]
    # get last entry for each element in batch
    for l in batch[-1]:
        end.append((l[1], l[2]))
    for i in range(batch.shape[1]):
        # offsets of every track relative to reference track i
        rel_x = []
        rel_y = []
        for step in batch:
            step_x = []
            step_y = []
            # sign flags: flipped to -1 where the other track lies "before"
            # the reference in that axis
            rev = np.ones((2, batch.shape[1]))
            for idx, l in enumerate(step):
                # project onto one axis at a time by pairing mixed coordinates
                step_x.append((l[1], step[i][2]))
                step_y.append((step[i][1], l[2]))
                if l[1] < step[i][1]:
                    rev[0, idx] *= -1
                if l[2] < step[i][2]:
                    rev[1, idx] *= -1
            rel_x.append(
                haversine_vector(step_x, [tuple(step[i][1:3])] * len(step_x)) *
                rev[0, :])
            rel_y.append(
                haversine_vector(step_y, [tuple(step[i][1:3])] * len(step_x)) *
                rev[1, :])
        rel_x_batch.append(rel_x)
        rel_y_batch.append(rel_y)
    rel_x_batch = np.array(rel_x_batch)
    rel_y_batch = np.array(rel_y_batch)
    dist = np.sqrt(rel_x_batch**2 + rel_y_batch**2)
    # rank neighbors by their distance at the final timestep
    sort_idx = np.argsort(dist[:, -1])
    sort_idx = np.delete(sort_idx, 0, 1)  # remove 0 distance
    for i in range(sort_idx.shape[0]):
        if sort_idx.shape[1] > n_max:
            # more neighbors than slots: keep only the n_max nearest
            idx_0 = rel_x_batch[i, :, sort_idx[i, :n_max]].T != 0
            out_x[i][idx_0] = rel_x_batch[i, :, sort_idx[i, :n_max]].T[idx_0]
            idx_0 = rel_y_batch[i, :, sort_idx[i, :n_max]].T != 0
            out_y[i][idx_0] = rel_y_batch[i, :, sort_idx[i, :n_max]].T[idx_0]
        else:
            # fewer neighbors than slots: fill the leading columns only
            idx_0 = rel_x_batch[i, :, sort_idx[i]].T != 0
            out_x[i, :, :sort_idx.shape[1]][idx_0] = rel_x_batch[
                i, :, sort_idx[i]].T[idx_0]
            idx_0 = rel_y_batch[i, :, sort_idx[i]].T != 0
            out_y[i, :, :sort_idx.shape[1]][idx_0] = rel_y_batch[
                i, :, sort_idx[i]].T[idx_0]
    return out_x, out_y
def main_formula():
    """Demonstrate haversine_vector output when one coordinate pair is NaN."""
    lyon = (45.7597, 4.8422)  # (lat, lon)
    paris = (48.8567, 2.3508)
    bad_point = (float('nan'), float('nan'))
    distances = haversine_vector([lyon, lyon], [paris, bad_point], Unit.KILOMETERS)
    pprint(distances)