def euclidean_distances(hospitals_gdf: gpd.GeoDataFrame,
                        ed_inst_gdf: gpd.GeoDataFrame) -> np.ndarray:
    """Calculates pairwise Euclidean distances."""
    distances = np.zeros((len(hospitals_gdf), len(ed_inst_gdf)))
    for hosp_idx, hosp_row in enumerate(hospitals_gdf.itertuples()):
        for ed_idx, ed_row in enumerate(ed_inst_gdf.itertuples()):
            dist = hosp_row.geometry.distance(ed_row.geometry)
            distances[hosp_idx, ed_idx] = dist
    return distances
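# The nested loop above runs len(hospitals) * len(ed_inst) Python-level
# iterations. For Point geometries in a shared planar CRS, the same matrix can
# be built in one vectorized call. A minimal sketch (the function name is ours,
# not from the original code); `cdist` comes from SciPy.
import numpy as np
from scipy.spatial.distance import cdist

def euclidean_distances_vectorized(hospitals_gdf, ed_inst_gdf):
    hosp_xy = np.column_stack([hospitals_gdf.geometry.x, hospitals_gdf.geometry.y])
    ed_xy = np.column_stack([ed_inst_gdf.geometry.x, ed_inst_gdf.geometry.y])
    # Rows are hospitals, columns are educational institutions.
    return cdist(hosp_xy, ed_xy)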
def travel_time_distances(travel_time_df: pd.DataFrame,
                          hospitals_gdf: gpd.GeoDataFrame,
                          ed_inst_gdf: gpd.GeoDataFrame,
                          epsilon: float = 1e-4,
                          default_time: float = 10000) -> np.ndarray:
    """Loads precomputed pairwise travel time distances.

    :param travel_time_df: The precomputed table of pairwise travel times
        (in Olivia Walch's format).
    :param hospitals_gdf: Hospitals to calculate travel times for.
    :param ed_inst_gdf: Educational institutions to calculate travel times for.
    :param epsilon: The tolerance used for matching longitudes and latitudes.
    :param default_time: The default time to use when a (hospital, educational
        institution) pair is missing from the travel time data.
    :return: A pairwise distance matrix (rows are hospitals, columns are
        educational institutions).
    """
    times = default_time * np.ones((len(hospitals_gdf), len(ed_inst_gdf)))
    travel_time_index = {}
    for row in travel_time_df.itertuples():
        source = getattr(row, 'Source')
        dest = getattr(row, 'Destination')
        travel_time = getattr(row, 'Time') / 60
        travel_time_index[f'{source} -> {dest}'] = travel_time

    for hosp_idx, hosp_row in enumerate(hospitals_gdf.itertuples()):
        for ed_idx, ed_row in enumerate(ed_inst_gdf.itertuples()):
            # Try to match based on name, and then disambiguate based on long/lat.
            ed_name = getattr(ed_row, 'NAME').replace(',', '')
            hosp_name = getattr(hosp_row, 'NAME').replace(',', '')
            if f'{ed_name} -> {hosp_name}' in travel_time_index:
                travel_time = travel_time_index[f'{ed_name} -> {hosp_name}']
            else:
                ed_lat = getattr(ed_row, 'LATITUDE')
                ed_long = getattr(ed_row, 'LONGITUDE')
                hosp_lat = getattr(hosp_row, 'LATITUDE')
                hosp_long = getattr(hosp_row, 'LONGITUDE')
                df = travel_time_df
                travel_row = df[
                    ((df['SourceLat'] - ed_lat).abs() <= epsilon)
                    & ((df['SourceLong'] - ed_long).abs() <= epsilon)
                    & ((df['DestLat'] - hosp_lat).abs() <= epsilon)
                    & ((df['DestLong'] - hosp_long).abs() <= epsilon)]
                if not travel_row.empty:
                    if len(travel_row) > 1:
                        print('Warning:', ed_name, '->', hosp_name, 'ambiguous')
                    travel_time = travel_row['Time'].iloc[0] / 60
                else:
                    print('Warning: could not find travel time for',
                          f'{ed_name} -> {hosp_name}')
                    continue
            times[hosp_idx, ed_idx] = travel_time
    return times
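# Hypothetical usage sketch: file names and expected columns are assumptions.
# The travel-time table needs Source/Destination/Time plus
# SourceLat/SourceLong/DestLat/DestLong; both GeoDataFrames need
# NAME/LATITUDE/LONGITUDE columns, as the lookup above requires.
travel_time_df = pd.read_csv('travel_times.csv')
hospitals_gdf = gpd.read_file('hospitals.geojson')
ed_inst_gdf = gpd.read_file('educational_institutions.geojson')
times = travel_time_distances(travel_time_df, hospitals_gdf, ed_inst_gdf)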
def overlap(target: GeoDataFrame, tiles: GeoDataFrame, verbose: bool):
    """
    Find all unique tiles that intersect the given region, based on maximum
    covered area.

    Parameters
    ----------
    target: GeoDataFrame
        Input Polygon
    tiles: GeoDataFrame
        Tiles (Sentinel-2)
    verbose: bool
        Verbose mode; if True, prints messages

    Returns
    -------
    GeoDataFrame
        Tiles for the given Polygon
    """
    pprint("Start finding overlapping tiles", verbose)
    tiles, epsg = _get_intersect_tiles(target, tiles)
    result_tiles = list()
    for row in tiles.itertuples():
        # Subtract each tile from the target; if the remaining area shrinks,
        # the tile overlaps the target and is kept.
        start_area = target.geometry[0].area
        target.geometry[0] = target.geometry[0].difference(row.geometry)
        if start_area != target.geometry[0].area:
            result_tiles.append(dict(Name=row.Name, geometry=row.geometry))
    result = gp.GeoDataFrame(result_tiles, crs=epsg)
    result = result.to_crs('epsg:4326')
    pprint("End finding overlapping tiles", verbose)
    return result
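# Hypothetical usage: a single-polygon region of interest and a Sentinel-2
# tile grid, both as GeoDataFrames. File names are assumptions; note that
# `overlap` mutates the target geometry while iterating.
region = gp.read_file('aoi.geojson')
s2_grid = gp.read_file('sentinel2_tiles.gpkg')
covering_tiles = overlap(region, s2_grid, verbose=True)
print(covering_tiles.Name.tolist())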
def find_adjacent_lane_boundary(lane_boundaries: gpd.GeoDataFrame, lane, left_right):
    lane_coords = [Point(c) for c in lane.geometry.coords]
    lane_start_azimuth = geo_util.calc_azimuth(lane_coords[0], lane_coords[1])
    lane_end_azimuth = geo_util.calc_azimuth(lane_coords[-2], lane_coords[-1])

    # Calculate stats
    stats = []
    for lane_boundary in lane_boundaries.itertuples():
        coords = [Point(c) for c in lane_boundary.geometry.coords]

        start_lateral_offset = geo_util.calc_lateral_offset(
            lane_coords[0], coords[0], lane_start_azimuth)
        end_lateral_offset = geo_util.calc_lateral_offset(
            lane_coords[-1], coords[-1], lane_end_azimuth)

        if left_right == "right":
            start_lateral_offset = -start_lateral_offset
            end_lateral_offset = -end_lateral_offset

        start_azimuth = geo_util.calc_azimuth(coords[0], coords[1])
        end_azimuth = geo_util.calc_azimuth(coords[-2], coords[-1])

        start_azimuth_diff = abs(
            geo_util.normalize_radian(start_azimuth - lane_start_azimuth))
        end_azimuth_diff = abs(
            geo_util.normalize_radian(end_azimuth - lane_end_azimuth))

        stats.append({
            "lane_boundary": lane_boundary,
            "start_lateral_offset": start_lateral_offset,
            "end_lateral_offset": end_lateral_offset,
            "start_azimuth_diff": start_azimuth_diff,
            "end_azimuth_diff": end_azimuth_diff,
        })

    # Filter by conditions
    th_min_dist = -0.1
    th_max_dist = 3
    th_max_azimuth = np.deg2rad(30)
    candidate_stats = list(
        filter(
            lambda stat:
            ((th_min_dist <= stat["start_lateral_offset"] <= th_max_dist) and
             (th_min_dist <= stat["end_lateral_offset"] <= th_max_dist) and
             (stat["start_azimuth_diff"] <= th_max_azimuth) and
             (stat["end_azimuth_diff"] <= th_max_azimuth)),
            stats,
        ))

    if not candidate_stats:
        return None

    # Sort by score
    sorted_stat = sorted(
        candidate_stats,
        key=lambda stat: stat["start_azimuth_diff"] + stat["end_azimuth_diff"])

    return sorted_stat[0]["lane_boundary"]
def save_geopackage(path: str, data: geopandas.GeoDataFrame) -> None:
    """
    Save a `GeoDataFrame` to the disk at the specified path. If the path
    already exists, the existing destination will be overwritten.

    :param path: Path used for the output file
    :type path: str
    :param data: Output `GeoDataFrame` instance
    :type data: GeoDataFrame
    """
    for row in data.itertuples():
        # Write each feature to its own layer, named after the row's "name"
        # column. Writing only the single-row subset avoids duplicating the
        # whole frame into every layer.
        data.loc[[row.Index]].to_file(path, driver="GPKG", layer=row.name)
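# Hypothetical usage, assuming the frame has a unique "name" column to supply
# layer names; each layer can then be read back individually. File names are
# illustrative.
gdf = geopandas.read_file('regions.geojson')
save_geopackage('regions.gpkg', gdf)
first_layer = geopandas.read_file('regions.gpkg', layer=gdf['name'].iloc[0])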
def buffer_geometry(gdf: gpd.GeoDataFrame,
                    width_table: Dict[str, float] = raster_table,
                    default_width: float = 1.5) -> List[Polygon]:
    roads: List[Polygon] = []
    for row in gdf.itertuples():
        # The road class may live in 'highway' or 'fclass' depending on the
        # data source.
        if hasattr(row, 'highway'):
            road_class = row.highway
        else:
            road_class = row.fclass
        # Some rows carry multiple class tags; use the first one.
        if isinstance(road_class, list):
            road_class = road_class[0]
        width = width_table.get(road_class, default_width) * WIDTH_FACTOR
        roads.append(row.geometry.buffer(width))
    return roads
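# Minimal usage sketch: buffer widths are in CRS units, so the frame should be
# in a projected CRS (meters) first. The file name and UTM zone are
# assumptions; `unary_union` merges the buffers into one footprint.
from shapely.ops import unary_union

roads_gdf = gpd.read_file('roads.gpkg').to_crs(epsg=32633)
road_polygons = buffer_geometry(roads_gdf)
road_footprint = unary_union(road_polygons)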
def upload_gdf(
        self,
        gdf: gpd.GeoDataFrame,
        class_name: str,
        upload_altitude: bool = True):
    """Upload GeoDataFrame to Parse

    Args:
        gdf: GeoDataFrame with data to upload
        class_name: name of class to upload data to in Parse
        upload_altitude: whether to upload altitude as an attribute for
            Point Z geometries. Uploaded as "alt".
    """
    headers = self.headers.copy()
    headers['Content-Type'] = 'application/json'

    json_data = []
    geom_name = gdf.geometry.name
    columns = [x for x in gdf.columns if x != geom_name]
    for row in gdf.itertuples():
        d = {}

        # Make sure that type of geometry is point
        geom = getattr(row, geom_name)
        assert isinstance(geom, Point), 'Geometry not of type Point'

        # is it a 2D or 3D point?
        coords = list(geom.coords)[0]
        lon = coords[0]
        lat = coords[1]
        alt = coords[2] if len(coords) == 3 else None

        d[geom_name] = self.encode_geopoint(lon=lon, lat=lat)
        if (alt is not None) and upload_altitude:
            d['alt'] = alt

        for column in columns:
            d[column] = getattr(row, column)

        json_data.append(d)

    for group in chunker(json_data, 50):
        self.upload_batch(data=group, class_name=class_name)
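# `chunker` is referenced above but not defined in this snippet. A common
# implementation slices the list into fixed-size batches; this is a sketch,
# not necessarily the original helper.
def chunker(seq, size):
    return (seq[pos:pos + size] for pos in range(0, len(seq), size))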
def calculate_area(gdf: gpd.GeoDataFrame):
    areas = []
    for row in gdf.itertuples():
        # Pick the UTM zone of each feature's centroid so areas are computed
        # in meters. utm.from_latlon returns (easting, northing, zone, letter).
        centroid = row.geometry.centroid
        utm_tuple = utm.from_latlon(centroid.y, centroid.x)
        south = centroid.y <= 0
        crs = CRS.from_dict({
            "proj": "utm",
            "zone": utm_tuple[2],
            "south": south,  # the key must be the string "south", not the bool
        })
        crs_code = f"EPSG:{crs.to_authority()[1]}"
        row_as_df = pd.DataFrame.from_records([row], columns=row._fields)
        row_as_gdf = gpd.GeoDataFrame(row_as_df, geometry=row_as_df.geometry,
                                      crs="EPSG:4326")
        row_as_utm = row_as_gdf.to_crs(crs_code)
        areas.append(row_as_utm.area.sum())
    area = sum(areas)
    return area
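# Recent geopandas (>= 0.9) can estimate a suitable UTM CRS for the whole
# frame in one call, which avoids the per-row reprojection above. A sketch,
# assuming all features fall in (or near) a single UTM zone:
def calculate_area_single_zone(gdf: gpd.GeoDataFrame) -> float:
    utm_crs = gdf.estimate_utm_crs()
    return gdf.to_crs(utm_crs).area.sum()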
def attractivity_matrix():
    pois = pd.read_csv("../datasets/" + longlatfile)
    pois_geom = [Point(xy) for xy in zip(pois.iloc[:, 0], pois.iloc[:, 1])]
    pois_gdf = GeoDataFrame(None, crs=crs, geometry=pois_geom)

    bus_gdf = create_act_gdf(BUS)
    nightlife_gdf = create_act_gdf(NIGHTLIFE)
    supermarkt_gdf = create_act_gdf(SUPERMARKT)
    university_gdf = create_act_gdf(UNIVERSITY)

    data = pd.DataFrame(None, index=range(len(pois)), columns=[
        "Lat", "Long", "public_transport", "nightlife", "shops",
        "near_university"
    ])
    #data = pd.DataFrame(None, index=range(len(pois)), columns=["avg_cost", "district"])

    # The POI frame has only a geometry column, so each namedtuple unpacks
    # into (index, point). Count the amenities of each type within range.
    for (i, poi) in pois_gdf.itertuples():
        #bwr, ortst = polygon_check(poi.x, poi.y)
        data.iloc[i] = [
            poi.y,
            poi.x,
            len(bus_gdf[bus_gdf["geometry"].apply(
                lambda x: haversine(x, poi)) < feature_range[BUS]]),
            len(nightlife_gdf[nightlife_gdf["geometry"].apply(
                lambda x: haversine(x, poi)) < feature_range[NIGHTLIFE]]),
            len(supermarkt_gdf[supermarkt_gdf["geometry"].apply(
                lambda x: haversine(x, poi)) < feature_range[SUPERMARKT]]),
            len(university_gdf[university_gdf["geometry"].apply(
                lambda x: haversine(x, poi)) < feature_range[UNIVERSITY]]),
        ]
        print(i)

    with open("../datasets/" + "final.csv", "w") as f:
        data.to_csv(f, index=False)
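# `haversine` is used above but not defined in this snippet. A standard
# great-circle distance (in kilometres) between two shapely Points holding
# lon/lat; a sketch, not necessarily the original helper.
from math import radians, sin, cos, asin, sqrt

def haversine(p1, p2):
    lon1, lat1, lon2, lat2 = map(radians, (p1.x, p1.y, p2.x, p2.y))
    dlon, dlat = lon2 - lon1, lat2 - lat1
    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
    return 2 * 6371 * asin(sqrt(a))  # Earth radius ~6371 km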
def intersect_trail_with_polygons(trail: LineString,
                                  gdf: gpd.GeoDataFrame,
                                  key_col: str):
    """Intersect trail with polygons to produce overlapping line segments

    Both trail and gdf must be projected to a projected coordinate system
    before being passed to this function. This is used, e.g., to find the
    portions of the trail that are within national parks or national forests.

    Args:
        - trail: projected LineString of trail
        - gdf: projected GDF of polygons to find intersections of. It
          shouldn't matter if an area shows up once as a MultiPolygon or
          multiple times (with the same `key_col` value) as individual
          Polygons.
        - key_col: column of GDF to use as keys of dict

    Returns:
        - {key_col: {'geometry': MultiLineString, 'length': float}} where
          `geometry` holds the segments where the trail intersects the given
          polygon, and `length` is the total intersected distance in the
          polygon.
    """
    intersections = {}

    # Iterate over GeoDataFrame
    for row in gdf.itertuples():
        # Set geometry variable so that it can be updated if it needs to be
        # made valid. You can't update a namedtuple.
        geometry = row.geometry

        # Check if geometry is valid
        if not geometry.is_valid:
            geometry = geometry.buffer(0)

        # Compute intersection
        int_line = trail.intersection(geometry)

        # Get key_col in dataset
        key = getattr(row, key_col)

        # Instantiate dict with key
        intersections[key] = intersections.get(key, {})

        if int_line.type == 'LineString':
            intersections[key]['geometry'] = MultiLineString([int_line])
        elif int_line.type == 'MultiLineString':
            intersections[key]['geometry'] = int_line
        elif int_line.type == 'GeometryCollection':
            # An empty GeometryCollection means there was no intersection.
            msg = 'GeometryCollection result should be empty'
            assert len(int_line.geoms) == 0, msg
            intersections[key]['geometry'] = None
        else:
            msg = 'intersection of Polygon, LineString should be LineString'
            raise ValueError(msg)

    # Add length in projected coordinates to dictionary
    for key, d in intersections.items():
        if d['geometry'] is None:
            intersections[key]['length'] = None
            continue

        intersections[key]['length'] = d['geometry'].length

    return intersections
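# Hypothetical usage: both inputs share a projected CRS so lengths come out in
# meters. The file, EPSG code, coordinates, and key column are illustrative
# assumptions.
from shapely.geometry import LineString

trail_proj = LineString([(500000, 4180000), (502500, 4183000)])  # toy trail
parks = gpd.read_file('national_parks.gpkg').to_crs(epsg=32610)
by_park = intersect_trail_with_polygons(trail_proj, parks, key_col='UNIT_NAME')
for name, d in by_park.items():
    km = (d['length'] or 0) / 1000
    print(f'{name}: {km:.1f} km of trail')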
def split_tiles(input_tiles: gpd.GeoDataFrame,
                nb_tiles_wanted: int) -> gpd.GeoDataFrame:
    nb_tiles = len(input_tiles)
    if nb_tiles >= nb_tiles_wanted:
        return input_tiles

    nb_tiles_ratio_target = nb_tiles_wanted / nb_tiles

    # Loop over all tiles in the grid
    result_tiles = []
    for tile in input_tiles.itertuples():
        # For this tile, as long as curr_nb_tiles_ratio_todo is not 1, keep splitting
        curr_nb_tiles_ratio_todo = nb_tiles_ratio_target
        curr_tiles_being_split = [tile.geometry]
        while curr_nb_tiles_ratio_todo > 1:
            # Check in how many parts the tiles are split in this iteration
            divisor = 0
            if round(curr_nb_tiles_ratio_todo) == 3:
                divisor = 3
            else:
                divisor = 2
            curr_nb_tiles_ratio_todo /= divisor

            # Split all current tiles
            tmp_tiles_after_split = []
            for tile_to_split in curr_tiles_being_split:
                xmin, ymin, xmax, ymax = tile_to_split.bounds
                width = abs(xmax - xmin)
                height = abs(ymax - ymin)

                # Split in 2 or 3...
                if divisor == 3:
                    if width > height:
                        split_line = sh_geom.LineString([
                            (xmin + width / 3, ymin - 1),
                            (xmin + width / 3, ymax + 1),
                            (xmin + 2 * width / 3, ymax + 1),
                            (xmin + 2 * width / 3, ymin - 1)])
                    else:
                        split_line = sh_geom.LineString([
                            (xmin - 1, ymin + height / 3),
                            (xmax + 1, ymin + height / 3),
                            (xmax + 1, ymin + 2 * height / 3),
                            (xmin - 1, ymin + 2 * height / 3)])
                else:
                    if width > height:
                        split_line = sh_geom.LineString([
                            (xmin + width / 2, ymin - 1),
                            (xmin + width / 2, ymax + 1)])
                    else:
                        split_line = sh_geom.LineString([
                            (xmin - 1, ymin + height / 2),
                            (xmax + 1, ymin + height / 2)])
                tmp_tiles_after_split.extend(
                    sh_ops.split(tile_to_split, split_line).geoms)
            curr_tiles_being_split = tmp_tiles_after_split

        result_tiles.extend(curr_tiles_being_split)

    # We should be ready...
    return gpd.GeoDataFrame(geometry=result_tiles, crs=input_tiles.crs)
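# Minimal usage sketch: refine a coarse grid until roughly 100 tiles exist.
# The input file is an assumption; any polygonal tile grid works.
grid = gpd.read_file('tiles.gpkg')
finer_grid = split_tiles(grid, nb_tiles_wanted=100)
print(f'{len(grid)} -> {len(finer_grid)} tiles')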
# Encode each station's trend gradient as a unit-vector direction and keep the
# significance level for colour-coding the arrows.
gpd_bj_stations['U1'] = np.array(
    np.cos(mk_bj_results.Gradient * (np.pi) / 10000))
gpd_bj_stations['V1'] = np.array(
    np.sin(mk_bj_results.Gradient * (np.pi) / 10000))
gpd_bj_stations['p1'] = np.array(mk_bj_results.Significance)

fig, ax = plt.subplots()
ax.set_aspect('equal')
bjsp.plot(ax=ax, color='white', edgecolor='k')
df_clean.plot(ax=ax, marker='o', color='blue', markersize=25)
df_regionalbg.plot(ax=ax, marker='o', color='green', markersize=25)
df_suburban.plot(ax=ax, marker='o', color='orange', markersize=25)
df_traffic.plot(ax=ax, marker='o', color='red', markersize=25)
df_urban.plot(ax=ax, marker='o', color='purple', markersize=25)
plt.legend(types)
#ax.quiver(gpd_bj_stations['X'], gpd_bj_stations['Y'], gpd_bj_stations['U1'], gpd_bj_stations['V1'], width = 0.005)
for row in gpd_bj_stations.itertuples():
    if row.p1 <= 0.05 and row.V1 > 0:
        ax.quiver(row.X, row.Y, row.U1, row.V1, color='maroon', width=0.005)
    if 0.05 < row.p1 <= 0.10 and row.V1 > 0:
        ax.quiver(row.X, row.Y, row.U1, row.V1, color='orangered', width=0.005)
    if 0.10 < row.p1 <= 0.34 and row.V1 > 0:
        ax.quiver(row.X, row.Y, row.U1, row.V1, color='darkgoldenrod',
                  width=0.005)
    if row.p1 <= 0.05 and row.V1 < 0:
        # The source is truncated here; by symmetry with the upward-trend
        # branches, this call presumably plots significant downward trends.
        # The colour is an assumption.
        ax.quiver(row.X, row.Y, row.U1, row.V1, color='navy', width=0.005)
def _load_db_from_df(self, stations_gdf: gpd.GeoDataFrame) -> str:
    """
    Load the DB based on a data frame.

    :return: A summary message stating how many stations were loaded.
    """
    # erase everything first
    # self.logger.info("Deleting old entries ...")
    # for station in ObservationStation.objects.all():
    #     station.delete()
    # self.logger.info("Deletions Completed.")

    # make a lookup for country_code
    country_lookup = EUCountries.get_country_code_lookup()

    stations_gdf = EEAStationDataSource._add_nuts_regions_to_df(stations_gdf)

    self.logger.info(f"Loading {len(stations_gdf)} stations to local data base ...")
    count = 0
    r_list = []
    for row in tqdm(stations_gdf.itertuples(), desc='loading stations'):
        country_code = country_lookup.get(row.Countrycode)
        if country_code is None:
            self.logger.debug(f"{row.Countrycode} skipped. Not in NUTS regions.")
            continue
        try:
            # Build unsaved model instances; objects.create() would hit the DB
            # once per row and make the bulk_create below redundant.
            r_list.append(ObservationStation(
                air_quality_station=row.AirQualityStation,
                country_code=country_code,
                air_quality_network=row.AirQualityNetwork,
                air_quality_station_eoicode=row.AirQualityStationEoICode,
                air_quality_station_natcode=row.AirQualityStationNatCode,
                projection=row.Projection,
                longitude=row.Longitude,
                latitude=row.Latitude,
                altitude=row.Altitude,
                # nuts_0=row.NUTS_0,
                nuts_1=row.NUTS_1,
                nuts_2=row.NUTS_2,
                nuts_3=row.NUTS_3,
                air_quality_station_area=row.AirQualityStationArea))
            count += 1
            if count % 10000 == 0:
                # Flush accumulated rows in one query; conflicts are ignored
                # since duplicates are expected as new files are loaded.
                ObservationStation.objects.bulk_create(r_list, ignore_conflicts=True)
                r_list.clear()
        except Exception as e:
            # take no action. duplicates are expected as new files are loaded.
            # print(e)
            # print(row)
            continue

    if len(r_list) > 0:
        ObservationStation.objects.bulk_create(r_list, ignore_conflicts=True)
        r_list.clear()

    self.logger.info(f"Done downloading station meta data. Loaded {count} stations.")
    return f"Loaded {count} stations."