def test_warning_crs_mismatch(point_gdf, single_rectangle_gdf):
    with pytest.warns(UserWarning, match="CRS mismatch between the CRS"):
        clip(point_gdf, single_rectangle_gdf.to_crs(3857))
def test_not_gdf(single_rectangle_gdf):
    """Non-GeoDataFrame inputs raise a TypeError."""
    with pytest.raises(TypeError):
        clip((2, 3), single_rectangle_gdf)
    with pytest.raises(TypeError):
        clip(single_rectangle_gdf, (2, 3))
def test_returns_series(point_gdf, single_rectangle_gdf):
    """Test that function returns a GeoSeries if GeoSeries is passed."""
    out = clip(point_gdf.geometry, single_rectangle_gdf)
    assert isinstance(out, GeoSeries)
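# The tests above rely on fixtures (`point_gdf`, `single_rectangle_gdf`, etc.)
# defined elsewhere in the suite. A hypothetical sketch of two of them,
# inferred from the geometries the assertions imply -- not the suite's
# actual conftest.py definitions:
import pytest
from shapely.geometry import Point, Polygon
from geopandas import GeoDataFrame


@pytest.fixture
def point_gdf():
    """Points both inside and outside the 10 x 10 clip rectangle."""
    pts = [Point(2, 2), Point(3, 4), Point(9, 8), Point(-12, -15)]
    return GeoDataFrame(geometry=pts, crs="EPSG:4326")


@pytest.fixture
def single_rectangle_gdf():
    """A single 10 x 10 rectangle used as the clip extent."""
    poly = Polygon([(0, 0), (0, 10), (10, 10), (10, 0), (0, 0)])
    return GeoDataFrame(geometry=[poly], crs="EPSG:4326")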
            loc='lower left')
ax2 = fig.add_axes([.67, .22, .25, .2])
ax2.get_xaxis().set_ticks([])
ax2.get_yaxis().set_ticks([])
northeast_states = us_map[us_map['NAME'].isin([
    'Vermont', 'New Hampshire', 'Massachusetts', 'Connecticut',
    'Rhode Island', 'New York', 'Pennsylvania', 'Maine', 'Virginia'
])]
northeast_states['dissolve'] = 1
ne_bound = northeast_states.dissolve('dissolve')
gpd.clip(us_map, box(*ne_bound.geometry.bounds.iloc[0].tolist())).plot(
    ax=ax2, edgecolor='k')
us_map[us_map['NAME'] == 'Vermont'].plot(ax=ax2, color='g')
gpd.GeoDataFrame(
    geometry=[bounds.to_crs(us_map.crs).boundary.iloc[0][0]]).plot(
        color='r', ax=ax2)
ax2.margins(0, 0)
ax2.set_title('Location in NE US')
# ax.set_title('Study Area', fontsize='xx-large')
plt.savefig(os.path.join('results', 'misc_charts', 'study_area.png'),
            bbox_inches='tight')

#%% calculate proportion of
def reservoirs(wtshd):
    data = []  # stores all the variables we need; appended to a pandas DataFrame at the end
    watershed = gpd.read_file(wtshd)
    geom_buffer = watershed.buffer(0)  # it was 400 before, Farshid changed it to 0
    watershed1 = gpd.read_file(wtshd)
    watershed1['geometry'] = geom_buffer
    watershed_prj = watershed1.to_crs('epsg:4269')
    watershed_prj_path = os.path.join(path_dict['tempFolder_path'],
                                      'watershed_prj.shp')
    watershed_prj.to_file(watershed_prj_path)
    watershed_prj_noBuffer = watershed.to_crs('epsg:4269')

    # Get the basin's ID; the column name differs between shapefile directories:
    if "GAGE_ID" in watershed.columns:  # CONUS shapefiles
        data.append(watershed_prj_noBuffer['GAGE_ID'][0])
    elif "HUC10" in watershed.columns:  # SRB-HUC10 shapefiles
        data.append(watershed_prj_noBuffer['HUC10'][0])

    # Get the basin's area; the column name differs between shapefiles:
    if "AREA" in watershed.columns:
        data.append(watershed_prj_noBuffer['AREA'][0])
    elif "AreaSqKm" in watershed.columns:
        data.append(watershed_prj_noBuffer['AreaSqKm'][0] * 1e6)  # convert sq. km to sq. m

    if (needDEM == True) & (data[1] < 24e9):
        # ~24e9 sq. m is the largest area we can mosaic; anything larger hits
        # memory issues. Needs to be fixed in the future.
        flowAccu_temp_path, AccuProcess = create_FlowAccu_tif_file(
            watershed_prj_path, path_dict)
        # AccuProcess == True  --> at least one tif file overlaps the watershed
        # AccuProcess == False --> no tif file overlaps the watershed
    else:
        AccuProcess = False

    if AccuProcess == True:
        watershed_outlet = find_watershed_outlet(watershed_prj,
                                                 flowAccu_temp_path, gages_prj)
    else:
        watershed_outlet = ["_", "_", "_", "_"]
    data.extend(watershed_outlet)

    # Find all major dams inside the watershed
    selected_maj_dams = gpd.clip(dams_shp_prj, watershed_prj_noBuffer)  # it was dams_shp_prj
    # selected_maj_dams = selected_dams.loc[(selected_dams['DAM_HEIGHT'] >= 50) |
    #                                       (selected_dams['NORMAL_STORAGE'] >= 5000)]
    MAJ_NDAMS = len(selected_maj_dams)  # number of dams inside the watershed
    data.append(MAJ_NDAMS)

    if MAJ_NDAMS > 0:  # at least one dam in the watershed
        # Calculate the minimum and mean distance between the dams and the
        # watershed outlet, and get the watershed's "General_Purpose"
        distance_outlet_dams = []
        dam_ID = []
        for i, point in enumerate(selected_maj_dams['geometry']):
            x = point.xy[0][0]
            y = point.xy[1][0]
            dam_ID.append(selected_maj_dams['NID_ID_Cod'].values[i])  # NID_ID_Cod
            # distance between the outlet and each dam
            if (watershed_outlet[2] != "_") & (watershed_outlet[3] != "_"):
                dam_point = (x, y)
                outlet_point = (watershed_outlet[2], watershed_outlet[3])
                distance = hs.haversine(dam_point, outlet_point,
                                        unit=hs.Unit.METERS)
                distance_outlet_dams.append(distance)
            else:
                distance_outlet_dams.append(-999)  # it was np.nan before
        data.append(np.nanmin(distance_outlet_dams))   # distance of the nearest dam from the outlet
        data.append(np.nanmean(distance_outlet_dams))  # average distance of dams from the outlet

        # Calculate the general purpose of the watershed based on the major
        # dams inside it
        dam = dams_info_gdf.loc[dams_info_gdf["NIDID"].isin(dam_ID)]
        if len(dam) > 0:  # some major dams in the shapefile are not in the Excel file
            general_purpose, max_norm_stor, std_norm_stor = \
                general_purpose_watershed(dam, dams_shp_prj)
        else:  # sometimes the dataset has problems
            general_purpose = -1
            max_norm_stor = 0
            std_norm_stor = 0
    else:  # no dam in the watershed
        data.extend([-999, -999])  # dam distances from the outlet (no dam in watershed)
        general_purpose = -1
        max_norm_stor = 0
        std_norm_stor = 0

    data.append(general_purpose)
    data.append(max_norm_stor)
    data.append(std_norm_stor)

    # Find NDAMS_2009 and normal storage for all dams:
    NDAMS, STOR_NOR_2009 = finding_NDAMS(watershed_prj_noBuffer,
                                         dams_info_gdf, path_dict, data)
    data.append(NDAMS)
    data.append(STOR_NOR_2009)
    print("Watershed: ", os.path.split(wtshd)[-1])
    return data
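# Minimal standalone check of the haversine call pattern used above (the
# `haversine` PyPI package). Coordinates here are made up. Note that
# hs.haversine expects (lat, lon) tuples, whereas the function above builds
# (x, y) = (lon, lat) points from shapely geometries -- worth verifying.
import haversine as hs

outlet = (40.0, -76.5)  # (lat, lon)
dam = (40.1, -76.6)     # (lat, lon)
print(hs.haversine(dam, outlet, unit=hs.Unit.METERS))  # distance in metres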
def populate_sample_cell(
    sample_cell: Polygon,
    sample_cell_area: float,
    traces_sindex: PyGEOSSTRTreeIndex,
    traces: gpd.GeoDataFrame,
    nodes: gpd.GeoDataFrame,
    snap_threshold: float,
    resolve_branches_and_nodes: bool,
) -> Dict[str, float]:
    """
    Take a single grid polygon and populate it with parameters.

    Mauldon determination requires that E-nodes are defined for every single
    sample circle. If correct Mauldon values are wanted,
    `resolve_branches_and_nodes` must be passed as True. This will result in
    much longer analysis time.
    """
    _centroid = sample_cell.centroid
    if not isinstance(_centroid, Point):
        raise TypeError("Expected Point centroid.")
    centroid = _centroid
    sample_circle = safe_buffer(centroid, np.sqrt(sample_cell_area) * 1.5)
    sample_circle_area = sample_circle.area
    assert sample_circle_area > 0

    # Choose geometries that are either within the sample_circle or
    # intersect it. Use spatial indexing to filter to only spatially
    # relevant traces and nodes.
    trace_candidates_idx = spatial_index_intersection(
        traces_sindex, geom_bounds(sample_circle))
    trace_candidates = traces.iloc[trace_candidates_idx]
    assert isinstance(trace_candidates, gpd.GeoDataFrame)

    if len(trace_candidates) == 0:
        return determine_topology_parameters(
            trace_length_array=np.array([]),
            node_counts=determine_node_type_counts(np.array([]),
                                                   branches_defined=True),
            area=sample_circle_area,
        )

    if resolve_branches_and_nodes:
        # Solve branches and nodes for each cell if wanted.
        # Only way to make sure Mauldon parameters are correct.
        _, nodes = branches_and_nodes(
            traces=trace_candidates,
            areas=gpd.GeoSeries([sample_circle], crs=traces.crs),
            snap_threshold=snap_threshold,
        )

    # node_candidates_idx = list(nodes_sindex.intersection(sample_circle.bounds))
    node_candidates_idx = spatial_index_intersection(
        spatial_index=pygeos_spatial_index(nodes),
        coordinates=geom_bounds(sample_circle),
    )
    node_candidates = nodes.iloc[node_candidates_idx]

    # Crop traces to the sample circle. First check if any geometries
    # intersect; if not, sample_traces is an empty GeoDataFrame.
    if any(trace_candidate.intersects(sample_circle)
           for trace_candidate in trace_candidates.geometry.values):
        sample_traces = crop_to_target_areas(
            traces=trace_candidates,
            areas=gpd.GeoSeries([sample_circle]),
            is_filtered=True,
            keep_column_data=False,
        )
    else:
        sample_traces = traces.iloc[0:0]

    if any(node.intersects(sample_circle) for node in nodes.geometry.values):
        # if any(nodes.intersects(sample_circle)):
        # TODO: Is node clipping stable?
        sample_nodes = gpd.clip(node_candidates, sample_circle)
        assert sample_nodes is not None
        assert all(isinstance(val, Point)
                   for val in sample_nodes.geometry.values)
    else:
        sample_nodes = nodes.iloc[0:0]

    assert isinstance(sample_nodes, gpd.GeoDataFrame)
    assert isinstance(sample_traces, gpd.GeoDataFrame)

    sample_node_type_values = sample_nodes[CLASS_COLUMN].values
    assert isinstance(sample_node_type_values, np.ndarray)

    node_counts = determine_node_type_counts(sample_node_type_values,
                                             branches_defined=True)
    topology_parameters = determine_topology_parameters(
        trace_length_array=sample_traces.geometry.length.values,
        node_counts=node_counts,
        area=sample_circle_area,
        correct_mauldon=resolve_branches_and_nodes,
    )
    return topology_parameters
def clip_by_shape(self, other_gdf):
    """Clip this GDF by another GDF."""
    self.gdf = gpd.clip(self.gdf, other_gdf)
def test_clip_lines(two_line_gdf, single_rectangle_gdf):
    """Test what happens when you give the clip_extent a line GDF."""
    clip_line = clip(two_line_gdf, single_rectangle_gdf)
    assert len(clip_line.geometry) == 2
def test_clip_with_multipolygon(buffered_locations, single_rectangle_gdf):
    """Test clipping a polygon with a multipolygon."""
    multi = buffered_locations.dissolve(by="type").reset_index()
    clipped = clip(single_rectangle_gdf, multi)
    assert clipped.geom_type[0] == "Polygon"
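# For context, dissolve(by=...) merges geometries sharing a key, which is
# how the test above manufactures a MultiPolygon mask. A small sketch with
# made-up points (disjoint buffers dissolve to a MultiPolygon):
import geopandas as gpd
from shapely.geometry import Point

pts = gpd.GeoDataFrame(
    {"type": ["a", "a", "a"]},
    geometry=[Point(0, 0).buffer(1), Point(5, 5).buffer(1),
              Point(9, 9).buffer(1)],
)
multi = pts.dissolve(by="type").reset_index()
print(multi.geom_type.iloc[0])  # "MultiPolygon"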
def test_clip_poly_series(buffered_locations, single_rectangle_gdf):
    """Test clipping a polygon GeoSeries with a polygon GDF."""
    clipped_poly = clip(buffered_locations.geometry, single_rectangle_gdf)
    assert len(clipped_poly) == 3
    assert all(clipped_poly.geom_type == "Polygon")
def test_clip_multiline(multi_line, single_rectangle_gdf):
    """Test that clipping a multiline feature with a poly returns expected output."""
    clipped = clip(multi_line, single_rectangle_gdf)
    assert clipped.geom_type[0] == "MultiLineString"
def xr_animation(ds,
                 bands=None,
                 output_path='animation.mp4',
                 width_pixels=500,
                 interval=100,
                 percentile_stretch=(0.02, 0.98),
                 image_proc_funcs=None,
                 show_gdf=None,
                 show_date='%d %b %Y',
                 show_text=None,
                 show_colorbar=True,
                 gdf_kwargs={},
                 annotation_kwargs={},
                 imshow_kwargs={},
                 colorbar_kwargs={},
                 limit=None):
    """
    Takes an `xarray` timeseries and animates the data as either a
    three-band (e.g. true or false colour) or single-band animation,
    allowing changes in the landscape to be compared across time.

    Animations can be customised to include text and date annotations or
    use specific combinations of input bands. Vector data can be overlaid
    and animated on top of imagery, and custom image processing functions
    can be applied to each frame.

    Supports .mp4 (ideal for Twitter/social media) and .gif (ideal for
    all purposes, but can have large file sizes) format files.

    Last modified: October 2020

    Parameters
    ----------
    ds : xarray.Dataset
        An xarray dataset with multiple time steps (i.e. multiple
        observations along the `time` dimension).
    bands : list of strings
        A list of either one or three band names to be plotted, all of
        which must exist in `ds`.
    output_path : str, optional
        A string giving the output location and filename of the resulting
        animation. File extensions of '.mp4' and '.gif' are accepted.
        Defaults to 'animation.mp4'.
    width_pixels : int, optional
        An integer defining the output width in pixels for the resulting
        animation. The height of the animation is set automatically based
        on the dimensions/ratio of the input xarray dataset. Defaults to
        500 pixels wide.
    interval : int, optional
        An integer defining the milliseconds between each animation frame
        used to control the speed of the output animation. Higher values
        result in a slower animation. Defaults to 100 milliseconds between
        each frame.
    percentile_stretch : tuple of floats, optional
        An optional tuple of two floats that can be used to clip one or
        three-band arrays by percentiles to produce a more vibrant,
        visually attractive image that is not affected by outliers/
        extreme values. The default is `(0.02, 0.98)` which is equivalent
        to xarray's `robust=True`. This parameter is ignored completely if
        `vmin` and `vmax` are provided as kwargs to `imshow_kwargs`.
    image_proc_funcs : list of funcs, optional
        An optional list containing functions that will be applied to each
        animation frame (timestep) prior to animating. This can include
        image processing functions such as increasing contrast, unsharp
        masking, saturation etc. The function should take AND return a
        `numpy.ndarray` with shape [y, x, bands]. If your function has
        parameters, you can pass in custom values using a lambda function:
        `image_proc_funcs=[lambda x: custom_func(x, param1=10)]`.
    show_gdf : geopandas.GeoDataFrame, optional
        Vector data (e.g. ESRI shapefiles or GeoJSON) can be optionally
        plotted over the top of imagery by supplying a
        `geopandas.GeoDataFrame` object. To customise colours used to plot
        the vector features, create a new column in the GeoDataFrame
        called 'colors' specifying the colour used to plot each feature:
        e.g. `gdf['colors'] = 'red'`. To plot vector features at specific
        moments in time during the animation, create new 'start_time'
        and/or 'end_time' columns in the GeoDataFrame that define the time
        range used to plot each feature. Dates can be provided in any
        string format that can be converted using `pandas.to_datetime()`,
        e.g. `gdf['end_time'] = ['2001', '2005-01', '2009-01-01']`.
    show_date : string or bool, optional
        An optional string or bool that defines how (or if) to plot date
        annotations for each animation frame. Defaults to '%d %b %Y'; can
        be customised to any format understood by strftime
        (https://strftime.org/). Set to False to remove date annotations
        completely.
    show_text : str or list of strings, optional
        An optional string or list of strings with a length equal to the
        number of timesteps in `ds`. This can be used to display a static
        text annotation (using a string), or a dynamic title (using a
        list) that displays different text for each timestep. By default,
        no text annotation will be plotted.
    show_colorbar : bool, optional
        An optional boolean indicating whether to include a colourbar for
        single-band animations. Defaults to True.
    gdf_kwargs : dict, optional
        An optional dictionary of keyword arguments to customise the
        appearance of a `geopandas.GeoDataFrame` supplied to `show_gdf`.
        Keyword arguments are passed to `GeoSeries.plot` (see
        http://geopandas.org/reference.html#geopandas.GeoSeries.plot).
        For example: `gdf_kwargs = {'linewidth': 2}`.
    annotation_kwargs : dict, optional
        An optional dict of keyword arguments for controlling the
        appearance of text annotations. Keyword arguments are passed to
        `matplotlib`'s `plt.annotate` (see
        https://matplotlib.org/api/_as_gen/matplotlib.pyplot.annotate.html
        for options). For example,
        `annotation_kwargs={'fontsize': 20, 'color': 'red', 'family': 'serif'}`.
    imshow_kwargs : dict, optional
        An optional dict of keyword arguments for controlling the
        appearance of arrays passed to `matplotlib`'s `plt.imshow` (see
        https://matplotlib.org/api/_as_gen/matplotlib.pyplot.imshow.html
        for options). For example, a green colour scheme and custom
        stretch could be specified using:
        `imshow_kwargs={'cmap': 'Greens', 'vmin': 0.2, 'vmax': 0.9}`.
        (Some parameters like 'cmap' will only have an effect for
        single-band animations, not three-band RGB animations.)
    colorbar_kwargs : dict, optional
        An optional dict of keyword arguments used to control the
        appearance of the colourbar. Keyword arguments are passed to
        `matplotlib.pyplot.tick_params` (see
        https://matplotlib.org/api/_as_gen/matplotlib.pyplot.tick_params.html
        for options). This can be used to customise the colourbar ticks,
        e.g. changing tick label colour depending on the background of the
        animation: `colorbar_kwargs={'colors': 'black'}`.
    limit : int, optional
        An optional integer specifying how many animation frames to render
        (e.g. `limit=50` will render the first 50 frames). This can be
        useful for quickly testing animations without rendering the entire
        time-series.
    """

    def _start_end_times(gdf, ds):
        """
        Converts 'start_time' and 'end_time' columns in a
        `geopandas.GeoDataFrame` to datetime objects to allow vector
        features to be plotted at specific moments in time during an
        animation, and sets default values based on the first and last
        time in `ds` if this information is missing from the dataset.
        """
        # Make copy of gdf so we do not modify original data
        gdf = gdf.copy()

        # Get min and max times from input dataset
        minmax_times = pd.to_datetime(ds.time.isel(time=[0, -1]).values)

        # Update both `start_time` and `end_time` columns
        for time_col, time_val in zip(['start_time', 'end_time'],
                                      minmax_times):

            # Add time_col if it does not exist
            if time_col not in gdf:
                gdf[time_col] = np.nan

            # Convert values to datetimes and fill gaps with relevant time value
            gdf[time_col] = pd.to_datetime(gdf[time_col], errors='ignore')
            gdf[time_col] = gdf[time_col].fillna(time_val)

        return gdf

    def _add_colorbar(fig, ax, vmin, vmax, imshow_defaults,
                      colorbar_defaults):
        """
        Adds a new colorbar axis to the animation with custom minimum and
        maximum values and styling.
        """
        # Create new axis object for colorbar
        cax = fig.add_axes([0.02, 0.02, 0.96, 0.03])

        # Initialise color bar using plot min and max values
        img = ax.imshow(np.array([[vmin, vmax]]), **imshow_defaults)
        fig.colorbar(img, cax=cax, orientation='horizontal',
                     ticks=np.linspace(vmin, vmax, 2))

        # Fine-tune appearance of colorbar
        cax.xaxis.set_ticks_position('top')
        cax.tick_params(axis='x', **colorbar_defaults)
        cax.get_xticklabels()[0].set_horizontalalignment('left')
        cax.get_xticklabels()[-1].set_horizontalalignment('right')

    def _frame_annotation(times, show_date, show_text):
        """
        Creates a custom annotation for the top-right of the animation by
        converting a `xarray.DataArray` of times into strings, and
        combining this with a custom text annotation. Handles cases where
        `show_date=False/None`, `show_text=False/None`, or where
        `show_text` is a list of strings.
        """
        # Test if show_text is supplied as a list
        is_sequence = isinstance(show_text, (list, tuple, np.ndarray))

        # Raise exception if it is shorter than number of dates
        if is_sequence and (len(show_text) == 1):
            show_text, is_sequence = show_text[0], False
        elif is_sequence and (len(show_text) < len(times)):
            raise ValueError(f'Annotations supplied via `show_text` must have '
                             f'either a length of 1, or a length >= the number '
                             f'of timesteps in `ds` (n={len(times)})')

        times_list = (times.dt.strftime(show_date).values
                      if show_date else [None] * len(times))
        text_list = show_text if is_sequence else [show_text] * len(times)
        annotation_list = ['\n'.join([str(i) for i in (a, b) if i])
                           for a, b in zip(times_list, text_list)]

        return annotation_list

    def _update_frames(i, ax, extent, annotation_text, gdf, gdf_defaults,
                       annotation_defaults, imshow_defaults):
        """
        Animation called by `matplotlib.animation.FuncAnimation` to
        animate each frame in the animation. Plots array and any text
        annotations, as well as a temporal subset of `gdf` data based on
        the times specified in 'start_time' and 'end_time' columns.
        """
        # Clear previous frame to optimise render speed and plot imagery
        ax.clear()
        ax.imshow(array[i, ...].clip(0.0, 1.0), extent=extent,
                  vmin=0.0, vmax=1.0, **imshow_defaults)

        # Add annotation text
        ax.annotate(annotation_text[i], **annotation_defaults)

        # Add geodataframe annotation
        if show_gdf is not None:

            # Obtain start and end times to filter geodataframe features
            time_i = ds.time.isel(time=i).values

            # Subset geodataframe using start and end dates
            gdf_subset = show_gdf.loc[(show_gdf.start_time <= time_i) &
                                      (show_gdf.end_time >= time_i)]

            if len(gdf_subset.index) > 0:

                # Set color to geodataframe field if supplied
                if ('color' in gdf_subset) and ('color' not in gdf_kwargs):
                    gdf_defaults.update(
                        {'color': gdf_subset['color'].tolist()})

                gdf_subset.plot(ax=ax, **gdf_defaults)

        # Remove axes to show imagery only
        ax.axis('off')

        # Update progress bar
        progress_bar.update(1)

    # Test if bands have been supplied, or convert to list to allow
    # iteration if a single band is provided as a string
    if bands is None:
        raise ValueError(f'Please use the `bands` parameter to supply '
                         f'a list of one or three bands that exist as '
                         f'variables in `ds`, e.g. {list(ds.data_vars)}')
    elif isinstance(bands, str):
        bands = [bands]

    # Test if bands exist in dataset
    missing_bands = [b for b in bands if b not in ds.data_vars]
    if missing_bands:
        raise ValueError(f'Band(s) {missing_bands} do not exist as '
                         f'variables in `ds` {list(ds.data_vars)}')

    # Test if time dimension exists in dataset
    if 'time' not in ds.dims:
        raise ValueError(f"`ds` does not contain a 'time' dimension "
                         f"required for generating an animation")

    # Set default parameters
    outline = [PathEffects.withStroke(linewidth=2.5, foreground='black')]
    annotation_defaults = {
        'xy': (1, 1),
        'xycoords': 'axes fraction',
        'xytext': (-5, -5),
        'textcoords': 'offset points',
        'horizontalalignment': 'right',
        'verticalalignment': 'top',
        'fontsize': 20,
        'color': 'white',
        'path_effects': outline
    }
    imshow_defaults = {'cmap': 'magma', 'interpolation': 'nearest'}
    colorbar_defaults = {'colors': 'white', 'labelsize': 12, 'length': 0}
    gdf_defaults = {'linewidth': 1.5}

    # Update defaults with kwargs
    annotation_defaults.update(annotation_kwargs)
    imshow_defaults.update(imshow_kwargs)
    colorbar_defaults.update(colorbar_kwargs)
    gdf_defaults.update(gdf_kwargs)

    # Get info on dataset dimensions
    height, width = ds.geobox.shape
    scale = width_pixels / width
    left, bottom, right, top = ds.geobox.extent.boundingbox

    # Prepare annotations
    annotation_list = _frame_annotation(ds.time, show_date, show_text)

    # Prepare geodataframe
    if show_gdf is not None:
        show_gdf = show_gdf.to_crs(ds.geobox.crs)
        show_gdf = gpd.clip(show_gdf, mask=box(left, bottom, right, top))
        show_gdf = _start_end_times(show_gdf, ds)

    # Convert data to 4D numpy array of shape [time, y, x, bands]
    ds = ds[bands].to_array().transpose(..., 'variable')[0:limit, ...]
    array = ds.astype(np.float32).values

    # Optionally apply image processing along axis 0 (e.g. to each timestep)
    bar_format = '{l_bar}{bar}| {n_fmt}/{total_fmt} ({remaining_s:.1f} ' \
                 'seconds remaining at {rate_fmt}{postfix})'
    if image_proc_funcs:
        print('Applying custom image processing functions')
        for i, array_i in tqdm(enumerate(array),
                               total=len(ds.time),
                               leave=False,
                               bar_format=bar_format,
                               unit=' frames'):
            for func in image_proc_funcs:
                array_i = func(array_i)
            array[i, ...] = array_i

    # Clip to percentiles and rescale between 0.0 and 1.0 for plotting
    vmin, vmax = np.quantile(array[np.isfinite(array)],
                             q=percentile_stretch)

    # Replace with vmin and vmax if present in `imshow_defaults`
    if 'vmin' in imshow_defaults:
        vmin = imshow_defaults.pop('vmin')
    if 'vmax' in imshow_defaults:
        vmax = imshow_defaults.pop('vmax')

    # Rescale between 0 and 1
    array = rescale_intensity(array, in_range=(vmin, vmax),
                              out_range=(0.0, 1.0))
    array = np.squeeze(array)  # remove final axis if only one band

    # Set up figure
    fig, ax = plt.subplots()
    fig.set_size_inches(width * scale / 72, height * scale / 72,
                        forward=True)
    fig.subplots_adjust(left=0, bottom=0, right=1, top=1,
                        wspace=0, hspace=0)

    # Optionally add colorbar
    if show_colorbar & (len(bands) == 1):
        _add_colorbar(fig, ax, vmin, vmax, imshow_defaults,
                      colorbar_defaults)

    # Animate
    print(f'Exporting animation to {output_path}')
    anim = FuncAnimation(
        fig=fig,
        func=_update_frames,
        fargs=(
            ax,  # axis to plot into
            [left, right, bottom, top],  # imshow extent
            annotation_list,  # list of text annotations
            show_gdf,  # geodataframe to plot over imagery
            gdf_defaults,  # any kwargs used to plot gdf
            annotation_defaults,  # kwargs for annotations
            imshow_defaults),  # kwargs for imshow
        frames=len(ds.time),
        interval=interval,
        repeat=False)

    # Set up progress bar
    progress_bar = tqdm(total=len(ds.time), unit=' frames',
                        bar_format=bar_format)

    # Export animation to file
    if Path(output_path).suffix == '.gif':
        anim.save(output_path, writer='pillow')
    else:
        anim.save(output_path, dpi=72)

    # Update progress bar to fix progress bar moving past end
    if progress_bar.n != len(ds.time):
        progress_bar.n = len(ds.time)
        progress_bar.last_print_n = len(ds.time)
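# A hypothetical usage sketch of xr_animation -- assumes `ds` is an
# xarray.Dataset with a `time` dimension and a geobox (e.g. loaded via
# datacube/odc), and that the named bands exist as variables in it.
xr_animation(ds,
             bands=['red', 'green', 'blue'],  # three-band true colour
             output_path='animation.mp4',
             interval=150,                    # slower playback
             show_text='Landsat true colour')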
Remember, our variogram defines the spatial autocorrelation of the data (i.e., how the locations in our region affect one another). Once we have a variogram model, we can use it to estimate the weights in our kriging model. I won't go into detail on how this is done, but there is a neat walkthrough in the [scikit-gstat docs here](https://scikit-gstat.readthedocs.io/en/latest/userguide/kriging.html). Anyway, I'll briefly use the [pykrige](https://github.com/GeoStat-Framework/PyKrige) library to do some kriging so you can get an idea of what it looks like:

    krig = OrdinaryKriging(x=gpm25["Easting"], y=gpm25["Northing"],
                           z=gpm25["PM_25"], variogram_model="spherical")
    z, ss = krig.execute("grid", gridx, gridy)
    plt.imshow(z);

Now let's convert our raster back to polygons so we can map it. I'm also going to load in a polygon of BC using `osmnx` to clip my data so it fits nicely on my map this time:

    polygons, values = pixel2poly(gridx, gridy, z, resolution)
    pm25_model = (gpd.GeoDataFrame({"PM_25_modelled": values},
                                   geometry=polygons, crs="EPSG:3347")
                  .to_crs("EPSG:4326"))

    bc = ox.geocode_to_gdf("British Columbia, Canada")
    pm25_model = gpd.clip(pm25_model, bc)

    fig = px.choropleth_mapbox(pm25_model,
                               geojson=pm25_model.geometry,
                               locations=pm25_model.index,
                               color="PM_25_modelled",
                               color_continuous_scale="RdYlGn_r",
                               center={"lat": 52.261, "lon": -123.246},
                               zoom=3.5,
                               mapbox_style="carto-positron")
    fig.update_layout(margin=dict(l=0, r=0, t=30, b=10))
    fig.update_traces(marker_line_width=0)

I used an "ordinary kriging" interpolation above, which is the simplest implementation of kriging. There are many other forms of kriging too, which can account for underlying trends in the data ("universal kriging") or even use a regression or classification model to make use of additional explanatory variables. `pykrige` [supports most variations](https://geostat-framework.readthedocs.io/projects/pykrige/en/stable/examples/index.html). In particular for the latter, `pykrige` can accept `sklearn` models, which is useful!

### 2.3. Areal interpolation

Areal interpolation is concerned with mapping data from one polygonal representation to another. Imagine I want to map the air pollution polygons I just made to FSA polygons (recall FSA is "forward sortation area", which are groups of postcodes). The most intuitive way to do this is to distribute values based on area proportions, hence "areal interpolation". I'll use the [tobler](https://github.com/pysal/tobler) library for this. First, load in the FSA polygons:
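The FSA-loading cell itself is not shown in this excerpt, but as a sketch of the tobler step that follows, assuming an `fsa` GeoDataFrame of FSA polygons already loaded and reprojected to match `pm25_model` (the function is tobler's `area_interpolate`; the variable names are placeholders):

    # Areal interpolation sketch: distribute PM2.5 (an intensive variable)
    # from the modelled polygons onto the hypothetical `fsa` polygons.
    from tobler.area_weighted import area_interpolate

    pm25_fsa = area_interpolate(source_df=pm25_model,
                                target_df=fsa,
                                intensive_variables=["PM_25_modelled"])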
## make a RECTANGLE to clip the viasat_data
from shapely.geometry import Polygon

lat_point_list = [37.625, 37.625, 37.426, 37.426, 37.625]
lon_point_list = [14.86, 15.25, 15.25, 14.86, 14.86]
polygon_geom = Polygon(zip(lon_point_list, lat_point_list))
polygon = gpd.GeoDataFrame(index=[0], crs='EPSG:4326',
                           geometry=[polygon_geom])
## check projection (CRS)
# polygon.crs
# polygon.plot()

## clip the data with the RECTANGLE
LPIR_RESILIENT = gpd.clip(LPIR_RESILIENT, polygon)

### plot geodataframe with colors -----###
## add background map ###
gdf = LPIR_RESILIENT
import contextily as ctx
del LPIR_RESILIENT
# minx, miny, maxx, maxy = gdf.geometry.total_bounds
# polygon.geometry.total_bounds
## reproject to Mercator coordinates (the coordinate system of the basemap)
gdf = gdf.to_crs(epsg=3857)

# Plot the data within the RECTANGULAR extent
fig, ax = plt.subplots(figsize=(10, 10))
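# The corner-list construction above recurs in several of these snippets;
# shapely's box(minx, miny, maxx, maxy) builds the same rectangle in one
# line and is less error-prone:
from shapely.geometry import box

polygon_geom = box(14.86, 37.426, 15.25, 37.625)
polygon = gpd.GeoDataFrame(index=[0], crs='EPSG:4326',
                           geometry=[polygon_geom])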
                                        category='cultural',
                                        name='admin_1_states_provinces_lines',
                                        scale='50m',
                                        facecolor='none')
ax1.add_feature(cfeature.LAND)
ax1.add_feature(cfeature.COASTLINE)
ax1.add_feature(states_provinces, edgecolor='black')

mun = geobr.read_municipality(code_muni='all', year=2018)
mun.plot(facecolor="none", alpha=1, edgecolor='gray', ax=ax1)

polygon = Polygon([(lon1, lat1), (lon1, lat2), (lon2, lat2),
                   (lon2, lat1), (lon1, lat1)])
poly_gdf = gpd.GeoDataFrame([1], geometry=[polygon], crs=mun.crs)
poly_gdf.boundary.plot(ax=ax1, color="red")

munDomain = gpd.clip(mun, polygon)  ## This is very important
munDomain = munDomain.sort_values(by='abbrev_state')

ax2 = plt.subplot(1, 2, 2, projection=ccrs.PlateCarree())
munDomain.plot(ax=ax2, color="purple", alpha=0.5)
munDomain.boundary.plot(ax=ax2)
poly_gdf.boundary.plot(ax=ax2, color="red")
ax2.set_title("Clipped", fontsize=20)
plt.savefig('04_output/emissions/fig/clip.png', bbox_inches='tight',
            facecolor='w')

#%% Temporal distribution from Andrade et al. (2015)
co = [
    0.019, 0.012, 0.008, 0.004, 0.003, 0.003, 0.006, 0.017, 0.047, 0.074,
    0.072, 0.064, 0.055, 0.052, 0.051, 0.048, 0.052, 0.057, 0.068, 0.087,
def test_mixed_geom(mixed_gdf, single_rectangle_gdf):
    """Test clipping a mixed GeoDataFrame."""
    clipped = clip(mixed_gdf, single_rectangle_gdf)
    assert (clipped.geom_type[0] == "Point"
            and clipped.geom_type[1] == "Polygon"
            and clipped.geom_type[2] == "LineString")
## make a RECTANGLE to clip the viasat_data
from shapely.geometry import Polygon

lat_point_list = [37.625, 37.625, 37.426, 37.426, 37.625]
lon_point_list = [14.86, 15.25, 15.25, 14.86, 14.86]
polygon_geom = Polygon(zip(lon_point_list, lat_point_list))
polygon = gpd.GeoDataFrame(index=[0], crs='EPSG:4326',
                           geometry=[polygon_geom])
## check projection (CRS)
## https://geopandas.org/projections.html
# polygon.crs
# polygon.plot()

## clip the data with the RECTANGLE
all_counts_uv = gpd.clip(all_counts_uv, polygon)

### plot geodataframe with colors -----###
## add background map ###
gdf = all_counts_uv
import contextily as ctx
# minx, miny, maxx, maxy = gdf.geometry.total_bounds
# polygon.geometry.total_bounds
## reproject to Mercator coordinates (the coordinate system of the basemap)
gdf = gdf.to_crs(epsg=3857)

# Plot the data within the RECTANGULAR extent
fig, ax = plt.subplots(figsize=(10, 10))
polygon = polygon.to_crs(epsg=3857)
polygon.plot(alpha=0,
def test_mixed_series(mixed_gdf, single_rectangle_gdf):
    """Test clipping a mixed GeoSeries."""
    clipped = clip(mixed_gdf.geometry, single_rectangle_gdf)
    assert (clipped.geom_type[0] == "Point"
            and clipped.geom_type[1] == "Polygon"
            and clipped.geom_type[2] == "LineString")
if __name__ == "__main__": # Project's root os.chdir("../..") for region in REGIONS: region_name = region.get("name") region_mask = gpd.read_file(region.get("path")) df = pd.DataFrame(columns=["year", "burned_area", "rainfall"]) grid = create_grid(*region_mask.bounds.loc[0], GRID_RESOLUTION, region_mask.crs) grid = gpd.clip(grid, region_mask) grid = grid[grid.area >= GRID_AREA_THRESHOLD * GRID_RESOLUTION**2] grid = grid.reset_index() burn_fn = f"data/nc/MODIS/MCD64A1/{region_name}/MCD64A1_500m.nc" burn_da = xr.open_dataset(burn_fn, mask_and_scale=False)["Burn_Date"] rainfall_fn = f"data/nc/CHC/CHIRPS/{region_name}/chirps_v2_5km.nc" rainfall_da = xr.open_dataset(rainfall_fn, mask_and_scale=False)["precip"] years = np.unique(burn_da.time.dt.year.values) for year in years: temp_grid = grid.copy()
def test_warning_extra_geoms_mixed(single_rectangle_gdf, mixed_gdf):
    """Test the correct warnings are raised if keep_geom_type is
    called on a mixed GDF."""
    with pytest.warns(UserWarning):
        clip(mixed_gdf, single_rectangle_gdf, keep_geom_type=True)
geo_df = geo_df.to_crs(epsg=2163)
state_map = gpd.read_file(
    'shapefiles/geo_export_9ef76f60-e019-451c-be6b-5a879a5e7c07.shp')
state_map = state_map.to_crs(epsg=2163)
group_map = gpd.read_file('shapefiles/Corn_belt_all_states_20_bz.shp')
group_map = group_map.to_crs(epsg=2163)
county_map = gpd.read_file('shapefiles/Corn_belt_all_states.shp')
county_map = county_map.to_crs(epsg=2163)
county_map2 = gpd.read_file('shapefiles/USA_counties.shp')
county_map2 = county_map2.to_crs(epsg=2163)

cb_counties = gpd.clip(county_map2, county_map)

fig, ax = plt.subplots()
state_map.plot(ax=ax, color='gray', alpha=0.6, edgecolor='white',
               linewidth=0.5)
cb_counties.plot(ax=ax, color='orange', alpha=1, edgecolor='darkslategrey',
                 linewidth=0.2)
ax.set_box_aspect(1)
ax.set_xlim(-750000, 2000000)
def test_warning_geomcoll(single_rectangle_gdf, geomcol_gdf):
    """Test the correct warnings are raised if keep_geom_type is
    called on a GDF with GeometryCollection."""
    with pytest.warns(UserWarning):
        clip(geomcol_gdf, single_rectangle_gdf, keep_geom_type=True)
# Convert to geodataframe
main_polygons = geopandas.GeoDataFrame.from_features(collection)
main_polygons.crs = project_crs

for cell_char in grid_cell_chars:
    for index in range(1, grid_cell_len + 1):
        cell = cell_char + str(index)
        curr_path = os.path.join(cell_grids_path, cell, cell_grids_id)
        os.chdir(curr_path)
        polygons = glob.glob("*.shp")
        for polygon_name in polygons:
            polygon_path = os.path.join(curr_path, polygon_name)
            polygon = geopandas.read_file(polygon_path)
            polygon.crs = project_crs
            try:
                clipped_poly = geopandas.clip(main_polygons, polygon)
                if not clipped_poly.empty:
                    out_path = os.path.join(output_path, cell, polygon_name)
                    clipped_poly.to_file(out_path)
                    print(cell, polygon_name)
            except Exception:
                # Skip grid cells that fail to clip (e.g. invalid geometries)
                pass
        print("Done with " + cell)
        print("\n---\n\n")
print("done")
## make a RECTANGLE to clip the viasat_data
from shapely.geometry import Polygon

lat_point_list = [37.625, 37.625, 37.426, 37.426, 37.625]
lon_point_list = [14.86, 15.25, 15.25, 14.86, 14.86]
polygon_geom = Polygon(zip(lon_point_list, lat_point_list))
polygon = gpd.GeoDataFrame(index=[0], crs='EPSG:4326',
                           geometry=[polygon_geom])
## check projection (CRS)
# polygon.crs
# polygon.plot()

## clip the data with the RECTANGLE
speed_PHF_and_SCARICA = gpd.clip(speed_PHF_and_SCARICA, polygon)

### plot geodataframe with colors -----###
## add background map ###
gdf = speed_PHF_and_SCARICA
import contextily as ctx
# minx, miny, maxx, maxy = gdf.geometry.total_bounds
# polygon.geometry.total_bounds
## reproject to Mercator coordinates (the coordinate system of the basemap)
gdf = gdf.to_crs(epsg=3857)

# Plot the data within the RECTANGULAR extent
fig, ax = plt.subplots(figsize=(10, 10))
polygon = polygon.to_crs(epsg=3857)
polygon.plot(alpha=0,
if max(testmerged['sum_y']) > maxval:
    maxval = max(testmerged['sum_y'])

########################################
# Clip data by polygon and create plot #
########################################

# get a Series of protected area names
pas = poly['NAME']

# loop through protected areas
for i in pas:
    pa = poly[poly['NAME'] == i]

    # clip by protected area polygon
    pa_viirs19 = gpd.clip(fire19, pa)
    # group by date
    df19 = groupByDate(pa_viirs19, idx19)

    # same for 2020 data
    # clip by protected area polygon
    pa_viirs20 = gpd.clip(fire20, pa)
    # group by date
    df20 = groupByDate(pa_viirs20, idx20)

    # reset index
    df19 = df19.reset_index()
    df20 = df20.reset_index()
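# `groupByDate` is a project helper not included in this excerpt. A purely
# hypothetical sketch, assuming it counts VIIRS detections per acquisition
# date and reindexes to a full date range; the 'ACQ_DATE' column name is
# an assumption based on the standard FIRMS attribute.
def groupByDate(gdf, idx):
    """Count fire detections per acquisition date, reindexed to `idx`
    so days with no detections appear as zeros."""
    counts = gdf.groupby('ACQ_DATE').size()
    return counts.reindex(idx, fill_value=0)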
def bikeability(place, scale='city', data=False):
    '''
    Calculate a bikeability value for a given place of interest.

    Parameters
    ----------
    place : string
        The place of interest, e.g. "Freiburg, Germany".
    scale : string
        Either "grid" or "city"; default is "city".
    data : boolean
        If True, the output returns a dataframe along with the standard
        dictionary output.

    Returns
    -------
    The average_index for bikeability (a number between 0 and 100) and some
    summary statistics of the index, as a dictionary -- or a dataframe and
    a dictionary if data is set to True.

    Usage examples
    --------------
    a = bikeability('Freiburg, Germany', scale='grid', data=False)  # grid scale
    a, b = bikeability('Freiburg, Germany', scale='grid', data=True)
    a = bikeability('Freiburg, Germany', scale='city')  # city scale
    a, b = bikeability('Freiburg, Germany', scale='city', data=True)
    '''
    if scale != 'grid':
        place = place
        # Create and set osmnx to select important tags
        useful_tags_way = [
            'bridge', 'length', 'oneway', 'lanes', 'ref', 'name', 'highway',
            'maxspeed', 'service', 'access', 'area', 'cycleway', 'landuse',
            'width', 'est_width', 'junction', 'surface'
        ]
        ox.utils.config(useful_tags_way=useful_tags_way)  # = useful_tags_path change here1

        # Create basic city graph
        place_name = place
        graph = ox.graph_from_place(place_name, network_type='all',
                                    retain_all=True)

        # Calculate and add edge closeness centrality (connectedness)
        centrality = nx.degree_centrality(nx.line_graph(graph))
        nx.set_edge_attributes(graph, centrality, 'centrality')

        # Extract nodes and edges to geopandas from graph
        # edges = ox.graph_to_gdfs(graph, nodes=False)
        try:
            edges = ox.graph_to_gdfs(graph, nodes=False)
        except Exception as e:
            print('{} at {}'.format(e, place))

        # Remove unwanted columns and add weight variable
        cols = [
            'highway', 'cycleway', 'surface', 'maxspeed', 'length', 'lanes',
            'oneway', 'width', 'centrality', 'geometry'
        ]
        try:
            df = edges.loc[:, cols]
        except KeyError as e:
            print(e)

        # Set appropriate data types
        df['maxspeed'] = pd.to_numeric(df['maxspeed'], errors='coerce',
                                       downcast='integer')
        df['lanes'] = pd.to_numeric(df['lanes'], errors='coerce',
                                    downcast='integer')
        df['width'] = pd.to_numeric(df['width'], errors='coerce',
                                    downcast='unsigned')
        df['highway'] = df['highway'].astype(str)
        df['surface'] = df['surface'].astype(str)
        df['oneway'] = df['oneway'].astype(int)
        df['cycleway'] = df['cycleway'].astype(str)

        # Dataframe cleaning and preprocessing
        # highway column
        df['highway'] = df['highway'].str.replace(r'[^\w\s-]', '', regex=True)
        highway_cols = pd.DataFrame(df.highway.str.split(' ', expand=True))
        highway_map = {
            'service': 6, 'None': np.nan, 'residential': 8,
            'unclassified': 7, 'footway': 7, 'track': 5, 'tertiary': 6,
            'living_street': 9, 'path': 5, 'pedestrian': 7, 'secondary': 5,
            'primary': 2, 'steps': 2, 'cycleway': 10, 'rest_area': 5,
            'primary_link': 2, 'ferry': 1, 'construction': 2, 'byway': 8,
            'bridleway': 6, 'trunk': 2, 'trunk_link': 2, 'motorway': 1,
            'motorway_link': 1
        }
        for column in highway_cols:
            highway_cols[column] = highway_cols[column].map(highway_map)
        highway_cols['mean'] = np.nanmean(highway_cols, axis=1)
        df['highway'] = round(highway_cols['mean'])

        # cycleway column
        df['cycleway'] = df['cycleway'].str.replace(r'[^\w\s-]', '',
                                                    regex=True)
        cycleway_cols = pd.DataFrame(df.cycleway.str.split(' ', expand=True))
        cycleway_map = {
            'opposite': 9, 'lane': 9, 'share_busway': 8, 'shared_lane': 8,
            'segregated': 10, 'no': 1, 'opposite_lane': 9, 'crossing': 10,
            'track': 10, 'designated': 10, 'opposite_share_busway': 8,
            'seperate': 10, 'shoulder': 8
        }
        for column in cycleway_cols:
            cycleway_cols[column] = cycleway_cols[column].map(cycleway_map)
        cycleway_cols['mean'] = np.nanmean(cycleway_cols, axis=1)
        df['cycleway'] = round(cycleway_cols['mean'])

        # surface column
        df['surface'] = df['surface'].str.replace(r'[^\w\s-]', '',
                                                  regex=True)
        surface_cols = pd.DataFrame(df.surface.str.split(' ', expand=True))
        surface_map = {
            'asphalt': 10, 'paved': 10, 'cobblestone': 5, 'fine_gravel': 9,
            'ground': 7, 'sett': 6, 'gravel': 7, 'metal': 6, 'compacted': 10,
            'dirt': 6, 'paving_stones': 7, 'grass_paver': 5, 'unpaved': 8,
            'pebblestone': 9, 'concrete': 10, 'grass': 5, 'mud': 1
        }
        for column in surface_cols:
            surface_cols[column] = surface_cols[column].map(surface_map)
        surface_cols['mean'] = np.nanmean(surface_cols, axis=1)
        df['surface'] = round(surface_cols['mean'])

        # maxspeed column
        df.loc[df['maxspeed'] > 110, 'maxspeed'] = 110
        df.loc[df['maxspeed'] < 20, 'maxspeed'] = 20
        maxspeed_map = {
            20: 10, 30: 9, 40: 8, 50: 7, 60: 6, 70: 5, 80: 4, 90: 3,
            100: 2, 110: 1
        }
        df['maxspeed'] = df['maxspeed'].map(maxspeed_map)

        # lanes column
        df.loc[df['lanes'] > 8, 'lanes'] = 8
        lanes_map = {1: 10, 2: 9, 3: 5, 4: 5, 5: 3, 6: 3, 7: 2, 8: 1}
        df['lanes'] = df['lanes'].map(lanes_map)

        # oneway column
        oneway_map = {0: 5, 1: 10, -1: 5}
        df['oneway'] = df['oneway'].map(oneway_map)

        # width column
        df.loc[df['width'] < 2, 'width'] = 1
        df.loc[df['width'] > 6, 'width'] = 6
        df['width'] = round(df['width'])
        width_map = {1: 1, 2: 2, 3: 5, 4: 7, 5: 9, 6: 10}
        df['width'] = df['width'].map(width_map)

        # normalize centrality column (between 0 and 10)
        df['centrality'] = ((df['centrality'] - np.min(df['centrality'])) /
                            (np.max(df['centrality']) -
                             np.min(df['centrality']))) * 10

        # Switch to new df for calculation
        d_frame = df.copy(deep=True)

        # Multiply variables by weights
        d_frame['cycleway'] = d_frame['cycleway'] * 0.208074534
        d_frame['surface'] = d_frame['surface'] * 0.108695652
        d_frame['highway'] = d_frame['highway'] * 0.167701863
        d_frame['maxspeed'] = d_frame['maxspeed'] * 0.189440994
        d_frame['lanes'] = d_frame['lanes'] * 0.108695652
        d_frame['centrality'] = d_frame['centrality'] * 0.071428571
        d_frame['width'] = d_frame['width'] * 0.086956522
        d_frame['oneway'] = d_frame['oneway'] * 0.059006211

        # Combine the weighted variables into a single index scaled to ~0-100
        d_frame['index'] = (np.nanmean(d_frame[[
            'cycleway', 'highway', 'surface', 'maxspeed', 'lanes', 'width',
            'oneway', 'centrality'
        ]], axis=1, dtype='float64')) * 80

        # Final statistics index of city
        mean_index = np.average(d_frame['index'], weights=d_frame['length'])
        max_index = d_frame['index'].max()
        min_index = d_frame['index'].min()
        std_index = d_frame['index'].std()

        # Plot result
        # d_frame.plot(column='index', legend=True)

        # Result dictionary
        result = {
            'place': place,
            'average_index': mean_index,
            'max_index': max_index,
            'min_index': min_index,
            'std_index': std_index
        }
    else:
        # Get bounding box for place
        place_name = place
        area = ox.geocode_to_gdf(place_name)  # graph first
        xmin, ymin, xmax, ymax = area.total_bounds

        # divide into grids: x = lon, y = lat
        height = 0.041667
        width = 0.041667
        rows = int(np.ceil((ymax - ymin) / height))
        cols = int(np.ceil((xmax - xmin) / width))
        XleftOrigin = xmin
        XrightOrigin = xmin + width
        YtopOrigin = ymax
        YbottomOrigin = ymax - height
        polygons = []
        for i in range(cols):
            Ytop = YtopOrigin
            Ybottom = YbottomOrigin
            for j in range(rows):
                polygons.append(
                    Polygon([(XleftOrigin, Ytop), (XrightOrigin, Ytop),
                             (XrightOrigin, Ybottom),
                             (XleftOrigin, Ybottom)]))
                Ytop = Ytop - height
                Ybottom = Ybottom - height
            XleftOrigin = XleftOrigin + width
            XrightOrigin = XrightOrigin + width

        # Ensure the grids are within the polygon
        grid_list = []
        for i in range(len(polygons)):
            p = Point(polygons[i].centroid.x, polygons[i].centroid.y)
            geome = shape(polygons[i])
            q = gpd.GeoDataFrame({'geometry': geome}, index=[0])
            q = q.set_crs("EPSG:4326")
            if area.geometry.iloc[0].contains(polygons[i]) == True:
                grid_list.append(q)
            # elif p.within(area.geometry.iloc[0]) == True and area.geometry.iloc[0].contains(polygons[i]) == False:
            elif area.geometry.iloc[0].intersects(polygons[i]):
                # grid_list.append(polygons[i])
                clip = gpd.clip(area, q)
                grid_list.append(clip)

        # Initialize important variables
        dflist = []
        exception_grids = []
        dfs = []
        for i in tqdm(range(len(grid_list))):
            # graph
            useful_tags_way = [
                'bridge', 'length', 'oneway', 'lanes', 'ref', 'name',
                'highway', 'maxspeed', 'surface', 'area', 'landuse',
                'width', 'est_width', 'junction', 'cycleway'
            ]
            ox.utils.config(useful_tags_way=useful_tags_way)  # = useful_tags_path change 2
            try:
                box_graph = ox.graph_from_polygon(
                    grid_list[i].geometry.iloc[0], network_type='bike',
                    retain_all=True)
            except Exception as e:
                print('{} at grid {}, skip grid'.format(e, i + 1))
                exception_grids.append(i + 1)
                continue

            # Calculate and add edge closeness centrality (connectedness)
            centrality = nx.degree_centrality(nx.line_graph(box_graph))
            nx.set_edge_attributes(box_graph, centrality, 'centrality')

            # Extract nodes and edges to geopandas from graph
            try:
                edges = ox.graph_to_gdfs(box_graph, nodes=False)
            except Exception as e:
                print('{} at grid {}, skip grid'.format(e, i + 1))
                exception_grids.append(i + 1)
                continue

            # Select only the important variables
            cols = [
                'highway', 'cycleway', 'surface', 'maxspeed', 'length',
                'lanes', 'oneway', 'width', 'centrality', 'geometry'
            ]
            try:
                df = edges.loc[:, cols]
            except KeyError as e:
                print('{} at grid {}, skip grid'.format(e, i + 1))
                exception_grids.append(i + 1)
                continue

            # Set appropriate data types
            df['maxspeed'] = pd.to_numeric(df['maxspeed'], errors='coerce',
                                           downcast='integer')
            df['lanes'] = pd.to_numeric(df['lanes'], errors='coerce',
                                        downcast='integer')
            df['width'] = pd.to_numeric(df['width'], errors='coerce',
                                        downcast='unsigned')
            df['highway'] = df['highway'].astype(str)
            df['surface'] = df['surface'].astype(str)
            df['oneway'] = df['oneway'].astype(int)
            df['cycleway'] = df['cycleway'].astype(str)

            # Dataframe cleaning and preprocessing
            # highway column
            df['highway'] = df['highway'].str.replace(r'[^\w\s-]', '',
                                                      regex=True)
            highway_cols = pd.DataFrame(df.highway.str.split(' ',
                                                             expand=True))
            highway_map = {
                'service': 6, 'None': np.nan, 'residential': 8,
                'unclassified': 7, 'footway': 7, 'track': 5,
                'tertiary_link': 6, 'tertiary': 6, 'living_street': 9,
                'path': 5, 'pedestrian': 7, 'secondary': 5,
                'secondary_link': 5, 'primary': 2, 'steps': 2,
                'cycleway': 10, 'rest_area': 5, 'primary_link': 2,
                'ferry': 1, 'construction': 2, 'byway': 8, 'bridleway': 6,
                'trunk': 2, 'trunk_link': 2, 'motorway': 1,
                'motorway_link': 1
            }
            for column in highway_cols:
                highway_cols[column] = highway_cols[column].map(highway_map)
            highway_cols['mean'] = np.nanmean(highway_cols, axis=1)
            df['highway'] = round(highway_cols['mean'])

            # cycleway column
            df['cycleway'] = df['cycleway'].str.replace(r'[^\w\s-]', '',
                                                        regex=True)
            cycleway_cols = pd.DataFrame(df.cycleway.str.split(' ',
                                                               expand=True))
            cycleway_map = {
                'opposite': 9, 'lane': 9, 'share_busway': 8,
                'shared_lane': 8, 'segregated': 10, 'no': 1,
                'opposite_lane': 9, 'crossing': 10, 'track': 10,
                'designated': 10, 'opposite_share_busway': 8,
                'seperate': 10, 'shoulder': 8
            }
            for column in cycleway_cols:
                cycleway_cols[column] = cycleway_cols[column].map(
                    cycleway_map)
            cycleway_cols['mean'] = np.nanmean(cycleway_cols, axis=1)
            df['cycleway'] = round(cycleway_cols['mean'])

            # surface column
            df['surface'] = df['surface'].str.replace(r'[^\w\s-]', '',
                                                      regex=True)
            surface_cols = pd.DataFrame(df.surface.str.split(' ',
                                                             expand=True))
            surface_map = {
                'asphalt': 10, 'paved': 10, 'cobblestone': 3,
                'fine_gravel': 9, 'ground': 6, 'sett': 4, 'gravel': 7,
                'metal': 7, 'compacted': 9, 'dirt': 6, 'paving_stones': 7,
                'grass_paver': 4, 'unpaved': 7, 'pebblestone': 7,
                'concrete': 10, 'grass': 5, 'mud': 2, 'sand': 5, 'wood': 4,
                'earth': 6, 'woodchips': 3, 'snow': 2, 'ice': 2, 'salt': 2
            }
            for column in surface_cols:
                surface_cols[column] = surface_cols[column].map(surface_map)
            surface_cols['mean'] = np.nanmean(surface_cols, axis=1)
            df['surface'] = round(surface_cols['mean'])

            # maxspeed column
            df.loc[df['maxspeed'] > 110, 'maxspeed'] = 110
            df.loc[df['maxspeed'] < 20, 'maxspeed'] = 20
            df['maxspeed'] = round(df['maxspeed'], -1)
            maxspeed_map = {
                20: 10, 30: 9, 40: 8, 50: 7, 60: 6, 70: 5, 80: 4, 90: 3,
                100: 2, 110: 1
            }
            df['maxspeed'] = df['maxspeed'].map(maxspeed_map)

            # lanes column
            df.loc[df['lanes'] > 8, 'lanes'] = 8
            lanes_map = {1: 10, 2: 9, 3: 5, 4: 5, 5: 3, 6: 3, 7: 2, 8: 1}
            df['lanes'] = df['lanes'].map(lanes_map)

            # oneway column
            oneway_map = {0: 5, 1: 10, -1: 5}
            df['oneway'] = df['oneway'].map(oneway_map)

            # width column
            df.loc[df['width'] < 2, 'width'] = 1
            df.loc[df['width'] > 6, 'width'] = 6
            df['width'] = round(df['width'])
            width_map = {1: 1, 2: 2, 3: 5, 4: 7, 5: 9, 6: 10}
            df['width'] = df['width'].map(width_map)

            # normalize centrality column (between 0 and 10)
            df['centrality'] = ((df['centrality'] -
                                 np.min(df['centrality'])) /
                                (np.max(df['centrality']) -
                                 np.min(df['centrality']))) * 10

            # Switch to new df for calculation
            d_frame = df.copy(deep=True)

            # Multiply variables by weights
            d_frame['cycleway'] = d_frame['cycleway'] * 0.208074534
            d_frame['surface'] = d_frame['surface'] * 0.108695652
            d_frame['highway'] = d_frame['highway'] * 0.167701863
            d_frame['maxspeed'] = d_frame['maxspeed'] * 0.189440994
            d_frame['lanes'] = d_frame['lanes'] * 0.108695652
            d_frame['centrality'] = d_frame['centrality'] * 0.071428571
            d_frame['width'] = d_frame['width'] * 0.086956522
            d_frame['oneway'] = d_frame['oneway'] * 0.059006211

            d_frame['index'] = (np.nanmean(d_frame[[
                'cycleway', 'highway', 'surface', 'maxspeed', 'lanes',
                'width', 'oneway', 'centrality'
            ]], axis=1, dtype='float64')) * 80
            d_frame['grid_index'] = np.average(d_frame['index'],
                                               weights=d_frame['length'])
            dflist.append(d_frame)
            dfs.append(df)

        # Final statistics index of city in dictionary
        df_indexes = pd.concat(dflist)
        result = {
            'place': place_name,
            'average_index': np.average(df_indexes['index'],
                                        weights=df_indexes['length']),
            'max_index': df_indexes['index'].max(),
            'min_index': df_indexes['index'].min(),
            'std_index': df_indexes['index'].std(),
            'grids': len(grid_list),
            'nsegments': len(df_indexes),
            'unused_grids': len(exception_grids)
        }

    if data == False:
        return result
    else:
        return (d_frame, result)
def test_returns_gdf(point_gdf, single_rectangle_gdf):
    """Test that function returns a GeoDataFrame (or GDF-like) object."""
    out = clip(point_gdf, single_rectangle_gdf)
    assert isinstance(out, GeoDataFrame)
def main(mosaic, data, dest, ntl, bbox, country):
    os.makedirs(dest, exist_ok=True)
    os.makedirs(dest + '/pre-event', exist_ok=True)
    os.makedirs(dest + '/post-event', exist_ok=True)

    # create raster mosaic for rasters with same name (~ same area)
    print('creating mosaic of overlapping rasters')
    if mosaic:
        for prepost in ['pre', 'post']:
            filenames = os.listdir(os.path.join(data, prepost + '-event'))
            groups = []
            for filename in filenames:
                name = filename.split('-')[1]
                same = sorted(
                    [x for x in filenames if x.split('-')[1] == name])
                if same not in groups and len(same) > 1:
                    groups.append(same)
            for group in groups:
                out_file = group[0].split('.')[0] + '-merged.tif'
                for ix, file in enumerate(group):
                    if ix == 0:
                        os.system('gdalwarp -r average {} {} {}'.format(
                            os.path.join(data, prepost + '-event', file),
                            os.path.join(data, prepost + '-event',
                                         group[ix + 1]),
                            os.path.join(dest, prepost + '-event',
                                         out_file)))
                    elif ix == 1:
                        continue
                    else:
                        os.system('gdalwarp -r average {} {} {}'.format(
                            os.path.join(data, prepost + '-event', file),
                            os.path.join(dest, prepost + '-event', out_file),
                            os.path.join(dest, prepost + '-event',
                                         out_file)))
            # copy all the other rasters to dest
            merged = [item for group in groups for item in group]
            for file in [x for x in filenames if x not in merged]:
                copyfile(os.path.join(data, prepost + '-event', file),
                         os.path.join(dest, prepost + '-event', file))

    # filter pre-event rasters
    print('filtering pre-event rasters')

    # filter by bounding box (if provided)
    if bbox != '':
        bbox_tuple = [float(x) for x in bbox.split(',')]
        bbox = box(bbox_tuple[0], bbox_tuple[1], bbox_tuple[2],
                   bbox_tuple[3])
        geo = gpd.GeoDataFrame({'geometry': bbox}, index=[0],
                               crs=from_epsg(4326))
        coords = getFeatures(geo)
        print('filtering on bbox:')
        print(coords)

        # loop over images and filter
        for raster in tqdm(glob.glob(dest + '/pre-event/*.tif')):
            raster = raster.replace('\\', '/')
            raster_or = raster
            out_name = raster.split('.')[0] + '-bbox.tif'
            with rasterio.open(raster) as src:
                print('cropping on bbox')
                try:
                    out_img, out_transform = mask(dataset=src,
                                                  shapes=coords, crop=True)
                    out_meta = src.meta.copy()
                    out_meta.update({
                        'height': out_img.shape[1],
                        'width': out_img.shape[2],
                        'transform': out_transform
                    })
                    print('saving', out_name)
                    with rasterio.open(out_name, 'w', **out_meta) as dst:
                        dst.write(out_img)
                except Exception:
                    print('empty raster, discard')
                    os.remove(raster_or)

    # filter by nighttime lights
    # load nighttime light mask
    ntl_shapefile = 'input/ntl_mask_extended.shp'
    if ntl:
        # filter mask by country (if provided)
        if country != '':
            country_ntl_shapefile = (ntl_shapefile.split('.')[0] + '_' +
                                     country.lower() + '.shp')
            if not os.path.exists(country_ntl_shapefile):
                ntl_world = gpd.read_file(ntl_shapefile)
                ntl_world.crs = 'EPSG:4326'
                ntl_world = ntl_world.to_crs("EPSG:4326")
                world = gpd.read_file(
                    gpd.datasets.get_path('naturalearth_lowres'))
                country_shape = world[world.name == country]
                if country_shape.empty:
                    print('WARNING: country', country, 'not found!!!')
                    print('available countries:')
                    print(world.name.unique())
                    print('proceeding with global mask')
                    country_ntl_shapefile = ntl_shapefile
                else:
                    country_shape = country_shape.reset_index()
                    country_shape.at[0, 'geometry'] = box(
                        *country_shape.at[0, 'geometry'].bounds)
                    country_shape.geometry = country_shape.geometry.scale(
                        xfact=1.1, yfact=1.1)
                    ntl_country = gpd.clip(ntl_world, country_shape)
                    ntl_country.to_file(country_ntl_shapefile)
            with fiona.open(country_ntl_shapefile, "r") as shapefile:
                shapes = [feature["geometry"] for feature in shapefile]
        else:
            with fiona.open(ntl_shapefile, "r") as shapefile:
                shapes = [feature["geometry"] for feature in shapefile]

        # loop over images and filter
        for raster in tqdm(glob.glob(dest + '/pre-event/*.tif')):
            raster = raster.replace('\\', '/')
            raster_or = raster
            out_name = raster.split('.')[0] + '-ntl.tif'
            if 'ntl' in raster:
                continue
            crop_next = True
            print('processing', raster)
            out_name_ntl = raster.split('.')[0] + '-ntl-mask.tif'
            try:
                with rasterio.open(raster) as src:
                    shapes_r = [
                        x for x in shapes
                        if not rasterio.coords.disjoint_bounds(
                            src.bounds, rasterio.features.bounds(x))
                    ]
                    if len(shapes_r) == 0:
                        print('no ntl present, discard')
                        crop_next = False
                    else:
                        print('ntl present, creating mask')
                        out_image, out_transform = rasterio.mask.mask(
                            src, shapes_r, crop=True)
                        out_meta = src.meta
                        out_meta.update({
                            "driver": "GTiff",
                            "height": out_image.shape[1],
                            "width": out_image.shape[2],
                            "transform": out_transform
                        })
                        # save temporary ntl file
                        print('saving mask', out_name_ntl)
                        with rasterio.open(out_name_ntl, "w",
                                           **out_meta) as dst:
                            dst.write(out_image)
                        crop_next = True
                        raster = out_name_ntl
                if crop_next:
                    with rasterio.open(raster) as src:
                        print('cropping nan on', raster)
                        window = get_data_window(src.read(1, masked=True))
                        kwargs = src.meta.copy()
                        kwargs.update({
                            'height': window.height,
                            'width': window.width,
                            'transform': rasterio.windows.transform(
                                window, src.transform)
                        })
                        print('saving', out_name)
                        try:
                            with rasterio.open(out_name, 'w',
                                               **kwargs) as dst:
                                dst.write(src.read(window=window))
                        except Exception:
                            print('empty raster, discard')
                    # remove temporary ntl file
                    os.remove(raster)
                    # remove original raster
                    os.remove(raster_or)
            except Exception:
                print('error loading raster, skipping')
def test_clip_points(point_gdf, single_rectangle_gdf):
    """Test clipping a points GDF with a generic polygon geometry."""
    clip_pts = clip(point_gdf, single_rectangle_gdf)
    pts = np.array([[2, 2], [3, 4], [9, 8]])
    exp = GeoDataFrame([Point(xy) for xy in pts], columns=["geometry"],
                       crs="EPSG:4326")
    assert_geodataframe_equal(clip_pts, exp)
def test_clip_box_overlap(pointsoutside_overlap_gdf, single_rectangle_gdf):
    """Test clip when the intersection is empty but bounding boxes overlap."""
    clipped = clip(pointsoutside_overlap_gdf, single_rectangle_gdf)
    assert len(clipped) == 0