Example #1
def test_warning_crs_mismatch(point_gdf, single_rectangle_gdf):
    with pytest.warns(UserWarning, match="CRS mismatch between the CRS"):
        clip(point_gdf, single_rectangle_gdf.to_crs(3857))
Example #2
def test_not_gdf(single_rectangle_gdf):
    """Non-GeoDataFrame inputs raise attribute errors."""
    with pytest.raises(TypeError):
        clip((2, 3), single_rectangle_gdf)
    with pytest.raises(TypeError):
        clip(single_rectangle_gdf, (2, 3))
Example #3
def test_returns_series(point_gdf, single_rectangle_gdf):
    """Test that function returns a GeoSeries if GeoSeries is passed."""
    out = clip(point_gdf.geometry, single_rectangle_gdf)
    assert isinstance(out, GeoSeries)
Example #4
          loc='lower left')

ax2 = fig.add_axes([.67, .22, .25, .2])
ax2.get_xaxis().set_ticks([])
ax2.get_yaxis().set_ticks([])

northeast_states = us_map[us_map['NAME'].isin([
    'Vermont', 'New Hampshire', 'Massachusetts', 'Connecticut', 'Rhode Island',
    'New York', 'Pennsylvania', 'Maine', 'Virginia'
])]

# Dissolve the selected states into a single boundary polygon
northeast_states['dissolve'] = 1
ne_bound = northeast_states.dissolve('dissolve')

gpd.clip(us_map,
         box(*ne_bound.geometry.bounds.iloc[0].tolist())).plot(ax=ax2,
                                                               edgecolor='k')
us_map[us_map['NAME'] == 'Vermont'].plot(ax=ax2, color='g')

gpd.GeoDataFrame(
    geometry=[bounds.to_crs(us_map.crs).boundary.iloc[0][0]]).plot(color='r',
                                                                   ax=ax2)

ax2.margins(0, 0)
ax2.set_title('Location in NE US')
#ax.set_title('Study Area', fontsize = 'xx-large')

plt.savefig(os.path.join('results', 'misc_charts', 'study_area.png'),
            bbox_inches='tight')
#%% calculate proportion of
Example #5
def reservoirs(wtshd):
    data = []  # stores all the variables we need; appended to a pandas DataFrame at the end
    watershed = gpd.read_file(wtshd)
    geom_buffer = watershed.buffer(0)    # it was 400 before, Farshid changed it to 0
    watershed1 = gpd.read_file(wtshd)
    watershed1['geometry'] = geom_buffer
    watershed_prj = watershed1.to_crs('epsg:4269')
    watershed_prj_path = os.path.join(path_dict['tempFolder_path'], 'watershed_prj.shp')
    watershed_prj.to_file(watershed_prj_path)
    watershed_prj_noBuffer = watershed.to_crs('epsg:4269')

    # to get basin's ID:
    # the column_name is different in shapefile directories:
    if "GAGE_ID" in watershed.columns:    # this is for Conus shapefiles,
        data.append(watershed_prj_noBuffer['GAGE_ID'][0])
    elif "HUC10" in watershed.columns:   # this is for running SRB-HUC10 shapefiles
        data.append(watershed_prj_noBuffer['HUC10'][0])

    # to get basin's area:
    # The column_name is different in different shapefiles
    if "AREA" in watershed.columns:
       data.append(watershed_prj_noBuffer['AREA'][0])
    elif "AreaSqKm" in watershed.columns:
        data.append(watershed_prj_noBuffer['AreaSqKm'][0] * 1e6)    # converting sqkm to sqm

    if needDEM and (data[1] < 24e9):  # ~24e9 sq. m is the largest area we can mosaic;
        # anything larger causes memory issues. Needs to be fixed in the future.
        flowAccu_temp_path, AccuProcess = create_FlowAccu_tif_file(watershed_prj_path, path_dict)
        ### AccuProcess == True  --> at least one tif file overlaps the watershed
        ### AccuProcess == False --> no tif file overlaps the watershed
    else:
        AccuProcess = False

    if AccuProcess:
        watershed_outlet = find_watershed_outlet(watershed_prj, flowAccu_temp_path, gages_prj)
    else:
        watershed_outlet = ["_", "_", "_", "_"]

    data.extend(watershed_outlet)

    ## finding all major dams inside a watershed
    selected_maj_dams = gpd.clip(dams_shp_prj, watershed_prj_noBuffer)
    # selected_maj_dams = selected_dams.loc[(selected_dams['DAM_HEIGHT'] >= 50) |
    #                                       (selected_dams['NORMAL_STORAGE'] >= 5000)]
    MAJ_NDAMS = len(selected_maj_dams)  # number of dams inside a watershed
    data.append(MAJ_NDAMS)

    if MAJ_NDAMS > 0:  # at least one dam in the watershed

        # now calculate the Minimum and Mean Distance between the dams and watershed_outlet
        # and to get "General_Purpose" of watershed
        distance_outlet_dams = []
        dam_ID = []
        for i, point in enumerate(selected_maj_dams['geometry']):
            x = point.xy[0][0]
            y = point.xy[1][0]
            dam_ID.append(selected_maj_dams['NID_ID_Cod'].values[i])    #NID_ID_Cod

            # calculating distance between outlet and dams
            if (watershed_outlet[2] != "_") and (watershed_outlet[3] != "_"):
                dam_point = (x, y)
                outlet_point = (watershed_outlet[2], watershed_outlet[3])
                distance = hs.haversine(dam_point, outlet_point, unit=hs.Unit.METERS)
                distance_outlet_dams.append(distance)
            else:
                distance_outlet_dams.append(-999)   # it was np.nan before

        data.append(np.nanmin(distance_outlet_dams))  # distance of nearest dam from the outlet
        data.append(np.nanmean(distance_outlet_dams))  # average distance of dams from outlet

        # To calculate General Purpose of each watershed based on the major dams inside it
        dam = dams_info_gdf.loc[dams_info_gdf["NIDID"].isin(dam_ID)]
        if len(dam) > 0:  # sometimes some major dams in shape file are not in the excel file
            general_purpose, max_norm_stor, std_norm_stor = general_purpose_watershed(dam, dams_shp_prj)
        else:   # sometimes the dataset has some problem
            general_purpose = -1
            max_norm_stor = 0
            std_norm_stor = 0

    else:  # No dam in watershed
        data.extend([-999, -999])  # placeholders for min and mean dam-outlet distances (no dams in watershed)
        general_purpose = -1
        max_norm_stor = 0
        std_norm_stor = 0

    data.append(general_purpose)
    data.append(max_norm_stor)
    data.append(std_norm_stor)
    ### finding NDAMS_2009 and normal storage for all dams:
    NDAMS, STOR_NOR_2009 = finding_NDAMS(watershed_prj_noBuffer, dams_info_gdf, path_dict, data)

    data.append(NDAMS)
    data.append(STOR_NOR_2009)

    print("Watershed:       ", os.path.split(wtshd)[-1])
    return data
Example #6
def populate_sample_cell(
    sample_cell: Polygon,
    sample_cell_area: float,
    traces_sindex: PyGEOSSTRTreeIndex,
    traces: gpd.GeoDataFrame,
    nodes: gpd.GeoDataFrame,
    snap_threshold: float,
    resolve_branches_and_nodes: bool,
) -> Dict[str, float]:
    """
    Take a single grid polygon and populate it with parameters.

    Mauldon determination requires that E-nodes are defined for
    every single sample circle. If correct Mauldon values are
    wanted `resolve_branches_and_nodes` must be passed as True.
    This will result in much longer analysis time.

    """
    _centroid = sample_cell.centroid
    if not isinstance(_centroid, Point):
        raise TypeError("Expected Point centroid.")
    centroid = _centroid
    sample_circle = safe_buffer(centroid, np.sqrt(sample_cell_area) * 1.5)
    sample_circle_area = sample_circle.area
    assert sample_circle_area > 0

    # Choose geometries that are either within the sample_circle or
    # intersect it. Use spatial indexing to filter to only the
    # spatially relevant traces and nodes.
    trace_candidates_idx = spatial_index_intersection(
        traces_sindex, geom_bounds(sample_circle))
    trace_candidates = traces.iloc[trace_candidates_idx]

    assert isinstance(trace_candidates, gpd.GeoDataFrame)

    if len(trace_candidates) == 0:
        return determine_topology_parameters(
            trace_length_array=np.array([]),
            node_counts=determine_node_type_counts(np.array([]),
                                                   branches_defined=True),
            area=sample_circle_area,
        )
    if resolve_branches_and_nodes:
        # Solve branches and nodes for each cell if wanted
        # Only way to make sure Mauldon parameters are correct
        _, nodes = branches_and_nodes(
            traces=trace_candidates,
            areas=gpd.GeoSeries([sample_circle], crs=traces.crs),
            snap_threshold=snap_threshold,
        )
    # node_candidates_idx = list(nodes_sindex.intersection(sample_circle.bounds))
    node_candidates_idx = spatial_index_intersection(
        spatial_index=pygeos_spatial_index(nodes),
        coordinates=geom_bounds(sample_circle),
    )

    node_candidates = nodes.iloc[node_candidates_idx]

    # Crop traces to sample circle
    # First check if any geometries intersect
    # If not: sample_features is an empty GeoDataFrame
    if any(
            trace_candidate.intersects(sample_circle)
            for trace_candidate in trace_candidates.geometry.values):
        sample_traces = crop_to_target_areas(
            traces=trace_candidates,
            areas=gpd.GeoSeries([sample_circle]),
            is_filtered=True,
            keep_column_data=False,
        )
    else:
        sample_traces = traces.iloc[0:0]
    if any(node.intersects(sample_circle) for node in nodes.geometry.values):
        # if any(nodes.intersects(sample_circle)):
        # TODO: Is node clipping stable?
        sample_nodes = gpd.clip(node_candidates, sample_circle)
        assert sample_nodes is not None
        assert all(
            isinstance(val, Point) for val in sample_nodes.geometry.values)
    else:
        sample_nodes = nodes.iloc[0:0]

    assert isinstance(sample_nodes, gpd.GeoDataFrame)
    assert isinstance(sample_traces, gpd.GeoDataFrame)

    sample_node_type_values = sample_nodes[CLASS_COLUMN].values
    assert isinstance(sample_node_type_values, np.ndarray)

    node_counts = determine_node_type_counts(sample_node_type_values,
                                             branches_defined=True)

    topology_parameters = determine_topology_parameters(
        trace_length_array=sample_traces.geometry.length.values,
        node_counts=node_counts,
        area=sample_circle_area,
        correct_mauldon=resolve_branches_and_nodes,
    )
    return topology_parameters
Example #7
def clip_by_shape(self, other_gdf):
    """Clip this GDF by another GDF."""
    self.gdf = gpd.clip(self.gdf, other_gdf)
Example #8
def test_clip_lines(two_line_gdf, single_rectangle_gdf):
    """Test what happens when you give the clip_extent a line GDF."""
    clip_line = clip(two_line_gdf, single_rectangle_gdf)
    assert len(clip_line.geometry) == 2
Example #9
def test_clip_with_multipolygon(buffered_locations, single_rectangle_gdf):
    """Test clipping a polygon with a multipolygon."""
    multi = buffered_locations.dissolve(by="type").reset_index()
    clipped = clip(single_rectangle_gdf, multi)
    assert clipped.geom_type[0] == "Polygon"
Example #10
def test_clip_poly_series(buffered_locations, single_rectangle_gdf):
    """Test clipping a polygon GDF with a generic polygon geometry."""
    clipped_poly = clip(buffered_locations.geometry, single_rectangle_gdf)
    assert len(clipped_poly) == 3
    assert all(clipped_poly.geom_type == "Polygon")
Example #11
def test_clip_multiline(multi_line, single_rectangle_gdf):
    """Test that clipping a multiline feature with a poly returns expected output."""
    clipped = clip(multi_line, single_rectangle_gdf)
    assert clipped.geom_type[0] == "MultiLineString"
Example #12
def xr_animation(ds,
                 bands=None,
                 output_path='animation.mp4',
                 width_pixels=500,
                 interval=100,                 
                 percentile_stretch=(0.02, 0.98),
                 image_proc_funcs=None,
                 show_gdf=None,
                 show_date='%d %b %Y',
                 show_text=None,
                 show_colorbar=True,
                 gdf_kwargs={},
                 annotation_kwargs={},
                 imshow_kwargs={},
                 colorbar_kwargs={},
                 limit=None):
    
    """
    Takes an `xarray` timeseries and animates the data as either a 
    three-band (e.g. true or false colour) or single-band animation, 
    allowing changes in the landscape to be compared across time.
    
    Animations can be customised to include text and date annotations 
    or use specific combinations of input bands. Vector data can be 
    overlaid and animated on top of imagery, and custom image 
    processing functions can be applied to each frame.
    
    Supports .mp4 (ideal for Twitter/social media) and .gif (ideal 
    for all purposes, but can have large file sizes) format files. 
    
    Last modified: October 2020
    
    Parameters
    ----------  
    ds : xarray.Dataset
        An xarray dataset with multiple time steps (i.e. multiple 
        observations along the `time` dimension).        
    bands : list of strings
        A list of either one or three band names to be plotted, 
        all of which must exist in `ds`. 
    output_path : str, optional
        A string giving the output location and filename of the 
        resulting animation. File extensions of '.mp4' and '.gif' are 
        accepted. Defaults to 'animation.mp4'.
    width_pixels : int, optional
        An integer defining the output width in pixels for the 
        resulting animation. The height of the animation is set 
        automatically based on the dimensions/ratio of the input 
        xarray dataset. Defaults to 500 pixels wide.        
    interval : int, optional
        An integer defining the milliseconds between each animation 
        frame used to control the speed of the output animation. Higher
        values result in a slower animation. Defaults to 100 
        milliseconds between each frame.         
    percentile_stretch : tuple of floats, optional
        An optional tuple of two floats that can be used to clip one or
        three-band arrays by percentiles to produce a more vibrant, 
        visually attractive image that is not affected by outliers/
        extreme values. The default is `(0.02, 0.98)` which is 
        equivalent to xarray's `robust=True`. This parameter is ignored
        completely if `vmin` and `vmax` are provided as kwargs to
        `imshow_kwargs`.
    image_proc_funcs : list of funcs, optional
        An optional list containing functions that will be applied to 
        each animation frame (timestep) prior to animating. This can 
        include image processing functions such as increasing contrast, 
        unsharp masking, saturation etc. The function should take AND 
        return a `numpy.ndarray` with shape [y, x, bands]. If your 
        function has parameters, you can pass in custom values using 
        a lambda function:
        `image_proc_funcs=[lambda x: custom_func(x, param1=10)]`.
    show_gdf: geopandas.GeoDataFrame, optional
        Vector data (e.g. ESRI shapefiles or GeoJSON) can be optionally
        plotted over the top of imagery by supplying a 
        `geopandas.GeoDataFrame` object. To customise colours used to
        plot the vector features, create a new column in the
        GeoDataFrame called 'color' specifying the colour used to plot 
        each feature: e.g. `gdf['color'] = 'red'`.
        To plot vector features at specific moments in time during the
        animation, create new 'start_time' and/or 'end_time' columns in
        the GeoDataFrame that define the time range used to plot each 
        feature. Dates can be provided in any string format that can be 
        converted using `pandas.to_datetime()`, e.g.
        `gdf['end_time'] = ['2001', '2005-01', '2009-01-01']`    
    show_date : string or bool, optional
        An optional string or bool that defines how (or if) to plot 
        date annotations for each animation frame. Defaults to 
        '%d %b %Y'; can be customised to any format understood by 
        strftime (https://strftime.org/). Set to False to remove date 
        annotations completely.       
    show_text : str or list of strings, optional
        An optional string or list of strings with a length equal to 
        the number of timesteps in `ds`. This can be used to display a 
        static text annotation (using a string), or a dynamic title 
        (using a list) that displays different text for each timestep. 
        By default, no text annotation will be plotted.        
    show_colorbar : bool, optional
        An optional boolean indicating whether to include a colourbar 
        for single-band animations. Defaults to True.
    gdf_kwargs : dict, optional
        An optional dictionary of keyword arguments to customise the 
        appearance of a `geopandas.GeoDataFrame` supplied to 
        `show_gdf`. Keyword arguments are passed to `GeoSeries.plot` 
        (see http://geopandas.org/reference.html#geopandas.GeoSeries.plot). 
        For example: `gdf_kwargs = {'linewidth': 2}`. 
    annotation_kwargs : dict, optional
        An optional dict of keyword arguments for controlling the 
        appearance of text annotations. Keyword arguments are passed 
        to `matplotlib`'s `plt.annotate` 
        (see https://matplotlib.org/api/_as_gen/matplotlib.pyplot.annotate.html 
        for options). For example, `annotation_kwargs={'fontsize':20, 
        'color':'red', 'family':'serif'}`.  
    imshow_kwargs : dict, optional
        An optional dict of keyword arguments for controlling the 
        appearance of arrays passed to `matplotlib`'s `plt.imshow` 
        (see https://matplotlib.org/api/_as_gen/matplotlib.pyplot.imshow.html 
        for options). For example, a green colour scheme and custom
        stretch could be specified using: 
        `imshow_kwargs={'cmap':'Greens', 'vmin':0.2, 'vmax':0.9}`.
        (some parameters like 'cmap' will only have an effect for 
        single-band animations, not three-band RGB animations).
    colorbar_kwargs : dict, optional
        An optional dict of keyword arguments used to control the 
        appearance of the colourbar. Keyword arguments are passed to
        `matplotlib.pyplot.tick_params` 
        (see https://matplotlib.org/api/_as_gen/matplotlib.pyplot.tick_params.html
        for options). This can be used to customise the colourbar 
        ticks, e.g. changing tick label colour depending on the 
        background of the animation: 
        `colorbar_kwargs={'colors': 'black'}`.
    limit: int, optional
        An optional integer specifying how many animation frames to 
        render (e.g. `limit=50` will render the first 50 frames). This
        can be useful for quickly testing animations without rendering 
        the entire time-series.    
            
    """

    def _start_end_times(gdf, ds):
        """
        Converts 'start_time' and 'end_time' columns in a 
        `geopandas.GeoDataFrame` to datetime objects to allow vector
        features to be plotted at specific moments in time during an
        animation, and sets default values based on the first
        and last time in `ds` if this information is missing from the
        dataset.
        """

        # Make copy of gdf so we do not modify original data
        gdf = gdf.copy()

        # Get min and max times from input dataset
        minmax_times = pd.to_datetime(ds.time.isel(time=[0, -1]).values)

        # Update both `start_time` and `end_time` columns
        for time_col, time_val in zip(['start_time', 'end_time'], minmax_times):

            # Add time_col if it does not exist
            if time_col not in gdf:
                gdf[time_col] = np.nan

            # Convert values to datetimes and fill gaps with relevant time value
            gdf[time_col] = pd.to_datetime(gdf[time_col], errors='ignore')
            gdf[time_col] = gdf[time_col].fillna(time_val)

        return gdf


    def _add_colorbar(fig, ax, vmin, vmax, imshow_defaults, colorbar_defaults):
        """
        Adds a new colorbar axis to the animation with custom minimum 
        and maximum values and styling.
        """

        # Create new axis object for colorbar
        cax = fig.add_axes([0.02, 0.02, 0.96, 0.03])

        # Initialise color bar using plot min and max values
        img = ax.imshow(np.array([[vmin, vmax]]), **imshow_defaults)
        fig.colorbar(img,
                     cax=cax,
                     orientation='horizontal',
                     ticks=np.linspace(vmin, vmax, 2))

        # Fine-tune appearance of colorbar
        cax.xaxis.set_ticks_position('top')
        cax.tick_params(axis='x', **colorbar_defaults)
        cax.get_xticklabels()[0].set_horizontalalignment('left')
        cax.get_xticklabels()[-1].set_horizontalalignment('right')


    def _frame_annotation(times, show_date, show_text):
        """
        Creates a custom annotation for the top-right of the animation
        by converting a `xarray.DataArray` of times into strings, and
        combining this with a custom text annotation. Handles cases 
        where `show_date=False/None`, `show_text=False/None`, or where
        `show_text` is a list of strings.
        """

        # Test if show_text is supplied as a list
        is_sequence = isinstance(show_text, (list, tuple, np.ndarray))

        # Treat a single-item list as a static string; raise an exception
        # below if the list is shorter than the number of dates
        if is_sequence and (len(show_text) == 1):
            show_text, is_sequence = show_text[0], False
        elif is_sequence and (len(show_text) < len(times)):
            raise ValueError(f'Annotations supplied via `show_text` must have '
                             f'either a length of 1, or a length >= the number '
                             f'of timesteps in `ds` (n={len(times)})')

        times_list = (times.dt.strftime(show_date).values if show_date else [None] *
                      len(times))
        text_list = show_text if is_sequence else [show_text] * len(times)
        annotation_list = ['\n'.join([str(i) for i in (a, b) if i])
                           for a, b in zip(times_list, text_list)]

        return annotation_list


    def _update_frames(i, ax, extent, annotation_text, gdf, gdf_defaults,
                       annotation_defaults, imshow_defaults):
        """
        Animation called by `matplotlib.animation.FuncAnimation` to 
        animate each frame in the animation. Plots array and any text
        annotations, as well as a temporal subset of `gdf` data based
        on the times specified in 'start_time' and 'end_time' columns.
        """        

        # Clear previous frame to optimise render speed and plot imagery
        ax.clear()
        ax.imshow(array[i, ...].clip(0.0, 1.0), 
                  extent=extent, 
                  vmin=0.0, vmax=1.0, 
                  **imshow_defaults)

        # Add annotation text
        ax.annotate(annotation_text[i], **annotation_defaults)

        # Add geodataframe annotation
        if show_gdf is not None:

            # Obtain start and end times to filter geodataframe features
            time_i = ds.time.isel(time=i).values

            # Subset geodataframe using start and end dates
            gdf_subset = show_gdf.loc[(show_gdf.start_time <= time_i) &
                                      (show_gdf.end_time >= time_i)]           

            if len(gdf_subset.index) > 0:

                # Set color to geodataframe field if supplied
                if ('color' in gdf_subset) and ('color' not in gdf_kwargs):
                    gdf_defaults.update({'color': gdf_subset['color'].tolist()})

                gdf_subset.plot(ax=ax, **gdf_defaults)

        # Remove axes to show imagery only
        ax.axis('off')
        
        # Update progress bar
        progress_bar.update(1)
        
    
    # Test if bands have been supplied, or convert to list to allow
    # iteration if a single band is provided as a string
    if bands is None:
        raise ValueError(f'Please use the `bands` parameter to supply '
                         f'a list of one or three bands that exist as '
                         f'variables in `ds`, e.g. {list(ds.data_vars)}')
    elif isinstance(bands, str):
        bands = [bands]
    
    # Test if bands exist in dataset
    missing_bands = [b for b in bands if b not in ds.data_vars]
    if missing_bands:
        raise ValueError(f'Band(s) {missing_bands} do not exist as '
                         f'variables in `ds` {list(ds.data_vars)}')
    
    # Test if time dimension exists in dataset
    if 'time' not in ds.dims:
        raise ValueError(f"`ds` does not contain a 'time' dimension "
                         f"required for generating an animation")
                
    # Set default parameters
    outline = [PathEffects.withStroke(linewidth=2.5, foreground='black')]
    annotation_defaults = {
        'xy': (1, 1),
        'xycoords': 'axes fraction',
        'xytext': (-5, -5),
        'textcoords': 'offset points',
        'horizontalalignment': 'right',
        'verticalalignment': 'top',
        'fontsize': 20,
        'color': 'white',
        'path_effects': outline
    }
    imshow_defaults = {'cmap': 'magma', 'interpolation': 'nearest'}
    colorbar_defaults = {'colors': 'white', 'labelsize': 12, 'length': 0}
    gdf_defaults = {'linewidth': 1.5}

    # Update defaults with kwargs
    annotation_defaults.update(annotation_kwargs)
    imshow_defaults.update(imshow_kwargs)
    colorbar_defaults.update(colorbar_kwargs)
    gdf_defaults.update(gdf_kwargs)

    # Get info on dataset dimensions
    height, width = ds.geobox.shape
    scale = width_pixels / width
    left, bottom, right, top = ds.geobox.extent.boundingbox

    # Prepare annotations
    annotation_list = _frame_annotation(ds.time, show_date, show_text)

    # Prepare geodataframe
    if show_gdf is not None:
        show_gdf = show_gdf.to_crs(ds.geobox.crs)
        show_gdf = gpd.clip(show_gdf, mask=box(left, bottom, right, top))
        show_gdf = _start_end_times(show_gdf, ds)

    # Convert data to 4D numpy array of shape [time, y, x, bands]
    ds = ds[bands].to_array().transpose(..., 'variable')[0:limit, ...]
    array = ds.astype(np.float32).values

    # Progress bar format string used by tqdm below
    bar_format = ('{l_bar}{bar}| {n_fmt}/{total_fmt} ({remaining_s:.1f} '
                  'seconds remaining at {rate_fmt}{postfix})')

    # Optionally apply image processing along axis 0 (i.e. to each timestep)
    if image_proc_funcs:
        print('Applying custom image processing functions')
        for i, array_i in tqdm(enumerate(array),
                               total=len(ds.time),
                               leave=False,
                               bar_format=bar_format,
                               unit=' frames'):
            for func in image_proc_funcs:
                array_i = func(array_i)
            array[i, ...] = array_i

    # Clip to percentiles and rescale between 0.0 and 1.0 for plotting
    vmin, vmax = np.quantile(array[np.isfinite(array)], q=percentile_stretch)
        
    # Replace with vmin and vmax if present in `imshow_defaults`
    if 'vmin' in imshow_defaults:
        vmin = imshow_defaults.pop('vmin')
    if 'vmax' in imshow_defaults:
        vmax = imshow_defaults.pop('vmax')
    
    # Rescale between 0 and 1
    array = rescale_intensity(array, 
                              in_range=(vmin, vmax), 
                              out_range=(0.0, 1.0))
    array = np.squeeze(array)  # remove final axis if only one band

    # Set up figure
    fig, ax = plt.subplots()
    fig.set_size_inches(width * scale / 72, height * scale / 72, forward=True)
    fig.subplots_adjust(left=0, bottom=0, right=1, top=1, wspace=0, hspace=0)

    # Optionally add colorbar
    if show_colorbar and (len(bands) == 1):
        _add_colorbar(fig, ax, vmin, vmax, imshow_defaults, colorbar_defaults)

    # Animate
    print(f'Exporting animation to {output_path}') 
    anim = FuncAnimation(
        fig=fig,
        func=_update_frames,
        fargs=(
            ax,  # axis to plot into
            [left, right, bottom, top],  # imshow extent
            annotation_list,  # list of text annotations
            show_gdf,  # geodataframe to plot over imagery
            gdf_defaults,  # any kwargs used to plot gdf
            annotation_defaults,  # kwargs for annotations
            imshow_defaults),  # kwargs for imshow
        frames=len(ds.time),
        interval=interval,
        repeat=False)
    
    # Set up progress bar
    progress_bar = tqdm(total=len(ds.time), 
                        unit=' frames', 
                        bar_format=bar_format) 

    # Export animation to file
    if Path(output_path).suffix == '.gif':
        anim.save(output_path, writer='pillow')
    else:
        anim.save(output_path, dpi=72)

    # Update progress bar to fix progress bar moving past end
    if progress_bar.n != len(ds.time):
        progress_bar.n = len(ds.time)
        progress_bar.last_print_n = len(ds.time)
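
A minimal usage sketch of the function above. The dataset `ds`, the file names and the band names are assumptions for illustration, not part of the original source:

import geopandas as gpd

# `ds` is assumed to be an xarray.Dataset of satellite imagery with
# 'red', 'green' and 'blue' variables and a 'time' dimension.
gdf = gpd.read_file('site_boundary.geojson')   # hypothetical vector overlay
gdf['color'] = 'red'                           # draw the boundary in red

xr_animation(ds,
             bands=['red', 'green', 'blue'],
             output_path='animation.gif',
             show_gdf=gdf,
             show_text='True colour',
             limit=20)                         # test on the first 20 frames only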
Example #13
Remember, our variogram defines the spatial autocorrelation of the data (i.e., how the locations in our region affect one another). Once we have a variogram model, we can use it to estimate the weights in our kriging model. I won't go into detail on how this is done, but there is a neat walkthrough in the [scikit-gstat docs here](https://scikit-gstat.readthedocs.io/en/latest/userguide/kriging.html).
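
For reference, fitting such a variogram with scikit-gstat looks roughly like the sketch below (the `gpm25` column names are assumed from the kriging call that follows):

import skgstat as skg

# Fit a spherical variogram to the PM 2.5 observations
V = skg.Variogram(gpm25[["Easting", "Northing"]].values,
                  gpm25["PM_25"].values,
                  model="spherical")
print(V)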

Anyway, I'll briefly use the [pykrige](https://github.com/GeoStat-Framework/PyKrige) library to do some kriging so you can get an idea of what it looks like:

krig = OrdinaryKriging(x=gpm25["Easting"], y=gpm25["Northing"], z=gpm25["PM_25"], variogram_model="spherical")
z, ss = krig.execute("grid", gridx, gridy)
plt.imshow(z);

Now let's convert our raster back to polygons so we can map it. I'm also going to load in a polygon of BC using `osmnx` to clip my data so it fits nicely on my map this time:

polygons, values = pixel2poly(gridx, gridy, z, resolution)
pm25_model = (gpd.GeoDataFrame({"PM_25_modelled": values}, geometry=polygons, crs="EPSG:3347")
                 .to_crs("EPSG:4326")
                 )
bc = ox.geocode_to_gdf("British Columbia, Canada")
pm25_model = gpd.clip(pm25_model, bc)

fig = px.choropleth_mapbox(pm25_model, geojson=pm25_model.geometry, locations=pm25_model.index,
                           color="PM_25_modelled", color_continuous_scale="RdYlGn_r",
                           center={"lat": 52.261, "lon": -123.246}, zoom=3.5,
                           mapbox_style="carto-positron")
fig.update_layout(margin=dict(l=0, r=0, t=30, b=10))
fig.update_traces(marker_line_width=0)

I used an "ordinary kriging" interpolation above which is the simplest implementation of kriging. The are many other forms of kriging too that can account for underlying trends in the data ("universal kriging"), or even use a regression or classification model to make use of additional explanatory variables. `pykrige` [supports most variations](https://geostat-framework.readthedocs.io/projects/pykrige/en/stable/examples/index.html). In particular for the latter, `pykrige` can accept `sklearn` models which is useful!

### 2.3. Areal interpolation

Areal interpolation is concerned with mapping data from one polygonal representation to another. Imagine I want to map the air pollution polygons I just made to FSA polygons (recall FSA is "forward sortation area", which are groups of postcodes). The most intuitive way to do this is to distribute values based on area proportions, hence "areal interpolation".
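
To make that idea concrete, here's a minimal sketch of area-weighted interpolation for an intensive variable using plain geopandas (the function and column handling are my own illustration; tobler, used below, handles this and much more):

import geopandas as gpd

def areal_interpolate_intensive(source, target, column):
    # Intersect source and target polygons, keeping the target row index
    pieces = gpd.overlay(source[[column, "geometry"]],
                         target.reset_index()[["index", "geometry"]],
                         how="intersection")
    pieces["w"] = pieces.area                      # weight = intersection area
    pieces["wv"] = pieces[column] * pieces["w"]    # area-weighted values
    sums = pieces.groupby("index")[["wv", "w"]].sum()
    # Area-weighted average of source values within each target polygon
    return target.assign(**{column: sums["wv"] / sums["w"]})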

I'll use the [tobler](https://github.com/pysal/tobler) library for this. First, load in the FSA polygons:
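
A hedged sketch of that step (the file path is an assumption), using tobler's `area_interpolate`:

import geopandas as gpd
from tobler.area_weighted import area_interpolate

# Hypothetical path to the FSA polygon file
fsa = gpd.read_file("data/fsa_boundaries.geojson").to_crs(pm25_model.crs)

# Distribute modelled PM 2.5 into FSA polygons by area proportion;
# a concentration is an intensive variable, so pass it as such
pm25_fsa = area_interpolate(source_df=pm25_model,
                            target_df=fsa,
                            intensive_variables=["PM_25_modelled"])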
Example #14
    ## make a RECTANGLE to clip the viasat_data
    from shapely.geometry import Polygon

    lat_point_list = [37.625, 37.625, 37.426, 37.426, 37.625]
    lon_point_list = [14.86, 15.25, 15.25, 14.86, 14.86]

    polygon_geom = Polygon(zip(lon_point_list, lat_point_list))
    crs = 'epsg:4326'  # plain CRS string; the {'init': ...} dict form is deprecated
    polygon = gpd.GeoDataFrame(index=[0], crs=crs, geometry=[polygon_geom])
    ## check projection (CRS)
    # polygon.crs

    # polygon.plot()
    ## clip the data with the RECTANGLE
    LPIR_RESILIENT = gpd.clip(LPIR_RESILIENT, polygon)

    ### plot geodataframe with colors -----###
    ## add background map ###
    gdf = LPIR_RESILIENT
    import contextily as ctx

    del LPIR_RESILIENT

    # minx, miny, maxx, maxy = gdf.geometry.total_bounds
    # polygon.geometry.total_bounds

    ## reproject with mercator coordinates (this is the coordinate system of the basemap)
    gdf = gdf.to_crs(epsg=3857)
    # Plot the data within the RECTANGULAR extensions
    fig, ax = plt.subplots(figsize=(10, 10))
Example #15
    category='cultural',
    name='admin_1_states_provinces_lines',
    scale='50m',
    facecolor='none')

ax1.add_feature(cfeature.LAND)
ax1.add_feature(cfeature.COASTLINE)
ax1.add_feature(states_provinces, edgecolor='black')
mun = geobr.read_municipality(code_muni='all', year=2018)
mun.plot(facecolor="none", alpha=1, edgecolor='gray', ax=ax1)
polygon = Polygon([(lon1, lat1), (lon1, lat2), (lon2, lat2), (lon2, lat1),
                   (lon1, lat1)])
poly_gdf = gpd.GeoDataFrame([1], geometry=[polygon], crs=mun.crs)
poly_gdf.boundary.plot(ax=ax1, color="red")

munDomain = gpd.clip(mun, polygon)  ## clip municipalities to the domain polygon (essential step)
munDomain = munDomain.sort_values(by='abbrev_state')

ax2 = plt.subplot(1, 2, 2, projection=ccrs.PlateCarree())
munDomain.plot(ax=ax2, color="purple", alpha=0.5)
munDomain.boundary.plot(ax=ax2)
poly_gdf.boundary.plot(ax=ax2, color="red")
ax2.set_title("Clipped", fontsize=20)
plt.savefig('04_output/emissions/fig/clip.png',
            bbox_inches='tight',
            facecolor='w')

#%% Temporal distribution from Andrade et al. (2015)
co = [
    0.019, 0.012, 0.008, 0.004, 0.003, 0.003, 0.006, 0.017, 0.047, 0.074,
    0.072, 0.064, 0.055, 0.052, 0.051, 0.048, 0.052, 0.057, 0.068, 0.087,
Example #16
def test_mixed_geom(mixed_gdf, single_rectangle_gdf):
    """Test clipping a mixed GeoDataFrame"""
    clipped = clip(mixed_gdf, single_rectangle_gdf)
    assert (clipped.geom_type[0] == "Point"
            and clipped.geom_type[1] == "Polygon"
            and clipped.geom_type[2] == "LineString")
Example #17
        ## make a RECTANGLE to clip the viasat_data
        from shapely.geometry import Polygon

        lat_point_list = [37.625, 37.625, 37.426, 37.426, 37.625]
        lon_point_list = [14.86, 15.25, 15.25, 14.86, 14.86]

        polygon_geom = Polygon(zip(lon_point_list, lat_point_list))
        crs = 'epsg:4326'  # plain CRS string; the {'init': ...} dict form is deprecated
        polygon = gpd.GeoDataFrame(index=[0], crs=crs, geometry=[polygon_geom])
        ## check projection (CRS)
        ## https://geopandas.org/projections.html
        # polygon.crs

        # polygon.plot()
        ## clip the data with the RECTANGLE
        all_counts_uv = gpd.clip(all_counts_uv, polygon)

        ### plot geodataframe with colors -----###
        ## add background map ###
        gdf = all_counts_uv
        import contextily as ctx

        # minx, miny, maxx, maxy = gdf.geometry.total_bounds
        # polygon.geometry.total_bounds

        ## reproject with mercator coordinates (this is the coordinate system of the basemap)
        gdf = gdf.to_crs(epsg=3857)
        # Plot the data within the RECTANGULAR extensions
        fig, ax = plt.subplots(figsize=(10, 10))
        polygon = polygon.to_crs(epsg=3857)
        polygon.plot(alpha=0,
Example #18
def test_mixed_series(mixed_gdf, single_rectangle_gdf):
    """Test clipping a mixed GeoSeries"""
    clipped = clip(mixed_gdf.geometry, single_rectangle_gdf)
    assert (clipped.geom_type[0] == "Point"
            and clipped.geom_type[1] == "Polygon"
            and clipped.geom_type[2] == "LineString")
Example #19
if __name__ == "__main__":

    # Project's root
    os.chdir("../..")

    for region in REGIONS:

        region_name = region.get("name")
        region_mask = gpd.read_file(region.get("path"))

        df = pd.DataFrame(columns=["year", "burned_area", "rainfall"])

        grid = create_grid(*region_mask.bounds.loc[0], GRID_RESOLUTION,
                           region_mask.crs)
        grid = gpd.clip(grid, region_mask)
        grid = grid[grid.area >= GRID_AREA_THRESHOLD * GRID_RESOLUTION**2]
        grid = grid.reset_index()

        burn_fn = f"data/nc/MODIS/MCD64A1/{region_name}/MCD64A1_500m.nc"
        burn_da = xr.open_dataset(burn_fn, mask_and_scale=False)["Burn_Date"]

        rainfall_fn = f"data/nc/CHC/CHIRPS/{region_name}/chirps_v2_5km.nc"
        rainfall_da = xr.open_dataset(rainfall_fn,
                                      mask_and_scale=False)["precip"]

        years = np.unique(burn_da.time.dt.year.values)
        for year in years:

            temp_grid = grid.copy()
Example #20
def test_warning_extra_geoms_mixed(single_rectangle_gdf, mixed_gdf):
    """Test the correct warnings are raised if keep_geom_type is
    called on a mixed GDF"""
    with pytest.warns(UserWarning):
        clip(mixed_gdf, single_rectangle_gdf, keep_geom_type=True)
Example #21
geo_df = geo_df.to_crs(epsg=2163)

state_map = gpd.read_file(
    'shapefiles/geo_export_9ef76f60-e019-451c-be6b-5a879a5e7c07.shp')
state_map = state_map.to_crs(epsg=2163)

group_map = gpd.read_file('shapefiles/Corn_belt_all_states_20_bz.shp')
group_map = group_map.to_crs(epsg=2163)

county_map = gpd.read_file('shapefiles/Corn_belt_all_states.shp')
county_map = county_map.to_crs(epsg=2163)

county_map2 = gpd.read_file('shapefiles/USA_counties.shp')
county_map2 = county_map2.to_crs(epsg=2163)

cb_counties = gpd.clip(county_map2, county_map)

fig, ax = plt.subplots()
state_map.plot(ax=ax,
               color='gray',
               alpha=0.6,
               edgecolor='white',
               linewidth=0.5)
cb_counties.plot(ax=ax,
                 color='orange',
                 alpha=1,
                 edgecolor='darkslategrey',
                 linewidth=0.2)

ax.set_box_aspect(1)
ax.set_xlim(-750000, 2000000)
Example #22
def test_warning_geomcoll(single_rectangle_gdf, geomcol_gdf):
    """Test the correct warnings are raised if keep_geom_type is
    called on a GDF with GeometryCollection"""
    with pytest.warns(UserWarning):
        clip(geomcol_gdf, single_rectangle_gdf, keep_geom_type=True)
Example #23
#Convert to geodataframe
main_polygons = geopandas.GeoDataFrame.from_features(collection)
main_polygons.crs = project_crs

for cell_char in grid_cell_chars:
    for index in range(1, grid_cell_len + 1):
        cell = cell_char + str(index)
        curr_path = os.path.join(cell_grids_path, cell, cell_grids_id)
        os.chdir(curr_path)
        polygons = glob.glob("*.shp")

        for polygon_name in polygons:
            polygon_path = os.path.join(curr_path, polygon_name)
            polygon = geopandas.read_file(polygon_path)

            polygon.crs = project_crs

            try:
                clipped_poly = geopandas.clip(main_polygons, polygon)
                if not clipped_poly.empty:
                    out_path = os.path.join(output_path, cell, polygon_name)
                    clipped_poly.to_file(out_path)
                    print(cell, polygon_name)
            except Exception:
                pass  # skip polygons that fail to clip
        print("Done with " + cell)
    print("\n---\n\n")
print("done")
Example #24
        ## make a RECTANGLE to clip the viasat_data
        from shapely.geometry import Polygon

        lat_point_list = [37.625, 37.625, 37.426, 37.426, 37.625]
        lon_point_list = [14.86, 15.25, 15.25, 14.86, 14.86]

        polygon_geom = Polygon(zip(lon_point_list, lat_point_list))
        crs = 'epsg:4326'  # plain CRS string; the {'init': ...} dict form is deprecated
        polygon = gpd.GeoDataFrame(index=[0], crs=crs, geometry=[polygon_geom])
        ## check projection (CRS)
        # polygon.crs

        # polygon.plot()
        ## clip the data with the RECTANGLE
        speed_PHF_and_SCARICA = gpd.clip(speed_PHF_and_SCARICA, polygon)

        ### plot geodataframe with colors -----###
        ## add background map ###
        gdf = speed_PHF_and_SCARICA
        import contextily as ctx

        # minx, miny, maxx, maxy = gdf.geometry.total_bounds
        # polygon.geometry.total_bounds

        ## reproject with mercator coordinates (this is the coordinate system of the basemap)
        gdf = gdf.to_crs(epsg=3857)
        # Plot the data within the RECTANGULAR extensions
        fig, ax = plt.subplots(figsize=(10, 10))
        polygon = polygon.to_crs(epsg=3857)
        polygon.plot(alpha=0,
Example #25
if max(testmerged['sum_y']) > maxval:
    maxval = max(testmerged['sum_y'])

########################################
# Clip data by polygon and create plot #
########################################

# get a Series of protected area names
pas = poly['NAME']

# loop through protected areas
for i in pas:
    pa = poly[poly['NAME'] == i]

    # clip by protected area polygon
    pa_viirs19 = gpd.clip(fire19, pa)

    # group by date
    df19 = groupByDate(pa_viirs19, idx19)

    # same for the 2020 data
    # clip by protected area polygon
    pa_viirs20 = gpd.clip(fire20, pa)

    # group by date
    df20 = groupByDate(pa_viirs20, idx20)

    # reset index
    df19 = df19.reset_index()
    df20 = df20.reset_index()
Example #26
def bikeability(place, scale='city', data=False):
    '''Calculate a bikeability value for a given place of interest.

    Parameters
    place: the place of interest, e.g. "Freiburg, Germany", datatype = string
    scale: either "grid" or "city", default is "city", datatype = string
    data: if True, returns a dataframe along with the standard dictionary
    output, datatype = boolean

    Returns the average_index for bikeability (a number between 0 and 100) and
    some summary statistics of the index, datatype = dictionary, or dataframe
    and dictionary if data is set to True.

    Usage examples
    a = bikeability('Freiburg, Germany', scale='grid', data=False)  # grid scale approach
    a, b = bikeability('Freiburg, Germany', scale='grid', data=True)
    a = bikeability('Freiburg, Germany', scale='city')  # city scale approach
    a, b = bikeability('Freiburg, Germany', scale='city', data=True)
    '''

    if scale != 'grid':

        # Create and set osmnx to select important tags
        useful_tags_way = [
            'bridge', 'length', 'oneway', 'lanes', 'ref', 'name', 'highway',
            'maxspeed', 'service', 'access', 'area', 'cycleway', 'landuse',
            'width', 'est_width', 'junction', 'surface'
        ]

        ox.utils.config(useful_tags_way=useful_tags_way)  # was useful_tags_path

        # Create basic city graph
        place_name = place
        graph = ox.graph_from_place(place_name,
                                    network_type='all',
                                    retain_all=True)

        # # Calculate and add edge closeness centrality(connectedness)
        centrality = nx.degree_centrality(nx.line_graph(graph))
        nx.set_edge_attributes(graph, centrality, 'centrality')

        # Extract nodes and edges to geopandas from graph
        #edges = ox.graph_to_gdfs(graph, nodes=False)
        try:
            edges = ox.graph_to_gdfs(graph, nodes=False)
        except Exception as e:
            print('{} at {}'.format(e, place))

        # Keep only the columns we need
        cols = [
            'highway', 'cycleway', 'surface', 'maxspeed', 'length', 'lanes',
            'oneway', 'width', 'centrality', 'geometry'
        ]

        try:
            df = edges.loc[:, cols]
        except KeyError as e:
            print(e)

        # Set appropriate data types

        df['maxspeed'] = pd.to_numeric(df['maxspeed'],
                                       errors='coerce',
                                       downcast='integer')
        df['lanes'] = pd.to_numeric(df['lanes'],
                                    errors='coerce',
                                    downcast='integer')
        df['width'] = pd.to_numeric(df['width'],
                                    errors='coerce',
                                    downcast='unsigned')
        df['highway'] = df['highway'].astype(str)
        df['surface'] = df['surface'].astype(str)
        df['oneway'] = df['oneway'].astype(int)
        df['cycleway'] = df['cycleway'].astype(str)

        # Dataframe cleaning and preprocessing
        # highway column
        df['highway'] = df['highway'].str.replace(r'[^\w\s-]', '', regex=True)
        highway_cols = (pd.DataFrame(df.highway.str.split(' ', expand=True)))
        highway_map = ({
            'service': 6,
            'None': np.nan,
            'residential': 8,
            'unclassified': 7,
            'footway': 7,
            'track': 5,
            'tertiary': 6,
            'living_street': 9,
            'path': 5,
            'pedestrian': 7,
            'secondary': 5,
            'primary': 2,
            'steps': 2,
            'cycleway': 10,
            'rest_area': 5,
            'primary_link': 2,
            'ferry': 1,
            'construction': 2,
            'byway': 8,
            'bridleway': 6,
            'trunk': 2,
            'trunk_link': 2,
            'motorway': 1,
            'motorway_link': 1
        })
        for column in highway_cols:
            highway_cols[column] = highway_cols[column].map(highway_map)
        highway_cols['mean'] = np.nanmean(highway_cols, axis=1)
        df['highway'] = round(highway_cols['mean'])

        # cycleway column
        df['cycleway'] = df['cycleway'].str.replace(r'[^\w\s-]',
                                                    '',
                                                    regex=True)
        cycleway_cols = (pd.DataFrame(df.cycleway.str.split(' ', expand=True)))
        cycleway_map = ({
            'opposite': 9,
            'lane': 9,
            'share_busway': 8,
            'shared_lane': 8,
            'segregated': 10,
            'no': 1,
            'opposite_lane': 9,
            'crossing': 10,
            'track': 10,
            'designated': 10,
            'opposite_share_busway': 8,
            'seperate': 10,
            'shoulder': 8
        })
        for column in cycleway_cols:
            cycleway_cols[column] = cycleway_cols[column].map(cycleway_map)
        cycleway_cols['mean'] = np.nanmean(cycleway_cols, axis=1)
        df['cycleway'] = round(cycleway_cols['mean'])

        # surface column
        df['surface'] = df['surface'].str.replace(r'[^\w\s-]', '', regex=True)
        surface_cols = (pd.DataFrame(df.surface.str.split(' ', expand=True)))
        surface_map = ({
            'asphalt': 10,
            'paved': 10,
            'cobblestone': 5,
            'fine_gravel': 9,
            'ground': 7,
            'sett': 6,
            'gravel': 7,
            'metal': 6,
            'compacted': 10,
            'dirt': 6,
            'paving_stones': 7,
            'grass_paver': 5,
            'unpaved': 8,
            'pebblestone': 9,
            'concrete': 10,
            'grass': 5,
            'mud': 1
        })
        for column in surface_cols:
            surface_cols[column] = surface_cols[column].map(surface_map)
        surface_cols['mean'] = np.nanmean(surface_cols, axis=1)
        df['surface'] = round(surface_cols['mean'])

        # maxspeed column
        df.loc[df['maxspeed'] > 110, 'maxspeed'] = 110
        df.loc[df['maxspeed'] < 20, 'maxspeed'] = 20
        maxspeed_map = ({
            20: 10,
            30: 9,
            40: 8,
            50: 7,
            60: 6,
            70: 5,
            80: 4,
            90: 3,
            100: 2,
            110: 1
        })
        df['maxspeed'] = df['maxspeed'].map(maxspeed_map)

        # lanes column
        df.loc[df['lanes'] > 8, 'lanes'] = 8
        lanes_map = {1: 10, 2: 9, 3: 5, 4: 5, 5: 3, 6: 3, 7: 2, 8: 1}
        df['lanes'] = df['lanes'].map(lanes_map)

        # oneway column
        oneway_map = {0: 5, 1: 10, -1: 5}
        df['oneway'] = df['oneway'].map(oneway_map)

        # width column
        df.loc[df['width'] < 2, 'width'] = 1
        df.loc[df['width'] > 6, 'width'] = 6
        df['width'] = round(df['width'])
        width_map = ({1: 1, 2: 2, 3: 5, 4: 7, 5: 9, 6: 10})
        df['width'] = df['width'].map(width_map)

        # normalize centrality column (between 0 and 10)
        df['centrality'] = (
            (df['centrality'] - np.min(df['centrality'])) /
            (np.max(df['centrality']) - np.min(df['centrality']))) * 10

        # Switch to new df for calculation
        d_frame = df.copy(deep=True)

        # Multiply variables by weights
        d_frame['cycleway'] = d_frame['cycleway'] * 0.208074534
        d_frame['surface'] = d_frame['surface'] * 0.108695652
        d_frame['highway'] = d_frame['highway'] * 0.167701863
        d_frame['maxspeed'] = d_frame['maxspeed'] * 0.189440994
        d_frame['lanes'] = d_frame['lanes'] * 0.108695652
        d_frame['centrality'] = d_frame['centrality'] * 0.071428571
        d_frame['width'] = d_frame['width'] * 0.086956522
        d_frame['oneway'] = d_frame['oneway'] * 0.059006211

        # Combine the weighted variables into a single index (0-100 scale)
        d_frame['index'] = (np.nanmean(d_frame[[
            'cycleway', 'highway', 'surface', 'maxspeed', 'lanes', 'width',
            'oneway', 'centrality'
        ]],
                                       axis=1,
                                       dtype='float64')) * 80

        # Summary statistics of the city-wide index
        mean_index = np.average(d_frame['index'], weights=d_frame['length'])
        max_index = d_frame['index'].max()
        min_index = d_frame['index'].min()
        std_index = d_frame['index'].std()

        # Plot result
        #d_frame.plot(column = 'index',legend = True)

        # Result dictionary
        result = ({
            'place': place,
            'average_index': mean_index,
            'max_index': max_index,
            'min_index': min_index,
            'std_index': std_index
        })

    else:
        #Get bounding box for place
        place_name = place
        area = ox.geocode_to_gdf(place_name)  # graph first
        xmin, ymin, xmax, ymax = area.total_bounds

        #divide into grids x = lon, y = lat
        height = 0.041667
        width = 0.041667
        rows = int(np.ceil((ymax - ymin) / height))
        cols = int(np.ceil((xmax - xmin) / width))
        XleftOrigin = xmin
        XrightOrigin = xmin + width
        YtopOrigin = ymax
        YbottomOrigin = ymax - height
        polygons = []
        for i in range(cols):
            Ytop = YtopOrigin
            Ybottom = YbottomOrigin
            for j in range(rows):
                polygons.append(
                    Polygon([(XleftOrigin, Ytop), (XrightOrigin, Ytop),
                             (XrightOrigin, Ybottom), (XleftOrigin, Ybottom)]))
                Ytop = Ytop - height
                Ybottom = Ybottom - height
            XleftOrigin = XleftOrigin + width
            XrightOrigin = XrightOrigin + width

        #Ensure the grids are within the polygon
        grid_list = []
        for i in range(len(polygons)):
            p = Point(polygons[i].centroid.x, polygons[i].centroid.y)
            geome = shape(polygons[i])
            q = gpd.GeoDataFrame({'geometry': geome}, index=[0])
            q = q.set_crs("EPSG:4326")
            if area.geometry.iloc[0].contains(polygons[i]):
                grid_list.append(q)
            #elif p.within(area.geometry.iloc[0]) == True and area.geometry.iloc[0].contains(polygons[i])== False:
            elif area.geometry.iloc[0].intersects(polygons[i]):
                #grid_list.append(polygons[i])
                clip = gpd.clip(area, q)
                grid_list.append(clip)

        #Initialize important variables
        dflist = []
        exception_grids = []
        dfs = []

        for i in tqdm(range(len(grid_list))):

            #graph
            useful_tags_way = [
                'bridge', 'length', 'oneway', 'lanes', 'ref', 'name',
                'highway', 'maxspeed', 'surface', 'area', 'landuse', 'width',
                'est_width', 'junction', 'cycleway'
            ]
            ox.utils.config(useful_tags_way=useful_tags_way)  # was useful_tags_path

            try:
                box_graph = ox.graph_from_polygon(
                    grid_list[i].geometry.iloc[0],
                    network_type='bike',
                    retain_all=True)
            except Exception as e:
                print('{} at grid {}, skip grid'.format(e, i + 1))
                exception_grids.append(i + 1)
                continue

            # Calculate and add edge closeness centrality(connectedness)
            centrality = nx.degree_centrality(nx.line_graph(box_graph))
            nx.set_edge_attributes(box_graph, centrality, 'centrality')

            # Extract nodes and edges to geopandas from graph
            try:
                edges = ox.graph_to_gdfs(box_graph, nodes=False)
            except Exception as e:
                print('{} at grid {}, skip grid'.format(e, i + 1))
                exception_grids.append(i + 1)
                continue

            # Select only the important variables
            cols = [
                'highway', 'cycleway', 'surface', 'maxspeed', 'length',
                'lanes', 'oneway', 'width', 'centrality', 'geometry'
            ]
            try:
                df = edges.loc[:, cols]
            except KeyError as e:
                print('{} at grid {}, skip grid'.format(e, i + 1))
                exception_grids.append(i + 1)
                continue

            # Set appropriate data types
            df['maxspeed'] = pd.to_numeric(df['maxspeed'],
                                           errors='coerce',
                                           downcast='integer')
            df['lanes'] = pd.to_numeric(df['lanes'],
                                        errors='coerce',
                                        downcast='integer')
            df['width'] = pd.to_numeric(df['width'],
                                        errors='coerce',
                                        downcast='unsigned')
            df['highway'] = df['highway'].astype(str)
            df['surface'] = df['surface'].astype(str)
            df['oneway'] = df['oneway'].astype(int)
            df['cycleway'] = df['cycleway'].astype(str)

            # Dataframe cleaning and preprocessing
            # highway column
            df['highway'] = df['highway'].str.replace(r'[^\w\s-]',
                                                      '',
                                                      regex=True)
            highway_cols = (pd.DataFrame(df.highway.str.split(' ',
                                                              expand=True)))
            highway_map = ({
                'service': 6,
                'None': np.nan,
                'residential': 8,
                'unclassified': 7,
                'footway': 7,
                'track': 5,
                'tertiary_link': 6,
                'tertiary': 6,
                'living_street': 9,
                'path': 5,
                'pedestrian': 7,
                'secondary': 5,
                'secondary_link': 5,
                'primary': 2,
                'steps': 2,
                'cycleway': 10,
                'rest_area': 5,
                'primary_link': 2,
                'ferry': 1,
                'construction': 2,
                'byway': 8,
                'bridleway': 6,
                'trunk': 2,
                'trunk_link': 2,
                'motorway': 1,
                'motorway_link': 1
            })
            for column in highway_cols:
                highway_cols[column] = highway_cols[column].map(highway_map)
            highway_cols['mean'] = np.nanmean(highway_cols, axis=1)
            df['highway'] = round(highway_cols['mean'])

            #cycleway column
            df['cycleway'] = df['cycleway'].str.replace(r'[^\w\s-]',
                                                        '',
                                                        regex=True)
            cycleway_cols = (pd.DataFrame(
                df.cycleway.str.split(' ', expand=True)))
            cycleway_map = ({
                'opposite': 9,
                'lane': 9,
                'share_busway': 8,
                'shared_lane': 8,
                'segregated': 10,
                'no': 1,
                'opposite_lane': 9,
                'crossing': 10,
                'track': 10,
                'designated': 10,
                'opposite_share_busway': 8,
                'seperate': 10,  # (sic) frequent misspelling in raw OSM data
                'shoulder': 8
            })
            for column in cycleway_cols:
                cycleway_cols[column] = cycleway_cols[column].map(cycleway_map)
            cycleway_cols['mean'] = np.nanmean(cycleway_cols, axis=1)
            df['cycleway'] = round(cycleway_cols['mean'])

            # surface column
            df['surface'] = df['surface'].str.replace(r'[^\w\s-]',
                                                      '',
                                                      regex=True)
            surface_cols = df.surface.str.split(' ', expand=True)
            surface_map = ({
                'asphalt': 10,
                'paved': 10,
                'cobblestone': 3,
                'fine_gravel': 9,
                'ground': 6,
                'sett': 4,
                'gravel': 7,
                'metal': 7,
                'compacted': 9,
                'dirt': 6,
                'paving_stones': 7,
                'grass_paver': 4,
                'unpaved': 7,
                'pebblestone': 7,
                'concrete': 10,
                'grass': 5,
                'mud': 2,
                'sand': 5,
                'wood': 4,
                'earth': 6,
                'woodchips': 3,
                'snow': 2,
                'ice': 2,
                'salt': 2
            })
            for column in surface_cols:
                surface_cols[column] = surface_cols[column].map(surface_map)
            surface_cols['mean'] = np.nanmean(surface_cols, axis=1)
            df['surface'] = round(surface_cols['mean'])

            # maxspeed column
            df.loc[df['maxspeed'] > 110, 'maxspeed'] = 110
            df.loc[df['maxspeed'] < 20, 'maxspeed'] = 20
            df['maxspeed'] = round(df['maxspeed'], -1)
            maxspeed_map = ({
                20: 10,
                30: 9,
                40: 8,
                50: 7,
                60: 6,
                70: 5,
                80: 4,
                90: 3,
                100: 2,
                110: 1
            })
            df['maxspeed'] = df['maxspeed'].map(maxspeed_map)
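            # Worked example: a raw maxspeed of 47 is kept (20 <= 47 <= 110),
            # rounded to the nearest ten (50) by round(..., -1), then mapped
            # to a score of 7; slower roads score higher for cyclability.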

            # lanes column
            df.loc[df['lanes'] > 8, 'lanes'] = 8
            lanes_map = {1: 10, 2: 9, 3: 5, 4: 5, 5: 3, 6: 3, 7: 2, 8: 1}
            df['lanes'] = df['lanes'].map(lanes_map)

            # oneway column
            oneway_map = {0: 5, 1: 10, -1: 5}
            df['oneway'] = df['oneway'].map(oneway_map)

            # width column
            df.loc[df['width'] < 2, 'width'] = 1
            df.loc[df['width'] > 6, 'width'] = 6
            df['width'] = round(df['width'])
            width_map = ({1: 1, 2: 2, 3: 5, 4: 7, 5: 9, 6: 10})
            df['width'] = df['width'].map(width_map)

            # normalize centrality column (between 0 and 10)
            df['centrality'] = (
                (df['centrality'] - np.min(df['centrality'])) /
                (np.max(df['centrality']) - np.min(df['centrality']))) * 10

            # Switch to a new dataframe for the weighted calculation
            d_frame = df.copy(deep=True)

            # Multiply variables by weights
            d_frame['cycleway'] = d_frame['cycleway'] * 0.208074534
            d_frame['surface'] = d_frame['surface'] * 0.108695652
            d_frame['highway'] = d_frame['highway'] * 0.167701863
            d_frame['maxspeed'] = d_frame['maxspeed'] * 0.189440994
            d_frame['lanes'] = d_frame['lanes'] * 0.108695652
            d_frame['centrality'] = d_frame['centrality'] * 0.071428571
            d_frame['width'] = d_frame['width'] * 0.086956522
            d_frame['oneway'] = d_frame['oneway'] * 0.059006211

            d_frame['index'] = np.nanmean(d_frame[[
                'cycleway', 'highway', 'surface', 'maxspeed', 'lanes', 'width',
                'oneway', 'centrality'
            ]], axis=1, dtype='float64') * 80
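            # Sanity check: the eight weights above sum to ~1.0, so when no
            # component is missing, nanmean(...) * 80 equals
            # (weighted sum / 8) * 80 = weighted sum * 10, putting the segment
            # index on a 0-100 scale; np.nanmean lets segments with missing
            # tags still receive a (rescaled) score.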

            d_frame['grid_index'] = np.average(d_frame['index'],
                                               weights=d_frame['length'])
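            # grid_index: a single length-weighted summary of the segment
            # indexes in this grid cell, repeated on every row.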
            dflist.append(d_frame)
            dfs.append(df)

        # Final index statistics for the city, collected in a dictionary
        df_indexes = pd.concat(dflist)
        result = {
            'place': place_name,
            'average_index': np.average(df_indexes['index'],
                                        weights=df_indexes['length']),
            'max_index': df_indexes['index'].max(),
            'min_index': df_indexes['index'].min(),
            'std_index': df_indexes['index'].std(),
            'grids': len(grid_list),
            'nsegments': len(df_indexes),
            'unused_grids': len(exception_grids)
        }

    if not data:
        return result
    return d_frame, result
Ejemplo n.º 27
0
def test_returns_gdf(point_gdf, single_rectangle_gdf):
    """Test that function returns a GeoDataFrame (or GDF-like) object."""
    out = clip(point_gdf, single_rectangle_gdf)
    assert isinstance(out, GeoDataFrame)
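These clip tests rely on fixtures defined elsewhere in the test suite. A minimal sketch of what point_gdf and single_rectangle_gdf plausibly look like, inferred from the assertions in the surrounding examples (the exact geometries and columns are assumptions, not taken from the suite):

import pytest
import numpy as np
from shapely.geometry import Point, box
from geopandas import GeoDataFrame


@pytest.fixture
def point_gdf():
    """Points both inside and outside the 10 x 10 clip rectangle (assumed)."""
    pts = np.array([[2, 2], [3, 4], [9, 8], [-12, -15]])
    return GeoDataFrame([Point(xy) for xy in pts],
                        columns=['geometry'],
                        crs='EPSG:4326')


@pytest.fixture
def single_rectangle_gdf():
    """A single 10 x 10 rectangle used as the clip geometry (assumed)."""
    poly = box(0, 0, 10, 10)
    return GeoDataFrame([1], geometry=[poly], crs='EPSG:4326')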
Ejemplo n.º 28
0
def main(mosaic, data, dest, ntl, bbox, country):

    os.makedirs(dest, exist_ok=True)
    os.makedirs(dest + '/pre-event', exist_ok=True)
    os.makedirs(dest + '/post-event', exist_ok=True)

    # create raster mosaic for rasters with same name (~ same area)
    print('creating mosaic of overlapping rasters')
    if mosaic:
        for prepost in ['pre', 'post']:
            filenames = os.listdir(os.path.join(data, prepost + '-event'))
            tuples = []
            for filename in filenames:
                name = filename.split('-')[1]
                same = sorted(
                    [x for x in filenames if x.split('-')[1] == name])
                if same not in tuples and len(same) > 1:
                    tuples.append(same)
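            # Example with hypothetical filenames: 'x-area1-a.tif' and
            # 'y-area1-b.tif' share the key 'area1' after split('-')[1], so
            # they form one group below and are mosaicked into a single file.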
            for group in tuples:
                out_file = os.path.splitext(group[0])[0] + '-merged.tif'
                for ix, file in enumerate(group):
                    if ix == 0:
                        os.system('gdalwarp -r average {} {} {}'.format(
                            os.path.join(data, prepost + '-event', file),
                            os.path.join(data, prepost + '-event',
                                         group[ix + 1]),
                            os.path.join(dest, prepost + '-event', out_file)))
                    elif ix == 1:
                        continue
                    else:
                        os.system('gdalwarp -r average {} {} {}'.format(
                            os.path.join(data, prepost + '-event', file),
                            os.path.join(dest, prepost + '-event', out_file),
                            os.path.join(dest, prepost + '-event', out_file)))
            # copy all the other rasters to dest
            for file in [
                    x for x in filenames
                    if x not in [item for group in tuples for item in group]
            ]:
                copyfile(os.path.join(data, prepost + '-event', file),
                         os.path.join(dest, prepost + '-event', file))

    # filter pre-event rasters

    print('filtering pre-event rasters')

    # filter by bounding box (if provided)
    if bbox != '':
        bbox_tuple = [float(x) for x in bbox.split(',')]
        bbox = box(*bbox_tuple)
        geo = gpd.GeoDataFrame({'geometry': bbox}, index=[0], crs='EPSG:4326')
        coords = getFeatures(geo)
        print('filtering on bbox:')
        print(coords)
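        # getFeatures() is defined elsewhere in this project; a common
        # implementation (an assumption, not shown in this snippet) converts
        # the GeoDataFrame into the GeoJSON-like dicts that rasterio's mask()
        # expects, e.g. [json.loads(geo.to_json())['features'][0]['geometry']]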

        # loop over images and filter
        for raster in tqdm(glob.glob(dest + '/pre-event/*.tif')):
            raster = raster.replace('\\', '/')
            raster_or = raster
            out_name = os.path.splitext(raster)[0] + '-bbox.tif'
            with rasterio.open(raster) as src:
                print('cropping on bbox')

                try:
                    out_img, out_transform = mask(dataset=src,
                                                  shapes=coords,
                                                  crop=True)
                    out_meta = src.meta.copy()
                    out_meta.update({
                        'height': out_img.shape[1],
                        'width': out_img.shape[2],
                        'transform': out_transform
                    })

                    print('saving', out_name)
                    with rasterio.open(out_name, 'w', **out_meta) as dst:
                        dst.write(out_img)
                except ValueError:
                    # mask() raises ValueError when the shapes don't overlap
                    print('empty raster, discard')

            os.remove(raster_or)

    # filter by nighttime lights

    # load nighttime light mask
    ntl_shapefile = 'input/ntl_mask_extended.shp'
    if ntl:
        # filter mask by country (if provided)
        if country != '':
            country_ntl_shapefile = os.path.splitext(
                ntl_shapefile)[0] + '_' + country.lower() + '.shp'
            if not os.path.exists(country_ntl_shapefile):
                ntl_world = gpd.read_file(ntl_shapefile)
                # force the CRS instead of the deprecated {'init': ...} syntax
                ntl_world = ntl_world.set_crs('EPSG:4326', allow_override=True)
                world = gpd.read_file(
                    gpd.datasets.get_path('naturalearth_lowres'))
                country_shape = world[world.name == country]
                if country_shape.empty:
                    print('WARNING: country', country, 'not found!!!')
                    print('available countries:')
                    print(world.name.unique())
                    print('proceeding with global mask')
                    country_ntl_shapefile = ntl_shapefile
                else:
                    country_shape = country_shape.reset_index()
                    country_shape.at[0, 'geometry'] = box(
                        *country_shape.at[0, 'geometry'].bounds)
                    country_shape.geometry = country_shape.geometry.scale(
                        xfact=1.1, yfact=1.1)
                    ntl_country = gpd.clip(ntl_world, country_shape)
                    ntl_country.to_file(country_ntl_shapefile)
            with fiona.open(country_ntl_shapefile, "r") as shapefile:
                shapes = [feature["geometry"] for feature in shapefile]
        else:
            with fiona.open(ntl_shapefile, "r") as shapefile:
                shapes = [feature["geometry"] for feature in shapefile]

        # loop over images and filter
        for raster in tqdm(glob.glob(dest + '/pre-event/*.tif')):
            raster = raster.replace('\\', '/')
            raster_or = raster
            out_name = os.path.splitext(raster)[0] + '-ntl.tif'
            if 'ntl' in raster:
                continue
            crop_next = True

            print('processing', raster)
            out_name_ntl = os.path.splitext(raster)[0] + '-ntl-mask.tif'
            try:
                with rasterio.open(raster) as src:
                    shapes_r = [
                        x for x in shapes
                        if not rasterio.coords.disjoint_bounds(
                            src.bounds, rasterio.features.bounds(x))
                    ]
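                    # disjoint_bounds is a cheap bounding-box pre-filter:
                    # only mask polygons whose bounds touch this raster are
                    # passed on to the (much more expensive) mask operation.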
                    if len(shapes_r) == 0:
                        print('no ntl present, discard')
                        crop_next = False
                    else:
                        print('ntl present, creating mask')
                        out_image, out_transform = rasterio.mask.mask(
                            src, shapes_r, crop=True)
                        out_meta = src.meta

                        out_meta.update({
                            "driver": "GTiff",
                            "height": out_image.shape[1],
                            "width": out_image.shape[2],
                            "transform": out_transform
                        })
                        # save temporary ntl file
                        print('saving mask', out_name_ntl)
                        with rasterio.open(out_name_ntl, "w",
                                           **out_meta) as dst:
                            dst.write(out_image)
                        crop_next = True
                    raster = out_name_ntl
                if crop_next:
                    with rasterio.open(raster) as src:
                        print('cropping nan on', raster)
                        window = get_data_window(src.read(1, masked=True))
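                        # get_data_window() returns the smallest window that
                        # still contains all non-masked pixels, so the write
                        # below trims the nodata border left by the NTL mask.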

                        kwargs = src.meta.copy()
                        kwargs.update({
                            'height': window.height,
                            'width': window.width,
                            'transform': rasterio.windows.transform(
                                window, src.transform)
                        })

                        print('saving', out_name)
                        try:
                            with rasterio.open(out_name, 'w', **kwargs) as dst:
                                dst.write(src.read(window=window))
                        except Exception:
                            print('empty raster, discard')

                    # remove temporary ntl file
                    os.remove(raster)
                    # remove original raster
                    os.remove(raster_or)
            except Exception as e:
                print('error loading raster ({}), skipping'.format(e))
Ejemplo n.º 29
0
def test_clip_points(point_gdf, single_rectangle_gdf):
    """Test clipping a points GDF with a generic polygon geometry."""
    clip_pts = clip(point_gdf, single_rectangle_gdf)
    pts = np.array([[2, 2], [3, 4], [9, 8]])
    exp = GeoDataFrame([Point(xy) for xy in pts], columns=["geometry"], crs="EPSG:4326")
    assert_geodataframe_equal(clip_pts, exp)
Ejemplo n.º 30
0
def test_clip_box_overlap(pointsoutside_overlap_gdf, single_rectangle_gdf):
    """Test clip when intersection is emtpy and boxes do overlap."""
    clipped = clip(pointsoutside_overlap_gdf, single_rectangle_gdf)
    assert len(clipped) == 0