Example #1
0
def test_project_raster(test_output_path, geotiff_3070):
    """Round-trip a raster through 'epsg:4269' and back to 'epsg:3070'.

    The raster supplied by the ``geotiff_3070`` fixture is reprojected to
    'epsg:4269', and that result is reprojected to 'epsg:3070'.
    The test passes if sampling both the original and the round-tripped raster
    at the nine cell centers of the original returns the values 0 through 8.
    """
    filename = geotiff_3070
    filename2 = os.path.join(test_output_path, 'test_raster_4269.tif')
    filename3 = os.path.join(test_output_path, 'test_raster_4269_3070.tif')

    # both reprojections use identical settings
    reprojection_kwargs = dict(resampling=0,
                               resolution=None,
                               num_threads=2,
                               driver='GTiff')
    project_raster(filename, filename2, 'epsg:4269', **reprojection_kwargs)
    project_raster(filename2, filename3, 'epsg:3070', **reprojection_kwargs)

    # Take the affine transform while the dataset is still open,
    # instead of reaching into the handle after the context manager closed it.
    with rasterio.open(filename) as src:
        transform = src.transform
        src.read(1)

    # make sure that both output rasters can be opened and read
    for output_raster in (filename2, filename3):
        with rasterio.open(output_raster) as out:
            out.read(1)

    # verify that get_values_at_points returns the same results
    # for the original and round-tripped 3070 rasters
    original_cell_xcenters = np.array([0.5, 1.5, 2.5] * 3)
    original_cell_ycenters = np.array([0.5] * 3 + [1.5] * 3 + [2.5] * 3)
    x, y = transform * (original_cell_xcenters, original_cell_ycenters)
    expected = np.arange(0, 9)

    results = get_values_at_points(filename, x=x, y=y)
    assert np.allclose(results, expected)
    results3 = get_values_at_points(filename3, x=x, y=y)
    assert np.allclose(results3, expected)
def rasters_to_grid(modelgrid, dem, rasters,
                    dem_elevation_units='meters', raster_elevation_units='meters',
                    dest_elevation_units='meters'):
    """Sample a sequence of rasters onto the i, j locations of a modelgrid,
    returning a 3D numpy array of the sampled elevations. Fill places with nodata
    using the next valid surface above.

    Parameters
    ----------
    modelgrid : Modflow-setup :class:`~mfsetup.grid.MFsetupGrid` instance
        Modflow-setup grid instance describing the model grid. Only the
        ``xcellcenters`` and ``ycellcenters`` attributes are used here.
    dem : str (filepath)
        Raster representing the land surface, at the highest resolution being contemplated for the model.
        Usually this is derived by sampling a higher resolution DEM using zonal statistics, taking
        the mean DEM value for each model cell.
    rasters : list of strings (filepaths)
        Raster surfaces describing hydrogeologic contacts surrounding the voxel data.
        They are passed to ``fill_cells_vertically`` in the order given
        (presumably top to bottom -- confirm against that function).
    dem_elevation_units : str, optional
        Elevation units of `dem`, by default 'meters'
    raster_elevation_units : str, optional
        Elevation units of `rasters`, by default 'meters'
    dest_elevation_units : str, optional
        Elevation units of the returned array, by default 'meters'

    Returns
    -------
    filled_raster_elevations : numpy array
        The sampled `dem` elevations, followed along the first axis
        by the filled elevations for each of the `rasters`
        (one more entry along the first axis than there are `rasters`).
        Raster elevations above the `dem` elevations are reset to the `dem` elevations.
        All values are in `dest_elevation_units`.

    References
    ----------
    See the documentation for the :func:`fill_cells_vertically <mfsetup.discretization.fill_cells_vertically>`
    function in Modflow-setup for an explanation of the filling process.

    """
    x, y = modelgrid.xcellcenters, modelgrid.ycellcenters

    # sample the land surface and convert to the destination units
    dem_unit_conversion = convert_length_units(dem_elevation_units, dest_elevation_units)
    dem_elevations = get_values_at_points(dem, x, y, method='linear') * dem_unit_conversion

    # sample each surface and convert to the destination units
    # (the conversion factor is the same for every raster)
    raster_unit_conversion = convert_length_units(raster_elevation_units, dest_elevation_units)
    raster_elevations = np.array([
        get_values_at_points(raster, x, y, method='linear') * raster_unit_conversion
        for raster in rasters
    ])

    # fill nans in the sampled original framework elevations
    # (nans are where a layer surface is absent)
    # fill the nans with the next surface above
    # see https://github.com/aleaf/modflow-setup/blob/develop/mfsetup/discretization.py
    _, filled_raster_elevations = fill_cells_vertically(dem_elevations, raster_elevations)

    # reset any values above land surface to land surface
    # (dem_elevations broadcasts across the first axis; where the comparison
    # is False, including comparisons with nan, the filled value is kept)
    filled_raster_elevations = np.where(filled_raster_elevations > dem_elevations,
                                        dem_elevations,
                                        filled_raster_elevations)

    # put the land surface on top of the stack
    return np.vstack([dem_elevations[np.newaxis], filled_raster_elevations])
Example #3
0
    def assign_missing_elevs(self, top_raster, dem_units='meters'):
        """Use the top of model raster, or land-surface raster,
        to assign the elevation for points where elevation is missing.

        Rows of ``self.df`` with a null 'FROM_ALT_VA' value are assigned the
        value sampled from `top_raster` at their 'x', 'y' location
        (passed to the sampler with ``points_crs=self.dest_crs``), converted from
        `dem_units` to ``self.model_length_units``. Afterwards,
        ``self.well_elevations`` is rebuilt as a dictionary of 'FROM_ALT_VA'
        values keyed by 'SITE_NO'.

        Parameters
        ----------
        top_raster : str
            path to raster data set with land surface or model top elevation, used
            to assign missing values to water-use points
        dem_units : str, optional
            length units of the elevations in `top_raster`, by default 'meters'

        Raises
        ------
        AssertionError
            If any 'FROM_ALT_VA' values are still null after sampling the raster.
        """
        no_elev = self.df['FROM_ALT_VA'].isnull()

        # only read the raster if there is something to fill
        if no_elev.any():
            elevs = raster.get_values_at_points(top_raster,
                                                x=self.df.loc[no_elev, 'x'].values,
                                                y=self.df.loc[no_elev, 'y'].values,
                                                points_crs=self.dest_crs)
            # multiply out-of-place (as make_production_zones does), so that
            # the conversion does not depend on the dtype of the sampled array
            elevs = elevs * convert_length_units(dem_units, self.model_length_units)
            self.df.loc[no_elev, 'FROM_ALT_VA'] = elevs

        assert not self.df['FROM_ALT_VA'].isnull().any(), \
            "FROM_ALT_VA still has missing values after sampling top_raster"
        self.well_elevations = dict(
            zip(self.df['SITE_NO'], self.df['FROM_ALT_VA']))
Example #4
0
    def make_production_zones(self,
                              production_zones,
                              default_elevation_units='feet'):
        """Make dictionary attributes for production zones.
        These are used to assign individual wells to production zones.

        For each zone, the top and bottom rasters are sampled at the
        'x', 'y' locations in ``self.df`` and converted to ``self.model_length_units``.
        The results are stored in ``self.prod_zone_top[zone_name]`` and
        ``self.prod_zone_bot[zone_name]``, as dictionaries keyed by the values in
        the ``self.site_no_col`` column of ``self.df``, and also in new
        '<zone_name>_top' and '<zone_name>_botm' columns of ``self.df``.

        Parameters
        ----------
        production_zones : dict
            Production zone information, keyed by zone name (str).
            Each value is a sequence of the form
            ``[zone_top, zone_bot]`` or ``[zone_top, zone_bot, units]``, where

            zone_top : str
                path to raster with top of zone
            zone_bot : str
                path to raster with bottom of zone
            units : str, optional
                elevation units of the two rasters; if omitted,
                `default_elevation_units` is used.
        default_elevation_units : str, optional
            Elevation units assumed for zones that don't specify their own,
            by default 'feet'
        """
        # sample locations and dictionary keys are the same for every zone
        x = self.df['x'].values
        y = self.df['y'].values
        site_numbers = self.df[self.site_no_col]

        # get tops and bottoms of estimated production intervals at each well
        # make dictionaries to lookup by well
        for name, info in production_zones.items():
            top_raster, botm_raster, *units = info
            units = units[0] if units else default_elevation_units
            length_unit_conversion = convert_length_units(
                units, self.model_length_units)

            # NOTE(review): unlike assign_missing_elevs, no points_crs is passed here
            # -- confirm that x, y are already in the CRS of the rasters
            top_elevations = raster.get_values_at_points(
                top_raster, x=x, y=y) * length_unit_conversion
            botm_elevations = raster.get_values_at_points(
                botm_raster, x=x, y=y) * length_unit_conversion

            self.prod_zone_top[name] = dict(zip(site_numbers, top_elevations))
            self.prod_zone_bot[name] = dict(zip(site_numbers, botm_elevations))
            self.df[f'{name}_top'] = top_elevations
            self.df[f'{name}_botm'] = botm_elevations
Example #5
0
def test_get_values_at_points_arc_ascii(test_output_path):
    """Sample the raster made by ``arc_ascii`` at three points.

    The third point is expected to come back as nan with
    ``out_of_bounds_errors='coerce'`` (presumably because it is outside of
    the raster); nans are swapped for -9999 so that it can be compared as well.
    """
    raster_path, _ = arc_ascii(test_output_path)
    xs = [2.5, 7.5, -1]
    ys = [2.5, 7.5, -1]
    sampled = get_values_at_points(raster_path, x=xs, y=ys,
                                   out_of_bounds_errors='coerce')
    # np.allclose treats nan as unequal to everything, so use a sentinel value
    sampled = np.where(np.isnan(sampled), -9999, sampled)
    assert np.allclose(sampled, [2, 1, -9999])
Example #6
0
def test_get_values_at_points_netcdf(test_output_path):
    """Sample the 'values' variable of the file made by ``nc_file`` at three points.

    The third point is expected to come back as nan with
    ``out_of_bounds_errors='coerce'`` (presumably because it is outside of
    the data); nans are swapped for -9999 so that it can be compared as well.
    """
    nc_path, _ = nc_file(test_output_path)
    xs = [2.5, 7.5, -1]
    ys = [2.5, 7.5, -1]
    sampled = get_values_at_points(nc_path, x=xs, y=ys,
                                   xarray_variable='values',
                                   out_of_bounds_errors='coerce')
    # np.allclose treats nan as unequal to everything, so use a sentinel value
    sampled = np.where(np.isnan(sampled), -9999, sampled)
    assert np.allclose(sampled, [2., 1., -9999.])
Example #7
0
def test_points_to_raster(point_data, test_output_path):
    """Rasterize a point shapefile, then check that sampling the raster
    at the original point locations returns the original point values.

    The ``point_data`` fixture is not referenced in the body
    (presumably it writes 'test_points.shp' -- confirm against the fixture).
    """
    shapefile = test_output_path / 'test_points.shp'
    raster_file = test_output_path / 'test_points_raster.tif'
    points_to_raster([shapefile],
                     data_col='values',
                     output_resolution=0.1,
                     outfile=raster_file)

    # read the points back in and sample the new raster at their locations
    points = shp2df(str(shapefile))
    point_x = [geom.x for geom in points.geometry]
    point_y = [geom.y for geom in points.geometry]
    sampled = get_values_at_points(raster_file, point_x, point_y)
    assert np.allclose(sampled, points['values'].values)
Example #8
0
def test_get_values_at_points_geotiff(test_output_path, x, y, rotation, method,
                                      expected, size_thresh):
    """Sample the raster made by ``geotiff`` (with the given rotation) at x, y,
    and compare the results to the expected values for the sampling method.

    ``expected`` is indexed by method name. When ``size_thresh`` is 0,
    the results are compared to the 'nearest' entry regardless of ``method``.
    """
    raster_path, _ = geotiff(test_output_path, rotation=rotation)
    sampled = get_values_at_points(raster_path,
                                   x=x, y=y,
                                   method=method,
                                   out_of_bounds_errors='coerce',
                                   size_thresh=size_thresh)
    # np.allclose treats nan as unequal to everything, so use a sentinel value
    sampled = np.where(np.isnan(sampled), -9999, sampled)
    expected_key = 'nearest' if size_thresh == 0 else method
    assert np.allclose(sampled, expected[expected_key])
Example #9
0
def test_get_values_at_points_in_a_different_crs(geotiff_3070):
    """Sample a raster with points given in a CRS other than that of the raster.

    The nine cell centers of the raster are sampled twice: once with coordinates
    computed from the raster's own transform, and once with the same locations
    reprojected from 'epsg:3070' to 'epsg:4326' (passing ``points_crs='epsg:4326'``).
    Both are expected to return the values 0 through 8.
    """
    # Take the affine transform while the dataset is still open,
    # instead of reaching into the handle after the context manager closed it.
    with rasterio.open(geotiff_3070) as src:
        transform = src.transform

    # points that represent the cell centers of the raster in epsg:3070
    original_cell_xcenters = np.array([0.5, 1.5, 2.5] * 3)
    original_cell_ycenters = np.array([0.5] * 3 + [1.5] * 3 + [2.5] * 3)
    x, y = transform * (original_cell_xcenters, original_cell_ycenters)
    expected = np.arange(0, 9)

    results = get_values_at_points(geotiff_3070, x=x, y=y)
    assert np.allclose(results, expected)

    # reproject the points to epsg:4326
    x_4326, y_4326 = project((x, y), 'epsg:3070', 'epsg:4326')
    results2 = get_values_at_points(geotiff_3070,
                                    x=x_4326,
                                    y=y_4326,
                                    points_crs='epsg:4326')
    assert np.allclose(results2, expected)
Example #10
0
def preprocess_iwum_pumping(ncfile,
                            start_date=None,
                            end_date=None,
                            active_area=None,
                            active_area_id_column=None,
                            active_area_feature_id=None,
                            estimated_production_zone_top=None,
                            estimated_production_zone_botm=None,
                            flux_variable='value',
                            nc_crs=5070,
                            dest_crs=5070,
                            nc_length_units='meters',
                            estimated_production_surface_units='meters',
                            model_length_units='meters',
                            outfile=None):
    """Get pumping from the Irrigation Water Use Model (IWUM; Wilson, 2020) output and
    assign open interval information, using raster surfaces of the
    top and bottom of an estimated production zone.

    Parameters
    ----------
    ncfile : file path
        NetCDF output from Irrigation Water Use Model
    start_date : str
        Cull data before this date.
    end_date : str
        Cull data after this date.
    active_area : str
        Shapefile with polygon to cull observations to. Automatically reprojected
        to dest_crs if the shapefile includes a .prj file.
        by default, None.
    active_area_id_column : str, optional
        Column in active_area with feature ids.
        By default, None, in which case all features are used.
    active_area_feature_id : str, optional
        ID of feature to use for active area
        By default, None, in which case all features are used.
    estimated_production_zone_top : file path
        Raster surface for assigning screen tops.
        Screen elevations are only assigned if both this and
        `estimated_production_zone_botm` are provided.
    estimated_production_zone_botm : file path
        Raster surface for assigning screen bottoms
    flux_variable : str
        Variable in ncfile for pumping fluxes. Fluxes are assumed to
        represent total volumes for each time period.
    nc_crs : obj
        Coordinate Reference System (CRS) of ncfile.
        A Python int, dict, str, or pyproj.crs.CRS instance
        passed to the pyproj.crs.from_user_input
        See http://pyproj4.github.io/pyproj/stable/api/crs/crs.html#pyproj.crs.CRS.from_user_input.
        Can be any of:
          - PROJ string
          - Dictionary of PROJ parameters
          - PROJ keyword arguments for parameters
          - JSON string with PROJ parameters
          - CRS WKT string
          - An authority string [i.e. 'epsg:4326']
          - An EPSG integer code [i.e. 4326]
          - A tuple of ("auth_name": "auth_code") [i.e ('epsg', '4326')]
          - An object with a `to_wkt` method.
          - A :class:`pyproj.crs.CRS` class
    dest_crs : obj
        Coordinate Reference System (CRS) that the output x, y locations
        are projected to. Same input options as `nc_crs`.
    nc_length_units : str, {'meters', 'ft', etc.}
        Length units of pumped volumes in ncfile
    estimated_production_surface_units : str, {'meters', 'ft', etc.}
        Length units of elevations in estimated production surface rasters.
    model_length_units : str, {'meters', 'ft', etc.}
        Length units of model.
    outfile : csv file for output table
        If provided, the table is written to this file, and
        ``plot_iwum_output`` is called with a 'plots' folder
        next to it as the output location.

    Returns
    -------
    df : DataFrame
        Table of pumping rates (volume per day), location
        and open interval information. Only rows with negative
        pumping rates are included.

        Columns:

        ============== ================================================
        site_no        index position of pumping rate in ncfile grid,
                       prefixed with 'iwum_'
        x              x-coordinate in `dest_crs`
        y              y-coordinate in `dest_crs`
        start_datetime start date of pumping period
        end_datetime   end date of pumping period
        screen_top     screen top elevation, in `model_length_units`
        screen_botm    screen bottom elevation, in `model_length_units`
        q              pumping rate, in model units
        geometry       shapely Point object representing location
        ============== ================================================

        NOTE(review): within this function, 'geometry' is only assigned to the
        internal one-row-per-location metadata table; confirm whether
        it is expected in the returned table.

    Raises
    ------
    ValueError
        If only one time entry falls between `start_date` and `end_date`,
        in which case the length of the (last) pumping period can't be determined.

    Notes
    -----
    * Time units are assumed to be days.
    * Fluxes are assumed to represent total volumes for each time period
      indicated by the differences between successive values along the time axis of ncfile.
    * The last time period is assumed to have the same length as the one before it.
    """
    # the context manager ensures that the NetCDF file is closed
    # once the pumping data have been read from it
    with xr.open_dataset(ncfile) as ds:
        time_variable = [k for k in ds.coords.keys() if k.lower() not in {'x', 'y'}][0]
        ds_x, ds_y = np.meshgrid(ds['x'], ds['y'])

        # original values are in m3, in each 1 mi2 cell
        # can leave in m3 if reassigning to 1km grid as point values
        # NOTE(review): the factor from convert_volume_units is cubed here;
        # confirm that it returns a length (not a volume) factor for these inputs
        length_conversion = convert_volume_units(nc_length_units,
                                                 model_length_units) ** 3
        # not used below; retained because the lookup fails
        # for model_length_units that aren't in vol_suffix
        unit_suffix = vol_suffix[model_length_units] + 'd'
        flux_col = 'q'  # 'flux_{}'.format(unit_suffix)  # output field name for fluxes

        # get top/botm elevations
        est_screen_top = None
        est_screen_botm = None
        if estimated_production_zone_top is not None and \
                estimated_production_zone_botm is not None:
            surf_unit_conversion = convert_length_units(estimated_production_surface_units,
                                                        model_length_units)
            est_screen_top = get_values_at_points(estimated_production_zone_top, ds_x, ds_y,
                                                  points_crs=nc_crs)
            est_screen_top *= surf_unit_conversion
            est_screen_botm = get_values_at_points(estimated_production_zone_botm, ds_x, ds_y,
                                                   points_crs=nc_crs)
            est_screen_botm *= surf_unit_conversion

            # in any places where screen top is less than the screen botm,
            # set both at the mean
            loc = est_screen_top < est_screen_botm
            means = np.mean([est_screen_top, est_screen_botm], axis=0)
            est_screen_top[loc] = means[loc]
            est_screen_botm[loc] = means[loc]
            print(f'Reset screen top and bottom to mean elevation at {loc.ravel().sum()} '
                  f'locations where screen top was < screen bottom')

        dfs = []
        times = pd.DatetimeIndex(ds[time_variable].loc[start_date:end_date].values)
        for n, period_start_date in enumerate(times):

            # for each time entry, get the data
            kwargs = {time_variable: period_start_date}
            arr = ds[flux_variable].sel(**kwargs).values

            # make sure pumping sign is  negative
            # based on assumption that values are mostly abstraction
            if arr.sum() > 0:
                arr = arr * -1

            # set up a dataframe
            data = {'site_no': np.arange(ds_x.size),
                    'x': ds_x.ravel(),
                    'y': ds_y.ravel(),
                    }
            if est_screen_top is not None and est_screen_botm is not None:
                data.update({'screen_top': est_screen_top.ravel(),
                             'screen_botm': est_screen_botm.ravel()
                             }
                            )
            df = pd.DataFrame(data)
            df['start_datetime'] = period_start_date

            # get the end_date, handling last entry
            if n + 1 < len(times):
                period_end_date = times[n + 1]
            elif n > 0:
                # set end date for last period on previous period length
                # (the previous start date is taken from the time index, because
                # the previous table in dfs is empty if it had no negative fluxes)
                ndays = (period_start_date - times[n - 1]).days
                period_end_date = period_start_date + pd.Timedelta(ndays, unit='d')
            else:
                raise ValueError(
                    "Cannot determine the length of the pumping period: only one "
                    "time entry ({}) between start_date and end_date."
                    .format(period_start_date))

            # convert the time units
            ndays = (pd.Timestamp(period_end_date) -
                     pd.Timestamp(period_start_date)).days
            assert ndays > 0, "period_end_date {} is before period_start_date {}"\
                .format(period_end_date, period_start_date)
            time_conversion = 1 / ndays  # original quantities are volumes for the time period

            # time indexing in pandas is through last value
            period_end_date = pd.Timestamp(period_end_date) - pd.Timedelta(1, unit='d')
            df['end_datetime'] = period_end_date
            df[flux_col] = arr.ravel() * length_conversion * time_conversion

            # only include negative fluxes
            # (pumping, after the sign check above)
            df = df.loc[df[flux_col] < 0]

            dfs.append(df)
    df = pd.concat(dfs)

    # site number column (that would be unique from other integers from other data sources)
    df['site_no'] = [f'iwum_{node}' for node in df.site_no]

    # project the data to a destination crs, if provided
    # make a separate metadata dataframe with 1 row per location
    # to avoid redundant operations
    metadata = df.groupby('site_no').first().reset_index()[['site_no', 'x', 'y']]
    metadata.index = metadata['site_no']
    x_pr, y_pr = project((metadata.x.values, metadata.y.values), nc_crs, dest_crs)
    metadata['x'], metadata['y'] = x_pr, y_pr
    metadata['geometry'] = [Point(x, y) for x, y in zip(x_pr, y_pr)]

    # cull the data to the model area, if provided
    if active_area is not None:
        df, metadata = cull_data_to_active_area(df, active_area,
                                                active_area_id_column,
                                                active_area_feature_id,
                                                data_crs=dest_crs, metadata=metadata)

    # update data with x,y values projected in metadata
    x = dict(zip(metadata.site_no, metadata.x))
    y = dict(zip(metadata.site_no, metadata.y))
    df['x'] = [x[sn] for sn in df.site_no]
    df['y'] = [y[sn] for sn in df.site_no]
    if outfile is not None:
        outfile = Path(outfile)
        df.to_csv(outfile, index=False, float_format='%g')
        print('wrote {}'.format(outfile))

        # Make a plot of the iwum output, in a 'plots' folder next to outfile
        out_pdf_path = outfile.parent / 'plots'
        out_pdf_path.mkdir(exist_ok=True)
        plot_iwum_output(ncfile, flux_variable=flux_variable, outpath=out_pdf_path)

    return df
Example #11
0
def plot_cross_sections(layers,
                        out_pdf,
                        property_data=None,
                        voxel_start_layer=0,
                        voxel_zones=None,
                        cmap='copper',
                        voxel_cmap='viridis',
                        unit_labels=None,
                        add_raster_surfaces=None,
                        modelgrid=None,
                        slice_interval=50):
    """Generate a multi-page PDF of the layer cross sections.

    One page is made for every `slice_interval`-th row (starting with row 0),
    followed by one page for every `slice_interval`-th column (starting with column 0).

    Parameters
    ----------
    layers : 3D numpy array
        Array of layer elevations, starting with the model top.
        (Length equal to the number of botm_array + 1)
    out_pdf : str (filepath)
        Filename of multi-page PDF.
    property_data : 3D numpy array
        Array of zone numbers generated by setup_model_layers.
    voxel_start_layer : int, optional
        First layer with voxel data, by default 0
    voxel_zones : sequence, optional
        Zone numbers within property_data that are voxel-based,
        by default None
    cmap : str, optional
        Matplotlib colormap for non-voxel zone numbers, by default 'copper',
        to contrast with colormap for voxel-based zone numbers.
    voxel_cmap : str, optional
        Matplotlib colormap for voxel-based zone numbers, by default 'viridis'.
    unit_labels : dict, optional
        Dictionary mapping non-voxel zone numbers to hydrogeologic units,
        by default None
    add_raster_surfaces : dict, optional
        Dictionary of rasters, keyed by label. Each raster is sampled at the
        cell centers of `modelgrid`, and the resulting 2D arrays (keyed by label)
        are passed to ``plot_slice`` as ``add_surfaces``.
        By default None, in which case no rasters are sampled.
    modelgrid : grid instance, optional
        Model grid; required if `add_raster_surfaces` is specified.
        Must have a 3-item ``shape`` (the last two items being the number
        of rows and columns), and ``xcellcenters``, ``ycellcenters`` and ``crs`` attributes.
    slice_interval : int, optional
        Spacing, in number of rows or columns, between successive cross sections,
        by default 50.

    Raises
    ------
    ValueError
        If `add_raster_surfaces` is specified without a `modelgrid`.
    """
    raster_arrays = None
    if add_raster_surfaces:
        if modelgrid is None:
            raise ValueError("add_raster_surfaces option requires a modelgrid")
        _, grid_nrow, grid_ncol = modelgrid.shape
        x = modelgrid.xcellcenters.ravel()
        y = modelgrid.ycellcenters.ravel()
        # sample each raster at the cell centers; reshape to the 2D grid shape
        raster_arrays = {
            label: np.reshape(get_values_at_points(raster, x, y,
                                                   points_crs=modelgrid.crs),
                              (grid_nrow, grid_ncol))
            for label, raster in add_raster_surfaces.items()
        }

    # arguments shared by the row and column cross sections
    slice_kwargs = dict(voxel_start_layer=voxel_start_layer,
                        voxel_zones=voxel_zones,
                        cmap=cmap,
                        voxel_cmap=voxel_cmap,
                        unit_labels=unit_labels,
                        add_surfaces=raster_arrays)

    _, nrow, ncol = layers.shape
    with PdfPages(out_pdf) as pdf:
        # cross sections along rows
        for row in range(0, nrow, slice_interval):
            plot_slice(layers,
                       property_data,
                       row=row,
                       column=slice(None, None),
                       **slice_kwargs)
            pdf.savefig()
            plt.close()
        # cross sections along columns
        for column in range(0, ncol, slice_interval):
            plot_slice(layers,
                       property_data,
                       row=slice(None, None),
                       column=column,
                       **slice_kwargs)
            pdf.savefig()
            plt.close()