def test_project_raster(test_output_path, geotiff_3070):
    filename = geotiff_3070
    filename2 = os.path.join(test_output_path, 'test_raster_4269.tif')
    project_raster(filename, filename2, 'epsg:4269',
                   resampling=0, resolution=None, num_threads=2,
                   driver='GTiff')
    filename3 = os.path.join(test_output_path, 'test_raster_4269_3070.tif')
    project_raster(filename2, filename3, 'epsg:3070',
                   resampling=0, resolution=None, num_threads=2,
                   driver='GTiff')
    with rasterio.open(filename) as src:
        array = src.read(1)
        transform = src.transform
    with rasterio.open(filename2) as src2:
        array2 = src2.read(1)
    with rasterio.open(filename3) as src3:
        array3 = src3.read(1)
    # verify that get_values_at_points returns the same results
    # for the original and round-tripped 3070 rasters
    original_cell_xcenters = np.array([0.5, 1.5, 2.5] * 3)
    original_cell_ycenters = np.array([0.5] * 3 + [1.5] * 3 + [2.5] * 3)
    x, y = transform * (original_cell_xcenters, original_cell_ycenters)
    results = get_values_at_points(filename, x=x, y=y)
    expected = np.arange(0, 9)
    assert np.allclose(results, expected)
    results3 = get_values_at_points(filename3, x=x, y=y)
    assert np.allclose(results3, expected)
def rasters_to_grid(modelgrid, dem, rasters,
                    dem_elevation_units='meters',
                    raster_elevation_units='meters',
                    dest_elevation_units='meters'):
    """Sample a sequence of rasters onto the i, j locations of a modelgrid,
    returning a 3D numpy array of the sampled elevations. Fill places with
    nodata using the next valid surface above.

    Parameters
    ----------
    modelgrid : Modflow-setup :class:`~mfsetup.grid.MFsetupGrid` instance
        Modflow-setup grid instance describing the model grid
    dem : str (filepath)
        Raster representing the land surface, at the highest resolution being
        contemplated for the model. Usually this is derived by sampling a
        higher resolution DEM using zonal statistics, taking the mean DEM
        value for each model cell.
    rasters : list of strings (filepaths)
        Raster surfaces describing hydrogeologic contacts surrounding the
        voxel data.
    dem_elevation_units : str, optional
        Elevation units of dem, by default 'meters'
    raster_elevation_units : str, optional
        Elevation units of rasters, by default 'meters'
    dest_elevation_units : str, optional
        Elevation units of the model, by default 'meters'

    References
    ----------
    See the documentation for the :func:`fill_cells_vertically
    <mfsetup.discretization.fill_cells_vertically>` function in Modflow-setup
    for an explanation of the filling process.
    """
    grid = modelgrid
    dem_elevations = get_values_at_points(dem, grid.xcellcenters,
                                          grid.ycellcenters, method='linear')
    # convert to model units
    dem_elevations *= convert_length_units(dem_elevation_units,
                                           dest_elevation_units)

    raster_elevations = []
    for raster in rasters:
        grid_cell_values = get_values_at_points(raster, grid.xcellcenters,
                                                grid.ycellcenters,
                                                method='linear')
        # convert to model units
        grid_cell_values *= convert_length_units(raster_elevation_units,
                                                 dest_elevation_units)
        raster_elevations.append(grid_cell_values)
    raster_elevations = np.array(raster_elevations)

    # fill nans in the sampled original framework elevations
    # (nans are where a layer surface is absent);
    # fill the nans with the next surface above
    # see https://github.com/aleaf/modflow-setup/blob/develop/mfsetup/discretization.py
    model_top_filled, filled_raster_elevations = fill_cells_vertically(
        dem_elevations, raster_elevations)
    above_land_surface = filled_raster_elevations > dem_elevations
    # reset any values above land surface to land surface
    dem_means_3d = np.tile(dem_elevations,
                           (filled_raster_elevations.shape[0], 1, 1))
    filled_raster_elevations[above_land_surface] = dem_means_3d[above_land_surface]
    del dem_means_3d
    filled_raster_elevations = np.vstack([np.reshape(dem_elevations,
                                                     (1, *dem_elevations.shape)),
                                          filled_raster_elevations])
    return filled_raster_elevations
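# A minimal usage sketch for rasters_to_grid. The raster file paths below are
# hypothetical placeholders; `modelgrid` is assumed to be an existing
# mfsetup.grid.MFsetupGrid instance supplied by the caller.
def example_rasters_to_grid(modelgrid):
    surfaces = rasters_to_grid(modelgrid,
                               dem='dem_means.tif',  # hypothetical paths
                               rasters=['layer1_bottom.tif',
                                        'layer2_bottom.tif'],
                               dem_elevation_units='meters',
                               raster_elevation_units='feet',
                               dest_elevation_units='meters')
    # surfaces has shape (len(rasters) + 1, nrow, ncol),
    # with the sampled DEM as the first (top) surface
    return surfaces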
def assign_missing_elevs(self, top_raster, dem_units='meters'):
    """Use the top of model raster, or land-surface raster, to assign
    the elevation for points where elevation is missing.

    Parameters
    ----------
    top_raster : str
        path to raster data set with land surface or model top elevation,
        used to assign missing values to water-use points
    dem_units : str, optional
        length units of the elevations in top_raster, by default 'meters'
    """
    no_elev = self.df['FROM_ALT_VA'].isnull()
    x_no_elev = self.df.loc[no_elev, 'x'].values
    y_no_elev = self.df.loc[no_elev, 'y'].values
    elevs = raster.get_values_at_points(top_raster,
                                        x=x_no_elev,
                                        y=y_no_elev,
                                        points_crs=self.dest_crs)
    elevs *= convert_length_units(dem_units, self.model_length_units)
    self.df.loc[no_elev, 'FROM_ALT_VA'] = elevs
    assert not self.df['FROM_ALT_VA'].isnull().any()
    self.well_elevations = dict(zip(self.df['SITE_NO'],
                                    self.df['FROM_ALT_VA']))
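# A minimal usage sketch for assign_missing_elevs. `wu` is assumed to be an
# instance of the preprocessing class this method belongs to, and the raster
# path is a hypothetical placeholder.
def example_assign_missing_elevs(wu):
    wu.assign_missing_elevs('model_top.tif', dem_units='feet')
    # missing FROM_ALT_VA values are now filled from the raster, and
    # wu.well_elevations maps each SITE_NO to its elevation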
def make_production_zones(self, production_zones,
                          default_elevation_units='feet'):
    """Make dictionary attributes for production zones. These are
    used to assign individual wells to production zones. The defaultdict
    is keyed by zone_name and then by the site number column
    (self.site_no_col, 'SITE_NO' by default).

    Parameters
    ----------
    production_zones : dict
        Dictionary of production zone information, keyed by zone_name,
        with values of [zone_top, zone_bot] or
        [zone_top, zone_bot, elevation_units]

        zone_name : str
            name assigned to production zone
        zone_top : str
            path to raster with top of zone
        zone_bot : str
            path to raster with bottom of zone
    default_elevation_units : str, optional
        Elevation units of the zone rasters, used for zones that don't
        specify their own units, by default 'feet'
    """
    key = self.site_no_col
    # get tops and bottoms of estimated production intervals at each well
    # make dictionaries to look up by well
    for name, info in production_zones.items():
        top_raster, botm_raster, *units = info
        units = units[0] if units else default_elevation_units
        x = self.df['x'].values
        y = self.df['y'].values
        length_unit_conversion = convert_length_units(
            units, self.model_length_units)
        top_elevations = raster.get_values_at_points(
            top_raster, x=x, y=y) * length_unit_conversion
        self.prod_zone_top[name] = dict(zip(self.df[key], top_elevations))
        botm_elevations = raster.get_values_at_points(
            botm_raster, x=x, y=y) * length_unit_conversion
        self.prod_zone_bot[name] = dict(zip(self.df[key], botm_elevations))
        self.df['{}_top'.format(name)] = top_elevations
        self.df['{}_botm'.format(name)] = botm_elevations
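# A minimal usage sketch for make_production_zones. The zone names and raster
# paths are hypothetical placeholders; `wu` is assumed to be an instance of
# the class this method belongs to.
def example_make_production_zones(wu):
    production_zones = {
        # zone_name: [top raster, bottom raster, (optional) elevation units]
        'upper': ['upper_top.tif', 'upper_botm.tif'],            # default units
        'lower': ['lower_top.tif', 'lower_botm.tif', 'meters'],  # per-zone units
    }
    wu.make_production_zones(production_zones, default_elevation_units='feet')
    # zone tops/bottoms by site number are then available in
    # wu.prod_zone_top['upper'] and wu.prod_zone_bot['upper']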
def test_get_values_at_points_arc_ascii(test_output_path):
    filename, _ = arc_ascii(test_output_path)
    result = get_values_at_points(filename,
                                  x=[2.5, 7.5, -1], y=[2.5, 7.5, -1],
                                  out_of_bounds_errors='coerce')
    result[np.isnan(result)] = -9999
    expected = [2, 1, -9999]
    assert np.allclose(result, expected)
def test_get_values_at_points_netcdf(test_output_path):
    filename, _ = nc_file(test_output_path)
    result = get_values_at_points(filename,
                                  x=[2.5, 7.5, -1], y=[2.5, 7.5, -1],
                                  xarray_variable='values',
                                  out_of_bounds_errors='coerce')
    result[np.isnan(result)] = -9999
    expected = [2., 1., -9999.]
    assert np.allclose(result, expected)
def test_points_to_raster(point_data, test_output_path):
    bottom_shapefiles = [test_output_path / 'test_points.shp']
    outfile = test_output_path / 'test_points_raster.tif'
    points_to_raster(bottom_shapefiles,
                     data_col='values',
                     output_resolution=0.1,
                     outfile=outfile)
    source_data = shp2df(str(bottom_shapefiles[0]))
    x = [g.x for g in source_data.geometry]
    y = [g.y for g in source_data.geometry]
    results = get_values_at_points(outfile, x, y)
    assert np.allclose(results, source_data['values'].values)
def test_get_values_at_points_geotiff(test_output_path, x, y, rotation,
                                      method, expected, size_thresh):
    filename, transform = geotiff(test_output_path, rotation=rotation)
    result = get_values_at_points(filename, x=x, y=y,
                                  method=method,
                                  out_of_bounds_errors='coerce',
                                  size_thresh=size_thresh)
    result[np.isnan(result)] = -9999
    if size_thresh == 0:
        method = 'nearest'
    assert np.allclose(result, expected[method])
def test_get_values_at_points_in_a_different_crs(geotiff_3070):
    # get the affine transform for the raster
    with rasterio.open(geotiff_3070) as src:
        transform = src.transform
    # points that represent the cell centers of the raster in epsg:3070
    original_cell_xcenters = np.array([0.5, 1.5, 2.5] * 3)
    original_cell_ycenters = np.array([0.5] * 3 + [1.5] * 3 + [2.5] * 3)
    x, y = transform * (original_cell_xcenters, original_cell_ycenters)
    results = get_values_at_points(geotiff_3070, x=x, y=y)
    expected = np.arange(0, 9)
    assert np.allclose(results, expected)

    # reproject the points to epsg:4326;
    # sampling with points_crs should give the same results
    x_4326, y_4326 = project((x, y), 'epsg:3070', 'epsg:4326')
    results2 = get_values_at_points(geotiff_3070, x=x_4326, y=y_4326,
                                    points_crs='epsg:4326')
    assert np.allclose(results2, expected)
def preprocess_iwum_pumping(ncfile,
                            start_date=None,
                            end_date=None,
                            active_area=None,
                            active_area_id_column=None,
                            active_area_feature_id=None,
                            estimated_production_zone_top=None,
                            estimated_production_zone_botm=None,
                            flux_variable='value',
                            nc_crs=5070,
                            dest_crs=5070,
                            nc_length_units='meters',
                            estimated_production_surface_units='meters',
                            model_length_units='meters',
                            outfile=None):
    """Get pumping from the Irrigation Water Use Model (IWUM; Wilson, 2020)
    output and assign open interval information, using raster surfaces of
    the top and bottom of an estimated production zone.

    Parameters
    ----------
    ncfile : file path
        NetCDF output from Irrigation Water Use Model
    start_date : str
        Cull data before this date.
    end_date : str
        Cull data after this date.
    active_area : str
        Shapefile with polygon to cull observations to. Automatically
        reprojected to dest_crs if the shapefile includes a .prj file.
        By default, None.
    active_area_id_column : str, optional
        Column in active_area with feature ids.
        By default, None, in which case all features are used.
    active_area_feature_id : str, optional
        ID of feature to use for active area.
        By default, None, in which case all features are used.
    estimated_production_zone_top : file path
        Raster surface for assigning screen tops
    estimated_production_zone_botm : file path
        Raster surface for assigning screen bottoms
    flux_variable : str
        Variable in ncfile with the pumping fluxes. Fluxes are assumed to
        represent total volumes for each time period.
    nc_crs : obj
        Coordinate Reference System (CRS) of ncfile.
        A Python int, dict, str, or pyproj.crs.CRS instance
        passed to pyproj.crs.from_user_input; see
        http://pyproj4.github.io/pyproj/stable/api/crs/crs.html#pyproj.crs.CRS.from_user_input.
        Can be any of:

          - PROJ string
          - Dictionary of PROJ parameters
          - PROJ keyword arguments for parameters
          - JSON string with PROJ parameters
          - CRS WKT string
          - An authority string [i.e. 'epsg:4326']
          - An EPSG integer code [i.e. 4326]
          - A tuple of ("auth_name": "auth_code") [i.e ('epsg', '4326')]
          - An object with a `to_wkt` method.
          - A :class:`pyproj.crs.CRS` class
    dest_crs : obj, optional
        CRS for the output data (same formats accepted as for nc_crs).
        By default, 5070.
    nc_length_units : str, {'meters', 'ft', etc.}
        Length units of pumped volumes in ncfile
    estimated_production_surface_units : str, {'meters', 'ft', etc.}
        Length units of elevations in estimated production surface rasters.
    model_length_units : str, {'meters', 'ft', etc.}
        Length units of model.
    outfile : str, optional
        Path for output csv file with the results table.

    Returns
    -------
    df : DataFrame
        Table of pumping rates in m3/day, with location and open interval
        information. Columns:

        ============== ================================================
        site_no        index position of pumping rate in ncfile grid
        x              x-coordinate in `dest_crs`
        y              y-coordinate in `dest_crs`
        start_datetime start date of pumping period
        end_datetime   end date of pumping period
        screen_top     screen top elevation, in `model_length_units`
        screen_botm    screen bottom elevation, in `model_length_units`
        q              pumping rate, in model units
        geometry       shapely Point object representing location
        ============== ================================================

    Notes
    -----
    * Time units are assumed to be days.
    * Fluxes are assumed to represent total volumes for each time period
      indicated by the differences between successive values along the
      time axis of ncfile.
""" ds = xr.open_dataset(ncfile) time_variable = [k for k in ds.coords.keys() if k.lower() not in {'x', 'y'}][0] ds_x, ds_y = np.meshgrid(ds['x'], ds['y']) # original values are in m3, in each 1 mi2 cell # can leave in m3 if reassigning to 1km grid as point values length_conversion = convert_volume_units(nc_length_units, model_length_units) ** 3 unit_suffix = vol_suffix[model_length_units] + 'd' flux_col = 'q' # 'flux_{}'.format(unit_suffix) # output field name for fluxes # get top/botm elevations est_screen_top = None est_screen_botm = None if estimated_production_zone_top is not None and \ estimated_production_zone_botm is not None: surf_unit_conversion = convert_length_units(estimated_production_surface_units, model_length_units) est_screen_top = get_values_at_points(estimated_production_zone_top, ds_x, ds_y, points_crs=nc_crs) est_screen_top *= surf_unit_conversion est_screen_botm = get_values_at_points(estimated_production_zone_botm, ds_x, ds_y, points_crs=nc_crs) est_screen_botm *= surf_unit_conversion # in any places where screen top is less than the screen botm, # set both at the mean loc = est_screen_top < est_screen_botm means = np.mean([est_screen_top, est_screen_botm], axis=0) est_screen_top[loc] = means[loc] est_screen_botm[loc] = means[loc] print(f'Reset screen top and bottom to mean elevation at {loc.ravel().sum()} ' f'locations where screen top was < screen bottom') dfs = [] times = pd.DatetimeIndex(ds[time_variable].loc[start_date:end_date].values) for n, period_start_date in enumerate(times): # for each time entry, get the data kwargs = {time_variable: period_start_date} arr = ds[flux_variable].sel(**kwargs).values # make sure pumping sign is negative # based on assumption that values are mostly abstraction if arr.sum() > 0: arr *= -1 # set up a dataframe data = {'site_no': np.arange(ds_x.size), 'x': ds_x.ravel(), 'y': ds_y.ravel(), } if est_screen_top is not None and est_screen_botm is not None: data.update({'screen_top': est_screen_top.ravel(), 'screen_botm': est_screen_botm.ravel() } ) df = pd.DataFrame(data) df['start_datetime'] = period_start_date # get the end_date, handling last entry if n + 1 < len(times): period_end_date = times[n + 1] else: # set end date for last period on previous period length last_start = dfs[-1]['start_datetime'].values[0] ndays = (pd.Timestamp(period_start_date) - pd.Timestamp(last_start)).days period_end_date = period_start_date + pd.Timedelta(ndays, unit='d') # convert the time units ndays = (pd.Timestamp(period_end_date) - pd.Timestamp(period_start_date)).days assert ndays > 0, "period_end_date {} is before period_start_date {}"\ .format(period_end_date, period_start_date) time_conversion = 1 / ndays # original quantities are volumes for the time period # time indexing in pandas is through last value period_end_date = pd.Timestamp(period_end_date) - pd.Timedelta(1, unit='d') df['end_datetime'] = period_end_date df[flux_col] = arr.ravel() * length_conversion * time_conversion # only includes fluxes > 0 df = df.loc[df[flux_col] < 0] dfs.append(df) df = pd.concat(dfs) # site number column (that would be unique from other integers from other data sources) df['site_no'] = [f'iwum_{node}' for node in df.site_no] # project the data to a destination crs, if provided # make a separate metadata dataframe with 1 row per location # to avoid redundant operations metadata = df.groupby('site_no').first().reset_index()[['site_no', 'x', 'y']] metadata.index = metadata['site_no'] x_pr, y_pr = project((metadata.x.values, metadata.y.values), nc_crs, 
dest_crs) metadata['x'], metadata['y'] = x_pr, y_pr metadata['geometry'] = [Point(x, y) for x, y in zip(x_pr, y_pr)] # cull the data to the model area, if provided if active_area is not None: df, metadata = cull_data_to_active_area(df, active_area, active_area_id_column, active_area_feature_id, data_crs=dest_crs, metadata=metadata) # update data with x,y values projected in metadata x = dict(zip(metadata.site_no, metadata.x)) y = dict(zip(metadata.site_no, metadata.y)) df['x'] = [x[sn] for sn in df.site_no] df['y'] = [y[sn] for sn in df.site_no] if outfile is not None: outfile = Path(outfile) df.to_csv(outfile, index=False, float_format='%g') print('wrote {}'.format(outfile)) # Make a plot of iwum output in mgal/day out_pdf_path = outfile.parent / 'plots' out_pdf_path.mkdir(exist_ok=True) plot_iwum_output(ncfile, flux_variable=flux_variable, outpath=out_pdf_path) return df
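# A minimal usage sketch for preprocess_iwum_pumping. All file paths here
# are hypothetical placeholders.
def example_preprocess_iwum_pumping():
    df = preprocess_iwum_pumping(
        'iwum_output.nc',              # hypothetical IWUM NetCDF output
        start_date='2011-01-01',
        end_date='2017-12-31',
        active_area='model_area.shp',  # hypothetical extent polygon
        estimated_production_zone_top='prod_zone_top.tif',
        estimated_production_zone_botm='prod_zone_botm.tif',
        nc_crs=5070, dest_crs=5070,
        estimated_production_surface_units='feet',
        model_length_units='meters',
        outfile='iwum_pumping.csv')
    return df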
def plot_cross_sections(layers, out_pdf, property_data=None,
                        voxel_start_layer=0, voxel_zones=None, cmap='copper',
                        voxel_cmap='viridis', unit_labels=None,
                        add_raster_surfaces=None, modelgrid=None):
    """Generate a multi-page PDF of the layer cross sections.

    Parameters
    ----------
    layers : 3D numpy array
        Array of layer elevations, starting with the model top
        (length equal to the number of layer bottoms + 1).
    out_pdf : str (filepath)
        Filename of multi-page PDF.
    property_data : 3D numpy array
        Array of zone numbers generated by setup_model_layers.
    voxel_start_layer : int, optional
        First layer with voxel data, by default 0
    voxel_zones : sequence, optional
        Zone numbers within property_data that are voxel-based,
        by default None
    cmap : str, optional
        Matplotlib colormap for non-voxel zone numbers, by default 'copper',
        to contrast with colormap for voxel-based zone numbers.
    voxel_cmap : str, optional
        Matplotlib colormap for voxel-based zone numbers, by default 'viridis'.
    unit_labels : dict, optional
        Dictionary mapping non-voxel zone numbers to hydrogeologic units,
        by default None
    add_raster_surfaces : dict, optional
        Dictionary of {label: raster filepath} surfaces to sample at the
        model grid cell centers and add to the cross sections,
        by default None
    modelgrid : grid instance, optional
        Model grid used to sample add_raster_surfaces; required if
        add_raster_surfaces is specified. By default None.
    """
    raster_arrays = None
    if add_raster_surfaces:
        if modelgrid is None:
            raise ValueError("add_raster_surfaces option requires a modelgrid")
        raster_arrays = {}
        _, nrow, ncol = modelgrid.shape
        x = modelgrid.xcellcenters.ravel()
        y = modelgrid.ycellcenters.ravel()
        for label, raster in add_raster_surfaces.items():
            values = get_values_at_points(raster, x, y,
                                          points_crs=modelgrid.crs)
            raster_arrays[label] = np.reshape(values, (nrow, ncol))
    with PdfPages(out_pdf) as pdf:
        nlay, nrow, ncol = layers.shape
        for row in range(0, nrow, 50):
            plot_slice(layers, property_data, row=row,
                       column=slice(None, None),
                       voxel_start_layer=voxel_start_layer,
                       voxel_zones=voxel_zones, cmap=cmap,
                       voxel_cmap=voxel_cmap, unit_labels=unit_labels,
                       add_surfaces=raster_arrays)
            pdf.savefig()
            plt.close()
        for column in range(0, ncol, 50):
            plot_slice(layers, property_data, row=slice(None, None),
                       column=column,
                       voxel_start_layer=voxel_start_layer,
                       voxel_zones=voxel_zones, cmap=cmap,
                       voxel_cmap=voxel_cmap, unit_labels=unit_labels,
                       add_surfaces=raster_arrays)
            pdf.savefig()
            plt.close()
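# A minimal usage sketch for plot_cross_sections. The zone numbers, labels,
# and raster path are hypothetical placeholders; `layers`, `property_data`,
# and `modelgrid` are assumed to come from an existing model setup.
def example_plot_cross_sections(layers, property_data, modelgrid):
    plot_cross_sections(layers, 'cross_sections.pdf',
                        property_data=property_data,
                        voxel_start_layer=0,
                        voxel_zones=range(1, 10),
                        unit_labels={11: 'upper confining unit'},
                        add_raster_surfaces={'estimated bedrock surface':
                                             'bedrock_elevation.tif'},
                        modelgrid=modelgrid)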