def griding(max_distance=500, cell_size=500):
    """Grid the module-level ``chain``, mask distant nodes, then save and plot it.

    The function relies on names defined outside of it: ``chain`` (assumed to
    be already fitted), ``feature`` (a string, used as the data variable name),
    ``coordinates``, ``vd``, ``plt``, ``process_time`` and ``timelapse``.

    Parameters
    ----------
    max_distance : int or float
        Passed as ``maxdist`` to ``vd.distance_mask``. Also embedded in the
        name of the output file.
    cell_size : int or float
        Passed as ``spacing`` to ``chain.grid``. Also embedded in the name of
        the output file.

    Returns
    -------
    grid
        The masked grid returned by ``vd.distance_mask``.
    """
    begin = process_time()
    print(feature + 'chaining begin')

    grid = chain.grid(spacing=cell_size, data_names=[feature])
    grid = vd.distance_mask(coordinates, maxdist=max_distance, grid=grid)

    # Build the file name with ``str.format``: ``max_distance`` and
    # ``cell_size`` are numbers, and concatenating them to a string with ``+``
    # raises ``TypeError``.
    output_path = '~/graphite_git/resources/tif/verde/{}_{}_{}.nc'.format(
        feature, max_distance, cell_size
    )
    grid[feature].to_netcdf(output_path)

    grid[feature].plot(figsize=(8, 8), cmap='magma')
    plt.axis('scaled')

    # ``timelapse`` is defined elsewhere; presumably it reports the time
    # elapsed since ``begin`` -- confirm against its definition.
    timelapse(begin, "griding")
    return grid
print(chain) # Fit on the training data chain.fit(*train) # And score on the testing data. The best possible score is 1, meaning a perfect # prediction of the test data. score = chain.score(*test) print("Cross-validation R^2 score: {:.2f}".format(score)) # Interpolate the wind speed onto a regular geographic grid and mask the data that are # far from the observation points grid_full = chain.grid( region, spacing=spacing, projection=projection, dims=["latitude", "longitude"] ) grid = vd.distance_mask( coordinates, maxdist=3 * spacing * 111e3, grid=grid_full, projection=projection ) # Make maps of the original and gridded wind speed plt.figure(figsize=(6, 6)) ax = plt.axes(projection=ccrs.Mercator()) ax.set_title("Uncoupled spline gridding of wind speed") tmp = ax.quiver( grid.longitude.values, grid.latitude.values, grid.east_component.values, grid.north_component.values, width=0.0015, scale=100, color="tab:blue", transform=ccrs.PlateCarree(),
######################################################################################## # The returned ``train`` and ``test`` arguments are each tuples with the coordinates (in # a tuple) and a data array. They are in a format that can be easily passed to the # :meth:`~verde.base.BaseGridder.fit` method of most gridders using Python's argument # expansion using the ``*`` symbol. spline = vd.Spline() spline.fit(*train) ######################################################################################## # Let's plot the gridded result to see what it looks like. We'll mask out grid points # that are too far from any given data point. mask = vd.distance_mask( (data.longitude, data.latitude), maxdist=3 * spacing * 111e3, coordinates=vd.grid_coordinates(region, spacing=spacing), projection=projection, ) grid = spline.grid( region=region, spacing=spacing, projection=projection, dims=["latitude", "longitude"], data_names=["temperature"], ).where(mask) plt.figure(figsize=(8, 6)) ax = plt.axes(projection=ccrs.Mercator()) ax.set_title("Gridded temperature") pc = grid.temperature.plot.pcolormesh( ax=ax,
def interp(df, mask, var='biomass', spacing=4000):
    """
    Grid a set of lat/lon points to a grid defined by mask

    Parameters
    ----------
    df : pd.DataFrame
        Data points to be gridded in the form of a Pandas DataFrame with
        columns ``lat``, ``lon``, and ``var``.
    mask : xr.DataArray
        Target grid definition. Must include a pyproj parsable crs attribute
        (e.g. ``mask.attrs['crs']``). Data should be between 0 and 1.
    var : str
        Name of column in df to grid.
    spacing : float
        Grid spacing in units defined by the mask's crs.

    Returns
    -------
    grid : xr.DataArray
        Gridded data from df.
    """
    import verde as vd

    # extract the projection and grid info
    # NOTE(review): taking ``y[-1]`` before ``y[0]`` assumes ``mask.y`` is
    # stored in descending order -- confirm against the callers.
    region = [mask.x.data[0], mask.x.data[-1], mask.y.data[-1], mask.y.data[0]]
    projection = pyproj.Proj(mask.attrs['crs'])

    # project the lon/lat points once; the result is reused for both the
    # train/test split and the distance mask
    coordinates = (df.lon.values, df.lat.values)
    proj_coords = projection(*coordinates)

    # split for validation... this may belong outside of this function.
    # Only the training split is used below; the test split is not scored.
    train, _test = vd.train_test_split(
        proj_coords,
        df[var],
        random_state=RANDOM_SEED,
    )

    # fit the gridder: block-average the points, then interpolate linearly
    chain = vd.Chain(
        [
            ('mean', vd.BlockReduce(np.mean, spacing=spacing * 0.25, region=region)),
            ('linear', vd.ScipyGridder(method='linear')),
        ]
    )
    chain.fit(*train)

    # make the grid
    grid = chain.grid(spacing=spacing, region=region, data_names=[var], dims=('y', 'x'))
    # drop grid nodes farther than 4 grid spacings from any data point
    grid = vd.distance_mask(
        proj_coords,
        maxdist=4 * spacing,
        grid=grid,
    )
    # reverse the row order of the gridded values, then weight them by mask.
    # NOTE(review): the flip presumably aligns the gridded rows with a
    # north-to-south ``mask`` -- confirm.
    grid = np.flipud(grid[var]) * mask
    grid.name = var

    return grid
# Train the chain on the training split and evaluate it on the testing split. A score
# of 1 is the best possible value and means the test data are predicted perfectly.
chain.fit(*train)
score = chain.score(*test)
score_message = "Cross-validation R^2 score: {:.2f}".format(score)
print(score_message)

# Predict our horizontal GPS velocities on a regular geographic grid ...
grid_full = chain.grid(
    region,
    spacing=spacing,
    projection=projection,
    dims=["latitude", "longitude"],
)
# ... and mask the grid points that are far from the observation points.
observation_coordinates = (data.longitude, data.latitude)
grid = vd.distance_mask(
    observation_coordinates,
    maxdist=3 * spacing * 111e3,
    grid=grid_full,
    projection=projection,
)

# Residuals: the original input data minus the predictions made at the (projected)
# data locations, one array per velocity component.
projected_coordinates = projection(*coordinates)
predicted = chain.predict(projected_coordinates)
east_residual = data.velocity_east - predicted[0]
north_residual = data.velocity_north - predicted[1]
residuals = (east_residual, north_residual)

# Set up a two-panel Mercator figure for the maps of the original velocities, the
# gridded velocities, and the residuals.
fig, axes = plt.subplots(
    nrows=1,
    ncols=2,
    figsize=(12, 8),
    subplot_kw={"projection": ccrs.Mercator()},
)
crs = ccrs.PlateCarree()
# Plot the observed data and the residuals
# Load the Baja California bathymetry. The dataset has big gaps on land, which we want
# to mask on a dummy grid generated over the region covered by the data.
data = vd.datasets.fetch_baja_bathymetry()
data_coordinates = (data.longitude, data.latitude)
region = vd.get_region(data_coordinates)

# Build the grid coordinates and a matching dummy grid of ones to show the mask on.
spacing = 10 / 60
coordinates = vd.grid_coordinates(region, spacing=spacing)
dummy_data = np.ones_like(coordinates[0])

# Mask the points that are more than 2 grid spacings away from any data point. The mask
# is True for points that are within the maximum distance. The grid coordinates are
# passed in here, but a region and spacing could be given instead if the coordinates
# had not been generated already.
mask = vd.distance_mask(
    data_coordinates,
    maxdist=spacing * 2,
    coordinates=coordinates,
)
print(mask)

# Points that are too far become NaN so that they are left out of the plot
too_far = ~mask
dummy_data[too_far] = np.nan

# Plot the masked grid together with the data locations.
crs = ccrs.PlateCarree()
plt.figure(figsize=(7, 6))
ax = plt.axes(projection=ccrs.Mercator())
ax.set_title("Only keep grid points that are close to data")
ax.plot(*data_coordinates, ".y", markersize=0.5, transform=crs)
ax.pcolormesh(coordinates[0], coordinates[1], dummy_data, transform=crs)
vd.datasets.setup_baja_bathymetry_map(ax, land=None)
plt.tight_layout()
])
# Fit on the training data
chain.fit(*train)
# And score on the testing data. The best possible score is 1, meaning a perfect
# prediction of the test data.
score = chain.score(*test)
print("Cross-validation R^2 score: {:.2f}".format(score))

# Interpolate our horizontal GPS velocities onto a regular geographic grid and mask the
# data that are far from the observation points.
grid = chain.grid(region, spacing=spacing, projection=projection, dims=["latitude", "longitude"])
# The mask is built from the same region and spacing as the grid. No projection is
# passed here, so ``maxdist=0.5`` is presumably expressed in the units of the
# longitude/latitude coordinates (degrees) -- confirm.
mask = vd.distance_mask((data.longitude, data.latitude), maxdist=0.5, region=region, spacing=spacing)
grid = grid.where(mask)

# Calculate residuals between the predictions and the original input data. Even though
# we aren't using regularization or regularly distributed forces, the prediction won't
# be perfect because of the BlockReduce operation. We fit the gridder on the reduced
# observations, not the original data.
predicted = chain.predict(projection(*coordinates))
residuals = (data.velocity_east - predicted[0], data.velocity_north - predicted[1])

# Make maps of the original velocities, the gridded velocities, and the residuals
fig, axes = plt.subplots(1, 2, figsize=(12, 8),
# Interpolate data on a regular grid with 0.2 degrees spacing. The # interpolation requires the radius of the grid points (upward coordinate). By # passing in the maximum radius of the data, we're effectively # upward-continuing the data. The grid will be defined in spherical # coordinates. grid = eqs.grid( upward=coordinates[-1].max(), spacing=0.2, data_names=["gravity_disturbance"], ) # The grid is a xarray.Dataset with values, coordinates, and metadata print("\nGenerated grid:\n", grid) # Mask grid points too far from data points grid = vd.distance_mask(data_coordinates=coordinates, maxdist=0.5, grid=grid) # Get the maximum absolute value between the original and gridded data so we # can use the same color scale for both plots and have 0 centered at the white # color. maxabs = vd.maxabs(gravity_disturbance, grid.gravity_disturbance.values) # Get the region boundaries region = vd.get_region(coordinates) # Plot observed and gridded gravity disturbance fig, (ax1, ax2) = plt.subplots( nrows=1, ncols=2, figsize=(10, 5), sharey=True,
# Print the score of the chain on the testing split
test_score = chain.score(*test)
print(test_score)

# Predict on a regular grid whose dimensions are named latitude and longitude
grid_options = {
    "region": region,
    "spacing": spacing,
    "projection": projection,
    "dims": ["latitude", "longitude"],
}
grid = chain.grid(**grid_options)
print(grid)

########################################################################################
# Mask out the points too far from data and plot the gridded vectors.

max_mask_distance = spacing * 2 * 111e3
grid = vd.distance_mask(
    (data.longitude, data.latitude),
    maxdist=max_mask_distance,
    grid=grid,
    projection=projection,
)

plt.figure(figsize=(6, 8))
ax = plt.axes(projection=ccrs.Mercator())
# Arrow positions (longitude, latitude) followed by the east and north components
vector_field = (
    grid.longitude.values,
    grid.latitude.values,
    grid.east_component.values,
    grid.north_component.values,
)
tmp = ax.quiver(*vector_field, scale=0.3, transform=crs, width=0.002)
ax.quiverkey(tmp, 0.2, 0.15, 0.05, label="0.05 m/yr", coordinates="figure")
# Decimate the projected data with a blocked median, then fit a spline to the result
spacing = 10 / 60
projected_spacing = spacing * 111e3
reducer = vd.BlockReduce(np.median, spacing=projected_spacing)
filter_coords, filter_bathy = reducer.filter(proj_coords, data.bathymetry_m)
spline = vd.Spline()
spline.fit(filter_coords, filter_bathy)

########################################################################################
# If we now call :meth:`verde.Spline.grid` we'll get back a grid evenly spaced in
# projected Cartesian coordinates.

grid = spline.grid(spacing=projected_spacing, data_names="bathymetry")
print("Cartesian grid:", grid, sep="\n")

########################################################################################
# We'll mask our grid using :func:`verde.distance_mask` to get rid of all the spurious
# solutions far away from the data points.

grid = vd.distance_mask(proj_coords, maxdist=30e3, grid=grid)

plt.figure(figsize=(7, 6))
plt.title("Gridded bathymetry in Cartesian coordinates")
pc = grid.bathymetry.plot.pcolormesh(cmap="viridis", vmax=0, add_colorbar=False)
colorbar = plt.colorbar(pc)
colorbar.set_label("bathymetry (m)")
# Overlay the locations of the decimated data points
plt.plot(filter_coords[0], filter_coords[1], ".k", markersize=0.5)
plt.xlabel("Easting (m)")
plt.ylabel("Northing (m)")
current_axes = plt.gca()
current_axes.set_aspect("equal")
plt.tight_layout()
plt.show()

########################################################################################
import numpy as np
import matplotlib.pyplot as plt

print("Verde version:", vd.version.full_version)

# Fetch the Baja California bathymetry data
data = vd.datasets.fetch_baja_bathymetry()

# Project the geographic coordinates with a Mercator projection whose latitude of true
# scale is the mean latitude of the data
projection = pyproj.Proj(proj="merc", lat_ts=data.latitude.mean())
proj_coords = projection(data.longitude.values, data.latitude.values)

# Processing chain: a blocked median followed by a spline. The same block size is used
# later as the grid spacing.
spacing = 10 / 60
block_size = spacing * 111e3
median_step = vd.BlockReduce(np.median, spacing=block_size)
spline_step = vd.Spline(mindist=10e3, damping=1e-5)
interp = vd.Chain([("median", median_step), ("spline", spline_step)])
interp.fit(proj_coords, data.bathymetry_m)

# Predict on a regular grid and mask the points that are too far from the data
grid = interp.grid(spacing=block_size, data_names=["bathymetry"])
grid = vd.distance_mask(proj_coords, maxdist=30e3, grid=grid)

# Plot the gridded bathymetry with a colorbar attached to the same axes
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(7, 6))
pc = grid.bathymetry.plot.pcolormesh(
    ax=ax,
    cmap="viridis",
    vmax=0,
    add_colorbar=False,
)
colorbar = plt.colorbar(pc, pad=0, ax=ax, aspect=40)
colorbar.set_label("bathymetry (m)")
ax.set_title("Gridded bathymetry")
ax.set_xlabel("Easting (m)")
ax.set_ylabel("Northing (m)")
ax.set_aspect("equal")
plt.show()
# Load the bathymetry data and get the region that it covers
data = vd.datasets.fetch_baja_bathymetry()
data_coordinates = (data.longitude, data.latitude)
region = vd.get_region(data_coordinates)

# Coordinates of the regular grid on which the mask is evaluated
spacing = 10 / 60
coordinates = vd.grid_coordinates(region, spacing=spacing)

# Mask the points that are more than 2 grid spacings away from any data point. The mask
# is True for points that are within the maximum distance. Distance calculations in the
# mask are Cartesian only, so a projection function (Mercator in this case) is given to
# convert the coordinates before the distances are calculated. The maximum distance is
# then Cartesian as well and must be converted from degrees to meters.
mercator = pyproj.Proj(proj="merc", lat_ts=data.latitude.mean())
max_mask_distance = spacing * 2 * 111e3
mask = vd.distance_mask(
    data_coordinates,
    maxdist=max_mask_distance,
    coordinates=coordinates,
    projection=mercator,
)
print(mask)

# Dummy grid of ones used to show the result of the mask. Points that are too far
# become NaN so that they are left out of the plot.
dummy_data = np.ones_like(coordinates[0])
too_far = ~mask
dummy_data[too_far] = np.nan

# Plot the masked grid together with the data locations.
crs = ccrs.PlateCarree()
plt.figure(figsize=(7, 6))
ax = plt.axes(projection=ccrs.Mercator())
ax.set_title("Only keep grid points that are close to data")
ax.plot(*data_coordinates, ".y", markersize=0.5, transform=crs)