def generic_gridder(day, df, idx):
    """Grid one column of point observations with a block-mean + spline chain.

    Parameters
    ----------
    day : object
        Only used to label the diagnostic line printed to stdout.
    df : DataFrame-like
        Observations; must provide ``lon`` and ``lat`` columns plus the
        column named by ``idx``.
    idx : str
        Name of the column in ``df`` to grid.

    Returns
    -------
    The gridded values with negatives clipped to zero, wrapped by
    ``masked_array(..., mpunits("inch"))``.
    """
    values = df[idx].values
    lonlat = (df["lon"].values, df["lat"].values)
    # Output domain comes from the module-level XAXIS / YAXIS definitions.
    bounds = [XAXIS[0], XAXIS[-1], YAXIS[0], YAXIS[-1]]
    # Mercator projection with lat_ts set to the mean observation latitude.
    merc = pyproj.Proj(proj="merc", lat_ts=df["lat"].mean())

    # Block size of 0.5, scaled by 111e3 -- presumably degrees converted to
    # meters (~111 km per degree); confirm.
    block_size = 0.5
    steps = [
        ("mean", vd.BlockReduce(np.mean, spacing=block_size * 111e3)),
        ("spline", vd.Spline(damping=1e-10, mindist=100e3)),
    ]
    gridder = vd.Chain(steps)

    # Fit on the training part of the split, score on the held-out part.
    train_set, test_set = vd.train_test_split(
        merc(*lonlat), values, random_state=0
    )
    gridder.fit(*train_set)
    cv_score = gridder.score(*test_set)

    gridded = gridder.grid(
        region=bounds,
        shape=(len(YAXIS), len(XAXIS)),
        projection=merc,
        dims=["latitude", "longitude"],
        data_names=["precip"],
    )
    clipped = gridded.to_array()
    # Replace negative gridded values with zero.
    clipped = np.ma.where(clipped < 0, 0, clipped)

    summary = (
        day,
        len(df.index),
        idx,
        np.nanmin(clipped),
        np.nanmax(clipped),
        cv_score,
    )
    print(("%s %s rows for %s column min:%.3f max:%.3f score: %.3f") % summary)
    return masked_array(clipped, mpunits("inch"))
def chain_config(spacing=2500, degree=7):
    """Build the (unfitted) trend -> block-median -> spline Verde chain.

    Parameters
    ----------
    spacing : float
        Block size handed to ``vd.BlockReduce``.
    degree : int
        Polynomial degree handed to ``vd.Trend``. Original author's notes:
        a degree above 20 is useless, and operations with a 2nd-degree
        polynomial can go downwards or upwards very fast.

    Returns
    -------
    The configured ``vd.Chain``; nothing is fitted here.
    """
    started = process_time()
    print("chain_config begin")

    steps = [
        ('trend', vd.Trend(degree=degree)),
        ('reduce', vd.BlockReduce(np.median, spacing=spacing)),
        ('spline', vd.Spline()),
    ]
    pipeline = vd.Chain(steps)

    # timelapse is defined elsewhere; presumably it reports the time elapsed
    # since `started` under the given label -- confirm.
    timelapse(started, "chain_config")
    return pipeline
# (Tail of a call opened above this excerpt -- presumably the
# vd.train_test_split that produces `train`/`test` used below; confirm.)
    projection(*coordinates),
    (data.wind_speed_east_knots, data.wind_speed_north_knots),
    random_state=2,
)

# We'll make a 20 arc-minute grid
spacing = 20 / 60  # 20 arc-minutes expressed in degrees

# Chain together a blocked mean to avoid aliasing, a polynomial trend (Spline usually
# requires de-trended data), and finally a Spline for each component. Notice that
# BlockReduce can work on multicomponent data without the use of Vector.
chain = vd.Chain(
    [
        # spacing * 111e3: presumably degrees -> meters (~111 km per degree); confirm
        ("mean", vd.BlockReduce(np.mean, spacing * 111e3)),
        # One independent Trend / Spline instance per vector component
        ("trend", vd.Vector([vd.Trend(degree=1) for i in range(2)])),
        (
            "spline",
            vd.Vector([vd.Spline(damping=1e-10, mindist=500e3) for i in range(2)]),
        ),
    ]
)
print(chain)

# Fit on the training data
chain.fit(*train)
# And score on the testing data. The best possible score is 1, meaning a perfect
# prediction of the test data.
score = chain.score(*test)
print("Cross-validation R^2 score: {:.2f}".format(score))

# Interpolate the wind speed onto a regular geographic grid and mask the data that are
# far from the observation points
# We'll test this on the California vertical GPS velocity data because it comes with the # uncertainties data = vd.datasets.fetch_california_gps() coordinates = (data.longitude.values, data.latitude.values) # Use a Mercator projection for our Cartesian gridder projection = pyproj.Proj(proj="merc", lat_ts=data.latitude.mean()) # Now we can chain a block weighted mean and weighted spline together. We'll use # uncertainty propagation to calculate the new weights from block mean because our data # vary smoothly but have different uncertainties. spacing = 5 / 60 # 5 arc-minutes chain = vd.Chain( [ ("mean", vd.BlockMean(spacing=spacing * 111e3, uncertainty=True)), ("spline", vd.Spline(damping=1e-10)), ] ) print(chain) # Split the data into a training and testing set. We'll use the training set to grid the # data and the testing set to validate our spline model. Weights need to # 1/uncertainty**2 for the error propagation in BlockMean to work. train, test = vd.train_test_split( projection(*coordinates), data.velocity_up, weights=1 / data.std_up ** 2, random_state=0, ) # Fit the model on the training set chain.fit(*train)
def interp(df, mask, var='biomass', spacing=4000):
    """
    Grid a set of lat/lon points to a grid defined by mask

    Parameters
    ----------
    df : pd.DataFrame
        Data points to be gridded in the form of a Pandas DataFrame with
        columns ``lat``, ``lon``, and ``var``.
    mask : xr.DataArray
        Target grid definition. Must include a pyproj parsable crs attribute
        (e.g. ``mask.attrs['crs']``). Data should be between 0 and 1.
    var : str
        Name of column in df to grid.
    spacing : float
        Grid spacing in units defined by the mask's crs.

    Returns
    -------
    grid : xr.DataArray
        Gridded data from df.
    """
    import verde as vd

    # extract the projection and grid info
    # NOTE(review): the y bounds are taken last-to-first, which presumably means
    # mask.y is stored in descending order (hence the flipud below) -- confirm.
    region = [mask.x.data[0], mask.x.data[-1], mask.y.data[-1], mask.y.data[0]]
    projection = pyproj.Proj(mask.attrs['crs'])

    # project the points once and reuse the result for both the train/test
    # split and the distance mask
    proj_coords = projection(df.lon.values, df.lat.values)

    # split for validation... this may belong outside of this function
    # TODO: the held-out part is not used yet; score the fit against it.
    train, _held_out = vd.train_test_split(
        proj_coords,
        df[var],
        random_state=RANDOM_SEED,
    )

    # fit the gridder: block-average at a quarter of the output spacing, then
    # interpolate (the step is labelled 'nearest' but uses method='linear')
    chain = vd.Chain(
        [
            ('mean', vd.BlockReduce(np.mean, spacing=spacing * 0.25, region=region)),
            ('nearest', vd.ScipyGridder(method='linear')),
        ]
    )
    chain.fit(*train)

    # make the grid
    grid = chain.grid(spacing=spacing, region=region, data_names=[var], dims=('y', 'x'))
    # distance mask with a cutoff of 4 grid spacings from the (projected) points
    grid = vd.distance_mask(
        proj_coords,
        maxdist=4 * spacing,
        grid=grid,
    )
    # flip the rows to line up with mask, then weight by the mask values
    grid = np.flipud(grid[var]) * mask
    grid.name = var

    return grid
# (Tail of a plotting call that starts above this excerpt.)
    data.latitude,
    c=data.bathymetry_m,
    s=0.1,
    transform=ccrs.PlateCarree(),
)
plt.colorbar().set_label("meters")
vd.datasets.setup_baja_bathymetry_map(ax)
plt.show()

########################################################################################
# We'll create a chain that applies a blocked median to the data, fits a polynomial
# trend, and then fits a standard gridder to the trend residuals.

chain = vd.Chain([
    # spacing * 111e3: presumably degrees -> meters (~111 km per degree); confirm
    ("reduce", vd.BlockReduce(np.median, spacing * 111e3)),
    ("trend", vd.Trend(degree=1)),
    ("spline", vd.Spline()),
])
print(chain)

########################################################################################
# Calling :meth:`verde.Chain.fit` will automatically run the data through the chain:
#
# #. Apply the blocked median to the input data
# #. Fit a trend to the blocked data and output the residuals
# #. Fit the spline to the trend residuals

chain.fit(proj_coords, data.bathymetry_m)

########################################################################################
# Now that the data has been through the chain, calling :meth:`verde.Chain.predict` will
# Split the data into a training and testing set. We'll fit the gridder on the # training set and use the testing set to evaluate how well the gridder is # performing. train, test = vd.train_test_split(projection(*coordinates), (data.velocity_east, data.velocity_north), random_state=0) # We'll make a 10 arc-minute grid in the end. spacing = 10 / 60 # Chain together a blocked mean to avoid aliasing, a polynomial trend to take # care of the increase toward the coast, and finally the vector gridder using # Poisson's ratio 0.5 to couple the two horizontal components. chain = vd.Chain([ ("mean", vd.BlockReduce(np.mean, spacing * 111e3)), ("trend", vd.Vector([vd.Trend(degree=1) for i in range(2)])), ("spline", ez.Elastic2D(poisson=0.5, mindist=10e3)), ]) # Fit on the training data chain.fit(*train) # And score on the testing data. The best possible score is 1, meaning a # perfect prediction of the test data. score = chain.score(*test) print("Cross-validation R^2 score: {:.2f}".format(score)) # Interpolate our horizontal GPS velocities onto a regular geographic grid and # mask the data that are far from the observation points grid_full = chain.grid(region, spacing=spacing, projection=projection, dims=["latitude", "longitude"])
# We'll test this on the air temperature data from Texas data = vd.datasets.fetch_texas_wind() coordinates = (data.longitude.values, data.latitude.values) region = vd.get_region(coordinates) # Use a Mercator projection for our Cartesian gridder projection = pyproj.Proj(proj="merc", lat_ts=data.latitude.mean()) # The output grid spacing will 15 arc-minutes spacing = 15 / 60 # Now we can chain a blocked mean and spline together. The Spline can be regularized # by setting the damping coefficient (should be positive). It's also a good idea to set # the minimum distance to the average data spacing to avoid singularities in the spline. chain = vd.Chain([ ("mean", vd.BlockReduce(np.mean, spacing=spacing * 111e3)), ("spline", vd.Spline(damping=1e-10, mindist=100e3)), ]) print(chain) # We can evaluate model performance by splitting the data into a training and testing # set. We'll use the training set to grid the data and the testing set to validate our # spline model. train, test = vd.train_test_split(projection(*coordinates), data.air_temperature_c, random_state=0) # Fit the model on the training set chain.fit(*train) # And calculate an R^2 score coefficient on the testing set. The best possible score # (perfect prediction) is 1. This can tell us how good our spline is at predicting data
# Split the data into a training and testing set. We'll fit the gridder on the training # set and use the testing set to evaluate how well the gridder is performing. train, test = vd.train_test_split(projection(*coordinates), (data.velocity_east, data.velocity_north), random_state=0) # We'll make a 20 arc-minute grid in the end. spacing = 20 / 60 # Chain together a blocked mean to avoid aliasing, a polynomial trend to take care of # the increase toward the coast, and finally the vector gridder using Poisson's ratio # 0.5 to couple the two horizontal components. chain = vd.Chain([ ("mean", vd.BlockReduce(np.mean, spacing * 111e3)), ("trend", vd.VectorTrend(degree=5)), ("spline", vd.Vector2D(poisson=0.5)), ]) # Fit on the training data chain.fit(*train) # And score on the testing data. The best possible score is 1, meaning a perfect # prediction of the test data. score = chain.score(*test) print("Cross-validation R^2 score: {:.2f}".format(score)) # Interpolate our horizontal GPS velocities onto a regular geographic grid and mask the # data that are far from the observation points grid = chain.grid(region, spacing=spacing, projection=projection, dims=["latitude", "longitude"])
# (Tail of a call opened above this excerpt -- presumably the
# vd.train_test_split that produces `train` used below; confirm.)
    weights=(1 / data.std_east**2, 1 / data.std_north**2),
    random_state=1,
)

########################################################################################
# Now we can make a 2-component spline. Since :class:`verde.Vector` implements
# ``fit``, ``predict``, and ``filter``, we can use it in a :class:`verde.Chain` to build
# a pipeline.
#
# We need to use a bit of damping so that the weights can be taken into account. Splines
# without damping provide a perfect fit to the data and ignore the weights as a
# consequence.

chain = vd.Chain([
    # spacing * 111e3: presumably degrees -> meters (~111 km per degree); confirm
    ("mean", vd.BlockMean(spacing=spacing * 111e3, uncertainty=True)),
    ("trend", vd.Vector([vd.Trend(1), vd.Trend(1)])),
    ("spline", vd.Vector([vd.Spline(damping=1e-10), vd.Spline(damping=1e-10)])),
])
print(chain)

########################################################################################
#
# .. warning::
#
#     Never generate the component gridders with ``[vd.Spline()]*2``. This will result
#     in each component being represented by **the same Spline object**, causing
#     problems when trying to fit it to different components.
#
# Fitting the spline and gridding is exactly the same as what we've done before.

chain.fit(*train)
projection = pyproj.Proj(proj="merc", lat_ts=data.latitude.mean())
proj_coords = projection(data.longitude.values, data.latitude.values)

# `coordinates` is defined above this excerpt.
region = vd.get_region(coordinates)
spacing = 5 / 60

########################################################################################
# Now we can grid our data using a weighted spline. We'll use the block mean results
# with uncertainty based weights.
#
# Note that the weighted spline solution will only work on a non-exact interpolation. So
# we'll need to use some damping regularization or not use the data locations for the
# point forces. Here, we'll apply a bit of damping.

spline = vd.Chain([
    # Convert the spacing to meters because Spline is a Cartesian gridder
    ("mean", vd.BlockMean(spacing=spacing * 111e3, uncertainty=True)),
    ("spline", vd.Spline(damping=1e-10)),
]).fit(proj_coords, data.velocity_up, data.weights)
grid = spline.grid(
    region=region,
    spacing=spacing,
    projection=projection,
    dims=["latitude", "longitude"],
    data_names=["velocity"],
)
# Avoid showing interpolation outside of the convex hull of the data points.
grid = vd.convexhull_mask(coordinates, grid=grid, projection=projection)

########################################################################################
# Calculate an unweighted spline as well for comparison.

spline_unweighted = vd.Chain([  # (call continues past the end of this excerpt)
# Grid the Baja California bathymetry sample data with a block-median + spline
# chain and plot the result in projected (Cartesian) coordinates.
import pyproj
import verde as vd
import numpy as np
import matplotlib.pyplot as plt

print("Verde version:", vd.version.full_version)

# Fetch the sample data and project the geographic coordinates with a Mercator
# projection whose lat_ts is the mean data latitude.
data = vd.datasets.fetch_baja_bathymetry()
projection = pyproj.Proj(proj="merc", lat_ts=data.latitude.mean())
proj_coords = projection(data.longitude.values, data.latitude.values)

# 10 arc-minutes expressed in degrees; the * 111e3 below presumably converts
# degrees to meters (~111 km per degree) -- confirm.
spacing = 10 / 60
interp = vd.Chain([
    ("median", vd.BlockReduce(np.median, spacing=spacing * 111e3)),
    ("spline", vd.Spline(mindist=10e3, damping=1e-5)),
])
interp.fit(proj_coords, data.bathymetry_m)

# No projection is passed to grid(), so the grid is generated with the same
# scaled spacing that was used for the block reduction.
grid = interp.grid(spacing=spacing * 111e3, data_names=["bathymetry"])
# Distance-based mask relative to the data points, with a 30e3 cutoff.
grid = vd.distance_mask(proj_coords, maxdist=30e3, grid=grid)

fig, ax = plt.subplots(1, 1, figsize=(7, 6))
# add_colorbar=False because the colorbar is created explicitly on the next line.
pc = grid.bathymetry.plot.pcolormesh(ax=ax, cmap="viridis", vmax=0, add_colorbar=False)
plt.colorbar(pc, pad=0, ax=ax, aspect=40).set_label("bathymetry (m)")
ax.set_xlabel("Easting (m)")
ax.set_ylabel("Northing (m)")
ax.set_title("Gridded bathymetry")
ax.set_aspect("equal")
# Load the Rio de Janeiro total field magnetic anomaly data data = vd.datasets.fetch_rio_magnetic() region = vd.get_region((data.longitude, data.latitude)) # Create a projection for the data using pyproj so that we can use it as input for the # gridder. We'll set the latitude of true scale to the mean latitude of the data. projection = pyproj.Proj(proj="merc", lat_ts=data.latitude.mean()) # Create a chain that fits a 2nd degree trend, decimates the residuals using a blocked # mean to avoid aliasing, and then fits a standard gridder to the residuals. The spacing # for the blocked mean will be 0.5 arc-minutes (approximately converted to meters). spacing = 0.5 / 60 chain = vd.Chain([ ("trend", vd.Trend(degree=2)), ("reduce", vd.BlockReduce(np.mean, spacing * 111e3)), ("spline", vd.Spline(damping=1e-8)), ]) print("Chained estimator:", chain) # Calling 'fit' will automatically run the data through the chain chain.fit(projection(data.longitude.values, data.latitude.values), data.total_field_anomaly_nt) # Each component of the chain can be accessed separately using the 'named_steps' # attribute grid_trend = chain.named_steps["trend"].grid() print("\nTrend grid:") print(grid_trend) grid_residual = chain.named_steps["spline"].grid() print("\nResidual grid:")