import numpy as np
import pyproj
import verde as vd

# masked_array and mpunits come from MetPy in the original script (an assumption based
# on how they are used below):
from metpy.units import masked_array
from metpy.units import units as mpunits

# NOTE: XAXIS and YAXIS are module-level grid axes defined elsewhere in the script.
def generic_gridder(day, df, idx):
    """Generic gridding algorithm for easy variables"""
    data = df[idx].values
    coordinates = (df["lon"].values, df["lat"].values)
    region = [XAXIS[0], XAXIS[-1], YAXIS[0], YAXIS[-1]]
    projection = pyproj.Proj(proj="merc", lat_ts=df["lat"].mean())
    spacing = 0.5
    chain = vd.Chain(
        [
            ("mean", vd.BlockReduce(np.mean, spacing=spacing * 111e3)),
            ("spline", vd.Spline(damping=1e-10, mindist=100e3)),
        ]
    )
    train, test = vd.train_test_split(projection(*coordinates), data, random_state=0)
    chain.fit(*train)
    score = chain.score(*test)
    shape = (len(YAXIS), len(XAXIS))
    grid = chain.grid(
        region=region,
        shape=shape,
        projection=projection,
        dims=["latitude", "longitude"],
        data_names=["precip"],
    )
    res = grid.to_array()
    res = np.ma.where(res < 0, 0, res)
    print(
        "%s %s rows for %s column min:%.3f max:%.3f score: %.3f"
        % (day, len(df.index), idx, np.nanmin(res), np.nanmax(res), score)
    )
    return masked_array(res, mpunits("inch"))
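# Usage sketch (illustrative, not from the original script): grid a small synthetic set
# of observations. The XAXIS/YAXIS values here are stand-ins for the real module-level
# axes, and "p01d" is a hypothetical precipitation column name.
import pandas as pd

XAXIS = np.arange(-104.0, -80.0, 0.5)
YAXIS = np.arange(36.0, 50.0, 0.5)
rng = np.random.default_rng(0)
obs = pd.DataFrame(
    {
        "lon": rng.uniform(XAXIS[0], XAXIS[-1], 500),
        "lat": rng.uniform(YAXIS[0], YAXIS[-1], 500),
        "p01d": rng.gamma(2.0, 0.1, 500),
    }
)
precip = generic_gridder("2020-01-01", obs, "p01d")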
def chain_config(spacing=2500, degree=7):
    # degree > 20 is useless; even a 2nd-degree polynomial trend can go downwards or
    # upwards very fast outside the data.
    # process_time comes from the time module (from time import process_time);
    # timelapse is a timing helper defined elsewhere in this script.
    begin = process_time()
    print("chain_config begin")
    chain = vd.Chain(
        [
            ("trend", vd.Trend(degree=degree)),
            ("reduce", vd.BlockReduce(np.median, spacing=spacing)),
            ("spline", vd.Spline()),
        ]
    )
    timelapse(begin, "chain_config")
    return chain
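# Usage sketch: build a coarser, lower-degree chain and inspect it. The spacing is in
# the same units as the projected coordinates the chain will later be fitted on
# (meters here).
chain = chain_config(spacing=5000, degree=3)
print(chain)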
    random_state=2,
)

# We'll make a 20 arc-minute grid
spacing = 20 / 60

# Chain together a blocked mean to avoid aliasing, a polynomial trend (Spline usually
# requires de-trended data), and finally a Spline for each component. Notice that
# BlockReduce can work on multicomponent data without the use of Vector.
chain = vd.Chain(
    [
        ("mean", vd.BlockReduce(np.mean, spacing * 111e3)),
        ("trend", vd.Vector([vd.Trend(degree=1) for i in range(2)])),
        (
            "spline",
            vd.Vector([vd.Spline(damping=1e-10, mindist=500e3) for i in range(2)]),
        ),
    ]
)
print(chain)

# Fit on the training data
chain.fit(*train)

# And score on the testing data. The best possible score is 1, meaning a perfect
# prediction of the test data.
score = chain.score(*test)
print("Cross-validation R^2 score: {:.2f}".format(score))

# Interpolate the wind speed onto a regular geographic grid and mask the data that are
# far from the observation points
grid_full = chain.grid(
ax = plt.axes()
ax.set_title("Air temperature measurements for Texas")
ax.plot(train[0][0], train[0][1], ".r", label="train")
ax.plot(test[0][0], test[0][1], ".b", label="test")
ax.legend()
ax.set_aspect("equal")
plt.tight_layout()
plt.show()

########################################################################################
# The returned ``train`` and ``test`` variables are each a tuple with the coordinates
# (in a tuple) and a data array. They are in a format that can be easily passed to the
# :meth:`~verde.base.BaseGridder.fit` method of most gridders using Python's argument
# expansion with the ``*`` symbol.

spline = vd.Spline()
spline.fit(*train)

########################################################################################
# Let's plot the gridded result to see what it looks like. We'll mask out grid points
# that are too far from any given data point.

mask = vd.distance_mask(
    (data.longitude, data.latitude),
    maxdist=3 * spacing * 111e3,
    coordinates=vd.grid_coordinates(region, spacing=spacing),
    projection=projection,
)

grid = spline.grid(
    region=region,
    spacing=spacing,
    projection=projection,
# We'll test this on the California vertical GPS velocity data because it comes with
# uncertainties
data = vd.datasets.fetch_california_gps()
coordinates = (data.longitude.values, data.latitude.values)

# Use a Mercator projection for our Cartesian gridder
projection = pyproj.Proj(proj="merc", lat_ts=data.latitude.mean())

# Now we can chain a blocked weighted mean and a weighted spline together. We'll use
# uncertainty propagation to calculate the new weights from the block mean because our
# data vary smoothly but have different uncertainties.
spacing = 5 / 60  # 5 arc-minutes
chain = vd.Chain(
    [
        ("mean", vd.BlockMean(spacing=spacing * 111e3, uncertainty=True)),
        ("spline", vd.Spline(damping=1e-10)),
    ]
)
print(chain)

# Split the data into a training and testing set. We'll use the training set to grid
# the data and the testing set to validate our spline model. Weights need to be
# 1/uncertainty**2 for the error propagation in BlockMean to work.
train, test = vd.train_test_split(
    projection(*coordinates),
    data.velocity_up,
    weights=1 / data.std_up ** 2,
    random_state=0,
)

# Fit the model on the training set
chain.fit(*train)
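# Sketch of the natural next step (mirroring the other examples in this document):
# score the fitted chain on the held-out split. Because train_test_split was given
# weights, the test tuple carries them along to the score calculation.
score = chain.score(*test)
print("R² score on the test set: {:.2f}".format(score))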
    c=data.bathymetry_m,
    s=0.1,
    transform=ccrs.PlateCarree(),
)
plt.colorbar().set_label("meters")
vd.datasets.setup_baja_bathymetry_map(ax)
plt.show()

########################################################################################
# We'll create a chain that applies a blocked median to the data, fits a polynomial
# trend, and then fits a standard gridder to the trend residuals.

chain = vd.Chain(
    [
        ("reduce", vd.BlockReduce(np.median, spacing * 111e3)),
        ("trend", vd.Trend(degree=1)),
        ("spline", vd.Spline()),
    ]
)
print(chain)

########################################################################################
# Calling :meth:`verde.Chain.fit` will automatically run the data through the chain:
#
# #. Apply the blocked median to the input data
# #. Fit a trend to the blocked data and output the residuals
# #. Fit the spline to the trend residuals

chain.fit(proj_coords, data.bathymetry_m)

########################################################################################
# Now that the data has been through the chain, calling :meth:`verde.Chain.predict` will
# sum the results of every step in the chain that has a ``predict`` method. In our case,
# that's the trend and the spline.
data = vd.datasets.fetch_texas_wind()

# Use a Mercator projection because Spline is a Cartesian gridder
projection = pyproj.Proj(proj="merc", lat_ts=data.latitude.mean())
proj_coords = projection(data.longitude.values, data.latitude.values)

region = vd.get_region((data.longitude, data.latitude))

# The desired grid spacing in degrees (converted to meters using 1 degree approx. 111km)
spacing = 15 / 60

########################################################################################
# Before we begin tuning, let's reiterate what the results were with the default
# parameters.

spline_default = vd.Spline()
score_default = np.mean(
    vd.cross_val_score(spline_default, proj_coords, data.air_temperature_c)
)
spline_default.fit(proj_coords, data.air_temperature_c)
print("R² with defaults:", score_default)

########################################################################################
# Tuning
# ------
#
# :class:`~verde.Spline` has many parameters that can be set to modify the final result,
# mainly the ``damping`` regularization parameter and the ``mindist`` "fudge factor"
# which smooths the solution. Would changing the default values give us a better score?
#
# We can answer this question by changing the values in our ``spline`` and
# re-evaluating the model score repeatedly for different values of these parameters.
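# A sketch of that search (the candidate values below are illustrative choices, not
# prescribed by the original text): try combinations of damping and mindist and keep
# the one with the best cross-validated score.
import itertools

dampings = [None, 1e-4, 1e-3, 1e-2]
mindists = [5e3, 10e3, 50e3, 100e3]
parameter_sets = []
scores = []
for damping, mindist in itertools.product(dampings, mindists):
    spline = vd.Spline(damping=damping, mindist=mindist)
    scores.append(
        np.mean(vd.cross_val_score(spline, proj_coords, data.air_temperature_c))
    )
    parameter_sets.append(dict(damping=damping, mindist=mindist))
best = np.argmax(scores)
print("Best R²:", scores[best], "with", parameter_sets[best])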
data = vd.datasets.fetch_texas_wind()
coordinates = (data.longitude.values, data.latitude.values)
region = vd.get_region(coordinates)

# Use a Mercator projection for our Cartesian gridder
projection = pyproj.Proj(proj="merc", lat_ts=data.latitude.mean())

# The output grid spacing will be 15 arc-minutes
spacing = 15 / 60

# Now we can chain a blocked mean and a spline together. The Spline can be regularized
# by setting the damping coefficient (should be positive). It's also a good idea to set
# the minimum distance to the average data spacing to avoid singularities in the spline.
chain = vd.Chain(
    [
        ("mean", vd.BlockReduce(np.mean, spacing=spacing * 111e3)),
        ("spline", vd.Spline(damping=1e-10, mindist=100e3)),
    ]
)
print(chain)

# We can evaluate model performance by splitting the data into a training and testing
# set. We'll use the training set to grid the data and the testing set to validate our
# spline model.
train, test = vd.train_test_split(
    projection(*coordinates), data.air_temperature_c, random_state=0
)

# Fit the model on the training set
chain.fit(*train)

# And calculate an R² score on the testing set. The best possible score (perfect
# prediction) is 1. This can tell us how good our spline is at predicting data that it
# hasn't seen before.
# Let's plot these two datasets with different colors:
plt.figure(figsize=(8, 6))
ax = plt.axes()
ax.set_title("Air temperature measurements for Texas")
ax.plot(train[0][0], train[0][1], ".r", label="train")
ax.plot(test[0][0], test[0][1], ".b", label="test")
ax.legend()
ax.set_aspect("equal")
plt.show()

########################################################################################
# We can pass the training dataset to the :meth:`~verde.base.BaseGridder.fit` method of
# most gridders using Python's argument expansion with the ``*`` symbol.

spline = vd.Spline()
spline.fit(*train)

########################################################################################
# Let's plot the gridded result to see what it looks like. First, we'll create a
# geographic grid:
grid = spline.grid(
    region=region,
    spacing=spacing,
    projection=projection,
    dims=["latitude", "longitude"],
    data_names=["temperature"],
)
print(grid)

########################################################################################
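# Sketch of a follow-up step (mirroring the masking used in the other examples in this
# document): hide grid points that are more than 3 grid spacings away from any
# observation.
grid_masked = vd.distance_mask(
    (data.longitude, data.latitude),
    maxdist=3 * spacing * 111e3,
    grid=grid,
    projection=projection,
)
print(grid_masked)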
    random_state=1,
)

########################################################################################
# Now we can make a 2-component spline. Since :class:`verde.Vector` implements
# ``fit``, ``predict``, and ``filter``, we can use it in a :class:`verde.Chain` to build
# a pipeline.
#
# We need to use a bit of damping so that the weights can be taken into account. Splines
# without damping provide a perfect fit to the data and ignore the weights as a
# consequence.
chain = vd.Chain(
    [
        ("mean", vd.BlockMean(spacing=spacing * 111e3, uncertainty=True)),
        ("trend", vd.Vector([vd.Trend(1), vd.Trend(1)])),
        ("spline", vd.Vector([vd.Spline(damping=1e-10), vd.Spline(damping=1e-10)])),
    ]
)
print(chain)

########################################################################################
#
# .. warning::
#
#     Never generate the component gridders with ``[vd.Spline()]*2``. This will result
#     in each component being represented by **the same Spline object**, causing
#     problems when trying to fit it to different components.
#
# Fitting the spline and gridding is exactly the same as what we've done before.
chain.fit(*train)
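# Sketch of the gridding step (region is assumed to be defined earlier with
# vd.get_region, as in the other examples; the data_names are illustrative).
grid = chain.grid(
    region=region,
    spacing=spacing,
    projection=projection,
    dims=["latitude", "longitude"],
    data_names=["east_velocity", "north_velocity"],
)
print(grid)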
# Convert to float
df = df.astype(np.float64)

# Use a Mercator projection because Spline is a Cartesian gridder
projection = pyproj.Proj(proj="merc", lat_ts=df.latitude.mean())
proj_coords = projection(df.longitude.values, df.latitude.values)
region = vd.get_region((df.longitude, df.latitude))

# The desired grid spacing in degrees
# (converted to meters using 1 degree approx. 111km)
spacing = 1

# Fit a spline with fixed parameters (no parameter search is done here)
spline = vd.Spline(mindist=5e3, damping=1e-4)
spline.fit(proj_coords, df[parameter])

# Grid the fitted values
grid = spline.grid(
    region=region,
    spacing=spacing,
    projection=projection,
    dims=["lat", "lon"],
    data_names="value",
)

# Mask grid points that are too far from the given data points
mask = vd.distance_mask(
    (df.longitude, df.latitude),
plt.ylabel("Northing (m)") plt.gca().set_aspect("equal") plt.tight_layout() plt.show() ######################################################################################## # Cartesian grids # --------------- # # Now we can use :class:`verde.BlockReduce` and :class:`verde.Spline` on our projected # coordinates. We'll specify the desired grid spacing as degrees and convert it to # Cartesian using the 1 degree approx. 111 km rule-of-thumb. spacing = 10 / 60 reducer = vd.BlockReduce(np.median, spacing=spacing * 111e3) filter_coords, filter_bathy = reducer.filter(proj_coords, data.bathymetry_m) spline = vd.Spline().fit(filter_coords, filter_bathy) ######################################################################################## # If we now call :meth:`verde.Spline.grid` we'll get back a grid evenly spaced in # projected Cartesian coordinates. grid = spline.grid(spacing=spacing * 111e3, data_names="bathymetry") print("Cartesian grid:") print(grid) ######################################################################################## # We'll mask our grid using :func:`verde.distance_mask` to get rid of all the spurious # solutions far away from the data points. grid = vd.distance_mask(proj_coords, maxdist=30e3, grid=grid) plt.figure(figsize=(7, 6)) plt.title("Gridded bathymetry in Cartesian coordinates")
import pyproj
import verde as vd
import numpy as np
import matplotlib.pyplot as plt

print("Verde version:", vd.version.full_version)

data = vd.datasets.fetch_baja_bathymetry()
projection = pyproj.Proj(proj="merc", lat_ts=data.latitude.mean())
proj_coords = projection(data.longitude.values, data.latitude.values)
spacing = 10 / 60

interp = vd.Chain(
    [
        ("median", vd.BlockReduce(np.median, spacing=spacing * 111e3)),
        ("spline", vd.Spline(mindist=10e3, damping=1e-5)),
    ]
)
interp.fit(proj_coords, data.bathymetry_m)
grid = interp.grid(spacing=spacing * 111e3, data_names=["bathymetry"])
grid = vd.distance_mask(proj_coords, maxdist=30e3, grid=grid)

fig, ax = plt.subplots(1, 1, figsize=(7, 6))
pc = grid.bathymetry.plot.pcolormesh(ax=ax, cmap="viridis", vmax=0, add_colorbar=False)
plt.colorbar(pc, pad=0, ax=ax, aspect=40).set_label("bathymetry (m)")
ax.set_xlabel("Easting (m)")
ax.set_ylabel("Northing (m)")
ax.set_title("Gridded bathymetry")
ax.set_aspect("equal")
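# Sketch of an alternative output (same Verde API as above): pass the projection and a
# geographic region to get the grid in longitude/latitude instead of projected meters.
region = vd.get_region((data.longitude, data.latitude))
grid_geo = interp.grid(
    region=region,
    spacing=spacing,
    projection=projection,
    dims=["latitude", "longitude"],
    data_names=["bathymetry"],
)
print(grid_geo)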
# Load the Rio de Janeiro total field magnetic anomaly data
data = vd.datasets.fetch_rio_magnetic()
region = vd.get_region((data.longitude, data.latitude))

# Create a projection for the data using pyproj so that we can use it as input for the
# gridder. We'll set the latitude of true scale to the mean latitude of the data.
projection = pyproj.Proj(proj="merc", lat_ts=data.latitude.mean())

# Create a chain that fits a 2nd degree trend, decimates the residuals using a blocked
# mean to avoid aliasing, and then fits a standard gridder to the residuals. The spacing
# for the blocked mean will be 0.5 arc-minutes (approximately converted to meters).
spacing = 0.5 / 60
chain = vd.Chain(
    [
        ("trend", vd.Trend(degree=2)),
        ("reduce", vd.BlockReduce(np.mean, spacing * 111e3)),
        ("spline", vd.Spline(damping=1e-8)),
    ]
)
print("Chained estimator:", chain)

# Calling 'fit' will automatically run the data through the chain
chain.fit(
    projection(data.longitude.values, data.latitude.values),
    data.total_field_anomaly_nt,
)

# Each component of the chain can be accessed separately using the 'named_steps'
# attribute
grid_trend = chain.named_steps["trend"].grid()
print("\nTrend grid:")
print(grid_trend)

grid_residual = chain.named_steps["spline"].grid()
print("\nResidual grid:")
print(grid_residual)
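# Sketch (not in the original excerpt): the chain's full prediction sums every step
# that has a 'predict' method, so residuals can be checked against the observations.
proj_coords = projection(data.longitude.values, data.latitude.values)
residuals = data.total_field_anomaly_nt - chain.predict(proj_coords)
print("\nResidual mean (nT): {:.2f}".format(residuals.mean()))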