Code example #1
def generic_gridder(day, df, idx):
    """
    Generic gridding algorithm for easy variables
    """
    data = df[idx].values
    coordinates = (df["lon"].values, df["lat"].values)
    region = [XAXIS[0], XAXIS[-1], YAXIS[0], YAXIS[-1]]
    projection = pyproj.Proj(proj="merc", lat_ts=df["lat"].mean())
    spacing = 0.5
    chain = vd.Chain([
        ("mean", vd.BlockReduce(np.mean, spacing=spacing * 111e3)),
        ("spline", vd.Spline(damping=1e-10, mindist=100e3)),
    ])
    train, test = vd.train_test_split(projection(*coordinates),
                                      data,
                                      random_state=0)
    chain.fit(*train)
    score = chain.score(*test)
    shape = (len(YAXIS), len(XAXIS))
    grid = chain.grid(
        region=region,
        shape=shape,
        projection=projection,
        dims=["latitude", "longitude"],
        data_names=["precip"],
    )
    res = grid.to_array()
    res = np.ma.where(res < 0, 0, res)
    print(("%s %s rows for %s column min:%.3f max:%.3f score: %.3f") %
          (day, len(df.index), idx, np.nanmin(res), np.nanmax(res), score))
    return masked_array(res, mpunits("inch"))
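
# The excerpt above assumes names defined elsewhere in its project: the XAXIS/YAXIS
# grid axes and the masked_array/mpunits unit helpers. A minimal sketch of that
# context, with illustrative values (assumptions, not the original definitions):
import numpy as np
import pyproj
import verde as vd
from metpy.units import masked_array, units as mpunits  # assumed MetPy unit helpers

XAXIS = np.arange(-104.0, -79.99, 0.5)  # hypothetical longitude axis
YAXIS = np.arange(36.0, 50.01, 0.5)  # hypothetical latitude axis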
Code example #2
def chain_config(
    spacing=2500,
    degree=7
):  # degree > 20 is useless; even a degree-2 polynomial trend can swing downwards or upwards very fast
    begin = process_time()
    print("chain_config begin")
    chain = vd.Chain([
        ('trend', vd.Trend(degree=degree)),
        ('reduce', vd.BlockReduce(np.median, spacing=spacing)),
        ('spline', vd.Spline()),
    ])
    timelapse(begin, "chain_config")
    return chain
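
# The function above relies on process_time from the standard time module and a
# project-specific timelapse helper. A minimal sketch of what the helper might look
# like (its real behavior is an assumption):
from time import process_time

def timelapse(begin, label):
    # Hypothetical helper: report elapsed CPU time since `begin`.
    print("%s took %.2f s" % (label, process_time() - begin))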
Code example #3
    random_state=2,
)

# We'll make a 20 arc-minute grid
spacing = 20 / 60

# Chain together a blocked mean to avoid aliasing, a polynomial trend (Spline usually
# requires de-trended data), and finally a Spline for each component. Notice that
# BlockReduce can work on multicomponent data without the use of Vector.
chain = vd.Chain(
    [
        ("mean", vd.BlockReduce(np.mean, spacing * 111e3)),
        ("trend", vd.Vector([vd.Trend(degree=1) for i in range(2)])),
        (
            "spline",
            vd.Vector([vd.Spline(damping=1e-10, mindist=500e3) for i in range(2)]),
        ),
    ]
)
print(chain)

# Fit on the training data
chain.fit(*train)
# And score on the testing data. The best possible score is 1, meaning a perfect
# prediction of the test data.
score = chain.score(*test)
print("Cross-validation R^2 score: {:.2f}".format(score))

# Interpolate the wind speed onto a regular geographic grid and mask the data that are
# far from the observation points
grid_full = chain.grid(
Code example #4
ax = plt.axes()
ax.set_title("Air temperature measurements for Texas")
ax.plot(train[0][0], train[0][1], ".r", label="train")
ax.plot(test[0][0], test[0][1], ".b", label="test")
ax.legend()
ax.set_aspect("equal")
plt.tight_layout()
plt.show()

########################################################################################
# The returned ``train`` and ``test`` variables are each a tuple with the coordinates
# (in a tuple) and a data array. They are in a format that can be easily passed to the
# :meth:`~verde.base.BaseGridder.fit` method of most gridders using Python's argument
# expansion with the ``*`` symbol.

spline = vd.Spline()
spline.fit(*train)
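
########################################################################################
# For clarity, the argument expansion above is equivalent to indexing the tuple by
# hand (a sketch; the coordinates tuple comes first, followed by the data array):

train_coords, train_data = train[0], train[1]
spline.fit(train_coords, train_data)  # the same call as spline.fit(*train)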

########################################################################################
# Let's plot the gridded result to see what it looks like. We'll mask out grid points
# that are too far from any given data point.
mask = vd.distance_mask(
    (data.longitude, data.latitude),
    maxdist=3 * spacing * 111e3,
    coordinates=vd.grid_coordinates(region, spacing=spacing),
    projection=projection,
)
grid = spline.grid(
    region=region,
    spacing=spacing,
    projection=projection,
Code example #5
# We'll test this on the California vertical GPS velocity data because it comes with the
# uncertainties
data = vd.datasets.fetch_california_gps()
coordinates = (data.longitude.values, data.latitude.values)

# Use a Mercator projection for our Cartesian gridder
projection = pyproj.Proj(proj="merc", lat_ts=data.latitude.mean())

# Now we can chain a block weighted mean and weighted spline together. We'll use
# uncertainty propagation to calculate the new weights from block mean because our data
# vary smoothly but have different uncertainties.
spacing = 5 / 60  # 5 arc-minutes
chain = vd.Chain(
    [
        ("mean", vd.BlockMean(spacing=spacing * 111e3, uncertainty=True)),
        ("spline", vd.Spline(damping=1e-10)),
    ]
)
print(chain)

# Split the data into a training and testing set. We'll use the training set to grid the
# data and the testing set to validate our spline model. Weights need to be
# 1/uncertainty**2 for the error propagation in BlockMean to work.
train, test = vd.train_test_split(
    projection(*coordinates),
    data.velocity_up,
    weights=1 / data.std_up ** 2,
    random_state=0,
)
# Fit the model on the training set
chain.fit(*train)
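
# The validation step the comments describe would then score the chain on the testing
# set; a sketch mirroring the other examples in this list:
score = chain.score(*test)
print("Cross-validation R^2 score: {:.2f}".format(score))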
Code example #6
    c=data.bathymetry_m,
    s=0.1,
    transform=ccrs.PlateCarree(),
)
plt.colorbar().set_label("meters")
vd.datasets.setup_baja_bathymetry_map(ax)
plt.show()

########################################################################################
# We'll create a chain that applies a blocked median to the data, fits a polynomial
# trend, and then fits a standard gridder to the trend residuals.

chain = vd.Chain([
    ("reduce", vd.BlockReduce(np.median, spacing * 111e3)),
    ("trend", vd.Trend(degree=1)),
    ("spline", vd.Spline()),
])
print(chain)

########################################################################################
# Calling :meth:`verde.Chain.fit` will automatically run the data through the chain:
#
# #. Apply the blocked median to the input data
# #. Fit a trend to the blocked data and output the residuals
# #. Fit the spline to the trend residuals

chain.fit(proj_coords, data.bathymetry_m)

########################################################################################
# Now that the data has been through the chain, calling :meth:`verde.Chain.predict` will
# sum the results of every step in the chain that has a ``predict`` method. In our case,
Code example #7
data = vd.datasets.fetch_texas_wind()

# Use Mercator projection because Spline is a Cartesian gridder
projection = pyproj.Proj(proj="merc", lat_ts=data.latitude.mean())
proj_coords = projection(data.longitude.values, data.latitude.values)

region = vd.get_region((data.longitude, data.latitude))
# The desired grid spacing in degrees (converted to meters using 1 degree approx. 111km)
spacing = 15 / 60

########################################################################################
# Before we begin tuning, let's reiterate what the results were with the default
# parameters.

spline_default = vd.Spline()
score_default = np.mean(
    vd.cross_val_score(spline_default, proj_coords, data.air_temperature_c))
spline_default.fit(proj_coords, data.air_temperature_c)
print("R² with defaults:", score_default)

########################################################################################
# Tuning
# ------
#
# :class:`~verde.Spline` has many parameters that can be set to modify the final result.
# Mainly the ``damping`` regularization parameter and the ``mindist`` "fudge factor"
# which smooths the solution. Would changing the default values give us a better score?
#
# We can answer these questions by changing the values in our ``spline`` and
# re-evaluating the model score repeatedly for different values of these parameters.
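
########################################################################################
# A sketch of that parameter sweep, assuming illustrative candidate values for
# ``damping`` and ``mindist`` (the values searched in the original are not shown in
# this excerpt):

import itertools

dampings = [None, 1e-4, 1e-3, 1e-2]
mindists = [5e3, 10e3, 50e3, 100e3]
scores = []
for damping, mindist in itertools.product(dampings, mindists):
    spline = vd.Spline(damping=damping, mindist=mindist)
    score = np.mean(
        vd.cross_val_score(spline, proj_coords, data.air_temperature_c))
    scores.append((score, damping, mindist))
best_score, best_damping, best_mindist = max(scores, key=lambda item: item[0])
print("Best score: {:.3f} (damping={}, mindist={})".format(
    best_score, best_damping, best_mindist))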
Code example #8
data = vd.datasets.fetch_texas_wind()
coordinates = (data.longitude.values, data.latitude.values)
region = vd.get_region(coordinates)

# Use a Mercator projection for our Cartesian gridder
projection = pyproj.Proj(proj="merc", lat_ts=data.latitude.mean())

# The output grid spacing will be 15 arc-minutes
spacing = 15 / 60

# Now we can chain a blocked mean and spline together. The Spline can be regularized
# by setting the damping coefficient (should be positive). It's also a good idea to set
# the minimum distance to the average data spacing to avoid singularities in the spline.
chain = vd.Chain([
    ("mean", vd.BlockReduce(np.mean, spacing=spacing * 111e3)),
    ("spline", vd.Spline(damping=1e-10, mindist=100e3)),
])
print(chain)

# We can evaluate model performance by splitting the data into a training and testing
# set. We'll use the training set to grid the data and the testing set to validate our
# spline model.
train, test = vd.train_test_split(projection(*coordinates),
                                  data.air_temperature_c,
                                  random_state=0)

# Fit the model on the training set
chain.fit(*train)

# And calculate an R^2 score coefficient on the testing set. The best possible score
# (perfect prediction) is 1. This can tell us how good our spline is at predicting data
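
# A sketch of the scoring call those comments describe, mirroring code example #3:
score = chain.score(*test)
print("R^2 score on the testing set: {:.2f}".format(score))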
Code example #9
File: model_evaluation.py  Project: jcrawford/verde
# Let's plot these two datasets with different colors:

plt.figure(figsize=(8, 6))
ax = plt.axes()
ax.set_title("Air temperature measurements for Texas")
ax.plot(train[0][0], train[0][1], ".r", label="train")
ax.plot(test[0][0], test[0][1], ".b", label="test")
ax.legend()
ax.set_aspect("equal")
plt.show()

########################################################################################
# We can pass the training dataset to the :meth:`~verde.base.BaseGridder.fit` method of
# most gridders using Python's argument expansion using the ``*`` symbol.

spline = vd.Spline()
spline.fit(*train)

########################################################################################
# Let's plot the gridded result to see what it looks like. First, we'll create a
# geographic grid:
grid = spline.grid(
    region=region,
    spacing=spacing,
    projection=projection,
    dims=["latitude", "longitude"],
    data_names=["temperature"],
)
print(grid)

########################################################################################
Code example #10
File: vectors.py  Project: rowanc1/verde
    random_state=1,
)

########################################################################################
# Now we can make a 2-component spline. Since :class:`verde.Vector` implements
# ``fit``, ``predict``, and ``filter``, we can use it in a :class:`verde.Chain` to build
# a pipeline.
#
# We need to use a bit of damping so that the weights can be taken into account. Splines
# without damping provide a perfect fit to the data and ignore the weights as a
# consequence.

chain = vd.Chain([
    ("mean", vd.BlockMean(spacing=spacing * 111e3, uncertainty=True)),
    ("trend", vd.Vector([vd.Trend(1), vd.Trend(1)])),
    ("spline", vd.Vector([vd.Spline(damping=1e-10),
                          vd.Spline(damping=1e-10)])),
])
print(chain)

########################################################################################
#
# .. warning::
#
#     Never generate the component gridders with ``[vd.Spline()]*2``. This will result
#     in each component being represented by **the same Spline object**, causing
#     problems when trying to fit it to different components.
#
# Fitting the spline and gridding is exactly the same as what we've done before.

chain.fit(*train)
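
########################################################################################
# A sketch of the gridding step that "exactly the same" refers to. The region,
# spacing, projection, and data names below are assumptions, since they are defined
# outside this excerpt:

grid = chain.grid(
    region=region,  # assumed: the geographic region of the data
    spacing=spacing,  # assumed: the same spacing passed to BlockMean
    projection=projection,
    dims=["latitude", "longitude"],
    data_names=["east_component", "north_component"],  # hypothetical names
)
print(grid)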
Code example #11
            # Convert to float
            df = df.astype(np.float64)

            # Use Mercator projection because Spline is a Cartesian
            # gridder
            projection = pyproj.Proj(proj="merc", lat_ts=df.latitude.mean())
            proj_coords = projection(df.longitude.values, df.latitude.values)
            region = vd.get_region((df.longitude, df.latitude))

            # The desired grid spacing in degrees
            # (converted to meters using 1 degree approx. 111km)
            spacing = 1

            # Fit a spline with the chosen parameter set
            spline = vd.Spline(mindist=5e3, damping=1e-4)
            spline.fit(proj_coords, df[parameter])

            # Grid the fitted spline onto a regular geographic grid
            grid = spline.grid(
                region=region,
                spacing=spacing,
                projection=projection,
                dims=["lat", "lon"],
                data_names="value",
            )

            # Mask grid points that are too far from the given data
            # points
            mask = vd.distance_mask(
                (df.longitude, df.latitude),
Code example #12
plt.ylabel("Northing (m)")
plt.gca().set_aspect("equal")
plt.tight_layout()
plt.show()

########################################################################################
# Cartesian grids
# ---------------
#
# Now we can use :class:`verde.BlockReduce` and :class:`verde.Spline` on our projected
# coordinates. We'll specify the desired grid spacing as degrees and convert it to
# Cartesian using the 1 degree approx. 111 km rule-of-thumb.
spacing = 10 / 60
reducer = vd.BlockReduce(np.median, spacing=spacing * 111e3)
filter_coords, filter_bathy = reducer.filter(proj_coords, data.bathymetry_m)
spline = vd.Spline().fit(filter_coords, filter_bathy)

########################################################################################
# If we now call :meth:`verde.Spline.grid` we'll get back a grid evenly spaced in
# projected Cartesian coordinates.
grid = spline.grid(spacing=spacing * 111e3, data_names="bathymetry")
print("Cartesian grid:")
print(grid)

########################################################################################
# We'll mask our grid using :func:`verde.distance_mask` to get rid of all the spurious
# solutions far away from the data points.
grid = vd.distance_mask(proj_coords, maxdist=30e3, grid=grid)

plt.figure(figsize=(7, 6))
plt.title("Gridded bathymetry in Cartesian coordinates")
Code example #13
import pyproj
import verde as vd
import numpy as np
import matplotlib.pyplot as plt

print("Verde version:", vd.version.full_version)

data = vd.datasets.fetch_baja_bathymetry()
projection = pyproj.Proj(proj="merc", lat_ts=data.latitude.mean())
proj_coords = projection(data.longitude.values, data.latitude.values)

spacing = 10 / 60
interp = vd.Chain([
    ("median", vd.BlockReduce(np.median, spacing=spacing * 111e3)),
    ("spline", vd.Spline(mindist=10e3, damping=1e-5)),
])
interp.fit(proj_coords, data.bathymetry_m)

grid = interp.grid(spacing=spacing * 111e3, data_names=["bathymetry"])
grid = vd.distance_mask(proj_coords, maxdist=30e3, grid=grid)

fig, ax = plt.subplots(1, 1, figsize=(7, 6))
pc = grid.bathymetry.plot.pcolormesh(ax=ax,
                                     cmap="viridis",
                                     vmax=0,
                                     add_colorbar=False)
plt.colorbar(pc, pad=0, ax=ax, aspect=40).set_label("bathymetry (m)")
ax.set_xlabel("Easting (m)")
ax.set_ylabel("Northing (m)")
ax.set_title("Gridded bathymetry")
ax.set_aspect("equal")
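
# Presumably the script ends by rendering the figure:
plt.show()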
Code example #14
File: chain_trend.py  Project: zhaobin74/verde
# Load the Rio de Janeiro total field magnetic anomaly data
data = vd.datasets.fetch_rio_magnetic()
region = vd.get_region((data.longitude, data.latitude))

# Create a projection for the data using pyproj so that we can use it as input for the
# gridder. We'll set the latitude of true scale to the mean latitude of the data.
projection = pyproj.Proj(proj="merc", lat_ts=data.latitude.mean())

# Create a chain that fits a 2nd degree trend, decimates the residuals using a blocked
# mean to avoid aliasing, and then fits a standard gridder to the residuals. The spacing
# for the blocked mean will be 0.5 arc-minutes (approximately converted to meters).
spacing = 0.5 / 60
chain = vd.Chain([
    ("trend", vd.Trend(degree=2)),
    ("reduce", vd.BlockReduce(np.mean, spacing * 111e3)),
    ("spline", vd.Spline(damping=1e-8)),
])
print("Chained estimator:", chain)
# Calling 'fit' will automatically run the data through the chain
chain.fit(projection(data.longitude.values, data.latitude.values),
          data.total_field_anomaly_nt)

# Each component of the chain can be accessed separately using the 'named_steps'
# attribute
grid_trend = chain.named_steps["trend"].grid()
print("\nTrend grid:")
print(grid_trend)

grid_residual = chain.named_steps["spline"].grid()
print("\nResidual grid:")
print(grid_residual)
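
# For comparison, gridding the chain itself sums the predictions of every step that
# has a 'predict' method (trend + spline). A sketch; the geographic dims and data
# name below are assumptions:
grid_full = chain.grid(
    region=region,
    spacing=spacing,
    projection=projection,
    dims=["latitude", "longitude"],
    data_names=["total_field_anomaly"],
)
print("\nChained grid:")
print(grid_full)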