import verde as vd

# We'll test this on the air temperature data from Texas
data = vd.datasets.fetch_texas_wind()
coordinates = (data.longitude.values, data.latitude.values)
region = vd.get_region(coordinates)

# Use a Mercator projection for our Cartesian gridder
projection = pyproj.Proj(proj="merc", lat_ts=data.latitude.mean())

# The output grid spacing will be 15 arc-minutes (15 / 60 = 0.25 degrees)
spacing = 15 / 60

# This spline will automatically perform cross-validation and search for the optimal
# parameter configuration among the damping and mindist values listed here.
spline = vd.SplineCV(dampings=(1e-5, 1e-3, 1e-1), mindists=(10e3, 50e3, 100e3))

# Fit the model on the data. Under the hood, the class will perform K-fold
# cross-validation for each of the 3*3=9 parameter combinations and pick the one with
# the highest R² score. The coordinates are projected first because the spline is a
# Cartesian gridder.
spline.fit(projection(*coordinates), data.air_temperature_c)

# We can show the best R² score obtained in the cross-validation
print(f"\nScore: {spline.scores_.max():.3f}")

# And then the best spline parameters that produced this high score.
print("\nBest spline configuration:")
print(" mindist:", spline.mindist_)
print(" damping:", spline.damping_)

# Now we can create a geographic grid of air temperature by providing a projection
# Parameter tuning of this kind matters and should be repeated every time you work
# with a new gridder or a new dataset. Doing it by hand, as above, takes quite a bit
# of code. Luckily, Verde ships convenience classes that run the cross-validation and
# the tuning for you as part of fitting a dataset.

########################################################################################
# Cross-validated gridders
# ------------------------
#
# :class:`verde.SplineCV` is the cross-validated counterpart of
# :class:`verde.Spline`. Its interface is nearly identical, but everything done
# manually above happens automatically during the fit. The one difference is that,
# rather than a single value, you pass a list of candidate ``damping`` and
# ``mindist`` values to try:

spline = vd.SplineCV(dampings=dampings, mindists=mindists)

########################################################################################
# A call to :meth:`~verde.SplineCV.fit` performs a grid search over every parameter
# combination and keeps the one that maximizes the cross-validation score.

spline.fit(proj_coords, data.air_temperature_c)

########################################################################################
# The best damping and mindist that were found, together with the cross-validation
# scores, are available as attributes of the fitted class:

best_score = spline.scores_.max()
print("Highest score:", best_score)
print("Best damping:", spline.damping_)
print("Best mindist:", spline.mindist_)
# Parameter tuning of this kind matters and should be repeated every time you work
# with a new gridder or a new dataset. Doing it by hand, as above, takes quite a bit
# of code. Luckily, Verde ships convenience classes that run the cross-validation and
# the tuning for you as part of fitting a dataset.

########################################################################################
# Cross-validated gridders
# ------------------------
#
# :class:`verde.SplineCV` is the cross-validated counterpart of
# :class:`verde.Spline`. Its interface is nearly identical, but everything done
# manually above happens automatically during the fit. The one difference is that,
# rather than a single value, you pass a list of candidate ``damping`` and
# ``mindist`` values to try:

damping_candidates = [None, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1]
mindist_candidates = [5e3, 10e3, 25e3, 50e3, 75e3, 100e3]
spline = vd.SplineCV(dampings=damping_candidates, mindists=mindist_candidates)
spline.fit(proj_coords, data.air_temperature_c)

########################################################################################
# The best damping and mindist that were found, together with the cross-validation
# scores, are available as attributes of the fitted class:

best_score = spline.scores_.max()
print("Highest score:", best_score)
print("Best damping:", spline.damping_)
print("Best mindist:", spline.mindist_)

########################################################################################
# Finally, we can make a grid with the best configuration to see how it compares to the
# default result.