def test_block_averaged_sources(coordinates):
    """
    Check if block_averaged_sources block-averages the coordinates
    """
    spacing = 4000
    depth = 100
    depth_factor = 1
    k_nearest = 3
    parameters = {
        "constant_depth": {},
        "relative_depth": {},
        "variable_depth": {"depth_factor": depth_factor, "k_nearest": k_nearest},
    }
    for depth_type, params in parameters.items():
        points = block_averaged_sources(
            coordinates, depth_type=depth_type, spacing=spacing, depth=depth, **params
        )
        # Check if there's one source per block
        # We do so by checking if every averaged coordinate is close enough to
        # the center of the block
        block_coords, labels = vd.block_split(
            points, spacing=spacing, region=vd.get_region(coordinates)
        )
        npt.assert_allclose(points[0], block_coords[0][labels], atol=spacing / 2)
        npt.assert_allclose(points[1], block_coords[1][labels], atol=spacing / 2)
Example #2
def grid_sources(coordinates, spacing=None, depth=None, **kwargs):
    """
    Create a regular grid of point sources

    All point sources will be located at the same depth, equal to the difference between
    the minimum elevation of observation points and the ``depth`` argument.

    Parameters
    ----------
    coordinates : tuple of arrays
        Tuple containing the coordinates of the observation points in the following
        order: (``easting``, ``northing``, ``upward``).
    spacing : float, tuple = (s_north, s_east)
        The block size in the South-North and West-East directions, respectively.
        A single value means that the size is equal in both directions.
    depth : float
        Depth shift used to compute the constant depth at which point sources will be
        located.

    Returns
    -------
    points : tuple of arrays
        Tuple containing the coordinates of the source points in the following order:
        (``easting``, ``northing``, ``upward``).
    """
    # Generate grid sources
    region = get_region(coordinates)
    easting, northing = grid_coordinates(region=region, spacing=spacing)
    upward = np.full_like(easting, coordinates[2].min()) - depth
    return easting, northing, upward
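A minimal usage sketch for ``grid_sources``; the synthetic coordinate values below are made up for illustration and assume the imports used by the function (``numpy``, ``verde``) are available:

import numpy as np

easting_obs = np.linspace(0, 10e3, 50)
northing_obs = np.linspace(-5e3, 5e3, 50)
upward_obs = np.full_like(easting_obs, 150.0)
sources = grid_sources(
    (easting_obs, northing_obs, upward_obs), spacing=2e3, depth=500
)
# All sources share the same depth: min(upward_obs) - depth = -350 m
print(sources[2].min(), sources[2].max())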
Example #3
def get_best_prediction(coordinates, data, target, layout, parameters_set):
    """
    Score interpolations with different parameters and get the best prediction

    Performs several predictions using the same source layout (but with different
    parameters) and scores each of them against the target grid using the RMS error.
    """
    # Get shape, region and height of the target grid
    region = vd.get_region((target.easting.values, target.northing.values))
    shape = target.shape
    height = target.height
    # Score the predictions (with RMS)
    rms = []
    for parameters in parameters_set:
        prediction, _ = grid_data(coordinates, data, region, shape, height,
                                  layout, parameters)
        # Score the prediction against target data
        rms.append(
            np.sqrt(mean_squared_error(target.values, prediction.values)))
    # Get best prediction
    best = np.nanargmin(rms)
    best_parameters = parameters_set[best]
    best_rms = rms[best]
    best_prediction, points = grid_data(coordinates, data, region, shape,
                                        height, layout, best_parameters)
    # Convert parameters and RMS to a pandas.DataFrame
    parameters_and_rms = pd.DataFrame.from_dict(parameters_set)
    parameters_and_rms["rms"] = rms
    # Add RMS and number of sources to the grid attributes
    best_prediction.attrs["rms"] = best_rms
    best_prediction.attrs["n_points"] = points[0].size
    return best_prediction, parameters_and_rms
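A hedged sketch of how a ``parameters_set`` could be assembled before calling ``get_best_prediction``. The parameter names ``damping`` and ``depth`` are illustrative assumptions; the actual keys depend on what the (not shown) ``grid_data`` helper expects:

import itertools

dampings = [1e-3, 1e-2, 1e-1]
depths = [1e3, 5e3, 10e3]
# One dict per combination, matching the list-of-dicts layout that
# pd.DataFrame.from_dict and parameters_set[best] rely on above
parameters_set = [
    {"damping": damping, "depth": depth}
    for damping, depth in itertools.product(dampings, depths)
]

The list can then be passed as the ``parameters_set`` argument together with the target grid and source layout.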
Example #4
def plot_grid(ax,
              coordinates,
              linestyles="dotted",
              region=None,
              pad=50,
              **kwargs):
    "Plot the grid coordinates as dots and lines."
    data_region = vd.get_region(coordinates)
    ax.vlines(
        coordinates[0][0],
        ymin=data_region[2],
        ymax=data_region[3],
        linestyles=linestyles,
        zorder=0,
    )
    ax.hlines(
        coordinates[1][:, 1],
        xmin=data_region[0],
        xmax=data_region[1],
        linestyles=linestyles,
        zorder=0,
    )
    ax.scatter(*coordinates, **kwargs)
    if pad:
        # Fall back to the data region if no region was explicitly given
        if region is None:
            region = data_region
        padded = vd.pad_region(region, pad=pad)
        plt.xlim(padded[:2])
        plt.ylim(padded[2:])
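A small usage sketch for ``plot_grid`` on a regular grid built with ``verde``; the region, spacing and marker size are arbitrary:

import matplotlib.pyplot as plt
import verde as vd

region = (0, 10e3, -5e3, 5e3)
grid_coords = vd.grid_coordinates(region=region, spacing=1e3)
fig, ax = plt.subplots(figsize=(6, 6))
plot_grid(ax, grid_coords, region=region, pad=500, s=10)
plt.show()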
Example #5
    def fit(self, coordinates, data, weights=None):
        """
        Fit the gridder to the given 3-component vector data.

        The data region is captured and used as default for the
        :meth:`~verde.Vector3D.grid` and :meth:`~verde.Vector3D.scatter`
        methods.

        All input arrays must have the same shape.

        Parameters
        ----------
        coordinates : tuple of arrays
            Arrays with the coordinates of each data point. Should be in the
            following order: (easting, northing, vertical, ...). Only easting
            and northing will be used, all subsequent coordinates will be
            ignored.
        data : tuple of arrays
            A tuple ``(east_component, north_component, up_component)`` of
            arrays with the vector data values at each point.
        weights : None or tuple of arrays
            If not None, then the weights assigned to each data point. Must be
            one array per data component. Typically, this should be 1 over the
            data uncertainty squared.

        Returns
        -------
        self
            Returns this estimator instance for chaining operations.

        """
        coordinates, data, weights = check_fit_input(coordinates,
                                                     data,
                                                     weights,
                                                     unpack=False)
        if len(data) != 3:
            raise ValueError(
                "Need three data components. Only {} given.".format(len(data)))
        # Capture the data region to use as a default when gridding.
        self.region_ = get_region(coordinates[:2])
        self.force_coords_ = self._get_force_coordinates(coordinates)
        if any(w is not None for w in weights):
            weights = np.concatenate([i.ravel() for i in weights])
        else:
            weights = None
        self._check_weighted_exact_solution(weights)
        if self.flip_vertical:
            data = list(data)
            data[-1] = -data[-1]
            data = tuple(data)
        data = np.concatenate([i.ravel() for i in data])
        jacobian = vector3d_jacobian(coordinates[:2],
                                     self.force_coords_,
                                     self.poisson,
                                     self.depth,
                                     fudge=self.fudge)
        self.force_ = self._estimate_forces(jacobian, data, weights)
        return self
    def _create_rolling_windows(self, coordinates):
        """
        Create indices of sources and data points for each rolling window
        """
        # Compute window spacing based on overlapping
        window_spacing = self.window_size * (1 - self.overlapping)
        # Get the largest region between data points and sources
        data_region = vd.get_region(coordinates)
        sources_region = vd.get_region(self.points_)
        region = (
            min(data_region[0], sources_region[0]),
            max(data_region[1], sources_region[1]),
            min(data_region[2], sources_region[2]),
            max(data_region[3], sources_region[3]),
        )
        # The windows for sources and data points are the same, but the
        # verde.rolling_window function creates indices for the given
        # coordinates. That's why we need to create two sets of window
        # indices: one for the sources and one for the data points.
        # We pass the same region, size and spacing to be sure that both
        # sets of windows are the same.
        _, source_windows = vd.rolling_window(self.points_,
                                              region=region,
                                              size=self.window_size,
                                              spacing=window_spacing)
        _, data_windows = vd.rolling_window(coordinates,
                                            region=region,
                                            size=self.window_size,
                                            spacing=window_spacing)
        # Ravel the indices
        source_windows = [i[0] for i in source_windows.ravel()]
        data_windows = [i[0] for i in data_windows.ravel()]
        # Shuffle windows
        if self.shuffle:
            source_windows, data_windows = shuffle(
                source_windows, data_windows, random_state=self.random_state)
        # Remove empty windows
        source_windows_nonempty = []
        data_windows_nonempty = []
        for src, data in zip(source_windows, data_windows):
            if src.size > 0 and data.size > 0:
                source_windows_nonempty.append(src)
                data_windows_nonempty.append(data)
        return source_windows_nonempty, data_windows_nonempty
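For reference, a small sketch of what ``verde.rolling_window`` returns and why each element is unpacked with ``i[0]`` above; the region, size and spacing values are arbitrary:

import verde as vd

coords = vd.scatter_points(region=(0, 10e3, 0, 10e3), size=200, random_state=0)
window_coords, indices = vd.rolling_window(
    coords, size=4e3, spacing=2e3, region=(0, 10e3, 0, 10e3)
)
# indices is a 2D array where each element is a tuple holding one array of
# point indices per window, hence the i[0] when ravelling
first_window = indices.ravel()[0][0]
print(first_window.size, "points fall inside the first window")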
def test_grid_sources(coordinates):
    """
    Check if grid_sources creates a regular grid of sources
    """
    depth = 100
    spacing = 4000
    points = grid_sources(coordinates, spacing=spacing, depth=depth)
    grid = vd.grid_coordinates(vd.get_region(coordinates), spacing=spacing)
    npt.assert_allclose(points[0], grid[0])
    npt.assert_allclose(points[1], grid[1])
    npt.assert_allclose(points[2], coordinates[2].min() - depth)
Example #8
def test_vector2d_forces(data2d):
    "See if the exact solution works when providing forces"
    coords, data = data2d
    force_coords = vd.scatter_points(vd.get_region(coords), size=500, random_state=1)
    spline = Elastic2D(force_coords=force_coords).fit(coords, data)
    npt.assert_allclose(spline.score(coords, data), 1)
    npt.assert_allclose(spline.predict(coords), data, rtol=1e-2, atol=1)
    # There should be one force per given force coordinate (two components each)
    assert spline.force_coords[0].size == 500
    assert spline.force_.size == 2 * 500
    npt.assert_allclose(spline.force_coords, n_1d_arrays(force_coords, n=2))
Example #9
    def fit(self, coordinates, data, weights=None):
        """
        Fit the coefficients of the equivalent sources.

        The fitting process is carried out through the gradient-boosting
        algorithm.
        The data region is captured and used as default for the
        :meth:`~harmonica.EquivalentSourcesGB.grid` method.

        All input arrays must have the same shape.

        Parameters
        ----------
        coordinates : tuple of arrays
            Arrays with the coordinates of each data point. Should be in the
            following order: (``easting``, ``northing``, ``upward``, ...).
            Only ``easting``, ``northing``, and ``upward`` will be used, all
            subsequent coordinates will be ignored.
        data : array
            The data values of each data point.
        weights : None or array
            If not None, then the weights assigned to each data point.
            Typically, this should be 1 over the data uncertainty squared.

        Returns
        -------
        self
            Returns this estimator instance for chaining operations.
        """
        coordinates, data, weights = vdb.check_fit_input(coordinates, data, weights)
        coordinates, data, weights = cast_fit_input(
            coordinates, data, weights, self.dtype
        )
        # Capture the data region to use as a default when gridding.
        self.region_ = get_region(coordinates[:2])
        # Ravel coordinates, data and weights to 1d-arrays
        coordinates = vdb.n_1d_arrays(coordinates, 3)
        data = data.ravel()
        if weights is not None:
            weights = weights.ravel()
        # Build point sources
        if self.points is None:
            self.points_ = self._build_points(coordinates)
        else:
            self.points_ = tuple(
                p.astype(self.dtype) for p in vdb.n_1d_arrays(self.points, 3)
            )
        # Initialize coefficients
        self.coefs_ = np.zeros_like(self.points_[0])
        # Fit coefficients through gradient boosting
        self._gradient_boosting(coordinates, data, weights)
        return self
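A hedged usage sketch of the gradient-boosted fit. The class name follows :class:`harmonica.EquivalentSourcesGB` referenced in the docstring; the constructor arguments and their values are assumptions based on the attributes used above, and the synthetic data is only for illustration:

import numpy as np
import verde as vd
import harmonica as hm

# Synthetic smooth data on a regular grid, 500 m above the zero level
coordinates = vd.grid_coordinates(
    region=(0, 50e3, 0, 50e3), spacing=2e3, extra_coords=500
)
data = np.sin(coordinates[0] / 10e3) + np.cos(coordinates[1] / 15e3)

eqs = hm.EquivalentSourcesGB(
    depth=2e3, damping=10, window_size=20e3, random_state=42
)
eqs.fit(coordinates, data)
# predict() evaluates the fitted sources on any set of coordinates
predicted = eqs.predict(coordinates)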
Example #10
    def fit(self, coordinates, data, weights=None):
        """
        Fit the gridder to the given 2-component vector data.

        The data region is captured and used as default for the
        :meth:`~erizo.Elastic2D.grid` and :meth:`~erizo.Elastic2D.scatter`
        methods.

        All input arrays must have the same shape.

        Parameters
        ----------
        coordinates : tuple of arrays
            Arrays with the coordinates of each data point. Should be in the
            following order: (easting, northing, vertical, ...). Only easting
            and northing will be used, all subsequent coordinates will be
            ignored.
        data : tuple of arrays
            A tuple ``(east_component, north_component)`` of arrays with the
            vector data values at each point.
        weights : None or tuple of arrays
            If not None, then the weights assigned to each data point. Must be
            one array per data component. Typically, this should be 1 over the
            data uncertainty squared.

        Returns
        -------
        self
            Returns this estimator instance for chaining operations.

        """
        coordinates, data, weights = check_fit_input(coordinates,
                                                     data,
                                                     weights,
                                                     unpack=False)
        if len(data) != 2:
            raise ValueError("Need two data components. Only {} given.".format(
                len(data)))
        # Capture the data region to use as a default when gridding.
        self.region_ = get_region(coordinates[:2])
        if any(w is not None for w in weights):
            weights = np.concatenate([i.ravel() for i in weights])
        else:
            weights = None
        warn_weighted_exact_solution(self, weights)
        data = np.concatenate([i.ravel() for i in data])
        if self.force_coords is None:
            self.force_coords = tuple(i.copy()
                                      for i in n_1d_arrays(coordinates, n=2))
        jacobian = self.jacobian(coordinates[:2], self.force_coords)
        self.force_ = least_squares(jacobian, data, weights, self.damping)
        return self
Example #11
def _get_region_data_sources(coordinates, points):
    """
    Return the region that contains every observation and every source

    Parameters
    ----------
    coordinates : tuple
    points : tuple

    Returns
    -------
    region : tuple
    """
    data_region = get_region(coordinates)
    sources_region = get_region(points)
    region = (
        min(data_region[0], sources_region[0]),
        max(data_region[1], sources_region[1]),
        min(data_region[2], sources_region[2]),
        max(data_region[3], sources_region[3]),
    )
    return region
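A tiny worked example of the region merge performed by ``_get_region_data_sources``; the coordinate values are arbitrary:

import numpy as np

coordinates = (np.array([0.0, 10.0]), np.array([-5.0, 5.0]))
points = (np.array([-2.0, 8.0]), np.array([-1.0, 7.0]))
# Data region is (0, 10, -5, 5) and sources region is (-2, 8, -1, 7), so the
# merged region takes the widest bounds of each pair: (-2, 10, -5, 7)
print(_get_region_data_sources(coordinates, points))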
Example #12
    def fit(self, coordinates, data, weights=None):
        """
        Fit the coefficients of the equivalent layer.

        The data region is captured and used as default for the
        :meth:`~harmonica.EQLHarmonic.grid` and
        :meth:`~harmonica.EQLHarmonic.scatter` methods.

        All input arrays must have the same shape.

        Parameters
        ----------
        coordinates : tuple of arrays
            Arrays with the coordinates of each data point. Should be in the
            following order: (``easting``, ``northing``, ``upward``, ...).
            Only ``easting``, ``northing``, and ``upward`` will be used, all
            subsequent coordinates will be ignored.
        data : array
            The data values of each data point.
        weights : None or array
            If not None, then the weights assigned to each data point.
            Typically, this should be 1 over the data uncertainty squared.

        Returns
        -------
        self
            Returns this estimator instance for chaining operations.
        """
        coordinates, data, weights = vdb.check_fit_input(
            coordinates, data, weights)
        # Capture the data region to use as a default when gridding.
        self.region_ = vd.get_region(coordinates[:2])
        coordinates = vdb.n_1d_arrays(coordinates, 3)
        if self.points is None:
            self.points_ = (
                coordinates[0],
                coordinates[1],
                coordinates[2] - self.relative_depth,
            )
        else:
            self.points_ = vdb.n_1d_arrays(self.points, 3)
        jacobian = self.jacobian(coordinates, self.points_)
        self.coefs_ = vdb.least_squares(jacobian, data, weights, self.damping)
        return self
Example #13
def _cut_and_scale(survey, region, data_region):
    """
    Cut a subsection from the original survey and scale it to the given region

    Parameters
    ----------
    survey : :class:`pandas.DataFrame`
        Original survey as a :class:`pandas.DataFrame` containing the following
        columns: ``longitude``, ``latitude`` and ``height``.
    region : tuple or list (optional)
        Region to which the survey point coordinates will be scaled. The
        boundaries must be passed in the following order: (``west``, ``east``,
        ``south``, ``north``, ...), defined on a geodetic coordinate system and
        in degrees. All subsequent boundaries will be ignored. If ``None``, the
        survey points won't be scaled.
    data_region : tuple or list (optional)
        Region where the original Great Britain magnetic dataset will be
        sampled. The boundaries must be passed in the following order:
        (``west``, ``east``, ``south``, ``north``, ...), defined on a geodetic
        coordinate system and in degrees. All subsequent boundaries will be
        ignored.

    Returns
    -------
    survey : :class:`pandas.DataFrame`
        Dataframe containing the coordinates of the observation points on
        a geodetic coordinate system. Longitudes and latitudes are in degrees,
        and heights in meters.
    """
    # Cut the data into the data_region
    inside_points = inside((survey.longitude, survey.latitude), data_region)
    survey = survey[inside_points].copy()
    # Scale survey coordinates to the passed region
    if region is not None:
        w, e, s, n = region[:4]
        longitude_min, longitude_max, latitude_min, latitude_max = get_region(
            (survey.longitude, survey.latitude))
        survey["longitude"] = (e - w) / (longitude_max - longitude_min) * (
            survey.longitude - longitude_min) + w
        survey["latitude"] = (n - s) / (latitude_max - latitude_min) * (
            survey.latitude - latitude_min) + s
    return survey
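A hedged usage sketch for ``_cut_and_scale`` with a toy survey; the coordinate values are made up and only illustrate the cut-then-rescale behaviour (points outside ``data_region`` are dropped, the rest are stretched onto ``region``):

import pandas as pd

survey = pd.DataFrame({
    "longitude": [-3.0, -2.0, -1.0, 0.5],
    "latitude": [51.0, 52.0, 53.0, 54.5],
    "height": [100.0, 120.0, 90.0, 110.0],
})
# The last point falls outside data_region and is removed; the remaining
# longitudes and latitudes are rescaled to span the (0, 10, 0, 10) region
scaled = _cut_and_scale(survey, region=(0, 10, 0, 10), data_region=(-4, 0, 50, 54))
print(scaled[["longitude", "latitude", "height"]])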
Example #14
processing pipeline.
"""
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import numpy as np
import pyproj
import verde as vd


# Fetch the wind speed data from Texas.
data = vd.datasets.fetch_texas_wind()
print(data.head())

# Separate out some of the data into utility variables
coordinates = (data.longitude.values, data.latitude.values)
region = vd.get_region(coordinates)
# Use a Mercator projection because Spline is a Cartesian gridder
projection = pyproj.Proj(proj="merc", lat_ts=data.latitude.mean())

# Split the data into a training and testing set. We'll fit the gridder on the training
# set and use the testing set to evaluate how well the gridder is performing.
train, test = vd.train_test_split(
    projection(*coordinates),
    (data.wind_speed_east_knots, data.wind_speed_north_knots),
    random_state=2,
)

# We'll make a 20 arc-minute grid
spacing = 20 / 60

# Chain together a blocked mean to avoid aliasing, a polynomial trend (Spline usually
Example #15
sample air temperature data.
"""
import numpy as np
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import itertools
import pyproj
import verde as vd

data = vd.datasets.fetch_texas_wind()

# Use Mercator projection because Spline is a Cartesian gridder
projection = pyproj.Proj(proj="merc", lat_ts=data.latitude.mean())
proj_coords = projection(data.longitude.values, data.latitude.values)

region = vd.get_region((data.longitude, data.latitude))
# The desired grid spacing in degrees (converted to meters using 1 degree approx. 111km)
spacing = 15 / 60

########################################################################################
# Splitting the data
# ------------------
#
# We can't evaluate a gridder on the data that went into fitting it. The true test of a
# model is if it can correctly predict data that it hasn't seen before. scikit-learn has
# the :func:`sklearn.model_selection.train_test_split` function to separate a dataset
# into two parts: one for fitting the model (called *training* data) and a separate one
# for evaluating the model (called *testing* data). Using it with spatial data would
# involve some tedious array conversions so Verde implements
# :func:`verde.train_test_split` which does the same thing but takes coordinates and
# data arrays instead.
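# As a quick sketch (the test_size value here is only illustrative), the split
# can be applied directly to the projected coordinates and temperature values:
train, test = vd.train_test_split(
    proj_coords, data.air_temperature_c, test_size=0.3, random_state=0
)
# Each part is a tuple of (coordinates, data) ready to be unpacked into fit
print("Training points:", train[0][0].size)
print("Testing points:", test[0][0].size)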
Example #16
    properties={"density": density},
)

# Compute gravity field on a regular grid located at 4000m above the ellipsoid
coordinates = vd.grid_coordinates(region=(12, 33, -35, -18),
                                  spacing=0.2,
                                  extra_coords=4000)
easting, northing = projection(*coordinates[:2])
coordinates_projected = (easting, northing, coordinates[-1])
prisms_gravity = prisms.prism_layer.gravity(coordinates_projected, field="g_z")

# Make a plot of the computed gravity
plt.figure(figsize=(8, 8))
ax = plt.axes(projection=ccrs.Mercator())
maxabs = vd.maxabs(prisms_gravity)
tmp = ax.pcolormesh(*coordinates[:2],
                    prisms_gravity,
                    vmin=-maxabs,
                    vmax=maxabs,
                    cmap="RdBu_r",
                    transform=ccrs.PlateCarree())
ax.set_extent(vd.get_region(coordinates), crs=ccrs.PlateCarree())
plt.title("Gravitational acceleration of the topography")
plt.colorbar(tmp,
             label="mGal",
             orientation="horizontal",
             shrink=0.93,
             pad=0.01,
             aspect=50)
plt.show()
Example #17
def plot_data(data,
              fname,
              every=1,
              maxabs=3,
              pad=None,
              scale=300,
              s=20,
              key=30,
              cmap='seismic',
              coords=None):
    """
    Plot the 3 data components in 2 maps.
    """
    fig, axes = plt.subplots(1,
                             2,
                             figsize=(13.1, 7.5),
                             subplot_kw=dict(projection=ccrs.Mercator()))
    crs = ccrs.PlateCarree()
    # Plot the horizontal components
    ax = axes[0]
    if data.east_velocity.ndim == 1:
        east = data.east_velocity.values[::every]
        north = data.north_velocity.values[::every]
    else:
        east = data.east_velocity.values[::every, ::every]
        north = data.north_velocity.values[::every, ::every]
    tmp = ax.quiver(data.longitude.values[::every],
                    data.latitude.values[::every],
                    east,
                    north,
                    scale=scale,
                    width=0.0015,
                    transform=crs)
    ax.set_title('Horizontal velocity')
    # Plot the vertical component
    ax = axes[1]
    if data.up_velocity.ndim == 1:
        pc = ax.scatter(data.longitude,
                        data.latitude,
                        c=data.up_velocity,
                        s=s,
                        cmap=cmap,
                        vmin=-maxabs,
                        vmax=maxabs,
                        transform=crs)
    else:
        pc = ax.pcolormesh(data.longitude,
                           data.latitude,
                           data.up_velocity,
                           cmap=cmap,
                           vmin=-maxabs,
                           vmax=maxabs,
                           transform=crs)
        ax.coastlines()
    plt.colorbar(pc, ax=ax, pad=0, aspect=50).set_label('mm/yr')
    ax.set_title('Vertical velocity')
    ax.quiverkey(tmp,
                 0.60,
                 0.10,
                 key,
                 label='{} mm/yr'.format(key),
                 coordinates='figure')
    if coords is not None:
        ax.plot(*coords, '.k', markersize=1, transform=crs)
    # Setup the axis labels and ticks
    region = vd.get_region((data.longitude, data.latitude))
    if pad is not None:
        region = vd.pad_region(region, pad)
    for ax in axes:
        ax.add_feature(cfeature.LAND, facecolor='gray')
        ax.add_feature(cfeature.OCEAN)
        ax.set_extent(region, crs=crs)
        # Setup the map ticks
        ax.set_xticks(np.arange(-123, -115, 2), crs=crs)
        ax.xaxis.set_major_formatter(LongitudeFormatter())
    axes[0].set_yticks(np.arange(34, 42, 2), crs=crs)
    axes[0].yaxis.set_major_formatter(LatitudeFormatter())
    plt.tight_layout(w_pad=0, pad=0)
    fig.savefig('../../images/{}.png'.format(fname), transparent=True, dpi=300)
    return fig, axes
Example #18
more information.
"""
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import verde as vd
import harmonica as hm

# Fetch the data in a pandas.DataFrame
data = hm.datasets.fetch_south_africa_gravity()
print(data)

# Plot the observations in a Mercator map using Cartopy
fig = plt.figure(figsize=(6.5, 5))
ax = plt.axes(projection=ccrs.Mercator())
ax.set_title("Observed gravity data from South Africa", pad=25)
tmp = ax.scatter(
    data.longitude,
    data.latitude,
    c=data.gravity,
    s=0.8,
    cmap="viridis",
    transform=ccrs.PlateCarree(),
)
plt.colorbar(
    tmp, ax=ax, label="observed gravity [mGal]", aspect=50, pad=0.1, shrink=0.92
)
ax.set_extent(vd.get_region((data.longitude, data.latitude)))
ax.gridlines(draw_labels=True)
ax.coastlines()
plt.show()
Example #19
    def fit(self, coordinates, data, weights=None):
        """
        Fit the gridder to the given 3-component vector data.

        The data region is captured and used as default for the
        :meth:`~verde.VectorSpline3D.grid` and :meth:`~verde.VectorSpline3D.scatter`
        methods.

        All input arrays must have the same shape.

        Parameters
        ----------
        coordinates : tuple of arrays
            Arrays with the coordinates of each data point. Should be in the
            following order: (easting, northing, vertical, ...). Only easting
            and northing will be used, all subsequent coordinates will be
            ignored.
        data : tuple of arrays
            A tuple ``(east_component, north_component, up_component)`` of
            arrays with the vector data values at each point.
        weights : None or tuple of arrays
            If not None, then the weights assigned to each data point. Must be
            one array per data component. Typically, this should be 1 over the
            data uncertainty squared.

        Returns
        -------
        self
            Returns this estimator instance for chaining operations.

        """
        coordinates, data, weights = check_fit_input(coordinates,
                                                     data,
                                                     weights,
                                                     unpack=False)
        if len(data) != 3:
            raise ValueError(
                "Need three data components. Only {} given.".format(len(data)))
        # Capture the data region to use as a default when gridding.
        self.region_ = get_region(coordinates[:2])
        if any(w is not None for w in weights):
            weights = np.concatenate([i.ravel() for i in weights])
        else:
            weights = None
        data = np.concatenate([i.ravel() for i in data])
        if self.force_coords is None:
            self.force_coords = tuple(i.copy()
                                      for i in n_1d_arrays(coordinates, n=2))
        else:
            self.force_coords = n_1d_arrays(self.force_coords, n=2)
        if self.depth_scale is None:
            self._depth_scale = np.zeros_like(self.force_coords[0])
        elif self.depth_scale == "nearest":
            points = np.transpose(self.force_coords)
            nndist = np.median(KDTree(points).query(points, k=20)[0], axis=1)
            self._depth_scale = nndist - nndist.min()
        else:
            self._depth_scale = self.depth_scale
        jacobian = self.jacobian(coordinates[:2], self.force_coords)
        self.force_ = least_squares(jacobian, data, weights, self.damping)
        return self
    def fit(self, coordinates, data, weights=None):
        """
        Fit the coefficients of the equivalent layer.

        The source coefficients are iteratively fitted.
        A regular set of rolling windows with 50% overlap is defined over
        the entire data region. On each iteration, one window is randomly
        selected and all the coefficients of the sources that fall inside
        that window are fitted using the data points that also fall inside it.
        Then the field produced by these sources is computed and removed from
        the data to obtain a residue. The next iteration proceeds in the same
        way, randomly choosing another window, but now the fit is done on the
        residue.

        The data region is captured and used as default for the
        :meth:`~harmonica.EQLHarmonic.grid` and
        :meth:`~harmonica.EQLHarmonic.scatter` methods.

        All input arrays must have the same shape.

        Parameters
        ----------
        coordinates : tuple of arrays
            Arrays with the coordinates of each data point. Should be in the
            following order: (easting, northing, upward, ...).
            Only easting, northing, and upward will be used, all subsequent
            coordinates will be ignored.
        data : array
            The data values of each data point.
        weights : None or array
            If not None, then the weights assigned to each data point.
            Typically, this should be 1 over the data uncertainty squared.

        Returns
        -------
        self
            Returns this estimator instance for chaining operations.
        """
        coordinates, data, weights = vdb.check_fit_input(
            coordinates, data, weights)
        # Capture the data region to use as a default when gridding.
        self.region_ = vd.get_region(coordinates[:2])
        # Ravel coordinates, data and weights to 1d-arrays
        coordinates = vdb.n_1d_arrays(coordinates, 3)
        data = data.ravel()
        if weights is not None:
            weights = weights.ravel()
        # Define self.points_ if warm_start is False and gridder is not
        # already fitted
        if not self.warm_start or not hasattr(self, "coefs_"):
            if self.points is None:
                self.points_ = (
                    coordinates[0],
                    coordinates[1],
                    coordinates[2] - self.relative_depth,
                )
            else:
                self.points_ = vdb.n_1d_arrays(self.points, 3)
        # Initialize coefficients and residue arrays
        if self.warm_start and hasattr(self, "coefs_"):
            residue = data - self.predict(coordinates)
        else:
            self.coefs_ = np.zeros(self.points_[0].size)
            residue = data.copy()
        # Fit coefficients through gradient boosting
        self._gradient_boosting(coordinates, residue, weights)
        return self
Example #21
    for parameter in PARAMETERS:
        # Create subset
        df = raw[["latitude", "longitude", parameter]]

        # Remove NaN values
        df = df[df[parameter].notna()]

        if len(df.index) > 250:
            # Convert to float
            df = df.astype(np.float64)

            # Use Mercator projection because Spline is a Cartesian
            # gridder
            projection = pyproj.Proj(proj="merc", lat_ts=df.latitude.mean())
            proj_coords = projection(df.longitude.values, df.latitude.values)
            region = vd.get_region((df.longitude, df.latitude))

            # The desired grid spacing in degrees
            # (converted to meters using 1 degree approx. 111km)
            spacing = 1

            # Loop over the combinations and collect
            # the scores for each parameter set
            spline = vd.Spline(mindist=5e3, damping=1e-4)
            spline.fit(proj_coords, df[parameter])

            # Cross-validated gridder
            grid = spline.grid(
                region=region,
                spacing=spacing,
                projection=projection,
Example #22
# Evaluate the data fit by calculating an R² score against the observed data.
# This is a measure of how well the layer fits the data, NOT how good the
# interpolation will be.
print("R² score:", eql.score(coordinates, gravity_disturbance))

# Interpolate data on a regular grid with 0.2 degrees spacing defined on
# geodetic coordinates. To do so we need to specify that we want coordinates to
# be converted to spherical geocentric coordinates before the prediction is
# carried out. This can be done through the "projection" argument.
# The interpolation requires an extra coordinate (upward height). By passing in
# 2500 m above the ellipsoid, we're effectively
# upward-continuing the data (maximum height of observation points is 2400 m).
# All the parameters passed to build the grid (region, spacing and
# extra_coords) are in geodetic coordinates.
region = vd.get_region((longitude, latitude))
grid = eql.grid(
    region=region,
    spacing=0.2,
    extra_coords=2500,
    dims=["latitude", "longitude"],
    data_names=["gravity_disturbance"],
    projection=ellipsoid.geodetic_to_spherical,
)

# Mask grid points too far from data points
grid = vd.distance_mask(data_coordinates=coordinates, maxdist=0.5, grid=grid)

# Get the maximum absolute value between the original and gridded data so we
# can use the same color scale for both plots and have 0 centered at the white
# color.