Beispiel #1
0
    def _spatial_interp(z_model: da.Array, x_model: da.Array,
                        y_model: da.Array, x_sat: np.ndarray,
                        y_sat: np.ndarray) -> np.ndarray:
        """Interpolate the model SSH at the satellite positions.

        The model coordinates and values are computed (loaded in memory),
        packed into a ``pyinterp.RTree`` and then sampled at the satellite
        coordinates with the ``thin_plate`` radial basis function.

        Args:
            z_model (dask.array.Array): model SSH
            x_model (dask.array.Array): model longitude
            y_model (dask.array.Array): model latitude
            x_sat (numpy.ndarray): satellite longitude
            y_sat (numpy.ndarray): satellite latitude

        Returns:
            numpy.ndarray: SSH interpolated at the satellite positions, cast
            to float32.
        """
        tree = pyinterp.RTree()

        # One (longitude, latitude) row per model point.
        model_points = np.vstack((x_model.compute(), y_model.compute())).T
        model_values = z_model.compute()
        tree.packing(model_points, model_values)

        # Query positions are handed to the tree as float32.
        sat_points = np.vstack((x_sat, y_sat)).T.astype("float32")
        interpolated, _ = tree.radial_basis_function(
            sat_points,
            within=True,
            k=11,
            rbf="thin_plate",
            num_threads=1,
        )
        return interpolated.astype("float32")
Beispiel #2
0
def spearman_1xn(
    x: da.Array,
    data: da.Array,
    value_range: Optional[Tuple[float, float]] = None,
    k: Optional[int] = None,
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Rank-transform ``x`` and every column of ``data``, then delegate to
    ``pearson_1xn`` on the ranks.

    Parameters
    ----------
    x : da.Array
        One-dimensional array; it is computed and ranked as a single series.
    data : da.Array
        Two-dimensional array; each column is ranked independently.
    value_range : Optional[Tuple[float, float]] = None
        Forwarded unchanged to ``pearson_1xn``.
    k : Optional[int] = None
        Forwarded unchanged to ``pearson_1xn``.

    Returns
    -------
    Tuple[np.ndarray, np.ndarray]
        The result of ``pearson_1xn`` applied to the ranked inputs.
    """

    _, ncols = data.shape
    data = data.compute()  # TODO: How to compute rank distributedly?

    # Ranks of tied values are averaged and therefore fractional, so the
    # buffer must be floating point whatever the dtype of ``data`` is. A
    # buffer that inherits an integer dtype would silently truncate them.
    ranks = np.empty(data.shape, dtype=np.float64)
    for j in range(ncols):
        ranks[:, j] = pd.Series(data[:, j]).rank()
    ranks = da.from_array(ranks)
    xrank = pd.Series(x.compute()).rank()
    xrank = da.from_array(xrank)

    return pearson_1xn(xrank, ranks, value_range, k)
Beispiel #3
0
    def _spatial_interp(
        z_model: da.Array,
        x_model: da.Array,
        y_model: da.Array,
        x_sat: np.ndarray,
        y_sat: np.ndarray,
    ) -> np.ndarray:
        """Interpolate the SSH of the NATL60 model at the satellite positions.

        Model points whose SSH is NaN are discarded before the remaining
        points are packed into a float32 ``pyinterp.RTree``. The tree is then
        sampled at the satellite coordinates with the ``thin_plate`` radial
        basis function. The duration of each stage is logged at debug level.

        Args:
            z_model (da.Array): SSH model
            x_model (da.Array): longitude model
            y_model (da.Array): latitude model
            x_sat (np.ndarray): longitude satellite
            y_sat (np.ndarray): latitude satellite

        Returns:
            np.ndarray: SSH at the satellite positions, cast to float32
        """
        tree = pyinterp.RTree(dtype=np.dtype("float32"))

        # Stage 1: load the model values and keep only the defined ones.
        tic = time.time()
        values = z_model.compute()
        valid = ~np.isnan(values)
        values = values[valid]

        # Only the coordinates of the defined values are loaded, stacked as
        # one (longitude, latitude) row per point.
        points = np.vstack(
            (x_model[valid].compute(), y_model[valid].compute())).T

        loaded_mb = (points.nbytes + values.nbytes) // 1024**2
        LOGGER.debug("loaded %d MB in %.2fs", loaded_mb, time.time() - tic)

        # Stage 2: build the tree, then drop the inputs that are no longer
        # needed.
        tic = time.time()
        tree.packing(points, values)
        del points, values
        LOGGER.debug("mesh build in %.2fs", time.time() - tic)

        # Stage 3: interpolate at the satellite positions.
        tic = time.time()
        sat_points = np.vstack((x_sat, y_sat)).T.astype("float32")
        interpolated, _ = tree.radial_basis_function(
            sat_points,
            within=True,
            k=11,
            radius=8000,
            rbf="thin_plate",
            num_threads=1,
        )
        LOGGER.debug("interpolation done in %.2fs", time.time() - tic)
        del tree
        return interpolated.astype("float32")
Beispiel #4
0
def spearman_nxn(data: da.Array) -> da.Array:
    """
    Spearman correlation calculation of a n x n correlation matrix for n columns

    Every column of ``data`` is replaced by its ranks and the ranked array is
    passed to ``pearson_nxn``.

    Parameters
    ----------
    data : da.Array
        Two-dimensional array; each column is ranked independently.

    Returns
    -------
    da.Array
        The result of ``pearson_nxn`` applied to the ranked columns.
    """
    _, ncols = data.shape
    data = data.compute()  # TODO: How to compute rank distributedly?

    # Ranks of tied values are averaged and therefore fractional, so the
    # buffer must be floating point whatever the dtype of ``data`` is. A
    # buffer that inherits an integer dtype would silently truncate them.
    ranks = np.empty(data.shape, dtype=np.float64)
    for j in range(ncols):
        ranks[:, j] = pd.Series(data[:, j]).rank()
    ranks = da.from_array(ranks)
    return pearson_nxn(ranks)
Beispiel #5
0
def calc_hist_kde(
        data: da.Array, bins: int,
        bandwidth: float) -> Tuple[pd.DataFrame, np.ndarray, np.ndarray]:
    """
    Build a density histogram of a numerical column together with a Gaussian
    kernel density estimate evaluated over the same value range.

    Parameters
    ----------
    data: da.Array
        one numerical column over which to compute the histogram and kde
    bins : int
        number of bins to use in the histogram
    bandwidth: float
        bandwidth for the kde, passed as ``bw_method`` to ``gaussian_kde``

    Returns
    -------
    Tuple[pd.DataFrame, np.ndarray, np.ndarray]
        The histogram in a dataframe (columns ``intervals``, ``left``,
        ``right`` and ``freq``), the 1000 evenly spaced points between the
        minimum and maximum of the data, and the kde evaluated at those
        points
    """
    # Both extrema are obtained with a single dask.compute call.
    lower, upper = dask.compute(data.min(), data.max())

    density, edges = da.histogram(
        data,
        range=[lower, upper],
        bins=bins,
        density=True,
    )
    density = density.compute()

    hist_df = pd.DataFrame({
        "intervals": _format_bin_intervals(edges),
        "left": edges[:-1],
        "right": edges[1:],
        "freq": density,
    })

    # The kde is evaluated on a fixed grid spanning the observed range.
    grid = np.linspace(lower, upper, 1000)
    kde = gaussian_kde(data.compute(), bw_method=bandwidth)
    return hist_df, grid, kde(grid)