Beispiel #1
0
def test_blockmedian_without_outfile_setting():
    """
    Check that calling blockmedian with a table, spacing and region, but
    without an outfile argument, raises GMTInvalidInput.
    """
    call_kwargs = {
        "table": "@tut_ship.xyz",
        "spacing": "5m",
        "region": [245, 255, 20, 30],
    }
    with pytest.raises(GMTInvalidInput):
        blockmedian(**call_kwargs)
Beispiel #2
0
def test_blockmedian_wrong_kind_of_input_table_grid():
    """
    Check that blockmedian rejects a grid passed in as the table argument.

    The bathymetry column of the sample data is converted with ``to_xarray``
    so that ``data_kind`` reports "grid"; GMTInvalidInput is then expected.
    """
    bathymetry_grid = load_sample_bathymetry().bathymetry.to_xarray()
    assert data_kind(bathymetry_grid) == "grid"

    region = [245, 255, 20, 30]
    with pytest.raises(GMTInvalidInput):
        blockmedian(table=bathymetry_grid, spacing="5m", region=region)
Beispiel #3
0
def test_blockmedian_wrong_kind_of_input_table_matrix():
    """
    Check that blockmedian rejects a plain matrix passed in as the table.

    The ``.values`` of the sample bathymetry DataFrame are used so that
    ``data_kind`` reports "matrix"; GMTInvalidInput is then expected.
    """
    bathymetry_matrix = load_sample_bathymetry().values
    assert data_kind(bathymetry_matrix) == "matrix"

    region = [245, 255, 20, 30]
    with pytest.raises(GMTInvalidInput):
        blockmedian(table=bathymetry_matrix, spacing="5m", region=region)
Beispiel #4
0
def test_blockmedian_without_outfile_setting():
    """
    Check that blockmedian returns a pandas.DataFrame of the expected shape
    and first row when no outfile argument is passed in.
    """
    expected_first_row = [245.88819, 29.97895, -385.0]

    result = blockmedian(
        data="@tut_ship.xyz",
        spacing="5m",
        region=[245, 255, 20, 30],
    )

    assert isinstance(result, pd.DataFrame)
    assert result.shape == (5849, 3)
    npt.assert_allclose(result.iloc[0], expected_first_row)
Beispiel #5
0
def test_blockmedian_input_table_matrix(dataframe):
    """
    Check blockmedian when the input data is the ``.values`` matrix of the
    ``dataframe`` argument rather than the pandas.DataFrame itself.
    """
    expected_first_row = [245.88819, 29.97895, -385.0]

    result = blockmedian(
        data=dataframe.values,
        spacing="5m",
        region=[245, 255, 20, 30],
    )

    assert isinstance(result, pd.DataFrame)
    assert result.shape == (5849, 3)
    npt.assert_allclose(result.iloc[0], expected_first_row)
Beispiel #6
0
def test_blockmedian_input_dataframe(dataframe):
    """
    Check blockmedian when a pandas.DataFrame is passed in as the input data.

    The result must be a DataFrame that keeps the input column names.
    """
    expected_first_row = [245.88819, 29.97895, -385.0]

    result = blockmedian(data=dataframe, spacing="5m", region=[245, 255, 20, 30])

    assert isinstance(result, pd.DataFrame)
    # Column names of the input table carry over to the output table
    assert all(dataframe.columns == result.columns)
    assert result.shape == (5849, 3)
    npt.assert_allclose(result.iloc[0], expected_first_row)
Beispiel #7
0
def test_blockmedian_input_dataframe():
    """
    Check blockmedian when the sample bathymetry pandas.DataFrame is passed
    in as the table argument.

    The block-averaged DataFrame is also returned to the caller.
    """
    sample_table = load_sample_bathymetry()
    expected_first_row = [245.88819, 29.97895, -385.0]

    result = blockmedian(
        table=sample_table,
        spacing="5m",
        region=[245, 255, 20, 30],
    )

    assert isinstance(result, pd.DataFrame)
    # Column names of the input table carry over to the output table
    assert all(sample_table.columns == result.columns)
    assert result.shape == (5849, 3)
    npt.assert_allclose(result.iloc[0], expected_first_row)

    return result
Beispiel #8
0
def test_blockmedian_input_xyz(dataframe):
    """
    Check blockmedian when the longitude, latitude and bathymetry columns of
    ``dataframe`` are passed in separately as x, y and z.
    """
    xyz_columns = {
        "x": dataframe.longitude,
        "y": dataframe.latitude,
        "z": dataframe.bathymetry,
    }

    result = blockmedian(spacing="5m", region=[245, 255, 20, 30], **xyz_columns)

    assert isinstance(result, pd.DataFrame)
    assert result.shape == (5849, 3)
    npt.assert_allclose(result.iloc[0], [245.88819, 29.97895, -385.0])
Beispiel #9
0
def test_blockmedian_input_filename():
    """
    Check blockmedian with an ASCII text file as input and an outfile set.

    The call itself must return None, and the tab-separated file written to
    the temporary path must hold the expected table.
    """
    expected_first_row = [245.88819, 29.97895, -385.0]

    with GMTTempFile() as tmpfile:
        result = blockmedian(
            data="@tut_ship.xyz",
            spacing="5m",
            region=[245, 255, 20, 30],
            outfile=tmpfile.name,
        )
        # Nothing is returned when the result goes to outfile
        assert result is None
        # The outfile has to exist at the requested path
        assert os.path.exists(tmpfile.name)

        written = pd.read_csv(tmpfile.name, sep="\t", header=None)
        assert written.shape == (5849, 3)
        npt.assert_allclose(written.iloc[0], expected_first_row)
Beispiel #10
0
def spatiotemporal_cube(
    table: pd.DataFrame,
    placename: str = "",
    x_var: str = "x",
    y_var: str = "y",
    z_var: str = "h_corr",
    spacing: int = 250,
    clip_limits: bool = True,
    cycles: list = None,
    projection:
    str = "+proj=stere +lat_0=-90 +lat_ts=-71 +lon_0=0 +k=1 +x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m +no_defs",
    folder: str = "",
) -> xr.Dataset:
    """
    Interpolates a time-series point cloud into an xarray.Dataset data cube.
    Uses `pygmt`'s blockmedian and surface algorithms to produce individual
    NetCDF grids, and `xarray` to stack each NetCDF grid into one dataset.

    Steps are as follows:

    1. Create several NetCDF grid surfaces from a table of points, one for
       each time cycle.
    2. Stack the grids along a time cycle axis into an xarray.Dataset which
       is a spatiotemporal data cube with 'x', 'y' and 'cycle_number'
       dimensions.

                             _1__2__3_
            *   *           /  /  /  /|
         *   *             /  /  /  / |
       *   *    *         /__/__/__/  |  y
    *    *   *      -->   |  |  |  |  |
      *    *   *          |  |  |  | /
        *    *            |__|__|__|/  x
                             cycle

    Parameters
    ----------
    table : pandas.DataFrame
        A table containing the ICESat-2 track data from multiple cycles. It
        should ideally have geographical columns called 'x', 'y', and attribute
        columns like 'h_corr_1', 'h_corr_2', etc for each cycle time.
    placename : str
        Optional. A descriptive placename for the data (e.g. some_ice_stream),
        to be used in the intermediate NetCDF filenames.
    x_var : str
        The x coordinate column name to use from the table data. Default is
        'x'.
    y_var : str
        The y coordinate column name to use from the table data. Default is
        'y'.
    z_var : str
        The z column name to use from the table data. This will be the
        attribute that the surface algorithm will run on. Default is 'h_corr'.
    spacing : float or str
        The spatial resolution of the resulting grid, provided as a number or
        as 'dx/dy' increments. This is passed on to `pygmt.blockmedian` and
        `pygmt.surface`. Default is 250 (metres).
    clip_limits : bool
        Whether or not to clip the output grid surface to ± 3 times the median
        absolute deviation of the data table's z-values. Useful for handling
        outlier values in the data table. Default is True (will clip).
    cycles : list
        The cycle numbers to run the gridding algorithm on, e.g. [3, 4] will
        use columns 'h_corr_3' and 'h_corr_4'. Default is None which will
        automatically determine the cycles for a given z_var, using only the
        columns named exactly '<z_var>_<integer>' (so that sibling columns
        such as 'h_corr_sigma_1' are ignored).
    projection : str
        The proj4 string to store in the NetCDF output, will be passed directly
        to `pygmt.surface`'s J (projection) argument. Default is '+proj=stere
        +lat_0=-90 +lat_ts=-71 +lon_0=0 +k=1 +x_0=0 +y_0=0 +ellps=WGS84
        +datum=WGS84 +units=m +no_defs', i.e. Antarctic Polar Stereographic
        EPSG:3031.
    folder : str
        The folder to keep the intermediate NetCDF files in. It is created if
        it does not exist yet. Default is to leave the files in the current
        working directory.

    Returns
    -------
    cube : xarray.Dataset
        A 3-dimensional data cube made of digital surfaces stacked along a time
        cycle axis.

    Raises
    ------
    ValueError
        If there are no cycles to grid, i.e. an empty `cycles` was given or no
        '<z_var>_<integer>' column could be found in the table.

    """
    import re

    import pygmt
    import tqdm

    # Determine grid's bounding box region (xmin, xmax, ymin, ymax)
    grid_region: np.ndarray = pygmt.info(table=table[[x_var, y_var]],
                                         spacing=f"s{spacing}")

    # Automatically determine list of cycles if None is given. Only columns
    # of the exact form '<z_var>_<digits>' count, a plain prefix test would
    # also pick up e.g. 'h_corr_sigma_1' and then fail on the int conversion
    if cycles is None:
        cycle_pattern = re.compile(rf"{re.escape(z_var)}_(\d+)")
        column_matches = (cycle_pattern.fullmatch(str(col))
                          for col in table.columns)
        cycles = [int(match.group(1)) for match in column_matches if match]

    # Fail early with a clear message instead of an IndexError further down
    if len(cycles) == 0:
        raise ValueError(
            f"No cycles to grid, expected columns named like '{z_var}_1'")

    # Limit surface output to within 3 median absolute deviations of median value
    if clip_limits:
        z_values = table[[f"{z_var}_{cycle}" for cycle in cycles]]
        median: float = np.nanmedian(z_values)
        meddev: float = scipy.stats.median_abs_deviation(x=z_values,
                                                         axis=None,
                                                         nan_policy="omit")
        limits = [f"l{median - 3 * meddev}", f"u{median + 3 * meddev}"]
    else:
        limits = None

    # Create one grid surface for each time cycle
    _placename = f"_{placename}" if placename else ""
    for cycle in tqdm.tqdm(iterable=cycles):
        df_trimmed = pygmt.blockmedian(
            table=table[[x_var, y_var, f"{z_var}_{cycle}"]].dropna(),
            region=grid_region,
            spacing=f"{spacing}+e",
        )
        outfile = f"{z_var}{_placename}_cycle_{cycle}.nc"
        pygmt.surface(
            data=df_trimmed.values,
            region=grid_region,
            spacing=spacing,
            J=f'"{projection}"',  # projection
            L=limits,  # lower and upper limits
            M="3c",  # mask values 3 pixel cells outside/away from valid data
            T=0.35,  # tension factor
            V="e",  # error messages only
            outfile=outfile,
        )

    # Move files into new folder if requested
    paths = [f"{z_var}{_placename}_cycle_{cycle}.nc" for cycle in cycles]
    if folder:
        # Make sure the destination exists so the finished grids can be moved
        os.makedirs(folder, exist_ok=True)
        paths = [
            shutil.move(src=path, dst=os.path.join(folder, path))
            for path in paths
        ]

    # Stack several NetCDF grids into one NetCDF along the time cycle axis
    dataset: xr.Dataset = xr.open_mfdataset(
        paths=paths,
        combine="nested",
        concat_dim=[pd.Index(data=cycles, name="cycle_number")],
        attrs_file=paths[-1],
    )

    return dataset
Beispiel #11
0
def xyz_to_grid(
    xyz_data: pd.DataFrame,
    region: str,
    spacing: int = 250,
    tension: float = 0.35,
    outfile: str = None,
    mask_cell_radius: int = 3,
):
    """
    Interpolate x, y, z point data onto a raster grid.

    The points are first thinned with a block median, then gridded with GMT
    surface, and finally passed through grdsample with the T option. When
    `outfile` is given, the surface grid is saved there and grdsample reads
    from and writes to that same file; otherwise the resampled grid goes to a
    temporary NetCDF file. The dataset loaded from that file is returned.

    >>> xyz_data = pd.DataFrame(
    ...     600 * np.random.RandomState(seed=42).rand(60).reshape(20, 3),
    ...     columns=["x", "y", "z"],
    ... )
    >>> region = get_region(xyz_data=xyz_data)
    >>> grid = xyz_to_grid(xyz_data=xyz_data, region=region, spacing=250)
    >>> grid.to_array().shape
    (1, 3, 3)
    >>> grid.to_array().values
    array([[[208.90086, 324.8038 , 515.93726],
            [180.06642, 234.68915, 452.8586 ],
            [170.60728, 298.23764, 537.49774]]], dtype=float32)
    """
    increment = f"{spacing}+e"

    # Thin out the point data with a block median before gridding
    points = gmt.blockmedian(table=xyz_data, region=region, spacing=increment)

    # Grid the thinned x, y, z points via GMT surface
    surface_grid = gmt.surface(
        x=points.x,
        y=points.y,
        z=points.z,
        region=region,
        spacing=increment,
        T=tension,
        V="n",  # normal verbosity: produce only fatal error messages
        M=f"{mask_cell_radius}c",
    )

    # Save grid to NetCDF when an output path was requested
    save_requested = outfile is not None
    if save_requested:
        # TODO add CRS!! See https://github.com/pydata/xarray/issues/2288
        surface_grid.to_netcdf(path=outfile)

    # Resample grid from gridline to pixel registration
    with gmt.helpers.GMTTempFile(suffix=".nc") as tmpfile, gmt.clib.Session() as lib:
        if save_requested:  # input is the file saved above
            source_context = gmt.helpers.dummy_context(outfile)
            target = outfile
        else:  # input is the in-memory grid, output a temporary file
            source_context = lib.virtualfile_from_grid(surface_grid)
            target = tmpfile.name

        with source_context as infile:
            options = {"T": "", "G": f"{target}"}
            grdsample_args = " ".join(
                [infile, gmt.helpers.build_arg_string(options)])
            lib.call_module(module="grdsample", args=grdsample_args)

        # Load into memory while the (possibly temporary) file still exists
        with xr.open_dataset(target) as dataset:
            resampled = dataset.load()

    return resampled