def test_blockmedian_without_outfile_setting():
    """
    Check that calling blockmedian without an outfile argument raises
    GMTInvalidInput.
    """
    # Only table, spacing and region are supplied; outfile is left out.
    call_kwargs = {
        "table": "@tut_ship.xyz",
        "spacing": "5m",
        "region": [245, 255, 20, 30],
    }
    with pytest.raises(GMTInvalidInput):
        blockmedian(**call_kwargs)
def test_blockmedian_wrong_kind_of_input_table_grid():
    """
    Check that blockmedian rejects a grid passed as the table argument.

    The bathymetry column of the sample dataset is converted with
    ``to_xarray()``, which data_kind must classify as "grid"; blockmedian is
    then expected to raise GMTInvalidInput for it.
    """
    bathymetry_grid = load_sample_bathymetry().bathymetry.to_xarray()
    # Guard: make sure the input really is of the kind under test.
    assert data_kind(bathymetry_grid) == "grid"

    with pytest.raises(GMTInvalidInput):
        blockmedian(
            table=bathymetry_grid,
            spacing="5m",
            region=[245, 255, 20, 30],
        )
def test_blockmedian_wrong_kind_of_input_table_matrix():
    """
    Check that blockmedian rejects a plain matrix passed as the table argument.

    The ``.values`` of the sample bathymetry DataFrame must be classified as
    "matrix" by data_kind; blockmedian is then expected to raise
    GMTInvalidInput for it.
    """
    raw_matrix = load_sample_bathymetry().values
    # Guard: make sure the input really is of the kind under test.
    assert data_kind(raw_matrix) == "matrix"

    with pytest.raises(GMTInvalidInput):
        blockmedian(
            table=raw_matrix,
            spacing="5m",
            region=[245, 255, 20, 30],
        )
def test_blockmedian_without_outfile_setting():
    """
    Run blockmedian on a file input without setting outfile and check that a
    pandas.DataFrame with the expected shape and first row is returned.
    """
    expected_first_row = [245.88819, 29.97895, -385.0]

    result = blockmedian(
        data="@tut_ship.xyz",
        spacing="5m",
        region=[245, 255, 20, 30],
    )

    assert isinstance(result, pd.DataFrame)
    assert result.shape == (5849, 3)
    npt.assert_allclose(result.iloc[0], expected_first_row)
def test_blockmedian_input_table_matrix(dataframe):
    """
    Run blockmedian on the ``.values`` matrix of the dataframe fixture and
    check that a pandas.DataFrame with the expected shape and first row is
    returned.
    """
    expected_first_row = [245.88819, 29.97895, -385.0]

    result = blockmedian(
        data=dataframe.values,
        spacing="5m",
        region=[245, 255, 20, 30],
    )

    assert isinstance(result, pd.DataFrame)
    assert result.shape == (5849, 3)
    npt.assert_allclose(result.iloc[0], expected_first_row)
def test_blockmedian_input_dataframe(dataframe):
    """
    Run blockmedian on a pandas.DataFrame and check that the result is a
    DataFrame with the same column names, the expected shape and the expected
    first row.
    """
    expected_first_row = [245.88819, 29.97895, -385.0]

    result = blockmedian(
        data=dataframe,
        spacing="5m",
        region=[245, 255, 20, 30],
    )

    assert isinstance(result, pd.DataFrame)
    # Column labels of the input must carry over to the output.
    assert all(dataframe.columns == result.columns)
    assert result.shape == (5849, 3)
    npt.assert_allclose(result.iloc[0], expected_first_row)
def test_blockmedian_input_dataframe():
    """
    Run blockmedian on the sample bathymetry pandas.DataFrame and check that
    the result is a DataFrame with the same column names, the expected shape
    and the expected first row.

    Returns the blockmedian output.
    """
    sample = load_sample_bathymetry()
    expected_first_row = [245.88819, 29.97895, -385.0]

    result = blockmedian(
        table=sample,
        spacing="5m",
        region=[245, 255, 20, 30],
    )

    assert isinstance(result, pd.DataFrame)
    # Column labels of the input must carry over to the output.
    assert all(sample.columns == result.columns)
    assert result.shape == (5849, 3)
    npt.assert_allclose(result.iloc[0], expected_first_row)

    # NOTE(review): recent pytest versions warn when a test returns a value;
    # the return is kept in case other code reuses this result - confirm.
    return result
def test_blockmedian_input_xyz(dataframe):
    """
    Run blockmedian with separate x/y/z inputs taken from the dataframe
    fixture's longitude, latitude and bathymetry columns, and check the shape
    and first row of the returned pandas.DataFrame.
    """
    xyz_inputs = {
        "x": dataframe.longitude,
        "y": dataframe.latitude,
        "z": dataframe.bathymetry,
    }
    expected_first_row = [245.88819, 29.97895, -385.0]

    result = blockmedian(**xyz_inputs, spacing="5m", region=[245, 255, 20, 30])

    assert isinstance(result, pd.DataFrame)
    assert result.shape == (5849, 3)
    npt.assert_allclose(result.iloc[0], expected_first_row)
def test_blockmedian_input_filename():
    """
    Run blockmedian on an ASCII text file with outfile set, then check that
    nothing is returned and that the written file holds the expected table.
    """
    expected_first_row = [245.88819, 29.97895, -385.0]

    with GMTTempFile() as tmpfile:
        returned = blockmedian(
            data="@tut_ship.xyz",
            spacing="5m",
            region=[245, 255, 20, 30],
            outfile=tmpfile.name,
        )
        # With outfile set, the result goes to disk instead of being returned.
        assert returned is None
        assert os.path.exists(path=tmpfile.name)

        # Read the tab-separated output back while the temp file still exists.
        written = pd.read_csv(tmpfile.name, sep="\t", header=None)
        assert written.shape == (5849, 3)
        npt.assert_allclose(written.iloc[0], expected_first_row)
def spatiotemporal_cube(
    table: pd.DataFrame,
    placename: str = "",
    x_var: str = "x",
    y_var: str = "y",
    z_var: str = "h_corr",
    spacing: int = 250,
    clip_limits: bool = True,
    cycles: list = None,
    projection: str = "+proj=stere +lat_0=-90 +lat_ts=-71 +lon_0=0 +k=1 +x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m +no_defs",
    folder: str = "",
) -> xr.Dataset:
    """
    Interpolate a time-series point cloud into an xarray.Dataset data cube.

    `pygmt`'s blockmedian and surface algorithms produce one NetCDF grid per
    time cycle, and `xarray` stacks those grids into a single dataset:

    1. Create one grid surface (a NetCDF file) from the table of points for
       each time cycle.
    2. Stack the grids along a time cycle axis into an xarray.Dataset, i.e. a
       spatiotemporal data cube concatenated along 'cycle_number'.

                     _1__2__3_
        *   *       /  /  /  /|
         *   *     /  /  /  / |
        *  *  *   /__/__/__/  |  y
       *  *  * -->|  |  |  |  |
        *  *  *   |  |  |  | /
         *   *    |__|__|__|/  x
                     cycle

    Parameters
    ----------
    table : pandas.DataFrame
        A table containing the ICESat-2 track data from multiple cycles. It
        should ideally have geographical columns called 'x', 'y', and
        attribute columns like 'h_corr_1', 'h_corr_2', etc for each cycle
        time.
    placename : str
        Optional. A descriptive placename for the data (e.g. some_ice_stream),
        used in the intermediate NetCDF filenames.
    x_var : str
        The x coordinate column name to use from the table data. Default is
        'x'.
    y_var : str
        The y coordinate column name to use from the table data. Default is
        'y'.
    z_var : str
        The z column name prefix to use from the table data; the surface
        algorithm runs on the '{z_var}_{cycle}' columns. Default is 'h_corr'.
    spacing : float or str
        The spatial resolution of the resulting grid, provided as a number or
        as 'dx/dy' increments. This is passed on to `pygmt.blockmedian` and
        `pygmt.surface`. Default is 250 (metres).
    clip_limits : bool
        Whether or not to clip the output grid surface to ± 3 times the median
        absolute deviation of the data table's z-values around their median.
        Useful for handling outlier values in the data table. Default is True
        (will clip).
    cycles : list
        The cycle numbers to run the gridding algorithm on, e.g. [3, 4] will
        use columns 'h_corr_3' and 'h_corr_4'. Default is None, in which case
        the cycles are parsed from every column name starting with z_var.
    projection : str
        The proj4 string to store in the NetCDF output, passed (wrapped in
        double quotes) to `pygmt.surface`'s J (projection) argument. Default
        is '+proj=stere +lat_0=-90 +lat_ts=-71 +lon_0=0 +k=1 +x_0=0 +y_0=0
        +ellps=WGS84 +datum=WGS84 +units=m +no_defs', i.e. Antarctic Polar
        Stereographic EPSG:3031.
    folder : str
        The folder to keep the intermediate NetCDF files in. Default is to
        leave the files in the current working directory.

    Returns
    -------
    cube : xarray.Dataset
        A 3-dimensional data cube made of digital surfaces stacked along a
        time cycle axis.
    """
    import pygmt
    import tqdm

    # Grid bounding box (xmin, xmax, ymin, ymax) derived from the x/y columns
    grid_region = pygmt.info(table=table[[x_var, y_var]], spacing=f"s{spacing}")

    # Work out the cycle numbers from the column names if none were given;
    # the "+ 1" skips the underscore separating z_var from the cycle number
    if cycles is None:
        suffix_start = len(z_var) + 1
        cycles = [
            int(column[suffix_start:])
            for column in table.columns
            if column.startswith(z_var)
        ]

    # Lower/upper clipping bounds: median ± 3 median absolute deviations,
    # computed over all selected z columns at once
    limits = None
    if clip_limits:
        z_columns = [f"{z_var}_{cycle}" for cycle in cycles]
        z_values = table[z_columns]
        centre = np.nanmedian(z_values)
        spread = scipy.stats.median_abs_deviation(
            x=z_values, axis=None, nan_policy="omit"
        )
        limits = [f"l{centre - 3 * spread}", f"u{centre + 3 * spread}"]

    name_infix = f"_{placename}" if placename else ""

    def _grid_filename(cycle_number) -> str:
        """Name of the intermediate NetCDF grid for one cycle."""
        return f"{z_var}{name_infix}_cycle_{cycle_number}.nc"

    # Create one grid surface for each time cycle
    for cycle in tqdm.tqdm(iterable=cycles):
        cycle_points = table[[x_var, y_var, f"{z_var}_{cycle}"]].dropna()
        block_medians = pygmt.blockmedian(
            table=cycle_points,
            region=grid_region,
            spacing=f"{spacing}+e",
        )
        pygmt.surface(
            data=block_medians.values,
            region=grid_region,
            spacing=spacing,
            J=f'"{projection}"',  # projection
            L=limits,  # lower and upper limits
            M="3c",  # mask values 3 pixel cells outside/away from valid data
            T=0.35,  # tension factor
            V="e",  # error messages only
            outfile=_grid_filename(cycle),
        )

    # Grids are written to the working directory; move them if requested
    paths = [_grid_filename(cycle) for cycle in cycles]
    if folder:
        paths = [
            shutil.move(src=grid_path, dst=os.path.join(folder, grid_path))
            for grid_path in paths
        ]

    # Stack the NetCDF grids into one dataset along the time cycle axis
    cycle_index = pd.Index(data=cycles, name="cycle_number")
    return xr.open_mfdataset(
        paths=paths,
        combine="nested",
        concat_dim=[cycle_index],
        attrs_file=paths[-1],
    )
def xyz_to_grid(
    xyz_data: pd.DataFrame,
    region: str,
    spacing: int = 250,
    tension: float = 0.35,
    outfile: str = None,
    mask_cell_radius: int = 3,
):
    """
    Interpolate x, y, z point data onto a raster grid.

    The points are first reduced with GMT blockmedian, gridded with GMT
    surface, optionally saved to `outfile` as NetCDF, and finally passed
    through GMT grdsample (with the T option) before being loaded back as an
    xarray dataset, which is returned.

    >>> xyz_data = pd.DataFrame(
    ...     600 * np.random.RandomState(seed=42).rand(60).reshape(20, 3),
    ...     columns=["x", "y", "z"],
    ... )
    >>> region = get_region(xyz_data=xyz_data)
    >>> grid = xyz_to_grid(xyz_data=xyz_data, region=region, spacing=250)
    >>> grid.to_array().shape
    (1, 3, 3)
    >>> grid.to_array().values
    array([[[208.90086, 324.8038 , 515.93726],
            [180.06642, 234.68915, 452.8586 ],
            [170.60728, 298.23764, 537.49774]]], dtype=float32)
    """
    increment = f"{spacing}+e"

    ## Preprocessing with blockmedian
    blocked = gmt.blockmedian(table=xyz_data, region=region, spacing=increment)

    ## XYZ point data to NetCDF grid via GMT surface
    grid = gmt.surface(
        x=blocked.x,
        y=blocked.y,
        z=blocked.z,
        region=region,
        spacing=increment,
        T=tension,
        V="n",  # normal verbosity: produce only fatal error messages
        M=f"{mask_cell_radius}c",
    )

    ## Save grid to NetCDF with projection information
    if outfile is not None:
        # TODO add CRS!! See https://github.com/pydata/xarray/issues/2288
        grid.to_netcdf(path=outfile)

    ## Resample grid from gridline to pixel registration
    with gmt.helpers.GMTTempFile(suffix=".nc") as tmpfile:
        with gmt.clib.Session() as lib:
            if outfile is None:  # kind == "grid"
                # No file on disk: feed the in-memory grid to GMT and write
                # the resampled result to the temporary file instead.
                file_context = lib.virtualfile_from_grid(grid)
                outfile = tmpfile.name
            else:  # kind == "file"
                # Resample the saved file; the output goes to the same path.
                file_context = gmt.helpers.dummy_context(outfile)

            with file_context as infile:
                grdsample_options = gmt.helpers.build_arg_string(
                    {"T": "", "G": f"{outfile}"}
                )
                lib.call_module(
                    module="grdsample",
                    args=" ".join([infile, grdsample_options]),
                )

        # Load into memory while the temporary file (if used) still exists
        with xr.open_dataset(outfile) as dataset:
            grid = dataset.load()

    return grid