def test_info_fails():
    """
    Check that info raises GMTInvalidInput when the input is not a file
    name, pandas DataFrame, or numpy ndarray.
    """
    with pytest.raises(GMTInvalidInput):
        unsupported_input = xr.DataArray(21)
        info(table=unsupported_input)
def get_region(xyz_data: pd.DataFrame, round_increment: int = 250) -> str:
    """
    Compute an extended, rounded bounding box region for xyz point data.

    The input must be a pandas.DataFrame whose columns are exactly x, y and
    z. The table is written to a temporary CSV file and passed to gmt.info
    with the -I (increment) setting, so that the coordinates are rounded to
    the values given by `round_increment`, see also
    https://gmt.soest.hawaii.edu/doc/latest/gmtinfo.html#i

    The region is returned as a string in 'xmin/xmax/ymin/ymax' format, which
    is directly usable as the -R 'region of interest' parameter in GMT. The
    rounding is specifically optimized to give grid dimensions for fastest
    results in programs like GMT surface.

    >>> xyz_data = pd.DataFrame(
    ...     10000 * np.random.RandomState(seed=42).rand(30).reshape(10, 3),
    ...     columns=["x", "y", "z"],
    ... )
    >>> get_region(xyz_data=xyz_data)
    '-250/9500/0/9750'
    """
    required_columns = pd.Index(data=["x", "y", "z"], dtype="object")
    assert (xyz_data.columns == required_columns).all()

    with tempfile.NamedTemporaryFile(suffix=".csv") as tmpfile:
        csv_path = tmpfile.name
        xyz_data.to_csv(csv_path, header=False, index=False)
        raw_report = gmt.info(fname=csv_path, I=f"s{round_increment}")
        # Trim whitespace, then drop the first two characters of the report
        # (presumably the '-R' prefix emitted by GMT -- confirm)
        region = raw_report.strip()[2:]

    return region
def test_geopandas_info_geodataframe(gdf):
    """
    Verify that info reports the expected bounding box region when given a
    geopandas.GeoDataFrame.
    """
    expected_bounds = [0.0, 35.0, 0.0, 20.0]
    bounds = info(table=gdf, per_column=True)
    npt.assert_allclose(actual=bounds, desired=expected_bounds)
def test_info_1d_array():
    """
    Verify that info accepts a 1D numpy.ndarray and reports its range.
    """
    vector = np.arange(20)
    report = info(table=vector)
    assert report == "<vector memory>: N = 20 <0/19>\n"
def test_info_per_column_spacing():
    """
    Verify that the per_column and spacing options can be combined.
    """
    expected_values = [11.5, 61.8, -3, 7.9, 0.1412, 0.9338]
    values = info(table=POINTS_DATA, per_column=True, spacing=0.1)
    npt.assert_allclose(actual=values, desired=expected_values)
def test_info_2d_list():
    """
    Verify that info accepts a plain 2D list (list of rows).
    """
    rows = [[0, 8], [3, 5], [6, 2]]
    report = info(table=rows)
    assert report == "<vector memory>: N = 3 <0/6> <2/8>\n"
def test_info_series():
    """
    Verify that info accepts a pandas.Series passed positionally.
    """
    series = pd.Series(data=[0, 4, 2, 8, 6])
    report = info(series)
    assert report == "<vector memory>: N = 5 <0/8>\n"
def test_geopandas_info_shapely(gdf, geomtype, desired):
    """
    Verify that info reports the bounding box region of a shapely.geometry
    object that has a __geo_interface__ property.
    """
    # Select the geometry of the row labelled by the geometry type
    geometry = gdf.loc[geomtype].geometry
    bounds = info(table=geometry, per_column=True)
    npt.assert_allclose(actual=bounds, desired=desired)
def test_info_dataframe():
    """
    Verify that info accepts a pandas.DataFrame read from the points file.
    """
    frame = pd.read_csv(POINTS_DATA, sep=" ", header=None)
    report = info(table=frame)
    assert report == (
        "<vector memory>: N = 20 "
        "<11.5309/61.7074> <-2.9289/7.8648> <0.1412/0.9338>\n"
    )
def test_info():
    """
    Verify that info accepts a file name and reports the file's extents.
    """
    column_ranges = "<11.5309/61.7074> <-2.9289/7.8648> <0.1412/0.9338>"
    expected_report = f"{POINTS_DATA}: N = 20 {column_ranges}\n"
    report = info(table=POINTS_DATA)
    assert report == expected_report
def test_info_per_column():
    """
    Verify that the per_column option returns the min/max of each column.
    """
    expected_values = [11.5309, 61.7074, -2.9289, 7.8648, 0.1412, 0.9338]
    values = info(table=POINTS_DATA, per_column=True)
    npt.assert_allclose(actual=values, desired=expected_values)
def test_info_per_column_with_time_inputs():
    """
    Verify that the per_column option handles datetime inputs.
    """
    timestamps = pd.date_range(start="2020-01-01", periods=5).to_numpy()
    time_range = info(table=timestamps, per_column=True)
    expected_range = ["2020-01-01T00:00:00", "2020-01-05T00:00:00"]
    npt.assert_equal(actual=time_range, desired=expected_range)
def test_info_2d_array():
    """
    Verify that info accepts a 2D numpy.ndarray loaded from the points file.
    """
    matrix = np.loadtxt(POINTS_DATA)
    report = info(table=matrix)
    assert report == (
        "<vector memory>: N = 20 "
        "<11.5309/61.7074> <-2.9289/7.8648> <0.1412/0.9338>\n"
    )
def test_info_numpy_array_time_column():
    """
    Verify that info accepts a numpy.ndarray holding datetime values.
    """
    timestamps = pd.date_range(start="2020-01-01", periods=5).to_numpy()
    report = info(table=timestamps)
    assert report == (
        "<vector memory>: N = 5 "
        "<2020-01-01T00:00:00/2020-01-05T00:00:00>\n"
    )
def test_info_per_column_with_time_inputs():
    """
    Verify that the per_column option handles datetime inputs.
    """
    timestamps = pd.date_range(start="2020-01-01", periods=5).to_numpy()
    expected_range = ["2020-01-01T00:00:00", "2020-01-05T00:00:00"]
    # Please remove coltypes="0T" workaround after
    # https://github.com/GenericMappingTools/gmt/issues/4241 is resolved
    time_range = info(table=timestamps, per_column=True, coltypes="0T")
    npt.assert_equal(actual=time_range, desired=expected_range)
def test_info_path(table):
    """
    Verify that info accepts a pathlib.Path input through `data`.
    """
    column_ranges = "<11.5309/61.7074> <-2.9289/7.8648> <0.1412/0.9338>"
    expected_report = f"{POINTS_DATA}: N = 20 {column_ranges}\n"
    report = info(data=table)
    assert report == expected_report
def test_info_numpy_array_time_column():
    """
    Verify that info accepts a numpy.ndarray holding datetime values.
    """
    timestamps = pd.date_range(start="2020-01-01", periods=5).to_numpy()
    # Please remove coltypes="0T" workaround after
    # https://github.com/GenericMappingTools/gmt/issues/4241 is resolved
    report = info(table=timestamps, coltypes="0T")
    assert report == (
        "<vector memory>: N = 5 "
        "<2020-01-01T00:00:00/2020-01-05T00:00:00>\n"
    )
def test_info_pandas_dataframe_time_column():
    """
    Verify that info accepts a pandas.DataFrame containing a time column.
    """
    columns = {
        "z": [10, 13, 12, 15, 14],
        "time": pd.date_range(start="2020-01-01", periods=5),
    }
    frame = pd.DataFrame(data=columns)
    report = info(table=frame)
    assert report == (
        "<vector memory>: N = 5 "
        "<10/15> <2020-01-01T00:00:00/2020-01-05T00:00:00>\n"
    )
def from_gdf(
    cls,
    gdf: gpd.GeoDataFrame,
    name_col: str = None,
    spacing: float = 1000.0,
    **kwargs,
):
    """
    Build a deepicedrain.Region instance out of a geopandas GeoDataFrame
    (single row only). The bounding box is derived from the geometry, and
    is rounded up and down as necessary if `spacing` is set.

    Parameters
    ----------
    gdf : geopandas.GeoDataFrame
        A single row geodataframe with a Polygon or Polyline type geometry.
    name_col : str
        Name of the column in the geodataframe to use for setting the name
        of the Region. If unset, the name of the region will be
        automatically based on the first column of the geodataframe.
        Alternatively, pass in `name="Some Name"` to directly set the name.
    spacing : float
        Number to round coordinates up and down such that the bounding box
        are in nice intervals (requires PyGMT). Set to None to use exact
        bounds of input shape instead (uses Shapely only). Default is 1000m
        for rounding bounding box coordinates to nearest kilometre.

    Returns
    -------
    region : deepicedrain.Region
    """
    # Only derive a name when the caller did not pass one explicitly
    if "name" not in kwargs:
        try:
            region_name = gdf[name_col]
        except KeyError:
            # No usable name column, fall back to the first entry
            region_name = gdf.iloc[0]
        kwargs["name"] = region_name

    try:
        import pygmt

        # Exterior ring coordinates arranged as rows of (x, y)
        outline_xy = np.vstack(gdf.geometry.exterior.coords.xy).T
        xmin, xmax, ymin, ymax = pygmt.info(
            table=outline_xy, spacing=float(spacing)
        )
    except (ImportError, TypeError):
        # PyGMT is missing, or spacing could not be converted to a float
        # (e.g. None): use the exact geometry bounds instead
        xmin, ymin, xmax, ymax = gdf.geometry.bounds

    kwargs.update(xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax)
    return cls(**kwargs)
def test_info_deprecate_table_to_data():
    """
    Verify that the old parameter "table" is still supported and that using
    it reports a warning.
    """
    column_ranges = "<11.5309/61.7074> <-2.9289/7.8648> <0.1412/0.9338>"
    expected_report = f"{POINTS_DATA}: N = 20 {column_ranges}\n"
    with pytest.warns(expected_warning=FutureWarning) as record:
        report = info(table=POINTS_DATA)  # pylint: disable=no-value-for-parameter
        assert report == expected_report
        # Exactly one warning should have been raised
        assert len(record) == 1
def test_info_spacing_bounding_box():
    """
    Verify that spacing="b" writes out the bounding box as a closed polygon.
    """
    west, east = 11.5309, 61.7074
    south, north = -2.9289, 7.8648
    # Corner points of the box, with the first corner repeated at the end
    expected_polygon = [
        [west, south],
        [east, south],
        [east, north],
        [west, north],
        [west, south],
    ]
    polygon = info(table=POINTS_DATA, spacing="b")
    npt.assert_allclose(actual=polygon, desired=expected_polygon)
def test_info_xarray_dataset_time_column():
    """
    Verify that info accepts a 1D xarray.Dataset containing a time column.
    """
    variables = {
        "z": ("index", [10, 13, 12, 15, 14]),
        "time": ("index", pd.date_range(start="2020-01-01", periods=5)),
    }
    dataset = xr.Dataset(coords={"index": [0, 1, 2, 3, 4]}, data_vars=variables)
    report = info(table=dataset)
    assert report == (
        "<vector memory>: N = 5 "
        "<10/15> <2020-01-01T00:00:00/2020-01-05T00:00:00>\n"
    )
dfFS = pd.read_csv(f"Models/mdFS_{bedname.lower()}_xyz_rheology.csv", sep=" ") else: dfFS = pd.read_csv( f"Models/mdFS_{bedname.lower()}_xyz_pressure_vel_friction.csv", sep=" ") dfFS = dfFS.rename(columns=dict(friction="slipperiness")) expr: str = "isbasal == True" if z_attr.isbasal else "issurface == True" df: pd.DataFrame = dfFS.query(expr=expr)[["x", "y", z_attr.varname]] # df.plot(x="slipperiness", y="velocity", kind="scatter", loglog=False) # df.plot(x="pressure", y="velocity", kind="scatter", loglog=False) # %% # Contour plots of velocity/slipperiness/rheology xmin, xmax, ymin, ymax, zmin, zmax = pygmt.info(table=df, per_column=True) region = "/".join(str(i) for i in [xmin, xmax, ymin, ymax]) fig = pygmt.Figure() pygmt.makecpt(cmap="hawaii", series=[zmin, zmax, (zmax - zmin) / 10], reverse=True) fig.basemap( region=region, projection="x1:1000000", frame=["af", f'WSne+t"{bedname} {z_attr.varname} {z_attr.symbol}"'], ) fig.contour( data=df.to_numpy(), I=True, levels=True,
def spatiotemporal_cube(
    table: pd.DataFrame,
    placename: str = "",
    x_var: str = "x",
    y_var: str = "y",
    z_var: str = "h_corr",
    spacing: int = 250,
    clip_limits: bool = True,
    cycles: list = None,
    projection: str = "+proj=stere +lat_0=-90 +lat_ts=-71 +lon_0=0 +k=1 +x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m +no_defs",
    folder: str = "",
) -> xr.Dataset:
    """
    Interpolates a time-series point cloud into an xarray.Dataset data cube.
    Uses `pygmt`'s blockmedian and surface algorithms to produce individual
    NetCDF grids, and `xarray` to stack each NetCDF grid into one dataset.

    Steps are as follows:

    1. Create several xarray.DataArray grid surfaces from a table of points,
       one for each time cycle.
    2. Stack the grids along a time cycle axis into a xarray.Dataset which is
       a spatiotemporal data cube with 'x', 'y' and 'cycle_number' dimensions.

                             _1__2__3_
            *   *           /  /  /  /|
         *   *             /  /  /  / |
       *   *    *         /__/__/__/  |  y
    *    *   *      -->   |  |  |  |  |
      *    *   *          |  |  |  | /
        *    *            |__|__|__|/  x
                             cycle

    Parameters
    ----------
    table : pandas.DataFrame
        A table containing the ICESat-2 track data from multiple cycles. It
        should ideally have geographical columns called 'x', 'y', and
        attribute columns like 'h_corr_1', 'h_corr_2', etc for each cycle
        time.
    placename : str
        Optional. A descriptive placename for the data (e.g. some_ice_stream),
        to be used in the temporary NetCDF filename.
    x_var : str
        The x coordinate column name to use from the table data. Default is
        'x'.
    y_var : str
        The y coordinate column name to use from the table data. Default is
        'y'.
    z_var : str
        The z column name to use from the table data. This will be the
        attribute that the surface algorithm will run on. Default is 'h_corr'.
    spacing : float or str
        The spatial resolution of the resulting grid, provided as a number or
        as 'dx/dy' increments. This is passed on to `pygmt.blockmedian` and
        `pygmt.surface`. Default is 250 (metres).
    clip_limits : bool
        Whether or not to clip the output grid surface to ± 3 times the median
        absolute deviation of the data table's z-values. Useful for handling
        outlier values in the data table. Default is True (will clip).
    cycles : list
        The cycle numbers to run the gridding algorithm on, e.g. [3, 4] will
        use columns 'h_corr_3' and 'h_corr_4'. Default is None which will
        automatically determine the cycles for a given z_var, using only the
        columns named exactly '<z_var>_<integer>' (so that e.g.
        'h_corr_sigma_3' is ignored when z_var is 'h_corr').
    projection : str
        The proj4 string to store in the NetCDF output, will be passed
        directly to `pygmt.surface`'s J (projection) argument. Default is
        '+proj=stere +lat_0=-90 +lat_ts=-71 +lon_0=0 +k=1 +x_0=0 +y_0=0
        +ellps=WGS84 +datum=WGS84 +units=m +no_defs', i.e. Antarctic Polar
        Stereographic EPSG:3031.
    folder : str
        The folder to keep the intermediate NetCDF file in. Default is to
        place the files in the current working directory.

    Returns
    -------
    cube : xarray.Dataset
        A 3-dimensional data cube made of digital surfaces stacked along a
        time cycle axis.
    """
    import re

    import pygmt
    import tqdm

    # Determine grid's bounding box region (xmin, xmax, ymin, ymax)
    grid_region: np.ndarray = pygmt.info(
        table=table[[x_var, y_var]], spacing=f"s{spacing}"
    )

    # Automatically determine list of cycles if None is given. Only columns
    # of the exact form '<z_var>_<digits>' count as a cycle column; a plain
    # prefix test would also pick up e.g. '<z_var>_sigma_1' or '<z_var>'
    # itself and then fail on the integer conversion.
    if cycles is None:
        cycle_pattern = re.compile(rf"{re.escape(z_var)}_([0-9]+)")
        cycle_matches = (
            cycle_pattern.fullmatch(col)
            for col in table.columns
            if isinstance(col, str)
        )
        cycles = [int(match.group(1)) for match in cycle_matches if match]

    # Limit surface output to within 3 median absolute deviations of median
    # value
    if clip_limits:
        z_values = table[[f"{z_var}_{cycle}" for cycle in cycles]]
        median: float = np.nanmedian(z_values)
        meddev: float = scipy.stats.median_abs_deviation(
            x=z_values, axis=None, nan_policy="omit"
        )
        limits: list = [f"l{median - 3 * meddev}", f"u{median + 3 * meddev}"]
    else:
        limits = None

    # Create one grid surface for each time cycle
    _placename = f"_{placename}" if placename else ""
    for cycle in tqdm.tqdm(iterable=cycles):
        # Rows without a z-value for this cycle are dropped before the
        # block median step
        df_trimmed = pygmt.blockmedian(
            table=table[[x_var, y_var, f"{z_var}_{cycle}"]].dropna(),
            region=grid_region,
            spacing=f"{spacing}+e",
        )
        outfile = f"{z_var}{_placename}_cycle_{cycle}.nc"
        pygmt.surface(
            data=df_trimmed.values,
            region=grid_region,
            spacing=spacing,
            J=f'"{projection}"',  # projection
            L=limits,  # lower and upper limits
            M="3c",  # mask values 3 pixel cells outside/away from valid data
            T=0.35,  # tension factor
            V="e",  # error messages only
            outfile=outfile,
        )
        # print(pygmt.grdinfo(outfile))

    # Move files into new folder if requested
    paths: list = [f"{z_var}{_placename}_cycle_{cycle}.nc" for cycle in cycles]
    if folder:
        paths = [
            shutil.move(src=path, dst=os.path.join(folder, path)) for path in paths
        ]

    # Stack several NetCDF grids into one NetCDF along the time cycle axis
    dataset: xr.Dataset = xr.open_mfdataset(
        paths=paths,
        combine="nested",
        concat_dim=[pd.Index(data=cycles, name="cycle_number")],
        attrs_file=paths[-1],
    )

    return dataset
def test_info_nearest_multiple():
    """
    Verify that the nearest_multiple option returns the expected values.
    """
    expected_values = [11.5, 61.8, 0.1]
    values = info(table=POINTS_DATA, nearest_multiple=0.1)
    npt.assert_allclose(actual=values, desired=expected_values)
the vertical exaggeration factor. """ import pandas as pd import pygmt # Load sample iris data and convert 'species' column to categorical dtype df = pd.read_csv("https://github.com/mwaskom/seaborn-data/raw/master/iris.csv") df.species = df.species.astype(dtype="category") # Use pygmt.info to get region bounds (xmin, xmax, ymin, ymax, zmin, zmax) # The below example will return a numpy array [0.0, 3.0, 4.0, 8.0, 1.0, 7.0] region = pygmt.info( data=df[["petal_width", "sepal_length", "petal_length"]], # x, y, z columns per_column=True, # report the min/max values per column as a numpy array # round the min/max values of the first three columns to the nearest # multiple of 1, 2 and 0.5, respectively spacing=(1, 2, 0.5), ) # Make a 3D scatter plot, coloring each of the 3 species differently fig = pygmt.Figure() # Define a colormap to be used for three categories, define the range of the # new discrete CPT using series=(lowest_value, highest_value, interval), use # color_model="+cSetosa,Versicolor,Virginica" to write the discrete color # palette "cubhelix" in categorical format and add the species names as # annotations for the colorbar pygmt.makecpt( cmap="cubhelix", color_model="+cSetosa,Versicolor,Virginica", series=(0, 2, 1) )
def test_info_spacing():
    """
    Verify that the spacing option returns the rounded region bounds.
    """
    expected_region = [11.5, 61.8, -3, 7.9]
    region = info(table=POINTS_DATA, spacing=0.1)
    npt.assert_allclose(actual=region, desired=expected_region)
# Sample records, each pairing a date string with an integer score
data = [
    ["20200712", 1000],
    ["20200714", 1235],
    ["20200716", 1336],
    ["20200719", 1176],
    ["20200721", 1573],
    ["20200724", 1893],
    ["20200729", 1634],
]
df = pd.DataFrame(data, columns=["Date", "Score"])
# Parse the "Date" strings (year, month, day without separators) into
# pandas datetime values
df.Date = pd.to_datetime(df["Date"], format="%Y%m%d")

fig = pygmt.Figure()
# Derive the plot region from the Date and Score columns.
# NOTE(review): spacing=(700, 700) presumably rounds each column's min/max
# to a multiple of 700, and coltypes="T" presumably marks the first column
# as time -- confirm against the pygmt.info documentation
region = pygmt.info(
    table=df[["Date", "Score"]], per_column=True, spacing=(700, 700), coltypes="T"
)
# Plot the scores against their dates using the region computed above
fig.plot(
    region=region,
    projection="X15c/10c",
    frame=["WSen", "afg"],
    x=df.Date,
    y=df.Score,
    style="c0.4c",
    pen="1p",
    color="green3",
)
fig.show()
method. """ import pandas as pd import pygmt # Load sample penguins data and convert 'species' column to categorical dtype df = pd.read_csv("https://github.com/mwaskom/seaborn-data/raw/master/penguins.csv") df.species = df.species.astype(dtype="category") # Use pygmt.info to get region bounds (xmin, xmax, ymin, ymax) # The below example will return a numpy array like [30.0, 60.0, 12.0, 22.0] region = pygmt.info( table=df[["bill_length_mm", "bill_depth_mm"]], # x and y columns per_column=True, # report the min/max values per column as a numpy array # round the min/max values of the first two columns to the nearest multiple # of 3 and 2, respectively spacing=(3, 2), ) # Make a 2D categorical scatter plot, coloring each of the 3 species differently fig = pygmt.Figure() # Generate a basemap of 10 cm x 10 cm size fig.basemap( region=region, projection="X10c/10c", frame=[ 'xafg+l"Bill length (mm)"', 'yafg+l"Bill depth (mm)"', 'WSen+t"Penguin size at Palmer Station"',
the vertical exaggeration factor. """ import pandas as pd import pygmt # Load sample iris data, and convert 'species' column to categorical dtype df = pd.read_csv("https://github.com/mwaskom/seaborn-data/raw/master/iris.csv") df["species"] = df.species.astype(dtype="category") # Use pygmt.info to get region bounds (xmin, xmax, ymin, ymax, zmin, zmax) # The below example will return a numpy array like [0., 3., 4., 8., 1., 7.] region = pygmt.info( table=df[["petal_width", "sepal_length", "petal_length"]], # x, y, z columns per_column=True, # report output as a numpy array spacing= "1/2/0.5", # rounds x, y and z intervals by 1, 2 and 0.5 respectively ) # Make our 3D scatter plot, coloring each of the 3 species differently fig = pygmt.Figure() pygmt.makecpt(cmap="cubhelix", color_model="+c", series=(0, 3, 1)) fig.plot3d( x=df.petal_width, y=df.sepal_length, z=df.petal_length, sizes=0.1 * df.sepal_width, # Vary each symbol size according to a data column color=df.species.cat.codes.astype( int), # Points colored by categorical number code