Code example #1
def test_info_fails():
    """
    Make sure info raises an exception if not given either a file name, pandas
    DataFrame, or numpy ndarray.
    """
    with pytest.raises(GMTInvalidInput):
        info(table=xr.DataArray(21))
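These test snippets omit their imports and fixtures. A minimal preamble that would make most of them runnable (assuming pygmt is installed; POINTS_DATA is assumed to point at the sample points file shipped with the pygmt test suite):

import numpy as np
import numpy.testing as npt
import pandas as pd
import pytest
import xarray as xr

from pygmt import info
from pygmt.exceptions import GMTInvalidInput

POINTS_DATA = "points.txt"  # assumed path to the sample data file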
Code example #2
File: data_prep.py  Project: jtaquia/deepbedmap
def get_region(xyz_data: pd.DataFrame, round_increment: int = 250) -> str:
    """
    Gets an extended bounding box region for points in an xyz pandas.DataFrame with
    columns x, y, and z. The coordinates will be rounded to values specified by the
    round_increment parameter. Implementation uses gmt.info with the -I (increment)
    setting, see also https://gmt.soest.hawaii.edu/doc/latest/gmtinfo.html#i

    The output region is returned as a string 'xmin/xmax/ymin/ymax', directly
    usable as the -R 'region of interest' parameter in GMT. The rounding is
    chosen specifically to give grid dimensions that yield the fastest results
    in programs like GMT surface.

    >>> xyz_data = pd.DataFrame(
    ...     10000 * np.random.RandomState(seed=42).rand(30).reshape(10, 3),
    ...     columns=["x", "y", "z"],
    ... )
    >>> get_region(xyz_data=xyz_data)
    '-250/9500/0/9750'
    """
    assert (xyz_data.columns == pd.Index(data=["x", "y", "z"],
                                         dtype="object")).all()

    with tempfile.NamedTemporaryFile(suffix=".csv") as tmpfile:
        xyz_data.to_csv(tmpfile.name, header=False, index=False)
        region = gmt.info(fname=tmpfile.name,
                          I=f"s{round_increment}").strip()[2:]

    return region
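The gmt.info(fname=..., I=...) call reflects an older pygmt interface; current pygmt spells the same request with the data/spacing aliases and returns a NumPy array instead of an '-R...' string (compare example #27 below). A hedged sketch, not the project's code:

import pygmt

# mirrors I="s250" above: an extended region rounded to multiples of 250,
# returned as array([xmin, xmax, ymin, ymax])
bounds = pygmt.info(data="xyz_points.csv", spacing="s250")
region = "/".join(str(b) for b in bounds)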
Code example #3
def test_geopandas_info_geodataframe(gdf):
    """
    Check that info can return the bounding box region from a
    geopandas.GeoDataFrame.
    """
    output = info(table=gdf, per_column=True)
    npt.assert_allclose(actual=output, desired=[0.0, 35.0, 0.0, 20.0])
Code example #4
def test_info_1d_array():
    """
    Make sure info works on 1D numpy.ndarray inputs.
    """
    output = info(table=np.arange(20))
    expected_output = "<vector memory>: N = 20 <0/19>\n"
    assert output == expected_output
Code example #5
def test_info_per_column_spacing():
    """
    Make sure the per_column and spacing options work together.
    """
    output = info(table=POINTS_DATA, per_column=True, spacing=0.1)
    npt.assert_allclose(actual=output,
                        desired=[11.5, 61.8, -3, 7.9, 0.1412, 0.9338])
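Note that spacing rounds only the x and y bounds outward here (11.5, 61.8, -3, 7.9); the trailing z column stays exact. The six-element array can feed other calls directly; a minimal sketch (assuming the preamble from example #1):

import pygmt

bounds = info(table=POINTS_DATA, per_column=True, spacing=0.1)
fig = pygmt.Figure()
fig.basemap(region=bounds[:4], projection="X10c", frame=True)  # x/y bounds only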
Code example #6
def test_info_2d_list():
    """
    Make sure info works on a 2d list.
    """
    output = info(table=[[0, 8], [3, 5], [6, 2]])
    expected_output = "<vector memory>: N = 3 <0/6> <2/8>\n"
    assert output == expected_output
Code example #7
File: test_info.py  Project: meghanrjones/pygmt
def test_info_series():
    """
    Make sure info works on a pandas.Series input.
    """
    output = info(pd.Series(data=[0, 4, 2, 8, 6]))
    expected_output = "<vector memory>: N = 5 <0/8>\n"
    assert output == expected_output
Code example #8
def test_geopandas_info_shapely(gdf, geomtype, desired):
    """
    Check that info can return the bounding box region from a shapely.geometry
    object that has a __geo_interface__ property.
    """
    geom = gdf.loc[geomtype].geometry
    output = info(table=geom, per_column=True)
    npt.assert_allclose(actual=output, desired=desired)
Code example #9
File: test_info.py  Project: afantunes74/pygmt
def test_info_dataframe():
    "Make sure info works on pandas.DataFrame inputs"
    table = pd.read_csv(POINTS_DATA, sep=" ", header=None)
    output = info(table=table)
    expected_output = (
        "<vector memory>: N = 20 <11.5309/61.7074> <-2.9289/7.8648> <0.1412/0.9338>\n"
    )
    assert output == expected_output
Code example #10
File: test_info.py  Project: afantunes74/pygmt
def test_info():
    "Make sure info works on file name inputs"
    output = info(table=POINTS_DATA)
    expected_output = (f"{POINTS_DATA}: N = 20 "
                       "<11.5309/61.7074> "
                       "<-2.9289/7.8648> "
                       "<0.1412/0.9338>\n")
    assert output == expected_output
Code example #11
def test_info_per_column():
    """
    Make sure the per_column option works.
    """
    output = info(table=POINTS_DATA, per_column=True)
    npt.assert_allclose(
        actual=output,
        desired=[11.5309, 61.7074, -2.9289, 7.8648, 0.1412, 0.9338])
Code example #12
def test_info_per_column_with_time_inputs():
    """
    Make sure the per_column option works with time inputs.
    """
    table = pd.date_range(start="2020-01-01", periods=5).to_numpy()
    output = info(table=table, per_column=True)
    npt.assert_equal(actual=output,
                     desired=["2020-01-01T00:00:00", "2020-01-05T00:00:00"])
Code example #13
File: test_info.py  Project: afantunes74/pygmt
def test_info_2d_array():
    "Make sure info works on 2D numpy.ndarray inputs"
    table = np.loadtxt(POINTS_DATA)
    output = info(table=table)
    expected_output = (
        "<vector memory>: N = 20 <11.5309/61.7074> <-2.9289/7.8648> <0.1412/0.9338>\n"
    )
    assert output == expected_output
Code example #14
def test_info_numpy_array_time_column():
    """
    Make sure info works on a numpy.ndarray input with a datetime type.
    """
    table = pd.date_range(start="2020-01-01", periods=5).to_numpy()
    output = info(table=table)
    expected_output = (
        "<vector memory>: N = 5 <2020-01-01T00:00:00/2020-01-05T00:00:00>\n")
    assert output == expected_output
Code example #15
File: test_info.py  Project: xdshivani/pygmt
def test_info_per_column_with_time_inputs():
    """
    Make sure the per_column option works with time inputs.
    """
    table = pd.date_range(start="2020-01-01", periods=5).to_numpy()
    # Please remove coltypes="0T" workaround after
    # https://github.com/GenericMappingTools/gmt/issues/4241 is resolved
    output = info(table=table, per_column=True, coltypes="0T")
    npt.assert_equal(actual=output,
                     desired=["2020-01-01T00:00:00", "2020-01-05T00:00:00"])
Code example #16
File: test_info.py  Project: meghanrjones/pygmt
def test_info_path(table):
    """
    Make sure info works on a pathlib.Path input.
    """
    output = info(data=table)
    expected_output = (f"{POINTS_DATA}: N = 20 "
                       "<11.5309/61.7074> "
                       "<-2.9289/7.8648> "
                       "<0.1412/0.9338>\n")
    assert output == expected_output
Code example #17
File: test_info.py  Project: xdshivani/pygmt
def test_info_numpy_array_time_column():
    """
    Make sure info works on a numpy.ndarray input with a datetime type.
    """
    table = pd.date_range(start="2020-01-01", periods=5).to_numpy()
    # Please remove coltypes="0T" workaround after
    # https://github.com/GenericMappingTools/gmt/issues/4241 is resolved
    output = info(table=table, coltypes="0T")
    expected_output = (
        "<vector memory>: N = 5 <2020-01-01T00:00:00/2020-01-05T00:00:00>\n")
    assert output == expected_output
Code example #18
File: test_info.py  Project: afantunes74/pygmt
def test_info_pandas_dataframe_time_column():
    "Make sure info works on pandas.DataFrame inputs with a time column"
    table = pd.DataFrame(
        data={
            "z": [10, 13, 12, 15, 14],
            "time": pd.date_range(start="2020-01-01", periods=5),
        })
    output = info(table=table)
    expected_output = (
        "<vector memory>: N = 5 <10/15> <2020-01-01T00:00:00/2020-01-05T00:00:00>\n"
    )
    assert output == expected_output
Code example #19
    @classmethod
    def from_gdf(
        cls,
        gdf: gpd.GeoDataFrame,
        name_col: str = None,
        spacing: float = 1000.0,
        **kwargs,
    ):
        """
        Create a deepicedrain.Region instance from a geopandas GeoDataFrame
        (single row only). The bounding box will be automatically calculated
        from the geometry, rounded up and down as necessary if `spacing` is set.

        Parameters
        ----------
        gdf : geopandas.GeoDataFrame
            A single row geodataframe with a Polygon or Polyline type geometry.

        name_col : str
            Name of the column in the geodataframe to use for setting the name
            of the Region. If unset, the name of the region is taken
            automatically from the first column of the geodataframe.
            Alternatively, pass in `name="Some Name"` to set the name directly.

        spacing : float
            Number to round coordinates up and down so that the bounding box
            falls on nice intervals (requires PyGMT). Set to None to use the
            exact bounds of the input shape instead (uses Shapely only).
            Default is 1000m, which rounds bounding box coordinates to the
            nearest kilometre.

        Returns
        -------
        region : deepicedrain.Region

        """
        if "name" not in kwargs:
            try:
                kwargs["name"] = gdf[name_col]
            except KeyError:
                kwargs["name"] = gdf.iloc[0]

        try:
            import pygmt

            xmin, xmax, ymin, ymax = pygmt.info(
                table=np.vstack(gdf.geometry.exterior.coords.xy).T,
                spacing=float(spacing),
            )
        except (ImportError, TypeError):
            xmin, ymin, xmax, ymax = gdf.geometry.bounds
        kwargs.update({"xmin": xmin, "xmax": xmax, "ymin": ymin, "ymax": ymax})

        return cls(**kwargs)
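A hypothetical usage sketch (file name and column name are illustrative, assuming deepicedrain is importable):

import geopandas as gpd
from deepicedrain import Region

basins = gpd.read_file("drainage_basins.geojson")
region = Region.from_gdf(gdf=basins.iloc[0], name_col="NAME", spacing=1000.0)
# region now carries the name plus xmin/xmax/ymin/ymax rounded to 1000 m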
Code example #20
File: test_info.py  Project: meghanrjones/pygmt
def test_info_deprecate_table_to_data():
    """
    Make sure that the old parameter "table" is supported and it reports a
    warning.
    """
    with pytest.warns(expected_warning=FutureWarning) as record:
        output = info(table=POINTS_DATA)  # pylint: disable=no-value-for-parameter
        expected_output = (f"{POINTS_DATA}: N = 20 "
                           "<11.5309/61.7074> "
                           "<-2.9289/7.8648> "
                           "<0.1412/0.9338>\n")
        assert output == expected_output
        assert len(record) == 1  # check that only one warning was raised
Code example #21
File: test_info.py  Project: afantunes74/pygmt
def test_info_spacing_bounding_box():
    "Make sure the spacing option for writing a bounding box works"
    output = info(table=POINTS_DATA, spacing="b")
    npt.assert_allclose(
        actual=output,
        desired=[
            [11.5309, -2.9289],
            [61.7074, -2.9289],
            [61.7074, 7.8648],
            [11.5309, 7.8648],
            [11.5309, -2.9289],
        ],
    )
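With spacing="b", info returns the bounding box as a closed five-vertex polygon rather than min/max pairs, so it can be drawn directly; a minimal sketch (assuming the preamble from example #1):

import pygmt

bbox = info(table=POINTS_DATA, spacing="b")  # (5, 2) array, first vertex == last
fig = pygmt.Figure()
fig.plot(data=bbox, region=[11, 62, -3, 8], projection="X10c", pen="1p", frame=True)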
Code example #22
File: test_info.py  Project: afantunes74/pygmt
def test_info_xarray_dataset_time_column():
    "Make sure info works on xarray.Dataset 1D inputs with a time column"
    table = xr.Dataset(
        coords={"index": [0, 1, 2, 3, 4]},
        data_vars={
            "z": ("index", [10, 13, 12, 15, 14]),
            "time": ("index", pd.date_range(start="2020-01-01", periods=5)),
        },
    )
    output = info(table=table)
    expected_output = (
        "<vector memory>: N = 5 <10/15> <2020-01-01T00:00:00/2020-01-05T00:00:00>\n"
    )
    assert output == expected_output
Code example #23
    dfFS = pd.read_csv(f"Models/mdFS_{bedname.lower()}_xyz_rheology.csv",
                       sep=" ")
else:
    dfFS = pd.read_csv(
        f"Models/mdFS_{bedname.lower()}_xyz_pressure_vel_friction.csv",
        sep=" ")
    dfFS = dfFS.rename(columns=dict(friction="slipperiness"))
expr: str = "isbasal == True" if z_attr.isbasal else "issurface == True"
df: pd.DataFrame = dfFS.query(expr=expr)[["x", "y", z_attr.varname]]

# df.plot(x="slipperiness", y="velocity", kind="scatter", loglog=False)
# df.plot(x="pressure", y="velocity", kind="scatter", loglog=False)

# %%
# Contour plots of velocity/slipperiness/rheology
xmin, xmax, ymin, ymax, zmin, zmax = pygmt.info(table=df, per_column=True)
region = "/".join(str(i) for i in [xmin, xmax, ymin, ymax])

fig = pygmt.Figure()
pygmt.makecpt(cmap="hawaii",
              series=[zmin, zmax, (zmax - zmin) / 10],
              reverse=True)
fig.basemap(
    region=region,
    projection="x1:1000000",
    frame=["af", f'WSne+t"{bedname} {z_attr.varname} {z_attr.symbol}"'],
)
fig.contour(
    data=df.to_numpy(),
    I=True,
    levels=True,
Code example #24
def spatiotemporal_cube(
    table: pd.DataFrame,
    placename: str = "",
    x_var: str = "x",
    y_var: str = "y",
    z_var: str = "h_corr",
    spacing: int = 250,
    clip_limits: bool = True,
    cycles: list = None,
    projection: str = "+proj=stere +lat_0=-90 +lat_ts=-71 +lon_0=0 +k=1 +x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m +no_defs",
    folder: str = "",
) -> xr.Dataset:
    """
    Interpolates a time-series point cloud into an xarray.Dataset data cube.
    Uses `pygmt`'s blockmedian and surface algorithms to produce individual
    NetCDF grids, and `xarray` to stack each NetCDF grid into one dataset.

    Steps are as follows:

    1. Create several xarray.DataArray grid surfaces from a table of points,
       one for each time cycle.
    2. Stack the grids along a time cycle axis into an xarray.Dataset, giving
       a spatiotemporal data cube with 'x', 'y' and 'cycle_number' dimensions.

                             _1__2__3_
            *   *           /  /  /  /|
         *   *             /  /  /  / |
       *   *    *         /__/__/__/  |  y
    *    *   *      -->   |  |  |  |  |
      *    *   *          |  |  |  | /
        *    *            |__|__|__|/  x
                             cycle

    Parameters
    ----------
    table : pandas.DataFrame
        A table containing the ICESat-2 track data from multiple cycles. It
        should ideally have geographical columns called 'x', 'y', and attribute
        columns like 'h_corr_1', 'h_corr_2', etc for each cycle time.
    placename : str
        Optional. A descriptive placename for the data (e.g. some_ice_stream),
        to be used in the temporary NetCDF filename.
    x_var : str
        The x coordinate column name to use from the table data. Default is
        'x'.
    y_var : str
        The y coordinate column name to use from the table data. Default is
        'y'.
    z_var : str
        The z column name to use from the table data. This will be the
        attribute that the surface algorithm will run on. Default is 'h_corr'.
    spacing : float or str
        The spatial resolution of the resulting grid, provided as a number or
        as 'dx/dy' increments. This is passed on to `pygmt.blockmedian` and
        `pygmt.surface`. Default is 250 (metres).
    clip_limits : bool
        Whether or not to clip the output grid surface to ± 3 times the median
        absolute deviation of the data table's z-values. Useful for handling
        outlier values in the data table. Default is True (will clip).
    cycles : list
        The cycle numbers to run the gridding algorithm on, e.g. [3, 4] will
        use columns 'h_corr_3' and 'h_corr_4'. Default is None which will
        automatically determine the cycles for a given z_var.
    projection : str
        The proj4 string to store in the NetCDF output, will be passed directly
        to `pygmt.surface`'s J (projection) argument. Default is '+proj=stere
        +lat_0=-90 +lat_ts=-71 +lon_0=0 +k=1 +x_0=0 +y_0=0 +ellps=WGS84
        +datum=WGS84 +units=m +no_defs', i.e. Antarctic Polar Stereographic
        EPSG:3031.
    folder : str
        The folder to keep the intermediate NetCDF file in. Default is to place
        the files in the current working directory.

    Returns
    -------
    cube : xarray.Dataset
        A 3-dimensional data cube made of digital surfaces stacked along a time
        cycle axis.

    """
    import pygmt
    import tqdm

    # Determine grid's bounding box region (xmin, xmax, ymin, ymax)
    grid_region: np.ndarray = pygmt.info(table=table[[x_var, y_var]],
                                         spacing=f"s{spacing}")

    # Automatically determine list of cycles if None is given
    if cycles is None:
        cycles: list = [
            int(col[len(z_var) + 1:]) for col in table.columns
            if col.startswith(z_var)
        ]

    # Limit surface output to within 3 median absolute deviations of median value
    if clip_limits:
        z_values = table[[f"{z_var}_{cycle}" for cycle in cycles]]
        median: float = np.nanmedian(z_values)
        meddev: float = scipy.stats.median_abs_deviation(x=z_values,
                                                         axis=None,
                                                         nan_policy="omit")
        limits: list = [f"l{median - 3 * meddev}", f"u{median + 3 * meddev}"]
    else:
        limits = None

    # Create one grid surface for each time cycle
    _placename = f"_{placename}" if placename else ""
    for cycle in tqdm.tqdm(iterable=cycles):
        df_trimmed = pygmt.blockmedian(
            table=table[[x_var, y_var, f"{z_var}_{cycle}"]].dropna(),
            region=grid_region,
            spacing=f"{spacing}+e",
        )
        outfile = f"{z_var}{_placename}_cycle_{cycle}.nc"
        pygmt.surface(
            data=df_trimmed.values,
            region=grid_region,
            spacing=spacing,
            J=f'"{projection}"',  # projection
            L=limits,  # lower and upper limits
            M="3c",  # mask values 3 pixel cells outside/away from valid data
            T=0.35,  # tension factor
            V="e",  # error messages only
            outfile=outfile,
        )
        # print(pygmt.grdinfo(outfile))

    # Move files into new folder if requested
    paths: list = [f"{z_var}{_placename}_cycle_{cycle}.nc" for cycle in cycles]
    if folder:
        paths: list = [
            shutil.move(src=path, dst=os.path.join(folder, path))
            for path in paths
        ]

    # Stack several NetCDF grids into one NetCDF along the time cycle axis
    dataset: xr.Dataset = xr.open_mfdataset(
        paths=paths,
        combine="nested",
        concat_dim=[pd.Index(data=cycles, name="cycle_number")],
        attrs_file=paths[-1],
    )

    return dataset
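A hypothetical usage sketch (file and column names are illustrative):

df = pd.read_parquet("icesat2_heights.parquet")  # columns: x, y, h_corr_3, h_corr_4, ...
cube = spatiotemporal_cube(table=df, placename="some_ice_stream", cycles=[3, 4])
print(cube.dims)  # expect 'cycle_number', 'y' and 'x' dimensions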
Code example #25
def test_info_nearest_multiple():
    """
    Make sure the nearest_multiple option works.
    """
    output = info(table=POINTS_DATA, nearest_multiple=0.1)
    npt.assert_allclose(actual=output, desired=[11.5, 61.8, 0.1])
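nearest_multiple (GMT's -T) reports the first column's min and max rounded to the given multiple, plus the multiple itself, which matches the min/max/increment form that pygmt.makecpt expects for a CPT series; a minimal sketch (assuming the preamble from example #1):

import pygmt

series = info(table=POINTS_DATA, nearest_multiple=0.1)  # array([11.5, 61.8, 0.1])
pygmt.makecpt(cmap="viridis", series=series)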
Code example #26
File: scatter3d.py  Project: weiji14/pygmt
the vertical exaggeration factor.
"""

import pandas as pd
import pygmt

# Load sample iris data and convert 'species' column to categorical dtype
df = pd.read_csv("https://github.com/mwaskom/seaborn-data/raw/master/iris.csv")
df.species = df.species.astype(dtype="category")

# Use pygmt.info to get region bounds (xmin, xmax, ymin, ymax, zmin, zmax)
# The below example will return a numpy array [0.0, 3.0, 4.0, 8.0, 1.0, 7.0]
region = pygmt.info(
    data=df[["petal_width", "sepal_length", "petal_length"]],  # x, y, z columns
    per_column=True,  # report the min/max values per column as a numpy array
    # round the min/max values of the first three columns to the nearest
    # multiple of 1, 2 and 0.5, respectively
    spacing=(1, 2, 0.5),
)

# Make a 3D scatter plot, coloring each of the 3 species differently
fig = pygmt.Figure()

# Define a colormap to be used for three categories, define the range of the
# new discrete CPT using series=(lowest_value, highest_value, interval), use
# color_model="+cSetosa,Versicolor,Virginica" to write the discrete color
# palette "cubhelix" in categorical format and add the species names as
# annotations for the colorbar
pygmt.makecpt(
    cmap="cubhelix", color_model="+cSetosa,Versicolor,Virginica", series=(0, 2, 1)
)
Code example #27
def test_info_spacing():
    """
    Make sure the spacing option works.
    """
    output = info(table=POINTS_DATA, spacing=0.1)
    npt.assert_allclose(actual=output, desired=[11.5, 61.8, -3, 7.9])
Code example #28
data = [
    ["20200712", 1000],
    ["20200714", 1235],
    ["20200716", 1336],
    ["20200719", 1176],
    ["20200721", 1573],
    ["20200724", 1893],
    ["20200729", 1634],
]
df = pd.DataFrame(data, columns=["Date", "Score"])
df.Date = pd.to_datetime(df["Date"], format="%Y%m%d")

fig = pygmt.Figure()
region = pygmt.info(
    table=df[["Date", "Score"]], per_column=True, spacing=(700, 700), coltypes="T"
)

fig.plot(
    region=region,
    projection="X15c/10c",
    frame=["WSen", "afg"],
    x=df.Date,
    y=df.Score,
    style="c0.4c",
    pen="1p",
    color="green3",
)

fig.show()
Code example #29
File: points_categorical.py  Project: noorbuchi/pygmt
method.
"""

import pandas as pd
import pygmt

# Load sample penguins data and convert 'species' column to categorical dtype
df = pd.read_csv("https://github.com/mwaskom/seaborn-data/raw/master/penguins.csv")
df.species = df.species.astype(dtype="category")

# Use pygmt.info to get region bounds (xmin, xmax, ymin, ymax)
# The below example will return a numpy array like [30.0, 60.0, 12.0, 22.0]
region = pygmt.info(
    table=df[["bill_length_mm", "bill_depth_mm"]],  # x and y columns
    per_column=True,  # report the min/max values per column as a numpy array
    # round the min/max values of the first two columns to the nearest multiple
    # of 3 and 2, respectively
    spacing=(3, 2),
)

# Make a 2D categorical scatter plot, coloring each of the 3 species differently
fig = pygmt.Figure()

# Generate a basemap of 10 cm x 10 cm size
fig.basemap(
    region=region,
    projection="X10c/10c",
    frame=[
        'xafg+l"Bill length (mm)"',
        'yafg+l"Bill depth (mm)"',
        'WSen+t"Penguin size at Palmer Station"',
Code example #30
the vertical exaggeration factor.
"""

import pandas as pd
import pygmt

# Load sample iris data, and convert 'species' column to categorical dtype
df = pd.read_csv("https://github.com/mwaskom/seaborn-data/raw/master/iris.csv")
df["species"] = df.species.astype(dtype="category")

# Use pygmt.info to get region bounds (xmin, xmax, ymin, ymax, zmin, zmax)
# The below example will return a numpy array like [0., 3., 4., 8., 1., 7.]
region = pygmt.info(
    table=df[["petal_width", "sepal_length", "petal_length"]],  # x, y, z columns
    per_column=True,  # report output as a numpy array
    spacing="1/2/0.5",  # round x, y and z bounds to multiples of 1, 2 and 0.5 respectively
)

# Make our 3D scatter plot, coloring each of the 3 species differently
fig = pygmt.Figure()
pygmt.makecpt(cmap="cubhelix", color_model="+c", series=(0, 3, 1))
fig.plot3d(
    x=df.petal_width,
    y=df.sepal_length,
    z=df.petal_length,
    sizes=0.1 * df.sepal_width,  # Vary each symbol size according to a data column
    color=df.species.cat.codes.astype(int),  # Points colored by categorical number code