Example #1
def test_tools_check_crop():
    assert [5, 10, 25, 28] == tools.check_crop(IN_CROP, IN_LIMITS)
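The test above pins down the behaviour of tools.check_crop: the requested crop window [5, 10, 25, 35] is clipped to the survey limits [0, 20, 0, 28], so only the last bound changes. A minimal sketch of that clipping, assuming a simple pair-wise clamp (clamp_crop is a hypothetical helper, not the segysak implementation):

def clamp_crop(crop, limits):
    # clamp each [min, max] pair of the crop to the corresponding pair of the limits
    c_min1, c_max1, c_min2, c_max2 = crop
    l_min1, l_max1, l_min2, l_max2 = limits
    return [max(c_min1, l_min1), min(c_max1, l_max1),
            max(c_min2, l_min2), min(c_max2, l_max2)]

assert clamp_crop([5, 10, 25, 35], [0, 20, 0, 28]) == [5, 10, 25, 28]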
Example #2
import pytest
from hypothesis import given
from hypothesis.strategies import integers, floats

import numpy as np

from segysak import tools

IN_CROP = [5, 10, 25, 35]
IN_LIMITS = [0, 20, 0, 28]


def test_tools_check_crop():
    assert [5, 10, 25, 28] == tools.check_crop(IN_CROP, IN_LIMITS)


@given(floats(-10_000, 10_000), floats(-10_000, 10_000), integers(2, 1000))
def test_halfsample(a, b, n):
    test_array = np.linspace(a, b, n)
    truth_array = np.linspace(a, b, n * 2 - 1)
    assert np.allclose(tools.halfsample(test_array), truth_array)


if __name__ == "__main__":
    c = tools.check_crop(IN_CROP, IN_LIMITS)
    print(c)
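The test_halfsample property test above also fixes what tools.halfsample must do: n evenly spaced samples become 2n - 1 samples at half the original step, i.e. a midpoint is inserted between every pair of neighbours. A minimal NumPy sketch of that behaviour (halfsample_sketch is illustrative, not the segysak implementation):

import numpy as np

def halfsample_sketch(arr):
    # interleave the original samples with the midpoints of neighbouring pairs
    mids = 0.5 * (arr[:-1] + arr[1:])
    out = np.empty(arr.size * 2 - 1, dtype=float)
    out[0::2] = arr
    out[1::2] = mids
    return out

assert np.allclose(halfsample_sketch(np.linspace(0.0, 1.0, 5)), np.linspace(0.0, 1.0, 9))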
Example #3
def _loader_converter_header_handling(
    segyfile,
    cdp=None,
    iline=None,
    xline=None,
    cdpx=None,
    cdpy=None,
    offset=None,
    vert_domain="TWT",
    data_type="AMP",
    ix_crop=None,
    cdp_crop=None,
    xy_crop=None,
    z_crop=None,
    return_geometry=False,
    silent=False,
    extra_byte_fields=None,
    head_df=None,
    **segyio_kwargs,
):

    if head_df is None:
        # Start by scraping the headers.
        head_df = segy_header_scrape(segyfile, silent=silent, **segyio_kwargs)

    head_bin = segy_bin_scrape(segyfile, **segyio_kwargs)
    head_loc = AttrDict(
        dict(cdp=cdp,
             offset=offset,
             iline=iline,
             xline=xline,
             cdpx=cdpx,
             cdpy=cdpy))

    if all(map(lambda x: x is None, (cdp, iline, xline, offset))):
        # lets try and guess the data types if no hints given
        try:
            head_loc.update(what_geometry_am_i(head_df))
        except (KeyError, ValueError):
            print("Couldn't determine geometry, will load traces as flat 2D.")

    head_loc = AttrDict({
        key: (_get_tf(val) if val is not None else None)
        for key, val in head_loc.items()
    })

    if all(v is not None for v in (head_loc.cdpx, head_loc.cdpy)):
        extra_byte_fields[CoordKeyField.cdp_x] = head_loc.cdpx
        extra_byte_fields[CoordKeyField.cdp_y] = head_loc.cdpy

        # Scale Coordinates
        coord_scalar = head_df.SourceGroupScalar.median()
        coord_scalar_mult = np.power(abs(coord_scalar), np.sign(coord_scalar))
        head_df[head_loc.cdpx] = head_df[head_loc.cdpx].astype(np.float32)
        head_df[head_loc.cdpy] = head_df[head_loc.cdpy].astype(np.float32)
        head_df[head_loc.cdpx] = head_df[head_loc.cdpx] * coord_scalar_mult * 1.0
        head_df[head_loc.cdpy] = head_df[head_loc.cdpy] * coord_scalar_mult * 1.0

    # TODO: might need to scale offsets as well?

    # Cropping
    if cdp_crop and cdp is not None:  # 2d cdp cropping
        crop_min, crop_max = check_crop(
            cdp_crop,
            [head_df[head_loc.cdp].min(), head_df[head_loc.cdp].max()])

        head_df = head_df.query(
            f"{head_loc.cdp} >= @crop_min & {head_loc.cdp} <= @crop_max")

    if ix_crop is not None and cdp is None:  # 3d inline/xline cropping
        il_min, il_max, xl_min, xl_max = check_crop(
            ix_crop,
            [
                head_df[head_loc.iline].min(),
                head_df[head_loc.iline].max(),
                head_df[head_loc.xline].min(),
                head_df[head_loc.xline].max(),
            ],
        )
        query = " & ".join([
            f"{head_loc.iline} >= @il_min",
            f"{head_loc.iline} <= @il_max",
            f"{head_loc.xline} >= @xl_min",
            f"{head_loc.xline} <= @xl_max",
        ])
        head_df = head_df.query(query).copy(deep=True)

    # TODO: -> Could implement some cropping with a polygon here
    if xy_crop is not None and cdp is None:
        x_min, x_max, y_min, y_max = check_crop(
            xy_crop,
            [
                head_df[head_loc.cdpx].min(),
                head_df[head_loc.cdpx].max(),
                head_df[head_loc.cdpy].min(),
                head_df[head_loc.cdpy].max(),
            ],
        )
        query = " & ".join([
            f"{head_loc.cdpx} >= @x_min",
            f"{head_loc.cdpx} <= @x_max",
            f"{head_loc.cdpy} >= @y_min",
            f"{head_loc.cdpy} <= @y_max",
        ])
        head_df = head_df.query(query).copy(deep=True)

    return head_df, head_bin, head_loc
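The coordinate scaling above follows the SEG-Y SourceGroupScalar convention: a negative scalar means the stored coordinates are divided by its absolute value, a positive scalar means they are multiplied by it, and np.power(abs(coord_scalar), np.sign(coord_scalar)) expresses exactly that (a scalar of 0 yields a multiplier of 1). A small worked sketch with made-up values:

import numpy as np

for coord_scalar, raw_easting in [(-100.0, 47311225.0), (100.0, 4731.1225), (0.0, 473112.25)]:
    mult = np.power(abs(coord_scalar), np.sign(coord_scalar))
    # -100 -> multiply by 100**-1 (divide by 100); +100 -> multiply by 100; 0 -> unchanged
    print(coord_scalar, raw_easting * mult)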
Example #4
def segy_loader(
    segyfile,
    ncfile=None,
    cdp=None,
    iline=None,
    xline=None,
    cdpx=None,
    cdpy=None,
    offset=None,
    vert_domain="TWT",
    data_type="AMP",
    ix_crop=None,
    cdp_crop=None,
    xy_crop=None,
    z_crop=None,
    return_geometry=False,
    silent=False,
    extra_byte_fields=None,
    **segyio_kwargs,
):
    """Convert SEGY data to NetCDF4 File

    The output ncfile has the following structure
        Dimensions:
            d1 - CDP or Inline axis
            d2 - Xline axis
            d3 - The vertical axis
            d4 - Offset/Angle Axis
        Coordinates:
            iline - The inline numbering
            xline - The xline numbering
            cdp_x - Eastings
            cdp_y - Northings
            cdp - Trace Number for 2d
        Variables:
            data - The data volume
        Attributes:
            TBC

    Args:
        segyfile (str): Input segy file path
        ncfile (str, optional): Output SEISNC file path. If None, the loaded data will be
            returned in memory as an xarray.Dataset.
        iline (int, optional): Inline byte location, usually 189
        xline (int, optional): Cross-line byte location, usually 193
        vert_domain (str, optional): Vertical sampling domain. One of ['TWT', 'DEPTH']. Defaults to 'TWT'.
        cdp (int, optional): The CDP byte location, usually 21.
        data_type (str, optional): Data type ['AMP', 'VEL']. Defaults to 'AMP'.
        cdp_crop (list, optional): List of minimum and maximum cmp values to output.
            Has the form '[min_cmp, max_cmp]'. Ignored for 3D data.
        ix_crop (list, optional): List of minimum and maximum inline and crossline to output.
            Has the form '[min_il, max_il, min_xl, max_xl]'. Ignored for 2D data.
        xy_crop (list, optional): List of minimum and maximum cdp_x and cdp_y to output.
            Has the form '[min_x, max_x, min_y, max_y]'. Ignored for 2D data.
        z_crop (list, optional): List of minimum and maximum vertical samples to output.
            Has the form '[min, max]'.
        return_geometry (bool, optional): If True, returns an xarray.Dataset that contains no trace
            data but mirrors the input volume's header information.
        extra_byte_fields (list/mapping): A list of int or mapping of byte fields that should be returned as variables in the dataset.
        silent (bool): Disable progress bar.
        **segyio_kwargs: Extra keyword arguments for segyio.open

    Returns:
        xarray.Dataset: If the ncfile keyword is specified, returns an open handle to the on-disk
            netCDF4 file; otherwise the data is returned in memory. If return_geometry is True, no
            trace data is loaded and only the header geometry is returned.
    """
    # Input sanity checks
    if cdp is not None and (iline is not None or xline is not None):
        raise ValueError("cdp cannot be defined with iline and xiline")

    if iline is None and xline is not None:
        raise ValueError("iline must be defined with xline")

    if xline is None and iline is not None:
        raise ValueError("xline must be defined with iline")

    if isinstance(extra_byte_fields, list):
        extra_byte_fields = {
            _get_tf(field): _get_tf(field)
            for field in extra_byte_fields
        }
    elif isinstance(extra_byte_fields, dict):
        extra_byte_fields = {
            key: _get_tf(field)
            for key, field in extra_byte_fields.items()
        }
    elif extra_byte_fields is None:
        extra_byte_fields = dict()
    else:
        raise ValueError("Unknown type for extra_byte_fields")

    if cdpx is None:
        cdpx = 181  # Assume standard location if missing
    x_head_loc = _get_tf(cdpx)
    if cdpy is None:
        cdpy = 185  # Assume standard location if missing
    y_head_loc = _get_tf(cdpy)

    extra_byte_fields[CoordKeyField.cdp_x.value] = x_head_loc
    extra_byte_fields[CoordKeyField.cdp_y.value] = y_head_loc

    # Start by scraping the headers.
    head_df = segy_header_scrape(segyfile, silent=silent, **segyio_kwargs)
    head_bin = segy_bin_scrape(segyfile, **segyio_kwargs)

    # Scale Coordinates
    coord_scalar = head_df.SourceGroupScalar.median()
    coord_scalar_mult = np.power(abs(coord_scalar), np.sign(coord_scalar))
    head_df[x_head_loc] = head_df[x_head_loc].astype(float)
    head_df[y_head_loc] = head_df[y_head_loc].astype(float)
    head_df[x_head_loc] = head_df[x_head_loc] * coord_scalar_mult * 1.0
    head_df[y_head_loc] = head_df[y_head_loc] * coord_scalar_mult * 1.0

    # TODO: might need to scale offsets as well?

    # Cropping
    if cdp_crop and cdp is not None:  # 2d cdp cropping
        cmp_head_loc = _get_tf(cdp)
        crop_min, crop_max = check_crop(
            cdp_crop,
            [head_df[cmp_head_loc].min(), head_df[cmp_head_loc].max()])

        head_df = head_df.query(
            f"{cmp_head_loc} >= @crop_min & {cmp_head_loc} <= @crop_max")

    if ix_crop is not None and cdp is None:  # 3d inline/xline cropping
        il_head_loc = _get_tf(iline)
        xl_head_loc = _get_tf(xline)
        il_min, il_max, xl_min, xl_max = check_crop(
            ix_crop,
            [
                head_df[il_head_loc].min(),
                head_df[il_head_loc].max(),
                head_df[xl_head_loc].min(),
                head_df[xl_head_loc].max(),
            ],
        )
        query = f"@il_head_loc >= @il_min & @il_head_loc <= @il_max & @xl_head_loc >= @xl_min and @xl_head_loc <= @xl_max"
        head_df = head_df.query(query)

    # TODO: -> Could implement some cropping with a polygon here
    if xy_crop is not None and cdp is None:
        x_min, x_max, y_min, y_max = check_crop(
            xy_crop,
            [
                head_df[x_head_loc].min(),
                head_df[x_head_loc].max(),
                head_df[y_head_loc].min(),
                head_df[y_head_loc].max(),
            ],
        )
        query = "@x_head_loc >= @x_min & x_head_loc <= @x_max & @y_head_loc >= @y_min and @y_head_loc <= @y_max"
        head_df = head_df.query(query)

    common_kwargs = dict(
        zcrop=z_crop,
        ncfile=ncfile,
        offset=offset,
        vert_domain=vert_domain,
        data_type=data_type,
        return_geometry=return_geometry,
        silent=silent,
    )

    # 3d data needs iline and xline
    if iline is not None and xline is not None:
        ds = _3dsegy_loader(
            segyfile,
            head_df,
            head_bin,
            iline=iline,
            xline=xline,
            **common_kwargs,
            **segyio_kwargs,
        )
        indexer = ["il_index", "xl_index"]
        dims = (DimensionKeyField.threed_head.value
                if offset is None else DimensionKeyField.threed_ps_head.value)

    # 2d data
    elif cdp is not None:
        ds = _2dsegy_loader(segyfile,
                            head_df,
                            head_bin,
                            cdp=cdp,
                            **common_kwargs,
                            **segyio_kwargs)
        indexer = ["cdp_index"]
        dims = (DimensionKeyField.twod_head.value
                if offset is None else DimensionKeyField.twod_ps_head.value)

    # fallback to just a 2d array of traces
    else:
        ds = _2dsegy_loader(segyfile, head_df, head_bin, **common_kwargs,
                            **segyio_kwargs)
        indexer = []
        dims = DimensionKeyField.cdp_2d.value

    indexer = indexer + ["off_index"] if offset is not None else indexer

    # we have some geometry to assign headers to
    if cdp is not None or iline is not None:
        head_ds = head_df.set_index(indexer).to_xarray()
        for key, field in extra_byte_fields.items():
            ds[key] = (dims, head_ds[field].values)
        ds = ds.set_coords(
            [CoordKeyField.cdp_x.value, CoordKeyField.cdp_y.value])
    # geometry is not known
    else:
        for key, field in extra_byte_fields.items():
            ds[key] = (dims, head_df[field].values)

    return ds
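For reference, a minimal usage sketch of segy_loader for a 3D volume, using the "usual" byte locations from the docstring (189/193/181/185). The file name and crop window are hypothetical, and the import path is assumed from the segysak package layout:

from segysak.segy import segy_loader

ds = segy_loader(
    "cube.segy",                   # hypothetical input file
    iline=189,                     # inline byte location
    xline=193,                     # crossline byte location
    cdpx=181,                      # easting byte location
    cdpy=185,                      # northing byte location
    ix_crop=[100, 200, 300, 400],  # optional inline/xline window
)
print(ds)  # xarray.Dataset with iline/xline/cdp_x/cdp_y coordinates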
Example #5
def segy2ncdf(segyfile,
              ncfile,
              CMP=False,
              iline=189,
              xline=193,
              cdpx=181,
              cdpy=185,
              vert='TWT',
              units='AMP',
              crop=None,
              zcrop=None,
              silent=False):
    """Convert SEGY data to NetCDF4 File

    The output ncfile has the following structure
        Dimensions:
            vert - The vertical axis
            iline - Inline axis
            xline - Xline axis
        Variables:
            INLINE_3D - The inline numbering
            CROSSLINE_3D - The xline numbering
            CDP_X - Eastings
            CDP_Y - Northings
            CDP_TRACE - Trace Number
            data - The data volume
        Attributes:
            vert.units
            vert.data.units
            ns - Number of samples in vert
            ds - Sample rate

    Args:
        segyfile (str): Input segy file path
        ncfile (str): Output SEISNC file path.
        iline (int): Inline byte location.
        xline (int): Cross-line byte location.
        vert (str): Vertical sampling domain.
        units (str): Units of amplitude data.
        crop (list): List of minimum and maximum inline and crossline to output.
            Has the form '[min_il, max_il, min_xl, max_xl]'.
        zcrop (list): List of minimum and maximum vertical samples to output.
            Has the form '[min, max]'.
        silent (bool): Disable progress bar.

    """
    head_df = segy_header_scrape(segyfile)
    head_bin = segy_bin_scrape(segyfile)

    # get names of columns where stuff we want is
    il_head_loc = str(segyio.TraceField(iline))
    xl_head_loc = str(segyio.TraceField(xline))
    x_head_loc = str(segyio.TraceField(cdpx))
    y_head_loc = str(segyio.TraceField(cdpy))

    # calculate vert, inline and crossline ranges/meshgrids
    il0 = head_df[il_head_loc].min()
    iln = head_df[il_head_loc].max()
    xl0 = head_df[xl_head_loc].min()
    xln = head_df[xl_head_loc].max()
    n0 = 0
    nsamp = head_df.TRACE_SAMPLE_COUNT.min()
    ns0 = head_df.DelayRecordingTime.min()
    coord_scalar = head_df.SourceGroupScalar.median()
    coord_scalar_sign = coord_scalar / abs(coord_scalar)
    coord_scalar_mult = np.power(abs(coord_scalar), coord_scalar_sign)

    dil = np.max(head_df[il_head_loc].values[1:] -
                 head_df[il_head_loc].values[:-1])
    dxl = np.max(head_df[xl_head_loc].values[1:] -
                 head_df[xl_head_loc].values[:-1])

    if crop is not None:
        crop = check_crop(crop, [il0, iln, xl0, xln])
        il0, iln, xl0, xln = crop

    # first and last values
    ni = 1 + (iln - il0) // dil
    nx = 1 + (xln - xl0) // dxl

    # binary header translation
    ns = head_bin['Samples']
    ds = head_bin['Interval']
    msys = _SEGY_MEASUREMENT_SYSTEM[head_bin['MeasurementSystem']]

    if zcrop is not None:
        zcrop = check_zcrop(zcrop, [0, ns])
        n0, ns = zcrop
        ns0 = ds * n0
        nsamp = ns - n0 + 1

    create_empty_seisnc(ncfile, (ni, nx, nsamp))
    set_seisnc_dims(ncfile,
                    first_sample=ns0,
                    sample_rate=ds // 1000,
                    first_iline=il0,
                    iline_step=dil,
                    first_xline=xl0,
                    xline_step=dxl,
                    vert_domain=vert,
                    measurement_system=msys)

    text = get_segy_texthead(segyfile)

    with segyio.open(segyfile, 'r', ignore_geometry=True, iline=iline, xline=xline) as segyf, \
      netCDF4.Dataset(ncfile, "a", format="NETCDF4") as seisnc:
        seisnc.text = text

        # assign CDP X/Y coordinate grids
        query = f"{il_head_loc} >= @il0 & {il_head_loc} <= @iln & {xl_head_loc} >= @xl0 & {xl_head_loc} <= @xln"
        cdpx = (head_df.query(query)[[il_head_loc, xl_head_loc, x_head_loc]]
                .pivot(index=il_head_loc, columns=xl_head_loc).values)
        cdpy = (head_df.query(query)[[il_head_loc, xl_head_loc, y_head_loc]]
                .pivot(index=il_head_loc, columns=xl_head_loc).values)
        seisnc['CDP_X'][:, :] = cdpx * coord_scalar_mult
        seisnc['CDP_Y'][:, :] = cdpy * coord_scalar_mult

        segyf.mmap()
        # load trace
        temp_line = np.full((nx, nsamp), np.nan, float)
        cur_iline = head_df[il_head_loc][0]
        pb = tqdm(total=segyf.tracecount,
                  desc="Converting SEGY",
                  disable=silent)
        for n, trc in enumerate(segyf.trace):
            cxl = head_df[xl_head_loc][n]
            cil = head_df[il_head_loc][n]
            if cxl < xl0 or cxl > xln or cil < il0 or cil > iln:
                pb.update()
                continue
            if cil > cur_iline:
                # a new inline has started: flush the completed inline first
                seisnc['data'][(cur_iline - il0) // dil, :, :] = temp_line
                temp_line[:, :] = np.nan
                cur_iline = cil
            cur_xline = (cxl - xl0) // dxl
            temp_line[cur_xline, :] = trc[n0:ns + 1]
            pb.update()
        # flush the final inline buffer
        seisnc['data'][(cur_iline - il0) // dil, :, :] = temp_line
        pb.close()
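The CDP_X/CDP_Y assignment in segy2ncdf relies on a pandas pivot to turn the flat per-trace header table into an (inline, xline) grid. A small self-contained sketch of that reshaping step, using the segyio trace-field column names from above and made-up values:

import pandas as pd

# toy header table: one row per trace, as segy_header_scrape would return
head = pd.DataFrame({
    "INLINE_3D":    [10, 10, 11, 11],
    "CROSSLINE_3D": [20, 21, 20, 21],
    "CDP_X":        [1000.0, 1010.0, 1000.0, 1010.0],
})

# reshape the flat table into an (n_iline, n_xline) grid of eastings,
# matching what gets written into seisnc['CDP_X']
grid = head.pivot(index="INLINE_3D", columns="CROSSLINE_3D", values="CDP_X").values
print(grid.shape)  # (2, 2)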