def _3dsegy_loader(
    segyfile,
    head_df,
    head_bin,
    ncfile=None,
    iline=189,
    xline=193,
    offset=None,
    vert_domain="TWT",
    data_type="AMP",
    crop=None,
    zcrop=None,
    silent=False,
    return_geometry=False,
    **segyio_kwargs,
):
    """Convert SEGY data to Xarray or Netcdf4

    This is a helper function for segy_loader. Users should use that function
    directly to load all segy data.

    Args:
        segyfile: Input SEGY file; handed to the text-header reader and to the
            trace loaders.
        head_df: Trace header table. Modified in place: the columns
            ``il_index``, ``xl_index`` and, for prestack data, ``off_index``
            are added.
        head_bin: Binary header mapping; the keys ``Samples``, ``Interval``
            and ``MeasurementSystem`` are read.
        ncfile: Output file. When given (and ``return_geometry`` is false) the
            empty dataset is written there with ``ds.seisio.to_netcdf`` and
            the ``*_ncdf`` trace loaders are used. When None the in-memory
            dataset is filled by the ``*_xr`` trace loaders.
        iline: Inline header byte location.
        xline: Cross-line header byte location.
        offset: Offset header byte location, or None for non-prestack data.
        vert_domain: Vertical domain, forwarded to the coordinate helper and
            to the trace loaders.
        data_type: Not used by this function.
        crop: Not used by this function.
        zcrop: Optional vertical sample range; validated by ``check_zcrop``
            and unpacked as ``(first_sample, last_sample)``.
        silent: Forwarded to the trace loaders.
        return_geometry: If true, return the empty dataset (coordinates and
            text header only) without reading any traces.
        **segyio_kwargs: Extra keyword arguments for the text-header reader
            and, after ``ignore_geometry``/``iline``/``xline`` are added, for
            the trace loaders.

    Returns:
        The dataset produced by the selected trace loader, or the empty
        geometry dataset when ``return_geometry`` is true.
    """
    # get names of columns where stuff we want is
    il_head_loc = _get_tf(iline)
    xl_head_loc = _get_tf(xline)

    # get vertical sample ranges
    n0 = 0
    nsamp = head_bin["Samples"]
    ns0 = head_df.DelayRecordingTime.min()

    # Sorted unique line numbers. The same sorted arrays drive both the
    # header -> index mapping and the dataset coordinates so the two agree.
    ilines = np.sort(head_df[il_head_loc].unique())
    xlines = np.sort(head_df[xl_head_loc].unique())
    iline_index_map = {il: i for i, il in enumerate(ilines)}
    xline_index_map = {xl: i for i, xl in enumerate(xlines)}
    head_df["il_index"] = head_df[il_head_loc].map(iline_index_map)
    head_df["xl_index"] = head_df[xl_head_loc].map(xline_index_map)

    # binary header translation
    ns = head_bin["Samples"]
    sample_rate = head_bin["Interval"] / 1000.0
    # Not used further in this function; the lookup is kept so that an
    # unrecognised measurement-system code behaves exactly as before.
    msys = _SEGY_MEASUREMENT_SYSTEM[head_bin["MeasurementSystem"]]

    # for offset
    if offset is not None:
        off_head_loc = _get_tf(offset)
        offsets = np.sort(head_df[off_head_loc].unique())
        offset_index_map = {off: i for i, off in enumerate(offsets)}
        head_df["off_index"] = head_df[off_head_loc].map(offset_index_map)
    else:
        offsets = None

    if zcrop is not None:
        zcrop = check_zcrop(zcrop, [0, ns])
        n0, ns = zcrop
        ns0 = sample_rate * n0
        nsamp = ns - n0 + 1

    # np.arange with a float step and dtype=int derives an integer step and
    # yields wrong values for fractional sample rates, so build the axis in
    # floating point and only cast to int when nothing is lost.
    vert_samples = ns0 + sample_rate * np.arange(nsamp)
    if np.array_equal(vert_samples, np.round(vert_samples)):
        vert_samples = vert_samples.astype(int)

    builder, _domain = _dataset_coordinate_helper(
        vert_samples, vert_domain, iline=ilines, xline=xlines, offset=offsets
    )
    ds = create_seismic_dataset(**builder)

    text = get_segy_texthead(segyfile, **segyio_kwargs)
    ds.attrs[AttrKeyField.text.value] = text

    if return_geometry:
        # return geometry -> e.g. don't process segy traces
        return ds

    if ncfile is not None:
        # Write the empty skeleton first; the ncdf loaders fill it afterwards.
        ds.seisio.to_netcdf(ncfile)
        target = ncfile
    else:
        target = ds

    segyio_kwargs.update(dict(ignore_geometry=True, iline=iline, xline=xline))

    # Pick the loader: stacked vs. prestack, in-memory vs. on-disk.
    in_memory = isinstance(target, xr.Dataset)
    if offset is None:
        loader = _segy3d_xr if in_memory else _segy3d_ncdf
    else:
        loader = _segy3dps_xr if in_memory else _segy3dps_ncdf

    return loader(
        segyfile,
        target,
        segyio_kwargs,
        n0,
        ns,
        head_df,
        il_head_loc,
        xl_head_loc,
        vert_domain=vert_domain,
        silent=silent,
    )
def _2dsegy_loader(
    segyfile,
    head_df,
    head_bin,
    ncfile=None,
    cdp=None,
    offset=None,
    vert_domain="TWT",
    data_type="AMP",
    crop=None,
    zcrop=None,
    silent=False,
    return_geometry=False,
    **segyio_kwargs,
):
    """Convert SEGY data to Xarray or Netcdf4

    This is a helper function for segy_loader. Users should use that function
    directly to load all segy data.

    Args:
        segyfile: Input SEGY file; handed to the text-header reader and to the
            trace loaders.
        head_df: Trace header table. Modified in place: ``cdp_index`` and, for
            prestack data, ``off_index`` are added. When ``cdp`` is None the
            column for header byte 21 is overwritten with the table index.
        head_bin: Binary header mapping; the keys ``Samples``, ``Interval``
            and ``MeasurementSystem`` are read.
        ncfile: Optional output file; when given the resulting dataset is
            written there with ``ds.seisio.to_netcdf``.
        cdp: CDP header byte location. When None, byte 21 is used and filled
            from the header table index.
        offset: Offset header byte location, or None for stacked data.
        vert_domain: Vertical domain, forwarded to the coordinate helper and
            to the trace loaders.
        data_type: Not used by this function.
        crop: Not used by this function.
        zcrop: Optional vertical sample range; validated by ``check_zcrop``
            and unpacked as ``(first_sample, last_sample)``.
        silent: Forwarded to the trace loaders.
        return_geometry: If true, return the empty dataset (coordinates and
            text header only) without reading any traces.
        **segyio_kwargs: Extra keyword arguments for the text-header reader
            and, after ``ignore_geometry`` is added, for the trace loaders.

    Returns:
        The loaded dataset, or the empty geometry dataset when
        ``return_geometry`` is true.
    """
    # get names of columns where stuff we want is
    cdp_head_loc = _get_tf(21 if cdp is None else cdp)
    if cdp is None:
        # No CDP byte supplied: number the traces by their table index.
        head_df[cdp_head_loc] = head_df.index.values

    # get vertical sample ranges
    n0 = 0
    nsamp = head_bin["Samples"]
    ns0 = head_df.DelayRecordingTime.min()

    # short way to get cdps
    cdps = np.sort(head_df[cdp_head_loc].unique())
    head_df["cdp_index"] = _header_to_index_mapping(head_df[cdp_head_loc])

    # binary header translation
    sample_rate = head_bin["Interval"] / 1000.0
    # Not used further in this function; the lookup is kept so that an
    # unrecognised measurement-system code behaves exactly as before.
    msys = _SEGY_MEASUREMENT_SYSTEM[head_bin["MeasurementSystem"]]

    # for offset
    if offset is not None:
        off_head_loc = _get_tf(offset)
        offsets = np.sort(head_df[off_head_loc].unique())
        head_df["off_index"] = _header_to_index_mapping(head_df[off_head_loc])
    else:
        offsets = None

    if zcrop is not None:
        zcrop = check_zcrop(zcrop, [0, nsamp])
        n0, last_sample = zcrop
        ns0 = sample_rate * n0
        nsamp = last_sample - n0 + 1

    # np.arange with a float step and dtype=int derives an integer step and
    # yields wrong values for fractional sample rates, so build the axis in
    # floating point and only cast to int when nothing is lost.
    vert_samples = ns0 + sample_rate * np.arange(nsamp)
    if np.array_equal(vert_samples, np.round(vert_samples)):
        vert_samples = vert_samples.astype(int)

    builder, _domain = _dataset_coordinate_helper(
        vert_samples, vert_domain, cdp=cdps, offset=offsets
    )
    ds = create_seismic_dataset(**builder)

    text = get_segy_texthead(segyfile, **segyio_kwargs)
    ds.attrs[AttrKeyField.text.value] = text

    if return_geometry:
        # Geometry only: optionally persist the empty dataset, skip traces.
        if ncfile is not None:
            ds.seisio.to_netcdf(ncfile)
        return ds

    segyio_kwargs.update(dict(ignore_geometry=True))

    # stacked data uses the plain loader, prestack data the offset-aware one
    loader = _segy2d_xr if offset is None else _segy2d_ps_xr
    # NOTE(review): the sample *count* (nsamp) is passed here, whereas the 3D
    # loader passes the last sample index in the same position - confirm what
    # the 2D trace loaders expect when zcrop is used.
    ds = loader(
        segyfile,
        ds,
        segyio_kwargs,
        n0,
        nsamp,
        head_df,
        cdp_head_loc,
        vert_domain=vert_domain,
        silent=silent,
    )

    if ncfile is not None:
        ds.seisio.to_netcdf(ncfile)

    return ds
def segy2ncdf(segyfile, ncfile, CMP=False, iline=189, xline=193, cdpx=181, cdpy=185,
              vert='TWT', units='AMP', crop=None, zcrop=None, silent=False):
    """Convert SEGY data to NetCDF4 File

    The output ncfile has the following structure
        Dimensions:
            vert - The vertical axis
            iline - Inline axis
            xline - Xline axis
        Variables:
            INLINE_3D - The inline numbering
            CROSSLINE_3D - The xline numbering
            CDP_X - Eastings
            CDP_Y - Northings
            CDP_TRACE - Trace Number
            data - The data volume
        Attributes:
            vert.units
            vert.data.units
            ns - Number of samples in vert
            ds - Sample rate

    Traces are buffered one inline at a time and written when the inline
    number changes, so the file is expected to be ordered by inline.

    Args:
        segyfile (str): Input segy file path
        ncfile (str): Output SEISNC file path.
        CMP (bool): Not used by this function.
        iline (int): Inline byte location.
        xline (int): Cross-line byte location.
        cdpx (int): CDP X (easting) byte location.
        cdpy (int): CDP Y (northing) byte location.
        vert (str): Vertical sampling domain.
        units (str): Units of amplitude data. Not used by this function.
        crop (list): List of minimum and maximum inline and crossline to output.
            Has the form '[min_il, max_il, min_xl, max_xl]'.
        zcrop (list): List of minimum and maximum vertical samples to output.
            Has the form '[min, max]'.
        silent (bool): Disable progress bar.
    """
    head_df = segy_header_scrape(segyfile)
    head_bin = segy_bin_scrape(segyfile)

    # get names of columns where stuff we want is
    il_head_loc = str(segyio.TraceField(iline))
    xl_head_loc = str(segyio.TraceField(xline))
    x_head_loc = str(segyio.TraceField(cdpx))
    y_head_loc = str(segyio.TraceField(cdpy))

    # Per-trace line numbers as plain arrays: positional access in the trace
    # loop is cheaper than label lookups on the DataFrame.
    il_values = head_df[il_head_loc].values
    xl_values = head_df[xl_head_loc].values

    # calculate vert, inline and crossline ranges/meshgrids
    il0 = head_df[il_head_loc].min()
    iln = head_df[il_head_loc].max()
    xl0 = head_df[xl_head_loc].min()
    xln = head_df[xl_head_loc].max()
    n0 = 0
    nsamp = head_df.TRACE_SAMPLE_COUNT.min()
    ns0 = head_df.DelayRecordingTime.min()

    # Positive scalar -> multiply, negative -> divide. A scalar of 0 would
    # divide by zero below, so it is treated as "no scaling".
    coord_scalar = head_df.SourceGroupScalar.median()
    if coord_scalar == 0:
        coord_scalar_mult = 1.0
    else:
        coord_scalar_sign = coord_scalar / abs(coord_scalar)
        coord_scalar_mult = np.power(abs(coord_scalar), coord_scalar_sign)

    # line steps taken as the largest jump between consecutive traces
    dil = np.max(np.diff(il_values))
    dxl = np.max(np.diff(xl_values))

    if crop is not None:
        crop = check_crop(crop, [il0, iln, xl0, xln])
        il0, iln, xl0, xln = crop

    # first and last values
    ni = 1 + (iln - il0) // dil
    nx = 1 + (xln - xl0) // dxl

    # binary header translation
    ns = head_bin['Samples']
    # Interval / 1000 is the sample rate handed to the file; keep it an int
    # when exact (as before) but do not truncate fractional rates to 0.
    sample_rate = head_bin['Interval'] / 1000
    if float(sample_rate).is_integer():
        sample_rate = int(sample_rate)
    msys = _SEGY_MEASUREMENT_SYSTEM[head_bin['MeasurementSystem']]

    if zcrop is not None:
        zcrop = check_zcrop(zcrop, [0, ns])
        n0, ns = zcrop
        # first sample in the same units as sample_rate
        ns0 = sample_rate * n0
        nsamp = ns - n0 + 1

    create_empty_seisnc(ncfile, (ni, nx, nsamp))
    set_seisnc_dims(ncfile, first_sample=ns0, sample_rate=sample_rate,
                    first_iline=il0, iline_step=dil,
                    first_xline=xl0, xline_step=dxl,
                    vert_domain=vert, measurement_system=msys)

    text = get_segy_texthead(segyfile)

    # traces inside the requested inline/crossline window (bounds inclusive)
    in_crop = (head_df[il_head_loc].between(il0, iln)
               & head_df[xl_head_loc].between(xl0, xln))
    keep = in_crop.values
    cropped = head_df[in_crop]

    with segyio.open(segyfile, 'r', ignore_geometry=True,
                     iline=iline, xline=xline) as segyf, \
            netCDF4.Dataset(ncfile, "a", format="NETCDF4") as seisnc:
        seisnc.text = text

        # assign CDPXY
        easting = cropped[[il_head_loc, xl_head_loc, x_head_loc]].pivot(
            index=il_head_loc, columns=xl_head_loc).values
        northing = cropped[[il_head_loc, xl_head_loc, y_head_loc]].pivot(
            index=il_head_loc, columns=xl_head_loc).values
        seisnc['CDP_X'][:, :] = easting * coord_scalar_mult
        seisnc['CDP_Y'][:, :] = northing * coord_scalar_mult

        segyf.mmap()

        # load traces, buffering one inline at a time
        line_buffer = np.full((nx, nsamp), np.nan, float)
        cur_iline = None
        with tqdm(total=segyf.tracecount, desc="Converting SEGY",
                  disable=silent) as pb:
            for n, trc in enumerate(segyf.trace):
                if keep[n]:
                    cil = il_values[n]
                    if cur_iline is not None and cil != cur_iline:
                        # The previous inline is complete: store it at its
                        # own index before the buffer takes the new line.
                        seisnc['data'][int((cur_iline - il0) // dil), :, :] = line_buffer
                        line_buffer[:, :] = np.nan
                    cur_iline = cil
                    line_buffer[(xl_values[n] - xl0) // dxl, :] = trc[n0:ns + 1]
                pb.update()

        # the last buffered inline has no following line to trigger its write
        if cur_iline is not None:
            seisnc['data'][int((cur_iline - il0) // dil), :, :] = line_buffer