def geometry():
    """
    Build a small Geometry by hand.

    The underlying table has 24 rows and the columns listed in const.THCOLS.
    A handful of elevation-like columns receive distinct constants, every
    coordinate column is set to -100 and all remaining cells become 0.

    Returns:
        A Geometry object whose _df attribute is the table described above.
    """
    # (column, value) pairs; the value is written to every row of the column
    presets = (
        # elevations
        ('REC_ELEV', 100),
        ('SOU_ELEV', 200),
        ('DEPTH', 300),
        ('REC_DATUM', 400),
        ('SOU_DATUM', 500),
        ('SOU_H2OD', 600),
        ('REC_H2OD', 700),
        # coordinates
        ('SOU_X', -100),
        ('SOU_Y', -100),
        ('REC_X', -100),
        ('REC_Y', -100),
        ('CDP_X', -100),
        ('CDP_Y', -100),
    )

    frame = pd.DataFrame(index=range(24), columns=const.THCOLS)
    for column, value in presets:
        frame.loc[:, column] = value

    # everything that was not set explicitly is zeroed
    frame.fillna(0, inplace=True)

    result = Geometry()
    result._df = frame
    return result
def __init__(self):
    """
    Create an empty container.

    Both file headers, the geometry and the data matrix are
    default-constructed; no file is attached yet.
    """
    # file-level headers
    self.tfh = TextualFileHeader()
    self.bfh = BinaryFileHeader()

    # per-trace headers and the samples themselves
    self.g = Geometry()
    self.dm = DataMatrix()

    # stays None until something assigns a file to this object
    self.file = None
def filter(self, header, first, last, step):
    """
    Return a new DM filtered in a way that header = range(first, last + 1,
    step).

    A trace is kept when its header value lies in [first, last] and
    (value - first) is a multiple of step. Rows are selected by position,
    so the result does not depend on the index labels of the header table.

    Args:
        header (str): Header name to filter by.
        first (float): First value of the header.
        last (float): Last value of the header (inclusive).
        step (float): Step of the header.

    Returns:
        A new DataMatrix object. Its header table is re-indexed from 0 and
        TRACENO / SEQNO are renumbered from 1.

    Notes:
        The multiple-of-step test is an exact modulo comparison, so
        non-integer steps are subject to floating-point rounding.
    """
    source = self._headers._df
    values = source[header]

    # Boolean mask over rows; converted to a plain array so that both the
    # sample matrix and the header table are indexed positionally.
    keep = (
        (values >= first)
        & (values <= last)
        & ((values - first) % step == 0)
    ).to_numpy()

    new = DataMatrix()
    new.dt = self.dt
    new.t = np.copy(self.t)
    new._m = self._m[keep]  # boolean indexing returns a copy

    # drop=True discards the old index instead of turning it into a column
    headers = source.loc[keep].reset_index(drop=True)
    headers['TRACENO'] = headers.index + 1
    headers['SEQNO'] = headers.index + 1

    new._headers = Geometry()
    new._headers._df = headers
    return new
def crop(self, t, inplace=False):
    """
    Shorten every trace to the samples whose time does not exceed t.

    Args:
        t: New trace length; compared against the values stored in self.t.
        inplace (bool): If True, modify this DM instead of returning a new
            one.

    Returns:
        A new DataMatrix object, or None when inplace is True.
    """
    # samples that survive the crop
    keep = self.t <= t

    if inplace:
        self._m = self._m[:, keep]
        self.t = self.t[keep]
        return None

    cropped = DataMatrix()
    cropped.dt = self.dt
    cropped.t = self.t[keep]
    cropped._m = self._m[:, keep]
    cropped._headers = Geometry()
    cropped._headers._df = self._headers._df.copy()
    return cropped
def resample(self, dt):
    """
    Return a new DM with a bigger dt.

    Resampling is done by keeping every n-th sample, where
    n = dt / self.dt, so dt has to be a positive whole multiple of the
    current dt.

    Args:
        dt: New dt.

    Returns:
        A new DataMatrix object. Its time axis and samples are copies, so
        modifying them does not affect this DM.

    Raises:
        ValueError: If dt is not a positive whole multiple of self.dt.
    """
    ratio = dt / self.dt
    nth = int(ratio)
    # nth < 1 covers dt == 0 (a zero slice step) and negative dt (which
    # would silently reverse the traces); the second test rejects
    # non-integer ratios.
    if nth < 1 or nth != ratio:
        raise ValueError(f"Can't transform dt={self.dt} into dt={dt}!")

    new = DataMatrix()
    new.dt = dt
    # strided slices are views; copy them so the new DM owns its data
    new.t = np.copy(self.t[::nth])
    new._m = np.copy(self._m[:, ::nth])
    new._headers = Geometry()
    new._headers._df = self._headers._df.copy()
    return new
def test_loading_from_file(manually_crafted_segy_file):
    """
    Test that Geometry object loads from file correctly.
    """
    g = Geometry.load(manually_crafted_segy_file)

    # geometry should behave like a pd.DataFrame.
    # main way to access data is .loc property

    # check that all the headers are correctly loaded
    # (np.all is used because np.alltrue no longer exists in NumPy 2.0)
    assert np.all(g.loc[:, 'TRACENO'] == np.arange(1, 25, 1))
    assert np.all(g.loc[:, 'fTRACENO'] == np.arange(1, 25, 1))
    assert np.all(g.loc[:, 'FFID'] == 375)
    assert np.all(g.loc[:, 'ELEVSC'] == -100)
    assert np.all(g.loc[:, 'COORDSC'] == -100)
    assert np.all(g.loc[:, 'NUMSMP'] == 512)
    assert np.all(g.loc[:, 'DT'] == 500)
    assert np.all(g.loc[:, 'YEAR'] == 1984)
    assert np.all(g.loc[:, 'HOUR'] == 10)
    assert np.all(g.loc[:, 'MINUTE'] == 51)

    # scalars to coordinates and elevations should be applied automatically
    assert np.all(g.loc[:, 'SOU_X'] == 50)
    assert np.all(g.loc[:, 'SOU_Y'] == 75)
    assert np.all(g.loc[:, 'REC_X'] == np.arange(0, 48, 2))
    assert np.all(g.loc[:, 'REC_Y'] == -1)
    assert np.all(g.loc[:, 'CDP_X'] == np.arange(25, 49, 1))
    assert np.all(g.loc[:, 'CDP_Y'] == 37)

    # there should be no NaN values in the Geometry; reduce over the raw
    # array so the assertion always receives a single bool
    assert g._df.notna().values.all()
def extract_by_indices(self, indices):
    """
    Return a new DM, constructed from traces extracted by given indices.

    Args:
        indices: Row positions of the traces to extract. The same value is
            used to index the sample matrix and the header table, both by
            position.

    Returns:
        A new DataMatrix object holding copies of the selected samples and
        headers. The header rows keep their original index labels.
    """
    new = DataMatrix()
    new.dt = self.dt
    new.t = np.copy(self.t)
    new._m = np.copy(self._m[indices])
    new._headers = Geometry()
    # iloc is positional like the matrix indexing above; label-based loc
    # would treat slices as end-inclusive and break on a non-default index
    new._headers._df = self._headers._df.iloc[indices].copy()
    return new
class SegY:
    """
    This object represents a SEG-Y file.
    """

    def __init__(self):
        """
        Create an empty SegY with default-constructed headers, geometry
        and data matrix, and no file attached.
        """
        self.tfh = TextualFileHeader()
        self.bfh = BinaryFileHeader()
        self.g = Geometry()
        self.dm = DataMatrix()
        # set by load() to the base name of the loaded file
        self.file = None

    def save(self, file: str):
        """
        Save the SegY to a file.

        The file is written big-endian: the textual header encoded as
        cp500, the packed binary header, then for each trace a 240-byte
        trace header followed by the packed samples.

        Args:
            file (str): Path to the file.

        Notes:
            By default, float32 matrices will be saved as non-IBM floats.
            To save as IBM instead, manually set 'sample_format' BFH value
            to 1.
        """
        sfc = self.bfh['sample_format']
        pack_header = struct.Struct('>' + const.THFS).pack

        # Choose the sample packer once instead of duplicating the loop.
        if sfc == 1:  # IBM is a special case
            def pack_trace(samples):
                return gfunc.pack_ibm32_series(samples, '>')
        else:
            pack_samples = struct.Struct(
                '>' + const.SFC[sfc][1] * self.bfh['samples_per_trace']
            ).pack

            def pack_trace(samples):
                return pack_samples(*samples)

        self.g._apply_scalars_before_packing()
        try:
            with open(file, 'bw') as sgy:
                sgy.write(self.tfh._contents.encode('cp500'))
                sgy.write(struct.pack('>' + const.BFHFS,
                                      *self.bfh._dict.values()))

                for i in range(self.bfh['no_traces']):
                    # only the first 232 bytes of the 240-byte trace
                    # header carry packed values; the rest stays zero
                    raw_th = bytearray(240)
                    raw_th[:232] = pack_header(
                        *self.g.loc[i, :].values.astype(int))
                    sgy.write(raw_th)
                    sgy.write(pack_trace(self.dm._m[i]))
        finally:
            # undo the scalar application even if writing failed, so the
            # in-memory geometry is never left in its packed form
            self.g._apply_scalars_after_unpacking()

    @classmethod
    def load(cls, file: str):
        """
        Load the SEG-Y file.

        Args:
            file (str): Path to the file.

        Returns:
            A SegY object with headers, geometry and data matrix filled
            from the file; its `file` attribute is the base name of the
            path.
        """
        import os

        segy = cls()

        with open(file, 'br') as sgy:
            endian = gfunc.grab_endiannes(sgy)
            sfc = gfunc.grab_sample_format_code(sgy)
            nt = gfunc.grab_number_of_traces(sgy)
            tl = gfunc.grab_trace_length(sgy)
            si = gfunc.grab_sample_interval(sgy)

            ss, fl, _ = const.SFC[sfc]
            dtype = const.DTYPEMAP[sfc]

            # NOTE(review): the headers are read from the current position,
            # so the grab_* helpers presumably rewind the file - confirm.
            raw_tfh = sgy.read(3200)
            raw_bfh = sgy.read(400)

            header_data = np.empty(shape=(nt, 90), dtype=np.int32)
            segy.dm._m = np.empty(shape=(nt, tl), dtype=dtype)

            unpack_header = struct.Struct(endian + const.THFS).unpack
            trace_size = ss * tl

            # Choose the sample decoder once instead of duplicating the
            # loop.
            if sfc == 1:  # IBM is a special case
                def unpack_trace(raw):
                    return gfunc.unpack_ibm32_series(raw, endian)
            else:
                unpack_trace = struct.Struct(endian + fl * tl).unpack

            for i in range(nt):
                raw_header = sgy.read(240)
                header_data[i] = unpack_header(raw_header[:232])
                segy.dm._m[i] = unpack_trace(sgy.read(trace_size))

        segy.tfh._contents = raw_tfh.decode('cp500')

        bfh_values = struct.unpack(endian + const.BFHFS, raw_bfh)
        segy.bfh._dict = dict(zip(const.BFHCOLS, bfh_values))
        segy.bfh['no_traces'] = nt

        segy.g._df = pd.DataFrame(header_data, index=range(nt),
                                  columns=const.THCOLS)
        segy.g._apply_scalars_after_unpacking()

        segy.dm.dt = si
        # index * step always yields exactly tl samples; np.arange with a
        # float step can return one element too many
        segy.dm.t = np.arange(tl) * (si / 1000)
        segy.dm._headers = segy.g

        # basename handles every separator of the running platform
        segy.file = os.path.basename(file)
        return segy

    @classmethod
    def from_matrix(cls, matrix, sample_interval=500):
        """
        Create a SegY object from a matrix.

        Args:
            matrix: A numpy matrix where each row is a trace and each
                column is a sample.
            sample_interval (int): Sample interval in microseconds.

        Returns:
            A SegY object wrapping the matrix, with a generated geometry:
            TRACENO and CHAN count from 1, FFID is 1, both scalars are
            -100, and all other trace headers are 0.

        Notes:
            The needed sample format is detected from the matrix's dtype
            property.
        """
        no_traces, no_samples = matrix.shape[0], matrix.shape[1]

        segy = cls()
        segy.dm._m = matrix
        segy.dm.dt = sample_interval
        # index * step always yields exactly no_samples values; np.arange
        # with a float step can return one element too many
        segy.dm.t = np.arange(no_samples) * (sample_interval / 1000)

        segy.bfh['sample_format'] = const.IDTYPEMAP[matrix.dtype.name]
        segy.bfh['sample_interval'] = sample_interval
        segy.bfh['samples_per_trace'] = no_samples
        segy.bfh['measurement_system'] = 1
        segy.bfh['byte_offset_of_data'] = 3600
        segy.bfh['no_traces'] = no_traces

        segy.g._df = pd.DataFrame(index=range(no_traces),
                                  columns=const.THCOLS)
        segy.g.loc[:, 'TRACENO'] = np.arange(1, no_traces + 1, 1)
        segy.g.loc[:, 'FFID'] = 1
        segy.g.loc[:, 'CHAN'] = np.arange(1, no_traces + 1, 1)
        segy.g.loc[:, 'ELEVSC'] = -100
        segy.g.loc[:, 'COORDSC'] = -100
        segy.g.loc[:, 'NUMSMP'] = no_samples
        segy.g.loc[:, 'DT'] = sample_interval
        segy.g._df.fillna(0, inplace=True)

        # same link that load() establishes between matrix and geometry
        segy.dm._headers = segy.g
        return segy

    @classmethod
    def from_data_matrix(cls, dm):
        """
        Create a new SegY object from a DataMatrix object.

        Args:
            dm: The DataMatrix to wrap. It is stored as-is; its header
                table is copied into the geometry of the new object.

        Returns:
            A SegY object whose binary header describes the matrix and
            whose geometry is indexed from 0.
        """
        segy = cls()
        segy.dm = dm

        segy.bfh['sample_format'] = const.IDTYPEMAP[dm._m.dtype.name]
        segy.bfh['sample_interval'] = dm.dt
        segy.bfh['samples_per_trace'] = dm._m.shape[1]
        segy.bfh['measurement_system'] = 1
        segy.bfh['byte_offset_of_data'] = 3600
        segy.bfh['no_traces'] = dm._m.shape[0]

        # save() addresses rows as loc[0] .. loc[no_traces - 1], so the
        # copied table is re-indexed from 0
        segy.g._df = dm._headers._df.copy().reset_index(drop=True)
        return segy