Example #1
def geometry():
    """ Build a 24-row Geometry with fixed elevation and coordinate headers.

    The underlying table has one column per entry of const.THCOLS. The
    headers listed below get a constant value on every row; every cell that
    is still empty afterwards is filled with 0.
    """

    # header name -> value written to every row (order matches assignment order)
    fixed_values = {
        # elevations
        'REC_ELEV': 100,
        'SOU_ELEV': 200,
        'DEPTH': 300,
        'REC_DATUM': 400,
        'SOU_DATUM': 500,
        'SOU_H2OD': 600,
        'REC_H2OD': 700,
        # coordinates
        'SOU_X': -100,
        'SOU_Y': -100,
        'REC_X': -100,
        'REC_Y': -100,
        'CDP_X': -100,
        'CDP_Y': -100,
    }

    frame = pd.DataFrame(index=range(24), columns=const.THCOLS)

    for column, value in fixed_values.items():
        frame.loc[:, column] = value

    frame.fillna(0, inplace=True)

    crafted = Geometry()
    crafted._df = frame

    return crafted
Example #2
    def __init__(self):
        """ Create an empty object: default-constructed parts and no file name. """
        # Each part starts out as a freshly constructed, default instance.
        self.tfh = TextualFileHeader()
        self.bfh = BinaryFileHeader()
        self.g = Geometry()
        self.dm = DataMatrix()

        # No file is associated with the object yet.
        self.file = None
Example #3
    def filter(self, header, first, last, step):
        """ Return a new DM filtered in a way that header = range(first, last + 1, step).

        Args:
            header (str): Header name to filter by.
            first (float): First value of the header.
            last (float): Last value of the header (inclusive).
            step (float): Step of the header.

        Returns:
            A new DataMatrix object. Its headers are renumbered: the index is
            reset and TRACENO / SEQNO run from 1 to the number of kept traces.
        """

        new = DataMatrix()
        new.dt = self.dt
        new.t = np.copy(self.t)

        values = self._headers._df[header]
        keep = (values >= first) & (values <= last) & ((values - first) % step == 0)
        subset = self._headers._df.loc[keep]

        # The header index is used to pick the matching rows of the sample matrix.
        new._m = self._m[subset.index]
        new._headers = Geometry()
        # drop=True discards the old index directly, so this also works when the
        # index has a name or the table already has an 'index' column.
        new._headers._df = subset.reset_index(drop=True)
        # Item assignment always targets a column; attribute assignment would
        # silently set a plain attribute if the column were missing.
        new._headers._df['TRACENO'] = new._headers._df.index + 1
        new._headers._df['SEQNO'] = new._headers._df.index + 1

        return new
Example #4
    def crop(self, t, inplace=False):
        """ Return a new DM with new trace length.

        Samples whose time-axis value (self.t) is greater than t are dropped.

        Args:
            t: New trace length.
            inplace (bool): If True, modify this DM instead of returning a new one.

        Returns:
            A new DataMatrix object, or None when inplace is True.
        """

        # Compute the mask once, before self.t is possibly replaced.
        keep = self.t <= t

        if inplace:
            self._m = self._m[:, keep]
            self.t = self.t[keep]
            return None

        new = DataMatrix()
        new.dt = self.dt
        new.t = self.t[keep]

        new._m = self._m[:, keep]
        new._headers = Geometry()
        new._headers._df = self._headers._df.copy()

        return new
Example #5
    def resample(self, dt):
        """ Return a new DM with a bigger dt.

        Resampling keeps every n-th sample, where n = dt / current dt.

        Args:
            dt: New dt. Must be a positive whole multiple of the current dt.

        Returns:
            A new DataMatrix object.

        Raises:
            ValueError: If dt is not a positive whole multiple of the current dt.
        """

        ratio = dt / self.dt
        nth = int(ratio)

        # nth < 1 rejects dt == 0 (zero slice step) and negative dt (which would
        # silently reverse the traces).
        if nth != ratio or nth < 1:
            raise ValueError(f"Can't transform dt={self.dt} into dt={dt}!")

        new = DataMatrix()
        new.dt = dt
        new.t = self.t[::nth]

        new._m = self._m[:, ::nth]
        new._headers = Geometry()
        new._headers._df = self._headers._df.copy()

        return new
Example #6
def test_loading_from_file(manually_crafted_segy_file):
    """ Test that Geometry object loads from file correctly. """

    g = Geometry.load(manually_crafted_segy_file)

    # geometry should behave like a pd.DataFrame.
    # main way to access data is .loc property

    # np.all is used throughout: np.alltrue was an alias that NumPy 2.0 removed.

    # check that all the headers are correctly loaded
    assert np.all(g.loc[:, 'TRACENO'] == np.arange(1, 25, 1))
    assert np.all(g.loc[:, 'fTRACENO'] == np.arange(1, 25, 1))
    assert np.all(g.loc[:, 'FFID'] == 375)
    assert np.all(g.loc[:, 'ELEVSC'] == -100)
    assert np.all(g.loc[:, 'COORDSC'] == -100)
    assert np.all(g.loc[:, 'NUMSMP'] == 512)
    assert np.all(g.loc[:, 'DT'] == 500)
    assert np.all(g.loc[:, 'YEAR'] == 1984)
    assert np.all(g.loc[:, 'HOUR'] == 10)
    assert np.all(g.loc[:, 'MINUTE'] == 51)

    # scalars to coordinates and elevations should be applied automatically
    assert np.all(g.loc[:, 'SOU_X'] == 50)
    assert np.all(g.loc[:, 'SOU_Y'] == 75)
    assert np.all(g.loc[:, 'REC_X'] == np.arange(0, 48, 2))
    assert np.all(g.loc[:, 'REC_Y'] == -1)
    assert np.all(g.loc[:, 'CDP_X'] == np.arange(25, 49, 1))
    assert np.all(g.loc[:, 'CDP_Y'] == 37)

    # there should be no NaN values in the Geometry
    # (reduce over the raw boolean array so the result is always one bool)
    assert g._df.notna().values.all()
Example #7
    def extract_by_indices(self, indices):
        """ Build a new DM holding only the traces selected by the given indices.

        dt is carried over and the time axis is copied. Matrix rows are picked
        with plain indexing on the sample matrix, header rows with .loc on the
        headers; both selections are copied, so the new DM owns its data.
        """

        extracted = DataMatrix()
        extracted.dt, extracted.t = self.dt, np.copy(self.t)

        picked_traces = self._m[indices]
        extracted._m = np.copy(picked_traces)

        picked_headers = Geometry()
        picked_headers._df = self._headers.loc[indices, :].copy()
        # NOTE(review): the header index is not reset here, so it keeps the
        # original labels — confirm that is what callers expect.
        extracted._headers = picked_headers

        return extracted
Example #8
class SegY:
    """ This object represents a SEG-Y file.

    It bundles a textual file header (tfh), a binary file header (bfh),
    the trace-header table (g) and the sample matrix (dm), plus the name of
    the file it was loaded from (file, None if not loaded from disk).
    """

    def __init__(self):
        """ Create an empty SegY with default-constructed parts and no file name. """
        self.tfh = TextualFileHeader()
        self.bfh = BinaryFileHeader()
        self.g = Geometry()
        self.dm = DataMatrix()

        self.file = None

    @staticmethod
    def _time_axis(n_samples, sample_interval):
        """ Return the time axis: n_samples values, sample_interval / 1000 apart, from 0.

        Built from an integer range so the length is always exactly n_samples;
        np.arange with a float step can return one element too many.
        """
        return np.arange(n_samples) * (sample_interval / 1000)

    def save(self, file: str):
        """ Save the SegY to a file.

        Args:
            file (str): Path to the file.

        By default, float32 matrices will be saved as non-IBM floats. To save as IBM instead,
        manually set 'sample_format' BFH value to 1.

        The file is always written big-endian: a 3200-byte textual header, the
        packed binary header, then for every trace a 240-byte trace header
        followed by the packed samples.
        """

        header_fs = '>' + const.THFS
        sfc = self.bfh['sample_format']
        no_traces = self.bfh['no_traces']

        # Choose the sample packer before the file is opened, so an unknown
        # sample format fails without truncating an existing file.
        if sfc == 1:  # IBM is a special case
            def pack_trace(samples):
                return gfunc.pack_ibm32_series(samples, '>')
        else:
            trace_fs = '>' + const.SFC[sfc][1] * self.bfh['samples_per_trace']

            def pack_trace(samples):
                return struct.pack(trace_fs, *samples)

        with open(file, 'bw') as sgy:
            # NOTE(review): the original write calls are not visible in the
            # source; this mirrors load(), which reads a 3200-byte cp500 header
            # into tfh._contents. Padding/truncating to 3200 keeps the layout
            # fixed — confirm against TextualFileHeader.
            sgy.write(self.tfh._contents.ljust(3200)[:3200].encode('cp500'))

            bfh_values = self.bfh._dict.values()
            raw_bfh = struct.pack('>' + const.BFHFS, *bfh_values)
            sgy.write(raw_bfh)

            for i in range(no_traces):
                # Trace header block is 240 bytes; the packed headers fill the
                # first 232 and the rest stays zero.
                raw_th = bytearray(240)
                raw_th[:232] = struct.pack(
                    header_fs, *self.g.loc[i, :].values.astype(int))
                sgy.write(raw_th)

                raw_trace = pack_trace(self.dm._m[i])
                sgy.write(raw_trace)

    @classmethod
    def load(cls, file: str):
        """ Load the SEG-Y file.

        Args:
            file (str): Path to the file.

        Returns:
            A new SegY object filled from the file.
        """

        segy = cls()

        with open(file, 'br') as sgy:
            endian = gfunc.grab_endiannes(sgy)
            sfc = gfunc.grab_sample_format_code(sgy)
            nt = gfunc.grab_number_of_traces(sgy)
            tl = gfunc.grab_trace_length(sgy)
            si = gfunc.grab_sample_interval(sgy)
            ss, fl, _ = const.SFC[sfc]
            dtype = const.DTYPEMAP[sfc]

            raw_tfh = sgy.read(3200)
            raw_bfh = sgy.read(400)

            header_data = np.empty(shape=(nt, 90), dtype=np.int32)
            segy.dm._m = np.empty(shape=(nt, tl), dtype=dtype)

            # Pre-compiled once; .size is the number of bytes of the 240-byte
            # trace header block that the format string actually covers.
            header_struct = struct.Struct(endian + const.THFS)

            if sfc == 1:  # IBM is a special case
                def unpack_trace(raw):
                    return gfunc.unpack_ibm32_series(raw, endian)
            else:
                unpack_trace = struct.Struct(endian + fl * tl).unpack

            for i in range(nt):
                raw_header = sgy.read(240)
                header_data[i] = header_struct.unpack(
                    raw_header[:header_struct.size])

                raw_trace = sgy.read(ss * tl)
                segy.dm._m[i] = unpack_trace(raw_trace)

        segy.tfh._contents = raw_tfh.decode('cp500')

        bfh_values = struct.unpack(endian + const.BFHFS, raw_bfh)
        segy.bfh._dict = dict(zip(const.BFHCOLS, bfh_values))

        segy.bfh['no_traces'] = nt

        # NOTE(review): the original call was truncated after `header_data,`;
        # const.THCOLS as column names is assumed — confirm.
        segy.g._df = pd.DataFrame(header_data, columns=const.THCOLS)

        segy.dm.dt = si
        segy.dm.t = cls._time_axis(tl, si)
        segy.dm._headers = segy.g

        segy.file = file.split('/')[-1]

        return segy

    @classmethod
    def from_matrix(cls, matrix, sample_interval=500):
        """ Create a SegY object from a matrix.

        Args:
            matrix: A numpy matrix where each row is a trace and each column is a sample.
            sample_interval (int): Sample interval in microseconds.

        Returns:
            A new SegY object wrapping the matrix.

        The needed sample format is detected from the matrix's dtype property.
        """

        segy = cls()
        no_traces, no_samples = matrix.shape[0], matrix.shape[1]

        segy.dm._m = matrix
        segy.dm.dt = sample_interval
        segy.dm.t = cls._time_axis(no_samples, sample_interval)

        segy.bfh['sample_format'] = const.IDTYPEMAP[matrix.dtype.name]
        segy.bfh['sample_interval'] = sample_interval
        segy.bfh['samples_per_trace'] = no_samples
        segy.bfh['measurement_system'] = 1
        segy.bfh['byte_offset_of_data'] = 3600
        segy.bfh['no_traces'] = no_traces

        # NOTE(review): the original call was truncated after the index
        # argument; const.THCOLS as column names is assumed — confirm.
        segy.g._df = pd.DataFrame(index=range(no_traces),
                                  columns=const.THCOLS)
        segy.g.loc[:, 'TRACENO'] = np.arange(1, no_traces + 1, 1)
        segy.g.loc[:, 'FFID'] = 1
        segy.g.loc[:, 'CHAN'] = np.arange(1, no_traces + 1, 1)
        segy.g.loc[:, 'ELEVSC'] = -100
        segy.g.loc[:, 'COORDSC'] = -100
        segy.g.loc[:, 'NUMSMP'] = no_samples
        segy.g.loc[:, 'DT'] = sample_interval

        # Every header not set above becomes 0.
        segy.g._df.fillna(0, inplace=True)

        return segy

    @classmethod
    def from_data_matrix(cls, dm):
        """ Create a new SegY object from a DataMatrix object.

        The DataMatrix itself is stored as-is (not copied); its header table is
        copied into the new object's geometry.
        """

        segy = cls()

        segy.dm = dm

        segy.bfh['sample_format'] = const.IDTYPEMAP[dm._m.dtype.name]
        segy.bfh['sample_interval'] = dm.dt
        segy.bfh['samples_per_trace'] = dm._m.shape[1]
        segy.bfh['measurement_system'] = 1
        segy.bfh['byte_offset_of_data'] = 3600
        segy.bfh['no_traces'] = dm._m.shape[0]

        segy.g._df = dm._headers._df.copy()

        return segy