def test_complex_row_major_example(self):
    """A strided, offset, row-major 2D mapping builds a last-index-varies-quickest catalog."""
    first_value = 100
    value_step = 4
    i_values = range(1, 31, 3)
    j_values = range(2, 24, 2)

    # Keys are produced with j varying quickest while the values climb in
    # steps of four, so the dictionary looks like this:
    #
    #   {(1, 2): 100,
    #    (1, 4): 104,
    #    (1, 6): 108,
    #    (1, 8): 112,
    #    (1, 10): 116,
    #    ...
    #    (28, 14): 520,
    #    (28, 16): 524,
    #    (28, 18): 528,
    #    (28, 20): 532,
    #    (28, 22): 536}
    #
    # The builder is expected to recover the i and j ranges, the base value
    # and the stride from nothing more than these items.
    d = dict(zip(product(i_values, j_values),
                 count(start=first_value, step=value_step)))

    catalog = CatalogBuilder(d).create()

    assert isinstance(catalog, LastIndexVariesQuickestCatalog2D)
    assert catalog.key_min() == (1, 2)
    assert catalog.key_max() == (28, 22)
    assert catalog.value_first() == 100
    assert catalog.value_last() == 536
    assert catalog.i_min == 1
    assert catalog.i_max == 28
    assert catalog.j_min == 2
    assert catalog.j_max == 22
    assert len(catalog) == 110
    # Every original item must be retrievable from the catalog.
    assert all(catalog[key] == d[key] for key in d)
def test_row_major_example(self):
    """A small 3x3 row-major mapping builds a last-index-varies-quickest catalog."""
    d = {
        (0, 4): 8, (0, 5): 9, (0, 6): 10,
        (1, 4): 11, (1, 5): 12, (1, 6): 13,
        (2, 4): 14, (2, 5): 15, (2, 6): 16,
    }

    catalog = CatalogBuilder(d).create()

    assert isinstance(catalog, LastIndexVariesQuickestCatalog2D)
    assert catalog.key_min() == (0, 4)
    assert catalog.key_max() == (2, 6)
    assert catalog.value_first() == 8
    assert catalog.value_last() == 16
    assert catalog.i_min == 0
    assert catalog.i_max == 2
    assert catalog.j_min == 4
    assert catalog.j_max == 6
    assert len(catalog) == 9

    # A key outside the grid must not be found.
    with raises(KeyError):
        _ = catalog[(0, 0)]

    # Every original item must be retrievable from the catalog.
    assert all(catalog[key] == d[key] for key in d)
def test_regular_mapping(self, start, num, step, values):
    """A mapping with regularly spaced keys keeps all of its items in the built catalog."""
    assume(step != 0)

    regular_keys = range(start, start + num * step, step)
    mapping = dict(zip(regular_keys, values))

    catalog = CatalogBuilder(mapping).create()

    # Items present in both the source mapping and the catalog.
    common_items = set(mapping.items()).intersection(catalog.items())
    self.assertEqual(len(common_items), len(mapping))
def test_column_major_example(self):
    """A small 3x3 column-major mapping builds a first-index-varies-quickest catalog."""
    d = {
        (11, 14): 5, (13, 14): 10, (15, 14): 15,
        (11, 16): 20, (13, 16): 25, (15, 16): 30,
        (11, 18): 35, (13, 18): 40, (15, 18): 45,
    }

    catalog = CatalogBuilder(d).create()

    assert isinstance(catalog, FirstIndexVariesQuickestCatalog2D)
    assert catalog.key_min() == (11, 14)
    assert catalog.key_max() == (15, 18)
    assert catalog.value_first() == 5
    assert catalog.value_last() == 45
    assert catalog.i_min == 11
    assert catalog.i_max == 15
    assert catalog.j_min == 14
    assert catalog.j_max == 18
    assert len(catalog) == 9

    # A key outside the grid must not be found.
    with raises(KeyError):
        _ = catalog[(0, 0)]

    # Every original item must be retrievable from the catalog.
    assert all(catalog[key] == d[key] for key in d)
def test_regular_constant_mapping(self, start, num, step, value):
    """Regularly spaced keys that all map to one value keep their items in the catalog."""
    assume(step != 0)

    regular_keys = range(start, start + num * step, step)
    mapping = dict.fromkeys(regular_keys, value)

    catalog = CatalogBuilder(mapping).create()

    # Items present in both the source mapping and the catalog.
    common_items = set(mapping.items()).intersection(catalog.items())
    assert len(common_items) == len(mapping)
def test_linear_regular_mapping(self, num, key_start, key_step, value_start, value_step):
    """Regular keys mapped onto regular values keep all of their items in the catalog."""
    assume(key_step != 0)
    assume(value_step != 0)

    regular_keys = range(key_start, key_start + num * key_step, key_step)
    regular_values = range(value_start, value_start + num * value_step, value_step)
    mapping = dict(zip(regular_keys, regular_values))

    catalog = CatalogBuilder(mapping).create()

    # Items present in both the source mapping and the catalog.
    common_items = set(mapping.items()).intersection(catalog.items())
    self.assertEqual(len(common_items), len(mapping))
def trace_indexes(self):
    """Return the trace indexes of the traces which contain samples.

    On the first call the sample array of the underlying dataset is scanned
    over its first two axes; each position for which the dataset reports
    samples is assigned the next trace index (starting at zero), and the
    resulting catalog is cached on the instance. Later calls reuse the cache.

    Returns:
        The keys of the cached trace-index catalog.
    """
    if self._trace_index_catalog is None:
        builder = CatalogBuilder()
        dataset = self._array_dataset
        samples = dataset._samples
        trace_index = 0
        for inline_index in range(samples.shape[0]):
            for xline_index in range(samples.shape[1]):
                trace_samples = samples[inline_index, xline_index, :]
                if dataset._trace_has_samples(trace_samples):
                    builder.add(trace_index, (inline_index, xline_index))
                    # Advance the counter so that every populated trace is
                    # stored under its own key; without this all entries
                    # would be added under key 0.
                    trace_index += 1
        self._trace_index_catalog = builder.create()
    return self._trace_index_catalog.keys()
def trace_indexes(self):
    """Return the trace indexes of the traces which contain samples.

    On the first call the sample array of the underlying dataset is scanned
    over its first two axes; each position for which the dataset reports
    samples is assigned the next trace index (starting at zero), and the
    resulting catalog is cached on the instance. Later calls reuse the cache.

    Returns:
        The keys of the cached trace-index catalog.
    """
    if self._trace_index_catalog is None:
        trace_index_catalog_builder = CatalogBuilder()
        dataset = self._array_dataset
        samples = dataset._samples
        num_inlines = samples.shape[0]
        num_xlines = samples.shape[1]
        trace_index = 0
        for inline_index in range(num_inlines):
            for xline_index in range(num_xlines):
                trace_samples = samples[inline_index, xline_index, :]
                if not dataset._trace_has_samples(trace_samples):
                    continue
                trace_index_catalog_builder.add(
                    trace_index, (inline_index, xline_index))
                # Advance the counter so that every populated trace is
                # stored under its own key; without this all entries
                # would be added under key 0.
                trace_index += 1
        self._trace_index_catalog = trace_index_catalog_builder.create()
    return self._trace_index_catalog.keys()
def test_linear_regular_mapping_2d(self, i_start, i_num, i_step, j_start, j_num, j_step, c):
    """A 2D grid of regular keys with linearly derived values keeps its items in the catalog."""
    assume(i_step != 0)
    assume(j_step != 0)

    i_stop = i_start + i_num * i_step
    j_stop = j_start + j_num * j_step

    def v(i, j):
        # Value is linear in the offsets of i and j from their starts, plus c.
        return (i - i_start) * (j_stop - j_start) + (j - j_start) + c

    mapping = {}
    for i in range(i_start, i_stop, i_step):
        for j in range(j_start, j_stop, j_step):
            mapping[(i, j)] = v(i, j)

    catalog = CatalogBuilder(mapping).create()

    # Items present in both the source mapping and the catalog.
    common_items = set(mapping.items()).intersection(catalog.items())
    self.assertEqual(len(common_items), len(mapping))
def test_irregular_mapping_gives_dictionary_catalog(self):
    """Every item of an irregularly spaced mapping can be looked up in the built catalog."""
    irregular_keys = (1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144)
    irregular_values = (2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31)
    mapping = dict(zip(irregular_keys, irregular_values))

    catalog = CatalogBuilder(mapping).create()

    for key, value in mapping.items():
        assert catalog[key] == value
def test_complex_always_column_major_general(self, i_range, j_range, data):
    """Any column-major 2D mapping over drawn ranges builds a first-index-quickest catalog."""
    num_indices = len(i_range) * len(j_range)
    v_range = data.draw(ranges(min_size=num_indices, max_size=num_indices))

    # Generate the (i, j) keys with i varying quickest and pair them, in
    # that order, with the drawn values.
    column_major_keys = ((i, j) for j, i in product(j_range, i_range))
    d = dict(zip(column_major_keys, v_range))

    # The builder is expected to recover the i and j ranges, the base value
    # and the stride from nothing more than these items.
    catalog = CatalogBuilder(d).create()

    assert isinstance(catalog, FirstIndexVariesQuickestCatalog2D)
    assert catalog.key_min() == (i_range.start, j_range.start)
    assert catalog.key_max() == (last(i_range), last(j_range))
    assert catalog.value_first() == v_range.start
    assert catalog.value_last() == last(v_range)
    assert catalog.i_min == i_range.start
    assert catalog.i_max == last(i_range)
    assert catalog.j_min == j_range.start
    assert catalog.j_max == last(j_range)
    assert len(catalog) == num_indices
    # Every original item must be retrievable from the catalog.
    assert all(catalog[key] == d[key] for key in d)
def test_complex_column_major_example(self):
    """A strided, offset, column-major 2D mapping builds a first-index-varies-quickest catalog."""
    first_value = 100
    value_step = 4
    i_values = range(1, 31, 3)
    j_values = range(2, 24, 2)

    # Keys are produced with i varying quickest while the values climb in
    # steps of four. Listed in key order (as below) the values are therefore
    # not consecutive; sorted by value, the keys would appear in column-major
    # (first index varies quickest) order:
    #
    #   {(1, 2): 100,
    #    (1, 4): 140,
    #    (1, 6): 180,
    #    (1, 8): 220,
    #    (1, 10): 260,
    #    ...
    #    (28, 14): 376,
    #    (28, 16): 416,
    #    (28, 18): 456,
    #    (28, 20): 496,
    #    (28, 22): 536}
    #
    # The builder is expected to recover the i and j ranges, the base value
    # and the stride from nothing more than these items.
    column_major_keys = ((i, j) for (j, i) in product(j_values, i_values))
    d = dict(zip(column_major_keys, count(start=first_value, step=value_step)))

    catalog = CatalogBuilder(d).create()

    assert isinstance(catalog, FirstIndexVariesQuickestCatalog2D)
    assert catalog.key_min() == (1, 2)
    assert catalog.key_max() == (28, 22)
    assert catalog.value_first() == 100
    assert catalog.value_last() == 536
    assert catalog.i_min == 1
    assert catalog.i_max == 28
    assert catalog.j_min == 2
    assert catalog.j_max == 22
    assert len(catalog) == 110
    # Every original item must be retrievable from the catalog.
    assert all(catalog[key] == d[key] for key in d)
def test_arbitrary_mapping(self, mapping):
    """All items of an arbitrary mapping are present in the catalog built from it."""
    catalog = CatalogBuilder(mapping).create()

    # Items present in both the source mapping and the catalog.
    common_items = set(mapping.items()).intersection(catalog.items())
    self.assertEqual(len(common_items), len(mapping))
def test_empty_mapping_returns_empty_catalog(self):
    """A builder given no items creates a catalog of length zero."""
    no_items = []
    catalog = CatalogBuilder(no_items).create()
    assert len(catalog) == 0
def test_arbitrary_mapping_2d(self, mapping):
    """All items of an arbitrary 2D mapping are present in the catalog built from it."""
    catalog = CatalogBuilder(mapping).create()

    # Items present in both the source mapping and the catalog.
    common_items = set(mapping.items()).intersection(catalog.items())
    assert len(common_items) == len(mapping)
def test_adding_items_puts_them_in_the_catalog(self, mapping):
    """Items fed to the builder one at a time via add() can be looked up in the catalog."""
    builder = CatalogBuilder()
    for item_key, item_value in mapping.items():
        builder.add(item_key, item_value)

    catalog = builder.create()

    for item_key, item_value in mapping.items():
        assert catalog[item_key] == item_value
def test_duplicate_items_returns_none(self, mapping):
    """Supplying every item twice makes create() return None instead of a catalog."""
    doubled_items = mapping + mapping
    catalog = CatalogBuilder(doubled_items).create()
    assert catalog is None
def test_constant_mapping(self, mapping):
    """All items of the supplied mapping are present in the catalog built from it."""
    catalog = CatalogBuilder(mapping).create()

    # Items present in both the source mapping and the catalog.
    common_items = set(mapping.items()).intersection(catalog.items())
    assert len(common_items) == len(mapping)
def catalog_traces(fh, bps, trace_header_format=TraceHeaderRev1, endian='>', progress=None):
    """Build catalogs to facilitate random access to trace_samples data.

    Note:
        This function can take significant time to run, proportional
        to the number of traces in the SEG Y file.

    Four catalogs will be built:

     1. A catalog mapping trace_samples index (0-based) to the position of
        that trace_samples header in the file.

     2. A catalog mapping trace_samples index (0-based) to the number of
        samples in that trace_samples.

     3. A catalog mapping CDP number to the trace_samples index.

     4. A catalog mapping an (inline, crossline) number 2-tuple to
        trace_samples index. If that catalog cannot be built, a catalog keyed
        on (file_sequence_num, ensemble_num) is built in its place.

    Args:
        fh: A file-like-object open in binary mode, positioned at the
            start of the first trace_samples header.

        bps: The number of bytes per sample, such as obtained by a call
            to bytes_per_sample()

        trace_header_format: The class defining the trace header format.
            Defaults to TraceHeaderRev1.

        endian: '>' for big-endian data (the standard and default), '<'
            for little-endian (non-standard)

        progress: A unary callable which will be passed a number
            between zero and one indicating the progress made. If
            provided, this callback will be invoked at least once with
            an argument equal to 1

    Returns:
        A 4-tuple of the form (trace_samples-offset-catalog,
        trace_samples-length-catalog, cdp-catalog, line-catalog) where
        each catalog is a mapping, or None if no catalog could be built.

    Raises:
        TypeError: If progress is supplied but is not callable.
    """
    progress_callback = progress if progress is not None else lambda p: None

    if not callable(progress_callback):
        raise TypeError("catalog_traces(): progress callback must be callable")

    # Restrict header unpacking to the handful of fields the catalogs need.
    class CatalogSubFormat(metaclass=SubFormatMeta,
                           parent_format=trace_header_format,
                           parent_field_names=(
                               'file_sequence_num',
                               'ensemble_num',
                               'num_samples',
                               'inline_number',
                               'crossline_number',
                           )):
        pass

    trace_header_packer = make_header_packer(CatalogSubFormat, endian)

    length = file_length(fh)

    pos_begin = fh.tell()

    trace_offset_catalog_builder = CatalogBuilder()
    trace_length_catalog_builder = CatalogBuilder()
    line_catalog_builder = CatalogBuilder()
    alt_line_catalog_builder = CatalogBuilder()
    cdp_catalog_builder = CatalogBuilder()

    for trace_number in count():
        progress_callback(_READ_PROPORTION * pos_begin / length)
        fh.seek(pos_begin)
        data = fh.read(TRACE_HEADER_NUM_BYTES)
        if len(data) < TRACE_HEADER_NUM_BYTES:
            # A short read means there are no further complete trace headers.
            break
        trace_header = trace_header_packer.unpack(data)

        num_samples = trace_header.num_samples
        trace_length_catalog_builder.add(trace_number, num_samples)
        samples_bytes = num_samples * bps
        trace_offset_catalog_builder.add(trace_number, pos_begin)
        # Should we check the data actually exists?
        line_catalog_builder.add((trace_header.inline_number,
                                  trace_header.crossline_number),
                                 trace_number)
        alt_line_catalog_builder.add((trace_header.file_sequence_num,
                                      trace_header.ensemble_num),
                                     trace_number)
        cdp_catalog_builder.add(trace_header.ensemble_num, trace_number)
        # Skip over the samples to the start of the next trace header.
        pos_end = pos_begin + TRACE_HEADER_NUM_BYTES + samples_bytes
        pos_begin = pos_end

    progress_callback(_READ_PROPORTION)

    # The catalog-building phase accounts for whatever share of the work the
    # read phase did not, so its progress steps are quarters of the remainder.
    # Scaling the steps by _READ_PROPORTION itself could report values above 1.
    # NOTE(review): assumes _READ_PROPORTION is a fraction in [0, 1] - confirm.
    build_proportion = 1 - _READ_PROPORTION

    trace_offset_catalog = trace_offset_catalog_builder.create()
    progress_callback(_READ_PROPORTION + (build_proportion / 4))

    trace_length_catalog = trace_length_catalog_builder.create()
    progress_callback(_READ_PROPORTION + (build_proportion / 2))

    cdp_catalog = cdp_catalog_builder.create()
    progress_callback(_READ_PROPORTION + (build_proportion * 3 / 4))

    line_catalog = line_catalog_builder.create()

    if line_catalog is None:
        # Some 3D files put Inline and Crossline numbers in (TraceSequenceFile, cdp) pair
        line_catalog = alt_line_catalog_builder.create()

    progress_callback(1)

    return (trace_offset_catalog,
            trace_length_catalog,
            cdp_catalog,
            line_catalog)
def catalog_fixed_length_traces(fh, binary_reel_header, trace_header_format=TraceHeaderRev1, endian='>', progress=None):
    """Build catalogs for a SEG Y file in which every trace has the same length.

    Note:
        This function is faster than the full catalog, but has limitations.
        No CDP or inline, xline catalogs are built, and it only works for
        SEG Y files with a fixed number of samples in each trace. Trace
        positions are computed from the file length and the sample count in
        the binary reel header; no trace headers are read.

    Two catalogs will be built:

     1. A catalog mapping trace_samples index (0-based) to the position of
        that trace_samples header in the file.

     2. A catalog mapping trace_samples index (0-based) to the number of
        samples in that trace_samples.

    Args:
        fh: A file-like-object open in binary mode. Only its total length
            (and its filename, for error reporting) is used; trace offsets
            are computed relative to REEL_HEADER_NUM_BYTES.

        binary_reel_header: The binary reel header, from which the revision,
            the number of bytes per sample and the number of samples per
            trace are obtained.

        trace_header_format: The class defining the trace header format.
            Defaults to TraceHeaderRev1.

        endian: '>' for big-endian data (the standard and default), '<'
            for little-endian (non-standard). Not used by this function.

        progress: A unary callable which will be passed a number
            between zero and one indicating the progress made. This
            function invokes it once, with an argument equal to 1.

    Returns:
        A 4-tuple of the form (trace_samples-offset-catalog,
        trace_samples-length-catalog, None, None) where each catalog is a
        mapping, or None if no catalog could be built.

    Raises:
        TypeError: If progress is supplied but is not callable.
        ValueError: If the file length is not consistent with a whole number
            of fixed-length traces following the reel header.
    """
    revision = extract_revision(binary_reel_header)
    bps = bytes_per_sample(binary_reel_header, revision)

    progress_callback = progress if progress is not None else lambda p: None

    if not callable(progress_callback):
        raise TypeError("catalog_fixed_length_traces(): progress callback must be callable")

    # NOTE(review): this sub-format is not referenced again in this function.
    # It is retained because creating it through SubFormatMeta may validate
    # trace_header_format - confirm before removing.
    class CatalogSubFormat(metaclass=SubFormatMeta,
                           parent_format=trace_header_format,
                           parent_field_names=(
                               'file_sequence_num',
                               'ensemble_num',
                               'num_samples',
                               'inline_number',
                               'crossline_number',
                           )):
        pass

    num_file_bytes = file_length(fh)
    num_samples = binary_reel_header.num_samples

    # Every trace occupies a header plus a fixed block of samples.
    trace_num_bytes = TRACE_HEADER_NUM_BYTES + num_samples * bps

    # Exact integer arithmetic: any remainder means the file cannot consist of
    # a whole number of fixed-length traces after the reel header.
    num_traces, num_excess_bytes = divmod(num_file_bytes - REEL_HEADER_NUM_BYTES,
                                          trace_num_bytes)
    if num_excess_bytes != 0:
        raise ValueError(
            "SEG Y file {!r} of {} bytes is not consistent with a fixed trace length".format(
                filename_from_handle(fh),
                num_file_bytes))

    trace_offset_catalog_builder = CatalogBuilder()
    trace_length_catalog_builder = CatalogBuilder()

    for trace_index in range(num_traces):
        pos_begin = REEL_HEADER_NUM_BYTES + trace_num_bytes * trace_index
        trace_length_catalog_builder.add(trace_index, num_samples)
        trace_offset_catalog_builder.add(trace_index, pos_begin)

    trace_offset_catalog = trace_offset_catalog_builder.create()
    trace_length_catalog = trace_length_catalog_builder.create()

    progress_callback(1)

    return (trace_offset_catalog,
            trace_length_catalog,
            None,
            None)