def test_get_quantile_fails(self, stat_segy):
    """`get_quantile` must fail if survey stats were not collected."""
    path, _ = stat_segy
    survey = Survey(path, header_index="TRACE_SEQUENCE_FILE", header_cols="offset")
    with pytest.raises(ValueError):
        survey.get_quantile(0.5)

def test_no_mark_dead_warning(self, segy_path):
    """Check that a warning is emitted when `collect_stats` is run before `mark_dead_traces`."""
    survey = Survey(segy_path, header_index="TRACE_SEQUENCE_FILE", header_cols="offset")
    with pytest.warns(RuntimeWarning):
        survey.collect_stats()

def test_generated_segy_loading(segy_path, header_index):
    """Check that a generated SEG-Y file is loaded and a gather can be sampled from it."""
    s = Survey(segy_path, header_index=header_index,
               header_cols=['FieldRecord', 'TraceNumber', 'SourceX', 'SourceY', 'GroupX', 'GroupY',
                            'offset', 'CDP_X', 'CDP_Y', 'INLINE_3D', 'CROSSLINE_3D'])
    assert s.sample_gather()

def test_concat_merge(self, segy_path, header_index):
    """Test concat followed by merge."""
    s1_before = Survey(segy_path, header_index=header_index, header_cols=HEADER_COLS, name="before")
    s2_before = Survey(segy_path, header_index=header_index, header_cols=HEADER_COLS, name="before")
    s1_after = Survey(segy_path, header_index=header_index, header_cols=HEADER_COLS, name="after")
    s2_after = Survey(segy_path, header_index=header_index, header_cols=HEADER_COLS, name="after")
    index_before = SeismicIndex(s1_before, s2_before, mode="c")
    index_after = SeismicIndex(s1_after, s2_after, mode="c")
    _ = SeismicIndex(index_before, index_after, mode="m")

def test_merge_concat(self, segy_path, header_index):
    """Test merge followed by concat."""
    s1_before = Survey(segy_path, header_index=header_index, header_cols=HEADER_COLS, name="before")
    s2_before = Survey(segy_path, header_index=header_index, header_cols=HEADER_COLS, name="before")
    s1_after = Survey(segy_path, header_index=header_index, header_cols=HEADER_COLS, name="after")
    s2_after = Survey(segy_path, header_index=header_index, header_cols=HEADER_COLS, name="after")
    index_s1 = SeismicIndex(s1_before, s1_after, mode="m")
    index_s2 = SeismicIndex(s2_before, s2_after, mode="m")
    _ = SeismicIndex(index_s1, index_s2, mode="c")

def test_load_traces(self, load_segy, init_limits, load_limits, use_segyio_trace_loader, traces_pos):
    """Compare loaded traces with the actual ones."""
    path, trace_data = load_segy
    survey = Survey(path, header_index="TRACE_SEQUENCE_FILE", limits=init_limits,
                    use_segyio_trace_loader=use_segyio_trace_loader, bar=False)
    # load_limits takes priority over init_limits
    limits = init_limits if load_limits is None else load_limits
    trace_data = trace_data[traces_pos, limits]
    loaded_data = survey.load_traces(traces_pos, limits=load_limits)
    assert np.allclose(loaded_data, trace_data)

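# A minimal, self-contained sketch of the limits-precedence rule exercised above: limits passed at
# load time override those set at `Survey` construction, and `None` falls back to the init value.
# `resolve_limits` is a hypothetical helper written here purely for illustration.
def resolve_limits(init_limits, load_limits):
    """Return the limits that take effect for a single load call."""
    return init_limits if load_limits is None else load_limits

assert resolve_limits(slice(0, 100), None) == slice(0, 100)           # fall back to init_limits
assert resolve_limits(slice(0, 100), slice(10, 50)) == slice(10, 50)  # load_limits wins
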
def test_batch_load_combined(segy_path):
    """Check that a combined load collects all batch traces into a single gather."""
    survey = Survey(segy_path, header_index='TRACE_SEQUENCE_FILE', name='raw')
    dataset = SeismicDataset(survey)
    batch = dataset.next_batch(200)
    batch = batch.load(src='raw', combined=True)
    assert len(batch.raw) == 1
    assert len(batch.raw[0].data) == 200

def survey(segy_path):
    """Create a survey with dead traces removed, stats collected and synthetic first break times set."""
    survey = Survey(segy_path, header_index=['INLINE_3D', 'CROSSLINE_3D'], header_cols=['offset', 'FieldRecord'])
    survey.remove_dead_traces(bar=False)
    survey.collect_stats(bar=False)
    survey.headers[HDR_FIRST_BREAK] = np.random.randint(0, 1000, len(survey.headers))
    return survey

def test_load_gather(self, load_segy, init_limits, load_limits, traces_pos):
    """Test gather loading by its headers."""
    path, trace_data = load_segy
    survey = Survey(path, header_index="FieldRecord", limits=init_limits, bar=False)
    gather_headers = survey.headers.iloc[traces_pos]
    gather = survey.load_gather(gather_headers, limits=load_limits)
    # load_limits takes priority over init_limits
    limits = init_limits if load_limits is None else load_limits
    gather_data = trace_data[traces_pos, limits]
    assert gather.headers.equals(gather_headers)
    assert np.allclose(gather.data, gather_data)
    assert np.allclose(gather.samples, survey.file_samples[limits])

def test_limits(self, segy_path, header_index, expected_index, header_cols, expected_cols, name,
                expected_name, limits, slice_limits):
    """Test survey loading with limits set."""
    survey = Survey(segy_path, header_index=header_index, header_cols=header_cols, name=name,
                    limits=limits, n_workers=1, bar=False)
    expected_headers = expected_index | expected_cols | {"TRACE_SEQUENCE_FILE"}
    assert_survey_loaded(survey, segy_path, expected_name, expected_index, expected_headers)

    # Assert that correct limits were set
    assert_survey_limits_set(survey, slice_limits)

    # Assert that stats are not calculated
    assert survey.has_stats is False
    assert survey.dead_traces_marked is False

    # Check that passing limits to init is identical to running the `set_limits` method
    other = Survey(segy_path, header_index=header_index, header_cols=header_cols, name=name)
    other.set_limits(limits)
    assert_surveys_equal(survey, other)

def test_get_quantile(self, stat_segy, quantile, is_scalar):
    """Run `get_quantile` and check the returned value and its type."""
    path, _ = stat_segy
    survey = Survey(path, header_index="TRACE_SEQUENCE_FILE", header_cols="offset")
    survey.mark_dead_traces()
    survey.collect_stats()
    quantile_val = survey.get_quantile(quantile)
    assert np.isscalar(quantile_val) is is_scalar
    assert np.allclose(np.array(quantile_val).ravel(), survey.quantile_interpolator(quantile))

def test_sample_gather(self, load_segy, init_limits, load_limits):
    """Test gather sampling."""
    path, trace_data = load_segy
    survey = Survey(path, header_index="FieldRecord", limits=init_limits, bar=False)
    gather = survey.sample_gather(limits=load_limits)
    assert gather.index is not None  # Unique index
    gather_headers = survey.get_headers_by_indices([gather.index])
    assert gather.headers.equals(gather_headers)
    # load_limits takes priority over init_limits
    limits = init_limits if load_limits is None else load_limits
    traces_pos = gather["TRACE_SEQUENCE_FILE"].ravel() - 1
    gather_data = trace_data[traces_pos, limits]
    assert np.allclose(gather.data, gather_data)
    assert np.allclose(gather.samples, survey.file_samples[limits])

def make_benchmark_data(path):
    """Generate a SEG-Y file with a specific geometry so that CDP gathers contain the same number
    of traces and construct survey objects for benchmarking.
    """
    # The geometry defined below should be changed only together with the survey filtering parameters
    # to ensure that after filtering all the gathers / supergathers have the same number of traces
    make_prestack_segy(path, fmt=1, survey_size=(400, 400), sources_step=(5, 5), receivers_step=(5, 5),
                       activation_dist=(50, 50), bin_size=(10, 10))

    # Load headers and add synthetic FirstBreak times
    sur = Survey(path, header_index=['INLINE_3D', 'CROSSLINE_3D'], header_cols='offset', name='raw')
    sur.headers['FirstBreak'] = np.random.randint(0, 3000, len(sur.headers))

    def edge_lines_filter(line, num_lines):
        return (line >= line.min() + num_lines) & (line <= line.max() - num_lines)

    # Drop three lines of CDPs from each side of the survey since they have fewer traces than the central ones
    survey = (sur.filter(edge_lines_filter, 'CROSSLINE_3D', num_lines=3)
                 .filter(edge_lines_filter, 'INLINE_3D', num_lines=3))

    sg_survey = survey.generate_supergathers((3, 3), (1, 1), (0, 0))
    # Drop one line of supergathers from each side of the survey since they have fewer traces than the central ones
    sg_survey = (sg_survey.filter(edge_lines_filter, 'SUPERGATHER_CROSSLINE_3D', num_lines=1)
                          .filter(edge_lines_filter, 'SUPERGATHER_INLINE_3D', num_lines=1))
    return survey, sg_survey

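# A minimal usage sketch for `make_benchmark_data`, assuming it is called with a path inside a
# scratch directory. The two returned surveys index regular CDP gathers and supergathers
# respectively, each holding an equal number of traces per gather after the filtering above.
if __name__ == "__main__":
    import os
    import tempfile

    with tempfile.TemporaryDirectory() as tmp_dir:
        survey, sg_survey = make_benchmark_data(os.path.join(tmp_dir, "benchmark.sgy"))
        print(len(survey.headers), len(sg_survey.headers))
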
def test_remove(self, stat_segy, header_index, inplace, pre_mark_dead):
    """Check that `remove_dead_traces` properly updates survey `headers` and sets `n_dead_traces`
    counter to 0."""
    path, trace_data = stat_segy
    survey = Survey(path, header_index=header_index, header_cols="offset")
    traces_pos = survey.headers.reset_index()["TRACE_SEQUENCE_FILE"].values - 1
    trace_data = trace_data[np.argsort(traces_pos)]

    survey_copy = survey.copy()
    if pre_mark_dead:
        survey.mark_dead_traces()
    survey_filtered = survey.remove_dead_traces(inplace=inplace)

    is_dead = np.isclose(trace_data.min(axis=1), trace_data.max(axis=1))
    survey_copy.headers = survey_copy.headers.loc[~is_dead]
    survey_copy.n_dead_traces = 0
    survey_copy.headers[HDR_DEAD_TRACE] = False

    # Validate that dead traces are not present
    assert survey_filtered.n_dead_traces == 0
    assert survey_filtered.headers.index.is_monotonic_increasing
    assert_surveys_equal(survey_filtered, survey_copy)
    assert_survey_processed_inplace(survey, survey_filtered, inplace)

def test_mark(self, stat_segy, header_index, detection_limits):
    """Check that `mark_dead_traces` properly updates survey `headers` and sets `n_dead_traces`
    counter."""
    path, trace_data = stat_segy
    survey = Survey(path, header_index=header_index, header_cols="offset")
    traces_pos = survey.headers.reset_index()["TRACE_SEQUENCE_FILE"].values - 1
    trace_data = trace_data[np.argsort(traces_pos)]

    survey_copy = survey.copy()
    survey.mark_dead_traces(limits=detection_limits, bar=False)

    if detection_limits:
        trace_data = trace_data[:, detection_limits]
    is_dead = np.isclose(trace_data.min(axis=1), trace_data.max(axis=1))
    survey_copy.headers[HDR_DEAD_TRACE] = is_dead
    survey_copy.n_dead_traces = np.sum(is_dead)

    assert_surveys_equal(survey, survey_copy)

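# The dead-trace criterion asserted above, sketched on synthetic data: a trace is considered dead
# when it is constant, i.e. its minimum and maximum amplitudes coincide.
import numpy as np

traces = np.array([[0.0, 0.0, 0.0],   # constant -> dead
                   [0.0, 1.0, 2.0]])  # varying  -> alive
is_dead = np.isclose(traces.min(axis=1), traces.max(axis=1))
assert is_dead.tolist() == [True, False]
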
def test_load_traces_after_mmap_reconstruction(self, load_segy, init_limits, load_limits, traces_pos):
    """Compare loaded traces with the actual ones after the memory map is reconstructed."""
    path, trace_data = load_segy
    survey = Survey(path, header_index="TRACE_SEQUENCE_FILE", limits=init_limits,
                    use_segyio_trace_loader=False, bar=False)
    # The number of traces changes after the filter. The memory map is reconstructed after copy
    # and must remember the original data shape.
    survey = survey.filter(lambda tsf: tsf % 2 == 1, "TRACE_SEQUENCE_FILE", inplace=True).copy()
    # load_limits takes priority over init_limits
    limits = init_limits if load_limits is None else load_limits
    trace_data = trace_data[traces_pos, limits]
    loaded_data = survey.load_traces(traces_pos, limits=load_limits)
    assert np.allclose(loaded_data, trace_data)

def test_benchmark_runs(self, segy_path, method_name, method_kwargs, root_pipeline):
    """Check that a benchmark runs for the given method and pipeline."""
    survey = Survey(segy_path, header_index=['INLINE_3D', 'CROSSLINE_3D'], header_cols='offset', name='raw')
    dataset = SeismicDataset(survey)
    load_bm = Benchmark(method_name=method_name, method_kwargs=method_kwargs, targets=('for', 'threads'),
                        batch_sizes=[1, 5, 10], dataset=dataset, root_pipeline=root_pipeline)
    load_bm.run(n_iters=3, bar=False, shuffle=42)

def test_no_limits(self, segy_path, header_index, expected_index, header_cols, expected_cols, name,
                   expected_name):
    """Test survey loading when limits are not passed."""
    survey = Survey(segy_path, header_index=header_index, header_cols=header_cols, name=name,
                    n_workers=1, bar=False)
    expected_headers = expected_index | expected_cols | {"TRACE_SEQUENCE_FILE"}
    assert_survey_loaded(survey, segy_path, expected_name, expected_index, expected_headers)

    # Assert that whole traces are loaded
    limits = slice(0, survey.n_file_samples, 1)
    assert_survey_limits_set(survey, limits)

    # Assert that stats are not calculated
    assert survey.has_stats is False
    assert survey.dead_traces_marked is False

def test_aggregate_segys(segy_path, tmp_path, mode, indices):
    """Dump gathers of a survey to separate SEG-Y files, aggregate them back into a single file
    and compare the result with the original survey."""
    expected_survey = Survey(segy_path, header_index='FieldRecord', header_cols='all', name='raw')
    indices = expected_survey.headers.index.drop_duplicates() if indices == 'all' else indices

    if mode == 'split':
        paths = [f'folder/folder_{i}' for i in range(len(indices))]
    else:
        paths = [''] * len(indices)

    for num, (ix, path) in enumerate(zip(indices, paths)):
        g = expected_survey.get_gather(ix)
        g.dump(os.path.join(tmp_path, path), name=f'{num}_{ix}', retain_parent_segy_headers=True)

    aggregate_segys(os.path.join(tmp_path, './**/*.sgy'), os.path.join(tmp_path, 'aggr.sgy'), recursive=True)
    dumped_survey = Survey(os.path.join(tmp_path, 'aggr.sgy'), header_index='FieldRecord', header_cols='all')

    assert np.allclose(expected_survey.samples, dumped_survey.samples), "Samples don't match"
    assert np.allclose(expected_survey.sample_rate, dumped_survey.sample_rate), "Sample rate doesn't match"
    assert np.allclose(expected_survey.n_samples, dumped_survey.n_samples), "Number of samples doesn't match"

    # TODO: optimize
    drop_columns = (["TRACE_SEQUENCE_FILE"] +
                    list({"TRACE_SAMPLE_INTERVAL"} & set(expected_survey.headers.columns)))
    expected_survey_headers = (expected_survey.headers.loc[indices].reset_index()
                                              .sort_values(['FieldRecord', 'TraceNumber'])
                                              .drop(columns=drop_columns)
                                              .reset_index(drop=True))
    dumped_survey_headers = (dumped_survey.headers.reset_index()
                                          .sort_values(['FieldRecord', 'TraceNumber'])
                                          .drop(columns=drop_columns)
                                          .reset_index(drop=True))
    assert len(expected_survey_headers) == len(dumped_survey_headers), "Lengths of surveys' headers don't match"
    assert expected_survey_headers.equals(dumped_survey_headers), "The headers don't match"

    for ix in indices:
        expected_gather = expected_survey.get_gather(ix)
        expected_gather.sort(by='TraceNumber')
        dumped_gather = dumped_survey.get_gather(ix)
        dumped_gather.sort(by='TraceNumber')
        compare_gathers(expected_gather, dumped_gather, drop_cols=drop_columns, check_types=True,
                        same_survey=False)

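# A minimal sketch of the call pattern validated above, wrapped in a hypothetical helper with
# placeholder paths: `aggregate_segys` globs the input pattern (recursively here) and writes all
# matched traces into a single SEG-Y file.
def aggregate_dumped_segys_example():
    aggregate_segys("dumps/**/*.sgy", "aggregated.sgy", recursive=True)
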
def test_collect_stats(self, stat_segy, init_limits, remove_dead, n_quantile_traces, quantile_precision,
                       stats_limits, use_segyio_trace_loader):
    """Compare stats obtained by running `collect_stats` with the actual ones."""
    path, trace_data = stat_segy
    survey = Survey(path, header_index="TRACE_SEQUENCE_FILE", header_cols="offset", limits=init_limits,
                    use_segyio_trace_loader=use_segyio_trace_loader, bar=False)
    survey.mark_dead_traces(bar=False)
    if remove_dead:
        survey.remove_dead_traces(inplace=True)
    survey_copy = survey.copy()

    survey.collect_stats(n_quantile_traces=n_quantile_traces, quantile_precision=quantile_precision,
                         limits=stats_limits, bar=True)

    # stats_limits takes priority over init_limits
    stats_limits = init_limits if stats_limits is None else stats_limits
    trace_data = trace_data[:, stats_limits]
    if remove_dead:
        is_dead = np.isclose(trace_data.min(axis=1), trace_data.max(axis=1))
        trace_data = trace_data[~is_dead].ravel()

    # Perform basic tests of estimated quantiles since fair comparison of interpolators is complicated
    quantiles = survey.quantile_interpolator(np.linspace(0, 1, 11))
    assert np.isclose(quantiles[0], trace_data.min())
    assert np.isclose(quantiles[-1], trace_data.max())
    assert (np.diff(quantiles) >= 0).all()
    survey.quantile_interpolator = None

    # Fill the copy of the survey with actual stats and compare it with the source survey
    survey_copy.has_stats = True
    survey_copy.min = trace_data.min()
    survey_copy.max = trace_data.max()
    survey_copy.mean = trace_data.mean()
    survey_copy.std = trace_data.std()
    assert_surveys_equal(survey, survey_copy)

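# The quantile sanity checks above rely on two generic properties, sketched here with plain numpy
# on synthetic data: extreme quantiles coincide with the data min/max, and the quantile function
# is non-decreasing.
import numpy as np

data = np.random.randn(1000)
quantiles = np.quantile(data, np.linspace(0, 1, 11))
assert np.isclose(quantiles[0], data.min())
assert np.isclose(quantiles[-1], data.max())
assert (np.diff(quantiles) >= 0).all()
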
def test_concat(self, segy_path, header_index):
    """Test concatenation of two surveys."""
    sur1 = Survey(segy_path, header_index=header_index, name="sur")
    sur2 = Survey(segy_path, header_index=header_index, name="sur")
    _ = SeismicIndex(sur1, sur2, mode="c")

def test_concat_wrong_names_fails(self, segy_path, header_index):
    """Concat must fail if surveys have different names."""
    sur1 = Survey(segy_path, header_index=header_index, name="sur")
    sur2 = Survey(segy_path, header_index=header_index, name="not_sur")
    with pytest.raises(ValueError):
        _ = SeismicIndex(sur1, sur2, mode="c")

def test_concat_wrong_index_fails(self, segy_path, header_index):
    """Concat must fail if surveys are indexed by different headers."""
    sur1 = Survey(segy_path, header_index=header_index, name="sur")
    sur2 = Survey(segy_path, header_index="CDP", name="sur")
    with pytest.raises(ValueError):
        _ = SeismicIndex(sur1, sur2, mode="c")

def test_merge(self, segy_path, header_index):
    """Test merging of two surveys."""
    sur1 = Survey(segy_path, header_index=header_index, header_cols=HEADER_COLS, name="before")
    sur2 = Survey(segy_path, header_index=header_index, header_cols=HEADER_COLS, name="after")
    _ = SeismicIndex(sur1, sur2, mode="m")

def test_from_index(self, segy_path, header_index):
    """Test instantiation from an already created index."""
    survey = Survey(segy_path, header_index=header_index)
    index = SeismicIndex(survey)
    _ = SeismicIndex(index)

def test_merge_wrong_names_fails(self, segy_path, header_index):
    """Merge must fail if surveys have the same name."""
    sur1 = Survey(segy_path, header_index=header_index, header_cols=HEADER_COLS, name="sur")
    sur2 = Survey(segy_path, header_index=header_index, header_cols=HEADER_COLS, name="sur")
    with pytest.raises(ValueError):
        _ = SeismicIndex(sur1, sur2, mode="m")

def test_headers_loading(self, segy_path, chunk_size, n_workers, bar, use_segyio_trace_loader):
    """Test sequential and parallel loading of survey trace headers."""
    survey = Survey(segy_path, header_index="FieldRecord", header_cols="all", name="raw",
                    chunk_size=chunk_size, n_workers=n_workers, bar=bar,
                    use_segyio_trace_loader=use_segyio_trace_loader)
    assert_survey_loaded(survey, segy_path, "raw", {"FieldRecord"}, ALL_HEADERS)

def test_index_split(test_class, segy_path, header_index):
    """Test whether index or dataset `split` runs."""
    survey = Survey(segy_path, header_index=header_index, bar=False)
    test_obj = test_class(survey)
    test_obj.split()

def test_merge_wrong_index_fails(self, segy_path, header_index):
    """Merge must fail if surveys are indexed by different headers."""
    sur1 = Survey(segy_path, header_index=header_index, header_cols=HEADER_COLS, name="before")
    sur2 = Survey(segy_path, header_index="CDP", header_cols=HEADER_COLS, name="after")
    with pytest.raises(ValueError):
        _ = SeismicIndex(sur1, sur2, mode="m")

def test_from_survey(self, segy_path, header_index):
    """Test instantiation from a single survey."""
    survey = Survey(segy_path, header_index=header_index)
    _ = SeismicIndex(survey)