# NOTE: these snippets assume imports along the following lines. The exact
# gmprocess module paths are assumptions here; names such as StreamCollection,
# StreamArray, StreamWorkspace, fetch_data, get_rawdir, create_event_file,
# plot_raw, read_data, and get_config have moved between gmprocess releases.
import argparse
import csv
import glob
import json
import logging
import os
import sys
import warnings
from pathlib import Path

import numpy as np
import pkg_resources
from obspy.core.utcdatetime import UTCDateTime

from gmprocess.io.read_directory import directory_to_streams


def test_StreamArray():

    # read usc data
    dpath = os.path.join("data", "testdata", "usc", "ci3144585")
    directory = pkg_resources.resource_filename("gmprocess", dpath)
    usc_streams, unprocessed_files, unprocessed_file_errors = directory_to_streams(
        directory)
    assert len(usc_streams) == 7

    usc_sa = StreamArray(usc_streams)

    # Use print method
    print(usc_sa)
    usc_sa.describe()

    # Use len method
    assert len(usc_sa) == 7

    # Use nonzero method
    assert bool(usc_sa)

    # read dmg data
    dpath = os.path.join("data", "testdata", "dmg", "ci3144585")
    directory = pkg_resources.resource_filename("gmprocess", dpath)
    dmg_streams, unprocessed_files, unprocessed_file_errors = directory_to_streams(
        directory)
    assert len(dmg_streams) == 1

    dmg_sa = StreamArray(dmg_streams)
    dmg_sa.describe()

    # Has 3 streams
    assert len(dmg_sa) == 3
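# A minimal sketch contrasting the two containers exercised above. Judging by
# the asserts, StreamArray keeps one entry per trace (the three-channel dmg
# stream yields three entries), while StreamCollection groups traces into
# per-station streams. ``streams`` is any list read by directory_to_streams.
def example_array_vs_collection(streams):
    sa = StreamArray(streams)
    sc = StreamCollection(streams)
    print(f"{len(sa)} trace-level streams, {len(sc)} station streams")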
def test_directory_to_streams():
    dpath = os.path.join('data', 'testdata', 'read_directory', 'whittier87')
    directory = pkg_resources.resource_filename('gmprocess', dpath)
    streams, unprocessed_files, unprocessed_file_errors = \
        directory_to_streams(directory)
    assert len(streams) == 7
def download(event, event_dir, config, directory):
    """Download data or load data from local directory, turn into Streams.

    Args:
        event (ScalarEvent):
            Object containing basic event hypocenter, origin time, magnitude.
        event_dir (str):
            Path where raw directory should be created (if downloading).
        config (dict):
            Dictionary with gmprocess configuration information.
        directory (str):
            Path where raw data already exists.

    Returns:
        tuple:
            - StreamWorkspace: Contains the event and raw streams.
            - str: Name of workspace HDF file.
            - StreamCollection: Raw data StationStreams.
    """
    # generate the raw directory
    rawdir = get_rawdir(event_dir)
    if directory is None:
        tcollection, terrors = fetch_data(
            event.time.datetime,
            event.latitude,
            event.longitude,
            event.depth_km,
            event.magnitude,
            config=config,
            rawdir=rawdir)
        # create an event.json file in each event directory,
        # in case user is simply downloading for now
        create_event_file(event, event_dir)
    else:
        streams, bad, terrors = directory_to_streams(directory)
        tcollection = StreamCollection(streams)

    # plot the raw waveforms
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UserWarning)
        pngfiles = glob.glob(os.path.join(rawdir, '*.png'))
        if not len(pngfiles):
            plot_raw(rawdir, tcollection, event)

    # create the workspace file and put the unprocessed waveforms in it
    workname = os.path.join(event_dir, 'workspace.hdf')
    if os.path.isfile(workname):
        os.remove(workname)
    workspace = StreamWorkspace(workname)
    workspace.addEvent(event)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=H5pyDeprecationWarning)
        workspace.addStreams(event, tcollection, label='unprocessed')

    return (workspace, workname, tcollection)
@classmethod
def from_directory(cls, directory):
    """Create a StreamCollection instance from a directory of data.

    Args:
        directory (str):
            Directory of ground motion files (streams) to be read.

    Returns:
        StreamCollection instance.
    """
    streams, missed_files, errors = directory_to_streams(directory)
    # Might eventually want to include some of the missed files and
    # error info but don't have a sensible place to put it currently.
    return cls(streams)
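# A minimal usage sketch for from_directory; the path below is a placeholder,
# not a dataset shipped with gmprocess.
def example_from_directory():
    sc = StreamCollection.from_directory('/path/to/ground_motion_files')
    # Files that fail to parse are dropped (see the comment above about
    # missed files); only the parsable streams end up in the instance.
    print(f'read {len(sc)} station streams')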
@classmethod
def from_directory(cls, directory, use_default_config=False):
    """Create a StreamCollection instance from a directory of data.

    Args:
        directory (str):
            Directory of ground motion files (streams) to be read.
        use_default_config (bool):
            Use default ("production") config.

    Returns:
        StreamCollection instance.
    """
    if use_default_config:
        config = get_config(use_default=True)
    else:
        config = None
    streams, missed_files, errors = directory_to_streams(
        directory, config=config)
    # Might eventually want to include some of the missed files and
    # error info but don't have a sensible place to put it currently.
    return cls(streams, config=config)
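# The newer signature above can also force the packaged default ("production")
# config rather than any active project config; a minimal sketch with a
# hypothetical ``datadir``:
def example_from_directory_default_config(datadir):
    return StreamCollection.from_directory(datadir, use_default_config=True)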
def download(event, event_dir, config, directory):
    """Download data or load data from local directory, turn into Streams.

    Args:
        event (ScalarEvent):
            Object containing basic event hypocenter, origin time, magnitude.
        event_dir (str):
            Path where raw directory should be created (if downloading).
        config (dict):
            Dictionary with gmprocess configuration information.
        directory (str):
            Path where data already exists. Must be organized in a 'raw'
            directory, within directories with names as the event ids. For
            example, if `directory` is 'proj_dir' and you have data for
            event id 'abc123' then the raw data to be read in should be
            located in `proj_dir/abc123/raw/`.

    Returns:
        tuple:
            - StreamWorkspace: Contains the event and raw streams.
            - str: Name of workspace HDF file.
            - StreamCollection: Raw data StationStreams.
    """
    # Make raw directory
    rawdir = get_rawdir(event_dir)
    if directory is None:
        tcollection, terrors = fetch_data(
            event.time.datetime,
            event.latitude,
            event.longitude,
            event.depth_km,
            event.magnitude,
            config=config,
            rawdir=rawdir)
        # create an event.json file in each event directory,
        # in case user is simply downloading for now
        create_event_file(event, event_dir)
    else:
        # Make raw directory
        in_event_dir = os.path.join(directory, event.id)
        in_raw_dir = get_rawdir(in_event_dir)
        streams, bad, terrors = directory_to_streams(in_raw_dir)
        tcollection = StreamCollection(streams, **config['duplicate'])
        create_event_file(event, event_dir)

    # Plot the raw waveforms
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UserWarning)
        pngfiles = glob.glob(os.path.join(rawdir, '*.png'))
        if not len(pngfiles):
            plot_raw(rawdir, tcollection, event)

    # Create the workspace file and put the unprocessed waveforms in it
    workname = os.path.join(event_dir, 'workspace.hdf')

    # Remove any existing workspace file
    if os.path.isfile(workname):
        os.remove(workname)

    workspace = StreamWorkspace(workname)
    workspace.addEvent(event)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=H5pyDeprecationWarning)
        workspace.addStreams(event, tcollection, label='unprocessed')

    return (workspace, workname, tcollection)
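# A sketch of calling the newer ``download`` against local data, under the
# layout its docstring describes; 'proj_dir', 'out_dir', and the event id
# 'abc123' are hypothetical names:
#
#   proj_dir/abc123/raw/   <- raw files read by directory_to_streams
#
def example_download_local(event, config):
    # event would be a ScalarEvent whose id matches the 'abc123' subdirectory
    event_dir = os.path.join('out_dir', event.id)
    workspace, workname, raw_streams = download(
        event, event_dir, config, directory='proj_dir')
    print(f'{len(raw_streams)} raw streams written to {workname}')
    return workspace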
def test_StreamCollection():

    # read usc data
    dpath = os.path.join('data', 'testdata', 'usc', 'ci3144585')
    directory = pkg_resources.resource_filename('gmprocess', dpath)
    usc_streams, unprocessed_files, unprocessed_file_errors = \
        directory_to_streams(directory)
    assert len(usc_streams) == 7

    usc_sc = StreamCollection(usc_streams)

    # Use print method
    print(usc_sc)

    # Use len method
    assert len(usc_sc) == 3

    # Use nonzero method
    assert bool(usc_sc)

    # Slice
    lengths = [
        len(usc_sc[0]),
        len(usc_sc[1]),
        len(usc_sc[2])
    ]
    sort_lengths = np.sort(lengths)
    assert sort_lengths[0] == 1
    assert sort_lengths[1] == 3
    assert sort_lengths[2] == 3

    # read dmg data
    dpath = os.path.join('data', 'testdata', 'dmg', 'ci3144585')
    directory = pkg_resources.resource_filename('gmprocess', dpath)
    dmg_streams, unprocessed_files, unprocessed_file_errors = \
        directory_to_streams(directory)
    assert len(dmg_streams) == 1

    dmg_sc = StreamCollection(dmg_streams)

    # Has one station
    assert len(dmg_sc) == 1
    # With 3 channels
    assert len(dmg_sc[0]) == 3

    # So this should have 4 stations
    test1 = dmg_sc + usc_sc
    assert len(test1) == 4

    # Overwrite the dmg station and network to force it to be
    # a duplicate of one of the stations in usc_sc to check if
    # validation works with these addition methods
    for tr in dmg_sc[0]:
        tr.stats['network'] = 'LA'
        tr.stats['station'] = '57'

    test3 = dmg_sc + usc_sc
    assert len(test3) == 3
    # usc_sc has 1 channel for station 57 and the modified
    # dmg_sc has 3 channels so the combined StreamCollection
    # should have 4
    assert len(test3[0]) == 4

    test_copy = dmg_sc.copy()
    assert test_copy[0][0].stats['standard']['process_level'] == \
        'corrected physical units'

    # Appending dmg should not add to length because of the
    # overwriting of the station/network above
    stream1 = test_copy[0]
    test_append = usc_sc.append(stream1)
    assert len(test_append) == 3

    # Change back to unique values for station/network
    for tr in dmg_sc[0]:
        tr.stats['network'] = 'LALALA'
        tr.stats['station'] = '575757'
    stream2 = dmg_sc[0]
    test_append = usc_sc.append(stream2)
    assert len(test_append) == 4

    # Check the from_directory method
    sc_test = StreamCollection.from_directory(directory)
    assert len(sc_test) == 1

    # Test to_dataframe
    jsonfile = os.path.join(directory, 'event.json')
    with open(jsonfile, 'rt') as f:
        origin = json.load(f)
    dmg_df = sc_test.to_dataframe(origin)
    np.testing.assert_allclose(
        dmg_df['HN1']['PGA'], 0.145615, atol=1e-5)
def test_duplicates():
    datapath = os.path.join('data', 'testdata', 'duplicate', 'general')
    datadir = pkg_resources.resource_filename('gmprocess', datapath)
    streams = directory_to_streams(datadir)[0]

    sc_bad = StreamCollection(streams=streams, handle_duplicates=False)
    # Check that we begin with having three streams
    assert len(sc_bad) == 3

    sc = StreamCollection(streams=streams, handle_duplicates=True)
    # Check that we now only have two streams in the StreamCollection
    assert len(sc) == 2
    assert len(sc[0]) == 3
    assert len(sc[1]) == 3

    # Check that we kept the 'CE' network and not the '--' network
    assert sc.select(station='23837')[0][0].stats.network == 'CE'

    # Now try changing the process levels of one of the streams
    for tr in sc_bad.select(network='--')[0]:
        tr.stats.standard.process_level = 'uncorrected physical units'
    for tr in sc_bad.select(network='CE')[0]:
        tr.stats.standard.process_level = 'corrected physical units'

    sc = StreamCollection(streams=sc_bad.streams, handle_duplicates=True)
    # Now, we should have kept the '--' network and not the 'CE' network
    assert sc.select(station='23837')[0][0].stats.network == '--'

    # Now change the process preference order to see if we get back the
    # original results
    sc = StreamCollection(streams=sc_bad.streams, handle_duplicates=True,
                          process_level_preference=['V2', 'V1'])
    assert sc.select(station='23837')[0][0].stats.network == 'CE'

    # Check that decreasing the distance tolerance results in streams now
    # being treated as different streams
    sc = StreamCollection(streams=streams, max_dist_tolerance=10,
                          handle_duplicates=True)
    assert len(sc) == 3

    # Change the streams to have the same processing level
    for st in sc_bad:
        for tr in st:
            tr.stats.standard.process_level = 'uncorrected physical units'

    # Try changing the preferred format order
    sc = StreamCollection(streams=sc_bad.streams, handle_duplicates=True,
                          format_preference=['dmg', 'cosmos'])
    assert sc.select(station='23837')[0][0].stats.network == '--'

    sc = StreamCollection(streams=sc_bad.streams, handle_duplicates=True,
                          format_preference=['cosmos', 'dmg'])
    assert sc.select(station='23837')[0][0].stats.network == 'CE'

    # Set process level and format to be the same
    for st in sc_bad:
        for tr in st:
            tr.stats.standard.source_format = 'cosmos'

    # Check that we keep the CE network due to the bad starttime on --
    sczz = sc_bad.select(station='23837', network='--')
    for st in sczz:
        for tr in st:
            tr.stats.starttime = UTCDateTime(0)
    sc = StreamCollection(streams=sc_bad.streams, handle_duplicates=True)
    assert sc.select(station='23837')[0][0].stats.network == 'CE'

    for tr in sc_bad.select(network='CE')[0]:
        tr.stats.starttime = UTCDateTime(0)
    for tr in sc_bad.select(network='--')[0]:
        tr.stats.starttime = UTCDateTime(2018, 8, 29, 2, 33, 0)
    sc = StreamCollection(streams=sc_bad.streams, handle_duplicates=True)
    assert sc.select(station='23837')[0][0].stats.network == '--'

    for tr in sc_bad.select(network='--')[0]:
        tr.stats.starttime = UTCDateTime(0)
        tr.trim(endtime=UTCDateTime(5))
    sc = StreamCollection(streams=sc_bad.streams, handle_duplicates=True)
    assert sc.select(station='23837')[0][0].stats.network == 'CE'

    for tr in sc_bad.select(network='CE')[0]:
        tr.trim(endtime=UTCDateTime(2))
    sc = StreamCollection(streams=sc_bad.streams, handle_duplicates=True)
    assert sc.select(station='23837')[0][0].stats.network == '--'

    for tr in sc_bad.select(network='--')[0]:
        tr.trim(endtime=UTCDateTime(2))
        tr.resample(20)
    sc = StreamCollection(streams=sc_bad.streams, handle_duplicates=True)
    assert sc.select(station='23837')[0][0].stats.network == 'CE'

    for tr in sc_bad.select(network='--')[0]:
        tr.resample(10)
    sc = StreamCollection(streams=sc_bad.streams, handle_duplicates=True)
    assert sc.select(station='23837')[0][0].stats.network == 'CE'

    # New test for some Hawaii data.
    datapath = os.path.join('data', 'testdata', 'duplicate', 'hawaii')
    datadir = pkg_resources.resource_filename('gmprocess', datapath)
    streams = directory_to_streams(datadir)[0]
    sc = StreamCollection(streams=streams, handle_duplicates=True)
    assert len(sc) == 1

    # New test for some Alaska data.
    datapath = os.path.join('data', 'testdata', 'duplicate', 'alaska')
    datadir = pkg_resources.resource_filename('gmprocess', datapath)
    streams = directory_to_streams(datadir)[0]
    sc = StreamCollection(streams=streams, handle_duplicates=True,
                          preference_order=['location_code'])
    assert len(sc) == 1
    for st in sc:
        for tr in st:
            assert tr.stats.location == 'D0'
def test_StreamCollection():

    # read usc data
    dpath = os.path.join('data', 'testdata', 'usc', 'ci3144585')
    directory = pkg_resources.resource_filename('gmprocess', dpath)
    usc_streams, unprocessed_files, unprocessed_file_errors = \
        directory_to_streams(directory)
    assert len(usc_streams) == 7

    usc_sc = StreamCollection(usc_streams)

    # Use print method
    print(usc_sc)

    # Use len method
    assert len(usc_sc) == 3

    # Use nonzero method
    assert bool(usc_sc)

    # Slice
    lengths = [len(usc_sc[0]), len(usc_sc[1]), len(usc_sc[2])]
    sort_lengths = np.sort(lengths)
    assert sort_lengths[0] == 1
    assert sort_lengths[1] == 3
    assert sort_lengths[2] == 3

    # read dmg data
    dpath = os.path.join('data', 'testdata', 'dmg', 'ci3144585')
    directory = pkg_resources.resource_filename('gmprocess', dpath)
    dmg_streams, unprocessed_files, unprocessed_file_errors = \
        directory_to_streams(directory)
    assert len(dmg_streams) == 1

    dmg_sc = StreamCollection(dmg_streams)

    # Has one station
    assert len(dmg_sc) == 1
    # With 3 channels
    assert len(dmg_sc[0]) == 3

    # So this should have 4 stations
    test1 = dmg_sc + usc_sc
    assert len(test1) == 4

    test_copy = dmg_sc.copy()
    assert test_copy[0][0].stats['standard']['process_level'] == \
        'uncorrected physical units'

    stream1 = test_copy[0]
    test_append = usc_sc.append(stream1)
    assert len(test_append) == 4

    # Use unique values for station/network so the appended stream is new
    for tr in dmg_sc[0]:
        tr.stats['network'] = 'LALALA'
        tr.stats['station'] = '575757'
    stream2 = dmg_sc[0]
    test_append = usc_sc.append(stream2)
    assert len(test_append) == 4

    # Check the from_directory method
    sc_test = StreamCollection.from_directory(directory)
    assert len(sc_test) == 1

    # Test to_dataframe
    jsonfile = os.path.join(directory, 'event.json')
    with open(jsonfile, 'rt', encoding='utf-8') as f:
        origin = json.load(f)
    dmg_df = sc_test.to_dataframe(origin)
    np.testing.assert_allclose(dmg_df['H1']['PGA'], 0.145615, atol=1e-5)

    # Check the from_traces method
    traces = []
    for st in sc_test:
        for tr in st:
            traces.append(tr)
    sc_test = StreamCollection.from_traces(traces)
    assert len(sc_test) == 1
def assemble(event, config, directory, gmprocess_version):
    """Download data or load data from local directory, turn into Streams.

    Args:
        event (ScalarEvent):
            Object containing basic event hypocenter, origin time, magnitude.
        config (dict):
            Dictionary with gmprocess configuration information.
        directory (str):
            Path where data already exists. Must be organized in a 'raw'
            directory, within directories with names as the event ids. For
            example, if `directory` is 'proj_dir' and you have data for
            event id 'abc123' then the raw data to be read in should be
            located in `proj_dir/abc123/raw/`.
        gmprocess_version (str):
            Software version for gmprocess.

    Returns:
        StreamWorkspace: Contains the event and raw streams.
    """
    # Make raw directory
    in_event_dir = os.path.join(directory, event.id)
    in_raw_dir = get_rawdir(in_event_dir)
    logging.debug(f"in_raw_dir: {in_raw_dir}")
    streams, unprocessed_files, unprocessed_file_errors = directory_to_streams(
        in_raw_dir, config=config)

    # Write errors to a csv file
    failures_file = Path(in_raw_dir) / "read_failures.csv"
    colnames = ["File", "Failure"]
    with open(failures_file, "w", newline="") as f:
        writer = csv.writer(f, delimiter=",", quoting=csv.QUOTE_MINIMAL)
        writer.writerow(colnames)
        for ufile, uerror in zip(unprocessed_files, unprocessed_file_errors):
            writer.writerow([ufile, uerror])

    logging.debug("streams:")
    logging.debug(streams)

    if config["read"]["use_streamcollection"]:
        stream_array = StreamCollection(streams, **config["duplicate"])
    else:
        stream_array = StreamArray(streams)

    logging.info("stream_array.describe_string():")
    logging.info(stream_array.describe_string())

    # Create the workspace file and put the unprocessed waveforms in it
    workname = os.path.join(in_event_dir, WORKSPACE_NAME)

    # Remove any existing workspace file
    if os.path.isfile(workname):
        os.remove(workname)

    workspace = StreamWorkspace(workname)
    workspace.addEvent(event)
    logging.debug("workspace.dataset.events:")
    logging.debug(workspace.dataset.events)
    workspace.addGmprocessVersion(gmprocess_version)
    workspace.addConfig()
    workspace.addStreams(event, stream_array, label="unprocessed",
                         gmprocess_version=gmprocess_version)
    logging.debug("workspace.dataset.waveforms.list():")
    logging.debug(workspace.dataset.waveforms.list())
    logging.debug("workspace.dataset.config")
    return workspace
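# A sketch of calling ``assemble`` for one event under the same on-disk
# layout as ``download`` above; the version string is a placeholder, and
# ``close()`` is assumed to follow StreamWorkspace usage elsewhere in
# gmprocess.
def example_assemble(event, config):
    workspace = assemble(event, config, directory="proj_dir",
                         gmprocess_version="0.0.0-placeholder")
    # workspace now holds the event plus streams labeled "unprocessed"
    workspace.close()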
def main():
    desc = '''Convert a directory of strong motion data files into any ObsPy
supported format.

https://docs.obspy.org/packages/autogen/obspy.core.stream.Stream.write.html#supported-formats

The inventory information will be written as an accompanying file in station
XML format.

To convert a single file in the NIED KNET format to MiniSEED:

gmconvert AOM0011801241951.EW

The following files will be written to the current directory:
 - BO.AOM001.--.HN2.mseed
 - BO.AOM001.--.HN2.xml

To convert the three files that make up the BO.AOM001 station data into one
MiniSEED file:

gmconvert AOM0011801241951.*

The following files will be written to the current directory:
 - BO.AOM001.HN.mseed
 - BO.AOM001.HN.xml

To convert a directory "indatadir" full of files to SAC format, and write to
a directory called "outdatadir":

gmconvert -i indatadir -o outdatadir -f SAC

Note: The data files in "indatadir" can be distributed through
subdirectories and gmconvert will find them.
'''
    parser = argparse.ArgumentParser(
        description=desc, formatter_class=CustomFormatter)
    parser.add_argument('files', help='List of files to convert.',
                        nargs='*', default=None)
    parser.add_argument('-i', '--indir',
                        help='Directory containing input files to convert.')
    parser.add_argument('-o', '--outdir',
                        help='Output directory.', default=os.getcwd())
    parser.add_argument('-f', '--format',
                        help='Output strong motion data format.',
                        choices=FORMATS, default='MSEED')

    # Shared arguments
    parser = add_shared_args(parser)
    args = parser.parse_args()

    setup_logger(args)
    logging.info("Running gmconvert.")

    # gather arguments
    indir = args.indir
    outdir = args.outdir
    oformat = args.format

    has_files = args.files is not None and len(args.files)
    if has_files and args.indir is not None:
        print('Specify input files or an input directory, not both.')
        sys.exit(1)
    if args.files is None and args.indir is None:
        print('You must specify input files or an input directory.')
        sys.exit(1)

    if not os.path.isdir(outdir):
        os.mkdir(outdir)

    if args.files:
        # read all the data files, gather up a list of obspy Stream objects
        allstreams = []
        error_dict = {}
        for dfile in args.files:
            logging.info('Parsing %s...' % dfile)
            try:
                streams = read_data(dfile)
            except BaseException as e:
                error_dict[dfile] = str(e)
                continue
            allstreams += streams
    else:
        # grab all the files in the input directory
        allstreams, unprocessed, errors = directory_to_streams(indir)
        error_dict = dict(zip(unprocessed, errors))

    sc = StreamCollection(allstreams)

    for stream in sc:
        streamid = stream.get_id()
        if len(stream) == 1:
            streamid = stream[0].get_id()
        outfile = os.path.join(outdir, '%s.%s' % (streamid, oformat.lower()))
        invfile = os.path.join(outdir, '%s.xml' % streamid)
        inv_format = 'STATIONXML'
        inv = stream.getInventory()
        logging.info('Writing data file %s...' % outfile)
        stream.write(outfile, format=oformat)
        logging.info('Writing inventory file %s...' % invfile)
        inv.write(invfile, format=inv_format)

    print('Wrote %i streams to %s' % (len(sc), outdir))
    if len(error_dict):
        print('\nThe following files could not be read:')
        for fname, error in error_dict.items():
            print('\t%s - "%s"' % (fname, error))
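# Entry-point guard so the converter can be run directly as a script; in the
# packaged tool, gmconvert is more likely installed as a console entry point.
if __name__ == '__main__':
    main()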
def test_StreamCollection():

    # read usc data
    dpath = os.path.join('data', 'testdata', 'usc', 'ci3144585')
    directory = pkg_resources.resource_filename('gmprocess', dpath)
    usc_streams, unprocessed_files, unprocessed_file_errors = \
        directory_to_streams(directory)
    assert len(usc_streams) == 7

    usc_sc = StreamCollection(usc_streams)

    # Use print method
    print(usc_sc)

    # Use len method
    assert len(usc_sc) == 3

    # Use nonzero method
    assert bool(usc_sc)

    # Slice
    lengths = [
        len(usc_sc[0]),
        len(usc_sc[1]),
        len(usc_sc[2])
    ]
    sort_lengths = np.sort(lengths)
    assert sort_lengths[0] == 1
    assert sort_lengths[1] == 3
    assert sort_lengths[2] == 3

    # read dmg data
    dpath = os.path.join('data', 'testdata', 'dmg', 'ci3144585')
    directory = pkg_resources.resource_filename('gmprocess', dpath)
    dmg_streams, unprocessed_files, unprocessed_file_errors = \
        directory_to_streams(directory)
    assert len(dmg_streams) == 1

    dmg_sc = StreamCollection(dmg_streams)

    # Has one station
    assert len(dmg_sc) == 1
    # With 3 channels
    assert len(dmg_sc[0]) == 3

    # So this should have 4 stations
    test1 = dmg_sc + usc_sc
    assert len(test1) == 4

    test_copy = dmg_sc.copy()
    assert test_copy[0][0].stats['standard']['process_level'] == \
        'corrected physical units'

    stream1 = test_copy[0]
    test_append = usc_sc.append(stream1)
    assert len(test_append) == 4

    # Use unique values for station/network so the appended stream is new
    for tr in dmg_sc[0]:
        tr.stats['network'] = 'LALALA'
        tr.stats['station'] = '575757'
    stream2 = dmg_sc[0]
    test_append = usc_sc.append(stream2)
    assert len(test_append) == 4

    # Check the from_directory method
    sc_test = StreamCollection.from_directory(directory)
    assert len(sc_test) == 1

    # Test to_dataframe
    jsonfile = os.path.join(directory, 'event.json')
    with open(jsonfile, 'rt') as f:
        origin = json.load(f)
    dmg_df = sc_test.to_dataframe(origin)
    np.testing.assert_allclose(
        dmg_df['HN1']['PGA'], 0.145615, atol=1e-5)