def _process_event(self, event):
    event_dir = os.path.join(self.gmrecords.data_path, event.id)
    workname = os.path.join(event_dir, WORKSPACE_NAME)
    if not os.path.isfile(workname):
        logging.info(
            'No workspace file found for event %s. Please run '
            'subcommand \'assemble\' to generate workspace file.'
            % event.id)
        logging.info('Continuing to next event.')
        return event.id

    workspace = StreamWorkspace.open(workname)
    rstreams = workspace.getStreams(
        event.id, labels=['unprocessed'], config=self.gmrecords.conf)

    if len(rstreams):
        logging.info('Processing \'%s\' streams for event %s...'
                     % ('unprocessed', event.id))
        pstreams = process_streams(
            rstreams, event, config=self.gmrecords.conf)
        workspace.addStreams(event, pstreams, label=self.process_tag)
    else:
        logging.info('No streams found. Nothing to do. Goodbye.')

    workspace.close()
    return event.id
def get_sample_data(volume):
    thisdir = pathlib.Path(__file__).parent
    datafile = (thisdir / ".." / ".." / ".." / ".." / "gmprocess" / "data"
                / "testdata" / "asdf" / "nc71126864" / "workspace.h5")
    workspace = StreamWorkspace.open(datafile)
    t1 = time.time()
    eventid = workspace.getEventIds()[0]
    t2 = time.time()
    print(f"{t2-t1:.2f} seconds to read eventid")
    scalar_event = workspace.getEvent(eventid)

    station = "CE.79435"
    labels = workspace.getLabels()
    if volume == Volume.RAW:
        labels.remove("default")
    elif volume == Volume.CONVERTED:
        labels.remove("default")
    else:
        labels.remove("unprocessed")
    plabel = labels[0]

    streams = workspace.getStreams(eventid, stations=[station],
                                   labels=[plabel])
    gmprocess_version = workspace.getGmprocessVersion()
    idx = gmprocess_version.find(".dev")
    gmprocess_version = gmprocess_version[0:idx]
    stream = streams[0]
    trace = stream[0]
    workspace.close()
    return (trace, eventid, scalar_event, stream, gmprocess_version)
def generate_workspace():
    """Generate simple HDF5 with ASDF layout for testing."""
    PCOMMANDS = [
        'assemble',
        'process',
    ]
    EVENTID = 'us1000778i'
    LABEL = 'ptest'
    datafiles, event = read_data_dir('geonet', EVENTID, '*.V1A')

    tdir = tempfile.mkdtemp()
    tfilename = os.path.join(tdir, 'workspace.h5')

    raw_data = []
    for dfile in datafiles:
        raw_data += read_data(dfile)
    write_asdf(tfilename, raw_data, event, label="unprocessed")
    del raw_data

    config = get_config()
    workspace = StreamWorkspace.open(tfilename)
    raw_streams = workspace.getStreams(EVENTID, labels=['unprocessed'])
    pstreams = process_streams(raw_streams, event, config=config)
    workspace.addStreams(event, pstreams, label=LABEL)
    workspace.calcMetrics(event.id, labels=[LABEL], config=config)

    return tfilename
def get_eq_full_status_df(wdir, eqid, imc):
    ws = StreamWorkspace.open(os.path.join(wdir, eqid, 'workspace.h5'))
    labels = ws.getLabels()
    labels.remove('unprocessed')
    processed_label = labels[0]
    sc = ws.getStreams(ws.getEventIds()[0], labels=[processed_label])
    ws.close()

    rows = []
    for st in sc:
        coords = st[0].stats.coordinates
        row = [st.id, coords.latitude, coords.longitude]
        if st.passed:
            row.append('Passed')
        else:
            for tr in st:
                if tr.hasParameter('failure'):
                    row.append(tr.getParameter('failure')['reason'])
                    break
        rows.append(row)

    df = pd.DataFrame(rows, columns=[
        'StationID', 'StationLatitude', 'StationLongitude', 'Failure reason'
    ])
    return df
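# Usage sketch (assumption, not from the source): the project data directory
# below is hypothetical, and the event ID simply reuses "nc71126864" from the
# sample-data helper above for illustration. It shows one way to summarize the
# status table returned by get_eq_full_status_df.
def _example_failure_summary():
    wdir = "/data/gmprocess/projects/default/data"  # hypothetical path
    df = get_eq_full_status_df(wdir, "nc71126864", imc="rotd50")
    # Tally stations that passed versus each failure reason.
    return df["Failure reason"].value_counts()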
def generate_workspace():
    """Generate simple HDF5 with ASDF layout for testing."""
    PCOMMANDS = [
        "assemble",
        "process",
    ]
    EVENTID = "us1000778i"
    LABEL = "ptest"
    datafiles, event = read_data_dir("geonet", EVENTID, "*.V1A")

    tdir = tempfile.mkdtemp()
    tfilename = os.path.join(tdir, "workspace.h5")

    raw_data = []
    for dfile in datafiles:
        raw_data += read_data(dfile)
    write_asdf(tfilename, raw_data, event, label="unprocessed")
    del raw_data

    config = update_config(os.path.join(datadir, "config_min_freq_0p2.yml"))
    workspace = StreamWorkspace.open(tfilename)
    raw_streams = workspace.getStreams(EVENTID, labels=["unprocessed"],
                                       config=config)
    pstreams = process_streams(raw_streams, event, config=config)
    workspace.addStreams(event, pstreams, label=LABEL)
    workspace.calcMetrics(event.id, labels=[LABEL], config=config)

    return tfilename
def __init__(self, cosmos_directory, h5_filename,
             volume=Volume.PROCESSED, label=None):
    if volume == Volume.PROCESSED and label is None:
        raise Exception("Must supply label for processed data")
    self._workspace = StreamWorkspace.open(h5_filename)
    self._cosmos_directory = pathlib.Path(cosmos_directory)
    self._volume = volume
    self._label = label
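# Usage sketch (assumption, not from the source): the class name CosmosWriter
# and the file paths are hypothetical; the constructor above only enforces
# that a label accompany Volume.PROCESSED data.
# writer = CosmosWriter("./cosmos_out", "workspace.h5",
#                       volume=Volume.PROCESSED, label="default")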
def main(self, gmrecords):
    """Export provenance tables.

    Args:
        gmrecords: GMrecordsApp instance.
    """
    logging.info('Running subcommand \'%s\'' % self.command_name)

    self.gmrecords = gmrecords
    self._check_arguments()
    self._get_events()

    for event in self.events:
        self.eventid = event.id
        logging.info('Creating provenance tables for event %s...'
                     % self.eventid)
        event_dir = os.path.join(gmrecords.data_path, self.eventid)
        workname = os.path.join(event_dir, WORKSPACE_NAME)
        if not os.path.isfile(workname):
            logging.info(
                'No workspace file found for event %s. Please run '
                'subcommand \'assemble\' to generate workspace file.'
                % self.eventid)
            logging.info('Continuing to next event.')
            continue

        self.workspace = StreamWorkspace.open(workname)
        self._get_pstreams()

        if not (hasattr(self, 'pstreams') and len(self.pstreams) > 0):
            logging.info('No processed waveforms available. No provenance '
                         'tables created.')
            self.workspace.close()
            continue

        provdata = self.workspace.getProvenance(
            self.eventid, labels=self.gmrecords.args.label)
        self.workspace.close()

        basename = '%s_%s_provenance' % (gmrecords.project,
                                         gmrecords.args.label)
        if gmrecords.args.output_format == 'csv':
            csvfile = os.path.join(event_dir, '%s.csv' % basename)
            self.append_file('Provenance', csvfile)
            provdata.to_csv(csvfile, index=False)
        else:
            excelfile = os.path.join(event_dir, '%s.xlsx' % basename)
            self.append_file('Provenance', excelfile)
            provdata.to_excel(excelfile, index=False)

    self._summarize_files_created()
def main(self, gmrecords):
    """Export files for ShakeMap input.

    Args:
        gmrecords: GMrecordsApp instance.
    """
    logging.info('Running subcommand \'%s\'' % self.command_name)

    self.gmrecords = gmrecords
    self._get_events()

    for event in self.events:
        self.eventid = event.id
        logging.info('Creating shakemap files for event %s...'
                     % self.eventid)
        event_dir = os.path.join(gmrecords.data_path, event.id)
        workname = os.path.join(event_dir, WORKSPACE_NAME)
        if not os.path.isfile(workname):
            logging.info(
                'No workspace file found for event %s. Please run '
                'subcommand \'assemble\' to generate workspace file.'
                % event.id)
            logging.info('Continuing to next event.')
            continue

        self.workspace = StreamWorkspace.open(workname)
        self._get_labels()
        self._get_pstreams()

        if not hasattr(self, 'pstreams'):
            logging.info('No processed waveforms available. No shakemap '
                         'files created.')
            return

        # TODO: re-write this so that it uses the already computed values
        # in self.workspace.dataset.auxiliary_data.WaveFormMetrics
        # rather than recomputing the metrics from self.pstreams.
        # shakemap_file, jsonfile = save_shakemap_amps(
        #     self.pstreams, event, event_dir)
        jsonfile, stationfile, _ = create_json(self.workspace, event,
                                               event_dir,
                                               self.gmrecords.args.label)

        self.workspace.close()
        self.append_file('shakemap', jsonfile)
        self.append_file('shakemap', stationfile)

    self._summarize_files_created()
def _compute_event_waveforms(self, event):
    self.eventid = event.id
    logging.info(
        'Computing waveform metrics for event %s...' % self.eventid)
    event_dir = os.path.join(self.gmrecords.data_path, self.eventid)
    workname = os.path.join(event_dir, WORKSPACE_NAME)
    if not os.path.isfile(workname):
        logging.info(
            'No workspace file found for event %s. Please run '
            'subcommand \'assemble\' to generate workspace file.'
            % self.eventid)
        logging.info('Continuing to next event.')
        return event.id

    self.workspace = StreamWorkspace.open(workname)
    self._get_pstreams()

    if not hasattr(self, 'pstreams'):
        logging.info('No processed waveforms available. No waveform '
                     'metrics computed.')
        self.workspace.close()
        return event.id

    for stream in self.pstreams:
        if stream.passed:
            logging.info(
                'Calculating waveform metrics for %s...' % stream.get_id()
            )
            summary = StationSummary.from_config(
                stream, event=event, config=self.gmrecords.conf,
                calc_waveform_metrics=True,
                calc_station_metrics=False
            )
            xmlstr = summary.get_metric_xml()
            tag = stream.tag
            metricpath = '/'.join([
                format_netsta(stream[0].stats),
                format_nslit(stream[0].stats, stream.get_inst(), tag)
            ])
            self.workspace.insert_aux(
                xmlstr, 'WaveFormMetrics', metricpath,
                overwrite=self.gmrecords.args.overwrite)

    logging.info('Added waveform metrics to workspace files '
                 'with tag \'%s\'.' % self.gmrecords.args.label)
    self.workspace.close()
    return event.id
def main(self, gmrecords):
    """Export files for ShakeMap input.

    Args:
        gmrecords: GMrecordsApp instance.
    """
    logging.info('Running subcommand \'%s\'' % self.command_name)

    self.gmrecords = gmrecords
    self._check_arguments()
    self._get_events()

    for event in self.events:
        self.eventid = event.id
        logging.info('Creating shakemap files for event %s...'
                     % self.eventid)
        event_dir = os.path.join(gmrecords.data_path, event.id)
        workname = os.path.join(event_dir, WORKSPACE_NAME)
        if not os.path.isfile(workname):
            logging.info(
                'No workspace file found for event %s. Please run '
                'subcommand \'assemble\' to generate workspace file.'
                % event.id)
            logging.info('Continuing to next event.')
            continue

        self.workspace = StreamWorkspace.open(workname)
        self._get_labels()
        expanded_imts = self.gmrecords.args.expand_imts
        jsonfile, stationfile, _ = create_json(
            self.workspace, event, event_dir, self.gmrecords.args.label,
            config=self.gmrecords.conf, expanded_imts=expanded_imts)

        self.workspace.close()
        if jsonfile is not None:
            self.append_file('shakemap', jsonfile)
        if stationfile is not None:
            self.append_file('shakemap', stationfile)

    self._summarize_files_created()
def test_get_shakemap():
    tdir = tempfile.mkdtemp()
    try:
        thisdir = pathlib.Path(__file__).parent
        datadir = (thisdir / '..' / '..' / '..' / 'gmprocess'
                   / 'data' / 'testdata')
        datafile = datadir / 'workspace_ci38457511.hdf'
        workspace = StreamWorkspace.open(datafile)
        eventid = workspace.getEventIds()[0]
        event = workspace.getEvent(eventid)
        label = '20201209195000'
        processed = workspace.getStreams(eventid, labels=[label])

        excelfile, jsonfile = save_shakemap_amps(processed, event, tdir)
        with open(jsonfile, 'rt', encoding='utf-8') as fp:
            jdict = json.load(fp)
        assert jdict['features'][0]['id'] == 'CJ.T001230'
    except Exception as e:
        raise AssertionError(str(e))
    finally:
        shutil.rmtree(tdir)
def main(self, gmrecords):
    """Generate summary report.

    This function generates summary plots and then combines them into a
    report with latex. If latex (specifically `pdflatex`) is not found on
    the system, then the PDF report will not be generated but the
    constituent plots will be available.

    Args:
        gmrecords: GMrecordsApp instance.
    """
    logging.info('Running subcommand \'%s\'' % self.command_name)

    self.gmrecords = gmrecords
    self._get_events()

    for event in self.events:
        self.eventid = event.id
        event_dir = os.path.join(gmrecords.data_path, self.eventid)
        workname = os.path.join(event_dir, WORKSPACE_NAME)
        if not os.path.isfile(workname):
            logging.info(
                'No workspace file found for event %s. Please run '
                'subcommand \'assemble\' to generate workspace file.'
                % self.eventid)
            logging.info('Continuing to next event.')
            continue

        self.workspace = StreamWorkspace.open(workname)
        self._get_pstreams()
        self.workspace.close()

        if not hasattr(self, 'pstreams'):
            logging.info('No processed waveforms available. No report '
                         'generated.')
            return

        logging.info('Creating diagnostic plots for event %s...'
                     % self.eventid)
        plot_dir = os.path.join(event_dir, 'plots')
        if not os.path.isdir(plot_dir):
            os.makedirs(plot_dir)
        for stream in self.pstreams:
            summary_plots(stream, plot_dir, event)

        mapfile = draw_stations_map(self.pstreams, event, event_dir)
        moveoutfile = os.path.join(event_dir, 'moveout_plot.png')
        plot_moveout(self.pstreams, event.latitude, event.longitude,
                     file=moveoutfile)
        self.append_file('Station map', mapfile)
        self.append_file('Moveout plot', moveoutfile)

        logging.info('Generating summary report for event %s...'
                     % self.eventid)

        build_conf = gmrecords.conf['build_report']
        report_format = build_conf['format']
        if report_format == 'latex':
            report_file, success = build_report_latex(
                self.pstreams,
                event_dir,
                event,
                prefix="%s_%s" % (gmrecords.project, gmrecords.args.label),
                config=gmrecords.conf)
        else:
            report_file = ''
            success = False
        if os.path.isfile(report_file) and success:
            self.append_file('Summary report', report_file)

    self._summarize_files_created()
def main(self, gmrecords):
    """Export metric tables.

    Args:
        gmrecords: GMrecordsApp instance.
    """
    logging.info('Running subcommand \'%s\'' % self.command_name)

    self.gmrecords = gmrecords
    self._get_events()

    for event in self.events:
        self.eventid = event.id
        logging.info('Creating tables for event %s...' % self.eventid)
        event_dir = os.path.join(gmrecords.data_path, self.eventid)
        workname = os.path.join(event_dir, WORKSPACE_NAME)
        if not os.path.isfile(workname):
            logging.info(
                'No workspace file found for event %s. Please run '
                'subcommand \'assemble\' to generate workspace file.'
                % self.eventid)
            logging.info('Continuing to next event.')
            continue

        self.workspace = StreamWorkspace.open(workname)
        self._get_pstreams()

        if not hasattr(self, 'pstreams'):
            logging.info('No processed waveforms available. No metric '
                         'tables created.')
            self.workspace.close()
            return

        event_table, imc_tables, readmes = self.workspace.getTables(
            self.gmrecords.args.label, streams=self.pstreams)
        ev_fit_spec, fit_readme = self.workspace.getFitSpectraTable(
            self.eventid, self.gmrecords.args.label, self.pstreams)
        self.workspace.close()

        outdir = gmrecords.data_path

        # Set the precisions for the imc tables, event table, and
        # fit_spectra table before writing.
        imc_tables_formatted = {}
        for imc, imc_table in imc_tables.items():
            imc_tables_formatted[imc] = set_precisions(imc_table)
        event_table_formatted = set_precisions(event_table)
        df_fit_spectra_formatted = set_precisions(ev_fit_spec)

        imc_list = [
            '%s_%s_metrics_%s' % (gmrecords.project, gmrecords.args.label,
                                  imc.lower())
            for imc in imc_tables_formatted.keys()
        ]
        readme_list = [
            '%s_%s_metrics_%s_README' % (gmrecords.project,
                                         gmrecords.args.label, imc.lower())
            for imc in readmes.keys()
        ]
        proj_lab = (gmrecords.project, gmrecords.args.label)
        event_filename = ['%s_%s_events' % proj_lab]
        filenames = event_filename + imc_list + readme_list + [
            '%s_%s_fit_spectra_parameters' % proj_lab,
            '%s_%s_fit_spectra_parameters_README' % proj_lab
        ]

        files = [event_table_formatted] + list(
            imc_tables_formatted.values()) + list(
            readmes.values()) + [df_fit_spectra_formatted, fit_readme]

        output_format = gmrecords.args.output_format
        if output_format != 'csv':
            output_format = 'xlsx'

        for filename, df in dict(zip(filenames, files)).items():
            filepath = os.path.join(outdir,
                                    filename + '.%s' % output_format)
            if os.path.exists(filepath):
                if 'README' in filename:
                    continue
                else:
                    if self.gmrecords.args.overwrite:
                        logging.warning('File exists: %s' % filename)
                        logging.warning('Overwriting file: %s' % filename)
                        mode = 'w'
                        header = True
                    else:
                        logging.warning('File exists: %s' % filename)
                        logging.warning('Appending to file: %s' % filename)
                        mode = 'a'
                        header = False
            else:
                mode = 'w'
                header = True
            if output_format == 'csv':
                df.to_csv(filepath, index=False,
                          float_format=DEFAULT_FLOAT_FORMAT,
                          na_rep=DEFAULT_NA_REP,
                          mode=mode, header=header)
                if mode == "w":
                    self.append_file('Metric tables', filepath)
            else:
                df.to_excel(filepath, index=False,
                            float_format=DEFAULT_FLOAT_FORMAT,
                            na_rep=DEFAULT_NA_REP,
                            mode=mode, header=header)
                if mode == "w":
                    self.append_file('Metric tables', filepath)

    self._summarize_files_created()
def _event_station_metrics(self, event):
    self.eventid = event.id
    logging.info('Computing station metrics for event %s...'
                 % self.eventid)
    event_dir = os.path.join(self.gmrecords.data_path, self.eventid)
    workname = os.path.join(event_dir, WORKSPACE_NAME)
    if not os.path.isfile(workname):
        logging.info(
            'No workspace file found for event %s. Please run '
            'subcommand \'assemble\' to generate workspace file.'
            % self.eventid)
        logging.info('Continuing to next event.')
        return event.id

    self.workspace = StreamWorkspace.open(workname)
    self._get_pstreams()

    rupture_file = get_rupture_file(event_dir)
    origin = Origin({
        'id': self.eventid,
        'netid': '',
        'network': '',
        'lat': event.latitude,
        'lon': event.longitude,
        'depth': event.depth_km,
        'locstring': '',
        'mag': event.magnitude,
        'time': event.time
    })
    rupture = get_rupture(origin, rupture_file)

    if not hasattr(self, 'pstreams'):
        logging.info('No processed waveforms available. No station '
                     'metrics computed.')
        self.workspace.close()
        return

    for stream in self.pstreams:
        logging.info('Calculating station metrics for %s...'
                     % stream.get_id())
        summary = StationSummary.from_config(
            stream, event=event, config=self.gmrecords.conf,
            calc_waveform_metrics=False,
            calc_station_metrics=True,
            rupture=rupture, vs30_grids=self.vs30_grids)
        xmlstr = summary.get_station_xml()
        metricpath = '/'.join([
            format_netsta(stream[0].stats),
            format_nslit(stream[0].stats, stream.get_inst(), self.eventid)
        ])
        self.workspace.insert_aux(
            xmlstr, 'StationMetrics', metricpath,
            overwrite=self.gmrecords.args.overwrite)

    logging.info('Added station metrics to workspace files '
                 'with tag \'%s\'.' % self.gmrecords.args.label)
    self.workspace.close()
    return event.id
def process_event(event, outdir, pcommands, config, input_directory,
                  process_tag, files_created, output_format, status,
                  recompute_metrics, export_dir=None):
    # setup logging to write to the input logfile
    argthing = namedtuple('args', ['debug', 'quiet'])
    args = argthing(debug=True, quiet=False)
    setup_logger(args)
    logger = logging.getLogger()
    stream_handler = logger.handlers[0]
    logfile = os.path.join(outdir, '%s.log' % event.id)
    fhandler = logging.FileHandler(logfile)
    logger.removeHandler(stream_handler)
    logger.addHandler(fhandler)

    event_dir = os.path.join(outdir, event.id)
    if not os.path.exists(event_dir):
        os.makedirs(event_dir)

    workname = os.path.join(event_dir, WORKSPACE_NAME)
    workspace_exists = os.path.isfile(workname)
    workspace_has_processed = False
    workspace = None
    processing_done = False

    if workspace_exists:
        workspace = StreamWorkspace.open(workname)
        labels = workspace.getLabels()
        if len(labels):
            labels.remove('unprocessed')
        elif 'assemble' not in pcommands:
            print('No data in workspace. Please run assemble.')
            sys.exit(1)

        if len(labels) == 1:
            process_tag = labels[0]
            workspace_has_processed = True
        else:
            if 'process' not in pcommands:
                fmt = '\nThere are %i sets of processed data in %s.'
                tpl = (len(labels), workname)
                print(fmt % tpl)
                print(('This software can only handle one set of '
                       'processed data. Exiting.\n'))
                sys.exit(1)

    download_done = False

    # Need to initialize rstreams/pstreams
    rstreams = []
    pstreams = []

    rupture_file = None
    if 'assemble' in pcommands:
        logging.info('Downloading/loading raw streams...')
        workspace, workspace_file, rstreams, rupture_file = download(
            event, event_dir, config, input_directory)
        download_done = True
        append_file(files_created, 'Workspace', workname)
    else:
        if not workspace_exists:
            print('\nYou opted not to download or process from input.')
            print('No previous HDF workspace file could be found.')
            print('Try re-running with the assemble command with or ')
            print('without the --directory option.\n')
            sys.exit(1)
        if 'process' in pcommands:
            logging.info('Getting raw streams from workspace...')
            with warnings.catch_warnings():
                warnings.simplefilter("ignore",
                                      category=H5pyDeprecationWarning)
                rstreams = workspace.getStreams(event.id,
                                                labels=['unprocessed'])
            download_done = True
        else:
            need_processed = set(['report', 'shakemap'])
            need_pstreams = len(need_processed.intersection(pcommands))
            if workspace_has_processed:
                if need_pstreams:
                    logging.info(
                        'Getting processed streams from workspace...')
                    with warnings.catch_warnings():
                        warnings.simplefilter(
                            "ignore", category=H5pyDeprecationWarning)
                        pstreams = workspace.getStreams(
                            event.id, labels=[process_tag])
                download_done = True
                processing_done = True

    if ('process' in pcommands
            and download_done
            and not processing_done
            and len(rstreams)):
        logging.info('Processing raw streams for event %s...' % event.id)
        pstreams = process_streams(rstreams, event, config=config)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore",
                                  category=H5pyDeprecationWarning)
            workspace.addStreams(event, pstreams, label=process_tag)
            workspace.calcMetrics(event.id, labels=[process_tag],
                                  config=config, streams=pstreams,
                                  stream_label=process_tag,
                                  rupture_file=rupture_file)
        processing_done = True

    if 'export' in pcommands:
        if export_dir is not None:
            if not os.path.isdir(export_dir):
                os.makedirs(export_dir)
            outdir = export_dir

        labels = workspace.getLabels()
        if 'unprocessed' not in labels:
            fmt = ('Workspace file "%s" appears to have no unprocessed '
                   'data. Skipping.')
            logging.info(fmt % workspace_file)
        else:
            labels.remove('unprocessed')
            if not labels:
                fmt = ('Workspace file "%s" appears to have no processed '
                       'data. Skipping.')
                logging.info(fmt % workspace_file)
            else:
                logging.info('Creating tables for event %s...', event.id)
                with warnings.catch_warnings():
                    warnings.simplefilter(
                        "ignore", category=H5pyDeprecationWarning)
                    if recompute_metrics:
                        del workspace.dataset.auxiliary_data.WaveFormMetrics
                        del workspace.dataset.auxiliary_data.StationMetrics
                        workspace.calcMetrics(
                            event.id, labels=labels, config=config,
                            rupture_file=rupture_file)
                    event_table, imc_tables, readmes = workspace.getTables(
                        labels[0], streams=pstreams,
                        stream_label=process_tag)
                    ev_fit_spec, fit_readme = workspace.getFitSpectraTable(
                        event.id, labels[0], pstreams)

                # Set the precisions for the imc tables, event table, and
                # fit_spectra table before writing
                imc_tables_formatted = {}
                for imc, imc_table in imc_tables.items():
                    imc_tables_formatted[imc] = set_precisions(imc_table)
                event_table_formatted = set_precisions(event_table)
                df_fit_spectra_formatted = set_precisions(ev_fit_spec)

                if not os.path.isdir(outdir):
                    os.makedirs(outdir)

                filenames = ['events'] + \
                    [imc.lower() for imc in imc_tables_formatted.keys()] + \
                    [imc.lower() + '_README' for imc in readmes.keys()] + \
                    ['fit_spectra_parameters',
                     'fit_spectra_parameters_README']

                files = [event_table_formatted] + list(
                    imc_tables_formatted.values()) + list(
                    readmes.values()) + [
                    df_fit_spectra_formatted, fit_readme]

                if output_format != 'csv':
                    output_format = 'xlsx'

                for filename, df in dict(zip(filenames, files)).items():
                    filepath = os.path.join(
                        outdir, filename + '.%s' % output_format)
                    if os.path.exists(filepath):
                        if 'README' in filename:
                            continue
                        else:
                            mode = 'a'
                            header = False
                    else:
                        mode = 'w'
                        header = True
                    append_file(files_created, 'Tables', filepath)
                    if output_format == 'csv':
                        df.to_csv(filepath, index=False,
                                  float_format=DEFAULT_FLOAT_FORMAT,
                                  na_rep=DEFAULT_NA_REP,
                                  mode=mode, header=header)
                    else:
                        df.to_excel(filepath, index=False,
                                    float_format=DEFAULT_FLOAT_FORMAT,
                                    na_rep=DEFAULT_NA_REP,
                                    mode=mode, header=header)

    if ('report' in pcommands
            and processing_done
            and len(pstreams)):
        logging.info('Creating diagnostic plots for event %s...' % event.id)
        plot_dir = os.path.join(event_dir, 'plots')
        if not os.path.isdir(plot_dir):
            os.makedirs(plot_dir)
        for stream in pstreams:
            summary_plots(stream, plot_dir, event)

        mapfile = draw_stations_map(pstreams, event, event_dir)
        plot_moveout(pstreams, event.latitude, event.longitude,
                     file=os.path.join(event_dir, 'moveout_plot.png'))
        append_file(files_created, 'Station map', mapfile)
        append_file(files_created, 'Moveout plot', 'moveout_plot.png')

        logging.info('Creating diagnostic report for event %s...'
                     % event.id)
        # Build the summary report?
        build_conf = config['build_report']
        report_format = build_conf['format']
        if report_format == 'latex':
            report_file, success = build_report_latex(
                pstreams, event_dir, event, config=config)
        else:
            report_file = ''
            success = False
        if os.path.isfile(report_file) and success:
            append_file(files_created, 'Summary report', report_file)

    if 'provenance' in pcommands and processing_done and len(pstreams):
        logging.info('Creating provenance table for event %s...'
                     % event.id)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore",
                                  category=H5pyDeprecationWarning)
            provdata = workspace.getProvenance(event.id,
                                               labels=[process_tag])
        if output_format == 'csv':
            csvfile = os.path.join(event_dir, 'provenance.csv')
            append_file(files_created, 'Provenance', csvfile)
            provdata.to_csv(csvfile)
        else:
            excelfile = os.path.join(event_dir, 'provenance.xlsx')
            append_file(files_created, 'Provenance', excelfile)
            provdata.to_excel(excelfile, index=False)

    if 'shakemap' in pcommands and processing_done and len(pstreams):
        logging.info('Creating shakemap table for event %s...' % event.id)
        shakemap_file, jsonfile = save_shakemap_amps(pstreams, event,
                                                     event_dir)
        append_file(files_created, 'shakemap', shakemap_file)
        append_file(files_created, 'shakemap', jsonfile)

    if status and processing_done and len(pstreams):
        if status == 'short':
            index = 'Failure reason'
            col = ['Number of records']
        elif status == 'long':
            index = 'Station ID'
            col = ['Failure reason']
        elif status == 'net':
            index = 'Network'
            col = ['Number of passed records', 'Number of failed records']
        status_info = pstreams.get_status(status)
        status_info.to_csv(os.path.join(event_dir, 'status.csv'),
                           header=col, index_label=index)

    # since we don't know how many events users will be processing,
    # let's guard against memory issues by clearing out the big data
    # structures
    workspace.close()

    logging.info('Finishing event %s' % event.id)
    return workname
def get_rec_plot(eqid, stid, wdir):
    ws = StreamWorkspace.open(os.path.join(wdir, eqid, 'workspace.h5'))
    labels = ws.getLabels()
    labels.remove('unprocessed')
    station = stid.split('.')[1]
    r_st = ws.getStreams(eqid, stations=[station],
                         labels=['unprocessed'])[0]
    if labels:
        p_st = ws.getStreams(eqid, stations=[station],
                             labels=[labels[0]])[0]
        v_st = p_st.copy().integrate()

    fig = make_subplots(rows=5, cols=len(r_st))
    shapes = []
    for tr_idx, r_tr in enumerate(r_st):
        p_tr, v_tr = p_st[tr_idx], v_st[tr_idx]
        x_start = p_tr.stats.starttime - r_tr.stats.starttime
        x_end = p_tr.stats.endtime - r_tr.stats.starttime
        sig_dict = p_tr.getCached('signal_spectrum')
        noi_dict = p_tr.getCached('noise_spectrum')
        sig_dict_s = p_tr.getCached('smooth_signal_spectrum')
        noi_dict_s = p_tr.getCached('smooth_noise_spectrum')
        snr_dict = p_tr.getCached('snr')
        sig_spec, sig_freq = sig_dict['spec'], sig_dict['freq']
        noi_spec, noi_freq = noi_dict['spec'], noi_dict['freq']
        sig_spec_s, sig_freq_s = sig_dict_s['spec'], sig_dict_s['freq']
        noi_spec_s, noi_freq_s = noi_dict_s['spec'], noi_dict_s['freq']
        snr, snr_freq = snr_dict['snr'], snr_dict['freq']
        snr_conf = p_tr.getParameter('snr_conf')
        threshold = snr_conf['threshold']
        min_freq, max_freq = snr_conf['min_freq'], snr_conf['max_freq']
        lp = p_tr.getProvenance('lowpass_filter')[0]['corner_frequency']
        hp = p_tr.getProvenance('highpass_filter')[0]['corner_frequency']
        fit_spectra_dict = p_tr.getParameter('fit_spectra')
        f0 = fit_spectra_dict['f0']
        model_spec = spectrum.model(
            (fit_spectra_dict['moment'], fit_spectra_dict['stress_drop']),
            freq=np.array(sig_freq_s),
            dist=fit_spectra_dict['epi_dist'],
            kappa=fit_spectra_dict['kappa'])

        scatter_data = [[r_tr.times(), r_tr.data, 'black', None, 1],
                        [p_tr.times(), p_tr.data, 'black', None, 2],
                        [v_tr.times(), v_tr.data, 'black', None, 3],
                        [sig_freq, sig_spec, 'lightblue', None, 4],
                        [sig_freq_s, sig_spec_s, 'blue', None, 4],
                        [noi_freq, noi_spec, 'salmon', None, 4],
                        [noi_freq_s, noi_spec_s, 'red', None, 4],
                        [sig_freq_s, model_spec, 'black', 'dash', 4],
                        [snr_freq, snr, 'black', None, 5]]
        line_data = [[x_start, r_tr.data.min(), x_start, r_tr.data.max(),
                      'red', 'dash', 1],
                     [x_end, r_tr.data.min(), x_end, r_tr.data.max(),
                      'red', 'dash', 1],
                     [f0, 1e-10, f0, sig_spec.max(), 'black', 'dash', 4],
                     [snr_freq.min(), threshold, snr_freq.max(), threshold,
                      'gray', None, 5],
                     [min_freq, 1e-3, min_freq, snr.max(), 'gray', None, 5],
                     [max_freq, 1e-3, max_freq, snr.max(), 'gray', None, 5],
                     [lp, 1e-3, lp, snr.max(), 'black', 'dash', 5],
                     [hp, 1e-3, hp, snr.max(), 'black', 'dash', 5]]

        for scatter in scatter_data:
            fig.append_trace(go.Scatter(x=scatter[0], y=scatter[1],
                                        line=dict(color=scatter[2],
                                                  dash=scatter[3])),
                             row=scatter[4], col=tr_idx + 1)
        for line in line_data:
            i_ref = len(r_st) * (line[6] - 1) + tr_idx + 1
            shapes.append({
                'type': 'line',
                'x0': line[0],
                'y0': line[1],
                'x1': line[2],
                'y1': line[3],
                'xref': 'x%s' % i_ref,
                'yref': 'y%s' % i_ref,
                'line': {
                    'color': line[4],
                    'dash': line[5]
                }
            })

    fig.update_xaxes(title_text='Time (s)')
    fig.update_yaxes(row=1, col=1, title_text='Raw counts')
    fig.update_yaxes(row=2, col=1, title_text='Acceleration (cm/s^2)')
    fig.update_yaxes(row=3, col=1, title_text='Velocity (cm/s)')
    fig.update_xaxes(row=4, title_text='Frequency (Hz)', type='log')
    fig.update_yaxes(row=4, col=1, title_text='Amplitude (cm/s)', type='log')
    fig.update_yaxes(row=4, type='log')
    fig.update_xaxes(row=5, title_text='Frequency (Hz)', type='log')
    fig.update_yaxes(row=5, col=1, title_text='SNR', type='log')
    fig.update_yaxes(row=5, type='log')
    fig.update_layout(showlegend=False)
    fig['layout'].update(shapes=shapes,
                         margin={'b': 0, 't': 0, 'l': 0, 'r': 30})
    ws.close()
    return fig
def _event_station_metrics(self, event):
    self.eventid = event.id
    logging.info('Computing station metrics for event %s...'
                 % self.eventid)
    event_dir = os.path.join(self.gmrecords.data_path, self.eventid)
    workname = os.path.join(event_dir, WORKSPACE_NAME)
    if not os.path.isfile(workname):
        logging.info(
            'No workspace file found for event %s. Please run '
            'subcommand \'assemble\' to generate workspace file.'
            % self.eventid)
        logging.info('Continuing to next event.')
        return event.id

    self.workspace = StreamWorkspace.open(workname)
    self._get_pstreams()

    if not (hasattr(self, 'pstreams') and len(self.pstreams) > 0):
        logging.info('No streams found. Nothing to do. Goodbye.')
        self.workspace.close()
        return event.id

    rupture_file = get_rupture_file(event_dir)
    origin = Origin({
        'id': self.eventid,
        'netid': '',
        'network': '',
        'lat': event.latitude,
        'lon': event.longitude,
        'depth': event.depth_km,
        'locstring': '',
        'mag': event.magnitude,
        'time': event.time
    })
    self.origin = origin
    rupture = get_rupture(origin, rupture_file)

    sta_lats = []
    sta_lons = []
    sta_elev = []
    self.sta_repi = []
    self.sta_rhyp = []
    self.sta_baz = []
    for st in self.pstreams:
        sta_lats.append(st[0].stats.coordinates.latitude)
        sta_lons.append(st[0].stats.coordinates.longitude)
        sta_elev.append(st[0].stats.coordinates.elevation)
        geo_tuple = gps2dist_azimuth(st[0].stats.coordinates.latitude,
                                     st[0].stats.coordinates.longitude,
                                     origin.lat, origin.lon)
        self.sta_repi.append(geo_tuple[0] / M_PER_KM)
        self.sta_baz.append(geo_tuple[1])
        self.sta_rhyp.append(
            distance(st[0].stats.coordinates.longitude,
                     st[0].stats.coordinates.latitude,
                     -st[0].stats.coordinates.elevation / M_PER_KM,
                     origin.lon, origin.lat, origin.depth))

    if isinstance(rupture, PointRupture):
        self._get_ps2ff_splines()
        rjb_hat = self.rjb_spline(self.sta_repi)
        rjb_mean = rjb_hat[0]
        rjb_var = rjb_hat[1]
        rrup_hat = self.rrup_spline(self.sta_repi)
        rrup_mean = rrup_hat[0]
        rrup_var = rrup_hat[1]
        gc2_rx = np.full_like(rjb_mean, np.nan)
        gc2_ry = np.full_like(rjb_mean, np.nan)
        gc2_ry0 = np.full_like(rjb_mean, np.nan)
        gc2_U = np.full_like(rjb_mean, np.nan)
        gc2_T = np.full_like(rjb_mean, np.nan)
    else:
        logging.info('******************************')
        logging.info('* Found rupture *')
        logging.info('******************************')
        sta_lons = np.array(sta_lons)
        sta_lats = np.array(sta_lats)
        elev = np.full_like(sta_lons, ELEVATION_FOR_DISTANCE_CALCS)
        rrup_mean, rrup_var = rupture.computeRrup(sta_lons, sta_lats, elev)
        rjb_mean, rjb_var = rupture.computeRjb(sta_lons, sta_lats, elev)
        rrup_var = np.full_like(rrup_mean, np.nan)
        rjb_var = np.full_like(rjb_mean, np.nan)
        gc2_dict = rupture.computeGC2(sta_lons, sta_lats, elev)
        gc2_rx = gc2_dict['rx']
        gc2_ry = gc2_dict['ry']
        gc2_ry0 = gc2_dict['ry0']
        gc2_U = gc2_dict['U']
        gc2_T = gc2_dict['T']

        # If we don't have a point rupture, then back azimuth needs
        # to be calculated to the closest point on the rupture
        self.sta_baz = []
        for i in range(len(self.pstreams)):
            dists = []
            bazs = []
            for quad in rupture._quadrilaterals:
                P0, P1, P2, P3 = quad
                for point in [P0, P1]:
                    dist, az, baz = gps2dist_azimuth(
                        point.y, point.x, sta_lats[i], sta_lons[i])
                    dists.append(dist)
                    bazs.append(baz)
            self.sta_baz.append(bazs[np.argmin(dists)])

    for i, stream in enumerate(self.pstreams):
        logging.info('Calculating station metrics for %s...'
                     % stream.get_id())
        summary = StationSummary.from_config(
            stream, event=event, config=self.gmrecords.conf,
            calc_waveform_metrics=False,
            calc_station_metrics=False,
            rupture=rupture, vs30_grids=self.vs30_grids)
        summary._distances = {
            'epicentral': self.sta_repi[i],
            'hypocentral': self.sta_rhyp[i],
            'rupture': rrup_mean[i],
            'rupture_var': rrup_var[i],
            'joyner_boore': rjb_mean[i],
            'joyner_boore_var': rjb_var[i],
            'gc2_rx': gc2_rx[i],
            'gc2_ry': gc2_ry[i],
            'gc2_ry0': gc2_ry0[i],
            'gc2_U': gc2_U[i],
            'gc2_T': gc2_T[i]
        }
        summary._back_azimuth = self.sta_baz[i]
        if self.vs30_grids is not None:
            for vs30_name in self.vs30_grids.keys():
                tmpgrid = self.vs30_grids[vs30_name]
                summary._vs30[vs30_name] = {
                    'value': tmpgrid['grid_object'].getValue(
                        float(sta_lats[i]), float(sta_lons[i])),
                    'column_header': tmpgrid['column_header'],
                    'readme_entry': tmpgrid['readme_entry'],
                    'units': tmpgrid['units']
                }
        xmlstr = summary.get_station_xml()
        metricpath = '/'.join([
            format_netsta(stream[0].stats),
            format_nslit(stream[0].stats, stream.get_inst(), self.eventid)
        ])
        self.workspace.insert_aux(
            xmlstr, 'StationMetrics', metricpath,
            overwrite=self.gmrecords.args.overwrite)

    logging.info('Added station metrics to workspace files '
                 'with tag \'%s\'.' % self.gmrecords.args.label)
    self.workspace.close()
    return event.id
def test_workspace():
    eventid = 'us1000778i'
    datafiles, event = read_data_dir('geonet', eventid, '*.V1A')
    tdir = tempfile.mkdtemp()
    try:
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore",
                                    category=H5pyDeprecationWarning)
            warnings.filterwarnings("ignore", category=YAMLLoadWarning)
            warnings.filterwarnings("ignore", category=FutureWarning)
            config = get_config()
            tfile = os.path.join(tdir, 'test.hdf')
            raw_streams = []
            for dfile in datafiles:
                raw_streams += read_data(dfile)

            workspace = StreamWorkspace(tfile)
            t1 = time.time()
            workspace.addStreams(event, raw_streams, label='raw')
            t2 = time.time()
            print('Adding %i streams took %.2f seconds'
                  % (len(raw_streams), (t2 - t1)))

            str_repr = workspace.__repr__()
            assert str_repr == 'Events: 1 Stations: 3 Streams: 3'

            eventobj = workspace.getEvent(eventid)
            assert eventobj.origins[0].latitude == \
                event.origins[0].latitude
            assert eventobj.magnitudes[0].mag == event.magnitudes[0].mag

            stations = workspace.getStations()
            assert sorted(stations) == ['hses', 'thz', 'wtmc']

            stations = workspace.getStations(eventid=eventid)
            assert sorted(stations) == ['hses', 'thz', 'wtmc']

            # test retrieving tags for an event that doesn't exist
            try:
                workspace.getStreamTags('foo')
            except KeyError:
                assert 1 == 1

            # test retrieving event that doesn't exist
            try:
                workspace.getEvent('foo')
            except KeyError:
                assert 1 == 1

            instream = None
            for stream in raw_streams:
                if stream[0].stats.station.lower() == 'hses':
                    instream = stream
                    break
            if instream is None:
                assert 1 == 2
            outstream = workspace.getStreams(eventid,
                                             stations=['hses'],
                                             labels=['raw'])[0]
            compare_streams(instream, outstream)

            label_summary = workspace.summarizeLabels()
            assert label_summary.iloc[0]['Label'] == 'raw'
            assert label_summary.iloc[0]['Software'] == 'gmprocess'

            sc = StreamCollection(raw_streams)
            processed_streams = process_streams(sc, event, config=config)
            workspace.addStreams(event, processed_streams, 'processed')

            idlist = workspace.getEventIds()
            assert idlist[0] == eventid

            event_tags = workspace.getStreamTags(eventid)
            assert sorted(event_tags) == ['hses_processed', 'hses_raw',
                                          'thz_processed', 'thz_raw',
                                          'wtmc_processed', 'wtmc_raw']
            outstream = workspace.getStreams(eventid,
                                             stations=['hses'],
                                             labels=['processed'])[0]

            provenance = workspace.getProvenance(eventid,
                                                 labels=['processed'])
            first_row = pd.Series({'Record': 'NZ.HSES.HN1',
                                   'Processing Step': 'Remove Response',
                                   'Step Attribute': 'input_units',
                                   'Attribute Value': 'counts'})
            last_row = pd.Series({'Record': 'NZ.WTMC.HNZ',
                                  'Processing Step': 'Lowpass Filter',
                                  'Step Attribute': 'number_of_passes',
                                  'Attribute Value': 2})
            assert provenance.iloc[0].equals(first_row)
            assert provenance.iloc[-1].equals(last_row)

            # compare the parameters from the input processed stream
            # to its output equivalent
            instream = None
            for stream in processed_streams:
                if stream[0].stats.station.lower() == 'hses':
                    instream = stream
                    break
            if instream is None:
                assert 1 == 2
            compare_streams(instream, outstream)
            workspace.close()

            # read in data from a second event and stash it in the workspace
            eventid = 'nz2018p115908'
            datafiles, event = read_data_dir('geonet', eventid, '*.V2A')
            raw_streams = []
            for dfile in datafiles:
                raw_streams += read_data(dfile)

            workspace = StreamWorkspace.open(tfile)
            workspace.addStreams(event, raw_streams, label='foo')

            stations = workspace.getStations(eventid)
            eventids = workspace.getEventIds()
            assert eventids == ['us1000778i', 'nz2018p115908']
            instation = raw_streams[0][0].stats.station
            this_stream = workspace.getStreams(eventid,
                                               stations=[instation],
                                               labels=['foo'])[0]
            assert instation == this_stream[0].stats.station

            usid = 'us1000778i'
            inventory = workspace.getInventory(usid)
            codes = [station.code
                     for station in inventory.networks[0].stations]
            assert sorted(codes) == ['HSES', 'THZ', 'WPWS', 'WTMC']
    except Exception as e:
        raise (e)
    finally:
        shutil.rmtree(tdir)
def _test_workspace():
    eventid = "us1000778i"
    datafiles, event = read_data_dir("geonet", eventid, "*.V1A")
    tdir = tempfile.mkdtemp()
    try:
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore",
                                    category=H5pyDeprecationWarning)
            warnings.filterwarnings("ignore", category=YAMLError)
            warnings.filterwarnings("ignore", category=FutureWarning)
            config = update_config(
                os.path.join(datadir, "config_min_freq_0p2.yml"))
            tfile = os.path.join(tdir, "test.hdf")
            raw_streams = []
            for dfile in datafiles:
                raw_streams += read_data(dfile)

            workspace = StreamWorkspace(tfile)
            t1 = time.time()
            workspace.addStreams(event, raw_streams, label="raw")
            t2 = time.time()
            print("Adding %i streams took %.2f seconds"
                  % (len(raw_streams), (t2 - t1)))

            str_repr = workspace.__repr__()
            assert str_repr == "Events: 1 Stations: 3 Streams: 3"

            eventobj = workspace.getEvent(eventid)
            assert eventobj.origins[0].latitude == \
                event.origins[0].latitude
            assert eventobj.magnitudes[0].mag == event.magnitudes[0].mag

            stations = workspace.getStations()
            assert sorted(stations) == ["HSES", "THZ", "WTMC"]

            stations = workspace.getStations()
            assert sorted(stations) == ["HSES", "THZ", "WTMC"]

            # test retrieving event that doesn't exist
            with pytest.raises(KeyError):
                workspace.getEvent("foo")

            instream = None
            for stream in raw_streams:
                if stream[0].stats.station.lower() == "hses":
                    instream = stream
                    break
            if instream is None:
                raise ValueError("Instream should not be none.")
            outstream = workspace.getStreams(eventid,
                                             stations=["HSES"],
                                             labels=["raw"])[0]
            compare_streams(instream, outstream)

            label_summary = workspace.summarizeLabels()
            assert label_summary.iloc[0]["Label"] == "raw"
            assert label_summary.iloc[0]["Software"] == "gmprocess"

            sc = StreamCollection(raw_streams)
            processed_streams = process_streams(sc, event, config=config)
            workspace.addStreams(event, processed_streams, "processed")

            idlist = workspace.getEventIds()
            assert idlist[0] == eventid

            outstream = workspace.getStreams(eventid,
                                             stations=["HSES"],
                                             labels=["processed"])[0]

            provenance = workspace.getProvenance(eventid,
                                                 labels=["processed"])
            first_row = pd.Series({
                "Record": "NZ.HSES.--.HN1_us1000778i_processed",
                "Processing Step": "Remove Response",
                "Step Attribute": "input_units",
                "Attribute Value": "counts",
            })
            last_row = pd.Series({
                "Record": "NZ.WTMC.--.HNZ_us1000778i_processed",
                "Processing Step": "Lowpass Filter",
                "Step Attribute": "number_of_passes",
                "Attribute Value": 2,
            })
            assert provenance.iloc[0].equals(first_row)
            assert provenance.iloc[-1].equals(last_row)

            # compare the parameters from the input processed stream
            # to its output equivalent
            instream = None
            for stream in processed_streams:
                if stream[0].stats.station.lower() == "hses":
                    instream = stream
                    break
            if instream is None:
                raise ValueError("Instream should not be none.")
            compare_streams(instream, outstream)
            workspace.close()

            # read in data from a second event and stash it in the workspace
            eventid = "nz2018p115908"
            datafiles, event = read_data_dir("geonet", eventid, "*.V2A")
            raw_streams = []
            for dfile in datafiles:
                raw_streams += read_data(dfile)

            workspace = StreamWorkspace.open(tfile)
            workspace.addStreams(event, raw_streams, label="foo")

            stations = workspace.getStations()
            eventids = workspace.getEventIds()
            assert eventids == ["us1000778i", "nz2018p115908"]
            instation = raw_streams[0][0].stats.station
            this_stream = workspace.getStreams(eventid,
                                               stations=[instation],
                                               labels=["foo"])[0]
            assert instation == this_stream[0].stats.station

            usid = "us1000778i"
            inventory = workspace.getInventory(usid)
            workspace.close()
            codes = [station.code
                     for station in inventory.networks[0].stations]
            assert sorted(set(codes)) == ["HSES", "THZ", "WPWS", "WTMC"]
    except Exception as e:
        raise (e)
    finally:
        shutil.rmtree(tdir)
def _test_workspace():
    eventid = 'us1000778i'
    datafiles, event = read_data_dir('geonet', eventid, '*.V1A')
    tdir = tempfile.mkdtemp()
    try:
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore",
                                    category=H5pyDeprecationWarning)
            warnings.filterwarnings("ignore", category=YAMLLoadWarning)
            warnings.filterwarnings("ignore", category=FutureWarning)
            config = update_config(
                os.path.join(datadir, 'config_min_freq_0p2.yml'))
            tfile = os.path.join(tdir, 'test.hdf')
            raw_streams = []
            for dfile in datafiles:
                raw_streams += read_data(dfile)

            workspace = StreamWorkspace(tfile)
            t1 = time.time()
            workspace.addStreams(event, raw_streams, label='raw')
            t2 = time.time()
            print('Adding %i streams took %.2f seconds'
                  % (len(raw_streams), (t2 - t1)))

            str_repr = workspace.__repr__()
            assert str_repr == 'Events: 1 Stations: 3 Streams: 3'

            eventobj = workspace.getEvent(eventid)
            assert eventobj.origins[0].latitude == \
                event.origins[0].latitude
            assert eventobj.magnitudes[0].mag == event.magnitudes[0].mag

            stations = workspace.getStations()
            assert sorted(stations) == ['HSES', 'THZ', 'WTMC']

            stations = workspace.getStations(eventid=eventid)
            assert sorted(stations) == ['HSES', 'THZ', 'WTMC']

            # test retrieving event that doesn't exist
            with pytest.raises(KeyError):
                workspace.getEvent('foo')

            instream = None
            for stream in raw_streams:
                if stream[0].stats.station.lower() == 'hses':
                    instream = stream
                    break
            if instream is None:
                raise ValueError('Instream should not be none.')
            outstream = workspace.getStreams(eventid,
                                             stations=['HSES'],
                                             labels=['raw'])[0]
            compare_streams(instream, outstream)

            label_summary = workspace.summarizeLabels()
            assert label_summary.iloc[0]['Label'] == 'raw'
            assert label_summary.iloc[0]['Software'] == 'gmprocess'

            sc = StreamCollection(raw_streams)
            processed_streams = process_streams(sc, event, config=config)
            workspace.addStreams(event, processed_streams, 'processed')

            idlist = workspace.getEventIds()
            assert idlist[0] == eventid

            outstream = workspace.getStreams(eventid,
                                             stations=['HSES'],
                                             labels=['processed'])[0]

            provenance = workspace.getProvenance(eventid,
                                                 labels=['processed'])
            first_row = pd.Series({
                'Record': 'NZ.HSES.--.HN1_us1000778i_processed',
                'Processing Step': 'Remove Response',
                'Step Attribute': 'input_units',
                'Attribute Value': 'counts'
            })
            last_row = pd.Series({
                'Record': 'NZ.WTMC.--.HNZ_us1000778i_processed',
                'Processing Step': 'Lowpass Filter',
                'Step Attribute': 'number_of_passes',
                'Attribute Value': 2
            })
            assert provenance.iloc[0].equals(first_row)
            assert provenance.iloc[-1].equals(last_row)

            # compare the parameters from the input processed stream
            # to its output equivalent
            instream = None
            for stream in processed_streams:
                if stream[0].stats.station.lower() == 'hses':
                    instream = stream
                    break
            if instream is None:
                raise ValueError('Instream should not be none.')
            compare_streams(instream, outstream)
            workspace.close()

            # read in data from a second event and stash it in the workspace
            eventid = 'nz2018p115908'
            datafiles, event = read_data_dir('geonet', eventid, '*.V2A')
            raw_streams = []
            for dfile in datafiles:
                raw_streams += read_data(dfile)

            workspace = StreamWorkspace.open(tfile)
            workspace.addStreams(event, raw_streams, label='foo')

            stations = workspace.getStations(eventid)
            eventids = workspace.getEventIds()
            assert eventids == ['us1000778i', 'nz2018p115908']
            instation = raw_streams[0][0].stats.station
            this_stream = workspace.getStreams(eventid,
                                               stations=[instation],
                                               labels=['foo'])[0]
            assert instation == this_stream[0].stats.station

            usid = 'us1000778i'
            inventory = workspace.getInventory(usid)
            workspace.close()
            codes = [station.code
                     for station in inventory.networks[0].stations]
            assert sorted(set(codes)) == ['HSES', 'THZ', 'WPWS', 'WTMC']
    except Exception as e:
        raise (e)
    finally:
        shutil.rmtree(tdir)
def main(self, gmrecords):
    """Export failure tables.

    Args:
        gmrecords: GMrecordsApp instance.
    """
    logging.info('Running subcommand \'%s\'' % self.command_name)

    self.gmrecords = gmrecords
    self._get_events()

    failures = {}
    for event in self.events:
        self.eventid = event.id
        logging.info('Creating failure tables for event %s...'
                     % self.eventid)
        event_dir = os.path.join(self.gmrecords.data_path, self.eventid)
        workname = os.path.join(event_dir, WORKSPACE_NAME)
        if not os.path.isfile(workname):
            logging.info(
                'No workspace file found for event %s. Please run '
                'subcommand \'assemble\' to generate workspace file.'
                % self.eventid)
            logging.info('Continuing to next event.')
            continue

        self.workspace = StreamWorkspace.open(workname)
        self._get_pstreams()
        self.workspace.close()

        if not hasattr(self, 'pstreams'):
            logging.info('No processed waveforms available. No failure '
                         'tables created.')
            return

        status_info = self.pstreams.get_status(self.gmrecords.args.type)
        failures[event.id] = status_info

        base_file_name = os.path.join(
            event_dir,
            '%s_%s_failure_reasons_%s' % (gmrecords.project,
                                          gmrecords.args.label,
                                          self.gmrecords.args.type))

        if self.gmrecords.args.output_format == 'csv':
            csvfile = base_file_name + '.csv'
            self.append_file('Failure table', csvfile)
            status_info.to_csv(csvfile)
        else:
            excelfile = base_file_name + '.xlsx'
            self.append_file('Failure table', excelfile)
            status_info.to_excel(excelfile)

    if failures:
        comp_failures_path = os.path.join(
            self.gmrecords.data_path,
            '%s_%s_complete_failures.csv' % (gmrecords.project,
                                             gmrecords.args.label))
        if self.gmrecords.args.type == 'long':
            for idx, item in enumerate(failures.items()):
                eqid, status = item
                status = pd.DataFrame(status)
                status['EarthquakeId'] = eqid
                if idx == 0:
                    status.to_csv(comp_failures_path, mode='w')
                else:
                    status.to_csv(comp_failures_path, mode='a',
                                  header=False)
        else:
            df_failures = pd.concat(failures.values())
            df_failures = df_failures.groupby(df_failures.index).sum()
            df_failures.to_csv(comp_failures_path)
        self.append_file('Complete failures', comp_failures_path)

    self._summarize_files_created()
def test_workspace():
    eventid = 'us1000778i'
    datafiles, origin = read_data_dir('geonet', eventid, '*.V1A')
    event = get_event_object(origin)
    tdir = tempfile.mkdtemp()
    try:
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore",
                                    category=H5pyDeprecationWarning)
            warnings.filterwarnings("ignore", category=YAMLLoadWarning)
            warnings.filterwarnings("ignore", category=FutureWarning)
            config = get_config()
            tfile = os.path.join(tdir, 'test.hdf')
            raw_streams = []
            for dfile in datafiles:
                raw_streams += read_data(dfile)

            workspace = StreamWorkspace(tfile)
            t1 = time.time()
            workspace.addStreams(event, raw_streams, label='raw')
            t2 = time.time()
            print('Adding %i streams took %.2f seconds'
                  % (len(raw_streams), (t2 - t1)))

            str_repr = workspace.__repr__()
            assert str_repr == 'Events: 1 Stations: 3 Streams: 3'

            eventobj = workspace.getEvent(eventid)
            assert eventobj.origins[0].latitude == \
                event.origins[0].latitude
            assert eventobj.magnitudes[0].mag == event.magnitudes[0].mag

            stations = workspace.getStations()
            assert sorted(stations) == ['hses', 'thz', 'wtmc']

            stations = workspace.getStations(eventid=eventid)
            assert sorted(stations) == ['hses', 'thz', 'wtmc']

            # test retrieving tags for an event that doesn't exist
            try:
                workspace.getStreamTags('foo')
            except KeyError:
                assert 1 == 1

            # test retrieving event that doesn't exist
            try:
                workspace.getEvent('foo')
            except KeyError:
                assert 1 == 1

            instream = None
            for stream in raw_streams:
                if stream[0].stats.station.lower() == 'hses':
                    instream = stream
                    break
            if instream is None:
                assert 1 == 2
            outstream = workspace.getStreams(eventid,
                                             stations=['hses'],
                                             labels=['raw'])[0]
            compare_streams(instream, outstream)

            label_summary = workspace.summarizeLabels()
            assert label_summary.iloc[0]['Label'] == 'raw'
            assert label_summary.iloc[0]['Software'] == 'gmprocess'

            sc = StreamCollection(raw_streams)
            processed_streams = process_streams(sc, origin, config=config)
            workspace.addStreams(event, processed_streams, 'processed')

            idlist = workspace.getEventIds()
            assert idlist[0] == eventid

            event_tags = workspace.getStreamTags(eventid)
            assert sorted(event_tags) == [
                'hses_processed', 'hses_raw', 'thz_processed', 'thz_raw',
                'wtmc_processed', 'wtmc_raw'
            ]
            outstream = workspace.getStreams(eventid,
                                             stations=['hses'],
                                             labels=['processed'])[0]

            provenance = workspace.getProvenance(eventid,
                                                 labels=['processed'])
            first_row = pd.Series({
                'Record': 'NZ.HSES.HN1',
                'Processing Step': 'Remove Response',
                'Step Attribute': 'input_units',
                'Attribute Value': 'counts'
            })
            last_row = pd.Series({
                'Record': 'NZ.WTMC.HNZ',
                'Processing Step': 'Detrend',
                'Step Attribute': 'detrending_method',
                'Attribute Value': 'baseline_sixth_order'
            })
            assert provenance.iloc[0].equals(first_row)
            assert provenance.iloc[-1].equals(last_row)

            # compare the parameters from the input processed stream
            # to its output equivalent
            instream = None
            for stream in processed_streams:
                if stream[0].stats.station.lower() == 'hses':
                    instream = stream
                    break
            if instream is None:
                assert 1 == 2
            compare_streams(instream, outstream)
            workspace.close()

            # read in data from a second event and stash it in the workspace
            eventid = 'nz2018p115908'
            datafiles, origin = read_data_dir('geonet', eventid, '*.V2A')
            raw_streams = []
            for dfile in datafiles:
                raw_streams += read_data(dfile)

            event = get_event_object(origin)
            workspace = StreamWorkspace.open(tfile)
            workspace.addStreams(event, raw_streams, label='foo')

            stations = workspace.getStations(eventid)
            eventids = workspace.getEventIds()
            assert eventids == ['us1000778i', 'nz2018p115908']
            instation = raw_streams[0][0].stats.station
            this_stream = workspace.getStreams(eventid,
                                               stations=[instation],
                                               labels=['foo'])[0]
            assert instation == this_stream[0].stats.station

            # set and retrieve waveform metrics in the file
            imclist = ['greater_of_two_horizontals',
                       'channels',
                       'rotd50',
                       'rotd100']
            imtlist = ['sa1.0', 'PGA', 'pgv', 'fas2.0', 'arias']
            usid = 'us1000778i'
            tags = workspace.getStreamTags(usid)
            workspace.setStreamMetrics(eventid, labels=['foo'],
                                       imclist=imclist, imtlist=imtlist)
            summary = workspace.getStreamMetrics(eventid, instation, 'foo')
            summary_series = summary.toSeries()['ARIAS']
            cmpseries = pd.Series({
                'GEOMETRIC_MEAN': np.NaN,
                'GREATER_OF_TWO_HORIZONTALS': 0.0005,
                'HN1': 0.0001,
                'HN2': 0.0005,
                'HNZ': 0.0000,
                'ROTD100.0': 0.0005,
                'ROTD50.0': 0.0003
            })
            assert cmpseries.equals(summary_series)

            workspace.setStreamMetrics(usid, labels=['processed'])
            df = workspace.getMetricsTable(usid, labels=['processed'])
            cmpdict = {
                'GREATER_OF_TWO_HORIZONTALS': [26.8906, 4.9415, 94.6646],
                'HN1': [24.5105, 4.9415, 94.6646],
                'HN2': [26.8906, 4.0758, 86.7877],
                'HNZ': [16.0941, 2.5401, 136.7054]
            }
            cmpframe = pd.DataFrame(cmpdict)
            assert df['PGA'].equals(cmpframe)

            inventory = workspace.getInventory(usid)
            codes = [station.code
                     for station in inventory.networks[0].stations]
            assert sorted(codes) == ['HSES', 'THZ', 'WPWS', 'WTMC']
    except Exception as e:
        raise (e)
    finally:
        shutil.rmtree(tdir)