Example #1
    def _process_event(self, event):
        event_dir = os.path.join(self.gmrecords.data_path, event.id)
        workname = os.path.join(event_dir, WORKSPACE_NAME)
        if not os.path.isfile(workname):
            logging.info('No workspace file found for event %s. Please run '
                         'subcommand \'assemble\' to generate workspace file.'
                         % event.id)
            logging.info('Continuing to next event.')
            return event.id

        workspace = StreamWorkspace.open(workname)
        rstreams = workspace.getStreams(event.id,
                                        labels=['unprocessed'],
                                        config=self.gmrecords.conf)

        if len(rstreams):
            logging.info('Processing \'%s\' streams for event %s...' %
                         ('unprocessed', event.id))
            pstreams = process_streams(rstreams,
                                       event,
                                       config=self.gmrecords.conf)
            workspace.addStreams(event, pstreams, label=self.process_tag)
        else:
            logging.info('No streams found. Nothing to do. Goodbye.')

        workspace.close()
        return event.id
Example #2
def get_sample_data(volume):
    thisdir = pathlib.Path(__file__).parent
    datafile = (thisdir / ".." / ".." / ".." / ".." / "gmprocess" / "data" /
                "testdata" / "asdf" / "nc71126864" / "workspace.h5")
    workspace = StreamWorkspace.open(datafile)
    t1 = time.time()
    eventid = workspace.getEventIds()[0]
    t2 = time.time()
    print(f"{t2-t1:.2f} seconds to read eventid")
    scalar_event = workspace.getEvent(eventid)

    station = "CE.79435"
    labels = workspace.getLabels()
    if volume == Volume.RAW:
        labels.remove("default")
    elif volume == Volume.CONVERTED:
        labels.remove("default")
    else:
        labels.remove("unprocessed")
    plabel = labels[0]
    streams = workspace.getStreams(eventid,
                                   stations=[station],
                                   labels=[plabel])
    gmprocess_version = workspace.getGmprocessVersion()
    idx = gmprocess_version.find(".dev")
    gmprocess_version = gmprocess_version[0:idx]
    stream = streams[0]
    trace = stream[0]
    workspace.close()
    return (trace, eventid, scalar_event, stream, gmprocess_version)
Example #3
def generate_workspace():
    """Generate simple HDF5 with ASDF layout for testing.
    """
    PCOMMANDS = [
        'assemble',
        'process',
    ]
    EVENTID = 'us1000778i'
    LABEL = 'ptest'
    datafiles, event = read_data_dir('geonet', EVENTID, '*.V1A')

    tdir = tempfile.mkdtemp()
    tfilename = os.path.join(tdir, 'workspace.h5')

    raw_data = []
    for dfile in datafiles:
        raw_data += read_data(dfile)
    write_asdf(tfilename, raw_data, event, label="unprocessed")
    del raw_data

    config = get_config()
    workspace = StreamWorkspace.open(tfilename)
    raw_streams = workspace.getStreams(EVENTID, labels=['unprocessed'])
    pstreams = process_streams(raw_streams, event, config=config)
    workspace.addStreams(event, pstreams, label=LABEL)
    workspace.calcMetrics(event.id, labels=[LABEL], config=config)

    return tfilename
Example #4
def get_eq_full_status_df(wdir, eqid, imc):
    ws = StreamWorkspace.open(os.path.join(wdir, eqid, 'workspace.h5'))
    labels = ws.getLabels()
    labels.remove('unprocessed')
    processed_label = labels[0]
    sc = ws.getStreams(ws.getEventIds()[0], labels=[processed_label])
    ws.close()

    rows = []
    for st in sc:
        coords = st[0].stats.coordinates
        row = [st.id, coords.latitude, coords.longitude]
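        # Mark streams that passed processing; otherwise record the first
        # failure reason found on any trace.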
        if st.passed:
            row.append('Passed')
        else:
            for tr in st:
                if tr.hasParameter('failure'):
                    row.append(tr.getParameter('failure')['reason'])
                    break
        rows.append(row)
    df = pd.DataFrame(rows,
                      columns=[
                          'StationID', 'StationLatitude', 'StationLongitude',
                          'Failure reason'
                      ])
    return df
Example #5
def generate_workspace():
    """Generate simple HDF5 with ASDF layout for testing."""
    PCOMMANDS = [
        "assemble",
        "process",
    ]
    EVENTID = "us1000778i"
    LABEL = "ptest"
    datafiles, event = read_data_dir("geonet", EVENTID, "*.V1A")

    tdir = tempfile.mkdtemp()
    tfilename = os.path.join(tdir, "workspace.h5")

    raw_data = []
    for dfile in datafiles:
        raw_data += read_data(dfile)
    write_asdf(tfilename, raw_data, event, label="unprocessed")
    del raw_data

    config = update_config(os.path.join(datadir, "config_min_freq_0p2.yml"))

    workspace = StreamWorkspace.open(tfilename)
    raw_streams = workspace.getStreams(EVENTID,
                                       labels=["unprocessed"],
                                       config=config)
    pstreams = process_streams(raw_streams, event, config=config)
    workspace.addStreams(event, pstreams, label=LABEL)
    workspace.calcMetrics(event.id, labels=[LABEL], config=config)

    return tfilename
Example #6
    def __init__(self,
                 cosmos_directory,
                 h5_filename,
                 volume=Volume.PROCESSED,
                 label=None):
        if volume == Volume.PROCESSED and label is None:
            raise Exception("Must supply label for processed data")
        self._workspace = StreamWorkspace.open(h5_filename)
        self._cosmos_directory = pathlib.Path(cosmos_directory)
        self._volume = volume
        self._label = label
Example #7
    def main(self, gmrecords):
        """Export provenance tables.

        Args:
            gmrecords:
                GMrecordsApp instance.
        """
        logging.info('Running subcommand \'%s\'' % self.command_name)

        self.gmrecords = gmrecords
        self._check_arguments()
        self._get_events()

        for event in self.events:
            self.eventid = event.id
            logging.info('Creating provenance tables for event %s...' %
                         self.eventid)
            event_dir = os.path.join(gmrecords.data_path, self.eventid)
            workname = os.path.join(event_dir, WORKSPACE_NAME)
            if not os.path.isfile(workname):
                logging.info(
                    'No workspace file found for event %s. Please run '
                    'subcommand \'assemble\' to generate workspace file.' %
                    self.eventid)
                logging.info('Continuing to next event.')
                continue

            self.workspace = StreamWorkspace.open(workname)
            self._get_pstreams()

            if not (hasattr(self, 'pstreams') and len(self.pstreams) > 0):
                logging.info('No processed waveforms available. No provenance '
                             'tables created.')
                self.workspace.close()
                continue

            provdata = self.workspace.getProvenance(
                self.eventid, labels=self.gmrecords.args.label)
            self.workspace.close()

            basename = '%s_%s_provenance' % (gmrecords.project,
                                             gmrecords.args.label)
            if gmrecords.args.output_format == 'csv':
                csvfile = os.path.join(event_dir, '%s.csv' % basename)
                self.append_file('Provenance', csvfile)
                provdata.to_csv(csvfile, index=False)
            else:
                excelfile = os.path.join(event_dir, '%s.xlsx' % basename)
                self.append_file('Provenance', excelfile)
                provdata.to_excel(excelfile, index=False)

        self._summarize_files_created()
Example #8
    def main(self, gmrecords):
        """Export files for ShakeMap input.

        Args:
            gmrecords:
                GMrecordsApp instance.
        """
        logging.info('Running subcommand \'%s\'' % self.command_name)

        self.gmrecords = gmrecords
        self._get_events()

        for event in self.events:
            self.eventid = event.id
            logging.info('Creating shakemap files for event %s...' %
                         self.eventid)

            event_dir = os.path.join(gmrecords.data_path, event.id)
            workname = os.path.join(event_dir, WORKSPACE_NAME)
            if not os.path.isfile(workname):
                logging.info(
                    'No workspace file found for event %s. Please run '
                    'subcommand \'assemble\' to generate workspace file.' %
                    event.id)
                logging.info('Continuing to next event.')
                continue

            self.workspace = StreamWorkspace.open(workname)
            self._get_labels()
            self._get_pstreams()

            if not hasattr(self, 'pstreams'):
                logging.info('No processed waveforms available. No shakemap '
                             'files created.')
                return

            # TODO: re-write this so that it uses the already computed values
            # in self.workspace.dataset.auxiliary_data.WaveFormMetrics
            # rather than recomputing the metrics from self.pstreams.
            # shakemap_file, jsonfile = save_shakemap_amps(
            #     self.pstreams, event, event_dir)

            jsonfile, stationfile, _ = create_json(self.workspace, event,
                                                   event_dir,
                                                   self.gmrecords.args.label)

            self.workspace.close()
            self.append_file('shakemap', jsonfile)
            self.append_file('shakemap', stationfile)

        self._summarize_files_created()
Example #9
    def _compute_event_waveforms(self, event):
        self.eventid = event.id
        logging.info(
            'Computing waveform metrics for event %s...' % self.eventid)
        event_dir = os.path.join(self.gmrecords.data_path, self.eventid)
        workname = os.path.join(event_dir, WORKSPACE_NAME)
        if not os.path.isfile(workname):
            logging.info(
                'No workspace file found for event %s. Please run '
                'subcommand \'assemble\' to generate workspace file.'
                % self.eventid)
            logging.info('Continuing to next event.')
            return event.id

        self.workspace = StreamWorkspace.open(workname)
        self._get_pstreams()

        if not hasattr(self, 'pstreams'):
            logging.info('No processed waveforms available. No waveform '
                         'metrics computed.')
            self.workspace.close()
            return event.id

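        # Only compute waveform metrics for streams that passed processing.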
        for stream in self.pstreams:
            if stream.passed:
                logging.info(
                    'Calculating waveform metrics for %s...'
                    % stream.get_id()
                )
                summary = StationSummary.from_config(
                    stream, event=event, config=self.gmrecords.conf,
                    calc_waveform_metrics=True,
                    calc_station_metrics=False
                )
                xmlstr = summary.get_metric_xml()
                tag = stream.tag
                metricpath = '/'.join([
                    format_netsta(stream[0].stats),
                    format_nslit(stream[0].stats, stream.get_inst(), tag)
                ])
                self.workspace.insert_aux(
                    xmlstr, 'WaveFormMetrics', metricpath,
                    overwrite=self.gmrecords.args.overwrite)
            logging.info('Added waveform metrics to workspace files '
                         'with tag \'%s\'.' % self.gmrecords.args.label)

        self.workspace.close()
        return event.id
Example #10
    def main(self, gmrecords):
        """Export files for ShakeMap input.

        Args:
            gmrecords:
                GMrecordsApp instance.
        """
        logging.info('Running subcommand \'%s\'' % self.command_name)

        self.gmrecords = gmrecords
        self._check_arguments()
        self._get_events()

        for event in self.events:
            self.eventid = event.id
            logging.info('Creating shakemap files for event %s...' %
                         self.eventid)

            event_dir = os.path.join(gmrecords.data_path, event.id)
            workname = os.path.join(event_dir, WORKSPACE_NAME)
            if not os.path.isfile(workname):
                logging.info(
                    'No workspace file found for event %s. Please run '
                    'subcommand \'assemble\' to generate workspace file.' %
                    event.id)
                logging.info('Continuing to next event.')
                continue

            self.workspace = StreamWorkspace.open(workname)
            self._get_labels()

            expanded_imts = self.gmrecords.args.expand_imts
            jsonfile, stationfile, _ = create_json(self.workspace,
                                                   event,
                                                   event_dir,
                                                   self.gmrecords.args.label,
                                                   config=self.gmrecords.conf,
                                                   expanded_imts=expanded_imts)

            self.workspace.close()
            if jsonfile is not None:
                self.append_file('shakemap', jsonfile)
            if stationfile is not None:
                self.append_file('shakemap', stationfile)

        self._summarize_files_created()
Example #11
def test_get_shakemap():
    tdir = tempfile.mkdtemp()
    try:
        thisdir = pathlib.Path(__file__).parent
        datadir = (thisdir / '..' / '..' / '..' / 'gmprocess' / 'data' /
                   'testdata')
        datafile = datadir / 'workspace_ci38457511.hdf'

        workspace = StreamWorkspace.open(datafile)
        eventid = workspace.getEventIds()[0]
        event = workspace.getEvent(eventid)
        label = '20201209195000'
        processed = workspace.getStreams(eventid, labels=[label])

        excelfile, jsonfile = save_shakemap_amps(processed, event, tdir)
        with open(jsonfile, 'rt', encoding='utf-8') as fp:
            jdict = json.load(fp)
        assert jdict['features'][0]['id'] == 'CJ.T001230'

    except Exception as e:
        raise AssertionError(str(e))
    finally:
        shutil.rmtree(tdir)
Example #12
    def main(self, gmrecords):
        """Generate summary report.

        This function generates summary plots and then combines them into a
        report with latex. If latex (specifically `pdflatex`) is not found on
        the system then the PDF report will not be generated but the
        constituent plots will be available.

        Args:
            gmrecords:
                GMrecordsApp instance.
        """
        logging.info('Running subcommand \'%s\'' % self.command_name)

        self.gmrecords = gmrecords
        self._get_events()

        for event in self.events:
            self.eventid = event.id
            event_dir = os.path.join(gmrecords.data_path, self.eventid)
            workname = os.path.join(event_dir, WORKSPACE_NAME)
            if not os.path.isfile(workname):
                logging.info(
                    'No workspace file found for event %s. Please run '
                    'subcommand \'assemble\' to generate workspace file.' %
                    self.eventid)
                logging.info('Continuing to next event.')
                continue

            self.workspace = StreamWorkspace.open(workname)
            self._get_pstreams()
            self.workspace.close()

            if not hasattr(self, 'pstreams'):
                logging.info('No processed waveforms available. No report '
                             'generated.')
                return

            logging.info('Creating diagnostic plots for event %s...' %
                         self.eventid)
            plot_dir = os.path.join(event_dir, 'plots')
            if not os.path.isdir(plot_dir):
                os.makedirs(plot_dir)
            for stream in self.pstreams:
                summary_plots(stream, plot_dir, event)

            mapfile = draw_stations_map(self.pstreams, event, event_dir)
            moveoutfile = os.path.join(event_dir, 'moveout_plot.png')
            plot_moveout(self.pstreams,
                         event.latitude,
                         event.longitude,
                         file=moveoutfile)
            self.append_file('Station map', mapfile)
            self.append_file('Moveout plot', moveoutfile)

            logging.info('Generating summary report for event %s...' %
                         self.eventid)

            build_conf = gmrecords.conf['build_report']
            report_format = build_conf['format']
            if report_format == 'latex':
                report_file, success = build_report_latex(
                    self.pstreams,
                    event_dir,
                    event,
                    prefix="%s_%s" % (gmrecords.project, gmrecords.args.label),
                    config=gmrecords.conf)
            else:
                report_file = ''
                success = False
            if os.path.isfile(report_file) and success:
                self.append_file('Summary report', report_file)

        self._summarize_files_created()
Example #13
    def main(self, gmrecords):
        """Export metric tables.

        Args:
            gmrecords:
                GMrecordsApp instance.
        """
        logging.info('Running subcommand \'%s\'' % self.command_name)

        self.gmrecords = gmrecords
        self._get_events()

        for event in self.events:
            self.eventid = event.id
            logging.info('Creating tables for event %s...' % self.eventid)
            event_dir = os.path.join(gmrecords.data_path, self.eventid)
            workname = os.path.join(event_dir, WORKSPACE_NAME)
            if not os.path.isfile(workname):
                logging.info(
                    'No workspace file found for event %s. Please run '
                    'subcommand \'assemble\' to generate workspace file.' %
                    self.eventid)
                logging.info('Continuing to next event.')
                continue

            self.workspace = StreamWorkspace.open(workname)
            self._get_pstreams()

            if not hasattr(self, 'pstreams'):
                logging.info('No processed waveforms available. No metric '
                             'tables created.')
                self.workspace.close()
                return

            event_table, imc_tables, readmes = self.workspace.getTables(
                self.gmrecords.args.label, streams=self.pstreams)
            ev_fit_spec, fit_readme = self.workspace.getFitSpectraTable(
                self.eventid, self.gmrecords.args.label, self.pstreams)
            self.workspace.close()

            outdir = gmrecords.data_path

            # Set the precisions for the imc tables, event table, and
            # fit_spectra table before writing
            imc_tables_formatted = {}
            for imc, imc_table in imc_tables.items():
                imc_tables_formatted[imc] = set_precisions(imc_table)
            event_table_formatted = set_precisions(event_table)
            df_fit_spectra_formatted = set_precisions(ev_fit_spec)

            imc_list = [
                '%s_%s_metrics_%s' %
                (gmrecords.project, gmrecords.args.label, imc.lower())
                for imc in imc_tables_formatted.keys()
            ]
            readme_list = [
                '%s_%s_metrics_%s_README' %
                (gmrecords.project, gmrecords.args.label, imc.lower())
                for imc in readmes.keys()
            ]
            proj_lab = (gmrecords.project, gmrecords.args.label)

            event_filename = ['%s_%s_events' % proj_lab]
            filenames = event_filename + imc_list + readme_list + [
                '%s_%s_fit_spectra_parameters' % proj_lab,
                '%s_%s_fit_spectra_parameters_README' % proj_lab
            ]

            files = [event_table_formatted] + list(
                imc_tables_formatted.values()) + list(
                    readmes.values()) + [df_fit_spectra_formatted, fit_readme]

            output_format = gmrecords.args.output_format
            if output_format != 'csv':
                output_format = 'xlsx'

            for filename, df in dict(zip(filenames, files)).items():
                filepath = os.path.join(outdir,
                                        filename + '.%s' % output_format)
                if os.path.exists(filepath):
                    if 'README' in filename:
                        continue
                    else:
                        if self.gmrecords.args.overwrite:
                            logging.warning('File exists: %s' % filename)
                            logging.warning('Overwriting file: %s' % filename)
                            mode = 'w'
                            header = True
                        else:
                            logging.warning('File exists: %s' % filename)
                            logging.warning('Appending to file: %s' % filename)
                            mode = 'a'
                            header = False
                else:
                    mode = 'w'
                    header = True
                if output_format == 'csv':
                    df.to_csv(filepath,
                              index=False,
                              float_format=DEFAULT_FLOAT_FORMAT,
                              na_rep=DEFAULT_NA_REP,
                              mode=mode,
                              header=header)
                    if mode == "w":
                        self.append_file('Metric tables', filepath)
                else:
                    df.to_excel(filepath,
                                index=False,
                                float_format=DEFAULT_FLOAT_FORMAT,
                                na_rep=DEFAULT_NA_REP,
                                mode=mode,
                                header=header)
                    if mode == "w":
                        self.append_file('Metric tables', filepath)

        self._summarize_files_created()
Example #14
    def _event_station_metrics(self, event):
        self.eventid = event.id
        logging.info('Computing station metrics for event %s...' %
                     self.eventid)
        event_dir = os.path.join(self.gmrecords.data_path, self.eventid)
        workname = os.path.join(event_dir, WORKSPACE_NAME)
        if not os.path.isfile(workname):
            logging.info(
                'No workspace file found for event %s. Please run '
                'subcommand \'assemble\' to generate workspace file.' %
                self.eventid)
            logging.info('Continuing to next event.')
            return event.id

        self.workspace = StreamWorkspace.open(workname)
        self._get_pstreams()

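        # Construct an Origin object for the event so a rupture (finite or
        # point source) can be loaded and used for station distance metrics.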
        rupture_file = get_rupture_file(event_dir)
        origin = Origin({
            'id': self.eventid,
            'netid': '',
            'network': '',
            'lat': event.latitude,
            'lon': event.longitude,
            'depth': event.depth_km,
            'locstring': '',
            'mag': event.magnitude,
            'time': event.time
        })
        rupture = get_rupture(origin, rupture_file)

        if not hasattr(self, 'pstreams'):
            logging.info('No processed waveforms available. No station '
                         'metrics computed.')
            self.workspace.close()
            return

        for stream in self.pstreams:
            logging.info('Calculating station metrics for %s...' %
                         stream.get_id())
            summary = StationSummary.from_config(stream,
                                                 event=event,
                                                 config=self.gmrecords.conf,
                                                 calc_waveform_metrics=False,
                                                 calc_station_metrics=True,
                                                 rupture=rupture,
                                                 vs30_grids=self.vs30_grids)
            xmlstr = summary.get_station_xml()
            metricpath = '/'.join([
                format_netsta(stream[0].stats),
                format_nslit(stream[0].stats, stream.get_inst(), self.eventid)
            ])
            self.workspace.insert_aux(xmlstr,
                                      'StationMetrics',
                                      metricpath,
                                      overwrite=self.gmrecords.args.overwrite)
            logging.info('Added station metrics to workspace files '
                         'with tag \'%s\'.' % self.gmrecords.args.label)

        self.workspace.close()
        return event.id
Example #15
def process_event(event,
                  outdir,
                  pcommands,
                  config,
                  input_directory,
                  process_tag,
                  files_created,
                  output_format,
                  status,
                  recompute_metrics,
                  export_dir=None):

    # setup logging to write to the input logfile
    argthing = namedtuple('args', ['debug', 'quiet'])
    args = argthing(debug=True, quiet=False)
    setup_logger(args)

    logger = logging.getLogger()
    stream_handler = logger.handlers[0]
    logfile = os.path.join(outdir, '%s.log' % event.id)
    fhandler = logging.FileHandler(logfile)
    logger.removeHandler(stream_handler)
    logger.addHandler(fhandler)

    event_dir = os.path.join(outdir, event.id)
    if not os.path.exists(event_dir):
        os.makedirs(event_dir)

    workname = os.path.join(event_dir, WORKSPACE_NAME)
    workspace_exists = os.path.isfile(workname)
    workspace_has_processed = False
    workspace = None
    processing_done = False

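    # If a workspace file already exists, inspect its labels to see whether
    # processed data is already present.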
    if workspace_exists:
        workspace = StreamWorkspace.open(workname)
        labels = workspace.getLabels()
        if len(labels):
            labels.remove('unprocessed')
        elif 'assemble' not in pcommands:
            print('No data in workspace. Please run assemble.')
            sys.exit(1)

        if len(labels) == 1:
            process_tag = labels[0]
            workspace_has_processed = True
        else:
            if 'process' not in pcommands:
                fmt = '\nThere are %i sets of processed data in %s.'
                tpl = (len(labels), workname)
                print(fmt % tpl)
                print(('This software can only handle one set of '
                       'processed data. Exiting.\n'))
                sys.exit(1)

    download_done = False

    # Need to initialize rstreams/pstreams
    rstreams = []
    pstreams = []

    rupture_file = None
    if 'assemble' in pcommands:
        logging.info('Downloading/loading raw streams...')
        workspace, workspace_file, rstreams, rupture_file = download(
            event, event_dir, config, input_directory)

        download_done = True
        append_file(files_created, 'Workspace', workname)

    else:
        if not workspace_exists:
            print('\nYou opted not to download or process from input.')
            print('No previous HDF workspace file could be found.')
            print('Try re-running with the assemble command with or ')
            print('without the --directory option.\n')
            sys.exit(1)
        if 'process' in pcommands:
            logging.info('Getting raw streams from workspace...')
            with warnings.catch_warnings():
                warnings.simplefilter("ignore",
                                      category=H5pyDeprecationWarning)
                rstreams = workspace.getStreams(event.id,
                                                labels=['unprocessed'])
            download_done = True
        else:
            need_processed = set(['report', 'shakemap'])
            need_pstreams = len(need_processed.intersection(pcommands))
            if workspace_has_processed:
                if need_pstreams:
                    logging.info('Getting processed streams from workspace...')
                    with warnings.catch_warnings():
                        warnings.simplefilter("ignore",
                                              category=H5pyDeprecationWarning)
                        pstreams = workspace.getStreams(event.id,
                                                        labels=[process_tag])
                download_done = True
                processing_done = True

    if ('process' in pcommands and download_done and not processing_done
            and len(rstreams)):
        logging.info('Processing raw streams for event %s...' % event.id)
        pstreams = process_streams(rstreams, event, config=config)

        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=H5pyDeprecationWarning)
            workspace.addStreams(event, pstreams, label=process_tag)
            workspace.calcMetrics(event.id,
                                  labels=[process_tag],
                                  config=config,
                                  streams=pstreams,
                                  stream_label=process_tag,
                                  rupture_file=rupture_file)
        processing_done = True

    if 'export' in pcommands:
        if export_dir is not None:
            if not os.path.isdir(export_dir):
                os.makedirs(export_dir)
            outdir = export_dir

        labels = workspace.getLabels()
        if 'unprocessed' not in labels:
            fmt = ('Workspace file "%s" appears to have no unprocessed '
                   'data. Skipping.')
            logging.info(fmt % workspace_file)
        else:
            labels.remove('unprocessed')
            if not labels:
                fmt = ('Workspace file "%s" appears to have no processed '
                       'data. Skipping.')
                logging.info(fmt % workspace_file)
            else:
                logging.info('Creating tables for event %s...', event.id)
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore",
                                          category=H5pyDeprecationWarning)
                    if recompute_metrics:
                        del workspace.dataset.auxiliary_data.WaveFormMetrics
                        del workspace.dataset.auxiliary_data.StationMetrics
                        workspace.calcMetrics(event.id,
                                              labels=labels,
                                              config=config,
                                              rupture_file=rupture_file)
                    event_table, imc_tables, readmes = workspace.getTables(
                        labels[0], streams=pstreams, stream_label=process_tag)
                    ev_fit_spec, fit_readme = workspace.getFitSpectraTable(
                        event.id, labels[0], pstreams)

                # Set the precisions for the imc tables, event table, and
                # fit_spectra table before writing
                imc_tables_formatted = {}
                for imc, imc_table in imc_tables.items():
                    imc_tables_formatted[imc] = set_precisions(imc_table)
                event_table_formatted = set_precisions(event_table)
                df_fit_spectra_formatted = set_precisions(ev_fit_spec)

                if not os.path.isdir(outdir):
                    os.makedirs(outdir)

                filenames = ['events'] + \
                    [imc.lower() for imc in imc_tables_formatted.keys()] + \
                    [imc.lower() + '_README' for imc in readmes.keys()] + \
                    ['fit_spectra_parameters', 'fit_spectra_parameters_README']

                files = [event_table_formatted] + list(
                    imc_tables_formatted.values()) + list(readmes.values()) + [
                        df_fit_spectra_formatted, fit_readme
                    ]

                if output_format != 'csv':
                    output_format = 'xlsx'

                for filename, df in dict(zip(filenames, files)).items():
                    filepath = os.path.join(outdir,
                                            filename + '.%s' % output_format)
                    if os.path.exists(filepath):
                        if 'README' in filename:
                            continue
                        else:
                            mode = 'a'
                            header = False
                    else:
                        mode = 'w'
                        header = True
                        append_file(files_created, 'Tables', filepath)
                    if output_format == 'csv':
                        df.to_csv(filepath,
                                  index=False,
                                  float_format=DEFAULT_FLOAT_FORMAT,
                                  na_rep=DEFAULT_NA_REP,
                                  mode=mode,
                                  header=header)
                    else:
                        df.to_excel(filepath,
                                    index=False,
                                    float_format=DEFAULT_FLOAT_FORMAT,
                                    na_rep=DEFAULT_NA_REP,
                                    mode=mode,
                                    header=header)

    if ('report' in pcommands and processing_done and len(pstreams)):
        logging.info('Creating diagnostic plots for event %s...' % event.id)
        plot_dir = os.path.join(event_dir, 'plots')
        if not os.path.isdir(plot_dir):
            os.makedirs(plot_dir)
        for stream in pstreams:
            summary_plots(stream, plot_dir, event)

        mapfile = draw_stations_map(pstreams, event, event_dir)
        plot_moveout(pstreams,
                     event.latitude,
                     event.longitude,
                     file=os.path.join(event_dir, 'moveout_plot.png'))

        append_file(files_created, 'Station map', mapfile)
        append_file(files_created, 'Moveout plot', 'moveout_plot.png')

        logging.info('Creating diagnostic report for event %s...' % event.id)
        # Build the summary report?
        build_conf = config['build_report']
        report_format = build_conf['format']
        if report_format == 'latex':
            report_file, success = build_report_latex(pstreams,
                                                      event_dir,
                                                      event,
                                                      config=config)
        else:
            report_file = ''
            success = False
        if os.path.isfile(report_file) and success:
            append_file(files_created, 'Summary report', report_file)

    if 'provenance' in pcommands and processing_done and len(pstreams):
        logging.info('Creating provenance table for event %s...' % event.id)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=H5pyDeprecationWarning)
            provdata = workspace.getProvenance(event.id, labels=[process_tag])
        if output_format == 'csv':
            csvfile = os.path.join(event_dir, 'provenance.csv')
            append_file(files_created, 'Provenance', csvfile)
            provdata.to_csv(csvfile)
        else:
            excelfile = os.path.join(event_dir, 'provenance.xlsx')
            append_file(files_created, 'Provenance', excelfile)
            provdata.to_excel(excelfile, index=False)

    if 'shakemap' in pcommands and processing_done and len(pstreams):
        logging.info('Creating shakemap table for event %s...' % event.id)
        shakemap_file, jsonfile = save_shakemap_amps(pstreams, event,
                                                     event_dir)
        append_file(files_created, 'shakemap', shakemap_file)
        append_file(files_created, 'shakemap', jsonfile)

    if status and processing_done and len(pstreams):
        if status == 'short':
            index = 'Failure reason'
            col = ['Number of records']
        elif status == 'long':
            index = 'Station ID'
            col = ['Failure reason']
        elif status == 'net':
            index = 'Network'
            col = ['Number of passed records', 'Number of failed records']

        status_info = pstreams.get_status(status)
        status_info.to_csv(os.path.join(event_dir, 'status.csv'),
                           header=col,
                           index_label=index)

    # since we don't know how many events users will be processing,
    # let's guard against memory issues by clearing out the big data
    # structures
    workspace.close()

    logging.info('Finishing event %s' % event.id)

    return workname
Example #16
def get_rec_plot(eqid, stid, wdir):

    ws = StreamWorkspace.open(os.path.join(wdir, eqid, 'workspace.h5'))
    labels = ws.getLabels()
    labels.remove('unprocessed')

    station = stid.split('.')[1]

    r_st = ws.getStreams(eqid, stations=[station], labels=['unprocessed'])[0]

    if labels:
        p_st = ws.getStreams(eqid, stations=[station], labels=[labels[0]])[0]
        v_st = p_st.copy().integrate()

    fig = make_subplots(rows=5, cols=len(r_st))

    shapes = []

    for tr_idx, r_tr in enumerate(r_st):
        p_tr, v_tr = p_st[tr_idx], v_st[tr_idx]
        x_start = p_tr.stats.starttime - r_tr.stats.starttime
        x_end = p_tr.stats.endtime - r_tr.stats.starttime
        sig_dict = p_tr.getCached('signal_spectrum')
        noi_dict = p_tr.getCached('noise_spectrum')
        sig_dict_s = p_tr.getCached('smooth_signal_spectrum')
        noi_dict_s = p_tr.getCached('smooth_noise_spectrum')
        snr_dict = p_tr.getCached('snr')
        sig_spec, sig_freq = sig_dict['spec'], sig_dict['freq']
        noi_spec, noi_freq = noi_dict['spec'], noi_dict['freq']
        sig_spec_s, sig_freq_s = sig_dict_s['spec'], sig_dict_s['freq']
        noi_spec_s, noi_freq_s = noi_dict_s['spec'], noi_dict_s['freq']
        snr, snr_freq = snr_dict['snr'], snr_dict['freq']
        snr_conf = p_tr.getParameter('snr_conf')
        threshold = snr_conf['threshold']
        min_freq, max_freq = snr_conf['min_freq'], snr_conf['max_freq']
        lp = p_tr.getProvenance('lowpass_filter')[0]['corner_frequency']
        hp = p_tr.getProvenance('highpass_filter')[0]['corner_frequency']
        fit_spectra_dict = p_tr.getParameter('fit_spectra')
        f0 = fit_spectra_dict['f0']
        model_spec = spectrum.model(
            (fit_spectra_dict['moment'], fit_spectra_dict['stress_drop']),
            freq=np.array(sig_freq_s),
            dist=fit_spectra_dict['epi_dist'],
            kappa=fit_spectra_dict['kappa'])

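        # Each scatter entry: [x values, y values, line color, dash style,
        # subplot row].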
        scatter_data = [[r_tr.times(), r_tr.data, 'black', None, 1],
                        [p_tr.times(), p_tr.data, 'black', None, 2],
                        [v_tr.times(), v_tr.data, 'black', None, 3],
                        [sig_freq, sig_spec, 'lightblue', None, 4],
                        [sig_freq_s, sig_spec_s, 'blue', None, 4],
                        [noi_freq, noi_spec, 'salmon', None, 4],
                        [noi_freq_s, noi_spec_s, 'red', None, 4],
                        [sig_freq_s, model_spec, 'black', 'dash', 4],
                        [snr_freq, snr, 'black', None, 5]]

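        # Each line entry: [x0, y0, x1, y1, color, dash style, subplot row].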
        line_data = [
            [x_start, r_tr.data.min(), x_start, r_tr.data.max(), 'red', 'dash', 1],
            [x_end, r_tr.data.min(), x_end, r_tr.data.max(), 'red', 'dash', 1],
            [f0, 1e-10, f0, sig_spec.max(), 'black', 'dash', 4],
            [snr_freq.min(), threshold, snr_freq.max(), threshold, 'gray', None, 5],
            [min_freq, 1e-3, min_freq, snr.max(), 'gray', None, 5],
            [max_freq, 1e-3, max_freq, snr.max(), 'gray', None, 5],
            [lp, 1e-3, lp, snr.max(), 'black', 'dash', 5],
            [hp, 1e-3, hp, snr.max(), 'black', 'dash', 5],
        ]

        for scatter in scatter_data:
            fig.append_trace(go.Scatter(x=scatter[0],
                                        y=scatter[1],
                                        line=dict(color=scatter[2],
                                                  dash=scatter[3])),
                             row=scatter[4],
                             col=tr_idx + 1)

        for line in line_data:
            i_ref = len(r_st) * (line[6] - 1) + tr_idx + 1
            shapes.append({
                'type': 'line',
                'x0': line[0],
                'y0': line[1],
                'x1': line[2],
                'y1': line[3],
                'xref': 'x%s' % i_ref,
                'yref': 'y%s' % i_ref,
                'line': {
                    'color': line[4],
                    'dash': line[5]
                }
            })

    fig.update_xaxes(title_text='Time (s)')
    fig.update_yaxes(row=1, col=1, title_text='Raw counts')
    fig.update_yaxes(row=2, col=1, title_text='Acceleration (cm/s^2)')
    fig.update_yaxes(row=3, col=1, title_text='Velocity (cm/s)')
    fig.update_xaxes(row=4, title_text='Frequency (Hz)', type='log')
    fig.update_yaxes(row=4, col=1, title_text='Amplitude (cm/s)', type='log')
    fig.update_yaxes(row=4, type='log')
    fig.update_xaxes(row=5, title_text='Frequency (Hz)', type='log')
    fig.update_yaxes(row=5, col=1, title_text='SNR', type='log')
    fig.update_yaxes(row=5, type='log')
    fig.update_layout(showlegend=False)
    fig['layout'].update(shapes=shapes,
                         margin={
                             'b': 0,
                             't': 0,
                             'l': 0,
                             'r': 30
                         })

    ws.close()

    return fig
Example #17
    def _event_station_metrics(self, event):
        self.eventid = event.id
        logging.info('Computing station metrics for event %s...' %
                     self.eventid)
        event_dir = os.path.join(self.gmrecords.data_path, self.eventid)
        workname = os.path.join(event_dir, WORKSPACE_NAME)
        if not os.path.isfile(workname):
            logging.info(
                'No workspace file found for event %s. Please run '
                'subcommand \'assemble\' to generate workspace file.' %
                self.eventid)
            logging.info('Continuing to next event.')
            return event.id

        self.workspace = StreamWorkspace.open(workname)
        self._get_pstreams()

        if not (hasattr(self, 'pstreams') and len(self.pstreams) > 0):
            logging.info('No streams found. Nothing to do. Goodbye.')
            self.workspace.close()
            return event.id

        rupture_file = get_rupture_file(event_dir)
        origin = Origin({
            'id': self.eventid,
            'netid': '',
            'network': '',
            'lat': event.latitude,
            'lon': event.longitude,
            'depth': event.depth_km,
            'locstring': '',
            'mag': event.magnitude,
            'time': event.time
        })
        self.origin = origin
        rupture = get_rupture(origin, rupture_file)

        sta_lats = []
        sta_lons = []
        sta_elev = []
        self.sta_repi = []
        self.sta_rhyp = []
        self.sta_baz = []
        for st in self.pstreams:
            sta_lats.append(st[0].stats.coordinates.latitude)
            sta_lons.append(st[0].stats.coordinates.longitude)
            sta_elev.append(st[0].stats.coordinates.elevation)
            geo_tuple = gps2dist_azimuth(st[0].stats.coordinates.latitude,
                                         st[0].stats.coordinates.longitude,
                                         origin.lat, origin.lon)
            self.sta_repi.append(geo_tuple[0] / M_PER_KM)
            self.sta_baz.append(geo_tuple[1])
            self.sta_rhyp.append(
                distance(st[0].stats.coordinates.longitude,
                         st[0].stats.coordinates.latitude,
                         -st[0].stats.coordinates.elevation / M_PER_KM,
                         origin.lon, origin.lat, origin.depth))

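        # For point ruptures, approximate Rjb/Rrup (and their variances) from
        # epicentral distance using ps2ff splines; GC2 coordinates are not
        # defined, so fill them with NaN.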
        if isinstance(rupture, PointRupture):
            self._get_ps2ff_splines()
            rjb_hat = self.rjb_spline(self.sta_repi)
            rjb_mean = rjb_hat[0]
            rjb_var = rjb_hat[1]
            rrup_hat = self.rrup_spline(self.sta_repi)
            rrup_mean = rrup_hat[0]
            rrup_var = rrup_hat[1]
            gc2_rx = np.full_like(rjb_mean, np.nan)
            gc2_ry = np.full_like(rjb_mean, np.nan)
            gc2_ry0 = np.full_like(rjb_mean, np.nan)
            gc2_U = np.full_like(rjb_mean, np.nan)
            gc2_T = np.full_like(rjb_mean, np.nan)
        else:
            logging.info('******************************')
            logging.info('* Found rupture              *')
            logging.info('******************************')
            sta_lons = np.array(sta_lons)
            sta_lats = np.array(sta_lats)
            elev = np.full_like(sta_lons, ELEVATION_FOR_DISTANCE_CALCS)
            rrup_mean, rrup_var = rupture.computeRrup(sta_lons, sta_lats, elev)
            rjb_mean, rjb_var = rupture.computeRjb(sta_lons, sta_lats, elev)
            rrup_var = np.full_like(rrup_mean, np.nan)
            rjb_var = np.full_like(rjb_mean, np.nan)
            gc2_dict = rupture.computeGC2(sta_lons, sta_lats, elev)
            gc2_rx = gc2_dict['rx']
            gc2_ry = gc2_dict['ry']
            gc2_ry0 = gc2_dict['ry0']
            gc2_U = gc2_dict['U']
            gc2_T = gc2_dict['T']

            # If we don't have a point rupture, then back azimuth needs
            # to be calculated to the closest point on the rupture
            self.sta_baz = []
            for i in range(len(self.pstreams)):
                dists = []
                bazs = []
                for quad in rupture._quadrilaterals:
                    P0, P1, P2, P3 = quad
                    for point in [P0, P1]:
                        dist, az, baz = gps2dist_azimuth(
                            point.y, point.x, sta_lats[i], sta_lons[i])
                        dists.append(dist)
                        bazs.append(baz)
                self.sta_baz.append(bazs[np.argmin(dists)])

        for i, stream in enumerate(self.pstreams):
            logging.info('Calculating station metrics for %s...' %
                         stream.get_id())
            summary = StationSummary.from_config(stream,
                                                 event=event,
                                                 config=self.gmrecords.conf,
                                                 calc_waveform_metrics=False,
                                                 calc_station_metrics=False,
                                                 rupture=rupture,
                                                 vs30_grids=self.vs30_grids)

            summary._distances = {
                'epicentral': self.sta_repi[i],
                'hypocentral': self.sta_rhyp[i],
                'rupture': rrup_mean[i],
                'rupture_var': rrup_var[i],
                'joyner_boore': rjb_mean[i],
                'joyner_boore_var': rjb_var[i],
                'gc2_rx': gc2_rx[i],
                'gc2_ry': gc2_ry[i],
                'gc2_ry0': gc2_ry0[i],
                'gc2_U': gc2_U[i],
                'gc2_T': gc2_T[i]
            }
            summary._back_azimuth = self.sta_baz[i]
            if self.vs30_grids is not None:
                for vs30_name in self.vs30_grids.keys():
                    tmpgrid = self.vs30_grids[vs30_name]
                    summary._vs30[vs30_name] = {
                        'value':
                        tmpgrid['grid_object'].getValue(
                            float(sta_lats[i]), float(sta_lons[i])),
                        'column_header':
                        tmpgrid['column_header'],
                        'readme_entry':
                        tmpgrid['readme_entry'],
                        'units':
                        tmpgrid['units']
                    }

            xmlstr = summary.get_station_xml()
            metricpath = '/'.join([
                format_netsta(stream[0].stats),
                format_nslit(stream[0].stats, stream.get_inst(), self.eventid)
            ])
            self.workspace.insert_aux(xmlstr,
                                      'StationMetrics',
                                      metricpath,
                                      overwrite=self.gmrecords.args.overwrite)
            logging.info('Added station metrics to workspace files '
                         'with tag \'%s\'.' % self.gmrecords.args.label)

        self.workspace.close()
        return event.id
Example #18
def test_workspace():
    eventid = 'us1000778i'
    datafiles, event = read_data_dir('geonet', eventid, '*.V1A')
    tdir = tempfile.mkdtemp()
    try:
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=H5pyDeprecationWarning)
            warnings.filterwarnings("ignore", category=YAMLLoadWarning)
            warnings.filterwarnings("ignore", category=FutureWarning)
            config = get_config()
            tfile = os.path.join(tdir, 'test.hdf')
            raw_streams = []
            for dfile in datafiles:
                raw_streams += read_data(dfile)

            workspace = StreamWorkspace(tfile)
            t1 = time.time()
            workspace.addStreams(event, raw_streams, label='raw')
            t2 = time.time()
            print('Adding %i streams took %.2f seconds' %
                  (len(raw_streams), (t2 - t1)))

            str_repr = workspace.__repr__()
            assert str_repr == 'Events: 1 Stations: 3 Streams: 3'

            eventobj = workspace.getEvent(eventid)
            assert eventobj.origins[0].latitude == event.origins[0].latitude
            assert eventobj.magnitudes[0].mag == event.magnitudes[0].mag

            stations = workspace.getStations()
            assert sorted(stations) == ['hses', 'thz', 'wtmc']

            stations = workspace.getStations(eventid=eventid)
            assert sorted(stations) == ['hses', 'thz', 'wtmc']

            # test retrieving tags for an event that doesn't exist
            try:
                workspace.getStreamTags('foo')
            except KeyError:
                assert 1 == 1

            # test retrieving event that doesn't exist
            try:
                workspace.getEvent('foo')
            except KeyError:
                assert 1 == 1

            instream = None
            for stream in raw_streams:
                if stream[0].stats.station.lower() == 'hses':
                    instream = stream
                    break
            if instream is None:
                assert 1 == 2
            outstream = workspace.getStreams(eventid,
                                             stations=['hses'],
                                             labels=['raw'])[0]
            compare_streams(instream, outstream)

            label_summary = workspace.summarizeLabels()
            assert label_summary.iloc[0]['Label'] == 'raw'
            assert label_summary.iloc[0]['Software'] == 'gmprocess'

            sc = StreamCollection(raw_streams)
            processed_streams = process_streams(sc, event, config=config)
            workspace.addStreams(event, processed_streams, 'processed')

            idlist = workspace.getEventIds()
            assert idlist[0] == eventid

            event_tags = workspace.getStreamTags(eventid)
            assert sorted(event_tags) == ['hses_processed', 'hses_raw',
                                          'thz_processed', 'thz_raw',
                                          'wtmc_processed', 'wtmc_raw']
            outstream = workspace.getStreams(eventid,
                                             stations=['hses'],
                                             labels=['processed'])[0]

            provenance = workspace.getProvenance(eventid, labels=['processed'])
            first_row = pd.Series({'Record': 'NZ.HSES.HN1',
                                   'Processing Step': 'Remove Response',
                                   'Step Attribute': 'input_units',
                                   'Attribute Value': 'counts'})

            last_row = pd.Series({'Record': 'NZ.WTMC.HNZ',
                                  'Processing Step': 'Lowpass Filter',
                                  'Step Attribute': 'number_of_passes',
                                  'Attribute Value': 2})
            assert provenance.iloc[0].equals(first_row)
            assert provenance.iloc[-1].equals(last_row)

            # compare the parameters from the input processed stream
            # to it's output equivalent
            instream = None
            for stream in processed_streams:
                if stream[0].stats.station.lower() == 'hses':
                    instream = stream
                    break
            if instream is None:
                assert 1 == 2
            compare_streams(instream, outstream)
            workspace.close()

            # read in data from a second event and stash it in the workspace
            eventid = 'nz2018p115908'
            datafiles, event = read_data_dir('geonet', eventid, '*.V2A')
            raw_streams = []
            for dfile in datafiles:
                raw_streams += read_data(dfile)

            workspace = StreamWorkspace.open(tfile)
            workspace.addStreams(event, raw_streams, label='foo')

            stations = workspace.getStations(eventid)

            eventids = workspace.getEventIds()
            assert eventids == ['us1000778i', 'nz2018p115908']
            instation = raw_streams[0][0].stats.station
            this_stream = workspace.getStreams(eventid,
                                               stations=[instation],
                                               labels=['foo'])[0]
            assert instation == this_stream[0].stats.station
            usid = 'us1000778i'
            inventory = workspace.getInventory(usid)
            codes = [station.code for station in inventory.networks[0].stations]
            assert sorted(codes) == ['HSES', 'THZ', 'WPWS', 'WTMC']

    except Exception as e:
        raise e
    finally:
        shutil.rmtree(tdir)
Example #19
def _test_workspace():
    eventid = "us1000778i"
    datafiles, event = read_data_dir("geonet", eventid, "*.V1A")
    tdir = tempfile.mkdtemp()
    try:
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=H5pyDeprecationWarning)
            warnings.filterwarnings("ignore", category=YAMLError)
            warnings.filterwarnings("ignore", category=FutureWarning)
            config = update_config(
                os.path.join(datadir, "config_min_freq_0p2.yml"))
            tfile = os.path.join(tdir, "test.hdf")
            raw_streams = []
            for dfile in datafiles:
                raw_streams += read_data(dfile)

            workspace = StreamWorkspace(tfile)
            t1 = time.time()
            workspace.addStreams(event, raw_streams, label="raw")
            t2 = time.time()
            print("Adding %i streams took %.2f seconds" % (len(raw_streams),
                                                           (t2 - t1)))

            str_repr = workspace.__repr__()
            assert str_repr == "Events: 1 Stations: 3 Streams: 3"

            eventobj = workspace.getEvent(eventid)
            assert eventobj.origins[0].latitude == event.origins[0].latitude
            assert eventobj.magnitudes[0].mag == event.magnitudes[0].mag

            stations = workspace.getStations()
            assert sorted(stations) == ["HSES", "THZ", "WTMC"]

            stations = workspace.getStations()
            assert sorted(stations) == ["HSES", "THZ", "WTMC"]

            # test retrieving event that doesn't exist
            with pytest.raises(KeyError):
                workspace.getEvent("foo")

            instream = None
            for stream in raw_streams:
                if stream[0].stats.station.lower() == "hses":
                    instream = stream
                    break
            if instream is None:
                raise ValueError("Instream should not be none.")
            outstream = workspace.getStreams(eventid,
                                             stations=["HSES"],
                                             labels=["raw"])[0]
            compare_streams(instream, outstream)

            label_summary = workspace.summarizeLabels()
            assert label_summary.iloc[0]["Label"] == "raw"
            assert label_summary.iloc[0]["Software"] == "gmprocess"

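            # process the raw streams with the modified config and store them
            # in the same workspace under a second label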
            sc = StreamCollection(raw_streams)
            processed_streams = process_streams(sc, event, config=config)
            workspace.addStreams(event, processed_streams, "processed")

            idlist = workspace.getEventIds()
            assert idlist[0] == eventid

            outstream = workspace.getStreams(eventid,
                                             stations=["HSES"],
                                             labels=["processed"])[0]

            provenance = workspace.getProvenance(eventid, labels=["processed"])
            first_row = pd.Series({
                "Record": "NZ.HSES.--.HN1_us1000778i_processed",
                "Processing Step": "Remove Response",
                "Step Attribute": "input_units",
                "Attribute Value": "counts",
            })

            last_row = pd.Series({
                "Record": "NZ.WTMC.--.HNZ_us1000778i_processed",
                "Processing Step": "Lowpass Filter",
                "Step Attribute": "number_of_passes",
                "Attribute Value": 2,
            })
            assert provenance.iloc[0].equals(first_row)
            assert provenance.iloc[-1].equals(last_row)

            # compare the parameters from the input processed stream
            # to its output equivalent
            instream = None
            for stream in processed_streams:
                if stream[0].stats.station.lower() == "hses":
                    instream = stream
                    break
            if instream is None:
                raise ValueError("Instream should not be none.")
            compare_streams(instream, outstream)
            workspace.close()

            # read in data from a second event and stash it in the workspace
            eventid = "nz2018p115908"
            datafiles, event = read_data_dir("geonet", eventid, "*.V2A")
            raw_streams = []
            for dfile in datafiles:
                raw_streams += read_data(dfile)

            workspace = StreamWorkspace.open(tfile)
            workspace.addStreams(event, raw_streams, label="foo")

            stations = workspace.getStations()

            eventids = workspace.getEventIds()
            assert eventids == ["us1000778i", "nz2018p115908"]
            instation = raw_streams[0][0].stats.station
            this_stream = workspace.getStreams(eventid,
                                               stations=[instation],
                                               labels=["foo"])[0]
            assert instation == this_stream[0].stats.station
            usid = "us1000778i"
            inventory = workspace.getInventory(usid)
            workspace.close()
            codes = [
                station.code for station in inventory.networks[0].stations
            ]
            assert sorted(set(codes)) == ["HSES", "THZ", "WPWS", "WTMC"]

    except Exception:
        raise
    finally:
        shutil.rmtree(tdir)
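
As the assertions in this example show, getProvenance returns a flat pandas DataFrame with the columns 'Record', 'Processing Step', 'Step Attribute', and 'Attribute Value'. A short sketch of filtering that table, assuming a workspace file produced as above (the file path is a placeholder and the import location may differ by gmprocess version):

from gmprocess.io.asdf.stream_workspace import StreamWorkspace  # assumed import path

workspace = StreamWorkspace.open('/path/to/workspace.h5')
provenance = workspace.getProvenance('us1000778i', labels=['processed'])
workspace.close()

# one row per (record, processing step, attribute, value); keep the lowpass steps
lowpass = provenance[provenance['Processing Step'] == 'Lowpass Filter']
print(lowpass[['Record', 'Step Attribute', 'Attribute Value']])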
def _test_workspace():
    eventid = 'us1000778i'
    datafiles, event = read_data_dir('geonet', eventid, '*.V1A')
    tdir = tempfile.mkdtemp()
    try:
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=H5pyDeprecationWarning)
            warnings.filterwarnings("ignore", category=YAMLLoadWarning)
            warnings.filterwarnings("ignore", category=FutureWarning)
            config = update_config(
                os.path.join(datadir, 'config_min_freq_0p2.yml'))
            tfile = os.path.join(tdir, 'test.hdf')
            raw_streams = []
            for dfile in datafiles:
                raw_streams += read_data(dfile)

            workspace = StreamWorkspace(tfile)
            t1 = time.time()
            workspace.addStreams(event, raw_streams, label='raw')
            t2 = time.time()
            print('Adding %i streams took %.2f seconds' % (len(raw_streams),
                                                           (t2 - t1)))

            str_repr = workspace.__repr__()
            assert str_repr == 'Events: 1 Stations: 3 Streams: 3'

            eventobj = workspace.getEvent(eventid)
            assert eventobj.origins[0].latitude == event.origins[0].latitude
            assert eventobj.magnitudes[0].mag == event.magnitudes[0].mag

            stations = workspace.getStations()
            assert sorted(stations) == ['HSES', 'THZ', 'WTMC']

            stations = workspace.getStations(eventid=eventid)
            assert sorted(stations) == ['HSES', 'THZ', 'WTMC']

            # test retrieving event that doesn't exist
            with pytest.raises(KeyError):
                workspace.getEvent('foo')

            instream = None
            for stream in raw_streams:
                if stream[0].stats.station.lower() == 'hses':
                    instream = stream
                    break
            if instream is None:
                raise ValueError('Instream should not be none.')
            outstream = workspace.getStreams(eventid,
                                             stations=['HSES'],
                                             labels=['raw'])[0]
            compare_streams(instream, outstream)

            label_summary = workspace.summarizeLabels()
            assert label_summary.iloc[0]['Label'] == 'raw'
            assert label_summary.iloc[0]['Software'] == 'gmprocess'

            sc = StreamCollection(raw_streams)
            processed_streams = process_streams(sc, event, config=config)
            workspace.addStreams(event, processed_streams, 'processed')

            idlist = workspace.getEventIds()
            assert idlist[0] == eventid

            outstream = workspace.getStreams(eventid,
                                             stations=['HSES'],
                                             labels=['processed'])[0]

            provenance = workspace.getProvenance(eventid, labels=['processed'])
            first_row = pd.Series({
                'Record': 'NZ.HSES.--.HN1_us1000778i_processed',
                'Processing Step': 'Remove Response',
                'Step Attribute': 'input_units',
                'Attribute Value': 'counts'
            })

            last_row = pd.Series({
                'Record': 'NZ.WTMC.--.HNZ_us1000778i_processed',
                'Processing Step': 'Lowpass Filter',
                'Step Attribute': 'number_of_passes',
                'Attribute Value': 2
            })
            assert provenance.iloc[0].equals(first_row)
            assert provenance.iloc[-1].equals(last_row)

            # compare the parameters from the input processed stream
            # to its output equivalent
            instream = None
            for stream in processed_streams:
                if stream[0].stats.station.lower() == 'hses':
                    instream = stream
                    break
            if instream is None:
                raise ValueError('Instream should not be none.')
            compare_streams(instream, outstream)
            workspace.close()

            # read in data from a second event and stash it in the workspace
            eventid = 'nz2018p115908'
            datafiles, event = read_data_dir('geonet', eventid, '*.V2A')
            raw_streams = []
            for dfile in datafiles:
                raw_streams += read_data(dfile)

            workspace = StreamWorkspace.open(tfile)
            workspace.addStreams(event, raw_streams, label='foo')

            stations = workspace.getStations(eventid)

            eventids = workspace.getEventIds()
            assert eventids == ['us1000778i', 'nz2018p115908']
            instation = raw_streams[0][0].stats.station
            this_stream = workspace.getStreams(eventid,
                                               stations=[instation],
                                               labels=['foo'])[0]
            assert instation == this_stream[0].stats.station
            usid = 'us1000778i'
            inventory = workspace.getInventory(usid)
            workspace.close()
            codes = [
                station.code for station in inventory.networks[0].stations
            ]
            assert sorted(set(codes)) == ['HSES', 'THZ', 'WPWS', 'WTMC']

    except Exception:
        raise
    finally:
        shutil.rmtree(tdir)
    def main(self, gmrecords):
        """Export failure tables.

        Args:
            gmrecords:
                GMrecordsApp instance.
        """
        logging.info('Running subcommand \'%s\'' % self.command_name)

        self.gmrecords = gmrecords
        self._get_events()

        failures = {}
        for event in self.events:
            self.eventid = event.id
            logging.info('Creating failure tables for event %s...' %
                         self.eventid)
            event_dir = os.path.join(self.gmrecords.data_path, self.eventid)
            workname = os.path.join(event_dir, WORKSPACE_NAME)
            if not os.path.isfile(workname):
                logging.info(
                    'No workspace file found for event %s. Please run '
                    'subcommand \'assemble\' to generate workspace file.' %
                    self.eventid)
                logging.info('Continuing to next event.')
                continue

            self.workspace = StreamWorkspace.open(workname)
            self._get_pstreams()
            self.workspace.close()

            if not hasattr(self, 'pstreams'):
                logging.info('No processed waveforms available. No failure '
                             'tables created.')
                return

            status_info = self.pstreams.get_status(self.gmrecords.args.type)
            failures[event.id] = status_info

            base_file_name = os.path.join(
                event_dir, '%s_%s_failure_reasons_%s' %
                (gmrecords.project, gmrecords.args.label,
                 self.gmrecords.args.type))

            if self.gmrecords.args.output_format == 'csv':
                csvfile = base_file_name + '.csv'
                self.append_file('Failure table', csvfile)
                status_info.to_csv(csvfile)
            else:
                excelfile = base_file_name + '.xlsx'
                self.append_file('Failure table', excelfile)
                status_info.to_excel(excelfile)

        if failures:
            comp_failures_path = os.path.join(
                self.gmrecords.data_path, '%s_%s_complete_failures.csv' %
                (gmrecords.project, gmrecords.args.label))
            if self.gmrecords.args.type == 'long':
                for idx, item in enumerate(failures.items()):
                    eqid, status = item
                    status = pd.DataFrame(status)
                    status['EarthquakeId'] = eqid
                    if idx == 0:
                        status.to_csv(comp_failures_path, mode='w')
                    else:
                        status.to_csv(comp_failures_path,
                                      mode='a',
                                      header=False)
            else:
                df_failures = pd.concat(failures.values())
                df_failures = df_failures.groupby(df_failures.index).sum()
                df_failures.to_csv(comp_failures_path)
            self.append_file('Complete failures', comp_failures_path)

        self._summarize_files_created()
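
When the table type is not 'long', the per-event failure counts above are merged by summing rows that share the same failure reason. A standalone sketch of that aggregation with made-up counts (the reason labels are illustrative only, not gmprocess output):

import pandas as pd

# hypothetical per-event failure counts indexed by failure reason
status_ev1 = pd.Series({'Failed SNR check': 3, 'Clipping detected': 1})
status_ev2 = pd.Series({'Failed SNR check': 2, 'Low sample rate': 4})

# mirror the aggregation above: concatenate, then sum counts per reason
df_failures = pd.concat([status_ev1, status_ev2])
df_failures = df_failures.groupby(df_failures.index).sum()
print(df_failures)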
Beispiel #22
0
def test_workspace():
    eventid = 'us1000778i'
    datafiles, origin = read_data_dir('geonet', eventid, '*.V1A')
    event = get_event_object(origin)
    tdir = tempfile.mkdtemp()
    try:
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=H5pyDeprecationWarning)
            warnings.filterwarnings("ignore", category=YAMLLoadWarning)
            warnings.filterwarnings("ignore", category=FutureWarning)
            config = get_config()
            tfile = os.path.join(tdir, 'test.hdf')
            raw_streams = []
            for dfile in datafiles:
                raw_streams += read_data(dfile)

            workspace = StreamWorkspace(tfile)
            t1 = time.time()
            workspace.addStreams(event, raw_streams, label='raw')
            t2 = time.time()
            print('Adding %i streams took %.2f seconds' % (len(raw_streams),
                                                           (t2 - t1)))

            str_repr = workspace.__repr__()
            assert str_repr == 'Events: 1 Stations: 3 Streams: 3'

            eventobj = workspace.getEvent(eventid)
            assert eventobj.origins[0].latitude == event.origins[0].latitude
            assert eventobj.magnitudes[0].mag == event.magnitudes[0].mag

            stations = workspace.getStations()
            assert sorted(stations) == ['hses', 'thz', 'wtmc']

            stations = workspace.getStations(eventid=eventid)
            assert sorted(stations) == ['hses', 'thz', 'wtmc']

            # test retrieving tags for an event that doesn't exist
            try:
                workspace.getStreamTags('foo')
                raise AssertionError('Expected KeyError for unknown event.')
            except KeyError:
                pass

            # test retrieving event that doesn't exist
            try:
                workspace.getEvent('foo')
                raise AssertionError('Expected KeyError for unknown event.')
            except KeyError:
                pass

            instream = None
            for stream in raw_streams:
                if stream[0].stats.station.lower() == 'hses':
                    instream = stream
                    break
            if instream is None:
                raise ValueError('Instream should not be none.')
            outstream = workspace.getStreams(eventid,
                                             stations=['hses'],
                                             labels=['raw'])[0]
            compare_streams(instream, outstream)

            label_summary = workspace.summarizeLabels()
            assert label_summary.iloc[0]['Label'] == 'raw'
            assert label_summary.iloc[0]['Software'] == 'gmprocess'

            sc = StreamCollection(raw_streams)
            processed_streams = process_streams(sc, origin, config=config)
            workspace.addStreams(event, processed_streams, 'processed')

            idlist = workspace.getEventIds()
            assert idlist[0] == eventid

            event_tags = workspace.getStreamTags(eventid)
            assert sorted(event_tags) == [
                'hses_processed', 'hses_raw', 'thz_processed', 'thz_raw',
                'wtmc_processed', 'wtmc_raw'
            ]
            outstream = workspace.getStreams(eventid,
                                             stations=['hses'],
                                             labels=['processed'])[0]

            provenance = workspace.getProvenance(eventid, labels=['processed'])
            first_row = pd.Series({
                'Record': 'NZ.HSES.HN1',
                'Processing Step': 'Remove Response',
                'Step Attribute': 'input_units',
                'Attribute Value': 'counts'
            })

            last_row = pd.Series({
                'Record': 'NZ.WTMC.HNZ',
                'Processing Step': 'Detrend',
                'Step Attribute': 'detrending_method',
                'Attribute Value': 'baseline_sixth_order'
            })
            assert provenance.iloc[0].equals(first_row)
            assert provenance.iloc[-1].equals(last_row)

            # compare the parameters from the input processed stream
            # to its output equivalent
            instream = None
            for stream in processed_streams:
                if stream[0].stats.station.lower() == 'hses':
                    instream = stream
                    break
            if instream is None:
                raise ValueError('Instream should not be none.')
            compare_streams(instream, outstream)
            workspace.close()

            # read in data from a second event and stash it in the workspace
            eventid = 'nz2018p115908'
            datafiles, origin = read_data_dir('geonet', eventid, '*.V2A')
            raw_streams = []
            for dfile in datafiles:
                raw_streams += read_data(dfile)

            event = get_event_object(origin)
            workspace = StreamWorkspace.open(tfile)
            workspace.addStreams(event, raw_streams, label='foo')

            stations = workspace.getStations(eventid)

            eventids = workspace.getEventIds()
            assert eventids == ['us1000778i', 'nz2018p115908']
            instation = raw_streams[0][0].stats.station
            this_stream = workspace.getStreams(eventid,
                                               stations=[instation],
                                               labels=['foo'])[0]
            assert instation == this_stream[0].stats.station

            # set and retrieve waveform metrics in the file
            imclist = [
                'greater_of_two_horizontals', 'channels', 'rotd50', 'rotd100'
            ]
            imtlist = ['sa1.0', 'PGA', 'pgv', 'fas2.0', 'arias']
            usid = 'us1000778i'
            tags = workspace.getStreamTags(usid)
            workspace.setStreamMetrics(eventid,
                                       labels=['foo'],
                                       imclist=imclist,
                                       imtlist=imtlist)
            summary = workspace.getStreamMetrics(eventid, instation, 'foo')
            summary_series = summary.toSeries()['ARIAS']
            cmpseries = pd.Series({
                'GEOMETRIC_MEAN': np.NaN,
                'GREATER_OF_TWO_HORIZONTALS': 0.0005,
                'HN1': 0.0001,
                'HN2': 0.0005,
                'HNZ': 0.0000,
                'ROTD100.0': 0.0005,
                'ROTD50.0': 0.0003
            })
            assert cmpseries.equals(summary_series)

            workspace.setStreamMetrics(usid, labels=['processed'])
            df = workspace.getMetricsTable(usid, labels=['processed'])
            cmpdict = {
                'GREATER_OF_TWO_HORIZONTALS': [26.8906, 4.9415, 94.6646],
                'HN1': [24.5105, 4.9415, 94.6646],
                'HN2': [26.8906, 4.0758, 86.7877],
                'HNZ': [16.0941, 2.5401, 136.7054]
            }
            cmpframe = pd.DataFrame(cmpdict)
            assert df['PGA'].equals(cmpframe)

            inventory = workspace.getInventory(usid)
            codes = [
                station.code for station in inventory.networks[0].stations
            ]
            assert sorted(codes) == ['HSES', 'THZ', 'WPWS', 'WTMC']

    except Exception:
        raise
    finally:
        shutil.rmtree(tdir)
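
The metric calls near the end of this example follow a compute-then-read pattern: setStreamMetrics computes and stores waveform metrics for a label, and getStreamMetrics/getMetricsTable read them back. A hedged sketch of that pattern, reusing the IMC/IMT spellings from the example (the workspace path is a placeholder and the import location may differ by gmprocess version):

from gmprocess.io.asdf.stream_workspace import StreamWorkspace  # assumed import path

workspace = StreamWorkspace.open('/path/to/workspace.h5')
eventid = workspace.getEventIds()[0]

# compute and store metrics for the processed label, then read them back
workspace.setStreamMetrics(eventid, labels=['processed'],
                           imclist=['channels', 'rotd50'],
                           imtlist=['PGA', 'sa1.0'])
df = workspace.getMetricsTable(eventid, labels=['processed'])
workspace.close()
print(df['PGA'])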