Example 1
def test_metrics2():
    eventid = 'usb000syza'
    datafiles, event = read_data_dir('knet', eventid, '*')
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config = get_config()
    config['metrics']['output_imts'].append('Arias')
    config['metrics']['output_imcs'].append('arithmetic_mean')
    # turn off sta/lta check and snr checks
    newconfig = drop_processing(config, ['check_sta_lta', 'compute_snr'])
    processed_streams = process_streams(raw_streams, event, config=newconfig)

    tdir = tempfile.mkdtemp()
    try:
        tfile = os.path.join(tdir, 'test.hdf')
        workspace = StreamWorkspace(tfile)
        workspace.addEvent(event)
        workspace.addStreams(event, processed_streams, label='processed')
        workspace.calcMetrics(event.id, labels=['processed'])
        etable, imc_tables1 = workspace.getTables('processed')
        etable2, imc_tables2 = workspace.getTables('processed', config=config)
        assert 'ARITHMETIC_MEAN' not in imc_tables1
        assert 'ARITHMETIC_MEAN' in imc_tables2
        assert 'ARIAS' in imc_tables2['ARITHMETIC_MEAN']
    except Exception:
        raise
    finally:
        shutil.rmtree(tdir)
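For reference, the IMC tables returned by getTables() are pandas DataFrames keyed by IMC name, so the Arias column added above can be inspected directly; a minimal sketch continuing from the variables in the test (the describe() call is illustrative only):

# Sketch only: look at the Arias intensity values in the arithmetic-mean table.
arias = imc_tables2['ARITHMETIC_MEAN']['ARIAS']
print(arias.describe())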
Example 2
def test_stream_params():
    eventid = 'us1000778i'
    datafiles, event = read_data_dir(
        'geonet',
        eventid,
        '20161113_110259_WTMC_20.V1A'
    )
    tdir = tempfile.mkdtemp()
    streams = []
    try:
        streams += read_data(datafiles[0])
        statsdict = {'name': 'Fred', 'age': 34}
        streams[0].setStreamParam('stats', statsdict)
        tfile = os.path.join(tdir, 'test.hdf')
        workspace = StreamWorkspace(tfile)
        workspace.addEvent(event)
        workspace.addStreams(event, streams, label='stats')
        outstreams = workspace.getStreams(event.id, labels=['stats'])
        cmpdict = outstreams[0].getStreamParam('stats')
        assert cmpdict == statsdict
        workspace.close()
    except Exception:
        raise
    finally:
        shutil.rmtree(tdir)
Example 3
def get_sample_data(volume):
    thisdir = pathlib.Path(__file__).parent
    datafile = (thisdir / ".." / ".." / ".." / ".." / "gmprocess" / "data" /
                "testdata" / "asdf" / "nc71126864" / "workspace.h5")
    workspace = StreamWorkspace.open(datafile)
    t1 = time.time()
    eventid = workspace.getEventIds()[0]
    t2 = time.time()
    print(f"{t2-t1:.2f} seconds to read eventid")
    scalar_event = workspace.getEvent(eventid)

    station = "CE.79435"
    labels = workspace.getLabels()
    if volume == Volume.RAW:
        labels.remove("default")
    elif volume == Volume.CONVERTED:
        labels.remove("default")
    else:
        labels.remove("unprocessed")
    plabel = labels[0]
    streams = workspace.getStreams(eventid,
                                   stations=[station],
                                   labels=[plabel])
    gmprocess_version = workspace.getGmprocessVersion()
    idx = gmprocess_version.find(".dev")
    gmprocess_version = gmprocess_version[0:idx]
    stream = streams[0]
    trace = stream[0]
    workspace.close()
    return (trace, eventid, scalar_event, stream, gmprocess_version)
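A minimal usage sketch for get_sample_data(); Volume.PROCESSED is assumed to be a member of the same Volume enum referenced in the branches above, and the unpacking order follows the return tuple:

# Hedged sketch: pull one processed trace/stream pair out of the sample workspace.
trace, eventid, scalar_event, stream, gmprocess_version = get_sample_data(Volume.PROCESSED)
print(eventid, stream.get_id(), gmprocess_version)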
Example 4
def generate_workspace():
    """Generate simple HDF5 with ASDF layout for testing.
    """
    PCOMMANDS = [
        'assemble',
        'process',
    ]
    EVENTID = 'us1000778i'
    LABEL = 'ptest'
    datafiles, event = read_data_dir('geonet', EVENTID, '*.V1A')

    tdir = tempfile.mkdtemp()
    tfilename = os.path.join(tdir, 'workspace.h5')

    raw_data = []
    for dfile in datafiles:
        raw_data += read_data(dfile)
    write_asdf(tfilename, raw_data, event, label="unprocessed")
    del raw_data

    config = get_config()
    workspace = StreamWorkspace.open(tfilename)
    raw_streams = workspace.getStreams(EVENTID, labels=['unprocessed'])
    pstreams = process_streams(raw_streams, event, config=config)
    workspace.addStreams(event, pstreams, label=LABEL)
    workspace.calcMetrics(event.id, labels=[LABEL], config=config)

    return tfilename
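A short usage sketch, assuming a test that consumes the temporary workspace file returned above (the 'ptest' label matches LABEL in the function):

# Hedged sketch: reopen the generated workspace and fetch the processed streams.
tfilename = generate_workspace()
workspace = StreamWorkspace.open(tfilename)
pstreams = workspace.getStreams('us1000778i', labels=['ptest'])
workspace.close()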
Example 5
    def _process_event(self, event):
        event_dir = os.path.join(self.gmrecords.data_path, event.id)
        workname = os.path.join(event_dir, WORKSPACE_NAME)
        if not os.path.isfile(workname):
            logging.info('No workspace file found for event %s. Please run '
                         'subcommand \'assemble\' to generate workspace file.'
                         % event.id)
            logging.info('Continuing to next event.')
            return event.id

        workspace = StreamWorkspace.open(workname)
        rstreams = workspace.getStreams(event.id,
                                        labels=['unprocessed'],
                                        config=self.gmrecords.conf)

        if len(rstreams):
            logging.info('Processing \'%s\' streams for event %s...' %
                         ('unprocessed', event.id))
            pstreams = process_streams(rstreams,
                                       event,
                                       config=self.gmrecords.conf)
            workspace.addStreams(event, pstreams, label=self.process_tag)
        else:
            logging.info('No streams found. Nothing to do. Goodbye.')

        workspace.close()
        return event.id
Example 6
def get_eq_full_status_df(wdir, eqid, imc):
    ws = StreamWorkspace.open(os.path.join(wdir, eqid, 'workspace.h5'))
    labels = ws.getLabels()
    labels.remove('unprocessed')
    processed_label = labels[0]
    sc = ws.getStreams(ws.getEventIds()[0], labels=[processed_label])
    ws.close()

    rows = []
    for st in sc:
        coords = st[0].stats.coordinates
        row = [st.id, coords.latitude, coords.longitude]
        if st.passed:
            row.append('Passed')
        else:
            for tr in st:
                if tr.hasParameter('failure'):
                    row.append(tr.getParameter('failure')['reason'])
                    break
        rows.append(row)
    df = pd.DataFrame(rows,
                      columns=[
                          'StationID', 'StationLatitude', 'StationLongitude',
                          'Failure reason'
                      ])
    return df
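A hedged usage sketch; the path layout (wdir/eqid/workspace.h5) follows the os.path.join call above, and the event id and IMC string are illustrative only:

# Sketch only: build the pass/fail status table for one event directory.
df = get_eq_full_status_df('/path/to/project/data', 'ci38457511', 'greater_of_two_horizontals')
print(df.head())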
Example 7
def generate_workspace():
    """Generate simple HDF5 with ASDF layout for testing."""
    PCOMMANDS = [
        "assemble",
        "process",
    ]
    EVENTID = "us1000778i"
    LABEL = "ptest"
    datafiles, event = read_data_dir("geonet", EVENTID, "*.V1A")

    tdir = tempfile.mkdtemp()
    tfilename = os.path.join(tdir, "workspace.h5")

    raw_data = []
    for dfile in datafiles:
        raw_data += read_data(dfile)
    write_asdf(tfilename, raw_data, event, label="unprocessed")
    del raw_data

    config = update_config(os.path.join(datadir, "config_min_freq_0p2.yml"))

    workspace = StreamWorkspace.open(tfilename)
    raw_streams = workspace.getStreams(EVENTID,
                                       labels=["unprocessed"],
                                       config=config)
    pstreams = process_streams(raw_streams, event, config=config)
    workspace.addStreams(event, pstreams, label=LABEL)
    workspace.calcMetrics(event.id, labels=[LABEL], config=config)

    return tfilename
Example 8
    def __init__(self,
                 cosmos_directory,
                 h5_filename,
                 volume=Volume.PROCESSED,
                 label=None):
        if volume == Volume.PROCESSED and label is None:
            raise Exception("Must supply label for processed data")
        self._workspace = StreamWorkspace.open(h5_filename)
        self._cosmos_directory = pathlib.Path(cosmos_directory)
        self._volume = volume
        self._label = label
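The enclosing class is not shown in this snippet; a hedged construction sketch, with CosmosWriter used purely as an assumed name for that class:

# Hypothetical instantiation; the label is required because volume defaults to
# Volume.PROCESSED (see the check in __init__ above).
writer = CosmosWriter('/path/to/cosmos_out', '/path/to/workspace.h5',
                      volume=Volume.PROCESSED, label='default')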
Example 9
    def main(self, gmrecords):
        """Export provenance tables.

        Args:
            gmrecords:
                GMrecordsApp instance.
        """
        logging.info('Running subcommand \'%s\'' % self.command_name)

        self.gmrecords = gmrecords
        self._check_arguments()
        self._get_events()

        for event in self.events:
            self.eventid = event.id
            logging.info('Creating provenance tables for event %s...' %
                         self.eventid)
            event_dir = os.path.join(gmrecords.data_path, self.eventid)
            workname = os.path.join(event_dir, WORKSPACE_NAME)
            if not os.path.isfile(workname):
                logging.info(
                    'No workspace file found for event %s. Please run '
                    'subcommand \'assemble\' to generate workspace file.' %
                    self.eventid)
                logging.info('Continuing to next event.')
                continue

            self.workspace = StreamWorkspace.open(workname)
            self._get_pstreams()

            if not (hasattr(self, 'pstreams') and len(self.pstreams) > 0):
                logging.info('No processed waveforms available. No provenance '
                             'tables created.')
                self.workspace.close()
                continue

            provdata = self.workspace.getProvenance(
                self.eventid, labels=self.gmrecords.args.label)
            self.workspace.close()

            basename = '%s_%s_provenance' % (gmrecords.project,
                                             gmrecords.args.label)
            if gmrecords.args.output_format == 'csv':
                csvfile = os.path.join(event_dir, '%s.csv' % basename)
                self.append_file('Provenance', csvfile)
                provdata.to_csv(csvfile, index=False)
            else:
                excelfile = os.path.join(event_dir, '%s.xlsx' % basename)
                self.append_file('Provenance', excelfile)
                provdata.to_excel(excelfile, index=False)

        self._summarize_files_created()
Example 10
    def main(self, gmrecords):
        """Export files for ShakeMap input.

        Args:
            gmrecords:
                GMrecordsApp instance.
        """
        logging.info('Running subcommand \'%s\'' % self.command_name)

        self.gmrecords = gmrecords
        self._get_events()

        for event in self.events:
            self.eventid = event.id
            logging.info('Creating shakemap files for event %s...' %
                         self.eventid)

            event_dir = os.path.join(gmrecords.data_path, event.id)
            workname = os.path.join(event_dir, WORKSPACE_NAME)
            if not os.path.isfile(workname):
                logging.info(
                    'No workspace file found for event %s. Please run '
                    'subcommand \'assemble\' to generate workspace file.' %
                    event.id)
                logging.info('Continuing to next event.')
                continue

            self.workspace = StreamWorkspace.open(workname)
            self._get_labels()
            self._get_pstreams()

            if not hasattr(self, 'pstreams'):
                logging.info('No processed waveforms available. No shakemap '
                             'files created.')
                self.workspace.close()
                return

            # TODO: re-write this so that it uses the already computed values
            # in self.workspace.dataset.auxiliary_data.WaveFormMetrics
            # rather than recomputing the metrics from self.pstreams.
            # shakemap_file, jsonfile = save_shakemap_amps(
            #     self.pstreams, event, event_dir)

            jsonfile, stationfile, _ = create_json(self.workspace, event,
                                                   event_dir,
                                                   self.gmrecords.args.label)

            self.workspace.close()
            self.append_file('shakemap', jsonfile)
            self.append_file('shakemap', stationfile)

        self._summarize_files_created()
Example 11
    def _compute_event_waveforms(self, event):
        self.eventid = event.id
        logging.info(
            'Computing waveform metrics for event %s...' % self.eventid)
        event_dir = os.path.join(self.gmrecords.data_path, self.eventid)
        workname = os.path.join(event_dir, WORKSPACE_NAME)
        if not os.path.isfile(workname):
            logging.info(
                'No workspace file found for event %s. Please run '
                'subcommand \'assemble\' to generate workspace file.'
                % self.eventid)
            logging.info('Continuing to next event.')
            return event.id

        self.workspace = StreamWorkspace.open(workname)
        self._get_pstreams()

        if not hasattr(self, 'pstreams'):
            logging.info('No processed waveforms available. No waveform '
                         'metrics computed.')
            self.workspace.close()
            return event.id

        for stream in self.pstreams:
            if stream.passed:
                logging.info(
                    'Calculating waveform metrics for %s...'
                    % stream.get_id()
                )
                summary = StationSummary.from_config(
                    stream, event=event, config=self.gmrecords.conf,
                    calc_waveform_metrics=True,
                    calc_station_metrics=False
                )
                xmlstr = summary.get_metric_xml()
                tag = stream.tag
                metricpath = '/'.join([
                    format_netsta(stream[0].stats),
                    format_nslit(stream[0].stats, stream.get_inst(), tag)
                ])
                self.workspace.insert_aux(
                    xmlstr, 'WaveFormMetrics', metricpath,
                    overwrite=self.gmrecords.args.overwrite)
            logging.info('Added waveform metrics to workspace files '
                         'with tag \'%s\'.' % self.gmrecords.args.label)

        self.workspace.close()
        return event.id
Example 12
def download(event, event_dir, config, directory):
    """Download data or load data from local directory, turn into Streams.

    Args:
        event (ScalarEvent):
            Object containing basic event hypocenter, origin time, magnitude.
        event_dir (str):
            Path where raw directory should be created (if downloading).
        config (dict):
            Dictionary with gmprocess configuration information.
        directory (str):
            Path where raw data already exists.
    Returns:
        tuple:
            - StreamWorkspace: Contains the event and raw streams.
            - str: Name of workspace HDF file.
            - StreamCollection: Raw data StationStreams.
    """
    # generate the raw directory
    rawdir = get_rawdir(event_dir)

    if directory is None:
        tcollection, terrors = fetch_data(
            event.time.datetime,
            event.latitude,
            event.longitude,
            event.depth_km,
            event.magnitude,
            config=config,
            rawdir=rawdir)
        # create an event.json file in each event directory,
        # in case user is simply downloading for now
        create_event_file(event, event_dir)
    else:
        streams, bad, terrors = directory_to_streams(directory)
        tcollection = StreamCollection(streams)

    # plot the raw waveforms
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UserWarning)
        pngfiles = glob.glob(os.path.join(rawdir, '*.png'))
        if not len(pngfiles):
            plot_raw(rawdir, tcollection, event)

    # create the workspace file and put the unprocessed waveforms in it
    workname = os.path.join(event_dir, 'workspace.hdf')
    if os.path.isfile(workname):
        os.remove(workname)
    workspace = StreamWorkspace(workname)
    workspace.addEvent(event)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=H5pyDeprecationWarning)
        workspace.addStreams(event, tcollection, label='unprocessed')

    return (workspace, workname, tcollection)
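A hedged usage sketch; event, event_dir, and config are assumed to come from the caller, and passing directory=None selects the fetch_data branch above:

# Sketch only: fetch remote data for an event and keep the raw StreamCollection.
workspace, workname, raw_collection = download(event, event_dir, config, directory=None)
print(workname, len(raw_collection))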
Example 13
def _test_stream_params():
    eventid = "us1000778i"
    datafiles, event = read_data_dir("geonet", eventid,
                                     "20161113_110259_WTMC_20.V1A")
    tdir = tempfile.mkdtemp()
    streams = []
    try:
        streams += read_data(datafiles[0])
        statsdict = {"name": "Fred", "age": 34}
        streams[0].setStreamParam("stats", statsdict)
        tfile = os.path.join(tdir, "test.hdf")
        workspace = StreamWorkspace(tfile)
        workspace.addEvent(event)
        workspace.addStreams(event, streams, label="stats")
        outstreams = workspace.getStreams(event.id, labels=["stats"])
        cmpdict = outstreams[0].getStreamParam("stats")
        assert cmpdict == statsdict
        workspace.close()
    except Exception:
        raise
    finally:
        shutil.rmtree(tdir)
Example 14
    def main(self, gmrecords):
        """Export files for ShakeMap input.

        Args:
            gmrecords:
                GMrecordsApp instance.
        """
        logging.info('Running subcommand \'%s\'' % self.command_name)

        self.gmrecords = gmrecords
        self._check_arguments()
        self._get_events()

        for event in self.events:
            self.eventid = event.id
            logging.info('Creating shakemap files for event %s...' %
                         self.eventid)

            event_dir = os.path.join(gmrecords.data_path, event.id)
            workname = os.path.join(event_dir, WORKSPACE_NAME)
            if not os.path.isfile(workname):
                logging.info(
                    'No workspace file found for event %s. Please run '
                    'subcommand \'assemble\' to generate workspace file.' %
                    event.id)
                logging.info('Continuing to next event.')
                continue

            self.workspace = StreamWorkspace.open(workname)
            self._get_labels()

            expanded_imts = self.gmrecords.args.expand_imts
            jsonfile, stationfile, _ = create_json(self.workspace,
                                                   event,
                                                   event_dir,
                                                   self.gmrecords.args.label,
                                                   config=self.gmrecords.conf,
                                                   expanded_imts=expanded_imts)

            self.workspace.close()
            if jsonfile is not None:
                self.append_file('shakemap', jsonfile)
            if stationfile is not None:
                self.append_file('shakemap', stationfile)

        self._summarize_files_created()
Example 15
def test_stream_params():
    eventid = 'us1000778i'
    datafiles, event = read_data_dir('geonet',
                                     eventid,
                                     '20161113_110259_WTMC_20.V1A')
    tdir = tempfile.mkdtemp()
    streams = []
    try:
        streams += read_data(datafiles[0])
        statsdict = {'name': 'Fred', 'age': 34}
        streams[0].setStreamParam('stats', statsdict)
        tfile = os.path.join(tdir, 'test.hdf')
        workspace = StreamWorkspace(tfile)
        workspace.addEvent(event)
        workspace.addStreams(event, streams, label='stats')
        outstreams = workspace.getStreams(event.id, labels=['stats'])
        cmpdict = outstreams[0].getStreamParam('stats')
        assert cmpdict == statsdict
        workspace.close()
    except Exception:
        raise
    finally:
        shutil.rmtree(tdir)
Example 16
def test_get_shakemap():
    tdir = tempfile.mkdtemp()
    try:
        thisdir = pathlib.Path(__file__).parent
        datadir = (thisdir / '..' / '..' / '..' / 'gmprocess' / 'data' /
                   'testdata')
        datafile = datadir / 'workspace_ci38457511.hdf'

        workspace = StreamWorkspace.open(datafile)
        eventid = workspace.getEventIds()[0]
        event = workspace.getEvent(eventid)
        label = '20201209195000'
        processed = workspace.getStreams(eventid, labels=[label])

        excelfile, jsonfile = save_shakemap_amps(processed, event, tdir)
        with open(jsonfile, 'rt', encoding='utf-8') as fp:
            jdict = json.load(fp)
        assert jdict['features'][0]['id'] == 'CJ.T001230'

    except Exception as e:
        raise AssertionError(str(e))
    finally:
        shutil.rmtree(tdir)
Example 17
    def main(self, gmrecords):
        """Export metric tables.

        Args:
            gmrecords:
                GMrecordsApp instance.
        """
        logging.info('Running subcommand \'%s\'' % self.command_name)

        self.gmrecords = gmrecords
        self._get_events()

        for event in self.events:
            self.eventid = event.id
            logging.info('Creating tables for event %s...' % self.eventid)
            event_dir = os.path.join(gmrecords.data_path, self.eventid)
            workname = os.path.join(event_dir, WORKSPACE_NAME)
            if not os.path.isfile(workname):
                logging.info(
                    'No workspace file found for event %s. Please run '
                    'subcommand \'assemble\' to generate workspace file.' %
                    self.eventid)
                logging.info('Continuing to next event.')
                continue

            self.workspace = StreamWorkspace.open(workname)
            self._get_pstreams()

            if not hasattr(self, 'pstreams'):
                logging.info('No processed waveforms available. No metric '
                             'tables created.')
                self.workspace.close()
                return

            event_table, imc_tables, readmes = self.workspace.getTables(
                self.gmrecords.args.label, streams=self.pstreams)
            ev_fit_spec, fit_readme = self.workspace.getFitSpectraTable(
                self.eventid, self.gmrecords.args.label, self.pstreams)
            self.workspace.close()

            outdir = gmrecords.data_path

            # Set the precisions for the imc tables, event table, and
            # fit_spectra table before writing
            imc_tables_formatted = {}
            for imc, imc_table in imc_tables.items():
                imc_tables_formatted[imc] = set_precisions(imc_table)
            event_table_formatted = set_precisions(event_table)
            df_fit_spectra_formatted = set_precisions(ev_fit_spec)

            imc_list = [
                '%s_%s_metrics_%s' %
                (gmrecords.project, gmrecords.args.label, imc.lower())
                for imc in imc_tables_formatted.keys()
            ]
            readme_list = [
                '%s_%s_metrics_%s_README' %
                (gmrecords.project, gmrecords.args.label, imc.lower())
                for imc in readmes.keys()
            ]
            proj_lab = (gmrecords.project, gmrecords.args.label)

            event_filename = ['%s_%s_events' % proj_lab]
            filenames = event_filename + imc_list + readme_list + [
                '%s_%s_fit_spectra_parameters' % proj_lab,
                '%s_%s_fit_spectra_parameters_README' % proj_lab
            ]

            files = [event_table_formatted] + list(
                imc_tables_formatted.values()) + list(
                    readmes.values()) + [df_fit_spectra_formatted, fit_readme]

            output_format = gmrecords.args.output_format
            if output_format != 'csv':
                output_format = 'xlsx'

            for filename, df in dict(zip(filenames, files)).items():
                filepath = os.path.join(outdir,
                                        filename + '.%s' % output_format)
                if os.path.exists(filepath):
                    if 'README' in filename:
                        continue
                    else:
                        if self.gmrecords.args.overwrite:
                            logging.warning('File exists: %s' % filename)
                            logging.warning('Overwriting file: %s' % filename)
                            mode = 'w'
                            header = True
                        else:
                            logging.warning('File exists: %s' % filename)
                            logging.warning('Appending to file: %s' % filename)
                            mode = 'a'
                            header = False
                else:
                    mode = 'w'
                    header = True
                if output_format == 'csv':
                    df.to_csv(filepath,
                              index=False,
                              float_format=DEFAULT_FLOAT_FORMAT,
                              na_rep=DEFAULT_NA_REP,
                              mode=mode,
                              header=header)
                    if mode == "w":
                        self.append_file('Metric tables', filepath)
                else:
                    # Note: DataFrame.to_excel() does not accept a ``mode``
                    # argument, so only the header flag is passed here.
                    df.to_excel(filepath,
                                index=False,
                                float_format=DEFAULT_FLOAT_FORMAT,
                                na_rep=DEFAULT_NA_REP,
                                header=header)
                    if mode == "w":
                        self.append_file('Metric tables', filepath)

        self._summarize_files_created()
Example 18
def download(event, event_dir, config, directory):
    """Download data or load data from local directory, turn into Streams.

    Args:
        event (ScalarEvent):
            Object containing basic event hypocenter, origin time, magnitude.
        event_dir (str):
            Path where raw directory should be created (if downloading).
        config (dict):
            Dictionary with gmprocess configuration information.
        directory (str):
            Path where data already exists. Must be organized in a 'raw'
            directory, within directories with names as the event ids. For
            example, if `directory` is 'proj_dir' and you have data for
            event id 'abc123' then the raw data to be read in should be
            located in `proj_dir/abc123/raw/`.

    Returns:
        tuple:
            - StreamWorkspace: Contains the event and raw streams.
            - str: Name of workspace HDF file.
            - StreamCollection: Raw data StationStreams.
    """
    # Make raw directory
    rawdir = get_rawdir(event_dir)

    if directory is None:
        tcollection, terrors = fetch_data(event.time.datetime,
                                          event.latitude,
                                          event.longitude,
                                          event.depth_km,
                                          event.magnitude,
                                          config=config,
                                          rawdir=rawdir)
        # create an event.json file in each event directory,
        # in case user is simply downloading for now
        create_event_file(event, event_dir)
    else:
        # Make raw directory
        in_event_dir = os.path.join(directory, event.id)
        in_raw_dir = get_rawdir(in_event_dir)
        streams, bad, terrors = directory_to_streams(in_raw_dir)
        tcollection = StreamCollection(streams, **config['duplicate'])
        create_event_file(event, event_dir)

    # Plot the raw waveforms
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UserWarning)
        pngfiles = glob.glob(os.path.join(rawdir, '*.png'))
        if not len(pngfiles):
            plot_raw(rawdir, tcollection, event)

    # Create the workspace file and put the unprocessed waveforms in it
    workname = os.path.join(event_dir, 'workspace.hdf')

    # Remove any existing workspace file
    if os.path.isfile(workname):
        os.remove(workname)

    workspace = StreamWorkspace(workname)
    workspace.addEvent(event)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=H5pyDeprecationWarning)
        workspace.addStreams(event, tcollection, label='unprocessed')

    return (workspace, workname, tcollection)
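The directory layout described in the docstring, plus a hedged call for the local-data branch (names are illustrative):

# Expected on-disk layout when `directory` is given, per the docstring above:
#   proj_dir/
#       abc123/          <- event.id
#           raw/         <- raw waveform files read by directory_to_streams()
# Sketch only:
workspace, workname, raw_collection = download(event, event_dir, config, directory='proj_dir')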
Example 19
    def _event_station_metrics(self, event):
        self.eventid = event.id
        logging.info('Computing station metrics for event %s...' %
                     self.eventid)
        event_dir = os.path.join(self.gmrecords.data_path, self.eventid)
        workname = os.path.join(event_dir, WORKSPACE_NAME)
        if not os.path.isfile(workname):
            logging.info(
                'No workspace file found for event %s. Please run '
                'subcommand \'assemble\' to generate workspace file.' %
                self.eventid)
            logging.info('Continuing to next event.')
            return event.id

        self.workspace = StreamWorkspace.open(workname)
        self._get_pstreams()

        if not (hasattr(self, 'pstreams') and len(self.pstreams) > 0):
            logging.info('No streams found. Nothing to do. Goodbye.')
            self.workspace.close()
            return event.id

        rupture_file = get_rupture_file(event_dir)
        origin = Origin({
            'id': self.eventid,
            'netid': '',
            'network': '',
            'lat': event.latitude,
            'lon': event.longitude,
            'depth': event.depth_km,
            'locstring': '',
            'mag': event.magnitude,
            'time': event.time
        })
        self.origin = origin
        rupture = get_rupture(origin, rupture_file)

        sta_lats = []
        sta_lons = []
        sta_elev = []
        self.sta_repi = []
        self.sta_rhyp = []
        self.sta_baz = []
        for st in self.pstreams:
            sta_lats.append(st[0].stats.coordinates.latitude)
            sta_lons.append(st[0].stats.coordinates.longitude)
            sta_elev.append(st[0].stats.coordinates.elevation)
            geo_tuple = gps2dist_azimuth(st[0].stats.coordinates.latitude,
                                         st[0].stats.coordinates.longitude,
                                         origin.lat, origin.lon)
            self.sta_repi.append(geo_tuple[0] / M_PER_KM)
            self.sta_baz.append(geo_tuple[1])
            self.sta_rhyp.append(
                distance(st[0].stats.coordinates.longitude,
                         st[0].stats.coordinates.latitude,
                         -st[0].stats.coordinates.elevation / M_PER_KM,
                         origin.lon, origin.lat, origin.depth))

        if isinstance(rupture, PointRupture):
            self._get_ps2ff_splines()
            rjb_hat = self.rjb_spline(self.sta_repi)
            rjb_mean = rjb_hat[0]
            rjb_var = rjb_hat[1]
            rrup_hat = self.rrup_spline(self.sta_repi)
            rrup_mean = rrup_hat[0]
            rrup_var = rrup_hat[1]
            gc2_rx = np.full_like(rjb_mean, np.nan)
            gc2_ry = np.full_like(rjb_mean, np.nan)
            gc2_ry0 = np.full_like(rjb_mean, np.nan)
            gc2_U = np.full_like(rjb_mean, np.nan)
            gc2_T = np.full_like(rjb_mean, np.nan)
        else:
            logging.info('******************************')
            logging.info('* Found rupture              *')
            logging.info('******************************')
            sta_lons = np.array(sta_lons)
            sta_lats = np.array(sta_lats)
            elev = np.full_like(sta_lons, ELEVATION_FOR_DISTANCE_CALCS)
            rrup_mean, rrup_var = rupture.computeRrup(sta_lons, sta_lats, elev)
            rjb_mean, rjb_var = rupture.computeRjb(sta_lons, sta_lats, elev)
            rrup_var = np.full_like(rrup_mean, np.nan)
            rjb_var = np.full_like(rjb_mean, np.nan)
            gc2_dict = rupture.computeGC2(sta_lons, sta_lats, elev)
            gc2_rx = gc2_dict['rx']
            gc2_ry = gc2_dict['ry']
            gc2_ry0 = gc2_dict['ry0']
            gc2_U = gc2_dict['U']
            gc2_T = gc2_dict['T']

            # If we don't have a point rupture, then back azimuth needs
            # to be calculated to the closest point on the rupture
            self.sta_baz = []
            for i in range(len(self.pstreams)):
                dists = []
                bazs = []
                for quad in rupture._quadrilaterals:
                    P0, P1, P2, P3 = quad
                    for point in [P0, P1]:
                        dist, az, baz = gps2dist_azimuth(
                            point.y, point.x, sta_lats[i], sta_lons[i])
                        dists.append(dist)
                        bazs.append(baz)
                self.sta_baz.append(bazs[np.argmin(dists)])

        for i, stream in enumerate(self.pstreams):
            logging.info('Calculating station metrics for %s...' %
                         stream.get_id())
            summary = StationSummary.from_config(stream,
                                                 event=event,
                                                 config=self.gmrecords.conf,
                                                 calc_waveform_metrics=False,
                                                 calc_station_metrics=False,
                                                 rupture=rupture,
                                                 vs30_grids=self.vs30_grids)

            summary._distances = {
                'epicentral': self.sta_repi[i],
                'hypocentral': self.sta_rhyp[i],
                'rupture': rrup_mean[i],
                'rupture_var': rrup_var[i],
                'joyner_boore': rjb_mean[i],
                'joyner_boore_var': rjb_var[i],
                'gc2_rx': gc2_rx[i],
                'gc2_ry': gc2_ry[i],
                'gc2_ry0': gc2_ry0[i],
                'gc2_U': gc2_U[i],
                'gc2_T': gc2_T[i]
            }
            summary._back_azimuth = self.sta_baz[i]
            if self.vs30_grids is not None:
                for vs30_name in self.vs30_grids.keys():
                    tmpgrid = self.vs30_grids[vs30_name]
                    summary._vs30[vs30_name] = {
                        'value':
                        tmpgrid['grid_object'].getValue(
                            float(sta_lats[i]), float(sta_lons[i])),
                        'column_header':
                        tmpgrid['column_header'],
                        'readme_entry':
                        tmpgrid['readme_entry'],
                        'units':
                        tmpgrid['units']
                    }

            xmlstr = summary.get_station_xml()
            metricpath = '/'.join([
                format_netsta(stream[0].stats),
                format_nslit(stream[0].stats, stream.get_inst(), self.eventid)
            ])
            self.workspace.insert_aux(xmlstr,
                                      'StationMetrics',
                                      metricpath,
                                      overwrite=self.gmrecords.args.overwrite)
            logging.info('Added station metrics to workspace files '
                         'with tag \'%s\'.' % self.gmrecords.args.label)

        self.workspace.close()
        return event.id
Example 20
def assemble(event, config, directory, gmprocess_version):
    """Download data or load data from local directory, turn into Streams.

    Args:
        event (ScalarEvent):
            Object containing basic event hypocenter, origin time, magnitude.
        config (dict):
            Dictionary with gmprocess configuration information.
        directory (str):
            Path where data already exists. Must be organized in a 'raw'
            directory, within directories with names as the event ids. For
            example, if `directory` is 'proj_dir' and you have data for
            event id 'abc123' then the raw data to be read in should be
            located in `proj_dir/abc123/raw/`.
        gmprocess_version (str):
            Software version for gmprocess.

    Returns:
        tuple:
            - StreamWorkspace: Contains the event and raw streams.
            - str: Name of workspace HDF file.
            - StreamCollection: Raw data StationStreams.
            - str: Path to the rupture file.
    """

    # Make raw directory
    in_event_dir = os.path.join(directory, event.id)
    in_raw_dir = get_rawdir(in_event_dir)
    logging.debug(f"in_raw_dir: {in_raw_dir}")
    streams, unprocessed_files, unprocessed_file_errors = directory_to_streams(
        in_raw_dir, config=config)
    # Write errors to a csv file
    failures_file = Path(in_raw_dir) / "read_failures.csv"
    colnames = ["File", "Failure"]
    with open(failures_file, "w", newline="") as f:
        writer = csv.writer(f, delimiter=",", quoting=csv.QUOTE_MINIMAL)
        writer.writerow(colnames)
        for ufile, uerror in zip(unprocessed_files, unprocessed_file_errors):
            writer.writerow([ufile, uerror])

    logging.debug("streams:")
    logging.debug(streams)

    if config["read"]["use_streamcollection"]:
        stream_array = StreamCollection(streams, **config["duplicate"])
    else:
        stream_array = StreamArray(streams)

    logging.info("stream_array.describe_string():")
    logging.info(stream_array.describe_string())

    # Create the workspace file and put the unprocessed waveforms in it
    workname = os.path.join(in_event_dir, WORKSPACE_NAME)

    # Remove any existing workspace file
    if os.path.isfile(workname):
        os.remove(workname)

    workspace = StreamWorkspace(workname)
    workspace.addEvent(event)
    logging.debug("workspace.dataset.events:")
    logging.debug(workspace.dataset.events)
    workspace.addGmprocessVersion(gmprocess_version)
    workspace.addConfig()
    workspace.addStreams(event,
                         stream_array,
                         label="unprocessed",
                         gmprocess_version=gmprocess_version)
    logging.debug("workspace.dataset.waveforms.list():")
    logging.debug(workspace.dataset.waveforms.list())
    logging.debug("workspace.dataset.config")

    return workspace
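A hedged usage sketch; directory points at the project data path containing <event.id>/raw/, the version string is illustrative, and the returned workspace is still open, so the caller closes it:

# Sketch only: assemble raw data for one event, then close the workspace.
workspace = assemble(event, config, directory='/path/to/project/data', gmprocess_version='1.2.0')
print(workspace.dataset.waveforms.list())
workspace.close()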
Example 21
def test_workspace():
    eventid = 'us1000778i'
    datafiles, event = read_data_dir('geonet', eventid, '*.V1A')
    tdir = tempfile.mkdtemp()
    try:
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=H5pyDeprecationWarning)
            warnings.filterwarnings("ignore", category=YAMLLoadWarning)
            warnings.filterwarnings("ignore", category=FutureWarning)
            config = get_config()
            tfile = os.path.join(tdir, 'test.hdf')
            raw_streams = []
            for dfile in datafiles:
                raw_streams += read_data(dfile)

            workspace = StreamWorkspace(tfile)
            t1 = time.time()
            workspace.addStreams(event, raw_streams, label='raw')
            t2 = time.time()
            print('Adding %i streams took %.2f seconds' %
                  (len(raw_streams), (t2 - t1)))

            str_repr = workspace.__repr__()
            assert str_repr == 'Events: 1 Stations: 3 Streams: 3'

            eventobj = workspace.getEvent(eventid)
            assert eventobj.origins[0].latitude == event.origins[0].latitude
            assert eventobj.magnitudes[0].mag == event.magnitudes[0].mag

            stations = workspace.getStations()
            assert sorted(stations) == ['hses', 'thz', 'wtmc']

            stations = workspace.getStations(eventid=eventid)
            assert sorted(stations) == ['hses', 'thz', 'wtmc']

            # test retrieving tags for an event that doesn't exist
            try:
                workspace.getStreamTags('foo')
                raise AssertionError('Expected KeyError for unknown event.')
            except KeyError:
                pass

            # test retrieving event that doesn't exist
            try:
                workspace.getEvent('foo')
                raise AssertionError('Expected KeyError for unknown event.')
            except KeyError:
                pass

            instream = None
            for stream in raw_streams:
                if stream[0].stats.station.lower() == 'hses':
                    instream = stream
                    break
            assert instream is not None
            outstream = workspace.getStreams(eventid,
                                             stations=['hses'],
                                             labels=['raw'])[0]
            compare_streams(instream, outstream)

            label_summary = workspace.summarizeLabels()
            assert label_summary.iloc[0]['Label'] == 'raw'
            assert label_summary.iloc[0]['Software'] == 'gmprocess'

            sc = StreamCollection(raw_streams)
            processed_streams = process_streams(sc, event, config=config)
            workspace.addStreams(event, processed_streams, 'processed')

            idlist = workspace.getEventIds()
            assert idlist[0] == eventid

            event_tags = workspace.getStreamTags(eventid)
            assert sorted(event_tags) == ['hses_processed', 'hses_raw',
                                          'thz_processed', 'thz_raw',
                                          'wtmc_processed', 'wtmc_raw']
            outstream = workspace.getStreams(eventid,
                                             stations=['hses'],
                                             labels=['processed'])[0]

            provenance = workspace.getProvenance(eventid, labels=['processed'])
            first_row = pd.Series({'Record': 'NZ.HSES.HN1',
                                   'Processing Step': 'Remove Response',
                                   'Step Attribute': 'input_units',
                                   'Attribute Value': 'counts'})

            last_row = pd.Series({'Record': 'NZ.WTMC.HNZ',
                                  'Processing Step': 'Lowpass Filter',
                                  'Step Attribute': 'number_of_passes',
                                  'Attribute Value': 2})
            assert provenance.iloc[0].equals(first_row)
            assert provenance.iloc[-1].equals(last_row)

            # compare the parameters from the input processed stream
            # to it's output equivalent
            instream = None
            for stream in processed_streams:
                if stream[0].stats.station.lower() == 'hses':
                    instream = stream
                    break
            assert instream is not None
            compare_streams(instream, outstream)
            workspace.close()

            # read in data from a second event and stash it in the workspace
            eventid = 'nz2018p115908'
            datafiles, event = read_data_dir('geonet', eventid, '*.V2A')
            raw_streams = []
            for dfile in datafiles:
                raw_streams += read_data(dfile)

            workspace = StreamWorkspace.open(tfile)
            workspace.addStreams(event, raw_streams, label='foo')

            stations = workspace.getStations(eventid)

            eventids = workspace.getEventIds()
            assert eventids == ['us1000778i', 'nz2018p115908']
            instation = raw_streams[0][0].stats.station
            this_stream = workspace.getStreams(eventid,
                                               stations=[instation],
                                               labels=['foo'])[0]
            assert instation == this_stream[0].stats.station
            usid = 'us1000778i'
            inventory = workspace.getInventory(usid)
            codes = [station.code for station in inventory.networks[0].stations]
            assert sorted(codes) == ['HSES', 'THZ', 'WPWS', 'WTMC']

    except Exception:
        raise
    finally:
        shutil.rmtree(tdir)
Example 22
def _test_metrics2():
    eventid = 'usb000syza'
    datafiles, event = read_data_dir('knet', eventid, '*')
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config = update_config(os.path.join(datadir, 'config_min_freq_0p2.yml'))
    config['metrics']['output_imts'].append('Arias')
    config['metrics']['output_imcs'].append('arithmetic_mean')
    # Adjust checks so that streams pass checks for this test
    newconfig = drop_processing(config, ['check_sta_lta'])
    csnr = [s for s in newconfig['processing'] if 'compute_snr' in s.keys()][0]
    csnr['compute_snr']['check']['threshold'] = -10.0
    processed_streams = process_streams(raw_streams, event, config=newconfig)

    tdir = tempfile.mkdtemp()
    try:
        tfile = os.path.join(tdir, 'test.hdf')
        workspace = StreamWorkspace(tfile)
        workspace.addEvent(event)
        workspace.addStreams(event, processed_streams, label='processed')
        workspace.calcMetrics(event.id, labels=['processed'])
        etable, imc_tables1, readmes1 = workspace.getTables('processed')
        assert 'ARITHMETIC_MEAN' not in imc_tables1
        assert 'ARITHMETIC_MEAN' not in readmes1
        del workspace.dataset.auxiliary_data.WaveFormMetrics
        del workspace.dataset.auxiliary_data.StationMetrics
        workspace.calcMetrics(event.id, labels=['processed'], config=config)
        etable2, imc_tables2, readmes2 = workspace.getTables('processed')
        assert 'ARITHMETIC_MEAN' in imc_tables2
        assert 'ARITHMETIC_MEAN' in readmes2
        assert 'ARIAS' in imc_tables2['ARITHMETIC_MEAN']
        testarray = readmes2['ARITHMETIC_MEAN']['Column header'].to_numpy()
        assert 'ARIAS' in testarray
        workspace.close()
    except Exception:
        raise
    finally:
        shutil.rmtree(tdir)
Example 23
def test_metrics():
    eventid = 'usb000syza'
    datafiles, event = read_data_dir('knet',
                                     eventid,
                                     '*')
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config = get_config()
    # turn off sta/lta check and snr checks
    newconfig = drop_processing(config, ['check_sta_lta', 'compute_snr'])
    processed_streams = process_streams(raw_streams, event, config=newconfig)

    tdir = tempfile.mkdtemp()
    try:
        tfile = os.path.join(tdir, 'test.hdf')
        workspace = StreamWorkspace(tfile)
        workspace.addEvent(event)
        workspace.addStreams(event, processed_streams, label='processed')
        stream1 = processed_streams[0]
        stream2 = processed_streams[1]
        summary1 = StationSummary.from_config(stream1)
        summary2 = StationSummary.from_config(stream2)
        workspace.setStreamMetrics(event.id, 'processed', summary1)
        workspace.setStreamMetrics(event.id, 'processed', summary2)
        summary1_a = workspace.getStreamMetrics(event.id,
                                                stream1[0].stats.station,
                                                'processed')
        s1_df_in = summary1.pgms.sort_values(['IMT', 'IMC'])
        s1_df_out = summary1_a.pgms.sort_values(['IMT', 'IMC'])
        array1 = s1_df_in['Result'].to_numpy()
        array2 = s1_df_out['Result'].to_numpy()
        np.testing.assert_almost_equal(array1, array2, decimal=4)

        df = workspace.getMetricsTable(event.id)
        cmp_series = {'GREATER_OF_TWO_HORIZONTALS': 0.6787,
                      'HN1': 0.3869,
                      'HN2': 0.6787,
                      'HNZ': 0.7663}
        pga_dict = df.iloc[0]['PGA'].to_dict()
        for key, value in pga_dict.items():
            value2 = cmp_series[key]
            np.testing.assert_almost_equal(value, value2, decimal=4)

        workspace.close()
    except Exception:
        raise
    finally:
        shutil.rmtree(tdir)
Example 24
def test_metrics():
    eventid = "usb000syza"
    datafiles, event = read_data_dir("knet", eventid, "*")
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config = update_config(os.path.join(datadir, "config_min_freq_0p2.yml"))
    # turn off sta/lta check and snr checks
    # newconfig = drop_processing(config, ['check_sta_lta', 'compute_snr'])
    # processed_streams = process_streams(raw_streams, event, config=newconfig)
    newconfig = config.copy()
    newconfig["processing"].append(
        {"NNet_QA": {
            "acceptance_threshold": 0.5,
            "model_name": "CantWell"
        }})
    processed_streams = process_streams(raw_streams.copy(),
                                        event,
                                        config=newconfig)

    tdir = tempfile.mkdtemp()
    try:
        tfile = os.path.join(tdir, "test.hdf")
        workspace = StreamWorkspace(tfile)
        workspace.addEvent(event)
        workspace.addStreams(event, raw_streams, label="raw")
        workspace.addStreams(event, processed_streams, label="processed")
        stream1 = raw_streams[0]

        # Get metrics from station summary for raw streams
        summary1 = StationSummary.from_config(stream1)
        s1_df_in = summary1.pgms.sort_values(["IMT", "IMC"])
        array1 = s1_df_in["Result"].to_numpy()

        # Compare to metrics from getStreamMetrics for raw streams
        workspace.calcMetrics(eventid, labels=["raw"])
        summary1_a = workspace.getStreamMetrics(event.id,
                                                stream1[0].stats.network,
                                                stream1[0].stats.station,
                                                "raw")
        s1_df_out = summary1_a.pgms.sort_values(["IMT", "IMC"])
        array2 = s1_df_out["Result"].to_numpy()

        np.testing.assert_allclose(array1, array2, atol=1e-6, rtol=1e-6)
        workspace.close()
    except Exception:
        raise
    finally:
        shutil.rmtree(tdir)
Example 25
def _test_workspace():
    eventid = "us1000778i"
    datafiles, event = read_data_dir("geonet", eventid, "*.V1A")
    tdir = tempfile.mkdtemp()
    try:
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=H5pyDeprecationWarning)
            warnings.filterwarnings("ignore", category=YAMLError)
            warnings.filterwarnings("ignore", category=FutureWarning)
            config = update_config(
                os.path.join(datadir, "config_min_freq_0p2.yml"))
            tfile = os.path.join(tdir, "test.hdf")
            raw_streams = []
            for dfile in datafiles:
                raw_streams += read_data(dfile)

            workspace = StreamWorkspace(tfile)
            t1 = time.time()
            workspace.addStreams(event, raw_streams, label="raw")
            t2 = time.time()
            print("Adding %i streams took %.2f seconds" % (len(raw_streams),
                                                           (t2 - t1)))

            str_repr = workspace.__repr__()
            assert str_repr == "Events: 1 Stations: 3 Streams: 3"

            eventobj = workspace.getEvent(eventid)
            assert eventobj.origins[0].latitude == event.origins[0].latitude
            assert eventobj.magnitudes[0].mag == event.magnitudes[0].mag

            stations = workspace.getStations()
            assert sorted(stations) == ["HSES", "THZ", "WTMC"]

            stations = workspace.getStations()
            assert sorted(stations) == ["HSES", "THZ", "WTMC"]

            # test retrieving event that doesn't exist
            with pytest.raises(KeyError):
                workspace.getEvent("foo")

            instream = None
            for stream in raw_streams:
                if stream[0].stats.station.lower() == "hses":
                    instream = stream
                    break
            if instream is None:
                raise ValueError("Instream should not be none.")
            outstream = workspace.getStreams(eventid,
                                             stations=["HSES"],
                                             labels=["raw"])[0]
            compare_streams(instream, outstream)

            label_summary = workspace.summarizeLabels()
            assert label_summary.iloc[0]["Label"] == "raw"
            assert label_summary.iloc[0]["Software"] == "gmprocess"

            sc = StreamCollection(raw_streams)
            processed_streams = process_streams(sc, event, config=config)
            workspace.addStreams(event, processed_streams, "processed")

            idlist = workspace.getEventIds()
            assert idlist[0] == eventid

            outstream = workspace.getStreams(eventid,
                                             stations=["HSES"],
                                             labels=["processed"])[0]

            provenance = workspace.getProvenance(eventid, labels=["processed"])
            first_row = pd.Series({
                "Record": "NZ.HSES.--.HN1_us1000778i_processed",
                "Processing Step": "Remove Response",
                "Step Attribute": "input_units",
                "Attribute Value": "counts",
            })

            last_row = pd.Series({
                "Record": "NZ.WTMC.--.HNZ_us1000778i_processed",
                "Processing Step": "Lowpass Filter",
                "Step Attribute": "number_of_passes",
                "Attribute Value": 2,
            })
            assert provenance.iloc[0].equals(first_row)
            assert provenance.iloc[-1].equals(last_row)

            # compare the parameters from the input processed stream
            # to it's output equivalent
            instream = None
            for stream in processed_streams:
                if stream[0].stats.station.lower() == "hses":
                    instream = stream
                    break
            if instream is None:
                raise ValueError("Instream should not be none.")
            compare_streams(instream, outstream)
            workspace.close()

            # read in data from a second event and stash it in the workspace
            eventid = "nz2018p115908"
            datafiles, event = read_data_dir("geonet", eventid, "*.V2A")
            raw_streams = []
            for dfile in datafiles:
                raw_streams += read_data(dfile)

            workspace = StreamWorkspace.open(tfile)
            workspace.addStreams(event, raw_streams, label="foo")

            stations = workspace.getStations()

            eventids = workspace.getEventIds()
            assert eventids == ["us1000778i", "nz2018p115908"]
            instation = raw_streams[0][0].stats.station
            this_stream = workspace.getStreams(eventid,
                                               stations=[instation],
                                               labels=["foo"])[0]
            assert instation == this_stream[0].stats.station
            usid = "us1000778i"
            inventory = workspace.getInventory(usid)
            workspace.close()
            codes = [
                station.code for station in inventory.networks[0].stations
            ]
            assert sorted(set(codes)) == ["HSES", "THZ", "WPWS", "WTMC"]

    except Exception:
        raise
    finally:
        shutil.rmtree(tdir)
Example 26
def _test_colocated():
    eventid = 'ci38445975'
    datafiles, event = read_data_dir('fdsn', eventid, '*')
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config_file = os.path.join(datadir, 'test_config.yml')
    with open(config_file, 'r', encoding='utf-8') as f:
        config = yaml.load(f, Loader=yaml.FullLoader)
    processed_streams = process_streams(raw_streams, event, config=config)

    tdir = tempfile.mkdtemp()
    try:
        tfile = os.path.join(tdir, 'test.hdf')
        ws = StreamWorkspace(tfile)
        ws.addEvent(event)
        ws.addStreams(event, raw_streams, label='raw')
        ws.addStreams(event, processed_streams, label='processed')
        ws.calcMetrics(eventid, labels=['processed'], config=config)
        stasum = ws.getStreamMetrics(eventid, 'CI', 'MIKB', 'processed')
        np.testing.assert_allclose(
            stasum.get_pgm('duration', 'geometric_mean'), 38.94480068)
        ws.close()
    except Exception:
        raise
    finally:
        shutil.rmtree(tdir)
Example 27
def _test_vs30_dist_metrics():
    KNOWN_DISTANCES = {
        'epicentral': 5.1,
        'hypocentral': 10.2,
        'rupture': 2.21,
        'rupture_var': np.nan,
        'joyner_boore': 2.21,
        'joyner_boore_var': np.nan,
        'gc2_rx': 2.66,
        'gc2_ry': 3.49,
        'gc2_ry0': 0.00,
        'gc2_U': 34.34,
        'gc2_T': 2.66
    }
    KNOWN_BAZ = 239.46
    KNOWN_VS30 = 331.47

    eventid = 'ci38457511'
    datafiles, event = read_data_dir('fdsn', eventid, '*')
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config = update_config(os.path.join(datadir, 'config_min_freq_0p2.yml'))
    processed_streams = process_streams(raw_streams, event, config=config)
    rupture_file = get_rupture_file(datadir)
    grid_file = os.path.join(datadir, 'test_grid.grd')
    config['metrics']['vs30'] = {
        'vs30': {
            'file': grid_file,
            'column_header': 'GlobalVs30',
            'readme_entry': 'GlobalVs30',
            'units': 'm/s'
        }
    }
    tdir = tempfile.mkdtemp()
    try:
        tfile = os.path.join(tdir, 'test.hdf')
        ws = StreamWorkspace(tfile)
        ws.addEvent(event)
        ws.addStreams(event, raw_streams, label='raw')
        ws.addStreams(event, processed_streams, label='processed')
        ws.calcMetrics(event.id,
                       rupture_file=rupture_file,
                       labels=['processed'],
                       config=config)
        sta_sum = ws.getStreamMetrics(event.id, 'CI', 'CLC', 'processed')

        for dist in sta_sum.distances:
            np.testing.assert_allclose(sta_sum.distances[dist],
                                       KNOWN_DISTANCES[dist],
                                       rtol=0.01)
        np.testing.assert_allclose(sta_sum._back_azimuth, KNOWN_BAZ, rtol=0.01)
        np.testing.assert_allclose(sta_sum._vs30['vs30']['value'],
                                   KNOWN_VS30,
                                   rtol=0.01)
        event_df, imc_tables, readme_tables = ws.getTables('processed')
        ws.close()
        check_cols = set([
            'EpicentralDistance', 'HypocentralDistance', 'RuptureDistance',
            'RuptureDistanceVar', 'JoynerBooreDistance',
            'JoynerBooreDistanceVar', 'GC2_rx', 'GC2_ry', 'GC2_ry0', 'GC2_U',
            'GC2_T', 'GlobalVs30', 'BackAzimuth'
        ])
        assert check_cols.issubset(set(readme_tables['Z']['Column header']))
        assert check_cols.issubset(set(imc_tables['Z'].columns))
    except Exception:
        raise
    finally:
        shutil.rmtree(tdir)
Example no. 28
def _test_vs30_dist_metrics():
    KNOWN_DISTANCES = {
        "epicentral": 5.1,
        "hypocentral": 10.2,
        "rupture": 2.21,
        "rupture_var": np.nan,
        "joyner_boore": 2.21,
        "joyner_boore_var": np.nan,
        "gc2_rx": 2.66,
        "gc2_ry": 3.49,
        "gc2_ry0": 0.00,
        "gc2_U": 34.34,
        "gc2_T": 2.66,
    }
    KNOWN_BAZ = 239.46
    KNOWN_VS30 = 331.47

    eventid = "ci38457511"
    datafiles, event = read_data_dir("fdsn", eventid, "*")
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config = update_config(os.path.join(datadir, "config_min_freq_0p2.yml"))
    processed_streams = process_streams(raw_streams, event, config=config)
    rupture_file = get_rupture_file(datadir)
    grid_file = os.path.join(datadir, "test_grid.grd")
    config["metrics"]["vs30"] = {
        "vs30": {
            "file": grid_file,
            "column_header": "GlobalVs30",
            "readme_entry": "GlobalVs30",
            "units": "m/s",
        }
    }
    tdir = tempfile.mkdtemp()
    try:
        tfile = os.path.join(tdir, "test.hdf")
        ws = StreamWorkspace(tfile)
        ws.addEvent(event)
        ws.addStreams(event, raw_streams, label="raw")
        ws.addStreams(event, processed_streams, label="processed")
        ws.calcMetrics(event.id,
                       rupture_file=rupture_file,
                       labels=["processed"],
                       config=config)
        sta_sum = ws.getStreamMetrics(event.id, "CI", "CLC", "processed")

        for dist in sta_sum.distances:
            np.testing.assert_allclose(sta_sum.distances[dist],
                                       KNOWN_DISTANCES[dist],
                                       rtol=0.01)
        np.testing.assert_allclose(sta_sum._back_azimuth, KNOWN_BAZ, rtol=0.01)
        np.testing.assert_allclose(sta_sum._vs30["vs30"]["value"],
                                   KNOWN_VS30,
                                   rtol=0.01)
        event_df, imc_tables, readme_tables = ws.getTables("processed")
        ws.close()
        check_cols = set([
            "EpicentralDistance",
            "HypocentralDistance",
            "RuptureDistance",
            "RuptureDistanceVar",
            "JoynerBooreDistance",
            "JoynerBooreDistanceVar",
            "GC2_rx",
            "GC2_ry",
            "GC2_ry0",
            "GC2_U",
            "GC2_T",
            "GlobalVs30",
            "BackAzimuth",
        ])
        assert check_cols.issubset(set(readme_tables["Z"]["Column header"]))
        assert check_cols.issubset(set(imc_tables["Z"].columns))
    except Exception:
        raise
    finally:
        shutil.rmtree(tdir)
Example no. 29
def _test_colocated():
    eventid = "ci38445975"
    datafiles, event = read_data_dir("fdsn", eventid, "*")
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config_file = os.path.join(datadir, "test_config.yml")
    with open(config_file, "r", encoding="utf-8") as f:
        yaml = YAML()
        yaml.preserve_quotes = True
        config = yaml.load(f)
    processed_streams = process_streams(raw_streams, event, config=config)

    tdir = tempfile.mkdtemp()
    try:
        tfile = os.path.join(tdir, "test.hdf")
        ws = StreamWorkspace(tfile)
        ws.addEvent(event)
        ws.addStreams(event, raw_streams, label="raw")
        ws.addStreams(event, processed_streams, label="processed")
        ws.calcMetrics(eventid, labels=["processed"], config=config)
        stasum = ws.getStreamMetrics(eventid, "CI", "MIKB", "processed")
        np.testing.assert_allclose(
            stasum.get_pgm("duration", "geometric_mean"), 38.94480068)
        ws.close()
    except Exception:
        raise
    finally:
        shutil.rmtree(tdir)
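
The two _test_colocated variants above differ only in how the YAML configuration file is read: the first uses PyYAML's yaml.load with FullLoader, the second uses ruamel.yaml's round-trip loader. A minimal standalone sketch of both approaches, assuming a placeholder path config.yml rather than a file from the test data:

import yaml
from ruamel.yaml import YAML

# PyYAML: returns plain dicts and lists
with open('config.yml', 'r', encoding='utf-8') as f:
    config_pyyaml = yaml.load(f, Loader=yaml.FullLoader)

# ruamel.yaml: round-trip loader that can preserve quoting and comments
ryaml = YAML()
ryaml.preserve_quotes = True
with open('config.yml', 'r', encoding='utf-8') as f:
    config_ruamel = ryaml.load(f)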


def test_metrics():
    eventid = 'usb000syza'
    datafiles, event = read_data_dir('knet', eventid, '*')
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config = get_config()
    # turn off sta/lta check and snr checks
    # newconfig = drop_processing(config, ['check_sta_lta', 'compute_snr'])
    # processed_streams = process_streams(raw_streams, event, config=newconfig)
    newconfig = config.copy()
    newconfig['processing'].append(
        {'NNet_QA': {
            'acceptance_threshold': 0.5,
            'model_name': 'CantWell'
        }})
    processed_streams = process_streams(raw_streams, event, config=newconfig)

    tdir = tempfile.mkdtemp()
    try:
        tfile = os.path.join(tdir, 'test.hdf')
        workspace = StreamWorkspace(tfile)
        workspace.addEvent(event)
        workspace.addStreams(event, raw_streams, label='raw')
        workspace.addStreams(event, processed_streams, label='processed')
        stream1 = raw_streams[0]
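
        # get metrics from a StationSummary computed directly from the raw stream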
        summary1 = StationSummary.from_config(stream1)
        s1_df_in = summary1.pgms.sort_values(['IMT', 'IMC'])
        array1 = s1_df_in['Result'].to_numpy()
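
        # compare to the metrics computed and stored by the workspace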
        workspace.calcStreamMetrics(eventid, labels=['raw'])
        workspace.calcStationMetrics(event.id, labels=['raw'])
        pstreams2 = workspace.getStreams(event.id, labels=['processed'])
        assert pstreams2[0].getStreamParamKeys() == ['nnet_qa']
        summary1_a = workspace.getStreamMetrics(event.id,
                                                stream1[0].stats.network,
                                                stream1[0].stats.station,
                                                'raw')
        s1_df_out = summary1_a.pgms.sort_values(['IMT', 'IMC'])
        array2 = s1_df_out['Result'].to_numpy()
        np.testing.assert_almost_equal(array1, array2, decimal=4)

        workspace.close()
    except Exception:
        raise
    finally:
        shutil.rmtree(tdir)


def test_metrics():
    eventid = 'usb000syza'
    datafiles, event = read_data_dir('knet', eventid, '*')
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config = update_config(os.path.join(datadir, 'config_min_freq_0p2.yml'))
    # turn off sta/lta check and snr checks
    # newconfig = drop_processing(config, ['check_sta_lta', 'compute_snr'])
    # processed_streams = process_streams(raw_streams, event, config=newconfig)
    newconfig = config.copy()
    newconfig['processing'].append(
        {'NNet_QA': {
            'acceptance_threshold': 0.5,
            'model_name': 'CantWell'
        }})
    processed_streams = process_streams(raw_streams.copy(),
                                        event,
                                        config=newconfig)

    tdir = tempfile.mkdtemp()
    try:
        tfile = os.path.join(tdir, 'test.hdf')
        workspace = StreamWorkspace(tfile)
        workspace.addEvent(event)
        workspace.addStreams(event, raw_streams, label='raw')
        workspace.addStreams(event, processed_streams, label='processed')
        stream1 = raw_streams[0]

        # Get metrics from station summary for raw streams
        summary1 = StationSummary.from_config(stream1)
        s1_df_in = summary1.pgms.sort_values(['IMT', 'IMC'])
        array1 = s1_df_in['Result'].to_numpy()

        # Compare to metrics from getStreamMetrics for raw streams
        workspace.calcMetrics(eventid, labels=['raw'])
        summary1_a = workspace.getStreamMetrics(event.id,
                                                stream1[0].stats.network,
                                                stream1[0].stats.station,
                                                'raw')
        s1_df_out = summary1_a.pgms.sort_values(['IMT', 'IMC'])
        array2 = s1_df_out['Result'].to_numpy()

        np.testing.assert_allclose(array1, array2, atol=1e-6, rtol=1e-6)
        workspace.close()
    except Exception:
        raise
    finally:
        shutil.rmtree(tdir)
Example no. 32
def _test_metrics2():
    eventid = "usb000syza"
    datafiles, event = read_data_dir("knet", eventid, "*")
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config = update_config(os.path.join(datadir, "config_min_freq_0p2.yml"))
    config["metrics"]["output_imts"].append("Arias")
    config["metrics"]["output_imcs"].append("arithmetic_mean")
    # Adjust checks so that streams pass checks for this test
    newconfig = drop_processing(config, ["check_sta_lta"])
    csnr = [s for s in newconfig["processing"] if "compute_snr" in s.keys()][0]
    csnr["compute_snr"]["check"]["threshold"] = -10.0
    processed_streams = process_streams(raw_streams, event, config=newconfig)

    tdir = tempfile.mkdtemp()
    try:
        tfile = os.path.join(tdir, "test.hdf")
        workspace = StreamWorkspace(tfile)
        workspace.addEvent(event)
        workspace.addStreams(event, processed_streams, label="processed")
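        # compute metrics with the default config first, then recompute with the
        # augmented config so the extra IMT/IMC appear in the tables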
        workspace.calcMetrics(event.id, labels=["processed"])
        etable, imc_tables1, readmes1 = workspace.getTables("processed")
        assert "ARITHMETIC_MEAN" not in imc_tables1
        assert "ARITHMETIC_MEAN" not in readmes1
        del workspace.dataset.auxiliary_data.WaveFormMetrics
        del workspace.dataset.auxiliary_data.StationMetrics
        workspace.calcMetrics(event.id, labels=["processed"], config=config)
        etable2, imc_tables2, readmes2 = workspace.getTables("processed")
        assert "ARITHMETIC_MEAN" in imc_tables2
        assert "ARITHMETIC_MEAN" in readmes2
        assert "ARIAS" in imc_tables2["ARITHMETIC_MEAN"]
        testarray = readmes2["ARITHMETIC_MEAN"]["Column header"].to_numpy()
        assert "ARIAS" in testarray
        workspace.close()
    except Exception:
        raise
    finally:
        shutil.rmtree(tdir)
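
Several of the examples above strip individual steps from the processing configuration (via drop_processing) before calling process_streams. A minimal sketch of what such a helper might look like; the name drop_processing_sketch is illustrative and not the gmprocess implementation:

import copy

def drop_processing_sketch(config, keys):
    # return a copy of the config with the named steps removed; each entry in
    # config['processing'] is a single-key dict such as {'check_sta_lta': {...}}
    newconfig = copy.deepcopy(config)
    newconfig['processing'] = [
        step for step in newconfig['processing']
        if not any(key in step for key in keys)
    ]
    return newconfig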


def _test_workspace():
    eventid = 'us1000778i'
    datafiles, event = read_data_dir('geonet', eventid, '*.V1A')
    tdir = tempfile.mkdtemp()
    try:
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=H5pyDeprecationWarning)
            warnings.filterwarnings("ignore", category=YAMLLoadWarning)
            warnings.filterwarnings("ignore", category=FutureWarning)
            # NOTE: assumed here that the config file lives alongside the test
            # data files, as in the other examples above
            datadir = os.path.split(datafiles[0])[0]
            config = update_config(
                os.path.join(datadir, 'config_min_freq_0p2.yml'))
            tfile = os.path.join(tdir, 'test.hdf')
            raw_streams = []
            for dfile in datafiles:
                raw_streams += read_data(dfile)

            workspace = StreamWorkspace(tfile)
            t1 = time.time()
            workspace.addStreams(event, raw_streams, label='raw')
            t2 = time.time()
            print(f'Adding {len(raw_streams)} streams took {t2 - t1:.2f} seconds')

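            # __repr__ summarizes the events, stations, and streams in the workspace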
            str_repr = workspace.__repr__()
            assert str_repr == 'Events: 1 Stations: 3 Streams: 3'

            eventobj = workspace.getEvent(eventid)
            assert eventobj.origins[0].latitude == event.origins[0].latitude
            assert eventobj.magnitudes[0].mag == event.magnitudes[0].mag

            stations = workspace.getStations()
            assert sorted(stations) == ['HSES', 'THZ', 'WTMC']

            stations = workspace.getStations(eventid=eventid)
            assert sorted(stations) == ['HSES', 'THZ', 'WTMC']

            # test retrieving event that doesn't exist
            with pytest.raises(KeyError):
                workspace.getEvent('foo')

            instream = None
            for stream in raw_streams:
                if stream[0].stats.station.lower() == 'hses':
                    instream = stream
                    break
            if instream is None:
                raise ValueError('Instream should not be none.')
            outstream = workspace.getStreams(eventid,
                                             stations=['HSES'],
                                             labels=['raw'])[0]
            compare_streams(instream, outstream)

            label_summary = workspace.summarizeLabels()
            assert label_summary.iloc[0]['Label'] == 'raw'
            assert label_summary.iloc[0]['Software'] == 'gmprocess'

            sc = StreamCollection(raw_streams)
            processed_streams = process_streams(sc, event, config=config)
            workspace.addStreams(event, processed_streams, 'processed')

            idlist = workspace.getEventIds()
            assert idlist[0] == eventid

            outstream = workspace.getStreams(eventid,
                                             stations=['HSES'],
                                             labels=['processed'])[0]

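            # fetch the provenance table and spot-check its first and last rows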
            provenance = workspace.getProvenance(eventid, labels=['processed'])
            first_row = pd.Series({
                'Record': 'NZ.HSES.--.HN1_us1000778i_processed',
                'Processing Step': 'Remove Response',
                'Step Attribute': 'input_units',
                'Attribute Value': 'counts'
            })

            last_row = pd.Series({
                'Record': 'NZ.WTMC.--.HNZ_us1000778i_processed',
                'Processing Step': 'Lowpass Filter',
                'Step Attribute': 'number_of_passes',
                'Attribute Value': 2
            })
            assert provenance.iloc[0].equals(first_row)
            assert provenance.iloc[-1].equals(last_row)

            # compare the parameters from the input processed stream
            # to its output equivalent
            instream = None
            for stream in processed_streams:
                if stream[0].stats.station.lower() == 'hses':
                    instream = stream
                    break
            if instream is None:
                raise ValueError('Instream should not be none.')
            compare_streams(instream, outstream)
            workspace.close()

            # read in data from a second event and stash it in the workspace
            eventid = 'nz2018p115908'
            datafiles, event = read_data_dir('geonet', eventid, '*.V2A')
            raw_streams = []
            for dfile in datafiles:
                raw_streams += read_data(dfile)

            workspace = StreamWorkspace.open(tfile)
            workspace.addStreams(event, raw_streams, label='foo')

            stations = workspace.getStations(eventid)

            eventids = workspace.getEventIds()
            assert eventids == ['us1000778i', 'nz2018p115908']
            instation = raw_streams[0][0].stats.station
            this_stream = workspace.getStreams(eventid,
                                               stations=[instation],
                                               labels=['foo'])[0]
            assert instation == this_stream[0].stats.station
            usid = 'us1000778i'
            inventory = workspace.getInventory(usid)
            workspace.close()
            codes = [
                station.code for station in inventory.networks[0].stations
            ]
            assert sorted(set(codes)) == ['HSES', 'THZ', 'WPWS', 'WTMC']

    except Exception:
        raise
    finally:
        shutil.rmtree(tdir)