Example #1
def generate_workspace():
    """Generate simple HDF5 with ASDF layout for testing."""
    PCOMMANDS = [
        "assemble",
        "process",
    ]
    EVENTID = "us1000778i"
    LABEL = "ptest"
    datafiles, event = read_data_dir("geonet", EVENTID, "*.V1A")

    tdir = tempfile.mkdtemp()
    tfilename = os.path.join(tdir, "workspace.h5")

    raw_data = []
    for dfile in datafiles:
        raw_data += read_data(dfile)
    write_asdf(tfilename, raw_data, event, label="unprocessed")
    del raw_data

    config = update_config(os.path.join(datadir, "config_min_freq_0p2.yml"))

    workspace = StreamWorkspace.open(tfilename)
    raw_streams = workspace.getStreams(EVENTID,
                                       labels=["unprocessed"],
                                       config=config)
    pstreams = process_streams(raw_streams, event, config=config)
    workspace.addStreams(event, pstreams, label=LABEL)
    workspace.calcMetrics(event.id, labels=[LABEL], config=config)

    return tfilename
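
The listings on this page omit their import statements. The sketch below collects the imports the examples appear to rely on; the standard-library and third-party names follow directly from the code, while the gmprocess-internal module paths have moved between releases, so every gmprocess path here is an assumption to check against the installed version.

# Standard library and third-party imports used throughout the examples.
import os
import shutil
import tempfile
import logging
import warnings

import numpy as np
import pandas as pd
import pkg_resources
from obspy import UTCDateTime

# gmprocess imports -- the module paths below are assumptions and differ
# between gmprocess releases; verify them against the installed version.
from gmprocess.io.read import read_data                       # assumed path
from gmprocess.io.test_utils import read_data_dir             # assumed path
from gmprocess.streamcollection import StreamCollection       # assumed path
from gmprocess.processing import process_streams              # assumed path
from gmprocess.asdf.stream_workspace import StreamWorkspace   # assumed path
from gmprocess.config import get_config, update_config        # assumed path
from gmprocess.metrics.station_summary import StationSummary  # assumed path
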
Example #2
def test_zero_crossings():
    datapath = os.path.join("data", "testdata", "zero_crossings")
    datadir = pkg_resources.resource_filename("gmprocess", datapath)
    sc = StreamCollection.from_directory(datadir)
    sc.describe()

    conf = get_config()

    update = {
        "processing": [
            {"detrend": {"detrending_method": "demean"}},
            {"check_zero_crossings": {"min_crossings": 1}},
        ]
    }
    update_dict(conf, update)

    edict = {
        "id": "ak20419010",
        "time": UTCDateTime("2018-11-30T17:29:29"),
        "lat": 61.346,
        "lon": -149.955,
        "depth": 46.7,
        "magnitude": 7.1,
    }
    event = get_event_object(edict)
    test = process_streams(sc, event, conf)
    for st in test:
        for tr in st:
            assert tr.hasParameter("ZeroCrossingRate")
    np.testing.assert_allclose(
        test[0][0].getParameter("ZeroCrossingRate")["crossing_rate"],
        0.008888888888888889,
        atol=1e-5,
    )
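
Example #2 merges a partial configuration into the full config with update_dict before calling process_streams. The merge behavior the test relies on (nested dictionaries merged recursively, everything else, including the "processing" list, replaced wholesale) can be sketched with a small stand-in; this is a hypothetical illustration, not gmprocess's own implementation.

def recursive_update(base, update):
    """Hypothetical stand-in sketching the recursive in-place merge."""
    for key, value in update.items():
        if isinstance(value, dict) and isinstance(base.get(key), dict):
            # Nested dictionaries are merged key by key.
            recursive_update(base[key], value)
        else:
            # Anything else (lists, scalars) replaces the previous value,
            # which is how the "processing" list above overrides the default.
            base[key] = value


# Tiny self-contained check of the stand-in's behavior.
base = {"a": {"x": 1, "y": 2}, "b": [1, 2]}
recursive_update(base, {"a": {"y": 20}, "b": [3]})
assert base == {"a": {"x": 1, "y": 20}, "b": [3]}
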
Example #3
def test_process_streams():
    # GeoNet test data for event us1000778i

    data_files, origin = read_data_dir('geonet', 'us1000778i', '*.V1A')
    streams = []
    for f in data_files:
        streams += read_data(f)

    sc = StreamCollection(streams)

    sc.describe()

    config = update_config(os.path.join(datadir, 'config_min_freq_0p2.yml'))

    test = process_streams(sc, origin, config=config)

    logging.info('Testing trace: %s' % test[0][1])

    assert len(test) == 3
    assert len(test[0]) == 3
    assert len(test[1]) == 3
    assert len(test[2]) == 3

    # The traces can end up in a different order on the Travis Linux
    # container than on a local Mac, so checks on individual traces must not
    # depend on trace order.

    trace_maxes = np.sort(
        [np.max(np.abs(t.data)) for t in test.select(station='HSES')[0]])

    np.testing.assert_allclose(trace_maxes,
                               np.array(
                                   [157.81975508, 240.33718094, 263.67804256]),
                               rtol=1e-5)
Example #4
def test_allow_nans():
    dpath = os.path.join("data", "testdata", "fdsn", "uu60363602")
    datadir = pkg_resources.resource_filename("gmprocess", dpath)
    sc = StreamCollection.from_directory(datadir)
    origin = read_event_json_files([os.path.join(datadir, "event.json")])[0]
    psc = process_streams(sc, origin)
    st = psc[0]

    ss = StationSummary.from_stream(
        st,
        components=["quadratic_mean"],
        imts=["FAS(4.0)"],
        bandwidth=300,
        allow_nans=True,
    )
    assert np.isnan(ss.pgms.Result).all()

    ss = StationSummary.from_stream(
        st,
        components=["quadratic_mean"],
        imts=["FAS(4.0)"],
        bandwidth=189,
        allow_nans=False,
    )
    assert ~np.isnan(ss.pgms.Result).all()
Example #5
def _test_colocated():
    eventid = "ci38445975"
    datafiles, event = read_data_dir("fdsn", eventid, "*")
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config_file = os.path.join(datadir, "test_config.yml")
    with open(config_file, "r", encoding="utf-8") as f:
        yaml = YAML()
        yaml.preserve_quotes = True
        config = yaml.load(f)
    processed_streams = process_streams(raw_streams, event, config=config)

    tdir = tempfile.mkdtemp()
    try:
        tfile = os.path.join(tdir, "test.hdf")
        ws = StreamWorkspace(tfile)
        ws.addEvent(event)
        ws.addStreams(event, raw_streams, label="raw")
        ws.addStreams(event, processed_streams, label="processed")
        ws.calcMetrics(eventid, labels=["processed"], config=config)
        stasum = ws.getStreamMetrics(eventid, "CI", "MIKB", "processed")
        np.testing.assert_allclose(
            stasum.get_pgm("duration", "geometric_mean"), 38.94480068)
        ws.close()
    except Exception:
        raise
    finally:
        shutil.rmtree(tdir)
Example #6
    def _process_event(self, event):
        event_dir = os.path.join(self.gmrecords.data_path, event.id)
        workname = os.path.join(event_dir, WORKSPACE_NAME)
        if not os.path.isfile(workname):
            logging.info('No workspace file found for event %s. Please run '
                         'subcommand \'assemble\' to generate workspace '
                         'file.', event.id)
            logging.info('Continuing to next event.')
            return event.id

        workspace = StreamWorkspace.open(workname)
        rstreams = workspace.getStreams(event.id,
                                        labels=['unprocessed'],
                                        config=self.gmrecords.conf)

        if len(rstreams):
            logging.info('Processing \'%s\' streams for event %s...' %
                         ('unprocessed', event.id))
            pstreams = process_streams(rstreams,
                                       event,
                                       config=self.gmrecords.conf)
            workspace.addStreams(event, pstreams, label=self.process_tag)
        else:
            logging.info('No streams found. Nothing to do. Goodbye.')

        workspace.close()
        return event.id
Example #7
def generate_workspace():
    """Generate simple HDF5 with ASDF layout for testing.
    """
    PCOMMANDS = [
        'assemble',
        'process',
    ]
    EVENTID = 'us1000778i'
    LABEL = 'ptest'
    datafiles, event = read_data_dir('geonet', EVENTID, '*.V1A')

    tdir = tempfile.mkdtemp()
    tfilename = os.path.join(tdir, 'workspace.h5')

    raw_data = []
    for dfile in datafiles:
        raw_data += read_data(dfile)
    write_asdf(tfilename, raw_data, event, label="unprocessed")
    del raw_data

    config = update_config(os.path.join(datadir, 'config_min_freq_0p2.yml'))

    workspace = StreamWorkspace.open(tfilename)
    raw_streams = workspace.getStreams(EVENTID, labels=['unprocessed'])
    pstreams = process_streams(raw_streams, event, config=config)
    workspace.addStreams(event, pstreams, label=LABEL)
    workspace.calcMetrics(event.id, labels=[LABEL], config=config)

    return tfilename
Example #8
def test_process_streams():
    # GeoNet test data for event us1000778i

    data_files, origin = read_data_dir("geonet", "us1000778i", "*.V1A")
    streams = []
    for f in data_files:
        streams += read_data(f)

    sc = StreamCollection(streams)

    sc.describe()

    config = update_config(os.path.join(datadir, "config_min_freq_0p2.yml"))

    test = process_streams(sc, origin, config=config)

    logging.info(f"Testing trace: {test[0][1]}")

    assert len(test) == 3
    assert len(test[0]) == 3
    assert len(test[1]) == 3
    assert len(test[2]) == 3

    # The traces can end up in a different order on the Travis Linux
    # container than on a local Mac, so checks on individual traces must not
    # depend on trace order.

    trace_maxes = np.sort(
        [np.max(np.abs(t.data)) for t in test.select(station="HSES")[0]])

    np.testing.assert_allclose(trace_maxes,
                               np.array([157.812449, 240.379521, 263.601519]),
                               rtol=1e-5)
Example #9
def _test_colocated():
    eventid = 'ci38445975'
    datafiles, event = read_data_dir('fdsn', eventid, '*')
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config_file = os.path.join(datadir, 'test_config.yml')
    with open(config_file, 'r', encoding='utf-8') as f:
        config = yaml.load(f, Loader=yaml.FullLoader)
    processed_streams = process_streams(raw_streams, event, config=config)

    tdir = tempfile.mkdtemp()
    try:
        tfile = os.path.join(tdir, 'test.hdf')
        ws = StreamWorkspace(tfile)
        ws.addEvent(event)
        ws.addStreams(event, raw_streams, label='raw')
        ws.addStreams(event, processed_streams, label='processed')
        ws.calcMetrics(eventid, labels=['processed'], config=config)
        stasum = ws.getStreamMetrics(eventid, 'CI', 'MIKB', 'processed')
        np.testing.assert_allclose(
            stasum.get_pgm('duration', 'geometric_mean'), 38.94480068)
        ws.close()
    except Exception:
        raise
    finally:
        shutil.rmtree(tdir)
Example #10
def test():
    # Read in data with only one stationxml entry
    data_files, origin = read_data_dir("station_xml_epochs", "nc73631381",
                                       "*.mseed")
    test_root = os.path.normpath(os.path.join(data_files[0], os.pardir))
    sc = StreamCollection.from_directory(test_root)
    psc = process_streams(sc, origin)

    # Read in data with all dates in stationxml
    data_files, origin = read_data_dir("station_xml_epochs", "nc73631381_ad",
                                       "*.mseed")
    test_root = os.path.normpath(os.path.join(data_files[0], os.pardir))
    sc_ad = StreamCollection.from_directory(test_root)
    psc_ad = process_streams(sc_ad, origin)

    single_maxes = np.sort([np.max(tr.data) for tr in psc[0]])
    alldates_maxes = np.sort([np.max(tr.data) for tr in psc_ad[0]])
    assert_allclose(single_maxes, alldates_maxes)
Example #11
def test_weird_sensitivity():
    datafiles, origin = read_data_dir("fdsn", "us70008dx7", "SL.KOGS*.mseed")
    streams = []
    for datafile in datafiles:
        streams += read_obspy(datafile)
    sc = StreamCollection(streams)
    psc = process_streams(sc, origin)
    channel = psc[0].select(component="E")[0]
    assert_almost_equal(channel.data.max(), 62900.197618074293)
Example #12
def test_weird_sensitivity():
    datafiles, origin = read_data_dir('fdsn', 'us70008dx7', 'SL.KOGS*.mseed')
    streams = []
    for datafile in datafiles:
        streams += read_obspy(datafile)
    sc = StreamCollection(streams)
    psc = process_streams(sc, origin)
    channel = psc[0].select(component='E')[0]
    assert_almost_equal(channel.data.max(), 62900.191900393373)
Example #13
def test_metrics():
    eventid = "usb000syza"
    datafiles, event = read_data_dir("knet", eventid, "*")
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config = update_config(os.path.join(datadir, "config_min_freq_0p2.yml"))
    # turn off sta/lta check and snr checks
    # newconfig = drop_processing(config, ['check_sta_lta', 'compute_snr'])
    # processed_streams = process_streams(raw_streams, event, config=newconfig)
    newconfig = config.copy()
    newconfig["processing"].append(
        {"NNet_QA": {
            "acceptance_threshold": 0.5,
            "model_name": "CantWell"
        }})
    processed_streams = process_streams(raw_streams.copy(),
                                        event,
                                        config=newconfig)

    tdir = tempfile.mkdtemp()
    try:
        tfile = os.path.join(tdir, "test.hdf")
        workspace = StreamWorkspace(tfile)
        workspace.addEvent(event)
        workspace.addStreams(event, raw_streams, label="raw")
        workspace.addStreams(event, processed_streams, label="processed")
        stream1 = raw_streams[0]

        # Get metrics from station summary for raw streams
        summary1 = StationSummary.from_config(stream1)
        s1_df_in = summary1.pgms.sort_values(["IMT", "IMC"])
        array1 = s1_df_in["Result"].to_numpy()

        # Compare to metrics from getStreamMetrics for raw streams
        workspace.calcMetrics(eventid, labels=["raw"])
        summary1_a = workspace.getStreamMetrics(event.id,
                                                stream1[0].stats.network,
                                                stream1[0].stats.station,
                                                "raw")
        s1_df_out = summary1_a.pgms.sort_values(["IMT", "IMC"])
        array2 = s1_df_out["Result"].to_numpy()

        np.testing.assert_allclose(array1, array2, atol=1e-6, rtol=1e-6)
        workspace.close()
    except Exception:
        raise
    finally:
        shutil.rmtree(tdir)
Example #14
def test_nnet():

    conf = get_config()

    update = {
        "processing": [
            {
                "detrend": {
                    "detrending_method": "demean"
                }
            },
            {
                "detrend": {
                    "detrending_method": "linear"
                }
            },
            {
                "compute_snr": {
                    "bandwidth": 20.0,
                    "check": {
                        "max_freq": 5.0,
                        "min_freq": 0.2,
                        "threshold": 3.0
                    },
                }
            },
            {
                "NNet_QA": {
                    "acceptance_threshold": 0.5,
                    "model_name": "CantWell"
                }
            },
        ]
    }
    update_dict(conf, update)

    data_files, origin = read_data_dir("geonet", "us1000778i", "*.V1A")
    streams = []
    for f in data_files:
        streams += read_data(f)

    sc = StreamCollection(streams)
    test = process_streams(sc, origin, conf)
    tstream = test.select(station="HSES")[0]
    nnet_dict = tstream.getStreamParam("nnet_qa")
    np.testing.assert_allclose(nnet_dict["score_HQ"],
                               0.99321798811740059,
                               rtol=1e-3)
Example #15
def test_metrics():
    eventid = 'usb000syza'
    datafiles, event = read_data_dir('knet', eventid, '*')
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config = update_config(os.path.join(datadir, 'config_min_freq_0p2.yml'))
    # turn off sta/lta check and snr checks
    # newconfig = drop_processing(config, ['check_sta_lta', 'compute_snr'])
    # processed_streams = process_streams(raw_streams, event, config=newconfig)
    newconfig = config.copy()
    newconfig['processing'].append(
        {'NNet_QA': {
            'acceptance_threshold': 0.5,
            'model_name': 'CantWell'
        }})
    processed_streams = process_streams(raw_streams.copy(),
                                        event,
                                        config=newconfig)

    tdir = tempfile.mkdtemp()
    try:
        tfile = os.path.join(tdir, 'test.hdf')
        workspace = StreamWorkspace(tfile)
        workspace.addEvent(event)
        workspace.addStreams(event, raw_streams, label='raw')
        workspace.addStreams(event, processed_streams, label='processed')
        stream1 = raw_streams[0]

        # Get metrics from station summary for raw streams
        summary1 = StationSummary.from_config(stream1)
        s1_df_in = summary1.pgms.sort_values(['IMT', 'IMC'])
        array1 = s1_df_in['Result'].to_numpy()

        # Compare to metrics from getStreamMetrics for raw streams
        workspace.calcMetrics(eventid, labels=['raw'])
        summary1_a = workspace.getStreamMetrics(event.id,
                                                stream1[0].stats.network,
                                                stream1[0].stats.station,
                                                'raw')
        s1_df_out = summary1_a.pgms.sort_values(['IMT', 'IMC'])
        array2 = s1_df_out['Result'].to_numpy()

        np.testing.assert_allclose(array1, array2, atol=1e-6, rtol=1e-6)
        workspace.close()
    except Exception:
        raise
    finally:
        shutil.rmtree(tdir)
Example #16
def test_check_instrument():
    data_files, origin = read_data_dir("fdsn", "nc51194936", "*.mseed")
    streams = []
    for f in data_files:
        streams += read_data(f)

    sc = StreamCollection(streams)
    sc.describe()

    config = update_config(os.path.join(datadir,
                                        "config_test_check_instr.yml"))
    test = process_streams(sc, origin, config=config)

    for sta, expected in [("CVS", True), ("GASB", True), ("SBT", False)]:
        st = test.select(station=sta)[0]
        logging.info(f"Testing stream: {st}")
        assert st.passed == expected
Example #17
def test_check_instrument():
    data_files, origin = read_data_dir('fdsn', 'nc51194936', '*.mseed')
    streams = []
    for f in data_files:
        streams += read_data(f)

    sc = StreamCollection(streams)
    sc.describe()

    config = update_config(os.path.join(datadir,
                                        'config_test_check_instr.yml'))
    test = process_streams(sc, origin, config=config)

    for sta, expected in [('CVS', True), ('GASB', True), ('SBT', False)]:
        st = test.select(station=sta)[0]
        logging.info('Testing stream: %s' % st)
        assert st.passed == expected
Example #18
def test_nnet():

    conf = get_config()

    update = {
        'processing': [{
            'detrend': {
                'detrending_method': 'demean'
            }
        }, {
            'detrend': {
                'detrending_method': 'linear'
            }
        }, {
            'compute_snr': {
                'bandwidth': 20.0,
                'check': {
                    'max_freq': 5.0,
                    'min_freq': 0.2,
                    'threshold': 3.0
                }
            }
        }, {
            'NNet_QA': {
                'acceptance_threshold': 0.5,
                'model_name': 'CantWell'
            }
        }]
    }
    update_dict(conf, update)

    data_files, origin = read_data_dir('geonet', 'us1000778i', '*.V1A')
    streams = []
    for f in data_files:
        streams += read_data(f)

    sc = StreamCollection(streams)
    test = process_streams(sc, origin, conf)
    tstream = test.select(station='HSES')[0]
    nnet_dict = tstream.getStreamParam('nnet_qa')
    np.testing.assert_allclose(nnet_dict['score_HQ'],
                               0.99321798811740059,
                               rtol=1e-3)
Example #19
def test_allow_nans():
    dpath = os.path.join('data', 'testdata', 'fdsn', 'uu60363602')
    datadir = pkg_resources.resource_filename('gmprocess', dpath)
    sc = StreamCollection.from_directory(datadir)
    origin = read_event_json_files([os.path.join(datadir, 'event.json')])[0]
    psc = process_streams(sc, origin)
    st = psc[0]

    ss = StationSummary.from_stream(st,
                                    components=['quadratic_mean'],
                                    imts=['FAS(4.0)'],
                                    bandwidth=189,
                                    allow_nans=True)
    assert np.isnan(ss.pgms.Result).all()

    ss = StationSummary.from_stream(st,
                                    components=['quadratic_mean'],
                                    imts=['FAS(4.0)'],
                                    bandwidth=189,
                                    allow_nans=False)
    assert ~np.isnan(ss.pgms.Result).all()
Example #20
def _test_metrics2():
    eventid = 'usb000syza'
    datafiles, event = read_data_dir('knet', eventid, '*')
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config = update_config(os.path.join(datadir, 'config_min_freq_0p2.yml'))
    config['metrics']['output_imts'].append('Arias')
    config['metrics']['output_imcs'].append('arithmetic_mean')
    # Adjust checks so that streams pass checks for this test
    newconfig = drop_processing(config, ['check_sta_lta'])
    csnr = [s for s in newconfig['processing'] if 'compute_snr' in s.keys()][0]
    csnr['compute_snr']['check']['threshold'] = -10.0
    processed_streams = process_streams(raw_streams, event, config=newconfig)

    tdir = tempfile.mkdtemp()
    try:
        tfile = os.path.join(tdir, 'test.hdf')
        workspace = StreamWorkspace(tfile)
        workspace.addEvent(event)
        workspace.addStreams(event, processed_streams, label='processed')
        workspace.calcMetrics(event.id, labels=['processed'])
        etable, imc_tables1, readmes1 = workspace.getTables('processed')
        assert 'ARITHMETIC_MEAN' not in imc_tables1
        assert 'ARITHMETIC_MEAN' not in readmes1
        del workspace.dataset.auxiliary_data.WaveFormMetrics
        del workspace.dataset.auxiliary_data.StationMetrics
        workspace.calcMetrics(event.id, labels=['processed'], config=config)
        etable2, imc_tables2, readmes2 = workspace.getTables('processed')
        assert 'ARITHMETIC_MEAN' in imc_tables2
        assert 'ARITHMETIC_MEAN' in readmes2
        assert 'ARIAS' in imc_tables2['ARITHMETIC_MEAN']
        testarray = readmes2['ARITHMETIC_MEAN']['Column header'].to_numpy()
        assert 'ARIAS' in testarray
        workspace.close()
    except Exception:
        raise
    finally:
        shutil.rmtree(tdir)
Example #21
def test_zero_crossings():
    datapath = os.path.join('data', 'testdata', 'zero_crossings')
    datadir = pkg_resources.resource_filename('gmprocess', datapath)
    sc = StreamCollection.from_directory(datadir)
    sc.describe()

    conf = get_config()

    update = {
        'processing': [{
            'detrend': {
                'detrending_method': 'demean'
            }
        }, {
            'check_zero_crossings': {
                'min_crossings': 1
            }
        }]
    }
    update_dict(conf, update)

    edict = {
        'id': 'ak20419010',
        'time': UTCDateTime('2018-11-30T17:29:29'),
        'lat': 61.346,
        'lon': -149.955,
        'depth': 46.7,
        'magnitude': 7.1
    }
    event = get_event_object(edict)
    test = process_streams(sc, event, conf)
    for st in test:
        for tr in st:
            assert tr.hasParameter('ZeroCrossingRate')
    np.testing.assert_allclose(
        test[0][0].getParameter('ZeroCrossingRate')['crossing_rate'],
        0.008888888888888889,
        atol=1e-5)
Example #22
def _test_metrics2():
    eventid = "usb000syza"
    datafiles, event = read_data_dir("knet", eventid, "*")
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config = update_config(os.path.join(datadir, "config_min_freq_0p2.yml"))
    config["metrics"]["output_imts"].append("Arias")
    config["metrics"]["output_imcs"].append("arithmetic_mean")
    # Adjust checks so that streams pass checks for this test
    newconfig = drop_processing(config, ["check_sta_lta"])
    csnr = [s for s in newconfig["processing"] if "compute_snr" in s.keys()][0]
    csnr["compute_snr"]["check"]["threshold"] = -10.0
    processed_streams = process_streams(raw_streams, event, config=newconfig)

    tdir = tempfile.mkdtemp()
    try:
        tfile = os.path.join(tdir, "test.hdf")
        workspace = StreamWorkspace(tfile)
        workspace.addEvent(event)
        workspace.addStreams(event, processed_streams, label="processed")
        workspace.calcMetrics(event.id, labels=["processed"])
        etable, imc_tables1, readmes1 = workspace.getTables("processed")
        assert "ARITHMETIC_MEAN" not in imc_tables1
        assert "ARITHMETIC_MEAN" not in readmes1
        del workspace.dataset.auxiliary_data.WaveFormMetrics
        del workspace.dataset.auxiliary_data.StationMetrics
        workspace.calcMetrics(event.id, labels=["processed"], config=config)
        etable2, imc_tables2, readmes2 = workspace.getTables("processed")
        assert "ARITHMETIC_MEAN" in imc_tables2
        assert "ARITHMETIC_MEAN" in readmes2
        assert "ARIAS" in imc_tables2["ARITHMETIC_MEAN"]
        testarray = readmes2["ARITHMETIC_MEAN"]["Column header"].to_numpy()
        assert "ARIAS" in testarray
        workspace.close()
    except Exception:
        raise
    finally:
        shutil.rmtree(tdir)
Example #23
def test_free_field():
    data_files, origin = read_data_dir("kiknet", "usp000hzq8")
    raw_streams = []
    for dfile in data_files:
        raw_streams += read_data(dfile)

    sc = StreamCollection(raw_streams)

    processed_streams = process_streams(sc, origin)

    # all of these streams should have failed for different reasons
    npassed = np.sum([pstream.passed for pstream in processed_streams])
    assert npassed == 0
    for pstream in processed_streams:
        is_free = pstream[0].free_field
        reason = ""
        for trace in pstream:
            if trace.hasParameter("failure"):
                reason = trace.getParameter("failure")["reason"]
                break
        if is_free:
            assert reason.startswith("Failed")
        else:
            assert reason == "Failed free field sensor check."
Example #24
def test_lowpass_max():
    datapath = os.path.join('data', 'testdata', 'lowpass_max')
    datadir = pkg_resources.resource_filename('gmprocess', datapath)
    sc = StreamCollection.from_directory(datadir)
    sc.describe()

    conf = get_config()
    update = {
        'processing': [
            {
                'detrend': {
                    'detrending_method': 'demean'
                }
            },
            {
                'remove_response': {
                    'f1': 0.001,
                    'f2': 0.005,
                    'f3': None,
                    'f4': None,
                    'output': 'ACC',
                    'water_level': 60
                }
            },
            #            {'detrend': {'detrending_method': 'linear'}},
            #            {'detrend': {'detrending_method': 'demean'}},
            {
                'get_corner_frequencies': {
                    'constant': {
                        'highpass': 0.08,
                        'lowpass': 20.0
                    },
                    'method': 'constant',
                    'snr': {
                        'same_horiz': True
                    }
                }
            },
            {
                'lowpass_max_frequency': {
                    'fn_fac': 0.9
                }
            }
        ]
    }
    update_dict(conf, update)
    update = {
        'windows': {
            'signal_end': {
                'method': 'model',
                'vmin': 1.0,
                'floor': 120,
                'model': 'AS16',
                'epsilon': 2.0
            },
            'window_checks': {
                'do_check': False,
                'min_noise_duration': 1.0,
                'min_signal_duration': 1.0
            }
        }
    }
    update_dict(conf, update)
    edict = {
        'id': 'ci38038071',
        'time': UTCDateTime('2018-08-30 02:35:36'),
        'lat': 34.136,
        'lon': -117.775,
        'depth': 5.5,
        'magnitude': 4.4
    }
    event = get_event_object(edict)
    test = process_streams(sc, event, conf)
    for st in test:
        for tr in st:
            freq_dict = tr.getParameter('corner_frequencies')
            np.testing.assert_allclose(freq_dict['lowpass'], 18.0)
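
The expected 18.0 Hz value is consistent with fn_fac applied to the Nyquist frequency: assuming the test records are sampled at 40 samples per second (a 20 Hz Nyquist frequency), lowpass_max_frequency caps the corner at 0.9 × 20 Hz = 18 Hz, below the 20 Hz constant lowpass set earlier in the config.
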
Example #25
def process_event(event,
                  outdir,
                  pcommands,
                  config,
                  input_directory,
                  process_tag,
                  files_created,
                  output_format,
                  status,
                  recompute_metrics,
                  export_dir=None):

    # setup logging to write to the input logfile
    argthing = namedtuple('args', ['debug', 'quiet'])
    args = argthing(debug=True, quiet=False)
    setup_logger(args)

    logger = logging.getLogger()
    stream_handler = logger.handlers[0]
    logfile = os.path.join(outdir, '%s.log' % event.id)
    fhandler = logging.FileHandler(logfile)
    logger.removeHandler(stream_handler)
    logger.addHandler(fhandler)

    event_dir = os.path.join(outdir, event.id)
    if not os.path.exists(event_dir):
        os.makedirs(event_dir)

    workname = os.path.join(event_dir, WORKSPACE_NAME)
    workspace_exists = os.path.isfile(workname)
    workspace_has_processed = False
    workspace = None
    processing_done = False

    if workspace_exists:
        workspace = StreamWorkspace.open(workname)
        labels = workspace.getLabels()
        if len(labels):
            labels.remove('unprocessed')
        elif 'assemble' not in pcommands:
            print('No data in workspace. Please run assemble.')
            sys.exit(1)

        if len(labels) == 1:
            process_tag = labels[0]
            workspace_has_processed = True
        else:
            if 'process' not in pcommands:
                fmt = '\nThere are %i sets of processed data in %s.'
                tpl = (len(labels), workname)
                print(fmt % tpl)
                print(('This software can only handle one set of '
                       'processed data. Exiting.\n'))
                sys.exit(1)

    download_done = False

    # Need to initialize rstreams/pstreams
    rstreams = []
    pstreams = []

    rupture_file = None
    if 'assemble' in pcommands:
        logging.info('Downloading/loading raw streams...')
        workspace, workspace_file, rstreams, rupture_file = download(
            event, event_dir, config, input_directory)

        download_done = True
        append_file(files_created, 'Workspace', workname)

    else:
        if not workspace_exists:
            print('\nYou opted not to download or process from input.')
            print('No previous HDF workspace file could be found.')
            print('Try re-running with the assemble command with or ')
            print('without the --directory option.\n')
            sys.exit(1)
        if 'process' in pcommands:
            logging.info('Getting raw streams from workspace...')
            with warnings.catch_warnings():
                warnings.simplefilter("ignore",
                                      category=H5pyDeprecationWarning)
                rstreams = workspace.getStreams(event.id,
                                                labels=['unprocessed'])
            download_done = True
        else:
            need_processed = set(['report', 'shakemap'])
            need_pstreams = len(need_processed.intersection(pcommands))
            if workspace_has_processed:
                if need_pstreams:
                    logging.info('Getting processed streams from workspace...')
                    with warnings.catch_warnings():
                        warnings.simplefilter("ignore",
                                              category=H5pyDeprecationWarning)
                        pstreams = workspace.getStreams(event.id,
                                                        labels=[process_tag])
                download_done = True
                processing_done = True

    if ('process' in pcommands and download_done and not processing_done
            and len(rstreams)):
        logging.info('Processing raw streams for event %s...' % event.id)
        pstreams = process_streams(rstreams, event, config=config)

        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=H5pyDeprecationWarning)
            workspace.addStreams(event, pstreams, label=process_tag)
            workspace.calcMetrics(event.id,
                                  labels=[process_tag],
                                  config=config,
                                  streams=pstreams,
                                  stream_label=process_tag,
                                  rupture_file=rupture_file)
        processing_done = True

    if 'export' in pcommands:
        if export_dir is not None:
            if not os.path.isdir(export_dir):
                os.makedirs(export_dir)
            outdir = export_dir

        labels = workspace.getLabels()
        if 'unprocessed' not in labels:
            fmt = ('Workspace file "%s" appears to have no unprocessed '
                   'data. Skipping.')
            logging.info(fmt % workspace_file)
        else:
            labels.remove('unprocessed')
            if not labels:
                fmt = ('Workspace file "%s" appears to have no processed '
                       'data. Skipping.')
                logging.info(fmt % workspace_file)
            else:
                logging.info('Creating tables for event %s...', event.id)
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore",
                                          category=H5pyDeprecationWarning)
                    if recompute_metrics:
                        del workspace.dataset.auxiliary_data.WaveFormMetrics
                        del workspace.dataset.auxiliary_data.StationMetrics
                        workspace.calcMetrics(event.id,
                                              labels=labels,
                                              config=config,
                                              rupture_file=rupture_file)
                    event_table, imc_tables, readmes = workspace.getTables(
                        labels[0], streams=pstreams, stream_label=process_tag)
                    ev_fit_spec, fit_readme = workspace.getFitSpectraTable(
                        event.id, labels[0], pstreams)

                # Set the precisions for the imc tables, event table, and
                # fit_spectra table before writing
                imc_tables_formatted = {}
                for imc, imc_table in imc_tables.items():
                    imc_tables_formatted[imc] = set_precisions(imc_table)
                event_table_formatted = set_precisions(event_table)
                df_fit_spectra_formatted = set_precisions(ev_fit_spec)

                if not os.path.isdir(outdir):
                    os.makedirs(outdir)

                filenames = ['events'] + \
                    [imc.lower() for imc in imc_tables_formatted.keys()] + \
                    [imc.lower() + '_README' for imc in readmes.keys()] + \
                    ['fit_spectra_parameters', 'fit_spectra_parameters_README']

                files = [event_table_formatted] + list(
                    imc_tables_formatted.values()) + list(readmes.values()) + [
                        df_fit_spectra_formatted, fit_readme
                    ]

                if output_format != 'csv':
                    output_format = 'xlsx'

                for filename, df in dict(zip(filenames, files)).items():
                    filepath = os.path.join(outdir,
                                            filename + '.%s' % output_format)
                    if os.path.exists(filepath):
                        if 'README' in filename:
                            continue
                        else:
                            mode = 'a'
                            header = False
                    else:
                        mode = 'w'
                        header = True
                        append_file(files_created, 'Tables', filepath)
                    if output_format == 'csv':
                        df.to_csv(filepath,
                                  index=False,
                                  float_format=DEFAULT_FLOAT_FORMAT,
                                  na_rep=DEFAULT_NA_REP,
                                  mode=mode,
                                  header=header)
                    else:
                        # DataFrame.to_excel() does not accept a mode
                        # argument; an existing file is simply overwritten.
                        df.to_excel(filepath,
                                    index=False,
                                    float_format=DEFAULT_FLOAT_FORMAT,
                                    na_rep=DEFAULT_NA_REP,
                                    header=header)

    if ('report' in pcommands and processing_done and len(pstreams)):
        logging.info('Creating diagnostic plots for event %s...' % event.id)
        plot_dir = os.path.join(event_dir, 'plots')
        if not os.path.isdir(plot_dir):
            os.makedirs(plot_dir)
        for stream in pstreams:
            summary_plots(stream, plot_dir, event)

        mapfile = draw_stations_map(pstreams, event, event_dir)
        plot_moveout(pstreams,
                     event.latitude,
                     event.longitude,
                     file=os.path.join(event_dir, 'moveout_plot.png'))

        append_file(files_created, 'Station map', mapfile)
        append_file(files_created, 'Moveout plot', 'moveout_plot.png')

        logging.info('Creating diagnostic report for event %s...' % event.id)
        # Build the summary report?
        build_conf = config['build_report']
        report_format = build_conf['format']
        if report_format == 'latex':
            report_file, success = build_report_latex(pstreams,
                                                      event_dir,
                                                      event,
                                                      config=config)
        else:
            report_file = ''
            success = False
        if os.path.isfile(report_file) and success:
            append_file(files_created, 'Summary report', report_file)

    if 'provenance' in pcommands and processing_done and len(pstreams):
        logging.info('Creating provenance table for event %s...' % event.id)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=H5pyDeprecationWarning)
            provdata = workspace.getProvenance(event.id, labels=[process_tag])
        if output_format == 'csv':
            csvfile = os.path.join(event_dir, 'provenance.csv')
            append_file(files_created, 'Provenance', csvfile)
            provdata.to_csv(csvfile)
        else:
            excelfile = os.path.join(event_dir, 'provenance.xlsx')
            append_file(files_created, 'Provenance', excelfile)
            provdata.to_excel(excelfile, index=False)

    if 'shakemap' in pcommands and processing_done and len(pstreams):
        logging.info('Creating shakemap table for event %s...' % event.id)
        shakemap_file, jsonfile = save_shakemap_amps(pstreams, event,
                                                     event_dir)
        append_file(files_created, 'shakemap', shakemap_file)
        append_file(files_created, 'shakemap', jsonfile)

    if status and processing_done and len(pstreams):
        if status == 'short':
            index = 'Failure reason'
            col = ['Number of records']
        elif status == 'long':
            index = 'Station ID'
            col = ['Failure reason']
        elif status == 'net':
            index = 'Network'
            col = ['Number of passed records', 'Number of failed records']

        status_info = pstreams.get_status(status)
        status_info.to_csv(os.path.join(event_dir, 'status.csv'),
                           header=col,
                           index_label=index)

    # since we don't know how many events users will be processing,
    # let's guard against memory issues by clearing out the big data
    # structures
    workspace.close()

    logging.info('Finishing event %s' % event.id)

    return workname
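
Example #25 wires the full command-line workflow together: assembling raw data, processing, computing metrics, and exporting tables, reports, provenance, and ShakeMap files. Stripped of the logging, warning filters, and export handling, the per-event core reduces to roughly the sketch below; process_event_core is a hypothetical wrapper name, while the calls themselves mirror those used in the examples above.

def process_event_core(event, event_dir, config, process_tag="processed"):
    # Open the ASDF workspace written by the assemble step.
    workname = os.path.join(event_dir, WORKSPACE_NAME)
    workspace = StreamWorkspace.open(workname)

    # Fetch the raw streams, run the configured processing steps, store the
    # processed streams under a label, and compute the stream metrics.
    rstreams = workspace.getStreams(event.id, labels=["unprocessed"])
    pstreams = process_streams(rstreams, event, config=config)
    workspace.addStreams(event, pstreams, label=process_tag)
    workspace.calcMetrics(event.id, labels=[process_tag], config=config)

    workspace.close()
    return workname
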
Example #26
def _test_workspace():
    eventid = "us1000778i"
    datafiles, event = read_data_dir("geonet", eventid, "*.V1A")
    tdir = tempfile.mkdtemp()
    try:
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=H5pyDeprecationWarning)
            warnings.filterwarnings("ignore", category=YAMLError)
            warnings.filterwarnings("ignore", category=FutureWarning)
            config = update_config(
                os.path.join(datadir, "config_min_freq_0p2.yml"))
            tfile = os.path.join(tdir, "test.hdf")
            raw_streams = []
            for dfile in datafiles:
                raw_streams += read_data(dfile)

            workspace = StreamWorkspace(tfile)
            t1 = time.time()
            workspace.addStreams(event, raw_streams, label="raw")
            t2 = time.time()
            print("Adding %i streams took %.2f seconds" % (len(raw_streams),
                                                           (t2 - t1)))

            str_repr = workspace.__repr__()
            assert str_repr == "Events: 1 Stations: 3 Streams: 3"

            eventobj = workspace.getEvent(eventid)
            assert eventobj.origins[0].latitude == event.origins[0].latitude
            assert eventobj.magnitudes[0].mag == event.magnitudes[0].mag

            stations = workspace.getStations()
            assert sorted(stations) == ["HSES", "THZ", "WTMC"]

            stations = workspace.getStations()
            assert sorted(stations) == ["HSES", "THZ", "WTMC"]

            # test retrieving event that doesn't exist
            with pytest.raises(KeyError):
                workspace.getEvent("foo")

            instream = None
            for stream in raw_streams:
                if stream[0].stats.station.lower() == "hses":
                    instream = stream
                    break
            if instream is None:
                raise ValueError("Instream should not be none.")
            outstream = workspace.getStreams(eventid,
                                             stations=["HSES"],
                                             labels=["raw"])[0]
            compare_streams(instream, outstream)

            label_summary = workspace.summarizeLabels()
            assert label_summary.iloc[0]["Label"] == "raw"
            assert label_summary.iloc[0]["Software"] == "gmprocess"

            sc = StreamCollection(raw_streams)
            processed_streams = process_streams(sc, event, config=config)
            workspace.addStreams(event, processed_streams, "processed")

            idlist = workspace.getEventIds()
            assert idlist[0] == eventid

            outstream = workspace.getStreams(eventid,
                                             stations=["HSES"],
                                             labels=["processed"])[0]

            provenance = workspace.getProvenance(eventid, labels=["processed"])
            first_row = pd.Series({
                "Record": "NZ.HSES.--.HN1_us1000778i_processed",
                "Processing Step": "Remove Response",
                "Step Attribute": "input_units",
                "Attribute Value": "counts",
            })

            last_row = pd.Series({
                "Record": "NZ.WTMC.--.HNZ_us1000778i_processed",
                "Processing Step": "Lowpass Filter",
                "Step Attribute": "number_of_passes",
                "Attribute Value": 2,
            })
            assert provenance.iloc[0].equals(first_row)
            assert provenance.iloc[-1].equals(last_row)

            # compare the parameters from the input processed stream
            # to its output equivalent
            instream = None
            for stream in processed_streams:
                if stream[0].stats.station.lower() == "hses":
                    instream = stream
                    break
            if instream is None:
                raise ValueError("Instream should not be none.")
            compare_streams(instream, outstream)
            workspace.close()

            # read in data from a second event and stash it in the workspace
            eventid = "nz2018p115908"
            datafiles, event = read_data_dir("geonet", eventid, "*.V2A")
            raw_streams = []
            for dfile in datafiles:
                raw_streams += read_data(dfile)

            workspace = StreamWorkspace.open(tfile)
            workspace.addStreams(event, raw_streams, label="foo")

            stations = workspace.getStations()

            eventids = workspace.getEventIds()
            assert eventids == ["us1000778i", "nz2018p115908"]
            instation = raw_streams[0][0].stats.station
            this_stream = workspace.getStreams(eventid,
                                               stations=[instation],
                                               labels=["foo"])[0]
            assert instation == this_stream[0].stats.station
            usid = "us1000778i"
            inventory = workspace.getInventory(usid)
            workspace.close()
            codes = [
                station.code for station in inventory.networks[0].stations
            ]
            assert sorted(set(codes)) == ["HSES", "THZ", "WPWS", "WTMC"]

    except Exception:
        raise
    finally:
        shutil.rmtree(tdir)
Example #27
def _test_vs30_dist_metrics():
    KNOWN_DISTANCES = {
        "epicentral": 5.1,
        "hypocentral": 10.2,
        "rupture": 2.21,
        "rupture_var": np.nan,
        "joyner_boore": 2.21,
        "joyner_boore_var": np.nan,
        "gc2_rx": 2.66,
        "gc2_ry": 3.49,
        "gc2_ry0": 0.00,
        "gc2_U": 34.34,
        "gc2_T": 2.66,
    }
    KNOWN_BAZ = 239.46
    KNOWN_VS30 = 331.47

    eventid = "ci38457511"
    datafiles, event = read_data_dir("fdsn", eventid, "*")
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config = update_config(os.path.join(datadir, "config_min_freq_0p2.yml"))
    processed_streams = process_streams(raw_streams, event, config=config)
    rupture_file = get_rupture_file(datadir)
    grid_file = os.path.join(datadir, "test_grid.grd")
    config["metrics"]["vs30"] = {
        "vs30": {
            "file": grid_file,
            "column_header": "GlobalVs30",
            "readme_entry": "GlobalVs30",
            "units": "m/s",
        }
    }
    tdir = tempfile.mkdtemp()
    try:
        tfile = os.path.join(tdir, "test.hdf")
        ws = StreamWorkspace(tfile)
        ws.addEvent(event)
        ws.addStreams(event, raw_streams, label="raw")
        ws.addStreams(event, processed_streams, label="processed")
        ws.calcMetrics(event.id,
                       rupture_file=rupture_file,
                       labels=["processed"],
                       config=config)
        sta_sum = ws.getStreamMetrics(event.id, "CI", "CLC", "processed")

        for dist in sta_sum.distances:
            np.testing.assert_allclose(sta_sum.distances[dist],
                                       KNOWN_DISTANCES[dist],
                                       rtol=0.01)
        np.testing.assert_allclose(sta_sum._back_azimuth, KNOWN_BAZ, rtol=0.01)
        np.testing.assert_allclose(sta_sum._vs30["vs30"]["value"],
                                   KNOWN_VS30,
                                   rtol=0.01)
        event_df, imc_tables, readme_tables = ws.getTables("processed")
        ws.close()
        check_cols = set([
            "EpicentralDistance",
            "HypocentralDistance",
            "RuptureDistance",
            "RuptureDistanceVar",
            "JoynerBooreDistance",
            "JoynerBooreDistanceVar",
            "GC2_rx",
            "GC2_ry",
            "GC2_ry0",
            "GC2_U",
            "GC2_T",
            "GlobalVs30",
            "BackAzimuth",
        ])
        assert check_cols.issubset(set(readme_tables["Z"]["Column header"]))
        assert check_cols.issubset(set(imc_tables["Z"].columns))
    except Exception:
        raise
    finally:
        shutil.rmtree(tdir)
Example #28
def test_lowpass_max():
    datapath = os.path.join("data", "testdata", "lowpass_max")
    datadir = pkg_resources.resource_filename("gmprocess", datapath)
    sc = StreamCollection.from_directory(datadir)
    sc.describe()

    conf = get_config()
    update = {
        "processing": [
            {
                "detrend": {
                    "detrending_method": "demean"
                }
            },
            {
                "remove_response": {
                    "f1": 0.001,
                    "f2": 0.005,
                    "f3": None,
                    "f4": None,
                    "water_level": 60,
                }
            },
            #            {'detrend': {'detrending_method': 'linear'}},
            #            {'detrend': {'detrending_method': 'demean'}},
            {
                "get_corner_frequencies": {
                    "constant": {
                        "highpass": 0.08,
                        "lowpass": 20.0
                    },
                    "method": "constant",
                    "snr": {
                        "same_horiz": True
                    },
                }
            },
            {
                "lowpass_max_frequency": {
                    "fn_fac": 0.9
                }
            },
        ]
    }
    update_dict(conf, update)
    update = {
        "windows": {
            "signal_end": {
                "method": "model",
                "vmin": 1.0,
                "floor": 120,
                "model": "AS16",
                "epsilon": 2.0,
            },
            "window_checks": {
                "do_check": False,
                "min_noise_duration": 1.0,
                "min_signal_duration": 1.0,
            },
        }
    }
    update_dict(conf, update)
    edict = {
        "id": "ci38038071",
        "time": UTCDateTime("2018-08-30 02:35:36"),
        "lat": 34.136,
        "lon": -117.775,
        "depth": 5.5,
        "magnitude": 4.4,
    }
    event = get_event_object(edict)
    test = process_streams(sc, event, conf)
    for st in test:
        for tr in st:
            freq_dict = tr.getParameter("corner_frequencies")
            np.testing.assert_allclose(freq_dict["lowpass"], 18.0)
Example #29
def _test_vs30_dist_metrics():
    KNOWN_DISTANCES = {
        'epicentral': 5.1,
        'hypocentral': 10.2,
        'rupture': 2.21,
        'rupture_var': np.nan,
        'joyner_boore': 2.21,
        'joyner_boore_var': np.nan,
        'gc2_rx': 2.66,
        'gc2_ry': 3.49,
        'gc2_ry0': 0.00,
        'gc2_U': 34.34,
        'gc2_T': 2.66
    }
    KNOWN_BAZ = 239.46
    KNOWN_VS30 = 331.47

    eventid = 'ci38457511'
    datafiles, event = read_data_dir('fdsn', eventid, '*')
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config = update_config(os.path.join(datadir, 'config_min_freq_0p2.yml'))
    processed_streams = process_streams(raw_streams, event, config=config)
    rupture_file = get_rupture_file(datadir)
    grid_file = os.path.join(datadir, 'test_grid.grd')
    config['metrics']['vs30'] = {
        'vs30': {
            'file': grid_file,
            'column_header': 'GlobalVs30',
            'readme_entry': 'GlobalVs30',
            'units': 'm/s'
        }
    }
    tdir = tempfile.mkdtemp()
    try:
        tfile = os.path.join(tdir, 'test.hdf')
        ws = StreamWorkspace(tfile)
        ws.addEvent(event)
        ws.addStreams(event, raw_streams, label='raw')
        ws.addStreams(event, processed_streams, label='processed')
        ws.calcMetrics(event.id,
                       rupture_file=rupture_file,
                       labels=['processed'],
                       config=config)
        sta_sum = ws.getStreamMetrics(event.id, 'CI', 'CLC', 'processed')

        for dist in sta_sum.distances:
            np.testing.assert_allclose(sta_sum.distances[dist],
                                       KNOWN_DISTANCES[dist],
                                       rtol=0.01)
        np.testing.assert_allclose(sta_sum._back_azimuth, KNOWN_BAZ, rtol=0.01)
        np.testing.assert_allclose(sta_sum._vs30['vs30']['value'],
                                   KNOWN_VS30,
                                   rtol=0.01)
        event_df, imc_tables, readme_tables = ws.getTables('processed')
        ws.close()
        check_cols = set([
            'EpicentralDistance', 'HypocentralDistance', 'RuptureDistance',
            'RuptureDistanceVar', 'JoynerBooreDistance',
            'JoynerBooreDistanceVar', 'GC2_rx', 'GC2_ry', 'GC2_ry0', 'GC2_U',
            'GC2_T', 'GlobalVs30', 'BackAzimuth'
        ])
        assert check_cols.issubset(set(readme_tables['Z']['Column header']))
        assert check_cols.issubset(set(imc_tables['Z'].columns))
    except Exception:
        raise
    finally:
        shutil.rmtree(tdir)
Example #30
def _test_workspace():
    eventid = 'us1000778i'
    datafiles, event = read_data_dir('geonet', eventid, '*.V1A')
    tdir = tempfile.mkdtemp()
    try:
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=H5pyDeprecationWarning)
            warnings.filterwarnings("ignore", category=YAMLLoadWarning)
            warnings.filterwarnings("ignore", category=FutureWarning)
            config = update_config(
                os.path.join(datadir, 'config_min_freq_0p2.yml'))
            tfile = os.path.join(tdir, 'test.hdf')
            raw_streams = []
            for dfile in datafiles:
                raw_streams += read_data(dfile)

            workspace = StreamWorkspace(tfile)
            t1 = time.time()
            workspace.addStreams(event, raw_streams, label='raw')
            t2 = time.time()
            print('Adding %i streams took %.2f seconds' % (len(raw_streams),
                                                           (t2 - t1)))

            str_repr = workspace.__repr__()
            assert str_repr == 'Events: 1 Stations: 3 Streams: 3'

            eventobj = workspace.getEvent(eventid)
            assert eventobj.origins[0].latitude == event.origins[0].latitude
            assert eventobj.magnitudes[0].mag == event.magnitudes[0].mag

            stations = workspace.getStations()
            assert sorted(stations) == ['HSES', 'THZ', 'WTMC']

            stations = workspace.getStations(eventid=eventid)
            assert sorted(stations) == ['HSES', 'THZ', 'WTMC']

            # test retrieving event that doesn't exist
            with pytest.raises(KeyError):
                workspace.getEvent('foo')

            instream = None
            for stream in raw_streams:
                if stream[0].stats.station.lower() == 'hses':
                    instream = stream
                    break
            if instream is None:
                raise ValueError('Instream should not be none.')
            outstream = workspace.getStreams(eventid,
                                             stations=['HSES'],
                                             labels=['raw'])[0]
            compare_streams(instream, outstream)

            label_summary = workspace.summarizeLabels()
            assert label_summary.iloc[0]['Label'] == 'raw'
            assert label_summary.iloc[0]['Software'] == 'gmprocess'

            sc = StreamCollection(raw_streams)
            processed_streams = process_streams(sc, event, config=config)
            workspace.addStreams(event, processed_streams, 'processed')

            idlist = workspace.getEventIds()
            assert idlist[0] == eventid

            outstream = workspace.getStreams(eventid,
                                             stations=['HSES'],
                                             labels=['processed'])[0]

            provenance = workspace.getProvenance(eventid, labels=['processed'])
            first_row = pd.Series({
                'Record': 'NZ.HSES.--.HN1_us1000778i_processed',
                'Processing Step': 'Remove Response',
                'Step Attribute': 'input_units',
                'Attribute Value': 'counts'
            })

            last_row = pd.Series({
                'Record': 'NZ.WTMC.--.HNZ_us1000778i_processed',
                'Processing Step': 'Lowpass Filter',
                'Step Attribute': 'number_of_passes',
                'Attribute Value': 2
            })
            assert provenance.iloc[0].equals(first_row)
            assert provenance.iloc[-1].equals(last_row)

            # compare the parameters from the input processed stream
            # to its output equivalent
            instream = None
            for stream in processed_streams:
                if stream[0].stats.station.lower() == 'hses':
                    instream = stream
                    break
            if instream is None:
                raise ValueError('Instream should not be none.')
            compare_streams(instream, outstream)
            workspace.close()

            # read in data from a second event and stash it in the workspace
            eventid = 'nz2018p115908'
            datafiles, event = read_data_dir('geonet', eventid, '*.V2A')
            raw_streams = []
            for dfile in datafiles:
                raw_streams += read_data(dfile)

            workspace = StreamWorkspace.open(tfile)
            workspace.addStreams(event, raw_streams, label='foo')

            stations = workspace.getStations(eventid)

            eventids = workspace.getEventIds()
            assert eventids == ['us1000778i', 'nz2018p115908']
            instation = raw_streams[0][0].stats.station
            this_stream = workspace.getStreams(eventid,
                                               stations=[instation],
                                               labels=['foo'])[0]
            assert instation == this_stream[0].stats.station
            usid = 'us1000778i'
            inventory = workspace.getInventory(usid)
            workspace.close()
            codes = [
                station.code for station in inventory.networks[0].stations
            ]
            assert sorted(set(codes)) == ['HSES', 'THZ', 'WPWS', 'WTMC']

    except Exception:
        raise
    finally:
        shutil.rmtree(tdir)