예제 #1
0
def generate_workspace():
    """Generate simple HDF5 with ASDF layout for testing.
    """
    PCOMMANDS = [
        'assemble',
        'process',
    ]
    EVENTID = 'us1000778i'
    LABEL = 'ptest'
    datafiles, event = read_data_dir('geonet', EVENTID, '*.V1A')

    tdir = tempfile.mkdtemp()
    tfilename = os.path.join(tdir, 'workspace.h5')

    raw_data = []
    for dfile in datafiles:
        raw_data += read_data(dfile)
    write_asdf(tfilename, raw_data, event, label="unprocessed")
    del raw_data

    config = get_config()
    workspace = StreamWorkspace.open(tfilename)
    raw_streams = workspace.getStreams(EVENTID, labels=['unprocessed'])
    pstreams = process_streams(raw_streams, event, config=config)
    workspace.addStreams(event, pstreams, label=LABEL)
    workspace.calcMetrics(event.id, labels=[LABEL], config=config)

    return tfilename
def test_process_streams():
    # Loma Prieta test station (nc216859)

    data_files, origin = read_data_dir('geonet', 'us1000778i', '*.V1A')
    streams = []
    for f in data_files:
        streams += read_data(f)

    sc = StreamCollection(streams)

    sc.describe()

    test = process_streams(sc, origin)

    logging.info('Testing trace: %s' % test[0][1])

    assert len(test) == 3
    assert len(test[0]) == 3
    assert len(test[1]) == 3
    assert len(test[2]) == 3

    # Apparently the traces end up in a different order on the Travis linux
    # container than on my local mac. So testing individual traces need to
    # not care about trace order.

    trace_maxes = np.sort([np.max(np.abs(t.data)) for t in test[0]])

    np.testing.assert_allclose(
        trace_maxes,
        np.array([157.81975508, 240.33718094, 263.67804256]),
        rtol=1e-5
    )
예제 #3
0
def test_free_field():
    data_files, origin = read_data_dir('kiknet', 'usp000hzq8')

    raw_streams = []
    for dfile in data_files:
        raw_streams += read_data(dfile)

    sc = StreamCollection(raw_streams)

    processed_streams = process_streams(sc, origin)

    # all of these streams should have failed for different reasons
    npassed = np.sum([pstream.passed for pstream in processed_streams])
    assert npassed == 0
    for pstream in processed_streams:
        is_free = pstream[0].free_field
        reason = ''
        for trace in pstream:
            if trace.hasParameter('failure'):
                reason = trace.getParameter('failure')['reason']
                break
        if is_free:
            assert reason.startswith('Failed sta/lta check')
        else:
            assert reason == 'Failed free field sensor check.'
예제 #4
0
def test_metrics2():
    eventid = 'usb000syza'
    datafiles, event = read_data_dir('knet', eventid, '*')
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config = update_config(os.path.join(datadir, 'config_min_freq_0p2.yml'))
    config['metrics']['output_imts'].append('Arias')
    config['metrics']['output_imcs'].append('arithmetic_mean')
    # turn off sta/lta check and snr checks
    newconfig = drop_processing(config, ['check_sta_lta', 'compute_snr'])
    processed_streams = process_streams(raw_streams, event, config=newconfig)

    tdir = tempfile.mkdtemp()
    try:
        tfile = os.path.join(tdir, 'test.hdf')
        workspace = StreamWorkspace(tfile)
        workspace.addEvent(event)
        workspace.addStreams(event, processed_streams, label='processed')
        workspace.calcMetrics(event.id, labels=['processed'])
        etable, imc_tables1, readmes1 = workspace.getTables('processed')
        assert 'ARITHMETIC_MEAN' not in imc_tables1
        assert 'ARITHMETIC_MEAN' not in readmes1
        del workspace.dataset.auxiliary_data.WaveFormMetrics
        del workspace.dataset.auxiliary_data.StationMetrics
        workspace.calcMetrics(event.id, labels=['processed'], config=config)
        etable2, imc_tables2, readmes2 = workspace.getTables('processed')
        assert 'ARITHMETIC_MEAN' in imc_tables2
        assert 'ARITHMETIC_MEAN' in readmes2
        assert 'ARIAS' in imc_tables2['ARITHMETIC_MEAN']
        testarray = readmes2['ARITHMETIC_MEAN']['Column header'].to_numpy()
        assert 'ARIAS' in testarray
    except Exception as e:
        raise (e)
    finally:
        shutil.rmtree(tdir)
def test_nnet():

    conf = get_config()

    update = {
        'processing': [
            {'detrend': {'detrending_method': 'demean'}},
            # {'check_zero_crossings': {'min_crossings': 10}},
            {'detrend': {'detrending_method': 'linear'}},
            {'compute_snr': {'bandwidth': 20.0,
                             'check': {'max_freq': 5.0,
                                       'min_freq': 0.2,
                                       'threshold': 3.0}}},
            {'NNet_QA': {'acceptance_threshold': 0.5,
                         'model_name': 'CantWell'}}
        ]
    }
    update_dict(conf, update)

    data_files, origin = read_data_dir('geonet', 'us1000778i', '*.V1A')
    streams = []
    for f in data_files:
        streams += read_data(f)

    sc = StreamCollection(streams)
    test = process_streams(sc, origin, conf)
    tstream = test.select(station='HSES')[0]
    allparams = tstream.getStreamParamKeys()
    nnet_dict = tstream.getStreamParam('nnet_qa')
    np.testing.assert_allclose(
        nnet_dict['score_HQ'], 0.99321798811740059, rtol=1e-3)
def test_metrics2():
    eventid = 'usb000syza'
    datafiles, event = read_data_dir('knet', eventid, '*')
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config = get_config()
    config['metrics']['output_imts'].append('Arias')
    config['metrics']['output_imcs'].append('arithmetic_mean')
    # turn off sta/lta check and snr checks
    newconfig = drop_processing(config, ['check_sta_lta', 'compute_snr'])
    processed_streams = process_streams(raw_streams, event, config=newconfig)

    tdir = tempfile.mkdtemp()
    try:
        tfile = os.path.join(tdir, 'test.hdf')
        workspace = StreamWorkspace(tfile)
        workspace.addEvent(event)
        workspace.addStreams(event, processed_streams, label='processed')
        workspace.calcMetrics(event.id, labels=['processed'])
        etable, imc_tables1 = workspace.getTables('processed')
        etable2, imc_tables2 = workspace.getTables('processed', config=config)
        assert 'ARITHMETIC_MEAN' not in imc_tables1
        assert 'ARITHMETIC_MEAN' in imc_tables2
        assert 'ARIAS' in imc_tables2['ARITHMETIC_MEAN']
    except Exception as e:
        raise (e)
    finally:
        shutil.rmtree(tdir)
예제 #7
0
def test():
    datafiles, origin = read_data_dir('fdsn', 'nc72282711', 'BK.CMB*.mseed')
    streams = []
    for datafile in datafiles:
        streams += read_fdsn(datafile)

    assert streams[0].get_id() == 'BK.CMB.HN'

    datafiles, origin = read_data_dir('fdsn', 'nc72282711', 'TA.M04C*.mseed')
    streams = []
    for datafile in datafiles:
        streams += read_fdsn(datafile)

    assert streams[0].get_id() == 'TA.M04C.HN'

    # test assignment of Z channel
    datafiles, origin = read_data_dir('fdsn', 'nc73300395', 'BK.VALB*.mseed')
    streams = []
    for datafile in datafiles:
        streams += read_fdsn(datafile)

    # get all channel names
    channels = sorted([st[0].stats.channel for st in streams])
    assert channels == ['HN2', 'HN3', 'HNZ']

    # DEBUGGING
    sc = StreamCollection(streams)
    psc = process_streams(sc, origin)
예제 #8
0
def test_process_streams():
    # Loma Prieta test station (nc216859)

    data_files, origin = read_data_dir('geonet', 'us1000778i', '*.V1A')
    streams = []
    for f in data_files:
        streams += read_data(f)

    sc = StreamCollection(streams)

    sc.describe()

    config = update_config(os.path.join(datadir, 'config_min_freq_0p2.yml'))

    test = process_streams(sc, origin, config=config)

    logging.info('Testing trace: %s' % test[0][1])

    assert len(test) == 3
    assert len(test[0]) == 3
    assert len(test[1]) == 3
    assert len(test[2]) == 3

    # Apparently the traces end up in a different order on the Travis linux
    # container than on my local mac. So testing individual traces need to
    # not care about trace order.

    trace_maxes = np.sort(
        [np.max(np.abs(t.data)) for t in test.select(station='HSES')[0]])

    np.testing.assert_allclose(trace_maxes,
                               np.array(
                                   [157.81975508, 240.33718094, 263.67804256]),
                               rtol=1e-5)
예제 #9
0
def test_colocated():
    eventid = 'ci38445975'
    datafiles, event = read_data_dir('fdsn', eventid, '*')
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config_file = os.path.join(datadir, 'test_config.yml')
    with open(config_file, 'r') as f:
        config = yaml.load(f, Loader=yaml.FullLoader)
    processed_streams = process_streams(raw_streams, event, config=config)

    tdir = tempfile.mkdtemp()
    try:
        tfile = os.path.join(tdir, 'test.hdf')
        ws = StreamWorkspace(tfile)
        ws.addEvent(event)
        ws.addStreams(event, raw_streams, label='raw')
        ws.addStreams(event, processed_streams, label='processed')
        ws.calcMetrics(eventid, labels=['processed'], config=config)
        stasum = ws.getStreamMetrics(eventid, 'CI', 'MIKB', 'processed')
        np.testing.assert_allclose(
            stasum.get_pgm('duration', 'geometric_mean'), 38.94480068)
        ws.close()
    except Exception as e:
        raise (e)
    finally:
        shutil.rmtree(tdir)
예제 #10
0
def test_lowpass_max():
    datapath = os.path.join('data', 'testdata', 'lowpass_max')
    datadir = pkg_resources.resource_filename('gmprocess', datapath)
    sc = StreamCollection.from_directory(datadir)
    sc.describe()

    conf = get_config()
    update = {
        'processing': [
            {'detrend': {'detrending_method': 'demean'}},
            {'remove_response': {
                'f1': 0.001, 'f2': 0.005, 'f3': None, 'f4': None,
                'output': 'ACC', 'water_level': 60}
             },
            #            {'detrend': {'detrending_method': 'linear'}},
            #            {'detrend': {'detrending_method': 'demean'}},
            {'get_corner_frequencies': {
                'constant': {
                    'highpass': 0.08, 'lowpass': 20.0
                },
                'method': 'constant',
                'snr': {'same_horiz': True}}
             },
            {'lowpass_max_frequency': {'fn_fac': 0.9}}
        ]
    }
    update_dict(conf, update)
    update = {
        'windows': {
            'signal_end': {
                'method': 'model',
                'vmin': 1.0,
                'floor': 120,
                'model': 'AS16',
                'epsilon': 2.0
            },
            'window_checks': {
                'do_check': False,
                'min_noise_duration': 1.0,
                'min_signal_duration': 1.0
            }
        }
    }
    update_dict(conf, update)
    edict = {
        'id': 'ci38038071',
        'time': UTCDateTime('2018-08-30 02:35:36'),
        'lat': 34.136,
        'lon': -117.775,
        'depth': 5.5,
        'magnitude': 4.4
    }
    event = get_event_object(edict)
    test = process_streams(sc, event, conf)
    for st in test:
        for tr in st:
            freq_dict = tr.getParameter('corner_frequencies')
            np.testing.assert_allclose(freq_dict['lowpass'], 18.0)
예제 #11
0
def test_raw():
    msg = "dataset.value has been deprecated. Use dataset[()] instead."
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=H5pyDeprecationWarning)
        warnings.filterwarnings("ignore", category=YAMLLoadWarning)
        warnings.filterwarnings("ignore", category=FutureWarning)
        raw_streams, inv = request_raw_waveforms(
            fdsn_client='IRIS',
            org_time='2018-11-30T17-29-29.330Z',
            lat=61.3464,
            lon=-149.9552,
            before_time=120,
            after_time=120,
            dist_min=0,
            dist_max=0.135,
            networks='*',
            stations='*',
            channels=['?N?'],
            access_restricted=False)
        tdir = tempfile.mkdtemp()
        try:
            edict = get_event_dict('ak20419010')
            origin = get_event_object('ak20419010')
            tfile = os.path.join(tdir, 'test.hdf')
            sc1 = StreamCollection(raw_streams)
            workspace = StreamWorkspace(tfile)
            workspace.addStreams(origin, sc1, label='raw')
            tstreams = workspace.getStreams(edict['id'])
            assert len(tstreams) == 0

            imclist = [
                'greater_of_two_horizontals', 'channels', 'rotd50', 'rotd100'
            ]
            imtlist = ['sa1.0', 'PGA', 'pgv', 'fas2.0', 'arias']
            # this shouldn't do anything
            workspace.setStreamMetrics(edict['id'],
                                       imclist=imclist,
                                       imtlist=imtlist)

            processed_streams = process_streams(sc1, edict)
            workspace.addStreams(origin, processed_streams, 'processed')
            labels = workspace.getLabels()
            tags = workspace.getStreamTags(edict['id'])
            out_raw_streams = workspace.getStreams(edict['id'], get_raw=True)
            assert len(out_raw_streams) == len(sc1)

            # this should only work on processed data
            workspace.setStreamMetrics(edict['id'],
                                       imclist=imclist,
                                       imtlist=imtlist)

            df = workspace.summarizeLabels()
            x = 1

        except Exception as e:
            raise e
        finally:
            shutil.rmtree(tdir)
예제 #12
0
def test_weird_sensitivity():
    datafiles, origin = read_data_dir('fdsn', 'us70008dx7', 'SL.KOGS*.mseed')
    streams = []
    for datafile in datafiles:
        streams += read_fdsn(datafile)
    sc = StreamCollection(streams)
    psc = process_streams(sc, origin)
    channel = psc[0].select(component='E')[0]
    assert_almost_equal(channel.data.max(), 62900.191900393373)
예제 #13
0
def test_metrics():
    eventid = 'usb000syza'
    datafiles, event = read_data_dir('knet', eventid, '*')
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config = get_config()
    # turn off sta/lta check and snr checks
    newconfig = drop_processing(config, ['check_sta_lta', 'compute_snr'])
    processed_streams = process_streams(raw_streams, event, config=newconfig)

    tdir = tempfile.mkdtemp()
    try:
        tfile = os.path.join(tdir, 'test.hdf')
        workspace = StreamWorkspace(tfile)
        workspace.addEvent(event)
        workspace.addStreams(event, processed_streams, label='processed')
        stream1 = processed_streams[0]
        stream2 = processed_streams[1]
        summary1 = StationSummary.from_config(stream1)
        summary2 = StationSummary.from_config(stream2)
        workspace.setStreamMetrics(event.id, 'processed', summary1)
        workspace.setStreamMetrics(event.id, 'processed', summary2)
        workspace.calcStationMetrics(event.id, labels=['processed'])
        summary1_a = workspace.getStreamMetrics(event.id,
                                                stream1[0].stats.station,
                                                'processed')
        s1_df_in = summary1.pgms.sort_values(['IMT', 'IMC'])
        s1_df_out = summary1_a.pgms.sort_values(['IMT', 'IMC'])
        array1 = s1_df_in['Result'].as_matrix()
        array2 = s1_df_out['Result'].as_matrix()
        np.testing.assert_almost_equal(array1, array2, decimal=4)

        df = workspace.getMetricsTable(event.id)
        cmp_series = {
            'GREATER_OF_TWO_HORIZONTALS': 0.6787,
            'H1': 0.3869,
            'H2': 0.6787,
            'Z': 0.7663
        }
        pga_dict = df.iloc[0]['PGA'].to_dict()
        for key, value in pga_dict.items():
            value2 = cmp_series[key]
            np.testing.assert_almost_equal(value, value2, decimal=4)

        workspace.close()
    except Exception as e:
        raise(e)
    finally:
        shutil.rmtree(tdir)
def test_metrics():
    eventid = 'usb000syza'
    datafiles, event = read_data_dir('knet',
                                     eventid,
                                     '*')
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config = get_config()
    # turn off sta/lta check and snr checks
    newconfig = drop_processing(config, ['check_sta_lta', 'compute_snr'])
    processed_streams = process_streams(raw_streams, event, config=newconfig)

    tdir = tempfile.mkdtemp()
    try:
        tfile = os.path.join(tdir, 'test.hdf')
        workspace = StreamWorkspace(tfile)
        workspace.addEvent(event)
        workspace.addStreams(event, processed_streams, label='processed')
        stream1 = processed_streams[0]
        stream2 = processed_streams[1]
        summary1 = StationSummary.from_config(stream1)
        summary2 = StationSummary.from_config(stream2)
        workspace.setStreamMetrics(event.id, 'processed', summary1)
        workspace.setStreamMetrics(event.id, 'processed', summary2)
        summary1_a = workspace.getStreamMetrics(event.id,
                                                stream1[0].stats.station,
                                                'processed')
        s1_df_in = summary1.pgms.sort_values(['IMT', 'IMC'])
        s1_df_out = summary1_a.pgms.sort_values(['IMT', 'IMC'])
        array1 = s1_df_in['Result'].as_matrix()
        array2 = s1_df_out['Result'].as_matrix()
        np.testing.assert_almost_equal(array1, array2, decimal=4)

        df = workspace.getMetricsTable(event.id)
        cmp_series = {'GREATER_OF_TWO_HORIZONTALS': 0.6787,
                      'HN1': 0.3869,
                      'HN2': 0.6787,
                      'HNZ': 0.7663}
        pga_dict = df.iloc[0]['PGA'].to_dict()
        for key, value in pga_dict.items():
            value2 = cmp_series[key]
            np.testing.assert_almost_equal(value, value2, decimal=4)

        workspace.close()
    except Exception as e:
        raise(e)
    finally:
        shutil.rmtree(tdir)
예제 #15
0
def test_metrics():
    eventid = 'usb000syza'
    datafiles, event = read_data_dir('knet', eventid, '*')
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config = update_config(os.path.join(datadir, 'config_min_freq_0p2.yml'))
    # turn off sta/lta check and snr checks
    # newconfig = drop_processing(config, ['check_sta_lta', 'compute_snr'])
    # processed_streams = process_streams(raw_streams, event, config=newconfig)
    newconfig = config.copy()
    newconfig['processing'].append(
        {'NNet_QA': {
            'acceptance_threshold': 0.5,
            'model_name': 'CantWell'
        }})
    processed_streams = process_streams(raw_streams.copy(),
                                        event,
                                        config=newconfig)

    tdir = tempfile.mkdtemp()
    try:
        tfile = os.path.join(tdir, 'test.hdf')
        workspace = StreamWorkspace(tfile)
        workspace.addEvent(event)
        workspace.addStreams(event, raw_streams, label='raw')
        workspace.addStreams(event, processed_streams, label='processed')
        stream1 = raw_streams[0]
        summary1 = StationSummary.from_config(stream1)
        s1_df_in = summary1.pgms.sort_values(['IMT', 'IMC'])
        array1 = s1_df_in['Result'].to_numpy()
        workspace.calcMetrics(eventid, labels=['raw'])
        pstreams2 = workspace.getStreams(event.id, labels=['processed'])
        assert pstreams2[0].getStreamParamKeys() == ['nnet_qa']
        summary1_a = workspace.getStreamMetrics(event.id,
                                                stream1[0].stats.network,
                                                stream1[0].stats.station,
                                                'raw')
        s1_df_out = summary1_a.pgms.sort_values(['IMT', 'IMC'])
        array2 = s1_df_out['Result'].to_numpy()
        np.testing.assert_almost_equal(array1, array2, decimal=4)

        workspace.close()
    except Exception as e:
        raise (e)
    finally:
        shutil.rmtree(tdir)
예제 #16
0
def test_allow_nans():
    dpath = os.path.join('data', 'testdata', 'fdsn', 'uu60363602')
    datadir = pkg_resources.resource_filename('gmprocess', dpath)
    sc = StreamCollection.from_directory(datadir)
    origin = read_event_json_files([os.path.join(datadir, 'event.json')])[0]
    psc = process_streams(sc, origin)
    st = psc[0]

    ss = StationSummary.from_stream(
        st, components=['quadratic_mean'], imts=['FAS(4.0)'], bandwidth=189,
        allow_nans=True)
    assert np.isnan(ss.pgms.Result).all()

    ss = StationSummary.from_stream(
        st, components=['quadratic_mean'], imts=['FAS(4.0)'], bandwidth=189,
        allow_nans=False)
    assert ~np.isnan(ss.pgms.Result).all()
예제 #17
0
def test():
    datafiles, origin = read_data_dir('fdsn', 'nc72282711', 'BK.CMB*.mseed')
    streams = []
    for datafile in datafiles:
        streams += read_fdsn(datafile)

    assert streams[0].get_id() == 'BK.CMB.HN'

    datafiles, origin = read_data_dir('fdsn', 'nc72282711', 'TA.M04C*.mseed')
    streams = []
    for datafile in datafiles:
        streams += read_fdsn(datafile)

    assert streams[0].get_id() == 'TA.M04C.HN'

    # DEBUGGING
    sc = StreamCollection(streams)
    psc = process_streams(sc, origin)
예제 #18
0
def test_process_streams():
    # Loma Prieta test station (nc216859)
    origin = {
        'eventid': 'test',
        'time': UTCDateTime('2000-10-16T13:30:00'),
        'magnitude': 7.3,
        'lat': 35.278,
        'lon': 133.345
    }

    data_files, origin = read_data_dir('geonet', 'us1000778i', '*.V1A')
    streams = []
    for f in data_files:
        streams += read_data(f)

    sc = StreamCollection(streams)

    sc.describe()

    test = process_streams(sc, origin)

    logging.info('Testing trace: %s' % test[0][1])

    assert len(test) == 3
    assert len(test[0]) == 3
    assert len(test[1]) == 3
    assert len(test[2]) == 3

    # Apparently the traces end up in a different order on the Travis linux
    # container than on my local mac. So testing individual traces need to
    # not care about trace order.

    trace_maxes = np.sort([np.max(np.abs(t.data)) for t in test[0]])

    np.testing.assert_allclose(
        trace_maxes,
        np.array([157.82909426, 240.36582093, 263.7063879]),
        rtol=1e-5
    )
예제 #19
0
def test_zero_crossings():
    datapath = os.path.join('data', 'testdata', 'zero_crossings')
    datadir = pkg_resources.resource_filename('gmprocess', datapath)
    sc = StreamCollection.from_directory(datadir)
    sc.describe()

    conf = get_config()

    update = {
        'processing': [{
            'detrend': {
                'detrending_method': 'demean'
            }
        }, {
            'check_zero_crossings': {
                'min_crossings': 1
            }
        }]
    }
    update_dict(conf, update)

    edict = {
        'id': 'ak20419010',
        'time': UTCDateTime('2018-11-30T17:29:29'),
        'lat': 61.346,
        'lon': -149.955,
        'depth': 46.7,
        'magnitude': 7.1
    }
    event = get_event_object(edict)
    test = process_streams(sc, event, conf)
    for st in test:
        for tr in st:
            assert tr.hasParameter('ZeroCrossingRate')
    np.testing.assert_allclose(
        test[0][0].getParameter('ZeroCrossingRate')['crossing_rate'],
        0.008888888888888889,
        atol=1e-5)
def test_free_field():
    data_files, origin = read_data_dir('kiknet', 'usp000hzq8')
    raw_streams = []
    for dfile in data_files:
        raw_streams += read_data(dfile)

    sc = StreamCollection(raw_streams)

    processed_streams = process_streams(sc, origin)

    # all of these streams should have failed for different reasons
    npassed = np.sum([pstream.passed for pstream in processed_streams])
    assert npassed == 0
    for pstream in processed_streams:
        is_free = pstream[0].free_field
        reason = ''
        for trace in pstream:
            if trace.hasParameter('failure'):
                reason = trace.getParameter('failure')['reason']
                break
        if is_free:
            assert reason.startswith('Failed')
        else:
            assert reason == 'Failed free field sensor check.'
예제 #21
0
def test_workspace():
    eventid = 'us1000778i'
    datafiles, origin = read_data_dir('geonet', eventid, '*.V1A')
    event = get_event_object(origin)
    tdir = tempfile.mkdtemp()
    try:
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=H5pyDeprecationWarning)
            warnings.filterwarnings("ignore", category=YAMLLoadWarning)
            warnings.filterwarnings("ignore", category=FutureWarning)
            config = get_config()
            tfile = os.path.join(tdir, 'test.hdf')
            raw_streams = []
            for dfile in datafiles:
                raw_streams += read_data(dfile)

            workspace = StreamWorkspace(tfile)
            t1 = time.time()
            workspace.addStreams(event, raw_streams, label='raw')
            t2 = time.time()
            print('Adding %i streams took %.2f seconds' % (len(raw_streams),
                                                           (t2 - t1)))

            str_repr = workspace.__repr__()
            assert str_repr == 'Events: 1 Stations: 3 Streams: 3'

            eventobj = workspace.getEvent(eventid)
            assert eventobj.origins[0].latitude == event.origins[0].latitude
            assert eventobj.magnitudes[0].mag == event.magnitudes[0].mag

            stations = workspace.getStations()
            assert sorted(stations) == ['hses', 'thz', 'wtmc']

            stations = workspace.getStations(eventid=eventid)
            assert sorted(stations) == ['hses', 'thz', 'wtmc']

            # test retrieving tags for an event that doesn't exist
            try:
                workspace.getStreamTags('foo')
            except KeyError:
                assert 1 == 1

            # test retrieving event that doesn't exist
            try:
                workspace.getEvent('foo')
            except KeyError:
                assert 1 == 1

            instream = None
            for stream in raw_streams:
                if stream[0].stats.station.lower() == 'hses':
                    instream = stream
                    break
            if instream is None:
                assert 1 == 2
            outstream = workspace.getStreams(eventid,
                                             stations=['hses'],
                                             labels=['raw'])[0]
            compare_streams(instream, outstream)

            label_summary = workspace.summarizeLabels()
            assert label_summary.iloc[0]['Label'] == 'raw'
            assert label_summary.iloc[0]['Software'] == 'gmprocess'

            sc = StreamCollection(raw_streams)
            processed_streams = process_streams(sc, origin, config=config)
            workspace.addStreams(event, processed_streams, 'processed')

            idlist = workspace.getEventIds()
            assert idlist[0] == eventid

            event_tags = workspace.getStreamTags(eventid)
            assert sorted(event_tags) == [
                'hses_processed', 'hses_raw', 'thz_processed', 'thz_raw',
                'wtmc_processed', 'wtmc_raw'
            ]
            outstream = workspace.getStreams(eventid,
                                             stations=['hses'],
                                             labels=['processed'])[0]

            provenance = workspace.getProvenance(eventid, labels=['processed'])
            first_row = pd.Series({
                'Record': 'NZ.HSES.HN1',
                'Processing Step': 'Remove Response',
                'Step Attribute': 'input_units',
                'Attribute Value': 'counts'
            })

            last_row = pd.Series({
                'Record': 'NZ.WTMC.HNZ',
                'Processing Step': 'Detrend',
                'Step Attribute': 'detrending_method',
                'Attribute Value': 'baseline_sixth_order'
            })
            assert provenance.iloc[0].equals(first_row)
            assert provenance.iloc[-1].equals(last_row)

            # compare the parameters from the input processed stream
            # to it's output equivalent
            instream = None
            for stream in processed_streams:
                if stream[0].stats.station.lower() == 'hses':
                    instream = stream
                    break
            if instream is None:
                assert 1 == 2
            compare_streams(instream, outstream)
            workspace.close()

            # read in data from a second event and stash it in the workspace
            eventid = 'nz2018p115908'
            datafiles, origin = read_data_dir('geonet', eventid, '*.V2A')
            raw_streams = []
            for dfile in datafiles:
                raw_streams += read_data(dfile)

            event = get_event_object(origin)
            workspace = StreamWorkspace.open(tfile)
            workspace.addStreams(event, raw_streams, label='foo')

            stations = workspace.getStations(eventid)

            eventids = workspace.getEventIds()
            assert eventids == ['us1000778i', 'nz2018p115908']
            instation = raw_streams[0][0].stats.station
            this_stream = workspace.getStreams(eventid,
                                               stations=[instation],
                                               labels=['foo'])[0]
            assert instation == this_stream[0].stats.station

            # set and retrieve waveform metrics in the file
            imclist = [
                'greater_of_two_horizontals', 'channels', 'rotd50', 'rotd100'
            ]
            imtlist = ['sa1.0', 'PGA', 'pgv', 'fas2.0', 'arias']
            usid = 'us1000778i'
            tags = workspace.getStreamTags(usid)
            workspace.setStreamMetrics(eventid,
                                       labels=['foo'],
                                       imclist=imclist,
                                       imtlist=imtlist)
            summary = workspace.getStreamMetrics(eventid, instation, 'foo')
            summary_series = summary.toSeries()['ARIAS']
            cmpseries = pd.Series({
                'GEOMETRIC_MEAN': np.NaN,
                'GREATER_OF_TWO_HORIZONTALS': 0.0005,
                'HN1': 0.0001,
                'HN2': 0.0005,
                'HNZ': 0.0000,
                'ROTD100.0': 0.0005,
                'ROTD50.0': 0.0003
            })
            assert cmpseries.equals(summary_series)

            workspace.setStreamMetrics(usid, labels=['processed'])
            df = workspace.getMetricsTable(usid, labels=['processed'])
            cmpdict = {
                'GREATER_OF_TWO_HORIZONTALS': [26.8906, 4.9415, 94.6646],
                'HN1': [24.5105, 4.9415, 94.6646],
                'HN2': [26.8906, 4.0758, 86.7877],
                'HNZ': [16.0941, 2.5401, 136.7054]
            }
            cmpframe = pd.DataFrame(cmpdict)
            assert df['PGA'].equals(cmpframe)

            inventory = workspace.getInventory(usid)
            codes = [
                station.code for station in inventory.networks[0].stations
            ]
            assert sorted(codes) == ['HSES', 'THZ', 'WPWS', 'WTMC']

    except Exception as e:
        raise (e)
    finally:
        shutil.rmtree(tdir)
# data = "/gmprocess/data/testdata/knet/us2000cnnl/"
data = "/data/testdata/demo/ci38457511/raw/"
output = "/Users/gabriel/groundmotion-processing/output/"

# Path to example data
datapath = os.path.join('data', 'testdata', 'demo', 'ci38457511', 'raw')
datadir = pkg_resources.resource_filename('gmprocess', datapath)

sc = StreamCollection.from_directory(datadir)

# Includes 3 StationStreams, each with 3 StationTraces
sc.describe()

# Get the default config file
conf = get_config()

# Get event object
event = get_event_object('ci38457511')

# Process the streams
psc = process_streams(sc, event, conf)
psc.describe()

# Save plots of processed records
for st in psc:
    if os.path.exists(output + "test/"):
        st.plot(outfile='%stest/%s.png' % (output, st.get_id()))
    else:
        os.mkdir(os.path.join(output, "test/"))
        st.plot(outfile='%stest/%s.png' % (output, st.get_id()))
def test_workspace():
    eventid = 'us1000778i'
    datafiles, event = read_data_dir('geonet', eventid, '*.V1A')
    tdir = tempfile.mkdtemp()
    try:
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=H5pyDeprecationWarning)
            warnings.filterwarnings("ignore", category=YAMLLoadWarning)
            warnings.filterwarnings("ignore", category=FutureWarning)
            config = get_config()
            tfile = os.path.join(tdir, 'test.hdf')
            raw_streams = []
            for dfile in datafiles:
                raw_streams += read_data(dfile)

            workspace = StreamWorkspace(tfile)
            t1 = time.time()
            workspace.addStreams(event, raw_streams, label='raw')
            t2 = time.time()
            print('Adding %i streams took %.2f seconds' %
                  (len(raw_streams), (t2 - t1)))

            str_repr = workspace.__repr__()
            assert str_repr == 'Events: 1 Stations: 3 Streams: 3'

            eventobj = workspace.getEvent(eventid)
            assert eventobj.origins[0].latitude == event.origins[0].latitude
            assert eventobj.magnitudes[0].mag == event.magnitudes[0].mag

            stations = workspace.getStations()
            assert sorted(stations) == ['hses', 'thz', 'wtmc']

            stations = workspace.getStations(eventid=eventid)
            assert sorted(stations) == ['hses', 'thz', 'wtmc']

            # test retrieving tags for an event that doesn't exist
            try:
                workspace.getStreamTags('foo')
            except KeyError:
                assert 1 == 1

            # test retrieving event that doesn't exist
            try:
                workspace.getEvent('foo')
            except KeyError:
                assert 1 == 1

            instream = None
            for stream in raw_streams:
                if stream[0].stats.station.lower() == 'hses':
                    instream = stream
                    break
            if instream is None:
                assert 1 == 2
            outstream = workspace.getStreams(eventid,
                                             stations=['hses'],
                                             labels=['raw'])[0]
            compare_streams(instream, outstream)

            label_summary = workspace.summarizeLabels()
            assert label_summary.iloc[0]['Label'] == 'raw'
            assert label_summary.iloc[0]['Software'] == 'gmprocess'

            sc = StreamCollection(raw_streams)
            processed_streams = process_streams(sc, event, config=config)
            workspace.addStreams(event, processed_streams, 'processed')

            idlist = workspace.getEventIds()
            assert idlist[0] == eventid

            event_tags = workspace.getStreamTags(eventid)
            assert sorted(event_tags) == ['hses_processed', 'hses_raw',
                                          'thz_processed', 'thz_raw',
                                          'wtmc_processed', 'wtmc_raw']
            outstream = workspace.getStreams(eventid,
                                             stations=['hses'],
                                             labels=['processed'])[0]

            provenance = workspace.getProvenance(eventid, labels=['processed'])
            first_row = pd.Series({'Record': 'NZ.HSES.HN1',
                                   'Processing Step': 'Remove Response',
                                   'Step Attribute': 'input_units',
                                   'Attribute Value': 'counts'})

            last_row = pd.Series({'Record': 'NZ.WTMC.HNZ',
                                  'Processing Step': 'Lowpass Filter',
                                  'Step Attribute': 'number_of_passes',
                                  'Attribute Value': 2})
            assert provenance.iloc[0].equals(first_row)
            assert provenance.iloc[-1].equals(last_row)

            # compare the parameters from the input processed stream
            # to it's output equivalent
            instream = None
            for stream in processed_streams:
                if stream[0].stats.station.lower() == 'hses':
                    instream = stream
                    break
            if instream is None:
                assert 1 == 2
            compare_streams(instream, outstream)
            workspace.close()

            # read in data from a second event and stash it in the workspace
            eventid = 'nz2018p115908'
            datafiles, event = read_data_dir('geonet', eventid, '*.V2A')
            raw_streams = []
            for dfile in datafiles:
                raw_streams += read_data(dfile)

            workspace = StreamWorkspace.open(tfile)
            workspace.addStreams(event, raw_streams, label='foo')

            stations = workspace.getStations(eventid)

            eventids = workspace.getEventIds()
            assert eventids == ['us1000778i', 'nz2018p115908']
            instation = raw_streams[0][0].stats.station
            this_stream = workspace.getStreams(eventid,
                                               stations=[instation],
                                               labels=['foo'])[0]
            assert instation == this_stream[0].stats.station
            usid = 'us1000778i'
            inventory = workspace.getInventory(usid)
            codes = [station.code for station in inventory.networks[0].stations]
            assert sorted(codes) == ['HSES', 'THZ', 'WPWS', 'WTMC']

    except Exception as e:
        raise(e)
    finally:
        shutil.rmtree(tdir)
예제 #24
0
def test_vs30_dist_metrics():
    KNOWN_DISTANCES = {
        'epicentral': 5.1,
        'hypocentral': 10.2,
        'rupture': 2.21,
        'rupture_var': np.nan,
        'joyner_boore': 2.21,
        'joyner_boore_var': np.nan,
        'gc2_rx': 2.66,
        'gc2_ry': 3.49,
        'gc2_ry0': 0.00,
        'gc2_U': 34.34,
        'gc2_T': 2.66
    }
    KNOWN_BAZ = 239.46
    KNOWN_VS30 = 331.47

    eventid = 'ci38457511'
    datafiles, event = read_data_dir('fdsn', eventid, '*')
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config = update_config(os.path.join(datadir, 'config_min_freq_0p2.yml'))
    processed_streams = process_streams(raw_streams, event, config=config)
    rupture_file = get_rupture_file(datadir)
    grid_file = os.path.join(datadir, 'test_grid.grd')
    config['metrics']['vs30'] = {
        'vs30': {
            'file': grid_file,
            'column_header': 'GlobalVs30',
            'readme_entry': 'GlobalVs30',
            'units': 'm/s'
        }
    }
    tdir = tempfile.mkdtemp()
    try:
        tfile = os.path.join(tdir, 'test.hdf')
        ws = StreamWorkspace(tfile)
        ws.addEvent(event)
        ws.addStreams(event, raw_streams, label='raw')
        ws.addStreams(event, processed_streams, label='processed')
        ws.calcMetrics(event.id,
                       rupture_file=rupture_file,
                       labels=['processed'],
                       config=config)
        sta_sum = ws.getStreamMetrics(event.id, 'CI', 'CLC', 'processed')

        for dist in sta_sum.distances:
            np.testing.assert_allclose(sta_sum.distances[dist],
                                       KNOWN_DISTANCES[dist],
                                       rtol=0.01)
        np.testing.assert_allclose(sta_sum._back_azimuth, KNOWN_BAZ, rtol=0.01)
        np.testing.assert_allclose(sta_sum._vs30['vs30']['value'],
                                   KNOWN_VS30,
                                   rtol=0.01)
        event_df, imc_tables, readme_tables = ws.getTables('processed')
        check_cols = set([
            'EpicentralDistance', 'HypocentralDistance', 'RuptureDistance',
            'RuptureDistanceVar', 'JoynerBooreDistance',
            'JoynerBooreDistanceVar', 'GC2_rx', 'GC2_ry', 'GC2_ry0', 'GC2_U',
            'GC2_T', 'GlobalVs30', 'BackAzimuth'
        ])
        assert check_cols.issubset(set(readme_tables['Z']['Column header']))
        assert check_cols.issubset(set(imc_tables['Z'].columns))
    except Exception as e:
        raise (e)
    finally:
        shutil.rmtree(tdir)
예제 #25
0
def test_workspace():
    eventid = 'us1000778i'
    datafiles, event = read_data_dir('geonet', eventid, '*.V1A')
    tdir = tempfile.mkdtemp()
    try:
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=H5pyDeprecationWarning)
            warnings.filterwarnings("ignore", category=YAMLLoadWarning)
            warnings.filterwarnings("ignore", category=FutureWarning)
            config = update_config(
                os.path.join(datadir, 'config_min_freq_0p2.yml'))
            tfile = os.path.join(tdir, 'test.hdf')
            raw_streams = []
            for dfile in datafiles:
                raw_streams += read_data(dfile)

            workspace = StreamWorkspace(tfile)
            t1 = time.time()
            workspace.addStreams(event, raw_streams, label='raw')
            t2 = time.time()
            print('Adding %i streams took %.2f seconds' % (len(raw_streams),
                                                           (t2 - t1)))

            str_repr = workspace.__repr__()
            assert str_repr == 'Events: 1 Stations: 3 Streams: 3'

            eventobj = workspace.getEvent(eventid)
            assert eventobj.origins[0].latitude == event.origins[0].latitude
            assert eventobj.magnitudes[0].mag == event.magnitudes[0].mag

            stations = workspace.getStations()
            assert sorted(stations) == ['HSES', 'THZ', 'WTMC']

            stations = workspace.getStations(eventid=eventid)
            assert sorted(stations) == ['HSES', 'THZ', 'WTMC']

            # test retrieving event that doesn't exist
            with pytest.raises(KeyError):
                workspace.getEvent('foo')

            instream = None
            for stream in raw_streams:
                if stream[0].stats.station.lower() == 'hses':
                    instream = stream
                    break
            if instream is None:
                raise ValueError('Instream should not be none.')
            outstream = workspace.getStreams(eventid,
                                             stations=['HSES'],
                                             labels=['raw'])[0]
            compare_streams(instream, outstream)

            label_summary = workspace.summarizeLabels()
            assert label_summary.iloc[0]['Label'] == 'raw'
            assert label_summary.iloc[0]['Software'] == 'gmprocess'

            sc = StreamCollection(raw_streams)
            processed_streams = process_streams(sc, event, config=config)
            workspace.addStreams(event, processed_streams, 'processed')

            idlist = workspace.getEventIds()
            assert idlist[0] == eventid

            outstream = workspace.getStreams(eventid,
                                             stations=['HSES'],
                                             labels=['processed'])[0]

            provenance = workspace.getProvenance(eventid, labels=['processed'])
            first_row = pd.Series({
                'Record': 'NZ.HSES.--.HN1_us1000778i_processed',
                'Processing Step': 'Remove Response',
                'Step Attribute': 'input_units',
                'Attribute Value': 'counts'
            })

            last_row = pd.Series({
                'Record': 'NZ.WTMC.--.HNZ_us1000778i_processed',
                'Processing Step': 'Lowpass Filter',
                'Step Attribute': 'number_of_passes',
                'Attribute Value': 2
            })
            assert provenance.iloc[0].equals(first_row)
            assert provenance.iloc[-1].equals(last_row)

            # compare the parameters from the input processed stream
            # to it's output equivalent
            instream = None
            for stream in processed_streams:
                if stream[0].stats.station.lower() == 'hses':
                    instream = stream
                    break
            if instream is None:
                raise ValueError('Instream should not be none.')
            compare_streams(instream, outstream)
            workspace.close()

            # read in data from a second event and stash it in the workspace
            eventid = 'nz2018p115908'
            datafiles, event = read_data_dir('geonet', eventid, '*.V2A')
            raw_streams = []
            for dfile in datafiles:
                raw_streams += read_data(dfile)

            workspace = StreamWorkspace.open(tfile)
            workspace.addStreams(event, raw_streams, label='foo')

            stations = workspace.getStations(eventid)

            eventids = workspace.getEventIds()
            assert eventids == ['us1000778i', 'nz2018p115908']
            instation = raw_streams[0][0].stats.station
            this_stream = workspace.getStreams(eventid,
                                               stations=[instation],
                                               labels=['foo'])[0]
            assert instation == this_stream[0].stats.station
            usid = 'us1000778i'
            inventory = workspace.getInventory(usid)
            codes = [
                station.code for station in inventory.networks[0].stations
            ]
            assert sorted(set(codes)) == ['HSES', 'THZ', 'WPWS', 'WTMC']

    except Exception as e:
        raise (e)
    finally:
        shutil.rmtree(tdir)