def test_read():
    cosmos_files, _ = read_data_dir('cosmos',
                                    'ci14155260',
                                    'Cosmos12TimeSeriesTest.v1')
    cwb_files, _ = read_data_dir('cwb',
                                 'us1000chhc',
                                 '1-EAS.dat')
    dmg_files, _ = read_data_dir('dmg',
                                 'nc71734741',
                                 'CE89146.V2')
    geonet_files, _ = read_data_dir('geonet',
                                    'us1000778i',
                                    '20161113_110259_WTMC_20.V1A')
    knet_files, _ = read_data_dir('knet',
                                  'us2000cnnl',
                                  'AOM0011801241951.EW')
    smc_files, _ = read_data_dir('smc',
                                 'nc216859',
                                 '0111a.smc')

    file_dict = {}
    file_dict['cosmos'] = cosmos_files[0]
    file_dict['cwb'] = cwb_files[0]
    file_dict['dmg'] = dmg_files[0]
    file_dict['geonet'] = geonet_files[0]
    file_dict['knet'] = knet_files[0]
    file_dict['smc'] = smc_files[0]

    for file_format in file_dict:
        file_path = file_dict[file_format]
        assert _get_format(file_path) == file_format
        assert _validate_format(file_path, file_format) == file_format

    assert _validate_format(file_dict['knet'], 'smc') == 'knet'
    assert _validate_format(file_dict['dmg'], 'cosmos') == 'dmg'
    assert _validate_format(file_dict['cosmos'], 'invalid') == 'cosmos'

    for file_format in file_dict:
        stream = read_data(file_dict[file_format], file_format)[0]
        assert stream[0].stats.standard['source_format'] == file_format
        stream = read_data(file_dict[file_format])[0]
        assert stream[0].stats.standard['source_format'] == file_format
    # test exception
    try:
        file_path = smc_files[0].replace('0111a.smc', 'not_a_file.smc')
        read_data(file_path)[0]
        success = True
    except GMProcessException:
        success = False
    assert not success
def test_free_field():
    data_files, origin = read_data_dir('kiknet', 'usp000hzq8')

    raw_streams = []
    for dfile in data_files:
        raw_streams += read_data(dfile)

    sc = StreamCollection(raw_streams)

    processed_streams = process_streams(sc, origin)

    # all of these streams should have failed for different reasons
    npassed = np.sum([pstream.passed for pstream in processed_streams])
    assert npassed == 0
    for pstream in processed_streams:
        is_free = pstream[0].free_field
        reason = ''
        for trace in pstream:
            if trace.hasParameter('failure'):
                reason = trace.getParameter('failure')['reason']
                break
        if is_free:
            assert reason.startswith('Failed sta/lta check')
        else:
            assert reason == 'Failed free field sensor check.'
def generate_workspace():
    """Generate simple HDF5 with ASDF layout for testing."""
    PCOMMANDS = [
        "assemble",
        "process",
    ]
    EVENTID = "us1000778i"
    LABEL = "ptest"
    datafiles, event = read_data_dir("geonet", EVENTID, "*.V1A")

    tdir = tempfile.mkdtemp()
    tfilename = os.path.join(tdir, "workspace.h5")

    raw_data = []
    for dfile in datafiles:
        raw_data += read_data(dfile)
    write_asdf(tfilename, raw_data, event, label="unprocessed")
    del raw_data

    config = update_config(os.path.join(datadir, "config_min_freq_0p2.yml"))

    workspace = StreamWorkspace.open(tfilename)
    raw_streams = workspace.getStreams(EVENTID,
                                       labels=["unprocessed"],
                                       config=config)
    pstreams = process_streams(raw_streams, event, config=config)
    workspace.addStreams(event, pstreams, label=LABEL)
    workspace.calcMetrics(event.id, labels=[LABEL], config=config)

    return tfilename
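# A minimal usage sketch (not part of the original tests) for the helper
# above, assuming the same module-level imports as the surrounding examples
# (StreamWorkspace, shutil, os): open the generated workspace, pull back the
# processed streams by event id and label, then clean up the temp directory.
def example_use_generated_workspace():
    tfilename = generate_workspace()
    workspace = StreamWorkspace.open(tfilename)
    pstreams = workspace.getStreams("us1000778i", labels=["ptest"])
    assert len(pstreams) > 0
    workspace.close()
    shutil.rmtree(os.path.dirname(tfilename))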
def test_all_num_outliers():
    data_files, _ = read_data_dir("clipping_samples", "hv70907436", "*.mseed")
    data_files.sort()
    streams = []
    for f in data_files:
        streams += read_data(f)

    sc = StreamCollection(streams)

    num_outliers = []
    for st in sc:
        jerk_method = Jerk(st, test_all=True)
        num_outliers.append(jerk_method.num_outliers)

    np.testing.assert_equal(
        num_outliers,
        np.array(
            [
                [1145, 1137, 1158],
                [1227, 878, 1290],
                [872, 923, 1158],
                [860, 1111, 1381],
                [926, 1025, 954],
                [1205, 1356, 1600],
            ]
        ),
    )
def test_process_streams():
    # GeoNet data for the 2016 Kaikoura, New Zealand earthquake (us1000778i)

    data_files, origin = read_data_dir("geonet", "us1000778i", "*.V1A")
    streams = []
    for f in data_files:
        streams += read_data(f)

    sc = StreamCollection(streams)

    sc.describe()

    config = update_config(os.path.join(datadir, "config_min_freq_0p2.yml"))

    test = process_streams(sc, origin, config=config)

    logging.info(f"Testing trace: {test[0][1]}")

    assert len(test) == 3
    assert len(test[0]) == 3
    assert len(test[1]) == 3
    assert len(test[2]) == 3

    # Apparently the traces end up in a different order on the Travis linux
    # container than on my local mac. So testing individual traces need to
    # not care about trace order.

    trace_maxes = np.sort(
        [np.max(np.abs(t.data)) for t in test.select(station="HSES")[0]])

    np.testing.assert_allclose(trace_maxes,
                               np.array([157.812449, 240.379521, 263.601519]),
                               rtol=1e-5)
def test_signal_split2():
    datafiles, origin = read_data_dir("knet", "us2000cnnl", "AOM0011801241951*")
    streams = []
    for datafile in datafiles:
        streams += read_data(datafile)

    streams = StreamCollection(streams)
    stream = streams[0]
    signal_split(stream, origin)

    cmpdict = {
        "split_time": UTCDateTime(2018, 1, 24, 10, 51, 39, 841483),
        "method": "p_arrival",
        "picker_type": "travel_time",
    }

    pdict = stream[0].getParameter("signal_split")
    for key, value in cmpdict.items():
        v1 = pdict[key]
        # Because I can't figure out how to get the UTCDateTime __eq__
        # operator to behave as expected with the currently installed
        # version of obspy, we pedantically compare the two objects field
        # by field (a tolerance-based alternative is sketched after this
        # function).
        if isinstance(value, UTCDateTime):
            # value.__precision = 4
            # v1.__precision = 4
            assert value.year == v1.year
            assert value.month == v1.month
            assert value.day == v1.day
            assert value.hour == v1.hour
            assert value.minute == v1.minute
            assert value.second == v1.second
        else:
            assert v1 == value
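# A tolerance-based alternative to the field-by-field UTCDateTime comparison
# above (a sketch, not from the original source): obspy's UTCDateTime
# subtraction yields the difference in seconds as a float, so two instants
# can be compared to within a small tolerance instead of relying on __eq__.
def utc_times_close(t1, t2, tol=1e-3):
    """Return True if two UTCDateTime instants differ by less than tol seconds."""
    return abs(t1 - t2) < tol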
def test_max_calc():
    data_files, _ = read_data_dir("clipping_samples", "hv70907436", "*.mseed")
    data_files.sort()
    streams = []
    for f in data_files:
        streams += read_data(f)

    sc = StreamCollection(streams)

    st_max_amps = []
    for st in sc:
        max_amp_method = Max_Amp(st)
        st_max_amps.append(max_amp_method.max_amp)

    np.testing.assert_allclose(
        st_max_amps,
        np.array([
            8553230.5231931563,
            8379389.0031664912,
            8122003.3022054331,
            8698976.5524693076,
            8509963.5836342424,
            8766397.4644186441,
        ]),
        rtol=1e-5,
    )
def test_process_streams():
    # GeoNet data for the 2016 Kaikoura, New Zealand earthquake (us1000778i)

    data_files, origin = read_data_dir('geonet', 'us1000778i', '*.V1A')
    streams = []
    for f in data_files:
        streams += read_data(f)

    sc = StreamCollection(streams)

    sc.describe()

    test = process_streams(sc, origin)

    logging.info('Testing trace: %s' % test[0][1])

    assert len(test) == 3
    assert len(test[0]) == 3
    assert len(test[1]) == 3
    assert len(test[2]) == 3

    # Apparently the traces end up in a different order on the Travis linux
    # container than on my local mac. So testing individual traces need to
    # not care about trace order.

    trace_maxes = np.sort([np.max(np.abs(t.data)) for t in test[0]])

    np.testing.assert_allclose(
        trace_maxes,
        np.array([157.81975508, 240.33718094, 263.67804256]),
        rtol=1e-5
    )
def test_all_num_outliers():
    data_files, _ = read_data_dir("clipping_samples", "hv70907436", "*.mseed")
    data_files.sort()
    streams = []
    for f in data_files:
        streams += read_data(f)

    sc = StreamCollection(streams)

    num_outliers = []
    for st in sc:
        std_dev_method = Std_Dev(st, test_all=True)
        num_outliers.append(std_dev_method.num_outliers)

    np.testing.assert_equal(
        num_outliers,
        np.array([
            [0, 0, 0],
            [0, 1086, 23],
            [131, 252, 4482],
            [1018, 76, 0],
            [60, 1314, 1511],
            [0, 0, 4862],
        ]),
    )
def test_all_num_outliers():
    data_files, _ = read_data_dir("clipping_samples", "hv70907436", "*.mseed")
    data_files.sort()
    streams = []
    for f in data_files:
        streams += read_data(f)

    sc = StreamCollection(streams)

    num_outliers = []
    for st in sc:
        ping_method = Ping(st, test_all=True)
        num_outliers.append(ping_method.num_outliers)

    np.testing.assert_equal(
        num_outliers,
        np.array(
            [
                [239, 0, 22],
                [26, 199, 30],
                [0, 0, 0],
                [6, 8, 2],
                [133, 341, 22],
                [145, 264, 29],
            ]
        ),
    )
def test_correct_baseline():

    data_files, origin = read_data_dir("geonet", "us1000778i", "*.V1A")
    data_files.sort()
    streams = []
    for f in data_files:
        streams += read_data(f)

    sc = StreamCollection(streams)
    final_acc = []

    config = get_config()
    config["integration"]["frequency"] = True

    for st in sc:
        for tr in st:
            tmp_tr = correct_baseline(tr, config=config)
            final_acc.append(tmp_tr.data[-1])

    target_final_acc = np.array([
        0.599829,
        0.717284,
        -1.548017,
        0.377616,
        -0.685688,
        0.112147,
        0.024594,
        0.004697,
        -0.013296,
    ])

    np.testing.assert_allclose(final_acc, target_final_acc, atol=1e-6)
def test_stream_params():
    eventid = 'us1000778i'
    datafiles, event = read_data_dir(
        'geonet',
        eventid,
        '20161113_110259_WTMC_20.V1A'
    )
    tdir = tempfile.mkdtemp()
    streams = []
    try:
        streams += read_data(datafiles[0])
        statsdict = {'name': 'Fred', 'age': 34}
        streams[0].setStreamParam('stats', statsdict)
        tfile = os.path.join(tdir, 'test.hdf')
        workspace = StreamWorkspace(tfile)
        workspace.addEvent(event)
        workspace.addStreams(event, streams, label='stats')
        outstreams = workspace.getStreams(event.id, labels=['stats'])
        cmpdict = outstreams[0].getStreamParam('stats')
        assert cmpdict == statsdict
        workspace.close()
    except Exception:
        raise
    finally:
        shutil.rmtree(tdir)
def test_signal_split2():
    datafiles, origin = read_data_dir(
        'knet', 'us2000cnnl', 'AOM0011801241951*')
    streams = []
    for datafile in datafiles:
        streams += read_data(datafile)

    streams = StreamCollection(streams)
    stream = streams[0]
    signal_split(stream, origin)

    cmpdict = {
        'split_time': UTCDateTime(2018, 1, 24, 10, 51, 39, 841483),
        'method': 'p_arrival',
        'picker_type': 'travel_time'}

    pdict = stream[0].getParameter('signal_split')
    for key, value in cmpdict.items():
        v1 = pdict[key]
        # Because I can't figure out how to get the UTCDateTime __eq__
        # operator to behave as expected with the currently installed
        # version of obspy, we pedantically compare the two objects field
        # by field.
        if isinstance(value, UTCDateTime):
            #value.__precision = 4
            #v1.__precision = 4
            assert value.year == v1.year
            assert value.month == v1.month
            assert value.day == v1.day
            assert value.hour == v1.hour
            assert value.minute == v1.minute
            assert value.second == v1.second
        else:
            assert v1 == value
def test_velocity():
    datafiles, _ = read_data_dir("geonet", "us1000778i", "20161113_110259_WTMC_20.V2A")
    acc_file = datafiles[0]
    acc = read_data(acc_file)[0]
    target_v = acc.copy().integrate()[0]
    v = get_velocity(acc)
    np.testing.assert_allclose(v[0], target_v)
def test_all_max_calc():
    data_files, _ = read_data_dir("clipping_samples", "hv70907436", "*.mseed")
    data_files.sort()
    streams = []
    for f in data_files:
        streams += read_data(f)

    sc = StreamCollection(streams)

    st_max_amps = []
    for st in sc:
        max_amp_method = Max_Amp(st, test_all=True)
        st_max_amps.append(max_amp_method.max_amp)

    np.testing.assert_allclose(
        st_max_amps,
        np.array([
            [8553230.5231931563, 5621557.4998055659, 8344327.3850897169],
            [8379389.0031664912, 10090978.868285095, 8463705.7919004504],
            [8122003.3022054331, 8148959.0193878114, 8989844.6071329378],
            [8698976.5524693076, 8435914.830898283, 8204508.3222043216],
            [8509963.5836342424, 10646801.251152713, 8805642.5964668635],
            [8766397.4644186441, 8496598.1711016055, 11525175.173268152],
        ]),
        rtol=1e-5,
    )
def test_asdf():
    eventid = 'us1000778i'
    datafiles, origin = read_data_dir('geonet', eventid, '*.V1A')
    event = get_event_object(origin)
    tdir = tempfile.mkdtemp()
    try:
        config = get_config()
        tfile = os.path.join(tdir, 'test.hdf')
        raw_streams = []
        for dfile in datafiles:
            raw_streams += read_data(dfile)

        write_asdf(tfile, raw_streams, event)

        assert is_asdf(tfile)
        assert not is_asdf(datafiles[0])

        outstreams = read_asdf(tfile)
        assert len(outstreams) == len(raw_streams)

        write_asdf(tfile, raw_streams, event, label='foo')
        outstreams2 = read_asdf(tfile, label='foo')
        assert len(outstreams2) == len(raw_streams)

    except Exception:
        raise
    finally:
        shutil.rmtree(tdir)
def test_get_vel():
    data_files, origin = read_data_dir("geonet", "us1000778i", "*.V1A")
    data_files.sort()
    streams = []
    for f in data_files:
        streams += read_data(f)

    sc = StreamCollection(streams)

    config = get_config()
    config["integration"]["frequency"] = True

    final_vel = []
    for st in sc:
        for tr in st:
            tmp_tr = get_vel(tr, config=config)
            final_vel.append(tmp_tr.data[-1])

    target_final_vel = np.array([
        -2.182293e-03,
        -1.417545e-03,
        2.111492e-03,
        -9.395322e-04,
        1.662219e-03,
        -2.690978e-04,
        1.376186e-04,
        -7.358185e-05,
        1.741465e-05,
    ])

    np.testing.assert_allclose(final_vel, target_final_vel, atol=1e-6)
def test_process_streams():
    # GeoNet data for the 2016 Kaikoura, New Zealand earthquake (us1000778i)

    data_files, origin = read_data_dir('geonet', 'us1000778i', '*.V1A')
    streams = []
    for f in data_files:
        streams += read_data(f)

    sc = StreamCollection(streams)

    sc.describe()

    config = update_config(os.path.join(datadir, 'config_min_freq_0p2.yml'))

    test = process_streams(sc, origin, config=config)

    logging.info('Testing trace: %s' % test[0][1])

    assert len(test) == 3
    assert len(test[0]) == 3
    assert len(test[1]) == 3
    assert len(test[2]) == 3

    # Apparently the traces end up in a different order on the Travis linux
    # container than on my local mac. So testing individual traces need to
    # not care about trace order.

    trace_maxes = np.sort(
        [np.max(np.abs(t.data)) for t in test.select(station='HSES')[0]])

    np.testing.assert_allclose(trace_maxes,
                               np.array(
                                   [157.81975508, 240.33718094, 263.67804256]),
                               rtol=1e-5)
def test_integrate_taper():
    data_files, origin = read_data_dir("geonet", "us1000778i", "*.V1A")
    data_files.sort()
    streams = []
    for f in data_files:
        streams += read_data(f)

    sc = StreamCollection(streams)

    config = get_config()
    config["integration"]["taper"]["taper"] = True

    final_vel = []
    for st in sc:
        for tr in st:
            tmp_tr = tr.integrate(config=config)
            final_vel.append(tmp_tr.data[-1])

    target_final_vel = np.array([
        3.896186e00,
        -4.901823e00,
        -5.722080e-01,
        1.621672e-01,
        -1.654317e-01,
        -8.242356e-04,
        -1.482590e-02,
        1.504334e-01,
        1.021050e-01,
    ])

    np.testing.assert_allclose(final_vel, target_final_vel, atol=1e-6)
def test_to_dataframe():
    cwb_files, event = read_data_dir('geonet', 'nz2018p115908')
    st = read_data(cwb_files[0])[0]
    df1 = streams_to_dataframe([st, st], event=event)
    np.testing.assert_array_equal(df1.STATION.tolist(), ['WPWS', 'WPWS'])
    np.testing.assert_array_equal(df1.NAME.tolist(),
                                  ['Waipawa_District_Council', 'Waipawa_District_Council'])
    target_levels = ['ELEVATION', 'EPICENTRAL_DISTANCE',
                     'GREATER_OF_TWO_HORIZONTALS', 'H1', 'H2', 'Z',
                     'HYPOCENTRAL_DISTANCE', 'LAT', 'LON', 'NAME', 'NETID', 'SOURCE',
                     'STATION', '', 'PGA', 'PGV', 'SA(0.3)', 'SA(1.0)', 'SA(3.0)']

    # let's use sets to make sure all the columns are present in whatever order
    cmp1 = set(['ELEVATION', 'EPICENTRAL_DISTANCE',
                'GREATER_OF_TWO_HORIZONTALS', 'H1', 'H2',
                'HYPOCENTRAL_DISTANCE', 'LAT', 'LON',
                'NAME', 'NETID', 'SOURCE', 'STATION', 'Z'])
    cmp2 = set(['', 'PGA', 'PGV', 'SA(0.3)', 'SA(1.0)', 'SA(3.0)'])
    header1 = set(df1.columns.levels[0])
    header2 = set(df1.columns.levels[1])
    assert header1 == cmp1
    assert header2 == cmp2
    # idx = 0
    # for s in df1.columns.levels:
    #     for col in s:
    #         try:
    #             assert col == target_levels[idx]
    #         except Exception as e:
    #             x = 1
    #         idx += 1

    # This was previously not being tested
    """imts = ['PGA', 'PGV', 'SA(0.3)', 'SA(1.0)', 'SA(3.0)']
def render_concise(files, save=False):
    errors = pd.DataFrame(columns=ERROR_COLUMNS)
    df = pd.DataFrame(columns=COLUMNS, index=None)
    folders = []
    for filename in files:
        fpath, fname = os.path.split(filename)
        if fpath not in folders:
            sys.stderr.write('Parsing files from subfolder %s...\n' % fpath)
            folders.append(fpath)
        try:
            streams = read_data(filename)
            for stream in streams:
                tdf = get_dataframe(filename, stream)
                df = pd.concat([df, tdf], axis=0)
        except BaseException as e:
            row = pd.Series(index=ERROR_COLUMNS)
            row['Filename'] = os.path.abspath(filename)
            row['Error'] = str(e)
            errors = errors.append(row, ignore_index=True)
            continue

    # organize dataframe by network, station, and channel
    df = df.sort_values(['Network', 'Station', 'Channel'])
    if not save:
        print(df.to_string(index=False))

    return (df, errors)
def test_asdf():
    eventid = 'us1000778i'
    datafiles, event = read_data_dir('geonet', eventid, '*.V1A')
    tdir = tempfile.mkdtemp()
    try:
        config = get_config()
        tfile = os.path.join(tdir, 'test.hdf')
        raw_streams = []
        for dfile in datafiles:
            raw_streams += read_data(dfile)

        write_asdf(tfile, raw_streams, event)

        assert is_asdf(tfile)
        assert not is_asdf(datafiles[0])

        outstreams = read_asdf(tfile)
        assert len(outstreams) == len(raw_streams)

        write_asdf(tfile, raw_streams, event, label='foo')
        outstreams2 = read_asdf(tfile, label='foo')
        assert len(outstreams2) == len(raw_streams)

    except Exception:
        raise
    finally:
        shutil.rmtree(tdir)
def test_nnet():

    conf = get_config()

    update = {
        'processing': [
            {'detrend': {'detrending_method': 'demean'}},
            # {'check_zero_crossings': {'min_crossings': 10}},
            {'detrend': {'detrending_method': 'linear'}},
            {'compute_snr': {'bandwidth': 20.0,
                             'check': {'max_freq': 5.0,
                                       'min_freq': 0.2,
                                       'threshold': 3.0}}},
            {'NNet_QA': {'acceptance_threshold': 0.5,
                         'model_name': 'CantWell'}}
        ]
    }
    update_dict(conf, update)

    data_files, origin = read_data_dir('geonet', 'us1000778i', '*.V1A')
    streams = []
    for f in data_files:
        streams += read_data(f)

    sc = StreamCollection(streams)
    test = process_streams(sc, origin, conf)
    tstream = test.select(station='HSES')[0]
    allparams = tstream.getStreamParamKeys()
    nnet_dict = tstream.getStreamParam('nnet_qa')
    np.testing.assert_allclose(
        nnet_dict['score_HQ'], 0.99321798811740059, rtol=1e-3)
def test_arias():
    ddir = os.path.join('data', 'testdata')
    datadir = pkg_resources.resource_filename('gmprocess', ddir)
    data_file = os.path.join(datadir, 'arias_data.json')
    with open(data_file, 'rt') as f:
        jdict = json.load(f)

    time = np.array(jdict['time'])
    # input output is m/s/s
    acc = np.array(jdict['acc']) / 100
    target_IA = jdict['ia']
    delta = time[2] - time[1]
    sr = 1 / delta
    header = {
        'delta': delta,
        'sampling_rate': sr,
        'npts': len(acc),
        'units': 'm/s/s',
        'channel': 'HN1',
        'standard': {
            'corner_frequency': np.nan,
            'station_name': '',
            'source': 'json',
            'source_file': '',
            'instrument': '',
            'instrument_period': np.nan,
            'source_format': 'json',
            'comments': '',
            'structure_type': '',
            'sensor_serial_number': '',
            'process_level': 'raw counts',
            'process_time': '',
            'horizontal_orientation': np.nan,
            'units': 'acc',
            'instrument_damping': np.nan
        }
    }
    # input is cm/s/s output is m/s/s
    trace = StationTrace(data=acc * 100, header=header)
    trace2 = trace.copy()
    trace2.stats.channel = 'HN2'
    stream = StationStream([trace, trace2])
    station = StationSummary.from_stream(stream, ['ARITHMETIC_MEAN'],
                                         ['arias'])
    pgms = station.pgms
    Ia = pgms[(pgms.IMT == 'ARIAS')
              & (pgms.IMC == 'ARITHMETIC_MEAN')].Result.tolist()[0]
    # the target has only one decimal place and is in cm/s/s
    Ia = Ia * 100
    np.testing.assert_almost_equal(Ia, target_IA, decimal=1)

    # Test other components
    data_files, _ = read_data_dir('cwb', 'us1000chhc', '2-ECU.dat')
    stream = read_data(data_files[0])[0]
    station = StationSummary.from_stream(stream, [
        'channels', 'gmrotd', 'rotd50', 'greater_of_two_horizontals',
        'ARITHMETIC_MEAN'
    ], ['arias'])
    stream = StationSummary.from_stream(stream, ['gmrotd50'], ['arias'])
    assert stream.pgms.Result.tolist() == []
def test_asdf():
    eventid = 'us1000778i'
    datafiles, event = read_data_dir('geonet', eventid, '*.V1A')
    tdir = tempfile.mkdtemp()
    try:
        tfile = os.path.join(tdir, 'test.hdf')
        raw_streams = []
        for dfile in datafiles:
            raw_streams += read_data(dfile)

        write_asdf(tfile, raw_streams, event)

        assert is_asdf(tfile)
        assert not is_asdf(datafiles[0])

        outstreams = read_asdf(tfile)
        assert len(outstreams) == len(raw_streams)

        write_asdf(tfile, raw_streams, event, label='foo')
        outstreams2 = read_asdf(tfile, label='foo')
        assert len(outstreams2) == len(raw_streams)

    except Exception:
        raise
    finally:
        shutil.rmtree(tdir)
def generate_workspace():
    """Generate simple HDF5 with ASDF layout for testing.
    """
    PCOMMANDS = [
        'assemble',
        'process',
    ]
    EVENTID = 'us1000778i'
    LABEL = 'ptest'
    datafiles, event = read_data_dir('geonet', EVENTID, '*.V1A')

    tdir = tempfile.mkdtemp()
    tfilename = os.path.join(tdir, 'workspace.h5')

    raw_data = []
    for dfile in datafiles:
        raw_data += read_data(dfile)
    write_asdf(tfilename, raw_data, event, label="unprocessed")
    del raw_data

    config = get_config()
    workspace = StreamWorkspace.open(tfilename)
    raw_streams = workspace.getStreams(EVENTID, labels=['unprocessed'])
    pstreams = process_streams(raw_streams, event, config=config)
    workspace.addStreams(event, pstreams, label=LABEL)
    workspace.calcMetrics(event.id, labels=[LABEL], config=config)

    return tfilename
def directory_to_dataframe(directory, imcs=None, imts=None, origin=None, process=True):
    """Extract peak ground motions from list of Stream objects.
    Note: The PGM columns underneath each channel will be variable
    depending on the units of the Stream being passed in (velocity
    sensors can only generate PGV) and on the imtlist passed in by
    user. Spectral acceleration columns will be formatted as SA(0.3)
    for 0.3 second spectral acceleration, for example.
    Args:
        directory (str): Directory of ground motion files (streams).
        imcs (list): Strings designating desired components to create
                in table.
        imts (list): Strings designating desired PGMs to create
                in table.
        origin (obspy.core.event.Origin): Defines the focal time and
                geographical location of an earthquake hypocenter.
                Default is None.
        process (bool): Process the stream using the config file.
    Returns:
        DataFrame: Pandas dataframe containing columns:
            - STATION Station code.
            - NAME Text description of station.
            - LOCATION Two character location code.
            - SOURCE Long form string containing source network.
            - NETWORK Short network code.
            - LAT Station latitude
            - LON Station longitude
            - DISTANCE Epicentral distance (km) (if epicentral lat/lon provided)
            - HN1 East-west channel (or H1) (multi-index with pgm columns):
                - PGA Peak ground acceleration (%g).
                - PGV Peak ground velocity (cm/s).
                - SA(0.3) Pseudo-spectral acceleration at 0.3 seconds (%g).
                - SA(1.0) Pseudo-spectral acceleration at 1.0 seconds (%g).
                - SA(3.0) Pseudo-spectral acceleration at 3.0 seconds (%g).
            - HN2 North-south channel (or H2) (multi-index with pgm columns):
                - PGA Peak ground acceleration (%g).
                - PGV Peak ground velocity (cm/s).
                - SA(0.3) Pseudo-spectral acceleration at 0.3 seconds (%g).
                - SA(1.0) Pseudo-spectral acceleration at 1.0 seconds (%g).
                - SA(3.0) Pseudo-spectral acceleration at 3.0 seconds (%g).
            - HNZ Vertical channel (or HZ) (multi-index with pgm columns):
                - PGA Peak ground acceleration (%g).
                - PGV Peak ground velocity (cm/s).
                - SA(0.3) Pseudo-spectral acceleration at 0.3 seconds (%g).
                - SA(1.0) Pseudo-spectral acceleration at 1.0 seconds (%g).
                - SA(3.0) Pseudo-spectral acceleration at 3.0 seconds (%g).
            - GREATER_OF_TWO_HORIZONTALS (multi-index with pgm columns):
                - PGA Peak ground acceleration (%g).
                - PGV Peak ground velocity (cm/s).
                - SA(0.3) Pseudo-spectral acceleration at 0.3 seconds (%g).
                - SA(1.0) Pseudo-spectral acceleration at 1.0 seconds (%g).
                - SA(3.0) Pseudo-spectral acceleration at 3.0 seconds (%g).
    """
    streams = []
    for filepath in glob.glob(os.path.join(directory, "*")):
        streams += read_data(filepath)
    grouped_streams = StreamCollection(streams)

    dataframe = streams_to_dataframe(
        grouped_streams, imcs=imcs, imts=imts, origin=origin)
    return dataframe
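# A minimal usage sketch (not part of the original source) for
# directory_to_dataframe; `data_dir` is a hypothetical directory of ground
# motion files. The multi-index column layout described in the docstring
# above can be inspected via DataFrame.columns.levels, as the
# test_to_dataframe examples elsewhere in this listing do.
def example_directory_to_dataframe(data_dir):
    df = directory_to_dataframe(data_dir, imts=["PGA", "PGV", "SA(1.0)"])
    # Top level: station metadata columns plus one group per channel/IMC.
    print(df.columns.levels[0].tolist())
    # Second level: the requested IMTs (plus '' for the metadata columns).
    print(df.columns.levels[1].tolist())
    return df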
def test_read():
    cosmos_files, _ = read_data_dir(
        'cosmos', 'ci14155260', 'Cosmos12TimeSeriesTest.v1')
    cwb_files, _ = read_data_dir(
        'cwb', 'us1000chhc', '1-EAS.dat')
    dmg_files, _ = read_data_dir(
        'dmg', 'nc71734741', 'CE89146.V2')
    geonet_files, _ = read_data_dir(
        'geonet', 'us1000778i', '20161113_110259_WTMC_20.V1A')
    knet_files, _ = read_data_dir(
        'knet', 'us2000cnnl', 'AOM0011801241951.EW')
    smc_files, _ = read_data_dir(
        'smc', 'nc216859', '0111a.smc')

    file_dict = {}
    file_dict['cosmos'] = cosmos_files[0]
    file_dict['cwb'] = cwb_files[0]
    file_dict['dmg'] = dmg_files[0]
    file_dict['geonet'] = geonet_files[0]
    file_dict['knet'] = knet_files[0]
    file_dict['smc'] = smc_files[0]

    for file_format in file_dict:
        file_path = file_dict[file_format]
        assert _get_format(file_path) == file_format
        assert _validate_format(file_path, file_format) == file_format

    assert _validate_format(file_dict['knet'], 'smc') == 'knet'
    assert _validate_format(file_dict['dmg'], 'cosmos') == 'dmg'
    assert _validate_format(file_dict['cosmos'], 'invalid') == 'cosmos'

    for file_format in file_dict:
        stream = read_data(file_dict[file_format], file_format)[0]
        assert stream[0].stats.standard['source_format'] == file_format
        stream = read_data(file_dict[file_format])[0]
        assert stream[0].stats.standard['source_format'] == file_format
    # test exception
    try:
        file_path = smc_files[0].replace('0111a.smc', 'not_a_file.smc')
        read_data(file_path)[0]
        success = True
    except BaseException:
        success = False
    assert not success
def test_velocity():
    datafiles, _ = read_data_dir(
        'geonet', 'us1000778i', '20161113_110259_WTMC_20.V2A')
    acc_file = datafiles[0]
    acc = read_data(acc_file)[0]
    target_v = acc.copy().integrate()[0]
    v = get_velocity(acc)
    np.testing.assert_allclose(v[0], target_v)
def test_plot():
    # read in data
    datafiles, _ = read_data_dir("cwb", "us1000chhc")
    streams = []
    for filename in datafiles:
        streams += read_data(filename)
    # One plot arias
    axes = plot_arias(streams[3])
    assert len(axes) == 3

    # Multiplot arias
    axs = matplotlib.pyplot.subplots(len(streams), 3, figsize=(15, 10))[1]
    axs = axs.flatten()
    idx = 0
    for stream in streams:
        axs = plot_arias(
            stream,
            axes=axs,
            axis_index=idx,
            minfontsize=15,
            show_maximum=False,
            title="18km NNE of Hualian, Taiwan",
        )
        idx += 3

    # One plot durations
    durations = [(0.05, 0.75), (0.2, 0.8), (0.05, 0.95)]
    axes = plot_durations(streams[3], durations)
    assert len(axes) == 3

    # Multiplot durations
    axs = matplotlib.pyplot.subplots(len(streams), 3, figsize=(15, 10))[1]
    axs = axs.flatten()
    idx = 0
    for stream in streams:
        axs = plot_durations(
            stream,
            durations,
            axes=axs,
            axis_index=idx,
            minfontsize=15,
            title="18km NNE of Hualian, Taiwan",
        )
        idx += 3

    # Moveout plots
    epicenter_lat = 24.14
    epicenter_lon = 121.69
    plot_moveout(
        streams,
        epicenter_lat,
        epicenter_lon,
        "1",
        figsize=(15, 10),
        minfontsize=16,
        normalize=True,
        factor=0.1,
    )
def directory_to_streams(directory, config=None):
    """Read in a directory of data to a list of streams.

    Note:
    If the directory only includes files that are readable by this library
    then the task is rather simple. However, often times data directories
    include random subdirectories and/or zip files, which we try to crawl in
    a sensible fashion.

    Args:
        directory (str):
            Directory of ground motion files (streams).
        config (dict):
            Configuration options.

    Returns:
        tuple: (List of obspy streams,
                List of unprocessed files,
                List of errors associated with trying to read unprocessed
                files).
    """
    # Use a temp dir so that we don't modify data on disk since that may not be
    # expected or desired in all cases. We create the temporary directory in
    # the parent directory, which permits using shutil.copytree to duplicate
    # the data prior to processing.
    intermediate_dir = tempfile.mkdtemp(dir=os.path.dirname(directory))
    temp_dir = os.path.join(intermediate_dir, "directory_to_streams")
    try:
        shutil.copytree(directory, temp_dir)
        flatten_directory(temp_dir)
        # ---------------------------------------------------------------------
        # Read streams
        # ---------------------------------------------------------------------
        streams = []
        unprocessed_files = []
        unprocessed_file_errors = []
        for file_path in glob.glob(os.path.join(temp_dir, "*")):
            file_name = os.path.basename(file_path)
            file_ext = os.path.splitext(file_name)[1].lower()
            if file_ext not in EXT_IGNORE:
                try:
                    logging.debug(f"Attempting to read: {file_path}")
                    streams += read_data(file_path, config=config)
                except BaseException as ex:
                    logging.info(f"Failed to read file: {file_name}")
                    unprocessed_files += [file_path]
                    unprocessed_file_errors += [ex]

    except BaseException as e:
        raise e
    finally:
        try:
            shutil.rmtree(intermediate_dir)
        except OSError:
            # Retry the removal once if the first attempt raises an OSError.
            shutil.rmtree(intermediate_dir)

    return streams, unprocessed_files, unprocessed_file_errors
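# A minimal usage sketch (not part of the original source) for
# directory_to_streams; `data_dir` is a hypothetical directory. It reads
# whatever it can, logs the files that failed to parse along with the
# associated exceptions, and groups the result into a StreamCollection.
def example_directory_to_streams(data_dir):
    streams, bad_files, errors = directory_to_streams(data_dir)
    for fname, err in zip(bad_files, errors):
        logging.warning(f"Could not read {fname}: {err}")
    return StreamCollection(streams)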
def test_dwt_denoise():
    """ Check that sample data fed into dwt_denoise() can be processed and
    that the returned signal is reasonable"""

    # Loma Prieta test station (nc216859)
    data_files, origin = read_data_dir('geonet', 'us1000778i', '*.V1A')
    streams = []
    for f in data_files:
        streams += read_data(f)
def test_read():
    config = get_config()
    cosmos_files, _ = read_data_dir("cosmos", "ci14155260", "Cosmos12TimeSeriesTest.v1")
    cwb_files, _ = read_data_dir("cwb", "us1000chhc", "1-EAS.dat")
    dmg_files, _ = read_data_dir("dmg", "nc71734741", "CE89146.V2")
    geonet_files, _ = read_data_dir(
        "geonet", "us1000778i", "20161113_110259_WTMC_20.V1A"
    )
    knet_files, _ = read_data_dir("knet", "us2000cnnl", "AOM0011801241951.EW")
    smc_files, _ = read_data_dir("smc", "nc216859", "0111a.smc")

    file_dict = {}
    file_dict["cosmos"] = cosmos_files[0]
    file_dict["cwb"] = cwb_files[0]
    file_dict["dmg"] = dmg_files[0]
    file_dict["geonet"] = geonet_files[0]
    file_dict["knet"] = knet_files[0]
    file_dict["smc"] = smc_files[0]

    for file_format in file_dict:
        file_path = file_dict[file_format]
        assert _get_format(file_path, config) == file_format
        assert _validate_format(file_path, config, file_format) == file_format

    assert _validate_format(file_dict["knet"], config, "smc") == "knet"
    assert _validate_format(file_dict["dmg"], config, "cosmos") == "dmg"
    assert _validate_format(file_dict["cosmos"], config, "invalid") == "cosmos"

    for file_format in file_dict:
        stream = read_data(file_dict[file_format], config, file_format)[0]
        assert stream[0].stats.standard["source_format"] == file_format
        stream = read_data(file_dict[file_format])[0]
        assert stream[0].stats.standard["source_format"] == file_format
    # test exception
    try:
        file_path = smc_files[0].replace("0111a.smc", "not_a_file.smc")
        read_data(file_path)[0]
        success = True
    except BaseException:
        success = False
    assert not success
def test_get_disp():

    data_files, origin = read_data_dir("geonet", "us1000778i", "*.V1A")
    data_files.sort()
    streams = []
    for f in data_files:
        streams += read_data(f)

    sc = StreamCollection(streams)

    config = get_config()
    config["integration"]["frequency"] = True

    final_disp = []
    for st in sc:
        for tr in st:
            tmp_tr = get_disp(tr, config=config)
            final_disp.append(tmp_tr.data[-1])

    target_final_disp = np.array([
        -0.07689,
        0.082552,
        -0.024509,
        -0.00047,
        -0.000257,
        -0.000152,
        -0.003425,
        0.000671,
        0.000178,
    ])

    np.testing.assert_allclose(final_disp, target_final_disp, atol=1e-6)

    config["integration"]["frequency"] = False
    config["integration"]["initial"] = 0.0
    config["integration"]["demean"] = True

    final_disp = []
    for st in sc:
        for tr in st:
            tmp_tr = get_disp(tr, config=config)
            final_disp.append(tmp_tr.data[-1])

    target_final_disp = np.array([
        -0.076882,
        0.082549,
        -0.024512,
        -0.000469,
        -0.000259,
        -0.000152,
        -0.003425,
        0.000672,
        0.000178,
    ])

    np.testing.assert_allclose(final_disp, target_final_disp, atol=1e-6)
def test_arias():
    ddir = os.path.join('data', 'testdata')
    datadir = pkg_resources.resource_filename('gmprocess', ddir)
    data_file = os.path.join(datadir, 'arias_data.json')
    with open(data_file, 'rt') as f:
        jdict = json.load(f)

    time = np.array(jdict['time'])
    # input output is m/s/s
    acc = np.array(jdict['acc']) / 100
    target_IA = jdict['ia']
    delta = time[2] - time[1]
    sr = 1 / delta
    header = {
        'delta': delta,
        'sampling_rate': sr,
        'npts': len(acc),
        'units': 'm/s/s',
        'channel': 'HN1',
        'standard': {'corner_frequency': np.nan,
            'station_name': '',
            'source': 'json',
            'source_file': '',
            'instrument': '',
            'instrument_period': np.nan,
            'source_format': 'json',
            'comments': '',
            'structure_type': '',
            'sensor_serial_number': '',
            'process_level': 'raw counts',
            'process_time': '',
            'horizontal_orientation': np.nan,
            'units': 'acc',
            'instrument_damping': np.nan}
    }
    # input is cm/s/s output is m/s/s
    trace = StationTrace(data=acc * 100, header=header)
    trace2 = trace.copy()
    trace2.stats.channel = 'HN2'
    stream = StationStream([trace, trace2])
    station = StationSummary.from_stream(stream, ['ARITHMETIC_MEAN'], ['arias'])
    pgms = station.pgms
    Ia = pgms[(pgms.IMT == 'ARIAS') & (pgms.IMC == 'ARITHMETIC_MEAN')].Result.tolist()[0]
    # the target has only one decimal place and is in cm/s/s
    Ia = Ia * 100
    np.testing.assert_almost_equal(Ia, target_IA, decimal=1)

    # Test other components
    data_files, _ = read_data_dir('cwb', 'us1000chhc', '2-ECU.dat')
    stream = read_data(data_files[0])[0]
    station = StationSummary.from_stream(stream,
                                         ['channels', 'gmrotd', 'rotd50',
                                             'greater_of_two_horizontals', 'ARITHMETIC_MEAN'],
                                         ['arias'])
    stream = StationSummary.from_stream(stream, ['gmrotd50'], ['arias'])
    assert stream.pgms.Result.tolist() == []
def get_streams():
    datafiles1, origin1 = read_data_dir('cwb', 'us1000chhc', '*.dat')
    datafiles2, origin2 = read_data_dir('nsmn', 'us20009ynd', '*.txt')
    datafiles3, origin3 = read_data_dir('geonet', 'us1000778i', '*.V1A')
    datafiles = datafiles1 + datafiles2 + datafiles3
    streams = []
    for datafile in datafiles:
        streams += read_data(datafile)

    return StreamCollection(streams)
def test_duration575():
    ddir = os.path.join("data", "testdata", "cosmos", "us1000hyfh")
    datadir = pkg_resources.resource_filename("gmprocess", ddir)
    data_file = os.path.join(
        datadir, "us1000hyfh_akbmrp_AKBMR--n.1000hyfh.BNZ.--.acc.V2c")
    stream = read_data(data_file)[0]

    dur = Duration(stream, interval=[5, 75])

    np.testing.assert_allclose(dur.result["HN1"], 45.325, atol=1e-4, rtol=1e-4)
def test_get_nga_record_sequence_no():
    datafiles, _ = read_data_dir('usc', 'ci3144585', '017m30cc.y0a')
    st = read_data(datafiles[0])[0]

    # Test when a single record is found
    assert get_nga_record_sequence_no(st, 'Northridge-01') == 960

    # Test when no records are found
    assert np.isnan(get_nga_record_sequence_no(st, 'Northridge-01', 1))

    # Test when multiple records are found
    assert np.isnan(get_nga_record_sequence_no(st, 'Northridge-01', 10000))
def test_travel_time():
    datafiles, origin = read_data_dir('geonet', 'us1000778i', '*.V1A')
    streams = []
    for datafile in datafiles:
        streams += read_data(datafile)

    cmps = {'NZ.HSES.HN': 42.118045132851641,
            'NZ.WTMC.HN': 40.77244584723671,
            'NZ.THZ.HN': 42.025007954412246}
    for stream in streams:
        minloc, mean_snr = pick_travel(stream, origin)
        np.testing.assert_almost_equal(minloc, cmps[stream.get_id()])
def directory_to_streams(directory):
    """Read in a directory of data to a list of streams.

    Note:
    If the directory only includes files that are readable by this library
    then the task is rather simple. However, often times data directories
    include random subdirectories and/or zip files, which we try to crawl in
    a sensible fashion.

    Args:
        directory (str):
            Directory of ground motion files (streams).

    Returns:
        tuple: (List of obspy streams,
                List of unprocessed files,
                List of errors associated with trying to read unprocessed
                files).
    """

    # Use a temp dir so that we don't modify data on disk since that may not be
    # expected or desired in all cases.
    temp_dir = os.path.join(tempfile.mkdtemp(), 'directory_to_streams')
    try:
        shutil.copytree(directory, temp_dir)
        flatten_directory(temp_dir)
        # -------------------------------------------------------------------------
        # Read streams
        # -------------------------------------------------------------------------
        streams = []
        unprocessed_files = []
        unprocessed_file_errors = []
        for file_path in glob.glob(os.path.join(temp_dir, "*")):
            file_ext = os.path.splitext(file_path)[1].lower()
            if file_ext not in EXT_IGNORE:
                try:
                    logging.debug('Attempting to read: %s' % file_path)
                    streams += read_data(file_path)
                except Exception as ex:
                    unprocessed_files += [file_path]
                    unprocessed_file_errors += [ex]
    except Exception as e:
        raise e
    finally:
        shutil.rmtree(temp_dir)

    return streams, unprocessed_files, unprocessed_file_errors
def test_plot():
    # read in data
    datafiles, _ = read_data_dir('cwb', 'us1000chhc')
    streams = []
    for filename in datafiles:
        streams += read_data(filename)
    # One plot arias
    axes = plot_arias(streams[3])
    assert len(axes) == 3

    # Multiplot arias
    axs = matplotlib.pyplot.subplots(len(streams), 3, figsize=(15, 10))[1]
    axs = axs.flatten()
    idx = 0
    for stream in streams:
        axs = plot_arias(
            stream, axes=axs, axis_index=idx, minfontsize=15,
            show_maximum=False, title="18km NNE of Hualian, Taiwan")
        idx += 3

    # One plot durations
    durations = [(0.05, 0.75),
                 (0.2, 0.8),
                 (0.05, .95)]
    axes = plot_durations(streams[3], durations)
    assert len(axes) == 3

    # Multiplot durations
    axs = matplotlib.pyplot.subplots(len(streams), 3, figsize=(15, 10))[1]
    axs = axs.flatten()
    idx = 0
    for stream in streams:
        axs = plot_durations(
            stream, durations, axes=axs, axis_index=idx,
            minfontsize=15, title="18km NNE of Hualian, Taiwan")
        idx += 3

    # Moveout plots
    epicenter_lat = 24.14
    epicenter_lon = 121.69
    plot_moveout(streams, epicenter_lat, epicenter_lon, 'BN1',
                 cmap='nipy_spectral_r', figsize=(15, 10), minfontsize=16,
                 normalize=True, scale=10)
def test_to_dataframe():
    cwb_files, event = read_data_dir('geonet', 'nz2018p115908')
    st = read_data(cwb_files[0])[0]
    df1 = streams_to_dataframe([st, st], event=event)
    np.testing.assert_array_equal(df1.STATION.tolist(), ['WPWS', 'WPWS'])
    np.testing.assert_array_equal(df1.NAME.tolist(),
                                  ['Waipawa_District_Council', 'Waipawa_District_Council'])
    target_levels = ['ELEVATION', 'EPICENTRAL_DISTANCE',
                     'GREATER_OF_TWO_HORIZONTALS', 'HN1', 'HN2', 'HNZ',
                     'HYPOCENTRAL_DISTANCE', 'LAT', 'LON', 'NAME', 'NETID', 'SOURCE',
                     'STATION', '', 'PGA', 'PGV', 'SA(0.3)', 'SA(1.0)', 'SA(3.0)']
    idx = 0
    for s in df1.columns.levels:
        for col in s:
            assert col == target_levels[idx]
            idx += 1

    # This was previously not being tested
    """imts = ['PGA', 'PGV', 'SA(0.3)', 'SA(1.0)', 'SA(3.0)']
def test_acceleration():
    datafiles, _ = read_data_dir(
        'geonet', 'us1000778i', '20161113_110259_WTMC_20.V2A')
    acc_file = datafiles[0]
    acc = read_data(acc_file)[0]
    target_g = acc[0].data * GAL_TO_PCTG
    target_m = acc[0].data / 100
    target_cm = acc[0].data

    acc_g = get_acceleration(acc, units='%%g')
    assert acc_g[0].stats['units'] == '%%g'
    np.testing.assert_allclose(acc_g[0], target_g)

    acc_m = get_acceleration(acc, units='m/s/s')
    assert acc_m[0].stats['units'] == 'm/s/s'
    np.testing.assert_allclose(acc_m[0], target_m)

    acc_cm = get_acceleration(acc, units='cm/s/s')
    assert acc_cm[0].stats['units'] == 'cm/s/s'
    np.testing.assert_allclose(acc_cm[0], target_cm)
def test_p_pick():
    datapath = os.path.join('data', 'testdata', 'process')
    datadir = pkg_resources.resource_filename('gmprocess', datapath)
    # Testing a strong motion channel
    tr = read(datadir + '/ALCTENE.UW..sac')[0]
    chosen_ppick = UTCDateTime('2001-02-28T18:54:47')
    ppick = PowerPicker(tr)
    ptime = tr.times('utcdatetime')[0] + ppick
    assert (abs(chosen_ppick - ptime)) < 0.2

    # Testing a broadband channel
    tr = read(datadir + '/HAWABHN.US..sac')[0]
    chosen_ppick = UTCDateTime('2003-01-15T03:42:12.5')
    ppick = PowerPicker(tr)
    ptime = tr.times('utcdatetime')[0] + ppick
    assert (abs(chosen_ppick - ptime)) < 0.2

    # Test a Northridge file that should fail to return a P-pick
    tr = read_data(datadir + '/017m30ah.m0a')[0][0]
    ppick = PowerPicker(tr)
    assert ppick == -1
def test_stream_params():
    eventid = 'us1000778i'
    datafiles, event = read_data_dir('geonet',
                                     eventid,
                                     '20161113_110259_WTMC_20.V1A')
    tdir = tempfile.mkdtemp()
    streams = []
    try:
        streams += read_data(datafiles[0])
        statsdict = {'name': 'Fred', 'age': 34}
        streams[0].setStreamParam('stats', statsdict)
        tfile = os.path.join(tdir, 'test.hdf')
        workspace = StreamWorkspace(tfile)
        workspace.addEvent(event)
        workspace.addStreams(event, streams, label='stats')
        outstreams = workspace.getStreams(event.id, labels=['stats'])
        cmpdict = outstreams[0].getStreamParam('stats')
        assert cmpdict == statsdict
        workspace.close()
    except Exception:
        raise
    finally:
        shutil.rmtree(tdir)
def test_free_field():
    data_files, origin = read_data_dir('kiknet', 'usp000hzq8')
    raw_streams = []
    for dfile in data_files:
        raw_streams += read_data(dfile)

    sc = StreamCollection(raw_streams)

    processed_streams = process_streams(sc, origin)

    # all of these streams should have failed for different reasons
    npassed = np.sum([pstream.passed for pstream in processed_streams])
    assert npassed == 0
    for pstream in processed_streams:
        is_free = pstream[0].free_field
        reason = ''
        for trace in pstream:
            if trace.hasParameter('failure'):
                reason = trace.getParameter('failure')['reason']
                break
        if is_free:
            assert reason.startswith('Failed')
        else:
            assert reason == 'Failed free field sensor check.'
def test_corner_frequencies():
    # Default config has 'constant' corner frequency method, so the need
    # here is to force the 'snr' method.
    data_files, origin = read_data_dir('geonet', 'us1000778i', '*.V1A')
    streams = []
    for f in data_files:
        streams += read_data(f)

    sc = StreamCollection(streams)

    config = get_config()

    window_conf = config['windows']

    processed_streams = sc.copy()
    for st in processed_streams:
        if st.passed:
            # Estimate noise/signal split time
            event_time = origin.time
            event_lon = origin.longitude
            event_lat = origin.latitude
            st = signal_split(st, origin)

            # Estimate end of signal
            end_conf = window_conf['signal_end']
            event_mag = origin.magnitude
            print(st)
            st = signal_end(
                st,
                event_time=event_time,
                event_lon=event_lon,
                event_lat=event_lat,
                event_mag=event_mag,
                **end_conf
            )
            wcheck_conf = window_conf['window_checks']
            st = window_checks(
                st,
                min_noise_duration=wcheck_conf['min_noise_duration'],
                min_signal_duration=wcheck_conf['min_signal_duration']
            )

    pconfig = config['processing']

    # Run SNR check
    # I think we don't do this anymore.
    test = [
        d for d in pconfig if list(d.keys())[0] == 'compute_snr'
    ]
    snr_config = test[0]['compute_snr']
    for stream in processed_streams:
        stream = compute_snr(
            stream,
            **snr_config
        )

    # Run get_corner_frequencies
    test = [
        d for d in pconfig if list(d.keys())[0] == 'get_corner_frequencies'
    ]
    cf_config = test[0]['get_corner_frequencies']
    snr_config = cf_config['snr']

    lp = []
    hp = []
    for stream in processed_streams:
        if not stream.passed:
            continue
        stream = get_corner_frequencies(
            stream,
            method="snr",
            snr=snr_config
        )
        if stream[0].hasParameter('corner_frequencies'):
            cfdict = stream[0].getParameter('corner_frequencies')
            lp.append(cfdict['lowpass'])
            hp.append(cfdict['highpass'])
    np.testing.assert_allclose(
        np.sort(hp),
        [0.00751431, 0.01354455, 0.04250735],
        atol=1e-6
    )
def test_workspace():
    eventid = 'us1000778i'
    datafiles, event = read_data_dir('geonet', eventid, '*.V1A')
    tdir = tempfile.mkdtemp()
    try:
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=H5pyDeprecationWarning)
            warnings.filterwarnings("ignore", category=YAMLLoadWarning)
            warnings.filterwarnings("ignore", category=FutureWarning)
            config = get_config()
            tfile = os.path.join(tdir, 'test.hdf')
            raw_streams = []
            for dfile in datafiles:
                raw_streams += read_data(dfile)

            workspace = StreamWorkspace(tfile)
            t1 = time.time()
            workspace.addStreams(event, raw_streams, label='raw')
            t2 = time.time()
            print('Adding %i streams took %.2f seconds' %
                  (len(raw_streams), (t2 - t1)))

            str_repr = workspace.__repr__()
            assert str_repr == 'Events: 1 Stations: 3 Streams: 3'

            eventobj = workspace.getEvent(eventid)
            assert eventobj.origins[0].latitude == event.origins[0].latitude
            assert eventobj.magnitudes[0].mag == event.magnitudes[0].mag

            stations = workspace.getStations()
            assert sorted(stations) == ['hses', 'thz', 'wtmc']

            stations = workspace.getStations(eventid=eventid)
            assert sorted(stations) == ['hses', 'thz', 'wtmc']

            # test retrieving tags for an event that doesn't exist
            try:
                workspace.getStreamTags('foo')
            except KeyError:
                assert 1 == 1

            # test retrieving event that doesn't exist
            try:
                workspace.getEvent('foo')
            except KeyError:
                assert 1 == 1

            instream = None
            for stream in raw_streams:
                if stream[0].stats.station.lower() == 'hses':
                    instream = stream
                    break
            assert instream is not None
            outstream = workspace.getStreams(eventid,
                                             stations=['hses'],
                                             labels=['raw'])[0]
            compare_streams(instream, outstream)

            label_summary = workspace.summarizeLabels()
            assert label_summary.iloc[0]['Label'] == 'raw'
            assert label_summary.iloc[0]['Software'] == 'gmprocess'

            sc = StreamCollection(raw_streams)
            processed_streams = process_streams(sc, event, config=config)
            workspace.addStreams(event, processed_streams, 'processed')

            idlist = workspace.getEventIds()
            assert idlist[0] == eventid

            event_tags = workspace.getStreamTags(eventid)
            assert sorted(event_tags) == ['hses_processed', 'hses_raw',
                                          'thz_processed', 'thz_raw',
                                          'wtmc_processed', 'wtmc_raw']
            outstream = workspace.getStreams(eventid,
                                             stations=['hses'],
                                             labels=['processed'])[0]

            provenance = workspace.getProvenance(eventid, labels=['processed'])
            first_row = pd.Series({'Record': 'NZ.HSES.HN1',
                                   'Processing Step': 'Remove Response',
                                   'Step Attribute': 'input_units',
                                   'Attribute Value': 'counts'})

            last_row = pd.Series({'Record': 'NZ.WTMC.HNZ',
                                  'Processing Step': 'Lowpass Filter',
                                  'Step Attribute': 'number_of_passes',
                                  'Attribute Value': 2})
            assert provenance.iloc[0].equals(first_row)
            assert provenance.iloc[-1].equals(last_row)

            # compare the parameters from the input processed stream
            # to it's output equivalent
            instream = None
            for stream in processed_streams:
                if stream[0].stats.station.lower() == 'hses':
                    instream = stream
                    break
            assert instream is not None
            compare_streams(instream, outstream)
            workspace.close()

            # read in data from a second event and stash it in the workspace
            eventid = 'nz2018p115908'
            datafiles, event = read_data_dir('geonet', eventid, '*.V2A')
            raw_streams = []
            for dfile in datafiles:
                raw_streams += read_data(dfile)

            workspace = StreamWorkspace.open(tfile)
            workspace.addStreams(event, raw_streams, label='foo')

            stations = workspace.getStations(eventid)

            eventids = workspace.getEventIds()
            assert eventids == ['us1000778i', 'nz2018p115908']
            instation = raw_streams[0][0].stats.station
            this_stream = workspace.getStreams(eventid,
                                               stations=[instation],
                                               labels=['foo'])[0]
            assert instation == this_stream[0].stats.station
            usid = 'us1000778i'
            inventory = workspace.getInventory(usid)
            codes = [station.code for station in inventory.networks[0].stations]
            assert sorted(codes) == ['HSES', 'THZ', 'WPWS', 'WTMC']

    except Exception:
        raise
    finally:
        shutil.rmtree(tdir)
def test_grouping():
    cwb_files, _ = read_data_dir('cwb', 'us1000chhc')
    cwb_streams = []
    for filename in cwb_files:
        cwb_streams += read_data(filename)
    cwb_streams = StreamCollection(cwb_streams)
    assert len(cwb_streams) == 5
    for stream in cwb_streams:
        assert len(stream) == 3

    # dmg
    dpath = os.path.join('data', 'testdata', 'dmg')
    dmg_path = pkg_resources.resource_filename('gmprocess', dpath)
    dmg_files = []
    for (path, dirs, files) in os.walk(dmg_path):
        for file in files:
            if file.endswith('V2'):
                fullfile = os.path.join(path, file)
                dmg_files.append(fullfile)

    dmg_streams = []
    for filename in dmg_files:
        if (not os.path.basename(filename).startswith('Bad') and
                not os.path.basename(filename).startswith('CE58667')):
            dmg_streams += read_data(filename)
    dmg_streams = StreamCollection(dmg_streams)
    assert len(dmg_streams) == 2
    for stream in dmg_streams:
        assert len(stream) == 3

    # geonet
    geonet_files, _ = read_data_dir('geonet', 'us1000778i', '*.V1A')
    geonet_streams = []
    for filename in geonet_files:
        geonet_streams += read_data(filename)
    geonet_streams = StreamCollection(geonet_streams)
    assert len(geonet_streams) == 3
    for stream in geonet_streams:
        assert len(stream) == 3
        assert len(stream.select(station=stream[0].stats.station)) == 3
        level = stream[0].stats.standard.process_level
        for trace in stream:
            assert trace.stats.standard.process_level == level

    # kiknet
    kiknet_files, _ = read_data_dir('kiknet', 'usp000a1b0')
    kiknet_streams = []
    for filename in kiknet_files:
        kiknet_streams += read_data(filename)
    kiknet_streams = StreamCollection(kiknet_streams)
    assert len(kiknet_streams) == 1
    for stream in kiknet_streams:
        assert len(stream) == 3
        assert len(stream.select(station=stream[0].stats.station)) == 3

    # knet
    knet_files, _ = read_data_dir('knet', 'us2000cnnl')
    knet_streams = []
    for filename in knet_files:
        knet_streams += read_data(filename)
    knet_streams = StreamCollection(knet_streams)
    assert len(knet_streams) == 9
    for stream in knet_streams:
        assert len(stream) == 3
        assert len(stream.select(station=stream[0].stats.station)) == 3
        pl = stream[0].stats.standard.process_level
        for trace in stream:
            assert trace.stats.standard.process_level == pl

    # smc
    smc_files, _ = read_data_dir('smc', 'nc216859', '0111*')
    smc_streams = []
    for filename in smc_files:
        smc_streams += read_data(filename, any_structure=True)
    smc_streams = StreamCollection(smc_streams)
    assert len(smc_streams) == 1
    for stream in smc_streams:
        if stream[0].stats.station == 'DVD0':
            assert len(stream) == 1
            assert len(stream.select(station=stream[0].stats.station)) == 1
        elif stream[0].stats.location == '01':
            assert len(stream) == 2
            assert len(stream.select(station=stream[0].stats.station)) == 2
        else:
            assert len(stream) == 3
            assert len(stream.select(station=stream[0].stats.station)) == 3

    # usc
    usc_files, _ = read_data_dir('usc', 'ci3144585')
    usc_streams = []
    for filename in usc_files:
        if os.path.basename(filename) != '017m30bt.s0a':
            usc_streams += read_data(filename)
    usc_streams = StreamCollection(usc_streams)
    assert len(usc_streams) == 3
    for stream in usc_streams:
        if stream[0].stats.station == '57':
            assert len(stream) == 1
        else:
            assert len(stream) == 3