# Example #1
def test_duplicates():
    """Exercise StreamCollection duplicate handling on bundled test data.

    NOTE(review): this module defines ``test_duplicates`` twice; this first
    copy is shadowed by the later definition of the same name and is never
    collected by pytest. The two copies appear identical apart from quote
    style — one of them should be deleted.

    Walks the duplicate-resolution preference chain: process level, source
    format, start time, trace length, and sampling rate, plus the distance
    tolerance and an explicit ``preference_order`` override.
    """
    datapath = os.path.join('data', 'testdata', 'duplicate', 'general')
    datadir = pkg_resources.resource_filename('gmprocess', datapath)
    streams = directory_to_streams(datadir)[0]

    sc_bad = StreamCollection(streams=streams, handle_duplicates=False)
    # Check that we begin with having three streams
    assert len(sc_bad) == 3

    sc = StreamCollection(streams=streams, handle_duplicates=True)
    # Check that we now only have two streams in the StreamCollection
    assert len(sc) == 2
    assert len(sc[0]) == 3
    assert len(sc[1]) == 3

    # Check that we kept the 'CE' network and not the '--' network
    assert sc.select(station='23837')[0][0].stats.network == 'CE'

    # Now try changing the process levels of one of the streams
    for tr in sc_bad.select(network='--')[0]:
        tr.stats.standard.process_level = 'uncorrected physical units'
    for tr in sc_bad.select(network='CE')[0]:
        tr.stats.standard.process_level = 'corrected physical units'

    sc = StreamCollection(streams=sc_bad.streams, handle_duplicates=True)
    # Now, we should have kept the '--' network and not the 'CE' network
    assert sc.select(station='23837')[0][0].stats.network == '--'

    # Now change the process preference order to see if we get back the
    # original results
    sc = StreamCollection(streams=sc_bad.streams,
                          handle_duplicates=True,
                          process_level_preference=['V2', 'V1'])
    assert sc.select(station='23837')[0][0].stats.network == 'CE'

    # Check that decreasing the distance tolerance results in streams now being
    # treated as different streams
    sc = StreamCollection(streams=streams,
                          max_dist_tolerance=10,
                          handle_duplicates=True)
    assert len(sc) == 3

    # Change the streams to have the same processing level
    for st in sc_bad:
        for tr in st:
            tr.stats.standard.process_level = 'uncorrected physical units'

    # Try changing the preferred format order
    sc = StreamCollection(streams=sc_bad.streams,
                          handle_duplicates=True,
                          format_preference=['dmg', 'cosmos'])
    assert sc.select(station='23837')[0][0].stats.network == '--'

    sc = StreamCollection(streams=sc_bad.streams,
                          handle_duplicates=True,
                          format_preference=['cosmos', 'dmg'])
    assert sc.select(station='23837')[0][0].stats.network == 'CE'

    # Set process level and format to be the same
    for st in sc_bad:
        for tr in st:
            tr.stats.standard.source_format = 'cosmos'

    # Check that we keep the CE network due to the bad starttime on --
    sczz = sc_bad.select(station='23837', network='--')
    for st in sczz:
        for tr in st:
            tr.stats.starttime = UTCDateTime(0)
    sc = StreamCollection(streams=sc_bad.streams, handle_duplicates=True)
    assert sc.select(station='23837')[0][0].stats.network == 'CE'

    # Swap the bad start time onto 'CE'; now '--' should be kept instead.
    for tr in sc_bad.select(network='CE')[0]:
        tr.stats.starttime = UTCDateTime(0)
    for tr in sc_bad.select(network='--')[0]:
        tr.stats.starttime = UTCDateTime(2018, 8, 29, 2, 33, 0)

    sc = StreamCollection(streams=sc_bad.streams, handle_duplicates=True)
    assert sc.select(station='23837')[0][0].stats.network == '--'

    # Give both networks the same start time and trim '--' shorter;
    # 'CE' should be kept.
    for tr in sc_bad.select(network='--')[0]:
        tr.stats.starttime = UTCDateTime(0)
        tr.trim(endtime=UTCDateTime(5))

    sc = StreamCollection(streams=sc_bad.streams, handle_duplicates=True)
    assert sc.select(station='23837')[0][0].stats.network == 'CE'

    # Trim 'CE' even shorter; now '--' should be kept.
    for tr in sc_bad.select(network='CE')[0]:
        tr.trim(endtime=UTCDateTime(2))

    sc = StreamCollection(streams=sc_bad.streams, handle_duplicates=True)
    assert sc.select(station='23837')[0][0].stats.network == '--'

    # With equal trim, resample '--' to 20 Hz; 'CE' should be kept.
    for tr in sc_bad.select(network='--')[0]:
        tr.trim(endtime=UTCDateTime(2))
        tr.resample(20)

    sc = StreamCollection(streams=sc_bad.streams, handle_duplicates=True)
    assert sc.select(station='23837')[0][0].stats.network == 'CE'

    # Resample '--' to 10 Hz; 'CE' is still kept.
    for tr in sc_bad.select(network='--')[0]:
        tr.resample(10)

    sc = StreamCollection(streams=sc_bad.streams, handle_duplicates=True)
    assert sc.select(station='23837')[0][0].stats.network == 'CE'

    # New test for some Hawaii data.
    datapath = os.path.join('data', 'testdata', 'duplicate', 'hawaii')
    datadir = pkg_resources.resource_filename('gmprocess', datapath)
    streams = directory_to_streams(datadir)[0]
    sc = StreamCollection(streams=streams, handle_duplicates=True)
    assert len(sc) == 1

    # New test for some Alaska data.
    datapath = os.path.join('data', 'testdata', 'duplicate', 'alaska')
    datadir = pkg_resources.resource_filename('gmprocess', datapath)
    streams = directory_to_streams(datadir)[0]
    sc = StreamCollection(streams=streams,
                          handle_duplicates=True,
                          preference_order=['location_code'])
    assert len(sc) == 1
    for st in sc:
        for tr in st:
            assert tr.stats.location == 'D0'
def test_duplicates():
    """Exercise StreamCollection duplicate handling on bundled test data.

    Steps through the duplicate-resolution preference chain (process
    level, source format, start time, trace length, sampling rate), the
    distance tolerance, and an explicit ``preference_order`` override.
    """

    def _merge(stream_list, **kwargs):
        # Build a collection with duplicate handling enabled.
        return StreamCollection(
            streams=stream_list, handle_duplicates=True, **kwargs
        )

    rel_path = os.path.join("data", "testdata", "duplicate", "general")
    data_dir = pkg_resources.resource_filename("gmprocess", rel_path)
    raw_streams = directory_to_streams(data_dir)[0]

    unmerged = StreamCollection(streams=raw_streams, handle_duplicates=False)
    # With duplicate handling disabled we start with three streams.
    assert len(unmerged) == 3

    merged = _merge(raw_streams)
    # Duplicate handling collapses them to two streams of three traces each.
    assert len(merged) == 2
    assert len(merged[0]) == 3
    assert len(merged[1]) == 3

    # The 'CE' network is kept rather than the '--' network.
    assert merged.select(station="23837")[0][0].stats.network == "CE"

    # Flip the process levels of the two duplicate streams.
    for trace in unmerged.select(network="--")[0]:
        trace.stats.standard.process_level = "uncorrected physical units"
    for trace in unmerged.select(network="CE")[0]:
        trace.stats.standard.process_level = "corrected physical units"

    merged = _merge(unmerged.streams)
    # Now '--' should be kept instead of 'CE'.
    assert merged.select(station="23837")[0][0].stats.network == "--"

    # Reversing the process-level preference restores the original outcome.
    merged = _merge(
        unmerged.streams, process_level_preference=["V2", "V1"]
    )
    assert merged.select(station="23837")[0][0].stats.network == "CE"

    # A tighter distance tolerance keeps all three streams distinct.
    merged = _merge(raw_streams, max_dist_tolerance=10)
    assert len(merged) == 3

    # Give every trace the same processing level...
    for stream in unmerged:
        for trace in stream:
            trace.stats.standard.process_level = "uncorrected physical units"

    # ...so the source-format preference decides which stream is kept.
    merged = _merge(unmerged.streams, format_preference=["dmg", "cosmos"])
    assert merged.select(station="23837")[0][0].stats.network == "--"

    merged = _merge(unmerged.streams, format_preference=["cosmos", "dmg"])
    assert merged.select(station="23837")[0][0].stats.network == "CE"

    # Make process level and source format identical across all traces.
    for stream in unmerged:
        for trace in stream:
            trace.stats.standard.source_format = "cosmos"

    # A bad start time on '--' makes 'CE' the keeper.
    for stream in unmerged.select(station="23837", network="--"):
        for trace in stream:
            trace.stats.starttime = UTCDateTime(0)
    merged = _merge(unmerged.streams)
    assert merged.select(station="23837")[0][0].stats.network == "CE"

    # Swap the bad start time onto 'CE'; now '--' should be kept.
    for trace in unmerged.select(network="CE")[0]:
        trace.stats.starttime = UTCDateTime(0)
    for trace in unmerged.select(network="--")[0]:
        trace.stats.starttime = UTCDateTime(2018, 8, 29, 2, 33, 0)

    merged = _merge(unmerged.streams)
    assert merged.select(station="23837")[0][0].stats.network == "--"

    # Equal start times with '--' trimmed shorter: 'CE' is kept.
    for trace in unmerged.select(network="--")[0]:
        trace.stats.starttime = UTCDateTime(0)
        trace.trim(endtime=UTCDateTime(5))

    merged = _merge(unmerged.streams)
    assert merged.select(station="23837")[0][0].stats.network == "CE"

    # Trim 'CE' even shorter; '--' is kept.
    for trace in unmerged.select(network="CE")[0]:
        trace.trim(endtime=UTCDateTime(2))

    merged = _merge(unmerged.streams)
    assert merged.select(station="23837")[0][0].stats.network == "--"

    # Equal trims, '--' resampled to 20 Hz: 'CE' is kept.
    for trace in unmerged.select(network="--")[0]:
        trace.trim(endtime=UTCDateTime(2))
        trace.resample(20)

    merged = _merge(unmerged.streams)
    assert merged.select(station="23837")[0][0].stats.network == "CE"

    # '--' resampled to 10 Hz: 'CE' is still kept.
    for trace in unmerged.select(network="--")[0]:
        trace.resample(10)

    merged = _merge(unmerged.streams)
    assert merged.select(station="23837")[0][0].stats.network == "CE"

    # Hawaii dataset: duplicates collapse to a single stream.
    rel_path = os.path.join("data", "testdata", "duplicate", "hawaii")
    data_dir = pkg_resources.resource_filename("gmprocess", rel_path)
    raw_streams = directory_to_streams(data_dir)[0]
    merged = _merge(raw_streams)
    assert len(merged) == 1

    # Alaska dataset: resolve duplicates purely by location code.
    rel_path = os.path.join("data", "testdata", "duplicate", "alaska")
    data_dir = pkg_resources.resource_filename("gmprocess", rel_path)
    raw_streams = directory_to_streams(data_dir)[0]
    merged = _merge(raw_streams, preference_order=["location_code"])
    assert len(merged) == 1
    for stream in merged:
        for trace in stream:
            assert trace.stats.location == "D0"