def test_get_waveform(buffer_mb):
    # Send test logs to a per-buffer-size file (tempdir is assumed to be
    # defined at module scope in the original test file)
    formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
    handler = logging.FileHandler('%s/a_%d.txt' % (tempdir, int(buffer_mb)),
                                  mode='w')
    handler.setFormatter(formatter)
    logger = logging.getLogger('test')
    logger.setLevel(logging.DEBUG)
    logger.addHandler(handler)

    fds = FederatedASDFDataSet(asdf_file_list,
                               logger=logger,
                               single_item_read_limit_in_mb=buffer_mb)

    rows = np.array(
        fds.get_stations('1900-01-01T00:00:00', '2100-01-01T00:00:00'))

    for n, s, l, c in rows[:, 0:4]:
        wc = fds.get_waveform_count(n, s, l, c, '1900-01-01T00:00:00',
                                    '2100-01-01T00:00:00')
        stream = fds.get_waveforms(n,
                                   s,
                                   l,
                                   c,
                                   '1900-01-01T00:00:00',
                                   '2100-01-01T00:00:00',
                                   trace_count_threshold=1e4)

        assert wc == len(stream)
        logger.info('%s.%s: %d traces fetched' % (n, s, len(stream)))
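# buffer_mb above is a pytest parameter controlling
# single_item_read_limit_in_mb; a minimal parametrization sketch (the
# candidate sizes are illustrative, not taken from the original suite):
import pytest

@pytest.fixture(params=[0.5, 1, 2, 4])
def buffer_mb(request):
    return request.param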
# Example 2
def test_get_coordinates():
    fds = FederatedASDFDataSet(asdf_file_list)

    rows = np.array(fds.get_stations('1900-01-01T00:00:00', '2100-01-01T00:00:00'))

    station_set = set((n, s) for n, s in rows[:, 0:2])

    # we should have coordinates for each station
    assert len(fds.unique_coordinates) == len(station_set)
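# A hedged sketch of inspecting the mapping the assertion above exercises;
# the 'NET.STA' key format and [lon, lat] value ordering are assumptions
# about the unique_coordinates API, using the fds constructed above:
for key, coords in fds.unique_coordinates.items():
    print(key, coords)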
# Example 3
def test_get_stations():
    fds = FederatedASDFDataSet(asdf_file_list)

    rows = np.array(fds.get_stations('1900-01-01T00:00:00', '2100-01-01T00:00:00'))

    station_set = set((n, s) for n, s in rows[:, 0:2])

    # There are eight stations in the h5 file
    assert len(station_set) == 8
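# Rows returned by get_stations carry six fields, unpacked elsewhere in
# these examples as net, sta, loc, cha plus two coordinate fields (see
# codes[4]/codes[5] used as lon/lat further below); a sketch of reading
# them from the rows fetched above:
for n, s, l, c, lon, lat in rows:
    print('%s.%s.%s.%s at lon=%s, lat=%s' % (n, s, l, c, lon, lat))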
# Example 4
def test_get_local_net_sta_list():
    fds = FederatedASDFDataSet(asdf_file_list)

    local_netsta_list = list(fds.local_net_sta_list())
    rows = np.array(fds.get_stations('1900-01-01T00:00:00', '2100-01-01T00:00:00'))

    # Get a list of unique stations
    stations = set()
    for n, s in rows[:, 0:2]:
        stations.add((n, s))
    # end for

    # On serial runs, all stations should be allocated to rank 0
    assert len(local_netsta_list) == len(stations)
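# Under an MPI run, local_net_sta_list() is expected to yield only the
# (net, sta, start_time, end_time) tuples allocated to the calling rank,
# as used in the pick-extraction example further below; a hedged sketch:
from mpi4py import MPI

rank = MPI.COMM_WORLD.Get_rank()
for nc, sc, start_time, end_time in fds.local_net_sta_list():
    print('rank %d handles %s.%s' % (rank, nc, sc))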
# Example 5
def test_db_integrity():
    fds = FederatedASDFDataSet(asdf_file_list)

    # get number of waveforms from the db directly
    conn = sqlite3.connect(fds.fds.db_fn)
    query = 'select count(*) from wdb;'
    db_waveform_count = conn.execute(query).fetchall()[0][0]

    # fetch waveform counts for each unique combination of net, sta, loc, cha
    waveform_count = 0
    rows = fds.get_stations('1900-01-01T00:00:00', '2100-01-01T00:00:00')
    for row in rows:
        n, s, l, c, _, _ = row

        waveform_count += fds.get_waveform_count(n, s, l, c, '1900-01-01T00:00:00', '2100-01-01T00:00:00')
    # end for

    assert waveform_count == db_waveform_count
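# A hedged companion to test_db_integrity: per-channel waveform counts
# read straight from the sqlite index. The 'wdb' table name comes from
# the test above; the net/sta/loc/cha column names are assumptions about
# the schema.
import sqlite3

conn = sqlite3.connect(fds.fds.db_fn)  # fds as constructed in the test
for row in conn.execute('select net, sta, loc, cha, count(*) from wdb '
                        'group by net, sta, loc, cha'):
    print(row)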
# Example 6
def process(asdf_source, start_time, end_time, net, sta, cha, output_basename):
    """
    ASDF_SOURCE: Text file containing a list of paths to ASDF files\n
    START_TIME: Start time in UTCDateTime format\n
    END_TIME: End time in UTCDateTime format\n
    NET: Network name\n
    STA: Station name ('*' for all stations; note that * must be in quotation marks)\n
    CHA: Channel name ('*' for all channels; note that * must be in quotation marks) \n
    OUTPUT_BASENAME: Basename of output file

    Example usage:
    mpirun -np 112 python plot_data_quality.py asdf_files.txt 1980-01-01 2020-01-01 OA '*' '*' data_quality.oa
    """

    start_time = UTCDateTime(start_time)
    end_time = UTCDateTime(end_time)
    if (sta == '*'): sta = None
    if (cha == '*'): cha = None

    comm = MPI.COMM_WORLD
    nproc = comm.Get_size()
    rank = comm.Get_rank()

    l = setup_logger(name=output_basename, log_file='%s.log' % output_basename)
    fds = FederatedASDFDataSet(asdf_source, logger=l)

    stations = []
    if rank == 0:
        stations = fds.get_stations(start_time, end_time, network=net, station=sta, channel=cha)

        stations = split_list(sorted(stations), nproc)
    # end if

    stations = comm.bcast(stations, root=0)
    results = process_data(rank, fds, sorted(stations[rank]), start_time, end_time)

    results = comm.gather(results, root=0)
    if rank == 0:
        results = [item for sublist in results for item in sublist]  # flatten sublists for each proc
        stations = [item for sublist in stations for item in sublist]  # flatten sublists for each proc
        plot_results(stations, results, output_basename)
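# split_list is used above but not defined in this snippet; it is assumed
# to partition the sorted station list into nproc roughly equal chunks.
# A minimal sketch of such a helper:
def split_list(lst, npartitions):
    k, m = divmod(len(lst), npartitions)
    return [lst[i * k + min(i, m):(i + 1) * k + min(i + 1, m)]
            for i in range(npartitions)]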
# Example 7
def test_get_global_time_range():
    fds = FederatedASDFDataSet(asdf_file_list)

    rows = np.array(fds.get_stations('1900-01-01T00:00:00', '2100-01-01T00:00:00'))

    station_set = set((n, s) for n, s in rows[:, 0:2])

    minlist = []
    maxlist = []
    for (n, s) in station_set:
        tmin, tmax = fds.get_global_time_range(n, s)
        minlist.append(tmin)
        maxlist.append(tmax)
    # end for

    tmin = UTCDateTime(np.array(minlist).min())
    tmax = UTCDateTime(np.array(maxlist).max())

    # Ensure the aggregate min/max match the corresponding values in the db
    assert tmin == UTCDateTime('2000-01-01T00:00:00.000000Z')
    assert tmax == UTCDateTime('2002-01-01T00:00:00.000000Z')
# Example 8
def process(asdf_source, event_folder, output_path, min_magnitude, restart,
            save_quality_plots):
    """
    ASDF_SOURCE: Text file containing a list of paths to ASDF files
    EVENT_FOLDER: Path to folder containing event files\n
    OUTPUT_PATH: Output folder \n
    """

    comm = MPI.COMM_WORLD
    nproc = comm.Get_size()
    rank = comm.Get_rank()
    proc_workload = None

    if (rank == 0):

        def outputConfigParameters():
            # output config parameters
            fn = 'pick.%s.cfg' % (datetime.now().strftime('%Y-%m-%d-%H-%M-%S'))
            fn = os.path.join(output_path, fn)

            f = open(fn, 'w+')
            f.write('Parameter Values:\n\n')
            f.write('%25s\t\t: %s\n' % ('ASDF_SOURCE', asdf_source))
            f.write('%25s\t\t: %s\n' % ('EVENT_FOLDER', event_folder))
            f.write('%25s\t\t: %s\n' % ('OUTPUT_PATH', output_path))
            f.write('%25s\t\t: %s\n' % ('MIN_MAGNITUDE', min_magnitude))
            f.write('%25s\t\t: %s\n' %
                    ('RESTART_MODE', 'TRUE' if restart else 'FALSE'))
            f.write('%25s\t\t: %s\n' %
                    ('SAVE_PLOTS', 'TRUE' if save_quality_plots else 'FALSE'))
            f.close()

        # end func

        outputConfigParameters()
    # end if

    # ==================================================
    # Create output-folder for snr-plots
    # ==================================================
    plot_output_folder = None
    if (save_quality_plots):
        plot_output_folder = os.path.join(output_path, 'plots')
        if (rank == 0):
            if (not os.path.exists(plot_output_folder)):
                os.mkdir(plot_output_folder)
        # end if
        comm.Barrier()
    # end if

    # ==================================================
    # Read catalogue and retrieve origin times
    # ==================================================
    cat = CatalogCSV(event_folder)
    events = cat.get_events()
    originTimestamps = cat.get_preferred_origin_timestamps()

    # ==================================================
    # Create lists of pickers for both p- and s-arrivals
    # ==================================================
    sigmalist = np.arange(8, 3, -1)
    pickerlist_p = []
    pickerlist_s = []
    for sigma in sigmalist:
        picker_p = aicdpicker.AICDPicker(t_ma=5,
                                         nsigma=sigma,
                                         t_up=1,
                                         nr_len=5,
                                         nr_coeff=2,
                                         pol_len=10,
                                         pol_coeff=10,
                                         uncert_coeff=3)
        picker_s = aicdpicker.AICDPicker(t_ma=15,
                                         nsigma=sigma,
                                         t_up=1,
                                         nr_len=5,
                                         nr_coeff=2,
                                         pol_len=10,
                                         pol_coeff=10,
                                         uncert_coeff=3)

        pickerlist_p.append(picker_p)
        pickerlist_s.append(picker_s)
    # end for

    # ==================================================
    # Define theoretical model
    # Instantiate data-access object
    # Retrieve estimated workload
    # ==================================================
    taupyModel = TauPyModel(model='iasp91')
    fds = FederatedASDFDataSet(asdf_source, use_json_db=False, logger=None)
    workload = getWorkloadEstimate(fds, originTimestamps)

    # ==================================================
    # Define output header and open output files
    # depending on the mode of operation (fresh/restart)
    # ==================================================
    header = '#eventID originTimestamp mag originLon originLat originDepthKm net sta cha pickTimestamp stationLon stationLat az baz distance ttResidual snr qualityMeasureCWT domFreq qualityMeasureSlope bandIndex nSigma\n'
    ofnp = os.path.join(output_path, 'p_arrivals.%d.txt' % (rank))
    ofns = os.path.join(output_path, 's_arrivals.%d.txt' % (rank))
    ofp = None
    ofs = None
    if (not restart):
        ofp = open(ofnp, 'w+')
        ofs = open(ofns, 'w+')
        ofp.write(header)
        ofs.write(header)
    else:
        ofp = open(ofnp, 'a+')
        ofs = open(ofns, 'a+')
    # end if

    progTracker = ProgressTracker(output_folder=output_path,
                                  restart_mode=restart)
    totalTraceCount = 0
    for nc, sc, start_time, end_time in fds.local_net_sta_list():
        day = 24 * 3600
        dayCount = 0
        curr = start_time
        traceCountP = 0
        pickCountP = 0
        traceCountS = 0
        pickCountS = 0
        sw_start = datetime.now()
        step = day
        while (curr < end_time):
            if (curr + step > end_time):
                step = end_time - curr
            # end if

            eventIndices = (np.where((originTimestamps >= curr.timestamp) & \
                                     (originTimestamps <= (curr + day).timestamp)))[0]

            if (eventIndices.shape[0] > 0):
                totalTraceCount += 1
                stations = fds.get_stations(curr,
                                            curr + day,
                                            network=nc,
                                            station=sc)
                stations_zch = [s for s in stations
                                if 'Z' in s[3]]  # vertical (Z) channels only
                stations_nch = [
                    s for s in stations if 'N' in s[3] or '1' in s[3]
                ]  # north (N/1) channels only
                stations_ech = [
                    s for s in stations if 'E' in s[3] or '2' in s[3]
                ]  # east (E/2) channels only

                for codes in stations_zch:
                    if (not progTracker.increment()): continue

                    st = fds.get_waveforms(codes[0],
                                           codes[1],
                                           codes[2],
                                           codes[3],
                                           curr,
                                           curr + step,
                                           automerge=True,
                                           trace_count_threshold=200)

                    if (len(st) == 0): continue
                    dropBogusTraces(st)

                    slon, slat = codes[4], codes[5]
                    for ei in eventIndices:
                        event = events[ei]
                        po = event.preferred_origin
                        da = gps2dist_azimuth(po.lat, po.lon, slat, slon)
                        mag = None
                        if (event.preferred_magnitude):
                            mag = event.preferred_magnitude.magnitude_value
                        elif (len(po.magnitude_list)):
                            mag = po.magnitude_list[0].magnitude_value
                        if (mag is None): mag = np.NaN

                        if (np.isnan(mag) or mag < min_magnitude): continue

                        result = extract_p(
                            taupyModel,
                            pickerlist_p,
                            event,
                            slon,
                            slat,
                            st,
                            plot_output_folder=plot_output_folder)
                        if (result):
                            picklist, residuallist, snrlist, bandindex, pickerindex = result

                            arcdistance = kilometers2degrees(da[0] / 1e3)
                            for ip, pick in enumerate(picklist):
                                line = '%s %f %f %f %f %f ' \
                                       '%s %s %s %f %f %f ' \
                                       '%f %f %f ' \
                                       '%f %f %f %f %f '\
                                       '%d %d\n' % (event.public_id, po.utctime.timestamp, mag, po.lon, po.lat, po.depthkm,
                                                    codes[0], codes[1], codes[3], pick.timestamp, slon, slat,
                                                    da[1], da[2], arcdistance,
                                                    residuallist[ip], snrlist[ip, 0], snrlist[ip, 1], snrlist[ip, 2], snrlist[ip, 3],
                                                    bandindex, sigmalist[pickerindex])
                                ofp.write(line)
                            # end for
                            ofp.flush()
                            pickCountP += 1
                        # end if

                        if (len(stations_nch) == 0 and len(stations_ech) == 0):
                            result = extract_s(
                                taupyModel,
                                pickerlist_s,
                                event,
                                slon,
                                slat,
                                st,
                                None,
                                da[2],
                                plot_output_folder=plot_output_folder)
                            if (result):
                                picklist, residuallist, snrlist, bandindex, pickerindex = result

                                arcdistance = kilometers2degrees(da[0] / 1e3)
                                for ip, pick in enumerate(picklist):
                                    line = '%s %f %f %f %f %f ' \
                                           '%s %s %s %f %f %f ' \
                                           '%f %f %f ' \
                                           '%f %f %f %f %f ' \
                                           '%d %d\n' % (event.public_id, po.utctime.timestamp, mag, po.lon, po.lat, po.depthkm,
                                                        codes[0], codes[1], codes[3], pick.timestamp, slon, slat,
                                                        da[1], da[2], arcdistance,
                                                        residuallist[ip], snrlist[ip, 0], snrlist[ip, 1], snrlist[ip, 2], snrlist[ip, 3],
                                                        bandindex, sigmalist[pickerindex])
                                    ofs.write(line)
                                # end for
                                ofs.flush()
                                pickCountS += 1
                            # end if
                        # end if
                    # end for

                    traceCountP += len(st)
                # end for

                if (len(stations_nch) > 0
                        and len(stations_nch) == len(stations_ech)):
                    for codesn, codese in zip(stations_nch, stations_ech):
                        if (not progTracker.increment()): continue

                        stn = fds.get_waveforms(codesn[0],
                                                codesn[1],
                                                codesn[2],
                                                codesn[3],
                                                curr,
                                                curr + step,
                                                automerge=True,
                                                trace_count_threshold=200)
                        ste = fds.get_waveforms(codese[0],
                                                codese[1],
                                                codese[2],
                                                codese[3],
                                                curr,
                                                curr + step,
                                                automerge=True,
                                                trace_count_threshold=200)

                        dropBogusTraces(stn)
                        dropBogusTraces(ste)

                        if (len(stn) == 0): continue
                        if (len(ste) == 0): continue

                        slon, slat = codesn[4], codesn[5]

                        for ei in eventIndices:
                            event = events[ei]
                            po = event.preferred_origin
                            da = gps2dist_azimuth(po.lat, po.lon, slat, slon)

                            mag = None
                            if (event.preferred_magnitude):
                                mag = event.preferred_magnitude.magnitude_value
                            elif (len(po.magnitude_list)):
                                mag = po.magnitude_list[0].magnitude_value
                            if (mag is None): mag = np.NaN

                            if (np.isnan(mag) or mag < min_magnitude): continue

                            result = extract_s(
                                taupyModel,
                                pickerlist_s,
                                event,
                                slon,
                                slat,
                                stn,
                                ste,
                                da[2],
                                plot_output_folder=plot_output_folder)
                            if (result):
                                picklist, residuallist, snrlist, bandindex, pickerindex = result

                                arcdistance = kilometers2degrees(da[0] / 1e3)
                                for ip, pick in enumerate(picklist):
                                    line = '%s %f %f %f %f %f ' \
                                           '%s %s %s %f %f %f ' \
                                           '%f %f %f ' \
                                           '%f %f %f %f %f ' \
                                           '%d %d\n' % (event.public_id, po.utctime.timestamp, mag, po.lon, po.lat, po.depthkm,
                                                        codesn[0], codesn[1], '00T', pick.timestamp, slon, slat,
                                                        da[1], da[2], arcdistance,
                                                        residuallist[ip], snrlist[ip, 0], snrlist[ip, 1], snrlist[ip, 2], snrlist[ip, 3],
                                                        bandindex, sigmalist[pickerindex])
                                    ofs.write(line)
                                # end for
                                ofs.flush()
                                pickCountS += 1
                            # end if
                        # end for

                        traceCountS += (len(stn) + len(ste))
                    # end for
                # end if
            # end if
            curr += step
            dayCount += 1
        # wend
        sw_stop = datetime.now()
        totalTime = (sw_stop - sw_start).total_seconds()

        gc.collect()
        print(('(Rank %d: %5.2f%%, %d/%d) Processed %d traces and found %d p-arrivals and %d s-arrivals for '
               'network %s station %s in %f s. Memory usage: %5.2f MB.') %
              (rank, (float(totalTraceCount) / float(workload) * 100) if workload > 0 else 100, totalTraceCount, workload,
               traceCountP + traceCountS, pickCountP, pickCountS, nc, sc, totalTime,
               round(psutil.Process().memory_info().rss / 1024. / 1024., 2)))
    # end for
    ofp.close()
    ofs.close()

    print('Processing complete on rank %d' % (rank))

    del fds
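# dropBogusTraces is used above but not defined in this snippet; it is
# assumed to remove empty or absurdly sampled traces from a stream in
# place. A minimal sketch (the sampling-rate cutoff is illustrative):
def dropBogusTraces(st, sampling_rate_cutoff=5):
    badTraces = [tr for tr in st
                 if tr.stats.npts == 0 or
                 tr.stats.sampling_rate < sampling_rate_cutoff]
    for tr in badTraces:
        st.remove(tr)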
# Example 9
from obspy import UTCDateTime, read_events, read_inventory
from obspy.taup.taup_geo import calc_dist
from obspy.clients.iris import Client as IrisClient
from obspy.clients.fdsn import Client
from obspy.taup import TauPyModel
from obspy.signal.trigger import trigger_onset, z_detect, classic_sta_lta, recursive_sta_lta, ar_pick
from obspy.signal.rotate import rotate_ne_rt
from obspy.core.event import Pick, CreationInfo, WaveformStreamID, ResourceIdentifier, Arrival, Event, Origin, \
    OriginQuality, Magnitude, Comment

# FederatedASDFDataSet import; the module path is assumed from the
# hiperseis package layout
from seismic.ASDFdatabase.FederatedASDFDataSet import FederatedASDFDataSet

fds = FederatedASDFDataSet(
    '/g/data/ha3/Passive/SHARED_DATA/Index/asdf_files.txt', logger=None)

stations = fds.get_stations('2009-05-17T00:00:00',
                            '2009-05-18T00:00:00',
                            station='QLP')

print(stations)

s = fds.get_waveforms('AU',
                      'QLP',
                      '',
                      'BHE',
                      '2011-03-15T00:00:00',
                      '2011-03-16T00:00:00',
                      trace_count_threshold=10)
print(s)

if len(s) > 0:
    print("Plotting traces")
    s.plot()  # render the fetched traces with obspy's built-in plotting