def test_get_waveform(buffer_mb):
    formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
    handler = logging.FileHandler('%s/a_%d.txt' % (tempdir, int(buffer_mb)),
                                  mode='w')
    handler.setFormatter(formatter)
    logger = logging.getLogger('test')
    logger.setLevel(logging.DEBUG)
    logger.addHandler(handler)

    fds = FederatedASDFDataSet(asdf_file_list,
                               logger=logger,
                               single_item_read_limit_in_mb=buffer_mb)

    rows = np.array(
        fds.get_stations('1900-01-01T00:00:00', '2100-01-01T00:00:00'))

    for n, s, l, c in rows[:, 0:4]:
        wc = fds.get_waveform_count(n, s, l, c, '1900-01-01T00:00:00',
                                    '2100-01-01T00:00:00')
        stream = fds.get_waveforms(n,
                                   s,
                                   l,
                                   c,
                                   '1900-01-01T00:00:00',
                                   '2100-01-01T00:00:00',
                                   trace_count_threshold=1e4)

        assert wc == len(stream)
        logger.info('%s.%s: %d traces fetched' % (n, s, len(stream)))
Example No. 2
def test_get_closest_stations(num_neighbours):
    fds = FederatedASDFDataSet(asdf_file_list)

    netsta, dist = fds.get_closest_stations(0, 0, num_neighbours)

    # There are a total of 8 stations in the data set.
    assert len(netsta) > 0 and len(netsta) <= 8 and len(netsta) <= num_neighbours
Example No. 3
def test_get_stations():
    fds = FederatedASDFDataSet(asdf_file_list)

    rows = np.array(fds.get_stations('1900-01-01T00:00:00', '2100-01-01T00:00:00'))

    station_set = set()
    for n, s in rows[:, 0:2]:
        station_set.add((n, s))

    # There are eight stations in the h5 file
    assert len(station_set) == 8
Example No. 4
def test_get_coordinates():
    fds = FederatedASDFDataSet(asdf_file_list)

    rows = np.array(fds.get_stations('1900-01-01T00:00:00', '2100-01-01T00:00:00'))

    station_set = set()
    for n, s in rows[:, 0:2]:
        station_set.add((n, s))

    # we should have coordinates for each station
    assert len(fds.unique_coordinates) == len(station_set)
Example No. 5
def test_get_local_net_sta_list():
    fds = FederatedASDFDataSet(asdf_file_list)

    local_netsta_list = list(fds.local_net_sta_list())
    rows = np.array(fds.get_stations('1900-01-01T00:00:00', '2100-01-01T00:00:00'))

    # Get a list of unique stations
    stations = set()
    for n, s in rows[:,0:2]:
        stations.add((n, s))
    # end for

    # On serial runs, all stations should be allocated to rank 0
    assert len(local_netsta_list) == len(stations)
Example No. 6
    def __init__(self, asdf_file_name, netsta_list='*'):

        self._data_path = asdf_file_name
        self._earth_radius = 6371  # km

        self.fds = FederatedASDFDataSet(asdf_file_name)
        # Gather station metadata
        netsta_list_subset = set(
            netsta_list.split(' ')) if netsta_list != '*' else netsta_list
        self.netsta_list = []
        self.metadata = defaultdict(list)

        rtps = []
        for netsta in list(self.fds.unique_coordinates.keys()):
            if (netsta_list_subset != '*'):
                if netsta not in netsta_list_subset:
                    continue

            self.netsta_list.append(netsta)
            self.metadata[netsta] = self.fds.unique_coordinates[netsta]

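            # Store (radius, colatitude, longitude) in radians; unique_coordinates
            # holds (lon, lat) for each net.sta code.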
            rtps.append([
                self._earth_radius,
                np.radians(90 - self.metadata[netsta][1]),
                np.radians(self.metadata[netsta][0])
            ])
        # end for

        rtps = np.array(rtps)
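        # Convert the spherical (r, theta, phi) station coordinates to Cartesian
        # so they can be indexed in a KD-tree.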
        xyzs = rtp2xyz(rtps[:, 0], rtps[:, 1], rtps[:, 2])

        self._tree = cKDTree(xyzs)
        self._cart_location = defaultdict(list)
        for i, ns in enumerate(self.netsta_list):
            self._cart_location[ns] = xyzs[i, :]
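
    # A minimal sketch (an assumption, not part of the original class) showing how
    # the cKDTree built in __init__ could be queried for the stations nearest a
    # given lon/lat point; the method name and signature here are hypothetical.
    def get_closest_stations_sketch(self, lon, lat, num_neighbours=1):
        # Convert the query point to the same Cartesian frame as the stored stations.
        xyz = rtp2xyz(np.array([self._earth_radius]),
                      np.array([np.radians(90 - lat)]),
                      np.array([np.radians(lon)]))
        dists, indices = self._tree.query(xyz, k=num_neighbours)
        indices = np.atleast_1d(np.squeeze(indices))
        dists = np.atleast_1d(np.squeeze(dists))
        # cKDTree pads with index == n when fewer than k neighbours exist.
        valid = indices < len(self.netsta_list)
        return [self.netsta_list[i] for i in indices[valid]], dists[valid]
    # end func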
Example No. 7
def test_db_integrity():
    fds = FederatedASDFDataSet(asdf_file_list)

    # get number of waveforms from the db directly
    conn = sqlite3.connect(fds.fds.db_fn)
    query = 'select count(*) from wdb;'
    db_waveform_count = conn.execute(query).fetchall()[0][0]

    # fetch waveform counts for each unique combination of net, sta, loc, cha
    waveform_count = 0
    rows = fds.get_stations('1900-01-01T00:00:00', '2100-01-01T00:00:00')
    for row in rows:
        n, s, l, c, _, _ = row

        waveform_count += fds.get_waveform_count(n, s, l, c, '1900-01-01T00:00:00', '2100-01-01T00:00:00')
    # end for

    assert waveform_count == db_waveform_count
Example No. 8
def process(asdf_source, start_time, end_time, net, sta, cha, output_basename):
    """
    ASDF_SOURCE: Text file containing a list of paths to ASDF files\n
    START_TIME: Start time in UTCDateTime format\n
    END_TIME: End time in UTCDateTime format\n
    NET: Network name\n
    STA: Station name ('*' for all stations; note that * must be in quotation marks)\n
    CHA: Channel name ('*' for all channels; note that * must be in quotation marks) \n
    OUTPUT_BASENAME: Basename of output file

    Example usage:
    mpirun -np 112 python plot_data_quality.py asdf_files.txt 1980:01:01 2020:01:01 OA '*' '*' data_quality.oa
    """

    start_time = UTCDateTime(start_time)
    end_time = UTCDateTime(end_time)
    if (sta == '*'): sta = None
    if (cha == '*'): cha = None

    comm = MPI.COMM_WORLD
    nproc = comm.Get_size()
    rank = comm.Get_rank()

    l = setup_logger(name=output_basename, log_file='%s.log' % output_basename)
    fds = FederatedASDFDataSet(asdf_source, logger=l)

    stations = []
    if rank == 0:
        stations = fds.get_stations(start_time, end_time, network=net, station=sta, channel=cha)

        stations = split_list(sorted(stations), nproc)
    # end if

    stations = comm.bcast(stations, root=0)
    results = process_data(rank, fds, sorted(stations[rank]), start_time, end_time)

    results = comm.gather(results, root=0)
    if rank == 0:
        results = [item for sublist in results for item in sublist]  # flatten sublists for each proc
        stations = [item for sublist in stations for item in sublist]  # flatten sublists for each proc
        plot_results(stations, results, output_basename)
Example No. 9
def test_get_global_time_range():
    fds = FederatedASDFDataSet(asdf_file_list)

    rows = np.array(fds.get_stations('1900-01-01T00:00:00', '2100-01-01T00:00:00'))

    station_set = set()
    for n, s in rows[:, 0:2]:
        station_set.add((n, s))

    minlist = []
    maxlist = []
    for (n, s) in station_set:
        tmin, tmax = fds.get_global_time_range(n, s)
        minlist.append(tmin)
        maxlist.append(tmax)
    # end for

    tmin = UTCDateTime(np.array(minlist).min())
    tmax = UTCDateTime(np.array(maxlist).max())

    # Ensure the aggregate min/max match the corresponding values in the db
    assert tmin == UTCDateTime('2000-01-01T00:00:00.000000Z')
    assert tmax == UTCDateTime('2002-01-01T00:00:00.000000Z')
Example No. 10
def aggregate(input_folder, output_file, folder_mask, station_database,
              max_depth_km, depth_levels):
    """
    Scrape together all the trans-D inversion solutions and collect them into a volumetric dataset.

    :param input_folder: Folder containing solutions to scrape together
    :type input_folder: str or Path
    :param output_file: Output file (must not exist already)
    :type output_file: str or Path ('.npy' extension expected)
    :param folder_mask: Glob mask used to select solution folders within input_folder
    :param station_database: Index file used to instantiate FederatedASDFDataSet
    :param max_depth_km: Maximum depth (km) of the interpolated velocity profiles
    :param depth_levels: Number of depth levels to interpolate onto
    """

    # Open station database from which to get station lat,lon coordinates
    station_location_db = FederatedASDFDataSet(
        station_database).unique_coordinates

    # Process folders in alphanumerical order
    folders = sorted(glob.glob(os.path.join(input_folder, folder_mask)))

    # regex pattern for matching case strings containing network, station and channel codes
    case_pattern = '^([a-zA-Z0-9]+)_([a-zA-Z0-9]+)_([a-zA-Z0-9]+)'
    matcher = re.compile(case_pattern)

    # Container for storing Vs as a function of depth for each station.
    station_profiles = []

    # Loop over folders one at a time
    for f in folders:
        # If it is a folder and has a solution file in it.
        if os.path.isdir(f) and os.path.isfile(os.path.join(f, SOLUTION_FILE)):
            _, case_folder = os.path.split(f)
            case_meta = matcher.match(case_folder)
            # Extract network, station and channel metadata from folder name
            net = case_meta.group(1)
            sta = case_meta.group(2)
            cha = case_meta.group(3)
            station_id = '.'.join([net, sta, cha])

            soln_file = os.path.join(f, SOLUTION_FILE)
            # station_coords are in lon,lat order
            station_coords = station_location_db['.'.join([net, sta])]

            print(station_id, station_coords)

            # Open solution file and collect relevant fields
            with open(soln_file, 'r') as posterior:
                post_dat = posterior.readlines()
            # end with
            _0, depth_discretization, depth_max = post_dat[0].strip(
                '\n').split(None)
            depth_discretization = int(depth_discretization)
            depth_max = float(depth_max)
            z_range = depth_max * (np.arange(depth_discretization) +
                                   0.5) / depth_discretization

            Vs_min, Vs_max, vel_discretization, _width = post_dat[1].strip(
                '\n').split(None)
            vel_discretization = int(vel_discretization)
            Vs_min, Vs_max = float(Vs_min), float(Vs_max)
            vel_range = Vs_min + (Vs_max - Vs_min) * (
                np.arange(vel_discretization) + 0.5) / vel_discretization
            # Each row of posterior_distribution corresponds to a discrete depth. At each depth,
            # we have a velocity probability distribution based on MCMC sampling.
            posterior_distribution = np.reshape(
                np.array([float(x.strip('\n')) for x in post_dat[2:]]),
                (depth_discretization, vel_discretization))

            # Normalize the distribution at each depth.
            post = posterior_distribution / np.expand_dims(
                np.sum(posterior_distribution, axis=-1), -1)
            assert np.allclose(np.sum(post, axis=-1), 1)

            # Compute mean at each depth, reducing the 2D posterior to 1D
            # velocity as a function of depth.
            vel_mean = np.dot(post, vel_range)

            # Create 4-column 2D matrix storing results for this station.
            xy_range = np.array([[station_coords[0], station_coords[1]]] *
                                depth_levels)
            interpolator = interp1d(z_range, vel_mean, kind='cubic')
            z_interp = max_depth_km * (np.arange(depth_levels) +
                                       0.5) / depth_levels
            vel_interp = interpolator(z_interp)
            data_all = np.column_stack([xy_range, z_interp, vel_interp])
            station_profiles.append(data_all)

        # end if
    # end for

    data_all = np.vstack(station_profiles)
    np.save(output_file, data_all, allow_pickle=False)
    print('Saved {} size array to {}'.format(data_all.shape, output_file))
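
# A minimal reload sketch (an assumption, not from the original source): the array
# written by np.save above has four columns (lon, lat, depth_km, Vs), so it can be
# inspected as follows; the file name is hypothetical.
#
#   import numpy as np
#   data = np.load('aggregated_profiles.npy')   # shape: (n_points, 4)
#   lon, lat, depth_km, vs = data.T
#   print(data.shape)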
def main(infile, fds_file, sheet_names, k):
    """
    Process Excel spreadsheet into point dataset based on station codes.

    Example usage:

    python hk_stations2point.py --fds-file /g/data/ha3/Passive/SHARED_DATA/Index/asdf_files.txt \
        network_hk_data_sample.xlsx

    Output format is a csv file containing point data in the form of lon/lat
    coordinates and a depth measurement.
    For example:
        # Sta,Lon,Lat,Depth
        I8,133.035951,-19.473353,37.9
        H8,133.006100,-20.003900,45.9
        G8,132.997000,-20.486800,40.8
        F8,132.991205,-20.997177,47.3
        D8,132.989100,-21.506900,30.0
        ...

    Output file name is inferred from input Excel file name with extension changed to '.csv'

    :param infile: Input Excel file containing bespoke digitized Moho depths
    :param fds_file: Index file used to instantiate FederatedASDFDataSet
    :param sheet_names: Optional list of sheet names to process (all sheets if empty)
    :param k: If True, extract the k-value column instead of the depth column
    :return: None
    """

    with xlrd.open_workbook(infile) as wb:
        if not sheet_names:
            sheet_names = wb.sheet_names()
            print('Processing all sheets:\n', sheet_names)
        else:
            _sheet_names = wb.sheet_names()
            for name in sheet_names:
                assert name in _sheet_names, 'Sheet {} not found in workbook!'.format(name)

    fds = FederatedASDFDataSet(fds_file)
    sta_coords = fds.unique_coordinates
    pts = []
    with xlrd.open_workbook(infile) as wb:
        for sheet_name in sheet_names:
            sheet = wb.sheet_by_name(sheet_name)
            print('Processing sheet {}'.format(sheet_name))
            try:
                network = sheet.cell_value(0, 0)
                network = network.split()[-1]
            except IndexError:
                print('Network code not found in string "{}", exiting'.format(network))
                exit(1)
            # end try
            network = NETWORK_CODE_MAPPINGS.get(network, network)
            for i, row in enumerate(sheet.get_rows()):
                if i == 0:
                    print('Skipping header row:', row)
                    continue
                # end if
                if not row or not row[0].value:
                    break
                # end if
                station = str(row[0].value)
                for sc in SPECIAL_CHARS:
                    station = station.split(sc)[0]
                station = '.'.join([network, station])
                if k:
                    val = float(row[2].value)
                else:
                    val = float(row[1].value)
                if np.isnan(val):
                    print(f"Invalid depth value for {station}, skipping")
                    continue
                coords = sta_coords[station]
                if not coords:
                    print(f"Couldn't find coordinates for {station}, skipping")
                    continue
                pt_data = [station] + coords + [val]
                pts.append(pt_data)

    all_data = np.array(pts)
    print('Collected {} samples from {} sheets'.format(all_data.shape[0], len(sheet_names)))
    filebase = os.path.splitext(infile)[0]
    outfile = filebase + '.csv'
    print('Saving point data to file "{}"'.format(outfile))
    header = 'Sta,Lon,Lat,'
    header = header + 'K' if k else header + 'Depth'
    np.savetxt(outfile, all_data, fmt=['%s', '%s', '%s', '%s'], delimiter=',',
               header=header)
Ejemplo n.º 12
0
        :return: tuples containing [net, sta, start_time, end_time]; start- and end-times are instances of obspy.UTCDateTime
        """
        for item in self.fds.local_net_sta_list():
            yield item
        # end for

    # end func


# end class

if __name__ == "__main__":
    """
    How to Run Example::

        python ASDFdatabase/FederatedASDFDataSet.py /Datasets/asdf_file_index.txt

    Upon success, a db file will be created: /Datasets/f374ca9e7dd8abd2a1d58575e0d55520f30ffc23.db
    """
    import sys
    from seismic.ASDFdatabase.FederatedASDFDataSet import FederatedASDFDataSet

    if len(sys.argv) < 2:
        print("******** USAGE: python3 %s %s **********" %
              (sys.argv[0], "asdf_file_list_txt"))
        sys.exit(1)

    asdf_file_list = sys.argv[1]
    ds = FederatedASDFDataSet(asdf_file_list)
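
    # A brief usage sketch (an assumption, not part of the original snippet): once
    # the index database has been built, the data set can be queried, for example:
    #
    #   rows = ds.get_stations('2011-03-15T00:00:00', '2011-03-16T00:00:00')
    #   st = ds.get_waveforms('AU', 'QLP', '', 'BHE',
    #                         '2011-03-15T00:00:00', '2011-03-16T00:00:00')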
Example No. 13
def main(inventory_file, waveform_database, event_catalog_file,
         rf_trace_datafile, start_time, end_time, taup_model, distance_range,
         magnitude_range):

    log = logging.getLogger(__name__)
    log.setLevel(logging.INFO)

    waveform_db_is_web = is_url(
        waveform_database
    ) or waveform_database in obspy.clients.fdsn.header.URL_MAPPINGS
    if not waveform_db_is_web:
        assert os.path.exists(
            waveform_database), "Cannot find waveform database file {}".format(
                waveform_database)
    log.info("Using waveform data source: {}".format(waveform_database))

    assert not os.path.exists(rf_trace_datafile), \
        "Won't delete existing file {}, remove manually.".format(rf_trace_datafile)

    min_dist_deg = distance_range[0]
    max_dist_deg = distance_range[1]
    min_mag = magnitude_range[0]
    max_mag = magnitude_range[1]

    inventory = read_inventory(inventory_file)
    log.info("Loaded inventory {}".format(inventory_file))

    # Compute reference lonlat from the inventory.
    channels = inventory.get_contents()['channels']
    lonlat_coords = []
    for ch in channels:
        coords = inventory.get_coordinates(ch)
        lonlat_coords.append((coords['longitude'], coords['latitude']))
    lonlat_coords = np.array(lonlat_coords)
    lonlat = np.mean(lonlat_coords, axis=0)
    log.info("Inferred reference coordinates {}".format(lonlat))

    # If start and end time not provided, infer from date range of inventory.
    if not start_time:
        start_time = inventory[0].start_date
        for net in inventory:
            start_time = min(start_time, net.start_date)
        log.info("Inferred start time {}".format(start_time))
    # end if
    if not end_time:
        end_time = inventory[0].end_date
        if end_time is None:
            end_time = UTC.now()
        for net in inventory:
            end_time = max(end_time, net.end_date)
        log.info("Inferred end time {}".format(end_time))
    # end if

    start_time = UTC(start_time)
    end_time = UTC(end_time)
    event_catalog_file = timestamp_filename(event_catalog_file, start_time,
                                            end_time)
    rf_trace_datafile = timestamp_filename(rf_trace_datafile, start_time,
                                           end_time)
    log.info("Traces will be written to: {}".format(rf_trace_datafile))

    exit_after_catalog = False
    catalog = get_events(lonlat, start_time, end_time, event_catalog_file,
                         (min_dist_deg, max_dist_deg), (min_mag, max_mag),
                         exit_after_catalog)

    if waveform_db_is_web:
        existing_index = None
        log.info("Use fresh query results from web")
        client = Client(waveform_database)
        waveform_getter = client.get_waveforms
    else:
        # Form closure to allow waveform source file to be derived from a setting (or command line input)
        asdf_dataset = FederatedASDFDataSet(waveform_database, logger=log)

        def closure_get_waveforms(network, station, location, channel,
                                  starttime, endtime):
            return custom_get_waveforms(asdf_dataset, network, station,
                                        location, channel, starttime, endtime)

        existing_index = _get_existing_index(rf_trace_datafile)
        if existing_index is not None:
            log.warning(
                "Resuming extraction using existing index from file {}".format(
                    rf_trace_datafile))
        waveform_getter = closure_get_waveforms
    # end if

    with tqdm(smoothing=0) as pbar:
        stream_count = 0
        for s in iter_event_data(catalog,
                                 inventory,
                                 waveform_getter,
                                 tt_model=taup_model,
                                 pbar=pbar):
            # Write traces to output file in append mode so that arbitrarily large file
            # can be processed. If the file already exists, then existing streams will
            # be overwritten rather than duplicated.
            # Check first if rotation for unaligned *H1, *H2 channels to *HN, *HE is required.
            if s.select(component='1') and s.select(component='2'):
                s.rotate('->ZNE', inventory=inventory)
            # end if
            # Order the traces in ZNE ordering. This is required so that normalization
            # can be specified in terms of an integer index, i.e. the default of 0 in rf
            # library will normalize against the Z component.
            s.traces = sorted(s.traces, key=zne_order)
            # Assert the ordering of traces in the stream is ZNE.
            assert s.traces[0].stats.channel[-1] == 'Z'
            assert s.traces[1].stats.channel[-1] == 'N'
            assert s.traces[2].stats.channel[-1] == 'E'
            # Loop over ZNE traces
            for tr in s:
                grp_id = '.'.join(tr.id.split('.')[0:3])
                event_time = str(tr.meta.event_time)[0:19]
                pbar.set_description("{} -- {}".format(grp_id, event_time))
                if existing_index is not None:
                    # Skip records that already exist in the file to speed up generation
                    if grp_id in existing_index and event_time in existing_index[
                            grp_id]:
                        pbar.write(
                            "Skipping {} -- {} already exists in output file".
                            format(grp_id, event_time))
                        continue
                    else:
                        # Use override='dont' mode just in case our hand-crafted index is faulty
                        stream_count += 1
                        tr.write(rf_trace_datafile,
                                 'H5',
                                 mode='a',
                                 override='dont')
                else:
                    stream_count += 1
                    tr.write(rf_trace_datafile, 'H5', mode='a')
            # end for
        # end for
        if stream_count == 0:
            log.warning("No traces found!")
        else:
            log.info("Wrote {} new stream to output file".format(stream_count))
Example No. 14
# TODO: Fix resource management here so that asdf_files_dir gets deleted when tests finished/finalized.
path = os.path.dirname(os.path.abspath(__file__))

# Initialize input data
asdf_files_dir = tempfile.mkdtemp(suffix='_test')
asdf_file_list1 = os.path.join(asdf_files_dir, 'asdf_file_list1.txt')
asdf_file_list2 = os.path.join(asdf_files_dir, 'asdf_file_list2.txt')

f1 = open(asdf_file_list1, 'w+')
f2 = open(asdf_file_list2, 'w+')
f1.write('%s/data/test_data_ARMA.h5\n' % (path))
f2.write('%s/data/test_data_QLP.h5\n' % (path))
f1.close()
f2.close()

fds1 = FederatedASDFDataSet(asdf_file_list1)
fds2 = FederatedASDFDataSet(asdf_file_list2)

# Initialize input inventory
inv = read_inventory('%s/data/response_inventory.fdsnxml' % (path))

# Unzip expected results
expected_folder = tempfile.mkdtemp()
cmd = 'tar -zxvf %s -C %s' % ('%s/data/expected/expected.tar.gz' % path,
                              expected_folder)
os.system(cmd)


@pytest.fixture(params=['BHZ', '00T'])
def cha(request):
    return request.param
Example No. 15
def main(infile, fds_file):
    """
    Process Excel spreadsheet into point dataset based on line profiles.

    Example usage:

    python ccp_lines2point.py --fds-file /g/data/ha3/Passive/SHARED_DATA/Index/asdf_files.txt \
        ccp_line_data_sample.xlsx

    Output format is a csv file containing point data in the form of lon/lat
    coordinates and a depth measurement.
    For example:

        # Lon,Lat,Depth
        134.909765,-17.572545,47.9
        135.017670,-17.570829,47.3
        135.134567,-17.568970,48.9
        135.395337,-17.564823,52.1
        135.494250,-17.563250,52.1
        ...

    Output file name is inferred from input Excel file name with extension changed to '.csv'

    :param infile: Input Excel file containing bespoke digitized Moho depths
    :param fds_file: Index file used to instantiate FederatedASDFDataSet
    :return: None
    """

    with xlrd.open_workbook(infile) as wb:
        sheet = wb.sheet_by_index(0)
        network = sheet.cell_value(0, 3)
        lines_row = sheet.row_values(3)
        lines = [line for line in lines_row if line]
    # end with

    df = pd.read_excel(infile, header=4)
    df.drop(df.columns[0], axis=1, inplace=True)

    fds = FederatedASDFDataSet(fds_file)
    sta_coords = fds.unique_coordinates
    vol_data_dict = {}
    for i, line in enumerate(lines):
        line = line.strip()
        sta_start, sta_end = line.split(',')
        sta_start = sta_start.strip()
        sta_end = sta_end.strip()
        start = '.'.join([network, sta_start])
        end = '.'.join([network, sta_end])
        start = np.array(sta_coords[start])
        end = np.array(sta_coords[end])
        assert np.any(end != start)
        dirn = (end - start)
        dirn = dirn / np.linalg.norm(dirn)
        dist_col = df.iloc[:, 3 * i + 1]
        dist_col = pd.to_numeric(dist_col, errors='coerce').astype(float)
        valid = dist_col.notna()
        if not np.any(valid):
            continue
        dist = dist_col[valid].values - LEAD_INOUT_DIST_KM
        depth = df.iloc[:, 3 * i + 2][valid].values
        lonlat = start + np.outer(dist, dirn) / KM_PER_DEG
        # Difficult to correct for differences in station elevation because
        # FDS does not include it in station coords. Ignore for now.
        vol_data = np.hstack((lonlat, depth[:, np.newaxis]))
        vol_data_dict[line] = vol_data
    # end for

    filebase = os.path.splitext(infile)[0]
    outfile = filebase + '.csv'
    all_data = np.vstack(tuple(v for v in vol_data_dict.values()))
    np.savetxt(outfile,
               all_data,
               fmt=['%.6f', '%.6f', '%.1f'],
               delimiter=',',
               header='Lon,Lat,Depth')
def main(infile, fds_file, raise_errors=False):
    """
    Process Excel spreadsheet into point dataset based on line profiles.

    Example usage:

    python ccp_lines2point.py --fds-file /g/data/ha3/Passive/SHARED_DATA/Index/asdf_files.txt \
        ccp_line_data_sample.xlsx

    Output format is a csv file containing point data in the form of station code,
    lon/lat coordinates and depth measurement.
    For example:

        # Sta,Lon,Lat,Depth
        I8,134.909765,-17.572545,47.9
        H8,135.017670,-17.570829,47.3
        G8,135.134567,-17.568970,48.9
        F8,135.395337,-17.564823,52.1
        D8,135.494250,-17.563250,52.1
        ...

    Output file name is inferred from input Excel file name with extension changed to '.csv'

    :param infile: Input Excel file containing bespoke digitized Moho depths
    :param fds_file: Index file used to instantiate FederatedASDFDataSet
    :return: None
    """
    fds = FederatedASDFDataSet(fds_file)
    sta_coords = fds.unique_coordinates
    all_network_codes = {s.split('.')[0] for s in sta_coords.keys()}
    vol_data_list = []

    with xlrd.open_workbook(infile) as wb:
        for i, sheet in enumerate(wb.sheets()):
            network = sheet.cell_value(0, 3)
            network = NETWORK_CODE_MAPPINGS.get(network, network)
            lines_row = sheet.row_values(3)
            lines = [line for line in lines_row if line]
            df = pd.read_excel(infile, sheet_name=i, header=4)
            df.drop(df.columns[0], axis=1, inplace=True)

            for i, line in enumerate(lines):
                line = line.strip()
                sta_start, sta_end = line.split(',')
                sta_start = sta_start.strip()
                sta_end = sta_end.strip()
                netsta_start = '.'.join([network, sta_start])
                netsta_end = '.'.join([network, sta_end])
                start = np.array(sta_coords[netsta_start])
                end = np.array(sta_coords[netsta_end])
                if start.size == 0 or end.size == 0:
                    msg = f"Can't get coordinates for {netsta_start} or {netsta_end}"
                    if raise_errors:
                        raise Exception(msg)
                    else:
                        print(msg)
                        continue
                if not np.any(end != start):
                    msg = f"Invalid profile line {netsta_start} to {netsta_end}"
                    if raise_errors:
                        raise Exception(msg)
                    else:
                        print(msg)
                        continue
                dirn = (end - start)
                dirn = dirn / np.linalg.norm(dirn)
                dist_col = df.iloc[:, 3 * i + 1]
                dist_col = pd.to_numeric(dist_col,
                                         errors='coerce').astype(float)
                valid = dist_col.notna()
                if not np.any(valid):
                    msg = f"No valid values for profile line {netsta_start} to {netsta_end}"
                    if raise_errors:
                        raise Exception(msg)
                    else:
                        print(msg)
                        continue
                dist = dist_col[valid].values - LEAD_INOUT_DIST_KM
                depth = df.iloc[:, 3 * i + 2][valid].values
                stations = df.iloc[:, 3 * i][valid].values
                for sc in SPECIAL_CHARS:
                    stations = [s.split(sc)[0] for s in stations]
                stations = np.array(['.'.join([network, s]) for s in stations])
                lonlat = start + np.outer(dist, dirn) / KM_PER_DEG
                vol_data = np.hstack(
                    (stations[:, np.newaxis], lonlat, depth[:, np.newaxis]))
                vol_data_list.append(vol_data)

    filebase = os.path.splitext(infile)[0]
    outfile = filebase + '.csv'
    all_data = np.vstack(tuple(v for v in vol_data_list))
    np.savetxt(outfile,
               all_data,
               fmt=['%s', '%s', '%s', '%s'],
               delimiter=',',
               header='Sta,Lon,Lat,Depth')
Example No. 17
def process(asdf_source, event_folder, output_path, min_magnitude, restart,
            save_quality_plots):
    """
    ASDF_SOURCE: Text file containing a list of paths to ASDF files
    EVENT_FOLDER: Path to folder containing event files\n
    OUTPUT_PATH: Output folder \n
    """

    comm = MPI.COMM_WORLD
    nproc = comm.Get_size()
    rank = comm.Get_rank()
    proc_workload = None

    if (rank == 0):

        def outputConfigParameters():
            # output config parameters
            fn = 'pick.%s.cfg' % (datetime.now().strftime('%Y-%m-%d-%H-%M-%S'))
            fn = os.path.join(output_path, fn)

            f = open(fn, 'w+')
            f.write('Parameter Values:\n\n')
            f.write('%25s\t\t: %s\n' % ('ASDF_SOURCE', asdf_source))
            f.write('%25s\t\t: %s\n' % ('EVENT_FOLDER', event_folder))
            f.write('%25s\t\t: %s\n' % ('OUTPUT_PATH', output_path))
            f.write('%25s\t\t: %s\n' % ('MIN_MAGNITUDE', min_magnitude))
            f.write('%25s\t\t: %s\n' %
                    ('RESTART_MODE', 'TRUE' if restart else 'FALSE'))
            f.write('%25s\t\t: %s\n' %
                    ('SAVE_PLOTS', 'TRUE' if save_quality_plots else 'FALSE'))
            f.close()

        # end func

        outputConfigParameters()
    # end if

    # ==================================================
    # Create output-folder for snr-plots
    # ==================================================
    plot_output_folder = None
    if (save_quality_plots):
        plot_output_folder = os.path.join(output_path, 'plots')
        if (rank == 0):
            if (not os.path.exists(plot_output_folder)):
                os.mkdir(plot_output_folder)
        # end if
        comm.Barrier()
    # end if

    # ==================================================
    # Read catalogue and retrieve origin times
    # ==================================================
    cat = CatalogCSV(event_folder)
    events = cat.get_events()
    originTimestamps = cat.get_preferred_origin_timestamps()

    # ==================================================
    # Create lists of pickers for both p- and s-arrivals
    # ==================================================
    sigmalist = np.arange(8, 3, -1)
    pickerlist_p = []
    pickerlist_s = []
    for sigma in sigmalist:
        picker_p = aicdpicker.AICDPicker(t_ma=5,
                                         nsigma=sigma,
                                         t_up=1,
                                         nr_len=5,
                                         nr_coeff=2,
                                         pol_len=10,
                                         pol_coeff=10,
                                         uncert_coeff=3)
        picker_s = aicdpicker.AICDPicker(t_ma=15,
                                         nsigma=sigma,
                                         t_up=1,
                                         nr_len=5,
                                         nr_coeff=2,
                                         pol_len=10,
                                         pol_coeff=10,
                                         uncert_coeff=3)

        pickerlist_p.append(picker_p)
        pickerlist_s.append(picker_s)
    # end for

    # ==================================================
    # Define theoretical model
    # Instantiate data-access object
    # Retrieve estimated workload
    # ==================================================
    taupyModel = TauPyModel(model='iasp91')
    fds = FederatedASDFDataSet(asdf_source, use_json_db=False, logger=None)
    workload = getWorkloadEstimate(fds, originTimestamps)

    # ==================================================
    # Define output header and open output files
    # depending on the mode of operation (fresh/restart)
    # ==================================================
    header = '#eventID originTimestamp mag originLon originLat originDepthKm net sta cha pickTimestamp stationLon stationLat az baz distance ttResidual snr qualityMeasureCWT domFreq qualityMeasureSlope bandIndex nSigma\n'
    ofnp = os.path.join(output_path, 'p_arrivals.%d.txt' % (rank))
    ofns = os.path.join(output_path, 's_arrivals.%d.txt' % (rank))
    ofp = None
    ofs = None
    if (restart == False):
        ofp = open(ofnp, 'w+')
        ofs = open(ofns, 'w+')
        ofp.write(header)
        ofs.write(header)
    else:
        ofp = open(ofnp, 'a+')
        ofs = open(ofns, 'a+')
    # end if

    progTracker = ProgressTracker(output_folder=output_path,
                                  restart_mode=restart)
    totalTraceCount = 0
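    # Iterate over the (net, sta, start_time, end_time) tuples allocated to this MPI rank.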
    for nc, sc, start_time, end_time in fds.local_net_sta_list():
        day = 24 * 3600
        dayCount = 0
        curr = start_time
        traceCountP = 0
        pickCountP = 0
        traceCountS = 0
        pickCountS = 0
        sw_start = datetime.now()
        step = day
        while (curr < end_time):
            if (curr + step > end_time):
                step = end_time - curr
            # end if

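            # Select events whose preferred-origin timestamps fall within the current
            # day-long window.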
            eventIndices = (np.where((originTimestamps >= curr.timestamp) & \
                                     (originTimestamps <= (curr + day).timestamp)))[0]

            if (eventIndices.shape[0] > 0):
                totalTraceCount += 1
                stations = fds.get_stations(curr,
                                            curr + day,
                                            network=nc,
                                            station=sc)
                stations_zch = [s for s in stations
                                if 'Z' in s[3]]  # only Z channels
                stations_nch = [
                    s for s in stations if 'N' in s[3] or '1' in s[3]
                ]  # only N channels
                stations_ech = [
                    s for s in stations if 'E' in s[3] or '2' in s[3]
                ]  # only E channels

                for codes in stations_zch:
                    if not progTracker.increment():
                        continue

                    st = fds.get_waveforms(codes[0],
                                           codes[1],
                                           codes[2],
                                           codes[3],
                                           curr,
                                           curr + step,
                                           automerge=True,
                                           trace_count_threshold=200)

                    if (len(st) == 0): continue
                    dropBogusTraces(st)

                    slon, slat = codes[4], codes[5]
                    for ei in eventIndices:
                        event = events[ei]
                        po = event.preferred_origin
                        da = gps2dist_azimuth(po.lat, po.lon, slat, slon)
                        mag = None
                        if (event.preferred_magnitude):
                            mag = event.preferred_magnitude.magnitude_value
                        elif (len(po.magnitude_list)):
                            mag = po.magnitude_list[0].magnitude_value
                        if (mag is None): mag = np.nan

                        if (np.isnan(mag) or mag < min_magnitude): continue

                        result = extract_p(
                            taupyModel,
                            pickerlist_p,
                            event,
                            slon,
                            slat,
                            st,
                            plot_output_folder=plot_output_folder)
                        if (result):
                            picklist, residuallist, snrlist, bandindex, pickerindex = result

                            arcdistance = kilometers2degrees(da[0] / 1e3)
                            for ip, pick in enumerate(picklist):
                                line = '%s %f %f %f %f %f ' \
                                       '%s %s %s %f %f %f ' \
                                       '%f %f %f ' \
                                       '%f %f %f %f %f '\
                                       '%d %d\n' % (event.public_id, po.utctime.timestamp, mag, po.lon, po.lat, po.depthkm,
                                                    codes[0], codes[1], codes[3], pick.timestamp, slon, slat,
                                                    da[1], da[2], arcdistance,
                                                    residuallist[ip], snrlist[ip, 0], snrlist[ip, 1], snrlist[ip, 2], snrlist[ip, 3],
                                                    bandindex, sigmalist[pickerindex])
                                ofp.write(line)
                            # end for
                            ofp.flush()
                            pickCountP += 1
                        # end if

                        if (len(stations_nch) == 0 and len(stations_ech) == 0):
                            result = extract_s(
                                taupyModel,
                                pickerlist_s,
                                event,
                                slon,
                                slat,
                                st,
                                None,
                                da[2],
                                plot_output_folder=plot_output_folder)
                            if (result):
                                picklist, residuallist, snrlist, bandindex, pickerindex = result

                                arcdistance = kilometers2degrees(da[0] / 1e3)
                                for ip, pick in enumerate(picklist):
                                    line = '%s %f %f %f %f %f ' \
                                           '%s %s %s %f %f %f ' \
                                           '%f %f %f ' \
                                           '%f %f %f %f %f ' \
                                           '%d %d\n' % (event.public_id, po.utctime.timestamp, mag, po.lon, po.lat, po.depthkm,
                                                        codes[0], codes[1], codes[3], pick.timestamp, slon, slat,
                                                        da[1], da[2], arcdistance,
                                                        residuallist[ip], snrlist[ip, 0], snrlist[ip, 1], snrlist[ip, 2], snrlist[ip, 3],
                                                        bandindex, sigmalist[pickerindex])
                                    ofs.write(line)
                                # end for
                                ofs.flush()
                                pickCountS += 1
                            # end if
                        # end if
                    # end for

                    traceCountP += len(st)
                # end for

                if (len(stations_nch) > 0
                        and len(stations_nch) == len(stations_ech)):
                    for codesn, codese in zip(stations_nch, stations_ech):
                        if not progTracker.increment():
                            continue

                        stn = fds.get_waveforms(codesn[0],
                                                codesn[1],
                                                codesn[2],
                                                codesn[3],
                                                curr,
                                                curr + step,
                                                automerge=True,
                                                trace_count_threshold=200)
                        ste = fds.get_waveforms(codese[0],
                                                codese[1],
                                                codese[2],
                                                codese[3],
                                                curr,
                                                curr + step,
                                                automerge=True,
                                                trace_count_threshold=200)

                        dropBogusTraces(stn)
                        dropBogusTraces(ste)

                        if (len(stn) == 0): continue
                        if (len(ste) == 0): continue

                        slon, slat = codesn[4], codesn[5]

                        for ei in eventIndices:
                            event = events[ei]
                            po = event.preferred_origin
                            da = gps2dist_azimuth(po.lat, po.lon, slat, slon)

                            mag = None
                            if (event.preferred_magnitude):
                                mag = event.preferred_magnitude.magnitude_value
                            elif (len(po.magnitude_list)):
                                mag = po.magnitude_list[0].magnitude_value
                            if (mag is None): mag = np.nan

                            if (np.isnan(mag) or mag < min_magnitude): continue

                            result = extract_s(
                                taupyModel,
                                pickerlist_s,
                                event,
                                slon,
                                slat,
                                stn,
                                ste,
                                da[2],
                                plot_output_folder=plot_output_folder)
                            if (result):
                                picklist, residuallist, snrlist, bandindex, pickerindex = result

                                arcdistance = kilometers2degrees(da[0] / 1e3)
                                for ip, pick in enumerate(picklist):
                                    line = '%s %f %f %f %f %f ' \
                                           '%s %s %s %f %f %f ' \
                                           '%f %f %f ' \
                                           '%f %f %f %f %f ' \
                                           '%d %d\n' % (event.public_id, po.utctime.timestamp, mag, po.lon, po.lat, po.depthkm,
                                                        codesn[0], codesn[1], '00T', pick.timestamp, slon, slat,
                                                        da[1], da[2], arcdistance,
                                                        residuallist[ip], snrlist[ip, 0], snrlist[ip, 1], snrlist[ip, 2], snrlist[ip, 3],
                                                        bandindex, sigmalist[pickerindex])
                                    ofs.write(line)
                                # end for
                                ofs.flush()
                                pickCountS += 1
                            # end if
                        # end for

                        traceCountS += (len(stn) + len(ste))
                    # end for
                # end if
            # end if
            curr += step
            dayCount += 1
        # wend
        sw_stop = datetime.now()
        totalTime = (sw_stop - sw_start).total_seconds()

        gc.collect()
        print(('(Rank %d: %5.2f%%, %d/%d) Processed %d traces and found %d p-arrivals and %d s-arrivals for '
               'network %s station %s in %f s. Memory usage: %5.2f MB.') %
              (rank, (float(totalTraceCount) / float(workload) * 100) if workload > 0 else 100, totalTraceCount, workload,
               traceCountP + traceCountS, pickCountP, pickCountS, nc, sc, totalTime,
               round(psutil.Process().memory_info().rss / 1024. / 1024., 2)))
    # end for
    ofp.close()
    ofs.close()

    print('Processing complete on rank %d' % (rank))

    del fds
Example No. 18
# Parallelised autopick harvester. There are on the order of a million picks, so
# parallel processing is the only practical way to get through them.
import sys

from seismic.ASDFdatabase.FederatedASDFDataSet import FederatedASDFDataSet
from seismic.ml_classifier.data_harvester.autopicks import pickLoaderRand

from obspy.clients.fdsn.client import Client
ic = Client("IRIS")
fds = FederatedASDFDataSet(
    '/g/data/ha3/Passive/SHARED_DATA/Index/asdf_files.txt',
    variant='db',
    use_json_db=True,
    logger=None)

import numpy as np

pl = pickLoaderRand(fds, ic)

import multiprocessing as mp

nproc = mp.cpu_count()
print(nproc)


def lockInit(l):
    global lock
    lock = l


l = mp.Lock()
pool = mp.Pool(processes=nproc, initializer=lockInit, initargs=(l, ))
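
# A minimal sketch (an assumption, not part of the original snippet) of the usual
# pattern for using the pool together with the shared lock initialised above; the
# worker below is a hypothetical placeholder, not the project's real harvester.
def _demo_worker(i):
    # `lock` is the global set by lockInit in each worker process; it serialises
    # access to shared resources such as a common output file.
    with lock:
        print('processing work item', i)
    return i

# Example dispatch (commented out so the sketch has no side effects):
# results = pool.map(_demo_worker, range(nproc))
# pool.close()
# pool.join()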
Example No. 19
from obspy.signal.detrend import simple, spline
from obspy.signal.filter import bandpass

from seismic.ASDFdatabase.FederatedASDFDataSet import FederatedASDFDataSet

from obspy import UTCDateTime, read_events, read_inventory
from obspy.taup.taup_geo import calc_dist
from obspy.clients.iris import Client as IrisClient
from obspy.clients.fdsn import Client
from obspy.taup import TauPyModel
from obspy.signal.trigger import trigger_onset, z_detect, classic_sta_lta, recursive_sta_lta, ar_pick
from obspy.signal.rotate import rotate_ne_rt
from obspy.core.event import Pick, CreationInfo, WaveformStreamID, ResourceIdentifier, Arrival, Event, Origin, \
    OriginQuality, Magnitude, Comment

fds = FederatedASDFDataSet(
    '/g/data/ha3/Passive/SHARED_DATA/Index/asdf_files.txt', logger=None)

stations = fds.get_stations('2009-05-17T00:00:00',
                            '2009-05-18T00:00:00',
                            station='QLP')

print(stations)

s = fds.get_waveforms('AU',
                      'QLP',
                      '',
                      'BHE',
                      '2011-03-15T00:00:00',
                      '2011-03-16T00:00:00',
                      trace_count_threshold=10)
print(s)
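
# A short follow-on sketch (an assumption, not in the original snippet) applying the
# detrend and bandpass functions imported above to the fetched stream; the corner
# frequencies are illustrative values only.
for tr in s:
    tr.data = simple(tr.data)  # remove a simple linear trend
    tr.data = bandpass(tr.data, freqmin=0.05, freqmax=2.0,
                       df=tr.stats.sampling_rate, corners=4, zerophase=True)
print(s)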
Example No. 20
def main(inventory_file,
         waveform_database,
         event_catalog_file,
         event_trace_datafile,
         start_time,
         end_time,
         taup_model,
         distance_range,
         magnitude_range,
         catalog_only=False):

    log = logging.getLogger(__name__)
    log.setLevel(logging.INFO)

    waveform_db_is_web = is_url(
        waveform_database
    ) or waveform_database in obspy.clients.fdsn.header.URL_MAPPINGS
    if not waveform_db_is_web:
        assert os.path.exists(
            waveform_database), "Cannot find waveform database file {}".format(
                waveform_database)
    log.info("Using waveform data source: {}".format(waveform_database))

    min_dist_deg = distance_range[0]
    max_dist_deg = distance_range[1]
    min_mag = magnitude_range[0]
    max_mag = magnitude_range[1]

    inventory = read_inventory(inventory_file)
    log.info("Loaded inventory {}".format(inventory_file))

    # Compute reference lonlat from the inventory.
    channels = inventory.get_contents()['channels']
    lonlat_coords = []
    for ch in channels:
        coords = inventory.get_coordinates(ch)
        lonlat_coords.append((coords['longitude'], coords['latitude']))
    lonlat_coords = np.array(lonlat_coords)
    lonlat = np.mean(lonlat_coords, axis=0)
    log.info("Inferred reference coordinates {}".format(lonlat))

    # If start and end time not provided, infer from date range of inventory.
    if not start_time:
        start_time = inventory[0].start_date
        for net in inventory:
            start_time = min(start_time, net.start_date)
        log.info("Inferred start time {}".format(start_time))
    # end if
    if not end_time:
        end_time = inventory[0].end_date
        if end_time is None:
            end_time = UTC.now()
        for net in inventory:
            end_time = max(end_time, net.end_date)
        log.info("Inferred end time {}".format(end_time))
    # end if

    start_time = UTC(start_time)
    end_time = UTC(end_time)
    event_catalog_file = timestamp_filename(event_catalog_file, start_time,
                                            end_time)
    event_trace_datafile = timestamp_filename(event_trace_datafile, start_time,
                                              end_time)
    assert not os.path.exists(event_trace_datafile), \
        "Output file {} already exists, please remove!".format(event_trace_datafile)
    log.info("Traces will be written to: {}".format(event_trace_datafile))

    exit_after_catalog = catalog_only
    catalog = get_events(lonlat, start_time, end_time, event_catalog_file,
                         (min_dist_deg, max_dist_deg), (min_mag, max_mag),
                         exit_after_catalog)

    if waveform_db_is_web:
        log.info("Use fresh query results from web")
        client = Client(waveform_database)
        waveform_getter = client.get_waveforms
    else:
        # Form closure to allow waveform source file to be derived from a setting (or command line input)
        asdf_dataset = FederatedASDFDataSet(waveform_database, logger=log)

        def closure_get_waveforms(network, station, location, channel,
                                  starttime, endtime):
            return asdf_get_waveforms(asdf_dataset, network, station, location,
                                      channel, starttime, endtime)

        waveform_getter = closure_get_waveforms
    # end if

    with tqdm(smoothing=0) as pbar:
        stream_count = 0
        for s in iter_event_data(catalog,
                                 inventory,
                                 waveform_getter,
                                 tt_model=taup_model,
                                 pbar=pbar):
            # Write traces to output file in append mode so that arbitrarily large file
            # can be processed. If the file already exists, then existing streams will
            # be overwritten rather than duplicated.
            # Check first if rotation for unaligned *H1, *H2 channels to *HN, *HE is required.
            if not s:
                continue
            # end if
            if s.select(component='1') and s.select(component='2'):
                try:
                    s.rotate('->ZNE', inventory=inventory)
                except ValueError as e:
                    log.error('Unable to rotate to ZNE with error:\n{}'.format(
                        str(e)))
                    continue
                # end try
            # end if
            # Order the traces in ZNE ordering. This is required so that normalization
            # can be specified in terms of an integer index, i.e. the default of 0 in rf
            # library will normalize against the Z component.
            s.traces = sorted(s.traces, key=zne_order)
            # Assert the ordering of traces in the stream is ZNE.
            assert s[0].stats.channel[-1] == 'Z'
            assert s[1].stats.channel[-1] == 'N'
            assert s[2].stats.channel[-1] == 'E'
            # Iterator returns rf.RFStream. Write traces from obspy.Stream to decouple from RFStream.
            grp_id = '.'.join(s.traces[0].id.split('.')[0:3])
            event_time = str(s.traces[0].meta.event_time)[0:19]
            pbar.set_description("{} -- {}".format(grp_id, event_time))
            out_stream = obspy.Stream([tr for tr in s])
            assert out_stream[0].stats.channel[-1] == 'Z'
            assert out_stream[1].stats.channel[-1] == 'N'
            assert out_stream[2].stats.channel[-1] == 'E'
            write_h5_event_stream(event_trace_datafile, out_stream, mode='a')
            stream_count += 1
        # end for

        if stream_count == 0:
            log.warning("No traces found!")
        else:
            log.info("Wrote {} streams to output file".format(stream_count))