Example #1
def test_func(self, mkdir_mock):
    o = (self.evt.preferred_origin() or self.evt.origins[0])
    ot_loc = UTCDateTime(o.time, precision=-1).format_fissures()[:-6]
    evtlat_loc = str(roundhalf(o.latitude))
    evtlon_loc = str(roundhalf(o.longitude))
    folder = os.path.join('rawloc',
                          '%s_%s_%s' % (ot_loc, evtlat_loc, evtlon_loc))
    net = 'bla'
    stat = 'blub'
    fn = os.path.join(folder, '%s.%s.mseed' % (net, stat))
    with patch.object(self.st, 'write') as write_mock:
        pu.save_raw_mseed(self.evt, self.st, 'rawloc', net, stat)
        write_mock.assert_called_once_with(fn, fmt='mseed')
    mkdir_mock.assert_called_once_with(folder, exist_ok=True)
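
The assertions above pin down the behaviour of the helper under test. Below is a minimal sketch of what a save_raw_mseed-like function could look like, inferred purely from those assertions; the import paths and the exact signature are assumptions, not PyGLImER's verified implementation:

import os

from obspy import Stream, UTCDateTime

from pyglimer.utils.roundhalf import roundhalf  # import path assumed


def save_raw_mseed_sketch(evt, st: Stream, rawloc: str, net: str, stat: str):
    # Build the event folder name from the coarsened origin time and the
    # half-degree-rounded coordinates, exactly as the test computes it.
    o = evt.preferred_origin() or evt.origins[0]
    ot_loc = UTCDateTime(o.time, precision=-1).format_fissures()[:-6]
    evtlat_loc = str(roundhalf(o.latitude))
    evtlon_loc = str(roundhalf(o.longitude))
    folder = os.path.join(
        rawloc, '%s_%s_%s' % (ot_loc, evtlat_loc, evtlon_loc))
    # Presumably os.makedirs, patched as mkdir_mock in the test
    os.makedirs(folder, exist_ok=True)
    fn = os.path.join(folder, '%s.%s.mseed' % (net, stat))
    # The test asserts the keyword fmt exactly as shown here
    st.write(fn, fmt='mseed')  # patched as write_mock in the test
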
Example #2
def setUp(self):
    self.evtcat = read_events()
    self.rawdir = os.path.join('path', 'to', 'waveforms')
    self.calls = []
    self.writeraw_calls = []
    for event in self.evtcat:
        origin_time = event.origins[0].time
        ot_loc = UTCDateTime(origin_time,
                             precision=-1).format_fissures()[:-6]
        evtlat = event.origins[0].latitude
        evtlon = event.origins[0].longitude
        evtlat_loc = str(roundhalf(evtlat))
        evtlon_loc = str(roundhalf(evtlon))
        evtdir = os.path.join(
            self.rawdir, '%s_%s_%s' % (ot_loc, evtlat_loc, evtlon_loc))
        self.calls.append(call(evtdir))
        self.writeraw_calls.append(
            call(event, evtdir, os.path.join('path', 'to', 'response'),
                 False, True))
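
The two call lists collected in setUp would typically be checked later with unittest.mock's assert_has_calls. A hedged illustration of how a test method might consume them (the mock names are hypothetical):

# Inside a test method of the same TestCase, after running the code under
# test with the directory-creation and writeraw functions patched
# (mock names are illustrative):
mkdir_mock.assert_has_calls(self.calls)
writeraw_mock.assert_has_calls(self.writeraw_calls)
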
Example #3
def rewrite_to_hdf5(catfile: str, rawfolder: str, statloc: str):
    """
    Converts an existing miniseed waveform database to hierachal data format
    (hdf5).

    :param catfile: The pat hto the event catalogue that was used to download
        the raw data. Will be altered during the process (removes already used
        ones).
    :type catfile: path to obspy.Catalog (str)
    :param rawfolder: The folder that the raw data is saved in - ending with
        the phase code (i.e., waveforms/raw/P)
    :type rawfolder: str
    :param statloc: Location that the station xmls are saved in.
    :type statloc: str
    """
    # Create backup of original catalog
    shutil.copyfile(catfile, '%s_bac' % catfile)
    cat = read_events(catfile)
    while cat.count():
        event = cat[0]
        origin_time = event.origins[0].time
        ot_loc = UTCDateTime(origin_time, precision=-1).format_fissures()[:-6]
        evtlat = event.origins[0].latitude
        evtlon = event.origins[0].longitude
        evtlat_loc = str(roundhalf(evtlat))
        evtlon_loc = str(roundhalf(evtlon))
        evtdir = os.path.join(rawfolder,
                              '%s_%s_%s' % (ot_loc, evtlat_loc, evtlon_loc))
        if not os.path.isdir(evtdir):
            # No data was downloaded for this event
            pass
        elif not os.listdir(evtdir):
            # Remove empty event directories
            os.rmdir(evtdir)
        else:
            writeraw(event, evtdir, statloc, False, True)
        logging.warning('removing event...')
        del cat[0]
        # Overwrite the old catalogue so an interrupted run can resume
        # without re-processing events that are already done
        cat.write(catfile, format="QUAKEML")
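
A hedged usage sketch for rewrite_to_hdf5; all paths below are placeholders and assume the directory layout described in the docstring:

import os

# Convert the raw MiniSEED database for phase P to HDF5; the catalogue
# file is shrunk as events are processed (a *_bac backup is created first).
rewrite_to_hdf5(
    catfile='database/event_catalogue.xml',
    rawfolder=os.path.join('waveforms', 'raw', 'P'),
    statloc='stations')
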
Example #4
def download_small_db(
    phase: str, min_epid: float, max_epid: float, model: TauPyModel,
    event_cat: Catalog, tz: float, ta: float, statloc: str,
    rawloc: str, clients: list, network: str, station: str, channel: str,
        saveasdf: bool):
    """
    see corresponding method :meth:`~pyglimer.waveform.request.Request.\
    download_waveforms_small_db`
    """

    # logging
    logger = logging.getLogger('pyglimer.request')

    # If station and network are None, fall back to wildcards
    station = station or '*'
    network = network or '*'
    # First, download the station inventories; they are subsequently used
    # to compute the theoretical arrival times
    clients = pu.get_multiple_fdsn_clients(clients)

    logger.info('Requesting data from the following FDSN servers:\n %s' % str(
        clients))

    bulk_stat = pu.create_bulk_str(network, station, '*', channel, '*', '*')

    logger.info('Bulk_stat parameter created.')
    logger.debug('Bulk stat parameters: %s' % str(bulk_stat))

    logger.info('Initialising station response download.')

    # Create Station Output folder
    os.makedirs(statloc, exist_ok=True)

    # Run parallel station loop.
    out = Parallel(n_jobs=-1, prefer='threads')(
        delayed(pu.__client__loop__)(client, statloc, bulk_stat)
        for client in clients)
    inv = pu.join_inv(out)

    logger.info(
        'Computing theoretical times of arrival and checking available data.')

    # Now compute the theoretical arrivals using the events and the
    # station information.
    # Collect the request parameters in a dict of lists, one entry per
    # waveform that still needs to be downloaded:
    d = {'event': [], 'startt': [], 'endt': [], 'net': [], 'stat': []}
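    # For illustration, after the station/event loops below, d might look
    # like this (all values hypothetical):
    # d = {'event': [<Event A>, <Event B>],
    #      'startt': [UTCDateTime(...), ...],
    #      'endt': [UTCDateTime(...), ...],
    #      'net': ['IU', 'GE'], 'stat': ['ANMO', 'WLF']}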
    for net in inv:
        for stat in net:
            logger.info(f"Checking {net.code}.{stat.code}")
            for evt in event_cat:
                try:
                    toa, _, _, _, delta = compute_toa(
                        evt, stat.latitude, stat.longitude, phase, model)
                except (IndexError, ValueError):
                    # Occurs when there is no arrival of the phase at stat
                    logger.debug(
                        'No valid arrival found for station %s, ' % stat.code
                        + 'event %s, and phase %s' % (evt.resource_id, phase))
                    continue

                # Already in DB?
                if saveasdf:
                    if wav_in_asdf(net, stat, '*', channel, toa-tz, toa+ta):
                        logger.info(
                            'File already in database. %s ' % stat.code
                            + 'Event: %s' % evt.resource_id)
                        continue
                else:
                    o = (evt.preferred_origin() or evt.origins[0])
                    ot_loc = UTCDateTime(
                        o.time, precision=-1).format_fissures()[:-6]
                    evtlat_loc = str(roundhalf(o.latitude))
                    evtlon_loc = str(roundhalf(o.longitude))
                    folder = os.path.join(
                        rawloc, '%s_%s_%s' % (ot_loc, evtlat_loc, evtlon_loc))
                    fn = os.path.join(
                        folder, '%s.%s.mseed' % (net.code, stat.code))
                    if os.path.isfile(fn):
                        logger.info(
                            'File already in database. %s ' % stat.code
                            + 'Event: %s' % evt.resource_id)
                        continue
                # It's new data, so add to request!
                d['event'].append(evt)
                d['startt'].append(toa-tz)
                d['endt'].append(toa+ta)
                d['net'].append(net.code)
                d['stat'].append(stat.code)

    # Create waveform download bulk list
    bulk_wav = pu.create_bulk_str(
        d['net'], d['stat'], '*', channel, d['startt'], d['endt'])

    if len(bulk_wav) == 0:
        logger.info('No new data found.')
        return

    # Sort bulk request
    bulk_wav.sort()

    # This does almost certainly need to be split up, so we don't overload the
    # RAM with the downloaded mseeds
    logger.info('Initialising waveform download.')
    logger.debug('The request string looks like this:')
    for _bw in bulk_wav:
        logger.debug(f"{_bw}")

    # Create waveform directories
    os.makedirs(rawloc, exist_ok=True)

    if len(clients) == 1:
        pu.__client__loop_wav__(clients[0], rawloc, bulk_wav, d, saveasdf, inv)
    else:
        Parallel(n_jobs=-1, prefer='threads')(
            delayed(pu.__client__loop_wav__)(
                client, rawloc, bulk_wav, d, saveasdf, inv)
            for client in clients)
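
A hedged usage sketch for download_small_db; every value below is a placeholder chosen for illustration, not a recommended configuration:

import os

from obspy import read_events
from obspy.taup import TauPyModel

event_cat = read_events('database/event_catalogue.xml')
download_small_db(
    phase='P', min_epid=28.5, max_epid=95.5, model=TauPyModel('iasp91'),
    event_cat=event_cat, tz=30, ta=120, statloc='stations',
    rawloc=os.path.join('waveforms', 'raw', 'P'), clients=['IRIS'],
    network='IU', station='*', channel='BH?', saveasdf=False)
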
Example #5
def downloadwav(
    phase: str, min_epid: float, max_epid: float, model: TauPyModel,
    event_cat: Catalog, tz: float, ta: float, statloc: str,
    rawloc: str, clients: list, evtfile: str, network: str = None,
    station: str = None, saveasdf: bool = False,
    log_fh: logging.FileHandler = None, loglvl: int = logging.WARNING,
        verbose: bool = False, fast_redownload: bool = False):
    """
    Downloads the waveforms for all events in the catalogue, for a circular
    domain around each epicentre with the defined epicentral distances, from
    the FDSN servers given in clients. StationXML files for the
    corresponding stations are downloaded as well.

    Parameters
    ----------
    phase : string
        Arrival phase to be used. P, S, SKS, or ScS.
    min_epid : float
        Minimal epicentral distance to be downloaded.
    max_epid : float
        Maximal epicentral distance to be downloaded.
    model : obspy.taup.TauPyModel
        1D velocity model to calculate arrival.
    event_cat : Obspy event catalog
        Catalog containing all events, for which waveforms should be
        downloaded.
    tz : int
        time window before first arrival to download (seconds)
    ta : int
        time window after first arrival to download (seconds)
    statloc : string
        Directory containing the station xmls.
    rawloc : string
        Directory containing the raw seismograms.
    clients : list
        List of FDSN servers. See obspy.Client documentation for acronyms.
    evtfile : string
        Path to the event catalogue file; if fast_redownload is True, the
        not yet processed remainder of the catalogue is written back to
        this file after each event.
    network : string or list, optional
        Network restrictions. Only download from these networks, wildcards
        allowed. The default is None.
    station : string or list, optional
        Only allowed if network != None. Station restrictions.
        Only download from these stations, wildcards are allowed.
        The default is None.
    saveasdf : bool, optional
        Save the dataset in the Adaptable Seismic Data Format (ASDF;
        recommended). Otherwise, one will be left with individual .mseed
        files. The default is False.
    log_fh : logging.FileHandler, optional
        file handler to be used for the massdownloader logger.
    loglvl : int, optional
        Use this logging level.
    verbose : bool, optional
        Set to True when experiencing issues with the download. The output
        of the obspy MassDownloader will then be logged in download.log.
    fast_redownload : bool, optional
        Write the not yet downloaded remainder of the event catalogue back
        to evtfile after each event, so that an interrupted download can be
        resumed. The default is False.

    Returns
    -------
    None

    """

    # needed to check whether data is already in the asdf
    global asdfsave
    asdfsave = saveasdf

    # Calculate the min and max theoretical arrival time after event time
    # according to minimum and maximum epicentral distance
    min_time = model.get_travel_times(source_depth_in_km=500,
                                      distance_in_degree=min_epid,
                                      phase_list=[phase])[0].time

    max_time = model.get_travel_times(source_depth_in_km=0.001,
                                      distance_in_degree=max_epid,
                                      phase_list=[phase])[0].time

    mdl = MassDownloader(providers=clients)

    ###########
    # logging for the download
    fdsn_mass_logger = logging.getLogger("obspy.clients.fdsn.mass_downloader")
    fdsn_mass_logger.setLevel(loglvl)

    # Create a file handler for the log
    if log_fh is None:
        fh = logging.FileHandler(os.path.join('logs', 'download.log'))
        fh.setLevel(loglvl)
        # Create Formatter
        fmt = logging.Formatter(
            fmt='%(asctime)s - %(levelname)s - %(message)s')
        fh.setFormatter(fmt)
    else:
        fh = log_fh

    fdsn_mass_logger.addHandler(fh)

    ####
    # Loop over each event
    global event
    for ii, event in enumerate(tqdm(event_cat)):
        # fetch event-data
        origin_time = event.origins[0].time
        ot_fiss = UTCDateTime(origin_time).format_fissures()
        fdsn_mass_logger.info('Downloading event: '+ot_fiss)
        evtlat = event.origins[0].latitude
        evtlon = event.origins[0].longitude

        # Download location
        ot_loc = UTCDateTime(origin_time, precision=-1).format_fissures()[:-6]
        evtlat_loc = str(roundhalf(evtlat))
        evtlon_loc = str(roundhalf(evtlon))
        tmp.folder = os.path.join(
            rawloc, '%s_%s_%s' % (ot_loc, evtlat_loc, evtlon_loc))

        # create folder for each event
        os.makedirs(tmp.folder, exist_ok=True)

        # Circular domain around the epicenter. This module also offers
        # rectangular and global domains. More complex domains can be
        # defined by inheriting from the Domain class.

        domain = CircularDomain(latitude=evtlat, longitude=evtlon,
                                minradius=min_epid, maxradius=max_epid)

        restrictions = Restrictions(
            # Get data from sufficient time before earliest arrival
            # and after the latest arrival
            # Note: All the traces will still have the same length
            starttime=origin_time + min_time - tz,
            endtime=origin_time + max_time + ta,
            network=network, station=station,
            # You might not want to deal with gaps in the data.
            # If this setting is
            # True, any trace with a gap/overlap will be discarded.
            # This will delete streams with several traces!
            reject_channels_with_gaps=False,
            # And you might only want waveforms that have data for at least 95%
            # of the requested time span. Any trace that is shorter than 95% of
            # the desired total duration will be discarded.
            minimum_length=0.95,  # With 1.00 it would always delete the
            # waveform
            # No two stations should be closer than 100 m to each other.
            # This is useful to, for example, filter out stations that are
            # part of different networks but at the same physical location.
            # Setting this option to zero or None will disable the
            # filtering. It guards against the same station having
            # different names.
            minimum_interstation_distance_in_m=100.0,
            # Only HH or BH channels. If a station has BH channels, those will
            # be downloaded, otherwise the HH. Nothing will be downloaded if it
            # has neither.
            channel_priorities=["BH[ZNE12]", "HH[ZNE12]"],
            # Location codes are arbitrary and there is no rule as to which
            # location is best. Same logic as for the previous setting.
            # location_priorities=["", "00", "10"],
            sanitize=False
            # sanitize=True would discard all mseeds for which no station
            # information is available. Set to False because otherwise the
            # same data would be redownloaded over and over, slowing the
            # script down.
        )

        # The data will be downloaded to the ``./waveforms/`` and
        # ``./stations/`` folders with automatically chosen file names.
        incomplete = True
        while incomplete:
            try:
                mdl.download(
                    domain, restrictions,
                    mseed_storage=get_mseed_storage,
                    stationxml_storage=statloc,
                    threads_per_client=3, download_chunk_size_in_mb=50)
                incomplete = False
            except IncompleteRead:
                continue  # Just retry on a poor connection
            except Exception:
                incomplete = False  # Any other error: move on to next event

        # 2021.02.15 Here, we write everything to asdf
        if saveasdf:
            writeraw(event, tmp.folder, statloc, verbose, True)
            # If that works, we will be deleting the cached mseeds here
            try:
                shutil.rmtree(tmp.folder)
            except FileNotFoundError:
                # Oddly, this error occasionally occurs even though the
                # folder exists. We do not want the whole process to stop
                # because of it.
                pass
        if fast_redownload:
            event_cat[ii:].write(evtfile, format="QUAKEML")

    if not saveasdf:
        download_full_inventory(statloc, clients)
    tmp.folder = "finished"  # removes the restriction for preprocess.py