def find_seismogram(top_level_dir,
                    starttime,
                    endtime,
                    stations=['S12', 'S14', 'S15', 'S16'],
                    channels=['MH1', 'MH2', 'MHZ'],
                    dir_type='pdart_dir'):

    for station in stations:
        stream = Stream()
        channel = '*'
        if dir_type == 'processed_dir':
            dir = find_processed_dir(top_level_dir, starttime.year, station)
            filename = '*%s.%s.%s.%s.%s.%03d*.gz' % (
                'XA', station, '*', channel, str(
                    starttime.year), starttime.julday)
        else:
            dir = find_dir(top_level_dir, starttime.year, station, channel)
            filename = '%s.%s.%s.%s.%s.%03d.gz' % ('XA', station, '*', channel,
                                                   str(starttime.year),
                                                   starttime.julday)
        filename = os.path.join(dir, filename)
        try:
            stream += read(filename)
        except Exception as e:
            print(str(e))

        if starttime.julday != endtime.julday:
            if dir_type == 'processed_dir':
                dir = find_processed_dir(top_level_dir, endtime.year, station)
                filename = '*%s.%s.%s.%s.%s.%03d*.gz' % (
                    'XA', station, '*', channel, str(
                        endtime.year), endtime.julday)
            else:
                dir = find_dir(top_level_dir, endtime.year, station, channel)
                filename = '*%s.%s.%s.%s.%s.%03d*.gz' % (
                    'XA', station, '*', channel, str(
                        endtime.year), endtime.julday)
            filename = os.path.join(dir, filename)
            try:
                stream += read(filename)
            except Exception as e:
                print(str(e))

        # print('Before ', stream)
        stream = stream.trim(starttime=starttime, endtime=endtime)
        # print('After ', stream)

        if stream is not None and len(stream) > 0:
            # iterate over a static list so removing traces does not skip
            # the following trace in the stream
            for tr in list(stream):
                tr.stats.location = ''
                if tr.stats.channel not in channels:
                    stream.remove(tr)

            stream.merge()

    return stream
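
# Usage sketch: assumes the find_dir / find_processed_dir helpers from the
# original module are importable and that a PDART-style archive lives under
# the (hypothetical) path "/data/pdart".
if __name__ == "__main__":
    from obspy import UTCDateTime
    start = UTCDateTime("1971-02-07T00:45:00")
    st = find_seismogram("/data/pdart", start, start + 3600,
                         stations=["S12"], channels=["MHZ"])
    print(st)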
Example #2
def get_streams_gema(networks,
                     stations,
                     starttime,
                     endtime,
                     only_vertical_channel=False,
                     local_dir_name=None):
    if not local_dir_name:
        local_dir_name = "%s/archive" % (os.getenv("HOME"))

    if only_vertical_channel:
        channels = "*Z"
    else:
        channels = "*"

    # READ ARCHIVE DATABASE
    st = Stream()
    this_day = UTCDateTime(starttime.strftime("%Y-%m-%d"))
    last_day = UTCDateTime(endtime.strftime("%Y-%m-%d"))
    while this_day <= last_day:
        for network, station in zip(networks, stations):
            pattern = '%s/%s/%s/%s/%s*' % (local_dir_name,
                                           this_day.strftime("%Y"), network,
                                           station, channels)
            paths_ch = sorted(glob.glob(pattern))
            for path in paths_ch:
                pattern = "%s/*%s" % (path, this_day.strftime("%Y.%03j"))
                msfile_list = glob.glob(pattern)
                if len(msfile_list) > 0:
                    for msfile in msfile_list:
                        st += read(msfile,
                                   starttime=starttime,
                                   endtime=endtime)

        this_day += 86400

    # PATCH PROBLEM DIFFERENT SAMPLING RATES IN LONQ STATION FROM SCREAM
    for tr in st.select(station="LONQ"):
        if tr.stats.sampling_rate != 50:
            st.remove(tr)

    # EXPORT GAPS AND MERGE STREAM
    gaps = st.get_gaps()
    if len(st) > 0:  # and len(gaps)>0
        st.trim(starttime, endtime)
        st.merge(method=1, interpolation_samples=-1, fill_value='interpolate')

    return st, gaps
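
# Usage sketch: the network/station pair below is hypothetical, and the
# archive is expected under $HOME/archive (the function's default
# local_dir_name).
if __name__ == "__main__":
    from obspy import UTCDateTime
    t1 = UTCDateTime("2020-01-01T12:00:00")
    st, gaps = get_streams_gema(networks=["C1"], stations=["LONQ"],
                                starttime=t1, endtime=t1 + 600,
                                only_vertical_channel=True)
    print(st)
    print("gaps found:", len(gaps))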
Example #3
def preprocess(db, stations, comps, goal_day, params, responses=None):
    """
    Fetches data for each ``stations`` and each ``comps`` using the
    data_availability table in the database.

    To correct for instrument responses, make sure to set ``remove_response``
    to "Y" in the config and to provide the ``responses`` DataFrame.

    :Example:
    >>> from msnoise.api import connect, get_params, preload_instrument_responses
    >>> from msnoise.preprocessing import preprocess
    >>> db = connect()
    >>> params = get_params(db)
    >>> responses = preload_instrument_responses(db)
    >>> st = preprocess(db, ["YA.UV06","YA.UV10"], ["Z",], "2010-09-01", params, responses)
    >>> st
     2 Trace(s) in Stream:
    YA.UV06.00.HHZ | 2010-09-01T00:00:00.000000Z - 2010-09-01T23:59:59.950000Z | 20.0 Hz, 1728000 samples
    YA.UV10.00.HHZ | 2010-09-01T00:00:00.000000Z - 2010-09-01T23:59:59.950000Z | 20.0 Hz, 1728000 samples

    :type db: :class:`sqlalchemy.orm.session.Session`
    :param db: A :class:`~sqlalchemy.orm.session.Session` object, as
        obtained by :func:`msnoise.api.connect`.
    :type stations: list of str
    :param stations: a list of station names, in the format NET.STA.
    :type comps: list of str
    :param comps: a list of component names, in Z,N,E,1,2.
    :type goal_day: str
    :param goal_day: the day of data to load, ISO 8601 format: e.g. 2016-12-31.
    :type params: class
    :param params: an object containing the config parameters, as obtained by
        :func:`msnoise.api.get_params`.
    :type responses: :class:`pandas.DataFrame`
    :param responses: a DataFrame containing the instrument responses, as
        obtained by :func:`msnoise.api.preload_instrument_responses`.
    :rtype: :class:`obspy.core.stream.Stream`
    :return: A Stream object containing all traces.
    """
    datafiles = {}
    output = Stream()
    for station in stations:
        datafiles[station] = {}
        net, sta = station.split('.')
        gd = datetime.datetime.strptime(goal_day, '%Y-%m-%d')
        files = get_data_availability(db,
                                      net=net,
                                      sta=sta,
                                      starttime=gd,
                                      endtime=gd)
        for comp in comps:
            datafiles[station][comp] = []
        for file in files:
            if file.comp[-1] not in comps:
                continue
            fullpath = os.path.join(file.path, file.file)
            datafiles[station][file.comp[-1]].append(fullpath)

    for istation, station in enumerate(stations):
        net, sta = station.split(".")
        for comp in comps:
            files = datafiles[station][comp]
            if len(files) != 0:
                logging.debug("%s.%s Reading %i Files" %
                              (station, comp, len(files)))
                stream = Stream()
                for file in sorted(files):
                    try:
                        st = read(file,
                                  dtype=np.float64,
                                  starttime=UTCDateTime(gd),
                                  endtime=UTCDateTime(gd) + 86400)
                    except Exception:
                        logging.debug("ERROR reading file %s" % file)
                        continue
                    for tr in st:
                        if len(tr.stats.channel) == 2:
                            tr.stats.channel += tr.stats.location
                            tr.stats.location = "00"
                    tmp = st.select(network=net, station=sta, component=comp)
                    if not len(tmp):
                        for tr in st:
                            tr.stats.network = net
                        st = st.select(network=net,
                                       station=sta,
                                       component=comp)
                    else:
                        st = tmp
                    for tr in st:
                        tr.data = tr.data.astype(np.float64)
                        tr.stats.network = tr.stats.network.upper()
                        tr.stats.station = tr.stats.station.upper()
                        tr.stats.channel = tr.stats.channel.upper()

                    stream += st
                    del st
                stream.sort()
                try:
                    # HACK not super clean... should find a way to prevent the
                    # same trace id with different sps to occur
                    stream.merge(method=1,
                                 interpolation_samples=3,
                                 fill_value=None)
                except Exception:
                    logging.debug("Error while merging...")
                    traceback.print_exc()
                    continue
                stream = stream.split()
                if not len(stream):
                    continue
                logging.debug("%s Checking sample alignment" % stream[0].id)
                for i, trace in enumerate(stream):
                    stream[i] = check_and_phase_shift(trace)

                logging.debug("%s Checking Gaps" % stream[0].id)
                if len(getGaps(stream)) > 0:
                    max_gap = params.preprocess_max_gap * stream[
                        0].stats.sampling_rate
                    only_too_long = False
                    while getGaps(stream) and not only_too_long:
                        too_long = 0
                        gaps = getGaps(stream)
                        for gap in gaps:
                            if int(gap[-1]) <= max_gap:
                                try:
                                    stream[gap[0]] = stream[gap[0]].__add__(
                                        stream[gap[1]],
                                        method=1,
                                        fill_value="interpolate")
                                    stream.remove(stream[gap[1]])
                                except Exception:
                                    stream.remove(stream[gap[1]])

                                break
                            else:
                                too_long += 1
                        if too_long == len(gaps):
                            only_too_long = True

                stream = stream.split()
                # work on a static list so removals do not skip traces
                for tr in list(stream):
                    if tr.stats.sampling_rate < (params.goal_sampling_rate -
                                                 1):
                        stream.remove(tr)
                taper_length = 20.0  # seconds
                for trace in list(stream):
                    if trace.stats.npts < 4 * taper_length * trace.stats.sampling_rate:
                        stream.remove(trace)
                    else:
                        trace.detrend(type="demean")
                        trace.detrend(type="linear")
                        trace.taper(max_percentage=None, max_length=1.0)

                if not len(stream):
                    logging.debug(" has only too small traces, skipping...")
                    continue

                for trace in stream:
                    logging.debug("%s Highpass at %.2f Hz" %
                                  (trace.id, params.preprocess_highpass))
                    trace.filter("highpass",
                                 freq=params.preprocess_highpass,
                                 zerophase=True)

                    if trace.stats.sampling_rate != params.goal_sampling_rate:
                        logging.debug("%s Lowpass at %.2f Hz" %
                                      (trace.id, params.preprocess_lowpass))
                        trace.filter("lowpass",
                                     freq=params.preprocess_lowpass,
                                     zerophase=True,
                                     corners=8)

                        if params.resampling_method == "Resample":
                            logging.debug(
                                "%s Downsample to %.1f Hz" %
                                (trace.id, params.goal_sampling_rate))
                            trace.data = resample(
                                trace.data, params.goal_sampling_rate /
                                trace.stats.sampling_rate, 'sinc_fastest')

                        elif params.resampling_method == "Decimate":
                            decimation_factor = trace.stats.sampling_rate / params.goal_sampling_rate
                            if not int(decimation_factor) == decimation_factor:
                                logging.warning(
                                    "%s CANNOT be decimated by an integer factor, consider using Resample or Lanczos methods"
                                    " Trace sampling rate = %i ; Desired CC sampling rate = %i"
                                    % (trace.id, trace.stats.sampling_rate,
                                       params.goal_sampling_rate))
                                sys.stdout.flush()
                                sys.exit()
                            logging.debug("%s Decimate by a factor of %i" %
                                          (trace.id, decimation_factor))
                            trace.data = trace.data[::int(decimation_factor)]

                        elif params.resampling_method == "Lanczos":
                            logging.debug(
                                "%s Downsample to %.1f Hz" %
                                (trace.id, params.goal_sampling_rate))
                            trace.data = np.array(trace.data)
                            trace.interpolate(
                                method="lanczos",
                                sampling_rate=params.goal_sampling_rate,
                                a=1.0)

                        trace.stats.sampling_rate = params.goal_sampling_rate

                if params.remove_response:
                    logging.debug('%s Removing instrument response' %
                                  stream[0].id)

                    response = responses[responses["channel_id"] ==
                                         stream[0].id]
                    if len(response) > 1:
                        response = response[
                            response["start_date"] <= UTCDateTime(gd)]
                    if len(response) > 1:
                        response = response[
                            response["end_date"] >= UTCDateTime(gd)]
                    elif len(response) == 0:
                        logging.info("No instrument response information "
                                     "for %s, skipping" % stream[0].id)
                        continue
                    try:
                        datalesspz = response["paz"].values[0]
                    except Exception:
                        logging.error("Bad instrument response information "
                                      "for %s, skipping" % stream[0].id)
                        continue
                    stream.simulate(
                        paz_remove=datalesspz,
                        remove_sensitivity=True,
                        pre_filt=params.response_prefilt,
                        paz_simulate=None,
                    )
                for tr in stream:
                    tr.data = tr.data.astype(np.float32)
                output += stream
                del stream
            del files
    clean_scipy_cache()
    return output
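
# Minimal standalone sketch of the small-gap patching idea used above, written
# with plain ObsPy instead of msnoise's getGaps helper. max_gap_samples plays
# the role of params.preprocess_max_gap * sampling_rate; this is an
# illustrative approximation, not the msnoise implementation.
from obspy import Stream

def patch_small_gaps(stream, max_gap_samples):
    """Join adjacent traces whose gap is at most max_gap_samples samples,
    interpolating across the gap; larger gaps stay as separate traces."""
    out = Stream()
    for trace_id in sorted(set(tr.id for tr in stream)):
        sub = stream.select(id=trace_id).sort(keys=["starttime"])
        merged = sub[0]
        for tr in sub[1:]:
            gap = (tr.stats.starttime - merged.stats.endtime) * \
                merged.stats.sampling_rate
            if gap <= max_gap_samples:
                merged = merged.__add__(tr, method=1,
                                        fill_value="interpolate")
            else:
                out += merged
                merged = tr
        out += merged
    return out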
Example #4
def preprocess(db, stations, comps, goal_day, params, responses=None):

    datafiles = {}
    output = Stream()
    for station in stations:
        datafiles[station] = {}
        net, sta = station.split('.')
        gd = datetime.datetime.strptime(goal_day, '%Y-%m-%d')
        files = get_data_availability(db,
                                      net=net,
                                      sta=sta,
                                      starttime=gd,
                                      endtime=gd)
        for comp in comps:
            datafiles[station][comp] = []
        for file in files:
            if file.comp[-1] not in comps:
                continue
            fullpath = os.path.join(file.path, file.file)
            datafiles[station][file.comp[-1]].append(fullpath)

    for istation, station in enumerate(stations):
        net, sta = station.split(".")
        for comp in comps:
            files = datafiles[station][comp]
            if len(files) != 0:
                logging.debug("%s.%s Reading %i Files" %
                              (station, comp, len(files)))
                stream = Stream()
                for file in sorted(files):
                    st = read(file,
                              dtype=np.float64,
                              starttime=UTCDateTime(gd),
                              endtime=UTCDateTime(gd) + 86400)
                    tmp = st.select(network=net, station=sta, component=comp)
                    if not len(tmp):
                        for tr in st:
                            tr.stats.network = net
                        st = st.select(network=net,
                                       station=sta,
                                       component=comp)
                    else:
                        st = tmp
                    for tr in st:
                        tr.data = tr.data.astype(np.float64)
                    stream += st
                    del st
                stream.sort()
                stream.merge(method=1,
                             interpolation_samples=3,
                             fill_value=None)
                stream = stream.split()

                logging.debug("Checking sample alignment")
                for i, trace in enumerate(stream):
                    stream[i] = check_and_phase_shift(trace)

                logging.debug("Checking Gaps")
                if len(getGaps(stream)) > 0:
                    max_gap = 10
                    only_too_long = False
                    while getGaps(stream) and not only_too_long:
                        too_long = 0
                        gaps = getGaps(stream)
                        for gap in gaps:
                            if int(gap[-1]) <= max_gap:
                                stream[gap[0]] = stream[gap[0]].__add__(
                                    stream[gap[1]],
                                    method=1,
                                    fill_value="interpolate")
                                stream.remove(stream[gap[1]])
                                break
                            else:
                                too_long += 1
                        if too_long == len(gaps):
                            only_too_long = True
                stream = stream.split()
                taper_length = 20.0  # seconds
                for trace in list(stream):
                    if trace.stats.npts < 4 * taper_length * trace.stats.sampling_rate:
                        stream.remove(trace)
                    else:
                        trace.detrend(type="demean")
                        trace.detrend(type="linear")
                        trace.taper(max_percentage=None, max_length=1.0)

                if not len(stream):
                    logging.debug(" has only too small traces, skipping...")
                    continue

                for trace in stream:
                    logging.debug("%s.%s Highpass at %.2f Hz" %
                                  (station, comp, params.preprocess_highpass))
                    trace.filter("highpass",
                                 freq=params.preprocess_highpass,
                                 zerophase=True)

                    if trace.stats.sampling_rate != params.goal_sampling_rate:
                        logging.debug(
                            "%s.%s Lowpass at %.2f Hz" %
                            (station, comp, params.preprocess_lowpass))
                        trace.filter("lowpass",
                                     freq=params.preprocess_lowpass,
                                     zerophase=True,
                                     corners=8)

                        if params.resampling_method == "Resample":
                            logging.debug(
                                "%s.%s Downsample to %.1f Hz" %
                                (station, comp, params.goal_sampling_rate))
                            trace.data = resample(
                                trace.data, params.goal_sampling_rate /
                                trace.stats.sampling_rate, 'sinc_fastest')

                        elif params.resampling_method == "Decimate":
                            decimation_factor = trace.stats.sampling_rate / params.goal_sampling_rate
                            if not int(decimation_factor) == decimation_factor:
                                logging.warning(
                                    "%s.%s CANNOT be decimated by an integer factor, consider using Resample or Lanczos methods"
                                    " Trace sampling rate = %i ; Desired CC sampling rate = %i"
                                    %
                                    (station, comp, trace.stats.sampling_rate,
                                     params.goal_sampling_rate))
                                sys.stdout.flush()
                                sys.exit()
                            logging.debug("%s.%s Decimate by a factor of %i" %
                                          (station, comp, decimation_factor))
                            trace.data = trace.data[::int(decimation_factor)]

                        elif params.resampling_method == "Lanczos":
                            logging.debug(
                                "%s.%s Downsample to %.1f Hz" %
                                (station, comp, params.goal_sampling_rate))
                            trace.data = np.array(trace.data)
                            trace.interpolate(
                                method="lanczos",
                                sampling_rate=params.goal_sampling_rate,
                                a=1.0)

                        trace.stats.sampling_rate = params.goal_sampling_rate

                if get_config(db, 'remove_response', isbool=True):
                    logging.debug('%s Removing instrument response' %
                                  stream[0].id)
                    response_prefilt = eval(get_config(db, 'response_prefilt'))

                    response = responses[responses["channel_id"] ==
                                         stream[0].id]
                    if len(response) > 1:
                        response = response[
                            response["start_date"] < UTCDateTime(gd)]
                        response = response[
                            response["end_date"] > UTCDateTime(gd)]
                    elif len(response) == 0:
                        logging.info("No instrument response information "
                                     "for %s, exiting" % stream[0].id)
                        sys.exit()
                    datalesspz = response["paz"].values[0]
                    stream.simulate(
                        paz_remove=datalesspz,
                        remove_sensitivity=True,
                        pre_filt=response_prefilt,
                        paz_simulate=None,
                    )
                for tr in stream:
                    tr.data = tr.data.astype(np.float32)
                output += stream
                del stream
            del files
    clean_scipy_cache()
    return 0, output
Example #5
def preprocess(db, stations, comps, goal_day, params, responses=None):
    """
    Fetches data for each ``stations`` and each ``comps`` using the
    data_availability table in the database.

    To correct for instrument responses, make sure to set ``remove_response``
    to "Y" in the config and to provide the ``responses`` DataFrame.

    :Example:

    >>> from msnoise.api import connect, get_params, preload_instrument_responses
    >>> from msnoise.preprocessing import preprocess
    >>> db = connect()
    >>> params = get_params(db)
    >>> responses = preload_instrument_responses(db)
    >>> st = preprocess(db, ["YA.UV06","YA.UV10"], ["Z",], "2010-09-01", params, responses)
    >>> st
     2 Trace(s) in Stream:
    YA.UV06.00.HHZ | 2010-09-01T00:00:00.000000Z - 2010-09-01T23:59:59.950000Z | 20.0 Hz, 1728000 samples
    YA.UV10.00.HHZ | 2010-09-01T00:00:00.000000Z - 2010-09-01T23:59:59.950000Z | 20.0 Hz, 1728000 samples

    :type db: :class:`sqlalchemy.orm.session.Session`
    :param db: A :class:`~sqlalchemy.orm.session.Session` object, as
        obtained by :func:`msnoise.api.connect`.
    :type stations: list of str
    :param stations: a list of station names, in the format NET.STA.
    :type comps: list of str
    :param comps: a list of component names, in Z,N,E,1,2.
    :type goal_day: str
    :param goal_day: the day of data to load, ISO 8601 format: e.g. 2016-12-31.
    :type params: class
    :param params: an object containing the config parameters, as obtained by
        :func:`msnoise.api.get_params`.
    :type responses: :class:`pandas.DataFrame`
    :param responses: a DataFrame containing the instrument responses, as
        obtained by :func:`msnoise.api.preload_instrument_responses`.
    :rtype: :class:`obspy.core.stream.Stream`
    :return: A Stream object containing all traces.
    """
    datafiles = {}
    output = Stream()
    MULTIPLEX = False
    MULTIPLEX_files = {}
    for station in stations:
        datafiles[station] = {}
        net, sta, loc = station.split('.')
        gd = datetime.datetime.strptime(goal_day, '%Y-%m-%d')
        files = get_data_availability(db,
                                      net=net,
                                      sta=sta,
                                      loc=loc,
                                      starttime=gd,
                                      endtime=gd)
        for comp in comps:
            datafiles[station][comp] = []
        for file in files:
            if file.sta != "MULTIPLEX":
                if file.chan[-1] not in comps:
                    continue
                fullpath = os.path.join(file.path, file.file)
                datafiles[station][file.chan[-1]].append(fullpath)
            else:
                MULTIPLEX = True
                print("Mutliplex mode, reading the files")
                fullpath = os.path.join(file.path, file.file)
                multiplexed = sorted(glob.glob(fullpath))
                for comp in comps:
                    for fn in multiplexed:
                        if fn in MULTIPLEX_files:
                            _ = MULTIPLEX_files[fn]
                        else:
                            # print("Reading %s" % fn)
                            _ = read(fn, format=params.archive_format or None)
                            traces = []
                            for tr in _:
                                if "%s.%s" % (
                                        tr.stats.network, tr.stats.station
                                ) in stations and tr.stats.channel[-1] in comps:
                                    traces.append(tr)
                            del _
                            _ = Stream(traces=traces)
                            MULTIPLEX_files[fn] = _
                        datafiles[station][comp].append(_)

    for istation, station in enumerate(stations):
        net, sta, loc = station.split(".")
        for comp in comps:
            files = datafiles[station][comp]
            if len(files) != 0:
                logger.debug("%s.%s Reading %i Files" %
                             (station, comp, len(files)))
                traces = []
                for file in files:
                    if isinstance(file, Stream):
                        st = file.select(network=net,
                                         station=sta,
                                         component=comp).copy()
                    else:
                        try:
                            # print("Reading %s" % file)
                            # t=  time.time()
                            st = read(file,
                                      dtype=np.float64,
                                      starttime=UTCDateTime(gd),
                                      endtime=UTCDateTime(gd) + 86400,
                                      station=sta,
                                      format=params.archive_format or None)
                            # print("done in", time.time()-t)
                        except Exception:
                            logger.debug("ERROR reading file %s" % file)
                            # TODO add traceback (optional?)
                            continue
                    for tr in st:
                        if len(tr.stats.channel) == 2:
                            tr.stats.channel += tr.stats.location
                            tr.stats.location = "00"
                    tmp = st.select(network=net, station=sta, component=comp)
                    if not len(tmp):
                        for tr in st:
                            tr.stats.network = net
                        st = st.select(network=net,
                                       station=sta,
                                       component=comp)
                    else:
                        st = tmp
                    for tr in st:
                        tr.data = tr.data.astype(np.float64)
                        tr.stats.network = tr.stats.network.upper()
                        tr.stats.station = tr.stats.station.upper()
                        tr.stats.channel = tr.stats.channel.upper()
                        if tr.stats.location == "":
                            tr.stats.location = "--"

                        traces.append(tr)
                    del st
                stream = Stream(traces=traces)
                if not (len(stream)):
                    continue
                f = io.BytesIO()
                stream.write(f, format='MSEED')
                f.seek(0)
                stream = read(f, format="MSEED")

                stream.sort()
                # try:
                #     # HACK not super clean... should find a way to prevent the
                #     # same trace id with different sps to occur
                #     stream.merge(method=1, interpolation_samples=3, fill_value=None)
                # except:
                #     logger.debug("Error while merging...")
                #     traceback.print_exc()
                #     continue
                # stream = stream.split()
                if not len(stream):
                    continue
                logger.debug("%s Checking sample alignment" % stream[0].id)
                for i, trace in enumerate(stream):
                    stream[i] = check_and_phase_shift(
                        trace, params.preprocess_taper_length)

                logger.debug("%s Checking Gaps" % stream[0].id)
                if len(getGaps(stream)) > 0:
                    max_gap = params.preprocess_max_gap * stream[
                        0].stats.sampling_rate

                    gaps = getGaps(stream)
                    while len(gaps):
                        too_long = 0
                        for gap in gaps:
                            if int(gap[-1]) <= max_gap:
                                try:
                                    stream[gap[0]] = stream[gap[0]].__add__(
                                        stream[gap[1]],
                                        method=1,
                                        fill_value="interpolate")
                                    stream.remove(stream[gap[1]])
                                except Exception:
                                    stream.remove(stream[gap[1]])

                                break
                            else:
                                too_long += 1

                        if too_long == len(gaps):
                            break
                        gaps = getGaps(stream)
                    del gaps

                stream = stream.split()
                # work on a static list so removals do not skip traces
                for tr in list(stream):
                    if tr.stats.sampling_rate < (params.goal_sampling_rate -
                                                 1):
                        stream.remove(tr)
                taper_length = params.preprocess_taper_length  # seconds
                for trace in list(stream):
                    if trace.stats.npts < (4 * taper_length *
                                           trace.stats.sampling_rate):
                        stream.remove(trace)
                    else:
                        trace.detrend(type="demean")
                        trace.detrend(type="linear")
                        trace.taper(max_percentage=None,
                                    max_length=taper_length)

                if not len(stream):
                    logger.debug(" has only too small traces, skipping...")
                    continue

                for trace in stream:
                    logger.debug("%s Highpass at %.2f Hz" %
                                 (trace.id, params.preprocess_highpass))
                    trace.filter("highpass",
                                 freq=params.preprocess_highpass,
                                 zerophase=True,
                                 corners=4)

                    if trace.stats.sampling_rate != params.goal_sampling_rate:
                        logger.debug("%s Lowpass at %.2f Hz" %
                                     (trace.id, params.preprocess_lowpass))
                        trace.filter("lowpass",
                                     freq=params.preprocess_lowpass,
                                     zerophase=True,
                                     corners=8)

                        if params.resampling_method == "Resample":
                            logger.debug("%s Downsample to %.1f Hz" %
                                         (trace.id, params.goal_sampling_rate))
                            trace.data = resample(
                                trace.data, params.goal_sampling_rate /
                                trace.stats.sampling_rate, 'sinc_fastest')

                        elif params.resampling_method == "Decimate":
                            decimation_factor = trace.stats.sampling_rate / params.goal_sampling_rate
                            if not int(decimation_factor) == decimation_factor:
                                logger.warning(
                                    "%s CANNOT be decimated by an integer factor, consider using Resample or Lanczos methods"
                                    " Trace sampling rate = %i ; Desired CC sampling rate = %i"
                                    % (trace.id, trace.stats.sampling_rate,
                                       params.goal_sampling_rate))
                                sys.stdout.flush()
                                sys.exit()
                            logger.debug("%s Decimate by a factor of %i" %
                                         (trace.id, decimation_factor))
                            trace.data = trace.data[::int(decimation_factor)]

                        elif params.resampling_method == "Lanczos":
                            logger.debug("%s Downsample to %.1f Hz" %
                                         (trace.id, params.goal_sampling_rate))
                            trace.data = np.array(trace.data)
                            trace.interpolate(
                                method="lanczos",
                                sampling_rate=params.goal_sampling_rate,
                                a=1.0)

                        trace.stats.sampling_rate = params.goal_sampling_rate
                    del trace

                if params.remove_response:
                    logger.debug('%s Removing instrument response' %
                                 stream[0].id)
                    try:
                        stream.attach_response(responses)
                        stream.remove_response(
                            pre_filt=params.response_prefilt, taper=False)
                    except Exception:
                        logger.error("Bad or no instrument response "
                                     "information for %s, skipping" %
                                     stream[0].id)
                        continue

                for tr in stream:
                    tr.data = tr.data.astype(np.float32)
                output += stream
                del stream
            del files
    clean_scipy_cache()
    del MULTIPLEX_files
    return output
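
# Standalone sketch of the in-memory MiniSEED round-trip used above (write the
# stream into a BytesIO buffer, then read it back). This is a plain ObsPy
# idiom for normalising trace encodings before processing; the synthetic trace
# below is only for illustration.
if __name__ == "__main__":
    import io
    import numpy as np
    from obspy import read, Stream, Trace, UTCDateTime

    tr = Trace(data=np.random.randn(1000).astype(np.float32))
    tr.stats.network = "XX"
    tr.stats.station = "TEST"
    tr.stats.channel = "HHZ"
    tr.stats.starttime = UTCDateTime(2020, 1, 1)
    tr.stats.sampling_rate = 20.0

    buf = io.BytesIO()
    Stream([tr]).write(buf, format="MSEED")
    buf.seek(0)
    print(read(buf, format="MSEED"))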
Example #6
def main():

    print()
    print(
        "################################################################################"
    )
    print(
        "#        __                 _                                      _           #"
    )
    print(
        "#  _ __ / _|_ __  _   _    | |__   __ _ _ __ _ __ ___   ___  _ __ (_) ___ ___  #"
    )
    print(
        "# | '__| |_| '_ \| | | |   | '_ \ / _` | '__| '_ ` _ \ / _ \| '_ \| |/ __/ __| #"
    )
    print(
        "# | |  |  _| |_) | |_| |   | | | | (_| | |  | | | | | | (_) | | | | | (__\__ \ #"
    )
    print(
        "# |_|  |_| | .__/ \__, |___|_| |_|\__,_|_|  |_| |_| |_|\___/|_| |_|_|\___|___/ #"
    )
    print(
        "#          |_|    |___/_____|                                                  #"
    )
    print(
        "#                                                                              #"
    )
    print(
        "################################################################################"
    )
    print()

    # Run Input Parser
    args = get_harmonics_arguments()

    # Load Database
    db, stkeys = stdb.io.load_db(fname=args.indb, keys=args.stkeys)

    # Track processed folders
    procfold = []

    # Loop over station keys
    for stkey in list(stkeys):

        # Extract station information from dictionary
        sta = db[stkey]

        # Construct Folder Name
        stfld = stkey
        if not args.lkey:
            stfld = stkey.split('.')[0] + "." + stkey.split('.')[1]

        # Define path to see if it exists
        if args.phase in ['P', 'PP', 'allP']:
            datapath = Path('P_DATA') / stfld
        elif args.phase in ['S', 'SKS', 'allS']:
            datapath = Path('S_DATA') / stfld
        if not datapath.is_dir():
            print('Path to ' + str(datapath) + ' doesn`t exist - continuing')
            continue

        # Get search start time
        if args.startT is None:
            tstart = sta.startdate
        else:
            tstart = args.startT

        # Get search end time
        if args.endT is None:
            tend = sta.enddate
        else:
            tend = args.endT

        if tstart > sta.enddate or tend < sta.startdate:
            continue

        # Temporary print locations
        tlocs = sta.location
        if len(tlocs) == 0:
            tlocs = ['']
        for il in range(0, len(tlocs)):
            if len(tlocs[il]) == 0:
                tlocs[il] = "--"
        sta.location = tlocs

        # Update Display
        print(" ")
        print(" ")
        print("|===============================================|")
        print("|===============================================|")
        print("|                   {0:>8s}                    |".format(
            sta.station))
        print("|===============================================|")
        print("|===============================================|")
        print("|  Station: {0:>2s}.{1:5s}                            |".format(
            sta.network, sta.station))
        print("|      Channel: {0:2s}; Locations: {1:15s}  |".format(
            sta.channel, ",".join(tlocs)))
        print("|      Lon: {0:7.2f}; Lat: {1:6.2f}                |".format(
            sta.longitude, sta.latitude))
        print("|      Start time: {0:19s}          |".format(
            sta.startdate.strftime("%Y-%m-%d %H:%M:%S")))
        print("|      End time:   {0:19s}          |".format(
            sta.enddate.strftime("%Y-%m-%d %H:%M:%S")))
        print("|-----------------------------------------------|")

        # Check for folder already processed
        if stfld in procfold:
            print('  {0} already processed...skipping   '.format(stfld))
            continue

        rfRstream = Stream()
        rfTstream = Stream()

        datafiles = [x for x in datapath.iterdir() if x.is_dir()]
        for folder in datafiles:

            # Skip hidden folders
            if folder.name.startswith('.'):
                continue

            date = folder.name.split('_')[0]
            year = date[0:4]
            month = date[4:6]
            day = date[6:8]
            dateUTC = UTCDateTime(year + '-' + month + '-' + day)

            if dateUTC > tstart and dateUTC < tend:

                filename = folder / "RF_Data.pkl"
                if filename.is_file():
                    file = open(filename, "rb")
                    rfdata = pickle.load(file)
                    if rfdata[0].stats.snrh > args.snrh and \
                            rfdata[0].stats.snr > args.snr and \
                            rfdata[0].stats.cc > args.cc:

                        rfRstream.append(rfdata[1])
                        rfTstream.append(rfdata[2])

                    file.close()

            else:
                continue

        if args.no_outl:
            # Remove outliers wrt variance
            varR = np.array([np.var(tr.data) for tr in rfRstream])

            # Calculate outliers
            medvarR = np.median(varR)
            madvarR = 1.4826 * np.median(np.abs(varR - medvarR))
            robustR = np.abs((varR - medvarR) / madvarR)
            outliersR = np.arange(len(rfRstream))[robustR > 2.]
            for i in outliersR[::-1]:
                rfRstream.remove(rfRstream[i])
                rfTstream.remove(rfTstream[i])

            # Do the same for transverse
            varT = np.array([np.var(tr.data) for tr in rfTstream])
            medvarT = np.median(varT)
            madvarT = 1.4826 * np.median(np.abs(varT - medvarT))
            robustT = np.abs((varT - medvarT) / madvarT)
            outliersT = np.arange(len(rfTstream))[robustT > 2.]
            for i in outliersT[::-1]:
                rfRstream.remove(rfRstream[i])
                rfTstream.remove(rfTstream[i])

        # Try binning if specified
        if args.nbin is not None:
            rf_tmp = binning.bin(rfRstream,
                                 rfTstream,
                                 typ='baz',
                                 nbin=args.nbin + 1)
            rfRstream = rf_tmp[0]
            rfTstream = rf_tmp[1]

        # Filter original streams
        rfRstream.filter('bandpass',
                         freqmin=args.bp[0],
                         freqmax=args.bp[1],
                         corners=2,
                         zerophase=True)
        rfTstream.filter('bandpass',
                         freqmin=args.bp[0],
                         freqmax=args.bp[1],
                         corners=2,
                         zerophase=True)

        # Initialize the Harmonics object
        harmonics = Harmonics(rfRstream, rfTstream)

        # Stack with or without dip
        if args.find_azim:
            harmonics.dcomp_find_azim(xmin=args.trange[0], xmax=args.trange[1])
            print("Optimal azimuth for trange between " + str(args.trange[0]) +
                  " and " + str(args.trange[1]) + " seconds is: " +
                  str(harmonics.azim))
        else:
            harmonics.dcomp_fix_azim(azim=args.azim)

        if args.save_plot and not Path('FIGURES').is_dir():
            Path('FIGURES').mkdir(parents=True)

        if args.plot:
            harmonics.plot(args.ymax, args.scale, args.save_plot, args.title,
                           args.form)

        if args.save:
            filename = datapath / (harmonics.hstream[0].stats.station +
                                   ".harmonics.pkl")
            harmonics.save(filename)

        # Update processed folders
        procfold.append(stfld)
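
# Minimal sketch of the MAD-based outlier rejection used above: the factor
# 1.4826 scales the median absolute deviation so it is consistent with the
# standard deviation of normally distributed data, and traces whose variance
# sits more than `threshold` robust standard deviations from the median are
# flagged for removal.
import numpy as np

def mad_outlier_indices(values, threshold=2.0):
    values = np.asarray(values, dtype=float)
    med = np.median(values)
    mad = 1.4826 * np.median(np.abs(values - med))
    robust_z = np.abs((values - med) / mad)
    return np.arange(len(values))[robust_z > threshold]

# e.g. varR = [np.var(tr.data) for tr in rfRstream]
#      for i in mad_outlier_indices(varR)[::-1]:
#          rfRstream.remove(rfRstream[i])
#          rfTstream.remove(rfTstream[i])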
Example #7
                        ".." + channel + ".mseed")

                if os.path.isfile(file):
                    st_temp += read(file)

    # load 24h continuous waveforms only if template exists
    for tt in st_temp:
        station = tt.stats.station
        channel = tt.stats.channel
        file1 = cont_dir + sday + "." + station + "." + channel
        # print(" file1 ===", file1)
        if os.path.isfile(file1):
            st_cont += read(file1)
        else:
            # remove from the template stream if continuous not exists
            st_temp.remove(tt)

    st_cont.filter("bandpass",
                   freqmin=bandpass[0],
                   freqmax=bandpass[1],
                   zerophase=True)

    # define variables
    # st1_temp = st_temp.select(channel=channel[0])
    tt = Trace()
    tc = Trace()
    count = 0
    nmin = 0

    npanels = len(st_temp)
    nfile = len(st_temp)
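
# The truncated snippet above assembles template (st_temp) and 24 h continuous
# (st_cont) streams for matched-filter detection. A hedged sketch of the
# correlation step such code typically leads into, using ObsPy's
# correlate_template; the template/continuous pairing below is an assumption,
# not taken from the original file.
import numpy as np
from obspy.signal.cross_correlation import correlate_template

for tmpl in st_temp:
    cont_sel = st_cont.select(station=tmpl.stats.station,
                              channel=tmpl.stats.channel)
    if not len(cont_sel):
        continue
    cc = correlate_template(cont_sel[0].data, tmpl.data)
    print(tmpl.id, "max CC %.2f at sample %d" % (cc.max(), int(np.argmax(cc))))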
Example #8
def main():

    print()
    print("#########################################")
    print("#        __                 _     _     #")
    print("#  _ __ / _|_ __  _   _    | |__ | | __ #")
    print("# | '__| |_| '_ \| | | |   | '_ \| |/ / #")
    print("# | |  |  _| |_) | |_| |   | | | |   <  #")
    print("# |_|  |_| | .__/ \__, |___|_| |_|_|\_\ #")
    print("#          |_|    |___/_____|           #")
    print("#                                       #")
    print("#########################################")
    print()

    # Run Input Parser
    args = arguments.get_hk_arguments()

    # Load Database
    db = stdb.io.load_db(fname=args.indb)

    # Construct station key loop
    allkeys = sorted(db.keys())

    # Extract key subset
    if len(args.stkeys) > 0:
        stkeys = []
        for skey in args.stkeys:
            stkeys.extend([s for s in allkeys if skey in s])
    else:
        stkeys = sorted(db.keys())

    # Loop over station keys
    for stkey in list(stkeys):

        # Extract station information from dictionary
        sta = db[stkey]

        # Define path to see if it exists
        if args.phase in ['P', 'PP', 'allP']:
            datapath = Path('P_DATA') / stkey
        elif args.phase in ['S', 'SKS', 'allS']:
            datapath = Path('S_DATA') / stkey
        if not datapath.is_dir():
            print('Path to ' + str(datapath) + ' doesn`t exist - continuing')
            continue

        # Define save path
        if args.save:
            savepath = Path('HK_DATA') / stkey
            if not savepath.is_dir():
                print('Path to ' + str(savepath) +
                      ' doesn`t exist - creating it')
                savepath.mkdir(parents=True)

        # Get search start time
        if args.startT is None:
            tstart = sta.startdate
        else:
            tstart = args.startT

        # Get search end time
        if args.endT is None:
            tend = sta.enddate
        else:
            tend = args.endT

        if tstart > sta.enddate or tend < sta.startdate:
            continue

        # Temporary print locations
        tlocs = sta.location
        if len(tlocs) == 0:
            tlocs = ['']
        for il in range(0, len(tlocs)):
            if len(tlocs[il]) == 0:
                tlocs[il] = "--"
        sta.location = tlocs

        # Update Display
        print(" ")
        print(" ")
        print("|===============================================|")
        print("|===============================================|")
        print("|                   {0:>8s}                    |".format(
            sta.station))
        print("|===============================================|")
        print("|===============================================|")
        print("|  Station: {0:>2s}.{1:5s}                            |".format(
            sta.network, sta.station))
        print("|      Channel: {0:2s}; Locations: {1:15s}  |".format(
            sta.channel, ",".join(tlocs)))
        print("|      Lon: {0:7.2f}; Lat: {1:6.2f}                |".format(
            sta.longitude, sta.latitude))
        print("|      Start time: {0:19s}          |".format(
            sta.startdate.strftime("%Y-%m-%d %H:%M:%S")))
        print("|      End time:   {0:19s}          |".format(
            sta.enddate.strftime("%Y-%m-%d %H:%M:%S")))
        print("|-----------------------------------------------|")

        rfRstream = Stream()

        datafiles = [x for x in datapath.iterdir() if x.is_dir()]
        for folder in datafiles:

            # Skip hidden folders
            if folder.name.startswith('.'):
                continue

            date = folder.name.split('_')[0]
            year = date[0:4]
            month = date[4:6]
            day = date[6:8]
            dateUTC = UTCDateTime(year + '-' + month + '-' + day)

            if dateUTC > tstart and dateUTC < tend:

                # Load meta data
                metafile = folder / "Meta_Data.pkl"
                if not metafile.is_file():
                    continue
                meta = pickle.load(open(metafile, 'rb'))

                # Skip data not in list of phases
                if meta.phase not in args.listphase:
                    continue

                # QC Thresholding
                if meta.snrh < args.snrh:
                    continue
                if meta.snr < args.snr:
                    continue
                if meta.cc < args.cc:
                    continue

                # # Check bounds on data
                # if meta.slow < args.slowbound[0] and meta.slow > args.slowbound[1]:
                #     continue
                # if meta.baz < args.bazbound[0] and meta.baz > args.bazbound[1]:
                #     continue

                # If everything passed, load the RF data
                filename = folder / "RF_Data.pkl"
                if filename.is_file():
                    file = open(filename, "rb")
                    rfdata = pickle.load(file)
                    rfRstream.append(rfdata[1])
                    file.close()
                    if rfdata[0].stats.npts != 1451:
                        print(folder)

        if len(rfRstream) == 0:
            continue

        if args.no_outl:
            t1 = 0.
            t2 = 30.

            varR = []
            for i in range(len(rfRstream)):
                taxis = rfRstream[i].stats.taxis
                tselect = (taxis > t1) & (taxis < t2)
                varR.append(np.var(rfRstream[i].data[tselect]))
            varR = np.array(varR)

            # Remove outliers wrt variance within time range
            medvarR = np.median(varR)
            madvarR = 1.4826 * np.median(np.abs(varR - medvarR))
            robustR = np.abs((varR - medvarR) / madvarR)
            outliersR = np.arange(len(rfRstream))[robustR > 2.5]
            for i in outliersR[::-1]:
                rfRstream.remove(rfRstream[i])

        print('')
        print("Number of radial RF data: " + str(len(rfRstream)))
        print('')

        # Try binning if specified
        if args.calc_dip:
            rf_tmp = binning.bin_baz_slow(rfRstream,
                                          nbaz=args.nbaz + 1,
                                          nslow=args.nslow + 1,
                                          pws=args.pws)
            rfRstream = rf_tmp[0]
        else:
            rf_tmp = binning.bin(rfRstream,
                                 typ='slow',
                                 nbin=args.nslow + 1,
                                 pws=args.pws)
            rfRstream = rf_tmp[0]

        # Get a copy of the radial component and filter
        if args.copy:
            rfRstream_copy = rfRstream.copy()
            rfRstream_copy.filter('bandpass',
                                  freqmin=args.bp_copy[0],
                                  freqmax=args.bp_copy[1],
                                  corners=2,
                                  zerophase=True)

        # Check bin counts:
        # iterate over a static list so removals do not skip bins
        for tr in list(rfRstream):
            if (tr.stats.nbin < args.binlim):
                rfRstream.remove(tr)

        # Continue if stream is too short
        if len(rfRstream) < 5:
            continue

        if args.save_plot and not Path('HK_PLOTS').is_dir():
            Path('HK_PLOTS').mkdir(parents=True)

        print('')
        print("Number of radial RF bins: " + str(len(rfRstream)))
        print('')

        # Filter original stream
        rfRstream.filter('bandpass',
                         freqmin=args.bp[0],
                         freqmax=args.bp[1],
                         corners=2,
                         zerophase=True)

        # Initialize the HkStack object
        try:
            hkstack = HkStack(rfRstream,
                              rfV2=rfRstream_copy,
                              strike=args.strike,
                              dip=args.dip,
                              vp=args.vp)
        except Exception:
            hkstack = HkStack(rfRstream,
                              strike=args.strike,
                              dip=args.dip,
                              vp=args.vp)

        # Update attributes
        hkstack.hbound = args.hbound
        hkstack.kbound = args.kbound
        hkstack.dh = args.dh
        hkstack.dk = args.dk
        hkstack.weights = args.weights

        # Stack with or without dip
        if args.calc_dip:
            hkstack.stack_dip()
        else:
            hkstack.stack()

        # Average stacks
        hkstack.average(typ=args.typ)

        if args.plot:
            hkstack.plot(args.save_plot, args.title, args.form)

        if args.save:
            filename = savepath / (hkstack.rfV1[0].stats.station + \
                ".hkstack."+args.typ+".pkl")

            hkstack.save(file=filename)
Example #9
class chunk(object):
	def __init__(self, N, S, L, C, verbose = False):
		self.N          = N
		self.S          = S
		self.L          = L
		self.C          = C
		self._verbose   = verbose

		self._S         = Stream()

	def id(self):
		return "%s.%s.%s.%s" % (self.N, self.S, self.L, self.C)

	def holdings(self):
		print(self._S)
		return

	@staticmethod
	def _syncronize(others, this):
		others.append(this)

		ss = reduce(max, map(lambda st: st[0].stats.starttime, others))
		ee = reduce(min, map(lambda st: st[0].stats.endtime, others))

		for st in others:
			st.trim(ss, ee, nearest_sample = True)

		mask = np.zeros(len(this[0]), dtype=bool)
		for st in others:
			if len(st) > 1: st.merge()
			if not hasattr(st[0].data, "mask"): continue
			mask += st[0].data.mask

		for st in others:
			st[0].data = np.ma.masked_where(mask , st[0].data)

		return

	def _clean(self, s, e = None):
		toremove = []

		for t in self._S:
			if t.stats.endtime < s:
				toremove.append(t)

		for t in toremove:
			self._S.remove(t)

		if e is not None:
			for tr in self._S:
				tr.trim(starttime=s, endtime=e)

		return

	def _update(self, s, e):
		raise NotImplementedError("Please Implement Me!")

	def get(self, start, end, others, withgaps = False, incomplete = False,
						nosync = False):

		if isinstance(start, int):
			s, _ = sol_span_in_utc(start)
			_, e = sol_span_in_utc(end)

		else:
			s, e = start, end

		self._update(s, e)

		sliced_t = self._S.slice(s, e, keep_empty_traces = False,
						nearest_sample = True).copy()

		ntraces = len(sliced_t)
		sliced_t = sliced_t.merge(method = 1,
			interpolation_samples = 0,
			fill_value = None)

		if ntraces == 0: return False

		if not incomplete and (sliced_t[0].stats.starttime > s or \
						sliced_t[0].stats.endtime < e):
			log(f" W:> Trace is incomplete: {sliced_t[0].stats.starttime} >" +\
				f" {s} || {sliced_t[0].stats.endtime} < {e}.", level=1,
				verbose=self._verbose)
			return False

		if not withgaps and ntraces > 1:
			if self._verbose: print(" W:> Trace has %d gap%s." % (ntraces - 1,
						 "" if ntraces == 2 else "s"))
			return False

		if not nosync:
			self._syncronize(others, sliced_t)
		else:
			others.append(sliced_t)

		return True
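
# Hypothetical concrete subclass for illustration: chunk leaves _update
# abstract, so a reader has to fill the internal Stream self._S with data for
# the requested window. The MiniSEED file naming below is an assumption, not
# part of the original class.
from obspy import read

class FileChunk(chunk):
	def _update(self, s, e):
		self._clean(s)
		fname = "%s.%s.%s.%s.mseed" % (self.N, self.S, self.L, self.C)
		try:
			self._S += read(fname, starttime=s, endtime=e)
		except Exception as err:
			if self._verbose:
				print(" W:> could not read %s: %s" % (fname, err))
		if len(self._S) > 1:
			self._S.merge(method=1)
		return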
Example #10
def main():

    print()
    print("############################################")
    print("#        __                                #")
    print("#  _ __ / _|_ __  _   _     ___ ___ _ __   #")
    print("# | '__| |_| '_ \| | | |   / __/ __| '_ \  #")
    print("# | |  |  _| |_) | |_| |  | (_| (__| |_) | #")
    print("# |_|  |_| | .__/ \__, |___\___\___| .__/  #")
    print("#          |_|    |___/_____|      |_|     #")
    print("#                                          #")
    print("############################################")
    print()

    # Run Input Parser
    args = arguments.get_ccp_arguments()

    # Load Database
    db = stdb.io.load_db(fname=args.indb)

    # Construct station key loop
    allkeys = db.keys()

    # Extract key subset
    if len(args.stkeys) > 0:
        stkeys = []
        for skey in args.stkeys:
            stkeys.extend([s for s in allkeys if skey in s])
    else:
        stkeys = db.keys()

    if args.load:

        # Check if CCPimage object exists and whether overwrite has been set
        load_file = Path('CCP_load.pkl')
        if load_file.is_file() and not args.ovr:
            ccpfile = open(load_file, "rb")
            ccpimage = pickle.load(ccpfile)
            ccpfile.close()

        else:

            print()
            print("|-----------------------------------------------|")
            print("|  Loading data                                 |")
            print("|-----------------------------------------------|")
            print("| Gridding: ")
            print("|     start    = {0:5.1f},{1:6.1f}".format(
                args.coord_start[0], args.coord_start[1]))
            print("|     end      = {0:5.1f},{1:6.1f}".format(
                args.coord_end[0], args.coord_end[1]))
            print("|     dz    = {0} (km)".format(str(args.dz)))
            print("|     dx    = {0} (km)".format(str(args.dx)))
            print()

            # Initialize CCPimage object
            ccpimage = CCPimage(coord_start=args.coord_start,
                                coord_end=args.coord_end,
                                dz=args.dz,
                                dx=args.dx)

            # Loop over station keys
            for stkey in list(stkeys):

                # Extract station information from dictionary
                sta = db[stkey]

                # Define path to see if it exists
                if args.phase in ['P', 'PP', 'allP']:
                    datapath = Path('P_DATA') / stkey
                elif args.phase in ['S', 'SKS', 'allS']:
                    datapath = Path('S_DATA') / stkey
                if not datapath.is_dir():
                    print('Path to ' + str(datapath) +
                          " doesn't exist - continuing")
                    continue

                # Temporary print locations
                tlocs = sta.location
                if len(tlocs) == 0:
                    tlocs = ['']
                for il in range(0, len(tlocs)):
                    if len(tlocs[il]) == 0:
                        tlocs[il] = "--"
                sta.location = tlocs

                rfRstream = Stream()

                datafiles = [x for x in datapath.iterdir() if x.is_dir()]
                for folder in datafiles:

                    # Skip hidden folders
                    if folder.name.startswith('.'):
                        continue

                    # Load meta data
                    filename = folder / "Meta_Data.pkl"
                    if not filename.is_file():
                        continue
                    metafile = open(filename, 'rb')
                    meta = pickle.load(metafile)
                    metafile.close()

                    # Skip data not in list of phases
                    if meta.phase not in args.listphase:
                        continue

                    # QC Thresholding
                    if meta.snrh < args.snrh:
                        continue
                    if meta.snr < args.snr:
                        continue
                    if meta.cc < args.cc:
                        continue

                    # If everything passed, load the RF data
                    filename = folder / "RF_Data.pkl"
                    if filename.is_file():
                        file = open(filename, "rb")
                        rfdata = pickle.load(file)
                        rfRstream.append(rfdata[1])
                        file.close()

                if len(rfRstream) == 0:
                    continue

                if args.no_outl:
                    t1 = 0.
                    t2 = 30.

                    varR = []
                    for i in range(len(rfRstream)):
                        taxis = rfRstream[i].stats.taxis
                        tselect = (taxis > t1) & (taxis < t2)
                        varR.append(np.var(rfRstream[i].data[tselect]))
                    varR = np.array(varR)

                    # Remove outliers wrt variance within time range
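                    # (The factor 1.4826 rescales the median absolute deviation so that,
                    # for normally distributed data, it estimates the standard deviation;
                    # robustR is then a robust z-score and 2.5 the rejection threshold.)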
                    medvarR = np.median(varR)
                    madvarR = 1.4826 * np.median(np.abs(varR - medvarR))
                    robustR = np.abs((varR - medvarR) / madvarR)
                    outliersR = np.arange(len(rfRstream))[robustR > 2.5]
                    for i in outliersR[::-1]:
                        rfRstream.remove(rfRstream[i])

                print("Station: {0:>2s}.{1:5s} -  {2} traces loaded".format(
                    sta.network, sta.station, len(rfRstream)))
                if len(rfRstream) == 0:
                    continue

                ccpimage.add_rfstream(rfRstream)

            if len(ccpimage.radialRF) > 0:
                ccpimage.save("CCP_load.pkl")
                ccpimage.is_ready_for_prep = True
                print()
                print("CCPimage saved to 'CCP_load.pkl'")
            else:
                ccpimage.is_ready_for_prep = False
    else:
        pass

    if args.prep:

        prep_file = Path("CCP_prep.pkl")
        if prep_file.is_file() and not args.ovr:
            ccpfile = open(prep_file, 'rb')
            ccpimage = pickle.load(ccpfile)
            ccpfile.close()
        else:
            load_file = Path('CCP_load.pkl')
            if not load_file.is_file():
                raise (Exception("No CCP_load.pkl file available - aborting"))
            else:
                print()
                print("|-----------------------------------------------|")
                print("|  Preparing data before stacking               |")
                print("|-----------------------------------------------|")
                print("| Frequencies: ")
                print("|     f1    = {0:4.2f} (Hz)".format(args.f1))
                print("|     f2ps  = {0:4.2f} (Hz)".format(args.f2ps))
                print("|     f2pps = {0:4.2f} (Hz)".format(args.f2pps))
                print("|     f2pss = {0:4.2f} (Hz)".format(args.f2pss))
                print("| Binning: ")
                print("|     nbaz  = {0}".format(str(args.nbaz)))
                print("|     nslow = {0}".format(str(args.nslow)))
                print()

                ccpfile = open(load_file, "rb")
                ccpimage = pickle.load(ccpfile)
                ccpfile.close()
                ccpimage.prep_data(f1=args.f1,
                                   f2ps=args.f2ps,
                                   f2pps=args.f2pps,
                                   f2pss=args.f2pss,
                                   nbaz=args.nbaz,
                                   nslow=args.nslow)
                ccpimage.is_ready_for_prestack = True
                ccpimage.save(prep_file)
                print()
                print("CCPimage saved to {0}".format(str(prep_file)))

    else:
        pass

    if args.prestack:

        prestack_file = Path("CCP_prestack.pkl")
        if prestack_file.is_file() and not args.ovr:
            ccpfile = open(prestack_file, 'rb')
            ccpimage = pickle.load(ccpfile)
            ccpfile.close()
        else:
            prep_file = Path("CCP_prep.pkl")
            if not prep_file.is_file():
                raise (Exception("No CCP_prep.pkl file available - aborting"))
            else:
                print()
                print("|-----------------------------------------------|")
                print("|  CCP pre-stacking each phase                  |")
                print("|-----------------------------------------------|")
                print()

                ccpfile = open(prep_file, 'rb')
                ccpimage = pickle.load(ccpfile)
                ccpfile.close()
                ccpimage.prestack()
                ccpimage.save(prestack_file)
                print()
                print("CCPimage saved to {0}".format(str(prestack_file)))

    else:
        pass

    if args.ccp:

        ccp_file = Path("CCP_stack.pkl")
        if ccp_file.is_file() and not args.ovr:
            ccpfile = open(ccp_file, 'rb')
            ccpimage = pickle.load(ccpfile)
            ccpfile.close()
        else:
            prestack_file = Path("CCP_prestack.pkl")
            if not prestack_file.is_file():
                raise (
                    Exception("No CCP_prestack.pkl file available - aborting"))
            else:
                if args.linear:
                    print()
                    print("|-----------------------------------------------|")
                    print("|  Linear CCP stack - all phases                |")
                    print("|-----------------------------------------------|")
                    print()
                elif args.pws:
                    print()
                    print("|-----------------------------------------------|")
                    print("|  Phase-weighted CCP stack - all phases        |")
                    print("|-----------------------------------------------|")
                    print()

                ccpfile = open(prestack_file, 'rb')
                ccpimage = pickle.load(ccpfile)
                ccpfile.close()
                ccpimage.ccp()
                if args.linear:
                    if args.weights:
                        ccpimage.weights = args.weights
                    ccpimage.linear_stack(typ='ccp')
                elif args.pws:
                    if args.weights:
                        ccpimage.weights = args.weights
                    ccpimage.phase_weighted_stack(typ='ccp')
                ccpimage.save(ccp_file)
                print()
                print("CCPimage saved to {0}".format(str(ccp_file)))

        if args.ccp_figure:
            ccpimage.plot_ccp(save=args.save_figure,
                              fmt=args.fmt,
                              vmin=-1. * args.cbound,
                              vmax=args.cbound,
                              title=args.title)

    else:
        pass

    if args.gccp:

        gccp_file = Path("GCCP_stack.pkl")
        if gccp_file.is_file() and not args.ovr:
            ccpfile = open(gccp_file, 'rb')
            ccpimage = pickle.load(ccpfile)
            ccpfile.close()
        else:
            prestack_file = Path("CCP_prestack.pkl")
            if not prestack_file.is_file():
                raise (
                    Exception("No CCP_prestack.pkl file available - aborting"))
            else:
                if args.linear:
                    print()
                    print("|-----------------------------------------------|")
                    print("|  Linear GCCP stack - all phases               |")
                    print("|-----------------------------------------------|")
                    print()
                elif args.pws:
                    print()
                    print("|-----------------------------------------------|")
                    print("|  Phase-weighted GCCP stack - all phases       |")
                    print("|-----------------------------------------------|")
                    print()

                ccpfile = open(prestack_file, 'rb')
                ccpimage = pickle.load(ccpfile)
                ccpfile.close()
                ccpimage.gccp(wlen=args.wlen)
                if args.linear:
                    if args.weights:
                        ccpimage.weights = args.weights
                    ccpimage.linear_stack(typ='gccp')
                elif args.pws:
                    if args.weights:
                        ccpimage.weights = args.weights
                    ccpimage.phase_weighted_stack(typ='gccp')
                ccpimage.save(gccp_file)
                print()
                print("CCPimage saved to {0}".format(str(gccp_file)))

        if args.ccp_figure:
            ccpimage.plot_gccp(save=args.save_figure,
                               fmt=args.fmt,
                               vmin=-1. * args.cbound,
                               vmax=args.cbound,
                               title=args.title)

    else:
        pass
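
# A hedged follow-up sketch (not part of the original example): once CCP_stack.pkl
# exists, the finished stack can be reloaded and re-plotted without re-running any
# of the stages above; plot_ccp() is called with the same keywords as in the example,
# and the colour bounds and title are illustrative values.
import pickle

with open("CCP_stack.pkl", "rb") as f:
    ccpimage = pickle.load(f)
ccpimage.plot_ccp(save=False, fmt="png", vmin=-0.05, vmax=0.05, title="CCP stack")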
Example #11
def preprocess(db, stations, comps, goal_day, params, responses=None):

    datafiles = {}
    output = Stream()
    for station in stations:
        datafiles[station] = {}
        net, sta = station.split('.')
        gd = datetime.datetime.strptime(goal_day, '%Y-%m-%d')
        files = get_data_availability(
            db, net=net, sta=sta, starttime=gd, endtime=gd)
        for comp in comps:
            datafiles[station][comp] = []
        for file in files:
            if file.comp[-1] not in comps:
                continue
            fullpath = os.path.join(file.path, file.file)
            datafiles[station][file.comp[-1]].append(fullpath)

    for istation, station in enumerate(stations):
        net, sta = station.split(".")
        for comp in comps:
            files = datafiles[station][comp]
            if len(files) != 0:
                logging.debug("%s.%s Reading %i Files" %
                              (station, comp, len(files)))
                stream = Stream()
                for file in sorted(files):
                    st = read(file, dtype=np.float,
                              starttime=UTCDateTime(gd),
                              endtime=UTCDateTime(gd)+86400)
                    tmp = st.select(network=net, station=sta, component=comp)
                    if not len(tmp):
                        for tr in st:
                            tr.stats.network = net
                        st = st.select(network=net, station=sta, component=comp)
                    else:
                        st = tmp
                    for tr in st:
                        tr.data = tr.data.astype(np.float)
                    stream += st
                    del st
                stream.sort()
                stream.merge(method=1, interpolation_samples=3, fill_value=None)
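                # merge(fill_value=None) marks gaps as masked samples; split() below
                # breaks such masked traces back into contiguous, unmasked segments.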
                stream = stream.split()

                logging.debug("Checking sample alignment")
                for i, trace in enumerate(stream):
                    stream[i] = check_and_phase_shift(trace)

                logging.debug("Checking Gaps")
                if len(getGaps(stream)) > 0:
                    max_gap = 10
                    only_too_long = False
                    while getGaps(stream) and not only_too_long:
                        too_long = 0
                        gaps = getGaps(stream)
                        for gap in gaps:
                            if int(gap[-1]) <= max_gap:
                                stream[gap[0]] = stream[gap[0]].__add__(stream[gap[1]], method=1,
                                                                        fill_value="interpolate")
                                stream.remove(stream[gap[1]])
                                break
                            else:
                                too_long += 1
                        if too_long == len(gaps):
                            only_too_long = True
                stream = stream.split()
                taper_length = 20.0  # seconds
                for trace in stream:
                    if trace.stats.npts < 4 * taper_length * trace.stats.sampling_rate:
                        stream.remove(trace)
                    else:
                        trace.detrend(type="demean")
                        trace.detrend(type="linear")
                        trace.taper(max_percentage=None, max_length=1.0)

                if not len(stream):
                    logging.debug(" has only too small traces, skipping...")
                    continue

                for trace in stream:
                    logging.debug(
                        "%s.%s Highpass at %.2f Hz" % (station, comp, params.preprocess_highpass))
                    trace.filter("highpass", freq=params.preprocess_highpass, zerophase=True)

                    if trace.stats.sampling_rate != params.goal_sampling_rate:
                        logging.debug(
                            "%s.%s Lowpass at %.2f Hz" % (station, comp, params.preprocess_lowpass))
                        trace.filter("lowpass", freq=params.preprocess_lowpass, zerophase=True, corners=8)

                        if params.resampling_method == "Resample":
                            logging.debug("%s.%s Downsample to %.1f Hz" %
                                          (station, comp, params.goal_sampling_rate))
                            trace.data = resample(
                                trace.data, params.goal_sampling_rate / trace.stats.sampling_rate, 'sinc_fastest')

                        elif params.resampling_method == "Decimate":
                            decimation_factor = trace.stats.sampling_rate / params.goal_sampling_rate
                            if not int(decimation_factor) == decimation_factor:
                                logging.warning("%s.%s CANNOT be decimated by an integer factor, consider using Resample or Lanczos methods"
                                                " Trace sampling rate = %i ; Desired CC sampling rate = %i" %
                                                (station, comp, trace.stats.sampling_rate, params.goal_sampling_rate))
                                sys.stdout.flush()
                                sys.exit()
                            logging.debug("%s.%s Decimate by a factor of %i" %
                                          (station, comp, decimation_factor))
                            trace.data = trace.data[::int(decimation_factor)]

                        elif params.resampling_method == "Lanczos":
                            logging.debug("%s.%s Downsample to %.1f Hz" %
                                          (station, comp, params.goal_sampling_rate))
                            trace.data = np.array(trace.data)
                            trace.interpolate(method="lanczos", sampling_rate=params.goal_sampling_rate, a=1.0)

                        trace.stats.sampling_rate = params.goal_sampling_rate

                if get_config(db, 'remove_response', isbool=True):
                    logging.debug('%s Removing instrument response'%stream[0].id)
                    response_prefilt = eval(get_config(db, 'response_prefilt'))

                    response = responses[responses["channel_id"] == stream[0].id]
                    if len(response) > 1:
                        response = response[response["start_date"]<UTCDateTime(gd)]
                        response = response[response["end_date"]>UTCDateTime(gd)]
                    elif len(response) == 0:
                        logging.info("No instrument response information "
                                     "for %s, exiting" % stream[0].id)
                        sys.exit()
                    datalesspz = response["paz"].values[0]
                    stream.simulate(paz_remove=datalesspz,
                                    remove_sensitivity=True,
                                    pre_filt=response_prefilt,
                                    paz_simulate=None, )
                for tr in stream:
                    tr.data = tr.data.astype(np.float32)
                output += stream
                del stream
            del files
    clean_scipy_cache()
    return 0, output
Example #12
def get_waveforms(
        eqinfo: Event,
        META,
        wp_tw_factor=15,
        t_beforeP=1500.,  # seconds
        t_afterWP=60.,  # seconds
        client=None,
        dist_range=(5., 90.),  # degrees
        add_ptime=True,
        bulk_chunk_len=200,
        prune_cutoffs=(1., 2., 3., 4., 5., 5.),
        decimate=True,
        reject_incomplete=False,
        req_times=None,
        waveforms=None,
        save_path=None):
    '''
    This function will get the waveforms associated with an event to
    perform a WP inversion.

    :param float wp_tw_factor: Defines the Wphase time window. This is used as
        :math:`[t_p, t_p + wp_tw_factor Delta]`.
    :param float dist_range: A pair of floats specifying the min/max epicentral
        distance (in degrees) to be considered; i.e. stations outside of this
        range will be excluded.
    :param bool add_ptime: Should the p arrival time (with respect to the origin
        time) be included in each station's metadata. Including this should
        result in a significant speedup.
    :param int bulk_chunk_len: Maximum number of entries (channels) to be
        included in a single request to a metadata service.
    :param prune_cutoffs: List of pruning (distance) cutoffs passed to
        :py:func:`station_pruningNEZ` :type prune_cutoffs: list of floats
    :param bool decimate: Perform decimation of BH channels. **This should
        always be true when running Wphase**.
    :param dict req_times: Dictionary keyed by channel id containing the start
        and end times of the time window required by Wphase.
    :param client: An Obspy FDSN client.

    :return: If *add_ptime* is *True*, then return a two element tuple containing:
        #. An :py:class:`obspy.core.stream.Stream` containing the data.
        #. A new metadata dictionary containing the p arrival time for each trace.

        If *add_ptime* is *False* return the stream only.
    '''

    # Obtaining stations within the distance range only. trlist_in_dist_range will contain
    # the ids of channels which are within the specified distance range.
    trlist_in_dist_range = []
    tr_dists_in_range = []
    for trid, stmeta in META.items():
        stlat, stlon = stmeta["latitude"], stmeta["longitude"]
        dist = locations2degrees(eqinfo.latitude, eqinfo.longitude, stlat,
                                 stlon)
        if dist >= dist_range[0] and dist <= dist_range[1]:
            trlist_in_dist_range.append(trid)
            tr_dists_in_range.append(dist)

    # Determining times for the waveform request. req_times will contain time series
    # window for each channel.
    # {
    #   <channel-id>: (<obspy.core.utcdatetime.UTCDateTime>, <obspy.core.utcdatetime.UTCDateTime>)
    # }
    #
    # new_META:
    # {
    #   <channel-id>: original metadata for channel and extra key 'ptime'
    # }
    if req_times is None:
        req_times = {}
        if add_ptime:
            new_META = {}
        for i_trid, trid in enumerate(trlist_in_dist_range):
            stmeta = META[trid]
            dist = tr_dists_in_range[i_trid]
            t_p = getPtime(dist, eqinfo.depth)
            t_p_UTC = eqinfo.time + t_p
            t_wp_end = wp_tw_factor * dist
            t_wp_end_UTC = t_p_UTC + t_wp_end
            t1 = t_p_UTC - t_beforeP
            t2 = t_wp_end_UTC + t_afterWP
            if add_ptime:
                new_META[trid] = stmeta
                new_META[trid]['ptime'] = t_p
            req_times[trid] = [t1, t2]

    # ---------------get waveforms--------------
    # Station pruning
    if prune_cutoffs is not None:
        trlist_in_dist_range = station_pruningNEZ(trlist_in_dist_range,
                                                  new_META, prune_cutoffs)

    #reject incomplete traces:
    if reject_incomplete:
        now = UTCDateTime()
        trlist_in_dist_range = [
            trid for trid in trlist_in_dist_range if req_times[trid][1] < now
        ]

    if waveforms:
        # waveforms provided as input, just clean them
        if isinstance(waveforms, str):
            waveforms = obspy.read(waveforms)
        logger.info('%d traces provided as input', len(waveforms))
        st = waveforms
    else:
        # fetch waveforms from server
        logger.info('fetching data from %s', client.base_url)
        st = Stream()

        # Create the subsets for each request
        bulk = []
        for trid in trlist_in_dist_range:
            net, sta, loc, cha = trid.split('.')
            t1, t2 = req_times[trid]
            bulk.append([net, sta, loc, cha, t1, t2])
        bulk_chunks = [
            bulk[i_chunk:i_chunk + bulk_chunk_len]
            for i_chunk in range(0, len(bulk), bulk_chunk_len)
        ]

        # make a call for each subset
        # TODO: One might want to do this in parallel.
        for chunk in bulk_chunks:
            # TODO: Do want to try/catch here?
            try:
                st += client.get_waveforms_bulk(chunk)
            except Exception as e:
                logger.error('Problem with request from server %s:\n%s',
                             client.base_url, str(e))
                continue
    if save_path:
        logger.info("Saving waveforms in %s", save_path)
        st.write(save_path, format='MSEED')

    # Removing gappy traces (that is channel ids that are repeated)
    st = remove_gappy_traces(st)
    logger.info('%s traces remaining after throwing out gappy ones', len(st))

    # Decimating BH channels. This can be done in parallel.
    if decimate:
        st_B_cha_list = [tr for tr in st if tr.id.split('.')[-1][0] == 'B']
        for tr in st_B_cha_list:
            try:
                decimateTo1Hz(tr)
            except CannotDecimate as e:
                logger.info("Removing trace %s - %s", tr.id, e)
                st.remove(tr)

        # Creating contigous arrays for the traces. This may speed up things later.
        for tr in st:
            tr.data = np.ascontiguousarray(tr.data)

    if add_ptime:
        return st, new_META
    else:
        return st
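
# A hedged usage sketch for get_waveforms() above. The function only reads
# latitude, longitude, depth and time from the event object, and latitude/longitude
# from each META entry, so a SimpleNamespace stands in for the package's Event type
# here; the channel id, coordinates and output path are illustrative assumptions.
from types import SimpleNamespace

from obspy import UTCDateTime
from obspy.clients.fdsn import Client

eqinfo = SimpleNamespace(latitude=-35.9, longitude=-72.7, depth=25.0,
                         time=UTCDateTime("2010-02-27T06:34:14"))
META = {"IU.TRQA.00.BHZ": {"latitude": -38.06, "longitude": -61.98}}

st, meta = get_waveforms(eqinfo, META, client=Client("IRIS"),
                         save_path="wphase_waveforms.mseed")
print(meta["IU.TRQA.00.BHZ"]["ptime"])  # P arrival relative to the origin time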
Example #13
def GetData(
        eqinfo,
        META,
        wp_tw_factor=15,
        t_beforeP=1500., # seconds
        t_afterWP=60.,   # seconds
        server="http://rhe-eqm-seiscomp-dev.dev.lan:8081/",
        dist_range=(5., 90.), # degrees
        add_ptime=True,
        bulk_chunk_len=200,
        prune_cutoffs=(1., 2., 3., 4., 5., 5.),
        decimate=True,
        reject_incomplete=False,
        req_times=None):
    '''
    This function will get the waveforms associated with an event to
    perform a WP inversion.

    :param float wp_tw_factor: Defines the Wphase time window. This is used as
        :math:`[t_p, t_p + wp\_tw\_factor \Delta]`.
    :param float dist_range: A pair of floats specifying the min/max epicentral
        distance (in degrees) to be considered; i.e. stations outside of this
        range will be excluded.
    :param bool add_ptime: Should the p arrival time (with respect to the origin
        time) be included in each station's metadata. Including this should
        result in a significant speedup.
    :param int bulk_chunk_len: Maximum number of entries (channels) to be
        included in a single request to a metadata service.
    :param prune_cutoffs: List of pruning (distance) cutoffs passed to
        :py:func:`station_pruningNEZ` :type prune_cutoffs: list of floats
    :param bool decimate: Perform decimation of BH channels. **This should
        always be true when running Wphase**.
    :param dict req_times: Dictionary keyed by channel id containing the start
        and end times of the time window required by Wphase.

    :return: If *add_ptime* is *True*, then return a two element tuple containing:
        #. An :py:class:`obspy.core.stream.Stream` containing the data.
        #. A new metadata dictionary containing the p arrival time for each trace.

        If *add_ptime* is *False* return the stream only.
    '''

    # Sampling rates we will decimate. Channels with other sampling rates will be ignored.
    decimable_BH_ch = [20., 40., 50.]

    client = Client(server)

    hyplat = eqinfo['lat']
    hyplon = eqinfo['lon']
    hypdep = eqinfo['dep']
    otime = eqinfo['time']

    # Obtaining stations within the distance range only. trlist_in_dist_range will contain
    # the ids of channels which are within the specified distance range.
    trlist_in_dist_range = []
    tr_dists_in_range = []
    for trid, stmeta in META.items():
        stlat, stlon = stmeta['latitude'], stmeta['longitude']
        dist = locations2degrees(hyplat, hyplon, stlat, stlon)
        if dist >= dist_range[0] and dist <= dist_range[1]:
            trlist_in_dist_range.append(trid)
            tr_dists_in_range.append(dist)

    # Determining times for the waveform request. req_times will contain time series
    # window for each channel.
    # {
    #   <channel-id>: (<obspy.core.utcdatetime.UTCDateTime>, <obspy.core.utcdatetime.UTCDateTime>)
    # }
    #
    # new_META:
    # {
    #   <channel-id>: original metadata for channel and extra key 'ptime'
    # }
    if req_times is None:
        req_times = {}
        if add_ptime:
            new_META = {}
        for i_trid, trid in enumerate(trlist_in_dist_range):
            stmeta = META[trid]
            dist = tr_dists_in_range[i_trid]
            t_p = getPtime(dist, hypdep)
            t_p_UTC = otime + t_p
            t_wp_end = wp_tw_factor*dist
            t_wp_end_UTC = t_p_UTC + t_wp_end
            t1 = t_p_UTC - t_beforeP
            t2 = t_wp_end_UTC + t_afterWP
            if add_ptime:
                new_META[trid] = stmeta
                new_META[trid]['ptime'] = t_p
            req_times[trid] = [t1, t2]

    # ---------------get waveforms--------------
    # Station pruning
    if prune_cutoffs is not None:
        trlist_in_dist_range = station_pruningNEZ(
            trlist_in_dist_range,
            new_META, prune_cutoffs)

    #reject incomplete traces:
    if reject_incomplete:
        now = UTCDateTime()
        trlist_in_dist_range = [trid for trid in trlist_in_dist_range
            if req_times[trid][1] < now]
    st = Stream()

    # Create the subsets for each request
    bulk = []
    for trid in trlist_in_dist_range:
        net, sta, loc, cha = trid.split('.')
        t1, t2 = req_times[trid]
        bulk.append([net, sta, loc, cha, t1, t2])
    bulk_chunks = [bulk[i_chunk:i_chunk + bulk_chunk_len]
                   for i_chunk in range(0, len(bulk), bulk_chunk_len)]

    # make a call for each subset
    # TODO: One might want to do this in parallel.
    for chunk in bulk_chunks:
        # TODO: Do want to try/catch here?
        try:
            st += client.get_waveforms_bulk(chunk)
        except Exception as e:
            print('Problem with request from server: {}'.format(server))
            print(e)
            continue

    # Removing gappy traces (that is channel ids that are repeated)
    trlist_data = [tr.id for tr in st]
    rep_ids = [trid for trid, nrep in Counter(trlist_data).items()
               if nrep > 1]
    st = Stream(tr for tr in st if tr.id not in rep_ids)

    # Decimating BH channels. This can be done in parallel.
    if decimate:
        st_B_cha_list = [tr for tr in st if tr.id.split('.')[-1][0] == 'B']
        for tr in st_B_cha_list:
            samp_rate = tr.stats.sampling_rate
            if samp_rate not in decimable_BH_ch:
                st.remove(tr)
                continue
            if samp_rate == 20.:
                tr = dec20to1(tr, fast=True)
            elif samp_rate == 40:
                tr = dec40to1(tr, fast=True)
            elif samp_rate == 50:
                tr = dec50to1(tr, fast=True)

        # Creating contigous arrays for the traces. This may speed up things later.
        for tr in st:
            tr.data = np.ascontiguousarray(tr.data)

    if add_ptime:
        return st, new_META
    else:
        return st
Example #14
snpts = 10

## Read in seismic data ##
st = Stream()
for day in range(stime.julday, etime.julday):
    for staloc in stalocs:
        try:
            sta, loc = staloc.split('_')
            st += read('/msd/' + net + '_' + sta + '/' + str(stime.year) +
                       '/' + str(day) + '/' + loc + '_LH*')
        except:
            pass

# Remove the vertical
for tr in st.select(channel="LHZ"):
    st.remove(tr)
st.merge(fill_value=0)
st.sort()
if debug:
    print(st)

## Creating Figure ##
fig = plt.figure(1, figsize=(19, 10))
plt.subplots_adjust(hspace=0.0)

## Setting font parameters ##
mpl.rc('font', family='serif')
mpl.rc('font', serif='Times')
mpl.rc('text', usetex=True)
mpl.rc('font', size=20)
Example #15
def preprocess(db, stations, comps, goal_day, params, responses=None):
    """
    Fetches data for each ``stations`` and each ``comps`` using the
    data_availability table in the database.

    To correct for instrument responses, make sure to set ``remove_response``
    to "Y" in the config and to provide the ``responses`` DataFrame.

    :Example:

    >>> from msnoise.api import connect, get_params, preload_instrument_responses
    >>> from msnoise.preprocessing import preprocess
    >>> db = connect()
    >>> params = get_params(db)
    >>> responses = preload_instrument_responses(db)
    >>> st = preprocess(db, ["YA.UV06","YA.UV10"], ["Z",], "2010-09-01", params, responses)
    >>> st
     2 Trace(s) in Stream:
    YA.UV06.00.HHZ | 2010-09-01T00:00:00.000000Z - 2010-09-01T23:59:59.950000Z | 20.0 Hz, 1728000 samples
    YA.UV10.00.HHZ | 2010-09-01T00:00:00.000000Z - 2010-09-01T23:59:59.950000Z | 20.0 Hz, 1728000 samples

    :type db: :class:`sqlalchemy.orm.session.Session`
    :param db: A :class:`~sqlalchemy.orm.session.Session` object, as
        obtained by :func:`msnoise.api.connect`.
    :type stations: list of str
    :param stations: a list of station names, in the format NET.STA.
    :type comps: list of str
    :param comps: a list of component names, in Z,N,E,1,2.
    :type goal_day: str
    :param goal_day: the day of data to load, ISO 8601 format: e.g. 2016-12-31.
    :type params: class
    :param params: an object containing the config parameters, as obtained by
        :func:`msnoise.api.get_params`.
    :type responses: :class:`pandas.DataFrame`
    :param responses: a DataFrame containing the instrument responses, as
        obtained by :func:`msnoise.api.preload_instrument_responses`.
    :rtype: :class:`obspy.core.stream.Stream`
    :return: A Stream object containing all traces.
    """
    datafiles = {}
    output = Stream()
    for station in stations:
        datafiles[station] = {}
        net, sta = station.split('.')
        gd = datetime.datetime.strptime(goal_day, '%Y-%m-%d')
        files = get_data_availability(
            db, net=net, sta=sta, starttime=gd, endtime=gd)
        for comp in comps:
            datafiles[station][comp] = []
        for file in files:
            if file.comp[-1] not in comps:
                continue
            fullpath = os.path.join(file.path, file.file)
            datafiles[station][file.comp[-1]].append(fullpath)

    for istation, station in enumerate(stations):
        net, sta = station.split(".")
        for comp in comps:
            files = datafiles[station][comp]
            if len(files) != 0:
                logger.debug("%s.%s Reading %i Files" %
                              (station, comp, len(files)))
                stream = Stream()
                for file in sorted(files):
                    try:
                        st = read(file, dtype=np.float,
                              starttime=UTCDateTime(gd),
                              endtime=UTCDateTime(gd)+86400)
                    except:
                        logger.debug("ERROR reading file %s" % file)
                        continue
                    for tr in st:
                        if len(tr.stats.channel) == 2:
                            tr.stats.channel += tr.stats.location
                            tr.stats.location = "00"
                    tmp = st.select(network=net, station=sta, component=comp)
                    if not len(tmp):
                        for tr in st:
                            tr.stats.network = net
                        st = st.select(network=net, station=sta, component=comp)
                    else:
                        st = tmp
                    for tr in st:
                        tr.data = tr.data.astype(np.float)
                        tr.stats.network = tr.stats.network.upper()
                        tr.stats.station = tr.stats.station.upper()
                        tr.stats.channel = tr.stats.channel.upper()

                    stream += st
                    del st
                stream.sort()
                try:
                    # HACK not super clean... should find a way to prevent the
                    # same trace id with different sps to occur
                    stream.merge(method=1, interpolation_samples=3, fill_value=None)
                except:
                    logger.debug("Error while merging...")
                    traceback.print_exc()
                    continue
                stream = stream.split()
                if not len(stream):
                    continue
                logger.debug("%s Checking sample alignment" % stream[0].id)
                for i, trace in enumerate(stream):
                    stream[i] = check_and_phase_shift(trace)

                logger.debug("%s Checking Gaps" % stream[0].id)
                if len(getGaps(stream)) > 0:
                    max_gap = params.preprocess_max_gap*stream[0].stats.sampling_rate
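                    # Greedy gap handling: any gap up to max_gap samples is bridged by
                    # interpolation; the loop ends once every remaining gap is too long.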

                    gaps = getGaps(stream)
                    while len(gaps):
                        too_long = 0
                        for gap in gaps:
                            if int(gap[-1]) <= max_gap:
                                try:
                                    stream[gap[0]] = stream[gap[0]].__add__(stream[gap[1]], method=1,
                                                                        fill_value="interpolate")
                                    stream.remove(stream[gap[1]])
                                except:
                                    stream.remove(stream[gap[1]])

                                break
                            else:
                                too_long += 1

                        if too_long == len(gaps):
                            break
                        gaps = getGaps(stream)
                    del gaps

                stream = stream.split()
                for tr in stream:
                    if tr.stats.sampling_rate < (params.goal_sampling_rate-1):
                        stream.remove(tr)
                taper_length = 20.0  # seconds
                for trace in stream:
                    if trace.stats.npts < 4 * taper_length * trace.stats.sampling_rate:
                        stream.remove(trace)
                    else:
                        trace.detrend(type="demean")
                        trace.detrend(type="linear")
                        trace.taper(max_percentage=None, max_length=1.0)

                if not len(stream):
                    logger.debug(" has only too small traces, skipping...")
                    continue

                for trace in stream:
                    logger.debug(
                        "%s Highpass at %.2f Hz" % (trace.id, params.preprocess_highpass))
                    trace.filter("highpass", freq=params.preprocess_highpass, zerophase=True, corners=4)

                    if trace.stats.sampling_rate != params.goal_sampling_rate:
                        logger.debug(
                            "%s Lowpass at %.2f Hz" % (trace.id, params.preprocess_lowpass))
                        trace.filter("lowpass", freq=params.preprocess_lowpass, zerophase=True, corners=8)

                        if params.resampling_method == "Resample":
                            logger.debug("%s Downsample to %.1f Hz" %
                                          (trace.id, params.goal_sampling_rate))
                            trace.data = resample(
                                trace.data, params.goal_sampling_rate / trace.stats.sampling_rate, 'sinc_fastest')

                        elif params.resampling_method == "Decimate":
                            decimation_factor = trace.stats.sampling_rate / params.goal_sampling_rate
                            if not int(decimation_factor) == decimation_factor:
                                logger.warning("%s CANNOT be decimated by an integer factor, consider using Resample or Lanczos methods"
                                                " Trace sampling rate = %i ; Desired CC sampling rate = %i" %
                                                (trace.id, trace.stats.sampling_rate, params.goal_sampling_rate))
                                sys.stdout.flush()
                                sys.exit()
                            logger.debug("%s Decimate by a factor of %i" %
                                          (trace.id, decimation_factor))
                            trace.data = trace.data[::int(decimation_factor)]

                        elif params.resampling_method == "Lanczos":
                            logger.debug("%s Downsample to %.1f Hz" %
                                          (trace.id, params.goal_sampling_rate))
                            trace.data = np.array(trace.data)
                            trace.interpolate(method="lanczos", sampling_rate=params.goal_sampling_rate, a=1.0)

                        trace.stats.sampling_rate = params.goal_sampling_rate
                    del trace

                if params.remove_response:
                    logger.debug('%s Removing instrument response'%stream[0].id)

                    response = responses[responses["channel_id"] == stream[0].id]
                    if len(response) > 1:
                        response = response[response["start_date"] <= UTCDateTime(gd)]
                    if len(response) > 1:
                        response = response[response["end_date"] >= UTCDateTime(gd)]
                    elif len(response) == 0:
                        logger.info("No instrument response information "
                                     "for %s, skipping" % stream[0].id)
                        continue
                    try:
                        datalesspz = response["paz"].values[0]
                    except:
                        logger.error("Bad instrument response information "
                                      "for %s, skipping" % stream[0].id)
                        continue
                    stream.simulate(paz_remove=datalesspz,
                                    remove_sensitivity=True,
                                    pre_filt=params.response_prefilt,
                                    paz_simulate=None, )
                for tr in stream:
                    tr.data = tr.data.astype(np.float32)
                output += stream
                del stream
            del files
    clean_scipy_cache()
    return output
Example #16
def main():

    print()
    print(
        "################################################################################"
    )
    print(
        "#        __                 _                                      _           #"
    )
    print(
        "#  _ __ / _|_ __  _   _    | |__   __ _ _ __ _ __ ___   ___  _ __ (_) ___ ___  #"
    )
    print(
        "# | '__| |_| '_ \| | | |   | '_ \ / _` | '__| '_ ` _ \ / _ \| '_ \| |/ __/ __| #"
    )
    print(
        "# | |  |  _| |_) | |_| |   | | | | (_| | |  | | | | | | (_) | | | | | (__\__ \ #"
    )
    print(
        "# |_|  |_| | .__/ \__, |___|_| |_|\__,_|_|  |_| |_| |_|\___/|_| |_|_|\___|___/ #"
    )
    print(
        "#          |_|    |___/_____|                                                  #"
    )
    print(
        "#                                                                              #"
    )
    print(
        "################################################################################"
    )
    print()

    # Run Input Parser
    (opts, indb) = options.get_harmonics_options()

    # Load Database
    db = stdb.io.load_db(fname=indb)

    # Construct station key loop
    allkeys = db.keys()
    allkeys = sorted(allkeys)

    # Extract key subset
    if len(opts.stkeys) > 0:
        stkeys = []
        for skey in opts.stkeys:
            stkeys.extend([s for s in allkeys if skey in s])
    else:
        stkeys = db.keys()
        stkeys = sorted(stkeys)

    # Loop over station keys
    for stkey in list(stkeys):

        # Extract station information from dictionary
        sta = db[stkey]

        # Define path to see if it exists
        datapath = 'DATA/' + stkey
        if not os.path.isdir(datapath):
            raise (Exception('Path to ' + datapath +
                             " doesn't exist - aborting"))

        # Get search start time
        if opts.startT is None:
            tstart = sta.startdate
        else:
            tstart = opts.startT

        # Get search end time
        if opts.endT is None:
            tend = sta.enddate
        else:
            tend = opts.endT

        if tstart > sta.enddate or tend < sta.startdate:
            continue

        # Temporary print locations
        tlocs = sta.location
        if len(tlocs) == 0:
            tlocs = ['']
        for il in range(0, len(tlocs)):
            if len(tlocs[il]) == 0:
                tlocs[il] = "--"
        sta.location = tlocs

        # Update Display
        print(" ")
        print(" ")
        print("|===============================================|")
        print("|===============================================|")
        print("|                   {0:>8s}                    |".format(
            sta.station))
        print("|===============================================|")
        print("|===============================================|")
        print("|  Station: {0:>2s}.{1:5s}                            |".format(
            sta.network, sta.station))
        print("|      Channel: {0:2s}; Locations: {1:15s}  |".format(
            sta.channel, ",".join(tlocs)))
        print("|      Lon: {0:7.2f}; Lat: {1:6.2f}                |".format(
            sta.longitude, sta.latitude))
        print("|      Start time: {0:19s}          |".format(
            sta.startdate.strftime("%Y-%m-%d %H:%M:%S")))
        print("|      End time:   {0:19s}          |".format(
            sta.enddate.strftime("%Y-%m-%d %H:%M:%S")))
        print("|-----------------------------------------------|")

        rfRstream = Stream()
        rfTstream = Stream()

        for folder in os.listdir(datapath):

            # Skip hidden folders
            if folder.startswith('.'):
                continue

            date = folder.split('_')[0]
            year = date[0:4]
            month = date[4:6]
            day = date[6:8]
            dateUTC = UTCDateTime(year + '-' + month + '-' + day)

            if dateUTC > tstart and dateUTC < tend:

                filename = datapath + "/" + folder + "/RF_Data.pkl"
                if os.path.isfile(filename):
                    file = open(filename, "rb")
                    rfdata = pickle.load(file)
                    if rfdata[0].stats.snrh > opts.snrh and rfdata[0].stats.snr > opts.snr and \
                            rfdata[0].stats.cc > opts.cc:

                        rfRstream.append(rfdata[1])
                        rfTstream.append(rfdata[2])

                    file.close()

            else:
                continue

        if opts.no_outl:
            # Remove outliers wrt variance
            varR = np.array([np.var(tr.data) for tr in rfRstream])

            # Calculate outliers
            medvarR = np.median(varR)
            madvarR = 1.4826 * np.median(np.abs(varR - medvarR))
            robustR = np.abs((varR - medvarR) / madvarR)
            outliersR = np.arange(len(rfRstream))[robustR > 2.]
            for i in outliersR[::-1]:
                rfRstream.remove(rfRstream[i])
                rfTstream.remove(rfTstream[i])

            # Do the same for transverse
            varT = np.array([np.var(tr.data) for tr in rfTstream])
            medvarT = np.median(varT)
            madvarT = 1.4826 * np.median(np.abs(varT - medvarT))
            robustT = np.abs((varT - medvarT) / madvarT)
            outliersT = np.arange(len(rfTstream))[robustT > 2.]
            for i in outliersT[::-1]:
                rfRstream.remove(rfRstream[i])
                rfTstream.remove(rfTstream[i])

        # Try binning if specified
        if opts.nbin is not None:
            rf_tmp = binning.bin(rfRstream,
                                 rfTstream,
                                 typ='baz',
                                 nbin=opts.nbin + 1)
            rfRstream = rf_tmp[0]
            rfTstream = rf_tmp[1]

        # Filter original streams
        rfRstream.filter('bandpass',
                         freqmin=opts.bp[0],
                         freqmax=opts.bp[1],
                         corners=2,
                         zerophase=True)
        rfTstream.filter('bandpass',
                         freqmin=opts.bp[0],
                         freqmax=opts.bp[1],
                         corners=2,
                         zerophase=True)

        # Initialize the HkStack object
        harmonics = Harmonics(rfRstream, rfTstream)

        # Stack with or without dip
        if opts.find_azim:
            harmonics.dcomp_find_azim(xmin=opts.trange[0], xmax=opts.trange[1])
            print("Optimal azimuth for trange between "+\
                str(opts.trange[0])+" and "+str(opts.trange[1])+\
                    "is: "+str(harmonics.azim))
        else:
            harmonics.dcomp_fix_azim(azim=opts.azim)

        if opts.plot:
            harmonics.plot(opts.ymax, opts.scale, opts.save_plot, opts.title,
                           opts.form)

        if opts.save:
            filename = datapath + "/" + harmonics.hstream[0].stats.station + \
                ".harmonics.pkl"
            harmonics.save(file=filename)