Example #1
def manual_remove(stream, manual_remove_list):
    '''
    Remove manually flagged traces from the stream.
    Each entry in manual_remove_list is a whitespace-separated record of
    network, station, location, starttime, endtime, npts and percent_invalid.
    '''
    for line in manual_remove_list:
        (network, station, location, starttime, endtime, npts,
         percent_invalid) = line.split()
        starttime = UTCDateTime(starttime)
        endtime = UTCDateTime(endtime)

        match = stream_select(stream,
                              network=network,
                              station=station,
                              location=location,
                              starttime=starttime,
                              endtime=endtime)
        for mat in match:
            stream.remove(mat)

    return stream
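For reference, a minimal sketch of how manual_remove might be called, assuming stream_select and UTCDateTime are in scope as in the module. The file path and the removal entry below are hypothetical placeholders; each entry follows the whitespace-separated record format unpacked above.

from obspy import read

# hypothetical removal list - values are placeholders, not real data
manual_remove_list = [
    'XA S12 01 1976-03-01T00:00:00.000000Z 1976-03-01T00:53:45.000000Z 5343 12.5',
]

# hypothetical input file
stream = read('raw/S12/1976-03-01T00:00:00_S12.MINISEED.gz')
stream = manual_remove(stream, manual_remove_list)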
Example #2
def splice_chains(stream,
                  manual_remove_list=None,
                  starttime0=None,
                  framecount0=None,
                  adjust0=None,
                  obs_delta0=None):
    '''
    Splice the records (chains) together.
    Remember that the absolute timing depends on the previous stream,
    so run call_splice_chains in date order.
    '''
    log_filename = 'logs/splice.log'
    logging.basicConfig(filename=log_filename,
                        filemode='w',
                        level=logging.INFO)
    # logging.basicConfig(filename=log_filename, filemode='w', level=logging.DEBUG)

    if adjust0 is None:
        if starttime0 is not None or framecount0 is not None:
            msg = 'If starttime0 or framecount0 is set, adjust0 must also be set.'
            logging.warning(msg)
            exit()

    # quick check to make sure only one station
    station = stream[0].stats.station
    if len(stream) != len(stream.select(station=station)):
        raise ValueError("More than one station in the stream")

    # begin by selecting the frame traces, and sorting by starttime
    frm_stream = stream.select(channel='AFR')
    frm_stream = frm_stream.sort(keys=['starttime'])

    # if anything needs to be removed manually, remove it here
    if manual_remove_list is not None:
        frm_stream = manual_remove(frm_stream, manual_remove_list)

    return_stream = Stream()

    for fs in frm_stream:

        # is the chain valid?
        st_ATT = stream_select(stream,
                               network=fs.stats.network,
                               station=fs.stats.station,
                               location=fs.stats.location,
                               starttime=fs.stats.starttime,
                               endtime=fs.stats.endtime,
                               channel='ATT')
        # first and last timestamps from the timing (ATT) trace
        tim1 = st_ATT[0].data[0]
        tim2 = st_ATT[0].data[-1]
        obs_delta1 = (tim2 - tim1) / (st_ATT[0].stats.npts - 1)

        # check whether the observed delta is out of range
        percent_delta_error = abs(((obs_delta1 - DELTA) / DELTA) * 100)

        if percent_delta_error > PERCENT_DELTA_ERROR:
            msg = '{} {} {} obs_delta1 == {} - reject automatically'.format(
                fs.id, fs.stats.starttime, fs.stats.endtime, obs_delta1)
            logging.info(msg)
            print(msg)
            continue

        # if starttime0, framecount0 and adjust0
        # have not already been set, we set them from the earliest
        # trace in the stream
        if adjust0 is None:
            starttime0 = fs.stats.starttime
            framecount0 = fs.data[0]
            adjust0 = 0
            obs_delta0 = DELTA
            # find the matching traces
            match = stream_select(stream,
                                  network=fs.stats.network,
                                  station=fs.stats.station,
                                  location=fs.stats.location,
                                  starttime=fs.stats.starttime,
                                  endtime=fs.stats.endtime)
            return_stream += match
            continue

        # find the starttime and framecount of the current trace
        starttime1 = fs.stats.starttime
        endtime1 = fs.stats.endtime
        framecount1 = fs.data[0]

        # adjust the starttime
        adjust_starttime0 = starttime0 - adjust0

        # estimate the sample number of the current trace, assuming
        # it continues from the previous successful trace
        sample_idx = _calc_match_samp_frame(starttime1,
                                            adjust_starttime0,
                                            framecount0,
                                            framecount1,
                                            obs_delta0=obs_delta0,
                                            obs_delta1=obs_delta1)

        # sample_idx is None when it is invalid
        if sample_idx is not None:
            # estimate the new starttime for the current trace
            est_starttime1 = starttime0 + (sample_idx * DELTA)
            # check that the adjust time is not getting too large
            adjust1 = est_starttime1 - starttime1
            msg = 'Est starttime1: {} Starttime1: {} Adjust1: {} Adjust0: {} Adjust diff: {}'.format(
                est_starttime1, starttime1, adjust1, adjust0,
                abs(adjust1 - adjust0))
            logging.debug(msg)
            if abs(adjust1 - adjust0) < ABSOLUTE_ADJUST_TIME:

                # record the adjustment in the log
                msg = 'adjust_time:{}, for station: {}, location: {}, starttime: {}, endtime: {}'.format(
                    adjust1, fs.stats.station, fs.stats.location, starttime1,
                    endtime1)
                logging.debug(msg)

                # update the starttimes for the traces which match the other details
                st_update = stream_select(stream,
                                          network=fs.stats.network,
                                          station=fs.stats.station,
                                          location=fs.stats.location,
                                          starttime=fs.stats.starttime,
                                          endtime=fs.stats.endtime)

                # loop through the matching traces and adjust their starttimes
                for tr in st_update:
                    tr.stats.starttime = est_starttime1
                return_stream += st_update
                # update starttime0, framecount0, adjust0 with details from this trace
                starttime0 = est_starttime1
                framecount0 = framecount1
                adjust0 = adjust1

    if len(return_stream) > 0:
        # calculate the length and write to the log file
        length_stream = return_stream.select(channel='ATT')
        length_stream = length_stream.sort(keys=['starttime'])
        length_stream2 = length_stream.copy()
        length_stream2 = length_stream2.sort(keys=['endtime'], reverse=True)
        elapsed_time = length_stream2[0].stats.endtime - length_stream[
            0].stats.starttime
        elapsed_timestamps = length_stream2[0].data[-1] - length_stream[
            0].data[0]
        obs_delta = elapsed_timestamps / ((elapsed_time) / DELTA)
        msg = ('elapsed_time: {} elapsed_timestamps: {} obs_delta: {}'.format(
            round(elapsed_time, 3), round(elapsed_timestamps, 3), obs_delta))
        # warn if the timestamp span deviates from the expected 3 hours (10800 s)
        if elapsed_timestamps > 10801. or elapsed_timestamps < 10799.:
            logging.warning(msg)
        else:
            logging.info(msg)

    return return_stream, starttime0, framecount0, adjust0
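The docstring above notes that the absolute timing depends on the previous stream, so a driver must process chain files in date order and thread the returned state into the next call. Below is a minimal sketch under that assumption; the glob pattern is a placeholder, the obs_delta0=DELTA handling is an assumption, and the module's real driver is call_splice_chains.

import glob
from obspy import read, Stream

spliced = Stream()
starttime0 = framecount0 = adjust0 = None

# hypothetical: gzipped chain files for one station, sorted so they run in date order
for filename in sorted(glob.glob('chains/S12/*.MINISEED.gz')):
    stream = read(filename)
    spliced_part, starttime0, framecount0, adjust0 = splice_chains(
        stream,
        starttime0=starttime0,
        framecount0=framecount0,
        adjust0=adjust0,
        obs_delta0=DELTA)  # assumed: pass the nominal DELTA; the real driver may track this differently
    spliced += spliced_part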
Example #3
def call_build_chains(stations=['S11', 'S12', 'S14', 'S15', 'S16'],
                      starttime0=None,
                      framecount0=None,
                      adjust0=None,
                      obs_delta0=DELTA,
                      timestamp0=None,
                      framecount_adjust=None,
                      raw_dir='.',
                      chain_dir='.',
                      start_time=UTCDateTime('1969-07-21T03:00:00.000000Z'),
                      end_time=UTCDateTime('1977-09-30T21:00:00.000000Z'),
                      read_gzip=True,
                      write_gzip=True):
    '''
    Calls build_chains() for each station in 3-hour intervals and writes
    the resulting chains to per-station subdirectories of chain_dir.
    '''

    log_filename = 'logs/build_chains.log'
    logging.basicConfig(filename=log_filename,
                        filemode='w',
                        level=logging.INFO)
    # logging.basicConfig(filename=log_filename, filemode='w', level=logging.DEBUG)

    for station in stations:

        # check that the overall directory exists
        if not os.path.exists(chain_dir):
            msg = ("The directory {} doesn't exist".format(chain_dir))
            raise IOError(msg)
        else:
            # make the subdirectory with the station name
            chain_dir_station = os.path.join(chain_dir, station)
            if not os.path.exists(chain_dir_station):
                os.makedirs(chain_dir_station)

    # build chains for each station
    for station in stations:

        raw_dir_station = os.path.join(raw_dir, station)
        chain_dir_station = os.path.join(chain_dir, station)

        time_interval = timedelta(hours=3)
        start = start_time
        while start < end_time:

            # work out the base filenames

            raw_filename = '%s_%s' % (start.strftime("%Y-%m-%dT%H:%M:%S"),
                                      station)
            raw_filename = os.path.join(raw_dir_station, raw_filename)
            if read_gzip:
                raw_filename = '%s.MINISEED.gz' % (raw_filename)
            else:
                raw_filename = '%s.MINISEED' % (raw_filename)

            base_chain_filename = '%s_%s' % (
                start.strftime("%Y-%m-%dT%H:%M:%S"), station)
            base_chain_filename = os.path.join(chain_dir_station,
                                               base_chain_filename)

            # read in the raw SEED file
            try:
                stream = read(raw_filename)
            except FileNotFoundError:
                msg = 'chains.py cannot find file: {}'.format(raw_filename)
                print(msg)
                logging.info(msg)
                # increment the time interval
                start += time_interval
                continue

            # select just this station (not strictly necessary, but just in case)
            stream = stream.select(station=station)

            # delete existing chains
            chain_filename = '%s%s' % (base_chain_filename, '*.MINISEED*')
            for i in glob.glob(chain_filename):
                os.unlink(i)

            if len(stream) > 0:

                # build the chains (for this station)
                stream2 = build_chains(stream=stream)

                stream3 = stream2.select(channel='ATT')

                for i, tr in enumerate(stream3):

                    chain_filename = '%s_%s.%s' % (base_chain_filename, i,
                                                   'MINISEED')
                    if write_gzip:
                        chain_filename_gzip = '%s.gz' % (chain_filename)

                    match = stream_select(stream2,
                                          network=tr.stats.network,
                                          station=tr.stats.station,
                                          location=tr.stats.location,
                                          starttime=tr.stats.starttime,
                                          endtime=tr.stats.endtime)

                    # split the traces at gaps (masked data) before writing
                    match = match.split()
                    match.write(chain_filename, 'MSEED')
                    if write_gzip:
                        with open(chain_filename, 'rb') as f_in, gzip.open(
                                chain_filename_gzip, 'wb') as f_out:
                            shutil.copyfileobj(f_in, f_out)
                        os.unlink(chain_filename)

            # increment the time interval
            start += time_interval
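A hedged sketch of invoking call_build_chains for a single station over a short window; the directory names and dates below are placeholders, not values from the source.

from obspy import UTCDateTime

call_build_chains(
    stations=['S12'],
    raw_dir='raw',        # hypothetical directory of 3-hourly raw MINISEED files
    chain_dir='chains',   # output directory; a per-station subdirectory is created
    start_time=UTCDateTime('1976-03-01T00:00:00.000000Z'),
    end_time=UTCDateTime('1976-03-02T00:00:00.000000Z'),
    read_gzip=True,
    write_gzip=True)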
Example #4
def discard_short_traces(stream):
    """
    Discard short traces which overlap with longer traces.
    Short traces are often of poor quality, but sometimes they overlap with
    a longer trace that covers the same period. If so, they can be safely discarded.
    """

    # copy the original stream
    return_stream = stream.copy()

    # select the timing channel and sort its traces by number of samples
    sorted_stream = return_stream.select(channel='_TT').sort(keys=['npts'])

    # if there is more than one trace in sorted_stream, see if there are any
    # traces to discard.
    if len(sorted_stream) > 1:

        # outer loop over traces, sorted by number of samples
        for tr in sorted_stream:
            # if the trace is short
            if tr.stats.npts < MIN_SAMPLE_LENGTH:
                start_timestamp = tr.data[0]
                end_timestamp = tr.data[-1]
                remove_flag = False
                # inner loop over traces, to check against
                for tr1 in sorted_stream:
                    # if the inner and outer trace are the same, do nothing
                    if trace_eq(tr, tr1):
                        continue
                    start_timestamp_check = tr1.data[0]
                    end_timestamp_check = tr1.data[-1]
                    # check the short trace lies within both ends of another trace
                    if (start_timestamp > start_timestamp_check
                            and end_timestamp < end_timestamp_check):
                        remove_flag = True
                        msg = 'Removing short trace: {}'.format(tr)
                        logging.debug(msg)
                        stream_short = stream_select(
                            stream,
                            network=tr.stats.network,
                            station=tr.stats.station,
                            location=tr.stats.location,
                            starttime=tr.stats.starttime,
                            endtime=tr.stats.endtime)
                        for tr2 in stream_short:
                            # remove from the return_stream
                            return_stream.remove(tr2)
                        # break the inner loop (and continue the outer one)
                        break

                if remove_flag:
                    # the trace was removed, so move on to the next short trace
                    continue

            # the stream is sorted by trace length, so once a trace is long
            # enough we can stop checking
            else:
                break

    return return_stream
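A small synthetic illustration of the behaviour, assuming the module's helpers (stream_select, trace_eq) and MIN_SAMPLE_LENGTH are in scope, and that MIN_SAMPLE_LENGTH is larger than the short trace built here. The station, channel name and sample spacing are placeholders.

import numpy as np
from obspy import Trace, Stream, UTCDateTime

delta = 0.6038  # assumed nominal sample spacing
t0 = UTCDateTime('1976-03-01T00:00:00').timestamp

# long timing trace: the data samples are timestamps
long_tr = Trace(t0 + np.arange(1000) * delta)
long_tr.stats.station = 'S12'
long_tr.stats.channel = '_TT'
long_tr.stats.delta = delta

# short trace whose timestamps fall entirely inside the long one
short_tr = Trace(t0 + (100 + np.arange(50)) * delta)
short_tr.stats.station = 'S12'
short_tr.stats.channel = '_TT'
short_tr.stats.delta = delta

cleaned = discard_short_traces(Stream(traces=[long_tr, short_tr]))
# expected: only the long trace remains in cleaned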
Example #5
def build_chains(stream):
    '''
    Make 'chains' by checking the framecount and inserting gaps.
    '''
    log_filename = 'logs/build_chains.log'
    logging.basicConfig(filename=log_filename,
                        filemode='w',
                        level=logging.INFO)
    # logging.basicConfig(filename=log_filename, filemode='w', level=logging.DEBUG)

    # quick check to make sure only one station
    station = stream[0].stats.station

    if len(stream) != len(stream.select(station=station)):
        raise ValueError("More than one station in the stream")

    # TODO only need 3 if running SPZ
    # original_data = np.full((n,3), INVALID, 'int32')

    # get rid of any short traces that overlap with other traces

    stream = discard_short_traces(stream)

    # begin by selecting the raw trace, and sorting by starttime
    FR_stream = stream.select(channel='_FR')
    FR_stream = FR_stream.sort(keys=['starttime'])

    return_stream = Stream()

    # for each of the raw traces
    for fs in FR_stream:

        # find the matching traces with the same start and end time
        original = stream_select(stream,
                                 network=fs.stats.network,
                                 station=fs.stats.station,
                                 location=fs.stats.location,
                                 starttime=fs.stats.starttime,
                                 endtime=fs.stats.endtime)

        # get the stream for the timing trace
        TT_stream = original.select(channel='_TT')

        start_pointer = 0
        pointer = 0
        consecutive_invalid = 0
        valid_chain = False
        len_data = len(fs.data)

        # TODO remove invalid nasty data

        while start_pointer < len_data:
            # loop through data from start pointer to the end

            for i, framecount1 in enumerate(fs.data[start_pointer:]):

                # pointer is the CURRENT index
                pointer = start_pointer + i

                match_timestamp1 = TT_stream[0].data[pointer]

                # first step - look for a short chain, assuming the first framecount in the trace is OK
                if i == 0:
                    msg = ('i = 0, {} {} {} {}'.format(
                        i, start_pointer, framecount1,
                        UTCDateTime(match_timestamp1)))
                    logging.debug(msg)
                    chain_framecount0 = fs.data[start_pointer]
                    chain_timestamp0 = TT_stream[0].data[start_pointer]
                    chain_pointer0 = start_pointer
                    # make a pointer_array
                    n = fs.stats.npts
                    pointer_array = np.full(n, INVALID, 'int32')
                    pointer_array[start_pointer] = start_pointer
                    valid_chain = False
                    consecutive_invalid = 0
                    # if the framecount is out of range, continue
                    if framecount1 < 0 or framecount1 > 89.75:
                        msg = 'invalid framecount'
                        logging.debug(msg)
                        break

                else:  # records where i > 0

                    # use for debugging
                    # if i < 6:
                    #     msg = ('i = {} {} {}'.format(i, start_pointer, framecount1))
                    #     logging.debug(msg)

                    # if the framecount is out of range, start again
                    if framecount1 < 0 or framecount1 > 89.75:
                        if i < 4:
                            # unable to make a chain of 4, so break out
                            valid_chain = False
                            msg = 'invalid framecount, less than 4 {}'.format(
                                start_pointer)
                            logging.debug(msg)
                            break
                        else:
                            # we just ignore it
                            continue
                    # if the frame range is valid
                    else:
                        # check for the correct sample index from the framecount
                        # and timestamp
                        msg = ('Framecount {}'.format(framecount1))
                        logging.debug(msg)
                        sample_diff = _calc_match(match_timestamp1,
                                                  chain_timestamp0,
                                                  chain_framecount0,
                                                  framecount1,
                                                  obs_delta0=DELTA)

                        if sample_diff is not None:
                            pointer_array[
                                pointer] = sample_diff + chain_pointer0
                            msg = ('Sample, i, framecount and pointer array: '
                                   '{} {} {} {}'.format(
                                       sample_diff, i, framecount1,
                                       pointer_array[0:7]))
                            logging.debug(msg)

                            # check that the pointer values are consecutive
                            if pointer_array[pointer -
                                             1] + 1 != pointer_array[pointer]:
                                consecutive_invalid += 1
                            else:
                                consecutive_invalid = 0

                            # the current one is valid, so update the
                            # timestamp and framecount
                            chain_timestamp0 = match_timestamp1
                            chain_framecount0 = framecount1
                            chain_pointer0 += sample_diff
                        else:
                            consecutive_invalid += 1

                        # if i is 3 and the framecounts have been consecutive
                        # then mark the chain as valid
                        if i == 3 and consecutive_invalid == 0:
                            valid_chain = True

                        if i < 4 and consecutive_invalid != 0:
                            # unable to make a chain of 4, so break out
                            msg = (
                                'Unable to make a chain of 4, so break out {} {} {}'
                                .format(sample_diff, pointer, framecount1))
                            logging.debug(msg)
                            break

                        # break the chain if more than 3 framecounts have been
                        # invalid
                        if consecutive_invalid > 3:
                            break

            # make a chain from the previous execution of the loop if a
            # valid one exists
            if valid_chain:
                re_stream, last_pointer = _reconstruct_streams(
                    original, pointer_array)
                return_stream += re_stream
                start_pointer = last_pointer + 1
                valid_chain = False
                msg = ('Start pointer after valid {} {} {}'.format(
                    start_pointer, last_pointer, len_data))
                logging.debug(msg)
            else:
                start_pointer = start_pointer + 1
                msg = ('Start pointer after invalid {} {}'.format(
                    start_pointer, len_data))
                logging.debug(msg)

    return return_stream
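To make the chain-validity rule in the inner loop easier to follow, here is a purely illustrative, standalone restatement: a chain only starts if the first four framecounts are in range (0 to 89.75) and advance consecutively, and it breaks after more than three consecutive invalid samples. The step and wrap values below are assumptions, and this sketch does not reproduce the module's _calc_match timing test.

def chain_can_start(framecounts, step=0.25, wrap=90.0):
    # Illustrative only: check that the first four framecounts are in range
    # and form a consecutive run (advancing by `step`, wrapping at `wrap`).
    if len(framecounts) < 4:
        return False
    for fc in framecounts[:4]:
        if fc < 0 or fc > 89.75:
            return False
    for prev, curr in zip(framecounts[:3], framecounts[1:4]):
        if abs((prev + step) % wrap - curr) > 1e-6:
            return False
    return True

print(chain_can_start([89.5, 89.75, 0.0, 0.25]))   # True - wraps cleanly
print(chain_can_start([10.0, 10.25, 33.5, 10.75])) # False - jump in the sequence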