def manual_remove(stream, manual_remove_list):
    '''
    Remove manually flagged traces from the stream. Each entry in
    manual_remove_list is a whitespace-separated record:
    network station location starttime endtime npts percent_invalid
    (npts and percent_invalid are parsed but not used for matching).
    '''
    for line in manual_remove_list:
        (network, station, location, starttime, endtime, npts,
         percent_invalid) = line.split()
        starttime = UTCDateTime(starttime)
        endtime = UTCDateTime(endtime)
        match = stream_select(stream, network=network, station=station,
                              location=location, starttime=starttime,
                              endtime=endtime)
        for mat in match:
            stream.remove(mat)
    return stream
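# Example usage of manual_remove() (a minimal sketch, not part of the
# original module). The file path, network code and removal entry below
# are hypothetical.
def _example_manual_remove():
    from obspy import read
    stream = read('chains/S12/1969-11-19T00:00:00_S12.MINISEED.gz')
    manual_remove_list = [
        'XA S12 01 1969-11-19T00:12:34.000000Z '
        '1969-11-19T00:42:34.000000Z 11875 95.2',
    ]
    return manual_remove(stream, manual_remove_list)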
def splice_chains(stream, manual_remove_list=None, starttime0=None,
                  framecount0=None, adjust0=None, obs_delta0=None):
    '''
    Splice the records (chains) together.
    Remember that the absolute timing depends on the previous stream, so
    run call_splice_chains in date order.
    '''
    log_filename = 'logs/splice.log'
    logging.basicConfig(filename=log_filename, filemode='w',
                        level=logging.INFO)
    # logging.basicConfig(filename=log_filename, filemode='w',
    #                     level=logging.DEBUG)

    if adjust0 is None:
        if starttime0 is not None or framecount0 is not None:
            msg = 'If starttime0 or framecount0 is set, adjust0 must also be set.'
            logging.warning(msg)
            raise ValueError(msg)

    # quick check to make sure there is only one station
    station = stream[0].stats.station
    if len(stream) != len(stream.select(station=station)):
        raise ValueError("More than one station in the stream")

    # begin by selecting the frame traces, and sorting by starttime
    frm_stream = stream.select(channel='AFR')
    frm_stream = frm_stream.sort(keys=['starttime'])

    # if anything needs removing manually, remove it here
    if manual_remove_list is not None:
        frm_stream = manual_remove(frm_stream, manual_remove_list)

    return_stream = Stream()

    for fs in frm_stream:
        # is the chain valid?
        st_ATT = stream_select(stream, network=fs.stats.network,
                               station=fs.stats.station,
                               location=fs.stats.location,
                               starttime=fs.stats.starttime,
                               endtime=fs.stats.endtime, channel='ATT')
        tim1 = st_ATT[0].data[0]
        tim2 = st_ATT[0].data[-1]
        obs_delta1 = (tim2 - tim1) / (st_ATT[0].stats.npts - 1)

        # check whether the sampling-interval error is out of range
        percent_delta_error = abs(((obs_delta1 - DELTA) / DELTA) * 100)
        if percent_delta_error > PERCENT_DELTA_ERROR:
            msg = 'obs_delta1 == {} {} {} {} - reject automatically'.format(
                fs.id, fs.stats.starttime, fs.stats.endtime, obs_delta1)
            logging.info(msg)
            print(msg)
            continue

        # if starttime0, framecount0 and adjust0 have not already been
        # set, set them from the earliest trace in the stream
        if adjust0 is None:
            starttime0 = fs.stats.starttime
            framecount0 = fs.data[0]
            adjust0 = 0
            obs_delta0 = DELTA

            # find the matching traces
            match = stream_select(stream, network=fs.stats.network,
                                  station=fs.stats.station,
                                  location=fs.stats.location,
                                  starttime=fs.stats.starttime,
                                  endtime=fs.stats.endtime)
            return_stream += match
            continue

        # find the starttime and framecount of the current trace
        starttime1 = fs.stats.starttime
        endtime1 = fs.stats.endtime
        framecount1 = fs.data[0]

        # adjust the starttime
        adjust_starttime0 = starttime0 - adjust0

        # estimate the sample number of the current trace, assuming it
        # continues on from the last successful trace
        sample_idx = _calc_match_samp_frame(starttime1, adjust_starttime0,
                                            framecount0, framecount1,
                                            obs_delta0=obs_delta0,
                                            obs_delta1=obs_delta1)

        # sample_idx is None when the match is invalid
        if sample_idx is not None:
            # estimate the new starttime for the current trace
            est_starttime1 = starttime0 + (sample_idx * DELTA)
            # check that the adjustment is not getting too large
            adjust1 = est_starttime1 - starttime1
            msg = ('Est starttime1: {} Starttime1: {} Adjust1: {} '
                   'Adjust0: {} Adjust diff: {}'.format(
                       est_starttime1, starttime1, adjust1, adjust0,
                       abs(adjust1 - adjust0)))
            logging.debug(msg)
            if abs(adjust1 - adjust0) < ABSOLUTE_ADJUST_TIME:
                # record the change in the log
                msg = ('adjust_time:{}, for station: {}, location: {}, '
                       'starttime: {}, endtime: {}'.format(
                           adjust1, fs.stats.station, fs.stats.location,
                           starttime1, endtime1))
                logging.debug(msg)

                # update the starttimes for the traces which match the
                # other details
                st_update = stream_select(stream, network=fs.stats.network,
                                          station=fs.stats.station,
                                          location=fs.stats.location,
                                          starttime=fs.stats.starttime,
                                          endtime=fs.stats.endtime)
                # loop through the matching traces
                for tr in st_update:
                    # adjust the trace starttime
                    tr.stats.starttime = est_starttime1

                return_stream += st_update

                # update starttime0, framecount0 and adjust0 with details
                # from this trace
                starttime0 = est_starttime1
                framecount0 = framecount1
                adjust0 = adjust1

    if len(return_stream) > 0:
        # calculate the overall length and write it to the log file
        length_stream = return_stream.select(channel='ATT')
        length_stream = length_stream.sort(keys=['starttime'])
        length_stream2 = length_stream.copy()
        length_stream2 = length_stream2.sort(keys=['endtime'], reverse=True)
        elapsed_time = (length_stream2[0].stats.endtime -
                        length_stream[0].stats.starttime)
        elapsed_timestamps = (length_stream2[0].data[-1] -
                              length_stream[0].data[0])
        obs_delta = elapsed_timestamps / (elapsed_time / DELTA)
        msg = ('elapsed_time: {} elapsed_timestamps: {} obs_delta: {}'.format(
            round(elapsed_time, 3), round(elapsed_timestamps, 3), obs_delta))
        if elapsed_timestamps > 10801. or elapsed_timestamps < 10799.:
            logging.warning(msg)
        else:
            logging.info(msg)

    return return_stream, starttime0, framecount0, adjust0
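# Example usage of splice_chains() (a hedged sketch, not part of the
# original module; the glob pattern is hypothetical). Because the
# absolute timing depends on the previous stream, the chain files are
# processed in date order and the returned starttime0, framecount0 and
# adjust0 are fed into the next call.
def _example_splice_chains():
    from obspy import read
    starttime0 = framecount0 = adjust0 = None
    spliced_stream = Stream()
    for filename in sorted(glob.glob('chains/S12/*.MINISEED.gz')):
        stream = read(filename)
        (spliced, starttime0, framecount0, adjust0) = splice_chains(
            stream, starttime0=starttime0, framecount0=framecount0,
            adjust0=adjust0,
            # obs_delta0 is not returned, so reuse the nominal DELTA
            obs_delta0=DELTA)
        spliced_stream += spliced
    return spliced_stream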
def call_build_chains(stations=['S11', 'S12', 'S14', 'S15', 'S16'],
                      starttime0=None, framecount0=None, adjust0=None,
                      obs_delta0=DELTA, timestamp0=None,
                      framecount_adjust=None, raw_dir='.', chain_dir='.',
                      start_time=UTCDateTime('1969-07-21T03:00:00.000000Z'),
                      end_time=UTCDateTime('1977-09-30T21:00:00.000000Z'),
                      read_gzip=True, write_gzip=True):
    '''
    Calls build_chains() for each station and each 3-hour interval.
    '''
    log_filename = 'logs/build_chains.log'
    logging.basicConfig(filename=log_filename, filemode='w',
                        level=logging.INFO)
    # logging.basicConfig(filename=log_filename, filemode='w',
    #                     level=logging.DEBUG)

    for station in stations:
        # check that the overall directory exists
        if not os.path.exists(chain_dir):
            msg = ("The directory {} doesn't exist".format(chain_dir))
            raise IOError(msg)
        else:
            # make the subdirectory with the station name
            chain_dir_station = os.path.join(chain_dir, station)
            if not os.path.exists(chain_dir_station):
                os.makedirs(chain_dir_station)

    # build chains for each station
    for station in stations:
        raw_dir_station = os.path.join(raw_dir, station)
        chain_dir_station = os.path.join(chain_dir, station)

        time_interval = timedelta(hours=3)
        start = start_time
        while start < end_time:
            # work out the base filenames
            raw_filename = '%s_%s' % (start.strftime("%Y-%m-%dT%H:%M:%S"),
                                      station)
            raw_filename = os.path.join(raw_dir_station, raw_filename)
            if read_gzip:
                raw_filename = '%s.MINISEED.gz' % (raw_filename)
            else:
                raw_filename = '%s.MINISEED' % (raw_filename)

            base_chain_filename = '%s_%s' % (
                start.strftime("%Y-%m-%dT%H:%M:%S"), station)
            base_chain_filename = os.path.join(chain_dir_station,
                                               base_chain_filename)

            # read in the raw MINISEED file
            try:
                stream = read(raw_filename)
            except FileNotFoundError:
                msg = 'chains.py cannot find file: {}'.format(raw_filename)
                print(msg)
                logging.info(msg)
                # increment the time interval
                start += time_interval
                continue

            # select just this station (not strictly necessary, but just
            # in case)
            stream = stream.select(station=station)

            # delete any existing chains
            chain_filename = '%s%s' % (base_chain_filename, '*.MINISEED*')
            for i in glob.glob(chain_filename):
                os.unlink(i)

            if len(stream) > 0:
                # build the chains (for this station)
                stream2 = build_chains(stream=stream)
                stream3 = stream2.select(channel='ATT')
                for i, tr in enumerate(stream3):
                    chain_filename = '%s_%s.%s' % (base_chain_filename, i,
                                                   'MINISEED')
                    if write_gzip:
                        chain_filename_gzip = '%s.gz' % (chain_filename)
                    match = stream_select(stream2,
                                          network=tr.stats.network,
                                          station=tr.stats.station,
                                          location=tr.stats.location,
                                          starttime=tr.stats.starttime,
                                          endtime=tr.stats.endtime)
                    # split the traces if they contain masked (gappy)
                    # data, then write out
                    match = match.split()
                    match.write(chain_filename, 'MSEED')
                    if write_gzip:
                        with open(chain_filename, 'rb') as f_in, gzip.open(
                                chain_filename_gzip, 'wb') as f_out:
                            shutil.copyfileobj(f_in, f_out)
                        os.unlink(chain_filename)

            # increment the time interval
            start += time_interval
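# Example call of call_build_chains() (a minimal sketch, not part of the
# original module; the directory names and date range are hypothetical).
# Raw 3-hour MINISEED files are expected in raw_dir/<station>/, and the
# chains are written to chain_dir/<station>/.
def _example_call_build_chains():
    call_build_chains(stations=['S12'], raw_dir='raw', chain_dir='chains',
                      start_time=UTCDateTime('1969-11-19T00:00:00.000000Z'),
                      end_time=UTCDateTime('1969-11-20T00:00:00.000000Z'),
                      read_gzip=True, write_gzip=True)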
def discard_short_traces(stream):
    """
    Discard short traces which overlap with longer traces.
    Short traces are often quite poor quality, but sometimes occur where a
    longer trace exists. If so, they can safely be discarded.
    """
    # copy the original stream
    return_stream = stream.copy()
    # sort a stream (from the timing channel) by the number of samples
    sorted_stream = return_stream.select(channel='_TT').sort(keys=['npts'])

    # if there is more than one trace in sorted_stream, see if there are
    # any traces to discard
    if len(sorted_stream) > 1:
        # outer loop of traces, sorted by number of samples
        for tr in sorted_stream:
            # if the trace is short
            if tr.stats.npts < MIN_SAMPLE_LENGTH:
                start_timestamp = tr.data[0]
                end_timestamp = tr.data[-1]

                # inner loop of traces, to check against
                for tr1 in sorted_stream:
                    remove_flag = False
                    # if the inner and outer trace are the same, do nothing
                    if trace_eq(tr, tr1):
                        continue
                    start_timestamp_check = tr1.data[0]
                    end_timestamp_check = tr1.data[-1]
                    # check whether the short trace overlaps both ends of
                    # another trace
                    if (start_timestamp > start_timestamp_check and
                            end_timestamp < end_timestamp_check):
                        remove_flag = True
                        msg = 'Removing short trace: {}'.format(tr)
                        logging.debug(msg)
                        stream_short = stream_select(
                            stream, network=tr.stats.network,
                            station=tr.stats.station,
                            location=tr.stats.location,
                            starttime=tr.stats.starttime,
                            endtime=tr.stats.endtime)
                        for tr2 in stream_short:
                            # remove from the return_stream
                            return_stream.remove(tr2)

                    if remove_flag:
                        # break the inner loop (and continue the outer one)
                        break

                if remove_flag:
                    # we removed the trace, so move on to the next short one
                    continue
            else:
                # the stream is ordered by trace length, so we can stop
                # once the traces are long enough
                break

    return return_stream
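# Example usage of discard_short_traces() (a minimal sketch, not part of
# the original module; the file name is hypothetical). It is normally
# called from build_chains(), but can also be run on its own.
def _example_discard_short_traces():
    from obspy import read
    stream = read('raw/S12/1969-11-19T00:00:00_S12.MINISEED.gz')
    return discard_short_traces(stream)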
def build_chains(stream):
    '''
    Make 'chains' by checking the framecount and inserting gaps.
    '''
    log_filename = 'logs/build_chains.log'
    logging.basicConfig(filename=log_filename, filemode='w',
                        level=logging.INFO)
    # logging.basicConfig(filename=log_filename, filemode='w',
    #                     level=logging.DEBUG)

    # quick check to make sure there is only one station
    station = stream[0].stats.station
    if len(stream) != len(stream.select(station=station)):
        raise ValueError("More than one station in the stream")

    # TODO only need 3 if running SPZ
    # original_data = np.full((n,3), INVALID, 'int32')

    # get rid of any short traces that overlap with other streams
    stream = discard_short_traces(stream)

    # begin by selecting the raw trace, and sorting by starttime
    FR_stream = stream.select(channel='_FR')
    FR_stream = FR_stream.sort(keys=['starttime'])

    return_stream = Stream()

    # for each of the raw streams
    for fs in FR_stream:
        # find the matching streams with the same start and end time
        original = stream_select(stream, network=fs.stats.network,
                                 station=fs.stats.station,
                                 location=fs.stats.location,
                                 starttime=fs.stats.starttime,
                                 endtime=fs.stats.endtime)
        # get the stream for the timing trace
        TT_stream = original.select(channel='_TT')

        start_pointer = 0
        pointer = 0
        consecutive_invalid = 0
        valid_chain = False
        len_data = len(fs.data)

        # TODO remove invalid data

        while start_pointer < len_data:
            # loop through the data from the start pointer to the end
            for i, framecount1 in enumerate(fs.data[start_pointer:]):
                # pointer is the CURRENT index
                pointer = start_pointer + i
                match_timestamp1 = TT_stream[0].data[pointer]
                # first step - look for a short chain, assuming the first
                # sample in the trace is ok
                if i == 0:
                    msg = ('i = 0, {} {} {} {}'.format(
                        i, start_pointer, framecount1,
                        UTCDateTime(match_timestamp1)))
                    logging.debug(msg)
                    chain_framecount0 = fs.data[start_pointer]
                    chain_timestamp0 = TT_stream[0].data[start_pointer]
                    chain_pointer0 = start_pointer
                    # make a pointer array
                    n = fs.stats.npts
                    pointer_array = np.full(n, INVALID, 'int32')
                    pointer_array[start_pointer] = start_pointer
                    valid_chain = False
                    consecutive_invalid = 0
                    # if the framecount is out of range (valid framecounts
                    # run from 0 to 89.75), break out and start again
                    if framecount1 < 0 or framecount1 > 89.75:
                        msg = 'invalid framecount'
                        logging.debug(msg)
                        break
                else:
                    # if the framecount is out of range, start again
                    if framecount1 < 0 or framecount1 > 89.75:
                        if i < 4:
                            # unable to make a chain of 4, so break out
                            valid_chain = False
                            msg = ('invalid framecount, less than 4 '
                                   '{}'.format(start_pointer))
                            logging.debug(msg)
                            break
                        else:
                            # just ignore it
                            continue
                    # if the framecount is valid
                    else:
                        # check for the correct sample index from the
                        # framecount and timestamp
                        msg = ('Framecount {}'.format(framecount1))
                        logging.debug(msg)
                        sample_diff = _calc_match(match_timestamp1,
                                                  chain_timestamp0,
                                                  chain_framecount0,
                                                  framecount1,
                                                  obs_delta0=DELTA)
                        if sample_diff is not None:
                            pointer_array[pointer] = (sample_diff +
                                                      chain_pointer0)
                            msg = ('Sample, i, framecount and pointer '
                                   'array, {} {} {} {}'.format(
                                       sample_diff, i, framecount1,
                                       str(pointer_array[0:7])))
                            logging.debug(msg)
                            # check for consecutive values within the frame
                            if (pointer_array[pointer - 1] + 1 !=
                                    pointer_array[pointer]):
                                consecutive_invalid += 1
                            else:
                                consecutive_invalid = 0
                                # the current one is valid, so update the
                                # timestamp and framecount
                                chain_timestamp0 = match_timestamp1
                                chain_framecount0 = framecount1
                                chain_pointer0 += sample_diff
                        else:
                            consecutive_invalid += 1

                # if i is 3 and the framecounts have all been consecutive,
                # mark the chain as valid
                if i == 3 and consecutive_invalid == 0:
                    valid_chain = True

                if i < 4 and consecutive_invalid != 0:
                    # unable to make a chain of 4, so break out
                    msg = ('Unable to make a chain of 4, so break out '
                           '{} {} {}'.format(sample_diff, pointer,
                                             framecount1))
                    logging.debug(msg)
                    break

                # break the chain if more than 3 framecounts have been
                # invalid
                if consecutive_invalid > 3:
                    break

            # make a chain from the previous execution of the loop, if a
            # valid one exists
            if valid_chain:
                re_stream, last_pointer = _reconstruct_streams(
                    original, pointer_array)
                return_stream += re_stream
                start_pointer = last_pointer + 1
                valid_chain = False
                msg = ('Start pointer after valid {} {} {}'.format(
                    start_pointer, last_pointer, len_data))
                logging.debug(msg)
            else:
                start_pointer = start_pointer + 1
                msg = ('Start pointer after invalid {} {}'.format(
                    start_pointer, len_data))
                logging.debug(msg)

    return return_stream
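# Example usage of build_chains() (a hedged sketch, not part of the
# original module; the file name is hypothetical). It is normally driven
# by call_build_chains(), which also handles reading, writing and
# gzipping the files.
def _example_build_chains():
    from obspy import read
    stream = read('raw/S12/1969-11-19T00:00:00_S12.MINISEED.gz')
    stream = stream.select(station='S12')
    chained = build_chains(stream)
    # each ATT (timing) trace marks one reconstructed chain
    for tr in chained.select(channel='ATT'):
        print(tr)
    return chained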