def test_NllRead(self):
    """Check that H5NllSingleGrid reproduces a NonLinLoc time grid.

    The ``test.time`` header and buffer under ``$WAVELOC_PATH/test_data``
    are read directly, then the same files are loaded through
    H5NllSingleGrid and the header values, buffer shape and one randomly
    chosen sample are compared.
    """
    from array import array

    base_path = os.getenv('WAVELOC_PATH')
    nll_name = os.path.join(base_path, 'test_data', 'test.time')
    nll_hdr_file = "%s.hdr" % nll_name
    nll_buf_file = "%s.buf" % nll_name

    info = read_hdr_file(nll_hdr_file)
    nx = info['nx']
    ny = info['ny']
    nz = info['nz']

    # Read the raw float buffer; the context manager closes the file even
    # if the buffer is shorter than expected and fromfile() raises.
    buf = array('f')
    with open(nll_buf_file, 'rb') as f:
        buf.fromfile(f, nx * ny * nz)
    np_buf = np.array(buf)

    # Random flat index; the product of the three draws is always smaller
    # than nx * ny * nz, so it is a valid index into the buffer.
    b_index = np.random.randint(0, nx) * np.random.randint(0, ny) * \
        np.random.randint(0, nz)

    filename = 'test_nll.hdf5'
    sg = H5NllSingleGrid(filename, nll_name)
    try:
        self.assertEqual(info['ny'], sg.grid_info['ny'])
        self.assertAlmostEqual(info['orig_lat'], sg.grid_info['orig_lat'])
        self.assertEqual(info['station'], 'FIU')
        self.assertEqual(info['station'], sg.grid_info['station'])
        self.assertEqual(np_buf.shape, sg.grid_data.shape)
        self.assertAlmostEqual(np_buf[b_index], sg.grid_data[b_index])
    finally:
        # Drop the grid object before removing its backing file, and make
        # sure the temporary file goes away even when an assertion fails.
        del sg
        del buf
        os.remove(filename)
def test_NllRead(self):
    """Check that H5NllSingleGrid reproduces a NonLinLoc time grid.

    The ``test.time`` header and buffer under ``$WAVELOC_PATH/test_data``
    are read directly, then the same files are loaded through
    H5NllSingleGrid and the header values, buffer shape and one randomly
    chosen sample are compared.
    """
    from array import array

    base_path = os.getenv('WAVELOC_PATH')
    nll_name = os.path.join(base_path, 'test_data', 'test.time')
    nll_hdr_file = "%s.hdr" % nll_name
    nll_buf_file = "%s.buf" % nll_name

    info = read_hdr_file(nll_hdr_file)
    nx = info['nx']
    ny = info['ny']
    nz = info['nz']

    # Read the raw float buffer; the context manager closes the file even
    # if the buffer is shorter than expected and fromfile() raises.
    buf = array('f')
    with open(nll_buf_file, 'rb') as f:
        buf.fromfile(f, nx*ny*nz)
    np_buf = np.array(buf)

    # Random flat index; the product of the three draws is always smaller
    # than nx*ny*nz, so it is a valid index into the buffer.
    b_index = np.random.randint(0, nx) * np.random.randint(0, ny) * \
        np.random.randint(0, nz)

    filename = 'test_nll.hdf5'
    sg = H5NllSingleGrid(filename, nll_name)
    try:
        self.assertEqual(info['ny'], sg.grid_info['ny'])
        self.assertAlmostEqual(info['orig_lat'], sg.grid_info['orig_lat'])
        self.assertEqual(info['station'], 'FIU')
        self.assertEqual(info['station'], sg.grid_info['station'])
        self.assertEqual(np_buf.shape, sg.grid_data.shape)
        self.assertAlmostEqual(np_buf[b_index], sg.grid_data[b_index])
    finally:
        # Drop the grid object before removing its backing file, and make
        # sure the temporary file goes away even when an assertion fails.
        del sg
        del buf
        os.remove(filename)
def get_interpolated_time_grids(opdict):
    """Return the travel-time grids interpolated onto the search grid.

    For every ``<time_grid>*.hdf5`` file found in ``<base_path>/lib`` the
    matching search-grid-sized file in
    ``<base_path>/out/<outdir>/time_grids`` is either loaded (when it
    exists and ``opdict['load_ttimes_buf']`` is true) or created by
    interpolating the full grid onto the search-grid geometry.

    :param opdict: options dictionary; reads 'base_path', 'time_grid',
        'search_grid', 'outdir' and 'load_ttimes_buf'.
    :returns: dict mapping the 'station' entry of each grid's info to the
        grid object.
    :raises UserWarning: if no matching .hdf5 time grid is found.
    """
    import glob
    from NllGridLib import read_hdr_file

    base_path = opdict['base_path']
    full_time_grids = glob.glob(
        os.path.join(base_path, 'lib', opdict['time_grid'] + '*.hdf5'))
    full_time_grids.sort()
    if not full_time_grids:
        raise UserWarning('No .hdf5 time grids found in directory %s'
                          % (os.path.join(base_path, 'lib')))

    # read the search grid
    search_grid = os.path.join(base_path, 'lib', opdict['search_grid'])
    tgrid_dir = os.path.join(base_path, 'out', opdict['outdir'],
                             'time_grids')
    if not os.path.exists(tgrid_dir):
        os.makedirs(tgrid_dir)
    search_info = read_hdr_file(search_grid)

    # geometry entries that must follow the search grid, not the full grid
    search_keys = ('x_orig', 'y_orig', 'z_orig',
                   'nx', 'ny', 'nz',
                   'dx', 'dy', 'dz')

    time_grids = {}

    # for each of the full-length time grids
    logging.info('Loading time grids ... ')
    for f_timegrid in full_time_grids:
        f_basename = os.path.basename(f_timegrid)
        # filename of the corresponding short-length grid (the one for
        # the search grid in particular)
        tgrid_filename = os.path.join(tgrid_dir, f_basename)

        if os.path.isfile(tgrid_filename) and opdict['load_ttimes_buf']:
            # file exists and we want to load it
            logging.debug('Loading %s' % tgrid_filename)
            grid = H5SingleGrid(tgrid_filename)
        else:
            # file does not exist, or re-creation is forced
            logging.info('Creating %s - Please be patient' % tgrid_filename)
            full_grid = H5SingleGrid(f_timegrid)
            # copy the common part of the grid info ...
            new_info = dict(full_grid.grid_info.items())
            # ... and make the geometry correspond to the search grid
            for key in search_keys:
                new_info[key] = search_info[key]
            # do interpolation
            grid = full_grid.interp_to_newgrid(tgrid_filename, new_info)
            # close full grid safely
            del full_grid

        # add to dictionary
        time_grids[grid.grid_info['station']] = grid

    return time_grids
def test_NllReadHdr(self):
    """Verify selected fields parsed from the search-grid header file."""
    hdr_path = os.path.join(os.getenv('WAVELOC_PATH'), 'test_data',
                            'test_grid.search.geo.hdr')
    hdr = read_hdr_file(hdr_path)

    # fields that must match exactly
    exact_fields = (
        ('nx', 51),
        ('y_orig', -10.),
        ('dz', 2.),
        ('proj_name', 'TRANS_SIMPLE'),
    )
    for key, expected in exact_fields:
        self.assertEqual(hdr[key], expected)

    # floating-point fields compared with unittest's default tolerance
    approx_fields = (
        ('orig_lat', 44.727000),
        ('orig_lon', 11.086000),
        ('map_rot', 0.),
    )
    for key, expected in approx_fields:
        self.assertAlmostEqual(hdr[key], expected)
def __init__(self, filename, nll_filename):
    """Build the grid from a NonLinLoc header/buffer file pair.

    :param filename: passed through to H5SingleGrid.__init__.
    :param nll_filename: NonLinLoc grid name without extension;
        ``<nll_filename>.hdr`` and ``<nll_filename>.buf`` are read.
    """
    from array import array

    hdr_file = "%s.hdr" % nll_filename
    buf_file = "%s.buf" % nll_filename

    info = read_hdr_file(hdr_file)
    n_values = info['nx']*info['ny']*info['nz']

    # Read nx*ny*nz 4-byte floats; the context manager closes the file
    # even if the buffer is too short and fromfile() raises.
    buf = array('f')
    with open(buf_file, 'rb') as f:
        buf.fromfile(f, n_values)

    H5SingleGrid.__init__(self, filename, buf, info)
def __init__(self, filename, nll_filename):
    """Build the grid from a NonLinLoc header/buffer file pair.

    :param filename: passed through to H5SingleGrid.__init__.
    :param nll_filename: NonLinLoc grid name without extension;
        ``<nll_filename>.hdr`` and ``<nll_filename>.buf`` are read.
    """
    from array import array

    hdr_file = "%s.hdr" % nll_filename
    buf_file = "%s.buf" % nll_filename

    info = read_hdr_file(hdr_file)
    n_values = info['nx'] * info['ny'] * info['nz']

    # Read nx * ny * nz 4-byte floats; the context manager closes the
    # file even if the buffer is too short and fromfile() raises.
    buf = array('f')
    with open(buf_file, 'rb') as f:
        buf.fromfile(f, n_values)

    H5SingleGrid.__init__(self, filename, buf, info)
def do_migration_setup_and_run(opdict):
    """
    Do setup and launch migration.

    Selects the input files (gradient, gaussian or kurtosis, depending on
    the 'kderiv' and 'gauss' options), loads the interpolated time grids
    and migrates successive time windows of length 'data_length' from
    'starttime' until 'endtime' is reached.

    :param opdict: WavelocOptions.opdict
    :raises UserWarning: if no data files match the selected glob, or if
        'data_overlap' is not smaller than 'data_length'.
    """
    base_path = opdict['base_path']
    runtime = opdict['time']
    reloc = opdict['reloc']

    # data
    data_dir = os.path.join(base_path, 'data', opdict['datadir'])
    if opdict['kderiv']:
        data_glob = opdict['gradglob']
        if opdict['gauss']:
            data_glob = opdict['gaussglob']
    else:
        data_glob = opdict['kurtglob']

    data_files = glob.glob(os.path.join(data_dir, data_glob))
    data_files.sort()
    if not data_files:
        msg = 'No data files found for %s and %s' % (data_dir, data_glob)
        logging.error(msg)
        raise UserWarning(msg)

    # grids
    search_grid_filename = os.path.join(base_path, 'lib',
                                        opdict['search_grid'])
    time_grids = get_interpolated_time_grids(opdict)

    # start and end times
    data_length = opdict['data_length']
    data_overlap = opdict['data_overlap']

    start_time = utcdatetime.UTCDateTime(opdict['starttime'])
    end_time = start_time + data_length
    final_end_time = utcdatetime.UTCDateTime(opdict['endtime'])

    time_shift_secs = data_length - data_overlap
    if time_shift_secs <= 0:
        # the window would never advance and the loop below never end
        msg = 'data_overlap (%s) must be smaller than data_length (%s)' % \
            (data_overlap, data_length)
        logging.error(msg)
        raise UserWarning(msg)

    ######### FOR EACH TIME SPAN - DO MIGRATION #############

    if runtime:
        t_ref = time()

    while start_time < final_end_time:

        # read data
        logging.info("Reading data : %s - %s."
                     % (start_time.isoformat(), end_time.isoformat()))
        data, delta = \
            read_data_compatible_with_time_dict(data_files, time_grids,
                                                start_time, end_time)

        if reloc:
            # zero the traces of stations whose kurtosis signal-to-noise
            # ratio is below the 'reloc_snr' threshold
            tr_glob = opdict['kurtglob']
            files = glob.glob(os.path.join(data_dir, tr_glob))
            traces, delta = \
                read_data_compatible_with_time_dict(files, time_grids,
                                                    start_time, end_time)
            for staname in sorted(traces):
                snr = np.max(traces[staname]) / np.mean(np.abs(
                    traces[staname]))
                # a station may be present in the kurtosis files only
                if snr < opdict['reloc_snr'] and staname in data:
                    data[staname] = np.zeros(len(data[staname]))

        # re-read grid_info at each iteration to make sure it is a clean
        # copy
        grid_info = read_hdr_file(search_grid_filename)

        # do migration if have enough data (3 is bare minimum)
        n_stations = len(data)
        if n_stations >= 3:
            logging.info("Migrating data : %s - %s."
                         % (start_time.isoformat(), end_time.isoformat()))
            do_migration_loop_continuous(opdict, data, delta, start_time,
                                         grid_info, time_grids)
        elif n_stations == 0:
            logging.warning('No data found between %s and %s.'
                            % (start_time.isoformat(), end_time.isoformat()))
        else:
            logging.warning('Insufficient data found between %s and %s.'
                            % (start_time.isoformat(), end_time.isoformat()))

        # Reset the start and end times to loop again
        start_time = start_time + time_shift_secs
        end_time = end_time + time_shift_secs

    if runtime:
        t = time() - t_ref
        logging.info("Time for migrating all time slices : %.2f s\n" % (t))
def do_plotting_setup_and_run(opdict, plot_wfm=True, plot_grid=True):
    """
    Plot migration grids and/or waveforms for every located event.

    Locations are read from ``out/<outdir>/loc/locations.dat`` and the
    smoothed stack maximum from
    ``out/<outdir>/stack/combined_stack_all.hdf5``. For each location the
    data are re-migrated around the origin time (when plot_grid is true)
    and/or the data, migration input and stack traces are cut around the
    event and plotted (when plot_wfm is true).

    :param opdict: WavelocOptions.opdict
    :param plot_wfm: if true, plot waveforms for each location.
    :param plot_grid: if true, re-migrate and plot the grid for each
        location.
    """
    # get / set info
    base_path = opdict['base_path']

    locfile = os.path.join(base_path, 'out', opdict['outdir'], 'loc',
                           'locations.dat')
    stackfile = os.path.join(base_path, 'out', opdict['outdir'], 'stack',
                             'combined_stack_all.hdf5')

    data_dir = os.path.join(base_path, 'data', opdict['datadir'])

    data_glob = opdict['dataglob']
    data_files = glob.glob(os.path.join(data_dir, data_glob))
    data_files.sort()

    kurt_glob = opdict['kurtglob']
    kurt_files = glob.glob(os.path.join(data_dir, kurt_glob))
    kurt_files.sort()
    mig_files = kurt_files

    if opdict['kderiv']:
        grad_glob = opdict['gradglob']
        grad_files = glob.glob(os.path.join(data_dir, grad_glob))
        grad_files.sort()
        mig_files = grad_files

        if opdict['gauss']:
            gauss_glob = opdict['gaussglob']
            gauss_files = glob.glob(os.path.join(data_dir, gauss_glob))
            gauss_files.sort()
            mig_files = gauss_files

    figdir = os.path.join(base_path, 'out', opdict['outdir'], 'fig')

    # stations
    # NOTE(review): the result is not used below; the call is kept so that
    # a missing or unreadable stations file is still reported here.
    stations_filename = os.path.join(base_path, 'lib', opdict['stations'])
    stations = read_stations_file(stations_filename)

    # grids
    search_grid_filename = os.path.join(base_path, 'lib',
                                        opdict['search_grid'])
    # read time grid information
    time_grids = get_interpolated_time_grids(opdict)

    # read locations
    locs = read_locs_from_file(locfile)

    # open stack file; closed in the finally clause even if plotting fails
    f_stack = h5py.File(stackfile, 'r')
    try:
        max_val = f_stack['max_val_smooth']
        stack_start_time = UTCDateTime(max_val.attrs['start_time'])

        for loc in locs:
            # generate the grids
            o_time = loc['o_time']
            start_time = o_time - opdict['plot_tbefore']
            end_time = o_time + opdict['plot_tafter']

            # re-read grid info to ensure clean copy
            grid_info = read_hdr_file(search_grid_filename)

            x = loc['x_mean']
            y = loc['y_mean']
            z = loc['z_mean']

            # get the corresponding travel-times for time-shifting
            ttimes = {}
            for sta in time_grids.keys():
                ttimes[sta] = time_grids[sta].value_at_point(x, y, z)

            # widen the window by the largest travel-time on both sides
            tshift_migration = max(ttimes.values())
            start_time_migration = start_time - tshift_migration
            end_time_migration = end_time + tshift_migration

            if plot_grid:
                logging.info('Plotting grid for location %s'
                             % o_time.isoformat())
                # TODO implement a rough estimation of the stack shift
                # based on propagation time across the whole network

                # read data
                mig_dict, delta = read_data_compatible_with_time_dict(
                    mig_files, time_grids, start_time_migration,
                    end_time_migration)
                # do migration
                do_migration_loop_continuous(opdict, mig_dict, delta,
                                             start_time_migration,
                                             grid_info, time_grids,
                                             keep_grid=True)
                # plot
                plotLocationGrid(loc, grid_info, figdir,
                                 opdict['plot_otime_window'])

            if plot_wfm:
                logging.info('Plotting waveforms for location %s'
                             % o_time.isoformat())

                # read data
                data_dict, delta = read_data_compatible_with_time_dict(
                    data_files, time_grids, start_time_migration,
                    end_time_migration)
                mig_dict, delta = read_data_compatible_with_time_dict(
                    mig_files, time_grids, start_time_migration,
                    end_time_migration)

                # number of samples in the plotting window
                n_window = int(np.round(
                    (opdict['plot_tbefore'] + opdict['plot_tafter']) /
                    delta))

                # cut desired portion out of data
                for sta in data_dict.keys():
                    tmp = data_dict[sta]
                    istart = int(np.round(
                        (start_time + ttimes[sta] - start_time_migration) /
                        delta))
                    iend = istart + n_window
                    # sanity check in case event is close to start or end
                    # of data
                    if istart < 0:
                        istart = 0
                    if iend > len(tmp):
                        iend = len(tmp)
                    # do slice
                    data_dict[sta] = tmp[istart:iend]
                    tmp = mig_dict[sta]
                    mig_dict[sta] = tmp[istart:iend]

                # retrieve relevant portion of stack max
                istart = int(np.round(
                    (o_time - opdict['plot_tbefore'] - stack_start_time) /
                    delta))
                iend = istart + n_window
                # sanity check in case event is close to start or end of
                # data
                if istart < 0:
                    # istart was computed in units of delta, so the start
                    # of the plotted stack moves by |istart| * delta
                    start_time = start_time + np.abs(istart) * delta
                    istart = 0
                if iend > len(max_val):
                    iend = len(max_val)
                # do slice
                stack_wfm = max_val[istart:iend]

                # plot
                plotLocationWaveforms(loc, start_time, delta, data_dict,
                                      mig_dict, stack_wfm, figdir)
    finally:
        f_stack.close()
def do_locations_prob_setup_and_run(opdict):
    """
    Setup and run probability-based locations on migration grids.

    For every location in ``out/<outdir>/loc/locations.dat`` the gradient
    data are re-migrated around the origin time, the resulting stack is
    cut to the window of interest and its expected coordinates,
    covariance and marginal distributions are computed. One text line per
    event is written to ``locations_prob.dat`` and the marginals are
    stored as one group per event in ``locations_prob.hdf5``.

    :param opdict: Parameters and options for Waveloc
        (WavelocOptions.opdict).
    """
    # get / set info
    base_path = opdict['base_path']
    space_only = opdict['probloc_spaceonly']

    locfile = os.path.join(base_path, 'out', opdict['outdir'], 'loc',
                           'locations.dat')
    locfile_prob = os.path.join(base_path, 'out', opdict['outdir'], 'loc',
                                'locations_prob.dat')
    locfile_hdf5 = os.path.join(base_path, 'out', opdict['outdir'], 'loc',
                                'locations_prob.hdf5')

    f_prob = open(locfile_prob, 'w')
    f_marginals = None
    try:
        # if locfile does not exist then make it by running trigger
        # location
        if not os.path.exists(locfile):
            logging.info(
                'No location found at %s. Running trigger location first...'
                % locfile)
            do_locations_trigger_setup_and_run(opdict)

        # data files (only the gradient files are migrated here)
        data_dir = os.path.join(base_path, 'data', opdict['datadir'])
        grad_glob = opdict['gradglob']
        grad_files = glob.glob(os.path.join(data_dir, grad_glob))
        grad_files.sort()

        # grids
        search_grid_filename = os.path.join(base_path, 'lib',
                                            opdict['search_grid'])

        # read time grid information
        time_grids = get_interpolated_time_grids(opdict)

        # read locations
        locs = read_locs_from_file(locfile)

        # prepare file for output of marginals
        f_marginals = h5py.File(locfile_hdf5, 'w')

        # iterate over locations
        for loc in locs:
            # create the appropriate grid on the fly
            o_time = loc['o_time']
            if space_only:
                start_time = o_time
                end_time = o_time
            else:
                start_time = o_time - 3 * loc['o_err_left']
                end_time = o_time + 3 * loc['o_err_right']

            # make a buffer for migration
            start_time_migration = start_time - 10.0
            end_time_migration = end_time + 10.0

            # re-read grid info to ensure clean copy
            grid_info = read_hdr_file(search_grid_filename)

            # read data
            grad_dict, delta = read_data_compatible_with_time_dict(
                grad_files, time_grids, start_time_migration,
                end_time_migration)

            # do migration (all metadata on grid is added to grid_info)
            do_migration_loop_continuous(opdict, grad_dict, delta,
                                         start_time_migration, grid_info,
                                         time_grids, keep_grid=True)

            # integrate to get the marginal probability density
            # distributions

            # get required info
            grid_starttime = grid_info['start_time']
            nx, ny, nz, nt = grid_info['grid_shape']
            dx, dy, dz, dt = grid_info['grid_spacing']
            x_orig, y_orig, z_orig = grid_info['grid_orig']

            # we are only interested in the time around the origin time
            # of the event
            it_left = int(np.round((start_time - grid_starttime) / dt))
            it_right = int(np.round((end_time - grid_starttime) / dt))
            it_true = int(np.round((o_time - grid_starttime) / dt))
            nt = (it_right - it_left) + 1

            # set up integration axes (wrt reference)
            x = np.arange(nx) * dx
            y = np.arange(ny) * dy
            z = np.arange(nz) * dz
            if not space_only:
                t = np.arange(nt) * dt

            # open the grid file and extract the portion of interest
            # (copy data) so the file can be closed straight away
            grid_filename = grid_info['dat_file']
            f = h5py.File(grid_filename, 'r')
            try:
                stack_grid = f['stack_grid']
                if space_only:
                    stack_3D = np.empty((nx, ny, nz))
                    stack_3D[:] = stack_grid[:, it_true].reshape(nx, ny, nz)
                else:
                    stack_4D = np.empty((nx, ny, nz, nt))
                    stack_4D[:] = stack_grid[:, it_left:it_right + 1].reshape(
                        nx, ny, nz, nt)
            finally:
                f.close()

            # Get expected values (normalizes grid internally)
            if space_only:
                exp_x, exp_y, exp_z, cov_matrix, prob_dict = \
                    compute_expected_coordinates3D(stack_3D, x, y, z,
                                                   return_2Dgrids=True)
            else:
                exp_x, exp_y, exp_z, exp_t, cov_matrix, prob_dict = \
                    compute_expected_coordinates4D(stack_4D, x, y, z, t,
                                                   return_2Dgrids=True)

            # put reference location back
            exp_x = exp_x + x_orig
            exp_y = exp_y + y_orig
            exp_z = exp_z + z_orig
            if space_only:
                exp_t = o_time
            else:
                exp_t = start_time + exp_t

            # extract uncertainties from covariance matrix
            if space_only:
                sig_x, sig_y, sig_z = np.sqrt(np.diagonal(cov_matrix))
                sig_t = (loc['o_err_left'] + loc['o_err_right']) / 2.
            else:
                sig_x, sig_y, sig_z, sig_t = np.sqrt(np.diagonal(cov_matrix))

            # save the marginals to a hdf5 file in loc subdirectory
            # (f_marginals); each event becomes a group in this one file
            grp = f_marginals.create_group(exp_t.isoformat())
            grp.create_dataset('x', data=x + x_orig)
            grp.create_dataset('y', data=y + y_orig)
            grp.create_dataset('z', data=z + z_orig)
            grp.create_dataset('prob_x', data=prob_dict['prob_x0'])
            grp.create_dataset('prob_y', data=prob_dict['prob_x1'])
            grp.create_dataset('prob_z', data=prob_dict['prob_x2'])
            grp.create_dataset('prob_xy', data=prob_dict['prob_x0_x1'])
            grp.create_dataset('prob_xz', data=prob_dict['prob_x0_x2'])
            grp.create_dataset('prob_yz', data=prob_dict['prob_x1_x2'])
            if not space_only:
                # time axis stored relative to the origin time
                grp.create_dataset('t', data=t - (o_time - start_time))
                grp.create_dataset('prob_t', data=prob_dict['prob_x3'])
                grp.create_dataset('prob_xt', data=prob_dict['prob_x0_x3'])
                grp.create_dataset('prob_yt', data=prob_dict['prob_x1_x3'])
                grp.create_dataset('prob_zt', data=prob_dict['prob_x2_x3'])

            # write the expected values to a plain text locations file;
            # the format string is built by concatenation so that no
            # source indentation leaks into the output line
            f_prob.write(
                "PROB DENSITY : T = %s s pm %.2f s, x= %.4f pm %.4f km, "
                "y= %.4f pm %.4f km, z= %.4f pm %.4f km\n"
                % (exp_t.isoformat(), sig_t,
                   exp_x, sig_x, exp_y, sig_y, exp_z, sig_z))
    finally:
        # close location files
        f_prob.close()
        if f_marginals is not None:
            f_marginals.close()
def generateSyntheticDirac(opdict,time_grids=None):
    """
    Create a synthetic dataset, migrate it and describe the result.

    One trace per station is built with a linearly decaying pulse of
    'syn_kwidth' seconds starting at the origin time plus the travel-time
    from the grid point ('syn_ix', 'syn_iy', 'syn_iz'). The traces are
    migrated into a 4D stack stored in ``out/<outdir>/grid/<syn_filename>``
    and the stack maxima are extracted to
    ``out/<outdir>/stack/stack_all_<syn_filename>``.

    :param opdict: WavelocOptions.opdict
    :param time_grids: optional dictionary of time grids keyed by station
        name; loaded with get_interpolated_time_grids(opdict) when None.
    :returns: dictionary with keys 'dat_file', 'stack_file', 'grid_shape',
        'grid_spacing', 'grid_orig', 'true_indexes' and 'start_time'. Its
        string form is also written to ``<syn_filename>.info`` in the grid
        directory.
    """
    from NllGridLib import read_stations_file, read_hdr_file
    from migration import migrate_4D_stack, extract_max_values
    from hdf5_grids import get_interpolated_time_grids

    load_time_grids = time_grids is None

    # define length and sampling frequency of synthetic data
    s_amplitude = opdict['syn_amplitude']
    s_data_length = opdict['syn_datalength']
    s_sample_freq = opdict['syn_samplefreq']
    s_filename = opdict['syn_filename']
    s_npts = int(s_data_length*s_sample_freq)
    # float literal: avoids integer division for an integer frequency
    s_delta = 1.0/s_sample_freq
    s_kwidth = opdict['syn_kwidth']
    s_nkwidth = int(round(s_kwidth*s_sample_freq))

    # define origin time
    s_t0 = opdict['syn_otime']

    base_path = opdict['base_path']
    outdir = opdict['outdir']
    test_grid_file = os.path.join(base_path,'out',outdir,'grid',s_filename)
    test_stack_file = os.path.join(base_path,'out',outdir,'stack',
                                   'stack_all_'+s_filename)
    test_info_file = os.path.join(base_path,'out',outdir,'grid',
                                  '%s.info'%s_filename)

    # get filenames for search grid and stations
    search_grid_filename = os.path.join(base_path,'lib',opdict['search_grid'])
    stations_filename = os.path.join(base_path,'lib',opdict['stations'])
    stations = read_stations_file(stations_filename)

    if 'sta_list' in opdict:
        sta_list = opdict['sta_list'].split(',')
    else:
        sta_list = stations.keys()

    # get parameters for noise etc
    syn_addnoise = opdict['syn_addnoise']

    #################################
    # start setting up synthetic data
    #################################

    grid_info = read_hdr_file(search_grid_filename)

    if load_time_grids:
        time_grids = get_interpolated_time_grids(opdict)

    #################################
    # create synthetic data
    #################################

    # choose hypocenter
    nx = grid_info['nx']
    ny = grid_info['ny']
    nz = grid_info['nz']

    dx = grid_info['dx']
    dy = grid_info['dy']
    dz = grid_info['dz']

    x_orig = grid_info['x_orig']
    y_orig = grid_info['y_orig']
    z_orig = grid_info['z_orig']

    ix = opdict['syn_ix']
    iy = opdict['syn_iy']
    iz = opdict['syn_iz']
    it = int(round(s_t0/s_delta))

    # retrieve travel times for chosen hypocenter and station list
    ib = ix*ny*nz + iy*nz + iz
    n_buf = nx*ny*nz
    logging.debug('ib for true hypocenter = %d'%ib)
    ttimes = {}
    for sta in sta_list:
        if sta in time_grids:
            ttimes[sta] = time_grids[sta].grid_data[ib]
        else:
            logging.info('Missing travel-time information for station %s. Ignoring station...'%sta)
    logging.debug('Travel-times for true hypocenter = %s'%ttimes)

    # construct data with these travel times
    data = {}
    for key,delay in ttimes.items():
        if syn_addnoise:
            s_snr = opdict['syn_snr']
            s = np.random.rand(s_npts)*s_amplitude/s_snr
        else:
            s = np.zeros(s_npts)
        atime = s_t0+delay
        i_atime = int(atime/s_delta)
        if i_atime+s_nkwidth > len(s):
            # NOTE(review): the pulse does not fit in the trace; the slice
            # assignment below presumably fails with a shape mismatch
            logging.error('syn_datalength is too small compared with geographical size of network ')
        # pulse decaying linearly from s_amplitude over s_nkwidth samples
        s[i_atime:i_atime+s_nkwidth] = \
            s_amplitude-np.arange(s_nkwidth)*(s_amplitude/float(s_nkwidth))
        data[key] = s

    # DO MIGRATION
    logging.info('Doing migration to %s'%test_grid_file)
    f = h5py.File(test_grid_file,'w')
    try:
        stack_grid = f.create_dataset('stack_grid',(n_buf,s_npts),'f',
                                      chunks=(1,s_npts))
        stack_shift_time = migrate_4D_stack(data,s_delta,time_grids,
                                            stack_grid)
        # re-read the shape after migration
        n_buf,nt = stack_grid.shape

        # add useful information to dataset
        for key,value in grid_info.items():
            stack_grid.attrs[key] = value
        stack_grid.attrs['dt'] = s_delta
        stack_grid.attrs['start_time'] = -stack_shift_time

        # extract max-stack
        logging.info('Extracting max_val etc. to %s'%test_stack_file)
        f_stack = h5py.File(test_stack_file,'w')
        try:
            # extract maxima
            extract_max_values(stack_grid,grid_info,f_stack)
            for name in f_stack:
                dset = f_stack[name]
                logging.debug('After extract_max_values : %s %f %f'%(name,np.max(dset),np.sum(dset)))
                dset.attrs['start_time'] = -stack_shift_time
                dset.attrs['dt'] = s_delta
        finally:
            f_stack.close()
    finally:
        # close the grid file even if migration or extraction failed
        f.close()
    logging.info('Saved 4D grid to file %s'%test_grid_file)

    shifted_it = it+int(round(stack_shift_time/s_delta))

    # SETUP information to pass back
    test_info = {}
    test_info['dat_file'] = test_grid_file
    test_info['stack_file'] = test_stack_file
    test_info['grid_shape'] = nx,ny,nz,nt
    test_info['grid_spacing'] = dx,dy,dz,s_delta
    test_info['grid_orig'] = x_orig,y_orig,z_orig
    test_info['true_indexes'] = (ix,iy,iz,shifted_it)
    test_info['start_time'] = -stack_shift_time

    logging.debug(test_info)
    # the context manager guarantees the info file is flushed and closed
    with open(test_info_file,'w') as f_info:
        f_info.write(str(test_info))

    return test_info
def generateSyntheticDirac(opdict, time_grids=None):
    """
    Create a synthetic dataset, migrate it and describe the result.

    One trace per station is built with a linearly decaying pulse of
    'syn_kwidth' seconds starting at the origin time plus the travel-time
    from the grid point ('syn_ix', 'syn_iy', 'syn_iz'). The traces are
    migrated into a 4D stack stored in ``out/<outdir>/grid/<syn_filename>``
    and the stack maxima are extracted to
    ``out/<outdir>/stack/stack_all_<syn_filename>``.

    :param opdict: WavelocOptions.opdict
    :param time_grids: optional dictionary of time grids keyed by station
        name; loaded with get_interpolated_time_grids(opdict) when None.
    :returns: dictionary with keys 'dat_file', 'stack_file', 'grid_shape',
        'grid_spacing', 'grid_orig', 'true_indexes' and 'start_time'. Its
        string form is also written to ``<syn_filename>.info`` in the grid
        directory.
    """
    from NllGridLib import read_stations_file, read_hdr_file
    from migration import migrate_4D_stack, extract_max_values
    from hdf5_grids import get_interpolated_time_grids

    load_time_grids = time_grids is None

    # define length and sampling frequency of synthetic data
    s_amplitude = opdict['syn_amplitude']
    s_data_length = opdict['syn_datalength']
    s_sample_freq = opdict['syn_samplefreq']
    s_filename = opdict['syn_filename']
    s_npts = int(s_data_length * s_sample_freq)
    # float literal: avoids integer division for an integer frequency
    s_delta = 1.0 / s_sample_freq
    s_kwidth = opdict['syn_kwidth']
    s_nkwidth = int(round(s_kwidth * s_sample_freq))

    # define origin time
    s_t0 = opdict['syn_otime']

    base_path = opdict['base_path']
    outdir = opdict['outdir']
    test_grid_file = os.path.join(base_path, 'out', outdir, 'grid',
                                  s_filename)
    test_stack_file = os.path.join(base_path, 'out', outdir, 'stack',
                                   'stack_all_' + s_filename)
    test_info_file = os.path.join(base_path, 'out', outdir, 'grid',
                                  '%s.info' % s_filename)

    # get filenames for search grid and stations
    search_grid_filename = os.path.join(base_path, 'lib',
                                        opdict['search_grid'])
    stations_filename = os.path.join(base_path, 'lib', opdict['stations'])
    stations = read_stations_file(stations_filename)

    if 'sta_list' in opdict:
        sta_list = opdict['sta_list'].split(',')
    else:
        sta_list = stations.keys()

    # get parameters for noise etc
    syn_addnoise = opdict['syn_addnoise']

    #################################
    # start setting up synthetic data
    #################################

    grid_info = read_hdr_file(search_grid_filename)

    if load_time_grids:
        time_grids = get_interpolated_time_grids(opdict)

    #################################
    # create synthetic data
    #################################

    # choose hypocenter
    nx = grid_info['nx']
    ny = grid_info['ny']
    nz = grid_info['nz']

    dx = grid_info['dx']
    dy = grid_info['dy']
    dz = grid_info['dz']

    x_orig = grid_info['x_orig']
    y_orig = grid_info['y_orig']
    z_orig = grid_info['z_orig']

    ix = opdict['syn_ix']
    iy = opdict['syn_iy']
    iz = opdict['syn_iz']
    it = int(round(s_t0 / s_delta))

    # retrieve travel times for chosen hypocenter and station list
    ib = ix * ny * nz + iy * nz + iz
    n_buf = nx * ny * nz
    logging.debug('ib for true hypocenter = %d' % ib)
    ttimes = {}
    for sta in sta_list:
        if sta in time_grids:
            ttimes[sta] = time_grids[sta].grid_data[ib]
        else:
            logging.info(
                'Missing travel-time information for station %s. Ignoring station...'
                % sta)
    logging.debug('Travel-times for true hypocenter = %s' % ttimes)

    # construct data with these travel times
    data = {}
    for key, delay in ttimes.items():
        if syn_addnoise:
            s_snr = opdict['syn_snr']
            s = np.random.rand(s_npts) * s_amplitude / s_snr
        else:
            s = np.zeros(s_npts)
        atime = s_t0 + delay
        i_atime = int(atime / s_delta)
        if i_atime + s_nkwidth > len(s):
            # NOTE(review): the pulse does not fit in the trace; the slice
            # assignment below presumably fails with a shape mismatch
            logging.error(
                'syn_datalength is too small compared with geographical size of network '
            )
        # pulse decaying linearly from s_amplitude over s_nkwidth samples
        s[i_atime:i_atime + s_nkwidth] = s_amplitude - \
            np.arange(s_nkwidth) * (s_amplitude / float(s_nkwidth))
        data[key] = s

    # DO MIGRATION
    logging.info('Doing migration to %s' % test_grid_file)
    f = h5py.File(test_grid_file, 'w')
    try:
        stack_grid = f.create_dataset('stack_grid', (n_buf, s_npts), 'f',
                                      chunks=(1, s_npts))
        stack_shift_time = migrate_4D_stack(data, s_delta, time_grids,
                                            stack_grid)
        # re-read the shape after migration
        n_buf, nt = stack_grid.shape

        # add useful information to dataset
        for key, value in grid_info.items():
            stack_grid.attrs[key] = value
        stack_grid.attrs['dt'] = s_delta
        stack_grid.attrs['start_time'] = -stack_shift_time

        # extract max-stack
        logging.info('Extracting max_val etc. to %s' % test_stack_file)
        f_stack = h5py.File(test_stack_file, 'w')
        try:
            # extract maxima
            extract_max_values(stack_grid, grid_info, f_stack)
            for name in f_stack:
                dset = f_stack[name]
                logging.debug('After extract_max_values : %s %f %f' %
                              (name, np.max(dset), np.sum(dset)))
                dset.attrs['start_time'] = -stack_shift_time
                dset.attrs['dt'] = s_delta
        finally:
            f_stack.close()
    finally:
        # close the grid file even if migration or extraction failed
        f.close()
    logging.info('Saved 4D grid to file %s' % test_grid_file)

    shifted_it = it + int(round(stack_shift_time / s_delta))

    # SETUP information to pass back
    test_info = {}
    test_info['dat_file'] = test_grid_file
    test_info['stack_file'] = test_stack_file
    test_info['grid_shape'] = nx, ny, nz, nt
    test_info['grid_spacing'] = dx, dy, dz, s_delta
    test_info['grid_orig'] = x_orig, y_orig, z_orig
    test_info['true_indexes'] = (ix, iy, iz, shifted_it)
    test_info['start_time'] = -stack_shift_time

    logging.debug(test_info)
    # the context manager guarantees the info file is flushed and closed
    with open(test_info_file, 'w') as f_info:
        f_info.write(str(test_info))

    return test_info
def do_locations_prob_setup_and_run(opdict):
    """
    Setup and run probability-based locations on migration grids.

    Takes all parameters from WavelocOptions.opdict. For every location in
    ``out/<outdir>/loc/locations.dat`` the gradient data are re-migrated
    around the origin time, the resulting stack is cut to the window of
    interest and its expected coordinates, covariance and marginal
    distributions are computed. One text line per event is written to
    ``locations_prob.dat`` and the marginals are stored as one group per
    event in ``locations_prob.hdf5``.

    :param opdict: Parameters and options for Waveloc.
    """
    # get / set info
    base_path = opdict["base_path"]
    space_only = opdict["probloc_spaceonly"]

    locfile = os.path.join(base_path, "out", opdict["outdir"], "loc",
                           "locations.dat")
    locfile_prob = os.path.join(base_path, "out", opdict["outdir"], "loc",
                                "locations_prob.dat")
    locfile_hdf5 = os.path.join(base_path, "out", opdict["outdir"], "loc",
                                "locations_prob.hdf5")

    f_prob = open(locfile_prob, "w")
    f_marginals = None
    try:
        # if locfile does not exist then make it by running trigger
        # location
        if not os.path.exists(locfile):
            # message built by concatenation so that no source indentation
            # ends up inside the logged text
            logging.info(
                "No location found at %s. Running trigger location "
                "first..." % locfile
            )
            do_locations_trigger_setup_and_run(opdict)

        # data files (only the gradient files are migrated here)
        data_dir = os.path.join(base_path, "data", opdict["datadir"])
        grad_glob = opdict["gradglob"]
        grad_files = glob.glob(os.path.join(data_dir, grad_glob))
        grad_files.sort()

        # grids
        search_grid_filename = os.path.join(base_path, "lib",
                                            opdict["search_grid"])

        # read time grid information
        time_grids = get_interpolated_time_grids(opdict)

        # read locations
        locs = read_locs_from_file(locfile)

        # prepare file for output of marginals
        f_marginals = h5py.File(locfile_hdf5, "w")

        # iterate over locations
        for loc in locs:
            # create the appropriate grid on the fly
            o_time = loc["o_time"]
            if space_only:
                start_time = o_time
                end_time = o_time
            else:
                start_time = o_time - 3 * loc["o_err_left"]
                end_time = o_time + 3 * loc["o_err_right"]

            # make a buffer for migration
            start_time_migration = start_time - 10.0
            end_time_migration = end_time + 10.0

            # re-read grid info to ensure clean copy
            grid_info = read_hdr_file(search_grid_filename)

            # read data
            grad_dict, delta = read_data_compatible_with_time_dict(
                grad_files, time_grids, start_time_migration,
                end_time_migration
            )

            # do migration (all metadata on grid is added to grid_info)
            do_migration_loop_continuous(
                opdict, grad_dict, delta, start_time_migration, grid_info,
                time_grids, keep_grid=True
            )

            # integrate to get the marginal probability density
            # distributions

            # get required info
            grid_starttime = grid_info["start_time"]
            nx, ny, nz, nt = grid_info["grid_shape"]
            dx, dy, dz, dt = grid_info["grid_spacing"]
            x_orig, y_orig, z_orig = grid_info["grid_orig"]

            # we are only interested in the time around the origin time
            # of the event
            it_left = int(np.round((start_time - grid_starttime) / dt))
            it_right = int(np.round((end_time - grid_starttime) / dt))
            it_true = int(np.round((o_time - grid_starttime) / dt))
            nt = (it_right - it_left) + 1

            # set up integration axes (wrt reference)
            x = np.arange(nx) * dx
            y = np.arange(ny) * dy
            z = np.arange(nz) * dz
            if not space_only:
                t = np.arange(nt) * dt

            # open the grid file and extract the portion of interest
            # (copy data) so the file can be closed straight away
            grid_filename = grid_info["dat_file"]
            f = h5py.File(grid_filename, "r")
            try:
                stack_grid = f["stack_grid"]
                if space_only:
                    stack_3D = np.empty((nx, ny, nz))
                    stack_3D[:] = stack_grid[:, it_true].reshape(nx, ny, nz)
                else:
                    stack_4D = np.empty((nx, ny, nz, nt))
                    stack_4D[:] = stack_grid[:, it_left:it_right + 1].reshape(
                        nx, ny, nz, nt
                    )
            finally:
                f.close()

            # Get expected values (normalizes grid internally)
            if space_only:
                exp_x, exp_y, exp_z, cov_matrix, prob_dict = \
                    compute_expected_coordinates3D(
                        stack_3D, x, y, z, return_2Dgrids=True
                    )
            else:
                exp_x, exp_y, exp_z, exp_t, cov_matrix, prob_dict = \
                    compute_expected_coordinates4D(
                        stack_4D, x, y, z, t, return_2Dgrids=True
                    )

            # put reference location back
            exp_x = exp_x + x_orig
            exp_y = exp_y + y_orig
            exp_z = exp_z + z_orig
            if space_only:
                exp_t = o_time
            else:
                exp_t = start_time + exp_t

            # extract uncertainties from covariance matrix
            if space_only:
                sig_x, sig_y, sig_z = np.sqrt(np.diagonal(cov_matrix))
                sig_t = (loc["o_err_left"] + loc["o_err_right"]) / 2.0
            else:
                sig_x, sig_y, sig_z, sig_t = np.sqrt(np.diagonal(cov_matrix))

            # save the marginals to a hdf5 file in loc subdirectory
            # (f_marginals); each event becomes a group in this one file
            grp = f_marginals.create_group(exp_t.isoformat())
            grp.create_dataset("x", data=x + x_orig)
            grp.create_dataset("y", data=y + y_orig)
            grp.create_dataset("z", data=z + z_orig)
            grp.create_dataset("prob_x", data=prob_dict["prob_x0"])
            grp.create_dataset("prob_y", data=prob_dict["prob_x1"])
            grp.create_dataset("prob_z", data=prob_dict["prob_x2"])
            grp.create_dataset("prob_xy", data=prob_dict["prob_x0_x1"])
            grp.create_dataset("prob_xz", data=prob_dict["prob_x0_x2"])
            grp.create_dataset("prob_yz", data=prob_dict["prob_x1_x2"])
            if not space_only:
                # time axis stored relative to the origin time
                grp.create_dataset("t", data=t - (o_time - start_time))
                grp.create_dataset("prob_t", data=prob_dict["prob_x3"])
                grp.create_dataset("prob_xt", data=prob_dict["prob_x0_x3"])
                grp.create_dataset("prob_yt", data=prob_dict["prob_x1_x3"])
                grp.create_dataset("prob_zt", data=prob_dict["prob_x2_x3"])

            # write the expected values to a plain text locations file;
            # the format string is built by concatenation so that no
            # source indentation leaks into the output line
            f_prob.write(
                "PROB DENSITY : T = %s s pm %.2f s, x= %.4f pm %.4f km, "
                "y= %.4f pm %.4f km, z= %.4f pm %.4f km\n"
                % (exp_t.isoformat(), sig_t, exp_x, sig_x, exp_y, sig_y,
                   exp_z, sig_z)
            )
    finally:
        # close location files
        f_prob.close()
        if f_marginals is not None:
            f_marginals.close()
def do_migration_setup_and_run(opdict):
    """
    Do setup and launch migration over successive time windows.

    The time span between ``opdict['starttime']`` and ``opdict['endtime']``
    is cut into windows of ``opdict['data_length']`` seconds that overlap by
    ``opdict['data_overlap']`` seconds. Each window is read and, if at least
    three traces are available, migrated.

    :param opdict: Dictionary of options in WavelocOptions.opdict format
    :raises UserWarning: if no data files match the selected glob
    """

    base_path = opdict['base_path']
    runtime = opdict['time']
    reloc = opdict['reloc']

    # data : choose the glob of the waveforms to migrate
    data_dir = os.path.join(base_path, 'data', opdict['datadir'])
    if opdict['kderiv']:
        data_glob = opdict['gradglob']
        if opdict['gauss']:
            data_glob = opdict['gaussglob']
    else:
        data_glob = opdict['kurtglob']

    data_files = sorted(glob.glob(os.path.join(data_dir, data_glob)))
    if not data_files:
        msg = 'No data files found for %s and %s' % (data_dir, data_glob)
        logging.error(msg)
        raise UserWarning(msg)
    logging.debug('Found %d data files' % len(data_files))

    # the kurtosis files used for the SNR test do not change from one time
    # window to the next, so look them up only once
    if reloc:
        snr_files = glob.glob(os.path.join(data_dir, opdict['kurtglob']))

    # grids
    search_grid_filename = os.path.join(base_path, 'lib',
                                        opdict['search_grid'])
    time_grids = get_interpolated_time_grids(opdict)

    # start and end times
    data_length = opdict['data_length']
    time_shift_secs = data_length - opdict['data_overlap']
    start_time = utcdatetime.UTCDateTime(opdict['starttime'])
    end_time = start_time + data_length
    final_end_time = utcdatetime.UTCDateTime(opdict['endtime'])

    ######### FOR EACH TIME SPAN - DO MIGRATION #############

    if runtime:
        t_ref = time()

    while start_time < final_end_time:

        # read data
        logging.info("Reading data : %s - %s." % (start_time.isoformat(),
                                                  end_time.isoformat()))
        data, delta = read_data_compatible_with_time_dict(
            data_files, time_grids, start_time, end_time)

        if reloc:
            # mute the stations whose kurtosis signal-to-noise ratio is too
            # low; the sampling interval of this read is deliberately
            # ignored so that the one of the migrated data is kept
            traces, _ = read_data_compatible_with_time_dict(
                snr_files, time_grids, start_time, end_time)
            for staname in sorted(traces):
                if staname not in data:
                    # nothing to mute for this station
                    continue
                snr = np.max(traces[staname]) / \
                    np.mean(np.abs(traces[staname]))
                if snr < opdict['reloc_snr']:
                    data[staname] = np.zeros(len(data[staname]))

        # re-read grid_info at each iteration to make sure it is a clean copy
        grid_info = read_hdr_file(search_grid_filename)

        # do migration if have enough data (3 is bare minimum)
        n_traces = len(data)
        if n_traces >= 3:
            logging.info("Migrating data : %s - %s." %
                         (start_time.isoformat(), end_time.isoformat()))
            do_migration_loop_continuous(opdict, data, delta, start_time,
                                         grid_info, time_grids)
        elif n_traces == 0:
            logging.warning('No data found between %s and %s.' %
                            (start_time.isoformat(), end_time.isoformat()))
        else:
            logging.warning('Insufficient data found between %s and %s.' %
                            (start_time.isoformat(), end_time.isoformat()))

        # Reset the start and end times to loop again
        start_time = start_time + time_shift_secs
        end_time = end_time + time_shift_secs

    if runtime:
        t = time() - t_ref
        logging.info("Time for migrating all time slices : %.2f s\n" % (t))
def do_plotting_setup_and_run(opdict, plot_wfm=True, plot_grid=True):
    """
    Plot the results of a waveloc run (migration and location). All options
    and parameters are taken from an opdict.

    :param opdict: WavelocOptions.opdict that contains the options /
        parameters.
    :param plot_wfm: If ``True`` plots waveforms after location (filtered data
        and kurtosis).
    :param plot_grid: If ``True`` plots the migration grid.

    :type plot_wfm: boolean
    :type plot_grid: boolean
    """

    # get / set info
    base_path = opdict['base_path']
    out_dir = os.path.join(base_path, 'out', opdict['outdir'])

    locfile = os.path.join(out_dir, 'loc', 'locations.dat')
    stackfile = os.path.join(out_dir, 'stack', 'combined_stack_all.hdf5')
    figdir = os.path.join(out_dir, 'fig')

    data_dir = os.path.join(base_path, 'data', opdict['datadir'])
    data_files = sorted(glob.glob(os.path.join(data_dir,
                                               opdict['dataglob'])))

    # files used for migration : kurtosis by default, replaced by its
    # gradient (and then by the gaussian version) when requested
    mig_files = sorted(glob.glob(os.path.join(data_dir, opdict['kurtglob'])))
    if opdict['kderiv']:
        mig_files = sorted(glob.glob(os.path.join(data_dir,
                                                  opdict['gradglob'])))
        if opdict['gauss']:
            mig_files = sorted(glob.glob(os.path.join(data_dir,
                                                      opdict['gaussglob'])))

    # grids
    search_grid_filename = os.path.join(base_path, 'lib',
                                        opdict['search_grid'])
    # read time grid information
    time_grids = get_interpolated_time_grids(opdict)

    # read locations
    locs = read_locs_from_file(locfile)

    # length (in seconds) of the window to plot around each origin time
    plot_length = opdict['plot_tbefore'] + opdict['plot_tafter']

    # open stack file (closed in the finally clause, even on error)
    f_stack = h5py.File(stackfile, 'r')
    try:
        max_val = f_stack['max_val_smooth']
        stack_start_time = UTCDateTime(max_val.attrs['start_time'])

        for loc in locs:
            # generate the grids
            o_time = loc['o_time']
            start_time = o_time - opdict['plot_tbefore']
            end_time = o_time + opdict['plot_tafter']

            # re-read grid info to ensure clean copy
            grid_info = read_hdr_file(search_grid_filename)

            x = loc['x_mean']
            y = loc['y_mean']
            z = loc['z_mean']

            # get the corresponding travel-times for time-shifting
            ttimes = {}
            for sta in time_grids.keys():
                ttimes[sta] = time_grids[sta].value_at_point(x, y, z)

            # widen the time window by the largest travel-time
            tshift_migration = max(ttimes.values())
            start_time_migration = start_time - tshift_migration
            end_time_migration = end_time + tshift_migration

            if plot_grid:
                logging.info('Plotting grid for location %s' %
                             o_time.isoformat())

                # read data
                mig_dict, delta = read_data_compatible_with_time_dict(
                    mig_files, time_grids, start_time_migration,
                    end_time_migration)
                # do migration
                do_migration_loop_continuous(opdict, mig_dict, delta,
                                             start_time_migration, grid_info,
                                             time_grids, keep_grid=True)
                # plot
                plotLocationGrid(loc, grid_info, figdir,
                                 opdict['plot_otime_window'])

            if plot_wfm:
                logging.info('Plotting waveforms for location %s' %
                             o_time.isoformat())

                # read data
                data_dict, delta = read_data_compatible_with_time_dict(
                    data_files, time_grids, start_time_migration,
                    end_time_migration)
                mig_dict, delta = read_data_compatible_with_time_dict(
                    mig_files, time_grids, start_time_migration,
                    end_time_migration)

                # number of samples in the plotted window
                # (builtin int : the np.int alias no longer exists in NumPy)
                npts = int(np.round(plot_length / delta))

                # cut desired portion out of data
                for sta in list(data_dict):
                    tmp = data_dict[sta]

                    # alignment on origin time
                    istart = int(np.round((start_time + ttimes[sta] -
                                           start_time_migration) / delta))
                    iend = istart + npts

                    # sanity check in case event is close to start or end of
                    # data
                    if istart < 0:
                        istart = 0
                    if iend > len(tmp):
                        iend = len(tmp)

                    # do slice (same samples for data and migration trace)
                    data_dict[sta] = tmp[istart:iend]
                    mig_dict[sta] = mig_dict[sta][istart:iend]

                # retrieve relevant portion of stack max
                istart = int(np.round((o_time - opdict['plot_tbefore'] -
                                       stack_start_time) / delta))
                iend = istart + npts

                # sanity check in case event is close to start or end of data
                if istart < 0:
                    # the stack starts later than the requested window
                    start_time = start_time + abs(istart) * delta
                    istart = 0
                if iend > len(max_val):
                    iend = len(max_val)

                # do slice
                stack_wfm = max_val[istart:iend]

                # plot
                plotLocationWaveforms(loc, start_time, delta, data_dict,
                                      mig_dict, stack_wfm, figdir)
    finally:
        f_stack.close()
def do_double_diff_setup_and_run(opdict):
    """
    Do double difference (outer routine). Takes options from a
    WavelocOptions.opdict dictionary.

    For each cluster read from the cluster file, the events lying more than
    0.75 away from the cluster centroid along a coordinate are moved onto the
    centroid for that coordinate, then clusters of more than two events are
    relocated by double difference. When ``opdict['dd_loc']`` is set, the new
    locations are written to ``relocations.dat`` in the location directory.

    :param opdict: Dictionary of parameters and options
    """

    base_path = opdict['base_path']
    verbose = opdict['verbose']
    dd_loc = opdict['dd_loc']

    # Station
    stations_filename = os.path.join(base_path, 'lib', opdict['stations'])
    stations = read_stations_file(stations_filename)

    # Location file (its header overrides / completes the options)
    locdir = os.path.join(base_path, 'out', opdict['outdir'], 'loc')
    loc_filename = os.path.join(locdir, 'locations.dat')
    locs = read_locs_from_file(loc_filename)
    opdict = read_header_from_file(loc_filename, opdict)

    # ------------------------------------------------------------------------
    # search grid
    search_grid_filename = os.path.join(base_path, 'lib',
                                        opdict['search_grid'])
    # traveltimes grid
    grid_info = read_hdr_file(search_grid_filename)
    time_grids = get_interpolated_time_grids(opdict)

    # Extract the UTM coordinates of the area of study
    # (z values are stored with the opposite sign)
    xstart = grid_info['x_orig']
    xend = xstart + grid_info['nx'] * grid_info['dx']
    ystart = grid_info['y_orig']
    yend = ystart + grid_info['ny'] * grid_info['dy']
    zend = -grid_info['z_orig']
    zstart = -(-zend + grid_info['nz'] * grid_info['dz'])
    area = [xstart, xend, ystart, yend, zstart, zend]

    # ------------------------------------------------------------------------
    nbmin = int(opdict['nbsta'])
    threshold = float(opdict['clus'])

    # Correlation, time delay and cluster files
    corr_file = os.path.join(locdir, opdict['xcorr_corr'])
    coeff = BinaryFile(corr_file).read_binary_file()

    delay_file = os.path.join(locdir, opdict['xcorr_delay'])
    delay = BinaryFile(delay_file).read_binary_file()

    cluster_file = os.path.join(locdir, 'cluster-%s-%s' % (str(threshold),
                                                           str(nbmin)))
    cluster = BinaryFile(cluster_file).read_binary_file()

    # ------------------------------------------------------------------------
    # Input parameters
    len_cluster_min = 2

    new_loc_file = None
    if dd_loc:
        new_loc_filename = os.path.join(locdir, 'relocations.dat')
        new_loc_file = open(new_loc_filename, 'w')

    # the try / finally makes sure the relocation file is closed even if the
    # processing of a cluster fails
    try:
        if dd_loc:
            write_header_options(new_loc_file, opdict)

        # --------------------------------------------------------------------
        # Iterate over clusters
        for i in cluster:
            events = cluster[i]
            N = len(events)
            print("CLUSTER %d: %s %d" % (i, events, N))

            # Hypocentral parameters to be changed
            x, y, z, z_ph, to = coord_cluster(events, locs)

            # Replace bad locations by the centroid coordinates
            centroid_x = np.mean(x)
            centroid_y = np.mean(y)
            centroid_z = np.mean(z)

            for ii in range(N):
                if np.abs(x[ii] - centroid_x) > .75:
                    x[ii] = centroid_x
                if np.abs(y[ii] - centroid_y) > .75:
                    y[ii] = centroid_y
                if np.abs(z[ii] - centroid_z) > .75:
                    z[ii] = centroid_z

            if N > len_cluster_min:
                # Theoretical traveltimes and arrival times
                t_th, arr_times = traveltimes(x, y, z, to, stations,
                                              time_grids)

                # do double difference location
                x, y, z, to = do_double_diff(x, y, z, to, stations, coeff,
                                             delay, events, threshold, t_th,
                                             arr_times)

            if verbose:
                from clustering import compute_nbsta
                nbsta = compute_nbsta(len(locs), coeff, threshold)
                plot_events(cluster, locs, stations, x, y, z, i, threshold,
                            nbmin, area, nbsta)

            if dd_loc:
                for ind, j in enumerate(events):
                    # the locations list is indexed with event number - 1
                    loc = locs[j - 1]
                    loc['x_mean'] = x[ind]
                    loc['y_mean'] = y[ind]
                    loc['z_mean'] = z[ind]
                    loc['o_time'] = to[ind]
                    # no uncertainty is attached to the relocated events
                    loc['x_sigma'] = 0
                    loc['y_sigma'] = 0
                    loc['z_sigma'] = 0
                    loc['o_err_right'] = 0
                    loc['o_err_left'] = 0

                    # the format is written on adjacent literals so that the
                    # indentation of the source does not end up in the file
                    new_loc_file.write(
                        "Max = %.2f, %s - %.2f s + %.2f s, "
                        "x= %.4f pm %.4f km, y= %.4f pm %.4f km, "
                        "z= %.4f pm %.4f km\n" %
                        (loc['max_trig'], loc['o_time'].isoformat(),
                         loc['o_err_left'], loc['o_err_right'],
                         loc['x_mean'], loc['x_sigma'],
                         loc['y_mean'], loc['y_sigma'],
                         loc['z_mean'], loc['z_sigma']))
    finally:
        if new_loc_file is not None:
            new_loc_file.close()
def get_interpolated_time_grids(opdict):
    """
    Returns the travel-time grids interpolated onto the search grid, as a
    dictionary indexed by station name.

    The interpolated grids live in the ``time_grids`` sub-directory of the
    output directory. An existing file is re-used when
    ``opdict['load_ttimes_buf']`` is set ; otherwise it is (re-)created from
    the corresponding full-length grid.

    :param opdict: Dictionary of options in WavelocOptions.opdict format
    :raises UserWarning: if no full-length .hdf5 time grid is found
    """
    import glob
    from NllGridLib import read_hdr_file

    lib_dir = os.path.join(opdict['base_path'], 'lib')

    grid_pattern = os.path.join(lib_dir, opdict['time_grid'] + '*.hdf5')
    full_time_grids = sorted(glob.glob(grid_pattern))
    if not full_time_grids:
        raise UserWarning('No .hdf5 time grids found in directory %s' %
                          lib_dir)

    # read the search grid
    search_grid = os.path.join(lib_dir, opdict['search_grid'])
    tgrid_dir = os.path.join(opdict['base_path'], 'out', opdict['outdir'],
                             'time_grids')
    if not os.path.exists(tgrid_dir):
        os.makedirs(tgrid_dir)
    search_info = read_hdr_file(search_grid)

    # entries of the grid info that must follow the search grid
    geometry_keys = ('x_orig', 'y_orig', 'z_orig',
                     'nx', 'ny', 'nz',
                     'dx', 'dy', 'dz')

    time_grids = {}

    # for each of the full-length time grids
    logging.info('Loading time grids ... ')
    for f_timegrid in full_time_grids:
        # the short-length grid carries the name of the full-length one
        tgrid_filename = os.path.join(tgrid_dir,
                                      os.path.basename(f_timegrid))

        if os.path.isfile(tgrid_filename) and opdict['load_ttimes_buf']:
            # the file is there and we are allowed to use it
            logging.debug('Loading %s' % tgrid_filename)
            grid = H5SingleGrid(tgrid_filename)
            time_grids[grid.grid_info['station']] = grid
        else:
            # the file is missing, or its re-creation is forced
            logging.info('Creating %s - Please be patient' % tgrid_filename)
            full_grid = H5SingleGrid(f_timegrid)

            # start from the full grid info, then impose the geometry of the
            # search grid
            new_info = dict(full_grid.grid_info.iteritems())
            for key in geometry_keys:
                new_info[key] = search_info[key]

            # do interpolation
            grid = full_grid.interp_to_newgrid(tgrid_filename, new_info)
            time_grids[grid.grid_info['station']] = grid

            # close full grid safely
            del full_grid

    return time_grids
def do_double_diff_setup_and_run(opdict):
    """
    Do double difference (outer routine). Takes options from a
    WavelocOptions.opdict dictionary.

    Reads the locations, the correlation / delay matrices and the clusters
    from the location directory, relocates each cluster of more than two
    events by double difference and, if ``opdict['dd_loc']`` is set, writes
    the updated locations to ``relocations.dat``.

    :param opdict: Dictionary of parameters and options
    """

    base_path = opdict['base_path']
    verbose = opdict['verbose']
    dd_loc = opdict['dd_loc']

    # Station
    stations = read_stations_file(
        os.path.join(base_path, 'lib', opdict['stations']))

    # Location file ; the options are then updated from its header
    locdir = os.path.join(base_path, 'out', opdict['outdir'], 'loc')
    loc_filename = os.path.join(locdir, 'locations.dat')
    locs = read_locs_from_file(loc_filename)
    opdict = read_header_from_file(loc_filename, opdict)

    # ------------------------------------------------------------------------
    # search grid and traveltimes grids
    grid_info = read_hdr_file(
        os.path.join(base_path, 'lib', opdict['search_grid']))
    time_grids = get_interpolated_time_grids(opdict)

    # Extract the UTM coordinates of the area of study
    xstart = grid_info['x_orig']
    xend = xstart + grid_info['nx'] * grid_info['dx']
    ystart = grid_info['y_orig']
    yend = ystart + grid_info['ny'] * grid_info['dy']
    zend = -grid_info['z_orig']
    zstart = -(-zend + grid_info['nz'] * grid_info['dz'])
    area = [xstart, xend, ystart, yend, zstart, zend]

    # ------------------------------------------------------------------------
    nbmin = int(opdict['nbsta'])
    threshold = float(opdict['clus'])

    # Correlation, time delay and cluster files
    cfile = BinaryFile(os.path.join(locdir, opdict['xcorr_corr']))
    coeff = cfile.read_binary_file()

    dfile = BinaryFile(os.path.join(locdir, opdict['xcorr_delay']))
    delay = dfile.read_binary_file()

    clfile = BinaryFile(os.path.join(
        locdir, 'cluster-%s-%s' % (str(threshold), str(nbmin))))
    cluster = clfile.read_binary_file()

    # ------------------------------------------------------------------------
    # Input parameters
    len_cluster_min = 2

    # one line per relocated event ; kept on adjacent literals so that no
    # source indentation leaks into the output file
    loc_format = ("Max = %.2f, %s - %.2f s + %.2f s, x= %.4f pm %.4f km, "
                  "y= %.4f pm %.4f km, z= %.4f pm %.4f km\n")

    new_loc_file = None
    try:
        if dd_loc:
            new_loc_file = open(os.path.join(locdir, 'relocations.dat'), 'w')
            write_header_options(new_loc_file, opdict)

        # --------------------------------------------------------------------
        # Iterate over clusters
        for i in cluster:
            members = cluster[i]
            N = len(members)
            print("CLUSTER %d: %s %d" % (i, members, N))

            # Hypocentral parameters to be changed
            x, y, z, z_ph, to = coord_cluster(members, locs)

            # Replace bad locations by the centroid coordinates
            for coords in (x, y, z):
                # the centroid is computed before any value is replaced
                centroid = np.mean(coords)
                for ii in range(N):
                    if np.abs(coords[ii] - centroid) > .75:
                        coords[ii] = centroid

            if N > len_cluster_min:
                # Theoretical traveltimes and arrival times
                t_th, arr_times = traveltimes(x, y, z, to, stations,
                                              time_grids)
                # do double difference location
                x, y, z, to = do_double_diff(x, y, z, to, stations, coeff,
                                             delay, members, threshold,
                                             t_th, arr_times)

            if verbose:
                from clustering import compute_nbsta
                nbsta = compute_nbsta(len(locs), coeff, threshold)
                plot_events(cluster, locs, stations, x, y, z, i, threshold,
                            nbmin, area, nbsta)

            if not dd_loc:
                continue

            for j, new_x, new_y, new_z, new_to in zip(members, x, y, z, to):
                # event numbers are shifted by one with respect to the
                # indexes of the locations list
                loc = locs[j - 1]
                loc['x_mean'] = new_x
                loc['y_mean'] = new_y
                loc['z_mean'] = new_z
                loc['o_time'] = new_to
                # uncertainties are reset for relocated events
                for key in ('x_sigma', 'y_sigma', 'z_sigma',
                            'o_err_right', 'o_err_left'):
                    loc[key] = 0

                new_loc_file.write(
                    loc_format %
                    (loc['max_trig'], loc['o_time'].isoformat(),
                     loc['o_err_left'], loc['o_err_right'],
                     loc['x_mean'], loc['x_sigma'],
                     loc['y_mean'], loc['y_sigma'],
                     loc['z_mean'], loc['z_sigma']))
    finally:
        # close the relocation file whatever happened above
        if new_loc_file is not None:
            new_loc_file.close()
def do_locations_prob_setup_and_run(opdict):
    """
    Setup and run probabilistic location. Takes all options and parameters
    from an opdict.

    For every event of ``locations.dat`` (created by running the trigger
    location first if it does not exist), the gradient data are re-migrated
    around the origin time and the resulting stack is integrated to obtain
    expected coordinates, their uncertainties and the marginal probability
    densities. Expected values are written to ``locations_prob.dat`` and
    marginals to ``locations_prob.hdf5`` (one group per event).

    :param opdict: Dictionary of options in WavelocOptions.opdict format
    """

    # get / set info
    base_path = opdict['base_path']
    space_only = opdict['probloc_spaceonly']

    loc_dir = os.path.join(base_path, 'out', opdict['outdir'], 'loc')
    locfile = os.path.join(loc_dir, 'locations.dat')
    locfile_prob = os.path.join(loc_dir, 'locations_prob.dat')
    locfile_hdf5 = os.path.join(loc_dir, 'locations_prob.hdf5')

    # if locfile does not exist then make it by running trigger location
    if not os.path.exists(locfile):
        logging.info('No location found at %s. Running trigger location '
                     'first...' % locfile)
        do_locations_trigger_setup_and_run(opdict)

    # data files
    data_dir = os.path.join(base_path, 'data', opdict['datadir'])
    grad_files = sorted(glob.glob(os.path.join(data_dir,
                                               opdict['gradglob'])))

    # grids
    search_grid_filename = os.path.join(base_path, 'lib',
                                        opdict['search_grid'])

    # read time grid information
    time_grids = get_interpolated_time_grids(opdict)

    # read locations
    locs = read_locs_from_file(locfile)

    # the output files are opened only once the setup has succeeded, and are
    # closed in the finally clause whatever happens during the processing
    f_prob = open(locfile_prob, 'w')
    f_marginals = None
    try:
        # prepare file for output of marginals
        f_marginals = h5py.File(locfile_hdf5, 'w')

        # iterate over locations
        for loc in locs:

            # create the appropriate grid on the fly
            o_time = loc['o_time']
            if space_only:
                start_time = o_time
                end_time = o_time
            else:
                start_time = o_time - 3 * loc['o_err_left']
                end_time = o_time + 3 * loc['o_err_right']

            # make a buffer for migration
            start_time_migration = start_time - 10.0
            end_time_migration = end_time + 10.0

            # re-read grid info to ensure clean copy
            grid_info = read_hdr_file(search_grid_filename)

            # read data
            grad_dict, delta = read_data_compatible_with_time_dict(
                grad_files, time_grids, start_time_migration,
                end_time_migration)

            # do migration (all metadata on grid is added to grid_info)
            do_migration_loop_continuous(opdict, grad_dict, delta,
                                         start_time_migration, grid_info,
                                         time_grids, keep_grid=True)

            # integrate to get the marginal probability density distributions

            # get required info
            grid_starttime = grid_info['start_time']
            nx, ny, nz, _ = grid_info['grid_shape']
            dx, dy, dz, dt = grid_info['grid_spacing']
            x_orig, y_orig, z_orig = grid_info['grid_orig']

            # we are only interested in the time around the origin time of
            # the event (builtin int : np.int no longer exists in NumPy)
            it_left = int(np.round((start_time - grid_starttime) / dt))
            it_right = int(np.round((end_time - grid_starttime) / dt))
            it_true = int(np.round((o_time - grid_starttime) / dt))
            nt = (it_right - it_left) + 1

            # set up integration axes (wrt reference)
            x = np.arange(nx) * dx
            y = np.arange(ny) * dy
            z = np.arange(nz) * dz
            if not space_only:
                t = np.arange(nt) * dt

            # open the grid file and extract the portion of interest
            # (copy data so that the file can be closed straight away)
            f = h5py.File(grid_info['dat_file'], 'r')
            try:
                stack_grid = f['stack_grid']
                if space_only:
                    stack_3D = np.empty((nx, ny, nz))
                    stack_3D[:] = stack_grid[:, it_true].reshape(nx, ny, nz)
                else:
                    stack_4D = np.empty((nx, ny, nz, nt))
                    stack_4D[:] = stack_grid[:, it_left:it_right + 1].\
                        reshape(nx, ny, nz, nt)
            finally:
                f.close()

            # Get expected values (normalizes grid internally)
            if space_only:
                exp_x, exp_y, exp_z, cov_matrix, prob_dict = \
                    compute_expected_coordinates3D(stack_3D, x, y, z,
                                                   return_2Dgrids=True)
            else:
                exp_x, exp_y, exp_z, exp_t, cov_matrix, prob_dict = \
                    compute_expected_coordinates4D(stack_4D, x, y, z, t,
                                                   return_2Dgrids=True)

            # put reference location back
            exp_x = exp_x + x_orig
            exp_y = exp_y + y_orig
            exp_z = exp_z + z_orig
            if space_only:
                exp_t = o_time
            else:
                exp_t = start_time + exp_t

            # extract uncertainties from covariance matrix
            if space_only:
                sig_x, sig_y, sig_z = np.sqrt(np.diagonal(cov_matrix))
                # no time axis : fall back on the trigger location errors
                sig_t = (loc['o_err_left'] + loc['o_err_right']) / 2.
            else:
                sig_x, sig_y, sig_z, sig_t = \
                    np.sqrt(np.diagonal(cov_matrix))

            # save the marginals to a hdf5 file in loc subdirectory
            # (f_marginals) ; each event becomes a group in this one file
            grp = f_marginals.create_group(exp_t.isoformat())
            grp.create_dataset('x', data=x + x_orig)
            grp.create_dataset('y', data=y + y_orig)
            grp.create_dataset('z', data=z + z_orig)
            grp.create_dataset('prob_x', data=prob_dict['prob_x0'])
            grp.create_dataset('prob_y', data=prob_dict['prob_x1'])
            grp.create_dataset('prob_z', data=prob_dict['prob_x2'])
            grp.create_dataset('prob_xy', data=prob_dict['prob_x0_x1'])
            grp.create_dataset('prob_xz', data=prob_dict['prob_x0_x2'])
            grp.create_dataset('prob_yz', data=prob_dict['prob_x1_x2'])
            if not space_only:
                # time axis is stored relative to the origin time
                grp.create_dataset('t', data=t - (o_time - start_time))
                grp.create_dataset('prob_t', data=prob_dict['prob_x3'])
                grp.create_dataset('prob_xt', data=prob_dict['prob_x0_x3'])
                grp.create_dataset('prob_yt', data=prob_dict['prob_x1_x3'])
                grp.create_dataset('prob_zt', data=prob_dict['prob_x2_x3'])

            # write the expected values to a plain text locations file
            # (adjacent literals keep source indentation out of the file)
            f_prob.write(
                "PROB DENSITY : T = %s s pm %.2f s, x= %.4f pm %.4f km, "
                "y= %.4f pm %.4f km, z= %.4f pm %.4f km\n" %
                (exp_t.isoformat(), sig_t, exp_x, sig_x, exp_y, sig_y,
                 exp_z, sig_z))
    finally:
        # close location files
        if f_marginals is not None:
            f_marginals.close()
        f_prob.close()
def do_plotting_setup_and_run(opdict, plot_wfm=True, plot_grid=True):
    """
    Plot the results of a waveloc run (migration and location). All options
    and parameters are taken from an opdict.

    For each event of the locations file, the migration grid and / or the
    waveforms aligned on the origin time are plotted in the ``fig``
    sub-directory of the output directory.

    :param opdict: Dictionary of options in WavelocOptions.opdict format
    :param plot_wfm: If ``True`` plots the waveforms after location.
    :param plot_grid: If ``True`` plots the migration grid.

    :type plot_wfm: boolean
    :type plot_grid: boolean
    """

    # get / set info
    base_path = opdict['base_path']

    locfile = os.path.join(base_path, 'out', opdict['outdir'], 'loc',
                           'locations.dat')
    stackfile = os.path.join(base_path, 'out', opdict['outdir'], 'stack',
                             'combined_stack_all.hdf5')
    figdir = os.path.join(base_path, 'out', opdict['outdir'], 'fig')

    data_dir = os.path.join(base_path, 'data', opdict['datadir'])

    data_files = glob.glob(os.path.join(data_dir, opdict['dataglob']))
    data_files.sort()

    kurt_files = glob.glob(os.path.join(data_dir, opdict['kurtglob']))
    kurt_files.sort()

    # migration is done on the kurtosis, unless its gradient (possibly in
    # its gaussian version) is requested
    mig_files = kurt_files
    if opdict['kderiv']:
        grad_files = glob.glob(os.path.join(data_dir, opdict['gradglob']))
        grad_files.sort()
        mig_files = grad_files
        if opdict['gauss']:
            gauss_files = glob.glob(os.path.join(data_dir,
                                                 opdict['gaussglob']))
            gauss_files.sort()
            mig_files = gauss_files

    # grids
    search_grid_filename = os.path.join(base_path, 'lib',
                                        opdict['search_grid'])

    # read time grid information
    time_grids = get_interpolated_time_grids(opdict)

    # read locations
    locs = read_locs_from_file(locfile)

    # open stack file ; it is closed in the finally clause so that it does
    # not stay open if the plotting of an event fails
    f_stack = h5py.File(stackfile, 'r')
    try:
        max_val = f_stack['max_val_smooth']
        stack_start_time = UTCDateTime(max_val.attrs['start_time'])

        for loc in locs:
            # generate the grids
            o_time = loc['o_time']
            start_time = o_time - opdict['plot_tbefore']
            end_time = o_time + opdict['plot_tafter']

            # re-read grid info to ensure clean copy
            grid_info = read_hdr_file(search_grid_filename)

            x = loc['x_mean']
            y = loc['y_mean']
            z = loc['z_mean']

            # get the corresponding travel-times for time-shifting
            ttimes = {}
            for sta in time_grids.keys():
                ttimes[sta] = time_grids[sta].value_at_point(x, y, z)

            tshift_migration = max(ttimes.values())

            start_time_migration = start_time - tshift_migration
            end_time_migration = end_time + tshift_migration

            if plot_grid:
                logging.info('Plotting grid for location %s' %
                             o_time.isoformat())

                # TODO implement a rough estimation of the stack shift based
                # on propagation time across the whole network

                # read data
                mig_dict, delta = read_data_compatible_with_time_dict(
                    mig_files, time_grids, start_time_migration,
                    end_time_migration)

                # do migration
                do_migration_loop_continuous(opdict, mig_dict, delta,
                                             start_time_migration, grid_info,
                                             time_grids, keep_grid=True)

                # plot
                plotLocationGrid(loc, grid_info, figdir,
                                 opdict['plot_otime_window'])

            if plot_wfm:
                logging.info('Plotting waveforms for location %s' %
                             o_time.isoformat())

                # read data
                data_dict, delta = read_data_compatible_with_time_dict(
                    data_files, time_grids, start_time_migration,
                    end_time_migration)
                mig_dict, delta = read_data_compatible_with_time_dict(
                    mig_files, time_grids, start_time_migration,
                    end_time_migration)

                # number of samples of the window to plot
                # (builtin int : np.int no longer exists in NumPy)
                n_window = int(np.round(
                    (opdict['plot_tbefore'] + opdict['plot_tafter']) /
                    delta))

                # cut desired portion out of data
                for sta in list(data_dict):
                    tmp = data_dict[sta]
                    istart = int(np.round(
                        (start_time + ttimes[sta] - start_time_migration) /
                        delta))
                    iend = istart + n_window

                    # sanity check in case event is close to start or end of
                    # data
                    if istart < 0:
                        istart = 0
                    if iend > len(tmp):
                        iend = len(tmp)
                    data_dict[sta] = tmp[istart:iend]

                    # do slice
                    tmp = mig_dict[sta]
                    mig_dict[sta] = tmp[istart:iend]

                # retrieve relevant portion of stack max
                istart = int(np.round(
                    (o_time - opdict['plot_tbefore'] - stack_start_time) /
                    delta))
                iend = istart + n_window

                # sanity check in case event is close to start or end of data
                if istart < 0:
                    # shift the start time by the missing samples ; the
                    # sampling interval in this function is delta
                    start_time = start_time + abs(istart) * delta
                    istart = 0
                if iend > len(max_val):
                    iend = len(max_val)

                # do slice
                stack_wfm = max_val[istart:iend]

                # plot
                plotLocationWaveforms(loc, start_time, delta, data_dict,
                                      mig_dict, stack_wfm, figdir)
    finally:
        f_stack.close()