def coord_formatter(x, y):
    '''
    Builds the coordinate read-out string for the position *x*, *y*.

    Parameters
    ----------
    x : number
        Shown with ``%g`` and also handed to ``mjd_inv`` to obtain the
        ``%Y-%m-%d`` date (presumably a Modified Julian Date).
    y : any
        Accepted for signature compatibility; not used.

    Returns
    -------
    str
    '''
    time_part = 'time : %g ' % x
    date_part = '(date : %s)' % mjd_inv(x, '%Y-%m-%d')
    return time_part + date_part
def pygeons_info(input_file):
    '''
    Prints a metadata summary for a PyGeoNS data file.

    Parameters
    ----------
    input_file : str
        Name of the file handed to ``dict_from_hdf5``.

    Notes
    -----
    The summary is emitted with ``print``; nothing is returned.
    '''
    logger.info('Running pygeons info ...')
    data_dict = dict_from_hdf5(input_file)

    units = _unit_string(data_dict['space_exponent'],
                         data_dict['time_exponent'])
    stations = str(len(data_dict['id']))
    times = str(len(data_dict['time']))

    # an observation is counted when its standard deviation is not
    # infinite
    finite_counts = [np.sum(~np.isinf(data_dict[key]))
                     for key in ('east_std_dev',
                                 'north_std_dev',
                                 'vertical_std_dev')]
    observations = finite_counts[0] + finite_counts[1] + finite_counts[2]

    first_day = mjd.mjd_inv(data_dict['time'][0], '%Y-%m-%d')
    last_day = mjd.mjd_inv(data_dict['time'][-1], '%Y-%m-%d')
    time_range = '%s, %s' % (first_day, last_day)

    longitudes = data_dict['longitude']
    lon_range = '%s, %s' % (np.min(longitudes), np.max(longitudes))
    latitudes = data_dict['latitude']
    lat_range = '%s, %s' % (np.min(latitudes), np.max(latitudes))

    # station names are listed seven per line
    names = list(data_dict['id'])
    station_name_groups = [', '.join(names[start:start + 7])
                           for start in range(0, len(names), 7)]

    pieces = [
        '\n',
        '------------------ PYGEONS DATA INFORMATION ------------------\n\n',
        'file : %s\n' % input_file,
        'units : %s\n' % units,
        'stations : %s\n' % stations,
        'times : %s\n' % times,
        'observations : %s\n' % observations,
        'time range : %s\n' % time_range,
        'longitude range : %s\n' % lon_range,
        'latitude range : %s\n' % lat_range,
        'station names : %s\n' % station_name_groups[0],
    ]
    # continuation lines for the remaining groups of station names
    pieces.extend(' %s\n' % group for group in station_name_groups[1:])
    pieces.append('\n')
    pieces.append(
        '--------------------------------------------------------------')
    print(''.join(pieces))
    return
def pygeons_info(input_file):
    '''
    Prints a metadata summary for a PyGeoNS data file.

    Parameters
    ----------
    input_file : str
        Name of the file handed to ``dict_from_hdf5``.

    Notes
    -----
    The summary is emitted with ``print``; nothing is returned.
    '''
    logger.info('Running pygeons info ...')
    data_dict = dict_from_hdf5(input_file)

    units = _unit_string(data_dict['space_exponent'],
                         data_dict['time_exponent'])
    stations = str(len(data_dict['id']))
    times = str(len(data_dict['time']))

    # an observation is counted when its standard deviation is not
    # infinite
    finite_counts = [np.sum(~np.isinf(data_dict[key]))
                     for key in ('east_std_dev',
                                 'north_std_dev',
                                 'vertical_std_dev')]
    observations = finite_counts[0] + finite_counts[1] + finite_counts[2]

    first_day = mjd.mjd_inv(data_dict['time'][0], '%Y-%m-%d')
    last_day = mjd.mjd_inv(data_dict['time'][-1], '%Y-%m-%d')
    time_range = '%s, %s' % (first_day, last_day)

    longitudes = data_dict['longitude']
    lon_range = '%s, %s' % (np.min(longitudes), np.max(longitudes))
    latitudes = data_dict['latitude']
    lat_range = '%s, %s' % (np.min(latitudes), np.max(latitudes))

    # station names are listed seven per line
    names = list(data_dict['id'])
    station_name_groups = [', '.join(names[start:start + 7])
                           for start in range(0, len(names), 7)]

    pieces = [
        '\n',
        '------------------ PYGEONS DATA INFORMATION ------------------\n\n',
        'file : %s\n' % input_file,
        'units : %s\n' % units,
        'stations : %s\n' % stations,
        'times : %s\n' % times,
        'observations : %s\n' % observations,
        'time range : %s\n' % time_range,
        'longitude range : %s\n' % lon_range,
        'latitude range : %s\n' % lat_range,
        'station names : %s\n' % station_name_groups[0],
    ]
    # continuation lines for the remaining groups of station names
    pieces.extend(' %s\n' % group for group in station_name_groups[1:])
    pieces.append('\n')
    pieces.append(
        '--------------------------------------------------------------')
    print(''.join(pieces))
    return
def pygeons_strain_view(xdiff_file, ydiff_file, map_resolution='i', **kwargs):
    '''
    runs the PyGeoNS Interactive Strain Viewer

    Parameters
    ----------
    xdiff_file : str
        Name of the HDF5 file loaded as the x-derivative dataset
        (presumably deformation gradients with respect to x).

    ydiff_file : str
        Name of the HDF5 file loaded as the y-derivative dataset.

    map_resolution : str
        basemap resolution

    **kwargs :
        forwarded to ``interactive_strain_viewer``

    Raises
    ------
    ValueError
        If either dataset has a non-zero ``space_exponent``.
    '''
    logger.info('Running pygeons strain-view ...')
    loaded = [dict_from_hdf5(xdiff_file), dict_from_hdf5(ydiff_file)]
    data_dx, data_dy = _common_context(loaded)

    if data_dx['space_exponent'] != 0 or data_dy['space_exponent'] != 0:
        raise ValueError('The input datasets cannot have spatial units')

    times = data_dx['time']
    station_ids = data_dx['id']
    lon = data_dx['longitude']
    lat = data_dx['latitude']
    time_labels = [mjd_inv(ti, '%Y-%m-%d') for ti in times]
    units = _unit_string(data_dx['space_exponent'],
                         data_dx['time_exponent'])
    # factor that converts units of days and m to the units in *units*
    conv = 1.0 / unit_conversion(units, time='day', space='m')

    # strain components and their uncertainties; the shear term is the
    # mean of the two off-diagonal gradients
    exx = conv * data_dx['east']
    eyy = conv * data_dy['north']
    exy = 0.5 * conv * (data_dx['north'] + data_dy['east'])
    sxx = conv * data_dx['east_std_dev']
    syy = conv * data_dy['north_std_dev']
    shear_variance = (data_dx['north_std_dev']**2 +
                      data_dy['east_std_dev']**2)
    sxy = 0.5 * conv * np.sqrt(shear_variance)

    ts_fig, ts_ax = plt.subplots(3, 1, sharex=True,
                                 num='Time Series View',
                                 facecolor='white')
    _setup_ts_ax(ts_ax)
    map_fig, map_ax = plt.subplots(num='Map View', facecolor='white')
    bm = make_basemap(lon, lat, resolution=map_resolution)
    _setup_map_ax(bm, map_ax)

    # project the geodetic station coordinates onto the basemap
    x, y = bm(lon, lat)
    pos = np.array([x, y]).T

    interactive_strain_viewer(
        times, pos, exx, eyy, exy,
        sxx=sxx, syy=syy, sxy=sxy,
        map_ax=map_ax, ts_ax=ts_ax,
        time_labels=time_labels,
        station_labels=station_ids,
        units=units,
        **kwargs)
    return
def pygeons_vector_view(input_files, map_resolution='i', **kwargs):
    '''
    runs the PyGeoNS interactive vector viewer

    Parameters
    ----------
    input_files : (N,) list of str
        Names of the HDF5 files to plot. Each one is loaded with
        ``dict_from_hdf5`` and the loaded datasets are passed through
        ``_common_context`` before plotting.

    map_resolution : str
        basemap resolution

    **kwargs :
        forwarded to ``interactive_vector_viewer``. A
        ``dataset_labels`` entry, if present, is used for the dataset
        labels; otherwise the file names are used.
    '''
    logger.info('Running pygeons vector-view ...')
    data_list = _common_context([dict_from_hdf5(name)
                                 for name in input_files])

    # use filenames for dataset labels if none were provided
    dataset_labels = kwargs.pop('dataset_labels', input_files)

    # time, position and unit metadata are read from the first dataset
    first = data_list[0]
    times = first['time']
    lon = first['longitude']
    lat = first['latitude']
    station_ids = first['id']
    time_labels = [mjd_inv(ti, '%Y-%m-%d') for ti in times]
    units = _unit_string(first['space_exponent'],
                         first['time_exponent'])
    # factor that converts units of days and m to the units in *units*
    conv = 1.0 / unit_conversion(units, time='day', space='m')

    def scaled(key):
        # one converted array per dataset for the requested entry
        return [conv * d[key] for d in data_list]

    u = scaled('east')
    v = scaled('north')
    z = scaled('vertical')
    su = scaled('east_std_dev')
    sv = scaled('north_std_dev')
    sz = scaled('vertical_std_dev')

    ts_fig, ts_ax = plt.subplots(3, 1, sharex=True,
                                 num='Time Series View',
                                 facecolor='white')
    _setup_ts_ax(ts_ax)
    map_fig, map_ax = plt.subplots(num='Map View', facecolor='white')
    bm = make_basemap(lon, lat, resolution=map_resolution)
    _setup_map_ax(bm, map_ax)

    # project the geodetic station coordinates onto the basemap
    x, y = bm(lon, lat)
    pos = np.array([x, y]).T

    interactive_vector_viewer(
        times, pos,
        u=u, v=v, z=z, su=su, sv=sv, sz=sz,
        ts_ax=ts_ax, map_ax=map_ax,
        dataset_labels=dataset_labels,
        station_labels=station_ids,
        time_labels=time_labels,
        units=units,
        **kwargs)
    return
def xtick_formatter(x, p):
    '''
    Returns the tick label for the value *x*.

    Parameters
    ----------
    x : number
        Value handed to ``mjd_inv`` with the format ``%Y-%m-%d``.
    p : any
        Accepted for signature compatibility; not used.

    Returns
    -------
    str
        The formatted date, or an empty string when ``mjd_inv`` raises
        ``ValueError`` or ``OverflowError``.
    '''
    try:
        return mjd_inv(x, '%Y-%m-%d')
    except (ValueError, OverflowError):
        return ''
def _write_csv(data):
    '''
    Builds the csv text for a single station.

    Parameters
    ----------
    data : dict
        Must provide 'id', 'longitude', 'latitude', 'space_exponent',
        'time_exponent', 'time' and, indexable by time position,
        'north', 'east', 'vertical' and the matching '*_std_dev'
        entries.

    Returns
    -------
    str
        Header lines followed by one row per entry of ``data['time']``.
    '''
    time = data['time']
    lines = [
        '4-character id, %s\n' % data['id'],
        'begin date, %s\n' % mjd_inv(time[0], '%Y-%m-%d'),
        'end date, %s\n' % mjd_inv(time[-1], '%Y-%m-%d'),
        'longitude, %s E\n' % data['longitude'],
        'latitude, %s N\n' % data['latitude'],
        'units, meters**%s days**%s\n' % (data['space_exponent'],
                                          data['time_exponent']),
        'date, north, east, vertical, north std. deviation, '
        'east std. deviation, vertical std. deviation\n',
    ]

    # convert displacements and uncertainties to strings
    for i, ti in enumerate(time):
        row = (mjd_inv(ti, '%Y-%m-%d'),
               data['north'][i],
               data['east'][i],
               data['vertical'][i],
               data['north_std_dev'][i],
               data['east_std_dev'][i],
               data['vertical_std_dev'][i])
        lines.append('%s, %e, %e, %e, %e, %e, %e\n' % row)

    return ''.join(lines)
def check_unique_dates(data):
    '''
    makes sure each date is unique

    Parameters
    ----------
    data : dict
        Only ``data['time']`` is read. It may be any iterable of
        hashable time values (a list or a 1-D numpy array).

    Raises
    ------
    DataError
        If any time value occurs more than once. The message lists the
        duplicated dates, formatted with ``mjd_inv`` as ``%Y-%m-%d``, in
        ascending order.
    '''
    # local import so the block does not depend on the file's import list
    from collections import Counter

    # one pass over the times instead of one full scan per unique value
    counts = Counter(data['time'])
    repeated = sorted(t for t, n in counts.items() if n > 1)
    if repeated:
        duplicates = ', '.join(mjd_inv(t, '%Y-%m-%d') for t in repeated)
        raise DataError(
            'Dataset contains the following duplicate dates : %s '
            % duplicates)
def pygeons_clean(input_file, resolution='i',
                  input_edits_file=None,
                  break_lons=None, break_lats=None,
                  break_conn=None, no_display=False,
                  output_stem=None, **kwargs):
    '''
    runs the PyGeoNS Interactive Cleaner

    Parameters
    ----------
    input_file : str
        Name of the HDF5 data file to clean.

    resolution : str
        basemap resolution

    input_edits_file : str, optional
        Name of the file containing edits which will automatically be
        applied before opening up the interactive viewer. Each non-blank
        line holds four whitespace-separated fields: the edit type
        ("outliers" or "jump"), a station id, and two values. For
        "outliers" the values are a start and stop date (YYYY-MM-DD);
        for "jump" they are a date and an integer. Edits for stations
        that are not in the dataset are skipped.

    break_lons, break_lats, break_conn : optional
        Accepted for interface compatibility; not used by this function.

    no_display : bool, optional
        When True the calls to ``ic.update()`` and ``ic.connect()`` are
        skipped, so only the edits from *input_edits_file* are applied.

    output_stem : str, optional
        Stem of the output files. The cleaned data is written to
        ``<output_stem>.h5`` and the edit log to ``<output_stem>.txt``.
        Defaults to ``_remove_extension(input_file) + '.clean'``.

    **kwargs :
        forwarded to ``InteractiveCleaner``

    Raises
    ------
    ValueError
        If an edit type other than "outliers" or "jump" is encountered.
    '''
    logger.info('Running pygeons clean ...')
    data = dict_from_hdf5(input_file)
    # items() exists on both Python 2 and Python 3 dicts; iteritems() is
    # Python 2 only
    out = dict((k, np.copy(v)) for k, v in data.items())

    ts_fig, ts_ax = plt.subplots(3, 1, sharex=True,
                                 num='Time Series View',
                                 facecolor='white')
    _setup_ts_ax(ts_ax)
    map_fig, map_ax = plt.subplots(num='Map View', facecolor='white')
    bm = make_basemap(data['longitude'], data['latitude'],
                      resolution=resolution)
    _setup_map_ax(bm, map_ax)
    x, y = bm(data['longitude'], data['latitude'])
    pos = np.array([x, y]).T
    t = data['time']
    dates = [mjd_inv(ti, '%Y-%m-%d') for ti in t]
    units = _unit_string(data['space_exponent'], data['time_exponent'])
    # factor that converts units of days and m to the units in *units*
    conv = 1.0 / unit_conversion(units, time='day', space='m')
    u = conv * data['east']
    v = conv * data['north']
    z = conv * data['vertical']
    su = conv * data['east_std_dev']
    sv = conv * data['north_std_dev']
    sz = conv * data['vertical_std_dev']
    ic = InteractiveCleaner(
        t, pos, u=u, v=v, z=z, su=su, sv=sv, sz=sz,
        map_ax=map_ax, ts_ax=ts_ax,
        time_labels=dates,
        units=units,
        station_labels=data['id'],
        **kwargs)

    # make edits to the data set prior to displaying it
    if input_edits_file is not None:
        with open(input_edits_file, 'r') as fin:
            for line in fin:
                # ignore blank lines
                if line.isspace():
                    continue

                edit_type, sta, a, b = line.strip().split()
                # set the current station in *ic* to the station for
                # this edit
                xidx, = (data['id'] == sta).nonzero()
                if len(xidx) == 0:
                    # continue because the station does not exist in
                    # this dataset
                    continue

                ic.xidx = xidx[0]
                if edit_type == 'outliers':
                    start_time = mjd(a, '%Y-%m-%d')
                    stop_time = mjd(b, '%Y-%m-%d')
                    ic.remove_outliers(start_time, stop_time)
                elif edit_type == 'jump':
                    jump_time = mjd(a, '%Y-%m-%d')
                    delta = int(b)
                    ic.remove_jump(jump_time, delta)
                else:
                    raise ValueError(
                        'edit type must be either "outliers" or "jump"')

    if not no_display:
        ic.update()
        ic.connect()

    # set output file name
    if output_stem is None:
        output_stem = _remove_extension(input_file) + '.clean'

    output_file = output_stem + '.h5'
    output_edits_file = output_stem + '.txt'

    # record every edit held in the cleaner's log in the same format
    # that *input_edits_file* is read in
    with open(output_edits_file, 'w') as fout:
        for entry in ic.log:
            edit_type, xidx, a, b = entry
            if edit_type == 'outliers':
                station = data['id'][xidx]
                start_date = mjd_inv(a, '%Y-%m-%d')
                stop_date = mjd_inv(b, '%Y-%m-%d')
                fout.write('outliers %s %s %s\n'
                           % (station, start_date, stop_date))
            elif edit_type == 'jump':
                station = data['id'][xidx]
                jump_date = mjd_inv(a, '%Y-%m-%d')
                fout.write('jump %s %s %s\n' % (station, jump_date, b))
            else:
                raise ValueError(
                    'edit type must be either "outliers" or "jump"')

    logger.info('Edits saved to %s' % output_edits_file)

    # undo the unit conversion that was applied for display
    clean_data = ic.get_data()
    out['east'] = clean_data[0] / conv
    out['north'] = clean_data[1] / conv
    out['vertical'] = clean_data[2] / conv
    out['east_std_dev'] = clean_data[3] / conv
    out['north_std_dev'] = clean_data[4] / conv
    out['vertical_std_dev'] = clean_data[5] / conv

    hdf5_from_dict(output_file, out)
    logger.info('Cleaned data written to %s' % output_file)
    logger.info('Edits written to %s' % output_edits_file)
    return
def pygeons_crop(input_file, start_date=None, stop_date=None,
                 min_lat=-np.inf, max_lat=np.inf,
                 min_lon=-np.inf, max_lon=np.inf,
                 stations=None, output_stem=None):
    '''
    Sets the time span of the data set to be between *start_date* and
    *stop_date*. Sets the stations to be within the latitude and
    longitude bounds.

    Parameters
    ----------
    input_file : str
        Name of the HDF5 data file to crop.

    start_date : str, optional
        Start date of output data set in YYYY-MM-DD. Defaults to the
        earliest date in the data set. The bound is inclusive.

    stop_date : str, optional
        Stop date of output data set in YYYY-MM-DD. Defaults to the
        latest date in the data set. The bound is inclusive.

    min_lon, max_lon, min_lat, max_lat : float, optional
        Spatial bounds on the output data set. Stations must lie
        strictly inside the bounds to be kept.

    stations : str list, optional
        List of stations to be removed from the dataset. This is in
        addition to the stations removed by the lon/lat bounds.

    output_stem : str, optional
        Stem of the output file; the cropped data is written to
        ``<output_stem>.h5``. Defaults to
        ``_remove_extension(input_file) + '.crop'``.
    '''
    logger.info('Running pygeons crop ...')
    data = dict_from_hdf5(input_file)
    # items() exists on both Python 2 and Python 3 dicts; iteritems() is
    # Python 2 only
    out = dict((k, np.copy(v)) for k, v in data.items())

    if start_date is None:
        start_date = mjd.mjd_inv(data['time'].min(), '%Y-%m-%d')

    if stop_date is None:
        stop_date = mjd.mjd_inv(data['time'].max(), '%Y-%m-%d')

    if stations is None:
        stations = []

    components = ['east', 'north', 'vertical']

    # remove times that are not within the bounds of *start_date* and
    # *stop_date*
    start_time = int(mjd.mjd(start_date, '%Y-%m-%d'))
    stop_time = int(mjd.mjd(stop_date, '%Y-%m-%d'))
    time_mask = ((data['time'] >= start_time) &
                 (data['time'] <= stop_time))
    out['time'] = out['time'][time_mask]
    for comp in components:
        out[comp] = out[comp][time_mask, :]
        out[comp + '_std_dev'] = out[comp + '_std_dev'][time_mask, :]

    # find stations that are within the bounds
    in_bounds = ((data['longitude'] > min_lon) &
                 (data['longitude'] < max_lon) &
                 (data['latitude'] > min_lat) &
                 (data['latitude'] < max_lat))
    # find stations that are in the list of stations to be removed. The
    # dtype is forced because an empty list would otherwise produce a
    # float array, which cannot be inverted with ~
    in_list = np.array([i in stations for i in data['id']], dtype=bool)
    # keep stations that are in bounds and not in the list
    keep, = (in_bounds & ~in_list).nonzero()

    out['id'] = out['id'][keep]
    out['longitude'] = out['longitude'][keep]
    out['latitude'] = out['latitude'][keep]
    for comp in components:
        out[comp] = out[comp][:, keep]
        out[comp + '_std_dev'] = out[comp + '_std_dev'][:, keep]

    # set output file name
    if output_stem is None:
        output_stem = _remove_extension(input_file) + '.crop'

    output_file = output_stem + '.h5'
    hdf5_from_dict(output_file, out)
    logger.info('Cropped data written to %s' % output_file)
    return
def pygeons_strain(input_file,
                   network_prior_model=('spwen12-se',),
                   network_prior_params=(1.0, 0.1, 100.0),
                   network_noise_model=(),
                   network_noise_params=(),
                   station_noise_model=('linear',),
                   station_noise_params=(),
                   start_date=None, stop_date=None,
                   positions=None, positions_file=None,
                   rate=True, vertical=True, covariance=False,
                   output_stem=None):
    '''
    calculates strain

    Parameters
    ----------
    input_file : str
        Name of the HDF5 data file. Its ``time_exponent`` must be 0 and
        its ``space_exponent`` must be 1.

    network_prior_model, network_noise_model, station_noise_model :
        Forwarded unchanged to ``strain``.

    network_prior_params, network_noise_params, station_noise_params :
        Converted with ``_params_dict``; the entry for each direction
        ('east', 'north', 'vertical') is forwarded to ``strain``.

    start_date, stop_date : str, optional
        First and last output date in YYYY-MM-DD. Default to the
        earliest and latest time in the data set. Output times are
        generated with a step of one between the two, inclusive.

    positions : sequence, optional
        Output positions given as a flat sequence that is reshaped to
        rows of (id, longitude, latitude).

    positions_file : str, optional
        Text file with three columns: id, longitude and latitude. When
        neither *positions* nor *positions_file* is given the solution
        is returned at the input station positions; when both are given
        the positions from the file come first.

    rate : bool, optional
        Forwarded to ``strain``. The output ``time_exponent`` is set to
        ``-int(rate)``.

    vertical : bool, optional
        When False the vertical deformation gradients are not computed
        and zeros are stored instead.

    covariance : bool, optional
        Forwarded to ``strain``. When True a ``'<direction>_covariance'``
        entry is stored for each direction as well.

    output_stem : str, optional
        Stem of the output files ``<output_stem>.dudx.h5`` and
        ``<output_stem>.dudy.h5``. Defaults to
        ``_remove_extension(input_file) + '.strain'``.

    Raises
    ------
    ValueError
        If the input data set does not have units of displacement or if
        the positions file does not have three columns.
    '''
    logger.info('Running pygeons strain ...')
    data = dict_from_hdf5(input_file)
    if data['time_exponent'] != 0:
        raise ValueError('input dataset must have units of displacement')

    if data['space_exponent'] != 1:
        raise ValueError('input dataset must have units of displacement')

    # items() exists on both Python 2 and Python 3 dicts; iteritems() is
    # Python 2 only
    out_dx = dict((k, np.copy(v)) for k, v in data.items())
    out_dy = dict((k, np.copy(v)) for k, v in data.items())

    # convert params to a dictionary of hyperparameters for each
    # direction
    network_prior_params = _params_dict(network_prior_params)
    network_noise_params = _params_dict(network_noise_params)
    station_noise_params = _params_dict(station_noise_params)

    # convert geodetic input positions to cartesian
    bm = make_basemap(data['longitude'], data['latitude'])
    x, y = bm(data['longitude'], data['latitude'])
    xy = np.array([x, y]).T

    # set output positions
    if positions is None and positions_file is None:
        # no output positions were specified so return the solution at
        # the input data positions
        output_id = np.array(data['id'], copy=True)
        output_lon = np.array(data['longitude'], copy=True)
        output_lat = np.array(data['latitude'], copy=True)
    else:
        output_id = np.zeros((0,), dtype=str)
        output_lon = np.zeros((0,), dtype=float)
        output_lat = np.zeros((0,), dtype=float)
        if positions_file is not None:
            # if positions file was specified
            pos = np.loadtxt(positions_file, dtype=str, ndmin=2)
            if pos.shape[1] != 3:
                raise ValueError(
                    'positions file must contain a column for IDs, '
                    'longitudes, and latitudes')

            output_id = np.hstack((output_id, pos[:, 0]))
            output_lon = np.hstack((output_lon, pos[:, 1].astype(float)))
            output_lat = np.hstack((output_lat, pos[:, 2].astype(float)))

        if positions is not None:
            # if positions were specified via the command line
            pos = np.array(positions, dtype=str).reshape((-1, 3))
            output_id = np.hstack((output_id, pos[:, 0]))
            output_lon = np.hstack((output_lon, pos[:, 1].astype(float)))
            output_lat = np.hstack((output_lat, pos[:, 2].astype(float)))

    # convert geodetic output positions to cartesian
    output_x, output_y = bm(output_lon, output_lat)
    output_xy = np.array([output_x, output_y]).T

    # set output times
    if start_date is None:
        start_date = mjd_inv(np.min(data['time']), '%Y-%m-%d')

    if stop_date is None:
        stop_date = mjd_inv(np.max(data['time']), '%Y-%m-%d')

    start_time = mjd(start_date, '%Y-%m-%d')
    stop_time = mjd(stop_date, '%Y-%m-%d')
    output_time = np.arange(start_time, stop_time + 1)

    # set output file names
    if output_stem is None:
        output_stem = _remove_extension(input_file) + '.strain'

    output_dx_file = output_stem + '.dudx.h5'
    output_dy_file = output_stem + '.dudy.h5'

    _log_strain(input_file,
                network_prior_model, network_prior_params,
                network_noise_model, network_noise_params,
                station_noise_model, station_noise_params,
                start_date, stop_date, output_id, rate, vertical,
                covariance, output_dx_file, output_dy_file)

    for comp in ['east', 'north', 'vertical']:
        if comp == 'vertical' and not vertical:
            logger.debug('Not computing vertical deformation gradients')
            # do not compute the deformation gradients for vertical.
            # Just return zeros.
            grid = (output_time.shape[0], output_xy.shape[0])
            dx = np.zeros(grid)
            sdx = np.zeros(grid)
            dy = np.zeros(grid)
            sdy = np.zeros(grid)
            if covariance:
                # if covariance is True then create an empty array of
                # covariances with shape (time, station, time, station)
                cdx = np.zeros(grid + grid)
                cdy = np.zeros(grid + grid)
                soln = (dx, sdx, cdx, dy, sdy, cdy)
            else:
                soln = (dx, sdx, dy, sdy)

        else:
            soln = strain(t=data['time'][:, None],
                          x=xy,
                          d=data[comp],
                          sd=data[comp + '_std_dev'],
                          network_prior_model=network_prior_model,
                          network_prior_params=network_prior_params[comp],
                          network_noise_model=network_noise_model,
                          network_noise_params=network_noise_params[comp],
                          station_noise_model=station_noise_model,
                          station_noise_params=station_noise_params[comp],
                          out_t=output_time[:, None],
                          out_x=output_xy,
                          rate=rate,
                          covariance=covariance)

        if covariance:
            # soln contains six entries when covariance is True
            dx, sdx, cdx, dy, sdy, cdy = soln
            out_dx[comp] = dx
            out_dx[comp + '_std_dev'] = sdx
            out_dx[comp + '_covariance'] = cdx
            out_dy[comp] = dy
            out_dy[comp + '_std_dev'] = sdy
            out_dy[comp + '_covariance'] = cdy
        else:
            # soln contains four entries when covariance is False
            dx, sdx, dy, sdy = soln
            out_dx[comp] = dx
            out_dx[comp + '_std_dev'] = sdx
            out_dy[comp] = dy
            out_dy[comp + '_std_dev'] = sdy

    # both output data sets share the same times, positions and units
    for out in (out_dx, out_dy):
        out['time'] = output_time
        out['longitude'] = output_lon
        out['latitude'] = output_lat
        out['id'] = output_id
        out['time_exponent'] = -int(rate)
        out['space_exponent'] = 0

    hdf5_from_dict(output_dx_file, out_dx)
    hdf5_from_dict(output_dy_file, out_dy)
    if rate:
        logger.info('Posterior velocity gradients written to %s and %s'
                    % (output_dx_file, output_dy_file))
    else:
        logger.info('Posterior displacement gradients written to %s and %s'
                    % (output_dx_file, output_dy_file))

    return
def pygeons_clean(input_file, resolution='i',
                  input_edits_file=None,
                  break_lons=None, break_lats=None,
                  break_conn=None, no_display=False,
                  output_stem=None, **kwargs):
    '''
    runs the PyGeoNS Interactive Cleaner

    Parameters
    ----------
    input_file : str
        Name of the HDF5 data file to clean.

    resolution : str
        basemap resolution

    input_edits_file : str, optional
        Name of the file containing edits which will automatically be
        applied before opening up the interactive viewer. Each non-blank
        line holds four whitespace-separated fields: the edit type
        ("outliers" or "jump"), a station id, and two values. For
        "outliers" the values are a start and stop date (YYYY-MM-DD);
        for "jump" they are a date and an integer. Edits for stations
        that are not in the dataset are skipped.

    break_lons, break_lats, break_conn : optional
        Accepted for interface compatibility; not used by this function.

    no_display : bool, optional
        When True the calls to ``ic.update()`` and ``ic.connect()`` are
        skipped, so only the edits from *input_edits_file* are applied.

    output_stem : str, optional
        Stem of the output files. The cleaned data is written to
        ``<output_stem>.h5`` and the edit log to ``<output_stem>.txt``.
        Defaults to ``_remove_extension(input_file) + '.clean'``.

    **kwargs :
        forwarded to ``InteractiveCleaner``

    Raises
    ------
    ValueError
        If an edit type other than "outliers" or "jump" is encountered.
    '''
    logger.info('Running pygeons clean ...')
    data = dict_from_hdf5(input_file)
    # items() exists on both Python 2 and Python 3 dicts; iteritems() is
    # Python 2 only
    out = dict((k, np.copy(v)) for k, v in data.items())

    ts_fig, ts_ax = plt.subplots(3, 1, sharex=True,
                                 num='Time Series View',
                                 facecolor='white')
    _setup_ts_ax(ts_ax)
    map_fig, map_ax = plt.subplots(num='Map View', facecolor='white')
    bm = make_basemap(data['longitude'], data['latitude'],
                      resolution=resolution)
    _setup_map_ax(bm, map_ax)
    x, y = bm(data['longitude'], data['latitude'])
    pos = np.array([x, y]).T
    t = data['time']
    dates = [mjd_inv(ti, '%Y-%m-%d') for ti in t]
    units = _unit_string(data['space_exponent'], data['time_exponent'])
    # factor that converts units of days and m to the units in *units*
    conv = 1.0 / unit_conversion(units, time='day', space='m')
    u = conv * data['east']
    v = conv * data['north']
    z = conv * data['vertical']
    su = conv * data['east_std_dev']
    sv = conv * data['north_std_dev']
    sz = conv * data['vertical_std_dev']
    ic = InteractiveCleaner(
        t, pos, u=u, v=v, z=z, su=su, sv=sv, sz=sz,
        map_ax=map_ax, ts_ax=ts_ax,
        time_labels=dates,
        units=units,
        station_labels=data['id'],
        **kwargs)

    # make edits to the data set prior to displaying it
    if input_edits_file is not None:
        with open(input_edits_file, 'r') as fin:
            for line in fin:
                # ignore blank lines
                if line.isspace():
                    continue

                edit_type, sta, a, b = line.strip().split()
                # set the current station in *ic* to the station for
                # this edit
                xidx, = (data['id'] == sta).nonzero()
                if len(xidx) == 0:
                    # continue because the station does not exist in
                    # this dataset
                    continue

                ic.xidx = xidx[0]
                if edit_type == 'outliers':
                    start_time = mjd(a, '%Y-%m-%d')
                    stop_time = mjd(b, '%Y-%m-%d')
                    ic.remove_outliers(start_time, stop_time)
                elif edit_type == 'jump':
                    jump_time = mjd(a, '%Y-%m-%d')
                    delta = int(b)
                    ic.remove_jump(jump_time, delta)
                else:
                    raise ValueError(
                        'edit type must be either "outliers" or "jump"')

    if not no_display:
        ic.update()
        ic.connect()

    # set output file name
    if output_stem is None:
        output_stem = _remove_extension(input_file) + '.clean'

    output_file = output_stem + '.h5'
    output_edits_file = output_stem + '.txt'

    # record every edit held in the cleaner's log in the same format
    # that *input_edits_file* is read in
    with open(output_edits_file, 'w') as fout:
        for entry in ic.log:
            edit_type, xidx, a, b = entry
            if edit_type == 'outliers':
                station = data['id'][xidx]
                start_date = mjd_inv(a, '%Y-%m-%d')
                stop_date = mjd_inv(b, '%Y-%m-%d')
                fout.write('outliers %s %s %s\n'
                           % (station, start_date, stop_date))
            elif edit_type == 'jump':
                station = data['id'][xidx]
                jump_date = mjd_inv(a, '%Y-%m-%d')
                fout.write('jump %s %s %s\n' % (station, jump_date, b))
            else:
                raise ValueError(
                    'edit type must be either "outliers" or "jump"')

    logger.info('Edits saved to %s' % output_edits_file)

    # undo the unit conversion that was applied for display
    clean_data = ic.get_data()
    out['east'] = clean_data[0] / conv
    out['north'] = clean_data[1] / conv
    out['vertical'] = clean_data[2] / conv
    out['east_std_dev'] = clean_data[3] / conv
    out['north_std_dev'] = clean_data[4] / conv
    out['vertical_std_dev'] = clean_data[5] / conv

    hdf5_from_dict(output_file, out)
    logger.info('Cleaned data written to %s' % output_file)
    logger.info('Edits written to %s' % output_edits_file)
    return
def pygeons_crop(input_file, start_date=None, stop_date=None,
                 min_lat=-np.inf, max_lat=np.inf,
                 min_lon=-np.inf, max_lon=np.inf,
                 stations=None, output_stem=None):
    '''
    Sets the time span of the data set to be between *start_date* and
    *stop_date*. Sets the stations to be within the latitude and
    longitude bounds.

    Parameters
    ----------
    input_file : str
        Name of the HDF5 data file to crop.

    start_date : str, optional
        Start date of output data set in YYYY-MM-DD. Defaults to the
        earliest date in the data set. The bound is inclusive.

    stop_date : str, optional
        Stop date of output data set in YYYY-MM-DD. Defaults to the
        latest date in the data set. The bound is inclusive.

    min_lon, max_lon, min_lat, max_lat : float, optional
        Spatial bounds on the output data set. Stations must lie
        strictly inside the bounds to be kept.

    stations : str list, optional
        List of stations to be removed from the dataset. This is in
        addition to the stations removed by the lon/lat bounds.

    output_stem : str, optional
        Stem of the output file; the cropped data is written to
        ``<output_stem>.h5``. Defaults to
        ``_remove_extension(input_file) + '.crop'``.
    '''
    logger.info('Running pygeons crop ...')
    data = dict_from_hdf5(input_file)
    # items() exists on both Python 2 and Python 3 dicts; iteritems() is
    # Python 2 only
    out = dict((k, np.copy(v)) for k, v in data.items())

    if start_date is None:
        start_date = mjd.mjd_inv(data['time'].min(), '%Y-%m-%d')

    if stop_date is None:
        stop_date = mjd.mjd_inv(data['time'].max(), '%Y-%m-%d')

    if stations is None:
        stations = []

    components = ['east', 'north', 'vertical']

    # remove times that are not within the bounds of *start_date* and
    # *stop_date*
    start_time = int(mjd.mjd(start_date, '%Y-%m-%d'))
    stop_time = int(mjd.mjd(stop_date, '%Y-%m-%d'))
    time_mask = ((data['time'] >= start_time) &
                 (data['time'] <= stop_time))
    out['time'] = out['time'][time_mask]
    for comp in components:
        out[comp] = out[comp][time_mask, :]
        out[comp + '_std_dev'] = out[comp + '_std_dev'][time_mask, :]

    # find stations that are within the bounds
    in_bounds = ((data['longitude'] > min_lon) &
                 (data['longitude'] < max_lon) &
                 (data['latitude'] > min_lat) &
                 (data['latitude'] < max_lat))
    # find stations that are in the list of stations to be removed. The
    # dtype is forced because an empty list would otherwise produce a
    # float array, which cannot be inverted with ~
    in_list = np.array([i in stations for i in data['id']], dtype=bool)
    # keep stations that are in bounds and not in the list
    keep, = (in_bounds & ~in_list).nonzero()

    out['id'] = out['id'][keep]
    out['longitude'] = out['longitude'][keep]
    out['latitude'] = out['latitude'][keep]
    for comp in components:
        out[comp] = out[comp][:, keep]
        out[comp + '_std_dev'] = out[comp + '_std_dev'][:, keep]

    # set output file name
    if output_stem is None:
        output_stem = _remove_extension(input_file) + '.crop'

    output_file = output_stem + '.h5'
    hdf5_from_dict(output_file, out)
    logger.info('Cropped data written to %s' % output_file)
    return