def read_transect(transect):
    hydro_txt_fn, section_name = transect
    ds = xr_transect.section_hydro_to_transect(hydro_txt_fn, section_name)
    xy = np.c_[ds.x_sample.values, ds.y_sample.values]
    ll = proj_utils.mapper("EPSG:26910", "WGS84")(xy)
    ds['lon'] = ('sample',), ll[:, 0]
    ds['lat'] = ('sample',), ll[:, 1]
    return [ds]
def read_transect(transect):
    hydro_txt_fn, section_name, sun_model = transect
    untrim_ds = xr_transect.section_hydro_to_transect(
        hydro_txt_fn, section_name)
    line_xy = np.c_[untrim_ds.x_sample.values, untrim_ds.y_sample.values]
    ds = sun_model.extract_transect(time=-1, xy=line_xy, dx=3)
    xy = np.c_[ds.x_sample.values, ds.y_sample.values]
    ll = proj_utils.mapper("EPSG:26910", "WGS84")(xy)
    ds['lon'] = ('sample',), ll[:, 0]
    ds['lat'] = ('sample',), ll[:, 1]
    # 'source' was not defined in this fragment; record the inputs instead
    ds.attrs['source'] = "%s:%s" % (hydro_txt_fn, section_name)
    return [ds]
def read_transect(transect):
    hydro_txt_fn, section_name, dfm_map, g = transect
    untrim_ds = xr_transect.section_hydro_to_transect(
        hydro_txt_fn, section_name)
    line_xy = np.c_[untrim_ds.x_sample.values, untrim_ds.y_sample.values]
    ds = dfm.extract_transect(dfm_map.isel(time=-1), line=line_xy, dx=3, grid=g)
    xy = np.c_[ds.x_sample.values, ds.y_sample.values]
    ll = proj_utils.mapper("EPSG:26910", "WGS84")(xy)
    ds['lon'] = ('sample',), ll[:, 0]
    ds['lat'] = ('sample',), ll[:, 1]
    return [ds]
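##

# A minimal usage sketch for the read_transect() variants above, which come
# from parallel scripts. The tuple shape selects the source: (hydro_txt_fn,
# section_name) for the UnTRIM text sections alone, plus a suntans model or a
# DFM map dataset and grid for the other two. File and section names here are
# hypothetical.
transect = ("section_hydro.txt", "transect_A")  # hypothetical inputs
ds, = read_transect(transect)
print(ds.lon.values[:3], ds.lat.values[:3])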
def samples_from_usgs(run_start, field='salinity'):
    run_start = utils.to_dt64(run_start)
    pad = np.timedelta64(30, 'D')
    df = usgs_sfbay.query_usgs_sfbay(period_start=run_start - pad,
                                     period_end=run_start + pad,
                                     cache_dir=common.cache_dir)
    # narrow to the columns we care about:
    if field == 'salinity':
        dfield = 'Salinity'
    elif field == 'temperature':
        dfield = 'Temperature'
    else:
        assert False, "Unknown field %s" % field

    df2 = df.loc[:, ['Date', 'Station Number', 'Depth', dfield]]
    # will have to get coordinates from elsewhere

    # depth average:
    df3 = df2.groupby(['Date', 'Station Number'])[dfield].mean()

    run_start_dnum = utils.to_dnum(run_start)

    def time_interp(grp):
        dnums = [utils.to_dnum(v[0]) for v in grp.index.values]
        return np.interp(run_start_dnum, dnums, grp.values)

    ser4 = df3.groupby('Station Number').apply(time_interp)

    lls = [usgs_sfbay.station_number_to_lonlat(s) for s in ser4.index.values]
    lls = np.array(lls)
    xys = proj_utils.mapper('WGS84', 'EPSG:26910')(lls)

    # glue those together to get [N,3] array, {x,y,salt}
    return np.c_[xys, ser4.values]
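##

# Usage sketch for samples_from_usgs(); the run date is an arbitrary example.
# The result is an [N,3] array of UTM x, UTM y, and depth-averaged salinity
# interpolated in time to the run start.
samples = samples_from_usgs(np.datetime64('2017-06-01'), field='salinity')
print("%d stations with salinity samples" % len(samples))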
import os
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
import xarray as xr
import matplotlib.pyplot as plt
from matplotlib.dates import num2date, date2num, DateFormatter
from scipy import stats, interpolate
from scipy.stats import pearsonr

from stompy.spatial import proj_utils
from pyscripts import analysis as an

plt.ioff()

ll_to_utm = proj_utils.mapper('WGS84', 'EPSG:26910')
utm_to_ll = proj_utils.mapper('EPSG:26910', 'WGS84')

data_path = os.path.join("/hpcvol1/emma/sfb_dfm/moored_sensor_data")
file = "MooredSensor_L3.nc"
moored_sensor_data_path = os.path.join(data_path, file)
moored_sensor_data = xr.open_dataset(moored_sensor_data_path)

run_name = "wy2017-v4"
begindate = "20160801"
path = "/hpcvol1/emma/sfb_dfm/runs/%s/DFM_OUTPUT_%s/" % (run_name, run_name)
hisfile = os.path.join(path, "%s_0000_%s_000000_his.nc" % (run_name, begindate))

start_date = np.datetime64('2016-10-01')
end_date = np.datetime64('2017-10-01')
if args.start is None:
    args.start=utils.floor_dt64(combined.time.min(),dt)
    log.info("Start date defaults to start of data: %s"%args.start)
else:
    args.start=np.datetime64(args.start)

if args.end is None:
    args.end=utils.ceil_dt64(combined.time.max(),dt)
    log.info("End date defaults to end of data: %s"%args.end)
else:
    args.end=np.datetime64(args.end)

args.reference=np.datetime64(args.reference)

# REPROJECT
log.info("Reprojecting to %s"%args.projection)
mapper=proj_utils.mapper('WGS84',args.projection)
ll=df.loc[:,['longitude','latitude']].values
xy=mapper(ll)
df['x']=xy[:,0]
df['y']=xy[:,1]

if args.plot:
    plot_mode='each'
elif args.save_plot:
    plot_mode='save'
else:
    plot_mode=None

ExtrapolateToGrid(grid,data=df,
                  start=args.start,end=args.end,dt=dt,
                  n_layers=args.layers,
from stompy import utils
from stompy.spatial import field, wkb2shp, proj_utils
import stompy.plot.cmap as scmap
from stompy.plot import plot_wkb, plot_utils
from stompy.grid import unstructured_grid
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import cm, collections
import numpy as np
from shapely import ops

##

upper_ll = [-120.930408, 37.309940]
lower_ll = [-121.271098, 37.691794]
release_xy = proj_utils.mapper('WGS84', 'EPSG:26910')([upper_ll, lower_ll])

##

dem_mrg = field.MultiRasterField([
    "../../../bathy/junction-composite-20200605-w_smooth.tif",
    "/home/rusty/data/bathy_dwr/gtiff/dem_bay_delta_10m_v3_20121109_2.tif"
])
clip = (646390., 648336., 4184677., 4187210.)
dem = dem_mrg.extract_tile(clip)

##

aerial = field.GdalGrid(
    "../../../gis/aerial/m_3712114_sw_10_h_20160621_20161004-UTM.tif")
aerial.F = aerial.F[:, :, :3]  # drop alpha - seems mis-scaled
        results_rev = self.t_net.shortest_path(n, samples, edge_weight=rev_weight,
                                               directed=True,
                                               return_type='cost',
                                               max_return=self.search_n // 2)
        # results are lists of (n,cost)
        return results_fwd + results_rev  # concatenate

##

adcp_shp = wkb2shp.shp2geom('derived/samples-depth.shp')
adcp_ll = np.array([np.array(pnt) for pnt in adcp_shp['geom']])
adcp_xy = proj_utils.mapper('WGS84', 'EPSG:26910')(adcp_ll)
adcp_xyz = np.c_[adcp_xy, adcp_shp['depth']]

##

src = 'dem'
cluster = True

xyz_input = adcp_xyz.copy()

if src == 'dem':
    # Rather than use the ADCP data directly, during testing
    # use its horizontal distribution, but pull "truth" from the
    # DEM
    xyz_input[:, 2] = dem(xyz_input[:, :2])

if cluster:
def parsed_to_ds(section, filename):
    """
    NOTE: As written, this creates a
    """
    ds = xr.Dataset()
    ds.attrs['name'] = section[0]
    ds.attrs['source'] = "%s:%s" % (filename, section[0])
    ds.attrs['filename'] = filename

    profiles = section[1]
    wet_profiles = [prof for prof in profiles
                    if prof['k_bed'] < prof['k_surf']]

    k_min = min([p['k_bed'] for p in wet_profiles])
    k_max = max([p['k_surf'] for p in wet_profiles])

    ds['z_surf'] = ('sample',), np.array([p['z_surf'] for p in wet_profiles])  # z_surf is positive up
    ds['z_bed'] = ('sample',), np.array([-p['z_bed'] for p in wet_profiles])   # p['z_bed'] is positive down

    z_min = ds.z_bed.values.min()
    z_max = ds.z_surf.values.max()

    dz_min = 0.1  # could scan, but a sliced surface layer might look really small

    # Resample to evenly spaced vertical axis. Will be shifting to be positive
    # up, relative to water surface, below.
    z_resamp = np.arange(z_min, z_max + dz_min, dz_min)

    ds['sample'] = ('sample',), np.arange(len(wet_profiles))
    ds['z'] = ('cell',), z_resamp  # 'cell' is more like 'bin' here.

    dists = np.zeros(len(wet_profiles), 'f8')
    Ve = np.nan * np.ones((len(ds['sample']), len(ds['z'])), 'f8')
    Vn = np.nan * np.ones((len(ds['sample']), len(ds['z'])), 'f8')
    Vu = np.nan * np.ones((len(ds['sample']), len(ds['z'])), 'f8')

    for p_i, p in enumerate(wet_profiles):
        # represent discrete cells directly

        # thickness of each layer in the model output
        dzs = [b['dz'] for b in p['bins']]
        # Add a zero thickness at the top to help with going from bins to
        # interfaces
        bin_dz = np.array([0] + dzs)

        # n'th item is the elevation of the bottom of the n'th layer, relative
        # to the surface, with an extra item for the top of the top layer.
        # bin_bottoms=p['z_surf'] - (np.cumsum(bin_dz[::-1])[::-1])
        interface_elevations = (-p['z_bed']) + np.cumsum(bin_dz)
        # should be within text tolerances.
        assert np.abs(interface_elevations[-1] - p['z_surf']) < 0.01

        Ninterface = len(interface_elevations)
        bins = np.searchsorted(interface_elevations, z_resamp)
        valid = (bins > 0) & (bins < Ninterface)
        bins = bins.clip(1, Ninterface - 1) - 1

        for tgt, src in [(Ve, 'u'), (Vn, 'v')]:
            bin_values = np.array([b[src] for b in p['bins']])
            tgt[p_i] = np.where(valid, bin_values[bins], np.nan)

    Vu[...] = 0 * Ve[...]  # not reported

    ds['Ve'] = ('sample', 'cell'), Ve
    ds['Vn'] = ('sample', 'cell'), Vn
    ds['Vu'] = ('sample', 'cell'), Vu

    ds['location'] = ds['z']

    xy = np.array([[p['x'], p['y']] for p in wet_profiles])
    ds['x_utm'] = ('sample',), xy[:, 0]
    ds['y_utm'] = ('sample',), xy[:, 1]

    ll = proj_utils.mapper('EPSG:26910', 'WGS84')(xy)
    ds['lon'] = ('sample',), ll[:, 0]
    ds['lat'] = ('sample',), ll[:, 1]
    return ds
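##

# For reference, a synthetic example of the (name, profiles) structure that
# parsed_to_ds() expects, with made-up numbers. Each profile's bin thicknesses
# must sum to z_surf + z_bed to satisfy the interface-elevation assertion.
demo_profile = {
    'k_bed': 0, 'k_surf': 3,        # wet, since k_bed < k_surf
    'z_surf': 0.5,                  # water surface, positive up
    'z_bed': 3.0,                   # bed depth, positive down
    'x': 552000.0, 'y': 4124000.0,  # UTM zone 10 coordinates
    'bins': [{'dz': 1.0, 'u': 0.10, 'v': 0.00},
             {'dz': 1.0, 'u': 0.20, 'v': 0.05},
             {'dz': 1.5, 'u': 0.30, 'v': 0.10}],
}
demo_ds = parsed_to_ds(('demo_section', [demo_profile]), 'demo.txt')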
def cruise_dataset(start,stop):
    """
    Fetches USGS SF Bay water quality cruises from SFEI ERDDAP,
    munges the data to some degree, and returns an xarray Dataset.

    start, stop: dates bracketing the period of interest.
    """
    full_remote_usgs_ds=xr.open_dataset(usgs_erddap)

    # Limit to requested period
    start=utils.to_dt64(start)
    stop=utils.to_dt64(stop)
    time_slc=slice( *np.searchsorted( full_remote_usgs_ds['s.time'].values,
                                      [start,stop]) )
    remote_usgs_ds=full_remote_usgs_ds.isel(s=time_slc)

    # Drop the annoying s. prefix
    renames=dict([ (v,v[2:])
                   for v in remote_usgs_ds.data_vars
                   if v.startswith('s.') ])

    # some sort of xarray or numpy bug. the first copy sorts out something
    # deep in the internals. see xarray bug report #1253.
    _dummy=remote_usgs_ds.copy(deep=True)
    # second copy can then proceed correctly
    ds0=remote_usgs_ds.copy(deep=True)

    ds=ds0.rename(renames)

    # add dates:
    # have to be careful about time zones here. Add 7 hours before rounding to
    # date to get PST days, within 1hr.
    # Also note that xarray sneakily pushes everything to datetime[ns],
    # so even though we cast to M8[D], xarray immediately makes that midnight
    # UTC on the given date, which then displays as 1600 or 1700 on the
    # previous day.
    ds['date']=(ds.time-np.timedelta64(7*3600,'s')).astype('M8[D]')

    # At this point, ds also respects the ordering. so far so good.

    # Kludge missing distances:
    bad=np.isnan(ds['Distance_from_station_36'].values)
    # for weird reasons, must use .values here! hmm - doesn't always work.
    ds['Distance_from_station_36'].values[bad]=999

    # A little dicey within each profile, notably it doesn't
    # explicitly force the depths to be in order.
    # Using Distance_from_station_36 yields a better ordering...
    ds4=xr_utils.redimension(ds.copy(deep=True),
                             ['date','Distance_from_station_36'],
                             intragroup_dim='prof_sample',
                             save_mapping=True,
                             inplace=True)

    if 1:
        # There are a few variables which are specific to a station, so no
        # need to carry them around in full dimension:
        spatial=['StationNumber','latitude','longitude','StationName']
        for fld in spatial:
            # This fails because if a cast has no surface sample, we get
            # nan values.
            # ds4[fld] = ds4[fld].isel(drop=True,date=0,prof_sample=0)

            # Instead, aggregate over the dimension. min() picks out nan
            # values. median can't handle strings. max() appears ok.
            # In some cases this is still a problem, ie on hpc, can get a
            # TypeError because missing values are nan, and cannot be compared
            # to byte or string.
            # Because there are some lurking inconsistencies with the type
            # of strings coming in as bytes vs. strings, we still have some
            # weird logic here
            try:
                # this should work if the field is already a float
                ds4[fld]=ds4[fld].max(dim='date').max(dim='prof_sample')
            except TypeError:
                # maybe it's a string?
                try:
                    ds4[fld]=ds4[fld].fillna(b'').max(dim='date').max(dim='prof_sample')
                except TypeError:
                    ds4[fld]=ds4[fld].fillna('').max(dim='date').max(dim='prof_sample')

        ds4=ds4.set_coords(spatial)

    # And some nutrient variables which do not have a vertical dimension.
    # With the exception of 3 samples, there is at most 1 nutrient sample
    # per water column.
    nutrients=['nh','p','si','nn','NO2','ext_coeff','ext_coeff_calc']
    for fld in nutrients:
        valid=np.isfinite(ds4[fld].values)
        max_count=np.sum(valid,axis=2).max()
        if 0:
            # show some add'l info on how often there is more than 1 sample
            # per profile:
            print("%s: max per cast %d"%(fld,max_count))
            for mc in range(max_count):
                print("  %d casts have %d samples"%(np.sum((mc+1)==np.sum(valid,axis=2)),
                                                    mc+1))
        ds4[fld]=ds4[fld].mean(dim='prof_sample')

    # The rest will get sorted by depth
    ds5=xr_utils.sort_dimension(ds4,'depth','prof_sample')

    # add a bit of CF-style metadata - possible that this could be copied
    # from the ERDDAP data...
    ds5.depth.attrs['positive']='down'

    # Go ahead and add UTM coordinates
    utm_xy=proj_utils.mapper('WGS84','EPSG:26910')(
        np.array([ds5.longitude,ds5.latitude]).T )
    ds5['x']=( ds5.longitude.dims, utm_xy[:,0] )
    ds5['y']=( ds5.longitude.dims, utm_xy[:,1] )

    ds5=ds5.set_coords(['time','x','y','depth'])

    # add more metadata
    ds5['prof_sample'].attrs['long_name']='Profile (vertical) dimension'

    return ds5
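##

# Usage sketch for cruise_dataset(); the dates are arbitrary examples. The
# result is dimensioned by date, Distance_from_station_36, and prof_sample,
# with casts sorted by depth.
cruises = cruise_dataset(np.datetime64('2017-03-01'), np.datetime64('2017-04-01'))
print(cruises.dims)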
zoom = (6322685., 6324276, 2115808, 2118406.)
axs[0].axis(zoom)

fig.tight_layout()
fig.savefig('roughness-dx%d.png' % dx)

##

# similar but on the grid
from stompy.grid import unstructured_grid
from stompy.spatial import proj_utils

g = unstructured_grid.UnstructuredGrid.from_ugrid(
    "../model/suntans/grid-with_bathy.nc")

# reproject grid to 1ft dem
to_ca = proj_utils.mapper('EPSG:26910', 'EPSG:2227')
g.nodes['x'][:] = to_ca(g.nodes['x'])
g.cells_center(refresh=True)

##

# ripple_xy=[6323641, 2116848.]
# riprap_xy=[6323582, 2116749]

cell_mean = np.zeros(g.Ncells(), np.float64)
cell_rms = np.zeros(g.Ncells(), np.float64)

X, Y = dem.XY()

# crop=(6323542, 6323719, 2116656, 2116927)
crop = total_crop

for c in utils.progress(np.nonzero(g.cell_clip_mask(crop))[0]):
##

# Pasted in from HPC.sfei.org
import numpy as np
import xarray as xr
import matplotlib.pyplot as plt

from stompy.spatial import proj_utils
from stompy.grid import unstructured_grid
import stompy.model.delft.waq_scenario as waq
from stompy.model.delft import hydro_utils
from stompy import utils

srv_xy=[615117,4224383]
pc_ll=[-(122+2.4/60.),(38+3.3/60.)]
pc_xy=proj_utils.mapper('WGS84','EPSG:26910')(pc_ll)
ges_xy=[629223,4233353]
fpx_xy=[630728,4257433]

hydro=waq.HydroFiles('/hpcvol1/cascade/WY2011/DFM_DELWAQ_sal+temp/sal+temp.hyd')

g=hydro.grid()

plt.figure()
g.plot_edges(lw=0.3,color='k')
plt.axis('equal')

t0=utils.to_dt64(hydro.time0)
t1=t0+np.timedelta64(170,'D')
def add_roughness(g):
    # make a copy of the grid, reprojected to match the 1ft DEM
    g_ft = g.copy()
    to_ca = proj_utils.mapper('EPSG:26910', 'EPSG:2227')
    g_ft.nodes['x'][:] = to_ca(g_ft.nodes['x'])
    g_ft.cells_center(refresh=True)

    # for the snubby grid it's reasonable to just grab the whole
    # overlapping DEM.
    total_crop = g_ft.bounds()  # that covers the junction

    dem = field.GdalGrid("../../bathy/dwr/OldRiver_SJ_072020171.tif",
                         geo_bounds=total_crop)
    dem.F[dem.F < -1e+10] = np.nan

    cell_mean = np.zeros(g.Ncells(), np.float64)
    cell_rms = np.zeros(g.Ncells(), np.float64)

    cell_list = np.arange(g.Ncells())  # eventually may have to process in tiles

    for c in utils.progress(cell_list):
        cell_poly = g_ft.cell_polygon(c)
        xyxy = cell_poly.bounds
        xxyy = [xyxy[0], xyxy[2], xyxy[1], xyxy[3]]
        dcrop = dem.crop(xxyy)
        try:
            sel = dcrop.polygon_mask(cell_poly)
        except AttributeError:
            print("!")
            continue
        sel = sel & np.isfinite(dcrop.F)
        if sel.sum() < 10:  # ad hoc
            continue
        dem_vals = dcrop.F[sel]
        X, Y = dcrop.XY()
        dem_x = X[sel]
        dem_y = Y[sel]
        dem_x_loc = dem_x - dem_x.mean()
        dem_y_loc = dem_y - dem_y.mean()
        fit_y = dem_vals
        fit_X = np.c_[dem_x_loc, dem_y_loc, np.ones_like(dem_x)]
        # least-squares plane fit; the residual from the plane is the
        # roughness estimate
        beta_hat = np.linalg.lstsq(fit_X, fit_y, rcond=None)[0]
        flat = np.dot(fit_X, beta_hat)
        anom = dem_vals - flat
        cell_rms[c] = np.sqrt((anom**2).mean())

    # fill in the remaining cells based on averaging neighbors
    df = pd.DataFrame()
    valid = cell_rms != 0.0
    df['value'] = cell_rms[valid]
    df['cell'] = np.nonzero(valid)[0]
    df['weight'] = 1.0
    filled = interp_4d.weighted_grid_extrapolation(g, df, cell_col='cell', alpha=10)

    g.add_cell_field('bathy_rms', filled, on_exists='overwrite')
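##

# Sketch of how add_roughness() might be applied, assuming g is loaded from a
# ugrid netCDF as in the other scripts here; the filename is hypothetical.
g = unstructured_grid.UnstructuredGrid.from_ugrid("grid-with_bathy.nc")
add_roughness(g)
# the RMS plane-fit residual is now available per cell:
g.plot_cells(values=g.cells['bathy_rms'])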
# the validation plots are showing bad juju in the north.
import numpy as np
import xarray as xr
import matplotlib.pyplot as plt
from stompy.spatial import proj_utils
from stompy import utils

# sfb1332 is nearest the inflow boundary.

##

adcp = xr.open_dataset('/opt/data/noaa/ports/SFB1332-2013.nc')
adcp_xy = proj_utils.mapper('WGS84', 'EPSG:26910')([adcp.longitude, adcp.latitude])

##

output_path = "/opt/data/delft/sfb_dfm_v2/runs/wy2013a/DFM_OUTPUT_wy2013a/"
model_files = output_path + "wy2013a_0000_20120801_000000_his.nc"
model = xr.open_dataset(model_files)
model_xy = np.c_[model.station_x_coordinate.values,
                 model.station_y_coordinate.values]

##

# What
plt.figure(1).clf()
fig, ax = plt.subplots(num=1)

ax.plot(model.station_x_coordinate.values,
        model.station_y_coordinate.values,
        'go')
                 parse_dates=['time'],
                 infer_datetime_format=True)

loc = df.comment[1]
lat = float(re.search(r'Lat:([-0-9.]+)', loc).group(1))
lon = float(re.search(r'Long?:([-0-9.]+)', loc).group(1))

gage = xr.Dataset.from_dataframe(df.set_index('time'))
gage.attrs['url'] = url
gage.attrs['src'] = "Water Data Library"
gage.attrs['code'] = wdl_code
gage['lat'] = (), lat
gage['lon'] = (), lon
ll = [lon, lat]
xy = proj_utils.mapper('WGS84', 'EPSG:26910')(ll)
gage['x'] = (), xy[0]
gage['y'] = (), xy[1]

gages.append(gage)

##

# How do those time series compare? Quick glance:
if 0:
    plt.figure(2).clf()
    fig, ax = plt.subplots(1, 1, num=2)

    for gage in gages:
        ax.plot(gage.time, gage.stage, label=gage.attrs['code'])
    ax.legend()
""" parse the BAHM output format (fixed width text) into pandas dataframe, add the extra fields via df_post() and return """ df = pd.read_fwf(src_fn, [(0, 4), (5, 7), (8, 10), (10, 23)], skiprows=5, names=['year', 'month', 'day', 'flow_cfs'], parse_dates={'date': [0, 1, 2]}) df_post(df) return df ## utm2ll = proj_utils.mapper(target_srs, 'WGS84') # collect datasets for all the sources all_ds = [] for rec in pour_points: name = rec['immediatec'] # Find the text data file that goes with this point src_name = name.replace(' ', '') # Fix a few mismatches in naming if src_name == 'UALAMEDAg': src_name = 'UALAMEDA' elif src_name == 'COYOTEd': # HERE - COYOTE needs to come from USGS data anyway, not to mention that # that the src_name shouldn't have the 'd'.
from stompy import utils
from stompy.spatial import wkb2shp, proj_utils
from stompy.io.local import noaa_coops
import stompy.model.delft.io as dio

############ COMPARISONS ##################

run_name = "short_test_18"
run_base_dir = "runs/%s" % run_name

mdu_fn = os.path.join(run_base_dir, "%s.mdu" % run_name)
mdu = dio.MDUFile(mdu_fn)

##

utm2ll = proj_utils.mapper('EPSG:26910', "WGS84")

##

# Comparisons across ROMS, NOAA tides, NOAA ADCPs, and this model
obs_pnts = wkb2shp.shp2geom(
    "/opt/data/delft/sfb_dfm_v2/inputs-static/observation-points.shp")

##

def load_subroms():
    ca_roms_files = ca_roms.fetch_ca_roms(run_start, run_stop)
    sub_vars = ['zeta', 'salt']
    ds = xr.open_dataset(ca_roms_files[0])
def surveyor_to_xr(rivr_fn, proj=None,
                   source_preferences=['mat', 'csv'],
                   positive='down',
                   z_bed_preferences=['depth_bt', 'depth_vb']):
    """
    Read River Surveyor outputs (post-processed rivr file as either MAT or
    CSV). Return results in an xarray Dataset.

    source_preferences: list of formats to check for, currently only mat
      or csv.
    positive: if 'up', flip the coordinates so that positive values are up.
      otherwise default to positive=down
    """
    suffix = rivr_fn.split('.')[-1]
    assert suffix in ['rivr', 'riv']
    base = rivr_fn[:-len(suffix)]  # includes trailing '.'

    ds = None
    for preference in source_preferences:
        if preference == 'mat' and os.path.exists(base + 'mat'):
            ds = _surveyor_mat_to_xr(base)
            if ds is not None:
                break
        elif preference == 'csv' and os.path.exists(base + 'vel'):
            ds = _surveyor_csv_to_xr(base)
            if ds is not None:
                break
    if ds is None:
        raise Exception("Couldn't find any post-processed files for %s" % rivr_fn)

    if proj is not None:
        mapper = proj_utils.mapper('WGS84', proj)
        ll = np.c_[ds.lon.values, ds.lat.values]
        xy = mapper(ll)
        ds['x_sample'] = ds.lon.dims, xy[:, 0]
        ds['y_sample'] = ds.lon.dims, xy[:, 1]

    # Other metadata:
    ds.attrs['rivr_filename'] = rivr_fn
    ds.attrs['source'] = rivr_fn

    if positive == 'up':
        ds.z_ctr.values *= -1
        ds.z_ctr.attrs['positive'] = 'up'
        ds['z_bed'] = -ds[z_bed_preferences[0]]
        ds.z_bed.attrs['positive'] = 'up'
    else:
        ds.z_ctr.attrs['positive'] = 'down'
        ds['z_bed'] = ds[z_bed_preferences[0]]
        ds.z_bed.attrs['positive'] = 'down'

    log.info("Assuming Sontek data is relative to transducer, including depth")
    ds.z_bed.attrs['datum'] = 'transducer'
    ds.z_ctr.attrs['datum'] = 'transducer'

    return ds
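##

# Usage sketch (hypothetical filename): read a post-processed RiverSurveyor
# transect, projecting lon/lat to UTM zone 10 and flipping z to positive-up.
tran = surveyor_to_xr("20180712_transect1.rivr", proj='EPSG:26910', positive='up')
print(tran.z_bed.attrs)  # expect {'positive': 'up', 'datum': 'transducer'}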
from stompy import utils
from stompy.grid import unstructured_grid
from stompy.model.fish_ptm import ptm_tools
from stompy.plot import plot_wkb
from stompy.io.local import usgs_nwis
from stompy.spatial import proj_utils

##

cache_dir="cache"
os.path.exists(cache_dir) or os.mkdir(cache_dir)

##

g=unstructured_grid.UnstructuredGrid.from_ugrid('../../../dflowfm/runs/20180807_grid98_17/ptm_hydro.nc')
grid_poly=g.boundary_polygon()

ll2utm=proj_utils.mapper('WGS84','EPSG:26910')

##

ptm_run_dir="../run_10days"
ptm_groups=["INIT","SAC","SRV"]
ptm_data=[ ptm_tools.PtmBin(os.path.join(ptm_run_dir,grp+"_bin.out"))
           for grp in ptm_groups ]

ntimes=ptm_data[0].count_timesteps()
run_start=ptm_data[0].read_timestep(0)[0]
run_stop =ptm_data[0].read_timestep(ntimes-1)[0]

##
        print("  '%s'"%rec['short_name'])
        continue
    xy=np.array(rec['geom'].centroid)
    ds['utm_x'].loc[ dict(site=rec['short_name']) ]=xy[0]
    ds['utm_y'].loc[ dict(site=rec['short_name']) ]=xy[1]

missing=ds['site'][ np.isnan(ds['utm_x'].values) ].values
if len(missing):
    print("Sites in sfbay_potw data, but without a location from discharge_approx_locations.shp")
    print(",".join(missing))
else:
    print("All sites in sfbay_potw matched with a lat/lon")

xy=np.c_[ ds.utm_x, ds.utm_y ]
ll=proj_utils.mapper('EPSG:26910','WGS84')(xy)
ds['latitude']=( ('site',), ll[:,1] )
ds['longitude']=( ('site',), ll[:,0] )

ds=ds.set_coords(['utm_x','utm_y','latitude','longitude'])

##

# Try writing directly to a netcdf file that ERDDAP is willing to load:

# map some abbreviations to descriptive names
glossary={'TDN':'total dissolved nitrogen',
          'SKN':'soluble Kjeldahl nitrogen',
          'TDP':'total dissolved phosphorus',
          'TKN':'total Kjeldahl nitrogen',
          'TP':'total phosphorus',
          'TSS':'total suspended solids'}
def samples_from_sfei_erddap(run_start, cache_dir=None):
    """
    return [N,3] array of salinity data from SFEI moorings appropriate for
    the given date. Note that this may have no data, in which case a [0,3]
    array is returned.

    This version fetches and caches data from SFEI's ERDDAP server.
    """
    if cache_dir is None:
        cache_dir = common.cache_dir

    dt_str = utils.to_datetime(run_start).strftime('%Y%m%d%H%M')
    if cache_dir is not None:
        my_cache_dir = os.path.join(cache_dir, 'enviz_erddap')
        os.path.exists(my_cache_dir) or os.mkdir(my_cache_dir)

        cache_fn = os.path.join(my_cache_dir, "temp_salt-%s.csv" % dt_str)
        print("Cache fn: %s" % cache_fn)
    else:
        cache_fn = None

    if cache_fn is not None and os.path.exists(cache_fn):
        csv_data = cache_fn
    else:
        # Fetch data before/after run_start by this much
        pad = np.timedelta64(30 * 60, 's')
        fetch_period = [run_start - pad, run_start + pad]
        fetch_strs = [utils.to_datetime(p).strftime('%Y-%m-%dT%H:%M:00Z')
                      for p in fetch_period]

        # Because the table in ERDDAP is stored by sample, there is no
        # guarantee that times are increasing. That makes access via opendap
        # inefficient, so instead specify the query to ERDDAP more directly,
        # and grab CSV for easier human readability.

        # choose dataset
        base_url = "http://sfbaynutrients.sfei.org/erddap/tabledap/enviz_mirror.csv"
        # choose fields to download
        params = ",".join(['stationcode', 'time', 'spcond_uS_cm',
                           'temp_C', 'stationname', 'latitude', 'longitude'])
        # And the time range
        criteria = "time%%3E=%s&time%%3C=%s" % tuple(fetch_strs)
        url = base_url + "?" + params + "&" + criteria

        import requests
        logging.info("Fetching SFEI data from %s" % url)
        resp = requests.get(url)

        if cache_fn is not None:
            with open(cache_fn, 'wt') as fp:
                fp.write(resp.content.decode())
            csv_data = cache_fn
        else:
            csv_data = six.StringIO(resp.content.decode())

    # 2nd row of file has units, which we ignore.
    df = pd.read_csv(csv_data, skiprows=[1], parse_dates=['time'])

    # Could get fancier and choose the closest-in-time reading, or
    # interpolate. But this is not too bad, averaging over a total of
    # 1 hour.
    dfm = df.groupby('stationcode').mean()

    # Get salinity from specific conductance
    import seawater as sw
    # specific conductance to mS/cm, and ratio to conductivity at 35 psu,
    # 15 degC. Note that mooring data comes in already adjusted to "specific
    # conductance in uS/cm at 25 degC"
    rt = dfm['spcond_uS_cm'].values / 1000. / sw.constants.c3515
    dfm['salinity'] = sw.sals(rt, 25.0)

    ll = np.c_[dfm.longitude.values, dfm.latitude.values]
    xy = proj_utils.mapper('WGS84', 'EPSG:26910')(ll)
    xys = np.c_[xy, dfm['salinity'].values]
    valid = np.isfinite(xys[:, 2])
    return xys[valid, :]
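##

# Usage sketch: mooring salinity near a run start (date is an arbitrary
# example). Returns an [N,3] array of UTM x, y, salinity, possibly empty.
xyz = samples_from_sfei_erddap(np.datetime64('2017-06-01 00:00'))
print("%d mooring salinity samples" % len(xyz))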
def parsed_to_ds(section, filename):
    ds = xr.Dataset()
    ds.attrs['name'] = section[0]
    ds.attrs['source'] = "%s:%s" % (filename, section[0])
    ds.attrs['filename'] = filename

    profiles = section[1]
    wet_profiles = [prof for prof in profiles
                    if prof['k_bed'] < prof['k_surf']]

    k_min = min([p['k_bed'] for p in wet_profiles])
    k_max = max([p['k_surf'] for p in wet_profiles])

    ds['z_surf'] = ('sample',), np.array([p['z_surf'] for p in wet_profiles])  # z_surf is positive up
    ds['z_bed'] = ('sample',), np.array([-p['z_bed'] for p in wet_profiles])

    z_min = ds.z_bed.values.min()   # min( [ -p['z_bed'] for p in wet_profiles] )  # z_bed is positive down
    z_max = ds.z_surf.values.max()  # max( [p['z_surf'] for p in wet_profiles] )   # z_surf is positive up

    # Is it possible that we're losing the bed cell?
    dz_min = 0.2  # could scan, but a sliced surface layer might look really small

    # Resample to evenly spaced vertical axis:
    z_resamp = np.arange(z_min, z_max + dz_min, dz_min)

    ds['sample'] = ('sample',), np.arange(len(wet_profiles))
    ds['z'] = ('cell',), z_resamp  # 'cell' is more like 'bin' here.

    dists = np.zeros(len(wet_profiles), 'f8')
    Ve = np.nan * np.ones((len(ds['sample']), len(ds['z'])), 'f8')
    Vn = np.nan * np.ones((len(ds['sample']), len(ds['z'])), 'f8')
    Vu = np.nan * np.ones((len(ds['sample']), len(ds['z'])), 'f8')

    for p_i, p in enumerate(wet_profiles):
        # Seems that the bed cells are shaved, so use the surface elevation
        # to get the true vertical coordinate
        bin_dz = np.array([b['dz'] for b in p['bins']])
        bin_center_z = p['z_surf'] - (np.cumsum(bin_dz[::-1])[::-1] - 0.5 * bin_dz)
        for tgt, src in [(Ve, 'u'), (Vn, 'v')]:
            tgt[p_i] = np.interp(z_resamp,
                                 bin_center_z, [b[src] for b in p['bins']],
                                 left=np.nan, right=np.nan)

    Vu[...] = 0 * Ve[...]  # not reported

    ds['Ve'] = ('sample', 'cell'), Ve
    ds['Vn'] = ('sample', 'cell'), Vn
    ds['Vu'] = ('sample', 'cell'), Vu

    ds['location'] = ds['z']

    xy = np.array([[p['x'], p['y']] for p in wet_profiles])
    ds['x_utm'] = ('sample',), xy[:, 0]
    ds['y_utm'] = ('sample',), xy[:, 1]

    ll = proj_utils.mapper('EPSG:26910', 'WGS84')(xy)
    ds['lon'] = ('sample',), ll[:, 0]
    ds['lat'] = ('sample',), ll[:, 1]
    return ds