def plot_ctdmo(data_dict, var, stdev=None):
    colors10 = ['red', 'firebrick', 'orange', 'mediumseagreen', 'blue', 'darkgreen', 'purple', 'indigo',
                'slategray', 'black']
    colors16 = ['red', 'firebrick', 'orange', 'gold', 'mediumseagreen', 'darkcyan', 'blue', 'darkgreen',
                'purple', 'lightgray', 'slategray', 'black', 'coral', 'gold', 'limegreen', 'midnightblue']

    fig, ax1 = plt.subplots()
    sensor_list = []
    median_list = []
    for i, (key, value) in enumerate(data_dict.items()):
        if len(data_dict) < 11:
            colors = colors10
        else:
            colors = colors16
        t = value['time']
        y = value['yD']
        if stdev is not None:
            ind = cf.reject_outliers(value['yD'], stdev)
            t = t[ind]
            y = y[ind]
        refdes = str(key)
        sensor_list.append(refdes.split('-')[-1])
        median_list.append(value['median'])
        plt.scatter(t, y, c=colors[i], marker='.', s=.5)

        if i == len(data_dict) - 1:  # if the last dataset has been plotted
            plt.grid()
            plt.margins(y=.05, x=.05)

            # refdes on secondary y-axis only for pressure and density
            if var in ['ctdmo_seawater_pressure', 'density']:
                ax2 = ax1.twinx()
                ax2.set_ylim(ax1.get_ylim())
                plt.yticks(median_list, sensor_list, fontsize=7.5)
                plt.subplots_adjust(right=.85)

            pf.format_date_axis(ax1, fig)
            pf.y_axis_disable_offset(ax1)
            subsite = refdes.split('-')[0]
            title = subsite + ' ' + ('-'.join((value['dms'].split('-')[0], value['dms'].split('-')[1])))
            ax1.set_ylabel((var + " (" + value['yunits'] + ")"), fontsize=9)
            ax1.set_title(title, fontsize=10)
            fname = '-'.join((subsite, value['dms'], var))
            if stdev is not None:
                fname = '-'.join((fname, 'outliers_rejected'))
            sdir = os.path.join(sDir, subsite, value['dms'].split('-')[0])
            cf.create_dir(sdir)
            pf.save_fig(sdir, fname)
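# A minimal usage sketch for plot_ctdmo (illustrative only; the reference designator,
# values, and paths are placeholders, not taken from the original repo). It shows the
# dictionary layout the function expects: one entry per instrument, keyed by reference
# designator, with 'time', 'yD', 'median', 'yunits' and 'dms' fields. Note that
# plot_ctdmo reads a module-level sDir for its output directory, so that must be
# defined before calling.
def _example_plot_ctdmo():
    example_dict = {
        'GI03FLMA-RIM01-02-CTDMOG040': dict(
            time=np.array(['2019-01-01T00:00', '2019-01-02T00:00'], dtype='datetime64[ns]'),
            yD=np.array([34.10, 34.22]),   # science variable values
            median=34.16,                  # labels the sensor on the secondary y-axis
            yunits='1',                    # appended to the y-axis label
            dms='deployment0005-recovered_inst-ctdmo_ghqr_instrument_recovered'),
    }
    plot_ctdmo(example_dict, 'practical_salinity', stdev=5)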
def main(files, out):
    """
    files: url to an .nc/.ncml file or the path to a text file containing .nc/.ncml links.
           A # at the front will skip links in the text file.
    out: Directory to save plots
    """
    fname, ext = os.path.splitext(files)
    if ext == '.nc':
        list_files = [files]
    elif ext == '.ncml':
        list_files = [files]
    else:
        list_files = read_file(files)

    stream_vars = pf.load_variable_dict(var='eng')  # load engineering variables
    # for nc in list_files:
    #     print(nc)
    # the engine that xarray uses can be changed as specified here
    # http://xarray.pydata.org/en/stable/generated/xarray.open_dataset.html#xarray.open_dataset
    with xr.open_mfdataset(list_files, engine='netcdf4') as ds_disk:
        # change dimensions from 'obs' to 'time'
        ds_disk = ds_disk.swap_dims({'obs': 'time'})
        ds_variables = ds_disk.data_vars.keys()  # List of dataset variables
        stream = ds_disk.stream  # List stream name associated with the data
        title_pre = mk_str(ds_disk.attrs, 't')  # , var, tt0, tt1, 't')
        save_pre = mk_str(ds_disk.attrs, 's')  # , var, tt0, tt1, 's')
        save_dir = os.path.join(out, ds_disk.subsite, ds_disk.node, ds_disk.stream, 'pcolor')
        cf.create_dir(save_dir)
        # t0, t1 = cf.get_rounded_start_and_end_times(ds_disk['time'].data)
        # tI = t0 + t1 - (t0 / 2)
        # time_list = [[t0, t1], [t0, tI], [tI, t1]]
        # time_list = [[t0, t1]]
        # for period in time_list:
        #     tt0 = period[0]
        #     tt1 = period[1]
        #     sub_ds = ds_disk.sel(time=slice(str(tt0), str(tt1)))
        bins = ds_disk['bin_depths']
        north = ds_disk['northward_seawater_velocity']
        east = ds_disk['eastward_seawater_velocity']
        # up = ds_disk['upward_seawater_velocity']
        # error = ds_disk['error_velocity']

        time = dict(data=ds_disk['time'].data, info=dict(label=ds_disk['time'].standard_name, units='GMT'))
        bins = dict(data=bins.data.T, info=dict(label=bins.long_name, units=bins.units))
        north = dict(data=north.data.T, info=dict(label=north.long_name, units=north.units))
        east = dict(data=east.data.T, info=dict(label=east.long_name, units=east.units))
        # up = dict(data=up.data.T, info=dict(label=up.long_name, units=up.units))
        # error = dict(data=error.data.T, info=dict(label=error.long_name, units=error.units))

        sname = save_pre + 'ADCP'
        title = title_pre
        fig, axs = pf.adcp(time, bins, north, east, title)
        pf.resize(width=12, height=8.5)  # Resize figure
        pf.save_fig(save_dir, sname, res=250)  # Save figure
        plt.close('all')
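# Example invocation for the ADCP pcolor script above (a sketch; the THREDDS url and
# output directory are hypothetical placeholders).
if __name__ == '__main__':
    files = 'https://opendap.oceanobservatories.org/thredds/dodsC/.../deployment0003_...-adcp_velocity_earth.ncml'
    out = '/Users/ooi/plots'
    main(files, out)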
def main(sDir, f):
    ff = pd.read_csv(os.path.join(sDir, f))
    datasets = cf.get_nc_urls(ff['outputUrl'].tolist())
    for d in datasets:
        print(d)
        fname, subsite, refdes, method, stream, deployment = cf.nc_attributes(d)
        save_dir = os.path.join(sDir, subsite, refdes, deployment)
        cf.create_dir(save_dir)

        sci_vars = cf.return_science_vars(stream)
        colors = cm.jet(np.linspace(0, 1, len(sci_vars)))

        with xr.open_dataset(d, mask_and_scale=False) as ds:
            ds = ds.swap_dims({'obs': 'time'})
            t = ds['time'].data
            t0 = pd.to_datetime(t.min()).strftime('%Y-%m-%dT%H:%M:%S')
            t1 = pd.to_datetime(t.max()).strftime('%Y-%m-%dT%H:%M:%S')
            title = ' '.join((deployment, refdes, method))

            fig, ax = plt.subplots()
            axes = [ax]
            for i in range(len(sci_vars)):
                if i > 0:
                    axes.append(ax.twinx())  # twin the x-axis to make independent y-axes

            fig.subplots_adjust(right=0.6)
            right_additive = (0.98 - 0.6) / float(5)

            for i in range(len(sci_vars)):
                if i > 0:
                    axes[i].spines['right'].set_position(('axes', 1. + right_additive * i))
                y = ds[sci_vars[i]]

                ind = cf.reject_outliers(y, 5)
                yD = y.data[ind]
                x = t[ind]
                # yD = y.data

                c = colors[i]
                axes[i].plot(x, yD, '.', markersize=2, color=c)
                axes[i].set_ylabel((y.name + " (" + y.units + ")"), color=c, fontsize=9)
                axes[i].tick_params(axis='y', colors=c)
                if i == len(sci_vars) - 1:  # if the last variable has been plotted
                    pf.format_date_axis(axes[i], fig)

            axes[0].set_title((title + '\n' + t0 + ' - ' + t1), fontsize=9)
            sfile = '_'.join((fname, 'timeseries'))
            pf.save_fig(save_dir, sfile)
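# Standalone sketch of the multi-axis technique used in main() above: each additional
# variable gets its own y-axis via ax.twinx(), and the extra right spines are pushed
# outward in axes coordinates so they don't overlap. The offset step here is
# illustrative; main() computes it from the figure margin.
def _demo_offset_spines():
    fig, ax = plt.subplots()
    axes = [ax, ax.twinx(), ax.twinx()]
    fig.subplots_adjust(right=0.6)  # leave room to the right for the offset spines
    for i, axis in enumerate(axes[1:], start=1):
        # ('axes', 1.0) is the right edge of the plot area; step each spine further right
        axis.spines['right'].set_position(('axes', 1. + 0.076 * i))
    return fig, axes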
def main(url_list, sDir, deployment_num, start_time, end_time, preferred_only, n_std, surface_params, depth_params):
    rd_list = []
    for uu in url_list:
        elements = uu.split('/')[-2].split('-')
        rd = '-'.join((elements[1], elements[2], elements[3], elements[4]))
        if rd not in rd_list and 'ENG' not in rd:
            rd_list.append(rd)

    for r in rd_list:
        print('\n{}'.format(r))
        datasets = []
        for u in url_list:
            splitter = u.split('/')[-2].split('-')
            rd_check = '-'.join((splitter[1], splitter[2], splitter[3], splitter[4]))
            if rd_check == r:
                udatasets = cf.get_nc_urls([u])
                datasets.append(udatasets)
        datasets = list(itertools.chain(*datasets))
        fdatasets = []
        if preferred_only == 'yes':
            # get the preferred stream information
            ps_df, n_streams = cf.get_preferred_stream_info(r)
            for index, row in ps_df.iterrows():
                for ii in range(n_streams):
                    try:
                        rms = '-'.join((r, row[ii]))
                    except TypeError:
                        continue
                    for dd in datasets:
                        spl = dd.split('/')[-2].split('-')
                        catalog_rms = '-'.join((spl[1], spl[2], spl[3], spl[4], spl[5], spl[6]))
                        fdeploy = dd.split('/')[-1].split('_')[0]
                        if rms == catalog_rms and fdeploy == row['deployment']:
                            fdatasets.append(dd)
        else:
            fdatasets = datasets

        main_sensor = r.split('-')[-1]
        fdatasets_sel = cf.filter_collocated_instruments(main_sensor, fdatasets)

        for fd in fdatasets_sel:
            part_d = fd.split('/')[-1]
            print('\n{}'.format(part_d))
            ds = xr.open_dataset(fd, mask_and_scale=False)
            ds = ds.swap_dims({'obs': 'time'})
            fname, subsite, refdes, method, stream, deployment = cf.nc_attributes(fd)
            array = subsite[0:2]
            sci_vars = cf.return_science_vars(stream)

            if 'CE05MOAS' in r or 'CP05MOAS' in r:  # for coastal gliders, get m_water_depth for bathymetry
                eng = '-'.join((r.split('-')[0], r.split('-')[1], '00-ENG000000', method, 'glider_eng'))
                eng_url = [s for s in url_list if eng in s]
                if len(eng_url) == 1:
                    eng_datasets = cf.get_nc_urls(eng_url)
                    # filter out collocated datasets
                    eng_dataset = [j for j in eng_datasets if (eng in j.split('/')[-1] and deployment in j.split('/')[-1])]
                    if len(eng_dataset) > 0:
                        ds_eng = xr.open_dataset(eng_dataset[0], mask_and_scale=False)
                        t_eng = ds_eng['time'].values
                        m_water_depth = ds_eng['m_water_depth'].values
                        # m_altimeter_status = 0 means a good reading (not nan or -1)
                        eng_ind = ds_eng['m_altimeter_status'].values == 0
                        m_water_depth = m_water_depth[eng_ind]
                        t_eng = t_eng[eng_ind]
                    else:
                        print('No engineering file for deployment {}'.format(deployment))
                        m_water_depth = None
                        t_eng = None
                else:
                    m_water_depth = None
                    t_eng = None
            else:
                m_water_depth = None
                t_eng = None

            if deployment_num is not None:
                if int(deployment[-4:]) != deployment_num:
                    print('\nskipping deployment {}'.format(deployment))
                    continue

            if start_time is not None and end_time is not None:
                ds = ds.sel(time=slice(start_time, end_time))
                if len(ds['time'].values) == 0:
                    print('No data to plot for specified time range: ({} to {})'.format(start_time, end_time))
                    continue
                stime = start_time.strftime('%Y-%m-%d')
                etime = end_time.strftime('%Y-%m-%d')
                ext = stime + 'to' + etime  # .join((ds0_method, ds1_method
                save_dir_profile = os.path.join(sDir, array, subsite, refdes, 'profile_plots', deployment, ext)
                save_dir_xsection = os.path.join(sDir, array, subsite, refdes, 'xsection_plots', deployment, ext)
                save_dir_4d = os.path.join(sDir, array, subsite, refdes, 'xsection_plots_4d', deployment, ext)
            else:
                save_dir_profile = os.path.join(sDir, array, subsite, refdes, 'profile_plots', deployment)
                save_dir_xsection = os.path.join(sDir, array, subsite, refdes, 'xsection_plots', deployment)
                save_dir_4d = os.path.join(sDir, array, subsite, refdes, 'xsection_plots_4d', deployment)

            tm = ds['time'].values
            try:
                ds_lat = ds['lat'].values
            except KeyError:
                ds_lat = None
                print('No latitude variable in file')
            try:
                ds_lon = ds['lon'].values
            except KeyError:
                ds_lon = None
                print('No longitude variable in file')

            # get pressure variable
            y, y_units, press = cf.add_pressure_to_dictionary_of_sci_vars(ds)

            for sv in sci_vars:
                print(sv)
                if 'pressure' not in sv:
                    z = ds[sv].values
                    fv = ds[sv]._FillValue
                    sv_units = ds[sv].units

                    # Check if the array is all NaNs
                    if sum(np.isnan(z)) == len(z):
                        print('Array of all NaNs - skipping plot.')
                        continue
                    # Check if the array is all fill values
                    elif len(z[z != fv]) == 0:
                        print('Array of all fill values - skipping plot.')
                        continue
                    else:
                        # reject erroneous data
                        dtime, zpressure, ndata, lenfv, lennan, lenev, lengr, global_min, global_max, lat, lon = \
                            cf.reject_erroneous_data(r, sv, tm, y, z, fv, ds_lat, ds_lon)

                        # get rid of 0.0 data
                        if 'CTD' in r:
                            ind = zpressure > 0.0
                        else:
                            ind = ndata > 0.0

                        lenzero = np.sum(~ind)
                        dtime = dtime[ind]
                        zpressure = zpressure[ind]
                        ndata = ndata[ind]
                        if ds_lat is not None and ds_lon is not None:
                            lat = lat[ind]
                            lon = lon[ind]
                        else:
                            lat = None
                            lon = None

                        t0 = pd.to_datetime(dtime.min()).strftime('%Y-%m-%dT%H:%M:%S')
                        t1 = pd.to_datetime(dtime.max()).strftime('%Y-%m-%dT%H:%M:%S')
                        title = ' '.join((deployment, refdes, method)) + '\n' + t0 + ' to ' + t1

                        # reject time range from data portal file export
                        t_portal, z_portal, y_portal, lat_portal, lon_portal = \
                            cf.reject_timestamps_dataportal(subsite, r, dtime, zpressure, ndata, lat, lon)
                        print('removed {} data points using visual inspection of data'.format(len(ndata) - len(z_portal)))

                        # create data groups
                        columns = ['tsec', 'dbar', str(sv)]
                        # min_r = int(round(min(y_portal) - zcell_size))
                        # max_r = int(round(max(y_portal) + zcell_size))
                        # ranges = list(range(min_r, max_r, zcell_size))
                        # ranges = [0, 10, 20, 30, 40, 50, 60, 70, 80, 200]
                        range1 = list(range(surface_params[0], surface_params[1], surface_params[2]))
                        range2 = list(range(depth_params[0], depth_params[1] + depth_params[2], depth_params[2]))
                        ranges = range1 + range2

                        groups, d_groups = gt.group_by_depth_range(t_portal, y_portal, z_portal, columns, ranges)

                        if 'scatter' in sv:
                            n_std = None  # to use percentile
                        else:
                            n_std = n_std

                        # get percentile analysis for printing on the profile plot
                        inpercentile = [surface_params[3]] * len(range1) + [depth_params[3]] * len(range2)
                        n_std = [surface_params[3]] * len(range1) + [depth_params[3]] * len(range2)

                        y_plt, n_med, n_min, n_max, n0_std, n1_std, l_arr, time_ex = reject_timestamps_in_groups(
                            groups, d_groups, n_std, inpercentile)

                        """
                        Plot all data
                        """
                        if len(tm) > 0:
                            cf.create_dir(save_dir_profile)
                            cf.create_dir(save_dir_xsection)
                            sname = '-'.join((r, method, sv))
                            sfileall = '_'.join(('all_data', sname))

                            '''
                            profile plot
                            '''
                            xlabel = sv + " (" + sv_units + ")"
                            ylabel = press[0] + " (" + y_units[0] + ")"
                            clabel = 'Time'

                            fig, ax = pf.plot_profiles(z, y, tm, ylabel, xlabel, clabel, stdev=None)
                            ax.set_title(title, fontsize=9)
                            fig.tight_layout()
                            pf.save_fig(save_dir_profile, sfileall)

                            '''
                            xsection plot
                            '''
                            clabel = sv + " (" + sv_units + ")"
                            ylabel = press[0] + " (" + y_units[0] + ")"

                            fig, ax, bar = pf.plot_xsection(subsite, tm, y, z, clabel, ylabel, t_eng, m_water_depth,
                                                            inpercentile=None, stdev=None)
                            ax.set_title(title, fontsize=9)
                            fig.tight_layout()
                            pf.save_fig(save_dir_xsection, sfileall)

                        """
                        Plot cleaned-up data
                        """
                        if len(dtime) > 0:
                            sfile = '_'.join(('rm_erroneous_data', sname))

                            '''
                            profile plot
                            '''
                            xlabel = sv + " (" + sv_units + ")"
                            ylabel = press[0] + " (" + y_units[0] + ")"
                            clabel = 'Time'

                            fig, ax = pf.plot_profiles(z_portal, y_portal, t_portal, ylabel, xlabel, clabel, stdev=None)
                            ax.set_title(title, fontsize=9)
                            ax.plot(n_med, y_plt, '.k')
                            ax.fill_betweenx(y_plt, n0_std, n1_std, color='m', alpha=0.2)
                            leg_text = ('removed {} fill values, {} NaNs, {} Extreme Values (1e7), {} Global ranges [{} - {}], '
                                        '{} zeros'.format(lenfv, lennan, lenev, lengr, global_min, global_max, lenzero)
                                        + '\nexcluded {} suspect data points when inspected visually'.format(
                                            len(ndata) - len(z_portal))
                                        + '\n(black) data median in {} dbar segments (break at {} dbar)'.format(
                                            [surface_params[2], depth_params[2]], depth_params[0])
                                        + '\n(magenta) upper and lower {} percentile envelope in {} dbar segments'.format(
                                            [surface_params[3], depth_params[3]], [surface_params[2], depth_params[2]]),)
                            ax.legend(leg_text, loc='upper center', bbox_to_anchor=(0.5, -0.17), fontsize=6)
                            fig.tight_layout()
                            pf.save_fig(save_dir_profile, sfile)

                            '''
                            xsection plot
                            '''
                            clabel = sv + " (" + sv_units + ")"
                            ylabel = press[0] + " (" + y_units[0] + ")"

                            # plot non-erroneous data
                            fig, ax, bar = pf.plot_xsection(subsite, t_portal, y_portal, z_portal, clabel, ylabel,
                                                            t_eng, m_water_depth, inpercentile=None, stdev=None)
                            ax.set_title(title, fontsize=9)
                            leg_text = ('removed {} fill values, {} NaNs, {} Extreme Values (1e7), {} Global ranges [{} - {}], '
                                        '{} zeros'.format(lenfv, lennan, lenev, lengr, global_min, global_max, lenzero)
                                        + '\nexcluded {} suspect data points when inspected visually'.format(
                                            len(ndata) - len(z_portal)),)
                            ax.legend(leg_text, loc='upper center', bbox_to_anchor=(0.5, -0.17), fontsize=6)
                            fig.tight_layout()
                            pf.save_fig(save_dir_xsection, sfile)

                            '''
                            4D plot for gliders only
                            '''
                            if 'MOAS' in r:
                                if ds_lat is not None and ds_lon is not None:
                                    cf.create_dir(save_dir_4d)

                                    clabel = sv + " (" + sv_units + ")"
                                    zlabel = press[0] + " (" + y_units[0] + ")"

                                    fig = plt.figure()
                                    ax = fig.add_subplot(111, projection='3d')
                                    sct = ax.scatter(lon_portal, lat_portal, y_portal, c=z_portal, s=2)
                                    cbar = plt.colorbar(sct, label=clabel, extend='both')
                                    cbar.ax.tick_params(labelsize=8)
                                    ax.invert_zaxis()
                                    ax.view_init(25, 32)
                                    ax.invert_xaxis()
                                    ax.invert_yaxis()
                                    ax.set_zlabel(zlabel, fontsize=9)
                                    ax.set_ylabel('Latitude', fontsize=9)
                                    ax.set_xlabel('Longitude', fontsize=9)
                                    ax.set_title(title, fontsize=9)
                                    pf.save_fig(save_dir_4d, sfile)
def main(sDir, url_list, start_time, end_time, preferred_only):
    rd_list = []
    for uu in url_list:
        elements = uu.split('/')[-2].split('-')
        rd = '-'.join((elements[1], elements[2], elements[3], elements[4]))
        if rd not in rd_list and 'PRESF' in rd:
            rd_list.append(rd)

    for r in rd_list:
        print('\n{}'.format(r))
        datasets = []
        for u in url_list:
            splitter = u.split('/')[-2].split('-')
            rd_check = '-'.join((splitter[1], splitter[2], splitter[3], splitter[4]))
            if rd_check == r:
                udatasets = cf.get_nc_urls([u])
                for ud in udatasets:
                    # filter out collocated data files
                    if 'PRESF' in ud.split('/')[-1]:
                        datasets.append(ud)
        fdatasets = []
        if preferred_only == 'yes':
            # get the preferred stream information
            ps_df, n_streams = cf.get_preferred_stream_info(r)
            for index, row in ps_df.iterrows():
                for ii in range(n_streams):
                    try:
                        rms = '-'.join((r, row[ii]))
                    except TypeError:
                        continue
                    for dd in datasets:
                        spl = dd.split('/')[-2].split('-')
                        catalog_rms = '-'.join((spl[1], spl[2], spl[3], spl[4], spl[5], spl[6]))
                        fdeploy = dd.split('/')[-1].split('_')[0]
                        if rms == catalog_rms and fdeploy == row['deployment']:
                            fdatasets.append(dd)
        else:
            fdatasets = datasets

        fdatasets = np.unique(fdatasets).tolist()
        for fd in fdatasets:
            ds = xr.open_dataset(fd, mask_and_scale=False)
            ds = ds.swap_dims({'obs': 'time'})

            if start_time is not None and end_time is not None:
                ds = ds.sel(time=slice(start_time, end_time))
                if len(ds['time'].values) == 0:
                    print('No data to plot for specified time range: ({} to {})'.format(start_time, end_time))
                    continue

            fname, subsite, refdes, method, stream, deployment = cf.nc_attributes(fd)
            sci_vars = cf.return_science_vars(stream)
            print('\nPlotting {} {}'.format(r, deployment))
            array = subsite[0:2]
            filename = '_'.join(fname.split('_')[:-1])
            save_dir = os.path.join(sDir, array, subsite, refdes, 'timeseries_plots', deployment)
            cf.create_dir(save_dir)

            tm = ds['time'].values
            t0 = pd.to_datetime(tm.min()).strftime('%Y-%m-%dT%H:%M:%S')
            t1 = pd.to_datetime(tm.max()).strftime('%Y-%m-%dT%H:%M:%S')
            title = ' '.join((deployment, refdes, method))

            for var in sci_vars:
                print(var)
                if var != 'id':
                    # if var == 'presf_wave_burst_pressure':
                    y = ds[var]
                    fv = y._FillValue
                    if len(y.dims) == 1:
                        # Check if the array is all NaNs
                        if sum(np.isnan(y.values)) == len(y.values):
                            print('Array of all NaNs - skipping plot.')
                        # Check if the array is all fill values
                        elif len(y[y != fv]) == 0:
                            print('Array of all fill values - skipping plot.')
                        else:
                            # reject fill values
                            ind = y.values != fv
                            t = tm[ind]
                            y = y[ind]

                            # Plot all data
                            fig, ax = pf.plot_timeseries(t, y, y.name, stdev=None)
                            ax.set_title((title + '\n' + t0 + ' - ' + t1), fontsize=9)
                            sfile = '-'.join((filename, y.name, t0[:10]))
                            pf.save_fig(save_dir, sfile)

                            # Plot data with outliers removed
                            fig, ax = pf.plot_timeseries(t, y, y.name, stdev=5)
                            ax.set_title((title + '\n' + t0 + ' - ' + t1), fontsize=9)
                            sfile = '-'.join((filename, y.name, t0[:10])) + '_rmoutliers'
                            pf.save_fig(save_dir, sfile)
                    else:
                        v = y.values.T
                        n_nan = np.sum(np.isnan(v))

                        # convert fill values to nans
                        try:
                            v[v == fv] = np.nan
                        except ValueError:
                            v = v.astype(float)
                            v[v == fv] = np.nan
                        n_fv = np.sum(np.isnan(v)) - n_nan

                        # plot before global ranges are removed
                        fig, ax = pf.plot_presf_2d(tm, v, y.name, y.units)
                        ax.set_title((title + '\n' + t0 + ' - ' + t1), fontsize=9)
                        sfile = '-'.join((filename, var, t0[:10]))
                        pf.save_fig(save_dir, sfile)

                        # reject data outside of global ranges
                        [g_min, g_max] = cf.get_global_ranges(r, var)
                        if g_min is not None and g_max is not None:
                            v[v < g_min] = np.nan
                            v[v > g_max] = np.nan
                            n_grange = np.sum(np.isnan(v)) - n_fv - n_nan

                            if n_grange > 0:
                                # don't plot if the array is all nans
                                if len(np.unique(np.isnan(v))) == 1 and np.unique(np.isnan(v))[0] == True:
                                    continue
                                else:
                                    # plot after global ranges are removed
                                    fig, ax = pf.plot_presf_2d(tm, v, y.name, y.units)
                                    title2 = 'removed: {} global ranges [{}, {}]'.format(n_grange, g_min, g_max)
                                    ax.set_title((title + '\n' + t0 + ' - ' + t1 + '\n' + title2), fontsize=9)
                                    sfile = '-'.join((filename, var, t0[:10], 'rmgr'))
                                    pf.save_fig(save_dir, sfile)
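# Example run for the PRESF timeseries script above (paths and urls are hypothetical;
# preferred_only='yes' restricts plotting to the preferred stream per deployment).
if __name__ == '__main__':
    sDir = '/Users/ooi/plots'
    url_list = ['https://opendap.oceanobservatories.org/thredds/catalog/ooi/.../catalog.html']
    main(sDir, url_list, start_time=None, end_time=None, preferred_only='yes')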
def plot_map(save_directory, savefile, plt_title, londata, latdata, tm, array, bfiles, plt_type=None, add_box=None):
    # ax = plt.axes(projection=ccrs.PlateCarree())
    fig, ax = plt.subplots(figsize=(8, 8), subplot_kw=dict(projection=ccrs.PlateCarree()))
    plt.subplots_adjust(right=0.85)
    states = cfeature.NaturalEarthFeature(category="cultural", scale="10m", facecolor="none",
                                          name="admin_1_states_provinces_shp")
    ax.add_feature(states, linewidth=.5, edgecolor="black", facecolor='grey')
    ax.add_feature(cfeature.RIVERS, zorder=10, facecolor='white')
    # gl = ax.gridlines(crs=ccrs.PlateCarree(), draw_labels=True, linewidth=.5, color='gray', alpha=0.5, linestyle='--')
    gl = ax.gridlines(crs=ccrs.PlateCarree(), draw_labels=True)
    gl.xlabels_top = False
    gl.ylabels_right = False
    gl.xlines = False
    gl.ylines = False
    # gl.xlabel_style = {'size': 14.5}
    # gl.ylabel_style = {'size': 14.5}
    ax.coastlines('10m', linewidth=1)

    array_loc = cf.return_array_subsites_standard_loc(array)

    ax.set_title(plt_title, fontsize=10)

    if array == 'CE':
        if add_box == 'yes':
            ax = plot_glider_box(ax, array)
        else:
            lonmin, lonmax = define_extent(array_loc.lon, londata, 'lon')
            latmin, latmax = define_extent(array_loc.lat, latdata, 'lat')
            lims = [lonmin, lonmax, latmin, latmax]
            ax.set_extent(lims, crs=ccrs.PlateCarree())
        gf = os.path.join(bfiles, 'GMRTv3_6_20190510topo_CE.grd')
        grid_file = xr.open_dataset(gf)
        bathy_contours = [-3000, -2500, -2000, -1500, -1000, -50, 0]
    else:
        if array == 'CP':
            lims = [-72.5, -69.5, 38.5, 42]
            gf = os.path.join(bfiles, 'GMRTv3_6_20190510topo_CP.grd')
            grid_file = xr.open_dataset(gf)
            bathy_contours = [-3000, -2500, -2000, -1500, -1000, -50, 0]
        else:
            if plt_type == 'glider_track_drift':
                lonmin, lonmax = define_extent(array_loc.lon, londata, 'lon')
                latmin, latmax = define_extent(array_loc.lat, latdata, 'lat')
                lims = [lonmin, lonmax, latmin, latmax]
                gf = None
            else:
                if array == 'GA':
                    lims = [-43.5, -41.5, -43.5, -42]
                    gf = os.path.join(bfiles, 'GMRTv3_6_20190510topo_GA.grd')
                    grid_file = xr.open_dataset(gf)
                    bathy_contours = [-5500, -5400, -5300, -5200, -5100, -5000]
                elif array == 'GI':
                    lims = [-40.1, -39, 59.2, 60.3]
                    gf = os.path.join(bfiles, 'GMRTv3_6_20190510topo_GI.grd')
                    grid_file = xr.open_dataset(gf)
                    bathy_contours = [-3500, -3250, -3000, -2750, -2500, -2250, -2000]
                elif array == 'GP':
                    gf = os.path.join(bfiles, 'GMRTv3_6_20190513topo_GP.grd')
                    grid_file = xr.open_dataset(gf)
                    bathy_contours = [-4500, -4250, -4000, -3750, -3500, -3250, -3000]
                    lims = [-145.1, -143.95, 49.7, 50.6]
                elif array == 'GS':
                    gf = os.path.join(bfiles, 'GMRTv3_6_20190513topo_GS.grd')
                    grid_file = xr.open_dataset(gf)
                    bathy_contours = [-5500, -5000, -4500, -4000, -3500, -3000, -2500, -2000]
                    lims = [-89.95, -88.65, -54.8, -53.7]
        ax.set_extent(lims, crs=ccrs.PlateCarree())
        ax = plot_glider_box(ax, array)

    if gf:
        gf_lon = grid_file['lon']
        gf_lat = grid_file['lat']
        lon_ind = np.logical_and(gf_lon > ax.get_xlim()[0], gf_lon < ax.get_xlim()[1])
        lat_ind = np.logical_and(gf_lat > ax.get_ylim()[0], gf_lat < ax.get_ylim()[1])
        bathy = grid_file['altitude'][lat_ind, lon_ind].values
        CS = ax.contour(gf_lon[lon_ind], gf_lat[lat_ind], bathy, bathy_contours, colors='gray',
                        linewidths=0.5, alpha=0.5)
        ax.clabel(CS, inline=1, fontsize=8, fmt='%.0f')
        # h = ax.pcolormesh(xx, yy, bathy, cmap='Blues_r', linewidth=0, rasterized=True)
        # h = ax.pcolor(grid_file['altitude'], cmap='Blues_r', alpha=.1)

    sct = plt.scatter(londata, latdata, c=tm, marker='.', s=2, cmap='rainbow', transform=ccrs.Geodetic())
    plt.scatter(array_loc.lon, array_loc.lat, s=45, marker='x', color='k')

    divider = make_axes_locatable(ax)
    cax = divider.new_horizontal(size='5%', pad=0.1, axes_class=plt.Axes)
    fig.add_axes(cax)
    cbar = plt.colorbar(sct, cax=cax, label='Time')
    cbar.ax.set_yticklabels(pd.to_datetime(cbar.ax.get_yticks()).strftime(date_format='%Y-%m-%d'))
    pf.save_fig(save_directory, savefile)
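# Hypothetical call to plot_map: bfiles points at the directory of GMRT bathymetry
# .grd files named in the function, and tm colors the track points (all paths and
# coordinates here are placeholders).
def _example_plot_map():
    plot_map('/Users/ooi/plots', 'CP05MOAS-GL335-track', 'Glider Track',
             londata=np.array([-70.9, -70.8, -70.7]),
             latdata=np.array([39.9, 40.0, 40.1]),
             tm=np.arange(3),  # time as numbers; the colorbar relabels its ticks as dates
             array='CP', bfiles='/Users/ooi/bathymetry')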
def main(url_list, sDir, deployment_num, start_time, end_time, preferred_only, n_std, inpercentile, zcell_size, zdbar):
    rd_list = []
    for uu in url_list:
        elements = uu.split('/')[-2].split('-')
        rd = '-'.join((elements[1], elements[2], elements[3], elements[4]))
        if rd not in rd_list and 'ENG' not in rd and 'ADCP' not in rd:
            rd_list.append(rd)

    for r in rd_list:
        print('\n{}'.format(r))
        datasets = []
        deployments = []
        for url in url_list:
            splitter = url.split('/')[-2].split('-')
            rd_check = '-'.join((splitter[1], splitter[2], splitter[3], splitter[4]))
            catalog_rms = '-'.join((r, splitter[-2], splitter[-1]))
            if rd_check == r:
                udatasets = cf.get_nc_urls([url])
                for u in udatasets:
                    # filter out collocated data files
                    if catalog_rms == u.split('/')[-1].split('_20')[0][15:]:
                        datasets.append(u)
                        deployments.append(int(u.split('/')[-1].split('_')[0][-4:]))
        deployments = np.unique(deployments).tolist()
        fdatasets = []
        if preferred_only == 'yes':
            # get the preferred stream information
            ps_df, n_streams = cf.get_preferred_stream_info(r)
            for index, row in ps_df.iterrows():
                for ii in range(n_streams):
                    try:
                        rms = '-'.join((r, row[ii]))
                    except TypeError:
                        continue
                    for dd in datasets:
                        spl = dd.split('/')[-2].split('-')
                        catalog_rms = '-'.join((spl[1], spl[2], spl[3], spl[4], spl[5], spl[6]))
                        fdeploy = dd.split('/')[-1].split('_')[0]
                        if rms == catalog_rms and fdeploy == row['deployment']:
                            fdatasets.append(dd)
        else:
            fdatasets = datasets

        main_sensor = r.split('-')[-1]
        fdatasets_sel = cf.filter_collocated_instruments(main_sensor, fdatasets)

        for dep in deployments:
            if deployment_num is not None:
                if dep != deployment_num:
                    print('\nskipping deployment {}'.format(dep))
                    continue
            rdatasets = [s for s in fdatasets_sel if 'deployment%04d' % dep in s]
            rdatasets.sort()
            if len(rdatasets) > 0:
                sci_vars_dict = {}
                # rdatasets = rdatasets[0:2]  # for testing
                for i in range(len(rdatasets)):
                    ds = xr.open_dataset(rdatasets[i], mask_and_scale=False)
                    ds = ds.swap_dims({'obs': 'time'})
                    print('\nAppending data from {}: file {} of {}'.format('deployment%04d' % dep, i + 1, len(rdatasets)))
                    array = r[0:2]
                    subsite = r.split('-')[0]

                    if start_time is not None and end_time is not None:
                        ds = ds.sel(time=slice(start_time, end_time))
                        if len(ds['time'].values) == 0:
                            print('No data to plot for specified time range: ({} to {})'.format(start_time, end_time))
                            continue
                        stime = start_time.strftime('%Y-%m-%d')
                        etime = end_time.strftime('%Y-%m-%d')
                        ext = stime + 'to' + etime  # .join((ds0_method, ds1_method
                        save_dir_profile = os.path.join(sDir, array, subsite, r, 'profile_plots',
                                                        'deployment%04d' % dep, ext)
                        save_dir_xsection = os.path.join(sDir, array, subsite, r, 'xsection_plots',
                                                         'deployment%04d' % dep, ext)
                    else:
                        save_dir_profile = os.path.join(sDir, array, subsite, r, 'profile_plots',
                                                        'deployment%04d' % dep)
                        save_dir_xsection = os.path.join(sDir, array, subsite, r, 'xsection_plots',
                                                         'deployment%04d' % dep)

                    if len(sci_vars_dict) == 0:
                        fname, subsite, refdes, method, stream, deployment = cf.nc_attributes(rdatasets[0])
                        sci_vars = cf.return_science_vars(stream)
                        if 'CTDPF' not in r:
                            sci_vars.append('int_ctd_pressure')
                        sci_vars.append('time')
                        sci_vars = list(np.unique(sci_vars))

                        # initialize the dictionary
                        for sci_var in sci_vars:
                            if sci_var == 'time':
                                sci_vars_dict.update({sci_var: dict(values=np.array([], dtype=np.datetime64),
                                                                    units=[], fv=[])})
                            else:
                                sci_vars_dict.update({sci_var: dict(values=np.array([]), units=[], fv=[])})

                    # append data for the deployment into the dictionary
                    for s_v in sci_vars_dict.keys():
                        vv = ds[s_v]
                        try:
                            if vv.units not in sci_vars_dict[s_v]['units']:
                                sci_vars_dict[s_v]['units'].append(vv.units)
                        except AttributeError:
                            print('')
                        try:
                            if vv._FillValue not in sci_vars_dict[s_v]['fv']:
                                sci_vars_dict[s_v]['fv'].append(vv._FillValue)
                                vv_data = vv.values
                                try:
                                    vv_data[vv_data == vv._FillValue] = np.nan  # turn fill values to nans
                                except ValueError:
                                    print('')
                        except AttributeError:
                            print('')

                        if len(vv.dims) > 1:
                            print('Skipping plot: variable has >1 dimension')
                        else:
                            sci_vars_dict[s_v]['values'] = np.append(sci_vars_dict[s_v]['values'], vv.values)

                # plot after appending all data into one file
                data_start = pd.to_datetime(min(sci_vars_dict['time']['values'])).strftime('%Y-%m-%dT%H:%M:%S')
                data_stop = pd.to_datetime(max(sci_vars_dict['time']['values'])).strftime('%Y-%m-%dT%H:%M:%S')
                time1 = sci_vars_dict['time']['values']
                ds_lat1 = np.empty(np.shape(time1))
                ds_lon1 = np.empty(np.shape(time1))

                # define pressure variable
                try:
                    pname = 'seawater_pressure'
                    press = sci_vars_dict[pname]
                except KeyError:
                    pname = 'int_ctd_pressure'
                    press = sci_vars_dict[pname]
                y1 = press['values']
                try:
                    y_units = press['units'][0]
                except IndexError:
                    y_units = ''

                for sv in sci_vars_dict.keys():
                    print('')
                    print(sv)
                    if sv not in ['seawater_pressure', 'int_ctd_pressure', 'time']:
                        z1 = sci_vars_dict[sv]['values']
                        fv = sci_vars_dict[sv]['fv'][0]
                        sv_units = sci_vars_dict[sv]['units'][0]

                        # Check if the array is all NaNs
                        if sum(np.isnan(z1)) == len(z1):
                            print('Array of all NaNs - skipping plot.')
                            continue
                        # Check if the array is all fill values
                        elif len(z1[z1 != fv]) == 0:
                            print('Array of all fill values - skipping plot.')
                            continue
                        else:
                            # remove unreasonable pressure data (e.g. for surface piercing profilers)
                            if zdbar:
                                po_ind = (0 < y1) & (y1 < zdbar)
                                tm = time1[po_ind]
                                y = y1[po_ind]
                                z = z1[po_ind]
                                ds_lat = ds_lat1[po_ind]
                                ds_lon = ds_lon1[po_ind]
                            else:
                                tm = time1
                                y = y1
                                z = z1
                                ds_lat = ds_lat1
                                ds_lon = ds_lon1

                            # reject erroneous data
                            dtime, zpressure, ndata, lenfv, lennan, lenev, lengr, global_min, global_max, lat, lon = \
                                cf.reject_erroneous_data(r, sv, tm, y, z, fv, ds_lat, ds_lon)

                            # get rid of 0.0 data
                            # if sv == 'salinity':
                            #     ind = ndata > 20
                            # elif sv == 'density':
                            #     ind = ndata > 1010
                            # elif sv == 'conductivity':
                            #     ind = ndata > 2
                            # else:
                            #     ind = ndata > 0
                            # if sv == 'sci_flbbcd_chlor_units':
                            #     ind = ndata < 7.5
                            # elif sv == 'sci_flbbcd_cdom_units':
                            #     ind = ndata < 25
                            # else:
                            #     ind = ndata > 0.0
                            if 'CTD' in r:
                                ind = zpressure > 0.0
                            else:
                                ind = ndata > 0.0

                            lenzero = np.sum(~ind)
                            dtime = dtime[ind]
                            zpressure = zpressure[ind]
                            ndata = ndata[ind]
                            if ds_lat is not None and ds_lon is not None:
                                lat = lat[ind]
                                lon = lon[ind]
                            else:
                                lat = None
                                lon = None

                            if len(dtime) > 0:
                                # reject time range from data portal file export
                                t_portal, z_portal, y_portal, lat_portal, lon_portal = \
                                    cf.reject_timestamps_dataportal(subsite, r, dtime, zpressure, ndata, lat, lon)
                                print('removed {} data points using visual inspection of data'.format(
                                    len(ndata) - len(z_portal)))

                                # create data groups
                                # if len(y_portal) > 0:
                                #     columns = ['tsec', 'dbar', str(sv)]
                                #     min_r = int(round(np.nanmin(y_portal) - zcell_size))
                                #     max_r = int(round(np.nanmax(y_portal) + zcell_size))
                                #     ranges = list(range(min_r, max_r, zcell_size))
                                #
                                #     groups, d_groups = gt.group_by_depth_range(t_portal, y_portal, z_portal, columns, ranges)
                                #
                                #     if 'scatter' in sv:
                                #         n_std = None  # to use percentile
                                #     else:
                                #         n_std = n_std
                                #
                                #     # get percentile analysis for printing on the profile plot
                                #     y_avg, n_avg, n_min, n_max, n0_std, n1_std, l_arr, time_ex = cf.reject_timestamps_in_groups(
                                #         groups, d_groups, n_std, inpercentile)

                                """
                                Plot all data
                                """
                                if len(time1) > 0:
                                    cf.create_dir(save_dir_profile)
                                    cf.create_dir(save_dir_xsection)
                                    sname = '-'.join((r, method, sv))
                                    # sfileall = '_'.join(('all_data', sname, pd.to_datetime(time1.min()).strftime('%Y%m%d')))
                                    # tm0 = pd.to_datetime(time1.min()).strftime('%Y-%m-%dT%H:%M:%S')
                                    # tm1 = pd.to_datetime(time1.max()).strftime('%Y-%m-%dT%H:%M:%S')
                                    sfileall = '_'.join((sname, pd.to_datetime(t_portal.min()).strftime('%Y%m%d')))
                                    tm0 = pd.to_datetime(t_portal.min()).strftime('%Y-%m-%dT%H:%M:%S')
                                    tm1 = pd.to_datetime(t_portal.max()).strftime('%Y-%m-%dT%H:%M:%S')
                                    title = ' '.join((deployment, refdes, method)) + '\n' + tm0 + ' to ' + tm1
                                    if 'SPKIR' in r:
                                        title = title + '\nWavelength = 510 nm'

                                    '''
                                    profile plot
                                    '''
                                    xlabel = sv + " (" + sv_units + ")"
                                    ylabel = pname + " (" + y_units + ")"
                                    clabel = 'Time'

                                    # fig, ax = pf.plot_profiles(z1, y1, time1, ylabel, xlabel, clabel, stdev=None)
                                    fig, ax = pf.plot_profiles(z_portal, y_portal, t_portal, ylabel, xlabel, clabel,
                                                               stdev=None)
                                    ax.set_title(title, fontsize=9)
                                    fig.tight_layout()
                                    pf.save_fig(save_dir_profile, sfileall)

                                    '''
                                    xsection plot
                                    '''
                                    clabel = sv + " (" + sv_units + ")"
                                    ylabel = pname + " (" + y_units + ")"

                                    # fig, ax, bar = pf.plot_xsection(subsite, time1, y1, z1, clabel, ylabel, t_eng=None,
                                    #                                 m_water_depth=None, inpercentile=None, stdev=None)
                                    fig, ax, bar = pf.plot_xsection(subsite, t_portal, y_portal, z_portal, clabel, ylabel,
                                                                    t_eng=None, m_water_depth=None, inpercentile=None,
                                                                    stdev=None)
                                    if fig:
                                        ax.set_title(title, fontsize=9)
                                        fig.tight_layout()
                                        pf.save_fig(save_dir_xsection, sfileall)

                                """
def main(url_list, sDir, plot_type, start_time, end_time, deployment_num):
    for i, u in enumerate(url_list):
        elements = u.split('/')[-2].split('-')
        r = '-'.join((elements[1], elements[2], elements[3], elements[4]))
        ms = u.split(r + '-')[1].split('/')[0]
        subsite = r.split('-')[0]
        array = subsite[0:2]
        main_sensor = r.split('-')[-1]

        datasets = cf.get_nc_urls([u])
        datasets_sel = cf.filter_collocated_instruments(main_sensor, datasets)

        save_dir = os.path.join(sDir, array, subsite, r, plot_type)
        cf.create_dir(save_dir)
        sname = '-'.join((r, ms, 'track'))

        print('Appending....')
        sh = pd.DataFrame()
        deployments = []
        end_times = []
        for ii, d in enumerate(datasets_sel):
            print('\nDataset {} of {}: {}'.format(ii + 1, len(datasets_sel), d.split('/')[-1]))
            ds = xr.open_dataset(d, mask_and_scale=False)
            ds = ds.swap_dims({'obs': 'time'})

            if start_time is not None and end_time is not None:
                ds = ds.sel(time=slice(start_time, end_time))
                if len(ds['time'].values) == 0:
                    print('No data to plot for specified time range: ({} to {})'.format(start_time, end_time))
                    continue

            fname, subsite, refdes, method, stream, deployment = cf.nc_attributes(d)
            if deployment_num is not None:
                if int(deployment[-4:]) != deployment_num:
                    print('\nskipping {}'.format(deployment))
                    continue

            # get end times of deployments
            ps_df, n_streams = cf.get_preferred_stream_info(r)
            dr_data = cf.refdes_datareview_json(r)
            for index, row in ps_df.iterrows():
                deploy = row['deployment']
                deploy_info = cf.get_deployment_information(dr_data, int(deploy[-4:]))
                if int(deploy[-4:]) not in deployments:
                    deployments.append(int(deploy[-4:]))
                if pd.to_datetime(deploy_info['stop_date']) not in end_times:
                    end_times.append(pd.to_datetime(deploy_info['stop_date']))

            data = {'lat': ds['lat'].values, 'lon': ds['lon'].values}
            new_r = pd.DataFrame(data, columns=['lat', 'lon'], index=ds['time'].values)
            sh = sh.append(new_r)

        xD = sh.lon.values
        yD = sh.lat.values
        tD = sh.index.values
        clabel = 'Time'
        ylabel = 'Latitude'
        xlabel = 'Longitude'

        fig, ax = pf.plot_profiles(xD, yD, tD, ylabel, xlabel, clabel, end_times, deployments, stdev=None)
        ax.invert_yaxis()
        ax.set_title('Glider Track - ' + r + '\n' + 'x: platform location', fontsize=9)
        ax.set_xlim(-71.75, -69.75)
        ax.set_ylim(38.75, 40.75)
        # cbar.ax.set_yticklabels(end_times)

        # add Pioneer glider sampling area
        ax.add_patch(Rectangle((-71.5, 39.0), 1.58, 1.67, linewidth=3, edgecolor='b', facecolor='none'))
        ax.text(-71, 40.6, 'Pioneer Glider Sampling Area', color='blue', fontsize=8)
        # add Pioneer AUV sampling area
        # ax.add_patch(Rectangle((-71.17, 39.67), 0.92, 1.0, linewidth=3, edgecolor='m', facecolor='none'))

        array_loc = cf.return_array_subsites_standard_loc(array)
        ax.scatter(array_loc.lon, array_loc.lat, s=40, marker='x', color='k', alpha=0.3)
        # ax.legend(legn, array_loc.index, scatterpoints=1, loc='lower left', ncol=4, fontsize=8)

        pf.save_fig(save_dir, sname)
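# Example invocation for the glider-track script above (hypothetical url and output
# directory; plot_type doubles as the name of the output subdirectory).
if __name__ == '__main__':
    url_list = ['https://opendap.oceanobservatories.org/thredds/catalog/ooi/.../catalog.html']
    main(url_list, '/Users/ooi/plots', 'track_plots', start_time=None, end_time=None,
         deployment_num=None)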
def main(files, out, time_break, depth, start, end, interactive):
    """
    files: url to an .nc/.ncml file or the path to a text file containing .nc/.ncml links.
           A # at the front will skip links in the text file.
    out: Directory to save plots
    """
    fname, ext = os.path.splitext(files)
    if ext == '.nc':
        list_files = [files]
    elif ext == '.ncml':
        list_files = [files]
    else:
        list_files = read_file(files)

    stream_vars = pf.load_variable_dict(var='eng')  # load engineering variables
    for nc in list_files:
        print(nc)
        with xr.open_dataset(nc, mask_and_scale=False) as ds:
            # change dimensions from 'obs' to 'time'
            ds = ds.swap_dims({'obs': 'time'})
            ds_variables = ds.data_vars.keys()  # List of dataset variables
            stream = ds.stream  # List stream name associated with the data
            title_pre = mk_str(ds.attrs, 't')  # , var, tt0, tt1, 't')
            save_pre = mk_str(ds.attrs, 's')  # , var, tt0, tt1, 's')
            platform = ds.subsite
            node = ds.node
            sensor = ds.sensor
            # save_dir = os.path.join(out, 'xsection_depth_profiles')
            save_dir = os.path.join(out, ds.subsite, ds.subsite + '-' + ds.node + '-' + ds.sensor, ds.stream,
                                    'xsection_depth_profiles')
            cf.create_dir(save_dir)

            misc = ['quality', 'string', 'timestamp', 'deployment', 'id', 'provenance', 'qc', 'time', 'mission',
                    'obs', 'volt', 'ref', 'sig', 'amp', 'rph', 'calphase', 'phase', 'therm', 'light']
            reg_ex = re.compile('|'.join(misc))

            # keep variables that are not in the regular expression
            sci_vars = [s for s in ds_variables if not reg_ex.search(s)]

            if time_break is not None:
                times = np.unique(ds[time_break])

                for t in times:
                    time_ind = t == ds[time_break].data
                    for var in sci_vars:
                        x = dict(data=ds['time'].data[time_ind], info=dict(label='Time', units='GMT'))
                        t0 = pd.to_datetime(x['data'].min()).strftime('%Y-%m-%dT%H%M%00')
                        t1 = pd.to_datetime(x['data'].max()).strftime('%Y-%m-%dT%H%M%00')
                        try:
                            sci = ds[var]
                            print(var)
                            # sci = sub_ds[var]
                        except UnicodeEncodeError:  # some comments have latex characters
                            ds[var].attrs.pop('comment')  # remove from the attributes
                            sci = ds[var]  # or else the variable won't load

                        y = dict(data=ds[depth].data[time_ind], info=dict(label='Pressure', units='dbar', var=var,
                                                                          platform=platform, node=node, sensor=sensor))
                        try:
                            z_lab = sci.long_name
                        except AttributeError:
                            z_lab = sci.standard_name
                        z = dict(data=sci.data[time_ind], info=dict(label=z_lab, units=str(sci.units), var=var,
                                                                    platform=platform, node=node, sensor=sensor))

                        title = title_pre + var

                        # plot timeseries with outliers
                        fig, ax = pf.depth_glider_cross_section(x, y, z, title=title)

                        if interactive:
                            fig.canvas.mpl_connect('pick_event',
                                                   lambda event: pf.onpick3(event, x['data'], y['data'], z['data']))
                            plt.show()
                        else:
                            pf.resize(width=12, height=8.5)  # Resize figure
                            save_name = '{}-{}-{}_{}_{}-{}'.format(platform, node, sensor, var, t0, t1)
                            pf.save_fig(save_dir, save_name, res=150)  # Save figure
                            plt.close('all')
            else:
                ds = ds.sel(time=slice(start, end))
                for var in sci_vars:
                    x = dict(data=ds['time'].data[:], info=dict(label='Time', units='GMT'))
                    t0 = pd.to_datetime(x['data'].min()).strftime('%Y-%m-%dT%H%M%00')
                    t1 = pd.to_datetime(x['data'].max()).strftime('%Y-%m-%dT%H%M%00')
                    try:
                        sci = ds[var]
                        print(var)
                        # sci = sub_ds[var]
                    except UnicodeEncodeError:  # some comments have latex characters
                        ds[var].attrs.pop('comment')  # remove from the attributes
                        sci = ds[var]  # or else the variable won't load

                    y = dict(data=ds[depth].data[:], info=dict(label='Pressure', units='dbar', var=var,
                                                               platform=platform, node=node, sensor=sensor))
                    try:
                        z_lab = sci.long_name
                    except AttributeError:
                        z_lab = sci.standard_name

                    z = dict(data=sci.data[:], info=dict(label=z_lab, units=sci.units, var=var, platform=platform,
                                                         node=node, sensor=sensor))

                    title = title_pre + var

                    # plot timeseries with outliers
                    fig, ax = pf.depth_glider_cross_section(x, y, z, title=title, interactive=interactive)

                    if interactive:
                        fig.canvas.mpl_connect('pick_event',
                                               lambda event: pf.onpick3(event, x['data'], y['data'], z['data']))
                        plt.show()
                    else:
                        pf.resize(width=12, height=8.5)  # Resize figure
                        save_name = '{}-{}-{}_{}_{}-{}'.format(platform, node, sensor, var, t0, t1)
                        pf.save_fig(save_dir, save_name, res=150)  # Save figure
                        plt.close('all')
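# Example run of the glider cross-section script above (hypothetical arguments):
# time_break names a dataset variable (e.g. 'deployment') used to split figures, and
# depth names the pressure variable used for the y-axis.
if __name__ == '__main__':
    main('glider_links.txt', '/Users/ooi/plots', time_break='deployment',
         depth='sci_water_pressure_dbar', start=None, end=None, interactive=False)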
def main(sDir, ncdir, start_time, end_time):
    rd_list = [ncdir.split('/')[-2]]

    for r in rd_list:
        print('\n{}'.format(r))
        datasets = []
        for root, dirs, files in os.walk(ncdir):
            for f in files:
                if f.endswith('.nc'):
                    datasets.append(f)
        # for u in url_list:
        #     splitter = u.split('/')[-2].split('-')
        #     rd_check = '-'.join((splitter[1], splitter[2], splitter[3], splitter[4]))
        #     if rd_check == r:
        #         udatasets = cf.get_nc_urls([u])
        #         datasets.append(udatasets)
        # datasets = list(itertools.chain(*datasets))
        for fd in datasets:
            if '_blank' not in fd:
                ds = xr.open_dataset(os.path.join(ncdir, fd), mask_and_scale=False)
                ds = ds.swap_dims({'obs': 'time'})
                # get pressure variable from coordinates
                ds_vars = list(ds.data_vars.keys()) + [x for x in ds.coords.keys() if 'pressure' in x]
                # raw_vars = cf.return_raw_vars(ds_vars)

                if start_time is not None and end_time is not None:
                    ds = ds.sel(time=slice(start_time, end_time))
                    if len(ds['time'].values) == 0:
                        print('No data to plot for specified time range: ({} to {})'.format(start_time, end_time))
                        continue

                fname, subsite, refdes, method, stream, deployment = cf.nc_attributes(os.path.join(ncdir, fd))
                if 'NUTNR' in refdes or 'VEL3D' in refdes:
                    vars = cf.return_science_vars(stream)
                else:
                    vars = cf.return_raw_vars(ds_vars)
                print('\nPlotting {} {}'.format(r, deployment))
                array = subsite[0:2]
                filename = '_'.join(fname.split('_')[:-1])
                save_dir = os.path.join(sDir, array, subsite, refdes, 'timeseries_plots', deployment)
                cf.create_dir(save_dir)

                tm = ds['time'].values
                t0 = pd.to_datetime(tm.min()).strftime('%Y-%m-%dT%H:%M:%S')
                t1 = pd.to_datetime(tm.max()).strftime('%Y-%m-%dT%H:%M:%S')
                title = ' '.join((deployment, refdes, method))

                for var in vars:
                    print(var)
                    if var not in ['id', 'record_type', 'unique_id']:  # if var != 'id'
                        y = ds[var]
                        try:
                            fv = y._FillValue
                        except AttributeError:
                            fv = np.nan
                        if len(y.dims) == 1:
                            # Check if the array is all NaNs
                            y[y == fv] = np.nan  # turn fill values to nans
                            if sum(np.isnan(y.values)) == len(y.values):
                                print('Array of all NaNs and/or fill values - skipping plot.')
                            # Check if the array is all fill values
                            # elif len(y[y != fv]) == 0:
                            #     print('Array of all fill values - skipping plot.')
                            else:
                                # reject fill values
                                ind = y.values != fv
                                t = tm[ind]
                                y = y[ind]

                                # Plot all data
                                fig, ax = pf.plot_timeseries(t, y, y.name, stdev=None)
                                ax.set_title((title + '\n' + t0 + ' - ' + t1), fontsize=9)
                                sfile = '-'.join((filename, y.name, t0[:10]))
                                pf.save_fig(save_dir, sfile)

                                # Plot data with outliers removed
                                fig, ax = pf.plot_timeseries(t, y, y.name, stdev=5)
                                ax.set_title((title + '\n' + t0 + ' - ' + t1), fontsize=9)
                                sfile = '-'.join((filename, y.name, t0[:10])) + '_rmoutliers'
                                pf.save_fig(save_dir, sfile)
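# Hypothetical call for the local-directory timeseries script above; ncdir is expected
# to have the reference designator as its second-to-last path element (the paths here
# are placeholders).
if __name__ == '__main__':
    sDir = '/Users/ooi/plots'
    ncdir = '/Users/ooi/nc_files/GI03FLMA-RIM01-02-CTDMOG040/recovered_inst'
    main(sDir, ncdir, start_time=None, end_time=None)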
def main(sDir, url_list, start_time, end_time):
    rd_list = []
    for uu in url_list:
        elements = uu.split('/')[-2].split('-')
        rd = '-'.join((elements[1], elements[2], elements[3], elements[4]))
        if rd not in rd_list:
            rd_list.append(rd)

    for r in rd_list:
        print('\n{}'.format(r))
        datasets = []
        for u in url_list:
            splitter = u.split('/')[-2].split('-')
            rd_check = '-'.join((splitter[1], splitter[2], splitter[3], splitter[4]))
            if rd_check == r:
                udatasets = cf.get_nc_urls([u])
                datasets.append(udatasets)
        datasets = list(itertools.chain(*datasets))
        fdatasets = []
        # get the preferred stream information
        ps_df, n_streams = cf.get_preferred_stream_info(r)
        for index, row in ps_df.iterrows():
            for ii in range(n_streams):
                try:
                    rms = '-'.join((r, row[ii]))
                except TypeError:
                    continue
                for dd in datasets:
                    spl = dd.split('/')[-2].split('-')
                    catalog_rms = '-'.join((spl[1], spl[2], spl[3], spl[4], spl[5], spl[6]))
                    fdeploy = dd.split('/')[-1].split('_')[0]
                    if rms == catalog_rms and fdeploy == row['deployment']:
                        fdatasets.append(dd)

        main_sensor = r.split('-')[-1]
        fdatasets_sel = cf.filter_collocated_instruments(main_sensor, fdatasets)

        # get science variable long names from the Data Review Database
        # stream_sci_vars = cd.sci_var_long_names(r)
        if 'SPKIR' in r or 'PRESF' in r:  # only get the main science variable for SPKIR
            stream_vars = cd.sci_var_long_names(r)
        else:
            stream_vars = var_long_names(r)

        # check if the science variable long names are the same for each stream and initialize empty arrays
        sci_vars_dict = cd.sci_var_long_names_check(stream_vars)

        # get the preferred stream information
        ps_df, n_streams = cf.get_preferred_stream_info(r)

        # build dictionary of science data from the preferred dataset for each deployment
        print('\nAppending data from files')
        et = []
        sci_vars_dict, __, __ = cd.append_science_data(ps_df, n_streams, r, fdatasets_sel, sci_vars_dict, et,
                                                       start_time, end_time)

        # get end times of deployments
        dr_data = cf.refdes_datareview_json(r)
        deployments = []
        dend_times = []
        for index, row in ps_df.iterrows():
            deploy = row['deployment']
            deploy_info = get_deployment_information(dr_data, int(deploy[-4:]))
            deployments.append(int(deploy[-4:]))
            dend_times.append(pd.to_datetime(deploy_info['stop_date']))

        subsite = r.split('-')[0]
        array = subsite[0:2]
        save_dir = os.path.join(sDir, array, subsite, r, 'timeseries_plots_preferred_all')
        cf.create_dir(save_dir)

        print('\nPlotting data')
        for m, n in sci_vars_dict.items():
            for sv, vinfo in n['vars'].items():
                print(sv)
                if 'SPKIR' in r:
                    fv_lst = np.unique(vinfo['fv']).tolist()
                    if len(fv_lst) == 1:
                        fill_value = fv_lst[0]
                    else:
                        print(fv_lst)
                        print('No unique fill value for {}'.format(sv))

                    sv_units = np.unique(vinfo['units']).tolist()

                    t = vinfo['t']
                    if len(t) > 1:
                        data = vinfo['values']
                        [dd_data, g_min, g_max] = index_dataset_2d(r, 'spkir_abj_cspp_downwelling_vector', data,
                                                                   fill_value)
                        t0 = pd.to_datetime(min(t)).strftime('%Y-%m-%dT%H:%M:%S')
                        t1 = pd.to_datetime(max(t)).strftime('%Y-%m-%dT%H:%M:%S')
                        deploy_final = vinfo['deployments']
                        deploy = list(np.unique(deploy_final))
                        deployments = [int(dd) for dd in deploy]

                        sname = '-'.join((r, sv))
                        fig, ax = pf.plot_spkir(t, dd_data, sv, sv_units[0])
                        ax.set_title((r + '\nDeployments: ' + str(sorted(deployments)) + '\n' + t0 + ' - ' + t1 +
                                      '\n' + 'removed global ranges +/- [{} - {}]'.format(g_min, g_max)), fontsize=8)
                        for etimes in dend_times:
                            ax.axvline(x=etimes, color='k', linestyle='--', linewidth=.6)
                        pf.save_fig(save_dir, sname)

                        # plot each wavelength
                        wavelengths = ['412nm', '443nm', '490nm', '510nm', '555nm', '620nm', '683nm']
                        for wvi in range(len(dd_data)):
                            fig, ax = pf.plot_spkir_wv(t, dd_data[wvi], sv, sv_units[0], wvi)
                            ax.set_title((r + '\nDeployments: ' + str(sorted(deployments)) + '\n' + t0 + ' - ' + t1 +
                                          '\n' + 'removed global ranges +/- [{} - {}]'.format(g_min, g_max)),
                                         fontsize=8)
                            for etimes in dend_times:
                                ax.axvline(x=etimes, color='k', linestyle='--', linewidth=.6)
                            snamewvi = '-'.join((sname, wavelengths[wvi]))
                            pf.save_fig(save_dir, snamewvi)
                elif 'presf_abc_wave_burst' in m:
                    fv_lst = np.unique(vinfo['fv']).tolist()
                    if len(fv_lst) == 1:
                        fill_value = fv_lst[0]
                    else:
                        print(fv_lst)
                        print('No unique fill value for {}'.format(sv))

                    sv_units = np.unique(vinfo['units']).tolist()

                    t = vinfo['t']
                    if len(t) > 1:
                        data = vinfo['values']
                        [dd_data, g_min, g_max] = index_dataset_2d(r, 'presf_wave_burst_pressure', data, fill_value)
                        t0 = pd.to_datetime(min(t)).strftime('%Y-%m-%dT%H:%M:%S')
                        t1 = pd.to_datetime(max(t)).strftime('%Y-%m-%dT%H:%M:%S')
                        deploy_final = vinfo['deployments']
                        deploy = list(np.unique(deploy_final))
                        deployments = [int(dd) for dd in deploy]

                        sname = '-'.join((r, sv))
                        fig, ax = pf.plot_presf_2d(t, dd_data, sv, sv_units[0])
                        ax.set_title((r + '\nDeployments: ' + str(sorted(deployments)) + '\n' + t0 + ' - ' + t1 +
                                      '\n' + 'removed global ranges +/- [{} - {}]'.format(g_min, g_max)), fontsize=8)
                        for etimes in dend_times:
                            ax.axvline(x=etimes, color='k', linestyle='--', linewidth=.6)
                        pf.save_fig(save_dir, sname)
                else:
                    if type(vinfo['values']) != dict:  # if the variable is not a 2D array
                        if 'Spectra' not in sv:
                            if len(vinfo['t']) < 1:
                                print('no variable data to plot')
                            else:
                                sv_units = vinfo['units'][0]
                                sv_name = vinfo['var_name']
                                t0 = pd.to_datetime(min(vinfo['t'])).strftime('%Y-%m-%dT%H:%M:%S')
                                t1 = pd.to_datetime(max(vinfo['t'])).strftime('%Y-%m-%dT%H:%M:%S')
                                x = vinfo['t']
                                y = vinfo['values']

                                # reject NaNs and values of 0.0
                                nan_ind = (~np.isnan(y)) & (y != 0.0)
                                x_nonan = x[nan_ind]
                                y_nonan = y[nan_ind]

                                # reject fill values
                                fv_ind = y_nonan != vinfo['fv'][0]
                                x_nonan_nofv = x_nonan[fv_ind]
                                y_nonan_nofv = y_nonan[fv_ind]

                                # reject extreme values
                                Ev_ind = cf.reject_extreme_values(y_nonan_nofv)
                                y_nonan_nofv_nE = y_nonan_nofv[Ev_ind]
                                x_nonan_nofv_nE = x_nonan_nofv[Ev_ind]

                                # reject values outside global ranges
                                global_min, global_max = cf.get_global_ranges(r, sv_name)
                                if any(e is None for e in [global_min, global_max]):
                                    y_nonan_nofv_nE_nogr = y_nonan_nofv_nE
                                    x_nonan_nofv_nE_nogr = x_nonan_nofv_nE
                                else:
                                    gr_ind = cf.reject_global_ranges(y_nonan_nofv_nE, global_min, global_max)
                                    y_nonan_nofv_nE_nogr = y_nonan_nofv_nE[gr_ind]
                                    x_nonan_nofv_nE_nogr = x_nonan_nofv_nE[gr_ind]

                                if len(y_nonan_nofv) > 0:
                                    if m == 'common_stream_placeholder':
                                        sname = '-'.join((r, sv))
                                    else:
                                        sname = '-'.join((r, m, sv))

                                    plt_deploy = [int(x) for x in list(np.unique(vinfo['deployments']))]

                                    # plot hourly averages for cabled and FDCHP data
                                    if 'streamed' in sci_vars_dict[list(sci_vars_dict.keys())[0]]['ms'][0] or 'FDCHP' in r:
                                        sname = '-'.join((sname, 'hourlyavg'))
                                        df = pd.DataFrame({'dfx': x_nonan_nofv_nE_nogr, 'dfy': y_nonan_nofv_nE_nogr})
                                        dfr = df.resample('H', on='dfx').mean()

                                        # Plot all data
                                        fig, ax = pf.plot_timeseries_all(dfr.index, dfr['dfy'], sv, sv_units, stdev=None)
                                        ax.set_title((r + '\nDeployments: ' + str(plt_deploy) + '\n' + t0 + ' - ' + t1),
                                                     fontsize=8)

                                        # if plotting a specific time range, plot deployment lines only for those deployments
                                        if type(start_time) == dt.datetime:
                                            for e in list(np.unique(vinfo['deployments'])):
                                                etime = dend_times[int(e) - 1]
                                                ax.axvline(x=etime, color='b', linestyle='--', linewidth=.6)
                                        else:
                                            for etime in dend_times:
                                                ax.axvline(x=etime, color='b', linestyle='--', linewidth=.6)
                                        pf.save_fig(save_dir, sname)
                                    else:
                                        # Plot all data
                                        fig, ax = pf.plot_timeseries_all(x_nonan_nofv, y_nonan_nofv, sv, sv_units,
                                                                         stdev=None)
                                        ax.set_title((r + '\nDeployments: ' + str(plt_deploy) + '\n' + t0 + ' - ' + t1),
                                                     fontsize=8)

                                        # if plotting a specific time range, plot deployment lines only for those deployments
                                        if type(start_time) == dt.datetime:
                                            # for e in list(np.unique(vinfo['deployments'])):
                                            #     etime = dend_times[int(e) - 1]
                                            #     ax.axvline(x=etime, color='b', linestyle='--', linewidth=.6)
                                            etime = dend_times[int(list(np.unique(vinfo['deployments']))[0]) - 1]
                                            ax.axvline(x=etime, color='b', linestyle='--', linewidth=.6)
                                        else:
                                            for etime in dend_times:
                                                ax.axvline(x=etime, color='b', linestyle='--', linewidth=.6)

                                        # if not any(e is None for e in [global_min, global_max]):
                                        #     ax.axhline(y=global_min, color='r', linestyle='--', linewidth=.6)
                                        #     ax.axhline(y=global_max, color='r', linestyle='--', linewidth=.6)
                                        # else:
                                        #     maxpoint = x[np.argmax(y_nonan_nofv)], max(y_nonan_nofv)
                                        #     ax.annotate('No Global Ranges', size=8,
                                        #                 xy=maxpoint, xytext=(5, 5), textcoords='offset points')
                                        pf.save_fig(save_dir, sname)

                                        # Plot data with outliers removed
                                        fig, ax = pf.plot_timeseries_all(x_nonan_nofv_nE_nogr, y_nonan_nofv_nE_nogr,
                                                                         sv, sv_units, stdev=5)
                                        ax.set_title((r + '\nDeployments: ' + str(plt_deploy) + '\n' + t0 + ' - ' + t1),
                                                     fontsize=8)

                                        # if plotting a specific time range, plot deployment lines only for those deployments
                                        if type(start_time) == dt.datetime:
                                            # for e in list(np.unique(vinfo['deployments'])):
                                            #     etime = dend_times[int(e) - 1]
                                            #     ax.axvline(x=etime, color='b', linestyle='--', linewidth=.6)
                                            etime = dend_times[int(list(np.unique(vinfo['deployments']))[0]) - 1]
                                            ax.axvline(x=etime, color='b', linestyle='--', linewidth=.6)
                                        else:
                                            for etime in dend_times:
                                                ax.axvline(x=etime, color='b', linestyle='--', linewidth=.6)

                                        # if not any(e is None for e in [global_min, global_max]):
                                        #     ax.axhline(y=global_min, color='r', linestyle='--', linewidth=.6)
                                        #     ax.axhline(y=global_max, color='r', linestyle='--', linewidth=.6)
                                        # else:
                                        #     maxpoint = x[np.argmax(y_nonan_nofv_nE_nogr)], max(y_nonan_nofv_nE_nogr)
                                        #     ax.annotate('No Global Ranges', size=8,
                                        #                 xy=maxpoint, xytext=(5, 5), textcoords='offset points')
                                        sfile = '_'.join((sname, 'rmoutliers'))
                                        pf.save_fig(save_dir, sfile)
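# Compact standalone sketch of the rejection chain used in the block above
# (NaNs/zeros, then fill values, then extremes, then global ranges). The 1e7 cutoff
# mirrors the 'Extreme Values (1e7)' legend text used elsewhere in these scripts; the
# helper name and inlined logic are illustrative, not part of the original repo.
def _reject_chain_example(x, y, fill_value, g_min=None, g_max=None):
    keep = (~np.isnan(y)) & (y != 0.0)           # reject NaNs and values of 0.0
    keep = keep & (y != fill_value)              # reject fill values
    keep = keep & (np.abs(y) < 1e7)              # reject extreme values
    if g_min is not None and g_max is not None:  # reject values outside global ranges
        keep = keep & (y >= g_min) & (y <= g_max)
    return x[keep], y[keep]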
def compare_plot_datasets(df, r, start_time, end_time, sDir, strm=None):
    names = df.columns
    for d, row in df.iterrows():
        # if '0001' not in d:
        print('\n{}'.format(d))
        for i, n in enumerate(names):
            ii = i + 1
            if ii > 1:
                f1 = row[n]
                if type(f1) == float:  # skip columns with no file list (NaN)
                    continue
                elif type(f1) == list:
                    for x in range(ii - 1):
                        f0 = row[names[x]]
                        if type(f0) == float:
                            continue
                        elif type(f0) == list:
                            compare = '{} {}'.format(names[x], n)
                            if len(f0) == 1:
                                ds0 = xr.open_dataset(f0[0])
                                ds0 = ds0.swap_dims({'obs': 'time'})
                            else:
                                ds0 = xr.open_mfdataset(f0)
                                ds0 = ds0.swap_dims({'obs': 'time'})
                                ds0 = ds0.chunk({'time': 100})
                            splt0 = compare.split(' ')[0].split('-')
                            ds0_sci_vars = cf.return_science_vars(splt0[1])
                            ds0_method = splt0[0]

                            if start_time is not None and end_time is not None:
                                ds0 = ds0.sel(time=slice(start_time, end_time))
                                if len(ds0['time'].values) == 0:
                                    print('No {} data to plot for specified time range: ({} to {})'.format(
                                        ds0_method, start_time, end_time))
                                    continue

                            if len(f1) == 1:
                                ds1 = xr.open_dataset(f1[0])
                                ds1 = ds1.swap_dims({'obs': 'time'})
                            else:
                                ds1 = xr.open_mfdataset(f1)
                                ds1 = ds1.swap_dims({'obs': 'time'})
                                ds1 = ds1.chunk({'time': 100})
                            splt1 = compare.split(' ')[1].split('-')
                            ds1_sci_vars = cf.return_science_vars(splt1[1])
                            ds1_method = splt1[0]

                            if start_time is not None and end_time is not None:
                                ds1 = ds1.sel(time=slice(start_time, end_time))
                                if len(ds1['time'].values) == 0:
                                    print('No {} data to plot for specified time range: ({} to {})'.format(
                                        ds1_method, start_time, end_time))
                                    continue

                            t0 = ds0['time']
                            t1 = ds1['time']

                            # find where the variable long names are the same
                            ds0names = long_names(ds0, ds0_sci_vars)
                            ds0names.rename(columns={'name': 'name_ds0'}, inplace=True)
                            ds1names = long_names(ds1, ds1_sci_vars)
                            ds1names.rename(columns={'name': 'name_ds1'}, inplace=True)
                            mapping = pd.merge(ds0names, ds1names, on='long_name', how='inner')

                            print('----------------------')
                            print('{}: {}'.format(d, compare))
                            print('----------------------')

                            subsite = r.split('-')[0]
                            array = subsite[0:2]
                            if start_time is not None and end_time is not None:
                                stime = start_time.strftime('%Y-%m-%d')
                                etime = end_time.strftime('%Y-%m-%d')
                                ext = '-'.join((d, compare)) + '-' + stime + 'to' + etime  # .join((ds0_method, ds1_method
                                save_dir = os.path.join(sDir, array, subsite, r, 'method_compare_plots', ext)
                            else:
                                save_dir = os.path.join(sDir, array, subsite, r, 'method_compare_plots',
                                                        '-'.join((ds0_method, ds1_method)))
                            cf.create_dir(save_dir)

                            for rr in mapping.itertuples():
                                index, name_ds0, long_name, name_ds1 = rr
                                print(long_name)
                                ds0_var = ds0[name_ds0]
                                ds1_var = ds1[name_ds1]

                                # reject NaNs
                                nan0_ind = ~np.isnan(ds0_var.data)
                                ds0_nonan = ds0_var.data[nan0_ind]
                                nan1_ind = ~np.isnan(ds1_var.data)
                                ds1_nonan = ds1_var.data[nan1_ind]

                                # only plot if both arrays have data
                                if len(ds0_nonan) > 0 and len(ds1_nonan) > 0:
                                    # Plot all data
                                    fig, ax = pf.plot_timeseries_compare(t0, t1, ds0_var, ds1_var, ds0_method,
                                                                         ds1_method, long_name, stdev=None)
                                    title = ' '.join((d, r, '{} vs {}'.format(ds0_method, ds1_method)))
                                    ax.set_title(title, fontsize=9)
                                    if strm:
                                        sfile = '_'.join((d, r, long_name, strm))
                                    else:
                                        sfile = '_'.join((d, r, long_name))
                                    pf.save_fig(save_dir, sfile)

                                    # Plot data with outliers removed
                                    fig, ax = pf.plot_timeseries_compare(t0, t1, ds0_var, ds1_var, ds0_method,
                                                                         ds1_method, long_name, stdev=5)
                                    title = ' '.join((d, r, '{} vs {}'.format(ds0_method, ds1_method)))
                                    ax.set_title(title, fontsize=9)
                                    if strm:
                                        sfile = '_'.join((d, r, long_name, strm, 'rmoutliers'))
                                    else:
                                        sfile = '_'.join((d, r, long_name, 'rmoutliers'))
                                    pf.save_fig(save_dir, sfile)
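# Hypothetical input for compare_plot_datasets: df is indexed by deployment, with one
# column per delivery method holding lists of .nc file paths (NaN where a method has
# no files); column names are expected to look like 'method-stream'. All names and
# paths below are placeholders.
def _example_compare():
    df = pd.DataFrame(
        {'recovered_inst-ctdmo_ghqr_instrument_recovered': [['/path/d0001_recovered.nc']],
         'telemetered-ctdmo_ghqr_sio_mule_instrument': [['/path/d0001_telemetered.nc']]},
        index=['deployment0001'])
    compare_plot_datasets(df, 'GI03FLMA-RIM01-02-CTDMOG040', None, None, '/Users/ooi/plots')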
def main(nc, directory, out, time_break, breakdown):
    """
    nc: an .nc file used to pull the stream name and global attributes
    directory: folder of .nc files to aggregate (globbed as directory/*.nc)
    out: directory to save plots
    time_break: dataset variable used to split the plots (e.g. deployment)
    breakdown: subfolder name for the saved plots
    """
    list_files = directory + "/*.nc"
    # list_files = ['https://opendap.oceanobservatories.org/thredds/dodsC/ooi/friedrich-knuth-gmail/20170322T191659-RS03AXPS-PC03A-4A-CTDPFA303-streamed-ctdpf_optode_sample/deployment0003_RS03AXPS-PC03A-4A-CTDPFA303-streamed-ctdpf_optode_sample_20170312T000000.426102-20170322T190000.059973.nc',
    #               'https://opendap.oceanobservatories.org/thredds/dodsC/ooi/friedrich-knuth-gmail/20170322T191659-RS03AXPS-PC03A-4A-CTDPFA303-streamed-ctdpf_optode_sample/deployment0003_RS03AXPS-PC03A-4A-CTDPFA303-streamed-ctdpf_optode_sample_20161222T000000.132709-20170311T235959.426096.nc']
    # print(list_files)
    stream_vars = pf.load_variable_dict(var='eng')  # load engineering variables

    with xr.open_dataset(nc, mask_and_scale=False) as ds_ncfile:
        stream = ds_ncfile.stream  # stream name associated with the data
        title_pre = mk_str(ds_ncfile.attrs, 't')
        save_pre = mk_str(ds_ncfile.attrs, 's')
        platform = ds_ncfile.subsite
        node = ds_ncfile.node
        sensor = ds_ncfile.sensor
        # save_dir = os.path.join(out, platform, node, stream, 'xsection_depth_profiles')
        save_dir = os.path.join(out, 'timeseries', breakdown)
        cf.create_dir(save_dir)

    with xr.open_mfdataset(list_files) as ds:
        # change dimensions from 'obs' to 'time'
        ds = ds.swap_dims({'obs': 'time'})
        ds_variables = ds.data_vars.keys()  # list of dataset variables
        # try:
        #     eng = stream_vars[stream]  # select specific stream's engineering variables
        # except KeyError:
        #     eng = ['']
        misc = ['quality', 'string', 'timestamp', 'deployment', 'id', 'provenance', 'qc', 'time', 'mission', 'obs',
                'volt', 'ref', 'sig', 'amp', 'rph', 'calphase', 'phase', 'therm']
        # reg_ex = re.compile('|'.join(eng + misc))  # make regular expression
        reg_ex = re.compile('|'.join(misc))

        # keep variables that are not in the regular expression
        sci_vars = [s for s in ds_variables if not reg_ex.search(s)]

        times = np.unique(ds[time_break])
        for t in times:
            time_ind = t == ds[time_break].data
            for var in sci_vars:
                x = dict(data=ds['time'].data[time_ind], info=dict(label='Time', units='GMT'))
                t0 = pd.to_datetime(x['data'].min()).strftime('%Y-%m-%dT%H%M%S')  # '%00' is not a valid directive
                t1 = pd.to_datetime(x['data'].max()).strftime('%Y-%m-%dT%H%M%S')
                try:
                    sci = ds[var]
                    print(var)
                    # sci = sub_ds[var]
                except UnicodeEncodeError:  # some comments have latex characters
                    ds[var].attrs.pop('comment')  # remove from the attributes
                    sci = ds[var]  # or else the variable won't load
                try:
                    y_lab = sci.long_name
                except AttributeError:
                    y_lab = sci.standard_name
                y = dict(data=sci.data[time_ind], info=dict(label=y_lab, units=str(sci.units), var=var,
                                                            platform=platform, node=node, sensor=sensor))
                title = title_pre + var

                # plot timeseries with outliers
                fig, ax = pf.auto_plot(x, y, title, stdev=None, line_style='.', g_range=True)
                pf.resize(width=12, height=8.5)  # resize figure
                save_name = '{}-{}-{}_{}_{}-{}'.format(platform, node, sensor, var, t0, t1)
                pf.save_fig(save_dir, save_name, res=150)  # save figure
                plt.close('all')

                # try:
                #     y_lab = sci.standard_name
                # except AttributeError:
                #     y_lab = var
                # y = dict(data=sci.data, info=dict(label=y_lab, units=sci.units))

                # plot timeseries with outliers removed
                # fig, ax = pf.auto_plot(x, y, title, stdev=1, line_style='.', g_range=True)
                # pf.resize(width=12, height=8.5)  # resize figure
                # save_name = '{}-{}-{}_{}_{}-{}_outliers_removed'.format(platform, node, sensor, var, t0, t1)
                # pf.save_fig(save_dir, save_name, res=150)  # save figure
                # plt.close('all')

                del x, y
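# A hypothetical invocation of the main() above (the paths and the time_break /
# breakdown values are placeholders, not taken from the repo):
if __name__ == '__main__':
    nc = '/path/to/one_reference_file.nc'        # used only to read stream attributes
    directory = '/path/to/deployment_nc_files'   # every .nc file here is aggregated
    out = '/path/to/plots'
    main(nc, directory, out, time_break='deployment', breakdown='deployment')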
def main(sDir, url_list, start_time, end_time, deployment_num, interval):
    rd_list = []
    for uu in url_list:
        elements = uu.split('/')[-2].split('-')
        rd = '-'.join((elements[1], elements[2], elements[3], elements[4]))
        if rd not in rd_list:
            rd_list.append(rd)

    for r in rd_list:
        print('\n{}'.format(r))
        datasets = []
        deployments = []
        for u in url_list:
            splitter = u.split('/')[-2].split('-')
            rd_check = '-'.join((splitter[1], splitter[2], splitter[3], splitter[4]))
            if rd_check == r:
                udatasets = cf.get_nc_urls([u])
                datasets.append(udatasets)
                for ud in udatasets:
                    if ud.split('/')[-1].split('_')[0] not in deployments:
                        deployments.append(ud.split('/')[-1].split('_')[0])
        datasets = list(itertools.chain(*datasets))
        datasets = cf.filter_collocated_instruments(r, datasets)
        deployments.sort()
        fdatasets = np.unique(datasets).tolist()

        for deploy in deployments:
            if deployment_num is not None:
                if int(deploy[-4:]) != deployment_num:  # equality, not identity ('is not' fails for most ints)
                    print('\nskipping {}'.format(deploy))
                    continue
            rdatasets = [s for s in fdatasets if deploy in s]

            # break deployment into 4 segments or make a list of the time range specified
            if start_time is not None and end_time is not None:
                dt_range = [dt.datetime.strftime(start_time, '%Y-%m-%d'), dt.datetime.strftime(end_time, '%Y-%m-%d')]
            else:
                # get deployment info from the data review database
                dr_data = cf.refdes_datareview_json(r)
                d_info = [x for x in dr_data['instrument']['deployments'] if x['deployment_number'] == int(deploy[-4:])]
                d_info = d_info[0]
                deploy_start = dt.datetime.strptime(str(d_info['start_date']).split('T')[0], '%Y-%m-%d')
                deploy_stop = dt.datetime.strptime(str(d_info['stop_date']).split('T')[0], '%Y-%m-%d') + dt.timedelta(days=1)
                dt_range = list(date_range(deploy_start, deploy_stop, 4))

            sci_vars_dict = {'time': dict(values=np.array([], dtype=np.datetime64), fv=[], ln=[]),
                             'bin_depths': dict(values=np.array([]), units=[], fv=[], ln=[])}
            percentgood = {'percent_good_beam1': dict(values=np.array([])),
                           'percent_good_beam2': dict(values=np.array([])),
                           'percent_good_beam3': dict(values=np.array([])),
                           'percent_good_beam4': dict(values=np.array([]))}

            if interval is None:
                toplot = range(len(dt_range) - 1)
            else:
                toplot = [interval - 1]

            for dtri in toplot:
                stime = dt.datetime.strptime(dt_range[dtri], '%Y-%m-%d')
                etime = dt.datetime.strptime(dt_range[dtri + 1], '%Y-%m-%d')
                if len(rdatasets) > 0:
                    for i in range(len(rdatasets)):
                        # for i in range(0, 2):  ##### for testing
                        ds = xr.open_dataset(rdatasets[i], mask_and_scale=False)
                        ds = ds.swap_dims({'obs': 'time'})
                        print('\nAppending data from {}: file {} of {}'.format(deploy, i + 1, len(rdatasets)))
                        ds = ds.sel(time=slice(stime, etime))
                        if len(ds['time'].values) == 0:
                            print('No data to plot for specified time range: ({} to {})'.format(start_time, end_time))
                            continue
                        try:
                            # fname is undefined until the first file is processed; read the
                            # attributes and build the variable dictionary only once
                            print(fname)
                        except NameError:
                            fname, subsite, refdes, method, stream, deployment = cf.nc_attributes(rdatasets[0])
                            array = subsite[0:2]
                            sci_vars = cf.return_science_vars(stream)
                            # drop the following list of key words from science variables list
                            sci_vars = notin_list(sci_vars, ['salinity', 'temperature', 'bin_depths', 'beam'])
                            sci_vars = [name for name in sci_vars if ds[name].units != 'mm s-1']
                            for sci_var in sci_vars:
                                sci_vars_dict.update({sci_var: dict(values=np.array([]), units=[], fv=[], ln=[])})

                        # append data for the deployment into a dictionary
                        for s_v, info in sci_vars_dict.items():
                            print(s_v)
                            vv = ds[s_v]
                            try:
                                if vv.units not in info['units']:
                                    info['units'].append(vv.units)
                            except AttributeError:
                                print('no units')
                            try:
                                if vv._FillValue not in info['fv']:
                                    info['fv'].append(vv._FillValue)
                            except AttributeError:
                                print('no fill value')
                            try:
                                if vv.long_name not in info['ln']:
                                    info['ln'].append(vv.long_name)
                            except AttributeError:
                                print('no long name')
                            if len(vv.dims) == 1:
                                info['values'] = np.append(info['values'], vv.values)
                            else:
                                if len(info['values']) == 0:
                                    info['values'] = vv.values.T
                                else:
                                    info['values'] = np.concatenate((info['values'], vv.values.T), axis=1)

                        # append percent good beams
                        for j, k in percentgood.items():
                            pgvv = ds[j]
                            fv_pgvv = pgvv._FillValue
                            pgvv = pgvv.values.T.astype(float)
                            pgvv[pgvv == fv_pgvv] = np.nan
                            if len(k['values']) == 0:
                                k['values'] = pgvv
                            else:
                                k['values'] = np.concatenate((k['values'], pgvv), axis=1)

                if len(sci_vars_dict['time']['values']) > 0:
                    filename = '_'.join(fname.split('_')[:-1])
                    save_dir = os.path.join(sDir, array, subsite, refdes, 'plots', deployment)
                    cf.create_dir(save_dir)
                    tm = sci_vars_dict['time']['values']
                    t0 = pd.to_datetime(tm.min()).strftime('%Y-%m-%dT%H:%M:%S')
                    t1 = pd.to_datetime(tm.max()).strftime('%Y-%m-%dT%H:%M:%S')
                    title_text = ' '.join((deployment, refdes, method))
                    bd = sci_vars_dict['bin_depths']
                    ylabel = 'bin_depths ({})'.format(bd['units'][0])
                    print('\nPlotting interval {}'.format(int(dtri) + 1))

                    for var in sci_vars:
                        print('----{}'.format(var))
                        v = sci_vars_dict[var]
                        fv = v['fv'][0]
                        v_name = v['ln'][0]
                        units = v['units'][0]

                        if len(np.shape(v['values'])) == 1:
                            v, n_nan, n_fv, n_ev, n_grange, g_min, g_max, n_std = reject_err_data_1_dims(v['values'], fv, r, var, n=5)
                            if len(tm) > np.sum(np.isnan(v)):  # only plot if the array contains values
                                # plot all data
                                fig, ax = pf.plot_timeseries(tm, v, v_name, stdev=None)
                                ax.set_title((title_text + '\n' + t0 + ' - ' + t1), fontsize=9)
                                sfile = '-'.join((filename, v_name, t0[:10]))
                                pf.save_fig(save_dir, sfile)

                                # plot data with outliers removed
                                fig, ax = pf.plot_timeseries(tm, v, v_name, stdev=5)
                                title_i = 'removed: {} nans, {} fill values, {} extreme values, {} GR [{}, {}],' \
                                          ' {} outliers +/- 5 SD'.format(n_nan, n_fv, n_ev, n_grange, g_min, g_max, n_std)
                                ax.set_title((title_text + '\n' + t0 + ' - ' + t1 + '\n' + title_i), fontsize=8)
                                sfile = '-'.join((filename, v_name, t0[:10])) + '_rmoutliers'
                                pf.save_fig(save_dir, sfile)
                            else:
                                print('Array of all nans - skipping plot')
                        else:
                            v, n_nan, n_fv, n_ev, n_bb, n_grange, g_min, g_max = reject_err_data_2_dims(v['values'], percentgood, fv, r, var)
                            clabel = '{} ({})'.format(var, units)

                            # check bin depths for extreme values
                            y = bd['values']
                            # if all the values are negative, take the absolute value (cabled data bin depths are negative)
                            if int(np.nanmin(y)) < 0 and int(np.nanmax(y)) < 0:
                                y = abs(y)
                            y_nan = np.sum(np.isnan(y))
                            y = np.where(y < 6000, y, np.nan)  # replace extreme bin_depths by nans
                            bin_nan = np.sum(np.isnan(y)) - y_nan
                            bin_title = 'removed: {} bin depths > 6000'.format(bin_nan)

                            if 'echo' in var:
                                color = 'BuGn'
                            else:
                                color = 'RdBu'

                            new_y = dropna(y, axis=1)  # convert to DataFrame to drop nan
                            y_mask = new_y.loc[list(new_y.index), list(new_y.columns)]
                            v_new = pd.DataFrame(v)
                            v_mask = v_new.loc[list(new_y.index), list(new_y.columns)]
                            tm_mask = tm[new_y.columns]

                            fig, ax, __ = pf.plot_adcp(tm_mask, np.array(y_mask), np.array(v_mask), ylabel, clabel, color, n_stdev=None)
                            if bin_nan > 0:
                                ax.set_title((title_text + '\n' + t0 + ' - ' + t1 + '\n' + bin_title), fontsize=8)
                            else:
                                ax.set_title((title_text + '\n' + t0 + ' - ' + t1), fontsize=8)
                            sfile = '-'.join((filename, var, t0[:10]))
                            pf.save_fig(save_dir, sfile)

                            fig, ax, n_nans_all = pf.plot_adcp(tm_mask, np.array(y_mask), np.array(v_mask), ylabel, clabel, color, n_stdev=5)
                            title_i = 'removed: {} nans, {} fill values, {} extreme values, {} bad beams, {} GR [{}, {}]'.format(
                                n_nan, n_fv, n_ev, n_bb, n_grange, g_min, g_max)
                            if bin_nan > 0:
                                ax.set_title((title_text + '\n' + t0 + ' - ' + t1 + '\n' + title_i + '\n' + bin_title), fontsize=8)
                            else:
                                ax.set_title((title_text + '\n' + t0 + ' - ' + t1 + '\n' + title_i), fontsize=8)
                            sfile = '-'.join((filename, var, t0[:10])) + '_rmoutliers'
                            pf.save_fig(save_dir, sfile)
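# The function above relies on two helpers defined elsewhere in this script,
# notin_list() and dropna(). Minimal sketches of what they are assumed to do
# (these bodies are illustrative assumptions, not the repo's actual code):
import numpy as np
import pandas as pd

def notin_list_sketch(names, substrings):
    """Keep names that contain none of the given substrings."""
    return [n for n in names if not any(sub in n for sub in substrings)]

def dropna_sketch(arr, axis=0):
    """Wrap a 2D array in a DataFrame and drop any row/column containing NaNs."""
    return pd.DataFrame(arr).dropna(axis=axis)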
        tel_long_name.append(tv_longname)  # list of telemetered long names

    tel_df = pd.DataFrame({'tel_name': tel_name, 'long_name': tel_long_name})

    # map the recovered and telemetered names based on long name
    mapping = pd.merge(rec_df, tel_df, on='long_name', how='inner')

    for row in mapping.itertuples():
        index, long_name, rec_name, tel_name = row
        r_var = rec[rec_name]
        r_data = r_var.data
        t_var = tel[tel_name]
        t_data = t_var.data
        time_rec = rec['time'].data
        time_tel = tel['time'].data

        x1 = dict(data=time_rec, info=dict(platform=platform, node=node, sensor=sensor, units='GMT',
                                           label='Time', var=rec_name))
        y1 = dict(data=r_data, info=dict(platform=platform, node=node, sensor=sensor, label=long_name,
                                         units=r_var.units, var=rec_name))
        x2 = dict(data=time_tel, info=dict(platform=platform, node=node, sensor=sensor, units='GMT',
                                           label='Time', var=tel_name))
        y2 = dict(data=t_data, info=dict(platform=platform, node=node, sensor=sensor, label=long_name,
                                         units=t_var.units, var=tel_name))

        fig, ax = pf.compare_timeseries(x1, y1, x2, y2, g_range=True)
        title_text = '{}\nVariable: {}\ntelemetered ({}) vs {} ({})'.format(title, long_name, tel_name,
                                                                            rec_method, rec_name)
        plt.title(title_text, fontsize=10)
        pf.resize(width=12, height=8.5)  # resize figure
        save_name = '{}_{}'.format(title, long_name)
        pf.save_fig(save_dir, save_name, res=150)  # save figure
        plt.close('all')
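# A tiny worked example of the long-name mapping used above (the variable names
# here are illustrative, not taken from a real stream):
import pandas as pd

rec_df_ex = pd.DataFrame({'rec_name': ['ctdbp_seawater_temperature'], 'long_name': ['Seawater Temperature']})
tel_df_ex = pd.DataFrame({'tel_name': ['temp'], 'long_name': ['Seawater Temperature']})
mapping_ex = pd.merge(rec_df_ex, tel_df_ex, on='long_name', how='inner')
# mapping_ex holds one row pairing 'ctdbp_seawater_temperature' with 'temp',
# so each row of the merge yields one recovered vs telemetered comparison plot.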
def main(sDir, url_list, start_time, end_time, preferred_only):
    rd_list = []
    for uu in url_list:
        elements = uu.split('/')[-2].split('-')
        rd = '-'.join((elements[1], elements[2], elements[3], elements[4]))
        if rd not in rd_list:
            rd_list.append(rd)

    for r in rd_list:
        print('\n{}'.format(r))
        datasets = []
        for u in url_list:
            splitter = u.split('/')[-2].split('-')
            rd_check = '-'.join((splitter[1], splitter[2], splitter[3], splitter[4]))
            if rd_check == r:
                udatasets = cf.get_nc_urls([u])
                datasets.append(udatasets)
        datasets = list(itertools.chain(*datasets))
        fdatasets = []
        if preferred_only == 'yes':
            # get the preferred stream information
            ps_df, n_streams = cf.get_preferred_stream_info(r)
            for index, row in ps_df.iterrows():
                for ii in range(n_streams):
                    try:
                        rms = '-'.join((r, row[ii]))
                    except TypeError:
                        continue
                    for dd in datasets:
                        spl = dd.split('/')[-2].split('-')
                        catalog_rms = '-'.join((spl[1], spl[2], spl[3], spl[4], spl[5], spl[6]))
                        fdeploy = dd.split('/')[-1].split('_')[0]
                        if rms == catalog_rms and fdeploy == row['deployment']:
                            fdatasets.append(dd)
        else:
            fdatasets = datasets

        fdatasets = np.unique(fdatasets).tolist()
        for fd in fdatasets:
            ds = xr.open_dataset(fd, mask_and_scale=False)
            ds = ds.swap_dims({'obs': 'time'})

            if start_time is not None and end_time is not None:
                ds = ds.sel(time=slice(start_time, end_time))
                if len(ds['time'].values) == 0:
                    print('No data to plot for specified time range: ({} to {})'.format(start_time, end_time))
                    continue

            fname, subsite, refdes, method, stream, deployment = cf.nc_attributes(fd)
            sci_vars = cf.return_science_vars(stream)
            print('\nPlotting {} {}'.format(r, deployment))
            array = subsite[0:2]
            filename = '_'.join(fname.split('_')[:-1])
            save_dir = os.path.join(sDir, array, subsite, refdes, 'timeseries_plots')
            cf.create_dir(save_dir)

            tm = ds['time'].values
            t0 = pd.to_datetime(tm.min()).strftime('%Y-%m-%dT%H:%M:%S')
            t1 = pd.to_datetime(tm.max()).strftime('%Y-%m-%dT%H:%M:%S')
            title = ' '.join((deployment, refdes, method))

            # -------- plot entire deployment --------
            for var in sci_vars:
                print(var)
                vv = ds[var]
                fv = vv._FillValue
                # need to round SPKIR values to 1 decimal place to match the global ranges. otherwise, values
                # that round to zero (e.g. 1.55294e-05) will be excluded by the global range test
                # v = np.round(vv.values.T, 1)  # .T = transpose 2D array
                v = vv.values.T
                n_nan = np.sum(np.isnan(v))

                # convert fill values to nans
                v[v == fv] = np.nan
                n_fv = np.sum(np.isnan(v)) - n_nan

                # plot before global ranges are removed
                fig, ax = pf.plot_spkir(tm, v, vv.name, vv.units)
                ax.set_title((title + '\n' + t0 + ' - ' + t1), fontsize=9)
                sfile = '-'.join((filename, var, t0[:10]))
                pf.save_fig(save_dir, sfile)

                # reject data outside of global ranges
                [g_min, g_max] = cf.get_global_ranges(r, var)
                if g_min is not None and g_max is not None:
                    v[v < g_min] = np.nan
                    v[v > g_max] = np.nan
                    n_grange = np.sum(np.isnan(v)) - n_fv - n_nan
                else:
                    n_grange = 'no global ranges'

                # plot after global ranges are removed
                fig, ax = pf.plot_spkir(tm, v, vv.name, vv.units)
                title2 = 'removed: {} global ranges [{}, {}]'.format(n_grange, g_min, g_max)
                ax.set_title((title + '\n' + t0 + ' - ' + t1 + '\n' + title2), fontsize=9)
                sfile = '-'.join((filename, var, t0[:10], 'rmgr'))
                pf.save_fig(save_dir, sfile)

            # -------- break the deployment into months and plot --------
            save_dir = os.path.join(sDir, array, subsite, refdes, 'timeseries_plots', 'monthly')
            cf.create_dir(save_dir)

            # create list of start and end dates
            dt_start = dt.datetime.strptime(t0, '%Y-%m-%dT%H:%M:%S')
            dt_end = dt.datetime.strptime(t1, '%Y-%m-%dT%H:%M:%S')
            start_dates = [dt_start.strftime('%m-%d-%YT00:00:00')]
            end_dates = []
            ts1 = dt_start
            while ts1 <= dt_end:
                ts2 = ts1 + dt.timedelta(days=1)
                if ts2.month != ts1.month:
                    start_dates.append(ts2.strftime('%m-%d-%YT00:00:00'))
                    end_dates.append(ts1.strftime('%m-%d-%YT23:59:59'))
                ts1 = ts2
            end_dates.append(dt_end.strftime('%m-%d-%YT23:59:59'))

            for sd, ed in zip(start_dates, end_dates):
                sd_format = dt.datetime.strptime(sd, '%m-%d-%YT%H:%M:%S')
                ed_format = dt.datetime.strptime(ed, '%m-%d-%YT%H:%M:%S')
                ds_month = ds.sel(time=slice(sd_format, ed_format))
                if len(ds_month['time'].values) == 0:
                    print('No data to plot for specified time range: ({} to {})'.format(sd, ed))
                    continue
                tm = ds_month['time'].values
                t0 = pd.to_datetime(tm.min()).strftime('%Y-%m-%dT%H:%M:%S')
                t1 = pd.to_datetime(tm.max()).strftime('%Y-%m-%dT%H:%M:%S')

                for var in sci_vars:
                    print(var)
                    vv = ds_month[var]
                    fv = vv._FillValue
                    v = vv.values.T  # transpose 2D array
                    n_nan = np.sum(np.isnan(v))

                    # convert fill values to nans
                    v[v == fv] = np.nan
                    n_fv = np.sum(np.isnan(v)) - n_nan

                    # reject data outside of global ranges
                    [g_min, g_max] = cf.get_global_ranges(r, var)
                    if g_min is not None and g_max is not None:
                        v[v < g_min] = np.nan
                        v[v > g_max] = np.nan
                        n_grange = np.sum(np.isnan(v)) - n_fv - n_nan
                    else:
                        n_grange = 'no global ranges'

                    # plot after global ranges are removed
                    fig, ax = pf.plot_spkir(tm, v, vv.name, vv.units)
                    title2 = 'removed: {} global ranges [{}, {}]'.format(n_grange, g_min, g_max)
                    ax.set_title((title + '\n' + t0 + ' - ' + t1 + '\n' + title2), fontsize=9)
                    sfile = '-'.join((filename, var, t0[:7], 'rmgr'))
                    pf.save_fig(save_dir, sfile)
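# Worked example of the month-boundary loop above: for a deployment spanning
# 2017-03-12 through 2017-05-02 (dates chosen for illustration), the lists are
#   start_dates = ['03-12-2017T00:00:00', '04-01-2017T00:00:00', '05-01-2017T00:00:00']
#   end_dates   = ['03-31-2017T23:59:59', '04-30-2017T23:59:59', '05-02-2017T23:59:59']
# i.e. one slice per calendar month, with the first and last slices clipped to
# the actual deployment start and end.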
def main(sDir, url_list, start_time, end_time, preferred_only):
    rd_list = []
    for uu in url_list:
        elements = uu.split('/')[-2].split('-')
        rd = '-'.join((elements[1], elements[2], elements[3], elements[4]))
        if rd not in rd_list:
            rd_list.append(rd)

    for r in rd_list:
        print('\n{}'.format(r))
        datasets = []
        for u in url_list:
            splitter = u.split('/')[-2].split('-')
            rd_check = '-'.join((splitter[1], splitter[2], splitter[3], splitter[4]))
            if rd_check == r:
                udatasets = cf.get_nc_urls([u])
                datasets.append(udatasets)
        datasets = list(itertools.chain(*datasets))
        fdatasets = []
        if preferred_only == 'yes':
            # get the preferred stream information
            ps_df, n_streams = cf.get_preferred_stream_info(r)
            for index, row in ps_df.iterrows():
                for ii in range(n_streams):
                    try:
                        rms = '-'.join((r, row[ii]))
                    except TypeError:
                        continue
                    for dd in datasets:
                        spl = dd.split('/')[-2].split('-')
                        catalog_rms = '-'.join((spl[1], spl[2], spl[3], spl[4], spl[5], spl[6]))
                        fdeploy = dd.split('/')[-1].split('_')[0]
                        if rms == catalog_rms and fdeploy == row['deployment']:
                            fdatasets.append(dd)
        else:
            fdatasets = datasets

        fdatasets = np.unique(fdatasets).tolist()
        main_sensor = r.split('-')[-1]
        fdatasets = cf.filter_collocated_instruments(main_sensor, fdatasets)

        for fd in fdatasets:
            if '_blank' not in fd:
                ds = xr.open_dataset(fd, mask_and_scale=False)
                ds = ds.swap_dims({'obs': 'time'})
                # get pressure variable from coordinates
                ds_vars = list(ds.data_vars.keys()) + [x for x in ds.coords.keys() if 'pressure' in x]
                # raw_vars = cf.return_raw_vars(ds_vars)

                if start_time is not None and end_time is not None:
                    ds = ds.sel(time=slice(start_time, end_time))
                    if len(ds['time'].values) == 0:
                        print('No data to plot for specified time range: ({} to {})'.format(start_time, end_time))
                        continue

                fname, subsite, refdes, method, stream, deployment = cf.nc_attributes(fd)
                if 'NUTNR' in refdes:
                    vars = cf.return_science_vars(stream)
                else:
                    vars = cf.return_raw_vars(ds_vars)
                print('\nPlotting {} {}'.format(r, deployment))
                array = subsite[0:2]
                filename = '_'.join(fname.split('_')[:-1])
                save_dir = os.path.join(sDir, array, subsite, refdes, 'timeseries_plots', deployment)
                cf.create_dir(save_dir)

                tm = ds['time'].values
                t0 = pd.to_datetime(tm.min()).strftime('%Y-%m-%dT%H:%M:%S')
                t1 = pd.to_datetime(tm.max()).strftime('%Y-%m-%dT%H:%M:%S')
                title = ' '.join((deployment, refdes, method))

                for var in vars:
                    print(var)
                    if var != 'id':
                        y = ds[var]
                        try:
                            fv = y._FillValue
                        except AttributeError:
                            fv = np.nan
                        if len(y.dims) == 1:
                            # check if the array is all NaNs
                            if sum(np.isnan(y.values)) == len(y.values):
                                print('Array of all NaNs - skipping plot.')
                            # check if the array is all fill values
                            elif len(y[y != fv]) == 0:
                                print('Array of all fill values - skipping plot.')
                            else:
                                # reject fill values
                                ind = y.values != fv
                                t = tm[ind]
                                y = y[ind]

                                # plot all data
                                fig, ax = pf.plot_timeseries(t, y, y.name, stdev=None)
                                ax.set_title((title + '\n' + t0 + ' - ' + t1), fontsize=9)
                                sfile = '-'.join((filename, y.name, t0[:10]))
                                pf.save_fig(save_dir, sfile)

                                # plot data with outliers removed
                                fig, ax = pf.plot_timeseries(t, y, y.name, stdev=5)
                                ax.set_title((title + '\n' + t0 + ' - ' + t1), fontsize=9)
                                sfile = '-'.join((filename, y.name, t0[:10])) + '_rmoutliers'
                                pf.save_fig(save_dir, sfile)
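# Worked example of the reference-designator parsing used above, based on the
# THREDDS link format seen in the commented-out examples earlier in this document:
u_ex = ('https://opendap.oceanobservatories.org/thredds/dodsC/ooi/friedrich-knuth-gmail/'
        '20170322T191659-RS03AXPS-PC03A-4A-CTDPFA303-streamed-ctdpf_optode_sample/'
        'deployment0003_RS03AXPS-PC03A-4A-CTDPFA303-streamed-ctdpf_optode_sample_20170312T000000.426102-20170322T190000.059973.nc')
elements_ex = u_ex.split('/')[-2].split('-')
rd_ex = '-'.join((elements_ex[1], elements_ex[2], elements_ex[3], elements_ex[4]))
# rd_ex == 'RS03AXPS-PC03A-4A-CTDPFA303', the subsite-node-port-instrument
# reference designator that the datasets are grouped by.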
def main(files, out, time_break, depth, start, end, interactive):
    """
    files: url to an .nc/.ncml file or the path to a text file containing .nc/.ncml links.
           A # at the front will skip links in the text file.
    out: Directory to save plots
    time_break: dataset variable used to split the plots (None plots the start/end slice instead)
    depth: name of the pressure variable to use for the y-axis
    start, end: time range to plot when time_break is None
    interactive: if True, show an interactive picker instead of saving figures
    """
    fname, ext = os.path.splitext(files)
    if ext in ('.nc', '.ncml'):  # match the extension explicitly instead of a substring test
        list_files = [files]
    else:
        list_files = read_file(files)

    stream_vars = pf.load_variable_dict(var='eng')  # load engineering variables
    for nc in list_files:
        print(nc)
        with xr.open_dataset(nc, mask_and_scale=False) as ds:
            # change dimensions from 'obs' to 'time'
            ds = ds.swap_dims({'obs': 'time'})
            ds_variables = ds.data_vars.keys()  # list of dataset variables
            stream = ds.stream  # stream name associated with the data
            title_pre = mk_str(ds.attrs, 't')
            save_pre = mk_str(ds.attrs, 's')
            platform = ds.subsite
            node = ds.node
            sensor = ds.sensor
            # save_dir = os.path.join(out, 'xsection_depth_profiles')
            save_dir = os.path.join(out, ds.subsite, ds.subsite + '-' + ds.node + '-' + ds.sensor, ds.stream,
                                    'xsection_depth_profiles')
            cf.create_dir(save_dir)

            misc = ['quality', 'string', 'timestamp', 'deployment', 'id', 'provenance', 'qc', 'time', 'mission',
                    'obs', 'volt', 'ref', 'sig', 'amp', 'rph', 'calphase', 'phase', 'therm', 'light']
            reg_ex = re.compile('|'.join(misc))

            # keep variables that are not in the regular expression
            sci_vars = [s for s in ds_variables if not reg_ex.search(s)]

            if time_break is not None:
                times = np.unique(ds[time_break])
                for t in times:
                    time_ind = t == ds[time_break].data
                    for var in sci_vars:
                        x = dict(data=ds['time'].data[time_ind], info=dict(label='Time', units='GMT'))
                        t0 = pd.to_datetime(x['data'].min()).strftime('%Y-%m-%dT%H%M%S')
                        t1 = pd.to_datetime(x['data'].max()).strftime('%Y-%m-%dT%H%M%S')
                        try:
                            sci = ds[var]
                            print(var)
                        except UnicodeEncodeError:  # some comments have latex characters
                            ds[var].attrs.pop('comment')  # remove from the attributes
                            sci = ds[var]  # or else the variable won't load

                        y = dict(data=ds[depth].data[time_ind], info=dict(label='Pressure', units='dbar', var=var,
                                                                          platform=platform, node=node, sensor=sensor))
                        try:
                            z_lab = sci.long_name
                        except AttributeError:
                            z_lab = sci.standard_name
                        z = dict(data=sci.data[time_ind], info=dict(label=z_lab, units=str(sci.units), var=var,
                                                                    platform=platform, node=node, sensor=sensor))
                        title = title_pre + var

                        fig, ax = pf.depth_glider_cross_section(x, y, z, title=title)
                        if interactive:
                            fig.canvas.mpl_connect('pick_event',
                                                   lambda event: pf.onpick3(event, x['data'], y['data'], z['data']))
                            plt.show()
                        else:
                            pf.resize(width=12, height=8.5)  # resize figure
                            save_name = '{}-{}-{}_{}_{}-{}'.format(platform, node, sensor, var, t0, t1)
                            pf.save_fig(save_dir, save_name, res=150)  # save figure
                            plt.close('all')
            else:
                ds = ds.sel(time=slice(start, end))
                for var in sci_vars:
                    x = dict(data=ds['time'].data[:], info=dict(label='Time', units='GMT'))
                    t0 = pd.to_datetime(x['data'].min()).strftime('%Y-%m-%dT%H%M%S')
                    t1 = pd.to_datetime(x['data'].max()).strftime('%Y-%m-%dT%H%M%S')
                    try:
                        sci = ds[var]
                        print(var)
                    except UnicodeEncodeError:  # some comments have latex characters
                        ds[var].attrs.pop('comment')  # remove from the attributes
                        sci = ds[var]  # or else the variable won't load

                    y = dict(data=ds[depth].data[:], info=dict(label='Pressure', units='dbar', var=var,
                                                               platform=platform, node=node, sensor=sensor))
                    try:
                        z_lab = sci.long_name
                    except AttributeError:
                        z_lab = sci.standard_name
                    z = dict(data=sci.data[:], info=dict(label=z_lab, units=sci.units, var=var, platform=platform,
                                                         node=node, sensor=sensor))
                    title = title_pre + var

                    fig, ax = pf.depth_glider_cross_section(x, y, z, title=title, interactive=interactive)
                    if interactive:
                        fig.canvas.mpl_connect('pick_event',
                                               lambda event: pf.onpick3(event, x['data'], y['data'], z['data']))
                        plt.show()
                    else:
                        pf.resize(width=12, height=8.5)  # resize figure
                        save_name = '{}-{}-{}_{}_{}-{}'.format(platform, node, sensor, var, t0, t1)
                        pf.save_fig(save_dir, save_name, res=150)  # save figure
                        plt.close('all')
def main(url_list, sDir, mDir, zcell_size, zdbar, start_time, end_time, inpercentile):
    """
    url_list : paths to instrument data by method
    sDir : path to the directory on your machine to save plots
    mDir : path to the directory on your machine to save data ranges
    zcell_size : depth cell size to group data
    zdbar : define depth where suspect data are identified
    start_time : select start date to slice timeseries
    end_time : select end date to slice timeseries
    inpercentile : upper and lower percentile used to color the data
    """
    rd_list = []
    ms_list = []
    for uu in url_list:
        elements = uu.split('/')[-2].split('-')
        rd = '-'.join((elements[1], elements[2], elements[3], elements[4]))
        ms = uu.split(rd + '-')[1].split('/')[0]
        if rd not in rd_list:
            rd_list.append(rd)
        if ms not in ms_list:
            ms_list.append(ms)

    for r in rd_list:
        print('\n{}'.format(r))
        subsite = r.split('-')[0]
        array = subsite[0:2]
        main_sensor = r.split('-')[-1]

        # read in the analysis file
        dr_data = cf.refdes_datareview_json(r)

        # get the preferred stream information
        ps_df, n_streams = cf.get_preferred_stream_info(r)

        # get science variable long names from the Data Review Database
        stream_sci_vars = cd.sci_var_long_names(r)

        # check if the science variable long names are the same for each stream and initialize empty arrays
        sci_vars_dict0 = cd.sci_var_long_names_check(stream_sci_vars)

        # get the list of data files and filter out collocated instruments and other streams
        datasets = []
        for u in url_list:
            print(u)
            splitter = u.split('/')[-2].split('-')
            rd_check = '-'.join((splitter[1], splitter[2], splitter[3], splitter[4]))
            if rd_check == r:
                udatasets = cf.get_nc_urls([u])
                datasets.append(udatasets)
        datasets = list(itertools.chain(*datasets))
        fdatasets = cf.filter_collocated_instruments(main_sensor, datasets)
        fdatasets = cf.filter_other_streams(r, ms_list, fdatasets)

        # select the list of data files from the preferred dataset for each deployment
        fdatasets_final = []
        for ii in range(len(ps_df)):
            for x in fdatasets:
                if ps_df['deployment'][ii] in x and ps_df[0][ii] in x:
                    fdatasets_final.append(x)

        # build dictionary of science data from the preferred dataset for each deployment
        print('\nAppending data from files')
        sci_vars_dict, y_unit, y_name, l0 = cd.append_evaluated_science_data(
            sDir, ps_df, n_streams, r, fdatasets_final, sci_vars_dict0, zdbar, start_time, end_time)

        # get end times of deployments
        deployments = []
        end_times = []
        for index, row in ps_df.iterrows():
            deploy = row['deployment']
            deploy_info = cf.get_deployment_information(dr_data, int(deploy[-4:]))
            deployments.append(int(deploy[-4:]))
            end_times.append(pd.to_datetime(deploy_info['stop_date']))

        # create data range output folders
        save_dir_stat = os.path.join(mDir, array, subsite)
        cf.create_dir(save_dir_stat)
        # create plots output folder
        save_fdir = os.path.join(sDir, array, subsite, r, 'data_range')
        cf.create_dir(save_fdir)
        stat_df = pd.DataFrame()

        """
        create data ranges csv file and figures
        """
        for m, n in sci_vars_dict.items():
            for sv, vinfo in n['vars'].items():
                print('\n' + vinfo['var_name'])
                if len(vinfo['t']) < 1:
                    print('no variable data to plot')
                    continue
                else:
                    sv_units = vinfo['units'][0]
                    fv = vinfo['fv'][0]
                    t = vinfo['t']
                    z = vinfo['values']
                    y = vinfo['pressure']

                    # Check if the array is all NaNs
                    if sum(np.isnan(z)) == len(z):
                        print('Array of all NaNs - skipping plot.')
                        continue
                    # Check if the array is all fill values
                    elif len(z[z != fv]) == 0:
                        print('Array of all fill values - skipping plot.')
                        continue
                    else:
                        if len(y) > 0:
                            if m == 'common_stream_placeholder':
                                sname = '-'.join((vinfo['var_name'], r))
                            else:
                                sname = '-'.join((vinfo['var_name'], r, m))

                            """
                            create data ranges for non-pressure data only
                            """
                            if 'pressure' in vinfo['var_name']:
                                pass
                            else:
                                columns = ['tsec', 'dbar', str(vinfo['var_name'])]
                                # create depth ranges
                                min_r = int(round(min(y) - zcell_size))
                                max_r = int(round(max(y) + zcell_size))
                                ranges = list(range(min_r, max_r, zcell_size))
                                # group data by depth
                                groups, d_groups = gt.group_by_depth_range(t, y, z, columns, ranges)

                                print('writing data ranges for {}'.format(vinfo['var_name']))
                                stat_data = groups.describe()[vinfo['var_name']]
                                stat_data.insert(loc=0, column='parameter', value=sv, allow_duplicates=False)
                                t_deploy = deployments[0]
                                for i in range(1, len(deployments)):
                                    t_deploy = '{}, {}'.format(t_deploy, deployments[i])
                                stat_data.insert(loc=1, column='deployments', value=t_deploy, allow_duplicates=False)
                                stat_df = stat_df.append(stat_data, ignore_index=False)

                            """
                            plot full time range free from errors and suspect data
                            """
                            clabel = sv + " (" + sv_units + ")"
                            ylabel = (y_name[0][0] + " (" + y_unit[0][0] + ")")
                            t_eng = None
                            m_water_depth = None

                            # plot non-erroneous, non-suspect data
                            fig, ax, bar = pf.plot_xsection(subsite, t, y, z, clabel, ylabel, t_eng, m_water_depth,
                                                            inpercentile, stdev=None)
                            title0 = 'Data colored using the upper and lower {} percentile.'.format(inpercentile)
                            ax.set_title(r + '\n' + title0, fontsize=9)
                            leg_text = ('{} % erroneous values removed after Human In the Loop review'.format(
                                (1 - len(t) / l0) * 100),)  # fraction removed; len(t) of the original l0 points remain
                            ax.legend(leg_text, loc='upper center', bbox_to_anchor=(0.5, -0.17), fontsize=6)

                            for ii in range(len(end_times)):
                                ax.axvline(x=end_times[ii], color='b', linestyle='--', linewidth=.8)
                                ax.text(end_times[ii], min(y) - 5, 'End' + str(deployments[ii]),
                                        fontsize=6, style='italic',
                                        bbox=dict(boxstyle='round', ec=(0., 0.5, 0.5), fc=(1., 1., 1.)))
                            # fig.tight_layout()
                            sfile = '_'.join(('data_range', sname))
                            pf.save_fig(save_fdir, sfile)

        # write stat file
        stat_df.to_csv('{}/{}_data_ranges.csv'.format(save_dir_stat, r), index=True, float_format='%11.6f')
        title2 = 'Cruise CTD file: {} Date: {}'.format(CTDfile.split('/')[-1],
                                                       dt.datetime.strftime(cast_start, '%Y-%m-%dT%H:%M:%S'))
        title3 = 'Platform: from {} to {}'.format(str(ds['time'].values[0])[:19], str(ds['time'].values[-1])[:19])
        fig.suptitle((title1 + '\n' + title2 + '\n' + title3), fontsize=8.5)
        sfile = '{}_{}_shipCTDcompare_{}'.format(refdes, deployment, pvarname)
        pf.save_fig(save_dir, sfile)
        plt.close()
    else:
        print('No platform data available for Shipboard CTD time frame')

if 'FLOR' in ds.sensor:
    if 'MOAS' in ds.subsite:
        if 'FLORTM' in ds.sensor:
            chlname = 'sci_flbbcd_chlor_units'
        else:
            chlname = 'sci_flbb_chlor_units'
    else:
        chlname = 'fluorometric_chlorophyll_a'
    pchla = ds[chlname]
def main(sDir, url_list, start_time, end_time, preferred_only):
    rd_list = []
    for uu in url_list:
        elements = uu.split('/')[-2].split('-')
        rd = '-'.join((elements[1], elements[2], elements[3], elements[4]))
        if rd not in rd_list:
            rd_list.append(rd)

    for r in rd_list:
        print('\n{}'.format(r))
        datasets = []
        for u in url_list:
            splitter = u.split('/')[-2].split('-')
            rd_check = '-'.join((splitter[1], splitter[2], splitter[3], splitter[4]))
            if rd_check == r:
                udatasets = cf.get_nc_urls([u])
                datasets.append(udatasets)
        datasets = list(itertools.chain(*datasets))
        fdatasets = []
        if preferred_only == 'yes':
            # get the preferred stream information
            ps_df, n_streams = cf.get_preferred_stream_info(r)
            for index, row in ps_df.iterrows():
                for ii in range(n_streams):
                    rms = '-'.join((r, row[ii]))
                    for dd in datasets:
                        spl = dd.split('/')[-2].split('-')
                        catalog_rms = '-'.join((spl[1], spl[2], spl[3], spl[4], spl[5], spl[6]))
                        fdeploy = dd.split('/')[-1].split('_')[0]
                        if rms == catalog_rms and fdeploy == row['deployment']:
                            fdatasets.append(dd)
        else:
            fdatasets = datasets

        for fd in fdatasets:
            with xr.open_dataset(fd, mask_and_scale=False) as ds:
                ds = ds.swap_dims({'obs': 'time'})
                if start_time is not None and end_time is not None:
                    ds = ds.sel(time=slice(start_time, end_time))
                    if len(ds['time'].values) == 0:
                        print('No data to plot for specified time range: ({} to {})'.format(start_time, end_time))
                        continue

                fname, subsite, refdes, method, stream, deployment = cf.nc_attributes(fd)
                print('\nPlotting {} {}'.format(r, deployment))
                array = subsite[0:2]
                save_dir = os.path.join(sDir, array, subsite, refdes, 'ts_plots')
                cf.create_dir(save_dir)

                tme = ds['time'].values
                t0 = pd.to_datetime(tme.min()).strftime('%Y-%m-%dT%H:%M:%S')
                t1 = pd.to_datetime(tme.max()).strftime('%Y-%m-%dT%H:%M:%S')
                title = ' '.join((deployment, refdes, method))
                filename = '-'.join(('_'.join(fname.split('_')[:-1]), 'ts', t0[:10]))

                ds_vars = list(ds.data_vars.keys())
                raw_vars = cf.return_raw_vars(ds_vars)

                xvar = return_var(ds, raw_vars, 'salinity', 'Practical Salinity')
                sal = ds[xvar].values
                sal_fv = ds[xvar]._FillValue

                yvar = return_var(ds, raw_vars, 'temp', 'Seawater Temperature')
                temp = ds[yvar].values
                temp_fv = ds[yvar]._FillValue

                press = pf.pressure_var(ds, list(ds.coords.keys()))
                if press is None:
                    press = pf.pressure_var(ds, list(ds.data_vars.keys()))
                p = ds[press].values

                # get rid of nans, 0.0s, fill values
                sind1 = (~np.isnan(sal)) & (sal != 0.0) & (sal != sal_fv)
                sal = sal[sind1]
                temp = temp[sind1]
                tme = tme[sind1]
                p = p[sind1]
                tind1 = (~np.isnan(temp)) & (temp != 0.0) & (temp != temp_fv)
                sal = sal[tind1]
                temp = temp[tind1]
                tme = tme[tind1]
                p = p[tind1]

                # reject values outside global ranges (if global ranges are defined)
                global_min, global_max = cf.get_global_ranges(r, xvar)
                if not any(e is None for e in [global_min, global_max]):
                    sgr_ind = cf.reject_global_ranges(sal, global_min, global_max)
                    sal = sal[sgr_ind]
                    temp = temp[sgr_ind]
                    tme = tme[sgr_ind]
                    p = p[sgr_ind]

                global_min, global_max = cf.get_global_ranges(r, yvar)
                if not any(e is None for e in [global_min, global_max]):
                    tgr_ind = cf.reject_global_ranges(temp, global_min, global_max)
                    sal = sal[tgr_ind]
                    temp = temp[tgr_ind]
                    tme = tme[tgr_ind]
                    p = p[tgr_ind]

                # get rid of outliers
                soind = cf.reject_outliers(sal, 5)
                sal = sal[soind]
                temp = temp[soind]
                tme = tme[soind]
                p = p[soind]
                toind = cf.reject_outliers(temp, 5)
                sal = sal[toind]
                temp = temp[toind]
                tme = tme[toind]
                p = p[toind]

                if len(sal) > 0:  # if there are any data to plot
                    colors = cm.rainbow(np.linspace(0, 1, len(tme)))

                    # figure out boundaries (mins and maxes)
                    # smin = sal.min() - (0.01 * sal.min())
                    # smax = sal.max() + (0.01 * sal.max())
                    if sal.max() - sal.min() < 0.2:
                        smin = sal.min() - (0.0005 * sal.min())
                        smax = sal.max() + (0.0005 * sal.max())
                    else:
                        smin = sal.min() - (0.001 * sal.min())
                        smax = sal.max() + (0.001 * sal.max())

                    if temp.max() - temp.min() <= 1:
                        tmin = temp.min() - (0.01 * temp.min())
                        tmax = temp.max() + (0.01 * temp.max())
                    elif 1 < temp.max() - temp.min() < 1.5:
                        tmin = temp.min() - (0.05 * temp.min())
                        tmax = temp.max() + (0.05 * temp.max())
                    else:
                        tmin = temp.min() - (0.1 * temp.min())
                        tmax = temp.max() + (0.1 * temp.max())

                    # calculate how many gridcells are needed in the x and y directions and
                    # create temp and sal vectors of appropriate dimensions
                    xdim = int(round((smax - smin) / 0.1 + 1, 0))
                    if xdim == 1:
                        xdim = 2
                    si = np.linspace(0, xdim - 1, xdim) * 0.1 + smin

                    if 1.1 <= temp.max() - temp.min() < 1.7:  # if the diff between min and max temp is small
                        ydim = int(round((tmax - tmin) / 0.75 + 1, 0))
                        ti = np.linspace(0, ydim - 1, ydim) * 0.75 + tmin
                    elif temp.max() - temp.min() < 1.1:
                        ydim = int(round((tmax - tmin) / 0.1 + 1, 0))
                        ti = np.linspace(0, ydim - 1, ydim) * 0.1 + tmin
                    else:
                        ydim = int(round((tmax - tmin) + 1, 0))
                        ti = np.linspace(0, ydim - 1, ydim) + tmin

                    # create empty grid of zeros
                    mdens = np.zeros((ydim, xdim))

                    # loop to fill in grid with densities
                    for j in range(0, ydim):
                        for i in range(0, xdim):
                            # calculate density using the median pressure value
                            mdens[j, i] = gsw.density.rho(si[i], ti[j], np.median(p))

                    fig, ax = pf.plot_ts(si, ti, mdens, sal, temp, colors)
                    ax.set_title((title + '\n' + t0 + ' - ' + t1 + '\ncolors = time (cooler: earlier)'), fontsize=9)
                    leg_text = ('Removed {} values (SD=5)'.format(len(ds[xvar].values) - len(sal)),)
                    ax.legend(leg_text, loc='best', fontsize=6)
                    pf.save_fig(save_dir, filename)
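# A minimal sketch of what cf.reject_outliers is assumed to do (the real
# implementation lives in this repo's common functions; the body below is an
# assumption): return a boolean index keeping points within n standard
# deviations of the mean, which the code above applies to both sal and temp.
import numpy as np

def reject_outliers_sketch(data, n=5):
    data = np.asarray(data, dtype=float)
    return np.abs(data - np.nanmean(data)) <= n * np.nanstd(data)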
def main(sDir, ncdir):
    rd_list = [ncdir.split('/')[-2]]

    for r in rd_list:
        print('\n{}'.format(r))
        subsite = r.split('-')[0]
        array = subsite[0:2]

        ps_df, n_streams = cf.get_preferred_stream_info(r)

        # get end times of deployments
        dr_data = cf.refdes_datareview_json(r)
        deployments = []
        end_times = []
        for index, row in ps_df.iterrows():
            deploy = row['deployment']
            deploy_info = get_deployment_information(dr_data, int(deploy[-4:]))
            deployments.append(int(deploy[-4:]))
            end_times.append(pd.to_datetime(deploy_info['stop_date']))

        # filter datasets
        fdatasets = []
        for root, dirs, files in os.walk(ncdir):
            for f in files:
                if f.endswith('.nc'):
                    fdatasets.append(f)
        # for u in url_list:
        #     splitter = u.split('/')[-2].split('-')
        #     rd_check = '-'.join((splitter[1], splitter[2], splitter[3], splitter[4]))
        #     if rd_check == r:
        #         udatasets = cf.get_nc_urls([u])
        #         datasets.append(udatasets)
        # datasets = list(itertools.chain(*datasets))
        # main_sensor = r.split('-')[-1]
        # fdatasets = cf.filter_collocated_instruments(main_sensor, datasets)

        methodstream = []
        for f in fdatasets:
            strm = '_'.join((f.split('-')[-2].split('_')[0], f.split('-')[-2].split('_')[1]))
            methodstream.append('-'.join((f.split('-')[-3], strm)))

        for ms in np.unique(methodstream):
            fdatasets_sel = [x for x in fdatasets if ms in x]

            save_dir = os.path.join(sDir, array, subsite, r, 'timeseries_plots_all')
            cf.create_dir(save_dir)

            stream_sci_vars_dict = dict()
            for x in dr_data['instrument']['data_streams']:
                dr_ms = '-'.join((x['method'], x['stream_name']))
                if ms == dr_ms:
                    stream_sci_vars_dict[dr_ms] = dict(vars=dict())
                    sci_vars = dict()
                    for y in x['stream']['parameters']:
                        if y['data_product_type'] == 'Science Data':
                            sci_vars.update({y['name']: dict(db_units=y['unit'])})
                    if len(sci_vars) > 0:
                        stream_sci_vars_dict[dr_ms]['vars'] = sci_vars

            sci_vars_dict = cd.initialize_empty_arrays(stream_sci_vars_dict, ms)

            print('\nAppending data from files: {}'.format(ms))
            for fd in fdatasets_sel:
                ds = xr.open_dataset(os.path.join(ncdir, fd), mask_and_scale=False)
                for var in list(sci_vars_dict[ms]['vars'].keys()):
                    sh = sci_vars_dict[ms]['vars'][var]
                    if ds[var].units == sh['db_units']:
                        if ds[var]._FillValue not in sh['fv']:
                            sh['fv'].append(ds[var]._FillValue)
                        if ds[var].units not in sh['units']:
                            sh['units'].append(ds[var].units)
                        tD = ds['time'].values
                        varD = ds[var].values
                        sh['t'] = np.append(sh['t'], tD)
                        sh['values'] = np.append(sh['values'], varD)

            print('\nPlotting data')
            for m, n in sci_vars_dict.items():
                for sv, vinfo in n['vars'].items():
                    print(sv)
                    if len(vinfo['t']) < 1:
                        print('no variable data to plot')
                    else:
                        sv_units = vinfo['units'][0]
                        t0 = pd.to_datetime(min(vinfo['t'])).strftime('%Y-%m-%dT%H:%M:%S')
                        t1 = pd.to_datetime(max(vinfo['t'])).strftime('%Y-%m-%dT%H:%M:%S')
                        x = vinfo['t']
                        y = vinfo['values']

                        # reject NaNs
                        nan_ind = ~np.isnan(y)
                        x_nonan = x[nan_ind]
                        y_nonan = y[nan_ind]

                        # reject fill values
                        fv_ind = y_nonan != vinfo['fv'][0]
                        x_nonan_nofv = x_nonan[fv_ind]
                        y_nonan_nofv = y_nonan[fv_ind]

                        # reject extreme values
                        Ev_ind = cf.reject_extreme_values(y_nonan_nofv)
                        y_nonan_nofv_nE = y_nonan_nofv[Ev_ind]
                        x_nonan_nofv_nE = x_nonan_nofv[Ev_ind]

                        # reject values outside global ranges
                        global_min, global_max = cf.get_global_ranges(r, sv)
                        if global_min is not None and global_max is not None:
                            gr_ind = cf.reject_global_ranges(y_nonan_nofv_nE, global_min, global_max)
                            y_nonan_nofv_nE_nogr = y_nonan_nofv_nE[gr_ind]
                            x_nonan_nofv_nE_nogr = x_nonan_nofv_nE[gr_ind]
                        else:
                            y_nonan_nofv_nE_nogr = y_nonan_nofv_nE
                            x_nonan_nofv_nE_nogr = x_nonan_nofv_nE

                        title = ' '.join((r, ms.split('-')[0]))

                        if len(y_nonan_nofv) > 0:
                            if m == 'common_stream_placeholder':
                                sname = '-'.join((r, sv))
                            else:
                                sname = '-'.join((r, m, sv))

                            # Plot all data
                            fig, ax = pf.plot_timeseries_all(x_nonan_nofv, y_nonan_nofv, sv, sv_units, stdev=None)
                            ax.set_title((title + '\nDeployments: ' + str(sorted(deployments)) + '\n' + t0 + ' - ' + t1),
                                         fontsize=8)
                            for etimes in end_times:
                                ax.axvline(x=etimes, color='b', linestyle='--', linewidth=.6)
                            # if global_min is not None and global_max is not None:
                            #     ax.axhline(y=global_min, color='r', linestyle='--', linewidth=.6)
                            #     ax.axhline(y=global_max, color='r', linestyle='--', linewidth=.6)
                            pf.save_fig(save_dir, sname)

                            # Plot data with extreme values, data outside global ranges and outliers removed
                            fig, ax = pf.plot_timeseries_all(x_nonan_nofv_nE_nogr, y_nonan_nofv_nE_nogr, sv, sv_units,
                                                             stdev=5)
                            ax.set_title((title + '\nDeployments: ' + str(sorted(deployments)) + '\n' + t0 + ' - ' + t1),
                                         fontsize=8)
                            for etimes in end_times:
                                ax.axvline(x=etimes, color='b', linestyle='--', linewidth=.6)
                            # if global_min is not None and global_max is not None:
                            #     ax.axhline(y=global_min, color='r', linestyle='--', linewidth=.6)
                            #     ax.axhline(y=global_max, color='r', linestyle='--', linewidth=.6)
                            sfile = '_'.join((sname, 'rmoutliers'))
                            pf.save_fig(save_dir, sfile)
def main(folder, out, time_break):
    """
    folder: directory (or glob) of .nc files to open with open_mfdataset
    out: Directory to save plots
    time_break: dataset variable used to split the plots (e.g. deployment)
    """
    with xr.open_mfdataset(folder, mask_and_scale=False) as ds:
        # change dimensions from 'obs' to 'time'
        ds = ds.swap_dims({'obs': 'time'})
        ds_variables = ds.data_vars.keys()  # list of dataset variables
        stream = ds.stream  # stream name associated with the data
        title_pre = mk_str(ds.attrs, 't')
        save_pre = mk_str(ds.attrs, 's')
        platform = ds.subsite
        node = ds.node
        sensor = ds.sensor
        save_dir = os.path.join(out, ds.subsite, ds.node, ds.stream, 'timeseries')
        cf.create_dir(save_dir)

        stream_vars = pf.load_variable_dict(var='eng')  # load engineering variables (missing in the original, raising NameError)
        try:
            eng = stream_vars[stream]  # select specific stream's engineering variables
        except KeyError:
            eng = ['']

        misc = ['timestamp', 'provenance', 'qc', 'id', 'obs', 'deployment', 'volts', 'counts', 'quality_flag']
        reg_ex = re.compile('|'.join(eng + misc))  # make regular expression

        # keep variables that are not in the regular expression
        sci_vars = [s for s in ds_variables if not reg_ex.search(s)]

        times = np.unique(ds[time_break])
        for t in times:
            time_ind = t == ds[time_break].data
            for var in sci_vars:
                x = dict(data=ds['time'].data[time_ind], info=dict(label='Time', units='GMT'))
                t0 = pd.to_datetime(x['data'].min()).strftime('%Y-%m-%dT%H%M%S')
                t1 = pd.to_datetime(x['data'].max()).strftime('%Y-%m-%dT%H%M%S')
                try:
                    sci = ds[var]
                    print(var)
                except UnicodeEncodeError:  # some comments have latex characters
                    ds[var].attrs.pop('comment')  # remove from the attributes
                    sci = ds[var]  # or else the variable won't load

                # pressure-variable lookup, left disabled: it searched sci.variables, which a
                # DataArray does not have, and its results were overwritten immediately below
                # pressure_vars = ['seawater_pressure', 'sci_water_pressure_dbar',
                #                  'ctdgv_m_glider_instrument_recovered-sci_water_pressure_dbar',
                #                  'ctdgv_m_glider_instrument-sci_water_pressure_dbar']
                # rePressure = re.compile('|'.join(pressure_vars))
                # pressure = [s for s in ds.variables if rePressure.search(s)]
                # pressure = ''.join(pressure)
                # y = ds.variables[pressure]
                # yN = pressure
                # y_units = sci.units

                try:
                    y_lab = sci.long_name
                except AttributeError:
                    y_lab = sci.standard_name
                y = dict(data=sci.data[time_ind], info=dict(label=y_lab, units=sci.units, var=var,
                                                            platform=platform, node=node, sensor=sensor))
                title = title_pre + var

                # plot timeseries with outliers
                fig, ax = pf.auto_plot(x, y, title, stdev=None, line_style='r-o', g_range=True)
                pf.resize(width=12, height=8.5)  # resize figure
                save_name = '{}-{}-{}_{}_{}-{}'.format(platform, node, sensor, var, t0, t1)
                pf.save_fig(save_dir, save_name, res=150)  # save figure
                plt.close('all')

                # plot z variable each time, with outliers removed
                fig, ax = pf.depth_cross_section(x, y, title, stdev=1, line_style='r-o', g_range=True)
                pf.resize(width=12, height=8.5)  # resize figure
                save_name = '{}-{}-{}_{}_{}-{}_outliers_removed'.format(platform, node, sensor, var, t0, t1)
                pf.save_fig(save_dir, save_name, res=150)  # save figure
                plt.close('all')

                del x, y
def main(url_list, sDir, deployment_num, start_time, end_time, preferred_only, zdbar, n_std, inpercentile, zcell_size):
    rd_list = []
    for uu in url_list:
        elements = uu.split('/')[-2].split('-')
        rd = '-'.join((elements[1], elements[2], elements[3], elements[4]))
        if rd not in rd_list and 'ENG' not in rd and 'ADCP' not in rd:
            rd_list.append(rd)

    for r in rd_list:
        print('\n{}'.format(r))
        datasets = []
        for u in url_list:
            splitter = u.split('/')[-2].split('-')
            rd_check = '-'.join((splitter[1], splitter[2], splitter[3], splitter[4]))
            if rd_check == r:
                udatasets = cf.get_nc_urls([u])
                datasets.append(udatasets)
        datasets = list(itertools.chain(*datasets))
        fdatasets = []
        if preferred_only == 'yes':
            # get the preferred stream information
            ps_df, n_streams = cf.get_preferred_stream_info(r)
            for index, row in ps_df.iterrows():
                for ii in range(n_streams):
                    try:
                        rms = '-'.join((r, row[ii]))
                    except TypeError:
                        continue
                    for dd in datasets:
                        spl = dd.split('/')[-2].split('-')
                        catalog_rms = '-'.join((spl[1], spl[2], spl[3], spl[4], spl[5], spl[6]))
                        fdeploy = dd.split('/')[-1].split('_')[0]
                        if rms == catalog_rms and fdeploy == row['deployment']:
                            fdatasets.append(dd)
        else:
            fdatasets = datasets

        main_sensor = r.split('-')[-1]
        fdatasets_sel = cf.filter_collocated_instruments(main_sensor, fdatasets)

        for fd in fdatasets_sel:
            part_d = fd.split('/')[-1]
            print('\n{}'.format(part_d))
            ds = xr.open_dataset(fd, mask_and_scale=False)
            ds = ds.swap_dims({'obs': 'time'})

            fname, subsite, refdes, method, stream, deployment = cf.nc_attributes(fd)
            array = subsite[0:2]
            sci_vars = cf.return_science_vars(stream)

            # if 'CE05MOAS' in r or 'CP05MOAS' in r:  # for coastal gliders, get m_water_depth for bathymetry
            #     eng = '-'.join((r.split('-')[0], r.split('-')[1], '00-ENG000000', method, 'glider_eng'))
            #     eng_url = [s for s in url_list if eng in s]
            #     if len(eng_url) == 1:
            #         eng_datasets = cf.get_nc_urls(eng_url)
            #         # filter out collocated datasets
            #         eng_dataset = [j for j in eng_datasets if (eng in j.split('/')[-1] and deployment in j.split('/')[-1])]
            #         if len(eng_dataset) > 0:
            #             ds_eng = xr.open_dataset(eng_dataset[0], mask_and_scale=False)
            #             t_eng = ds_eng['time'].values
            #             m_water_depth = ds_eng['m_water_depth'].values
            #             # m_altimeter_status = 0 means a good reading (not nan or -1)
            #             try:
            #                 eng_ind = ds_eng['m_altimeter_status'].values == 0
            #             except KeyError:
            #                 eng_ind = (~np.isnan(m_water_depth)) & (m_water_depth >= 0)
            #             m_water_depth = m_water_depth[eng_ind]
            #             t_eng = t_eng[eng_ind]
            #             # get rid of any remaining nans or fill values
            #             eng_ind2 = (~np.isnan(m_water_depth)) & (m_water_depth >= 0)
            #             m_water_depth = m_water_depth[eng_ind2]
            #             t_eng = t_eng[eng_ind2]
            #         else:
            #             print('No engineering file for deployment {}'.format(deployment))
            #             m_water_depth = None
            #             t_eng = None
            #     else:
            #         m_water_depth = None
            #         t_eng = None
            # else:
            #     m_water_depth = None
            #     t_eng = None

            if deployment_num is not None:
                if int(deployment[-4:]) != deployment_num:  # equality, not identity
                    print('\nskipping {}'.format(deployment))
                    continue

            if start_time is not None and end_time is not None:
                ds = ds.sel(time=slice(start_time, end_time))
                if len(ds['time'].values) == 0:
                    print('No data to plot for specified time range: ({} to {})'.format(start_time, end_time))
                    continue
                stime = start_time.strftime('%Y-%m-%d')
                etime = end_time.strftime('%Y-%m-%d')
                ext = stime + 'to' + etime  # .join((ds0_method, ds1_method
                save_dir_profile = os.path.join(sDir, array, subsite, refdes, 'profile_plots', deployment, ext)
                save_dir_xsection = os.path.join(sDir, array, subsite, refdes, 'xsection_plots', deployment, ext)
                save_dir_4d = os.path.join(sDir, array, subsite, refdes, 'xsection_plots_4d', deployment, ext)
            else:
                save_dir_profile = os.path.join(sDir, array, subsite, refdes, 'profile_plots', deployment)
                save_dir_xsection = os.path.join(sDir, array, subsite, refdes, 'xsection_plots', deployment)
                save_dir_4d = os.path.join(sDir, array, subsite, refdes, 'xsection_plots_4d', deployment)

            texclude_dir = os.path.join(sDir, array, subsite, refdes, 'time_to_exclude')
            cf.create_dir(texclude_dir)

            time1 = ds['time'].values
            try:
                ds_lat1 = ds['lat'].values
            except KeyError:
                ds_lat1 = None
                print('No latitude variable in file')
            try:
                ds_lon1 = ds['lon'].values
            except KeyError:
                ds_lon1 = None
                print('No longitude variable in file')

            # get pressure variable
            pvarname, y1, y_units, press, y_fillvalue = cf.add_pressure_to_dictionary_of_sci_vars(ds)

            # prepare file to list timestamps with suspect data for each data parameter
            stat_data = pd.DataFrame(columns=['deployments', 'time_to_exclude'])
            file_exclude = '{}/{}_{}_{}_excluded_timestamps.csv'.format(texclude_dir, deployment, refdes, method)
            stat_data.to_csv(file_exclude, index=True)

            # loop through sensor-data parameters
            for sv in sci_vars:
                print(sv)
                if 'pressure' not in sv:
                    z1 = ds[sv].values
                    fv = ds[sv]._FillValue
                    sv_units = ds[sv].units

                    # Check if the array is all NaNs
                    if sum(np.isnan(z1)) == len(z1):
                        print('Array of all NaNs - skipping plot.')
                        continue
                    # Check if the array is all fill values
                    elif len(z1[z1 != fv]) == 0:
                        print('Array of all fill values - skipping plot.')
                        continue
                    else:
                        # remove unreasonable pressure data (e.g. for surface piercing profilers)
                        if zdbar:
                            po_ind = (0 < y1) & (y1 < zdbar)
                            n_zdbar = np.sum(~po_ind)
                            tm = time1[po_ind]
                            y = y1[po_ind]
                            z = z1[po_ind]
                            ds_lat = ds_lat1[po_ind]
                            ds_lon = ds_lon1[po_ind]
                            print('{} in water depth > {} dbar'.format(n_zdbar, zdbar))
                        else:
                            tm = time1
                            y = y1
                            z = z1
                            ds_lat = ds_lat1
                            ds_lon = ds_lon1

                        # reject erroneous data
                        dtime, zpressure, ndata, lenfv, lennan, lenev, lengr, global_min, global_max, lat, lon = \
                            cf.reject_erroneous_data(r, sv, tm, y, z, fv, ds_lat, ds_lon)

                        # get rid of 0.0 data
                        if sv == 'salinity':
                            ind = ndata > 30
                        elif sv == 'density':
                            ind = ndata > 1022.5
                        elif sv == 'conductivity':
                            ind = ndata > 3.45
                        else:
                            ind = ndata > 0
                        # if sv == 'sci_flbbcd_chlor_units':
                        #     ind = ndata < 7.5
                        # elif sv == 'sci_flbbcd_cdom_units':
                        #     ind = ndata < 25
                        # else:
                        #     ind = ndata > 0.0
                        # if 'CTD' in r:
                        #     ind = zpressure > 0.0
                        # else:
                        #     ind = ndata > 0.0

                        lenzero = np.sum(~ind)
                        dtime = dtime[ind]
                        zpressure = zpressure[ind]
                        ndata = ndata[ind]
                        if ds_lat is not None and ds_lon is not None:
                            lat = lat[ind]
                            lon = lon[ind]
                        else:
                            lat = None
                            lon = None

                        if len(dtime) > 0:
                            # reject time range from data portal file export
                            t_portal, z_portal, y_portal, lat_portal, lon_portal = \
                                cf.reject_timestamps_dataportal(subsite, r, dtime, zpressure, ndata, lat, lon)
                            print('removed {} data points using visual inspection of data'.format(
                                len(ndata) - len(z_portal)))

                            # create data groups
                            if len(y_portal) > 0:
                                columns = ['tsec', 'dbar', str(sv)]
                                min_r = int(round(min(y_portal) - zcell_size))
                                max_r = int(round(max(y_portal) + zcell_size))
                                ranges = list(range(min_r, max_r, zcell_size))
                                groups, d_groups = gt.group_by_depth_range(t_portal, y_portal, z_portal, columns, ranges)

                                if 'scatter' in sv:
                                    n_std = None  # use percentiles instead of a SD filter

                                # identifying timestamps from percentile analysis
                                y_avg, n_avg, n_min, n_max, n0_std, n1_std, l_arr, time_ex = cf.reject_timestamps_in_groups(
                                    groups, d_groups, n_std, inpercentile)

                                """
                                writing timestamps to .csv file to use with data_range.py script
                                """
                                if len(time_ex) != 0:
                                    t_exclude = time_ex[0]
                                    for i in range(1, len(time_ex)):
                                        t_exclude = '{}, {}'.format(t_exclude, time_ex[i])
                                    stat_data = pd.DataFrame({'deployments': deployment,
                                                              'time_to_exclude': t_exclude}, index=[sv])
                                    stat_data.to_csv(file_exclude, index=True, mode='a', header=False)

                                # rejecting timestamps from percentile analysis
                                if len(time_ex) > 0:
                                    t_nospct, z_nospct, y_nospct = cf.reject_suspect_data(t_portal, y_portal, z_portal, time_ex)
                                else:
                                    t_nospct = t_portal
                                    z_nospct = z_portal
                                    y_nospct = y_portal

                                """
                                Plot data
                                """
                                if len(t_nospct) > 0:
                                    if len(t_nospct) != len(dtime):
                                        cf.create_dir(save_dir_profile)
                                        cf.create_dir(save_dir_xsection)
                                        sname = '-'.join((r, method, sv))
                                        sfile = '_'.join(('rm_suspect_data', sname,
                                                          pd.to_datetime(t_nospct.min()).strftime('%Y%m%d')))
                                        t0 = pd.to_datetime(t_nospct.min()).strftime('%Y-%m-%dT%H:%M:%S')
                                        t1 = pd.to_datetime(t_nospct.max()).strftime('%Y-%m-%dT%H:%M:%S')
                                        title = ' '.join((deployment, refdes, method)) + '\n' + t0 + ' to ' + t1

                                        if zdbar:
                                            leg_text = (
                                                'removed {} fill values, {} NaNs, {} Extreme Values (1e7), {} Global ranges '
                                                '[{} - {}], {} unreasonable values'.format(
                                                    lenfv, lennan, lenev, lengr, global_min, global_max, lenzero) +
                                                '\nremoved {} in the upper and lower {} percentile of data grouped in {} '
                                                'dbar segments'.format(len(z_portal) - len(z_nospct), inpercentile, zcell_size) +
                                                '\nexcluded {} suspect data points when inspected visually'.format(
                                                    len(ndata) - len(z_portal)) +
                                                '\nexcluded {} suspect data in water depth greater than {} dbar'.format(
                                                    n_zdbar, zdbar),
                                            )
                                        elif n_std:
                                            leg_text = (
                                                'removed {} fill values, {} NaNs, {} Extreme Values (1e7), {} Global ranges '
                                                '[{} - {}], {} unreasonable values'.format(
                                                    lenfv, lennan, lenev, lengr, global_min, global_max, lenzero) +
                                                '\nremoved {} data points +/- {} SD of data grouped in {} dbar segments'.format(
                                                    len(z_portal) - len(z_nospct), n_std, zcell_size) +
                                                '\nexcluded {} suspect data points when inspected visually'.format(
                                                    len(ndata) - len(z_portal)),
                                            )
                                        else:
                                            leg_text = (
                                                'removed {} fill values, {} NaNs, {} Extreme Values (1e7), {} Global ranges '
                                                '[{} - {}], {} unreasonable values'.format(
                                                    lenfv, lennan, lenev, lengr, global_min, global_max, lenzero) +
                                                '\nremoved {} in the upper and lower {} percentile of data grouped in {} '
                                                'dbar segments'.format(len(z_portal) - len(z_nospct), inpercentile, zcell_size) +
                                                '\nexcluded {} suspect data points when inspected visually'.format(
                                                    len(ndata) - len(z_portal)),
                                            )

                                        '''
                                        profile plot
                                        '''
                                        xlabel = sv + " (" + sv_units + ")"
                                        ylabel = press[0] + " (" + y_units[0] + ")"
                                        clabel = 'Time'

                                        # plot non-erroneous data
                                        print('plotting profile')
                                        fig, ax = pf.plot_profiles(z_nospct, y_nospct, t_nospct, ylabel, xlabel, clabel,
                                                                   stdev=None)
                                        ax.set_title(title, fontsize=9)
                                        ax.plot(n_avg, y_avg, '-k')
                                        # ax.fill_betweenx(y_avg, n0_std, n1_std, color='m', alpha=0.2)
                                        ax.legend(leg_text, loc='upper center', bbox_to_anchor=(0.5, -0.17), fontsize=6)
                                        fig.tight_layout()
                                        pf.save_fig(save_dir_profile, sfile)

                                        '''
                                        xsection plot
                                        '''
                                        print('plotting xsection')
                                        clabel = sv + " (" + sv_units + ")"
                                        ylabel = press[0] + " (" + y_units[0] + ")"

                                        # plot bathymetry only within data time ranges
                                        # if t_eng is not None:
                                        #     eng_ind = (t_eng >= np.nanmin(t_array)) & (t_eng <= np.nanmax(t_array))
                                        #     t_eng = t_eng[eng_ind]
                                        #     m_water_depth = m_water_depth[eng_ind]

                                        # plot non-erroneous data
                                        fig, ax, bar = pf.plot_xsection(subsite, t_nospct, y_nospct, z_nospct, clabel,
                                                                        ylabel, t_eng=None, m_water_depth=None,
                                                                        inpercentile=inpercentile, stdev=None)
                                        ax.set_title(title, fontsize=9)
                                        ax.legend(leg_text, loc='upper center', bbox_to_anchor=(0.5, -0.17), fontsize=6)
                                        fig.tight_layout()
                                        pf.save_fig(save_dir_xsection, sfile)
def main(files, out, east_var, north_var, up_var, err_var):
    """
    files: url to an .nc/.ncml file or the path to a text file containing .nc/.ncml links.
           A # at the front will skip links in the text file.
    out: Directory to save plots
    """
    fname, ext = os.path.splitext(files)
    if ext in ('.nc', '.ncml'):
        list_files = [files]
    else:
        list_files = read_file(files)

    stream_vars = pf.load_variable_dict(var='eng')  # load engineering variables

    # the engine that xarray uses can be changed as specified here
    # http://xarray.pydata.org/en/stable/generated/xarray.open_dataset.html#xarray.open_dataset
    for nc in list_files:
        print(nc)
        with xr.open_dataset(nc, mask_and_scale=False) as ds_disk:
            # with xr.open_mfdataset(nc, engine='netcdf4') as ds_disk:
            # change dimensions from 'obs' to 'time'
            ds_disk = ds_disk.swap_dims({'obs': 'time'})
            ds_variables = ds_disk.data_vars.keys()  # list of dataset variables
            stream = ds_disk.stream  # stream name associated with the data
            deployment = 'D0000{}'.format(str(numpy.unique(ds_disk.deployment)[0]))
            title_pre = mk_str(ds_disk.attrs, 't')
            save_pre = mk_str(ds_disk.attrs, 's')
            save_dir = os.path.join(out, ds_disk.subsite, deployment, ds_disk.node, ds_disk.stream, 'pcolor')
            cf.create_dir(save_dir)

            # t0, t1 = cf.get_rounded_start_and_end_times(ds_disk['time'].data)
            # tI = t0 + t1 - (t0 / 2)
            # time_list = [[t0, t1], [t0, tI], [tI, t1]]
            # time_list = [[t0, t1]]
            # for period in time_list:
            #     tt0 = period[0]
            #     tt1 = period[1]
            #     sub_ds = ds_disk.sel(time=slice(str(tt0), str(tt1)))

            north = ds_disk[north_var]
            east = ds_disk[east_var]
            up = ds_disk[up_var]
            error = ds_disk[err_var]

            try:
                bins = ds_disk['bin_depths']
                bins = dict(data=bins.data.T, info=dict(label=bins.long_name, units=bins.units))
            except KeyError:
                # use the matrix indices to plot
                bins = numpy.zeros_like(east.data)
                for i, item in enumerate(east):
                    for jj, xtem in enumerate(east[i]):
                        bins[i][jj] = jj
                bins = numpy.reshape(bins, (bins.shape[-1], bins.shape[0]))
                # mirror the dict structure used in the try branch above
                bins = dict(data=bins, info=dict(label='bin_indices', units=''))
                # the correct way to do this is to calculate the bin_depths, for that you need:
                # 9  First Cell Range (meters) (rounded bin_1_distance average, m)
                # 73 deployment depth of the ADCP instrument (pull from asset-management, depth in m)
                # 21 number of bins (num_cells, m)
                # 4  cell length (cell_length, m)
                # equation with the numbers above would be:
                # depths = 73 - 9 - ([1:21]-1)*4;

            time = dict(data=ds_disk['time'].data, info=dict(label=ds_disk['time'].standard_name, units='GMT'))
            # bins = dict(data=bins.data.T, info=dict(label=bins.long_name, units=bins.units))
            north = dict(data=north.data.T, info=dict(label=north.long_name, units=north.units))
            east = dict(data=east.data.T, info=dict(label=east.long_name, units=east.units))
            up = dict(data=up.data.T, info=dict(label=up.long_name, units=up.units))
            error = dict(data=error.data.T, info=dict(label=error.long_name, units=error.units))

            sname_ew = save_pre + 'E-W-ADCP'
            title = title_pre
            fig, axs = pf.adcp(time, bins, north, east, title)
            pf.resize(width=12, height=8.5)  # resize figure
            pf.save_fig(save_dir, sname_ew, res=250)  # save figure

            sname_ur = save_pre + 'U-R-ADCP'
            fig, axs = pf.adcp(time, bins, up, error, title)
            pf.resize(width=12, height=8.5)  # resize figure
            pf.save_fig(save_dir, sname_ur, res=250)  # save figure
            plt.close('all')
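# Following the recipe in the comment above, a sketch of the intended bin_depths
# calculation (the 9 m first-cell range, 73 m deployment depth, 21 bins and 4 m
# cell length are the example values given in that comment; real values come
# from asset-management and the ADCP configuration):
import numpy as np

first_cell_range = 9    # m, rounded bin_1_distance average
deployment_depth = 73   # m, from asset-management
num_cells = 21
cell_length = 4         # m
# depths = 73 - 9 - ([1:21]-1)*4
bin_depths_ex = deployment_depth - first_cell_range - (np.arange(1, num_cells + 1) - 1) * cell_length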
def main(url_list, sDir, mDir, zcell_size, zdbar, start_time, end_time): """"" URL : path to instrument data by methods sDir : path to the directory on your machine to save files plot_type: folder name for a plot type """ "" rd_list = [] ms_list = [] for uu in url_list: elements = uu.split('/')[-2].split('-') rd = '-'.join((elements[1], elements[2], elements[3], elements[4])) ms = uu.split(rd + '-')[1].split('/')[0] if rd not in rd_list: rd_list.append(rd) if ms not in ms_list: ms_list.append(ms) ''' separate different instruments ''' for r in rd_list: print('\n{}'.format(r)) subsite = r.split('-')[0] array = subsite[0:2] main_sensor = r.split('-')[-1] # read in the analysis file dr_data = cf.refdes_datareview_json(r) # get the preferred stream information ps_df, n_streams = cf.get_preferred_stream_info(r) # get science variable long names from the Data Review Database stream_sci_vars = cd.sci_var_long_names(r) #stream_vars = cd.var_long_names(r) # check if the science variable long names are the same for each stream and initialize empty arrays sci_vars_dict0 = cd.sci_var_long_names_check(stream_sci_vars) # get the list of data files and filter out collocated instruments and other streams datasets = [] for u in url_list: print(u) splitter = u.split('/')[-2].split('-') rd_check = '-'.join( (splitter[1], splitter[2], splitter[3], splitter[4])) if rd_check == r: udatasets = cf.get_nc_urls([u]) datasets.append(udatasets) datasets = list(itertools.chain(*datasets)) fdatasets = cf.filter_collocated_instruments(main_sensor, datasets) fdatasets = cf.filter_other_streams(r, ms_list, fdatasets) # select the list of data files from the preferred dataset for each deployment fdatasets_final = [] for ii in range(len(ps_df)): for x in fdatasets: if ps_df['deployment'][ii] in x and ps_df[0][ii] in x: fdatasets_final.append(x) # build dictionary of science data from the preferred dataset for each deployment print('\nAppending data from files') et = [] sci_vars_dict, y_unit, y_name = cd.append_evaluated_science_data( sDir, ps_df, n_streams, r, fdatasets_final, sci_vars_dict0, et, start_time, end_time) # get end times of deployments deployments = [] end_times = [] for index, row in ps_df.iterrows(): deploy = row['deployment'] deploy_info = cf.get_deployment_information( dr_data, int(deploy[-4:])) deployments.append(int(deploy[-4:])) end_times.append(pd.to_datetime(deploy_info['stop_date'])) """ create a data-ranges table and figure for full data time range """ # create a folder to save data ranges save_dir_stat = os.path.join(mDir, array, subsite) cf.create_dir(save_dir_stat) save_fdir = os.path.join(sDir, array, subsite, r, 'data_range') cf.create_dir(save_fdir) stat_df = pd.DataFrame() for m, n in sci_vars_dict.items(): for sv, vinfo in n['vars'].items(): print(vinfo['var_name']) if len(vinfo['t']) < 1: print('no variable data to plot') continue else: sv_units = vinfo['units'][0] fv = vinfo['fv'][0] t = vinfo['t'] z = vinfo['values'] y = vinfo['pressure'] # Check if the array is all NaNs if sum(np.isnan(z)) == len(z): print('Array of all NaNs - skipping plot.') continue # Check if the array is all fill values elif len(z[z != fv]) == 0: print('Array of all fill values - skipping plot.') continue else: """ clean up data """ # reject erroneous data dtime, zpressure, ndata, lenfv, lennan, lenev, lengr, global_min, global_max = \ cf.reject_erroneous_data(r, sv, t, y, z, fv) # reject timestamps from stat analysis Dpath = '{}/{}/{}/{}/{}'.format(sDir, array, subsite, r, 'time_to_exclude') onlyfiles = [] for item 
in os.listdir(Dpath): if not item.startswith('.') and os.path.isfile( os.path.join(Dpath, item)): onlyfiles.append(join(Dpath, item)) dre = pd.DataFrame() for nn in onlyfiles: dr = pd.read_csv(nn) dre = dre.append(dr, ignore_index=True) drn = dre.loc[dre['Unnamed: 0'] == vinfo['var_name']] list_time = [] for itime in drn.time_to_exclude: ntime = itime.split(', ') list_time.extend(ntime) u_time_list = np.unique(list_time) if len(u_time_list) != 0: t_nospct, z_nospct, y_nospct = cf.reject_suspect_data( dtime, zpressure, ndata, u_time_list) print( '{} using {} percentile of data grouped in {} dbar segments' .format( len(zpressure) - len(z_nospct), inpercentile, zcell_size)) # reject time range from data portal file export t_portal, z_portal, y_portal = cf.reject_timestamps_dataportal( subsite, r, t_nospct, y_nospct, z_nospct) print('{} using visual inspection of data'.format( len(z_nospct) - len(z_portal), inpercentile, zcell_size)) # reject data in a depth range if zdbar is not None: y_ind = y_portal < zdbar t_array = t_portal[y_ind] y_array = y_portal[y_ind] z_array = z_portal[y_ind] else: y_ind = [] t_array = t_portal y_array = y_portal z_array = z_portal print('{} in water depth > {} dbar'.format( len(y_ind), zdbar)) if len(y_array) > 0: if m == 'common_stream_placeholder': sname = '-'.join((vinfo['var_name'], r)) else: sname = '-'.join((vinfo['var_name'], r, m)) """ create data ranges for non - pressure data only """ if 'pressure' in vinfo['var_name']: pass else: columns = ['tsec', 'dbar', str(vinfo['var_name'])] # create depth ranges min_r = int(round(min(y_array) - zcell_size)) max_r = int(round(max(y_array) + zcell_size)) ranges = list(range(min_r, max_r, zcell_size)) # group data by depth groups, d_groups = gt.group_by_depth_range( t_array, y_array, z_array, columns, ranges) print('writing data ranges for {}'.format( vinfo['var_name'])) stat_data = groups.describe()[vinfo['var_name']] stat_data.insert(loc=0, column='parameter', value=sv, allow_duplicates=False) t_deploy = deployments[0] for i in range( len(deployments))[1:len(deployments)]: t_deploy = '{}, {}'.format( t_deploy, deployments[i]) stat_data.insert(loc=1, column='deployments', value=t_deploy, allow_duplicates=False) stat_df = stat_df.append(stat_data, ignore_index=True) """ plot full time range free from errors and suspect data """ clabel = sv + " (" + sv_units + ")" ylabel = (y_name[0][0] + " (" + y_unit[0][0] + ")") title = ' '.join((r, m)) # plot non-erroneous -suspect data fig, ax, bar = pf.plot_xsection(subsite, t_array, y_array, z_array, clabel, ylabel, inpercentile=None, stdev=None) ax.set_title(title, fontsize=9) leg_text = ( 'removed {} fill values, {} NaNs, {} Extreme Values (1e7), {} Global ranges [{} - {}]' .format( len(z) - lenfv, len(z) - lennan, len(z) - lenev, lengr, global_min, global_max) + '\n' + ('removed {} in the upper and lower {} percentile of data grouped in {} dbar segments' .format( len(zpressure) - len(z_nospct), inpercentile, zcell_size)), ) ax.legend(leg_text, loc='upper center', bbox_to_anchor=(0.5, -0.17), fontsize=6) for ii in range(len(end_times)): ax.axvline(x=end_times[ii], color='b', linestyle='--', linewidth=.8) ax.text(end_times[ii], min(y_array) - 5, 'End' + str(deployments[ii]), fontsize=6, style='italic', bbox=dict( boxstyle='round', ec=(0., 0.5, 0.5), fc=(1., 1., 1.), )) fig.tight_layout() sfile = '_'.join(('data_range', sname)) pf.save_fig(save_fdir, sfile) # write stat file stat_df.to_csv('{}/{}_data_ranges.csv'.format(save_dir_stat, r), index=True, float_format='%11.6f')
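# The data-range table above leans on the repo's gt.group_by_depth_range helper and
# pandas describe(). A minimal self-contained sketch of the same depth-binning idea,
# assuming plain numpy inputs (the function name, column names and returned layout
# here are illustrative, not the repo's exact implementation):
import numpy as np
import pandas as pd

def depth_binned_stats(tsec, dbar, values, zcell_size=10):
    """Describe a science variable within fixed-size pressure bins."""
    df = pd.DataFrame({'tsec': tsec, 'dbar': dbar, 'values': values})
    min_r = int(round(df['dbar'].min() - zcell_size))
    max_r = int(round(df['dbar'].max() + zcell_size))
    ranges = list(range(min_r, max_r, zcell_size))
    df['bin'] = pd.cut(df['dbar'], bins=ranges)
    return df.groupby('bin', observed=True)['values'].describe()

# example with synthetic data:
# rng = np.random.default_rng(0)
# print(depth_binned_stats(np.arange(500.), rng.uniform(0, 100, 500), rng.normal(10, 1, 500)))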
def main(sDir, plotting_sDir, url_list, sd_calc): dr = pd.read_csv('https://datareview.marine.rutgers.edu/notes/export') drn = dr.loc[dr.type == 'exclusion'] rd_list = [] for uu in url_list: elements = uu.split('/')[-2].split('-') rd = '-'.join((elements[1], elements[2], elements[3], elements[4])) if rd not in rd_list: rd_list.append(rd) for r in rd_list: print('\n{}'.format(r)) datasets = [] for u in url_list: splitter = u.split('/')[-2].split('-') rd_check = '-'.join( (splitter[1], splitter[2], splitter[3], splitter[4])) if rd_check == r: udatasets = cf.get_nc_urls([u]) datasets.append(udatasets) datasets = list(itertools.chain(*datasets)) fdatasets = [] # get the preferred stream information ps_df, n_streams = cf.get_preferred_stream_info(r) pms = [] for index, row in ps_df.iterrows(): for ii in range(n_streams): try: rms = '-'.join((r, row[ii])) pms.append(row[ii]) except TypeError: continue for dd in datasets: spl = dd.split('/')[-2].split('-') catalog_rms = '-'.join( (spl[1], spl[2], spl[3], spl[4], spl[5], spl[6])) fdeploy = dd.split('/')[-1].split('_')[0] if rms == catalog_rms and fdeploy == row['deployment']: fdatasets.append(dd) main_sensor = r.split('-')[-1] fdatasets_sel = cf.filter_collocated_instruments( main_sensor, fdatasets) # find time ranges to exclude from analysis for data review database subsite = r.split('-')[0] subsite_node = '-'.join((subsite, r.split('-')[1])) drne = drn.loc[drn.reference_designator.isin( [subsite, subsite_node, r])] et = [] for i, row in drne.iterrows(): sdate = cf.format_dates(row.start_date) edate = cf.format_dates(row.end_date) et.append([sdate, edate]) # get science variable long names from the Data Review Database stream_sci_vars = cd.sci_var_long_names(r) # check if the science variable long names are the same for each stream sci_vars_dict = cd.sci_var_long_names_check(stream_sci_vars) # get the preferred stream information ps_df, n_streams = cf.get_preferred_stream_info(r) # build dictionary of science data from the preferred dataset for each deployment print('\nAppending data from files') sci_vars_dict, pressure_unit, pressure_name = cd.append_science_data( ps_df, n_streams, r, fdatasets_sel, sci_vars_dict, et) # analyze combined dataset print('\nAnalyzing combined dataset and writing summary file') array = subsite[0:2] save_dir = os.path.join(sDir, array, subsite) cf.create_dir(save_dir) rows = [] if ('FLM' in r) and ( 'CTDMO' in r ): # calculate Flanking Mooring CTDMO stats based on pressure headers = [ 'common_stream_name', 'preferred_methods_streams', 'deployments', 'long_name', 'units', 't0', 't1', 'fill_value', 'global_ranges', 'n_all', 'press_min_max', 'n_excluded_forpress', 'n_nans', 'n_fillvalues', 'n_grange', 'define_stdev', 'n_outliers', 'n_stats', 'mean', 'min', 'max', 'stdev', 'note' ] else: headers = [ 'common_stream_name', 'preferred_methods_streams', 'deployments', 'long_name', 'units', 't0', 't1', 'fill_value', 'global_ranges', 'n_all', 'n_nans', 'n_fillvalues', 'n_grange', 'define_stdev', 'n_outliers', 'n_stats', 'mean', 'min', 'max', 'stdev' ] for m, n in sci_vars_dict.items(): print('\nSTREAM: ', m) if m == 'common_stream_placeholder': m = 'science_data_stream' if m == 'metbk_hourly': # don't calculate ranges for metbk_hourly continue if ('FLM' in r) and ( 'CTDMO' in r ): # calculate Flanking Mooring CTDMO stats based on pressure # index the pressure variable to filter and calculate stats on the rest of the variables sv_press = 'Seawater Pressure' vinfo_press = n['vars'][sv_press] # first, index where data are nans, 
fill values, and outside of global ranges fv_press = list(np.unique(vinfo_press['fv']))[0] pdata = vinfo_press['values'] [pind, __, __, __, __, __] = index_dataset(r, vinfo_press['var_name'], pdata, fv_press) pdata_filtered = pdata[pind] [__, pmean, __, __, psd, __] = cf.variable_statistics(pdata_filtered, None) # index of pressure = average of all 'valid' pressure data +/- 1 SD ipress_min = pmean - psd ipress_max = pmean + psd ind_press = (pdata >= ipress_min) & (pdata <= ipress_max) # calculate stats for all variables print('\nPARAMETERS:') for sv, vinfo in n['vars'].items(): print(sv) fv_lst = np.unique(vinfo['fv']).tolist() if len(fv_lst) == 1: fill_value = fv_lst[0] else: print('No unique fill value for {}'.format(sv)) lunits = np.unique(vinfo['units']).tolist() n_all = len(vinfo['t']) # filter data based on pressure index t_filtered = vinfo['t'][ind_press] data_filtered = vinfo['values'][ind_press] deploy_filtered = vinfo['deployments'][ind_press] n_excluded = n_all - len(t_filtered) [dataind, g_min, g_max, n_nan, n_fv, n_grange] = index_dataset(r, vinfo['var_name'], data_filtered, fill_value) t_final = t_filtered[dataind] data_final = data_filtered[dataind] deploy_final = deploy_filtered[dataind] t0 = pd.to_datetime( min(t_final)).strftime('%Y-%m-%dT%H:%M:%S') t1 = pd.to_datetime( max(t_final)).strftime('%Y-%m-%dT%H:%M:%S') deploy = list(np.unique(deploy_final)) deployments = [int(dd) for dd in deploy] if len(data_final) > 1: [num_outliers, mean, vmin, vmax, sd, n_stats ] = cf.variable_statistics(data_final, sd_calc) else: mean = None vmin = None vmax = None sd = None n_stats = None note = 'restricted stats calculation to data points where pressure is within defined ranges' \ ' (average of all pressure data +/- 1 SD)' rows.append([ m, list(np.unique(pms)), deployments, sv, lunits, t0, t1, fv_lst, [g_min, g_max], n_all, [round(ipress_min, 2), round(ipress_max, 2)], n_excluded, n_nan, n_fv, n_grange, sd_calc, num_outliers, n_stats, mean, vmin, vmax, sd, note ]) # plot CTDMO data used for stats psave_dir = os.path.join(plotting_sDir, array, subsite, r, 'timeseries_plots_stats') cf.create_dir(psave_dir) dr_data = cf.refdes_datareview_json(r) deployments = [] end_times = [] for index, row in ps_df.iterrows(): deploy = row['deployment'] deploy_info = cf.get_deployment_information( dr_data, int(deploy[-4:])) deployments.append(int(deploy[-4:])) end_times.append( pd.to_datetime(deploy_info['stop_date'])) sname = '-'.join((r, sv)) fig, ax = pf.plot_timeseries_all(t_final, data_final, sv, lunits[0], stdev=None) ax.set_title( (r + '\nDeployments: ' + str(sorted(deployments)) + '\n' + t0 + ' - ' + t1), fontsize=8) for etimes in end_times: ax.axvline(x=etimes, color='k', linestyle='--', linewidth=.6) pf.save_fig(psave_dir, sname) if sd_calc: sname = '-'.join((r, sv, 'rmoutliers')) fig, ax = pf.plot_timeseries_all(t_final, data_final, sv, lunits[0], stdev=sd_calc) ax.set_title( (r + '\nDeployments: ' + str(sorted(deployments)) + '\n' + t0 + ' - ' + t1), fontsize=8) for etimes in end_times: ax.axvline(x=etimes, color='k', linestyle='--', linewidth=.6) pf.save_fig(psave_dir, sname) else: if not sd_calc: sdcalc = None print('\nPARAMETERS: ') for sv, vinfo in n['vars'].items(): print(sv) fv_lst = np.unique(vinfo['fv']).tolist() if len(fv_lst) == 1: fill_value = fv_lst[0] else: print(fv_lst) print('No unique fill value for {}'.format(sv)) lunits = np.unique(vinfo['units']).tolist() t = vinfo['t'] if len(t) > 1: data = vinfo['values'] n_all = len(t) if 'SPKIR' in r or 'presf_abc_wave_burst' in m: if 
'SPKIR' in r: [dd_data, g_min, g_max, n_nan, n_fv, n_grange] = index_dataset_2d( r, 'spkir_abj_cspp_downwelling_vector', data, fill_value) else: [dd_data, g_min, g_max, n_nan, n_fv, n_grange] = index_dataset_2d( r, 'presf_wave_burst_pressure', data, fill_value) t_final = t t0 = pd.to_datetime( min(t_final)).strftime('%Y-%m-%dT%H:%M:%S') t1 = pd.to_datetime( max(t_final)).strftime('%Y-%m-%dT%H:%M:%S') deploy_final = vinfo['deployments'] deploy = list(np.unique(deploy_final)) deployments = [int(dd) for dd in deploy] num_outliers = [] mean = [] vmin = [] vmax = [] sd = [] n_stats = [] for i in range(len(dd_data)): dd = data[i] # drop nans before calculating stats dd = dd[~np.isnan(dd)] [ num_outliersi, meani, vmini, vmaxi, sdi, n_statsi ] = cf.variable_statistics(dd, sd_calc) num_outliers.append(num_outliersi) mean.append(meani) vmin.append(vmini) vmax.append(vmaxi) sd.append(sdi) n_stats.append(n_statsi) else: [dataind, g_min, g_max, n_nan, n_fv, n_grange] = index_dataset(r, vinfo['var_name'], data, fill_value) t_final = t[dataind] if len(t_final) > 0: t0 = pd.to_datetime( min(t_final)).strftime('%Y-%m-%dT%H:%M:%S') t1 = pd.to_datetime( max(t_final)).strftime('%Y-%m-%dT%H:%M:%S') data_final = data[dataind] # if sv == 'Dissolved Oxygen Concentration': # xx = (data_final > 0) & (data_final < 400) # data_final = data_final[xx] # t_final = t_final[xx] # if sv == 'Seawater Conductivity': # xx = (data_final > 1) & (data_final < 400) # data_final = data_final[xx] # t_final = t_final[xx] deploy_final = vinfo['deployments'][dataind] deploy = list(np.unique(deploy_final)) deployments = [int(dd) for dd in deploy] if len(data_final) > 1: [ num_outliers, mean, vmin, vmax, sd, n_stats ] = cf.variable_statistics( data_final, sd_calc) else: sdcalc = None num_outliers = None mean = None vmin = None vmax = None sd = None n_stats = None else: sdcalc = None num_outliers = None mean = None vmin = None vmax = None sd = None n_stats = None deployments = None t0 = None t1 = None else: sdcalc = None num_outliers = None mean = None vmin = None vmax = None sd = None n_stats = None deployments = None t0 = None t1 = None t_final = [] if sd_calc: print_sd = sd_calc else: print_sd = sdcalc rows.append([ m, list(np.unique(pms)), deployments, sv, lunits, t0, t1, fv_lst, [g_min, g_max], n_all, n_nan, n_fv, n_grange, print_sd, num_outliers, n_stats, mean, vmin, vmax, sd ]) if len(t_final) > 0: # plot data used for stats psave_dir = os.path.join( plotting_sDir, array, subsite, r, 'timeseries_reviewed_datarange') cf.create_dir(psave_dir) dr_data = cf.refdes_datareview_json(r) deployments = [] end_times = [] for index, row in ps_df.iterrows(): deploy = row['deployment'] deploy_info = cf.get_deployment_information( dr_data, int(deploy[-4:])) deployments.append(int(deploy[-4:])) end_times.append( pd.to_datetime(deploy_info['stop_date'])) sname = '-'.join((r, sv)) # plot hourly averages for streaming data if 'streamed' in sci_vars_dict[list( sci_vars_dict.keys())[0]]['ms'][0]: sname = '-'.join((sname, 'hourlyavg')) df = pd.DataFrame({ 'dfx': t_final, 'dfy': data_final }) dfr = df.resample('H', on='dfx').mean() # Plot all data fig, ax = pf.plot_timeseries_all(dfr.index, dfr['dfy'], sv, lunits[0], stdev=None) ax.set_title((r + '\nDeployments: ' + str(sorted(deployments)) + '\n' + t0 + ' - ' + t1), fontsize=8) for etimes in end_times: ax.axvline(x=etimes, color='k', linestyle='--', linewidth=.6) pf.save_fig(psave_dir, sname) if sd_calc: sname = '-'.join( (sname, 'hourlyavg_rmoutliers')) fig, ax = pf.plot_timeseries_all(dfr.index, 
dfr['dfy'], sv, lunits[0], stdev=sd_calc) ax.set_title((r + '\nDeployments: ' + str(sorted(deployments)) + '\n' + t0 + ' - ' + t1), fontsize=8) for etimes in end_times: ax.axvline(x=etimes, color='k', linestyle='--', linewidth=.6) pf.save_fig(psave_dir, sname) elif 'SPKIR' in r: fig, ax = pf.plot_spkir(t_final, dd_data, sv, lunits[0]) ax.set_title((r + '\nDeployments: ' + str(sorted(deployments)) + '\n' + t0 + ' - ' + t1), fontsize=8) for etimes in end_times: ax.axvline(x=etimes, color='k', linestyle='--', linewidth=.6) pf.save_fig(psave_dir, sname) # plot each wavelength wavelengths = [ '412nm', '443nm', '490nm', '510nm', '555nm', '620nm', '683nm' ] for wvi in range(len(dd_data)): fig, ax = pf.plot_spkir_wv( t_final, dd_data[wvi], sv, lunits[0], wvi) ax.set_title((r + '\nDeployments: ' + str(sorted(deployments)) + '\n' + t0 + ' - ' + t1), fontsize=8) for etimes in end_times: ax.axvline(x=etimes, color='k', linestyle='--', linewidth=.6) snamewvi = '-'.join((sname, wavelengths[wvi])) pf.save_fig(psave_dir, snamewvi) elif 'presf_abc_wave_burst' in m: fig, ax = pf.plot_presf_2d(t_final, dd_data, sv, lunits[0]) ax.set_title((r + '\nDeployments: ' + str(sorted(deployments)) + '\n' + t0 + ' - ' + t1), fontsize=8) for etimes in end_times: ax.axvline(x=etimes, color='k', linestyle='--', linewidth=.6) snamewave = '-'.join((sname, m)) pf.save_fig(psave_dir, snamewave) else: # plot all data if not streamed fig, ax = pf.plot_timeseries_all(t_final, data_final, sv, lunits[0], stdev=None) ax.set_title((r + '\nDeployments: ' + str(sorted(deployments)) + '\n' + t0 + ' - ' + t1), fontsize=8) for etimes in end_times: ax.axvline(x=etimes, color='k', linestyle='--', linewidth=.6) pf.save_fig(psave_dir, sname) if sd_calc: sname = '-'.join((r, sv, 'rmoutliers')) fig, ax = pf.plot_timeseries_all(t_final, data_final, sv, lunits[0], stdev=sd_calc) ax.set_title((r + '\nDeployments: ' + str(sorted(deployments)) + '\n' + t0 + ' - ' + t1), fontsize=8) for etimes in end_times: ax.axvline(x=etimes, color='k', linestyle='--', linewidth=.6) pf.save_fig(psave_dir, sname) fsum = pd.DataFrame(rows, columns=headers) fsum.to_csv('{}/{}_data_ranges.csv'.format(save_dir, r), index=False)
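# The Flanking Mooring CTDMO branch above keeps only samples whose pressure falls
# within the deployment-wide mean +/- 1 SD before computing statistics. A minimal
# sketch of that index, assuming a plain numpy pressure array (the function name is
# hypothetical; the repo computes the mean/SD via cf.variable_statistics):
import numpy as np

def pressure_window_index(pressure, n_sd=1):
    """Boolean index keeping samples with pressure within mean +/- n_sd * SD."""
    pmean = np.nanmean(pressure)
    psd = np.nanstd(pressure)
    return (pressure >= pmean - n_sd * psd) & (pressure <= pmean + n_sd * psd)

# p = np.array([1500.2, 1500.4, 1499.9, 30.0])  # one unrealistically shallow point
# print(pressure_window_index(p))               # -> [ True  True  True False]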
def main(url_list, sDir, stime, etime):
    if len(url_list) != 2:
        print('Please provide 2 reference designators for plotting')
    else:
        uu0 = url_list[0]
        uu1 = url_list[1]
        rd0 = uu0.split('/')[-2][20:47]
        rd1 = uu1.split('/')[-2][20:47]
        array = rd0[0:2]
        inst = rd0.split('-')[-1]
        datasets0 = []
        datasets1 = []
        for i in range(len(url_list)):
            udatasets = cf.get_nc_urls([url_list[i]])
            if i == 0:
                datasets0.append(udatasets)
            else:
                datasets1.append(udatasets)
        datasets0 = list(itertools.chain(*datasets0))
        datasets1 = list(itertools.chain(*datasets1))
        main_sensor0 = rd0.split('-')[-1]
        main_sensor1 = rd1.split('-')[-1]
        fdatasets0_sel = cf.filter_collocated_instruments(main_sensor0, datasets0)
        fdatasets1_sel = cf.filter_collocated_instruments(main_sensor1, datasets1)
        deployments = [dd.split('/')[-1].split('_')[0] for dd in fdatasets0_sel]
        for d in deployments:
            fd0 = [x for x in fdatasets0_sel if d in x]
            fd1 = [x for x in fdatasets1_sel if d in x]
            ds0 = xr.open_dataset(fd0[0], mask_and_scale=False)
            ds0 = ds0.swap_dims({'obs': 'time'})
            ds1 = xr.open_dataset(fd1[0], mask_and_scale=False)
            ds1 = ds1.swap_dims({'obs': 'time'})
            if stime is not None and etime is not None:
                ds0 = ds0.sel(time=slice(stime, etime))
                ds1 = ds1.sel(time=slice(stime, etime))
                if len(ds0['time'].values) == 0:
                    print('No data to plot for specified time range: ({} to {})'.format(stime, etime))
                    continue
            fname, subsite, refdes, method, stream, deployment = cf.nc_attributes(fd0[0])
            sci_vars = cf.return_science_vars(stream)
            save_dir_profile = os.path.join(sDir, array, subsite, inst, 'profile_plots', deployment)
            cf.create_dir(save_dir_profile)
            # get pressure variable
            pvarname, y1, y_units, press, y_fillvalue = cf.add_pressure_to_dictionary_of_sci_vars(ds0)
            for sv in sci_vars:
                print('')
                print(sv)
                if 'pressure' not in sv:
                    fig, ax = plt.subplots()
                    plt.margins(y=.08, x=.02)
                    plt.grid()
                    title = ' '.join((deployment, subsite, inst, method))
                    sname = '-'.join((subsite, inst, method, sv))
                    for i in range(len(url_list)):
                        if i == 0:
                            ds = ds0
                        else:
                            ds = ds1
                        t = ds['time'].values
                        zpressure = ds[pvarname].values
                        z1 = ds[sv].values
                        fv = ds[sv]._FillValue
                        sv_units = ds[sv].units
                        # Check if the array is all NaNs
                        if sum(np.isnan(z1)) == len(z1):
                            print('Array of all NaNs - skipping plot.')
                            continue
                        # Check if the array is all fill values
                        elif len(z1[z1 != fv]) == 0:
                            print('Array of all fill values - skipping plot.')
                            continue
                        else:
                            # get rid of 0.0 data
                            if sv == 'salinity':
                                ind = z1 > 1
                            elif sv == 'density':
                                ind = z1 > 1000
                            elif sv == 'conductivity':
                                ind = z1 > 0.1
                            elif sv == 'dissolved_oxygen':
                                ind = z1 > 160
                            elif sv == 'estimated_oxygen_concentration':
                                ind = z1 > 200
                            else:
                                ind = z1 > 0
                            # if sv == 'sci_flbbcd_chlor_units':
                            #     ind = ndata < 7.5
                            # elif sv == 'sci_flbbcd_cdom_units':
                            #     ind = ndata < 25
                            # else:
                            #     ind = ndata > 0.0
                            # if 'CTD' in r:
                            #     ind = zpressure > 0.0
                            # else:
                            #     ind = ndata > 0.0
                            lenzero = np.sum(~ind)
                            dtime = t[ind]
                            zpressure = zpressure[ind]
                            zdata = z1[ind]
                            if len(dtime) > 0:
                                ax.scatter(zdata, zpressure, s=2, edgecolor='None')
                    xlabel = sv + " (" + sv_units + ")"
                    ylabel = press[0] + " (" + y_units[0] + ")"
                    ax.invert_yaxis()
                    # plt.xlim([-0.5, 0.5])
                    ax.set_xlabel(xlabel, fontsize=9)
                    ax.set_ylabel(ylabel, fontsize=9)
                    ax.set_title(title + '\nWFP02 (blue) & WFP03 (orange)', fontsize=9)
                    fig.tight_layout()
                    pf.save_fig(save_dir_profile, sname)
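# Comparing two collocated wire-following profilers reduces to two scatters on one
# inverted pressure axis. A self-contained matplotlib sketch of the overlay above
# (synthetic data; the WFP labels echo the hard-coded title in the function):
import matplotlib.pyplot as plt
import numpy as np

def compare_profiles(z0, p0, z1, p1, xlabel='variable', ylabel='pressure (dbar)'):
    """Overlay two instruments' profiles on a shared, inverted pressure axis."""
    fig, ax = plt.subplots()
    ax.scatter(z0, p0, s=2, edgecolor='None', label='WFP02')
    ax.scatter(z1, p1, s=2, edgecolor='None', label='WFP03')
    ax.invert_yaxis()  # pressure increases downward
    ax.set_xlabel(xlabel, fontsize=9)
    ax.set_ylabel(ylabel, fontsize=9)
    ax.legend(fontsize=8)
    fig.tight_layout()
    return fig, ax

# p = np.linspace(0, 200, 300)
# compare_profiles(10 + np.sin(p / 20), p, 10.2 + np.sin(p / 20), p)
# plt.show()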
def main(sDir, url_list, start_time, end_time, preferred_only): rd_list = [] for uu in url_list: elements = uu.split('/')[-2].split('-') rd = '-'.join((elements[1], elements[2], elements[3], elements[4])) if rd not in rd_list: rd_list.append(rd) for r in rd_list: print('\n{}'.format(r)) datasets = [] for u in url_list: splitter = u.split('/')[-2].split('-') rd_check = '-'.join( (splitter[1], splitter[2], splitter[3], splitter[4])) if rd_check == r: udatasets = cf.get_nc_urls([u]) datasets.append(udatasets) datasets = list(itertools.chain(*datasets)) fdatasets = [] if preferred_only == 'yes': # get the preferred stream information ps_df, n_streams = cf.get_preferred_stream_info(r) for index, row in ps_df.iterrows(): for ii in range(n_streams): rms = '-'.join((r, row[ii])) for dd in datasets: spl = dd.split('/')[-2].split('-') catalog_rms = '-'.join( (spl[1], spl[2], spl[3], spl[4], spl[5], spl[6])) fdeploy = dd.split('/')[-1].split('_')[0] if rms == catalog_rms and fdeploy == row['deployment']: fdatasets.append(dd) else: fdatasets = datasets main_sensor = r.split('-')[-1] fdatasets_sel = cf.filter_collocated_instruments( main_sensor, fdatasets) for fd in fdatasets_sel: with xr.open_dataset(fd, mask_and_scale=False) as ds: ds = ds.swap_dims({'obs': 'time'}) if start_time is not None and end_time is not None: ds = ds.sel(time=slice(start_time, end_time)) if len(ds['time'].values) == 0: print( 'No data to plot for specified time range: ({} to {})' .format(start_time, end_time)) continue fname, subsite, refdes, method, stream, deployment = cf.nc_attributes( fd) print('\nPlotting {} {}'.format(r, deployment)) array = subsite[0:2] save_dir = os.path.join(sDir, array, subsite, refdes, 'timeseries_panel_plots') filename = '_'.join(fname.split('_')[:-1]) sci_vars = cf.return_science_vars(stream) if len(sci_vars) > 1: cf.create_dir(save_dir) colors = cm.jet(np.linspace(0, 1, len(sci_vars))) t = ds['time'].values t0 = pd.to_datetime(t.min()).strftime('%Y-%m-%dT%H:%M:%S') t1 = pd.to_datetime(t.max()).strftime('%Y-%m-%dT%H:%M:%S') title = ' '.join((deployment, refdes, method)) # Plot data with outliers removed fig, ax = pf.plot_timeseries_panel(ds, t, sci_vars, colors, 5) plt.xticks(fontsize=7) ax[0].set_title((title + '\n' + t0 + ' - ' + t1), fontsize=7) sfile = '-'.join((filename, 'timeseries_panel', t0[:10])) pf.save_fig(save_dir, sfile) else: print( 'Only one science variable in file, no panel plots necessary' )
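# pf.plot_timeseries_panel stacks one subplot per science variable on a shared time
# axis. A minimal sketch of that layout with plain matplotlib (the function name and
# dict input are illustrative assumptions, not the repo's signature):
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

def timeseries_panel(t, variables):
    """One stacked subplot per variable, sharing the x (time) axis."""
    fig, axs = plt.subplots(len(variables), 1, sharex=True)
    colors = plt.cm.jet(np.linspace(0, 1, len(variables)))
    for ax, (name, values), c in zip(np.atleast_1d(axs), variables.items(), colors):
        ax.plot(t, values, '.', color=c, markersize=2)
        ax.set_ylabel(name, fontsize=7)
    return fig, axs

# t = pd.date_range('2017-01-01', periods=100, freq='H')
# timeseries_panel(t, {'temperature': np.random.rand(100), 'salinity': np.random.rand(100)})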
def main(folder, out, time_break):
    """
    folder: path(s) to .nc files to open as a single dataset
    out: Directory to save plots
    time_break: name of the dataset variable used to split plots (e.g. deployment)
    """
    with xr.open_mfdataset(folder, mask_and_scale=False) as ds:
        # change dimensions from 'obs' to 'time'
        ds = ds.swap_dims({'obs': 'time'})
        ds_variables = ds.data_vars.keys()  # List of dataset variables
        stream = ds.stream  # List stream name associated with the data
        title_pre = mk_str(ds.attrs, 't')  # , var, tt0, tt1, 't')
        save_pre = mk_str(ds.attrs, 's')  # , var, tt0, tt1, 's')
        platform = ds.subsite
        node = ds.node
        sensor = ds.sensor
        save_dir = os.path.join(out, ds.subsite, ds.node, ds.stream, 'timeseries')
        cf.create_dir(save_dir)
        stream_vars = pf.load_variable_dict(var='eng')  # load engineering variables
        try:
            eng = stream_vars[stream]  # select specific streams engineering variables
        except KeyError:
            eng = ['']
        misc = ['timestamp', 'provenance', 'qc', 'id', 'obs', 'deployment', 'volts', 'counts', 'quality_flag']
        reg_ex = re.compile('|'.join(eng + misc))  # make regular expression
        # keep variables that are not in the regular expression
        sci_vars = [s for s in ds_variables if not reg_ex.search(s)]
        # t0, t1 = pf.get_rounded_start_and_end_times(ds_disk['time'].data)
        # tI = (pd.to_datetime(t0) + (pd.to_datetime(t1) - pd.to_datetime(t0)) / 2)
        # time_list = [[t0, t1], [t0, tI], [tI, t1]]
        times = np.unique(ds[time_break])
        for t in times:
            time_ind = t == ds[time_break].data
            for var in sci_vars:
                x = dict(data=ds['time'].data[time_ind], info=dict(label='Time', units='GMT'))
                t0 = pd.to_datetime(x['data'].min()).strftime('%Y-%m-%dT%H%M%S')
                t1 = pd.to_datetime(x['data'].max()).strftime('%Y-%m-%dT%H%M%S')
                try:
                    sci = ds[var]
                    print(var)
                except UnicodeEncodeError:  # some comments have latex characters
                    ds[var].attrs.pop('comment')  # remove from the attributes
                    sci = ds[var]  # or else the variable won't load
                # define possible pressure variables
                pressure_vars = ['seawater_pressure', 'sci_water_pressure_dbar',
                                 'ctdgv_m_glider_instrument_recovered-sci_water_pressure_dbar',
                                 'ctdgv_m_glider_instrument-sci_water_pressure_dbar']
                rePressure = re.compile('|'.join(pressure_vars))
                # identify the pressure variable present in this dataset
                # (the y-axis dict below is built from the science variable itself)
                pressure = ''.join([s for s in ds.data_vars if rePressure.search(s)])
                yN = pressure
                y_units = sci.units
                try:
                    y_lab = sci.long_name
                except AttributeError:
                    y_lab = sci.standard_name
                y = dict(data=sci.data[time_ind], info=dict(label=y_lab, units=sci.units, var=var,
                                                            platform=platform, node=node, sensor=sensor))
                title = title_pre + var
                # plot timeseries with outliers
                fig, ax = pf.auto_plot(x, y, title, stdev=None, line_style='r-o', g_range=True)
                pf.resize(width=12, height=8.5)  # Resize figure
                save_name = '{}-{}-{}_{}_{}-{}'.format(platform, node, sensor, var, t0, t1)
                pf.save_fig(save_dir, save_name, res=150)  # Save figure
                plt.close('all')
                # plot timeseries with outliers removed
                fig, ax = pf.depth_cross_section(x, y, title, stdev=1, line_style='r-o', g_range=True)
                pf.resize(width=12, height=8.5)  # Resize figure
                save_name = '{}-{}-{}_{}_{}-{}_outliers_removed'.format(platform, node, sensor, var, t0, t1)
                pf.save_fig(save_dir, save_name, res=150)  # Save figure
                plt.close('all')
                del x, y
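# The engineering-variable filter above is just a joined regular expression searched
# against each variable name. A self-contained sketch (the exclude list echoes the
# misc list in the function; the printed output is from this sketch, not the repo):
import re

def science_variables(all_vars, exclude=('timestamp', 'provenance', 'qc', 'id', 'obs',
                                         'deployment', 'volts', 'counts', 'quality_flag')):
    """Keep variables whose names match none of the excluded substrings."""
    reg_ex = re.compile('|'.join(exclude))
    return [v for v in all_vars if not reg_ex.search(v)]

# print(science_variables(['seawater_temperature', 'temperature_qc_executed', 'obs', 'salinity']))
# -> ['seawater_temperature', 'salinity']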
def main(url_list, sDir, deployment_num, start_time, end_time, preferred_only, n_std, inpercentile, zcell_size, zdbar):
    rd_list = []
    for uu in url_list:
        elements = uu.split('/')[-2].split('-')
        rd = '-'.join((elements[1], elements[2], elements[3], elements[4]))
        if rd not in rd_list and 'ENG' not in rd and 'ADCP' not in rd:
            rd_list.append(rd)

    for r in rd_list:
        print('\n{}'.format(r))
        datasets = []
        for u in url_list:
            splitter = u.split('/')[-2].split('-')
            rd_check = '-'.join((splitter[1], splitter[2], splitter[3], splitter[4]))
            if rd_check == r:
                udatasets = cf.get_nc_urls([u])
                datasets.append(udatasets)
        datasets = list(itertools.chain(*datasets))
        fdatasets = []
        if preferred_only == 'yes':
            # get the preferred stream information
            ps_df, n_streams = cf.get_preferred_stream_info(r)
            for index, row in ps_df.iterrows():
                for ii in range(n_streams):
                    try:
                        rms = '-'.join((r, row[ii]))
                    except TypeError:
                        continue
                    for dd in datasets:
                        spl = dd.split('/')[-2].split('-')
                        catalog_rms = '-'.join((spl[1], spl[2], spl[3], spl[4], spl[5], spl[6]))
                        fdeploy = dd.split('/')[-1].split('_')[0]
                        if rms == catalog_rms and fdeploy == row['deployment']:
                            fdatasets.append(dd)
        else:
            fdatasets = datasets

        main_sensor = r.split('-')[-1]
        fdatasets_sel = cf.filter_collocated_instruments(main_sensor, fdatasets)

        for fd in fdatasets_sel:
            part_d = fd.split('/')[-1]
            print('\n{}'.format(part_d))
            ds = xr.open_dataset(fd, mask_and_scale=False)
            ds = ds.swap_dims({'obs': 'time'})
            fname, subsite, refdes, method, stream, deployment = cf.nc_attributes(fd)
            array = subsite[0:2]
            sci_vars = cf.return_science_vars(stream)

            # if 'CE05MOAS' in r or 'CP05MOAS' in r:  # for coastal gliders, get m_water_depth for bathymetry
            #     eng = '-'.join((r.split('-')[0], r.split('-')[1], '00-ENG000000', method, 'glider_eng'))
            #     eng_url = [s for s in url_list if eng in s]
            #     if len(eng_url) == 1:
            #         eng_datasets = cf.get_nc_urls(eng_url)
            #         # filter out collocated datasets
            #         eng_dataset = [j for j in eng_datasets if (eng in j.split('/')[-1] and deployment in j.split('/')[-1])]
            #         if len(eng_dataset) > 0:
            #             ds_eng = xr.open_dataset(eng_dataset[0], mask_and_scale=False)
            #             t_eng = ds_eng['time'].values
            #             m_water_depth = ds_eng['m_water_depth'].values
            #             # m_altitude = glider height above seafloor
            #             # m_depth = glider depth in the water column
            #             # m_altitude = ds_eng['m_altitude'].values
            #             # m_depth = ds_eng['m_depth'].values
            #             # calc_water_depth = m_altitude + m_depth
            #             # m_altimeter_status = 0 means a good reading (not nan or -1)
            #             try:
            #                 eng_ind = ds_eng['m_altimeter_status'].values == 0
            #             except KeyError:
            #                 eng_ind = (~np.isnan(m_water_depth)) & (m_water_depth >= 0)
            #             m_water_depth = m_water_depth[eng_ind]
            #             t_eng = t_eng[eng_ind]
            #             # get rid of any remaining nans or fill values
            #             eng_ind2 = (~np.isnan(m_water_depth)) & (m_water_depth >= 0)
            #             m_water_depth = m_water_depth[eng_ind2]
            #             t_eng = t_eng[eng_ind2]
            #         else:
            #             print('No engineering file for deployment {}'.format(deployment))
            #             m_water_depth = None
            #             t_eng = None
            #     else:
            #         m_water_depth = None
            #         t_eng = None
            # else:
            #     m_water_depth = None
            #     t_eng = None

            if deployment_num is not None:
                if int(deployment[-4:]) != deployment_num:
                    continue

            if start_time is not None and end_time is not None:
                ds = ds.sel(time=slice(start_time, end_time))
                if len(ds['time'].values) == 0:
                    print('No data to plot for specified time range: ({} to {})'.format(start_time, end_time))
                    continue
                stime = start_time.strftime('%Y-%m-%d')
                etime = end_time.strftime('%Y-%m-%d')
                ext = stime + 'to' + etime
                save_dir_profile = os.path.join(sDir, array, subsite, refdes, 'profile_plots', deployment, ext)
                save_dir_xsection = os.path.join(sDir, array, subsite, refdes, 'xsection_plots', deployment, ext)
                save_dir_4d = os.path.join(sDir, array, subsite, refdes, 'xsection_plots_4d', deployment, ext)
            else:
                save_dir_profile = os.path.join(sDir, array, subsite, refdes, 'profile_plots', deployment)
                save_dir_xsection = os.path.join(sDir, array, subsite, refdes, 'xsection_plots', deployment)
                save_dir_4d = os.path.join(sDir, array, subsite, refdes, 'xsection_plots_4d', deployment)

            time1 = ds['time'].values
            try:
                ds_lat1 = ds['lat'].values
            except KeyError:
                ds_lat1 = None
                print('No latitude variable in file')
            try:
                ds_lon1 = ds['lon'].values
            except KeyError:
                ds_lon1 = None
                print('No longitude variable in file')

            # get pressure variable
            pvarname, y1, y_units, press, y_fillvalue = cf.add_pressure_to_dictionary_of_sci_vars(ds)

            for sv in sci_vars:
                print('')
                print(sv)
                if 'pressure' not in sv:
                    if sv == 'spkir_abj_cspp_downwelling_vector':
                        pxso.pf_xs_spkir(ds, sv, time1, y1, ds_lat1, ds_lon1, zcell_size, inpercentile,
                                         save_dir_profile, save_dir_xsection, deployment, press, y_units, n_std, zdbar)
                    elif 'OPTAA' in r:
                        if sv not in ['wavelength_a', 'wavelength_c']:
                            pxso.pf_xs_optaa(ds, sv, time1, y1, ds_lat1, ds_lon1, zcell_size, inpercentile,
                                             save_dir_profile, save_dir_xsection, deployment, press, y_units, n_std, zdbar)
                    else:
                        z1 = ds[sv].values
                        fv = ds[sv]._FillValue
                        sv_units = ds[sv].units
                        # Check if the array is all NaNs
                        if sum(np.isnan(z1)) == len(z1):
                            print('Array of all NaNs - skipping plot.')
                            continue
                        # Check if the array is all fill values
                        elif len(z1[z1 != fv]) == 0:
                            print('Array of all fill values - skipping plot.')
                            continue
                        else:
                            # remove unreasonable pressure data (e.g.
for surface piercing profilers) if zdbar: po_ind = (0 < y1) & (y1 < zdbar) tm = time1[po_ind] y = y1[po_ind] z = z1[po_ind] ds_lat = ds_lat1[po_ind] ds_lon = ds_lon1[po_ind] else: tm = time1 y = y1 z = z1 ds_lat = ds_lat1 ds_lon = ds_lon1 # reject erroneous data dtime, zpressure, ndata, lenfv, lennan, lenev, lengr, global_min, global_max, lat, lon = \ cf.reject_erroneous_data(r, sv, tm, y, z, fv, ds_lat, ds_lon) # get rid of 0.0 data if sv == 'salinity': ind = ndata > 30 elif sv == 'density': ind = ndata > 1022.5 elif sv == 'conductivity': ind = ndata > 3.45 else: ind = ndata > 0 # if sv == 'sci_flbbcd_chlor_units': # ind = ndata < 7.5 # elif sv == 'sci_flbbcd_cdom_units': # ind = ndata < 25 # else: # ind = ndata > 0.0 # if 'CTD' in r: # ind = zpressure > 0.0 # else: # ind = ndata > 0.0 lenzero = np.sum(~ind) dtime = dtime[ind] zpressure = zpressure[ind] ndata = ndata[ind] if ds_lat is not None and ds_lon is not None: lat = lat[ind] lon = lon[ind] else: lat = None lon = None if len(dtime) > 0: # reject time range from data portal file export t_portal, z_portal, y_portal, lat_portal, lon_portal = \ cf.reject_timestamps_dataportal(subsite, r, dtime, zpressure, ndata, lat, lon) print('removed {} data points using visual inspection of data'.format( len(ndata) - len(z_portal))) # create data groups if len(y_portal) > 0: columns = ['tsec', 'dbar', str(sv)] min_r = int(round(np.nanmin(y_portal) - zcell_size)) max_r = int(round(np.nanmax(y_portal) + zcell_size)) ranges = list(range(min_r, max_r, zcell_size)) groups, d_groups = gt.group_by_depth_range(t_portal, y_portal, z_portal, columns, ranges) if 'scatter' in sv: n_std = None # to use percentile else: n_std = n_std # get percentile analysis for printing on the profile plot y_avg, n_avg, n_min, n_max, n0_std, n1_std, l_arr, time_ex = cf.reject_timestamps_in_groups( groups, d_groups, n_std, inpercentile) """ Plot all data """ if len(time1) > 0: cf.create_dir(save_dir_profile) cf.create_dir(save_dir_xsection) sname = '-'.join((r, method, sv)) sfileall = '_'.join(('all_data', sname, pd.to_datetime(time1.min()).strftime('%Y%m%d'))) tm0 = pd.to_datetime(time1.min()).strftime('%Y-%m-%dT%H:%M:%S') tm1 = pd.to_datetime(time1.max()).strftime('%Y-%m-%dT%H:%M:%S') title = ' '.join((deployment, refdes, method)) + '\n' + tm0 + ' to ' + tm1 if 'SPKIR' in r: title = title + '\nWavelength = 510 nm' ''' profile plot ''' xlabel = sv + " (" + sv_units + ")" ylabel = press[0] + " (" + y_units[0] + ")" clabel = 'Time' fig, ax = pf.plot_profiles(z1, y1, time1, ylabel, xlabel, clabel, stdev=None) ax.set_title(title, fontsize=9) fig.tight_layout() pf.save_fig(save_dir_profile, sfileall) ''' xsection plot ''' clabel = sv + " (" + sv_units + ")" ylabel = press[0] + " (" + y_units[0] + ")" fig, ax, bar = pf.plot_xsection(subsite, time1, y1, z1, clabel, ylabel, t_eng=None, m_water_depth=None, inpercentile=None, stdev=None) if fig: ax.set_title(title, fontsize=9) fig.tight_layout() pf.save_fig(save_dir_xsection, sfileall) """ Plot cleaned-up data """ if len(dtime) > 0: if len(y_portal) > 0: sfile = '_'.join(('rm_erroneous_data', sname, pd.to_datetime(t_portal.min()).strftime('%Y%m%d'))) t0 = pd.to_datetime(t_portal.min()).strftime('%Y-%m-%dT%H:%M:%S') t1 = pd.to_datetime(t_portal.max()).strftime('%Y-%m-%dT%H:%M:%S') title = ' '.join((deployment, refdes, method)) + '\n' + t0 + ' to ' + t1 if 'SPKIR' in r: title = title + '\nWavelength = 510 nm' ''' profile plot ''' xlabel = sv + " (" + sv_units + ")" ylabel = press[0] + " (" + y_units[0] + ")" clabel = 'Time' fig, ax = 
pf.plot_profiles(z_portal, y_portal, t_portal, ylabel, xlabel, clabel, stdev=None) ax.set_title(title, fontsize=9) ax.plot(n_avg, y_avg, '-k') ax.fill_betweenx(y_avg, n0_std, n1_std, color='m', alpha=0.2) if inpercentile: leg_text = ( 'removed {} fill values, {} NaNs, {} Extreme Values (1e7), {} Global ranges [{} - {}], ' '{} unreasonable values'.format(lenfv, lennan, lenev, lengr, global_min, global_max, lenzero) + '\nexcluded {} suspect data points when inspected visually'.format( len(ndata) - len(z_portal)) + '\n(black) data average in {} dbar segments'.format(zcell_size) + '\n(magenta) {} percentile envelope in {} dbar segments'.format( int(100 - inpercentile * 2), zcell_size),) elif n_std: leg_text = ( 'removed {} fill values, {} NaNs, {} Extreme Values (1e7), {} Global ranges [{} - {}], ' '{} unreasonable values'.format(lenfv, lennan, lenev, lengr, global_min, global_max, lenzero) + '\nexcluded {} suspect data points when inspected visually'.format( len(ndata) - len(z_portal)) + '\n(black) data average in {} dbar segments'.format(zcell_size) + '\n(magenta) +/- {} SD envelope in {} dbar segments'.format( int(n_std), zcell_size),) ax.legend(leg_text, loc='upper center', bbox_to_anchor=(0.5, -0.17), fontsize=6) fig.tight_layout() pf.save_fig(save_dir_profile, sfile) ''' xsection plot ''' clabel = sv + " (" + sv_units + ")" ylabel = press[0] + " (" + y_units[0] + ")" # plot non-erroneous data fig, ax, bar = pf.plot_xsection(subsite, t_portal, y_portal, z_portal, clabel, ylabel, t_eng=None, m_water_depth=None, inpercentile=None, stdev=None) ax.set_title(title, fontsize=9) leg_text = ( 'removed {} fill values, {} NaNs, {} Extreme Values (1e7), {} Global ranges [{} - {}], ' '{} unreasonable values'.format(lenfv, lennan, lenev, lengr, global_min, global_max, lenzero) + '\nexcluded {} suspect data points when inspected visually'.format( len(ndata) - len(z_portal)), ) ax.legend(leg_text, loc='upper center', bbox_to_anchor=(0.5, -0.17), fontsize=6) fig.tight_layout() pf.save_fig(save_dir_xsection, sfile) ''' 4D plot for gliders only ''' if 'MOAS' in r: if ds_lat is not None and ds_lon is not None: cf.create_dir(save_dir_4d) clabel = sv + " (" + sv_units + ")" zlabel = press[0] + " (" + y_units[0] + ")" fig = plt.figure() ax = fig.add_subplot(111, projection='3d') sct = ax.scatter(lon_portal, lat_portal, y_portal, c=z_portal, s=2) cbar = plt.colorbar(sct, label=clabel, extend='both') cbar.ax.tick_params(labelsize=8) ax.invert_zaxis() ax.view_init(25, 32) ax.invert_xaxis() ax.invert_yaxis() ax.set_zlabel(zlabel, fontsize=9) ax.set_ylabel('Latitude', fontsize=9) ax.set_xlabel('Longitude', fontsize=9) ax.set_title(title, fontsize=9) pf.save_fig(save_dir_4d, sfile)
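# The glider '4D' figure above is a 3D scatter of (lon, lat, pressure) colored by the
# science variable. A self-contained sketch with synthetic data (axis labels and
# viewing angles are copied from the function; everything else is illustrative):
import matplotlib.pyplot as plt
import numpy as np
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401  registers the '3d' projection

def glider_track_3d(lon, lat, pressure, values, clabel='variable'):
    """Scatter a glider track in 3D, colored by a science variable."""
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    sct = ax.scatter(lon, lat, pressure, c=values, s=2)
    plt.colorbar(sct, label=clabel, extend='both')
    ax.invert_zaxis()  # depth increases downward
    ax.view_init(25, 32)
    ax.set_xlabel('Longitude', fontsize=9)
    ax.set_ylabel('Latitude', fontsize=9)
    ax.set_zlabel('pressure (dbar)', fontsize=9)
    return fig, ax

# n = 500
# glider_track_3d(np.linspace(-70.9, -70.5, n), np.linspace(40.1, 40.4, n),
#                 np.abs(200 * np.sin(np.linspace(0, 20, n))), np.random.rand(n))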
def main(url_list, sDir, plot_type, deployment_num, start_time, end_time, preferred_only, glider, zdbar, n_std, inpercentile, zcell_size): rd_list = [] for uu in url_list: elements = uu.split('/')[-2].split('-') rd = '-'.join((elements[1], elements[2], elements[3], elements[4])) if rd not in rd_list and 'ENG' not in rd: rd_list.append(rd) for r in rd_list: print('\n{}'.format(r)) datasets = [] for u in url_list: splitter = u.split('/')[-2].split('-') rd_check = '-'.join((splitter[1], splitter[2], splitter[3], splitter[4])) if rd_check == r: udatasets = cf.get_nc_urls([u]) datasets.append(udatasets) datasets = list(itertools.chain(*datasets)) fdatasets = [] if preferred_only == 'yes': # get the preferred stream information ps_df, n_streams = cf.get_preferred_stream_info(r) for index, row in ps_df.iterrows(): for ii in range(n_streams): try: rms = '-'.join((r, row[ii])) except TypeError: continue for dd in datasets: spl = dd.split('/')[-2].split('-') catalog_rms = '-'.join((spl[1], spl[2], spl[3], spl[4], spl[5], spl[6])) fdeploy = dd.split('/')[-1].split('_')[0] if rms == catalog_rms and fdeploy == row['deployment']: fdatasets.append(dd) else: fdatasets = datasets main_sensor = r.split('-')[-1] fdatasets_sel = cf.filter_collocated_instruments(main_sensor, fdatasets) for fd in fdatasets_sel: part_d = fd.split('/')[-1] print(part_d) ds = xr.open_dataset(fd, mask_and_scale=False) ds = ds.swap_dims({'obs': 'time'}) fname, subsite, refdes, method, stream, deployment = cf.nc_attributes(fd) array = subsite[0:2] sci_vars = cf.return_science_vars(stream) if 'CE05MOAS' in r or 'CP05MOAS' in r: # for coastal gliders, get m_water_depth for bathymetry eng = '-'.join((r.split('-')[0], r.split('-')[1], '00-ENG000000', method, 'glider_eng')) eng_url = [s for s in url_list if eng in s] if len(eng_url) == 1: eng_datasets = cf.get_nc_urls(eng_url) # filter out collocated datasets eng_dataset = [j for j in eng_datasets if (eng in j.split('/')[-1] and deployment in j.split('/')[-1])] if len(eng_dataset) > 0: ds_eng = xr.open_dataset(eng_dataset[0], mask_and_scale=False) t_eng = ds_eng['time'].values m_water_depth = ds_eng['m_water_depth'].values # m_altimeter_status = 0 means a good reading (not nan or -1) eng_ind = ds_eng['m_altimeter_status'].values == 0 m_water_depth = m_water_depth[eng_ind] t_eng = t_eng[eng_ind] else: print('No engineering file for deployment {}'.format(deployment)) if deployment_num is not None: if int(deployment.split('0')[-1]) is not deployment_num: print(type(int(deployment.split('0')[-1])), type(deployment_num)) continue if start_time is not None and end_time is not None: ds = ds.sel(time=slice(start_time, end_time)) if len(ds['time'].values) == 0: print('No data to plot for specified time range: ({} to {})'.format(start_time, end_time)) continue stime = start_time.strftime('%Y-%m-%d') etime = end_time.strftime('%Y-%m-%d') ext = stime + 'to' + etime # .join((ds0_method, ds1_method save_dir = os.path.join(sDir, array, subsite, refdes, plot_type, deployment, ext) else: save_dir = os.path.join(sDir, array, subsite, refdes, plot_type, deployment) cf.create_dir(save_dir) tm = ds['time'].values # get pressure variable ds_vars = list(ds.data_vars.keys()) + [x for x in ds.coords.keys() if 'pressure' in x] y, y_units, press = cf.add_pressure_to_dictionary_of_sci_vars(ds) print(y_units, press) # press = pf.pressure_var(ds, ds_vars) # print(press) # y = ds[press].values # y_units = ds[press].units for sv in sci_vars: print(sv) if 'sci_water_pressure' not in sv: z = ds[sv].values fv = 
ds[sv]._FillValue z_units = ds[sv].units # Check if the array is all NaNs if sum(np.isnan(z)) == len(z): print('Array of all NaNs - skipping plot.') continue # Check if the array is all fill values elif len(z[z != fv]) == 0: print('Array of all fill values - skipping plot.') continue else: """ clean up data """ # reject erroneous data dtime, zpressure, ndata, lenfv, lennan, lenev, lengr, global_min, global_max = \ cf.reject_erroneous_data(r, sv, tm, y, z, fv) # get rid of 0.0 data if 'CTD' in r: ind = zpressure > 0.0 else: ind = ndata > 0.0 lenzero = np.sum(~ind) dtime = dtime[ind] zpressure = zpressure[ind] ndata = ndata[ind] # creating data groups columns = ['tsec', 'dbar', str(sv)] min_r = int(round(min(zpressure) - zcell_size)) max_r = int(round(max(zpressure) + zcell_size)) ranges = list(range(min_r, max_r, zcell_size)) groups, d_groups = gt.group_by_depth_range(dtime, zpressure, ndata, columns, ranges) # rejecting timestamps from percentile analysis y_avg, n_avg, n_min, n_max, n0_std, n1_std, l_arr, time_ex = cf.reject_timestamps_in_groups( groups, d_groups, n_std, inpercentile) t_nospct, z_nospct, y_nospct = cf.reject_suspect_data(dtime, zpressure, ndata, time_ex) print('removed {} data points using {} percentile of data grouped in {} dbar segments'.format( len(zpressure) - len(z_nospct), inpercentile, zcell_size)) # reject time range from data portal file export t_portal, z_portal, y_portal = cf.reject_timestamps_dataportal(subsite, r, t_nospct, y_nospct, z_nospct) print('removed {} data points using visual inspection of data'.format(len(z_nospct) - len(z_portal))) # reject data in a depth range if zdbar: y_ind = y_portal < zdbar n_zdbar = np.sum(~y_ind) t_array = t_portal[y_ind] y_array = y_portal[y_ind] z_array = z_portal[y_ind] else: n_zdbar = 0 t_array = t_portal y_array = y_portal z_array = z_portal print('{} in water depth > {} dbar'.format(n_zdbar, zdbar)) """ Plot data """ if len(dtime) > 0: sname = '-'.join((r, method, sv)) clabel = sv + " (" + z_units + ")" ylabel = press[0] + " (" + y_units[0] + ")" if glider == 'no': t_eng = None m_water_depth = None # plot non-erroneous data fig, ax, bar = pf.plot_xsection(subsite, dtime, zpressure, ndata, clabel, ylabel, t_eng, m_water_depth, inpercentile, stdev=None) t0 = pd.to_datetime(dtime.min()).strftime('%Y-%m-%dT%H:%M:%S') t1 = pd.to_datetime(dtime.max()).strftime('%Y-%m-%dT%H:%M:%S') title = ' '.join((deployment, refdes, method)) + '\n' + t0 + ' to ' + t1 ax.set_title(title, fontsize=9) leg_text = ( 'removed {} fill values, {} NaNs, {} Extreme Values (1e7), {} Global ranges [{} - {}], ' '{} zeros'.format(lenfv, lennan, lenev, lengr, global_min, global_max, lenzero), ) ax.legend(leg_text, loc='upper center', bbox_to_anchor=(0.5, -0.17), fontsize=6) fig.tight_layout() sfile = '_'.join(('rm_erroneous_data', sname)) pf.save_fig(save_dir, sfile) # plots removing all suspect data if len(t_array) > 0: if len(t_array) != len(dtime): # plot bathymetry only within data time ranges if glider == 'yes': eng_ind = (t_eng >= np.min(t_array)) & (t_eng <= np.max(t_array)) t_eng = t_eng[eng_ind] m_water_depth = m_water_depth[eng_ind] fig, ax, bar = pf.plot_xsection(subsite, t_array, y_array, z_array, clabel, ylabel, t_eng, m_water_depth, inpercentile, stdev=None) t0 = pd.to_datetime(t_array.min()).strftime('%Y-%m-%dT%H:%M:%S') t1 = pd.to_datetime(t_array.max()).strftime('%Y-%m-%dT%H:%M:%S') title = ' '.join((deployment, refdes, method)) + '\n' + t0 + ' to ' + t1 ax.set_title(title, fontsize=9) if zdbar: leg_text = ( 'removed {} fill values, {} 
NaNs, {} Extreme Values (1e7), {} Global ranges [{} - {}], ' '{} zeros'.format(lenfv, lennan, lenev, lengr, global_min, global_max, lenzero) + '\nremoved {} in the upper and lower {}th percentile of data grouped in {} dbar segments'.format( len(zpressure) - len(z_nospct), inpercentile, zcell_size) + '\nexcluded {} suspect data points when inspected visually'.format( len(z_nospct) - len(z_portal)) + '\nexcluded {} suspect data in water depth greater than {} dbar'.format(n_zdbar, zdbar), ) else: leg_text = ( 'removed {} fill values, {} NaNs, {} Extreme Values (1e7), {} Global ranges [{} - {}], ' '{} zeros'.format(lenfv, lennan, lenev, lengr, global_min, global_max, lenzero) + '\nremoved {} in the upper and lower {}th percentile of data grouped in {} dbar segments'.format( len(zpressure) - len(z_nospct), inpercentile, zcell_size) + '\nexcluded {} suspect data points when inspected visually'.format( len(z_nospct) - len(z_portal)), ) ax.legend(leg_text, loc='upper center', bbox_to_anchor=(0.5, -0.17), fontsize=6) fig.tight_layout() sfile = '_'.join(('rm_suspect_data', sname)) pf.save_fig(save_dir, sfile)
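# cf.reject_timestamps_in_groups screens out points that sit outside a percentile
# envelope computed per depth bin. A minimal sketch of that screening, assuming
# plain numpy/pandas inputs and returning a boolean mask rather than the repo's
# list of timestamps to exclude (names here are illustrative):
import numpy as np
import pandas as pd

def percentile_mask(dbar, values, zcell_size=10, inpercentile=5):
    """True where a point lies inside the [p, 100-p] percentile band of its depth bin."""
    df = pd.DataFrame({'dbar': dbar, 'v': values})
    bins = np.arange(np.floor(df['dbar'].min()) - zcell_size,
                     np.ceil(df['dbar'].max()) + zcell_size, zcell_size)
    grp = df.groupby(pd.cut(df['dbar'], bins), observed=True)['v']
    lo = grp.transform(lambda s: s.quantile(inpercentile / 100))
    hi = grp.transform(lambda s: s.quantile(1 - inpercentile / 100))
    return ((df['v'] >= lo) & (df['v'] <= hi)).to_numpy()

# rng = np.random.default_rng(1)
# p = rng.uniform(0, 100, 1000); v = p / 10 + rng.normal(0, 0.1, 1000)
# v[::100] += 5  # inject outliers
# print(percentile_mask(p, v).sum(), 'of', len(v), 'points kept')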
def main(files, out, time_break, depth):
    """
    files: url to an .nc/.ncml file or the path to a text file containing .nc/.ncml links.
    A # at the front will skip links in the text file.
    out: Directory to save plots
    time_break: name of the dataset variable used to split plots (e.g. deployment)
    depth: name of the pressure variable to use for the y-axis
    """
    fname, ext = os.path.splitext(files)
    if ext in ['.nc', '.ncml']:
        list_files = [files]
    else:
        list_files = read_file(files)
    stream_vars = pf.load_variable_dict(var='eng')  # load engineering variables
    for nc in list_files:
        print(nc)
        with xr.open_dataset(nc, mask_and_scale=False) as ds:
            # change dimensions from 'obs' to 'time'
            ds = ds.swap_dims({'obs': 'time'})
            ds_variables = ds.data_vars.keys()  # List of dataset variables
            stream = ds.stream  # List stream name associated with the data
            title_pre = mk_str(ds.attrs, 't')  # , var, tt0, tt1, 't')
            save_pre = mk_str(ds.attrs, 's')  # , var, tt0, tt1, 's')
            platform = ds.subsite
            node = ds.node
            sensor = ds.sensor
            deployment = 'D0000{}'.format(str(np.unique(ds.deployment)[0]))
            stream = ds.stream
            save_dir = os.path.join(out, platform, deployment, node, sensor, stream, 'depth_profiles')
            cf.create_dir(save_dir)
            # try:
            #     eng = stream_vars[stream]  # select specific streams engineering variables
            # except KeyError:
            #     eng = ['']
            misc = ['quality', 'string', 'timestamp', 'deployment', 'id', 'provenance', 'qc', 'time', 'mission',
                    'obs', 'volt', 'ref', 'sig', 'amp', 'rph', 'calphase', 'phase', 'therm']
            # reg_ex = re.compile('|'.join(eng+misc))  # make regular expression
            reg_ex = re.compile('|'.join(misc))
            # keep variables that are not in the regular expression
            sci_vars = [s for s in ds_variables if not reg_ex.search(s)]
            # t0, t1 = pf.get_rounded_start_and_end_times(ds_disk['time'].data)
            # tI = (pd.to_datetime(t0) + (pd.to_datetime(t1) - pd.to_datetime(t0)) / 2)
            # time_list = [[t0, t1], [t0, tI], [tI, t1]]
            times = np.unique(ds[time_break])
            for t in times:
                time_ind = t == ds[time_break].data
                for var in sci_vars:
                    x = dict(data=ds['time'].data[time_ind], info=dict(label='Time', units='GMT'))
                    t0 = pd.to_datetime(x['data'].min()).strftime('%Y-%m-%dT%H%M%S')
                    t1 = pd.to_datetime(x['data'].max()).strftime('%Y-%m-%dT%H%M%S')
                    try:
                        sci = ds[var]
                        print(var)
                    except UnicodeEncodeError:  # some comments have latex characters
                        ds[var].attrs.pop('comment')  # remove from the attributes
                        sci = ds[var]  # or else the variable won't load
                    y = dict(data=ds[depth].data[time_ind], info=dict(label='Pressure', units='dbar', var=var,
                                                                      platform=platform, node=node, sensor=sensor))
                    try:
                        z_lab = sci.long_name
                    except AttributeError:
                        z_lab = sci.standard_name
                    z = dict(data=sci.data[time_ind], info=dict(label=z_lab, units=sci.units, var=var,
                                                                platform=platform, node=node, sensor=sensor))
                    title = title_pre + var
                    # plot depth cross-section
                    fig, ax = pf.depth_cross_section(z, y, x, title=title)
                    pf.resize(width=12, height=8.5)  # Resize figure
                    save_name = '{}-{}-{}_{}_{}-{}'.format(platform, node, sensor, var, t0, t1)
                    pf.save_fig(save_dir, save_name, res=150)  # Save figure
                    plt.close('all')
                    # try:
                    #     y_lab = sci.standard_name
                    # except AttributeError:
                    #     y_lab = var
                    # y = dict(data=sci.data, info=dict(label=y_lab, units=sci.units))
                    del x, y
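# Both scripts above split a dataset on the unique values of a 'time_break' variable
# (e.g. deployment) before plotting. A tiny self-contained sketch of that pattern
# with a synthetic xarray dataset (variable names are illustrative):
import numpy as np
import xarray as xr

def iter_time_breaks(ds, time_break='deployment'):
    """Yield (value, boolean index over time) for each unique break value."""
    for t in np.unique(ds[time_break]):
        yield t, ds[time_break].data == t

# ds = xr.Dataset({'deployment': ('time', [1, 1, 2, 2, 2])}, coords={'time': np.arange(5)})
# for value, ind in iter_time_breaks(ds):
#     print(value, int(ind.sum()), 'points')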
def main(url_list, sDir, plot_type):
    """
    url_list: list of paths to instrument data by method
    sDir: path to the directory on your machine to save files
    plot_type: folder name for a plot type
    """
    rd_list = []
    ms_list = []
    for uu in url_list:
        elements = uu.split('/')[-2].split('-')
        rd = '-'.join((elements[1], elements[2], elements[3], elements[4]))
        ms = uu.split(rd + '-')[1].split('/')[0]
        if rd not in rd_list:
            rd_list.append(rd)
        if ms not in ms_list:
            ms_list.append(ms)

    ''' separate different instruments '''
    for r in rd_list:
        print('\n{}'.format(r))
        subsite = r.split('-')[0]
        array = subsite[0:2]
        main_sensor = r.split('-')[-1]
        ps_df, n_streams = cf.get_preferred_stream_info(r)

        # read in the analysis file
        dr_data = cf.refdes_datareview_json(r)

        # get end times of deployments
        deployments = []
        end_times = []
        for index, row in ps_df.iterrows():
            deploy = row['deployment']
            deploy_info = get_deployment_information(dr_data, int(deploy[-4:]))
            deployments.append(int(deploy[-4:]))
            end_times.append(pd.to_datetime(deploy_info['stop_date']))

        # get the list of data files and filter out collocated instruments and other streams
        datasets = []
        for u in url_list:
            print(u)
            splitter = u.split('/')[-2].split('-')
            rd_check = '-'.join((splitter[1], splitter[2], splitter[3], splitter[4]))
            if rd_check == r:
                udatasets = cf.get_nc_urls([u])
                datasets.append(udatasets)
        datasets = list(itertools.chain(*datasets))
        fdatasets = cf.filter_collocated_instruments(main_sensor, datasets)
        fdatasets = cf.filter_other_streams(r, ms_list, fdatasets)

        ''' separate the data files by methods '''
        for ms in ms_list:  # np.unique(methodstream)
            fdatasets_sel = [x for x in fdatasets if ms in x]

            # create a folder to save figures
            save_dir = os.path.join(sDir, array, subsite, r, plot_type, ms.split('-')[0])
            cf.create_dir(save_dir)

            # create a dictionary for science variables from analysis file
            stream_sci_vars_dict = dict()
            for x in dr_data['instrument']['data_streams']:
                dr_ms = '-'.join((x['method'], x['stream_name']))
                if ms == dr_ms:
                    stream_sci_vars_dict[dr_ms] = dict(vars=dict())
                    sci_vars = dict()
                    for y in x['stream']['parameters']:
                        if y['data_product_type'] == 'Science Data':
                            sci_vars.update({y['name']: dict(db_units=y['unit'])})
                    if len(sci_vars) > 0:
                        stream_sci_vars_dict[dr_ms]['vars'] = sci_vars

            # initialize an empty data array for science variables in dictionary
            sci_vars_dict = cd.initialize_empty_arrays(stream_sci_vars_dict, ms)
            y_unit = []
            y_name = []
            for fd in fdatasets_sel:
                ds = xr.open_dataset(fd, mask_and_scale=False)
                print('\nAppending data file: {}'.format(fd.split('/')[-1]))
                for var in list(sci_vars_dict[ms]['vars'].keys()):
                    sh = sci_vars_dict[ms]['vars'][var]
                    if ds[var].units == sh['db_units']:
                        if ds[var]._FillValue not in sh['fv']:
                            sh['fv'].append(ds[var]._FillValue)
                        if ds[var].units not in sh['units']:
                            sh['units'].append(ds[var].units)
                        # time
                        t = ds['time'].values
                        t0 = pd.to_datetime(t.min()).strftime('%Y-%m-%dT%H:%M:%S')
                        t1 = pd.to_datetime(t.max()).strftime('%Y-%m-%dT%H:%M:%S')
                        # sci variable
                        z = ds[var].values
                        sh['t'] = np.append(sh['t'], t)
                        sh['values'] = np.append(sh['values'], z)
                        # add pressure to dictionary of sci vars
                        if 'MOAS' in subsite:
                            if 'CTD' in main_sensor:  # for glider CTDs, pressure is a coordinate
                                pressure = 'sci_water_pressure_dbar'
                                y = ds[pressure].values
                                if ds[pressure].units not in y_unit:
                                    y_unit.append(ds[pressure].units)
                                if ds[pressure].long_name not in y_name:
                                    y_name.append(ds[pressure].long_name)
                            else:
                                pressure = 'int_ctd_pressure'
                                y = ds[pressure].values
                                if ds[pressure].units not in y_unit:
y_unit.append(ds[pressure].units) if ds[pressure].long_name not in y_name: y_name.append(ds[pressure].long_name) else: pressure = pf.pressure_var(ds, ds.data_vars.keys()) y = ds[pressure].values if ds[pressure].units not in y_unit: y_unit.append(ds[pressure].units) if ds[pressure].long_name not in y_name: y_name.append(ds[pressure].long_name) sh['pressure'] = np.append(sh['pressure'], y) if len(y_unit) != 1: print('pressure unit varies!') else: y_unit = y_unit[0] if len(y_name) != 1: print('pressure long name varies!') else: y_name = y_name[0] for m, n in sci_vars_dict.items(): for sv, vinfo in n['vars'].items(): print('\nWorking on variable: {}'.format(sv)) if len(vinfo['t']) < 1: print('no variable data to plot') else: sv_units = vinfo['units'][0] fv = vinfo['fv'][0] t0 = pd.to_datetime(min( vinfo['t'])).strftime('%Y-%m-%dT%H:%M:%S') t1 = pd.to_datetime(max( vinfo['t'])).strftime('%Y-%m-%dT%H:%M:%S') t = vinfo['t'] x = vinfo['values'] y = vinfo['pressure'] # Check if the array is all NaNs if sum(np.isnan(x)) == len(x): print('Array of all NaNs - skipping plot.') continue # Check if the array is all fill values elif len(x[x != fv]) == 0: print('Array of all fill values - skipping plot.') continue else: # reject fill values fv_ind = x != fv y_nofv = y[fv_ind] t_nofv = t[fv_ind] c_nofv = cm.rainbow(np.linspace(0, 1, len(t[fv_ind]))) x_nofv = x[fv_ind] print(len(x) - len(fv_ind), ' fill values') # reject NaNs nan_ind = ~np.isnan(x) t_nofv_nonan = t_nofv[nan_ind] c_nofv_nonan = c_nofv[nan_ind] y_nofv_nonan = y_nofv[nan_ind] x_nofv_nonan = x_nofv[nan_ind] print(len(x) - len(nan_ind), ' NaNs') # reject extreme values ev_ind = cf.reject_extreme_values(x_nofv_nonan) t_nofv_nonan_noev = t_nofv_nonan[ev_ind] c_nofv_nonan_noev = c_nofv_nonan[ev_ind] y_nofv_nonan_noev = y_nofv_nonan[ev_ind] x_nofv_nonan_noev = x_nofv_nonan[ev_ind] print(len(z) - len(ev_ind), ' Extreme Values', '|1e7|') # reject values outside global ranges: global_min, global_max = cf.get_global_ranges(r, sv) # platform not in qc-table (parad_k_par) # global_min = 0 # global_max = 2500 print('global ranges for : {}-{} {} - {}'.format( r, sv, global_min, global_max)) if isinstance(global_min, (int, float)) and isinstance( global_max, (int, float)): gr_ind = cf.reject_global_ranges( x_nofv_nonan_noev, global_min, global_max) t_nofv_nonan_noev_nogr = t_nofv_nonan_noev[gr_ind] y_nofv_nonan_noev_nogr = y_nofv_nonan_noev[gr_ind] x_nofv_nonan_noev_nogr = x_nofv_nonan_noev[gr_ind] else: t_nofv_nonan_noev_nogr = t_nofv_nonan_noev y_nofv_nonan_noev_nogr = y_nofv_nonan_noev x_nofv_nonan_noev_nogr = x_nofv_nonan_noev if len(x_nofv_nonan_noev) > 0: if m == 'common_stream_placeholder': sname = '-'.join((r, sv)) else: sname = '-'.join((r, m, sv)) if sv != 'pressure': columns = ['tsec', 'dbar', str(sv)] bin_size = 10 min_r = int(round(min(y_nofv_nonan_noev) - bin_size)) max_r = int(round(max(y_nofv_nonan_noev) + bin_size)) ranges = list(range(min_r, max_r, bin_size)) groups, d_groups = gt.group_by_depth_range( t_nofv_nonan_noev_nogr, y_nofv_nonan_noev_nogr, x_nofv_nonan_noev_nogr, columns, ranges) y_avg, n_avg, n_min, n_max, n0_std, n1_std, l_arr = [], [], [], [], [], [], [] tm = 1 for ii in range(len(groups)): nan_ind = d_groups[ii + tm].notnull() xtime = d_groups[ii + tm][nan_ind] colors = cm.rainbow(np.linspace(0, 1, len(xtime))) ypres = d_groups[ii + tm + 1][nan_ind] nval = d_groups[ii + tm + 2][nan_ind] tm += 2 l_arr.append(len( nval)) # count of data to filter out small groups y_avg.append(ypres.mean()) n_avg.append(nval.mean()) 
                            n_min.append(nval.min())
                            n_max.append(nval.max())
                            n_std = 3
                            n0_std.append(nval.mean() + n_std * nval.std())
                            n1_std.append(nval.mean() - n_std * nval.std())

                        # Plot all data
                        ylabel = y_name + " (" + y_unit + ")"
                        xlabel = sv + " (" + sv_units + ")"
                        clabel = 'Time'
                        fig, ax = pf.plot_profiles(x_nofv_nonan_noev_nogr, y_nofv_nonan_noev_nogr,
                                                   t_nofv_nonan_noev_nogr, ylabel, xlabel, clabel,
                                                   end_times, deployments, stdev=None)
                        title_text = ' '.join((r, ms.split('-')[-1])) + '\n' + t0 + ' - ' + t1 + '\n' \
                                     + str(bin_size) + ' m average and ' + str(n_std) + ' std shown'
                        ax.set_title(title_text, fontsize=9)
                        ax.plot(n_avg, y_avg, '-k')
                        ax.fill_betweenx(y_avg, n0_std, n1_std, color='m', alpha=0.2)
                        pf.save_fig(save_dir, sname)

                        # Plot data with outliers removed
                        fig, ax = pf.plot_profiles(x_nofv_nonan_noev_nogr, y_nofv_nonan_noev_nogr,
                                                   t_nofv_nonan_noev_nogr, ylabel, xlabel, clabel,
                                                   end_times, deployments, stdev=5)
                        ax.set_title(' '.join((r, ms.split('-')[-1])) + '\n' + t0 + ' - ' + t1, fontsize=9)
                        sfile = '_'.join((sname, 'rmoutliers'))
                        pf.save_fig(save_dir, sfile)
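# The black mean line and magenta envelope above come from per-bin means and
# +/- n_std standard deviations drawn with fill_betweenx. A self-contained sketch
# of that overlay (the helper name and bin edges are illustrative):
import matplotlib.pyplot as plt
import numpy as np

def profile_envelope(ax, dbar, values, bin_size=10, n_std=3):
    """Overlay per-bin mean (black) and mean +/- n_std*SD envelope (magenta)."""
    edges = np.arange(np.floor(dbar.min()), np.ceil(dbar.max()) + bin_size, bin_size)
    y_avg, n_avg, n0_std, n1_std = [], [], [], []
    for lo, hi in zip(edges[:-1], edges[1:]):
        sel = values[(dbar >= lo) & (dbar < hi)]
        if len(sel) == 0:
            continue
        y_avg.append((lo + hi) / 2)
        n_avg.append(sel.mean())
        n0_std.append(sel.mean() + n_std * sel.std())
        n1_std.append(sel.mean() - n_std * sel.std())
    ax.plot(n_avg, y_avg, '-k')
    ax.fill_betweenx(y_avg, n0_std, n1_std, color='m', alpha=0.2)

# fig, ax = plt.subplots()
# p = np.random.uniform(0, 100, 1000); v = 10 + p / 50 + np.random.randn(1000) * 0.2
# ax.scatter(v, p, s=2); profile_envelope(ax, p, v); ax.invert_yaxis()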