def compare_variable_attributes(fdatasets, r, name_list, sDir):
    """Tabulate variable attributes across deployment files and save them as CSV.

    For every dataset one row is created for the pressure variable reported by
    ``cf.add_pressure_to_dictionary_of_sci_vars`` and one row for each name in
    ``name_list`` (resolved through ``get_variable_data``).  Each row holds
    ``var_id``, ``units``, ``long_name`` and ``fill_values`` and is indexed by
    the deployment number parsed from the file name.  Duplicate rows are
    dropped and the table is written to ``<sDir>/<r>_velocity_variables.csv``.

    :param fdatasets: paths/URLs of NetCDF files whose file name starts with
        ``deployment<number>_``
    :param r: identifier used as the prefix of the CSV file name
    :param name_list: variable names handed to ``get_variable_data``
    :param sDir: directory the CSV file is written to
    :return: the de-duplicated ``pandas.DataFrame`` that was written to disk
    """
    frames = []
    for fdataset in fdatasets:
        fname = fdataset.split('/')[-1]
        print('\n', fname)
        deployment = int(fname.split('_')[0].split('deployment')[-1])

        # Row label.  The previous '-'.join((str(deployment))) iterated over
        # the characters of the string, turning deployment 12 into '1-2'.
        dr_dp = str(deployment)

        # The context manager releases the file handle once the attributes
        # have been read.
        with xr.open_dataset(fdataset, mask_and_scale=False) as ds:
            # variable list
            var_list = cf.notin_list(ds.data_vars.keys(), ['time', '_qc_'])

            z_id, z_data, z_unit, z_name, z_fill = cf.add_pressure_to_dictionary_of_sci_vars(ds)
            frames.append(pd.DataFrame({'var_id': [z_id],
                                        'units': [z_unit[0]],
                                        'long_name': [z_name[0]],
                                        'fill_values': [z_fill[0]]},
                                       index=[dr_dp]))

            for vname in name_list:
                # NOTE(review): four values are unpacked here; confirm this
                # matches the return signature of get_variable_data.
                vname_id, vname_unit, vname_name, vname_fv = get_variable_data(ds, var_list, vname)
                frames.append(pd.DataFrame({'var_id': [vname_id],
                                            'units': [vname_unit],
                                            'long_name': [vname_name],
                                            'fill_values': [str(vname_fv)]},
                                           index=[dr_dp]))

    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    vars_df = pd.concat(frames) if frames else pd.DataFrame()
    vars_df = vars_df.drop_duplicates()
    vars_df.to_csv('{}/{}_velocity_variables.csv'.format(sDir, r), index=True)

    return vars_df
def main(url_list, sDir, deployment_num, start_time, end_time, preferred_only, n_std, inpercentile, zcell_size, zdbar):
    """Plot profile and cross-section figures of raw and cleaned science data.

    For every reference designator found in ``url_list`` (engineering and ADCP
    instruments are skipped) the matching NetCDF files are opened and, for each
    science variable that is not a pressure variable:

    * SPKIR downwelling vectors and OPTAA variables are delegated to
      ``pxso.pf_xs_spkir`` / ``pxso.pf_xs_optaa``;
    * all other variables are filtered (``cf.reject_erroneous_data``, a
      per-variable lower threshold, ``cf.reject_timestamps_dataportal``),
      grouped in ``zcell_size`` pressure bins, and plotted twice: once with all
      data and once with the cleaned data.  Platforms whose reference
      designator contains ``MOAS`` additionally get a 3-D scatter plot when
      lat/lon are available.

    :param url_list: dataset URLs; the second-to-last path element is a
        dash-separated name whose fields 1-4 form the reference designator
    :param sDir: root directory for the saved figures
    :param deployment_num: only plot this deployment number, or None for all
    :param start_time: start of the time slice (datetime-like), or None
    :param end_time: end of the time slice (datetime-like), or None
    :param preferred_only: ``'yes'`` restricts the files to the preferred
        stream returned by ``cf.get_preferred_stream_info``
    :param n_std: number of standard deviations passed to
        ``cf.reject_timestamps_in_groups``; ignored (None) for variables whose
        name contains ``'scatter'``
    :param inpercentile: percentile passed to ``cf.reject_timestamps_in_groups``
    :param zcell_size: integer pressure-bin size used to build the depth ranges
    :param zdbar: if truthy, only samples with ``0 < pressure < zdbar`` are kept
    :return: None; figures are written by ``pf.save_fig``
    """
    # Lower bounds used to discard implausibly small values; any variable not
    # listed here only has values <= 0 removed.
    min_valid = {'salinity': 30, 'density': 1022.5, 'conductivity': 3.45}

    rd_list = []
    for uu in url_list:
        elements = uu.split('/')[-2].split('-')
        rd = '-'.join((elements[1], elements[2], elements[3], elements[4]))
        if rd not in rd_list and 'ENG' not in rd and 'ADCP' not in rd:
            rd_list.append(rd)

    for r in rd_list:
        print('\n{}'.format(r))
        datasets = []
        for u in url_list:
            splitter = u.split('/')[-2].split('-')
            rd_check = '-'.join((splitter[1], splitter[2], splitter[3], splitter[4]))
            if rd_check == r:
                datasets.extend(cf.get_nc_urls([u]))

        if preferred_only == 'yes':
            fdatasets = []
            # get the preferred stream information
            ps_df, n_streams = cf.get_preferred_stream_info(r)
            for index, row in ps_df.iterrows():
                for ii in range(n_streams):
                    try:
                        rms = '-'.join((r, row[ii]))
                    except TypeError:
                        # non-string entry: no stream in this column
                        continue
                    for dd in datasets:
                        spl = dd.split('/')[-2].split('-')
                        catalog_rms = '-'.join((spl[1], spl[2], spl[3], spl[4], spl[5], spl[6]))
                        fdeploy = dd.split('/')[-1].split('_')[0]
                        if rms == catalog_rms and fdeploy == row['deployment']:
                            fdatasets.append(dd)
        else:
            fdatasets = datasets

        main_sensor = r.split('-')[-1]
        fdatasets_sel = cf.filter_collocated_instruments(main_sensor, fdatasets)

        for fd in fdatasets_sel:
            part_d = fd.split('/')[-1]
            print('\n{}'.format(part_d))
            ds = xr.open_dataset(fd, mask_and_scale=False)
            ds = ds.swap_dims({'obs': 'time'})

            fname, subsite, refdes, method, stream, deployment = cf.nc_attributes(fd)
            array = subsite[0:2]
            sci_vars = cf.return_science_vars(stream)

            # NOTE: glider bathymetry (m_water_depth from the engineering
            # stream) is not loaded here; the cross-sections below are drawn
            # with t_eng=None and m_water_depth=None.

            if deployment_num is not None:
                # Compare by value: 'is not' tests object identity and is only
                # accidentally correct for small cached integers.
                if int(deployment[-4:]) != deployment_num:
                    print(type(int(deployment[-4:])), type(deployment_num))
                    continue

            if start_time is not None and end_time is not None:
                ds = ds.sel(time=slice(start_time, end_time))
                if len(ds['time'].values) == 0:
                    print('No data to plot for specified time range: ({} to {})'.format(start_time, end_time))
                    continue
                stime = start_time.strftime('%Y-%m-%d')
                etime = end_time.strftime('%Y-%m-%d')
                subdirs = (deployment, stime + 'to' + etime)
            else:
                subdirs = (deployment,)

            base_dir = os.path.join(sDir, array, subsite, refdes)
            save_dir_profile = os.path.join(base_dir, 'profile_plots', *subdirs)
            save_dir_xsection = os.path.join(base_dir, 'xsection_plots', *subdirs)
            save_dir_4d = os.path.join(base_dir, 'xsection_plots_4d', *subdirs)

            time1 = ds['time'].values
            try:
                ds_lat1 = ds['lat'].values
            except KeyError:
                ds_lat1 = None
                print('No latitude variable in file')
            try:
                ds_lon1 = ds['lon'].values
            except KeyError:
                ds_lon1 = None
                print('No longitude variable in file')

            # get pressure variable
            pvarname, y1, y_units, press, y_fillvalue = cf.add_pressure_to_dictionary_of_sci_vars(ds)

            for sv in sci_vars:
                print('')
                print(sv)
                if 'pressure' in sv:
                    continue

                if sv == 'spkir_abj_cspp_downwelling_vector':
                    pxso.pf_xs_spkir(ds, sv, time1, y1, ds_lat1, ds_lon1, zcell_size, inpercentile,
                                     save_dir_profile, save_dir_xsection, deployment, press, y_units,
                                     n_std, zdbar)
                    continue

                if 'OPTAA' in r:
                    if sv not in ['wavelength_a', 'wavelength_c']:
                        pxso.pf_xs_optaa(ds, sv, time1, y1, ds_lat1, ds_lon1, zcell_size, inpercentile,
                                         save_dir_profile, save_dir_xsection, deployment, press, y_units,
                                         n_std, zdbar)
                    continue

                z1 = ds[sv].values
                fv = ds[sv]._FillValue
                sv_units = ds[sv].units

                # Check if the array is all NaNs (vectorised; the built-in
                # sum() iterates element by element over the array)
                if np.isnan(z1).all():
                    print('Array of all NaNs - skipping plot.')
                    continue

                # Check if the array is all fill values
                if len(z1[z1 != fv]) == 0:
                    print('Array of all fill values - skipping plot.')
                    continue

                # remove unreasonable pressure data (e.g. for surface piercing profilers)
                if zdbar:
                    po_ind = (0 < y1) & (y1 < zdbar)
                    tm = time1[po_ind]
                    y = y1[po_ind]
                    z = z1[po_ind]
                    # lat/lon are None when the file has no such variable
                    ds_lat = ds_lat1[po_ind] if ds_lat1 is not None else None
                    ds_lon = ds_lon1[po_ind] if ds_lon1 is not None else None
                else:
                    tm = time1
                    y = y1
                    z = z1
                    ds_lat = ds_lat1
                    ds_lon = ds_lon1

                # reject erroneous data
                dtime, zpressure, ndata, lenfv, lennan, lenev, lengr, global_min, global_max, lat, lon = \
                    cf.reject_erroneous_data(r, sv, tm, y, z, fv, ds_lat, ds_lon)

                # get rid of 0.0 data (or values below the variable's lower bound)
                ind = ndata > min_valid.get(sv, 0)
                lenzero = np.sum(~ind)
                dtime = dtime[ind]
                zpressure = zpressure[ind]
                ndata = ndata[ind]
                if ds_lat is not None and ds_lon is not None:
                    lat = lat[ind]
                    lon = lon[ind]
                else:
                    lat = None
                    lon = None

                # Use a per-variable value so that a 'scatter' variable does
                # not disable the SD envelope for every later variable/file.
                sv_n_std = None if 'scatter' in sv else n_std  # None -> use percentile

                plot_cleaned = False
                if len(dtime) > 0:
                    # reject time range from data portal file export
                    t_portal, z_portal, y_portal, lat_portal, lon_portal = \
                        cf.reject_timestamps_dataportal(subsite, r, dtime, zpressure, ndata, lat, lon)
                    print('removed {} data points using visual inspection of data'.format(
                        len(ndata) - len(z_portal)))

                    # create data groups
                    if len(y_portal) > 0:
                        columns = ['tsec', 'dbar', str(sv)]
                        min_r = int(round(np.nanmin(y_portal) - zcell_size))
                        max_r = int(round(np.nanmax(y_portal) + zcell_size))
                        ranges = list(range(min_r, max_r, zcell_size))

                        groups, d_groups = gt.group_by_depth_range(t_portal, y_portal, z_portal, columns, ranges)

                        # get percentile analysis for printing on the profile plot
                        y_avg, n_avg, n_min, n_max, n0_std, n1_std, l_arr, time_ex = \
                            cf.reject_timestamps_in_groups(groups, d_groups, sv_n_std, inpercentile)
                        plot_cleaned = True

                sname = '-'.join((r, method, sv))
                value_label = sv + " (" + sv_units + ")"
                pressure_label = press[0] + " (" + y_units[0] + ")"

                # ---- Plot all data ----
                if len(time1) > 0:
                    cf.create_dir(save_dir_profile)
                    cf.create_dir(save_dir_xsection)
                    sfileall = '_'.join(('all_data', sname, pd.to_datetime(time1.min()).strftime('%Y%m%d')))
                    tm0 = pd.to_datetime(time1.min()).strftime('%Y-%m-%dT%H:%M:%S')
                    tm1 = pd.to_datetime(time1.max()).strftime('%Y-%m-%dT%H:%M:%S')
                    title = ' '.join((deployment, refdes, method)) + '\n' + tm0 + ' to ' + tm1
                    if 'SPKIR' in r:
                        title = title + '\nWavelength = 510 nm'

                    # profile plot
                    fig, ax = pf.plot_profiles(z1, y1, time1, pressure_label, value_label, 'Time', stdev=None)
                    ax.set_title(title, fontsize=9)
                    fig.tight_layout()
                    pf.save_fig(save_dir_profile, sfileall)

                    # xsection plot
                    fig, ax, bar = pf.plot_xsection(subsite, time1, y1, z1, value_label, pressure_label,
                                                    t_eng=None, m_water_depth=None,
                                                    inpercentile=None, stdev=None)
                    if fig:
                        ax.set_title(title, fontsize=9)
                        fig.tight_layout()
                        pf.save_fig(save_dir_xsection, sfileall)

                # ---- Plot cleaned-up data ----
                if not plot_cleaned:
                    continue

                sfile = '_'.join(('rm_erroneous_data', sname,
                                  pd.to_datetime(t_portal.min()).strftime('%Y%m%d')))
                t0 = pd.to_datetime(t_portal.min()).strftime('%Y-%m-%dT%H:%M:%S')
                t1 = pd.to_datetime(t_portal.max()).strftime('%Y-%m-%dT%H:%M:%S')
                title = ' '.join((deployment, refdes, method)) + '\n' + t0 + ' to ' + t1
                if 'SPKIR' in r:
                    title = title + '\nWavelength = 510 nm'

                removed_text = (
                    'removed {} fill values, {} NaNs, {} Extreme Values (1e7), {} Global ranges [{} - {}], '
                    '{} unreasonable values'.format(lenfv, lennan, lenev, lengr, global_min, global_max, lenzero)
                    + '\nexcluded {} suspect data points when inspected visually'.format(
                        len(ndata) - len(z_portal)))

                # profile plot
                fig, ax = pf.plot_profiles(z_portal, y_portal, t_portal, pressure_label, value_label, 'Time',
                                           stdev=None)
                ax.set_title(title, fontsize=9)
                ax.plot(n_avg, y_avg, '-k')
                ax.fill_betweenx(y_avg, n0_std, n1_std, color='m', alpha=0.2)

                # The legend is always defined; the envelope line is appended
                # only for the method that was actually used.
                profile_text = removed_text + '\n(black) data average in {} dbar segments'.format(zcell_size)
                if inpercentile:
                    profile_text += '\n(magenta) {} percentile envelope in {} dbar segments'.format(
                        int(100 - inpercentile * 2), zcell_size)
                elif sv_n_std:
                    profile_text += '\n(magenta) +/- {} SD envelope in {} dbar segments'.format(
                        int(sv_n_std), zcell_size)
                ax.legend((profile_text,), loc='upper center', bbox_to_anchor=(0.5, -0.17), fontsize=6)
                fig.tight_layout()
                pf.save_fig(save_dir_profile, sfile)

                # xsection plot of the non-erroneous data
                fig, ax, bar = pf.plot_xsection(subsite, t_portal, y_portal, z_portal, value_label,
                                                pressure_label, t_eng=None, m_water_depth=None,
                                                inpercentile=None, stdev=None)
                # same guard as for the all-data cross-section above
                if fig:
                    ax.set_title(title, fontsize=9)
                    ax.legend((removed_text,), loc='upper center', bbox_to_anchor=(0.5, -0.17), fontsize=6)
                    fig.tight_layout()
                    pf.save_fig(save_dir_xsection, sfile)

                # 4D plot for gliders only
                if 'MOAS' in r:
                    if ds_lat is not None and ds_lon is not None:
                        cf.create_dir(save_dir_4d)

                        fig = plt.figure()
                        ax = fig.add_subplot(111, projection='3d')
                        sct = ax.scatter(lon_portal, lat_portal, y_portal, c=z_portal, s=2)
                        cbar = plt.colorbar(sct, label=value_label, extend='both')
                        cbar.ax.tick_params(labelsize=8)
                        ax.invert_zaxis()
                        ax.view_init(25, 32)
                        ax.invert_xaxis()
                        ax.invert_yaxis()
                        ax.set_zlabel(pressure_label, fontsize=9)
                        ax.set_ylabel('Latitude', fontsize=9)
                        ax.set_xlabel('Longitude', fontsize=9)
                        ax.set_title(title, fontsize=9)
                        pf.save_fig(save_dir_4d, sfile)
def append_variable_data(ds, variable_dict, common_stream_name, exclude_times):
    """Append the data of one dataset to the per-variable arrays of a stream.

    Each raw variable of ``ds`` is matched by ``long_name`` against the keys
    of ``variable_dict[common_stream_name]['vars']``.  When the units equal
    the entry's ``db_units``, the time, pressure, value and deployment arrays
    are appended to the entry after the time ranges in ``exclude_times`` have
    been removed with ``exclude_time_ranges``.  Variables with more than one
    dimension are stored column by column in a dict under ``'values'``.
    Variables lacking a required attribute are skipped.

    :param ds: opened dataset
    :param variable_dict: nested dictionary, updated in place
    :param common_stream_name: key of the stream inside ``variable_dict``
    :param exclude_times: sequence of time ranges to drop (may be empty)
    :return: ``(variable_dict, pressure_unit, pressure_name)`` where the last
        two are lists of the distinct pressure units/names encountered
    """
    def extend(entry, key, data):
        # grow entry[key] by the new samples
        entry[key] = np.append(entry[key], data)

    pressure_unit = []
    pressure_name = []
    ds_vars = cf.return_raw_vars(list(ds.data_vars.keys()) + list(ds.coords))
    vars_dict = variable_dict[common_stream_name]['vars']
    no_exclusions = len(exclude_times) == 0
    print('\nPARAMETERS: ')

    for var in ds_vars:
        try:
            long_name = ds[var].long_name
            matches = [key for key in vars_dict if long_name in key]
            if len(matches) == 0:
                continue
            long_name = matches[0]
            entry = vars_dict[long_name]
            if ds[var].units != entry['db_units']:
                continue

            print('______', long_name)
            if ds[var]._FillValue not in entry['fv']:
                entry['fv'].append(ds[var]._FillValue)
            if ds[var].units not in entry['units']:
                entry['units'].append(ds[var].units)

            times = ds['time'].values
            data = ds[var].values
            deployments = ds['deployment'].values

            # find the pressure to use from the data file
            pvar, pressure, p_unit, p_name, p_fv = cf.add_pressure_to_dictionary_of_sci_vars(ds)
            if p_unit not in pressure_unit:
                pressure_unit.append(p_unit)
            if p_name not in pressure_name:
                pressure_name.append(p_name)

            if len(ds[var].dims) == 1:
                for et in exclude_times:
                    times, pressure, data, deployments = exclude_time_ranges(
                        times, pressure, data, deployments, et)
                # without exclusions everything is appended; with exclusions
                # only when something is left
                if no_exclusions or len(times) > 0:
                    extend(entry, 't', times)
                    extend(entry, 'pressure', pressure)
                    extend(entry, 'values', data)
                    extend(entry, 'deployments', deployments)
                continue

            # appending 2D datasets: one array per column index
            if type(entry['values']) is not dict:
                entry.pop('values')
                entry['values'] = dict()
            columns = entry['values']
            data = data.T

            # for presf_wave_burst data, telemetered and recovered_host pressure data have a matrix of 20,
            # while recovered_inst data have a matrix of 1024. for DCL data, whatever is above 20 will
            # be an array of nans as placeholders (so the indices match between DCL and recovered_inst)
            n_columns = 1024 if common_stream_name == 'presf_abc_wave_burst' else len(data)

            for i in range(n_columns):
                # reset the per-column working copies
                times = ds['time'].values
                deployments = ds['deployment'].values
                col_pressure = pressure

                if i not in columns:
                    columns[i] = np.array([])

                try:
                    col_data = data[i]
                except IndexError:
                    # NaN placeholder for columns missing from this dataset
                    col_data = np.empty(np.shape(times))
                    col_data[:] = np.nan

                for et in exclude_times:
                    times, col_pressure, col_data, deployments = exclude_time_ranges(
                        times, col_pressure, col_data, deployments, et)

                if no_exclusions or len(times) > 0:
                    first_column = i == 0
                    # time, pressure and deployments are shared by all columns
                    # and therefore stored once
                    if first_column:
                        extend(entry, 't', times)
                        extend(entry, 'pressure', col_pressure)
                    columns[i] = np.append(columns[i], col_data)
                    if first_column:
                        extend(entry, 'deployments', deployments)

        except AttributeError:
            continue

    return variable_dict, pressure_unit, pressure_name
def append_evaluated_data(sDir, deployment, ds, variable_dict, common_stream_name, zdbar):
    """Append the quality-filtered data of one dataset to a stream's variables.

    Variables of ``ds`` are matched by ``long_name`` and units against
    ``variable_dict[common_stream_name]['vars']``.  Matching data pass through
    ``reject_erroneous_data``, ``reject_timestamps_data_portal``,
    ``reject_timestamps_from_stat_analysis`` and ``reject_data_in_depth_range``
    (each step runs only while samples remain after the first two) and what
    is left is appended to the entry's ``t``, ``pressure``, ``values`` and
    ``deployments`` arrays.  Variables lacking a required attribute are
    skipped.

    :param sDir: root directory holding the ``time_to_exclude`` files
    :param deployment: deployment identifier passed to the stat-analysis filter
    :param ds: opened dataset providing ``subsite``, ``node`` and ``sensor``
    :param variable_dict: nested dictionary, updated in place
    :param common_stream_name: key of the stream inside ``variable_dict``
    :param zdbar: pressure limit passed to ``reject_data_in_depth_range``
    :return: ``(variable_dict, pressure_unit, pressure_name, total_len)``;
        ``total_len`` sums the unfiltered sample counts of the matched variables
    """
    pressure_unit = []
    pressure_name = []
    r = '{}-{}-{}'.format(ds.subsite, ds.node, ds.sensor)
    ds_vars = cf.return_raw_vars(list(ds.data_vars.keys()) + list(ds.coords))
    vars_dict = variable_dict[common_stream_name]['vars']
    total_len = 0

    for var in ds_vars:
        try:
            long_name = ds[var].long_name
            matches = [key for key in vars_dict if long_name in key]
            if len(matches) == 0:
                continue
            long_name = matches[0]
            entry = vars_dict[long_name]
            if ds[var].units != entry['db_units']:
                continue

            print('\n' + var)
            if ds[var]._FillValue not in entry['fv']:
                entry['fv'].append(ds[var]._FillValue)
            if ds[var].units not in entry['units']:
                entry['units'].append(ds[var].units)

            times = ds['time'].values
            data = ds[var].values
            deployments = ds['deployment'].values

            # find the pressure to use from the data file
            pvarname, pressure, p_unit, p_name, p_fillvalue = cf.add_pressure_to_dictionary_of_sci_vars(ds)
            if p_unit not in pressure_unit:
                pressure_unit.append(p_unit)
            if p_name not in pressure_name:
                pressure_name.append(p_name)

            n_raw = len(times)

            # reject erroneous data
            times, pressure, data, deployments = reject_erroneous_data(
                r, var, times, pressure, data, deployments, ds[var]._FillValue)
            n_valid = len(times)
            print('{} erroneous data'.format(n_raw - n_valid))

            if n_valid == 0:
                print('erroneous data - rejected all')
            else:
                # reject time range from data portal file export
                times, pressure, data, deployments = reject_timestamps_data_portal(
                    ds.subsite, r, times, pressure, data, deployments)
                n_portal = len(times)
                print('{} suspect - data portal'.format(n_valid - n_portal))

                if n_portal == 0:
                    print('suspect data - rejected all, see data portal')
                else:
                    # reject timestamps from stat analysis
                    Dpath = '{}/{}/{}/{}/{}'.format(sDir, ds.subsite[0:2], ds.subsite, r, 'time_to_exclude')
                    times, pressure, data, deployments = reject_timestamps_from_stat_analysis(
                        Dpath, deployment, var, times, pressure, data, deployments)
                    n_stat = len(times)
                    print('{} suspect - stat analysis'.format(n_portal - n_stat))

                    # reject timestamps in a depth range
                    times, pressure, data, deployments = reject_data_in_depth_range(
                        times, pressure, data, deployments, zdbar)
                    n_zrange = len(times)
                    print('{} suspect - water depth > {} dbar'.format(n_stat - n_zrange, zdbar))

            # whatever survived the filters (possibly nothing) is appended
            for key, new_data in (('t', times), ('pressure', pressure),
                                  ('values', data), ('deployments', deployments)):
                entry[key] = np.append(entry[key], new_data)

            total_len += n_raw

        except AttributeError:
            continue

    return variable_dict, pressure_unit, pressure_name, total_len
def main(url_list, sDir, deployment_num, start_time, end_time, preferred_only, n_std, surface_params, depth_params):
    """Plot profile and cross-section figures using separate surface/deep bins.

    For every reference designator found in ``url_list`` (engineering
    instruments are skipped) the matching NetCDF files are opened.  Each
    science variable that is not a pressure variable is filtered with
    ``cf.reject_erroneous_data``, a zero filter and
    ``cf.reject_timestamps_dataportal``, grouped into the pressure ranges
    described by ``surface_params`` and ``depth_params``, and plotted twice
    (all data / cleaned data) as profile and cross-section.  For CE05MOAS and
    CP05MOAS platforms the water depth from the glider engineering file is
    passed to the cross-section plots, and platforms containing ``MOAS`` get
    an additional 3-D scatter plot when lat/lon are available.

    :param url_list: dataset URLs; the second-to-last path element is a
        dash-separated name whose fields 1-4 form the reference designator
    :param sDir: root directory for the saved figures
    :param deployment_num: only plot this deployment number, or None for all
    :param start_time: start of the time slice (datetime-like), or None
    :param end_time: end of the time slice (datetime-like), or None
    :param preferred_only: ``'yes'`` restricts the files to the preferred
        stream returned by ``cf.get_preferred_stream_info``
    :param n_std: accepted for interface compatibility; the value handed to
        ``reject_timestamps_in_groups`` is always built from element 3 of
        ``surface_params`` / ``depth_params``
    :param surface_params: ``[start, stop, step, percentile]`` of the surface
        bins (``range(start, stop, step)``)
    :param depth_params: ``[start, stop, step, percentile]`` of the deep bins
        (``range(start, stop + step, step)``)
    :return: None; figures are written by ``pf.save_fig``
    """
    rd_list = []
    for uu in url_list:
        elements = uu.split('/')[-2].split('-')
        rd = '-'.join((elements[1], elements[2], elements[3], elements[4]))
        if rd not in rd_list and 'ENG' not in rd:
            rd_list.append(rd)

    for r in rd_list:
        print('\n{}'.format(r))
        datasets = []
        for u in url_list:
            splitter = u.split('/')[-2].split('-')
            rd_check = '-'.join((splitter[1], splitter[2], splitter[3], splitter[4]))
            if rd_check == r:
                datasets.extend(cf.get_nc_urls([u]))

        if preferred_only == 'yes':
            fdatasets = []
            # get the preferred stream information
            ps_df, n_streams = cf.get_preferred_stream_info(r)
            for index, row in ps_df.iterrows():
                for ii in range(n_streams):
                    try:
                        rms = '-'.join((r, row[ii]))
                    except TypeError:
                        # non-string entry: no stream in this column
                        continue
                    for dd in datasets:
                        spl = dd.split('/')[-2].split('-')
                        catalog_rms = '-'.join((spl[1], spl[2], spl[3], spl[4], spl[5], spl[6]))
                        fdeploy = dd.split('/')[-1].split('_')[0]
                        if rms == catalog_rms and fdeploy == row['deployment']:
                            fdatasets.append(dd)
        else:
            fdatasets = datasets

        main_sensor = r.split('-')[-1]
        fdatasets_sel = cf.filter_collocated_instruments(main_sensor, fdatasets)

        for fd in fdatasets_sel:
            part_d = fd.split('/')[-1]
            print('\n{}'.format(part_d))
            ds = xr.open_dataset(fd, mask_and_scale=False)
            ds = ds.swap_dims({'obs': 'time'})

            fname, subsite, refdes, method, stream, deployment = cf.nc_attributes(fd)
            array = subsite[0:2]
            sci_vars = cf.return_science_vars(stream)

            # Filter on the deployment before opening any engineering file.
            # The number is taken from the last four characters and compared
            # by value: split('0')[-1] breaks for numbers containing or ending
            # in a zero (e.g. 0010 -> ''), and 'is not' tests identity.
            if deployment_num is not None:
                if int(deployment[-4:]) != deployment_num:
                    print(type(int(deployment[-4:])), type(deployment_num))
                    continue

            m_water_depth = None
            t_eng = None
            if 'CE05MOAS' in r or 'CP05MOAS' in r:
                # for coastal gliders, get m_water_depth for bathymetry
                eng = '-'.join((r.split('-')[0], r.split('-')[1], '00-ENG000000', method, 'glider_eng'))
                eng_url = [s for s in url_list if eng in s]
                if len(eng_url) == 1:
                    eng_datasets = cf.get_nc_urls(eng_url)
                    # filter out collocated datasets
                    eng_dataset = [j for j in eng_datasets
                                   if (eng in j.split('/')[-1] and deployment in j.split('/')[-1])]
                    if len(eng_dataset) > 0:
                        with xr.open_dataset(eng_dataset[0], mask_and_scale=False) as ds_eng:
                            t_eng = ds_eng['time'].values
                            m_water_depth = ds_eng['m_water_depth'].values
                            try:
                                # m_altimeter_status = 0 means a good reading (not nan or -1)
                                eng_ind = ds_eng['m_altimeter_status'].values == 0
                            except KeyError:
                                # no status variable: keep finite, non-negative depths
                                eng_ind = (~np.isnan(m_water_depth)) & (m_water_depth >= 0)
                        m_water_depth = m_water_depth[eng_ind]
                        t_eng = t_eng[eng_ind]
                    else:
                        print('No engineering file for deployment {}'.format(deployment))

            if start_time is not None and end_time is not None:
                ds = ds.sel(time=slice(start_time, end_time))
                if len(ds['time'].values) == 0:
                    print('No data to plot for specified time range: ({} to {})'.format(start_time, end_time))
                    continue
                stime = start_time.strftime('%Y-%m-%d')
                etime = end_time.strftime('%Y-%m-%d')
                subdirs = (deployment, stime + 'to' + etime)
            else:
                subdirs = (deployment,)

            base_dir = os.path.join(sDir, array, subsite, refdes)
            save_dir_profile = os.path.join(base_dir, 'profile_plots', *subdirs)
            save_dir_xsection = os.path.join(base_dir, 'xsection_plots', *subdirs)
            save_dir_4d = os.path.join(base_dir, 'xsection_plots_4d', *subdirs)

            tm = ds['time'].values
            try:
                ds_lat = ds['lat'].values
            except KeyError:
                ds_lat = None
                print('No latitude variable in file')
            try:
                ds_lon = ds['lon'].values
            except KeyError:
                ds_lon = None
                print('No longitude variable in file')

            # get pressure variable
            # NOTE(review): the other callers in this file unpack five values
            # (name, data, units, long name, fill value) from this helper;
            # both layouts are accepted here - confirm against cf and simplify.
            pressure_info = cf.add_pressure_to_dictionary_of_sci_vars(ds)
            if len(pressure_info) == 5:
                _, y, y_units, press, _ = pressure_info
            else:
                y, y_units, press = pressure_info

            pressure_label = press[0] + " (" + y_units[0] + ")"

            # depth ranges and matching per-range percentile settings are the
            # same for every variable of this file
            range1 = list(range(surface_params[0], surface_params[1], surface_params[2]))
            range2 = list(range(depth_params[0], depth_params[1] + depth_params[2], depth_params[2]))
            ranges = range1 + range2
            inpercentile = [surface_params[3]] * len(range1) + [depth_params[3]] * len(range2)
            group_n_std = [surface_params[3]] * len(range1) + [depth_params[3]] * len(range2)

            for sv in sci_vars:
                print(sv)
                if 'pressure' in sv:
                    continue

                z = ds[sv].values
                fv = ds[sv]._FillValue
                sv_units = ds[sv].units

                # Check if the array is all NaNs
                if np.isnan(z).all():
                    print('Array of all NaNs - skipping plot.')
                    continue

                # Check if the array is all fill values
                if len(z[z != fv]) == 0:
                    print('Array of all fill values - skipping plot.')
                    continue

                # reject erroneous data
                dtime, zpressure, ndata, lenfv, lennan, lenev, lengr, global_min, global_max, lat, lon = \
                    cf.reject_erroneous_data(r, sv, tm, y, z, fv, ds_lat, ds_lon)

                # get rid of 0.0 data
                if 'CTD' in r:
                    ind = zpressure > 0.0
                else:
                    ind = ndata > 0.0

                lenzero = np.sum(~ind)
                dtime = dtime[ind]
                zpressure = zpressure[ind]
                ndata = ndata[ind]
                if ds_lat is not None and ds_lon is not None:
                    lat = lat[ind]
                    lon = lon[ind]
                else:
                    lat = None
                    lon = None

                # .min()/.max() raise on a zero-size array, so fall back to the
                # unfiltered time axis when every sample was rejected.
                have_clean = len(dtime) > 0
                title_time = dtime if have_clean else tm
                t0 = pd.to_datetime(title_time.min()).strftime('%Y-%m-%dT%H:%M:%S')
                t1 = pd.to_datetime(title_time.max()).strftime('%Y-%m-%dT%H:%M:%S')
                title = ' '.join((deployment, refdes, method)) + '\n' + t0 + ' to ' + t1

                if have_clean:
                    # reject time range from data portal file export
                    t_portal, z_portal, y_portal, lat_portal, lon_portal = \
                        cf.reject_timestamps_dataportal(subsite, r, dtime, zpressure, ndata, lat, lon)
                    print('removed {} data points using visual inspection of data'.format(
                        len(ndata) - len(z_portal)))

                    # create data groups
                    columns = ['tsec', 'dbar', str(sv)]
                    groups, d_groups = gt.group_by_depth_range(t_portal, y_portal, z_portal, columns, ranges)

                    # get percentile analysis for printing on the profile plot
                    y_plt, n_med, n_min, n_max, n0_std, n1_std, l_arr, time_ex = reject_timestamps_in_groups(
                        groups, d_groups, group_n_std, inpercentile)

                sname = '-'.join((r, method, sv))
                value_label = sv + " (" + sv_units + ")"

                # ---- Plot all data ----
                if len(tm) > 0:
                    cf.create_dir(save_dir_profile)
                    cf.create_dir(save_dir_xsection)
                    sfileall = '_'.join(('all_data', sname))

                    # profile plot
                    fig, ax = pf.plot_profiles(z, y, tm, pressure_label, value_label, 'Time', stdev=None)
                    ax.set_title(title, fontsize=9)
                    fig.tight_layout()
                    pf.save_fig(save_dir_profile, sfileall)

                    # xsection plot
                    fig, ax, bar = pf.plot_xsection(subsite, tm, y, z, value_label, pressure_label,
                                                    t_eng, m_water_depth, inpercentile=None, stdev=None)
                    # guard in case no figure was produced - TODO confirm
                    # what pf.plot_xsection returns in that case
                    if fig:
                        ax.set_title(title, fontsize=9)
                        fig.tight_layout()
                        pf.save_fig(save_dir_xsection, sfileall)

                # ---- Plot cleaned-up data ----
                if not have_clean:
                    continue

                sfile = '_'.join(('rm_erroneous_data', sname))
                removed_text = (
                    'removed {} fill values, {} NaNs, {} Extreme Values (1e7), {} Global ranges [{} - {}], '
                    '{} zeros'.format(lenfv, lennan, lenev, lengr, global_min, global_max, lenzero)
                    + '\nexcluded {} suspect data points when inspected visually'.format(
                        len(ndata) - len(z_portal)))

                # profile plot
                fig, ax = pf.plot_profiles(z_portal, y_portal, t_portal, pressure_label, value_label, 'Time',
                                           stdev=None)
                ax.set_title(title, fontsize=9)
                ax.plot(n_med, y_plt, '.k')
                ax.fill_betweenx(y_plt, n0_std, n1_std, color='m', alpha=0.2)
                leg_text = (
                    removed_text
                    + '\n(black) data median in {} dbar segments (break at {} dbar)'.format(
                        [surface_params[2], depth_params[2]], depth_params[0])
                    + '\n(magenta) upper and lower {} percentile envelope in {} dbar segments'.format(
                        [surface_params[3], depth_params[3]], [surface_params[2], depth_params[2]]),
                )
                ax.legend(leg_text, loc='upper center', bbox_to_anchor=(0.5, -0.17), fontsize=6)
                fig.tight_layout()
                pf.save_fig(save_dir_profile, sfile)

                # xsection plot of the non-erroneous data
                fig, ax, bar = pf.plot_xsection(subsite, t_portal, y_portal, z_portal, value_label,
                                                pressure_label, t_eng, m_water_depth,
                                                inpercentile=None, stdev=None)
                if fig:
                    ax.set_title(title, fontsize=9)
                    ax.legend((removed_text,), loc='upper center', bbox_to_anchor=(0.5, -0.17), fontsize=6)
                    fig.tight_layout()
                    pf.save_fig(save_dir_xsection, sfile)

                # 4D plot for gliders only
                if 'MOAS' in r:
                    if ds_lat is not None and ds_lon is not None:
                        cf.create_dir(save_dir_4d)

                        fig = plt.figure()
                        ax = fig.add_subplot(111, projection='3d')
                        sct = ax.scatter(lon_portal, lat_portal, y_portal, c=z_portal, s=2)
                        cbar = plt.colorbar(sct, label=value_label, extend='both')
                        cbar.ax.tick_params(labelsize=8)
                        ax.invert_zaxis()
                        ax.view_init(25, 32)
                        ax.invert_xaxis()
                        ax.invert_yaxis()
                        ax.set_zlabel(pressure_label, fontsize=9)
                        ax.set_ylabel('Latitude', fontsize=9)
                        ax.set_xlabel('Longitude', fontsize=9)
                        ax.set_title(title, fontsize=9)
                        pf.save_fig(save_dir_4d, sfile)
def plot_velocity_variables(r, fdatasets, num_plots, save_dir):
    """
    Plot velocity, pressure and attitude time series, one panel per dataset.

    Five figures with ``num_plots`` stacked panels each are built and saved to
    ``save_dir``: calculated currents (quiver), u/v/w components, pressure,
    roll and pitch. Roll and pitch are only drawn for VELPT instruments, where
    the samples flagged by the tilt test are over-plotted in a second colour.
    The deployment number of every dataset is appended to the file names.

    :param r: reference designator; must contain 'VELPT' or 'VEL3D'
    :param fdatasets: list of netCDF dataset paths/urls, one per panel
    :param num_plots: number of panels allocated in every figure
    :param save_dir: directory the figures are written to
    :raises ValueError: when a dataset is processed and ``r`` is neither a
        VELPT nor a VEL3D instrument
    """

    def new_figure():
        # squeeze=False always yields a 2-D axes array, so panels can be indexed
        # even when num_plots == 1 (plt.subplots would otherwise return a bare Axes)
        figure, axes = plt.subplots(nrows=num_plots, ncols=1, sharey=True, squeeze=False)
        figure.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.0)
        return figure, axes[:, 0]

    def plot_dots(axis, x, y, color, label=None):
        # colour and marker are given as keywords only; repeating them in a format
        # string such as 'b.' can make matplotlib warn about a redundant definition
        style = dict(color=color, linestyle='None', marker='.', markersize=0.5)
        if label is not None:
            style['label'] = label
        axis.plot(x, y, **style)

    is_velpt = 'VELPT' in r
    if is_velpt:
        velocity_names = ('upward_velocity', 'eastward_velocity', 'northward_velocity')
    elif 'VEL3D' in r:
        velocity_names = ('upward_turbulent_velocity', 'eastward_turbulent_velocity',
                          'northward_turbulent_velocity')
    else:
        velocity_names = None

    fig, ax = new_figure()
    fig_file = 'calculated_currents_plot'

    fig0, ax0 = new_figure()
    fig0_file = 'uvw_plots'

    fig1, ax1 = new_figure()
    fig1_file = 'pressure_plots'

    fig2, ax2 = new_figure()
    fig2_file = 'roll_plots'

    fig3, ax3 = new_figure()
    fig3_file = 'pitch_plots'

    n_datasets = len(fdatasets)

    # blank the panels that have no dataset; this does not depend on the dataset
    # being processed, so it is done once instead of on every loop iteration
    for jj in range(n_datasets, num_plots):
        for panels in (ax, ax0, ax1, ax2, ax3):
            panels[jj].axis('off')

    quiver_style = dict(units='y', scale_units='y', scale=1, headlength=1, headaxislength=1,
                        width=0.004, alpha=0.5)

    for ii, fdataset in enumerate(fdatasets):
        file_name = fdataset.split('/')[-1]
        print('\n', file_name)
        deployment = int(file_name.split('_')[0].split('deployment')[-1])

        if velocity_names is None:
            raise ValueError('{} is not a VELPT or VEL3D instrument'.format(r))
        w_var, u_var, v_var = velocity_names

        # the context manager closes the file once the panels for it are drawn
        with xr.open_dataset(fdataset, mask_and_scale=False) as ds:
            time = ds['time'].values

            ''' variable list '''
            var_list = cf.notin_list(ds.data_vars.keys(), ['time', '_qc_'])

            z_id, z_data, z_unit, z_name, z_fill = cf.add_pressure_to_dictionary_of_sci_vars(ds)
            z_data, err_count_z = reject_err_data_1_dims(z_data, z_fill[0], r, z_name[0], n=5)

            w_id, w_data, _, _, w_fill = get_variable_data(ds, var_list, w_var)
            w_data, _ = reject_err_data_1_dims(w_data, w_fill, r, w_id, n=5)

            u_id, u_data, u_unit, _, u_fill = get_variable_data(ds, var_list, u_var)
            u_data, _ = reject_err_data_1_dims(u_data, u_fill, r, u_id, n=5)

            v_id, v_data, _, _, v_fill = get_variable_data(ds, var_list, v_var)
            v_data, _ = reject_err_data_1_dims(v_data, v_fill, r, v_id, n=5)

            if is_velpt:
                roll_id, roll_data, roll_unit, roll_name, roll_fill = get_variable_data(ds, var_list, 'roll')
                roll_data, err_count_roll = reject_err_data_1_dims(roll_data, roll_fill, r, roll_id, n=5)

                pitch_id, pitch_data, pitch_unit, pitch_name, pitch_fill = get_variable_data(
                    ds, var_list, 'pitch')
                pitch_data, err_count_pitch = reject_err_data_1_dims(pitch_data, pitch_fill, r, pitch_id, n=5)

                # According to VELPT manufacturer, data are suspect when the instrument is tilted
                # more than 20 degrees (redmine ticket # 12960).
                # NOTE(review): the threshold used is 200, presumably because roll/pitch are stored
                # in deci-degrees - confirm against the variable units.
                tilt_ind = np.logical_or(abs(pitch_data) > 200, abs(roll_data) > 200)
                percent_good = ((len(time) - len(time[tilt_ind])) / len(time)) * 100
                tilt_label = str(round(100 - percent_good, 2)) + '%'

            ''' Plot pressure '''
            plot_dots(ax1[ii], time, z_data, 'b')
            if is_velpt:
                plot_dots(ax1[ii], time[tilt_ind], z_data[tilt_ind], 'r', label=tilt_label)
            prepare_axis(r, time, deployment, ax1[ii], ii, n_datasets, z_name[0], z_unit[0],
                         err_count=err_count_z)
            fig1_file = fig1_file + str(deployment)

            if is_velpt:
                ''' plot roll '''
                plot_dots(ax2[ii], time, roll_data, 'b')
                plot_dots(ax2[ii], time[tilt_ind], roll_data[tilt_ind], 'r', label=tilt_label)
                prepare_axis(r, time, deployment, ax2[ii], ii, n_datasets, roll_name, roll_unit,
                             err_count=err_count_roll)
                fig2_file = fig2_file + str(deployment)

                ''' plot pitch '''
                plot_dots(ax3[ii], time, pitch_data, 'b')
                plot_dots(ax3[ii], time[tilt_ind], pitch_data[tilt_ind], 'r', label=tilt_label)
                prepare_axis(r, time, deployment, ax3[ii], ii, n_datasets, pitch_name, pitch_unit,
                             err_count=err_count_pitch)
                fig3_file = fig3_file + str(deployment)

            ''' 1D Quiver plot '''
            ax[ii].quiver(time, 0, u_data, v_data, color='b', **quiver_style)
            if is_velpt:
                ax[ii].quiver(time[tilt_ind], 0, u_data[tilt_ind], v_data[tilt_ind], color='r',
                              label=tilt_label, **quiver_style)

            # symmetric y-limits so the arrows are centred on the zero line
            uv_magnitude = np.sqrt(u_data**2 + v_data**2)
            uv_maxmag = np.nanmax(uv_magnitude)
            ax[ii].set_ylim(-uv_maxmag, uv_maxmag)
            prepare_axis(r, time, deployment, ax[ii], ii, n_datasets, 'Current Velocity', u_unit,
                         err_count=None)
            fig_file = fig_file + str(deployment)

            ''' Plot u, v and w components '''
            plot_dots(ax0[ii], time, v_data, 'b', label='V')
            if is_velpt:
                plot_dots(ax0[ii], time[tilt_ind], v_data[tilt_ind], 'r', label=tilt_label)

            plot_dots(ax0[ii], time, u_data, 'g', label='U')
            if is_velpt:
                plot_dots(ax0[ii], time[tilt_ind], u_data[tilt_ind], 'y', label=tilt_label)

            plot_dots(ax0[ii], time, w_data, 'm', label='W')
            if is_velpt:
                plot_dots(ax0[ii], time[tilt_ind], w_data[tilt_ind], 'c', label=tilt_label)

            prepare_axis(r, time, deployment, ax0[ii], ii, n_datasets, 'Velocity Components', u_unit,
                         err_count=None)
            fig0_file = fig0_file + str(deployment)

    # the file names now carry every deployment number, so save once at the end
    for figure, figure_file in ((fig1, fig1_file), (fig, fig_file), (fig0, fig0_file),
                                (fig2, fig2_file), (fig3, fig3_file)):
        save_file = os.path.join(save_dir, figure_file)
        figure.savefig(str(save_file), dpi=150, bbox_inches='tight')

    plt.close('all')
def main(url_list, sDir, plot_type, deployment_num, start_time, end_time, preferred_only, glider, zdbar, n_std,
         inpercentile, zcell_size):
    """
    Create cross-section plots of science variables after removing bad data.

    For every reference designator found in ``url_list`` (engineering 'ENG'
    entries excluded) and every selected dataset, up to two figures are saved
    per science variable: one after removal of erroneous data
    ('rm_erroneous_data') and, when further points were excluded, one after
    removal of all suspect data ('rm_suspect_data').

    :param url_list: catalog urls; the reference designator is parsed from the
        second-to-last path component of each url
    :param sDir: root directory for the output plots
    :param plot_type: sub-directory name used when building the save path
    :param deployment_num: if not None, only this deployment number is plotted
    :param start_time: start of the time window (needs ``strftime``) or None
    :param end_time: end of the time window (needs ``strftime``) or None
    :param preferred_only: 'yes' keeps only datasets that match
        cf.get_preferred_stream_info
    :param glider: 'no' disables bathymetry; 'yes' limits bathymetry to the
        time range of the cleaned data in the second plot
    :param zdbar: if truthy, data at pressures not below this value are
        excluded from the second plot
    :param n_std: passed through to cf.reject_timestamps_in_groups
    :param inpercentile: passed through to cf.reject_timestamps_in_groups and
        pf.plot_xsection
    :param zcell_size: integer bin size used to build the depth ranges
    """

    def refdes_from_url(url):
        # fields 1-4 of the dash-separated second-to-last path component
        return '-'.join(url.split('/')[-2].split('-')[1:5])

    rd_list = []
    for uu in url_list:
        rd = refdes_from_url(uu)
        if rd not in rd_list and 'ENG' not in rd:
            rd_list.append(rd)

    for r in rd_list:
        print('\n{}'.format(r))
        datasets = []
        for u in url_list:
            if refdes_from_url(u) == r:
                datasets.extend(cf.get_nc_urls([u]))

        if preferred_only == 'yes':
            fdatasets = []
            # get the preferred stream information
            ps_df, n_streams = cf.get_preferred_stream_info(r)
            for index, row in ps_df.iterrows():
                for ii in range(n_streams):
                    try:
                        rms = '-'.join((r, row[ii]))
                    except TypeError:
                        # entry is not a string, i.e. no stream listed in this slot
                        continue
                    for dd in datasets:
                        catalog_rms = '-'.join(dd.split('/')[-2].split('-')[1:7])
                        fdeploy = dd.split('/')[-1].split('_')[0]
                        if rms == catalog_rms and fdeploy == row['deployment']:
                            fdatasets.append(dd)
        else:
            fdatasets = datasets

        main_sensor = r.split('-')[-1]
        fdatasets_sel = cf.filter_collocated_instruments(main_sensor, fdatasets)

        for fd in fdatasets_sel:
            part_d = fd.split('/')[-1]
            print(part_d)
            fname, subsite, refdes, method, stream, deployment = cf.nc_attributes(fd)

            if deployment_num is not None:
                # compare by value: identity ('is not') is unreliable for integers, and
                # splitting on '0' breaks for names such as 'deployment0010'
                if int(deployment.split('deployment')[-1]) != deployment_num:
                    continue

            ds = xr.open_dataset(fd, mask_and_scale=False)
            ds = ds.swap_dims({'obs': 'time'})
            array = subsite[0:2]
            sci_vars = cf.return_science_vars(stream)

            # reset for every dataset so bathymetry from a previous file is never reused
            t_eng = None
            m_water_depth = None
            if glider != 'no' and ('CE05MOAS' in r or 'CP05MOAS' in r):
                # for coastal gliders, get m_water_depth for bathymetry
                r_parts = r.split('-')
                eng = '-'.join((r_parts[0], r_parts[1], '00-ENG000000', method, 'glider_eng'))
                eng_url = [s for s in url_list if eng in s]
                if len(eng_url) == 1:
                    eng_datasets = cf.get_nc_urls(eng_url)
                    # filter out collocated datasets
                    eng_dataset = [j for j in eng_datasets
                                   if eng in j.split('/')[-1] and deployment in j.split('/')[-1]]
                    if len(eng_dataset) > 0:
                        with xr.open_dataset(eng_dataset[0], mask_and_scale=False) as ds_eng:
                            t_eng = ds_eng['time'].values
                            m_water_depth = ds_eng['m_water_depth'].values
                            # m_altimeter_status = 0 means a good reading (not nan or -1)
                            eng_ind = ds_eng['m_altimeter_status'].values == 0
                        m_water_depth = m_water_depth[eng_ind]
                        t_eng = t_eng[eng_ind]
                    else:
                        print('No engineering file for deployment {}'.format(deployment))

            if start_time is not None and end_time is not None:
                ds = ds.sel(time=slice(start_time, end_time))
                if len(ds['time'].values) == 0:
                    print('No data to plot for specified time range: ({} to {})'.format(start_time, end_time))
                    continue
                stime = start_time.strftime('%Y-%m-%d')
                etime = end_time.strftime('%Y-%m-%d')
                ext = stime + 'to' + etime
                save_dir = os.path.join(sDir, array, subsite, refdes, plot_type, deployment, ext)
            else:
                save_dir = os.path.join(sDir, array, subsite, refdes, plot_type, deployment)

            cf.create_dir(save_dir)

            tm = ds['time'].values

            # get pressure variable
            # NOTE(review): other callers in this file unpack four or five values from this
            # helper - confirm the return signature of the cf version in use
            y, y_units, press = cf.add_pressure_to_dictionary_of_sci_vars(ds)
            print(y_units, press)

            for sv in sci_vars:
                print(sv)
                if 'sci_water_pressure' in sv:
                    continue

                z = ds[sv].values
                fv = ds[sv]._FillValue
                z_units = ds[sv].units

                # Check if the array is all NaNs
                if sum(np.isnan(z)) == len(z):
                    print('Array of all NaNs - skipping plot.')
                    continue

                # Check if the array is all fill values
                if len(z[z != fv]) == 0:
                    print('Array of all fill values - skipping plot.')
                    continue

                """ clean up data """
                # reject erroneous data
                dtime, zpressure, ndata, lenfv, lennan, lenev, lengr, global_min, global_max = \
                    cf.reject_erroneous_data(r, sv, tm, y, z, fv)

                # get rid of 0.0 data
                if 'CTD' in r:
                    ind = zpressure > 0.0
                else:
                    ind = ndata > 0.0

                lenzero = np.sum(~ind)
                dtime = dtime[ind]
                zpressure = zpressure[ind]
                ndata = ndata[ind]

                if len(dtime) == 0:
                    # min()/max() below would raise on an empty array
                    print('No data left after removing erroneous data - skipping plot.')
                    continue

                # creating data groups
                columns = ['tsec', 'dbar', str(sv)]
                min_r = int(round(min(zpressure) - zcell_size))
                max_r = int(round(max(zpressure) + zcell_size))
                ranges = list(range(min_r, max_r, zcell_size))
                groups, d_groups = gt.group_by_depth_range(dtime, zpressure, ndata, columns, ranges)

                # rejecting timestamps from percentile analysis
                y_avg, n_avg, n_min, n_max, n0_std, n1_std, l_arr, time_ex = cf.reject_timestamps_in_groups(
                    groups, d_groups, n_std, inpercentile)
                t_nospct, z_nospct, y_nospct = cf.reject_suspect_data(dtime, zpressure, ndata, time_ex)

                print('removed {} data points using {} percentile of data grouped in {} dbar segments'.format(
                    len(zpressure) - len(z_nospct), inpercentile, zcell_size))

                # reject time range from data portal file export
                t_portal, z_portal, y_portal = cf.reject_timestamps_dataportal(subsite, r, t_nospct, y_nospct,
                                                                               z_nospct)
                print('removed {} data points using visual inspection of data'.format(
                    len(z_nospct) - len(z_portal)))

                # reject data in a depth range
                if zdbar:
                    y_ind = y_portal < zdbar
                    n_zdbar = np.sum(~y_ind)
                    t_array = t_portal[y_ind]
                    y_array = y_portal[y_ind]
                    z_array = z_portal[y_ind]
                else:
                    n_zdbar = 0
                    t_array = t_portal
                    y_array = y_portal
                    z_array = z_portal
                print('{} in water depth > {} dbar'.format(n_zdbar, zdbar))

                """ Plot data """
                sname = '-'.join((r, method, sv))
                clabel = sv + " (" + z_units + ")"
                ylabel = press[0] + " (" + y_units[0] + ")"

                removed_text = (
                    'removed {} fill values, {} NaNs, {} Extreme Values (1e7), {} Global ranges [{} - {}], '
                    '{} zeros'.format(lenfv, lennan, lenev, lengr, global_min, global_max, lenzero))

                # plot non-erroneous data
                fig, ax, bar = pf.plot_xsection(subsite, dtime, zpressure, ndata, clabel, ylabel, t_eng,
                                                m_water_depth, inpercentile, stdev=None)
                t0 = pd.to_datetime(dtime.min()).strftime('%Y-%m-%dT%H:%M:%S')
                t1 = pd.to_datetime(dtime.max()).strftime('%Y-%m-%dT%H:%M:%S')
                title = ' '.join((deployment, refdes, method)) + '\n' + t0 + ' to ' + t1
                ax.set_title(title, fontsize=9)

                leg_text = (removed_text,)
                ax.legend(leg_text, loc='upper center', bbox_to_anchor=(0.5, -0.17), fontsize=6)
                fig.tight_layout()
                sfile = '_'.join(('rm_erroneous_data', sname))
                pf.save_fig(save_dir, sfile)

                # plots removing all suspect data (only when something was actually removed)
                if len(t_array) == 0 or len(t_array) == len(dtime):
                    continue

                # plot bathymetry only within data time ranges; work on copies so the
                # full record stays available for the next science variable
                t_eng_sel = t_eng
                depth_sel = m_water_depth
                if glider == 'yes' and t_eng is not None:
                    eng_ind = (t_eng >= np.min(t_array)) & (t_eng <= np.max(t_array))
                    t_eng_sel = t_eng[eng_ind]
                    depth_sel = m_water_depth[eng_ind]

                fig, ax, bar = pf.plot_xsection(subsite, t_array, y_array, z_array, clabel, ylabel,
                                                t_eng_sel, depth_sel, inpercentile, stdev=None)
                t0 = pd.to_datetime(t_array.min()).strftime('%Y-%m-%dT%H:%M:%S')
                t1 = pd.to_datetime(t_array.max()).strftime('%Y-%m-%dT%H:%M:%S')
                title = ' '.join((deployment, refdes, method)) + '\n' + t0 + ' to ' + t1
                ax.set_title(title, fontsize=9)

                legend_lines = [
                    removed_text,
                    'removed {} in the upper and lower {}th percentile of data grouped in {} dbar segments'.format(
                        len(zpressure) - len(z_nospct), inpercentile, zcell_size),
                    'excluded {} suspect data points when inspected visually'.format(
                        len(z_nospct) - len(z_portal)),
                ]
                if zdbar:
                    legend_lines.append(
                        'excluded {} suspect data in water depth greater than {} dbar'.format(n_zdbar, zdbar))
                leg_text = ('\n'.join(legend_lines),)

                ax.legend(leg_text, loc='upper center', bbox_to_anchor=(0.5, -0.17), fontsize=6)
                fig.tight_layout()
                sfile = '_'.join(('rm_suspect_data', sname))
                pf.save_fig(save_dir, sfile)
def main(url_list, sDir, stime, etime):
    """
    Overlay profile plots of the same science variables from two instruments.

    For every deployment of the first instrument that also exists for the
    second one, each non-pressure science variable is drawn against pressure
    for both datasets on a single set of axes and saved under
    ``<sDir>/<array>/<subsite>/<inst>/profile_plots/<deployment>``. A figure
    is only saved when at least one of the two datasets contributed data.

    :param url_list: exactly two catalog urls (one per reference designator)
    :param sDir: root directory for the output plots
    :param stime: start of the time window, or None for the full record
    :param etime: end of the time window, or None for the full record
    """
    if len(url_list) != 2:
        print('Please provide 2 reference designators for plotting')
        return

    uu0, uu1 = url_list
    # the reference designator sits at a fixed character range of the catalog directory name
    rd0 = uu0.split('/')[-2][20:47]
    rd1 = uu1.split('/')[-2][20:47]
    array = rd0[0:2]
    inst = rd0.split('-')[-1]

    datasets0 = list(cf.get_nc_urls([uu0]))
    datasets1 = list(cf.get_nc_urls([uu1]))

    fdatasets0_sel = cf.filter_collocated_instruments(inst, datasets0)
    fdatasets1_sel = cf.filter_collocated_instruments(rd1.split('-')[-1], datasets1)

    deployments = [dd.split('/')[-1].split('_')[0] for dd in fdatasets0_sel]

    # a value must exceed this bound to be plotted; variables not listed must be > 0
    min_valid = {
        'salinity': 1,
        'density': 1000,
        'conductivity': 0.1,
        'dissolved_oxygen': 160,
        'estimated_oxygen_concentration': 200,
    }

    for d in deployments:
        fd0 = [x for x in fdatasets0_sel if d in x]
        fd1 = [x for x in fdatasets1_sel if d in x]
        if not fd1:
            # the second instrument has no file for this deployment
            print('No {} dataset found for {} - skipping'.format(rd1, d))
            continue

        ds0 = xr.open_dataset(fd0[0], mask_and_scale=False)
        ds0 = ds0.swap_dims({'obs': 'time'})
        ds1 = xr.open_dataset(fd1[0], mask_and_scale=False)
        ds1 = ds1.swap_dims({'obs': 'time'})

        if stime is not None and etime is not None:
            ds0 = ds0.sel(time=slice(stime, etime))
            ds1 = ds1.sel(time=slice(stime, etime))
            if len(ds0['time'].values) == 0:
                print('No data to plot for specified time range: ({} to {})'.format(stime, etime))
                continue

        fname, subsite, refdes, method, stream, deployment = cf.nc_attributes(fd0[0])
        sci_vars = cf.return_science_vars(stream)

        save_dir_profile = os.path.join(sDir, array, subsite, inst, 'profile_plots', deployment)
        cf.create_dir(save_dir_profile)

        # get pressure variable (the name found in the first dataset is used for both)
        pvarname, y1, y_units, press, y_fillvalue = cf.add_pressure_to_dictionary_of_sci_vars(ds0)

        for sv in sci_vars:
            print('')
            print(sv)
            if 'pressure' in sv:
                continue

            fig, ax = plt.subplots()
            plt.margins(y=.08, x=.02)
            plt.grid()
            title = ' '.join((deployment, subsite, inst, method))
            sname = '-'.join((subsite, inst, method, sv))

            plotted = False
            for ds in (ds0, ds1):
                zpressure = ds[pvarname].values
                z1 = ds[sv].values
                fv = ds[sv]._FillValue
                sv_units = ds[sv].units

                # Check if the array is all NaNs
                if sum(np.isnan(z1)) == len(z1):
                    print('Array of all NaNs - skipping plot.')
                    continue

                # Check if the array is all fill values
                if len(z1[z1 != fv]) == 0:
                    print('Array of all fill values - skipping plot.')
                    continue

                # get rid of 0.0 (and other unreasonably low) data
                ind = z1 > min_valid.get(sv, 0)
                if np.any(ind):
                    ax.scatter(z1[ind], zpressure[ind], s=2, edgecolor='None')
                    plotted = True

            if not plotted:
                # nothing was drawn for this variable: release the empty figure
                plt.close(fig)
                continue

            # decorate once per figure; invert_yaxis() toggles, so calling it once
            # per dataset would flip the axis back
            xlabel = sv + " (" + sv_units + ")"
            ylabel = press[0] + " (" + y_units[0] + ")"
            ax.invert_yaxis()
            ax.set_xlabel(xlabel, fontsize=9)
            ax.set_ylabel(ylabel, fontsize=9)
            ax.set_title(title + '\nWFP02 (blue) & WFP03 (orange)', fontsize=9)
            fig.tight_layout()
            pf.save_fig(save_dir_profile, sname)
def main(url_list, sDir, deployment_num, start_time, end_time, preferred_only, zdbar, n_std, inpercentile,
         zcell_size):
    """
    Create profile and cross-section plots with suspect data removed.

    For every reference designator found in ``url_list`` ('ENG' and 'ADCP'
    entries excluded) and every selected dataset, each non-pressure science
    variable is cleaned (erroneous values, unreasonable values, timestamps
    rejected by visual inspection, statistical outliers per depth bin) and,
    when the outlier step removed something, a profile and a cross-section
    plot are saved. The timestamps rejected by the outlier step are appended
    to a '<deployment>_<refdes>_<method>_excluded_timestamps.csv' file.

    :param url_list: catalog urls; the reference designator is parsed from the
        second-to-last path component of each url
    :param sDir: root directory for plots and the excluded-timestamps file
    :param deployment_num: if not None, only this deployment number is plotted
    :param start_time: start of the time window (needs ``strftime``) or None
    :param end_time: end of the time window (needs ``strftime``) or None
    :param preferred_only: 'yes' keeps only datasets that match
        cf.get_preferred_stream_info
    :param zdbar: if truthy, only data with 0 < pressure < zdbar are used
    :param n_std: passed to cf.reject_timestamps_in_groups; replaced by None
        for variables whose name contains 'scatter'
    :param inpercentile: passed to cf.reject_timestamps_in_groups and
        pf.plot_xsection
    :param zcell_size: integer bin size used to build the depth ranges
    """

    def refdes_from_url(url):
        # fields 1-4 of the dash-separated second-to-last path component
        return '-'.join(url.split('/')[-2].split('-')[1:5])

    rd_list = []
    for uu in url_list:
        rd = refdes_from_url(uu)
        if rd not in rd_list and 'ENG' not in rd and 'ADCP' not in rd:
            rd_list.append(rd)

    for r in rd_list:
        print('\n{}'.format(r))
        datasets = []
        for u in url_list:
            if refdes_from_url(u) == r:
                datasets.extend(cf.get_nc_urls([u]))

        if preferred_only == 'yes':
            fdatasets = []
            # get the preferred stream information
            ps_df, n_streams = cf.get_preferred_stream_info(r)
            for index, row in ps_df.iterrows():
                for ii in range(n_streams):
                    try:
                        rms = '-'.join((r, row[ii]))
                    except TypeError:
                        # entry is not a string, i.e. no stream listed in this slot
                        continue
                    for dd in datasets:
                        catalog_rms = '-'.join(dd.split('/')[-2].split('-')[1:7])
                        fdeploy = dd.split('/')[-1].split('_')[0]
                        if rms == catalog_rms and fdeploy == row['deployment']:
                            fdatasets.append(dd)
        else:
            fdatasets = datasets

        main_sensor = r.split('-')[-1]
        fdatasets_sel = cf.filter_collocated_instruments(main_sensor, fdatasets)

        for fd in fdatasets_sel:
            part_d = fd.split('/')[-1]
            print('\n{}'.format(part_d))
            fname, subsite, refdes, method, stream, deployment = cf.nc_attributes(fd)

            if deployment_num is not None:
                # compare by value; identity ('is not') is unreliable for integers
                if int(deployment[-4:]) != deployment_num:
                    continue

            ds = xr.open_dataset(fd, mask_and_scale=False)
            ds = ds.swap_dims({'obs': 'time'})
            array = subsite[0:2]
            sci_vars = cf.return_science_vars(stream)

            if start_time is not None and end_time is not None:
                ds = ds.sel(time=slice(start_time, end_time))
                if len(ds['time'].values) == 0:
                    print('No data to plot for specified time range: ({} to {})'.format(start_time, end_time))
                    continue
                stime = start_time.strftime('%Y-%m-%d')
                etime = end_time.strftime('%Y-%m-%d')
                ext = stime + 'to' + etime
                save_dir_profile = os.path.join(sDir, array, subsite, refdes, 'profile_plots', deployment, ext)
                save_dir_xsection = os.path.join(sDir, array, subsite, refdes, 'xsection_plots', deployment, ext)
            else:
                save_dir_profile = os.path.join(sDir, array, subsite, refdes, 'profile_plots', deployment)
                save_dir_xsection = os.path.join(sDir, array, subsite, refdes, 'xsection_plots', deployment)

            texclude_dir = os.path.join(sDir, array, subsite, refdes, 'time_to_exclude')
            cf.create_dir(texclude_dir)

            time1 = ds['time'].values
            try:
                ds_lat1 = ds['lat'].values
            except KeyError:
                ds_lat1 = None
                print('No latitude variable in file')
            try:
                ds_lon1 = ds['lon'].values
            except KeyError:
                ds_lon1 = None
                print('No longitude variable in file')

            # get pressure variable
            pvarname, y1, y_units, press, y_fillvalue = cf.add_pressure_to_dictionary_of_sci_vars(ds)

            # prepare file to list timestamps with suspect data for each data parameter
            stat_data = pd.DataFrame(columns=['deployments', 'time_to_exclude'])
            file_exclude = '{}/{}_{}_{}_excluded_timestamps.csv'.format(texclude_dir, deployment, refdes, method)
            stat_data.to_csv(file_exclude, index=True)

            # loop through sensor-data parameters
            for sv in sci_vars:
                print(sv)
                if 'pressure' in sv:
                    continue

                z1 = ds[sv].values
                fv = ds[sv]._FillValue
                sv_units = ds[sv].units

                # Check if the array is all NaNs
                if sum(np.isnan(z1)) == len(z1):
                    print('Array of all NaNs - skipping plot.')
                    continue

                # Check if the array is all fill values
                if len(z1[z1 != fv]) == 0:
                    print('Array of all fill values - skipping plot.')
                    continue

                # remove unreasonable pressure data (e.g. for surface piercing profilers)
                if zdbar:
                    po_ind = (0 < y1) & (y1 < zdbar)
                    n_zdbar = np.sum(~po_ind)
                    tm = time1[po_ind]
                    y = y1[po_ind]
                    z = z1[po_ind]
                    # lat/lon are None when the file has no such variable
                    ds_lat = ds_lat1[po_ind] if ds_lat1 is not None else None
                    ds_lon = ds_lon1[po_ind] if ds_lon1 is not None else None
                    print('{} in water depth > {} dbar'.format(n_zdbar, zdbar))
                else:
                    tm = time1
                    y = y1
                    z = z1
                    ds_lat = ds_lat1
                    ds_lon = ds_lon1

                # reject erroneous data
                dtime, zpressure, ndata, lenfv, lennan, lenev, lengr, global_min, global_max, lat, lon = \
                    cf.reject_erroneous_data(r, sv, tm, y, z, fv, ds_lat, ds_lon)

                # get rid of 0.0 (and other unreasonably low) data
                if sv == 'salinity':
                    ind = ndata > 30
                elif sv == 'density':
                    ind = ndata > 1022.5
                elif sv == 'conductivity':
                    ind = ndata > 3.45
                else:
                    ind = ndata > 0

                lenzero = np.sum(~ind)
                dtime = dtime[ind]
                zpressure = zpressure[ind]
                ndata = ndata[ind]
                if ds_lat is not None and ds_lon is not None:
                    lat = lat[ind]
                    lon = lon[ind]
                else:
                    lat = None
                    lon = None

                if len(dtime) == 0:
                    continue

                # reject time range from data portal file export
                t_portal, z_portal, y_portal, lat_portal, lon_portal = \
                    cf.reject_timestamps_dataportal(subsite, r, dtime, zpressure, ndata, lat, lon)
                print('removed {} data points using visual inspection of data'.format(
                    len(ndata) - len(z_portal)))

                if len(y_portal) == 0:
                    continue

                # create data groups
                columns = ['tsec', 'dbar', str(sv)]
                min_r = int(round(min(y_portal) - zcell_size))
                max_r = int(round(max(y_portal) + zcell_size))
                ranges = list(range(min_r, max_r, zcell_size))
                groups, d_groups = gt.group_by_depth_range(t_portal, y_portal, z_portal, columns, ranges)

                # 'scatter' variables use the percentile instead of the standard deviation;
                # kept in a local so the caller's n_std still applies to the other variables
                sv_n_std = None if 'scatter' in sv else n_std

                # identifying timestamps from percentile analysis
                y_avg, n_avg, n_min, n_max, n0_std, n1_std, l_arr, time_ex = cf.reject_timestamps_in_groups(
                    groups, d_groups, sv_n_std, inpercentile)

                if len(time_ex) > 0:
                    # writing timestamps to .csv file to use with data_range.py script
                    t_exclude = time_ex[0]
                    for i in range(1, len(time_ex)):
                        t_exclude = '{}, {}'.format(t_exclude, time_ex[i])
                    stat_data = pd.DataFrame({'deployments': deployment, 'time_to_exclude': t_exclude},
                                             index=[sv])
                    stat_data.to_csv(file_exclude, index=True, mode='a', header=False)

                    # rejecting timestamps from percentile analysis
                    t_nospct, z_nospct, y_nospct = cf.reject_suspect_data(t_portal, y_portal, z_portal, time_ex)
                else:
                    t_nospct = t_portal
                    z_nospct = z_portal
                    y_nospct = y_portal

                """ Plot data """
                # only plot when data remain and the cleaning steps removed something
                if len(t_nospct) == 0 or len(t_nospct) == len(dtime):
                    continue

                cf.create_dir(save_dir_profile)
                cf.create_dir(save_dir_xsection)
                sname = '-'.join((r, method, sv))
                sfile = '_'.join(('rm_suspect_data', sname, pd.to_datetime(t_nospct.min()).strftime('%Y%m%d')))

                t0 = pd.to_datetime(t_nospct.min()).strftime('%Y-%m-%dT%H:%M:%S')
                t1 = pd.to_datetime(t_nospct.max()).strftime('%Y-%m-%dT%H:%M:%S')
                title = ' '.join((deployment, refdes, method)) + '\n' + t0 + ' to ' + t1

                n_outliers = len(z_portal) - len(z_nospct)
                if zdbar or not sv_n_std:
                    outlier_text = ('removed {} in the upper and lower {} percentile of data grouped in {} '
                                    'dbar segments'.format(n_outliers, inpercentile, zcell_size))
                else:
                    outlier_text = 'removed {} data points +/- {} SD of data grouped in {} dbar segments'.format(
                        n_outliers, sv_n_std, zcell_size)

                legend_lines = [
                    'removed {} fill values, {} NaNs, {} Extreme Values (1e7), {} Global ranges [{} - {}], '
                    '{} unreasonable values'.format(lenfv, lennan, lenev, lengr, global_min, global_max, lenzero),
                    outlier_text,
                    'excluded {} suspect data points when inspected visually'.format(len(ndata) - len(z_portal)),
                ]
                if zdbar:
                    legend_lines.append(
                        'excluded {} suspect data in water depth greater than {} dbar'.format(n_zdbar, zdbar))
                leg_text = ('\n'.join(legend_lines),)

                ''' profile plot '''
                xlabel = sv + " (" + sv_units + ")"
                ylabel = press[0] + " (" + y_units[0] + ")"
                clabel = 'Time'

                print('plotting profile')
                fig, ax = pf.plot_profiles(z_nospct, y_nospct, t_nospct, ylabel, xlabel, clabel, stdev=None)
                ax.set_title(title, fontsize=9)
                ax.plot(n_avg, y_avg, '-k')
                ax.legend(leg_text, loc='upper center', bbox_to_anchor=(0.5, -0.17), fontsize=6)
                fig.tight_layout()
                pf.save_fig(save_dir_profile, sfile)

                ''' xsection plot '''
                print('plotting xsection')
                clabel = sv + " (" + sv_units + ")"

                # no bathymetry is passed to the cross-section in this script
                fig, ax, bar = pf.plot_xsection(subsite, t_nospct, y_nospct, z_nospct, clabel, ylabel,
                                                t_eng=None, m_water_depth=None, inpercentile=inpercentile,
                                                stdev=None)
                ax.set_title(title, fontsize=9)
                ax.legend(leg_text, loc='upper center', bbox_to_anchor=(0.5, -0.17), fontsize=6)
                fig.tight_layout()
                pf.save_fig(save_dir_xsection, sfile)
def plot_velocity_variables(r, fdatasets, num_plots, save_dir):
    """
    Plot the upward (W) velocity of every dataset against pressure.

    Two figures are built, each with ``num_plots`` stacked panels sharing the y-axis:
    a profile figure (velocity vs. pressure, colored by time) and a cross-section
    figure (time vs. pressure, colored by velocity). Panel ``ii`` shows
    ``fdatasets[ii]``; panels without a dataset are switched off. Both figures are
    written to ``save_dir``; their file names are 'w_plots_profile' / 'w_plots_xsection'
    followed by a '_deployment<N>_<last file-name token>' suffix for every dataset.

    Only the W component is plotted. The eastward/northward velocity, roll, pitch and
    current-magnitude plots are not produced by this function.

    :param r: reference designator, forwarded to reject_err_data_1_dims and the axis helpers
    :param fdatasets: list of dataset paths/urls; the file name must start with 'deployment<N>_'
    :param num_plots: number of panels per figure; must be at least len(fdatasets),
                      otherwise indexing the panels raises IndexError
    :param save_dir: directory the two figures are saved in
    """
    n_datasets = len(fdatasets)

    # squeeze=False always returns a 2-D array of axes, so panels can be indexed even when
    # num_plots == 1 (the default squeezes a single panel to a bare Axes object).
    fig_p, axes_p = pyplot.subplots(nrows=num_plots, ncols=1, sharey=True, squeeze=False)
    fig_p.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.0)
    fig_x, axes_x = pyplot.subplots(nrows=num_plots, ncols=1, sharey=True, squeeze=False)
    fig_x.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.0)
    ax_p = axes_p[:, 0]
    ax_x = axes_x[:, 0]

    profile_file = 'w_plots_profile'
    xsection_file = 'w_plots_xsection'

    try:
        # hide the panels that have no dataset to show
        for jj in range(n_datasets, num_plots):
            ax_p[jj].axis('off')
            ax_x[jj].axis('off')

        for ii, fpath in enumerate(fdatasets):
            fname = fpath.split('/')[-1]
            print('\n', fname)
            deployment = int(fname.split('_')[0].split('deployment')[-1])

            # the context manager closes the file once the panel has been drawn
            with xr.open_dataset(fpath, mask_and_scale=False) as ds:
                time = ds['time'].values

                # variable list
                var_list = cf.notin_list(ds.data_vars.keys(), ['time', '_qc_'])

                # NOTE(review): other callers in this code base unpack a different number of
                # values from this helper - confirm the signature of the imported version.
                z_data, z_unit, z_name, z_fill = cf.add_pressure_to_dictionary_of_sci_vars(ds)
                z_data, err_count_z = reject_err_data_1_dims(z_data, z_fill[0], r, z_name[0], n=5)

                w_id, w_data, w_unit, w_name, w_fill = get_variable_data(ds, var_list, 'upward_velocity')
                w_data, err_count_w = reject_err_data_1_dims(w_data, w_fill, r, w_id, n=5)

                y_info = [z_name[0], z_unit[0]]
                # label both W panels with the W units and the W error count
                w_info = ['[W] Velocity Components', w_unit]

                # profile panel
                css = ax_p[ii].scatter(w_data, z_data, c=time, s=2, edgecolor='None', cmap='rainbow')
                prepare_axis_profile(css, fig_p, ax_p[ii], time, y_info, w_info,
                                     r, deployment, ii, n_datasets, err_count=err_count_w)

                # cross-section panel
                css = ax_x[ii].scatter(time, z_data, c=w_data, cmap='RdBu', s=2, edgecolor='None')
                prepare_axis_xsection(css, fig_x, ax_x[ii], time, y_info, w_info,
                                      r, deployment, ii, n_datasets, err_count=err_count_w)

            # every dataset contributes its deployment and last file-name token to the output names
            suffix = '_deployment' + str(deployment) + '_' + fname.split('_')[-1].split('.')[0]
            profile_file = profile_file + suffix
            xsection_file = xsection_file + suffix

        save_file = os.path.join(save_dir, profile_file)
        fig_p.savefig(str(save_file), dpi=150, bbox_inches='tight')

        save_file = os.path.join(save_dir, xsection_file)
        fig_x.savefig(str(save_file), dpi=150, bbox_inches='tight')
    finally:
        # release the figures even when loading or plotting a dataset fails
        pyplot.close(fig_p)
        pyplot.close(fig_x)
def _save_profile_figure(fig, ax, title, leg_text, save_dir, sfile):
    # Apply the title/legend layout shared by all profile plots and save the figure via pf.save_fig.
    ax.set_title(title, fontsize=9)
    ax.legend(leg_text, loc='upper center', bbox_to_anchor=(0.5, -0.17), fontsize=6)
    fig.tight_layout()
    pf.save_fig(save_dir, sfile)


def main(url_list, sDir, plot_type, deployment_num, start_time, end_time, method_num, zdbar, n_std, inpercentile,
         zcell_size):
    """
    Plot depth profiles of every science variable, before and after removing suspect data.

    For each url the reference designator and method-stream are parsed from the second-to-last
    path element, the science variables of that stream are looked up in the data-review json,
    and each selected dataset is processed variable by variable:

    1. erroneous data are removed (cf.reject_erroneous_data) and plotted ('rm_erroneous_data'),
    2. outlying timestamps per depth bin are removed (cf.reject_timestamps_in_groups), written to
       '<deployment>_<refdes>_<method>_excluded_timestamps.csv' and plotted ('rm_suspect_data'),
    3. time ranges from the data portal export are removed (cf.reject_timestamps_dataportal) and
       plotted ('rm_v_suspect_data'),
    4. data at pressures >= zdbar are removed and plotted ('rm_depth_range').

    Plots 2-4 are only written when the corresponding step actually removed data.

    :param url_list: list of dataset urls
    :param sDir: root directory for the plots and the 'time_to_exclude' csv files
    :param plot_type: name of the sub-directory (below the reference designator) the plots are saved in
    :param deployment_num: deployment number to process, or None to process all deployments
    :param start_time: start of the time range to plot (needs .strftime), or None
    :param end_time: end of the time range to plot (needs .strftime), or None
    :param method_num: delivery method to process, or None to process all methods
    :param zdbar: pressure (dbar) at and below which data are dropped, or None to keep all depths
    :param n_std: forwarded to cf.reject_timestamps_in_groups; None is used for 'scatter' variables
    :param inpercentile: percentile forwarded to cf.reject_timestamps_in_groups
    :param zcell_size: integer size (dbar) of the depth bins the data are grouped in
    """
    for i, u in enumerate(url_list):
        print('\nUrl {} of {}: {}'.format(i + 1, len(url_list), u))
        elements = u.split('/')[-2].split('-')
        r = '-'.join((elements[1], elements[2], elements[3], elements[4]))
        ms = u.split(r + '-')[1].split('/')[0]
        subsite = r.split('-')[0]
        array = subsite[0:2]
        main_sensor = r.split('-')[-1]

        # read URL to get data
        datasets = cf.get_nc_urls([u])
        datasets_sel = cf.filter_collocated_instruments(main_sensor, datasets)

        # get sci data review list
        dr_data = cf.refdes_datareview_json(r)
        ps_df, n_streams = cf.get_preferred_stream_info(r)

        # get end times of deployments
        deployments = []
        end_times = []
        for index, row in ps_df.iterrows():
            deploy_number = int(row['deployment'][-4:])
            deploy_info = cf.get_deployment_information(dr_data, deploy_number)
            deployments.append(deploy_number)
            end_times.append(pd.to_datetime(deploy_info['stop_date']))

        # create a dictionary for science variables from analysis file
        stream_sci_vars_dict = dict()
        for x in dr_data['instrument']['data_streams']:
            dr_ms = '-'.join((x['method'], x['stream_name']))
            if ms != dr_ms:
                continue
            stream_sci_vars_dict[dr_ms] = dict(vars=dict())
            sci_vars = dict()
            for param in x['stream']['parameters']:
                if param['data_product_type'] == 'Science Data':
                    sci_vars.update({param['name']: dict(db_units=param['unit'])})
            if len(sci_vars) > 0:
                stream_sci_vars_dict[dr_ms]['vars'] = sci_vars

        for ii, d in enumerate(datasets_sel):
            part_d = d.split('/')[-1]
            print('\nDataset {} of {}: {}'.format(ii + 1, len(datasets_sel), part_d))
            fname, subsite, refdes, method, stream, deployment = cf.nc_attributes(d)

            if method_num is not None and method != method_num:
                print(method_num, method)
                continue

            # Compare by value (not identity) and read the number from the last four characters,
            # as done for the preferred-stream deployments above; splitting on '0' breaks for
            # numbers such as 0010 or 0101.
            if deployment_num is not None and int(deployment[-4:]) != int(deployment_num):
                continue

            with xr.open_dataset(d, mask_and_scale=False) as ds:
                ds = ds.swap_dims({'obs': 'time'})

                if start_time is not None and end_time is not None:
                    ds = ds.sel(time=slice(start_time, end_time))
                    if len(ds['time'].values) == 0:
                        print('No data to plot for specified time range: ({} to {})'.format(start_time, end_time))
                        continue
                    stime = start_time.strftime('%Y-%m-%d')
                    etime = end_time.strftime('%Y-%m-%d')
                    ext = stime + 'to' + etime
                    save_dir = os.path.join(sDir, array, subsite, refdes, plot_type, ms.split('-')[0],
                                            deployment, ext)
                else:
                    save_dir = os.path.join(sDir, array, subsite, refdes, plot_type, ms.split('-')[0],
                                            deployment)
                cf.create_dir(save_dir)

                texclude_dir = os.path.join(sDir, array, subsite, refdes, 'time_to_exclude')
                cf.create_dir(texclude_dir)

                # initialize an empty data array for science variables in dictionary
                sci_vars_dict = cd.initialize_empty_arrays(stream_sci_vars_dict, ms)

                for var in list(sci_vars_dict[ms]['vars'].keys()):
                    sh = sci_vars_dict[ms]['vars'][var]
                    # only collect variables whose units agree with the data-review database
                    if ds[var].units != sh['db_units']:
                        continue
                    if ds[var]._FillValue not in sh['fv']:
                        sh['fv'].append(ds[var]._FillValue)
                    if ds[var].units not in sh['units']:
                        sh['units'].append(ds[var].units)

                    sh['t'] = np.append(sh['t'], ds['time'].values)
                    sh['values'] = np.append(sh['values'], ds[var].values)

                    # y_unit and y_name are reused below to label the pressure axis
                    pressure, y_unit, y_name = cf.add_pressure_to_dictionary_of_sci_vars(ds)
                    sh['pressure'] = np.append(sh['pressure'], pressure)

            # start a fresh csv (header only); one row per variable is appended below
            stat_data = pd.DataFrame(columns=['deployments', 'time_to_exclude'])
            file_exclude = '{}/{}_{}_{}_excluded_timestamps.csv'.format(texclude_dir, deployment, refdes, method)
            stat_data.to_csv(file_exclude, index=True)

            for m, n in sci_vars_dict.items():
                for sv, vinfo in n['vars'].items():
                    print(sv)
                    if len(vinfo['t']) < 1:
                        print('no variable data to plot')
                        continue

                    sv_units = vinfo['units'][0]
                    fv = vinfo['fv'][0]
                    t = vinfo['t']
                    z = vinfo['values']
                    y = vinfo['pressure']

                    # Check if the array is all NaNs
                    if np.isnan(z).all():
                        print('Array of all NaNs - skipping plot.')
                        continue

                    # Check if the array is all fill values
                    if len(z[z != fv]) == 0:
                        print('Array of all fill values - skipping plot.')
                        continue

                    # reject erroneous data
                    dtime, zpressure, ndata, lenfv, lennan, lenev, lengr, global_min, global_max = \
                        cf.reject_erroneous_data(r, sv, t, y, z, fv)

                    # min()/max() below cannot handle an empty array
                    if len(zpressure) == 0:
                        print('no variable data to plot')
                        continue

                    # create data groups
                    columns = ['tsec', 'dbar', str(sv)]
                    min_r = int(round(min(zpressure) - zcell_size))
                    max_r = int(round(max(zpressure) + zcell_size))
                    ranges = list(range(min_r, max_r, zcell_size))

                    groups, d_groups = gt.group_by_depth_range(dtime, zpressure, ndata, columns, ranges)

                    # 'scatter' variables use the percentile method. A per-variable value is used so
                    # the caller's n_std is not overwritten for the remaining variables and datasets.
                    sv_n_std = None if 'scatter' in sv else n_std

                    # rejecting timestamps from percentile analysis
                    y_avg, n_avg, n_min, n_max, n0_std, n1_std, l_arr, time_ex, \
                        t_nospct, z_nospct, y_nospct = cf.reject_timestamps_in_groups(groups, d_groups, sv_n_std,
                                                                                      dtime, zpressure, ndata,
                                                                                      inpercentile)

                    print('{} using {} percentile of data grouped in {} dbar segments'.format(
                        len(zpressure) - len(z_nospct), inpercentile, zcell_size))

                    # writing timestamps to .csv file to use with data_range.py script
                    if len(time_ex) != 0:
                        t_exclude = time_ex[0]
                        for extra_time in time_ex[1:]:
                            t_exclude = '{}, {}'.format(t_exclude, extra_time)
                        stat_data = pd.DataFrame({'deployments': deployment, 'time_to_exclude': t_exclude},
                                                 index=[sv])
                        stat_data.to_csv(file_exclude, index=True, mode='a', header=False)

                    # reject time range from data portal file export
                    t_portal, z_portal, y_portal = cf.reject_timestamps_dataportal(subsite, r, t_nospct, z_nospct,
                                                                                   y_nospct)
                    print('{} using visual inspection of data'.format(len(z_nospct) - len(z_portal)))

                    # reject data in a depth range
                    if zdbar is not None:
                        y_ind = y_portal < zdbar
                        t_array = t_portal[y_ind]
                        y_array = y_portal[y_ind]
                        z_array = z_portal[y_ind]
                    else:
                        t_array = t_portal
                        y_array = y_portal
                        z_array = z_portal

                    # number of points dropped by the depth filter (the mask length is the total count)
                    n_zdbar = len(z_portal) - len(z_array)
                    print('{} in water depth > {} dbar'.format(n_zdbar, zdbar))

                    # Plot data
                    if len(t_array) == 0:
                        continue

                    if m == 'common_stream_placeholder':
                        sname = '-'.join((sv, r))
                    else:
                        sname = '-'.join((sv, r, m))

                    xlabel = sv + " (" + sv_units + ")"
                    ylabel = y_name[0] + " (" + y_unit[0] + ")"
                    clabel = 'Time'
                    title = ' '.join((deployment, r, m))

                    # plot non-erroneous data
                    fig, ax = pf.plot_profiles(ndata, zpressure, dtime, ylabel, xlabel, clabel, end_times,
                                               deployments, stdev=None)
                    ax.plot(n_avg, y_avg, '-k')
                    ax.fill_betweenx(y_avg, n0_std, n1_std, color='m', alpha=0.2)
                    leg_text = (
                        'removed {} fill values, {} NaNs, {} Extreme Values (1e7), {} Global ranges [{} - {}]'.format(
                            len(z) - lenfv, len(z) - lennan, len(z) - lenev, lengr, global_min, global_max) + '\n' +
                        ('(black) data average in {} dbar segments'.format(zcell_size)) + '\n' +
                        ('(magenta) upper and lower {} percentile envelope in {} dbar segments'.format(
                            inpercentile, zcell_size)),)
                    _save_profile_figure(fig, ax, title, leg_text, save_dir,
                                         '_'.join(('rm_erroneous_data', sname)))

                    # plot excluding time ranges for suspect data
                    if len(z_nospct) != len(zpressure):
                        fig, ax = pf.plot_profiles(z_nospct, y_nospct, t_nospct, ylabel, xlabel, clabel, end_times,
                                                   deployments, stdev=None)
                        leg_text = (
                            'removed {} in the upper and lower {} percentile of data grouped in {} dbar segments'.format(
                                len(zpressure) - len(z_nospct), inpercentile, zcell_size),)
                        _save_profile_figure(fig, ax, title, leg_text, save_dir,
                                             '_'.join(('rm_suspect_data', sname)))

                    # plot excluding time ranges from data portal export
                    if len(z_nospct) != len(z_portal):
                        fig, ax = pf.plot_profiles(z_portal, y_portal, t_portal, ylabel, xlabel, clabel, end_times,
                                                   deployments, stdev=None)
                        leg_text = ('excluded {} suspect data when inspected visually'.format(
                            len(z_nospct) - len(z_portal)),)
                        _save_profile_figure(fig, ax, title, leg_text, save_dir,
                                             '_'.join(('rm_v_suspect_data', sname)))

                    # Plot excluding a selected depth value; compare against the data before the depth
                    # filter (comparing z_array with itself never triggered this plot)
                    if len(z_array) != len(z_portal):
                        fig, ax = pf.plot_profiles(z_array, y_array, t_array, ylabel, xlabel, clabel, end_times,
                                                   deployments, stdev=None)
                        leg_text = ('excluded {} suspect data in water depth greater than {} dbar'.format(
                            n_zdbar, zdbar),)
                        _save_profile_figure(fig, ax, title, leg_text, save_dir,
                                             '_'.join(('rm_depth_range', sname)))