def main(url_list, sDir, deployment_num, start_time, end_time, preferred_only, n_std, surface_params, depth_params):
    """Create profile, cross-section and glider 4D plots for each science variable.

    For every reference designator found in ``url_list`` (designators
    containing ``ENG`` are skipped) the NetCDF files are located, optionally
    restricted to the preferred streams, and each non-pressure science
    variable is plotted twice: once with all data and once after erroneous
    values were removed.

    Parameters
    ----------
    url_list : list of str
        Dataset URLs. The second-to-last ``/`` component is split on ``-``
        and fields 1-4 form the reference designator.
    sDir : str
        Root directory under which the plot directories are created.
    deployment_num : int or None
        When not None, only files whose deployment number equals this value
        are processed.
    start_time, end_time : datetime-like or None
        When both are given, data are sliced to this time range and the range
        is appended to the output directory path.
    preferred_only : str
        ``'yes'`` restricts the files to the preferred streams.
    n_std : object
        Accepted for interface compatibility only. The value is not used:
        the envelope handed to ``reject_timestamps_in_groups`` is always built
        from the percentiles in ``surface_params`` and ``depth_params``.
    surface_params : sequence
        ``(start, stop, step, percentile)`` for the surface depth bins.
    depth_params : sequence
        ``(start, stop, step, percentile)`` for the deeper bins; ``stop`` is
        extended by one ``step`` when the bins are built.
    """
    # Unique reference designators, skipping engineering designators.
    rd_list = []
    for uu in url_list:
        elements = uu.split('/')[-2].split('-')
        rd = '-'.join((elements[1], elements[2], elements[3], elements[4]))
        if rd not in rd_list and 'ENG' not in rd:
            rd_list.append(rd)

    for r in rd_list:
        print('\n{}'.format(r))
        datasets = []
        for u in url_list:
            splitter = u.split('/')[-2].split('-')
            rd_check = '-'.join((splitter[1], splitter[2], splitter[3], splitter[4]))
            if rd_check == r:
                datasets.append(cf.get_nc_urls([u]))
        datasets = list(itertools.chain(*datasets))

        fdatasets = []
        if preferred_only == 'yes':
            # get the preferred stream information
            ps_df, n_streams = cf.get_preferred_stream_info(r)
            for _, row in ps_df.iterrows():
                for ii in range(n_streams):
                    try:
                        rms = '-'.join((r, row[ii]))
                    except TypeError:
                        # non-string entry for this deployment: nothing to match
                        continue
                    for dd in datasets:
                        spl = dd.split('/')[-2].split('-')
                        catalog_rms = '-'.join((spl[1], spl[2], spl[3], spl[4], spl[5], spl[6]))
                        fdeploy = dd.split('/')[-1].split('_')[0]
                        if rms == catalog_rms and fdeploy == row['deployment']:
                            fdatasets.append(dd)
        else:
            fdatasets = datasets

        main_sensor = r.split('-')[-1]
        fdatasets_sel = cf.filter_collocated_instruments(main_sensor, fdatasets)

        for fd in fdatasets_sel:
            part_d = fd.split('/')[-1]
            print('\n{}'.format(part_d))
            ds = xr.open_dataset(fd, mask_and_scale=False)
            ds = ds.swap_dims({'obs': 'time'})

            fname, subsite, refdes, method, stream, deployment = cf.nc_attributes(fd)
            array = subsite[0:2]
            sci_vars = cf.return_science_vars(stream)

            # Bathymetry is only looked up for coastal gliders.
            m_water_depth = None
            t_eng = None
            if 'CE05MOAS' in r or 'CP05MOAS' in r:
                # for coastal gliders, get m_water_depth for bathymetry
                eng = '-'.join((r.split('-')[0], r.split('-')[1], '00-ENG000000', method, 'glider_eng'))
                eng_url = [s for s in url_list if eng in s]
                if len(eng_url) == 1:
                    eng_datasets = cf.get_nc_urls(eng_url)
                    # filter out collocated datasets
                    eng_dataset = [j for j in eng_datasets
                                   if (eng in j.split('/')[-1] and deployment in j.split('/')[-1])]
                    if len(eng_dataset) > 0:
                        ds_eng = xr.open_dataset(eng_dataset[0], mask_and_scale=False)
                        t_eng = ds_eng['time'].values
                        m_water_depth = ds_eng['m_water_depth'].values

                        # m_altimeter_status = 0 means a good reading (not nan or -1)
                        eng_ind = ds_eng['m_altimeter_status'].values == 0
                        m_water_depth = m_water_depth[eng_ind]
                        t_eng = t_eng[eng_ind]
                    else:
                        print('No engineering file for deployment {}'.format(deployment))

            if deployment_num is not None:
                # Compare by value (``is not`` tests identity) and parse the
                # trailing four digits so names such as 'deployment0010' work.
                if int(deployment[-4:]) != deployment_num:
                    print(type(int(deployment[-4:])), type(deployment_num))
                    continue

            if start_time is not None and end_time is not None:
                ds = ds.sel(time=slice(start_time, end_time))
                if len(ds['time'].values) == 0:
                    print('No data to plot for specified time range: ({} to {})'.format(start_time, end_time))
                    continue
                stime = start_time.strftime('%Y-%m-%d')
                etime = end_time.strftime('%Y-%m-%d')
                ext = stime + 'to' + etime
                save_dir_profile = os.path.join(sDir, array, subsite, refdes, 'profile_plots', deployment, ext)
                save_dir_xsection = os.path.join(sDir, array, subsite, refdes, 'xsection_plots', deployment, ext)
                save_dir_4d = os.path.join(sDir, array, subsite, refdes, 'xsection_plots_4d', deployment, ext)
            else:
                save_dir_profile = os.path.join(sDir, array, subsite, refdes, 'profile_plots', deployment)
                save_dir_xsection = os.path.join(sDir, array, subsite, refdes, 'xsection_plots', deployment)
                save_dir_4d = os.path.join(sDir, array, subsite, refdes, 'xsection_plots_4d', deployment)

            tm = ds['time'].values
            try:
                ds_lat = ds['lat'].values
            except KeyError:
                ds_lat = None
                print('No latitude variable in file')
            try:
                ds_lon = ds['lon'].values
            except KeyError:
                ds_lon = None
                print('No longitude variable in file')

            # get pressure variable
            y, y_units, press = cf.add_pressure_to_dictionary_of_sci_vars(ds)

            for sv in sci_vars:
                print(sv)
                if 'pressure' in sv:
                    continue

                z = ds[sv].values
                fv = ds[sv]._FillValue
                sv_units = ds[sv].units

                # Check if the array is all NaNs
                if sum(np.isnan(z)) == len(z):
                    print('Array of all NaNs - skipping plot.')
                    continue
                # Check if the array is all fill values
                if len(z[z != fv]) == 0:
                    print('Array of all fill values - skipping plot.')
                    continue

                # reject erroneous data
                dtime, zpressure, ndata, lenfv, lennan, lenev, lengr, global_min, global_max, lat, lon = \
                    cf.reject_erroneous_data(r, sv, tm, y, z, fv, ds_lat, ds_lon)

                # get rid of 0.0 data
                if 'CTD' in r:
                    ind = zpressure > 0.0
                else:
                    ind = ndata > 0.0

                lenzero = np.sum(~ind)
                dtime = dtime[ind]
                zpressure = zpressure[ind]
                ndata = ndata[ind]
                if ds_lat is not None and ds_lon is not None:
                    lat = lat[ind]
                    lon = lon[ind]
                else:
                    lat = None
                    lon = None

                # min()/max() below raise on an empty array, so stop here when
                # nothing survived the filters.
                if len(dtime) == 0:
                    print('No data left after removing erroneous values - skipping plot.')
                    continue

                t0 = pd.to_datetime(dtime.min()).strftime('%Y-%m-%dT%H:%M:%S')
                t1 = pd.to_datetime(dtime.max()).strftime('%Y-%m-%dT%H:%M:%S')
                title = ' '.join((deployment, refdes, method)) + '\n' + t0 + ' to ' + t1

                # reject time range from data portal file export
                t_portal, z_portal, y_portal, lat_portal, lon_portal = \
                    cf.reject_timestamps_dataportal(subsite, r, dtime, zpressure, ndata, lat, lon)
                print('removed {} data points using visual inspection of data'.format(len(ndata) - len(z_portal)))

                # create data groups: surface bins followed by deeper bins
                columns = ['tsec', 'dbar', str(sv)]
                range1 = list(range(surface_params[0], surface_params[1], surface_params[2]))
                range2 = list(range(depth_params[0], depth_params[1] + depth_params[2], depth_params[2]))
                ranges = range1 + range2

                groups, d_groups = gt.group_by_depth_range(t_portal, y_portal, z_portal, columns, ranges)

                # get percentile analysis for printing on the profile plot;
                # one percentile per depth bin. The same list is passed for
                # both arguments, and the n_std parameter is left untouched.
                inpercentile = [surface_params[3]] * len(range1) + [depth_params[3]] * len(range2)
                group_n_std = list(inpercentile)
                y_plt, n_med, n_min, n_max, n0_std, n1_std, l_arr, time_ex = reject_timestamps_in_groups(
                    groups, d_groups, group_n_std, inpercentile)

                sname = '-'.join((r, method, sv))
                xlabel = sv + " (" + sv_units + ")"
                ylabel = press[0] + " (" + y_units[0] + ")"

                # Text shared by both legends of the cleaned-up plots.
                removed_text = (
                    'removed {} fill values, {} NaNs, {} Extreme Values (1e7), {} Global ranges [{} - {}], '
                    '{} zeros'.format(lenfv, lennan, lenev, lengr, global_min, global_max, lenzero)
                    + '\nexcluded {} suspect data points when inspected visually'.format(
                        len(ndata) - len(z_portal)))

                # ---- Plot all data ----
                if len(tm) > 0:
                    cf.create_dir(save_dir_profile)
                    cf.create_dir(save_dir_xsection)
                    sfileall = '_'.join(('all_data', sname))

                    # profile plot
                    clabel = 'Time'
                    fig, ax = pf.plot_profiles(z, y, tm, ylabel, xlabel, clabel, stdev=None)
                    ax.set_title(title, fontsize=9)
                    fig.tight_layout()
                    pf.save_fig(save_dir_profile, sfileall)

                    # xsection plot
                    clabel = sv + " (" + sv_units + ")"
                    fig, ax, bar = pf.plot_xsection(subsite, tm, y, z, clabel, ylabel, t_eng, m_water_depth,
                                                    inpercentile=None, stdev=None)
                    ax.set_title(title, fontsize=9)
                    fig.tight_layout()
                    pf.save_fig(save_dir_xsection, sfileall)

                # ---- Plot cleaned-up data ----
                if len(dtime) > 0:
                    sfile = '_'.join(('rm_erroneous_data', sname))

                    # profile plot
                    clabel = 'Time'
                    fig, ax = pf.plot_profiles(z_portal, y_portal, t_portal, ylabel, xlabel, clabel, stdev=None)
                    ax.set_title(title, fontsize=9)
                    ax.plot(n_med, y_plt, '.k')
                    ax.fill_betweenx(y_plt, n0_std, n1_std, color='m', alpha=0.2)

                    leg_text = (
                        removed_text
                        + '\n(black) data median in {} dbar segments (break at {} dbar)'.format(
                            [surface_params[2], depth_params[2]], depth_params[0])
                        + '\n(magenta) upper and lower {} percentile envelope in {} dbar segments'.format(
                            [surface_params[3], depth_params[3]], [surface_params[2], depth_params[2]]),
                    )
                    ax.legend(leg_text, loc='upper center', bbox_to_anchor=(0.5, -0.17), fontsize=6)
                    fig.tight_layout()
                    pf.save_fig(save_dir_profile, sfile)

                    # xsection plot of the non-erroneous data
                    clabel = sv + " (" + sv_units + ")"
                    fig, ax, bar = pf.plot_xsection(subsite, t_portal, y_portal, z_portal, clabel, ylabel,
                                                    t_eng, m_water_depth, inpercentile=None, stdev=None)
                    ax.set_title(title, fontsize=9)
                    leg_text = (removed_text,)
                    ax.legend(leg_text, loc='upper center', bbox_to_anchor=(0.5, -0.17), fontsize=6)
                    fig.tight_layout()
                    pf.save_fig(save_dir_xsection, sfile)

                    # 4D plot for gliders only
                    if 'MOAS' in r:
                        if ds_lat is not None and ds_lon is not None:
                            cf.create_dir(save_dir_4d)

                            clabel = sv + " (" + sv_units + ")"
                            zlabel = press[0] + " (" + y_units[0] + ")"

                            fig = plt.figure()
                            ax = fig.add_subplot(111, projection='3d')
                            sct = ax.scatter(lon_portal, lat_portal, y_portal, c=z_portal, s=2)
                            cbar = plt.colorbar(sct, label=clabel, extend='both')
                            cbar.ax.tick_params(labelsize=8)
                            ax.invert_zaxis()
                            ax.view_init(25, 32)
                            ax.invert_xaxis()
                            ax.invert_yaxis()
                            ax.set_zlabel(zlabel, fontsize=9)
                            ax.set_ylabel('Latitude', fontsize=9)
                            ax.set_xlabel('Longitude', fontsize=9)

                            ax.set_title(title, fontsize=9)
                            pf.save_fig(save_dir_4d, sfile)
def main(url_list, sDir, deployment_num, start_time, end_time, preferred_only, n_std, inpercentile, zcell_size, zdbar):
    """Create profile, cross-section and glider 4D plots for each science variable.

    For every reference designator found in ``url_list`` (designators
    containing ``ENG`` or ``ADCP`` are skipped) the NetCDF files are located,
    optionally restricted to the preferred streams, and each non-pressure
    science variable is plotted with all data and again after erroneous
    values were removed. ``spkir_abj_cspp_downwelling_vector`` and OPTAA
    variables are delegated to ``pxso.pf_xs_spkir`` / ``pxso.pf_xs_optaa``.

    Parameters
    ----------
    url_list : list of str
        Dataset URLs. The second-to-last ``/`` component is split on ``-``
        and fields 1-4 form the reference designator.
    sDir : str
        Root directory under which the plot directories are created.
    deployment_num : int or None
        When not None, only files whose deployment number equals this value
        are processed.
    start_time, end_time : datetime-like or None
        When both are given, data are sliced to this time range and the range
        is appended to the output directory path.
    preferred_only : str
        ``'yes'`` restricts the files to the preferred streams.
    n_std : number or None
        Passed to ``cf.reject_timestamps_in_groups``; replaced by None for
        variables whose name contains ``'scatter'`` so percentiles are used.
    inpercentile : number or None
        Passed to ``cf.reject_timestamps_in_groups`` and used in the legend.
    zcell_size : int
        Step, in pressure units, of the depth bins.
    zdbar : number or None
        When truthy, only samples with ``0 < pressure < zdbar`` are kept.
    """
    # Unique reference designators, skipping engineering and ADCP designators.
    rd_list = []
    for uu in url_list:
        elements = uu.split('/')[-2].split('-')
        rd = '-'.join((elements[1], elements[2], elements[3], elements[4]))
        if rd not in rd_list and 'ENG' not in rd and 'ADCP' not in rd:
            rd_list.append(rd)

    for r in rd_list:
        print('\n{}'.format(r))
        datasets = []
        for u in url_list:
            splitter = u.split('/')[-2].split('-')
            rd_check = '-'.join((splitter[1], splitter[2], splitter[3], splitter[4]))
            if rd_check == r:
                datasets.append(cf.get_nc_urls([u]))
        datasets = list(itertools.chain(*datasets))

        fdatasets = []
        if preferred_only == 'yes':
            # get the preferred stream information
            ps_df, n_streams = cf.get_preferred_stream_info(r)
            for _, row in ps_df.iterrows():
                for ii in range(n_streams):
                    try:
                        rms = '-'.join((r, row[ii]))
                    except TypeError:
                        # non-string entry for this deployment: nothing to match
                        continue
                    for dd in datasets:
                        spl = dd.split('/')[-2].split('-')
                        catalog_rms = '-'.join((spl[1], spl[2], spl[3], spl[4], spl[5], spl[6]))
                        fdeploy = dd.split('/')[-1].split('_')[0]
                        if rms == catalog_rms and fdeploy == row['deployment']:
                            fdatasets.append(dd)
        else:
            fdatasets = datasets

        main_sensor = r.split('-')[-1]
        fdatasets_sel = cf.filter_collocated_instruments(main_sensor, fdatasets)

        for fd in fdatasets_sel:
            part_d = fd.split('/')[-1]
            print('\n{}'.format(part_d))
            ds = xr.open_dataset(fd, mask_and_scale=False)
            ds = ds.swap_dims({'obs': 'time'})

            fname, subsite, refdes, method, stream, deployment = cf.nc_attributes(fd)
            array = subsite[0:2]
            sci_vars = cf.return_science_vars(stream)

            # NOTE: the glider engineering (m_water_depth) bathymetry lookup is
            # disabled in this function; the cross-sections below are drawn
            # with t_eng=None and m_water_depth=None.

            if deployment_num is not None:
                # Compare by value; ``is not`` tests object identity and only
                # works for small ints by accident.
                if int(deployment[-4:]) != deployment_num:
                    print(type(int(deployment[-4:])), type(deployment_num))
                    continue

            if start_time is not None and end_time is not None:
                ds = ds.sel(time=slice(start_time, end_time))
                if len(ds['time'].values) == 0:
                    print('No data to plot for specified time range: ({} to {})'.format(start_time, end_time))
                    continue
                stime = start_time.strftime('%Y-%m-%d')
                etime = end_time.strftime('%Y-%m-%d')
                ext = stime + 'to' + etime
                save_dir_profile = os.path.join(sDir, array, subsite, refdes, 'profile_plots', deployment, ext)
                save_dir_xsection = os.path.join(sDir, array, subsite, refdes, 'xsection_plots', deployment, ext)
                save_dir_4d = os.path.join(sDir, array, subsite, refdes, 'xsection_plots_4d', deployment, ext)
            else:
                save_dir_profile = os.path.join(sDir, array, subsite, refdes, 'profile_plots', deployment)
                save_dir_xsection = os.path.join(sDir, array, subsite, refdes, 'xsection_plots', deployment)
                save_dir_4d = os.path.join(sDir, array, subsite, refdes, 'xsection_plots_4d', deployment)

            time1 = ds['time'].values
            try:
                ds_lat1 = ds['lat'].values
            except KeyError:
                ds_lat1 = None
                print('No latitude variable in file')
            try:
                ds_lon1 = ds['lon'].values
            except KeyError:
                ds_lon1 = None
                print('No longitude variable in file')

            # get pressure variable
            pvarname, y1, y_units, press, y_fillvalue = cf.add_pressure_to_dictionary_of_sci_vars(ds)

            for sv in sci_vars:
                print('')
                print(sv)
                if 'pressure' in sv:
                    continue

                if sv == 'spkir_abj_cspp_downwelling_vector':
                    pxso.pf_xs_spkir(ds, sv, time1, y1, ds_lat1, ds_lon1, zcell_size, inpercentile,
                                     save_dir_profile, save_dir_xsection, deployment, press, y_units, n_std, zdbar)
                    continue
                if 'OPTAA' in r:
                    if sv not in ['wavelength_a', 'wavelength_c']:
                        pxso.pf_xs_optaa(ds, sv, time1, y1, ds_lat1, ds_lon1, zcell_size, inpercentile,
                                         save_dir_profile, save_dir_xsection, deployment, press, y_units, n_std,
                                         zdbar)
                    continue

                z1 = ds[sv].values
                fv = ds[sv]._FillValue
                sv_units = ds[sv].units

                # Check if the array is all NaNs
                if sum(np.isnan(z1)) == len(z1):
                    print('Array of all NaNs - skipping plot.')
                    continue
                # Check if the array is all fill values
                if len(z1[z1 != fv]) == 0:
                    print('Array of all fill values - skipping plot.')
                    continue

                # remove unreasonable pressure data (e.g. for surface piercing profilers)
                if zdbar:
                    po_ind = (0 < y1) & (y1 < zdbar)
                    tm = time1[po_ind]
                    y = y1[po_ind]
                    z = z1[po_ind]
                    # lat/lon are None when the file has no such variable
                    ds_lat = ds_lat1[po_ind] if ds_lat1 is not None else None
                    ds_lon = ds_lon1[po_ind] if ds_lon1 is not None else None
                else:
                    tm = time1
                    y = y1
                    z = z1
                    ds_lat = ds_lat1
                    ds_lon = ds_lon1

                # reject erroneous data
                dtime, zpressure, ndata, lenfv, lennan, lenev, lengr, global_min, global_max, lat, lon = \
                    cf.reject_erroneous_data(r, sv, tm, y, z, fv, ds_lat, ds_lon)

                # get rid of unreasonable (including 0.0) data
                if sv == 'salinity':
                    ind = ndata > 30
                elif sv == 'density':
                    ind = ndata > 1022.5
                elif sv == 'conductivity':
                    ind = ndata > 3.45
                else:
                    ind = ndata > 0

                lenzero = np.sum(~ind)
                dtime = dtime[ind]
                zpressure = zpressure[ind]
                ndata = ndata[ind]
                if ds_lat is not None and ds_lon is not None:
                    lat = lat[ind]
                    lon = lon[ind]
                else:
                    lat = None
                    lon = None

                # Per-variable setting: None makes the helper use percentiles.
                # Kept local so one 'scatter' variable does not switch the
                # standard-deviation envelope off for every later variable.
                sv_n_std = None if 'scatter' in sv else n_std

                have_clean_data = False
                if len(dtime) > 0:
                    # reject time range from data portal file export
                    t_portal, z_portal, y_portal, lat_portal, lon_portal = \
                        cf.reject_timestamps_dataportal(subsite, r, dtime, zpressure, ndata, lat, lon)
                    print('removed {} data points using visual inspection of data'.format(
                        len(ndata) - len(z_portal)))

                    # create data groups
                    if len(y_portal) > 0:
                        have_clean_data = True
                        columns = ['tsec', 'dbar', str(sv)]
                        min_r = int(round(np.nanmin(y_portal) - zcell_size))
                        max_r = int(round(np.nanmax(y_portal) + zcell_size))
                        ranges = list(range(min_r, max_r, zcell_size))

                        groups, d_groups = gt.group_by_depth_range(t_portal, y_portal, z_portal, columns, ranges)

                        # get percentile analysis for printing on the profile plot
                        y_avg, n_avg, n_min, n_max, n0_std, n1_std, l_arr, time_ex = \
                            cf.reject_timestamps_in_groups(groups, d_groups, sv_n_std, inpercentile)

                sname = '-'.join((r, method, sv))
                xlabel = sv + " (" + sv_units + ")"
                ylabel = press[0] + " (" + y_units[0] + ")"

                # ---- Plot all data ----
                if len(time1) > 0:
                    cf.create_dir(save_dir_profile)
                    cf.create_dir(save_dir_xsection)
                    sfileall = '_'.join(('all_data', sname, pd.to_datetime(time1.min()).strftime('%Y%m%d')))
                    tm0 = pd.to_datetime(time1.min()).strftime('%Y-%m-%dT%H:%M:%S')
                    tm1 = pd.to_datetime(time1.max()).strftime('%Y-%m-%dT%H:%M:%S')
                    title = ' '.join((deployment, refdes, method)) + '\n' + tm0 + ' to ' + tm1
                    if 'SPKIR' in r:
                        title = title + '\nWavelength = 510 nm'

                    # profile plot
                    clabel = 'Time'
                    fig, ax = pf.plot_profiles(z1, y1, time1, ylabel, xlabel, clabel, stdev=None)
                    ax.set_title(title, fontsize=9)
                    fig.tight_layout()
                    pf.save_fig(save_dir_profile, sfileall)

                    # xsection plot
                    clabel = sv + " (" + sv_units + ")"
                    fig, ax, bar = pf.plot_xsection(subsite, time1, y1, z1, clabel, ylabel, t_eng=None,
                                                    m_water_depth=None, inpercentile=None, stdev=None)
                    if fig:
                        ax.set_title(title, fontsize=9)
                        fig.tight_layout()
                        pf.save_fig(save_dir_xsection, sfileall)

                # ---- Plot cleaned-up data ----
                if have_clean_data:
                    sfile = '_'.join(('rm_erroneous_data', sname,
                                      pd.to_datetime(t_portal.min()).strftime('%Y%m%d')))
                    t0 = pd.to_datetime(t_portal.min()).strftime('%Y-%m-%dT%H:%M:%S')
                    t1 = pd.to_datetime(t_portal.max()).strftime('%Y-%m-%dT%H:%M:%S')
                    title = ' '.join((deployment, refdes, method)) + '\n' + t0 + ' to ' + t1
                    if 'SPKIR' in r:
                        title = title + '\nWavelength = 510 nm'

                    # Text shared by the profile and cross-section legends.
                    removed_text = (
                        'removed {} fill values, {} NaNs, {} Extreme Values (1e7), {} Global ranges [{} - {}], '
                        '{} unreasonable values'.format(lenfv, lennan, lenev, lengr, global_min, global_max,
                                                        lenzero)
                        + '\nexcluded {} suspect data points when inspected visually'.format(
                            len(ndata) - len(z_portal)))

                    # profile plot
                    clabel = 'Time'
                    fig, ax = pf.plot_profiles(z_portal, y_portal, t_portal, ylabel, xlabel, clabel, stdev=None)
                    ax.set_title(title, fontsize=9)
                    ax.plot(n_avg, y_avg, '-k')
                    ax.fill_betweenx(y_avg, n0_std, n1_std, color='m', alpha=0.2)

                    # Always build the legend so a stale or undefined value is
                    # never used when neither envelope setting is given.
                    legend_line = removed_text + '\n(black) data average in {} dbar segments'.format(zcell_size)
                    if inpercentile:
                        legend_line += '\n(magenta) {} percentile envelope in {} dbar segments'.format(
                            int(100 - inpercentile * 2), zcell_size)
                    elif sv_n_std:
                        legend_line += '\n(magenta) +/- {} SD envelope in {} dbar segments'.format(
                            int(sv_n_std), zcell_size)
                    leg_text = (legend_line,)
                    ax.legend(leg_text, loc='upper center', bbox_to_anchor=(0.5, -0.17), fontsize=6)
                    fig.tight_layout()
                    pf.save_fig(save_dir_profile, sfile)

                    # xsection plot of the non-erroneous data
                    clabel = sv + " (" + sv_units + ")"
                    fig, ax, bar = pf.plot_xsection(subsite, t_portal, y_portal, z_portal, clabel, ylabel,
                                                    t_eng=None, m_water_depth=None, inpercentile=None,
                                                    stdev=None)
                    # same guard as the all-data cross-section above
                    if fig:
                        ax.set_title(title, fontsize=9)
                        leg_text = (removed_text,)
                        ax.legend(leg_text, loc='upper center', bbox_to_anchor=(0.5, -0.17), fontsize=6)
                        fig.tight_layout()
                        pf.save_fig(save_dir_xsection, sfile)

                    # 4D plot for gliders only
                    if 'MOAS' in r:
                        if ds_lat is not None and ds_lon is not None:
                            cf.create_dir(save_dir_4d)

                            clabel = sv + " (" + sv_units + ")"
                            zlabel = press[0] + " (" + y_units[0] + ")"

                            fig = plt.figure()
                            ax = fig.add_subplot(111, projection='3d')
                            sct = ax.scatter(lon_portal, lat_portal, y_portal, c=z_portal, s=2)
                            cbar = plt.colorbar(sct, label=clabel, extend='both')
                            cbar.ax.tick_params(labelsize=8)
                            ax.invert_zaxis()
                            ax.view_init(25, 32)
                            ax.invert_xaxis()
                            ax.invert_yaxis()
                            ax.set_zlabel(zlabel, fontsize=9)
                            ax.set_ylabel('Latitude', fontsize=9)
                            ax.set_xlabel('Longitude', fontsize=9)

                            ax.set_title(title, fontsize=9)
                            pf.save_fig(save_dir_4d, sfile)
def main(url_list, sDir, plot_type, start_time, end_time, deployment_num):
    """Plot the glider track (latitude vs. longitude, coloured by time) for each URL.

    For every URL the NetCDF files are located, lat/lon/time of all selected
    files are concatenated and one track figure is saved under
    ``sDir/<array>/<subsite>/<refdes>/<plot_type>``.

    Parameters
    ----------
    url_list : list of str
        Dataset URLs. The second-to-last ``/`` component is split on ``-``
        and fields 1-4 form the reference designator.
    sDir : str
        Root directory under which the plot directory is created.
    plot_type : str
        Name of the sub-directory the figure is written to.
    start_time, end_time : datetime-like or None
        When both are given, each file is sliced to this time range.
    deployment_num : int or None
        When not None, only files whose deployment number equals this value
        are used.
    """
    for u in url_list:
        elements = u.split('/')[-2].split('-')
        r = '-'.join((elements[1], elements[2], elements[3], elements[4]))
        ms = u.split(r + '-')[1].split('/')[0]
        subsite = r.split('-')[0]
        array = subsite[0:2]
        main_sensor = r.split('-')[-1]
        datasets = cf.get_nc_urls([u])
        datasets_sel = cf.filter_collocated_instruments(main_sensor, datasets)

        save_dir = os.path.join(sDir, array, subsite, r, plot_type)
        cf.create_dir(save_dir)
        sname = '-'.join((r, ms, 'track'))

        print('Appending....')
        frames = []
        deployments = []
        end_times = []
        deploy_info_loaded = False
        for ii, d in enumerate(datasets_sel):
            print('\nDataset {} of {}: {}'.format(ii + 1, len(datasets_sel), d.split('/')[-1]))
            ds = xr.open_dataset(d, mask_and_scale=False)
            ds = ds.swap_dims({'obs': 'time'})

            if start_time is not None and end_time is not None:
                ds = ds.sel(time=slice(start_time, end_time))
                if len(ds['time'].values) == 0:
                    print('No data to plot for specified time range: ({} to {})'.format(start_time, end_time))
                    continue

            fname, subsite, refdes, method, stream, deployment = cf.nc_attributes(d)

            if deployment_num is not None:
                # Compare by value (``is not`` tests identity) and parse the
                # trailing four digits so names such as 'deployment0010' work.
                if int(deployment[-4:]) != deployment_num:
                    print(type(int(deployment[-4:])), type(deployment_num))
                    continue

            # get end times of deployments; the lookup depends only on the
            # reference designator, so it is done once per URL
            if not deploy_info_loaded:
                ps_df, n_streams = cf.get_preferred_stream_info(r)
                dr_data = cf.refdes_datareview_json(r)
                for _, row in ps_df.iterrows():
                    deploy = row['deployment']
                    deploy_info = cf.get_deployment_information(dr_data, int(deploy[-4:]))
                    if int(deploy[-4:]) not in deployments:
                        deployments.append(int(deploy[-4:]))
                    if pd.to_datetime(deploy_info['stop_date']) not in end_times:
                        end_times.append(pd.to_datetime(deploy_info['stop_date']))
                deploy_info_loaded = True

            data = {'lat': ds['lat'].values, 'lon': ds['lon'].values}
            frames.append(pd.DataFrame(data, columns=['lat', 'lon'], index=ds['time'].values))

        if not frames:
            # nothing survived the time/deployment filters for this URL
            print('No data to plot for {}'.format(r))
            continue

        # DataFrame.append was removed in pandas 2.0; concatenate once instead.
        sh = pd.concat(frames)

        xD = sh.lon.values
        yD = sh.lat.values
        tD = sh.index.values

        clabel = 'Time'
        ylabel = 'Latitude'
        xlabel = 'Longitude'

        fig, ax = pf.plot_profiles(xD, yD, tD, ylabel, xlabel, clabel, end_times, deployments, stdev=None)
        ax.invert_yaxis()
        ax.set_title('Glider Track - ' + r + '\n' + 'x: platform location', fontsize=9)
        ax.set_xlim(-71.75, -69.75)
        ax.set_ylim(38.75, 40.75)

        # add Pioneer glider sampling area
        ax.add_patch(Rectangle((-71.5, 39.0), 1.58, 1.67, linewidth=3, edgecolor='b', facecolor='none'))
        ax.text(-71, 40.6, 'Pioneer Glider Sampling Area', color='blue', fontsize=8)

        # mark the standard locations of the array subsites
        array_loc = cf.return_array_subsites_standard_loc(array)
        ax.scatter(array_loc.lon, array_loc.lat, s=40, marker='x', color='k', alpha=0.3)

        pf.save_fig(save_dir, sname)
def main(url_list, sDir, deployment_num, start_time, end_time, preferred_only, n_std, inpercentile, zcell_size, zdbar): rd_list = [] for uu in url_list: elements = uu.split('/')[-2].split('-') rd = '-'.join((elements[1], elements[2], elements[3], elements[4])) if rd not in rd_list and 'ENG' not in rd and 'ADCP' not in rd: rd_list.append(rd) for r in rd_list: print('\n{}'.format(r)) datasets = [] deployments = [] for url in url_list: splitter = url.split('/')[-2].split('-') rd_check = '-'.join( (splitter[1], splitter[2], splitter[3], splitter[4])) catalog_rms = '-'.join((r, splitter[-2], splitter[-1])) if rd_check == r: udatasets = cf.get_nc_urls([url]) for u in udatasets: # filter out collocated data files if catalog_rms == u.split('/')[-1].split('_20')[0][15:]: datasets.append(u) deployments.append( int(u.split('/')[-1].split('_')[0][-4:])) deployments = np.unique(deployments).tolist() fdatasets = [] if preferred_only == 'yes': # get the preferred stream information ps_df, n_streams = cf.get_preferred_stream_info(r) for index, row in ps_df.iterrows(): for ii in range(n_streams): try: rms = '-'.join((r, row[ii])) except TypeError: continue for dd in datasets: spl = dd.split('/')[-2].split('-') catalog_rms = '-'.join( (spl[1], spl[2], spl[3], spl[4], spl[5], spl[6])) fdeploy = dd.split('/')[-1].split('_')[0] if rms == catalog_rms and fdeploy == row['deployment']: fdatasets.append(dd) else: fdatasets = datasets main_sensor = r.split('-')[-1] fdatasets_sel = cf.filter_collocated_instruments( main_sensor, fdatasets) for dep in deployments: if deployment_num is not None: if dep is not deployment_num: print('\nskipping deployment {}'.format(dep)) continue rdatasets = [ s for s in fdatasets_sel if 'deployment%04d' % dep in s ] rdatasets.sort() if len(rdatasets) > 0: sci_vars_dict = {} # rdatasets = rdatasets[0:2] #### for testing for i in range(len(rdatasets)): ds = xr.open_dataset(rdatasets[i], mask_and_scale=False) ds = ds.swap_dims({'obs': 'time'}) print('\nAppending 
data from {}: file {} of {}'.format( 'deployment%04d' % dep, i + 1, len(rdatasets))) array = r[0:2] subsite = r.split('-')[0] if start_time is not None and end_time is not None: ds = ds.sel(time=slice(start_time, end_time)) if len(ds['time'].values) == 0: print( 'No data to plot for specified time range: ({} to {})' .format(start_time, end_time)) continue stime = start_time.strftime('%Y-%m-%d') etime = end_time.strftime('%Y-%m-%d') ext = stime + 'to' + etime # .join((ds0_method, ds1_method save_dir_profile = os.path.join( sDir, array, subsite, r, 'profile_plots', 'deployment%04d' % dep, ext) save_dir_xsection = os.path.join( sDir, array, subsite, r, 'xsection_plots', 'deployment%04d' % dep, ext) else: save_dir_profile = os.path.join( sDir, array, subsite, r, 'profile_plots', 'deployment%04d' % dep) save_dir_xsection = os.path.join( sDir, array, subsite, r, 'xsection_plots', 'deployment%04d' % dep) if len(sci_vars_dict) == 0: fname, subsite, refdes, method, stream, deployment = cf.nc_attributes( rdatasets[0]) sci_vars = cf.return_science_vars(stream) if 'CTDPF' not in r: sci_vars.append('int_ctd_pressure') sci_vars.append('time') sci_vars = list(np.unique(sci_vars)) # initialize the dictionary for sci_var in sci_vars: if sci_var == 'time': sci_vars_dict.update({ sci_var: dict(values=np.array([], dtype=np.datetime64), units=[], fv=[]) }) else: sci_vars_dict.update({ sci_var: dict(values=np.array([]), units=[], fv=[]) }) # append data for the deployment into the dictionary for s_v in sci_vars_dict.keys(): vv = ds[s_v] try: if vv.units not in sci_vars_dict[s_v]['units']: sci_vars_dict[s_v]['units'].append(vv.units) except AttributeError: print('') try: if vv._FillValue not in sci_vars_dict[s_v]['fv']: sci_vars_dict[s_v]['fv'].append(vv._FillValue) vv_data = vv.values try: vv_data[ vv_data == vv. 
_FillValue] = np.nan # turn fill values to nans except ValueError: print('') except AttributeError: print('') if len(vv.dims) > 1: print('Skipping plot: variable has >1 dimension') else: sci_vars_dict[s_v]['values'] = np.append( sci_vars_dict[s_v]['values'], vv.values) # plot after appending all data into one file data_start = pd.to_datetime( min(sci_vars_dict['time']['values'])).strftime( '%Y-%m-%dT%H:%M:%S') data_stop = pd.to_datetime(max( sci_vars_dict['time']['values'])).strftime( '%Y-%m-%dT%H:%M:%S') time1 = sci_vars_dict['time']['values'] ds_lat1 = np.empty(np.shape(time1)) ds_lon1 = np.empty(np.shape(time1)) # define pressure variable try: pname = 'seawater_pressure' press = sci_vars_dict[pname] except KeyError: pname = 'int_ctd_pressure' press = sci_vars_dict[pname] y1 = press['values'] try: y_units = press['units'][0] except IndexError: y_units = '' for sv in sci_vars_dict.keys(): print('') print(sv) if sv not in [ 'seawater_pressure', 'int_ctd_pressure', 'time' ]: z1 = sci_vars_dict[sv]['values'] fv = sci_vars_dict[sv]['fv'][0] sv_units = sci_vars_dict[sv]['units'][0] # Check if the array is all NaNs if sum(np.isnan(z1)) == len(z1): print('Array of all NaNs - skipping plot.') continue # Check if the array is all fill values elif len(z1[z1 != fv]) == 0: print('Array of all fill values - skipping plot.') continue else: # remove unreasonable pressure data (e.g. 
for surface piercing profilers) if zdbar: po_ind = (0 < y1) & (y1 < zdbar) tm = time1[po_ind] y = y1[po_ind] z = z1[po_ind] ds_lat = ds_lat1[po_ind] ds_lon = ds_lon1[po_ind] else: tm = time1 y = y1 z = z1 ds_lat = ds_lat1 ds_lon = ds_lon1 # reject erroneous data dtime, zpressure, ndata, lenfv, lennan, lenev, lengr, global_min, global_max, lat, lon = \ cf.reject_erroneous_data(r, sv, tm, y, z, fv, ds_lat, ds_lon) # get rid of 0.0 data # if sv == 'salinity': # ind = ndata > 20 # elif sv == 'density': # ind = ndata > 1010 # elif sv == 'conductivity': # ind = ndata > 2 # else: # ind = ndata > 0 # if sv == 'sci_flbbcd_chlor_units': # ind = ndata < 7.5 # elif sv == 'sci_flbbcd_cdom_units': # ind = ndata < 25 # else: # ind = ndata > 0.0 if 'CTD' in r: ind = zpressure > 0.0 else: ind = ndata > 0.0 lenzero = np.sum(~ind) dtime = dtime[ind] zpressure = zpressure[ind] ndata = ndata[ind] if ds_lat is not None and ds_lon is not None: lat = lat[ind] lon = lon[ind] else: lat = None lon = None if len(dtime) > 0: # reject time range from data portal file export t_portal, z_portal, y_portal, lat_portal, lon_portal = \ cf.reject_timestamps_dataportal(subsite, r, dtime, zpressure, ndata, lat, lon) print( 'removed {} data points using visual inspection of data' .format(len(ndata) - len(z_portal))) # create data groups # if len(y_portal) > 0: # columns = ['tsec', 'dbar', str(sv)] # min_r = int(round(np.nanmin(y_portal) - zcell_size)) # max_r = int(round(np.nanmax(y_portal) + zcell_size)) # ranges = list(range(min_r, max_r, zcell_size)) # # groups, d_groups = gt.group_by_depth_range(t_portal, y_portal, z_portal, columns, ranges) # # if 'scatter' in sv: # n_std = None # to use percentile # else: # n_std = n_std # # # get percentile analysis for printing on the profile plot # y_avg, n_avg, n_min, n_max, n0_std, n1_std, l_arr, time_ex = cf.reject_timestamps_in_groups( # groups, d_groups, n_std, inpercentile) """ Plot all data """ if len(time1) > 0: cf.create_dir(save_dir_profile) 
cf.create_dir(save_dir_xsection) sname = '-'.join((r, method, sv)) # sfileall = '_'.join(('all_data', sname, pd.to_datetime(time1.min()).strftime('%Y%m%d'))) # tm0 = pd.to_datetime(time1.min()).strftime('%Y-%m-%dT%H:%M:%S') # tm1 = pd.to_datetime(time1.max()).strftime('%Y-%m-%dT%H:%M:%S') sfileall = '_'.join( (sname, pd.to_datetime( t_portal.min()).strftime('%Y%m%d'))) tm0 = pd.to_datetime(t_portal.min()).strftime( '%Y-%m-%dT%H:%M:%S') tm1 = pd.to_datetime(t_portal.max()).strftime( '%Y-%m-%dT%H:%M:%S') title = ' '.join( (deployment, refdes, method)) + '\n' + tm0 + ' to ' + tm1 if 'SPKIR' in r: title = title + '\nWavelength = 510 nm' ''' profile plot ''' xlabel = sv + " (" + sv_units + ")" ylabel = pname + " (" + y_units + ")" clabel = 'Time' # fig, ax = pf.plot_profiles(z1, y1, time1, ylabel, xlabel, clabel, stdev=None) fig, ax = pf.plot_profiles(z_portal, y_portal, t_portal, ylabel, xlabel, clabel, stdev=None) ax.set_title(title, fontsize=9) fig.tight_layout() pf.save_fig(save_dir_profile, sfileall) ''' xsection plot ''' clabel = sv + " (" + sv_units + ")" ylabel = pname + " (" + y_units + ")" # fig, ax, bar = pf.plot_xsection(subsite, time1, y1, z1, clabel, ylabel, t_eng=None, # m_water_depth=None, inpercentile=None, stdev=None) fig, ax, bar = pf.plot_xsection( subsite, t_portal, y_portal, z_portal, clabel, ylabel, t_eng=None, m_water_depth=None, inpercentile=None, stdev=None) if fig: ax.set_title(title, fontsize=9) fig.tight_layout() pf.save_fig(save_dir_xsection, sfileall) """
def main(url_list, sDir, deployment_num, start_time, end_time, preferred_only,
         zdbar, n_std, inpercentile, zcell_size):
    """Plot profiles and cross-sections after removing suspect data.

    For every reference designator found in ``url_list`` (names containing
    'ENG' or 'ADCP' are skipped) each NetCDF file is opened and, for every
    science variable that is not a pressure variable, the data go through
    ``cf.reject_erroneous_data``, a minimum-value filter,
    ``cf.reject_timestamps_dataportal`` and
    ``cf.reject_timestamps_in_groups``.  Timestamps flagged by the last step
    are appended to ``<deployment>_<refdes>_<method>_excluded_timestamps.csv``
    and, when at least one point was removed, a profile plot and a
    cross-section plot of the remaining data are saved.

    :param url_list: catalog URLs; the second-to-last '/' element is split on
        '-' and its elements 1-4 form the reference designator
    :param sDir: root directory for the figures and the exclusion CSV files
    :param deployment_num: deployment number (int) to process, or None for all
    :param start_time: start of the time window (needs ``strftime``), or None
    :param end_time: end of the time window (needs ``strftime``), or None
    :param preferred_only: 'yes' to keep only files of the preferred streams
    :param zdbar: when truthy, keep only samples with 0 < pressure < zdbar
    :param n_std: value forwarded to ``cf.reject_timestamps_in_groups``;
        replaced by None for variables whose name contains 'scatter'
    :param inpercentile: percentile forwarded to
        ``cf.reject_timestamps_in_groups``
    :param zcell_size: step used to build the depth ranges for grouping
    :return: None
    """
    # Values must be strictly greater than these to be kept; any variable
    # that is not listed only has to be greater than 0.
    min_valid = {'salinity': 30, 'density': 1022.5, 'conductivity': 3.45}

    # unique reference designators, in order of first appearance
    rd_list = []
    for uu in url_list:
        elements = uu.split('/')[-2].split('-')
        rd = '-'.join((elements[1], elements[2], elements[3], elements[4]))
        if rd not in rd_list and 'ENG' not in rd and 'ADCP' not in rd:
            rd_list.append(rd)

    for r in rd_list:
        print('\n{}'.format(r))

        # gather the NetCDF files of every URL that belongs to this refdes
        datasets = []
        for u in url_list:
            splitter = u.split('/')[-2].split('-')
            rd_check = '-'.join(
                (splitter[1], splitter[2], splitter[3], splitter[4]))
            if rd_check == r:
                datasets.extend(cf.get_nc_urls([u]))

        if preferred_only == 'yes':
            # keep only the files whose refdes-method-stream and deployment
            # match the preferred stream information
            fdatasets = []
            ps_df, n_streams = cf.get_preferred_stream_info(r)
            for index, row in ps_df.iterrows():
                for ii in range(n_streams):
                    try:
                        rms = '-'.join((r, row[ii]))
                    except TypeError:
                        # row[ii] is not a string (presumably an empty cell)
                        continue
                    for dd in datasets:
                        spl = dd.split('/')[-2].split('-')
                        catalog_rms = '-'.join(
                            (spl[1], spl[2], spl[3], spl[4], spl[5], spl[6]))
                        fdeploy = dd.split('/')[-1].split('_')[0]
                        if rms == catalog_rms and fdeploy == row['deployment']:
                            fdatasets.append(dd)
        else:
            fdatasets = datasets

        main_sensor = r.split('-')[-1]
        fdatasets_sel = cf.filter_collocated_instruments(main_sensor, fdatasets)

        for fd in fdatasets_sel:
            part_d = fd.split('/')[-1]
            print('\n{}'.format(part_d))

            # the context manager closes the file on every exit path,
            # including the early ``continue`` statements below
            with xr.open_dataset(fd, mask_and_scale=False) as ds:
                ds = ds.swap_dims({'obs': 'time'})
                fname, subsite, refdes, method, stream, deployment = \
                    cf.nc_attributes(fd)
                array = subsite[0:2]
                sci_vars = cf.return_science_vars(stream)

                # compare by value: ``is not`` on ints tests object identity
                if deployment_num is not None \
                        and int(deployment[-4:]) != deployment_num:
                    continue

                if start_time is not None and end_time is not None:
                    ds = ds.sel(time=slice(start_time, end_time))
                    if len(ds['time'].values) == 0:
                        print(
                            'No data to plot for specified time range: ({} to {})'
                            .format(start_time, end_time))
                        continue
                    stime = start_time.strftime('%Y-%m-%d')
                    etime = end_time.strftime('%Y-%m-%d')
                    # time-window figures go into their own sub-folder
                    ext_parts = (stime + 'to' + etime,)
                else:
                    ext_parts = ()

                save_dir_profile = os.path.join(
                    sDir, array, subsite, refdes, 'profile_plots',
                    deployment, *ext_parts)
                save_dir_xsection = os.path.join(
                    sDir, array, subsite, refdes, 'xsection_plots',
                    deployment, *ext_parts)

                texclude_dir = os.path.join(sDir, array, subsite, refdes,
                                            'time_to_exclude')
                cf.create_dir(texclude_dir)

                time1 = ds['time'].values

                try:
                    ds_lat1 = ds['lat'].values
                except KeyError:
                    ds_lat1 = None
                    print('No latitude variable in file')

                try:
                    ds_lon1 = ds['lon'].values
                except KeyError:
                    ds_lon1 = None
                    print('No longitude variable in file')

                # get pressure variable
                pvarname, y1, y_units, press, y_fillvalue = \
                    cf.add_pressure_to_dictionary_of_sci_vars(ds)

                # start the file that lists the timestamps with suspect data
                # for each data parameter (header row only)
                stat_data = pd.DataFrame(
                    columns=['deployments', 'time_to_exclude'])
                file_exclude = '{}/{}_{}_{}_excluded_timestamps.csv'.format(
                    texclude_dir, deployment, refdes, method)
                stat_data.to_csv(file_exclude, index=True)

                # loop through sensor-data parameters
                for sv in sci_vars:
                    print(sv)
                    if 'pressure' in sv:
                        continue

                    z1 = ds[sv].values
                    fv = ds[sv]._FillValue
                    sv_units = ds[sv].units

                    if np.isnan(z1).all():
                        print('Array of all NaNs - skipping plot.')
                        continue
                    if np.all(z1 == fv):
                        print('Array of all fill values - skipping plot.')
                        continue

                    # remove unreasonable pressure data (e.g. for surface
                    # piercing profilers)
                    if zdbar:
                        po_ind = (0 < y1) & (y1 < zdbar)
                        n_zdbar = np.sum(~po_ind)
                        tm = time1[po_ind]
                        y = y1[po_ind]
                        z = z1[po_ind]
                        # lat/lon are None when the file has no such variable
                        ds_lat = ds_lat1[po_ind] if ds_lat1 is not None else None
                        ds_lon = ds_lon1[po_ind] if ds_lon1 is not None else None
                        print('{} in water depth > {} dbar'.format(
                            n_zdbar, zdbar))
                    else:
                        tm, y, z = time1, y1, z1
                        ds_lat, ds_lon = ds_lat1, ds_lon1

                    # reject erroneous data
                    dtime, zpressure, ndata, lenfv, lennan, lenev, lengr, \
                        global_min, global_max, lat, lon = \
                        cf.reject_erroneous_data(r, sv, tm, y, z, fv,
                                                 ds_lat, ds_lon)

                    # get rid of values at or below the per-variable minimum
                    ind = ndata > min_valid.get(sv, 0)
                    lenzero = np.sum(~ind)
                    dtime = dtime[ind]
                    zpressure = zpressure[ind]
                    ndata = ndata[ind]
                    if ds_lat is not None and ds_lon is not None:
                        lat = lat[ind]
                        lon = lon[ind]
                    else:
                        lat = None
                        lon = None

                    if len(dtime) == 0:
                        continue

                    # reject time range from data portal file export
                    t_portal, z_portal, y_portal, lat_portal, lon_portal = \
                        cf.reject_timestamps_dataportal(
                            subsite, r, dtime, zpressure, ndata, lat, lon)
                    print('removed {} data points using visual inspection of data'
                          .format(len(ndata) - len(z_portal)))

                    if len(y_portal) == 0:
                        continue

                    # create data groups
                    columns = ['tsec', 'dbar', str(sv)]
                    min_r = int(round(np.nanmin(y_portal) - zcell_size))
                    max_r = int(round(np.nanmax(y_portal) + zcell_size))
                    ranges = list(range(min_r, max_r, zcell_size))
                    groups, d_groups = gt.group_by_depth_range(
                        t_portal, y_portal, z_portal, columns, ranges)

                    # 'scatter' variables use the percentile criterion; a
                    # per-variable name keeps the caller's n_std intact for
                    # the variables that follow
                    sv_n_std = None if 'scatter' in sv else n_std

                    # identifying timestamps from percentile analysis
                    y_avg, n_avg, n_min, n_max, n0_std, n1_std, l_arr, time_ex = \
                        cf.reject_timestamps_in_groups(
                            groups, d_groups, sv_n_std, inpercentile)

                    if len(time_ex) > 0:
                        # write the timestamps to the .csv file used by the
                        # data_range.py script
                        t_exclude = time_ex[0]
                        for t_ex in time_ex[1:]:
                            t_exclude = '{}, {}'.format(t_exclude, t_ex)
                        stat_data = pd.DataFrame(
                            {'deployments': deployment,
                             'time_to_exclude': t_exclude},
                            index=[sv])
                        stat_data.to_csv(file_exclude, index=True, mode='a',
                                         header=False)

                        # rejecting timestamps from percentile analysis
                        t_nospct, z_nospct, y_nospct = cf.reject_suspect_data(
                            t_portal, y_portal, z_portal, time_ex)
                    else:
                        t_nospct = t_portal
                        z_nospct = z_portal
                        y_nospct = y_portal

                    # figures are only produced when data remain and at
                    # least one point was removed after the minimum filter
                    if len(t_nospct) == 0 or len(t_nospct) == len(dtime):
                        continue

                    cf.create_dir(save_dir_profile)
                    cf.create_dir(save_dir_xsection)
                    sname = '-'.join((r, method, sv))
                    sfile = '_'.join(
                        ('rm_suspect_data', sname,
                         pd.to_datetime(t_nospct.min()).strftime('%Y%m%d')))
                    t0 = pd.to_datetime(t_nospct.min()).strftime(
                        '%Y-%m-%dT%H:%M:%S')
                    t1 = pd.to_datetime(t_nospct.max()).strftime(
                        '%Y-%m-%dT%H:%M:%S')
                    title = ' '.join(
                        (deployment, refdes, method)) + '\n' + t0 + ' to ' + t1

                    # legend: one line per rejection step
                    n_grouped = len(z_portal) - len(z_nospct)
                    n_visual = len(ndata) - len(z_portal)
                    legend = (
                        'removed {} fill values, {} NaNs, {} Extreme Values (1e7), '
                        '{} Global ranges [{} - {}], {} unreasonable values'
                        .format(lenfv, lennan, lenev, lengr, global_min,
                                global_max, lenzero))
                    if sv_n_std:
                        legend += (
                            '\nremoved {} data points +/- {} SD of data grouped in {} dbar segments'
                            .format(n_grouped, sv_n_std, zcell_size))
                    else:
                        legend += (
                            '\nremoved {} in the upper and lower {} percentile of data grouped in {} '
                            'dbar segments'
                            .format(n_grouped, inpercentile, zcell_size))
                    legend += (
                        '\nexcluded {} suspect data points when inspected visually'
                        .format(n_visual))
                    if zdbar:
                        legend += (
                            '\nexcluded {} suspect data in water depth greater than {} dbar'
                            .format(n_zdbar, zdbar))
                    leg_text = (legend,)

                    # profile plot
                    xlabel = sv + " (" + sv_units + ")"
                    ylabel = press[0] + " (" + y_units[0] + ")"
                    clabel = 'Time'

                    print('plotting profile')
                    fig, ax = pf.plot_profiles(z_nospct, y_nospct, t_nospct,
                                               ylabel, xlabel, clabel,
                                               stdev=None)
                    ax.set_title(title, fontsize=9)
                    ax.plot(n_avg, y_avg, '-k')
                    ax.legend(leg_text, loc='upper center',
                              bbox_to_anchor=(0.5, -0.17), fontsize=6)
                    fig.tight_layout()
                    pf.save_fig(save_dir_profile, sfile)

                    # xsection plot
                    print('plotting xsection')
                    clabel = sv + " (" + sv_units + ")"
                    ylabel = press[0] + " (" + y_units[0] + ")"

                    fig, ax, bar = pf.plot_xsection(
                        subsite, t_nospct, y_nospct, z_nospct, clabel, ylabel,
                        t_eng=None, m_water_depth=None,
                        inpercentile=inpercentile, stdev=None)
                    ax.set_title(title, fontsize=9)
                    ax.legend(leg_text, loc='upper center',
                              bbox_to_anchor=(0.5, -0.17), fontsize=6)
                    fig.tight_layout()
                    pf.save_fig(save_dir_xsection, sfile)
def main(url_list, sDir, plot_type):
    """Plot depth profiles of every science variable, all files combined.

    For each reference designator and each method-stream found in
    ``url_list`` the data of all matching NetCDF files are appended, fill
    values, NaNs, extreme values and values outside the global ranges are
    rejected, and two profile plots are saved per variable: one with the
    binned average and a standard-deviation envelope, and one plotted with
    ``stdev=5`` (file name suffix ``rmoutliers``).

    :param url_list: paths to the instrument data by methods
    :param sDir: path to the directory on your machine to save files
    :param plot_type: folder name for a plot type
    :return: None
    """
    bin_size = 10  # step of the depth ranges used for the averaged profile
    n_std = 3  # half-width of the shaded envelope, in standard deviations

    rd_list = []
    ms_list = []
    for uu in url_list:
        elements = uu.split('/')[-2].split('-')
        rd = '-'.join((elements[1], elements[2], elements[3], elements[4]))
        ms = uu.split(rd + '-')[1].split('/')[0]
        if rd not in rd_list:
            rd_list.append(rd)
        if ms not in ms_list:
            ms_list.append(ms)

    # separate different instruments
    for r in rd_list:
        print('\n{}'.format(r))
        subsite = r.split('-')[0]
        array = subsite[0:2]
        main_sensor = r.split('-')[-1]

        ps_df, n_streams = cf.get_preferred_stream_info(r)

        # read in the analysis file
        dr_data = cf.refdes_datareview_json(r)

        # get end times of deployments
        deployments = []
        end_times = []
        for index, row in ps_df.iterrows():
            deploy = row['deployment']
            deploy_info = get_deployment_information(dr_data, int(deploy[-4:]))
            deployments.append(int(deploy[-4:]))
            end_times.append(pd.to_datetime(deploy_info['stop_date']))

        # get the list of data files and filter out collocated instruments
        # and other streams
        datasets = []
        for u in url_list:
            print(u)
            splitter = u.split('/')[-2].split('-')
            rd_check = '-'.join(
                (splitter[1], splitter[2], splitter[3], splitter[4]))
            if rd_check == r:
                datasets.extend(cf.get_nc_urls([u]))

        fdatasets = cf.filter_collocated_instruments(main_sensor, datasets)
        fdatasets = cf.filter_other_streams(r, ms_list, fdatasets)

        # separate the data files by methods
        for ms in ms_list:
            fdatasets_sel = [path for path in fdatasets if ms in path]

            # create a folder to save figures
            save_dir = os.path.join(sDir, array, subsite, r, plot_type,
                                    ms.split('-')[0])
            cf.create_dir(save_dir)

            # create a dictionary for science variables from analysis file
            stream_sci_vars_dict = dict()
            for dstream in dr_data['instrument']['data_streams']:
                dr_ms = '-'.join((dstream['method'], dstream['stream_name']))
                if ms == dr_ms:
                    stream_sci_vars_dict[dr_ms] = dict(vars=dict())
                    sci_vars = dict()
                    for param in dstream['stream']['parameters']:
                        if param['data_product_type'] == 'Science Data':
                            sci_vars.update(
                                {param['name']: dict(db_units=param['unit'])})
                    if len(sci_vars) > 0:
                        stream_sci_vars_dict[dr_ms]['vars'] = sci_vars

            # initialize an empty data array for science variables in dictionary
            sci_vars_dict = cd.initialize_empty_arrays(stream_sci_vars_dict, ms)

            y_unit = []
            y_name = []
            for fd in fdatasets_sel:
                # the context manager closes the file once its values are read
                with xr.open_dataset(fd, mask_and_scale=False) as ds:
                    print('\nAppending data file: {}'.format(fd.split('/')[-1]))
                    for var in list(sci_vars_dict[ms]['vars'].keys()):
                        sh = sci_vars_dict[ms]['vars'][var]
                        if ds[var].units != sh['db_units']:
                            continue
                        if ds[var]._FillValue not in sh['fv']:
                            sh['fv'].append(ds[var]._FillValue)
                        if ds[var].units not in sh['units']:
                            sh['units'].append(ds[var].units)

                        sh['t'] = np.append(sh['t'], ds['time'].values)
                        sh['values'] = np.append(sh['values'], ds[var].values)

                        # add pressure to dictionary of sci vars
                        if 'MOAS' in subsite:
                            if 'CTD' in main_sensor:
                                # for glider CTDs, pressure is a coordinate
                                pressure = 'sci_water_pressure_dbar'
                            else:
                                pressure = 'int_ctd_pressure'
                        else:
                            pressure = pf.pressure_var(ds, ds.data_vars.keys())

                        if ds[pressure].units not in y_unit:
                            y_unit.append(ds[pressure].units)
                        if ds[pressure].long_name not in y_name:
                            y_name.append(ds[pressure].long_name)
                        sh['pressure'] = np.append(sh['pressure'],
                                                   ds[pressure].values)

            if len(y_unit) != 1:
                print('pressure unit varies!')
            if len(y_name) != 1:
                print('pressure long name varies!')
            # joining keeps the label a string even when the files disagree
            ylabel = ', '.join(y_name) + " (" + ', '.join(y_unit) + ")"

            for m, n in sci_vars_dict.items():
                for sv, vinfo in n['vars'].items():
                    print('\nWorking on variable: {}'.format(sv))
                    if len(vinfo['t']) < 1:
                        print('no variable data to plot')
                        continue

                    sv_units = vinfo['units'][0]
                    fv = vinfo['fv'][0]
                    t0 = pd.to_datetime(min(vinfo['t'])).strftime(
                        '%Y-%m-%dT%H:%M:%S')
                    t1 = pd.to_datetime(max(vinfo['t'])).strftime(
                        '%Y-%m-%dT%H:%M:%S')
                    t = vinfo['t']
                    x = vinfo['values']
                    y = vinfo['pressure']

                    if np.isnan(x).all():
                        print('Array of all NaNs - skipping plot.')
                        continue
                    if np.all(x == fv):
                        print('Array of all fill values - skipping plot.')
                        continue

                    # reject fill values
                    fv_ind = x != fv
                    t_nofv = t[fv_ind]
                    y_nofv = y[fv_ind]
                    x_nofv = x[fv_ind]
                    print(len(x) - len(x_nofv), ' fill values')

                    # reject NaNs; the mask is built on the already filtered
                    # values so that it lines up with the filtered arrays
                    nan_ind = ~np.isnan(x_nofv)
                    t_nonan = t_nofv[nan_ind]
                    y_nonan = y_nofv[nan_ind]
                    x_nonan = x_nofv[nan_ind]
                    print(len(x_nofv) - len(x_nonan), ' NaNs')

                    # reject extreme values
                    ev_ind = cf.reject_extreme_values(x_nonan)
                    t_noev = t_nonan[ev_ind]
                    y_noev = y_nonan[ev_ind]
                    x_noev = x_nonan[ev_ind]
                    print(len(x_nonan) - len(x_noev), ' Extreme Values', '|1e7|')

                    # reject values outside global ranges
                    global_min, global_max = cf.get_global_ranges(r, sv)
                    print('global ranges for : {}-{}  {} - {}'.format(
                        r, sv, global_min, global_max))
                    if isinstance(global_min, (int, float)) and isinstance(
                            global_max, (int, float)):
                        gr_ind = cf.reject_global_ranges(
                            x_noev, global_min, global_max)
                        t_nogr = t_noev[gr_ind]
                        y_nogr = y_noev[gr_ind]
                        x_nogr = x_noev[gr_ind]
                    else:
                        # no usable range for this variable: keep everything
                        t_nogr = t_noev
                        y_nogr = y_noev
                        x_nogr = x_noev

                    # the guard checks the arrays that are grouped and plotted
                    if len(x_nogr) == 0:
                        print('no variable data to plot')
                        continue

                    if m == 'common_stream_placeholder':
                        sname = '-'.join((r, sv))
                    else:
                        sname = '-'.join((r, m, sv))

                    if sv == 'pressure':
                        # the averaged profile below is not built for pressure
                        continue

                    columns = ['tsec', 'dbar', str(sv)]
                    min_r = int(round(np.nanmin(y_nogr) - bin_size))
                    max_r = int(round(np.nanmax(y_nogr) + bin_size))
                    ranges = list(range(min_r, max_r, bin_size))
                    groups, d_groups = gt.group_by_depth_range(
                        t_nogr, y_nogr, x_nogr, columns, ranges)

                    y_avg, n_avg, n_min, n_max = [], [], [], []
                    n0_std, n1_std, l_arr = [], [], []
                    for ii in range(len(groups)):
                        # d_groups is read three columns per group, starting
                        # at column 1 (presumably time, pressure, value)
                        first_col = 3 * ii + 1
                        valid = d_groups[first_col].notnull()
                        ypres = d_groups[first_col + 1][valid]
                        nval = d_groups[first_col + 2][valid]

                        l_arr.append(len(nval))  # count of data per group
                        y_avg.append(ypres.mean())
                        n_avg.append(nval.mean())
                        n_min.append(nval.min())
                        n_max.append(nval.max())
                        n0_std.append(nval.mean() + n_std * nval.std())
                        n1_std.append(nval.mean() - n_std * nval.std())

                    # Plot all data
                    xlabel = sv + " (" + sv_units + ")"
                    clabel = 'Time'

                    fig, ax = pf.plot_profiles(
                        x_nogr, y_nogr, t_nogr, ylabel, xlabel, clabel,
                        end_times, deployments, stdev=None)

                    title_text = ' '.join((r, ms.split('-')[-1])) + '\n' \
                        + t0 + ' - ' + t1 + '\n' + str(bin_size) + \
                        ' m average and ' + str(n_std) + ' std shown'
                    ax.set_title(title_text, fontsize=9)
                    ax.plot(n_avg, y_avg, '-k')
                    ax.fill_betweenx(y_avg, n0_std, n1_std, color='m',
                                     alpha=0.2)
                    pf.save_fig(save_dir, sname)

                    # Plot data with outliers removed
                    fig, ax = pf.plot_profiles(
                        x_nogr, y_nogr, t_nogr, ylabel, xlabel, clabel,
                        end_times, deployments, stdev=5)
                    ax.set_title(' '.join((r, ms.split('-')[-1])) + '\n'
                                 + t0 + ' - ' + t1, fontsize=9)
                    sfile = '_'.join((sname, 'rmoutliers'))
                    pf.save_fig(save_dir, sfile)
def main(url_list, sDir, plot_type, deployment_num, start_time, end_time,
         method_num, zdbar, n_std, inpercentile, zcell_size):
    """Plot profiles at each stage of the suspect-data rejection.

    Every NetCDF file of every URL is processed on its own.  For each science
    variable the data go through ``cf.reject_erroneous_data``,
    ``cf.reject_timestamps_in_groups``, ``cf.reject_timestamps_dataportal``
    and an optional depth cut-off; the flagged timestamps are appended to
    ``<deployment>_<refdes>_<method>_excluded_timestamps.csv`` and one
    profile plot is saved for each stage that removed data.

    :param url_list: catalog URLs; the second-to-last '/' element is split on
        '-' and its elements 1-4 form the reference designator
    :param sDir: root directory for the figures and the exclusion CSV files
    :param plot_type: folder name for a plot type
    :param deployment_num: deployment number (int) to process, or None for all
    :param start_time: start of the time window (needs ``strftime``), or None
    :param end_time: end of the time window (needs ``strftime``), or None
    :param method_num: delivery method to process, or None for all
    :param zdbar: keep only samples with pressure < zdbar, or None
    :param n_std: value forwarded to ``cf.reject_timestamps_in_groups``;
        replaced by None for variables whose name contains 'scatter'
    :param inpercentile: percentile forwarded to
        ``cf.reject_timestamps_in_groups``
    :param zcell_size: step used to build the depth ranges for grouping
    :return: None
    """
    for i, u in enumerate(url_list):
        print('\nUrl {} of {}: {}'.format(i + 1, len(url_list), u))
        elements = u.split('/')[-2].split('-')
        r = '-'.join((elements[1], elements[2], elements[3], elements[4]))
        ms = u.split(r + '-')[1].split('/')[0]
        subsite = r.split('-')[0]
        array = subsite[0:2]
        main_sensor = r.split('-')[-1]

        # read URL to get data
        datasets = cf.get_nc_urls([u])
        datasets_sel = cf.filter_collocated_instruments(main_sensor, datasets)

        # get sci data review list
        dr_data = cf.refdes_datareview_json(r)
        ps_df, n_streams = cf.get_preferred_stream_info(r)

        # get end times of deployments
        deployments = []
        end_times = []
        for index, row in ps_df.iterrows():
            deploy = row['deployment']
            deploy_info = cf.get_deployment_information(dr_data,
                                                        int(deploy[-4:]))
            deployments.append(int(deploy[-4:]))
            end_times.append(pd.to_datetime(deploy_info['stop_date']))

        # create a dictionary for science variables from analysis file
        stream_sci_vars_dict = dict()
        for dstream in dr_data['instrument']['data_streams']:
            dr_ms = '-'.join((dstream['method'], dstream['stream_name']))
            if ms == dr_ms:
                stream_sci_vars_dict[dr_ms] = dict(vars=dict())
                sci_vars = dict()
                for param in dstream['stream']['parameters']:
                    if param['data_product_type'] == 'Science Data':
                        sci_vars.update(
                            {param['name']: dict(db_units=param['unit'])})
                if len(sci_vars) > 0:
                    stream_sci_vars_dict[dr_ms]['vars'] = sci_vars

        for ii, d in enumerate(datasets_sel):
            part_d = d.split('/')[-1]
            print('\nDataset {} of {}: {}'.format(ii + 1, len(datasets_sel),
                                                  part_d))
            with xr.open_dataset(d, mask_and_scale=False) as ds:
                ds = ds.swap_dims({'obs': 'time'})
                fname, subsite, refdes, method, stream, deployment = \
                    cf.nc_attributes(d)

                if method_num is not None:
                    if method != method_num:
                        print(method_num, method)
                        continue

                # the last four characters carry the deployment number;
                # compare by value (``is not`` tests object identity)
                if deployment_num is not None \
                        and int(deployment[-4:]) != deployment_num:
                    continue

                if start_time is not None and end_time is not None:
                    ds = ds.sel(time=slice(start_time, end_time))
                    if len(ds['time'].values) == 0:
                        print(
                            'No data to plot for specified time range: ({} to {})'
                            .format(start_time, end_time))
                        continue
                    stime = start_time.strftime('%Y-%m-%d')
                    etime = end_time.strftime('%Y-%m-%d')
                    ext = stime + 'to' + etime
                    save_dir = os.path.join(sDir, array, subsite, refdes,
                                            plot_type, ms.split('-')[0],
                                            deployment, ext)
                else:
                    save_dir = os.path.join(sDir, array, subsite, refdes,
                                            plot_type, ms.split('-')[0],
                                            deployment)
                cf.create_dir(save_dir)

                texclude_dir = os.path.join(sDir, array, subsite, refdes,
                                            'time_to_exclude')
                cf.create_dir(texclude_dir)

                # initialize an empty data array for science variables in
                # dictionary
                sci_vars_dict = cd.initialize_empty_arrays(
                    stream_sci_vars_dict, ms)

                # the pressure lookup only depends on the dataset, so it is
                # done once, the first time a variable needs it
                pressure_info = None
                for var in list(sci_vars_dict[ms]['vars'].keys()):
                    sh = sci_vars_dict[ms]['vars'][var]
                    if ds[var].units != sh['db_units']:
                        continue
                    if ds[var]._FillValue not in sh['fv']:
                        sh['fv'].append(ds[var]._FillValue)
                    if ds[var].units not in sh['units']:
                        sh['units'].append(ds[var].units)

                    sh['t'] = np.append(sh['t'], ds['time'].values)
                    sh['values'] = np.append(sh['values'], ds[var].values)

                    if pressure_info is None:
                        pressure_info = \
                            cf.add_pressure_to_dictionary_of_sci_vars(ds)
                    y, y_unit, y_name = pressure_info
                    sh['pressure'] = np.append(sh['pressure'], y)

                # start the file listing the timestamps to exclude
                # (header row only)
                stat_data = pd.DataFrame(
                    columns=['deployments', 'time_to_exclude'])
                file_exclude = '{}/{}_{}_{}_excluded_timestamps.csv'.format(
                    texclude_dir, deployment, refdes, method)
                stat_data.to_csv(file_exclude, index=True)

                for m, n in sci_vars_dict.items():
                    for sv, vinfo in n['vars'].items():
                        print(sv)
                        if len(vinfo['t']) < 1:
                            print('no variable data to plot')
                            continue

                        sv_units = vinfo['units'][0]
                        fv = vinfo['fv'][0]
                        t = vinfo['t']
                        z = vinfo['values']
                        y = vinfo['pressure']

                        if np.isnan(z).all():
                            print('Array of all NaNs - skipping plot.')
                            continue
                        if np.all(z == fv):
                            print('Array of all fill values - skipping plot.')
                            continue

                        # reject erroneous data
                        dtime, zpressure, ndata, lenfv, lennan, lenev, lengr, \
                            global_min, global_max = \
                            cf.reject_erroneous_data(r, sv, t, y, z, fv)

                        # min()/max() below need at least one sample
                        if len(zpressure) == 0:
                            print('no variable data to plot')
                            continue

                        # create data groups
                        columns = ['tsec', 'dbar', str(sv)]
                        min_r = int(round(min(zpressure) - zcell_size))
                        max_r = int(round(max(zpressure) + zcell_size))
                        ranges = list(range(min_r, max_r, zcell_size))
                        groups, d_groups = gt.group_by_depth_range(
                            dtime, zpressure, ndata, columns, ranges)

                        # 'scatter' variables use the percentile criterion; a
                        # per-variable name keeps the caller's n_std intact
                        # for the variables that follow
                        sv_n_std = None if 'scatter' in sv else n_std

                        # rejecting timestamps from percentile analysis
                        y_avg, n_avg, n_min, n_max, n0_std, n1_std, l_arr, \
                            time_ex, t_nospct, z_nospct, y_nospct = \
                            cf.reject_timestamps_in_groups(
                                groups, d_groups, sv_n_std, dtime, zpressure,
                                ndata, inpercentile)

                        print('{} using {} percentile of data grouped in {} dbar segments'
                              .format(len(zpressure) - len(z_nospct),
                                      inpercentile, zcell_size))

                        # write the timestamps to the .csv file used by the
                        # data_range.py script
                        if len(time_ex) != 0:
                            t_exclude = time_ex[0]
                            for t_ex in time_ex[1:]:
                                t_exclude = '{}, {}'.format(t_exclude, t_ex)
                            stat_data = pd.DataFrame(
                                {'deployments': deployment,
                                 'time_to_exclude': t_exclude},
                                index=[sv])
                            stat_data.to_csv(file_exclude, index=True,
                                             mode='a', header=False)

                        # reject time range from data portal file export
                        t_portal, z_portal, y_portal = \
                            cf.reject_timestamps_dataportal(
                                subsite, r, t_nospct, z_nospct, y_nospct)
                        print('{} using visual inspection of data'.format(
                            len(z_nospct) - len(z_portal)))

                        # reject data in a depth range
                        if zdbar is not None:
                            y_ind = y_portal < zdbar
                            t_array = t_portal[y_ind]
                            y_array = y_portal[y_ind]
                            z_array = z_portal[y_ind]
                        else:
                            t_array = t_portal
                            y_array = y_portal
                            z_array = z_portal
                        # number of samples dropped by the depth cut-off
                        n_depth = len(z_portal) - len(z_array)
                        print('{} in water depth > {} dbar'.format(n_depth,
                                                                  zdbar))

                        if len(t_array) == 0:
                            continue

                        # Plot data
                        if m == 'common_stream_placeholder':
                            sname = '-'.join((sv, r))
                        else:
                            sname = '-'.join((sv, r, m))

                        xlabel = sv + " (" + sv_units + ")"
                        ylabel = y_name[0] + " (" + y_unit[0] + ")"
                        clabel = 'Time'
                        title = ' '.join((deployment, r, m))

                        # plot non-erroneous data
                        fig, ax = pf.plot_profiles(
                            ndata, zpressure, dtime, ylabel, xlabel, clabel,
                            end_times, deployments, stdev=None)
                        ax.set_title(title, fontsize=9)
                        ax.plot(n_avg, y_avg, '-k')
                        ax.fill_betweenx(y_avg, n0_std, n1_std, color='m',
                                         alpha=0.2)
                        leg_text = (
                            'removed {} fill values, {} NaNs, {} Extreme Values (1e7), {} Global ranges [{} - {}]'
                            .format(len(z) - lenfv, len(z) - lennan,
                                    len(z) - lenev, lengr, global_min,
                                    global_max)
                            + '\n'
                            + '(black) data average in {} dbar segments'
                            .format(zcell_size)
                            + '\n'
                            + '(magenta) upper and lower {} percentile envelope in {} dbar segments'
                            .format(inpercentile, zcell_size),)
                        ax.legend(leg_text, loc='upper center',
                                  bbox_to_anchor=(0.5, -0.17), fontsize=6)
                        fig.tight_layout()
                        sfile = '_'.join(('rm_erroneous_data', sname))
                        pf.save_fig(save_dir, sfile)

                        # plot excluding time ranges for suspect data
                        if len(z_nospct) != len(zpressure):
                            fig, ax = pf.plot_profiles(
                                z_nospct, y_nospct, t_nospct, ylabel, xlabel,
                                clabel, end_times, deployments, stdev=None)
                            ax.set_title(title, fontsize=9)
                            leg_text = (
                                'removed {} in the upper and lower {} percentile of data grouped in {} dbar segments'
                                .format(len(zpressure) - len(z_nospct),
                                        inpercentile, zcell_size),)
                            ax.legend(leg_text, loc='upper center',
                                      bbox_to_anchor=(0.5, -0.17), fontsize=6)
                            fig.tight_layout()
                            sfile = '_'.join(('rm_suspect_data', sname))
                            pf.save_fig(save_dir, sfile)

                        # plot excluding time ranges from data portal export
                        if len(z_nospct) != len(z_portal):
                            fig, ax = pf.plot_profiles(
                                z_portal, y_portal, t_portal, ylabel, xlabel,
                                clabel, end_times, deployments, stdev=None)
                            ax.set_title(title, fontsize=9)
                            leg_text = (
                                'excluded {} suspect data when inspected visually'
                                .format(len(z_nospct) - len(z_portal)),)
                            ax.legend(leg_text, loc='upper center',
                                      bbox_to_anchor=(0.5, -0.17), fontsize=6)
                            fig.tight_layout()
                            sfile = '_'.join(('rm_v_suspect_data', sname))
                            pf.save_fig(save_dir, sfile)

                        # Plot excluding a selected depth value; only when
                        # the depth cut-off actually removed samples
                        if len(z_array) != len(z_portal):
                            fig, ax = pf.plot_profiles(
                                z_array, y_array, t_array, ylabel, xlabel,
                                clabel, end_times, deployments, stdev=None)
                            ax.set_title(title, fontsize=9)
                            leg_text = (
                                'excluded {} suspect data in water depth greater than {} dbar'
                                .format(n_depth, zdbar),)
                            ax.legend(leg_text, loc='upper center',
                                      bbox_to_anchor=(0.5, -0.17), fontsize=6)
                            fig.tight_layout()
                            sfile = '_'.join(('rm_depth_range', sname))
                            pf.save_fig(save_dir, sfile)
def main(sDir, f, start_time, end_time):
    """Save profile plots of the raw variables of every listed dataset.

    The CSV file ``f`` inside ``sDir`` supplies the URLs in its
    ``outputUrl`` column.  For each NetCDF file behind those URLs, and for
    each raw variable except the pressure variable, two figures are written
    to ``<sDir>/<subsite>/<refdes>/profile_plots/<deployment>``: one plotted
    with ``stdev=None`` and one with ``stdev=5`` (suffix ``rmoutliers``).

    :param sDir: directory that holds ``f`` and receives the figures
    :param f: name of the CSV file with an ``outputUrl`` column
    :param start_time: start of the time window, or None
    :param end_time: end of the time window, or None
    :return: None
    """
    url_table = pd.read_csv(os.path.join(sDir, f))
    url_list = url_table['outputUrl'].tolist()

    for url_no, u in enumerate(url_list, start=1):
        print('\nUrl {} of {}: {}'.format(url_no, len(url_list), u))
        main_sensor = u.split('/')[-2].split('-')[4]
        datasets = cf.get_nc_urls([u])
        datasets_sel = cf.filter_collocated_instruments(main_sensor, datasets)

        for ds_no, d in enumerate(datasets_sel, start=1):
            print('\nDataset {} of {}: {}'.format(ds_no, len(datasets_sel), d))
            with xr.open_dataset(d, mask_and_scale=False) as ds:
                ds = ds.swap_dims({'obs': 'time'})

                # optional time window; skip the file when nothing is left
                if not (start_time is None or end_time is None):
                    ds = ds.sel(time=slice(start_time, end_time))
                    if len(ds['time'].values) == 0:
                        print(
                            'No data to plot for specified time range: ({} to {})'
                            .format(start_time, end_time))
                        continue

                fname, subsite, refdes, method, stream, deployment = \
                    cf.nc_attributes(d)
                var_names = ds.data_vars.keys()

                if 'MOAS' in subsite and 'CTD' in main_sensor:
                    # for glider CTDs, pressure is a coordinate
                    pressure = 'sci_water_pressure_dbar'
                else:
                    pressure = pf.pressure_var(ds, var_names)

                # everything raw except the pressure variable gets plotted
                raw_vars = [name for name in cf.return_raw_vars(var_names)
                            if name != pressure]

                save_dir = os.path.join(sDir, subsite, refdes,
                                        'profile_plots', deployment)
                cf.create_dir(save_dir)

                t = ds['time'].values
                time_fmt = '%Y-%m-%dT%H:%M:%S'
                t0 = pd.to_datetime(t.min()).strftime(time_fmt)
                t1 = pd.to_datetime(t.max()).strftime(time_fmt)
                title = ' '.join((deployment, refdes, method))
                colors = cm.rainbow(np.linspace(0, 1, len(t)))

                y = ds[pressure]

                print('Plotting variables...')
                for var in raw_vars:
                    print(var)
                    x = ds[var]
                    xlabel = var + " (" + x.units + ")"
                    ylabel = pressure + " (" + y.units + ")"

                    # first all data, then the same plot with outliers removed
                    for stdev, suffix in ((None, ()), (5, ('rmoutliers',))):
                        fig, ax = pf.plot_profiles(x, y, colors, ylabel,
                                                   xlabel, stdev=stdev)
                        ax.set_title((title + '\n' + t0 + ' - ' + t1),
                                     fontsize=9)
                        sfile = '_'.join((fname[0:-46], x.name) + suffix)
                        pf.save_fig(save_dir, sfile)
def _profile_pressure_name(ds, subsite, main_sensor):
    """Return the name of the variable in *ds* used as the pressure axis."""
    if 'MOAS' in subsite:
        if 'CTD' in main_sensor:
            # for glider CTDs, pressure is a coordinate
            return 'sci_water_pressure_dbar'
        return 'int_ctd_pressure'
    return pf.pressure_var(ds, ds.data_vars.keys())


def _single_label(values):
    """Collapse the collected attribute values into one string for an axis label."""
    if len(values) == 1:
        return values[0]
    return ', '.join(str(v) for v in values)


def _drop_unusable_samples(t, y, z, fv):
    """Drop fill values, NaNs and extreme values from the aligned arrays t, y, z.

    Each mask is computed from the array it is applied to, so the three
    filters can be chained regardless of how many samples the previous step
    removed. The number of samples removed by each step is printed.

    Returns the filtered (t, y, z).
    """
    keep = z != fv
    t_nofv, y_nofv, z_nofv = t[keep], y[keep], z[keep]
    print(len(z) - len(z_nofv), ' fill values')

    keep = ~np.isnan(z_nofv)
    t_nonan, y_nonan, z_nonan = t_nofv[keep], y_nofv[keep], z_nofv[keep]
    print(len(z_nofv) - len(z_nonan), ' NaNs')

    keep = cf.reject_extreme_values(z_nonan)
    t_noev, y_noev, z_noev = t_nonan[keep], y_nonan[keep], z_nonan[keep]
    print(len(z_nonan) - len(z_noev), ' Extreme Values', '|1e7|')

    return t_noev, y_noev, z_noev


def main(url_list, sDir, plot_type, deployment_num, start_time, end_time):
    """Plot depth-binned profile statistics for each science variable.

    For every reference designator and method-stream found in ``url_list``
    the matching netCDF files are read, the science variables named in the
    data-review JSON are concatenated across files, unusable samples are
    dropped, and per variable a ``<name>_statistics.csv`` table and a
    ``<name>_statistics`` figure (scatter per depth group, mean profile and
    3-standard-deviation envelope) are written.

    url_list : list of URL strings; the second-to-last path element is split
        on '-' to get the reference designator and the method-stream
    sDir : path to the directory on your machine to save files
    plot_type : folder name for a plot type
    deployment_num : deployment number to keep, or None to use every file
    start_time, end_time : bounds of the time slice; applied only when BOTH
        are not None
    """
    outlier_sd = 5  # passed to cf.reject_outliers and reported in the legend
    time_format = '%Y-%m-%dT%H:%M:%S'
    wanted_deployment = None if deployment_num is None else int(deployment_num)

    rd_list = []
    ms_list = []
    for uu in url_list:
        elements = uu.split('/')[-2].split('-')
        rd = '-'.join((elements[1], elements[2], elements[3], elements[4]))
        ms = uu.split(rd + '-')[1].split('/')[0]
        if rd not in rd_list:
            rd_list.append(rd)
        if ms not in ms_list:
            ms_list.append(ms)

    # separate different instruments
    for r in rd_list:
        print('\n{}'.format(r))
        subsite = r.split('-')[0]
        array = subsite[0:2]
        main_sensor = r.split('-')[-1]

        ps_df, n_streams = cf.get_preferred_stream_info(r)

        # read in the analysis file
        dr_data = cf.refdes_datareview_json(r)

        # get end times of deployments
        # NOTE(review): deployments/end_times are filled here but not read
        # again in this function - confirm whether they are still needed.
        deployments = []
        end_times = []
        for index, row in ps_df.iterrows():
            deploy_number = int(row['deployment'][-4:])
            deploy_info = get_deployment_information(dr_data, deploy_number)
            deployments.append(deploy_number)
            end_times.append(pd.to_datetime(deploy_info['stop_date']))

        # get the list of data files and filter out collocated instruments
        # and other streams
        datasets = []
        for u in url_list:
            print(u)
            splitter = u.split('/')[-2].split('-')
            rd_check = '-'.join(
                (splitter[1], splitter[2], splitter[3], splitter[4]))
            if rd_check == r:
                datasets.extend(cf.get_nc_urls([u]))
        fdatasets = cf.filter_collocated_instruments(main_sensor, datasets)
        fdatasets = cf.filter_other_streams(r, ms_list, fdatasets)

        # separate the data files by methods
        for ms in ms_list:
            fdatasets_sel = [x for x in fdatasets if ms in x]

            # create a dictionary for science variables from analysis file
            stream_sci_vars_dict = dict()
            for data_stream in dr_data['instrument']['data_streams']:
                dr_ms = '-'.join(
                    (data_stream['method'], data_stream['stream_name']))
                if ms == dr_ms:
                    sci_vars = {
                        param['name']: dict(db_units=param['unit'])
                        for param in data_stream['stream']['parameters']
                        if param['data_product_type'] == 'Science Data'
                    }
                    stream_sci_vars_dict[dr_ms] = dict(vars=sci_vars)

            # initialize an empty data array for science variables in dictionary
            sci_vars_dict = cd.initialize_empty_arrays(stream_sci_vars_dict, ms)

            print('\nAppending data from files: {}'.format(ms))
            y_unit = []
            y_name = []
            for fd in fdatasets_sel:
                print(fd)
                (fname, subsite, refdes, method, stream,
                 deployment) = cf.nc_attributes(fd)

                # Skip unwanted deployments before the file is opened.
                # Assumes the deployment label ends in a 4-digit number, the
                # same parsing applied to the preferred-stream table above.
                if (wanted_deployment is not None
                        and int(deployment[-4:]) != wanted_deployment):
                    continue

                with xr.open_dataset(fd, mask_and_scale=False) as ds:
                    if start_time is not None and end_time is not None:
                        ds = ds.sel(time=slice(start_time, end_time))
                        if len(ds['time'].values) == 0:
                            print(
                                'No data to plot for specified time range: ({} to {})'
                                .format(start_time, end_time))
                            continue

                    save_dir = os.path.join(sDir, array, subsite, refdes,
                                            plot_type, ms.split('-')[0],
                                            deployment)
                    cf.create_dir(save_dir)

                    # pressure is read at most once per file, and only if at
                    # least one science variable is accepted
                    pressure_values = None
                    for var, sh in sci_vars_dict[ms]['vars'].items():
                        data_var = ds[var]
                        if data_var.units != sh['db_units']:
                            continue

                        if data_var._FillValue not in sh['fv']:
                            sh['fv'].append(data_var._FillValue)
                        if data_var.units not in sh['units']:
                            sh['units'].append(data_var.units)

                        # time and science variable
                        sh['t'] = np.append(sh['t'], ds['time'].values)
                        sh['values'] = np.append(sh['values'], data_var.values)

                        # add pressure to dictionary of sci vars
                        if pressure_values is None:
                            pressure = _profile_pressure_name(
                                ds, subsite, main_sensor)
                            pressure_var = ds[pressure]
                            pressure_values = pressure_var.values
                            if pressure_var.units not in y_unit:
                                y_unit.append(pressure_var.units)
                            if pressure_var.long_name not in y_name:
                                y_name.append(pressure_var.long_name)
                        sh['pressure'] = np.append(sh['pressure'],
                                                   pressure_values)

            if len(y_unit) != 1:
                print('pressure unit varies UHHHHHHHHH')
            if len(y_name) != 1:
                print('pressure long name varies UHHHHHHHHH')
            # When the attribute varied between files every value is shown,
            # so building the label cannot fail on a list.
            ylabel = _single_label(y_name) + " (" + _single_label(y_unit) + ")"

            for m, n in sci_vars_dict.items():
                for sv, vinfo in n['vars'].items():
                    print(sv)
                    if len(vinfo['t']) < 1:
                        print('no variable data to plot')
                        continue

                    sv_units = vinfo['units'][0]
                    fv = vinfo['fv'][0]
                    t = vinfo['t']
                    z = vinfo['values']
                    y = vinfo['pressure']
                    t0 = pd.to_datetime(min(t)).strftime(time_format)
                    t1 = pd.to_datetime(max(t)).strftime(time_format)
                    title = ' '.join((r, ms.split('-')[1]))

                    # Check if the array is all NaNs
                    if np.isnan(z).all():
                        print('Array of all NaNs - skipping plot.')
                        continue
                    # Check if the array is all fill values
                    if not (z != fv).any():
                        print('Array of all fill values - skipping plot.')
                        continue

                    t_ok, y_ok, z_ok = _drop_unusable_samples(t, y, z, fv)
                    if len(y_ok) == 0:
                        continue

                    if m == 'common_stream_placeholder':
                        sname = '-'.join((r, sv))
                    else:
                        sname = '-'.join((r, m, sv))

                    xlabel = sv + " (" + sv_units + ")"

                    # Plot all data
                    # Label order (ylabel, xlabel) follows the other
                    # pf.plot_profiles call in this file.
                    colors = cm.rainbow(np.linspace(0, 1, len(t_ok)))
                    fig, ax = pf.plot_profiles(z_ok, y_ok, colors, ylabel,
                                               xlabel, stdev=None)
                    ax.set_title((
                        title + '\n' + str(deployment_num) + ': ' + t0 +
                        ' - ' + t1 + '\n' +
                        'used bin = 2 dbar to calculate an average profile (black line) and 3-STD envelope (shaded area)'
                    ), fontsize=9)
                    # NOTE(review): this figure is never written to disk; it
                    # is closed so open figures do not pile up. Save it
                    # before this line if it is wanted.
                    pyplot.close(fig)

                    # group by depth range
                    # NOTE(review): the title above mentions 2 dbar bins while
                    # the ranges below use a step of 1 - confirm which is
                    # intended.
                    columns = ['time', 'pressure', str(sv)]
                    ranges = list(
                        range(int(round(min(y_ok))), int(round(max(y_ok))),
                              1))
                    groups, d_groups = gt.group_by_depth_range(
                        t_ok, y_ok, z_ok, columns, ranges)

                    stats = groups.describe()[sv]
                    stats[stats['mean'].notnull()].to_csv(
                        '{}/{}_statistics.csv'.format(save_dir, sname),
                        index=True)

                    # One Axes only: every drawing call below targets `ax`
                    # directly.
                    fig, ax = pyplot.subplots(nrows=1, ncols=1)
                    pyplot.margins(y=.08, x=.02)
                    pyplot.grid()

                    y_avg, n_avg, n0_std, n1_std = [], [], [], []
                    n_groups = len(groups)
                    n_removed = 0
                    for ii in range(n_groups):
                        # each group owns three consecutive d_groups columns
                        # (time, pressure, value) starting at column 1
                        first_col = 3 * ii + 1
                        valid = d_groups[first_col].notnull()
                        xtime = d_groups[first_col][valid]
                        ypres = d_groups[first_col + 1][valid]
                        nval = d_groups[first_col + 2][valid]
                        group_colors = cm.rainbow(
                            np.linspace(0, 1, len(xtime)))

                        ind2 = cf.reject_outliers(nval, outlier_sd)
                        xD = nval[ind2]
                        yD = ypres[ind2]
                        nZ = group_colors[ind2]
                        n_removed += len(nval) - len(xD)

                        ax.scatter(xD, yD, c=nZ, s=2, edgecolor='None')

                        y_avg.append(ypres.mean())
                        n_avg.append(nval.mean())
                        n0_std.append(nval.mean() + 3 * nval.std())
                        n1_std.append(nval.mean() - 3 * nval.std())

                    ax.set_xlabel(xlabel, fontsize=9)
                    ax.set_ylabel(ylabel, fontsize=9)
                    ax.set_title((title + '\n' + t0 + ' - ' + t1),
                                 fontsize=9)
                    if n_groups > 0:
                        # created before the mean line is drawn, as before,
                        # and now reports the total over all depth groups
                        leg_text = ('removed {} outliers (SD={})'.format(
                            n_removed, outlier_sd), )
                        ax.legend(leg_text, loc='best', fontsize=6)

                    ax.plot(n_avg, y_avg, '-k')
                    ax.fill_betweenx(y_avg, n0_std, n1_std, color='m',
                                     alpha=0.2)
                    # invert_yaxis toggles, so it is called exactly once
                    ax.invert_yaxis()

                    sfile = '_'.join((sname, 'statistics'))
                    pf.save_fig(save_dir, sfile)