def depth_glider_cross_section(x, y, z, s=5, title=None, stdev=3, interactive=False):
    """
    Scatter a glider cross-section: x against y (depth axis, inverted), colored by z.

    Each of x, y and z is a dictionary holding a 'data' array and an 'info' entry (z['info'] must provide
    'label' and 'units'). The 'data' entries of all three dictionaries are overwritten in place with the
    filtered arrays.

    :param x: dictionary for the x-axis
    :param y: dictionary for the y-axis; only samples where y['data'] > 5 are kept
    :param z: dictionary for the color variable; samples flagged by reject_outliers are dropped
    :param s: marker size passed to scatter
    :param title: axes title
    :param stdev: threshold passed to reject_outliers
    :param interactive: forwarded to scatter as the picker argument
    :return: fig, ax
    """
    fig, ax = plt.subplots()
    plt.grid()

    # filtering is applied in the order y, x, z
    series = (y, x, z)

    # remove measurements at the surface: keep samples deeper than 5 meters (dbar)
    below_surface = y['data'] > 5
    for entry in series:
        entry['data'] = entry['data'][below_surface]

    # remove samples whose z value is rejected as an outlier
    not_outlier = reject_outliers(z['data'], stdev)
    for entry in series:
        entry['data'] = entry['data'][not_outlier]

    ax.invert_yaxis()
    points = plt.scatter(x['data'], y['data'], s=s, c=z['data'], edgecolors='face', picker=interactive)

    # add colorbar labelled "<label> (<units>)", with the tick offset notation disabled
    z_info = z['info']
    colorbar = fig.colorbar(points, ax=ax, label=z_info['label'] + " (" + z_info['units'] + ")")
    colorbar.formatter.set_useOffset(False)
    colorbar.update_ticks()

    ax.set_title(title)
    format_axes(ax, x_data=x['data'])
    set_labels(ax, x['info'], y['info'])
    return fig, ax
def plot_ctdmo(data_dict, var, stdev=None):
    """
    Plot one variable from several sensors on a single timeseries figure and save it.

    Nothing is returned; the figure is written with pf.save_fig under
    <sDir>/<subsite>/<first element of dms>, where sDir is read from module scope.

    :param data_dict: mapping of reference designator -> dictionary with the keys 'time', 'yD', 'median',
        'dms' and 'yunits'
    :param var: name of the variable being plotted (used for the y-label and the file name)
    :param stdev: if not None, threshold passed to cf.reject_outliers before plotting
    """
    colors10 = [
        'red', 'firebrick', 'orange', 'mediumseagreen', 'blue', 'darkgreen',
        'purple', 'indigo', 'slategray', 'black'
    ]
    colors16 = [
        'red', 'firebrick', 'orange', 'gold', 'mediumseagreen', 'darkcyan',
        'blue', 'darkgreen', 'purple', 'lightgray', 'slategray', 'black',
        'coral', 'gold', 'limegreen', 'midnightblue'
    ]
    # the palette depends only on the number of datasets, so choose it once
    colors = colors10 if len(data_dict) < 11 else colors16

    fig, ax1 = plt.subplots()
    sensor_list = []
    median_list = []

    for i, (key, value) in enumerate(data_dict.items()):
        t = value['time']
        y = value['yD']
        if stdev is not None:
            ind = cf.reject_outliers(value['yD'], stdev)
            t = t[ind]
            y = y[ind]

        refdes = str(key)
        sensor_list.append(refdes.split('-')[-1])
        median_list.append(value['median'])

        # wrap around the palette instead of raising IndexError when there are more than 16 datasets
        plt.scatter(t, y, c=colors[i % len(colors)], marker='.', s=.5)

    if not data_dict:
        # nothing was plotted, so there is nothing to label or save
        return

    # all datasets have been plotted: decorate and save, using the last dataset for the metadata
    plt.grid()
    plt.margins(y=.05, x=.05)

    # refdes on secondary y-axis only for pressure and density
    if var in ['ctdmo_seawater_pressure', 'density']:
        ax2 = ax1.twinx()
        ax2.set_ylim(ax1.get_ylim())
        plt.yticks(median_list, sensor_list, fontsize=7.5)
        plt.subplots_adjust(right=.85)

    pf.format_date_axis(ax1, fig)
    pf.y_axis_disable_offset(ax1)

    subsite = refdes.split('-')[0]
    dms_parts = value['dms'].split('-')
    title = subsite + ' ' + '-'.join((dms_parts[0], dms_parts[1]))
    ax1.set_ylabel((var + " (" + value['yunits'] + ")"), fontsize=9)
    ax1.set_title(title, fontsize=10)

    fname = '-'.join((subsite, value['dms'], var))
    if stdev is not None:
        fname = '-'.join((fname, 'outliers_rejected'))
    sdir = os.path.join(sDir, subsite, dms_parts[0])
    cf.create_dir(sdir)
    pf.save_fig(sdir, fname)
def main(sDir, f):
    """
    For each dataset listed in a csv file, plot all science variables against time on one figure,
    giving every variable its own y-axis, and save the figure.

    :param sDir: directory containing the csv file; figures are saved under
        <sDir>/<subsite>/<refdes>/<deployment>
    :param f: name of a csv file with an 'outputUrl' column
    """
    time_fmt = '%Y-%m-%dT%H:%M:%S'
    url_table = pd.read_csv(os.path.join(sDir, f))

    for d in cf.get_nc_urls(url_table['outputUrl'].tolist()):
        print(d)
        fname, subsite, refdes, method, stream, deployment = cf.nc_attributes(d)
        save_dir = os.path.join(sDir, subsite, refdes, deployment)
        cf.create_dir(save_dir)

        sci_vars = cf.return_science_vars(stream)
        n_vars = len(sci_vars)
        colors = cm.jet(np.linspace(0, 1, n_vars))

        with xr.open_dataset(d, mask_and_scale=False) as ds:
            ds = ds.swap_dims({'obs': 'time'})
            t = ds['time'].data
            t0 = pd.to_datetime(t.min()).strftime(time_fmt)
            t1 = pd.to_datetime(t.max()).strftime(time_fmt)
            title = ' '.join((deployment, refdes, method))

            # the first variable uses the base axes; every further one gets a twin sharing the x-axis
            fig, ax = plt.subplots()
            axes = [ax]
            axes.extend(ax.twinx() for _ in range(1, n_vars))

            # leave room on the right for the extra y-axes and space them evenly
            fig.subplots_adjust(right=0.6)
            right_additive = (0.98 - 0.6) / float(5)

            for i, (var_name, axis, c) in enumerate(zip(sci_vars, axes, colors)):
                if i > 0:
                    axis.spines['right'].set_position(('axes', 1. + right_additive * i))

                y = ds[var_name]
                keep = cf.reject_outliers(y, 5)
                yD = y.data[keep]
                x = t[keep]

                axis.plot(x, yD, '.', markersize=2, color=c)
                axis.set_ylabel((y.name + " (" + y.units + ")"), color=c, fontsize=9)
                axis.tick_params(axis='y', colors=c)

                if i == n_vars - 1:  # the last variable has been plotted
                    pf.format_date_axis(axis, fig)

            axes[0].set_title((title + '\n' + t0 + ' - ' + t1), fontsize=9)
            sfile = '_'.join((fname, 'timeseries'))
            pf.save_fig(save_dir, sfile)
def plot(x, y, title, stdev=None, line_style='.', g_ranges=False, color=None, interactive=False):
    """
    Plot y against x with a legend showing the max/min of y and, optionally, the number of rejected outliers.

    When stdev is given, x['data'] and y['data'] are overwritten in place with the filtered arrays.

    :param x: Dictionary must be in the form:
        {'data': numpy data array, 'info': {'label': axis label, 'units': axis units}}
    :param y: Dictionary in the same form as x
    :param title: axes title
    :param stdev: if not None, threshold passed to reject_outliers
    :param line_style: matplotlib format string for the line/markers
    :param g_ranges: if truthy, add_global_ranges is called and its result appended to the legend
    :param color: color passed to plt.plot
    :param interactive: if equal to True, the line is made pickable (picker=True)
    :return: fig, ax
    """
    if stdev is None:
        outlier_text = ''
    else:
        ind = reject_outliers(y['data'], stdev)
        y['data'] = y['data'][ind]
        x['data'] = x['data'][ind]
        outliers = str(len(ind) - sum(ind))
        # backslashes are doubled so the literals no longer rely on invalid escape sequences
        # (\p, \s); the resulting text is unchanged
        outlier_text = 'n removed $\\pm$ {}$\\sigma: $ {}'.format(stdev, outliers)

    fig, ax = plt.subplots()
    plt.grid()

    # same condition as before: only a value equal to True enables picking
    picker_kwargs = {'picker': True} if interactive == True else {}  # noqa: E712
    plt.plot(x['data'], y['data'], line_style, linewidth=1, markersize=3, color=color, **picker_kwargs)
    ax.set_title(title)

    # Format legend
    try:
        leg_text = ('$\\max:$ {:6.4f}\n$\\min:$ {:6.4f}\n{}'.format(np.nanmax(y['data']),
                                                                     np.nanmin(y['data']),
                                                                     outlier_text),)
    except ValueError:
        # np.nanmax / np.nanmin raise ValueError on an empty array: show no max/min entry
        leg_text = ()

    if g_ranges:
        gr = add_global_ranges(ax, y)
        leg_text += ('Global Ranges\n$\\max$: {} \n$\\min$: {}'.format(gr[1], gr[0]),)

    ax.legend(leg_text, loc='best', fontsize=8)
    format_axes(ax)
    set_labels(ax, x['info'], y['info'])
    return fig, ax
def plot_timeseries(x, y, y_name, stdev=None):
    """
    Create a simple timeseries plot

    :param x: array containing data for x-axis (e.g. time)
    :param y: .nc data array for plotting on the y-axis, including data values, coordinates, and variable attributes
    :param y_name: variable name used in the y-axis label
    :param stdev: desired standard deviation to exclude from plotting
    :return: fig, ax (the axes are left empty and undecorated if no data remain)
    """
    # isinstance (rather than an exact type test) so ndarray subclasses are also used as-is
    yval = y if isinstance(y, np.ndarray) else y.values
    if not isinstance(x, np.ndarray):
        x = x.values

    # bind every name used below so no path can reach the plotting code with an undefined variable
    xD, yD, leg_text = [], [], ()

    if stdev is None:
        xD = x
        yD = yval
    else:
        ind = cf.reject_extreme_values(yval)
        ydata = yval[ind]
        xdata = x[ind]

        if len(xdata) > 0:
            ind2 = cf.reject_outliers(ydata, stdev)
            yD = ydata[ind2]
            xD = xdata[ind2]
            outliers = str(len(y) - len(yD))
            leg_text = ('removed {} outliers (SD={})'.format(outliers, stdev), )

    fig, ax = plt.subplots()
    plt.grid()
    if len(xD) > 0:
        plt.plot(xD, yD, '.', markersize=2)

        y_units = get_units(y)
        ax.set_ylabel((y_name + " (" + y_units + ")"), fontsize=9)
        format_date_axis(ax, fig)
        y_axis_disable_offset(ax)
        ax.legend(leg_text, loc='best', fontsize=6)
    return fig, ax
def plot_profiles(x, y, t, ylabel, xlabel, clabel, stdev=None):
    """
    Create a profile plot for mobile instruments

    :param x: .nc data array containing data for plotting variable of interest (e.g. density)
    :param y: .nc data array containing data for plotting on the y-axis (e.g. pressure)
    :param t: .nc data array containing time data to be used for coloring (x,y) data pairs
    :param ylabel: label for the y-axis
    :param xlabel: label for the x-axis
    :param clabel: label for the colorbar
    :param stdev: desired standard deviation to exclude from plotting
    :return: fig, ax
    """
    def _plain(values):
        # numpy arrays and lists are used as-is; anything else is unwrapped through .values
        if isinstance(values, (np.ndarray, list)):
            return values
        return values.values

    # each input is checked against its OWN type (the y and x checks used to test the type of t)
    t = _plain(t)
    y = _plain(y)
    x = _plain(x)

    if stdev is None:
        xD = x
        yD = y
        tD = t
        leg_text = ()
    else:
        ind2 = cf.reject_outliers(x, stdev)
        xD = x[ind2]
        yD = y[ind2]
        tD = t[ind2]
        outliers = str(len(x) - len(xD))
        leg_text = ('removed {} outliers (SD={})'.format(outliers, stdev), )

    fig, ax = plt.subplots()
    plt.margins(y=.08, x=.02)
    plt.grid()
    sct = ax.scatter(xD, yD, c=tD, s=2, edgecolor='None', cmap='rainbow')
    cbar = plt.colorbar(sct, label=clabel)

    # relabel the colorbar ticks as dates
    cbar.ax.set_yticklabels(
        pd.to_datetime(cbar.ax.get_yticks()).strftime(date_format='%Y-%m-%d'))

    ax.invert_yaxis()
    ax.set_xlabel(xlabel, fontsize=9)
    ax.set_ylabel(ylabel, fontsize=9)
    ax.legend(leg_text, loc='best', fontsize=6)
    return fig, ax
def plot_outlier_comparison(x, y, title, stdev = 1, line_style='r-o', g_range=False):
    """
    Plot the data before (top panel) and after (bottom panel) outlier rejection.

    :param x: Dictionary must be in the form:
        {'data': numpy data array, 'info': {'label': axis label, 'units': axis units}}
    :param y: Dictionary in the same form as x
    :param title: title of the top panel
    :param stdev: threshold passed to reject_outliers
    :param line_style: matplotlib format string used in both panels
    :param g_range: if truthy, add_global_ranges is called on the bottom panel and its result appended
        to the legend
    :return: ax1, ax2 (top and bottom axes)
    """
    ind = reject_outliers(y['data'], stdev)
    outliers = str(len(ind) - sum(ind))
    # backslashes are doubled so the literals no longer rely on invalid escape sequences;
    # the resulting text is unchanged
    outlier_text = 'n removed $\\pm$ {}$\\sigma: $ {}'.format(stdev, outliers)

    # top panel: all data
    ax1 = plt.subplot(211)
    plt.plot(x['data'], y['data'], line_style, linewidth=2, markersize=2)
    plt.grid()
    format_axes(ax1)

    # Format legend
    y_kept = y['data'][ind]
    try:
        leg_text = ('$\\max:$ {:6.4f}\n$\\min:$ {:6.4f}\n{}'.format(np.nanmax(y_kept),
                                                                     np.nanmin(y_kept),
                                                                     outlier_text),)
    except ValueError:
        # np.nanmax / np.nanmin raise ValueError when nothing is left; match plot() and show no entry
        leg_text = ()

    # bottom panel: data with outliers removed
    ax2 = plt.subplot(212, sharex=ax1)
    plt.grid()
    plt.plot(x['data'][ind], y_kept, line_style, linewidth=2, markersize=2)
    format_axes(ax2)

    ax1.set_title(title)
    if g_range:
        gr = add_global_ranges(ax2, y)
        leg_text += ('Global Ranges $\\max$: {} $\\min$: {}'.format(gr[1], gr[0]),)

    ax2.legend(leg_text, loc='best', fontsize=8)
    ax1.set_ylabel(y['info']['label'] + " (" + y['info']['units'] + ")")
    ax2.set_ylabel(y['info']['label'] + " (" + y['info']['units'] + ")")
    ax2.set_xlabel(x['info']['label'] + " (" + x['info']['units'] + ")")
    return ax1, ax2
def plot_timeseries_panel(ds, x, vars, colors, stdev=None):
    """
    Create a timeseries plot with horizontal panels of each science parameter

    :param ds: dataset (e.g. .nc file opened with xarray) containing data for plotting
    :param x: array containing data for x-axis (e.g. time)
    :param vars: list of science variables to plot
    :param colors: list of colors to be used for plotting
    :param stdev: desired standard deviation to exclude from plotting
    :return: fig, ax exactly as returned by plt.subplots (a single Axes when only one variable is given)
    """
    fig, ax = plt.subplots(len(vars), sharex=True)

    # plt.subplots returns a bare Axes (not an array) for one panel; index through a 1-d view so a
    # single variable no longer fails on ax[i]
    panels = np.atleast_1d(ax)
    last = len(vars) - 1

    for i, var_name in enumerate(vars):
        panel = panels[i]
        y = ds[var_name]

        if stdev is None:
            yD = y.values
            xD = x
            leg_text = ()
        else:
            ind = cf.reject_extreme_values(y.values)
            ydata = y[ind]
            xdata = x[ind]

            ind2 = cf.reject_outliers(ydata.values, stdev)
            yD = ydata[ind2].values
            xD = xdata[ind2]
            outliers = str(len(y) - len(yD))
            leg_text = ('{}: rm {} outliers'.format(var_name, outliers), )

        y_units = get_units(y)
        c = colors[i]
        panel.plot(xD, yD, '.', markersize=2, color=c)
        panel.set_ylabel(('(' + y_units + ')'), fontsize=5)
        panel.tick_params(axis='y', labelsize=6)
        panel.legend(leg_text, loc='best', fontsize=4)
        y_axis_disable_offset(panel)
        if i == last:  # the last variable has been plotted
            format_date_axis(panel, fig)

    return fig, ax
def plot_timeseries_all(x, y, y_name, y_units, stdev=None):
    """
    Draw a dot timeseries of y against x, optionally after removing extreme values and outliers.

    :param x: array containing data for x-axis (e.g. time)
    :param y: array containing data for y-axis
    :param y_name: variable name used in the y-axis label
    :param y_units: units used in the y-axis label
    :param stdev: desired standard deviation to exclude from plotting (None plots everything)
    :return: fig, ax
    """
    # defaults: plot the data untouched, with an empty legend
    xD, yD, leg_text = x, y, ()

    if stdev is not None:
        # first pass drops extreme values, second pass drops outliers from what is left
        not_extreme = cf.reject_extreme_values(y)
        y_kept = y[not_extreme]
        x_kept = x[not_extreme]

        not_outlier = cf.reject_outliers(y_kept, stdev)
        yD = y_kept[not_outlier]
        xD = x_kept[not_outlier]

        n_removed = str(len(y) - len(yD))
        leg_text = ('removed {} outliers (SD={})'.format(n_removed, stdev), )

    fig, ax = plt.subplots()
    plt.grid()
    plt.plot(xD, yD, '.', markersize=2)

    y_label = y_name + " (" + y_units + ")"
    ax.set_ylabel(y_label, fontsize=9)
    format_date_axis(ax, fig)
    y_axis_disable_offset(ax)
    ax.legend(leg_text, loc='best', fontsize=6)
    return fig, ax
def main(sDir, url_list, start_time, end_time, preferred_only):
    """
    Create and save a temperature-salinity diagram for each dataset found under the supplied urls.

    :param sDir: root directory for output; figures go to <sDir>/<array>/<subsite>/<refdes>/ts_plots
    :param url_list: list of urls; the second-to-last path element of each url is split on '-' and
        elements 1-4 are joined to form the reference designator
    :param start_time: start of the time slice to plot, or None
    :param end_time: end of the time slice to plot, or None (the slice is applied only if both are given)
    :param preferred_only: 'yes' to plot only datasets matching the preferred stream information for each
        deployment; any other value plots every dataset found
    """
    rd_list = []
    for uu in url_list:
        elements = uu.split('/')[-2].split('-')
        rd = '-'.join((elements[1], elements[2], elements[3], elements[4]))
        if rd not in rd_list:
            rd_list.append(rd)

    for r in rd_list:
        print('\n{}'.format(r))
        datasets = []
        for u in url_list:
            splitter = u.split('/')[-2].split('-')
            rd_check = '-'.join((splitter[1], splitter[2], splitter[3], splitter[4]))
            if rd_check == r:
                udatasets = cf.get_nc_urls([u])
                datasets.append(udatasets)
        datasets = list(itertools.chain(*datasets))

        fdatasets = []
        if preferred_only == 'yes':
            # get the preferred stream information
            ps_df, n_streams = cf.get_preferred_stream_info(r)
            for index, row in ps_df.iterrows():
                for ii in range(n_streams):
                    rms = '-'.join((r, row[ii]))
                    for dd in datasets:
                        spl = dd.split('/')[-2].split('-')
                        catalog_rms = '-'.join((spl[1], spl[2], spl[3], spl[4], spl[5], spl[6]))
                        fdeploy = dd.split('/')[-1].split('_')[0]
                        if rms == catalog_rms and fdeploy == row['deployment']:
                            fdatasets.append(dd)
        else:
            fdatasets = datasets

        for fd in fdatasets:
            with xr.open_dataset(fd, mask_and_scale=False) as ds:
                ds = ds.swap_dims({'obs': 'time'})

                if start_time is not None and end_time is not None:
                    ds = ds.sel(time=slice(start_time, end_time))
                    if len(ds['time'].values) == 0:
                        print('No data to plot for specified time range: ({} to {})'.format(start_time, end_time))
                        continue

                fname, subsite, refdes, method, stream, deployment = cf.nc_attributes(fd)
                print('\nPlotting {} {}'.format(r, deployment))
                array = subsite[0:2]
                save_dir = os.path.join(sDir, array, subsite, refdes, 'ts_plots')
                cf.create_dir(save_dir)

                tme = ds['time'].values
                t0 = pd.to_datetime(tme.min()).strftime('%Y-%m-%dT%H:%M:%S')
                t1 = pd.to_datetime(tme.max()).strftime('%Y-%m-%dT%H:%M:%S')
                title = ' '.join((deployment, refdes, method))
                filename = '-'.join(('_'.join(fname.split('_')[:-1]), 'ts', t0[:10]))

                ds_vars = list(ds.data_vars.keys())
                raw_vars = cf.return_raw_vars(ds_vars)

                xvar = return_var(ds, raw_vars, 'salinity', 'Practical Salinity')
                sal = ds[xvar].values
                sal_fv = ds[xvar]._FillValue

                yvar = return_var(ds, raw_vars, 'temp', 'Seawater Temperature')
                temp = ds[yvar].values
                temp_fv = ds[yvar]._FillValue

                press = pf.pressure_var(ds, list(ds.coords.keys()))
                if press is None:
                    press = pf.pressure_var(ds, list(ds.data_vars.keys()))
                p = ds[press].values

                # get rid of nans, 0.0s, fill values
                sind1 = (~np.isnan(sal)) & (sal != 0.0) & (sal != sal_fv)
                sal, temp, tme, p = _mask_all(sind1, sal, temp, tme, p)
                tind1 = (~np.isnan(temp)) & (temp != 0.0) & (temp != temp_fv)
                sal, temp, tme, p = _mask_all(tind1, sal, temp, tme, p)

                # reject values outside global ranges (skipped when a range is not available)
                global_min, global_max = cf.get_global_ranges(r, xvar)
                if global_min is not None and global_max is not None:
                    sgr_ind = cf.reject_global_ranges(sal, global_min, global_max)
                    sal, temp, tme, p = _mask_all(sgr_ind, sal, temp, tme, p)

                global_min, global_max = cf.get_global_ranges(r, yvar)
                if global_min is not None and global_max is not None:
                    tgr_ind = cf.reject_global_ranges(temp, global_min, global_max)
                    sal, temp, tme, p = _mask_all(tgr_ind, sal, temp, tme, p)

                # get rid of outliers
                soind = cf.reject_outliers(sal, 5)
                sal, temp, tme, p = _mask_all(soind, sal, temp, tme, p)
                toind = cf.reject_outliers(temp, 5)
                sal, temp, tme, p = _mask_all(toind, sal, temp, tme, p)

                if len(sal) > 0:  # if there are any data to plot
                    colors = cm.rainbow(np.linspace(0, 1, len(tme)))

                    # Figure out boundaries (mins and maxes): pad by a fraction that depends on the spread
                    sal_range = sal.max() - sal.min()
                    sal_frac = 0.0005 if sal_range < 0.2 else 0.001
                    smin, smax = _pad_limits(sal.min(), sal.max(), sal_frac)

                    temp_range = temp.max() - temp.min()
                    if temp_range <= 1:
                        temp_frac = 0.01
                    elif temp_range < 1.5:
                        temp_frac = 0.05
                    else:
                        temp_frac = 0.1
                    tmin, tmax = _pad_limits(temp.min(), temp.max(), temp_frac)

                    # Calculate how many gridcells are needed in the x and y directions and
                    # Create temp and sal vectors of appropriate dimensions
                    xdim = int(round((smax - smin) / 0.1 + 1, 0))
                    if xdim == 1:
                        xdim = 2
                    si = np.linspace(0, xdim - 1, xdim) * 0.1 + smin

                    if 1.1 <= temp_range < 1.7:  # if the diff between min and max temp is small
                        ydim = int(round((tmax - tmin) / 0.75 + 1, 0))
                        ti = np.linspace(0, ydim - 1, ydim) * 0.75 + tmin
                    elif temp_range < 1.1:
                        ydim = int(round((tmax - tmin) / 0.1 + 1, 0))
                        ti = np.linspace(0, ydim - 1, ydim) * 0.1 + tmin
                    else:
                        ydim = int(round((tmax - tmin) + 1, 0))
                        ti = np.linspace(0, ydim - 1, ydim) + tmin

                    # Create empty grid of zeros
                    mdens = np.zeros((ydim, xdim))

                    # Loop to fill in grid with densities, calculated using the median pressure value
                    # (the median does not change inside the loop, so it is computed once)
                    p_median = np.median(p)
                    for j in range(0, ydim):
                        for i in range(0, xdim):
                            mdens[j, i] = gsw.density.rho(si[i], ti[j], p_median)

                    fig, ax = pf.plot_ts(si, ti, mdens, sal, temp, colors)

                    ax.set_title((title + '\n' + t0 + ' - ' + t1 + '\ncolors = time (cooler: earlier)'), fontsize=9)
                    leg_text = ('Removed {} values (SD=5)'.format(len(ds[xvar].values) - len(sal)),)
                    ax.legend(leg_text, loc='best', fontsize=6)
                    pf.save_fig(save_dir, filename)


def _mask_all(mask, *arrays):
    """Apply the same index/boolean mask to every array and return the results as a tuple."""
    return tuple(a[mask] for a in arrays)


def _pad_limits(lower, upper, fraction):
    """
    Widen [lower, upper] by `fraction` of each bound's magnitude.

    abs() keeps the padding pointing outwards for negative bounds; for non-negative bounds the result is
    the same as lower - fraction * lower and upper + fraction * upper.
    """
    return lower - fraction * abs(lower), upper + fraction * abs(upper)
def plot_xsection(subsite, x, y, z, clabel, ylabel, t_eng=None, m_water_depth=None, inpercentile=None, stdev=None):
    """
    Create a cross-section plot for mobile instruments

    :param subsite: subsite part of reference designator to plot
    :param x: array containing data for x-axis (e.g. time)
    :param y: .nc data array containing data for plotting on the y-axis (e.g. pressure)
    :param z: .nc data array containing data for plotting variable of interest (e.g. density)
    :param clabel: label for the colorbar
    :param ylabel: label for the y-axis
    :param t_eng: .nc data array containing engineering timestamps (to plot water depth)
    :param m_water_depth: .nc data array containing water depth data from the engineering data stream
    :param inpercentile: percentile of data to exclude from plot
    :param stdev: desired standard deviation to exclude from plotting
    :return: fig, ax, bar - or None, None, None if plotting raised a ValueError
    """
    if type(z) is not np.ndarray:
        z = z.values

    if type(y) is not np.ndarray:
        y = y.values

    if type(x) is not np.ndarray:
        x = x.values

    zeros = None     # number of values <= 0.0 blanked out (gliders only), as a string
    outliers = None  # number of outliers removed (only when stdev is given), as a string

    # when plotting gliders, remove zeros (glider fill values) and negative numbers
    if 'MOAS' in subsite:
        # build a new float array instead of assigning NaN in place: the caller's data are left
        # untouched and integer inputs no longer fail on the NaN assignment
        z = np.where(z <= 0.0, np.nan, z)
        zeros = str(len(z) - np.count_nonzero(~np.isnan(z)))

    if stdev is None:
        xD = x
        yD = y
        zD = z
    else:
        ind = cf.reject_extreme_values(z)
        xdata = x[ind]
        ydata = y[ind]
        zdata = z[ind]

        ind2 = cf.reject_outliers(zdata, stdev)
        xD = xdata[ind2]
        yD = ydata[ind2]
        zD = zdata[ind2]
        outliers = str(len(zdata) - len(zD))

    fig, ax = plt.subplots()
    plt.margins(y=.08, x=.02)
    try:
        xc = ax.scatter(xD, yD, c=zD, s=2, edgecolor='None')
        ax.invert_yaxis()

        # add bathymetry for coastal gliders
        if t_eng is not None and m_water_depth is not None:
            if len(t_eng) > 1:
                ax.fill_between(t_eng, m_water_depth, np.max(m_water_depth) + 2, facecolor='k', alpha=0.4)

        # add color bar
        bar = fig.colorbar(xc, ax=ax, label=clabel, extend='both')
        bar.formatter.set_useOffset(False)
        bar.ax.tick_params(labelsize=8)

        if inpercentile is not None:
            # NaN-aware percentiles: zD can contain the NaNs inserted above, which would make
            # np.percentile return NaN for both limits
            upper_lim = np.nanpercentile(zD, 100 - inpercentile)
            lower_lim = np.nanpercentile(zD, inpercentile)
            # NOTE(review): Colorbar.set_clim and the update_ticks keyword are not available in newer
            # matplotlib releases - confirm against the pinned version before upgrading
            bar.set_clim(lower_lim, upper_lim)
            bar.set_ticks([lower_lim, upper_lim], update_ticks=True)

        ax.set_ylabel(ylabel, fontsize=9)
        format_date_axis(ax, fig)

        # legend describing what was removed; no legend if nothing was filtered
        if zeros is not None and outliers is not None:
            leg = ('rm: {} values <=0.0, rm: {} outliers (SD={})'.format(zeros, outliers, stdev), )
            ax.legend(leg, loc=1, fontsize=6)
        elif outliers is not None:
            leg = ('rm: {} outliers (SD={})'.format(outliers, stdev), )
            ax.legend(leg, loc=1, fontsize=6)
        elif zeros is not None:
            leg = ('rm: {} values <=0.0'.format(zeros), )
            ax.legend(leg, loc=1, fontsize=6)

    except ValueError:
        print("plot can't be generated")
        fig = None
        ax = None
        bar = None

    return fig, ax, bar
def plot_timeseries_compare(t0, t1, var0, var1, m0, m1, long_name, stdev=None):
    """
    Plot two datasets of the same variable on one timeseries figure.

    :param t0: data array of time for dataset 0
    :param t1: data array of time for dataset 1
    :param var0: .nc data array for plotting on the y-axis for dataset 0, including data values and variable attributes
    :param var1: .nc data array for plotting on the y-axis for dataset 1, including data values and variable attributes
    :param m0: legend label for dataset 0
    :param m1: legend label for dataset 1
    :param long_name: variable name used in the y-axis label
    :param stdev: desired standard deviation to exclude from plotting
    :return: fig, ax
    """
    def _prepare(times, var, label):
        # returns (time values, data values, legend entry) for one dataset
        if stdev is None:
            return times.values, var.values, '{}'.format(label)

        # drop extreme values first, then outliers from what remains
        not_extreme = cf.reject_extreme_values(var.values)
        times_kept = times[not_extreme]
        var_kept = var[not_extreme]

        not_outlier = cf.reject_outliers(var_kept.values, stdev)
        time_values = times_kept[not_outlier].values
        data_values = var_kept[not_outlier].values

        # removed samples plus any NaNs still present in the plotted data
        n_dropped = len(var) - len(data_values)
        n_nan = len(time_values) - np.count_nonzero(~np.isnan(data_values))
        entry = '{}: removed {} outliers (SD={})'.format(label, str(n_dropped + n_nan), stdev)
        return time_values, data_values, entry

    t0_data, var0_data, entry0 = _prepare(t0, var0, m0)
    t1_data, var1_data, entry1 = _prepare(t1, var1, m1)
    leg_text = (entry0, entry1)

    y_units = get_units(var0)

    fig, ax = plt.subplots()
    plt.grid()

    # dataset 0: open red circles; dataset 1: small blue dots
    ax.plot(t0_data, var0_data, 'o', markerfacecolor='none', markeredgecolor='r', markersize=5, lw=.75)
    ax.plot(t1_data, var1_data, '.', markeredgecolor='b', markersize=2)

    ax.set_ylabel((long_name + " (" + y_units + ")"), fontsize=9)
    format_date_axis(ax, fig)
    y_axis_disable_offset(ax)
    ax.legend(leg_text, loc='best', fontsize=6)
    return fig, ax
def main(url_list, sDir, plot_type, deployment_num, start_time, end_time):
    """
    Collect science-variable, time and pressure data per instrument and method-stream, write
    depth-grouped statistics to csv and save a profile figure with the group means and a
    3-standard-deviation envelope.

    :param url_list: list of paths to instrument data by methods
    :param sDir: path to the directory on your machine to save files
    :param plot_type: folder name for a plot type
    :param deployment_num: deployment number to select, or None to use every deployment
    :param start_time: start of the time slice to use, or None
    :param end_time: end of the time slice to use, or None (the slice is applied only if both are given)
    """
    n_std = 5  # threshold passed to cf.reject_outliers for each depth group

    rd_list = []
    ms_list = []
    for uu in url_list:
        elements = uu.split('/')[-2].split('-')
        rd = '-'.join((elements[1], elements[2], elements[3], elements[4]))
        ms = uu.split(rd + '-')[1].split('/')[0]
        if rd not in rd_list:
            rd_list.append(rd)
        if ms not in ms_list:
            ms_list.append(ms)

    # separate different instruments
    for r in rd_list:
        print('\n{}'.format(r))
        subsite = r.split('-')[0]
        array = subsite[0:2]
        main_sensor = r.split('-')[-1]

        ps_df, n_streams = cf.get_preferred_stream_info(r)

        # read in the analysis file
        dr_data = cf.refdes_datareview_json(r)

        # get end times of deployments
        # NOTE(review): deployments / end_times are filled but not used further down - confirm
        # whether they are still needed
        deployments = []
        end_times = []
        for index, row in ps_df.iterrows():
            deploy = row['deployment']
            deploy_info = get_deployment_information(dr_data, int(deploy[-4:]))
            deployments.append(int(deploy[-4:]))
            end_times.append(pd.to_datetime(deploy_info['stop_date']))

        # get the list of data files and filter out collocated instruments and other streams
        datasets = []
        for u in url_list:
            print(u)
            splitter = u.split('/')[-2].split('-')
            rd_check = '-'.join((splitter[1], splitter[2], splitter[3], splitter[4]))
            if rd_check == r:
                udatasets = cf.get_nc_urls([u])
                datasets.append(udatasets)
        datasets = list(itertools.chain(*datasets))
        fdatasets = cf.filter_collocated_instruments(main_sensor, datasets)
        fdatasets = cf.filter_other_streams(r, ms_list, fdatasets)

        # separate the data files by methods
        for ms in ms_list:
            fdatasets_sel = [x for x in fdatasets if ms in x]

            # create a dictionary for science variables from analysis file
            stream_sci_vars_dict = dict()
            for x in dr_data['instrument']['data_streams']:
                dr_ms = '-'.join((x['method'], x['stream_name']))
                if ms == dr_ms:
                    stream_sci_vars_dict[dr_ms] = dict(vars=dict())
                    sci_vars = dict()
                    for y in x['stream']['parameters']:
                        if y['data_product_type'] == 'Science Data':
                            sci_vars.update({y['name']: dict(db_units=y['unit'])})
                    if len(sci_vars) > 0:
                        stream_sci_vars_dict[dr_ms]['vars'] = sci_vars

            # initialize an empty data array for science variables in dictionary
            sci_vars_dict = cd.initialize_empty_arrays(stream_sci_vars_dict, ms)

            print('\nAppending data from files: {}'.format(ms))
            y_unit = []
            y_name = []
            for fd in fdatasets_sel:
                # context manager so the file is closed once its values have been copied out
                with xr.open_dataset(fd, mask_and_scale=False) as ds:
                    print(fd)

                    if start_time is not None and end_time is not None:
                        ds = ds.sel(time=slice(start_time, end_time))
                        if len(ds['time'].values) == 0:
                            print('No data to plot for specified time range: ({} to {})'.format(start_time, end_time))
                            continue

                    fname, subsite, refdes, method, stream, deployment = cf.nc_attributes(fd)

                    if deployment_num is not None:
                        # compare by value (not identity) and read the number from the last four
                        # characters, as done for row['deployment'] above; splitting on '0' broke
                        # for numbers such as 10 or 103
                        if int(deployment[-4:]) != int(deployment_num):
                            continue

                    save_dir = os.path.join(sDir, array, subsite, refdes, plot_type, ms.split('-')[0], deployment)
                    cf.create_dir(save_dir)

                    for var in list(sci_vars_dict[ms]['vars'].keys()):
                        sh = sci_vars_dict[ms]['vars'][var]
                        if ds[var].units != sh['db_units']:
                            continue

                        if ds[var]._FillValue not in sh['fv']:
                            sh['fv'].append(ds[var]._FillValue)
                        if ds[var].units not in sh['units']:
                            sh['units'].append(ds[var].units)

                        # time and sci variable
                        t = ds['time'].values
                        z = ds[var].values
                        sh['t'] = np.append(sh['t'], t)
                        sh['values'] = np.append(sh['values'], z)

                        # add pressure to dictionary of sci vars
                        if 'MOAS' in subsite:
                            if 'CTD' in main_sensor:  # for glider CTDs, pressure is a coordinate
                                pressure = 'sci_water_pressure_dbar'
                            else:
                                pressure = 'int_ctd_pressure'
                        else:
                            pressure = pf.pressure_var(ds, ds.data_vars.keys())

                        y = ds[pressure].values
                        if ds[pressure].units not in y_unit:
                            y_unit.append(ds[pressure].units)
                        if ds[pressure].long_name not in y_name:
                            y_name.append(ds[pressure].long_name)

                        sh['pressure'] = np.append(sh['pressure'], y)

            # NOTE(review): when the units or long names vary, y_unit / y_name stay lists and the
            # y-label concatenation below raises TypeError - decide how that case should be labelled
            if len(y_unit) != 1:
                print('pressure unit varies UHHHHHHHHH')
            else:
                y_unit = y_unit[0]

            if len(y_name) != 1:
                print('pressure long name varies UHHHHHHHHH')
            else:
                y_name = y_name[0]

            for m, n in sci_vars_dict.items():
                for sv, vinfo in n['vars'].items():
                    print(sv)
                    if len(vinfo['t']) < 1:
                        print('no variable data to plot')
                        continue

                    sv_units = vinfo['units'][0]
                    fv = vinfo['fv'][0]
                    t0 = pd.to_datetime(min(vinfo['t'])).strftime('%Y-%m-%dT%H:%M:%S')
                    t1 = pd.to_datetime(max(vinfo['t'])).strftime('%Y-%m-%dT%H:%M:%S')
                    t = vinfo['t']
                    z = vinfo['values']
                    y = vinfo['pressure']

                    title = ' '.join((r, ms.split('-')[1]))

                    # Check if the array is all NaNs
                    if np.isnan(z).all():
                        print('Array of all NaNs - skipping plot.')
                        continue

                    # Check if the array is all fill values
                    if len(z[z != fv]) == 0:
                        print('Array of all fill values - skipping plot.')
                        continue

                    # reject fill values
                    fv_ind = z != fv
                    y_nofv = y[fv_ind]
                    t_nofv = t[fv_ind]
                    z_nofv = z[fv_ind]
                    print(len(z) - len(z_nofv), ' fill values')

                    # reject NaNs: the mask is built from the already-filtered values so that its
                    # length matches the arrays it is applied to
                    nan_ind = ~np.isnan(z_nofv)
                    t_nofv_nonan = t_nofv[nan_ind]
                    y_nofv_nonan = y_nofv[nan_ind]
                    z_nofv_nonan = z_nofv[nan_ind]
                    print(len(z_nofv) - len(z_nofv_nonan), ' NaNs')

                    # reject extreme values
                    ev_ind = cf.reject_extreme_values(z_nofv_nonan)
                    t_nofv_nonan_noev = t_nofv_nonan[ev_ind]
                    colors = cm.rainbow(np.linspace(0, 1, len(t_nofv_nonan_noev)))
                    y_nofv_nonan_noev = y_nofv_nonan[ev_ind]
                    z_nofv_nonan_noev = z_nofv_nonan[ev_ind]
                    print(len(z_nofv_nonan) - len(z_nofv_nonan_noev), ' Extreme Values', '|1e7|')

                    if len(y_nofv_nonan_noev) == 0:
                        continue

                    if m == 'common_stream_placeholder':
                        sname = '-'.join((r, sv))
                    else:
                        sname = '-'.join((r, m, sv))

                    # Plot all data
                    ylabel = y_name + " (" + y_unit + ")"
                    xlabel = sv + " (" + sv_units + ")"
                    clabel = sv + " (" + sv_units + ")"

                    # NOTE(review): this figure is titled but replaced below without being saved;
                    # also confirm the argument list against pf.plot_profiles
                    fig, ax = pf.plot_profiles(z_nofv_nonan_noev, y_nofv_nonan_noev, colors,
                                               xlabel, ylabel, stdev=None)
                    ax.set_title((
                        title + '\n' + str(deployment_num) + ': ' + t0 + ' - ' + t1 + '\n' +
                        'used bin = 2 dbar to calculate an average profile (black line) and 3-STD envelope (shaded area)'
                    ), fontsize=9)

                    # group by depth range
                    columns = ['time', 'pressure', str(sv)]
                    ranges = list(range(int(round(min(y_nofv_nonan_noev))),
                                        int(round(max(y_nofv_nonan_noev))), 1))
                    groups, d_groups = gt.group_by_depth_range(t_nofv_nonan_noev, y_nofv_nonan_noev,
                                                               z_nofv_nonan_noev, columns, ranges)

                    # write the per-group statistics, keeping only groups that have a mean
                    sv_stats = groups.describe()[sv]
                    ind = sv_stats['mean'].notnull()
                    sv_stats[ind].to_csv('{}/{}_statistics.csv'.format(save_dir, sname), index=True)

                    # NOTE(review): subplots(nrows=2) returns an array of Axes, and calling Axes
                    # methods on that array raises AttributeError; the top panel is used here -
                    # decide what the second panel is meant to show
                    fig, axes = pyplot.subplots(nrows=2, ncols=1)
                    ax = axes[0]
                    pyplot.margins(y=.08, x=.02)
                    pyplot.grid()

                    # decorate once; inverting the y-axis inside the group loop flipped it back and
                    # forth on every iteration
                    ax.invert_yaxis()
                    ax.set_xlabel(clabel, fontsize=9)
                    ax.set_ylabel(ylabel, fontsize=9)
                    ax.set_title((title + '\n' + t0 + ' - ' + t1), fontsize=9)

                    y_avg, n_avg, n_min, n_max, n0_std, n1_std, l_arr = [], [], [], [], [], [], []

                    # d_groups columns are read in triples per group: (ii + tm), (ii + tm + 1) and
                    # (ii + tm + 2), with tm growing by 2 on each iteration
                    tm = 1
                    for ii in range(len(groups)):
                        nan_ind = d_groups[ii + tm].notnull()
                        xtime = d_groups[ii + tm][nan_ind]
                        colors = cm.rainbow(np.linspace(0, 1, len(xtime)))
                        ypres = d_groups[ii + tm + 1][nan_ind]
                        nval = d_groups[ii + tm + 2][nan_ind]
                        tm += 2

                        ind2 = cf.reject_outliers(nval, n_std)
                        xD = nval[ind2]
                        yD = ypres[ind2]
                        nZ = colors[ind2]
                        outliers = str(len(nval) - len(xD))
                        # the legend reports the threshold actually used (the name 'stdev' referenced
                        # here before is not defined in this function)
                        leg_text = ('removed {} outliers (SD={})'.format(outliers, n_std), )

                        ax.scatter(xD, yD, c=nZ, s=2, edgecolor='None')
                        ax.legend(leg_text, loc='best', fontsize=6)

                        l_arr.append(len(nval))  # count of data to filter out small groups
                        y_avg.append(ypres.mean())
                        n_avg.append(nval.mean())
                        n_min.append(nval.min())
                        n_max.append(nval.max())
                        n0_std.append(nval.mean() + 3 * nval.std())
                        n1_std.append(nval.mean() - 3 * nval.std())

                    # mean profile (black line) and +/- 3 standard deviation envelope (shaded)
                    ax.plot(n_avg, y_avg, '-k')
                    ax.fill_betweenx(y_avg, n0_std, n1_std, color='m', alpha=0.2)

                    sfile = '_'.join((sname, 'statistics'))
                    pf.save_fig(save_dir, sfile)